Yetixx
Yetixx
Server: nginx/1.28.0
System: Linux instance-rr9enuui 6.1.0-15-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.1.66-1 (2023-12-09) x86_64
User: www (1000)
PHP: 8.0.26
Disabled: passthru,exec,system,putenv,chroot,chgrp,chown,shell_exec,popen,proc_open,pcntl_exec,ini_alter,ini_restore,dl,openlog,syslog,readlink,symlink,popepassthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,imap_open,apache_setenv
Upload Files
File: //opt/bcm-agent/log/bcm-si.instance-rr9enuui.root.log.INFO.20260319-102916.543705
Log file created at: 2026/03/19 10:29:16
Running on machine: instance-rr9enuui
Binary: Built with gc go1.23.8 for linux/amd64
Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg
I0319 10:29:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:29:16.458167  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:29:16.458186  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:29:16.472517  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:29:18.691090  543705 disk_info.go:125] begin check local disk info of client
I0319 10:29:18.693461  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:29:18.693467  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be00 0xc00007be40]
E0319 10:29:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:29:23.409775  543705 memory.go:184] no items to output this cycle
I0319 10:29:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 10:29:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:29:33.409781  543705 memory.go:184] no items to output this cycle
I0319 10:29:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 10:29:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:29:43.409808  543705 memory.go:191] Add success.
I0319 10:29:43.409821  543705 cpu.go:282] Add success.
I0319 10:29:43.419924  543705 net.go:648] Add success.
I0319 10:29:43.422685  543705 net.go:770] primary dev: ETH0
I0319 10:29:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:29:43.422710  543705 net.go:698] Add success.
I0319 10:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:29:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:29:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:29:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:29:53.409798  543705 memory.go:184] no items to output this cycle
I0319 10:29:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 10:30:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:30:03.409775  543705 memory.go:184] no items to output this cycle
I0319 10:30:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 10:30:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:30:13.409792  543705 memory.go:191] Add success.
I0319 10:30:13.409798  543705 cpu.go:282] Add success.
W0319 10:30:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:30:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:30:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:30:13.420216  543705 net.go:648] Add success.
I0319 10:30:13.423031  543705 net.go:770] primary dev: ETH0
I0319 10:30:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:30:13.423057  543705 net.go:698] Add success.
I0319 10:30:13.652533  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dc5ca1b0-1e5d-4749-aaed-f8c5060d09a6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:30:13.652565  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:30:14.454677  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:30:14.454843  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:30:14.454921  543705 disk_worker.go:708] disk space is not compliant
W0319 10:30:14.454924  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:30:14.456356  543705 disk_worker.go:494] system disk:vda1
I0319 10:30:14.456386  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:30:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:30:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:30:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:30:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:30:18.693671  543705 disk_info.go:125] begin check local disk info of client
I0319 10:30:18.696009  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:30:18.696016  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e6d80 0xc0004e6dc0]
E0319 10:30:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:30:23.409811  543705 memory.go:184] no items to output this cycle
I0319 10:30:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 10:30:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:30:33.409780  543705 memory.go:184] no items to output this cycle
I0319 10:30:33.409785  543705 cpu.go:275] no items to output this cycle
I0319 10:30:37.683108  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:30:37.683114  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:30:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:30:43.410632  543705 memory.go:191] Add success.
I0319 10:30:43.409802  543705 cpu.go:282] Add success.
I0319 10:30:43.420387  543705 net.go:648] Add success.
I0319 10:30:43.422907  543705 net.go:770] primary dev: ETH0
I0319 10:30:43.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:30:43.422931  543705 net.go:698] Add success.
I0319 10:30:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:30:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:30:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:30:53.409794  543705 memory.go:184] no items to output this cycle
I0319 10:30:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 10:31:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:31:03.409783  543705 memory.go:184] no items to output this cycle
I0319 10:31:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 10:31:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:31:13.409797  543705 memory.go:191] Add success.
I0319 10:31:13.409800  543705 cpu.go:282] Add success.
W0319 10:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:31:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:31:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:31:13.420078  543705 net.go:648] Add success.
I0319 10:31:13.422871  543705 net.go:770] primary dev: ETH0
I0319 10:31:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:31:13.422900  543705 net.go:698] Add success.
I0319 10:31:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:31:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:31:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 10:31:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:31:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 10:31:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:31:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:31:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:31:18.697125  543705 disk_info.go:125] begin check local disk info of client
I0319 10:31:18.699537  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:31:18.699543  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002724c0 0xc000272500]
E0319 10:31:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:31:23.409791  543705 memory.go:184] no items to output this cycle
I0319 10:31:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 10:31:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:31:33.409803  543705 memory.go:184] no items to output this cycle
I0319 10:31:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 10:31:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:31:43.409819  543705 memory.go:191] Add success.
I0319 10:31:43.409860  543705 cpu.go:282] Add success.
I0319 10:31:43.420087  543705 net.go:648] Add success.
I0319 10:31:43.422800  543705 net.go:770] primary dev: ETH0
I0319 10:31:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:31:43.422829  543705 net.go:698] Add success.
I0319 10:31:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:31:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:31:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:31:53.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:31:53.409905  543705 memory.go:184] no items to output this cycle
I0319 10:31:53.410012  543705 cpu.go:275] no items to output this cycle
E0319 10:32:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:32:03.409811  543705 memory.go:184] no items to output this cycle
I0319 10:32:03.409825  543705 cpu.go:275] no items to output this cycle
E0319 10:32:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:32:13.409785  543705 memory.go:191] Add success.
W0319 10:32:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 10:32:13.409815  543705 cpu.go:282] Add success.
W0319 10:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:32:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:32:13.420171  543705 net.go:648] Add success.
I0319 10:32:13.423684  543705 net.go:770] primary dev: ETH0
I0319 10:32:13.423697  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:32:13.423709  543705 net.go:698] Add success.
W0319 10:32:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:32:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 10:32:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0319 10:32:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 10:32:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 10:32:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0319 10:32:14.456553  543705 disk_worker.go:494] system disk:vda1
I0319 10:32:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 10:32:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 10:32:15.456863  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:32:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 10:32:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 10:32:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:32:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:32:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:32:18.700149  543705 disk_info.go:125] begin check local disk info of client
I0319 10:32:18.702485  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:32:18.702492  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a6c0 0xc00035a700]
E0319 10:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:32:23.409795  543705 memory.go:184] no items to output this cycle
I0319 10:32:23.409856  543705 cpu.go:275] no items to output this cycle
E0319 10:32:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:32:33.409791  543705 memory.go:184] no items to output this cycle
I0319 10:32:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 10:32:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:32:43.409795  543705 memory.go:191] Add success.
I0319 10:32:43.409828  543705 cpu.go:282] Add success.
I0319 10:32:43.419985  543705 net.go:648] Add success.
I0319 10:32:43.422777  543705 net.go:770] primary dev: ETH0
I0319 10:32:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:32:43.422804  543705 net.go:698] Add success.
I0319 10:32:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:32:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:32:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:32:53.409804  543705 memory.go:184] no items to output this cycle
I0319 10:32:53.409822  543705 cpu.go:275] no items to output this cycle
E0319 10:33:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:33:03.409792  543705 memory.go:184] no items to output this cycle
I0319 10:33:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 10:33:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:33:13.409793  543705 memory.go:191] Add success.
W0319 10:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:33:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:33:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:33:13.409850  543705 cpu.go:282] Add success.
I0319 10:33:13.420079  543705 net.go:648] Add success.
I0319 10:33:13.422840  543705 net.go:770] primary dev: ETH0
I0319 10:33:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:33:13.422865  543705 net.go:698] Add success.
I0319 10:33:13.469183  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"df0e4c01-af13-4f76-8450-ef59bc717ada","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:33:13.469216  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:33:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:33:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:33:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 10:33:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:33:14.456614  543705 disk_worker.go:494] system disk:vda1
I0319 10:33:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:33:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:33:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:33:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:33:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:33:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:33:18.702573  543705 disk_info.go:125] begin check local disk info of client
I0319 10:33:18.705024  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:33:18.705031  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390640 0xc000390680]
E0319 10:33:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:33:23.409776  543705 memory.go:184] no items to output this cycle
I0319 10:33:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 10:33:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:33:33.409821  543705 memory.go:184] no items to output this cycle
I0319 10:33:33.409836  543705 cpu.go:275] no items to output this cycle
I0319 10:33:37.683261  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:33:37.683268  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:33:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:33:43.410669  543705 memory.go:191] Add success.
I0319 10:33:43.409811  543705 cpu.go:282] Add success.
I0319 10:33:43.420409  543705 net.go:648] Add success.
I0319 10:33:43.423222  543705 net.go:770] primary dev: ETH0
I0319 10:33:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:33:43.423248  543705 net.go:698] Add success.
I0319 10:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:33:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:33:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:33:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:33:53.409784  543705 memory.go:184] no items to output this cycle
I0319 10:33:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 10:34:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:34:03.409802  543705 memory.go:184] no items to output this cycle
I0319 10:34:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 10:34:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:34:13.409812  543705 memory.go:191] Add success.
I0319 10:34:13.409832  543705 cpu.go:282] Add success.
W0319 10:34:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:34:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:34:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:34:13.420120  543705 net.go:648] Add success.
I0319 10:34:13.423199  543705 net.go:770] primary dev: ETH0
I0319 10:34:13.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:34:13.423225  543705 net.go:698] Add success.
I0319 10:34:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:34:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:34:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 10:34:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:34:14.456561  543705 disk_worker.go:494] system disk:vda1
I0319 10:34:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:34:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:34:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:34:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:34:18.705673  543705 disk_info.go:125] begin check local disk info of client
I0319 10:34:18.708029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:34:18.708034  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315c80 0xc000315cc0]
E0319 10:34:23.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:34:23.409814  543705 cpu.go:275] no items to output this cycle
I0319 10:34:23.409830  543705 memory.go:184] no items to output this cycle
E0319 10:34:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:34:33.409809  543705 memory.go:184] no items to output this cycle
I0319 10:34:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 10:34:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:34:43.409910  543705 memory.go:191] Add success.
I0319 10:34:43.409994  543705 cpu.go:282] Add success.
I0319 10:34:43.419708  543705 net.go:648] Add success.
I0319 10:34:43.422579  543705 net.go:770] primary dev: ETH0
I0319 10:34:43.422593  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:34:43.422605  543705 net.go:698] Add success.
I0319 10:34:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:34:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:34:53.409805  543705 memory.go:184] no items to output this cycle
I0319 10:34:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 10:35:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:35:03.409777  543705 memory.go:184] no items to output this cycle
I0319 10:35:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 10:35:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:35:13.409817  543705 memory.go:191] Add success.
I0319 10:35:13.409825  543705 cpu.go:282] Add success.
W0319 10:35:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:35:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:35:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:35:13.420073  543705 net.go:648] Add success.
I0319 10:35:13.423010  543705 net.go:770] primary dev: ETH0
I0319 10:35:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:35:13.423048  543705 net.go:698] Add success.
I0319 10:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:35:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:35:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0319 10:35:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:35:14.456631  543705 disk_worker.go:494] system disk:vda1
I0319 10:35:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:35:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:35:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:35:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:35:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:35:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:35:18.709170  543705 disk_info.go:125] begin check local disk info of client
I0319 10:35:18.711541  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:35:18.711547  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c6c0 0xc00035c700]
E0319 10:35:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:35:23.409786  543705 memory.go:184] no items to output this cycle
I0319 10:35:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 10:35:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:35:33.409775  543705 memory.go:184] no items to output this cycle
I0319 10:35:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 10:35:43.409825  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:35:43.409864  543705 memory.go:191] Add success.
I0319 10:35:43.409866  543705 cpu.go:282] Add success.
I0319 10:35:43.420022  543705 net.go:648] Add success.
I0319 10:35:43.422955  543705 net.go:770] primary dev: ETH0
I0319 10:35:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:35:43.422981  543705 net.go:698] Add success.
I0319 10:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:35:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:35:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:35:53.409793  543705 memory.go:184] no items to output this cycle
I0319 10:35:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 10:36:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:36:03.409792  543705 memory.go:184] no items to output this cycle
I0319 10:36:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 10:36:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:36:13.409789  543705 memory.go:191] Add success.
I0319 10:36:13.409790  543705 cpu.go:282] Add success.
W0319 10:36:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:36:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:36:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:36:13.420134  543705 net.go:648] Add success.
I0319 10:36:13.422802  543705 net.go:770] primary dev: ETH0
I0319 10:36:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:36:13.422826  543705 net.go:698] Add success.
I0319 10:36:13.469294  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"28bc020a-8051-41dd-8b3d-4a3232e69270","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:36:13.469338  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:36:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:36:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 10:36:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:36:14.456665  543705 disk_worker.go:494] system disk:vda1
I0319 10:36:14.456693  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:36:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:36:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:36:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:36:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:36:18.712255  543705 disk_info.go:125] begin check local disk info of client
I0319 10:36:18.714735  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:36:18.714741  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034de40 0xc00034de80]
E0319 10:36:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:36:23.409797  543705 memory.go:184] no items to output this cycle
I0319 10:36:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 10:36:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:36:33.409917  543705 memory.go:184] no items to output this cycle
I0319 10:36:33.410010  543705 cpu.go:275] no items to output this cycle
I0319 10:36:37.683403  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:36:37.683410  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:36:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:36:43.410815  543705 memory.go:191] Add success.
I0319 10:36:43.409809  543705 cpu.go:282] Add success.
I0319 10:36:43.420542  543705 net.go:648] Add success.
I0319 10:36:43.423388  543705 net.go:770] primary dev: ETH0
I0319 10:36:43.423401  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:36:43.423413  543705 net.go:698] Add success.
I0319 10:36:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:36:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:36:53.409816  543705 memory.go:184] no items to output this cycle
I0319 10:36:53.409824  543705 cpu.go:275] no items to output this cycle
E0319 10:37:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:37:03.409780  543705 cpu.go:275] no items to output this cycle
I0319 10:37:03.409783  543705 memory.go:184] no items to output this cycle
E0319 10:37:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:37:13.409799  543705 memory.go:191] Add success.
I0319 10:37:13.409802  543705 cpu.go:282] Add success.
W0319 10:37:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:37:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:37:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:37:13.420188  543705 net.go:648] Add success.
I0319 10:37:13.422934  543705 net.go:770] primary dev: ETH0
I0319 10:37:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:37:13.422959  543705 net.go:698] Add success.
I0319 10:37:13.453516  543705 event_worker.go:152] Polling the log file for events...
W0319 10:37:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:37:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 10:37:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:37:14.456809  543705 disk_worker.go:494] system disk:vda1
I0319 10:37:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 10:37:14.457119  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 10:37:14.457127  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 10:37:14.457131  543705 custom_config.go:64] query custom config with name: gpu
E0319 10:37:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 10:37:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:37:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 10:37:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 10:37:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:37:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:37:16.472346  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:37:18.716217  543705 disk_info.go:125] begin check local disk info of client
I0319 10:37:18.718568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:37:18.718574  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057e0c0 0xc00057e100]
E0319 10:37:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:37:23.409801  543705 memory.go:184] no items to output this cycle
I0319 10:37:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 10:37:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:37:33.409886  543705 memory.go:184] no items to output this cycle
I0319 10:37:33.409927  543705 cpu.go:275] no items to output this cycle
E0319 10:37:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:37:43.409805  543705 memory.go:191] Add success.
I0319 10:37:43.409809  543705 cpu.go:282] Add success.
I0319 10:37:43.419962  543705 net.go:648] Add success.
I0319 10:37:43.423190  543705 net.go:770] primary dev: ETH0
I0319 10:37:43.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:37:43.423217  543705 net.go:698] Add success.
I0319 10:37:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:37:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:37:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:37:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:37:53.409819  543705 memory.go:184] no items to output this cycle
I0319 10:37:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 10:38:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:38:03.409773  543705 memory.go:184] no items to output this cycle
I0319 10:38:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 10:38:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:38:13.409797  543705 memory.go:191] Add success.
I0319 10:38:13.409800  543705 cpu.go:282] Add success.
W0319 10:38:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:38:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:38:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:38:13.420081  543705 net.go:648] Add success.
I0319 10:38:13.422783  543705 net.go:770] primary dev: ETH0
I0319 10:38:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:38:13.422809  543705 net.go:698] Add success.
I0319 10:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:38:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:38:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 10:38:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:38:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 10:38:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:38:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:38:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:38:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:38:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:38:16.472422  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:38:18.718652  543705 disk_info.go:125] begin check local disk info of client
I0319 10:38:18.721059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:38:18.721064  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a640 0xc00027a680]
E0319 10:38:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:38:23.409775  543705 memory.go:184] no items to output this cycle
I0319 10:38:23.409777  543705 cpu.go:275] no items to output this cycle
E0319 10:38:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:38:33.409771  543705 memory.go:184] no items to output this cycle
I0319 10:38:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 10:38:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:38:43.409827  543705 memory.go:191] Add success.
I0319 10:38:43.409836  543705 cpu.go:282] Add success.
I0319 10:38:43.419989  543705 net.go:648] Add success.
I0319 10:38:43.423105  543705 net.go:770] primary dev: ETH0
I0319 10:38:43.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:38:43.423131  543705 net.go:698] Add success.
I0319 10:38:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:38:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:38:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:38:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:38:53.409817  543705 memory.go:184] no items to output this cycle
I0319 10:38:53.409819  543705 cpu.go:275] no items to output this cycle
E0319 10:39:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:39:03.409766  543705 memory.go:184] no items to output this cycle
I0319 10:39:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 10:39:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:39:13.409794  543705 memory.go:191] Add success.
I0319 10:39:13.409798  543705 cpu.go:282] Add success.
W0319 10:39:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:39:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:39:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:39:13.420113  543705 net.go:648] Add success.
I0319 10:39:13.422845  543705 net.go:770] primary dev: ETH0
I0319 10:39:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:39:13.422871  543705 net.go:698] Add success.
I0319 10:39:13.468832  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8485047f-0d49-4956-8604-3b3c5e86858a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:39:13.468871  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:39:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:39:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:39:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 10:39:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:39:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 10:39:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:39:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:39:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:39:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:39:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:39:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:39:18.721671  543705 disk_info.go:125] begin check local disk info of client
I0319 10:39:18.724031  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:39:18.724037  543705 disk_info.go:196] parse disk info done, disk is : [0xc000295400 0xc000295440]
E0319 10:39:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:39:23.409799  543705 memory.go:184] no items to output this cycle
I0319 10:39:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 10:39:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:39:33.409776  543705 memory.go:184] no items to output this cycle
I0319 10:39:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 10:39:37.684119  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:39:37.684125  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:39:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:39:43.410606  543705 memory.go:191] Add success.
I0319 10:39:43.409804  543705 cpu.go:282] Add success.
I0319 10:39:43.420471  543705 net.go:648] Add success.
I0319 10:39:43.422928  543705 net.go:770] primary dev: ETH0
I0319 10:39:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:39:43.422954  543705 net.go:698] Add success.
I0319 10:39:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:39:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:39:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:39:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:39:53.409816  543705 memory.go:184] no items to output this cycle
I0319 10:39:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 10:40:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:40:03.409763  543705 memory.go:184] no items to output this cycle
I0319 10:40:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 10:40:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:40:13.409798  543705 memory.go:191] Add success.
I0319 10:40:13.409801  543705 cpu.go:282] Add success.
W0319 10:40:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:40:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:40:13.420044  543705 net.go:648] Add success.
I0319 10:40:13.422890  543705 net.go:770] primary dev: ETH0
I0319 10:40:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:40:13.422914  543705 net.go:698] Add success.
I0319 10:40:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:40:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:40:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 10:40:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:40:14.456574  543705 disk_worker.go:494] system disk:vda1
I0319 10:40:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:40:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:40:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:40:18.725264  543705 disk_info.go:125] begin check local disk info of client
I0319 10:40:18.727627  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:40:18.727633  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027abc0 0xc00027ac00]
E0319 10:40:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:40:23.409779  543705 cpu.go:275] no items to output this cycle
I0319 10:40:23.409781  543705 memory.go:184] no items to output this cycle
E0319 10:40:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:40:33.409775  543705 memory.go:184] no items to output this cycle
I0319 10:40:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 10:40:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:40:43.409798  543705 memory.go:191] Add success.
I0319 10:40:43.409799  543705 cpu.go:282] Add success.
I0319 10:40:43.419737  543705 net.go:648] Add success.
I0319 10:40:43.422496  543705 net.go:770] primary dev: ETH0
I0319 10:40:43.422508  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:40:43.422520  543705 net.go:698] Add success.
I0319 10:40:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:40:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:40:53.409819  543705 memory.go:184] no items to output this cycle
I0319 10:40:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 10:41:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:41:03.409800  543705 memory.go:184] no items to output this cycle
I0319 10:41:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 10:41:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:41:13.409812  543705 memory.go:191] Add success.
I0319 10:41:13.409822  543705 cpu.go:282] Add success.
W0319 10:41:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:41:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:41:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:41:13.420170  543705 net.go:648] Add success.
I0319 10:41:13.422740  543705 net.go:770] primary dev: ETH0
I0319 10:41:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:41:13.422766  543705 net.go:698] Add success.
I0319 10:41:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:41:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:41:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 10:41:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:41:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 10:41:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:41:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:41:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:41:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:41:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:41:18.728292  543705 disk_info.go:125] begin check local disk info of client
I0319 10:41:18.730703  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:41:18.730708  543705 disk_info.go:196] parse disk info done, disk is : [0xc000539640 0xc000539680]
E0319 10:41:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:41:23.409796  543705 memory.go:184] no items to output this cycle
I0319 10:41:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 10:41:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:41:33.409791  543705 memory.go:184] no items to output this cycle
I0319 10:41:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 10:41:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:41:43.409897  543705 cpu.go:282] Add success.
I0319 10:41:43.409904  543705 memory.go:191] Add success.
I0319 10:41:43.419713  543705 net.go:648] Add success.
I0319 10:41:43.422532  543705 net.go:770] primary dev: ETH0
I0319 10:41:43.422544  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:41:43.422555  543705 net.go:698] Add success.
I0319 10:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:41:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:41:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:41:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:41:53.409785  543705 memory.go:184] no items to output this cycle
I0319 10:41:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 10:42:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:42:03.409765  543705 memory.go:184] no items to output this cycle
I0319 10:42:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 10:42:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:42:13.409796  543705 memory.go:191] Add success.
I0319 10:42:13.409796  543705 cpu.go:282] Add success.
W0319 10:42:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:42:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:42:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:42:13.420123  543705 net.go:648] Add success.
I0319 10:42:13.423008  543705 net.go:770] primary dev: ETH0
I0319 10:42:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:42:13.423034  543705 net.go:698] Add success.
I0319 10:42:13.469652  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7af4c89c-a15f-48ff-89c9-1082ca0719ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:42:13.469690  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 10:42:14.455229  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:42:14.455243  543705 disk_worker.go:708] disk space is not compliant
W0319 10:42:14.455248  543705 disk_worker.go:728] disk inode is not compliant
E0319 10:42:14.455964  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 10:42:14.455973  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 10:42:14.455979  543705 custom_config.go:64] query custom config with name: gpu
I0319 10:42:14.456863  543705 disk_worker.go:494] system disk:vda1
I0319 10:42:14.456891  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 10:42:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 10:42:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 10:42:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 10:42:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:42:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:42:16.472331  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:42:18.731357  543705 disk_info.go:125] begin check local disk info of client
I0319 10:42:18.733664  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:42:18.733670  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377bc0 0xc000377c00]
E0319 10:42:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:42:23.409792  543705 memory.go:184] no items to output this cycle
I0319 10:42:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 10:42:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:42:33.409773  543705 memory.go:184] no items to output this cycle
I0319 10:42:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 10:42:37.685130  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:42:37.685136  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:42:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:42:43.410936  543705 memory.go:191] Add success.
I0319 10:42:43.409825  543705 cpu.go:282] Add success.
I0319 10:42:43.420663  543705 net.go:648] Add success.
I0319 10:42:43.423657  543705 net.go:770] primary dev: ETH0
I0319 10:42:43.423670  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:42:43.423683  543705 net.go:698] Add success.
I0319 10:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:42:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:42:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:42:53.409825  543705 memory.go:184] no items to output this cycle
I0319 10:42:53.409836  543705 cpu.go:275] no items to output this cycle
E0319 10:43:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:43:03.409805  543705 memory.go:184] no items to output this cycle
I0319 10:43:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 10:43:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:43:13.409827  543705 memory.go:191] Add success.
I0319 10:43:13.409836  543705 cpu.go:282] Add success.
W0319 10:43:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:43:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:43:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:43:13.420145  543705 net.go:648] Add success.
I0319 10:43:13.422749  543705 net.go:770] primary dev: ETH0
I0319 10:43:13.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:43:13.422776  543705 net.go:698] Add success.
I0319 10:43:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:43:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:43:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0319 10:43:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:43:14.456622  543705 disk_worker.go:494] system disk:vda1
I0319 10:43:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:43:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:43:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:43:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:43:18.733751  543705 disk_info.go:125] begin check local disk info of client
I0319 10:43:18.736130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:43:18.736136  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377340 0xc000377380]
E0319 10:43:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:43:23.409806  543705 memory.go:184] no items to output this cycle
I0319 10:43:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 10:43:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:43:33.409792  543705 memory.go:184] no items to output this cycle
I0319 10:43:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 10:43:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:43:43.409802  543705 cpu.go:282] Add success.
I0319 10:43:43.409809  543705 memory.go:191] Add success.
I0319 10:43:43.419956  543705 net.go:648] Add success.
I0319 10:43:43.422593  543705 net.go:770] primary dev: ETH0
I0319 10:43:43.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:43:43.422618  543705 net.go:698] Add success.
I0319 10:43:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:43:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:43:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:43:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:43:53.409825  543705 memory.go:184] no items to output this cycle
I0319 10:43:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 10:44:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:44:03.409782  543705 memory.go:184] no items to output this cycle
I0319 10:44:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 10:44:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:44:13.409787  543705 memory.go:191] Add success.
I0319 10:44:13.409805  543705 cpu.go:282] Add success.
W0319 10:44:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:44:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:44:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:44:13.420323  543705 net.go:648] Add success.
I0319 10:44:13.423236  543705 net.go:770] primary dev: ETH0
I0319 10:44:13.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:44:13.423261  543705 net.go:698] Add success.
I0319 10:44:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:44:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:44:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 10:44:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:44:14.456522  543705 disk_worker.go:494] system disk:vda1
I0319 10:44:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:44:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:44:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:44:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:44:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:44:18.736216  543705 disk_info.go:125] begin check local disk info of client
I0319 10:44:18.738672  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:44:18.738678  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0319 10:44:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:44:23.409763  543705 memory.go:184] no items to output this cycle
I0319 10:44:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 10:44:33.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:44:33.409889  543705 memory.go:184] no items to output this cycle
I0319 10:44:33.409969  543705 cpu.go:275] no items to output this cycle
E0319 10:44:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:44:43.409798  543705 memory.go:191] Add success.
I0319 10:44:43.409801  543705 cpu.go:282] Add success.
I0319 10:44:43.419925  543705 net.go:648] Add success.
I0319 10:44:43.422782  543705 net.go:770] primary dev: ETH0
I0319 10:44:43.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:44:43.422812  543705 net.go:698] Add success.
I0319 10:44:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:44:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:44:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:44:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:44:53.409809  543705 cpu.go:275] no items to output this cycle
I0319 10:44:53.409825  543705 memory.go:184] no items to output this cycle
E0319 10:45:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:45:03.409802  543705 memory.go:184] no items to output this cycle
I0319 10:45:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 10:45:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:45:13.409816  543705 memory.go:191] Add success.
I0319 10:45:13.409824  543705 cpu.go:282] Add success.
W0319 10:45:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:45:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:45:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:45:13.420122  543705 net.go:648] Add success.
I0319 10:45:13.423005  543705 net.go:770] primary dev: ETH0
I0319 10:45:13.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:45:13.423034  543705 net.go:698] Add success.
I0319 10:45:13.464265  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44cc1f5c-b8c6-4dd0-8800-b0c81fb84af8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:45:13.464300  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:45:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:45:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:45:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 10:45:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:45:14.456712  543705 disk_worker.go:494] system disk:vda1
I0319 10:45:14.456745  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:45:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:45:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:45:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:45:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:45:18.738758  543705 disk_info.go:125] begin check local disk info of client
I0319 10:45:18.741156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:45:18.741163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396dc0 0xc000396e00]
E0319 10:45:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:45:23.409763  543705 memory.go:184] no items to output this cycle
I0319 10:45:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 10:45:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:45:33.409815  543705 memory.go:184] no items to output this cycle
I0319 10:45:33.409826  543705 cpu.go:275] no items to output this cycle
I0319 10:45:37.685735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:45:37.685743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:45:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:45:43.410625  543705 memory.go:191] Add success.
I0319 10:45:43.409834  543705 cpu.go:282] Add success.
I0319 10:45:43.420174  543705 net.go:770] primary dev: ETH0
I0319 10:45:43.420187  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:45:43.420201  543705 net.go:698] Add success.
I0319 10:45:43.420433  543705 net.go:648] Add success.
I0319 10:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:45:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:45:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:45:53.409790  543705 memory.go:184] no items to output this cycle
I0319 10:45:53.409855  543705 cpu.go:275] no items to output this cycle
E0319 10:46:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:46:03.409790  543705 memory.go:184] no items to output this cycle
I0319 10:46:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 10:46:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:46:13.409817  543705 memory.go:191] Add success.
I0319 10:46:13.409828  543705 cpu.go:282] Add success.
W0319 10:46:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:46:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:46:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:46:13.420029  543705 net.go:648] Add success.
I0319 10:46:13.422774  543705 net.go:770] primary dev: ETH0
I0319 10:46:13.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:46:13.422800  543705 net.go:698] Add success.
I0319 10:46:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:46:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:46:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 10:46:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:46:14.456562  543705 disk_worker.go:494] system disk:vda1
I0319 10:46:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:46:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:46:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:46:18.741710  543705 disk_info.go:125] begin check local disk info of client
I0319 10:46:18.744067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:46:18.744072  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002736c0 0xc000273700]
E0319 10:46:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:46:23.409802  543705 memory.go:184] no items to output this cycle
I0319 10:46:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 10:46:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:46:33.409783  543705 memory.go:184] no items to output this cycle
I0319 10:46:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 10:46:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:46:43.409793  543705 memory.go:191] Add success.
I0319 10:46:43.409825  543705 cpu.go:282] Add success.
I0319 10:46:43.419811  543705 net.go:770] primary dev: ETH0
I0319 10:46:43.419823  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:46:43.419836  543705 net.go:698] Add success.
I0319 10:46:43.420063  543705 net.go:648] Add success.
I0319 10:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:46:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:46:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:46:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 10:46:53.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:46:53.409840  543705 memory.go:184] no items to output this cycle
E0319 10:47:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:47:03.409780  543705 memory.go:184] no items to output this cycle
I0319 10:47:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 10:47:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:47:13.409799  543705 memory.go:191] Add success.
I0319 10:47:13.409799  543705 cpu.go:282] Add success.
W0319 10:47:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:47:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:47:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:47:13.420065  543705 net.go:648] Add success.
I0319 10:47:13.422711  543705 net.go:770] primary dev: ETH0
I0319 10:47:13.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:47:13.422736  543705 net.go:698] Add success.
I0319 10:47:13.453258  543705 event_worker.go:152] Polling the log file for events...
W0319 10:47:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:47:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 10:47:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0319 10:47:14.456915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 10:47:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 10:47:14.456930  543705 custom_config.go:64] query custom config with name: gpu
I0319 10:47:14.456979  543705 disk_worker.go:494] system disk:vda1
I0319 10:47:14.457021  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 10:47:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 10:47:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:47:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 10:47:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 10:47:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:47:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:47:16.472325  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:47:18.745366  543705 disk_info.go:125] begin check local disk info of client
I0319 10:47:18.747744  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:47:18.747750  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298d40 0xc000298d80]
E0319 10:47:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:47:23.409803  543705 memory.go:184] no items to output this cycle
I0319 10:47:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 10:47:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:47:33.409815  543705 memory.go:184] no items to output this cycle
I0319 10:47:33.409832  543705 cpu.go:275] no items to output this cycle
E0319 10:47:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:47:43.409882  543705 memory.go:191] Add success.
I0319 10:47:43.409926  543705 cpu.go:282] Add success.
I0319 10:47:43.419719  543705 net.go:648] Add success.
I0319 10:47:43.422824  543705 net.go:770] primary dev: ETH0
I0319 10:47:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:47:43.422849  543705 net.go:698] Add success.
I0319 10:47:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:47:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:47:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:47:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:47:53.409818  543705 cpu.go:275] no items to output this cycle
I0319 10:47:53.409827  543705 memory.go:184] no items to output this cycle
E0319 10:48:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:48:03.409791  543705 memory.go:184] no items to output this cycle
I0319 10:48:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 10:48:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:48:13.409795  543705 memory.go:191] Add success.
I0319 10:48:13.409796  543705 cpu.go:282] Add success.
W0319 10:48:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:48:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:48:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:48:13.419886  543705 net.go:770] primary dev: ETH0
I0319 10:48:13.419900  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:48:13.419911  543705 net.go:698] Add success.
I0319 10:48:13.420286  543705 net.go:648] Add success.
I0319 10:48:13.468776  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"76e0493f-e054-461e-ab0b-6d79222ab208","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:48:13.468807  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:48:14.454943  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:48:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:48:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0319 10:48:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:48:14.456546  543705 disk_worker.go:494] system disk:vda1
I0319 10:48:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:48:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:48:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:48:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:48:16.472355  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:48:18.748446  543705 disk_info.go:125] begin check local disk info of client
I0319 10:48:18.750841  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:48:18.750847  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abac0 0xc0001abb00]
E0319 10:48:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:48:23.409802  543705 memory.go:184] no items to output this cycle
I0319 10:48:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 10:48:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:48:33.409786  543705 memory.go:184] no items to output this cycle
I0319 10:48:33.409807  543705 cpu.go:275] no items to output this cycle
I0319 10:48:37.685875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:48:37.685882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:48:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:48:43.409803  543705 cpu.go:282] Add success.
I0319 10:48:43.410796  543705 memory.go:191] Add success.
I0319 10:48:43.419509  543705 net.go:770] primary dev: ETH0
I0319 10:48:43.419524  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:48:43.419538  543705 net.go:698] Add success.
I0319 10:48:43.419887  543705 net.go:648] Add success.
I0319 10:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:48:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:48:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:48:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:48:53.409791  543705 memory.go:184] no items to output this cycle
I0319 10:48:53.409852  543705 cpu.go:275] no items to output this cycle
E0319 10:49:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:49:03.409789  543705 memory.go:184] no items to output this cycle
I0319 10:49:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 10:49:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:49:13.409796  543705 memory.go:191] Add success.
I0319 10:49:13.409798  543705 cpu.go:282] Add success.
W0319 10:49:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:49:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:49:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:49:13.420131  543705 net.go:648] Add success.
I0319 10:49:13.423190  543705 net.go:770] primary dev: ETH0
I0319 10:49:13.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:49:13.423215  543705 net.go:698] Add success.
I0319 10:49:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:49:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:49:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0319 10:49:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:49:14.456575  543705 disk_worker.go:494] system disk:vda1
I0319 10:49:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:49:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:49:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:49:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:49:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:49:16.472470  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:49:18.752396  543705 disk_info.go:125] begin check local disk info of client
I0319 10:49:18.754823  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:49:18.754829  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298680 0xc0002986c0]
E0319 10:49:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:49:23.409770  543705 memory.go:184] no items to output this cycle
I0319 10:49:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 10:49:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:49:33.409816  543705 memory.go:184] no items to output this cycle
I0319 10:49:33.409828  543705 cpu.go:275] no items to output this cycle
E0319 10:49:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:49:43.409827  543705 memory.go:191] Add success.
I0319 10:49:43.409832  543705 cpu.go:282] Add success.
I0319 10:49:43.420024  543705 net.go:648] Add success.
I0319 10:49:43.423362  543705 net.go:770] primary dev: ETH0
I0319 10:49:43.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:49:43.423390  543705 net.go:698] Add success.
I0319 10:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:49:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:49:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:49:53.410584  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:49:53.410630  543705 memory.go:184] no items to output this cycle
I0319 10:49:53.410714  543705 cpu.go:275] no items to output this cycle
E0319 10:50:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:50:03.409792  543705 memory.go:184] no items to output this cycle
I0319 10:50:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 10:50:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:50:13.409817  543705 memory.go:191] Add success.
I0319 10:50:13.409825  543705 cpu.go:282] Add success.
W0319 10:50:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:50:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:50:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:50:13.420110  543705 net.go:648] Add success.
I0319 10:50:13.422993  543705 net.go:770] primary dev: ETH0
I0319 10:50:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:50:13.423019  543705 net.go:698] Add success.
I0319 10:50:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:50:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:50:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 10:50:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:50:14.456567  543705 disk_worker.go:494] system disk:vda1
I0319 10:50:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:50:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:50:18.754912  543705 disk_info.go:125] begin check local disk info of client
I0319 10:50:18.757318  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:50:18.757324  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331b00 0xc000331b40]
E0319 10:50:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:50:23.409759  543705 memory.go:184] no items to output this cycle
I0319 10:50:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 10:50:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:50:33.409789  543705 memory.go:184] no items to output this cycle
I0319 10:50:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 10:50:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:50:43.409814  543705 memory.go:191] Add success.
I0319 10:50:43.409822  543705 cpu.go:282] Add success.
I0319 10:50:43.419965  543705 net.go:648] Add success.
I0319 10:50:43.423107  543705 net.go:770] primary dev: ETH0
I0319 10:50:43.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:50:43.423134  543705 net.go:698] Add success.
I0319 10:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:50:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:50:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:50:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:50:53.409776  543705 memory.go:184] no items to output this cycle
I0319 10:50:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 10:51:03.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:51:03.409887  543705 memory.go:184] no items to output this cycle
I0319 10:51:03.409989  543705 cpu.go:275] no items to output this cycle
E0319 10:51:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:51:13.409804  543705 memory.go:191] Add success.
I0319 10:51:13.409808  543705 cpu.go:282] Add success.
W0319 10:51:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:51:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:51:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:51:13.420043  543705 net.go:648] Add success.
I0319 10:51:13.422860  543705 net.go:770] primary dev: ETH0
I0319 10:51:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:51:13.422885  543705 net.go:698] Add success.
I0319 10:51:13.473321  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"21946fda-14d4-45c7-b5d0-19e5f927c7fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:51:13.473355  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:51:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:51:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:51:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0319 10:51:14.455244  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:51:14.456779  543705 disk_worker.go:494] system disk:vda1
I0319 10:51:14.456810  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:51:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:51:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:51:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:51:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:51:18.757674  543705 disk_info.go:125] begin check local disk info of client
I0319 10:51:18.760057  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:51:18.760063  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331a80 0xc000331ac0]
E0319 10:51:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:51:23.409772  543705 memory.go:184] no items to output this cycle
I0319 10:51:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 10:51:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:51:33.409800  543705 memory.go:184] no items to output this cycle
I0319 10:51:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 10:51:37.686021  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:51:37.686028  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:51:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:51:43.410593  543705 memory.go:191] Add success.
I0319 10:51:43.409812  543705 cpu.go:282] Add success.
I0319 10:51:43.420288  543705 net.go:648] Add success.
I0319 10:51:43.423001  543705 net.go:770] primary dev: ETH0
I0319 10:51:43.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:51:43.423026  543705 net.go:698] Add success.
I0319 10:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:51:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:51:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:51:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:51:53.409789  543705 memory.go:184] no items to output this cycle
I0319 10:51:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 10:52:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:52:03.409813  543705 memory.go:184] no items to output this cycle
I0319 10:52:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 10:52:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:52:13.409800  543705 cpu.go:282] Add success.
I0319 10:52:13.409805  543705 memory.go:191] Add success.
W0319 10:52:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:52:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:52:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:52:13.420065  543705 net.go:648] Add success.
I0319 10:52:13.422884  543705 net.go:770] primary dev: ETH0
I0319 10:52:13.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:52:13.422909  543705 net.go:698] Add success.
W0319 10:52:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:52:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0319 10:52:14.455153  543705 disk_worker.go:728] disk inode is not compliant
E0319 10:52:14.456930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 10:52:14.456940  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 10:52:14.456946  543705 custom_config.go:64] query custom config with name: gpu
I0319 10:52:14.457003  543705 disk_worker.go:494] system disk:vda1
I0319 10:52:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 10:52:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 10:52:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:52:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 10:52:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 10:52:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:52:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:52:16.472339  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:52:18.760144  543705 disk_info.go:125] begin check local disk info of client
I0319 10:52:18.762521  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:52:18.762527  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c00 0xc0000c5cc0]
E0319 10:52:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:52:23.409775  543705 memory.go:184] no items to output this cycle
I0319 10:52:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 10:52:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:52:33.409796  543705 memory.go:184] no items to output this cycle
I0319 10:52:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 10:52:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:52:43.409787  543705 memory.go:191] Add success.
I0319 10:52:43.409815  543705 cpu.go:282] Add success.
I0319 10:52:43.419882  543705 net.go:648] Add success.
I0319 10:52:43.422586  543705 net.go:770] primary dev: ETH0
I0319 10:52:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:52:43.422614  543705 net.go:698] Add success.
I0319 10:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:52:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:52:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:52:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:52:53.409777  543705 memory.go:184] no items to output this cycle
I0319 10:52:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 10:53:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:53:03.409816  543705 memory.go:184] no items to output this cycle
I0319 10:53:03.409837  543705 cpu.go:275] no items to output this cycle
E0319 10:53:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:53:13.409802  543705 memory.go:191] Add success.
I0319 10:53:13.409803  543705 cpu.go:282] Add success.
W0319 10:53:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:53:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:53:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:53:13.420196  543705 net.go:648] Add success.
I0319 10:53:13.422891  543705 net.go:770] primary dev: ETH0
I0319 10:53:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:53:13.422920  543705 net.go:698] Add success.
I0319 10:53:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:53:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:53:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 10:53:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:53:14.456569  543705 disk_worker.go:494] system disk:vda1
I0319 10:53:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:53:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:53:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:53:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:53:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:53:16.472444  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:53:18.763456  543705 disk_info.go:125] begin check local disk info of client
I0319 10:53:18.765814  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:53:18.765835  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331bc0 0xc000331c00]
E0319 10:53:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:53:23.409792  543705 memory.go:184] no items to output this cycle
I0319 10:53:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 10:53:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:53:33.409802  543705 memory.go:184] no items to output this cycle
I0319 10:53:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 10:53:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:53:43.409782  543705 memory.go:191] Add success.
I0319 10:53:43.409816  543705 cpu.go:282] Add success.
I0319 10:53:43.419891  543705 net.go:648] Add success.
I0319 10:53:43.423419  543705 net.go:770] primary dev: ETH0
I0319 10:53:43.423432  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:53:43.423444  543705 net.go:698] Add success.
I0319 10:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:53:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:53:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:53:53.410256  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:53:53.410273  543705 memory.go:184] no items to output this cycle
I0319 10:53:53.410272  543705 cpu.go:275] no items to output this cycle
E0319 10:54:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:54:03.409780  543705 memory.go:184] no items to output this cycle
I0319 10:54:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 10:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:54:13.409793  543705 memory.go:191] Add success.
I0319 10:54:13.409811  543705 cpu.go:282] Add success.
W0319 10:54:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:54:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:54:13.420120  543705 net.go:648] Add success.
I0319 10:54:13.423023  543705 net.go:770] primary dev: ETH0
I0319 10:54:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:54:13.423051  543705 net.go:698] Add success.
I0319 10:54:13.474541  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"359f2aba-95e3-4198-aacf-bf533b8d180b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:54:13.474575  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 10:54:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:54:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:54:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 10:54:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:54:14.456517  543705 disk_worker.go:494] system disk:vda1
I0319 10:54:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:54:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:54:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:54:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:54:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:54:18.766725  543705 disk_info.go:125] begin check local disk info of client
I0319 10:54:18.769153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:54:18.769159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582980 0xc0005829c0]
E0319 10:54:23.410262  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:54:23.410278  543705 memory.go:184] no items to output this cycle
I0319 10:54:23.410291  543705 cpu.go:275] no items to output this cycle
E0319 10:54:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:54:33.409803  543705 memory.go:184] no items to output this cycle
I0319 10:54:33.409812  543705 cpu.go:275] no items to output this cycle
I0319 10:54:37.687141  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:54:37.687147  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:54:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:54:43.410676  543705 memory.go:191] Add success.
I0319 10:54:43.409827  543705 cpu.go:282] Add success.
I0319 10:54:43.420389  543705 net.go:648] Add success.
I0319 10:54:43.423128  543705 net.go:770] primary dev: ETH0
I0319 10:54:43.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:54:43.423158  543705 net.go:698] Add success.
I0319 10:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:54:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:54:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:54:53.409778  543705 memory.go:184] no items to output this cycle
I0319 10:54:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 10:55:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:55:03.409809  543705 memory.go:184] no items to output this cycle
I0319 10:55:03.409820  543705 cpu.go:275] no items to output this cycle
W0319 10:55:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:55:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:55:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 10:55:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:55:13.409819  543705 cpu.go:282] Add success.
I0319 10:55:13.409823  543705 memory.go:191] Add success.
I0319 10:55:13.420147  543705 net.go:648] Add success.
I0319 10:55:13.422997  543705 net.go:770] primary dev: ETH0
I0319 10:55:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:55:13.423025  543705 net.go:698] Add success.
I0319 10:55:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:55:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:55:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 10:55:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:55:14.456595  543705 disk_worker.go:494] system disk:vda1
I0319 10:55:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:55:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:55:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:55:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:55:18.769673  543705 disk_info.go:125] begin check local disk info of client
I0319 10:55:18.772087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:55:18.772093  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f6600 0xc0004f6640]
E0319 10:55:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:55:23.409766  543705 memory.go:184] no items to output this cycle
I0319 10:55:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 10:55:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:55:33.409804  543705 memory.go:184] no items to output this cycle
I0319 10:55:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 10:55:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:55:43.409786  543705 memory.go:191] Add success.
I0319 10:55:43.409803  543705 cpu.go:282] Add success.
I0319 10:55:43.419871  543705 net.go:648] Add success.
I0319 10:55:43.422751  543705 net.go:770] primary dev: ETH0
I0319 10:55:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:55:43.422776  543705 net.go:698] Add success.
I0319 10:55:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:55:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:55:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:55:53.409819  543705 memory.go:184] no items to output this cycle
I0319 10:55:53.409830  543705 cpu.go:275] no items to output this cycle
E0319 10:56:03.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:56:03.409891  543705 memory.go:184] no items to output this cycle
I0319 10:56:03.409999  543705 cpu.go:275] no items to output this cycle
E0319 10:56:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:56:13.409798  543705 memory.go:191] Add success.
I0319 10:56:13.409800  543705 cpu.go:282] Add success.
W0319 10:56:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:56:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:56:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:56:13.420146  543705 net.go:648] Add success.
I0319 10:56:13.422709  543705 net.go:770] primary dev: ETH0
I0319 10:56:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:56:13.422733  543705 net.go:698] Add success.
I0319 10:56:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:56:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:56:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0319 10:56:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:56:14.456602  543705 disk_worker.go:494] system disk:vda1
I0319 10:56:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:56:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:56:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:56:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:56:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:56:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:56:18.773506  543705 disk_info.go:125] begin check local disk info of client
I0319 10:56:18.775830  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:56:18.775836  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330bc0 0xc000330c00]
E0319 10:56:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:56:23.409794  543705 memory.go:184] no items to output this cycle
I0319 10:56:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 10:56:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:56:33.409788  543705 memory.go:184] no items to output this cycle
I0319 10:56:33.409788  543705 cpu.go:275] no items to output this cycle
E0319 10:56:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:56:43.409791  543705 memory.go:191] Add success.
I0319 10:56:43.409795  543705 cpu.go:282] Add success.
I0319 10:56:43.419888  543705 net.go:648] Add success.
I0319 10:56:43.422642  543705 net.go:770] primary dev: ETH0
I0319 10:56:43.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:56:43.422672  543705 net.go:698] Add success.
I0319 10:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:56:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:56:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:56:53.409779  543705 cpu.go:275] no items to output this cycle
I0319 10:56:53.409780  543705 memory.go:184] no items to output this cycle
E0319 10:57:03.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:57:03.409909  543705 memory.go:184] no items to output this cycle
I0319 10:57:03.409886  543705 cpu.go:275] no items to output this cycle
E0319 10:57:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:57:13.409781  543705 memory.go:191] Add success.
W0319 10:57:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 10:57:13.409813  543705 cpu.go:282] Add success.
W0319 10:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:57:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:57:13.420163  543705 net.go:648] Add success.
I0319 10:57:13.429210  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 10:57:13.429283  543705 net.go:770] primary dev: ETH0
I0319 10:57:13.429295  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:57:13.429306  543705 net.go:698] Add success.
I0319 10:57:13.453031  543705 event_worker.go:152] Polling the log file for events...
I0319 10:57:13.468473  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0094a6e6-3c34-43e6-93dc-c63dc816a11b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 10:57:13.468504  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 10:57:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:57:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 10:57:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0319 10:57:14.456006  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 10:57:14.456014  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 10:57:14.456020  543705 custom_config.go:64] query custom config with name: gpu
I0319 10:57:14.456468  543705 disk_worker.go:494] system disk:vda1
I0319 10:57:14.456494  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 10:57:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 10:57:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:57:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 10:57:16.457991  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 10:57:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:57:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:57:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:57:18.776581  543705 disk_info.go:125] begin check local disk info of client
I0319 10:57:18.778893  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:57:18.778899  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331640 0xc000331680]
E0319 10:57:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:57:23.409799  543705 memory.go:184] no items to output this cycle
I0319 10:57:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 10:57:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:57:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 10:57:33.409796  543705 memory.go:184] no items to output this cycle
I0319 10:57:37.687292  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 10:57:37.687299  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 10:57:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:57:43.410538  543705 memory.go:191] Add success.
I0319 10:57:43.409787  543705 cpu.go:282] Add success.
I0319 10:57:43.420324  543705 net.go:648] Add success.
I0319 10:57:43.423068  543705 net.go:770] primary dev: ETH0
I0319 10:57:43.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:57:43.423100  543705 net.go:698] Add success.
I0319 10:57:46.457667  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:57:46.457734  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:57:46.457759  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:57:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:57:53.409775  543705 memory.go:184] no items to output this cycle
I0319 10:57:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 10:58:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:58:03.409786  543705 memory.go:184] no items to output this cycle
I0319 10:58:03.409790  543705 cpu.go:275] no items to output this cycle
W0319 10:58:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:58:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:58:13.409728  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 10:58:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:58:13.409822  543705 memory.go:191] Add success.
I0319 10:58:13.409828  543705 cpu.go:282] Add success.
I0319 10:58:13.420080  543705 net.go:648] Add success.
I0319 10:58:13.423223  543705 net.go:770] primary dev: ETH0
I0319 10:58:13.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:58:13.423258  543705 net.go:698] Add success.
I0319 10:58:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:58:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:58:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 10:58:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:58:14.456585  543705 disk_worker.go:494] system disk:vda1
I0319 10:58:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:58:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:58:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:58:18.780537  543705 disk_info.go:125] begin check local disk info of client
I0319 10:58:18.783132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:58:18.783138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f00 0xc0000c5f40]
E0319 10:58:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:58:23.409773  543705 memory.go:184] no items to output this cycle
I0319 10:58:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 10:58:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:58:33.409811  543705 memory.go:184] no items to output this cycle
I0319 10:58:33.409825  543705 cpu.go:275] no items to output this cycle
E0319 10:58:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:58:43.409780  543705 memory.go:191] Add success.
I0319 10:58:43.409808  543705 cpu.go:282] Add success.
I0319 10:58:43.419738  543705 net.go:648] Add success.
I0319 10:58:43.422777  543705 net.go:770] primary dev: ETH0
I0319 10:58:43.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:58:43.422805  543705 net.go:698] Add success.
I0319 10:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:58:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:58:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:58:53.409794  543705 memory.go:184] no items to output this cycle
I0319 10:58:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 10:59:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:59:03.409788  543705 cpu.go:275] no items to output this cycle
I0319 10:59:03.409793  543705 memory.go:184] no items to output this cycle
E0319 10:59:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:59:13.409810  543705 memory.go:191] Add success.
I0319 10:59:13.409818  543705 cpu.go:282] Add success.
W0319 10:59:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 10:59:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 10:59:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 10:59:13.420070  543705 net.go:648] Add success.
I0319 10:59:13.422785  543705 net.go:770] primary dev: ETH0
I0319 10:59:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:59:13.422819  543705 net.go:698] Add success.
I0319 10:59:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 10:59:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 10:59:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 10:59:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0319 10:59:14.456606  543705 disk_worker.go:494] system disk:vda1
I0319 10:59:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 10:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 10:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:59:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:59:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 10:59:16.472485  543705 disk_local_worker.go:436] Get disk info: []
I0319 10:59:18.783218  543705 disk_info.go:125] begin check local disk info of client
I0319 10:59:18.785618  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 10:59:18.785624  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba40 0xc00007ba80]
E0319 10:59:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:59:23.409778  543705 cpu.go:275] no items to output this cycle
I0319 10:59:23.409780  543705 memory.go:184] no items to output this cycle
E0319 10:59:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:59:33.409808  543705 memory.go:184] no items to output this cycle
I0319 10:59:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 10:59:43.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:59:43.409916  543705 cpu.go:282] Add success.
I0319 10:59:43.409919  543705 memory.go:191] Add success.
I0319 10:59:43.419736  543705 net.go:648] Add success.
I0319 10:59:43.422405  543705 net.go:770] primary dev: ETH0
I0319 10:59:43.422420  543705 net.go:802] Send network stats successfully!,count is 6
I0319 10:59:43.422434  543705 net.go:698] Add success.
I0319 10:59:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 10:59:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 10:59:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 10:59:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 10:59:53.409761  543705 memory.go:184] no items to output this cycle
I0319 10:59:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:00:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:00:03.409806  543705 memory.go:184] no items to output this cycle
I0319 11:00:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 11:00:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:00:13.409788  543705 memory.go:191] Add success.
I0319 11:00:13.409805  543705 cpu.go:282] Add success.
W0319 11:00:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:00:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:00:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:00:13.420056  543705 net.go:648] Add success.
I0319 11:00:13.423241  543705 net.go:770] primary dev: ETH0
I0319 11:00:13.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:00:13.423270  543705 net.go:698] Add success.
I0319 11:00:13.468122  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2068d41-afed-4f9d-98ed-d5b3c6e71f7a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:00:13.468154  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:00:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:00:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:00:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 11:00:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:00:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 11:00:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:00:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:00:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:00:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:00:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:00:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:00:18.785674  543705 disk_info.go:125] begin check local disk info of client
I0319 11:00:18.788052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:00:18.788057  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f12c0 0xc0000f1300]
E0319 11:00:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:00:23.409775  543705 memory.go:184] no items to output this cycle
I0319 11:00:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 11:00:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:00:33.409917  543705 cpu.go:275] no items to output this cycle
I0319 11:00:33.409921  543705 memory.go:184] no items to output this cycle
I0319 11:00:37.688136  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:00:37.688143  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:00:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:00:43.410746  543705 memory.go:191] Add success.
I0319 11:00:43.409807  543705 cpu.go:282] Add success.
I0319 11:00:43.420474  543705 net.go:648] Add success.
I0319 11:00:43.423500  543705 net.go:770] primary dev: ETH0
I0319 11:00:43.423514  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:00:43.423529  543705 net.go:698] Add success.
I0319 11:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:00:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:00:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:00:53.409794  543705 memory.go:184] no items to output this cycle
I0319 11:00:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 11:01:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:01:03.409770  543705 memory.go:184] no items to output this cycle
I0319 11:01:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:01:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:01:13.409790  543705 memory.go:191] Add success.
I0319 11:01:13.409794  543705 cpu.go:282] Add success.
W0319 11:01:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:01:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:01:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:01:13.420037  543705 net.go:648] Add success.
I0319 11:01:13.422770  543705 net.go:770] primary dev: ETH0
I0319 11:01:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:01:13.422796  543705 net.go:698] Add success.
I0319 11:01:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:01:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:01:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0319 11:01:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:01:14.456591  543705 disk_worker.go:494] system disk:vda1
I0319 11:01:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:01:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:01:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:01:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:01:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:01:18.789578  543705 disk_info.go:125] begin check local disk info of client
I0319 11:01:18.791849  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:01:18.791856  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034fa00 0xc00034fa40]
E0319 11:01:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:01:23.409775  543705 memory.go:184] no items to output this cycle
I0319 11:01:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 11:01:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:01:33.409818  543705 memory.go:184] no items to output this cycle
I0319 11:01:33.409831  543705 cpu.go:275] no items to output this cycle
E0319 11:01:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:01:43.409784  543705 memory.go:191] Add success.
I0319 11:01:43.409808  543705 cpu.go:282] Add success.
I0319 11:01:43.420022  543705 net.go:648] Add success.
I0319 11:01:43.422745  543705 net.go:770] primary dev: ETH0
I0319 11:01:43.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:01:43.422773  543705 net.go:698] Add success.
I0319 11:01:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:01:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:01:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:01:53.410272  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:01:53.410289  543705 memory.go:184] no items to output this cycle
I0319 11:01:53.410293  543705 cpu.go:275] no items to output this cycle
E0319 11:02:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:02:03.409789  543705 memory.go:184] no items to output this cycle
I0319 11:02:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 11:02:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:02:13.409785  543705 memory.go:191] Add success.
I0319 11:02:13.409801  543705 cpu.go:282] Add success.
W0319 11:02:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:02:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:02:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:02:13.420344  543705 net.go:648] Add success.
I0319 11:02:13.422920  543705 net.go:770] primary dev: ETH0
I0319 11:02:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:02:13.422949  543705 net.go:698] Add success.
W0319 11:02:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:02:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 11:02:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:02:14.456797  543705 disk_worker.go:494] system disk:vda1
I0319 11:02:14.456836  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:02:14.457116  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:02:14.457124  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:02:14.457128  543705 custom_config.go:64] query custom config with name: gpu
E0319 11:02:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:02:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:02:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:02:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:02:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:02:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:02:16.472318  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:02:18.791940  543705 disk_info.go:125] begin check local disk info of client
I0319 11:02:18.794416  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:02:18.794424  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487680 0xc0004876c0]
E0319 11:02:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:02:23.409764  543705 memory.go:184] no items to output this cycle
I0319 11:02:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:02:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:02:33.409791  543705 memory.go:184] no items to output this cycle
I0319 11:02:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:02:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:02:43.409781  543705 memory.go:191] Add success.
I0319 11:02:43.409802  543705 cpu.go:282] Add success.
I0319 11:02:43.419872  543705 net.go:648] Add success.
I0319 11:02:43.422531  543705 net.go:770] primary dev: ETH0
I0319 11:02:43.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:02:43.422555  543705 net.go:698] Add success.
I0319 11:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:02:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:02:53.410217  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:02:53.410238  543705 memory.go:184] no items to output this cycle
I0319 11:02:53.410248  543705 cpu.go:275] no items to output this cycle
E0319 11:03:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:03:03.409798  543705 memory.go:184] no items to output this cycle
I0319 11:03:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 11:03:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:03:13.409799  543705 memory.go:191] Add success.
I0319 11:03:13.409803  543705 cpu.go:282] Add success.
W0319 11:03:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:03:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:03:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:03:13.420194  543705 net.go:648] Add success.
I0319 11:03:13.422838  543705 net.go:770] primary dev: ETH0
I0319 11:03:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:03:13.422863  543705 net.go:698] Add success.
I0319 11:03:13.469380  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3285e59e-e48d-4aaf-bfcc-db30d58822dc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:03:13.469414  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:03:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:03:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 11:03:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:03:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 11:03:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:03:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:03:16.472430  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:03:18.794515  543705 disk_info.go:125] begin check local disk info of client
I0319 11:03:18.796958  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:03:18.796965  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c000 0xc00035c040]
E0319 11:03:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:03:23.409806  543705 memory.go:184] no items to output this cycle
I0319 11:03:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 11:03:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:03:33.409786  543705 memory.go:184] no items to output this cycle
I0319 11:03:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 11:03:37.688279  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:03:37.688286  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:03:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:03:43.410676  543705 memory.go:191] Add success.
I0319 11:03:43.409815  543705 cpu.go:282] Add success.
I0319 11:03:43.420450  543705 net.go:648] Add success.
I0319 11:03:43.423372  543705 net.go:770] primary dev: ETH0
I0319 11:03:43.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:03:43.423398  543705 net.go:698] Add success.
I0319 11:03:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:03:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:03:53.409789  543705 cpu.go:275] no items to output this cycle
I0319 11:03:53.409791  543705 memory.go:184] no items to output this cycle
E0319 11:04:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:04:03.409789  543705 memory.go:184] no items to output this cycle
I0319 11:04:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:04:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:04:13.409815  543705 memory.go:191] Add success.
I0319 11:04:13.409824  543705 cpu.go:282] Add success.
W0319 11:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:04:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:04:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:04:13.420129  543705 net.go:648] Add success.
I0319 11:04:13.423061  543705 net.go:770] primary dev: ETH0
I0319 11:04:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:04:13.423090  543705 net.go:698] Add success.
I0319 11:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:04:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:04:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 11:04:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:04:14.456567  543705 disk_worker.go:494] system disk:vda1
I0319 11:04:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:04:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:04:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:04:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:04:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:04:16.472091  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:04:18.797676  543705 disk_info.go:125] begin check local disk info of client
I0319 11:04:18.800146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:04:18.800151  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a380 0xc00048a3c0]
E0319 11:04:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:04:23.409782  543705 memory.go:184] no items to output this cycle
I0319 11:04:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:04:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:04:33.409783  543705 memory.go:184] no items to output this cycle
I0319 11:04:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 11:04:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:04:43.409780  543705 memory.go:191] Add success.
I0319 11:04:43.409800  543705 cpu.go:282] Add success.
I0319 11:04:43.419960  543705 net.go:648] Add success.
I0319 11:04:43.422962  543705 net.go:770] primary dev: ETH0
I0319 11:04:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:04:43.422990  543705 net.go:698] Add success.
I0319 11:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:04:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:04:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:04:53.409767  543705 memory.go:184] no items to output this cycle
I0319 11:04:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 11:05:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:05:03.409811  543705 memory.go:184] no items to output this cycle
I0319 11:05:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 11:05:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:05:13.409790  543705 memory.go:191] Add success.
I0319 11:05:13.409797  543705 cpu.go:282] Add success.
W0319 11:05:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:05:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:05:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:05:13.420201  543705 net.go:648] Add success.
I0319 11:05:13.423248  543705 net.go:770] primary dev: ETH0
I0319 11:05:13.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:05:13.423273  543705 net.go:698] Add success.
I0319 11:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:05:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:05:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0319 11:05:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:05:14.456477  543705 disk_worker.go:494] system disk:vda1
I0319 11:05:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:05:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:05:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:05:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:05:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:05:16.472477  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:05:18.800234  543705 disk_info.go:125] begin check local disk info of client
I0319 11:05:18.802715  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:05:18.802722  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a86c0 0xc0004a8700]
E0319 11:05:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:05:23.409799  543705 memory.go:184] no items to output this cycle
I0319 11:05:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 11:05:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:05:33.409782  543705 memory.go:184] no items to output this cycle
I0319 11:05:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 11:05:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:05:43.409792  543705 memory.go:191] Add success.
I0319 11:05:43.409796  543705 cpu.go:282] Add success.
I0319 11:05:43.419967  543705 net.go:648] Add success.
I0319 11:05:43.423038  543705 net.go:770] primary dev: ETH0
I0319 11:05:43.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:05:43.423062  543705 net.go:698] Add success.
I0319 11:05:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:05:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:05:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:05:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:05:53.409766  543705 memory.go:184] no items to output this cycle
I0319 11:05:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:06:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:06:03.409785  543705 memory.go:184] no items to output this cycle
I0319 11:06:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 11:06:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:06:13.409792  543705 memory.go:191] Add success.
I0319 11:06:13.409794  543705 cpu.go:282] Add success.
W0319 11:06:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:06:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:06:13.420070  543705 net.go:648] Add success.
I0319 11:06:13.422652  543705 net.go:770] primary dev: ETH0
I0319 11:06:13.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:06:13.422681  543705 net.go:698] Add success.
I0319 11:06:13.468454  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c08612f-1af6-4b1b-90dc-586544af2d54","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:06:13.468488  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:06:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:06:14.455262  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:06:14.455355  543705 disk_worker.go:708] disk space is not compliant
W0319 11:06:14.455359  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:06:14.457477  543705 disk_worker.go:494] system disk:vda1
I0319 11:06:14.457525  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:06:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:06:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:06:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:06:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:06:18.803734  543705 disk_info.go:125] begin check local disk info of client
I0319 11:06:18.806141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:06:18.806147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d65c0 0xc0004d6600]
E0319 11:06:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:06:23.409777  543705 memory.go:184] no items to output this cycle
I0319 11:06:23.409780  543705 cpu.go:275] no items to output this cycle
E0319 11:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:06:33.409795  543705 memory.go:184] no items to output this cycle
I0319 11:06:33.409802  543705 cpu.go:275] no items to output this cycle
I0319 11:06:37.689131  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:06:37.689138  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:06:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:06:43.410742  543705 memory.go:191] Add success.
I0319 11:06:43.409828  543705 cpu.go:282] Add success.
I0319 11:06:43.420584  543705 net.go:648] Add success.
I0319 11:06:43.423714  543705 net.go:770] primary dev: ETH0
I0319 11:06:43.423727  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:06:43.423740  543705 net.go:698] Add success.
I0319 11:06:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:06:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:06:53.409802  543705 cpu.go:275] no items to output this cycle
I0319 11:06:53.409805  543705 memory.go:184] no items to output this cycle
E0319 11:07:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:07:03.409814  543705 memory.go:184] no items to output this cycle
I0319 11:07:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 11:07:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:07:13.409795  543705 memory.go:191] Add success.
I0319 11:07:13.409823  543705 cpu.go:282] Add success.
W0319 11:07:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:07:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:07:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:07:13.420071  543705 net.go:648] Add success.
I0319 11:07:13.423123  543705 net.go:770] primary dev: ETH0
I0319 11:07:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:07:13.423151  543705 net.go:698] Add success.
I0319 11:07:13.453668  543705 event_worker.go:152] Polling the log file for events...
W0319 11:07:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:07:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 11:07:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:07:14.456158  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:07:14.456168  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:07:14.456174  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:07:14.456449  543705 disk_worker.go:494] system disk:vda1
I0319 11:07:14.456506  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:07:15.456794  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:07:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:07:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:07:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:07:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:07:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:07:16.472342  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:07:18.807669  543705 disk_info.go:125] begin check local disk info of client
I0319 11:07:18.810041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:07:18.810047  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c140 0xc00039c180]
E0319 11:07:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:07:23.409786  543705 memory.go:184] no items to output this cycle
I0319 11:07:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 11:07:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:07:33.409794  543705 memory.go:184] no items to output this cycle
I0319 11:07:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 11:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:07:43.409794  543705 memory.go:191] Add success.
I0319 11:07:43.409817  543705 cpu.go:282] Add success.
I0319 11:07:43.419902  543705 net.go:648] Add success.
I0319 11:07:43.422962  543705 net.go:770] primary dev: ETH0
I0319 11:07:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:07:43.422988  543705 net.go:698] Add success.
I0319 11:07:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:07:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:07:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:07:53.409778  543705 memory.go:184] no items to output this cycle
I0319 11:07:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 11:08:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:08:03.409782  543705 memory.go:184] no items to output this cycle
I0319 11:08:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 11:08:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:08:13.409830  543705 memory.go:191] Add success.
I0319 11:08:13.409851  543705 cpu.go:282] Add success.
W0319 11:08:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:08:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:08:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:08:13.420550  543705 net.go:648] Add success.
I0319 11:08:13.423540  543705 net.go:770] primary dev: ETH0
I0319 11:08:13.423555  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:08:13.423571  543705 net.go:698] Add success.
I0319 11:08:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:08:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:08:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 11:08:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:08:14.456592  543705 disk_worker.go:494] system disk:vda1
I0319 11:08:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:08:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:08:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:08:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:08:18.811752  543705 disk_info.go:125] begin check local disk info of client
I0319 11:08:18.814163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:08:18.814169  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a0c0 0xc00039a100]
E0319 11:08:23.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:08:23.410256  543705 memory.go:184] no items to output this cycle
I0319 11:08:23.410267  543705 cpu.go:275] no items to output this cycle
E0319 11:08:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:08:33.409780  543705 memory.go:184] no items to output this cycle
I0319 11:08:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 11:08:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:08:43.409823  543705 memory.go:191] Add success.
I0319 11:08:43.409825  543705 cpu.go:282] Add success.
I0319 11:08:43.420027  543705 net.go:648] Add success.
I0319 11:08:43.422853  543705 net.go:770] primary dev: ETH0
I0319 11:08:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:08:43.422879  543705 net.go:698] Add success.
I0319 11:08:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:08:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:08:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:08:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:08:53.409794  543705 memory.go:184] no items to output this cycle
I0319 11:08:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 11:09:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:09:03.409779  543705 memory.go:184] no items to output this cycle
I0319 11:09:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 11:09:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:09:13.409789  543705 memory.go:191] Add success.
I0319 11:09:13.409808  543705 cpu.go:282] Add success.
W0319 11:09:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:09:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:09:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:09:13.420163  543705 net.go:648] Add success.
I0319 11:09:13.422959  543705 net.go:770] primary dev: ETH0
I0319 11:09:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:09:13.422998  543705 net.go:698] Add success.
I0319 11:09:13.470471  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9f623839-1cc3-48bd-bf1a-14c090ca4b5c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:09:13.470503  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:09:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:09:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0319 11:09:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:09:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 11:09:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:09:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:09:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:09:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:09:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:09:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:09:18.815778  543705 disk_info.go:125] begin check local disk info of client
I0319 11:09:18.818187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:09:18.818193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa100 0xc0001aa140]
E0319 11:09:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:09:23.409773  543705 memory.go:184] no items to output this cycle
I0319 11:09:23.409777  543705 cpu.go:275] no items to output this cycle
E0319 11:09:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:09:33.409782  543705 memory.go:184] no items to output this cycle
I0319 11:09:33.409791  543705 cpu.go:275] no items to output this cycle
I0319 11:09:37.689728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:09:37.689734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:09:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:09:43.410822  543705 memory.go:191] Add success.
I0319 11:09:43.409817  543705 cpu.go:282] Add success.
I0319 11:09:43.420615  543705 net.go:648] Add success.
I0319 11:09:43.423335  543705 net.go:770] primary dev: ETH0
I0319 11:09:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:09:43.423369  543705 net.go:698] Add success.
I0319 11:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:09:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:09:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:09:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:09:53.409766  543705 memory.go:184] no items to output this cycle
I0319 11:09:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 11:10:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:10:03.409807  543705 memory.go:184] no items to output this cycle
I0319 11:10:03.409825  543705 cpu.go:275] no items to output this cycle
E0319 11:10:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:10:13.409787  543705 memory.go:191] Add success.
I0319 11:10:13.409808  543705 cpu.go:282] Add success.
W0319 11:10:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:10:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:10:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:10:13.420115  543705 net.go:648] Add success.
I0319 11:10:13.423052  543705 net.go:770] primary dev: ETH0
I0319 11:10:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:10:13.423086  543705 net.go:698] Add success.
I0319 11:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:10:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:10:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 11:10:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:10:14.456584  543705 disk_worker.go:494] system disk:vda1
I0319 11:10:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:10:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:10:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:10:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:10:16.472431  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:10:18.819730  543705 disk_info.go:125] begin check local disk info of client
I0319 11:10:18.822110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:10:18.822116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344080 0xc0003440c0]
E0319 11:10:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:10:23.409778  543705 memory.go:184] no items to output this cycle
I0319 11:10:23.409783  543705 cpu.go:275] no items to output this cycle
E0319 11:10:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:10:33.409777  543705 memory.go:184] no items to output this cycle
I0319 11:10:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 11:10:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:10:43.409813  543705 memory.go:191] Add success.
I0319 11:10:43.409820  543705 cpu.go:282] Add success.
I0319 11:10:43.419988  543705 net.go:648] Add success.
I0319 11:10:43.423006  543705 net.go:770] primary dev: ETH0
I0319 11:10:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:10:43.423032  543705 net.go:698] Add success.
I0319 11:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:10:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:10:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:10:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:10:53.409799  543705 memory.go:184] no items to output this cycle
I0319 11:10:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 11:11:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:11:03.409804  543705 memory.go:184] no items to output this cycle
I0319 11:11:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 11:11:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:11:13.409813  543705 memory.go:191] Add success.
I0319 11:11:13.409815  543705 cpu.go:282] Add success.
W0319 11:11:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:11:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:11:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:11:13.420143  543705 net.go:648] Add success.
I0319 11:11:13.422838  543705 net.go:770] primary dev: ETH0
I0319 11:11:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:11:13.422865  543705 net.go:698] Add success.
I0319 11:11:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:11:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:11:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0319 11:11:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:11:14.456602  543705 disk_worker.go:494] system disk:vda1
I0319 11:11:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:11:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:11:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:11:18.822190  543705 disk_info.go:125] begin check local disk info of client
I0319 11:11:18.824614  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:11:18.824621  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466980 0xc0004669c0]
E0319 11:11:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:11:23.409766  543705 memory.go:184] no items to output this cycle
I0319 11:11:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 11:11:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:11:33.409791  543705 memory.go:184] no items to output this cycle
I0319 11:11:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:11:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:11:43.409787  543705 memory.go:191] Add success.
I0319 11:11:43.409788  543705 cpu.go:282] Add success.
I0319 11:11:43.419972  543705 net.go:648] Add success.
I0319 11:11:43.423232  543705 net.go:770] primary dev: ETH0
I0319 11:11:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:11:43.423258  543705 net.go:698] Add success.
I0319 11:11:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:11:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:11:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:11:53.409807  543705 memory.go:184] no items to output this cycle
I0319 11:11:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 11:12:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:12:03.409781  543705 memory.go:184] no items to output this cycle
I0319 11:12:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 11:12:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:12:13.409818  543705 memory.go:191] Add success.
I0319 11:12:13.409822  543705 cpu.go:282] Add success.
W0319 11:12:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:12:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:12:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:12:13.420140  543705 net.go:648] Add success.
I0319 11:12:13.422862  543705 net.go:770] primary dev: ETH0
I0319 11:12:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:12:13.422888  543705 net.go:698] Add success.
I0319 11:12:13.464467  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"16f672e7-153f-4ae6-9aa2-bfc65b266d66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:12:13.464503  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 11:12:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:12:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 11:12:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:12:14.456830  543705 disk_worker.go:494] system disk:vda1
I0319 11:12:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:12:14.457076  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:12:14.457084  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:12:14.457088  543705 custom_config.go:64] query custom config with name: gpu
E0319 11:12:15.456887  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:12:15.456895  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:12:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:12:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:12:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:12:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:12:16.472334  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:12:18.825673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:12:18.827966  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:12:18.827972  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ea0c0 0xc0000ea100]
E0319 11:12:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:12:23.409768  543705 memory.go:184] no items to output this cycle
I0319 11:12:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 11:12:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:12:33.409777  543705 memory.go:184] no items to output this cycle
I0319 11:12:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 11:12:37.691145  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:12:37.691152  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:12:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:12:43.410613  543705 memory.go:191] Add success.
I0319 11:12:43.409797  543705 cpu.go:282] Add success.
I0319 11:12:43.420419  543705 net.go:648] Add success.
I0319 11:12:43.423496  543705 net.go:770] primary dev: ETH0
I0319 11:12:43.423509  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:12:43.423521  543705 net.go:698] Add success.
I0319 11:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:12:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:12:53.409782  543705 memory.go:184] no items to output this cycle
I0319 11:12:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 11:13:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:13:03.409784  543705 memory.go:184] no items to output this cycle
I0319 11:13:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 11:13:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:13:13.409819  543705 memory.go:191] Add success.
I0319 11:13:13.409820  543705 cpu.go:282] Add success.
W0319 11:13:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:13:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:13:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:13:13.420160  543705 net.go:648] Add success.
I0319 11:13:13.423193  543705 net.go:770] primary dev: ETH0
I0319 11:13:13.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:13:13.423218  543705 net.go:698] Add success.
I0319 11:13:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:13:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:13:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 11:13:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:13:14.456521  543705 disk_worker.go:494] system disk:vda1
I0319 11:13:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:13:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:13:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:13:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:13:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:13:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:13:18.829672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:13:18.831993  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:13:18.831999  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c100 0xc00048c140]
E0319 11:13:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:13:23.409767  543705 memory.go:184] no items to output this cycle
I0319 11:13:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 11:13:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:13:33.409812  543705 memory.go:184] no items to output this cycle
I0319 11:13:33.409826  543705 cpu.go:275] no items to output this cycle
E0319 11:13:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:13:43.409813  543705 memory.go:191] Add success.
I0319 11:13:43.409819  543705 cpu.go:282] Add success.
I0319 11:13:43.419984  543705 net.go:648] Add success.
I0319 11:13:43.423024  543705 net.go:770] primary dev: ETH0
I0319 11:13:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:13:43.423049  543705 net.go:698] Add success.
I0319 11:13:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:13:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:13:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:13:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:13:53.409770  543705 memory.go:184] no items to output this cycle
I0319 11:13:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 11:14:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:14:03.409810  543705 memory.go:184] no items to output this cycle
I0319 11:14:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 11:14:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:14:13.409817  543705 memory.go:191] Add success.
I0319 11:14:13.409823  543705 cpu.go:282] Add success.
W0319 11:14:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:14:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:14:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:14:13.420100  543705 net.go:648] Add success.
I0319 11:14:13.423092  543705 net.go:770] primary dev: ETH0
I0319 11:14:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:14:13.423116  543705 net.go:698] Add success.
I0319 11:14:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:14:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:14:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 11:14:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:14:14.456584  543705 disk_worker.go:494] system disk:vda1
I0319 11:14:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:14:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:14:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:14:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:14:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:14:18.833673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:14:18.836047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:14:18.836053  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a100 0xc00048a140]
E0319 11:14:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:14:23.409788  543705 memory.go:184] no items to output this cycle
I0319 11:14:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 11:14:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:14:33.409783  543705 memory.go:184] no items to output this cycle
I0319 11:14:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 11:14:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:14:43.409812  543705 memory.go:191] Add success.
I0319 11:14:43.409825  543705 cpu.go:282] Add success.
I0319 11:14:43.419993  543705 net.go:648] Add success.
I0319 11:14:43.422949  543705 net.go:770] primary dev: ETH0
I0319 11:14:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:14:43.422979  543705 net.go:698] Add success.
I0319 11:14:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:14:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:14:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:14:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:14:53.409776  543705 cpu.go:275] no items to output this cycle
I0319 11:14:53.409779  543705 memory.go:184] no items to output this cycle
E0319 11:15:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:15:03.409810  543705 memory.go:184] no items to output this cycle
I0319 11:15:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 11:15:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:15:13.409791  543705 memory.go:191] Add success.
I0319 11:15:13.409792  543705 cpu.go:282] Add success.
W0319 11:15:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:15:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:15:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:15:13.420131  543705 net.go:648] Add success.
I0319 11:15:13.422699  543705 net.go:770] primary dev: ETH0
I0319 11:15:13.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:15:13.422725  543705 net.go:698] Add success.
I0319 11:15:13.469125  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44be6a60-5929-4f21-8e12-cf90924580ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:15:13.469159  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:15:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:15:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0319 11:15:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:15:14.456630  543705 disk_worker.go:494] system disk:vda1
I0319 11:15:14.456661  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:15:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:15:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:15:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:15:18.837674  543705 disk_info.go:125] begin check local disk info of client
I0319 11:15:18.840041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:15:18.840047  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296000 0xc000296040]
E0319 11:15:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:15:23.409760  543705 memory.go:184] no items to output this cycle
I0319 11:15:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 11:15:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:15:33.409768  543705 memory.go:184] no items to output this cycle
I0319 11:15:33.409803  543705 cpu.go:275] no items to output this cycle
I0319 11:15:37.692144  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:15:37.692151  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:15:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:15:43.410635  543705 memory.go:191] Add success.
I0319 11:15:43.409787  543705 cpu.go:282] Add success.
I0319 11:15:43.420430  543705 net.go:648] Add success.
I0319 11:15:43.422948  543705 net.go:770] primary dev: ETH0
I0319 11:15:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:15:43.422978  543705 net.go:698] Add success.
I0319 11:15:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:15:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:15:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:15:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:15:53.409798  543705 memory.go:184] no items to output this cycle
I0319 11:15:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 11:16:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:16:03.409783  543705 cpu.go:275] no items to output this cycle
I0319 11:16:03.409790  543705 memory.go:184] no items to output this cycle
E0319 11:16:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:16:13.409795  543705 memory.go:191] Add success.
I0319 11:16:13.409799  543705 cpu.go:282] Add success.
W0319 11:16:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:16:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:16:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:16:13.420222  543705 net.go:648] Add success.
I0319 11:16:13.423228  543705 net.go:770] primary dev: ETH0
I0319 11:16:13.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:16:13.423255  543705 net.go:698] Add success.
I0319 11:16:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:16:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:16:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 11:16:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:16:14.459221  543705 disk_worker.go:494] system disk:vda1
I0319 11:16:14.459253  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:16:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:16:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:16:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:16:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:16:18.841673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:16:18.844128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:16:18.844134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee100 0xc0003ee140]
E0319 11:16:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:16:23.409774  543705 cpu.go:275] no items to output this cycle
I0319 11:16:23.409777  543705 memory.go:184] no items to output this cycle
E0319 11:16:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:16:33.409779  543705 memory.go:184] no items to output this cycle
I0319 11:16:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 11:16:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:16:43.409783  543705 memory.go:191] Add success.
I0319 11:16:43.409806  543705 cpu.go:282] Add success.
I0319 11:16:43.419840  543705 net.go:648] Add success.
I0319 11:16:43.422574  543705 net.go:770] primary dev: ETH0
I0319 11:16:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:16:43.422601  543705 net.go:698] Add success.
I0319 11:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:16:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:16:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:16:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:16:53.409783  543705 memory.go:184] no items to output this cycle
I0319 11:16:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 11:17:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:17:03.409786  543705 memory.go:184] no items to output this cycle
I0319 11:17:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 11:17:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:17:13.409825  543705 memory.go:191] Add success.
I0319 11:17:13.409830  543705 cpu.go:282] Add success.
W0319 11:17:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:17:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:17:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:17:13.420209  543705 net.go:648] Add success.
I0319 11:17:13.422819  543705 net.go:770] primary dev: ETH0
I0319 11:17:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:17:13.422844  543705 net.go:698] Add success.
I0319 11:17:13.453375  543705 event_worker.go:152] Polling the log file for events...
W0319 11:17:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:17:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 11:17:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:17:14.455881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:17:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:17:14.455896  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:17:14.456786  543705 disk_worker.go:494] system disk:vda1
I0319 11:17:14.456881  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:17:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:17:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:17:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:17:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:17:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:17:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:17:16.472327  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:17:18.845670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:17:18.848035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:17:18.848041  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003eefc0 0xc0003ef000]
E0319 11:17:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:17:23.409780  543705 memory.go:184] no items to output this cycle
I0319 11:17:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 11:17:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:17:33.409812  543705 memory.go:184] no items to output this cycle
I0319 11:17:33.409823  543705 cpu.go:275] no items to output this cycle
E0319 11:17:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:17:43.409790  543705 memory.go:191] Add success.
I0319 11:17:43.409810  543705 cpu.go:282] Add success.
I0319 11:17:43.419883  543705 net.go:648] Add success.
I0319 11:17:43.422645  543705 net.go:770] primary dev: ETH0
I0319 11:17:43.422660  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:17:43.422674  543705 net.go:698] Add success.
I0319 11:17:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:17:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:17:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:17:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:17:53.409786  543705 memory.go:184] no items to output this cycle
I0319 11:17:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 11:18:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:18:03.409782  543705 memory.go:184] no items to output this cycle
I0319 11:18:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 11:18:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:18:13.409789  543705 memory.go:191] Add success.
I0319 11:18:13.409807  543705 cpu.go:282] Add success.
W0319 11:18:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:18:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:18:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:18:13.420355  543705 net.go:648] Add success.
I0319 11:18:13.423370  543705 net.go:770] primary dev: ETH0
I0319 11:18:13.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:18:13.423394  543705 net.go:698] Add success.
I0319 11:18:13.469490  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"abbd50c3-cdc1-4ebb-b699-df662eea4c0d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:18:13.469524  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:18:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:18:14.455304  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:18:14.455367  543705 disk_worker.go:708] disk space is not compliant
W0319 11:18:14.455370  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:18:14.457552  543705 disk_worker.go:494] system disk:vda1
I0319 11:18:14.457580  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:18:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:18:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:18:18.849674  543705 disk_info.go:125] begin check local disk info of client
I0319 11:18:18.852046  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:18:18.852052  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dd940 0xc0004dd980]
E0319 11:18:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:18:23.409793  543705 memory.go:184] no items to output this cycle
I0319 11:18:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 11:18:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:18:33.409785  543705 memory.go:184] no items to output this cycle
I0319 11:18:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 11:18:37.692289  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:18:37.692296  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:18:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:18:43.410755  543705 memory.go:191] Add success.
I0319 11:18:43.409806  543705 cpu.go:282] Add success.
I0319 11:18:43.420430  543705 net.go:648] Add success.
I0319 11:18:43.423180  543705 net.go:770] primary dev: ETH0
I0319 11:18:43.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:18:43.423206  543705 net.go:698] Add success.
I0319 11:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:18:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:18:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:18:53.409781  543705 memory.go:184] no items to output this cycle
I0319 11:18:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 11:19:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:19:03.409814  543705 memory.go:184] no items to output this cycle
I0319 11:19:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 11:19:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:19:13.409803  543705 memory.go:191] Add success.
I0319 11:19:13.409807  543705 cpu.go:282] Add success.
W0319 11:19:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:19:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:19:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:19:13.420061  543705 net.go:648] Add success.
I0319 11:19:13.422910  543705 net.go:770] primary dev: ETH0
I0319 11:19:13.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:19:13.422942  543705 net.go:698] Add success.
I0319 11:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:19:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:19:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 11:19:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:19:14.456557  543705 disk_worker.go:494] system disk:vda1
I0319 11:19:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:19:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:19:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:19:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:19:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:19:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:19:18.853672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:19:18.856023  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:19:18.856029  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484600 0xc000484640]
E0319 11:19:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:19:23.409784  543705 memory.go:184] no items to output this cycle
I0319 11:19:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 11:19:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:19:33.409796  543705 memory.go:184] no items to output this cycle
I0319 11:19:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 11:19:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:19:43.409799  543705 memory.go:191] Add success.
I0319 11:19:43.409820  543705 cpu.go:282] Add success.
I0319 11:19:43.419958  543705 net.go:648] Add success.
I0319 11:19:43.422831  543705 net.go:770] primary dev: ETH0
I0319 11:19:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:19:43.422856  543705 net.go:698] Add success.
I0319 11:19:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:19:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:19:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:19:53.409810  543705 memory.go:184] no items to output this cycle
I0319 11:19:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 11:20:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:20:03.409788  543705 memory.go:184] no items to output this cycle
I0319 11:20:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:20:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:20:13.409783  543705 memory.go:191] Add success.
I0319 11:20:13.409809  543705 cpu.go:282] Add success.
W0319 11:20:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:20:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:20:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:20:13.420060  543705 net.go:648] Add success.
I0319 11:20:13.423051  543705 net.go:770] primary dev: ETH0
I0319 11:20:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:20:13.423081  543705 net.go:698] Add success.
I0319 11:20:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:20:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:20:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 11:20:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:20:14.456565  543705 disk_worker.go:494] system disk:vda1
I0319 11:20:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:20:15.456022  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:20:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:20:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:20:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:20:18.857671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:20:18.860005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:20:18.860012  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc340 0xc0003dc380]
E0319 11:20:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:20:23.409765  543705 memory.go:184] no items to output this cycle
I0319 11:20:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 11:20:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:20:33.409771  543705 memory.go:184] no items to output this cycle
I0319 11:20:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 11:20:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:20:43.409815  543705 memory.go:191] Add success.
I0319 11:20:43.409825  543705 cpu.go:282] Add success.
I0319 11:20:43.419966  543705 net.go:648] Add success.
I0319 11:20:43.422961  543705 net.go:770] primary dev: ETH0
I0319 11:20:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:20:43.422986  543705 net.go:698] Add success.
I0319 11:20:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:20:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:20:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:20:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:20:53.409781  543705 memory.go:184] no items to output this cycle
I0319 11:20:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 11:21:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:21:03.409783  543705 memory.go:184] no items to output this cycle
I0319 11:21:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 11:21:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:21:13.409783  543705 memory.go:191] Add success.
I0319 11:21:13.409805  543705 cpu.go:282] Add success.
W0319 11:21:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:21:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:21:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:21:13.420052  543705 net.go:648] Add success.
I0319 11:21:13.422746  543705 net.go:770] primary dev: ETH0
I0319 11:21:13.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:21:13.422776  543705 net.go:698] Add success.
I0319 11:21:13.471296  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"52f862d9-306e-45bb-85ce-e71796e961cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:21:13.471329  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:21:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:21:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 11:21:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:21:14.456575  543705 disk_worker.go:494] system disk:vda1
I0319 11:21:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:21:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:21:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:21:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:21:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:21:18.861672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:21:18.864043  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:21:18.864049  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492ec0 0xc000492f00]
E0319 11:21:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:21:23.409773  543705 cpu.go:275] no items to output this cycle
I0319 11:21:23.409780  543705 memory.go:184] no items to output this cycle
E0319 11:21:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:21:33.409805  543705 memory.go:184] no items to output this cycle
I0319 11:21:33.409818  543705 cpu.go:275] no items to output this cycle
I0319 11:21:37.692431  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:21:37.692438  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:21:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:21:43.410627  543705 memory.go:191] Add success.
I0319 11:21:43.409809  543705 cpu.go:282] Add success.
I0319 11:21:43.420333  543705 net.go:648] Add success.
I0319 11:21:43.422910  543705 net.go:770] primary dev: ETH0
I0319 11:21:43.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:21:43.422949  543705 net.go:698] Add success.
I0319 11:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:21:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:21:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:21:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:21:53.409793  543705 memory.go:184] no items to output this cycle
I0319 11:21:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:22:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:22:03.409791  543705 memory.go:184] no items to output this cycle
I0319 11:22:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:22:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:22:13.409786  543705 memory.go:191] Add success.
I0319 11:22:13.409786  543705 cpu.go:282] Add success.
W0319 11:22:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:22:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:22:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:22:13.420152  543705 net.go:648] Add success.
I0319 11:22:13.423181  543705 net.go:770] primary dev: ETH0
I0319 11:22:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:22:13.423205  543705 net.go:698] Add success.
W0319 11:22:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:22:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 11:22:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:22:14.456807  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:22:14.456816  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:22:14.456823  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:22:14.456926  543705 disk_worker.go:494] system disk:vda1
I0319 11:22:14.456955  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:22:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:22:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:22:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:22:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:22:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:22:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:22:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:22:18.865665  543705 disk_info.go:125] begin check local disk info of client
I0319 11:22:18.867952  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:22:18.867958  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509ac0 0xc000509b00]
E0319 11:22:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:22:23.409837  543705 memory.go:184] no items to output this cycle
I0319 11:22:23.409950  543705 cpu.go:275] no items to output this cycle
E0319 11:22:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:22:33.409811  543705 memory.go:184] no items to output this cycle
I0319 11:22:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 11:22:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:22:43.409781  543705 memory.go:191] Add success.
I0319 11:22:43.409806  543705 cpu.go:282] Add success.
I0319 11:22:43.419877  543705 net.go:648] Add success.
I0319 11:22:43.422490  543705 net.go:770] primary dev: ETH0
I0319 11:22:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:22:43.422518  543705 net.go:698] Add success.
I0319 11:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:22:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:22:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:22:53.409775  543705 memory.go:184] no items to output this cycle
I0319 11:22:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 11:23:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:23:03.409782  543705 memory.go:184] no items to output this cycle
I0319 11:23:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:23:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:23:13.409817  543705 memory.go:191] Add success.
I0319 11:23:13.409824  543705 cpu.go:282] Add success.
W0319 11:23:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:23:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:23:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:23:13.420163  543705 net.go:648] Add success.
I0319 11:23:13.423121  543705 net.go:770] primary dev: ETH0
I0319 11:23:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:23:13.423145  543705 net.go:698] Add success.
I0319 11:23:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:23:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:23:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0319 11:23:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:23:14.456489  543705 disk_worker.go:494] system disk:vda1
I0319 11:23:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:23:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:23:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:23:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:23:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:23:16.472413  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:23:18.869675  543705 disk_info.go:125] begin check local disk info of client
I0319 11:23:18.872028  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:23:18.872034  543705 disk_info.go:196] parse disk info done, disk is : [0xc000345800 0xc000345840]
E0319 11:23:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:23:23.409771  543705 cpu.go:275] no items to output this cycle
I0319 11:23:23.409773  543705 memory.go:184] no items to output this cycle
E0319 11:23:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:23:33.409796  543705 memory.go:184] no items to output this cycle
I0319 11:23:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 11:23:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:23:43.409787  543705 memory.go:191] Add success.
I0319 11:23:43.409805  543705 cpu.go:282] Add success.
I0319 11:23:43.420012  543705 net.go:648] Add success.
I0319 11:23:43.422846  543705 net.go:770] primary dev: ETH0
I0319 11:23:43.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:23:43.422872  543705 net.go:698] Add success.
I0319 11:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:23:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:23:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:23:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:23:53.409777  543705 memory.go:184] no items to output this cycle
I0319 11:23:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 11:24:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:24:03.409786  543705 cpu.go:275] no items to output this cycle
I0319 11:24:03.409792  543705 memory.go:184] no items to output this cycle
E0319 11:24:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:24:13.409809  543705 memory.go:191] Add success.
I0319 11:24:13.409817  543705 cpu.go:282] Add success.
W0319 11:24:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:24:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:24:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:24:13.420230  543705 net.go:648] Add success.
I0319 11:24:13.423404  543705 net.go:770] primary dev: ETH0
I0319 11:24:13.423418  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:24:13.423430  543705 net.go:698] Add success.
I0319 11:24:13.469807  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5d0ef736-934e-4a85-ac1d-59401a082d9f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:24:13.469865  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:24:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:24:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:24:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 11:24:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:24:14.456750  543705 disk_worker.go:494] system disk:vda1
I0319 11:24:14.456778  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:24:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:24:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:24:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:24:18.873672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:24:18.876016  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:24:18.876022  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039cd80 0xc00039cdc0]
E0319 11:24:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:24:23.409795  543705 memory.go:184] no items to output this cycle
I0319 11:24:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 11:24:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:24:33.409813  543705 memory.go:184] no items to output this cycle
I0319 11:24:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 11:24:37.692583  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:24:37.692590  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:24:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:24:43.410714  543705 memory.go:191] Add success.
I0319 11:24:43.409818  543705 cpu.go:282] Add success.
I0319 11:24:43.420451  543705 net.go:648] Add success.
I0319 11:24:43.423548  543705 net.go:770] primary dev: ETH0
I0319 11:24:43.423563  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:24:43.423578  543705 net.go:698] Add success.
I0319 11:24:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:24:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:24:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:24:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:24:53.409776  543705 memory.go:184] no items to output this cycle
I0319 11:24:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 11:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:25:03.409803  543705 memory.go:184] no items to output this cycle
I0319 11:25:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 11:25:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:25:13.409793  543705 memory.go:191] Add success.
I0319 11:25:13.409817  543705 cpu.go:282] Add success.
W0319 11:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:25:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:25:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:25:13.419965  543705 net.go:770] primary dev: ETH0
I0319 11:25:13.419979  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:25:13.419991  543705 net.go:698] Add success.
I0319 11:25:13.420221  543705 net.go:648] Add success.
I0319 11:25:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:25:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:25:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 11:25:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:25:14.456475  543705 disk_worker.go:494] system disk:vda1
I0319 11:25:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:25:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:25:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:25:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:25:18.877668  543705 disk_info.go:125] begin check local disk info of client
I0319 11:25:18.880041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:25:18.880046  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396780 0xc0003967c0]
E0319 11:25:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:25:23.409776  543705 memory.go:184] no items to output this cycle
I0319 11:25:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:25:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:25:33.409810  543705 memory.go:184] no items to output this cycle
I0319 11:25:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 11:25:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:25:43.409823  543705 memory.go:191] Add success.
I0319 11:25:43.409830  543705 cpu.go:282] Add success.
I0319 11:25:43.419899  543705 net.go:648] Add success.
I0319 11:25:43.422676  543705 net.go:770] primary dev: ETH0
I0319 11:25:43.422690  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:25:43.422701  543705 net.go:698] Add success.
I0319 11:25:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:25:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:25:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:25:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:25:53.409807  543705 memory.go:184] no items to output this cycle
I0319 11:25:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 11:26:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:26:03.409790  543705 memory.go:184] no items to output this cycle
I0319 11:26:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 11:26:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:26:13.409797  543705 memory.go:191] Add success.
I0319 11:26:13.409823  543705 cpu.go:282] Add success.
W0319 11:26:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:26:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:26:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:26:13.420246  543705 net.go:648] Add success.
I0319 11:26:13.423282  543705 net.go:770] primary dev: ETH0
I0319 11:26:13.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:26:13.423309  543705 net.go:698] Add success.
I0319 11:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:26:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:26:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 11:26:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:26:14.456564  543705 disk_worker.go:494] system disk:vda1
I0319 11:26:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:26:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:26:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:26:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:26:18.881669  543705 disk_info.go:125] begin check local disk info of client
I0319 11:26:18.884018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:26:18.884024  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f2080 0xc0001f20c0]
E0319 11:26:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:26:23.409796  543705 memory.go:184] no items to output this cycle
I0319 11:26:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:26:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:26:33.409821  543705 memory.go:184] no items to output this cycle
I0319 11:26:33.409829  543705 cpu.go:275] no items to output this cycle
E0319 11:26:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:26:43.409789  543705 memory.go:191] Add success.
I0319 11:26:43.409793  543705 cpu.go:282] Add success.
I0319 11:26:43.420303  543705 net.go:648] Add success.
I0319 11:26:43.423053  543705 net.go:770] primary dev: ETH0
I0319 11:26:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:26:43.423077  543705 net.go:698] Add success.
I0319 11:26:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:26:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:26:53.409800  543705 memory.go:184] no items to output this cycle
I0319 11:26:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 11:27:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:27:03.409804  543705 memory.go:184] no items to output this cycle
I0319 11:27:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 11:27:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:27:13.409782  543705 memory.go:191] Add success.
W0319 11:27:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 11:27:13.409806  543705 cpu.go:282] Add success.
W0319 11:27:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:27:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:27:13.420110  543705 net.go:648] Add success.
I0319 11:27:13.428797  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 11:27:13.428882  543705 net.go:770] primary dev: ETH0
I0319 11:27:13.428894  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:27:13.428904  543705 net.go:698] Add success.
I0319 11:27:13.453417  543705 event_worker.go:152] Polling the log file for events...
I0319 11:27:13.468645  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe6d4fdc-b043-40be-a5bc-f5bb1f2271d8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:27:13.468677  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 11:27:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:27:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0319 11:27:14.455259  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:27:14.456100  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:27:14.456110  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:27:14.456116  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:27:14.456933  543705 disk_worker.go:494] system disk:vda1
I0319 11:27:14.456964  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:27:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:27:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:27:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:27:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:27:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:27:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:27:16.472289  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:27:18.885671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:27:18.888067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:27:18.888073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa440 0xc0002aa480]
E0319 11:27:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:27:23.409781  543705 memory.go:184] no items to output this cycle
I0319 11:27:23.409781  543705 cpu.go:275] no items to output this cycle
E0319 11:27:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:27:33.409771  543705 memory.go:184] no items to output this cycle
I0319 11:27:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 11:27:37.693159  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:27:37.693165  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:27:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:27:43.410769  543705 memory.go:191] Add success.
I0319 11:27:43.409821  543705 cpu.go:282] Add success.
I0319 11:27:43.420749  543705 net.go:648] Add success.
I0319 11:27:43.423426  543705 net.go:770] primary dev: ETH0
I0319 11:27:43.423439  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:27:43.423451  543705 net.go:698] Add success.
I0319 11:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:27:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:27:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:27:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:27:53.409803  543705 memory.go:184] no items to output this cycle
I0319 11:27:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 11:28:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:28:03.409802  543705 memory.go:184] no items to output this cycle
I0319 11:28:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 11:28:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:28:13.409778  543705 memory.go:191] Add success.
W0319 11:28:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 11:28:13.409807  543705 cpu.go:282] Add success.
W0319 11:28:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:28:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:28:13.420099  543705 net.go:648] Add success.
I0319 11:28:13.422987  543705 net.go:770] primary dev: ETH0
I0319 11:28:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:28:13.423012  543705 net.go:698] Add success.
I0319 11:28:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:28:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:28:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 11:28:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:28:14.456562  543705 disk_worker.go:494] system disk:vda1
I0319 11:28:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:28:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:28:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:28:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:28:18.889674  543705 disk_info.go:125] begin check local disk info of client
I0319 11:28:18.892008  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:28:18.892014  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf00 0xc0001aaf40]
E0319 11:28:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:28:23.409789  543705 memory.go:184] no items to output this cycle
I0319 11:28:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:28:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:28:33.409789  543705 cpu.go:275] no items to output this cycle
I0319 11:28:33.409793  543705 memory.go:184] no items to output this cycle
E0319 11:28:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:28:43.409810  543705 memory.go:191] Add success.
I0319 11:28:43.409819  543705 cpu.go:282] Add success.
I0319 11:28:43.420039  543705 net.go:648] Add success.
I0319 11:28:43.422833  543705 net.go:770] primary dev: ETH0
I0319 11:28:43.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:28:43.422858  543705 net.go:698] Add success.
I0319 11:28:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:28:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:28:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:28:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:28:53.409787  543705 memory.go:184] no items to output this cycle
I0319 11:28:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 11:29:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:29:03.409785  543705 memory.go:184] no items to output this cycle
I0319 11:29:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 11:29:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:29:13.409806  543705 memory.go:191] Add success.
I0319 11:29:13.409820  543705 cpu.go:282] Add success.
W0319 11:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:29:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:29:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:29:13.420314  543705 net.go:648] Add success.
I0319 11:29:13.422853  543705 net.go:770] primary dev: ETH0
I0319 11:29:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:29:13.422879  543705 net.go:698] Add success.
I0319 11:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:29:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:29:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0319 11:29:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:29:14.456593  543705 disk_worker.go:494] system disk:vda1
I0319 11:29:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:29:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:29:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:29:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:29:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:29:16.472457  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:29:18.893670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:29:18.896064  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:29:18.896071  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0319 11:29:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:29:23.409771  543705 cpu.go:275] no items to output this cycle
I0319 11:29:23.409776  543705 memory.go:184] no items to output this cycle
E0319 11:29:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:29:33.409802  543705 memory.go:184] no items to output this cycle
I0319 11:29:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 11:29:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:29:43.409828  543705 memory.go:191] Add success.
I0319 11:29:43.409830  543705 cpu.go:282] Add success.
I0319 11:29:43.419973  543705 net.go:648] Add success.
I0319 11:29:43.423111  543705 net.go:770] primary dev: ETH0
I0319 11:29:43.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:29:43.423136  543705 net.go:698] Add success.
I0319 11:29:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:29:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:29:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:29:53.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:29:53.409909  543705 cpu.go:275] no items to output this cycle
I0319 11:29:53.409938  543705 memory.go:184] no items to output this cycle
E0319 11:30:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:30:03.409814  543705 memory.go:184] no items to output this cycle
I0319 11:30:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 11:30:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:30:13.409787  543705 memory.go:191] Add success.
I0319 11:30:13.409788  543705 cpu.go:282] Add success.
W0319 11:30:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:30:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:30:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:30:13.420331  543705 net.go:648] Add success.
I0319 11:30:13.423192  543705 net.go:770] primary dev: ETH0
I0319 11:30:13.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:30:13.423218  543705 net.go:698] Add success.
I0319 11:30:13.469771  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"922b6db6-8fed-4ab9-87f9-3d27ded68eac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:30:13.469819  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:30:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:30:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:30:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 11:30:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:30:14.456661  543705 disk_worker.go:494] system disk:vda1
I0319 11:30:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:30:15.455604  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:30:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:30:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:30:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:30:18.897670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:30:18.900118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:30:18.900125  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bc80 0xc00007bcc0]
E0319 11:30:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:30:23.409774  543705 cpu.go:275] no items to output this cycle
I0319 11:30:23.409784  543705 memory.go:184] no items to output this cycle
E0319 11:30:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:30:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 11:30:33.409794  543705 memory.go:184] no items to output this cycle
I0319 11:30:37.693736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:30:37.693743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:30:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:30:43.410629  543705 memory.go:191] Add success.
I0319 11:30:43.409826  543705 cpu.go:282] Add success.
I0319 11:30:43.420323  543705 net.go:648] Add success.
I0319 11:30:43.423187  543705 net.go:770] primary dev: ETH0
I0319 11:30:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:30:43.423213  543705 net.go:698] Add success.
I0319 11:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:30:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:30:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:30:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:30:53.409777  543705 memory.go:184] no items to output this cycle
I0319 11:30:53.409890  543705 cpu.go:275] no items to output this cycle
E0319 11:31:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:31:03.409766  543705 memory.go:184] no items to output this cycle
I0319 11:31:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 11:31:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:31:13.409808  543705 memory.go:191] Add success.
I0319 11:31:13.409813  543705 cpu.go:282] Add success.
W0319 11:31:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:31:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:31:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:31:13.420067  543705 net.go:648] Add success.
I0319 11:31:13.422771  543705 net.go:770] primary dev: ETH0
I0319 11:31:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:31:13.422800  543705 net.go:698] Add success.
I0319 11:31:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:31:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:31:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 11:31:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:31:14.456581  543705 disk_worker.go:494] system disk:vda1
I0319 11:31:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:31:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:31:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:31:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:31:16.472430  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:31:18.901681  543705 disk_info.go:125] begin check local disk info of client
I0319 11:31:18.904029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:31:18.904035  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508700 0xc000508740]
E0319 11:31:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:31:23.409791  543705 memory.go:184] no items to output this cycle
I0319 11:31:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:31:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:31:33.409781  543705 memory.go:184] no items to output this cycle
I0319 11:31:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:31:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:31:43.409808  543705 memory.go:191] Add success.
I0319 11:31:43.409818  543705 cpu.go:282] Add success.
I0319 11:31:43.419955  543705 net.go:648] Add success.
I0319 11:31:43.422997  543705 net.go:770] primary dev: ETH0
I0319 11:31:43.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:31:43.423030  543705 net.go:698] Add success.
I0319 11:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:31:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:31:53.409784  543705 cpu.go:275] no items to output this cycle
I0319 11:31:53.409790  543705 memory.go:184] no items to output this cycle
E0319 11:32:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:32:03.409790  543705 memory.go:184] no items to output this cycle
I0319 11:32:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 11:32:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:32:13.409793  543705 memory.go:191] Add success.
I0319 11:32:13.409794  543705 cpu.go:282] Add success.
W0319 11:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:32:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:32:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:32:13.420211  543705 net.go:648] Add success.
I0319 11:32:13.422897  543705 net.go:770] primary dev: ETH0
I0319 11:32:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:32:13.422922  543705 net.go:698] Add success.
W0319 11:32:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:32:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 11:32:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:32:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:32:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:32:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:32:14.456550  543705 disk_worker.go:494] system disk:vda1
I0319 11:32:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:32:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:32:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:32:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:32:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:32:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:32:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:32:16.472349  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:32:18.905673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:32:18.907997  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:32:18.908003  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b50c0 0xc0002b5100]
E0319 11:32:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:32:23.409761  543705 memory.go:184] no items to output this cycle
I0319 11:32:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:32:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:32:33.409778  543705 memory.go:184] no items to output this cycle
I0319 11:32:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:32:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:32:43.409779  543705 memory.go:191] Add success.
I0319 11:32:43.409800  543705 cpu.go:282] Add success.
I0319 11:32:43.419859  543705 net.go:648] Add success.
I0319 11:32:43.422546  543705 net.go:770] primary dev: ETH0
I0319 11:32:43.422559  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:32:43.422573  543705 net.go:698] Add success.
I0319 11:32:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:32:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:32:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:32:53.410273  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:32:53.410303  543705 memory.go:184] no items to output this cycle
I0319 11:32:53.410303  543705 cpu.go:275] no items to output this cycle
E0319 11:33:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:33:03.409791  543705 memory.go:184] no items to output this cycle
I0319 11:33:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:33:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:33:13.409793  543705 memory.go:191] Add success.
I0319 11:33:13.409798  543705 cpu.go:282] Add success.
W0319 11:33:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:33:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:33:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:33:13.420200  543705 net.go:648] Add success.
I0319 11:33:13.423058  543705 net.go:770] primary dev: ETH0
I0319 11:33:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:33:13.423087  543705 net.go:698] Add success.
I0319 11:33:13.469173  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dec46e48-f5a5-41b7-8f27-eb876005629d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:33:13.469210  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:33:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:33:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0319 11:33:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:33:14.456782  543705 disk_worker.go:494] system disk:vda1
I0319 11:33:14.456810  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:33:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:33:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:33:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:33:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:33:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:33:18.909671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:33:18.912013  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:33:18.912018  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466800 0xc000466840]
E0319 11:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:33:23.409774  543705 cpu.go:275] no items to output this cycle
I0319 11:33:23.409779  543705 memory.go:184] no items to output this cycle
E0319 11:33:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:33:33.409778  543705 memory.go:184] no items to output this cycle
I0319 11:33:33.409807  543705 cpu.go:275] no items to output this cycle
I0319 11:33:37.695162  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:33:37.695169  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:33:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:33:43.410837  543705 memory.go:191] Add success.
I0319 11:33:43.409817  543705 cpu.go:282] Add success.
I0319 11:33:43.420539  543705 net.go:648] Add success.
I0319 11:33:43.423259  543705 net.go:770] primary dev: ETH0
I0319 11:33:43.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:33:43.423455  543705 net.go:698] Add success.
I0319 11:33:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:33:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:33:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:33:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:33:53.409805  543705 memory.go:184] no items to output this cycle
I0319 11:33:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 11:34:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:34:03.409809  543705 memory.go:184] no items to output this cycle
I0319 11:34:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 11:34:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:34:13.409781  543705 memory.go:191] Add success.
W0319 11:34:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 11:34:13.409815  543705 cpu.go:282] Add success.
W0319 11:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:34:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:34:13.420036  543705 net.go:648] Add success.
I0319 11:34:13.422935  543705 net.go:770] primary dev: ETH0
I0319 11:34:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:34:13.422961  543705 net.go:698] Add success.
I0319 11:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:34:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:34:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 11:34:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:34:14.456565  543705 disk_worker.go:494] system disk:vda1
I0319 11:34:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:34:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:34:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:34:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:34:18.913670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:34:18.916005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:34:18.916012  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab000 0xc0001ab040]
E0319 11:34:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:34:23.409770  543705 cpu.go:275] no items to output this cycle
I0319 11:34:23.409774  543705 memory.go:184] no items to output this cycle
E0319 11:34:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:34:33.409786  543705 cpu.go:275] no items to output this cycle
I0319 11:34:33.409791  543705 memory.go:184] no items to output this cycle
E0319 11:34:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:34:43.409792  543705 memory.go:191] Add success.
I0319 11:34:43.409795  543705 cpu.go:282] Add success.
I0319 11:34:43.419838  543705 net.go:648] Add success.
I0319 11:34:43.422794  543705 net.go:770] primary dev: ETH0
I0319 11:34:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:34:43.422828  543705 net.go:698] Add success.
I0319 11:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:34:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:34:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:34:53.409780  543705 memory.go:184] no items to output this cycle
I0319 11:34:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 11:35:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:35:03.409878  543705 cpu.go:275] no items to output this cycle
I0319 11:35:03.409889  543705 memory.go:184] no items to output this cycle
E0319 11:35:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:35:13.409808  543705 memory.go:191] Add success.
I0319 11:35:13.409826  543705 cpu.go:282] Add success.
W0319 11:35:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:35:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:35:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:35:13.420101  543705 net.go:648] Add success.
I0319 11:35:13.422638  543705 net.go:770] primary dev: ETH0
I0319 11:35:13.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:35:13.422662  543705 net.go:698] Add success.
I0319 11:35:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:35:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:35:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 11:35:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:35:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 11:35:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:35:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:35:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:35:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:35:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:35:18.917670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:35:18.920085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:35:18.920092  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508a00 0xc000508a40]
E0319 11:35:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:35:23.409792  543705 memory.go:184] no items to output this cycle
I0319 11:35:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:35:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:35:33.409782  543705 memory.go:184] no items to output this cycle
I0319 11:35:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:35:43.409808  543705 memory.go:191] Add success.
I0319 11:35:43.409818  543705 cpu.go:282] Add success.
I0319 11:35:43.419892  543705 net.go:648] Add success.
I0319 11:35:43.422540  543705 net.go:770] primary dev: ETH0
I0319 11:35:43.422553  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:35:43.422565  543705 net.go:698] Add success.
I0319 11:35:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:35:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:35:53.409773  543705 memory.go:184] no items to output this cycle
I0319 11:35:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 11:36:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:36:03.409786  543705 memory.go:184] no items to output this cycle
I0319 11:36:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 11:36:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:36:13.409782  543705 memory.go:191] Add success.
W0319 11:36:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 11:36:13.409813  543705 cpu.go:282] Add success.
W0319 11:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:36:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:36:13.420155  543705 net.go:648] Add success.
I0319 11:36:13.423099  543705 net.go:770] primary dev: ETH0
I0319 11:36:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:36:13.423125  543705 net.go:698] Add success.
I0319 11:36:13.743848  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"503c2a59-3b1b-4985-b470-ad7b87872355","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:36:13.743881  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:36:14.454456  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:36:14.454676  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:36:14.454687  543705 disk_worker.go:708] disk space is not compliant
W0319 11:36:14.454689  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:36:14.456024  543705 disk_worker.go:494] system disk:vda1
I0319 11:36:14.456068  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:36:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:36:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:36:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:36:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:36:18.921671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:36:18.924015  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:36:18.924021  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f40 0xc0000c4f80]
E0319 11:36:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:36:23.409787  543705 memory.go:184] no items to output this cycle
I0319 11:36:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 11:36:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:36:33.409790  543705 memory.go:184] no items to output this cycle
I0319 11:36:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 11:36:37.696189  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:36:37.696196  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:36:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:36:43.410665  543705 memory.go:191] Add success.
I0319 11:36:43.409816  543705 cpu.go:282] Add success.
I0319 11:36:43.420373  543705 net.go:648] Add success.
I0319 11:36:43.423036  543705 net.go:770] primary dev: ETH0
I0319 11:36:43.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:36:43.423063  543705 net.go:698] Add success.
I0319 11:36:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:36:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:36:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:36:53.409767  543705 memory.go:184] no items to output this cycle
I0319 11:36:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 11:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:37:03.409786  543705 memory.go:184] no items to output this cycle
I0319 11:37:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 11:37:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:37:13.409779  543705 memory.go:191] Add success.
W0319 11:37:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 11:37:13.409812  543705 cpu.go:282] Add success.
W0319 11:37:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:37:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:37:13.420212  543705 net.go:648] Add success.
I0319 11:37:13.423175  543705 net.go:770] primary dev: ETH0
I0319 11:37:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:37:13.423200  543705 net.go:698] Add success.
I0319 11:37:13.453738  543705 event_worker.go:152] Polling the log file for events...
W0319 11:37:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:37:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 11:37:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:37:14.456907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:37:14.456917  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:37:14.456922  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:37:14.456992  543705 disk_worker.go:494] system disk:vda1
I0319 11:37:14.457019  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:37:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:37:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:37:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:37:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:37:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:37:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:37:16.472355  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:37:18.925670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:37:18.928021  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:37:18.928027  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee300 0xc0003ee340]
E0319 11:37:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:37:23.409759  543705 memory.go:184] no items to output this cycle
I0319 11:37:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:37:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:37:33.409805  543705 memory.go:184] no items to output this cycle
I0319 11:37:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 11:37:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:37:43.409775  543705 memory.go:191] Add success.
I0319 11:37:43.409807  543705 cpu.go:282] Add success.
I0319 11:37:43.419928  543705 net.go:648] Add success.
I0319 11:37:43.422673  543705 net.go:770] primary dev: ETH0
I0319 11:37:43.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:37:43.422702  543705 net.go:698] Add success.
I0319 11:37:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:37:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:37:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:37:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:37:53.409775  543705 memory.go:184] no items to output this cycle
I0319 11:37:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 11:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:38:03.409784  543705 memory.go:184] no items to output this cycle
I0319 11:38:03.409918  543705 cpu.go:275] no items to output this cycle
E0319 11:38:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:38:13.409796  543705 memory.go:191] Add success.
I0319 11:38:13.409810  543705 cpu.go:282] Add success.
W0319 11:38:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:38:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:38:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:38:13.420209  543705 net.go:648] Add success.
I0319 11:38:13.423042  543705 net.go:770] primary dev: ETH0
I0319 11:38:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:38:13.423071  543705 net.go:698] Add success.
I0319 11:38:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:38:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:38:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 11:38:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:38:14.456516  543705 disk_worker.go:494] system disk:vda1
I0319 11:38:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:38:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:38:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:38:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:38:18.929672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:38:18.932064  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:38:18.932070  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ee080 0xc0001ee0c0]
E0319 11:38:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:38:23.409811  543705 memory.go:184] no items to output this cycle
I0319 11:38:23.409825  543705 cpu.go:275] no items to output this cycle
E0319 11:38:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:38:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 11:38:33.409803  543705 memory.go:184] no items to output this cycle
E0319 11:38:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:38:43.409800  543705 memory.go:191] Add success.
I0319 11:38:43.409810  543705 cpu.go:282] Add success.
I0319 11:38:43.420057  543705 net.go:648] Add success.
I0319 11:38:43.422896  543705 net.go:770] primary dev: ETH0
I0319 11:38:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:38:43.422926  543705 net.go:698] Add success.
I0319 11:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:38:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:38:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:38:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:38:53.409769  543705 memory.go:184] no items to output this cycle
I0319 11:38:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 11:39:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:39:03.409784  543705 memory.go:184] no items to output this cycle
I0319 11:39:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 11:39:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:39:13.409800  543705 memory.go:191] Add success.
I0319 11:39:13.409801  543705 cpu.go:282] Add success.
W0319 11:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:39:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:39:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:39:13.420167  543705 net.go:648] Add success.
I0319 11:39:13.423613  543705 net.go:770] primary dev: ETH0
I0319 11:39:13.423628  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:39:13.423640  543705 net.go:698] Add success.
I0319 11:39:13.469614  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"580517cf-1484-4df5-ac68-26b120d099dc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:39:13.469658  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:39:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:39:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:39:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0319 11:39:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:39:14.456512  543705 disk_worker.go:494] system disk:vda1
I0319 11:39:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:39:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:39:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:39:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:39:16.472434  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:39:18.933670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:39:18.936039  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:39:18.936045  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f4600 0xc0004f4640]
E0319 11:39:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:39:23.409794  543705 memory.go:184] no items to output this cycle
I0319 11:39:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 11:39:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:39:33.409812  543705 memory.go:184] no items to output this cycle
I0319 11:39:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 11:39:37.696335  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:39:37.696342  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:39:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:39:43.410665  543705 memory.go:191] Add success.
I0319 11:39:43.409806  543705 cpu.go:282] Add success.
I0319 11:39:43.420387  543705 net.go:648] Add success.
I0319 11:39:43.422893  543705 net.go:770] primary dev: ETH0
I0319 11:39:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:39:43.422917  543705 net.go:698] Add success.
I0319 11:39:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:39:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:39:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:39:53.410348  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:39:53.410366  543705 memory.go:184] no items to output this cycle
I0319 11:39:53.410375  543705 cpu.go:275] no items to output this cycle
E0319 11:40:03.410587  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:40:03.410624  543705 memory.go:184] no items to output this cycle
I0319 11:40:03.410636  543705 cpu.go:275] no items to output this cycle
E0319 11:40:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:40:13.409791  543705 memory.go:191] Add success.
I0319 11:40:13.409794  543705 cpu.go:282] Add success.
W0319 11:40:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:40:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:40:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:40:13.420285  543705 net.go:648] Add success.
I0319 11:40:13.423379  543705 net.go:770] primary dev: ETH0
I0319 11:40:13.423393  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:40:13.423407  543705 net.go:698] Add success.
I0319 11:40:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:40:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:40:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 11:40:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:40:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 11:40:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:40:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:40:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:40:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:40:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:40:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:40:18.937670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:40:18.940008  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:40:18.940014  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003412c0 0xc000341300]
E0319 11:40:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:40:23.409797  543705 memory.go:184] no items to output this cycle
I0319 11:40:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 11:40:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:40:33.409820  543705 memory.go:184] no items to output this cycle
I0319 11:40:33.409831  543705 cpu.go:275] no items to output this cycle
E0319 11:40:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:40:43.409788  543705 memory.go:191] Add success.
I0319 11:40:43.409817  543705 cpu.go:282] Add success.
I0319 11:40:43.419885  543705 net.go:648] Add success.
I0319 11:40:43.422511  543705 net.go:770] primary dev: ETH0
I0319 11:40:43.422525  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:40:43.422537  543705 net.go:698] Add success.
I0319 11:40:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:40:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:40:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:40:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:40:53.409782  543705 memory.go:184] no items to output this cycle
I0319 11:40:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 11:41:03.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:41:03.409936  543705 memory.go:184] no items to output this cycle
I0319 11:41:03.409945  543705 cpu.go:275] no items to output this cycle
E0319 11:41:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:41:13.409822  543705 memory.go:191] Add success.
I0319 11:41:13.409830  543705 cpu.go:282] Add success.
W0319 11:41:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:41:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:41:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:41:13.420172  543705 net.go:648] Add success.
I0319 11:41:13.423043  543705 net.go:770] primary dev: ETH0
I0319 11:41:13.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:41:13.423069  543705 net.go:698] Add success.
I0319 11:41:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:41:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:41:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0319 11:41:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:41:14.456473  543705 disk_worker.go:494] system disk:vda1
I0319 11:41:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:41:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:41:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:41:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:41:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:41:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:41:18.941671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:41:18.944054  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:41:18.944060  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001eebc0 0xc0001eec00]
E0319 11:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:41:23.409797  543705 memory.go:184] no items to output this cycle
I0319 11:41:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 11:41:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:41:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 11:41:33.409804  543705 memory.go:184] no items to output this cycle
E0319 11:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:41:43.409805  543705 memory.go:191] Add success.
I0319 11:41:43.409811  543705 cpu.go:282] Add success.
I0319 11:41:43.419988  543705 net.go:648] Add success.
I0319 11:41:43.422845  543705 net.go:770] primary dev: ETH0
I0319 11:41:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:41:43.422871  543705 net.go:698] Add success.
I0319 11:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:41:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:41:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:41:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:41:53.410258  543705 memory.go:184] no items to output this cycle
I0319 11:41:53.410289  543705 cpu.go:275] no items to output this cycle
E0319 11:42:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:42:03.409812  543705 memory.go:184] no items to output this cycle
I0319 11:42:03.409825  543705 cpu.go:275] no items to output this cycle
E0319 11:42:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:42:13.409795  543705 memory.go:191] Add success.
I0319 11:42:13.409814  543705 cpu.go:282] Add success.
W0319 11:42:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:42:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:42:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:42:13.420106  543705 net.go:648] Add success.
I0319 11:42:13.423059  543705 net.go:770] primary dev: ETH0
I0319 11:42:13.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:42:13.423084  543705 net.go:698] Add success.
I0319 11:42:13.464193  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"96e52ebe-bc85-4b32-a9b9-d0253d7c3f0b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:42:13.464227  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 11:42:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:42:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 11:42:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:42:14.456854  543705 disk_worker.go:494] system disk:vda1
E0319 11:42:14.456865  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:42:14.456883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:42:14.456888  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:42:14.456892  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:42:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:42:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:42:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:42:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:42:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:42:16.457982  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:42:16.472309  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:42:18.945673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:42:18.948007  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:42:18.948012  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee980 0xc0003ee9c0]
E0319 11:42:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:42:23.409769  543705 memory.go:184] no items to output this cycle
I0319 11:42:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 11:42:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:42:33.409789  543705 memory.go:184] no items to output this cycle
I0319 11:42:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 11:42:37.697177  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:42:37.697184  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:42:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:42:43.410622  543705 memory.go:191] Add success.
I0319 11:42:43.409793  543705 cpu.go:282] Add success.
I0319 11:42:43.420324  543705 net.go:648] Add success.
I0319 11:42:43.422953  543705 net.go:770] primary dev: ETH0
I0319 11:42:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:42:43.422978  543705 net.go:698] Add success.
I0319 11:42:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:42:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:42:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:42:53.409783  543705 cpu.go:275] no items to output this cycle
I0319 11:42:53.409795  543705 memory.go:184] no items to output this cycle
E0319 11:43:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:43:03.409780  543705 memory.go:184] no items to output this cycle
I0319 11:43:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 11:43:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:43:13.409799  543705 memory.go:191] Add success.
I0319 11:43:13.409815  543705 cpu.go:282] Add success.
W0319 11:43:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:43:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:43:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:43:13.420109  543705 net.go:648] Add success.
I0319 11:43:13.422949  543705 net.go:770] primary dev: ETH0
I0319 11:43:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:43:13.422979  543705 net.go:698] Add success.
I0319 11:43:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:43:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:43:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 11:43:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:43:14.456571  543705 disk_worker.go:494] system disk:vda1
I0319 11:43:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:43:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:43:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:43:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:43:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:43:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:43:18.949671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:43:18.952023  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:43:18.952029  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee400 0xc0003ee440]
E0319 11:43:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:43:23.409789  543705 memory.go:184] no items to output this cycle
I0319 11:43:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:43:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:43:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 11:43:33.409792  543705 memory.go:184] no items to output this cycle
E0319 11:43:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:43:43.409797  543705 memory.go:191] Add success.
I0319 11:43:43.409799  543705 cpu.go:282] Add success.
I0319 11:43:43.419874  543705 net.go:648] Add success.
I0319 11:43:43.422948  543705 net.go:770] primary dev: ETH0
I0319 11:43:43.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:43:43.422975  543705 net.go:698] Add success.
I0319 11:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:43:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:43:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:43:53.409789  543705 memory.go:184] no items to output this cycle
I0319 11:43:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 11:44:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:44:03.409791  543705 memory.go:184] no items to output this cycle
I0319 11:44:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:44:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:44:13.409812  543705 memory.go:191] Add success.
I0319 11:44:13.409820  543705 cpu.go:282] Add success.
W0319 11:44:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:44:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:44:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:44:13.420491  543705 net.go:648] Add success.
I0319 11:44:13.423395  543705 net.go:770] primary dev: ETH0
I0319 11:44:13.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:44:13.423419  543705 net.go:698] Add success.
I0319 11:44:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:44:14.455081  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:44:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0319 11:44:14.455146  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:44:14.456480  543705 disk_worker.go:494] system disk:vda1
I0319 11:44:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:44:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:44:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:44:16.472353  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:44:18.953672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:44:18.956135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:44:18.956141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2500 0xc0003b2540]
E0319 11:44:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:44:23.409767  543705 memory.go:184] no items to output this cycle
I0319 11:44:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 11:44:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:44:33.409779  543705 memory.go:184] no items to output this cycle
I0319 11:44:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 11:44:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:44:43.409802  543705 memory.go:191] Add success.
I0319 11:44:43.409803  543705 cpu.go:282] Add success.
I0319 11:44:43.419882  543705 net.go:648] Add success.
I0319 11:44:43.422870  543705 net.go:770] primary dev: ETH0
I0319 11:44:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:44:43.422896  543705 net.go:698] Add success.
I0319 11:44:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:44:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:44:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:44:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:44:53.409793  543705 memory.go:184] no items to output this cycle
I0319 11:44:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:45:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:45:03.409777  543705 memory.go:184] no items to output this cycle
I0319 11:45:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:45:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:45:13.409801  543705 memory.go:191] Add success.
I0319 11:45:13.409802  543705 cpu.go:282] Add success.
W0319 11:45:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:45:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:45:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:45:13.420166  543705 net.go:648] Add success.
I0319 11:45:13.422958  543705 net.go:770] primary dev: ETH0
I0319 11:45:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:45:13.422984  543705 net.go:698] Add success.
I0319 11:45:13.469144  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"796e055a-fd08-4dea-9b49-57d6f22f41c7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:45:13.469182  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:45:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:45:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:45:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 11:45:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:45:14.456514  543705 disk_worker.go:494] system disk:vda1
I0319 11:45:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:45:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:45:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:45:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:45:18.957671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:45:18.960094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:45:18.960099  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ee600 0xc0001ee640]
E0319 11:45:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:45:23.409775  543705 memory.go:184] no items to output this cycle
I0319 11:45:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 11:45:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:45:33.409775  543705 memory.go:184] no items to output this cycle
I0319 11:45:33.409790  543705 cpu.go:275] no items to output this cycle
I0319 11:45:37.697736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:45:37.697744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:45:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:45:43.410736  543705 memory.go:191] Add success.
I0319 11:45:43.409803  543705 cpu.go:282] Add success.
I0319 11:45:43.420498  543705 net.go:648] Add success.
I0319 11:45:43.423176  543705 net.go:770] primary dev: ETH0
I0319 11:45:43.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:45:43.423203  543705 net.go:698] Add success.
I0319 11:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:45:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:45:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:45:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:45:53.409773  543705 memory.go:184] no items to output this cycle
I0319 11:45:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 11:46:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:46:03.409789  543705 memory.go:184] no items to output this cycle
I0319 11:46:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 11:46:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:46:13.409799  543705 memory.go:191] Add success.
I0319 11:46:13.409802  543705 cpu.go:282] Add success.
W0319 11:46:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:46:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:46:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:46:13.420363  543705 net.go:648] Add success.
I0319 11:46:13.422956  543705 net.go:770] primary dev: ETH0
I0319 11:46:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:46:13.422981  543705 net.go:698] Add success.
I0319 11:46:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:46:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:46:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 11:46:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:46:14.456513  543705 disk_worker.go:494] system disk:vda1
I0319 11:46:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:46:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:46:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:46:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:46:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:46:18.961673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:46:18.964029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:46:18.964034  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344200 0xc000344240]
E0319 11:46:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:46:23.409789  543705 memory.go:184] no items to output this cycle
I0319 11:46:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 11:46:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:46:33.409806  543705 memory.go:184] no items to output this cycle
I0319 11:46:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 11:46:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:46:43.409797  543705 memory.go:191] Add success.
I0319 11:46:43.409798  543705 cpu.go:282] Add success.
I0319 11:46:43.419966  543705 net.go:648] Add success.
I0319 11:46:43.422705  543705 net.go:770] primary dev: ETH0
I0319 11:46:43.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:46:43.422731  543705 net.go:698] Add success.
I0319 11:46:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:46:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:46:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:46:53.409799  543705 memory.go:184] no items to output this cycle
I0319 11:46:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 11:47:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:47:03.409790  543705 cpu.go:275] no items to output this cycle
I0319 11:47:03.409792  543705 memory.go:184] no items to output this cycle
E0319 11:47:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:47:13.409813  543705 memory.go:191] Add success.
I0319 11:47:13.409821  543705 cpu.go:282] Add success.
W0319 11:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:47:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:47:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:47:13.420128  543705 net.go:648] Add success.
I0319 11:47:13.423092  543705 net.go:770] primary dev: ETH0
I0319 11:47:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:47:13.423121  543705 net.go:698] Add success.
I0319 11:47:13.453171  543705 event_worker.go:152] Polling the log file for events...
W0319 11:47:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:47:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 11:47:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:47:14.456926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:47:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:47:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:47:14.456991  543705 disk_worker.go:494] system disk:vda1
I0319 11:47:14.457033  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:47:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:47:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:47:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:47:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:47:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:47:16.458040  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:47:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:47:18.965674  543705 disk_info.go:125] begin check local disk info of client
I0319 11:47:18.967978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:47:18.967983  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004da2c0 0xc0004da300]
E0319 11:47:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:47:23.409758  543705 memory.go:184] no items to output this cycle
I0319 11:47:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 11:47:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:47:33.409807  543705 memory.go:184] no items to output this cycle
I0319 11:47:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 11:47:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:47:43.409786  543705 memory.go:191] Add success.
I0319 11:47:43.409804  543705 cpu.go:282] Add success.
I0319 11:47:43.420002  543705 net.go:648] Add success.
I0319 11:47:43.422591  543705 net.go:770] primary dev: ETH0
I0319 11:47:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:47:43.422618  543705 net.go:698] Add success.
I0319 11:47:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:47:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:47:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:47:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:47:53.409780  543705 memory.go:184] no items to output this cycle
I0319 11:47:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 11:48:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:48:03.409816  543705 memory.go:184] no items to output this cycle
I0319 11:48:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 11:48:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:48:13.409816  543705 memory.go:191] Add success.
I0319 11:48:13.409817  543705 cpu.go:282] Add success.
W0319 11:48:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:48:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:48:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:48:13.420040  543705 net.go:770] primary dev: ETH0
I0319 11:48:13.420053  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:48:13.420065  543705 net.go:698] Add success.
I0319 11:48:13.420300  543705 net.go:648] Add success.
I0319 11:48:13.467805  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44bff1db-f316-44ac-aa30-3a9ff66eca2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:48:13.467836  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:48:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:48:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:48:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 11:48:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:48:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 11:48:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:48:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:48:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:48:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:48:18.969670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:48:18.972040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:48:18.972048  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf40 0xc0000c4100]
E0319 11:48:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:48:23.409794  543705 memory.go:184] no items to output this cycle
I0319 11:48:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 11:48:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:48:33.409782  543705 memory.go:184] no items to output this cycle
I0319 11:48:33.409802  543705 cpu.go:275] no items to output this cycle
I0319 11:48:37.699195  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:48:37.699207  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:48:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:48:43.410600  543705 memory.go:191] Add success.
I0319 11:48:43.409788  543705 cpu.go:282] Add success.
I0319 11:48:43.420344  543705 net.go:648] Add success.
I0319 11:48:43.422828  543705 net.go:770] primary dev: ETH0
I0319 11:48:43.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:48:43.422856  543705 net.go:698] Add success.
I0319 11:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:48:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:48:53.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:48:53.409890  543705 memory.go:184] no items to output this cycle
I0319 11:48:53.409930  543705 cpu.go:275] no items to output this cycle
E0319 11:49:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:49:03.409774  543705 memory.go:184] no items to output this cycle
I0319 11:49:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 11:49:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:49:13.409780  543705 memory.go:191] Add success.
W0319 11:49:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 11:49:13.409806  543705 cpu.go:282] Add success.
W0319 11:49:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:49:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:49:13.420245  543705 net.go:648] Add success.
I0319 11:49:13.423301  543705 net.go:770] primary dev: ETH0
I0319 11:49:13.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:49:13.423330  543705 net.go:698] Add success.
I0319 11:49:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:49:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:49:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 11:49:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:49:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 11:49:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:49:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:49:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:49:18.973671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:49:18.976067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:49:18.976073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee340 0xc0003ee380]
E0319 11:49:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:49:23.409771  543705 cpu.go:275] no items to output this cycle
I0319 11:49:23.409781  543705 memory.go:184] no items to output this cycle
E0319 11:49:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:49:33.409824  543705 memory.go:184] no items to output this cycle
I0319 11:49:33.409852  543705 cpu.go:275] no items to output this cycle
E0319 11:49:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:49:43.409818  543705 memory.go:191] Add success.
I0319 11:49:43.409824  543705 cpu.go:282] Add success.
I0319 11:49:43.420019  543705 net.go:648] Add success.
I0319 11:49:43.422841  543705 net.go:770] primary dev: ETH0
I0319 11:49:43.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:49:43.422871  543705 net.go:698] Add success.
I0319 11:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:49:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:49:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:49:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:49:53.409763  543705 memory.go:184] no items to output this cycle
I0319 11:49:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:50:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:50:03.409771  543705 memory.go:184] no items to output this cycle
I0319 11:50:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 11:50:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:50:13.409812  543705 memory.go:191] Add success.
I0319 11:50:13.409822  543705 cpu.go:282] Add success.
W0319 11:50:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:50:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:50:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:50:13.420154  543705 net.go:648] Add success.
I0319 11:50:13.422995  543705 net.go:770] primary dev: ETH0
I0319 11:50:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:50:13.423023  543705 net.go:698] Add success.
I0319 11:50:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:50:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:50:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 11:50:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:50:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 11:50:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:50:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:50:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:50:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:50:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:50:18.977671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:50:18.980065  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:50:18.980070  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509540 0xc000509580]
E0319 11:50:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:50:23.409765  543705 memory.go:184] no items to output this cycle
I0319 11:50:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 11:50:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:50:33.409808  543705 memory.go:184] no items to output this cycle
I0319 11:50:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 11:50:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:50:43.409787  543705 memory.go:191] Add success.
I0319 11:50:43.409787  543705 cpu.go:282] Add success.
I0319 11:50:43.420193  543705 net.go:648] Add success.
I0319 11:50:43.422872  543705 net.go:770] primary dev: ETH0
I0319 11:50:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:50:43.422896  543705 net.go:698] Add success.
I0319 11:50:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:50:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:50:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:50:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:50:53.409769  543705 memory.go:184] no items to output this cycle
I0319 11:50:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 11:51:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:51:03.409807  543705 memory.go:184] no items to output this cycle
I0319 11:51:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 11:51:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:51:13.409791  543705 memory.go:191] Add success.
I0319 11:51:13.409810  543705 cpu.go:282] Add success.
W0319 11:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:51:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:51:13.420163  543705 net.go:648] Add success.
I0319 11:51:13.422816  543705 net.go:770] primary dev: ETH0
I0319 11:51:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:51:13.422842  543705 net.go:698] Add success.
I0319 11:51:13.463774  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d938bbb3-f84b-4c32-af99-88dca57cd790","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:51:13.463809  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:51:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:51:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:51:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0319 11:51:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:51:14.456517  543705 disk_worker.go:494] system disk:vda1
I0319 11:51:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:51:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:51:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:51:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:51:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:51:18.981672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:51:18.984066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:51:18.984072  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004861c0 0xc000486200]
E0319 11:51:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:51:23.409767  543705 memory.go:184] no items to output this cycle
I0319 11:51:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:51:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:51:33.409799  543705 memory.go:184] no items to output this cycle
I0319 11:51:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 11:51:37.699350  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:51:37.699357  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:51:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:51:43.410679  543705 memory.go:191] Add success.
I0319 11:51:43.409827  543705 cpu.go:282] Add success.
I0319 11:51:43.419757  543705 net.go:648] Add success.
I0319 11:51:43.422425  543705 net.go:770] primary dev: ETH0
I0319 11:51:43.422439  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:51:43.422454  543705 net.go:698] Add success.
I0319 11:51:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:51:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:51:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:51:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:51:53.409791  543705 memory.go:184] no items to output this cycle
I0319 11:51:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 11:52:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:52:03.409788  543705 memory.go:184] no items to output this cycle
I0319 11:52:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:52:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:52:13.409794  543705 cpu.go:282] Add success.
I0319 11:52:13.409796  543705 memory.go:191] Add success.
W0319 11:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:52:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:52:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:52:13.420066  543705 net.go:648] Add success.
I0319 11:52:13.422636  543705 net.go:770] primary dev: ETH0
I0319 11:52:13.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:52:13.422674  543705 net.go:698] Add success.
W0319 11:52:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:52:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 11:52:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:52:14.456934  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:52:14.456944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:52:14.456950  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:52:14.457001  543705 disk_worker.go:494] system disk:vda1
I0319 11:52:14.457031  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:52:15.456428  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:52:15.456436  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:52:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:52:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:52:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:52:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:52:16.472356  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:52:18.985679  543705 disk_info.go:125] begin check local disk info of client
I0319 11:52:18.987953  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:52:18.987959  543705 disk_info.go:196] parse disk info done, disk is : [0xc000383e00 0xc000383e40]
E0319 11:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:52:23.409778  543705 memory.go:184] no items to output this cycle
I0319 11:52:23.409787  543705 cpu.go:275] no items to output this cycle
E0319 11:52:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:52:33.409788  543705 memory.go:184] no items to output this cycle
I0319 11:52:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 11:52:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:52:43.409903  543705 memory.go:191] Add success.
I0319 11:52:43.409930  543705 cpu.go:282] Add success.
I0319 11:52:43.419717  543705 net.go:648] Add success.
I0319 11:52:43.422386  543705 net.go:770] primary dev: ETH0
I0319 11:52:43.422399  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:52:43.422411  543705 net.go:698] Add success.
I0319 11:52:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:52:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:52:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:52:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:52:53.409787  543705 memory.go:184] no items to output this cycle
I0319 11:52:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:53:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:53:03.409809  543705 memory.go:184] no items to output this cycle
I0319 11:53:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 11:53:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:53:13.409799  543705 memory.go:191] Add success.
I0319 11:53:13.409823  543705 cpu.go:282] Add success.
W0319 11:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:53:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:53:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:53:13.420138  543705 net.go:648] Add success.
I0319 11:53:13.422981  543705 net.go:770] primary dev: ETH0
I0319 11:53:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:53:13.423010  543705 net.go:698] Add success.
I0319 11:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:53:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:53:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 11:53:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:53:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 11:53:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:53:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:53:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:53:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:53:16.472352  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:53:18.989673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:53:18.992102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:53:18.992109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d80 0xc0000c5dc0]
E0319 11:53:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:53:23.409781  543705 memory.go:184] no items to output this cycle
I0319 11:53:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 11:53:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:53:33.409785  543705 memory.go:184] no items to output this cycle
I0319 11:53:33.409827  543705 cpu.go:275] no items to output this cycle
E0319 11:53:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:53:43.409912  543705 memory.go:191] Add success.
I0319 11:53:43.409936  543705 cpu.go:282] Add success.
I0319 11:53:43.419732  543705 net.go:648] Add success.
I0319 11:53:43.422722  543705 net.go:770] primary dev: ETH0
I0319 11:53:43.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:53:43.422748  543705 net.go:698] Add success.
I0319 11:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:53:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:53:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:53:53.409777  543705 memory.go:184] no items to output this cycle
I0319 11:53:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 11:54:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:54:03.409821  543705 memory.go:184] no items to output this cycle
I0319 11:54:03.409835  543705 cpu.go:275] no items to output this cycle
E0319 11:54:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:54:13.409793  543705 memory.go:191] Add success.
I0319 11:54:13.409794  543705 cpu.go:282] Add success.
W0319 11:54:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:54:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:54:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:54:13.420143  543705 net.go:648] Add success.
I0319 11:54:13.422896  543705 net.go:770] primary dev: ETH0
I0319 11:54:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:54:13.422924  543705 net.go:698] Add success.
I0319 11:54:13.464871  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d63e06df-3b77-406d-b9aa-1d7ca7dac31a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:54:13.464907  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 11:54:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:54:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:54:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0319 11:54:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:54:14.456722  543705 disk_worker.go:494] system disk:vda1
I0319 11:54:14.456754  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:54:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:54:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:54:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:54:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:54:18.993670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:54:18.996032  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:54:18.996038  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035bc00 0xc00035bc40]
E0319 11:54:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:54:23.409774  543705 memory.go:184] no items to output this cycle
I0319 11:54:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 11:54:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:54:33.409784  543705 memory.go:184] no items to output this cycle
I0319 11:54:33.409806  543705 cpu.go:275] no items to output this cycle
I0319 11:54:37.699500  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:54:37.699506  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:54:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:54:43.410714  543705 memory.go:191] Add success.
I0319 11:54:43.409813  543705 cpu.go:282] Add success.
I0319 11:54:43.420414  543705 net.go:648] Add success.
I0319 11:54:43.423277  543705 net.go:770] primary dev: ETH0
I0319 11:54:43.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:54:43.423307  543705 net.go:698] Add success.
I0319 11:54:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:54:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:54:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:54:53.409802  543705 memory.go:184] no items to output this cycle
I0319 11:54:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 11:55:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:55:03.409794  543705 memory.go:184] no items to output this cycle
I0319 11:55:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 11:55:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:55:13.409803  543705 memory.go:191] Add success.
I0319 11:55:13.409804  543705 cpu.go:282] Add success.
W0319 11:55:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:55:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:55:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:55:13.420112  543705 net.go:648] Add success.
I0319 11:55:13.422960  543705 net.go:770] primary dev: ETH0
I0319 11:55:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:55:13.422986  543705 net.go:698] Add success.
I0319 11:55:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:55:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:55:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 11:55:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:55:14.456485  543705 disk_worker.go:494] system disk:vda1
I0319 11:55:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:55:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:55:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:55:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:55:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:55:16.472423  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:55:18.997673  543705 disk_info.go:125] begin check local disk info of client
I0319 11:55:19.000046  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:55:19.000052  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035bb40 0xc00035bb80]
E0319 11:55:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:55:23.409798  543705 memory.go:184] no items to output this cycle
I0319 11:55:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 11:55:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:55:33.409779  543705 memory.go:184] no items to output this cycle
I0319 11:55:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 11:55:43.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:55:43.409928  543705 memory.go:191] Add success.
I0319 11:55:43.409966  543705 cpu.go:282] Add success.
I0319 11:55:43.419724  543705 net.go:648] Add success.
I0319 11:55:43.422489  543705 net.go:770] primary dev: ETH0
I0319 11:55:43.422503  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:55:43.422514  543705 net.go:698] Add success.
I0319 11:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:55:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:55:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:55:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:55:53.409787  543705 memory.go:184] no items to output this cycle
I0319 11:55:53.409819  543705 cpu.go:275] no items to output this cycle
E0319 11:56:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:56:03.409790  543705 memory.go:184] no items to output this cycle
I0319 11:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 11:56:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:56:13.409800  543705 memory.go:191] Add success.
I0319 11:56:13.409802  543705 cpu.go:282] Add success.
W0319 11:56:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:56:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:56:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:56:13.420096  543705 net.go:648] Add success.
I0319 11:56:13.423014  543705 net.go:770] primary dev: ETH0
I0319 11:56:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:56:13.423041  543705 net.go:698] Add success.
I0319 11:56:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:56:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:56:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 11:56:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:56:14.456509  543705 disk_worker.go:494] system disk:vda1
I0319 11:56:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:56:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:56:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:56:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:56:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:56:16.472354  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:56:19.001679  543705 disk_info.go:125] begin check local disk info of client
I0319 11:56:19.004021  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:56:19.004028  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc180 0xc0002bc240]
E0319 11:56:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:56:23.409765  543705 memory.go:184] no items to output this cycle
I0319 11:56:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 11:56:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:56:33.409805  543705 memory.go:184] no items to output this cycle
I0319 11:56:33.409807  543705 cpu.go:275] no items to output this cycle
E0319 11:56:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:56:43.409807  543705 cpu.go:282] Add success.
I0319 11:56:43.409799  543705 memory.go:191] Add success.
I0319 11:56:43.419981  543705 net.go:648] Add success.
I0319 11:56:43.422995  543705 net.go:770] primary dev: ETH0
I0319 11:56:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:56:43.423039  543705 net.go:698] Add success.
I0319 11:56:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:56:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:56:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:56:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:56:53.409793  543705 memory.go:184] no items to output this cycle
I0319 11:56:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 11:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:57:03.409779  543705 memory.go:184] no items to output this cycle
I0319 11:57:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 11:57:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:57:13.409807  543705 memory.go:191] Add success.
I0319 11:57:13.409819  543705 cpu.go:282] Add success.
W0319 11:57:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:57:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:57:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:57:13.420234  543705 net.go:648] Add success.
I0319 11:57:13.428922  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 11:57:13.428997  543705 net.go:770] primary dev: ETH0
I0319 11:57:13.429009  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:57:13.429020  543705 net.go:698] Add success.
I0319 11:57:13.453549  543705 event_worker.go:152] Polling the log file for events...
I0319 11:57:13.464643  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d16c0786-ab2a-4ce3-b5eb-828354d76315","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 11:57:13.464677  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 11:57:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:57:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 11:57:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0319 11:57:14.456969  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 11:57:14.456978  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 11:57:14.456984  543705 custom_config.go:64] query custom config with name: gpu
I0319 11:57:14.456996  543705 disk_worker.go:494] system disk:vda1
I0319 11:57:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 11:57:15.456778  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 11:57:15.456786  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:57:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 11:57:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 11:57:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:57:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:57:16.472327  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:57:19.005672  543705 disk_info.go:125] begin check local disk info of client
I0319 11:57:19.008001  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:57:19.008009  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329f40 0xc000474000]
E0319 11:57:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:57:23.409797  543705 memory.go:184] no items to output this cycle
I0319 11:57:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 11:57:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:57:33.409800  543705 memory.go:184] no items to output this cycle
I0319 11:57:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 11:57:37.700201  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 11:57:37.700208  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 11:57:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:57:43.410760  543705 memory.go:191] Add success.
I0319 11:57:43.409822  543705 cpu.go:282] Add success.
I0319 11:57:43.420479  543705 net.go:648] Add success.
I0319 11:57:43.423727  543705 net.go:770] primary dev: ETH0
I0319 11:57:43.423741  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:57:43.423755  543705 net.go:698] Add success.
I0319 11:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:57:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:57:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:57:53.409784  543705 memory.go:184] no items to output this cycle
I0319 11:57:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 11:58:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:58:03.409789  543705 memory.go:184] no items to output this cycle
I0319 11:58:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 11:58:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:58:13.409797  543705 memory.go:191] Add success.
I0319 11:58:13.409798  543705 cpu.go:282] Add success.
W0319 11:58:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:58:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:58:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:58:13.420108  543705 net.go:648] Add success.
I0319 11:58:13.422750  543705 net.go:770] primary dev: ETH0
I0319 11:58:13.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:58:13.422774  543705 net.go:698] Add success.
I0319 11:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:58:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:58:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 11:58:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:58:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 11:58:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:58:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:58:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:58:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:58:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:58:19.009671  543705 disk_info.go:125] begin check local disk info of client
I0319 11:58:19.012098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:58:19.012105  543705 disk_info.go:196] parse disk info done, disk is : [0xc000257300 0xc000257340]
E0319 11:58:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:58:23.409758  543705 memory.go:184] no items to output this cycle
I0319 11:58:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 11:58:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:58:33.409789  543705 memory.go:184] no items to output this cycle
I0319 11:58:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 11:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:58:43.409809  543705 memory.go:191] Add success.
I0319 11:58:43.409818  543705 cpu.go:282] Add success.
I0319 11:58:43.420009  543705 net.go:648] Add success.
I0319 11:58:43.422885  543705 net.go:770] primary dev: ETH0
I0319 11:58:43.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:58:43.422911  543705 net.go:698] Add success.
I0319 11:58:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:58:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:58:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:58:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:58:53.409775  543705 memory.go:184] no items to output this cycle
I0319 11:58:53.409776  543705 cpu.go:275] no items to output this cycle
E0319 11:59:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:59:03.409781  543705 memory.go:184] no items to output this cycle
I0319 11:59:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 11:59:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:59:13.409780  543705 memory.go:191] Add success.
I0319 11:59:13.409801  543705 cpu.go:282] Add success.
W0319 11:59:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 11:59:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 11:59:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 11:59:13.420166  543705 net.go:648] Add success.
I0319 11:59:13.423103  543705 net.go:770] primary dev: ETH0
I0319 11:59:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:59:13.423128  543705 net.go:698] Add success.
I0319 11:59:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 11:59:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 11:59:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 11:59:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 11:59:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 11:59:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 11:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 11:59:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 11:59:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0319 11:59:19.013670  543705 disk_info.go:125] begin check local disk info of client
I0319 11:59:19.016031  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 11:59:19.016037  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034b700 0xc00034b740]
E0319 11:59:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:59:23.409804  543705 memory.go:184] no items to output this cycle
I0319 11:59:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 11:59:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:59:33.409795  543705 memory.go:184] no items to output this cycle
I0319 11:59:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 11:59:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:59:43.409795  543705 memory.go:191] Add success.
I0319 11:59:43.409818  543705 cpu.go:282] Add success.
I0319 11:59:43.419943  543705 net.go:648] Add success.
I0319 11:59:43.423259  543705 net.go:770] primary dev: ETH0
I0319 11:59:43.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0319 11:59:43.423285  543705 net.go:698] Add success.
I0319 11:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 11:59:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 11:59:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 11:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 11:59:53.409777  543705 memory.go:184] no items to output this cycle
I0319 11:59:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 12:00:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:00:03.409785  543705 memory.go:184] no items to output this cycle
I0319 12:00:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 12:00:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:00:13.409814  543705 memory.go:191] Add success.
I0319 12:00:13.409817  543705 cpu.go:282] Add success.
W0319 12:00:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:00:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:00:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:00:13.420050  543705 net.go:648] Add success.
I0319 12:00:13.423398  543705 net.go:770] primary dev: ETH0
I0319 12:00:13.423414  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:00:13.423428  543705 net.go:698] Add success.
I0319 12:00:13.609525  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9287e534-31c3-4efa-a832-afb93ea67566","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:00:13.609559  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:00:14.453966  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:00:14.455292  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:00:14.455302  543705 disk_worker.go:708] disk space is not compliant
W0319 12:00:14.455305  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:00:14.456793  543705 disk_worker.go:494] system disk:vda1
I0319 12:00:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:00:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:00:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:00:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:00:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:00:19.017670  543705 disk_info.go:125] begin check local disk info of client
I0319 12:00:19.020091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:00:19.020097  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314bc0 0xc000314c00]
E0319 12:00:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:00:23.409776  543705 memory.go:184] no items to output this cycle
I0319 12:00:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 12:00:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:00:33.409799  543705 memory.go:184] no items to output this cycle
I0319 12:00:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 12:00:37.700360  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:00:37.700367  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:00:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:00:43.410889  543705 memory.go:191] Add success.
I0319 12:00:43.409820  543705 cpu.go:282] Add success.
I0319 12:00:43.420569  543705 net.go:648] Add success.
I0319 12:00:43.423493  543705 net.go:770] primary dev: ETH0
I0319 12:00:43.423507  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:00:43.423520  543705 net.go:698] Add success.
I0319 12:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:00:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:00:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:00:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:00:53.409796  543705 memory.go:184] no items to output this cycle
I0319 12:00:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:01:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:01:03.409776  543705 memory.go:184] no items to output this cycle
I0319 12:01:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 12:01:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:01:13.409811  543705 memory.go:191] Add success.
I0319 12:01:13.409822  543705 cpu.go:282] Add success.
W0319 12:01:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:01:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:01:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:01:13.420219  543705 net.go:648] Add success.
I0319 12:01:13.423170  543705 net.go:770] primary dev: ETH0
I0319 12:01:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:01:13.423197  543705 net.go:698] Add success.
I0319 12:01:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:01:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:01:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 12:01:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:01:14.456518  543705 disk_worker.go:494] system disk:vda1
I0319 12:01:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:01:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:01:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:01:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:01:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:01:19.021671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:01:19.024098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:01:19.024104  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001efc40 0xc0001efc80]
E0319 12:01:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:01:23.409771  543705 memory.go:184] no items to output this cycle
I0319 12:01:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:01:33.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:01:33.409888  543705 memory.go:184] no items to output this cycle
I0319 12:01:33.409921  543705 cpu.go:275] no items to output this cycle
E0319 12:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:01:43.409793  543705 memory.go:191] Add success.
I0319 12:01:43.409798  543705 cpu.go:282] Add success.
I0319 12:01:43.419999  543705 net.go:648] Add success.
I0319 12:01:43.422809  543705 net.go:770] primary dev: ETH0
I0319 12:01:43.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:01:43.422837  543705 net.go:698] Add success.
I0319 12:01:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:01:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:01:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:01:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:01:53.409775  543705 cpu.go:275] no items to output this cycle
I0319 12:01:53.409779  543705 memory.go:184] no items to output this cycle
E0319 12:02:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:02:03.409783  543705 memory.go:184] no items to output this cycle
I0319 12:02:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 12:02:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:02:13.409780  543705 memory.go:191] Add success.
I0319 12:02:13.409799  543705 cpu.go:282] Add success.
W0319 12:02:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:02:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:02:13.420335  543705 net.go:648] Add success.
I0319 12:02:13.423123  543705 net.go:770] primary dev: ETH0
I0319 12:02:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:02:13.423149  543705 net.go:698] Add success.
W0319 12:02:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:02:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 12:02:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:02:14.455865  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:02:14.455874  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:02:14.455880  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:02:14.456607  543705 disk_worker.go:494] system disk:vda1
I0319 12:02:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:02:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:02:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:02:16.457903  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:02:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:02:16.457959  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:02:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:02:16.472301  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:02:19.025669  543705 disk_info.go:125] begin check local disk info of client
I0319 12:02:19.027987  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:02:19.027992  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509880 0xc0005098c0]
E0319 12:02:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:02:23.409787  543705 memory.go:184] no items to output this cycle
I0319 12:02:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 12:02:33.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:02:33.409905  543705 memory.go:184] no items to output this cycle
I0319 12:02:33.409936  543705 cpu.go:275] no items to output this cycle
E0319 12:02:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:02:43.409812  543705 memory.go:191] Add success.
I0319 12:02:43.409829  543705 cpu.go:282] Add success.
I0319 12:02:43.419949  543705 net.go:648] Add success.
I0319 12:02:43.422797  543705 net.go:770] primary dev: ETH0
I0319 12:02:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:02:43.422827  543705 net.go:698] Add success.
I0319 12:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:02:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:02:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:02:53.409763  543705 memory.go:184] no items to output this cycle
I0319 12:02:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 12:03:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:03:03.409822  543705 memory.go:184] no items to output this cycle
I0319 12:03:03.409834  543705 cpu.go:275] no items to output this cycle
E0319 12:03:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:03:13.409786  543705 memory.go:191] Add success.
W0319 12:03:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:03:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:03:13.409828  543705 cpu.go:282] Add success.
I0319 12:03:13.420128  543705 net.go:648] Add success.
I0319 12:03:13.423152  543705 net.go:770] primary dev: ETH0
I0319 12:03:13.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:03:13.423177  543705 net.go:698] Add success.
I0319 12:03:13.508275  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc90e21a-1d47-4a44-9072-bfc01757b7da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:03:13.508319  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:03:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:03:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:03:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0319 12:03:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:03:14.456737  543705 disk_worker.go:494] system disk:vda1
I0319 12:03:14.456769  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:03:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:03:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:03:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:03:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:03:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:03:19.029680  543705 disk_info.go:125] begin check local disk info of client
I0319 12:03:19.032076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:03:19.032082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a52c0 0xc0002a5300]
E0319 12:03:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:03:23.409802  543705 memory.go:184] no items to output this cycle
I0319 12:03:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 12:03:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:03:33.409779  543705 memory.go:184] no items to output this cycle
I0319 12:03:33.409782  543705 cpu.go:275] no items to output this cycle
I0319 12:03:37.700507  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:03:37.700515  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:03:43.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:03:43.411023  543705 memory.go:191] Add success.
I0319 12:03:43.410072  543705 cpu.go:282] Add success.
I0319 12:03:43.419716  543705 net.go:648] Add success.
I0319 12:03:43.422908  543705 net.go:770] primary dev: ETH0
I0319 12:03:43.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:03:43.422934  543705 net.go:698] Add success.
I0319 12:03:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:03:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:03:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:03:53.409781  543705 memory.go:184] no items to output this cycle
I0319 12:03:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 12:04:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:04:03.409812  543705 memory.go:184] no items to output this cycle
I0319 12:04:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 12:04:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:04:13.409779  543705 memory.go:191] Add success.
W0319 12:04:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:04:13.409809  543705 cpu.go:282] Add success.
W0319 12:04:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:04:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:04:13.420120  543705 net.go:648] Add success.
I0319 12:04:13.422804  543705 net.go:770] primary dev: ETH0
I0319 12:04:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:04:13.422834  543705 net.go:698] Add success.
I0319 12:04:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:04:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:04:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 12:04:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:04:14.456621  543705 disk_worker.go:494] system disk:vda1
I0319 12:04:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:04:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:04:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:04:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:04:19.033675  543705 disk_info.go:125] begin check local disk info of client
I0319 12:04:19.036122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:04:19.036130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b740 0xc00007b780]
E0319 12:04:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:04:23.409801  543705 memory.go:184] no items to output this cycle
I0319 12:04:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 12:04:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:04:33.409783  543705 memory.go:184] no items to output this cycle
I0319 12:04:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:04:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:04:43.409917  543705 memory.go:191] Add success.
I0319 12:04:43.409947  543705 cpu.go:282] Add success.
I0319 12:04:43.419709  543705 net.go:648] Add success.
I0319 12:04:43.422585  543705 net.go:770] primary dev: ETH0
I0319 12:04:43.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:04:43.422609  543705 net.go:698] Add success.
I0319 12:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:04:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:04:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:04:53.409795  543705 memory.go:184] no items to output this cycle
I0319 12:04:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:05:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:05:03.409807  543705 memory.go:184] no items to output this cycle
I0319 12:05:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 12:05:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:05:13.409782  543705 memory.go:191] Add success.
W0319 12:05:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:05:13.409813  543705 cpu.go:282] Add success.
W0319 12:05:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:05:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:05:13.420125  543705 net.go:648] Add success.
I0319 12:05:13.422760  543705 net.go:770] primary dev: ETH0
I0319 12:05:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:05:13.422784  543705 net.go:698] Add success.
I0319 12:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:05:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:05:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 12:05:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:05:14.456571  543705 disk_worker.go:494] system disk:vda1
I0319 12:05:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:05:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:05:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:05:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:05:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:05:19.037677  543705 disk_info.go:125] begin check local disk info of client
I0319 12:05:19.040097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:05:19.040104  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a100 0xc00047a140]
E0319 12:05:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:05:23.409793  543705 memory.go:184] no items to output this cycle
I0319 12:05:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 12:05:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:05:33.409800  543705 memory.go:184] no items to output this cycle
I0319 12:05:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 12:05:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:05:43.409778  543705 memory.go:191] Add success.
I0319 12:05:43.409809  543705 cpu.go:282] Add success.
I0319 12:05:43.420183  543705 net.go:648] Add success.
I0319 12:05:43.423118  543705 net.go:770] primary dev: ETH0
I0319 12:05:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:05:43.423143  543705 net.go:698] Add success.
I0319 12:05:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:05:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:05:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:05:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:05:53.409799  543705 memory.go:184] no items to output this cycle
I0319 12:05:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 12:06:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:06:03.409788  543705 memory.go:184] no items to output this cycle
I0319 12:06:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 12:06:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:06:13.409813  543705 memory.go:191] Add success.
I0319 12:06:13.409825  543705 cpu.go:282] Add success.
W0319 12:06:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:06:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:06:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:06:13.420392  543705 net.go:648] Add success.
I0319 12:06:13.422933  543705 net.go:770] primary dev: ETH0
I0319 12:06:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:06:13.422958  543705 net.go:698] Add success.
I0319 12:06:13.573918  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3e351b98-be9f-4214-8a60-e57e9887c08f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:06:13.573956  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:06:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:06:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:06:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0319 12:06:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:06:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 12:06:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:06:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:06:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:06:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:06:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:06:19.041673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:06:19.044128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:06:19.044135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005096c0 0xc000509700]
E0319 12:06:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:06:23.409760  543705 memory.go:184] no items to output this cycle
I0319 12:06:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 12:06:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:06:33.409810  543705 memory.go:184] no items to output this cycle
I0319 12:06:33.409824  543705 cpu.go:275] no items to output this cycle
I0319 12:06:37.701198  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:06:37.701204  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:06:43.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:06:43.411021  543705 memory.go:191] Add success.
I0319 12:06:43.409983  543705 cpu.go:282] Add success.
I0319 12:06:43.419730  543705 net.go:648] Add success.
I0319 12:06:43.422879  543705 net.go:770] primary dev: ETH0
I0319 12:06:43.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:06:43.422903  543705 net.go:698] Add success.
I0319 12:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:06:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:06:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:06:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:06:53.409772  543705 memory.go:184] no items to output this cycle
I0319 12:06:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 12:07:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:07:03.409778  543705 memory.go:184] no items to output this cycle
I0319 12:07:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 12:07:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:07:13.409778  543705 memory.go:191] Add success.
W0319 12:07:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:07:13.409819  543705 cpu.go:282] Add success.
I0319 12:07:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:07:13.420250  543705 net.go:648] Add success.
I0319 12:07:13.423050  543705 net.go:770] primary dev: ETH0
I0319 12:07:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:07:13.423074  543705 net.go:698] Add success.
I0319 12:07:13.453630  543705 event_worker.go:152] Polling the log file for events...
W0319 12:07:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:07:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 12:07:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:07:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:07:14.455919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:07:14.455925  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:07:14.456538  543705 disk_worker.go:494] system disk:vda1
I0319 12:07:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:07:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:07:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:07:16.458054  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:07:16.458065  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:07:16.458106  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:07:16.458125  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:07:16.472485  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:07:19.045673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:07:19.048076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:07:19.048083  543705 disk_info.go:196] parse disk info done, disk is : [0xc000595940 0xc000595980]
E0319 12:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:07:23.409795  543705 memory.go:184] no items to output this cycle
I0319 12:07:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:07:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:07:33.409779  543705 memory.go:184] no items to output this cycle
I0319 12:07:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 12:07:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:07:43.409792  543705 memory.go:191] Add success.
I0319 12:07:43.409823  543705 cpu.go:282] Add success.
I0319 12:07:43.419758  543705 net.go:648] Add success.
I0319 12:07:43.422525  543705 net.go:770] primary dev: ETH0
I0319 12:07:43.422538  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:07:43.422550  543705 net.go:698] Add success.
I0319 12:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:07:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:07:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:07:53.409774  543705 memory.go:184] no items to output this cycle
I0319 12:07:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 12:08:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:08:03.409783  543705 memory.go:184] no items to output this cycle
I0319 12:08:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 12:08:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:08:13.409807  543705 memory.go:191] Add success.
I0319 12:08:13.409808  543705 cpu.go:282] Add success.
W0319 12:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:08:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:08:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:08:13.419791  543705 net.go:648] Add success.
I0319 12:08:13.422674  543705 net.go:770] primary dev: ETH0
I0319 12:08:13.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:08:13.422709  543705 net.go:698] Add success.
I0319 12:08:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:08:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:08:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 12:08:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:08:14.456796  543705 disk_worker.go:494] system disk:vda1
I0319 12:08:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:08:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:08:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:08:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:08:16.472475  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:08:19.049672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:08:19.052136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:08:19.052143  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469200 0xc000469240]
E0319 12:08:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:08:23.409805  543705 memory.go:184] no items to output this cycle
I0319 12:08:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 12:08:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:08:33.409782  543705 memory.go:184] no items to output this cycle
I0319 12:08:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 12:08:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:08:43.409798  543705 memory.go:191] Add success.
I0319 12:08:43.409803  543705 cpu.go:282] Add success.
I0319 12:08:43.419892  543705 net.go:648] Add success.
I0319 12:08:43.422618  543705 net.go:770] primary dev: ETH0
I0319 12:08:43.422631  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:08:43.422643  543705 net.go:698] Add success.
I0319 12:08:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:08:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:08:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:08:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:08:53.409810  543705 memory.go:184] no items to output this cycle
I0319 12:08:53.409822  543705 cpu.go:275] no items to output this cycle
E0319 12:09:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:09:03.409807  543705 memory.go:184] no items to output this cycle
I0319 12:09:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:09:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:09:13.409824  543705 memory.go:191] Add success.
I0319 12:09:13.409831  543705 cpu.go:282] Add success.
W0319 12:09:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:09:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:09:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:09:13.420294  543705 net.go:648] Add success.
I0319 12:09:13.422981  543705 net.go:770] primary dev: ETH0
I0319 12:09:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:09:13.423006  543705 net.go:698] Add success.
I0319 12:09:13.617519  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b73645ce-8beb-4713-b469-fdade4da1721","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:09:13.617553  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:09:14.453984  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:09:14.454202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:09:14.454213  543705 disk_worker.go:708] disk space is not compliant
W0319 12:09:14.454216  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:09:14.455578  543705 disk_worker.go:494] system disk:vda1
I0319 12:09:14.455629  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:09:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:09:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:09:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:09:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:09:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:09:19.053672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:09:19.056080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:09:19.056086  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049c280 0xc00049c2c0]
E0319 12:09:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:09:23.409789  543705 cpu.go:275] no items to output this cycle
I0319 12:09:23.409790  543705 memory.go:184] no items to output this cycle
E0319 12:09:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:09:33.409806  543705 memory.go:184] no items to output this cycle
I0319 12:09:33.409822  543705 cpu.go:275] no items to output this cycle
I0319 12:09:37.701734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:09:37.701742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:09:43.410759  543705 memory.go:191] Add success.
I0319 12:09:43.409810  543705 cpu.go:282] Add success.
I0319 12:09:43.420549  543705 net.go:648] Add success.
I0319 12:09:43.423966  543705 net.go:770] primary dev: ETH0
I0319 12:09:43.423980  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:09:43.423993  543705 net.go:698] Add success.
I0319 12:09:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:09:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:09:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:09:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:09:53.409791  543705 memory.go:184] no items to output this cycle
I0319 12:09:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 12:10:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:10:03.409774  543705 memory.go:184] no items to output this cycle
I0319 12:10:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 12:10:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:10:13.409782  543705 memory.go:191] Add success.
W0319 12:10:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:10:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:10:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:10:13.409834  543705 cpu.go:282] Add success.
I0319 12:10:13.420065  543705 net.go:648] Add success.
I0319 12:10:13.422701  543705 net.go:770] primary dev: ETH0
I0319 12:10:13.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:10:13.422733  543705 net.go:698] Add success.
I0319 12:10:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:10:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:10:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0319 12:10:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:10:14.456475  543705 disk_worker.go:494] system disk:vda1
I0319 12:10:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:10:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:10:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:10:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:10:16.472361  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:10:19.057678  543705 disk_info.go:125] begin check local disk info of client
I0319 12:10:19.060056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:10:19.060061  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8a00 0xc0003d8a40]
E0319 12:10:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:10:23.409801  543705 memory.go:184] no items to output this cycle
I0319 12:10:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 12:10:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:10:33.409800  543705 memory.go:184] no items to output this cycle
I0319 12:10:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 12:10:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:10:43.409802  543705 memory.go:191] Add success.
I0319 12:10:43.409805  543705 cpu.go:282] Add success.
I0319 12:10:43.419899  543705 net.go:648] Add success.
I0319 12:10:43.422826  543705 net.go:770] primary dev: ETH0
I0319 12:10:43.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:10:43.422855  543705 net.go:698] Add success.
I0319 12:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:10:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:10:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:10:53.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:10:53.409903  543705 memory.go:184] no items to output this cycle
I0319 12:10:53.409919  543705 cpu.go:275] no items to output this cycle
E0319 12:11:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:11:03.409800  543705 memory.go:184] no items to output this cycle
I0319 12:11:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:11:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:11:13.409832  543705 memory.go:191] Add success.
I0319 12:11:13.409835  543705 cpu.go:282] Add success.
W0319 12:11:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:11:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:11:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:11:13.420164  543705 net.go:648] Add success.
I0319 12:11:13.422817  543705 net.go:770] primary dev: ETH0
I0319 12:11:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:11:13.422845  543705 net.go:698] Add success.
I0319 12:11:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:11:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:11:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 12:11:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:11:14.456497  543705 disk_worker.go:494] system disk:vda1
I0319 12:11:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:11:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:11:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:11:16.472444  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:11:19.061674  543705 disk_info.go:125] begin check local disk info of client
I0319 12:11:19.064051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:11:19.064057  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c59c0 0xc0000c5a00]
E0319 12:11:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:11:23.409802  543705 memory.go:184] no items to output this cycle
I0319 12:11:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 12:11:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:11:33.409789  543705 memory.go:184] no items to output this cycle
I0319 12:11:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 12:11:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:11:43.409806  543705 memory.go:191] Add success.
I0319 12:11:43.409807  543705 cpu.go:282] Add success.
I0319 12:11:43.419873  543705 net.go:648] Add success.
I0319 12:11:43.422666  543705 net.go:770] primary dev: ETH0
I0319 12:11:43.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:11:43.422696  543705 net.go:698] Add success.
I0319 12:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:11:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:11:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:11:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:11:53.409804  543705 memory.go:184] no items to output this cycle
I0319 12:11:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 12:12:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:12:03.409806  543705 memory.go:184] no items to output this cycle
I0319 12:12:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 12:12:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:12:13.409824  543705 memory.go:191] Add success.
I0319 12:12:13.409842  543705 cpu.go:282] Add success.
W0319 12:12:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:12:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:12:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:12:13.420220  543705 net.go:648] Add success.
I0319 12:12:13.423130  543705 net.go:770] primary dev: ETH0
I0319 12:12:13.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:12:13.423156  543705 net.go:698] Add success.
I0319 12:12:13.468961  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b9821d09-e7a7-430f-bb0f-13343796a91d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:12:13.468995  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 12:12:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:12:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 12:12:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:12:14.456011  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:12:14.456020  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:12:14.456025  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:12:14.456453  543705 disk_worker.go:494] system disk:vda1
I0319 12:12:14.456483  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:12:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:12:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:12:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:12:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:12:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:12:16.457982  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:12:16.472308  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:12:19.065673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:12:19.068050  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:12:19.068056  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
E0319 12:12:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:12:23.409782  543705 memory.go:184] no items to output this cycle
I0319 12:12:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 12:12:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:12:33.409818  543705 memory.go:184] no items to output this cycle
I0319 12:12:33.409837  543705 cpu.go:275] no items to output this cycle
I0319 12:12:37.701884  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:12:37.701891  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:12:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:12:43.410802  543705 memory.go:191] Add success.
I0319 12:12:43.409816  543705 cpu.go:282] Add success.
I0319 12:12:43.420514  543705 net.go:648] Add success.
I0319 12:12:43.423368  543705 net.go:770] primary dev: ETH0
I0319 12:12:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:12:43.423394  543705 net.go:698] Add success.
I0319 12:12:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:12:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:12:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:12:53.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:12:53.409901  543705 memory.go:184] no items to output this cycle
I0319 12:12:53.410035  543705 cpu.go:275] no items to output this cycle
E0319 12:13:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:13:03.409772  543705 memory.go:184] no items to output this cycle
I0319 12:13:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 12:13:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:13:13.409803  543705 memory.go:191] Add success.
I0319 12:13:13.409813  543705 cpu.go:282] Add success.
W0319 12:13:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:13:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:13:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:13:13.420287  543705 net.go:648] Add success.
I0319 12:13:13.423039  543705 net.go:770] primary dev: ETH0
I0319 12:13:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:13:13.423063  543705 net.go:698] Add success.
I0319 12:13:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:13:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:13:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 12:13:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:13:14.456543  543705 disk_worker.go:494] system disk:vda1
I0319 12:13:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:13:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:13:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:13:19.069672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:13:19.072070  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:13:19.072077  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa00 0xc0001aaa40]
E0319 12:13:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:13:23.409773  543705 memory.go:184] no items to output this cycle
I0319 12:13:23.409778  543705 cpu.go:275] no items to output this cycle
E0319 12:13:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:13:33.409770  543705 memory.go:184] no items to output this cycle
I0319 12:13:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 12:13:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:13:43.409813  543705 memory.go:191] Add success.
I0319 12:13:43.409821  543705 cpu.go:282] Add success.
I0319 12:13:43.420420  543705 net.go:648] Add success.
I0319 12:13:43.423160  543705 net.go:770] primary dev: ETH0
I0319 12:13:43.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:13:43.423337  543705 net.go:698] Add success.
I0319 12:13:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:13:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:13:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:13:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:13:53.409797  543705 memory.go:184] no items to output this cycle
I0319 12:13:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:14:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:14:03.409812  543705 memory.go:184] no items to output this cycle
I0319 12:14:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 12:14:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:14:13.409789  543705 memory.go:191] Add success.
W0319 12:14:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:14:13.409816  543705 cpu.go:282] Add success.
W0319 12:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:14:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:14:13.420285  543705 net.go:648] Add success.
I0319 12:14:13.423141  543705 net.go:770] primary dev: ETH0
I0319 12:14:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:14:13.423170  543705 net.go:698] Add success.
I0319 12:14:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:14:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:14:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 12:14:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:14:14.456485  543705 disk_worker.go:494] system disk:vda1
I0319 12:14:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:14:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:14:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:14:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:14:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:14:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:14:19.073677  543705 disk_info.go:125] begin check local disk info of client
I0319 12:14:19.076027  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:14:19.076033  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486300 0xc000486340]
E0319 12:14:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:14:23.409789  543705 memory.go:184] no items to output this cycle
I0319 12:14:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 12:14:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:14:33.409782  543705 memory.go:184] no items to output this cycle
I0319 12:14:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:14:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:14:43.409780  543705 memory.go:191] Add success.
I0319 12:14:43.409794  543705 cpu.go:282] Add success.
I0319 12:14:43.420045  543705 net.go:648] Add success.
I0319 12:14:43.421017  543705 net.go:770] primary dev: ETH0
I0319 12:14:43.421029  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:14:43.421041  543705 net.go:698] Add success.
I0319 12:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:14:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:14:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:14:53.409786  543705 memory.go:184] no items to output this cycle
I0319 12:14:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 12:15:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:15:03.409789  543705 memory.go:184] no items to output this cycle
I0319 12:15:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:15:13.409779  543705 memory.go:191] Add success.
I0319 12:15:13.409802  543705 cpu.go:282] Add success.
W0319 12:15:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:15:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:15:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:15:13.420259  543705 net.go:648] Add success.
I0319 12:15:13.422941  543705 net.go:770] primary dev: ETH0
I0319 12:15:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:15:13.422965  543705 net.go:698] Add success.
I0319 12:15:13.463555  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf0b45c5-f413-4f71-bf12-20d9ee1207e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:15:13.463598  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:15:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:15:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 12:15:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:15:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 12:15:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:15:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:15:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:15:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:15:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:15:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:15:19.077671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:15:19.080094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:15:19.080101  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ef080 0xc0001ef0c0]
E0319 12:15:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:15:23.409787  543705 memory.go:184] no items to output this cycle
I0319 12:15:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:15:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:15:33.409791  543705 memory.go:184] no items to output this cycle
I0319 12:15:33.409818  543705 cpu.go:275] no items to output this cycle
I0319 12:15:37.702031  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:15:37.702039  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:15:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:15:43.410726  543705 memory.go:191] Add success.
I0319 12:15:43.409793  543705 cpu.go:282] Add success.
I0319 12:15:43.420601  543705 net.go:648] Add success.
I0319 12:15:43.424590  543705 net.go:770] primary dev: ETH0
I0319 12:15:43.424603  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:15:43.424615  543705 net.go:698] Add success.
I0319 12:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:15:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:15:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:15:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:15:53.409769  543705 memory.go:184] no items to output this cycle
I0319 12:15:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 12:16:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:16:03.409814  543705 memory.go:184] no items to output this cycle
I0319 12:16:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 12:16:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:16:13.409796  543705 cpu.go:282] Add success.
I0319 12:16:13.409802  543705 memory.go:191] Add success.
W0319 12:16:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:16:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:16:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:16:13.420182  543705 net.go:648] Add success.
I0319 12:16:13.423215  543705 net.go:770] primary dev: ETH0
I0319 12:16:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:16:13.423242  543705 net.go:698] Add success.
I0319 12:16:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:16:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:16:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 12:16:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:16:14.456593  543705 disk_worker.go:494] system disk:vda1
I0319 12:16:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:16:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:16:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:16:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:16:19.081671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:16:19.084049  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:16:19.084055  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de900 0xc0003de940]
E0319 12:16:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:16:23.409795  543705 memory.go:184] no items to output this cycle
I0319 12:16:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:16:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:16:33.409786  543705 memory.go:184] no items to output this cycle
I0319 12:16:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 12:16:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:16:43.409789  543705 memory.go:191] Add success.
I0319 12:16:43.409793  543705 cpu.go:282] Add success.
I0319 12:16:43.420189  543705 net.go:648] Add success.
I0319 12:16:43.423136  543705 net.go:770] primary dev: ETH0
I0319 12:16:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:16:43.423173  543705 net.go:698] Add success.
I0319 12:16:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:16:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:16:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:16:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:16:53.409778  543705 memory.go:184] no items to output this cycle
I0319 12:16:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 12:17:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:17:03.409810  543705 memory.go:184] no items to output this cycle
I0319 12:17:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 12:17:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:17:13.409816  543705 memory.go:191] Add success.
I0319 12:17:13.409828  543705 cpu.go:282] Add success.
W0319 12:17:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:17:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:17:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:17:13.420153  543705 net.go:648] Add success.
I0319 12:17:13.422958  543705 net.go:770] primary dev: ETH0
I0319 12:17:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:17:13.422983  543705 net.go:698] Add success.
I0319 12:17:13.453521  543705 event_worker.go:152] Polling the log file for events...
W0319 12:17:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:17:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 12:17:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:17:14.455921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:17:14.455930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:17:14.455936  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:17:14.456562  543705 disk_worker.go:494] system disk:vda1
I0319 12:17:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:17:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:17:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:17:16.457896  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:17:16.457895  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:17:16.457947  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:17:16.457966  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:17:16.472285  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:17:19.085669  543705 disk_info.go:125] begin check local disk info of client
I0319 12:17:19.088004  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:17:19.088012  543705 disk_info.go:196] parse disk info done, disk is : [0xc000251b80 0xc000251bc0]
E0319 12:17:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:17:23.409779  543705 memory.go:184] no items to output this cycle
I0319 12:17:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 12:17:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:17:33.409765  543705 memory.go:184] no items to output this cycle
I0319 12:17:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 12:17:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:17:43.409803  543705 memory.go:191] Add success.
I0319 12:17:43.409805  543705 cpu.go:282] Add success.
I0319 12:17:43.419732  543705 net.go:648] Add success.
I0319 12:17:43.422459  543705 net.go:770] primary dev: ETH0
I0319 12:17:43.422473  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:17:43.422484  543705 net.go:698] Add success.
I0319 12:17:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:17:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:17:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:17:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:17:53.409781  543705 memory.go:184] no items to output this cycle
I0319 12:17:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 12:18:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:18:03.409783  543705 memory.go:184] no items to output this cycle
I0319 12:18:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:18:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:18:13.409784  543705 memory.go:191] Add success.
W0319 12:18:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:18:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:18:13.409822  543705 cpu.go:282] Add success.
I0319 12:18:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:18:13.420162  543705 net.go:648] Add success.
I0319 12:18:13.422829  543705 net.go:770] primary dev: ETH0
I0319 12:18:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:18:13.422860  543705 net.go:698] Add success.
I0319 12:18:13.464071  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2eed5528-3ce8-46a5-b6c1-d62d67137e12","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:18:13.464107  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:18:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:18:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 12:18:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:18:14.456595  543705 disk_worker.go:494] system disk:vda1
I0319 12:18:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:18:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:18:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:18:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:18:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:18:16.472470  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:18:19.089668  543705 disk_info.go:125] begin check local disk info of client
I0319 12:18:19.092134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:18:19.092141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ef080 0xc0003ef0c0]
E0319 12:18:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:18:23.409766  543705 memory.go:184] no items to output this cycle
I0319 12:18:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 12:18:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:18:33.409813  543705 memory.go:184] no items to output this cycle
I0319 12:18:33.409826  543705 cpu.go:275] no items to output this cycle
I0319 12:18:37.702195  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:18:37.702201  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:18:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:18:43.410799  543705 memory.go:191] Add success.
I0319 12:18:43.409802  543705 cpu.go:282] Add success.
I0319 12:18:43.420564  543705 net.go:648] Add success.
I0319 12:18:43.423559  543705 net.go:770] primary dev: ETH0
I0319 12:18:43.423574  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:18:43.423589  543705 net.go:698] Add success.
I0319 12:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:18:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:18:53.409776  543705 memory.go:184] no items to output this cycle
I0319 12:18:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 12:19:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:19:03.409780  543705 memory.go:184] no items to output this cycle
I0319 12:19:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 12:19:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:19:13.409820  543705 memory.go:191] Add success.
I0319 12:19:13.409830  543705 cpu.go:282] Add success.
W0319 12:19:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:19:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:19:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:19:13.420368  543705 net.go:648] Add success.
I0319 12:19:13.423412  543705 net.go:770] primary dev: ETH0
I0319 12:19:13.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:19:13.423441  543705 net.go:698] Add success.
I0319 12:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:19:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:19:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 12:19:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:19:14.456562  543705 disk_worker.go:494] system disk:vda1
I0319 12:19:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:19:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:19:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:19:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:19:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:19:19.093673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:19:19.096061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:19:19.096067  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003deb80 0xc0003debc0]
E0319 12:19:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:19:23.409772  543705 memory.go:184] no items to output this cycle
I0319 12:19:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 12:19:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:19:33.409771  543705 memory.go:184] no items to output this cycle
I0319 12:19:33.409801  543705 cpu.go:275] no items to output this cycle
E0319 12:19:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:19:43.409793  543705 memory.go:191] Add success.
I0319 12:19:43.409825  543705 cpu.go:282] Add success.
I0319 12:19:43.420037  543705 net.go:648] Add success.
I0319 12:19:43.422967  543705 net.go:770] primary dev: ETH0
I0319 12:19:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:19:43.422991  543705 net.go:698] Add success.
I0319 12:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:19:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:19:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:19:53.409794  543705 memory.go:184] no items to output this cycle
I0319 12:19:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:20:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:20:03.409789  543705 memory.go:184] no items to output this cycle
I0319 12:20:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:20:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:20:13.409829  543705 memory.go:191] Add success.
I0319 12:20:13.409842  543705 cpu.go:282] Add success.
W0319 12:20:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:20:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:20:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:20:13.420223  543705 net.go:648] Add success.
I0319 12:20:13.423230  543705 net.go:770] primary dev: ETH0
I0319 12:20:13.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:20:13.423259  543705 net.go:698] Add success.
I0319 12:20:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:20:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:20:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0319 12:20:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:20:14.456616  543705 disk_worker.go:494] system disk:vda1
I0319 12:20:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:20:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:20:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:20:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:20:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:20:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:20:19.097671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:20:19.100061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:20:19.100066  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cfb00 0xc0003cfb40]
E0319 12:20:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:20:23.409797  543705 memory.go:184] no items to output this cycle
I0319 12:20:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:20:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:20:33.409790  543705 memory.go:184] no items to output this cycle
I0319 12:20:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:20:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:20:43.409817  543705 memory.go:191] Add success.
I0319 12:20:43.409825  543705 cpu.go:282] Add success.
I0319 12:20:43.420007  543705 net.go:648] Add success.
I0319 12:20:43.422882  543705 net.go:770] primary dev: ETH0
I0319 12:20:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:20:43.422911  543705 net.go:698] Add success.
I0319 12:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:20:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:20:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:20:53.409779  543705 memory.go:184] no items to output this cycle
I0319 12:20:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 12:21:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:21:03.409778  543705 memory.go:184] no items to output this cycle
I0319 12:21:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 12:21:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:21:13.409779  543705 memory.go:191] Add success.
W0319 12:21:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:21:13.409810  543705 cpu.go:282] Add success.
W0319 12:21:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:21:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:21:13.419688  543705 net.go:648] Add success.
I0319 12:21:13.422445  543705 net.go:770] primary dev: ETH0
I0319 12:21:13.422458  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:21:13.422472  543705 net.go:698] Add success.
I0319 12:21:13.463426  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ae01966-2b32-45b4-ba46-6d6a1faa0f27","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:21:13.463460  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:21:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:21:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:21:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 12:21:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:21:14.456629  543705 disk_worker.go:494] system disk:vda1
I0319 12:21:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:21:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:21:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:21:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:21:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:21:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:21:19.101673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:21:19.104072  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:21:19.104079  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382d80 0xc000382dc0]
E0319 12:21:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:21:23.409793  543705 memory.go:184] no items to output this cycle
I0319 12:21:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:21:33.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:21:33.409913  543705 cpu.go:275] no items to output this cycle
I0319 12:21:33.409987  543705 memory.go:184] no items to output this cycle
I0319 12:21:37.702335  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:21:37.702343  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:21:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:21:43.410699  543705 memory.go:191] Add success.
I0319 12:21:43.409829  543705 cpu.go:282] Add success.
I0319 12:21:43.420483  543705 net.go:648] Add success.
I0319 12:21:43.423280  543705 net.go:770] primary dev: ETH0
I0319 12:21:43.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:21:43.423312  543705 net.go:698] Add success.
I0319 12:21:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:21:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:21:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:21:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:21:53.409782  543705 memory.go:184] no items to output this cycle
I0319 12:21:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 12:22:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:22:03.409809  543705 memory.go:184] no items to output this cycle
I0319 12:22:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 12:22:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:22:13.409777  543705 memory.go:191] Add success.
I0319 12:22:13.409797  543705 cpu.go:282] Add success.
W0319 12:22:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:22:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:22:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:22:13.420145  543705 net.go:648] Add success.
I0319 12:22:13.422959  543705 net.go:770] primary dev: ETH0
I0319 12:22:13.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:22:13.422989  543705 net.go:698] Add success.
W0319 12:22:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:22:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 12:22:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:22:14.455875  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:22:14.455884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:22:14.455889  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:22:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 12:22:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:22:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:22:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:22:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:22:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:22:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:22:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:22:16.472339  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:22:19.105673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:22:19.108033  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:22:19.108039  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272400 0xc000272440]
E0319 12:22:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:22:23.409794  543705 memory.go:184] no items to output this cycle
I0319 12:22:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 12:22:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:22:33.409910  543705 memory.go:184] no items to output this cycle
I0319 12:22:33.409964  543705 cpu.go:275] no items to output this cycle
E0319 12:22:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:22:43.409796  543705 cpu.go:282] Add success.
I0319 12:22:43.409803  543705 memory.go:191] Add success.
I0319 12:22:43.419908  543705 net.go:648] Add success.
I0319 12:22:43.422685  543705 net.go:770] primary dev: ETH0
I0319 12:22:43.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:22:43.422709  543705 net.go:698] Add success.
I0319 12:22:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:22:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:22:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:22:53.409768  543705 memory.go:184] no items to output this cycle
I0319 12:22:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:23:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:23:03.409784  543705 memory.go:184] no items to output this cycle
I0319 12:23:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 12:23:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:23:13.409814  543705 memory.go:191] Add success.
I0319 12:23:13.409814  543705 cpu.go:282] Add success.
W0319 12:23:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:23:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:23:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:23:13.420134  543705 net.go:648] Add success.
I0319 12:23:13.423177  543705 net.go:770] primary dev: ETH0
I0319 12:23:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:23:13.423208  543705 net.go:698] Add success.
I0319 12:23:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:23:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:23:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 12:23:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:23:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 12:23:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:23:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:23:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:23:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:23:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:23:19.109670  543705 disk_info.go:125] begin check local disk info of client
I0319 12:23:19.111961  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:23:19.111967  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035bb80 0xc00035bbc0]
E0319 12:23:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:23:23.409769  543705 memory.go:184] no items to output this cycle
I0319 12:23:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 12:23:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:23:33.409774  543705 memory.go:184] no items to output this cycle
I0319 12:23:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 12:23:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:23:43.409828  543705 memory.go:191] Add success.
I0319 12:23:43.409838  543705 cpu.go:282] Add success.
I0319 12:23:43.419979  543705 net.go:648] Add success.
I0319 12:23:43.422941  543705 net.go:770] primary dev: ETH0
I0319 12:23:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:23:43.422975  543705 net.go:698] Add success.
I0319 12:23:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:23:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:23:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:23:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:23:53.409799  543705 memory.go:184] no items to output this cycle
I0319 12:23:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 12:24:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:24:03.409783  543705 memory.go:184] no items to output this cycle
I0319 12:24:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 12:24:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:24:13.409790  543705 memory.go:191] Add success.
I0319 12:24:13.409808  543705 cpu.go:282] Add success.
W0319 12:24:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:24:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:24:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:24:13.420061  543705 net.go:648] Add success.
I0319 12:24:13.422931  543705 net.go:770] primary dev: ETH0
I0319 12:24:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:24:13.422959  543705 net.go:698] Add success.
I0319 12:24:13.480209  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"966e98ff-eff3-4044-8d68-53b1fbe217bd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:24:13.480244  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:24:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:24:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:24:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 12:24:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:24:14.456636  543705 disk_worker.go:494] system disk:vda1
I0319 12:24:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:24:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:24:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:24:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:24:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:24:19.113671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:24:19.116058  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:24:19.116064  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252c00 0xc000252c40]
E0319 12:24:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:24:23.409794  543705 memory.go:184] no items to output this cycle
I0319 12:24:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:24:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:24:33.409782  543705 memory.go:184] no items to output this cycle
I0319 12:24:33.409795  543705 cpu.go:275] no items to output this cycle
I0319 12:24:37.703221  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:24:37.703227  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:24:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:24:43.410809  543705 memory.go:191] Add success.
I0319 12:24:43.409805  543705 cpu.go:282] Add success.
I0319 12:24:43.420500  543705 net.go:648] Add success.
I0319 12:24:43.423349  543705 net.go:770] primary dev: ETH0
I0319 12:24:43.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:24:43.423380  543705 net.go:698] Add success.
I0319 12:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:24:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:24:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:24:53.409799  543705 memory.go:184] no items to output this cycle
I0319 12:24:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 12:25:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:25:03.409794  543705 memory.go:184] no items to output this cycle
I0319 12:25:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 12:25:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:25:13.409790  543705 memory.go:191] Add success.
I0319 12:25:13.409811  543705 cpu.go:282] Add success.
W0319 12:25:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:25:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:25:13.420168  543705 net.go:648] Add success.
I0319 12:25:13.422975  543705 net.go:770] primary dev: ETH0
I0319 12:25:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:25:13.423003  543705 net.go:698] Add success.
I0319 12:25:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:25:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:25:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 12:25:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:25:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 12:25:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:25:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:25:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:25:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:25:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:25:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:25:19.117669  543705 disk_info.go:125] begin check local disk info of client
I0319 12:25:19.120011  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:25:19.120017  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307000 0xc000307040]
E0319 12:25:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:25:23.409801  543705 memory.go:184] no items to output this cycle
I0319 12:25:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 12:25:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:25:33.409784  543705 memory.go:184] no items to output this cycle
I0319 12:25:33.409790  543705 cpu.go:275] no items to output this cycle
E0319 12:25:43.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:25:43.409920  543705 memory.go:191] Add success.
I0319 12:25:43.409932  543705 cpu.go:282] Add success.
I0319 12:25:43.419712  543705 net.go:648] Add success.
I0319 12:25:43.422479  543705 net.go:770] primary dev: ETH0
I0319 12:25:43.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:25:43.422503  543705 net.go:698] Add success.
I0319 12:25:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:25:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:25:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:25:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:25:53.409774  543705 memory.go:184] no items to output this cycle
I0319 12:25:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:26:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:26:03.409812  543705 memory.go:184] no items to output this cycle
I0319 12:26:03.409828  543705 cpu.go:275] no items to output this cycle
E0319 12:26:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:26:13.409772  543705 memory.go:191] Add success.
W0319 12:26:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:26:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:26:13.409809  543705 cpu.go:282] Add success.
I0319 12:26:13.409810  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:26:13.420112  543705 net.go:648] Add success.
I0319 12:26:13.422828  543705 net.go:770] primary dev: ETH0
I0319 12:26:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:26:13.422856  543705 net.go:698] Add success.
I0319 12:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:26:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:26:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0319 12:26:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:26:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 12:26:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:26:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:26:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:26:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:26:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:26:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:26:19.121673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:26:19.123988  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:26:19.123994  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306d80 0xc000306dc0]
E0319 12:26:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:26:23.409795  543705 memory.go:184] no items to output this cycle
I0319 12:26:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:26:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:26:33.409809  543705 memory.go:184] no items to output this cycle
I0319 12:26:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 12:26:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:26:43.409887  543705 cpu.go:282] Add success.
I0319 12:26:43.409894  543705 memory.go:191] Add success.
I0319 12:26:43.419732  543705 net.go:648] Add success.
I0319 12:26:43.422985  543705 net.go:770] primary dev: ETH0
I0319 12:26:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:26:43.423012  543705 net.go:698] Add success.
I0319 12:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:26:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:26:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:26:53.409768  543705 memory.go:184] no items to output this cycle
I0319 12:26:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 12:27:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:27:03.409789  543705 memory.go:184] no items to output this cycle
I0319 12:27:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 12:27:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:27:13.409781  543705 memory.go:191] Add success.
I0319 12:27:13.409803  543705 cpu.go:282] Add success.
W0319 12:27:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:27:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:27:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:27:13.420176  543705 net.go:648] Add success.
I0319 12:27:13.423281  543705 net.go:770] primary dev: ETH0
I0319 12:27:13.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:27:13.423307  543705 net.go:698] Add success.
I0319 12:27:13.429702  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 12:27:13.452938  543705 event_worker.go:152] Polling the log file for events...
I0319 12:27:13.468361  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5be117a5-59be-4a36-8e53-058b9a411dba","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:27:13.468394  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 12:27:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:27:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 12:27:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:27:14.456967  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:27:14.456976  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:27:14.456981  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:27:14.456982  543705 disk_worker.go:494] system disk:vda1
I0319 12:27:14.457011  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:27:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:27:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:27:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:27:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:27:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:27:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:27:16.472353  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:27:19.125671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:27:19.128005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:27:19.128011  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508d00 0xc000508d40]
E0319 12:27:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:27:23.409771  543705 memory.go:184] no items to output this cycle
I0319 12:27:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 12:27:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:27:33.409801  543705 memory.go:184] no items to output this cycle
I0319 12:27:33.409817  543705 cpu.go:275] no items to output this cycle
I0319 12:27:37.704229  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:27:37.704236  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:27:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:27:43.410806  543705 memory.go:191] Add success.
I0319 12:27:43.409791  543705 cpu.go:282] Add success.
I0319 12:27:43.419771  543705 net.go:648] Add success.
I0319 12:27:43.422660  543705 net.go:770] primary dev: ETH0
I0319 12:27:43.422686  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:27:43.422701  543705 net.go:698] Add success.
I0319 12:27:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:27:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:27:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:27:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:27:53.409785  543705 memory.go:184] no items to output this cycle
I0319 12:27:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 12:28:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:28:03.409800  543705 memory.go:184] no items to output this cycle
I0319 12:28:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:28:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:28:13.409827  543705 memory.go:191] Add success.
I0319 12:28:13.409835  543705 cpu.go:282] Add success.
W0319 12:28:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:28:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:28:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:28:13.420134  543705 net.go:648] Add success.
I0319 12:28:13.423046  543705 net.go:770] primary dev: ETH0
I0319 12:28:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:28:13.423076  543705 net.go:698] Add success.
I0319 12:28:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:28:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:28:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 12:28:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:28:14.456489  543705 disk_worker.go:494] system disk:vda1
I0319 12:28:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:28:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:28:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:28:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:28:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:28:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:28:19.129672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:28:19.132031  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:28:19.132037  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481740 0xc000481780]
E0319 12:28:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:28:23.409805  543705 memory.go:184] no items to output this cycle
I0319 12:28:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 12:28:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:28:33.409821  543705 memory.go:184] no items to output this cycle
I0319 12:28:33.409835  543705 cpu.go:275] no items to output this cycle
E0319 12:28:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:28:43.409786  543705 memory.go:191] Add success.
I0319 12:28:43.409811  543705 cpu.go:282] Add success.
I0319 12:28:43.420190  543705 net.go:648] Add success.
I0319 12:28:43.423092  543705 net.go:770] primary dev: ETH0
I0319 12:28:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:28:43.423116  543705 net.go:698] Add success.
I0319 12:28:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:28:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:28:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:28:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:28:53.409811  543705 memory.go:184] no items to output this cycle
I0319 12:28:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 12:29:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:29:03.409787  543705 memory.go:184] no items to output this cycle
I0319 12:29:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:29:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:29:13.409824  543705 memory.go:191] Add success.
I0319 12:29:13.409834  543705 cpu.go:282] Add success.
W0319 12:29:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:29:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:29:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:29:13.420139  543705 net.go:648] Add success.
I0319 12:29:13.423037  543705 net.go:770] primary dev: ETH0
I0319 12:29:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:29:13.423062  543705 net.go:698] Add success.
I0319 12:29:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:29:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:29:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 12:29:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:29:14.456564  543705 disk_worker.go:494] system disk:vda1
I0319 12:29:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:29:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:29:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:29:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:29:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:29:19.133672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:29:19.136014  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:29:19.136021  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003978c0 0xc000397c40]
E0319 12:29:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:29:23.409777  543705 memory.go:184] no items to output this cycle
I0319 12:29:23.409782  543705 cpu.go:275] no items to output this cycle
E0319 12:29:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:29:33.409779  543705 memory.go:184] no items to output this cycle
I0319 12:29:33.409781  543705 cpu.go:275] no items to output this cycle
E0319 12:29:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:29:43.409787  543705 memory.go:191] Add success.
I0319 12:29:43.409803  543705 cpu.go:282] Add success.
I0319 12:29:43.419891  543705 net.go:648] Add success.
I0319 12:29:43.423187  543705 net.go:770] primary dev: ETH0
I0319 12:29:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:29:43.423220  543705 net.go:698] Add success.
I0319 12:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:29:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:29:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:29:53.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:29:53.410255  543705 memory.go:184] no items to output this cycle
I0319 12:29:53.410285  543705 cpu.go:275] no items to output this cycle
E0319 12:30:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:30:03.409799  543705 memory.go:184] no items to output this cycle
I0319 12:30:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 12:30:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:30:13.409781  543705 memory.go:191] Add success.
W0319 12:30:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:30:13.409810  543705 cpu.go:282] Add success.
W0319 12:30:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:30:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:30:13.420162  543705 net.go:648] Add success.
I0319 12:30:13.423081  543705 net.go:770] primary dev: ETH0
I0319 12:30:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:30:13.423107  543705 net.go:698] Add success.
I0319 12:30:13.518421  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2ba94a5f-a500-49e0-9ad9-951c6b9de644","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:30:13.518454  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:30:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:30:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:30:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0319 12:30:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:30:14.456614  543705 disk_worker.go:494] system disk:vda1
I0319 12:30:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:30:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:30:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:30:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:30:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:30:19.137672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:30:19.140034  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:30:19.140040  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abe00 0xc0001abe40]
E0319 12:30:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:30:23.409788  543705 memory.go:184] no items to output this cycle
I0319 12:30:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:30:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:30:33.409811  543705 memory.go:184] no items to output this cycle
I0319 12:30:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 12:30:37.705219  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:30:37.705226  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:30:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:30:43.411020  543705 memory.go:191] Add success.
I0319 12:30:43.409814  543705 cpu.go:282] Add success.
I0319 12:30:43.419698  543705 net.go:648] Add success.
I0319 12:30:43.422451  543705 net.go:770] primary dev: ETH0
I0319 12:30:43.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:30:43.422479  543705 net.go:698] Add success.
I0319 12:30:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:30:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:30:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:30:53.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:30:53.409892  543705 memory.go:184] no items to output this cycle
I0319 12:30:53.410029  543705 cpu.go:275] no items to output this cycle
E0319 12:31:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:31:03.409773  543705 memory.go:184] no items to output this cycle
I0319 12:31:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 12:31:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:31:13.409775  543705 memory.go:191] Add success.
W0319 12:31:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:31:13.409808  543705 cpu.go:282] Add success.
W0319 12:31:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:31:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:31:13.420171  543705 net.go:648] Add success.
I0319 12:31:13.422912  543705 net.go:770] primary dev: ETH0
I0319 12:31:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:31:13.422949  543705 net.go:698] Add success.
I0319 12:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:31:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:31:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 12:31:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:31:14.456499  543705 disk_worker.go:494] system disk:vda1
I0319 12:31:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:31:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:31:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:31:19.141674  543705 disk_info.go:125] begin check local disk info of client
I0319 12:31:19.144041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:31:19.144048  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab8c0 0xc0001ab900]
E0319 12:31:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:31:23.409764  543705 memory.go:184] no items to output this cycle
I0319 12:31:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 12:31:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:31:33.409779  543705 memory.go:184] no items to output this cycle
I0319 12:31:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 12:31:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:31:43.409813  543705 memory.go:191] Add success.
I0319 12:31:43.409822  543705 cpu.go:282] Add success.
I0319 12:31:43.419970  543705 net.go:648] Add success.
I0319 12:31:43.423258  543705 net.go:770] primary dev: ETH0
I0319 12:31:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:31:43.423283  543705 net.go:698] Add success.
I0319 12:31:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:31:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:31:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:31:53.410464  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:31:53.410579  543705 cpu.go:275] no items to output this cycle
I0319 12:31:53.410591  543705 memory.go:184] no items to output this cycle
E0319 12:32:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:32:03.409813  543705 memory.go:184] no items to output this cycle
I0319 12:32:03.409830  543705 cpu.go:275] no items to output this cycle
E0319 12:32:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:32:13.409813  543705 memory.go:191] Add success.
I0319 12:32:13.409829  543705 cpu.go:282] Add success.
W0319 12:32:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:32:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:32:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:32:13.420198  543705 net.go:648] Add success.
I0319 12:32:13.422853  543705 net.go:770] primary dev: ETH0
I0319 12:32:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:32:13.422878  543705 net.go:698] Add success.
W0319 12:32:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:32:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 12:32:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:32:14.456899  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:32:14.456908  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:32:14.456915  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:32:14.456996  543705 disk_worker.go:494] system disk:vda1
I0319 12:32:14.457050  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:32:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:32:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:32:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:32:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:32:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:32:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:32:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:32:19.145671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:32:19.148047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:32:19.148053  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee380 0xc0003ee3c0]
E0319 12:32:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:32:23.409771  543705 memory.go:184] no items to output this cycle
I0319 12:32:23.409782  543705 cpu.go:275] no items to output this cycle
E0319 12:32:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:32:33.409810  543705 memory.go:184] no items to output this cycle
I0319 12:32:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 12:32:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:32:43.409775  543705 memory.go:191] Add success.
I0319 12:32:43.409814  543705 cpu.go:282] Add success.
I0319 12:32:43.419848  543705 net.go:648] Add success.
I0319 12:32:43.422669  543705 net.go:770] primary dev: ETH0
I0319 12:32:43.422683  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:32:43.422696  543705 net.go:698] Add success.
I0319 12:32:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:32:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:32:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:32:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:32:53.409773  543705 memory.go:184] no items to output this cycle
I0319 12:32:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:33:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:33:03.409793  543705 memory.go:184] no items to output this cycle
I0319 12:33:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 12:33:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:33:13.409813  543705 memory.go:191] Add success.
I0319 12:33:13.409817  543705 cpu.go:282] Add success.
W0319 12:33:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:33:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:33:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:33:13.420248  543705 net.go:648] Add success.
I0319 12:33:13.423032  543705 net.go:770] primary dev: ETH0
I0319 12:33:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:33:13.423057  543705 net.go:698] Add success.
I0319 12:33:13.471691  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ea0ee75-6b42-4099-abf7-a204be223434","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:33:13.471724  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:33:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:33:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:33:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0319 12:33:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:33:14.456634  543705 disk_worker.go:494] system disk:vda1
I0319 12:33:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:33:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:33:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:33:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:33:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:33:19.149672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:33:19.152130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:33:19.152136  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad40 0xc00007ad80]
E0319 12:33:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:33:23.409765  543705 memory.go:184] no items to output this cycle
I0319 12:33:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 12:33:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:33:33.409785  543705 memory.go:184] no items to output this cycle
I0319 12:33:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 12:33:37.705730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:33:37.705736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:33:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:33:43.410699  543705 memory.go:191] Add success.
I0319 12:33:43.409791  543705 cpu.go:282] Add success.
I0319 12:33:43.420395  543705 net.go:648] Add success.
I0319 12:33:43.423057  543705 net.go:770] primary dev: ETH0
I0319 12:33:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:33:43.423096  543705 net.go:698] Add success.
I0319 12:33:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:33:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:33:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:33:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:33:53.409776  543705 memory.go:184] no items to output this cycle
I0319 12:33:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 12:34:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:34:03.409788  543705 memory.go:184] no items to output this cycle
I0319 12:34:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:34:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:34:13.409816  543705 memory.go:191] Add success.
I0319 12:34:13.409819  543705 cpu.go:282] Add success.
W0319 12:34:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:34:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:34:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:34:13.420163  543705 net.go:648] Add success.
I0319 12:34:13.423282  543705 net.go:770] primary dev: ETH0
I0319 12:34:13.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:34:13.423312  543705 net.go:698] Add success.
I0319 12:34:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:34:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:34:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 12:34:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:34:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 12:34:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:34:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:34:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:34:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:34:19.153671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:34:19.156042  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:34:19.156048  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007afc0 0xc00007b000]
E0319 12:34:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:34:23.409758  543705 memory.go:184] no items to output this cycle
I0319 12:34:23.409787  543705 cpu.go:275] no items to output this cycle
E0319 12:34:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:34:33.409782  543705 memory.go:184] no items to output this cycle
I0319 12:34:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 12:34:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:34:43.409808  543705 memory.go:191] Add success.
I0319 12:34:43.409816  543705 cpu.go:282] Add success.
I0319 12:34:43.419948  543705 net.go:648] Add success.
I0319 12:34:43.422804  543705 net.go:770] primary dev: ETH0
I0319 12:34:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:34:43.422830  543705 net.go:698] Add success.
I0319 12:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:34:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:34:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:34:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:34:53.409805  543705 memory.go:184] no items to output this cycle
I0319 12:34:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 12:35:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:35:03.409808  543705 memory.go:184] no items to output this cycle
I0319 12:35:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 12:35:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:35:13.409813  543705 memory.go:191] Add success.
I0319 12:35:13.409822  543705 cpu.go:282] Add success.
W0319 12:35:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:35:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:35:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:35:13.420179  543705 net.go:648] Add success.
I0319 12:35:13.423051  543705 net.go:770] primary dev: ETH0
I0319 12:35:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:35:13.423076  543705 net.go:698] Add success.
I0319 12:35:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:35:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:35:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 12:35:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:35:14.456588  543705 disk_worker.go:494] system disk:vda1
I0319 12:35:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:35:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:35:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:35:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:35:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:35:19.157673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:35:19.160036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:35:19.160042  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab40 0xc00007ab80]
E0319 12:35:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:35:23.409774  543705 memory.go:184] no items to output this cycle
I0319 12:35:23.409774  543705 cpu.go:275] no items to output this cycle
E0319 12:35:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:35:33.409770  543705 memory.go:184] no items to output this cycle
I0319 12:35:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 12:35:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:35:43.409792  543705 memory.go:191] Add success.
I0319 12:35:43.409793  543705 cpu.go:282] Add success.
I0319 12:35:43.419984  543705 net.go:648] Add success.
I0319 12:35:43.423133  543705 net.go:770] primary dev: ETH0
I0319 12:35:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:35:43.423160  543705 net.go:698] Add success.
I0319 12:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:35:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:35:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:35:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:35:53.409902  543705 cpu.go:275] no items to output this cycle
I0319 12:35:53.409905  543705 memory.go:184] no items to output this cycle
E0319 12:36:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:36:03.409795  543705 memory.go:184] no items to output this cycle
I0319 12:36:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 12:36:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:36:13.409816  543705 memory.go:191] Add success.
I0319 12:36:13.409834  543705 cpu.go:282] Add success.
W0319 12:36:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:36:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:36:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:36:13.420371  543705 net.go:648] Add success.
I0319 12:36:13.423552  543705 net.go:770] primary dev: ETH0
I0319 12:36:13.423565  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:36:13.423578  543705 net.go:698] Add success.
I0319 12:36:13.463962  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7ec9f1ae-96a9-4401-8c45-0ffd2de2eaf0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:36:13.463995  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:36:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:36:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:36:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 12:36:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:36:14.456521  543705 disk_worker.go:494] system disk:vda1
I0319 12:36:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:36:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:36:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:36:19.161692  543705 disk_info.go:125] begin check local disk info of client
I0319 12:36:19.164055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:36:19.164061  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003efbc0 0xc0003efc00]
E0319 12:36:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:36:23.409798  543705 memory.go:184] no items to output this cycle
I0319 12:36:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:36:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:36:33.409780  543705 memory.go:184] no items to output this cycle
I0319 12:36:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 12:36:37.707233  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:36:37.707239  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:36:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:36:43.410871  543705 memory.go:191] Add success.
I0319 12:36:43.410002  543705 cpu.go:282] Add success.
I0319 12:36:43.419734  543705 net.go:648] Add success.
I0319 12:36:43.422614  543705 net.go:770] primary dev: ETH0
I0319 12:36:43.422628  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:36:43.422642  543705 net.go:698] Add success.
I0319 12:36:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:36:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:36:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:36:53.409785  543705 cpu.go:275] no items to output this cycle
I0319 12:36:53.409794  543705 memory.go:184] no items to output this cycle
E0319 12:37:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:37:03.409793  543705 cpu.go:275] no items to output this cycle
I0319 12:37:03.409797  543705 memory.go:184] no items to output this cycle
W0319 12:37:13.409701  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:37:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:37:13.409722  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 12:37:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:37:13.409816  543705 memory.go:191] Add success.
I0319 12:37:13.409826  543705 cpu.go:282] Add success.
I0319 12:37:13.420105  543705 net.go:648] Add success.
I0319 12:37:13.422937  543705 net.go:770] primary dev: ETH0
I0319 12:37:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:37:13.422962  543705 net.go:698] Add success.
I0319 12:37:13.453539  543705 event_worker.go:152] Polling the log file for events...
W0319 12:37:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:37:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 12:37:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:37:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:37:14.455898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:37:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:37:14.456567  543705 disk_worker.go:494] system disk:vda1
I0319 12:37:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:37:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:37:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:37:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:37:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:37:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:37:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:37:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:37:19.165674  543705 disk_info.go:125] begin check local disk info of client
I0319 12:37:19.168010  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:37:19.168016  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267380 0xc0002673c0]
E0319 12:37:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:37:23.409900  543705 cpu.go:275] no items to output this cycle
I0319 12:37:23.409905  543705 memory.go:184] no items to output this cycle
E0319 12:37:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:37:33.409784  543705 memory.go:184] no items to output this cycle
I0319 12:37:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 12:37:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:37:43.409785  543705 memory.go:191] Add success.
I0319 12:37:43.409817  543705 cpu.go:282] Add success.
I0319 12:37:43.419886  543705 net.go:648] Add success.
I0319 12:37:43.422746  543705 net.go:770] primary dev: ETH0
I0319 12:37:43.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:37:43.422774  543705 net.go:698] Add success.
I0319 12:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:37:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:37:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:37:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:37:53.409821  543705 memory.go:184] no items to output this cycle
I0319 12:37:53.409831  543705 cpu.go:275] no items to output this cycle
E0319 12:38:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:38:03.409791  543705 memory.go:184] no items to output this cycle
I0319 12:38:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 12:38:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:38:13.409786  543705 memory.go:191] Add success.
W0319 12:38:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:38:13.409821  543705 cpu.go:282] Add success.
W0319 12:38:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:38:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:38:13.420231  543705 net.go:648] Add success.
I0319 12:38:13.423148  543705 net.go:770] primary dev: ETH0
I0319 12:38:13.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:38:13.423173  543705 net.go:698] Add success.
I0319 12:38:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:38:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:38:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 12:38:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:38:14.456503  543705 disk_worker.go:494] system disk:vda1
I0319 12:38:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:38:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:38:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:38:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:38:19.169674  543705 disk_info.go:125] begin check local disk info of client
I0319 12:38:19.172045  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:38:19.172051  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6140 0xc0003b6180]
E0319 12:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:38:23.409790  543705 memory.go:184] no items to output this cycle
I0319 12:38:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:38:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:38:33.409792  543705 memory.go:184] no items to output this cycle
I0319 12:38:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:38:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:38:43.409810  543705 memory.go:191] Add success.
I0319 12:38:43.409815  543705 cpu.go:282] Add success.
I0319 12:38:43.419884  543705 net.go:648] Add success.
I0319 12:38:43.422901  543705 net.go:770] primary dev: ETH0
I0319 12:38:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:38:43.422930  543705 net.go:698] Add success.
I0319 12:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:38:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:38:53.410352  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:38:53.410368  543705 memory.go:184] no items to output this cycle
I0319 12:38:53.410375  543705 cpu.go:275] no items to output this cycle
E0319 12:39:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:39:03.409789  543705 memory.go:184] no items to output this cycle
I0319 12:39:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 12:39:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:39:13.409778  543705 memory.go:191] Add success.
W0319 12:39:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:39:13.409811  543705 cpu.go:282] Add success.
W0319 12:39:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:39:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:39:13.420104  543705 net.go:648] Add success.
I0319 12:39:13.423242  543705 net.go:770] primary dev: ETH0
I0319 12:39:13.423255  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:39:13.423267  543705 net.go:698] Add success.
I0319 12:39:13.470626  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"17b329d2-faef-4784-96d3-c16b8f8daa07","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:39:13.470660  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:39:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:39:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:39:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 12:39:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:39:14.456592  543705 disk_worker.go:494] system disk:vda1
I0319 12:39:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:39:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:39:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:39:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:39:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:39:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:39:19.173672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:39:19.176051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:39:19.176057  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f64c0 0xc0004f6500]
E0319 12:39:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:39:23.409795  543705 memory.go:184] no items to output this cycle
I0319 12:39:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:39:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:39:33.409794  543705 memory.go:184] no items to output this cycle
I0319 12:39:33.409808  543705 cpu.go:275] no items to output this cycle
I0319 12:39:37.708248  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:39:37.708256  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:39:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:39:43.410777  543705 memory.go:191] Add success.
I0319 12:39:43.409815  543705 cpu.go:282] Add success.
I0319 12:39:43.420505  543705 net.go:648] Add success.
I0319 12:39:43.423150  543705 net.go:770] primary dev: ETH0
I0319 12:39:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:39:43.423175  543705 net.go:698] Add success.
I0319 12:39:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:39:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:39:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:39:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:39:53.409771  543705 memory.go:184] no items to output this cycle
I0319 12:39:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 12:40:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:40:03.409774  543705 memory.go:184] no items to output this cycle
I0319 12:40:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:40:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:40:13.409792  543705 memory.go:191] Add success.
I0319 12:40:13.409797  543705 cpu.go:282] Add success.
W0319 12:40:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:40:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:40:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:40:13.420045  543705 net.go:648] Add success.
I0319 12:40:13.422873  543705 net.go:770] primary dev: ETH0
I0319 12:40:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:40:13.422898  543705 net.go:698] Add success.
I0319 12:40:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:40:14.455325  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:40:14.455427  543705 disk_worker.go:708] disk space is not compliant
W0319 12:40:14.455437  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:40:14.457053  543705 disk_worker.go:494] system disk:vda1
I0319 12:40:14.457081  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:40:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:40:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:40:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:40:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:40:16.472464  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:40:19.177672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:40:19.180039  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:40:19.180045  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374380 0xc0003743c0]
E0319 12:40:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:40:23.409765  543705 memory.go:184] no items to output this cycle
I0319 12:40:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 12:40:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:40:33.409783  543705 memory.go:184] no items to output this cycle
I0319 12:40:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:40:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:40:43.409789  543705 memory.go:191] Add success.
I0319 12:40:43.409795  543705 cpu.go:282] Add success.
I0319 12:40:43.419847  543705 net.go:648] Add success.
I0319 12:40:43.422680  543705 net.go:770] primary dev: ETH0
I0319 12:40:43.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:40:43.422706  543705 net.go:698] Add success.
I0319 12:40:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:40:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:40:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:40:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:40:53.409811  543705 memory.go:184] no items to output this cycle
I0319 12:40:53.409819  543705 cpu.go:275] no items to output this cycle
E0319 12:41:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:41:03.409813  543705 memory.go:184] no items to output this cycle
I0319 12:41:03.409827  543705 cpu.go:275] no items to output this cycle
W0319 12:41:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:41:13.409735  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:41:13.409741  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:41:13.409801  543705 cpu.go:282] Add success.
E0319 12:41:13.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:41:13.409864  543705 memory.go:191] Add success.
I0319 12:41:13.420038  543705 net.go:648] Add success.
I0319 12:41:13.423060  543705 net.go:770] primary dev: ETH0
I0319 12:41:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:41:13.423087  543705 net.go:698] Add success.
I0319 12:41:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:41:14.455470  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:41:14.455484  543705 disk_worker.go:708] disk space is not compliant
W0319 12:41:14.455493  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:41:14.457064  543705 disk_worker.go:494] system disk:vda1
I0319 12:41:14.457092  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:41:15.454976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:41:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:41:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:41:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:41:19.181671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:41:19.184057  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:41:19.184063  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e100 0xc00034e140]
E0319 12:41:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:41:23.409784  543705 memory.go:184] no items to output this cycle
I0319 12:41:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 12:41:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:41:33.409811  543705 memory.go:184] no items to output this cycle
I0319 12:41:33.409823  543705 cpu.go:275] no items to output this cycle
E0319 12:41:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:41:43.409788  543705 memory.go:191] Add success.
I0319 12:41:43.409813  543705 cpu.go:282] Add success.
I0319 12:41:43.419885  543705 net.go:648] Add success.
I0319 12:41:43.423206  543705 net.go:770] primary dev: ETH0
I0319 12:41:43.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:41:43.423233  543705 net.go:698] Add success.
I0319 12:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:41:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:41:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:41:53.409778  543705 memory.go:184] no items to output this cycle
I0319 12:41:53.409777  543705 cpu.go:275] no items to output this cycle
E0319 12:42:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:42:03.409786  543705 memory.go:184] no items to output this cycle
I0319 12:42:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 12:42:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:42:13.409808  543705 memory.go:191] Add success.
I0319 12:42:13.409816  543705 cpu.go:282] Add success.
W0319 12:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:42:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:42:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:42:13.420219  543705 net.go:648] Add success.
I0319 12:42:13.423088  543705 net.go:770] primary dev: ETH0
I0319 12:42:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:42:13.423115  543705 net.go:698] Add success.
I0319 12:42:13.470004  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e95a1050-3383-4a60-a185-ad44431bb964","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:42:13.470035  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 12:42:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:42:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0319 12:42:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:42:14.456931  543705 disk_worker.go:494] system disk:vda1
I0319 12:42:14.456971  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:42:14.457001  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:42:14.457010  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:42:14.457014  543705 custom_config.go:64] query custom config with name: gpu
E0319 12:42:15.456427  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:42:15.456436  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:42:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:42:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:42:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:42:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:42:16.472320  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:42:19.185672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:42:19.187984  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:42:19.187990  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492500 0xc000492540]
E0319 12:42:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:42:23.409790  543705 memory.go:184] no items to output this cycle
I0319 12:42:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 12:42:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:42:33.409783  543705 memory.go:184] no items to output this cycle
I0319 12:42:33.409791  543705 cpu.go:275] no items to output this cycle
I0319 12:42:37.709240  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:42:37.709247  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:42:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:42:43.410701  543705 memory.go:191] Add success.
I0319 12:42:43.409822  543705 cpu.go:282] Add success.
I0319 12:42:43.420480  543705 net.go:648] Add success.
I0319 12:42:43.423251  543705 net.go:770] primary dev: ETH0
I0319 12:42:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:42:43.423276  543705 net.go:698] Add success.
I0319 12:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:42:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:42:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:42:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:42:53.409801  543705 memory.go:184] no items to output this cycle
I0319 12:42:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 12:43:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:43:03.409794  543705 memory.go:184] no items to output this cycle
I0319 12:43:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 12:43:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:43:13.409823  543705 memory.go:191] Add success.
I0319 12:43:13.409828  543705 cpu.go:282] Add success.
W0319 12:43:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:43:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:43:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:43:13.419887  543705 net.go:770] primary dev: ETH0
I0319 12:43:13.419899  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:43:13.419911  543705 net.go:698] Add success.
I0319 12:43:13.420464  543705 net.go:648] Add success.
I0319 12:43:14.453936  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:43:14.455222  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:43:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0319 12:43:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:43:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 12:43:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:43:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:43:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:43:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:43:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:43:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:43:19.189679  543705 disk_info.go:125] begin check local disk info of client
I0319 12:43:19.192069  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:43:19.192075  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394440 0xc000394480]
E0319 12:43:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:43:23.409784  543705 memory.go:184] no items to output this cycle
I0319 12:43:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 12:43:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:43:33.409787  543705 memory.go:184] no items to output this cycle
I0319 12:43:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 12:43:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:43:43.409826  543705 memory.go:191] Add success.
I0319 12:43:43.409839  543705 cpu.go:282] Add success.
I0319 12:43:43.419998  543705 net.go:648] Add success.
I0319 12:43:43.422673  543705 net.go:770] primary dev: ETH0
I0319 12:43:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:43:43.422709  543705 net.go:698] Add success.
I0319 12:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:43:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:43:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:43:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:43:53.409808  543705 memory.go:184] no items to output this cycle
I0319 12:43:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 12:44:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:44:03.409783  543705 memory.go:184] no items to output this cycle
I0319 12:44:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 12:44:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:44:13.409801  543705 memory.go:191] Add success.
I0319 12:44:13.409806  543705 cpu.go:282] Add success.
W0319 12:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:44:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:44:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:44:13.419704  543705 net.go:648] Add success.
I0319 12:44:13.422606  543705 net.go:770] primary dev: ETH0
I0319 12:44:13.422619  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:44:13.422629  543705 net.go:698] Add success.
I0319 12:44:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:44:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:44:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 12:44:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:44:14.456570  543705 disk_worker.go:494] system disk:vda1
I0319 12:44:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:44:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:44:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:44:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:44:16.472361  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:44:19.193673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:44:19.196113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:44:19.196121  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275a80 0xc000275ac0]
E0319 12:44:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:44:23.409770  543705 memory.go:184] no items to output this cycle
I0319 12:44:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 12:44:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:44:33.409794  543705 memory.go:184] no items to output this cycle
I0319 12:44:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 12:44:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:44:43.409829  543705 memory.go:191] Add success.
I0319 12:44:43.409840  543705 cpu.go:282] Add success.
I0319 12:44:43.419993  543705 net.go:648] Add success.
I0319 12:44:43.422825  543705 net.go:770] primary dev: ETH0
I0319 12:44:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:44:43.422851  543705 net.go:698] Add success.
I0319 12:44:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:44:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:44:53.409787  543705 memory.go:184] no items to output this cycle
I0319 12:44:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 12:45:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:45:03.409785  543705 memory.go:184] no items to output this cycle
I0319 12:45:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 12:45:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:45:13.409808  543705 memory.go:191] Add success.
I0319 12:45:13.409808  543705 cpu.go:282] Add success.
W0319 12:45:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:45:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:45:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:45:13.420125  543705 net.go:648] Add success.
I0319 12:45:13.423034  543705 net.go:770] primary dev: ETH0
I0319 12:45:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:45:13.423058  543705 net.go:698] Add success.
I0319 12:45:13.464193  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7766f1ad-446f-4202-ad64-be3112d36738","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:45:13.464225  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:45:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:45:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:45:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0319 12:45:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:45:14.456832  543705 disk_worker.go:494] system disk:vda1
I0319 12:45:14.456865  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:45:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:45:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:45:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:45:16.472446  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:45:19.197671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:45:19.200028  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:45:19.200034  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7c40 0xc0003b7c80]
E0319 12:45:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:45:23.409811  543705 memory.go:184] no items to output this cycle
I0319 12:45:23.409822  543705 cpu.go:275] no items to output this cycle
E0319 12:45:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:45:33.409778  543705 memory.go:184] no items to output this cycle
I0319 12:45:33.409780  543705 cpu.go:275] no items to output this cycle
I0319 12:45:37.709737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:45:37.709744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:45:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:45:43.410638  543705 memory.go:191] Add success.
I0319 12:45:43.409824  543705 cpu.go:282] Add success.
I0319 12:45:43.420364  543705 net.go:648] Add success.
I0319 12:45:43.423039  543705 net.go:770] primary dev: ETH0
I0319 12:45:43.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:45:43.423065  543705 net.go:698] Add success.
I0319 12:45:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:45:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:45:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:45:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:45:53.409784  543705 memory.go:184] no items to output this cycle
I0319 12:45:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 12:46:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:46:03.409796  543705 memory.go:184] no items to output this cycle
I0319 12:46:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:46:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:46:13.409788  543705 memory.go:191] Add success.
I0319 12:46:13.409810  543705 cpu.go:282] Add success.
W0319 12:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:46:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:46:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:46:13.420206  543705 net.go:648] Add success.
I0319 12:46:13.423194  543705 net.go:770] primary dev: ETH0
I0319 12:46:13.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:46:13.423219  543705 net.go:698] Add success.
I0319 12:46:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:46:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:46:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 12:46:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:46:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 12:46:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:46:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:46:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:46:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:46:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:46:19.201677  543705 disk_info.go:125] begin check local disk info of client
I0319 12:46:19.204076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:46:19.204082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e7940 0xc0001e7980]
E0319 12:46:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:46:23.409818  543705 memory.go:184] no items to output this cycle
I0319 12:46:23.409831  543705 cpu.go:275] no items to output this cycle
E0319 12:46:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:46:33.409815  543705 memory.go:184] no items to output this cycle
I0319 12:46:33.409828  543705 cpu.go:275] no items to output this cycle
E0319 12:46:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:46:43.409785  543705 memory.go:191] Add success.
I0319 12:46:43.409815  543705 cpu.go:282] Add success.
I0319 12:46:43.419931  543705 net.go:648] Add success.
I0319 12:46:43.422729  543705 net.go:770] primary dev: ETH0
I0319 12:46:43.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:46:43.422757  543705 net.go:698] Add success.
I0319 12:46:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:46:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:46:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:46:53.409799  543705 memory.go:184] no items to output this cycle
I0319 12:46:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:47:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:47:03.409788  543705 memory.go:184] no items to output this cycle
I0319 12:47:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 12:47:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:47:13.409800  543705 memory.go:191] Add success.
I0319 12:47:13.409811  543705 cpu.go:282] Add success.
W0319 12:47:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:47:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:47:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:47:13.420132  543705 net.go:648] Add success.
I0319 12:47:13.423367  543705 net.go:770] primary dev: ETH0
I0319 12:47:13.423380  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:47:13.423392  543705 net.go:698] Add success.
I0319 12:47:13.452986  543705 event_worker.go:152] Polling the log file for events...
W0319 12:47:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:47:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 12:47:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:47:14.456957  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:47:14.456965  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:47:14.456971  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:47:14.457029  543705 disk_worker.go:494] system disk:vda1
I0319 12:47:14.457058  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:47:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:47:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:47:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:47:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:47:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:47:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:47:16.472335  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:47:19.205672  543705 disk_info.go:125] begin check local disk info of client
I0319 12:47:19.208036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:47:19.208041  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba00 0xc00007ba40]
E0319 12:47:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:47:23.409767  543705 memory.go:184] no items to output this cycle
I0319 12:47:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 12:47:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:47:33.409779  543705 memory.go:184] no items to output this cycle
I0319 12:47:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 12:47:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:47:43.409780  543705 memory.go:191] Add success.
I0319 12:47:43.409815  543705 cpu.go:282] Add success.
I0319 12:47:43.419875  543705 net.go:648] Add success.
I0319 12:47:43.423053  543705 net.go:770] primary dev: ETH0
I0319 12:47:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:47:43.423080  543705 net.go:698] Add success.
I0319 12:47:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:47:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:47:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:47:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:47:53.409805  543705 memory.go:184] no items to output this cycle
I0319 12:47:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 12:48:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:48:03.409789  543705 memory.go:184] no items to output this cycle
I0319 12:48:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 12:48:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:48:13.409825  543705 memory.go:191] Add success.
I0319 12:48:13.409829  543705 cpu.go:282] Add success.
W0319 12:48:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:48:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:48:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:48:13.420133  543705 net.go:648] Add success.
I0319 12:48:13.423695  543705 net.go:770] primary dev: ETH0
I0319 12:48:13.423708  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:48:13.423720  543705 net.go:698] Add success.
I0319 12:48:13.545566  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"74a6a192-4434-4307-95af-0c769298058a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:48:13.545600  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:48:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:48:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:48:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0319 12:48:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:48:14.456487  543705 disk_worker.go:494] system disk:vda1
I0319 12:48:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:48:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:48:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:48:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:48:19.209671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:48:19.212085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:48:19.212091  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c80 0xc0000c5cc0]
E0319 12:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:48:23.409779  543705 memory.go:184] no items to output this cycle
I0319 12:48:23.409787  543705 cpu.go:275] no items to output this cycle
E0319 12:48:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:48:33.409814  543705 memory.go:184] no items to output this cycle
I0319 12:48:33.409822  543705 cpu.go:275] no items to output this cycle
I0319 12:48:37.711250  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:48:37.711257  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:48:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:48:43.410751  543705 memory.go:191] Add success.
I0319 12:48:43.409823  543705 cpu.go:282] Add success.
I0319 12:48:43.420462  543705 net.go:648] Add success.
I0319 12:48:43.423431  543705 net.go:770] primary dev: ETH0
I0319 12:48:43.423448  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:48:43.423463  543705 net.go:698] Add success.
I0319 12:48:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:48:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:48:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:48:53.409763  543705 memory.go:184] no items to output this cycle
I0319 12:48:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 12:49:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:49:03.409775  543705 memory.go:184] no items to output this cycle
I0319 12:49:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:49:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:49:13.409794  543705 memory.go:191] Add success.
I0319 12:49:13.409805  543705 cpu.go:282] Add success.
W0319 12:49:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:49:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:49:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:49:13.420267  543705 net.go:648] Add success.
I0319 12:49:13.423304  543705 net.go:770] primary dev: ETH0
I0319 12:49:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:49:13.423329  543705 net.go:698] Add success.
I0319 12:49:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:49:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:49:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 12:49:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:49:14.456572  543705 disk_worker.go:494] system disk:vda1
I0319 12:49:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:49:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:49:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:49:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:49:19.213675  543705 disk_info.go:125] begin check local disk info of client
I0319 12:49:19.216257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:49:19.216263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8040 0xc0004a8080]
E0319 12:49:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:49:23.409790  543705 memory.go:184] no items to output this cycle
I0319 12:49:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 12:49:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:49:33.409769  543705 memory.go:184] no items to output this cycle
I0319 12:49:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:49:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:49:43.409798  543705 memory.go:191] Add success.
I0319 12:49:43.409799  543705 cpu.go:282] Add success.
I0319 12:49:43.419877  543705 net.go:648] Add success.
I0319 12:49:43.422966  543705 net.go:770] primary dev: ETH0
I0319 12:49:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:49:43.422990  543705 net.go:698] Add success.
I0319 12:49:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:49:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:49:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:49:53.409766  543705 memory.go:184] no items to output this cycle
I0319 12:49:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 12:50:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:50:03.409811  543705 memory.go:184] no items to output this cycle
I0319 12:50:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 12:50:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:50:13.409811  543705 memory.go:191] Add success.
I0319 12:50:13.409815  543705 cpu.go:282] Add success.
W0319 12:50:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:50:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:50:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:50:13.420127  543705 net.go:648] Add success.
I0319 12:50:13.422939  543705 net.go:770] primary dev: ETH0
I0319 12:50:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:50:13.422971  543705 net.go:698] Add success.
I0319 12:50:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:50:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:50:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 12:50:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:50:14.456550  543705 disk_worker.go:494] system disk:vda1
I0319 12:50:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:50:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:50:19.217670  543705 disk_info.go:125] begin check local disk info of client
I0319 12:50:19.220029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:50:19.220034  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382440 0xc000382480]
E0319 12:50:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:50:23.409796  543705 memory.go:184] no items to output this cycle
I0319 12:50:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:50:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:50:33.409807  543705 memory.go:184] no items to output this cycle
I0319 12:50:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 12:50:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:50:43.409785  543705 memory.go:191] Add success.
I0319 12:50:43.409816  543705 cpu.go:282] Add success.
I0319 12:50:43.419888  543705 net.go:648] Add success.
I0319 12:50:43.422763  543705 net.go:770] primary dev: ETH0
I0319 12:50:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:50:43.422794  543705 net.go:698] Add success.
I0319 12:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:50:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:50:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:50:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:50:53.409777  543705 memory.go:184] no items to output this cycle
I0319 12:50:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 12:51:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:51:03.409775  543705 memory.go:184] no items to output this cycle
I0319 12:51:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:51:13.409813  543705 memory.go:191] Add success.
I0319 12:51:13.409818  543705 cpu.go:282] Add success.
W0319 12:51:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:51:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:51:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:51:13.420112  543705 net.go:648] Add success.
I0319 12:51:13.422976  543705 net.go:770] primary dev: ETH0
I0319 12:51:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:51:13.423001  543705 net.go:698] Add success.
I0319 12:51:13.491205  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49c6f160-7c57-4f51-a000-ef2366f77c31","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:51:13.491246  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:51:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:51:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:51:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 12:51:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:51:14.456528  543705 disk_worker.go:494] system disk:vda1
I0319 12:51:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:51:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:51:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:51:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:51:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:51:19.221673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:51:19.224061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:51:19.224067  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002565c0 0xc000256600]
E0319 12:51:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:51:23.409766  543705 memory.go:184] no items to output this cycle
I0319 12:51:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 12:51:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:51:33.409797  543705 memory.go:184] no items to output this cycle
I0319 12:51:33.409812  543705 cpu.go:275] no items to output this cycle
I0319 12:51:37.712259  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:51:37.712268  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:51:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:51:43.410833  543705 memory.go:191] Add success.
I0319 12:51:43.409801  543705 cpu.go:282] Add success.
I0319 12:51:43.420513  543705 net.go:648] Add success.
I0319 12:51:43.423493  543705 net.go:770] primary dev: ETH0
I0319 12:51:43.423511  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:51:43.423524  543705 net.go:698] Add success.
I0319 12:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:51:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:51:53.409777  543705 memory.go:184] no items to output this cycle
I0319 12:51:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 12:52:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:52:03.409808  543705 memory.go:184] no items to output this cycle
I0319 12:52:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 12:52:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:52:13.409812  543705 memory.go:191] Add success.
I0319 12:52:13.409824  543705 cpu.go:282] Add success.
W0319 12:52:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:52:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:52:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:52:13.420107  543705 net.go:648] Add success.
I0319 12:52:13.422849  543705 net.go:770] primary dev: ETH0
I0319 12:52:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:52:13.422879  543705 net.go:698] Add success.
W0319 12:52:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:52:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 12:52:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:52:14.456932  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:52:14.456941  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:52:14.456948  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:52:14.456999  543705 disk_worker.go:494] system disk:vda1
I0319 12:52:14.457044  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:52:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:52:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:52:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:52:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:52:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:52:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:52:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:52:19.225680  543705 disk_info.go:125] begin check local disk info of client
I0319 12:52:19.227999  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:52:19.228005  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8300 0xc0003b8340]
E0319 12:52:23.410218  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:52:23.410237  543705 memory.go:184] no items to output this cycle
I0319 12:52:23.410248  543705 cpu.go:275] no items to output this cycle
E0319 12:52:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:52:33.409815  543705 memory.go:184] no items to output this cycle
I0319 12:52:33.409826  543705 cpu.go:275] no items to output this cycle
E0319 12:52:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:52:43.409818  543705 memory.go:191] Add success.
I0319 12:52:43.409829  543705 cpu.go:282] Add success.
I0319 12:52:43.419966  543705 net.go:648] Add success.
I0319 12:52:43.423283  543705 net.go:770] primary dev: ETH0
I0319 12:52:43.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:52:43.423309  543705 net.go:698] Add success.
I0319 12:52:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:52:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:52:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:52:53.409771  543705 memory.go:184] no items to output this cycle
I0319 12:52:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 12:53:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:53:03.409813  543705 memory.go:184] no items to output this cycle
I0319 12:53:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 12:53:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:53:13.409792  543705 memory.go:191] Add success.
W0319 12:53:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:53:13.409822  543705 cpu.go:282] Add success.
W0319 12:53:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:53:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:53:13.420135  543705 net.go:648] Add success.
I0319 12:53:13.423166  543705 net.go:770] primary dev: ETH0
I0319 12:53:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:53:13.423192  543705 net.go:698] Add success.
I0319 12:53:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:53:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:53:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 12:53:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:53:14.456561  543705 disk_worker.go:494] system disk:vda1
I0319 12:53:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:53:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:53:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:53:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:53:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:53:19.229664  543705 disk_info.go:125] begin check local disk info of client
I0319 12:53:19.232074  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:53:19.232080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aa600 0xc0003aa640]
E0319 12:53:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:53:23.409778  543705 memory.go:184] no items to output this cycle
I0319 12:53:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:53:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:53:33.409828  543705 memory.go:184] no items to output this cycle
I0319 12:53:33.409841  543705 cpu.go:275] no items to output this cycle
E0319 12:53:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:53:43.409828  543705 memory.go:191] Add success.
I0319 12:53:43.409839  543705 cpu.go:282] Add success.
I0319 12:53:43.419932  543705 net.go:648] Add success.
I0319 12:53:43.422836  543705 net.go:770] primary dev: ETH0
I0319 12:53:43.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:53:43.422861  543705 net.go:698] Add success.
I0319 12:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:53:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:53:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:53:53.409807  543705 memory.go:184] no items to output this cycle
I0319 12:53:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 12:54:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:54:03.409789  543705 memory.go:184] no items to output this cycle
I0319 12:54:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 12:54:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:54:13.409797  543705 memory.go:191] Add success.
I0319 12:54:13.409799  543705 cpu.go:282] Add success.
W0319 12:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:54:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:54:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:54:13.420146  543705 net.go:648] Add success.
I0319 12:54:13.422973  543705 net.go:770] primary dev: ETH0
I0319 12:54:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:54:13.422998  543705 net.go:698] Add success.
I0319 12:54:13.469687  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"72e6cbf2-87ea-4f9a-ab0c-8344e2bd336c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:54:13.469721  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 12:54:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:54:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:54:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 12:54:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:54:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 12:54:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:54:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:54:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:54:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:54:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:54:19.233673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:54:19.236048  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:54:19.236055  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9240 0xc0004a9280]
E0319 12:54:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:54:23.409801  543705 memory.go:184] no items to output this cycle
I0319 12:54:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 12:54:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:54:33.409893  543705 cpu.go:275] no items to output this cycle
I0319 12:54:33.409905  543705 memory.go:184] no items to output this cycle
I0319 12:54:37.713264  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:54:37.713271  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:54:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:54:43.410539  543705 memory.go:191] Add success.
I0319 12:54:43.409817  543705 cpu.go:282] Add success.
I0319 12:54:43.420314  543705 net.go:648] Add success.
I0319 12:54:43.422965  543705 net.go:770] primary dev: ETH0
I0319 12:54:43.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:54:43.422995  543705 net.go:698] Add success.
I0319 12:54:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:54:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:54:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:54:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:54:53.409802  543705 memory.go:184] no items to output this cycle
I0319 12:54:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:55:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:55:03.409784  543705 memory.go:184] no items to output this cycle
I0319 12:55:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 12:55:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:55:13.409817  543705 memory.go:191] Add success.
I0319 12:55:13.409825  543705 cpu.go:282] Add success.
W0319 12:55:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:55:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:55:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:55:13.420272  543705 net.go:648] Add success.
I0319 12:55:13.423188  543705 net.go:770] primary dev: ETH0
I0319 12:55:13.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:55:13.423214  543705 net.go:698] Add success.
I0319 12:55:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:55:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:55:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 12:55:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:55:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 12:55:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:55:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:55:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:55:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:55:19.237673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:55:19.240051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:55:19.240056  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003816c0 0xc000381700]
E0319 12:55:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:55:23.409776  543705 memory.go:184] no items to output this cycle
I0319 12:55:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 12:55:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:55:33.409790  543705 memory.go:184] no items to output this cycle
I0319 12:55:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 12:55:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:55:43.409801  543705 memory.go:191] Add success.
I0319 12:55:43.409803  543705 cpu.go:282] Add success.
I0319 12:55:43.420016  543705 net.go:648] Add success.
I0319 12:55:43.422998  543705 net.go:770] primary dev: ETH0
I0319 12:55:43.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:55:43.423023  543705 net.go:698] Add success.
I0319 12:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:55:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:55:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:55:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:55:53.409769  543705 memory.go:184] no items to output this cycle
I0319 12:55:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 12:56:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:56:03.409788  543705 memory.go:184] no items to output this cycle
I0319 12:56:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 12:56:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:56:13.409816  543705 memory.go:191] Add success.
I0319 12:56:13.409824  543705 cpu.go:282] Add success.
W0319 12:56:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:56:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:56:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:56:13.420167  543705 net.go:648] Add success.
I0319 12:56:13.422818  543705 net.go:770] primary dev: ETH0
I0319 12:56:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:56:13.422843  543705 net.go:698] Add success.
I0319 12:56:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:56:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:56:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 12:56:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:56:14.456593  543705 disk_worker.go:494] system disk:vda1
I0319 12:56:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:56:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:56:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:56:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:56:19.241675  543705 disk_info.go:125] begin check local disk info of client
I0319 12:56:19.244030  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:56:19.244035  543705 disk_info.go:196] parse disk info done, disk is : [0xc000517340 0xc000517380]
E0319 12:56:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:56:23.409798  543705 memory.go:184] no items to output this cycle
I0319 12:56:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:56:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:56:33.409809  543705 memory.go:184] no items to output this cycle
I0319 12:56:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 12:56:43.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:56:43.409959  543705 memory.go:191] Add success.
I0319 12:56:43.410087  543705 cpu.go:282] Add success.
I0319 12:56:43.419730  543705 net.go:648] Add success.
I0319 12:56:43.422737  543705 net.go:770] primary dev: ETH0
I0319 12:56:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:56:43.422762  543705 net.go:698] Add success.
I0319 12:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:56:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:56:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:56:53.409765  543705 memory.go:184] no items to output this cycle
I0319 12:56:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 12:57:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:57:03.409795  543705 memory.go:184] no items to output this cycle
I0319 12:57:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 12:57:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:57:13.409778  543705 memory.go:191] Add success.
W0319 12:57:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 12:57:13.409809  543705 cpu.go:282] Add success.
W0319 12:57:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:57:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:57:13.420142  543705 net.go:648] Add success.
I0319 12:57:13.423081  543705 net.go:770] primary dev: ETH0
I0319 12:57:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:57:13.423105  543705 net.go:698] Add success.
I0319 12:57:13.429709  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 12:57:13.452892  543705 event_worker.go:152] Polling the log file for events...
I0319 12:57:13.463584  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07898ae3-4395-4887-b39f-ae705e7ed561","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 12:57:13.463619  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 12:57:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:57:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 12:57:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0319 12:57:14.456971  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 12:57:14.456991  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 12:57:14.456997  543705 custom_config.go:64] query custom config with name: gpu
I0319 12:57:14.457019  543705 disk_worker.go:494] system disk:vda1
I0319 12:57:14.457059  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 12:57:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 12:57:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:57:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 12:57:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 12:57:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:57:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:57:16.472360  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:57:19.245673  543705 disk_info.go:125] begin check local disk info of client
I0319 12:57:19.248018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:57:19.248024  543705 disk_info.go:196] parse disk info done, disk is : [0xc000292640 0xc000292680]
E0319 12:57:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:57:23.409787  543705 memory.go:184] no items to output this cycle
I0319 12:57:23.409802  543705 cpu.go:275] no items to output this cycle
I0319 12:57:33.409880  543705 cpu.go:275] no items to output this cycle
E0319 12:57:33.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:57:33.409900  543705 memory.go:184] no items to output this cycle
I0319 12:57:37.713733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 12:57:37.713740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 12:57:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:57:43.410623  543705 memory.go:191] Add success.
I0319 12:57:43.409813  543705 cpu.go:282] Add success.
I0319 12:57:43.420415  543705 net.go:648] Add success.
I0319 12:57:43.422981  543705 net.go:770] primary dev: ETH0
I0319 12:57:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:57:43.423006  543705 net.go:698] Add success.
I0319 12:57:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:57:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:57:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:57:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:57:53.409794  543705 memory.go:184] no items to output this cycle
I0319 12:57:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 12:58:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:58:03.409795  543705 memory.go:184] no items to output this cycle
I0319 12:58:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 12:58:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:58:13.409801  543705 memory.go:191] Add success.
I0319 12:58:13.409802  543705 cpu.go:282] Add success.
W0319 12:58:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:58:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:58:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:58:13.420150  543705 net.go:648] Add success.
I0319 12:58:13.422873  543705 net.go:770] primary dev: ETH0
I0319 12:58:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:58:13.422901  543705 net.go:698] Add success.
I0319 12:58:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:58:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:58:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 12:58:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:58:14.456601  543705 disk_worker.go:494] system disk:vda1
I0319 12:58:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:58:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:58:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:58:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:58:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:58:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:58:19.249671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:58:19.252059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:58:19.252065  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396780 0xc0003967c0]
E0319 12:58:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:58:23.409787  543705 memory.go:184] no items to output this cycle
I0319 12:58:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 12:58:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:58:33.409810  543705 memory.go:184] no items to output this cycle
I0319 12:58:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 12:58:43.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:58:43.409926  543705 memory.go:191] Add success.
I0319 12:58:43.409949  543705 cpu.go:282] Add success.
I0319 12:58:43.419742  543705 net.go:648] Add success.
I0319 12:58:43.422371  543705 net.go:770] primary dev: ETH0
I0319 12:58:43.422385  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:58:43.422399  543705 net.go:698] Add success.
I0319 12:58:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:58:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:58:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:58:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:58:53.409773  543705 memory.go:184] no items to output this cycle
I0319 12:58:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 12:59:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:59:03.409776  543705 memory.go:184] no items to output this cycle
I0319 12:59:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 12:59:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:59:13.409795  543705 memory.go:191] Add success.
I0319 12:59:13.409802  543705 cpu.go:282] Add success.
W0319 12:59:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 12:59:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 12:59:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 12:59:13.420080  543705 net.go:648] Add success.
I0319 12:59:13.422716  543705 net.go:770] primary dev: ETH0
I0319 12:59:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:59:13.422742  543705 net.go:698] Add success.
I0319 12:59:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 12:59:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 12:59:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 12:59:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0319 12:59:14.456491  543705 disk_worker.go:494] system disk:vda1
I0319 12:59:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 12:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 12:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:59:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:59:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 12:59:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 12:59:19.253671  543705 disk_info.go:125] begin check local disk info of client
I0319 12:59:19.256096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 12:59:19.256102  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521f00 0xc000521f40]
E0319 12:59:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:59:23.409803  543705 memory.go:184] no items to output this cycle
I0319 12:59:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 12:59:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:59:33.409803  543705 memory.go:184] no items to output this cycle
I0319 12:59:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 12:59:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:59:43.409788  543705 memory.go:191] Add success.
I0319 12:59:43.409813  543705 cpu.go:282] Add success.
I0319 12:59:43.420264  543705 net.go:648] Add success.
I0319 12:59:43.423047  543705 net.go:770] primary dev: ETH0
I0319 12:59:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0319 12:59:43.423075  543705 net.go:698] Add success.
I0319 12:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 12:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 12:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 12:59:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 12:59:53.409786  543705 memory.go:184] no items to output this cycle
I0319 12:59:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 13:00:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:00:03.409793  543705 memory.go:184] no items to output this cycle
I0319 13:00:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:00:13.409789  543705 memory.go:191] Add success.
W0319 13:00:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:00:13.409820  543705 cpu.go:282] Add success.
W0319 13:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:00:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:00:13.420185  543705 net.go:648] Add success.
I0319 13:00:13.423055  543705 net.go:770] primary dev: ETH0
I0319 13:00:13.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:00:13.423082  543705 net.go:698] Add success.
I0319 13:00:13.470175  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"acc03c8c-ddeb-4782-ae1c-5fe149b3f274","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:00:13.470208  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:00:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:00:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:00:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 13:00:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:00:14.456489  543705 disk_worker.go:494] system disk:vda1
I0319 13:00:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:00:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:00:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:00:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:00:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:00:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:00:19.257681  543705 disk_info.go:125] begin check local disk info of client
I0319 13:00:19.260047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:00:19.260053  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be800 0xc0002be840]
E0319 13:00:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:00:23.409799  543705 memory.go:184] no items to output this cycle
I0319 13:00:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:00:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:00:33.409792  543705 memory.go:184] no items to output this cycle
I0319 13:00:33.409814  543705 cpu.go:275] no items to output this cycle
I0319 13:00:37.713887  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:00:37.713894  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:00:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:00:43.410716  543705 memory.go:191] Add success.
I0319 13:00:43.409805  543705 cpu.go:282] Add success.
I0319 13:00:43.420622  543705 net.go:648] Add success.
I0319 13:00:43.423178  543705 net.go:770] primary dev: ETH0
I0319 13:00:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:00:43.423203  543705 net.go:698] Add success.
I0319 13:00:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:00:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:00:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:00:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:00:53.409785  543705 memory.go:184] no items to output this cycle
I0319 13:00:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 13:01:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:01:03.409783  543705 memory.go:184] no items to output this cycle
I0319 13:01:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:01:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:01:13.409808  543705 memory.go:191] Add success.
I0319 13:01:13.409829  543705 cpu.go:282] Add success.
W0319 13:01:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:01:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:01:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:01:13.420275  543705 net.go:648] Add success.
I0319 13:01:13.423414  543705 net.go:770] primary dev: ETH0
I0319 13:01:13.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:01:13.423438  543705 net.go:698] Add success.
I0319 13:01:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:01:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:01:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 13:01:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:01:14.456523  543705 disk_worker.go:494] system disk:vda1
I0319 13:01:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:01:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:01:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:01:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:01:19.261675  543705 disk_info.go:125] begin check local disk info of client
I0319 13:01:19.264035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:01:19.264041  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be9c0 0xc0002bea00]
E0319 13:01:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:01:23.409803  543705 memory.go:184] no items to output this cycle
I0319 13:01:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 13:01:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:01:33.409795  543705 memory.go:184] no items to output this cycle
I0319 13:01:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:01:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:01:43.409834  543705 memory.go:191] Add success.
I0319 13:01:43.409840  543705 cpu.go:282] Add success.
I0319 13:01:43.419765  543705 net.go:648] Add success.
I0319 13:01:43.422391  543705 net.go:770] primary dev: ETH0
I0319 13:01:43.422405  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:01:43.422419  543705 net.go:698] Add success.
I0319 13:01:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:01:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:01:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:01:53.410231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:01:53.410247  543705 memory.go:184] no items to output this cycle
I0319 13:01:53.410279  543705 cpu.go:275] no items to output this cycle
E0319 13:02:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:02:03.409774  543705 memory.go:184] no items to output this cycle
I0319 13:02:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 13:02:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:02:13.409777  543705 memory.go:191] Add success.
W0319 13:02:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:02:13.409820  543705 cpu.go:282] Add success.
W0319 13:02:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:02:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:02:13.420159  543705 net.go:648] Add success.
I0319 13:02:13.423004  543705 net.go:770] primary dev: ETH0
I0319 13:02:13.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:02:13.423033  543705 net.go:698] Add success.
W0319 13:02:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:02:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 13:02:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:02:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:02:14.455919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:02:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:02:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 13:02:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:02:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:02:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:02:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:02:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:02:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:02:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:02:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:02:19.265671  543705 disk_info.go:125] begin check local disk info of client
I0319 13:02:19.268067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:02:19.268072  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521a40 0xc000521a80]
E0319 13:02:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:02:23.409786  543705 memory.go:184] no items to output this cycle
I0319 13:02:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 13:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:02:33.409787  543705 memory.go:184] no items to output this cycle
I0319 13:02:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 13:02:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:02:43.409874  543705 memory.go:191] Add success.
I0319 13:02:43.409955  543705 cpu.go:282] Add success.
I0319 13:02:43.419731  543705 net.go:648] Add success.
I0319 13:02:43.422578  543705 net.go:770] primary dev: ETH0
I0319 13:02:43.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:02:43.422606  543705 net.go:698] Add success.
I0319 13:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:02:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:02:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:02:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:02:53.409795  543705 memory.go:184] no items to output this cycle
I0319 13:02:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:03:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:03:03.409810  543705 memory.go:184] no items to output this cycle
I0319 13:03:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 13:03:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:03:13.409821  543705 memory.go:191] Add success.
I0319 13:03:13.409830  543705 cpu.go:282] Add success.
W0319 13:03:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:03:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:03:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:03:13.420131  543705 net.go:648] Add success.
I0319 13:03:13.422943  543705 net.go:770] primary dev: ETH0
I0319 13:03:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:03:13.422970  543705 net.go:698] Add success.
I0319 13:03:13.469183  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9b137a76-8dd6-44ee-a067-a0cf50c8f9b6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:03:13.469215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:03:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:03:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:03:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 13:03:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:03:14.456583  543705 disk_worker.go:494] system disk:vda1
I0319 13:03:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:03:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:03:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:03:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:03:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:03:19.269673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:03:19.272089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:03:19.272095  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386100 0xc000386140]
E0319 13:03:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:03:23.409786  543705 memory.go:184] no items to output this cycle
I0319 13:03:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 13:03:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:03:33.409769  543705 memory.go:184] no items to output this cycle
I0319 13:03:33.409798  543705 cpu.go:275] no items to output this cycle
I0319 13:03:37.715270  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:03:37.715277  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:03:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:03:43.410753  543705 memory.go:191] Add success.
I0319 13:03:43.409969  543705 cpu.go:282] Add success.
I0319 13:03:43.419716  543705 net.go:648] Add success.
I0319 13:03:43.422606  543705 net.go:770] primary dev: ETH0
I0319 13:03:43.422621  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:03:43.422636  543705 net.go:698] Add success.
I0319 13:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:03:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:03:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:03:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:03:53.409804  543705 memory.go:184] no items to output this cycle
I0319 13:03:53.409820  543705 cpu.go:275] no items to output this cycle
E0319 13:04:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:04:03.409792  543705 cpu.go:275] no items to output this cycle
I0319 13:04:03.409797  543705 memory.go:184] no items to output this cycle
E0319 13:04:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:04:13.409788  543705 memory.go:191] Add success.
I0319 13:04:13.409790  543705 cpu.go:282] Add success.
W0319 13:04:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:04:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:04:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:04:13.420090  543705 net.go:648] Add success.
I0319 13:04:13.423059  543705 net.go:770] primary dev: ETH0
I0319 13:04:13.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:04:13.423086  543705 net.go:698] Add success.
I0319 13:04:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:04:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:04:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 13:04:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:04:14.456519  543705 disk_worker.go:494] system disk:vda1
I0319 13:04:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:04:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:04:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:04:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:04:16.472361  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:04:19.273673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:04:19.276103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:04:19.276110  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9940 0xc0004a9980]
E0319 13:04:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:04:23.409774  543705 memory.go:184] no items to output this cycle
I0319 13:04:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 13:04:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:04:33.409815  543705 memory.go:184] no items to output this cycle
I0319 13:04:33.409826  543705 cpu.go:275] no items to output this cycle
E0319 13:04:43.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:04:43.409961  543705 memory.go:191] Add success.
I0319 13:04:43.409965  543705 cpu.go:282] Add success.
I0319 13:04:43.419760  543705 net.go:648] Add success.
I0319 13:04:43.422556  543705 net.go:770] primary dev: ETH0
I0319 13:04:43.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:04:43.422584  543705 net.go:698] Add success.
I0319 13:04:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:04:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:04:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:04:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:04:53.409776  543705 memory.go:184] no items to output this cycle
I0319 13:04:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 13:05:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:05:03.409817  543705 memory.go:184] no items to output this cycle
I0319 13:05:03.409831  543705 cpu.go:275] no items to output this cycle
E0319 13:05:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:05:13.409777  543705 memory.go:191] Add success.
W0319 13:05:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:05:13.409807  543705 cpu.go:282] Add success.
W0319 13:05:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:05:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:05:13.420067  543705 net.go:648] Add success.
I0319 13:05:13.422831  543705 net.go:770] primary dev: ETH0
I0319 13:05:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:05:13.422855  543705 net.go:698] Add success.
I0319 13:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:05:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:05:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 13:05:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:05:14.456582  543705 disk_worker.go:494] system disk:vda1
I0319 13:05:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:05:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:05:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:05:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:05:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:05:19.277673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:05:19.280048  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:05:19.280055  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521600 0xc000521640]
E0319 13:05:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:05:23.409804  543705 memory.go:184] no items to output this cycle
I0319 13:05:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 13:05:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:05:33.409782  543705 memory.go:184] no items to output this cycle
I0319 13:05:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 13:05:43.409960  543705 cpu.go:282] Add success.
E0319 13:05:43.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:05:43.410074  543705 memory.go:191] Add success.
I0319 13:05:43.419708  543705 net.go:648] Add success.
I0319 13:05:43.422285  543705 net.go:770] primary dev: ETH0
I0319 13:05:43.422298  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:05:43.422310  543705 net.go:698] Add success.
I0319 13:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:05:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:05:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:05:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:05:53.409768  543705 memory.go:184] no items to output this cycle
I0319 13:05:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:06:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:06:03.409783  543705 memory.go:184] no items to output this cycle
I0319 13:06:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 13:06:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:06:13.409782  543705 memory.go:191] Add success.
W0319 13:06:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:06:13.409814  543705 cpu.go:282] Add success.
W0319 13:06:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:06:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:06:13.420156  543705 net.go:648] Add success.
I0319 13:06:13.422696  543705 net.go:770] primary dev: ETH0
I0319 13:06:13.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:06:13.422725  543705 net.go:698] Add success.
I0319 13:06:13.565265  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f78e7bd2-c4e7-4585-986d-723e5a845070","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:06:13.565298  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:06:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:06:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:06:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 13:06:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:06:14.456742  543705 disk_worker.go:494] system disk:vda1
I0319 13:06:14.456771  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:06:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:06:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:06:16.472427  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:06:19.281678  543705 disk_info.go:125] begin check local disk info of client
I0319 13:06:19.284018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:06:19.284028  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bff40 0xc00032a000]
E0319 13:06:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:06:23.409781  543705 memory.go:184] no items to output this cycle
I0319 13:06:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 13:06:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:06:33.409788  543705 memory.go:184] no items to output this cycle
I0319 13:06:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 13:06:37.715422  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:06:37.715429  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:06:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:06:43.410657  543705 memory.go:191] Add success.
I0319 13:06:43.409804  543705 cpu.go:282] Add success.
I0319 13:06:43.420362  543705 net.go:648] Add success.
I0319 13:06:43.422873  543705 net.go:770] primary dev: ETH0
I0319 13:06:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:06:43.422898  543705 net.go:698] Add success.
I0319 13:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:06:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:06:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:06:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:06:53.409766  543705 memory.go:184] no items to output this cycle
I0319 13:06:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 13:07:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:07:03.409807  543705 memory.go:184] no items to output this cycle
I0319 13:07:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 13:07:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:07:13.409783  543705 memory.go:191] Add success.
I0319 13:07:13.409803  543705 cpu.go:282] Add success.
W0319 13:07:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:07:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:07:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:07:13.420047  543705 net.go:648] Add success.
I0319 13:07:13.422888  543705 net.go:770] primary dev: ETH0
I0319 13:07:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:07:13.422917  543705 net.go:698] Add success.
I0319 13:07:13.452780  543705 event_worker.go:152] Polling the log file for events...
W0319 13:07:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:07:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 13:07:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:07:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:07:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:07:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:07:14.456553  543705 disk_worker.go:494] system disk:vda1
I0319 13:07:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:07:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:07:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:07:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:07:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:07:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:07:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:07:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:07:19.285677  543705 disk_info.go:125] begin check local disk info of client
I0319 13:07:19.288130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:07:19.288136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0080 0xc0003b00c0]
E0319 13:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:07:23.409796  543705 memory.go:184] no items to output this cycle
I0319 13:07:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:07:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:07:33.409866  543705 memory.go:184] no items to output this cycle
I0319 13:07:33.409923  543705 cpu.go:275] no items to output this cycle
E0319 13:07:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:07:43.409783  543705 memory.go:191] Add success.
I0319 13:07:43.409818  543705 cpu.go:282] Add success.
I0319 13:07:43.420035  543705 net.go:648] Add success.
I0319 13:07:43.422878  543705 net.go:770] primary dev: ETH0
I0319 13:07:43.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:07:43.422905  543705 net.go:698] Add success.
I0319 13:07:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:07:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:07:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:07:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:07:53.409767  543705 memory.go:184] no items to output this cycle
I0319 13:07:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 13:08:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:08:03.409779  543705 memory.go:184] no items to output this cycle
I0319 13:08:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 13:08:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:08:13.409828  543705 memory.go:191] Add success.
I0319 13:08:13.409843  543705 cpu.go:282] Add success.
W0319 13:08:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:08:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:08:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:08:13.420290  543705 net.go:648] Add success.
I0319 13:08:13.423120  543705 net.go:770] primary dev: ETH0
I0319 13:08:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:08:13.423146  543705 net.go:698] Add success.
I0319 13:08:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:08:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:08:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 13:08:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:08:14.456607  543705 disk_worker.go:494] system disk:vda1
I0319 13:08:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:08:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:08:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:08:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:08:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:08:19.289676  543705 disk_info.go:125] begin check local disk info of client
I0319 13:08:19.292104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:08:19.292111  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290640 0xc000290680]
E0319 13:08:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:08:23.409802  543705 memory.go:184] no items to output this cycle
I0319 13:08:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:08:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:08:33.409780  543705 memory.go:184] no items to output this cycle
I0319 13:08:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:08:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:08:43.409780  543705 memory.go:191] Add success.
I0319 13:08:43.409804  543705 cpu.go:282] Add success.
I0319 13:08:43.419989  543705 net.go:648] Add success.
I0319 13:08:43.422969  543705 net.go:770] primary dev: ETH0
I0319 13:08:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:08:43.422999  543705 net.go:698] Add success.
I0319 13:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:08:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:08:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:08:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:08:53.409779  543705 memory.go:184] no items to output this cycle
I0319 13:08:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 13:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:09:03.409784  543705 memory.go:184] no items to output this cycle
I0319 13:09:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:09:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:09:13.409817  543705 memory.go:191] Add success.
I0319 13:09:13.409822  543705 cpu.go:282] Add success.
W0319 13:09:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:09:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:09:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:09:13.420401  543705 net.go:648] Add success.
I0319 13:09:13.423156  543705 net.go:770] primary dev: ETH0
I0319 13:09:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:09:13.423181  543705 net.go:698] Add success.
I0319 13:09:13.624332  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"19e3b232-2629-45fb-acc3-5dfeb7c45501","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:09:13.624368  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:09:14.453987  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:09:14.454204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:09:14.454217  543705 disk_worker.go:708] disk space is not compliant
W0319 13:09:14.454220  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:09:14.455606  543705 disk_worker.go:494] system disk:vda1
I0319 13:09:14.455663  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:09:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:09:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:09:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:09:16.472527  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:09:19.293676  543705 disk_info.go:125] begin check local disk info of client
I0319 13:09:19.296138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:09:19.296145  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a340 0xc00029a380]
E0319 13:09:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:09:23.409777  543705 memory.go:184] no items to output this cycle
I0319 13:09:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 13:09:33.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:09:33.409882  543705 memory.go:184] no items to output this cycle
I0319 13:09:33.409964  543705 cpu.go:275] no items to output this cycle
I0319 13:09:37.716272  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:09:37.716279  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:09:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:09:43.410680  543705 memory.go:191] Add success.
I0319 13:09:43.409805  543705 cpu.go:282] Add success.
I0319 13:09:43.420403  543705 net.go:648] Add success.
I0319 13:09:43.423115  543705 net.go:770] primary dev: ETH0
I0319 13:09:43.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:09:43.423143  543705 net.go:698] Add success.
I0319 13:09:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:09:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:09:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:09:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:09:53.409770  543705 memory.go:184] no items to output this cycle
I0319 13:09:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 13:10:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:10:03.409783  543705 memory.go:184] no items to output this cycle
I0319 13:10:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:10:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:10:13.409791  543705 memory.go:191] Add success.
I0319 13:10:13.409795  543705 cpu.go:282] Add success.
W0319 13:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:10:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:10:13.420051  543705 net.go:648] Add success.
I0319 13:10:13.422877  543705 net.go:770] primary dev: ETH0
I0319 13:10:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:10:13.422904  543705 net.go:698] Add success.
I0319 13:10:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:10:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:10:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 13:10:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:10:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 13:10:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:10:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:10:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:10:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:10:16.472515  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:10:19.297674  543705 disk_info.go:125] begin check local disk info of client
I0319 13:10:19.300121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:10:19.300127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002af540 0xc0002af580]
E0319 13:10:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:10:23.409803  543705 memory.go:184] no items to output this cycle
I0319 13:10:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:10:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:10:33.409786  543705 cpu.go:275] no items to output this cycle
I0319 13:10:33.409793  543705 memory.go:184] no items to output this cycle
E0319 13:10:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:10:43.409801  543705 memory.go:191] Add success.
I0319 13:10:43.409803  543705 cpu.go:282] Add success.
I0319 13:10:43.420023  543705 net.go:648] Add success.
I0319 13:10:43.422701  543705 net.go:770] primary dev: ETH0
I0319 13:10:43.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:10:43.422727  543705 net.go:698] Add success.
I0319 13:10:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:10:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:10:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:10:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:10:53.409791  543705 memory.go:184] no items to output this cycle
I0319 13:10:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 13:11:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:11:03.409775  543705 memory.go:184] no items to output this cycle
I0319 13:11:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 13:11:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:11:13.409809  543705 memory.go:191] Add success.
I0319 13:11:13.409818  543705 cpu.go:282] Add success.
W0319 13:11:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:11:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:11:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:11:13.420135  543705 net.go:648] Add success.
I0319 13:11:13.422982  543705 net.go:770] primary dev: ETH0
I0319 13:11:13.423001  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:11:13.423016  543705 net.go:698] Add success.
I0319 13:11:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:11:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:11:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 13:11:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:11:14.456574  543705 disk_worker.go:494] system disk:vda1
I0319 13:11:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:11:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:11:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:11:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:11:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:11:16.472495  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:11:19.301674  543705 disk_info.go:125] begin check local disk info of client
I0319 13:11:19.304150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:11:19.304156  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6e80 0xc0003b6ec0]
E0319 13:11:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:11:23.409775  543705 memory.go:184] no items to output this cycle
I0319 13:11:23.409782  543705 cpu.go:275] no items to output this cycle
E0319 13:11:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:11:33.409770  543705 memory.go:184] no items to output this cycle
I0319 13:11:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:11:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:11:43.409904  543705 cpu.go:282] Add success.
I0319 13:11:43.409921  543705 memory.go:191] Add success.
I0319 13:11:43.419745  543705 net.go:648] Add success.
I0319 13:11:43.422881  543705 net.go:770] primary dev: ETH0
I0319 13:11:43.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:11:43.422908  543705 net.go:698] Add success.
I0319 13:11:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:11:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:11:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:11:53.410733  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:11:53.410753  543705 memory.go:184] no items to output this cycle
I0319 13:11:53.410764  543705 cpu.go:275] no items to output this cycle
E0319 13:12:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:12:03.409807  543705 memory.go:184] no items to output this cycle
I0319 13:12:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 13:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:12:13.409791  543705 cpu.go:282] Add success.
I0319 13:12:13.409801  543705 memory.go:191] Add success.
W0319 13:12:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:12:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:12:13.420299  543705 net.go:648] Add success.
I0319 13:12:13.422992  543705 net.go:770] primary dev: ETH0
I0319 13:12:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:12:13.423018  543705 net.go:698] Add success.
I0319 13:12:13.469705  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0c2bd783-78ee-4f30-9602-0684e7e5af0b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:12:13.469741  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 13:12:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:12:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 13:12:14.455229  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:12:14.455991  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:12:14.456000  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:12:14.456006  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:12:14.456846  543705 disk_worker.go:494] system disk:vda1
I0319 13:12:14.456880  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:12:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:12:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:12:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:12:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:12:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:12:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:12:16.472357  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:12:19.305671  543705 disk_info.go:125] begin check local disk info of client
I0319 13:12:19.308006  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:12:19.308012  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024d240 0xc00024d280]
E0319 13:12:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:12:23.409797  543705 memory.go:184] no items to output this cycle
I0319 13:12:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 13:12:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:12:33.409795  543705 cpu.go:275] no items to output this cycle
I0319 13:12:33.409796  543705 memory.go:184] no items to output this cycle
I0319 13:12:37.717284  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:12:37.717291  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:12:43.409938  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:12:43.410809  543705 memory.go:191] Add success.
I0319 13:12:43.409993  543705 cpu.go:282] Add success.
I0319 13:12:43.419829  543705 net.go:648] Add success.
I0319 13:12:43.422638  543705 net.go:770] primary dev: ETH0
I0319 13:12:43.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:12:43.422662  543705 net.go:698] Add success.
I0319 13:12:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:12:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:12:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:12:53.409772  543705 memory.go:184] no items to output this cycle
I0319 13:12:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 13:13:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:13:03.409781  543705 memory.go:184] no items to output this cycle
I0319 13:13:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 13:13:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:13:13.409809  543705 memory.go:191] Add success.
I0319 13:13:13.409808  543705 cpu.go:282] Add success.
W0319 13:13:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:13:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:13:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:13:13.420077  543705 net.go:648] Add success.
I0319 13:13:13.423286  543705 net.go:770] primary dev: ETH0
I0319 13:13:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:13:13.423311  543705 net.go:698] Add success.
I0319 13:13:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:13:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:13:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 13:13:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:13:14.456597  543705 disk_worker.go:494] system disk:vda1
I0319 13:13:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:13:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:13:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:13:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:13:19.309673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:13:19.312053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:13:19.312062  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7700 0xc0003b7740]
E0319 13:13:23.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:13:23.410390  543705 memory.go:184] no items to output this cycle
I0319 13:13:23.410400  543705 cpu.go:275] no items to output this cycle
E0319 13:13:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:13:33.409766  543705 memory.go:184] no items to output this cycle
I0319 13:13:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:13:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:13:43.409930  543705 memory.go:191] Add success.
I0319 13:13:43.410119  543705 cpu.go:282] Add success.
I0319 13:13:43.419713  543705 net.go:648] Add success.
I0319 13:13:43.422378  543705 net.go:770] primary dev: ETH0
I0319 13:13:43.422391  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:13:43.422403  543705 net.go:698] Add success.
I0319 13:13:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:13:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:13:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:13:53.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:13:53.410262  543705 memory.go:184] no items to output this cycle
I0319 13:13:53.410282  543705 cpu.go:275] no items to output this cycle
E0319 13:14:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:14:03.409793  543705 memory.go:184] no items to output this cycle
I0319 13:14:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 13:14:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:14:13.409799  543705 memory.go:191] Add success.
I0319 13:14:13.409799  543705 cpu.go:282] Add success.
W0319 13:14:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:14:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:14:13.420323  543705 net.go:648] Add success.
I0319 13:14:13.422949  543705 net.go:770] primary dev: ETH0
I0319 13:14:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:14:13.422981  543705 net.go:698] Add success.
I0319 13:14:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:14:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:14:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 13:14:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:14:14.456505  543705 disk_worker.go:494] system disk:vda1
I0319 13:14:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:14:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:14:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:14:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:14:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:14:19.313671  543705 disk_info.go:125] begin check local disk info of client
I0319 13:14:19.316105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:14:19.316110  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7100 0xc0003b7140]
E0319 13:14:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:14:23.409799  543705 memory.go:184] no items to output this cycle
I0319 13:14:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:14:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:14:33.409774  543705 memory.go:184] no items to output this cycle
I0319 13:14:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 13:14:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:14:43.409830  543705 memory.go:191] Add success.
I0319 13:14:43.409839  543705 cpu.go:282] Add success.
I0319 13:14:43.420050  543705 net.go:648] Add success.
I0319 13:14:43.422742  543705 net.go:770] primary dev: ETH0
I0319 13:14:43.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:14:43.422767  543705 net.go:698] Add success.
I0319 13:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:14:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:14:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:14:53.410410  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:14:53.410426  543705 memory.go:184] no items to output this cycle
I0319 13:14:53.410449  543705 cpu.go:275] no items to output this cycle
E0319 13:15:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:15:03.409783  543705 memory.go:184] no items to output this cycle
I0319 13:15:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 13:15:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:15:13.409819  543705 memory.go:191] Add success.
I0319 13:15:13.409830  543705 cpu.go:282] Add success.
W0319 13:15:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:15:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:15:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:15:13.420259  543705 net.go:648] Add success.
I0319 13:15:13.423106  543705 net.go:770] primary dev: ETH0
I0319 13:15:13.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:15:13.423130  543705 net.go:698] Add success.
I0319 13:15:13.468928  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b5130d5-9708-45bc-813d-9944e2d79716","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:15:13.468962  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:15:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:15:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 13:15:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:15:14.456678  543705 disk_worker.go:494] system disk:vda1
I0319 13:15:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:15:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:15:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:15:19.317676  543705 disk_info.go:125] begin check local disk info of client
I0319 13:15:19.320045  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:15:19.320051  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abe80 0xc0001abec0]
E0319 13:15:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:15:23.409796  543705 memory.go:184] no items to output this cycle
I0319 13:15:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:15:33.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:15:33.409908  543705 memory.go:184] no items to output this cycle
I0319 13:15:33.410014  543705 cpu.go:275] no items to output this cycle
I0319 13:15:37.717731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:15:37.717738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:15:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:15:43.410686  543705 memory.go:191] Add success.
I0319 13:15:43.409840  543705 cpu.go:282] Add success.
I0319 13:15:43.420371  543705 net.go:648] Add success.
I0319 13:15:43.423213  543705 net.go:770] primary dev: ETH0
I0319 13:15:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:15:43.423239  543705 net.go:698] Add success.
I0319 13:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:15:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:15:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:15:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:15:53.409770  543705 memory.go:184] no items to output this cycle
I0319 13:15:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 13:16:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:16:03.409813  543705 memory.go:184] no items to output this cycle
I0319 13:16:03.409825  543705 cpu.go:275] no items to output this cycle
E0319 13:16:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:16:13.409783  543705 memory.go:191] Add success.
W0319 13:16:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:16:13.409811  543705 cpu.go:282] Add success.
W0319 13:16:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:16:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:16:13.420399  543705 net.go:648] Add success.
I0319 13:16:13.423125  543705 net.go:770] primary dev: ETH0
I0319 13:16:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:16:13.423150  543705 net.go:698] Add success.
I0319 13:16:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:16:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:16:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 13:16:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:16:14.456523  543705 disk_worker.go:494] system disk:vda1
I0319 13:16:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:16:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:16:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:16:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:16:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:16:19.321673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:16:19.324040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:16:19.324046  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf3c0 0xc0003bf400]
E0319 13:16:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:16:23.409805  543705 memory.go:184] no items to output this cycle
I0319 13:16:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 13:16:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:16:33.409795  543705 memory.go:184] no items to output this cycle
I0319 13:16:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:16:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:16:43.409788  543705 memory.go:191] Add success.
I0319 13:16:43.409814  543705 cpu.go:282] Add success.
I0319 13:16:43.419990  543705 net.go:648] Add success.
I0319 13:16:43.422815  543705 net.go:770] primary dev: ETH0
I0319 13:16:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:16:43.422845  543705 net.go:698] Add success.
I0319 13:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:16:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:16:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:16:53.410382  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:16:53.410401  543705 memory.go:184] no items to output this cycle
I0319 13:16:53.410416  543705 cpu.go:275] no items to output this cycle
E0319 13:17:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:17:03.409788  543705 memory.go:184] no items to output this cycle
I0319 13:17:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 13:17:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:17:13.409820  543705 memory.go:191] Add success.
I0319 13:17:13.409832  543705 cpu.go:282] Add success.
W0319 13:17:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:17:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:17:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:17:13.420185  543705 net.go:648] Add success.
I0319 13:17:13.423012  543705 net.go:770] primary dev: ETH0
I0319 13:17:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:17:13.423042  543705 net.go:698] Add success.
I0319 13:17:13.453610  543705 event_worker.go:152] Polling the log file for events...
W0319 13:17:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:17:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 13:17:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:17:14.456778  543705 disk_worker.go:494] system disk:vda1
I0319 13:17:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:17:14.457139  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:17:14.457146  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:17:14.457151  543705 custom_config.go:64] query custom config with name: gpu
E0319 13:17:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:17:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:17:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:17:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:17:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:17:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:17:16.472346  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:17:19.327012  543705 disk_info.go:125] begin check local disk info of client
I0319 13:17:19.329372  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:17:19.329378  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be980 0xc0003be9c0]
E0319 13:17:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:17:23.409758  543705 memory.go:184] no items to output this cycle
I0319 13:17:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 13:17:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:17:33.409780  543705 memory.go:184] no items to output this cycle
I0319 13:17:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 13:17:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:17:43.409827  543705 memory.go:191] Add success.
I0319 13:17:43.409828  543705 cpu.go:282] Add success.
I0319 13:17:43.420008  543705 net.go:648] Add success.
I0319 13:17:43.422895  543705 net.go:770] primary dev: ETH0
I0319 13:17:43.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:17:43.422921  543705 net.go:698] Add success.
I0319 13:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:17:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:17:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:17:53.409774  543705 memory.go:184] no items to output this cycle
I0319 13:17:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 13:18:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:18:03.409812  543705 memory.go:184] no items to output this cycle
I0319 13:18:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 13:18:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:18:13.409794  543705 memory.go:191] Add success.
I0319 13:18:13.409815  543705 cpu.go:282] Add success.
W0319 13:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:18:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:18:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:18:13.420161  543705 net.go:648] Add success.
I0319 13:18:13.423025  543705 net.go:770] primary dev: ETH0
I0319 13:18:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:18:13.423055  543705 net.go:698] Add success.
I0319 13:18:13.533607  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f560c095-4868-4073-b481-b1009afb0d87","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:18:13.533660  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:18:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:18:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:18:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 13:18:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:18:14.456526  543705 disk_worker.go:494] system disk:vda1
I0319 13:18:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:18:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:18:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:18:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:18:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:18:19.329675  543705 disk_info.go:125] begin check local disk info of client
I0319 13:18:19.332048  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:18:19.332056  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474000 0xc000474040]
E0319 13:18:23.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:18:23.409874  543705 memory.go:184] no items to output this cycle
I0319 13:18:23.409949  543705 cpu.go:275] no items to output this cycle
E0319 13:18:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:18:33.409773  543705 memory.go:184] no items to output this cycle
I0319 13:18:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 13:18:37.719284  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:18:37.719291  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:18:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:18:43.410761  543705 memory.go:191] Add success.
I0319 13:18:43.409804  543705 cpu.go:282] Add success.
I0319 13:18:43.420523  543705 net.go:648] Add success.
I0319 13:18:43.423171  543705 net.go:770] primary dev: ETH0
I0319 13:18:43.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:18:43.423200  543705 net.go:698] Add success.
I0319 13:18:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:18:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:18:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:18:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:18:53.409768  543705 memory.go:184] no items to output this cycle
I0319 13:18:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 13:19:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:19:03.409780  543705 memory.go:184] no items to output this cycle
I0319 13:19:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 13:19:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:19:13.409802  543705 memory.go:191] Add success.
I0319 13:19:13.409804  543705 cpu.go:282] Add success.
W0319 13:19:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:19:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:19:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:19:13.420124  543705 net.go:648] Add success.
I0319 13:19:13.422874  543705 net.go:770] primary dev: ETH0
I0319 13:19:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:19:13.422904  543705 net.go:698] Add success.
I0319 13:19:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:19:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:19:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 13:19:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:19:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 13:19:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:19:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:19:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:19:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:19:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:19:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:19:19.333669  543705 disk_info.go:125] begin check local disk info of client
I0319 13:19:19.336082  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:19:19.336088  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304ac0 0xc000304b00]
E0319 13:19:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:19:23.409776  543705 cpu.go:275] no items to output this cycle
I0319 13:19:23.409778  543705 memory.go:184] no items to output this cycle
E0319 13:19:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:19:33.409768  543705 memory.go:184] no items to output this cycle
I0319 13:19:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:19:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:19:43.409821  543705 memory.go:191] Add success.
I0319 13:19:43.409835  543705 cpu.go:282] Add success.
I0319 13:19:43.420232  543705 net.go:648] Add success.
I0319 13:19:43.423134  543705 net.go:770] primary dev: ETH0
I0319 13:19:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:19:43.423159  543705 net.go:698] Add success.
I0319 13:19:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:19:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:19:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:19:53.409768  543705 memory.go:184] no items to output this cycle
I0319 13:19:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 13:20:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:20:03.409817  543705 memory.go:184] no items to output this cycle
I0319 13:20:03.409836  543705 cpu.go:275] no items to output this cycle
E0319 13:20:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:20:13.409798  543705 memory.go:191] Add success.
I0319 13:20:13.409804  543705 cpu.go:282] Add success.
W0319 13:20:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:20:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:20:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:20:13.420096  543705 net.go:648] Add success.
I0319 13:20:13.422784  543705 net.go:770] primary dev: ETH0
I0319 13:20:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:20:13.422813  543705 net.go:698] Add success.
I0319 13:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:20:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:20:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0319 13:20:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:20:14.456478  543705 disk_worker.go:494] system disk:vda1
I0319 13:20:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:20:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:20:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:20:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:20:19.337671  543705 disk_info.go:125] begin check local disk info of client
I0319 13:20:19.340117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:20:19.340124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028a400 0xc00028a440]
E0319 13:20:23.410510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:20:23.410547  543705 memory.go:184] no items to output this cycle
I0319 13:20:23.410588  543705 cpu.go:275] no items to output this cycle
E0319 13:20:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:20:33.409778  543705 memory.go:184] no items to output this cycle
I0319 13:20:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:20:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:20:43.409814  543705 memory.go:191] Add success.
I0319 13:20:43.409822  543705 cpu.go:282] Add success.
I0319 13:20:43.419955  543705 net.go:648] Add success.
I0319 13:20:43.422863  543705 net.go:770] primary dev: ETH0
I0319 13:20:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:20:43.422893  543705 net.go:698] Add success.
I0319 13:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:20:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:20:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:20:53.410357  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:20:53.410370  543705 cpu.go:275] no items to output this cycle
I0319 13:20:53.410373  543705 memory.go:184] no items to output this cycle
E0319 13:21:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:21:03.409782  543705 memory.go:184] no items to output this cycle
I0319 13:21:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 13:21:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:21:13.409825  543705 memory.go:191] Add success.
I0319 13:21:13.409834  543705 cpu.go:282] Add success.
W0319 13:21:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:21:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:21:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:21:13.420128  543705 net.go:648] Add success.
I0319 13:21:13.422964  543705 net.go:770] primary dev: ETH0
I0319 13:21:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:21:13.422988  543705 net.go:698] Add success.
I0319 13:21:13.464157  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e19901d8-0b58-4e1b-b34f-093570fe7f7a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:21:13.464189  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:21:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:21:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:21:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 13:21:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:21:14.456504  543705 disk_worker.go:494] system disk:vda1
I0319 13:21:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:21:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:21:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:21:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:21:19.341675  543705 disk_info.go:125] begin check local disk info of client
I0319 13:21:19.344039  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:21:19.344045  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a840 0xc00036a880]
E0319 13:21:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:21:23.409788  543705 memory.go:184] no items to output this cycle
I0319 13:21:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 13:21:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:21:33.409783  543705 memory.go:184] no items to output this cycle
I0319 13:21:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 13:21:37.720302  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:21:37.720309  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:21:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:21:43.410768  543705 memory.go:191] Add success.
I0319 13:21:43.409802  543705 cpu.go:282] Add success.
I0319 13:21:43.420476  543705 net.go:648] Add success.
I0319 13:21:43.423038  543705 net.go:770] primary dev: ETH0
I0319 13:21:43.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:21:43.423067  543705 net.go:698] Add success.
I0319 13:21:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:21:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:21:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:21:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:21:53.409764  543705 memory.go:184] no items to output this cycle
I0319 13:21:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:22:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:22:03.409794  543705 memory.go:184] no items to output this cycle
I0319 13:22:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:22:13.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:22:13.409911  543705 memory.go:191] Add success.
W0319 13:22:13.409940  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:22:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:22:13.409960  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:22:13.410188  543705 cpu.go:282] Add success.
I0319 13:22:13.419717  543705 net.go:648] Add success.
I0319 13:22:13.422355  543705 net.go:770] primary dev: ETH0
I0319 13:22:13.422368  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:22:13.422379  543705 net.go:698] Add success.
W0319 13:22:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:22:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 13:22:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:22:14.456781  543705 disk_worker.go:494] system disk:vda1
I0319 13:22:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:22:14.457100  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:22:14.457108  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:22:14.457112  543705 custom_config.go:64] query custom config with name: gpu
E0319 13:22:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:22:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:22:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:22:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:22:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:22:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:22:16.472323  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:22:19.345682  543705 disk_info.go:125] begin check local disk info of client
I0319 13:22:19.348005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:22:19.348011  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6380 0xc0003b63c0]
E0319 13:22:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:22:23.409794  543705 memory.go:184] no items to output this cycle
I0319 13:22:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 13:22:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:22:33.409814  543705 memory.go:184] no items to output this cycle
I0319 13:22:33.409825  543705 cpu.go:275] no items to output this cycle
E0319 13:22:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:22:43.409789  543705 memory.go:191] Add success.
I0319 13:22:43.409822  543705 cpu.go:282] Add success.
I0319 13:22:43.419994  543705 net.go:648] Add success.
I0319 13:22:43.422645  543705 net.go:770] primary dev: ETH0
I0319 13:22:43.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:22:43.422671  543705 net.go:698] Add success.
I0319 13:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:22:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:22:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:22:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:22:53.409776  543705 memory.go:184] no items to output this cycle
I0319 13:22:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:23:03.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:23:03.409946  543705 memory.go:184] no items to output this cycle
I0319 13:23:03.409953  543705 cpu.go:275] no items to output this cycle
E0319 13:23:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:23:13.409801  543705 memory.go:191] Add success.
W0319 13:23:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:23:13.409837  543705 cpu.go:282] Add success.
W0319 13:23:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:23:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:23:13.420291  543705 net.go:648] Add success.
I0319 13:23:13.423019  543705 net.go:770] primary dev: ETH0
I0319 13:23:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:23:13.423048  543705 net.go:698] Add success.
I0319 13:23:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:23:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:23:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0319 13:23:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:23:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 13:23:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:23:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:23:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:23:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:23:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:23:19.349674  543705 disk_info.go:125] begin check local disk info of client
I0319 13:23:19.352052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:23:19.352058  543705 disk_info.go:196] parse disk info done, disk is : [0xc000321500 0xc000321540]
E0319 13:23:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:23:23.409804  543705 memory.go:184] no items to output this cycle
I0319 13:23:23.409825  543705 cpu.go:275] no items to output this cycle
E0319 13:23:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:23:33.409816  543705 memory.go:184] no items to output this cycle
I0319 13:23:33.409823  543705 cpu.go:275] no items to output this cycle
E0319 13:23:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:23:43.409797  543705 memory.go:191] Add success.
I0319 13:23:43.409842  543705 cpu.go:282] Add success.
I0319 13:23:43.420101  543705 net.go:648] Add success.
I0319 13:23:43.422899  543705 net.go:770] primary dev: ETH0
I0319 13:23:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:23:43.422924  543705 net.go:698] Add success.
I0319 13:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:23:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:23:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:23:53.409767  543705 memory.go:184] no items to output this cycle
I0319 13:23:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 13:24:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:24:03.409780  543705 memory.go:184] no items to output this cycle
I0319 13:24:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 13:24:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:24:13.409822  543705 memory.go:191] Add success.
I0319 13:24:13.409823  543705 cpu.go:282] Add success.
W0319 13:24:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:24:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:24:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:24:13.420130  543705 net.go:648] Add success.
I0319 13:24:13.423124  543705 net.go:770] primary dev: ETH0
I0319 13:24:13.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:24:13.423158  543705 net.go:698] Add success.
I0319 13:24:13.806745  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c1d8d9f1-9e9e-4027-b0ca-b9825152647f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:24:13.806783  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:24:14.453985  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:24:14.454218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:24:14.454228  543705 disk_worker.go:708] disk space is not compliant
W0319 13:24:14.454231  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:24:14.455724  543705 disk_worker.go:494] system disk:vda1
I0319 13:24:14.455752  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:24:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:24:19.353672  543705 disk_info.go:125] begin check local disk info of client
I0319 13:24:19.356114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:24:19.356121  543705 disk_info.go:196] parse disk info done, disk is : [0xc00055dc80 0xc00055dcc0]
E0319 13:24:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:24:23.409766  543705 memory.go:184] no items to output this cycle
I0319 13:24:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 13:24:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:24:33.409789  543705 memory.go:184] no items to output this cycle
I0319 13:24:33.409806  543705 cpu.go:275] no items to output this cycle
I0319 13:24:37.721297  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:24:37.721304  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:24:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:24:43.410759  543705 memory.go:191] Add success.
I0319 13:24:43.409800  543705 cpu.go:282] Add success.
I0319 13:24:43.420469  543705 net.go:648] Add success.
I0319 13:24:43.423627  543705 net.go:770] primary dev: ETH0
I0319 13:24:43.423640  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:24:43.423653  543705 net.go:698] Add success.
I0319 13:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:24:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:24:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:24:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:24:53.409765  543705 memory.go:184] no items to output this cycle
I0319 13:24:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:25:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:25:03.409812  543705 memory.go:184] no items to output this cycle
I0319 13:25:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 13:25:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:25:13.409816  543705 memory.go:191] Add success.
I0319 13:25:13.409820  543705 cpu.go:282] Add success.
W0319 13:25:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:25:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:25:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:25:13.420152  543705 net.go:648] Add success.
I0319 13:25:13.423225  543705 net.go:770] primary dev: ETH0
I0319 13:25:13.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:25:13.423252  543705 net.go:698] Add success.
I0319 13:25:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:25:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:25:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 13:25:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:25:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 13:25:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:25:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:25:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:25:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:25:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:25:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:25:19.357672  543705 disk_info.go:125] begin check local disk info of client
I0319 13:25:19.360079  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:25:19.360086  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c280 0xc00034c2c0]
E0319 13:25:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:25:23.409760  543705 memory.go:184] no items to output this cycle
I0319 13:25:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 13:25:33.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:25:33.409867  543705 memory.go:184] no items to output this cycle
I0319 13:25:33.409972  543705 cpu.go:275] no items to output this cycle
E0319 13:25:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:25:43.409807  543705 memory.go:191] Add success.
I0319 13:25:43.409809  543705 cpu.go:282] Add success.
I0319 13:25:43.419807  543705 net.go:770] primary dev: ETH0
I0319 13:25:43.419822  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:25:43.419836  543705 net.go:698] Add success.
I0319 13:25:43.420218  543705 net.go:648] Add success.
I0319 13:25:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:25:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:25:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:25:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:25:53.409766  543705 memory.go:184] no items to output this cycle
I0319 13:25:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:26:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:26:03.409805  543705 cpu.go:275] no items to output this cycle
I0319 13:26:03.409817  543705 memory.go:184] no items to output this cycle
E0319 13:26:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:26:13.409817  543705 memory.go:191] Add success.
I0319 13:26:13.409829  543705 cpu.go:282] Add success.
W0319 13:26:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:26:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:26:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:26:13.420131  543705 net.go:648] Add success.
I0319 13:26:13.422961  543705 net.go:770] primary dev: ETH0
I0319 13:26:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:26:13.422990  543705 net.go:698] Add success.
I0319 13:26:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:26:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:26:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 13:26:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:26:14.456690  543705 disk_worker.go:494] system disk:vda1
I0319 13:26:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:26:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:26:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:26:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:26:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:26:19.361674  543705 disk_info.go:125] begin check local disk info of client
I0319 13:26:19.364044  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:26:19.364049  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000df800 0xc0000df840]
E0319 13:26:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:26:23.409793  543705 memory.go:184] no items to output this cycle
I0319 13:26:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:26:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:26:33.409784  543705 memory.go:184] no items to output this cycle
I0319 13:26:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 13:26:43.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:26:43.409889  543705 memory.go:191] Add success.
I0319 13:26:43.409952  543705 cpu.go:282] Add success.
I0319 13:26:43.419730  543705 net.go:648] Add success.
I0319 13:26:43.422556  543705 net.go:770] primary dev: ETH0
I0319 13:26:43.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:26:43.422581  543705 net.go:698] Add success.
I0319 13:26:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:26:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:26:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:26:53.409800  543705 memory.go:184] no items to output this cycle
I0319 13:26:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 13:27:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:27:03.409782  543705 memory.go:184] no items to output this cycle
I0319 13:27:03.409821  543705 cpu.go:275] no items to output this cycle
W0319 13:27:13.409701  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0319 13:27:13.409739  543705 conf_downlod.go:89] use old conf
E0319 13:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:27:13.409795  543705 cpu.go:282] Add success.
I0319 13:27:13.409801  543705 memory.go:191] Add success.
W0319 13:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:27:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:27:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:27:13.420156  543705 net.go:648] Add success.
I0319 13:27:13.422767  543705 net.go:770] primary dev: ETH0
I0319 13:27:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:27:13.422793  543705 net.go:698] Add success.
I0319 13:27:13.429092  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 13:27:13.453266  543705 event_worker.go:152] Polling the log file for events...
I0319 13:27:14.247417  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7882db4-f173-4857-b9f6-b6edfb78ce02","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:27:14.247454  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 13:27:14.454240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:27:14.454251  543705 disk_worker.go:708] disk space is not compliant
W0319 13:27:14.454253  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:27:14.455826  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:27:14.455846  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:27:14.455853  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:27:14.456043  543705 disk_worker.go:494] system disk:vda1
I0319 13:27:14.456083  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:27:15.456454  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:27:15.456465  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:27:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:27:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:27:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:27:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:27:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:27:19.365671  543705 disk_info.go:125] begin check local disk info of client
I0319 13:27:19.368088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:27:19.368094  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328b00 0xc000328b40]
E0319 13:27:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:27:23.409768  543705 memory.go:184] no items to output this cycle
I0319 13:27:23.409775  543705 cpu.go:275] no items to output this cycle
E0319 13:27:33.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:27:33.409891  543705 memory.go:184] no items to output this cycle
I0319 13:27:33.409976  543705 cpu.go:275] no items to output this cycle
I0319 13:27:37.721728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:27:37.721734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:27:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:27:43.410658  543705 memory.go:191] Add success.
I0319 13:27:43.409801  543705 cpu.go:282] Add success.
I0319 13:27:43.420339  543705 net.go:648] Add success.
I0319 13:27:43.422939  543705 net.go:770] primary dev: ETH0
I0319 13:27:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:27:43.422966  543705 net.go:698] Add success.
I0319 13:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:27:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:27:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:27:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:27:53.409774  543705 memory.go:184] no items to output this cycle
I0319 13:27:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 13:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:28:03.409795  543705 memory.go:184] no items to output this cycle
I0319 13:28:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:28:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:28:13.409820  543705 memory.go:191] Add success.
I0319 13:28:13.409831  543705 cpu.go:282] Add success.
W0319 13:28:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:28:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:28:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:28:13.420122  543705 net.go:648] Add success.
I0319 13:28:13.423019  543705 net.go:770] primary dev: ETH0
I0319 13:28:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:28:13.423044  543705 net.go:698] Add success.
I0319 13:28:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:28:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:28:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 13:28:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:28:14.456504  543705 disk_worker.go:494] system disk:vda1
I0319 13:28:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:28:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:28:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:28:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:28:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:28:19.369673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:28:19.372104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:28:19.372110  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364a40 0xc000364a80]
E0319 13:28:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:28:23.409762  543705 memory.go:184] no items to output this cycle
I0319 13:28:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 13:28:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:28:33.409857  543705 memory.go:184] no items to output this cycle
I0319 13:28:33.409905  543705 cpu.go:275] no items to output this cycle
E0319 13:28:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:28:43.409804  543705 memory.go:191] Add success.
I0319 13:28:43.409807  543705 cpu.go:282] Add success.
I0319 13:28:43.419855  543705 net.go:648] Add success.
I0319 13:28:43.422673  543705 net.go:770] primary dev: ETH0
I0319 13:28:43.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:28:43.422703  543705 net.go:698] Add success.
I0319 13:28:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:28:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:28:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:28:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:28:53.409767  543705 memory.go:184] no items to output this cycle
I0319 13:28:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 13:29:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:29:03.409784  543705 memory.go:184] no items to output this cycle
I0319 13:29:03.409834  543705 cpu.go:275] no items to output this cycle
E0319 13:29:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:29:13.409796  543705 memory.go:191] Add success.
I0319 13:29:13.409799  543705 cpu.go:282] Add success.
W0319 13:29:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:29:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:29:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:29:13.420113  543705 net.go:648] Add success.
I0319 13:29:13.423035  543705 net.go:770] primary dev: ETH0
I0319 13:29:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:29:13.423061  543705 net.go:698] Add success.
I0319 13:29:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:29:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:29:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 13:29:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:29:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 13:29:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:29:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:29:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:29:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:29:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:29:19.373671  543705 disk_info.go:125] begin check local disk info of client
I0319 13:29:19.376089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:29:19.376095  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314300 0xc000314340]
E0319 13:29:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:29:23.409798  543705 memory.go:184] no items to output this cycle
I0319 13:29:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:29:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:29:33.409769  543705 memory.go:184] no items to output this cycle
I0319 13:29:33.409790  543705 cpu.go:275] no items to output this cycle
E0319 13:29:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:29:43.409828  543705 memory.go:191] Add success.
I0319 13:29:43.409835  543705 cpu.go:282] Add success.
I0319 13:29:43.419979  543705 net.go:648] Add success.
I0319 13:29:43.422799  543705 net.go:770] primary dev: ETH0
I0319 13:29:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:29:43.422828  543705 net.go:698] Add success.
I0319 13:29:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:29:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:29:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:29:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:29:53.409763  543705 memory.go:184] no items to output this cycle
I0319 13:29:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 13:30:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:30:03.409815  543705 memory.go:184] no items to output this cycle
I0319 13:30:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 13:30:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:30:13.409816  543705 memory.go:191] Add success.
I0319 13:30:13.409824  543705 cpu.go:282] Add success.
W0319 13:30:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:30:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:30:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:30:13.420261  543705 net.go:648] Add success.
I0319 13:30:13.422777  543705 net.go:770] primary dev: ETH0
I0319 13:30:13.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:30:13.422805  543705 net.go:698] Add success.
I0319 13:30:13.468665  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ee7c7ab1-fddf-4bef-b1d0-cbf2da82fccd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:30:13.468706  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:30:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:30:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:30:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 13:30:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:30:14.456638  543705 disk_worker.go:494] system disk:vda1
I0319 13:30:14.456670  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:30:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:30:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:30:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:30:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:30:19.377673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:30:19.380045  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:30:19.380052  543705 disk_info.go:196] parse disk info done, disk is : [0xc000287480 0xc0002874c0]
E0319 13:30:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:30:23.409799  543705 memory.go:184] no items to output this cycle
I0319 13:30:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 13:30:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:30:33.409776  543705 memory.go:184] no items to output this cycle
I0319 13:30:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 13:30:37.723306  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:30:37.723313  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:30:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:30:43.410628  543705 memory.go:191] Add success.
I0319 13:30:43.409922  543705 cpu.go:282] Add success.
I0319 13:30:43.419731  543705 net.go:648] Add success.
I0319 13:30:43.422450  543705 net.go:770] primary dev: ETH0
I0319 13:30:43.422464  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:30:43.422477  543705 net.go:698] Add success.
I0319 13:30:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:30:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:30:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:30:53.409798  543705 memory.go:184] no items to output this cycle
I0319 13:30:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 13:31:03.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:31:03.409825  543705 cpu.go:275] no items to output this cycle
I0319 13:31:03.409836  543705 memory.go:184] no items to output this cycle
E0319 13:31:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:31:13.409794  543705 memory.go:191] Add success.
I0319 13:31:13.409798  543705 cpu.go:282] Add success.
W0319 13:31:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:31:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:31:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:31:13.420371  543705 net.go:648] Add success.
I0319 13:31:13.423144  543705 net.go:770] primary dev: ETH0
I0319 13:31:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:31:13.423183  543705 net.go:698] Add success.
I0319 13:31:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:31:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:31:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 13:31:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:31:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 13:31:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:31:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:31:19.381669  543705 disk_info.go:125] begin check local disk info of client
I0319 13:31:19.384082  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:31:19.384088  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac80 0xc0001aacc0]
E0319 13:31:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:31:23.409776  543705 memory.go:184] no items to output this cycle
I0319 13:31:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 13:31:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:31:33.409802  543705 memory.go:184] no items to output this cycle
I0319 13:31:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 13:31:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:31:43.409795  543705 memory.go:191] Add success.
I0319 13:31:43.409795  543705 cpu.go:282] Add success.
I0319 13:31:43.419827  543705 net.go:648] Add success.
I0319 13:31:43.422768  543705 net.go:770] primary dev: ETH0
I0319 13:31:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:31:43.422793  543705 net.go:698] Add success.
I0319 13:31:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:31:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:31:53.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:31:53.409877  543705 memory.go:184] no items to output this cycle
I0319 13:31:53.409987  543705 cpu.go:275] no items to output this cycle
E0319 13:32:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:32:03.409814  543705 memory.go:184] no items to output this cycle
I0319 13:32:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 13:32:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:32:13.409795  543705 memory.go:191] Add success.
I0319 13:32:13.409796  543705 cpu.go:282] Add success.
W0319 13:32:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:32:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:32:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:32:13.420398  543705 net.go:648] Add success.
I0319 13:32:13.423342  543705 net.go:770] primary dev: ETH0
I0319 13:32:13.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:32:13.423372  543705 net.go:698] Add success.
W0319 13:32:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:32:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 13:32:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:32:14.456767  543705 disk_worker.go:494] system disk:vda1
I0319 13:32:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:32:14.457161  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:32:14.457168  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:32:14.457173  543705 custom_config.go:64] query custom config with name: gpu
E0319 13:32:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:32:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:32:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:32:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:32:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:32:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:32:16.472332  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:32:19.385673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:32:19.388018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:32:19.388024  543705 disk_info.go:196] parse disk info done, disk is : [0xc000356b40 0xc000356b80]
E0319 13:32:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:32:23.409791  543705 memory.go:184] no items to output this cycle
I0319 13:32:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:32:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:32:33.409786  543705 memory.go:184] no items to output this cycle
I0319 13:32:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 13:32:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:32:43.409794  543705 memory.go:191] Add success.
I0319 13:32:43.409797  543705 cpu.go:282] Add success.
I0319 13:32:43.420013  543705 net.go:648] Add success.
I0319 13:32:43.422767  543705 net.go:770] primary dev: ETH0
I0319 13:32:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:32:43.422793  543705 net.go:698] Add success.
I0319 13:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:32:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:32:53.409766  543705 memory.go:184] no items to output this cycle
I0319 13:32:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:33:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:33:03.409814  543705 memory.go:184] no items to output this cycle
I0319 13:33:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 13:33:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:33:13.409808  543705 memory.go:191] Add success.
I0319 13:33:13.409810  543705 cpu.go:282] Add success.
W0319 13:33:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:33:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:33:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:33:13.420187  543705 net.go:648] Add success.
I0319 13:33:13.422818  543705 net.go:770] primary dev: ETH0
I0319 13:33:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:33:13.422843  543705 net.go:698] Add success.
I0319 13:33:13.470030  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f75e6234-2039-4b5e-973b-258c239348b6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:33:13.470063  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:33:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:33:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:33:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 13:33:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:33:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 13:33:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:33:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:33:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:33:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:33:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:33:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:33:19.392061  543705 disk_info.go:125] begin check local disk info of client
I0319 13:33:19.394467  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:33:19.394473  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bda00 0xc0002bda40]
E0319 13:33:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:33:23.409760  543705 memory.go:184] no items to output this cycle
I0319 13:33:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 13:33:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:33:33.409803  543705 memory.go:184] no items to output this cycle
I0319 13:33:33.409819  543705 cpu.go:275] no items to output this cycle
I0319 13:33:37.723451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:33:37.723458  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:33:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:33:43.410732  543705 memory.go:191] Add success.
I0319 13:33:43.409800  543705 cpu.go:282] Add success.
I0319 13:33:43.420430  543705 net.go:648] Add success.
I0319 13:33:43.423048  543705 net.go:770] primary dev: ETH0
I0319 13:33:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:33:43.423073  543705 net.go:698] Add success.
I0319 13:33:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:33:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:33:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:33:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:33:53.409774  543705 memory.go:184] no items to output this cycle
I0319 13:33:53.409810  543705 cpu.go:275] no items to output this cycle
I0319 13:34:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:34:03.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:34:03.409823  543705 memory.go:184] no items to output this cycle
E0319 13:34:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:34:13.409796  543705 memory.go:191] Add success.
W0319 13:34:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:34:13.409825  543705 cpu.go:282] Add success.
W0319 13:34:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:34:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:34:13.420312  543705 net.go:648] Add success.
I0319 13:34:13.423386  543705 net.go:770] primary dev: ETH0
I0319 13:34:13.423399  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:34:13.423410  543705 net.go:698] Add success.
I0319 13:34:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:34:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:34:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0319 13:34:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:34:14.456493  543705 disk_worker.go:494] system disk:vda1
I0319 13:34:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:34:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:34:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:34:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:34:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:34:19.397672  543705 disk_info.go:125] begin check local disk info of client
I0319 13:34:19.400051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:34:19.400057  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bac0 0xc00007bb00]
E0319 13:34:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:34:23.409789  543705 memory.go:184] no items to output this cycle
I0319 13:34:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:34:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:34:33.409776  543705 memory.go:184] no items to output this cycle
I0319 13:34:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:34:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:34:43.409784  543705 memory.go:191] Add success.
I0319 13:34:43.409794  543705 cpu.go:282] Add success.
I0319 13:34:43.419906  543705 net.go:648] Add success.
I0319 13:34:43.422543  543705 net.go:770] primary dev: ETH0
I0319 13:34:43.422559  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:34:43.422572  543705 net.go:698] Add success.
I0319 13:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:34:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:34:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:34:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:34:53.409781  543705 cpu.go:275] no items to output this cycle
I0319 13:34:53.409782  543705 memory.go:184] no items to output this cycle
I0319 13:35:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 13:35:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:35:03.409812  543705 memory.go:184] no items to output this cycle
E0319 13:35:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:35:13.409937  543705 cpu.go:282] Add success.
I0319 13:35:13.409962  543705 memory.go:191] Add success.
W0319 13:35:13.410019  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:35:13.410044  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:35:13.410049  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:35:13.419760  543705 net.go:648] Add success.
I0319 13:35:13.422626  543705 net.go:770] primary dev: ETH0
I0319 13:35:13.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:35:13.422654  543705 net.go:698] Add success.
I0319 13:35:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:35:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:35:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 13:35:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:35:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 13:35:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:35:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:35:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:35:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:35:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:35:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:35:19.401673  543705 disk_info.go:125] begin check local disk info of client
I0319 13:35:19.404036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:35:19.404042  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264600 0xc000264640]
E0319 13:35:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:35:23.409792  543705 memory.go:184] no items to output this cycle
I0319 13:35:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 13:35:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:35:33.409782  543705 memory.go:184] no items to output this cycle
I0319 13:35:33.409788  543705 cpu.go:275] no items to output this cycle
E0319 13:35:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:35:43.409816  543705 memory.go:191] Add success.
I0319 13:35:43.409825  543705 cpu.go:282] Add success.
I0319 13:35:43.419870  543705 net.go:648] Add success.
I0319 13:35:43.422725  543705 net.go:770] primary dev: ETH0
I0319 13:35:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:35:43.422751  543705 net.go:698] Add success.
I0319 13:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:35:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:35:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:35:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:35:53.409803  543705 memory.go:184] no items to output this cycle
I0319 13:35:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 13:36:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:36:03.409759  543705 memory.go:184] no items to output this cycle
I0319 13:36:03.409837  543705 cpu.go:275] no items to output this cycle
E0319 13:36:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:36:13.409820  543705 memory.go:191] Add success.
I0319 13:36:13.409832  543705 cpu.go:282] Add success.
W0319 13:36:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:36:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:36:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:36:13.419723  543705 net.go:648] Add success.
I0319 13:36:13.422578  543705 net.go:770] primary dev: ETH0
I0319 13:36:13.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:36:13.422604  543705 net.go:698] Add success.
I0319 13:36:13.481360  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e2985640-baad-4dbc-85c9-d245ae8d49a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:36:13.481391  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:36:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:36:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 13:36:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:36:14.456539  543705 disk_worker.go:494] system disk:vda1
I0319 13:36:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:36:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:36:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:36:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:36:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:36:19.405677  543705 disk_info.go:125] begin check local disk info of client
I0319 13:36:19.408102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:36:19.408108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa480 0xc0001aa4c0]
E0319 13:36:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:36:23.409795  543705 memory.go:184] no items to output this cycle
I0319 13:36:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:36:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:36:33.409785  543705 memory.go:184] no items to output this cycle
I0319 13:36:33.409810  543705 cpu.go:275] no items to output this cycle
I0319 13:36:37.723599  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:36:37.723606  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:36:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:36:43.410719  543705 memory.go:191] Add success.
I0319 13:36:43.409822  543705 cpu.go:282] Add success.
I0319 13:36:43.420424  543705 net.go:648] Add success.
I0319 13:36:43.423374  543705 net.go:770] primary dev: ETH0
I0319 13:36:43.423388  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:36:43.423407  543705 net.go:698] Add success.
I0319 13:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:36:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:36:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:36:53.409792  543705 cpu.go:275] no items to output this cycle
I0319 13:36:53.409799  543705 memory.go:184] no items to output this cycle
E0319 13:37:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:37:03.409781  543705 memory.go:184] no items to output this cycle
I0319 13:37:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 13:37:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:37:13.409792  543705 memory.go:191] Add success.
W0319 13:37:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:37:13.409817  543705 cpu.go:282] Add success.
W0319 13:37:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:37:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:37:13.420242  543705 net.go:648] Add success.
I0319 13:37:13.423513  543705 net.go:770] primary dev: ETH0
I0319 13:37:13.423528  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:37:13.423541  543705 net.go:698] Add success.
I0319 13:37:13.452777  543705 event_worker.go:152] Polling the log file for events...
W0319 13:37:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:37:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 13:37:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:37:14.456794  543705 disk_worker.go:494] system disk:vda1
I0319 13:37:14.456830  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:37:14.457045  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:37:14.457053  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:37:14.457058  543705 custom_config.go:64] query custom config with name: gpu
E0319 13:37:15.456885  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:37:15.456894  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:37:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:37:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:37:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:37:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:37:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:37:19.409672  543705 disk_info.go:125] begin check local disk info of client
I0319 13:37:19.412004  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:37:19.412010  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1d40 0xc0003e1d80]
E0319 13:37:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:37:23.409812  543705 memory.go:184] no items to output this cycle
I0319 13:37:23.409823  543705 cpu.go:275] no items to output this cycle
E0319 13:37:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:37:33.409771  543705 memory.go:184] no items to output this cycle
I0319 13:37:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:37:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:37:43.409818  543705 memory.go:191] Add success.
I0319 13:37:43.409826  543705 cpu.go:282] Add success.
I0319 13:37:43.419957  543705 net.go:648] Add success.
I0319 13:37:43.422996  543705 net.go:770] primary dev: ETH0
I0319 13:37:43.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:37:43.423024  543705 net.go:698] Add success.
I0319 13:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:37:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:37:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:37:53.409778  543705 memory.go:184] no items to output this cycle
I0319 13:37:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:38:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:38:03.409758  543705 memory.go:184] no items to output this cycle
I0319 13:38:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:38:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:38:13.409826  543705 memory.go:191] Add success.
I0319 13:38:13.409841  543705 cpu.go:282] Add success.
W0319 13:38:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:38:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:38:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:38:13.420397  543705 net.go:648] Add success.
I0319 13:38:13.423219  543705 net.go:770] primary dev: ETH0
I0319 13:38:13.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:38:13.423244  543705 net.go:698] Add success.
I0319 13:38:14.453981  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:38:14.454129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:38:14.454192  543705 disk_worker.go:708] disk space is not compliant
W0319 13:38:14.454195  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:38:14.455506  543705 disk_worker.go:494] system disk:vda1
I0319 13:38:14.455548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:38:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:38:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:38:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:38:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:38:16.472435  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:38:19.412792  543705 disk_info.go:125] begin check local disk info of client
I0319 13:38:19.415164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:38:19.415170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000587880 0xc0005878c0]
E0319 13:38:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:38:23.409799  543705 memory.go:184] no items to output this cycle
I0319 13:38:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:38:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:38:33.409788  543705 memory.go:184] no items to output this cycle
I0319 13:38:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 13:38:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:38:43.409799  543705 memory.go:191] Add success.
I0319 13:38:43.409805  543705 cpu.go:282] Add success.
I0319 13:38:43.419901  543705 net.go:648] Add success.
I0319 13:38:43.422818  543705 net.go:770] primary dev: ETH0
I0319 13:38:43.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:38:43.422847  543705 net.go:698] Add success.
I0319 13:38:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:38:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:38:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:38:53.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:38:53.410262  543705 memory.go:184] no items to output this cycle
I0319 13:38:53.410266  543705 cpu.go:275] no items to output this cycle
E0319 13:39:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:39:03.409804  543705 memory.go:184] no items to output this cycle
I0319 13:39:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 13:39:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:39:13.409830  543705 memory.go:191] Add success.
I0319 13:39:13.409837  543705 cpu.go:282] Add success.
W0319 13:39:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:39:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:39:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:39:13.420124  543705 net.go:648] Add success.
I0319 13:39:13.422982  543705 net.go:770] primary dev: ETH0
I0319 13:39:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:39:13.423007  543705 net.go:698] Add success.
I0319 13:39:13.469117  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"18902144-eef8-4d0d-b835-fe78cb31df5c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:39:13.469150  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:39:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:39:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:39:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 13:39:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:39:14.458934  543705 disk_worker.go:494] system disk:vda1
I0319 13:39:14.458963  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:39:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:39:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:39:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:39:19.415795  543705 disk_info.go:125] begin check local disk info of client
I0319 13:39:19.418171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:39:19.418177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003766c0 0xc000376700]
E0319 13:39:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:39:23.409806  543705 memory.go:184] no items to output this cycle
I0319 13:39:23.409817  543705 cpu.go:275] no items to output this cycle
E0319 13:39:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:39:33.409786  543705 memory.go:184] no items to output this cycle
I0319 13:39:33.409800  543705 cpu.go:275] no items to output this cycle
I0319 13:39:37.724313  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:39:37.724319  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:39:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:39:43.410716  543705 memory.go:191] Add success.
I0319 13:39:43.409827  543705 cpu.go:282] Add success.
I0319 13:39:43.420450  543705 net.go:648] Add success.
I0319 13:39:43.423138  543705 net.go:770] primary dev: ETH0
I0319 13:39:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:39:43.423164  543705 net.go:698] Add success.
I0319 13:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:39:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:39:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:39:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:39:53.409782  543705 memory.go:184] no items to output this cycle
I0319 13:39:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 13:40:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:40:03.409784  543705 memory.go:184] no items to output this cycle
I0319 13:40:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 13:40:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:40:13.409807  543705 memory.go:191] Add success.
I0319 13:40:13.409807  543705 cpu.go:282] Add success.
W0319 13:40:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:40:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:40:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:40:13.420217  543705 net.go:648] Add success.
I0319 13:40:13.422981  543705 net.go:770] primary dev: ETH0
I0319 13:40:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:40:13.423007  543705 net.go:698] Add success.
I0319 13:40:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:40:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:40:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 13:40:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:40:14.457642  543705 disk_worker.go:494] system disk:vda1
I0319 13:40:14.457698  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:40:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:40:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:40:19.418803  543705 disk_info.go:125] begin check local disk info of client
I0319 13:40:19.421196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:40:19.421204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab500 0xc0001ab540]
E0319 13:40:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:40:23.409764  543705 memory.go:184] no items to output this cycle
I0319 13:40:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 13:40:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:40:33.409799  543705 memory.go:184] no items to output this cycle
I0319 13:40:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:40:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:40:43.409795  543705 memory.go:191] Add success.
I0319 13:40:43.409808  543705 cpu.go:282] Add success.
I0319 13:40:43.419879  543705 net.go:648] Add success.
I0319 13:40:43.422848  543705 net.go:770] primary dev: ETH0
I0319 13:40:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:40:43.422889  543705 net.go:698] Add success.
I0319 13:40:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:40:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:40:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:40:53.409770  543705 memory.go:184] no items to output this cycle
I0319 13:40:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 13:41:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:41:03.409797  543705 memory.go:184] no items to output this cycle
I0319 13:41:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 13:41:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:41:13.409822  543705 memory.go:191] Add success.
I0319 13:41:13.409829  543705 cpu.go:282] Add success.
W0319 13:41:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:41:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:41:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:41:13.420147  543705 net.go:648] Add success.
I0319 13:41:13.422950  543705 net.go:770] primary dev: ETH0
I0319 13:41:13.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:41:13.422976  543705 net.go:698] Add success.
I0319 13:41:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:41:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:41:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 13:41:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:41:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 13:41:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:41:15.454980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:41:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:41:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:41:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:41:19.421800  543705 disk_info.go:125] begin check local disk info of client
I0319 13:41:19.424221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:41:19.424227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380400 0xc000380440]
E0319 13:41:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:41:23.409769  543705 memory.go:184] no items to output this cycle
I0319 13:41:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 13:41:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:41:33.409798  543705 memory.go:184] no items to output this cycle
I0319 13:41:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 13:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:41:43.409812  543705 memory.go:191] Add success.
I0319 13:41:43.409817  543705 cpu.go:282] Add success.
I0319 13:41:43.419856  543705 net.go:648] Add success.
I0319 13:41:43.422765  543705 net.go:770] primary dev: ETH0
I0319 13:41:43.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:41:43.422791  543705 net.go:698] Add success.
I0319 13:41:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:41:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:41:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:41:53.409799  543705 memory.go:184] no items to output this cycle
I0319 13:41:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 13:42:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:42:03.409771  543705 memory.go:184] no items to output this cycle
I0319 13:42:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 13:42:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:42:13.409810  543705 memory.go:191] Add success.
I0319 13:42:13.409810  543705 cpu.go:282] Add success.
W0319 13:42:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:42:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:42:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:42:13.420150  543705 net.go:648] Add success.
I0319 13:42:13.422845  543705 net.go:770] primary dev: ETH0
I0319 13:42:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:42:13.422874  543705 net.go:698] Add success.
I0319 13:42:13.468912  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e83beed2-c5dc-4058-98d1-19b4c7a2ec2f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:42:13.468960  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 13:42:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:42:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 13:42:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:42:14.456805  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:42:14.456814  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:42:14.456820  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:42:14.456838  543705 disk_worker.go:494] system disk:vda1
I0319 13:42:14.456867  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:42:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:42:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:42:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:42:16.458011  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:42:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:42:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:42:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:42:19.424819  543705 disk_info.go:125] begin check local disk info of client
I0319 13:42:19.427171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:42:19.427179  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005140c0 0xc000514100]
E0319 13:42:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:42:23.409775  543705 memory.go:184] no items to output this cycle
I0319 13:42:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 13:42:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:42:33.409781  543705 memory.go:184] no items to output this cycle
I0319 13:42:33.409822  543705 cpu.go:275] no items to output this cycle
I0319 13:42:37.724456  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:42:37.724464  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:42:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:42:43.409822  543705 cpu.go:282] Add success.
I0319 13:42:43.410748  543705 memory.go:191] Add success.
I0319 13:42:43.420519  543705 net.go:648] Add success.
I0319 13:42:43.423631  543705 net.go:770] primary dev: ETH0
I0319 13:42:43.423648  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:42:43.423663  543705 net.go:698] Add success.
I0319 13:42:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:42:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:42:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:42:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:42:53.409804  543705 memory.go:184] no items to output this cycle
I0319 13:42:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 13:43:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:43:03.409779  543705 memory.go:184] no items to output this cycle
I0319 13:43:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 13:43:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:43:13.409820  543705 memory.go:191] Add success.
I0319 13:43:13.409830  543705 cpu.go:282] Add success.
W0319 13:43:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:43:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:43:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:43:13.420260  543705 net.go:648] Add success.
I0319 13:43:13.423053  543705 net.go:770] primary dev: ETH0
I0319 13:43:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:43:13.423087  543705 net.go:698] Add success.
I0319 13:43:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:43:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:43:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 13:43:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:43:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 13:43:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:43:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:43:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:43:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:43:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:43:16.472454  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:43:19.427831  543705 disk_info.go:125] begin check local disk info of client
I0319 13:43:19.430273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:43:19.430279  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a7c0 0xc00007a940]
E0319 13:43:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:43:23.409791  543705 memory.go:184] no items to output this cycle
I0319 13:43:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:43:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:43:33.409776  543705 memory.go:184] no items to output this cycle
I0319 13:43:33.409780  543705 cpu.go:275] no items to output this cycle
E0319 13:43:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:43:43.409797  543705 memory.go:191] Add success.
I0319 13:43:43.409802  543705 cpu.go:282] Add success.
I0319 13:43:43.419874  543705 net.go:648] Add success.
I0319 13:43:43.422869  543705 net.go:770] primary dev: ETH0
I0319 13:43:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:43:43.422895  543705 net.go:698] Add success.
I0319 13:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:43:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:43:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:43:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:43:53.409808  543705 memory.go:184] no items to output this cycle
I0319 13:43:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 13:44:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:44:03.409783  543705 memory.go:184] no items to output this cycle
I0319 13:44:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 13:44:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:44:13.409796  543705 memory.go:191] Add success.
I0319 13:44:13.409814  543705 cpu.go:282] Add success.
W0319 13:44:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:44:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:44:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:44:13.420180  543705 net.go:648] Add success.
I0319 13:44:13.423030  543705 net.go:770] primary dev: ETH0
I0319 13:44:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:44:13.423059  543705 net.go:698] Add success.
I0319 13:44:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:44:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:44:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 13:44:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:44:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 13:44:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:44:15.456024  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:44:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:44:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:44:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:44:19.430838  543705 disk_info.go:125] begin check local disk info of client
I0319 13:44:19.433297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:44:19.433303  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a7c0 0xc00007a940]
E0319 13:44:23.410416  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:44:23.410431  543705 memory.go:184] no items to output this cycle
I0319 13:44:23.410432  543705 cpu.go:275] no items to output this cycle
E0319 13:44:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:44:33.409795  543705 memory.go:184] no items to output this cycle
I0319 13:44:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:44:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:44:43.409820  543705 memory.go:191] Add success.
I0319 13:44:43.409830  543705 cpu.go:282] Add success.
I0319 13:44:43.420305  543705 net.go:648] Add success.
I0319 13:44:43.422941  543705 net.go:770] primary dev: ETH0
I0319 13:44:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:44:43.422967  543705 net.go:698] Add success.
I0319 13:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:44:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:44:53.409799  543705 memory.go:184] no items to output this cycle
I0319 13:44:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 13:45:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:45:03.409773  543705 memory.go:184] no items to output this cycle
I0319 13:45:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 13:45:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:45:13.409795  543705 memory.go:191] Add success.
I0319 13:45:13.409816  543705 cpu.go:282] Add success.
W0319 13:45:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:45:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:45:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:45:13.420134  543705 net.go:648] Add success.
I0319 13:45:13.423309  543705 net.go:770] primary dev: ETH0
I0319 13:45:13.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:45:13.423338  543705 net.go:698] Add success.
I0319 13:45:13.463335  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fdd5b097-c578-4636-9171-6eaf45f46cb8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:45:13.463367  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:45:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:45:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:45:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 13:45:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:45:14.456623  543705 disk_worker.go:494] system disk:vda1
I0319 13:45:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:45:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:45:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:45:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:45:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:45:16.472453  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:45:19.433858  543705 disk_info.go:125] begin check local disk info of client
I0319 13:45:19.436240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:45:19.436246  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c080 0xc00034c0c0]
E0319 13:45:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:45:23.409795  543705 memory.go:184] no items to output this cycle
I0319 13:45:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 13:45:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:45:33.409785  543705 memory.go:184] no items to output this cycle
I0319 13:45:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 13:45:37.724602  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:45:37.724608  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:45:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:45:43.410704  543705 memory.go:191] Add success.
I0319 13:45:43.409813  543705 cpu.go:282] Add success.
I0319 13:45:43.420415  543705 net.go:648] Add success.
I0319 13:45:43.423109  543705 net.go:770] primary dev: ETH0
I0319 13:45:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:45:43.423134  543705 net.go:698] Add success.
I0319 13:45:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:45:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:45:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:45:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:45:53.409801  543705 memory.go:184] no items to output this cycle
I0319 13:45:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:46:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:46:03.409772  543705 memory.go:184] no items to output this cycle
I0319 13:46:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 13:46:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:46:13.409798  543705 memory.go:191] Add success.
I0319 13:46:13.409802  543705 cpu.go:282] Add success.
W0319 13:46:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:46:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:46:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:46:13.420266  543705 net.go:648] Add success.
I0319 13:46:13.423185  543705 net.go:770] primary dev: ETH0
I0319 13:46:13.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:46:13.423211  543705 net.go:698] Add success.
I0319 13:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:46:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:46:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 13:46:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:46:14.456634  543705 disk_worker.go:494] system disk:vda1
I0319 13:46:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:46:15.456016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:46:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:46:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:46:16.472493  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:46:19.436877  543705 disk_info.go:125] begin check local disk info of client
I0319 13:46:19.439319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:46:19.439325  543705 disk_info.go:196] parse disk info done, disk is : [0xc000544480 0xc0005444c0]
E0319 13:46:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:46:23.409771  543705 memory.go:184] no items to output this cycle
I0319 13:46:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 13:46:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:46:33.409798  543705 memory.go:184] no items to output this cycle
I0319 13:46:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:46:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:46:43.409815  543705 memory.go:191] Add success.
I0319 13:46:43.409823  543705 cpu.go:282] Add success.
I0319 13:46:43.419961  543705 net.go:648] Add success.
I0319 13:46:43.423098  543705 net.go:770] primary dev: ETH0
I0319 13:46:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:46:43.423124  543705 net.go:698] Add success.
I0319 13:46:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:46:53.410509  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:46:53.410525  543705 memory.go:184] no items to output this cycle
I0319 13:46:53.410531  543705 cpu.go:275] no items to output this cycle
E0319 13:47:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:47:03.409774  543705 memory.go:184] no items to output this cycle
I0319 13:47:03.409778  543705 cpu.go:275] no items to output this cycle
E0319 13:47:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:47:13.409781  543705 memory.go:191] Add success.
W0319 13:47:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 13:47:13.409814  543705 cpu.go:282] Add success.
W0319 13:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:47:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:47:13.420120  543705 net.go:648] Add success.
I0319 13:47:13.423188  543705 net.go:770] primary dev: ETH0
I0319 13:47:13.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:47:13.423213  543705 net.go:698] Add success.
I0319 13:47:13.453744  543705 event_worker.go:152] Polling the log file for events...
W0319 13:47:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:47:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 13:47:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:47:14.455911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:47:14.455920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:47:14.455926  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:47:14.456568  543705 disk_worker.go:494] system disk:vda1
I0319 13:47:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:47:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:47:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:47:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:47:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:47:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:47:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:47:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:47:19.439877  543705 disk_info.go:125] begin check local disk info of client
I0319 13:47:19.442239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:47:19.442246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000d7440 0xc0000d7480]
E0319 13:47:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:47:23.409797  543705 memory.go:184] no items to output this cycle
I0319 13:47:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 13:47:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:47:33.409803  543705 memory.go:184] no items to output this cycle
I0319 13:47:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 13:47:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:47:43.409819  543705 memory.go:191] Add success.
I0319 13:47:43.409836  543705 cpu.go:282] Add success.
I0319 13:47:43.419973  543705 net.go:648] Add success.
I0319 13:47:43.422711  543705 net.go:770] primary dev: ETH0
I0319 13:47:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:47:43.422741  543705 net.go:698] Add success.
I0319 13:47:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:47:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:47:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:47:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:47:53.409797  543705 memory.go:184] no items to output this cycle
I0319 13:47:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 13:48:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:48:03.409785  543705 memory.go:184] no items to output this cycle
I0319 13:48:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 13:48:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:48:13.409801  543705 memory.go:191] Add success.
I0319 13:48:13.409803  543705 cpu.go:282] Add success.
W0319 13:48:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:48:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:48:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:48:13.420215  543705 net.go:648] Add success.
I0319 13:48:13.422978  543705 net.go:770] primary dev: ETH0
I0319 13:48:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:48:13.423003  543705 net.go:698] Add success.
I0319 13:48:13.464058  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cc6d80eb-0932-4ccf-9002-4e0f972051e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:48:13.464091  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:48:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:48:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:48:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 13:48:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:48:14.456513  543705 disk_worker.go:494] system disk:vda1
I0319 13:48:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:48:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:48:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:48:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:48:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:48:16.472413  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:48:19.442906  543705 disk_info.go:125] begin check local disk info of client
I0319 13:48:19.445265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:48:19.445270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001cccc0 0xc0001ccd00]
E0319 13:48:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:48:23.409789  543705 memory.go:184] no items to output this cycle
I0319 13:48:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:48:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:48:33.409826  543705 memory.go:184] no items to output this cycle
I0319 13:48:33.409836  543705 cpu.go:275] no items to output this cycle
I0319 13:48:37.725315  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:48:37.725322  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:48:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:48:43.410782  543705 memory.go:191] Add success.
I0319 13:48:43.409821  543705 cpu.go:282] Add success.
I0319 13:48:43.420301  543705 net.go:770] primary dev: ETH0
I0319 13:48:43.420314  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:48:43.420327  543705 net.go:698] Add success.
I0319 13:48:43.420674  543705 net.go:648] Add success.
I0319 13:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:48:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:48:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:48:53.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:48:53.410280  543705 memory.go:184] no items to output this cycle
I0319 13:48:53.410297  543705 cpu.go:275] no items to output this cycle
E0319 13:49:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:49:03.409764  543705 memory.go:184] no items to output this cycle
I0319 13:49:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 13:49:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:49:13.409819  543705 memory.go:191] Add success.
I0319 13:49:13.409827  543705 cpu.go:282] Add success.
W0319 13:49:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:49:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:49:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:49:13.420187  543705 net.go:648] Add success.
I0319 13:49:13.422987  543705 net.go:770] primary dev: ETH0
I0319 13:49:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:49:13.423013  543705 net.go:698] Add success.
I0319 13:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:49:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:49:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 13:49:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:49:14.456564  543705 disk_worker.go:494] system disk:vda1
I0319 13:49:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:49:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:49:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:49:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:49:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:49:19.445919  543705 disk_info.go:125] begin check local disk info of client
I0319 13:49:19.448278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:49:19.448284  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252440 0xc000252480]
E0319 13:49:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:49:23.409788  543705 memory.go:184] no items to output this cycle
I0319 13:49:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:49:33.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:49:33.409894  543705 memory.go:184] no items to output this cycle
I0319 13:49:33.410022  543705 cpu.go:275] no items to output this cycle
E0319 13:49:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:49:43.409825  543705 memory.go:191] Add success.
I0319 13:49:43.409836  543705 cpu.go:282] Add success.
I0319 13:49:43.419917  543705 net.go:648] Add success.
I0319 13:49:43.422770  543705 net.go:770] primary dev: ETH0
I0319 13:49:43.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:49:43.422794  543705 net.go:698] Add success.
I0319 13:49:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:49:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:49:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:49:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:49:53.409766  543705 memory.go:184] no items to output this cycle
I0319 13:49:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 13:50:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:50:03.409796  543705 memory.go:184] no items to output this cycle
I0319 13:50:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 13:50:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:50:13.409827  543705 memory.go:191] Add success.
I0319 13:50:13.409830  543705 cpu.go:282] Add success.
W0319 13:50:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:50:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:50:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:50:13.420155  543705 net.go:648] Add success.
I0319 13:50:13.423239  543705 net.go:770] primary dev: ETH0
I0319 13:50:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:50:13.423264  543705 net.go:698] Add success.
I0319 13:50:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:50:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:50:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0319 13:50:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:50:14.456513  543705 disk_worker.go:494] system disk:vda1
I0319 13:50:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:50:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:50:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:50:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:50:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:50:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:50:19.448938  543705 disk_info.go:125] begin check local disk info of client
I0319 13:50:19.451373  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:50:19.451379  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028b0c0 0xc00028b100]
E0319 13:50:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:50:23.409794  543705 memory.go:184] no items to output this cycle
I0319 13:50:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 13:50:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:50:33.409788  543705 memory.go:184] no items to output this cycle
I0319 13:50:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 13:50:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:50:43.409827  543705 memory.go:191] Add success.
I0319 13:50:43.409830  543705 cpu.go:282] Add success.
I0319 13:50:43.420060  543705 net.go:648] Add success.
I0319 13:50:43.423012  543705 net.go:770] primary dev: ETH0
I0319 13:50:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:50:43.423037  543705 net.go:698] Add success.
I0319 13:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:50:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:50:53.409799  543705 memory.go:184] no items to output this cycle
I0319 13:50:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:51:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:51:03.409777  543705 memory.go:184] no items to output this cycle
I0319 13:51:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 13:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:51:13.409801  543705 memory.go:191] Add success.
I0319 13:51:13.409803  543705 cpu.go:282] Add success.
W0319 13:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:51:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:51:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:51:13.420150  543705 net.go:648] Add success.
I0319 13:51:13.422694  543705 net.go:770] primary dev: ETH0
I0319 13:51:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:51:13.422724  543705 net.go:698] Add success.
I0319 13:51:13.473748  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd531098-b312-43ea-b9ae-26555b27b2e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:51:13.473779  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:51:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:51:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 13:51:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:51:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 13:51:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:51:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:51:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:51:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:51:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:51:19.451942  543705 disk_info.go:125] begin check local disk info of client
I0319 13:51:19.454356  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:51:19.454362  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6180 0xc0002a61c0]
E0319 13:51:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:51:23.409791  543705 memory.go:184] no items to output this cycle
I0319 13:51:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 13:51:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:51:33.409797  543705 memory.go:184] no items to output this cycle
I0319 13:51:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 13:51:37.725739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:51:37.725746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:51:43.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:51:43.410785  543705 memory.go:191] Add success.
I0319 13:51:43.409995  543705 cpu.go:282] Add success.
I0319 13:51:43.419730  543705 net.go:648] Add success.
I0319 13:51:43.422424  543705 net.go:770] primary dev: ETH0
I0319 13:51:43.422437  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:51:43.422449  543705 net.go:698] Add success.
I0319 13:51:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:51:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:51:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:51:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:51:53.409765  543705 memory.go:184] no items to output this cycle
I0319 13:51:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 13:52:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:52:03.409795  543705 memory.go:184] no items to output this cycle
I0319 13:52:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 13:52:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:52:13.409798  543705 memory.go:191] Add success.
I0319 13:52:13.409802  543705 cpu.go:282] Add success.
W0319 13:52:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:52:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:52:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:52:13.420072  543705 net.go:648] Add success.
I0319 13:52:13.423089  543705 net.go:770] primary dev: ETH0
I0319 13:52:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:52:13.423117  543705 net.go:698] Add success.
W0319 13:52:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:52:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 13:52:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:52:14.455872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:52:14.455880  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:52:14.455886  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:52:14.456569  543705 disk_worker.go:494] system disk:vda1
I0319 13:52:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:52:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:52:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:52:16.457899  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:52:16.457899  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:52:16.457957  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:52:16.457976  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:52:16.472288  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:52:19.454961  543705 disk_info.go:125] begin check local disk info of client
I0319 13:52:19.457289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:52:19.457295  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028a140 0xc00028a180]
E0319 13:52:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:52:23.409800  543705 memory.go:184] no items to output this cycle
I0319 13:52:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:52:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:52:33.409807  543705 memory.go:184] no items to output this cycle
I0319 13:52:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 13:52:43.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:52:43.409936  543705 memory.go:191] Add success.
I0319 13:52:43.409945  543705 cpu.go:282] Add success.
I0319 13:52:43.419714  543705 net.go:648] Add success.
I0319 13:52:43.422444  543705 net.go:770] primary dev: ETH0
I0319 13:52:43.422457  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:52:43.422468  543705 net.go:698] Add success.
I0319 13:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:52:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:52:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:52:53.409800  543705 memory.go:184] no items to output this cycle
I0319 13:52:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:53:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:53:03.409769  543705 memory.go:184] no items to output this cycle
I0319 13:53:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 13:53:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:53:13.409823  543705 memory.go:191] Add success.
I0319 13:53:13.409826  543705 cpu.go:282] Add success.
W0319 13:53:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:53:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:53:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:53:13.420238  543705 net.go:648] Add success.
I0319 13:53:13.423589  543705 net.go:770] primary dev: ETH0
I0319 13:53:13.423604  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:53:13.423616  543705 net.go:698] Add success.
I0319 13:53:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:53:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:53:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 13:53:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:53:14.456581  543705 disk_worker.go:494] system disk:vda1
I0319 13:53:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:53:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:53:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:53:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:53:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:53:19.457972  543705 disk_info.go:125] begin check local disk info of client
I0319 13:53:19.460329  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:53:19.460336  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a3440 0xc0004a3480]
E0319 13:53:23.410025  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:53:23.410041  543705 memory.go:184] no items to output this cycle
I0319 13:53:23.410116  543705 cpu.go:275] no items to output this cycle
E0319 13:53:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:53:33.409778  543705 memory.go:184] no items to output this cycle
I0319 13:53:33.409781  543705 cpu.go:275] no items to output this cycle
E0319 13:53:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:53:43.409798  543705 memory.go:191] Add success.
I0319 13:53:43.409799  543705 cpu.go:282] Add success.
I0319 13:53:43.420149  543705 net.go:648] Add success.
I0319 13:53:43.422980  543705 net.go:770] primary dev: ETH0
I0319 13:53:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:53:43.423006  543705 net.go:698] Add success.
I0319 13:53:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:53:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:53:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:53:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:53:53.409800  543705 memory.go:184] no items to output this cycle
I0319 13:53:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 13:54:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:54:03.409774  543705 memory.go:184] no items to output this cycle
I0319 13:54:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 13:54:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:54:13.409800  543705 memory.go:191] Add success.
I0319 13:54:13.409806  543705 cpu.go:282] Add success.
W0319 13:54:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:54:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:54:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:54:13.420145  543705 net.go:648] Add success.
I0319 13:54:13.423047  543705 net.go:770] primary dev: ETH0
I0319 13:54:13.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:54:13.423073  543705 net.go:698] Add success.
I0319 13:54:13.464069  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"125aaa0a-4311-47d3-821c-4cd77b89ed51","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:54:13.464105  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 13:54:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:54:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:54:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 13:54:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:54:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 13:54:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:54:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:54:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:54:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:54:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:54:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:54:19.460807  543705 disk_info.go:125] begin check local disk info of client
I0319 13:54:19.463176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:54:19.463182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005323c0 0xc000532400]
E0319 13:54:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:54:23.409796  543705 memory.go:184] no items to output this cycle
I0319 13:54:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 13:54:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:54:33.409813  543705 memory.go:184] no items to output this cycle
I0319 13:54:33.409829  543705 cpu.go:275] no items to output this cycle
I0319 13:54:37.727337  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:54:37.727345  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:54:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:54:43.410696  543705 memory.go:191] Add success.
I0319 13:54:43.409824  543705 cpu.go:282] Add success.
I0319 13:54:43.420682  543705 net.go:648] Add success.
I0319 13:54:43.423382  543705 net.go:770] primary dev: ETH0
I0319 13:54:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:54:43.423408  543705 net.go:698] Add success.
I0319 13:54:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:54:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:54:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:54:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:54:53.409798  543705 memory.go:184] no items to output this cycle
I0319 13:54:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 13:55:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:55:03.409777  543705 memory.go:184] no items to output this cycle
I0319 13:55:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 13:55:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:55:13.409826  543705 memory.go:191] Add success.
I0319 13:55:13.409831  543705 cpu.go:282] Add success.
W0319 13:55:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:55:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:55:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:55:13.420120  543705 net.go:648] Add success.
I0319 13:55:13.422896  543705 net.go:770] primary dev: ETH0
I0319 13:55:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:55:13.422920  543705 net.go:698] Add success.
I0319 13:55:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:55:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:55:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 13:55:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:55:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 13:55:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:55:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:55:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:55:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:55:19.464003  543705 disk_info.go:125] begin check local disk info of client
I0319 13:55:19.466386  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:55:19.466392  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b740 0xc00007b780]
E0319 13:55:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:55:23.409793  543705 memory.go:184] no items to output this cycle
I0319 13:55:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 13:55:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:55:33.409781  543705 memory.go:184] no items to output this cycle
I0319 13:55:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 13:55:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:55:43.409792  543705 memory.go:191] Add success.
I0319 13:55:43.409819  543705 cpu.go:282] Add success.
I0319 13:55:43.419992  543705 net.go:648] Add success.
I0319 13:55:43.422888  543705 net.go:770] primary dev: ETH0
I0319 13:55:43.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:55:43.422913  543705 net.go:698] Add success.
I0319 13:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:55:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:55:53.410428  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:55:53.410503  543705 memory.go:184] no items to output this cycle
I0319 13:55:53.410566  543705 cpu.go:275] no items to output this cycle
E0319 13:56:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:56:03.409784  543705 memory.go:184] no items to output this cycle
I0319 13:56:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:56:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:56:13.409845  543705 memory.go:191] Add success.
I0319 13:56:13.409846  543705 cpu.go:282] Add success.
W0319 13:56:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:56:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:56:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:56:13.420292  543705 net.go:648] Add success.
I0319 13:56:13.423062  543705 net.go:770] primary dev: ETH0
I0319 13:56:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:56:13.423092  543705 net.go:698] Add success.
I0319 13:56:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:56:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:56:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 13:56:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:56:14.456493  543705 disk_worker.go:494] system disk:vda1
I0319 13:56:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:56:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:56:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:56:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:56:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:56:19.467084  543705 disk_info.go:125] begin check local disk info of client
I0319 13:56:19.469457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:56:19.469463  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498240 0xc000498280]
E0319 13:56:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:56:23.409808  543705 memory.go:184] no items to output this cycle
I0319 13:56:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 13:56:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:56:33.409789  543705 memory.go:184] no items to output this cycle
I0319 13:56:33.409862  543705 cpu.go:275] no items to output this cycle
E0319 13:56:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:56:43.409842  543705 memory.go:191] Add success.
I0319 13:56:43.409847  543705 cpu.go:282] Add success.
I0319 13:56:43.420072  543705 net.go:648] Add success.
I0319 13:56:43.422684  543705 net.go:770] primary dev: ETH0
I0319 13:56:43.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:56:43.422709  543705 net.go:698] Add success.
I0319 13:56:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:56:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:56:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:56:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:56:53.409868  543705 memory.go:184] no items to output this cycle
I0319 13:56:53.409937  543705 cpu.go:275] no items to output this cycle
E0319 13:57:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:57:03.409776  543705 memory.go:184] no items to output this cycle
I0319 13:57:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:57:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:57:13.409807  543705 memory.go:191] Add success.
I0319 13:57:13.409809  543705 cpu.go:282] Add success.
W0319 13:57:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:57:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:57:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:57:13.420163  543705 net.go:648] Add success.
I0319 13:57:13.423485  543705 net.go:770] primary dev: ETH0
I0319 13:57:13.423497  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:57:13.423509  543705 net.go:698] Add success.
I0319 13:57:13.429943  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 13:57:13.453114  543705 event_worker.go:152] Polling the log file for events...
I0319 13:57:13.468151  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c0363d42-c9b0-452e-a8aa-721c1da4a994","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 13:57:13.468193  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 13:57:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:57:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 13:57:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0319 13:57:14.456790  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 13:57:14.456807  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 13:57:14.456813  543705 custom_config.go:64] query custom config with name: gpu
I0319 13:57:14.456825  543705 disk_worker.go:494] system disk:vda1
I0319 13:57:14.456857  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 13:57:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 13:57:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:57:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 13:57:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 13:57:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:57:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:57:16.472358  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:57:19.470041  543705 disk_info.go:125] begin check local disk info of client
I0319 13:57:19.472419  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:57:19.472425  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499200 0xc000499240]
E0319 13:57:23.410389  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:57:23.410405  543705 memory.go:184] no items to output this cycle
I0319 13:57:23.410436  543705 cpu.go:275] no items to output this cycle
E0319 13:57:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:57:33.409803  543705 memory.go:184] no items to output this cycle
I0319 13:57:33.409816  543705 cpu.go:275] no items to output this cycle
I0319 13:57:37.728345  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 13:57:37.728352  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 13:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:57:43.410582  543705 memory.go:191] Add success.
I0319 13:57:43.409813  543705 cpu.go:282] Add success.
I0319 13:57:43.420256  543705 net.go:648] Add success.
I0319 13:57:43.423029  543705 net.go:770] primary dev: ETH0
I0319 13:57:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:57:43.423053  543705 net.go:698] Add success.
I0319 13:57:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:57:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:57:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:57:53.409766  543705 memory.go:184] no items to output this cycle
I0319 13:57:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:58:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:58:03.409767  543705 memory.go:184] no items to output this cycle
I0319 13:58:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 13:58:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:58:13.409801  543705 memory.go:191] Add success.
I0319 13:58:13.409807  543705 cpu.go:282] Add success.
W0319 13:58:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:58:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:58:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:58:13.420141  543705 net.go:648] Add success.
I0319 13:58:13.423016  543705 net.go:770] primary dev: ETH0
I0319 13:58:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:58:13.423042  543705 net.go:698] Add success.
I0319 13:58:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:58:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:58:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 13:58:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:58:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 13:58:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:58:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:58:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:58:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:58:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:58:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:58:19.472889  543705 disk_info.go:125] begin check local disk info of client
I0319 13:58:19.475304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:58:19.475311  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7cc0 0xc0003b7d00]
E0319 13:58:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:58:23.409762  543705 memory.go:184] no items to output this cycle
I0319 13:58:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 13:58:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:58:33.409776  543705 memory.go:184] no items to output this cycle
I0319 13:58:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 13:58:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:58:43.409801  543705 memory.go:191] Add success.
I0319 13:58:43.409804  543705 cpu.go:282] Add success.
I0319 13:58:43.419871  543705 net.go:648] Add success.
I0319 13:58:43.422586  543705 net.go:770] primary dev: ETH0
I0319 13:58:43.422599  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:58:43.422611  543705 net.go:698] Add success.
I0319 13:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:58:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:58:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:58:53.409762  543705 memory.go:184] no items to output this cycle
I0319 13:58:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 13:59:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:59:03.409768  543705 memory.go:184] no items to output this cycle
I0319 13:59:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 13:59:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:59:13.409807  543705 memory.go:191] Add success.
I0319 13:59:13.409807  543705 cpu.go:282] Add success.
W0319 13:59:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 13:59:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 13:59:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 13:59:13.420190  543705 net.go:648] Add success.
I0319 13:59:13.423080  543705 net.go:770] primary dev: ETH0
I0319 13:59:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:59:13.423104  543705 net.go:698] Add success.
I0319 13:59:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 13:59:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 13:59:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 13:59:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 13:59:14.456591  543705 disk_worker.go:494] system disk:vda1
I0319 13:59:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 13:59:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 13:59:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:59:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:59:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 13:59:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 13:59:19.476123  543705 disk_info.go:125] begin check local disk info of client
I0319 13:59:19.478587  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 13:59:19.478594  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329080 0xc0003290c0]
E0319 13:59:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:59:23.409758  543705 memory.go:184] no items to output this cycle
I0319 13:59:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 13:59:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:59:33.409784  543705 memory.go:184] no items to output this cycle
I0319 13:59:33.409789  543705 cpu.go:275] no items to output this cycle
E0319 13:59:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:59:43.409817  543705 memory.go:191] Add success.
I0319 13:59:43.409820  543705 cpu.go:282] Add success.
I0319 13:59:43.419992  543705 net.go:648] Add success.
I0319 13:59:43.423048  543705 net.go:770] primary dev: ETH0
I0319 13:59:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0319 13:59:43.423073  543705 net.go:698] Add success.
I0319 13:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 13:59:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 13:59:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0319 13:59:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 13:59:53.409777  543705 memory.go:184] no items to output this cycle
I0319 13:59:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 14:00:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:00:03.409765  543705 memory.go:184] no items to output this cycle
I0319 14:00:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 14:00:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:00:13.409818  543705 memory.go:191] Add success.
I0319 14:00:13.409836  543705 cpu.go:282] Add success.
W0319 14:00:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:00:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:00:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:00:13.420514  543705 net.go:648] Add success.
I0319 14:00:13.423450  543705 net.go:770] primary dev: ETH0
I0319 14:00:13.423465  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:00:13.423478  543705 net.go:698] Add success.
I0319 14:00:13.467826  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4746de8d-b88d-4912-b0f7-123f6208adc6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:00:13.467862  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:00:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:00:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:00:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 14:00:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:00:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 14:00:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:00:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:00:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:00:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:00:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:00:19.479082  543705 disk_info.go:125] begin check local disk info of client
I0319 14:00:19.481463  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:00:19.481469  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c49c0 0xc0000c4a00]
E0319 14:00:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:00:23.409792  543705 memory.go:184] no items to output this cycle
I0319 14:00:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:00:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:00:33.409779  543705 memory.go:184] no items to output this cycle
I0319 14:00:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 14:00:37.729337  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:00:37.729345  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:00:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:00:43.410673  543705 memory.go:191] Add success.
I0319 14:00:43.409807  543705 cpu.go:282] Add success.
I0319 14:00:43.420376  543705 net.go:648] Add success.
I0319 14:00:43.423513  543705 net.go:770] primary dev: ETH0
I0319 14:00:43.423525  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:00:43.423537  543705 net.go:698] Add success.
I0319 14:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:00:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:00:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:00:53.410475  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:00:53.410493  543705 memory.go:184] no items to output this cycle
I0319 14:00:53.410507  543705 cpu.go:275] no items to output this cycle
E0319 14:01:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:01:03.409761  543705 memory.go:184] no items to output this cycle
I0319 14:01:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:01:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:01:13.409817  543705 memory.go:191] Add success.
I0319 14:01:13.409817  543705 cpu.go:282] Add success.
W0319 14:01:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:01:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:01:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:01:13.420190  543705 net.go:648] Add success.
I0319 14:01:13.422910  543705 net.go:770] primary dev: ETH0
I0319 14:01:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:01:13.422935  543705 net.go:698] Add success.
I0319 14:01:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:01:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:01:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 14:01:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:01:14.456559  543705 disk_worker.go:494] system disk:vda1
I0319 14:01:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:01:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:01:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:01:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:01:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:01:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:01:19.482099  543705 disk_info.go:125] begin check local disk info of client
I0319 14:01:19.484500  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:01:19.484506  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c80 0xc0000c4cc0]
E0319 14:01:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:01:23.409763  543705 memory.go:184] no items to output this cycle
I0319 14:01:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 14:01:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:01:33.409780  543705 memory.go:184] no items to output this cycle
I0319 14:01:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:01:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:01:43.409800  543705 memory.go:191] Add success.
I0319 14:01:43.409802  543705 cpu.go:282] Add success.
I0319 14:01:43.420177  543705 net.go:648] Add success.
I0319 14:01:43.423078  543705 net.go:770] primary dev: ETH0
I0319 14:01:43.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:01:43.423105  543705 net.go:698] Add success.
I0319 14:01:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:01:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:01:53.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:01:53.409867  543705 memory.go:184] no items to output this cycle
I0319 14:01:53.409959  543705 cpu.go:275] no items to output this cycle
E0319 14:02:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:02:03.409772  543705 memory.go:184] no items to output this cycle
I0319 14:02:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 14:02:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:02:13.409790  543705 memory.go:191] Add success.
W0319 14:02:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:02:13.409823  543705 cpu.go:282] Add success.
W0319 14:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:02:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:02:13.420156  543705 net.go:648] Add success.
I0319 14:02:13.422701  543705 net.go:770] primary dev: ETH0
I0319 14:02:13.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:02:13.422732  543705 net.go:698] Add success.
W0319 14:02:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:02:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 14:02:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:02:14.456928  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:02:14.456937  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:02:14.456943  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:02:14.457005  543705 disk_worker.go:494] system disk:vda1
I0319 14:02:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:02:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:02:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:02:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:02:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:02:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:02:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:02:16.472299  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:02:19.485116  543705 disk_info.go:125] begin check local disk info of client
I0319 14:02:19.487465  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:02:19.487470  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5900 0xc0000c5940]
E0319 14:02:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:02:23.409795  543705 memory.go:184] no items to output this cycle
I0319 14:02:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 14:02:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:02:33.409785  543705 memory.go:184] no items to output this cycle
I0319 14:02:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 14:02:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:02:43.409802  543705 memory.go:191] Add success.
I0319 14:02:43.409803  543705 cpu.go:282] Add success.
I0319 14:02:43.420012  543705 net.go:648] Add success.
I0319 14:02:43.422526  543705 net.go:770] primary dev: ETH0
I0319 14:02:43.422540  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:02:43.422553  543705 net.go:698] Add success.
I0319 14:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:02:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:02:53.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:02:53.409850  543705 memory.go:184] no items to output this cycle
I0319 14:02:53.409932  543705 cpu.go:275] no items to output this cycle
E0319 14:03:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:03:03.409766  543705 memory.go:184] no items to output this cycle
I0319 14:03:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 14:03:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:03:13.409827  543705 memory.go:191] Add success.
I0319 14:03:13.409837  543705 cpu.go:282] Add success.
W0319 14:03:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:03:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:03:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:03:13.420237  543705 net.go:648] Add success.
I0319 14:03:13.423041  543705 net.go:770] primary dev: ETH0
I0319 14:03:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:03:13.423066  543705 net.go:698] Add success.
I0319 14:03:13.469405  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fef43cf8-4feb-4ec7-a269-03e9ef9d8e2b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:03:13.469436  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:03:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:03:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:03:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 14:03:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:03:14.456518  543705 disk_worker.go:494] system disk:vda1
I0319 14:03:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:03:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:03:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:03:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:03:16.472498  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:03:19.488130  543705 disk_info.go:125] begin check local disk info of client
I0319 14:03:19.490559  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:03:19.490565  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329a00 0xc000329d80]
E0319 14:03:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:03:23.409778  543705 memory.go:184] no items to output this cycle
I0319 14:03:23.409782  543705 cpu.go:275] no items to output this cycle
E0319 14:03:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:03:33.409804  543705 memory.go:184] no items to output this cycle
I0319 14:03:33.409817  543705 cpu.go:275] no items to output this cycle
I0319 14:03:37.729733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:03:37.729740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:03:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:03:43.411158  543705 memory.go:191] Add success.
I0319 14:03:43.409826  543705 cpu.go:282] Add success.
I0319 14:03:43.419830  543705 net.go:648] Add success.
I0319 14:03:43.422742  543705 net.go:770] primary dev: ETH0
I0319 14:03:43.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:03:43.422770  543705 net.go:698] Add success.
I0319 14:03:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:03:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:03:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:03:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:03:53.409767  543705 memory.go:184] no items to output this cycle
I0319 14:03:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 14:04:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:04:03.409805  543705 memory.go:184] no items to output this cycle
I0319 14:04:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 14:04:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:04:13.409799  543705 memory.go:191] Add success.
W0319 14:04:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:04:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:04:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:04:13.409843  543705 cpu.go:282] Add success.
I0319 14:04:13.420389  543705 net.go:648] Add success.
I0319 14:04:13.423139  543705 net.go:770] primary dev: ETH0
I0319 14:04:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:04:13.423169  543705 net.go:698] Add success.
I0319 14:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:04:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:04:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 14:04:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:04:14.456588  543705 disk_worker.go:494] system disk:vda1
I0319 14:04:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:04:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:04:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:04:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:04:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:04:19.491138  543705 disk_info.go:125] begin check local disk info of client
I0319 14:04:19.493559  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:04:19.493566  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052be40 0xc00052be80]
E0319 14:04:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:04:23.409790  543705 memory.go:184] no items to output this cycle
I0319 14:04:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:04:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:04:33.409814  543705 memory.go:184] no items to output this cycle
I0319 14:04:33.409826  543705 cpu.go:275] no items to output this cycle
E0319 14:04:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:04:43.409785  543705 memory.go:191] Add success.
I0319 14:04:43.409819  543705 cpu.go:282] Add success.
I0319 14:04:43.419912  543705 net.go:648] Add success.
I0319 14:04:43.422806  543705 net.go:770] primary dev: ETH0
I0319 14:04:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:04:43.422836  543705 net.go:698] Add success.
I0319 14:04:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:04:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:04:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:04:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:04:53.409838  543705 memory.go:184] no items to output this cycle
I0319 14:04:53.409938  543705 cpu.go:275] no items to output this cycle
E0319 14:05:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:05:03.409789  543705 memory.go:184] no items to output this cycle
I0319 14:05:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 14:05:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:05:13.409806  543705 memory.go:191] Add success.
I0319 14:05:13.409808  543705 cpu.go:282] Add success.
W0319 14:05:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:05:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:05:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:05:13.420155  543705 net.go:648] Add success.
I0319 14:05:13.422821  543705 net.go:770] primary dev: ETH0
I0319 14:05:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:05:13.422847  543705 net.go:698] Add success.
I0319 14:05:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:05:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:05:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0319 14:05:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:05:14.456638  543705 disk_worker.go:494] system disk:vda1
I0319 14:05:14.456668  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:05:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:05:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:05:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:05:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:05:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:05:19.494160  543705 disk_info.go:125] begin check local disk info of client
I0319 14:05:19.496538  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:05:19.496544  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6e80 0xc0003b6ec0]
E0319 14:05:23.410217  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:05:23.410235  543705 memory.go:184] no items to output this cycle
I0319 14:05:23.410257  543705 cpu.go:275] no items to output this cycle
E0319 14:05:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:05:33.409810  543705 memory.go:184] no items to output this cycle
I0319 14:05:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 14:05:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:05:43.409821  543705 memory.go:191] Add success.
I0319 14:05:43.409825  543705 cpu.go:282] Add success.
I0319 14:05:43.420066  543705 net.go:648] Add success.
I0319 14:05:43.423008  543705 net.go:770] primary dev: ETH0
I0319 14:05:43.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:05:43.423035  543705 net.go:698] Add success.
I0319 14:05:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:05:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:05:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:05:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:05:53.409803  543705 memory.go:184] no items to output this cycle
I0319 14:05:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 14:06:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:06:03.409773  543705 memory.go:184] no items to output this cycle
I0319 14:06:03.409778  543705 cpu.go:275] no items to output this cycle
E0319 14:06:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:06:13.409801  543705 memory.go:191] Add success.
I0319 14:06:13.409823  543705 cpu.go:282] Add success.
W0319 14:06:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:06:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:06:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:06:13.420117  543705 net.go:648] Add success.
I0319 14:06:13.422869  543705 net.go:770] primary dev: ETH0
I0319 14:06:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:06:13.422893  543705 net.go:698] Add success.
I0319 14:06:13.468512  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e53133b4-5a6b-4ea1-a542-c418ee8bd446","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:06:13.468546  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:06:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:06:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:06:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0319 14:06:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:06:14.456537  543705 disk_worker.go:494] system disk:vda1
I0319 14:06:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:06:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:06:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:06:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:06:19.497172  543705 disk_info.go:125] begin check local disk info of client
I0319 14:06:19.499556  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:06:19.499562  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6880 0xc0003b68c0]
E0319 14:06:23.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:06:23.410265  543705 memory.go:184] no items to output this cycle
I0319 14:06:23.410298  543705 cpu.go:275] no items to output this cycle
E0319 14:06:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:06:33.409766  543705 memory.go:184] no items to output this cycle
I0319 14:06:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 14:06:37.731367  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:06:37.731375  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:06:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:06:43.410766  543705 memory.go:191] Add success.
I0319 14:06:43.409802  543705 cpu.go:282] Add success.
I0319 14:06:43.420482  543705 net.go:648] Add success.
I0319 14:06:43.423544  543705 net.go:770] primary dev: ETH0
I0319 14:06:43.423560  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:06:43.423573  543705 net.go:698] Add success.
I0319 14:06:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:06:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:06:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:06:53.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:06:53.410381  543705 memory.go:184] no items to output this cycle
I0319 14:06:53.410395  543705 cpu.go:275] no items to output this cycle
E0319 14:07:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:07:03.409791  543705 cpu.go:275] no items to output this cycle
I0319 14:07:03.409793  543705 memory.go:184] no items to output this cycle
E0319 14:07:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:07:13.409777  543705 memory.go:191] Add success.
W0319 14:07:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:07:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:07:13.409821  543705 cpu.go:282] Add success.
I0319 14:07:13.420254  543705 net.go:648] Add success.
I0319 14:07:13.423418  543705 net.go:770] primary dev: ETH0
I0319 14:07:13.423433  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:07:13.423447  543705 net.go:698] Add success.
I0319 14:07:13.452971  543705 event_worker.go:152] Polling the log file for events...
W0319 14:07:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:07:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 14:07:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:07:14.456875  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:07:14.456884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:07:14.456890  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:07:14.456974  543705 disk_worker.go:494] system disk:vda1
I0319 14:07:14.457015  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:07:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:07:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:07:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:07:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:07:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:07:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:07:16.472349  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:07:19.499984  543705 disk_info.go:125] begin check local disk info of client
I0319 14:07:19.502427  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:07:19.502433  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7e80 0xc0003b7ec0]
E0319 14:07:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:07:23.409771  543705 memory.go:184] no items to output this cycle
I0319 14:07:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 14:07:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:07:33.409773  543705 memory.go:184] no items to output this cycle
I0319 14:07:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:07:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:07:43.409805  543705 memory.go:191] Add success.
I0319 14:07:43.409830  543705 cpu.go:282] Add success.
I0319 14:07:43.420014  543705 net.go:648] Add success.
I0319 14:07:43.422724  543705 net.go:770] primary dev: ETH0
I0319 14:07:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:07:43.422750  543705 net.go:698] Add success.
I0319 14:07:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:07:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:07:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:07:53.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:07:53.409893  543705 cpu.go:275] no items to output this cycle
I0319 14:07:53.409902  543705 memory.go:184] no items to output this cycle
E0319 14:08:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:08:03.409803  543705 memory.go:184] no items to output this cycle
I0319 14:08:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 14:08:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:08:13.409793  543705 memory.go:191] Add success.
I0319 14:08:13.409794  543705 cpu.go:282] Add success.
W0319 14:08:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:08:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:08:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:08:13.420148  543705 net.go:648] Add success.
I0319 14:08:13.422782  543705 net.go:770] primary dev: ETH0
I0319 14:08:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:08:13.422811  543705 net.go:698] Add success.
I0319 14:08:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:08:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:08:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 14:08:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:08:14.456610  543705 disk_worker.go:494] system disk:vda1
I0319 14:08:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:08:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:08:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:08:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:08:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:08:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:08:19.503206  543705 disk_info.go:125] begin check local disk info of client
I0319 14:08:19.505577  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:08:19.505584  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328e40 0xc000328e80]
E0319 14:08:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:08:23.409806  543705 memory.go:184] no items to output this cycle
I0319 14:08:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 14:08:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:08:33.409779  543705 memory.go:184] no items to output this cycle
I0319 14:08:33.409789  543705 cpu.go:275] no items to output this cycle
E0319 14:08:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:08:43.409798  543705 memory.go:191] Add success.
I0319 14:08:43.409801  543705 cpu.go:282] Add success.
I0319 14:08:43.419927  543705 net.go:648] Add success.
I0319 14:08:43.422345  543705 net.go:770] primary dev: ETH0
I0319 14:08:43.422360  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:08:43.422376  543705 net.go:698] Add success.
I0319 14:08:46.457666  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:08:46.457729  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:08:46.457756  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:08:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:08:53.409765  543705 memory.go:184] no items to output this cycle
I0319 14:08:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 14:09:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:09:03.409786  543705 memory.go:184] no items to output this cycle
I0319 14:09:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 14:09:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:09:13.409789  543705 memory.go:191] Add success.
W0319 14:09:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:09:13.409820  543705 cpu.go:282] Add success.
W0319 14:09:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:09:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:09:13.420161  543705 net.go:648] Add success.
I0319 14:09:13.423234  543705 net.go:770] primary dev: ETH0
I0319 14:09:13.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:09:13.423260  543705 net.go:698] Add success.
I0319 14:09:14.392766  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c4e04c2-9b20-475f-9b13-759e43f45954","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:09:14.392801  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:09:14.454143  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:09:14.454331  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:09:14.454341  543705 disk_worker.go:708] disk space is not compliant
W0319 14:09:14.454343  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:09:14.455683  543705 disk_worker.go:494] system disk:vda1
I0319 14:09:14.455726  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:09:15.455609  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:09:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:09:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:09:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:09:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:09:19.506223  543705 disk_info.go:125] begin check local disk info of client
I0319 14:09:19.508670  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:09:19.508678  543705 disk_info.go:196] parse disk info done, disk is : [0xc000356540 0xc000356580]
E0319 14:09:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:09:23.409802  543705 memory.go:184] no items to output this cycle
I0319 14:09:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 14:09:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:09:33.409774  543705 memory.go:184] no items to output this cycle
I0319 14:09:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 14:09:37.732348  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:09:37.732355  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:09:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:09:43.410557  543705 memory.go:191] Add success.
I0319 14:09:43.409801  543705 cpu.go:282] Add success.
I0319 14:09:43.420287  543705 net.go:648] Add success.
I0319 14:09:43.422959  543705 net.go:770] primary dev: ETH0
I0319 14:09:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:09:43.422985  543705 net.go:698] Add success.
I0319 14:09:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:09:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:09:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:09:53.409777  543705 memory.go:184] no items to output this cycle
I0319 14:09:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 14:10:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:10:03.409777  543705 memory.go:184] no items to output this cycle
I0319 14:10:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 14:10:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:10:13.409812  543705 memory.go:191] Add success.
I0319 14:10:13.409815  543705 cpu.go:282] Add success.
W0319 14:10:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:10:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:10:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:10:13.420075  543705 net.go:648] Add success.
I0319 14:10:13.423144  543705 net.go:770] primary dev: ETH0
I0319 14:10:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:10:13.423171  543705 net.go:698] Add success.
I0319 14:10:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:10:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:10:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 14:10:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:10:14.456520  543705 disk_worker.go:494] system disk:vda1
I0319 14:10:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:10:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:10:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:10:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:10:16.472445  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:10:19.508762  543705 disk_info.go:125] begin check local disk info of client
I0319 14:10:19.511227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:10:19.511234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7340 0xc0003b7380]
E0319 14:10:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:10:23.409767  543705 memory.go:184] no items to output this cycle
I0319 14:10:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 14:10:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:10:33.409787  543705 memory.go:184] no items to output this cycle
I0319 14:10:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 14:10:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:10:43.409802  543705 memory.go:191] Add success.
I0319 14:10:43.409803  543705 cpu.go:282] Add success.
I0319 14:10:43.420058  543705 net.go:648] Add success.
I0319 14:10:43.422951  543705 net.go:770] primary dev: ETH0
I0319 14:10:43.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:10:43.422977  543705 net.go:698] Add success.
I0319 14:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:10:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:10:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:10:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:10:53.409777  543705 memory.go:184] no items to output this cycle
I0319 14:10:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:11:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:11:03.409810  543705 memory.go:184] no items to output this cycle
I0319 14:11:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 14:11:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:11:13.409850  543705 memory.go:191] Add success.
I0319 14:11:13.409853  543705 cpu.go:282] Add success.
W0319 14:11:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:11:13.409898  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:11:13.409902  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:11:13.420241  543705 net.go:648] Add success.
I0319 14:11:13.423062  543705 net.go:770] primary dev: ETH0
I0319 14:11:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:11:13.423091  543705 net.go:698] Add success.
I0319 14:11:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:11:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:11:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 14:11:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:11:14.456498  543705 disk_worker.go:494] system disk:vda1
I0319 14:11:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:11:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:11:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:11:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:11:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:11:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:11:19.512257  543705 disk_info.go:125] begin check local disk info of client
I0319 14:11:19.514715  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:11:19.514722  543705 disk_info.go:196] parse disk info done, disk is : [0xc000356ac0 0xc000356b00]
E0319 14:11:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:11:23.409797  543705 memory.go:184] no items to output this cycle
I0319 14:11:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 14:11:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:11:33.409766  543705 memory.go:184] no items to output this cycle
I0319 14:11:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 14:11:43.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:11:43.409847  543705 memory.go:191] Add success.
I0319 14:11:43.409847  543705 cpu.go:282] Add success.
I0319 14:11:43.419982  543705 net.go:648] Add success.
I0319 14:11:43.422947  543705 net.go:770] primary dev: ETH0
I0319 14:11:43.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:11:43.422975  543705 net.go:698] Add success.
I0319 14:11:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:11:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:11:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:11:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:11:53.409783  543705 memory.go:184] no items to output this cycle
I0319 14:11:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 14:12:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:12:03.409804  543705 memory.go:184] no items to output this cycle
I0319 14:12:03.409834  543705 cpu.go:275] no items to output this cycle
E0319 14:12:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:12:13.409787  543705 memory.go:191] Add success.
W0319 14:12:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:12:13.409813  543705 cpu.go:282] Add success.
W0319 14:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:12:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:12:13.419717  543705 net.go:648] Add success.
I0319 14:12:13.422849  543705 net.go:770] primary dev: ETH0
I0319 14:12:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:12:13.422878  543705 net.go:698] Add success.
I0319 14:12:13.527713  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84534fd2-1122-4215-8895-696e4549e61d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:12:13.527744  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 14:12:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:12:14.455242  543705 disk_worker.go:708] disk space is not compliant
W0319 14:12:14.455246  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:12:14.455923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:12:14.455932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:12:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:12:14.456855  543705 disk_worker.go:494] system disk:vda1
I0319 14:12:14.456884  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:12:15.456778  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:12:15.456787  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:12:16.458013  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:12:16.458013  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:12:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:12:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:12:16.472448  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:12:19.514805  543705 disk_info.go:125] begin check local disk info of client
I0319 14:12:19.517325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:12:19.517333  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dce40 0xc0004dce80]
E0319 14:12:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:12:23.409767  543705 memory.go:184] no items to output this cycle
I0319 14:12:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:12:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:12:33.409798  543705 memory.go:184] no items to output this cycle
I0319 14:12:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 14:12:37.733361  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:12:37.733369  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:12:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:12:43.410584  543705 memory.go:191] Add success.
I0319 14:12:43.409823  543705 cpu.go:282] Add success.
I0319 14:12:43.420346  543705 net.go:648] Add success.
I0319 14:12:43.422859  543705 net.go:770] primary dev: ETH0
I0319 14:12:43.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:12:43.422898  543705 net.go:698] Add success.
I0319 14:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:12:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:12:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:12:53.409776  543705 memory.go:184] no items to output this cycle
I0319 14:12:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 14:13:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:13:03.409776  543705 memory.go:184] no items to output this cycle
I0319 14:13:03.409778  543705 cpu.go:275] no items to output this cycle
E0319 14:13:13.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:13:13.409918  543705 memory.go:191] Add success.
W0319 14:13:13.409960  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:13:13.409979  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:13:13.409982  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:13:13.410026  543705 cpu.go:282] Add success.
I0319 14:13:13.419738  543705 net.go:648] Add success.
I0319 14:13:13.422719  543705 net.go:770] primary dev: ETH0
I0319 14:13:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:13:13.422747  543705 net.go:698] Add success.
I0319 14:13:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:13:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:13:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0319 14:13:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:13:14.456585  543705 disk_worker.go:494] system disk:vda1
I0319 14:13:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:13:15.456026  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:13:16.458054  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:13:16.458121  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:13:16.458150  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:13:16.472547  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:13:19.518297  543705 disk_info.go:125] begin check local disk info of client
I0319 14:13:19.520756  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:13:19.520762  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1680 0xc0002a16c0]
E0319 14:13:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:13:23.409796  543705 memory.go:184] no items to output this cycle
I0319 14:13:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 14:13:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:13:33.409781  543705 memory.go:184] no items to output this cycle
I0319 14:13:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 14:13:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:13:43.409825  543705 memory.go:191] Add success.
I0319 14:13:43.409836  543705 cpu.go:282] Add success.
I0319 14:13:43.420134  543705 net.go:648] Add success.
I0319 14:13:43.423028  543705 net.go:770] primary dev: ETH0
I0319 14:13:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:13:43.423055  543705 net.go:698] Add success.
I0319 14:13:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:13:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:13:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:13:53.409775  543705 memory.go:184] no items to output this cycle
I0319 14:13:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 14:14:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:14:03.409762  543705 memory.go:184] no items to output this cycle
I0319 14:14:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 14:14:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:14:13.409913  543705 cpu.go:282] Add success.
I0319 14:14:13.409917  543705 memory.go:191] Add success.
W0319 14:14:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:14:13.409978  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:14:13.409981  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:14:13.419753  543705 net.go:648] Add success.
I0319 14:14:13.422916  543705 net.go:770] primary dev: ETH0
I0319 14:14:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:14:13.422942  543705 net.go:698] Add success.
I0319 14:14:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:14:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:14:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 14:14:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:14:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 14:14:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:14:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:14:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:14:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:14:16.472442  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:14:19.521302  543705 disk_info.go:125] begin check local disk info of client
I0319 14:14:19.523793  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:14:19.523799  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7780 0xc0003b77c0]
E0319 14:14:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:14:23.409769  543705 memory.go:184] no items to output this cycle
I0319 14:14:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 14:14:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:14:33.409761  543705 memory.go:184] no items to output this cycle
I0319 14:14:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 14:14:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:14:43.409796  543705 memory.go:191] Add success.
I0319 14:14:43.409797  543705 cpu.go:282] Add success.
I0319 14:14:43.419865  543705 net.go:648] Add success.
I0319 14:14:43.422534  543705 net.go:770] primary dev: ETH0
I0319 14:14:43.422549  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:14:43.422560  543705 net.go:698] Add success.
I0319 14:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:14:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:14:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:14:53.409778  543705 memory.go:184] no items to output this cycle
I0319 14:14:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 14:15:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:15:03.409799  543705 memory.go:184] no items to output this cycle
I0319 14:15:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:15:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:15:13.409787  543705 memory.go:191] Add success.
W0319 14:15:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:15:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:15:13.409837  543705 cpu.go:282] Add success.
I0319 14:15:13.420152  543705 net.go:770] primary dev: ETH0
I0319 14:15:13.420166  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:15:13.420180  543705 net.go:698] Add success.
I0319 14:15:13.420498  543705 net.go:648] Add success.
I0319 14:15:13.469232  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6b89d40-b7ec-4288-be59-fa7ba1bbf83c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:15:13.469263  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:15:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:15:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 14:15:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:15:14.456538  543705 disk_worker.go:494] system disk:vda1
I0319 14:15:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:15:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:15:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:15:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:15:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:15:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:15:19.524262  543705 disk_info.go:125] begin check local disk info of client
I0319 14:15:19.526669  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:15:19.526676  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d4c0 0xc00039d500]
E0319 14:15:23.410212  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:15:23.410228  543705 memory.go:184] no items to output this cycle
I0319 14:15:23.410237  543705 cpu.go:275] no items to output this cycle
E0319 14:15:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:15:33.409777  543705 memory.go:184] no items to output this cycle
I0319 14:15:33.409778  543705 cpu.go:275] no items to output this cycle
I0319 14:15:37.733733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:15:37.733739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:15:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:15:43.410594  543705 memory.go:191] Add success.
I0319 14:15:43.409814  543705 cpu.go:282] Add success.
I0319 14:15:43.420301  543705 net.go:648] Add success.
I0319 14:15:43.423271  543705 net.go:770] primary dev: ETH0
I0319 14:15:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:15:43.423296  543705 net.go:698] Add success.
I0319 14:15:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:15:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:15:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:15:53.409785  543705 memory.go:184] no items to output this cycle
I0319 14:15:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 14:16:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:16:03.409773  543705 memory.go:184] no items to output this cycle
I0319 14:16:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 14:16:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:16:13.409797  543705 memory.go:191] Add success.
I0319 14:16:13.409798  543705 cpu.go:282] Add success.
W0319 14:16:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:16:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:16:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:16:13.419727  543705 net.go:648] Add success.
I0319 14:16:13.422314  543705 net.go:770] primary dev: ETH0
I0319 14:16:13.422327  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:16:13.422339  543705 net.go:698] Add success.
I0319 14:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:16:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:16:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 14:16:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:16:14.456497  543705 disk_worker.go:494] system disk:vda1
I0319 14:16:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:16:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:16:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:16:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:16:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:16:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:16:19.527376  543705 disk_info.go:125] begin check local disk info of client
I0319 14:16:19.529772  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:16:19.529778  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0319 14:16:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:16:23.409799  543705 memory.go:184] no items to output this cycle
I0319 14:16:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:16:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:16:33.409771  543705 memory.go:184] no items to output this cycle
I0319 14:16:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 14:16:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:16:43.409826  543705 memory.go:191] Add success.
I0319 14:16:43.409832  543705 cpu.go:282] Add success.
I0319 14:16:43.419895  543705 net.go:648] Add success.
I0319 14:16:43.422850  543705 net.go:770] primary dev: ETH0
I0319 14:16:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:16:43.422880  543705 net.go:698] Add success.
I0319 14:16:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:16:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:16:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:16:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:16:53.409794  543705 memory.go:184] no items to output this cycle
I0319 14:16:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 14:17:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:17:03.409773  543705 memory.go:184] no items to output this cycle
I0319 14:17:03.409782  543705 cpu.go:275] no items to output this cycle
W0319 14:17:13.409712  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:17:13.409734  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:17:13.409741  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:17:13.409829  543705 cpu.go:282] Add success.
E0319 14:17:13.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:17:13.409852  543705 memory.go:191] Add success.
I0319 14:17:13.420378  543705 net.go:648] Add success.
I0319 14:17:13.423362  543705 net.go:770] primary dev: ETH0
I0319 14:17:13.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:17:13.423386  543705 net.go:698] Add success.
I0319 14:17:13.452785  543705 event_worker.go:152] Polling the log file for events...
W0319 14:17:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:17:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 14:17:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:17:14.456767  543705 disk_worker.go:494] system disk:vda1
I0319 14:17:14.456807  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:17:14.456979  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:17:14.456988  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:17:14.456993  543705 custom_config.go:64] query custom config with name: gpu
E0319 14:17:15.456773  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:17:15.456781  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:17:16.457871  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:17:16.457871  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:17:16.457923  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:17:16.457943  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:17:16.472255  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:17:19.530335  543705 disk_info.go:125] begin check local disk info of client
I0319 14:17:19.532560  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:17:19.532566  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394c00 0xc000394c40]
E0319 14:17:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:17:23.409763  543705 memory.go:184] no items to output this cycle
I0319 14:17:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 14:17:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:17:33.409795  543705 memory.go:184] no items to output this cycle
I0319 14:17:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:17:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:17:43.409778  543705 memory.go:191] Add success.
I0319 14:17:43.409802  543705 cpu.go:282] Add success.
I0319 14:17:43.419896  543705 net.go:648] Add success.
I0319 14:17:43.422563  543705 net.go:770] primary dev: ETH0
I0319 14:17:43.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:17:43.422592  543705 net.go:698] Add success.
I0319 14:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:17:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:17:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:17:53.409769  543705 memory.go:184] no items to output this cycle
I0319 14:17:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 14:18:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:18:03.409798  543705 memory.go:184] no items to output this cycle
I0319 14:18:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:18:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:18:13.409785  543705 memory.go:191] Add success.
I0319 14:18:13.409806  543705 cpu.go:282] Add success.
W0319 14:18:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:18:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:18:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:18:13.420241  543705 net.go:648] Add success.
I0319 14:18:13.423231  543705 net.go:770] primary dev: ETH0
I0319 14:18:13.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:18:13.423255  543705 net.go:698] Add success.
I0319 14:18:13.468484  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9161e4d0-68d4-4c5e-8bf4-670f3ccb13bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:18:13.468516  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:18:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:18:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:18:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 14:18:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:18:14.456517  543705 disk_worker.go:494] system disk:vda1
I0319 14:18:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:18:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:18:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:18:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:18:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:18:19.533411  543705 disk_info.go:125] begin check local disk info of client
I0319 14:18:19.535902  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:18:19.535909  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b00 0xc0000c4b40]
E0319 14:18:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:18:23.409780  543705 cpu.go:275] no items to output this cycle
I0319 14:18:23.409785  543705 memory.go:184] no items to output this cycle
E0319 14:18:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:18:33.409780  543705 memory.go:184] no items to output this cycle
I0319 14:18:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 14:18:37.733891  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:18:37.733899  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:18:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:18:43.410603  543705 memory.go:191] Add success.
I0319 14:18:43.409804  543705 cpu.go:282] Add success.
I0319 14:18:43.420335  543705 net.go:648] Add success.
I0319 14:18:43.422984  543705 net.go:770] primary dev: ETH0
I0319 14:18:43.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:18:43.423010  543705 net.go:698] Add success.
I0319 14:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:18:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:18:53.409795  543705 memory.go:184] no items to output this cycle
I0319 14:18:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 14:19:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:19:03.409795  543705 memory.go:184] no items to output this cycle
I0319 14:19:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:19:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:19:13.409783  543705 memory.go:191] Add success.
W0319 14:19:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:19:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:19:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:19:13.409827  543705 cpu.go:282] Add success.
I0319 14:19:13.420426  543705 net.go:648] Add success.
I0319 14:19:13.423279  543705 net.go:770] primary dev: ETH0
I0319 14:19:13.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:19:13.423304  543705 net.go:698] Add success.
I0319 14:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:19:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:19:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 14:19:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:19:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 14:19:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:19:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:19:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:19:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:19:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:19:16.472351  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:19:19.535992  543705 disk_info.go:125] begin check local disk info of client
I0319 14:19:19.538398  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:19:19.538404  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056b380 0xc00056b3c0]
E0319 14:19:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:19:23.409790  543705 memory.go:184] no items to output this cycle
I0319 14:19:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 14:19:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:19:33.409780  543705 memory.go:184] no items to output this cycle
I0319 14:19:33.409801  543705 cpu.go:275] no items to output this cycle
E0319 14:19:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:19:43.409822  543705 memory.go:191] Add success.
I0319 14:19:43.409829  543705 cpu.go:282] Add success.
I0319 14:19:43.419965  543705 net.go:648] Add success.
I0319 14:19:43.422991  543705 net.go:770] primary dev: ETH0
I0319 14:19:43.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:19:43.423018  543705 net.go:698] Add success.
I0319 14:19:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:19:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:19:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:19:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:19:53.409776  543705 memory.go:184] no items to output this cycle
I0319 14:19:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 14:20:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:20:03.409793  543705 memory.go:184] no items to output this cycle
I0319 14:20:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 14:20:13.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:20:13.409911  543705 cpu.go:282] Add success.
I0319 14:20:13.409920  543705 memory.go:191] Add success.
W0319 14:20:13.409954  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:20:13.409992  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:20:13.409998  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:20:13.419748  543705 net.go:648] Add success.
I0319 14:20:13.422693  543705 net.go:770] primary dev: ETH0
I0319 14:20:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:20:13.422721  543705 net.go:698] Add success.
I0319 14:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:20:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:20:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 14:20:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:20:14.456565  543705 disk_worker.go:494] system disk:vda1
I0319 14:20:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:20:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:20:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:20:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:20:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:20:19.539382  543705 disk_info.go:125] begin check local disk info of client
I0319 14:20:19.541799  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:20:19.541805  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056ab00 0xc00056ab40]
E0319 14:20:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:20:23.409797  543705 memory.go:184] no items to output this cycle
I0319 14:20:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 14:20:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:20:33.409799  543705 memory.go:184] no items to output this cycle
I0319 14:20:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 14:20:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:20:43.409794  543705 memory.go:191] Add success.
I0319 14:20:43.409815  543705 cpu.go:282] Add success.
I0319 14:20:43.419939  543705 net.go:648] Add success.
I0319 14:20:43.422653  543705 net.go:770] primary dev: ETH0
I0319 14:20:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:20:43.422677  543705 net.go:698] Add success.
I0319 14:20:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:20:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:20:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:20:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:20:53.409804  543705 memory.go:184] no items to output this cycle
I0319 14:20:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:21:03.409799  543705 memory.go:184] no items to output this cycle
I0319 14:21:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 14:21:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:21:13.409810  543705 memory.go:191] Add success.
I0319 14:21:13.409811  543705 cpu.go:282] Add success.
W0319 14:21:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:21:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:21:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:21:13.420222  543705 net.go:648] Add success.
I0319 14:21:13.422832  543705 net.go:770] primary dev: ETH0
I0319 14:21:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:21:13.422858  543705 net.go:698] Add success.
I0319 14:21:13.463316  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"40b64f46-2995-4dff-9894-829b4c7be31b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:21:13.463350  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:21:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:21:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:21:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 14:21:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:21:14.456509  543705 disk_worker.go:494] system disk:vda1
I0319 14:21:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:21:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:21:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:21:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:21:19.542353  543705 disk_info.go:125] begin check local disk info of client
I0319 14:21:19.544741  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:21:19.544748  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6c40 0xc0003b6c80]
E0319 14:21:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:21:23.409793  543705 memory.go:184] no items to output this cycle
I0319 14:21:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:21:33.409797  543705 memory.go:184] no items to output this cycle
I0319 14:21:33.409810  543705 cpu.go:275] no items to output this cycle
I0319 14:21:37.734046  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:21:37.734053  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:21:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:21:43.410595  543705 memory.go:191] Add success.
I0319 14:21:43.409807  543705 cpu.go:282] Add success.
I0319 14:21:43.420288  543705 net.go:648] Add success.
I0319 14:21:43.422873  543705 net.go:770] primary dev: ETH0
I0319 14:21:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:21:43.422900  543705 net.go:698] Add success.
I0319 14:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:21:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:21:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:21:53.409779  543705 memory.go:184] no items to output this cycle
I0319 14:21:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 14:22:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:22:03.409766  543705 memory.go:184] no items to output this cycle
I0319 14:22:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 14:22:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:22:13.409791  543705 memory.go:191] Add success.
W0319 14:22:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:22:13.409825  543705 cpu.go:282] Add success.
W0319 14:22:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:22:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:22:13.420123  543705 net.go:648] Add success.
I0319 14:22:13.422791  543705 net.go:770] primary dev: ETH0
I0319 14:22:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:22:13.422817  543705 net.go:698] Add success.
W0319 14:22:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:22:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 14:22:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:22:14.456808  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:22:14.456817  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:22:14.456823  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:22:14.456866  543705 disk_worker.go:494] system disk:vda1
I0319 14:22:14.456908  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:22:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:22:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:22:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:22:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:22:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:22:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:22:16.472352  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:22:19.545422  543705 disk_info.go:125] begin check local disk info of client
I0319 14:22:19.547781  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:22:19.547787  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab600 0xc0001ab640]
E0319 14:22:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:22:23.409794  543705 memory.go:184] no items to output this cycle
I0319 14:22:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:22:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:22:33.409773  543705 memory.go:184] no items to output this cycle
I0319 14:22:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 14:22:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:22:43.409800  543705 memory.go:191] Add success.
I0319 14:22:43.409802  543705 cpu.go:282] Add success.
I0319 14:22:43.419989  543705 net.go:648] Add success.
I0319 14:22:43.422647  543705 net.go:770] primary dev: ETH0
I0319 14:22:43.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:22:43.422677  543705 net.go:698] Add success.
I0319 14:22:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:22:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:22:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:22:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:22:53.409788  543705 cpu.go:275] no items to output this cycle
I0319 14:22:53.409791  543705 memory.go:184] no items to output this cycle
E0319 14:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:23:03.409771  543705 memory.go:184] no items to output this cycle
I0319 14:23:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:23:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:23:13.409818  543705 memory.go:191] Add success.
I0319 14:23:13.409821  543705 cpu.go:282] Add success.
W0319 14:23:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:23:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:23:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:23:13.420295  543705 net.go:648] Add success.
I0319 14:23:13.423226  543705 net.go:770] primary dev: ETH0
I0319 14:23:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:23:13.423264  543705 net.go:698] Add success.
I0319 14:23:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:23:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:23:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 14:23:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:23:14.456528  543705 disk_worker.go:494] system disk:vda1
I0319 14:23:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:23:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:23:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:23:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:23:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:23:16.472451  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:23:19.548379  543705 disk_info.go:125] begin check local disk info of client
I0319 14:23:19.550804  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:23:19.550810  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5200 0xc0000c5240]
E0319 14:23:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:23:23.409779  543705 cpu.go:275] no items to output this cycle
I0319 14:23:23.409781  543705 memory.go:184] no items to output this cycle
E0319 14:23:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:23:33.409798  543705 memory.go:184] no items to output this cycle
I0319 14:23:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:23:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:23:43.409795  543705 memory.go:191] Add success.
I0319 14:23:43.409811  543705 cpu.go:282] Add success.
I0319 14:23:43.420276  543705 net.go:648] Add success.
I0319 14:23:43.423056  543705 net.go:770] primary dev: ETH0
I0319 14:23:43.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:23:43.423080  543705 net.go:698] Add success.
I0319 14:23:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:23:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:23:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:23:53.409796  543705 memory.go:184] no items to output this cycle
I0319 14:23:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 14:24:03.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:24:03.409858  543705 memory.go:184] no items to output this cycle
I0319 14:24:03.409924  543705 cpu.go:275] no items to output this cycle
E0319 14:24:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:24:13.409804  543705 memory.go:191] Add success.
I0319 14:24:13.409807  543705 cpu.go:282] Add success.
W0319 14:24:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:24:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:24:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:24:13.420180  543705 net.go:648] Add success.
I0319 14:24:13.423015  543705 net.go:770] primary dev: ETH0
I0319 14:24:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:24:13.423041  543705 net.go:698] Add success.
I0319 14:24:13.563774  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49b38030-5282-4d61-9738-4cc3aa7ec8e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:24:13.563807  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:24:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:24:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:24:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 14:24:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:24:14.456612  543705 disk_worker.go:494] system disk:vda1
I0319 14:24:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:24:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:24:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:24:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:24:16.472434  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:24:19.551388  543705 disk_info.go:125] begin check local disk info of client
I0319 14:24:19.553771  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:24:19.553777  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057d8c0 0xc00057d900]
E0319 14:24:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:24:23.409789  543705 memory.go:184] no items to output this cycle
I0319 14:24:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 14:24:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:24:33.409765  543705 memory.go:184] no items to output this cycle
I0319 14:24:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 14:24:37.735378  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:24:37.735398  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:24:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:24:43.410672  543705 memory.go:191] Add success.
I0319 14:24:43.409801  543705 cpu.go:282] Add success.
I0319 14:24:43.420442  543705 net.go:648] Add success.
I0319 14:24:43.423263  543705 net.go:770] primary dev: ETH0
I0319 14:24:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:24:43.423293  543705 net.go:698] Add success.
I0319 14:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:24:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:24:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:24:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:24:53.409778  543705 memory.go:184] no items to output this cycle
I0319 14:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 14:25:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:25:03.409770  543705 memory.go:184] no items to output this cycle
I0319 14:25:03.409895  543705 cpu.go:275] no items to output this cycle
E0319 14:25:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:25:13.409830  543705 memory.go:191] Add success.
I0319 14:25:13.409832  543705 cpu.go:282] Add success.
W0319 14:25:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:25:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:25:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:25:13.420234  543705 net.go:648] Add success.
I0319 14:25:13.423546  543705 net.go:770] primary dev: ETH0
I0319 14:25:13.423575  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:25:13.423589  543705 net.go:698] Add success.
I0319 14:25:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:25:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:25:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 14:25:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:25:14.456594  543705 disk_worker.go:494] system disk:vda1
I0319 14:25:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:25:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:25:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:25:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:25:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:25:19.554470  543705 disk_info.go:125] begin check local disk info of client
I0319 14:25:19.556853  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:25:19.556859  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057cd00 0xc00057cd40]
E0319 14:25:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:25:23.409795  543705 memory.go:184] no items to output this cycle
I0319 14:25:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 14:25:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:25:33.409780  543705 memory.go:184] no items to output this cycle
I0319 14:25:33.409782  543705 cpu.go:275] no items to output this cycle
E0319 14:25:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:25:43.409816  543705 memory.go:191] Add success.
I0319 14:25:43.409820  543705 cpu.go:282] Add success.
I0319 14:25:43.419871  543705 net.go:648] Add success.
I0319 14:25:43.422746  543705 net.go:770] primary dev: ETH0
I0319 14:25:43.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:25:43.422773  543705 net.go:698] Add success.
I0319 14:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:25:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:25:53.409778  543705 memory.go:184] no items to output this cycle
I0319 14:25:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 14:26:03.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:26:03.409914  543705 memory.go:184] no items to output this cycle
I0319 14:26:03.409918  543705 cpu.go:275] no items to output this cycle
E0319 14:26:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:26:13.409795  543705 memory.go:191] Add success.
W0319 14:26:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:26:13.409831  543705 cpu.go:282] Add success.
W0319 14:26:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:26:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:26:13.420163  543705 net.go:648] Add success.
I0319 14:26:13.423249  543705 net.go:770] primary dev: ETH0
I0319 14:26:13.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:26:13.423278  543705 net.go:698] Add success.
I0319 14:26:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:26:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:26:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 14:26:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:26:14.456596  543705 disk_worker.go:494] system disk:vda1
I0319 14:26:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:26:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:26:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:26:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:26:16.472440  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:26:19.557428  543705 disk_info.go:125] begin check local disk info of client
I0319 14:26:19.559847  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:26:19.559853  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7580 0xc0003b75c0]
E0319 14:26:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:26:23.409792  543705 memory.go:184] no items to output this cycle
I0319 14:26:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:26:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:26:33.409772  543705 memory.go:184] no items to output this cycle
I0319 14:26:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 14:26:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:26:43.409826  543705 memory.go:191] Add success.
I0319 14:26:43.409833  543705 cpu.go:282] Add success.
I0319 14:26:43.420013  543705 net.go:648] Add success.
I0319 14:26:43.422900  543705 net.go:770] primary dev: ETH0
I0319 14:26:43.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:26:43.422928  543705 net.go:698] Add success.
I0319 14:26:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:26:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:26:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:26:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:26:53.410261  543705 memory.go:184] no items to output this cycle
I0319 14:26:53.410279  543705 cpu.go:275] no items to output this cycle
E0319 14:27:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:27:03.409895  543705 memory.go:184] no items to output this cycle
I0319 14:27:03.409963  543705 cpu.go:275] no items to output this cycle
E0319 14:27:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:27:13.409805  543705 memory.go:191] Add success.
I0319 14:27:13.409821  543705 cpu.go:282] Add success.
W0319 14:27:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:27:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:27:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:27:13.420138  543705 net.go:648] Add success.
I0319 14:27:13.428822  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 14:27:13.428900  543705 net.go:770] primary dev: ETH0
I0319 14:27:13.428918  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:27:13.428933  543705 net.go:698] Add success.
I0319 14:27:13.453455  543705 event_worker.go:152] Polling the log file for events...
I0319 14:27:13.468601  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6b6e02f5-f55e-4f98-a4da-4035830a4317","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:27:13.468644  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 14:27:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:27:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 14:27:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:27:14.456891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:27:14.456902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:27:14.456907  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:27:14.456977  543705 disk_worker.go:494] system disk:vda1
I0319 14:27:14.457019  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:27:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:27:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:27:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:27:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:27:16.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:27:16.457980  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:27:16.472322  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:27:19.560507  543705 disk_info.go:125] begin check local disk info of client
I0319 14:27:19.562891  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:27:19.562897  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a84c0 0xc0004a8500]
E0319 14:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:27:23.409797  543705 memory.go:184] no items to output this cycle
I0319 14:27:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 14:27:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:27:33.409783  543705 memory.go:184] no items to output this cycle
I0319 14:27:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 14:27:37.736388  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:27:37.736394  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:27:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:27:43.410560  543705 memory.go:191] Add success.
I0319 14:27:43.409838  543705 cpu.go:282] Add success.
I0319 14:27:43.420255  543705 net.go:648] Add success.
I0319 14:27:43.422840  543705 net.go:770] primary dev: ETH0
I0319 14:27:43.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:27:43.422865  543705 net.go:698] Add success.
I0319 14:27:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:27:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:27:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:27:53.409803  543705 memory.go:184] no items to output this cycle
I0319 14:27:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 14:28:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:28:03.409794  543705 memory.go:184] no items to output this cycle
I0319 14:28:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:28:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:28:13.409811  543705 memory.go:191] Add success.
I0319 14:28:13.409817  543705 cpu.go:282] Add success.
W0319 14:28:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:28:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:28:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:28:13.420316  543705 net.go:648] Add success.
I0319 14:28:13.423187  543705 net.go:770] primary dev: ETH0
I0319 14:28:13.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:28:13.423216  543705 net.go:698] Add success.
I0319 14:28:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:28:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:28:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 14:28:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:28:14.456545  543705 disk_worker.go:494] system disk:vda1
I0319 14:28:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:28:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:28:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:28:19.562978  543705 disk_info.go:125] begin check local disk info of client
I0319 14:28:19.565367  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:28:19.565372  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a88c0 0xc0004a8900]
E0319 14:28:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:28:23.409820  543705 memory.go:184] no items to output this cycle
I0319 14:28:23.409828  543705 cpu.go:275] no items to output this cycle
E0319 14:28:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:28:33.409769  543705 memory.go:184] no items to output this cycle
I0319 14:28:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 14:28:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:28:43.409821  543705 memory.go:191] Add success.
I0319 14:28:43.409838  543705 cpu.go:282] Add success.
I0319 14:28:43.420022  543705 net.go:648] Add success.
I0319 14:28:43.422817  543705 net.go:770] primary dev: ETH0
I0319 14:28:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:28:43.422872  543705 net.go:698] Add success.
I0319 14:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:28:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:28:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:28:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:28:53.409804  543705 memory.go:184] no items to output this cycle
I0319 14:28:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 14:29:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:29:03.409788  543705 memory.go:184] no items to output this cycle
I0319 14:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 14:29:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:29:13.409803  543705 memory.go:191] Add success.
I0319 14:29:13.409805  543705 cpu.go:282] Add success.
W0319 14:29:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:29:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:29:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:29:13.420232  543705 net.go:648] Add success.
I0319 14:29:13.423049  543705 net.go:770] primary dev: ETH0
I0319 14:29:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:29:13.423075  543705 net.go:698] Add success.
I0319 14:29:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:29:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:29:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 14:29:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:29:14.456591  543705 disk_worker.go:494] system disk:vda1
I0319 14:29:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:29:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:29:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:29:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:29:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:29:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:29:19.565465  543705 disk_info.go:125] begin check local disk info of client
I0319 14:29:19.567850  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:29:19.567856  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003293c0 0xc000329400]
E0319 14:29:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:29:23.409794  543705 memory.go:184] no items to output this cycle
I0319 14:29:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 14:29:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:29:33.409777  543705 memory.go:184] no items to output this cycle
I0319 14:29:33.409781  543705 cpu.go:275] no items to output this cycle
E0319 14:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:29:43.409790  543705 memory.go:191] Add success.
I0319 14:29:43.409809  543705 cpu.go:282] Add success.
I0319 14:29:43.420044  543705 net.go:648] Add success.
I0319 14:29:43.422910  543705 net.go:770] primary dev: ETH0
I0319 14:29:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:29:43.422936  543705 net.go:698] Add success.
I0319 14:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:29:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:29:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:29:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:29:53.409802  543705 memory.go:184] no items to output this cycle
I0319 14:29:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 14:30:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:30:03.409780  543705 memory.go:184] no items to output this cycle
I0319 14:30:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:30:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:30:13.409800  543705 memory.go:191] Add success.
I0319 14:30:13.409802  543705 cpu.go:282] Add success.
W0319 14:30:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:30:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:30:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:30:13.420209  543705 net.go:648] Add success.
I0319 14:30:13.422907  543705 net.go:770] primary dev: ETH0
I0319 14:30:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:30:13.422931  543705 net.go:698] Add success.
I0319 14:30:13.469214  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e9df7ce7-89cc-471e-b137-29af99d2df0e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:30:13.469251  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:30:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:30:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:30:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 14:30:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:30:14.456734  543705 disk_worker.go:494] system disk:vda1
I0319 14:30:14.456764  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:30:15.455609  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:30:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:30:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:30:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:30:19.568477  543705 disk_info.go:125] begin check local disk info of client
I0319 14:30:19.570908  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:30:19.570913  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab780 0xc0001ab7c0]
E0319 14:30:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:30:23.409809  543705 memory.go:184] no items to output this cycle
I0319 14:30:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 14:30:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:30:33.409764  543705 memory.go:184] no items to output this cycle
I0319 14:30:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 14:30:37.737391  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:30:37.737399  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:30:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:30:43.410670  543705 memory.go:191] Add success.
I0319 14:30:43.409816  543705 cpu.go:282] Add success.
I0319 14:30:43.420176  543705 net.go:770] primary dev: ETH0
I0319 14:30:43.420188  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:30:43.420202  543705 net.go:698] Add success.
I0319 14:30:43.420539  543705 net.go:648] Add success.
I0319 14:30:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:30:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:30:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:30:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:30:53.409799  543705 memory.go:184] no items to output this cycle
I0319 14:30:53.409815  543705 cpu.go:275] no items to output this cycle
I0319 14:31:03.409874  543705 cpu.go:275] no items to output this cycle
E0319 14:31:03.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:31:03.409893  543705 memory.go:184] no items to output this cycle
E0319 14:31:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:31:13.409796  543705 memory.go:191] Add success.
I0319 14:31:13.409802  543705 cpu.go:282] Add success.
W0319 14:31:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:31:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:31:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:31:13.420117  543705 net.go:648] Add success.
I0319 14:31:13.422754  543705 net.go:770] primary dev: ETH0
I0319 14:31:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:31:13.422779  543705 net.go:698] Add success.
I0319 14:31:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:31:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:31:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 14:31:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:31:14.456521  543705 disk_worker.go:494] system disk:vda1
I0319 14:31:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:31:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:31:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:31:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:31:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:31:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:31:19.571498  543705 disk_info.go:125] begin check local disk info of client
I0319 14:31:19.573962  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:31:19.573968  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007adc0 0xc00007ae00]
E0319 14:31:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:31:23.409760  543705 memory.go:184] no items to output this cycle
I0319 14:31:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 14:31:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:31:33.409802  543705 memory.go:184] no items to output this cycle
I0319 14:31:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:31:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:31:43.409794  543705 cpu.go:282] Add success.
I0319 14:31:43.409802  543705 memory.go:191] Add success.
I0319 14:31:43.419961  543705 net.go:648] Add success.
I0319 14:31:43.423065  543705 net.go:770] primary dev: ETH0
I0319 14:31:43.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:31:43.423091  543705 net.go:698] Add success.
I0319 14:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:31:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:31:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:31:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:31:53.409766  543705 memory.go:184] no items to output this cycle
I0319 14:31:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 14:32:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:32:03.409786  543705 memory.go:184] no items to output this cycle
I0319 14:32:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 14:32:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:32:13.409791  543705 memory.go:191] Add success.
W0319 14:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:32:13.409825  543705 cpu.go:282] Add success.
W0319 14:32:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:32:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:32:13.420120  543705 net.go:648] Add success.
I0319 14:32:13.422870  543705 net.go:770] primary dev: ETH0
I0319 14:32:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:32:13.422895  543705 net.go:698] Add success.
W0319 14:32:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:32:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 14:32:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:32:14.456918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:32:14.456927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:32:14.456934  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:32:14.457000  543705 disk_worker.go:494] system disk:vda1
I0319 14:32:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:32:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:32:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:32:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:32:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:32:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:32:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:32:16.472318  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:32:19.574514  543705 disk_info.go:125] begin check local disk info of client
I0319 14:32:19.576908  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:32:19.576913  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c46c0 0xc0000c4700]
E0319 14:32:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:32:23.409781  543705 memory.go:184] no items to output this cycle
I0319 14:32:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 14:32:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:32:33.409804  543705 memory.go:184] no items to output this cycle
I0319 14:32:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 14:32:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:32:43.409805  543705 memory.go:191] Add success.
I0319 14:32:43.409804  543705 cpu.go:282] Add success.
I0319 14:32:43.419952  543705 net.go:648] Add success.
I0319 14:32:43.422647  543705 net.go:770] primary dev: ETH0
I0319 14:32:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:32:43.422672  543705 net.go:698] Add success.
I0319 14:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:32:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:32:53.410205  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:32:53.410220  543705 memory.go:184] no items to output this cycle
I0319 14:32:53.410229  543705 cpu.go:275] no items to output this cycle
E0319 14:33:03.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:33:03.409865  543705 cpu.go:275] no items to output this cycle
I0319 14:33:03.409886  543705 memory.go:184] no items to output this cycle
E0319 14:33:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:33:13.409830  543705 memory.go:191] Add success.
I0319 14:33:13.409832  543705 cpu.go:282] Add success.
W0319 14:33:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:33:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:33:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:33:13.420197  543705 net.go:648] Add success.
I0319 14:33:13.423034  543705 net.go:770] primary dev: ETH0
I0319 14:33:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:33:13.423060  543705 net.go:698] Add success.
I0319 14:33:13.464001  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"105e095c-5c5f-40b6-8189-f13fa5ec2c9a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:33:13.464035  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:33:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:33:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:33:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 14:33:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:33:14.456526  543705 disk_worker.go:494] system disk:vda1
I0319 14:33:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:33:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:33:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:33:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:33:19.577525  543705 disk_info.go:125] begin check local disk info of client
I0319 14:33:19.579964  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:33:19.579970  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4bc0 0xc0000c4c00]
E0319 14:33:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:33:23.409758  543705 memory.go:184] no items to output this cycle
I0319 14:33:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:33:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:33:33.409771  543705 memory.go:184] no items to output this cycle
I0319 14:33:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 14:33:37.737733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:33:37.737740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:33:43.410680  543705 memory.go:191] Add success.
I0319 14:33:43.409807  543705 cpu.go:282] Add success.
I0319 14:33:43.420445  543705 net.go:648] Add success.
I0319 14:33:43.423552  543705 net.go:770] primary dev: ETH0
I0319 14:33:43.423567  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:33:43.423578  543705 net.go:698] Add success.
I0319 14:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:33:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:33:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:33:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:33:53.409815  543705 memory.go:184] no items to output this cycle
I0319 14:33:53.409829  543705 cpu.go:275] no items to output this cycle
E0319 14:34:03.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:34:03.409920  543705 memory.go:184] no items to output this cycle
I0319 14:34:03.409925  543705 cpu.go:275] no items to output this cycle
E0319 14:34:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:34:13.409830  543705 memory.go:191] Add success.
I0319 14:34:13.409857  543705 cpu.go:282] Add success.
W0319 14:34:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:34:13.413131  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:34:13.413137  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:34:13.419817  543705 net.go:648] Add success.
I0319 14:34:13.422235  543705 net.go:770] primary dev: ETH0
I0319 14:34:13.422252  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:34:13.422266  543705 net.go:698] Add success.
I0319 14:34:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:34:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:34:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 14:34:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:34:14.456669  543705 disk_worker.go:494] system disk:vda1
I0319 14:34:14.456700  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:34:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:34:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:34:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:34:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:34:16.472457  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:34:19.580053  543705 disk_info.go:125] begin check local disk info of client
I0319 14:34:19.582631  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:34:19.582637  543705 disk_info.go:196] parse disk info done, disk is : [0xc000217600 0xc000217640]
E0319 14:34:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:34:23.409802  543705 memory.go:184] no items to output this cycle
I0319 14:34:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:34:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:34:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 14:34:33.409808  543705 memory.go:184] no items to output this cycle
E0319 14:34:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:34:43.409809  543705 memory.go:191] Add success.
I0319 14:34:43.409810  543705 cpu.go:282] Add success.
I0319 14:34:43.419883  543705 net.go:648] Add success.
I0319 14:34:43.422694  543705 net.go:770] primary dev: ETH0
I0319 14:34:43.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:34:43.422719  543705 net.go:698] Add success.
I0319 14:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:34:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:34:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:34:53.409806  543705 memory.go:184] no items to output this cycle
I0319 14:34:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 14:35:03.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:35:03.409939  543705 memory.go:184] no items to output this cycle
I0319 14:35:03.409918  543705 cpu.go:275] no items to output this cycle
W0319 14:35:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:35:13.409744  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:35:13.409750  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:35:13.409811  543705 cpu.go:282] Add success.
E0319 14:35:13.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:35:13.409873  543705 memory.go:191] Add success.
I0319 14:35:13.420344  543705 net.go:648] Add success.
I0319 14:35:13.423116  543705 net.go:770] primary dev: ETH0
I0319 14:35:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:35:13.423146  543705 net.go:698] Add success.
I0319 14:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:35:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:35:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 14:35:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:35:14.456534  543705 disk_worker.go:494] system disk:vda1
I0319 14:35:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:35:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:35:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:35:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:35:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:35:19.583599  543705 disk_info.go:125] begin check local disk info of client
I0319 14:35:19.586004  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:35:19.586010  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4700 0xc0000c4740]
E0319 14:35:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:35:23.409800  543705 memory.go:184] no items to output this cycle
I0319 14:35:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:35:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:35:33.409790  543705 memory.go:184] no items to output this cycle
I0319 14:35:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:35:43.409801  543705 memory.go:191] Add success.
I0319 14:35:43.409802  543705 cpu.go:282] Add success.
I0319 14:35:43.419892  543705 net.go:648] Add success.
I0319 14:35:43.422481  543705 net.go:770] primary dev: ETH0
I0319 14:35:43.422494  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:35:43.422507  543705 net.go:698] Add success.
I0319 14:35:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:35:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:35:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:35:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:35:53.409787  543705 cpu.go:275] no items to output this cycle
I0319 14:35:53.409792  543705 memory.go:184] no items to output this cycle
E0319 14:36:03.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:36:03.409899  543705 memory.go:184] no items to output this cycle
I0319 14:36:03.409900  543705 cpu.go:275] no items to output this cycle
E0319 14:36:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:36:13.409833  543705 memory.go:191] Add success.
I0319 14:36:13.409845  543705 cpu.go:282] Add success.
W0319 14:36:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:36:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:36:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:36:13.420175  543705 net.go:648] Add success.
I0319 14:36:13.422995  543705 net.go:770] primary dev: ETH0
I0319 14:36:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:36:13.423019  543705 net.go:698] Add success.
I0319 14:36:13.469166  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0165d532-0813-4693-8160-9ab6c9a0cdaa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:36:13.469199  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:36:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:36:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:36:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 14:36:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:36:14.456588  543705 disk_worker.go:494] system disk:vda1
I0319 14:36:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:36:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:36:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:36:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:36:19.586622  543705 disk_info.go:125] begin check local disk info of client
I0319 14:36:19.589043  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:36:19.589049  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab200 0xc0001ab240]
E0319 14:36:23.410235  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:36:23.410250  543705 memory.go:184] no items to output this cycle
I0319 14:36:23.410278  543705 cpu.go:275] no items to output this cycle
E0319 14:36:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:36:33.409782  543705 memory.go:184] no items to output this cycle
I0319 14:36:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 14:36:37.737900  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:36:37.737907  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:36:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:36:43.410687  543705 memory.go:191] Add success.
I0319 14:36:43.409846  543705 cpu.go:282] Add success.
I0319 14:36:43.420385  543705 net.go:648] Add success.
I0319 14:36:43.423212  543705 net.go:770] primary dev: ETH0
I0319 14:36:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:36:43.423241  543705 net.go:698] Add success.
I0319 14:36:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:36:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:36:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:36:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:36:53.409790  543705 memory.go:184] no items to output this cycle
I0319 14:36:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 14:37:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:37:03.409777  543705 memory.go:184] no items to output this cycle
I0319 14:37:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 14:37:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:37:13.409807  543705 memory.go:191] Add success.
I0319 14:37:13.409809  543705 cpu.go:282] Add success.
W0319 14:37:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:37:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:37:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:37:13.420157  543705 net.go:648] Add success.
I0319 14:37:13.422810  543705 net.go:770] primary dev: ETH0
I0319 14:37:13.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:37:13.422835  543705 net.go:698] Add success.
I0319 14:37:13.452780  543705 event_worker.go:152] Polling the log file for events...
W0319 14:37:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:37:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 14:37:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:37:14.455896  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:37:14.455905  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:37:14.455911  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:37:14.456554  543705 disk_worker.go:494] system disk:vda1
I0319 14:37:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:37:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:37:15.456868  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:37:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:37:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:37:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:37:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:37:16.472333  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:37:19.589132  543705 disk_info.go:125] begin check local disk info of client
I0319 14:37:19.591503  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:37:19.591509  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad40 0xc0001aad80]
E0319 14:37:23.410720  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:37:23.410734  543705 memory.go:184] no items to output this cycle
I0319 14:37:23.410737  543705 cpu.go:275] no items to output this cycle
E0319 14:37:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:37:33.409775  543705 memory.go:184] no items to output this cycle
I0319 14:37:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 14:37:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:37:43.409818  543705 memory.go:191] Add success.
I0319 14:37:43.409821  543705 cpu.go:282] Add success.
I0319 14:37:43.419924  543705 net.go:648] Add success.
I0319 14:37:43.422736  543705 net.go:770] primary dev: ETH0
I0319 14:37:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:37:43.422764  543705 net.go:698] Add success.
I0319 14:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:37:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:37:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:37:53.409784  543705 memory.go:184] no items to output this cycle
I0319 14:37:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 14:38:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:38:03.409805  543705 memory.go:184] no items to output this cycle
I0319 14:38:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 14:38:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:38:13.409800  543705 memory.go:191] Add success.
I0319 14:38:13.409808  543705 cpu.go:282] Add success.
W0319 14:38:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:38:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:38:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:38:13.420134  543705 net.go:648] Add success.
I0319 14:38:13.422903  543705 net.go:770] primary dev: ETH0
I0319 14:38:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:38:13.422928  543705 net.go:698] Add success.
I0319 14:38:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:38:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:38:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 14:38:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:38:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 14:38:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:38:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:38:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:38:16.472435  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:38:19.591605  543705 disk_info.go:125] begin check local disk info of client
I0319 14:38:19.594018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:38:19.594024  543705 disk_info.go:196] parse disk info done, disk is : [0xc000356bc0 0xc000356c00]
E0319 14:38:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:38:23.409797  543705 memory.go:184] no items to output this cycle
I0319 14:38:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:38:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:38:33.409764  543705 memory.go:184] no items to output this cycle
I0319 14:38:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 14:38:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:38:43.409796  543705 memory.go:191] Add success.
I0319 14:38:43.409796  543705 cpu.go:282] Add success.
I0319 14:38:43.419976  543705 net.go:648] Add success.
I0319 14:38:43.423001  543705 net.go:770] primary dev: ETH0
I0319 14:38:43.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:38:43.423025  543705 net.go:698] Add success.
I0319 14:38:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:38:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:38:53.410210  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:38:53.410340  543705 memory.go:184] no items to output this cycle
I0319 14:38:53.410343  543705 cpu.go:275] no items to output this cycle
E0319 14:39:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:39:03.409772  543705 memory.go:184] no items to output this cycle
I0319 14:39:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:39:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:39:13.409828  543705 memory.go:191] Add success.
I0319 14:39:13.409830  543705 cpu.go:282] Add success.
W0319 14:39:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:39:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:39:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:39:13.420200  543705 net.go:648] Add success.
I0319 14:39:13.422805  543705 net.go:770] primary dev: ETH0
I0319 14:39:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:39:13.422830  543705 net.go:698] Add success.
I0319 14:39:13.463997  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98722457-5358-4142-898e-8541ef22a4c7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:39:13.464029  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:39:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:39:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:39:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 14:39:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:39:14.456731  543705 disk_worker.go:494] system disk:vda1
I0319 14:39:14.456763  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:39:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:39:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:39:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:39:19.594609  543705 disk_info.go:125] begin check local disk info of client
I0319 14:39:19.597018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:39:19.597025  543705 disk_info.go:196] parse disk info done, disk is : [0xc000585500 0xc000585540]
E0319 14:39:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:39:23.409796  543705 memory.go:184] no items to output this cycle
I0319 14:39:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 14:39:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:39:33.409785  543705 memory.go:184] no items to output this cycle
I0319 14:39:33.409792  543705 cpu.go:275] no items to output this cycle
I0319 14:39:37.739391  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:39:37.739397  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:39:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:39:43.410638  543705 memory.go:191] Add success.
I0319 14:39:43.409812  543705 cpu.go:282] Add success.
I0319 14:39:43.420640  543705 net.go:648] Add success.
I0319 14:39:43.423398  543705 net.go:770] primary dev: ETH0
I0319 14:39:43.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:39:43.423423  543705 net.go:698] Add success.
I0319 14:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:39:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:39:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:39:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:39:53.409790  543705 memory.go:184] no items to output this cycle
I0319 14:39:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 14:40:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:40:03.409806  543705 memory.go:184] no items to output this cycle
I0319 14:40:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 14:40:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:40:13.409802  543705 cpu.go:282] Add success.
I0319 14:40:13.409804  543705 memory.go:191] Add success.
W0319 14:40:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:40:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:40:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:40:13.420057  543705 net.go:648] Add success.
I0319 14:40:13.422672  543705 net.go:770] primary dev: ETH0
I0319 14:40:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:40:13.422697  543705 net.go:698] Add success.
I0319 14:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:40:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:40:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 14:40:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:40:14.456593  543705 disk_worker.go:494] system disk:vda1
I0319 14:40:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:40:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:40:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:40:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:40:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:40:19.597619  543705 disk_info.go:125] begin check local disk info of client
I0319 14:40:19.600055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:40:19.600061  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039b6c0 0xc00039b700]
E0319 14:40:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:40:23.409798  543705 memory.go:184] no items to output this cycle
I0319 14:40:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:40:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:40:33.409779  543705 memory.go:184] no items to output this cycle
I0319 14:40:33.409801  543705 cpu.go:275] no items to output this cycle
E0319 14:40:43.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:40:43.409924  543705 memory.go:191] Add success.
I0319 14:40:43.409979  543705 cpu.go:282] Add success.
I0319 14:40:43.419714  543705 net.go:648] Add success.
I0319 14:40:43.423230  543705 net.go:770] primary dev: ETH0
I0319 14:40:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:40:43.423255  543705 net.go:698] Add success.
I0319 14:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:40:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:40:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:40:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:40:53.409787  543705 memory.go:184] no items to output this cycle
I0319 14:40:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:41:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:41:03.409790  543705 memory.go:184] no items to output this cycle
I0319 14:41:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 14:41:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:41:13.409782  543705 memory.go:191] Add success.
W0319 14:41:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:41:13.409818  543705 cpu.go:282] Add success.
W0319 14:41:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:41:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:41:13.420376  543705 net.go:648] Add success.
I0319 14:41:13.423264  543705 net.go:770] primary dev: ETH0
I0319 14:41:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:41:13.423291  543705 net.go:698] Add success.
I0319 14:41:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:41:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:41:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 14:41:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:41:14.456518  543705 disk_worker.go:494] system disk:vda1
I0319 14:41:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:41:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:41:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:41:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:41:16.472453  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:41:19.600144  543705 disk_info.go:125] begin check local disk info of client
I0319 14:41:19.602541  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:41:19.602550  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a000 0xc00032a040]
E0319 14:41:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:41:23.409793  543705 memory.go:184] no items to output this cycle
I0319 14:41:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:41:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:41:33.409797  543705 memory.go:184] no items to output this cycle
I0319 14:41:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:41:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:41:43.409830  543705 memory.go:191] Add success.
I0319 14:41:43.409840  543705 cpu.go:282] Add success.
I0319 14:41:43.419963  543705 net.go:648] Add success.
I0319 14:41:43.422695  543705 net.go:770] primary dev: ETH0
I0319 14:41:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:41:43.422724  543705 net.go:698] Add success.
I0319 14:41:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:41:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:41:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:41:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:41:53.409804  543705 memory.go:184] no items to output this cycle
I0319 14:41:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:42:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:42:03.409780  543705 memory.go:184] no items to output this cycle
I0319 14:42:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 14:42:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:42:13.409798  543705 memory.go:191] Add success.
I0319 14:42:13.409799  543705 cpu.go:282] Add success.
W0319 14:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:42:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:42:13.420063  543705 net.go:648] Add success.
I0319 14:42:13.422961  543705 net.go:770] primary dev: ETH0
I0319 14:42:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:42:13.422991  543705 net.go:698] Add success.
I0319 14:42:13.555969  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f83ddd21-ea4a-466b-b60b-a70dcc3e6744","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:42:13.556003  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 14:42:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:42:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 14:42:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:42:14.455963  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:42:14.455971  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:42:14.455976  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:42:14.456469  543705 disk_worker.go:494] system disk:vda1
I0319 14:42:14.456501  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:42:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 14:42:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:42:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:42:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:42:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:42:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:42:16.472316  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:42:19.602630  543705 disk_info.go:125] begin check local disk info of client
I0319 14:42:19.605034  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:42:19.605040  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369b80 0xc000369bc0]
E0319 14:42:23.410703  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:42:23.410815  543705 memory.go:184] no items to output this cycle
I0319 14:42:23.410830  543705 cpu.go:275] no items to output this cycle
E0319 14:42:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:42:33.409783  543705 memory.go:184] no items to output this cycle
I0319 14:42:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 14:42:37.740394  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:42:37.740401  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:42:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:42:43.410672  543705 memory.go:191] Add success.
I0319 14:42:43.409816  543705 cpu.go:282] Add success.
I0319 14:42:43.420372  543705 net.go:648] Add success.
I0319 14:42:43.423458  543705 net.go:770] primary dev: ETH0
I0319 14:42:43.423481  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:42:43.423496  543705 net.go:698] Add success.
I0319 14:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:42:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:42:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:42:53.409770  543705 memory.go:184] no items to output this cycle
I0319 14:42:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:43:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:43:03.409799  543705 memory.go:184] no items to output this cycle
I0319 14:43:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 14:43:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:43:13.409793  543705 memory.go:191] Add success.
I0319 14:43:13.409810  543705 cpu.go:282] Add success.
W0319 14:43:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:43:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:43:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:43:13.420142  543705 net.go:648] Add success.
I0319 14:43:13.422802  543705 net.go:770] primary dev: ETH0
I0319 14:43:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:43:13.422829  543705 net.go:698] Add success.
I0319 14:43:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:43:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:43:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 14:43:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:43:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 14:43:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:43:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:43:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:43:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:43:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:43:19.605670  543705 disk_info.go:125] begin check local disk info of client
I0319 14:43:19.608038  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:43:19.608044  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e6f40 0xc0000e6f80]
E0319 14:43:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:43:23.409791  543705 memory.go:184] no items to output this cycle
I0319 14:43:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:43:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:43:33.409804  543705 memory.go:184] no items to output this cycle
I0319 14:43:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 14:43:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:43:43.409785  543705 memory.go:191] Add success.
I0319 14:43:43.409807  543705 cpu.go:282] Add success.
I0319 14:43:43.419928  543705 net.go:648] Add success.
I0319 14:43:43.422861  543705 net.go:770] primary dev: ETH0
I0319 14:43:43.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:43:43.422889  543705 net.go:698] Add success.
I0319 14:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:43:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:43:53.409799  543705 memory.go:184] no items to output this cycle
I0319 14:43:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:44:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:44:03.409768  543705 memory.go:184] no items to output this cycle
I0319 14:44:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 14:44:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:44:13.409822  543705 memory.go:191] Add success.
I0319 14:44:13.409831  543705 cpu.go:282] Add success.
W0319 14:44:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:44:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:44:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:44:13.420172  543705 net.go:648] Add success.
I0319 14:44:13.423291  543705 net.go:770] primary dev: ETH0
I0319 14:44:13.423304  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:44:13.423316  543705 net.go:698] Add success.
I0319 14:44:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:44:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:44:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0319 14:44:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:44:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 14:44:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:44:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:44:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:44:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:44:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:44:19.608683  543705 disk_info.go:125] begin check local disk info of client
I0319 14:44:19.611173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:44:19.611179  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305180 0xc0003051c0]
E0319 14:44:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:44:23.409790  543705 memory.go:184] no items to output this cycle
I0319 14:44:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 14:44:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:44:33.409887  543705 memory.go:184] no items to output this cycle
I0319 14:44:33.409887  543705 cpu.go:275] no items to output this cycle
E0319 14:44:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:44:43.409800  543705 memory.go:191] Add success.
I0319 14:44:43.409843  543705 cpu.go:282] Add success.
I0319 14:44:43.420439  543705 net.go:648] Add success.
I0319 14:44:43.423311  543705 net.go:770] primary dev: ETH0
I0319 14:44:43.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:44:43.423336  543705 net.go:698] Add success.
I0319 14:44:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:44:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:44:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:44:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:44:53.409815  543705 memory.go:184] no items to output this cycle
I0319 14:44:53.409830  543705 cpu.go:275] no items to output this cycle
E0319 14:45:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:45:03.409771  543705 memory.go:184] no items to output this cycle
I0319 14:45:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 14:45:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:45:13.409839  543705 memory.go:191] Add success.
I0319 14:45:13.409864  543705 cpu.go:282] Add success.
W0319 14:45:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:45:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:45:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:45:13.420233  543705 net.go:648] Add success.
I0319 14:45:13.422927  543705 net.go:770] primary dev: ETH0
I0319 14:45:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:45:13.422951  543705 net.go:698] Add success.
I0319 14:45:13.469966  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bfcce61a-4060-465c-a824-b850097f06e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:45:13.470001  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:45:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:45:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:45:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0319 14:45:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:45:14.456635  543705 disk_worker.go:494] system disk:vda1
I0319 14:45:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:45:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:45:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:45:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:45:19.611699  543705 disk_info.go:125] begin check local disk info of client
I0319 14:45:19.614099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:45:19.614105  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253500 0xc000253540]
E0319 14:45:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:45:23.409788  543705 memory.go:184] no items to output this cycle
I0319 14:45:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 14:45:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:45:33.409812  543705 memory.go:184] no items to output this cycle
I0319 14:45:33.409829  543705 cpu.go:275] no items to output this cycle
I0319 14:45:37.741409  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:45:37.741415  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:45:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:45:43.410730  543705 memory.go:191] Add success.
I0319 14:45:43.409819  543705 cpu.go:282] Add success.
I0319 14:45:43.420533  543705 net.go:648] Add success.
I0319 14:45:43.423630  543705 net.go:770] primary dev: ETH0
I0319 14:45:43.423644  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:45:43.423657  543705 net.go:698] Add success.
I0319 14:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:45:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:45:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:45:53.409797  543705 memory.go:184] no items to output this cycle
I0319 14:45:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 14:46:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:46:03.409768  543705 memory.go:184] no items to output this cycle
I0319 14:46:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:46:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:46:13.409828  543705 memory.go:191] Add success.
I0319 14:46:13.409836  543705 cpu.go:282] Add success.
W0319 14:46:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:46:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:46:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:46:13.420138  543705 net.go:648] Add success.
I0319 14:46:13.423107  543705 net.go:770] primary dev: ETH0
I0319 14:46:13.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:46:13.423132  543705 net.go:698] Add success.
I0319 14:46:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:46:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:46:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 14:46:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:46:14.456568  543705 disk_worker.go:494] system disk:vda1
I0319 14:46:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:46:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:46:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:46:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:46:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:46:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:46:19.614768  543705 disk_info.go:125] begin check local disk info of client
I0319 14:46:19.617149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:46:19.617155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003246c0 0xc000324700]
E0319 14:46:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:46:23.409798  543705 memory.go:184] no items to output this cycle
I0319 14:46:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 14:46:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:46:33.409770  543705 memory.go:184] no items to output this cycle
I0319 14:46:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 14:46:43.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:46:43.409920  543705 memory.go:191] Add success.
I0319 14:46:43.410014  543705 cpu.go:282] Add success.
I0319 14:46:43.419731  543705 net.go:648] Add success.
I0319 14:46:43.422396  543705 net.go:770] primary dev: ETH0
I0319 14:46:43.422409  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:46:43.422422  543705 net.go:698] Add success.
I0319 14:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:46:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:46:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:46:53.409770  543705 memory.go:184] no items to output this cycle
I0319 14:46:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 14:47:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:47:03.409779  543705 memory.go:184] no items to output this cycle
I0319 14:47:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 14:47:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:47:13.409816  543705 memory.go:191] Add success.
I0319 14:47:13.409827  543705 cpu.go:282] Add success.
W0319 14:47:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:47:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:47:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:47:13.420149  543705 net.go:648] Add success.
I0319 14:47:13.423256  543705 net.go:770] primary dev: ETH0
I0319 14:47:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:47:13.423287  543705 net.go:698] Add success.
I0319 14:47:13.452793  543705 event_worker.go:152] Polling the log file for events...
W0319 14:47:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:47:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 14:47:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:47:14.456784  543705 disk_worker.go:494] system disk:vda1
I0319 14:47:14.456825  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:47:14.457003  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:47:14.457012  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:47:14.457018  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:47:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 14:47:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:47:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:47:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:47:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:47:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:47:16.472338  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:47:19.617673  543705 disk_info.go:125] begin check local disk info of client
I0319 14:47:19.620016  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:47:19.620023  543705 disk_info.go:196] parse disk info done, disk is : [0xc000325800 0xc000325840]
E0319 14:47:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:47:23.409789  543705 memory.go:184] no items to output this cycle
I0319 14:47:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:47:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:47:33.409779  543705 memory.go:184] no items to output this cycle
I0319 14:47:33.409781  543705 cpu.go:275] no items to output this cycle
E0319 14:47:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:47:43.409793  543705 memory.go:191] Add success.
I0319 14:47:43.409796  543705 cpu.go:282] Add success.
I0319 14:47:43.419917  543705 net.go:648] Add success.
I0319 14:47:43.422777  543705 net.go:770] primary dev: ETH0
I0319 14:47:43.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:47:43.422806  543705 net.go:698] Add success.
I0319 14:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:47:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:47:53.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:47:53.409981  543705 cpu.go:275] no items to output this cycle
I0319 14:47:53.410040  543705 memory.go:184] no items to output this cycle
E0319 14:48:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:48:03.409776  543705 memory.go:184] no items to output this cycle
I0319 14:48:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 14:48:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:48:13.409794  543705 memory.go:191] Add success.
W0319 14:48:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:48:13.409827  543705 cpu.go:282] Add success.
W0319 14:48:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:48:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:48:13.420170  543705 net.go:648] Add success.
I0319 14:48:13.423158  543705 net.go:770] primary dev: ETH0
I0319 14:48:13.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:48:13.423188  543705 net.go:698] Add success.
I0319 14:48:13.538188  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b07ba48b-0a88-4b7e-b013-52f472b35bd5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:48:13.538222  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:48:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:48:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:48:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 14:48:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:48:14.456739  543705 disk_worker.go:494] system disk:vda1
I0319 14:48:14.456772  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:48:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:48:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:48:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:48:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:48:19.620749  543705 disk_info.go:125] begin check local disk info of client
I0319 14:48:19.623177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:48:19.623183  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328800 0xc000328840]
E0319 14:48:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:48:23.409796  543705 memory.go:184] no items to output this cycle
I0319 14:48:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:48:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:48:33.409779  543705 memory.go:184] no items to output this cycle
I0319 14:48:33.409782  543705 cpu.go:275] no items to output this cycle
I0319 14:48:37.741733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:48:37.741741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:48:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:48:43.410607  543705 memory.go:191] Add success.
I0319 14:48:43.409797  543705 cpu.go:282] Add success.
I0319 14:48:43.420341  543705 net.go:648] Add success.
I0319 14:48:43.423056  543705 net.go:770] primary dev: ETH0
I0319 14:48:43.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:48:43.423082  543705 net.go:698] Add success.
I0319 14:48:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:48:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:48:53.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:48:53.409879  543705 memory.go:184] no items to output this cycle
I0319 14:48:53.410016  543705 cpu.go:275] no items to output this cycle
E0319 14:49:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:49:03.409808  543705 memory.go:184] no items to output this cycle
I0319 14:49:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 14:49:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:49:13.409823  543705 memory.go:191] Add success.
I0319 14:49:13.409834  543705 cpu.go:282] Add success.
W0319 14:49:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:49:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:49:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:49:13.420176  543705 net.go:648] Add success.
I0319 14:49:13.423230  543705 net.go:770] primary dev: ETH0
I0319 14:49:13.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:49:13.423255  543705 net.go:698] Add success.
I0319 14:49:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:49:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:49:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0319 14:49:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:49:14.456511  543705 disk_worker.go:494] system disk:vda1
I0319 14:49:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:49:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:49:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:49:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:49:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:49:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:49:19.623761  543705 disk_info.go:125] begin check local disk info of client
I0319 14:49:19.626176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:49:19.626183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7cc0 0xc0003b7d00]
E0319 14:49:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:49:23.409790  543705 memory.go:184] no items to output this cycle
I0319 14:49:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 14:49:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:49:33.409776  543705 memory.go:184] no items to output this cycle
I0319 14:49:33.409782  543705 cpu.go:275] no items to output this cycle
E0319 14:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:49:43.409803  543705 memory.go:191] Add success.
I0319 14:49:43.409803  543705 cpu.go:282] Add success.
I0319 14:49:43.419931  543705 net.go:648] Add success.
I0319 14:49:43.423019  543705 net.go:770] primary dev: ETH0
I0319 14:49:43.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:49:43.423050  543705 net.go:698] Add success.
I0319 14:49:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:49:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:49:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:49:53.409904  543705 cpu.go:275] no items to output this cycle
E0319 14:49:53.409917  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:49:53.409961  543705 memory.go:184] no items to output this cycle
E0319 14:50:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:50:03.409766  543705 memory.go:184] no items to output this cycle
I0319 14:50:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 14:50:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:50:13.409805  543705 memory.go:191] Add success.
I0319 14:50:13.409811  543705 cpu.go:282] Add success.
W0319 14:50:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:50:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:50:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:50:13.420219  543705 net.go:648] Add success.
I0319 14:50:13.423092  543705 net.go:770] primary dev: ETH0
I0319 14:50:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:50:13.423117  543705 net.go:698] Add success.
I0319 14:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:50:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:50:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 14:50:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:50:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 14:50:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:50:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:50:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:50:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:50:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:50:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:50:19.626773  543705 disk_info.go:125] begin check local disk info of client
I0319 14:50:19.629186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:50:19.629193  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252cc0 0xc000252d00]
E0319 14:50:23.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:50:23.410423  543705 memory.go:184] no items to output this cycle
I0319 14:50:23.410446  543705 cpu.go:275] no items to output this cycle
E0319 14:50:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:50:33.409773  543705 memory.go:184] no items to output this cycle
I0319 14:50:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 14:50:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:50:43.409807  543705 memory.go:191] Add success.
I0319 14:50:43.409808  543705 cpu.go:282] Add success.
I0319 14:50:43.420041  543705 net.go:648] Add success.
I0319 14:50:43.422643  543705 net.go:770] primary dev: ETH0
I0319 14:50:43.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:50:43.422669  543705 net.go:698] Add success.
I0319 14:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:50:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:50:53.409806  543705 memory.go:184] no items to output this cycle
I0319 14:50:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 14:51:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:51:03.409809  543705 memory.go:184] no items to output this cycle
I0319 14:51:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 14:51:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:51:13.409806  543705 memory.go:191] Add success.
I0319 14:51:13.409807  543705 cpu.go:282] Add success.
W0319 14:51:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:51:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:51:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:51:13.420230  543705 net.go:648] Add success.
I0319 14:51:13.422826  543705 net.go:770] primary dev: ETH0
I0319 14:51:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:51:13.422855  543705 net.go:698] Add success.
I0319 14:51:13.569682  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0b1ceefb-e043-4755-bf79-0def63b161bf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:51:13.569716  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:51:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:51:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:51:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 14:51:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:51:14.456504  543705 disk_worker.go:494] system disk:vda1
I0319 14:51:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:51:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:51:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:51:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:51:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:51:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:51:19.629672  543705 disk_info.go:125] begin check local disk info of client
I0319 14:51:19.632056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:51:19.632063  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6740 0xc0003b6780]
E0319 14:51:23.410402  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:51:23.410416  543705 memory.go:184] no items to output this cycle
I0319 14:51:23.410449  543705 cpu.go:275] no items to output this cycle
E0319 14:51:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:51:33.409765  543705 memory.go:184] no items to output this cycle
I0319 14:51:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 14:51:37.743415  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:51:37.743420  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:51:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:51:43.410662  543705 memory.go:191] Add success.
I0319 14:51:43.409828  543705 cpu.go:282] Add success.
I0319 14:51:43.420378  543705 net.go:648] Add success.
I0319 14:51:43.423102  543705 net.go:770] primary dev: ETH0
I0319 14:51:43.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:51:43.423128  543705 net.go:698] Add success.
I0319 14:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:51:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:51:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:51:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:51:53.409803  543705 memory.go:184] no items to output this cycle
I0319 14:51:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 14:52:03.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:52:03.409895  543705 memory.go:184] no items to output this cycle
I0319 14:52:03.409964  543705 cpu.go:275] no items to output this cycle
E0319 14:52:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:52:13.409796  543705 memory.go:191] Add success.
I0319 14:52:13.409807  543705 cpu.go:282] Add success.
W0319 14:52:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:52:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:52:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:52:13.420052  543705 net.go:648] Add success.
I0319 14:52:13.422889  543705 net.go:770] primary dev: ETH0
I0319 14:52:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:52:13.422913  543705 net.go:698] Add success.
W0319 14:52:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:52:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 14:52:14.455156  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:52:14.456937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:52:14.456946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:52:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:52:14.457001  543705 disk_worker.go:494] system disk:vda1
I0319 14:52:14.457043  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:52:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:52:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:52:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:52:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:52:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:52:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:52:16.472343  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:52:19.632668  543705 disk_info.go:125] begin check local disk info of client
I0319 14:52:19.635111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:52:19.635118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003563c0 0xc000356400]
E0319 14:52:23.410392  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:52:23.410409  543705 memory.go:184] no items to output this cycle
I0319 14:52:23.410420  543705 cpu.go:275] no items to output this cycle
E0319 14:52:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:52:33.409773  543705 memory.go:184] no items to output this cycle
I0319 14:52:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 14:52:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:52:43.409829  543705 memory.go:191] Add success.
I0319 14:52:43.409838  543705 cpu.go:282] Add success.
I0319 14:52:43.419925  543705 net.go:648] Add success.
I0319 14:52:43.422568  543705 net.go:770] primary dev: ETH0
I0319 14:52:43.422583  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:52:43.422598  543705 net.go:698] Add success.
I0319 14:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:52:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:52:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:52:53.409785  543705 memory.go:184] no items to output this cycle
I0319 14:52:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 14:53:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:53:03.409790  543705 memory.go:184] no items to output this cycle
I0319 14:53:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 14:53:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:53:13.409840  543705 memory.go:191] Add success.
I0319 14:53:13.409847  543705 cpu.go:282] Add success.
W0319 14:53:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:53:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:53:13.409894  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:53:13.420201  543705 net.go:648] Add success.
I0319 14:53:13.422846  543705 net.go:770] primary dev: ETH0
I0319 14:53:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:53:13.422871  543705 net.go:698] Add success.
I0319 14:53:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:53:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:53:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 14:53:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:53:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 14:53:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:53:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:53:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:53:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:53:19.635198  543705 disk_info.go:125] begin check local disk info of client
I0319 14:53:19.637635  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:53:19.637641  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357440 0xc000357480]
E0319 14:53:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:53:23.409771  543705 memory.go:184] no items to output this cycle
I0319 14:53:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 14:53:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:53:33.409788  543705 memory.go:184] no items to output this cycle
I0319 14:53:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 14:53:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:53:43.409811  543705 memory.go:191] Add success.
I0319 14:53:43.409813  543705 cpu.go:282] Add success.
I0319 14:53:43.420005  543705 net.go:648] Add success.
I0319 14:53:43.422796  543705 net.go:770] primary dev: ETH0
I0319 14:53:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:53:43.422827  543705 net.go:698] Add success.
I0319 14:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:53:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:53:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:53:53.409790  543705 cpu.go:275] no items to output this cycle
I0319 14:53:53.409792  543705 memory.go:184] no items to output this cycle
E0319 14:54:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:54:03.409770  543705 memory.go:184] no items to output this cycle
I0319 14:54:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:54:13.409985  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:54:13.410016  543705 memory.go:191] Add success.
W0319 14:54:13.410051  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:54:13.410073  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:54:13.410077  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:54:13.409984  543705 cpu.go:282] Add success.
I0319 14:54:13.419723  543705 net.go:648] Add success.
I0319 14:54:13.422413  543705 net.go:770] primary dev: ETH0
I0319 14:54:13.422425  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:54:13.422437  543705 net.go:698] Add success.
I0319 14:54:13.471571  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2732c29c-7950-49fa-aa21-c29b25140623","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:54:13.471613  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 14:54:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:54:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:54:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 14:54:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:54:14.456691  543705 disk_worker.go:494] system disk:vda1
I0319 14:54:14.456725  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:54:15.455612  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:54:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:54:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:54:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:54:19.637737  543705 disk_info.go:125] begin check local disk info of client
I0319 14:54:19.640198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:54:19.640205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0319 14:54:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:54:23.409763  543705 memory.go:184] no items to output this cycle
I0319 14:54:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 14:54:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:54:33.409800  543705 memory.go:184] no items to output this cycle
I0319 14:54:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 14:54:37.743566  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:54:37.743574  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:54:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:54:43.410801  543705 memory.go:191] Add success.
I0319 14:54:43.409806  543705 cpu.go:282] Add success.
I0319 14:54:43.420564  543705 net.go:648] Add success.
I0319 14:54:43.423362  543705 net.go:770] primary dev: ETH0
I0319 14:54:43.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:54:43.423389  543705 net.go:698] Add success.
I0319 14:54:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:54:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:54:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:54:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:54:53.409777  543705 cpu.go:275] no items to output this cycle
I0319 14:54:53.409778  543705 memory.go:184] no items to output this cycle
E0319 14:55:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:55:03.409802  543705 memory.go:184] no items to output this cycle
I0319 14:55:03.409821  543705 cpu.go:275] no items to output this cycle
W0319 14:55:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:55:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:55:13.409747  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:55:13.409810  543705 cpu.go:282] Add success.
E0319 14:55:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:55:13.409865  543705 memory.go:191] Add success.
I0319 14:55:13.420070  543705 net.go:648] Add success.
I0319 14:55:13.423142  543705 net.go:770] primary dev: ETH0
I0319 14:55:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:55:13.423173  543705 net.go:698] Add success.
I0319 14:55:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:55:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:55:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 14:55:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:55:14.456569  543705 disk_worker.go:494] system disk:vda1
I0319 14:55:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:55:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:55:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:55:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:55:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:55:19.640905  543705 disk_info.go:125] begin check local disk info of client
I0319 14:55:19.643356  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:55:19.643364  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f40 0xc000356000]
E0319 14:55:23.410263  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:55:23.410281  543705 memory.go:184] no items to output this cycle
I0319 14:55:23.410289  543705 cpu.go:275] no items to output this cycle
E0319 14:55:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:55:33.409791  543705 memory.go:184] no items to output this cycle
I0319 14:55:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 14:55:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:55:43.409789  543705 memory.go:191] Add success.
I0319 14:55:43.409808  543705 cpu.go:282] Add success.
I0319 14:55:43.420012  543705 net.go:648] Add success.
I0319 14:55:43.422874  543705 net.go:770] primary dev: ETH0
I0319 14:55:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:55:43.422899  543705 net.go:698] Add success.
I0319 14:55:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:55:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:55:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:55:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:55:53.409797  543705 memory.go:184] no items to output this cycle
I0319 14:55:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 14:56:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:56:03.409766  543705 memory.go:184] no items to output this cycle
I0319 14:56:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 14:56:13.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:56:13.409894  543705 memory.go:191] Add success.
W0319 14:56:13.409928  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:56:13.409944  543705 cpu.go:282] Add success.
W0319 14:56:13.409991  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:56:13.409996  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:56:13.419709  543705 net.go:648] Add success.
I0319 14:56:13.422724  543705 net.go:770] primary dev: ETH0
I0319 14:56:13.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:56:13.422751  543705 net.go:698] Add success.
I0319 14:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:56:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:56:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0319 14:56:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:56:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 14:56:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:56:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:56:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:56:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:56:16.472434  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:56:19.643444  543705 disk_info.go:125] begin check local disk info of client
I0319 14:56:19.645901  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:56:19.645908  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004db300 0xc0004db340]
E0319 14:56:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:56:23.409760  543705 memory.go:184] no items to output this cycle
I0319 14:56:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:56:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:56:33.409794  543705 memory.go:184] no items to output this cycle
I0319 14:56:33.409807  543705 cpu.go:275] no items to output this cycle
E0319 14:56:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:56:43.409792  543705 memory.go:191] Add success.
I0319 14:56:43.409799  543705 cpu.go:282] Add success.
I0319 14:56:43.419895  543705 net.go:648] Add success.
I0319 14:56:43.422650  543705 net.go:770] primary dev: ETH0
I0319 14:56:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:56:43.422679  543705 net.go:698] Add success.
I0319 14:56:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:56:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:56:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:56:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:56:53.409765  543705 memory.go:184] no items to output this cycle
I0319 14:56:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 14:57:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:57:03.409798  543705 memory.go:184] no items to output this cycle
I0319 14:57:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:57:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:57:13.409826  543705 memory.go:191] Add success.
I0319 14:57:13.409833  543705 cpu.go:282] Add success.
W0319 14:57:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:57:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:57:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:57:13.420174  543705 net.go:648] Add success.
I0319 14:57:13.422969  543705 net.go:770] primary dev: ETH0
I0319 14:57:13.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:57:13.422996  543705 net.go:698] Add success.
I0319 14:57:13.429408  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 14:57:13.453580  543705 event_worker.go:152] Polling the log file for events...
I0319 14:57:13.517413  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ceafe8b-2017-4be6-8209-0a9e91bfaa01","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 14:57:13.517446  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 14:57:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:57:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 14:57:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0319 14:57:14.455897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 14:57:14.455906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 14:57:14.455911  543705 custom_config.go:64] query custom config with name: gpu
I0319 14:57:14.456535  543705 disk_worker.go:494] system disk:vda1
I0319 14:57:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 14:57:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 14:57:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:57:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 14:57:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 14:57:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:57:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:57:16.472335  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:57:19.646869  543705 disk_info.go:125] begin check local disk info of client
I0319 14:57:19.649200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:57:19.649206  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329e40 0xc000329e80]
E0319 14:57:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:57:23.409798  543705 memory.go:184] no items to output this cycle
I0319 14:57:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 14:57:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:57:33.409784  543705 cpu.go:275] no items to output this cycle
I0319 14:57:33.409789  543705 memory.go:184] no items to output this cycle
I0319 14:57:37.744433  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 14:57:37.744439  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 14:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:57:43.410702  543705 memory.go:191] Add success.
I0319 14:57:43.409795  543705 cpu.go:282] Add success.
I0319 14:57:43.420393  543705 net.go:648] Add success.
I0319 14:57:43.422962  543705 net.go:770] primary dev: ETH0
I0319 14:57:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:57:43.422989  543705 net.go:698] Add success.
I0319 14:57:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:57:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:57:53.410348  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:57:53.410365  543705 memory.go:184] no items to output this cycle
I0319 14:57:53.410397  543705 cpu.go:275] no items to output this cycle
E0319 14:58:03.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:58:03.409862  543705 memory.go:184] no items to output this cycle
I0319 14:58:03.410014  543705 cpu.go:275] no items to output this cycle
E0319 14:58:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:58:13.409793  543705 memory.go:191] Add success.
W0319 14:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 14:58:13.409822  543705 cpu.go:282] Add success.
W0319 14:58:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:58:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:58:13.420146  543705 net.go:770] primary dev: ETH0
I0319 14:58:13.420160  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:58:13.420193  543705 net.go:698] Add success.
I0319 14:58:13.420543  543705 net.go:648] Add success.
I0319 14:58:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:58:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:58:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 14:58:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:58:14.456570  543705 disk_worker.go:494] system disk:vda1
I0319 14:58:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:58:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:58:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:58:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:58:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:58:19.649675  543705 disk_info.go:125] begin check local disk info of client
I0319 14:58:19.652018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:58:19.652024  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa240 0xc0001aa280]
E0319 14:58:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:58:23.409795  543705 memory.go:184] no items to output this cycle
I0319 14:58:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 14:58:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:58:33.409775  543705 memory.go:184] no items to output this cycle
I0319 14:58:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 14:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:58:43.409800  543705 memory.go:191] Add success.
I0319 14:58:43.409806  543705 cpu.go:282] Add success.
I0319 14:58:43.419883  543705 net.go:648] Add success.
I0319 14:58:43.422691  543705 net.go:770] primary dev: ETH0
I0319 14:58:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:58:43.422720  543705 net.go:698] Add success.
I0319 14:58:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:58:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:58:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:58:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:58:53.409782  543705 memory.go:184] no items to output this cycle
I0319 14:58:53.409783  543705 cpu.go:275] no items to output this cycle
I0319 14:59:03.409902  543705 cpu.go:275] no items to output this cycle
E0319 14:59:03.409934  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:59:03.409969  543705 memory.go:184] no items to output this cycle
E0319 14:59:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:59:13.409799  543705 memory.go:191] Add success.
I0319 14:59:13.409810  543705 cpu.go:282] Add success.
W0319 14:59:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 14:59:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 14:59:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 14:59:13.420141  543705 net.go:648] Add success.
I0319 14:59:13.422759  543705 net.go:770] primary dev: ETH0
I0319 14:59:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:59:13.422784  543705 net.go:698] Add success.
I0319 14:59:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 14:59:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 14:59:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 14:59:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 14:59:14.456576  543705 disk_worker.go:494] system disk:vda1
I0319 14:59:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 14:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 14:59:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:59:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:59:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 14:59:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 14:59:19.652109  543705 disk_info.go:125] begin check local disk info of client
I0319 14:59:19.654534  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 14:59:19.654540  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465680 0xc0004656c0]
E0319 14:59:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:59:23.409793  543705 memory.go:184] no items to output this cycle
I0319 14:59:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 14:59:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:59:33.409774  543705 memory.go:184] no items to output this cycle
I0319 14:59:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 14:59:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:59:43.409830  543705 memory.go:191] Add success.
I0319 14:59:43.409832  543705 cpu.go:282] Add success.
I0319 14:59:43.419982  543705 net.go:648] Add success.
I0319 14:59:43.422821  543705 net.go:770] primary dev: ETH0
I0319 14:59:43.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0319 14:59:43.422847  543705 net.go:698] Add success.
I0319 14:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 14:59:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 14:59:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 14:59:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 14:59:53.409764  543705 memory.go:184] no items to output this cycle
I0319 14:59:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:00:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:00:03.409903  543705 cpu.go:275] no items to output this cycle
I0319 15:00:03.409907  543705 memory.go:184] no items to output this cycle
E0319 15:00:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:00:13.409827  543705 memory.go:191] Add success.
I0319 15:00:13.409831  543705 cpu.go:282] Add success.
W0319 15:00:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:00:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:00:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:00:13.420210  543705 net.go:648] Add success.
I0319 15:00:13.423203  543705 net.go:770] primary dev: ETH0
I0319 15:00:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:00:13.423234  543705 net.go:698] Add success.
I0319 15:00:13.547107  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dca45700-ffeb-4fff-ac84-efe033023136","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:00:13.547140  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:00:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:00:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:00:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 15:00:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:00:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 15:00:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:00:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:00:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:00:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:00:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:00:19.654910  543705 disk_info.go:125] begin check local disk info of client
I0319 15:00:19.657316  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:00:19.657321  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004adcc0 0xc0004add00]
E0319 15:00:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:00:23.409786  543705 memory.go:184] no items to output this cycle
I0319 15:00:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 15:00:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:00:33.409792  543705 memory.go:184] no items to output this cycle
I0319 15:00:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 15:00:37.744584  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:00:37.744592  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:00:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:00:43.410816  543705 memory.go:191] Add success.
I0319 15:00:43.409821  543705 cpu.go:282] Add success.
I0319 15:00:43.420521  543705 net.go:648] Add success.
I0319 15:00:43.423422  543705 net.go:770] primary dev: ETH0
I0319 15:00:43.423438  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:00:43.423453  543705 net.go:698] Add success.
I0319 15:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:00:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:00:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:00:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:00:53.409797  543705 memory.go:184] no items to output this cycle
I0319 15:00:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:01:03.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:01:03.409897  543705 memory.go:184] no items to output this cycle
I0319 15:01:03.409948  543705 cpu.go:275] no items to output this cycle
E0319 15:01:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:01:13.409830  543705 memory.go:191] Add success.
I0319 15:01:13.409843  543705 cpu.go:282] Add success.
W0319 15:01:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:01:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:01:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:01:13.420312  543705 net.go:648] Add success.
I0319 15:01:13.422926  543705 net.go:770] primary dev: ETH0
I0319 15:01:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:01:13.422952  543705 net.go:698] Add success.
I0319 15:01:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:01:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:01:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 15:01:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:01:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 15:01:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:01:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:01:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:01:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:01:19.657672  543705 disk_info.go:125] begin check local disk info of client
I0319 15:01:19.660059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:01:19.660064  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab8c0 0xc0001ab900]
E0319 15:01:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:01:23.409797  543705 memory.go:184] no items to output this cycle
I0319 15:01:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:01:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:01:33.409790  543705 cpu.go:275] no items to output this cycle
I0319 15:01:33.409793  543705 memory.go:184] no items to output this cycle
E0319 15:01:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:01:43.409794  543705 memory.go:191] Add success.
I0319 15:01:43.409794  543705 cpu.go:282] Add success.
I0319 15:01:43.419859  543705 net.go:648] Add success.
I0319 15:01:43.422865  543705 net.go:770] primary dev: ETH0
I0319 15:01:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:01:43.422890  543705 net.go:698] Add success.
I0319 15:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:01:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:01:53.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:01:53.410254  543705 memory.go:184] no items to output this cycle
I0319 15:01:53.410280  543705 cpu.go:275] no items to output this cycle
E0319 15:02:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:02:03.409797  543705 memory.go:184] no items to output this cycle
I0319 15:02:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 15:02:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:02:13.409828  543705 memory.go:191] Add success.
I0319 15:02:13.409835  543705 cpu.go:282] Add success.
W0319 15:02:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:02:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:02:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:02:13.420154  543705 net.go:648] Add success.
I0319 15:02:13.423093  543705 net.go:770] primary dev: ETH0
I0319 15:02:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:02:13.423118  543705 net.go:698] Add success.
W0319 15:02:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:02:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 15:02:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:02:14.455951  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:02:14.455960  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:02:14.455966  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:02:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 15:02:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:02:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:02:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:02:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:02:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:02:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:02:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:02:16.472323  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:02:19.660943  543705 disk_info.go:125] begin check local disk info of client
I0319 15:02:19.663305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:02:19.663311  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf00 0xc0001aaf40]
E0319 15:02:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:02:23.409782  543705 memory.go:184] no items to output this cycle
I0319 15:02:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 15:02:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:02:33.409782  543705 memory.go:184] no items to output this cycle
I0319 15:02:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 15:02:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:02:43.409825  543705 memory.go:191] Add success.
I0319 15:02:43.409835  543705 cpu.go:282] Add success.
I0319 15:02:43.420019  543705 net.go:648] Add success.
I0319 15:02:43.422589  543705 net.go:770] primary dev: ETH0
I0319 15:02:43.422610  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:02:43.422622  543705 net.go:698] Add success.
I0319 15:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:02:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:02:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:02:53.409786  543705 cpu.go:275] no items to output this cycle
I0319 15:02:53.409793  543705 memory.go:184] no items to output this cycle
E0319 15:03:03.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:03:03.409901  543705 cpu.go:275] no items to output this cycle
I0319 15:03:03.409904  543705 memory.go:184] no items to output this cycle
E0319 15:03:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:03:13.409811  543705 memory.go:191] Add success.
I0319 15:03:13.409812  543705 cpu.go:282] Add success.
W0319 15:03:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:03:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:03:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:03:13.420156  543705 net.go:648] Add success.
I0319 15:03:13.423032  543705 net.go:770] primary dev: ETH0
I0319 15:03:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:03:13.423058  543705 net.go:698] Add success.
I0319 15:03:13.468733  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29cba4b6-19cd-4c90-8c97-8b92ffc10a5d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:03:13.468767  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:03:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:03:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:03:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 15:03:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:03:14.456490  543705 disk_worker.go:494] system disk:vda1
I0319 15:03:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:03:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:03:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:03:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:03:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:03:19.663392  543705 disk_info.go:125] begin check local disk info of client
I0319 15:03:19.665818  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:03:19.665824  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af40 0xc00007af80]
E0319 15:03:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:03:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:03:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 15:03:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:03:33.409783  543705 memory.go:184] no items to output this cycle
I0319 15:03:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 15:03:37.745427  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:03:37.745434  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:03:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:03:43.410611  543705 memory.go:191] Add success.
I0319 15:03:43.409817  543705 cpu.go:282] Add success.
I0319 15:03:43.420330  543705 net.go:648] Add success.
I0319 15:03:43.423053  543705 net.go:770] primary dev: ETH0
I0319 15:03:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:03:43.423078  543705 net.go:698] Add success.
I0319 15:03:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:03:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:03:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:03:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:03:53.409805  543705 memory.go:184] no items to output this cycle
I0319 15:03:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 15:04:03.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:04:03.409907  543705 memory.go:184] no items to output this cycle
I0319 15:04:03.409954  543705 cpu.go:275] no items to output this cycle
E0319 15:04:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:04:13.409805  543705 memory.go:191] Add success.
I0319 15:04:13.409813  543705 cpu.go:282] Add success.
W0319 15:04:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:04:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:04:13.420172  543705 net.go:648] Add success.
I0319 15:04:13.422824  543705 net.go:770] primary dev: ETH0
I0319 15:04:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:04:13.422850  543705 net.go:698] Add success.
I0319 15:04:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:04:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:04:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 15:04:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:04:14.456588  543705 disk_worker.go:494] system disk:vda1
I0319 15:04:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:04:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:04:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:04:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:04:19.666966  543705 disk_info.go:125] begin check local disk info of client
I0319 15:04:19.669305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:04:19.669310  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab300 0xc0001ab400]
E0319 15:04:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:04:23.409785  543705 memory.go:184] no items to output this cycle
I0319 15:04:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:04:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:04:33.409783  543705 memory.go:184] no items to output this cycle
I0319 15:04:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 15:04:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:04:43.409793  543705 memory.go:191] Add success.
I0319 15:04:43.409795  543705 cpu.go:282] Add success.
I0319 15:04:43.419975  543705 net.go:648] Add success.
I0319 15:04:43.422826  543705 net.go:770] primary dev: ETH0
I0319 15:04:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:04:43.422855  543705 net.go:698] Add success.
I0319 15:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:04:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:04:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:04:53.409792  543705 memory.go:184] no items to output this cycle
I0319 15:04:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 15:05:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:05:03.409806  543705 memory.go:184] no items to output this cycle
I0319 15:05:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 15:05:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:05:13.409825  543705 memory.go:191] Add success.
I0319 15:05:13.409827  543705 cpu.go:282] Add success.
W0319 15:05:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:05:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:05:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:05:13.420488  543705 net.go:648] Add success.
I0319 15:05:13.423130  543705 net.go:770] primary dev: ETH0
I0319 15:05:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:05:13.423159  543705 net.go:698] Add success.
I0319 15:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:05:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:05:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 15:05:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:05:14.456574  543705 disk_worker.go:494] system disk:vda1
I0319 15:05:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:05:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:05:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:05:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:05:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:05:19.669672  543705 disk_info.go:125] begin check local disk info of client
I0319 15:05:19.672059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:05:19.672067  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf40 0xc0000c4100]
E0319 15:05:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:05:23.409787  543705 memory.go:184] no items to output this cycle
I0319 15:05:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:05:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:05:33.409784  543705 memory.go:184] no items to output this cycle
I0319 15:05:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 15:05:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:05:43.409814  543705 memory.go:191] Add success.
I0319 15:05:43.409821  543705 cpu.go:282] Add success.
I0319 15:05:43.419945  543705 net.go:648] Add success.
I0319 15:05:43.422944  543705 net.go:770] primary dev: ETH0
I0319 15:05:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:05:43.422971  543705 net.go:698] Add success.
I0319 15:05:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:05:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:05:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:05:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:05:53.409772  543705 memory.go:184] no items to output this cycle
I0319 15:05:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 15:06:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:06:03.409805  543705 memory.go:184] no items to output this cycle
I0319 15:06:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 15:06:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:06:13.409797  543705 memory.go:191] Add success.
W0319 15:06:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:06:13.409827  543705 cpu.go:282] Add success.
W0319 15:06:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:06:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:06:13.420165  543705 net.go:648] Add success.
I0319 15:06:13.423184  543705 net.go:770] primary dev: ETH0
I0319 15:06:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:06:13.423212  543705 net.go:698] Add success.
I0319 15:06:13.468319  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"26d5ca81-87f5-41cc-8f68-7ac773a5f75d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:06:13.468353  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:06:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:06:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:06:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 15:06:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:06:14.456572  543705 disk_worker.go:494] system disk:vda1
I0319 15:06:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:06:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:06:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:06:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:06:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:06:19.673004  543705 disk_info.go:125] begin check local disk info of client
I0319 15:06:19.675556  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:06:19.675562  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd40 0xc0001abd80]
E0319 15:06:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:06:23.409775  543705 memory.go:184] no items to output this cycle
I0319 15:06:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 15:06:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:06:33.409774  543705 memory.go:184] no items to output this cycle
I0319 15:06:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 15:06:37.745739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:06:37.745747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:06:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:06:43.410752  543705 memory.go:191] Add success.
I0319 15:06:43.409800  543705 cpu.go:282] Add success.
I0319 15:06:43.420438  543705 net.go:648] Add success.
I0319 15:06:43.423371  543705 net.go:770] primary dev: ETH0
I0319 15:06:43.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:06:43.423397  543705 net.go:698] Add success.
I0319 15:06:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:06:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:06:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:06:53.409784  543705 cpu.go:275] no items to output this cycle
I0319 15:06:53.409793  543705 memory.go:184] no items to output this cycle
E0319 15:07:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:07:03.409783  543705 memory.go:184] no items to output this cycle
I0319 15:07:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 15:07:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:07:13.409786  543705 memory.go:191] Add success.
I0319 15:07:13.409795  543705 cpu.go:282] Add success.
W0319 15:07:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:07:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:07:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:07:13.420184  543705 net.go:648] Add success.
I0319 15:07:13.421180  543705 net.go:770] primary dev: ETH0
I0319 15:07:13.421194  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:07:13.421220  543705 net.go:698] Add success.
I0319 15:07:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0319 15:07:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:07:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 15:07:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:07:14.456805  543705 disk_worker.go:494] system disk:vda1
I0319 15:07:14.456846  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:07:14.457135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:07:14.457143  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:07:14.457148  543705 custom_config.go:64] query custom config with name: gpu
E0319 15:07:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:07:15.456884  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:07:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:07:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:07:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:07:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:07:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:07:19.676018  543705 disk_info.go:125] begin check local disk info of client
I0319 15:07:19.678414  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:07:19.678420  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6300 0xc0003b6340]
E0319 15:07:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:07:23.409796  543705 memory.go:184] no items to output this cycle
I0319 15:07:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 15:07:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:07:33.409779  543705 memory.go:184] no items to output this cycle
I0319 15:07:33.409779  543705 cpu.go:275] no items to output this cycle
E0319 15:07:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:07:43.409794  543705 memory.go:191] Add success.
I0319 15:07:43.409797  543705 cpu.go:282] Add success.
I0319 15:07:43.419881  543705 net.go:648] Add success.
I0319 15:07:43.422536  543705 net.go:770] primary dev: ETH0
I0319 15:07:43.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:07:43.422564  543705 net.go:698] Add success.
I0319 15:07:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:07:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:07:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:07:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:07:53.409793  543705 memory.go:184] no items to output this cycle
I0319 15:07:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 15:08:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:08:03.409769  543705 memory.go:184] no items to output this cycle
I0319 15:08:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 15:08:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:08:13.409794  543705 memory.go:191] Add success.
I0319 15:08:13.409812  543705 cpu.go:282] Add success.
W0319 15:08:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:08:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:08:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:08:13.420226  543705 net.go:648] Add success.
I0319 15:08:13.423226  543705 net.go:770] primary dev: ETH0
I0319 15:08:13.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:08:13.423256  543705 net.go:698] Add success.
I0319 15:08:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:08:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:08:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 15:08:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:08:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 15:08:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:08:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:08:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:08:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:08:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:08:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:08:19.679089  543705 disk_info.go:125] begin check local disk info of client
I0319 15:08:19.681531  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:08:19.681537  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9580 0xc0003b95c0]
E0319 15:08:23.410228  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:08:23.410249  543705 memory.go:184] no items to output this cycle
I0319 15:08:23.410264  543705 cpu.go:275] no items to output this cycle
E0319 15:08:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:08:33.409775  543705 memory.go:184] no items to output this cycle
I0319 15:08:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 15:08:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:08:43.409822  543705 memory.go:191] Add success.
I0319 15:08:43.409828  543705 cpu.go:282] Add success.
I0319 15:08:43.419885  543705 net.go:648] Add success.
I0319 15:08:43.422698  543705 net.go:770] primary dev: ETH0
I0319 15:08:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:08:43.422728  543705 net.go:698] Add success.
I0319 15:08:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:08:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:08:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:08:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:08:53.409793  543705 memory.go:184] no items to output this cycle
I0319 15:08:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:09:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:09:03.409770  543705 memory.go:184] no items to output this cycle
I0319 15:09:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 15:09:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:09:13.409827  543705 memory.go:191] Add success.
I0319 15:09:13.409834  543705 cpu.go:282] Add success.
W0319 15:09:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:09:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:09:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:09:13.420130  543705 net.go:648] Add success.
I0319 15:09:13.422876  543705 net.go:770] primary dev: ETH0
I0319 15:09:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:09:13.422901  543705 net.go:698] Add success.
I0319 15:09:13.469415  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eccc735f-e79b-45e3-b1c8-f1e47ea598f7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:09:13.469459  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:09:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:09:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:09:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 15:09:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:09:14.456591  543705 disk_worker.go:494] system disk:vda1
I0319 15:09:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:09:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:09:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:09:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:09:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:09:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:09:19.681671  543705 disk_info.go:125] begin check local disk info of client
I0319 15:09:19.684069  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:09:19.684075  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047bac0 0xc00047bb00]
E0319 15:09:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:09:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:09:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 15:09:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:09:33.409784  543705 memory.go:184] no items to output this cycle
I0319 15:09:33.409785  543705 cpu.go:275] no items to output this cycle
I0319 15:09:37.747451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:09:37.747458  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:09:43.410743  543705 memory.go:191] Add success.
I0319 15:09:43.409811  543705 cpu.go:282] Add success.
I0319 15:09:43.420518  543705 net.go:648] Add success.
I0319 15:09:43.423411  543705 net.go:770] primary dev: ETH0
I0319 15:09:43.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:09:43.423455  543705 net.go:698] Add success.
I0319 15:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:09:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:09:53.409771  543705 memory.go:184] no items to output this cycle
I0319 15:09:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 15:10:03.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:10:03.409885  543705 memory.go:184] no items to output this cycle
I0319 15:10:03.409959  543705 cpu.go:275] no items to output this cycle
E0319 15:10:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:10:13.409803  543705 memory.go:191] Add success.
I0319 15:10:13.409804  543705 cpu.go:282] Add success.
W0319 15:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:10:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:10:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:10:13.420182  543705 net.go:648] Add success.
I0319 15:10:13.423026  543705 net.go:770] primary dev: ETH0
I0319 15:10:13.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:10:13.423054  543705 net.go:698] Add success.
I0319 15:10:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:10:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:10:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 15:10:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:10:14.456582  543705 disk_worker.go:494] system disk:vda1
I0319 15:10:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:10:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:10:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:10:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:10:19.685125  543705 disk_info.go:125] begin check local disk info of client
I0319 15:10:19.687529  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:10:19.687534  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003288c0 0xc000328900]
E0319 15:10:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:10:23.409781  543705 memory.go:184] no items to output this cycle
I0319 15:10:23.409780  543705 cpu.go:275] no items to output this cycle
E0319 15:10:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:10:33.409774  543705 memory.go:184] no items to output this cycle
I0319 15:10:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:10:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:10:43.409824  543705 memory.go:191] Add success.
I0319 15:10:43.409833  543705 cpu.go:282] Add success.
I0319 15:10:43.420251  543705 net.go:648] Add success.
I0319 15:10:43.423092  543705 net.go:770] primary dev: ETH0
I0319 15:10:43.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:10:43.423123  543705 net.go:698] Add success.
I0319 15:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:10:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:10:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:10:53.409768  543705 memory.go:184] no items to output this cycle
I0319 15:10:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:11:03.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:11:03.409927  543705 memory.go:184] no items to output this cycle
I0319 15:11:03.409978  543705 cpu.go:275] no items to output this cycle
E0319 15:11:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:11:13.409785  543705 memory.go:191] Add success.
W0319 15:11:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:11:13.409816  543705 cpu.go:282] Add success.
W0319 15:11:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:11:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:11:13.420192  543705 net.go:648] Add success.
I0319 15:11:13.423181  543705 net.go:770] primary dev: ETH0
I0319 15:11:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:11:13.423210  543705 net.go:698] Add success.
I0319 15:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:11:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:11:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 15:11:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:11:14.456509  543705 disk_worker.go:494] system disk:vda1
I0319 15:11:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:11:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:11:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:11:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:11:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:11:19.687628  543705 disk_info.go:125] begin check local disk info of client
I0319 15:11:19.689908  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:11:19.689914  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ddc0 0xc00047de00]
E0319 15:11:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:11:23.409781  543705 cpu.go:275] no items to output this cycle
I0319 15:11:23.409782  543705 memory.go:184] no items to output this cycle
E0319 15:11:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:11:33.409764  543705 memory.go:184] no items to output this cycle
I0319 15:11:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 15:11:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:11:43.409800  543705 memory.go:191] Add success.
I0319 15:11:43.409811  543705 cpu.go:282] Add success.
I0319 15:11:43.419895  543705 net.go:648] Add success.
I0319 15:11:43.422663  543705 net.go:770] primary dev: ETH0
I0319 15:11:43.422682  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:11:43.422702  543705 net.go:698] Add success.
I0319 15:11:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:11:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:11:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:11:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:11:53.409764  543705 memory.go:184] no items to output this cycle
I0319 15:11:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:12:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:12:03.409798  543705 memory.go:184] no items to output this cycle
I0319 15:12:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 15:12:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:12:13.409812  543705 memory.go:191] Add success.
I0319 15:12:13.409814  543705 cpu.go:282] Add success.
W0319 15:12:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:12:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:12:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:12:13.420178  543705 net.go:648] Add success.
I0319 15:12:13.423064  543705 net.go:770] primary dev: ETH0
I0319 15:12:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:12:13.423089  543705 net.go:698] Add success.
I0319 15:12:13.464480  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"185cc2e3-2941-4e8c-8578-97797f7de880","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:12:13.464514  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 15:12:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:12:14.455248  543705 disk_worker.go:708] disk space is not compliant
W0319 15:12:14.455253  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:12:14.455927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:12:14.455937  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:12:14.455942  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:12:14.456850  543705 disk_worker.go:494] system disk:vda1
I0319 15:12:14.456907  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:12:15.456903  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:12:15.456913  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:12:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:12:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:12:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:12:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:12:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:12:19.689999  543705 disk_info.go:125] begin check local disk info of client
I0319 15:12:19.692409  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:12:19.692416  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab880 0xc0001ab8c0]
E0319 15:12:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:12:23.409800  543705 memory.go:184] no items to output this cycle
I0319 15:12:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 15:12:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:12:33.409781  543705 memory.go:184] no items to output this cycle
I0319 15:12:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 15:12:37.747612  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:12:37.747620  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:12:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:12:43.410779  543705 memory.go:191] Add success.
I0319 15:12:43.409840  543705 cpu.go:282] Add success.
I0319 15:12:43.420485  543705 net.go:648] Add success.
I0319 15:12:43.423382  543705 net.go:770] primary dev: ETH0
I0319 15:12:43.423399  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:12:43.423415  543705 net.go:698] Add success.
I0319 15:12:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:12:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:12:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:12:53.409770  543705 memory.go:184] no items to output this cycle
I0319 15:12:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:13:03.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:13:03.409893  543705 memory.go:184] no items to output this cycle
I0319 15:13:03.410010  543705 cpu.go:275] no items to output this cycle
E0319 15:13:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:13:13.409805  543705 memory.go:191] Add success.
I0319 15:13:13.409815  543705 cpu.go:282] Add success.
W0319 15:13:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:13:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:13:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:13:13.420206  543705 net.go:648] Add success.
I0319 15:13:13.422878  543705 net.go:770] primary dev: ETH0
I0319 15:13:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:13:13.422903  543705 net.go:698] Add success.
I0319 15:13:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:13:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:13:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 15:13:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:13:14.456481  543705 disk_worker.go:494] system disk:vda1
I0319 15:13:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:13:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:13:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:13:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:13:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:13:19.692505  543705 disk_info.go:125] begin check local disk info of client
I0319 15:13:19.695020  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:13:19.695026  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357240 0xc000357280]
E0319 15:13:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:13:23.409805  543705 memory.go:184] no items to output this cycle
I0319 15:13:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 15:13:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:13:33.409809  543705 memory.go:184] no items to output this cycle
I0319 15:13:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 15:13:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:13:43.409790  543705 memory.go:191] Add success.
I0319 15:13:43.409798  543705 cpu.go:282] Add success.
I0319 15:13:43.419866  543705 net.go:648] Add success.
I0319 15:13:43.422525  543705 net.go:770] primary dev: ETH0
I0319 15:13:43.422538  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:13:43.422559  543705 net.go:698] Add success.
I0319 15:13:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:13:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:13:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:13:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:13:53.409767  543705 memory.go:184] no items to output this cycle
I0319 15:13:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 15:14:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:14:03.409767  543705 memory.go:184] no items to output this cycle
I0319 15:14:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 15:14:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:14:13.409809  543705 memory.go:191] Add success.
I0319 15:14:13.409824  543705 cpu.go:282] Add success.
W0319 15:14:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:14:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:14:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:14:13.420163  543705 net.go:648] Add success.
I0319 15:14:13.422932  543705 net.go:770] primary dev: ETH0
I0319 15:14:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:14:13.422957  543705 net.go:698] Add success.
I0319 15:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:14:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:14:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 15:14:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:14:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 15:14:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:14:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:14:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:14:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:14:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:14:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:14:19.695115  543705 disk_info.go:125] begin check local disk info of client
I0319 15:14:19.697574  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:14:19.697581  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004adf00 0xc0004adf40]
E0319 15:14:23.410418  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:14:23.410434  543705 memory.go:184] no items to output this cycle
I0319 15:14:23.410436  543705 cpu.go:275] no items to output this cycle
E0319 15:14:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:14:33.409798  543705 memory.go:184] no items to output this cycle
I0319 15:14:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 15:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:14:43.409795  543705 memory.go:191] Add success.
I0319 15:14:43.409800  543705 cpu.go:282] Add success.
I0319 15:14:43.419967  543705 net.go:648] Add success.
I0319 15:14:43.422786  543705 net.go:770] primary dev: ETH0
I0319 15:14:43.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:14:43.422822  543705 net.go:698] Add success.
I0319 15:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:14:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:14:53.410392  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:14:53.410411  543705 memory.go:184] no items to output this cycle
I0319 15:14:53.410421  543705 cpu.go:275] no items to output this cycle
E0319 15:15:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:15:03.409766  543705 memory.go:184] no items to output this cycle
I0319 15:15:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 15:15:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:15:13.409920  543705 memory.go:191] Add success.
W0319 15:15:13.409967  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:15:13.410026  543705 cpu.go:282] Add success.
W0319 15:15:13.410097  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:15:13.410101  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:15:13.419757  543705 net.go:648] Add success.
I0319 15:15:13.422669  543705 net.go:770] primary dev: ETH0
I0319 15:15:13.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:15:13.422696  543705 net.go:698] Add success.
I0319 15:15:13.468629  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d6903129-a4de-4a3f-bac2-93fd9fc3d319","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:15:13.468662  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:15:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:15:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 15:15:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:15:14.456671  543705 disk_worker.go:494] system disk:vda1
I0319 15:15:14.456700  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:15:15.455611  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:15:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:15:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:15:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:15:16.472450  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:15:19.697671  543705 disk_info.go:125] begin check local disk info of client
I0319 15:15:19.700138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:15:19.700144  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328440 0xc000328480]
E0319 15:15:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:15:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:15:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 15:15:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:15:33.409800  543705 memory.go:184] no items to output this cycle
I0319 15:15:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 15:15:37.748431  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:15:37.748438  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:15:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:15:43.410871  543705 memory.go:191] Add success.
I0319 15:15:43.409861  543705 cpu.go:282] Add success.
I0319 15:15:43.420678  543705 net.go:648] Add success.
I0319 15:15:43.423622  543705 net.go:770] primary dev: ETH0
I0319 15:15:43.423637  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:15:43.423651  543705 net.go:698] Add success.
I0319 15:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:15:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:15:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:15:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:15:53.409797  543705 memory.go:184] no items to output this cycle
I0319 15:15:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 15:16:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:16:03.409772  543705 memory.go:184] no items to output this cycle
I0319 15:16:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 15:16:13.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:16:13.409897  543705 cpu.go:282] Add success.
I0319 15:16:13.409903  543705 memory.go:191] Add success.
W0319 15:16:13.409939  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:16:13.409957  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:16:13.409967  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:16:13.419713  543705 net.go:648] Add success.
I0319 15:16:13.422627  543705 net.go:770] primary dev: ETH0
I0319 15:16:13.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:16:13.422656  543705 net.go:698] Add success.
I0319 15:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:16:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:16:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 15:16:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:16:14.456587  543705 disk_worker.go:494] system disk:vda1
I0319 15:16:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:16:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:16:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:16:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:16:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:16:16.472451  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:16:19.701145  543705 disk_info.go:125] begin check local disk info of client
I0319 15:16:19.703697  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:16:19.703704  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ac380 0xc0004ac3c0]
E0319 15:16:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:16:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:16:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:16:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:16:33.409795  543705 memory.go:184] no items to output this cycle
I0319 15:16:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 15:16:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:16:43.409789  543705 memory.go:191] Add success.
I0319 15:16:43.409817  543705 cpu.go:282] Add success.
I0319 15:16:43.419881  543705 net.go:648] Add success.
I0319 15:16:43.422825  543705 net.go:770] primary dev: ETH0
I0319 15:16:43.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:16:43.422851  543705 net.go:698] Add success.
I0319 15:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:16:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:16:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:16:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:16:53.409775  543705 memory.go:184] no items to output this cycle
I0319 15:16:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 15:17:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:17:03.409796  543705 memory.go:184] no items to output this cycle
I0319 15:17:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 15:17:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:17:13.409800  543705 memory.go:191] Add success.
I0319 15:17:13.409805  543705 cpu.go:282] Add success.
W0319 15:17:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:17:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:17:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:17:13.420164  543705 net.go:648] Add success.
I0319 15:17:13.423181  543705 net.go:770] primary dev: ETH0
I0319 15:17:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:17:13.423210  543705 net.go:698] Add success.
I0319 15:17:13.452788  543705 event_worker.go:152] Polling the log file for events...
W0319 15:17:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:17:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 15:17:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:17:14.457012  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:17:14.457022  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:17:14.457029  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:17:14.457049  543705 disk_worker.go:494] system disk:vda1
I0319 15:17:14.457092  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:17:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:17:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:17:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:17:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:17:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:17:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:17:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:17:19.705164  543705 disk_info.go:125] begin check local disk info of client
I0319 15:17:19.707539  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:17:19.707545  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ca800 0xc0004ca840]
E0319 15:17:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:17:23.409769  543705 memory.go:184] no items to output this cycle
I0319 15:17:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 15:17:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:17:33.409799  543705 memory.go:184] no items to output this cycle
I0319 15:17:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 15:17:43.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:17:43.409848  543705 memory.go:191] Add success.
I0319 15:17:43.409932  543705 cpu.go:282] Add success.
I0319 15:17:43.420238  543705 net.go:648] Add success.
I0319 15:17:43.423325  543705 net.go:770] primary dev: ETH0
I0319 15:17:43.423339  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:17:43.423353  543705 net.go:698] Add success.
I0319 15:17:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:17:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:17:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:17:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:17:53.409769  543705 memory.go:184] no items to output this cycle
I0319 15:17:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:18:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:18:03.409767  543705 memory.go:184] no items to output this cycle
I0319 15:18:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:18:13.409804  543705 memory.go:191] Add success.
I0319 15:18:13.409806  543705 cpu.go:282] Add success.
W0319 15:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:18:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:18:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:18:13.420276  543705 net.go:648] Add success.
I0319 15:18:13.422953  543705 net.go:770] primary dev: ETH0
I0319 15:18:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:18:13.422981  543705 net.go:698] Add success.
I0319 15:18:13.470138  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"348f6d04-d646-4476-8b3d-d26557d6f826","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:18:13.470172  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:18:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:18:14.455335  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:18:14.455346  543705 disk_worker.go:708] disk space is not compliant
W0319 15:18:14.455349  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:18:14.456777  543705 disk_worker.go:494] system disk:vda1
I0319 15:18:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:18:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:18:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:18:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:18:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:18:19.707626  543705 disk_info.go:125] begin check local disk info of client
I0319 15:18:19.710085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:18:19.710091  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a4c0 0xc00029a500]
E0319 15:18:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:18:23.409765  543705 memory.go:184] no items to output this cycle
I0319 15:18:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 15:18:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:18:33.409797  543705 memory.go:184] no items to output this cycle
I0319 15:18:33.409807  543705 cpu.go:275] no items to output this cycle
I0319 15:18:37.749440  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:18:37.749448  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:18:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:18:43.410603  543705 memory.go:191] Add success.
I0319 15:18:43.409839  543705 cpu.go:282] Add success.
I0319 15:18:43.420384  543705 net.go:648] Add success.
I0319 15:18:43.423246  543705 net.go:770] primary dev: ETH0
I0319 15:18:43.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:18:43.423276  543705 net.go:698] Add success.
I0319 15:18:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:18:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:18:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:18:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:18:53.409796  543705 memory.go:184] no items to output this cycle
I0319 15:18:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 15:19:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:19:03.409781  543705 memory.go:184] no items to output this cycle
I0319 15:19:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 15:19:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:19:13.409805  543705 memory.go:191] Add success.
I0319 15:19:13.409809  543705 cpu.go:282] Add success.
W0319 15:19:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:19:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:19:13.420189  543705 net.go:648] Add success.
I0319 15:19:13.423219  543705 net.go:770] primary dev: ETH0
I0319 15:19:13.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:19:13.423244  543705 net.go:698] Add success.
I0319 15:19:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:19:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:19:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 15:19:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:19:14.457232  543705 disk_worker.go:494] system disk:vda1
I0319 15:19:14.457260  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:19:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:19:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:19:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:19:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:19:19.711193  543705 disk_info.go:125] begin check local disk info of client
I0319 15:19:19.713617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:19:19.713623  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2180 0xc0004a21c0]
E0319 15:19:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:19:23.409769  543705 memory.go:184] no items to output this cycle
I0319 15:19:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 15:19:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:19:33.409797  543705 memory.go:184] no items to output this cycle
I0319 15:19:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 15:19:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:19:43.409780  543705 memory.go:191] Add success.
I0319 15:19:43.409802  543705 cpu.go:282] Add success.
I0319 15:19:43.419978  543705 net.go:648] Add success.
I0319 15:19:43.422934  543705 net.go:770] primary dev: ETH0
I0319 15:19:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:19:43.422960  543705 net.go:698] Add success.
I0319 15:19:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:19:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:19:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:19:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:19:53.409798  543705 memory.go:184] no items to output this cycle
I0319 15:19:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 15:20:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:20:03.409795  543705 memory.go:184] no items to output this cycle
I0319 15:20:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 15:20:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:20:13.409796  543705 memory.go:191] Add success.
I0319 15:20:13.409814  543705 cpu.go:282] Add success.
W0319 15:20:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:20:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:20:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:20:13.420109  543705 net.go:648] Add success.
I0319 15:20:13.423317  543705 net.go:770] primary dev: ETH0
I0319 15:20:13.423330  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:20:13.423341  543705 net.go:698] Add success.
I0319 15:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:20:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:20:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 15:20:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:20:14.456567  543705 disk_worker.go:494] system disk:vda1
I0319 15:20:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:20:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:20:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:20:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:20:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:20:19.713684  543705 disk_info.go:125] begin check local disk info of client
I0319 15:20:19.716162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:20:19.716168  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e0c0 0xc00037e100]
E0319 15:20:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:20:23.409768  543705 memory.go:184] no items to output this cycle
I0319 15:20:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 15:20:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:20:33.409766  543705 memory.go:184] no items to output this cycle
I0319 15:20:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 15:20:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:20:43.409828  543705 memory.go:191] Add success.
I0319 15:20:43.409831  543705 cpu.go:282] Add success.
I0319 15:20:43.420039  543705 net.go:648] Add success.
I0319 15:20:43.423107  543705 net.go:770] primary dev: ETH0
I0319 15:20:43.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:20:43.423136  543705 net.go:698] Add success.
I0319 15:20:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:20:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:20:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:20:53.409778  543705 memory.go:184] no items to output this cycle
I0319 15:20:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 15:21:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:21:03.409802  543705 memory.go:184] no items to output this cycle
I0319 15:21:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 15:21:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:21:13.409822  543705 memory.go:191] Add success.
I0319 15:21:13.409833  543705 cpu.go:282] Add success.
W0319 15:21:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:21:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:21:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:21:13.420123  543705 net.go:648] Add success.
I0319 15:21:13.422866  543705 net.go:770] primary dev: ETH0
I0319 15:21:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:21:13.422891  543705 net.go:698] Add success.
I0319 15:21:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:21:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:21:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0319 15:21:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:21:14.456588  543705 disk_worker.go:494] system disk:vda1
I0319 15:21:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:21:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:21:16.188164  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5b7f72eb-5269-4ac3-bce3-72715cfbcfcb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:21:16.188202  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:21:16.457639  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:21:16.457707  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:21:16.457735  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:21:16.473046  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:21:19.717282  543705 disk_info.go:125] begin check local disk info of client
I0319 15:21:19.719712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:21:19.719719  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a400 0xc00039a440]
E0319 15:21:23.410501  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:21:23.410520  543705 memory.go:184] no items to output this cycle
I0319 15:21:23.410530  543705 cpu.go:275] no items to output this cycle
E0319 15:21:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:21:33.409777  543705 memory.go:184] no items to output this cycle
I0319 15:21:33.409782  543705 cpu.go:275] no items to output this cycle
I0319 15:21:37.749731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:21:37.749738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:21:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:21:43.410639  543705 memory.go:191] Add success.
I0319 15:21:43.409828  543705 cpu.go:282] Add success.
I0319 15:21:43.420379  543705 net.go:648] Add success.
I0319 15:21:43.423048  543705 net.go:770] primary dev: ETH0
I0319 15:21:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:21:43.423074  543705 net.go:698] Add success.
I0319 15:21:46.458496  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:21:46.458569  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:21:46.458603  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:21:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:21:53.409773  543705 memory.go:184] no items to output this cycle
I0319 15:21:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 15:22:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:22:03.409772  543705 memory.go:184] no items to output this cycle
I0319 15:22:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 15:22:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:22:13.409794  543705 memory.go:191] Add success.
I0319 15:22:13.409811  543705 cpu.go:282] Add success.
W0319 15:22:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:22:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:22:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:22:13.420282  543705 net.go:648] Add success.
I0319 15:22:13.422983  543705 net.go:770] primary dev: ETH0
I0319 15:22:13.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:22:13.423009  543705 net.go:698] Add success.
W0319 15:22:14.455232  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:22:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0319 15:22:14.455250  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:22:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:22:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:22:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:22:14.456846  543705 disk_worker.go:494] system disk:vda1
I0319 15:22:14.456891  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:22:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:22:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:22:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:22:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:22:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:22:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:22:16.472298  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:22:19.721235  543705 disk_info.go:125] begin check local disk info of client
I0319 15:22:19.723686  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:22:19.723691  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bed80 0xc0003bedc0]
I0319 15:22:23.409942  543705 cpu.go:275] no items to output this cycle
E0319 15:22:23.409949  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:22:23.410025  543705 memory.go:184] no items to output this cycle
E0319 15:22:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:22:33.409792  543705 memory.go:184] no items to output this cycle
I0319 15:22:33.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:22:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:22:43.409801  543705 memory.go:191] Add success.
I0319 15:22:43.409808  543705 cpu.go:282] Add success.
I0319 15:22:43.420013  543705 net.go:648] Add success.
I0319 15:22:43.422648  543705 net.go:770] primary dev: ETH0
I0319 15:22:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:22:43.422688  543705 net.go:698] Add success.
I0319 15:22:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:22:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:22:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:22:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:22:53.409788  543705 memory.go:184] no items to output this cycle
I0319 15:22:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 15:23:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:23:03.409779  543705 memory.go:184] no items to output this cycle
I0319 15:23:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:23:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:23:13.409803  543705 cpu.go:282] Add success.
I0319 15:23:13.409807  543705 memory.go:191] Add success.
W0319 15:23:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:23:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:23:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:23:13.420164  543705 net.go:648] Add success.
I0319 15:23:13.423171  543705 net.go:770] primary dev: ETH0
I0319 15:23:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:23:13.423197  543705 net.go:698] Add success.
I0319 15:23:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:23:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:23:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 15:23:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:23:14.456593  543705 disk_worker.go:494] system disk:vda1
I0319 15:23:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:23:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:23:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:23:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:23:19.725262  543705 disk_info.go:125] begin check local disk info of client
I0319 15:23:19.727736  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:23:19.727742  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348d80 0xc000348dc0]
E0319 15:23:23.410319  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:23:23.410328  543705 cpu.go:275] no items to output this cycle
I0319 15:23:23.410334  543705 memory.go:184] no items to output this cycle
E0319 15:23:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:23:33.409806  543705 memory.go:184] no items to output this cycle
I0319 15:23:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 15:23:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:23:43.409804  543705 memory.go:191] Add success.
I0319 15:23:43.409806  543705 cpu.go:282] Add success.
I0319 15:23:43.419949  543705 net.go:648] Add success.
I0319 15:23:43.422542  543705 net.go:770] primary dev: ETH0
I0319 15:23:43.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:23:43.422571  543705 net.go:698] Add success.
I0319 15:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:23:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:23:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:23:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:23:53.409804  543705 memory.go:184] no items to output this cycle
I0319 15:23:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 15:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:24:03.409774  543705 memory.go:184] no items to output this cycle
I0319 15:24:03.409779  543705 cpu.go:275] no items to output this cycle
E0319 15:24:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:24:13.409801  543705 memory.go:191] Add success.
I0319 15:24:13.409802  543705 cpu.go:282] Add success.
W0319 15:24:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:24:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:24:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:24:13.420347  543705 net.go:648] Add success.
I0319 15:24:13.423260  543705 net.go:770] primary dev: ETH0
I0319 15:24:13.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:24:13.423290  543705 net.go:698] Add success.
I0319 15:24:13.463334  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1089aa0b-4eab-44ba-8870-2de1300fad75","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:24:13.463369  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:24:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:24:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:24:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0319 15:24:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:24:14.456803  543705 disk_worker.go:494] system disk:vda1
I0319 15:24:14.456850  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:24:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:24:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:24:16.472109  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:24:19.729290  543705 disk_info.go:125] begin check local disk info of client
I0319 15:24:19.731779  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:24:19.731785  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326000 0xc000326040]
E0319 15:24:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:24:23.409781  543705 memory.go:184] no items to output this cycle
I0319 15:24:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 15:24:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:24:33.409774  543705 memory.go:184] no items to output this cycle
I0319 15:24:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 15:24:37.749879  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:24:37.749885  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:24:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:24:43.410796  543705 memory.go:191] Add success.
I0319 15:24:43.409797  543705 cpu.go:282] Add success.
I0319 15:24:43.420533  543705 net.go:648] Add success.
I0319 15:24:43.423550  543705 net.go:770] primary dev: ETH0
I0319 15:24:43.423565  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:24:43.423580  543705 net.go:698] Add success.
I0319 15:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:24:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:24:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:24:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:24:53.409779  543705 memory.go:184] no items to output this cycle
I0319 15:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 15:25:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:25:03.409773  543705 memory.go:184] no items to output this cycle
I0319 15:25:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 15:25:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:25:13.409791  543705 memory.go:191] Add success.
I0319 15:25:13.409802  543705 cpu.go:282] Add success.
W0319 15:25:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:25:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:25:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:25:13.420059  543705 net.go:648] Add success.
I0319 15:25:13.422813  543705 net.go:770] primary dev: ETH0
I0319 15:25:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:25:13.422838  543705 net.go:698] Add success.
I0319 15:25:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:25:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:25:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 15:25:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:25:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 15:25:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:25:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:25:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:25:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:25:16.472090  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:25:19.733300  543705 disk_info.go:125] begin check local disk info of client
I0319 15:25:19.735737  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:25:19.735744  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314740 0xc000314780]
E0319 15:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:25:23.409775  543705 memory.go:184] no items to output this cycle
I0319 15:25:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 15:25:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:25:33.409802  543705 memory.go:184] no items to output this cycle
I0319 15:25:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 15:25:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:25:43.409780  543705 memory.go:191] Add success.
I0319 15:25:43.409801  543705 cpu.go:282] Add success.
I0319 15:25:43.420015  543705 net.go:648] Add success.
I0319 15:25:43.423108  543705 net.go:770] primary dev: ETH0
I0319 15:25:43.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:25:43.423138  543705 net.go:698] Add success.
I0319 15:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:25:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:25:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:25:53.409784  543705 memory.go:184] no items to output this cycle
I0319 15:25:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 15:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:26:03.409772  543705 memory.go:184] no items to output this cycle
I0319 15:26:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:26:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:26:13.409822  543705 memory.go:191] Add success.
I0319 15:26:13.409827  543705 cpu.go:282] Add success.
W0319 15:26:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:26:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:26:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:26:13.420529  543705 net.go:648] Add success.
I0319 15:26:13.423283  543705 net.go:770] primary dev: ETH0
I0319 15:26:13.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:26:13.423307  543705 net.go:698] Add success.
I0319 15:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:26:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:26:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 15:26:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:26:14.456581  543705 disk_worker.go:494] system disk:vda1
I0319 15:26:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:26:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:26:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:26:16.472090  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:26:19.735828  543705 disk_info.go:125] begin check local disk info of client
I0319 15:26:19.738289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:26:19.738296  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2080 0xc0003b20c0]
E0319 15:26:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:26:23.409776  543705 memory.go:184] no items to output this cycle
I0319 15:26:23.409780  543705 cpu.go:275] no items to output this cycle
E0319 15:26:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:26:33.409800  543705 memory.go:184] no items to output this cycle
I0319 15:26:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 15:26:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:26:43.409788  543705 memory.go:191] Add success.
I0319 15:26:43.409811  543705 cpu.go:282] Add success.
I0319 15:26:43.419966  543705 net.go:648] Add success.
I0319 15:26:43.422626  543705 net.go:770] primary dev: ETH0
I0319 15:26:43.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:26:43.422653  543705 net.go:698] Add success.
I0319 15:26:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:26:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:26:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:26:53.410208  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:26:53.410217  543705 cpu.go:275] no items to output this cycle
I0319 15:26:53.410222  543705 memory.go:184] no items to output this cycle
E0319 15:27:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:27:03.409797  543705 memory.go:184] no items to output this cycle
I0319 15:27:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 15:27:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:27:13.409783  543705 memory.go:191] Add success.
I0319 15:27:13.409805  543705 cpu.go:282] Add success.
W0319 15:27:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:27:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:27:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:27:13.420129  543705 net.go:648] Add success.
I0319 15:27:13.429061  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 15:27:13.429148  543705 net.go:770] primary dev: ETH0
I0319 15:27:13.429160  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:27:13.429171  543705 net.go:698] Add success.
I0319 15:27:13.453662  543705 event_worker.go:152] Polling the log file for events...
I0319 15:27:13.470423  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31e06cdf-7a7b-43a8-95c1-0734914a3ffb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:27:13.470459  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 15:27:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:27:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 15:27:14.455205  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:27:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:27:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:27:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:27:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 15:27:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:27:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:27:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:27:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:27:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:27:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:27:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:27:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:27:19.739324  543705 disk_info.go:125] begin check local disk info of client
I0319 15:27:19.741851  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:27:19.741858  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c0c0 0xc00039c100]
E0319 15:27:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:27:23.409759  543705 memory.go:184] no items to output this cycle
I0319 15:27:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 15:27:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:27:33.409768  543705 memory.go:184] no items to output this cycle
I0319 15:27:33.409792  543705 cpu.go:275] no items to output this cycle
I0319 15:27:37.750027  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:27:37.750034  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:27:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:27:43.410674  543705 memory.go:191] Add success.
I0319 15:27:43.409820  543705 cpu.go:282] Add success.
I0319 15:27:43.420431  543705 net.go:648] Add success.
I0319 15:27:43.423422  543705 net.go:770] primary dev: ETH0
I0319 15:27:43.423435  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:27:43.423448  543705 net.go:698] Add success.
I0319 15:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:27:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:27:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:27:53.410409  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:27:53.410425  543705 memory.go:184] no items to output this cycle
I0319 15:27:53.410440  543705 cpu.go:275] no items to output this cycle
E0319 15:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:28:03.409797  543705 memory.go:184] no items to output this cycle
I0319 15:28:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 15:28:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:28:13.409793  543705 memory.go:191] Add success.
W0319 15:28:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:28:13.409826  543705 cpu.go:282] Add success.
W0319 15:28:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:28:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:28:13.420159  543705 net.go:648] Add success.
I0319 15:28:13.422585  543705 net.go:770] primary dev: ETH0
I0319 15:28:13.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:28:13.422611  543705 net.go:698] Add success.
I0319 15:28:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:28:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:28:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 15:28:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:28:14.456731  543705 disk_worker.go:494] system disk:vda1
I0319 15:28:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:28:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:28:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:28:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:28:19.743347  543705 disk_info.go:125] begin check local disk info of client
I0319 15:28:19.745777  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:28:19.745784  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
E0319 15:28:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:28:23.409792  543705 memory.go:184] no items to output this cycle
I0319 15:28:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:28:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:28:33.409806  543705 memory.go:184] no items to output this cycle
I0319 15:28:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 15:28:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:28:43.409795  543705 memory.go:191] Add success.
I0319 15:28:43.409810  543705 cpu.go:282] Add success.
I0319 15:28:43.419885  543705 net.go:648] Add success.
I0319 15:28:43.422651  543705 net.go:770] primary dev: ETH0
I0319 15:28:43.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:28:43.422677  543705 net.go:698] Add success.
I0319 15:28:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:28:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:28:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:28:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:28:53.409792  543705 memory.go:184] no items to output this cycle
I0319 15:28:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:29:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:29:03.409783  543705 memory.go:184] no items to output this cycle
I0319 15:29:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 15:29:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:29:13.409799  543705 memory.go:191] Add success.
I0319 15:29:13.409801  543705 cpu.go:282] Add success.
W0319 15:29:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:29:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:29:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:29:13.420171  543705 net.go:648] Add success.
I0319 15:29:13.423133  543705 net.go:770] primary dev: ETH0
I0319 15:29:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:29:13.423163  543705 net.go:698] Add success.
I0319 15:29:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:29:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:29:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0319 15:29:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:29:14.456498  543705 disk_worker.go:494] system disk:vda1
I0319 15:29:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:29:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:29:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:29:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:29:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:29:16.472466  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:29:19.747373  543705 disk_info.go:125] begin check local disk info of client
I0319 15:29:19.749769  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:29:19.749775  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba100 0xc0003ba140]
E0319 15:29:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:29:23.409788  543705 memory.go:184] no items to output this cycle
I0319 15:29:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 15:29:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:29:33.409779  543705 cpu.go:275] no items to output this cycle
I0319 15:29:33.409791  543705 memory.go:184] no items to output this cycle
E0319 15:29:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:29:43.409812  543705 memory.go:191] Add success.
I0319 15:29:43.409820  543705 cpu.go:282] Add success.
I0319 15:29:43.419998  543705 net.go:648] Add success.
I0319 15:29:43.423111  543705 net.go:770] primary dev: ETH0
I0319 15:29:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:29:43.423137  543705 net.go:698] Add success.
I0319 15:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:29:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:29:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:29:53.409781  543705 memory.go:184] no items to output this cycle
I0319 15:29:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 15:30:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:30:03.409794  543705 memory.go:184] no items to output this cycle
I0319 15:30:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 15:30:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:30:13.409801  543705 cpu.go:282] Add success.
I0319 15:30:13.409811  543705 memory.go:191] Add success.
W0319 15:30:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:30:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:30:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:30:13.420126  543705 net.go:648] Add success.
I0319 15:30:13.422625  543705 net.go:770] primary dev: ETH0
I0319 15:30:13.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:30:13.422652  543705 net.go:698] Add success.
I0319 15:30:13.483204  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c372f1a8-0f07-4d4d-855a-03dc1e6c84e0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:30:13.483239  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:30:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:30:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:30:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 15:30:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:30:14.456865  543705 disk_worker.go:494] system disk:vda1
I0319 15:30:14.456894  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:30:16.457577  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:30:16.457657  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:30:16.457685  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:30:16.473044  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:30:19.751391  543705 disk_info.go:125] begin check local disk info of client
I0319 15:30:19.753816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:30:19.753822  543705 disk_info.go:196] parse disk info done, disk is : [0xc000356080 0xc0003560c0]
E0319 15:30:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:30:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:30:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 15:30:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:30:33.409780  543705 cpu.go:275] no items to output this cycle
I0319 15:30:33.409784  543705 memory.go:184] no items to output this cycle
I0319 15:30:37.751464  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:30:37.751471  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:30:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:30:43.410788  543705 memory.go:191] Add success.
I0319 15:30:43.409808  543705 cpu.go:282] Add success.
I0319 15:30:43.420595  543705 net.go:648] Add success.
I0319 15:30:43.423931  543705 net.go:770] primary dev: ETH0
I0319 15:30:43.423946  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:30:43.423960  543705 net.go:698] Add success.
I0319 15:30:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:30:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:30:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:30:53.409772  543705 memory.go:184] no items to output this cycle
I0319 15:30:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:31:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:31:03.409788  543705 memory.go:184] no items to output this cycle
I0319 15:31:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 15:31:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:31:13.409797  543705 memory.go:191] Add success.
I0319 15:31:13.409806  543705 cpu.go:282] Add success.
W0319 15:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:31:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:31:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:31:13.420294  543705 net.go:648] Add success.
I0319 15:31:13.423029  543705 net.go:770] primary dev: ETH0
I0319 15:31:13.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:31:13.423058  543705 net.go:698] Add success.
I0319 15:31:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:31:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:31:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 15:31:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:31:14.456597  543705 disk_worker.go:494] system disk:vda1
I0319 15:31:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:31:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:31:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:31:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:31:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:31:19.753907  543705 disk_info.go:125] begin check local disk info of client
I0319 15:31:19.756309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:31:19.756315  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256340 0xc000256380]
E0319 15:31:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:31:23.409787  543705 memory.go:184] no items to output this cycle
I0319 15:31:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 15:31:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:31:33.409782  543705 cpu.go:275] no items to output this cycle
I0319 15:31:33.409794  543705 memory.go:184] no items to output this cycle
E0319 15:31:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:31:43.409804  543705 memory.go:191] Add success.
I0319 15:31:43.409814  543705 cpu.go:282] Add success.
I0319 15:31:43.419879  543705 net.go:648] Add success.
I0319 15:31:43.423148  543705 net.go:770] primary dev: ETH0
I0319 15:31:43.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:31:43.423174  543705 net.go:698] Add success.
I0319 15:31:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:31:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:31:53.410351  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:31:53.410368  543705 memory.go:184] no items to output this cycle
I0319 15:31:53.410389  543705 cpu.go:275] no items to output this cycle
E0319 15:32:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:32:03.409784  543705 memory.go:184] no items to output this cycle
I0319 15:32:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 15:32:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:32:13.409798  543705 memory.go:191] Add success.
I0319 15:32:13.409799  543705 cpu.go:282] Add success.
W0319 15:32:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:32:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:32:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:32:13.420072  543705 net.go:648] Add success.
I0319 15:32:13.422830  543705 net.go:770] primary dev: ETH0
I0319 15:32:13.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:32:13.422860  543705 net.go:698] Add success.
W0319 15:32:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:32:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 15:32:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:32:14.456174  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:32:14.456184  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:32:14.456190  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:32:14.456487  543705 disk_worker.go:494] system disk:vda1
I0319 15:32:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:32:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:32:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:32:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:32:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:32:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:32:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:32:16.472442  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:32:19.757420  543705 disk_info.go:125] begin check local disk info of client
I0319 15:32:19.759831  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:32:19.759837  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd500 0xc0002bd540]
E0319 15:32:23.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:32:23.409861  543705 memory.go:184] no items to output this cycle
I0319 15:32:23.409944  543705 cpu.go:275] no items to output this cycle
E0319 15:32:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:32:33.409775  543705 memory.go:184] no items to output this cycle
I0319 15:32:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:32:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:32:43.409795  543705 memory.go:191] Add success.
I0319 15:32:43.409832  543705 cpu.go:282] Add success.
I0319 15:32:43.419964  543705 net.go:648] Add success.
I0319 15:32:43.422798  543705 net.go:770] primary dev: ETH0
I0319 15:32:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:32:43.422823  543705 net.go:698] Add success.
I0319 15:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:32:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:32:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:32:53.409788  543705 cpu.go:275] no items to output this cycle
I0319 15:32:53.409789  543705 memory.go:184] no items to output this cycle
E0319 15:33:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:33:03.409818  543705 memory.go:184] no items to output this cycle
I0319 15:33:03.409828  543705 cpu.go:275] no items to output this cycle
E0319 15:33:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:33:13.409803  543705 memory.go:191] Add success.
I0319 15:33:13.409821  543705 cpu.go:282] Add success.
W0319 15:33:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:33:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:33:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:33:13.420197  543705 net.go:648] Add success.
I0319 15:33:13.423154  543705 net.go:770] primary dev: ETH0
I0319 15:33:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:33:13.423184  543705 net.go:698] Add success.
I0319 15:33:13.469257  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7790ec70-d2c8-4454-a627-28a6d1988430","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:33:13.469291  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:33:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:33:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:33:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 15:33:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:33:14.456596  543705 disk_worker.go:494] system disk:vda1
I0319 15:33:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:33:15.456022  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:33:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:33:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:33:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:33:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:33:19.760443  543705 disk_info.go:125] begin check local disk info of client
I0319 15:33:19.762887  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:33:19.762893  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc2c0 0xc0002bc300]
E0319 15:33:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:33:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:33:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 15:33:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:33:33.409809  543705 memory.go:184] no items to output this cycle
I0319 15:33:33.409820  543705 cpu.go:275] no items to output this cycle
I0319 15:33:37.752465  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:33:37.752471  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:33:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:33:43.410829  543705 memory.go:191] Add success.
I0319 15:33:43.409811  543705 cpu.go:282] Add success.
I0319 15:33:43.420611  543705 net.go:648] Add success.
I0319 15:33:43.423338  543705 net.go:770] primary dev: ETH0
I0319 15:33:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:33:43.423368  543705 net.go:698] Add success.
I0319 15:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:33:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:33:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:33:53.409791  543705 memory.go:184] no items to output this cycle
I0319 15:33:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:34:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:34:03.409815  543705 memory.go:184] no items to output this cycle
I0319 15:34:03.409828  543705 cpu.go:275] no items to output this cycle
E0319 15:34:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:34:13.409810  543705 memory.go:191] Add success.
I0319 15:34:13.409810  543705 cpu.go:282] Add success.
W0319 15:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:34:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:34:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:34:13.420131  543705 net.go:648] Add success.
I0319 15:34:13.423224  543705 net.go:770] primary dev: ETH0
I0319 15:34:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:34:13.423253  543705 net.go:698] Add success.
I0319 15:34:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:34:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:34:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 15:34:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:34:14.456590  543705 disk_worker.go:494] system disk:vda1
I0319 15:34:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:34:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:34:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:34:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:34:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:34:19.764456  543705 disk_info.go:125] begin check local disk info of client
I0319 15:34:19.766910  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:34:19.766916  543705 disk_info.go:196] parse disk info done, disk is : [0xc000494340 0xc000494380]
E0319 15:34:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:34:23.409764  543705 memory.go:184] no items to output this cycle
I0319 15:34:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 15:34:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:34:33.409789  543705 cpu.go:275] no items to output this cycle
I0319 15:34:33.409791  543705 memory.go:184] no items to output this cycle
E0319 15:34:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:34:43.409820  543705 memory.go:191] Add success.
I0319 15:34:43.409837  543705 cpu.go:282] Add success.
I0319 15:34:43.419980  543705 net.go:648] Add success.
I0319 15:34:43.422774  543705 net.go:770] primary dev: ETH0
I0319 15:34:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:34:43.422804  543705 net.go:698] Add success.
I0319 15:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:34:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:34:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:34:53.409779  543705 memory.go:184] no items to output this cycle
I0319 15:34:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 15:35:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:35:03.409794  543705 cpu.go:275] no items to output this cycle
I0319 15:35:03.409800  543705 memory.go:184] no items to output this cycle
E0319 15:35:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:35:13.409792  543705 memory.go:191] Add success.
I0319 15:35:13.409805  543705 cpu.go:282] Add success.
W0319 15:35:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:35:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:35:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:35:13.420128  543705 net.go:648] Add success.
I0319 15:35:13.422687  543705 net.go:770] primary dev: ETH0
I0319 15:35:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:35:13.422721  543705 net.go:698] Add success.
I0319 15:35:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:35:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:35:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 15:35:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:35:14.456583  543705 disk_worker.go:494] system disk:vda1
I0319 15:35:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:35:15.456020  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:35:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:35:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:35:19.768467  543705 disk_info.go:125] begin check local disk info of client
I0319 15:35:19.770868  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:35:19.770874  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266080 0xc0002660c0]
E0319 15:35:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:35:23.409798  543705 memory.go:184] no items to output this cycle
I0319 15:35:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 15:35:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:35:33.409767  543705 memory.go:184] no items to output this cycle
I0319 15:35:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 15:35:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:35:43.409798  543705 memory.go:191] Add success.
I0319 15:35:43.409800  543705 cpu.go:282] Add success.
I0319 15:35:43.419906  543705 net.go:648] Add success.
I0319 15:35:43.422551  543705 net.go:770] primary dev: ETH0
I0319 15:35:43.422566  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:35:43.422581  543705 net.go:698] Add success.
I0319 15:35:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:35:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:35:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:35:53.410271  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:35:53.410289  543705 memory.go:184] no items to output this cycle
I0319 15:35:53.410293  543705 cpu.go:275] no items to output this cycle
E0319 15:36:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:36:03.409791  543705 memory.go:184] no items to output this cycle
I0319 15:36:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 15:36:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:36:13.409818  543705 memory.go:191] Add success.
I0319 15:36:13.409824  543705 cpu.go:282] Add success.
W0319 15:36:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:36:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:36:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:36:13.420210  543705 net.go:648] Add success.
I0319 15:36:13.423057  543705 net.go:770] primary dev: ETH0
I0319 15:36:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:36:13.423085  543705 net.go:698] Add success.
I0319 15:36:13.468442  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b8d7efd-401f-48b1-bfe2-d4405781b8bb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:36:13.468477  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:36:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:36:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:36:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 15:36:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:36:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 15:36:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:36:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:36:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:36:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:36:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:36:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:36:19.772497  543705 disk_info.go:125] begin check local disk info of client
I0319 15:36:19.774947  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:36:19.774953  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348300 0xc000348340]
E0319 15:36:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:36:23.409794  543705 memory.go:184] no items to output this cycle
I0319 15:36:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:36:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:36:33.409779  543705 cpu.go:275] no items to output this cycle
I0319 15:36:33.409781  543705 memory.go:184] no items to output this cycle
I0319 15:36:37.752629  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:36:37.752635  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:36:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:36:43.410676  543705 memory.go:191] Add success.
I0319 15:36:43.409801  543705 cpu.go:282] Add success.
I0319 15:36:43.420455  543705 net.go:648] Add success.
I0319 15:36:43.423215  543705 net.go:770] primary dev: ETH0
I0319 15:36:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:36:43.423241  543705 net.go:698] Add success.
I0319 15:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:36:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:36:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:36:53.409769  543705 memory.go:184] no items to output this cycle
I0319 15:36:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 15:37:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:37:03.409799  543705 memory.go:184] no items to output this cycle
I0319 15:37:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 15:37:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:37:13.409787  543705 memory.go:191] Add success.
I0319 15:37:13.409811  543705 cpu.go:282] Add success.
W0319 15:37:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:37:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:37:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:37:13.420135  543705 net.go:648] Add success.
I0319 15:37:13.423150  543705 net.go:770] primary dev: ETH0
I0319 15:37:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:37:13.423174  543705 net.go:698] Add success.
I0319 15:37:13.453724  543705 event_worker.go:152] Polling the log file for events...
W0319 15:37:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:37:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 15:37:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:37:14.456863  543705 disk_worker.go:494] system disk:vda1
I0319 15:37:14.456902  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:37:14.457686  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:37:14.457713  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:37:14.457718  543705 custom_config.go:64] query custom config with name: gpu
E0319 15:37:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:37:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:37:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:37:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:37:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:37:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:37:16.472333  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:37:19.776566  543705 disk_info.go:125] begin check local disk info of client
I0319 15:37:19.778986  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:37:19.778992  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e3c0 0xc00035e400]
E0319 15:37:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:37:23.409776  543705 memory.go:184] no items to output this cycle
I0319 15:37:23.409777  543705 cpu.go:275] no items to output this cycle
E0319 15:37:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:37:33.409804  543705 memory.go:184] no items to output this cycle
I0319 15:37:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 15:37:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:37:43.409781  543705 memory.go:191] Add success.
I0319 15:37:43.409788  543705 cpu.go:282] Add success.
I0319 15:37:43.419871  543705 net.go:648] Add success.
I0319 15:37:43.422837  543705 net.go:770] primary dev: ETH0
I0319 15:37:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:37:43.422862  543705 net.go:698] Add success.
I0319 15:37:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:37:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:37:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:37:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:37:53.409765  543705 memory.go:184] no items to output this cycle
I0319 15:37:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 15:38:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:38:03.409779  543705 memory.go:184] no items to output this cycle
I0319 15:38:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:38:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:38:13.409828  543705 memory.go:191] Add success.
I0319 15:38:13.409835  543705 cpu.go:282] Add success.
W0319 15:38:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:38:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:38:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:38:13.420165  543705 net.go:648] Add success.
I0319 15:38:13.422956  543705 net.go:770] primary dev: ETH0
I0319 15:38:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:38:13.422982  543705 net.go:698] Add success.
I0319 15:38:14.455021  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:38:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:38:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 15:38:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:38:14.456564  543705 disk_worker.go:494] system disk:vda1
I0319 15:38:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:38:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:38:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:38:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:38:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:38:19.780539  543705 disk_info.go:125] begin check local disk info of client
I0319 15:38:19.783043  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:38:19.783050  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051e900 0xc00051e940]
E0319 15:38:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:38:23.409760  543705 memory.go:184] no items to output this cycle
I0319 15:38:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 15:38:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:38:33.409808  543705 memory.go:184] no items to output this cycle
I0319 15:38:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 15:38:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:38:43.409800  543705 memory.go:191] Add success.
I0319 15:38:43.409825  543705 cpu.go:282] Add success.
I0319 15:38:43.419868  543705 net.go:648] Add success.
I0319 15:38:43.423138  543705 net.go:770] primary dev: ETH0
I0319 15:38:43.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:38:43.423164  543705 net.go:698] Add success.
I0319 15:38:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:38:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:38:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:38:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 15:38:53.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:38:53.409830  543705 memory.go:184] no items to output this cycle
E0319 15:39:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:39:03.409787  543705 memory.go:184] no items to output this cycle
I0319 15:39:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 15:39:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:39:13.409836  543705 memory.go:191] Add success.
I0319 15:39:13.409842  543705 cpu.go:282] Add success.
W0319 15:39:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:39:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:39:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:39:13.420182  543705 net.go:648] Add success.
I0319 15:39:13.422825  543705 net.go:770] primary dev: ETH0
I0319 15:39:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:39:13.422850  543705 net.go:698] Add success.
I0319 15:39:13.469494  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7cf8f641-2e05-44de-abaf-4a8e3f09be46","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:39:13.469528  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:39:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:39:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:39:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 15:39:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:39:14.456608  543705 disk_worker.go:494] system disk:vda1
I0319 15:39:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:39:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:39:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:39:16.472435  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:39:19.784555  543705 disk_info.go:125] begin check local disk info of client
I0319 15:39:19.786982  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:39:19.786988  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc440 0xc0002bc480]
E0319 15:39:23.410230  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:39:23.410246  543705 memory.go:184] no items to output this cycle
I0319 15:39:23.410274  543705 cpu.go:275] no items to output this cycle
E0319 15:39:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:39:33.409816  543705 memory.go:184] no items to output this cycle
I0319 15:39:33.409828  543705 cpu.go:275] no items to output this cycle
I0319 15:39:37.752789  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:39:37.752795  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:39:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:39:43.410632  543705 memory.go:191] Add success.
I0319 15:39:43.409831  543705 cpu.go:282] Add success.
I0319 15:39:43.420317  543705 net.go:648] Add success.
I0319 15:39:43.423088  543705 net.go:770] primary dev: ETH0
I0319 15:39:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:39:43.423113  543705 net.go:698] Add success.
I0319 15:39:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:39:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:39:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:39:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:39:53.409776  543705 memory.go:184] no items to output this cycle
I0319 15:39:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:40:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:40:03.409785  543705 memory.go:184] no items to output this cycle
I0319 15:40:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 15:40:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:40:13.409795  543705 memory.go:191] Add success.
I0319 15:40:13.409798  543705 cpu.go:282] Add success.
W0319 15:40:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:40:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:40:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:40:13.420236  543705 net.go:648] Add success.
I0319 15:40:13.422923  543705 net.go:770] primary dev: ETH0
I0319 15:40:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:40:13.422952  543705 net.go:698] Add success.
I0319 15:40:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:40:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:40:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0319 15:40:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:40:14.456522  543705 disk_worker.go:494] system disk:vda1
I0319 15:40:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:40:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:40:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:40:19.788572  543705 disk_info.go:125] begin check local disk info of client
I0319 15:40:19.791095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:40:19.791101  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc140 0xc0002bc180]
E0319 15:40:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:40:23.409759  543705 memory.go:184] no items to output this cycle
I0319 15:40:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 15:40:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:40:33.409798  543705 memory.go:184] no items to output this cycle
I0319 15:40:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 15:40:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:40:43.409829  543705 memory.go:191] Add success.
I0319 15:40:43.409830  543705 cpu.go:282] Add success.
I0319 15:40:43.419990  543705 net.go:648] Add success.
I0319 15:40:43.423069  543705 net.go:770] primary dev: ETH0
I0319 15:40:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:40:43.423099  543705 net.go:698] Add success.
I0319 15:40:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:40:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:40:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:40:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:40:53.409783  543705 cpu.go:275] no items to output this cycle
I0319 15:40:53.409787  543705 memory.go:184] no items to output this cycle
E0319 15:41:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:41:03.409824  543705 memory.go:184] no items to output this cycle
I0319 15:41:03.409838  543705 cpu.go:275] no items to output this cycle
E0319 15:41:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:41:13.409798  543705 memory.go:191] Add success.
I0319 15:41:13.409811  543705 cpu.go:282] Add success.
W0319 15:41:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:41:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:41:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:41:13.420343  543705 net.go:648] Add success.
I0319 15:41:13.423052  543705 net.go:770] primary dev: ETH0
I0319 15:41:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:41:13.423076  543705 net.go:698] Add success.
I0319 15:41:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:41:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:41:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 15:41:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:41:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 15:41:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:41:15.456012  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:41:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:41:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:41:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:41:19.792592  543705 disk_info.go:125] begin check local disk info of client
I0319 15:41:19.795046  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:41:19.795051  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2080 0xc0002a20c0]
E0319 15:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:41:23.409798  543705 memory.go:184] no items to output this cycle
I0319 15:41:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 15:41:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:41:33.409773  543705 memory.go:184] no items to output this cycle
I0319 15:41:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 15:41:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:41:43.409812  543705 memory.go:191] Add success.
I0319 15:41:43.409819  543705 cpu.go:282] Add success.
I0319 15:41:43.419976  543705 net.go:648] Add success.
I0319 15:41:43.423271  543705 net.go:770] primary dev: ETH0
I0319 15:41:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:41:43.423301  543705 net.go:698] Add success.
I0319 15:41:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:41:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:41:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:41:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:41:53.409811  543705 memory.go:184] no items to output this cycle
I0319 15:41:53.409819  543705 cpu.go:275] no items to output this cycle
E0319 15:42:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:42:03.409800  543705 memory.go:184] no items to output this cycle
I0319 15:42:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 15:42:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:42:13.409824  543705 memory.go:191] Add success.
I0319 15:42:13.409831  543705 cpu.go:282] Add success.
W0319 15:42:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:42:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:42:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:42:13.420180  543705 net.go:648] Add success.
I0319 15:42:13.422820  543705 net.go:770] primary dev: ETH0
I0319 15:42:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:42:13.422845  543705 net.go:698] Add success.
I0319 15:42:13.469153  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"83f3560a-bf62-4ab9-a60f-4e0748766053","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:42:13.469195  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 15:42:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:42:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 15:42:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:42:14.457032  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:42:14.457041  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:42:14.457047  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:42:14.457116  543705 disk_worker.go:494] system disk:vda1
I0319 15:42:14.457169  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:42:15.456477  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:42:15.456487  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:42:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:42:16.457998  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:42:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:42:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:42:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:42:19.796619  543705 disk_info.go:125] begin check local disk info of client
I0319 15:42:19.799084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:42:19.799091  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8000 0xc0002b8040]
E0319 15:42:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:42:23.409763  543705 memory.go:184] no items to output this cycle
I0319 15:42:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 15:42:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:42:33.409767  543705 memory.go:184] no items to output this cycle
I0319 15:42:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 15:42:37.753744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:42:37.753751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:42:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:42:43.410704  543705 memory.go:191] Add success.
I0319 15:42:43.409815  543705 cpu.go:282] Add success.
I0319 15:42:43.420428  543705 net.go:648] Add success.
I0319 15:42:43.423512  543705 net.go:770] primary dev: ETH0
I0319 15:42:43.423526  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:42:43.423541  543705 net.go:698] Add success.
I0319 15:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:42:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:42:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:42:53.409807  543705 memory.go:184] no items to output this cycle
I0319 15:42:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 15:43:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:43:03.409798  543705 memory.go:184] no items to output this cycle
I0319 15:43:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 15:43:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:43:13.409820  543705 memory.go:191] Add success.
I0319 15:43:13.409827  543705 cpu.go:282] Add success.
W0319 15:43:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:43:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:43:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:43:13.420149  543705 net.go:648] Add success.
I0319 15:43:13.422805  543705 net.go:770] primary dev: ETH0
I0319 15:43:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:43:13.422836  543705 net.go:698] Add success.
I0319 15:43:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:43:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:43:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 15:43:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:43:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 15:43:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:43:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:43:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:43:19.800464  543705 disk_info.go:125] begin check local disk info of client
I0319 15:43:19.802909  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:43:19.802915  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e1c0 0xc00039e200]
E0319 15:43:23.410004  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:43:23.410032  543705 memory.go:184] no items to output this cycle
I0319 15:43:23.410191  543705 cpu.go:275] no items to output this cycle
E0319 15:43:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:43:33.409773  543705 memory.go:184] no items to output this cycle
I0319 15:43:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 15:43:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:43:43.409792  543705 memory.go:191] Add success.
I0319 15:43:43.409793  543705 cpu.go:282] Add success.
I0319 15:43:43.419954  543705 net.go:648] Add success.
I0319 15:43:43.422680  543705 net.go:770] primary dev: ETH0
I0319 15:43:43.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:43:43.422709  543705 net.go:698] Add success.
I0319 15:43:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:43:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:43:53.409809  543705 memory.go:184] no items to output this cycle
I0319 15:43:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 15:44:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:44:03.409780  543705 memory.go:184] no items to output this cycle
I0319 15:44:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 15:44:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:44:13.409797  543705 memory.go:191] Add success.
I0319 15:44:13.409805  543705 cpu.go:282] Add success.
W0319 15:44:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:44:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:44:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:44:13.420052  543705 net.go:648] Add success.
I0319 15:44:13.422757  543705 net.go:770] primary dev: ETH0
I0319 15:44:13.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:44:13.422782  543705 net.go:698] Add success.
I0319 15:44:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:44:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:44:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0319 15:44:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:44:14.456519  543705 disk_worker.go:494] system disk:vda1
I0319 15:44:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:44:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:44:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:44:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:44:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:44:19.804650  543705 disk_info.go:125] begin check local disk info of client
I0319 15:44:19.807051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:44:19.807056  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005083c0 0xc000508400]
E0319 15:44:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:44:23.409764  543705 memory.go:184] no items to output this cycle
I0319 15:44:23.409891  543705 cpu.go:275] no items to output this cycle
E0319 15:44:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:44:33.409778  543705 memory.go:184] no items to output this cycle
I0319 15:44:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 15:44:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:44:43.409803  543705 memory.go:191] Add success.
I0319 15:44:43.409819  543705 cpu.go:282] Add success.
I0319 15:44:43.419967  543705 net.go:648] Add success.
I0319 15:44:43.422624  543705 net.go:770] primary dev: ETH0
I0319 15:44:43.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:44:43.422649  543705 net.go:698] Add success.
I0319 15:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:44:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:44:53.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:44:53.409828  543705 memory.go:184] no items to output this cycle
I0319 15:44:53.409842  543705 cpu.go:275] no items to output this cycle
E0319 15:45:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:45:03.409773  543705 memory.go:184] no items to output this cycle
I0319 15:45:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:45:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:45:13.409834  543705 memory.go:191] Add success.
I0319 15:45:13.409847  543705 cpu.go:282] Add success.
W0319 15:45:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:45:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:45:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:45:13.420121  543705 net.go:648] Add success.
I0319 15:45:13.422789  543705 net.go:770] primary dev: ETH0
I0319 15:45:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:45:13.422815  543705 net.go:698] Add success.
I0319 15:45:13.469262  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"32d56532-dad3-4796-8bb1-57dc859365f0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:45:13.469302  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:45:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:45:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 15:45:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:45:14.456561  543705 disk_worker.go:494] system disk:vda1
I0319 15:45:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:45:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:45:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:45:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:45:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:45:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:45:19.808671  543705 disk_info.go:125] begin check local disk info of client
I0319 15:45:19.811110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:45:19.811116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000248a40 0xc000248a80]
E0319 15:45:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:45:23.409773  543705 memory.go:184] no items to output this cycle
I0319 15:45:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 15:45:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:45:33.409782  543705 memory.go:184] no items to output this cycle
I0319 15:45:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 15:45:37.755473  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:45:37.755480  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:45:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:45:43.410760  543705 memory.go:191] Add success.
I0319 15:45:43.409812  543705 cpu.go:282] Add success.
I0319 15:45:43.420465  543705 net.go:648] Add success.
I0319 15:45:43.423121  543705 net.go:770] primary dev: ETH0
I0319 15:45:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:45:43.423146  543705 net.go:698] Add success.
I0319 15:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:45:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:45:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:45:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:45:53.409802  543705 memory.go:184] no items to output this cycle
I0319 15:45:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 15:46:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:46:03.409773  543705 memory.go:184] no items to output this cycle
I0319 15:46:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 15:46:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:46:13.409783  543705 memory.go:191] Add success.
W0319 15:46:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:46:13.409815  543705 cpu.go:282] Add success.
W0319 15:46:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:46:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:46:13.420131  543705 net.go:648] Add success.
I0319 15:46:13.423118  543705 net.go:770] primary dev: ETH0
I0319 15:46:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:46:13.423143  543705 net.go:698] Add success.
I0319 15:46:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:46:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:46:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 15:46:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:46:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 15:46:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:46:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:46:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:46:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:46:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:46:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:46:19.812699  543705 disk_info.go:125] begin check local disk info of client
I0319 15:46:19.815163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:46:19.815170  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf800 0xc0003bf840]
E0319 15:46:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:46:23.409762  543705 memory.go:184] no items to output this cycle
I0319 15:46:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:46:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:46:33.409774  543705 memory.go:184] no items to output this cycle
I0319 15:46:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 15:46:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:46:43.409805  543705 memory.go:191] Add success.
I0319 15:46:43.409821  543705 cpu.go:282] Add success.
I0319 15:46:43.419991  543705 net.go:648] Add success.
I0319 15:46:43.423129  543705 net.go:770] primary dev: ETH0
I0319 15:46:43.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:46:43.423154  543705 net.go:698] Add success.
I0319 15:46:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:46:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:46:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:46:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:46:53.409773  543705 memory.go:184] no items to output this cycle
I0319 15:46:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 15:47:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:47:03.409767  543705 memory.go:184] no items to output this cycle
I0319 15:47:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:47:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:47:13.409792  543705 memory.go:191] Add success.
I0319 15:47:13.409815  543705 cpu.go:282] Add success.
W0319 15:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:47:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:47:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:47:13.420200  543705 net.go:648] Add success.
I0319 15:47:13.422972  543705 net.go:770] primary dev: ETH0
I0319 15:47:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:47:13.423002  543705 net.go:698] Add success.
I0319 15:47:13.453563  543705 event_worker.go:152] Polling the log file for events...
W0319 15:47:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:47:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 15:47:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:47:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:47:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:47:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:47:14.457018  543705 disk_worker.go:494] system disk:vda1
I0319 15:47:14.457060  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:47:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:47:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:47:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:47:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:47:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:47:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:47:16.472351  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:47:19.816857  543705 disk_info.go:125] begin check local disk info of client
I0319 15:47:19.819275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:47:19.819282  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003529c0 0xc000352a00]
E0319 15:47:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:47:23.409792  543705 memory.go:184] no items to output this cycle
I0319 15:47:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 15:47:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:47:33.409796  543705 memory.go:184] no items to output this cycle
I0319 15:47:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 15:47:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:47:43.409799  543705 memory.go:191] Add success.
I0319 15:47:43.409817  543705 cpu.go:282] Add success.
I0319 15:47:43.419987  543705 net.go:648] Add success.
I0319 15:47:43.422900  543705 net.go:770] primary dev: ETH0
I0319 15:47:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:47:43.422930  543705 net.go:698] Add success.
I0319 15:47:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:47:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:47:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:47:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:47:53.409770  543705 memory.go:184] no items to output this cycle
I0319 15:47:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 15:48:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:48:03.409768  543705 memory.go:184] no items to output this cycle
I0319 15:48:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:48:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:48:13.409797  543705 memory.go:191] Add success.
I0319 15:48:13.409799  543705 cpu.go:282] Add success.
W0319 15:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:48:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:48:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:48:13.420060  543705 net.go:648] Add success.
I0319 15:48:13.422965  543705 net.go:770] primary dev: ETH0
I0319 15:48:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:48:13.422999  543705 net.go:698] Add success.
I0319 15:48:13.549438  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"df6cc837-9db0-4117-89c5-38dce1823315","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:48:13.549476  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:48:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:48:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:48:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 15:48:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:48:14.456546  543705 disk_worker.go:494] system disk:vda1
I0319 15:48:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:48:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:48:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:48:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:48:19.820734  543705 disk_info.go:125] begin check local disk info of client
I0319 15:48:19.823177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:48:19.823183  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b8c0 0xc00027b900]
E0319 15:48:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:48:23.409810  543705 memory.go:184] no items to output this cycle
I0319 15:48:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 15:48:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:48:33.409781  543705 memory.go:184] no items to output this cycle
I0319 15:48:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 15:48:37.756482  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:48:37.756488  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:48:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:48:43.410729  543705 memory.go:191] Add success.
I0319 15:48:43.409836  543705 cpu.go:282] Add success.
I0319 15:48:43.420468  543705 net.go:648] Add success.
I0319 15:48:43.423150  543705 net.go:770] primary dev: ETH0
I0319 15:48:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:48:43.423176  543705 net.go:698] Add success.
I0319 15:48:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:48:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:48:53.409769  543705 memory.go:184] no items to output this cycle
I0319 15:48:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 15:49:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:49:03.409788  543705 memory.go:184] no items to output this cycle
I0319 15:49:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 15:49:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:49:13.409790  543705 memory.go:191] Add success.
I0319 15:49:13.409810  543705 cpu.go:282] Add success.
W0319 15:49:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:49:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:49:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:49:13.420233  543705 net.go:648] Add success.
I0319 15:49:13.423025  543705 net.go:770] primary dev: ETH0
I0319 15:49:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:49:13.423057  543705 net.go:698] Add success.
I0319 15:49:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:49:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:49:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 15:49:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:49:14.456494  543705 disk_worker.go:494] system disk:vda1
I0319 15:49:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:49:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:49:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:49:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:49:16.472496  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:49:19.824757  543705 disk_info.go:125] begin check local disk info of client
I0319 15:49:19.827216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:49:19.827223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cd680 0xc0004cd6c0]
E0319 15:49:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:49:23.409769  543705 memory.go:184] no items to output this cycle
I0319 15:49:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:49:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:49:33.409862  543705 cpu.go:275] no items to output this cycle
I0319 15:49:33.409889  543705 memory.go:184] no items to output this cycle
E0319 15:49:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:49:43.409823  543705 memory.go:191] Add success.
I0319 15:49:43.409828  543705 cpu.go:282] Add success.
I0319 15:49:43.420006  543705 net.go:648] Add success.
I0319 15:49:43.423166  543705 net.go:770] primary dev: ETH0
I0319 15:49:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:49:43.423194  543705 net.go:698] Add success.
I0319 15:49:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:49:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:49:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:49:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:49:53.409801  543705 memory.go:184] no items to output this cycle
I0319 15:49:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 15:50:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:50:03.409800  543705 memory.go:184] no items to output this cycle
I0319 15:50:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 15:50:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:50:13.409805  543705 memory.go:191] Add success.
I0319 15:50:13.409815  543705 cpu.go:282] Add success.
W0319 15:50:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:50:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:50:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:50:13.420177  543705 net.go:648] Add success.
I0319 15:50:13.422952  543705 net.go:770] primary dev: ETH0
I0319 15:50:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:50:13.422980  543705 net.go:698] Add success.
I0319 15:50:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:50:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:50:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 15:50:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:50:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 15:50:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:50:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:50:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:50:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:50:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:50:19.828778  543705 disk_info.go:125] begin check local disk info of client
I0319 15:50:19.831243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:50:19.831249  543705 disk_info.go:196] parse disk info done, disk is : [0xc000204cc0 0xc000204d00]
E0319 15:50:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:50:23.409774  543705 memory.go:184] no items to output this cycle
I0319 15:50:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 15:50:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:50:33.409779  543705 memory.go:184] no items to output this cycle
I0319 15:50:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 15:50:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:50:43.409798  543705 memory.go:191] Add success.
I0319 15:50:43.409832  543705 cpu.go:282] Add success.
I0319 15:50:43.420065  543705 net.go:648] Add success.
I0319 15:50:43.422828  543705 net.go:770] primary dev: ETH0
I0319 15:50:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:50:43.422854  543705 net.go:698] Add success.
I0319 15:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:50:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:50:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:50:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:50:53.409780  543705 memory.go:184] no items to output this cycle
I0319 15:50:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 15:51:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:51:03.409777  543705 memory.go:184] no items to output this cycle
I0319 15:51:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 15:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:51:13.409799  543705 memory.go:191] Add success.
I0319 15:51:13.409800  543705 cpu.go:282] Add success.
W0319 15:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:51:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:51:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:51:13.420188  543705 net.go:648] Add success.
I0319 15:51:13.423220  543705 net.go:770] primary dev: ETH0
I0319 15:51:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:51:13.423245  543705 net.go:698] Add success.
I0319 15:51:13.468206  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bb5e6b89-4f42-4225-9d02-6691d8b5f565","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:51:13.468242  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:51:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:51:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 15:51:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:51:14.456829  543705 disk_worker.go:494] system disk:vda1
I0319 15:51:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:51:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:51:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:51:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:51:19.832787  543705 disk_info.go:125] begin check local disk info of client
I0319 15:51:19.835263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:51:19.835270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2240 0xc0003e2280]
E0319 15:51:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:51:23.409764  543705 memory.go:184] no items to output this cycle
I0319 15:51:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 15:51:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:51:33.409770  543705 memory.go:184] no items to output this cycle
I0319 15:51:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 15:51:37.757486  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:51:37.757493  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:51:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:51:43.410762  543705 memory.go:191] Add success.
I0319 15:51:43.409832  543705 cpu.go:282] Add success.
I0319 15:51:43.420488  543705 net.go:648] Add success.
I0319 15:51:43.423464  543705 net.go:770] primary dev: ETH0
I0319 15:51:43.423476  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:51:43.423489  543705 net.go:698] Add success.
I0319 15:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:51:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:51:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:51:53.409772  543705 memory.go:184] no items to output this cycle
I0319 15:51:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 15:52:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:52:03.409794  543705 memory.go:184] no items to output this cycle
I0319 15:52:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 15:52:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:52:13.409801  543705 memory.go:191] Add success.
I0319 15:52:13.409811  543705 cpu.go:282] Add success.
W0319 15:52:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:52:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:52:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:52:13.420060  543705 net.go:648] Add success.
I0319 15:52:13.422819  543705 net.go:770] primary dev: ETH0
I0319 15:52:13.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:52:13.422845  543705 net.go:698] Add success.
W0319 15:52:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:52:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 15:52:14.455207  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:52:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:52:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:52:14.455938  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:52:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 15:52:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:52:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:52:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:52:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:52:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:52:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:52:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:52:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:52:19.836814  543705 disk_info.go:125] begin check local disk info of client
I0319 15:52:19.839215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:52:19.839221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329a00 0xc000329a40]
E0319 15:52:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:52:23.409790  543705 memory.go:184] no items to output this cycle
I0319 15:52:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 15:52:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:52:33.409900  543705 cpu.go:275] no items to output this cycle
I0319 15:52:33.409906  543705 memory.go:184] no items to output this cycle
E0319 15:52:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:52:43.409828  543705 memory.go:191] Add success.
I0319 15:52:43.409832  543705 cpu.go:282] Add success.
I0319 15:52:43.419877  543705 net.go:770] primary dev: ETH0
I0319 15:52:43.419891  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:52:43.419907  543705 net.go:698] Add success.
I0319 15:52:43.420266  543705 net.go:648] Add success.
I0319 15:52:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:52:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:52:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:52:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:52:53.409784  543705 memory.go:184] no items to output this cycle
I0319 15:52:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 15:53:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:53:03.409799  543705 memory.go:184] no items to output this cycle
I0319 15:53:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 15:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:53:13.409791  543705 memory.go:191] Add success.
I0319 15:53:13.409809  543705 cpu.go:282] Add success.
W0319 15:53:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:53:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:53:13.420230  543705 net.go:648] Add success.
I0319 15:53:13.423348  543705 net.go:770] primary dev: ETH0
I0319 15:53:13.423361  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:53:13.423373  543705 net.go:698] Add success.
I0319 15:53:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:53:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:53:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 15:53:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:53:14.456506  543705 disk_worker.go:494] system disk:vda1
I0319 15:53:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:53:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:53:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:53:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:53:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:53:19.840842  543705 disk_info.go:125] begin check local disk info of client
I0319 15:53:19.843266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:53:19.843272  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e5c00 0xc0000e5c40]
E0319 15:53:23.410387  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:53:23.410401  543705 memory.go:184] no items to output this cycle
I0319 15:53:23.410430  543705 cpu.go:275] no items to output this cycle
E0319 15:53:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:53:33.409777  543705 memory.go:184] no items to output this cycle
I0319 15:53:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 15:53:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:53:43.409802  543705 memory.go:191] Add success.
I0319 15:53:43.409821  543705 cpu.go:282] Add success.
I0319 15:53:43.420068  543705 net.go:648] Add success.
I0319 15:53:43.423156  543705 net.go:770] primary dev: ETH0
I0319 15:53:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:53:43.423181  543705 net.go:698] Add success.
I0319 15:53:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:53:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:53:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:53:53.410200  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:53:53.410216  543705 memory.go:184] no items to output this cycle
I0319 15:53:53.410235  543705 cpu.go:275] no items to output this cycle
E0319 15:54:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:54:03.409789  543705 memory.go:184] no items to output this cycle
I0319 15:54:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 15:54:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:54:13.409794  543705 memory.go:191] Add success.
W0319 15:54:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:54:13.409827  543705 cpu.go:282] Add success.
W0319 15:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:54:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:54:13.420200  543705 net.go:648] Add success.
I0319 15:54:13.422696  543705 net.go:770] primary dev: ETH0
I0319 15:54:13.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:54:13.422728  543705 net.go:698] Add success.
I0319 15:54:13.463162  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"679f03f7-203c-41cc-a624-acd7e2ac3046","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:54:13.463196  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 15:54:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:54:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:54:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 15:54:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:54:14.456515  543705 disk_worker.go:494] system disk:vda1
I0319 15:54:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:54:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:54:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:54:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:54:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:54:19.843355  543705 disk_info.go:125] begin check local disk info of client
I0319 15:54:19.845817  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:54:19.845823  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329e80 0xc000329ec0]
E0319 15:54:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:54:23.409776  543705 memory.go:184] no items to output this cycle
I0319 15:54:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:54:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:54:33.409821  543705 memory.go:184] no items to output this cycle
I0319 15:54:33.409832  543705 cpu.go:275] no items to output this cycle
I0319 15:54:37.757733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:54:37.757750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0319 15:54:43.409921  543705 cpu.go:282] Add success.
E0319 15:54:43.409958  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:54:43.410942  543705 memory.go:191] Add success.
I0319 15:54:43.419744  543705 net.go:648] Add success.
I0319 15:54:43.422377  543705 net.go:770] primary dev: ETH0
I0319 15:54:43.422396  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:54:43.422411  543705 net.go:698] Add success.
I0319 15:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:54:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:54:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:54:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:54:53.409808  543705 memory.go:184] no items to output this cycle
I0319 15:54:53.409820  543705 cpu.go:275] no items to output this cycle
E0319 15:55:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:55:03.409804  543705 memory.go:184] no items to output this cycle
I0319 15:55:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 15:55:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:55:13.409817  543705 cpu.go:282] Add success.
I0319 15:55:13.409819  543705 memory.go:191] Add success.
W0319 15:55:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:55:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:55:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:55:13.420157  543705 net.go:648] Add success.
I0319 15:55:13.422890  543705 net.go:770] primary dev: ETH0
I0319 15:55:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:55:13.422915  543705 net.go:698] Add success.
I0319 15:55:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:55:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:55:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 15:55:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:55:14.456612  543705 disk_worker.go:494] system disk:vda1
I0319 15:55:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:55:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:55:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:55:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:55:19.847862  543705 disk_info.go:125] begin check local disk info of client
I0319 15:55:19.850261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:55:19.850268  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd7c0 0xc0002bd800]
E0319 15:55:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:55:23.409791  543705 memory.go:184] no items to output this cycle
I0319 15:55:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 15:55:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:55:33.409799  543705 memory.go:184] no items to output this cycle
I0319 15:55:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 15:55:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:55:43.409787  543705 memory.go:191] Add success.
I0319 15:55:43.409817  543705 cpu.go:282] Add success.
I0319 15:55:43.419977  543705 net.go:648] Add success.
I0319 15:55:43.422881  543705 net.go:770] primary dev: ETH0
I0319 15:55:43.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:55:43.422906  543705 net.go:698] Add success.
I0319 15:55:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:55:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:55:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:55:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:55:53.409773  543705 memory.go:184] no items to output this cycle
I0319 15:55:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:56:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:56:03.409767  543705 memory.go:184] no items to output this cycle
I0319 15:56:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 15:56:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:56:13.409822  543705 memory.go:191] Add success.
I0319 15:56:13.409842  543705 cpu.go:282] Add success.
W0319 15:56:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:56:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:56:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:56:13.420203  543705 net.go:648] Add success.
I0319 15:56:13.422922  543705 net.go:770] primary dev: ETH0
I0319 15:56:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:56:13.422947  543705 net.go:698] Add success.
I0319 15:56:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:56:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:56:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 15:56:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:56:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 15:56:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:56:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:56:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:56:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:56:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:56:19.851883  543705 disk_info.go:125] begin check local disk info of client
I0319 15:56:19.854334  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:56:19.854341  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329400 0xc000329440]
E0319 15:56:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:56:23.409763  543705 memory.go:184] no items to output this cycle
I0319 15:56:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 15:56:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:56:33.409771  543705 memory.go:184] no items to output this cycle
I0319 15:56:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:56:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:56:43.409797  543705 memory.go:191] Add success.
I0319 15:56:43.409800  543705 cpu.go:282] Add success.
I0319 15:56:43.419880  543705 net.go:648] Add success.
I0319 15:56:43.422784  543705 net.go:770] primary dev: ETH0
I0319 15:56:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:56:43.422819  543705 net.go:698] Add success.
I0319 15:56:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:56:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:56:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:56:53.409798  543705 memory.go:184] no items to output this cycle
I0319 15:56:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 15:57:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:57:03.409809  543705 memory.go:184] no items to output this cycle
I0319 15:57:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 15:57:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:57:13.409790  543705 memory.go:191] Add success.
W0319 15:57:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 15:57:13.409824  543705 cpu.go:282] Add success.
W0319 15:57:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:57:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:57:13.420206  543705 net.go:648] Add success.
I0319 15:57:13.423084  543705 net.go:770] primary dev: ETH0
I0319 15:57:13.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:57:13.423113  543705 net.go:698] Add success.
I0319 15:57:13.429427  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 15:57:13.453618  543705 event_worker.go:152] Polling the log file for events...
I0319 15:57:13.464327  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"272ddf84-f77b-4a8b-8b46-352305f92fb5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 15:57:13.464363  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 15:57:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:57:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 15:57:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0319 15:57:14.456135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 15:57:14.456145  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 15:57:14.456151  543705 custom_config.go:64] query custom config with name: gpu
I0319 15:57:14.456512  543705 disk_worker.go:494] system disk:vda1
I0319 15:57:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 15:57:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 15:57:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:57:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 15:57:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 15:57:16.458011  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:57:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:57:16.472454  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:57:19.855910  543705 disk_info.go:125] begin check local disk info of client
I0319 15:57:19.858298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:57:19.858303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6540 0xc0003b6580]
E0319 15:57:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:57:23.409761  543705 memory.go:184] no items to output this cycle
I0319 15:57:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 15:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:57:33.409775  543705 memory.go:184] no items to output this cycle
I0319 15:57:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 15:57:37.759496  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 15:57:37.759503  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 15:57:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:57:43.410735  543705 memory.go:191] Add success.
I0319 15:57:43.409807  543705 cpu.go:282] Add success.
I0319 15:57:43.420450  543705 net.go:648] Add success.
I0319 15:57:43.423195  543705 net.go:770] primary dev: ETH0
I0319 15:57:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:57:43.423223  543705 net.go:698] Add success.
I0319 15:57:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:57:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:57:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:57:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:57:53.409801  543705 memory.go:184] no items to output this cycle
I0319 15:57:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 15:58:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:58:03.409892  543705 memory.go:184] no items to output this cycle
I0319 15:58:03.409921  543705 cpu.go:275] no items to output this cycle
E0319 15:58:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:58:13.409801  543705 memory.go:191] Add success.
I0319 15:58:13.409812  543705 cpu.go:282] Add success.
W0319 15:58:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:58:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:58:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:58:13.420300  543705 net.go:648] Add success.
I0319 15:58:13.422802  543705 net.go:770] primary dev: ETH0
I0319 15:58:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:58:13.422829  543705 net.go:698] Add success.
I0319 15:58:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:58:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:58:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 15:58:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:58:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 15:58:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:58:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:58:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:58:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:58:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:58:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:58:19.859927  543705 disk_info.go:125] begin check local disk info of client
I0319 15:58:19.862336  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:58:19.862342  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa6c0 0xc0001aa700]
E0319 15:58:23.410228  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:58:23.410242  543705 memory.go:184] no items to output this cycle
I0319 15:58:23.410275  543705 cpu.go:275] no items to output this cycle
E0319 15:58:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:58:33.409779  543705 memory.go:184] no items to output this cycle
I0319 15:58:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 15:58:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:58:43.409789  543705 memory.go:191] Add success.
I0319 15:58:43.409806  543705 cpu.go:282] Add success.
I0319 15:58:43.419972  543705 net.go:648] Add success.
I0319 15:58:43.422979  543705 net.go:770] primary dev: ETH0
I0319 15:58:43.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:58:43.423006  543705 net.go:698] Add success.
I0319 15:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:58:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:58:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:58:53.409773  543705 memory.go:184] no items to output this cycle
I0319 15:58:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 15:59:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:59:03.409797  543705 memory.go:184] no items to output this cycle
I0319 15:59:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 15:59:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:59:13.409805  543705 memory.go:191] Add success.
I0319 15:59:13.409823  543705 cpu.go:282] Add success.
W0319 15:59:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 15:59:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 15:59:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 15:59:13.420223  543705 net.go:648] Add success.
I0319 15:59:13.423388  543705 net.go:770] primary dev: ETH0
I0319 15:59:13.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:59:13.423416  543705 net.go:698] Add success.
I0319 15:59:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 15:59:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 15:59:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0319 15:59:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0319 15:59:14.456606  543705 disk_worker.go:494] system disk:vda1
I0319 15:59:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 15:59:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 15:59:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:59:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 15:59:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 15:59:19.863956  543705 disk_info.go:125] begin check local disk info of client
I0319 15:59:19.866425  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 15:59:19.866431  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9980 0xc0004d99c0]
E0319 15:59:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:59:23.409795  543705 memory.go:184] no items to output this cycle
I0319 15:59:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 15:59:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:59:33.409766  543705 memory.go:184] no items to output this cycle
I0319 15:59:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 15:59:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:59:43.409798  543705 memory.go:191] Add success.
I0319 15:59:43.409806  543705 cpu.go:282] Add success.
I0319 15:59:43.419967  543705 net.go:648] Add success.
I0319 15:59:43.422613  543705 net.go:770] primary dev: ETH0
I0319 15:59:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0319 15:59:43.422640  543705 net.go:698] Add success.
I0319 15:59:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 15:59:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 15:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 15:59:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 15:59:53.409798  543705 memory.go:184] no items to output this cycle
I0319 15:59:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:00:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:00:03.409767  543705 memory.go:184] no items to output this cycle
I0319 16:00:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 16:00:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:00:13.409807  543705 memory.go:191] Add success.
I0319 16:00:13.409809  543705 cpu.go:282] Add success.
W0319 16:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:00:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:00:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:00:13.420605  543705 net.go:648] Add success.
I0319 16:00:13.423404  543705 net.go:770] primary dev: ETH0
I0319 16:00:13.423416  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:00:13.423429  543705 net.go:698] Add success.
I0319 16:00:13.469071  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"40a0bc7a-aae7-456f-94d6-da19590d72ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:00:13.469103  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:00:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:00:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:00:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 16:00:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:00:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 16:00:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:00:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:00:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:00:19.867970  543705 disk_info.go:125] begin check local disk info of client
I0319 16:00:19.870405  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:00:19.870411  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa640 0xc0001aa680]
E0319 16:00:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:00:23.409796  543705 memory.go:184] no items to output this cycle
I0319 16:00:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:00:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:00:33.409777  543705 memory.go:184] no items to output this cycle
I0319 16:00:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 16:00:37.759640  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:00:37.759647  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:00:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:00:43.410654  543705 memory.go:191] Add success.
I0319 16:00:43.409830  543705 cpu.go:282] Add success.
I0319 16:00:43.420355  543705 net.go:648] Add success.
I0319 16:00:43.423159  543705 net.go:770] primary dev: ETH0
I0319 16:00:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:00:43.423187  543705 net.go:698] Add success.
I0319 16:00:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:00:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:00:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:00:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:00:53.409776  543705 memory.go:184] no items to output this cycle
I0319 16:00:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 16:01:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:01:03.409766  543705 memory.go:184] no items to output this cycle
I0319 16:01:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 16:01:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:01:13.409794  543705 memory.go:191] Add success.
W0319 16:01:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:01:13.409826  543705 cpu.go:282] Add success.
W0319 16:01:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:01:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:01:13.420137  543705 net.go:648] Add success.
I0319 16:01:13.423201  543705 net.go:770] primary dev: ETH0
I0319 16:01:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:01:13.423229  543705 net.go:698] Add success.
I0319 16:01:14.454946  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:01:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:01:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 16:01:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:01:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 16:01:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:01:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:01:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:01:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:01:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:01:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:01:19.870492  543705 disk_info.go:125] begin check local disk info of client
I0319 16:01:19.872957  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:01:19.872964  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492340 0xc000492380]
E0319 16:01:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:01:23.409793  543705 memory.go:184] no items to output this cycle
I0319 16:01:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 16:01:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:01:33.409774  543705 memory.go:184] no items to output this cycle
I0319 16:01:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:01:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:01:43.409804  543705 memory.go:191] Add success.
I0319 16:01:43.409816  543705 cpu.go:282] Add success.
I0319 16:01:43.419891  543705 net.go:648] Add success.
I0319 16:01:43.422489  543705 net.go:770] primary dev: ETH0
I0319 16:01:43.422503  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:01:43.422517  543705 net.go:698] Add success.
I0319 16:01:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:01:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:01:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:01:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:01:53.409773  543705 memory.go:184] no items to output this cycle
I0319 16:01:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 16:02:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:02:03.409776  543705 memory.go:184] no items to output this cycle
I0319 16:02:03.409782  543705 cpu.go:275] no items to output this cycle
W0319 16:02:13.409718  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:02:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:02:13.409741  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:02:13.409809  543705 cpu.go:282] Add success.
E0319 16:02:13.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:02:13.409863  543705 memory.go:191] Add success.
I0319 16:02:13.420179  543705 net.go:648] Add success.
I0319 16:02:13.422898  543705 net.go:770] primary dev: ETH0
I0319 16:02:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:02:13.422927  543705 net.go:698] Add success.
W0319 16:02:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:02:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 16:02:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:02:14.456184  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:02:14.456193  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:02:14.456200  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:02:14.456451  543705 disk_worker.go:494] system disk:vda1
I0319 16:02:14.456480  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:02:15.456890  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:02:15.456898  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:02:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:02:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:02:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:02:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:02:16.472344  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:02:19.873673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:02:19.876036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:02:19.876043  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee0c0 0xc0003ee100]
E0319 16:02:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:02:23.409762  543705 memory.go:184] no items to output this cycle
I0319 16:02:23.409781  543705 cpu.go:275] no items to output this cycle
E0319 16:02:33.410279  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:02:33.410297  543705 memory.go:184] no items to output this cycle
I0319 16:02:33.410312  543705 cpu.go:275] no items to output this cycle
E0319 16:02:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:02:43.409799  543705 memory.go:191] Add success.
I0319 16:02:43.409804  543705 cpu.go:282] Add success.
I0319 16:02:43.420032  543705 net.go:648] Add success.
I0319 16:02:43.422962  543705 net.go:770] primary dev: ETH0
I0319 16:02:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:02:43.422988  543705 net.go:698] Add success.
I0319 16:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:02:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:02:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:02:53.409769  543705 memory.go:184] no items to output this cycle
I0319 16:02:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 16:03:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:03:03.409799  543705 memory.go:184] no items to output this cycle
I0319 16:03:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:03:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:03:13.409797  543705 memory.go:191] Add success.
I0319 16:03:13.409820  543705 cpu.go:282] Add success.
W0319 16:03:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:03:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:03:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:03:13.420202  543705 net.go:648] Add success.
I0319 16:03:13.422910  543705 net.go:770] primary dev: ETH0
I0319 16:03:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:03:13.422939  543705 net.go:698] Add success.
I0319 16:03:13.726809  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c0bd437e-3483-4219-ad59-85c24d3f0a0c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:03:13.726846  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:03:14.454500  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:03:14.454748  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:03:14.454760  543705 disk_worker.go:708] disk space is not compliant
W0319 16:03:14.454762  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:03:14.456445  543705 disk_worker.go:494] system disk:vda1
I0319 16:03:14.456486  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:03:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:03:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:03:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:03:19.877675  543705 disk_info.go:125] begin check local disk info of client
I0319 16:03:19.880056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:03:19.880061  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4200 0xc0000c4240]
E0319 16:03:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:03:23.409775  543705 memory.go:184] no items to output this cycle
I0319 16:03:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 16:03:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:03:33.409781  543705 memory.go:184] no items to output this cycle
I0319 16:03:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 16:03:37.759785  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:03:37.759792  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:03:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:03:43.410618  543705 memory.go:191] Add success.
I0319 16:03:43.409823  543705 cpu.go:282] Add success.
I0319 16:03:43.420387  543705 net.go:648] Add success.
I0319 16:03:43.423156  543705 net.go:770] primary dev: ETH0
I0319 16:03:43.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:03:43.423186  543705 net.go:698] Add success.
I0319 16:03:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:03:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:03:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:03:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:03:53.409777  543705 memory.go:184] no items to output this cycle
I0319 16:03:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:04:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:04:03.409774  543705 memory.go:184] no items to output this cycle
I0319 16:04:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 16:04:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:04:13.409791  543705 memory.go:191] Add success.
W0319 16:04:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:04:13.409822  543705 cpu.go:282] Add success.
W0319 16:04:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:04:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:04:13.420192  543705 net.go:648] Add success.
I0319 16:04:13.423201  543705 net.go:770] primary dev: ETH0
I0319 16:04:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:04:13.423225  543705 net.go:698] Add success.
I0319 16:04:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:04:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:04:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 16:04:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:04:14.456635  543705 disk_worker.go:494] system disk:vda1
I0319 16:04:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:04:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:04:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:04:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:04:19.881680  543705 disk_info.go:125] begin check local disk info of client
I0319 16:04:19.884039  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:04:19.884045  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348f80 0xc000348fc0]
I0319 16:04:23.409870  543705 cpu.go:275] no items to output this cycle
E0319 16:04:23.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:04:23.409928  543705 memory.go:184] no items to output this cycle
E0319 16:04:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:04:33.409810  543705 memory.go:184] no items to output this cycle
I0319 16:04:33.409823  543705 cpu.go:275] no items to output this cycle
E0319 16:04:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:04:43.409827  543705 memory.go:191] Add success.
I0319 16:04:43.409837  543705 cpu.go:282] Add success.
I0319 16:04:43.420025  543705 net.go:648] Add success.
I0319 16:04:43.422750  543705 net.go:770] primary dev: ETH0
I0319 16:04:43.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:04:43.422777  543705 net.go:698] Add success.
I0319 16:04:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:04:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:04:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:04:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:04:53.409772  543705 memory.go:184] no items to output this cycle
I0319 16:04:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 16:05:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:05:03.409811  543705 memory.go:184] no items to output this cycle
I0319 16:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 16:05:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:05:13.409801  543705 memory.go:191] Add success.
I0319 16:05:13.409825  543705 cpu.go:282] Add success.
W0319 16:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:05:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:05:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:05:13.420161  543705 net.go:648] Add success.
I0319 16:05:13.423081  543705 net.go:770] primary dev: ETH0
I0319 16:05:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:05:13.423106  543705 net.go:698] Add success.
I0319 16:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:05:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:05:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 16:05:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:05:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 16:05:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:05:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:05:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:05:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:05:19.885672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:05:19.888068  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:05:19.888075  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047d500 0xc00047d540]
E0319 16:05:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:05:23.409899  543705 memory.go:184] no items to output this cycle
I0319 16:05:23.409928  543705 cpu.go:275] no items to output this cycle
E0319 16:05:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:05:33.409818  543705 memory.go:184] no items to output this cycle
I0319 16:05:33.409827  543705 cpu.go:275] no items to output this cycle
E0319 16:05:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:05:43.409799  543705 memory.go:191] Add success.
I0319 16:05:43.409810  543705 cpu.go:282] Add success.
I0319 16:05:43.420056  543705 net.go:648] Add success.
I0319 16:05:43.423111  543705 net.go:770] primary dev: ETH0
I0319 16:05:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:05:43.423136  543705 net.go:698] Add success.
I0319 16:05:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:05:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:05:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:05:53.409782  543705 memory.go:184] no items to output this cycle
I0319 16:05:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:06:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:06:03.409808  543705 memory.go:184] no items to output this cycle
I0319 16:06:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 16:06:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:06:13.409802  543705 memory.go:191] Add success.
W0319 16:06:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:06:13.409839  543705 cpu.go:282] Add success.
W0319 16:06:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:06:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:06:13.420142  543705 net.go:648] Add success.
I0319 16:06:13.423151  543705 net.go:770] primary dev: ETH0
I0319 16:06:13.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:06:13.423175  543705 net.go:698] Add success.
I0319 16:06:13.464445  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0d1fbad2-9a76-4076-83f5-6f36baecbb78","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:06:13.464478  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:06:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:06:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:06:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 16:06:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:06:14.456553  543705 disk_worker.go:494] system disk:vda1
I0319 16:06:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:06:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:06:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:06:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:06:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:06:19.889672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:06:19.892103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:06:19.892109  543705 disk_info.go:196] parse disk info done, disk is : [0xc000286cc0 0xc000286d00]
E0319 16:06:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:06:23.409907  543705 memory.go:184] no items to output this cycle
I0319 16:06:23.409914  543705 cpu.go:275] no items to output this cycle
E0319 16:06:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:06:33.409806  543705 memory.go:184] no items to output this cycle
I0319 16:06:33.409831  543705 cpu.go:275] no items to output this cycle
I0319 16:06:37.761502  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:06:37.761509  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:06:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:06:43.410726  543705 memory.go:191] Add success.
I0319 16:06:43.409810  543705 cpu.go:282] Add success.
I0319 16:06:43.420464  543705 net.go:648] Add success.
I0319 16:06:43.423212  543705 net.go:770] primary dev: ETH0
I0319 16:06:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:06:43.423239  543705 net.go:698] Add success.
I0319 16:06:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:06:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:06:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:06:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:06:53.409779  543705 memory.go:184] no items to output this cycle
I0319 16:06:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 16:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:07:03.409777  543705 memory.go:184] no items to output this cycle
I0319 16:07:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:07:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:07:13.409791  543705 memory.go:191] Add success.
I0319 16:07:13.409819  543705 cpu.go:282] Add success.
W0319 16:07:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:07:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:07:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:07:13.420327  543705 net.go:648] Add success.
I0319 16:07:13.423325  543705 net.go:770] primary dev: ETH0
I0319 16:07:13.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:07:13.423352  543705 net.go:698] Add success.
I0319 16:07:13.453035  543705 event_worker.go:152] Polling the log file for events...
W0319 16:07:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:07:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 16:07:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:07:14.456996  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:07:14.457006  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:07:14.457013  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:07:14.457055  543705 disk_worker.go:494] system disk:vda1
I0319 16:07:14.457084  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:07:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:07:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:07:16.457900  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:07:16.457900  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:07:16.457953  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:07:16.457973  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:07:16.472271  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:07:19.893674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:07:19.896037  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:07:19.896043  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358880 0xc0003588c0]
E0319 16:07:23.410208  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:07:23.410223  543705 memory.go:184] no items to output this cycle
I0319 16:07:23.410237  543705 cpu.go:275] no items to output this cycle
E0319 16:07:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:07:33.409790  543705 memory.go:184] no items to output this cycle
I0319 16:07:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 16:07:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:07:43.409775  543705 memory.go:191] Add success.
I0319 16:07:43.409818  543705 cpu.go:282] Add success.
I0319 16:07:43.420058  543705 net.go:648] Add success.
I0319 16:07:43.422910  543705 net.go:770] primary dev: ETH0
I0319 16:07:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:07:43.422948  543705 net.go:698] Add success.
I0319 16:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:07:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:07:53.409784  543705 memory.go:184] no items to output this cycle
I0319 16:07:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 16:08:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:08:03.409770  543705 memory.go:184] no items to output this cycle
I0319 16:08:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 16:08:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:08:13.409802  543705 memory.go:191] Add success.
I0319 16:08:13.409822  543705 cpu.go:282] Add success.
W0319 16:08:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:08:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:08:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:08:13.420253  543705 net.go:648] Add success.
I0319 16:08:13.422938  543705 net.go:770] primary dev: ETH0
I0319 16:08:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:08:13.422962  543705 net.go:698] Add success.
I0319 16:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:08:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:08:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 16:08:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:08:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 16:08:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:08:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:08:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:08:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:08:19.897673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:08:19.899988  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:08:19.899994  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f540 0xc00035f580]
E0319 16:08:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:08:23.409754  543705 memory.go:184] no items to output this cycle
I0319 16:08:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:08:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:08:33.409808  543705 memory.go:184] no items to output this cycle
I0319 16:08:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 16:08:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:08:43.409804  543705 memory.go:191] Add success.
I0319 16:08:43.409828  543705 cpu.go:282] Add success.
I0319 16:08:43.420090  543705 net.go:648] Add success.
I0319 16:08:43.422633  543705 net.go:770] primary dev: ETH0
I0319 16:08:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:08:43.422674  543705 net.go:698] Add success.
I0319 16:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:08:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:08:53.409800  543705 memory.go:184] no items to output this cycle
I0319 16:08:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:09:03.409781  543705 memory.go:184] no items to output this cycle
I0319 16:09:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 16:09:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:09:13.409838  543705 memory.go:191] Add success.
I0319 16:09:13.409846  543705 cpu.go:282] Add success.
W0319 16:09:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:09:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:09:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:09:13.420158  543705 net.go:648] Add success.
I0319 16:09:13.423175  543705 net.go:770] primary dev: ETH0
I0319 16:09:13.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:09:13.423207  543705 net.go:698] Add success.
I0319 16:09:13.469092  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6bfe0ab6-61fd-4e86-b1d1-2e2ce07421da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:09:13.469127  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:09:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:09:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 16:09:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:09:14.456729  543705 disk_worker.go:494] system disk:vda1
I0319 16:09:14.456757  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:09:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:09:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:09:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:09:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:09:19.901201  543705 disk_info.go:125] begin check local disk info of client
I0319 16:09:19.903623  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:09:19.903629  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c700 0xc00034c740]
E0319 16:09:23.410764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:09:23.410778  543705 memory.go:184] no items to output this cycle
I0319 16:09:23.410785  543705 cpu.go:275] no items to output this cycle
E0319 16:09:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:09:33.409785  543705 memory.go:184] no items to output this cycle
I0319 16:09:33.409808  543705 cpu.go:275] no items to output this cycle
I0319 16:09:37.761728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:09:37.761744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:09:43.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:09:43.410807  543705 memory.go:191] Add success.
I0319 16:09:43.409936  543705 cpu.go:282] Add success.
I0319 16:09:43.419731  543705 net.go:648] Add success.
I0319 16:09:43.423016  543705 net.go:770] primary dev: ETH0
I0319 16:09:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:09:43.423041  543705 net.go:698] Add success.
I0319 16:09:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:09:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:09:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:09:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:09:53.409786  543705 memory.go:184] no items to output this cycle
I0319 16:09:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:10:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:10:03.409800  543705 memory.go:184] no items to output this cycle
I0319 16:10:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 16:10:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:10:13.409814  543705 memory.go:191] Add success.
I0319 16:10:13.409815  543705 cpu.go:282] Add success.
W0319 16:10:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:10:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:10:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:10:13.420333  543705 net.go:648] Add success.
I0319 16:10:13.423138  543705 net.go:770] primary dev: ETH0
I0319 16:10:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:10:13.423164  543705 net.go:698] Add success.
I0319 16:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:10:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:10:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 16:10:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:10:14.456511  543705 disk_worker.go:494] system disk:vda1
I0319 16:10:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:10:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:10:19.905149  543705 disk_info.go:125] begin check local disk info of client
I0319 16:10:19.907566  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:10:19.907572  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aa040 0xc0003aa080]
E0319 16:10:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:10:23.409812  543705 memory.go:184] no items to output this cycle
I0319 16:10:23.409822  543705 cpu.go:275] no items to output this cycle
E0319 16:10:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:10:33.409797  543705 memory.go:184] no items to output this cycle
I0319 16:10:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:10:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:10:43.409796  543705 memory.go:191] Add success.
I0319 16:10:43.409815  543705 cpu.go:282] Add success.
I0319 16:10:43.420079  543705 net.go:648] Add success.
I0319 16:10:43.422604  543705 net.go:770] primary dev: ETH0
I0319 16:10:43.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:10:43.422629  543705 net.go:698] Add success.
I0319 16:10:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:10:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:10:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:10:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:10:53.409782  543705 memory.go:184] no items to output this cycle
I0319 16:10:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 16:11:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:11:03.409778  543705 memory.go:184] no items to output this cycle
I0319 16:11:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 16:11:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:11:13.409804  543705 memory.go:191] Add success.
I0319 16:11:13.409804  543705 cpu.go:282] Add success.
W0319 16:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:11:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:11:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:11:13.420125  543705 net.go:648] Add success.
I0319 16:11:13.422942  543705 net.go:770] primary dev: ETH0
I0319 16:11:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:11:13.422967  543705 net.go:698] Add success.
I0319 16:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:11:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:11:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 16:11:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:11:14.456587  543705 disk_worker.go:494] system disk:vda1
I0319 16:11:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:11:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:11:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:11:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:11:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:11:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:11:19.909227  543705 disk_info.go:125] begin check local disk info of client
I0319 16:11:19.911632  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:11:19.911638  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7dc0 0xc0003b7e00]
E0319 16:11:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:11:23.409800  543705 memory.go:184] no items to output this cycle
I0319 16:11:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:11:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:11:33.409770  543705 memory.go:184] no items to output this cycle
I0319 16:11:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 16:11:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:11:43.409830  543705 memory.go:191] Add success.
I0319 16:11:43.409837  543705 cpu.go:282] Add success.
I0319 16:11:43.420008  543705 net.go:648] Add success.
I0319 16:11:43.422645  543705 net.go:770] primary dev: ETH0
I0319 16:11:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:11:43.422671  543705 net.go:698] Add success.
I0319 16:11:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:11:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:11:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:11:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:11:53.409764  543705 memory.go:184] no items to output this cycle
I0319 16:11:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:12:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:12:03.409795  543705 memory.go:184] no items to output this cycle
I0319 16:12:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:12:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:12:13.409824  543705 memory.go:191] Add success.
I0319 16:12:13.409834  543705 cpu.go:282] Add success.
W0319 16:12:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:12:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:12:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:12:13.420233  543705 net.go:648] Add success.
I0319 16:12:13.422900  543705 net.go:770] primary dev: ETH0
I0319 16:12:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:12:13.422925  543705 net.go:698] Add success.
I0319 16:12:13.463465  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84746cdd-7652-414d-b89f-e455bbc50cdb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:12:13.463499  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 16:12:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:12:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 16:12:14.455199  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:12:14.455951  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:12:14.455959  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:12:14.455964  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:12:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 16:12:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:12:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:12:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:12:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:12:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:12:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:12:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:12:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:12:19.913195  543705 disk_info.go:125] begin check local disk info of client
I0319 16:12:19.915577  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:12:19.915583  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314ec0 0xc000314f00]
E0319 16:12:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:12:23.409813  543705 memory.go:184] no items to output this cycle
I0319 16:12:23.409826  543705 cpu.go:275] no items to output this cycle
E0319 16:12:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:12:33.409794  543705 memory.go:184] no items to output this cycle
I0319 16:12:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 16:12:37.763514  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:12:37.763521  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:12:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:12:43.410616  543705 memory.go:191] Add success.
I0319 16:12:43.409792  543705 cpu.go:282] Add success.
I0319 16:12:43.420353  543705 net.go:648] Add success.
I0319 16:12:43.422926  543705 net.go:770] primary dev: ETH0
I0319 16:12:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:12:43.422951  543705 net.go:698] Add success.
I0319 16:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:12:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:12:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:12:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:12:53.409780  543705 cpu.go:275] no items to output this cycle
I0319 16:12:53.409782  543705 memory.go:184] no items to output this cycle
E0319 16:13:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:13:03.409796  543705 memory.go:184] no items to output this cycle
I0319 16:13:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:13:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:13:13.409814  543705 memory.go:191] Add success.
I0319 16:13:13.409817  543705 cpu.go:282] Add success.
W0319 16:13:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:13:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:13:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:13:13.420168  543705 net.go:648] Add success.
I0319 16:13:13.422849  543705 net.go:770] primary dev: ETH0
I0319 16:13:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:13:13.422875  543705 net.go:698] Add success.
I0319 16:13:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:13:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:13:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 16:13:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:13:14.456581  543705 disk_worker.go:494] system disk:vda1
I0319 16:13:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:13:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:13:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:13:16.472415  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:13:19.917207  543705 disk_info.go:125] begin check local disk info of client
I0319 16:13:19.919661  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:13:19.919666  543705 disk_info.go:196] parse disk info done, disk is : [0xc000285400 0xc000285440]
E0319 16:13:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:13:23.409781  543705 memory.go:184] no items to output this cycle
I0319 16:13:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 16:13:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:13:33.409771  543705 memory.go:184] no items to output this cycle
I0319 16:13:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 16:13:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:13:43.409792  543705 memory.go:191] Add success.
I0319 16:13:43.409797  543705 cpu.go:282] Add success.
I0319 16:13:43.419918  543705 net.go:648] Add success.
I0319 16:13:43.422534  543705 net.go:770] primary dev: ETH0
I0319 16:13:43.422548  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:13:43.422560  543705 net.go:698] Add success.
I0319 16:13:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:13:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:13:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:13:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:13:53.409784  543705 memory.go:184] no items to output this cycle
I0319 16:13:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 16:14:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:14:03.409778  543705 memory.go:184] no items to output this cycle
I0319 16:14:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 16:14:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:14:13.409801  543705 memory.go:191] Add success.
I0319 16:14:13.409821  543705 cpu.go:282] Add success.
W0319 16:14:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:14:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:14:13.420247  543705 net.go:648] Add success.
I0319 16:14:13.422892  543705 net.go:770] primary dev: ETH0
I0319 16:14:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:14:13.422915  543705 net.go:698] Add success.
I0319 16:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:14:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:14:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0319 16:14:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:14:14.456629  543705 disk_worker.go:494] system disk:vda1
I0319 16:14:14.456661  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:14:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:14:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:14:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:14:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:14:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:14:19.921677  543705 disk_info.go:125] begin check local disk info of client
I0319 16:14:19.924103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:14:19.924110  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256c80 0xc000256cc0]
E0319 16:14:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:14:23.409800  543705 memory.go:184] no items to output this cycle
I0319 16:14:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:14:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:14:33.409776  543705 memory.go:184] no items to output this cycle
I0319 16:14:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 16:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:14:43.409793  543705 memory.go:191] Add success.
I0319 16:14:43.409810  543705 cpu.go:282] Add success.
I0319 16:14:43.420015  543705 net.go:648] Add success.
I0319 16:14:43.422761  543705 net.go:770] primary dev: ETH0
I0319 16:14:43.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:14:43.422787  543705 net.go:698] Add success.
I0319 16:14:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:14:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:14:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:14:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:14:53.409777  543705 memory.go:184] no items to output this cycle
I0319 16:14:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 16:15:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:15:03.409798  543705 memory.go:184] no items to output this cycle
I0319 16:15:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 16:15:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:15:13.409817  543705 memory.go:191] Add success.
I0319 16:15:13.409816  543705 cpu.go:282] Add success.
W0319 16:15:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:15:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:15:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:15:13.420148  543705 net.go:648] Add success.
I0319 16:15:13.422752  543705 net.go:770] primary dev: ETH0
I0319 16:15:13.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:15:13.422782  543705 net.go:698] Add success.
I0319 16:15:13.463802  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c7f799ab-845e-48a5-b75b-ba2d4b9a1f60","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:15:13.463835  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:15:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:15:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 16:15:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:15:14.456555  543705 disk_worker.go:494] system disk:vda1
I0319 16:15:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:15:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:15:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:15:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:15:16.472473  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:15:19.925677  543705 disk_info.go:125] begin check local disk info of client
I0319 16:15:19.928124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:15:19.928131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a42c0 0xc0002a4300]
E0319 16:15:23.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:15:23.410384  543705 memory.go:184] no items to output this cycle
I0319 16:15:23.410446  543705 cpu.go:275] no items to output this cycle
E0319 16:15:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:15:33.409780  543705 memory.go:184] no items to output this cycle
I0319 16:15:33.409791  543705 cpu.go:275] no items to output this cycle
I0319 16:15:37.764523  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:15:37.764530  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:15:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:15:43.410664  543705 memory.go:191] Add success.
I0319 16:15:43.409805  543705 cpu.go:282] Add success.
I0319 16:15:43.420367  543705 net.go:648] Add success.
I0319 16:15:43.423094  543705 net.go:770] primary dev: ETH0
I0319 16:15:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:15:43.423122  543705 net.go:698] Add success.
I0319 16:15:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:15:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:15:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:15:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:15:53.409787  543705 memory.go:184] no items to output this cycle
I0319 16:15:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 16:16:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:16:03.409797  543705 memory.go:184] no items to output this cycle
I0319 16:16:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 16:16:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:16:13.409831  543705 memory.go:191] Add success.
I0319 16:16:13.409840  543705 cpu.go:282] Add success.
W0319 16:16:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:16:13.413003  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:16:13.413008  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:16:13.420662  543705 net.go:648] Add success.
I0319 16:16:13.422418  543705 net.go:770] primary dev: ETH0
I0319 16:16:13.422439  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:16:13.422453  543705 net.go:698] Add success.
I0319 16:16:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:16:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:16:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0319 16:16:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:16:14.456638  543705 disk_worker.go:494] system disk:vda1
I0319 16:16:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:16:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:16:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:16:19.929673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:16:19.932174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:16:19.932181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e2dc0 0xc0001e2e00]
E0319 16:16:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:16:23.409764  543705 memory.go:184] no items to output this cycle
I0319 16:16:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 16:16:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:16:33.409810  543705 memory.go:184] no items to output this cycle
I0319 16:16:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 16:16:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:16:43.409807  543705 memory.go:191] Add success.
I0319 16:16:43.409808  543705 cpu.go:282] Add success.
I0319 16:16:43.419968  543705 net.go:648] Add success.
I0319 16:16:43.423093  543705 net.go:770] primary dev: ETH0
I0319 16:16:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:16:43.423122  543705 net.go:698] Add success.
I0319 16:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:16:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:16:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:16:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:16:53.409769  543705 memory.go:184] no items to output this cycle
I0319 16:16:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 16:17:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:17:03.409778  543705 memory.go:184] no items to output this cycle
I0319 16:17:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 16:17:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:17:13.409790  543705 memory.go:191] Add success.
W0319 16:17:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:17:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:17:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:17:13.409872  543705 cpu.go:282] Add success.
I0319 16:17:13.420371  543705 net.go:648] Add success.
I0319 16:17:13.423285  543705 net.go:770] primary dev: ETH0
I0319 16:17:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:17:13.423311  543705 net.go:698] Add success.
I0319 16:17:13.452836  543705 event_worker.go:152] Polling the log file for events...
W0319 16:17:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:17:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 16:17:14.455215  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:17:14.456025  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:17:14.456035  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:17:14.456041  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:17:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 16:17:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:17:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:17:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:17:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:17:16.457991  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:17:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:17:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:17:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:17:19.933677  543705 disk_info.go:125] begin check local disk info of client
I0319 16:17:19.936139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:17:19.936149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028fe80 0xc00028fec0]
E0319 16:17:23.409834  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:17:23.409853  543705 memory.go:184] no items to output this cycle
I0319 16:17:23.409994  543705 cpu.go:275] no items to output this cycle
E0319 16:17:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:17:33.409768  543705 memory.go:184] no items to output this cycle
I0319 16:17:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:17:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:17:43.409789  543705 memory.go:191] Add success.
I0319 16:17:43.409796  543705 cpu.go:282] Add success.
I0319 16:17:43.419837  543705 net.go:648] Add success.
I0319 16:17:43.422635  543705 net.go:770] primary dev: ETH0
I0319 16:17:43.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:17:43.422662  543705 net.go:698] Add success.
I0319 16:17:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:17:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:17:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:17:53.409793  543705 memory.go:184] no items to output this cycle
I0319 16:17:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 16:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:18:03.409775  543705 memory.go:184] no items to output this cycle
I0319 16:18:03.409779  543705 cpu.go:275] no items to output this cycle
E0319 16:18:13.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:18:13.409830  543705 cpu.go:282] Add success.
I0319 16:18:13.409839  543705 memory.go:191] Add success.
W0319 16:18:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:18:13.409892  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:18:13.409896  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:18:13.420310  543705 net.go:648] Add success.
I0319 16:18:13.423359  543705 net.go:770] primary dev: ETH0
I0319 16:18:13.423373  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:18:13.423386  543705 net.go:698] Add success.
I0319 16:18:13.463655  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"15fdc906-51ae-4e93-9965-f220ab77f6fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:18:13.463690  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:18:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:18:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0319 16:18:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:18:14.456648  543705 disk_worker.go:494] system disk:vda1
I0319 16:18:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:18:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:18:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:18:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:18:16.472447  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:18:19.937672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:18:19.940144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:18:19.940150  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047c500 0xc00047c540]
E0319 16:18:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:18:23.409776  543705 memory.go:184] no items to output this cycle
I0319 16:18:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 16:18:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:18:33.409799  543705 memory.go:184] no items to output this cycle
I0319 16:18:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 16:18:37.765524  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:18:37.765529  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:18:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:18:43.410685  543705 memory.go:191] Add success.
I0319 16:18:43.409808  543705 cpu.go:282] Add success.
I0319 16:18:43.420381  543705 net.go:648] Add success.
I0319 16:18:43.423219  543705 net.go:770] primary dev: ETH0
I0319 16:18:43.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:18:43.423246  543705 net.go:698] Add success.
I0319 16:18:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:18:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:18:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:18:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:18:53.409769  543705 memory.go:184] no items to output this cycle
I0319 16:18:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 16:19:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:19:03.409804  543705 memory.go:184] no items to output this cycle
I0319 16:19:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 16:19:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:19:13.409816  543705 cpu.go:282] Add success.
I0319 16:19:13.409843  543705 memory.go:191] Add success.
W0319 16:19:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:19:13.409896  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:19:13.409900  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:19:13.420413  543705 net.go:648] Add success.
I0319 16:19:13.423650  543705 net.go:770] primary dev: ETH0
I0319 16:19:13.423664  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:19:13.423678  543705 net.go:698] Add success.
I0319 16:19:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:19:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:19:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 16:19:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:19:14.456609  543705 disk_worker.go:494] system disk:vda1
I0319 16:19:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:19:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:19:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:19:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:19:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:19:19.941671  543705 disk_info.go:125] begin check local disk info of client
I0319 16:19:19.944166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:19:19.944173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2340 0xc0003e2380]
E0319 16:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:19:23.409795  543705 memory.go:184] no items to output this cycle
I0319 16:19:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:19:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:19:33.409797  543705 memory.go:184] no items to output this cycle
I0319 16:19:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:19:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:19:43.409816  543705 memory.go:191] Add success.
I0319 16:19:43.409821  543705 cpu.go:282] Add success.
I0319 16:19:43.419863  543705 net.go:648] Add success.
I0319 16:19:43.422943  543705 net.go:770] primary dev: ETH0
I0319 16:19:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:19:43.422972  543705 net.go:698] Add success.
I0319 16:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:19:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:19:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:19:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:19:53.409774  543705 memory.go:184] no items to output this cycle
I0319 16:19:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:20:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:20:03.409772  543705 memory.go:184] no items to output this cycle
I0319 16:20:03.409778  543705 cpu.go:275] no items to output this cycle
E0319 16:20:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:20:13.409782  543705 memory.go:191] Add success.
W0319 16:20:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:20:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:20:13.409863  543705 cpu.go:282] Add success.
I0319 16:20:13.420491  543705 net.go:648] Add success.
I0319 16:20:13.423368  543705 net.go:770] primary dev: ETH0
I0319 16:20:13.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:20:13.423395  543705 net.go:698] Add success.
I0319 16:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:20:14.455219  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:20:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0319 16:20:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:20:14.456629  543705 disk_worker.go:494] system disk:vda1
I0319 16:20:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:20:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:20:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:20:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:20:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:20:16.472447  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:20:19.945672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:20:19.948116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:20:19.948122  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370480 0xc0003704c0]
E0319 16:20:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:20:23.409775  543705 cpu.go:275] no items to output this cycle
I0319 16:20:23.409784  543705 memory.go:184] no items to output this cycle
E0319 16:20:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:20:33.409806  543705 memory.go:184] no items to output this cycle
I0319 16:20:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 16:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:20:43.409786  543705 memory.go:191] Add success.
I0319 16:20:43.409806  543705 cpu.go:282] Add success.
I0319 16:20:43.420002  543705 net.go:648] Add success.
I0319 16:20:43.422761  543705 net.go:770] primary dev: ETH0
I0319 16:20:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:20:43.422785  543705 net.go:698] Add success.
I0319 16:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:20:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:20:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:20:53.409772  543705 memory.go:184] no items to output this cycle
I0319 16:20:53.409788  543705 cpu.go:275] no items to output this cycle
I0319 16:21:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 16:21:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:21:03.409804  543705 memory.go:184] no items to output this cycle
E0319 16:21:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:21:13.409822  543705 memory.go:191] Add success.
I0319 16:21:13.409830  543705 cpu.go:282] Add success.
W0319 16:21:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:21:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:21:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:21:13.420217  543705 net.go:648] Add success.
I0319 16:21:13.422862  543705 net.go:770] primary dev: ETH0
I0319 16:21:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:21:13.422887  543705 net.go:698] Add success.
I0319 16:21:13.469213  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9a4a10c0-920d-485f-9284-7e7e18554ec1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:21:13.469244  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:21:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:21:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:21:14.455242  543705 disk_worker.go:708] disk space is not compliant
W0319 16:21:14.455246  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:21:14.456661  543705 disk_worker.go:494] system disk:vda1
I0319 16:21:14.456697  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:21:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:21:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:21:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:21:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:21:19.949672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:21:19.952034  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:21:19.952040  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354300 0xc000354340]
E0319 16:21:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:21:23.409796  543705 memory.go:184] no items to output this cycle
I0319 16:21:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:21:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:21:33.409770  543705 memory.go:184] no items to output this cycle
I0319 16:21:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 16:21:37.765733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:21:37.765740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:21:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:21:43.410708  543705 memory.go:191] Add success.
I0319 16:21:43.409825  543705 cpu.go:282] Add success.
I0319 16:21:43.420549  543705 net.go:648] Add success.
I0319 16:21:43.423462  543705 net.go:770] primary dev: ETH0
I0319 16:21:43.423480  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:21:43.423497  543705 net.go:698] Add success.
I0319 16:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:21:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:21:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:21:53.410280  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:21:53.410303  543705 memory.go:184] no items to output this cycle
I0319 16:21:53.410307  543705 cpu.go:275] no items to output this cycle
E0319 16:22:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:22:03.409776  543705 memory.go:184] no items to output this cycle
I0319 16:22:03.409781  543705 cpu.go:275] no items to output this cycle
I0319 16:22:13.409805  543705 cpu.go:282] Add success.
E0319 16:22:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:22:13.409826  543705 memory.go:191] Add success.
W0319 16:22:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:22:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:22:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:22:13.420749  543705 net.go:648] Add success.
I0319 16:22:13.423799  543705 net.go:770] primary dev: ETH0
I0319 16:22:13.423812  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:22:13.423823  543705 net.go:698] Add success.
W0319 16:22:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:22:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0319 16:22:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:22:14.456838  543705 disk_worker.go:494] system disk:vda1
I0319 16:22:14.456881  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:22:14.457095  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:22:14.457104  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:22:14.457110  543705 custom_config.go:64] query custom config with name: gpu
E0319 16:22:15.456876  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:22:15.456887  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:22:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:22:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:22:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:22:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:22:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:22:19.953671  543705 disk_info.go:125] begin check local disk info of client
I0319 16:22:19.956014  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:22:19.956020  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003584c0 0xc000358500]
E0319 16:22:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:22:23.409763  543705 memory.go:184] no items to output this cycle
I0319 16:22:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 16:22:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:22:33.409799  543705 memory.go:184] no items to output this cycle
I0319 16:22:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 16:22:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:22:43.409804  543705 memory.go:191] Add success.
I0319 16:22:43.409805  543705 cpu.go:282] Add success.
I0319 16:22:43.420079  543705 net.go:648] Add success.
I0319 16:22:43.422946  543705 net.go:770] primary dev: ETH0
I0319 16:22:43.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:22:43.422972  543705 net.go:698] Add success.
I0319 16:22:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:22:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:22:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:22:53.409774  543705 memory.go:184] no items to output this cycle
I0319 16:22:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 16:23:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:23:03.409808  543705 memory.go:184] no items to output this cycle
I0319 16:23:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 16:23:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:23:13.409797  543705 memory.go:191] Add success.
I0319 16:23:13.409818  543705 cpu.go:282] Add success.
W0319 16:23:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:23:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:23:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:23:13.420759  543705 net.go:648] Add success.
I0319 16:23:13.424260  543705 net.go:770] primary dev: ETH0
I0319 16:23:13.424273  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:23:13.424286  543705 net.go:698] Add success.
I0319 16:23:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:23:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:23:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0319 16:23:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:23:14.456615  543705 disk_worker.go:494] system disk:vda1
I0319 16:23:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:23:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:23:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:23:16.472365  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:23:19.957674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:23:19.960157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:23:19.960163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499800 0xc000499840]
E0319 16:23:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:23:23.409811  543705 memory.go:184] no items to output this cycle
I0319 16:23:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 16:23:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:23:33.409807  543705 memory.go:184] no items to output this cycle
I0319 16:23:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 16:23:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:23:43.409784  543705 memory.go:191] Add success.
I0319 16:23:43.409816  543705 cpu.go:282] Add success.
I0319 16:23:43.419868  543705 net.go:648] Add success.
I0319 16:23:43.422856  543705 net.go:770] primary dev: ETH0
I0319 16:23:43.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:23:43.422881  543705 net.go:698] Add success.
I0319 16:23:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:23:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:23:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:23:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:23:53.409778  543705 memory.go:184] no items to output this cycle
I0319 16:23:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 16:24:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:24:03.409767  543705 memory.go:184] no items to output this cycle
I0319 16:24:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 16:24:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:24:13.409806  543705 memory.go:191] Add success.
I0319 16:24:13.409810  543705 cpu.go:282] Add success.
W0319 16:24:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:24:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:24:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:24:13.420122  543705 net.go:648] Add success.
I0319 16:24:13.423136  543705 net.go:770] primary dev: ETH0
I0319 16:24:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:24:13.423162  543705 net.go:698] Add success.
I0319 16:24:13.469026  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98952ef8-f59c-40b6-a217-ff8ded3ff2ad","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:24:13.469057  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:24:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:24:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:24:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 16:24:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:24:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 16:24:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:24:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:24:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:24:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:24:19.961669  543705 disk_info.go:125] begin check local disk info of client
I0319 16:24:19.964014  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:24:19.964020  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463a00 0xc000463a40]
E0319 16:24:23.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:24:23.410255  543705 memory.go:184] no items to output this cycle
I0319 16:24:23.410271  543705 cpu.go:275] no items to output this cycle
E0319 16:24:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:24:33.409779  543705 memory.go:184] no items to output this cycle
I0319 16:24:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 16:24:37.767528  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:24:37.767535  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:24:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:24:43.410839  543705 memory.go:191] Add success.
I0319 16:24:43.409829  543705 cpu.go:282] Add success.
I0319 16:24:43.420556  543705 net.go:648] Add success.
I0319 16:24:43.423230  543705 net.go:770] primary dev: ETH0
I0319 16:24:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:24:43.423256  543705 net.go:698] Add success.
I0319 16:24:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:24:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:24:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:24:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:24:53.409776  543705 cpu.go:275] no items to output this cycle
I0319 16:24:53.409780  543705 memory.go:184] no items to output this cycle
E0319 16:25:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:25:03.409794  543705 memory.go:184] no items to output this cycle
I0319 16:25:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 16:25:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:25:13.409816  543705 memory.go:191] Add success.
I0319 16:25:13.409826  543705 cpu.go:282] Add success.
W0319 16:25:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:25:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:25:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:25:13.420209  543705 net.go:648] Add success.
I0319 16:25:13.422901  543705 net.go:770] primary dev: ETH0
I0319 16:25:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:25:13.422933  543705 net.go:698] Add success.
I0319 16:25:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:25:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:25:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0319 16:25:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:25:14.456671  543705 disk_worker.go:494] system disk:vda1
I0319 16:25:14.456705  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:25:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:25:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:25:16.472417  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:25:19.965675  543705 disk_info.go:125] begin check local disk info of client
I0319 16:25:19.968064  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:25:19.968070  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a2c0 0xc00036a300]
E0319 16:25:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:25:23.409791  543705 memory.go:184] no items to output this cycle
I0319 16:25:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:25:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:25:33.409778  543705 memory.go:184] no items to output this cycle
I0319 16:25:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 16:25:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:25:43.409786  543705 memory.go:191] Add success.
I0319 16:25:43.409785  543705 cpu.go:282] Add success.
I0319 16:25:43.419964  543705 net.go:648] Add success.
I0319 16:25:43.422800  543705 net.go:770] primary dev: ETH0
I0319 16:25:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:25:43.422827  543705 net.go:698] Add success.
I0319 16:25:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:25:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:25:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:25:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:25:53.409782  543705 memory.go:184] no items to output this cycle
I0319 16:25:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 16:26:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:26:03.409798  543705 memory.go:184] no items to output this cycle
I0319 16:26:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:26:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:26:13.409791  543705 memory.go:191] Add success.
I0319 16:26:13.409809  543705 cpu.go:282] Add success.
W0319 16:26:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:26:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:26:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:26:13.420117  543705 net.go:648] Add success.
I0319 16:26:13.422801  543705 net.go:770] primary dev: ETH0
I0319 16:26:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:26:13.422834  543705 net.go:698] Add success.
I0319 16:26:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:26:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:26:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0319 16:26:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:26:14.456616  543705 disk_worker.go:494] system disk:vda1
I0319 16:26:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:26:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:26:16.458150  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:26:16.458176  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:26:16.472091  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:26:19.969677  543705 disk_info.go:125] begin check local disk info of client
I0319 16:26:19.972119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:26:19.972125  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462080 0xc0004620c0]
E0319 16:26:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:26:23.409780  543705 memory.go:184] no items to output this cycle
I0319 16:26:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 16:26:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:26:33.409786  543705 cpu.go:275] no items to output this cycle
I0319 16:26:33.409789  543705 memory.go:184] no items to output this cycle
E0319 16:26:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:26:43.409799  543705 memory.go:191] Add success.
I0319 16:26:43.409800  543705 cpu.go:282] Add success.
I0319 16:26:43.419886  543705 net.go:648] Add success.
I0319 16:26:43.422706  543705 net.go:770] primary dev: ETH0
I0319 16:26:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:26:43.422753  543705 net.go:698] Add success.
I0319 16:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:26:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:26:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:26:53.409766  543705 memory.go:184] no items to output this cycle
I0319 16:26:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:27:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:27:03.409781  543705 memory.go:184] no items to output this cycle
I0319 16:27:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 16:27:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:27:13.409792  543705 memory.go:191] Add success.
I0319 16:27:13.409792  543705 cpu.go:282] Add success.
W0319 16:27:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:27:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:27:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:27:13.420044  543705 net.go:648] Add success.
I0319 16:27:13.422869  543705 net.go:770] primary dev: ETH0
I0319 16:27:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:27:13.422895  543705 net.go:698] Add success.
I0319 16:27:13.429580  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 16:27:13.452769  543705 event_worker.go:152] Polling the log file for events...
I0319 16:27:13.462949  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"065f536e-6886-40dd-b622-425b8ad179b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:27:13.462982  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 16:27:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:27:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 16:27:14.455211  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:27:14.456335  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:27:14.456347  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:27:14.456354  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:27:14.456625  543705 disk_worker.go:494] system disk:vda1
I0319 16:27:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:27:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:27:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 16:27:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:27:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:27:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:27:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:27:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:27:19.973669  543705 disk_info.go:125] begin check local disk info of client
I0319 16:27:19.976037  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:27:19.976043  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ce00 0xc00047ce40]
E0319 16:27:23.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:27:23.410012  543705 memory.go:184] no items to output this cycle
I0319 16:27:23.410014  543705 cpu.go:275] no items to output this cycle
E0319 16:27:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:27:33.409779  543705 memory.go:184] no items to output this cycle
I0319 16:27:33.409792  543705 cpu.go:275] no items to output this cycle
I0319 16:27:37.768545  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:27:37.768551  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:27:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:27:43.410716  543705 memory.go:191] Add success.
I0319 16:27:43.409790  543705 cpu.go:282] Add success.
I0319 16:27:43.420419  543705 net.go:648] Add success.
I0319 16:27:43.423077  543705 net.go:770] primary dev: ETH0
I0319 16:27:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:27:43.423106  543705 net.go:698] Add success.
I0319 16:27:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:27:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:27:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:27:53.409801  543705 memory.go:184] no items to output this cycle
I0319 16:27:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 16:28:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:28:03.409771  543705 memory.go:184] no items to output this cycle
I0319 16:28:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 16:28:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:28:13.409803  543705 memory.go:191] Add success.
I0319 16:28:13.409817  543705 cpu.go:282] Add success.
W0319 16:28:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:28:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:28:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:28:13.420196  543705 net.go:648] Add success.
I0319 16:28:13.422815  543705 net.go:770] primary dev: ETH0
I0319 16:28:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:28:13.422844  543705 net.go:698] Add success.
I0319 16:28:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:28:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:28:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 16:28:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:28:14.456634  543705 disk_worker.go:494] system disk:vda1
I0319 16:28:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:28:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:28:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:28:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:28:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:28:19.977677  543705 disk_info.go:125] begin check local disk info of client
I0319 16:28:19.980088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:28:19.980094  543705 disk_info.go:196] parse disk info done, disk is : [0xc000286340 0xc000286380]
E0319 16:28:23.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:28:23.409903  543705 memory.go:184] no items to output this cycle
I0319 16:28:23.409920  543705 cpu.go:275] no items to output this cycle
E0319 16:28:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:28:33.409776  543705 memory.go:184] no items to output this cycle
I0319 16:28:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 16:28:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:28:43.409826  543705 memory.go:191] Add success.
I0319 16:28:43.409828  543705 cpu.go:282] Add success.
I0319 16:28:43.420011  543705 net.go:648] Add success.
I0319 16:28:43.422984  543705 net.go:770] primary dev: ETH0
I0319 16:28:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:28:43.423014  543705 net.go:698] Add success.
I0319 16:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:28:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:28:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:28:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:28:53.409781  543705 cpu.go:275] no items to output this cycle
I0319 16:28:53.409784  543705 memory.go:184] no items to output this cycle
E0319 16:29:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:29:03.409761  543705 memory.go:184] no items to output this cycle
I0319 16:29:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 16:29:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:29:13.409798  543705 memory.go:191] Add success.
I0319 16:29:13.409799  543705 cpu.go:282] Add success.
W0319 16:29:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:29:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:29:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:29:13.420120  543705 net.go:648] Add success.
I0319 16:29:13.423010  543705 net.go:770] primary dev: ETH0
I0319 16:29:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:29:13.423036  543705 net.go:698] Add success.
I0319 16:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:29:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:29:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 16:29:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:29:14.456664  543705 disk_worker.go:494] system disk:vda1
I0319 16:29:14.456701  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:29:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:29:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:29:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:29:19.981673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:29:19.984110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:29:19.984116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278c00 0xc000278c40]
E0319 16:29:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:29:23.409901  543705 cpu.go:275] no items to output this cycle
I0319 16:29:23.409904  543705 memory.go:184] no items to output this cycle
E0319 16:29:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:29:33.409805  543705 memory.go:184] no items to output this cycle
I0319 16:29:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 16:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:29:43.409809  543705 memory.go:191] Add success.
I0319 16:29:43.409812  543705 cpu.go:282] Add success.
I0319 16:29:43.419911  543705 net.go:648] Add success.
I0319 16:29:43.422803  543705 net.go:770] primary dev: ETH0
I0319 16:29:43.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:29:43.422828  543705 net.go:698] Add success.
I0319 16:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:29:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:29:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:29:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:29:53.409799  543705 memory.go:184] no items to output this cycle
I0319 16:29:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:30:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:30:03.409769  543705 memory.go:184] no items to output this cycle
I0319 16:30:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:30:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:30:13.409794  543705 memory.go:191] Add success.
I0319 16:30:13.409796  543705 cpu.go:282] Add success.
W0319 16:30:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:30:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:30:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:30:13.420162  543705 net.go:648] Add success.
I0319 16:30:13.423135  543705 net.go:770] primary dev: ETH0
I0319 16:30:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:30:13.423160  543705 net.go:698] Add success.
I0319 16:30:13.464401  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4964824d-67d4-421e-8133-4ccb33eeaaaa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:30:13.464437  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:30:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:30:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:30:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0319 16:30:14.455245  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:30:14.456679  543705 disk_worker.go:494] system disk:vda1
I0319 16:30:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:30:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:30:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:30:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:30:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:30:19.985672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:30:19.988065  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:30:19.988071  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032ed00 0xc00032ed40]
E0319 16:30:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:30:23.409793  543705 memory.go:184] no items to output this cycle
I0319 16:30:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 16:30:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:30:33.409877  543705 memory.go:184] no items to output this cycle
I0319 16:30:33.409917  543705 cpu.go:275] no items to output this cycle
I0319 16:30:37.769543  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:30:37.769549  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:30:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:30:43.410781  543705 memory.go:191] Add success.
I0319 16:30:43.409847  543705 cpu.go:282] Add success.
I0319 16:30:43.420497  543705 net.go:648] Add success.
I0319 16:30:43.423547  543705 net.go:770] primary dev: ETH0
I0319 16:30:43.423562  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:30:43.423577  543705 net.go:698] Add success.
I0319 16:30:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:30:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:30:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:30:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:30:53.409770  543705 memory.go:184] no items to output this cycle
I0319 16:30:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 16:31:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:31:03.409796  543705 memory.go:184] no items to output this cycle
I0319 16:31:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 16:31:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:31:13.409790  543705 memory.go:191] Add success.
I0319 16:31:13.409814  543705 cpu.go:282] Add success.
W0319 16:31:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:31:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:31:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:31:13.420105  543705 net.go:648] Add success.
I0319 16:31:13.423090  543705 net.go:770] primary dev: ETH0
I0319 16:31:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:31:13.423116  543705 net.go:698] Add success.
I0319 16:31:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:31:14.455241  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:31:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0319 16:31:14.455257  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:31:14.456696  543705 disk_worker.go:494] system disk:vda1
I0319 16:31:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:31:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:31:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:31:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:31:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:31:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:31:19.989678  543705 disk_info.go:125] begin check local disk info of client
I0319 16:31:19.992065  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:31:19.992071  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6980 0xc0001c69c0]
E0319 16:31:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:31:23.409801  543705 memory.go:184] no items to output this cycle
I0319 16:31:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 16:31:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:31:33.409783  543705 memory.go:184] no items to output this cycle
I0319 16:31:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 16:31:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:31:43.409806  543705 memory.go:191] Add success.
I0319 16:31:43.409809  543705 cpu.go:282] Add success.
I0319 16:31:43.420044  543705 net.go:648] Add success.
I0319 16:31:43.423271  543705 net.go:770] primary dev: ETH0
I0319 16:31:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:31:43.423297  543705 net.go:698] Add success.
I0319 16:31:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:31:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:31:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:31:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:31:53.409817  543705 memory.go:184] no items to output this cycle
I0319 16:31:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 16:32:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:32:03.409783  543705 memory.go:184] no items to output this cycle
I0319 16:32:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:32:13.409790  543705 memory.go:191] Add success.
I0319 16:32:13.409808  543705 cpu.go:282] Add success.
W0319 16:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:32:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:32:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:32:13.420050  543705 net.go:648] Add success.
I0319 16:32:13.422921  543705 net.go:770] primary dev: ETH0
I0319 16:32:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:32:13.422946  543705 net.go:698] Add success.
W0319 16:32:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:32:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 16:32:14.455212  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:32:14.457152  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:32:14.457164  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:32:14.457169  543705 disk_worker.go:494] system disk:vda1
I0319 16:32:14.457170  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:32:14.457205  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:32:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:32:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:32:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:32:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:32:16.458011  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:32:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:32:16.472446  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:32:19.993671  543705 disk_info.go:125] begin check local disk info of client
I0319 16:32:19.996105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:32:19.996110  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d40 0xc0000c5d80]
E0319 16:32:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:32:23.409781  543705 memory.go:184] no items to output this cycle
I0319 16:32:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 16:32:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:32:33.409804  543705 memory.go:184] no items to output this cycle
I0319 16:32:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 16:32:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:32:43.409826  543705 memory.go:191] Add success.
I0319 16:32:43.409830  543705 cpu.go:282] Add success.
I0319 16:32:43.419714  543705 net.go:648] Add success.
I0319 16:32:43.422522  543705 net.go:770] primary dev: ETH0
I0319 16:32:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:32:43.422547  543705 net.go:698] Add success.
I0319 16:32:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:32:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:32:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:32:53.409803  543705 memory.go:184] no items to output this cycle
I0319 16:32:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 16:33:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:33:03.409791  543705 cpu.go:275] no items to output this cycle
I0319 16:33:03.409812  543705 memory.go:184] no items to output this cycle
E0319 16:33:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:33:13.409830  543705 memory.go:191] Add success.
I0319 16:33:13.409832  543705 cpu.go:282] Add success.
W0319 16:33:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:33:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:33:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:33:13.420312  543705 net.go:648] Add success.
I0319 16:33:13.423138  543705 net.go:770] primary dev: ETH0
I0319 16:33:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:33:13.423164  543705 net.go:698] Add success.
I0319 16:33:13.469314  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"774153b2-85d4-4a9a-96a7-6f16d40933e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:33:13.469348  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:33:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:33:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:33:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0319 16:33:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:33:14.456644  543705 disk_worker.go:494] system disk:vda1
I0319 16:33:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:33:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:33:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:33:19.997672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:33:20.000100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:33:20.000107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1240 0xc0003b1280]
E0319 16:33:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:33:23.409790  543705 memory.go:184] no items to output this cycle
I0319 16:33:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:33:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:33:33.409780  543705 memory.go:184] no items to output this cycle
I0319 16:33:33.409785  543705 cpu.go:275] no items to output this cycle
I0319 16:33:37.769733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:33:37.769740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:33:43.410686  543705 memory.go:191] Add success.
I0319 16:33:43.409813  543705 cpu.go:282] Add success.
I0319 16:33:43.419745  543705 net.go:648] Add success.
I0319 16:33:43.422572  543705 net.go:770] primary dev: ETH0
I0319 16:33:43.422585  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:33:43.422597  543705 net.go:698] Add success.
I0319 16:33:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:33:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:33:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:33:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:33:53.409803  543705 memory.go:184] no items to output this cycle
I0319 16:33:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 16:34:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:34:03.409804  543705 memory.go:184] no items to output this cycle
I0319 16:34:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 16:34:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:34:13.409785  543705 memory.go:191] Add success.
I0319 16:34:13.409807  543705 cpu.go:282] Add success.
W0319 16:34:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:34:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:34:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:34:13.420139  543705 net.go:648] Add success.
I0319 16:34:13.422957  543705 net.go:770] primary dev: ETH0
I0319 16:34:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:34:13.422987  543705 net.go:698] Add success.
I0319 16:34:14.453930  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:34:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:34:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0319 16:34:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:34:14.458066  543705 disk_worker.go:494] system disk:vda1
I0319 16:34:14.458100  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:34:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:34:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:34:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:34:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:34:16.472534  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:34:20.001672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:34:20.004195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:34:20.004201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000250d80 0xc000250dc0]
E0319 16:34:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:34:23.409791  543705 memory.go:184] no items to output this cycle
I0319 16:34:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 16:34:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:34:33.409770  543705 memory.go:184] no items to output this cycle
I0319 16:34:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 16:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:34:43.409793  543705 memory.go:191] Add success.
I0319 16:34:43.409812  543705 cpu.go:282] Add success.
I0319 16:34:43.419839  543705 net.go:770] primary dev: ETH0
I0319 16:34:43.419852  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:34:43.419864  543705 net.go:698] Add success.
I0319 16:34:43.420347  543705 net.go:648] Add success.
I0319 16:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:34:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:34:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:34:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:34:53.409786  543705 memory.go:184] no items to output this cycle
I0319 16:34:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 16:35:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:35:03.409784  543705 cpu.go:275] no items to output this cycle
I0319 16:35:03.409793  543705 memory.go:184] no items to output this cycle
E0319 16:35:13.410701  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:35:13.410727  543705 memory.go:191] Add success.
I0319 16:35:13.410728  543705 cpu.go:282] Add success.
W0319 16:35:13.410757  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:35:13.410769  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:35:13.410772  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:35:13.420162  543705 net.go:648] Add success.
I0319 16:35:13.423295  543705 net.go:770] primary dev: ETH0
I0319 16:35:13.423308  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:35:13.423320  543705 net.go:698] Add success.
I0319 16:35:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:35:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:35:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 16:35:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:35:14.457424  543705 disk_worker.go:494] system disk:vda1
I0319 16:35:14.457456  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:35:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:35:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:35:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:35:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:35:16.472431  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:35:20.005675  543705 disk_info.go:125] begin check local disk info of client
I0319 16:35:20.008213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:35:20.008219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab8c0 0xc0001ab900]
E0319 16:35:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:35:23.409762  543705 memory.go:184] no items to output this cycle
I0319 16:35:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 16:35:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:35:33.409768  543705 memory.go:184] no items to output this cycle
I0319 16:35:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 16:35:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:35:43.409805  543705 memory.go:191] Add success.
I0319 16:35:43.409816  543705 cpu.go:282] Add success.
I0319 16:35:43.420045  543705 net.go:648] Add success.
I0319 16:35:43.422778  543705 net.go:770] primary dev: ETH0
I0319 16:35:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:35:43.422803  543705 net.go:698] Add success.
I0319 16:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:35:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:35:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:35:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:35:53.409786  543705 memory.go:184] no items to output this cycle
I0319 16:35:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 16:36:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:36:03.409784  543705 memory.go:184] no items to output this cycle
I0319 16:36:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 16:36:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:36:13.409793  543705 cpu.go:282] Add success.
I0319 16:36:13.409804  543705 memory.go:191] Add success.
W0319 16:36:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:36:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:36:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:36:13.420038  543705 net.go:648] Add success.
I0319 16:36:13.422957  543705 net.go:770] primary dev: ETH0
I0319 16:36:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:36:13.422984  543705 net.go:698] Add success.
I0319 16:36:13.999999  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5cfb3401-4158-4656-8880-b55050adca59","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:36:14.000033  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:36:14.454439  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:36:14.454664  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:36:14.454676  543705 disk_worker.go:708] disk space is not compliant
W0319 16:36:14.454679  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:36:14.456137  543705 disk_worker.go:494] system disk:vda1
I0319 16:36:14.456186  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:36:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:36:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:36:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:36:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:36:16.472444  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:36:20.009676  543705 disk_info.go:125] begin check local disk info of client
I0319 16:36:20.012327  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:36:20.012333  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab000 0xc0001ab040]
E0319 16:36:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:36:23.409771  543705 memory.go:184] no items to output this cycle
I0319 16:36:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 16:36:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:36:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 16:36:33.409787  543705 memory.go:184] no items to output this cycle
I0319 16:36:37.771554  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:36:37.771561  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:36:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:36:43.410684  543705 memory.go:191] Add success.
I0319 16:36:43.409830  543705 cpu.go:282] Add success.
I0319 16:36:43.419740  543705 net.go:648] Add success.
I0319 16:36:43.422581  543705 net.go:770] primary dev: ETH0
I0319 16:36:43.422594  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:36:43.422607  543705 net.go:698] Add success.
I0319 16:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:36:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:36:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:36:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:36:53.409791  543705 memory.go:184] no items to output this cycle
I0319 16:36:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 16:37:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:37:03.409789  543705 memory.go:184] no items to output this cycle
I0319 16:37:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 16:37:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:37:13.409787  543705 memory.go:191] Add success.
I0319 16:37:13.409789  543705 cpu.go:282] Add success.
W0319 16:37:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:37:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:37:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:37:13.420079  543705 net.go:648] Add success.
I0319 16:37:13.422864  543705 net.go:770] primary dev: ETH0
I0319 16:37:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:37:13.422892  543705 net.go:698] Add success.
I0319 16:37:13.453460  543705 event_worker.go:152] Polling the log file for events...
W0319 16:37:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:37:14.455289  543705 disk_worker.go:708] disk space is not compliant
W0319 16:37:14.455294  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:37:14.456237  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:37:14.456248  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:37:14.456255  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:37:14.457320  543705 disk_worker.go:494] system disk:vda1
I0319 16:37:14.457370  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:37:15.457006  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:37:15.457020  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:37:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:37:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:37:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:37:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:37:16.472358  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:37:20.013676  543705 disk_info.go:125] begin check local disk info of client
I0319 16:37:20.016180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:37:20.016187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000531c80 0xc000531cc0]
E0319 16:37:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:37:23.409763  543705 memory.go:184] no items to output this cycle
I0319 16:37:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 16:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:37:33.409793  543705 memory.go:184] no items to output this cycle
I0319 16:37:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:37:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:37:43.409877  543705 memory.go:191] Add success.
I0319 16:37:43.409909  543705 cpu.go:282] Add success.
I0319 16:37:43.419709  543705 net.go:648] Add success.
I0319 16:37:43.423086  543705 net.go:770] primary dev: ETH0
I0319 16:37:43.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:37:43.423110  543705 net.go:698] Add success.
I0319 16:37:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:37:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:37:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:37:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:37:53.409774  543705 memory.go:184] no items to output this cycle
I0319 16:37:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 16:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:38:03.409782  543705 memory.go:184] no items to output this cycle
I0319 16:38:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 16:38:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:38:13.409797  543705 memory.go:191] Add success.
I0319 16:38:13.409796  543705 cpu.go:282] Add success.
W0319 16:38:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:38:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:38:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:38:13.420182  543705 net.go:648] Add success.
I0319 16:38:13.423140  543705 net.go:770] primary dev: ETH0
I0319 16:38:13.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:38:13.423166  543705 net.go:698] Add success.
I0319 16:38:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:38:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:38:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0319 16:38:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:38:14.456693  543705 disk_worker.go:494] system disk:vda1
I0319 16:38:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:38:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:38:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:38:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:38:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:38:20.017673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:38:20.020309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:38:20.020316  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c000 0xc00035c040]
E0319 16:38:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:38:23.409776  543705 memory.go:184] no items to output this cycle
I0319 16:38:23.409781  543705 cpu.go:275] no items to output this cycle
E0319 16:38:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:38:33.409794  543705 memory.go:184] no items to output this cycle
I0319 16:38:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:38:43.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:38:43.409922  543705 memory.go:191] Add success.
I0319 16:38:43.410001  543705 cpu.go:282] Add success.
I0319 16:38:43.419719  543705 net.go:648] Add success.
I0319 16:38:43.422871  543705 net.go:770] primary dev: ETH0
I0319 16:38:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:38:43.422896  543705 net.go:698] Add success.
I0319 16:38:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:38:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:38:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:38:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:38:53.409777  543705 memory.go:184] no items to output this cycle
I0319 16:38:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 16:39:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:39:03.409781  543705 memory.go:184] no items to output this cycle
I0319 16:39:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 16:39:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:39:13.409786  543705 memory.go:191] Add success.
I0319 16:39:13.409786  543705 cpu.go:282] Add success.
W0319 16:39:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:39:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:39:13.420179  543705 net.go:648] Add success.
I0319 16:39:13.422999  543705 net.go:770] primary dev: ETH0
I0319 16:39:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:39:13.423027  543705 net.go:698] Add success.
I0319 16:39:13.498779  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7ab4cc4b-60a3-414f-be3e-73011f0253be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:39:13.498812  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:39:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:39:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:39:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 16:39:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:39:14.456832  543705 disk_worker.go:494] system disk:vda1
I0319 16:39:14.456878  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:39:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:39:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:39:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:39:16.472483  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:39:20.021676  543705 disk_info.go:125] begin check local disk info of client
I0319 16:39:20.024216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:39:20.024222  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513b40 0xc000513b80]
E0319 16:39:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:39:23.409798  543705 memory.go:184] no items to output this cycle
I0319 16:39:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 16:39:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:39:33.409793  543705 memory.go:184] no items to output this cycle
I0319 16:39:33.409807  543705 cpu.go:275] no items to output this cycle
I0319 16:39:37.772575  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:39:37.772581  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:39:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:39:43.410717  543705 memory.go:191] Add success.
I0319 16:39:43.409828  543705 cpu.go:282] Add success.
I0319 16:39:43.420411  543705 net.go:648] Add success.
I0319 16:39:43.423446  543705 net.go:770] primary dev: ETH0
I0319 16:39:43.423459  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:39:43.423471  543705 net.go:698] Add success.
I0319 16:39:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:39:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:39:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:39:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:39:53.409771  543705 memory.go:184] no items to output this cycle
I0319 16:39:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 16:40:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:40:03.409802  543705 memory.go:184] no items to output this cycle
I0319 16:40:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 16:40:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:40:13.409808  543705 memory.go:191] Add success.
I0319 16:40:13.409820  543705 cpu.go:282] Add success.
W0319 16:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:40:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:40:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:40:13.420117  543705 net.go:648] Add success.
I0319 16:40:13.422885  543705 net.go:770] primary dev: ETH0
I0319 16:40:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:40:13.422911  543705 net.go:698] Add success.
W0319 16:40:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:40:14.455267  543705 disk_worker.go:708] disk space is not compliant
W0319 16:40:14.455272  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:40:14.455635  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:40:14.457447  543705 disk_worker.go:494] system disk:vda1
I0319 16:40:14.457495  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:40:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:40:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:40:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:40:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:40:16.472483  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:40:20.025672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:40:20.028195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:40:20.028201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a800 0xc00036a840]
E0319 16:40:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:40:23.409789  543705 memory.go:184] no items to output this cycle
I0319 16:40:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 16:40:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:40:33.409799  543705 memory.go:184] no items to output this cycle
I0319 16:40:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 16:40:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:40:43.409830  543705 memory.go:191] Add success.
I0319 16:40:43.409837  543705 cpu.go:282] Add success.
I0319 16:40:43.420209  543705 net.go:770] primary dev: ETH0
I0319 16:40:43.420223  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:40:43.420236  543705 net.go:698] Add success.
I0319 16:40:43.420470  543705 net.go:648] Add success.
I0319 16:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:40:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:40:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:40:53.409772  543705 memory.go:184] no items to output this cycle
I0319 16:40:53.409774  543705 cpu.go:275] no items to output this cycle
E0319 16:41:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:41:03.409796  543705 memory.go:184] no items to output this cycle
I0319 16:41:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:41:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:41:13.409783  543705 memory.go:191] Add success.
W0319 16:41:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:41:13.409814  543705 cpu.go:282] Add success.
W0319 16:41:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:41:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:41:13.420155  543705 net.go:648] Add success.
I0319 16:41:13.422998  543705 net.go:770] primary dev: ETH0
I0319 16:41:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:41:13.423030  543705 net.go:698] Add success.
I0319 16:41:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:41:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:41:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 16:41:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:41:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 16:41:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:41:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:41:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:41:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:41:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:41:16.472475  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:41:20.029673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:41:20.032268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:41:20.032275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab8c0 0xc0001ab900]
E0319 16:41:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:41:23.409792  543705 memory.go:184] no items to output this cycle
I0319 16:41:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 16:41:33.410705  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:41:33.410721  543705 memory.go:184] no items to output this cycle
I0319 16:41:33.410737  543705 cpu.go:275] no items to output this cycle
E0319 16:41:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:41:43.409803  543705 memory.go:191] Add success.
I0319 16:41:43.409805  543705 cpu.go:282] Add success.
I0319 16:41:43.419886  543705 net.go:648] Add success.
I0319 16:41:43.422602  543705 net.go:770] primary dev: ETH0
I0319 16:41:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:41:43.422627  543705 net.go:698] Add success.
I0319 16:41:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:41:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:41:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:41:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:41:53.409787  543705 memory.go:184] no items to output this cycle
I0319 16:41:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 16:42:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:42:03.409786  543705 memory.go:184] no items to output this cycle
I0319 16:42:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 16:42:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:42:13.409797  543705 memory.go:191] Add success.
I0319 16:42:13.409800  543705 cpu.go:282] Add success.
W0319 16:42:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:42:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:42:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:42:13.420205  543705 net.go:648] Add success.
I0319 16:42:13.422888  543705 net.go:770] primary dev: ETH0
I0319 16:42:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:42:13.422913  543705 net.go:698] Add success.
I0319 16:42:13.836123  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"37c8d68b-40c8-422b-8102-4927851b3971","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:42:13.836156  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 16:42:14.454514  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:42:14.454525  543705 disk_worker.go:708] disk space is not compliant
W0319 16:42:14.454527  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:42:14.455422  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:42:14.455431  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:42:14.455446  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:42:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 16:42:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:42:15.456996  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:42:15.457011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:42:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:42:16.457991  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:42:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:42:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:42:16.472422  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:42:20.033677  543705 disk_info.go:125] begin check local disk info of client
I0319 16:42:20.036116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:42:20.036122  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349000 0xc000349040]
E0319 16:42:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:42:23.409774  543705 memory.go:184] no items to output this cycle
I0319 16:42:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 16:42:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:42:33.409784  543705 memory.go:184] no items to output this cycle
I0319 16:42:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 16:42:37.773555  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:42:37.773561  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:42:43.409922  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:42:43.411022  543705 memory.go:191] Add success.
I0319 16:42:43.409973  543705 cpu.go:282] Add success.
I0319 16:42:43.419724  543705 net.go:648] Add success.
I0319 16:42:43.422704  543705 net.go:770] primary dev: ETH0
I0319 16:42:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:42:43.422728  543705 net.go:698] Add success.
I0319 16:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:42:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:42:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:42:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:42:53.409793  543705 memory.go:184] no items to output this cycle
I0319 16:42:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:43:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:43:03.409816  543705 memory.go:184] no items to output this cycle
I0319 16:43:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 16:43:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:43:13.409811  543705 memory.go:191] Add success.
I0319 16:43:13.409813  543705 cpu.go:282] Add success.
W0319 16:43:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:43:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:43:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:43:13.420214  543705 net.go:648] Add success.
I0319 16:43:13.422904  543705 net.go:770] primary dev: ETH0
I0319 16:43:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:43:13.422929  543705 net.go:698] Add success.
I0319 16:43:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:43:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:43:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 16:43:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:43:14.456508  543705 disk_worker.go:494] system disk:vda1
I0319 16:43:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:43:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:43:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:43:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:43:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:43:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:43:20.037674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:43:20.040299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:43:20.040306  543705 disk_info.go:196] parse disk info done, disk is : [0xc000288780 0xc0002887c0]
E0319 16:43:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:43:23.409776  543705 memory.go:184] no items to output this cycle
I0319 16:43:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 16:43:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:43:33.409783  543705 memory.go:184] no items to output this cycle
I0319 16:43:33.409789  543705 cpu.go:275] no items to output this cycle
E0319 16:43:43.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:43:43.409899  543705 memory.go:191] Add success.
I0319 16:43:43.409916  543705 cpu.go:282] Add success.
I0319 16:43:43.419710  543705 net.go:648] Add success.
I0319 16:43:43.422587  543705 net.go:770] primary dev: ETH0
I0319 16:43:43.422600  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:43:43.422612  543705 net.go:698] Add success.
I0319 16:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:43:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:43:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:43:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:43:53.409784  543705 memory.go:184] no items to output this cycle
I0319 16:43:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 16:44:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:44:03.409802  543705 memory.go:184] no items to output this cycle
I0319 16:44:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 16:44:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:44:13.409781  543705 memory.go:191] Add success.
W0319 16:44:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:44:13.409810  543705 cpu.go:282] Add success.
W0319 16:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:44:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:44:13.420160  543705 net.go:648] Add success.
I0319 16:44:13.423032  543705 net.go:770] primary dev: ETH0
I0319 16:44:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:44:13.423057  543705 net.go:698] Add success.
I0319 16:44:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:44:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:44:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 16:44:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:44:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 16:44:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:44:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:44:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:44:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:44:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:44:20.041676  543705 disk_info.go:125] begin check local disk info of client
I0319 16:44:20.044233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:44:20.044240  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039bf40 0xc00033c000]
E0319 16:44:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:44:23.409808  543705 memory.go:184] no items to output this cycle
I0319 16:44:23.409820  543705 cpu.go:275] no items to output this cycle
E0319 16:44:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:44:33.409798  543705 memory.go:184] no items to output this cycle
I0319 16:44:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:44:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:44:43.409796  543705 memory.go:191] Add success.
I0319 16:44:43.409825  543705 cpu.go:282] Add success.
I0319 16:44:43.419715  543705 net.go:648] Add success.
I0319 16:44:43.423139  543705 net.go:770] primary dev: ETH0
I0319 16:44:43.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:44:43.423164  543705 net.go:698] Add success.
I0319 16:44:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:44:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:44:53.410201  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:44:53.410217  543705 memory.go:184] no items to output this cycle
I0319 16:44:53.410223  543705 cpu.go:275] no items to output this cycle
E0319 16:45:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:45:03.409790  543705 memory.go:184] no items to output this cycle
I0319 16:45:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 16:45:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:45:13.409783  543705 memory.go:191] Add success.
W0319 16:45:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:45:13.409814  543705 cpu.go:282] Add success.
W0319 16:45:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:45:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:45:13.420072  543705 net.go:648] Add success.
I0319 16:45:13.423050  543705 net.go:770] primary dev: ETH0
I0319 16:45:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:45:13.423076  543705 net.go:698] Add success.
I0319 16:45:13.469320  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d1f81da8-04f4-42ec-894f-471fa1251a2e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:45:13.469354  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:45:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:45:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:45:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0319 16:45:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:45:14.456610  543705 disk_worker.go:494] system disk:vda1
I0319 16:45:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:45:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:45:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:45:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:45:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:45:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:45:20.045675  543705 disk_info.go:125] begin check local disk info of client
I0319 16:45:20.048230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:45:20.048236  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b4800 0xc0002b4840]
E0319 16:45:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:45:23.409796  543705 memory.go:184] no items to output this cycle
I0319 16:45:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 16:45:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:45:33.409782  543705 memory.go:184] no items to output this cycle
I0319 16:45:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 16:45:37.773728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:45:37.773735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:45:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:45:43.410583  543705 memory.go:191] Add success.
I0319 16:45:43.409812  543705 cpu.go:282] Add success.
I0319 16:45:43.419711  543705 net.go:648] Add success.
I0319 16:45:43.422374  543705 net.go:770] primary dev: ETH0
I0319 16:45:43.422387  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:45:43.422398  543705 net.go:698] Add success.
I0319 16:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:45:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:45:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:45:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:45:53.409811  543705 memory.go:184] no items to output this cycle
I0319 16:45:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 16:46:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:46:03.409770  543705 memory.go:184] no items to output this cycle
I0319 16:46:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:46:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:46:13.409807  543705 memory.go:191] Add success.
I0319 16:46:13.409816  543705 cpu.go:282] Add success.
W0319 16:46:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:46:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:46:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:46:13.420119  543705 net.go:648] Add success.
I0319 16:46:13.423095  543705 net.go:770] primary dev: ETH0
I0319 16:46:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:46:13.423134  543705 net.go:698] Add success.
I0319 16:46:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:46:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:46:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 16:46:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:46:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 16:46:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:46:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:46:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:46:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:46:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:46:16.472442  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:46:20.049670  543705 disk_info.go:125] begin check local disk info of client
I0319 16:46:20.052229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:46:20.052235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330e00 0xc000330e40]
E0319 16:46:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:46:23.409770  543705 memory.go:184] no items to output this cycle
I0319 16:46:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 16:46:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:46:33.409797  543705 memory.go:184] no items to output this cycle
I0319 16:46:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 16:46:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:46:43.409825  543705 memory.go:191] Add success.
I0319 16:46:43.409829  543705 cpu.go:282] Add success.
I0319 16:46:43.419807  543705 net.go:770] primary dev: ETH0
I0319 16:46:43.419821  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:46:43.419834  543705 net.go:698] Add success.
I0319 16:46:43.420490  543705 net.go:648] Add success.
I0319 16:46:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:46:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:46:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:46:53.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:46:53.410263  543705 memory.go:184] no items to output this cycle
I0319 16:46:53.410275  543705 cpu.go:275] no items to output this cycle
E0319 16:47:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:47:03.409780  543705 memory.go:184] no items to output this cycle
I0319 16:47:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 16:47:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:47:13.409790  543705 memory.go:191] Add success.
I0319 16:47:13.409790  543705 cpu.go:282] Add success.
W0319 16:47:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:47:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:47:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:47:13.420117  543705 net.go:648] Add success.
I0319 16:47:13.423199  543705 net.go:770] primary dev: ETH0
I0319 16:47:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:47:13.423226  543705 net.go:698] Add success.
I0319 16:47:13.452854  543705 event_worker.go:152] Polling the log file for events...
W0319 16:47:14.455246  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:47:14.455262  543705 disk_worker.go:708] disk space is not compliant
W0319 16:47:14.455266  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:47:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:47:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:47:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:47:14.456825  543705 disk_worker.go:494] system disk:vda1
I0319 16:47:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:47:15.457026  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:47:15.457041  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:47:16.458038  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:47:16.458049  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:47:16.458092  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:47:16.458110  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:47:16.472485  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:47:20.053672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:47:20.056121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:47:20.056128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7d80 0xc0003b7dc0]
E0319 16:47:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:47:23.409771  543705 memory.go:184] no items to output this cycle
I0319 16:47:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:47:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:47:33.409780  543705 memory.go:184] no items to output this cycle
I0319 16:47:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 16:47:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:47:43.409813  543705 memory.go:191] Add success.
I0319 16:47:43.409822  543705 cpu.go:282] Add success.
I0319 16:47:43.419872  543705 net.go:648] Add success.
I0319 16:47:43.422546  543705 net.go:770] primary dev: ETH0
I0319 16:47:43.422561  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:47:43.422575  543705 net.go:698] Add success.
I0319 16:47:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:47:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:47:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:47:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:47:53.409790  543705 memory.go:184] no items to output this cycle
I0319 16:47:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 16:48:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:48:03.409783  543705 memory.go:184] no items to output this cycle
I0319 16:48:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 16:48:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:48:13.409788  543705 memory.go:191] Add success.
W0319 16:48:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:48:13.409821  543705 cpu.go:282] Add success.
W0319 16:48:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:48:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:48:13.420238  543705 net.go:648] Add success.
I0319 16:48:13.423050  543705 net.go:770] primary dev: ETH0
I0319 16:48:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:48:13.423081  543705 net.go:698] Add success.
I0319 16:48:13.469134  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65639d3f-5f28-403f-b6a5-907e1c61fbde","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:48:13.469166  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:48:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:48:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:48:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 16:48:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:48:14.456585  543705 disk_worker.go:494] system disk:vda1
I0319 16:48:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:48:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:48:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:48:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:48:16.472445  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:48:20.057672  543705 disk_info.go:125] begin check local disk info of client
I0319 16:48:20.060204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:48:20.060211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7380 0xc0003b73c0]
E0319 16:48:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:48:23.409767  543705 memory.go:184] no items to output this cycle
I0319 16:48:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 16:48:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:48:33.409798  543705 memory.go:184] no items to output this cycle
I0319 16:48:33.409810  543705 cpu.go:275] no items to output this cycle
I0319 16:48:37.775574  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:48:37.775582  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:48:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:48:43.410673  543705 memory.go:191] Add success.
I0319 16:48:43.409811  543705 cpu.go:282] Add success.
I0319 16:48:43.420446  543705 net.go:648] Add success.
I0319 16:48:43.423253  543705 net.go:770] primary dev: ETH0
I0319 16:48:43.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:48:43.423281  543705 net.go:698] Add success.
I0319 16:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:48:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:48:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:48:53.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:48:53.409902  543705 memory.go:184] no items to output this cycle
I0319 16:48:53.409924  543705 cpu.go:275] no items to output this cycle
E0319 16:49:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:49:03.409760  543705 memory.go:184] no items to output this cycle
I0319 16:49:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 16:49:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:49:13.409802  543705 memory.go:191] Add success.
I0319 16:49:13.409808  543705 cpu.go:282] Add success.
W0319 16:49:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:49:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:49:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:49:13.420052  543705 net.go:648] Add success.
I0319 16:49:13.422996  543705 net.go:770] primary dev: ETH0
I0319 16:49:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:49:13.423038  543705 net.go:698] Add success.
I0319 16:49:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:49:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:49:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 16:49:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:49:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 16:49:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:49:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:49:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:49:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:49:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:49:16.472480  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:49:20.061675  543705 disk_info.go:125] begin check local disk info of client
I0319 16:49:20.064268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:49:20.064275  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047da80 0xc00047dac0]
E0319 16:49:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:49:23.409773  543705 memory.go:184] no items to output this cycle
I0319 16:49:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 16:49:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:49:33.409780  543705 memory.go:184] no items to output this cycle
I0319 16:49:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 16:49:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:49:43.409779  543705 memory.go:191] Add success.
I0319 16:49:43.409810  543705 cpu.go:282] Add success.
I0319 16:49:43.419976  543705 net.go:648] Add success.
I0319 16:49:43.422981  543705 net.go:770] primary dev: ETH0
I0319 16:49:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:49:43.423010  543705 net.go:698] Add success.
I0319 16:49:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:49:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:49:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:49:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:49:53.409773  543705 memory.go:184] no items to output this cycle
I0319 16:49:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 16:50:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:50:03.409900  543705 memory.go:184] no items to output this cycle
I0319 16:50:03.409933  543705 cpu.go:275] no items to output this cycle
E0319 16:50:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:50:13.409777  543705 memory.go:191] Add success.
W0319 16:50:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 16:50:13.409810  543705 cpu.go:282] Add success.
W0319 16:50:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:50:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:50:13.420266  543705 net.go:648] Add success.
I0319 16:50:13.423319  543705 net.go:770] primary dev: ETH0
I0319 16:50:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:50:13.423347  543705 net.go:698] Add success.
I0319 16:50:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:50:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:50:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 16:50:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:50:14.456486  543705 disk_worker.go:494] system disk:vda1
I0319 16:50:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:50:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:50:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:50:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:50:16.472476  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:50:20.065678  543705 disk_info.go:125] begin check local disk info of client
I0319 16:50:20.068214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:50:20.068230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003566c0 0xc000356700]
E0319 16:50:23.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:50:23.410285  543705 memory.go:184] no items to output this cycle
I0319 16:50:23.410296  543705 cpu.go:275] no items to output this cycle
E0319 16:50:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:50:33.409769  543705 memory.go:184] no items to output this cycle
I0319 16:50:33.409790  543705 cpu.go:275] no items to output this cycle
E0319 16:50:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:50:43.409796  543705 memory.go:191] Add success.
I0319 16:50:43.409798  543705 cpu.go:282] Add success.
I0319 16:50:43.419868  543705 net.go:648] Add success.
I0319 16:50:43.422576  543705 net.go:770] primary dev: ETH0
I0319 16:50:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:50:43.422604  543705 net.go:698] Add success.
I0319 16:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:50:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:50:53.409783  543705 memory.go:184] no items to output this cycle
I0319 16:50:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 16:51:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:51:03.409767  543705 memory.go:184] no items to output this cycle
I0319 16:51:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:51:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:51:13.409821  543705 memory.go:191] Add success.
I0319 16:51:13.409831  543705 cpu.go:282] Add success.
W0319 16:51:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:51:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:51:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:51:13.420173  543705 net.go:648] Add success.
I0319 16:51:13.423426  543705 net.go:770] primary dev: ETH0
I0319 16:51:13.423438  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:51:13.423450  543705 net.go:698] Add success.
I0319 16:51:13.464191  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0159ad41-909f-4507-9b96-29e5245e9a7b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:51:13.464225  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:51:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:51:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:51:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0319 16:51:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:51:14.456790  543705 disk_worker.go:494] system disk:vda1
I0319 16:51:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:51:15.455991  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:51:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:51:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:51:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:51:16.472463  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:51:20.069674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:51:20.072281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:51:20.072287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b40 0xc0000c4b80]
E0319 16:51:23.410421  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:51:23.410436  543705 memory.go:184] no items to output this cycle
I0319 16:51:23.410441  543705 cpu.go:275] no items to output this cycle
E0319 16:51:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:51:33.409762  543705 memory.go:184] no items to output this cycle
I0319 16:51:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 16:51:37.776570  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:51:37.776577  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:51:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:51:43.410544  543705 memory.go:191] Add success.
I0319 16:51:43.409805  543705 cpu.go:282] Add success.
I0319 16:51:43.420237  543705 net.go:648] Add success.
I0319 16:51:43.422910  543705 net.go:770] primary dev: ETH0
I0319 16:51:43.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:51:43.422939  543705 net.go:698] Add success.
I0319 16:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:51:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:51:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:51:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:51:53.409769  543705 memory.go:184] no items to output this cycle
I0319 16:51:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 16:52:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:52:03.409782  543705 cpu.go:275] no items to output this cycle
I0319 16:52:03.409787  543705 memory.go:184] no items to output this cycle
E0319 16:52:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:52:13.409798  543705 memory.go:191] Add success.
I0319 16:52:13.409800  543705 cpu.go:282] Add success.
W0319 16:52:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:52:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:52:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:52:13.420128  543705 net.go:648] Add success.
I0319 16:52:13.423312  543705 net.go:770] primary dev: ETH0
I0319 16:52:13.423325  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:52:13.423337  543705 net.go:698] Add success.
W0319 16:52:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:52:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 16:52:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:52:14.456914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:52:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:52:14.456930  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:52:14.456989  543705 disk_worker.go:494] system disk:vda1
I0319 16:52:14.457019  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:52:15.456970  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:52:15.456989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:52:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:52:16.458150  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:52:16.458174  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:52:16.458194  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:52:16.472561  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:52:20.073674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:52:20.076123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:52:20.076128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4440 0xc0000c4480]
E0319 16:52:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:52:23.409793  543705 memory.go:184] no items to output this cycle
I0319 16:52:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 16:52:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:52:33.409806  543705 memory.go:184] no items to output this cycle
I0319 16:52:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 16:52:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:52:43.409832  543705 memory.go:191] Add success.
I0319 16:52:43.409835  543705 cpu.go:282] Add success.
I0319 16:52:43.420120  543705 net.go:648] Add success.
I0319 16:52:43.422850  543705 net.go:770] primary dev: ETH0
I0319 16:52:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:52:43.422876  543705 net.go:698] Add success.
I0319 16:52:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:52:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:52:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:52:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:52:53.409766  543705 memory.go:184] no items to output this cycle
I0319 16:52:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 16:53:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:53:03.409799  543705 memory.go:184] no items to output this cycle
I0319 16:53:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:53:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:53:13.409830  543705 memory.go:191] Add success.
I0319 16:53:13.409840  543705 cpu.go:282] Add success.
W0319 16:53:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:53:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:53:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:53:13.420182  543705 net.go:648] Add success.
I0319 16:53:13.423041  543705 net.go:770] primary dev: ETH0
I0319 16:53:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:53:13.423067  543705 net.go:698] Add success.
I0319 16:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:53:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:53:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 16:53:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:53:14.456603  543705 disk_worker.go:494] system disk:vda1
I0319 16:53:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:53:15.455987  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:53:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:53:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:53:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:53:16.472495  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:53:20.077674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:53:20.080200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:53:20.080207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa3c0 0xc0001aa400]
E0319 16:53:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:53:23.409796  543705 memory.go:184] no items to output this cycle
I0319 16:53:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 16:53:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:53:33.409770  543705 memory.go:184] no items to output this cycle
I0319 16:53:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 16:53:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:53:43.409798  543705 memory.go:191] Add success.
I0319 16:53:43.409802  543705 cpu.go:282] Add success.
I0319 16:53:43.419977  543705 net.go:648] Add success.
I0319 16:53:43.422572  543705 net.go:770] primary dev: ETH0
I0319 16:53:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:53:43.422601  543705 net.go:698] Add success.
I0319 16:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:53:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:53:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:53:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:53:53.409770  543705 memory.go:184] no items to output this cycle
I0319 16:53:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:54:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:54:03.409798  543705 memory.go:184] no items to output this cycle
I0319 16:54:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:54:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:54:13.409797  543705 memory.go:191] Add success.
I0319 16:54:13.409804  543705 cpu.go:282] Add success.
W0319 16:54:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:54:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:54:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:54:13.420450  543705 net.go:648] Add success.
I0319 16:54:13.423408  543705 net.go:770] primary dev: ETH0
I0319 16:54:13.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:54:13.423433  543705 net.go:698] Add success.
I0319 16:54:13.468572  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4dea14ab-600f-4ecb-8d5c-8357892fb7da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:54:13.468604  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 16:54:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:54:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:54:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0319 16:54:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:54:14.456638  543705 disk_worker.go:494] system disk:vda1
I0319 16:54:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:54:15.456007  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:54:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:54:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:54:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:54:16.472468  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:54:20.081673  543705 disk_info.go:125] begin check local disk info of client
I0319 16:54:20.084212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:54:20.084219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab700 0xc0001ab740]
E0319 16:54:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:54:23.409800  543705 memory.go:184] no items to output this cycle
I0319 16:54:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:54:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:54:33.409778  543705 memory.go:184] no items to output this cycle
I0319 16:54:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 16:54:37.777588  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:54:37.777602  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:54:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:54:43.410769  543705 memory.go:191] Add success.
I0319 16:54:43.409835  543705 cpu.go:282] Add success.
I0319 16:54:43.420528  543705 net.go:648] Add success.
I0319 16:54:43.423258  543705 net.go:770] primary dev: ETH0
I0319 16:54:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:54:43.423285  543705 net.go:698] Add success.
I0319 16:54:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:54:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:54:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:54:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:54:53.409786  543705 memory.go:184] no items to output this cycle
I0319 16:54:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 16:55:03.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:55:03.409902  543705 memory.go:184] no items to output this cycle
I0319 16:55:03.409919  543705 cpu.go:275] no items to output this cycle
E0319 16:55:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:55:13.409790  543705 memory.go:191] Add success.
I0319 16:55:13.409807  543705 cpu.go:282] Add success.
W0319 16:55:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:55:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:55:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:55:13.420249  543705 net.go:648] Add success.
I0319 16:55:13.423111  543705 net.go:770] primary dev: ETH0
I0319 16:55:13.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:55:13.423136  543705 net.go:698] Add success.
I0319 16:55:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:55:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:55:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 16:55:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:55:14.456569  543705 disk_worker.go:494] system disk:vda1
I0319 16:55:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:55:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:55:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:55:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:55:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:55:16.472483  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:55:20.085692  543705 disk_info.go:125] begin check local disk info of client
I0319 16:55:20.088249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:55:20.088254  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357b00 0xc000357b40]
E0319 16:55:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:55:23.409777  543705 memory.go:184] no items to output this cycle
I0319 16:55:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 16:55:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:55:33.409777  543705 memory.go:184] no items to output this cycle
I0319 16:55:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 16:55:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:55:43.409788  543705 memory.go:191] Add success.
I0319 16:55:43.409820  543705 cpu.go:282] Add success.
I0319 16:55:43.420030  543705 net.go:648] Add success.
I0319 16:55:43.422712  543705 net.go:770] primary dev: ETH0
I0319 16:55:43.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:55:43.422743  543705 net.go:698] Add success.
I0319 16:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:55:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:55:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:55:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:55:53.409782  543705 memory.go:184] no items to output this cycle
I0319 16:55:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 16:56:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:56:03.409790  543705 memory.go:184] no items to output this cycle
I0319 16:56:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 16:56:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:56:13.409794  543705 memory.go:191] Add success.
I0319 16:56:13.409799  543705 cpu.go:282] Add success.
W0319 16:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:56:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:56:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:56:13.420073  543705 net.go:648] Add success.
I0319 16:56:13.422967  543705 net.go:770] primary dev: ETH0
I0319 16:56:13.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:56:13.422993  543705 net.go:698] Add success.
I0319 16:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:56:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:56:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 16:56:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:56:14.456545  543705 disk_worker.go:494] system disk:vda1
I0319 16:56:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:56:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:56:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:56:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:56:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:56:16.472493  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:56:20.089687  543705 disk_info.go:125] begin check local disk info of client
I0319 16:56:20.092323  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:56:20.092329  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5200 0xc0000c5240]
E0319 16:56:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:56:23.409769  543705 memory.go:184] no items to output this cycle
I0319 16:56:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 16:56:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:56:33.409778  543705 memory.go:184] no items to output this cycle
I0319 16:56:33.409782  543705 cpu.go:275] no items to output this cycle
E0319 16:56:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:56:43.409802  543705 memory.go:191] Add success.
I0319 16:56:43.409820  543705 cpu.go:282] Add success.
I0319 16:56:43.420061  543705 net.go:648] Add success.
I0319 16:56:43.422741  543705 net.go:770] primary dev: ETH0
I0319 16:56:43.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:56:43.422770  543705 net.go:698] Add success.
I0319 16:56:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:56:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:56:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:56:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:56:53.409780  543705 memory.go:184] no items to output this cycle
I0319 16:56:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 16:57:03.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:57:03.409904  543705 memory.go:184] no items to output this cycle
I0319 16:57:03.409910  543705 cpu.go:275] no items to output this cycle
E0319 16:57:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:57:13.409784  543705 memory.go:191] Add success.
I0319 16:57:13.409810  543705 cpu.go:282] Add success.
W0319 16:57:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:57:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:57:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:57:13.420110  543705 net.go:648] Add success.
I0319 16:57:13.422869  543705 net.go:770] primary dev: ETH0
I0319 16:57:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:57:13.422894  543705 net.go:698] Add success.
I0319 16:57:13.429485  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 16:57:13.453721  543705 event_worker.go:152] Polling the log file for events...
I0319 16:57:13.463973  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3c529449-0df6-45d9-bc71-11731a63d77a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 16:57:13.464007  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 16:57:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:57:14.455247  543705 disk_worker.go:708] disk space is not compliant
W0319 16:57:14.455252  543705 disk_worker.go:728] disk inode is not compliant
E0319 16:57:14.455864  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 16:57:14.455872  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 16:57:14.455877  543705 custom_config.go:64] query custom config with name: gpu
I0319 16:57:14.456777  543705 disk_worker.go:494] system disk:vda1
I0319 16:57:14.456806  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 16:57:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 16:57:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:57:16.458100  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 16:57:16.458147  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 16:57:16.458172  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:57:16.458192  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:57:16.472606  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:57:20.093675  543705 disk_info.go:125] begin check local disk info of client
I0319 16:57:20.096168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:57:20.096174  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252800 0xc000252840]
E0319 16:57:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:57:23.409765  543705 memory.go:184] no items to output this cycle
I0319 16:57:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 16:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:57:33.409801  543705 memory.go:184] no items to output this cycle
I0319 16:57:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 16:57:37.777734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 16:57:37.777741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 16:57:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:57:43.410705  543705 memory.go:191] Add success.
I0319 16:57:43.409800  543705 cpu.go:282] Add success.
I0319 16:57:43.420390  543705 net.go:648] Add success.
I0319 16:57:43.423130  543705 net.go:770] primary dev: ETH0
I0319 16:57:43.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:57:43.423158  543705 net.go:698] Add success.
I0319 16:57:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:57:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:57:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:57:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:57:53.409791  543705 memory.go:184] no items to output this cycle
I0319 16:57:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 16:58:03.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:58:03.409905  543705 cpu.go:275] no items to output this cycle
I0319 16:58:03.409916  543705 memory.go:184] no items to output this cycle
E0319 16:58:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:58:13.409779  543705 memory.go:191] Add success.
W0319 16:58:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:58:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:58:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:58:13.409850  543705 cpu.go:282] Add success.
I0319 16:58:13.420509  543705 net.go:648] Add success.
I0319 16:58:13.423373  543705 net.go:770] primary dev: ETH0
I0319 16:58:13.423391  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:58:13.423410  543705 net.go:698] Add success.
I0319 16:58:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:58:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:58:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0319 16:58:14.455246  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:58:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 16:58:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:58:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:58:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:58:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:58:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:58:16.472504  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:58:20.097674  543705 disk_info.go:125] begin check local disk info of client
I0319 16:58:20.100241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:58:20.100247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007adc0 0xc00007ae00]
E0319 16:58:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:58:23.409800  543705 memory.go:184] no items to output this cycle
I0319 16:58:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:58:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:58:33.409772  543705 memory.go:184] no items to output this cycle
I0319 16:58:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 16:58:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:58:43.409829  543705 memory.go:191] Add success.
I0319 16:58:43.409832  543705 cpu.go:282] Add success.
I0319 16:58:43.420002  543705 net.go:648] Add success.
I0319 16:58:43.422706  543705 net.go:770] primary dev: ETH0
I0319 16:58:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:58:43.422734  543705 net.go:698] Add success.
I0319 16:58:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:58:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:58:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:58:53.409776  543705 cpu.go:275] no items to output this cycle
E0319 16:58:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:58:53.409790  543705 memory.go:184] no items to output this cycle
E0319 16:59:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:59:03.409805  543705 memory.go:184] no items to output this cycle
I0319 16:59:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 16:59:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:59:13.409791  543705 memory.go:191] Add success.
I0319 16:59:13.409807  543705 cpu.go:282] Add success.
W0319 16:59:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 16:59:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 16:59:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 16:59:13.420115  543705 net.go:648] Add success.
I0319 16:59:13.422788  543705 net.go:770] primary dev: ETH0
I0319 16:59:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:59:13.422813  543705 net.go:698] Add success.
I0319 16:59:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 16:59:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 16:59:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 16:59:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 16:59:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 16:59:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 16:59:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 16:59:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:59:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:59:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0319 16:59:16.472475  543705 disk_local_worker.go:436] Get disk info: []
I0319 16:59:20.101678  543705 disk_info.go:125] begin check local disk info of client
I0319 16:59:20.104198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 16:59:20.104205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b69c0 0xc0003b6a00]
E0319 16:59:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:59:23.409798  543705 memory.go:184] no items to output this cycle
I0319 16:59:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 16:59:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:59:33.409770  543705 memory.go:184] no items to output this cycle
I0319 16:59:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 16:59:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:59:43.409794  543705 memory.go:191] Add success.
I0319 16:59:43.409808  543705 cpu.go:282] Add success.
I0319 16:59:43.419908  543705 net.go:648] Add success.
I0319 16:59:43.422725  543705 net.go:770] primary dev: ETH0
I0319 16:59:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0319 16:59:43.422749  543705 net.go:698] Add success.
I0319 16:59:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 16:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 16:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 16:59:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 16:59:53.409812  543705 memory.go:184] no items to output this cycle
I0319 16:59:53.409821  543705 cpu.go:275] no items to output this cycle
E0319 17:00:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:00:03.409779  543705 memory.go:184] no items to output this cycle
I0319 17:00:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 17:00:13.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:00:13.409912  543705 memory.go:191] Add success.
W0319 17:00:13.409940  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:00:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:00:13.409959  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:00:13.410089  543705 cpu.go:282] Add success.
I0319 17:00:13.419713  543705 net.go:648] Add success.
I0319 17:00:13.422412  543705 net.go:770] primary dev: ETH0
I0319 17:00:13.422427  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:00:13.422441  543705 net.go:698] Add success.
I0319 17:00:13.512878  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b05e35d1-402c-479f-bab4-ddae3e8c620a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:00:13.512909  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:00:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:00:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:00:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0319 17:00:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:00:14.456476  543705 disk_worker.go:494] system disk:vda1
I0319 17:00:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:00:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:00:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:00:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:00:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:00:16.472488  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:00:20.105677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:00:20.108171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:00:20.108177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab6c0 0xc0003ab700]
E0319 17:00:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:00:23.409794  543705 memory.go:184] no items to output this cycle
I0319 17:00:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 17:00:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:00:33.409787  543705 memory.go:184] no items to output this cycle
I0319 17:00:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 17:00:37.777882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:00:37.777889  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:00:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:00:43.410758  543705 memory.go:191] Add success.
I0319 17:00:43.409823  543705 cpu.go:282] Add success.
I0319 17:00:43.420644  543705 net.go:648] Add success.
I0319 17:00:43.423412  543705 net.go:770] primary dev: ETH0
I0319 17:00:43.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:00:43.423438  543705 net.go:698] Add success.
I0319 17:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:00:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:00:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:00:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:00:53.409797  543705 memory.go:184] no items to output this cycle
I0319 17:00:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 17:01:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:01:03.409769  543705 memory.go:184] no items to output this cycle
I0319 17:01:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 17:01:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:01:13.409816  543705 memory.go:191] Add success.
I0319 17:01:13.409823  543705 cpu.go:282] Add success.
W0319 17:01:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:01:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:01:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:01:13.419952  543705 net.go:770] primary dev: ETH0
I0319 17:01:13.419965  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:01:13.419979  543705 net.go:698] Add success.
I0319 17:01:13.420451  543705 net.go:648] Add success.
I0319 17:01:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:01:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:01:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 17:01:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:01:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 17:01:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:01:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:01:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:01:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:01:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:01:16.472471  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:01:20.109674  543705 disk_info.go:125] begin check local disk info of client
I0319 17:01:20.112225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:01:20.112231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2440 0xc0003b2480]
E0319 17:01:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:01:23.409799  543705 memory.go:184] no items to output this cycle
I0319 17:01:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 17:01:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:01:33.409779  543705 memory.go:184] no items to output this cycle
I0319 17:01:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 17:01:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:01:43.409802  543705 memory.go:191] Add success.
I0319 17:01:43.409802  543705 cpu.go:282] Add success.
I0319 17:01:43.420071  543705 net.go:648] Add success.
I0319 17:01:43.422837  543705 net.go:770] primary dev: ETH0
I0319 17:01:43.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:01:43.422867  543705 net.go:698] Add success.
I0319 17:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:01:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:01:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:01:53.409804  543705 memory.go:184] no items to output this cycle
I0319 17:01:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:02:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:02:03.409768  543705 memory.go:184] no items to output this cycle
I0319 17:02:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 17:02:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:02:13.409793  543705 memory.go:191] Add success.
I0319 17:02:13.409793  543705 cpu.go:282] Add success.
W0319 17:02:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:02:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:02:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:02:13.420146  543705 net.go:648] Add success.
I0319 17:02:13.423177  543705 net.go:770] primary dev: ETH0
I0319 17:02:13.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:02:13.423203  543705 net.go:698] Add success.
W0319 17:02:14.455282  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:02:14.455404  543705 disk_worker.go:708] disk space is not compliant
W0319 17:02:14.455407  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:02:14.458899  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:02:14.458907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:02:14.458911  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:02:14.459104  543705 disk_worker.go:494] system disk:vda1
I0319 17:02:14.459131  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:02:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:02:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:02:16.458102  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:02:16.458140  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:02:16.458175  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:02:16.458195  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:02:16.472539  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:02:20.113674  543705 disk_info.go:125] begin check local disk info of client
I0319 17:02:20.116255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:02:20.116261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049c400 0xc00049c440]
E0319 17:02:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:02:23.409788  543705 memory.go:184] no items to output this cycle
I0319 17:02:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 17:02:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:02:33.409795  543705 memory.go:184] no items to output this cycle
I0319 17:02:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 17:02:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:02:43.409835  543705 memory.go:191] Add success.
I0319 17:02:43.409840  543705 cpu.go:282] Add success.
I0319 17:02:43.420396  543705 net.go:648] Add success.
I0319 17:02:43.423501  543705 net.go:770] primary dev: ETH0
I0319 17:02:43.423514  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:02:43.423536  543705 net.go:698] Add success.
I0319 17:02:46.458393  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:02:46.458471  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:02:46.458494  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:02:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:02:53.409799  543705 memory.go:184] no items to output this cycle
I0319 17:02:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 17:03:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:03:03.409800  543705 memory.go:184] no items to output this cycle
I0319 17:03:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 17:03:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:03:13.409805  543705 cpu.go:282] Add success.
I0319 17:03:13.409828  543705 memory.go:191] Add success.
W0319 17:03:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:03:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:03:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:03:13.420498  543705 net.go:648] Add success.
I0319 17:03:13.423407  543705 net.go:770] primary dev: ETH0
I0319 17:03:13.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:03:13.423444  543705 net.go:698] Add success.
I0319 17:03:13.958621  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6662ce26-c59c-4519-b892-87d00d4a3992","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:03:13.958660  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:03:14.453965  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:03:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:03:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0319 17:03:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:03:14.456545  543705 disk_worker.go:494] system disk:vda1
I0319 17:03:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:03:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:03:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:03:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:03:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:03:16.472440  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:03:20.117671  543705 disk_info.go:125] begin check local disk info of client
I0319 17:03:20.120234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:03:20.120241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005752c0 0xc000575300]
E0319 17:03:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:03:23.409766  543705 memory.go:184] no items to output this cycle
I0319 17:03:23.409833  543705 cpu.go:275] no items to output this cycle
E0319 17:03:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:03:33.409801  543705 memory.go:184] no items to output this cycle
I0319 17:03:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 17:03:37.778027  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:03:37.778034  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:03:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:03:43.410687  543705 memory.go:191] Add success.
I0319 17:03:43.409800  543705 cpu.go:282] Add success.
I0319 17:03:43.420398  543705 net.go:648] Add success.
I0319 17:03:43.423085  543705 net.go:770] primary dev: ETH0
I0319 17:03:43.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:03:43.423112  543705 net.go:698] Add success.
I0319 17:03:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:03:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:03:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:03:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:03:53.409780  543705 memory.go:184] no items to output this cycle
I0319 17:03:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 17:04:03.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:04:03.409925  543705 memory.go:184] no items to output this cycle
I0319 17:04:03.409932  543705 cpu.go:275] no items to output this cycle
E0319 17:04:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:04:13.409828  543705 memory.go:191] Add success.
I0319 17:04:13.409833  543705 cpu.go:282] Add success.
W0319 17:04:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:04:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:04:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:04:13.420094  543705 net.go:648] Add success.
I0319 17:04:13.422676  543705 net.go:770] primary dev: ETH0
I0319 17:04:13.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:04:13.422701  543705 net.go:698] Add success.
I0319 17:04:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:04:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:04:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0319 17:04:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:04:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 17:04:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:04:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:04:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:04:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:04:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:04:16.472484  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:04:20.121683  543705 disk_info.go:125] begin check local disk info of client
I0319 17:04:20.124225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:04:20.124232  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa680 0xc0001aa6c0]
E0319 17:04:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:04:23.409777  543705 memory.go:184] no items to output this cycle
I0319 17:04:23.409839  543705 cpu.go:275] no items to output this cycle
E0319 17:04:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:04:33.409777  543705 memory.go:184] no items to output this cycle
I0319 17:04:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 17:04:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:04:43.409817  543705 memory.go:191] Add success.
I0319 17:04:43.409822  543705 cpu.go:282] Add success.
I0319 17:04:43.419717  543705 net.go:770] primary dev: ETH0
I0319 17:04:43.419733  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:04:43.419748  543705 net.go:698] Add success.
I0319 17:04:43.420128  543705 net.go:648] Add success.
I0319 17:04:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:04:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:04:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:04:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:04:53.409793  543705 memory.go:184] no items to output this cycle
I0319 17:04:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:05:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:05:03.409790  543705 memory.go:184] no items to output this cycle
I0319 17:05:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 17:05:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:05:13.409823  543705 memory.go:191] Add success.
I0319 17:05:13.409828  543705 cpu.go:282] Add success.
W0319 17:05:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:05:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:05:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:05:13.420046  543705 net.go:770] primary dev: ETH0
I0319 17:05:13.420060  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:05:13.420072  543705 net.go:698] Add success.
I0319 17:05:13.420306  543705 net.go:648] Add success.
I0319 17:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:05:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:05:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 17:05:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:05:14.456472  543705 disk_worker.go:494] system disk:vda1
I0319 17:05:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:05:16.458016  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:05:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:05:16.458120  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:05:16.472532  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:05:20.125672  543705 disk_info.go:125] begin check local disk info of client
I0319 17:05:20.128227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:05:20.128233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003499c0 0xc000349a00]
E0319 17:05:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:05:23.409806  543705 memory.go:184] no items to output this cycle
I0319 17:05:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 17:05:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:05:33.409789  543705 memory.go:184] no items to output this cycle
I0319 17:05:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 17:05:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:05:43.409815  543705 memory.go:191] Add success.
I0319 17:05:43.409815  543705 cpu.go:282] Add success.
I0319 17:05:43.419879  543705 net.go:648] Add success.
I0319 17:05:43.422940  543705 net.go:770] primary dev: ETH0
I0319 17:05:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:05:43.422964  543705 net.go:698] Add success.
I0319 17:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:05:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:05:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:05:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:05:53.409781  543705 memory.go:184] no items to output this cycle
I0319 17:05:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 17:06:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:06:03.409765  543705 memory.go:184] no items to output this cycle
I0319 17:06:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 17:06:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:06:13.409810  543705 memory.go:191] Add success.
I0319 17:06:13.409816  543705 cpu.go:282] Add success.
W0319 17:06:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:06:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:06:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:06:13.420218  543705 net.go:648] Add success.
I0319 17:06:13.422934  543705 net.go:770] primary dev: ETH0
I0319 17:06:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:06:13.422958  543705 net.go:698] Add success.
I0319 17:06:13.469669  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3e944744-d94b-4089-b5b4-54fc64ca0409","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:06:13.469704  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:06:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:06:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0319 17:06:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:06:14.456596  543705 disk_worker.go:494] system disk:vda1
I0319 17:06:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:06:16.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:06:16.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:06:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:06:16.472513  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:06:20.129676  543705 disk_info.go:125] begin check local disk info of client
I0319 17:06:20.132236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:06:20.132242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e00 0xc0000c5e40]
E0319 17:06:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:06:23.409798  543705 memory.go:184] no items to output this cycle
I0319 17:06:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:06:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:06:33.409792  543705 memory.go:184] no items to output this cycle
I0319 17:06:33.409810  543705 cpu.go:275] no items to output this cycle
I0319 17:06:37.779594  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:06:37.779600  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:06:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:06:43.410705  543705 memory.go:191] Add success.
I0319 17:06:43.409834  543705 cpu.go:282] Add success.
I0319 17:06:43.420438  543705 net.go:648] Add success.
I0319 17:06:43.423128  543705 net.go:770] primary dev: ETH0
I0319 17:06:43.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:06:43.423153  543705 net.go:698] Add success.
I0319 17:06:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:06:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:06:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:06:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:06:53.409775  543705 memory.go:184] no items to output this cycle
I0319 17:06:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 17:07:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:07:03.409770  543705 memory.go:184] no items to output this cycle
I0319 17:07:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 17:07:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:07:13.409811  543705 memory.go:191] Add success.
I0319 17:07:13.409817  543705 cpu.go:282] Add success.
W0319 17:07:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:07:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:07:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:07:13.420119  543705 net.go:648] Add success.
I0319 17:07:13.423183  543705 net.go:770] primary dev: ETH0
I0319 17:07:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:07:13.423209  543705 net.go:698] Add success.
I0319 17:07:13.452862  543705 event_worker.go:152] Polling the log file for events...
W0319 17:07:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:07:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 17:07:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:07:14.456994  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:07:14.457004  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:07:14.457010  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:07:14.457027  543705 disk_worker.go:494] system disk:vda1
I0319 17:07:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:07:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:07:15.456800  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:07:16.458120  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:07:16.458193  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:07:16.458219  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:07:16.458222  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:07:16.472693  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:07:20.133677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:07:20.136205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:07:20.136212  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c740 0xc00035c780]
E0319 17:07:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:07:23.409777  543705 memory.go:184] no items to output this cycle
I0319 17:07:23.409780  543705 cpu.go:275] no items to output this cycle
E0319 17:07:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:07:33.409820  543705 memory.go:184] no items to output this cycle
I0319 17:07:33.409836  543705 cpu.go:275] no items to output this cycle
E0319 17:07:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:07:43.409778  543705 memory.go:191] Add success.
I0319 17:07:43.409797  543705 cpu.go:282] Add success.
I0319 17:07:43.419894  543705 net.go:648] Add success.
I0319 17:07:43.422674  543705 net.go:770] primary dev: ETH0
I0319 17:07:43.422687  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:07:43.422699  543705 net.go:698] Add success.
I0319 17:07:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:07:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:07:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:07:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:07:53.409777  543705 memory.go:184] no items to output this cycle
I0319 17:07:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:08:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:08:03.409773  543705 memory.go:184] no items to output this cycle
I0319 17:08:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 17:08:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:08:13.409780  543705 memory.go:191] Add success.
I0319 17:08:13.409801  543705 cpu.go:282] Add success.
W0319 17:08:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:08:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:08:13.420069  543705 net.go:648] Add success.
I0319 17:08:13.423509  543705 net.go:770] primary dev: ETH0
I0319 17:08:13.423522  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:08:13.423534  543705 net.go:698] Add success.
I0319 17:08:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:08:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:08:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 17:08:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:08:14.456516  543705 disk_worker.go:494] system disk:vda1
I0319 17:08:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:08:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:08:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:08:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:08:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:08:16.472523  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:08:20.137678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:08:20.140240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:08:20.140247  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273080 0xc0002730c0]
E0319 17:08:23.409914  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:08:23.409932  543705 memory.go:184] no items to output this cycle
I0319 17:08:23.410029  543705 cpu.go:275] no items to output this cycle
E0319 17:08:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:08:33.409787  543705 memory.go:184] no items to output this cycle
I0319 17:08:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 17:08:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:08:43.409829  543705 memory.go:191] Add success.
I0319 17:08:43.409839  543705 cpu.go:282] Add success.
I0319 17:08:43.419816  543705 net.go:770] primary dev: ETH0
I0319 17:08:43.419832  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:08:43.419847  543705 net.go:698] Add success.
I0319 17:08:43.420209  543705 net.go:648] Add success.
I0319 17:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:08:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:08:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:08:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:08:53.409776  543705 cpu.go:275] no items to output this cycle
I0319 17:08:53.409789  543705 memory.go:184] no items to output this cycle
E0319 17:09:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:09:03.409781  543705 cpu.go:275] no items to output this cycle
I0319 17:09:03.409787  543705 memory.go:184] no items to output this cycle
E0319 17:09:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:09:13.409792  543705 memory.go:191] Add success.
I0319 17:09:13.409798  543705 cpu.go:282] Add success.
W0319 17:09:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:09:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:09:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:09:13.420041  543705 net.go:648] Add success.
I0319 17:09:13.422928  543705 net.go:770] primary dev: ETH0
I0319 17:09:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:09:13.422953  543705 net.go:698] Add success.
I0319 17:09:13.463766  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f4a332df-56e8-4514-b2d9-30080bf5713d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:09:13.463802  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:09:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:09:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 17:09:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:09:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 17:09:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:09:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:09:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:09:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:09:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:09:16.472522  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:09:20.141674  543705 disk_info.go:125] begin check local disk info of client
I0319 17:09:20.144216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:09:20.144222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a2c0 0xc00032a300]
E0319 17:09:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:09:23.409801  543705 memory.go:184] no items to output this cycle
I0319 17:09:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 17:09:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:09:33.409773  543705 memory.go:184] no items to output this cycle
I0319 17:09:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 17:09:37.780594  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:09:37.780601  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:09:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:09:43.410790  543705 memory.go:191] Add success.
I0319 17:09:43.409809  543705 cpu.go:282] Add success.
I0319 17:09:43.420493  543705 net.go:648] Add success.
I0319 17:09:43.423401  543705 net.go:770] primary dev: ETH0
I0319 17:09:43.423415  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:09:43.423427  543705 net.go:698] Add success.
I0319 17:09:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:09:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:09:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:09:53.410259  543705 memory.go:184] no items to output this cycle
I0319 17:09:53.410290  543705 cpu.go:275] no items to output this cycle
E0319 17:10:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:10:03.409794  543705 memory.go:184] no items to output this cycle
I0319 17:10:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 17:10:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:10:13.409782  543705 memory.go:191] Add success.
I0319 17:10:13.409799  543705 cpu.go:282] Add success.
W0319 17:10:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:10:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:10:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:10:13.420144  543705 net.go:648] Add success.
I0319 17:10:13.422966  543705 net.go:770] primary dev: ETH0
I0319 17:10:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:10:13.422991  543705 net.go:698] Add success.
I0319 17:10:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:10:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:10:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 17:10:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:10:14.456565  543705 disk_worker.go:494] system disk:vda1
I0319 17:10:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:10:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:10:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:10:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:10:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:10:16.472537  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:10:20.145679  543705 disk_info.go:125] begin check local disk info of client
I0319 17:10:20.148235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:10:20.148241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e6080 0xc0000e60c0]
E0319 17:10:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:10:23.409782  543705 memory.go:184] no items to output this cycle
I0319 17:10:23.409783  543705 cpu.go:275] no items to output this cycle
E0319 17:10:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:10:33.409804  543705 memory.go:184] no items to output this cycle
I0319 17:10:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 17:10:43.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:10:43.409882  543705 memory.go:191] Add success.
I0319 17:10:43.409913  543705 cpu.go:282] Add success.
I0319 17:10:43.420066  543705 net.go:648] Add success.
I0319 17:10:43.423023  543705 net.go:770] primary dev: ETH0
I0319 17:10:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:10:43.423048  543705 net.go:698] Add success.
I0319 17:10:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:10:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:10:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:10:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:10:53.409776  543705 memory.go:184] no items to output this cycle
I0319 17:10:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 17:11:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:11:03.409763  543705 memory.go:184] no items to output this cycle
I0319 17:11:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 17:11:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:11:13.409828  543705 memory.go:191] Add success.
I0319 17:11:13.409828  543705 cpu.go:282] Add success.
W0319 17:11:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:11:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:11:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:11:13.420169  543705 net.go:648] Add success.
I0319 17:11:13.423212  543705 net.go:770] primary dev: ETH0
I0319 17:11:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:11:13.423244  543705 net.go:698] Add success.
I0319 17:11:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:11:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:11:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 17:11:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:11:14.456557  543705 disk_worker.go:494] system disk:vda1
I0319 17:11:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:11:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:11:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:11:16.472518  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:11:20.149668  543705 disk_info.go:125] begin check local disk info of client
I0319 17:11:20.152297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:11:20.152305  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3680 0xc0003b36c0]
E0319 17:11:23.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:11:23.409912  543705 memory.go:184] no items to output this cycle
I0319 17:11:23.409984  543705 cpu.go:275] no items to output this cycle
E0319 17:11:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:11:33.409796  543705 memory.go:184] no items to output this cycle
I0319 17:11:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 17:11:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:11:43.409786  543705 memory.go:191] Add success.
I0319 17:11:43.409818  543705 cpu.go:282] Add success.
I0319 17:11:43.419966  543705 net.go:648] Add success.
I0319 17:11:43.422491  543705 net.go:770] primary dev: ETH0
I0319 17:11:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:11:43.422517  543705 net.go:698] Add success.
I0319 17:11:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:11:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:11:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:11:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:11:53.409771  543705 memory.go:184] no items to output this cycle
I0319 17:11:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 17:12:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:12:03.409793  543705 memory.go:184] no items to output this cycle
I0319 17:12:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:12:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:12:13.409778  543705 memory.go:191] Add success.
W0319 17:12:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:12:13.409803  543705 cpu.go:282] Add success.
W0319 17:12:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:12:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:12:13.420089  543705 net.go:648] Add success.
I0319 17:12:13.422622  543705 net.go:770] primary dev: ETH0
I0319 17:12:13.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:12:13.422651  543705 net.go:698] Add success.
I0319 17:12:13.464006  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6fe8d001-6744-47fe-920d-70f270d7a2e0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:12:13.464043  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 17:12:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:12:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 17:12:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:12:14.455895  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:12:14.455904  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:12:14.455910  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:12:14.456576  543705 disk_worker.go:494] system disk:vda1
I0319 17:12:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:12:15.456862  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:12:15.456871  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:12:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:12:16.458007  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:12:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:12:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:12:16.472502  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:12:20.153679  543705 disk_info.go:125] begin check local disk info of client
I0319 17:12:20.156286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:12:20.156293  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0380 0xc0002b03c0]
E0319 17:12:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:12:23.409780  543705 memory.go:184] no items to output this cycle
I0319 17:12:23.409782  543705 cpu.go:275] no items to output this cycle
E0319 17:12:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:12:33.409776  543705 memory.go:184] no items to output this cycle
I0319 17:12:33.409798  543705 cpu.go:275] no items to output this cycle
I0319 17:12:37.781593  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:12:37.781600  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:12:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:12:43.410922  543705 memory.go:191] Add success.
I0319 17:12:43.409874  543705 cpu.go:282] Add success.
I0319 17:12:43.420663  543705 net.go:648] Add success.
I0319 17:12:43.423339  543705 net.go:770] primary dev: ETH0
I0319 17:12:43.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:12:43.423372  543705 net.go:698] Add success.
I0319 17:12:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:12:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:12:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:12:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:12:53.409776  543705 memory.go:184] no items to output this cycle
I0319 17:12:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:13:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:13:03.409799  543705 memory.go:184] no items to output this cycle
I0319 17:13:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:13:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:13:13.409818  543705 memory.go:191] Add success.
I0319 17:13:13.409822  543705 cpu.go:282] Add success.
W0319 17:13:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:13:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:13:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:13:13.420124  543705 net.go:648] Add success.
I0319 17:13:13.422940  543705 net.go:770] primary dev: ETH0
I0319 17:13:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:13:13.422965  543705 net.go:698] Add success.
I0319 17:13:14.453929  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:13:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:13:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0319 17:13:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:13:14.457431  543705 disk_worker.go:494] system disk:vda1
I0319 17:13:14.457466  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:13:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:13:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:13:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:13:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:13:16.472506  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:13:20.157677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:13:20.160391  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:13:20.160398  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0480 0xc0002b04c0]
E0319 17:13:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:13:23.409808  543705 memory.go:184] no items to output this cycle
I0319 17:13:23.409820  543705 cpu.go:275] no items to output this cycle
E0319 17:13:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:13:33.409777  543705 memory.go:184] no items to output this cycle
I0319 17:13:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 17:13:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:13:43.409802  543705 memory.go:191] Add success.
I0319 17:13:43.409817  543705 cpu.go:282] Add success.
I0319 17:13:43.419863  543705 net.go:648] Add success.
I0319 17:13:43.422514  543705 net.go:770] primary dev: ETH0
I0319 17:13:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:13:43.422541  543705 net.go:698] Add success.
I0319 17:13:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:13:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:13:53.409808  543705 memory.go:184] no items to output this cycle
I0319 17:13:53.409822  543705 cpu.go:275] no items to output this cycle
E0319 17:14:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:14:03.409785  543705 memory.go:184] no items to output this cycle
I0319 17:14:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 17:14:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:14:13.409773  543705 memory.go:191] Add success.
W0319 17:14:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:14:13.409805  543705 cpu.go:282] Add success.
W0319 17:14:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:14:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:14:13.420061  543705 net.go:648] Add success.
I0319 17:14:13.422940  543705 net.go:770] primary dev: ETH0
I0319 17:14:13.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:14:13.422963  543705 net.go:698] Add success.
I0319 17:14:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:14:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:14:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 17:14:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:14:14.456817  543705 disk_worker.go:494] system disk:vda1
I0319 17:14:14.456847  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:14:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:14:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:14:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:14:16.472419  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:14:20.161678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:14:20.164221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:14:20.164228  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395080 0xc0003950c0]
E0319 17:14:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:14:23.409804  543705 memory.go:184] no items to output this cycle
I0319 17:14:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:14:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:14:33.409797  543705 memory.go:184] no items to output this cycle
I0319 17:14:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 17:14:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:14:43.409784  543705 memory.go:191] Add success.
I0319 17:14:43.409862  543705 cpu.go:282] Add success.
I0319 17:14:43.420066  543705 net.go:648] Add success.
I0319 17:14:43.422874  543705 net.go:770] primary dev: ETH0
I0319 17:14:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:14:43.422900  543705 net.go:698] Add success.
I0319 17:14:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:14:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:14:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:14:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:14:53.409772  543705 memory.go:184] no items to output this cycle
I0319 17:14:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 17:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:15:03.409779  543705 memory.go:184] no items to output this cycle
I0319 17:15:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 17:15:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:15:13.409788  543705 memory.go:191] Add success.
I0319 17:15:13.409788  543705 cpu.go:282] Add success.
W0319 17:15:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:15:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:15:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:15:13.420129  543705 net.go:648] Add success.
I0319 17:15:13.422951  543705 net.go:770] primary dev: ETH0
I0319 17:15:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:15:13.422982  543705 net.go:698] Add success.
I0319 17:15:13.469682  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a651fae5-919a-4cd9-8f43-85e395d472ce","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:15:13.469717  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:15:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:15:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 17:15:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:15:14.459000  543705 disk_worker.go:494] system disk:vda1
I0319 17:15:14.459038  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:15:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:15:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:15:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:15:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:15:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:15:20.165676  543705 disk_info.go:125] begin check local disk info of client
I0319 17:15:20.168272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:15:20.168279  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b0300 0xc0004b0340]
E0319 17:15:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:15:23.409783  543705 memory.go:184] no items to output this cycle
I0319 17:15:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 17:15:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:15:33.409799  543705 memory.go:184] no items to output this cycle
I0319 17:15:33.409814  543705 cpu.go:275] no items to output this cycle
I0319 17:15:37.781731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:15:37.781737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:15:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:15:43.410703  543705 memory.go:191] Add success.
I0319 17:15:43.409802  543705 cpu.go:282] Add success.
I0319 17:15:43.420420  543705 net.go:648] Add success.
I0319 17:15:43.423060  543705 net.go:770] primary dev: ETH0
I0319 17:15:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:15:43.423090  543705 net.go:698] Add success.
I0319 17:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:15:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:15:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:15:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:15:53.409781  543705 memory.go:184] no items to output this cycle
I0319 17:15:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 17:16:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:16:03.409785  543705 memory.go:184] no items to output this cycle
I0319 17:16:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:16:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:16:13.409794  543705 memory.go:191] Add success.
I0319 17:16:13.409797  543705 cpu.go:282] Add success.
W0319 17:16:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:16:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:16:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:16:13.420382  543705 net.go:648] Add success.
I0319 17:16:13.423212  543705 net.go:770] primary dev: ETH0
I0319 17:16:13.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:16:13.423241  543705 net.go:698] Add success.
I0319 17:16:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:16:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:16:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0319 17:16:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:16:14.459201  543705 disk_worker.go:494] system disk:vda1
I0319 17:16:14.459230  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:16:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:16:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:16:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:16:20.169685  543705 disk_info.go:125] begin check local disk info of client
I0319 17:16:20.172146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:16:20.172154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa180 0xc0001aa1c0]
E0319 17:16:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:16:23.409801  543705 memory.go:184] no items to output this cycle
I0319 17:16:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 17:16:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:16:33.409783  543705 memory.go:184] no items to output this cycle
I0319 17:16:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 17:16:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:16:43.409819  543705 cpu.go:282] Add success.
I0319 17:16:43.409829  543705 memory.go:191] Add success.
I0319 17:16:43.420067  543705 net.go:648] Add success.
I0319 17:16:43.422892  543705 net.go:770] primary dev: ETH0
I0319 17:16:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:16:43.422920  543705 net.go:698] Add success.
I0319 17:16:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:16:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:16:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:16:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:16:53.409809  543705 memory.go:184] no items to output this cycle
I0319 17:16:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 17:17:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:17:03.409769  543705 memory.go:184] no items to output this cycle
I0319 17:17:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 17:17:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:17:13.409796  543705 cpu.go:282] Add success.
I0319 17:17:13.409800  543705 memory.go:191] Add success.
W0319 17:17:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:17:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:17:13.420080  543705 net.go:648] Add success.
I0319 17:17:13.422817  543705 net.go:770] primary dev: ETH0
I0319 17:17:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:17:13.422844  543705 net.go:698] Add success.
I0319 17:17:13.453505  543705 event_worker.go:152] Polling the log file for events...
W0319 17:17:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:17:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0319 17:17:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:17:14.456188  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:17:14.456196  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:17:14.456202  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:17:14.457028  543705 disk_worker.go:494] system disk:vda1
I0319 17:17:14.457068  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:17:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:17:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:17:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:17:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:17:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:17:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:17:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:17:20.173677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:17:20.176096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:17:20.176102  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462780 0xc0004627c0]
E0319 17:17:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:17:23.409767  543705 memory.go:184] no items to output this cycle
I0319 17:17:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 17:17:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:17:33.409795  543705 memory.go:184] no items to output this cycle
I0319 17:17:33.409807  543705 cpu.go:275] no items to output this cycle
E0319 17:17:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:17:43.409786  543705 memory.go:191] Add success.
I0319 17:17:43.409826  543705 cpu.go:282] Add success.
I0319 17:17:43.419970  543705 net.go:648] Add success.
I0319 17:17:43.422705  543705 net.go:770] primary dev: ETH0
I0319 17:17:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:17:43.422732  543705 net.go:698] Add success.
I0319 17:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:17:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:17:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:17:53.410500  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:17:53.410519  543705 memory.go:184] no items to output this cycle
I0319 17:17:53.410521  543705 cpu.go:275] no items to output this cycle
E0319 17:18:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:18:03.409805  543705 memory.go:184] no items to output this cycle
I0319 17:18:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 17:18:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:18:13.409781  543705 memory.go:191] Add success.
W0319 17:18:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:18:13.409808  543705 cpu.go:282] Add success.
W0319 17:18:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:18:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:18:13.420263  543705 net.go:648] Add success.
I0319 17:18:13.423167  543705 net.go:770] primary dev: ETH0
I0319 17:18:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:18:13.423196  543705 net.go:698] Add success.
I0319 17:18:13.469890  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a03be92-c2b4-4e13-836c-76aa87292c9a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:18:13.469931  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:18:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:18:14.455296  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:18:14.455310  543705 disk_worker.go:708] disk space is not compliant
W0319 17:18:14.455315  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:18:14.456943  543705 disk_worker.go:494] system disk:vda1
I0319 17:18:14.456988  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:18:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:18:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:18:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:18:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:18:20.177677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:18:20.180185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:18:20.180191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b01c0 0xc0002b0200]
E0319 17:18:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:18:23.409790  543705 memory.go:184] no items to output this cycle
I0319 17:18:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 17:18:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:18:33.409800  543705 memory.go:184] no items to output this cycle
I0319 17:18:33.409816  543705 cpu.go:275] no items to output this cycle
I0319 17:18:37.783611  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:18:37.783618  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:18:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:18:43.410664  543705 memory.go:191] Add success.
I0319 17:18:43.409826  543705 cpu.go:282] Add success.
I0319 17:18:43.420412  543705 net.go:648] Add success.
I0319 17:18:43.423028  543705 net.go:770] primary dev: ETH0
I0319 17:18:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:18:43.423054  543705 net.go:698] Add success.
I0319 17:18:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:18:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:18:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:18:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:18:53.409786  543705 memory.go:184] no items to output this cycle
I0319 17:18:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 17:19:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:19:03.409766  543705 memory.go:184] no items to output this cycle
I0319 17:19:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 17:19:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:19:13.409812  543705 memory.go:191] Add success.
I0319 17:19:13.409815  543705 cpu.go:282] Add success.
W0319 17:19:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:19:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:19:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:19:13.420054  543705 net.go:648] Add success.
I0319 17:19:13.423187  543705 net.go:770] primary dev: ETH0
I0319 17:19:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:19:13.423212  543705 net.go:698] Add success.
I0319 17:19:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:19:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:19:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0319 17:19:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:19:14.457084  543705 disk_worker.go:494] system disk:vda1
I0319 17:19:14.457119  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:19:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:19:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:19:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:19:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:19:16.472453  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:19:20.181678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:19:20.184276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:19:20.184283  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052a180 0xc00052a1c0]
E0319 17:19:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:19:23.409771  543705 memory.go:184] no items to output this cycle
I0319 17:19:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 17:19:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:19:33.409761  543705 memory.go:184] no items to output this cycle
I0319 17:19:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 17:19:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:19:43.409807  543705 memory.go:191] Add success.
I0319 17:19:43.409815  543705 cpu.go:282] Add success.
I0319 17:19:43.419868  543705 net.go:648] Add success.
I0319 17:19:43.422917  543705 net.go:770] primary dev: ETH0
I0319 17:19:43.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:19:43.422946  543705 net.go:698] Add success.
I0319 17:19:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:19:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:19:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:19:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:19:53.409808  543705 memory.go:184] no items to output this cycle
I0319 17:19:53.409829  543705 cpu.go:275] no items to output this cycle
E0319 17:20:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:20:03.409783  543705 memory.go:184] no items to output this cycle
I0319 17:20:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 17:20:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:20:13.409796  543705 memory.go:191] Add success.
I0319 17:20:13.409796  543705 cpu.go:282] Add success.
W0319 17:20:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:20:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:20:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:20:13.420173  543705 net.go:648] Add success.
I0319 17:20:13.423063  543705 net.go:770] primary dev: ETH0
I0319 17:20:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:20:13.423088  543705 net.go:698] Add success.
I0319 17:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:20:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:20:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 17:20:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:20:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 17:20:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:20:15.456016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:20:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:20:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:20:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:20:16.472514  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:20:20.185680  543705 disk_info.go:125] begin check local disk info of client
I0319 17:20:20.188237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:20:20.188243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352080 0xc0003520c0]
E0319 17:20:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:20:23.409773  543705 memory.go:184] no items to output this cycle
I0319 17:20:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 17:20:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:20:33.409767  543705 memory.go:184] no items to output this cycle
I0319 17:20:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 17:20:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:20:43.409795  543705 memory.go:191] Add success.
I0319 17:20:43.409796  543705 cpu.go:282] Add success.
I0319 17:20:43.419964  543705 net.go:648] Add success.
I0319 17:20:43.422717  543705 net.go:770] primary dev: ETH0
I0319 17:20:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:20:43.422746  543705 net.go:698] Add success.
I0319 17:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:20:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:20:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:20:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:20:53.409769  543705 memory.go:184] no items to output this cycle
I0319 17:20:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 17:21:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:21:03.409785  543705 memory.go:184] no items to output this cycle
I0319 17:21:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 17:21:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:21:13.409807  543705 memory.go:191] Add success.
I0319 17:21:13.409807  543705 cpu.go:282] Add success.
W0319 17:21:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:21:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:21:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:21:13.420208  543705 net.go:648] Add success.
I0319 17:21:13.422891  543705 net.go:770] primary dev: ETH0
I0319 17:21:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:21:13.422917  543705 net.go:698] Add success.
I0319 17:21:13.464060  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7fb0844-98c4-4295-be83-b1218642bf0a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:21:13.464093  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:21:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:21:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:21:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 17:21:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:21:14.456622  543705 disk_worker.go:494] system disk:vda1
I0319 17:21:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:21:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:21:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:21:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:21:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:21:16.472492  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:21:20.189685  543705 disk_info.go:125] begin check local disk info of client
I0319 17:21:20.192234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:21:20.192241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272980 0xc0002729c0]
E0319 17:21:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:21:23.409786  543705 memory.go:184] no items to output this cycle
I0319 17:21:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:21:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:21:33.409823  543705 memory.go:184] no items to output this cycle
I0319 17:21:33.409841  543705 cpu.go:275] no items to output this cycle
I0319 17:21:37.784619  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:21:37.784626  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:21:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:21:43.410639  543705 memory.go:191] Add success.
I0319 17:21:43.409814  543705 cpu.go:282] Add success.
I0319 17:21:43.420563  543705 net.go:648] Add success.
I0319 17:21:43.423107  543705 net.go:770] primary dev: ETH0
I0319 17:21:43.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:21:43.423133  543705 net.go:698] Add success.
I0319 17:21:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:21:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:21:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:21:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:21:53.409824  543705 memory.go:184] no items to output this cycle
I0319 17:21:53.409835  543705 cpu.go:275] no items to output this cycle
E0319 17:22:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:22:03.409813  543705 memory.go:184] no items to output this cycle
I0319 17:22:03.409829  543705 cpu.go:275] no items to output this cycle
E0319 17:22:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:22:13.409794  543705 memory.go:191] Add success.
I0319 17:22:13.409812  543705 cpu.go:282] Add success.
W0319 17:22:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:22:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:22:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:22:13.420060  543705 net.go:648] Add success.
I0319 17:22:13.423036  543705 net.go:770] primary dev: ETH0
I0319 17:22:13.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:22:13.423062  543705 net.go:698] Add success.
W0319 17:22:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:22:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 17:22:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:22:14.456136  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:22:14.456146  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:22:14.456152  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:22:14.456457  543705 disk_worker.go:494] system disk:vda1
I0319 17:22:14.456487  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:22:15.457038  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:22:15.457051  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:22:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:22:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:22:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:22:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:22:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:22:20.193680  543705 disk_info.go:125] begin check local disk info of client
I0319 17:22:20.196248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:22:20.196256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ec500 0xc0004ec540]
E0319 17:22:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:22:23.409812  543705 memory.go:184] no items to output this cycle
I0319 17:22:23.409825  543705 cpu.go:275] no items to output this cycle
E0319 17:22:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:22:33.409798  543705 cpu.go:275] no items to output this cycle
I0319 17:22:33.409801  543705 memory.go:184] no items to output this cycle
E0319 17:22:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:22:43.409822  543705 memory.go:191] Add success.
I0319 17:22:43.409830  543705 cpu.go:282] Add success.
I0319 17:22:43.419953  543705 net.go:648] Add success.
I0319 17:22:43.422708  543705 net.go:770] primary dev: ETH0
I0319 17:22:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:22:43.422734  543705 net.go:698] Add success.
I0319 17:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:22:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:22:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:22:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:22:53.409774  543705 memory.go:184] no items to output this cycle
I0319 17:22:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 17:23:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:23:03.409796  543705 memory.go:184] no items to output this cycle
I0319 17:23:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 17:23:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:23:13.409782  543705 memory.go:191] Add success.
I0319 17:23:13.409804  543705 cpu.go:282] Add success.
W0319 17:23:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:23:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:23:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:23:13.420124  543705 net.go:648] Add success.
I0319 17:23:13.423073  543705 net.go:770] primary dev: ETH0
I0319 17:23:13.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:23:13.423102  543705 net.go:698] Add success.
I0319 17:23:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:23:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:23:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0319 17:23:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:23:14.456627  543705 disk_worker.go:494] system disk:vda1
I0319 17:23:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:23:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:23:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:23:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:23:20.197678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:23:20.200256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:23:20.200263  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296080 0xc0002960c0]
E0319 17:23:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:23:23.409783  543705 cpu.go:275] no items to output this cycle
I0319 17:23:23.409792  543705 memory.go:184] no items to output this cycle
E0319 17:23:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:23:33.409782  543705 memory.go:184] no items to output this cycle
I0319 17:23:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:23:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:23:43.409796  543705 memory.go:191] Add success.
I0319 17:23:43.409798  543705 cpu.go:282] Add success.
I0319 17:23:43.419872  543705 net.go:648] Add success.
I0319 17:23:43.422358  543705 net.go:770] primary dev: ETH0
I0319 17:23:43.422370  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:23:43.422382  543705 net.go:698] Add success.
I0319 17:23:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:23:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:23:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:23:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:23:53.409785  543705 memory.go:184] no items to output this cycle
I0319 17:23:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 17:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:24:03.409778  543705 memory.go:184] no items to output this cycle
I0319 17:24:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 17:24:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:24:13.409801  543705 memory.go:191] Add success.
I0319 17:24:13.409800  543705 cpu.go:282] Add success.
W0319 17:24:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:24:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:24:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:24:13.420152  543705 net.go:648] Add success.
I0319 17:24:13.423051  543705 net.go:770] primary dev: ETH0
I0319 17:24:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:24:13.423076  543705 net.go:698] Add success.
I0319 17:24:13.469294  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e148f46-d143-4418-8a91-0ec92d630db3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:24:13.469331  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:24:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:24:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:24:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 17:24:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:24:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 17:24:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:24:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:24:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:24:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:24:16.472459  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:24:20.201678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:24:20.204253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:24:20.204259  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466100 0xc000466140]
E0319 17:24:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:24:23.409774  543705 memory.go:184] no items to output this cycle
I0319 17:24:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:24:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:24:33.409800  543705 memory.go:184] no items to output this cycle
I0319 17:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 17:24:37.784773  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:24:37.784779  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:24:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:24:43.410962  543705 memory.go:191] Add success.
I0319 17:24:43.409825  543705 cpu.go:282] Add success.
I0319 17:24:43.419703  543705 net.go:648] Add success.
I0319 17:24:43.422570  543705 net.go:770] primary dev: ETH0
I0319 17:24:43.422585  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:24:43.422599  543705 net.go:698] Add success.
I0319 17:24:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:24:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:24:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:24:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:24:53.409778  543705 memory.go:184] no items to output this cycle
I0319 17:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 17:25:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:25:03.409771  543705 memory.go:184] no items to output this cycle
I0319 17:25:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 17:25:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:25:13.409793  543705 memory.go:191] Add success.
I0319 17:25:13.409798  543705 cpu.go:282] Add success.
W0319 17:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:25:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:25:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:25:13.420148  543705 net.go:648] Add success.
I0319 17:25:13.422806  543705 net.go:770] primary dev: ETH0
I0319 17:25:13.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:25:13.422835  543705 net.go:698] Add success.
I0319 17:25:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:25:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:25:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 17:25:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:25:14.456515  543705 disk_worker.go:494] system disk:vda1
I0319 17:25:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:25:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:25:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:25:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:25:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:25:16.472107  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:25:20.206784  543705 disk_info.go:125] begin check local disk info of client
I0319 17:25:20.209400  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:25:20.209406  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4180 0xc0000c41c0]
E0319 17:25:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:25:23.409779  543705 memory.go:184] no items to output this cycle
I0319 17:25:23.409781  543705 cpu.go:275] no items to output this cycle
I0319 17:25:33.409776  543705 cpu.go:275] no items to output this cycle
E0319 17:25:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:25:33.409791  543705 memory.go:184] no items to output this cycle
E0319 17:25:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:25:43.409816  543705 memory.go:191] Add success.
I0319 17:25:43.409826  543705 cpu.go:282] Add success.
I0319 17:25:43.420019  543705 net.go:648] Add success.
I0319 17:25:43.422661  543705 net.go:770] primary dev: ETH0
I0319 17:25:43.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:25:43.422686  543705 net.go:698] Add success.
I0319 17:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:25:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:25:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:25:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:25:53.409784  543705 memory.go:184] no items to output this cycle
I0319 17:25:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:26:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:26:03.409775  543705 memory.go:184] no items to output this cycle
I0319 17:26:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 17:26:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:26:13.409814  543705 memory.go:191] Add success.
I0319 17:26:13.409823  543705 cpu.go:282] Add success.
W0319 17:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:26:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:26:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:26:13.420136  543705 net.go:648] Add success.
I0319 17:26:13.423043  543705 net.go:770] primary dev: ETH0
I0319 17:26:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:26:13.423074  543705 net.go:698] Add success.
I0319 17:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:26:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:26:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0319 17:26:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:26:14.456619  543705 disk_worker.go:494] system disk:vda1
I0319 17:26:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:26:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:26:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:26:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:26:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:26:20.209672  543705 disk_info.go:125] begin check local disk info of client
I0319 17:26:20.212293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:26:20.212300  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a600 0xc00032a640]
E0319 17:26:23.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:26:23.409908  543705 memory.go:184] no items to output this cycle
I0319 17:26:23.409989  543705 cpu.go:275] no items to output this cycle
E0319 17:26:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:26:33.409802  543705 memory.go:184] no items to output this cycle
I0319 17:26:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 17:26:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:26:43.409814  543705 memory.go:191] Add success.
I0319 17:26:43.409820  543705 cpu.go:282] Add success.
I0319 17:26:43.419955  543705 net.go:648] Add success.
I0319 17:26:43.422873  543705 net.go:770] primary dev: ETH0
I0319 17:26:43.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:26:43.422903  543705 net.go:698] Add success.
I0319 17:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:26:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:26:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:26:53.409781  543705 memory.go:184] no items to output this cycle
I0319 17:26:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 17:27:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:27:03.409815  543705 memory.go:184] no items to output this cycle
I0319 17:27:03.409825  543705 cpu.go:275] no items to output this cycle
E0319 17:27:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:27:13.409788  543705 memory.go:191] Add success.
I0319 17:27:13.409813  543705 cpu.go:282] Add success.
W0319 17:27:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:27:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:27:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:27:13.420151  543705 net.go:648] Add success.
I0319 17:27:13.423063  543705 net.go:770] primary dev: ETH0
I0319 17:27:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:27:13.423093  543705 net.go:698] Add success.
I0319 17:27:13.429697  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 17:27:13.452813  543705 event_worker.go:152] Polling the log file for events...
I0319 17:27:13.463581  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f74adf20-6192-4be6-bb41-82951dceb9da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:27:13.463616  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 17:27:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:27:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 17:27:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:27:14.456917  543705 disk_worker.go:494] system disk:vda1
E0319 17:27:14.456916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:27:14.456926  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:27:14.456931  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:27:14.456951  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:27:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:27:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:27:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:27:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:27:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:27:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:27:16.472309  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:27:20.213679  543705 disk_info.go:125] begin check local disk info of client
I0319 17:27:20.216236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:27:20.216242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f4280 0xc0001f42c0]
E0319 17:27:23.410274  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:27:23.410294  543705 memory.go:184] no items to output this cycle
I0319 17:27:23.410297  543705 cpu.go:275] no items to output this cycle
E0319 17:27:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:27:33.409766  543705 memory.go:184] no items to output this cycle
I0319 17:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0319 17:27:37.785733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:27:37.785739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:27:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:27:43.410761  543705 memory.go:191] Add success.
I0319 17:27:43.409821  543705 cpu.go:282] Add success.
I0319 17:27:43.420543  543705 net.go:648] Add success.
I0319 17:27:43.424425  543705 net.go:770] primary dev: ETH0
I0319 17:27:43.424439  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:27:43.424454  543705 net.go:698] Add success.
I0319 17:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:27:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:27:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:27:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:27:53.409771  543705 memory.go:184] no items to output this cycle
I0319 17:27:53.409793  543705 cpu.go:275] no items to output this cycle
E0319 17:28:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:28:03.409794  543705 memory.go:184] no items to output this cycle
I0319 17:28:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 17:28:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:28:13.409790  543705 memory.go:191] Add success.
I0319 17:28:13.409811  543705 cpu.go:282] Add success.
W0319 17:28:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:28:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:28:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:28:13.420170  543705 net.go:648] Add success.
I0319 17:28:13.423199  543705 net.go:770] primary dev: ETH0
I0319 17:28:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:28:13.423226  543705 net.go:698] Add success.
I0319 17:28:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:28:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:28:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0319 17:28:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:28:14.456504  543705 disk_worker.go:494] system disk:vda1
I0319 17:28:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:28:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:28:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:28:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:28:16.472430  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:28:20.217684  543705 disk_info.go:125] begin check local disk info of client
I0319 17:28:20.220260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:28:20.220267  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e100 0xc00039e140]
E0319 17:28:23.410710  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:28:23.410726  543705 memory.go:184] no items to output this cycle
I0319 17:28:23.410729  543705 cpu.go:275] no items to output this cycle
E0319 17:28:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:28:33.409775  543705 memory.go:184] no items to output this cycle
I0319 17:28:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 17:28:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:28:43.409794  543705 memory.go:191] Add success.
I0319 17:28:43.409798  543705 cpu.go:282] Add success.
I0319 17:28:43.420003  543705 net.go:648] Add success.
I0319 17:28:43.422669  543705 net.go:770] primary dev: ETH0
I0319 17:28:43.422683  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:28:43.422698  543705 net.go:698] Add success.
I0319 17:28:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:28:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:28:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:28:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:28:53.409807  543705 memory.go:184] no items to output this cycle
I0319 17:28:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 17:29:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:29:03.409773  543705 memory.go:184] no items to output this cycle
I0319 17:29:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 17:29:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:29:13.409818  543705 memory.go:191] Add success.
I0319 17:29:13.409825  543705 cpu.go:282] Add success.
W0319 17:29:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:29:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:29:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:29:13.420415  543705 net.go:648] Add success.
I0319 17:29:13.423433  543705 net.go:770] primary dev: ETH0
I0319 17:29:13.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:29:13.423458  543705 net.go:698] Add success.
I0319 17:29:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:29:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:29:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 17:29:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:29:14.456600  543705 disk_worker.go:494] system disk:vda1
I0319 17:29:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:29:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:29:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:29:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:29:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:29:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:29:20.221671  543705 disk_info.go:125] begin check local disk info of client
I0319 17:29:20.224326  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:29:20.224333  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f1040 0xc0000f1080]
E0319 17:29:23.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:29:23.409880  543705 memory.go:184] no items to output this cycle
I0319 17:29:23.409899  543705 cpu.go:275] no items to output this cycle
E0319 17:29:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:29:33.409778  543705 memory.go:184] no items to output this cycle
I0319 17:29:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 17:29:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:29:43.409786  543705 memory.go:191] Add success.
I0319 17:29:43.409787  543705 cpu.go:282] Add success.
I0319 17:29:43.419888  543705 net.go:648] Add success.
I0319 17:29:43.422440  543705 net.go:770] primary dev: ETH0
I0319 17:29:43.422453  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:29:43.422465  543705 net.go:698] Add success.
I0319 17:29:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:29:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:29:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:29:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:29:53.409782  543705 memory.go:184] no items to output this cycle
I0319 17:29:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:30:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:30:03.409782  543705 memory.go:184] no items to output this cycle
I0319 17:30:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 17:30:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:30:13.409792  543705 memory.go:191] Add success.
W0319 17:30:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:30:13.409824  543705 cpu.go:282] Add success.
W0319 17:30:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:30:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:30:13.420165  543705 net.go:648] Add success.
I0319 17:30:13.423092  543705 net.go:770] primary dev: ETH0
I0319 17:30:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:30:13.423122  543705 net.go:698] Add success.
I0319 17:30:13.469918  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"097d2b83-ac18-40d1-af06-bd21f3334f38","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:30:13.469952  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:30:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:30:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:30:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 17:30:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:30:14.456602  543705 disk_worker.go:494] system disk:vda1
I0319 17:30:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:30:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:30:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:30:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:30:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:30:20.225676  543705 disk_info.go:125] begin check local disk info of client
I0319 17:30:20.228253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:30:20.228260  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475980 0xc0004759c0]
E0319 17:30:23.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:30:23.409884  543705 memory.go:184] no items to output this cycle
I0319 17:30:23.410036  543705 cpu.go:275] no items to output this cycle
E0319 17:30:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:30:33.409785  543705 memory.go:184] no items to output this cycle
I0319 17:30:33.409800  543705 cpu.go:275] no items to output this cycle
I0319 17:30:37.785874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:30:37.785881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:30:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:30:43.410650  543705 memory.go:191] Add success.
I0319 17:30:43.409821  543705 cpu.go:282] Add success.
I0319 17:30:43.420438  543705 net.go:648] Add success.
I0319 17:30:43.423282  543705 net.go:770] primary dev: ETH0
I0319 17:30:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:30:43.423309  543705 net.go:698] Add success.
I0319 17:30:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:30:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:30:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:30:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:30:53.410254  543705 memory.go:184] no items to output this cycle
I0319 17:30:53.410275  543705 cpu.go:275] no items to output this cycle
E0319 17:31:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:31:03.409804  543705 memory.go:184] no items to output this cycle
I0319 17:31:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 17:31:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:31:13.409791  543705 memory.go:191] Add success.
I0319 17:31:13.409813  543705 cpu.go:282] Add success.
W0319 17:31:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:31:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:31:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:31:13.420253  543705 net.go:648] Add success.
I0319 17:31:13.423364  543705 net.go:770] primary dev: ETH0
I0319 17:31:13.423377  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:31:13.423389  543705 net.go:698] Add success.
I0319 17:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:31:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:31:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 17:31:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:31:14.456515  543705 disk_worker.go:494] system disk:vda1
I0319 17:31:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:31:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:31:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:31:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:31:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:31:20.229682  543705 disk_info.go:125] begin check local disk info of client
I0319 17:31:20.232254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:31:20.232261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0319 17:31:23.410408  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:31:23.410423  543705 cpu.go:275] no items to output this cycle
I0319 17:31:23.410425  543705 memory.go:184] no items to output this cycle
E0319 17:31:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:31:33.409792  543705 memory.go:184] no items to output this cycle
I0319 17:31:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 17:31:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:31:43.409814  543705 memory.go:191] Add success.
I0319 17:31:43.409823  543705 cpu.go:282] Add success.
I0319 17:31:43.419954  543705 net.go:648] Add success.
I0319 17:31:43.422852  543705 net.go:770] primary dev: ETH0
I0319 17:31:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:31:43.422881  543705 net.go:698] Add success.
I0319 17:31:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:31:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:31:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:31:53.409781  543705 cpu.go:275] no items to output this cycle
I0319 17:31:53.409784  543705 memory.go:184] no items to output this cycle
E0319 17:32:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:32:03.409799  543705 memory.go:184] no items to output this cycle
I0319 17:32:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:32:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:32:13.409783  543705 memory.go:191] Add success.
I0319 17:32:13.409804  543705 cpu.go:282] Add success.
W0319 17:32:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:32:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:32:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:32:13.420270  543705 net.go:648] Add success.
I0319 17:32:13.423323  543705 net.go:770] primary dev: ETH0
I0319 17:32:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:32:13.423352  543705 net.go:698] Add success.
W0319 17:32:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:32:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 17:32:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:32:14.456131  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:32:14.456141  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:32:14.456147  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:32:14.456462  543705 disk_worker.go:494] system disk:vda1
I0319 17:32:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:32:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:32:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:32:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:32:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:32:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:32:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:32:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:32:20.233676  543705 disk_info.go:125] begin check local disk info of client
I0319 17:32:20.236317  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:32:20.236324  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492e40 0xc000492e80]
E0319 17:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:32:23.409795  543705 memory.go:184] no items to output this cycle
I0319 17:32:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 17:32:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:32:33.409774  543705 memory.go:184] no items to output this cycle
I0319 17:32:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 17:32:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:32:43.409822  543705 memory.go:191] Add success.
I0319 17:32:43.409823  543705 cpu.go:282] Add success.
I0319 17:32:43.420019  543705 net.go:648] Add success.
I0319 17:32:43.423092  543705 net.go:770] primary dev: ETH0
I0319 17:32:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:32:43.423118  543705 net.go:698] Add success.
I0319 17:32:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:32:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:32:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:32:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:32:53.409772  543705 memory.go:184] no items to output this cycle
I0319 17:32:53.409820  543705 cpu.go:275] no items to output this cycle
E0319 17:33:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:33:03.409795  543705 memory.go:184] no items to output this cycle
I0319 17:33:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:33:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:33:13.409788  543705 memory.go:191] Add success.
I0319 17:33:13.409805  543705 cpu.go:282] Add success.
W0319 17:33:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:33:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:33:13.420098  543705 net.go:648] Add success.
I0319 17:33:13.422802  543705 net.go:770] primary dev: ETH0
I0319 17:33:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:33:13.422827  543705 net.go:698] Add success.
I0319 17:33:13.469582  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"33c42cab-3cb6-4aff-8c32-2b43496041e4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:33:13.469620  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:33:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:33:14.455344  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:33:14.455358  543705 disk_worker.go:708] disk space is not compliant
W0319 17:33:14.455361  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:33:14.457494  543705 disk_worker.go:494] system disk:vda1
I0319 17:33:14.457536  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:33:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:33:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:33:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:33:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:33:20.237678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:33:20.240221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:33:20.240228  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395000 0xc000395040]
E0319 17:33:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:33:23.409797  543705 memory.go:184] no items to output this cycle
I0319 17:33:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 17:33:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:33:33.409779  543705 memory.go:184] no items to output this cycle
I0319 17:33:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 17:33:37.786022  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:33:37.786028  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:33:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:33:43.410677  543705 memory.go:191] Add success.
I0319 17:33:43.409827  543705 cpu.go:282] Add success.
I0319 17:33:43.420464  543705 net.go:648] Add success.
I0319 17:33:43.423322  543705 net.go:770] primary dev: ETH0
I0319 17:33:43.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:33:43.423361  543705 net.go:698] Add success.
I0319 17:33:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:33:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:33:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:33:53.409778  543705 memory.go:184] no items to output this cycle
I0319 17:33:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 17:34:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:34:03.409804  543705 memory.go:184] no items to output this cycle
I0319 17:34:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 17:34:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:34:13.409783  543705 memory.go:191] Add success.
I0319 17:34:13.409808  543705 cpu.go:282] Add success.
W0319 17:34:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:34:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:34:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:34:13.420088  543705 net.go:648] Add success.
I0319 17:34:13.422882  543705 net.go:770] primary dev: ETH0
I0319 17:34:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:34:13.422912  543705 net.go:698] Add success.
I0319 17:34:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:34:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:34:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 17:34:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:34:14.456576  543705 disk_worker.go:494] system disk:vda1
I0319 17:34:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:34:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:34:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:34:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:34:16.472518  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:34:20.241687  543705 disk_info.go:125] begin check local disk info of client
I0319 17:34:20.244249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:34:20.244256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003562c0 0xc000356300]
E0319 17:34:23.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:34:23.410271  543705 memory.go:184] no items to output this cycle
I0319 17:34:23.410274  543705 cpu.go:275] no items to output this cycle
E0319 17:34:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:34:33.409803  543705 memory.go:184] no items to output this cycle
I0319 17:34:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:34:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:34:43.409812  543705 memory.go:191] Add success.
I0319 17:34:43.409825  543705 cpu.go:282] Add success.
I0319 17:34:43.419950  543705 net.go:648] Add success.
I0319 17:34:43.422881  543705 net.go:770] primary dev: ETH0
I0319 17:34:43.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:34:43.422907  543705 net.go:698] Add success.
I0319 17:34:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:34:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:34:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:34:53.409799  543705 memory.go:184] no items to output this cycle
I0319 17:34:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 17:35:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:35:03.409775  543705 memory.go:184] no items to output this cycle
I0319 17:35:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 17:35:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:35:13.409835  543705 memory.go:191] Add success.
I0319 17:35:13.409847  543705 cpu.go:282] Add success.
W0319 17:35:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:35:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:35:13.409896  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:35:13.420327  543705 net.go:648] Add success.
I0319 17:35:13.422947  543705 net.go:770] primary dev: ETH0
I0319 17:35:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:35:13.422973  543705 net.go:698] Add success.
I0319 17:35:14.453954  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:35:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:35:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0319 17:35:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:35:14.456560  543705 disk_worker.go:494] system disk:vda1
I0319 17:35:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:35:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:35:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:35:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:35:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:35:20.245680  543705 disk_info.go:125] begin check local disk info of client
I0319 17:35:20.248270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:35:20.248276  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278300 0xc000278340]
E0319 17:35:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:35:23.409797  543705 memory.go:184] no items to output this cycle
I0319 17:35:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 17:35:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:35:33.409803  543705 memory.go:184] no items to output this cycle
I0319 17:35:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 17:35:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:35:43.409773  543705 memory.go:191] Add success.
I0319 17:35:43.409805  543705 cpu.go:282] Add success.
I0319 17:35:43.419866  543705 net.go:648] Add success.
I0319 17:35:43.422771  543705 net.go:770] primary dev: ETH0
I0319 17:35:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:35:43.422801  543705 net.go:698] Add success.
I0319 17:35:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:35:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:35:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:35:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:35:53.409779  543705 memory.go:184] no items to output this cycle
I0319 17:35:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 17:36:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:36:03.409776  543705 memory.go:184] no items to output this cycle
I0319 17:36:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 17:36:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:36:13.409790  543705 cpu.go:282] Add success.
I0319 17:36:13.409795  543705 memory.go:191] Add success.
W0319 17:36:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:36:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:36:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:36:13.420211  543705 net.go:648] Add success.
I0319 17:36:13.422858  543705 net.go:770] primary dev: ETH0
I0319 17:36:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:36:13.422888  543705 net.go:698] Add success.
I0319 17:36:13.469641  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fdbc3cc3-3ad4-4d3c-aa91-cfa0478a935c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:36:13.469692  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:36:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:36:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:36:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 17:36:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:36:14.456680  543705 disk_worker.go:494] system disk:vda1
I0319 17:36:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:36:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:36:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:36:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:36:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:36:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:36:20.249679  543705 disk_info.go:125] begin check local disk info of client
I0319 17:36:20.252271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:36:20.252278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003904c0 0xc000390980]
E0319 17:36:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:36:23.409783  543705 memory.go:184] no items to output this cycle
I0319 17:36:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 17:36:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:36:33.409782  543705 memory.go:184] no items to output this cycle
I0319 17:36:33.409790  543705 cpu.go:275] no items to output this cycle
I0319 17:36:37.787637  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:36:37.787643  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:36:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:36:43.410659  543705 memory.go:191] Add success.
I0319 17:36:43.409800  543705 cpu.go:282] Add success.
I0319 17:36:43.420354  543705 net.go:648] Add success.
I0319 17:36:43.422929  543705 net.go:770] primary dev: ETH0
I0319 17:36:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:36:43.422954  543705 net.go:698] Add success.
I0319 17:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:36:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:36:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:36:53.409775  543705 memory.go:184] no items to output this cycle
I0319 17:36:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 17:37:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:37:03.409788  543705 memory.go:184] no items to output this cycle
I0319 17:37:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 17:37:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:37:13.409800  543705 memory.go:191] Add success.
I0319 17:37:13.409804  543705 cpu.go:282] Add success.
W0319 17:37:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:37:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:37:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:37:13.420310  543705 net.go:648] Add success.
I0319 17:37:13.423089  543705 net.go:770] primary dev: ETH0
I0319 17:37:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:37:13.423115  543705 net.go:698] Add success.
I0319 17:37:13.453673  543705 event_worker.go:152] Polling the log file for events...
W0319 17:37:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:37:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 17:37:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:37:14.458426  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:37:14.458462  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:37:14.458467  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:37:14.458455  543705 disk_worker.go:494] system disk:vda1
I0319 17:37:14.458500  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:37:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:37:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:37:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:37:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:37:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:37:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:37:16.472329  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:37:20.253689  543705 disk_info.go:125] begin check local disk info of client
I0319 17:37:20.256231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:37:20.256238  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e180 0xc00047e1c0]
E0319 17:37:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:37:23.409809  543705 memory.go:184] no items to output this cycle
I0319 17:37:23.409823  543705 cpu.go:275] no items to output this cycle
E0319 17:37:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:37:33.409806  543705 memory.go:184] no items to output this cycle
I0319 17:37:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 17:37:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:37:43.409799  543705 memory.go:191] Add success.
I0319 17:37:43.409821  543705 cpu.go:282] Add success.
I0319 17:37:43.419962  543705 net.go:648] Add success.
I0319 17:37:43.422701  543705 net.go:770] primary dev: ETH0
I0319 17:37:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:37:43.422730  543705 net.go:698] Add success.
I0319 17:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:37:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:37:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:37:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:37:53.409807  543705 memory.go:184] no items to output this cycle
I0319 17:37:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 17:38:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:38:03.409795  543705 memory.go:184] no items to output this cycle
I0319 17:38:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 17:38:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:38:13.409829  543705 memory.go:191] Add success.
I0319 17:38:13.409838  543705 cpu.go:282] Add success.
W0319 17:38:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:38:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:38:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:38:13.420221  543705 net.go:648] Add success.
I0319 17:38:13.423162  543705 net.go:770] primary dev: ETH0
I0319 17:38:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:38:13.423189  543705 net.go:698] Add success.
I0319 17:38:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:38:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:38:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 17:38:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:38:14.456589  543705 disk_worker.go:494] system disk:vda1
I0319 17:38:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:38:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:38:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:38:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:38:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:38:20.257680  543705 disk_info.go:125] begin check local disk info of client
I0319 17:38:20.260235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:38:20.260242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e0100 0xc0003e0140]
E0319 17:38:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:38:23.409778  543705 memory.go:184] no items to output this cycle
I0319 17:38:23.409820  543705 cpu.go:275] no items to output this cycle
E0319 17:38:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:38:33.409801  543705 memory.go:184] no items to output this cycle
I0319 17:38:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:38:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:38:43.409808  543705 memory.go:191] Add success.
I0319 17:38:43.409810  543705 cpu.go:282] Add success.
I0319 17:38:43.419958  543705 net.go:648] Add success.
I0319 17:38:43.422893  543705 net.go:770] primary dev: ETH0
I0319 17:38:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:38:43.422918  543705 net.go:698] Add success.
I0319 17:38:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:38:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:38:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:38:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:38:53.409785  543705 memory.go:184] no items to output this cycle
I0319 17:38:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 17:39:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:39:03.409773  543705 memory.go:184] no items to output this cycle
I0319 17:39:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 17:39:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:39:13.409829  543705 memory.go:191] Add success.
I0319 17:39:13.409836  543705 cpu.go:282] Add success.
W0319 17:39:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:39:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:39:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:39:13.420189  543705 net.go:648] Add success.
I0319 17:39:13.423236  543705 net.go:770] primary dev: ETH0
I0319 17:39:13.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:39:13.423262  543705 net.go:698] Add success.
I0319 17:39:13.468126  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"54c8413e-b96c-40cd-8e5f-953146984956","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:39:13.468158  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:39:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:39:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:39:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 17:39:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:39:14.456527  543705 disk_worker.go:494] system disk:vda1
I0319 17:39:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:39:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:39:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:39:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:39:20.261684  543705 disk_info.go:125] begin check local disk info of client
I0319 17:39:20.264211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:39:20.264218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474000 0xc000474040]
E0319 17:39:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:39:23.409766  543705 memory.go:184] no items to output this cycle
I0319 17:39:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 17:39:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:39:33.409769  543705 memory.go:184] no items to output this cycle
I0319 17:39:33.409795  543705 cpu.go:275] no items to output this cycle
I0319 17:39:37.788639  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:39:37.788646  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:39:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:39:43.410828  543705 memory.go:191] Add success.
I0319 17:39:43.409828  543705 cpu.go:282] Add success.
I0319 17:39:43.420538  543705 net.go:648] Add success.
I0319 17:39:43.423333  543705 net.go:770] primary dev: ETH0
I0319 17:39:43.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:39:43.423359  543705 net.go:698] Add success.
I0319 17:39:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:39:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:39:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:39:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:39:53.409784  543705 memory.go:184] no items to output this cycle
I0319 17:39:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 17:40:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:40:03.409776  543705 memory.go:184] no items to output this cycle
I0319 17:40:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 17:40:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:40:13.409786  543705 memory.go:191] Add success.
I0319 17:40:13.409803  543705 cpu.go:282] Add success.
W0319 17:40:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:40:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:40:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:40:13.420153  543705 net.go:648] Add success.
I0319 17:40:13.422986  543705 net.go:770] primary dev: ETH0
I0319 17:40:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:40:13.423013  543705 net.go:698] Add success.
I0319 17:40:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:40:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:40:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0319 17:40:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:40:14.456469  543705 disk_worker.go:494] system disk:vda1
I0319 17:40:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:40:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:40:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:40:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:40:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:40:20.265673  543705 disk_info.go:125] begin check local disk info of client
I0319 17:40:20.268214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:40:20.268220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7900 0xc0003b7940]
E0319 17:40:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:40:23.409873  543705 memory.go:184] no items to output this cycle
I0319 17:40:23.409979  543705 cpu.go:275] no items to output this cycle
E0319 17:40:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:40:33.409775  543705 memory.go:184] no items to output this cycle
I0319 17:40:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 17:40:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:40:43.409808  543705 cpu.go:282] Add success.
I0319 17:40:43.409817  543705 memory.go:191] Add success.
I0319 17:40:43.419982  543705 net.go:648] Add success.
I0319 17:40:43.422821  543705 net.go:770] primary dev: ETH0
I0319 17:40:43.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:40:43.422845  543705 net.go:698] Add success.
I0319 17:40:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:40:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:40:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:40:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:40:53.409778  543705 memory.go:184] no items to output this cycle
I0319 17:40:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:41:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:41:03.409779  543705 memory.go:184] no items to output this cycle
I0319 17:41:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 17:41:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:41:13.409809  543705 memory.go:191] Add success.
I0319 17:41:13.409815  543705 cpu.go:282] Add success.
W0319 17:41:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:41:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:41:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:41:13.420223  543705 net.go:648] Add success.
I0319 17:41:13.423146  543705 net.go:770] primary dev: ETH0
I0319 17:41:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:41:13.423171  543705 net.go:698] Add success.
I0319 17:41:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:41:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:41:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 17:41:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:41:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 17:41:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:41:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:41:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:41:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:41:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:41:20.269678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:41:20.272272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:41:20.272279  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003beb80 0xc0003bebc0]
E0319 17:41:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:41:23.409779  543705 memory.go:184] no items to output this cycle
I0319 17:41:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 17:41:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:41:33.409806  543705 memory.go:184] no items to output this cycle
I0319 17:41:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 17:41:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:41:43.409787  543705 memory.go:191] Add success.
I0319 17:41:43.409803  543705 cpu.go:282] Add success.
I0319 17:41:43.419881  543705 net.go:648] Add success.
I0319 17:41:43.422755  543705 net.go:770] primary dev: ETH0
I0319 17:41:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:41:43.422781  543705 net.go:698] Add success.
I0319 17:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:41:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:41:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:41:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:41:53.409799  543705 memory.go:184] no items to output this cycle
I0319 17:41:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 17:42:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:42:03.409787  543705 memory.go:184] no items to output this cycle
I0319 17:42:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 17:42:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:42:13.409784  543705 cpu.go:282] Add success.
I0319 17:42:13.409791  543705 memory.go:191] Add success.
W0319 17:42:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:42:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:42:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:42:13.420072  543705 net.go:648] Add success.
I0319 17:42:13.422777  543705 net.go:770] primary dev: ETH0
I0319 17:42:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:42:13.422805  543705 net.go:698] Add success.
I0319 17:42:13.468983  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cc245613-d879-4656-823d-3aebb3aa737c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:42:13.469018  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 17:42:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:42:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0319 17:42:14.455251  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:42:14.456093  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:42:14.456102  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:42:14.456107  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:42:14.457045  543705 disk_worker.go:494] system disk:vda1
I0319 17:42:14.457077  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:42:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:42:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:42:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:42:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:42:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:42:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:42:16.472333  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:42:20.273677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:42:20.276294  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:42:20.276301  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396fc0 0xc000397000]
E0319 17:42:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:42:23.409765  543705 memory.go:184] no items to output this cycle
I0319 17:42:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:42:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:42:33.409761  543705 memory.go:184] no items to output this cycle
I0319 17:42:33.409791  543705 cpu.go:275] no items to output this cycle
I0319 17:42:37.789672  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:42:37.789679  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:42:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:42:43.410758  543705 memory.go:191] Add success.
I0319 17:42:43.409788  543705 cpu.go:282] Add success.
I0319 17:42:43.420533  543705 net.go:648] Add success.
I0319 17:42:43.423436  543705 net.go:770] primary dev: ETH0
I0319 17:42:43.423451  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:42:43.423465  543705 net.go:698] Add success.
I0319 17:42:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:42:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:42:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:42:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:42:53.409772  543705 memory.go:184] no items to output this cycle
I0319 17:42:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 17:43:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:43:03.409776  543705 memory.go:184] no items to output this cycle
I0319 17:43:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 17:43:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:43:13.409781  543705 memory.go:191] Add success.
W0319 17:43:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:43:13.409809  543705 cpu.go:282] Add success.
W0319 17:43:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:43:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:43:13.420145  543705 net.go:648] Add success.
I0319 17:43:13.422908  543705 net.go:770] primary dev: ETH0
I0319 17:43:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:43:13.422933  543705 net.go:698] Add success.
I0319 17:43:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:43:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:43:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 17:43:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:43:14.456531  543705 disk_worker.go:494] system disk:vda1
I0319 17:43:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:43:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:43:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:43:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:43:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:43:20.277681  543705 disk_info.go:125] begin check local disk info of client
I0319 17:43:20.280267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:43:20.280274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004686c0 0xc000468700]
E0319 17:43:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:43:23.409798  543705 memory.go:184] no items to output this cycle
I0319 17:43:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 17:43:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:43:33.409786  543705 memory.go:184] no items to output this cycle
I0319 17:43:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 17:43:43.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:43:43.409949  543705 memory.go:191] Add success.
I0319 17:43:43.410204  543705 cpu.go:282] Add success.
I0319 17:43:43.419708  543705 net.go:648] Add success.
I0319 17:43:43.422563  543705 net.go:770] primary dev: ETH0
I0319 17:43:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:43:43.422588  543705 net.go:698] Add success.
I0319 17:43:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:43:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:43:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:43:53.409781  543705 memory.go:184] no items to output this cycle
I0319 17:43:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 17:44:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:44:03.409774  543705 memory.go:184] no items to output this cycle
I0319 17:44:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 17:44:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:44:13.409811  543705 memory.go:191] Add success.
I0319 17:44:13.409819  543705 cpu.go:282] Add success.
W0319 17:44:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:44:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:44:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:44:13.420199  543705 net.go:648] Add success.
I0319 17:44:13.422902  543705 net.go:770] primary dev: ETH0
I0319 17:44:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:44:13.422929  543705 net.go:698] Add success.
I0319 17:44:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:44:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:44:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 17:44:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:44:14.456517  543705 disk_worker.go:494] system disk:vda1
I0319 17:44:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:44:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:44:20.281677  543705 disk_info.go:125] begin check local disk info of client
I0319 17:44:20.284262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:44:20.284270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b66c0 0xc0003b6700]
E0319 17:44:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:44:23.409760  543705 memory.go:184] no items to output this cycle
I0319 17:44:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 17:44:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:44:33.409766  543705 memory.go:184] no items to output this cycle
I0319 17:44:33.409788  543705 cpu.go:275] no items to output this cycle
E0319 17:44:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:44:43.409793  543705 memory.go:191] Add success.
I0319 17:44:43.409811  543705 cpu.go:282] Add success.
I0319 17:44:43.419998  543705 net.go:648] Add success.
I0319 17:44:43.422696  543705 net.go:770] primary dev: ETH0
I0319 17:44:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:44:43.422723  543705 net.go:698] Add success.
I0319 17:44:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:44:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:44:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:44:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:44:53.409787  543705 memory.go:184] no items to output this cycle
I0319 17:44:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 17:45:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:45:03.409795  543705 memory.go:184] no items to output this cycle
I0319 17:45:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 17:45:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:45:13.409775  543705 memory.go:191] Add success.
W0319 17:45:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:45:13.409806  543705 cpu.go:282] Add success.
W0319 17:45:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:45:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:45:13.420136  543705 net.go:648] Add success.
I0319 17:45:13.423342  543705 net.go:770] primary dev: ETH0
I0319 17:45:13.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:45:13.423367  543705 net.go:698] Add success.
I0319 17:45:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:45:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:45:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 17:45:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:45:14.456563  543705 disk_worker.go:494] system disk:vda1
I0319 17:45:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:45:14.605854  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"66ff2508-6f47-4817-8ce7-ff2c34ae1a3e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:45:14.605889  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:45:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:45:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:45:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:45:20.285678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:45:20.288332  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:45:20.288339  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a7c0 0xc00034a800]
E0319 17:45:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:45:23.409768  543705 memory.go:184] no items to output this cycle
I0319 17:45:23.409800  543705 cpu.go:275] no items to output this cycle
I0319 17:45:33.409905  543705 cpu.go:275] no items to output this cycle
E0319 17:45:33.409922  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:45:33.409996  543705 memory.go:184] no items to output this cycle
I0319 17:45:37.789813  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:45:37.789819  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:45:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:45:43.410813  543705 memory.go:191] Add success.
I0319 17:45:43.409819  543705 cpu.go:282] Add success.
I0319 17:45:43.420603  543705 net.go:648] Add success.
I0319 17:45:43.423632  543705 net.go:770] primary dev: ETH0
I0319 17:45:43.423644  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:45:43.423656  543705 net.go:698] Add success.
I0319 17:45:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:45:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:45:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:45:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:45:53.409775  543705 memory.go:184] no items to output this cycle
I0319 17:45:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:46:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:46:03.409778  543705 memory.go:184] no items to output this cycle
I0319 17:46:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 17:46:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:46:13.409791  543705 memory.go:191] Add success.
I0319 17:46:13.409800  543705 cpu.go:282] Add success.
W0319 17:46:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:46:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:46:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:46:13.420128  543705 net.go:648] Add success.
I0319 17:46:13.422855  543705 net.go:770] primary dev: ETH0
I0319 17:46:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:46:13.422884  543705 net.go:698] Add success.
I0319 17:46:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:46:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:46:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0319 17:46:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:46:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 17:46:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:46:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:46:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:46:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:46:20.289680  543705 disk_info.go:125] begin check local disk info of client
I0319 17:46:20.292288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:46:20.292294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005e8f00 0xc0005e8f40]
E0319 17:46:23.410349  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:46:23.410369  543705 memory.go:184] no items to output this cycle
I0319 17:46:23.410398  543705 cpu.go:275] no items to output this cycle
E0319 17:46:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:46:33.409768  543705 memory.go:184] no items to output this cycle
I0319 17:46:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:46:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:46:43.409796  543705 memory.go:191] Add success.
I0319 17:46:43.409796  543705 cpu.go:282] Add success.
I0319 17:46:43.419982  543705 net.go:648] Add success.
I0319 17:46:43.422589  543705 net.go:770] primary dev: ETH0
I0319 17:46:43.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:46:43.422615  543705 net.go:698] Add success.
I0319 17:46:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:46:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:46:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:46:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:46:53.409773  543705 memory.go:184] no items to output this cycle
I0319 17:46:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 17:47:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:47:03.409808  543705 memory.go:184] no items to output this cycle
I0319 17:47:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 17:47:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:47:13.409814  543705 memory.go:191] Add success.
I0319 17:47:13.409823  543705 cpu.go:282] Add success.
W0319 17:47:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:47:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:47:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:47:13.420162  543705 net.go:648] Add success.
I0319 17:47:13.422879  543705 net.go:770] primary dev: ETH0
I0319 17:47:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:47:13.422904  543705 net.go:698] Add success.
I0319 17:47:13.453472  543705 event_worker.go:152] Polling the log file for events...
W0319 17:47:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:47:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 17:47:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:47:14.455877  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:47:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:47:14.455891  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:47:14.456545  543705 disk_worker.go:494] system disk:vda1
I0319 17:47:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:47:15.456874  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:47:15.456883  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:47:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:47:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:47:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:47:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:47:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:47:20.293678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:47:20.296278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:47:20.296285  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029ebc0 0xc00029ec00]
E0319 17:47:23.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:47:23.410255  543705 memory.go:184] no items to output this cycle
I0319 17:47:23.410259  543705 cpu.go:275] no items to output this cycle
E0319 17:47:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:47:33.409781  543705 memory.go:184] no items to output this cycle
I0319 17:47:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 17:47:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:47:43.409785  543705 memory.go:191] Add success.
I0319 17:47:43.409820  543705 cpu.go:282] Add success.
I0319 17:47:43.419966  543705 net.go:648] Add success.
I0319 17:47:43.422956  543705 net.go:770] primary dev: ETH0
I0319 17:47:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:47:43.422981  543705 net.go:698] Add success.
I0319 17:47:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:47:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:47:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:47:53.410355  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:47:53.410374  543705 memory.go:184] no items to output this cycle
I0319 17:47:53.410391  543705 cpu.go:275] no items to output this cycle
E0319 17:48:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:48:03.409795  543705 memory.go:184] no items to output this cycle
I0319 17:48:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 17:48:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:48:13.409787  543705 memory.go:191] Add success.
I0319 17:48:13.409813  543705 cpu.go:282] Add success.
W0319 17:48:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:48:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:48:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:48:13.420156  543705 net.go:648] Add success.
I0319 17:48:13.423170  543705 net.go:770] primary dev: ETH0
I0319 17:48:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:48:13.423196  543705 net.go:698] Add success.
I0319 17:48:14.455132  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:48:14.455222  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:48:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0319 17:48:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:48:14.456666  543705 disk_worker.go:494] system disk:vda1
I0319 17:48:14.456701  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:48:14.643391  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab3dd75e-c69e-49bd-88c2-e9849a5354d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:48:14.643426  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:48:15.454982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:48:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:48:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:48:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:48:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:48:20.297681  543705 disk_info.go:125] begin check local disk info of client
I0319 17:48:20.300211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:48:20.300218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340a80 0xc000340ac0]
E0319 17:48:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:48:23.409805  543705 memory.go:184] no items to output this cycle
I0319 17:48:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 17:48:33.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:48:33.409916  543705 memory.go:184] no items to output this cycle
I0319 17:48:33.409921  543705 cpu.go:275] no items to output this cycle
I0319 17:48:37.789955  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:48:37.789961  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:48:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:48:43.410749  543705 memory.go:191] Add success.
I0319 17:48:43.409808  543705 cpu.go:282] Add success.
I0319 17:48:43.420439  543705 net.go:648] Add success.
I0319 17:48:43.423180  543705 net.go:770] primary dev: ETH0
I0319 17:48:43.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:48:43.423211  543705 net.go:698] Add success.
I0319 17:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:48:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:48:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:48:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:48:53.409792  543705 memory.go:184] no items to output this cycle
I0319 17:48:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 17:49:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:49:03.409771  543705 memory.go:184] no items to output this cycle
I0319 17:49:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 17:49:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:49:13.409786  543705 memory.go:191] Add success.
I0319 17:49:13.409809  543705 cpu.go:282] Add success.
W0319 17:49:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:49:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:49:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:49:13.420241  543705 net.go:648] Add success.
I0319 17:49:13.423157  543705 net.go:770] primary dev: ETH0
I0319 17:49:13.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:49:13.423181  543705 net.go:698] Add success.
I0319 17:49:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:49:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:49:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 17:49:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:49:14.456623  543705 disk_worker.go:494] system disk:vda1
I0319 17:49:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:49:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:49:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:49:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:49:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:49:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:49:20.301686  543705 disk_info.go:125] begin check local disk info of client
I0319 17:49:20.304243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:49:20.304251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a41c0 0xc0002a4200]
E0319 17:49:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:49:23.409806  543705 memory.go:184] no items to output this cycle
I0319 17:49:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:49:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:49:33.409765  543705 memory.go:184] no items to output this cycle
I0319 17:49:33.409807  543705 cpu.go:275] no items to output this cycle
E0319 17:49:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:49:43.409814  543705 memory.go:191] Add success.
I0319 17:49:43.409819  543705 cpu.go:282] Add success.
I0319 17:49:43.419894  543705 net.go:648] Add success.
I0319 17:49:43.422664  543705 net.go:770] primary dev: ETH0
I0319 17:49:43.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:49:43.422689  543705 net.go:698] Add success.
I0319 17:49:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:49:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:49:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:49:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:49:53.409785  543705 cpu.go:275] no items to output this cycle
I0319 17:49:53.409789  543705 memory.go:184] no items to output this cycle
E0319 17:50:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:50:03.409785  543705 memory.go:184] no items to output this cycle
I0319 17:50:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 17:50:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:50:13.409813  543705 memory.go:191] Add success.
I0319 17:50:13.409822  543705 cpu.go:282] Add success.
W0319 17:50:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:50:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:50:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:50:13.420115  543705 net.go:648] Add success.
I0319 17:50:13.422984  543705 net.go:770] primary dev: ETH0
I0319 17:50:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:50:13.423008  543705 net.go:698] Add success.
I0319 17:50:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:50:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:50:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 17:50:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:50:14.456587  543705 disk_worker.go:494] system disk:vda1
I0319 17:50:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:50:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:50:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:50:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:50:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:50:16.472090  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:50:20.305696  543705 disk_info.go:125] begin check local disk info of client
I0319 17:50:20.308301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:50:20.308309  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee000 0xc0003ee040]
E0319 17:50:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:50:23.409782  543705 memory.go:184] no items to output this cycle
I0319 17:50:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 17:50:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:50:33.409791  543705 memory.go:184] no items to output this cycle
I0319 17:50:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 17:50:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:50:43.409805  543705 memory.go:191] Add success.
I0319 17:50:43.409805  543705 cpu.go:282] Add success.
I0319 17:50:43.419953  543705 net.go:648] Add success.
I0319 17:50:43.423092  543705 net.go:770] primary dev: ETH0
I0319 17:50:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:50:43.423118  543705 net.go:698] Add success.
I0319 17:50:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:50:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:50:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:50:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:50:53.409815  543705 memory.go:184] no items to output this cycle
I0319 17:50:53.409827  543705 cpu.go:275] no items to output this cycle
E0319 17:51:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:51:03.409792  543705 memory.go:184] no items to output this cycle
I0319 17:51:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 17:51:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:51:13.409818  543705 memory.go:191] Add success.
I0319 17:51:13.409830  543705 cpu.go:282] Add success.
W0319 17:51:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:51:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:51:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:51:13.420118  543705 net.go:648] Add success.
I0319 17:51:13.422915  543705 net.go:770] primary dev: ETH0
I0319 17:51:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:51:13.422944  543705 net.go:698] Add success.
I0319 17:51:13.469339  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a48e001b-2227-43e2-b734-f5027474855c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:51:13.469372  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:51:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:51:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 17:51:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:51:14.456509  543705 disk_worker.go:494] system disk:vda1
I0319 17:51:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:51:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:51:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:51:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:51:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:51:20.309684  543705 disk_info.go:125] begin check local disk info of client
I0319 17:51:20.312249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:51:20.312256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482ac0 0xc000482b00]
E0319 17:51:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:51:23.409809  543705 memory.go:184] no items to output this cycle
I0319 17:51:23.409822  543705 cpu.go:275] no items to output this cycle
E0319 17:51:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:51:33.409775  543705 memory.go:184] no items to output this cycle
I0319 17:51:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 17:51:37.790102  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:51:37.790109  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:51:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:51:43.410654  543705 memory.go:191] Add success.
I0319 17:51:43.409809  543705 cpu.go:282] Add success.
I0319 17:51:43.420372  543705 net.go:648] Add success.
I0319 17:51:43.423028  543705 net.go:770] primary dev: ETH0
I0319 17:51:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:51:43.423054  543705 net.go:698] Add success.
I0319 17:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:51:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:51:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:51:53.409815  543705 memory.go:184] no items to output this cycle
I0319 17:51:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 17:52:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:52:03.409797  543705 memory.go:184] no items to output this cycle
I0319 17:52:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 17:52:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:52:13.409810  543705 memory.go:191] Add success.
I0319 17:52:13.409827  543705 cpu.go:282] Add success.
W0319 17:52:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:52:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:52:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:52:13.420128  543705 net.go:648] Add success.
I0319 17:52:13.423094  543705 net.go:770] primary dev: ETH0
I0319 17:52:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:52:13.423124  543705 net.go:698] Add success.
W0319 17:52:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:52:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 17:52:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:52:14.456432  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:52:14.456458  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:52:14.456465  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:52:14.456856  543705 disk_worker.go:494] system disk:vda1
I0319 17:52:14.456900  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:52:15.456781  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:52:15.456789  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:52:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:52:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:52:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:52:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:52:16.472320  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:52:20.313681  543705 disk_info.go:125] begin check local disk info of client
I0319 17:52:20.316230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:52:20.316237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029ef80 0xc00029efc0]
E0319 17:52:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:52:23.409781  543705 memory.go:184] no items to output this cycle
I0319 17:52:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:52:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:52:33.409813  543705 memory.go:184] no items to output this cycle
I0319 17:52:33.409823  543705 cpu.go:275] no items to output this cycle
E0319 17:52:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:52:43.409776  543705 memory.go:191] Add success.
I0319 17:52:43.409807  543705 cpu.go:282] Add success.
I0319 17:52:43.419902  543705 net.go:648] Add success.
I0319 17:52:43.422855  543705 net.go:770] primary dev: ETH0
I0319 17:52:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:52:43.422892  543705 net.go:698] Add success.
I0319 17:52:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:52:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:52:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:52:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:52:53.410264  543705 memory.go:184] no items to output this cycle
I0319 17:52:53.410285  543705 cpu.go:275] no items to output this cycle
E0319 17:53:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:53:03.409799  543705 memory.go:184] no items to output this cycle
I0319 17:53:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 17:53:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:53:13.409786  543705 memory.go:191] Add success.
W0319 17:53:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:53:13.409812  543705 cpu.go:282] Add success.
W0319 17:53:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:53:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:53:13.420103  543705 net.go:648] Add success.
I0319 17:53:13.422941  543705 net.go:770] primary dev: ETH0
I0319 17:53:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:53:13.422966  543705 net.go:698] Add success.
W0319 17:53:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:53:14.455248  543705 disk_worker.go:708] disk space is not compliant
W0319 17:53:14.455253  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:53:14.458136  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:53:14.459122  543705 disk_worker.go:494] system disk:vda1
I0319 17:53:14.459165  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:53:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:53:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:53:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:53:16.472464  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:53:20.317678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:53:20.320232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:53:20.320239  543705 disk_info.go:196] parse disk info done, disk is : [0xc000291200 0xc000291240]
E0319 17:53:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:53:23.409795  543705 memory.go:184] no items to output this cycle
I0319 17:53:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 17:53:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:53:33.409801  543705 memory.go:184] no items to output this cycle
I0319 17:53:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 17:53:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:53:43.409815  543705 memory.go:191] Add success.
I0319 17:53:43.409823  543705 cpu.go:282] Add success.
I0319 17:53:43.419912  543705 net.go:648] Add success.
I0319 17:53:43.422571  543705 net.go:770] primary dev: ETH0
I0319 17:53:43.422586  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:53:43.422598  543705 net.go:698] Add success.
I0319 17:53:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:53:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:53:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:53:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:53:53.409800  543705 memory.go:184] no items to output this cycle
I0319 17:53:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 17:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:54:03.409774  543705 memory.go:184] no items to output this cycle
I0319 17:54:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 17:54:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:54:13.409806  543705 memory.go:191] Add success.
I0319 17:54:13.409814  543705 cpu.go:282] Add success.
W0319 17:54:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:54:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:54:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:54:13.420059  543705 net.go:648] Add success.
I0319 17:54:13.423156  543705 net.go:770] primary dev: ETH0
I0319 17:54:13.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:54:13.423180  543705 net.go:698] Add success.
I0319 17:54:13.464574  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5dfad9a6-f07b-45f9-be49-fb1a56ac55cc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:54:13.464610  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 17:54:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:54:14.455306  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:54:14.455320  543705 disk_worker.go:708] disk space is not compliant
W0319 17:54:14.455324  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:54:14.456950  543705 disk_worker.go:494] system disk:vda1
I0319 17:54:14.456992  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:54:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:54:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:54:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:54:20.321679  543705 disk_info.go:125] begin check local disk info of client
I0319 17:54:20.324249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:54:20.324255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe1c0 0xc0003fe200]
E0319 17:54:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:54:23.409771  543705 memory.go:184] no items to output this cycle
I0319 17:54:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 17:54:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:54:33.409773  543705 memory.go:184] no items to output this cycle
I0319 17:54:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 17:54:37.790253  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:54:37.790260  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:54:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:54:43.410698  543705 memory.go:191] Add success.
I0319 17:54:43.409820  543705 cpu.go:282] Add success.
I0319 17:54:43.420465  543705 net.go:648] Add success.
I0319 17:54:43.423304  543705 net.go:770] primary dev: ETH0
I0319 17:54:43.423318  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:54:43.423330  543705 net.go:698] Add success.
I0319 17:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:54:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:54:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:54:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:54:53.409785  543705 memory.go:184] no items to output this cycle
I0319 17:54:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 17:55:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:55:03.409778  543705 memory.go:184] no items to output this cycle
I0319 17:55:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 17:55:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:55:13.409791  543705 memory.go:191] Add success.
I0319 17:55:13.409797  543705 cpu.go:282] Add success.
W0319 17:55:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:55:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:55:13.420332  543705 net.go:648] Add success.
I0319 17:55:13.423247  543705 net.go:770] primary dev: ETH0
I0319 17:55:13.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:55:13.423271  543705 net.go:698] Add success.
I0319 17:55:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:55:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:55:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 17:55:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:55:14.456555  543705 disk_worker.go:494] system disk:vda1
I0319 17:55:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:55:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:55:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:55:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:55:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:55:20.325678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:55:20.328223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:55:20.328231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6280 0xc0003b6300]
E0319 17:55:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:55:23.409766  543705 memory.go:184] no items to output this cycle
I0319 17:55:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 17:55:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:55:33.409778  543705 memory.go:184] no items to output this cycle
I0319 17:55:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:55:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:55:43.409799  543705 memory.go:191] Add success.
I0319 17:55:43.409799  543705 cpu.go:282] Add success.
I0319 17:55:43.420013  543705 net.go:648] Add success.
I0319 17:55:43.422781  543705 net.go:770] primary dev: ETH0
I0319 17:55:43.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:55:43.422823  543705 net.go:698] Add success.
I0319 17:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:55:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:55:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:55:53.409792  543705 memory.go:184] no items to output this cycle
I0319 17:55:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 17:56:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:56:03.409776  543705 memory.go:184] no items to output this cycle
I0319 17:56:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:56:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:56:13.409776  543705 memory.go:191] Add success.
W0319 17:56:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:56:13.409811  543705 cpu.go:282] Add success.
W0319 17:56:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:56:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:56:13.420202  543705 net.go:648] Add success.
I0319 17:56:13.423021  543705 net.go:770] primary dev: ETH0
I0319 17:56:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:56:13.423045  543705 net.go:698] Add success.
I0319 17:56:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:56:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:56:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 17:56:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:56:14.456584  543705 disk_worker.go:494] system disk:vda1
I0319 17:56:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:56:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:56:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:56:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:56:20.329681  543705 disk_info.go:125] begin check local disk info of client
I0319 17:56:20.332218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:56:20.332226  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344a40 0xc000344a80]
E0319 17:56:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:56:23.409802  543705 memory.go:184] no items to output this cycle
I0319 17:56:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 17:56:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:56:33.409777  543705 memory.go:184] no items to output this cycle
I0319 17:56:33.409780  543705 cpu.go:275] no items to output this cycle
E0319 17:56:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:56:43.409781  543705 memory.go:191] Add success.
I0319 17:56:43.409815  543705 cpu.go:282] Add success.
I0319 17:56:43.419962  543705 net.go:648] Add success.
I0319 17:56:43.422622  543705 net.go:770] primary dev: ETH0
I0319 17:56:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:56:43.422649  543705 net.go:698] Add success.
I0319 17:56:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:56:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:56:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:56:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:56:53.409772  543705 memory.go:184] no items to output this cycle
I0319 17:56:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 17:57:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:57:03.409779  543705 memory.go:184] no items to output this cycle
I0319 17:57:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 17:57:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:57:13.409790  543705 memory.go:191] Add success.
I0319 17:57:13.409792  543705 cpu.go:282] Add success.
W0319 17:57:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:57:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:57:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:57:13.420122  543705 net.go:648] Add success.
I0319 17:57:13.428804  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 17:57:13.428880  543705 net.go:770] primary dev: ETH0
I0319 17:57:13.428894  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:57:13.428908  543705 net.go:698] Add success.
I0319 17:57:13.452770  543705 event_worker.go:152] Polling the log file for events...
I0319 17:57:13.780639  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"66a8badf-6602-4a10-a759-10ecc915bb9e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 17:57:13.780670  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 17:57:14.454851  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:57:14.454919  543705 disk_worker.go:708] disk space is not compliant
W0319 17:57:14.454924  543705 disk_worker.go:728] disk inode is not compliant
E0319 17:57:14.455620  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 17:57:14.455629  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 17:57:14.455634  543705 custom_config.go:64] query custom config with name: gpu
I0319 17:57:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 17:57:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 17:57:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 17:57:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:57:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 17:57:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 17:57:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:57:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:57:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:57:20.333679  543705 disk_info.go:125] begin check local disk info of client
I0319 17:57:20.336234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:57:20.336241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005204c0 0xc000520500]
E0319 17:57:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:57:23.409773  543705 memory.go:184] no items to output this cycle
I0319 17:57:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 17:57:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:57:33.409769  543705 memory.go:184] no items to output this cycle
I0319 17:57:33.409806  543705 cpu.go:275] no items to output this cycle
I0319 17:57:37.791654  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 17:57:37.791660  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 17:57:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:57:43.410775  543705 memory.go:191] Add success.
I0319 17:57:43.409808  543705 cpu.go:282] Add success.
I0319 17:57:43.420489  543705 net.go:648] Add success.
I0319 17:57:43.423286  543705 net.go:770] primary dev: ETH0
I0319 17:57:43.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:57:43.423321  543705 net.go:698] Add success.
I0319 17:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:57:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:57:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:57:53.409803  543705 memory.go:184] no items to output this cycle
I0319 17:57:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 17:58:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:58:03.409779  543705 memory.go:184] no items to output this cycle
I0319 17:58:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 17:58:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:58:13.409774  543705 memory.go:191] Add success.
W0319 17:58:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 17:58:13.409808  543705 cpu.go:282] Add success.
W0319 17:58:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:58:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:58:13.419724  543705 net.go:648] Add success.
I0319 17:58:13.422394  543705 net.go:770] primary dev: ETH0
I0319 17:58:13.422409  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:58:13.422422  543705 net.go:698] Add success.
I0319 17:58:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:58:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:58:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 17:58:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:58:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 17:58:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:58:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:58:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:58:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:58:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:58:20.337678  543705 disk_info.go:125] begin check local disk info of client
I0319 17:58:20.340218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:58:20.340225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a29c0 0xc0004a2a00]
E0319 17:58:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:58:23.409771  543705 memory.go:184] no items to output this cycle
I0319 17:58:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 17:58:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:58:33.409800  543705 memory.go:184] no items to output this cycle
I0319 17:58:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 17:58:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:58:43.409827  543705 memory.go:191] Add success.
I0319 17:58:43.409836  543705 cpu.go:282] Add success.
I0319 17:58:43.419967  543705 net.go:648] Add success.
I0319 17:58:43.422958  543705 net.go:770] primary dev: ETH0
I0319 17:58:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:58:43.422986  543705 net.go:698] Add success.
I0319 17:58:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:58:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:58:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:58:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:58:53.409784  543705 cpu.go:275] no items to output this cycle
I0319 17:58:53.409786  543705 memory.go:184] no items to output this cycle
E0319 17:59:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:59:03.409780  543705 memory.go:184] no items to output this cycle
I0319 17:59:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 17:59:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:59:13.409810  543705 memory.go:191] Add success.
I0319 17:59:13.409819  543705 cpu.go:282] Add success.
W0319 17:59:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 17:59:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 17:59:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 17:59:13.419726  543705 net.go:648] Add success.
I0319 17:59:13.422593  543705 net.go:770] primary dev: ETH0
I0319 17:59:13.422606  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:59:13.422617  543705 net.go:698] Add success.
I0319 17:59:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 17:59:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 17:59:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 17:59:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 17:59:14.456567  543705 disk_worker.go:494] system disk:vda1
I0319 17:59:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 17:59:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 17:59:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:59:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:59:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 17:59:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0319 17:59:20.341680  543705 disk_info.go:125] begin check local disk info of client
I0319 17:59:20.344230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 17:59:20.344237  543705 disk_info.go:196] parse disk info done, disk is : [0xc000578340 0xc000578380]
E0319 17:59:23.410227  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:59:23.410246  543705 memory.go:184] no items to output this cycle
I0319 17:59:23.410259  543705 cpu.go:275] no items to output this cycle
E0319 17:59:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:59:33.409778  543705 memory.go:184] no items to output this cycle
I0319 17:59:33.409779  543705 cpu.go:275] no items to output this cycle
E0319 17:59:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:59:43.409816  543705 memory.go:191] Add success.
I0319 17:59:43.409824  543705 cpu.go:282] Add success.
I0319 17:59:43.419863  543705 net.go:648] Add success.
I0319 17:59:43.422811  543705 net.go:770] primary dev: ETH0
I0319 17:59:43.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0319 17:59:43.422838  543705 net.go:698] Add success.
I0319 17:59:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 17:59:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 17:59:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 17:59:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 17:59:53.409783  543705 memory.go:184] no items to output this cycle
I0319 17:59:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 18:00:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:00:03.409782  543705 memory.go:184] no items to output this cycle
I0319 18:00:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 18:00:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:00:13.409804  543705 memory.go:191] Add success.
I0319 18:00:13.409816  543705 cpu.go:282] Add success.
W0319 18:00:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:00:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:00:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:00:13.420178  543705 net.go:648] Add success.
I0319 18:00:13.422922  543705 net.go:770] primary dev: ETH0
I0319 18:00:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:00:13.422947  543705 net.go:698] Add success.
I0319 18:00:13.463677  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e3b1341-04d8-4a2e-aeed-990eb4083210","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:00:13.463706  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:00:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:00:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0319 18:00:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:00:14.456475  543705 disk_worker.go:494] system disk:vda1
I0319 18:00:14.456517  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:00:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:00:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:00:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:00:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:00:20.345683  543705 disk_info.go:125] begin check local disk info of client
I0319 18:00:20.348287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:00:20.348295  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005fab40 0xc0005fab80]
E0319 18:00:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:00:23.409770  543705 memory.go:184] no items to output this cycle
I0319 18:00:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 18:00:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:00:33.409774  543705 memory.go:184] no items to output this cycle
I0319 18:00:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 18:00:37.792656  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:00:37.792663  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:00:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:00:43.410619  543705 memory.go:191] Add success.
I0319 18:00:43.409820  543705 cpu.go:282] Add success.
I0319 18:00:43.420331  543705 net.go:648] Add success.
I0319 18:00:43.422908  543705 net.go:770] primary dev: ETH0
I0319 18:00:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:00:43.422937  543705 net.go:698] Add success.
I0319 18:00:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:00:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:00:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:00:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:00:53.409774  543705 memory.go:184] no items to output this cycle
I0319 18:00:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 18:01:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:01:03.409780  543705 memory.go:184] no items to output this cycle
I0319 18:01:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 18:01:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:01:13.409788  543705 memory.go:191] Add success.
W0319 18:01:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 18:01:13.409814  543705 cpu.go:282] Add success.
W0319 18:01:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:01:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:01:13.420319  543705 net.go:648] Add success.
I0319 18:01:13.423101  543705 net.go:770] primary dev: ETH0
I0319 18:01:13.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:01:13.423129  543705 net.go:698] Add success.
I0319 18:01:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:01:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:01:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 18:01:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:01:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 18:01:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:01:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:01:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:01:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:01:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:01:20.351524  543705 disk_info.go:125] begin check local disk info of client
I0319 18:01:20.354127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:01:20.354134  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e780 0xc00035e7c0]
E0319 18:01:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:01:23.409781  543705 cpu.go:275] no items to output this cycle
I0319 18:01:23.409787  543705 memory.go:184] no items to output this cycle
E0319 18:01:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:01:33.409797  543705 memory.go:184] no items to output this cycle
I0319 18:01:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 18:01:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:01:43.409781  543705 memory.go:191] Add success.
I0319 18:01:43.409799  543705 cpu.go:282] Add success.
I0319 18:01:43.419858  543705 net.go:648] Add success.
I0319 18:01:43.422749  543705 net.go:770] primary dev: ETH0
I0319 18:01:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:01:43.422775  543705 net.go:698] Add success.
I0319 18:01:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:01:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:01:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:01:53.409804  543705 memory.go:184] no items to output this cycle
I0319 18:01:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 18:02:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:02:03.409775  543705 memory.go:184] no items to output this cycle
I0319 18:02:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 18:02:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:02:13.409787  543705 memory.go:191] Add success.
I0319 18:02:13.409790  543705 cpu.go:282] Add success.
W0319 18:02:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:02:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:02:13.420234  543705 net.go:648] Add success.
I0319 18:02:13.423524  543705 net.go:770] primary dev: ETH0
I0319 18:02:13.423537  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:02:13.423549  543705 net.go:698] Add success.
W0319 18:02:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:02:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 18:02:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:02:14.456832  543705 disk_worker.go:494] system disk:vda1
I0319 18:02:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:02:14.457679  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:02:14.457688  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:02:14.457694  543705 custom_config.go:64] query custom config with name: gpu
E0319 18:02:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:02:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:02:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:02:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:02:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:02:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:02:16.472325  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:02:20.357683  543705 disk_info.go:125] begin check local disk info of client
I0319 18:02:20.360301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:02:20.360309  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051a640 0xc00051a680]
E0319 18:02:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:02:23.409778  543705 cpu.go:275] no items to output this cycle
I0319 18:02:23.409787  543705 memory.go:184] no items to output this cycle
E0319 18:02:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:02:33.409801  543705 memory.go:184] no items to output this cycle
I0319 18:02:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 18:02:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:02:43.409791  543705 memory.go:191] Add success.
I0319 18:02:43.409820  543705 cpu.go:282] Add success.
I0319 18:02:43.419946  543705 net.go:648] Add success.
I0319 18:02:43.422906  543705 net.go:770] primary dev: ETH0
I0319 18:02:43.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:02:43.422932  543705 net.go:698] Add success.
I0319 18:02:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:02:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:02:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:02:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:02:53.409803  543705 memory.go:184] no items to output this cycle
I0319 18:02:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:03:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:03:03.409780  543705 memory.go:184] no items to output this cycle
I0319 18:03:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:03:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:03:13.409809  543705 memory.go:191] Add success.
I0319 18:03:13.409821  543705 cpu.go:282] Add success.
W0319 18:03:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:03:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:03:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:03:13.420113  543705 net.go:648] Add success.
I0319 18:03:13.422934  543705 net.go:770] primary dev: ETH0
I0319 18:03:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:03:13.422966  543705 net.go:698] Add success.
I0319 18:03:14.276746  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"383119da-1412-472f-8bbb-4489ca117023","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:03:14.276781  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:03:14.454706  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:03:14.454862  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:03:14.454872  543705 disk_worker.go:708] disk space is not compliant
W0319 18:03:14.454875  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:03:14.456207  543705 disk_worker.go:494] system disk:vda1
I0319 18:03:14.456260  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:03:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:03:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:03:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:03:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:03:16.472465  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:03:20.361682  543705 disk_info.go:125] begin check local disk info of client
I0319 18:03:20.364239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:03:20.364246  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460740 0xc000460780]
E0319 18:03:23.410257  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:03:23.410272  543705 memory.go:184] no items to output this cycle
I0319 18:03:23.410280  543705 cpu.go:275] no items to output this cycle
E0319 18:03:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:03:33.409771  543705 memory.go:184] no items to output this cycle
I0319 18:03:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 18:03:37.793732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:03:37.793738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:03:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:03:43.410676  543705 memory.go:191] Add success.
I0319 18:03:43.409820  543705 cpu.go:282] Add success.
I0319 18:03:43.420380  543705 net.go:648] Add success.
I0319 18:03:43.423281  543705 net.go:770] primary dev: ETH0
I0319 18:03:43.423294  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:03:43.423307  543705 net.go:698] Add success.
I0319 18:03:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:03:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:03:53.410189  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:03:53.410207  543705 memory.go:184] no items to output this cycle
I0319 18:03:53.410231  543705 cpu.go:275] no items to output this cycle
E0319 18:04:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:04:03.409793  543705 memory.go:184] no items to output this cycle
I0319 18:04:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 18:04:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:04:13.409785  543705 memory.go:191] Add success.
I0319 18:04:13.409806  543705 cpu.go:282] Add success.
W0319 18:04:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:04:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:04:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:04:13.420278  543705 net.go:648] Add success.
I0319 18:04:13.423121  543705 net.go:770] primary dev: ETH0
I0319 18:04:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:04:13.423149  543705 net.go:698] Add success.
I0319 18:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:04:14.455257  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:04:14.455329  543705 disk_worker.go:708] disk space is not compliant
W0319 18:04:14.455332  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:04:14.456779  543705 disk_worker.go:494] system disk:vda1
I0319 18:04:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:04:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:04:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:04:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:04:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:04:20.365685  543705 disk_info.go:125] begin check local disk info of client
I0319 18:04:20.368222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:04:20.368229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000516480 0xc0005164c0]
E0319 18:04:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:04:23.409799  543705 memory.go:184] no items to output this cycle
I0319 18:04:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 18:04:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:04:33.409778  543705 memory.go:184] no items to output this cycle
I0319 18:04:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:04:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:04:43.409789  543705 memory.go:191] Add success.
I0319 18:04:43.409804  543705 cpu.go:282] Add success.
I0319 18:04:43.419893  543705 net.go:648] Add success.
I0319 18:04:43.422698  543705 net.go:770] primary dev: ETH0
I0319 18:04:43.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:04:43.422724  543705 net.go:698] Add success.
I0319 18:04:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:04:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:04:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:04:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:04:53.409805  543705 memory.go:184] no items to output this cycle
I0319 18:04:53.409820  543705 cpu.go:275] no items to output this cycle
E0319 18:05:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:05:03.409785  543705 cpu.go:275] no items to output this cycle
I0319 18:05:03.409789  543705 memory.go:184] no items to output this cycle
E0319 18:05:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:05:13.409811  543705 memory.go:191] Add success.
I0319 18:05:13.409818  543705 cpu.go:282] Add success.
W0319 18:05:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:05:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:05:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:05:13.420054  543705 net.go:648] Add success.
I0319 18:05:13.422979  543705 net.go:770] primary dev: ETH0
I0319 18:05:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:05:13.423009  543705 net.go:698] Add success.
I0319 18:05:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:05:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:05:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 18:05:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:05:14.456490  543705 disk_worker.go:494] system disk:vda1
I0319 18:05:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:05:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:05:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:05:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:05:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:05:20.369682  543705 disk_info.go:125] begin check local disk info of client
I0319 18:05:20.372234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:05:20.372241  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a100 0xc00032a140]
E0319 18:05:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:05:23.409770  543705 memory.go:184] no items to output this cycle
I0319 18:05:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 18:05:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:05:33.409808  543705 memory.go:184] no items to output this cycle
I0319 18:05:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 18:05:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:05:43.409780  543705 memory.go:191] Add success.
I0319 18:05:43.409798  543705 cpu.go:282] Add success.
I0319 18:05:43.419881  543705 net.go:648] Add success.
I0319 18:05:43.422717  543705 net.go:770] primary dev: ETH0
I0319 18:05:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:05:43.422743  543705 net.go:698] Add success.
I0319 18:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:05:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:05:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:05:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:05:53.409780  543705 memory.go:184] no items to output this cycle
I0319 18:05:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:06:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:06:03.409770  543705 memory.go:184] no items to output this cycle
I0319 18:06:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 18:06:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:06:13.409787  543705 memory.go:191] Add success.
I0319 18:06:13.409805  543705 cpu.go:282] Add success.
W0319 18:06:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:06:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:06:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:06:13.420055  543705 net.go:648] Add success.
I0319 18:06:13.423927  543705 net.go:770] primary dev: ETH0
I0319 18:06:13.423939  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:06:13.423951  543705 net.go:698] Add success.
I0319 18:06:13.466503  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5688c392-168a-4280-8807-d47b4962eb00","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:06:13.466540  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:06:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:06:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:06:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 18:06:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:06:14.456847  543705 disk_worker.go:494] system disk:vda1
I0319 18:06:14.456876  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:06:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:06:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:06:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:06:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:06:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:06:20.373693  543705 disk_info.go:125] begin check local disk info of client
I0319 18:06:20.376240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:06:20.376247  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464e80 0xc000464ec0]
E0319 18:06:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:06:23.409803  543705 memory.go:184] no items to output this cycle
I0319 18:06:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:06:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:06:33.409786  543705 memory.go:184] no items to output this cycle
I0319 18:06:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 18:06:37.795668  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:06:37.795674  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:06:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:06:43.410744  543705 memory.go:191] Add success.
I0319 18:06:43.409796  543705 cpu.go:282] Add success.
I0319 18:06:43.420535  543705 net.go:648] Add success.
I0319 18:06:43.423300  543705 net.go:770] primary dev: ETH0
I0319 18:06:43.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:06:43.423325  543705 net.go:698] Add success.
I0319 18:06:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:06:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:06:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:06:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:06:53.409784  543705 memory.go:184] no items to output this cycle
I0319 18:06:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:07:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:07:03.409819  543705 memory.go:184] no items to output this cycle
I0319 18:07:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 18:07:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:07:13.409788  543705 memory.go:191] Add success.
W0319 18:07:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 18:07:13.409819  543705 cpu.go:282] Add success.
W0319 18:07:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:07:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:07:13.420428  543705 net.go:648] Add success.
I0319 18:07:13.423198  543705 net.go:770] primary dev: ETH0
I0319 18:07:13.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:07:13.423224  543705 net.go:698] Add success.
I0319 18:07:13.452883  543705 event_worker.go:152] Polling the log file for events...
W0319 18:07:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:07:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 18:07:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:07:14.456155  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:07:14.456165  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:07:14.456171  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:07:14.456427  543705 disk_worker.go:494] system disk:vda1
I0319 18:07:14.456459  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:07:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:07:15.456883  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:07:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:07:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:07:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:07:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:07:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:07:20.377685  543705 disk_info.go:125] begin check local disk info of client
I0319 18:07:20.380205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:07:20.380211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be200 0xc0002be240]
E0319 18:07:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:07:23.409805  543705 memory.go:184] no items to output this cycle
I0319 18:07:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:07:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:07:33.409818  543705 memory.go:184] no items to output this cycle
I0319 18:07:33.409834  543705 cpu.go:275] no items to output this cycle
E0319 18:07:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:07:43.409798  543705 memory.go:191] Add success.
I0319 18:07:43.409831  543705 cpu.go:282] Add success.
I0319 18:07:43.419897  543705 net.go:648] Add success.
I0319 18:07:43.422795  543705 net.go:770] primary dev: ETH0
I0319 18:07:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:07:43.422824  543705 net.go:698] Add success.
I0319 18:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:07:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:07:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:07:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:07:53.409815  543705 memory.go:184] no items to output this cycle
I0319 18:07:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 18:08:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:08:03.409792  543705 memory.go:184] no items to output this cycle
I0319 18:08:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 18:08:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:08:13.409796  543705 memory.go:191] Add success.
I0319 18:08:13.409806  543705 cpu.go:282] Add success.
W0319 18:08:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:08:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:08:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:08:13.420161  543705 net.go:648] Add success.
I0319 18:08:13.423019  543705 net.go:770] primary dev: ETH0
I0319 18:08:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:08:13.423043  543705 net.go:698] Add success.
I0319 18:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:08:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:08:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 18:08:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:08:14.456503  543705 disk_worker.go:494] system disk:vda1
I0319 18:08:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:08:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:08:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:08:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:08:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:08:20.381689  543705 disk_info.go:125] begin check local disk info of client
I0319 18:08:20.384244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:08:20.384251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4000 0xc0002a4400]
E0319 18:08:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:08:23.409810  543705 memory.go:184] no items to output this cycle
I0319 18:08:23.409823  543705 cpu.go:275] no items to output this cycle
E0319 18:08:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:08:33.409801  543705 memory.go:184] no items to output this cycle
I0319 18:08:33.409828  543705 cpu.go:275] no items to output this cycle
E0319 18:08:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:08:43.409786  543705 memory.go:191] Add success.
I0319 18:08:43.409810  543705 cpu.go:282] Add success.
I0319 18:08:43.419913  543705 net.go:648] Add success.
I0319 18:08:43.422707  543705 net.go:770] primary dev: ETH0
I0319 18:08:43.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:08:43.422736  543705 net.go:698] Add success.
I0319 18:08:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:08:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:08:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:08:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:08:53.409804  543705 memory.go:184] no items to output this cycle
I0319 18:08:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:09:03.409780  543705 memory.go:184] no items to output this cycle
I0319 18:09:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 18:09:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:09:13.409788  543705 memory.go:191] Add success.
I0319 18:09:13.409797  543705 cpu.go:282] Add success.
W0319 18:09:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:09:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:09:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:09:13.420122  543705 net.go:648] Add success.
I0319 18:09:13.423125  543705 net.go:770] primary dev: ETH0
I0319 18:09:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:09:13.423151  543705 net.go:698] Add success.
I0319 18:09:13.715845  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c364380-0560-4dd6-ae0d-ad9122267b45","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:09:13.715890  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:09:14.454606  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:09:14.454806  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:09:14.454816  543705 disk_worker.go:708] disk space is not compliant
W0319 18:09:14.454819  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:09:14.456181  543705 disk_worker.go:494] system disk:vda1
I0319 18:09:14.456236  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:09:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:09:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:09:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:09:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:09:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:09:20.385676  543705 disk_info.go:125] begin check local disk info of client
I0319 18:09:20.388220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:09:20.388226  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1c40 0xc0002a1c80]
E0319 18:09:23.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:09:23.409891  543705 memory.go:184] no items to output this cycle
I0319 18:09:23.409973  543705 cpu.go:275] no items to output this cycle
E0319 18:09:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:09:33.409802  543705 memory.go:184] no items to output this cycle
I0319 18:09:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 18:09:37.795809  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:09:37.795816  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:09:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:09:43.410607  543705 memory.go:191] Add success.
I0319 18:09:43.409815  543705 cpu.go:282] Add success.
I0319 18:09:43.420382  543705 net.go:648] Add success.
I0319 18:09:43.423043  543705 net.go:770] primary dev: ETH0
I0319 18:09:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:09:43.423072  543705 net.go:698] Add success.
I0319 18:09:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:09:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:09:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:09:53.409782  543705 memory.go:184] no items to output this cycle
I0319 18:09:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:10:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:10:03.409764  543705 memory.go:184] no items to output this cycle
I0319 18:10:03.409803  543705 cpu.go:275] no items to output this cycle
E0319 18:10:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:10:13.409795  543705 memory.go:191] Add success.
I0319 18:10:13.409795  543705 cpu.go:282] Add success.
W0319 18:10:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:10:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:10:13.420146  543705 net.go:648] Add success.
I0319 18:10:13.422973  543705 net.go:770] primary dev: ETH0
I0319 18:10:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:10:13.422999  543705 net.go:698] Add success.
I0319 18:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:10:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:10:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 18:10:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:10:14.456555  543705 disk_worker.go:494] system disk:vda1
I0319 18:10:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:10:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:10:20.389680  543705 disk_info.go:125] begin check local disk info of client
I0319 18:10:20.392241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:10:20.392249  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039fd40 0xc00039fd80]
E0319 18:10:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:10:23.409798  543705 memory.go:184] no items to output this cycle
I0319 18:10:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 18:10:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:10:33.409790  543705 memory.go:184] no items to output this cycle
I0319 18:10:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 18:10:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:10:43.409776  543705 memory.go:191] Add success.
I0319 18:10:43.409817  543705 cpu.go:282] Add success.
I0319 18:10:43.420004  543705 net.go:648] Add success.
I0319 18:10:43.423082  543705 net.go:770] primary dev: ETH0
I0319 18:10:43.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:10:43.423108  543705 net.go:698] Add success.
I0319 18:10:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:10:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:10:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:10:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:10:53.409769  543705 memory.go:184] no items to output this cycle
I0319 18:10:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 18:11:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:11:03.409760  543705 memory.go:184] no items to output this cycle
I0319 18:11:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 18:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:11:13.409790  543705 memory.go:191] Add success.
I0319 18:11:13.409795  543705 cpu.go:282] Add success.
W0319 18:11:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:11:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:11:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:11:13.420160  543705 net.go:648] Add success.
I0319 18:11:13.423322  543705 net.go:770] primary dev: ETH0
I0319 18:11:13.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:11:13.423354  543705 net.go:698] Add success.
I0319 18:11:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:11:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:11:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 18:11:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:11:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 18:11:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:11:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:11:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:11:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:11:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:11:20.393681  543705 disk_info.go:125] begin check local disk info of client
I0319 18:11:20.396458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:11:20.396466  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394340 0xc000394380]
E0319 18:11:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:11:23.409798  543705 memory.go:184] no items to output this cycle
I0319 18:11:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 18:11:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:11:33.409791  543705 memory.go:184] no items to output this cycle
I0319 18:11:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 18:11:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:11:43.409797  543705 memory.go:191] Add success.
I0319 18:11:43.409797  543705 cpu.go:282] Add success.
I0319 18:11:43.419975  543705 net.go:648] Add success.
I0319 18:11:43.422843  543705 net.go:770] primary dev: ETH0
I0319 18:11:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:11:43.422870  543705 net.go:698] Add success.
I0319 18:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:11:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:11:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:11:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:11:53.409775  543705 memory.go:184] no items to output this cycle
I0319 18:11:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 18:12:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:12:03.409777  543705 memory.go:184] no items to output this cycle
I0319 18:12:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:12:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:12:13.409778  543705 memory.go:191] Add success.
W0319 18:12:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 18:12:13.409810  543705 cpu.go:282] Add success.
W0319 18:12:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:12:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:12:13.420047  543705 net.go:648] Add success.
I0319 18:12:13.422873  543705 net.go:770] primary dev: ETH0
I0319 18:12:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:12:13.422899  543705 net.go:698] Add success.
I0319 18:12:13.463982  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b66597e4-e885-42a5-af03-b72889c1e6e0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:12:13.464013  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 18:12:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:12:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 18:12:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:12:14.456868  543705 disk_worker.go:494] system disk:vda1
E0319 18:12:14.456874  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:12:14.456882  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:12:14.456887  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:12:14.456909  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:12:15.456510  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:12:15.456518  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:12:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:12:16.457903  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:12:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:12:16.457976  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:12:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:12:20.397683  543705 disk_info.go:125] begin check local disk info of client
I0319 18:12:20.400321  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:12:20.400329  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462ac0 0xc000462b00]
E0319 18:12:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:12:23.409780  543705 memory.go:184] no items to output this cycle
I0319 18:12:23.409784  543705 cpu.go:275] no items to output this cycle
I0319 18:12:33.409886  543705 cpu.go:275] no items to output this cycle
E0319 18:12:33.409931  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:12:33.409951  543705 memory.go:184] no items to output this cycle
I0319 18:12:37.797682  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:12:37.797688  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:12:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:12:43.410770  543705 memory.go:191] Add success.
I0319 18:12:43.409798  543705 cpu.go:282] Add success.
I0319 18:12:43.420454  543705 net.go:648] Add success.
I0319 18:12:43.423404  543705 net.go:770] primary dev: ETH0
I0319 18:12:43.423419  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:12:43.423434  543705 net.go:698] Add success.
I0319 18:12:46.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:12:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:12:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:12:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:12:53.409787  543705 memory.go:184] no items to output this cycle
I0319 18:12:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 18:13:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:13:03.409770  543705 memory.go:184] no items to output this cycle
I0319 18:13:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 18:13:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:13:13.409777  543705 memory.go:191] Add success.
W0319 18:13:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:13:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:13:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:13:13.409827  543705 cpu.go:282] Add success.
I0319 18:13:13.420162  543705 net.go:648] Add success.
I0319 18:13:13.422814  543705 net.go:770] primary dev: ETH0
I0319 18:13:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:13:13.422843  543705 net.go:698] Add success.
I0319 18:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:13:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:13:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 18:13:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:13:14.456597  543705 disk_worker.go:494] system disk:vda1
I0319 18:13:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:13:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:13:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:13:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:13:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:13:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:13:20.401686  543705 disk_info.go:125] begin check local disk info of client
I0319 18:13:20.404272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:13:20.404280  543705 disk_info.go:196] parse disk info done, disk is : [0xc000579580 0xc0005795c0]
E0319 18:13:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:13:23.409811  543705 memory.go:184] no items to output this cycle
I0319 18:13:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:13:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:13:33.409793  543705 memory.go:184] no items to output this cycle
I0319 18:13:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 18:13:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:13:43.409823  543705 memory.go:191] Add success.
I0319 18:13:43.409825  543705 cpu.go:282] Add success.
I0319 18:13:43.419954  543705 net.go:648] Add success.
I0319 18:13:43.423105  543705 net.go:770] primary dev: ETH0
I0319 18:13:43.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:13:43.423130  543705 net.go:698] Add success.
I0319 18:13:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:13:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:13:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:13:53.409806  543705 memory.go:184] no items to output this cycle
I0319 18:13:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:14:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:14:03.409774  543705 memory.go:184] no items to output this cycle
I0319 18:14:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:14:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:14:13.409788  543705 memory.go:191] Add success.
I0319 18:14:13.409797  543705 cpu.go:282] Add success.
W0319 18:14:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:14:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:14:13.420161  543705 net.go:648] Add success.
I0319 18:14:13.423205  543705 net.go:770] primary dev: ETH0
I0319 18:14:13.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:14:13.423241  543705 net.go:698] Add success.
I0319 18:14:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:14:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:14:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0319 18:14:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:14:14.456503  543705 disk_worker.go:494] system disk:vda1
I0319 18:14:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:14:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:14:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:14:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:14:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:14:20.405697  543705 disk_info.go:125] begin check local disk info of client
I0319 18:14:20.408194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:14:20.408202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8480 0xc0003c84c0]
I0319 18:14:23.409895  543705 cpu.go:275] no items to output this cycle
E0319 18:14:23.409976  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:14:23.409992  543705 memory.go:184] no items to output this cycle
E0319 18:14:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:14:33.409791  543705 memory.go:184] no items to output this cycle
I0319 18:14:33.409801  543705 cpu.go:275] no items to output this cycle
E0319 18:14:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:14:43.409796  543705 memory.go:191] Add success.
I0319 18:14:43.409803  543705 cpu.go:282] Add success.
I0319 18:14:43.419961  543705 net.go:648] Add success.
I0319 18:14:43.422762  543705 net.go:770] primary dev: ETH0
I0319 18:14:43.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:14:43.422788  543705 net.go:698] Add success.
I0319 18:14:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:14:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:14:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:14:53.410257  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:14:53.410282  543705 memory.go:184] no items to output this cycle
I0319 18:14:53.410286  543705 cpu.go:275] no items to output this cycle
E0319 18:15:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:15:03.409777  543705 memory.go:184] no items to output this cycle
I0319 18:15:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 18:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:15:13.409779  543705 memory.go:191] Add success.
W0319 18:15:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:15:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:15:13.409817  543705 cpu.go:282] Add success.
I0319 18:15:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:15:13.420049  543705 net.go:648] Add success.
I0319 18:15:13.423277  543705 net.go:770] primary dev: ETH0
I0319 18:15:13.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:15:13.423302  543705 net.go:698] Add success.
I0319 18:15:13.468334  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c865989b-088d-4c99-915d-6d64718139ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:15:13.468378  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:15:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:15:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:15:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 18:15:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:15:14.456739  543705 disk_worker.go:494] system disk:vda1
I0319 18:15:14.456766  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:15:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:15:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:15:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:15:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:15:20.409700  543705 disk_info.go:125] begin check local disk info of client
I0319 18:15:20.412143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:15:20.412152  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2100 0xc0002a2140]
E0319 18:15:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:15:23.409782  543705 memory.go:184] no items to output this cycle
I0319 18:15:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 18:15:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:15:33.409781  543705 memory.go:184] no items to output this cycle
I0319 18:15:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 18:15:37.799684  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:15:37.799690  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:15:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:15:43.410680  543705 memory.go:191] Add success.
I0319 18:15:43.409808  543705 cpu.go:282] Add success.
I0319 18:15:43.420392  543705 net.go:648] Add success.
I0319 18:15:43.423271  543705 net.go:770] primary dev: ETH0
I0319 18:15:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:15:43.423297  543705 net.go:698] Add success.
I0319 18:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:15:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:15:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:15:53.409783  543705 memory.go:184] no items to output this cycle
I0319 18:15:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 18:16:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:16:03.409800  543705 memory.go:184] no items to output this cycle
I0319 18:16:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:16:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:16:13.409816  543705 memory.go:191] Add success.
I0319 18:16:13.409824  543705 cpu.go:282] Add success.
W0319 18:16:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:16:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:16:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:16:13.420137  543705 net.go:648] Add success.
I0319 18:16:13.423182  543705 net.go:770] primary dev: ETH0
I0319 18:16:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:16:13.423209  543705 net.go:698] Add success.
I0319 18:16:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:16:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:16:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 18:16:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:16:14.456542  543705 disk_worker.go:494] system disk:vda1
I0319 18:16:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:16:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:16:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:16:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:16:16.472489  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:16:20.412824  543705 disk_info.go:125] begin check local disk info of client
I0319 18:16:20.416301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:16:20.416311  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc380 0xc0004dc3c0]
E0319 18:16:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:16:23.409784  543705 cpu.go:275] no items to output this cycle
I0319 18:16:23.409795  543705 memory.go:184] no items to output this cycle
E0319 18:16:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:16:33.409774  543705 memory.go:184] no items to output this cycle
I0319 18:16:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 18:16:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:16:43.409816  543705 memory.go:191] Add success.
I0319 18:16:43.409824  543705 cpu.go:282] Add success.
I0319 18:16:43.419893  543705 net.go:648] Add success.
I0319 18:16:43.422522  543705 net.go:770] primary dev: ETH0
I0319 18:16:43.422535  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:16:43.422548  543705 net.go:698] Add success.
I0319 18:16:46.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:16:46.458102  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:16:46.458139  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:16:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:16:53.409806  543705 memory.go:184] no items to output this cycle
I0319 18:16:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:17:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:17:03.409780  543705 cpu.go:275] no items to output this cycle
I0319 18:17:03.409784  543705 memory.go:184] no items to output this cycle
E0319 18:17:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:17:13.409825  543705 memory.go:191] Add success.
I0319 18:17:13.409827  543705 cpu.go:282] Add success.
W0319 18:17:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:17:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:17:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:17:13.420127  543705 net.go:648] Add success.
I0319 18:17:13.422788  543705 net.go:770] primary dev: ETH0
I0319 18:17:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:17:13.422815  543705 net.go:698] Add success.
I0319 18:17:13.453352  543705 event_worker.go:152] Polling the log file for events...
W0319 18:17:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:17:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0319 18:17:14.455153  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:17:14.456884  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:17:14.456893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:17:14.456899  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:17:14.456971  543705 disk_worker.go:494] system disk:vda1
I0319 18:17:14.457014  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:17:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:17:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:17:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:17:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:17:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:17:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:17:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:17:20.416803  543705 disk_info.go:125] begin check local disk info of client
I0319 18:17:20.419344  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:17:20.419351  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e100 0xc00039e180]
E0319 18:17:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:17:23.409807  543705 memory.go:184] no items to output this cycle
I0319 18:17:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:17:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:17:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 18:17:33.409785  543705 memory.go:184] no items to output this cycle
E0319 18:17:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:17:43.409785  543705 memory.go:191] Add success.
I0319 18:17:43.409789  543705 cpu.go:282] Add success.
I0319 18:17:43.419867  543705 net.go:648] Add success.
I0319 18:17:43.422745  543705 net.go:770] primary dev: ETH0
I0319 18:17:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:17:43.422772  543705 net.go:698] Add success.
I0319 18:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:17:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:17:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:17:53.409785  543705 memory.go:184] no items to output this cycle
I0319 18:17:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 18:18:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:18:03.409778  543705 memory.go:184] no items to output this cycle
I0319 18:18:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 18:18:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:18:13.409781  543705 memory.go:191] Add success.
I0319 18:18:13.409804  543705 cpu.go:282] Add success.
W0319 18:18:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:18:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:18:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:18:13.420137  543705 net.go:648] Add success.
I0319 18:18:13.423038  543705 net.go:770] primary dev: ETH0
I0319 18:18:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:18:13.423064  543705 net.go:698] Add success.
I0319 18:18:13.468928  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"131617d5-1a29-45f2-a3ff-8419dfc132ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:18:13.468960  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:18:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:18:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0319 18:18:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:18:14.456503  543705 disk_worker.go:494] system disk:vda1
I0319 18:18:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:18:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:18:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:18:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:18:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:18:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:18:20.419816  543705 disk_info.go:125] begin check local disk info of client
I0319 18:18:20.422272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:18:20.422280  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039fcc0 0xc00039fd00]
E0319 18:18:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:18:23.409791  543705 memory.go:184] no items to output this cycle
I0319 18:18:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 18:18:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:18:33.409768  543705 memory.go:184] no items to output this cycle
I0319 18:18:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 18:18:37.801716  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:18:37.801724  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:18:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:18:43.410753  543705 memory.go:191] Add success.
I0319 18:18:43.409801  543705 cpu.go:282] Add success.
I0319 18:18:43.420458  543705 net.go:648] Add success.
I0319 18:18:43.423317  543705 net.go:770] primary dev: ETH0
I0319 18:18:43.423331  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:18:43.423343  543705 net.go:698] Add success.
I0319 18:18:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:18:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:18:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:18:53.409785  543705 memory.go:184] no items to output this cycle
I0319 18:18:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 18:19:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:19:03.409795  543705 memory.go:184] no items to output this cycle
I0319 18:19:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 18:19:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:19:13.409792  543705 memory.go:191] Add success.
I0319 18:19:13.409794  543705 cpu.go:282] Add success.
W0319 18:19:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:19:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:19:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:19:13.420138  543705 net.go:648] Add success.
I0319 18:19:13.422559  543705 net.go:770] primary dev: ETH0
I0319 18:19:13.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:19:13.422586  543705 net.go:698] Add success.
I0319 18:19:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:19:14.455346  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:19:14.455427  543705 disk_worker.go:708] disk space is not compliant
W0319 18:19:14.455431  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:19:14.457046  543705 disk_worker.go:494] system disk:vda1
I0319 18:19:14.457074  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:19:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:19:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:19:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:19:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:19:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:19:20.422815  543705 disk_info.go:125] begin check local disk info of client
I0319 18:19:20.425312  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:19:20.425322  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342100 0xc000342140]
E0319 18:19:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:19:23.409792  543705 memory.go:184] no items to output this cycle
I0319 18:19:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 18:19:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:19:33.409763  543705 memory.go:184] no items to output this cycle
I0319 18:19:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 18:19:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:19:43.409780  543705 memory.go:191] Add success.
I0319 18:19:43.409799  543705 cpu.go:282] Add success.
I0319 18:19:43.419687  543705 net.go:770] primary dev: ETH0
I0319 18:19:43.419700  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:19:43.419714  543705 net.go:698] Add success.
I0319 18:19:43.419945  543705 net.go:648] Add success.
I0319 18:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:19:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:19:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:19:53.410327  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:19:53.410346  543705 memory.go:184] no items to output this cycle
I0319 18:19:53.410369  543705 cpu.go:275] no items to output this cycle
E0319 18:20:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:20:03.409798  543705 memory.go:184] no items to output this cycle
I0319 18:20:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 18:20:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:20:13.409776  543705 memory.go:191] Add success.
W0319 18:20:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 18:20:13.409807  543705 cpu.go:282] Add success.
W0319 18:20:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:20:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:20:13.420158  543705 net.go:648] Add success.
I0319 18:20:13.423049  543705 net.go:770] primary dev: ETH0
I0319 18:20:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:20:13.423090  543705 net.go:698] Add success.
I0319 18:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:20:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:20:14.455301  543705 disk_worker.go:708] disk space is not compliant
W0319 18:20:14.455307  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:20:14.456960  543705 disk_worker.go:494] system disk:vda1
I0319 18:20:14.456988  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:20:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:20:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:20:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:20:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:20:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:20:20.425826  543705 disk_info.go:125] begin check local disk info of client
I0319 18:20:20.428280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:20:20.428286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c3d40 0xc0004c3d80]
E0319 18:20:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:20:23.409809  543705 memory.go:184] no items to output this cycle
I0319 18:20:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:20:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:20:33.409777  543705 memory.go:184] no items to output this cycle
I0319 18:20:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 18:20:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:20:43.409811  543705 memory.go:191] Add success.
I0319 18:20:43.409814  543705 cpu.go:282] Add success.
I0319 18:20:43.420033  543705 net.go:648] Add success.
I0319 18:20:43.422961  543705 net.go:770] primary dev: ETH0
I0319 18:20:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:20:43.422993  543705 net.go:698] Add success.
I0319 18:20:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:20:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:20:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:20:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:20:53.409783  543705 memory.go:184] no items to output this cycle
I0319 18:20:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 18:21:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:21:03.409767  543705 memory.go:184] no items to output this cycle
I0319 18:21:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 18:21:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:21:13.409782  543705 memory.go:191] Add success.
I0319 18:21:13.409802  543705 cpu.go:282] Add success.
W0319 18:21:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:21:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:21:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:21:13.420036  543705 net.go:648] Add success.
I0319 18:21:13.422790  543705 net.go:770] primary dev: ETH0
I0319 18:21:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:21:13.422817  543705 net.go:698] Add success.
I0319 18:21:13.468880  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44d7d0c3-f62f-4ac6-be99-2b731370d589","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:21:13.468911  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:21:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:21:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 18:21:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:21:14.456537  543705 disk_worker.go:494] system disk:vda1
I0319 18:21:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:21:15.455793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:21:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:21:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:21:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:21:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:21:20.428826  543705 disk_info.go:125] begin check local disk info of client
I0319 18:21:20.431436  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:21:20.431444  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c38c0 0xc0004c3900]
E0319 18:21:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:21:23.409787  543705 memory.go:184] no items to output this cycle
I0319 18:21:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 18:21:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:21:33.409797  543705 memory.go:184] no items to output this cycle
I0319 18:21:33.409808  543705 cpu.go:275] no items to output this cycle
I0319 18:21:37.803711  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:21:37.803718  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:21:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:21:43.410645  543705 memory.go:191] Add success.
I0319 18:21:43.409806  543705 cpu.go:282] Add success.
I0319 18:21:43.420360  543705 net.go:648] Add success.
I0319 18:21:43.423135  543705 net.go:770] primary dev: ETH0
I0319 18:21:43.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:21:43.423169  543705 net.go:698] Add success.
I0319 18:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:21:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:21:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:21:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:21:53.409768  543705 memory.go:184] no items to output this cycle
I0319 18:21:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 18:22:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:22:03.409761  543705 memory.go:184] no items to output this cycle
I0319 18:22:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 18:22:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:22:13.409787  543705 memory.go:191] Add success.
I0319 18:22:13.409791  543705 cpu.go:282] Add success.
W0319 18:22:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:22:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:22:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:22:13.420143  543705 net.go:648] Add success.
I0319 18:22:13.423020  543705 net.go:770] primary dev: ETH0
I0319 18:22:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:22:13.423053  543705 net.go:698] Add success.
W0319 18:22:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:22:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0319 18:22:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:22:14.456969  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:22:14.456978  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:22:14.456984  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:22:14.457023  543705 disk_worker.go:494] system disk:vda1
I0319 18:22:14.457052  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:22:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:22:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:22:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:22:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:22:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:22:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:22:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:22:20.431852  543705 disk_info.go:125] begin check local disk info of client
I0319 18:22:20.434380  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:22:20.434386  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3d00 0xc0002a3d40]
E0319 18:22:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:22:23.409811  543705 memory.go:184] no items to output this cycle
I0319 18:22:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 18:22:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:22:33.409800  543705 memory.go:184] no items to output this cycle
I0319 18:22:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:22:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:22:43.409783  543705 memory.go:191] Add success.
I0319 18:22:43.409802  543705 cpu.go:282] Add success.
I0319 18:22:43.419991  543705 net.go:648] Add success.
I0319 18:22:43.422750  543705 net.go:770] primary dev: ETH0
I0319 18:22:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:22:43.422779  543705 net.go:698] Add success.
I0319 18:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:22:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:22:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:22:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:22:53.409792  543705 memory.go:184] no items to output this cycle
I0319 18:22:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 18:23:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:23:03.409776  543705 memory.go:184] no items to output this cycle
I0319 18:23:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 18:23:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:23:13.409793  543705 memory.go:191] Add success.
I0319 18:23:13.409796  543705 cpu.go:282] Add success.
W0319 18:23:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:23:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:23:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:23:13.420328  543705 net.go:648] Add success.
I0319 18:23:13.423035  543705 net.go:770] primary dev: ETH0
I0319 18:23:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:23:13.423065  543705 net.go:698] Add success.
I0319 18:23:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:23:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:23:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 18:23:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:23:14.456581  543705 disk_worker.go:494] system disk:vda1
I0319 18:23:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:23:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:23:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:23:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:23:16.472517  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:23:20.434873  543705 disk_info.go:125] begin check local disk info of client
I0319 18:23:20.437387  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:23:20.437393  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051d240 0xc00051d280]
E0319 18:23:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:23:23.409810  543705 memory.go:184] no items to output this cycle
I0319 18:23:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 18:23:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:23:33.409779  543705 memory.go:184] no items to output this cycle
I0319 18:23:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 18:23:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:23:43.409790  543705 memory.go:191] Add success.
I0319 18:23:43.409791  543705 cpu.go:282] Add success.
I0319 18:23:43.419867  543705 net.go:648] Add success.
I0319 18:23:43.422745  543705 net.go:770] primary dev: ETH0
I0319 18:23:43.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:23:43.422771  543705 net.go:698] Add success.
I0319 18:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:23:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:23:53.409804  543705 memory.go:184] no items to output this cycle
I0319 18:23:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:24:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:24:03.409770  543705 memory.go:184] no items to output this cycle
I0319 18:24:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 18:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:24:13.409810  543705 memory.go:191] Add success.
I0319 18:24:13.409819  543705 cpu.go:282] Add success.
W0319 18:24:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:24:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:24:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:24:13.420335  543705 net.go:648] Add success.
I0319 18:24:13.423394  543705 net.go:770] primary dev: ETH0
I0319 18:24:13.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:24:13.423418  543705 net.go:698] Add success.
I0319 18:24:13.470680  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2c14d1fa-fd2c-4b37-bbc5-06ec4a8a2137","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:24:13.470710  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:24:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:24:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:24:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 18:24:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:24:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 18:24:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:24:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:24:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:24:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:24:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:24:20.437882  543705 disk_info.go:125] begin check local disk info of client
I0319 18:24:20.440412  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:24:20.440418  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a63c0 0xc0004a6400]
E0319 18:24:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:24:23.409813  543705 memory.go:184] no items to output this cycle
I0319 18:24:23.409824  543705 cpu.go:275] no items to output this cycle
E0319 18:24:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:24:33.409778  543705 memory.go:184] no items to output this cycle
I0319 18:24:33.409782  543705 cpu.go:275] no items to output this cycle
I0319 18:24:37.805729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:24:37.805734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:24:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:24:43.410722  543705 memory.go:191] Add success.
I0319 18:24:43.409806  543705 cpu.go:282] Add success.
I0319 18:24:43.420415  543705 net.go:648] Add success.
I0319 18:24:43.423275  543705 net.go:770] primary dev: ETH0
I0319 18:24:43.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:24:43.423304  543705 net.go:698] Add success.
I0319 18:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:24:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:24:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:24:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:24:53.409807  543705 memory.go:184] no items to output this cycle
I0319 18:24:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:25:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:25:03.409765  543705 memory.go:184] no items to output this cycle
I0319 18:25:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 18:25:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:25:13.409797  543705 memory.go:191] Add success.
I0319 18:25:13.409816  543705 cpu.go:282] Add success.
W0319 18:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:25:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:25:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:25:13.419709  543705 net.go:648] Add success.
I0319 18:25:13.422561  543705 net.go:770] primary dev: ETH0
I0319 18:25:13.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:25:13.422595  543705 net.go:698] Add success.
I0319 18:25:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:25:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:25:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 18:25:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:25:14.456553  543705 disk_worker.go:494] system disk:vda1
I0319 18:25:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:25:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:25:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:25:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:25:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:25:20.440892  543705 disk_info.go:125] begin check local disk info of client
I0319 18:25:20.443388  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:25:20.443396  543705 disk_info.go:196] parse disk info done, disk is : [0xc000575cc0 0xc000575d00]
E0319 18:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:25:23.409781  543705 memory.go:184] no items to output this cycle
I0319 18:25:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 18:25:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:25:33.409781  543705 memory.go:184] no items to output this cycle
I0319 18:25:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 18:25:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:25:43.409773  543705 memory.go:191] Add success.
I0319 18:25:43.409801  543705 cpu.go:282] Add success.
I0319 18:25:43.419906  543705 net.go:648] Add success.
I0319 18:25:43.422898  543705 net.go:770] primary dev: ETH0
I0319 18:25:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:25:43.422924  543705 net.go:698] Add success.
I0319 18:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:25:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:25:53.409786  543705 memory.go:184] no items to output this cycle
I0319 18:25:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 18:26:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:26:03.409767  543705 memory.go:184] no items to output this cycle
I0319 18:26:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 18:26:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:26:13.409812  543705 cpu.go:282] Add success.
I0319 18:26:13.409833  543705 memory.go:191] Add success.
W0319 18:26:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:26:13.409898  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:26:13.409903  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:26:13.419888  543705 net.go:648] Add success.
I0319 18:26:13.423114  543705 net.go:770] primary dev: ETH0
I0319 18:26:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:26:13.423153  543705 net.go:698] Add success.
I0319 18:26:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:26:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:26:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0319 18:26:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:26:14.456616  543705 disk_worker.go:494] system disk:vda1
I0319 18:26:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:26:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:26:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:26:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:26:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:26:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:26:20.443924  543705 disk_info.go:125] begin check local disk info of client
I0319 18:26:20.446375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:26:20.446382  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e3c0 0xc00035e400]
E0319 18:26:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:26:23.409811  543705 memory.go:184] no items to output this cycle
I0319 18:26:23.409824  543705 cpu.go:275] no items to output this cycle
E0319 18:26:33.410170  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:26:33.410188  543705 memory.go:184] no items to output this cycle
I0319 18:26:33.410255  543705 cpu.go:275] no items to output this cycle
E0319 18:26:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:26:43.409815  543705 memory.go:191] Add success.
I0319 18:26:43.409823  543705 cpu.go:282] Add success.
I0319 18:26:43.419995  543705 net.go:648] Add success.
I0319 18:26:43.423053  543705 net.go:770] primary dev: ETH0
I0319 18:26:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:26:43.423089  543705 net.go:698] Add success.
I0319 18:26:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:26:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:26:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:26:53.409809  543705 memory.go:184] no items to output this cycle
I0319 18:26:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:27:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:27:03.409764  543705 memory.go:184] no items to output this cycle
I0319 18:27:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 18:27:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:27:13.409795  543705 memory.go:191] Add success.
I0319 18:27:13.409796  543705 cpu.go:282] Add success.
W0319 18:27:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:27:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:27:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:27:13.420129  543705 net.go:648] Add success.
I0319 18:27:13.428576  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 18:27:13.428644  543705 net.go:770] primary dev: ETH0
I0319 18:27:13.428657  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:27:13.428670  543705 net.go:698] Add success.
I0319 18:27:13.453249  543705 event_worker.go:152] Polling the log file for events...
I0319 18:27:13.463810  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d639cc80-2e79-48fa-b940-62b397c95fff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:27:13.463841  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 18:27:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:27:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 18:27:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:27:14.456833  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:27:14.456842  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:27:14.456848  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:27:14.456880  543705 disk_worker.go:494] system disk:vda1
I0319 18:27:14.456908  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:27:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:27:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:27:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:27:16.457900  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:27:16.457955  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:27:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:27:16.472290  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:27:20.446924  543705 disk_info.go:125] begin check local disk info of client
I0319 18:27:20.449303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:27:20.449309  543705 disk_info.go:196] parse disk info done, disk is : [0xc000397300 0xc000397340]
E0319 18:27:23.410205  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:27:23.410224  543705 memory.go:184] no items to output this cycle
I0319 18:27:23.410240  543705 cpu.go:275] no items to output this cycle
E0319 18:27:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:27:33.409764  543705 memory.go:184] no items to output this cycle
I0319 18:27:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 18:27:37.805866  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:27:37.805873  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:27:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:27:43.410837  543705 memory.go:191] Add success.
I0319 18:27:43.409809  543705 cpu.go:282] Add success.
I0319 18:27:43.420585  543705 net.go:648] Add success.
I0319 18:27:43.423767  543705 net.go:770] primary dev: ETH0
I0319 18:27:43.423781  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:27:43.423793  543705 net.go:698] Add success.
I0319 18:27:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:27:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:27:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:27:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:27:53.409806  543705 memory.go:184] no items to output this cycle
I0319 18:27:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:28:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:28:03.409784  543705 cpu.go:275] no items to output this cycle
I0319 18:28:03.409789  543705 memory.go:184] no items to output this cycle
E0319 18:28:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:28:13.409786  543705 memory.go:191] Add success.
I0319 18:28:13.409807  543705 cpu.go:282] Add success.
W0319 18:28:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:28:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:28:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:28:13.420053  543705 net.go:648] Add success.
I0319 18:28:13.422863  543705 net.go:770] primary dev: ETH0
I0319 18:28:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:28:13.422889  543705 net.go:698] Add success.
I0319 18:28:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:28:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:28:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0319 18:28:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:28:14.456468  543705 disk_worker.go:494] system disk:vda1
I0319 18:28:14.456511  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:28:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:28:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:28:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:28:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:28:20.449942  543705 disk_info.go:125] begin check local disk info of client
I0319 18:28:20.452446  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:28:20.452452  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003761c0 0xc000376200]
E0319 18:28:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:28:23.409814  543705 memory.go:184] no items to output this cycle
I0319 18:28:23.409827  543705 cpu.go:275] no items to output this cycle
E0319 18:28:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:28:33.409786  543705 memory.go:184] no items to output this cycle
I0319 18:28:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 18:28:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:28:43.409821  543705 memory.go:191] Add success.
I0319 18:28:43.409830  543705 cpu.go:282] Add success.
I0319 18:28:43.419965  543705 net.go:648] Add success.
I0319 18:28:43.422708  543705 net.go:770] primary dev: ETH0
I0319 18:28:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:28:43.422733  543705 net.go:698] Add success.
I0319 18:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:28:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:28:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:28:53.409799  543705 memory.go:184] no items to output this cycle
I0319 18:28:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 18:29:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:29:03.409806  543705 memory.go:184] no items to output this cycle
I0319 18:29:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 18:29:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:29:13.409821  543705 memory.go:191] Add success.
I0319 18:29:13.409832  543705 cpu.go:282] Add success.
W0319 18:29:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:29:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:29:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:29:13.420169  543705 net.go:648] Add success.
I0319 18:29:13.422939  543705 net.go:770] primary dev: ETH0
I0319 18:29:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:29:13.423165  543705 net.go:698] Add success.
I0319 18:29:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:29:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:29:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 18:29:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:29:14.456488  543705 disk_worker.go:494] system disk:vda1
I0319 18:29:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:29:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:29:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:29:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:29:20.452947  543705 disk_info.go:125] begin check local disk info of client
I0319 18:29:20.455362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:29:20.455367  543705 disk_info.go:196] parse disk info done, disk is : [0xc000535a00 0xc000535a40]
E0319 18:29:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:29:23.409817  543705 memory.go:184] no items to output this cycle
I0319 18:29:23.409828  543705 cpu.go:275] no items to output this cycle
E0319 18:29:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:29:33.409800  543705 memory.go:184] no items to output this cycle
I0319 18:29:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 18:29:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:29:43.409798  543705 memory.go:191] Add success.
I0319 18:29:43.409816  543705 cpu.go:282] Add success.
I0319 18:29:43.419873  543705 net.go:648] Add success.
I0319 18:29:43.422428  543705 net.go:770] primary dev: ETH0
I0319 18:29:43.422441  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:29:43.422454  543705 net.go:698] Add success.
I0319 18:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:29:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:29:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:29:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:29:53.409792  543705 memory.go:184] no items to output this cycle
I0319 18:29:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 18:30:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:30:03.409781  543705 memory.go:184] no items to output this cycle
I0319 18:30:03.409806  543705 cpu.go:275] no items to output this cycle
E0319 18:30:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:30:13.409833  543705 memory.go:191] Add success.
I0319 18:30:13.409844  543705 cpu.go:282] Add success.
W0319 18:30:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:30:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:30:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:30:13.420130  543705 net.go:648] Add success.
I0319 18:30:13.422885  543705 net.go:770] primary dev: ETH0
I0319 18:30:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:30:13.422911  543705 net.go:698] Add success.
I0319 18:30:13.471204  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c667f95c-0c65-486a-84b6-bd68ef877a06","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:30:13.471237  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:30:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:30:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:30:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0319 18:30:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:30:14.456498  543705 disk_worker.go:494] system disk:vda1
I0319 18:30:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:30:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:30:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:30:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:30:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:30:20.455975  543705 disk_info.go:125] begin check local disk info of client
I0319 18:30:20.458430  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:30:20.458436  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509440 0xc000509480]
E0319 18:30:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:30:23.409780  543705 memory.go:184] no items to output this cycle
I0319 18:30:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:30:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:30:33.409797  543705 memory.go:184] no items to output this cycle
I0319 18:30:33.409810  543705 cpu.go:275] no items to output this cycle
I0319 18:30:37.807738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:30:37.807745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:30:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:30:43.410661  543705 memory.go:191] Add success.
I0319 18:30:43.409804  543705 cpu.go:282] Add success.
I0319 18:30:43.420348  543705 net.go:648] Add success.
I0319 18:30:43.422802  543705 net.go:770] primary dev: ETH0
I0319 18:30:43.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:30:43.422828  543705 net.go:698] Add success.
I0319 18:30:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:30:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:30:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:30:53.410228  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:30:53.410247  543705 memory.go:184] no items to output this cycle
I0319 18:30:53.410281  543705 cpu.go:275] no items to output this cycle
E0319 18:31:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:31:03.409766  543705 memory.go:184] no items to output this cycle
I0319 18:31:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 18:31:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:31:13.409792  543705 memory.go:191] Add success.
I0319 18:31:13.409816  543705 cpu.go:282] Add success.
W0319 18:31:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:31:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:31:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:31:13.420158  543705 net.go:770] primary dev: ETH0
I0319 18:31:13.420172  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:31:13.420186  543705 net.go:698] Add success.
I0319 18:31:13.420533  543705 net.go:648] Add success.
I0319 18:31:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:31:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:31:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 18:31:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:31:14.457032  543705 disk_worker.go:494] system disk:vda1
I0319 18:31:14.457075  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:31:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:31:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:31:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:31:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:31:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:31:20.458990  543705 disk_info.go:125] begin check local disk info of client
I0319 18:31:20.461347  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:31:20.461354  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab80 0xc00007abc0]
E0319 18:31:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:31:23.409773  543705 memory.go:184] no items to output this cycle
I0319 18:31:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 18:31:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:31:33.409784  543705 cpu.go:275] no items to output this cycle
I0319 18:31:33.409791  543705 memory.go:184] no items to output this cycle
E0319 18:31:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:31:43.409792  543705 memory.go:191] Add success.
I0319 18:31:43.409794  543705 cpu.go:282] Add success.
I0319 18:31:43.419995  543705 net.go:648] Add success.
I0319 18:31:43.422813  543705 net.go:770] primary dev: ETH0
I0319 18:31:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:31:43.422839  543705 net.go:698] Add success.
I0319 18:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:31:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:31:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:31:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:31:53.409772  543705 memory.go:184] no items to output this cycle
I0319 18:31:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 18:32:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:32:03.409793  543705 memory.go:184] no items to output this cycle
I0319 18:32:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 18:32:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:32:13.409814  543705 memory.go:191] Add success.
I0319 18:32:13.409824  543705 cpu.go:282] Add success.
W0319 18:32:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:32:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:32:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:32:13.420519  543705 net.go:648] Add success.
I0319 18:32:13.423219  543705 net.go:770] primary dev: ETH0
I0319 18:32:13.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:32:13.423247  543705 net.go:698] Add success.
W0319 18:32:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:32:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 18:32:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:32:14.456047  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:32:14.456057  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:32:14.456063  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:32:14.456911  543705 disk_worker.go:494] system disk:vda1
I0319 18:32:14.456940  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:32:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:32:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:32:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:32:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:32:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:32:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:32:16.472351  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:32:20.462007  543705 disk_info.go:125] begin check local disk info of client
I0319 18:32:20.464439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:32:20.464446  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384300 0xc000384340]
E0319 18:32:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:32:23.409812  543705 memory.go:184] no items to output this cycle
I0319 18:32:23.409820  543705 cpu.go:275] no items to output this cycle
E0319 18:32:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:32:33.409776  543705 memory.go:184] no items to output this cycle
I0319 18:32:33.409783  543705 cpu.go:275] no items to output this cycle
E0319 18:32:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:32:43.409795  543705 memory.go:191] Add success.
I0319 18:32:43.409796  543705 cpu.go:282] Add success.
I0319 18:32:43.419978  543705 net.go:648] Add success.
I0319 18:32:43.422786  543705 net.go:770] primary dev: ETH0
I0319 18:32:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:32:43.422813  543705 net.go:698] Add success.
I0319 18:32:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:32:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:32:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:32:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:32:53.409788  543705 memory.go:184] no items to output this cycle
I0319 18:32:53.409793  543705 cpu.go:275] no items to output this cycle
E0319 18:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:33:03.409779  543705 memory.go:184] no items to output this cycle
I0319 18:33:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 18:33:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:33:13.409799  543705 memory.go:191] Add success.
I0319 18:33:13.409799  543705 cpu.go:282] Add success.
W0319 18:33:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:33:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:33:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:33:13.420291  543705 net.go:648] Add success.
I0319 18:33:13.423546  543705 net.go:770] primary dev: ETH0
I0319 18:33:13.423559  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:33:13.423571  543705 net.go:698] Add success.
I0319 18:33:13.469713  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"02870f2e-2b2f-470e-ab1e-4de4c8144180","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:33:13.469748  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:33:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:33:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:33:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0319 18:33:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:33:14.457473  543705 disk_worker.go:494] system disk:vda1
I0319 18:33:14.457512  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:33:15.456024  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:33:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:33:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:33:20.465013  543705 disk_info.go:125] begin check local disk info of client
I0319 18:33:20.467529  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:33:20.467536  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004645c0 0xc000464600]
E0319 18:33:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:33:23.409789  543705 memory.go:184] no items to output this cycle
I0319 18:33:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 18:33:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:33:33.409782  543705 memory.go:184] no items to output this cycle
I0319 18:33:33.409786  543705 cpu.go:275] no items to output this cycle
I0319 18:33:37.809732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:33:37.809739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:33:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:33:43.410670  543705 memory.go:191] Add success.
I0319 18:33:43.409799  543705 cpu.go:282] Add success.
I0319 18:33:43.420221  543705 net.go:770] primary dev: ETH0
I0319 18:33:43.420234  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:33:43.420247  543705 net.go:698] Add success.
I0319 18:33:43.420594  543705 net.go:648] Add success.
I0319 18:33:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:33:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:33:53.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:33:53.410394  543705 memory.go:184] no items to output this cycle
I0319 18:33:53.410396  543705 cpu.go:275] no items to output this cycle
E0319 18:34:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:34:03.409767  543705 memory.go:184] no items to output this cycle
I0319 18:34:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 18:34:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:34:13.409794  543705 memory.go:191] Add success.
I0319 18:34:13.409813  543705 cpu.go:282] Add success.
W0319 18:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:34:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:34:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:34:13.420107  543705 net.go:648] Add success.
I0319 18:34:13.422873  543705 net.go:770] primary dev: ETH0
I0319 18:34:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:34:13.422898  543705 net.go:698] Add success.
I0319 18:34:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:34:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:34:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 18:34:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:34:14.456822  543705 disk_worker.go:494] system disk:vda1
I0319 18:34:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:34:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:34:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:34:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:34:16.472431  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:34:20.468026  543705 disk_info.go:125] begin check local disk info of client
I0319 18:34:20.470450  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:34:20.470456  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4280 0xc0004b42c0]
E0319 18:34:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:34:23.409812  543705 memory.go:184] no items to output this cycle
I0319 18:34:23.409820  543705 cpu.go:275] no items to output this cycle
E0319 18:34:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:34:33.409786  543705 memory.go:184] no items to output this cycle
I0319 18:34:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 18:34:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:34:43.409792  543705 memory.go:191] Add success.
I0319 18:34:43.409793  543705 cpu.go:282] Add success.
I0319 18:34:43.419877  543705 net.go:648] Add success.
I0319 18:34:43.422686  543705 net.go:770] primary dev: ETH0
I0319 18:34:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:34:43.422711  543705 net.go:698] Add success.
I0319 18:34:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:34:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:34:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:34:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:34:53.409821  543705 memory.go:184] no items to output this cycle
I0319 18:34:53.409829  543705 cpu.go:275] no items to output this cycle
E0319 18:35:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:35:03.409776  543705 memory.go:184] no items to output this cycle
I0319 18:35:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 18:35:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:35:13.409785  543705 memory.go:191] Add success.
I0319 18:35:13.409803  543705 cpu.go:282] Add success.
W0319 18:35:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:35:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:35:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:35:13.420149  543705 net.go:648] Add success.
I0319 18:35:13.423374  543705 net.go:770] primary dev: ETH0
I0319 18:35:13.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:35:13.423402  543705 net.go:698] Add success.
I0319 18:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:35:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:35:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0319 18:35:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:35:14.456582  543705 disk_worker.go:494] system disk:vda1
I0319 18:35:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:35:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:35:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:35:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:35:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:35:20.471046  543705 disk_info.go:125] begin check local disk info of client
I0319 18:35:20.473446  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:35:20.473452  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2100 0xc0003b2140]
E0319 18:35:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:35:23.409780  543705 memory.go:184] no items to output this cycle
I0319 18:35:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 18:35:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:35:33.409782  543705 memory.go:184] no items to output this cycle
I0319 18:35:33.409787  543705 cpu.go:275] no items to output this cycle
E0319 18:35:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:35:43.409775  543705 memory.go:191] Add success.
I0319 18:35:43.409816  543705 cpu.go:282] Add success.
I0319 18:35:43.419967  543705 net.go:648] Add success.
I0319 18:35:43.422927  543705 net.go:770] primary dev: ETH0
I0319 18:35:43.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:35:43.422961  543705 net.go:698] Add success.
I0319 18:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:35:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:35:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:35:53.409787  543705 memory.go:184] no items to output this cycle
I0319 18:35:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 18:36:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:36:03.409767  543705 memory.go:184] no items to output this cycle
I0319 18:36:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 18:36:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:36:13.409795  543705 cpu.go:282] Add success.
I0319 18:36:13.409796  543705 memory.go:191] Add success.
W0319 18:36:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:36:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:36:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:36:13.420054  543705 net.go:648] Add success.
I0319 18:36:13.422937  543705 net.go:770] primary dev: ETH0
I0319 18:36:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:36:13.422961  543705 net.go:698] Add success.
I0319 18:36:13.464294  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d98836fa-a207-4fd0-85ca-f1216748338f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:36:13.464328  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:36:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:36:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:36:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 18:36:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:36:14.456676  543705 disk_worker.go:494] system disk:vda1
I0319 18:36:14.456705  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:36:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:36:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:36:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:36:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:36:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:36:20.474058  543705 disk_info.go:125] begin check local disk info of client
I0319 18:36:20.476519  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:36:20.476526  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d0700 0xc0004d0740]
E0319 18:36:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:36:23.409786  543705 cpu.go:275] no items to output this cycle
I0319 18:36:23.409796  543705 memory.go:184] no items to output this cycle
E0319 18:36:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:36:33.409799  543705 memory.go:184] no items to output this cycle
I0319 18:36:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 18:36:37.809879  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:36:37.809885  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:36:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:36:43.410848  543705 memory.go:191] Add success.
I0319 18:36:43.409811  543705 cpu.go:282] Add success.
I0319 18:36:43.420596  543705 net.go:648] Add success.
I0319 18:36:43.423112  543705 net.go:770] primary dev: ETH0
I0319 18:36:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:36:43.423141  543705 net.go:698] Add success.
I0319 18:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:36:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:36:53.409788  543705 memory.go:184] no items to output this cycle
I0319 18:36:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 18:37:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:37:03.409802  543705 memory.go:184] no items to output this cycle
I0319 18:37:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:37:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:37:13.409786  543705 memory.go:191] Add success.
I0319 18:37:13.409807  543705 cpu.go:282] Add success.
W0319 18:37:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:37:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:37:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:37:13.420132  543705 net.go:648] Add success.
I0319 18:37:13.422824  543705 net.go:770] primary dev: ETH0
I0319 18:37:13.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:37:13.422850  543705 net.go:698] Add success.
I0319 18:37:13.453401  543705 event_worker.go:152] Polling the log file for events...
W0319 18:37:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:37:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 18:37:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:37:14.455916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:37:14.455925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:37:14.455931  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:37:14.456565  543705 disk_worker.go:494] system disk:vda1
I0319 18:37:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:37:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:37:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:37:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:37:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:37:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:37:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:37:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:37:20.477073  543705 disk_info.go:125] begin check local disk info of client
I0319 18:37:20.479477  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:37:20.479483  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003440c0 0xc000344100]
E0319 18:37:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:37:23.409812  543705 memory.go:184] no items to output this cycle
I0319 18:37:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:37:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:37:33.409779  543705 cpu.go:275] no items to output this cycle
I0319 18:37:33.409785  543705 memory.go:184] no items to output this cycle
E0319 18:37:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:37:43.409809  543705 memory.go:191] Add success.
I0319 18:37:43.409818  543705 cpu.go:282] Add success.
I0319 18:37:43.419944  543705 net.go:648] Add success.
I0319 18:37:43.422867  543705 net.go:770] primary dev: ETH0
I0319 18:37:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:37:43.422896  543705 net.go:698] Add success.
I0319 18:37:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:37:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:37:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:37:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:37:53.409782  543705 memory.go:184] no items to output this cycle
I0319 18:37:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 18:38:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:38:03.409776  543705 memory.go:184] no items to output this cycle
I0319 18:38:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 18:38:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:38:13.409792  543705 memory.go:191] Add success.
I0319 18:38:13.409798  543705 cpu.go:282] Add success.
W0319 18:38:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:38:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:38:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:38:13.420142  543705 net.go:648] Add success.
I0319 18:38:13.422997  543705 net.go:770] primary dev: ETH0
I0319 18:38:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:38:13.423023  543705 net.go:698] Add success.
I0319 18:38:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:38:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:38:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 18:38:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:38:14.456606  543705 disk_worker.go:494] system disk:vda1
I0319 18:38:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:38:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:38:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:38:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:38:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:38:20.480085  543705 disk_info.go:125] begin check local disk info of client
I0319 18:38:20.482911  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:38:20.482919  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e000 0xc00049e040]
E0319 18:38:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:38:23.409789  543705 memory.go:184] no items to output this cycle
I0319 18:38:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 18:38:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:38:33.409775  543705 memory.go:184] no items to output this cycle
I0319 18:38:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 18:38:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:38:43.409807  543705 memory.go:191] Add success.
I0319 18:38:43.409815  543705 cpu.go:282] Add success.
I0319 18:38:43.419978  543705 net.go:648] Add success.
I0319 18:38:43.422600  543705 net.go:770] primary dev: ETH0
I0319 18:38:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:38:43.422629  543705 net.go:698] Add success.
I0319 18:38:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:38:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:38:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:38:53.409775  543705 memory.go:184] no items to output this cycle
I0319 18:38:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 18:39:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:39:03.409777  543705 memory.go:184] no items to output this cycle
I0319 18:39:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 18:39:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:39:13.409788  543705 cpu.go:282] Add success.
I0319 18:39:13.409795  543705 memory.go:191] Add success.
W0319 18:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:39:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:39:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:39:13.420056  543705 net.go:648] Add success.
I0319 18:39:13.422695  543705 net.go:770] primary dev: ETH0
I0319 18:39:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:39:13.422724  543705 net.go:698] Add success.
I0319 18:39:13.467736  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"241bfd0f-cd78-48b8-a1bf-96e3a8e23954","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:39:13.467769  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:39:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:39:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:39:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 18:39:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:39:14.456601  543705 disk_worker.go:494] system disk:vda1
I0319 18:39:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:39:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:39:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:39:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:39:20.483060  543705 disk_info.go:125] begin check local disk info of client
I0319 18:39:20.485727  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:39:20.485733  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296040 0xc000296080]
E0319 18:39:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:39:23.409814  543705 memory.go:184] no items to output this cycle
I0319 18:39:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:39:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:39:33.409782  543705 memory.go:184] no items to output this cycle
I0319 18:39:33.409791  543705 cpu.go:275] no items to output this cycle
I0319 18:39:37.810025  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:39:37.810032  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:39:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:39:43.410639  543705 memory.go:191] Add success.
I0319 18:39:43.409828  543705 cpu.go:282] Add success.
I0319 18:39:43.420324  543705 net.go:648] Add success.
I0319 18:39:43.423148  543705 net.go:770] primary dev: ETH0
I0319 18:39:43.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:39:43.423173  543705 net.go:698] Add success.
I0319 18:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:39:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:39:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:39:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:39:53.409804  543705 memory.go:184] no items to output this cycle
I0319 18:39:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:40:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:40:03.409771  543705 memory.go:184] no items to output this cycle
I0319 18:40:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 18:40:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:40:13.409812  543705 memory.go:191] Add success.
I0319 18:40:13.409819  543705 cpu.go:282] Add success.
W0319 18:40:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:40:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:40:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:40:13.420197  543705 net.go:648] Add success.
I0319 18:40:13.423155  543705 net.go:770] primary dev: ETH0
I0319 18:40:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:40:13.423181  543705 net.go:698] Add success.
I0319 18:40:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:40:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:40:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0319 18:40:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:40:14.456499  543705 disk_worker.go:494] system disk:vda1
I0319 18:40:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:40:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:40:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:40:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:40:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:40:20.486073  543705 disk_info.go:125] begin check local disk info of client
I0319 18:40:20.488515  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:40:20.488521  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b20c0 0xc0003b2100]
E0319 18:40:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:40:23.409780  543705 memory.go:184] no items to output this cycle
I0319 18:40:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 18:40:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:40:33.409798  543705 memory.go:184] no items to output this cycle
I0319 18:40:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:40:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:40:43.409782  543705 memory.go:191] Add success.
I0319 18:40:43.409808  543705 cpu.go:282] Add success.
I0319 18:40:43.419892  543705 net.go:648] Add success.
I0319 18:40:43.422692  543705 net.go:770] primary dev: ETH0
I0319 18:40:43.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:40:43.422721  543705 net.go:698] Add success.
I0319 18:40:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:40:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:40:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:40:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:40:53.409787  543705 memory.go:184] no items to output this cycle
I0319 18:40:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 18:41:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:41:03.409802  543705 memory.go:184] no items to output this cycle
I0319 18:41:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 18:41:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:41:13.409808  543705 memory.go:191] Add success.
I0319 18:41:13.409817  543705 cpu.go:282] Add success.
W0319 18:41:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:41:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:41:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:41:13.420132  543705 net.go:648] Add success.
I0319 18:41:13.422955  543705 net.go:770] primary dev: ETH0
I0319 18:41:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:41:13.422983  543705 net.go:698] Add success.
I0319 18:41:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:41:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:41:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 18:41:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:41:14.456568  543705 disk_worker.go:494] system disk:vda1
I0319 18:41:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:41:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:41:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:41:20.489120  543705 disk_info.go:125] begin check local disk info of client
I0319 18:41:20.491618  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:41:20.491623  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253980 0xc0002539c0]
I0319 18:41:23.409944  543705 cpu.go:275] no items to output this cycle
E0319 18:41:23.410015  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:41:23.410033  543705 memory.go:184] no items to output this cycle
E0319 18:41:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:41:33.409774  543705 memory.go:184] no items to output this cycle
I0319 18:41:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 18:41:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:41:43.409794  543705 memory.go:191] Add success.
I0319 18:41:43.409805  543705 cpu.go:282] Add success.
I0319 18:41:43.419909  543705 net.go:648] Add success.
I0319 18:41:43.422538  543705 net.go:770] primary dev: ETH0
I0319 18:41:43.422551  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:41:43.422563  543705 net.go:698] Add success.
I0319 18:41:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:41:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:41:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:41:53.410208  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:41:53.410225  543705 memory.go:184] no items to output this cycle
I0319 18:41:53.410253  543705 cpu.go:275] no items to output this cycle
E0319 18:42:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:42:03.409782  543705 memory.go:184] no items to output this cycle
I0319 18:42:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 18:42:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:42:13.409798  543705 memory.go:191] Add success.
I0319 18:42:13.409800  543705 cpu.go:282] Add success.
W0319 18:42:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:42:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:42:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:42:13.420042  543705 net.go:648] Add success.
I0319 18:42:13.422759  543705 net.go:770] primary dev: ETH0
I0319 18:42:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:42:13.422784  543705 net.go:698] Add success.
I0319 18:42:13.464258  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5dce0ac3-f385-49c8-acb4-49b2e5e37360","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:42:13.464292  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 18:42:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:42:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 18:42:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:42:14.456956  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:42:14.456965  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:42:14.456970  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:42:14.457020  543705 disk_worker.go:494] system disk:vda1
I0319 18:42:14.457049  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:42:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:42:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:42:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:42:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:42:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:42:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:42:16.472341  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:42:20.492168  543705 disk_info.go:125] begin check local disk info of client
I0319 18:42:20.494592  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:42:20.494598  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c05c0 0xc0003c0600]
E0319 18:42:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:42:23.409826  543705 memory.go:184] no items to output this cycle
I0319 18:42:23.409947  543705 cpu.go:275] no items to output this cycle
E0319 18:42:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:42:33.409788  543705 memory.go:184] no items to output this cycle
I0319 18:42:33.409795  543705 cpu.go:275] no items to output this cycle
I0319 18:42:37.810169  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:42:37.810175  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:42:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:42:43.410659  543705 memory.go:191] Add success.
I0319 18:42:43.409831  543705 cpu.go:282] Add success.
I0319 18:42:43.420445  543705 net.go:648] Add success.
I0319 18:42:43.423088  543705 net.go:770] primary dev: ETH0
I0319 18:42:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:42:43.423113  543705 net.go:698] Add success.
I0319 18:42:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:42:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:42:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:42:53.409795  543705 memory.go:184] no items to output this cycle
I0319 18:42:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 18:43:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:43:03.409781  543705 memory.go:184] no items to output this cycle
I0319 18:43:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 18:43:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:43:13.409827  543705 memory.go:191] Add success.
I0319 18:43:13.409838  543705 cpu.go:282] Add success.
W0319 18:43:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:43:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:43:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:43:13.420154  543705 net.go:648] Add success.
I0319 18:43:13.422928  543705 net.go:770] primary dev: ETH0
I0319 18:43:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:43:13.422954  543705 net.go:698] Add success.
I0319 18:43:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:43:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:43:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 18:43:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:43:14.456571  543705 disk_worker.go:494] system disk:vda1
I0319 18:43:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:43:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:43:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:43:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:43:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:43:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:43:20.495166  543705 disk_info.go:125] begin check local disk info of client
I0319 18:43:20.497826  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:43:20.497832  543705 disk_info.go:196] parse disk info done, disk is : [0xc000263c00 0xc000263c40]
E0319 18:43:23.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:43:23.409834  543705 memory.go:184] no items to output this cycle
I0319 18:43:23.409842  543705 cpu.go:275] no items to output this cycle
E0319 18:43:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:43:33.409816  543705 memory.go:184] no items to output this cycle
I0319 18:43:33.409827  543705 cpu.go:275] no items to output this cycle
E0319 18:43:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:43:43.409775  543705 memory.go:191] Add success.
I0319 18:43:43.409805  543705 cpu.go:282] Add success.
I0319 18:43:43.419885  543705 net.go:648] Add success.
I0319 18:43:43.422936  543705 net.go:770] primary dev: ETH0
I0319 18:43:43.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:43:43.422961  543705 net.go:698] Add success.
I0319 18:43:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:43:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:43:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:43:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:43:53.409783  543705 cpu.go:275] no items to output this cycle
I0319 18:43:53.409794  543705 memory.go:184] no items to output this cycle
E0319 18:44:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:44:03.409780  543705 memory.go:184] no items to output this cycle
I0319 18:44:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:44:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:44:13.409807  543705 memory.go:191] Add success.
I0319 18:44:13.409812  543705 cpu.go:282] Add success.
W0319 18:44:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:44:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:44:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:44:13.420123  543705 net.go:648] Add success.
I0319 18:44:13.422695  543705 net.go:770] primary dev: ETH0
I0319 18:44:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:44:13.422720  543705 net.go:698] Add success.
I0319 18:44:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:44:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:44:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0319 18:44:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:44:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 18:44:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:44:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:44:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:44:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:44:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:44:20.497912  543705 disk_info.go:125] begin check local disk info of client
I0319 18:44:20.500417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:44:20.500424  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b200 0xc00036b240]
E0319 18:44:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:44:23.409775  543705 memory.go:184] no items to output this cycle
I0319 18:44:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 18:44:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:44:33.409804  543705 memory.go:184] no items to output this cycle
I0319 18:44:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 18:44:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:44:43.409808  543705 memory.go:191] Add success.
I0319 18:44:43.409817  543705 cpu.go:282] Add success.
I0319 18:44:43.420006  543705 net.go:648] Add success.
I0319 18:44:43.422774  543705 net.go:770] primary dev: ETH0
I0319 18:44:43.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:44:43.422799  543705 net.go:698] Add success.
I0319 18:44:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:44:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:44:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:44:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:44:53.409779  543705 memory.go:184] no items to output this cycle
I0319 18:44:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 18:45:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:45:03.409786  543705 memory.go:184] no items to output this cycle
I0319 18:45:03.409790  543705 cpu.go:275] no items to output this cycle
I0319 18:45:13.409788  543705 cpu.go:282] Add success.
E0319 18:45:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:45:13.409809  543705 memory.go:191] Add success.
W0319 18:45:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:45:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:45:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:45:13.420051  543705 net.go:648] Add success.
I0319 18:45:13.423089  543705 net.go:770] primary dev: ETH0
I0319 18:45:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:45:13.423115  543705 net.go:698] Add success.
I0319 18:45:13.470292  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"67db016b-4c18-45df-8afc-69c8402dae16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:45:13.470325  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:45:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:45:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 18:45:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:45:14.456514  543705 disk_worker.go:494] system disk:vda1
I0319 18:45:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:45:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:45:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:45:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:45:20.501190  543705 disk_info.go:125] begin check local disk info of client
I0319 18:45:20.503603  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:45:20.503609  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5380 0xc0002a53c0]
E0319 18:45:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:45:23.409807  543705 memory.go:184] no items to output this cycle
I0319 18:45:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 18:45:33.410651  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:45:33.410672  543705 memory.go:184] no items to output this cycle
I0319 18:45:33.410688  543705 cpu.go:275] no items to output this cycle
I0319 18:45:37.811761  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:45:37.811768  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:45:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:45:43.410598  543705 memory.go:191] Add success.
I0319 18:45:43.409788  543705 cpu.go:282] Add success.
I0319 18:45:43.420309  543705 net.go:648] Add success.
I0319 18:45:43.422714  543705 net.go:770] primary dev: ETH0
I0319 18:45:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:45:43.422747  543705 net.go:698] Add success.
I0319 18:45:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:45:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:45:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:45:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:45:53.409792  543705 memory.go:184] no items to output this cycle
I0319 18:45:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 18:46:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:46:03.409786  543705 memory.go:184] no items to output this cycle
I0319 18:46:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:46:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:46:13.409785  543705 memory.go:191] Add success.
W0319 18:46:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 18:46:13.409814  543705 cpu.go:282] Add success.
W0319 18:46:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:46:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:46:13.420097  543705 net.go:648] Add success.
I0319 18:46:13.422905  543705 net.go:770] primary dev: ETH0
I0319 18:46:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:46:13.422930  543705 net.go:698] Add success.
I0319 18:46:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:46:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:46:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 18:46:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:46:14.456590  543705 disk_worker.go:494] system disk:vda1
I0319 18:46:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:46:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:46:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:46:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:46:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:46:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:46:20.504210  543705 disk_info.go:125] begin check local disk info of client
I0319 18:46:20.506556  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:46:20.506562  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275480 0xc0002754c0]
E0319 18:46:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:46:23.409813  543705 memory.go:184] no items to output this cycle
I0319 18:46:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 18:46:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:46:33.409766  543705 memory.go:184] no items to output this cycle
I0319 18:46:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 18:46:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:46:43.409815  543705 memory.go:191] Add success.
I0319 18:46:43.409825  543705 cpu.go:282] Add success.
I0319 18:46:43.420178  543705 net.go:648] Add success.
I0319 18:46:43.422826  543705 net.go:770] primary dev: ETH0
I0319 18:46:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:46:43.422850  543705 net.go:698] Add success.
I0319 18:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:46:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:46:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:46:53.410417  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:46:53.410441  543705 memory.go:184] no items to output this cycle
I0319 18:46:53.410444  543705 cpu.go:275] no items to output this cycle
E0319 18:47:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:47:03.409766  543705 memory.go:184] no items to output this cycle
I0319 18:47:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 18:47:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:47:13.409788  543705 memory.go:191] Add success.
I0319 18:47:13.409793  543705 cpu.go:282] Add success.
W0319 18:47:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:47:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:47:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:47:13.420090  543705 net.go:648] Add success.
I0319 18:47:13.422772  543705 net.go:770] primary dev: ETH0
I0319 18:47:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:47:13.422798  543705 net.go:698] Add success.
I0319 18:47:13.453352  543705 event_worker.go:152] Polling the log file for events...
W0319 18:47:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:47:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 18:47:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:47:14.456862  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:47:14.456871  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:47:14.456877  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:47:14.456949  543705 disk_worker.go:494] system disk:vda1
I0319 18:47:14.456994  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:47:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:47:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:47:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:47:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:47:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:47:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:47:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:47:20.507225  543705 disk_info.go:125] begin check local disk info of client
I0319 18:47:20.509626  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:47:20.509632  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7140 0xc0003b7180]
E0319 18:47:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:47:23.409792  543705 cpu.go:275] no items to output this cycle
I0319 18:47:23.409804  543705 memory.go:184] no items to output this cycle
E0319 18:47:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:47:33.409771  543705 memory.go:184] no items to output this cycle
I0319 18:47:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 18:47:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:47:43.409815  543705 memory.go:191] Add success.
I0319 18:47:43.409822  543705 cpu.go:282] Add success.
I0319 18:47:43.419738  543705 net.go:648] Add success.
I0319 18:47:43.422853  543705 net.go:770] primary dev: ETH0
I0319 18:47:43.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:47:43.422880  543705 net.go:698] Add success.
I0319 18:47:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:47:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:47:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:47:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:47:53.409813  543705 memory.go:184] no items to output this cycle
I0319 18:47:53.409827  543705 cpu.go:275] no items to output this cycle
E0319 18:48:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:48:03.409808  543705 memory.go:184] no items to output this cycle
I0319 18:48:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 18:48:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:48:13.409788  543705 memory.go:191] Add success.
I0319 18:48:13.409791  543705 cpu.go:282] Add success.
W0319 18:48:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:48:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:48:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:48:13.420314  543705 net.go:648] Add success.
I0319 18:48:13.423111  543705 net.go:770] primary dev: ETH0
I0319 18:48:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:48:13.423137  543705 net.go:698] Add success.
I0319 18:48:13.469114  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d8b31f26-8c8c-453c-bac4-3ce918778a8a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:48:13.469150  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:48:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:48:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 18:48:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:48:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 18:48:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:48:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:48:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:48:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:48:16.472447  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:48:20.510240  543705 disk_info.go:125] begin check local disk info of client
I0319 18:48:20.512706  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:48:20.512712  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba0c0 0xc0003ba100]
E0319 18:48:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:48:23.409821  543705 memory.go:184] no items to output this cycle
I0319 18:48:23.409826  543705 cpu.go:275] no items to output this cycle
E0319 18:48:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:48:33.409798  543705 memory.go:184] no items to output this cycle
I0319 18:48:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 18:48:37.813743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:48:37.813749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:48:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:48:43.410870  543705 memory.go:191] Add success.
I0319 18:48:43.409817  543705 cpu.go:282] Add success.
I0319 18:48:43.420573  543705 net.go:648] Add success.
I0319 18:48:43.423178  543705 net.go:770] primary dev: ETH0
I0319 18:48:43.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:48:43.423205  543705 net.go:698] Add success.
I0319 18:48:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:48:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:48:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:48:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:48:53.409814  543705 memory.go:184] no items to output this cycle
I0319 18:48:53.409824  543705 cpu.go:275] no items to output this cycle
E0319 18:49:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:49:03.409782  543705 memory.go:184] no items to output this cycle
I0319 18:49:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:49:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:49:13.409806  543705 memory.go:191] Add success.
I0319 18:49:13.409815  543705 cpu.go:282] Add success.
W0319 18:49:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:49:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:49:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:49:13.420251  543705 net.go:648] Add success.
I0319 18:49:13.423046  543705 net.go:770] primary dev: ETH0
I0319 18:49:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:49:13.423070  543705 net.go:698] Add success.
I0319 18:49:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:49:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:49:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 18:49:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:49:14.456567  543705 disk_worker.go:494] system disk:vda1
I0319 18:49:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:49:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:49:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:49:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:49:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:49:20.513214  543705 disk_info.go:125] begin check local disk info of client
I0319 18:49:20.515617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:49:20.515624  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0300 0xc0003b0340]
E0319 18:49:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:49:23.409818  543705 memory.go:184] no items to output this cycle
I0319 18:49:23.409827  543705 cpu.go:275] no items to output this cycle
E0319 18:49:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:49:33.409793  543705 memory.go:184] no items to output this cycle
I0319 18:49:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 18:49:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:49:43.409793  543705 memory.go:191] Add success.
I0319 18:49:43.409796  543705 cpu.go:282] Add success.
I0319 18:49:43.420024  543705 net.go:648] Add success.
I0319 18:49:43.422728  543705 net.go:770] primary dev: ETH0
I0319 18:49:43.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:49:43.422753  543705 net.go:698] Add success.
I0319 18:49:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:49:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:49:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:49:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:49:53.409764  543705 memory.go:184] no items to output this cycle
I0319 18:49:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 18:50:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:50:03.409781  543705 memory.go:184] no items to output this cycle
I0319 18:50:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 18:50:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:50:13.409795  543705 cpu.go:282] Add success.
I0319 18:50:13.409802  543705 memory.go:191] Add success.
W0319 18:50:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:50:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:50:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:50:13.420061  543705 net.go:648] Add success.
I0319 18:50:13.422782  543705 net.go:770] primary dev: ETH0
I0319 18:50:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:50:13.422808  543705 net.go:698] Add success.
I0319 18:50:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:50:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:50:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0319 18:50:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:50:14.456619  543705 disk_worker.go:494] system disk:vda1
I0319 18:50:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:50:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:50:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:50:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:50:20.516281  543705 disk_info.go:125] begin check local disk info of client
I0319 18:50:20.518707  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:50:20.518712  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032af80 0xc00032afc0]
E0319 18:50:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:50:23.409811  543705 memory.go:184] no items to output this cycle
I0319 18:50:23.409828  543705 cpu.go:275] no items to output this cycle
E0319 18:50:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:50:33.409779  543705 memory.go:184] no items to output this cycle
I0319 18:50:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 18:50:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:50:43.409891  543705 memory.go:191] Add success.
I0319 18:50:43.409892  543705 cpu.go:282] Add success.
I0319 18:50:43.419720  543705 net.go:648] Add success.
I0319 18:50:43.422483  543705 net.go:770] primary dev: ETH0
I0319 18:50:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:50:43.422512  543705 net.go:698] Add success.
I0319 18:50:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:50:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:50:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:50:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:50:53.409779  543705 memory.go:184] no items to output this cycle
I0319 18:50:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 18:51:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:51:03.409779  543705 memory.go:184] no items to output this cycle
I0319 18:51:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 18:51:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:51:13.409789  543705 memory.go:191] Add success.
W0319 18:51:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 18:51:13.409822  543705 cpu.go:282] Add success.
W0319 18:51:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:51:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:51:13.420109  543705 net.go:648] Add success.
I0319 18:51:13.422727  543705 net.go:770] primary dev: ETH0
I0319 18:51:13.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:51:13.422752  543705 net.go:698] Add success.
I0319 18:51:13.468773  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"998d72f5-2ab1-4925-a2ce-75dbc08a0836","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:51:13.468805  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:51:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:51:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:51:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 18:51:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:51:14.456755  543705 disk_worker.go:494] system disk:vda1
I0319 18:51:14.456789  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:51:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:51:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:51:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:51:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:51:20.519235  543705 disk_info.go:125] begin check local disk info of client
I0319 18:51:20.521684  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:51:20.521691  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035efc0 0xc00035f000]
E0319 18:51:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:51:23.409792  543705 memory.go:184] no items to output this cycle
I0319 18:51:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 18:51:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:51:33.409767  543705 memory.go:184] no items to output this cycle
I0319 18:51:33.409819  543705 cpu.go:275] no items to output this cycle
I0319 18:51:37.813888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:51:37.813894  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:51:43.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:51:43.410886  543705 memory.go:191] Add success.
I0319 18:51:43.409904  543705 cpu.go:282] Add success.
I0319 18:51:43.419756  543705 net.go:648] Add success.
I0319 18:51:43.422819  543705 net.go:770] primary dev: ETH0
I0319 18:51:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:51:43.422849  543705 net.go:698] Add success.
I0319 18:51:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:51:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:51:53.409826  543705 memory.go:184] no items to output this cycle
I0319 18:51:53.409836  543705 cpu.go:275] no items to output this cycle
E0319 18:52:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:52:03.409807  543705 memory.go:184] no items to output this cycle
I0319 18:52:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 18:52:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:52:13.409789  543705 memory.go:191] Add success.
I0319 18:52:13.409805  543705 cpu.go:282] Add success.
W0319 18:52:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:52:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:52:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:52:13.420123  543705 net.go:648] Add success.
I0319 18:52:13.422852  543705 net.go:770] primary dev: ETH0
I0319 18:52:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:52:13.422880  543705 net.go:698] Add success.
W0319 18:52:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:52:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 18:52:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:52:14.455918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:52:14.455926  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:52:14.455932  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:52:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 18:52:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:52:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:52:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:52:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:52:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:52:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:52:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:52:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:52:20.522250  543705 disk_info.go:125] begin check local disk info of client
I0319 18:52:20.524613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:52:20.524618  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ea80 0xc00037eac0]
E0319 18:52:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:52:23.409792  543705 memory.go:184] no items to output this cycle
I0319 18:52:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 18:52:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:52:33.409770  543705 memory.go:184] no items to output this cycle
I0319 18:52:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 18:52:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:52:43.409902  543705 memory.go:191] Add success.
I0319 18:52:43.409982  543705 cpu.go:282] Add success.
I0319 18:52:43.419713  543705 net.go:648] Add success.
I0319 18:52:43.422621  543705 net.go:770] primary dev: ETH0
I0319 18:52:43.422634  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:52:43.422646  543705 net.go:698] Add success.
I0319 18:52:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:52:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:52:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:52:53.409781  543705 memory.go:184] no items to output this cycle
I0319 18:52:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 18:53:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:53:03.409798  543705 memory.go:184] no items to output this cycle
I0319 18:53:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 18:53:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:53:13.409817  543705 memory.go:191] Add success.
I0319 18:53:13.409827  543705 cpu.go:282] Add success.
W0319 18:53:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:53:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:53:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:53:13.420179  543705 net.go:648] Add success.
I0319 18:53:13.422987  543705 net.go:770] primary dev: ETH0
I0319 18:53:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:53:13.423016  543705 net.go:698] Add success.
I0319 18:53:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:53:14.455084  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:53:14.455144  543705 disk_worker.go:708] disk space is not compliant
W0319 18:53:14.455147  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:53:14.456466  543705 disk_worker.go:494] system disk:vda1
I0319 18:53:14.456510  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:53:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:53:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:53:20.525321  543705 disk_info.go:125] begin check local disk info of client
I0319 18:53:20.527851  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:53:20.527858  543705 disk_info.go:196] parse disk info done, disk is : [0xc000579980 0xc0005799c0]
E0319 18:53:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:53:23.409817  543705 memory.go:184] no items to output this cycle
I0319 18:53:23.409826  543705 cpu.go:275] no items to output this cycle
E0319 18:53:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:53:33.409794  543705 memory.go:184] no items to output this cycle
I0319 18:53:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 18:53:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:53:43.409783  543705 memory.go:191] Add success.
I0319 18:53:43.409807  543705 cpu.go:282] Add success.
I0319 18:53:43.420420  543705 net.go:648] Add success.
I0319 18:53:43.423338  543705 net.go:770] primary dev: ETH0
I0319 18:53:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:53:43.423367  543705 net.go:698] Add success.
I0319 18:53:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:53:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:53:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:53:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:53:53.409781  543705 memory.go:184] no items to output this cycle
I0319 18:53:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 18:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:54:03.409774  543705 memory.go:184] no items to output this cycle
I0319 18:54:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 18:54:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:54:13.409820  543705 memory.go:191] Add success.
I0319 18:54:13.409825  543705 cpu.go:282] Add success.
W0319 18:54:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:54:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:54:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:54:13.420099  543705 net.go:648] Add success.
I0319 18:54:13.422757  543705 net.go:770] primary dev: ETH0
I0319 18:54:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:54:13.422787  543705 net.go:698] Add success.
I0319 18:54:13.516372  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"14255c05-6d02-417d-b03e-568b5f5bddc5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:54:13.516405  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 18:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:54:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:54:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 18:54:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:54:14.456694  543705 disk_worker.go:494] system disk:vda1
I0319 18:54:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:54:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:54:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:54:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:54:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:54:20.527940  543705 disk_info.go:125] begin check local disk info of client
I0319 18:54:20.530410  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:54:20.530416  543705 disk_info.go:196] parse disk info done, disk is : [0xc000579640 0xc000579680]
E0319 18:54:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:54:23.409804  543705 memory.go:184] no items to output this cycle
I0319 18:54:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 18:54:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:54:33.409767  543705 memory.go:184] no items to output this cycle
I0319 18:54:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 18:54:37.815782  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:54:37.815788  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:54:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:54:43.411068  543705 memory.go:191] Add success.
I0319 18:54:43.409819  543705 cpu.go:282] Add success.
I0319 18:54:43.419756  543705 net.go:648] Add success.
I0319 18:54:43.422579  543705 net.go:770] primary dev: ETH0
I0319 18:54:43.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:54:43.422602  543705 net.go:698] Add success.
I0319 18:54:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:54:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:54:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:54:53.409817  543705 memory.go:184] no items to output this cycle
I0319 18:54:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 18:55:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:55:03.409780  543705 memory.go:184] no items to output this cycle
I0319 18:55:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 18:55:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:55:13.409797  543705 memory.go:191] Add success.
I0319 18:55:13.409799  543705 cpu.go:282] Add success.
W0319 18:55:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:55:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:55:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:55:13.420248  543705 net.go:648] Add success.
I0319 18:55:13.422983  543705 net.go:770] primary dev: ETH0
I0319 18:55:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:55:13.423012  543705 net.go:698] Add success.
I0319 18:55:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:55:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:55:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0319 18:55:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:55:14.456484  543705 disk_worker.go:494] system disk:vda1
I0319 18:55:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:55:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:55:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:55:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:55:20.531348  543705 disk_info.go:125] begin check local disk info of client
I0319 18:55:20.533792  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:55:20.533798  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8500 0xc0004a8540]
E0319 18:55:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:55:23.409814  543705 memory.go:184] no items to output this cycle
I0319 18:55:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 18:55:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:55:33.409778  543705 memory.go:184] no items to output this cycle
I0319 18:55:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 18:55:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:55:43.409780  543705 memory.go:191] Add success.
I0319 18:55:43.409812  543705 cpu.go:282] Add success.
I0319 18:55:43.420161  543705 net.go:648] Add success.
I0319 18:55:43.422989  543705 net.go:770] primary dev: ETH0
I0319 18:55:43.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:55:43.423020  543705 net.go:698] Add success.
I0319 18:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:55:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:55:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:55:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:55:53.409787  543705 memory.go:184] no items to output this cycle
I0319 18:55:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 18:56:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:56:03.409804  543705 memory.go:184] no items to output this cycle
I0319 18:56:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 18:56:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:56:13.409818  543705 memory.go:191] Add success.
I0319 18:56:13.409830  543705 cpu.go:282] Add success.
W0319 18:56:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:56:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:56:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:56:13.420058  543705 net.go:648] Add success.
I0319 18:56:13.422773  543705 net.go:770] primary dev: ETH0
I0319 18:56:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:56:13.422798  543705 net.go:698] Add success.
I0319 18:56:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:56:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:56:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 18:56:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:56:14.456509  543705 disk_worker.go:494] system disk:vda1
I0319 18:56:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:56:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:56:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:56:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:56:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:56:20.534304  543705 disk_info.go:125] begin check local disk info of client
I0319 18:56:20.536717  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:56:20.536723  543705 disk_info.go:196] parse disk info done, disk is : [0xc000578480 0xc0005784c0]
E0319 18:56:23.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:56:23.409822  543705 memory.go:184] no items to output this cycle
I0319 18:56:23.409827  543705 cpu.go:275] no items to output this cycle
E0319 18:56:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:56:33.409792  543705 memory.go:184] no items to output this cycle
I0319 18:56:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 18:56:43.409951  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:56:43.409980  543705 cpu.go:282] Add success.
I0319 18:56:43.409982  543705 memory.go:191] Add success.
I0319 18:56:43.419717  543705 net.go:648] Add success.
I0319 18:56:43.422825  543705 net.go:770] primary dev: ETH0
I0319 18:56:43.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:56:43.422850  543705 net.go:698] Add success.
I0319 18:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:56:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:56:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:56:53.409788  543705 memory.go:184] no items to output this cycle
I0319 18:56:53.409824  543705 cpu.go:275] no items to output this cycle
E0319 18:57:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:57:03.409792  543705 memory.go:184] no items to output this cycle
I0319 18:57:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 18:57:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:57:13.409796  543705 cpu.go:282] Add success.
I0319 18:57:13.409802  543705 memory.go:191] Add success.
W0319 18:57:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:57:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:57:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:57:13.420071  543705 net.go:648] Add success.
I0319 18:57:13.428982  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 18:57:13.429055  543705 net.go:770] primary dev: ETH0
I0319 18:57:13.429067  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:57:13.429080  543705 net.go:698] Add success.
I0319 18:57:13.453620  543705 event_worker.go:152] Polling the log file for events...
I0319 18:57:13.463869  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e7587f43-aa93-4b4e-817b-68dd9cbf4962","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 18:57:13.463906  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 18:57:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:57:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 18:57:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0319 18:57:14.455886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 18:57:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 18:57:14.455899  543705 custom_config.go:64] query custom config with name: gpu
I0319 18:57:14.456625  543705 disk_worker.go:494] system disk:vda1
I0319 18:57:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 18:57:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 18:57:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:57:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 18:57:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 18:57:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:57:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:57:16.472332  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:57:20.537321  543705 disk_info.go:125] begin check local disk info of client
I0319 18:57:20.539690  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:57:20.539696  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001bc4c0 0xc0001bc500]
E0319 18:57:23.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:57:23.409885  543705 memory.go:184] no items to output this cycle
I0319 18:57:23.409891  543705 cpu.go:275] no items to output this cycle
E0319 18:57:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:57:33.409782  543705 memory.go:184] no items to output this cycle
I0319 18:57:33.409805  543705 cpu.go:275] no items to output this cycle
I0319 18:57:37.817733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 18:57:37.817739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 18:57:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:57:43.410657  543705 memory.go:191] Add success.
I0319 18:57:43.409819  543705 cpu.go:282] Add success.
I0319 18:57:43.420398  543705 net.go:648] Add success.
I0319 18:57:43.423157  543705 net.go:770] primary dev: ETH0
I0319 18:57:43.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:57:43.423187  543705 net.go:698] Add success.
I0319 18:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:57:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:57:53.409769  543705 memory.go:184] no items to output this cycle
I0319 18:57:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 18:58:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:58:03.409781  543705 memory.go:184] no items to output this cycle
I0319 18:58:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 18:58:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:58:13.409789  543705 memory.go:191] Add success.
I0319 18:58:13.409790  543705 cpu.go:282] Add success.
W0319 18:58:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:58:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:58:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:58:13.420070  543705 net.go:648] Add success.
I0319 18:58:13.423306  543705 net.go:770] primary dev: ETH0
I0319 18:58:13.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:58:13.423335  543705 net.go:698] Add success.
I0319 18:58:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:58:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:58:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0319 18:58:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:58:14.456802  543705 disk_worker.go:494] system disk:vda1
I0319 18:58:14.456845  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:58:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:58:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:58:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:58:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:58:20.539779  543705 disk_info.go:125] begin check local disk info of client
I0319 18:58:20.542353  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:58:20.542360  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e240 0xc00034e280]
E0319 18:58:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:58:23.409893  543705 cpu.go:275] no items to output this cycle
I0319 18:58:23.409916  543705 memory.go:184] no items to output this cycle
E0319 18:58:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:58:33.409778  543705 memory.go:184] no items to output this cycle
I0319 18:58:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 18:58:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:58:43.409785  543705 memory.go:191] Add success.
I0319 18:58:43.409811  543705 cpu.go:282] Add success.
I0319 18:58:43.420056  543705 net.go:648] Add success.
I0319 18:58:43.422796  543705 net.go:770] primary dev: ETH0
I0319 18:58:43.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:58:43.422822  543705 net.go:698] Add success.
I0319 18:58:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:58:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:58:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:58:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:58:53.409805  543705 memory.go:184] no items to output this cycle
I0319 18:58:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 18:59:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:59:03.409772  543705 memory.go:184] no items to output this cycle
I0319 18:59:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 18:59:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:59:13.409808  543705 memory.go:191] Add success.
I0319 18:59:13.409816  543705 cpu.go:282] Add success.
W0319 18:59:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 18:59:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 18:59:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 18:59:13.420097  543705 net.go:648] Add success.
I0319 18:59:13.422815  543705 net.go:770] primary dev: ETH0
I0319 18:59:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:59:13.422841  543705 net.go:698] Add success.
I0319 18:59:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 18:59:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 18:59:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0319 18:59:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0319 18:59:14.456502  543705 disk_worker.go:494] system disk:vda1
I0319 18:59:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 18:59:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 18:59:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 18:59:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 18:59:20.543399  543705 disk_info.go:125] begin check local disk info of client
I0319 18:59:20.545895  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 18:59:20.545907  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
E0319 18:59:23.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:59:23.409899  543705 cpu.go:275] no items to output this cycle
I0319 18:59:23.409922  543705 memory.go:184] no items to output this cycle
E0319 18:59:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:59:33.409784  543705 cpu.go:275] no items to output this cycle
I0319 18:59:33.409794  543705 memory.go:184] no items to output this cycle
E0319 18:59:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:59:43.409790  543705 memory.go:191] Add success.
I0319 18:59:43.409792  543705 cpu.go:282] Add success.
I0319 18:59:43.420027  543705 net.go:648] Add success.
I0319 18:59:43.422783  543705 net.go:770] primary dev: ETH0
I0319 18:59:43.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0319 18:59:43.422809  543705 net.go:698] Add success.
I0319 18:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 18:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 18:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 18:59:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 18:59:53.409781  543705 memory.go:184] no items to output this cycle
I0319 18:59:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:00:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:00:03.409775  543705 memory.go:184] no items to output this cycle
I0319 19:00:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 19:00:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:00:13.409790  543705 memory.go:191] Add success.
I0319 19:00:13.409797  543705 cpu.go:282] Add success.
W0319 19:00:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:00:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:00:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:00:13.420132  543705 net.go:648] Add success.
I0319 19:00:13.423371  543705 net.go:770] primary dev: ETH0
I0319 19:00:13.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:00:13.423401  543705 net.go:698] Add success.
I0319 19:00:13.501373  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6884e0ed-abff-4620-9a57-ee0ddf245f82","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:00:13.501406  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:00:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:00:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:00:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0319 19:00:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:00:14.456742  543705 disk_worker.go:494] system disk:vda1
I0319 19:00:14.456778  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:00:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:00:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:00:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:00:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:00:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:00:20.545994  543705 disk_info.go:125] begin check local disk info of client
I0319 19:00:20.548554  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:00:20.548560  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003426c0 0xc000342700]
E0319 19:00:23.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:00:23.409906  543705 cpu.go:275] no items to output this cycle
I0319 19:00:23.409909  543705 memory.go:184] no items to output this cycle
E0319 19:00:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:00:33.409805  543705 memory.go:184] no items to output this cycle
I0319 19:00:33.409820  543705 cpu.go:275] no items to output this cycle
I0319 19:00:37.817888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:00:37.817894  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:00:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:00:43.410827  543705 memory.go:191] Add success.
I0319 19:00:43.409830  543705 cpu.go:282] Add success.
I0319 19:00:43.420557  543705 net.go:648] Add success.
I0319 19:00:43.423315  543705 net.go:770] primary dev: ETH0
I0319 19:00:43.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:00:43.423342  543705 net.go:698] Add success.
I0319 19:00:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:00:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:00:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:00:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:00:53.409785  543705 memory.go:184] no items to output this cycle
I0319 19:00:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 19:01:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:01:03.409803  543705 memory.go:184] no items to output this cycle
I0319 19:01:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 19:01:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:01:13.409776  543705 memory.go:191] Add success.
W0319 19:01:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 19:01:13.409809  543705 cpu.go:282] Add success.
W0319 19:01:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:01:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:01:13.420035  543705 net.go:648] Add success.
I0319 19:01:13.422991  543705 net.go:770] primary dev: ETH0
I0319 19:01:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:01:13.423017  543705 net.go:698] Add success.
I0319 19:01:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:01:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:01:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 19:01:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:01:14.456533  543705 disk_worker.go:494] system disk:vda1
I0319 19:01:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:01:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:01:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:01:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:01:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:01:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:01:20.549444  543705 disk_info.go:125] begin check local disk info of client
I0319 19:01:20.551862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:01:20.551868  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2000 0xc0003e2040]
E0319 19:01:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:01:23.409822  543705 memory.go:184] no items to output this cycle
I0319 19:01:23.409825  543705 cpu.go:275] no items to output this cycle
E0319 19:01:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:01:33.409765  543705 memory.go:184] no items to output this cycle
I0319 19:01:33.409811  543705 cpu.go:275] no items to output this cycle
E0319 19:01:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:01:43.409788  543705 memory.go:191] Add success.
I0319 19:01:43.409805  543705 cpu.go:282] Add success.
I0319 19:01:43.419967  543705 net.go:648] Add success.
I0319 19:01:43.422883  543705 net.go:770] primary dev: ETH0
I0319 19:01:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:01:43.422909  543705 net.go:698] Add success.
I0319 19:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:01:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:01:53.409774  543705 memory.go:184] no items to output this cycle
I0319 19:01:53.409793  543705 cpu.go:275] no items to output this cycle
E0319 19:02:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:02:03.409809  543705 memory.go:184] no items to output this cycle
I0319 19:02:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 19:02:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:02:13.409773  543705 memory.go:191] Add success.
W0319 19:02:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 19:02:13.409800  543705 cpu.go:282] Add success.
W0319 19:02:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:02:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:02:13.420058  543705 net.go:648] Add success.
I0319 19:02:13.422806  543705 net.go:770] primary dev: ETH0
I0319 19:02:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:02:13.422838  543705 net.go:698] Add success.
W0319 19:02:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:02:14.455261  543705 disk_worker.go:708] disk space is not compliant
W0319 19:02:14.455266  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:02:14.455933  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:02:14.455943  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:02:14.455949  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:02:14.456853  543705 disk_worker.go:494] system disk:vda1
I0319 19:02:14.456884  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:02:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:02:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:02:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:02:16.457992  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:02:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:02:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:02:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:02:20.552397  543705 disk_info.go:125] begin check local disk info of client
I0319 19:02:20.554774  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:02:20.554780  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354ec0 0xc000354f00]
E0319 19:02:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:02:23.409801  543705 memory.go:184] no items to output this cycle
I0319 19:02:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:02:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:02:33.409808  543705 memory.go:184] no items to output this cycle
I0319 19:02:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 19:02:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:02:43.409787  543705 memory.go:191] Add success.
I0319 19:02:43.409806  543705 cpu.go:282] Add success.
I0319 19:02:43.419887  543705 net.go:648] Add success.
I0319 19:02:43.422573  543705 net.go:770] primary dev: ETH0
I0319 19:02:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:02:43.422599  543705 net.go:698] Add success.
I0319 19:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:02:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:02:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:02:53.409810  543705 memory.go:184] no items to output this cycle
I0319 19:02:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 19:03:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:03:03.409802  543705 memory.go:184] no items to output this cycle
I0319 19:03:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 19:03:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:03:13.409782  543705 memory.go:191] Add success.
W0319 19:03:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:03:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:03:13.409832  543705 cpu.go:282] Add success.
I0319 19:03:13.420039  543705 net.go:648] Add success.
I0319 19:03:13.422524  543705 net.go:770] primary dev: ETH0
I0319 19:03:13.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:03:13.422550  543705 net.go:698] Add success.
I0319 19:03:13.463891  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c238f65-6425-49f7-9f84-1ad25baba171","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:03:13.463925  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:03:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:03:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:03:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 19:03:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:03:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 19:03:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:03:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:03:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:03:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:03:16.472513  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:03:20.554862  543705 disk_info.go:125] begin check local disk info of client
I0319 19:03:20.557470  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:03:20.557477  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002903c0 0xc000290400]
I0319 19:03:23.409943  543705 cpu.go:275] no items to output this cycle
E0319 19:03:23.409944  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:03:23.409971  543705 memory.go:184] no items to output this cycle
E0319 19:03:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:03:33.409790  543705 memory.go:184] no items to output this cycle
I0319 19:03:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 19:03:37.819790  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:03:37.819797  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:03:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:03:43.411147  543705 memory.go:191] Add success.
I0319 19:03:43.409814  543705 cpu.go:282] Add success.
I0319 19:03:43.419870  543705 net.go:648] Add success.
I0319 19:03:43.422794  543705 net.go:770] primary dev: ETH0
I0319 19:03:43.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:03:43.422835  543705 net.go:698] Add success.
I0319 19:03:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:03:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:03:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:03:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:03:53.409783  543705 cpu.go:275] no items to output this cycle
I0319 19:03:53.409785  543705 memory.go:184] no items to output this cycle
E0319 19:04:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:04:03.409791  543705 memory.go:184] no items to output this cycle
I0319 19:04:03.409792  543705 cpu.go:275] no items to output this cycle
W0319 19:04:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:04:13.409728  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:04:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:04:13.409787  543705 cpu.go:282] Add success.
E0319 19:04:13.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:04:13.409852  543705 memory.go:191] Add success.
I0319 19:04:13.420052  543705 net.go:648] Add success.
I0319 19:04:13.423383  543705 net.go:770] primary dev: ETH0
I0319 19:04:13.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:04:13.423414  543705 net.go:698] Add success.
I0319 19:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:04:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:04:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0319 19:04:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:04:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 19:04:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:04:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:04:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:04:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:04:20.558482  543705 disk_info.go:125] begin check local disk info of client
I0319 19:04:20.560992  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:04:20.560998  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7000 0xc0003e7040]
E0319 19:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:04:23.409784  543705 memory.go:184] no items to output this cycle
I0319 19:04:23.409963  543705 cpu.go:275] no items to output this cycle
E0319 19:04:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:04:33.409793  543705 memory.go:184] no items to output this cycle
I0319 19:04:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 19:04:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:04:43.409795  543705 cpu.go:282] Add success.
I0319 19:04:43.409804  543705 memory.go:191] Add success.
I0319 19:04:43.419858  543705 net.go:648] Add success.
I0319 19:04:43.422855  543705 net.go:770] primary dev: ETH0
I0319 19:04:43.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:04:43.422880  543705 net.go:698] Add success.
I0319 19:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:04:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:04:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:04:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:04:53.409784  543705 memory.go:184] no items to output this cycle
I0319 19:04:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 19:05:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:05:03.409786  543705 memory.go:184] no items to output this cycle
I0319 19:05:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 19:05:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:05:13.409804  543705 memory.go:191] Add success.
I0319 19:05:13.409818  543705 cpu.go:282] Add success.
W0319 19:05:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:05:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:05:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:05:13.420128  543705 net.go:648] Add success.
I0319 19:05:13.422857  543705 net.go:770] primary dev: ETH0
I0319 19:05:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:05:13.422881  543705 net.go:698] Add success.
I0319 19:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:05:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:05:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 19:05:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:05:14.456553  543705 disk_worker.go:494] system disk:vda1
I0319 19:05:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:05:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:05:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:05:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:05:20.561084  543705 disk_info.go:125] begin check local disk info of client
I0319 19:05:20.563549  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:05:20.563555  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e8f00 0xc0004e8f40]
I0319 19:05:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 19:05:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:05:23.409810  543705 memory.go:184] no items to output this cycle
E0319 19:05:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:05:33.409792  543705 memory.go:184] no items to output this cycle
I0319 19:05:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:05:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:05:43.409803  543705 memory.go:191] Add success.
I0319 19:05:43.409804  543705 cpu.go:282] Add success.
I0319 19:05:43.419890  543705 net.go:648] Add success.
I0319 19:05:43.422719  543705 net.go:770] primary dev: ETH0
I0319 19:05:43.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:05:43.422745  543705 net.go:698] Add success.
I0319 19:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:05:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:05:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:05:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:05:53.409777  543705 memory.go:184] no items to output this cycle
I0319 19:05:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 19:06:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:06:03.409786  543705 memory.go:184] no items to output this cycle
I0319 19:06:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:06:13.410507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:06:13.410537  543705 memory.go:191] Add success.
I0319 19:06:13.410552  543705 cpu.go:282] Add success.
W0319 19:06:13.410565  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:06:13.410578  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:06:13.410582  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:06:13.419655  543705 net.go:770] primary dev: ETH0
I0319 19:06:13.419669  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:06:13.419681  543705 net.go:698] Add success.
I0319 19:06:13.420028  543705 net.go:648] Add success.
I0319 19:06:13.474203  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e327f2aa-5e67-42fd-9a14-c5ecd5092d08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:06:13.474236  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:06:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:06:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 19:06:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:06:14.456692  543705 disk_worker.go:494] system disk:vda1
I0319 19:06:14.456728  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:06:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:06:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:06:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:06:20.564524  543705 disk_info.go:125] begin check local disk info of client
I0319 19:06:20.566954  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:06:20.566960  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e8640 0xc0004e8680]
E0319 19:06:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:06:23.409807  543705 cpu.go:275] no items to output this cycle
I0319 19:06:23.409977  543705 memory.go:184] no items to output this cycle
E0319 19:06:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:06:33.409793  543705 memory.go:184] no items to output this cycle
I0319 19:06:33.409802  543705 cpu.go:275] no items to output this cycle
I0319 19:06:37.821742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:06:37.821749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:06:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:06:43.410742  543705 memory.go:191] Add success.
I0319 19:06:43.409800  543705 cpu.go:282] Add success.
I0319 19:06:43.420444  543705 net.go:648] Add success.
I0319 19:06:43.422892  543705 net.go:770] primary dev: ETH0
I0319 19:06:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:06:43.422921  543705 net.go:698] Add success.
I0319 19:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:06:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:06:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:06:53.409785  543705 memory.go:184] no items to output this cycle
I0319 19:06:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 19:07:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:07:03.409779  543705 cpu.go:275] no items to output this cycle
I0319 19:07:03.409780  543705 memory.go:184] no items to output this cycle
E0319 19:07:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:07:13.409792  543705 memory.go:191] Add success.
I0319 19:07:13.409795  543705 cpu.go:282] Add success.
W0319 19:07:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:07:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:07:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:07:13.420112  543705 net.go:648] Add success.
I0319 19:07:13.422750  543705 net.go:770] primary dev: ETH0
I0319 19:07:13.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:07:13.422778  543705 net.go:698] Add success.
I0319 19:07:13.453308  543705 event_worker.go:152] Polling the log file for events...
W0319 19:07:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:07:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 19:07:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:07:14.456963  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:07:14.456973  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:07:14.456979  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:07:14.457043  543705 disk_worker.go:494] system disk:vda1
I0319 19:07:14.457074  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:07:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:07:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:07:16.458032  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:07:16.458040  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:07:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:07:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:07:16.472470  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:07:20.567476  543705 disk_info.go:125] begin check local disk info of client
I0319 19:07:20.569903  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:07:20.569909  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475880 0xc0004758c0]
E0319 19:07:23.409955  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:07:23.410039  543705 memory.go:184] no items to output this cycle
I0319 19:07:23.410053  543705 cpu.go:275] no items to output this cycle
E0319 19:07:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:07:33.409772  543705 memory.go:184] no items to output this cycle
I0319 19:07:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 19:07:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:07:43.409795  543705 memory.go:191] Add success.
I0319 19:07:43.409812  543705 cpu.go:282] Add success.
I0319 19:07:43.419769  543705 net.go:770] primary dev: ETH0
I0319 19:07:43.419782  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:07:43.419795  543705 net.go:698] Add success.
I0319 19:07:43.420042  543705 net.go:648] Add success.
I0319 19:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:07:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:07:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:07:53.409793  543705 memory.go:184] no items to output this cycle
I0319 19:07:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 19:08:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:08:03.409813  543705 memory.go:184] no items to output this cycle
I0319 19:08:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 19:08:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:08:13.409799  543705 cpu.go:282] Add success.
I0319 19:08:13.409802  543705 memory.go:191] Add success.
W0319 19:08:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:08:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:08:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:08:13.420055  543705 net.go:648] Add success.
I0319 19:08:13.422768  543705 net.go:770] primary dev: ETH0
I0319 19:08:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:08:13.422796  543705 net.go:698] Add success.
I0319 19:08:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:08:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:08:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 19:08:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:08:14.456513  543705 disk_worker.go:494] system disk:vda1
I0319 19:08:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:08:16.458240  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:08:16.458303  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:08:16.458328  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:08:16.472686  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:08:20.569996  543705 disk_info.go:125] begin check local disk info of client
I0319 19:08:20.572468  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:08:20.572475  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029c000 0xc00029c040]
E0319 19:08:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:08:23.409796  543705 memory.go:184] no items to output this cycle
I0319 19:08:23.409845  543705 cpu.go:275] no items to output this cycle
E0319 19:08:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:08:33.409796  543705 memory.go:184] no items to output this cycle
I0319 19:08:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 19:08:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:08:43.409801  543705 memory.go:191] Add success.
I0319 19:08:43.409806  543705 cpu.go:282] Add success.
I0319 19:08:43.420043  543705 net.go:648] Add success.
I0319 19:08:43.422846  543705 net.go:770] primary dev: ETH0
I0319 19:08:43.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:08:43.422873  543705 net.go:698] Add success.
I0319 19:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:08:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:08:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:08:53.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:08:53.409843  543705 memory.go:184] no items to output this cycle
I0319 19:08:53.409854  543705 cpu.go:275] no items to output this cycle
E0319 19:09:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:09:03.409792  543705 memory.go:184] no items to output this cycle
I0319 19:09:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 19:09:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:09:13.409841  543705 memory.go:191] Add success.
I0319 19:09:13.409846  543705 cpu.go:282] Add success.
W0319 19:09:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:09:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:09:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:09:13.420164  543705 net.go:648] Add success.
I0319 19:09:13.422804  543705 net.go:770] primary dev: ETH0
I0319 19:09:13.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:09:13.422828  543705 net.go:698] Add success.
I0319 19:09:13.468609  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b47bcb33-be50-4324-bd55-7cb8d15ccbfa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:09:13.468641  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:09:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:09:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:09:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 19:09:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:09:14.456647  543705 disk_worker.go:494] system disk:vda1
I0319 19:09:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:09:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:09:16.457579  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:09:16.457662  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:09:16.457689  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:09:16.473055  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:09:20.573549  543705 disk_info.go:125] begin check local disk info of client
I0319 19:09:20.576016  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:09:20.576022  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be080 0xc0002be0c0]
E0319 19:09:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:09:23.409778  543705 memory.go:184] no items to output this cycle
I0319 19:09:23.409847  543705 cpu.go:275] no items to output this cycle
E0319 19:09:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:09:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 19:09:33.409801  543705 memory.go:184] no items to output this cycle
I0319 19:09:37.821887  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:09:37.821893  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:09:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:09:43.410617  543705 memory.go:191] Add success.
I0319 19:09:43.409810  543705 cpu.go:282] Add success.
I0319 19:09:43.420340  543705 net.go:648] Add success.
I0319 19:09:43.423044  543705 net.go:770] primary dev: ETH0
I0319 19:09:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:09:43.423072  543705 net.go:698] Add success.
I0319 19:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:09:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:09:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:09:53.409783  543705 cpu.go:275] no items to output this cycle
I0319 19:09:53.409785  543705 memory.go:184] no items to output this cycle
E0319 19:10:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:10:03.409819  543705 memory.go:184] no items to output this cycle
I0319 19:10:03.409830  543705 cpu.go:275] no items to output this cycle
E0319 19:10:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:10:13.409798  543705 memory.go:191] Add success.
I0319 19:10:13.409799  543705 cpu.go:282] Add success.
W0319 19:10:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:10:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:10:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:10:13.420073  543705 net.go:648] Add success.
I0319 19:10:13.423068  543705 net.go:770] primary dev: ETH0
I0319 19:10:13.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:10:13.423092  543705 net.go:698] Add success.
I0319 19:10:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:10:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:10:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 19:10:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:10:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 19:10:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:10:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:10:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:10:16.472448  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:10:20.576516  543705 disk_info.go:125] begin check local disk info of client
I0319 19:10:20.579003  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:10:20.579009  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0b80 0xc0002a0bc0]
E0319 19:10:23.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:10:23.409898  543705 memory.go:184] no items to output this cycle
I0319 19:10:23.410045  543705 cpu.go:275] no items to output this cycle
E0319 19:10:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:10:33.409792  543705 memory.go:184] no items to output this cycle
I0319 19:10:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 19:10:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:10:43.409792  543705 memory.go:191] Add success.
I0319 19:10:43.409792  543705 cpu.go:282] Add success.
I0319 19:10:43.420002  543705 net.go:648] Add success.
I0319 19:10:43.423182  543705 net.go:770] primary dev: ETH0
I0319 19:10:43.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:10:43.423211  543705 net.go:698] Add success.
I0319 19:10:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:10:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:10:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:10:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:10:53.409821  543705 memory.go:184] no items to output this cycle
I0319 19:10:53.409831  543705 cpu.go:275] no items to output this cycle
E0319 19:11:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:11:03.409807  543705 memory.go:184] no items to output this cycle
I0319 19:11:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 19:11:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:11:13.409786  543705 memory.go:191] Add success.
I0319 19:11:13.409807  543705 cpu.go:282] Add success.
W0319 19:11:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:11:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:11:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:11:13.420164  543705 net.go:648] Add success.
I0319 19:11:13.423028  543705 net.go:770] primary dev: ETH0
I0319 19:11:13.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:11:13.423053  543705 net.go:698] Add success.
I0319 19:11:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:11:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:11:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 19:11:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:11:14.456633  543705 disk_worker.go:494] system disk:vda1
I0319 19:11:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:11:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:11:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:11:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:11:20.579092  543705 disk_info.go:125] begin check local disk info of client
I0319 19:11:20.581544  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:11:20.581554  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf640 0xc0002bf680]
E0319 19:11:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:11:23.409762  543705 memory.go:184] no items to output this cycle
I0319 19:11:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 19:11:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:11:33.409806  543705 memory.go:184] no items to output this cycle
I0319 19:11:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 19:11:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:11:43.409788  543705 memory.go:191] Add success.
I0319 19:11:43.409805  543705 cpu.go:282] Add success.
I0319 19:11:43.420105  543705 net.go:648] Add success.
I0319 19:11:43.422651  543705 net.go:770] primary dev: ETH0
I0319 19:11:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:11:43.422681  543705 net.go:698] Add success.
I0319 19:11:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:11:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:11:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:11:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:11:53.409811  543705 memory.go:184] no items to output this cycle
I0319 19:11:53.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:12:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:12:03.409782  543705 memory.go:184] no items to output this cycle
I0319 19:12:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:12:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:12:13.409805  543705 memory.go:191] Add success.
I0319 19:12:13.409813  543705 cpu.go:282] Add success.
W0319 19:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:12:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:12:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:12:13.420152  543705 net.go:648] Add success.
I0319 19:12:13.422868  543705 net.go:770] primary dev: ETH0
I0319 19:12:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:12:13.422896  543705 net.go:698] Add success.
I0319 19:12:13.469047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"192f51d1-c95c-4ba0-8546-a9d0508c6517","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:12:13.469080  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 19:12:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:12:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0319 19:12:14.455250  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:12:14.455921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:12:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:12:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:12:14.456836  543705 disk_worker.go:494] system disk:vda1
I0319 19:12:14.456864  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:12:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:12:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 19:12:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:12:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:12:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:12:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:12:16.472357  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:12:20.582591  543705 disk_info.go:125] begin check local disk info of client
I0319 19:12:20.584932  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:12:20.584937  543705 disk_info.go:196] parse disk info done, disk is : [0xc00055af00 0xc00055af40]
E0319 19:12:23.410236  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:12:23.410249  543705 cpu.go:275] no items to output this cycle
I0319 19:12:23.410252  543705 memory.go:184] no items to output this cycle
E0319 19:12:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:12:33.409909  543705 memory.go:184] no items to output this cycle
I0319 19:12:33.409994  543705 cpu.go:275] no items to output this cycle
I0319 19:12:37.822046  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:12:37.822053  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:12:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:12:43.410742  543705 memory.go:191] Add success.
I0319 19:12:43.409812  543705 cpu.go:282] Add success.
I0319 19:12:43.420439  543705 net.go:648] Add success.
I0319 19:12:43.423366  543705 net.go:770] primary dev: ETH0
I0319 19:12:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:12:43.423395  543705 net.go:698] Add success.
I0319 19:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:12:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:12:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:12:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:12:53.409781  543705 cpu.go:275] no items to output this cycle
I0319 19:12:53.409794  543705 memory.go:184] no items to output this cycle
E0319 19:13:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:13:03.409788  543705 cpu.go:275] no items to output this cycle
I0319 19:13:03.409789  543705 memory.go:184] no items to output this cycle
E0319 19:13:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:13:13.409786  543705 memory.go:191] Add success.
I0319 19:13:13.409787  543705 cpu.go:282] Add success.
W0319 19:13:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:13:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:13:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:13:13.420480  543705 net.go:648] Add success.
I0319 19:13:13.423135  543705 net.go:770] primary dev: ETH0
I0319 19:13:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:13:13.423161  543705 net.go:698] Add success.
I0319 19:13:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:13:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:13:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 19:13:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:13:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 19:13:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:13:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:13:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:13:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:13:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:13:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:13:20.585567  543705 disk_info.go:125] begin check local disk info of client
I0319 19:13:20.588041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:13:20.588048  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aab80 0xc0003aabc0]
E0319 19:13:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:13:23.409787  543705 memory.go:184] no items to output this cycle
I0319 19:13:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:13:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:13:33.409782  543705 memory.go:184] no items to output this cycle
I0319 19:13:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:13:43.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:13:43.409883  543705 memory.go:191] Add success.
I0319 19:13:43.409928  543705 cpu.go:282] Add success.
I0319 19:13:43.420088  543705 net.go:648] Add success.
I0319 19:13:43.422830  543705 net.go:770] primary dev: ETH0
I0319 19:13:43.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:13:43.422859  543705 net.go:698] Add success.
I0319 19:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:13:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:13:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:13:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:13:53.409770  543705 memory.go:184] no items to output this cycle
I0319 19:13:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:14:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:14:03.409792  543705 memory.go:184] no items to output this cycle
I0319 19:14:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:14:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:14:13.409790  543705 cpu.go:282] Add success.
I0319 19:14:13.409797  543705 memory.go:191] Add success.
W0319 19:14:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:14:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:14:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:14:13.420115  543705 net.go:648] Add success.
I0319 19:14:13.422850  543705 net.go:770] primary dev: ETH0
I0319 19:14:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:14:13.422878  543705 net.go:698] Add success.
I0319 19:14:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:14:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:14:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 19:14:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:14:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 19:14:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:14:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:14:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:14:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:14:20.588613  543705 disk_info.go:125] begin check local disk info of client
I0319 19:14:20.591116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:14:20.591124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab880 0xc0003ab8c0]
E0319 19:14:23.410256  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:14:23.410274  543705 memory.go:184] no items to output this cycle
I0319 19:14:23.410287  543705 cpu.go:275] no items to output this cycle
E0319 19:14:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:14:33.409795  543705 cpu.go:275] no items to output this cycle
I0319 19:14:33.409797  543705 memory.go:184] no items to output this cycle
E0319 19:14:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:14:43.409782  543705 memory.go:191] Add success.
I0319 19:14:43.409812  543705 cpu.go:282] Add success.
I0319 19:14:43.419939  543705 net.go:648] Add success.
I0319 19:14:43.422821  543705 net.go:770] primary dev: ETH0
I0319 19:14:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:14:43.422846  543705 net.go:698] Add success.
I0319 19:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:14:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:14:53.409773  543705 memory.go:184] no items to output this cycle
I0319 19:14:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 19:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:15:03.409778  543705 memory.go:184] no items to output this cycle
I0319 19:15:03.409779  543705 cpu.go:275] no items to output this cycle
E0319 19:15:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:15:13.409809  543705 memory.go:191] Add success.
I0319 19:15:13.409817  543705 cpu.go:282] Add success.
W0319 19:15:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:15:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:15:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:15:13.420067  543705 net.go:648] Add success.
I0319 19:15:13.422770  543705 net.go:770] primary dev: ETH0
I0319 19:15:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:15:13.422793  543705 net.go:698] Add success.
I0319 19:15:13.469987  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"24a55ab7-a49a-44ee-b44e-8956b1e9f05c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:15:13.470020  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:15:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:15:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:15:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 19:15:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:15:14.456678  543705 disk_worker.go:494] system disk:vda1
I0319 19:15:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:15:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:15:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:15:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:15:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:15:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:15:20.591207  543705 disk_info.go:125] begin check local disk info of client
I0319 19:15:20.593700  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:15:20.593707  543705 disk_info.go:196] parse disk info done, disk is : [0xc00055b940 0xc00055b980]
E0319 19:15:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:15:23.409787  543705 memory.go:184] no items to output this cycle
I0319 19:15:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 19:15:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:15:33.409883  543705 memory.go:184] no items to output this cycle
I0319 19:15:33.409949  543705 cpu.go:275] no items to output this cycle
I0319 19:15:37.823806  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:15:37.823813  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:15:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:15:43.410620  543705 memory.go:191] Add success.
I0319 19:15:43.409804  543705 cpu.go:282] Add success.
I0319 19:15:43.420132  543705 net.go:770] primary dev: ETH0
I0319 19:15:43.420145  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:15:43.420157  543705 net.go:698] Add success.
I0319 19:15:43.420518  543705 net.go:648] Add success.
I0319 19:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:15:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:15:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:15:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:15:53.409800  543705 memory.go:184] no items to output this cycle
I0319 19:15:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 19:16:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:16:03.409782  543705 cpu.go:275] no items to output this cycle
I0319 19:16:03.409785  543705 memory.go:184] no items to output this cycle
E0319 19:16:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:16:13.409814  543705 memory.go:191] Add success.
I0319 19:16:13.409815  543705 cpu.go:282] Add success.
W0319 19:16:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:16:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:16:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:16:13.420172  543705 net.go:648] Add success.
I0319 19:16:13.423519  543705 net.go:770] primary dev: ETH0
I0319 19:16:13.423534  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:16:13.423548  543705 net.go:698] Add success.
I0319 19:16:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:16:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:16:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 19:16:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:16:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 19:16:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:16:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:16:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:16:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:16:16.472422  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:16:20.593789  543705 disk_info.go:125] begin check local disk info of client
I0319 19:16:20.596243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:16:20.596249  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260340 0xc000260380]
E0319 19:16:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:16:23.409781  543705 cpu.go:275] no items to output this cycle
I0319 19:16:23.409787  543705 memory.go:184] no items to output this cycle
E0319 19:16:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:16:33.409804  543705 memory.go:184] no items to output this cycle
I0319 19:16:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 19:16:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:16:43.409913  543705 memory.go:191] Add success.
I0319 19:16:43.410020  543705 cpu.go:282] Add success.
I0319 19:16:43.419748  543705 net.go:648] Add success.
I0319 19:16:43.422578  543705 net.go:770] primary dev: ETH0
I0319 19:16:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:16:43.422602  543705 net.go:698] Add success.
I0319 19:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:16:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:16:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:16:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:16:53.409810  543705 memory.go:184] no items to output this cycle
I0319 19:16:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 19:17:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:17:03.409776  543705 memory.go:184] no items to output this cycle
I0319 19:17:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 19:17:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:17:13.409786  543705 memory.go:191] Add success.
I0319 19:17:13.409791  543705 cpu.go:282] Add success.
W0319 19:17:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:17:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:17:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:17:13.420127  543705 net.go:648] Add success.
I0319 19:17:13.422798  543705 net.go:770] primary dev: ETH0
I0319 19:17:13.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:17:13.422823  543705 net.go:698] Add success.
I0319 19:17:13.453358  543705 event_worker.go:152] Polling the log file for events...
W0319 19:17:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:17:14.455139  543705 disk_worker.go:708] disk space is not compliant
W0319 19:17:14.455142  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:17:14.456868  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:17:14.456877  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:17:14.456884  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:17:14.456956  543705 disk_worker.go:494] system disk:vda1
I0319 19:17:14.456997  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:17:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:17:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:17:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:17:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:17:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:17:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:17:16.472326  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:17:20.596619  543705 disk_info.go:125] begin check local disk info of client
I0319 19:17:20.599047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:17:20.599053  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f5c0 0xc00029f600]
E0319 19:17:23.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:17:23.409819  543705 memory.go:184] no items to output this cycle
I0319 19:17:23.409832  543705 cpu.go:275] no items to output this cycle
E0319 19:17:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:17:33.409785  543705 memory.go:184] no items to output this cycle
I0319 19:17:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 19:17:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:17:43.409794  543705 memory.go:191] Add success.
I0319 19:17:43.409810  543705 cpu.go:282] Add success.
I0319 19:17:43.419900  543705 net.go:648] Add success.
I0319 19:17:43.422517  543705 net.go:770] primary dev: ETH0
I0319 19:17:43.422529  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:17:43.422542  543705 net.go:698] Add success.
I0319 19:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:17:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:17:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:17:53.409777  543705 memory.go:184] no items to output this cycle
I0319 19:17:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 19:18:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:18:03.409811  543705 memory.go:184] no items to output this cycle
I0319 19:18:03.409829  543705 cpu.go:275] no items to output this cycle
E0319 19:18:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:18:13.409808  543705 memory.go:191] Add success.
I0319 19:18:13.409815  543705 cpu.go:282] Add success.
W0319 19:18:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:18:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:18:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:18:13.420117  543705 net.go:648] Add success.
I0319 19:18:13.422807  543705 net.go:770] primary dev: ETH0
I0319 19:18:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:18:13.422834  543705 net.go:698] Add success.
I0319 19:18:13.469252  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c9014a2-1bc3-4abb-bf36-4896aecedc30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:18:13.469286  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:18:14.453933  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:18:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:18:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0319 19:18:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:18:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 19:18:14.456661  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:18:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:18:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:18:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:18:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:18:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:18:20.599640  543705 disk_info.go:125] begin check local disk info of client
I0319 19:18:20.602140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:18:20.602145  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f000 0xc00029f040]
E0319 19:18:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:18:23.409795  543705 memory.go:184] no items to output this cycle
I0319 19:18:23.409810  543705 cpu.go:275] no items to output this cycle
I0319 19:18:33.409914  543705 cpu.go:275] no items to output this cycle
E0319 19:18:33.409962  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:18:33.409974  543705 memory.go:184] no items to output this cycle
I0319 19:18:37.825732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:18:37.825738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:18:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:18:43.410757  543705 memory.go:191] Add success.
I0319 19:18:43.409803  543705 cpu.go:282] Add success.
I0319 19:18:43.420540  543705 net.go:648] Add success.
I0319 19:18:43.423278  543705 net.go:770] primary dev: ETH0
I0319 19:18:43.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:18:43.423303  543705 net.go:698] Add success.
I0319 19:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:18:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:18:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:18:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:18:53.409772  543705 memory.go:184] no items to output this cycle
I0319 19:18:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:19:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:19:03.409781  543705 memory.go:184] no items to output this cycle
I0319 19:19:03.409782  543705 cpu.go:275] no items to output this cycle
E0319 19:19:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:19:13.409815  543705 memory.go:191] Add success.
I0319 19:19:13.409827  543705 cpu.go:282] Add success.
W0319 19:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:19:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:19:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:19:13.420137  543705 net.go:648] Add success.
I0319 19:19:13.422839  543705 net.go:770] primary dev: ETH0
I0319 19:19:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:19:13.422873  543705 net.go:698] Add success.
I0319 19:19:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:19:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:19:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 19:19:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:19:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 19:19:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:19:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:19:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:19:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:19:20.602225  543705 disk_info.go:125] begin check local disk info of client
I0319 19:19:20.604688  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:19:20.604694  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024ad80 0xc00024adc0]
I0319 19:19:23.409776  543705 cpu.go:275] no items to output this cycle
E0319 19:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:19:23.409793  543705 memory.go:184] no items to output this cycle
E0319 19:19:33.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:19:33.409887  543705 memory.go:184] no items to output this cycle
I0319 19:19:33.409948  543705 cpu.go:275] no items to output this cycle
E0319 19:19:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:19:43.409825  543705 memory.go:191] Add success.
I0319 19:19:43.409838  543705 cpu.go:282] Add success.
I0319 19:19:43.419782  543705 net.go:770] primary dev: ETH0
I0319 19:19:43.419797  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:19:43.419812  543705 net.go:698] Add success.
I0319 19:19:43.420166  543705 net.go:648] Add success.
I0319 19:19:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:19:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:19:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:19:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:19:53.409785  543705 memory.go:184] no items to output this cycle
I0319 19:19:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 19:20:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:20:03.409780  543705 memory.go:184] no items to output this cycle
I0319 19:20:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:20:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:20:13.409819  543705 memory.go:191] Add success.
I0319 19:20:13.409832  543705 cpu.go:282] Add success.
W0319 19:20:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:20:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:20:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:20:13.420133  543705 net.go:648] Add success.
I0319 19:20:13.422824  543705 net.go:770] primary dev: ETH0
I0319 19:20:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:20:13.422851  543705 net.go:698] Add success.
I0319 19:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:20:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:20:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 19:20:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:20:14.456572  543705 disk_worker.go:494] system disk:vda1
I0319 19:20:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:20:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:20:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:20:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:20:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:20:20.605671  543705 disk_info.go:125] begin check local disk info of client
I0319 19:20:20.608097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:20:20.608104  543705 disk_info.go:196] parse disk info done, disk is : [0xc000230700 0xc000230740]
E0319 19:20:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:20:23.409821  543705 memory.go:184] no items to output this cycle
I0319 19:20:23.409829  543705 cpu.go:275] no items to output this cycle
E0319 19:20:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:20:33.409797  543705 memory.go:184] no items to output this cycle
I0319 19:20:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:20:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:20:43.409800  543705 memory.go:191] Add success.
I0319 19:20:43.409827  543705 cpu.go:282] Add success.
I0319 19:20:43.419951  543705 net.go:648] Add success.
I0319 19:20:43.423139  543705 net.go:770] primary dev: ETH0
I0319 19:20:43.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:20:43.423165  543705 net.go:698] Add success.
I0319 19:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:20:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:20:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:20:53.409784  543705 memory.go:184] no items to output this cycle
I0319 19:20:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 19:21:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:21:03.409805  543705 memory.go:184] no items to output this cycle
I0319 19:21:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 19:21:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:21:13.409825  543705 memory.go:191] Add success.
I0319 19:21:13.409826  543705 cpu.go:282] Add success.
W0319 19:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:21:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:21:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:21:13.420144  543705 net.go:648] Add success.
I0319 19:21:13.422931  543705 net.go:770] primary dev: ETH0
I0319 19:21:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:21:13.422956  543705 net.go:698] Add success.
I0319 19:21:13.464792  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ee7e12a-779f-4bae-9ad4-33dca58a1862","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:21:13.464828  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:21:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:21:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:21:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0319 19:21:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:21:14.456483  543705 disk_worker.go:494] system disk:vda1
I0319 19:21:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:21:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:21:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:21:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:21:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:21:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:21:20.608673  543705 disk_info.go:125] begin check local disk info of client
I0319 19:21:20.611147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:21:20.611154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f7f00 0xc0001f7f40]
E0319 19:21:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:21:23.409779  543705 memory.go:184] no items to output this cycle
I0319 19:21:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 19:21:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:21:33.409782  543705 memory.go:184] no items to output this cycle
I0319 19:21:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 19:21:37.827850  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:21:37.827856  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:21:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:21:43.410719  543705 memory.go:191] Add success.
I0319 19:21:43.409812  543705 cpu.go:282] Add success.
I0319 19:21:43.420412  543705 net.go:648] Add success.
I0319 19:21:43.423093  543705 net.go:770] primary dev: ETH0
I0319 19:21:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:21:43.423123  543705 net.go:698] Add success.
I0319 19:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:21:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:21:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:21:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:21:53.409799  543705 memory.go:184] no items to output this cycle
I0319 19:21:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 19:22:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:22:03.409769  543705 memory.go:184] no items to output this cycle
I0319 19:22:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 19:22:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:22:13.409811  543705 memory.go:191] Add success.
I0319 19:22:13.409822  543705 cpu.go:282] Add success.
W0319 19:22:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:22:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:22:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:22:13.420672  543705 net.go:648] Add success.
I0319 19:22:13.423681  543705 net.go:770] primary dev: ETH0
I0319 19:22:13.423695  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:22:13.423710  543705 net.go:698] Add success.
W0319 19:22:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:22:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 19:22:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:22:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:22:14.456951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:22:14.456957  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:22:14.457016  543705 disk_worker.go:494] system disk:vda1
I0319 19:22:14.457047  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:22:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:22:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:22:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:22:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:22:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:22:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:22:16.472316  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:22:20.611238  543705 disk_info.go:125] begin check local disk info of client
I0319 19:22:20.613707  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:22:20.613715  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005786c0 0xc000578700]
E0319 19:22:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:22:23.409770  543705 memory.go:184] no items to output this cycle
I0319 19:22:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 19:22:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:22:33.409789  543705 memory.go:184] no items to output this cycle
I0319 19:22:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 19:22:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:22:43.409786  543705 memory.go:191] Add success.
I0319 19:22:43.409812  543705 cpu.go:282] Add success.
I0319 19:22:43.419970  543705 net.go:648] Add success.
I0319 19:22:43.422575  543705 net.go:770] primary dev: ETH0
I0319 19:22:43.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:22:43.422600  543705 net.go:698] Add success.
I0319 19:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:22:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:22:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:22:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:22:53.409810  543705 memory.go:184] no items to output this cycle
I0319 19:22:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 19:23:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:23:03.409804  543705 memory.go:184] no items to output this cycle
I0319 19:23:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 19:23:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:23:13.409781  543705 memory.go:191] Add success.
I0319 19:23:13.409804  543705 cpu.go:282] Add success.
W0319 19:23:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:23:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:23:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:23:13.420136  543705 net.go:648] Add success.
I0319 19:23:13.422996  543705 net.go:770] primary dev: ETH0
I0319 19:23:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:23:13.423021  543705 net.go:698] Add success.
I0319 19:23:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:23:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:23:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 19:23:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:23:14.456507  543705 disk_worker.go:494] system disk:vda1
I0319 19:23:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:23:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:23:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:23:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:23:16.472459  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:23:20.613801  543705 disk_info.go:125] begin check local disk info of client
I0319 19:23:20.616272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:23:20.616279  543705 disk_info.go:196] parse disk info done, disk is : [0xc000271180 0xc0002711c0]
E0319 19:23:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:23:23.409798  543705 memory.go:184] no items to output this cycle
I0319 19:23:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:23:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:23:33.409773  543705 memory.go:184] no items to output this cycle
I0319 19:23:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 19:23:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:23:43.409799  543705 memory.go:191] Add success.
I0319 19:23:43.409814  543705 cpu.go:282] Add success.
I0319 19:23:43.420084  543705 net.go:648] Add success.
I0319 19:23:43.422662  543705 net.go:770] primary dev: ETH0
I0319 19:23:43.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:23:43.422688  543705 net.go:698] Add success.
I0319 19:23:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:23:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:23:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:23:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:23:53.409809  543705 memory.go:184] no items to output this cycle
I0319 19:23:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 19:24:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:24:03.409782  543705 memory.go:184] no items to output this cycle
I0319 19:24:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 19:24:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:24:13.409788  543705 memory.go:191] Add success.
I0319 19:24:13.409796  543705 cpu.go:282] Add success.
W0319 19:24:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:24:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:24:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:24:13.420070  543705 net.go:648] Add success.
I0319 19:24:13.422899  543705 net.go:770] primary dev: ETH0
I0319 19:24:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:24:13.422924  543705 net.go:698] Add success.
I0319 19:24:13.468266  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"88c117f4-2869-4bc9-a261-88e5296e8666","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:24:13.468301  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:24:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:24:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:24:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 19:24:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:24:14.456544  543705 disk_worker.go:494] system disk:vda1
I0319 19:24:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:24:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:24:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:24:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:24:20.616725  543705 disk_info.go:125] begin check local disk info of client
I0319 19:24:20.619335  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:24:20.619341  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6f80 0xc0003b6fc0]
E0319 19:24:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:24:23.409767  543705 memory.go:184] no items to output this cycle
I0319 19:24:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 19:24:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:24:33.409780  543705 memory.go:184] no items to output this cycle
I0319 19:24:33.409817  543705 cpu.go:275] no items to output this cycle
I0319 19:24:37.829740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:24:37.829747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:24:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:24:43.410668  543705 memory.go:191] Add success.
I0319 19:24:43.409809  543705 cpu.go:282] Add success.
I0319 19:24:43.420382  543705 net.go:648] Add success.
I0319 19:24:43.423047  543705 net.go:770] primary dev: ETH0
I0319 19:24:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:24:43.423084  543705 net.go:698] Add success.
I0319 19:24:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:24:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:24:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:24:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:24:53.409787  543705 memory.go:184] no items to output this cycle
I0319 19:24:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 19:25:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:25:03.409797  543705 memory.go:184] no items to output this cycle
I0319 19:25:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 19:25:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:25:13.409795  543705 memory.go:191] Add success.
I0319 19:25:13.409798  543705 cpu.go:282] Add success.
W0319 19:25:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:25:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:25:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:25:13.420147  543705 net.go:648] Add success.
I0319 19:25:13.422902  543705 net.go:770] primary dev: ETH0
I0319 19:25:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:25:13.422927  543705 net.go:698] Add success.
I0319 19:25:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:25:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:25:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0319 19:25:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:25:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 19:25:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:25:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:25:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:25:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:25:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:25:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:25:20.619735  543705 disk_info.go:125] begin check local disk info of client
I0319 19:25:20.622221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:25:20.622228  543705 disk_info.go:196] parse disk info done, disk is : [0xc000250100 0xc000250140]
E0319 19:25:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:25:23.409803  543705 memory.go:184] no items to output this cycle
I0319 19:25:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 19:25:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:25:33.409777  543705 memory.go:184] no items to output this cycle
I0319 19:25:33.409899  543705 cpu.go:275] no items to output this cycle
E0319 19:25:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:25:43.409801  543705 memory.go:191] Add success.
I0319 19:25:43.409801  543705 cpu.go:282] Add success.
I0319 19:25:43.419984  543705 net.go:648] Add success.
I0319 19:25:43.422655  543705 net.go:770] primary dev: ETH0
I0319 19:25:43.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:25:43.422683  543705 net.go:698] Add success.
I0319 19:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:25:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:25:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:25:53.409764  543705 memory.go:184] no items to output this cycle
I0319 19:25:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 19:26:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:26:03.409779  543705 memory.go:184] no items to output this cycle
I0319 19:26:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 19:26:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:26:13.409816  543705 memory.go:191] Add success.
I0319 19:26:13.409828  543705 cpu.go:282] Add success.
W0319 19:26:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:26:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:26:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:26:13.420167  543705 net.go:648] Add success.
I0319 19:26:13.422888  543705 net.go:770] primary dev: ETH0
I0319 19:26:13.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:26:13.422912  543705 net.go:698] Add success.
I0319 19:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:26:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:26:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 19:26:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:26:14.456507  543705 disk_worker.go:494] system disk:vda1
I0319 19:26:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:26:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:26:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:26:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:26:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:26:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:26:20.622754  543705 disk_info.go:125] begin check local disk info of client
I0319 19:26:20.625265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:26:20.625271  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024fc40 0xc00024fc80]
E0319 19:26:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:26:23.409791  543705 memory.go:184] no items to output this cycle
I0319 19:26:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 19:26:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:26:33.409808  543705 memory.go:184] no items to output this cycle
I0319 19:26:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:26:43.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:26:43.409901  543705 memory.go:191] Add success.
I0319 19:26:43.409947  543705 cpu.go:282] Add success.
I0319 19:26:43.419726  543705 net.go:648] Add success.
I0319 19:26:43.422558  543705 net.go:770] primary dev: ETH0
I0319 19:26:43.422573  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:26:43.422587  543705 net.go:698] Add success.
I0319 19:26:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:26:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:26:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:26:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:26:53.409790  543705 memory.go:184] no items to output this cycle
I0319 19:26:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 19:27:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:27:03.409803  543705 memory.go:184] no items to output this cycle
I0319 19:27:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:27:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:27:13.409795  543705 memory.go:191] Add success.
I0319 19:27:13.409807  543705 cpu.go:282] Add success.
W0319 19:27:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:27:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:27:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:27:13.419947  543705 net.go:770] primary dev: ETH0
I0319 19:27:13.419960  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:27:13.419972  543705 net.go:698] Add success.
I0319 19:27:13.420359  543705 net.go:648] Add success.
I0319 19:27:13.429808  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 19:27:13.452979  543705 event_worker.go:152] Polling the log file for events...
I0319 19:27:13.468636  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"910bfc41-758e-46d5-aa69-c65da3b2cb59","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:27:13.468671  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 19:27:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:27:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 19:27:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:27:14.456111  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:27:14.456120  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:27:14.456126  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:27:14.456518  543705 disk_worker.go:494] system disk:vda1
I0319 19:27:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:27:15.456562  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:27:15.456576  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:27:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:27:16.458005  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:27:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:27:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:27:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:27:20.625673  543705 disk_info.go:125] begin check local disk info of client
I0319 19:27:20.628175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:27:20.628182  543705 disk_info.go:196] parse disk info done, disk is : [0xc000292d80 0xc000292dc0]
E0319 19:27:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:27:23.409780  543705 memory.go:184] no items to output this cycle
I0319 19:27:23.409781  543705 cpu.go:275] no items to output this cycle
E0319 19:27:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:27:33.409777  543705 memory.go:184] no items to output this cycle
I0319 19:27:33.409806  543705 cpu.go:275] no items to output this cycle
I0319 19:27:37.829889  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:27:37.829896  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0319 19:27:43.409918  543705 cpu.go:282] Add success.
E0319 19:27:43.409940  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:27:43.410761  543705 memory.go:191] Add success.
I0319 19:27:43.419717  543705 net.go:648] Add success.
I0319 19:27:43.422563  543705 net.go:770] primary dev: ETH0
I0319 19:27:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:27:43.422604  543705 net.go:698] Add success.
I0319 19:27:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:27:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:27:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:27:53.409767  543705 memory.go:184] no items to output this cycle
I0319 19:27:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 19:28:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:28:03.409780  543705 memory.go:184] no items to output this cycle
I0319 19:28:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:28:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:28:13.409811  543705 memory.go:191] Add success.
I0319 19:28:13.409817  543705 cpu.go:282] Add success.
W0319 19:28:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:28:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:28:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:28:13.420067  543705 net.go:648] Add success.
I0319 19:28:13.422976  543705 net.go:770] primary dev: ETH0
I0319 19:28:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:28:13.423001  543705 net.go:698] Add success.
I0319 19:28:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:28:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:28:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0319 19:28:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:28:14.456632  543705 disk_worker.go:494] system disk:vda1
I0319 19:28:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:28:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:28:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:28:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:28:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:28:16.472514  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:28:20.628265  543705 disk_info.go:125] begin check local disk info of client
I0319 19:28:20.630869  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:28:20.630876  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001cc380 0xc0001cc3c0]
E0319 19:28:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:28:23.409775  543705 cpu.go:275] no items to output this cycle
I0319 19:28:23.409787  543705 memory.go:184] no items to output this cycle
E0319 19:28:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:28:33.409808  543705 memory.go:184] no items to output this cycle
I0319 19:28:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 19:28:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:28:43.409904  543705 memory.go:191] Add success.
I0319 19:28:43.409962  543705 cpu.go:282] Add success.
I0319 19:28:43.419713  543705 net.go:648] Add success.
I0319 19:28:43.422528  543705 net.go:770] primary dev: ETH0
I0319 19:28:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:28:43.422552  543705 net.go:698] Add success.
I0319 19:28:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:28:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:28:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:28:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:28:53.409814  543705 memory.go:184] no items to output this cycle
I0319 19:28:53.409820  543705 cpu.go:275] no items to output this cycle
E0319 19:29:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:29:03.409777  543705 memory.go:184] no items to output this cycle
I0319 19:29:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:29:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:29:13.409814  543705 memory.go:191] Add success.
I0319 19:29:13.409818  543705 cpu.go:282] Add success.
W0319 19:29:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:29:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:29:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:29:13.420058  543705 net.go:648] Add success.
I0319 19:29:13.423160  543705 net.go:770] primary dev: ETH0
I0319 19:29:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:29:13.423187  543705 net.go:698] Add success.
I0319 19:29:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:29:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:29:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 19:29:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:29:14.456505  543705 disk_worker.go:494] system disk:vda1
I0319 19:29:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:29:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:29:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:29:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:29:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:29:20.631794  543705 disk_info.go:125] begin check local disk info of client
I0319 19:29:20.634225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:29:20.634231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f8c00 0xc0001f8c40]
E0319 19:29:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:29:23.409793  543705 memory.go:184] no items to output this cycle
I0319 19:29:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 19:29:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:29:33.409780  543705 memory.go:184] no items to output this cycle
I0319 19:29:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 19:29:43.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:29:43.409897  543705 memory.go:191] Add success.
I0319 19:29:43.409990  543705 cpu.go:282] Add success.
I0319 19:29:43.419551  543705 net.go:770] primary dev: ETH0
I0319 19:29:43.419564  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:29:43.419576  543705 net.go:698] Add success.
I0319 19:29:43.419888  543705 net.go:648] Add success.
I0319 19:29:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:29:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:29:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:29:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:29:53.409772  543705 memory.go:184] no items to output this cycle
I0319 19:29:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 19:30:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:30:03.409777  543705 memory.go:184] no items to output this cycle
I0319 19:30:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:30:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:30:13.409820  543705 memory.go:191] Add success.
I0319 19:30:13.409824  543705 cpu.go:282] Add success.
W0319 19:30:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:30:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:30:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:30:13.420145  543705 net.go:648] Add success.
I0319 19:30:13.422874  543705 net.go:770] primary dev: ETH0
I0319 19:30:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:30:13.422899  543705 net.go:698] Add success.
I0319 19:30:13.469150  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ce4faa5-b1a3-4778-9f74-223254278640","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:30:13.469194  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:30:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:30:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:30:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 19:30:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:30:14.456690  543705 disk_worker.go:494] system disk:vda1
I0319 19:30:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:30:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:30:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:30:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:30:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:30:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:30:20.634819  543705 disk_info.go:125] begin check local disk info of client
I0319 19:30:20.637230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:30:20.637236  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6d80 0xc0003b6dc0]
E0319 19:30:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:30:23.409797  543705 memory.go:184] no items to output this cycle
I0319 19:30:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 19:30:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:30:33.409803  543705 memory.go:184] no items to output this cycle
I0319 19:30:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 19:30:37.830031  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:30:37.830038  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:30:43.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:30:43.410761  543705 memory.go:191] Add success.
I0319 19:30:43.409897  543705 cpu.go:282] Add success.
I0319 19:30:43.419744  543705 net.go:648] Add success.
I0319 19:30:43.422350  543705 net.go:770] primary dev: ETH0
I0319 19:30:43.422365  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:30:43.422379  543705 net.go:698] Add success.
I0319 19:30:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:30:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:30:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:30:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:30:53.409774  543705 memory.go:184] no items to output this cycle
I0319 19:30:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:31:03.410530  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:31:03.410552  543705 memory.go:184] no items to output this cycle
I0319 19:31:03.410575  543705 cpu.go:275] no items to output this cycle
E0319 19:31:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:31:13.409795  543705 memory.go:191] Add success.
I0319 19:31:13.409794  543705 cpu.go:282] Add success.
W0319 19:31:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:31:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:31:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:31:13.420432  543705 net.go:648] Add success.
I0319 19:31:13.423158  543705 net.go:770] primary dev: ETH0
I0319 19:31:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:31:13.423185  543705 net.go:698] Add success.
I0319 19:31:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:31:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:31:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 19:31:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:31:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 19:31:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:31:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:31:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:31:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:31:20.637670  543705 disk_info.go:125] begin check local disk info of client
I0319 19:31:20.640183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:31:20.640190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463f00 0xc000463f40]
E0319 19:31:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:31:23.409768  543705 memory.go:184] no items to output this cycle
I0319 19:31:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 19:31:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:31:33.409776  543705 memory.go:184] no items to output this cycle
I0319 19:31:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:31:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:31:43.409795  543705 memory.go:191] Add success.
I0319 19:31:43.409798  543705 cpu.go:282] Add success.
I0319 19:31:43.420268  543705 net.go:648] Add success.
I0319 19:31:43.422903  543705 net.go:770] primary dev: ETH0
I0319 19:31:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:31:43.422931  543705 net.go:698] Add success.
I0319 19:31:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:31:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:31:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:31:53.409770  543705 memory.go:184] no items to output this cycle
I0319 19:31:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 19:32:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:32:03.409807  543705 memory.go:184] no items to output this cycle
I0319 19:32:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:32:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:32:13.409820  543705 memory.go:191] Add success.
I0319 19:32:13.409830  543705 cpu.go:282] Add success.
W0319 19:32:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:32:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:32:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:32:13.420145  543705 net.go:648] Add success.
I0319 19:32:13.422725  543705 net.go:770] primary dev: ETH0
I0319 19:32:13.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:32:13.422750  543705 net.go:698] Add success.
W0319 19:32:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:32:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 19:32:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:32:14.456831  543705 disk_worker.go:494] system disk:vda1
I0319 19:32:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:32:14.457132  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:32:14.457140  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:32:14.457144  543705 custom_config.go:64] query custom config with name: gpu
E0319 19:32:15.456896  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:32:15.456905  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:32:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:32:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:32:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:32:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:32:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:32:20.640836  543705 disk_info.go:125] begin check local disk info of client
I0319 19:32:20.643278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:32:20.643284  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466580 0xc0004665c0]
E0319 19:32:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:32:23.409802  543705 memory.go:184] no items to output this cycle
I0319 19:32:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 19:32:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:32:33.409790  543705 memory.go:184] no items to output this cycle
I0319 19:32:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 19:32:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:32:43.409819  543705 memory.go:191] Add success.
I0319 19:32:43.409828  543705 cpu.go:282] Add success.
I0319 19:32:43.419963  543705 net.go:648] Add success.
I0319 19:32:43.423304  543705 net.go:770] primary dev: ETH0
I0319 19:32:43.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:32:43.423329  543705 net.go:698] Add success.
I0319 19:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:32:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:32:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:32:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:32:53.409817  543705 memory.go:184] no items to output this cycle
I0319 19:32:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 19:33:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:33:03.409810  543705 memory.go:184] no items to output this cycle
I0319 19:33:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:33:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:33:13.409809  543705 memory.go:191] Add success.
I0319 19:33:13.409809  543705 cpu.go:282] Add success.
W0319 19:33:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:33:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:33:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:33:13.420089  543705 net.go:648] Add success.
I0319 19:33:13.422974  543705 net.go:770] primary dev: ETH0
I0319 19:33:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:33:13.423001  543705 net.go:698] Add success.
I0319 19:33:13.464204  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bb616a8a-d6ad-48c1-8e3b-c9000694fe99","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:33:13.464237  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:33:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:33:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 19:33:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:33:14.456624  543705 disk_worker.go:494] system disk:vda1
I0319 19:33:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:33:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:33:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:33:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:33:16.472504  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:33:20.643368  543705 disk_info.go:125] begin check local disk info of client
I0319 19:33:20.645941  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:33:20.645949  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464440 0xc000464480]
E0319 19:33:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:33:23.409780  543705 memory.go:184] no items to output this cycle
I0319 19:33:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 19:33:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:33:33.409798  543705 memory.go:184] no items to output this cycle
I0319 19:33:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 19:33:37.831874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:33:37.831882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:33:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:33:43.410846  543705 memory.go:191] Add success.
I0319 19:33:43.409816  543705 cpu.go:282] Add success.
I0319 19:33:43.420517  543705 net.go:648] Add success.
I0319 19:33:43.423693  543705 net.go:770] primary dev: ETH0
I0319 19:33:43.423704  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:33:43.423722  543705 net.go:698] Add success.
I0319 19:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:33:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:33:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:33:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:33:53.409776  543705 memory.go:184] no items to output this cycle
I0319 19:33:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:34:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:34:03.409782  543705 memory.go:184] no items to output this cycle
I0319 19:34:03.409785  543705 cpu.go:275] no items to output this cycle
W0319 19:34:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:34:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:34:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:34:13.409797  543705 cpu.go:282] Add success.
E0319 19:34:13.409832  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:34:13.409856  543705 memory.go:191] Add success.
I0319 19:34:13.420222  543705 net.go:648] Add success.
I0319 19:34:13.423230  543705 net.go:770] primary dev: ETH0
I0319 19:34:13.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:34:13.423256  543705 net.go:698] Add success.
I0319 19:34:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:34:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:34:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 19:34:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:34:14.456522  543705 disk_worker.go:494] system disk:vda1
I0319 19:34:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:34:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:34:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:34:20.646032  543705 disk_info.go:125] begin check local disk info of client
I0319 19:34:20.648483  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:34:20.648490  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465900 0xc000465940]
E0319 19:34:23.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:34:23.409753  543705 memory.go:184] no items to output this cycle
I0319 19:34:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 19:34:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:34:33.409771  543705 memory.go:184] no items to output this cycle
I0319 19:34:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:34:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:34:43.409806  543705 memory.go:191] Add success.
I0319 19:34:43.409817  543705 cpu.go:282] Add success.
I0319 19:34:43.420035  543705 net.go:648] Add success.
I0319 19:34:43.422583  543705 net.go:770] primary dev: ETH0
I0319 19:34:43.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:34:43.422608  543705 net.go:698] Add success.
I0319 19:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:34:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:34:53.409778  543705 memory.go:184] no items to output this cycle
I0319 19:34:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 19:35:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:35:03.409778  543705 memory.go:184] no items to output this cycle
I0319 19:35:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:35:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:35:13.409813  543705 memory.go:191] Add success.
I0319 19:35:13.409819  543705 cpu.go:282] Add success.
W0319 19:35:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:35:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:35:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:35:13.420216  543705 net.go:648] Add success.
I0319 19:35:13.422970  543705 net.go:770] primary dev: ETH0
I0319 19:35:13.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:35:13.422997  543705 net.go:698] Add success.
I0319 19:35:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:35:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:35:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 19:35:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:35:14.456587  543705 disk_worker.go:494] system disk:vda1
I0319 19:35:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:35:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:35:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:35:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:35:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:35:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:35:20.648878  543705 disk_info.go:125] begin check local disk info of client
I0319 19:35:20.651318  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:35:20.651324  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7880 0xc0003b78c0]
E0319 19:35:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:35:23.409794  543705 memory.go:184] no items to output this cycle
I0319 19:35:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 19:35:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:35:33.409786  543705 memory.go:184] no items to output this cycle
I0319 19:35:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 19:35:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:35:43.409797  543705 memory.go:191] Add success.
I0319 19:35:43.409798  543705 cpu.go:282] Add success.
I0319 19:35:43.419844  543705 net.go:648] Add success.
I0319 19:35:43.423081  543705 net.go:770] primary dev: ETH0
I0319 19:35:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:35:43.423107  543705 net.go:698] Add success.
I0319 19:35:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:35:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:35:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:35:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:35:53.409807  543705 memory.go:184] no items to output this cycle
I0319 19:35:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 19:36:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:36:03.409809  543705 memory.go:184] no items to output this cycle
I0319 19:36:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 19:36:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:36:13.409881  543705 memory.go:191] Add success.
W0319 19:36:13.409919  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:36:13.409933  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:36:13.409936  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:36:13.409947  543705 cpu.go:282] Add success.
I0319 19:36:13.419735  543705 net.go:648] Add success.
I0319 19:36:13.422320  543705 net.go:770] primary dev: ETH0
I0319 19:36:13.422334  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:36:13.422347  543705 net.go:698] Add success.
I0319 19:36:13.463416  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8764d19b-7e44-409d-bf8c-cc1e5782048e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:36:13.463446  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:36:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:36:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:36:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 19:36:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:36:14.456717  543705 disk_worker.go:494] system disk:vda1
I0319 19:36:14.456745  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:36:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:36:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:36:16.472434  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:36:20.651887  543705 disk_info.go:125] begin check local disk info of client
I0319 19:36:20.654331  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:36:20.654337  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bea80 0xc0004beac0]
E0319 19:36:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:36:23.409797  543705 memory.go:184] no items to output this cycle
I0319 19:36:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 19:36:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:36:33.409775  543705 memory.go:184] no items to output this cycle
I0319 19:36:33.409806  543705 cpu.go:275] no items to output this cycle
I0319 19:36:37.833736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:36:37.833743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:36:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:36:43.410821  543705 memory.go:191] Add success.
I0319 19:36:43.409808  543705 cpu.go:282] Add success.
I0319 19:36:43.420538  543705 net.go:648] Add success.
I0319 19:36:43.423581  543705 net.go:770] primary dev: ETH0
I0319 19:36:43.423597  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:36:43.423610  543705 net.go:698] Add success.
I0319 19:36:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:36:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:36:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:36:53.409789  543705 memory.go:184] no items to output this cycle
I0319 19:36:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:37:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:37:03.409781  543705 memory.go:184] no items to output this cycle
I0319 19:37:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 19:37:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:37:13.409831  543705 memory.go:191] Add success.
I0319 19:37:13.409835  543705 cpu.go:282] Add success.
W0319 19:37:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:37:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:37:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:37:13.420190  543705 net.go:648] Add success.
I0319 19:37:13.422929  543705 net.go:770] primary dev: ETH0
I0319 19:37:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:37:13.422954  543705 net.go:698] Add success.
I0319 19:37:13.453493  543705 event_worker.go:152] Polling the log file for events...
W0319 19:37:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:37:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 19:37:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:37:14.456953  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:37:14.456963  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:37:14.456969  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:37:14.457012  543705 disk_worker.go:494] system disk:vda1
I0319 19:37:14.457041  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:37:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:37:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:37:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:37:16.457983  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:37:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:37:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:37:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:37:20.654419  543705 disk_info.go:125] begin check local disk info of client
I0319 19:37:20.656866  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:37:20.656874  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003718c0 0xc000371900]
E0319 19:37:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:37:23.409760  543705 memory.go:184] no items to output this cycle
I0319 19:37:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 19:37:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:37:33.409808  543705 memory.go:184] no items to output this cycle
I0319 19:37:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:37:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:37:43.409796  543705 memory.go:191] Add success.
I0319 19:37:43.409795  543705 cpu.go:282] Add success.
I0319 19:37:43.419996  543705 net.go:648] Add success.
I0319 19:37:43.422660  543705 net.go:770] primary dev: ETH0
I0319 19:37:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:37:43.422689  543705 net.go:698] Add success.
I0319 19:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:37:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:37:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:37:53.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:37:53.410282  543705 memory.go:184] no items to output this cycle
I0319 19:37:53.410287  543705 cpu.go:275] no items to output this cycle
E0319 19:38:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:38:03.409802  543705 memory.go:184] no items to output this cycle
I0319 19:38:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 19:38:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:38:13.409778  543705 memory.go:191] Add success.
W0319 19:38:13.409981  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:38:13.409996  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:38:13.409996  543705 cpu.go:282] Add success.
I0319 19:38:13.409999  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:38:13.419716  543705 net.go:648] Add success.
I0319 19:38:13.422471  543705 net.go:770] primary dev: ETH0
I0319 19:38:13.422485  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:38:13.422499  543705 net.go:698] Add success.
I0319 19:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:38:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:38:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 19:38:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:38:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 19:38:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:38:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:38:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:38:16.472433  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:38:20.657675  543705 disk_info.go:125] begin check local disk info of client
I0319 19:38:20.660205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:38:20.660212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf000 0xc0002bf040]
E0319 19:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:38:23.409792  543705 memory.go:184] no items to output this cycle
I0319 19:38:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:38:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:38:33.409773  543705 memory.go:184] no items to output this cycle
I0319 19:38:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:38:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:38:43.409791  543705 memory.go:191] Add success.
I0319 19:38:43.409791  543705 cpu.go:282] Add success.
I0319 19:38:43.419953  543705 net.go:648] Add success.
I0319 19:38:43.422793  543705 net.go:770] primary dev: ETH0
I0319 19:38:43.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:38:43.422824  543705 net.go:698] Add success.
I0319 19:38:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:38:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:38:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:38:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:38:53.409809  543705 memory.go:184] no items to output this cycle
I0319 19:38:53.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:39:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:39:03.409773  543705 memory.go:184] no items to output this cycle
I0319 19:39:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 19:39:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:39:13.409924  543705 memory.go:191] Add success.
I0319 19:39:13.409946  543705 cpu.go:282] Add success.
W0319 19:39:13.409960  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:39:13.409974  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:39:13.409979  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:39:13.419709  543705 net.go:648] Add success.
I0319 19:39:13.422428  543705 net.go:770] primary dev: ETH0
I0319 19:39:13.422441  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:39:13.422452  543705 net.go:698] Add success.
I0319 19:39:13.601412  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"acfec661-488a-442c-b4d6-885c70cb769d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:39:13.601453  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:39:14.453971  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:39:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:39:14.455279  543705 disk_worker.go:708] disk space is not compliant
W0319 19:39:14.455282  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:39:14.456847  543705 disk_worker.go:494] system disk:vda1
I0319 19:39:14.456876  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:39:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:39:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:39:20.660313  543705 disk_info.go:125] begin check local disk info of client
I0319 19:39:20.662796  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:39:20.662803  543705 disk_info.go:196] parse disk info done, disk is : [0xc00055ca40 0xc00055ca80]
E0319 19:39:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:39:23.409795  543705 memory.go:184] no items to output this cycle
I0319 19:39:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:39:33.409782  543705 memory.go:184] no items to output this cycle
I0319 19:39:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 19:39:37.835868  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:39:37.835875  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:39:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:39:43.410703  543705 memory.go:191] Add success.
I0319 19:39:43.409791  543705 cpu.go:282] Add success.
I0319 19:39:43.420421  543705 net.go:648] Add success.
I0319 19:39:43.423236  543705 net.go:770] primary dev: ETH0
I0319 19:39:43.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:39:43.423268  543705 net.go:698] Add success.
I0319 19:39:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:39:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:39:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:39:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:39:53.409796  543705 memory.go:184] no items to output this cycle
I0319 19:39:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 19:40:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:40:03.409782  543705 memory.go:184] no items to output this cycle
I0319 19:40:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:40:13.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:40:13.409892  543705 memory.go:191] Add success.
W0319 19:40:13.409929  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:40:13.409999  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:40:13.410010  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:40:13.410221  543705 cpu.go:282] Add success.
I0319 19:40:13.419705  543705 net.go:648] Add success.
I0319 19:40:13.422408  543705 net.go:770] primary dev: ETH0
I0319 19:40:13.422423  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:40:13.422437  543705 net.go:698] Add success.
I0319 19:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:40:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:40:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0319 19:40:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:40:14.456483  543705 disk_worker.go:494] system disk:vda1
I0319 19:40:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:40:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:40:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:40:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:40:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:40:20.662886  543705 disk_info.go:125] begin check local disk info of client
I0319 19:40:20.665388  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:40:20.665394  543705 disk_info.go:196] parse disk info done, disk is : [0xc000578380 0xc0005783c0]
E0319 19:40:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:40:23.409774  543705 memory.go:184] no items to output this cycle
I0319 19:40:23.409831  543705 cpu.go:275] no items to output this cycle
E0319 19:40:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:40:33.409813  543705 memory.go:184] no items to output this cycle
I0319 19:40:33.409826  543705 cpu.go:275] no items to output this cycle
E0319 19:40:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:40:43.409784  543705 memory.go:191] Add success.
I0319 19:40:43.409809  543705 cpu.go:282] Add success.
I0319 19:40:43.419942  543705 net.go:648] Add success.
I0319 19:40:43.422983  543705 net.go:770] primary dev: ETH0
I0319 19:40:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:40:43.423009  543705 net.go:698] Add success.
I0319 19:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:40:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:40:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:40:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:40:53.409785  543705 memory.go:184] no items to output this cycle
I0319 19:40:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 19:41:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:41:03.409789  543705 memory.go:184] no items to output this cycle
I0319 19:41:03.409790  543705 cpu.go:275] no items to output this cycle
W0319 19:41:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:41:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:41:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 19:41:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:41:13.409829  543705 memory.go:191] Add success.
I0319 19:41:13.409837  543705 cpu.go:282] Add success.
I0319 19:41:13.420238  543705 net.go:648] Add success.
I0319 19:41:13.422971  543705 net.go:770] primary dev: ETH0
I0319 19:41:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:41:13.423000  543705 net.go:698] Add success.
I0319 19:41:14.454921  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:41:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:41:14.455104  543705 disk_worker.go:708] disk space is not compliant
W0319 19:41:14.455106  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:41:14.456414  543705 disk_worker.go:494] system disk:vda1
I0319 19:41:14.456475  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:41:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:41:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:41:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:41:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:41:20.665684  543705 disk_info.go:125] begin check local disk info of client
I0319 19:41:20.668264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:41:20.668270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aea00 0xc0002aea40]
E0319 19:41:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:41:23.409799  543705 memory.go:184] no items to output this cycle
I0319 19:41:23.409812  543705 cpu.go:275] no items to output this cycle
E0319 19:41:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:41:33.409783  543705 memory.go:184] no items to output this cycle
I0319 19:41:33.409786  543705 cpu.go:275] no items to output this cycle
E0319 19:41:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:41:43.409778  543705 memory.go:191] Add success.
I0319 19:41:43.409801  543705 cpu.go:282] Add success.
I0319 19:41:43.419883  543705 net.go:648] Add success.
I0319 19:41:43.422630  543705 net.go:770] primary dev: ETH0
I0319 19:41:43.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:41:43.422656  543705 net.go:698] Add success.
I0319 19:41:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:41:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:41:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:41:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:41:53.409776  543705 memory.go:184] no items to output this cycle
I0319 19:41:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 19:42:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:42:03.409779  543705 memory.go:184] no items to output this cycle
I0319 19:42:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:42:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:42:13.409819  543705 memory.go:191] Add success.
I0319 19:42:13.409825  543705 cpu.go:282] Add success.
W0319 19:42:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:42:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:42:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:42:13.420171  543705 net.go:648] Add success.
I0319 19:42:13.423013  543705 net.go:770] primary dev: ETH0
I0319 19:42:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:42:13.423038  543705 net.go:698] Add success.
I0319 19:42:13.463656  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf3cb589-20cc-48e3-9d0c-5bb6402d4faf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:42:13.463689  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 19:42:14.455085  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:42:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0319 19:42:14.455148  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:42:14.457016  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:42:14.457023  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:42:14.457027  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:42:14.457043  543705 disk_worker.go:494] system disk:vda1
I0319 19:42:14.457094  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:42:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:42:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:42:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:42:16.457995  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:42:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:42:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:42:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:42:20.668980  543705 disk_info.go:125] begin check local disk info of client
I0319 19:42:20.671419  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:42:20.671426  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ec00 0xc00037ec40]
E0319 19:42:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:42:23.409772  543705 memory.go:184] no items to output this cycle
I0319 19:42:23.409775  543705 cpu.go:275] no items to output this cycle
E0319 19:42:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:42:33.409809  543705 memory.go:184] no items to output this cycle
I0319 19:42:33.409824  543705 cpu.go:275] no items to output this cycle
I0319 19:42:37.837734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:42:37.837740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:42:43.410231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:42:43.411167  543705 memory.go:191] Add success.
I0319 19:42:43.410287  543705 cpu.go:282] Add success.
I0319 19:42:43.419956  543705 net.go:648] Add success.
I0319 19:42:43.422513  543705 net.go:770] primary dev: ETH0
I0319 19:42:43.422528  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:42:43.422542  543705 net.go:698] Add success.
I0319 19:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:42:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:42:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:42:53.409808  543705 memory.go:184] no items to output this cycle
I0319 19:42:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:43:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:43:03.409774  543705 memory.go:184] no items to output this cycle
I0319 19:43:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:43:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:43:13.409817  543705 memory.go:191] Add success.
I0319 19:43:13.409826  543705 cpu.go:282] Add success.
W0319 19:43:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:43:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:43:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:43:13.420277  543705 net.go:648] Add success.
I0319 19:43:13.423203  543705 net.go:770] primary dev: ETH0
I0319 19:43:13.423217  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:43:13.423229  543705 net.go:698] Add success.
I0319 19:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:43:14.455085  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:43:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0319 19:43:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:43:14.456457  543705 disk_worker.go:494] system disk:vda1
I0319 19:43:14.456500  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:43:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:43:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:43:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:43:20.672001  543705 disk_info.go:125] begin check local disk info of client
I0319 19:43:20.674505  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:43:20.674512  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cb280 0xc0004cb2c0]
E0319 19:43:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:43:23.409776  543705 memory.go:184] no items to output this cycle
I0319 19:43:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 19:43:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:43:33.409788  543705 memory.go:184] no items to output this cycle
I0319 19:43:33.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:43:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:43:43.409822  543705 memory.go:191] Add success.
I0319 19:43:43.409833  543705 cpu.go:282] Add success.
I0319 19:43:43.419915  543705 net.go:648] Add success.
I0319 19:43:43.422702  543705 net.go:770] primary dev: ETH0
I0319 19:43:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:43:43.422730  543705 net.go:698] Add success.
I0319 19:43:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:43:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:43:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:43:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:43:53.409779  543705 memory.go:184] no items to output this cycle
I0319 19:43:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:44:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:44:03.409816  543705 memory.go:184] no items to output this cycle
I0319 19:44:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 19:44:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:44:13.409797  543705 memory.go:191] Add success.
W0319 19:44:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 19:44:13.409824  543705 cpu.go:282] Add success.
W0319 19:44:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:44:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:44:13.420370  543705 net.go:648] Add success.
I0319 19:44:13.423610  543705 net.go:770] primary dev: ETH0
I0319 19:44:13.423623  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:44:13.423634  543705 net.go:698] Add success.
I0319 19:44:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:44:14.455351  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:44:14.455445  543705 disk_worker.go:708] disk space is not compliant
W0319 19:44:14.455449  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:44:14.457057  543705 disk_worker.go:494] system disk:vda1
I0319 19:44:14.457085  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:44:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:44:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:44:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:44:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:44:20.675012  543705 disk_info.go:125] begin check local disk info of client
I0319 19:44:20.677413  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:44:20.677419  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bd3c0 0xc0004bd400]
E0319 19:44:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:44:23.409805  543705 memory.go:184] no items to output this cycle
I0319 19:44:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 19:44:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:44:33.409795  543705 memory.go:184] no items to output this cycle
I0319 19:44:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 19:44:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:44:43.409805  543705 memory.go:191] Add success.
I0319 19:44:43.409808  543705 cpu.go:282] Add success.
I0319 19:44:43.419872  543705 net.go:648] Add success.
I0319 19:44:43.422709  543705 net.go:770] primary dev: ETH0
I0319 19:44:43.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:44:43.422734  543705 net.go:698] Add success.
I0319 19:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:44:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:44:53.409789  543705 memory.go:184] no items to output this cycle
I0319 19:44:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 19:45:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:45:03.409817  543705 memory.go:184] no items to output this cycle
I0319 19:45:03.409830  543705 cpu.go:275] no items to output this cycle
E0319 19:45:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:45:13.409800  543705 memory.go:191] Add success.
I0319 19:45:13.409801  543705 cpu.go:282] Add success.
W0319 19:45:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:45:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:45:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:45:13.420117  543705 net.go:648] Add success.
I0319 19:45:13.423141  543705 net.go:770] primary dev: ETH0
I0319 19:45:13.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:45:13.423166  543705 net.go:698] Add success.
I0319 19:45:13.469024  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00c13ee2-69e2-4120-b620-f0ccf0b84c96","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:45:13.469056  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:45:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:45:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:45:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 19:45:14.455272  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:45:14.456889  543705 disk_worker.go:494] system disk:vda1
I0319 19:45:14.456929  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:45:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:45:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:45:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:45:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:45:16.472428  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:45:20.677673  543705 disk_info.go:125] begin check local disk info of client
I0319 19:45:20.680165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:45:20.680171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ae300 0xc0002ae340]
E0319 19:45:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:45:23.409761  543705 memory.go:184] no items to output this cycle
I0319 19:45:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:45:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:45:33.409808  543705 memory.go:184] no items to output this cycle
I0319 19:45:33.409838  543705 cpu.go:275] no items to output this cycle
I0319 19:45:37.837883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:45:37.837890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:45:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:45:43.410660  543705 memory.go:191] Add success.
I0319 19:45:43.409802  543705 cpu.go:282] Add success.
I0319 19:45:43.420381  543705 net.go:648] Add success.
I0319 19:45:43.423128  543705 net.go:770] primary dev: ETH0
I0319 19:45:43.423141  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:45:43.423154  543705 net.go:698] Add success.
I0319 19:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:45:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:45:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:45:53.409769  543705 memory.go:184] no items to output this cycle
I0319 19:45:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 19:46:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:46:03.409782  543705 memory.go:184] no items to output this cycle
I0319 19:46:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 19:46:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:46:13.409793  543705 memory.go:191] Add success.
I0319 19:46:13.409796  543705 cpu.go:282] Add success.
W0319 19:46:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:46:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:46:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:46:13.420295  543705 net.go:648] Add success.
I0319 19:46:13.423367  543705 net.go:770] primary dev: ETH0
I0319 19:46:13.423385  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:46:13.423399  543705 net.go:698] Add success.
I0319 19:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:46:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:46:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0319 19:46:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:46:14.457675  543705 disk_worker.go:494] system disk:vda1
I0319 19:46:14.457718  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:46:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:46:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:46:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:46:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:46:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:46:20.681036  543705 disk_info.go:125] begin check local disk info of client
I0319 19:46:20.683479  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:46:20.683484  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482080 0xc0004820c0]
E0319 19:46:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:46:23.409759  543705 memory.go:184] no items to output this cycle
I0319 19:46:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:46:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:46:33.409804  543705 memory.go:184] no items to output this cycle
I0319 19:46:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 19:46:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:46:43.409783  543705 memory.go:191] Add success.
I0319 19:46:43.409806  543705 cpu.go:282] Add success.
I0319 19:46:43.419902  543705 net.go:648] Add success.
I0319 19:46:43.422681  543705 net.go:770] primary dev: ETH0
I0319 19:46:43.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:46:43.422711  543705 net.go:698] Add success.
I0319 19:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:46:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:46:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:46:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:46:53.409790  543705 memory.go:184] no items to output this cycle
I0319 19:46:53.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:47:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:47:03.409773  543705 memory.go:184] no items to output this cycle
I0319 19:47:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 19:47:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:47:13.409811  543705 memory.go:191] Add success.
I0319 19:47:13.409819  543705 cpu.go:282] Add success.
W0319 19:47:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:47:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:47:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:47:13.420280  543705 net.go:648] Add success.
I0319 19:47:13.423254  543705 net.go:770] primary dev: ETH0
I0319 19:47:13.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:47:13.423294  543705 net.go:698] Add success.
I0319 19:47:13.452935  543705 event_worker.go:152] Polling the log file for events...
W0319 19:47:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:47:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 19:47:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:47:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:47:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:47:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:47:14.456540  543705 disk_worker.go:494] system disk:vda1
I0319 19:47:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:47:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:47:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:47:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:47:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:47:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:47:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:47:16.472322  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:47:20.684060  543705 disk_info.go:125] begin check local disk info of client
I0319 19:47:20.686454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:47:20.686459  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028c500 0xc00028c540]
E0319 19:47:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:47:23.409793  543705 memory.go:184] no items to output this cycle
I0319 19:47:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:47:33.410321  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:47:33.410341  543705 memory.go:184] no items to output this cycle
I0319 19:47:33.410351  543705 cpu.go:275] no items to output this cycle
E0319 19:47:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:47:43.409785  543705 memory.go:191] Add success.
I0319 19:47:43.409787  543705 cpu.go:282] Add success.
I0319 19:47:43.419881  543705 net.go:648] Add success.
I0319 19:47:43.422590  543705 net.go:770] primary dev: ETH0
I0319 19:47:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:47:43.422619  543705 net.go:698] Add success.
I0319 19:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:47:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:47:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:47:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:47:53.409786  543705 memory.go:184] no items to output this cycle
I0319 19:47:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 19:48:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:48:03.409783  543705 memory.go:184] no items to output this cycle
I0319 19:48:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 19:48:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:48:13.409796  543705 memory.go:191] Add success.
I0319 19:48:13.409797  543705 cpu.go:282] Add success.
W0319 19:48:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:48:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:48:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:48:13.420613  543705 net.go:648] Add success.
I0319 19:48:13.423223  543705 net.go:770] primary dev: ETH0
I0319 19:48:13.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:48:13.423250  543705 net.go:698] Add success.
I0319 19:48:13.468342  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c11e6322-5b1c-48c3-8ba2-d773ed0ea310","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:48:13.468375  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:48:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:48:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0319 19:48:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:48:14.456872  543705 disk_worker.go:494] system disk:vda1
I0319 19:48:14.456907  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:48:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:48:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:48:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:48:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:48:20.687076  543705 disk_info.go:125] begin check local disk info of client
I0319 19:48:20.689562  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:48:20.689569  543705 disk_info.go:196] parse disk info done, disk is : [0xc000494080 0xc0004940c0]
E0319 19:48:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:48:23.409765  543705 memory.go:184] no items to output this cycle
I0319 19:48:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 19:48:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:48:33.409813  543705 memory.go:184] no items to output this cycle
I0319 19:48:33.409825  543705 cpu.go:275] no items to output this cycle
I0319 19:48:37.839900  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:48:37.839907  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:48:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:48:43.410628  543705 memory.go:191] Add success.
I0319 19:48:43.409827  543705 cpu.go:282] Add success.
I0319 19:48:43.420406  543705 net.go:648] Add success.
I0319 19:48:43.423189  543705 net.go:770] primary dev: ETH0
I0319 19:48:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:48:43.423216  543705 net.go:698] Add success.
I0319 19:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:48:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:48:53.409806  543705 memory.go:184] no items to output this cycle
I0319 19:48:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 19:49:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:49:03.409789  543705 memory.go:184] no items to output this cycle
I0319 19:49:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 19:49:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:49:13.409804  543705 memory.go:191] Add success.
I0319 19:49:13.409804  543705 cpu.go:282] Add success.
W0319 19:49:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:49:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:49:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:49:13.420258  543705 net.go:648] Add success.
I0319 19:49:13.422830  543705 net.go:770] primary dev: ETH0
I0319 19:49:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:49:13.422855  543705 net.go:698] Add success.
I0319 19:49:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:49:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:49:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 19:49:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:49:14.456596  543705 disk_worker.go:494] system disk:vda1
I0319 19:49:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:49:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:49:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:49:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:49:20.689674  543705 disk_info.go:125] begin check local disk info of client
I0319 19:49:20.692158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:49:20.692164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396080 0xc0003960c0]
E0319 19:49:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:49:23.409782  543705 memory.go:184] no items to output this cycle
I0319 19:49:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 19:49:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:49:33.409783  543705 memory.go:184] no items to output this cycle
I0319 19:49:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 19:49:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:49:43.409795  543705 memory.go:191] Add success.
I0319 19:49:43.409794  543705 cpu.go:282] Add success.
I0319 19:49:43.419906  543705 net.go:648] Add success.
I0319 19:49:43.423010  543705 net.go:770] primary dev: ETH0
I0319 19:49:43.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:49:43.423043  543705 net.go:698] Add success.
I0319 19:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:49:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:49:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:49:53.409762  543705 memory.go:184] no items to output this cycle
I0319 19:49:53.409835  543705 cpu.go:275] no items to output this cycle
E0319 19:50:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:50:03.409785  543705 memory.go:184] no items to output this cycle
I0319 19:50:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 19:50:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:50:13.409785  543705 memory.go:191] Add success.
I0319 19:50:13.409808  543705 cpu.go:282] Add success.
W0319 19:50:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:50:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:50:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:50:13.420178  543705 net.go:648] Add success.
I0319 19:50:13.423234  543705 net.go:770] primary dev: ETH0
I0319 19:50:13.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:50:13.423260  543705 net.go:698] Add success.
I0319 19:50:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:50:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:50:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 19:50:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:50:14.456572  543705 disk_worker.go:494] system disk:vda1
I0319 19:50:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:50:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:50:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:50:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:50:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:50:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:50:20.693100  543705 disk_info.go:125] begin check local disk info of client
I0319 19:50:20.695527  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:50:20.695533  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e380 0xc00047e3c0]
E0319 19:50:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:50:23.409758  543705 memory.go:184] no items to output this cycle
I0319 19:50:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 19:50:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:50:33.409785  543705 memory.go:184] no items to output this cycle
I0319 19:50:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:50:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:50:43.409797  543705 memory.go:191] Add success.
I0319 19:50:43.409799  543705 cpu.go:282] Add success.
I0319 19:50:43.419885  543705 net.go:648] Add success.
I0319 19:50:43.422881  543705 net.go:770] primary dev: ETH0
I0319 19:50:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:50:43.422910  543705 net.go:698] Add success.
I0319 19:50:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:50:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:50:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:50:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:50:53.409814  543705 memory.go:184] no items to output this cycle
I0319 19:50:53.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:51:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:51:03.409773  543705 memory.go:184] no items to output this cycle
I0319 19:51:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 19:51:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:51:13.409788  543705 memory.go:191] Add success.
I0319 19:51:13.409788  543705 cpu.go:282] Add success.
W0319 19:51:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:51:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:51:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:51:13.420130  543705 net.go:648] Add success.
I0319 19:51:13.422806  543705 net.go:770] primary dev: ETH0
I0319 19:51:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:51:13.422830  543705 net.go:698] Add success.
I0319 19:51:13.473776  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a4239847-79ab-46ea-bc3a-2f4d037c8bf4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:51:13.473811  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:51:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:51:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 19:51:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:51:14.457181  543705 disk_worker.go:494] system disk:vda1
I0319 19:51:14.457210  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:51:15.455603  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:51:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:51:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:51:20.695614  543705 disk_info.go:125] begin check local disk info of client
I0319 19:51:20.698101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:51:20.698107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe300 0xc0003fe340]
E0319 19:51:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:51:23.409790  543705 memory.go:184] no items to output this cycle
I0319 19:51:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:51:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:51:33.409784  543705 memory.go:184] no items to output this cycle
I0319 19:51:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 19:51:37.841734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:51:37.841741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:51:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:51:43.410892  543705 memory.go:191] Add success.
I0319 19:51:43.409806  543705 cpu.go:282] Add success.
I0319 19:51:43.420655  543705 net.go:648] Add success.
I0319 19:51:43.423889  543705 net.go:770] primary dev: ETH0
I0319 19:51:43.423902  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:51:43.423915  543705 net.go:698] Add success.
I0319 19:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:51:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:51:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:51:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:51:53.409770  543705 memory.go:184] no items to output this cycle
I0319 19:51:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:52:03.409783  543705 memory.go:184] no items to output this cycle
I0319 19:52:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 19:52:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:52:13.409811  543705 memory.go:191] Add success.
I0319 19:52:13.409816  543705 cpu.go:282] Add success.
W0319 19:52:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:52:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:52:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:52:13.420137  543705 net.go:648] Add success.
I0319 19:52:13.423177  543705 net.go:770] primary dev: ETH0
I0319 19:52:13.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:52:13.423207  543705 net.go:698] Add success.
W0319 19:52:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:52:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 19:52:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:52:14.456128  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:52:14.456137  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:52:14.456144  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:52:14.456422  543705 disk_worker.go:494] system disk:vda1
I0319 19:52:14.456454  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:52:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:52:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:52:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:52:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:52:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:52:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:52:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:52:20.699135  543705 disk_info.go:125] begin check local disk info of client
I0319 19:52:20.701617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:52:20.701624  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482140 0xc000482180]
E0319 19:52:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:52:23.409763  543705 memory.go:184] no items to output this cycle
I0319 19:52:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:52:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:52:33.409773  543705 memory.go:184] no items to output this cycle
I0319 19:52:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 19:52:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:52:43.409816  543705 memory.go:191] Add success.
I0319 19:52:43.409824  543705 cpu.go:282] Add success.
I0319 19:52:43.419867  543705 net.go:648] Add success.
I0319 19:52:43.422559  543705 net.go:770] primary dev: ETH0
I0319 19:52:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:52:43.422585  543705 net.go:698] Add success.
I0319 19:52:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:52:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:52:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:52:53.409794  543705 memory.go:184] no items to output this cycle
I0319 19:52:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 19:53:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:53:03.409775  543705 memory.go:184] no items to output this cycle
I0319 19:53:03.409779  543705 cpu.go:275] no items to output this cycle
E0319 19:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:53:13.409786  543705 memory.go:191] Add success.
I0319 19:53:13.409811  543705 cpu.go:282] Add success.
W0319 19:53:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:53:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:53:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:53:13.420190  543705 net.go:648] Add success.
I0319 19:53:13.423057  543705 net.go:770] primary dev: ETH0
I0319 19:53:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:53:13.423085  543705 net.go:698] Add success.
I0319 19:53:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:53:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:53:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 19:53:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:53:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 19:53:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:53:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:53:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:53:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:53:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:53:16.472463  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:53:20.701682  543705 disk_info.go:125] begin check local disk info of client
I0319 19:53:20.704159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:53:20.704165  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382100 0xc000382140]
E0319 19:53:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:53:23.409795  543705 memory.go:184] no items to output this cycle
I0319 19:53:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 19:53:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:53:33.409768  543705 memory.go:184] no items to output this cycle
I0319 19:53:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:53:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:53:43.409823  543705 memory.go:191] Add success.
I0319 19:53:43.409826  543705 cpu.go:282] Add success.
I0319 19:53:43.419905  543705 net.go:648] Add success.
I0319 19:53:43.422982  543705 net.go:770] primary dev: ETH0
I0319 19:53:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:53:43.423008  543705 net.go:698] Add success.
I0319 19:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:53:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:53:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:53:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:53:53.409794  543705 memory.go:184] no items to output this cycle
I0319 19:53:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:54:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:54:03.409789  543705 memory.go:184] no items to output this cycle
I0319 19:54:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 19:54:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:54:13.409834  543705 memory.go:191] Add success.
I0319 19:54:13.409834  543705 cpu.go:282] Add success.
W0319 19:54:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:54:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:54:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:54:13.420172  543705 net.go:648] Add success.
I0319 19:54:13.422759  543705 net.go:770] primary dev: ETH0
I0319 19:54:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:54:13.422785  543705 net.go:698] Add success.
I0319 19:54:13.468856  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d01bd806-9dd3-4889-8637-2303f66e46c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:54:13.468889  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 19:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:54:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:54:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 19:54:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:54:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 19:54:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:54:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:54:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:54:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:54:20.705200  543705 disk_info.go:125] begin check local disk info of client
I0319 19:54:20.707704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:54:20.707712  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342200 0xc000342240]
E0319 19:54:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:54:23.409800  543705 memory.go:184] no items to output this cycle
I0319 19:54:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 19:54:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:54:33.409812  543705 memory.go:184] no items to output this cycle
I0319 19:54:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 19:54:37.843907  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:54:37.843913  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:54:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:54:43.410637  543705 memory.go:191] Add success.
I0319 19:54:43.409804  543705 cpu.go:282] Add success.
I0319 19:54:43.420363  543705 net.go:648] Add success.
I0319 19:54:43.423208  543705 net.go:770] primary dev: ETH0
I0319 19:54:43.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:54:43.423234  543705 net.go:698] Add success.
I0319 19:54:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:54:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:54:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:54:53.410264  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:54:53.410282  543705 memory.go:184] no items to output this cycle
I0319 19:54:53.410288  543705 cpu.go:275] no items to output this cycle
E0319 19:55:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:55:03.409785  543705 memory.go:184] no items to output this cycle
I0319 19:55:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 19:55:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:55:13.409805  543705 memory.go:191] Add success.
I0319 19:55:13.409817  543705 cpu.go:282] Add success.
W0319 19:55:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:55:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:55:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:55:13.420068  543705 net.go:648] Add success.
I0319 19:55:13.422682  543705 net.go:770] primary dev: ETH0
I0319 19:55:13.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:55:13.422706  543705 net.go:698] Add success.
I0319 19:55:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:55:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:55:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 19:55:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:55:14.456493  543705 disk_worker.go:494] system disk:vda1
I0319 19:55:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:55:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:55:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:55:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:55:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:55:20.709174  543705 disk_info.go:125] begin check local disk info of client
I0319 19:55:20.711603  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:55:20.711609  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f7c80 0xc0001f7cc0]
E0319 19:55:23.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:55:23.409878  543705 memory.go:184] no items to output this cycle
I0319 19:55:23.409964  543705 cpu.go:275] no items to output this cycle
E0319 19:55:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:55:33.409783  543705 memory.go:184] no items to output this cycle
I0319 19:55:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 19:55:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:55:43.409782  543705 memory.go:191] Add success.
I0319 19:55:43.409802  543705 cpu.go:282] Add success.
I0319 19:55:43.419962  543705 net.go:648] Add success.
I0319 19:55:43.422499  543705 net.go:770] primary dev: ETH0
I0319 19:55:43.422512  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:55:43.422524  543705 net.go:698] Add success.
I0319 19:55:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:55:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:55:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:55:53.409820  543705 cpu.go:275] no items to output this cycle
I0319 19:55:53.409822  543705 memory.go:184] no items to output this cycle
E0319 19:56:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:56:03.409806  543705 memory.go:184] no items to output this cycle
I0319 19:56:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 19:56:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:56:13.409776  543705 memory.go:191] Add success.
W0319 19:56:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:56:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:56:13.409812  543705 cpu.go:282] Add success.
I0319 19:56:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:56:13.420321  543705 net.go:648] Add success.
I0319 19:56:13.423132  543705 net.go:770] primary dev: ETH0
I0319 19:56:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:56:13.423160  543705 net.go:698] Add success.
I0319 19:56:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:56:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:56:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 19:56:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:56:14.456488  543705 disk_worker.go:494] system disk:vda1
I0319 19:56:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:56:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:56:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:56:20.712196  543705 disk_info.go:125] begin check local disk info of client
I0319 19:56:20.714650  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:56:20.714656  543705 disk_info.go:196] parse disk info done, disk is : [0xc000234580 0xc0002345c0]
E0319 19:56:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:56:23.409792  543705 memory.go:184] no items to output this cycle
I0319 19:56:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 19:56:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:56:33.409787  543705 memory.go:184] no items to output this cycle
I0319 19:56:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 19:56:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:56:43.409782  543705 memory.go:191] Add success.
I0319 19:56:43.409794  543705 cpu.go:282] Add success.
I0319 19:56:43.419868  543705 net.go:648] Add success.
I0319 19:56:43.422852  543705 net.go:770] primary dev: ETH0
I0319 19:56:43.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:56:43.422884  543705 net.go:698] Add success.
I0319 19:56:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:56:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:56:53.409793  543705 cpu.go:275] no items to output this cycle
I0319 19:56:53.409799  543705 memory.go:184] no items to output this cycle
E0319 19:57:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:57:03.409796  543705 memory.go:184] no items to output this cycle
I0319 19:57:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 19:57:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:57:13.409789  543705 memory.go:191] Add success.
I0319 19:57:13.409788  543705 cpu.go:282] Add success.
W0319 19:57:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:57:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:57:13.420058  543705 net.go:648] Add success.
I0319 19:57:13.422672  543705 net.go:770] primary dev: ETH0
I0319 19:57:13.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:57:13.422700  543705 net.go:698] Add success.
I0319 19:57:13.428573  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 19:57:13.452842  543705 event_worker.go:152] Polling the log file for events...
I0319 19:57:13.467995  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9a9f30af-c0d5-47a4-8deb-4d7d8b16cdfd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 19:57:13.468026  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 19:57:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:57:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 19:57:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0319 19:57:14.455923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 19:57:14.455932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 19:57:14.455938  543705 custom_config.go:64] query custom config with name: gpu
I0319 19:57:14.456694  543705 disk_worker.go:494] system disk:vda1
I0319 19:57:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 19:57:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 19:57:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:57:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 19:57:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 19:57:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:57:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:57:16.472308  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:57:20.715214  543705 disk_info.go:125] begin check local disk info of client
I0319 19:57:20.717686  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:57:20.717692  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004645c0 0xc000464600]
E0319 19:57:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:57:23.409761  543705 memory.go:184] no items to output this cycle
I0319 19:57:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:57:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:57:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 19:57:33.409797  543705 memory.go:184] no items to output this cycle
I0319 19:57:37.845739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 19:57:37.845746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 19:57:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:57:43.410725  543705 memory.go:191] Add success.
I0319 19:57:43.409825  543705 cpu.go:282] Add success.
I0319 19:57:43.420637  543705 net.go:648] Add success.
I0319 19:57:43.423851  543705 net.go:770] primary dev: ETH0
I0319 19:57:43.423864  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:57:43.423876  543705 net.go:698] Add success.
I0319 19:57:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:57:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:57:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:57:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:57:53.409784  543705 memory.go:184] no items to output this cycle
I0319 19:57:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 19:58:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:58:03.409795  543705 cpu.go:275] no items to output this cycle
I0319 19:58:03.409797  543705 memory.go:184] no items to output this cycle
E0319 19:58:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:58:13.409781  543705 memory.go:191] Add success.
W0319 19:58:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 19:58:13.409813  543705 cpu.go:282] Add success.
W0319 19:58:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:58:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:58:13.420132  543705 net.go:648] Add success.
I0319 19:58:13.422953  543705 net.go:770] primary dev: ETH0
I0319 19:58:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:58:13.422978  543705 net.go:698] Add success.
I0319 19:58:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:58:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:58:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 19:58:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:58:14.456569  543705 disk_worker.go:494] system disk:vda1
I0319 19:58:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:58:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:58:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:58:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:58:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:58:20.719232  543705 disk_info.go:125] begin check local disk info of client
I0319 19:58:20.721750  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:58:20.721756  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256400 0xc000256440]
E0319 19:58:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:58:23.409763  543705 memory.go:184] no items to output this cycle
I0319 19:58:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 19:58:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:58:33.409778  543705 memory.go:184] no items to output this cycle
I0319 19:58:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 19:58:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:58:43.409916  543705 memory.go:191] Add success.
I0319 19:58:43.409944  543705 cpu.go:282] Add success.
I0319 19:58:43.419722  543705 net.go:648] Add success.
I0319 19:58:43.422585  543705 net.go:770] primary dev: ETH0
I0319 19:58:43.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:58:43.422609  543705 net.go:698] Add success.
I0319 19:58:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:58:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:58:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:58:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:58:53.409772  543705 memory.go:184] no items to output this cycle
I0319 19:58:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 19:59:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:59:03.409779  543705 memory.go:184] no items to output this cycle
I0319 19:59:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 19:59:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:59:13.409794  543705 memory.go:191] Add success.
I0319 19:59:13.409799  543705 cpu.go:282] Add success.
W0319 19:59:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 19:59:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 19:59:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 19:59:13.420116  543705 net.go:648] Add success.
I0319 19:59:13.422778  543705 net.go:770] primary dev: ETH0
I0319 19:59:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:59:13.422803  543705 net.go:698] Add success.
I0319 19:59:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 19:59:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 19:59:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 19:59:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 19:59:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 19:59:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 19:59:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 19:59:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:59:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:59:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 19:59:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 19:59:20.723248  543705 disk_info.go:125] begin check local disk info of client
I0319 19:59:20.725715  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 19:59:20.725721  543705 disk_info.go:196] parse disk info done, disk is : [0xc000559900 0xc000559940]
E0319 19:59:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:59:23.409769  543705 memory.go:184] no items to output this cycle
I0319 19:59:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 19:59:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:59:33.409785  543705 memory.go:184] no items to output this cycle
I0319 19:59:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 19:59:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:59:43.409810  543705 memory.go:191] Add success.
I0319 19:59:43.409823  543705 cpu.go:282] Add success.
I0319 19:59:43.420185  543705 net.go:648] Add success.
I0319 19:59:43.423025  543705 net.go:770] primary dev: ETH0
I0319 19:59:43.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0319 19:59:43.423049  543705 net.go:698] Add success.
I0319 19:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 19:59:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 19:59:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 19:59:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 19:59:53.409816  543705 memory.go:184] no items to output this cycle
I0319 19:59:53.409860  543705 cpu.go:275] no items to output this cycle
E0319 20:00:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:00:03.409804  543705 memory.go:184] no items to output this cycle
I0319 20:00:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 20:00:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:00:13.409787  543705 memory.go:191] Add success.
I0319 20:00:13.409809  543705 cpu.go:282] Add success.
W0319 20:00:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:00:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:00:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:00:13.420044  543705 net.go:648] Add success.
I0319 20:00:13.422540  543705 net.go:770] primary dev: ETH0
I0319 20:00:13.422553  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:00:13.422565  543705 net.go:698] Add success.
I0319 20:00:13.463720  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1688084f-3c94-41cc-b108-275873f2156f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:00:13.463753  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:00:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:00:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 20:00:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:00:14.456666  543705 disk_worker.go:494] system disk:vda1
I0319 20:00:14.456700  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:00:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:00:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:00:20.725802  543705 disk_info.go:125] begin check local disk info of client
I0319 20:00:20.728214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:00:20.728219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c82c0 0xc0003c8300]
E0319 20:00:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:00:23.409771  543705 memory.go:184] no items to output this cycle
I0319 20:00:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:00:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:00:33.409782  543705 memory.go:184] no items to output this cycle
I0319 20:00:33.409803  543705 cpu.go:275] no items to output this cycle
I0319 20:00:37.847940  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:00:37.847948  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:00:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:00:43.410642  543705 memory.go:191] Add success.
I0319 20:00:43.409804  543705 cpu.go:282] Add success.
I0319 20:00:43.420561  543705 net.go:648] Add success.
I0319 20:00:43.423179  543705 net.go:770] primary dev: ETH0
I0319 20:00:43.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:00:43.423203  543705 net.go:698] Add success.
I0319 20:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:00:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:00:53.410206  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:00:53.410225  543705 memory.go:184] no items to output this cycle
I0319 20:00:53.410258  543705 cpu.go:275] no items to output this cycle
E0319 20:01:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:01:03.409799  543705 memory.go:184] no items to output this cycle
I0319 20:01:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 20:01:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:01:13.409780  543705 memory.go:191] Add success.
W0319 20:01:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:01:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:01:13.409818  543705 cpu.go:282] Add success.
I0319 20:01:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:01:13.420508  543705 net.go:648] Add success.
I0319 20:01:13.423387  543705 net.go:770] primary dev: ETH0
I0319 20:01:13.423399  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:01:13.423412  543705 net.go:698] Add success.
I0319 20:01:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:01:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:01:14.455140  543705 disk_worker.go:708] disk space is not compliant
W0319 20:01:14.455143  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:01:14.456469  543705 disk_worker.go:494] system disk:vda1
I0319 20:01:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:01:16.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:01:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:01:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:01:16.472432  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:01:20.729285  543705 disk_info.go:125] begin check local disk info of client
I0319 20:01:20.731767  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:01:20.731773  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8680 0xc0003c86c0]
E0319 20:01:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:01:23.409796  543705 memory.go:184] no items to output this cycle
I0319 20:01:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 20:01:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:01:33.409812  543705 memory.go:184] no items to output this cycle
I0319 20:01:33.409827  543705 cpu.go:275] no items to output this cycle
E0319 20:01:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:01:43.409799  543705 memory.go:191] Add success.
I0319 20:01:43.409801  543705 cpu.go:282] Add success.
I0319 20:01:43.419986  543705 net.go:648] Add success.
I0319 20:01:43.423047  543705 net.go:770] primary dev: ETH0
I0319 20:01:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:01:43.423078  543705 net.go:698] Add success.
I0319 20:01:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:01:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:01:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:01:53.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:01:53.409900  543705 memory.go:184] no items to output this cycle
I0319 20:01:53.410052  543705 cpu.go:275] no items to output this cycle
E0319 20:02:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:02:03.409784  543705 memory.go:184] no items to output this cycle
I0319 20:02:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 20:02:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:02:13.409786  543705 memory.go:191] Add success.
W0319 20:02:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 20:02:13.409813  543705 cpu.go:282] Add success.
W0319 20:02:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:02:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:02:13.420256  543705 net.go:648] Add success.
I0319 20:02:13.423079  543705 net.go:770] primary dev: ETH0
I0319 20:02:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:02:13.423108  543705 net.go:698] Add success.
W0319 20:02:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:02:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 20:02:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:02:14.455908  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:02:14.455916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:02:14.455923  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:02:14.456542  543705 disk_worker.go:494] system disk:vda1
I0319 20:02:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:02:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:02:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:02:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:02:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:02:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:02:16.457980  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:02:16.472294  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:02:20.731856  543705 disk_info.go:125] begin check local disk info of client
I0319 20:02:20.734305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:02:20.734311  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396540 0xc000396580]
E0319 20:02:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:02:23.409758  543705 memory.go:184] no items to output this cycle
I0319 20:02:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:02:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:02:33.409790  543705 memory.go:184] no items to output this cycle
I0319 20:02:33.409789  543705 cpu.go:275] no items to output this cycle
E0319 20:02:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:02:43.409795  543705 memory.go:191] Add success.
I0319 20:02:43.409804  543705 cpu.go:282] Add success.
I0319 20:02:43.419843  543705 net.go:648] Add success.
I0319 20:02:43.422808  543705 net.go:770] primary dev: ETH0
I0319 20:02:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:02:43.422833  543705 net.go:698] Add success.
I0319 20:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:02:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:02:53.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:02:53.409896  543705 cpu.go:275] no items to output this cycle
I0319 20:02:53.409899  543705 memory.go:184] no items to output this cycle
E0319 20:03:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:03:03.409763  543705 memory.go:184] no items to output this cycle
I0319 20:03:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 20:03:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:03:13.409815  543705 memory.go:191] Add success.
I0319 20:03:13.409827  543705 cpu.go:282] Add success.
W0319 20:03:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:03:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:03:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:03:13.420119  543705 net.go:648] Add success.
I0319 20:03:13.422994  543705 net.go:770] primary dev: ETH0
I0319 20:03:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:03:13.423019  543705 net.go:698] Add success.
I0319 20:03:13.525713  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6bb4eeb0-9667-45ec-87cc-bc671b0b70fd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:03:13.525745  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:03:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:03:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:03:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 20:03:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:03:14.456503  543705 disk_worker.go:494] system disk:vda1
I0319 20:03:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:03:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:03:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:03:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:03:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:03:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:03:20.734399  543705 disk_info.go:125] begin check local disk info of client
I0319 20:03:20.736904  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:03:20.736910  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b6c0 0xc00007b700]
E0319 20:03:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:03:23.409796  543705 memory.go:184] no items to output this cycle
I0319 20:03:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:03:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:03:33.409780  543705 memory.go:184] no items to output this cycle
I0319 20:03:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 20:03:37.849735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:03:37.849741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:03:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:03:43.410781  543705 memory.go:191] Add success.
I0319 20:03:43.409829  543705 cpu.go:282] Add success.
I0319 20:03:43.420503  543705 net.go:648] Add success.
I0319 20:03:43.423210  543705 net.go:770] primary dev: ETH0
I0319 20:03:43.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:03:43.423235  543705 net.go:698] Add success.
I0319 20:03:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:03:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:03:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:03:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:03:53.409774  543705 memory.go:184] no items to output this cycle
I0319 20:03:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 20:04:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:04:03.409795  543705 memory.go:184] no items to output this cycle
I0319 20:04:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:04:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:04:13.409791  543705 memory.go:191] Add success.
I0319 20:04:13.409800  543705 cpu.go:282] Add success.
W0319 20:04:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:04:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:04:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:04:13.420207  543705 net.go:648] Add success.
I0319 20:04:13.423076  543705 net.go:770] primary dev: ETH0
I0319 20:04:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:04:13.423101  543705 net.go:698] Add success.
I0319 20:04:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:04:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:04:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0319 20:04:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:04:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 20:04:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:04:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:04:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:04:20.737685  543705 disk_info.go:125] begin check local disk info of client
I0319 20:04:20.740215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:04:20.740222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab180 0xc0001ab1c0]
E0319 20:04:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:04:23.409775  543705 memory.go:184] no items to output this cycle
I0319 20:04:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 20:04:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:04:33.409819  543705 memory.go:184] no items to output this cycle
I0319 20:04:33.409835  543705 cpu.go:275] no items to output this cycle
E0319 20:04:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:04:43.409831  543705 memory.go:191] Add success.
I0319 20:04:43.409838  543705 cpu.go:282] Add success.
I0319 20:04:43.419707  543705 net.go:770] primary dev: ETH0
I0319 20:04:43.419722  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:04:43.419736  543705 net.go:698] Add success.
I0319 20:04:43.419966  543705 net.go:648] Add success.
I0319 20:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:04:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:04:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:04:53.409796  543705 cpu.go:275] no items to output this cycle
I0319 20:04:53.409797  543705 memory.go:184] no items to output this cycle
E0319 20:05:03.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:05:03.409916  543705 memory.go:184] no items to output this cycle
I0319 20:05:03.409999  543705 cpu.go:275] no items to output this cycle
E0319 20:05:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:05:13.409801  543705 memory.go:191] Add success.
I0319 20:05:13.409818  543705 cpu.go:282] Add success.
W0319 20:05:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:05:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:05:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:05:13.420105  543705 net.go:648] Add success.
I0319 20:05:13.422610  543705 net.go:770] primary dev: ETH0
I0319 20:05:13.422623  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:05:13.422635  543705 net.go:698] Add success.
I0319 20:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:05:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:05:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 20:05:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:05:14.456773  543705 disk_worker.go:494] system disk:vda1
I0319 20:05:14.456803  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:05:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:05:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:05:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:05:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:05:20.740306  543705 disk_info.go:125] begin check local disk info of client
I0319 20:05:20.742857  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:05:20.742863  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004875c0 0xc000487600]
E0319 20:05:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:05:23.409769  543705 memory.go:184] no items to output this cycle
I0319 20:05:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 20:05:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:05:33.409809  543705 memory.go:184] no items to output this cycle
I0319 20:05:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 20:05:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:05:43.409789  543705 memory.go:191] Add success.
I0319 20:05:43.409816  543705 cpu.go:282] Add success.
I0319 20:05:43.420076  543705 net.go:648] Add success.
I0319 20:05:43.422833  543705 net.go:770] primary dev: ETH0
I0319 20:05:43.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:05:43.422859  543705 net.go:698] Add success.
I0319 20:05:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:05:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:05:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:05:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:05:53.409786  543705 cpu.go:275] no items to output this cycle
I0319 20:05:53.409787  543705 memory.go:184] no items to output this cycle
E0319 20:06:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:06:03.409812  543705 memory.go:184] no items to output this cycle
I0319 20:06:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 20:06:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:06:13.409798  543705 memory.go:191] Add success.
I0319 20:06:13.409817  543705 cpu.go:282] Add success.
W0319 20:06:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:06:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:06:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:06:13.420126  543705 net.go:648] Add success.
I0319 20:06:13.423107  543705 net.go:770] primary dev: ETH0
I0319 20:06:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:06:13.423136  543705 net.go:698] Add success.
I0319 20:06:13.545766  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a23c5f2d-19a5-4ac1-97b6-d5b076013ba9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:06:13.545799  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:06:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:06:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:06:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 20:06:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:06:14.456592  543705 disk_worker.go:494] system disk:vda1
I0319 20:06:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:06:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:06:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:06:20.744362  543705 disk_info.go:125] begin check local disk info of client
I0319 20:06:20.746748  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:06:20.746755  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053a480 0xc00053a4c0]
E0319 20:06:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:06:23.409775  543705 memory.go:184] no items to output this cycle
I0319 20:06:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:06:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:06:33.409822  543705 memory.go:184] no items to output this cycle
I0319 20:06:33.409832  543705 cpu.go:275] no items to output this cycle
I0319 20:06:37.852018  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:06:37.852026  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:06:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:06:43.410714  543705 memory.go:191] Add success.
I0319 20:06:43.409820  543705 cpu.go:282] Add success.
I0319 20:06:43.420440  543705 net.go:648] Add success.
I0319 20:06:43.423189  543705 net.go:770] primary dev: ETH0
I0319 20:06:43.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:06:43.423231  543705 net.go:698] Add success.
I0319 20:06:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:06:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:06:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:06:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:06:53.409828  543705 memory.go:184] no items to output this cycle
I0319 20:06:53.409832  543705 cpu.go:275] no items to output this cycle
E0319 20:07:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:07:03.409781  543705 memory.go:184] no items to output this cycle
I0319 20:07:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 20:07:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:07:13.409828  543705 memory.go:191] Add success.
I0319 20:07:13.409832  543705 cpu.go:282] Add success.
W0319 20:07:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:07:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:07:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:07:13.420209  543705 net.go:648] Add success.
I0319 20:07:13.423330  543705 net.go:770] primary dev: ETH0
I0319 20:07:13.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:07:13.423359  543705 net.go:698] Add success.
I0319 20:07:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0319 20:07:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:07:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 20:07:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:07:14.455906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:07:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:07:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:07:14.456532  543705 disk_worker.go:494] system disk:vda1
I0319 20:07:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:07:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:07:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:07:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:07:16.457965  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:07:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:07:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:07:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:07:20.748379  543705 disk_info.go:125] begin check local disk info of client
I0319 20:07:20.750846  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:07:20.750852  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002870c0 0xc000287100]
E0319 20:07:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:07:23.409771  543705 memory.go:184] no items to output this cycle
I0319 20:07:23.409775  543705 cpu.go:275] no items to output this cycle
E0319 20:07:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:07:33.409776  543705 memory.go:184] no items to output this cycle
I0319 20:07:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:07:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:07:43.409794  543705 memory.go:191] Add success.
I0319 20:07:43.409805  543705 cpu.go:282] Add success.
I0319 20:07:43.419971  543705 net.go:648] Add success.
I0319 20:07:43.422890  543705 net.go:770] primary dev: ETH0
I0319 20:07:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:07:43.422915  543705 net.go:698] Add success.
I0319 20:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:07:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:07:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:07:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:07:53.409802  543705 memory.go:184] no items to output this cycle
I0319 20:07:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 20:08:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:08:03.409774  543705 memory.go:184] no items to output this cycle
I0319 20:08:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 20:08:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:08:13.409816  543705 memory.go:191] Add success.
I0319 20:08:13.409826  543705 cpu.go:282] Add success.
W0319 20:08:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:08:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:08:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:08:13.419727  543705 net.go:648] Add success.
I0319 20:08:13.422757  543705 net.go:770] primary dev: ETH0
I0319 20:08:13.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:08:13.422781  543705 net.go:698] Add success.
I0319 20:08:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:08:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:08:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 20:08:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:08:14.456564  543705 disk_worker.go:494] system disk:vda1
I0319 20:08:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:08:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:08:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:08:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:08:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:08:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:08:20.752393  543705 disk_info.go:125] begin check local disk info of client
I0319 20:08:20.754881  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:08:20.754888  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029c140 0xc00029c180]
E0319 20:08:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:08:23.409783  543705 memory.go:184] no items to output this cycle
I0319 20:08:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:08:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:08:33.409790  543705 memory.go:184] no items to output this cycle
I0319 20:08:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 20:08:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:08:43.409799  543705 memory.go:191] Add success.
I0319 20:08:43.409804  543705 cpu.go:282] Add success.
I0319 20:08:43.420075  543705 net.go:648] Add success.
I0319 20:08:43.423082  543705 net.go:770] primary dev: ETH0
I0319 20:08:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:08:43.423111  543705 net.go:698] Add success.
I0319 20:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:08:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:08:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:08:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:08:53.409788  543705 memory.go:184] no items to output this cycle
I0319 20:08:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 20:09:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:09:03.409798  543705 memory.go:184] no items to output this cycle
I0319 20:09:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 20:09:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:09:13.409824  543705 memory.go:191] Add success.
I0319 20:09:13.409843  543705 cpu.go:282] Add success.
W0319 20:09:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:09:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:09:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:09:13.420429  543705 net.go:648] Add success.
I0319 20:09:13.423256  543705 net.go:770] primary dev: ETH0
I0319 20:09:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:09:13.423281  543705 net.go:698] Add success.
I0319 20:09:13.467598  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"28cdf3fe-8784-45d0-b255-a99399018a14","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:09:13.467629  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:09:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:09:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:09:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 20:09:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:09:14.456661  543705 disk_worker.go:494] system disk:vda1
I0319 20:09:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:09:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:09:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:09:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:09:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:09:20.756420  543705 disk_info.go:125] begin check local disk info of client
I0319 20:09:20.758955  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:09:20.758962  543705 disk_info.go:196] parse disk info done, disk is : [0xc000594840 0xc0005948c0]
E0319 20:09:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:09:23.409773  543705 memory.go:184] no items to output this cycle
I0319 20:09:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 20:09:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:09:33.409792  543705 memory.go:184] no items to output this cycle
I0319 20:09:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 20:09:37.853725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:09:37.853733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:09:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:09:43.410642  543705 memory.go:191] Add success.
I0319 20:09:43.409808  543705 cpu.go:282] Add success.
I0319 20:09:43.420406  543705 net.go:648] Add success.
I0319 20:09:43.423055  543705 net.go:770] primary dev: ETH0
I0319 20:09:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:09:43.423081  543705 net.go:698] Add success.
I0319 20:09:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:09:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:09:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:09:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:09:53.409797  543705 memory.go:184] no items to output this cycle
I0319 20:09:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:10:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:10:03.409801  543705 memory.go:184] no items to output this cycle
I0319 20:10:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:10:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:10:13.409792  543705 memory.go:191] Add success.
I0319 20:10:13.409792  543705 cpu.go:282] Add success.
W0319 20:10:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:10:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:10:13.420405  543705 net.go:648] Add success.
I0319 20:10:13.423066  543705 net.go:770] primary dev: ETH0
I0319 20:10:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:10:13.423100  543705 net.go:698] Add success.
I0319 20:10:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:10:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:10:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 20:10:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:10:14.456528  543705 disk_worker.go:494] system disk:vda1
I0319 20:10:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:10:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:10:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:10:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:10:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:10:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:10:20.760460  543705 disk_info.go:125] begin check local disk info of client
I0319 20:10:20.762925  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:10:20.762932  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367ac0 0xc000367b00]
E0319 20:10:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:10:23.409762  543705 memory.go:184] no items to output this cycle
I0319 20:10:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:10:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:10:33.409772  543705 memory.go:184] no items to output this cycle
I0319 20:10:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 20:10:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:10:43.409819  543705 memory.go:191] Add success.
I0319 20:10:43.409829  543705 cpu.go:282] Add success.
I0319 20:10:43.419897  543705 net.go:648] Add success.
I0319 20:10:43.422777  543705 net.go:770] primary dev: ETH0
I0319 20:10:43.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:10:43.422806  543705 net.go:698] Add success.
I0319 20:10:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:10:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:10:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:10:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:10:53.409809  543705 memory.go:184] no items to output this cycle
I0319 20:10:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 20:11:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:11:03.409779  543705 memory.go:184] no items to output this cycle
I0319 20:11:03.409784  543705 cpu.go:275] no items to output this cycle
E0319 20:11:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:11:13.409778  543705 memory.go:191] Add success.
W0319 20:11:13.409988  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 20:11:13.409994  543705 cpu.go:282] Add success.
W0319 20:11:13.410002  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:11:13.410005  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:11:13.419731  543705 net.go:648] Add success.
I0319 20:11:13.422651  543705 net.go:770] primary dev: ETH0
I0319 20:11:13.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:11:13.422679  543705 net.go:698] Add success.
I0319 20:11:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:11:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:11:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0319 20:11:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:11:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 20:11:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:11:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:11:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:11:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:11:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:11:16.472457  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:11:20.763018  543705 disk_info.go:125] begin check local disk info of client
I0319 20:11:20.765523  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:11:20.765530  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c1ac0 0xc0004c1b00]
E0319 20:11:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:11:23.409777  543705 cpu.go:275] no items to output this cycle
I0319 20:11:23.409787  543705 memory.go:184] no items to output this cycle
E0319 20:11:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:11:33.409775  543705 memory.go:184] no items to output this cycle
I0319 20:11:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 20:11:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:11:43.409805  543705 memory.go:191] Add success.
I0319 20:11:43.409814  543705 cpu.go:282] Add success.
I0319 20:11:43.419994  543705 net.go:648] Add success.
I0319 20:11:43.422667  543705 net.go:770] primary dev: ETH0
I0319 20:11:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:11:43.422692  543705 net.go:698] Add success.
I0319 20:11:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:11:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:11:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:11:53.409805  543705 memory.go:184] no items to output this cycle
I0319 20:11:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:12:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:12:03.409783  543705 cpu.go:275] no items to output this cycle
I0319 20:12:03.409792  543705 memory.go:184] no items to output this cycle
E0319 20:12:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:12:13.409890  543705 cpu.go:282] Add success.
I0319 20:12:13.409907  543705 memory.go:191] Add success.
W0319 20:12:13.409936  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:12:13.409958  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:12:13.409963  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:12:13.419729  543705 net.go:648] Add success.
I0319 20:12:13.422520  543705 net.go:770] primary dev: ETH0
I0319 20:12:13.422532  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:12:13.422546  543705 net.go:698] Add success.
I0319 20:12:13.469063  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"43d8c194-ead7-46bd-ad49-c7819c8fa946","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:12:13.469091  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 20:12:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:12:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 20:12:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:12:14.455998  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:12:14.456007  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:12:14.456012  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:12:14.456447  543705 disk_worker.go:494] system disk:vda1
I0319 20:12:14.456475  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:12:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:12:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:12:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:12:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:12:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:12:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:12:16.472330  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:12:20.765674  543705 disk_info.go:125] begin check local disk info of client
I0319 20:12:20.768081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:12:20.768087  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1400 0xc0003b1440]
E0319 20:12:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:12:23.409791  543705 memory.go:184] no items to output this cycle
I0319 20:12:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:12:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:12:33.409778  543705 memory.go:184] no items to output this cycle
I0319 20:12:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 20:12:37.855970  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:12:37.855977  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:12:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:12:43.410655  543705 memory.go:191] Add success.
I0319 20:12:43.409825  543705 cpu.go:282] Add success.
I0319 20:12:43.420362  543705 net.go:648] Add success.
I0319 20:12:43.422931  543705 net.go:770] primary dev: ETH0
I0319 20:12:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:12:43.422966  543705 net.go:698] Add success.
I0319 20:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:12:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:12:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:12:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:12:53.409789  543705 memory.go:184] no items to output this cycle
I0319 20:12:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 20:13:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:13:03.409799  543705 memory.go:184] no items to output this cycle
I0319 20:13:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 20:13:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:13:13.409820  543705 memory.go:191] Add success.
I0319 20:13:13.409828  543705 cpu.go:282] Add success.
W0319 20:13:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:13:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:13:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:13:13.420145  543705 net.go:648] Add success.
I0319 20:13:13.423130  543705 net.go:770] primary dev: ETH0
I0319 20:13:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:13:13.423159  543705 net.go:698] Add success.
I0319 20:13:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:13:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:13:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0319 20:13:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:13:14.456637  543705 disk_worker.go:494] system disk:vda1
I0319 20:13:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:13:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:13:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:13:20.768175  543705 disk_info.go:125] begin check local disk info of client
I0319 20:13:20.770677  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:13:20.770683  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abcc0 0xc0001abd00]
E0319 20:13:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:13:23.409799  543705 memory.go:184] no items to output this cycle
I0319 20:13:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 20:13:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:13:33.409786  543705 memory.go:184] no items to output this cycle
I0319 20:13:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:13:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:13:43.409814  543705 memory.go:191] Add success.
I0319 20:13:43.409826  543705 cpu.go:282] Add success.
I0319 20:13:43.419916  543705 net.go:648] Add success.
I0319 20:13:43.422875  543705 net.go:770] primary dev: ETH0
I0319 20:13:43.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:13:43.422904  543705 net.go:698] Add success.
I0319 20:13:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:13:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:13:53.409777  543705 cpu.go:275] no items to output this cycle
I0319 20:13:53.409781  543705 memory.go:184] no items to output this cycle
E0319 20:14:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:14:03.409778  543705 memory.go:184] no items to output this cycle
I0319 20:14:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:14:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:14:13.409790  543705 memory.go:191] Add success.
I0319 20:14:13.409798  543705 cpu.go:282] Add success.
W0319 20:14:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:14:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:14:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:14:13.420222  543705 net.go:648] Add success.
I0319 20:14:13.423118  543705 net.go:770] primary dev: ETH0
I0319 20:14:13.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:14:13.423144  543705 net.go:698] Add success.
I0319 20:14:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:14:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:14:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 20:14:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:14:14.456518  543705 disk_worker.go:494] system disk:vda1
I0319 20:14:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:14:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:14:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:14:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:14:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:14:20.772500  543705 disk_info.go:125] begin check local disk info of client
I0319 20:14:20.774994  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:14:20.775002  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307bc0 0xc000307c00]
E0319 20:14:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:14:23.409757  543705 memory.go:184] no items to output this cycle
I0319 20:14:23.409791  543705 cpu.go:275] no items to output this cycle
E0319 20:14:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:14:33.409804  543705 memory.go:184] no items to output this cycle
I0319 20:14:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 20:14:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:14:43.409780  543705 memory.go:191] Add success.
I0319 20:14:43.409816  543705 cpu.go:282] Add success.
I0319 20:14:43.420006  543705 net.go:648] Add success.
I0319 20:14:43.423264  543705 net.go:770] primary dev: ETH0
I0319 20:14:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:14:43.423290  543705 net.go:698] Add success.
I0319 20:14:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:14:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:14:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:14:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:14:53.409815  543705 memory.go:184] no items to output this cycle
I0319 20:14:53.409819  543705 cpu.go:275] no items to output this cycle
E0319 20:15:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:15:03.409784  543705 memory.go:184] no items to output this cycle
I0319 20:15:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 20:15:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:15:13.409795  543705 memory.go:191] Add success.
I0319 20:15:13.409795  543705 cpu.go:282] Add success.
W0319 20:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:15:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:15:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:15:13.420165  543705 net.go:648] Add success.
I0319 20:15:13.422795  543705 net.go:770] primary dev: ETH0
I0319 20:15:13.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:15:13.422825  543705 net.go:698] Add success.
I0319 20:15:13.499598  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"11f8b70a-6038-4def-bd24-18a2c32f4198","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:15:13.499632  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:15:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:15:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:15:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 20:15:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:15:14.456673  543705 disk_worker.go:494] system disk:vda1
I0319 20:15:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:15:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:15:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:15:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:15:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:15:20.775084  543705 disk_info.go:125] begin check local disk info of client
I0319 20:15:20.777587  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:15:20.777593  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bec0 0xc00007bf00]
E0319 20:15:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:15:23.409768  543705 memory.go:184] no items to output this cycle
I0319 20:15:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 20:15:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:15:33.409774  543705 memory.go:184] no items to output this cycle
I0319 20:15:33.409798  543705 cpu.go:275] no items to output this cycle
I0319 20:15:37.857739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:15:37.857745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:15:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:15:43.410588  543705 memory.go:191] Add success.
I0319 20:15:43.409801  543705 cpu.go:282] Add success.
I0319 20:15:43.420299  543705 net.go:648] Add success.
I0319 20:15:43.422826  543705 net.go:770] primary dev: ETH0
I0319 20:15:43.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:15:43.422856  543705 net.go:698] Add success.
I0319 20:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:15:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:15:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:15:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:15:53.409896  543705 memory.go:184] no items to output this cycle
I0319 20:15:53.409925  543705 cpu.go:275] no items to output this cycle
E0319 20:16:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:16:03.409803  543705 memory.go:184] no items to output this cycle
I0319 20:16:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 20:16:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:16:13.409789  543705 memory.go:191] Add success.
W0319 20:16:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 20:16:13.409819  543705 cpu.go:282] Add success.
W0319 20:16:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:16:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:16:13.420174  543705 net.go:648] Add success.
I0319 20:16:13.422818  543705 net.go:770] primary dev: ETH0
I0319 20:16:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:16:13.422843  543705 net.go:698] Add success.
I0319 20:16:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:16:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:16:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0319 20:16:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:16:14.456561  543705 disk_worker.go:494] system disk:vda1
I0319 20:16:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:16:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:16:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:16:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:16:20.777685  543705 disk_info.go:125] begin check local disk info of client
I0319 20:16:20.780148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:16:20.780154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b580 0xc00007b5c0]
E0319 20:16:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:16:23.409800  543705 memory.go:184] no items to output this cycle
I0319 20:16:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 20:16:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:16:33.409789  543705 memory.go:184] no items to output this cycle
I0319 20:16:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 20:16:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:16:43.409793  543705 memory.go:191] Add success.
I0319 20:16:43.409810  543705 cpu.go:282] Add success.
I0319 20:16:43.420073  543705 net.go:648] Add success.
I0319 20:16:43.422721  543705 net.go:770] primary dev: ETH0
I0319 20:16:43.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:16:43.422745  543705 net.go:698] Add success.
I0319 20:16:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:16:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:16:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:16:53.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:16:53.409832  543705 memory.go:184] no items to output this cycle
I0319 20:16:53.409842  543705 cpu.go:275] no items to output this cycle
E0319 20:17:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:17:03.409778  543705 memory.go:184] no items to output this cycle
I0319 20:17:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:17:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:17:13.409819  543705 memory.go:191] Add success.
I0319 20:17:13.409824  543705 cpu.go:282] Add success.
W0319 20:17:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:17:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:17:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:17:13.420089  543705 net.go:648] Add success.
I0319 20:17:13.422965  543705 net.go:770] primary dev: ETH0
I0319 20:17:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:17:13.422994  543705 net.go:698] Add success.
I0319 20:17:13.453669  543705 event_worker.go:152] Polling the log file for events...
W0319 20:17:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:17:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 20:17:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:17:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:17:14.456937  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:17:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:17:14.456986  543705 disk_worker.go:494] system disk:vda1
I0319 20:17:14.457017  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:17:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:17:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:17:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:17:16.457990  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:17:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:17:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:17:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:17:20.781538  543705 disk_info.go:125] begin check local disk info of client
I0319 20:17:20.783972  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:17:20.783979  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364280 0xc0003642c0]
E0319 20:17:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:17:23.409792  543705 memory.go:184] no items to output this cycle
I0319 20:17:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 20:17:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:17:33.409794  543705 memory.go:184] no items to output this cycle
I0319 20:17:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 20:17:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:17:43.409806  543705 memory.go:191] Add success.
I0319 20:17:43.409806  543705 cpu.go:282] Add success.
I0319 20:17:43.420033  543705 net.go:648] Add success.
I0319 20:17:43.422925  543705 net.go:770] primary dev: ETH0
I0319 20:17:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:17:43.422949  543705 net.go:698] Add success.
I0319 20:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:17:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:17:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:17:53.410268  543705 memory.go:184] no items to output this cycle
I0319 20:17:53.410281  543705 cpu.go:275] no items to output this cycle
E0319 20:18:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:18:03.409777  543705 memory.go:184] no items to output this cycle
I0319 20:18:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:18:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:18:13.409810  543705 memory.go:191] Add success.
I0319 20:18:13.409819  543705 cpu.go:282] Add success.
W0319 20:18:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:18:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:18:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:18:13.420138  543705 net.go:648] Add success.
I0319 20:18:13.422866  543705 net.go:770] primary dev: ETH0
I0319 20:18:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:18:13.422893  543705 net.go:698] Add success.
I0319 20:18:13.543210  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"feeac589-25f5-452e-a117-f97809e3f46f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:18:13.543245  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:18:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:18:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:18:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0319 20:18:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:18:14.456663  543705 disk_worker.go:494] system disk:vda1
I0319 20:18:14.456692  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:18:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:18:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:18:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:18:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:18:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:18:20.784063  543705 disk_info.go:125] begin check local disk info of client
I0319 20:18:20.786575  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:18:20.786582  543705 disk_info.go:196] parse disk info done, disk is : [0xc000397940 0xc000397980]
E0319 20:18:23.410100  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:18:23.410121  543705 memory.go:184] no items to output this cycle
I0319 20:18:23.410134  543705 cpu.go:275] no items to output this cycle
E0319 20:18:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:18:33.409794  543705 memory.go:184] no items to output this cycle
I0319 20:18:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 20:18:37.859993  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:18:37.860000  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:18:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:18:43.410775  543705 memory.go:191] Add success.
I0319 20:18:43.409826  543705 cpu.go:282] Add success.
I0319 20:18:43.420891  543705 net.go:648] Add success.
I0319 20:18:43.423924  543705 net.go:770] primary dev: ETH0
I0319 20:18:43.423938  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:18:43.423950  543705 net.go:698] Add success.
I0319 20:18:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:18:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:18:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:18:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:18:53.409786  543705 memory.go:184] no items to output this cycle
I0319 20:18:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 20:19:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:19:03.409783  543705 cpu.go:275] no items to output this cycle
I0319 20:19:03.409791  543705 memory.go:184] no items to output this cycle
E0319 20:19:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:19:13.409814  543705 memory.go:191] Add success.
I0319 20:19:13.409819  543705 cpu.go:282] Add success.
W0319 20:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:19:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:19:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:19:13.420217  543705 net.go:648] Add success.
I0319 20:19:13.422980  543705 net.go:770] primary dev: ETH0
I0319 20:19:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:19:13.423007  543705 net.go:698] Add success.
I0319 20:19:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:19:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:19:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 20:19:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:19:14.456502  543705 disk_worker.go:494] system disk:vda1
I0319 20:19:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:19:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:19:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:19:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:19:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:19:20.787570  543705 disk_info.go:125] begin check local disk info of client
I0319 20:19:20.790031  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:19:20.790037  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047d700 0xc00047d740]
E0319 20:19:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:19:23.409767  543705 memory.go:184] no items to output this cycle
I0319 20:19:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 20:19:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:19:33.409812  543705 memory.go:184] no items to output this cycle
I0319 20:19:33.409823  543705 cpu.go:275] no items to output this cycle
E0319 20:19:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:19:43.409788  543705 memory.go:191] Add success.
I0319 20:19:43.409811  543705 cpu.go:282] Add success.
I0319 20:19:43.419956  543705 net.go:648] Add success.
I0319 20:19:43.422529  543705 net.go:770] primary dev: ETH0
I0319 20:19:43.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:19:43.422555  543705 net.go:698] Add success.
I0319 20:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:19:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:19:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:19:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:19:53.409767  543705 memory.go:184] no items to output this cycle
I0319 20:19:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 20:20:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:20:03.409780  543705 memory.go:184] no items to output this cycle
I0319 20:20:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 20:20:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:20:13.409811  543705 memory.go:191] Add success.
I0319 20:20:13.409818  543705 cpu.go:282] Add success.
W0319 20:20:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:20:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:20:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:20:13.420163  543705 net.go:648] Add success.
I0319 20:20:13.423292  543705 net.go:770] primary dev: ETH0
I0319 20:20:13.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:20:13.423318  543705 net.go:698] Add success.
I0319 20:20:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:20:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:20:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 20:20:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:20:14.456502  543705 disk_worker.go:494] system disk:vda1
I0319 20:20:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:20:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:20:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:20:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:20:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:20:20.790120  543705 disk_info.go:125] begin check local disk info of client
I0319 20:20:20.792826  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:20:20.792834  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343c80 0xc000343cc0]
E0319 20:20:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:20:23.409764  543705 memory.go:184] no items to output this cycle
I0319 20:20:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 20:20:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:20:33.409925  543705 cpu.go:275] no items to output this cycle
I0319 20:20:33.409952  543705 memory.go:184] no items to output this cycle
E0319 20:20:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:20:43.409811  543705 memory.go:191] Add success.
I0319 20:20:43.409825  543705 cpu.go:282] Add success.
I0319 20:20:43.419979  543705 net.go:648] Add success.
I0319 20:20:43.422656  543705 net.go:770] primary dev: ETH0
I0319 20:20:43.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:20:43.422681  543705 net.go:698] Add success.
I0319 20:20:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:20:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:20:53.409798  543705 memory.go:184] no items to output this cycle
I0319 20:20:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 20:21:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:21:03.409800  543705 memory.go:184] no items to output this cycle
I0319 20:21:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 20:21:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:21:13.409777  543705 memory.go:191] Add success.
W0319 20:21:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 20:21:13.409809  543705 cpu.go:282] Add success.
W0319 20:21:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:21:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:21:13.420337  543705 net.go:648] Add success.
I0319 20:21:13.423279  543705 net.go:770] primary dev: ETH0
I0319 20:21:13.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:21:13.423306  543705 net.go:698] Add success.
I0319 20:21:13.572278  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dbf7184a-6820-4adc-8e39-c79c67f61050","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:21:13.572313  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:21:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:21:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:21:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0319 20:21:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:21:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 20:21:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:21:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:21:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:21:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:21:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:21:20.793675  543705 disk_info.go:125] begin check local disk info of client
I0319 20:21:20.796117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:21:20.796123  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a2c0 0xc00027a300]
E0319 20:21:23.410362  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:21:23.410382  543705 memory.go:184] no items to output this cycle
I0319 20:21:23.410393  543705 cpu.go:275] no items to output this cycle
E0319 20:21:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:21:33.409810  543705 memory.go:184] no items to output this cycle
I0319 20:21:33.409821  543705 cpu.go:275] no items to output this cycle
I0319 20:21:37.861738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:21:37.861745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:21:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:21:43.410605  543705 memory.go:191] Add success.
I0319 20:21:43.409802  543705 cpu.go:282] Add success.
I0319 20:21:43.420319  543705 net.go:648] Add success.
I0319 20:21:43.423227  543705 net.go:770] primary dev: ETH0
I0319 20:21:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:21:43.423265  543705 net.go:698] Add success.
I0319 20:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:21:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:21:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:21:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:21:53.409782  543705 memory.go:184] no items to output this cycle
I0319 20:21:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 20:22:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:22:03.409781  543705 memory.go:184] no items to output this cycle
I0319 20:22:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 20:22:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:22:13.409807  543705 memory.go:191] Add success.
I0319 20:22:13.409818  543705 cpu.go:282] Add success.
W0319 20:22:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:22:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:22:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:22:13.420077  543705 net.go:648] Add success.
I0319 20:22:13.422917  543705 net.go:770] primary dev: ETH0
I0319 20:22:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:22:13.422946  543705 net.go:698] Add success.
W0319 20:22:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:22:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 20:22:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:22:14.455869  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:22:14.455877  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:22:14.455883  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:22:14.456542  543705 disk_worker.go:494] system disk:vda1
I0319 20:22:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:22:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:22:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:22:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:22:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:22:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:22:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:22:16.472334  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:22:20.796206  543705 disk_info.go:125] begin check local disk info of client
I0319 20:22:20.798659  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:22:20.798665  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460a80 0xc000460ac0]
E0319 20:22:23.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:22:23.409916  543705 cpu.go:275] no items to output this cycle
I0319 20:22:23.409931  543705 memory.go:184] no items to output this cycle
E0319 20:22:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:22:33.409784  543705 memory.go:184] no items to output this cycle
I0319 20:22:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 20:22:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:22:43.409790  543705 memory.go:191] Add success.
I0319 20:22:43.409808  543705 cpu.go:282] Add success.
I0319 20:22:43.420040  543705 net.go:648] Add success.
I0319 20:22:43.422834  543705 net.go:770] primary dev: ETH0
I0319 20:22:43.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:22:43.422860  543705 net.go:698] Add success.
I0319 20:22:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:22:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:22:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:22:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:22:53.409790  543705 memory.go:184] no items to output this cycle
I0319 20:22:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 20:23:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:23:03.409771  543705 memory.go:184] no items to output this cycle
I0319 20:23:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 20:23:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:23:13.409813  543705 memory.go:191] Add success.
I0319 20:23:13.409821  543705 cpu.go:282] Add success.
W0319 20:23:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:23:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:23:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:23:13.420259  543705 net.go:648] Add success.
I0319 20:23:13.423066  543705 net.go:770] primary dev: ETH0
I0319 20:23:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:23:13.423091  543705 net.go:698] Add success.
I0319 20:23:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:23:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:23:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 20:23:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:23:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 20:23:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:23:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:23:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:23:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:23:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:23:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:23:20.798753  543705 disk_info.go:125] begin check local disk info of client
I0319 20:23:20.801281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:23:20.801287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa100 0xc0001aa140]
E0319 20:23:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:23:23.409804  543705 memory.go:184] no items to output this cycle
I0319 20:23:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 20:23:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:23:33.409780  543705 memory.go:184] no items to output this cycle
I0319 20:23:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:23:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:23:43.409818  543705 memory.go:191] Add success.
I0319 20:23:43.409822  543705 cpu.go:282] Add success.
I0319 20:23:43.419921  543705 net.go:648] Add success.
I0319 20:23:43.422558  543705 net.go:770] primary dev: ETH0
I0319 20:23:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:23:43.422587  543705 net.go:698] Add success.
I0319 20:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:23:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:23:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:23:53.409804  543705 memory.go:184] no items to output this cycle
I0319 20:23:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 20:24:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:24:03.409801  543705 memory.go:184] no items to output this cycle
I0319 20:24:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 20:24:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:24:13.409787  543705 memory.go:191] Add success.
I0319 20:24:13.409790  543705 cpu.go:282] Add success.
W0319 20:24:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:24:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:24:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:24:13.420062  543705 net.go:648] Add success.
I0319 20:24:13.423075  543705 net.go:770] primary dev: ETH0
I0319 20:24:13.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:24:13.423100  543705 net.go:698] Add success.
I0319 20:24:13.633015  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"175d99fd-f1b9-4b7e-8232-e39e35be610b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:24:13.633051  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:24:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:24:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:24:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0319 20:24:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:24:14.456497  543705 disk_worker.go:494] system disk:vda1
I0319 20:24:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:24:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:24:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:24:20.801682  543705 disk_info.go:125] begin check local disk info of client
I0319 20:24:20.804080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:24:20.804087  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c3c0 0xc00039c400]
E0319 20:24:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:24:23.409777  543705 memory.go:184] no items to output this cycle
I0319 20:24:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 20:24:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:24:33.409784  543705 memory.go:184] no items to output this cycle
I0319 20:24:33.409798  543705 cpu.go:275] no items to output this cycle
I0319 20:24:37.864013  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:24:37.864020  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:24:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:24:43.410644  543705 memory.go:191] Add success.
I0319 20:24:43.409791  543705 cpu.go:282] Add success.
I0319 20:24:43.420350  543705 net.go:648] Add success.
I0319 20:24:43.423010  543705 net.go:770] primary dev: ETH0
I0319 20:24:43.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:24:43.423041  543705 net.go:698] Add success.
I0319 20:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:24:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:24:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:24:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:24:53.409812  543705 memory.go:184] no items to output this cycle
I0319 20:24:53.409826  543705 cpu.go:275] no items to output this cycle
E0319 20:25:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:25:03.409772  543705 memory.go:184] no items to output this cycle
I0319 20:25:03.409807  543705 cpu.go:275] no items to output this cycle
W0319 20:25:13.409723  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:25:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:25:13.409746  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:25:13.409812  543705 cpu.go:282] Add success.
E0319 20:25:13.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:25:13.409846  543705 memory.go:191] Add success.
I0319 20:25:13.420252  543705 net.go:648] Add success.
I0319 20:25:13.423036  543705 net.go:770] primary dev: ETH0
I0319 20:25:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:25:13.423065  543705 net.go:698] Add success.
I0319 20:25:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:25:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:25:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0319 20:25:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:25:14.456872  543705 disk_worker.go:494] system disk:vda1
I0319 20:25:14.456906  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:25:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:25:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:25:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:25:16.472451  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:25:20.805677  543705 disk_info.go:125] begin check local disk info of client
I0319 20:25:20.808097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:25:20.808105  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c140 0xc00034c180]
E0319 20:25:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:25:23.409780  543705 memory.go:184] no items to output this cycle
I0319 20:25:23.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:25:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:25:33.409793  543705 memory.go:184] no items to output this cycle
I0319 20:25:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 20:25:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:25:43.409793  543705 memory.go:191] Add success.
I0319 20:25:43.409816  543705 cpu.go:282] Add success.
I0319 20:25:43.419898  543705 net.go:648] Add success.
I0319 20:25:43.422567  543705 net.go:770] primary dev: ETH0
I0319 20:25:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:25:43.422593  543705 net.go:698] Add success.
I0319 20:25:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:25:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:25:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:25:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:25:53.409777  543705 memory.go:184] no items to output this cycle
I0319 20:25:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:26:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:26:03.409816  543705 memory.go:184] no items to output this cycle
I0319 20:26:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 20:26:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:26:13.409789  543705 memory.go:191] Add success.
I0319 20:26:13.409814  543705 cpu.go:282] Add success.
W0319 20:26:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:26:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:26:13.420284  543705 net.go:648] Add success.
I0319 20:26:13.422883  543705 net.go:770] primary dev: ETH0
I0319 20:26:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:26:13.422907  543705 net.go:698] Add success.
I0319 20:26:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:26:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:26:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0319 20:26:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:26:14.456463  543705 disk_worker.go:494] system disk:vda1
I0319 20:26:14.456509  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:26:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:26:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:26:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:26:20.809675  543705 disk_info.go:125] begin check local disk info of client
I0319 20:26:20.812156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:26:20.812163  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004da100 0xc0004da140]
E0319 20:26:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:26:23.409807  543705 memory.go:184] no items to output this cycle
I0319 20:26:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 20:26:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:26:33.409792  543705 memory.go:184] no items to output this cycle
I0319 20:26:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 20:26:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:26:43.409822  543705 memory.go:191] Add success.
I0319 20:26:43.409835  543705 cpu.go:282] Add success.
I0319 20:26:43.419892  543705 net.go:648] Add success.
I0319 20:26:43.422488  543705 net.go:770] primary dev: ETH0
I0319 20:26:43.422500  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:26:43.422513  543705 net.go:698] Add success.
I0319 20:26:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:26:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:26:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:26:53.409793  543705 memory.go:184] no items to output this cycle
I0319 20:26:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 20:27:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:27:03.409805  543705 memory.go:184] no items to output this cycle
I0319 20:27:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 20:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:27:13.409812  543705 memory.go:191] Add success.
I0319 20:27:13.409820  543705 cpu.go:282] Add success.
W0319 20:27:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:27:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:27:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:27:13.420106  543705 net.go:648] Add success.
I0319 20:27:13.430024  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 20:27:13.430116  543705 net.go:770] primary dev: ETH0
I0319 20:27:13.430133  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:27:13.430147  543705 net.go:698] Add success.
I0319 20:27:13.453662  543705 event_worker.go:152] Polling the log file for events...
I0319 20:27:13.469378  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4011fdef-1b16-4948-b9c1-da30e5883f5e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:27:13.469409  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 20:27:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:27:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 20:27:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:27:14.455940  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:27:14.455949  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:27:14.455955  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:27:14.456535  543705 disk_worker.go:494] system disk:vda1
I0319 20:27:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:27:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:27:15.456800  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:27:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:27:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:27:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:27:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:27:16.472335  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:27:20.813674  543705 disk_info.go:125] begin check local disk info of client
I0319 20:27:20.816155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:27:20.816162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e640 0xc00034e680]
E0319 20:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:27:23.409796  543705 memory.go:184] no items to output this cycle
I0319 20:27:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 20:27:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:27:33.409775  543705 memory.go:184] no items to output this cycle
I0319 20:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0319 20:27:37.865744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:27:37.865751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:27:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:27:43.410967  543705 memory.go:191] Add success.
I0319 20:27:43.409825  543705 cpu.go:282] Add success.
I0319 20:27:43.420689  543705 net.go:648] Add success.
I0319 20:27:43.423507  543705 net.go:770] primary dev: ETH0
I0319 20:27:43.423523  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:27:43.423536  543705 net.go:698] Add success.
I0319 20:27:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:27:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:27:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:27:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:27:53.409804  543705 memory.go:184] no items to output this cycle
I0319 20:27:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 20:28:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:28:03.409787  543705 memory.go:184] no items to output this cycle
I0319 20:28:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 20:28:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:28:13.409811  543705 memory.go:191] Add success.
I0319 20:28:13.409822  543705 cpu.go:282] Add success.
W0319 20:28:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:28:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:28:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:28:13.420378  543705 net.go:648] Add success.
I0319 20:28:13.423045  543705 net.go:770] primary dev: ETH0
I0319 20:28:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:28:13.423069  543705 net.go:698] Add success.
I0319 20:28:14.453952  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:28:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:28:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0319 20:28:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:28:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 20:28:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:28:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:28:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:28:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:28:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:28:20.817676  543705 disk_info.go:125] begin check local disk info of client
I0319 20:28:20.820186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:28:20.820193  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049af40 0xc00049af80]
E0319 20:28:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:28:23.409767  543705 memory.go:184] no items to output this cycle
I0319 20:28:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 20:28:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:28:33.409777  543705 memory.go:184] no items to output this cycle
I0319 20:28:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:28:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:28:43.409784  543705 memory.go:191] Add success.
I0319 20:28:43.409815  543705 cpu.go:282] Add success.
I0319 20:28:43.419884  543705 net.go:648] Add success.
I0319 20:28:43.422720  543705 net.go:770] primary dev: ETH0
I0319 20:28:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:28:43.422757  543705 net.go:698] Add success.
I0319 20:28:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:28:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:28:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:28:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:28:53.409799  543705 cpu.go:275] no items to output this cycle
I0319 20:28:53.409803  543705 memory.go:184] no items to output this cycle
E0319 20:29:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:29:03.409796  543705 memory.go:184] no items to output this cycle
I0319 20:29:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 20:29:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:29:13.409784  543705 memory.go:191] Add success.
I0319 20:29:13.409805  543705 cpu.go:282] Add success.
W0319 20:29:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:29:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:29:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:29:13.420297  543705 net.go:648] Add success.
I0319 20:29:13.423142  543705 net.go:770] primary dev: ETH0
I0319 20:29:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:29:13.423170  543705 net.go:698] Add success.
I0319 20:29:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:29:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:29:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 20:29:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:29:14.456623  543705 disk_worker.go:494] system disk:vda1
I0319 20:29:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:29:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:29:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:29:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:29:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:29:16.472465  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:29:20.821673  543705 disk_info.go:125] begin check local disk info of client
I0319 20:29:20.824189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:29:20.824196  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ef80 0xc00035efc0]
E0319 20:29:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:29:23.409790  543705 memory.go:184] no items to output this cycle
I0319 20:29:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 20:29:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:29:33.409786  543705 memory.go:184] no items to output this cycle
I0319 20:29:33.409789  543705 cpu.go:275] no items to output this cycle
E0319 20:29:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:29:43.409780  543705 memory.go:191] Add success.
I0319 20:29:43.409806  543705 cpu.go:282] Add success.
I0319 20:29:43.419858  543705 net.go:648] Add success.
I0319 20:29:43.422932  543705 net.go:770] primary dev: ETH0
I0319 20:29:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:29:43.422957  543705 net.go:698] Add success.
I0319 20:29:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:29:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:29:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:29:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:29:53.409771  543705 memory.go:184] no items to output this cycle
I0319 20:29:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 20:30:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:30:03.409779  543705 memory.go:184] no items to output this cycle
I0319 20:30:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:30:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:30:13.409783  543705 memory.go:191] Add success.
I0319 20:30:13.409785  543705 cpu.go:282] Add success.
W0319 20:30:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:30:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:30:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:30:13.420241  543705 net.go:648] Add success.
I0319 20:30:13.423324  543705 net.go:770] primary dev: ETH0
I0319 20:30:13.423339  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:30:13.423352  543705 net.go:698] Add success.
I0319 20:30:13.528837  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf6edcf2-b5d8-43ab-ba20-0558804dc274","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:30:13.528876  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:30:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:30:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 20:30:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:30:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 20:30:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:30:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:30:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:30:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:30:16.472468  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:30:20.825673  543705 disk_info.go:125] begin check local disk info of client
I0319 20:30:20.828253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:30:20.828259  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cc940 0xc0004cc980]
E0319 20:30:23.410505  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:30:23.410524  543705 memory.go:184] no items to output this cycle
I0319 20:30:23.410536  543705 cpu.go:275] no items to output this cycle
E0319 20:30:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:30:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 20:30:33.409801  543705 memory.go:184] no items to output this cycle
I0319 20:30:37.868033  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:30:37.868040  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:30:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:30:43.410635  543705 memory.go:191] Add success.
I0319 20:30:43.409819  543705 cpu.go:282] Add success.
I0319 20:30:43.420356  543705 net.go:648] Add success.
I0319 20:30:43.423115  543705 net.go:770] primary dev: ETH0
I0319 20:30:43.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:30:43.423144  543705 net.go:698] Add success.
I0319 20:30:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:30:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:30:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:30:53.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:30:53.410272  543705 memory.go:184] no items to output this cycle
I0319 20:30:53.410314  543705 cpu.go:275] no items to output this cycle
E0319 20:31:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:31:03.409769  543705 memory.go:184] no items to output this cycle
I0319 20:31:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:31:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:31:13.409827  543705 memory.go:191] Add success.
I0319 20:31:13.409830  543705 cpu.go:282] Add success.
W0319 20:31:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:31:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:31:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:31:13.420298  543705 net.go:648] Add success.
I0319 20:31:13.423128  543705 net.go:770] primary dev: ETH0
I0319 20:31:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:31:13.423151  543705 net.go:698] Add success.
I0319 20:31:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:31:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:31:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 20:31:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:31:14.456510  543705 disk_worker.go:494] system disk:vda1
I0319 20:31:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:31:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:31:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:31:16.472417  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:31:20.829673  543705 disk_info.go:125] begin check local disk info of client
I0319 20:31:20.832058  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:31:20.832064  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005425c0 0xc000542600]
E0319 20:31:23.410421  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:31:23.410436  543705 memory.go:184] no items to output this cycle
I0319 20:31:23.410471  543705 cpu.go:275] no items to output this cycle
E0319 20:31:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:31:33.409775  543705 memory.go:184] no items to output this cycle
I0319 20:31:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 20:31:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:31:43.409780  543705 memory.go:191] Add success.
I0319 20:31:43.409811  543705 cpu.go:282] Add success.
I0319 20:31:43.419884  543705 net.go:648] Add success.
I0319 20:31:43.422746  543705 net.go:770] primary dev: ETH0
I0319 20:31:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:31:43.422773  543705 net.go:698] Add success.
I0319 20:31:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:31:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:31:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:31:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:31:53.409778  543705 memory.go:184] no items to output this cycle
I0319 20:31:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 20:32:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:32:03.409778  543705 memory.go:184] no items to output this cycle
I0319 20:32:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 20:32:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:32:13.409817  543705 memory.go:191] Add success.
I0319 20:32:13.409819  543705 cpu.go:282] Add success.
W0319 20:32:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:32:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:32:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:32:13.419773  543705 net.go:648] Add success.
I0319 20:32:13.422459  543705 net.go:770] primary dev: ETH0
I0319 20:32:13.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:32:13.422482  543705 net.go:698] Add success.
W0319 20:32:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:32:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 20:32:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:32:14.456783  543705 disk_worker.go:494] system disk:vda1
I0319 20:32:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:32:14.457136  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:32:14.457144  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:32:14.457148  543705 custom_config.go:64] query custom config with name: gpu
E0319 20:32:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:32:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:32:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:32:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:32:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:32:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:32:16.472358  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:32:20.833673  543705 disk_info.go:125] begin check local disk info of client
I0319 20:32:20.836099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:32:20.836105  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c48c0 0xc0000c4900]
E0319 20:32:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:32:23.409765  543705 memory.go:184] no items to output this cycle
I0319 20:32:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 20:32:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:32:33.409782  543705 memory.go:184] no items to output this cycle
I0319 20:32:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:32:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:32:43.409778  543705 memory.go:191] Add success.
I0319 20:32:43.409801  543705 cpu.go:282] Add success.
I0319 20:32:43.419852  543705 net.go:648] Add success.
I0319 20:32:43.422849  543705 net.go:770] primary dev: ETH0
I0319 20:32:43.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:32:43.422876  543705 net.go:698] Add success.
I0319 20:32:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:32:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:32:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:32:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:32:53.409805  543705 memory.go:184] no items to output this cycle
I0319 20:32:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 20:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:33:03.409781  543705 memory.go:184] no items to output this cycle
I0319 20:33:03.409786  543705 cpu.go:275] no items to output this cycle
E0319 20:33:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:33:13.409800  543705 memory.go:191] Add success.
I0319 20:33:13.409817  543705 cpu.go:282] Add success.
W0319 20:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:33:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:33:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:33:13.420322  543705 net.go:648] Add success.
I0319 20:33:13.423002  543705 net.go:770] primary dev: ETH0
I0319 20:33:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:33:13.423028  543705 net.go:698] Add success.
I0319 20:33:13.483475  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0cc7c0cb-b49c-4348-be69-a14da095cb30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:33:13.483511  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:33:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:33:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:33:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 20:33:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:33:14.456627  543705 disk_worker.go:494] system disk:vda1
I0319 20:33:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:33:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:33:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:33:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:33:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:33:20.837677  543705 disk_info.go:125] begin check local disk info of client
I0319 20:33:20.840175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:33:20.840181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab600 0xc0001ab640]
E0319 20:33:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:33:23.409769  543705 memory.go:184] no items to output this cycle
I0319 20:33:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:33:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:33:33.409810  543705 memory.go:184] no items to output this cycle
I0319 20:33:33.409822  543705 cpu.go:275] no items to output this cycle
I0319 20:33:37.869736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:33:37.869742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:33:43.410652  543705 memory.go:191] Add success.
I0319 20:33:43.409799  543705 cpu.go:282] Add success.
I0319 20:33:43.420356  543705 net.go:648] Add success.
I0319 20:33:43.423114  543705 net.go:770] primary dev: ETH0
I0319 20:33:43.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:33:43.423140  543705 net.go:698] Add success.
I0319 20:33:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:33:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:33:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:33:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:33:53.409814  543705 memory.go:184] no items to output this cycle
I0319 20:33:53.409825  543705 cpu.go:275] no items to output this cycle
E0319 20:34:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:34:03.409809  543705 memory.go:184] no items to output this cycle
I0319 20:34:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:34:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:34:13.409803  543705 memory.go:191] Add success.
I0319 20:34:13.409804  543705 cpu.go:282] Add success.
W0319 20:34:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:34:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:34:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:34:13.420166  543705 net.go:648] Add success.
I0319 20:34:13.423030  543705 net.go:770] primary dev: ETH0
I0319 20:34:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:34:13.423055  543705 net.go:698] Add success.
I0319 20:34:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:34:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:34:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 20:34:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:34:14.456592  543705 disk_worker.go:494] system disk:vda1
I0319 20:34:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:34:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:34:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:34:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:34:20.841679  543705 disk_info.go:125] begin check local disk info of client
I0319 20:34:20.844128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:34:20.844135  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396600 0xc000396640]
E0319 20:34:23.410441  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:34:23.410459  543705 memory.go:184] no items to output this cycle
I0319 20:34:23.410474  543705 cpu.go:275] no items to output this cycle
E0319 20:34:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:34:33.409793  543705 memory.go:184] no items to output this cycle
I0319 20:34:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:34:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:34:43.409779  543705 memory.go:191] Add success.
I0319 20:34:43.409811  543705 cpu.go:282] Add success.
I0319 20:34:43.419918  543705 net.go:648] Add success.
I0319 20:34:43.422731  543705 net.go:770] primary dev: ETH0
I0319 20:34:43.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:34:43.422757  543705 net.go:698] Add success.
I0319 20:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:34:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:34:53.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:34:53.409822  543705 cpu.go:275] no items to output this cycle
I0319 20:34:53.409837  543705 memory.go:184] no items to output this cycle
E0319 20:35:03.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:35:03.409906  543705 cpu.go:275] no items to output this cycle
I0319 20:35:03.409920  543705 memory.go:184] no items to output this cycle
E0319 20:35:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:35:13.409819  543705 memory.go:191] Add success.
I0319 20:35:13.409832  543705 cpu.go:282] Add success.
W0319 20:35:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:35:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:35:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:35:13.420137  543705 net.go:648] Add success.
I0319 20:35:13.423118  543705 net.go:770] primary dev: ETH0
I0319 20:35:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:35:13.423142  543705 net.go:698] Add success.
I0319 20:35:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:35:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:35:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 20:35:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:35:14.456490  543705 disk_worker.go:494] system disk:vda1
I0319 20:35:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:35:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:35:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:35:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:35:20.845671  543705 disk_info.go:125] begin check local disk info of client
I0319 20:35:20.848103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:35:20.848109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9280 0xc0003c92c0]
E0319 20:35:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:35:23.409760  543705 memory.go:184] no items to output this cycle
I0319 20:35:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 20:35:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:35:33.409815  543705 memory.go:184] no items to output this cycle
I0319 20:35:33.409827  543705 cpu.go:275] no items to output this cycle
E0319 20:35:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:35:43.409810  543705 memory.go:191] Add success.
I0319 20:35:43.409811  543705 cpu.go:282] Add success.
I0319 20:35:43.419892  543705 net.go:648] Add success.
I0319 20:35:43.423036  543705 net.go:770] primary dev: ETH0
I0319 20:35:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:35:43.423062  543705 net.go:698] Add success.
I0319 20:35:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:35:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:35:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:35:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:35:53.409786  543705 memory.go:184] no items to output this cycle
I0319 20:35:53.409793  543705 cpu.go:275] no items to output this cycle
E0319 20:36:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:36:03.409804  543705 memory.go:184] no items to output this cycle
I0319 20:36:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 20:36:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:36:13.409806  543705 memory.go:191] Add success.
I0319 20:36:13.409808  543705 cpu.go:282] Add success.
W0319 20:36:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:36:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:36:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:36:13.420044  543705 net.go:648] Add success.
I0319 20:36:13.422638  543705 net.go:770] primary dev: ETH0
I0319 20:36:13.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:36:13.422663  543705 net.go:698] Add success.
I0319 20:36:13.468950  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3dbf2296-79a8-43aa-8d4c-5ce0de3c34f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:36:13.468984  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:36:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:36:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:36:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 20:36:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:36:14.456584  543705 disk_worker.go:494] system disk:vda1
I0319 20:36:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:36:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:36:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:36:20.849671  543705 disk_info.go:125] begin check local disk info of client
I0319 20:36:20.852126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:36:20.852132  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d40 0xc0000c4d80]
E0319 20:36:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:36:23.409784  543705 memory.go:184] no items to output this cycle
I0319 20:36:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 20:36:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:36:33.409823  543705 memory.go:184] no items to output this cycle
I0319 20:36:33.409838  543705 cpu.go:275] no items to output this cycle
I0319 20:36:37.872050  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:36:37.872057  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:36:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:36:43.410885  543705 memory.go:191] Add success.
I0319 20:36:43.409804  543705 cpu.go:282] Add success.
I0319 20:36:43.420614  543705 net.go:648] Add success.
I0319 20:36:43.423393  543705 net.go:770] primary dev: ETH0
I0319 20:36:43.423406  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:36:43.423420  543705 net.go:698] Add success.
I0319 20:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:36:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:36:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 20:36:53.409945  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:36:53.409959  543705 memory.go:184] no items to output this cycle
E0319 20:37:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:37:03.409790  543705 cpu.go:275] no items to output this cycle
I0319 20:37:03.409806  543705 memory.go:184] no items to output this cycle
E0319 20:37:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:37:13.409794  543705 memory.go:191] Add success.
I0319 20:37:13.409804  543705 cpu.go:282] Add success.
W0319 20:37:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:37:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:37:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:37:13.420114  543705 net.go:648] Add success.
I0319 20:37:13.422805  543705 net.go:770] primary dev: ETH0
I0319 20:37:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:37:13.422840  543705 net.go:698] Add success.
I0319 20:37:13.453377  543705 event_worker.go:152] Polling the log file for events...
W0319 20:37:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:37:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 20:37:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:37:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:37:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:37:14.455895  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:37:14.456532  543705 disk_worker.go:494] system disk:vda1
I0319 20:37:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:37:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:37:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:37:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:37:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:37:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:37:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:37:16.472325  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:37:20.853681  543705 disk_info.go:125] begin check local disk info of client
I0319 20:37:20.856053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:37:20.856059  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb40 0xc00007bb80]
E0319 20:37:23.410385  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:37:23.410388  543705 cpu.go:275] no items to output this cycle
I0319 20:37:23.410398  543705 memory.go:184] no items to output this cycle
E0319 20:37:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:37:33.409828  543705 memory.go:184] no items to output this cycle
I0319 20:37:33.409839  543705 cpu.go:275] no items to output this cycle
E0319 20:37:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:37:43.409793  543705 memory.go:191] Add success.
I0319 20:37:43.409826  543705 cpu.go:282] Add success.
I0319 20:37:43.419988  543705 net.go:648] Add success.
I0319 20:37:43.423109  543705 net.go:770] primary dev: ETH0
I0319 20:37:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:37:43.423134  543705 net.go:698] Add success.
I0319 20:37:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:37:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:37:53.409805  543705 memory.go:184] no items to output this cycle
I0319 20:37:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 20:38:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:38:03.409784  543705 memory.go:184] no items to output this cycle
I0319 20:38:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 20:38:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:38:13.409798  543705 cpu.go:282] Add success.
I0319 20:38:13.409802  543705 memory.go:191] Add success.
W0319 20:38:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:38:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:38:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:38:13.420163  543705 net.go:648] Add success.
I0319 20:38:13.422996  543705 net.go:770] primary dev: ETH0
I0319 20:38:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:38:13.423022  543705 net.go:698] Add success.
I0319 20:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:38:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:38:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 20:38:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:38:14.456557  543705 disk_worker.go:494] system disk:vda1
I0319 20:38:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:38:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:38:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:38:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:38:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:38:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:38:20.857680  543705 disk_info.go:125] begin check local disk info of client
I0319 20:38:20.860185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:38:20.860191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bed00 0xc0003bed40]
E0319 20:38:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:38:23.409800  543705 memory.go:184] no items to output this cycle
I0319 20:38:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 20:38:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:38:33.409788  543705 memory.go:184] no items to output this cycle
I0319 20:38:33.409790  543705 cpu.go:275] no items to output this cycle
E0319 20:38:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:38:43.409788  543705 memory.go:191] Add success.
I0319 20:38:43.409790  543705 cpu.go:282] Add success.
I0319 20:38:43.420125  543705 net.go:648] Add success.
I0319 20:38:43.422973  543705 net.go:770] primary dev: ETH0
I0319 20:38:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:38:43.423002  543705 net.go:698] Add success.
I0319 20:38:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:38:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:38:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:38:53.409907  543705 memory.go:184] no items to output this cycle
I0319 20:38:53.409973  543705 cpu.go:275] no items to output this cycle
E0319 20:39:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:39:03.409812  543705 memory.go:184] no items to output this cycle
I0319 20:39:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 20:39:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:39:13.409813  543705 memory.go:191] Add success.
I0319 20:39:13.409820  543705 cpu.go:282] Add success.
W0319 20:39:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:39:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:39:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:39:13.420148  543705 net.go:648] Add success.
I0319 20:39:13.422994  543705 net.go:770] primary dev: ETH0
I0319 20:39:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:39:13.423019  543705 net.go:698] Add success.
I0319 20:39:13.479006  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c335162-d067-45c9-a42c-3a1702aaea71","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:39:13.479041  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:39:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:39:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:39:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 20:39:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:39:14.456735  543705 disk_worker.go:494] system disk:vda1
I0319 20:39:14.456767  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:39:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:39:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:39:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:39:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:39:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:39:20.861673  543705 disk_info.go:125] begin check local disk info of client
I0319 20:39:20.864243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:39:20.864249  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381f00 0xc000381f40]
E0319 20:39:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:39:23.409781  543705 cpu.go:275] no items to output this cycle
I0319 20:39:23.409783  543705 memory.go:184] no items to output this cycle
E0319 20:39:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:39:33.409818  543705 memory.go:184] no items to output this cycle
I0319 20:39:33.409829  543705 cpu.go:275] no items to output this cycle
I0319 20:39:37.873731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:39:37.873739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:39:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:39:43.410790  543705 memory.go:191] Add success.
I0319 20:39:43.409807  543705 cpu.go:282] Add success.
I0319 20:39:43.419719  543705 net.go:648] Add success.
I0319 20:39:43.422601  543705 net.go:770] primary dev: ETH0
I0319 20:39:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:39:43.422630  543705 net.go:698] Add success.
I0319 20:39:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:39:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:39:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:39:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:39:53.409786  543705 memory.go:184] no items to output this cycle
I0319 20:39:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 20:40:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:40:03.409779  543705 memory.go:184] no items to output this cycle
I0319 20:40:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:40:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:40:13.409820  543705 memory.go:191] Add success.
I0319 20:40:13.409821  543705 cpu.go:282] Add success.
W0319 20:40:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:40:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:40:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:40:13.420160  543705 net.go:648] Add success.
I0319 20:40:13.423437  543705 net.go:770] primary dev: ETH0
I0319 20:40:13.423450  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:40:13.423461  543705 net.go:698] Add success.
I0319 20:40:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:40:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:40:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0319 20:40:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:40:14.456614  543705 disk_worker.go:494] system disk:vda1
I0319 20:40:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:40:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:40:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:40:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:40:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:40:20.865679  543705 disk_info.go:125] begin check local disk info of client
I0319 20:40:20.868110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:40:20.868116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376400 0xc000376440]
E0319 20:40:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:40:23.409794  543705 memory.go:184] no items to output this cycle
I0319 20:40:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:40:33.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:40:33.409922  543705 memory.go:184] no items to output this cycle
I0319 20:40:33.410023  543705 cpu.go:275] no items to output this cycle
E0319 20:40:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:40:43.409791  543705 memory.go:191] Add success.
I0319 20:40:43.409806  543705 cpu.go:282] Add success.
I0319 20:40:43.419967  543705 net.go:648] Add success.
I0319 20:40:43.422879  543705 net.go:770] primary dev: ETH0
I0319 20:40:43.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:40:43.422905  543705 net.go:698] Add success.
I0319 20:40:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:40:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:40:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:40:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:40:53.409766  543705 memory.go:184] no items to output this cycle
I0319 20:40:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 20:41:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:41:03.409806  543705 memory.go:184] no items to output this cycle
I0319 20:41:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 20:41:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:41:13.409784  543705 memory.go:191] Add success.
W0319 20:41:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 20:41:13.409811  543705 cpu.go:282] Add success.
W0319 20:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:41:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:41:13.420075  543705 net.go:648] Add success.
I0319 20:41:13.422991  543705 net.go:770] primary dev: ETH0
I0319 20:41:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:41:13.423016  543705 net.go:698] Add success.
I0319 20:41:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:41:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:41:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 20:41:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:41:14.456505  543705 disk_worker.go:494] system disk:vda1
I0319 20:41:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:41:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:41:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:41:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:41:16.472430  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:41:20.869670  543705 disk_info.go:125] begin check local disk info of client
I0319 20:41:20.872135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:41:20.872141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be040 0xc0002be080]
E0319 20:41:23.409921  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:41:23.409953  543705 memory.go:184] no items to output this cycle
I0319 20:41:23.409967  543705 cpu.go:275] no items to output this cycle
E0319 20:41:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:41:33.409776  543705 memory.go:184] no items to output this cycle
I0319 20:41:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 20:41:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:41:43.409816  543705 memory.go:191] Add success.
I0319 20:41:43.409816  543705 cpu.go:282] Add success.
I0319 20:41:43.419971  543705 net.go:648] Add success.
I0319 20:41:43.422644  543705 net.go:770] primary dev: ETH0
I0319 20:41:43.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:41:43.422670  543705 net.go:698] Add success.
I0319 20:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:41:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:41:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:41:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:41:53.409774  543705 memory.go:184] no items to output this cycle
I0319 20:41:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 20:42:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:42:03.409778  543705 memory.go:184] no items to output this cycle
I0319 20:42:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 20:42:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:42:13.409801  543705 memory.go:191] Add success.
I0319 20:42:13.409808  543705 cpu.go:282] Add success.
W0319 20:42:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:42:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:42:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:42:13.420144  543705 net.go:648] Add success.
I0319 20:42:13.423565  543705 net.go:770] primary dev: ETH0
I0319 20:42:13.423578  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:42:13.423591  543705 net.go:698] Add success.
I0319 20:42:13.463528  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae08531a-fcd7-44a5-9bd0-f7b230e10e4f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:42:13.463560  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 20:42:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:42:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 20:42:14.455214  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:42:14.455918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:42:14.455928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:42:14.455934  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:42:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 20:42:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:42:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:42:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:42:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:42:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:42:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:42:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:42:16.472336  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:42:20.873675  543705 disk_info.go:125] begin check local disk info of client
I0319 20:42:20.876083  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:42:20.876089  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0319 20:42:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:42:23.409783  543705 memory.go:184] no items to output this cycle
I0319 20:42:23.409787  543705 cpu.go:275] no items to output this cycle
E0319 20:42:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:42:33.409777  543705 memory.go:184] no items to output this cycle
I0319 20:42:33.409815  543705 cpu.go:275] no items to output this cycle
I0319 20:42:37.876068  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:42:37.876074  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:42:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:42:43.410646  543705 memory.go:191] Add success.
I0319 20:42:43.409804  543705 cpu.go:282] Add success.
I0319 20:42:43.420406  543705 net.go:648] Add success.
I0319 20:42:43.422949  543705 net.go:770] primary dev: ETH0
I0319 20:42:43.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:42:43.422974  543705 net.go:698] Add success.
I0319 20:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:42:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:42:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:42:53.409780  543705 memory.go:184] no items to output this cycle
I0319 20:42:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 20:43:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:43:03.409818  543705 memory.go:184] no items to output this cycle
I0319 20:43:03.409833  543705 cpu.go:275] no items to output this cycle
E0319 20:43:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:43:13.409787  543705 memory.go:191] Add success.
I0319 20:43:13.409788  543705 cpu.go:282] Add success.
W0319 20:43:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:43:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:43:13.420632  543705 net.go:648] Add success.
I0319 20:43:13.423603  543705 net.go:770] primary dev: ETH0
I0319 20:43:13.423619  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:43:13.423633  543705 net.go:698] Add success.
I0319 20:43:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:43:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:43:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 20:43:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:43:14.458980  543705 disk_worker.go:494] system disk:vda1
I0319 20:43:14.459009  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:43:15.456009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:43:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:43:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:43:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:43:20.877676  543705 disk_info.go:125] begin check local disk info of client
I0319 20:43:20.880165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:43:20.880172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289140 0xc000289180]
E0319 20:43:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:43:23.409798  543705 memory.go:184] no items to output this cycle
I0319 20:43:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 20:43:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:43:33.409806  543705 memory.go:184] no items to output this cycle
I0319 20:43:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:43:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:43:43.409787  543705 memory.go:191] Add success.
I0319 20:43:43.409807  543705 cpu.go:282] Add success.
I0319 20:43:43.419881  543705 net.go:648] Add success.
I0319 20:43:43.422801  543705 net.go:770] primary dev: ETH0
I0319 20:43:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:43:43.422842  543705 net.go:698] Add success.
I0319 20:43:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:43:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:43:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:43:53.409805  543705 memory.go:184] no items to output this cycle
I0319 20:43:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 20:44:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:44:03.409778  543705 memory.go:184] no items to output this cycle
I0319 20:44:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 20:44:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:44:13.409789  543705 memory.go:191] Add success.
I0319 20:44:13.409789  543705 cpu.go:282] Add success.
W0319 20:44:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:44:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:44:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:44:13.420115  543705 net.go:648] Add success.
I0319 20:44:13.422925  543705 net.go:770] primary dev: ETH0
I0319 20:44:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:44:13.423114  543705 net.go:698] Add success.
I0319 20:44:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:44:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:44:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0319 20:44:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:44:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 20:44:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:44:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:44:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:44:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:44:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:44:20.881671  543705 disk_info.go:125] begin check local disk info of client
I0319 20:44:20.884198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:44:20.884205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1400 0xc0003f1440]
E0319 20:44:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:44:23.409779  543705 memory.go:184] no items to output this cycle
I0319 20:44:23.409783  543705 cpu.go:275] no items to output this cycle
E0319 20:44:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:44:33.409785  543705 memory.go:184] no items to output this cycle
I0319 20:44:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 20:44:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:44:43.409796  543705 cpu.go:282] Add success.
I0319 20:44:43.409802  543705 memory.go:191] Add success.
I0319 20:44:43.419828  543705 net.go:648] Add success.
I0319 20:44:43.422481  543705 net.go:770] primary dev: ETH0
I0319 20:44:43.422494  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:44:43.422506  543705 net.go:698] Add success.
I0319 20:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:44:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:44:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:44:53.409796  543705 memory.go:184] no items to output this cycle
I0319 20:44:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 20:45:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:45:03.409809  543705 memory.go:184] no items to output this cycle
I0319 20:45:03.409829  543705 cpu.go:275] no items to output this cycle
E0319 20:45:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:45:13.409782  543705 memory.go:191] Add success.
W0319 20:45:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:45:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:45:13.409819  543705 cpu.go:282] Add success.
I0319 20:45:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:45:13.420259  543705 net.go:648] Add success.
I0319 20:45:13.423209  543705 net.go:770] primary dev: ETH0
I0319 20:45:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:45:13.423233  543705 net.go:698] Add success.
I0319 20:45:13.469737  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b3afa715-c1f9-4197-9172-8577a26c655a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:45:13.469779  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:45:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:45:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:45:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 20:45:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:45:14.457278  543705 disk_worker.go:494] system disk:vda1
I0319 20:45:14.457384  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:45:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:45:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:45:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:45:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:45:20.885675  543705 disk_info.go:125] begin check local disk info of client
I0319 20:45:20.888129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:45:20.888136  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386cc0 0xc000386d00]
E0319 20:45:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:45:23.409765  543705 memory.go:184] no items to output this cycle
I0319 20:45:23.409817  543705 cpu.go:275] no items to output this cycle
E0319 20:45:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:45:33.409815  543705 memory.go:184] no items to output this cycle
I0319 20:45:33.409826  543705 cpu.go:275] no items to output this cycle
I0319 20:45:37.877739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:45:37.877746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:45:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:45:43.410662  543705 memory.go:191] Add success.
I0319 20:45:43.409791  543705 cpu.go:282] Add success.
I0319 20:45:43.420363  543705 net.go:648] Add success.
I0319 20:45:43.423099  543705 net.go:770] primary dev: ETH0
I0319 20:45:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:45:43.423124  543705 net.go:698] Add success.
I0319 20:45:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:45:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:45:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:45:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:45:53.409776  543705 memory.go:184] no items to output this cycle
I0319 20:45:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 20:46:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:46:03.409779  543705 memory.go:184] no items to output this cycle
I0319 20:46:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 20:46:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:46:13.409812  543705 memory.go:191] Add success.
I0319 20:46:13.409817  543705 cpu.go:282] Add success.
W0319 20:46:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:46:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:46:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:46:13.420144  543705 net.go:648] Add success.
I0319 20:46:13.423108  543705 net.go:770] primary dev: ETH0
I0319 20:46:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:46:13.423133  543705 net.go:698] Add success.
I0319 20:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:46:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:46:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0319 20:46:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:46:14.456922  543705 disk_worker.go:494] system disk:vda1
I0319 20:46:14.456952  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:46:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:46:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:46:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:46:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:46:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:46:20.889674  543705 disk_info.go:125] begin check local disk info of client
I0319 20:46:20.892114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:46:20.892120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8300 0xc0003c8340]
E0319 20:46:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:46:23.409775  543705 cpu.go:275] no items to output this cycle
I0319 20:46:23.409779  543705 memory.go:184] no items to output this cycle
E0319 20:46:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:46:33.409804  543705 memory.go:184] no items to output this cycle
I0319 20:46:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 20:46:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:46:43.409782  543705 memory.go:191] Add success.
I0319 20:46:43.409812  543705 cpu.go:282] Add success.
I0319 20:46:43.419875  543705 net.go:648] Add success.
I0319 20:46:43.422802  543705 net.go:770] primary dev: ETH0
I0319 20:46:43.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:46:43.422827  543705 net.go:698] Add success.
I0319 20:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:46:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:46:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:46:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:46:53.409767  543705 memory.go:184] no items to output this cycle
I0319 20:46:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 20:47:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:47:03.409783  543705 memory.go:184] no items to output this cycle
I0319 20:47:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 20:47:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:47:13.409799  543705 memory.go:191] Add success.
I0319 20:47:13.409820  543705 cpu.go:282] Add success.
W0319 20:47:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:47:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:47:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:47:13.420111  543705 net.go:648] Add success.
I0319 20:47:13.422835  543705 net.go:770] primary dev: ETH0
I0319 20:47:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:47:13.422865  543705 net.go:698] Add success.
I0319 20:47:13.453440  543705 event_worker.go:152] Polling the log file for events...
W0319 20:47:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:47:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 20:47:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:47:14.456936  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:47:14.456945  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:47:14.456951  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:47:14.457112  543705 disk_worker.go:494] system disk:vda1
I0319 20:47:14.457148  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:47:15.456794  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:47:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:47:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:47:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:47:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:47:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:47:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:47:20.893679  543705 disk_info.go:125] begin check local disk info of client
I0319 20:47:20.896055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:47:20.896061  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8340 0xc0002b8380]
E0319 20:47:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:47:23.409800  543705 memory.go:184] no items to output this cycle
I0319 20:47:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 20:47:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:47:33.409789  543705 memory.go:184] no items to output this cycle
I0319 20:47:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 20:47:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:47:43.409825  543705 memory.go:191] Add success.
I0319 20:47:43.409828  543705 cpu.go:282] Add success.
I0319 20:47:43.419960  543705 net.go:648] Add success.
I0319 20:47:43.422590  543705 net.go:770] primary dev: ETH0
I0319 20:47:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:47:43.422618  543705 net.go:698] Add success.
I0319 20:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:47:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:47:53.409777  543705 memory.go:184] no items to output this cycle
I0319 20:47:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 20:48:03.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:48:03.409824  543705 memory.go:184] no items to output this cycle
I0319 20:48:03.409835  543705 cpu.go:275] no items to output this cycle
E0319 20:48:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:48:13.409814  543705 memory.go:191] Add success.
I0319 20:48:13.409821  543705 cpu.go:282] Add success.
W0319 20:48:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:48:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:48:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:48:13.420070  543705 net.go:648] Add success.
I0319 20:48:13.422769  543705 net.go:770] primary dev: ETH0
I0319 20:48:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:48:13.422796  543705 net.go:698] Add success.
I0319 20:48:13.469462  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1a396fb9-1edb-420d-b76d-435e1e759b9d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:48:13.469495  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:48:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:48:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:48:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0319 20:48:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:48:14.456598  543705 disk_worker.go:494] system disk:vda1
I0319 20:48:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:48:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:48:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:48:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:48:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:48:16.472355  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:48:20.897672  543705 disk_info.go:125] begin check local disk info of client
I0319 20:48:20.900134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:48:20.900140  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328340 0xc000328380]
E0319 20:48:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:48:23.409796  543705 memory.go:184] no items to output this cycle
I0319 20:48:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 20:48:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:48:33.409778  543705 memory.go:184] no items to output this cycle
I0319 20:48:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 20:48:37.880093  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:48:37.880099  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:48:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:48:43.410745  543705 memory.go:191] Add success.
I0319 20:48:43.409826  543705 cpu.go:282] Add success.
I0319 20:48:43.420445  543705 net.go:648] Add success.
I0319 20:48:43.423419  543705 net.go:770] primary dev: ETH0
I0319 20:48:43.423432  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:48:43.423444  543705 net.go:698] Add success.
I0319 20:48:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:48:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:48:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:48:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:48:53.409805  543705 memory.go:184] no items to output this cycle
I0319 20:48:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 20:49:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:49:03.409813  543705 memory.go:184] no items to output this cycle
I0319 20:49:03.409829  543705 cpu.go:275] no items to output this cycle
E0319 20:49:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:49:13.409788  543705 memory.go:191] Add success.
I0319 20:49:13.409815  543705 cpu.go:282] Add success.
W0319 20:49:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:49:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:49:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:49:13.420116  543705 net.go:648] Add success.
I0319 20:49:13.423111  543705 net.go:770] primary dev: ETH0
I0319 20:49:13.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:49:13.423140  543705 net.go:698] Add success.
I0319 20:49:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:49:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:49:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 20:49:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:49:14.456590  543705 disk_worker.go:494] system disk:vda1
I0319 20:49:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:49:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:49:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:49:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:49:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:49:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:49:20.901675  543705 disk_info.go:125] begin check local disk info of client
I0319 20:49:20.904098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:49:20.904105  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328100 0xc000328140]
E0319 20:49:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:49:23.409802  543705 memory.go:184] no items to output this cycle
I0319 20:49:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 20:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:49:33.409785  543705 memory.go:184] no items to output this cycle
I0319 20:49:33.409830  543705 cpu.go:275] no items to output this cycle
E0319 20:49:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:49:43.409821  543705 memory.go:191] Add success.
I0319 20:49:43.409831  543705 cpu.go:282] Add success.
I0319 20:49:43.419906  543705 net.go:648] Add success.
I0319 20:49:43.423414  543705 net.go:770] primary dev: ETH0
I0319 20:49:43.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:49:43.423440  543705 net.go:698] Add success.
I0319 20:49:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:49:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:49:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:49:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:49:53.409785  543705 memory.go:184] no items to output this cycle
I0319 20:49:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 20:50:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:50:03.409774  543705 memory.go:184] no items to output this cycle
I0319 20:50:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:50:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:50:13.409825  543705 memory.go:191] Add success.
I0319 20:50:13.409834  543705 cpu.go:282] Add success.
W0319 20:50:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:50:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:50:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:50:13.420180  543705 net.go:648] Add success.
I0319 20:50:13.422844  543705 net.go:770] primary dev: ETH0
I0319 20:50:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:50:13.422869  543705 net.go:698] Add success.
I0319 20:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:50:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:50:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 20:50:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:50:14.456491  543705 disk_worker.go:494] system disk:vda1
I0319 20:50:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:50:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:50:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:50:20.905672  543705 disk_info.go:125] begin check local disk info of client
I0319 20:50:20.908096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:50:20.908102  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba2c0 0xc0002ba300]
E0319 20:50:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:50:23.409765  543705 memory.go:184] no items to output this cycle
I0319 20:50:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 20:50:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:50:33.409779  543705 memory.go:184] no items to output this cycle
I0319 20:50:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 20:50:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:50:43.409817  543705 memory.go:191] Add success.
I0319 20:50:43.409827  543705 cpu.go:282] Add success.
I0319 20:50:43.420286  543705 net.go:648] Add success.
I0319 20:50:43.423019  543705 net.go:770] primary dev: ETH0
I0319 20:50:43.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:50:43.423045  543705 net.go:698] Add success.
I0319 20:50:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:50:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:50:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:50:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:50:53.409770  543705 memory.go:184] no items to output this cycle
I0319 20:50:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 20:51:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:51:03.409777  543705 memory.go:184] no items to output this cycle
I0319 20:51:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 20:51:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:51:13.409791  543705 cpu.go:282] Add success.
I0319 20:51:13.409793  543705 memory.go:191] Add success.
W0319 20:51:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:51:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:51:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:51:13.420178  543705 net.go:648] Add success.
I0319 20:51:13.423013  543705 net.go:770] primary dev: ETH0
I0319 20:51:13.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:51:13.423038  543705 net.go:698] Add success.
I0319 20:51:13.464390  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"19f5dba8-d152-4ec7-826c-6639d1dd69db","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:51:13.464424  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:51:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:51:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:51:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 20:51:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:51:14.456614  543705 disk_worker.go:494] system disk:vda1
I0319 20:51:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:51:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:51:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:51:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:51:20.909667  543705 disk_info.go:125] begin check local disk info of client
I0319 20:51:20.912120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:51:20.912127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8ac0 0xc0002b8b00]
E0319 20:51:23.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:51:23.409890  543705 memory.go:184] no items to output this cycle
I0319 20:51:23.410002  543705 cpu.go:275] no items to output this cycle
E0319 20:51:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:51:33.409779  543705 memory.go:184] no items to output this cycle
I0319 20:51:33.409819  543705 cpu.go:275] no items to output this cycle
I0319 20:51:37.881760  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:51:37.881768  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:51:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:51:43.410813  543705 memory.go:191] Add success.
I0319 20:51:43.409846  543705 cpu.go:282] Add success.
I0319 20:51:43.420615  543705 net.go:648] Add success.
I0319 20:51:43.423351  543705 net.go:770] primary dev: ETH0
I0319 20:51:43.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:51:43.423378  543705 net.go:698] Add success.
I0319 20:51:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:51:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:51:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:51:53.409804  543705 memory.go:184] no items to output this cycle
I0319 20:51:53.409818  543705 cpu.go:275] no items to output this cycle
E0319 20:52:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:52:03.409780  543705 memory.go:184] no items to output this cycle
I0319 20:52:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 20:52:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:52:13.409791  543705 memory.go:191] Add success.
I0319 20:52:13.409798  543705 cpu.go:282] Add success.
W0319 20:52:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:52:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:52:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:52:13.420059  543705 net.go:648] Add success.
I0319 20:52:13.422918  543705 net.go:770] primary dev: ETH0
I0319 20:52:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:52:13.422942  543705 net.go:698] Add success.
W0319 20:52:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:52:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 20:52:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:52:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:52:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:52:14.455908  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:52:14.456554  543705 disk_worker.go:494] system disk:vda1
I0319 20:52:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:52:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:52:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:52:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:52:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:52:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:52:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:52:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:52:20.913677  543705 disk_info.go:125] begin check local disk info of client
I0319 20:52:20.916100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:52:20.916107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f90c0 0xc0001f9100]
E0319 20:52:23.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:52:23.409859  543705 memory.go:184] no items to output this cycle
I0319 20:52:23.409937  543705 cpu.go:275] no items to output this cycle
E0319 20:52:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:52:33.409788  543705 memory.go:184] no items to output this cycle
I0319 20:52:33.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:52:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:52:43.409812  543705 memory.go:191] Add success.
I0319 20:52:43.409817  543705 cpu.go:282] Add success.
I0319 20:52:43.420026  543705 net.go:648] Add success.
I0319 20:52:43.423156  543705 net.go:770] primary dev: ETH0
I0319 20:52:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:52:43.423184  543705 net.go:698] Add success.
I0319 20:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:52:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:52:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:52:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:52:53.409788  543705 memory.go:184] no items to output this cycle
I0319 20:52:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 20:53:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:53:03.409778  543705 memory.go:184] no items to output this cycle
I0319 20:53:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 20:53:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:53:13.409785  543705 memory.go:191] Add success.
I0319 20:53:13.409801  543705 cpu.go:282] Add success.
W0319 20:53:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:53:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:53:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:53:13.420071  543705 net.go:648] Add success.
I0319 20:53:13.423268  543705 net.go:770] primary dev: ETH0
I0319 20:53:13.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:53:13.423293  543705 net.go:698] Add success.
I0319 20:53:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:53:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:53:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 20:53:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:53:14.456594  543705 disk_worker.go:494] system disk:vda1
I0319 20:53:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:53:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:53:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:53:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:53:16.472458  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:53:20.917681  543705 disk_info.go:125] begin check local disk info of client
I0319 20:53:20.920146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:53:20.920152  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9e40 0xc0002b9e80]
E0319 20:53:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:53:23.409774  543705 memory.go:184] no items to output this cycle
I0319 20:53:23.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:53:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:53:33.409913  543705 memory.go:184] no items to output this cycle
I0319 20:53:33.409960  543705 cpu.go:275] no items to output this cycle
E0319 20:53:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:53:43.409795  543705 memory.go:191] Add success.
I0319 20:53:43.409796  543705 cpu.go:282] Add success.
I0319 20:53:43.419977  543705 net.go:648] Add success.
I0319 20:53:43.423068  543705 net.go:770] primary dev: ETH0
I0319 20:53:43.423083  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:53:43.423110  543705 net.go:698] Add success.
I0319 20:53:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:53:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:53:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:53:53.410397  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:53:53.410419  543705 memory.go:184] no items to output this cycle
I0319 20:53:53.410434  543705 cpu.go:275] no items to output this cycle
E0319 20:54:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:54:03.409820  543705 memory.go:184] no items to output this cycle
I0319 20:54:03.409833  543705 cpu.go:275] no items to output this cycle
E0319 20:54:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:54:13.409782  543705 memory.go:191] Add success.
I0319 20:54:13.409800  543705 cpu.go:282] Add success.
W0319 20:54:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:54:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:54:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:54:13.420107  543705 net.go:648] Add success.
I0319 20:54:13.422978  543705 net.go:770] primary dev: ETH0
I0319 20:54:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:54:13.423004  543705 net.go:698] Add success.
I0319 20:54:13.464718  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07ebac0a-678b-46a9-8c7e-a4eb8af144d0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:54:13.464759  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 20:54:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:54:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:54:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 20:54:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:54:14.456536  543705 disk_worker.go:494] system disk:vda1
I0319 20:54:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:54:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:54:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:54:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:54:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:54:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:54:20.921672  543705 disk_info.go:125] begin check local disk info of client
I0319 20:54:20.924096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:54:20.924103  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028d680 0xc00028d6c0]
E0319 20:54:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:54:23.409786  543705 memory.go:184] no items to output this cycle
I0319 20:54:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:54:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:54:33.409819  543705 memory.go:184] no items to output this cycle
I0319 20:54:33.409829  543705 cpu.go:275] no items to output this cycle
I0319 20:54:37.884122  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:54:37.884128  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:54:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:54:43.410575  543705 memory.go:191] Add success.
I0319 20:54:43.409807  543705 cpu.go:282] Add success.
I0319 20:54:43.420290  543705 net.go:648] Add success.
I0319 20:54:43.422891  543705 net.go:770] primary dev: ETH0
I0319 20:54:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:54:43.422919  543705 net.go:698] Add success.
I0319 20:54:46.458490  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:54:46.458555  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:54:46.458579  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:54:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:54:53.409777  543705 memory.go:184] no items to output this cycle
I0319 20:54:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 20:55:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:55:03.409784  543705 memory.go:184] no items to output this cycle
I0319 20:55:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 20:55:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:55:13.409788  543705 memory.go:191] Add success.
I0319 20:55:13.409789  543705 cpu.go:282] Add success.
W0319 20:55:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:55:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:55:13.420069  543705 net.go:648] Add success.
I0319 20:55:13.422918  543705 net.go:770] primary dev: ETH0
I0319 20:55:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:55:13.422948  543705 net.go:698] Add success.
I0319 20:55:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:55:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:55:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 20:55:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:55:14.456600  543705 disk_worker.go:494] system disk:vda1
I0319 20:55:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:55:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:55:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:55:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:55:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:55:20.925672  543705 disk_info.go:125] begin check local disk info of client
I0319 20:55:20.928075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:55:20.928081  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e8c0 0xc00039e900]
E0319 20:55:23.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:55:23.410250  543705 cpu.go:275] no items to output this cycle
I0319 20:55:23.410365  543705 memory.go:184] no items to output this cycle
E0319 20:55:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:55:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 20:55:33.409807  543705 memory.go:184] no items to output this cycle
E0319 20:55:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:55:43.409811  543705 memory.go:191] Add success.
I0319 20:55:43.409825  543705 cpu.go:282] Add success.
I0319 20:55:43.420060  543705 net.go:648] Add success.
I0319 20:55:43.422977  543705 net.go:770] primary dev: ETH0
I0319 20:55:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:55:43.423002  543705 net.go:698] Add success.
I0319 20:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:55:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:55:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:55:53.409804  543705 memory.go:184] no items to output this cycle
I0319 20:55:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 20:56:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:56:03.409781  543705 memory.go:184] no items to output this cycle
I0319 20:56:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 20:56:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:56:13.409776  543705 memory.go:191] Add success.
W0319 20:56:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 20:56:13.409806  543705 cpu.go:282] Add success.
W0319 20:56:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:56:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:56:13.420064  543705 net.go:648] Add success.
I0319 20:56:13.423073  543705 net.go:770] primary dev: ETH0
I0319 20:56:13.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:56:13.423101  543705 net.go:698] Add success.
I0319 20:56:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:56:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:56:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 20:56:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:56:14.456577  543705 disk_worker.go:494] system disk:vda1
I0319 20:56:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:56:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:56:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:56:16.472428  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:56:20.929671  543705 disk_info.go:125] begin check local disk info of client
I0319 20:56:20.932071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:56:20.932077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395a00 0xc000395a40]
E0319 20:56:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:56:23.409896  543705 memory.go:184] no items to output this cycle
I0319 20:56:23.409895  543705 cpu.go:275] no items to output this cycle
E0319 20:56:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:56:33.409815  543705 memory.go:184] no items to output this cycle
I0319 20:56:33.409829  543705 cpu.go:275] no items to output this cycle
E0319 20:56:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:56:43.409787  543705 memory.go:191] Add success.
I0319 20:56:43.409807  543705 cpu.go:282] Add success.
I0319 20:56:43.420044  543705 net.go:648] Add success.
I0319 20:56:43.422956  543705 net.go:770] primary dev: ETH0
I0319 20:56:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:56:43.422982  543705 net.go:698] Add success.
I0319 20:56:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:56:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:56:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:56:53.409777  543705 memory.go:184] no items to output this cycle
I0319 20:56:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 20:57:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:57:03.409787  543705 memory.go:184] no items to output this cycle
I0319 20:57:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 20:57:13.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:57:13.409769  543705 memory.go:191] Add success.
W0319 20:57:13.409795  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:57:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:57:13.409809  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:57:13.409814  543705 cpu.go:282] Add success.
I0319 20:57:13.420112  543705 net.go:648] Add success.
I0319 20:57:13.429535  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 20:57:13.429618  543705 net.go:770] primary dev: ETH0
I0319 20:57:13.429630  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:57:13.429641  543705 net.go:698] Add success.
I0319 20:57:13.453219  543705 event_worker.go:152] Polling the log file for events...
I0319 20:57:13.703075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"340aee55-9f52-4488-a4e4-31051f91c045","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 20:57:13.703110  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 20:57:14.454856  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:57:14.454936  543705 disk_worker.go:708] disk space is not compliant
W0319 20:57:14.454940  543705 disk_worker.go:728] disk inode is not compliant
E0319 20:57:14.455675  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 20:57:14.455684  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 20:57:14.455690  543705 custom_config.go:64] query custom config with name: gpu
I0319 20:57:14.456480  543705 disk_worker.go:494] system disk:vda1
I0319 20:57:14.456509  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 20:57:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 20:57:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:57:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 20:57:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 20:57:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:57:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:57:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:57:20.933677  543705 disk_info.go:125] begin check local disk info of client
I0319 20:57:20.936070  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:57:20.936077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368000 0xc000368040]
E0319 20:57:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:57:23.409786  543705 memory.go:184] no items to output this cycle
I0319 20:57:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:57:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:57:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 20:57:33.409796  543705 memory.go:184] no items to output this cycle
I0319 20:57:37.885731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 20:57:37.885738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 20:57:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:57:43.410638  543705 memory.go:191] Add success.
I0319 20:57:43.409801  543705 cpu.go:282] Add success.
I0319 20:57:43.420338  543705 net.go:648] Add success.
I0319 20:57:43.422968  543705 net.go:770] primary dev: ETH0
I0319 20:57:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:57:43.422994  543705 net.go:698] Add success.
I0319 20:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:57:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:57:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:57:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:57:53.409786  543705 memory.go:184] no items to output this cycle
I0319 20:57:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 20:58:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:58:03.409783  543705 memory.go:184] no items to output this cycle
I0319 20:58:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 20:58:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:58:13.409820  543705 memory.go:191] Add success.
I0319 20:58:13.409830  543705 cpu.go:282] Add success.
W0319 20:58:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:58:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:58:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:58:13.420121  543705 net.go:648] Add success.
I0319 20:58:13.423151  543705 net.go:770] primary dev: ETH0
I0319 20:58:13.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:58:13.423181  543705 net.go:698] Add success.
I0319 20:58:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:58:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:58:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0319 20:58:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:58:14.456497  543705 disk_worker.go:494] system disk:vda1
I0319 20:58:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:58:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:58:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:58:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:58:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:58:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:58:20.937681  543705 disk_info.go:125] begin check local disk info of client
I0319 20:58:20.940165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:58:20.940172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000576300 0xc000576340]
E0319 20:58:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:58:23.409800  543705 memory.go:184] no items to output this cycle
I0319 20:58:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 20:58:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:58:33.409792  543705 memory.go:184] no items to output this cycle
I0319 20:58:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 20:58:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:58:43.409794  543705 cpu.go:282] Add success.
I0319 20:58:43.409807  543705 memory.go:191] Add success.
I0319 20:58:43.419908  543705 net.go:648] Add success.
I0319 20:58:43.422771  543705 net.go:770] primary dev: ETH0
I0319 20:58:43.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:58:43.422797  543705 net.go:698] Add success.
I0319 20:58:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:58:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:58:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:58:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:58:53.409810  543705 memory.go:184] no items to output this cycle
I0319 20:58:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 20:59:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:59:03.409796  543705 memory.go:184] no items to output this cycle
I0319 20:59:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 20:59:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:59:13.409800  543705 cpu.go:282] Add success.
I0319 20:59:13.409806  543705 memory.go:191] Add success.
W0319 20:59:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 20:59:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 20:59:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 20:59:13.420044  543705 net.go:648] Add success.
I0319 20:59:13.422877  543705 net.go:770] primary dev: ETH0
I0319 20:59:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:59:13.422902  543705 net.go:698] Add success.
I0319 20:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 20:59:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 20:59:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 20:59:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 20:59:14.456586  543705 disk_worker.go:494] system disk:vda1
I0319 20:59:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 20:59:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 20:59:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:59:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:59:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 20:59:16.472468  543705 disk_local_worker.go:436] Get disk info: []
I0319 20:59:20.941676  543705 disk_info.go:125] begin check local disk info of client
I0319 20:59:20.944100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 20:59:20.944106  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4080 0xc0004b40c0]
E0319 20:59:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:59:23.409770  543705 memory.go:184] no items to output this cycle
I0319 20:59:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 20:59:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:59:33.409823  543705 memory.go:184] no items to output this cycle
I0319 20:59:33.409834  543705 cpu.go:275] no items to output this cycle
E0319 20:59:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:59:43.409782  543705 memory.go:191] Add success.
I0319 20:59:43.409820  543705 cpu.go:282] Add success.
I0319 20:59:43.419954  543705 net.go:648] Add success.
I0319 20:59:43.422452  543705 net.go:770] primary dev: ETH0
I0319 20:59:43.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0319 20:59:43.422480  543705 net.go:698] Add success.
I0319 20:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 20:59:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 20:59:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 20:59:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 20:59:53.409781  543705 cpu.go:275] no items to output this cycle
I0319 20:59:53.409784  543705 memory.go:184] no items to output this cycle
E0319 21:00:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:00:03.409779  543705 memory.go:184] no items to output this cycle
I0319 21:00:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 21:00:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:00:13.409781  543705 memory.go:191] Add success.
I0319 21:00:13.409806  543705 cpu.go:282] Add success.
W0319 21:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:00:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:00:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:00:13.420060  543705 net.go:648] Add success.
I0319 21:00:13.422730  543705 net.go:770] primary dev: ETH0
I0319 21:00:13.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:00:13.422754  543705 net.go:698] Add success.
I0319 21:00:13.469109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"094f6fda-7d31-4611-97c9-43f1367616f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:00:13.469152  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:00:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:00:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:00:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 21:00:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:00:14.456665  543705 disk_worker.go:494] system disk:vda1
I0319 21:00:14.456696  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:00:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:00:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:00:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:00:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:00:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:00:20.945679  543705 disk_info.go:125] begin check local disk info of client
I0319 21:00:20.948192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:00:20.948198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003924c0 0xc000392500]
E0319 21:00:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:00:23.409792  543705 memory.go:184] no items to output this cycle
I0319 21:00:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 21:00:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:00:33.409810  543705 memory.go:184] no items to output this cycle
I0319 21:00:33.409820  543705 cpu.go:275] no items to output this cycle
I0319 21:00:37.888128  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:00:37.888135  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:00:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:00:43.410621  543705 memory.go:191] Add success.
I0319 21:00:43.409805  543705 cpu.go:282] Add success.
I0319 21:00:43.420311  543705 net.go:648] Add success.
I0319 21:00:43.423086  543705 net.go:770] primary dev: ETH0
I0319 21:00:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:00:43.423113  543705 net.go:698] Add success.
I0319 21:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:00:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:00:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:00:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:00:53.409774  543705 memory.go:184] no items to output this cycle
I0319 21:00:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 21:01:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:01:03.409797  543705 memory.go:184] no items to output this cycle
I0319 21:01:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 21:01:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:01:13.409777  543705 memory.go:191] Add success.
W0319 21:01:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:01:13.409807  543705 cpu.go:282] Add success.
W0319 21:01:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:01:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:01:13.420088  543705 net.go:648] Add success.
I0319 21:01:13.423505  543705 net.go:770] primary dev: ETH0
I0319 21:01:13.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:01:13.423534  543705 net.go:698] Add success.
I0319 21:01:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:01:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:01:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 21:01:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:01:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 21:01:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:01:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:01:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:01:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:01:16.472427  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:01:20.949665  543705 disk_info.go:125] begin check local disk info of client
I0319 21:01:20.952073  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:01:20.952080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5880 0xc0002a58c0]
E0319 21:01:23.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:01:23.409900  543705 memory.go:184] no items to output this cycle
I0319 21:01:23.409960  543705 cpu.go:275] no items to output this cycle
E0319 21:01:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:01:33.409789  543705 cpu.go:275] no items to output this cycle
I0319 21:01:33.409796  543705 memory.go:184] no items to output this cycle
E0319 21:01:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:01:43.409789  543705 memory.go:191] Add success.
I0319 21:01:43.409803  543705 cpu.go:282] Add success.
I0319 21:01:43.419949  543705 net.go:648] Add success.
I0319 21:01:43.422864  543705 net.go:770] primary dev: ETH0
I0319 21:01:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:01:43.422893  543705 net.go:698] Add success.
I0319 21:01:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:01:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:01:53.409787  543705 memory.go:184] no items to output this cycle
I0319 21:01:53.409792  543705 cpu.go:275] no items to output this cycle
E0319 21:02:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:02:03.409779  543705 memory.go:184] no items to output this cycle
I0319 21:02:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:02:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:02:13.409784  543705 memory.go:191] Add success.
I0319 21:02:13.409786  543705 cpu.go:282] Add success.
W0319 21:02:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:02:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:02:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:02:13.420065  543705 net.go:648] Add success.
I0319 21:02:13.422869  543705 net.go:770] primary dev: ETH0
I0319 21:02:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:02:13.422896  543705 net.go:698] Add success.
W0319 21:02:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:02:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 21:02:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:02:14.456595  543705 disk_worker.go:494] system disk:vda1
I0319 21:02:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:02:14.456947  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:02:14.456957  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:02:14.456963  543705 custom_config.go:64] query custom config with name: gpu
E0319 21:02:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:02:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:02:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:02:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:02:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:02:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:02:16.472327  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:02:20.953678  543705 disk_info.go:125] begin check local disk info of client
I0319 21:02:20.956258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:02:20.956264  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f280 0xc00029f2c0]
E0319 21:02:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:02:23.409763  543705 memory.go:184] no items to output this cycle
I0319 21:02:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:02:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:02:33.409795  543705 memory.go:184] no items to output this cycle
I0319 21:02:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 21:02:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:02:43.409785  543705 memory.go:191] Add success.
I0319 21:02:43.409796  543705 cpu.go:282] Add success.
I0319 21:02:43.419961  543705 net.go:648] Add success.
I0319 21:02:43.422777  543705 net.go:770] primary dev: ETH0
I0319 21:02:43.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:02:43.422802  543705 net.go:698] Add success.
I0319 21:02:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:02:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:02:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:02:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:02:53.409798  543705 memory.go:184] no items to output this cycle
I0319 21:02:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 21:03:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:03:03.409787  543705 memory.go:184] no items to output this cycle
I0319 21:03:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 21:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:03:13.409785  543705 memory.go:191] Add success.
I0319 21:03:13.409801  543705 cpu.go:282] Add success.
W0319 21:03:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:03:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:03:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:03:13.420210  543705 net.go:648] Add success.
I0319 21:03:13.422989  543705 net.go:770] primary dev: ETH0
I0319 21:03:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:03:13.423018  543705 net.go:698] Add success.
I0319 21:03:13.838093  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1dc7bee4-b268-45a5-b90e-aecf1b4bb522","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:03:13.838139  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:03:14.454694  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:03:14.454897  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:03:14.454907  543705 disk_worker.go:708] disk space is not compliant
W0319 21:03:14.454910  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:03:14.456249  543705 disk_worker.go:494] system disk:vda1
I0319 21:03:14.456292  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:03:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:03:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:03:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:03:20.957676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:03:20.960171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:03:20.960178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374f00 0xc000374f40]
E0319 21:03:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:03:23.409791  543705 memory.go:184] no items to output this cycle
I0319 21:03:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 21:03:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:03:33.409798  543705 memory.go:184] no items to output this cycle
I0319 21:03:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 21:03:37.889730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:03:37.889737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:03:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:03:43.410635  543705 memory.go:191] Add success.
I0319 21:03:43.409796  543705 cpu.go:282] Add success.
I0319 21:03:43.420381  543705 net.go:648] Add success.
I0319 21:03:43.422811  543705 net.go:770] primary dev: ETH0
I0319 21:03:43.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:03:43.422837  543705 net.go:698] Add success.
I0319 21:03:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:03:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:03:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:03:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:03:53.409799  543705 memory.go:184] no items to output this cycle
I0319 21:03:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 21:04:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:04:03.409781  543705 memory.go:184] no items to output this cycle
I0319 21:04:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 21:04:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:04:13.409816  543705 memory.go:191] Add success.
I0319 21:04:13.409828  543705 cpu.go:282] Add success.
W0319 21:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:04:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:04:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:04:13.420148  543705 net.go:648] Add success.
I0319 21:04:13.422921  543705 net.go:770] primary dev: ETH0
I0319 21:04:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:04:13.422951  543705 net.go:698] Add success.
I0319 21:04:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:04:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:04:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 21:04:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:04:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 21:04:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:04:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:04:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:04:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:04:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:04:20.961672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:04:20.964118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:04:20.964125  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6740 0xc0002b6780]
E0319 21:04:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:04:23.409785  543705 memory.go:184] no items to output this cycle
I0319 21:04:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 21:04:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:04:33.409889  543705 memory.go:184] no items to output this cycle
I0319 21:04:33.409977  543705 cpu.go:275] no items to output this cycle
E0319 21:04:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:04:43.409796  543705 memory.go:191] Add success.
I0319 21:04:43.409806  543705 cpu.go:282] Add success.
I0319 21:04:43.419938  543705 net.go:648] Add success.
I0319 21:04:43.422685  543705 net.go:770] primary dev: ETH0
I0319 21:04:43.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:04:43.422710  543705 net.go:698] Add success.
I0319 21:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:04:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:04:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:04:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:04:53.409795  543705 memory.go:184] no items to output this cycle
I0319 21:04:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 21:05:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:05:03.409809  543705 memory.go:184] no items to output this cycle
I0319 21:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 21:05:13.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:05:13.409769  543705 memory.go:191] Add success.
W0319 21:05:13.409794  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:05:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:05:13.409808  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:05:13.409809  543705 cpu.go:282] Add success.
I0319 21:05:13.420278  543705 net.go:648] Add success.
I0319 21:05:13.423413  543705 net.go:770] primary dev: ETH0
I0319 21:05:13.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:05:13.423438  543705 net.go:698] Add success.
I0319 21:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:05:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:05:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0319 21:05:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:05:14.456591  543705 disk_worker.go:494] system disk:vda1
I0319 21:05:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:05:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:05:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:05:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:05:20.965673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:05:20.968090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:05:20.968096  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000de980 0xc0000de9c0]
E0319 21:05:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:05:23.409760  543705 memory.go:184] no items to output this cycle
I0319 21:05:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 21:05:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:05:33.409777  543705 memory.go:184] no items to output this cycle
I0319 21:05:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 21:05:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:05:43.409804  543705 memory.go:191] Add success.
I0319 21:05:43.409806  543705 cpu.go:282] Add success.
I0319 21:05:43.419982  543705 net.go:648] Add success.
I0319 21:05:43.422866  543705 net.go:770] primary dev: ETH0
I0319 21:05:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:05:43.422894  543705 net.go:698] Add success.
I0319 21:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:05:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:05:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:05:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:05:53.409779  543705 memory.go:184] no items to output this cycle
I0319 21:05:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 21:06:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:06:03.409782  543705 memory.go:184] no items to output this cycle
I0319 21:06:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 21:06:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:06:13.409776  543705 memory.go:191] Add success.
W0319 21:06:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:06:13.409809  543705 cpu.go:282] Add success.
W0319 21:06:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:06:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:06:13.420035  543705 net.go:648] Add success.
I0319 21:06:13.422762  543705 net.go:770] primary dev: ETH0
I0319 21:06:13.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:06:13.422787  543705 net.go:698] Add success.
I0319 21:06:13.480875  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"26baa5b2-75b3-49c5-93bf-da8d0c5ea60b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:06:13.480908  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:06:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:06:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:06:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 21:06:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:06:14.456603  543705 disk_worker.go:494] system disk:vda1
I0319 21:06:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:06:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:06:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:06:16.472566  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:06:20.969673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:06:20.972095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:06:20.972101  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003073c0 0xc000307400]
E0319 21:06:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:06:23.409790  543705 memory.go:184] no items to output this cycle
I0319 21:06:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 21:06:33.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:06:33.409888  543705 cpu.go:275] no items to output this cycle
I0319 21:06:33.409896  543705 memory.go:184] no items to output this cycle
I0319 21:06:37.892162  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:06:37.892169  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:06:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:06:43.410765  543705 memory.go:191] Add success.
I0319 21:06:43.409829  543705 cpu.go:282] Add success.
I0319 21:06:43.420475  543705 net.go:648] Add success.
I0319 21:06:43.423230  543705 net.go:770] primary dev: ETH0
I0319 21:06:43.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:06:43.423257  543705 net.go:698] Add success.
I0319 21:06:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:06:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:06:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:06:53.409774  543705 memory.go:184] no items to output this cycle
I0319 21:06:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 21:07:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:07:03.409790  543705 memory.go:184] no items to output this cycle
I0319 21:07:03.409793  543705 cpu.go:275] no items to output this cycle
W0319 21:07:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:07:13.409725  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:07:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:07:13.409798  543705 cpu.go:282] Add success.
E0319 21:07:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:07:13.409820  543705 memory.go:191] Add success.
I0319 21:07:13.420068  543705 net.go:648] Add success.
I0319 21:07:13.423184  543705 net.go:770] primary dev: ETH0
I0319 21:07:13.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:07:13.423214  543705 net.go:698] Add success.
I0319 21:07:13.452855  543705 event_worker.go:152] Polling the log file for events...
W0319 21:07:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 21:07:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:07:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:07:14.455893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:07:14.455898  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:07:14.456555  543705 disk_worker.go:494] system disk:vda1
I0319 21:07:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:07:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:07:15.456884  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:07:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:07:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:07:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:07:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:07:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:07:20.973675  543705 disk_info.go:125] begin check local disk info of client
I0319 21:07:20.976051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:07:20.976057  543705 disk_info.go:196] parse disk info done, disk is : [0xc000216ac0 0xc000216b00]
E0319 21:07:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:07:23.409785  543705 memory.go:184] no items to output this cycle
I0319 21:07:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 21:07:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:07:33.409786  543705 memory.go:184] no items to output this cycle
I0319 21:07:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:07:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:07:43.409828  543705 memory.go:191] Add success.
I0319 21:07:43.409835  543705 cpu.go:282] Add success.
I0319 21:07:43.419964  543705 net.go:648] Add success.
I0319 21:07:43.423046  543705 net.go:770] primary dev: ETH0
I0319 21:07:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:07:43.423072  543705 net.go:698] Add success.
I0319 21:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:07:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:07:53.409801  543705 memory.go:184] no items to output this cycle
I0319 21:07:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 21:08:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:08:03.409783  543705 memory.go:184] no items to output this cycle
I0319 21:08:03.409790  543705 cpu.go:275] no items to output this cycle
W0319 21:08:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:08:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:08:13.409729  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 21:08:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:08:13.409817  543705 memory.go:191] Add success.
I0319 21:08:13.409817  543705 cpu.go:282] Add success.
I0319 21:08:13.420149  543705 net.go:648] Add success.
I0319 21:08:13.423104  543705 net.go:770] primary dev: ETH0
I0319 21:08:13.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:08:13.423128  543705 net.go:698] Add success.
I0319 21:08:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:08:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:08:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 21:08:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:08:14.456514  543705 disk_worker.go:494] system disk:vda1
I0319 21:08:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:08:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:08:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:08:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:08:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:08:20.977673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:08:20.980149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:08:20.980155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa800 0xc0001aa840]
E0319 21:08:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:08:23.409822  543705 memory.go:184] no items to output this cycle
I0319 21:08:23.409826  543705 cpu.go:275] no items to output this cycle
E0319 21:08:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:08:33.409780  543705 memory.go:184] no items to output this cycle
I0319 21:08:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 21:08:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:08:43.409798  543705 memory.go:191] Add success.
I0319 21:08:43.409802  543705 cpu.go:282] Add success.
I0319 21:08:43.420243  543705 net.go:648] Add success.
I0319 21:08:43.422813  543705 net.go:770] primary dev: ETH0
I0319 21:08:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:08:43.422838  543705 net.go:698] Add success.
I0319 21:08:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:08:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:08:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:08:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:08:53.409786  543705 memory.go:184] no items to output this cycle
I0319 21:08:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 21:09:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:09:03.409780  543705 memory.go:184] no items to output this cycle
I0319 21:09:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 21:09:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:09:13.409781  543705 memory.go:191] Add success.
I0319 21:09:13.409803  543705 cpu.go:282] Add success.
W0319 21:09:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:09:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:09:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:09:13.420000  543705 net.go:770] primary dev: ETH0
I0319 21:09:13.420012  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:09:13.420025  543705 net.go:698] Add success.
I0319 21:09:13.420264  543705 net.go:648] Add success.
I0319 21:09:13.562102  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4f82594a-5bb8-49bf-9512-cbad26f7fc30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:09:13.562140  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:09:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:09:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:09:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 21:09:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:09:14.456628  543705 disk_worker.go:494] system disk:vda1
I0319 21:09:14.456658  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:09:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:09:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:09:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:09:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:09:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:09:20.981673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:09:20.984140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:09:20.984146  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343a80 0xc000343ac0]
E0319 21:09:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:09:23.409773  543705 memory.go:184] no items to output this cycle
I0319 21:09:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 21:09:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:09:33.409813  543705 memory.go:184] no items to output this cycle
I0319 21:09:33.409822  543705 cpu.go:275] no items to output this cycle
I0319 21:09:37.893726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:09:37.893734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:09:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:09:43.410758  543705 memory.go:191] Add success.
I0319 21:09:43.409828  543705 cpu.go:282] Add success.
I0319 21:09:43.419717  543705 net.go:648] Add success.
I0319 21:09:43.422680  543705 net.go:770] primary dev: ETH0
I0319 21:09:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:09:43.422706  543705 net.go:698] Add success.
I0319 21:09:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:09:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:09:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:09:53.409802  543705 memory.go:184] no items to output this cycle
I0319 21:09:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 21:10:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:10:03.409806  543705 memory.go:184] no items to output this cycle
I0319 21:10:03.409820  543705 cpu.go:275] no items to output this cycle
W0319 21:10:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:10:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:10:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 21:10:13.409832  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:10:13.409837  543705 cpu.go:282] Add success.
I0319 21:10:13.409851  543705 memory.go:191] Add success.
I0319 21:10:13.419983  543705 net.go:648] Add success.
I0319 21:10:13.422983  543705 net.go:770] primary dev: ETH0
I0319 21:10:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:10:13.423011  543705 net.go:698] Add success.
I0319 21:10:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:10:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:10:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 21:10:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:10:14.456571  543705 disk_worker.go:494] system disk:vda1
I0319 21:10:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:10:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:10:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:10:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:10:20.985672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:10:20.988123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:10:20.988129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004625c0 0xc000462600]
E0319 21:10:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:10:23.409766  543705 memory.go:184] no items to output this cycle
I0319 21:10:23.409781  543705 cpu.go:275] no items to output this cycle
E0319 21:10:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:10:33.409811  543705 memory.go:184] no items to output this cycle
I0319 21:10:33.409825  543705 cpu.go:275] no items to output this cycle
E0319 21:10:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:10:43.409781  543705 memory.go:191] Add success.
I0319 21:10:43.409815  543705 cpu.go:282] Add success.
I0319 21:10:43.420122  543705 net.go:648] Add success.
I0319 21:10:43.423278  543705 net.go:770] primary dev: ETH0
I0319 21:10:43.423291  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:10:43.423303  543705 net.go:698] Add success.
I0319 21:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:10:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:10:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:10:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:10:53.409775  543705 memory.go:184] no items to output this cycle
I0319 21:10:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 21:11:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:11:03.409787  543705 memory.go:184] no items to output this cycle
I0319 21:11:03.409793  543705 cpu.go:275] no items to output this cycle
W0319 21:11:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:11:13.409721  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:11:13.409726  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 21:11:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:11:13.409814  543705 memory.go:191] Add success.
I0319 21:11:13.409824  543705 cpu.go:282] Add success.
I0319 21:11:13.420052  543705 net.go:648] Add success.
I0319 21:11:13.423647  543705 net.go:770] primary dev: ETH0
I0319 21:11:13.423659  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:11:13.423670  543705 net.go:698] Add success.
I0319 21:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:11:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:11:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 21:11:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:11:14.456608  543705 disk_worker.go:494] system disk:vda1
I0319 21:11:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:11:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:11:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:11:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:11:20.989671  543705 disk_info.go:125] begin check local disk info of client
I0319 21:11:20.992166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:11:20.992173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f7280 0xc0001f72c0]
E0319 21:11:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:11:23.409766  543705 memory.go:184] no items to output this cycle
I0319 21:11:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 21:11:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:11:33.409781  543705 memory.go:184] no items to output this cycle
I0319 21:11:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 21:11:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:11:43.409799  543705 memory.go:191] Add success.
I0319 21:11:43.409800  543705 cpu.go:282] Add success.
I0319 21:11:43.419967  543705 net.go:648] Add success.
I0319 21:11:43.422901  543705 net.go:770] primary dev: ETH0
I0319 21:11:43.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:11:43.422926  543705 net.go:698] Add success.
I0319 21:11:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:11:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:11:53.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:11:53.409913  543705 memory.go:184] no items to output this cycle
I0319 21:11:53.410105  543705 cpu.go:275] no items to output this cycle
E0319 21:12:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:12:03.409808  543705 memory.go:184] no items to output this cycle
I0319 21:12:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 21:12:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:12:13.409790  543705 memory.go:191] Add success.
I0319 21:12:13.409794  543705 cpu.go:282] Add success.
W0319 21:12:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:12:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:12:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:12:13.420057  543705 net.go:648] Add success.
I0319 21:12:13.422791  543705 net.go:770] primary dev: ETH0
I0319 21:12:13.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:12:13.422819  543705 net.go:698] Add success.
I0319 21:12:13.858134  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"77b9025e-b7b5-403e-8831-8737b099651f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:12:13.858169  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 21:12:14.454856  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:12:14.454922  543705 disk_worker.go:708] disk space is not compliant
W0319 21:12:14.454924  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:12:14.455915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:12:14.455924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:12:14.455931  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:12:14.456296  543705 disk_worker.go:494] system disk:vda1
I0319 21:12:14.456324  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:12:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:12:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:12:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:12:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:12:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:12:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:12:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:12:20.993673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:12:20.996049  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:12:20.996055  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367800 0xc000367840]
E0319 21:12:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:12:23.409756  543705 memory.go:184] no items to output this cycle
I0319 21:12:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:12:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:12:33.409781  543705 memory.go:184] no items to output this cycle
I0319 21:12:33.409813  543705 cpu.go:275] no items to output this cycle
I0319 21:12:37.893875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:12:37.893882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:12:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:12:43.410746  543705 memory.go:191] Add success.
I0319 21:12:43.409825  543705 cpu.go:282] Add success.
I0319 21:12:43.420507  543705 net.go:648] Add success.
I0319 21:12:43.423274  543705 net.go:770] primary dev: ETH0
I0319 21:12:43.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:12:43.423308  543705 net.go:698] Add success.
I0319 21:12:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:12:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:12:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:12:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:12:53.409787  543705 memory.go:184] no items to output this cycle
I0319 21:12:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 21:13:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:13:03.409792  543705 memory.go:184] no items to output this cycle
I0319 21:13:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:13:13.409795  543705 memory.go:191] Add success.
I0319 21:13:13.409812  543705 cpu.go:282] Add success.
W0319 21:13:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:13:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:13:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:13:13.420273  543705 net.go:648] Add success.
I0319 21:13:13.422954  543705 net.go:770] primary dev: ETH0
I0319 21:13:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:13:13.422978  543705 net.go:698] Add success.
I0319 21:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:13:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:13:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 21:13:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:13:14.456490  543705 disk_worker.go:494] system disk:vda1
I0319 21:13:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:13:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:13:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:13:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:13:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:13:20.997674  543705 disk_info.go:125] begin check local disk info of client
I0319 21:13:21.000229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:13:21.000237  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391300 0xc000391340]
E0319 21:13:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:13:23.409781  543705 memory.go:184] no items to output this cycle
I0319 21:13:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 21:13:33.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:13:33.409817  543705 cpu.go:275] no items to output this cycle
I0319 21:13:33.409831  543705 memory.go:184] no items to output this cycle
E0319 21:13:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:13:43.409810  543705 memory.go:191] Add success.
I0319 21:13:43.409810  543705 cpu.go:282] Add success.
I0319 21:13:43.420440  543705 net.go:648] Add success.
I0319 21:13:43.423513  543705 net.go:770] primary dev: ETH0
I0319 21:13:43.423529  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:13:43.423542  543705 net.go:698] Add success.
I0319 21:13:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:13:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:13:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:13:53.409790  543705 memory.go:184] no items to output this cycle
I0319 21:13:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 21:14:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:14:03.409796  543705 memory.go:184] no items to output this cycle
I0319 21:14:03.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:14:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:14:13.409804  543705 memory.go:191] Add success.
I0319 21:14:13.409815  543705 cpu.go:282] Add success.
W0319 21:14:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:14:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:14:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:14:13.420161  543705 net.go:648] Add success.
I0319 21:14:13.422920  543705 net.go:770] primary dev: ETH0
I0319 21:14:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:14:13.422944  543705 net.go:698] Add success.
I0319 21:14:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:14:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:14:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 21:14:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:14:14.456494  543705 disk_worker.go:494] system disk:vda1
I0319 21:14:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:14:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:14:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:14:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:14:21.001672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:14:21.004104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:14:21.004111  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004746c0 0xc000474700]
E0319 21:14:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:14:23.409762  543705 memory.go:184] no items to output this cycle
I0319 21:14:23.409822  543705 cpu.go:275] no items to output this cycle
E0319 21:14:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:14:33.409779  543705 memory.go:184] no items to output this cycle
I0319 21:14:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 21:14:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:14:43.409923  543705 memory.go:191] Add success.
I0319 21:14:43.409943  543705 cpu.go:282] Add success.
I0319 21:14:43.419752  543705 net.go:648] Add success.
I0319 21:14:43.422832  543705 net.go:770] primary dev: ETH0
I0319 21:14:43.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:14:43.422861  543705 net.go:698] Add success.
I0319 21:14:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:14:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:14:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:14:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:14:53.409797  543705 memory.go:184] no items to output this cycle
I0319 21:14:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 21:15:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:15:03.409783  543705 memory.go:184] no items to output this cycle
I0319 21:15:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 21:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:15:13.409779  543705 memory.go:191] Add success.
I0319 21:15:13.409804  543705 cpu.go:282] Add success.
W0319 21:15:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:15:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:15:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:15:13.420073  543705 net.go:648] Add success.
I0319 21:15:13.422904  543705 net.go:770] primary dev: ETH0
I0319 21:15:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:15:13.422934  543705 net.go:698] Add success.
I0319 21:15:13.721883  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fbc7c6c4-7fc9-49cf-9eed-8c44c0206840","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:15:13.721917  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:15:14.454680  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:15:14.454816  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:15:14.454888  543705 disk_worker.go:708] disk space is not compliant
W0319 21:15:14.454891  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:15:14.456233  543705 disk_worker.go:494] system disk:vda1
I0319 21:15:14.456287  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:15:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:15:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:15:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:15:16.472476  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:15:21.005676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:15:21.008072  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:15:21.008078  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abec0 0xc0001abf00]
E0319 21:15:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:15:23.409793  543705 memory.go:184] no items to output this cycle
I0319 21:15:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 21:15:33.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:15:33.409873  543705 memory.go:184] no items to output this cycle
I0319 21:15:33.409947  543705 cpu.go:275] no items to output this cycle
I0319 21:15:37.896178  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:15:37.896184  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:15:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:15:43.410667  543705 memory.go:191] Add success.
I0319 21:15:43.409810  543705 cpu.go:282] Add success.
I0319 21:15:43.420392  543705 net.go:648] Add success.
I0319 21:15:43.423030  543705 net.go:770] primary dev: ETH0
I0319 21:15:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:15:43.423061  543705 net.go:698] Add success.
I0319 21:15:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:15:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:15:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:15:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:15:53.409786  543705 memory.go:184] no items to output this cycle
I0319 21:15:53.409789  543705 cpu.go:275] no items to output this cycle
E0319 21:16:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:16:03.409804  543705 memory.go:184] no items to output this cycle
I0319 21:16:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 21:16:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:16:13.409780  543705 memory.go:191] Add success.
I0319 21:16:13.409804  543705 cpu.go:282] Add success.
W0319 21:16:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:16:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:16:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:16:13.420090  543705 net.go:648] Add success.
I0319 21:16:13.422956  543705 net.go:770] primary dev: ETH0
I0319 21:16:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:16:13.422984  543705 net.go:698] Add success.
I0319 21:16:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:16:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:16:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 21:16:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:16:14.456572  543705 disk_worker.go:494] system disk:vda1
I0319 21:16:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:16:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:16:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:16:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:16:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:16:21.009676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:16:21.012154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:16:21.012160  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466140 0xc000466180]
E0319 21:16:23.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:16:23.409864  543705 memory.go:184] no items to output this cycle
I0319 21:16:23.409956  543705 cpu.go:275] no items to output this cycle
E0319 21:16:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:16:33.409789  543705 memory.go:184] no items to output this cycle
I0319 21:16:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 21:16:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:16:43.409784  543705 memory.go:191] Add success.
I0319 21:16:43.409804  543705 cpu.go:282] Add success.
I0319 21:16:43.420016  543705 net.go:648] Add success.
I0319 21:16:43.422662  543705 net.go:770] primary dev: ETH0
I0319 21:16:43.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:16:43.422688  543705 net.go:698] Add success.
I0319 21:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:16:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:16:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:16:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:16:53.409799  543705 memory.go:184] no items to output this cycle
I0319 21:16:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 21:17:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:17:03.409793  543705 cpu.go:275] no items to output this cycle
I0319 21:17:03.409795  543705 memory.go:184] no items to output this cycle
E0319 21:17:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:17:13.409810  543705 memory.go:191] Add success.
I0319 21:17:13.409817  543705 cpu.go:282] Add success.
W0319 21:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:17:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:17:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:17:13.420084  543705 net.go:648] Add success.
I0319 21:17:13.423094  543705 net.go:770] primary dev: ETH0
I0319 21:17:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:17:13.423120  543705 net.go:698] Add success.
I0319 21:17:13.453665  543705 event_worker.go:152] Polling the log file for events...
W0319 21:17:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:17:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 21:17:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:17:14.455882  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:17:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:17:14.455896  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:17:14.456551  543705 disk_worker.go:494] system disk:vda1
I0319 21:17:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:17:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:17:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:17:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:17:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:17:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:17:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:17:16.472356  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:17:21.013677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:17:21.016066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:17:21.016072  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a0c0 0xc00039a100]
E0319 21:17:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:17:23.409794  543705 memory.go:184] no items to output this cycle
I0319 21:17:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 21:17:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:17:33.409774  543705 memory.go:184] no items to output this cycle
I0319 21:17:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 21:17:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:17:43.409814  543705 memory.go:191] Add success.
I0319 21:17:43.409819  543705 cpu.go:282] Add success.
I0319 21:17:43.419960  543705 net.go:648] Add success.
I0319 21:17:43.422755  543705 net.go:770] primary dev: ETH0
I0319 21:17:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:17:43.422781  543705 net.go:698] Add success.
I0319 21:17:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:17:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:17:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:17:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:17:53.409785  543705 memory.go:184] no items to output this cycle
I0319 21:17:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 21:18:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:18:03.409780  543705 memory.go:184] no items to output this cycle
I0319 21:18:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 21:18:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:18:13.409812  543705 memory.go:191] Add success.
I0319 21:18:13.409819  543705 cpu.go:282] Add success.
W0319 21:18:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:18:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:18:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:18:13.420056  543705 net.go:648] Add success.
I0319 21:18:13.423114  543705 net.go:770] primary dev: ETH0
I0319 21:18:13.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:18:13.423139  543705 net.go:698] Add success.
I0319 21:18:13.518906  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"73afa646-e6b9-43a3-b6d5-ca427f2be51a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:18:13.518941  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:18:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:18:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:18:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 21:18:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:18:14.456546  543705 disk_worker.go:494] system disk:vda1
I0319 21:18:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:18:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:18:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:18:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:18:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:18:21.017669  543705 disk_info.go:125] begin check local disk info of client
I0319 21:18:21.020151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:18:21.020159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256380 0xc0002563c0]
E0319 21:18:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:18:23.409800  543705 memory.go:184] no items to output this cycle
I0319 21:18:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 21:18:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:18:33.409812  543705 memory.go:184] no items to output this cycle
I0319 21:18:33.409826  543705 cpu.go:275] no items to output this cycle
I0319 21:18:37.897733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:18:37.897740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:18:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:18:43.410628  543705 memory.go:191] Add success.
I0319 21:18:43.409832  543705 cpu.go:282] Add success.
I0319 21:18:43.420415  543705 net.go:648] Add success.
I0319 21:18:43.422923  543705 net.go:770] primary dev: ETH0
I0319 21:18:43.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:18:43.422948  543705 net.go:698] Add success.
I0319 21:18:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:18:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:18:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:18:53.409801  543705 memory.go:184] no items to output this cycle
I0319 21:18:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 21:19:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:19:03.409776  543705 memory.go:184] no items to output this cycle
I0319 21:19:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:19:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:19:13.409786  543705 memory.go:191] Add success.
I0319 21:19:13.409802  543705 cpu.go:282] Add success.
W0319 21:19:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:19:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:19:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:19:13.420240  543705 net.go:648] Add success.
I0319 21:19:13.423552  543705 net.go:770] primary dev: ETH0
I0319 21:19:13.423568  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:19:13.423581  543705 net.go:698] Add success.
I0319 21:19:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:19:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:19:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 21:19:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:19:14.456590  543705 disk_worker.go:494] system disk:vda1
I0319 21:19:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:19:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:19:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:19:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:19:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:19:21.021667  543705 disk_info.go:125] begin check local disk info of client
I0319 21:19:21.024144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:19:21.024151  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370340 0xc000370380]
E0319 21:19:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:19:23.409759  543705 memory.go:184] no items to output this cycle
I0319 21:19:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 21:19:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:19:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 21:19:33.409803  543705 memory.go:184] no items to output this cycle
E0319 21:19:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:19:43.409783  543705 memory.go:191] Add success.
I0319 21:19:43.409788  543705 cpu.go:282] Add success.
I0319 21:19:43.419888  543705 net.go:648] Add success.
I0319 21:19:43.423097  543705 net.go:770] primary dev: ETH0
I0319 21:19:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:19:43.423123  543705 net.go:698] Add success.
I0319 21:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:19:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:19:53.409797  543705 memory.go:184] no items to output this cycle
I0319 21:19:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 21:20:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:20:03.409803  543705 memory.go:184] no items to output this cycle
I0319 21:20:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 21:20:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:20:13.409813  543705 memory.go:191] Add success.
I0319 21:20:13.409825  543705 cpu.go:282] Add success.
W0319 21:20:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:20:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:20:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:20:13.420064  543705 net.go:648] Add success.
I0319 21:20:13.423331  543705 net.go:770] primary dev: ETH0
I0319 21:20:13.423344  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:20:13.423356  543705 net.go:698] Add success.
I0319 21:20:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:20:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:20:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 21:20:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:20:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 21:20:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:20:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:20:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:20:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:20:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:20:21.025671  543705 disk_info.go:125] begin check local disk info of client
I0319 21:20:21.028151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:20:21.028157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265200 0xc000265240]
E0319 21:20:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:20:23.409762  543705 memory.go:184] no items to output this cycle
I0319 21:20:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 21:20:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:20:33.409799  543705 memory.go:184] no items to output this cycle
I0319 21:20:33.409801  543705 cpu.go:275] no items to output this cycle
E0319 21:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:20:43.409784  543705 memory.go:191] Add success.
I0319 21:20:43.409815  543705 cpu.go:282] Add success.
I0319 21:20:43.419895  543705 net.go:648] Add success.
I0319 21:20:43.422854  543705 net.go:770] primary dev: ETH0
I0319 21:20:43.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:20:43.422880  543705 net.go:698] Add success.
I0319 21:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:20:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:20:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:20:53.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:20:53.410283  543705 memory.go:184] no items to output this cycle
I0319 21:20:53.410296  543705 cpu.go:275] no items to output this cycle
E0319 21:21:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:21:03.409782  543705 memory.go:184] no items to output this cycle
I0319 21:21:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 21:21:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:21:13.409789  543705 memory.go:191] Add success.
I0319 21:21:13.409812  543705 cpu.go:282] Add success.
W0319 21:21:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:21:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:21:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:21:13.420112  543705 net.go:648] Add success.
I0319 21:21:13.422821  543705 net.go:770] primary dev: ETH0
I0319 21:21:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:21:13.422850  543705 net.go:698] Add success.
I0319 21:21:13.535811  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0c76dba3-5727-4872-9a64-6b1f6b334142","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:21:13.535846  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:21:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:21:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:21:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 21:21:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:21:14.456697  543705 disk_worker.go:494] system disk:vda1
I0319 21:21:14.456735  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:21:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:21:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:21:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:21:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:21:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:21:21.029673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:21:21.032081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:21:21.032088  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002982c0 0xc000298300]
E0319 21:21:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:21:23.409760  543705 memory.go:184] no items to output this cycle
I0319 21:21:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 21:21:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:21:33.409901  543705 memory.go:184] no items to output this cycle
I0319 21:21:33.409968  543705 cpu.go:275] no items to output this cycle
I0319 21:21:37.900201  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:21:37.900206  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:21:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:21:43.410715  543705 memory.go:191] Add success.
I0319 21:21:43.409811  543705 cpu.go:282] Add success.
I0319 21:21:43.420539  543705 net.go:648] Add success.
I0319 21:21:43.423145  543705 net.go:770] primary dev: ETH0
I0319 21:21:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:21:43.423169  543705 net.go:698] Add success.
I0319 21:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:21:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:21:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:21:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:21:53.409783  543705 memory.go:184] no items to output this cycle
I0319 21:21:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 21:22:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:22:03.409784  543705 memory.go:184] no items to output this cycle
I0319 21:22:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 21:22:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:22:13.409786  543705 memory.go:191] Add success.
W0319 21:22:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:22:13.409812  543705 cpu.go:282] Add success.
W0319 21:22:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:22:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:22:13.420048  543705 net.go:648] Add success.
I0319 21:22:13.423254  543705 net.go:770] primary dev: ETH0
I0319 21:22:13.423268  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:22:13.423279  543705 net.go:698] Add success.
W0319 21:22:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:22:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 21:22:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:22:14.455870  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:22:14.455879  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:22:14.455885  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:22:14.456603  543705 disk_worker.go:494] system disk:vda1
I0319 21:22:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:22:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:22:15.456868  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:22:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:22:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:22:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:22:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:22:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:22:21.033673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:22:21.036047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:22:21.036053  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252d00 0xc000252d40]
E0319 21:22:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:22:23.409774  543705 memory.go:184] no items to output this cycle
I0319 21:22:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 21:22:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:22:33.409786  543705 memory.go:184] no items to output this cycle
I0319 21:22:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 21:22:43.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:22:43.409940  543705 memory.go:191] Add success.
I0319 21:22:43.409990  543705 cpu.go:282] Add success.
I0319 21:22:43.419711  543705 net.go:648] Add success.
I0319 21:22:43.422340  543705 net.go:770] primary dev: ETH0
I0319 21:22:43.422353  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:22:43.422365  543705 net.go:698] Add success.
I0319 21:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:22:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:22:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:22:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:22:53.409788  543705 memory.go:184] no items to output this cycle
I0319 21:22:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:23:03.409770  543705 memory.go:184] no items to output this cycle
I0319 21:23:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 21:23:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:23:13.409826  543705 memory.go:191] Add success.
I0319 21:23:13.409834  543705 cpu.go:282] Add success.
W0319 21:23:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:23:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:23:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:23:13.420108  543705 net.go:648] Add success.
I0319 21:23:13.422984  543705 net.go:770] primary dev: ETH0
I0319 21:23:13.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:23:13.423009  543705 net.go:698] Add success.
I0319 21:23:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:23:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:23:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0319 21:23:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:23:14.456570  543705 disk_worker.go:494] system disk:vda1
I0319 21:23:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:23:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:23:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:23:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:23:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:23:21.037675  543705 disk_info.go:125] begin check local disk info of client
I0319 21:23:21.040106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:23:21.040113  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e8300 0xc0000e8340]
E0319 21:23:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:23:23.409789  543705 memory.go:184] no items to output this cycle
I0319 21:23:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 21:23:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:23:33.409805  543705 memory.go:184] no items to output this cycle
I0319 21:23:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 21:23:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:23:43.409877  543705 memory.go:191] Add success.
I0319 21:23:43.409945  543705 cpu.go:282] Add success.
I0319 21:23:43.419712  543705 net.go:648] Add success.
I0319 21:23:43.422634  543705 net.go:770] primary dev: ETH0
I0319 21:23:43.422646  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:23:43.422658  543705 net.go:698] Add success.
I0319 21:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:23:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:23:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:23:53.409776  543705 memory.go:184] no items to output this cycle
I0319 21:23:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 21:24:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:24:03.409788  543705 memory.go:184] no items to output this cycle
I0319 21:24:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 21:24:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:24:13.409784  543705 memory.go:191] Add success.
I0319 21:24:13.409804  543705 cpu.go:282] Add success.
W0319 21:24:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:24:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:24:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:24:13.420285  543705 net.go:648] Add success.
I0319 21:24:13.423031  543705 net.go:770] primary dev: ETH0
I0319 21:24:13.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:24:13.423057  543705 net.go:698] Add success.
I0319 21:24:13.699424  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29a197ec-3e39-42da-ae8d-80ccbf3a5459","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:24:13.699457  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:24:14.453966  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:24:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:24:14.455281  543705 disk_worker.go:708] disk space is not compliant
W0319 21:24:14.455284  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:24:14.456828  543705 disk_worker.go:494] system disk:vda1
I0319 21:24:14.456858  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:24:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:24:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:24:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:24:21.041670  543705 disk_info.go:125] begin check local disk info of client
I0319 21:24:21.044055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:24:21.044061  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f8900 0xc0001f8940]
E0319 21:24:23.410297  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:24:23.410317  543705 memory.go:184] no items to output this cycle
I0319 21:24:23.410325  543705 cpu.go:275] no items to output this cycle
E0319 21:24:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:24:33.409791  543705 memory.go:184] no items to output this cycle
I0319 21:24:33.409792  543705 cpu.go:275] no items to output this cycle
I0319 21:24:37.901740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:24:37.901747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:24:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:24:43.409827  543705 cpu.go:282] Add success.
I0319 21:24:43.410800  543705 memory.go:191] Add success.
I0319 21:24:43.419695  543705 net.go:648] Add success.
I0319 21:24:43.422390  543705 net.go:770] primary dev: ETH0
I0319 21:24:43.422402  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:24:43.422414  543705 net.go:698] Add success.
I0319 21:24:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:24:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:24:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:24:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:24:53.409792  543705 memory.go:184] no items to output this cycle
I0319 21:24:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 21:25:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:25:03.409784  543705 memory.go:184] no items to output this cycle
I0319 21:25:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 21:25:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:25:13.409794  543705 memory.go:191] Add success.
I0319 21:25:13.409796  543705 cpu.go:282] Add success.
W0319 21:25:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:25:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:25:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:25:13.420089  543705 net.go:648] Add success.
I0319 21:25:13.422992  543705 net.go:770] primary dev: ETH0
I0319 21:25:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:25:13.423018  543705 net.go:698] Add success.
I0319 21:25:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:25:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:25:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0319 21:25:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:25:14.456571  543705 disk_worker.go:494] system disk:vda1
I0319 21:25:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:25:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:25:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:25:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:25:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:25:21.045673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:25:21.048126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:25:21.048132  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9a80 0xc0001f9ac0]
E0319 21:25:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:25:23.409788  543705 cpu.go:275] no items to output this cycle
I0319 21:25:23.409798  543705 memory.go:184] no items to output this cycle
E0319 21:25:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:25:33.409779  543705 memory.go:184] no items to output this cycle
I0319 21:25:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:25:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:25:43.409807  543705 memory.go:191] Add success.
I0319 21:25:43.409815  543705 cpu.go:282] Add success.
I0319 21:25:43.420118  543705 net.go:648] Add success.
I0319 21:25:43.422925  543705 net.go:770] primary dev: ETH0
I0319 21:25:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:25:43.422954  543705 net.go:698] Add success.
I0319 21:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:25:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:25:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:25:53.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:25:53.409827  543705 memory.go:184] no items to output this cycle
I0319 21:25:53.409833  543705 cpu.go:275] no items to output this cycle
E0319 21:26:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:26:03.409801  543705 memory.go:184] no items to output this cycle
I0319 21:26:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 21:26:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:26:13.409788  543705 memory.go:191] Add success.
I0319 21:26:13.409792  543705 cpu.go:282] Add success.
W0319 21:26:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:26:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:26:13.420288  543705 net.go:648] Add success.
I0319 21:26:13.423261  543705 net.go:770] primary dev: ETH0
I0319 21:26:13.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:26:13.423288  543705 net.go:698] Add success.
I0319 21:26:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:26:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:26:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0319 21:26:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:26:14.456597  543705 disk_worker.go:494] system disk:vda1
I0319 21:26:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:26:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:26:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:26:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:26:21.049687  543705 disk_info.go:125] begin check local disk info of client
I0319 21:26:21.052128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:26:21.052135  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be80 0xc00007bec0]
E0319 21:26:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:26:23.409811  543705 memory.go:184] no items to output this cycle
I0319 21:26:23.409823  543705 cpu.go:275] no items to output this cycle
E0319 21:26:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:26:33.409790  543705 memory.go:184] no items to output this cycle
I0319 21:26:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:26:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:26:43.409785  543705 memory.go:191] Add success.
I0319 21:26:43.409803  543705 cpu.go:282] Add success.
I0319 21:26:43.419894  543705 net.go:648] Add success.
I0319 21:26:43.422589  543705 net.go:770] primary dev: ETH0
I0319 21:26:43.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:26:43.422617  543705 net.go:698] Add success.
I0319 21:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:26:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:26:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:26:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:26:53.409778  543705 memory.go:184] no items to output this cycle
I0319 21:26:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 21:27:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:27:03.409810  543705 memory.go:184] no items to output this cycle
I0319 21:27:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 21:27:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:27:13.409795  543705 memory.go:191] Add success.
I0319 21:27:13.409797  543705 cpu.go:282] Add success.
W0319 21:27:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:27:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:27:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:27:13.420030  543705 net.go:648] Add success.
I0319 21:27:13.423224  543705 net.go:770] primary dev: ETH0
I0319 21:27:13.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:27:13.423248  543705 net.go:698] Add success.
I0319 21:27:13.429499  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 21:27:13.453662  543705 event_worker.go:152] Polling the log file for events...
I0319 21:27:13.469715  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2bac6259-cb1a-4946-9dbf-e36de3ca6f6a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:27:13.469749  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 21:27:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:27:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 21:27:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:27:14.455933  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:27:14.455942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:27:14.455948  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:27:14.456797  543705 disk_worker.go:494] system disk:vda1
I0319 21:27:14.456828  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:27:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:27:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:27:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:27:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:27:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:27:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:27:16.472347  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:27:21.053679  543705 disk_info.go:125] begin check local disk info of client
I0319 21:27:21.056112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:27:21.056119  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049cf80 0xc00049cfc0]
E0319 21:27:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:27:23.409761  543705 memory.go:184] no items to output this cycle
I0319 21:27:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:27:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:27:33.409814  543705 memory.go:184] no items to output this cycle
I0319 21:27:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 21:27:37.901890  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:27:37.901897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:27:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:27:43.410724  543705 memory.go:191] Add success.
I0319 21:27:43.409803  543705 cpu.go:282] Add success.
I0319 21:27:43.420547  543705 net.go:648] Add success.
I0319 21:27:43.423325  543705 net.go:770] primary dev: ETH0
I0319 21:27:43.423341  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:27:43.423355  543705 net.go:698] Add success.
I0319 21:27:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:27:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:27:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:27:53.410257  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:27:53.410274  543705 memory.go:184] no items to output this cycle
I0319 21:27:53.410278  543705 cpu.go:275] no items to output this cycle
E0319 21:28:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:28:03.409774  543705 memory.go:184] no items to output this cycle
I0319 21:28:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 21:28:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:28:13.409801  543705 memory.go:191] Add success.
I0319 21:28:13.409805  543705 cpu.go:282] Add success.
W0319 21:28:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:28:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:28:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:28:13.420054  543705 net.go:648] Add success.
I0319 21:28:13.422768  543705 net.go:770] primary dev: ETH0
I0319 21:28:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:28:13.422797  543705 net.go:698] Add success.
I0319 21:28:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:28:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:28:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0319 21:28:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:28:14.456520  543705 disk_worker.go:494] system disk:vda1
I0319 21:28:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:28:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:28:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:28:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:28:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:28:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:28:21.057687  543705 disk_info.go:125] begin check local disk info of client
I0319 21:28:21.060179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:28:21.060186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9780 0xc0001f97c0]
E0319 21:28:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:28:23.409771  543705 memory.go:184] no items to output this cycle
I0319 21:28:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:28:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:28:33.409797  543705 memory.go:184] no items to output this cycle
I0319 21:28:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 21:28:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:28:43.409830  543705 memory.go:191] Add success.
I0319 21:28:43.409831  543705 cpu.go:282] Add success.
I0319 21:28:43.419967  543705 net.go:648] Add success.
I0319 21:28:43.422859  543705 net.go:770] primary dev: ETH0
I0319 21:28:43.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:28:43.422889  543705 net.go:698] Add success.
I0319 21:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:28:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:28:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:28:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:28:53.409793  543705 memory.go:184] no items to output this cycle
I0319 21:28:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:29:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:29:03.409823  543705 memory.go:184] no items to output this cycle
I0319 21:29:03.409838  543705 cpu.go:275] no items to output this cycle
E0319 21:29:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:29:13.409799  543705 memory.go:191] Add success.
I0319 21:29:13.409814  543705 cpu.go:282] Add success.
W0319 21:29:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:29:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:29:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:29:13.420073  543705 net.go:648] Add success.
I0319 21:29:13.422968  543705 net.go:770] primary dev: ETH0
I0319 21:29:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:29:13.423002  543705 net.go:698] Add success.
I0319 21:29:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:29:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:29:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 21:29:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:29:14.456568  543705 disk_worker.go:494] system disk:vda1
I0319 21:29:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:29:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:29:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:29:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:29:21.061677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:29:21.064166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:29:21.064172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8580 0xc0004a85c0]
E0319 21:29:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:29:23.409806  543705 memory.go:184] no items to output this cycle
I0319 21:29:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 21:29:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:29:33.409815  543705 memory.go:184] no items to output this cycle
I0319 21:29:33.409834  543705 cpu.go:275] no items to output this cycle
E0319 21:29:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:29:43.409778  543705 memory.go:191] Add success.
I0319 21:29:43.409821  543705 cpu.go:282] Add success.
I0319 21:29:43.419865  543705 net.go:648] Add success.
I0319 21:29:43.422917  543705 net.go:770] primary dev: ETH0
I0319 21:29:43.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:29:43.422945  543705 net.go:698] Add success.
I0319 21:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:29:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:29:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:29:53.409861  543705 memory.go:184] no items to output this cycle
I0319 21:29:53.409959  543705 cpu.go:275] no items to output this cycle
E0319 21:30:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:30:03.409790  543705 memory.go:184] no items to output this cycle
I0319 21:30:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 21:30:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:30:13.409802  543705 memory.go:191] Add success.
I0319 21:30:13.409808  543705 cpu.go:282] Add success.
W0319 21:30:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:30:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:30:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:30:13.420276  543705 net.go:648] Add success.
I0319 21:30:13.423283  543705 net.go:770] primary dev: ETH0
I0319 21:30:13.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:30:13.423308  543705 net.go:698] Add success.
I0319 21:30:13.468070  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"360dbfd3-86ba-4b29-b619-d4db5da69117","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:30:13.468103  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:30:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:30:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:30:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 21:30:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:30:14.456659  543705 disk_worker.go:494] system disk:vda1
I0319 21:30:14.456690  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:30:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:30:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:30:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:30:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:30:21.065677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:30:21.068149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:30:21.068158  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049b780 0xc00049b7c0]
E0319 21:30:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:30:23.409798  543705 memory.go:184] no items to output this cycle
I0319 21:30:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 21:30:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:30:33.409793  543705 memory.go:184] no items to output this cycle
I0319 21:30:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 21:30:37.904217  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:30:37.904224  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:30:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:30:43.410673  543705 memory.go:191] Add success.
I0319 21:30:43.409799  543705 cpu.go:282] Add success.
I0319 21:30:43.420363  543705 net.go:648] Add success.
I0319 21:30:43.422965  543705 net.go:770] primary dev: ETH0
I0319 21:30:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:30:43.422993  543705 net.go:698] Add success.
I0319 21:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:30:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:30:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:30:53.409899  543705 memory.go:184] no items to output this cycle
I0319 21:30:53.409918  543705 cpu.go:275] no items to output this cycle
E0319 21:31:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:31:03.409797  543705 memory.go:184] no items to output this cycle
I0319 21:31:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:31:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:31:13.409793  543705 memory.go:191] Add success.
I0319 21:31:13.409796  543705 cpu.go:282] Add success.
W0319 21:31:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:31:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:31:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:31:13.420078  543705 net.go:648] Add success.
I0319 21:31:13.422831  543705 net.go:770] primary dev: ETH0
I0319 21:31:13.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:31:13.422857  543705 net.go:698] Add success.
I0319 21:31:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:31:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:31:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0319 21:31:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:31:14.456608  543705 disk_worker.go:494] system disk:vda1
I0319 21:31:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:31:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:31:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:31:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:31:21.069676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:31:21.072207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:31:21.072215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9ac0 0xc0004a9b00]
E0319 21:31:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:31:23.409769  543705 memory.go:184] no items to output this cycle
I0319 21:31:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:31:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:31:33.409792  543705 memory.go:184] no items to output this cycle
I0319 21:31:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:31:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:31:43.409814  543705 memory.go:191] Add success.
I0319 21:31:43.409818  543705 cpu.go:282] Add success.
I0319 21:31:43.420035  543705 net.go:648] Add success.
I0319 21:31:43.422915  543705 net.go:770] primary dev: ETH0
I0319 21:31:43.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:31:43.422947  543705 net.go:698] Add success.
I0319 21:31:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:31:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:31:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:31:53.409765  543705 memory.go:184] no items to output this cycle
I0319 21:31:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:32:03.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:32:03.409858  543705 memory.go:184] no items to output this cycle
I0319 21:32:03.410094  543705 cpu.go:275] no items to output this cycle
E0319 21:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:32:13.409794  543705 memory.go:191] Add success.
I0319 21:32:13.409797  543705 cpu.go:282] Add success.
W0319 21:32:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:32:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:32:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:32:13.420272  543705 net.go:648] Add success.
I0319 21:32:13.423162  543705 net.go:770] primary dev: ETH0
I0319 21:32:13.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:32:13.423186  543705 net.go:698] Add success.
W0319 21:32:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:32:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 21:32:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:32:14.456894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:32:14.456904  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:32:14.456910  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:32:14.456981  543705 disk_worker.go:494] system disk:vda1
I0319 21:32:14.457011  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:32:15.456862  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:32:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:32:16.458039  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:32:16.458039  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:32:16.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:32:16.458129  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:32:16.472491  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:32:21.073673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:32:21.076191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:32:21.076199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9280 0xc0004a92c0]
E0319 21:32:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:32:23.409764  543705 memory.go:184] no items to output this cycle
I0319 21:32:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 21:32:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:32:33.409812  543705 memory.go:184] no items to output this cycle
I0319 21:32:33.409825  543705 cpu.go:275] no items to output this cycle
E0319 21:32:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:32:43.409783  543705 memory.go:191] Add success.
I0319 21:32:43.409816  543705 cpu.go:282] Add success.
I0319 21:32:43.419866  543705 net.go:648] Add success.
I0319 21:32:43.422773  543705 net.go:770] primary dev: ETH0
I0319 21:32:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:32:43.422803  543705 net.go:698] Add success.
I0319 21:32:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:32:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:32:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:32:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:32:53.409765  543705 memory.go:184] no items to output this cycle
I0319 21:32:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:33:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:33:03.409811  543705 memory.go:184] no items to output this cycle
I0319 21:33:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 21:33:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:33:13.409801  543705 memory.go:191] Add success.
I0319 21:33:13.409803  543705 cpu.go:282] Add success.
W0319 21:33:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:33:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:33:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:33:13.420150  543705 net.go:648] Add success.
I0319 21:33:13.422842  543705 net.go:770] primary dev: ETH0
I0319 21:33:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:33:13.422867  543705 net.go:698] Add success.
I0319 21:33:13.464172  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e4259bf-add7-446c-a55f-3ff0437f2b0d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:33:13.464204  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:33:14.455219  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:33:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0319 21:33:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:33:14.456599  543705 disk_worker.go:494] system disk:vda1
I0319 21:33:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:33:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:33:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:33:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:33:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:33:16.472453  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:33:21.077679  543705 disk_info.go:125] begin check local disk info of client
I0319 21:33:21.080200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:33:21.080207  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bd00 0xc00007bd80]
E0319 21:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:33:23.409795  543705 memory.go:184] no items to output this cycle
I0319 21:33:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 21:33:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:33:33.409797  543705 memory.go:184] no items to output this cycle
I0319 21:33:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 21:33:37.905736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:33:37.905743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:33:43.410739  543705 memory.go:191] Add success.
I0319 21:33:43.409801  543705 cpu.go:282] Add success.
I0319 21:33:43.420452  543705 net.go:648] Add success.
I0319 21:33:43.423163  543705 net.go:770] primary dev: ETH0
I0319 21:33:43.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:33:43.423189  543705 net.go:698] Add success.
I0319 21:33:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:33:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:33:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:33:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:33:53.409780  543705 memory.go:184] no items to output this cycle
I0319 21:33:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 21:34:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:34:03.409779  543705 memory.go:184] no items to output this cycle
I0319 21:34:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:34:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:34:13.409815  543705 memory.go:191] Add success.
I0319 21:34:13.409824  543705 cpu.go:282] Add success.
W0319 21:34:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:34:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:34:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:34:13.420139  543705 net.go:648] Add success.
I0319 21:34:13.422991  543705 net.go:770] primary dev: ETH0
I0319 21:34:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:34:13.423015  543705 net.go:698] Add success.
I0319 21:34:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:34:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:34:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 21:34:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:34:14.456612  543705 disk_worker.go:494] system disk:vda1
I0319 21:34:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:34:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:34:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:34:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:34:21.081676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:34:21.084298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:34:21.084304  543705 disk_info.go:196] parse disk info done, disk is : [0xc00060a500 0xc00060a540]
E0319 21:34:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:34:23.409766  543705 memory.go:184] no items to output this cycle
I0319 21:34:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 21:34:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:34:33.409783  543705 memory.go:184] no items to output this cycle
I0319 21:34:33.409815  543705 cpu.go:275] no items to output this cycle
E0319 21:34:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:34:43.409823  543705 memory.go:191] Add success.
I0319 21:34:43.409828  543705 cpu.go:282] Add success.
I0319 21:34:43.420007  543705 net.go:648] Add success.
I0319 21:34:43.422761  543705 net.go:770] primary dev: ETH0
I0319 21:34:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:34:43.422785  543705 net.go:698] Add success.
I0319 21:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:34:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:34:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:34:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:34:53.409769  543705 memory.go:184] no items to output this cycle
I0319 21:34:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:35:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:35:03.409816  543705 memory.go:184] no items to output this cycle
I0319 21:35:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 21:35:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:35:13.409798  543705 memory.go:191] Add success.
I0319 21:35:13.409800  543705 cpu.go:282] Add success.
W0319 21:35:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:35:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:35:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:35:13.419668  543705 net.go:648] Add success.
I0319 21:35:13.422294  543705 net.go:770] primary dev: ETH0
I0319 21:35:13.422308  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:35:13.422320  543705 net.go:698] Add success.
I0319 21:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:35:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:35:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 21:35:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:35:14.456591  543705 disk_worker.go:494] system disk:vda1
I0319 21:35:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:35:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:35:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:35:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:35:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:35:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:35:21.085685  543705 disk_info.go:125] begin check local disk info of client
I0319 21:35:21.088112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:35:21.088117  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b5a80 0xc0002b5ac0]
E0319 21:35:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:35:23.409786  543705 memory.go:184] no items to output this cycle
I0319 21:35:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:35:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:35:33.409797  543705 memory.go:184] no items to output this cycle
I0319 21:35:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:35:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:35:43.409784  543705 memory.go:191] Add success.
I0319 21:35:43.409803  543705 cpu.go:282] Add success.
I0319 21:35:43.419856  543705 net.go:648] Add success.
I0319 21:35:43.422477  543705 net.go:770] primary dev: ETH0
I0319 21:35:43.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:35:43.422507  543705 net.go:698] Add success.
I0319 21:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:35:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:35:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:35:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:35:53.409779  543705 memory.go:184] no items to output this cycle
I0319 21:35:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 21:36:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:36:03.409787  543705 memory.go:184] no items to output this cycle
I0319 21:36:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 21:36:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:36:13.409914  543705 memory.go:191] Add success.
W0319 21:36:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:36:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:36:13.409968  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:36:13.410004  543705 cpu.go:282] Add success.
I0319 21:36:13.419755  543705 net.go:648] Add success.
I0319 21:36:13.422524  543705 net.go:770] primary dev: ETH0
I0319 21:36:13.422537  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:36:13.422548  543705 net.go:698] Add success.
I0319 21:36:13.471425  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0169740d-ec40-4e15-acec-b233fc19f0c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:36:13.471456  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:36:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:36:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:36:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 21:36:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:36:14.456575  543705 disk_worker.go:494] system disk:vda1
I0319 21:36:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:36:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:36:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:36:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:36:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:36:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:36:21.089675  543705 disk_info.go:125] begin check local disk info of client
I0319 21:36:21.092108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:36:21.092114  543705 disk_info.go:196] parse disk info done, disk is : [0xc00060b6c0 0xc00060b700]
E0319 21:36:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:36:23.409790  543705 memory.go:184] no items to output this cycle
I0319 21:36:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:36:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:36:33.409793  543705 memory.go:184] no items to output this cycle
I0319 21:36:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 21:36:37.905888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:36:37.905895  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:36:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:36:43.410676  543705 memory.go:191] Add success.
I0319 21:36:43.409815  543705 cpu.go:282] Add success.
I0319 21:36:43.420361  543705 net.go:648] Add success.
I0319 21:36:43.423090  543705 net.go:770] primary dev: ETH0
I0319 21:36:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:36:43.423115  543705 net.go:698] Add success.
I0319 21:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:36:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:36:53.409777  543705 memory.go:184] no items to output this cycle
I0319 21:36:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 21:37:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:37:03.409807  543705 memory.go:184] no items to output this cycle
I0319 21:37:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 21:37:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:37:13.409824  543705 memory.go:191] Add success.
I0319 21:37:13.409831  543705 cpu.go:282] Add success.
W0319 21:37:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:37:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:37:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:37:13.420174  543705 net.go:648] Add success.
I0319 21:37:13.422719  543705 net.go:770] primary dev: ETH0
I0319 21:37:13.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:37:13.422743  543705 net.go:698] Add success.
I0319 21:37:13.453445  543705 event_worker.go:152] Polling the log file for events...
W0319 21:37:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:37:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 21:37:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:37:14.456916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:37:14.456926  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:37:14.456932  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:37:14.456997  543705 disk_worker.go:494] system disk:vda1
I0319 21:37:14.457036  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:37:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:37:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:37:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:37:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:37:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:37:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:37:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:37:21.093671  543705 disk_info.go:125] begin check local disk info of client
I0319 21:37:21.096123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:37:21.096129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8540 0xc0003e8580]
E0319 21:37:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:37:23.409761  543705 memory.go:184] no items to output this cycle
I0319 21:37:23.409787  543705 cpu.go:275] no items to output this cycle
E0319 21:37:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:37:33.409784  543705 memory.go:184] no items to output this cycle
I0319 21:37:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 21:37:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:37:43.409816  543705 memory.go:191] Add success.
I0319 21:37:43.409825  543705 cpu.go:282] Add success.
I0319 21:37:43.419989  543705 net.go:648] Add success.
I0319 21:37:43.422765  543705 net.go:770] primary dev: ETH0
I0319 21:37:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:37:43.422791  543705 net.go:698] Add success.
I0319 21:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:37:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:37:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:37:53.410187  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:37:53.410202  543705 memory.go:184] no items to output this cycle
I0319 21:37:53.410226  543705 cpu.go:275] no items to output this cycle
E0319 21:38:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:38:03.409816  543705 memory.go:184] no items to output this cycle
I0319 21:38:03.409823  543705 cpu.go:275] no items to output this cycle
I0319 21:38:13.409908  543705 cpu.go:282] Add success.
E0319 21:38:13.409966  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:38:13.409994  543705 memory.go:191] Add success.
W0319 21:38:13.410027  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:38:13.410049  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:38:13.410053  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:38:13.419762  543705 net.go:648] Add success.
I0319 21:38:13.422582  543705 net.go:770] primary dev: ETH0
I0319 21:38:13.422595  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:38:13.422606  543705 net.go:698] Add success.
I0319 21:38:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:38:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:38:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 21:38:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:38:14.456570  543705 disk_worker.go:494] system disk:vda1
I0319 21:38:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:38:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:38:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:38:16.472444  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:38:21.097677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:38:21.100142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:38:21.100148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d00 0xc0000c4d40]
E0319 21:38:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:38:23.409805  543705 memory.go:184] no items to output this cycle
I0319 21:38:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 21:38:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:38:33.409796  543705 memory.go:184] no items to output this cycle
I0319 21:38:33.409825  543705 cpu.go:275] no items to output this cycle
E0319 21:38:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:38:43.409803  543705 memory.go:191] Add success.
I0319 21:38:43.409810  543705 cpu.go:282] Add success.
I0319 21:38:43.419894  543705 net.go:648] Add success.
I0319 21:38:43.422853  543705 net.go:770] primary dev: ETH0
I0319 21:38:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:38:43.422880  543705 net.go:698] Add success.
I0319 21:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:38:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:38:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:38:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:38:53.409793  543705 memory.go:184] no items to output this cycle
I0319 21:38:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:39:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:39:03.409800  543705 memory.go:184] no items to output this cycle
I0319 21:39:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 21:39:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:39:13.409897  543705 memory.go:191] Add success.
W0319 21:39:13.409930  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:39:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:39:13.409946  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:39:13.409947  543705 cpu.go:282] Add success.
I0319 21:39:13.419730  543705 net.go:648] Add success.
I0319 21:39:13.422548  543705 net.go:770] primary dev: ETH0
I0319 21:39:13.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:39:13.422575  543705 net.go:698] Add success.
I0319 21:39:13.469022  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f3e888c7-bbf7-4e3c-8dbd-a34726dd0072","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:39:13.469053  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:39:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:39:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:39:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 21:39:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:39:14.456489  543705 disk_worker.go:494] system disk:vda1
I0319 21:39:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:39:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:39:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:39:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:39:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:39:21.101672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:39:21.104129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:39:21.104135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be840 0xc0003be880]
E0319 21:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:39:23.409791  543705 memory.go:184] no items to output this cycle
I0319 21:39:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 21:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:39:33.409783  543705 memory.go:184] no items to output this cycle
I0319 21:39:33.409820  543705 cpu.go:275] no items to output this cycle
I0319 21:39:37.906036  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:39:37.906043  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:39:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:39:43.410744  543705 memory.go:191] Add success.
I0319 21:39:43.409796  543705 cpu.go:282] Add success.
I0319 21:39:43.420463  543705 net.go:648] Add success.
I0319 21:39:43.423375  543705 net.go:770] primary dev: ETH0
I0319 21:39:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:39:43.423415  543705 net.go:698] Add success.
I0319 21:39:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:39:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:39:53.409771  543705 memory.go:184] no items to output this cycle
I0319 21:39:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 21:40:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:40:03.409806  543705 memory.go:184] no items to output this cycle
I0319 21:40:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 21:40:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:40:13.409786  543705 memory.go:191] Add success.
W0319 21:40:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:40:13.409816  543705 cpu.go:282] Add success.
W0319 21:40:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:40:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:40:13.420401  543705 net.go:648] Add success.
I0319 21:40:13.423124  543705 net.go:770] primary dev: ETH0
I0319 21:40:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:40:13.423148  543705 net.go:698] Add success.
I0319 21:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:40:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:40:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0319 21:40:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:40:14.456494  543705 disk_worker.go:494] system disk:vda1
I0319 21:40:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:40:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:40:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:40:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:40:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:40:21.105672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:40:21.108183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:40:21.108189  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328980 0xc0003289c0]
E0319 21:40:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:40:23.409766  543705 memory.go:184] no items to output this cycle
I0319 21:40:23.409790  543705 cpu.go:275] no items to output this cycle
E0319 21:40:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:40:33.409822  543705 memory.go:184] no items to output this cycle
I0319 21:40:33.409832  543705 cpu.go:275] no items to output this cycle
E0319 21:40:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:40:43.409794  543705 memory.go:191] Add success.
I0319 21:40:43.409819  543705 cpu.go:282] Add success.
I0319 21:40:43.419897  543705 net.go:648] Add success.
I0319 21:40:43.422722  543705 net.go:770] primary dev: ETH0
I0319 21:40:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:40:43.422760  543705 net.go:698] Add success.
I0319 21:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:40:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:40:53.409775  543705 cpu.go:275] no items to output this cycle
E0319 21:40:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:40:53.409791  543705 memory.go:184] no items to output this cycle
E0319 21:41:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:41:03.409812  543705 memory.go:184] no items to output this cycle
I0319 21:41:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 21:41:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:41:13.409791  543705 memory.go:191] Add success.
I0319 21:41:13.409794  543705 cpu.go:282] Add success.
W0319 21:41:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:41:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:41:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:41:13.420043  543705 net.go:648] Add success.
I0319 21:41:13.423027  543705 net.go:770] primary dev: ETH0
I0319 21:41:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:41:13.423052  543705 net.go:698] Add success.
I0319 21:41:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:41:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:41:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 21:41:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:41:14.457051  543705 disk_worker.go:494] system disk:vda1
I0319 21:41:14.457080  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:41:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:41:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:41:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:41:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:41:21.109676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:41:21.112001  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:41:21.112007  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047c380 0xc00047c3c0]
E0319 21:41:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:41:23.409786  543705 memory.go:184] no items to output this cycle
I0319 21:41:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 21:41:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:41:33.409795  543705 memory.go:184] no items to output this cycle
I0319 21:41:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 21:41:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:41:43.409792  543705 memory.go:191] Add success.
I0319 21:41:43.409794  543705 cpu.go:282] Add success.
I0319 21:41:43.419851  543705 net.go:648] Add success.
I0319 21:41:43.422631  543705 net.go:770] primary dev: ETH0
I0319 21:41:43.422644  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:41:43.422657  543705 net.go:698] Add success.
I0319 21:41:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:41:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:41:53.410345  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:41:53.410366  543705 memory.go:184] no items to output this cycle
I0319 21:41:53.410375  543705 cpu.go:275] no items to output this cycle
E0319 21:42:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:42:03.409786  543705 memory.go:184] no items to output this cycle
I0319 21:42:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 21:42:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:42:13.409830  543705 memory.go:191] Add success.
I0319 21:42:13.409834  543705 cpu.go:282] Add success.
W0319 21:42:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:42:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:42:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:42:13.420113  543705 net.go:648] Add success.
I0319 21:42:13.423186  543705 net.go:770] primary dev: ETH0
I0319 21:42:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:42:13.423213  543705 net.go:698] Add success.
I0319 21:42:13.470055  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2d97d857-a847-4bbb-9fd2-3aed89f2a4ae","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:42:13.470087  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 21:42:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:42:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 21:42:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:42:14.457346  543705 disk_worker.go:494] system disk:vda1
E0319 21:42:14.457349  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:42:14.457362  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:42:14.457367  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:42:14.457513  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:42:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 21:42:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:42:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:42:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:42:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:42:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:42:16.472336  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:42:21.113688  543705 disk_info.go:125] begin check local disk info of client
I0319 21:42:21.116074  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:42:21.116080  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330780 0xc0003307c0]
E0319 21:42:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:42:23.409786  543705 memory.go:184] no items to output this cycle
I0319 21:42:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 21:42:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:42:33.409814  543705 memory.go:184] no items to output this cycle
I0319 21:42:33.409829  543705 cpu.go:275] no items to output this cycle
I0319 21:42:37.908236  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:42:37.908243  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:42:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:42:43.410787  543705 memory.go:191] Add success.
I0319 21:42:43.409802  543705 cpu.go:282] Add success.
I0319 21:42:43.420486  543705 net.go:648] Add success.
I0319 21:42:43.423339  543705 net.go:770] primary dev: ETH0
I0319 21:42:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:42:43.423366  543705 net.go:698] Add success.
I0319 21:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:42:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:42:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:42:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:42:53.409768  543705 memory.go:184] no items to output this cycle
I0319 21:42:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 21:43:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:43:03.409810  543705 memory.go:184] no items to output this cycle
I0319 21:43:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 21:43:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:43:13.409789  543705 memory.go:191] Add success.
I0319 21:43:13.409807  543705 cpu.go:282] Add success.
W0319 21:43:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:43:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:43:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:43:13.420138  543705 net.go:648] Add success.
I0319 21:43:13.423082  543705 net.go:770] primary dev: ETH0
I0319 21:43:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:43:13.423106  543705 net.go:698] Add success.
I0319 21:43:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:43:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:43:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 21:43:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:43:14.456496  543705 disk_worker.go:494] system disk:vda1
I0319 21:43:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:43:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:43:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:43:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:43:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:43:16.472430  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:43:21.117671  543705 disk_info.go:125] begin check local disk info of client
I0319 21:43:21.120123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:43:21.120132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369280 0xc0003692c0]
E0319 21:43:23.410371  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:43:23.410386  543705 memory.go:184] no items to output this cycle
I0319 21:43:23.410430  543705 cpu.go:275] no items to output this cycle
E0319 21:43:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:43:33.409790  543705 memory.go:184] no items to output this cycle
I0319 21:43:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 21:43:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:43:43.409826  543705 memory.go:191] Add success.
I0319 21:43:43.409831  543705 cpu.go:282] Add success.
I0319 21:43:43.419982  543705 net.go:648] Add success.
I0319 21:43:43.423043  543705 net.go:770] primary dev: ETH0
I0319 21:43:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:43:43.423080  543705 net.go:698] Add success.
I0319 21:43:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:43:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:43:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:43:53.409771  543705 memory.go:184] no items to output this cycle
I0319 21:43:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:44:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:44:03.409812  543705 memory.go:184] no items to output this cycle
I0319 21:44:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 21:44:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:44:13.409808  543705 memory.go:191] Add success.
I0319 21:44:13.409811  543705 cpu.go:282] Add success.
W0319 21:44:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:44:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:44:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:44:13.420182  543705 net.go:648] Add success.
I0319 21:44:13.422855  543705 net.go:770] primary dev: ETH0
I0319 21:44:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:44:13.422883  543705 net.go:698] Add success.
I0319 21:44:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:44:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:44:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 21:44:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:44:14.456574  543705 disk_worker.go:494] system disk:vda1
I0319 21:44:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:44:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:44:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:44:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:44:21.121677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:44:21.124087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:44:21.124093  543705 disk_info.go:196] parse disk info done, disk is : [0xc000231280 0xc0002312c0]
E0319 21:44:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:44:23.409804  543705 memory.go:184] no items to output this cycle
I0319 21:44:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 21:44:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:44:33.409795  543705 memory.go:184] no items to output this cycle
I0319 21:44:33.409916  543705 cpu.go:275] no items to output this cycle
E0319 21:44:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:44:43.409802  543705 memory.go:191] Add success.
I0319 21:44:43.409809  543705 cpu.go:282] Add success.
I0319 21:44:43.419897  543705 net.go:648] Add success.
I0319 21:44:43.422586  543705 net.go:770] primary dev: ETH0
I0319 21:44:43.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:44:43.422617  543705 net.go:698] Add success.
I0319 21:44:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:44:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:44:53.409789  543705 memory.go:184] no items to output this cycle
I0319 21:44:53.409793  543705 cpu.go:275] no items to output this cycle
E0319 21:45:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:45:03.409776  543705 memory.go:184] no items to output this cycle
I0319 21:45:03.409882  543705 cpu.go:275] no items to output this cycle
E0319 21:45:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:45:13.409826  543705 memory.go:191] Add success.
I0319 21:45:13.409832  543705 cpu.go:282] Add success.
W0319 21:45:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:45:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:45:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:45:13.420156  543705 net.go:648] Add success.
I0319 21:45:13.423102  543705 net.go:770] primary dev: ETH0
I0319 21:45:13.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:45:13.423127  543705 net.go:698] Add success.
I0319 21:45:13.468939  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c07c43c8-bfc1-429b-b071-0dd4dafac9b0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:45:13.468972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:45:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:45:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:45:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 21:45:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:45:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 21:45:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:45:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:45:21.125673  543705 disk_info.go:125] begin check local disk info of client
I0319 21:45:21.128125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:45:21.128131  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024eb00 0xc00024eb40]
E0319 21:45:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:45:23.409766  543705 memory.go:184] no items to output this cycle
I0319 21:45:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:45:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:45:33.409929  543705 memory.go:184] no items to output this cycle
I0319 21:45:33.409952  543705 cpu.go:275] no items to output this cycle
I0319 21:45:37.909736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:45:37.909742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:45:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:45:43.410659  543705 memory.go:191] Add success.
I0319 21:45:43.409815  543705 cpu.go:282] Add success.
I0319 21:45:43.420339  543705 net.go:648] Add success.
I0319 21:45:43.423094  543705 net.go:770] primary dev: ETH0
I0319 21:45:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:45:43.423118  543705 net.go:698] Add success.
I0319 21:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:45:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:45:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:45:53.409781  543705 memory.go:184] no items to output this cycle
I0319 21:45:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 21:46:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:46:03.409791  543705 cpu.go:275] no items to output this cycle
I0319 21:46:03.409801  543705 memory.go:184] no items to output this cycle
E0319 21:46:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:46:13.409789  543705 memory.go:191] Add success.
I0319 21:46:13.409793  543705 cpu.go:282] Add success.
W0319 21:46:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:46:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:46:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:46:13.420067  543705 net.go:648] Add success.
I0319 21:46:13.422858  543705 net.go:770] primary dev: ETH0
I0319 21:46:13.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:46:13.422884  543705 net.go:698] Add success.
I0319 21:46:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:46:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:46:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 21:46:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:46:14.456559  543705 disk_worker.go:494] system disk:vda1
I0319 21:46:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:46:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:46:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:46:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:46:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:46:21.129675  543705 disk_info.go:125] begin check local disk info of client
I0319 21:46:21.132157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:46:21.132163  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf040 0xc0003bf080]
E0319 21:46:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:46:23.409770  543705 memory.go:184] no items to output this cycle
I0319 21:46:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 21:46:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:46:33.409820  543705 memory.go:184] no items to output this cycle
I0319 21:46:33.409831  543705 cpu.go:275] no items to output this cycle
E0319 21:46:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:46:43.409794  543705 memory.go:191] Add success.
I0319 21:46:43.409812  543705 cpu.go:282] Add success.
I0319 21:46:43.419983  543705 net.go:648] Add success.
I0319 21:46:43.422628  543705 net.go:770] primary dev: ETH0
I0319 21:46:43.422640  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:46:43.422652  543705 net.go:698] Add success.
I0319 21:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:46:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:46:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:46:53.409782  543705 memory.go:184] no items to output this cycle
I0319 21:46:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 21:47:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:47:03.409805  543705 memory.go:184] no items to output this cycle
I0319 21:47:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 21:47:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:47:13.409811  543705 memory.go:191] Add success.
I0319 21:47:13.409820  543705 cpu.go:282] Add success.
W0319 21:47:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:47:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:47:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:47:13.420299  543705 net.go:648] Add success.
I0319 21:47:13.423254  543705 net.go:770] primary dev: ETH0
I0319 21:47:13.423269  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:47:13.423280  543705 net.go:698] Add success.
I0319 21:47:13.452792  543705 event_worker.go:152] Polling the log file for events...
W0319 21:47:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:47:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 21:47:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:47:14.456783  543705 disk_worker.go:494] system disk:vda1
I0319 21:47:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:47:14.456994  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:47:14.457003  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:47:14.457009  543705 custom_config.go:64] query custom config with name: gpu
E0319 21:47:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:47:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:47:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:47:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:47:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:47:16.458037  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:47:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:47:21.133672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:47:21.136067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:47:21.136073  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390c80 0xc000390cc0]
E0319 21:47:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:47:23.409786  543705 memory.go:184] no items to output this cycle
I0319 21:47:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 21:47:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:47:33.409792  543705 memory.go:184] no items to output this cycle
I0319 21:47:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 21:47:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:47:43.409788  543705 memory.go:191] Add success.
I0319 21:47:43.409818  543705 cpu.go:282] Add success.
I0319 21:47:43.419699  543705 net.go:770] primary dev: ETH0
I0319 21:47:43.419715  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:47:43.419730  543705 net.go:698] Add success.
I0319 21:47:43.420090  543705 net.go:648] Add success.
I0319 21:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:47:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:47:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:47:53.409770  543705 memory.go:184] no items to output this cycle
I0319 21:47:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 21:48:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:48:03.409812  543705 memory.go:184] no items to output this cycle
I0319 21:48:03.409824  543705 cpu.go:275] no items to output this cycle
E0319 21:48:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:48:13.409793  543705 memory.go:191] Add success.
I0319 21:48:13.409797  543705 cpu.go:282] Add success.
W0319 21:48:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:48:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:48:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:48:13.420035  543705 net.go:648] Add success.
I0319 21:48:13.422713  543705 net.go:770] primary dev: ETH0
I0319 21:48:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:48:13.422739  543705 net.go:698] Add success.
I0319 21:48:13.854475  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7dc12a95-8b72-4c88-8c68-620246dffaab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:48:13.854513  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:48:14.453981  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:48:14.454238  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:48:14.454248  543705 disk_worker.go:708] disk space is not compliant
W0319 21:48:14.454250  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:48:14.455794  543705 disk_worker.go:494] system disk:vda1
I0319 21:48:14.455825  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:48:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:48:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:48:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:48:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:48:21.137674  543705 disk_info.go:125] begin check local disk info of client
I0319 21:48:21.140124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:48:21.140130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e15c0 0xc0003e1600]
E0319 21:48:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:48:23.409791  543705 memory.go:184] no items to output this cycle
I0319 21:48:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 21:48:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:48:33.409796  543705 memory.go:184] no items to output this cycle
I0319 21:48:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 21:48:37.912255  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:48:37.912262  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:48:43.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:48:43.409987  543705 cpu.go:282] Add success.
I0319 21:48:43.410701  543705 memory.go:191] Add success.
I0319 21:48:43.419709  543705 net.go:648] Add success.
I0319 21:48:43.422459  543705 net.go:770] primary dev: ETH0
I0319 21:48:43.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:48:43.422483  543705 net.go:698] Add success.
I0319 21:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:48:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:48:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:48:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:48:53.409777  543705 memory.go:184] no items to output this cycle
I0319 21:48:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 21:49:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:49:03.409788  543705 memory.go:184] no items to output this cycle
I0319 21:49:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 21:49:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:49:13.409790  543705 memory.go:191] Add success.
I0319 21:49:13.409790  543705 cpu.go:282] Add success.
W0319 21:49:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:49:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:49:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:49:13.420208  543705 net.go:648] Add success.
I0319 21:49:13.423284  543705 net.go:770] primary dev: ETH0
I0319 21:49:13.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:49:13.423309  543705 net.go:698] Add success.
I0319 21:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:49:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:49:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0319 21:49:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:49:14.456565  543705 disk_worker.go:494] system disk:vda1
I0319 21:49:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:49:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:49:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:49:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:49:21.141690  543705 disk_info.go:125] begin check local disk info of client
I0319 21:49:21.144090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:49:21.144096  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252580 0xc0002525c0]
E0319 21:49:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:49:23.409787  543705 memory.go:184] no items to output this cycle
I0319 21:49:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 21:49:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:49:33.409787  543705 memory.go:184] no items to output this cycle
I0319 21:49:33.409817  543705 cpu.go:275] no items to output this cycle
E0319 21:49:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:49:43.409795  543705 memory.go:191] Add success.
I0319 21:49:43.409798  543705 cpu.go:282] Add success.
I0319 21:49:43.420325  543705 net.go:648] Add success.
I0319 21:49:43.423291  543705 net.go:770] primary dev: ETH0
I0319 21:49:43.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:49:43.423323  543705 net.go:698] Add success.
I0319 21:49:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:49:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:49:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:49:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:49:53.409763  543705 memory.go:184] no items to output this cycle
I0319 21:49:53.409796  543705 cpu.go:275] no items to output this cycle
E0319 21:50:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:50:03.409786  543705 memory.go:184] no items to output this cycle
I0319 21:50:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 21:50:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:50:13.409793  543705 memory.go:191] Add success.
I0319 21:50:13.409794  543705 cpu.go:282] Add success.
W0319 21:50:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:50:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:50:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:50:13.420146  543705 net.go:648] Add success.
I0319 21:50:13.422918  543705 net.go:770] primary dev: ETH0
I0319 21:50:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:50:13.422947  543705 net.go:698] Add success.
I0319 21:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:50:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:50:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 21:50:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:50:14.456499  543705 disk_worker.go:494] system disk:vda1
I0319 21:50:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:50:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:50:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:50:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:50:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:50:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:50:21.145672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:50:21.148053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:50:21.148059  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1b00 0xc0003e1bc0]
E0319 21:50:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:50:23.409786  543705 memory.go:184] no items to output this cycle
I0319 21:50:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 21:50:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:50:33.409827  543705 memory.go:184] no items to output this cycle
I0319 21:50:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 21:50:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:50:43.409778  543705 memory.go:191] Add success.
I0319 21:50:43.409820  543705 cpu.go:282] Add success.
I0319 21:50:43.420094  543705 net.go:648] Add success.
I0319 21:50:43.422878  543705 net.go:770] primary dev: ETH0
I0319 21:50:43.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:50:43.422903  543705 net.go:698] Add success.
I0319 21:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:50:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:50:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:50:53.409770  543705 memory.go:184] no items to output this cycle
I0319 21:50:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 21:51:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:51:03.409804  543705 memory.go:184] no items to output this cycle
I0319 21:51:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 21:51:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:51:13.409779  543705 memory.go:191] Add success.
W0319 21:51:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:51:13.409811  543705 cpu.go:282] Add success.
W0319 21:51:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:51:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:51:13.420060  543705 net.go:648] Add success.
I0319 21:51:13.422893  543705 net.go:770] primary dev: ETH0
I0319 21:51:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:51:13.422925  543705 net.go:698] Add success.
I0319 21:51:13.580373  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0f8a65e0-32b6-4f03-8525-96aaf2e36808","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:51:13.580408  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:51:14.453984  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:51:14.454233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:51:14.454243  543705 disk_worker.go:708] disk space is not compliant
W0319 21:51:14.454245  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:51:14.455759  543705 disk_worker.go:494] system disk:vda1
I0319 21:51:14.455796  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:51:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:51:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:51:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:51:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:51:21.149674  543705 disk_info.go:125] begin check local disk info of client
I0319 21:51:21.152139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:51:21.152145  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b240 0xc00048b280]
E0319 21:51:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:51:23.409760  543705 memory.go:184] no items to output this cycle
I0319 21:51:23.409798  543705 cpu.go:275] no items to output this cycle
I0319 21:51:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 21:51:33.409822  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:51:33.409845  543705 memory.go:184] no items to output this cycle
I0319 21:51:37.913748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:51:37.913756  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:51:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:51:43.410682  543705 memory.go:191] Add success.
I0319 21:51:43.409836  543705 cpu.go:282] Add success.
I0319 21:51:43.420393  543705 net.go:648] Add success.
I0319 21:51:43.423684  543705 net.go:770] primary dev: ETH0
I0319 21:51:43.423699  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:51:43.423714  543705 net.go:698] Add success.
I0319 21:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:51:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:51:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:51:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:51:53.409773  543705 memory.go:184] no items to output this cycle
I0319 21:51:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 21:52:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:52:03.409779  543705 memory.go:184] no items to output this cycle
I0319 21:52:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 21:52:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:52:13.409782  543705 memory.go:191] Add success.
W0319 21:52:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:52:13.409809  543705 cpu.go:282] Add success.
W0319 21:52:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:52:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:52:13.420064  543705 net.go:648] Add success.
I0319 21:52:13.422740  543705 net.go:770] primary dev: ETH0
I0319 21:52:13.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:52:13.422766  543705 net.go:698] Add success.
W0319 21:52:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:52:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 21:52:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:52:14.456799  543705 disk_worker.go:494] system disk:vda1
I0319 21:52:14.456837  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:52:14.457106  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:52:14.457114  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:52:14.457118  543705 custom_config.go:64] query custom config with name: gpu
E0319 21:52:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:52:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:52:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:52:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:52:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:52:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:52:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:52:21.153678  543705 disk_info.go:125] begin check local disk info of client
I0319 21:52:21.156104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:52:21.156110  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252ac0 0xc000252b00]
E0319 21:52:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:52:23.409796  543705 memory.go:184] no items to output this cycle
I0319 21:52:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 21:52:33.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:52:33.409917  543705 memory.go:184] no items to output this cycle
I0319 21:52:33.410098  543705 cpu.go:275] no items to output this cycle
E0319 21:52:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:52:43.409806  543705 cpu.go:282] Add success.
I0319 21:52:43.409811  543705 memory.go:191] Add success.
I0319 21:52:43.419881  543705 net.go:648] Add success.
I0319 21:52:43.423781  543705 net.go:770] primary dev: ETH0
I0319 21:52:43.423800  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:52:43.423814  543705 net.go:698] Add success.
I0319 21:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:52:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:52:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:52:53.409765  543705 memory.go:184] no items to output this cycle
I0319 21:52:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 21:53:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:53:03.409813  543705 memory.go:184] no items to output this cycle
I0319 21:53:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 21:53:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:53:13.409816  543705 memory.go:191] Add success.
I0319 21:53:13.409822  543705 cpu.go:282] Add success.
W0319 21:53:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:53:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:53:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:53:13.420062  543705 net.go:648] Add success.
I0319 21:53:13.422893  543705 net.go:770] primary dev: ETH0
I0319 21:53:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:53:13.422922  543705 net.go:698] Add success.
I0319 21:53:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:53:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:53:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 21:53:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:53:14.456587  543705 disk_worker.go:494] system disk:vda1
I0319 21:53:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:53:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:53:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:53:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:53:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:53:21.157677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:53:21.160117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:53:21.160123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab880 0xc0003ab8c0]
E0319 21:53:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:53:23.409794  543705 memory.go:184] no items to output this cycle
I0319 21:53:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 21:53:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:53:33.409824  543705 memory.go:184] no items to output this cycle
I0319 21:53:33.409830  543705 cpu.go:275] no items to output this cycle
E0319 21:53:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:53:43.409785  543705 memory.go:191] Add success.
I0319 21:53:43.409804  543705 cpu.go:282] Add success.
I0319 21:53:43.419901  543705 net.go:648] Add success.
I0319 21:53:43.422676  543705 net.go:770] primary dev: ETH0
I0319 21:53:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:53:43.422703  543705 net.go:698] Add success.
I0319 21:53:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:53:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:53:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:53:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:53:53.409803  543705 memory.go:184] no items to output this cycle
I0319 21:53:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 21:54:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:54:03.409787  543705 memory.go:184] no items to output this cycle
I0319 21:54:03.409828  543705 cpu.go:275] no items to output this cycle
E0319 21:54:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:54:13.409821  543705 memory.go:191] Add success.
I0319 21:54:13.409826  543705 cpu.go:282] Add success.
W0319 21:54:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:54:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:54:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:54:13.420418  543705 net.go:648] Add success.
I0319 21:54:13.423386  543705 net.go:770] primary dev: ETH0
I0319 21:54:13.423398  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:54:13.423411  543705 net.go:698] Add success.
I0319 21:54:13.468893  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"28746f54-5691-41cf-830b-ebf8519766c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:54:13.468928  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 21:54:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:54:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:54:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 21:54:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:54:14.456605  543705 disk_worker.go:494] system disk:vda1
I0319 21:54:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:54:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:54:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:54:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:54:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:54:16.472110  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:54:21.161680  543705 disk_info.go:125] begin check local disk info of client
I0319 21:54:21.164136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:54:21.164143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e2080 0xc0001e20c0]
E0319 21:54:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:54:23.409804  543705 memory.go:184] no items to output this cycle
I0319 21:54:23.409816  543705 cpu.go:275] no items to output this cycle
E0319 21:54:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:54:33.409795  543705 memory.go:184] no items to output this cycle
I0319 21:54:33.409857  543705 cpu.go:275] no items to output this cycle
I0319 21:54:37.916271  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:54:37.916279  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:54:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:54:43.410941  543705 memory.go:191] Add success.
I0319 21:54:43.409837  543705 cpu.go:282] Add success.
I0319 21:54:43.420658  543705 net.go:648] Add success.
I0319 21:54:43.423273  543705 net.go:770] primary dev: ETH0
I0319 21:54:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:54:43.423302  543705 net.go:698] Add success.
I0319 21:54:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:54:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:54:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:54:53.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:54:53.410266  543705 memory.go:184] no items to output this cycle
I0319 21:54:53.410275  543705 cpu.go:275] no items to output this cycle
E0319 21:55:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:55:03.409786  543705 memory.go:184] no items to output this cycle
I0319 21:55:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 21:55:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:55:13.409793  543705 memory.go:191] Add success.
I0319 21:55:13.409815  543705 cpu.go:282] Add success.
W0319 21:55:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:55:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:55:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:55:13.420256  543705 net.go:648] Add success.
I0319 21:55:13.423052  543705 net.go:770] primary dev: ETH0
I0319 21:55:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:55:13.423078  543705 net.go:698] Add success.
I0319 21:55:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:55:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:55:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 21:55:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:55:14.456581  543705 disk_worker.go:494] system disk:vda1
I0319 21:55:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:55:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:55:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:55:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:55:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:55:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:55:21.165676  543705 disk_info.go:125] begin check local disk info of client
I0319 21:55:21.168147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:55:21.168153  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048e440 0xc00048e480]
E0319 21:55:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:55:23.409808  543705 memory.go:184] no items to output this cycle
I0319 21:55:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 21:55:33.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:55:33.409833  543705 memory.go:184] no items to output this cycle
I0319 21:55:33.409873  543705 cpu.go:275] no items to output this cycle
E0319 21:55:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:55:43.409788  543705 memory.go:191] Add success.
I0319 21:55:43.409832  543705 cpu.go:282] Add success.
I0319 21:55:43.419721  543705 net.go:770] primary dev: ETH0
I0319 21:55:43.419737  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:55:43.419752  543705 net.go:698] Add success.
I0319 21:55:43.420122  543705 net.go:648] Add success.
I0319 21:55:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:55:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:55:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:55:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:55:53.409808  543705 memory.go:184] no items to output this cycle
I0319 21:55:53.409824  543705 cpu.go:275] no items to output this cycle
E0319 21:56:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:56:03.409787  543705 memory.go:184] no items to output this cycle
I0319 21:56:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 21:56:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:56:13.409784  543705 memory.go:191] Add success.
W0319 21:56:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 21:56:13.409813  543705 cpu.go:282] Add success.
W0319 21:56:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:56:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:56:13.420123  543705 net.go:648] Add success.
I0319 21:56:13.422905  543705 net.go:770] primary dev: ETH0
I0319 21:56:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:56:13.422930  543705 net.go:698] Add success.
I0319 21:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:56:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:56:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 21:56:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:56:14.456574  543705 disk_worker.go:494] system disk:vda1
I0319 21:56:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:56:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:56:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:56:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:56:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:56:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:56:21.169672  543705 disk_info.go:125] begin check local disk info of client
I0319 21:56:21.172255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:56:21.172260  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344100 0xc000344140]
E0319 21:56:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:56:23.409776  543705 cpu.go:275] no items to output this cycle
I0319 21:56:23.409785  543705 memory.go:184] no items to output this cycle
E0319 21:56:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:56:33.409796  543705 memory.go:184] no items to output this cycle
I0319 21:56:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 21:56:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:56:43.409800  543705 memory.go:191] Add success.
I0319 21:56:43.409819  543705 cpu.go:282] Add success.
I0319 21:56:43.419897  543705 net.go:648] Add success.
I0319 21:56:43.422606  543705 net.go:770] primary dev: ETH0
I0319 21:56:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:56:43.422630  543705 net.go:698] Add success.
I0319 21:56:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:56:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:56:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:56:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:56:53.410411  543705 memory.go:184] no items to output this cycle
I0319 21:56:53.410416  543705 cpu.go:275] no items to output this cycle
E0319 21:57:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:57:03.409809  543705 memory.go:184] no items to output this cycle
I0319 21:57:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 21:57:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:57:13.409784  543705 memory.go:191] Add success.
I0319 21:57:13.409804  543705 cpu.go:282] Add success.
W0319 21:57:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:57:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:57:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:57:13.420101  543705 net.go:648] Add success.
I0319 21:57:13.429092  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 21:57:13.429167  543705 net.go:770] primary dev: ETH0
I0319 21:57:13.429180  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:57:13.429191  543705 net.go:698] Add success.
I0319 21:57:13.453726  543705 event_worker.go:152] Polling the log file for events...
I0319 21:57:13.469732  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e36cc32-5090-4630-a6fc-2ddd725a7f96","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 21:57:13.469780  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 21:57:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:57:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 21:57:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0319 21:57:14.456150  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 21:57:14.456160  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 21:57:14.456165  543705 custom_config.go:64] query custom config with name: gpu
I0319 21:57:14.456447  543705 disk_worker.go:494] system disk:vda1
I0319 21:57:14.456477  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 21:57:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 21:57:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:57:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 21:57:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 21:57:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:57:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:57:16.472342  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:57:21.173682  543705 disk_info.go:125] begin check local disk info of client
I0319 21:57:21.176085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:57:21.176092  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001cc000 0xc0001cc040]
E0319 21:57:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:57:23.409766  543705 memory.go:184] no items to output this cycle
I0319 21:57:23.409789  543705 cpu.go:275] no items to output this cycle
E0319 21:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:57:33.409799  543705 memory.go:184] no items to output this cycle
I0319 21:57:33.409812  543705 cpu.go:275] no items to output this cycle
I0319 21:57:37.917747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 21:57:37.917755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 21:57:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:57:43.410767  543705 memory.go:191] Add success.
I0319 21:57:43.409808  543705 cpu.go:282] Add success.
I0319 21:57:43.420688  543705 net.go:648] Add success.
I0319 21:57:43.423437  543705 net.go:770] primary dev: ETH0
I0319 21:57:43.423452  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:57:43.423467  543705 net.go:698] Add success.
I0319 21:57:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:57:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:57:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:57:53.409798  543705 memory.go:184] no items to output this cycle
I0319 21:57:53.409810  543705 cpu.go:275] no items to output this cycle
E0319 21:58:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:58:03.409785  543705 cpu.go:275] no items to output this cycle
I0319 21:58:03.409795  543705 memory.go:184] no items to output this cycle
E0319 21:58:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:58:13.409811  543705 memory.go:191] Add success.
I0319 21:58:13.409815  543705 cpu.go:282] Add success.
W0319 21:58:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:58:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:58:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:58:13.420123  543705 net.go:648] Add success.
I0319 21:58:13.422961  543705 net.go:770] primary dev: ETH0
I0319 21:58:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:58:13.422989  543705 net.go:698] Add success.
I0319 21:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:58:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:58:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0319 21:58:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:58:14.456592  543705 disk_worker.go:494] system disk:vda1
I0319 21:58:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:58:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:58:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:58:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:58:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:58:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:58:21.177677  543705 disk_info.go:125] begin check local disk info of client
I0319 21:58:21.180123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:58:21.180129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c22c0 0xc0004c2300]
E0319 21:58:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:58:23.409764  543705 memory.go:184] no items to output this cycle
I0319 21:58:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 21:58:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:58:33.409810  543705 memory.go:184] no items to output this cycle
I0319 21:58:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 21:58:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:58:43.409799  543705 memory.go:191] Add success.
I0319 21:58:43.409825  543705 cpu.go:282] Add success.
I0319 21:58:43.419870  543705 net.go:648] Add success.
I0319 21:58:43.422726  543705 net.go:770] primary dev: ETH0
I0319 21:58:43.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:58:43.422751  543705 net.go:698] Add success.
I0319 21:58:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:58:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:58:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:58:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:58:53.409789  543705 cpu.go:275] no items to output this cycle
I0319 21:58:53.409796  543705 memory.go:184] no items to output this cycle
E0319 21:59:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:59:03.409789  543705 memory.go:184] no items to output this cycle
I0319 21:59:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 21:59:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:59:13.409803  543705 cpu.go:282] Add success.
I0319 21:59:13.409806  543705 memory.go:191] Add success.
W0319 21:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 21:59:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 21:59:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 21:59:13.420063  543705 net.go:648] Add success.
I0319 21:59:13.423099  543705 net.go:770] primary dev: ETH0
I0319 21:59:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:59:13.423125  543705 net.go:698] Add success.
I0319 21:59:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 21:59:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 21:59:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 21:59:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0319 21:59:14.456570  543705 disk_worker.go:494] system disk:vda1
I0319 21:59:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 21:59:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 21:59:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:59:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:59:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 21:59:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0319 21:59:21.181675  543705 disk_info.go:125] begin check local disk info of client
I0319 21:59:21.184147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 21:59:21.184153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2080 0xc0002b20c0]
E0319 21:59:23.410670  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:59:23.410684  543705 memory.go:184] no items to output this cycle
I0319 21:59:23.410688  543705 cpu.go:275] no items to output this cycle
E0319 21:59:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:59:33.409785  543705 memory.go:184] no items to output this cycle
I0319 21:59:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 21:59:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:59:43.409834  543705 memory.go:191] Add success.
I0319 21:59:43.409835  543705 cpu.go:282] Add success.
I0319 21:59:43.420377  543705 net.go:648] Add success.
I0319 21:59:43.423197  543705 net.go:770] primary dev: ETH0
I0319 21:59:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0319 21:59:43.423223  543705 net.go:698] Add success.
I0319 21:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 21:59:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 21:59:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 21:59:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 21:59:53.409809  543705 memory.go:184] no items to output this cycle
I0319 21:59:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 22:00:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:00:03.409802  543705 memory.go:184] no items to output this cycle
I0319 22:00:03.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:00:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:00:13.409793  543705 memory.go:191] Add success.
I0319 22:00:13.409792  543705 cpu.go:282] Add success.
W0319 22:00:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:00:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:00:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:00:13.420123  543705 net.go:648] Add success.
I0319 22:00:13.422900  543705 net.go:770] primary dev: ETH0
I0319 22:00:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:00:13.422925  543705 net.go:698] Add success.
I0319 22:00:13.468966  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f70ce4cf-7bbe-4ed4-af88-2cd2848c57e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:00:13.468999  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:00:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:00:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:00:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 22:00:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:00:14.456479  543705 disk_worker.go:494] system disk:vda1
I0319 22:00:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:00:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:00:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:00:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:00:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:00:21.185666  543705 disk_info.go:125] begin check local disk info of client
I0319 22:00:21.188093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:00:21.188100  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e78c0 0xc0003e7900]
E0319 22:00:23.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:00:23.409870  543705 memory.go:184] no items to output this cycle
I0319 22:00:23.409940  543705 cpu.go:275] no items to output this cycle
E0319 22:00:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:00:33.409780  543705 memory.go:184] no items to output this cycle
I0319 22:00:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 22:00:37.917911  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:00:37.917919  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:00:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:00:43.410638  543705 memory.go:191] Add success.
I0319 22:00:43.409811  543705 cpu.go:282] Add success.
I0319 22:00:43.420336  543705 net.go:648] Add success.
I0319 22:00:43.422903  543705 net.go:770] primary dev: ETH0
I0319 22:00:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:00:43.422929  543705 net.go:698] Add success.
I0319 22:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:00:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:00:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:00:53.409791  543705 memory.go:184] no items to output this cycle
I0319 22:00:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:01:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:01:03.409789  543705 memory.go:184] no items to output this cycle
I0319 22:01:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:01:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:01:13.409783  543705 memory.go:191] Add success.
I0319 22:01:13.409806  543705 cpu.go:282] Add success.
W0319 22:01:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:01:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:01:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:01:13.420203  543705 net.go:648] Add success.
I0319 22:01:13.423054  543705 net.go:770] primary dev: ETH0
I0319 22:01:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:01:13.423080  543705 net.go:698] Add success.
I0319 22:01:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:01:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:01:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 22:01:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:01:14.456514  543705 disk_worker.go:494] system disk:vda1
I0319 22:01:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:01:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:01:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:01:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:01:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:01:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:01:21.189671  543705 disk_info.go:125] begin check local disk info of client
I0319 22:01:21.192105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:01:21.192111  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004641c0 0xc000464200]
E0319 22:01:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:01:23.409789  543705 memory.go:184] no items to output this cycle
I0319 22:01:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:01:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:01:33.409785  543705 memory.go:184] no items to output this cycle
I0319 22:01:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:01:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:01:43.409805  543705 memory.go:191] Add success.
I0319 22:01:43.409806  543705 cpu.go:282] Add success.
I0319 22:01:43.420013  543705 net.go:648] Add success.
I0319 22:01:43.422611  543705 net.go:770] primary dev: ETH0
I0319 22:01:43.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:01:43.422637  543705 net.go:698] Add success.
I0319 22:01:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:01:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:01:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:01:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:01:53.410409  543705 memory.go:184] no items to output this cycle
I0319 22:01:53.410423  543705 cpu.go:275] no items to output this cycle
E0319 22:02:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:02:03.409801  543705 memory.go:184] no items to output this cycle
I0319 22:02:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 22:02:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:02:13.409783  543705 memory.go:191] Add success.
W0319 22:02:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:02:13.409810  543705 cpu.go:282] Add success.
W0319 22:02:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:02:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:02:13.420071  543705 net.go:648] Add success.
I0319 22:02:13.422940  543705 net.go:770] primary dev: ETH0
I0319 22:02:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:02:13.422965  543705 net.go:698] Add success.
W0319 22:02:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:02:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 22:02:14.455204  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:02:14.456816  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:02:14.456825  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:02:14.456830  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:02:14.456874  543705 disk_worker.go:494] system disk:vda1
I0319 22:02:14.456915  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:02:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:02:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:02:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:02:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:02:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:02:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:02:16.472327  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:02:21.193673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:02:21.196061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:02:21.196068  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305440 0xc000305480]
E0319 22:02:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:02:23.409789  543705 memory.go:184] no items to output this cycle
I0319 22:02:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:02:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:02:33.409811  543705 memory.go:184] no items to output this cycle
I0319 22:02:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 22:02:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:02:43.409796  543705 memory.go:191] Add success.
I0319 22:02:43.409823  543705 cpu.go:282] Add success.
I0319 22:02:43.419791  543705 net.go:770] primary dev: ETH0
I0319 22:02:43.419803  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:02:43.419816  543705 net.go:698] Add success.
I0319 22:02:43.420063  543705 net.go:648] Add success.
I0319 22:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:02:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:02:53.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:02:53.410258  543705 memory.go:184] no items to output this cycle
I0319 22:02:53.410284  543705 cpu.go:275] no items to output this cycle
E0319 22:03:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:03:03.409790  543705 memory.go:184] no items to output this cycle
I0319 22:03:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 22:03:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:03:13.409772  543705 memory.go:191] Add success.
W0319 22:03:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:03:13.409804  543705 cpu.go:282] Add success.
W0319 22:03:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:03:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:03:13.420157  543705 net.go:648] Add success.
I0319 22:03:13.423095  543705 net.go:770] primary dev: ETH0
I0319 22:03:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:03:13.423135  543705 net.go:698] Add success.
I0319 22:03:13.547115  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0fea969e-bada-4e2f-830f-085d2ec679c4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:03:13.547148  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:03:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:03:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0319 22:03:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:03:14.456614  543705 disk_worker.go:494] system disk:vda1
I0319 22:03:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:03:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:03:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:03:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:03:21.197676  543705 disk_info.go:125] begin check local disk info of client
I0319 22:03:21.200191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:03:21.200197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e1240 0xc0000e1280]
E0319 22:03:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:03:23.409777  543705 memory.go:184] no items to output this cycle
I0319 22:03:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 22:03:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:03:33.409810  543705 memory.go:184] no items to output this cycle
I0319 22:03:33.409821  543705 cpu.go:275] no items to output this cycle
I0319 22:03:37.918063  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:03:37.918071  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:03:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:03:43.410580  543705 memory.go:191] Add success.
I0319 22:03:43.409806  543705 cpu.go:282] Add success.
I0319 22:03:43.420341  543705 net.go:648] Add success.
I0319 22:03:43.423258  543705 net.go:770] primary dev: ETH0
I0319 22:03:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:03:43.423284  543705 net.go:698] Add success.
I0319 22:03:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:03:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:03:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:03:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:03:53.409780  543705 memory.go:184] no items to output this cycle
I0319 22:03:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 22:04:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:04:03.409791  543705 memory.go:184] no items to output this cycle
I0319 22:04:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 22:04:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:04:13.409786  543705 memory.go:191] Add success.
I0319 22:04:13.409806  543705 cpu.go:282] Add success.
W0319 22:04:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:04:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:04:13.420099  543705 net.go:648] Add success.
I0319 22:04:13.422644  543705 net.go:770] primary dev: ETH0
I0319 22:04:13.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:04:13.422669  543705 net.go:698] Add success.
I0319 22:04:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:04:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:04:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0319 22:04:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:04:14.456482  543705 disk_worker.go:494] system disk:vda1
I0319 22:04:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:04:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:04:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:04:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:04:16.472422  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:04:21.201673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:04:21.204137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:04:21.204144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003900c0 0xc000390100]
E0319 22:04:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:04:23.409761  543705 memory.go:184] no items to output this cycle
I0319 22:04:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 22:04:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:04:33.409896  543705 memory.go:184] no items to output this cycle
I0319 22:04:33.409979  543705 cpu.go:275] no items to output this cycle
E0319 22:04:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:04:43.409788  543705 memory.go:191] Add success.
I0319 22:04:43.409823  543705 cpu.go:282] Add success.
I0319 22:04:43.419894  543705 net.go:648] Add success.
I0319 22:04:43.422461  543705 net.go:770] primary dev: ETH0
I0319 22:04:43.422475  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:04:43.422487  543705 net.go:698] Add success.
I0319 22:04:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:04:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:04:53.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:04:53.410383  543705 memory.go:184] no items to output this cycle
I0319 22:04:53.410394  543705 cpu.go:275] no items to output this cycle
E0319 22:05:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:05:03.409789  543705 memory.go:184] no items to output this cycle
I0319 22:05:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 22:05:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:05:13.409786  543705 memory.go:191] Add success.
I0319 22:05:13.409788  543705 cpu.go:282] Add success.
W0319 22:05:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:05:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:05:13.420189  543705 net.go:648] Add success.
I0319 22:05:13.423077  543705 net.go:770] primary dev: ETH0
I0319 22:05:13.423090  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:05:13.423103  543705 net.go:698] Add success.
I0319 22:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:05:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:05:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 22:05:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:05:14.456496  543705 disk_worker.go:494] system disk:vda1
I0319 22:05:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:05:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:05:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:05:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:05:21.205725  543705 disk_info.go:125] begin check local disk info of client
I0319 22:05:21.208214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:05:21.208221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000291840 0xc000291880]
E0319 22:05:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:05:23.409794  543705 memory.go:184] no items to output this cycle
I0319 22:05:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 22:05:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:05:33.409782  543705 memory.go:184] no items to output this cycle
I0319 22:05:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 22:05:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:05:43.409827  543705 memory.go:191] Add success.
I0319 22:05:43.409835  543705 cpu.go:282] Add success.
I0319 22:05:43.419996  543705 net.go:648] Add success.
I0319 22:05:43.422567  543705 net.go:770] primary dev: ETH0
I0319 22:05:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:05:43.422593  543705 net.go:698] Add success.
I0319 22:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:05:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:05:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:05:53.409767  543705 memory.go:184] no items to output this cycle
I0319 22:05:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:06:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:06:03.409804  543705 memory.go:184] no items to output this cycle
I0319 22:06:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 22:06:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:06:13.409793  543705 memory.go:191] Add success.
I0319 22:06:13.409795  543705 cpu.go:282] Add success.
W0319 22:06:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:06:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:06:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:06:13.420236  543705 net.go:648] Add success.
I0319 22:06:13.423111  543705 net.go:770] primary dev: ETH0
I0319 22:06:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:06:13.423136  543705 net.go:698] Add success.
I0319 22:06:13.475982  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"395c5114-4cb1-4fd6-b5e0-75054afe29c2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:06:13.476015  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:06:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:06:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:06:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 22:06:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:06:14.456532  543705 disk_worker.go:494] system disk:vda1
I0319 22:06:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:06:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:06:16.457570  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:06:16.457635  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:06:16.457685  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:06:16.473026  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:06:21.209675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:06:21.212147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:06:21.212153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002462c0 0xc000246300]
E0319 22:06:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:06:23.409765  543705 memory.go:184] no items to output this cycle
I0319 22:06:23.409774  543705 cpu.go:275] no items to output this cycle
E0319 22:06:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:06:33.409765  543705 memory.go:184] no items to output this cycle
I0319 22:06:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 22:06:37.920303  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:06:37.920311  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:06:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:06:43.410730  543705 memory.go:191] Add success.
I0319 22:06:43.409814  543705 cpu.go:282] Add success.
I0319 22:06:43.420441  543705 net.go:648] Add success.
I0319 22:06:43.423117  543705 net.go:770] primary dev: ETH0
I0319 22:06:43.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:06:43.423143  543705 net.go:698] Add success.
I0319 22:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:06:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:06:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:06:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:06:53.409769  543705 memory.go:184] no items to output this cycle
I0319 22:06:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 22:07:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:07:03.409789  543705 memory.go:184] no items to output this cycle
I0319 22:07:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 22:07:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:07:13.409780  543705 memory.go:191] Add success.
W0319 22:07:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:07:13.409810  543705 cpu.go:282] Add success.
W0319 22:07:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:07:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:07:13.420070  543705 net.go:648] Add success.
I0319 22:07:13.422595  543705 net.go:770] primary dev: ETH0
I0319 22:07:13.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:07:13.422620  543705 net.go:698] Add success.
I0319 22:07:13.453169  543705 event_worker.go:152] Polling the log file for events...
W0319 22:07:14.455458  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:07:14.455472  543705 disk_worker.go:708] disk space is not compliant
W0319 22:07:14.455476  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:07:14.456845  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:07:14.456854  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:07:14.456860  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:07:14.457725  543705 disk_worker.go:494] system disk:vda1
I0319 22:07:14.457764  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:07:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:07:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:07:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:07:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:07:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:07:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:07:16.472331  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:07:21.213674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:07:21.216047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:07:21.216052  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480300 0xc000480340]
E0319 22:07:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:07:23.409762  543705 memory.go:184] no items to output this cycle
I0319 22:07:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:07:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:07:33.409795  543705 memory.go:184] no items to output this cycle
I0319 22:07:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 22:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:07:43.409794  543705 memory.go:191] Add success.
I0319 22:07:43.409810  543705 cpu.go:282] Add success.
I0319 22:07:43.419897  543705 net.go:648] Add success.
I0319 22:07:43.422843  543705 net.go:770] primary dev: ETH0
I0319 22:07:43.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:07:43.422870  543705 net.go:698] Add success.
I0319 22:07:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:07:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:07:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:07:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:07:53.409801  543705 memory.go:184] no items to output this cycle
I0319 22:07:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 22:08:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:08:03.409784  543705 memory.go:184] no items to output this cycle
I0319 22:08:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 22:08:13.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:08:13.409882  543705 memory.go:191] Add success.
W0319 22:08:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:08:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:08:13.409930  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:08:13.409958  543705 cpu.go:282] Add success.
I0319 22:08:13.419758  543705 net.go:648] Add success.
I0319 22:08:13.422368  543705 net.go:770] primary dev: ETH0
I0319 22:08:13.422382  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:08:13.422397  543705 net.go:698] Add success.
I0319 22:08:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:08:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:08:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0319 22:08:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:08:14.456469  543705 disk_worker.go:494] system disk:vda1
I0319 22:08:14.456511  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:08:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:08:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:08:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:08:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:08:21.217675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:08:21.220095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:08:21.220101  543705 disk_info.go:196] parse disk info done, disk is : [0xc00060b900 0xc00060b940]
E0319 22:08:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:08:23.409804  543705 memory.go:184] no items to output this cycle
I0319 22:08:23.409820  543705 cpu.go:275] no items to output this cycle
E0319 22:08:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:08:33.409768  543705 memory.go:184] no items to output this cycle
I0319 22:08:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:08:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:08:43.409799  543705 memory.go:191] Add success.
I0319 22:08:43.409817  543705 cpu.go:282] Add success.
I0319 22:08:43.419877  543705 net.go:648] Add success.
I0319 22:08:43.423031  543705 net.go:770] primary dev: ETH0
I0319 22:08:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:08:43.423229  543705 net.go:698] Add success.
I0319 22:08:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:08:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:08:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:08:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:08:53.409786  543705 memory.go:184] no items to output this cycle
I0319 22:08:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:09:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:09:03.409813  543705 memory.go:184] no items to output this cycle
I0319 22:09:03.409825  543705 cpu.go:275] no items to output this cycle
E0319 22:09:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:09:13.409804  543705 memory.go:191] Add success.
I0319 22:09:13.409806  543705 cpu.go:282] Add success.
W0319 22:09:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:09:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:09:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:09:13.420239  543705 net.go:648] Add success.
I0319 22:09:13.422983  543705 net.go:770] primary dev: ETH0
I0319 22:09:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:09:13.423008  543705 net.go:698] Add success.
I0319 22:09:13.469679  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5470ea6e-9c1a-4086-bae2-4c1a781175b3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:09:13.469721  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:09:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:09:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:09:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0319 22:09:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:09:14.456487  543705 disk_worker.go:494] system disk:vda1
I0319 22:09:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:09:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:09:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:09:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:09:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:09:16.472487  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:09:21.221674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:09:21.224102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:09:21.224108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5540 0xc0000c5580]
E0319 22:09:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:09:23.409770  543705 memory.go:184] no items to output this cycle
I0319 22:09:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 22:09:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:09:33.409809  543705 memory.go:184] no items to output this cycle
I0319 22:09:33.409823  543705 cpu.go:275] no items to output this cycle
I0319 22:09:37.921743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:09:37.921758  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:09:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:09:43.410661  543705 memory.go:191] Add success.
I0319 22:09:43.409826  543705 cpu.go:282] Add success.
I0319 22:09:43.420373  543705 net.go:648] Add success.
I0319 22:09:43.422943  543705 net.go:770] primary dev: ETH0
I0319 22:09:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:09:43.422970  543705 net.go:698] Add success.
I0319 22:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:09:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:09:53.409815  543705 memory.go:184] no items to output this cycle
I0319 22:09:53.409821  543705 cpu.go:275] no items to output this cycle
E0319 22:10:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:10:03.409779  543705 memory.go:184] no items to output this cycle
I0319 22:10:03.409817  543705 cpu.go:275] no items to output this cycle
E0319 22:10:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:10:13.409786  543705 memory.go:191] Add success.
I0319 22:10:13.409816  543705 cpu.go:282] Add success.
W0319 22:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:10:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:10:13.420084  543705 net.go:648] Add success.
I0319 22:10:13.422771  543705 net.go:770] primary dev: ETH0
I0319 22:10:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:10:13.422801  543705 net.go:698] Add success.
I0319 22:10:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:10:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:10:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0319 22:10:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:10:14.456562  543705 disk_worker.go:494] system disk:vda1
I0319 22:10:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:10:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:10:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:10:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:10:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:10:21.225674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:10:21.228137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:10:21.228145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328000 0xc000328040]
E0319 22:10:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:10:23.409765  543705 memory.go:184] no items to output this cycle
I0319 22:10:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:10:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:10:33.409800  543705 memory.go:184] no items to output this cycle
I0319 22:10:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 22:10:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:10:43.409797  543705 memory.go:191] Add success.
I0319 22:10:43.409798  543705 cpu.go:282] Add success.
I0319 22:10:43.419945  543705 net.go:648] Add success.
I0319 22:10:43.422572  543705 net.go:770] primary dev: ETH0
I0319 22:10:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:10:43.422602  543705 net.go:698] Add success.
I0319 22:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:10:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:10:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:10:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:10:53.409778  543705 memory.go:184] no items to output this cycle
I0319 22:10:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 22:11:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:11:03.409775  543705 memory.go:184] no items to output this cycle
I0319 22:11:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 22:11:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:11:13.409798  543705 memory.go:191] Add success.
I0319 22:11:13.409798  543705 cpu.go:282] Add success.
W0319 22:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:11:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:11:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:11:13.420114  543705 net.go:648] Add success.
I0319 22:11:13.422742  543705 net.go:770] primary dev: ETH0
I0319 22:11:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:11:13.423003  543705 net.go:698] Add success.
I0319 22:11:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:11:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:11:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0319 22:11:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:11:14.456450  543705 disk_worker.go:494] system disk:vda1
I0319 22:11:14.456491  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:11:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:11:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:11:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:11:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:11:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:11:21.229677  543705 disk_info.go:125] begin check local disk info of client
I0319 22:11:21.232062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:11:21.232068  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d80 0xc0000c4dc0]
E0319 22:11:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:11:23.409787  543705 memory.go:184] no items to output this cycle
I0319 22:11:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:11:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:11:33.409766  543705 memory.go:184] no items to output this cycle
I0319 22:11:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:11:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:11:43.409792  543705 memory.go:191] Add success.
I0319 22:11:43.409808  543705 cpu.go:282] Add success.
I0319 22:11:43.420074  543705 net.go:648] Add success.
I0319 22:11:43.422899  543705 net.go:770] primary dev: ETH0
I0319 22:11:43.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:11:43.422930  543705 net.go:698] Add success.
I0319 22:11:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:11:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:11:53.410210  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:11:53.410228  543705 memory.go:184] no items to output this cycle
I0319 22:11:53.410249  543705 cpu.go:275] no items to output this cycle
E0319 22:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:12:03.409777  543705 memory.go:184] no items to output this cycle
I0319 22:12:03.409868  543705 cpu.go:275] no items to output this cycle
E0319 22:12:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:12:13.409775  543705 memory.go:191] Add success.
W0319 22:12:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:12:13.409809  543705 cpu.go:282] Add success.
W0319 22:12:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:12:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:12:13.420121  543705 net.go:648] Add success.
I0319 22:12:13.423181  543705 net.go:770] primary dev: ETH0
I0319 22:12:13.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:12:13.423206  543705 net.go:698] Add success.
I0319 22:12:13.469096  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f7de3e23-c29d-439f-bb2d-815f1d7d589e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:12:13.469128  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 22:12:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:12:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 22:12:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:12:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:12:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:12:14.455897  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:12:14.456650  543705 disk_worker.go:494] system disk:vda1
I0319 22:12:14.456695  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:12:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:12:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:12:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:12:16.457965  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:12:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:12:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:12:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:12:21.233674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:12:21.236157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:12:21.236164  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002af8c0 0xc0002af900]
E0319 22:12:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:12:23.409757  543705 memory.go:184] no items to output this cycle
I0319 22:12:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:12:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:12:33.409766  543705 memory.go:184] no items to output this cycle
I0319 22:12:33.409811  543705 cpu.go:275] no items to output this cycle
I0319 22:12:37.921898  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:12:37.921913  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:12:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:12:43.410787  543705 memory.go:191] Add success.
I0319 22:12:43.409810  543705 cpu.go:282] Add success.
I0319 22:12:43.420487  543705 net.go:648] Add success.
I0319 22:12:43.423250  543705 net.go:770] primary dev: ETH0
I0319 22:12:43.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:12:43.423276  543705 net.go:698] Add success.
I0319 22:12:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:12:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:12:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:12:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:12:53.409774  543705 memory.go:184] no items to output this cycle
I0319 22:12:53.409777  543705 cpu.go:275] no items to output this cycle
E0319 22:13:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:13:03.409774  543705 memory.go:184] no items to output this cycle
I0319 22:13:03.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:13:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:13:13.409774  543705 memory.go:191] Add success.
W0319 22:13:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:13:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:13:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:13:13.409834  543705 cpu.go:282] Add success.
I0319 22:13:13.420057  543705 net.go:648] Add success.
I0319 22:13:13.422980  543705 net.go:770] primary dev: ETH0
I0319 22:13:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:13:13.423222  543705 net.go:698] Add success.
I0319 22:13:14.454944  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:13:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:13:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0319 22:13:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:13:14.456603  543705 disk_worker.go:494] system disk:vda1
I0319 22:13:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:13:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:13:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:13:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:13:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:13:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:13:21.237680  543705 disk_info.go:125] begin check local disk info of client
I0319 22:13:21.240231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:13:21.240238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aeb40 0xc0002aeb80]
E0319 22:13:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:13:23.409762  543705 memory.go:184] no items to output this cycle
I0319 22:13:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:13:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:13:33.409771  543705 memory.go:184] no items to output this cycle
I0319 22:13:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:13:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:13:43.409798  543705 memory.go:191] Add success.
I0319 22:13:43.409799  543705 cpu.go:282] Add success.
I0319 22:13:43.419969  543705 net.go:648] Add success.
I0319 22:13:43.422806  543705 net.go:770] primary dev: ETH0
I0319 22:13:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:13:43.422832  543705 net.go:698] Add success.
I0319 22:13:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:13:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:13:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:13:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:13:53.409795  543705 memory.go:184] no items to output this cycle
I0319 22:13:53.409804  543705 cpu.go:275] no items to output this cycle
I0319 22:14:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:14:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:14:03.409818  543705 memory.go:184] no items to output this cycle
E0319 22:14:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:14:13.409821  543705 memory.go:191] Add success.
I0319 22:14:13.409820  543705 cpu.go:282] Add success.
W0319 22:14:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:14:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:14:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:14:13.420424  543705 net.go:648] Add success.
I0319 22:14:13.423452  543705 net.go:770] primary dev: ETH0
I0319 22:14:13.423466  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:14:13.423480  543705 net.go:698] Add success.
I0319 22:14:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:14:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:14:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 22:14:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:14:14.456512  543705 disk_worker.go:494] system disk:vda1
I0319 22:14:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:14:15.456012  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:14:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:14:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:14:21.241674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:14:21.244113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:14:21.244119  543705 disk_info.go:196] parse disk info done, disk is : [0xc000512680 0xc0005126c0]
E0319 22:14:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:14:23.409799  543705 memory.go:184] no items to output this cycle
I0319 22:14:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 22:14:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:14:33.409774  543705 memory.go:184] no items to output this cycle
I0319 22:14:33.409782  543705 cpu.go:275] no items to output this cycle
E0319 22:14:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:14:43.409802  543705 memory.go:191] Add success.
I0319 22:14:43.409803  543705 cpu.go:282] Add success.
I0319 22:14:43.419905  543705 net.go:648] Add success.
I0319 22:14:43.422588  543705 net.go:770] primary dev: ETH0
I0319 22:14:43.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:14:43.422618  543705 net.go:698] Add success.
I0319 22:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:14:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:14:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:14:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:14:53.409799  543705 memory.go:184] no items to output this cycle
I0319 22:14:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 22:15:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:15:03.409776  543705 memory.go:184] no items to output this cycle
I0319 22:15:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:15:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:15:13.409773  543705 memory.go:191] Add success.
W0319 22:15:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:15:13.409807  543705 cpu.go:282] Add success.
W0319 22:15:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:15:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:15:13.420250  543705 net.go:648] Add success.
I0319 22:15:13.422986  543705 net.go:770] primary dev: ETH0
I0319 22:15:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:15:13.423019  543705 net.go:698] Add success.
I0319 22:15:13.468922  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cddf8d00-985f-4e45-b1fc-e11492ac55f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:15:13.468953  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:15:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:15:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:15:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0319 22:15:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:15:14.457334  543705 disk_worker.go:494] system disk:vda1
I0319 22:15:14.457444  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:15:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:15:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:15:16.472413  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:15:21.245675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:15:21.248100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:15:21.248107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0e00 0xc0002a0e40]
E0319 22:15:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:15:23.409793  543705 memory.go:184] no items to output this cycle
I0319 22:15:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 22:15:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:15:33.409764  543705 memory.go:184] no items to output this cycle
I0319 22:15:33.409806  543705 cpu.go:275] no items to output this cycle
I0319 22:15:37.924318  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:15:37.924325  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:15:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:15:43.410702  543705 memory.go:191] Add success.
I0319 22:15:43.409805  543705 cpu.go:282] Add success.
I0319 22:15:43.420459  543705 net.go:648] Add success.
I0319 22:15:43.423321  543705 net.go:770] primary dev: ETH0
I0319 22:15:43.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:15:43.423348  543705 net.go:698] Add success.
I0319 22:15:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:15:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:15:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:15:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:15:53.409771  543705 memory.go:184] no items to output this cycle
I0319 22:15:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:16:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:16:03.409801  543705 memory.go:184] no items to output this cycle
I0319 22:16:03.409814  543705 cpu.go:275] no items to output this cycle
W0319 22:16:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:16:13.409728  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:16:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:16:13.409794  543705 cpu.go:282] Add success.
E0319 22:16:13.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:16:13.409855  543705 memory.go:191] Add success.
I0319 22:16:13.420222  543705 net.go:648] Add success.
I0319 22:16:13.422974  543705 net.go:770] primary dev: ETH0
I0319 22:16:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:16:13.422998  543705 net.go:698] Add success.
I0319 22:16:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:16:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:16:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 22:16:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:16:14.456726  543705 disk_worker.go:494] system disk:vda1
I0319 22:16:14.456754  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:16:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:16:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:16:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:16:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:16:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:16:21.249676  543705 disk_info.go:125] begin check local disk info of client
I0319 22:16:21.252069  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:16:21.252076  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad040 0xc0003ad080]
E0319 22:16:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:16:23.409787  543705 memory.go:184] no items to output this cycle
I0319 22:16:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 22:16:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:16:33.409780  543705 memory.go:184] no items to output this cycle
I0319 22:16:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 22:16:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:16:43.409819  543705 memory.go:191] Add success.
I0319 22:16:43.409828  543705 cpu.go:282] Add success.
I0319 22:16:43.420015  543705 net.go:648] Add success.
I0319 22:16:43.422825  543705 net.go:770] primary dev: ETH0
I0319 22:16:43.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:16:43.422855  543705 net.go:698] Add success.
I0319 22:16:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:16:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:16:53.409779  543705 memory.go:184] no items to output this cycle
I0319 22:16:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 22:17:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:17:03.409778  543705 memory.go:184] no items to output this cycle
I0319 22:17:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:17:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:17:13.409787  543705 memory.go:191] Add success.
I0319 22:17:13.409807  543705 cpu.go:282] Add success.
W0319 22:17:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:17:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:17:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:17:13.420121  543705 net.go:648] Add success.
I0319 22:17:13.422865  543705 net.go:770] primary dev: ETH0
I0319 22:17:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:17:13.422892  543705 net.go:698] Add success.
I0319 22:17:13.453444  543705 event_worker.go:152] Polling the log file for events...
W0319 22:17:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:17:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 22:17:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:17:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:17:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:17:14.455899  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:17:14.456635  543705 disk_worker.go:494] system disk:vda1
I0319 22:17:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:17:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:17:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:17:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:17:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:17:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:17:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:17:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:17:21.253672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:17:21.256041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:17:21.256047  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a2c0 0xc00048a300]
E0319 22:17:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:17:23.409767  543705 memory.go:184] no items to output this cycle
I0319 22:17:23.409773  543705 cpu.go:275] no items to output this cycle
E0319 22:17:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:17:33.409803  543705 memory.go:184] no items to output this cycle
I0319 22:17:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 22:17:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:17:43.409783  543705 memory.go:191] Add success.
I0319 22:17:43.409813  543705 cpu.go:282] Add success.
I0319 22:17:43.420001  543705 net.go:648] Add success.
I0319 22:17:43.422781  543705 net.go:770] primary dev: ETH0
I0319 22:17:43.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:17:43.422806  543705 net.go:698] Add success.
I0319 22:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:17:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:17:53.409767  543705 memory.go:184] no items to output this cycle
I0319 22:17:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 22:18:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:18:03.409798  543705 memory.go:184] no items to output this cycle
I0319 22:18:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 22:18:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:18:13.409802  543705 memory.go:191] Add success.
I0319 22:18:13.409801  543705 cpu.go:282] Add success.
W0319 22:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:18:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:18:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:18:13.420070  543705 net.go:648] Add success.
I0319 22:18:13.422880  543705 net.go:770] primary dev: ETH0
I0319 22:18:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:18:13.422905  543705 net.go:698] Add success.
I0319 22:18:13.468466  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e79fa02-0efa-4319-b93c-e8bc3b346d23","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:18:13.468499  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:18:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:18:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:18:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 22:18:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:18:14.456541  543705 disk_worker.go:494] system disk:vda1
I0319 22:18:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:18:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:18:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:18:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:18:16.472413  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:18:21.257675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:18:21.260158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:18:21.260164  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002502c0 0xc000250300]
E0319 22:18:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:18:23.409773  543705 memory.go:184] no items to output this cycle
I0319 22:18:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:18:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:18:33.409816  543705 memory.go:184] no items to output this cycle
I0319 22:18:33.409832  543705 cpu.go:275] no items to output this cycle
I0319 22:18:37.925745  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:18:37.925753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:18:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:18:43.410666  543705 memory.go:191] Add success.
I0319 22:18:43.409809  543705 cpu.go:282] Add success.
I0319 22:18:43.420443  543705 net.go:648] Add success.
I0319 22:18:43.423086  543705 net.go:770] primary dev: ETH0
I0319 22:18:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:18:43.423115  543705 net.go:698] Add success.
I0319 22:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:18:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:18:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:18:53.409802  543705 memory.go:184] no items to output this cycle
I0319 22:18:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 22:19:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:19:03.409815  543705 memory.go:184] no items to output this cycle
I0319 22:19:03.409829  543705 cpu.go:275] no items to output this cycle
E0319 22:19:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:19:13.409798  543705 memory.go:191] Add success.
I0319 22:19:13.409818  543705 cpu.go:282] Add success.
W0319 22:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:19:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:19:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:19:13.420043  543705 net.go:648] Add success.
I0319 22:19:13.422866  543705 net.go:770] primary dev: ETH0
I0319 22:19:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:19:13.422891  543705 net.go:698] Add success.
I0319 22:19:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:19:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:19:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0319 22:19:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:19:14.456485  543705 disk_worker.go:494] system disk:vda1
I0319 22:19:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:19:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:19:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:19:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:19:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:19:21.261673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:19:21.264109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:19:21.264115  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003042c0 0xc000304300]
E0319 22:19:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:19:23.409785  543705 memory.go:184] no items to output this cycle
I0319 22:19:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 22:19:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:19:33.409784  543705 cpu.go:275] no items to output this cycle
I0319 22:19:33.409791  543705 memory.go:184] no items to output this cycle
E0319 22:19:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:19:43.409817  543705 memory.go:191] Add success.
I0319 22:19:43.409838  543705 cpu.go:282] Add success.
I0319 22:19:43.419992  543705 net.go:648] Add success.
I0319 22:19:43.422798  543705 net.go:770] primary dev: ETH0
I0319 22:19:43.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:19:43.422834  543705 net.go:698] Add success.
I0319 22:19:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:19:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:19:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:19:53.409795  543705 memory.go:184] no items to output this cycle
I0319 22:19:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:20:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:20:03.409792  543705 memory.go:184] no items to output this cycle
I0319 22:20:03.409807  543705 cpu.go:275] no items to output this cycle
E0319 22:20:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:20:13.409790  543705 memory.go:191] Add success.
I0319 22:20:13.409809  543705 cpu.go:282] Add success.
W0319 22:20:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:20:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:20:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:20:13.420162  543705 net.go:648] Add success.
I0319 22:20:13.422928  543705 net.go:770] primary dev: ETH0
I0319 22:20:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:20:13.422956  543705 net.go:698] Add success.
I0319 22:20:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:20:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:20:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0319 22:20:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:20:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 22:20:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:20:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:20:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:20:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:20:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:20:21.265673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:20:21.268120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:20:21.268127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003749c0 0xc000374a00]
E0319 22:20:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:20:23.409760  543705 memory.go:184] no items to output this cycle
I0319 22:20:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 22:20:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:20:33.409816  543705 memory.go:184] no items to output this cycle
I0319 22:20:33.409825  543705 cpu.go:275] no items to output this cycle
E0319 22:20:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:20:43.409787  543705 memory.go:191] Add success.
I0319 22:20:43.409845  543705 cpu.go:282] Add success.
I0319 22:20:43.420154  543705 net.go:648] Add success.
I0319 22:20:43.422996  543705 net.go:770] primary dev: ETH0
I0319 22:20:43.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:20:43.423023  543705 net.go:698] Add success.
I0319 22:20:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:20:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:20:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:20:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:20:53.409779  543705 cpu.go:275] no items to output this cycle
I0319 22:20:53.409781  543705 memory.go:184] no items to output this cycle
E0319 22:21:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:21:03.409787  543705 memory.go:184] no items to output this cycle
I0319 22:21:03.409790  543705 cpu.go:275] no items to output this cycle
E0319 22:21:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:21:13.409800  543705 memory.go:191] Add success.
I0319 22:21:13.409801  543705 cpu.go:282] Add success.
W0319 22:21:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:21:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:21:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:21:13.420186  543705 net.go:648] Add success.
I0319 22:21:13.423184  543705 net.go:770] primary dev: ETH0
I0319 22:21:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:21:13.423215  543705 net.go:698] Add success.
I0319 22:21:13.490448  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"938596c9-201f-4ce2-9ecf-064faf97f510","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:21:13.490484  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:21:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:21:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:21:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0319 22:21:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:21:14.456498  543705 disk_worker.go:494] system disk:vda1
I0319 22:21:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:21:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:21:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:21:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:21:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:21:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:21:21.269674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:21:21.272110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:21:21.272116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330100 0xc000330140]
E0319 22:21:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:21:23.409774  543705 memory.go:184] no items to output this cycle
I0319 22:21:23.409778  543705 cpu.go:275] no items to output this cycle
E0319 22:21:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:21:33.409805  543705 memory.go:184] no items to output this cycle
I0319 22:21:33.409819  543705 cpu.go:275] no items to output this cycle
I0319 22:21:37.925899  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:21:37.925907  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:21:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:21:43.410721  543705 memory.go:191] Add success.
I0319 22:21:43.409828  543705 cpu.go:282] Add success.
I0319 22:21:43.420430  543705 net.go:648] Add success.
I0319 22:21:43.423041  543705 net.go:770] primary dev: ETH0
I0319 22:21:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:21:43.423067  543705 net.go:698] Add success.
I0319 22:21:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:21:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:21:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:21:53.409774  543705 memory.go:184] no items to output this cycle
I0319 22:21:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:22:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:22:03.409790  543705 memory.go:184] no items to output this cycle
I0319 22:22:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:22:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:22:13.409823  543705 memory.go:191] Add success.
I0319 22:22:13.409825  543705 cpu.go:282] Add success.
W0319 22:22:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:22:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:22:13.420213  543705 net.go:648] Add success.
I0319 22:22:13.422985  543705 net.go:770] primary dev: ETH0
I0319 22:22:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:22:13.423019  543705 net.go:698] Add success.
W0319 22:22:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:22:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 22:22:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:22:14.456539  543705 disk_worker.go:494] system disk:vda1
I0319 22:22:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:22:14.457875  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:22:14.457884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:22:14.457891  543705 custom_config.go:64] query custom config with name: gpu
E0319 22:22:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:22:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:22:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:22:16.457978  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:22:16.458018  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:22:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:22:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:22:21.273672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:22:21.276109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:22:21.276115  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004600c0 0xc000460100]
E0319 22:22:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:22:23.409790  543705 memory.go:184] no items to output this cycle
I0319 22:22:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 22:22:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:22:33.409804  543705 memory.go:184] no items to output this cycle
I0319 22:22:33.409816  543705 cpu.go:275] no items to output this cycle
E0319 22:22:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:22:43.409793  543705 memory.go:191] Add success.
I0319 22:22:43.409811  543705 cpu.go:282] Add success.
I0319 22:22:43.419949  543705 net.go:648] Add success.
I0319 22:22:43.422904  543705 net.go:770] primary dev: ETH0
I0319 22:22:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:22:43.422929  543705 net.go:698] Add success.
I0319 22:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:22:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:22:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:22:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:22:53.409773  543705 memory.go:184] no items to output this cycle
I0319 22:22:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 22:23:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:23:03.409809  543705 memory.go:184] no items to output this cycle
I0319 22:23:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 22:23:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:23:13.409780  543705 memory.go:191] Add success.
I0319 22:23:13.409801  543705 cpu.go:282] Add success.
W0319 22:23:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:23:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:23:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:23:13.420302  543705 net.go:648] Add success.
I0319 22:23:13.423137  543705 net.go:770] primary dev: ETH0
I0319 22:23:13.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:23:13.423161  543705 net.go:698] Add success.
I0319 22:23:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:23:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:23:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 22:23:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:23:14.456509  543705 disk_worker.go:494] system disk:vda1
I0319 22:23:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:23:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:23:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:23:21.277682  543705 disk_info.go:125] begin check local disk info of client
I0319 22:23:21.280197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:23:21.280205  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e040 0xc00039e080]
E0319 22:23:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:23:23.409775  543705 memory.go:184] no items to output this cycle
I0319 22:23:23.409787  543705 cpu.go:275] no items to output this cycle
E0319 22:23:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:23:33.409803  543705 memory.go:184] no items to output this cycle
I0319 22:23:33.409820  543705 cpu.go:275] no items to output this cycle
E0319 22:23:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:23:43.409821  543705 memory.go:191] Add success.
I0319 22:23:43.409830  543705 cpu.go:282] Add success.
I0319 22:23:43.420059  543705 net.go:648] Add success.
I0319 22:23:43.423224  543705 net.go:770] primary dev: ETH0
I0319 22:23:43.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:23:43.423258  543705 net.go:698] Add success.
I0319 22:23:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:23:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:23:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:23:53.409771  543705 memory.go:184] no items to output this cycle
I0319 22:23:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:24:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:24:03.409770  543705 memory.go:184] no items to output this cycle
I0319 22:24:03.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:24:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:24:13.409788  543705 memory.go:191] Add success.
I0319 22:24:13.409808  543705 cpu.go:282] Add success.
W0319 22:24:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:24:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:24:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:24:13.420045  543705 net.go:648] Add success.
I0319 22:24:13.423018  543705 net.go:770] primary dev: ETH0
I0319 22:24:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:24:13.423045  543705 net.go:698] Add success.
I0319 22:24:13.466476  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b1f1a33f-688d-4ced-95ff-f953f5e4b142","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:24:13.466508  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:24:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:24:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:24:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 22:24:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:24:14.456619  543705 disk_worker.go:494] system disk:vda1
I0319 22:24:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:24:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:24:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:24:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:24:21.281672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:24:21.284101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:24:21.284107  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278080 0xc000278100]
E0319 22:24:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:24:23.409796  543705 memory.go:184] no items to output this cycle
I0319 22:24:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:24:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:24:33.409778  543705 memory.go:184] no items to output this cycle
I0319 22:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 22:24:37.926052  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:24:37.926059  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:24:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:24:43.410655  543705 memory.go:191] Add success.
I0319 22:24:43.409833  543705 cpu.go:282] Add success.
I0319 22:24:43.420351  543705 net.go:648] Add success.
I0319 22:24:43.423175  543705 net.go:770] primary dev: ETH0
I0319 22:24:43.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:24:43.423200  543705 net.go:698] Add success.
I0319 22:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:24:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:24:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:24:53.409773  543705 memory.go:184] no items to output this cycle
I0319 22:24:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 22:25:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:25:03.409787  543705 memory.go:184] no items to output this cycle
I0319 22:25:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:25:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:25:13.409796  543705 memory.go:191] Add success.
I0319 22:25:13.409798  543705 cpu.go:282] Add success.
W0319 22:25:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:25:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:25:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:25:13.420300  543705 net.go:648] Add success.
I0319 22:25:13.423352  543705 net.go:770] primary dev: ETH0
I0319 22:25:13.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:25:13.423384  543705 net.go:698] Add success.
I0319 22:25:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:25:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:25:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 22:25:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:25:14.456610  543705 disk_worker.go:494] system disk:vda1
I0319 22:25:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:25:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:25:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:25:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:25:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:25:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:25:21.285672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:25:21.288076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:25:21.288083  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a62c0 0xc0004a6300]
E0319 22:25:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:25:23.409757  543705 memory.go:184] no items to output this cycle
I0319 22:25:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:25:33.409783  543705 memory.go:184] no items to output this cycle
I0319 22:25:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 22:25:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:25:43.409823  543705 memory.go:191] Add success.
I0319 22:25:43.409842  543705 cpu.go:282] Add success.
I0319 22:25:43.419886  543705 net.go:648] Add success.
I0319 22:25:43.422719  543705 net.go:770] primary dev: ETH0
I0319 22:25:43.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:25:43.422747  543705 net.go:698] Add success.
I0319 22:25:46.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:25:46.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:25:46.458023  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:25:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:25:53.409796  543705 memory.go:184] no items to output this cycle
I0319 22:25:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:26:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:26:03.409770  543705 memory.go:184] no items to output this cycle
I0319 22:26:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:26:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:26:13.409820  543705 memory.go:191] Add success.
I0319 22:26:13.409822  543705 cpu.go:282] Add success.
W0319 22:26:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:26:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:26:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:26:13.420393  543705 net.go:648] Add success.
I0319 22:26:13.423111  543705 net.go:770] primary dev: ETH0
I0319 22:26:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:26:13.423136  543705 net.go:698] Add success.
I0319 22:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:26:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:26:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 22:26:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:26:14.456807  543705 disk_worker.go:494] system disk:vda1
I0319 22:26:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:26:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:26:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:26:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:26:21.289686  543705 disk_info.go:125] begin check local disk info of client
I0319 22:26:21.292088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:26:21.292094  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386000 0xc000386040]
E0319 22:26:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:26:23.409761  543705 memory.go:184] no items to output this cycle
I0319 22:26:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 22:26:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:26:33.409806  543705 memory.go:184] no items to output this cycle
I0319 22:26:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 22:26:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:26:43.409796  543705 memory.go:191] Add success.
I0319 22:26:43.409814  543705 cpu.go:282] Add success.
I0319 22:26:43.420046  543705 net.go:648] Add success.
I0319 22:26:43.422734  543705 net.go:770] primary dev: ETH0
I0319 22:26:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:26:43.422759  543705 net.go:698] Add success.
I0319 22:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:26:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:26:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:26:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:26:53.409798  543705 memory.go:184] no items to output this cycle
I0319 22:26:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 22:27:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:27:03.409811  543705 memory.go:184] no items to output this cycle
I0319 22:27:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 22:27:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:27:13.409803  543705 memory.go:191] Add success.
I0319 22:27:13.409804  543705 cpu.go:282] Add success.
W0319 22:27:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:27:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:27:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:27:13.420122  543705 net.go:648] Add success.
I0319 22:27:13.422804  543705 net.go:770] primary dev: ETH0
I0319 22:27:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:27:13.422833  543705 net.go:698] Add success.
I0319 22:27:13.428860  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 22:27:13.453028  543705 event_worker.go:152] Polling the log file for events...
I0319 22:27:13.468849  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6f08d9e2-e605-4970-a93c-a214b9a560a0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:27:13.468882  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 22:27:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:27:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 22:27:14.455208  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:27:14.455921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:27:14.455930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:27:14.455936  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:27:14.456730  543705 disk_worker.go:494] system disk:vda1
I0319 22:27:14.456763  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:27:15.456856  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:27:15.456864  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:27:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:27:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:27:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:27:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:27:16.472355  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:27:21.293682  543705 disk_info.go:125] begin check local disk info of client
I0319 22:27:21.296063  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:27:21.296070  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e3c0 0xc00039e400]
E0319 22:27:23.410387  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:27:23.410405  543705 memory.go:184] no items to output this cycle
I0319 22:27:23.410436  543705 cpu.go:275] no items to output this cycle
E0319 22:27:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:27:33.409795  543705 memory.go:184] no items to output this cycle
I0319 22:27:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 22:27:37.926208  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:27:37.926216  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:27:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:27:43.410698  543705 memory.go:191] Add success.
I0319 22:27:43.409811  543705 cpu.go:282] Add success.
I0319 22:27:43.420420  543705 net.go:648] Add success.
I0319 22:27:43.423091  543705 net.go:770] primary dev: ETH0
I0319 22:27:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:27:43.423118  543705 net.go:698] Add success.
I0319 22:27:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:27:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:27:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:27:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:27:53.409813  543705 memory.go:184] no items to output this cycle
I0319 22:27:53.409821  543705 cpu.go:275] no items to output this cycle
E0319 22:28:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:28:03.409775  543705 memory.go:184] no items to output this cycle
I0319 22:28:03.409827  543705 cpu.go:275] no items to output this cycle
E0319 22:28:13.409834  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:28:13.409861  543705 memory.go:191] Add success.
I0319 22:28:13.409862  543705 cpu.go:282] Add success.
W0319 22:28:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:28:13.409901  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:28:13.409904  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:28:13.420174  543705 net.go:648] Add success.
I0319 22:28:13.423315  543705 net.go:770] primary dev: ETH0
I0319 22:28:13.423330  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:28:13.423344  543705 net.go:698] Add success.
I0319 22:28:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:28:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:28:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 22:28:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:28:14.456497  543705 disk_worker.go:494] system disk:vda1
I0319 22:28:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:28:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:28:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:28:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:28:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:28:16.472091  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:28:21.297681  543705 disk_info.go:125] begin check local disk info of client
I0319 22:28:21.300111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:28:21.300118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae280 0xc0003ae2c0]
E0319 22:28:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:28:23.409806  543705 memory.go:184] no items to output this cycle
I0319 22:28:23.409819  543705 cpu.go:275] no items to output this cycle
E0319 22:28:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:28:33.409767  543705 memory.go:184] no items to output this cycle
I0319 22:28:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 22:28:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:28:43.409802  543705 memory.go:191] Add success.
I0319 22:28:43.409820  543705 cpu.go:282] Add success.
I0319 22:28:43.419999  543705 net.go:648] Add success.
I0319 22:28:43.422689  543705 net.go:770] primary dev: ETH0
I0319 22:28:43.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:28:43.422715  543705 net.go:698] Add success.
I0319 22:28:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:28:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:28:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:28:53.409804  543705 memory.go:184] no items to output this cycle
I0319 22:28:53.409817  543705 cpu.go:275] no items to output this cycle
E0319 22:29:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:29:03.409788  543705 memory.go:184] no items to output this cycle
I0319 22:29:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 22:29:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:29:13.409826  543705 memory.go:191] Add success.
I0319 22:29:13.409829  543705 cpu.go:282] Add success.
W0319 22:29:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:29:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:29:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:29:13.420190  543705 net.go:648] Add success.
I0319 22:29:13.423071  543705 net.go:770] primary dev: ETH0
I0319 22:29:13.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:29:13.423106  543705 net.go:698] Add success.
I0319 22:29:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:29:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:29:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0319 22:29:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:29:14.456578  543705 disk_worker.go:494] system disk:vda1
I0319 22:29:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:29:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:29:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:29:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:29:16.472461  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:29:21.301673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:29:21.304151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:29:21.304158  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035da80 0xc00035dac0]
E0319 22:29:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:29:23.409785  543705 memory.go:184] no items to output this cycle
I0319 22:29:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:29:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:29:33.409809  543705 memory.go:184] no items to output this cycle
I0319 22:29:33.409822  543705 cpu.go:275] no items to output this cycle
E0319 22:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:29:43.409804  543705 memory.go:191] Add success.
I0319 22:29:43.409820  543705 cpu.go:282] Add success.
I0319 22:29:43.420053  543705 net.go:648] Add success.
I0319 22:29:43.422809  543705 net.go:770] primary dev: ETH0
I0319 22:29:43.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:29:43.422835  543705 net.go:698] Add success.
I0319 22:29:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:29:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:29:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:29:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:29:53.409789  543705 memory.go:184] no items to output this cycle
I0319 22:29:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:30:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:30:03.409763  543705 memory.go:184] no items to output this cycle
I0319 22:30:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 22:30:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:30:13.409837  543705 memory.go:191] Add success.
I0319 22:30:13.409843  543705 cpu.go:282] Add success.
W0319 22:30:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:30:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:30:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:30:13.420137  543705 net.go:648] Add success.
I0319 22:30:13.423230  543705 net.go:770] primary dev: ETH0
I0319 22:30:13.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:30:13.423256  543705 net.go:698] Add success.
I0319 22:30:13.478670  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d75154ba-4a6e-44cc-a869-e8c4ab0778c8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:30:13.478704  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:30:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:30:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:30:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0319 22:30:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:30:14.456503  543705 disk_worker.go:494] system disk:vda1
I0319 22:30:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:30:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:30:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:30:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:30:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:30:21.305679  543705 disk_info.go:125] begin check local disk info of client
I0319 22:30:21.308133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:30:21.308140  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328f00 0xc000328f40]
E0319 22:30:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:30:23.409792  543705 memory.go:184] no items to output this cycle
I0319 22:30:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 22:30:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:30:33.409796  543705 memory.go:184] no items to output this cycle
I0319 22:30:33.409802  543705 cpu.go:275] no items to output this cycle
I0319 22:30:37.926359  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:30:37.926367  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:30:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:30:43.410671  543705 memory.go:191] Add success.
I0319 22:30:43.409820  543705 cpu.go:282] Add success.
I0319 22:30:43.420364  543705 net.go:648] Add success.
I0319 22:30:43.422982  543705 net.go:770] primary dev: ETH0
I0319 22:30:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:30:43.423008  543705 net.go:698] Add success.
I0319 22:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:30:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:30:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:30:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:30:53.409786  543705 cpu.go:275] no items to output this cycle
I0319 22:30:53.409788  543705 memory.go:184] no items to output this cycle
E0319 22:31:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:31:03.409786  543705 memory.go:184] no items to output this cycle
I0319 22:31:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:31:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:31:13.409777  543705 memory.go:191] Add success.
I0319 22:31:13.409797  543705 cpu.go:282] Add success.
W0319 22:31:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:31:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:31:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:31:13.420053  543705 net.go:648] Add success.
I0319 22:31:13.422816  543705 net.go:770] primary dev: ETH0
I0319 22:31:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:31:13.422843  543705 net.go:698] Add success.
I0319 22:31:14.454271  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:31:14.454485  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:31:14.454496  543705 disk_worker.go:708] disk space is not compliant
W0319 22:31:14.454499  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:31:14.455873  543705 disk_worker.go:494] system disk:vda1
I0319 22:31:14.455911  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:31:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:31:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:31:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:31:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:31:16.472771  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:31:21.309678  543705 disk_info.go:125] begin check local disk info of client
I0319 22:31:21.312122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:31:21.312128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be7c0 0xc0002be800]
E0319 22:31:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:31:23.409792  543705 memory.go:184] no items to output this cycle
I0319 22:31:23.409806  543705 cpu.go:275] no items to output this cycle
E0319 22:31:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:31:33.409767  543705 memory.go:184] no items to output this cycle
I0319 22:31:33.409799  543705 cpu.go:275] no items to output this cycle
E0319 22:31:43.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:31:43.409924  543705 memory.go:191] Add success.
I0319 22:31:43.410009  543705 cpu.go:282] Add success.
I0319 22:31:43.419735  543705 net.go:648] Add success.
I0319 22:31:43.422448  543705 net.go:770] primary dev: ETH0
I0319 22:31:43.422462  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:31:43.422473  543705 net.go:698] Add success.
I0319 22:31:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:31:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:31:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:31:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:31:53.409792  543705 memory.go:184] no items to output this cycle
I0319 22:31:53.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:32:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:32:03.409770  543705 memory.go:184] no items to output this cycle
I0319 22:32:03.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:32:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:32:13.409789  543705 memory.go:191] Add success.
I0319 22:32:13.409808  543705 cpu.go:282] Add success.
W0319 22:32:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:32:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:32:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:32:13.420145  543705 net.go:648] Add success.
I0319 22:32:13.422784  543705 net.go:770] primary dev: ETH0
I0319 22:32:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:32:13.422814  543705 net.go:698] Add success.
W0319 22:32:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:32:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 22:32:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:32:14.456952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:32:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:32:14.456967  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:32:14.457005  543705 disk_worker.go:494] system disk:vda1
I0319 22:32:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:32:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:32:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:32:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:32:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:32:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:32:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:32:16.472340  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:32:21.313675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:32:21.316191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:32:21.316198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9f00 0xc0003c9f40]
E0319 22:32:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:32:23.409778  543705 memory.go:184] no items to output this cycle
I0319 22:32:23.409783  543705 cpu.go:275] no items to output this cycle
E0319 22:32:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:32:33.409794  543705 memory.go:184] no items to output this cycle
I0319 22:32:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 22:32:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:32:43.409906  543705 memory.go:191] Add success.
I0319 22:32:43.409985  543705 cpu.go:282] Add success.
I0319 22:32:43.419742  543705 net.go:648] Add success.
I0319 22:32:43.422431  543705 net.go:770] primary dev: ETH0
I0319 22:32:43.422446  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:32:43.422458  543705 net.go:698] Add success.
I0319 22:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:32:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:32:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:32:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:32:53.409771  543705 memory.go:184] no items to output this cycle
I0319 22:32:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 22:33:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:33:03.409792  543705 memory.go:184] no items to output this cycle
I0319 22:33:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 22:33:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:33:13.409794  543705 memory.go:191] Add success.
I0319 22:33:13.409796  543705 cpu.go:282] Add success.
W0319 22:33:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:33:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:33:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:33:13.420166  543705 net.go:648] Add success.
I0319 22:33:13.423003  543705 net.go:770] primary dev: ETH0
I0319 22:33:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:33:13.423027  543705 net.go:698] Add success.
I0319 22:33:13.470366  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b4483fd6-2c4c-4384-876d-c1575275776b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:33:13.470398  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:33:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:33:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:33:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 22:33:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:33:14.456508  543705 disk_worker.go:494] system disk:vda1
I0319 22:33:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:33:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:33:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:33:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:33:16.472482  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:33:21.317674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:33:21.320269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:33:21.320278  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032bac0 0xc00032bb00]
E0319 22:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:33:23.409794  543705 memory.go:184] no items to output this cycle
I0319 22:33:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 22:33:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:33:33.409775  543705 memory.go:184] no items to output this cycle
I0319 22:33:33.409783  543705 cpu.go:275] no items to output this cycle
I0319 22:33:37.928335  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:33:37.928343  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:33:43.410715  543705 memory.go:191] Add success.
I0319 22:33:43.409814  543705 cpu.go:282] Add success.
I0319 22:33:43.420406  543705 net.go:648] Add success.
I0319 22:33:43.423136  543705 net.go:770] primary dev: ETH0
I0319 22:33:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:33:43.423161  543705 net.go:698] Add success.
I0319 22:33:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:33:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:33:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:33:53.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:33:53.409937  543705 memory.go:184] no items to output this cycle
I0319 22:33:53.410004  543705 cpu.go:275] no items to output this cycle
E0319 22:34:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:34:03.409775  543705 memory.go:184] no items to output this cycle
I0319 22:34:03.409789  543705 cpu.go:275] no items to output this cycle
E0319 22:34:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:34:13.409826  543705 memory.go:191] Add success.
I0319 22:34:13.409831  543705 cpu.go:282] Add success.
W0319 22:34:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:34:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:34:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:34:13.420139  543705 net.go:648] Add success.
I0319 22:34:13.422770  543705 net.go:770] primary dev: ETH0
I0319 22:34:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:34:13.422794  543705 net.go:698] Add success.
I0319 22:34:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:34:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:34:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 22:34:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:34:14.456498  543705 disk_worker.go:494] system disk:vda1
I0319 22:34:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:34:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:34:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:34:21.321675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:34:21.324177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:34:21.324183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4cc0 0xc0000c4d00]
E0319 22:34:23.410414  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:34:23.410430  543705 memory.go:184] no items to output this cycle
I0319 22:34:23.410461  543705 cpu.go:275] no items to output this cycle
E0319 22:34:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:34:33.409773  543705 memory.go:184] no items to output this cycle
I0319 22:34:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 22:34:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:34:43.409820  543705 memory.go:191] Add success.
I0319 22:34:43.409826  543705 cpu.go:282] Add success.
I0319 22:34:43.420039  543705 net.go:648] Add success.
I0319 22:34:43.422884  543705 net.go:770] primary dev: ETH0
I0319 22:34:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:34:43.422910  543705 net.go:698] Add success.
I0319 22:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:34:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:34:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:34:53.409771  543705 memory.go:184] no items to output this cycle
I0319 22:34:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 22:35:03.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:35:03.409890  543705 memory.go:184] no items to output this cycle
I0319 22:35:03.409941  543705 cpu.go:275] no items to output this cycle
E0319 22:35:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:35:13.409816  543705 memory.go:191] Add success.
I0319 22:35:13.409826  543705 cpu.go:282] Add success.
W0319 22:35:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:35:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:35:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:35:13.420240  543705 net.go:648] Add success.
I0319 22:35:13.423145  543705 net.go:770] primary dev: ETH0
I0319 22:35:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:35:13.423171  543705 net.go:698] Add success.
I0319 22:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:35:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:35:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 22:35:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:35:14.456587  543705 disk_worker.go:494] system disk:vda1
I0319 22:35:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:35:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:35:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:35:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:35:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:35:16.472485  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:35:21.327014  543705 disk_info.go:125] begin check local disk info of client
I0319 22:35:21.329568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:35:21.329575  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469bc0 0xc000469c00]
E0319 22:35:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:35:23.409792  543705 memory.go:184] no items to output this cycle
I0319 22:35:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 22:35:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:35:33.409800  543705 memory.go:184] no items to output this cycle
I0319 22:35:33.409812  543705 cpu.go:275] no items to output this cycle
E0319 22:35:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:35:43.409824  543705 memory.go:191] Add success.
I0319 22:35:43.409829  543705 cpu.go:282] Add success.
I0319 22:35:43.419959  543705 net.go:648] Add success.
I0319 22:35:43.422993  543705 net.go:770] primary dev: ETH0
I0319 22:35:43.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:35:43.423018  543705 net.go:698] Add success.
I0319 22:35:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:35:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:35:53.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:35:53.410267  543705 memory.go:184] no items to output this cycle
I0319 22:35:53.410279  543705 cpu.go:275] no items to output this cycle
E0319 22:36:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:36:03.409887  543705 cpu.go:275] no items to output this cycle
I0319 22:36:03.409903  543705 memory.go:184] no items to output this cycle
E0319 22:36:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:36:13.409816  543705 memory.go:191] Add success.
I0319 22:36:13.409830  543705 cpu.go:282] Add success.
W0319 22:36:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:36:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:36:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:36:13.420203  543705 net.go:648] Add success.
I0319 22:36:13.423018  543705 net.go:770] primary dev: ETH0
I0319 22:36:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:36:13.423044  543705 net.go:698] Add success.
I0319 22:36:13.468981  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e314af6-1795-44a1-8195-d48a2ae2f517","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:36:13.469016  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:36:14.453929  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:36:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:36:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 22:36:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:36:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 22:36:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:36:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:36:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:36:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:36:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:36:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:36:21.329680  543705 disk_info.go:125] begin check local disk info of client
I0319 22:36:21.332115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:36:21.332121  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4480 0xc0000c4540]
E0319 22:36:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:36:23.409789  543705 memory.go:184] no items to output this cycle
I0319 22:36:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:36:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:36:33.409780  543705 memory.go:184] no items to output this cycle
I0319 22:36:33.409784  543705 cpu.go:275] no items to output this cycle
I0319 22:36:37.929735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:36:37.929743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:36:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:36:43.410719  543705 memory.go:191] Add success.
I0319 22:36:43.409803  543705 cpu.go:282] Add success.
I0319 22:36:43.420423  543705 net.go:648] Add success.
I0319 22:36:43.423596  543705 net.go:770] primary dev: ETH0
I0319 22:36:43.423609  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:36:43.423621  543705 net.go:698] Add success.
I0319 22:36:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:36:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:36:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:36:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:36:53.409789  543705 cpu.go:275] no items to output this cycle
I0319 22:36:53.409796  543705 memory.go:184] no items to output this cycle
E0319 22:37:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:37:03.409793  543705 memory.go:184] no items to output this cycle
I0319 22:37:03.409800  543705 cpu.go:275] no items to output this cycle
I0319 22:37:13.409805  543705 cpu.go:282] Add success.
E0319 22:37:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:37:13.409841  543705 memory.go:191] Add success.
W0319 22:37:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:37:13.409897  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:37:13.409902  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:37:13.420224  543705 net.go:648] Add success.
I0319 22:37:13.423182  543705 net.go:770] primary dev: ETH0
I0319 22:37:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:37:13.423207  543705 net.go:698] Add success.
I0319 22:37:13.452809  543705 event_worker.go:152] Polling the log file for events...
W0319 22:37:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:37:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 22:37:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:37:14.456865  543705 disk_worker.go:494] system disk:vda1
I0319 22:37:14.456908  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:37:14.457178  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:37:14.457186  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:37:14.457190  543705 custom_config.go:64] query custom config with name: gpu
E0319 22:37:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:37:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:37:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:37:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:37:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:37:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:37:16.472498  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:37:21.333673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:37:21.336053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:37:21.336059  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466b80 0xc000466bc0]
E0319 22:37:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:37:23.409788  543705 memory.go:184] no items to output this cycle
I0319 22:37:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:37:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:37:33.409767  543705 memory.go:184] no items to output this cycle
I0319 22:37:33.409806  543705 cpu.go:275] no items to output this cycle
E0319 22:37:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:37:43.409800  543705 memory.go:191] Add success.
I0319 22:37:43.409824  543705 cpu.go:282] Add success.
I0319 22:37:43.419879  543705 net.go:648] Add success.
I0319 22:37:43.422749  543705 net.go:770] primary dev: ETH0
I0319 22:37:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:37:43.422786  543705 net.go:698] Add success.
I0319 22:37:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:37:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:37:53.409782  543705 memory.go:184] no items to output this cycle
I0319 22:37:53.409823  543705 cpu.go:275] no items to output this cycle
E0319 22:38:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:38:03.409891  543705 memory.go:184] no items to output this cycle
I0319 22:38:03.409910  543705 cpu.go:275] no items to output this cycle
W0319 22:38:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:38:13.409743  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:38:13.409748  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:38:13.409818  543705 cpu.go:282] Add success.
E0319 22:38:13.409826  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:38:13.409844  543705 memory.go:191] Add success.
I0319 22:38:13.420293  543705 net.go:648] Add success.
I0319 22:38:13.423248  543705 net.go:770] primary dev: ETH0
I0319 22:38:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:38:13.423276  543705 net.go:698] Add success.
I0319 22:38:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:38:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:38:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 22:38:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:38:14.456575  543705 disk_worker.go:494] system disk:vda1
I0319 22:38:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:38:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:38:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:38:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:38:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:38:21.337678  543705 disk_info.go:125] begin check local disk info of client
I0319 22:38:21.340092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:38:21.340098  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aec0 0xc00007af00]
E0319 22:38:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:38:23.409802  543705 memory.go:184] no items to output this cycle
I0319 22:38:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 22:38:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:38:33.409814  543705 memory.go:184] no items to output this cycle
I0319 22:38:33.409824  543705 cpu.go:275] no items to output this cycle
E0319 22:38:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:38:43.409838  543705 memory.go:191] Add success.
I0319 22:38:43.409841  543705 cpu.go:282] Add success.
I0319 22:38:43.419987  543705 net.go:648] Add success.
I0319 22:38:43.423024  543705 net.go:770] primary dev: ETH0
I0319 22:38:43.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:38:43.423051  543705 net.go:698] Add success.
I0319 22:38:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:38:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:38:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:38:53.410396  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:38:53.410413  543705 memory.go:184] no items to output this cycle
I0319 22:38:53.410447  543705 cpu.go:275] no items to output this cycle
E0319 22:39:03.409919  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:39:03.409928  543705 cpu.go:275] no items to output this cycle
I0319 22:39:03.409938  543705 memory.go:184] no items to output this cycle
E0319 22:39:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:39:13.409809  543705 memory.go:191] Add success.
I0319 22:39:13.409810  543705 cpu.go:282] Add success.
W0319 22:39:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:39:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:39:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:39:13.420182  543705 net.go:648] Add success.
I0319 22:39:13.423015  543705 net.go:770] primary dev: ETH0
I0319 22:39:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:39:13.423040  543705 net.go:698] Add success.
I0319 22:39:13.468795  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1fdbc050-3e49-4779-88c8-3d05af260123","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:39:13.468848  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:39:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:39:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:39:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0319 22:39:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:39:14.456520  543705 disk_worker.go:494] system disk:vda1
I0319 22:39:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:39:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:39:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:39:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:39:21.341672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:39:21.344108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:39:21.344114  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa700 0xc0001aa740]
E0319 22:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:39:23.409790  543705 memory.go:184] no items to output this cycle
I0319 22:39:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 22:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:39:33.409782  543705 memory.go:184] no items to output this cycle
I0319 22:39:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 22:39:37.929900  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:39:37.929909  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:39:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:39:43.410574  543705 memory.go:191] Add success.
I0319 22:39:43.409827  543705 cpu.go:282] Add success.
I0319 22:39:43.420257  543705 net.go:648] Add success.
I0319 22:39:43.423097  543705 net.go:770] primary dev: ETH0
I0319 22:39:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:39:43.423125  543705 net.go:698] Add success.
I0319 22:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:39:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:39:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:39:53.409773  543705 memory.go:184] no items to output this cycle
I0319 22:39:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 22:40:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:40:03.409781  543705 memory.go:184] no items to output this cycle
I0319 22:40:03.409779  543705 cpu.go:275] no items to output this cycle
E0319 22:40:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:40:13.409828  543705 memory.go:191] Add success.
I0319 22:40:13.409835  543705 cpu.go:282] Add success.
W0319 22:40:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:40:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:40:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:40:13.420188  543705 net.go:648] Add success.
I0319 22:40:13.422978  543705 net.go:770] primary dev: ETH0
I0319 22:40:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:40:13.423003  543705 net.go:698] Add success.
I0319 22:40:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:40:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:40:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0319 22:40:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:40:14.456513  543705 disk_worker.go:494] system disk:vda1
I0319 22:40:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:40:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:40:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:40:16.472464  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:40:21.345672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:40:21.348128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:40:21.348134  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027bb80 0xc00027bbc0]
E0319 22:40:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:40:23.409759  543705 memory.go:184] no items to output this cycle
I0319 22:40:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:40:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:40:33.409810  543705 memory.go:184] no items to output this cycle
I0319 22:40:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 22:40:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:40:43.409788  543705 memory.go:191] Add success.
I0319 22:40:43.409817  543705 cpu.go:282] Add success.
I0319 22:40:43.419903  543705 net.go:648] Add success.
I0319 22:40:43.422924  543705 net.go:770] primary dev: ETH0
I0319 22:40:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:40:43.422956  543705 net.go:698] Add success.
I0319 22:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:40:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:40:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:40:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:40:53.409768  543705 memory.go:184] no items to output this cycle
I0319 22:40:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 22:41:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:41:03.409811  543705 memory.go:184] no items to output this cycle
I0319 22:41:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 22:41:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:41:13.409863  543705 memory.go:191] Add success.
W0319 22:41:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:41:13.409909  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:41:13.409949  543705 cpu.go:282] Add success.
I0319 22:41:13.409912  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:41:13.419727  543705 net.go:648] Add success.
I0319 22:41:13.422460  543705 net.go:770] primary dev: ETH0
I0319 22:41:13.422474  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:41:13.422485  543705 net.go:698] Add success.
I0319 22:41:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:41:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:41:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0319 22:41:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:41:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 22:41:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:41:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:41:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:41:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:41:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:41:21.349674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:41:21.352115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:41:21.352121  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fd40 0xc00037fd80]
E0319 22:41:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:41:23.409788  543705 memory.go:184] no items to output this cycle
I0319 22:41:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:41:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:41:33.409770  543705 memory.go:184] no items to output this cycle
I0319 22:41:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:41:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:41:43.409826  543705 memory.go:191] Add success.
I0319 22:41:43.409829  543705 cpu.go:282] Add success.
I0319 22:41:43.419985  543705 net.go:648] Add success.
I0319 22:41:43.422874  543705 net.go:770] primary dev: ETH0
I0319 22:41:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:41:43.422900  543705 net.go:698] Add success.
I0319 22:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:41:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:41:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:41:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:41:53.409779  543705 memory.go:184] no items to output this cycle
I0319 22:41:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 22:42:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:42:03.409800  543705 memory.go:184] no items to output this cycle
I0319 22:42:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 22:42:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:42:13.409813  543705 memory.go:191] Add success.
I0319 22:42:13.409824  543705 cpu.go:282] Add success.
W0319 22:42:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:42:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:42:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:42:13.420296  543705 net.go:648] Add success.
I0319 22:42:13.423454  543705 net.go:770] primary dev: ETH0
I0319 22:42:13.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:42:13.423479  543705 net.go:698] Add success.
W0319 22:42:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:42:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 22:42:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:42:14.456784  543705 disk_worker.go:494] system disk:vda1
I0319 22:42:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:42:14.457122  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:42:14.457130  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:42:14.457134  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:42:15.425990  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"01ced21f-85ab-42ef-a2bb-fa64ab921bc2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:42:15.426031  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0319 22:42:15.456182  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:42:15.456190  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 22:42:16.457503  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:42:16.458560  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:42:16.458614  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:42:16.458631  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:42:16.472958  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:42:21.353674  543705 disk_info.go:125] begin check local disk info of client
I0319 22:42:21.356092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:42:21.356098  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047d900 0xc00047d940]
E0319 22:42:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:42:23.409809  543705 memory.go:184] no items to output this cycle
I0319 22:42:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 22:42:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:42:33.409796  543705 memory.go:184] no items to output this cycle
I0319 22:42:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 22:42:37.930054  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:42:37.930062  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:42:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:42:43.410644  543705 memory.go:191] Add success.
I0319 22:42:43.409808  543705 cpu.go:282] Add success.
I0319 22:42:43.420346  543705 net.go:648] Add success.
I0319 22:42:43.423186  543705 net.go:770] primary dev: ETH0
I0319 22:42:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:42:43.423229  543705 net.go:698] Add success.
I0319 22:42:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:42:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:42:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:42:53.409779  543705 memory.go:184] no items to output this cycle
I0319 22:42:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 22:43:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:43:03.409783  543705 memory.go:184] no items to output this cycle
I0319 22:43:03.409788  543705 cpu.go:275] no items to output this cycle
E0319 22:43:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:43:13.409798  543705 memory.go:191] Add success.
I0319 22:43:13.409799  543705 cpu.go:282] Add success.
W0319 22:43:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:43:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:43:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:43:13.420307  543705 net.go:648] Add success.
I0319 22:43:13.423066  543705 net.go:770] primary dev: ETH0
I0319 22:43:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:43:13.423090  543705 net.go:698] Add success.
I0319 22:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:43:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:43:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 22:43:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:43:14.456593  543705 disk_worker.go:494] system disk:vda1
I0319 22:43:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:43:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:43:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:43:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:43:21.357676  543705 disk_info.go:125] begin check local disk info of client
I0319 22:43:21.360168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:43:21.360175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ac400 0xc0003ac440]
E0319 22:43:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:43:23.409767  543705 memory.go:184] no items to output this cycle
I0319 22:43:23.409786  543705 cpu.go:275] no items to output this cycle
E0319 22:43:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:43:33.409769  543705 memory.go:184] no items to output this cycle
I0319 22:43:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 22:43:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:43:43.409827  543705 memory.go:191] Add success.
I0319 22:43:43.409831  543705 cpu.go:282] Add success.
I0319 22:43:43.419851  543705 net.go:648] Add success.
I0319 22:43:43.422582  543705 net.go:770] primary dev: ETH0
I0319 22:43:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:43:43.422613  543705 net.go:698] Add success.
I0319 22:43:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:43:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:43:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:43:53.409802  543705 memory.go:184] no items to output this cycle
I0319 22:43:53.409814  543705 cpu.go:275] no items to output this cycle
E0319 22:44:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:44:03.409786  543705 cpu.go:275] no items to output this cycle
I0319 22:44:03.409790  543705 memory.go:184] no items to output this cycle
E0319 22:44:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:44:13.409796  543705 memory.go:191] Add success.
I0319 22:44:13.409795  543705 cpu.go:282] Add success.
W0319 22:44:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:44:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:44:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:44:13.420258  543705 net.go:648] Add success.
I0319 22:44:13.423079  543705 net.go:770] primary dev: ETH0
I0319 22:44:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:44:13.423108  543705 net.go:698] Add success.
I0319 22:44:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:44:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:44:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0319 22:44:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:44:14.456534  543705 disk_worker.go:494] system disk:vda1
I0319 22:44:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:44:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:44:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:44:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:44:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:44:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:44:21.361673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:44:21.364152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:44:21.364158  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba300 0xc0003ba340]
E0319 22:44:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:44:23.409793  543705 memory.go:184] no items to output this cycle
I0319 22:44:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:44:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:44:33.409780  543705 cpu.go:275] no items to output this cycle
I0319 22:44:33.409783  543705 memory.go:184] no items to output this cycle
E0319 22:44:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:44:43.409791  543705 memory.go:191] Add success.
I0319 22:44:43.409794  543705 cpu.go:282] Add success.
I0319 22:44:43.420424  543705 net.go:648] Add success.
I0319 22:44:43.423241  543705 net.go:770] primary dev: ETH0
I0319 22:44:43.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:44:43.423267  543705 net.go:698] Add success.
I0319 22:44:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:44:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:44:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:44:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:44:53.409765  543705 memory.go:184] no items to output this cycle
I0319 22:44:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:45:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:45:03.409801  543705 memory.go:184] no items to output this cycle
I0319 22:45:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 22:45:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:45:13.409786  543705 memory.go:191] Add success.
I0319 22:45:13.409801  543705 cpu.go:282] Add success.
W0319 22:45:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:45:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:45:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:45:13.420139  543705 net.go:648] Add success.
I0319 22:45:13.422627  543705 net.go:770] primary dev: ETH0
I0319 22:45:13.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:45:13.422657  543705 net.go:698] Add success.
I0319 22:45:13.464112  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bd3567da-31e7-438a-bce6-fb4335468cd5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:45:13.464144  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:45:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:45:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:45:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 22:45:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:45:14.456616  543705 disk_worker.go:494] system disk:vda1
I0319 22:45:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:45:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:45:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:45:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:45:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:45:21.365673  543705 disk_info.go:125] begin check local disk info of client
I0319 22:45:21.368094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:45:21.368100  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328080 0xc0003280c0]
E0319 22:45:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:45:23.409788  543705 memory.go:184] no items to output this cycle
I0319 22:45:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:45:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:45:33.409789  543705 memory.go:184] no items to output this cycle
I0319 22:45:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 22:45:37.930208  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:45:37.930216  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:45:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:45:43.410728  543705 memory.go:191] Add success.
I0319 22:45:43.409811  543705 cpu.go:282] Add success.
I0319 22:45:43.420440  543705 net.go:648] Add success.
I0319 22:45:43.423129  543705 net.go:770] primary dev: ETH0
I0319 22:45:43.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:45:43.423154  543705 net.go:698] Add success.
I0319 22:45:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:45:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:45:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:45:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:45:53.409768  543705 memory.go:184] no items to output this cycle
I0319 22:45:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 22:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:46:03.409784  543705 memory.go:184] no items to output this cycle
I0319 22:46:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 22:46:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:46:13.409795  543705 memory.go:191] Add success.
I0319 22:46:13.409798  543705 cpu.go:282] Add success.
W0319 22:46:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:46:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:46:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:46:13.420056  543705 net.go:648] Add success.
I0319 22:46:13.422702  543705 net.go:770] primary dev: ETH0
I0319 22:46:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:46:13.422732  543705 net.go:698] Add success.
I0319 22:46:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:46:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:46:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 22:46:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:46:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 22:46:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:46:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:46:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:46:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:46:21.369672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:46:21.372074  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:46:21.372081  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024c740 0xc00024c780]
E0319 22:46:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:46:23.409792  543705 memory.go:184] no items to output this cycle
I0319 22:46:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:46:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:46:33.409775  543705 memory.go:184] no items to output this cycle
I0319 22:46:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:46:43.409794  543705 memory.go:191] Add success.
I0319 22:46:43.409809  543705 cpu.go:282] Add success.
I0319 22:46:43.419956  543705 net.go:648] Add success.
I0319 22:46:43.422679  543705 net.go:770] primary dev: ETH0
I0319 22:46:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:46:43.422708  543705 net.go:698] Add success.
I0319 22:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:46:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:46:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:46:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:46:53.409763  543705 memory.go:184] no items to output this cycle
I0319 22:46:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:47:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:47:03.409812  543705 memory.go:184] no items to output this cycle
I0319 22:47:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 22:47:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:47:13.409778  543705 memory.go:191] Add success.
I0319 22:47:13.409804  543705 cpu.go:282] Add success.
W0319 22:47:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:47:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:47:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:47:13.420278  543705 net.go:648] Add success.
I0319 22:47:13.422987  543705 net.go:770] primary dev: ETH0
I0319 22:47:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:47:13.423025  543705 net.go:698] Add success.
I0319 22:47:13.453590  543705 event_worker.go:152] Polling the log file for events...
W0319 22:47:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:47:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 22:47:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:47:14.455903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:47:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:47:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:47:14.456562  543705 disk_worker.go:494] system disk:vda1
I0319 22:47:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:47:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:47:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:47:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:47:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:47:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:47:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:47:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:47:21.373671  543705 disk_info.go:125] begin check local disk info of client
I0319 22:47:21.376148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:47:21.376154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ac100 0xc0003ac140]
E0319 22:47:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:47:23.409761  543705 memory.go:184] no items to output this cycle
I0319 22:47:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 22:47:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:47:33.409771  543705 memory.go:184] no items to output this cycle
I0319 22:47:33.409790  543705 cpu.go:275] no items to output this cycle
E0319 22:47:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:47:43.409822  543705 memory.go:191] Add success.
I0319 22:47:43.409824  543705 cpu.go:282] Add success.
I0319 22:47:43.419955  543705 net.go:648] Add success.
I0319 22:47:43.422790  543705 net.go:770] primary dev: ETH0
I0319 22:47:43.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:47:43.422818  543705 net.go:698] Add success.
I0319 22:47:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:47:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:47:53.409765  543705 memory.go:184] no items to output this cycle
I0319 22:47:53.409785  543705 cpu.go:275] no items to output this cycle
E0319 22:48:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:48:03.409791  543705 memory.go:184] no items to output this cycle
I0319 22:48:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:48:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:48:13.409817  543705 memory.go:191] Add success.
I0319 22:48:13.409821  543705 cpu.go:282] Add success.
W0319 22:48:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:48:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:48:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:48:13.420157  543705 net.go:648] Add success.
I0319 22:48:13.423104  543705 net.go:770] primary dev: ETH0
I0319 22:48:13.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:48:13.423133  543705 net.go:698] Add success.
I0319 22:48:13.469765  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"246b2a4e-e308-48cc-b25c-716bef04f25e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:48:13.469801  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:48:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:48:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:48:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 22:48:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:48:14.456580  543705 disk_worker.go:494] system disk:vda1
I0319 22:48:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:48:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:48:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:48:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:48:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:48:21.377672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:48:21.380123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:48:21.380131  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484380 0xc0004843c0]
E0319 22:48:23.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:48:23.409878  543705 memory.go:184] no items to output this cycle
I0319 22:48:23.410028  543705 cpu.go:275] no items to output this cycle
E0319 22:48:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:48:33.409778  543705 memory.go:184] no items to output this cycle
I0319 22:48:33.409794  543705 cpu.go:275] no items to output this cycle
I0319 22:48:37.932364  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:48:37.932371  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:48:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:48:43.410946  543705 memory.go:191] Add success.
I0319 22:48:43.409824  543705 cpu.go:282] Add success.
I0319 22:48:43.419703  543705 net.go:648] Add success.
I0319 22:48:43.422458  543705 net.go:770] primary dev: ETH0
I0319 22:48:43.422471  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:48:43.422484  543705 net.go:698] Add success.
I0319 22:48:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:48:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:48:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:48:53.409779  543705 memory.go:184] no items to output this cycle
I0319 22:48:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 22:49:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:49:03.409808  543705 memory.go:184] no items to output this cycle
I0319 22:49:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 22:49:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:49:13.409780  543705 memory.go:191] Add success.
I0319 22:49:13.409801  543705 cpu.go:282] Add success.
W0319 22:49:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:49:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:49:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:49:13.420052  543705 net.go:648] Add success.
I0319 22:49:13.422864  543705 net.go:770] primary dev: ETH0
I0319 22:49:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:49:13.422894  543705 net.go:698] Add success.
I0319 22:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:49:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:49:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0319 22:49:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:49:14.456573  543705 disk_worker.go:494] system disk:vda1
I0319 22:49:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:49:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:49:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:49:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:49:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:49:21.381675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:49:21.384075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:49:21.384082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5600 0xc0002a5640]
E0319 22:49:23.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:49:23.409885  543705 memory.go:184] no items to output this cycle
I0319 22:49:23.409929  543705 cpu.go:275] no items to output this cycle
E0319 22:49:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:49:33.409799  543705 memory.go:184] no items to output this cycle
I0319 22:49:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 22:49:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:49:43.409799  543705 memory.go:191] Add success.
I0319 22:49:43.409803  543705 cpu.go:282] Add success.
I0319 22:49:43.419975  543705 net.go:648] Add success.
I0319 22:49:43.422668  543705 net.go:770] primary dev: ETH0
I0319 22:49:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:49:43.422693  543705 net.go:698] Add success.
I0319 22:49:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:49:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:49:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:49:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:49:53.409776  543705 memory.go:184] no items to output this cycle
I0319 22:49:53.409776  543705 cpu.go:275] no items to output this cycle
E0319 22:50:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:50:03.409810  543705 memory.go:184] no items to output this cycle
I0319 22:50:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 22:50:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:50:13.409794  543705 memory.go:191] Add success.
I0319 22:50:13.409813  543705 cpu.go:282] Add success.
W0319 22:50:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:50:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:50:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:50:13.420106  543705 net.go:648] Add success.
I0319 22:50:13.422557  543705 net.go:770] primary dev: ETH0
I0319 22:50:13.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:50:13.422582  543705 net.go:698] Add success.
I0319 22:50:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:50:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:50:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 22:50:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:50:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 22:50:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:50:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:50:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:50:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:50:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:50:21.385672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:50:21.388101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:50:21.388107  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369980 0xc0003699c0]
E0319 22:50:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:50:23.409800  543705 memory.go:184] no items to output this cycle
I0319 22:50:23.409815  543705 cpu.go:275] no items to output this cycle
E0319 22:50:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:50:33.409821  543705 memory.go:184] no items to output this cycle
I0319 22:50:33.409832  543705 cpu.go:275] no items to output this cycle
E0319 22:50:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:50:43.409833  543705 memory.go:191] Add success.
I0319 22:50:43.409835  543705 cpu.go:282] Add success.
I0319 22:50:43.420027  543705 net.go:648] Add success.
I0319 22:50:43.422684  543705 net.go:770] primary dev: ETH0
I0319 22:50:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:50:43.422713  543705 net.go:698] Add success.
I0319 22:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:50:53.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:50:53.409826  543705 memory.go:184] no items to output this cycle
I0319 22:50:53.409834  543705 cpu.go:275] no items to output this cycle
E0319 22:51:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:51:03.409822  543705 memory.go:184] no items to output this cycle
I0319 22:51:03.409835  543705 cpu.go:275] no items to output this cycle
E0319 22:51:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:51:13.409791  543705 memory.go:191] Add success.
I0319 22:51:13.409811  543705 cpu.go:282] Add success.
W0319 22:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:51:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:51:13.420052  543705 net.go:648] Add success.
I0319 22:51:13.422953  543705 net.go:770] primary dev: ETH0
I0319 22:51:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:51:13.422977  543705 net.go:698] Add success.
I0319 22:51:13.468504  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4bdabf63-2457-4008-8c07-2cc69cc318a3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:51:13.468536  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:51:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:51:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:51:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 22:51:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:51:14.456537  543705 disk_worker.go:494] system disk:vda1
I0319 22:51:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:51:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:51:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:51:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:51:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:51:21.391996  543705 disk_info.go:125] begin check local disk info of client
I0319 22:51:21.394482  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:51:21.394488  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f2c0 0xc00039f300]
E0319 22:51:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:51:23.409761  543705 memory.go:184] no items to output this cycle
I0319 22:51:23.409879  543705 cpu.go:275] no items to output this cycle
E0319 22:51:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:51:33.409806  543705 memory.go:184] no items to output this cycle
I0319 22:51:33.409826  543705 cpu.go:275] no items to output this cycle
I0319 22:51:37.933734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:51:37.933742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:51:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:51:43.410689  543705 memory.go:191] Add success.
I0319 22:51:43.409803  543705 cpu.go:282] Add success.
I0319 22:51:43.420430  543705 net.go:648] Add success.
I0319 22:51:43.423285  543705 net.go:770] primary dev: ETH0
I0319 22:51:43.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:51:43.423315  543705 net.go:698] Add success.
I0319 22:51:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:51:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:51:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:51:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:51:53.409769  543705 memory.go:184] no items to output this cycle
I0319 22:51:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:52:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:52:03.409780  543705 memory.go:184] no items to output this cycle
I0319 22:52:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 22:52:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:52:13.409801  543705 memory.go:191] Add success.
I0319 22:52:13.409801  543705 cpu.go:282] Add success.
W0319 22:52:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:52:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:52:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:52:13.420075  543705 net.go:648] Add success.
I0319 22:52:13.423205  543705 net.go:770] primary dev: ETH0
I0319 22:52:13.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:52:13.423232  543705 net.go:698] Add success.
W0319 22:52:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:52:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0319 22:52:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:52:14.455917  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:52:14.455925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:52:14.455932  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:52:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 22:52:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:52:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:52:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:52:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:52:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:52:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:52:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:52:16.472433  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:52:21.397675  543705 disk_info.go:125] begin check local disk info of client
I0319 22:52:21.400090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:52:21.400095  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343140 0xc000343180]
E0319 22:52:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:52:23.409791  543705 memory.go:184] no items to output this cycle
I0319 22:52:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 22:52:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:52:33.409805  543705 memory.go:184] no items to output this cycle
I0319 22:52:33.409819  543705 cpu.go:275] no items to output this cycle
E0319 22:52:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:52:43.409807  543705 memory.go:191] Add success.
I0319 22:52:43.409809  543705 cpu.go:282] Add success.
I0319 22:52:43.420035  543705 net.go:648] Add success.
I0319 22:52:43.422992  543705 net.go:770] primary dev: ETH0
I0319 22:52:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:52:43.423022  543705 net.go:698] Add success.
I0319 22:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:52:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:52:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:52:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:52:53.409778  543705 memory.go:184] no items to output this cycle
I0319 22:52:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 22:53:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:53:03.409808  543705 memory.go:184] no items to output this cycle
I0319 22:53:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 22:53:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:53:13.409792  543705 memory.go:191] Add success.
I0319 22:53:13.409795  543705 cpu.go:282] Add success.
W0319 22:53:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:53:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:53:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:53:13.420177  543705 net.go:648] Add success.
I0319 22:53:13.422831  543705 net.go:770] primary dev: ETH0
I0319 22:53:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:53:13.422860  543705 net.go:698] Add success.
I0319 22:53:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:53:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:53:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0319 22:53:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:53:14.456576  543705 disk_worker.go:494] system disk:vda1
I0319 22:53:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:53:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:53:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:53:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:53:21.401677  543705 disk_info.go:125] begin check local disk info of client
I0319 22:53:21.404084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:53:21.404091  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034f580 0xc00034f5c0]
E0319 22:53:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:53:23.409789  543705 memory.go:184] no items to output this cycle
I0319 22:53:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 22:53:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:53:33.409872  543705 cpu.go:275] no items to output this cycle
I0319 22:53:33.409898  543705 memory.go:184] no items to output this cycle
E0319 22:53:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:53:43.409803  543705 memory.go:191] Add success.
I0319 22:53:43.409806  543705 cpu.go:282] Add success.
I0319 22:53:43.419912  543705 net.go:648] Add success.
I0319 22:53:43.422564  543705 net.go:770] primary dev: ETH0
I0319 22:53:43.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:53:43.422590  543705 net.go:698] Add success.
I0319 22:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:53:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:53:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:53:53.409768  543705 memory.go:184] no items to output this cycle
I0319 22:53:53.409794  543705 cpu.go:275] no items to output this cycle
E0319 22:54:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:54:03.409804  543705 memory.go:184] no items to output this cycle
I0319 22:54:03.409819  543705 cpu.go:275] no items to output this cycle
E0319 22:54:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:54:13.409788  543705 memory.go:191] Add success.
W0319 22:54:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:54:13.409817  543705 cpu.go:282] Add success.
W0319 22:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:54:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:54:13.420492  543705 net.go:648] Add success.
I0319 22:54:13.423050  543705 net.go:770] primary dev: ETH0
I0319 22:54:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:54:13.423075  543705 net.go:698] Add success.
I0319 22:54:13.510968  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0b59467e-4d97-4016-8e43-288a50aee76e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:54:13.511002  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 22:54:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:54:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:54:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0319 22:54:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:54:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 22:54:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:54:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:54:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:54:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:54:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:54:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:54:21.405672  543705 disk_info.go:125] begin check local disk info of client
I0319 22:54:21.408075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:54:21.408081  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027ab00 0xc00027ab40]
E0319 22:54:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:54:23.409789  543705 memory.go:184] no items to output this cycle
I0319 22:54:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:54:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:54:33.409778  543705 memory.go:184] no items to output this cycle
I0319 22:54:33.409780  543705 cpu.go:275] no items to output this cycle
I0319 22:54:37.936386  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:54:37.936394  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:54:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:54:43.410658  543705 memory.go:191] Add success.
I0319 22:54:43.409825  543705 cpu.go:282] Add success.
I0319 22:54:43.420406  543705 net.go:648] Add success.
I0319 22:54:43.422965  543705 net.go:770] primary dev: ETH0
I0319 22:54:43.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:54:43.422990  543705 net.go:698] Add success.
I0319 22:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:54:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:54:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:54:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:54:53.409784  543705 cpu.go:275] no items to output this cycle
I0319 22:54:53.409787  543705 memory.go:184] no items to output this cycle
E0319 22:55:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:55:03.409806  543705 memory.go:184] no items to output this cycle
I0319 22:55:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 22:55:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:55:13.409778  543705 memory.go:191] Add success.
W0319 22:55:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 22:55:13.409809  543705 cpu.go:282] Add success.
W0319 22:55:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:55:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:55:13.420133  543705 net.go:648] Add success.
I0319 22:55:13.422827  543705 net.go:770] primary dev: ETH0
I0319 22:55:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:55:13.422855  543705 net.go:698] Add success.
I0319 22:55:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:55:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:55:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0319 22:55:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:55:14.456548  543705 disk_worker.go:494] system disk:vda1
I0319 22:55:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:55:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:55:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:55:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:55:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:55:21.409678  543705 disk_info.go:125] begin check local disk info of client
I0319 22:55:21.412081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:55:21.412087  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002092c0 0xc000209300]
E0319 22:55:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:55:23.409789  543705 memory.go:184] no items to output this cycle
I0319 22:55:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:55:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:55:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 22:55:33.409792  543705 memory.go:184] no items to output this cycle
E0319 22:55:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:55:43.409824  543705 memory.go:191] Add success.
I0319 22:55:43.409831  543705 cpu.go:282] Add success.
I0319 22:55:43.420217  543705 net.go:648] Add success.
I0319 22:55:43.423332  543705 net.go:770] primary dev: ETH0
I0319 22:55:43.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:55:43.423361  543705 net.go:698] Add success.
I0319 22:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:55:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:55:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:55:53.409792  543705 memory.go:184] no items to output this cycle
I0319 22:55:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 22:56:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:56:03.409792  543705 memory.go:184] no items to output this cycle
I0319 22:56:03.409795  543705 cpu.go:275] no items to output this cycle
E0319 22:56:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:56:13.409820  543705 memory.go:191] Add success.
I0319 22:56:13.409832  543705 cpu.go:282] Add success.
W0319 22:56:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:56:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:56:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:56:13.420393  543705 net.go:648] Add success.
I0319 22:56:13.423172  543705 net.go:770] primary dev: ETH0
I0319 22:56:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:56:13.423196  543705 net.go:698] Add success.
I0319 22:56:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:56:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:56:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0319 22:56:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:56:14.456579  543705 disk_worker.go:494] system disk:vda1
I0319 22:56:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:56:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:56:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:56:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:56:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:56:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:56:21.412798  543705 disk_info.go:125] begin check local disk info of client
I0319 22:56:21.415234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:56:21.415240  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e140 0xc00039e180]
E0319 22:56:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:56:23.409793  543705 memory.go:184] no items to output this cycle
I0319 22:56:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 22:56:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:56:33.409797  543705 memory.go:184] no items to output this cycle
I0319 22:56:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 22:56:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:56:43.409923  543705 memory.go:191] Add success.
I0319 22:56:43.409924  543705 cpu.go:282] Add success.
I0319 22:56:43.419781  543705 net.go:648] Add success.
I0319 22:56:43.422734  543705 net.go:770] primary dev: ETH0
I0319 22:56:43.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:56:43.422762  543705 net.go:698] Add success.
I0319 22:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:56:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:56:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:56:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:56:53.409783  543705 memory.go:184] no items to output this cycle
I0319 22:56:53.409787  543705 cpu.go:275] no items to output this cycle
E0319 22:57:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:57:03.409808  543705 memory.go:184] no items to output this cycle
I0319 22:57:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 22:57:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:57:13.409787  543705 memory.go:191] Add success.
I0319 22:57:13.409792  543705 cpu.go:282] Add success.
W0319 22:57:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:57:13.420103  543705 net.go:648] Add success.
I0319 22:57:13.423202  543705 net.go:770] primary dev: ETH0
I0319 22:57:13.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:57:13.423227  543705 net.go:698] Add success.
I0319 22:57:13.429816  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 22:57:13.452983  543705 event_worker.go:152] Polling the log file for events...
I0319 22:57:13.468069  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f4b30bc4-b04b-4379-a27a-1e9b6876f751","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 22:57:13.468105  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 22:57:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:57:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 22:57:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0319 22:57:14.456800  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 22:57:14.456808  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 22:57:14.456814  543705 custom_config.go:64] query custom config with name: gpu
I0319 22:57:14.456842  543705 disk_worker.go:494] system disk:vda1
I0319 22:57:14.456868  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 22:57:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 22:57:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:57:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 22:57:16.457970  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 22:57:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:57:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:57:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:57:21.415795  543705 disk_info.go:125] begin check local disk info of client
I0319 22:57:21.418285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:57:21.418291  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265300 0xc000265340]
E0319 22:57:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:57:23.409877  543705 cpu.go:275] no items to output this cycle
I0319 22:57:23.409888  543705 memory.go:184] no items to output this cycle
E0319 22:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:57:33.409780  543705 memory.go:184] no items to output this cycle
I0319 22:57:33.409803  543705 cpu.go:275] no items to output this cycle
I0319 22:57:37.937748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 22:57:37.937756  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 22:57:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:57:43.410592  543705 memory.go:191] Add success.
I0319 22:57:43.409831  543705 cpu.go:282] Add success.
I0319 22:57:43.420351  543705 net.go:648] Add success.
I0319 22:57:43.423054  543705 net.go:770] primary dev: ETH0
I0319 22:57:43.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:57:43.423080  543705 net.go:698] Add success.
I0319 22:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:57:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:57:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:57:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:57:53.409793  543705 memory.go:184] no items to output this cycle
I0319 22:57:53.409806  543705 cpu.go:275] no items to output this cycle
E0319 22:58:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:58:03.409782  543705 memory.go:184] no items to output this cycle
I0319 22:58:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 22:58:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:58:13.409798  543705 cpu.go:282] Add success.
I0319 22:58:13.409806  543705 memory.go:191] Add success.
W0319 22:58:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:58:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:58:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:58:13.420076  543705 net.go:648] Add success.
I0319 22:58:13.422924  543705 net.go:770] primary dev: ETH0
I0319 22:58:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:58:13.422953  543705 net.go:698] Add success.
I0319 22:58:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:58:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:58:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 22:58:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:58:14.456584  543705 disk_worker.go:494] system disk:vda1
I0319 22:58:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:58:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:58:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:58:16.472469  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:58:21.418804  543705 disk_info.go:125] begin check local disk info of client
I0319 22:58:21.421270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:58:21.421276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2340 0xc0002a2380]
E0319 22:58:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:58:23.409775  543705 memory.go:184] no items to output this cycle
I0319 22:58:23.409778  543705 cpu.go:275] no items to output this cycle
E0319 22:58:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:58:33.409773  543705 memory.go:184] no items to output this cycle
I0319 22:58:33.409791  543705 cpu.go:275] no items to output this cycle
E0319 22:58:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:58:43.409802  543705 memory.go:191] Add success.
I0319 22:58:43.409805  543705 cpu.go:282] Add success.
I0319 22:58:43.419879  543705 net.go:648] Add success.
I0319 22:58:43.422680  543705 net.go:770] primary dev: ETH0
I0319 22:58:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:58:43.422707  543705 net.go:698] Add success.
I0319 22:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:58:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:58:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:58:53.410399  543705 memory.go:184] no items to output this cycle
I0319 22:58:53.410411  543705 cpu.go:275] no items to output this cycle
E0319 22:59:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:59:03.409809  543705 memory.go:184] no items to output this cycle
I0319 22:59:03.409828  543705 cpu.go:275] no items to output this cycle
E0319 22:59:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:59:13.409781  543705 memory.go:191] Add success.
I0319 22:59:13.409800  543705 cpu.go:282] Add success.
W0319 22:59:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 22:59:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 22:59:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 22:59:13.420113  543705 net.go:648] Add success.
I0319 22:59:13.423276  543705 net.go:770] primary dev: ETH0
I0319 22:59:13.423291  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:59:13.423302  543705 net.go:698] Add success.
I0319 22:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0319 22:59:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 22:59:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0319 22:59:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0319 22:59:14.456486  543705 disk_worker.go:494] system disk:vda1
I0319 22:59:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 22:59:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 22:59:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:59:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:59:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 22:59:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0319 22:59:21.421809  543705 disk_info.go:125] begin check local disk info of client
I0319 22:59:21.424234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 22:59:21.424241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0240 0xc0003b0280]
E0319 22:59:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:59:23.409764  543705 memory.go:184] no items to output this cycle
I0319 22:59:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 22:59:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:59:33.409807  543705 memory.go:184] no items to output this cycle
I0319 22:59:33.409821  543705 cpu.go:275] no items to output this cycle
E0319 22:59:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:59:43.409789  543705 memory.go:191] Add success.
I0319 22:59:43.409815  543705 cpu.go:282] Add success.
I0319 22:59:43.420414  543705 net.go:648] Add success.
I0319 22:59:43.423557  543705 net.go:770] primary dev: ETH0
I0319 22:59:43.423571  543705 net.go:802] Send network stats successfully!,count is 6
I0319 22:59:43.423584  543705 net.go:698] Add success.
I0319 22:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 22:59:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 22:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 22:59:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 22:59:53.409793  543705 memory.go:184] no items to output this cycle
I0319 22:59:53.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:00:03.409782  543705 memory.go:184] no items to output this cycle
I0319 23:00:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 23:00:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:00:13.409791  543705 memory.go:191] Add success.
I0319 23:00:13.409794  543705 cpu.go:282] Add success.
W0319 23:00:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:00:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:00:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:00:13.420079  543705 net.go:648] Add success.
I0319 23:00:13.422774  543705 net.go:770] primary dev: ETH0
I0319 23:00:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:00:13.422801  543705 net.go:698] Add success.
I0319 23:00:13.468952  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cbd76e31-bcf3-42bf-8479-5018f0e6554d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:00:13.468993  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:00:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:00:14.455386  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:00:14.455396  543705 disk_worker.go:708] disk space is not compliant
W0319 23:00:14.455399  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:00:14.457547  543705 disk_worker.go:494] system disk:vda1
I0319 23:00:14.457575  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:00:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:00:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:00:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:00:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:00:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:00:21.424811  543705 disk_info.go:125] begin check local disk info of client
I0319 23:00:21.427209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:00:21.427215  543705 disk_info.go:196] parse disk info done, disk is : [0xc000365340 0xc000365380]
E0319 23:00:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:00:23.409791  543705 memory.go:184] no items to output this cycle
I0319 23:00:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:00:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:00:33.409776  543705 memory.go:184] no items to output this cycle
I0319 23:00:33.409777  543705 cpu.go:275] no items to output this cycle
I0319 23:00:37.938019  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:00:37.938027  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:00:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:00:43.410596  543705 memory.go:191] Add success.
I0319 23:00:43.409809  543705 cpu.go:282] Add success.
I0319 23:00:43.420290  543705 net.go:648] Add success.
I0319 23:00:43.422635  543705 net.go:770] primary dev: ETH0
I0319 23:00:43.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:00:43.422664  543705 net.go:698] Add success.
I0319 23:00:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:00:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:00:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:00:53.409783  543705 memory.go:184] no items to output this cycle
I0319 23:00:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 23:01:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:01:03.409813  543705 memory.go:184] no items to output this cycle
I0319 23:01:03.409818  543705 cpu.go:275] no items to output this cycle
E0319 23:01:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:01:13.409831  543705 memory.go:191] Add success.
I0319 23:01:13.409834  543705 cpu.go:282] Add success.
W0319 23:01:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:01:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:01:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:01:13.420325  543705 net.go:648] Add success.
I0319 23:01:13.422999  543705 net.go:770] primary dev: ETH0
I0319 23:01:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:01:13.423024  543705 net.go:698] Add success.
I0319 23:01:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:01:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:01:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 23:01:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:01:14.456566  543705 disk_worker.go:494] system disk:vda1
I0319 23:01:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:01:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:01:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:01:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:01:21.427824  543705 disk_info.go:125] begin check local disk info of client
I0319 23:01:21.430271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:01:21.430277  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364600 0xc000364640]
E0319 23:01:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:01:23.409798  543705 memory.go:184] no items to output this cycle
I0319 23:01:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:01:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:01:33.409770  543705 memory.go:184] no items to output this cycle
I0319 23:01:33.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:01:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:01:43.409825  543705 memory.go:191] Add success.
I0319 23:01:43.409829  543705 cpu.go:282] Add success.
I0319 23:01:43.420099  543705 net.go:648] Add success.
I0319 23:01:43.423276  543705 net.go:770] primary dev: ETH0
I0319 23:01:43.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:01:43.423301  543705 net.go:698] Add success.
I0319 23:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:01:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:01:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:01:53.409801  543705 memory.go:184] no items to output this cycle
I0319 23:01:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 23:02:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:02:03.409771  543705 memory.go:184] no items to output this cycle
I0319 23:02:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 23:02:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:02:13.409796  543705 memory.go:191] Add success.
I0319 23:02:13.409799  543705 cpu.go:282] Add success.
W0319 23:02:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:02:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:02:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:02:13.419739  543705 net.go:648] Add success.
I0319 23:02:13.422460  543705 net.go:770] primary dev: ETH0
I0319 23:02:13.422475  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:02:13.422488  543705 net.go:698] Add success.
W0319 23:02:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:02:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0319 23:02:14.455148  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:02:14.456895  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:02:14.456904  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:02:14.456910  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:02:14.456981  543705 disk_worker.go:494] system disk:vda1
I0319 23:02:14.457021  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:02:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:02:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
E0319 23:02:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:02:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:02:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:02:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:02:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:02:21.430844  543705 disk_info.go:125] begin check local disk info of client
I0319 23:02:21.433288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:02:21.433295  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a600 0xc00027a640]
E0319 23:02:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:02:23.409757  543705 memory.go:184] no items to output this cycle
I0319 23:02:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 23:02:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:02:33.409795  543705 memory.go:184] no items to output this cycle
I0319 23:02:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 23:02:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:02:43.409823  543705 memory.go:191] Add success.
I0319 23:02:43.409826  543705 cpu.go:282] Add success.
I0319 23:02:43.420005  543705 net.go:648] Add success.
I0319 23:02:43.422700  543705 net.go:770] primary dev: ETH0
I0319 23:02:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:02:43.422725  543705 net.go:698] Add success.
I0319 23:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:02:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:02:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:02:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:02:53.409785  543705 memory.go:184] no items to output this cycle
I0319 23:02:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 23:03:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:03:03.409806  543705 memory.go:184] no items to output this cycle
I0319 23:03:03.409822  543705 cpu.go:275] no items to output this cycle
E0319 23:03:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:03:13.409774  543705 memory.go:191] Add success.
W0319 23:03:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 23:03:13.409807  543705 cpu.go:282] Add success.
W0319 23:03:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:03:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:03:13.419726  543705 net.go:648] Add success.
I0319 23:03:13.422501  543705 net.go:770] primary dev: ETH0
I0319 23:03:13.422513  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:03:13.422524  543705 net.go:698] Add success.
I0319 23:03:13.468668  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"765c46a8-3699-4c1e-a89d-3013c6db3250","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:03:13.468698  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:03:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:03:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 23:03:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:03:14.456689  543705 disk_worker.go:494] system disk:vda1
I0319 23:03:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:03:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:03:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:03:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:03:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:03:21.433868  543705 disk_info.go:125] begin check local disk info of client
I0319 23:03:21.436476  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:03:21.436482  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bf900 0xc0004bf940]
E0319 23:03:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:03:23.409790  543705 memory.go:184] no items to output this cycle
I0319 23:03:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:03:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:03:33.409772  543705 memory.go:184] no items to output this cycle
I0319 23:03:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 23:03:37.940415  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:03:37.940423  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:03:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:03:43.410660  543705 memory.go:191] Add success.
I0319 23:03:43.409805  543705 cpu.go:282] Add success.
I0319 23:03:43.420388  543705 net.go:648] Add success.
I0319 23:03:43.423146  543705 net.go:770] primary dev: ETH0
I0319 23:03:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:03:43.423177  543705 net.go:698] Add success.
I0319 23:03:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:03:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:03:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:03:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:03:53.409773  543705 memory.go:184] no items to output this cycle
I0319 23:03:53.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:04:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:04:03.409806  543705 memory.go:184] no items to output this cycle
I0319 23:04:03.409815  543705 cpu.go:275] no items to output this cycle
E0319 23:04:13.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:04:13.409928  543705 memory.go:191] Add success.
I0319 23:04:13.410012  543705 cpu.go:282] Add success.
W0319 23:04:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:04:13.410160  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:04:13.410171  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:04:13.419714  543705 net.go:648] Add success.
I0319 23:04:13.422333  543705 net.go:770] primary dev: ETH0
I0319 23:04:13.422349  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:04:13.422362  543705 net.go:698] Add success.
I0319 23:04:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:04:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:04:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 23:04:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:04:14.456576  543705 disk_worker.go:494] system disk:vda1
I0319 23:04:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:04:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:04:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:04:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:04:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:04:16.472486  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:04:21.436878  543705 disk_info.go:125] begin check local disk info of client
I0319 23:04:21.439300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:04:21.439306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004988c0 0xc000498900]
E0319 23:04:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:04:23.409788  543705 memory.go:184] no items to output this cycle
I0319 23:04:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 23:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:04:33.409784  543705 memory.go:184] no items to output this cycle
I0319 23:04:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 23:04:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:04:43.409800  543705 memory.go:191] Add success.
I0319 23:04:43.409799  543705 cpu.go:282] Add success.
I0319 23:04:43.419876  543705 net.go:648] Add success.
I0319 23:04:43.422523  543705 net.go:770] primary dev: ETH0
I0319 23:04:43.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:04:43.422554  543705 net.go:698] Add success.
I0319 23:04:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:04:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:04:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:04:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:04:53.409780  543705 memory.go:184] no items to output this cycle
I0319 23:04:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 23:05:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:05:03.409806  543705 memory.go:184] no items to output this cycle
I0319 23:05:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 23:05:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:05:13.409811  543705 memory.go:191] Add success.
I0319 23:05:13.409820  543705 cpu.go:282] Add success.
W0319 23:05:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:05:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:05:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:05:13.420303  543705 net.go:648] Add success.
I0319 23:05:13.423121  543705 net.go:770] primary dev: ETH0
I0319 23:05:13.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:05:13.423150  543705 net.go:698] Add success.
I0319 23:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:05:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:05:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 23:05:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:05:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 23:05:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:05:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:05:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:05:16.472448  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:05:21.439892  543705 disk_info.go:125] begin check local disk info of client
I0319 23:05:21.442320  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:05:21.442326  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c41c0 0xc0000c4200]
E0319 23:05:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:05:23.409789  543705 memory.go:184] no items to output this cycle
I0319 23:05:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 23:05:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:05:33.409799  543705 memory.go:184] no items to output this cycle
I0319 23:05:33.409813  543705 cpu.go:275] no items to output this cycle
E0319 23:05:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:05:43.409823  543705 memory.go:191] Add success.
I0319 23:05:43.409827  543705 cpu.go:282] Add success.
I0319 23:05:43.419978  543705 net.go:648] Add success.
I0319 23:05:43.422501  543705 net.go:770] primary dev: ETH0
I0319 23:05:43.422514  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:05:43.422526  543705 net.go:698] Add success.
I0319 23:05:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:05:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:05:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:05:53.409768  543705 memory.go:184] no items to output this cycle
I0319 23:05:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 23:06:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:06:03.409775  543705 memory.go:184] no items to output this cycle
I0319 23:06:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 23:06:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:06:13.409823  543705 memory.go:191] Add success.
I0319 23:06:13.409832  543705 cpu.go:282] Add success.
W0319 23:06:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:06:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:06:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:06:13.420141  543705 net.go:648] Add success.
I0319 23:06:13.422791  543705 net.go:770] primary dev: ETH0
I0319 23:06:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:06:13.422815  543705 net.go:698] Add success.
I0319 23:06:13.534743  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9711a74f-25b2-4be5-991d-0a9f1b37960f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:06:13.534776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:06:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:06:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0319 23:06:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:06:14.456732  543705 disk_worker.go:494] system disk:vda1
I0319 23:06:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:06:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:06:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:06:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:06:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:06:21.442907  543705 disk_info.go:125] begin check local disk info of client
I0319 23:06:21.445344  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:06:21.445350  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ebdc0 0xc0004ebe00]
E0319 23:06:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:06:23.409787  543705 memory.go:184] no items to output this cycle
I0319 23:06:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:06:33.409796  543705 memory.go:184] no items to output this cycle
I0319 23:06:33.409808  543705 cpu.go:275] no items to output this cycle
I0319 23:06:37.941759  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:06:37.941768  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:06:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:06:43.410792  543705 memory.go:191] Add success.
I0319 23:06:43.409805  543705 cpu.go:282] Add success.
I0319 23:06:43.420569  543705 net.go:648] Add success.
I0319 23:06:43.423613  543705 net.go:770] primary dev: ETH0
I0319 23:06:43.423627  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:06:43.423654  543705 net.go:698] Add success.
I0319 23:06:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:06:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:06:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:06:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:06:53.409791  543705 memory.go:184] no items to output this cycle
I0319 23:06:53.409809  543705 cpu.go:275] no items to output this cycle
E0319 23:07:03.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:07:03.409922  543705 memory.go:184] no items to output this cycle
I0319 23:07:03.409928  543705 cpu.go:275] no items to output this cycle
E0319 23:07:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:07:13.409822  543705 memory.go:191] Add success.
I0319 23:07:13.409837  543705 cpu.go:282] Add success.
W0319 23:07:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:07:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:07:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:07:13.420194  543705 net.go:648] Add success.
I0319 23:07:13.422869  543705 net.go:770] primary dev: ETH0
I0319 23:07:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:07:13.422894  543705 net.go:698] Add success.
I0319 23:07:13.453565  543705 event_worker.go:152] Polling the log file for events...
W0319 23:07:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:07:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 23:07:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:07:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:07:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:07:14.455914  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:07:14.456545  543705 disk_worker.go:494] system disk:vda1
I0319 23:07:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:07:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:07:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:07:16.457899  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:07:16.457897  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:07:16.457954  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:07:16.457974  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:07:16.472353  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:07:21.445913  543705 disk_info.go:125] begin check local disk info of client
I0319 23:07:21.448322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:07:21.448329  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ebbc0 0xc0004ebc00]
E0319 23:07:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:07:23.409796  543705 memory.go:184] no items to output this cycle
I0319 23:07:23.409811  543705 cpu.go:275] no items to output this cycle
E0319 23:07:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:07:33.409814  543705 memory.go:184] no items to output this cycle
I0319 23:07:33.409827  543705 cpu.go:275] no items to output this cycle
E0319 23:07:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:07:43.409803  543705 memory.go:191] Add success.
I0319 23:07:43.409822  543705 cpu.go:282] Add success.
I0319 23:07:43.420008  543705 net.go:648] Add success.
I0319 23:07:43.422684  543705 net.go:770] primary dev: ETH0
I0319 23:07:43.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:07:43.422711  543705 net.go:698] Add success.
I0319 23:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:07:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:07:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:07:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:07:53.409787  543705 memory.go:184] no items to output this cycle
I0319 23:07:53.409816  543705 cpu.go:275] no items to output this cycle
E0319 23:08:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:08:03.409818  543705 memory.go:184] no items to output this cycle
I0319 23:08:03.409832  543705 cpu.go:275] no items to output this cycle
E0319 23:08:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:08:13.409789  543705 memory.go:191] Add success.
I0319 23:08:13.409791  543705 cpu.go:282] Add success.
W0319 23:08:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:08:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:08:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:08:13.420184  543705 net.go:648] Add success.
I0319 23:08:13.423090  543705 net.go:770] primary dev: ETH0
I0319 23:08:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:08:13.423116  543705 net.go:698] Add success.
I0319 23:08:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:08:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:08:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0319 23:08:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:08:14.456484  543705 disk_worker.go:494] system disk:vda1
I0319 23:08:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:08:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:08:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:08:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:08:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:08:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:08:21.448936  543705 disk_info.go:125] begin check local disk info of client
I0319 23:08:21.451380  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:08:21.451386  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329040 0xc000329080]
E0319 23:08:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:08:23.409795  543705 memory.go:184] no items to output this cycle
I0319 23:08:23.409809  543705 cpu.go:275] no items to output this cycle
E0319 23:08:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:08:33.409805  543705 memory.go:184] no items to output this cycle
I0319 23:08:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:08:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:08:43.409808  543705 memory.go:191] Add success.
I0319 23:08:43.409808  543705 cpu.go:282] Add success.
I0319 23:08:43.419963  543705 net.go:648] Add success.
I0319 23:08:43.422604  543705 net.go:770] primary dev: ETH0
I0319 23:08:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:08:43.422630  543705 net.go:698] Add success.
I0319 23:08:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:08:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:08:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:08:53.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:08:53.410257  543705 memory.go:184] no items to output this cycle
I0319 23:08:53.410262  543705 cpu.go:275] no items to output this cycle
E0319 23:09:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:09:03.409787  543705 memory.go:184] no items to output this cycle
I0319 23:09:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 23:09:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:09:13.409794  543705 memory.go:191] Add success.
I0319 23:09:13.409795  543705 cpu.go:282] Add success.
W0319 23:09:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:09:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:09:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:09:13.420133  543705 net.go:648] Add success.
I0319 23:09:13.422929  543705 net.go:770] primary dev: ETH0
I0319 23:09:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:09:13.422953  543705 net.go:698] Add success.
I0319 23:09:13.469429  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5be7790f-f9ae-4a03-89e1-e9172625425f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:09:13.469461  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:09:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:09:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:09:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0319 23:09:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:09:14.456666  543705 disk_worker.go:494] system disk:vda1
I0319 23:09:14.456696  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:09:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:09:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:09:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:09:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:09:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:09:21.451948  543705 disk_info.go:125] begin check local disk info of client
I0319 23:09:21.454392  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:09:21.454398  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034ce40 0xc00034ce80]
E0319 23:09:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:09:23.409772  543705 memory.go:184] no items to output this cycle
I0319 23:09:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 23:09:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:09:33.409767  543705 memory.go:184] no items to output this cycle
I0319 23:09:33.409797  543705 cpu.go:275] no items to output this cycle
I0319 23:09:37.944433  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:09:37.944443  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:09:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:09:43.410992  543705 memory.go:191] Add success.
I0319 23:09:43.409831  543705 cpu.go:282] Add success.
I0319 23:09:43.420703  543705 net.go:648] Add success.
I0319 23:09:43.423603  543705 net.go:770] primary dev: ETH0
I0319 23:09:43.423615  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:09:43.423628  543705 net.go:698] Add success.
I0319 23:09:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:09:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:09:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:09:53.410223  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:09:53.410241  543705 memory.go:184] no items to output this cycle
I0319 23:09:53.410252  543705 cpu.go:275] no items to output this cycle
E0319 23:10:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:10:03.409799  543705 memory.go:184] no items to output this cycle
I0319 23:10:03.409813  543705 cpu.go:275] no items to output this cycle
E0319 23:10:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:10:13.409795  543705 memory.go:191] Add success.
I0319 23:10:13.409796  543705 cpu.go:282] Add success.
W0319 23:10:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:10:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:10:13.420150  543705 net.go:648] Add success.
I0319 23:10:13.423035  543705 net.go:770] primary dev: ETH0
I0319 23:10:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:10:13.423065  543705 net.go:698] Add success.
I0319 23:10:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:10:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:10:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 23:10:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:10:14.456597  543705 disk_worker.go:494] system disk:vda1
I0319 23:10:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:10:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:10:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:10:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:10:16.472449  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:10:21.454965  543705 disk_info.go:125] begin check local disk info of client
I0319 23:10:21.457342  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:10:21.457348  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328900 0xc000328940]
E0319 23:10:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:10:23.409788  543705 memory.go:184] no items to output this cycle
I0319 23:10:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:10:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:10:33.409779  543705 memory.go:184] no items to output this cycle
I0319 23:10:33.409782  543705 cpu.go:275] no items to output this cycle
E0319 23:10:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:10:43.409802  543705 memory.go:191] Add success.
I0319 23:10:43.409804  543705 cpu.go:282] Add success.
I0319 23:10:43.420032  543705 net.go:648] Add success.
I0319 23:10:43.423280  543705 net.go:770] primary dev: ETH0
I0319 23:10:43.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:10:43.423309  543705 net.go:698] Add success.
I0319 23:10:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:10:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:10:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:10:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:10:53.409797  543705 memory.go:184] no items to output this cycle
I0319 23:10:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 23:11:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:11:03.409779  543705 memory.go:184] no items to output this cycle
I0319 23:11:03.409799  543705 cpu.go:275] no items to output this cycle
E0319 23:11:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:11:13.409797  543705 memory.go:191] Add success.
I0319 23:11:13.409800  543705 cpu.go:282] Add success.
W0319 23:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:11:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:11:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:11:13.420243  543705 net.go:648] Add success.
I0319 23:11:13.423023  543705 net.go:770] primary dev: ETH0
I0319 23:11:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:11:13.423052  543705 net.go:698] Add success.
I0319 23:11:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:11:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:11:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 23:11:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:11:14.456570  543705 disk_worker.go:494] system disk:vda1
I0319 23:11:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:11:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:11:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:11:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:11:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:11:21.457981  543705 disk_info.go:125] begin check local disk info of client
I0319 23:11:21.460464  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:11:21.460471  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328c00 0xc000328c40]
E0319 23:11:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:11:23.409762  543705 memory.go:184] no items to output this cycle
I0319 23:11:23.409796  543705 cpu.go:275] no items to output this cycle
E0319 23:11:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:11:33.409781  543705 memory.go:184] no items to output this cycle
I0319 23:11:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 23:11:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:11:43.409799  543705 memory.go:191] Add success.
I0319 23:11:43.409802  543705 cpu.go:282] Add success.
I0319 23:11:43.419864  543705 net.go:648] Add success.
I0319 23:11:43.423017  543705 net.go:770] primary dev: ETH0
I0319 23:11:43.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:11:43.423043  543705 net.go:698] Add success.
I0319 23:11:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:11:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:11:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:11:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:11:53.409774  543705 memory.go:184] no items to output this cycle
I0319 23:11:53.409778  543705 cpu.go:275] no items to output this cycle
E0319 23:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:12:03.409776  543705 memory.go:184] no items to output this cycle
I0319 23:12:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 23:12:13.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:12:13.409905  543705 memory.go:191] Add success.
W0319 23:12:13.409937  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:12:13.409950  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:12:13.409956  543705 cpu.go:282] Add success.
I0319 23:12:13.409960  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:12:13.419708  543705 net.go:648] Add success.
I0319 23:12:13.422323  543705 net.go:770] primary dev: ETH0
I0319 23:12:13.422336  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:12:13.422347  543705 net.go:698] Add success.
I0319 23:12:13.463250  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"278fcee4-44ed-4d63-8740-3a62268e2d7b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:12:13.463281  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 23:12:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:12:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0319 23:12:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:12:14.456988  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:12:14.456997  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:12:14.457002  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:12:14.457023  543705 disk_worker.go:494] system disk:vda1
I0319 23:12:14.457061  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:12:15.456497  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:12:15.456505  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:12:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:12:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:12:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:12:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:12:16.472345  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:12:21.460987  543705 disk_info.go:125] begin check local disk info of client
I0319 23:12:21.463454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:12:21.463462  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac40 0xc0001aac80]
E0319 23:12:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:12:23.409758  543705 memory.go:184] no items to output this cycle
I0319 23:12:23.409788  543705 cpu.go:275] no items to output this cycle
E0319 23:12:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:12:33.409763  543705 memory.go:184] no items to output this cycle
I0319 23:12:33.409786  543705 cpu.go:275] no items to output this cycle
I0319 23:12:37.945736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:12:37.945743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:12:43.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:12:43.410734  543705 memory.go:191] Add success.
I0319 23:12:43.409899  543705 cpu.go:282] Add success.
I0319 23:12:43.420437  543705 net.go:648] Add success.
I0319 23:12:43.423147  543705 net.go:770] primary dev: ETH0
I0319 23:12:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:12:43.423174  543705 net.go:698] Add success.
I0319 23:12:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:12:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:12:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:12:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:12:53.409778  543705 memory.go:184] no items to output this cycle
I0319 23:12:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 23:13:03.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:13:03.409870  543705 memory.go:184] no items to output this cycle
I0319 23:13:03.409940  543705 cpu.go:275] no items to output this cycle
E0319 23:13:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:13:13.409813  543705 memory.go:191] Add success.
I0319 23:13:13.409822  543705 cpu.go:282] Add success.
W0319 23:13:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:13:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:13:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:13:13.420165  543705 net.go:648] Add success.
I0319 23:13:13.423121  543705 net.go:770] primary dev: ETH0
I0319 23:13:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:13:13.423146  543705 net.go:698] Add success.
I0319 23:13:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:13:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:13:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0319 23:13:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:13:14.456973  543705 disk_worker.go:494] system disk:vda1
I0319 23:13:14.457016  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:13:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:13:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:13:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:13:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:13:21.464096  543705 disk_info.go:125] begin check local disk info of client
I0319 23:13:21.466514  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:13:21.466522  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328400 0xc000328440]
E0319 23:13:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:13:23.409792  543705 memory.go:184] no items to output this cycle
I0319 23:13:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 23:13:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:13:33.409766  543705 memory.go:184] no items to output this cycle
I0319 23:13:33.409795  543705 cpu.go:275] no items to output this cycle
E0319 23:13:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:13:43.409817  543705 memory.go:191] Add success.
I0319 23:13:43.409826  543705 cpu.go:282] Add success.
I0319 23:13:43.419985  543705 net.go:648] Add success.
I0319 23:13:43.422524  543705 net.go:770] primary dev: ETH0
I0319 23:13:43.422538  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:13:43.422549  543705 net.go:698] Add success.
I0319 23:13:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:13:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:13:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:13:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:13:53.409778  543705 memory.go:184] no items to output this cycle
I0319 23:13:53.409777  543705 cpu.go:275] no items to output this cycle
E0319 23:14:03.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:14:03.409866  543705 memory.go:184] no items to output this cycle
I0319 23:14:03.410018  543705 cpu.go:275] no items to output this cycle
E0319 23:14:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:14:13.409793  543705 memory.go:191] Add success.
I0319 23:14:13.409798  543705 cpu.go:282] Add success.
W0319 23:14:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:14:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:14:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:14:13.420272  543705 net.go:648] Add success.
I0319 23:14:13.422734  543705 net.go:770] primary dev: ETH0
I0319 23:14:13.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:14:13.422759  543705 net.go:698] Add success.
I0319 23:14:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:14:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:14:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0319 23:14:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:14:14.456483  543705 disk_worker.go:494] system disk:vda1
I0319 23:14:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:14:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:14:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:14:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:14:21.467087  543705 disk_info.go:125] begin check local disk info of client
I0319 23:14:21.469471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:14:21.469477  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328340 0xc000328380]
E0319 23:14:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:14:23.409788  543705 memory.go:184] no items to output this cycle
I0319 23:14:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 23:14:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:14:33.409779  543705 memory.go:184] no items to output this cycle
I0319 23:14:33.409779  543705 cpu.go:275] no items to output this cycle
E0319 23:14:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:14:43.409804  543705 cpu.go:282] Add success.
I0319 23:14:43.409807  543705 memory.go:191] Add success.
I0319 23:14:43.419874  543705 net.go:648] Add success.
I0319 23:14:43.422623  543705 net.go:770] primary dev: ETH0
I0319 23:14:43.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:14:43.422649  543705 net.go:698] Add success.
I0319 23:14:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:14:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:14:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:14:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:14:53.409790  543705 memory.go:184] no items to output this cycle
I0319 23:14:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:15:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:15:03.409804  543705 memory.go:184] no items to output this cycle
I0319 23:15:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 23:15:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:15:13.409814  543705 memory.go:191] Add success.
I0319 23:15:13.409822  543705 cpu.go:282] Add success.
W0319 23:15:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:15:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:15:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:15:13.420120  543705 net.go:648] Add success.
I0319 23:15:13.422778  543705 net.go:770] primary dev: ETH0
I0319 23:15:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:15:13.422803  543705 net.go:698] Add success.
I0319 23:15:13.468959  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"030feaef-767f-4dd0-85a5-2e3b6f6e72a4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:15:13.468993  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:15:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:15:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:15:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0319 23:15:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:15:14.456720  543705 disk_worker.go:494] system disk:vda1
I0319 23:15:14.456755  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:15:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:15:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:15:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:15:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:15:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:15:21.470030  543705 disk_info.go:125] begin check local disk info of client
I0319 23:15:21.472457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:15:21.472462  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004755c0 0xc000475600]
E0319 23:15:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:15:23.409761  543705 memory.go:184] no items to output this cycle
I0319 23:15:23.409784  543705 cpu.go:275] no items to output this cycle
E0319 23:15:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:15:33.409774  543705 memory.go:184] no items to output this cycle
I0319 23:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0319 23:15:37.948442  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:15:37.948449  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:15:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:15:43.410672  543705 memory.go:191] Add success.
I0319 23:15:43.409832  543705 cpu.go:282] Add success.
I0319 23:15:43.420461  543705 net.go:648] Add success.
I0319 23:15:43.423117  543705 net.go:770] primary dev: ETH0
I0319 23:15:43.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:15:43.423146  543705 net.go:698] Add success.
I0319 23:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:15:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:15:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:15:53.409780  543705 cpu.go:275] no items to output this cycle
I0319 23:15:53.409782  543705 memory.go:184] no items to output this cycle
E0319 23:16:03.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:16:03.409903  543705 cpu.go:275] no items to output this cycle
I0319 23:16:03.409910  543705 memory.go:184] no items to output this cycle
E0319 23:16:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:16:13.409793  543705 memory.go:191] Add success.
I0319 23:16:13.409796  543705 cpu.go:282] Add success.
W0319 23:16:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:16:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:16:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:16:13.420092  543705 net.go:648] Add success.
I0319 23:16:13.423195  543705 net.go:770] primary dev: ETH0
I0319 23:16:13.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:16:13.423219  543705 net.go:698] Add success.
I0319 23:16:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:16:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:16:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0319 23:16:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:16:14.456500  543705 disk_worker.go:494] system disk:vda1
I0319 23:16:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:16:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:16:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:16:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:16:16.472427  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:16:21.473053  543705 disk_info.go:125] begin check local disk info of client
I0319 23:16:21.475506  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:16:21.475512  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329780 0xc0003297c0]
E0319 23:16:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:16:23.409806  543705 memory.go:184] no items to output this cycle
I0319 23:16:23.409821  543705 cpu.go:275] no items to output this cycle
E0319 23:16:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:16:33.409780  543705 memory.go:184] no items to output this cycle
I0319 23:16:33.409780  543705 cpu.go:275] no items to output this cycle
E0319 23:16:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:16:43.409813  543705 memory.go:191] Add success.
I0319 23:16:43.409819  543705 cpu.go:282] Add success.
I0319 23:16:43.419999  543705 net.go:648] Add success.
I0319 23:16:43.423786  543705 net.go:770] primary dev: ETH0
I0319 23:16:43.423798  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:16:43.423811  543705 net.go:698] Add success.
I0319 23:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:16:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:16:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:16:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:16:53.409782  543705 memory.go:184] no items to output this cycle
I0319 23:16:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 23:17:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:17:03.409788  543705 memory.go:184] no items to output this cycle
I0319 23:17:03.409799  543705 cpu.go:275] no items to output this cycle
I0319 23:17:13.409916  543705 cpu.go:282] Add success.
E0319 23:17:13.410011  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:17:13.410034  543705 memory.go:191] Add success.
W0319 23:17:13.410065  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:17:13.410079  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:17:13.410082  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:17:13.419716  543705 net.go:648] Add success.
I0319 23:17:13.422324  543705 net.go:770] primary dev: ETH0
I0319 23:17:13.422337  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:17:13.422348  543705 net.go:698] Add success.
I0319 23:17:13.452859  543705 event_worker.go:152] Polling the log file for events...
W0319 23:17:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:17:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 23:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:17:14.455905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:17:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:17:14.455919  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:17:14.456553  543705 disk_worker.go:494] system disk:vda1
I0319 23:17:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:17:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:17:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:17:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:17:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:17:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:17:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:17:16.472450  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:17:21.476120  543705 disk_info.go:125] begin check local disk info of client
I0319 23:17:21.478603  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:17:21.478609  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329a80 0xc000329ac0]
E0319 23:17:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:17:23.409770  543705 memory.go:184] no items to output this cycle
I0319 23:17:23.409775  543705 cpu.go:275] no items to output this cycle
E0319 23:17:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:17:33.409791  543705 memory.go:184] no items to output this cycle
I0319 23:17:33.409803  543705 cpu.go:275] no items to output this cycle
E0319 23:17:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:17:43.409781  543705 memory.go:191] Add success.
I0319 23:17:43.409818  543705 cpu.go:282] Add success.
I0319 23:17:43.419951  543705 net.go:648] Add success.
I0319 23:17:43.422712  543705 net.go:770] primary dev: ETH0
I0319 23:17:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:17:43.422739  543705 net.go:698] Add success.
I0319 23:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:17:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:17:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:17:53.409795  543705 memory.go:184] no items to output this cycle
I0319 23:17:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:18:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:18:03.409799  543705 memory.go:184] no items to output this cycle
I0319 23:18:03.409812  543705 cpu.go:275] no items to output this cycle
E0319 23:18:13.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:18:13.409927  543705 memory.go:191] Add success.
I0319 23:18:13.409935  543705 cpu.go:282] Add success.
W0319 23:18:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:18:13.409978  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:18:13.409981  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:18:13.419730  543705 net.go:648] Add success.
I0319 23:18:13.422723  543705 net.go:770] primary dev: ETH0
I0319 23:18:13.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:18:13.422761  543705 net.go:698] Add success.
I0319 23:18:13.468715  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2e91423c-e587-48d1-9938-5d6082c0be70","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:18:13.468748  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:18:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:18:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:18:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0319 23:18:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:18:14.456609  543705 disk_worker.go:494] system disk:vda1
I0319 23:18:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:18:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:18:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:18:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:18:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:18:21.479098  543705 disk_info.go:125] begin check local disk info of client
I0319 23:18:21.481520  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:18:21.481527  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2ac0 0xc0003e2b00]
E0319 23:18:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:18:23.409790  543705 memory.go:184] no items to output this cycle
I0319 23:18:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:18:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:18:33.409767  543705 memory.go:184] no items to output this cycle
I0319 23:18:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 23:18:37.949730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:18:37.949738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:18:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:18:43.410786  543705 memory.go:191] Add success.
I0319 23:18:43.409814  543705 cpu.go:282] Add success.
I0319 23:18:43.420524  543705 net.go:648] Add success.
I0319 23:18:43.423156  543705 net.go:770] primary dev: ETH0
I0319 23:18:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:18:43.423186  543705 net.go:698] Add success.
I0319 23:18:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:18:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:18:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:18:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:18:53.409767  543705 memory.go:184] no items to output this cycle
I0319 23:18:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 23:19:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:19:03.409918  543705 memory.go:184] no items to output this cycle
I0319 23:19:03.409934  543705 cpu.go:275] no items to output this cycle
E0319 23:19:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:19:13.409795  543705 memory.go:191] Add success.
I0319 23:19:13.409798  543705 cpu.go:282] Add success.
W0319 23:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:19:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:19:13.420347  543705 net.go:648] Add success.
I0319 23:19:13.423145  543705 net.go:770] primary dev: ETH0
I0319 23:19:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:19:13.423170  543705 net.go:698] Add success.
I0319 23:19:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:19:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:19:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0319 23:19:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:19:14.456597  543705 disk_worker.go:494] system disk:vda1
I0319 23:19:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:19:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:19:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:19:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:19:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:19:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:19:21.482106  543705 disk_info.go:125] begin check local disk info of client
I0319 23:19:21.484537  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:19:21.484543  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515840 0xc000515880]
E0319 23:19:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:19:23.409796  543705 memory.go:184] no items to output this cycle
I0319 23:19:23.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:19:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:19:33.409788  543705 memory.go:184] no items to output this cycle
I0319 23:19:33.409794  543705 cpu.go:275] no items to output this cycle
E0319 23:19:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:19:43.409802  543705 memory.go:191] Add success.
I0319 23:19:43.409803  543705 cpu.go:282] Add success.
I0319 23:19:43.419904  543705 net.go:648] Add success.
I0319 23:19:43.422896  543705 net.go:770] primary dev: ETH0
I0319 23:19:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:19:43.422924  543705 net.go:698] Add success.
I0319 23:19:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:19:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:19:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:19:53.409783  543705 memory.go:184] no items to output this cycle
I0319 23:19:53.409784  543705 cpu.go:275] no items to output this cycle
E0319 23:20:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:20:03.409903  543705 memory.go:184] no items to output this cycle
I0319 23:20:03.409902  543705 cpu.go:275] no items to output this cycle
E0319 23:20:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:20:13.409825  543705 memory.go:191] Add success.
I0319 23:20:13.409839  543705 cpu.go:282] Add success.
W0319 23:20:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:20:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:20:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:20:13.420133  543705 net.go:648] Add success.
I0319 23:20:13.422503  543705 net.go:770] primary dev: ETH0
I0319 23:20:13.422517  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:20:13.422530  543705 net.go:698] Add success.
I0319 23:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:20:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:20:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0319 23:20:14.455147  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:20:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 23:20:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:20:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:20:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:20:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:20:21.485116  543705 disk_info.go:125] begin check local disk info of client
I0319 23:20:21.487549  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:20:21.487554  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab7c0 0xc0001ab800]
E0319 23:20:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:20:23.409790  543705 memory.go:184] no items to output this cycle
I0319 23:20:23.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:20:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:20:33.409770  543705 memory.go:184] no items to output this cycle
I0319 23:20:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:20:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:20:43.409823  543705 memory.go:191] Add success.
I0319 23:20:43.409830  543705 cpu.go:282] Add success.
I0319 23:20:43.420048  543705 net.go:648] Add success.
I0319 23:20:43.422705  543705 net.go:770] primary dev: ETH0
I0319 23:20:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:20:43.422733  543705 net.go:698] Add success.
I0319 23:20:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:20:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:20:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:20:53.409783  543705 memory.go:184] no items to output this cycle
I0319 23:20:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 23:21:03.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:21:03.409879  543705 memory.go:184] no items to output this cycle
I0319 23:21:03.409892  543705 cpu.go:275] no items to output this cycle
E0319 23:21:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:21:13.409775  543705 memory.go:191] Add success.
W0319 23:21:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 23:21:13.409807  543705 cpu.go:282] Add success.
W0319 23:21:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:21:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:21:13.420355  543705 net.go:648] Add success.
I0319 23:21:13.423204  543705 net.go:770] primary dev: ETH0
I0319 23:21:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:21:13.423239  543705 net.go:698] Add success.
I0319 23:21:13.467604  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae02aa2c-897c-4881-9ae0-e84f88b7260a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:21:13.467638  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:21:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:21:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:21:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0319 23:21:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:21:14.456550  543705 disk_worker.go:494] system disk:vda1
I0319 23:21:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:21:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:21:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:21:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:21:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:21:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:21:21.488123  543705 disk_info.go:125] begin check local disk info of client
I0319 23:21:21.490661  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:21:21.490668  543705 disk_info.go:196] parse disk info done, disk is : [0xc000346280 0xc0003462c0]
E0319 23:21:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:21:23.409757  543705 memory.go:184] no items to output this cycle
I0319 23:21:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 23:21:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:21:33.409784  543705 memory.go:184] no items to output this cycle
I0319 23:21:33.409787  543705 cpu.go:275] no items to output this cycle
I0319 23:21:37.952451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:21:37.952458  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:21:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:21:43.410657  543705 memory.go:191] Add success.
I0319 23:21:43.409834  543705 cpu.go:282] Add success.
I0319 23:21:43.420333  543705 net.go:648] Add success.
I0319 23:21:43.423421  543705 net.go:770] primary dev: ETH0
I0319 23:21:43.423434  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:21:43.423452  543705 net.go:698] Add success.
I0319 23:21:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:21:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:21:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:21:53.409784  543705 memory.go:184] no items to output this cycle
I0319 23:21:53.409788  543705 cpu.go:275] no items to output this cycle
E0319 23:22:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:22:03.409798  543705 memory.go:184] no items to output this cycle
I0319 23:22:03.409811  543705 cpu.go:275] no items to output this cycle
E0319 23:22:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:22:13.409779  543705 memory.go:191] Add success.
W0319 23:22:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:22:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:22:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:22:13.409821  543705 cpu.go:282] Add success.
I0319 23:22:13.420118  543705 net.go:648] Add success.
I0319 23:22:13.423044  543705 net.go:770] primary dev: ETH0
I0319 23:22:13.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:22:13.423068  543705 net.go:698] Add success.
W0319 23:22:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:22:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0319 23:22:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:22:14.455858  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:22:14.455867  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:22:14.455873  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:22:14.456557  543705 disk_worker.go:494] system disk:vda1
I0319 23:22:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:22:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:22:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:22:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:22:16.457893  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:22:16.457948  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:22:16.457967  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:22:16.472278  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:22:21.491095  543705 disk_info.go:125] begin check local disk info of client
I0319 23:22:21.493455  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:22:21.493461  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
E0319 23:22:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:22:23.409787  543705 memory.go:184] no items to output this cycle
I0319 23:22:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:22:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:22:33.409786  543705 memory.go:184] no items to output this cycle
I0319 23:22:33.409792  543705 cpu.go:275] no items to output this cycle
E0319 23:22:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:22:43.409820  543705 memory.go:191] Add success.
I0319 23:22:43.409825  543705 cpu.go:282] Add success.
I0319 23:22:43.420139  543705 net.go:648] Add success.
I0319 23:22:43.422926  543705 net.go:770] primary dev: ETH0
I0319 23:22:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:22:43.422950  543705 net.go:698] Add success.
I0319 23:22:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:22:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:22:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:22:53.409772  543705 memory.go:184] no items to output this cycle
I0319 23:22:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:23:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:23:03.409782  543705 memory.go:184] no items to output this cycle
I0319 23:23:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:23:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:23:13.409815  543705 memory.go:191] Add success.
I0319 23:23:13.409820  543705 cpu.go:282] Add success.
W0319 23:23:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:23:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:23:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:23:13.420120  543705 net.go:648] Add success.
I0319 23:23:13.422907  543705 net.go:770] primary dev: ETH0
I0319 23:23:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:23:13.422946  543705 net.go:698] Add success.
I0319 23:23:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:23:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:23:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0319 23:23:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:23:14.456633  543705 disk_worker.go:494] system disk:vda1
I0319 23:23:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:23:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:23:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:23:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:23:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:23:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:23:21.494215  543705 disk_info.go:125] begin check local disk info of client
I0319 23:23:21.496622  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:23:21.496629  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd00 0xc0001abd40]
E0319 23:23:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:23:23.409803  543705 memory.go:184] no items to output this cycle
I0319 23:23:23.409817  543705 cpu.go:275] no items to output this cycle
E0319 23:23:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:23:33.409799  543705 cpu.go:275] no items to output this cycle
I0319 23:23:33.409809  543705 memory.go:184] no items to output this cycle
E0319 23:23:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:23:43.409832  543705 memory.go:191] Add success.
I0319 23:23:43.409837  543705 cpu.go:282] Add success.
I0319 23:23:43.420090  543705 net.go:648] Add success.
I0319 23:23:43.422903  543705 net.go:770] primary dev: ETH0
I0319 23:23:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:23:43.422928  543705 net.go:698] Add success.
I0319 23:23:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:23:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:23:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:23:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:23:53.409804  543705 memory.go:184] no items to output this cycle
I0319 23:23:53.409815  543705 cpu.go:275] no items to output this cycle
E0319 23:24:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:24:03.409787  543705 memory.go:184] no items to output this cycle
I0319 23:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:24:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:24:13.409823  543705 memory.go:191] Add success.
I0319 23:24:13.409834  543705 cpu.go:282] Add success.
W0319 23:24:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:24:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:24:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:24:13.420206  543705 net.go:648] Add success.
I0319 23:24:13.422699  543705 net.go:770] primary dev: ETH0
I0319 23:24:13.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:24:13.422729  543705 net.go:698] Add success.
I0319 23:24:13.463161  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea7c5371-b4a3-4a57-9053-b9023e4f39d5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:24:13.463194  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:24:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:24:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:24:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 23:24:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:24:14.456603  543705 disk_worker.go:494] system disk:vda1
I0319 23:24:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:24:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:24:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:24:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:24:21.497230  543705 disk_info.go:125] begin check local disk info of client
I0319 23:24:21.499637  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:24:21.499643  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314f00 0xc000314f40]
E0319 23:24:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:24:23.409785  543705 memory.go:184] no items to output this cycle
I0319 23:24:23.409805  543705 cpu.go:275] no items to output this cycle
E0319 23:24:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:24:33.409776  543705 memory.go:184] no items to output this cycle
I0319 23:24:33.409817  543705 cpu.go:275] no items to output this cycle
I0319 23:24:37.953730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:24:37.953737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:24:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:24:43.410650  543705 memory.go:191] Add success.
I0319 23:24:43.409810  543705 cpu.go:282] Add success.
I0319 23:24:43.419685  543705 net.go:648] Add success.
I0319 23:24:43.422419  543705 net.go:770] primary dev: ETH0
I0319 23:24:43.422432  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:24:43.422444  543705 net.go:698] Add success.
I0319 23:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:24:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:24:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:24:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:24:53.409770  543705 memory.go:184] no items to output this cycle
I0319 23:24:53.409799  543705 cpu.go:275] no items to output this cycle
E0319 23:25:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:25:03.409787  543705 memory.go:184] no items to output this cycle
I0319 23:25:03.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:25:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:25:13.409781  543705 memory.go:191] Add success.
W0319 23:25:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 23:25:13.409809  543705 cpu.go:282] Add success.
W0319 23:25:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:25:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:25:13.420126  543705 net.go:648] Add success.
I0319 23:25:13.422769  543705 net.go:770] primary dev: ETH0
I0319 23:25:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:25:13.422795  543705 net.go:698] Add success.
I0319 23:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:25:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:25:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0319 23:25:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:25:14.456594  543705 disk_worker.go:494] system disk:vda1
I0319 23:25:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:25:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:25:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:25:21.500247  543705 disk_info.go:125] begin check local disk info of client
I0319 23:25:21.502683  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:25:21.502688  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384300 0xc000384340]
E0319 23:25:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:25:23.409789  543705 memory.go:184] no items to output this cycle
I0319 23:25:23.409801  543705 cpu.go:275] no items to output this cycle
E0319 23:25:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:25:33.409806  543705 memory.go:184] no items to output this cycle
I0319 23:25:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 23:25:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:25:43.409921  543705 memory.go:191] Add success.
I0319 23:25:43.409960  543705 cpu.go:282] Add success.
I0319 23:25:43.419740  543705 net.go:648] Add success.
I0319 23:25:43.422462  543705 net.go:770] primary dev: ETH0
I0319 23:25:43.422475  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:25:43.422486  543705 net.go:698] Add success.
I0319 23:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:25:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:25:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:25:53.409763  543705 memory.go:184] no items to output this cycle
I0319 23:25:53.409797  543705 cpu.go:275] no items to output this cycle
E0319 23:26:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:26:03.409774  543705 memory.go:184] no items to output this cycle
I0319 23:26:03.409785  543705 cpu.go:275] no items to output this cycle
E0319 23:26:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:26:13.409793  543705 memory.go:191] Add success.
I0319 23:26:13.409794  543705 cpu.go:282] Add success.
W0319 23:26:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:26:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:26:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:26:13.420501  543705 net.go:648] Add success.
I0319 23:26:13.423437  543705 net.go:770] primary dev: ETH0
I0319 23:26:13.423451  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:26:13.423463  543705 net.go:698] Add success.
I0319 23:26:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:26:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:26:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0319 23:26:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:26:14.456511  543705 disk_worker.go:494] system disk:vda1
I0319 23:26:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:26:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:26:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:26:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:26:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:26:21.503266  543705 disk_info.go:125] begin check local disk info of client
I0319 23:26:21.505682  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:26:21.505687  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315740 0xc000315780]
E0319 23:26:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:26:23.409780  543705 memory.go:184] no items to output this cycle
I0319 23:26:23.409794  543705 cpu.go:275] no items to output this cycle
E0319 23:26:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:26:33.409783  543705 memory.go:184] no items to output this cycle
I0319 23:26:33.409789  543705 cpu.go:275] no items to output this cycle
E0319 23:26:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:26:43.409795  543705 memory.go:191] Add success.
I0319 23:26:43.409796  543705 cpu.go:282] Add success.
I0319 23:26:43.420069  543705 net.go:648] Add success.
I0319 23:26:43.422660  543705 net.go:770] primary dev: ETH0
I0319 23:26:43.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:26:43.422684  543705 net.go:698] Add success.
I0319 23:26:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:26:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:26:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:26:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:26:53.409796  543705 memory.go:184] no items to output this cycle
I0319 23:26:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:27:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:27:03.409786  543705 cpu.go:275] no items to output this cycle
I0319 23:27:03.409799  543705 memory.go:184] no items to output this cycle
E0319 23:27:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:27:13.409796  543705 memory.go:191] Add success.
I0319 23:27:13.409801  543705 cpu.go:282] Add success.
W0319 23:27:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:27:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:27:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:27:13.420120  543705 net.go:648] Add success.
I0319 23:27:13.423150  543705 net.go:770] primary dev: ETH0
I0319 23:27:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:27:13.423173  543705 net.go:698] Add success.
I0319 23:27:13.429508  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 23:27:13.453662  543705 event_worker.go:152] Polling the log file for events...
I0319 23:27:13.469158  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a8f304f4-594a-4131-b1bb-8fc46fe42e16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:27:13.469191  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 23:27:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:27:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0319 23:27:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:27:14.456135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:27:14.456143  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:27:14.456149  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:27:14.456411  543705 disk_worker.go:494] system disk:vda1
I0319 23:27:14.456441  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:27:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:27:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:27:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:27:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:27:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:27:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:27:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:27:21.506171  543705 disk_info.go:125] begin check local disk info of client
I0319 23:27:21.508563  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:27:21.508570  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329bc0 0xc000329c00]
E0319 23:27:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:27:23.409789  543705 memory.go:184] no items to output this cycle
I0319 23:27:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:27:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:27:33.409802  543705 memory.go:184] no items to output this cycle
I0319 23:27:33.409817  543705 cpu.go:275] no items to output this cycle
I0319 23:27:37.956470  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:27:37.956477  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:27:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:27:43.410651  543705 memory.go:191] Add success.
I0319 23:27:43.409810  543705 cpu.go:282] Add success.
I0319 23:27:43.420430  543705 net.go:648] Add success.
I0319 23:27:43.423351  543705 net.go:770] primary dev: ETH0
I0319 23:27:43.423364  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:27:43.423376  543705 net.go:698] Add success.
I0319 23:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:27:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:27:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:27:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:27:53.409801  543705 memory.go:184] no items to output this cycle
I0319 23:27:53.409812  543705 cpu.go:275] no items to output this cycle
E0319 23:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:28:03.409797  543705 memory.go:184] no items to output this cycle
I0319 23:28:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:28:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:28:13.409822  543705 memory.go:191] Add success.
I0319 23:28:13.409825  543705 cpu.go:282] Add success.
W0319 23:28:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:28:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:28:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:28:13.420173  543705 net.go:648] Add success.
I0319 23:28:13.422969  543705 net.go:770] primary dev: ETH0
I0319 23:28:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:28:13.422993  543705 net.go:698] Add success.
I0319 23:28:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:28:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:28:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0319 23:28:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:28:14.456558  543705 disk_worker.go:494] system disk:vda1
I0319 23:28:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:28:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:28:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:28:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:28:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:28:21.509293  543705 disk_info.go:125] begin check local disk info of client
I0319 23:28:21.511676  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:28:21.511683  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c46c0 0xc0000c4700]
E0319 23:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:28:23.409800  543705 memory.go:184] no items to output this cycle
I0319 23:28:23.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:28:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:28:33.409768  543705 memory.go:184] no items to output this cycle
I0319 23:28:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 23:28:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:28:43.409803  543705 memory.go:191] Add success.
I0319 23:28:43.409807  543705 cpu.go:282] Add success.
I0319 23:28:43.419950  543705 net.go:648] Add success.
I0319 23:28:43.422941  543705 net.go:770] primary dev: ETH0
I0319 23:28:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:28:43.422972  543705 net.go:698] Add success.
I0319 23:28:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:28:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:28:53.409922  543705 cpu.go:275] no items to output this cycle
E0319 23:28:53.409931  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:28:53.410059  543705 memory.go:184] no items to output this cycle
E0319 23:29:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:29:03.409788  543705 memory.go:184] no items to output this cycle
I0319 23:29:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:29:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:29:13.409780  543705 memory.go:191] Add success.
W0319 23:29:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 23:29:13.409808  543705 cpu.go:282] Add success.
W0319 23:29:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:29:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:29:13.420148  543705 net.go:648] Add success.
I0319 23:29:13.423817  543705 net.go:770] primary dev: ETH0
I0319 23:29:13.423830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:29:13.423842  543705 net.go:698] Add success.
I0319 23:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:29:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:29:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0319 23:29:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:29:14.456495  543705 disk_worker.go:494] system disk:vda1
I0319 23:29:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:29:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:29:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:29:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:29:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:29:21.511708  543705 disk_info.go:125] begin check local disk info of client
I0319 23:29:21.514302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:29:21.514316  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff580 0xc0003ff5c0]
E0319 23:29:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:29:23.409786  543705 memory.go:184] no items to output this cycle
I0319 23:29:23.409799  543705 cpu.go:275] no items to output this cycle
E0319 23:29:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:29:33.409778  543705 memory.go:184] no items to output this cycle
I0319 23:29:33.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:29:43.409802  543705 memory.go:191] Add success.
I0319 23:29:43.409805  543705 cpu.go:282] Add success.
I0319 23:29:43.419970  543705 net.go:648] Add success.
I0319 23:29:43.422941  543705 net.go:770] primary dev: ETH0
I0319 23:29:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:29:43.422967  543705 net.go:698] Add success.
I0319 23:29:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:29:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:29:53.409804  543705 memory.go:184] no items to output this cycle
I0319 23:29:53.409811  543705 cpu.go:275] no items to output this cycle
E0319 23:30:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:30:03.409766  543705 memory.go:184] no items to output this cycle
I0319 23:30:03.409796  543705 cpu.go:275] no items to output this cycle
E0319 23:30:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:30:13.409805  543705 memory.go:191] Add success.
I0319 23:30:13.409806  543705 cpu.go:282] Add success.
W0319 23:30:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:30:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:30:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:30:13.420368  543705 net.go:648] Add success.
I0319 23:30:13.422819  543705 net.go:770] primary dev: ETH0
I0319 23:30:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:30:13.422849  543705 net.go:698] Add success.
I0319 23:30:13.468944  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a612783a-87e5-497b-93af-16643dcfaa39","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:30:13.468977  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:30:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:30:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:30:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0319 23:30:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:30:14.456488  543705 disk_worker.go:494] system disk:vda1
I0319 23:30:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:30:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:30:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:30:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:30:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:30:21.515259  543705 disk_info.go:125] begin check local disk info of client
I0319 23:30:21.517693  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:30:21.517699  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329a40 0xc000329a80]
E0319 23:30:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:30:23.409805  543705 memory.go:184] no items to output this cycle
I0319 23:30:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 23:30:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:30:33.409781  543705 memory.go:184] no items to output this cycle
I0319 23:30:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 23:30:37.957732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:30:37.957738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:30:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:30:43.410700  543705 memory.go:191] Add success.
I0319 23:30:43.409797  543705 cpu.go:282] Add success.
I0319 23:30:43.420418  543705 net.go:648] Add success.
I0319 23:30:43.423003  543705 net.go:770] primary dev: ETH0
I0319 23:30:43.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:30:43.423031  543705 net.go:698] Add success.
I0319 23:30:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:30:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:30:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:30:53.409773  543705 memory.go:184] no items to output this cycle
I0319 23:30:53.409782  543705 cpu.go:275] no items to output this cycle
E0319 23:31:03.410286  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:31:03.410305  543705 memory.go:184] no items to output this cycle
I0319 23:31:03.410321  543705 cpu.go:275] no items to output this cycle
E0319 23:31:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:31:13.409795  543705 memory.go:191] Add success.
I0319 23:31:13.409800  543705 cpu.go:282] Add success.
W0319 23:31:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:31:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:31:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:31:13.420252  543705 net.go:648] Add success.
I0319 23:31:13.422970  543705 net.go:770] primary dev: ETH0
I0319 23:31:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:31:13.422995  543705 net.go:698] Add success.
I0319 23:31:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:31:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:31:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0319 23:31:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:31:14.456611  543705 disk_worker.go:494] system disk:vda1
I0319 23:31:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:31:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:31:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:31:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:31:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:31:21.518234  543705 disk_info.go:125] begin check local disk info of client
I0319 23:31:21.520673  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:31:21.520679  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004744c0 0xc000474500]
E0319 23:31:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:31:23.409804  543705 memory.go:184] no items to output this cycle
I0319 23:31:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 23:31:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:31:33.409803  543705 memory.go:184] no items to output this cycle
I0319 23:31:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 23:31:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:31:43.409791  543705 memory.go:191] Add success.
I0319 23:31:43.409817  543705 cpu.go:282] Add success.
I0319 23:31:43.419881  543705 net.go:648] Add success.
I0319 23:31:43.422742  543705 net.go:770] primary dev: ETH0
I0319 23:31:43.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:31:43.422767  543705 net.go:698] Add success.
I0319 23:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:31:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:31:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:31:53.409778  543705 memory.go:184] no items to output this cycle
I0319 23:31:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:32:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:32:03.409798  543705 memory.go:184] no items to output this cycle
I0319 23:32:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:32:13.409794  543705 memory.go:191] Add success.
I0319 23:32:13.409812  543705 cpu.go:282] Add success.
W0319 23:32:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:32:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:32:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:32:13.420072  543705 net.go:648] Add success.
I0319 23:32:13.422902  543705 net.go:770] primary dev: ETH0
I0319 23:32:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:32:13.422929  543705 net.go:698] Add success.
W0319 23:32:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:32:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0319 23:32:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:32:14.455851  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:32:14.455860  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:32:14.455866  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:32:14.456556  543705 disk_worker.go:494] system disk:vda1
I0319 23:32:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:32:15.456777  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:32:15.456786  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:32:16.457897  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:32:16.457897  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:32:16.457949  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:32:16.457968  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:32:16.472275  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:32:21.521258  543705 disk_info.go:125] begin check local disk info of client
I0319 23:32:21.523680  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:32:21.523687  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329380 0xc0003293c0]
E0319 23:32:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:32:23.409793  543705 memory.go:184] no items to output this cycle
I0319 23:32:23.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:32:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:32:33.409779  543705 memory.go:184] no items to output this cycle
I0319 23:32:33.409782  543705 cpu.go:275] no items to output this cycle
E0319 23:32:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:32:43.409806  543705 memory.go:191] Add success.
I0319 23:32:43.409808  543705 cpu.go:282] Add success.
I0319 23:32:43.419936  543705 net.go:648] Add success.
I0319 23:32:43.422576  543705 net.go:770] primary dev: ETH0
I0319 23:32:43.422590  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:32:43.422606  543705 net.go:698] Add success.
I0319 23:32:46.458500  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:32:46.458575  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:32:46.458596  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:32:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:32:53.409778  543705 memory.go:184] no items to output this cycle
I0319 23:32:53.409791  543705 cpu.go:275] no items to output this cycle
E0319 23:33:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:33:03.409818  543705 memory.go:184] no items to output this cycle
I0319 23:33:03.409830  543705 cpu.go:275] no items to output this cycle
E0319 23:33:13.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:33:13.409885  543705 memory.go:191] Add success.
W0319 23:33:13.409913  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:33:13.409949  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:33:13.409956  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:33:13.410001  543705 cpu.go:282] Add success.
I0319 23:33:13.419726  543705 net.go:648] Add success.
I0319 23:33:13.422532  543705 net.go:770] primary dev: ETH0
I0319 23:33:13.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:33:13.422556  543705 net.go:698] Add success.
I0319 23:33:13.469260  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d9c556af-2412-470d-844e-07ce1b5c4808","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:33:13.469291  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:33:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:33:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:33:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0319 23:33:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:33:14.456499  543705 disk_worker.go:494] system disk:vda1
I0319 23:33:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:33:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:33:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:33:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:33:16.472432  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:33:21.524274  543705 disk_info.go:125] begin check local disk info of client
I0319 23:33:21.526746  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:33:21.526753  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2740 0xc0003f2780]
E0319 23:33:23.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:33:23.409839  543705 memory.go:184] no items to output this cycle
I0319 23:33:23.409848  543705 cpu.go:275] no items to output this cycle
E0319 23:33:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:33:33.409808  543705 memory.go:184] no items to output this cycle
I0319 23:33:33.409820  543705 cpu.go:275] no items to output this cycle
I0319 23:33:37.960493  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:33:37.960499  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:33:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:33:43.410653  543705 memory.go:191] Add success.
I0319 23:33:43.409827  543705 cpu.go:282] Add success.
I0319 23:33:43.420360  543705 net.go:648] Add success.
I0319 23:33:43.422960  543705 net.go:770] primary dev: ETH0
I0319 23:33:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:33:43.422989  543705 net.go:698] Add success.
I0319 23:33:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:33:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:33:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:33:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:33:53.409788  543705 cpu.go:275] no items to output this cycle
I0319 23:33:53.409790  543705 memory.go:184] no items to output this cycle
E0319 23:34:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:34:03.409820  543705 memory.go:184] no items to output this cycle
I0319 23:34:03.409836  543705 cpu.go:275] no items to output this cycle
W0319 23:34:13.409705  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:34:13.409721  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:34:13.409725  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 23:34:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:34:13.409947  543705 cpu.go:282] Add success.
I0319 23:34:13.410039  543705 memory.go:191] Add success.
I0319 23:34:13.419736  543705 net.go:648] Add success.
I0319 23:34:13.423225  543705 net.go:770] primary dev: ETH0
I0319 23:34:13.423239  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:34:13.423251  543705 net.go:698] Add success.
I0319 23:34:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:34:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:34:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 23:34:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:34:14.456525  543705 disk_worker.go:494] system disk:vda1
I0319 23:34:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:34:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:34:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:34:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:34:21.527283  543705 disk_info.go:125] begin check local disk info of client
I0319 23:34:21.529755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:34:21.529761  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498540 0xc000498580]
E0319 23:34:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:34:23.409793  543705 memory.go:184] no items to output this cycle
I0319 23:34:23.409814  543705 cpu.go:275] no items to output this cycle
E0319 23:34:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:34:33.409770  543705 memory.go:184] no items to output this cycle
I0319 23:34:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:34:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:34:43.409789  543705 memory.go:191] Add success.
I0319 23:34:43.409809  543705 cpu.go:282] Add success.
I0319 23:34:43.419985  543705 net.go:648] Add success.
I0319 23:34:43.422569  543705 net.go:770] primary dev: ETH0
I0319 23:34:43.422582  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:34:43.422593  543705 net.go:698] Add success.
I0319 23:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:34:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:34:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:34:53.410359  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:34:53.410375  543705 memory.go:184] no items to output this cycle
I0319 23:34:53.410412  543705 cpu.go:275] no items to output this cycle
E0319 23:35:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:35:03.409813  543705 memory.go:184] no items to output this cycle
I0319 23:35:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 23:35:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:35:13.409812  543705 memory.go:191] Add success.
I0319 23:35:13.409821  543705 cpu.go:282] Add success.
W0319 23:35:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:35:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:35:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:35:13.420807  543705 net.go:648] Add success.
I0319 23:35:13.423751  543705 net.go:770] primary dev: ETH0
I0319 23:35:13.423766  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:35:13.423780  543705 net.go:698] Add success.
I0319 23:35:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:35:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:35:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0319 23:35:14.455148  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:35:14.456508  543705 disk_worker.go:494] system disk:vda1
I0319 23:35:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:35:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:35:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:35:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:35:21.530315  543705 disk_info.go:125] begin check local disk info of client
I0319 23:35:21.532786  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:35:21.532793  543705 disk_info.go:196] parse disk info done, disk is : [0xc000538b00 0xc000538b40]
E0319 23:35:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:35:23.409790  543705 memory.go:184] no items to output this cycle
I0319 23:35:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 23:35:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:35:33.409780  543705 memory.go:184] no items to output this cycle
I0319 23:35:33.409785  543705 cpu.go:275] no items to output this cycle
E0319 23:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:35:43.409803  543705 memory.go:191] Add success.
I0319 23:35:43.409804  543705 cpu.go:282] Add success.
I0319 23:35:43.419991  543705 net.go:648] Add success.
I0319 23:35:43.422743  543705 net.go:770] primary dev: ETH0
I0319 23:35:43.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:35:43.422769  543705 net.go:698] Add success.
I0319 23:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:35:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:35:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:35:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:35:53.409765  543705 memory.go:184] no items to output this cycle
I0319 23:35:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 23:36:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:36:03.409769  543705 memory.go:184] no items to output this cycle
I0319 23:36:03.409801  543705 cpu.go:275] no items to output this cycle
E0319 23:36:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:36:13.409796  543705 memory.go:191] Add success.
I0319 23:36:13.409796  543705 cpu.go:282] Add success.
W0319 23:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:36:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:36:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:36:13.420129  543705 net.go:648] Add success.
I0319 23:36:13.423052  543705 net.go:770] primary dev: ETH0
I0319 23:36:13.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:36:13.423078  543705 net.go:698] Add success.
I0319 23:36:13.467563  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"672b49c2-42b1-47f5-8eb7-cd96e4d8afb9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:36:13.467595  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:36:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:36:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:36:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0319 23:36:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:36:14.456625  543705 disk_worker.go:494] system disk:vda1
I0319 23:36:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:36:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:36:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:36:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:36:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:36:21.533309  543705 disk_info.go:125] begin check local disk info of client
I0319 23:36:21.535793  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:36:21.535800  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005823c0 0xc000582400]
E0319 23:36:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:36:23.409795  543705 memory.go:184] no items to output this cycle
I0319 23:36:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 23:36:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:36:33.409803  543705 memory.go:184] no items to output this cycle
I0319 23:36:33.409814  543705 cpu.go:275] no items to output this cycle
I0319 23:36:37.961734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:36:37.961741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:36:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:36:43.410682  543705 memory.go:191] Add success.
I0319 23:36:43.409830  543705 cpu.go:282] Add success.
I0319 23:36:43.420391  543705 net.go:648] Add success.
I0319 23:36:43.423001  543705 net.go:770] primary dev: ETH0
I0319 23:36:43.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:36:43.423042  543705 net.go:698] Add success.
I0319 23:36:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:36:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:36:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:36:53.409782  543705 memory.go:184] no items to output this cycle
I0319 23:36:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 23:37:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:37:03.409802  543705 memory.go:184] no items to output this cycle
I0319 23:37:03.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:37:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:37:13.409795  543705 memory.go:191] Add success.
I0319 23:37:13.409796  543705 cpu.go:282] Add success.
W0319 23:37:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:37:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:37:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:37:13.420053  543705 net.go:648] Add success.
I0319 23:37:13.422920  543705 net.go:770] primary dev: ETH0
I0319 23:37:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:37:13.422950  543705 net.go:698] Add success.
I0319 23:37:13.453494  543705 event_worker.go:152] Polling the log file for events...
W0319 23:37:14.455372  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:37:14.455483  543705 disk_worker.go:708] disk space is not compliant
W0319 23:37:14.455486  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:37:14.456514  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:37:14.456521  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:37:14.456525  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:37:14.457697  543705 disk_worker.go:494] system disk:vda1
I0319 23:37:14.457739  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:37:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:37:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:37:16.458062  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:37:16.458064  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:37:16.458133  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:37:16.458156  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:37:16.472527  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:37:21.536323  543705 disk_info.go:125] begin check local disk info of client
I0319 23:37:21.538766  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:37:21.538773  543705 disk_info.go:196] parse disk info done, disk is : [0xc000517340 0xc000517840]
E0319 23:37:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:37:23.409788  543705 memory.go:184] no items to output this cycle
I0319 23:37:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 23:37:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:37:33.409778  543705 memory.go:184] no items to output this cycle
I0319 23:37:33.409784  543705 cpu.go:275] no items to output this cycle
E0319 23:37:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:37:43.409796  543705 memory.go:191] Add success.
I0319 23:37:43.409807  543705 cpu.go:282] Add success.
I0319 23:37:43.419873  543705 net.go:648] Add success.
I0319 23:37:43.422332  543705 net.go:770] primary dev: ETH0
I0319 23:37:43.422345  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:37:43.422357  543705 net.go:698] Add success.
I0319 23:37:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:37:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:37:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:37:53.409778  543705 memory.go:184] no items to output this cycle
I0319 23:37:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 23:38:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:38:03.409782  543705 memory.go:184] no items to output this cycle
I0319 23:38:03.409783  543705 cpu.go:275] no items to output this cycle
E0319 23:38:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:38:13.409803  543705 memory.go:191] Add success.
I0319 23:38:13.409808  543705 cpu.go:282] Add success.
W0319 23:38:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:38:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:38:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:38:13.420057  543705 net.go:648] Add success.
I0319 23:38:13.423020  543705 net.go:770] primary dev: ETH0
I0319 23:38:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:38:13.423045  543705 net.go:698] Add success.
I0319 23:38:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:38:14.455367  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:38:14.455465  543705 disk_worker.go:708] disk space is not compliant
W0319 23:38:14.455518  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:38:14.457106  543705 disk_worker.go:494] system disk:vda1
I0319 23:38:14.457135  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:38:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:38:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:38:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:38:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:38:21.539449  543705 disk_info.go:125] begin check local disk info of client
I0319 23:38:21.541935  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:38:21.541943  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4000 0xc0002a4080]
E0319 23:38:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:38:23.409761  543705 memory.go:184] no items to output this cycle
I0319 23:38:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:38:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:38:33.409765  543705 memory.go:184] no items to output this cycle
I0319 23:38:33.409797  543705 cpu.go:275] no items to output this cycle
E0319 23:38:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:38:43.409792  543705 memory.go:191] Add success.
I0319 23:38:43.409811  543705 cpu.go:282] Add success.
I0319 23:38:43.420138  543705 net.go:648] Add success.
I0319 23:38:43.423064  543705 net.go:770] primary dev: ETH0
I0319 23:38:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:38:43.423090  543705 net.go:698] Add success.
I0319 23:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:38:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:38:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:38:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:38:53.409767  543705 memory.go:184] no items to output this cycle
I0319 23:38:53.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:39:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:39:03.409817  543705 memory.go:184] no items to output this cycle
I0319 23:39:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 23:39:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:39:13.409790  543705 memory.go:191] Add success.
I0319 23:39:13.409791  543705 cpu.go:282] Add success.
W0319 23:39:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:39:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:39:13.420203  543705 net.go:648] Add success.
I0319 23:39:13.422963  543705 net.go:770] primary dev: ETH0
I0319 23:39:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:39:13.422988  543705 net.go:698] Add success.
I0319 23:39:13.471180  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae511726-ce6f-4682-8be7-3894fdb00e48","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:39:13.471210  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:39:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:39:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:39:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0319 23:39:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:39:14.456629  543705 disk_worker.go:494] system disk:vda1
I0319 23:39:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:39:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:39:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:39:21.542351  543705 disk_info.go:125] begin check local disk info of client
I0319 23:39:21.544844  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:39:21.544851  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba2c0 0xc0002ba300]
E0319 23:39:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:39:23.409761  543705 memory.go:184] no items to output this cycle
I0319 23:39:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:39:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:39:33.409800  543705 memory.go:184] no items to output this cycle
I0319 23:39:33.409810  543705 cpu.go:275] no items to output this cycle
I0319 23:39:37.964508  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:39:37.964515  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:39:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:39:43.410611  543705 memory.go:191] Add success.
I0319 23:39:43.409830  543705 cpu.go:282] Add success.
I0319 23:39:43.420310  543705 net.go:648] Add success.
I0319 23:39:43.422855  543705 net.go:770] primary dev: ETH0
I0319 23:39:43.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:39:43.422881  543705 net.go:698] Add success.
I0319 23:39:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:39:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:39:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:39:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:39:53.409789  543705 memory.go:184] no items to output this cycle
I0319 23:39:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:40:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:40:03.409771  543705 memory.go:184] no items to output this cycle
I0319 23:40:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:40:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:40:13.409793  543705 memory.go:191] Add success.
I0319 23:40:13.409811  543705 cpu.go:282] Add success.
W0319 23:40:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:40:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:40:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:40:13.420141  543705 net.go:648] Add success.
I0319 23:40:13.422596  543705 net.go:770] primary dev: ETH0
I0319 23:40:13.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:40:13.422624  543705 net.go:698] Add success.
I0319 23:40:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:40:14.455327  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:40:14.455434  543705 disk_worker.go:708] disk space is not compliant
W0319 23:40:14.455443  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:40:14.457515  543705 disk_worker.go:494] system disk:vda1
I0319 23:40:14.457557  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:40:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:40:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:40:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:40:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:40:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:40:21.545473  543705 disk_info.go:125] begin check local disk info of client
I0319 23:40:21.547941  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:40:21.547948  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492280 0xc0004922c0]
E0319 23:40:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:40:23.409760  543705 memory.go:184] no items to output this cycle
I0319 23:40:23.409795  543705 cpu.go:275] no items to output this cycle
E0319 23:40:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:40:33.409801  543705 memory.go:184] no items to output this cycle
I0319 23:40:33.409809  543705 cpu.go:275] no items to output this cycle
E0319 23:40:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:40:43.409824  543705 memory.go:191] Add success.
I0319 23:40:43.409829  543705 cpu.go:282] Add success.
I0319 23:40:43.419976  543705 net.go:648] Add success.
I0319 23:40:43.422743  543705 net.go:770] primary dev: ETH0
I0319 23:40:43.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:40:43.422770  543705 net.go:698] Add success.
I0319 23:40:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:40:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:40:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:40:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:40:53.409777  543705 memory.go:184] no items to output this cycle
I0319 23:40:53.409779  543705 cpu.go:275] no items to output this cycle
E0319 23:41:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:41:03.409795  543705 memory.go:184] no items to output this cycle
I0319 23:41:03.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:41:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:41:13.409819  543705 memory.go:191] Add success.
I0319 23:41:13.409822  543705 cpu.go:282] Add success.
W0319 23:41:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:41:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:41:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:41:13.420282  543705 net.go:648] Add success.
I0319 23:41:13.423256  543705 net.go:770] primary dev: ETH0
I0319 23:41:13.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:41:13.423285  543705 net.go:698] Add success.
I0319 23:41:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:41:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:41:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0319 23:41:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:41:14.456518  543705 disk_worker.go:494] system disk:vda1
I0319 23:41:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:41:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:41:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:41:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:41:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:41:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:41:21.548380  543705 disk_info.go:125] begin check local disk info of client
I0319 23:41:21.550861  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:41:21.550868  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004963c0 0xc000496400]
E0319 23:41:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:41:23.409767  543705 memory.go:184] no items to output this cycle
I0319 23:41:23.409774  543705 cpu.go:275] no items to output this cycle
E0319 23:41:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:41:33.409800  543705 memory.go:184] no items to output this cycle
I0319 23:41:33.409814  543705 cpu.go:275] no items to output this cycle
E0319 23:41:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:41:43.409791  543705 memory.go:191] Add success.
I0319 23:41:43.409811  543705 cpu.go:282] Add success.
I0319 23:41:43.419954  543705 net.go:648] Add success.
I0319 23:41:43.422556  543705 net.go:770] primary dev: ETH0
I0319 23:41:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:41:43.422586  543705 net.go:698] Add success.
I0319 23:41:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:41:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:41:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:41:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:41:53.409772  543705 memory.go:184] no items to output this cycle
I0319 23:41:53.409780  543705 cpu.go:275] no items to output this cycle
E0319 23:42:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:42:03.409776  543705 memory.go:184] no items to output this cycle
I0319 23:42:03.409780  543705 cpu.go:275] no items to output this cycle
E0319 23:42:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:42:13.409826  543705 memory.go:191] Add success.
I0319 23:42:13.409837  543705 cpu.go:282] Add success.
W0319 23:42:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:42:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:42:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:42:13.420155  543705 net.go:648] Add success.
I0319 23:42:13.422877  543705 net.go:770] primary dev: ETH0
I0319 23:42:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:42:13.422902  543705 net.go:698] Add success.
I0319 23:42:13.468619  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bbe68286-3cbe-484e-a696-cd65f799411c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:42:13.468654  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 23:42:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:42:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0319 23:42:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:42:14.456890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:42:14.456899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:42:14.456905  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:42:14.456908  543705 disk_worker.go:494] system disk:vda1
I0319 23:42:14.456943  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:42:15.456877  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:42:15.456887  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:42:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:42:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:42:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:42:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:42:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:42:21.551501  543705 disk_info.go:125] begin check local disk info of client
I0319 23:42:21.554025  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:42:21.554031  543705 disk_info.go:196] parse disk info done, disk is : [0xc000496340 0xc000496380]
E0319 23:42:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:42:23.409774  543705 memory.go:184] no items to output this cycle
I0319 23:42:23.409782  543705 cpu.go:275] no items to output this cycle
E0319 23:42:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:42:33.409763  543705 memory.go:184] no items to output this cycle
I0319 23:42:33.409796  543705 cpu.go:275] no items to output this cycle
I0319 23:42:37.965737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:42:37.965743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:42:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:42:43.410700  543705 memory.go:191] Add success.
I0319 23:42:43.409808  543705 cpu.go:282] Add success.
I0319 23:42:43.420424  543705 net.go:648] Add success.
I0319 23:42:43.423194  543705 net.go:770] primary dev: ETH0
I0319 23:42:43.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:42:43.423219  543705 net.go:698] Add success.
I0319 23:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:42:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:42:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:42:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:42:53.409768  543705 memory.go:184] no items to output this cycle
I0319 23:42:53.409801  543705 cpu.go:275] no items to output this cycle
E0319 23:43:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:43:03.409796  543705 memory.go:184] no items to output this cycle
I0319 23:43:03.409821  543705 cpu.go:275] no items to output this cycle
E0319 23:43:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:43:13.409799  543705 memory.go:191] Add success.
I0319 23:43:13.409816  543705 cpu.go:282] Add success.
W0319 23:43:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:43:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:43:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:43:13.420076  543705 net.go:648] Add success.
I0319 23:43:13.422841  543705 net.go:770] primary dev: ETH0
I0319 23:43:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:43:13.422870  543705 net.go:698] Add success.
I0319 23:43:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:43:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:43:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0319 23:43:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:43:14.456513  543705 disk_worker.go:494] system disk:vda1
I0319 23:43:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:43:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:43:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:43:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:43:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:43:21.554479  543705 disk_info.go:125] begin check local disk info of client
I0319 23:43:21.556946  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:43:21.556953  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2340 0xc0003b2380]
E0319 23:43:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:43:23.409796  543705 memory.go:184] no items to output this cycle
I0319 23:43:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:43:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:43:33.409775  543705 memory.go:184] no items to output this cycle
I0319 23:43:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:43:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:43:43.409837  543705 memory.go:191] Add success.
I0319 23:43:43.409847  543705 cpu.go:282] Add success.
I0319 23:43:43.419969  543705 net.go:648] Add success.
I0319 23:43:43.422970  543705 net.go:770] primary dev: ETH0
I0319 23:43:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:43:43.422997  543705 net.go:698] Add success.
I0319 23:43:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:43:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:43:53.409811  543705 memory.go:184] no items to output this cycle
I0319 23:43:53.409824  543705 cpu.go:275] no items to output this cycle
E0319 23:44:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:44:03.409789  543705 memory.go:184] no items to output this cycle
I0319 23:44:03.409791  543705 cpu.go:275] no items to output this cycle
E0319 23:44:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:44:13.409799  543705 memory.go:191] Add success.
I0319 23:44:13.409819  543705 cpu.go:282] Add success.
W0319 23:44:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:44:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:44:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:44:13.420165  543705 net.go:648] Add success.
I0319 23:44:13.422664  543705 net.go:770] primary dev: ETH0
I0319 23:44:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:44:13.422689  543705 net.go:698] Add success.
I0319 23:44:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:44:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:44:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 23:44:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:44:14.456561  543705 disk_worker.go:494] system disk:vda1
I0319 23:44:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:44:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:44:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:44:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:44:21.557426  543705 disk_info.go:125] begin check local disk info of client
I0319 23:44:21.559898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:44:21.559906  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492200 0xc000492240]
E0319 23:44:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:44:23.409774  543705 memory.go:184] no items to output this cycle
I0319 23:44:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 23:44:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:44:33.409815  543705 memory.go:184] no items to output this cycle
I0319 23:44:33.409833  543705 cpu.go:275] no items to output this cycle
E0319 23:44:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:44:43.409791  543705 memory.go:191] Add success.
I0319 23:44:43.409811  543705 cpu.go:282] Add success.
I0319 23:44:43.420006  543705 net.go:648] Add success.
I0319 23:44:43.422882  543705 net.go:770] primary dev: ETH0
I0319 23:44:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:44:43.422910  543705 net.go:698] Add success.
I0319 23:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:44:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:44:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:44:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:44:53.409795  543705 memory.go:184] no items to output this cycle
I0319 23:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0319 23:45:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:45:03.409782  543705 memory.go:184] no items to output this cycle
I0319 23:45:03.409816  543705 cpu.go:275] no items to output this cycle
E0319 23:45:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:45:13.409843  543705 memory.go:191] Add success.
I0319 23:45:13.409855  543705 cpu.go:282] Add success.
W0319 23:45:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:45:13.409905  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:45:13.409909  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:45:13.420263  543705 net.go:648] Add success.
I0319 23:45:13.423048  543705 net.go:770] primary dev: ETH0
I0319 23:45:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:45:13.423077  543705 net.go:698] Add success.
I0319 23:45:13.476791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6800e00e-185b-4406-8b81-954b2ed2088e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:45:13.476822  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:45:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:45:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:45:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0319 23:45:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:45:14.456742  543705 disk_worker.go:494] system disk:vda1
I0319 23:45:14.456775  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:45:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:45:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:45:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:45:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:45:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:45:21.560543  543705 disk_info.go:125] begin check local disk info of client
I0319 23:45:21.563037  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:45:21.563043  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002707c0 0xc000270800]
E0319 23:45:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:45:23.409769  543705 memory.go:184] no items to output this cycle
I0319 23:45:23.409778  543705 cpu.go:275] no items to output this cycle
E0319 23:45:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:45:33.409777  543705 memory.go:184] no items to output this cycle
I0319 23:45:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 23:45:37.965882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:45:37.965888  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:45:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:45:43.410655  543705 memory.go:191] Add success.
I0319 23:45:43.409836  543705 cpu.go:282] Add success.
I0319 23:45:43.420437  543705 net.go:648] Add success.
I0319 23:45:43.423402  543705 net.go:770] primary dev: ETH0
I0319 23:45:43.423416  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:45:43.423429  543705 net.go:698] Add success.
I0319 23:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:45:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:45:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:45:53.409796  543705 memory.go:184] no items to output this cycle
I0319 23:45:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:46:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:46:03.409794  543705 memory.go:184] no items to output this cycle
I0319 23:46:03.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:46:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:46:13.409800  543705 memory.go:191] Add success.
W0319 23:46:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 23:46:13.409831  543705 cpu.go:282] Add success.
W0319 23:46:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:46:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:46:13.420634  543705 net.go:648] Add success.
I0319 23:46:13.423230  543705 net.go:770] primary dev: ETH0
I0319 23:46:13.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:46:13.423255  543705 net.go:698] Add success.
I0319 23:46:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:46:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:46:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0319 23:46:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:46:14.456582  543705 disk_worker.go:494] system disk:vda1
I0319 23:46:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:46:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:46:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:46:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:46:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:46:16.472355  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:46:21.563458  543705 disk_info.go:125] begin check local disk info of client
I0319 23:46:21.565901  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:46:21.565908  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5440 0xc0000c5480]
E0319 23:46:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:46:23.409790  543705 memory.go:184] no items to output this cycle
I0319 23:46:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 23:46:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:46:33.409807  543705 memory.go:184] no items to output this cycle
I0319 23:46:33.409818  543705 cpu.go:275] no items to output this cycle
E0319 23:46:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:46:43.409824  543705 memory.go:191] Add success.
I0319 23:46:43.409829  543705 cpu.go:282] Add success.
I0319 23:46:43.419998  543705 net.go:648] Add success.
I0319 23:46:43.422673  543705 net.go:770] primary dev: ETH0
I0319 23:46:43.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:46:43.422703  543705 net.go:698] Add success.
I0319 23:46:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:46:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:46:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:46:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:46:53.409781  543705 memory.go:184] no items to output this cycle
I0319 23:46:53.409786  543705 cpu.go:275] no items to output this cycle
E0319 23:47:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:47:03.409782  543705 memory.go:184] no items to output this cycle
I0319 23:47:03.409820  543705 cpu.go:275] no items to output this cycle
E0319 23:47:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:47:13.409793  543705 memory.go:191] Add success.
I0319 23:47:13.409800  543705 cpu.go:282] Add success.
W0319 23:47:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:47:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:47:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:47:13.420089  543705 net.go:648] Add success.
I0319 23:47:13.423028  543705 net.go:770] primary dev: ETH0
I0319 23:47:13.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:47:13.423055  543705 net.go:698] Add success.
I0319 23:47:13.453638  543705 event_worker.go:152] Polling the log file for events...
W0319 23:47:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:47:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0319 23:47:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:47:14.456812  543705 disk_worker.go:494] system disk:vda1
I0319 23:47:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:47:14.457126  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:47:14.457134  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:47:14.457138  543705 custom_config.go:64] query custom config with name: gpu
E0319 23:47:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:47:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:47:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:47:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:47:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:47:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:47:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:47:21.566464  543705 disk_info.go:125] begin check local disk info of client
I0319 23:47:21.568940  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:47:21.568947  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e800 0xc00037e840]
E0319 23:47:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:47:23.409756  543705 memory.go:184] no items to output this cycle
I0319 23:47:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:47:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:47:33.409781  543705 memory.go:184] no items to output this cycle
I0319 23:47:33.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:47:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:47:43.409821  543705 memory.go:191] Add success.
I0319 23:47:43.409838  543705 cpu.go:282] Add success.
I0319 23:47:43.420013  543705 net.go:648] Add success.
I0319 23:47:43.422763  543705 net.go:770] primary dev: ETH0
I0319 23:47:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:47:43.422794  543705 net.go:698] Add success.
I0319 23:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:47:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:47:53.409765  543705 memory.go:184] no items to output this cycle
I0319 23:47:53.409795  543705 cpu.go:275] no items to output this cycle
E0319 23:48:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:48:03.409804  543705 memory.go:184] no items to output this cycle
I0319 23:48:03.409814  543705 cpu.go:275] no items to output this cycle
E0319 23:48:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:48:13.409786  543705 memory.go:191] Add success.
W0319 23:48:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0319 23:48:13.409814  543705 cpu.go:282] Add success.
W0319 23:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:48:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:48:13.420052  543705 net.go:648] Add success.
I0319 23:48:13.423234  543705 net.go:770] primary dev: ETH0
I0319 23:48:13.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:48:13.423260  543705 net.go:698] Add success.
I0319 23:48:13.463214  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5abb5a52-6be4-403e-8d39-ceffa92cb844","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:48:13.463243  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:48:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:48:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:48:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0319 23:48:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:48:14.456671  543705 disk_worker.go:494] system disk:vda1
I0319 23:48:14.456703  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:48:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:48:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:48:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:48:21.569600  543705 disk_info.go:125] begin check local disk info of client
I0319 23:48:21.571898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:48:21.571905  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1980 0xc0002a19c0]
E0319 23:48:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:48:23.409782  543705 memory.go:184] no items to output this cycle
I0319 23:48:23.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:48:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:48:33.409787  543705 memory.go:184] no items to output this cycle
I0319 23:48:33.409809  543705 cpu.go:275] no items to output this cycle
I0319 23:48:37.966026  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:48:37.966032  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:48:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:48:43.410705  543705 memory.go:191] Add success.
I0319 23:48:43.409827  543705 cpu.go:282] Add success.
I0319 23:48:43.420449  543705 net.go:648] Add success.
I0319 23:48:43.423244  543705 net.go:770] primary dev: ETH0
I0319 23:48:43.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:48:43.423275  543705 net.go:698] Add success.
I0319 23:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:48:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:48:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:48:53.409766  543705 memory.go:184] no items to output this cycle
I0319 23:48:53.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:49:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:49:03.409816  543705 memory.go:184] no items to output this cycle
I0319 23:49:03.409838  543705 cpu.go:275] no items to output this cycle
E0319 23:49:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:49:13.409805  543705 memory.go:191] Add success.
I0319 23:49:13.409806  543705 cpu.go:282] Add success.
W0319 23:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:49:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:49:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:49:13.420274  543705 net.go:648] Add success.
I0319 23:49:13.422932  543705 net.go:770] primary dev: ETH0
I0319 23:49:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:49:13.422961  543705 net.go:698] Add success.
I0319 23:49:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:49:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:49:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0319 23:49:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:49:14.456583  543705 disk_worker.go:494] system disk:vda1
I0319 23:49:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:49:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:49:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:49:16.472431  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:49:21.572499  543705 disk_info.go:125] begin check local disk info of client
I0319 23:49:21.574931  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:49:21.574937  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343c40 0xc000343c80]
E0319 23:49:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:49:23.409783  543705 memory.go:184] no items to output this cycle
I0319 23:49:23.409797  543705 cpu.go:275] no items to output this cycle
I0319 23:49:33.409927  543705 cpu.go:275] no items to output this cycle
E0319 23:49:33.409927  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:49:33.409975  543705 memory.go:184] no items to output this cycle
E0319 23:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:49:43.409808  543705 memory.go:191] Add success.
I0319 23:49:43.409807  543705 cpu.go:282] Add success.
I0319 23:49:43.419995  543705 net.go:648] Add success.
I0319 23:49:43.422699  543705 net.go:770] primary dev: ETH0
I0319 23:49:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:49:43.422726  543705 net.go:698] Add success.
I0319 23:49:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:49:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:49:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:49:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:49:53.409794  543705 memory.go:184] no items to output this cycle
I0319 23:49:53.409804  543705 cpu.go:275] no items to output this cycle
E0319 23:50:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:50:03.409779  543705 memory.go:184] no items to output this cycle
I0319 23:50:03.409781  543705 cpu.go:275] no items to output this cycle
E0319 23:50:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:50:13.409808  543705 memory.go:191] Add success.
I0319 23:50:13.409815  543705 cpu.go:282] Add success.
W0319 23:50:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:50:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:50:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:50:13.420109  543705 net.go:648] Add success.
I0319 23:50:13.422921  543705 net.go:770] primary dev: ETH0
I0319 23:50:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:50:13.422957  543705 net.go:698] Add success.
I0319 23:50:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:50:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:50:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0319 23:50:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:50:14.456604  543705 disk_worker.go:494] system disk:vda1
I0319 23:50:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:50:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:50:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:50:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:50:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:50:21.575617  543705 disk_info.go:125] begin check local disk info of client
I0319 23:50:21.578105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:50:21.578111  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256900 0xc000256940]
E0319 23:50:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:50:23.409774  543705 memory.go:184] no items to output this cycle
I0319 23:50:23.409779  543705 cpu.go:275] no items to output this cycle
E0319 23:50:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:50:33.409796  543705 memory.go:184] no items to output this cycle
I0319 23:50:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:50:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:50:43.409830  543705 memory.go:191] Add success.
I0319 23:50:43.409837  543705 cpu.go:282] Add success.
I0319 23:50:43.419816  543705 net.go:770] primary dev: ETH0
I0319 23:50:43.419830  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:50:43.419845  543705 net.go:698] Add success.
I0319 23:50:43.420224  543705 net.go:648] Add success.
I0319 23:50:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:50:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:50:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:50:53.409794  543705 memory.go:184] no items to output this cycle
I0319 23:50:53.409805  543705 cpu.go:275] no items to output this cycle
E0319 23:51:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:51:03.409788  543705 memory.go:184] no items to output this cycle
I0319 23:51:03.409828  543705 cpu.go:275] no items to output this cycle
E0319 23:51:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:51:13.409805  543705 memory.go:191] Add success.
I0319 23:51:13.409809  543705 cpu.go:282] Add success.
W0319 23:51:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:51:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:51:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:51:13.420213  543705 net.go:648] Add success.
I0319 23:51:13.422789  543705 net.go:770] primary dev: ETH0
I0319 23:51:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:51:13.422818  543705 net.go:698] Add success.
I0319 23:51:13.469850  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"20f4d597-6ef6-4101-a2c4-8806639a6c96","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:51:13.469883  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:51:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:51:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0319 23:51:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:51:14.456763  543705 disk_worker.go:494] system disk:vda1
I0319 23:51:14.456795  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:51:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:51:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:51:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:51:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:51:16.472446  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:51:21.578532  543705 disk_info.go:125] begin check local disk info of client
I0319 23:51:21.581056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:51:21.581062  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256640 0xc000256680]
E0319 23:51:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:51:23.409768  543705 memory.go:184] no items to output this cycle
I0319 23:51:23.409803  543705 cpu.go:275] no items to output this cycle
E0319 23:51:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:51:33.409765  543705 memory.go:184] no items to output this cycle
I0319 23:51:33.409801  543705 cpu.go:275] no items to output this cycle
I0319 23:51:37.966835  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:51:37.966841  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:51:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:51:43.410623  543705 memory.go:191] Add success.
I0319 23:51:43.409817  543705 cpu.go:282] Add success.
I0319 23:51:43.420394  543705 net.go:648] Add success.
I0319 23:51:43.422978  543705 net.go:770] primary dev: ETH0
I0319 23:51:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:51:43.423003  543705 net.go:698] Add success.
I0319 23:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:51:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:51:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:51:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:51:53.409779  543705 cpu.go:275] no items to output this cycle
I0319 23:51:53.409782  543705 memory.go:184] no items to output this cycle
E0319 23:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:52:03.409781  543705 memory.go:184] no items to output this cycle
I0319 23:52:03.409784  543705 cpu.go:275] no items to output this cycle
W0319 23:52:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:52:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:52:13.409728  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0319 23:52:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:52:13.409818  543705 memory.go:191] Add success.
I0319 23:52:13.409827  543705 cpu.go:282] Add success.
I0319 23:52:13.420052  543705 net.go:648] Add success.
I0319 23:52:13.423261  543705 net.go:770] primary dev: ETH0
I0319 23:52:13.423274  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:52:13.423288  543705 net.go:698] Add success.
W0319 23:52:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:52:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0319 23:52:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:52:14.456854  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:52:14.456863  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:52:14.456870  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:52:14.456942  543705 disk_worker.go:494] system disk:vda1
I0319 23:52:14.456985  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:52:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:52:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:52:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:52:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:52:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:52:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:52:16.472325  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:52:21.581597  543705 disk_info.go:125] begin check local disk info of client
I0319 23:52:21.583976  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:52:21.583982  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8680 0xc0004a86c0]
E0319 23:52:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:52:23.409788  543705 memory.go:184] no items to output this cycle
I0319 23:52:23.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:52:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:52:33.409792  543705 memory.go:184] no items to output this cycle
I0319 23:52:33.409805  543705 cpu.go:275] no items to output this cycle
E0319 23:52:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:52:43.409800  543705 cpu.go:282] Add success.
I0319 23:52:43.409805  543705 memory.go:191] Add success.
I0319 23:52:43.420066  543705 net.go:648] Add success.
I0319 23:52:43.422823  543705 net.go:770] primary dev: ETH0
I0319 23:52:43.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:52:43.422848  543705 net.go:698] Add success.
I0319 23:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:52:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:52:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:52:53.409798  543705 memory.go:184] no items to output this cycle
I0319 23:52:53.409813  543705 cpu.go:275] no items to output this cycle
E0319 23:53:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:53:03.409775  543705 memory.go:184] no items to output this cycle
I0319 23:53:03.409855  543705 cpu.go:275] no items to output this cycle
E0319 23:53:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:53:13.409797  543705 memory.go:191] Add success.
I0319 23:53:13.409798  543705 cpu.go:282] Add success.
W0319 23:53:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:53:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:53:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:53:13.420149  543705 net.go:648] Add success.
I0319 23:53:13.422893  543705 net.go:770] primary dev: ETH0
I0319 23:53:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:53:13.422919  543705 net.go:698] Add success.
I0319 23:53:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:53:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:53:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0319 23:53:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:53:14.456543  543705 disk_worker.go:494] system disk:vda1
I0319 23:53:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:53:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:53:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:53:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:53:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:53:21.584064  543705 disk_info.go:125] begin check local disk info of client
I0319 23:53:21.586566  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:53:21.586573  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384b40 0xc000384b80]
E0319 23:53:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:53:23.409778  543705 memory.go:184] no items to output this cycle
I0319 23:53:23.409807  543705 cpu.go:275] no items to output this cycle
E0319 23:53:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:53:33.409772  543705 memory.go:184] no items to output this cycle
I0319 23:53:33.409796  543705 cpu.go:275] no items to output this cycle
E0319 23:53:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:53:43.409819  543705 memory.go:191] Add success.
I0319 23:53:43.409828  543705 cpu.go:282] Add success.
I0319 23:53:43.420140  543705 net.go:648] Add success.
I0319 23:53:43.423134  543705 net.go:770] primary dev: ETH0
I0319 23:53:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:53:43.423163  543705 net.go:698] Add success.
I0319 23:53:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:53:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:53:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:53:53.409778  543705 memory.go:184] no items to output this cycle
I0319 23:53:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 23:54:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:54:03.409784  543705 memory.go:184] no items to output this cycle
I0319 23:54:03.409793  543705 cpu.go:275] no items to output this cycle
E0319 23:54:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:54:13.409798  543705 memory.go:191] Add success.
I0319 23:54:13.409799  543705 cpu.go:282] Add success.
W0319 23:54:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:54:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:54:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:54:13.420121  543705 net.go:648] Add success.
I0319 23:54:13.422961  543705 net.go:770] primary dev: ETH0
I0319 23:54:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:54:13.422987  543705 net.go:698] Add success.
I0319 23:54:13.469413  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6a75e560-5edf-44a9-b042-f827bdbde01c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:54:13.469446  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0319 23:54:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:54:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:54:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0319 23:54:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:54:14.456633  543705 disk_worker.go:494] system disk:vda1
I0319 23:54:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:54:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:54:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:54:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:54:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:54:21.586626  543705 disk_info.go:125] begin check local disk info of client
I0319 23:54:21.589058  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:54:21.589064  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4c00 0xc0004b4c40]
E0319 23:54:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:54:23.409786  543705 memory.go:184] no items to output this cycle
I0319 23:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0319 23:54:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:54:33.409768  543705 memory.go:184] no items to output this cycle
I0319 23:54:33.409788  543705 cpu.go:275] no items to output this cycle
I0319 23:54:37.966984  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:54:37.966990  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:54:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:54:43.410540  543705 memory.go:191] Add success.
I0319 23:54:43.409803  543705 cpu.go:282] Add success.
I0319 23:54:43.420587  543705 net.go:648] Add success.
I0319 23:54:43.423188  543705 net.go:770] primary dev: ETH0
I0319 23:54:43.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:54:43.423213  543705 net.go:698] Add success.
I0319 23:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:54:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:54:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:54:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:54:53.409760  543705 memory.go:184] no items to output this cycle
I0319 23:54:53.409798  543705 cpu.go:275] no items to output this cycle
I0319 23:55:03.409809  543705 cpu.go:275] no items to output this cycle
E0319 23:55:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:55:03.409831  543705 memory.go:184] no items to output this cycle
E0319 23:55:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:55:13.409827  543705 memory.go:191] Add success.
I0319 23:55:13.409827  543705 cpu.go:282] Add success.
W0319 23:55:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:55:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:55:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:55:13.420229  543705 net.go:648] Add success.
I0319 23:55:13.422841  543705 net.go:770] primary dev: ETH0
I0319 23:55:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:55:13.422866  543705 net.go:698] Add success.
I0319 23:55:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:55:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:55:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0319 23:55:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:55:14.456501  543705 disk_worker.go:494] system disk:vda1
I0319 23:55:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:55:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:55:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:55:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:55:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:55:21.589589  543705 disk_info.go:125] begin check local disk info of client
I0319 23:55:21.592155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:55:21.592161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5000 0xc0004b5040]
E0319 23:55:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:55:23.409767  543705 memory.go:184] no items to output this cycle
I0319 23:55:23.409778  543705 cpu.go:275] no items to output this cycle
E0319 23:55:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:55:33.409795  543705 memory.go:184] no items to output this cycle
I0319 23:55:33.409810  543705 cpu.go:275] no items to output this cycle
E0319 23:55:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:55:43.409830  543705 memory.go:191] Add success.
I0319 23:55:43.409833  543705 cpu.go:282] Add success.
I0319 23:55:43.420144  543705 net.go:648] Add success.
I0319 23:55:43.422843  543705 net.go:770] primary dev: ETH0
I0319 23:55:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:55:43.422868  543705 net.go:698] Add success.
I0319 23:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:55:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:55:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:55:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:55:53.409782  543705 memory.go:184] no items to output this cycle
I0319 23:55:53.409783  543705 cpu.go:275] no items to output this cycle
E0319 23:56:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:56:03.409783  543705 memory.go:184] no items to output this cycle
I0319 23:56:03.409787  543705 cpu.go:275] no items to output this cycle
E0319 23:56:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:56:13.409793  543705 memory.go:191] Add success.
I0319 23:56:13.409796  543705 cpu.go:282] Add success.
W0319 23:56:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:56:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:56:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:56:13.420265  543705 net.go:648] Add success.
I0319 23:56:13.422877  543705 net.go:770] primary dev: ETH0
I0319 23:56:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:56:13.422910  543705 net.go:698] Add success.
I0319 23:56:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:56:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:56:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0319 23:56:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:56:14.456571  543705 disk_worker.go:494] system disk:vda1
I0319 23:56:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:56:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:56:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:56:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:56:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:56:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:56:21.592603  543705 disk_info.go:125] begin check local disk info of client
I0319 23:56:21.595036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:56:21.595042  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580d40 0xc000580d80]
E0319 23:56:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:56:23.409787  543705 memory.go:184] no items to output this cycle
I0319 23:56:23.409798  543705 cpu.go:275] no items to output this cycle
E0319 23:56:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:56:33.409767  543705 memory.go:184] no items to output this cycle
I0319 23:56:33.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:56:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:56:43.409780  543705 memory.go:191] Add success.
I0319 23:56:43.409817  543705 cpu.go:282] Add success.
I0319 23:56:43.419963  543705 net.go:648] Add success.
I0319 23:56:43.422430  543705 net.go:770] primary dev: ETH0
I0319 23:56:43.422446  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:56:43.422461  543705 net.go:698] Add success.
I0319 23:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:56:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:56:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:56:53.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:56:53.409858  543705 memory.go:184] no items to output this cycle
I0319 23:56:53.409961  543705 cpu.go:275] no items to output this cycle
E0319 23:57:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:57:03.409801  543705 memory.go:184] no items to output this cycle
I0319 23:57:03.409823  543705 cpu.go:275] no items to output this cycle
E0319 23:57:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:57:13.409818  543705 memory.go:191] Add success.
I0319 23:57:13.409826  543705 cpu.go:282] Add success.
W0319 23:57:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:57:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:57:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:57:13.420152  543705 net.go:648] Add success.
I0319 23:57:13.422819  543705 net.go:770] primary dev: ETH0
I0319 23:57:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:57:13.422847  543705 net.go:698] Add success.
I0319 23:57:13.429111  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0319 23:57:13.453284  543705 event_worker.go:152] Polling the log file for events...
I0319 23:57:13.485844  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b1019dc0-d3f0-413a-ae08-1c4fc92ed21a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0319 23:57:13.485876  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0319 23:57:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:57:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0319 23:57:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0319 23:57:14.455858  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0319 23:57:14.455867  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0319 23:57:14.455871  543705 custom_config.go:64] query custom config with name: gpu
I0319 23:57:14.456621  543705 disk_worker.go:494] system disk:vda1
I0319 23:57:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
E0319 23:57:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0319 23:57:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:57:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0319 23:57:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0319 23:57:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:57:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:57:16.472328  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:57:21.595676  543705 disk_info.go:125] begin check local disk info of client
I0319 23:57:21.598114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:57:21.598120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003844c0 0xc000384500]
E0319 23:57:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:57:23.409765  543705 memory.go:184] no items to output this cycle
I0319 23:57:23.409792  543705 cpu.go:275] no items to output this cycle
E0319 23:57:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:57:33.409764  543705 memory.go:184] no items to output this cycle
I0319 23:57:33.409804  543705 cpu.go:275] no items to output this cycle
I0319 23:57:37.969542  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0319 23:57:37.969548  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0319 23:57:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:57:43.410709  543705 memory.go:191] Add success.
I0319 23:57:43.409813  543705 cpu.go:282] Add success.
I0319 23:57:43.420498  543705 net.go:648] Add success.
I0319 23:57:43.423608  543705 net.go:770] primary dev: ETH0
I0319 23:57:43.423635  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:57:43.423647  543705 net.go:698] Add success.
I0319 23:57:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:57:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:57:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:57:53.409782  543705 memory.go:184] no items to output this cycle
I0319 23:57:53.409781  543705 cpu.go:275] no items to output this cycle
E0319 23:58:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:58:03.409789  543705 memory.go:184] no items to output this cycle
I0319 23:58:03.409802  543705 cpu.go:275] no items to output this cycle
E0319 23:58:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:58:13.409793  543705 memory.go:191] Add success.
I0319 23:58:13.409810  543705 cpu.go:282] Add success.
W0319 23:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:58:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:58:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:58:13.420089  543705 net.go:648] Add success.
I0319 23:58:13.422857  543705 net.go:770] primary dev: ETH0
I0319 23:58:13.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:58:13.422886  543705 net.go:698] Add success.
I0319 23:58:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:58:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:58:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0319 23:58:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:58:14.456576  543705 disk_worker.go:494] system disk:vda1
I0319 23:58:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:58:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:58:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:58:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:58:21.598689  543705 disk_info.go:125] begin check local disk info of client
I0319 23:58:21.601134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:58:21.601141  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f780 0xc00035f7c0]
E0319 23:58:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:58:23.409785  543705 cpu.go:275] no items to output this cycle
I0319 23:58:23.409794  543705 memory.go:184] no items to output this cycle
E0319 23:58:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:58:33.409772  543705 memory.go:184] no items to output this cycle
I0319 23:58:33.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:58:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:58:43.409803  543705 memory.go:191] Add success.
I0319 23:58:43.409811  543705 cpu.go:282] Add success.
I0319 23:58:43.420135  543705 net.go:648] Add success.
I0319 23:58:43.422845  543705 net.go:770] primary dev: ETH0
I0319 23:58:43.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:58:43.422874  543705 net.go:698] Add success.
I0319 23:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:58:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:58:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:58:53.409788  543705 memory.go:184] no items to output this cycle
I0319 23:58:53.409808  543705 cpu.go:275] no items to output this cycle
E0319 23:59:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:59:03.409816  543705 memory.go:184] no items to output this cycle
I0319 23:59:03.409826  543705 cpu.go:275] no items to output this cycle
E0319 23:59:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:59:13.409793  543705 memory.go:191] Add success.
I0319 23:59:13.409815  543705 cpu.go:282] Add success.
W0319 23:59:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0319 23:59:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0319 23:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0319 23:59:13.420054  543705 net.go:648] Add success.
I0319 23:59:13.422820  543705 net.go:770] primary dev: ETH0
I0319 23:59:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:59:13.422845  543705 net.go:698] Add success.
I0319 23:59:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0319 23:59:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0319 23:59:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0319 23:59:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0319 23:59:14.456563  543705 disk_worker.go:494] system disk:vda1
I0319 23:59:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0319 23:59:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0319 23:59:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:59:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0319 23:59:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0319 23:59:21.601673  543705 disk_info.go:125] begin check local disk info of client
I0319 23:59:21.604129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0319 23:59:21.604135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab440 0xc0001ab480]
E0319 23:59:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:59:23.409799  543705 memory.go:184] no items to output this cycle
I0319 23:59:23.409813  543705 cpu.go:275] no items to output this cycle
E0319 23:59:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:59:33.409769  543705 memory.go:184] no items to output this cycle
I0319 23:59:33.409800  543705 cpu.go:275] no items to output this cycle
E0319 23:59:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:59:43.409792  543705 memory.go:191] Add success.
I0319 23:59:43.409820  543705 cpu.go:282] Add success.
I0319 23:59:43.419730  543705 net.go:648] Add success.
I0319 23:59:43.422504  543705 net.go:770] primary dev: ETH0
I0319 23:59:43.422518  543705 net.go:802] Send network stats successfully!,count is 6
I0319 23:59:43.422530  543705 net.go:698] Add success.
I0319 23:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0319 23:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0319 23:59:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0319 23:59:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0319 23:59:53.409760  543705 memory.go:184] no items to output this cycle
I0319 23:59:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:00:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:00:03.409775  543705 memory.go:184] no items to output this cycle
I0320 00:00:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:00:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:00:13.409785  543705 memory.go:191] Add success.
I0320 00:00:13.409807  543705 cpu.go:282] Add success.
W0320 00:00:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:00:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:00:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:00:13.420212  543705 net.go:648] Add success.
I0320 00:00:13.422964  543705 net.go:770] primary dev: ETH0
I0320 00:00:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:00:13.422988  543705 net.go:698] Add success.
I0320 00:00:13.497405  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"92b7482e-b6d5-4e00-a534-7bd74da40b41","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:00:13.497440  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:00:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:00:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:00:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 00:00:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:00:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 00:00:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:00:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:00:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:00:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:00:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:00:21.604719  543705 disk_info.go:125] begin check local disk info of client
I0320 00:00:21.607195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:00:21.607201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4fc0 0xc0000c5000]
E0320 00:00:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:00:23.409770  543705 memory.go:184] no items to output this cycle
I0320 00:00:23.409773  543705 cpu.go:275] no items to output this cycle
E0320 00:00:33.409844  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:00:33.409870  543705 memory.go:184] no items to output this cycle
I0320 00:00:33.409954  543705 cpu.go:275] no items to output this cycle
I0320 00:00:37.969729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:00:37.969735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:00:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:00:43.410641  543705 memory.go:191] Add success.
I0320 00:00:43.409817  543705 cpu.go:282] Add success.
I0320 00:00:43.420349  543705 net.go:648] Add success.
I0320 00:00:43.423175  543705 net.go:770] primary dev: ETH0
I0320 00:00:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:00:43.423206  543705 net.go:698] Add success.
I0320 00:00:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:00:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:00:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:00:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:00:53.409791  543705 memory.go:184] no items to output this cycle
I0320 00:00:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 00:01:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:01:03.409776  543705 memory.go:184] no items to output this cycle
I0320 00:01:03.409795  543705 cpu.go:275] no items to output this cycle
W0320 00:01:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:01:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:01:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 00:01:13.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:01:13.409836  543705 cpu.go:282] Add success.
I0320 00:01:13.409851  543705 memory.go:191] Add success.
I0320 00:01:13.420178  543705 net.go:648] Add success.
I0320 00:01:13.422702  543705 net.go:770] primary dev: ETH0
I0320 00:01:13.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:01:13.422727  543705 net.go:698] Add success.
I0320 00:01:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:01:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:01:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 00:01:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:01:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 00:01:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:01:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:01:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:01:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:01:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:01:21.607284  543705 disk_info.go:125] begin check local disk info of client
I0320 00:01:21.609778  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:01:21.609785  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003297c0 0xc000329800]
I0320 00:01:23.409849  543705 cpu.go:275] no items to output this cycle
E0320 00:01:23.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:01:23.409871  543705 memory.go:184] no items to output this cycle
E0320 00:01:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:01:33.409795  543705 memory.go:184] no items to output this cycle
I0320 00:01:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 00:01:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:01:43.409788  543705 memory.go:191] Add success.
I0320 00:01:43.409823  543705 cpu.go:282] Add success.
I0320 00:01:43.419894  543705 net.go:648] Add success.
I0320 00:01:43.422712  543705 net.go:770] primary dev: ETH0
I0320 00:01:43.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:01:43.422752  543705 net.go:698] Add success.
I0320 00:01:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:01:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:01:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:01:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:01:53.409803  543705 memory.go:184] no items to output this cycle
I0320 00:01:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 00:02:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:02:03.409805  543705 memory.go:184] no items to output this cycle
I0320 00:02:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 00:02:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:02:13.409785  543705 memory.go:191] Add success.
I0320 00:02:13.409803  543705 cpu.go:282] Add success.
W0320 00:02:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:02:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:02:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:02:13.420122  543705 net.go:648] Add success.
I0320 00:02:13.423053  543705 net.go:770] primary dev: ETH0
I0320 00:02:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:02:13.423082  543705 net.go:698] Add success.
W0320 00:02:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:02:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 00:02:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:02:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:02:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:02:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:02:14.457015  543705 disk_worker.go:494] system disk:vda1
I0320 00:02:14.457058  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:02:15.456778  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:02:15.456786  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:02:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:02:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:02:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:02:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:02:16.472363  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:02:21.610728  543705 disk_info.go:125] begin check local disk info of client
I0320 00:02:21.613226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:02:21.613231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000293140 0xc000293180]
E0320 00:02:23.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:02:23.409876  543705 memory.go:184] no items to output this cycle
I0320 00:02:23.409946  543705 cpu.go:275] no items to output this cycle
E0320 00:02:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:02:33.409778  543705 memory.go:184] no items to output this cycle
I0320 00:02:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:02:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:02:43.409807  543705 cpu.go:282] Add success.
I0320 00:02:43.409810  543705 memory.go:191] Add success.
I0320 00:02:43.419995  543705 net.go:648] Add success.
I0320 00:02:43.422753  543705 net.go:770] primary dev: ETH0
I0320 00:02:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:02:43.422782  543705 net.go:698] Add success.
I0320 00:02:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:02:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:02:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:02:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:02:53.409783  543705 memory.go:184] no items to output this cycle
I0320 00:02:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:03:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:03:03.409768  543705 memory.go:184] no items to output this cycle
I0320 00:03:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:03:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:03:13.409791  543705 memory.go:191] Add success.
W0320 00:03:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 00:03:13.409816  543705 cpu.go:282] Add success.
W0320 00:03:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:03:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:03:13.420139  543705 net.go:648] Add success.
I0320 00:03:13.423096  543705 net.go:770] primary dev: ETH0
I0320 00:03:13.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:03:13.423121  543705 net.go:698] Add success.
I0320 00:03:13.927891  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"70438be1-72be-436f-96a1-520a826c44e0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:03:13.927926  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:03:14.454584  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:03:14.454742  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:03:14.454807  543705 disk_worker.go:708] disk space is not compliant
W0320 00:03:14.454810  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:03:14.456181  543705 disk_worker.go:494] system disk:vda1
I0320 00:03:14.456236  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:03:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:03:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:03:21.613671  543705 disk_info.go:125] begin check local disk info of client
I0320 00:03:21.616171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:03:21.616177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370100 0xc000370140]
E0320 00:03:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:03:23.409781  543705 memory.go:184] no items to output this cycle
I0320 00:03:23.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:03:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:03:33.409808  543705 memory.go:184] no items to output this cycle
I0320 00:03:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 00:03:37.972564  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:03:37.972570  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:03:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:03:43.410561  543705 memory.go:191] Add success.
I0320 00:03:43.409814  543705 cpu.go:282] Add success.
I0320 00:03:43.420262  543705 net.go:648] Add success.
I0320 00:03:43.422750  543705 net.go:770] primary dev: ETH0
I0320 00:03:43.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:03:43.422779  543705 net.go:698] Add success.
I0320 00:03:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:03:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:03:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:03:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:03:53.409777  543705 memory.go:184] no items to output this cycle
I0320 00:03:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 00:04:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:04:03.409774  543705 memory.go:184] no items to output this cycle
I0320 00:04:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:04:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:04:13.409815  543705 memory.go:191] Add success.
I0320 00:04:13.409828  543705 cpu.go:282] Add success.
W0320 00:04:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:04:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:04:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:04:13.420153  543705 net.go:648] Add success.
I0320 00:04:13.422860  543705 net.go:770] primary dev: ETH0
I0320 00:04:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:04:13.422891  543705 net.go:698] Add success.
I0320 00:04:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:04:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:04:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 00:04:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:04:14.456621  543705 disk_worker.go:494] system disk:vda1
I0320 00:04:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:04:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:04:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:04:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:04:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:04:21.616769  543705 disk_info.go:125] begin check local disk info of client
I0320 00:04:21.619199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:04:21.619205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6300 0xc0002b6340]
E0320 00:04:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:04:23.409784  543705 memory.go:184] no items to output this cycle
I0320 00:04:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:04:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:04:33.409896  543705 memory.go:184] no items to output this cycle
I0320 00:04:33.409897  543705 cpu.go:275] no items to output this cycle
E0320 00:04:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:04:43.409792  543705 memory.go:191] Add success.
I0320 00:04:43.409809  543705 cpu.go:282] Add success.
I0320 00:04:43.420061  543705 net.go:648] Add success.
I0320 00:04:43.422919  543705 net.go:770] primary dev: ETH0
I0320 00:04:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:04:43.422950  543705 net.go:698] Add success.
I0320 00:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:04:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:04:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:04:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:04:53.409779  543705 memory.go:184] no items to output this cycle
I0320 00:04:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 00:05:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:05:03.409798  543705 memory.go:184] no items to output this cycle
I0320 00:05:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 00:05:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:05:13.409798  543705 memory.go:191] Add success.
I0320 00:05:13.409799  543705 cpu.go:282] Add success.
W0320 00:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:05:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:05:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:05:13.420144  543705 net.go:648] Add success.
I0320 00:05:13.423049  543705 net.go:770] primary dev: ETH0
I0320 00:05:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:05:13.423072  543705 net.go:698] Add success.
I0320 00:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:05:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:05:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 00:05:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:05:14.456507  543705 disk_worker.go:494] system disk:vda1
I0320 00:05:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:05:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:05:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:05:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:05:21.619782  543705 disk_info.go:125] begin check local disk info of client
I0320 00:05:21.622215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:05:21.622222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e80 0xc0000c5ec0]
E0320 00:05:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:05:23.409787  543705 memory.go:184] no items to output this cycle
I0320 00:05:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:05:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:05:33.409792  543705 memory.go:184] no items to output this cycle
I0320 00:05:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 00:05:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:05:43.409797  543705 cpu.go:282] Add success.
I0320 00:05:43.409808  543705 memory.go:191] Add success.
I0320 00:05:43.419724  543705 net.go:648] Add success.
I0320 00:05:43.422556  543705 net.go:770] primary dev: ETH0
I0320 00:05:43.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:05:43.422585  543705 net.go:698] Add success.
I0320 00:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:05:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:05:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:05:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:05:53.409805  543705 memory.go:184] no items to output this cycle
I0320 00:05:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 00:06:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:06:03.409774  543705 memory.go:184] no items to output this cycle
I0320 00:06:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 00:06:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:06:13.409812  543705 memory.go:191] Add success.
I0320 00:06:13.409821  543705 cpu.go:282] Add success.
W0320 00:06:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:06:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:06:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:06:13.420124  543705 net.go:648] Add success.
I0320 00:06:13.423135  543705 net.go:770] primary dev: ETH0
I0320 00:06:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:06:13.423161  543705 net.go:698] Add success.
I0320 00:06:13.476819  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8f424be9-cb20-4a37-aa2e-a855656d4ab7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:06:13.476853  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:06:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:06:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:06:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 00:06:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:06:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 00:06:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:06:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:06:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:06:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:06:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:06:21.622753  543705 disk_info.go:125] begin check local disk info of client
I0320 00:06:21.625158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:06:21.625164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366140 0xc000366180]
E0320 00:06:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:06:23.409792  543705 memory.go:184] no items to output this cycle
I0320 00:06:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:06:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:06:33.409766  543705 memory.go:184] no items to output this cycle
I0320 00:06:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 00:06:37.973738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:06:37.973745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:06:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:06:43.410754  543705 memory.go:191] Add success.
I0320 00:06:43.409797  543705 cpu.go:282] Add success.
I0320 00:06:43.419712  543705 net.go:648] Add success.
I0320 00:06:43.422582  543705 net.go:770] primary dev: ETH0
I0320 00:06:43.422595  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:06:43.422607  543705 net.go:698] Add success.
I0320 00:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:06:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:06:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:06:53.410221  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:06:53.410241  543705 memory.go:184] no items to output this cycle
I0320 00:06:53.410250  543705 cpu.go:275] no items to output this cycle
E0320 00:07:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:07:03.409769  543705 memory.go:184] no items to output this cycle
I0320 00:07:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 00:07:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:07:13.409780  543705 memory.go:191] Add success.
W0320 00:07:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 00:07:13.409810  543705 cpu.go:282] Add success.
W0320 00:07:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:07:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:07:13.420043  543705 net.go:648] Add success.
I0320 00:07:13.422707  543705 net.go:770] primary dev: ETH0
I0320 00:07:13.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:07:13.422731  543705 net.go:698] Add success.
I0320 00:07:13.453302  543705 event_worker.go:152] Polling the log file for events...
W0320 00:07:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:07:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 00:07:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:07:14.456950  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:07:14.456960  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:07:14.456966  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:07:14.457011  543705 disk_worker.go:494] system disk:vda1
I0320 00:07:14.457037  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:07:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:07:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:07:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:07:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:07:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:07:16.457982  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:07:16.472321  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:07:21.625671  543705 disk_info.go:125] begin check local disk info of client
I0320 00:07:21.628124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:07:21.628130  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349000 0xc000349040]
E0320 00:07:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:07:23.409789  543705 memory.go:184] no items to output this cycle
I0320 00:07:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:07:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:07:33.409804  543705 memory.go:184] no items to output this cycle
I0320 00:07:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 00:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:07:43.409786  543705 memory.go:191] Add success.
I0320 00:07:43.409814  543705 cpu.go:282] Add success.
I0320 00:07:43.419995  543705 net.go:648] Add success.
I0320 00:07:43.423049  543705 net.go:770] primary dev: ETH0
I0320 00:07:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:07:43.423076  543705 net.go:698] Add success.
I0320 00:07:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:07:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:07:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:07:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:07:53.409767  543705 memory.go:184] no items to output this cycle
I0320 00:07:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:08:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:08:03.409779  543705 memory.go:184] no items to output this cycle
I0320 00:08:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:08:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:08:13.409775  543705 memory.go:191] Add success.
W0320 00:08:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:08:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:08:13.409811  543705 cpu.go:282] Add success.
I0320 00:08:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:08:13.420233  543705 net.go:648] Add success.
I0320 00:08:13.423119  543705 net.go:770] primary dev: ETH0
I0320 00:08:13.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:08:13.423149  543705 net.go:698] Add success.
I0320 00:08:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:08:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:08:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 00:08:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:08:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 00:08:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:08:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:08:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:08:21.628839  543705 disk_info.go:125] begin check local disk info of client
I0320 00:08:21.631319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:08:21.631325  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348800 0xc000348840]
E0320 00:08:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:08:23.409780  543705 memory.go:184] no items to output this cycle
I0320 00:08:23.409781  543705 cpu.go:275] no items to output this cycle
E0320 00:08:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:08:33.409776  543705 memory.go:184] no items to output this cycle
I0320 00:08:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 00:08:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:08:43.409784  543705 memory.go:191] Add success.
I0320 00:08:43.409817  543705 cpu.go:282] Add success.
I0320 00:08:43.419881  543705 net.go:648] Add success.
I0320 00:08:43.422471  543705 net.go:770] primary dev: ETH0
I0320 00:08:43.422486  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:08:43.422502  543705 net.go:698] Add success.
I0320 00:08:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:08:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:08:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:08:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:08:53.409769  543705 memory.go:184] no items to output this cycle
I0320 00:08:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:09:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:09:03.409783  543705 memory.go:184] no items to output this cycle
I0320 00:09:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 00:09:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:09:13.409802  543705 memory.go:191] Add success.
I0320 00:09:13.409805  543705 cpu.go:282] Add success.
W0320 00:09:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:09:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:09:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:09:13.420169  543705 net.go:648] Add success.
I0320 00:09:13.423080  543705 net.go:770] primary dev: ETH0
I0320 00:09:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:09:13.423108  543705 net.go:698] Add success.
I0320 00:09:13.464884  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8517e4ff-335a-4555-b8a9-8127ca5f6e66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:09:13.464927  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:09:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:09:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:09:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 00:09:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:09:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 00:09:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:09:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:09:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:09:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:09:21.631790  543705 disk_info.go:125] begin check local disk info of client
I0320 00:09:21.634215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:09:21.634220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003491c0 0xc000349200]
E0320 00:09:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:09:23.409787  543705 memory.go:184] no items to output this cycle
I0320 00:09:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:09:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:09:33.409779  543705 memory.go:184] no items to output this cycle
I0320 00:09:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 00:09:37.976569  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:09:37.976576  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:09:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:09:43.410656  543705 memory.go:191] Add success.
I0320 00:09:43.409818  543705 cpu.go:282] Add success.
I0320 00:09:43.420337  543705 net.go:648] Add success.
I0320 00:09:43.422974  543705 net.go:770] primary dev: ETH0
I0320 00:09:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:09:43.423007  543705 net.go:698] Add success.
I0320 00:09:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:09:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:09:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:09:53.409795  543705 memory.go:184] no items to output this cycle
I0320 00:09:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 00:10:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:10:03.409768  543705 memory.go:184] no items to output this cycle
I0320 00:10:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:10:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:10:13.409792  543705 memory.go:191] Add success.
W0320 00:10:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 00:10:13.409820  543705 cpu.go:282] Add success.
W0320 00:10:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:10:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:10:13.420247  543705 net.go:648] Add success.
I0320 00:10:13.422991  543705 net.go:770] primary dev: ETH0
I0320 00:10:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:10:13.423020  543705 net.go:698] Add success.
I0320 00:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:10:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:10:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 00:10:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:10:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 00:10:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:10:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:10:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:10:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:10:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:10:21.634305  543705 disk_info.go:125] begin check local disk info of client
I0320 00:10:21.636760  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:10:21.636766  543705 disk_info.go:196] parse disk info done, disk is : [0xc000559480 0xc0005594c0]
E0320 00:10:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:10:23.409761  543705 memory.go:184] no items to output this cycle
I0320 00:10:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:10:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:10:33.409790  543705 memory.go:184] no items to output this cycle
I0320 00:10:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 00:10:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:10:43.409812  543705 memory.go:191] Add success.
I0320 00:10:43.409815  543705 cpu.go:282] Add success.
I0320 00:10:43.419972  543705 net.go:648] Add success.
I0320 00:10:43.422993  543705 net.go:770] primary dev: ETH0
I0320 00:10:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:10:43.423022  543705 net.go:698] Add success.
I0320 00:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:10:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:10:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:10:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:10:53.409782  543705 memory.go:184] no items to output this cycle
I0320 00:10:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:11:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:11:03.409806  543705 memory.go:184] no items to output this cycle
I0320 00:11:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 00:11:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:11:13.409805  543705 memory.go:191] Add success.
W0320 00:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 00:11:13.409832  543705 cpu.go:282] Add success.
W0320 00:11:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:11:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:11:13.420143  543705 net.go:648] Add success.
I0320 00:11:13.423094  543705 net.go:770] primary dev: ETH0
I0320 00:11:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:11:13.423122  543705 net.go:698] Add success.
I0320 00:11:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:11:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:11:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 00:11:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:11:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 00:11:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:11:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:11:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:11:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:11:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:11:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:11:21.636871  543705 disk_info.go:125] begin check local disk info of client
I0320 00:11:21.639350  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:11:21.639356  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa480 0xc0001aa4c0]
E0320 00:11:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:11:23.409795  543705 memory.go:184] no items to output this cycle
I0320 00:11:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 00:11:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:11:33.409789  543705 memory.go:184] no items to output this cycle
I0320 00:11:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 00:11:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:11:43.409829  543705 memory.go:191] Add success.
I0320 00:11:43.409835  543705 cpu.go:282] Add success.
I0320 00:11:43.419998  543705 net.go:648] Add success.
I0320 00:11:43.422648  543705 net.go:770] primary dev: ETH0
I0320 00:11:43.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:11:43.422678  543705 net.go:698] Add success.
I0320 00:11:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:11:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:11:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:11:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:11:53.409809  543705 memory.go:184] no items to output this cycle
I0320 00:11:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 00:12:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:12:03.409763  543705 memory.go:184] no items to output this cycle
I0320 00:12:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 00:12:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:12:13.409822  543705 memory.go:191] Add success.
I0320 00:12:13.409828  543705 cpu.go:282] Add success.
W0320 00:12:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:12:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:12:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:12:13.420148  543705 net.go:648] Add success.
I0320 00:12:13.423087  543705 net.go:770] primary dev: ETH0
I0320 00:12:13.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:12:13.423112  543705 net.go:698] Add success.
I0320 00:12:13.470178  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7e0d99ad-f583-4a00-9216-009e1d9623be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:12:13.470216  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 00:12:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:12:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 00:12:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:12:14.455933  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:12:14.455942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:12:14.455947  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:12:14.456572  543705 disk_worker.go:494] system disk:vda1
I0320 00:12:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:12:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:12:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:12:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:12:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:12:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:12:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:12:16.472342  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:12:21.639839  543705 disk_info.go:125] begin check local disk info of client
I0320 00:12:21.642307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:12:21.642314  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e8c80 0xc0004e8cc0]
E0320 00:12:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:12:23.409754  543705 memory.go:184] no items to output this cycle
I0320 00:12:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:12:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:12:33.409782  543705 memory.go:184] no items to output this cycle
I0320 00:12:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 00:12:37.977756  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:12:37.977762  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:12:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:12:43.410729  543705 memory.go:191] Add success.
I0320 00:12:43.409827  543705 cpu.go:282] Add success.
I0320 00:12:43.420519  543705 net.go:648] Add success.
I0320 00:12:43.423308  543705 net.go:770] primary dev: ETH0
I0320 00:12:43.423321  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:12:43.423336  543705 net.go:698] Add success.
I0320 00:12:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:12:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:12:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:12:53.409799  543705 memory.go:184] no items to output this cycle
I0320 00:12:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:13:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:13:03.409891  543705 memory.go:184] no items to output this cycle
I0320 00:13:03.409977  543705 cpu.go:275] no items to output this cycle
W0320 00:13:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:13:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:13:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:13:13.409810  543705 cpu.go:282] Add success.
E0320 00:13:13.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:13:13.409860  543705 memory.go:191] Add success.
I0320 00:13:13.420059  543705 net.go:648] Add success.
I0320 00:13:13.422769  543705 net.go:770] primary dev: ETH0
I0320 00:13:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:13:13.422794  543705 net.go:698] Add success.
I0320 00:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:13:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:13:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 00:13:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:13:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 00:13:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:13:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:13:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:13:21.642911  543705 disk_info.go:125] begin check local disk info of client
I0320 00:13:21.645432  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:13:21.645439  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003844c0 0xc000384500]
E0320 00:13:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:13:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 00:13:23.409787  543705 memory.go:184] no items to output this cycle
E0320 00:13:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:13:33.409801  543705 memory.go:184] no items to output this cycle
I0320 00:13:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:13:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:13:43.409800  543705 cpu.go:282] Add success.
I0320 00:13:43.409810  543705 memory.go:191] Add success.
I0320 00:13:43.420142  543705 net.go:648] Add success.
I0320 00:13:43.422754  543705 net.go:770] primary dev: ETH0
I0320 00:13:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:13:43.422781  543705 net.go:698] Add success.
I0320 00:13:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:13:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:13:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:13:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:13:53.409799  543705 memory.go:184] no items to output this cycle
I0320 00:13:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 00:14:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:14:03.409782  543705 memory.go:184] no items to output this cycle
I0320 00:14:03.409787  543705 cpu.go:275] no items to output this cycle
W0320 00:14:13.409719  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:14:13.409743  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:14:13.409749  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:14:13.409840  543705 cpu.go:282] Add success.
E0320 00:14:13.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:14:13.409860  543705 memory.go:191] Add success.
I0320 00:14:13.420154  543705 net.go:648] Add success.
I0320 00:14:13.423004  543705 net.go:770] primary dev: ETH0
I0320 00:14:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:14:13.423043  543705 net.go:698] Add success.
I0320 00:14:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:14:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:14:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 00:14:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:14:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 00:14:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:14:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:14:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:14:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:14:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:14:16.472442  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:14:21.645671  543705 disk_info.go:125] begin check local disk info of client
I0320 00:14:21.648105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:14:21.648110  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483f00 0xc000483f40]
E0320 00:14:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:14:23.409783  543705 memory.go:184] no items to output this cycle
I0320 00:14:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 00:14:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:14:33.409768  543705 memory.go:184] no items to output this cycle
I0320 00:14:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:14:43.409794  543705 memory.go:191] Add success.
I0320 00:14:43.409812  543705 cpu.go:282] Add success.
I0320 00:14:43.420044  543705 net.go:648] Add success.
I0320 00:14:43.422867  543705 net.go:770] primary dev: ETH0
I0320 00:14:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:14:43.422897  543705 net.go:698] Add success.
I0320 00:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:14:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:14:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:14:53.409791  543705 memory.go:184] no items to output this cycle
I0320 00:14:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 00:15:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:15:03.409770  543705 memory.go:184] no items to output this cycle
I0320 00:15:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:15:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:15:13.409795  543705 memory.go:191] Add success.
I0320 00:15:13.409798  543705 cpu.go:282] Add success.
W0320 00:15:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:15:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:15:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:15:13.419709  543705 net.go:648] Add success.
I0320 00:15:13.422255  543705 net.go:770] primary dev: ETH0
I0320 00:15:13.422267  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:15:13.422278  543705 net.go:698] Add success.
I0320 00:15:14.301342  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0a4d3c0e-190e-4104-b10c-fc913246b372","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:15:14.301375  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:15:14.453974  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:15:14.454168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:15:14.454245  543705 disk_worker.go:708] disk space is not compliant
W0320 00:15:14.454248  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:15:14.455815  543705 disk_worker.go:494] system disk:vda1
I0320 00:15:14.455844  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:15:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:15:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:15:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:15:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:15:21.648882  543705 disk_info.go:125] begin check local disk info of client
I0320 00:15:21.651359  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:15:21.651365  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c140 0xc00034c180]
E0320 00:15:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:15:23.409764  543705 memory.go:184] no items to output this cycle
I0320 00:15:23.409793  543705 cpu.go:275] no items to output this cycle
E0320 00:15:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:15:33.409776  543705 memory.go:184] no items to output this cycle
I0320 00:15:33.409781  543705 cpu.go:275] no items to output this cycle
I0320 00:15:37.980582  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:15:37.980588  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:15:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:15:43.410700  543705 memory.go:191] Add success.
I0320 00:15:43.409810  543705 cpu.go:282] Add success.
I0320 00:15:43.420436  543705 net.go:648] Add success.
I0320 00:15:43.422950  543705 net.go:770] primary dev: ETH0
I0320 00:15:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:15:43.422977  543705 net.go:698] Add success.
I0320 00:15:46.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:15:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:15:46.458116  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:15:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:15:53.409795  543705 memory.go:184] no items to output this cycle
I0320 00:15:53.409858  543705 cpu.go:275] no items to output this cycle
E0320 00:16:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:16:03.409790  543705 memory.go:184] no items to output this cycle
I0320 00:16:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 00:16:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:16:13.409855  543705 memory.go:191] Add success.
W0320 00:16:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:16:13.409901  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:16:13.409903  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:16:13.409958  543705 cpu.go:282] Add success.
I0320 00:16:13.419717  543705 net.go:648] Add success.
I0320 00:16:13.422414  543705 net.go:770] primary dev: ETH0
I0320 00:16:13.422427  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:16:13.422440  543705 net.go:698] Add success.
I0320 00:16:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:16:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:16:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 00:16:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:16:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 00:16:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:16:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:16:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:16:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:16:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:16:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:16:21.651448  543705 disk_info.go:125] begin check local disk info of client
I0320 00:16:21.653922  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:16:21.653929  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad800 0xc0003ad840]
E0320 00:16:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:16:23.409767  543705 cpu.go:275] no items to output this cycle
I0320 00:16:23.409780  543705 memory.go:184] no items to output this cycle
E0320 00:16:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:16:33.409796  543705 memory.go:184] no items to output this cycle
I0320 00:16:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 00:16:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:16:43.409802  543705 memory.go:191] Add success.
I0320 00:16:43.409804  543705 cpu.go:282] Add success.
I0320 00:16:43.420035  543705 net.go:648] Add success.
I0320 00:16:43.422735  543705 net.go:770] primary dev: ETH0
I0320 00:16:43.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:16:43.422769  543705 net.go:698] Add success.
I0320 00:16:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:16:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:16:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:16:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:16:53.409776  543705 memory.go:184] no items to output this cycle
I0320 00:16:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 00:17:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:17:03.409768  543705 memory.go:184] no items to output this cycle
I0320 00:17:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:17:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:17:13.409832  543705 memory.go:191] Add success.
I0320 00:17:13.409837  543705 cpu.go:282] Add success.
W0320 00:17:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:17:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:17:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:17:13.420163  543705 net.go:648] Add success.
I0320 00:17:13.422890  543705 net.go:770] primary dev: ETH0
I0320 00:17:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:17:13.422915  543705 net.go:698] Add success.
I0320 00:17:13.453454  543705 event_worker.go:152] Polling the log file for events...
W0320 00:17:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:17:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 00:17:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:17:14.456912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:17:14.456921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:17:14.456927  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:17:14.457002  543705 disk_worker.go:494] system disk:vda1
I0320 00:17:14.457044  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:17:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:17:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:17:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:17:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:17:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:17:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:17:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:17:21.654735  543705 disk_info.go:125] begin check local disk info of client
I0320 00:17:21.657097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:17:21.657104  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c3c0 0xc00034c400]
E0320 00:17:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:17:23.409765  543705 memory.go:184] no items to output this cycle
I0320 00:17:23.409783  543705 cpu.go:275] no items to output this cycle
E0320 00:17:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:17:33.409795  543705 memory.go:184] no items to output this cycle
I0320 00:17:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:17:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:17:43.409801  543705 memory.go:191] Add success.
I0320 00:17:43.409802  543705 cpu.go:282] Add success.
I0320 00:17:43.419855  543705 net.go:648] Add success.
I0320 00:17:43.422615  543705 net.go:770] primary dev: ETH0
I0320 00:17:43.422628  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:17:43.422641  543705 net.go:698] Add success.
I0320 00:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:17:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:17:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:17:53.409780  543705 memory.go:184] no items to output this cycle
I0320 00:17:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:18:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:18:03.409775  543705 memory.go:184] no items to output this cycle
I0320 00:18:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:18:13.409816  543705 memory.go:191] Add success.
I0320 00:18:13.409820  543705 cpu.go:282] Add success.
W0320 00:18:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:18:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:18:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:18:13.419709  543705 net.go:648] Add success.
I0320 00:18:13.422613  543705 net.go:770] primary dev: ETH0
I0320 00:18:13.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:18:13.422643  543705 net.go:698] Add success.
I0320 00:18:13.908202  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ae1b429-cc3e-42bb-9f8c-60912de3e8f3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:18:13.908241  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:18:14.454680  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:18:14.454804  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:18:14.454864  543705 disk_worker.go:708] disk space is not compliant
W0320 00:18:14.454867  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:18:14.456217  543705 disk_worker.go:494] system disk:vda1
I0320 00:18:14.456261  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:18:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:18:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:18:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:18:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:18:21.657672  543705 disk_info.go:125] begin check local disk info of client
I0320 00:18:21.660112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:18:21.660118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4280 0xc0000c42c0]
E0320 00:18:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:18:23.409767  543705 memory.go:184] no items to output this cycle
I0320 00:18:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 00:18:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:18:33.409781  543705 memory.go:184] no items to output this cycle
I0320 00:18:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 00:18:37.981743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:18:37.981750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:18:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:18:43.410712  543705 memory.go:191] Add success.
I0320 00:18:43.409815  543705 cpu.go:282] Add success.
I0320 00:18:43.420443  543705 net.go:648] Add success.
I0320 00:18:43.423808  543705 net.go:770] primary dev: ETH0
I0320 00:18:43.423820  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:18:43.423832  543705 net.go:698] Add success.
I0320 00:18:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:18:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:18:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:18:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:18:53.409808  543705 memory.go:184] no items to output this cycle
I0320 00:18:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 00:19:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:19:03.409788  543705 memory.go:184] no items to output this cycle
I0320 00:19:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 00:19:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:19:13.409802  543705 memory.go:191] Add success.
I0320 00:19:13.409823  543705 cpu.go:282] Add success.
W0320 00:19:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:19:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:19:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:19:13.419747  543705 net.go:648] Add success.
I0320 00:19:13.422718  543705 net.go:770] primary dev: ETH0
I0320 00:19:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:19:13.422742  543705 net.go:698] Add success.
I0320 00:19:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:19:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:19:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 00:19:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:19:14.456743  543705 disk_worker.go:494] system disk:vda1
I0320 00:19:14.456770  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:19:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:19:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:19:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:19:21.660935  543705 disk_info.go:125] begin check local disk info of client
I0320 00:19:21.663300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:19:21.663306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cf7c0 0xc0003cf800]
E0320 00:19:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:19:23.409786  543705 memory.go:184] no items to output this cycle
I0320 00:19:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 00:19:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:19:33.409799  543705 memory.go:184] no items to output this cycle
I0320 00:19:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 00:19:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:19:43.409797  543705 memory.go:191] Add success.
I0320 00:19:43.409800  543705 cpu.go:282] Add success.
I0320 00:19:43.420067  543705 net.go:648] Add success.
I0320 00:19:43.422910  543705 net.go:770] primary dev: ETH0
I0320 00:19:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:19:43.422935  543705 net.go:698] Add success.
I0320 00:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:19:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:19:53.410352  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:19:53.410367  543705 memory.go:184] no items to output this cycle
I0320 00:19:53.410394  543705 cpu.go:275] no items to output this cycle
E0320 00:20:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:20:03.409770  543705 memory.go:184] no items to output this cycle
I0320 00:20:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 00:20:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:20:13.409813  543705 memory.go:191] Add success.
I0320 00:20:13.409826  543705 cpu.go:282] Add success.
W0320 00:20:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:20:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:20:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:20:13.420414  543705 net.go:648] Add success.
I0320 00:20:13.423218  543705 net.go:770] primary dev: ETH0
I0320 00:20:13.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:20:13.423243  543705 net.go:698] Add success.
I0320 00:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:20:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:20:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 00:20:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:20:14.456565  543705 disk_worker.go:494] system disk:vda1
I0320 00:20:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:20:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:20:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:20:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:20:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:20:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:20:21.663389  543705 disk_info.go:125] begin check local disk info of client
I0320 00:20:21.665868  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:20:21.665874  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e8340 0xc0004e8380]
E0320 00:20:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:20:23.409758  543705 memory.go:184] no items to output this cycle
I0320 00:20:23.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:20:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:20:33.409773  543705 memory.go:184] no items to output this cycle
I0320 00:20:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 00:20:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:20:43.409791  543705 memory.go:191] Add success.
I0320 00:20:43.409814  543705 cpu.go:282] Add success.
I0320 00:20:43.419910  543705 net.go:648] Add success.
I0320 00:20:43.422522  543705 net.go:770] primary dev: ETH0
I0320 00:20:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:20:43.422551  543705 net.go:698] Add success.
I0320 00:20:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:20:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:20:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:20:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:20:53.409778  543705 memory.go:184] no items to output this cycle
I0320 00:20:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 00:21:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:21:03.409767  543705 memory.go:184] no items to output this cycle
I0320 00:21:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 00:21:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:21:13.409826  543705 memory.go:191] Add success.
I0320 00:21:13.409833  543705 cpu.go:282] Add success.
W0320 00:21:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:21:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:21:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:21:13.420170  543705 net.go:648] Add success.
I0320 00:21:13.422696  543705 net.go:770] primary dev: ETH0
I0320 00:21:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:21:13.422726  543705 net.go:698] Add success.
I0320 00:21:13.469622  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"70f58bbf-eb92-45ec-89ea-c54a1ae95176","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:21:13.469667  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:21:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:21:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:21:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 00:21:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:21:14.456681  543705 disk_worker.go:494] system disk:vda1
I0320 00:21:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:21:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:21:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:21:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:21:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:21:21.665958  543705 disk_info.go:125] begin check local disk info of client
I0320 00:21:21.668463  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:21:21.668470  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053e1c0 0xc00053e200]
E0320 00:21:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:21:23.409769  543705 cpu.go:275] no items to output this cycle
I0320 00:21:23.409776  543705 memory.go:184] no items to output this cycle
E0320 00:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:21:33.409793  543705 memory.go:184] no items to output this cycle
I0320 00:21:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 00:21:37.984610  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:21:37.984618  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:21:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:21:43.410665  543705 memory.go:191] Add success.
I0320 00:21:43.409830  543705 cpu.go:282] Add success.
I0320 00:21:43.420398  543705 net.go:648] Add success.
I0320 00:21:43.423180  543705 net.go:770] primary dev: ETH0
I0320 00:21:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:21:43.423209  543705 net.go:698] Add success.
I0320 00:21:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:21:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:21:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:21:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:21:53.409777  543705 cpu.go:275] no items to output this cycle
I0320 00:21:53.409779  543705 memory.go:184] no items to output this cycle
E0320 00:22:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:22:03.409797  543705 memory.go:184] no items to output this cycle
I0320 00:22:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 00:22:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:22:13.409815  543705 memory.go:191] Add success.
I0320 00:22:13.409823  543705 cpu.go:282] Add success.
W0320 00:22:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:22:13.420051  543705 net.go:648] Add success.
I0320 00:22:13.422661  543705 net.go:770] primary dev: ETH0
I0320 00:22:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:22:13.422687  543705 net.go:698] Add success.
W0320 00:22:14.455601  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:22:14.455615  543705 disk_worker.go:708] disk space is not compliant
W0320 00:22:14.455620  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:22:14.456240  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:22:14.456250  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:22:14.456257  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:22:14.457402  543705 disk_worker.go:494] system disk:vda1
I0320 00:22:14.457432  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:22:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:22:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 00:22:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:22:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:22:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:22:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:22:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:22:21.668992  543705 disk_info.go:125] begin check local disk info of client
I0320 00:22:21.671741  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:22:21.671747  543705 disk_info.go:196] parse disk info done, disk is : [0xc000530080 0xc0005300c0]
E0320 00:22:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:22:23.409791  543705 memory.go:184] no items to output this cycle
I0320 00:22:23.409810  543705 cpu.go:275] no items to output this cycle
E0320 00:22:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:22:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 00:22:33.409794  543705 memory.go:184] no items to output this cycle
E0320 00:22:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:22:43.409831  543705 memory.go:191] Add success.
I0320 00:22:43.409837  543705 cpu.go:282] Add success.
I0320 00:22:43.419992  543705 net.go:648] Add success.
I0320 00:22:43.422879  543705 net.go:770] primary dev: ETH0
I0320 00:22:43.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:22:43.422905  543705 net.go:698] Add success.
I0320 00:22:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:22:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:22:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:22:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:22:53.409768  543705 memory.go:184] no items to output this cycle
I0320 00:22:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 00:23:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:23:03.409777  543705 memory.go:184] no items to output this cycle
I0320 00:23:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 00:23:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:23:13.409798  543705 memory.go:191] Add success.
I0320 00:23:13.409798  543705 cpu.go:282] Add success.
W0320 00:23:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:23:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:23:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:23:13.420267  543705 net.go:648] Add success.
I0320 00:23:13.423023  543705 net.go:770] primary dev: ETH0
I0320 00:23:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:23:13.423049  543705 net.go:698] Add success.
I0320 00:23:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:23:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:23:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 00:23:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:23:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 00:23:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:23:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:23:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:23:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:23:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:23:16.472423  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:23:21.673001  543705 disk_info.go:125] begin check local disk info of client
I0320 00:23:21.675496  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:23:21.675502  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344480 0xc0003444c0]
E0320 00:23:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:23:23.409772  543705 memory.go:184] no items to output this cycle
I0320 00:23:23.409791  543705 cpu.go:275] no items to output this cycle
E0320 00:23:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:23:33.409778  543705 memory.go:184] no items to output this cycle
I0320 00:23:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:23:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:23:43.409791  543705 memory.go:191] Add success.
I0320 00:23:43.409811  543705 cpu.go:282] Add success.
I0320 00:23:43.419888  543705 net.go:648] Add success.
I0320 00:23:43.422771  543705 net.go:770] primary dev: ETH0
I0320 00:23:43.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:23:43.422798  543705 net.go:698] Add success.
I0320 00:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:23:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:23:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:23:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:23:53.409795  543705 memory.go:184] no items to output this cycle
I0320 00:23:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 00:24:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:24:03.409798  543705 memory.go:184] no items to output this cycle
I0320 00:24:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 00:24:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:24:13.409779  543705 memory.go:191] Add success.
I0320 00:24:13.409805  543705 cpu.go:282] Add success.
W0320 00:24:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:24:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:24:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:24:13.420090  543705 net.go:648] Add success.
I0320 00:24:13.423466  543705 net.go:770] primary dev: ETH0
I0320 00:24:13.423479  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:24:13.423490  543705 net.go:698] Add success.
I0320 00:24:13.530384  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"180b2ee0-f841-4b1f-a103-11a675fcfe29","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:24:13.530417  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:24:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:24:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:24:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 00:24:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:24:14.456621  543705 disk_worker.go:494] system disk:vda1
I0320 00:24:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:24:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:24:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:24:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:24:21.675578  543705 disk_info.go:125] begin check local disk info of client
I0320 00:24:21.677991  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:24:21.677998  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
E0320 00:24:23.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:24:23.409912  543705 memory.go:184] no items to output this cycle
I0320 00:24:23.410012  543705 cpu.go:275] no items to output this cycle
E0320 00:24:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:24:33.409775  543705 memory.go:184] no items to output this cycle
I0320 00:24:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 00:24:37.985744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:24:37.985750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:24:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:24:43.410702  543705 memory.go:191] Add success.
I0320 00:24:43.409821  543705 cpu.go:282] Add success.
I0320 00:24:43.420428  543705 net.go:648] Add success.
I0320 00:24:43.423250  543705 net.go:770] primary dev: ETH0
I0320 00:24:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:24:43.423281  543705 net.go:698] Add success.
I0320 00:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:24:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:24:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:24:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:24:53.409766  543705 memory.go:184] no items to output this cycle
I0320 00:24:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 00:25:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:25:03.409769  543705 memory.go:184] no items to output this cycle
I0320 00:25:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 00:25:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:25:13.409818  543705 memory.go:191] Add success.
I0320 00:25:13.409821  543705 cpu.go:282] Add success.
W0320 00:25:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:25:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:25:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:25:13.420708  543705 net.go:648] Add success.
I0320 00:25:13.423416  543705 net.go:770] primary dev: ETH0
I0320 00:25:13.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:25:13.423442  543705 net.go:698] Add success.
I0320 00:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:25:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:25:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 00:25:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:25:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 00:25:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:25:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:25:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:25:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:25:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:25:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:25:21.679033  543705 disk_info.go:125] begin check local disk info of client
I0320 00:25:21.681449  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:25:21.681456  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab080 0xc0001ab0c0]
I0320 00:25:23.409870  543705 cpu.go:275] no items to output this cycle
E0320 00:25:23.409941  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:25:23.409953  543705 memory.go:184] no items to output this cycle
E0320 00:25:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:25:33.409802  543705 memory.go:184] no items to output this cycle
I0320 00:25:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 00:25:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:25:43.409794  543705 memory.go:191] Add success.
I0320 00:25:43.409812  543705 cpu.go:282] Add success.
I0320 00:25:43.420069  543705 net.go:648] Add success.
I0320 00:25:43.422693  543705 net.go:770] primary dev: ETH0
I0320 00:25:43.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:25:43.422720  543705 net.go:698] Add success.
I0320 00:25:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:25:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:25:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:25:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:25:53.409766  543705 memory.go:184] no items to output this cycle
I0320 00:25:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 00:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:26:03.409769  543705 memory.go:184] no items to output this cycle
I0320 00:26:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 00:26:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:26:13.409791  543705 memory.go:191] Add success.
I0320 00:26:13.409794  543705 cpu.go:282] Add success.
W0320 00:26:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:26:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:26:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:26:13.420309  543705 net.go:648] Add success.
I0320 00:26:13.422983  543705 net.go:770] primary dev: ETH0
I0320 00:26:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:26:13.423009  543705 net.go:698] Add success.
I0320 00:26:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:26:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:26:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 00:26:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:26:14.456526  543705 disk_worker.go:494] system disk:vda1
I0320 00:26:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:26:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:26:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:26:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:26:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:26:21.681669  543705 disk_info.go:125] begin check local disk info of client
I0320 00:26:21.684087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:26:21.684094  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002742c0 0xc000274300]
E0320 00:26:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:26:23.409785  543705 memory.go:184] no items to output this cycle
I0320 00:26:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 00:26:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:26:33.409803  543705 memory.go:184] no items to output this cycle
I0320 00:26:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 00:26:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:26:43.409808  543705 memory.go:191] Add success.
I0320 00:26:43.409810  543705 cpu.go:282] Add success.
I0320 00:26:43.420014  543705 net.go:648] Add success.
I0320 00:26:43.422619  543705 net.go:770] primary dev: ETH0
I0320 00:26:43.422634  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:26:43.422649  543705 net.go:698] Add success.
I0320 00:26:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:26:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:26:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:26:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:26:53.409766  543705 memory.go:184] no items to output this cycle
I0320 00:26:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 00:27:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:27:03.409778  543705 memory.go:184] no items to output this cycle
I0320 00:27:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 00:27:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:27:13.409788  543705 memory.go:191] Add success.
I0320 00:27:13.409808  543705 cpu.go:282] Add success.
W0320 00:27:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:27:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:27:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:27:13.420178  543705 net.go:648] Add success.
I0320 00:27:13.429145  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 00:27:13.429224  543705 net.go:770] primary dev: ETH0
I0320 00:27:13.429236  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:27:13.429250  543705 net.go:698] Add success.
I0320 00:27:13.452777  543705 event_worker.go:152] Polling the log file for events...
I0320 00:27:13.624418  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f4263992-4966-4763-a7ca-118d6b1e7bbc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:27:13.624452  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 00:27:14.454158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:27:14.454219  543705 disk_worker.go:708] disk space is not compliant
W0320 00:27:14.454222  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:27:14.456040  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:27:14.456047  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:27:14.456051  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:27:14.456060  543705 disk_worker.go:494] system disk:vda1
I0320 00:27:14.456115  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:27:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:27:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:27:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:27:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:27:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:27:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:27:16.472333  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:27:21.685062  543705 disk_info.go:125] begin check local disk info of client
I0320 00:27:21.687525  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:27:21.687532  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278540 0xc000278580]
E0320 00:27:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:27:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 00:27:23.409777  543705 memory.go:184] no items to output this cycle
E0320 00:27:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:27:33.409801  543705 memory.go:184] no items to output this cycle
I0320 00:27:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 00:27:37.988641  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:27:37.988648  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:27:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:27:43.410716  543705 memory.go:191] Add success.
I0320 00:27:43.409830  543705 cpu.go:282] Add success.
I0320 00:27:43.420410  543705 net.go:648] Add success.
I0320 00:27:43.423438  543705 net.go:770] primary dev: ETH0
I0320 00:27:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:27:43.423468  543705 net.go:698] Add success.
I0320 00:27:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:27:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:27:53.409780  543705 memory.go:184] no items to output this cycle
I0320 00:27:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 00:28:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:28:03.409776  543705 memory.go:184] no items to output this cycle
I0320 00:28:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 00:28:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:28:13.409797  543705 memory.go:191] Add success.
I0320 00:28:13.409798  543705 cpu.go:282] Add success.
W0320 00:28:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:28:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:28:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:28:13.420219  543705 net.go:648] Add success.
I0320 00:28:13.423148  543705 net.go:770] primary dev: ETH0
I0320 00:28:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:28:13.423178  543705 net.go:698] Add success.
I0320 00:28:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:28:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:28:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 00:28:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:28:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 00:28:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:28:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:28:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:28:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:28:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:28:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:28:21.688129  543705 disk_info.go:125] begin check local disk info of client
I0320 00:28:21.690577  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:28:21.690592  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275180 0xc0002751c0]
E0320 00:28:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:28:23.409798  543705 memory.go:184] no items to output this cycle
I0320 00:28:23.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:28:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:28:33.409774  543705 memory.go:184] no items to output this cycle
I0320 00:28:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:28:43.409905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:28:43.410015  543705 memory.go:191] Add success.
I0320 00:28:43.410041  543705 cpu.go:282] Add success.
I0320 00:28:43.419732  543705 net.go:648] Add success.
I0320 00:28:43.422475  543705 net.go:770] primary dev: ETH0
I0320 00:28:43.422491  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:28:43.422505  543705 net.go:698] Add success.
I0320 00:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:28:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:28:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:28:53.409772  543705 memory.go:184] no items to output this cycle
I0320 00:28:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:29:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:29:03.409779  543705 memory.go:184] no items to output this cycle
I0320 00:29:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 00:29:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:29:13.409823  543705 memory.go:191] Add success.
I0320 00:29:13.409832  543705 cpu.go:282] Add success.
W0320 00:29:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:29:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:29:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:29:13.420382  543705 net.go:648] Add success.
I0320 00:29:13.423067  543705 net.go:770] primary dev: ETH0
I0320 00:29:13.423083  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:29:13.423097  543705 net.go:698] Add success.
I0320 00:29:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:29:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:29:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 00:29:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:29:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 00:29:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:29:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:29:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:29:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:29:16.472358  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:29:21.691102  543705 disk_info.go:125] begin check local disk info of client
I0320 00:29:21.693531  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:29:21.693537  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ab80 0xc00039ac00]
E0320 00:29:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:29:23.409773  543705 memory.go:184] no items to output this cycle
I0320 00:29:23.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:29:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:29:33.409786  543705 memory.go:184] no items to output this cycle
I0320 00:29:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:29:43.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:29:43.409844  543705 memory.go:191] Add success.
I0320 00:29:43.409850  543705 cpu.go:282] Add success.
I0320 00:29:43.420069  543705 net.go:648] Add success.
I0320 00:29:43.422748  543705 net.go:770] primary dev: ETH0
I0320 00:29:43.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:29:43.422778  543705 net.go:698] Add success.
I0320 00:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:29:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:29:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:29:53.409789  543705 memory.go:184] no items to output this cycle
I0320 00:29:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 00:30:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:30:03.409778  543705 memory.go:184] no items to output this cycle
I0320 00:30:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 00:30:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:30:13.409798  543705 memory.go:191] Add success.
I0320 00:30:13.409799  543705 cpu.go:282] Add success.
W0320 00:30:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:30:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:30:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:30:13.420200  543705 net.go:648] Add success.
I0320 00:30:13.422865  543705 net.go:770] primary dev: ETH0
I0320 00:30:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:30:13.422890  543705 net.go:698] Add success.
I0320 00:30:13.468635  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0b779572-25ac-4c60-8935-a325bbea8bda","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:30:13.468684  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:30:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:30:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:30:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 00:30:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:30:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 00:30:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:30:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:30:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:30:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:30:16.472351  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:30:21.693673  543705 disk_info.go:125] begin check local disk info of client
I0320 00:30:21.696061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:30:21.696067  543705 disk_info.go:196] parse disk info done, disk is : [0xc000341280 0xc0003412c0]
E0320 00:30:23.410478  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:30:23.410495  543705 memory.go:184] no items to output this cycle
I0320 00:30:23.410510  543705 cpu.go:275] no items to output this cycle
E0320 00:30:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:30:33.409789  543705 memory.go:184] no items to output this cycle
I0320 00:30:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 00:30:37.989733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:30:37.989739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:30:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:30:43.410663  543705 memory.go:191] Add success.
I0320 00:30:43.409834  543705 cpu.go:282] Add success.
I0320 00:30:43.420389  543705 net.go:648] Add success.
I0320 00:30:43.423237  543705 net.go:770] primary dev: ETH0
I0320 00:30:43.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:30:43.423262  543705 net.go:698] Add success.
I0320 00:30:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:30:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:30:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:30:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:30:53.409806  543705 memory.go:184] no items to output this cycle
I0320 00:30:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 00:31:03.409981  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:31:03.409997  543705 memory.go:184] no items to output this cycle
I0320 00:31:03.410001  543705 cpu.go:275] no items to output this cycle
E0320 00:31:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:31:13.409838  543705 memory.go:191] Add success.
I0320 00:31:13.409844  543705 cpu.go:282] Add success.
W0320 00:31:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:31:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:31:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:31:13.420206  543705 net.go:648] Add success.
I0320 00:31:13.423087  543705 net.go:770] primary dev: ETH0
I0320 00:31:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:31:13.423117  543705 net.go:698] Add success.
I0320 00:31:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:31:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:31:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 00:31:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:31:14.456514  543705 disk_worker.go:494] system disk:vda1
I0320 00:31:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:31:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:31:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:31:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:31:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:31:21.697175  543705 disk_info.go:125] begin check local disk info of client
I0320 00:31:21.699664  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:31:21.699670  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290340 0xc000290380]
E0320 00:31:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:31:23.409767  543705 memory.go:184] no items to output this cycle
I0320 00:31:23.409773  543705 cpu.go:275] no items to output this cycle
E0320 00:31:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:31:33.409809  543705 memory.go:184] no items to output this cycle
I0320 00:31:33.409834  543705 cpu.go:275] no items to output this cycle
E0320 00:31:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:31:43.409832  543705 memory.go:191] Add success.
I0320 00:31:43.409848  543705 cpu.go:282] Add success.
I0320 00:31:43.420077  543705 net.go:648] Add success.
I0320 00:31:43.423075  543705 net.go:770] primary dev: ETH0
I0320 00:31:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:31:43.423105  543705 net.go:698] Add success.
I0320 00:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:31:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:31:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:31:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:31:53.409777  543705 memory.go:184] no items to output this cycle
I0320 00:31:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:32:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:32:03.409804  543705 memory.go:184] no items to output this cycle
I0320 00:32:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 00:32:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:32:13.409784  543705 memory.go:191] Add success.
I0320 00:32:13.409806  543705 cpu.go:282] Add success.
W0320 00:32:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:32:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:32:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:32:13.420116  543705 net.go:648] Add success.
I0320 00:32:13.422693  543705 net.go:770] primary dev: ETH0
I0320 00:32:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:32:13.422721  543705 net.go:698] Add success.
W0320 00:32:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:32:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 00:32:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:32:14.456983  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:32:14.456993  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:32:14.457000  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:32:14.457025  543705 disk_worker.go:494] system disk:vda1
I0320 00:32:14.457065  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:32:15.456771  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:32:15.456780  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:32:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:32:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:32:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:32:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:32:16.472303  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:32:21.699751  543705 disk_info.go:125] begin check local disk info of client
I0320 00:32:21.702206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:32:21.702212  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475e00 0xc000475e40]
E0320 00:32:23.409947  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:32:23.409960  543705 cpu.go:275] no items to output this cycle
I0320 00:32:23.409972  543705 memory.go:184] no items to output this cycle
E0320 00:32:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:32:33.409764  543705 memory.go:184] no items to output this cycle
I0320 00:32:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:32:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:32:43.409809  543705 memory.go:191] Add success.
I0320 00:32:43.409818  543705 cpu.go:282] Add success.
I0320 00:32:43.420054  543705 net.go:648] Add success.
I0320 00:32:43.423172  543705 net.go:770] primary dev: ETH0
I0320 00:32:43.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:32:43.423208  543705 net.go:698] Add success.
I0320 00:32:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:32:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:32:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:32:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:32:53.409781  543705 cpu.go:275] no items to output this cycle
I0320 00:32:53.409788  543705 memory.go:184] no items to output this cycle
E0320 00:33:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:33:03.409784  543705 cpu.go:275] no items to output this cycle
I0320 00:33:03.409786  543705 memory.go:184] no items to output this cycle
E0320 00:33:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:33:13.409823  543705 memory.go:191] Add success.
I0320 00:33:13.409823  543705 cpu.go:282] Add success.
W0320 00:33:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:33:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:33:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:33:13.420192  543705 net.go:648] Add success.
I0320 00:33:13.423084  543705 net.go:770] primary dev: ETH0
I0320 00:33:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:33:13.423120  543705 net.go:698] Add success.
I0320 00:33:13.995331  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"528bb2df-424f-45d6-8169-9d217a43138e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:33:13.995366  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:33:14.454677  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:33:14.454809  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:33:14.454871  543705 disk_worker.go:708] disk space is not compliant
W0320 00:33:14.454874  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:33:14.456236  543705 disk_worker.go:494] system disk:vda1
I0320 00:33:14.456278  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:33:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:33:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:33:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:33:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:33:21.703160  543705 disk_info.go:125] begin check local disk info of client
I0320 00:33:21.705575  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:33:21.705581  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c100 0xc00034c140]
E0320 00:33:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:33:23.409788  543705 memory.go:184] no items to output this cycle
I0320 00:33:23.409807  543705 cpu.go:275] no items to output this cycle
E0320 00:33:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:33:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 00:33:33.409782  543705 memory.go:184] no items to output this cycle
I0320 00:33:37.992651  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:33:37.992658  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:33:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:33:43.410643  543705 memory.go:191] Add success.
I0320 00:33:43.409804  543705 cpu.go:282] Add success.
I0320 00:33:43.420409  543705 net.go:648] Add success.
I0320 00:33:43.423038  543705 net.go:770] primary dev: ETH0
I0320 00:33:43.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:33:43.423064  543705 net.go:698] Add success.
I0320 00:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:33:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:33:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:33:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:33:53.409769  543705 memory.go:184] no items to output this cycle
I0320 00:33:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 00:34:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:34:03.409773  543705 memory.go:184] no items to output this cycle
I0320 00:34:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:34:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:34:13.409814  543705 memory.go:191] Add success.
I0320 00:34:13.409821  543705 cpu.go:282] Add success.
W0320 00:34:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:34:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:34:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:34:13.420140  543705 net.go:648] Add success.
I0320 00:34:13.423205  543705 net.go:770] primary dev: ETH0
I0320 00:34:13.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:34:13.423234  543705 net.go:698] Add success.
I0320 00:34:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:34:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:34:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 00:34:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:34:14.459205  543705 disk_worker.go:494] system disk:vda1
I0320 00:34:14.459237  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:34:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:34:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:34:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:34:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:34:21.705681  543705 disk_info.go:125] begin check local disk info of client
I0320 00:34:21.708213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:34:21.708220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005384c0 0xc000538500]
E0320 00:34:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:34:23.409776  543705 memory.go:184] no items to output this cycle
I0320 00:34:23.409782  543705 cpu.go:275] no items to output this cycle
E0320 00:34:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:34:33.409765  543705 memory.go:184] no items to output this cycle
I0320 00:34:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:34:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:34:43.409832  543705 memory.go:191] Add success.
I0320 00:34:43.409856  543705 cpu.go:282] Add success.
I0320 00:34:43.420145  543705 net.go:648] Add success.
I0320 00:34:43.423212  543705 net.go:770] primary dev: ETH0
I0320 00:34:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:34:43.423245  543705 net.go:698] Add success.
I0320 00:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:34:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:34:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:34:53.409775  543705 memory.go:184] no items to output this cycle
I0320 00:34:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:35:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:35:03.409803  543705 memory.go:184] no items to output this cycle
I0320 00:35:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:35:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:35:13.409788  543705 memory.go:191] Add success.
I0320 00:35:13.409814  543705 cpu.go:282] Add success.
W0320 00:35:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:35:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:35:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:35:13.420055  543705 net.go:648] Add success.
I0320 00:35:13.423189  543705 net.go:770] primary dev: ETH0
I0320 00:35:13.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:35:13.423215  543705 net.go:698] Add success.
I0320 00:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:35:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:35:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 00:35:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:35:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 00:35:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:35:15.456005  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:35:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:35:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:35:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:35:21.709248  543705 disk_info.go:125] begin check local disk info of client
I0320 00:35:21.711712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:35:21.711718  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003302c0 0xc000330300]
E0320 00:35:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:35:23.409792  543705 memory.go:184] no items to output this cycle
I0320 00:35:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:35:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:35:33.409800  543705 memory.go:184] no items to output this cycle
I0320 00:35:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 00:35:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:35:43.409789  543705 memory.go:191] Add success.
I0320 00:35:43.409818  543705 cpu.go:282] Add success.
I0320 00:35:43.419971  543705 net.go:648] Add success.
I0320 00:35:43.422765  543705 net.go:770] primary dev: ETH0
I0320 00:35:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:35:43.422795  543705 net.go:698] Add success.
I0320 00:35:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:35:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:35:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:35:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:35:53.409767  543705 memory.go:184] no items to output this cycle
I0320 00:35:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:36:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:36:03.409799  543705 memory.go:184] no items to output this cycle
I0320 00:36:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:36:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:36:13.409787  543705 memory.go:191] Add success.
I0320 00:36:13.409790  543705 cpu.go:282] Add success.
W0320 00:36:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:36:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:36:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:36:13.420062  543705 net.go:648] Add success.
I0320 00:36:13.422904  543705 net.go:770] primary dev: ETH0
I0320 00:36:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:36:13.422930  543705 net.go:698] Add success.
I0320 00:36:13.463673  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7de70590-d2d4-4167-a8e3-09df588ddd0e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:36:13.463705  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:36:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:36:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0320 00:36:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:36:14.457160  543705 disk_worker.go:494] system disk:vda1
I0320 00:36:14.457191  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:36:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:36:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:36:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:36:21.713199  543705 disk_info.go:125] begin check local disk info of client
I0320 00:36:21.715695  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:36:21.715702  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d5940 0xc0004d5980]
E0320 00:36:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:36:23.409796  543705 memory.go:184] no items to output this cycle
I0320 00:36:23.409811  543705 cpu.go:275] no items to output this cycle
E0320 00:36:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:36:33.409798  543705 memory.go:184] no items to output this cycle
I0320 00:36:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 00:36:37.993733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:36:37.993739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:36:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:36:43.409798  543705 memory.go:191] Add success.
I0320 00:36:43.409855  543705 cpu.go:282] Add success.
I0320 00:36:43.420254  543705 net.go:648] Add success.
I0320 00:36:43.421165  543705 net.go:770] primary dev: ETH0
I0320 00:36:43.421180  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:36:43.421196  543705 net.go:698] Add success.
I0320 00:36:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:36:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:36:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:36:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:36:53.409771  543705 memory.go:184] no items to output this cycle
I0320 00:36:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 00:37:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:37:03.409800  543705 memory.go:184] no items to output this cycle
I0320 00:37:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:37:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:37:13.409820  543705 memory.go:191] Add success.
I0320 00:37:13.409824  543705 cpu.go:282] Add success.
W0320 00:37:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:37:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:37:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:37:13.420185  543705 net.go:648] Add success.
I0320 00:37:13.423127  543705 net.go:770] primary dev: ETH0
I0320 00:37:13.423141  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:37:13.423153  543705 net.go:698] Add success.
I0320 00:37:13.453662  543705 event_worker.go:152] Polling the log file for events...
W0320 00:37:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:37:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 00:37:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:37:14.455972  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:37:14.455981  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:37:14.455988  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:37:14.456452  543705 disk_worker.go:494] system disk:vda1
I0320 00:37:14.456481  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:37:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:37:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:37:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:37:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:37:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:37:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:37:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:37:21.716281  543705 disk_info.go:125] begin check local disk info of client
I0320 00:37:21.718819  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:37:21.718826  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462240 0xc000462280]
E0320 00:37:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:37:23.409795  543705 memory.go:184] no items to output this cycle
I0320 00:37:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 00:37:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:37:33.409798  543705 memory.go:184] no items to output this cycle
I0320 00:37:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 00:37:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:37:43.409795  543705 memory.go:191] Add success.
I0320 00:37:43.409871  543705 cpu.go:282] Add success.
I0320 00:37:43.420124  543705 net.go:648] Add success.
I0320 00:37:43.423123  543705 net.go:770] primary dev: ETH0
I0320 00:37:43.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:37:43.423160  543705 net.go:698] Add success.
I0320 00:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:37:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:37:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:37:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:37:53.409771  543705 memory.go:184] no items to output this cycle
I0320 00:37:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 00:38:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:38:03.409796  543705 memory.go:184] no items to output this cycle
I0320 00:38:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 00:38:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:38:13.409786  543705 memory.go:191] Add success.
I0320 00:38:13.409788  543705 cpu.go:282] Add success.
W0320 00:38:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:38:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:38:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:38:13.420202  543705 net.go:648] Add success.
I0320 00:38:13.422945  543705 net.go:770] primary dev: ETH0
I0320 00:38:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:38:13.422971  543705 net.go:698] Add success.
I0320 00:38:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:38:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:38:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 00:38:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:38:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 00:38:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:38:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:38:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:38:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:38:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:38:21.718915  543705 disk_info.go:125] begin check local disk info of client
I0320 00:38:21.721379  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:38:21.721385  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508700 0xc000508740]
E0320 00:38:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:38:23.409806  543705 memory.go:184] no items to output this cycle
I0320 00:38:23.409818  543705 cpu.go:275] no items to output this cycle
E0320 00:38:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:38:33.409774  543705 memory.go:184] no items to output this cycle
I0320 00:38:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 00:38:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:38:43.409821  543705 memory.go:191] Add success.
I0320 00:38:43.409824  543705 cpu.go:282] Add success.
I0320 00:38:43.420718  543705 net.go:648] Add success.
I0320 00:38:43.421742  543705 net.go:770] primary dev: ETH0
I0320 00:38:43.421754  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:38:43.421767  543705 net.go:698] Add success.
I0320 00:38:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:38:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:38:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:38:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:38:53.409767  543705 memory.go:184] no items to output this cycle
I0320 00:38:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 00:39:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:39:03.409776  543705 memory.go:184] no items to output this cycle
I0320 00:39:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:39:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:39:13.409821  543705 memory.go:191] Add success.
I0320 00:39:13.409822  543705 cpu.go:282] Add success.
W0320 00:39:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:39:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:39:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:39:13.420315  543705 net.go:648] Add success.
I0320 00:39:13.422998  543705 net.go:770] primary dev: ETH0
I0320 00:39:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:39:13.423025  543705 net.go:698] Add success.
I0320 00:39:13.464217  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"04cbde80-612e-4984-8877-61ddba7656fe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:39:13.464250  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:39:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:39:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:39:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 00:39:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:39:14.456693  543705 disk_worker.go:494] system disk:vda1
I0320 00:39:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:39:16.472430  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:39:21.721684  543705 disk_info.go:125] begin check local disk info of client
I0320 00:39:21.724186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:39:21.724193  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028c780 0xc00028c7c0]
E0320 00:39:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:39:23.409789  543705 memory.go:184] no items to output this cycle
I0320 00:39:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 00:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:39:33.409781  543705 memory.go:184] no items to output this cycle
I0320 00:39:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 00:39:37.996676  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:39:37.996683  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:39:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:39:43.409788  543705 memory.go:191] Add success.
I0320 00:39:43.409850  543705 cpu.go:282] Add success.
I0320 00:39:43.420085  543705 net.go:648] Add success.
I0320 00:39:43.421044  543705 net.go:770] primary dev: ETH0
I0320 00:39:43.421061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:39:43.421074  543705 net.go:698] Add success.
I0320 00:39:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:39:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:39:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:39:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:39:53.409765  543705 memory.go:184] no items to output this cycle
I0320 00:39:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 00:40:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:40:03.409797  543705 memory.go:184] no items to output this cycle
I0320 00:40:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 00:40:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:40:13.409814  543705 memory.go:191] Add success.
I0320 00:40:13.409818  543705 cpu.go:282] Add success.
W0320 00:40:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:40:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:40:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:40:13.420057  543705 net.go:648] Add success.
I0320 00:40:13.422765  543705 net.go:770] primary dev: ETH0
I0320 00:40:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:40:13.422789  543705 net.go:698] Add success.
I0320 00:40:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:40:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:40:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 00:40:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:40:14.456633  543705 disk_worker.go:494] system disk:vda1
I0320 00:40:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:40:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:40:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:40:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:40:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:40:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:40:21.724278  543705 disk_info.go:125] begin check local disk info of client
I0320 00:40:21.726879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:40:21.726887  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358100 0xc000358140]
E0320 00:40:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:40:23.409759  543705 memory.go:184] no items to output this cycle
I0320 00:40:23.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:40:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:40:33.409778  543705 memory.go:184] no items to output this cycle
I0320 00:40:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 00:40:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:40:43.409825  543705 cpu.go:282] Add success.
I0320 00:40:43.409839  543705 memory.go:191] Add success.
I0320 00:40:43.420087  543705 net.go:648] Add success.
I0320 00:40:43.423050  543705 net.go:770] primary dev: ETH0
I0320 00:40:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:40:43.423076  543705 net.go:698] Add success.
I0320 00:40:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:40:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:40:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:40:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:40:53.409777  543705 memory.go:184] no items to output this cycle
I0320 00:40:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:41:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:41:03.409782  543705 memory.go:184] no items to output this cycle
I0320 00:41:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:41:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:41:13.409826  543705 memory.go:191] Add success.
I0320 00:41:13.409828  543705 cpu.go:282] Add success.
W0320 00:41:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:41:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:41:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:41:13.420364  543705 net.go:648] Add success.
I0320 00:41:13.423333  543705 net.go:770] primary dev: ETH0
I0320 00:41:13.423349  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:41:13.423363  543705 net.go:698] Add success.
I0320 00:41:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:41:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:41:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0320 00:41:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:41:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 00:41:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:41:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:41:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:41:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:41:16.472459  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:41:21.728279  543705 disk_info.go:125] begin check local disk info of client
I0320 00:41:21.730812  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:41:21.730818  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498400 0xc000498440]
E0320 00:41:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:41:23.409758  543705 memory.go:184] no items to output this cycle
I0320 00:41:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 00:41:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:41:33.409769  543705 memory.go:184] no items to output this cycle
I0320 00:41:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:41:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:41:43.409805  543705 memory.go:191] Add success.
I0320 00:41:43.409851  543705 cpu.go:282] Add success.
I0320 00:41:43.420051  543705 net.go:648] Add success.
I0320 00:41:43.423107  543705 net.go:770] primary dev: ETH0
I0320 00:41:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:41:43.423138  543705 net.go:698] Add success.
I0320 00:41:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:41:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:41:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:41:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:41:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 00:41:53.409782  543705 memory.go:184] no items to output this cycle
E0320 00:42:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:42:03.409770  543705 memory.go:184] no items to output this cycle
I0320 00:42:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:42:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:42:13.409815  543705 memory.go:191] Add success.
I0320 00:42:13.409817  543705 cpu.go:282] Add success.
W0320 00:42:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:42:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:42:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:42:13.420153  543705 net.go:648] Add success.
I0320 00:42:13.422938  543705 net.go:770] primary dev: ETH0
I0320 00:42:13.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:42:13.422967  543705 net.go:698] Add success.
I0320 00:42:13.469032  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"20c14cf0-f869-4f35-a2e7-b4d441228f3a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:42:13.469065  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 00:42:14.455262  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:42:14.455280  543705 disk_worker.go:708] disk space is not compliant
W0320 00:42:14.455284  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:42:14.456228  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:42:14.456238  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:42:14.456244  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:42:14.457198  543705 disk_worker.go:494] system disk:vda1
I0320 00:42:14.457234  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:42:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:42:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:42:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:42:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:42:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:42:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:42:16.472354  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:42:21.731348  543705 disk_info.go:125] begin check local disk info of client
I0320 00:42:21.733835  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:42:21.733842  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394640 0xc000394680]
E0320 00:42:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:42:23.409762  543705 memory.go:184] no items to output this cycle
I0320 00:42:23.409786  543705 cpu.go:275] no items to output this cycle
E0320 00:42:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:42:33.409769  543705 memory.go:184] no items to output this cycle
I0320 00:42:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 00:42:37.997729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:42:37.997735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:42:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:42:43.410655  543705 memory.go:191] Add success.
I0320 00:42:43.409859  543705 cpu.go:282] Add success.
I0320 00:42:43.420421  543705 net.go:648] Add success.
I0320 00:42:43.423137  543705 net.go:770] primary dev: ETH0
I0320 00:42:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:42:43.423164  543705 net.go:698] Add success.
I0320 00:42:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:42:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:42:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:42:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:42:53.409780  543705 memory.go:184] no items to output this cycle
I0320 00:42:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:43:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:43:03.409771  543705 memory.go:184] no items to output this cycle
I0320 00:43:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 00:43:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:43:13.409816  543705 memory.go:191] Add success.
I0320 00:43:13.409822  543705 cpu.go:282] Add success.
W0320 00:43:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:43:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:43:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:43:13.420293  543705 net.go:648] Add success.
I0320 00:43:13.423139  543705 net.go:770] primary dev: ETH0
I0320 00:43:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:43:13.423165  543705 net.go:698] Add success.
I0320 00:43:14.454387  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:43:14.454533  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:43:14.454615  543705 disk_worker.go:708] disk space is not compliant
W0320 00:43:14.454618  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:43:14.455967  543705 disk_worker.go:494] system disk:vda1
I0320 00:43:14.456000  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:43:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:43:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:43:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:43:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:43:21.734728  543705 disk_info.go:125] begin check local disk info of client
I0320 00:43:21.737203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:43:21.737210  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e280 0xc00037e2c0]
E0320 00:43:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:43:23.409767  543705 memory.go:184] no items to output this cycle
I0320 00:43:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:43:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:43:33.409765  543705 memory.go:184] no items to output this cycle
I0320 00:43:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:43:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:43:43.409813  543705 memory.go:191] Add success.
I0320 00:43:43.409817  543705 cpu.go:282] Add success.
I0320 00:43:43.419908  543705 net.go:648] Add success.
I0320 00:43:43.422509  543705 net.go:770] primary dev: ETH0
I0320 00:43:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:43:43.422537  543705 net.go:698] Add success.
I0320 00:43:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:43:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:43:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:43:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:43:53.409780  543705 memory.go:184] no items to output this cycle
I0320 00:43:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 00:44:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:44:03.409799  543705 memory.go:184] no items to output this cycle
I0320 00:44:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 00:44:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:44:13.409804  543705 memory.go:191] Add success.
I0320 00:44:13.409812  543705 cpu.go:282] Add success.
W0320 00:44:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:44:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:44:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:44:13.420116  543705 net.go:648] Add success.
I0320 00:44:13.423049  543705 net.go:770] primary dev: ETH0
I0320 00:44:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:44:13.423074  543705 net.go:698] Add success.
I0320 00:44:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:44:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:44:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 00:44:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:44:14.456611  543705 disk_worker.go:494] system disk:vda1
I0320 00:44:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:44:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:44:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:44:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:44:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:44:21.737679  543705 disk_info.go:125] begin check local disk info of client
I0320 00:44:21.740073  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:44:21.740079  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c180 0xc00048c1c0]
E0320 00:44:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:44:23.409790  543705 memory.go:184] no items to output this cycle
I0320 00:44:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 00:44:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:44:33.409783  543705 memory.go:184] no items to output this cycle
I0320 00:44:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 00:44:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:44:43.409793  543705 memory.go:191] Add success.
I0320 00:44:43.409794  543705 cpu.go:282] Add success.
I0320 00:44:43.419889  543705 net.go:648] Add success.
I0320 00:44:43.422919  543705 net.go:770] primary dev: ETH0
I0320 00:44:43.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:44:43.422949  543705 net.go:698] Add success.
I0320 00:44:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:44:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:44:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:44:53.409793  543705 memory.go:184] no items to output this cycle
I0320 00:44:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:45:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:45:03.409786  543705 memory.go:184] no items to output this cycle
I0320 00:45:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 00:45:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:45:13.409825  543705 memory.go:191] Add success.
I0320 00:45:13.409832  543705 cpu.go:282] Add success.
W0320 00:45:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:45:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:45:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:45:13.420155  543705 net.go:648] Add success.
I0320 00:45:13.423234  543705 net.go:770] primary dev: ETH0
I0320 00:45:13.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:45:13.423269  543705 net.go:698] Add success.
I0320 00:45:13.469243  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c9475e8-b8a3-45ff-9676-a24e85778715","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:45:13.469278  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:45:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:45:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:45:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 00:45:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:45:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 00:45:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:45:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:45:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:45:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:45:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:45:21.741401  543705 disk_info.go:125] begin check local disk info of client
I0320 00:45:21.743876  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:45:21.743882  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2080 0xc0003f20c0]
E0320 00:45:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:45:23.409780  543705 memory.go:184] no items to output this cycle
I0320 00:45:23.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:45:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:45:33.409805  543705 memory.go:184] no items to output this cycle
I0320 00:45:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 00:45:38.000692  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:45:38.000699  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:45:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:45:43.410770  543705 memory.go:191] Add success.
I0320 00:45:43.409809  543705 cpu.go:282] Add success.
I0320 00:45:43.420495  543705 net.go:648] Add success.
I0320 00:45:43.423298  543705 net.go:770] primary dev: ETH0
I0320 00:45:43.423312  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:45:43.423326  543705 net.go:698] Add success.
I0320 00:45:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:45:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:45:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:45:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:45:53.409783  543705 memory.go:184] no items to output this cycle
I0320 00:45:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 00:46:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:46:03.409789  543705 memory.go:184] no items to output this cycle
I0320 00:46:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:46:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:46:13.409834  543705 memory.go:191] Add success.
I0320 00:46:13.409842  543705 cpu.go:282] Add success.
W0320 00:46:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:46:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:46:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:46:13.420063  543705 net.go:648] Add success.
I0320 00:46:13.422720  543705 net.go:770] primary dev: ETH0
I0320 00:46:13.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:46:13.422750  543705 net.go:698] Add success.
I0320 00:46:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:46:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:46:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 00:46:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:46:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 00:46:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:46:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:46:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:46:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:46:21.745366  543705 disk_info.go:125] begin check local disk info of client
I0320 00:46:21.747846  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:46:21.747852  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a8c0 0xc00034a900]
E0320 00:46:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:46:23.409772  543705 memory.go:184] no items to output this cycle
I0320 00:46:23.409777  543705 cpu.go:275] no items to output this cycle
E0320 00:46:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:46:33.409785  543705 memory.go:184] no items to output this cycle
I0320 00:46:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 00:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:46:43.409791  543705 memory.go:191] Add success.
I0320 00:46:43.409812  543705 cpu.go:282] Add success.
I0320 00:46:43.419966  543705 net.go:648] Add success.
I0320 00:46:43.422688  543705 net.go:770] primary dev: ETH0
I0320 00:46:43.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:46:43.422713  543705 net.go:698] Add success.
I0320 00:46:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:46:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:46:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:46:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:46:53.409773  543705 memory.go:184] no items to output this cycle
I0320 00:46:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 00:47:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:47:03.409800  543705 memory.go:184] no items to output this cycle
I0320 00:47:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 00:47:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:47:13.409793  543705 memory.go:191] Add success.
I0320 00:47:13.409814  543705 cpu.go:282] Add success.
W0320 00:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:47:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:47:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:47:13.420392  543705 net.go:648] Add success.
I0320 00:47:13.423081  543705 net.go:770] primary dev: ETH0
I0320 00:47:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:47:13.423107  543705 net.go:698] Add success.
I0320 00:47:13.453666  543705 event_worker.go:152] Polling the log file for events...
W0320 00:47:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:47:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 00:47:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0320 00:47:14.455983  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:47:14.455992  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:47:14.455998  543705 custom_config.go:64] query custom config with name: gpu
I0320 00:47:14.456460  543705 disk_worker.go:494] system disk:vda1
I0320 00:47:14.456490  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:47:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:47:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:47:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:47:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:47:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:47:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:47:16.472341  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:47:21.749379  543705 disk_info.go:125] begin check local disk info of client
I0320 00:47:21.751809  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:47:21.751814  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037cb00 0xc00037cb40]
E0320 00:47:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:47:23.409784  543705 memory.go:184] no items to output this cycle
I0320 00:47:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:47:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:47:33.409777  543705 memory.go:184] no items to output this cycle
I0320 00:47:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 00:47:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:47:43.409808  543705 memory.go:191] Add success.
I0320 00:47:43.409814  543705 cpu.go:282] Add success.
I0320 00:47:43.419973  543705 net.go:648] Add success.
I0320 00:47:43.422765  543705 net.go:770] primary dev: ETH0
I0320 00:47:43.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:47:43.422789  543705 net.go:698] Add success.
I0320 00:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:47:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:47:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:47:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:47:53.409777  543705 memory.go:184] no items to output this cycle
I0320 00:47:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:48:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:48:03.409800  543705 memory.go:184] no items to output this cycle
I0320 00:48:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:48:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:48:13.409830  543705 memory.go:191] Add success.
I0320 00:48:13.409830  543705 cpu.go:282] Add success.
W0320 00:48:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:48:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:48:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:48:13.420210  543705 net.go:648] Add success.
I0320 00:48:13.422976  543705 net.go:770] primary dev: ETH0
I0320 00:48:13.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:48:13.423002  543705 net.go:698] Add success.
I0320 00:48:13.469476  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ea3bb68-61fa-4c56-9321-4f0153a80e3a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:48:13.469519  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:48:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:48:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:48:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 00:48:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:48:14.456775  543705 disk_worker.go:494] system disk:vda1
I0320 00:48:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:48:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:48:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:48:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:48:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:48:21.753398  543705 disk_info.go:125] begin check local disk info of client
I0320 00:48:21.755812  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:48:21.755818  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348380 0xc0003483c0]
E0320 00:48:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:48:23.409816  543705 memory.go:184] no items to output this cycle
I0320 00:48:23.409822  543705 cpu.go:275] no items to output this cycle
E0320 00:48:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:48:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 00:48:33.409792  543705 memory.go:184] no items to output this cycle
I0320 00:48:38.001736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:48:38.001742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:48:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:48:43.410596  543705 memory.go:191] Add success.
I0320 00:48:43.409820  543705 cpu.go:282] Add success.
I0320 00:48:43.420348  543705 net.go:648] Add success.
I0320 00:48:43.422966  543705 net.go:770] primary dev: ETH0
I0320 00:48:43.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:48:43.422991  543705 net.go:698] Add success.
I0320 00:48:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:48:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:48:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:48:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:48:53.409790  543705 memory.go:184] no items to output this cycle
I0320 00:48:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:49:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:49:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 00:49:03.409788  543705 memory.go:184] no items to output this cycle
W0320 00:49:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:49:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:49:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 00:49:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:49:13.409821  543705 memory.go:191] Add success.
I0320 00:49:13.409829  543705 cpu.go:282] Add success.
I0320 00:49:13.420140  543705 net.go:648] Add success.
I0320 00:49:13.422923  543705 net.go:770] primary dev: ETH0
I0320 00:49:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:49:13.422950  543705 net.go:698] Add success.
I0320 00:49:14.453983  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:49:14.454233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:49:14.454242  543705 disk_worker.go:708] disk space is not compliant
W0320 00:49:14.454245  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:49:14.456066  543705 disk_worker.go:494] system disk:vda1
I0320 00:49:14.456105  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:49:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:49:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:49:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:49:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:49:21.755904  543705 disk_info.go:125] begin check local disk info of client
I0320 00:49:21.758363  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:49:21.758369  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034f280 0xc00034f2c0]
E0320 00:49:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:49:23.409789  543705 memory.go:184] no items to output this cycle
I0320 00:49:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 00:49:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:49:33.409775  543705 cpu.go:275] no items to output this cycle
I0320 00:49:33.409786  543705 memory.go:184] no items to output this cycle
E0320 00:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:49:43.409810  543705 memory.go:191] Add success.
I0320 00:49:43.409819  543705 cpu.go:282] Add success.
I0320 00:49:43.419953  543705 net.go:648] Add success.
I0320 00:49:43.422635  543705 net.go:770] primary dev: ETH0
I0320 00:49:43.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:49:43.422668  543705 net.go:698] Add success.
I0320 00:49:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:49:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:49:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:49:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:49:53.409784  543705 memory.go:184] no items to output this cycle
I0320 00:49:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 00:50:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:50:03.409779  543705 memory.go:184] no items to output this cycle
I0320 00:50:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 00:50:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:50:13.409782  543705 memory.go:191] Add success.
W0320 00:50:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 00:50:13.409815  543705 cpu.go:282] Add success.
W0320 00:50:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:50:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:50:13.420217  543705 net.go:648] Add success.
I0320 00:50:13.422982  543705 net.go:770] primary dev: ETH0
I0320 00:50:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:50:13.423007  543705 net.go:698] Add success.
I0320 00:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:50:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:50:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 00:50:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:50:14.456831  543705 disk_worker.go:494] system disk:vda1
I0320 00:50:14.456874  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:50:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:50:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:50:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:50:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:50:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:50:21.759482  543705 disk_info.go:125] begin check local disk info of client
I0320 00:50:21.761949  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:50:21.761955  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003662c0 0xc000366300]
E0320 00:50:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:50:23.409758  543705 memory.go:184] no items to output this cycle
I0320 00:50:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 00:50:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:50:33.409765  543705 memory.go:184] no items to output this cycle
I0320 00:50:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:50:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:50:43.409794  543705 memory.go:191] Add success.
I0320 00:50:43.409803  543705 cpu.go:282] Add success.
I0320 00:50:43.419970  543705 net.go:648] Add success.
I0320 00:50:43.422731  543705 net.go:770] primary dev: ETH0
I0320 00:50:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:50:43.422761  543705 net.go:698] Add success.
I0320 00:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:50:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:50:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:50:53.409770  543705 memory.go:184] no items to output this cycle
I0320 00:50:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 00:51:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:51:03.409800  543705 memory.go:184] no items to output this cycle
I0320 00:51:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 00:51:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:51:13.409795  543705 memory.go:191] Add success.
I0320 00:51:13.409794  543705 cpu.go:282] Add success.
W0320 00:51:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:51:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:51:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:51:13.420112  543705 net.go:648] Add success.
I0320 00:51:13.423146  543705 net.go:770] primary dev: ETH0
I0320 00:51:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:51:13.423173  543705 net.go:698] Add success.
I0320 00:51:13.468051  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1d52854f-90cb-4e38-b882-fccf4a1d49d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:51:13.468083  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:51:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:51:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:51:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 00:51:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:51:14.456511  543705 disk_worker.go:494] system disk:vda1
I0320 00:51:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:51:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:51:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:51:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:51:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:51:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:51:21.763443  543705 disk_info.go:125] begin check local disk info of client
I0320 00:51:21.765882  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:51:21.765888  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2a00 0xc0003b2a40]
E0320 00:51:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:51:23.409795  543705 memory.go:184] no items to output this cycle
I0320 00:51:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 00:51:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:51:33.409768  543705 memory.go:184] no items to output this cycle
I0320 00:51:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 00:51:38.004709  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:51:38.004714  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:51:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:51:43.410724  543705 memory.go:191] Add success.
I0320 00:51:43.409810  543705 cpu.go:282] Add success.
I0320 00:51:43.420411  543705 net.go:648] Add success.
I0320 00:51:43.423289  543705 net.go:770] primary dev: ETH0
I0320 00:51:43.423303  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:51:43.423316  543705 net.go:698] Add success.
I0320 00:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:51:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:51:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:51:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:51:53.409773  543705 memory.go:184] no items to output this cycle
I0320 00:51:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:52:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:52:03.409801  543705 memory.go:184] no items to output this cycle
I0320 00:52:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 00:52:13.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:52:13.409929  543705 cpu.go:282] Add success.
I0320 00:52:13.409931  543705 memory.go:191] Add success.
W0320 00:52:13.409973  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:52:13.409995  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:52:13.410000  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:52:13.419750  543705 net.go:648] Add success.
I0320 00:52:13.422444  543705 net.go:770] primary dev: ETH0
I0320 00:52:13.422458  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:52:13.422469  543705 net.go:698] Add success.
W0320 00:52:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:52:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 00:52:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:52:14.456823  543705 disk_worker.go:494] system disk:vda1
I0320 00:52:14.456863  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:52:14.457145  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:52:14.457153  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:52:14.457158  543705 custom_config.go:64] query custom config with name: gpu
E0320 00:52:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:52:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:52:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:52:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:52:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:52:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:52:16.472307  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:52:21.766729  543705 disk_info.go:125] begin check local disk info of client
I0320 00:52:21.769111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:52:21.769117  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8440 0xc0003c8480]
E0320 00:52:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:52:23.409788  543705 memory.go:184] no items to output this cycle
I0320 00:52:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 00:52:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:52:33.409786  543705 memory.go:184] no items to output this cycle
I0320 00:52:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 00:52:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:52:43.409797  543705 cpu.go:282] Add success.
I0320 00:52:43.409800  543705 memory.go:191] Add success.
I0320 00:52:43.419844  543705 net.go:648] Add success.
I0320 00:52:43.422568  543705 net.go:770] primary dev: ETH0
I0320 00:52:43.422582  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:52:43.422594  543705 net.go:698] Add success.
I0320 00:52:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:52:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:52:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:52:53.409788  543705 memory.go:184] no items to output this cycle
I0320 00:52:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 00:53:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:53:03.409782  543705 memory.go:184] no items to output this cycle
I0320 00:53:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:53:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:53:13.409812  543705 memory.go:191] Add success.
I0320 00:53:13.409813  543705 cpu.go:282] Add success.
W0320 00:53:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:53:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:53:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:53:13.420225  543705 net.go:648] Add success.
I0320 00:53:13.423043  543705 net.go:770] primary dev: ETH0
I0320 00:53:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:53:13.423072  543705 net.go:698] Add success.
I0320 00:53:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:53:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:53:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 00:53:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:53:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 00:53:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:53:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:53:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:53:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:53:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:53:21.769676  543705 disk_info.go:125] begin check local disk info of client
I0320 00:53:21.772163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:53:21.772170  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003673c0 0xc000367400]
E0320 00:53:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:53:23.409773  543705 memory.go:184] no items to output this cycle
I0320 00:53:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 00:53:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:53:33.409805  543705 memory.go:184] no items to output this cycle
I0320 00:53:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 00:53:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:53:43.409788  543705 memory.go:191] Add success.
I0320 00:53:43.409812  543705 cpu.go:282] Add success.
I0320 00:53:43.419884  543705 net.go:648] Add success.
I0320 00:53:43.423084  543705 net.go:770] primary dev: ETH0
I0320 00:53:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:53:43.423114  543705 net.go:698] Add success.
I0320 00:53:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:53:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:53:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:53:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:53:53.409774  543705 memory.go:184] no items to output this cycle
I0320 00:53:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 00:54:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:54:03.409784  543705 memory.go:184] no items to output this cycle
I0320 00:54:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 00:54:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:54:13.409802  543705 memory.go:191] Add success.
I0320 00:54:13.409803  543705 cpu.go:282] Add success.
W0320 00:54:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:54:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:54:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:54:13.420366  543705 net.go:648] Add success.
I0320 00:54:13.423251  543705 net.go:770] primary dev: ETH0
I0320 00:54:13.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:54:13.423277  543705 net.go:698] Add success.
I0320 00:54:13.468184  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7bf02f9d-b0ea-4856-ab65-f23e8f762015","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:54:13.468216  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 00:54:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:54:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:54:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 00:54:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:54:14.456491  543705 disk_worker.go:494] system disk:vda1
I0320 00:54:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:54:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:54:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:54:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:54:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:54:21.773504  543705 disk_info.go:125] begin check local disk info of client
I0320 00:54:21.775931  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:54:21.775937  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4580 0xc0000c45c0]
E0320 00:54:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:54:23.409790  543705 memory.go:184] no items to output this cycle
I0320 00:54:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 00:54:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:54:33.409798  543705 memory.go:184] no items to output this cycle
I0320 00:54:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 00:54:38.005739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:54:38.005745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:54:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:54:43.410597  543705 memory.go:191] Add success.
I0320 00:54:43.409786  543705 cpu.go:282] Add success.
I0320 00:54:43.420277  543705 net.go:648] Add success.
I0320 00:54:43.422805  543705 net.go:770] primary dev: ETH0
I0320 00:54:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:54:43.422831  543705 net.go:698] Add success.
I0320 00:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:54:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:54:53.409771  543705 memory.go:184] no items to output this cycle
I0320 00:54:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 00:55:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:55:03.409840  543705 memory.go:184] no items to output this cycle
I0320 00:55:03.409928  543705 cpu.go:275] no items to output this cycle
E0320 00:55:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:55:13.409790  543705 memory.go:191] Add success.
W0320 00:55:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 00:55:13.409815  543705 cpu.go:282] Add success.
W0320 00:55:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:55:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:55:13.420145  543705 net.go:648] Add success.
I0320 00:55:13.422983  543705 net.go:770] primary dev: ETH0
I0320 00:55:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:55:13.423008  543705 net.go:698] Add success.
I0320 00:55:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:55:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:55:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 00:55:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:55:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 00:55:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:55:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:55:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:55:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:55:21.776019  543705 disk_info.go:125] begin check local disk info of client
I0320 00:55:21.778442  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:55:21.778448  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330380 0xc0003303c0]
E0320 00:55:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:55:23.409782  543705 memory.go:184] no items to output this cycle
I0320 00:55:23.409793  543705 cpu.go:275] no items to output this cycle
E0320 00:55:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:55:33.409797  543705 memory.go:184] no items to output this cycle
I0320 00:55:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 00:55:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:55:43.409795  543705 memory.go:191] Add success.
I0320 00:55:43.409796  543705 cpu.go:282] Add success.
I0320 00:55:43.419871  543705 net.go:648] Add success.
I0320 00:55:43.422945  543705 net.go:770] primary dev: ETH0
I0320 00:55:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:55:43.422970  543705 net.go:698] Add success.
I0320 00:55:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:55:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:55:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:55:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:55:53.409786  543705 memory.go:184] no items to output this cycle
I0320 00:55:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 00:56:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:56:03.409899  543705 memory.go:184] no items to output this cycle
I0320 00:56:03.409926  543705 cpu.go:275] no items to output this cycle
E0320 00:56:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:56:13.409817  543705 memory.go:191] Add success.
I0320 00:56:13.409820  543705 cpu.go:282] Add success.
W0320 00:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:56:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:56:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:56:13.420434  543705 net.go:648] Add success.
I0320 00:56:13.423580  543705 net.go:770] primary dev: ETH0
I0320 00:56:13.423593  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:56:13.423605  543705 net.go:698] Add success.
I0320 00:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:56:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:56:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 00:56:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:56:14.456505  543705 disk_worker.go:494] system disk:vda1
I0320 00:56:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:56:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:56:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:56:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:56:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:56:21.779540  543705 disk_info.go:125] begin check local disk info of client
I0320 00:56:21.781944  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:56:21.781950  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af40 0xc00007af80]
E0320 00:56:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:56:23.409784  543705 memory.go:184] no items to output this cycle
I0320 00:56:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 00:56:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:56:33.409780  543705 memory.go:184] no items to output this cycle
I0320 00:56:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 00:56:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:56:43.409785  543705 memory.go:191] Add success.
I0320 00:56:43.409804  543705 cpu.go:282] Add success.
I0320 00:56:43.419858  543705 net.go:648] Add success.
I0320 00:56:43.422281  543705 net.go:770] primary dev: ETH0
I0320 00:56:43.422296  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:56:43.422309  543705 net.go:698] Add success.
I0320 00:56:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:56:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:56:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:56:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:56:53.409767  543705 memory.go:184] no items to output this cycle
I0320 00:56:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 00:57:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:57:03.409784  543705 memory.go:184] no items to output this cycle
I0320 00:57:03.409787  543705 cpu.go:275] no items to output this cycle
W0320 00:57:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:57:13.409739  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:57:13.409746  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:57:13.409812  543705 cpu.go:282] Add success.
E0320 00:57:13.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:57:13.409871  543705 memory.go:191] Add success.
I0320 00:57:13.420134  543705 net.go:648] Add success.
I0320 00:57:13.429000  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 00:57:13.429091  543705 net.go:770] primary dev: ETH0
I0320 00:57:13.429106  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:57:13.429119  543705 net.go:698] Add success.
I0320 00:57:13.453677  543705 event_worker.go:152] Polling the log file for events...
I0320 00:57:13.464725  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49cb3540-98de-4324-b70f-3c85bfac55ad","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 00:57:13.464758  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 00:57:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:57:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 00:57:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:57:14.456921  543705 disk_worker.go:494] system disk:vda1
I0320 00:57:14.456961  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 00:57:14.457088  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 00:57:14.457095  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 00:57:14.457098  543705 custom_config.go:64] query custom config with name: gpu
E0320 00:57:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 00:57:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:57:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 00:57:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 00:57:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:57:16.457976  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:57:16.472313  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:57:21.783552  543705 disk_info.go:125] begin check local disk info of client
I0320 00:57:21.786063  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:57:21.786069  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e40 0xc0000c5e80]
E0320 00:57:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:57:23.409766  543705 cpu.go:275] no items to output this cycle
I0320 00:57:23.409776  543705 memory.go:184] no items to output this cycle
E0320 00:57:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:57:33.409795  543705 memory.go:184] no items to output this cycle
I0320 00:57:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 00:57:38.008728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 00:57:38.008734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 00:57:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:57:43.410672  543705 memory.go:191] Add success.
I0320 00:57:43.409799  543705 cpu.go:282] Add success.
I0320 00:57:43.420344  543705 net.go:648] Add success.
I0320 00:57:43.423260  543705 net.go:770] primary dev: ETH0
I0320 00:57:43.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:57:43.423284  543705 net.go:698] Add success.
I0320 00:57:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:57:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:57:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:57:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:57:53.409809  543705 memory.go:184] no items to output this cycle
I0320 00:57:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 00:58:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:58:03.409776  543705 memory.go:184] no items to output this cycle
I0320 00:58:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 00:58:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:58:13.409798  543705 memory.go:191] Add success.
I0320 00:58:13.409804  543705 cpu.go:282] Add success.
W0320 00:58:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:58:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:58:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:58:13.420190  543705 net.go:648] Add success.
I0320 00:58:13.422770  543705 net.go:770] primary dev: ETH0
I0320 00:58:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:58:13.422795  543705 net.go:698] Add success.
I0320 00:58:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:58:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:58:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 00:58:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:58:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 00:58:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:58:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:58:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:58:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:58:21.787572  543705 disk_info.go:125] begin check local disk info of client
I0320 00:58:21.790020  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:58:21.790026  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c5740 0xc0004c5780]
E0320 00:58:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:58:23.409770  543705 memory.go:184] no items to output this cycle
I0320 00:58:23.409807  543705 cpu.go:275] no items to output this cycle
E0320 00:58:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:58:33.409798  543705 memory.go:184] no items to output this cycle
I0320 00:58:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 00:58:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:58:43.409816  543705 memory.go:191] Add success.
I0320 00:58:43.409826  543705 cpu.go:282] Add success.
I0320 00:58:43.419888  543705 net.go:648] Add success.
I0320 00:58:43.422717  543705 net.go:770] primary dev: ETH0
I0320 00:58:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:58:43.422741  543705 net.go:698] Add success.
I0320 00:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:58:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:58:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:58:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:58:53.409798  543705 memory.go:184] no items to output this cycle
I0320 00:58:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 00:59:03.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:59:03.409869  543705 memory.go:184] no items to output this cycle
I0320 00:59:03.409953  543705 cpu.go:275] no items to output this cycle
E0320 00:59:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:59:13.409827  543705 memory.go:191] Add success.
I0320 00:59:13.409838  543705 cpu.go:282] Add success.
W0320 00:59:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 00:59:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 00:59:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 00:59:13.420358  543705 net.go:648] Add success.
I0320 00:59:13.423213  543705 net.go:770] primary dev: ETH0
I0320 00:59:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:59:13.423238  543705 net.go:698] Add success.
I0320 00:59:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 00:59:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 00:59:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 00:59:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 00:59:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 00:59:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 00:59:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 00:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:59:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:59:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 00:59:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 00:59:21.791637  543705 disk_info.go:125] begin check local disk info of client
I0320 00:59:21.794135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 00:59:21.794141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aab80 0xc0001aabc0]
E0320 00:59:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:59:23.409769  543705 memory.go:184] no items to output this cycle
I0320 00:59:23.409780  543705 cpu.go:275] no items to output this cycle
E0320 00:59:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:59:33.409774  543705 memory.go:184] no items to output this cycle
I0320 00:59:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 00:59:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:59:43.409790  543705 memory.go:191] Add success.
I0320 00:59:43.409792  543705 cpu.go:282] Add success.
I0320 00:59:43.419958  543705 net.go:648] Add success.
I0320 00:59:43.422890  543705 net.go:770] primary dev: ETH0
I0320 00:59:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 00:59:43.422916  543705 net.go:698] Add success.
I0320 00:59:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 00:59:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 00:59:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 00:59:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 00:59:53.409781  543705 memory.go:184] no items to output this cycle
I0320 00:59:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 01:00:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:00:03.409790  543705 memory.go:184] no items to output this cycle
I0320 01:00:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 01:00:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:00:13.409794  543705 memory.go:191] Add success.
I0320 01:00:13.409793  543705 cpu.go:282] Add success.
W0320 01:00:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:00:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:00:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:00:13.420217  543705 net.go:648] Add success.
I0320 01:00:13.423111  543705 net.go:770] primary dev: ETH0
I0320 01:00:13.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:00:13.423138  543705 net.go:698] Add success.
I0320 01:00:13.463681  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6d79f950-0394-4358-9d6e-b599bfafa392","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:00:13.463714  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:00:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:00:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:00:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 01:00:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:00:14.456505  543705 disk_worker.go:494] system disk:vda1
I0320 01:00:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:00:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:00:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:00:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:00:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:00:21.795614  543705 disk_info.go:125] begin check local disk info of client
I0320 01:00:21.798062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:00:21.798068  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bd80 0xc00007bdc0]
E0320 01:00:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:00:23.409784  543705 memory.go:184] no items to output this cycle
I0320 01:00:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:00:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:00:33.409800  543705 memory.go:184] no items to output this cycle
I0320 01:00:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 01:00:38.009739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:00:38.009745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:00:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:00:43.410811  543705 memory.go:191] Add success.
I0320 01:00:43.409818  543705 cpu.go:282] Add success.
I0320 01:00:43.420485  543705 net.go:648] Add success.
I0320 01:00:43.423104  543705 net.go:770] primary dev: ETH0
I0320 01:00:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:00:43.423131  543705 net.go:698] Add success.
I0320 01:00:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:00:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:00:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:00:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:00:53.409774  543705 memory.go:184] no items to output this cycle
I0320 01:00:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 01:01:03.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:01:03.409857  543705 memory.go:184] no items to output this cycle
I0320 01:01:03.409989  543705 cpu.go:275] no items to output this cycle
E0320 01:01:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:01:13.409799  543705 memory.go:191] Add success.
I0320 01:01:13.409815  543705 cpu.go:282] Add success.
W0320 01:01:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:01:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:01:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:01:13.420148  543705 net.go:648] Add success.
I0320 01:01:13.422968  543705 net.go:770] primary dev: ETH0
I0320 01:01:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:01:13.422993  543705 net.go:698] Add success.
I0320 01:01:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:01:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:01:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 01:01:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:01:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 01:01:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:01:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:01:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:01:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:01:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:01:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:01:21.798154  543705 disk_info.go:125] begin check local disk info of client
I0320 01:01:21.800597  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:01:21.800604  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d4c0 0xc00034d500]
E0320 01:01:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:01:23.409787  543705 memory.go:184] no items to output this cycle
I0320 01:01:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 01:01:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:01:33.409795  543705 memory.go:184] no items to output this cycle
I0320 01:01:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 01:01:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:01:43.409782  543705 memory.go:191] Add success.
I0320 01:01:43.409802  543705 cpu.go:282] Add success.
I0320 01:01:43.420002  543705 net.go:648] Add success.
I0320 01:01:43.422821  543705 net.go:770] primary dev: ETH0
I0320 01:01:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:01:43.422845  543705 net.go:698] Add success.
I0320 01:01:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:01:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:01:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:01:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:01:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:01:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:02:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:02:03.409779  543705 memory.go:184] no items to output this cycle
I0320 01:02:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 01:02:13.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:02:13.409902  543705 memory.go:191] Add success.
W0320 01:02:13.409967  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:02:13.409989  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:02:13.409992  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:02:13.409994  543705 cpu.go:282] Add success.
I0320 01:02:13.419713  543705 net.go:648] Add success.
I0320 01:02:13.422450  543705 net.go:770] primary dev: ETH0
I0320 01:02:13.422463  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:02:13.422474  543705 net.go:698] Add success.
W0320 01:02:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:02:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 01:02:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:02:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:02:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:02:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:02:14.456654  543705 disk_worker.go:494] system disk:vda1
I0320 01:02:14.456697  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:02:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:02:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:02:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:02:16.457989  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:02:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:02:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:02:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:02:21.801672  543705 disk_info.go:125] begin check local disk info of client
I0320 01:02:21.804096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:02:21.804102  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329380 0xc0003293c0]
E0320 01:02:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:02:23.409769  543705 memory.go:184] no items to output this cycle
I0320 01:02:23.409777  543705 cpu.go:275] no items to output this cycle
E0320 01:02:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:02:33.409772  543705 memory.go:184] no items to output this cycle
I0320 01:02:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 01:02:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:02:43.409791  543705 memory.go:191] Add success.
I0320 01:02:43.409791  543705 cpu.go:282] Add success.
I0320 01:02:43.419848  543705 net.go:648] Add success.
I0320 01:02:43.422459  543705 net.go:770] primary dev: ETH0
I0320 01:02:43.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:02:43.422484  543705 net.go:698] Add success.
I0320 01:02:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:02:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:02:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:02:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:02:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:02:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:03:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:03:03.409795  543705 memory.go:184] no items to output this cycle
I0320 01:03:03.409837  543705 cpu.go:275] no items to output this cycle
E0320 01:03:13.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:03:13.409933  543705 cpu.go:282] Add success.
I0320 01:03:13.409934  543705 memory.go:191] Add success.
W0320 01:03:13.409973  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:03:13.409992  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:03:13.409996  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:03:13.419766  543705 net.go:648] Add success.
I0320 01:03:13.422597  543705 net.go:770] primary dev: ETH0
I0320 01:03:13.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:03:13.422626  543705 net.go:698] Add success.
I0320 01:03:13.464105  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5a238178-3a3c-4fa8-98e8-6804248a142b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:03:13.464136  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:03:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:03:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:03:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0320 01:03:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:03:14.456792  543705 disk_worker.go:494] system disk:vda1
I0320 01:03:14.456828  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:03:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:03:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:03:21.805673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:03:21.808081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:03:21.808087  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f000 0xc00032f040]
E0320 01:03:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:03:23.409802  543705 memory.go:184] no items to output this cycle
I0320 01:03:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 01:03:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:03:33.409770  543705 memory.go:184] no items to output this cycle
I0320 01:03:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 01:03:38.012748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:03:38.012755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:03:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:03:43.410595  543705 memory.go:191] Add success.
I0320 01:03:43.409789  543705 cpu.go:282] Add success.
I0320 01:03:43.420274  543705 net.go:648] Add success.
I0320 01:03:43.422943  543705 net.go:770] primary dev: ETH0
I0320 01:03:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:03:43.422968  543705 net.go:698] Add success.
I0320 01:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:03:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:03:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:03:53.409784  543705 memory.go:184] no items to output this cycle
I0320 01:03:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 01:04:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:04:03.409769  543705 memory.go:184] no items to output this cycle
I0320 01:04:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:04:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:04:13.409788  543705 memory.go:191] Add success.
I0320 01:04:13.409805  543705 cpu.go:282] Add success.
W0320 01:04:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:04:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:04:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:04:13.420245  543705 net.go:648] Add success.
I0320 01:04:13.422967  543705 net.go:770] primary dev: ETH0
I0320 01:04:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:04:13.422996  543705 net.go:698] Add success.
I0320 01:04:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:04:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:04:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0320 01:04:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:04:14.456471  543705 disk_worker.go:494] system disk:vda1
I0320 01:04:14.456513  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:04:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:04:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:04:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:04:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:04:21.809671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:04:21.812088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:04:21.812094  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034dd40 0xc00034dd80]
E0320 01:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:04:23.409782  543705 memory.go:184] no items to output this cycle
I0320 01:04:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:04:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:04:33.409778  543705 memory.go:184] no items to output this cycle
I0320 01:04:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 01:04:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:04:43.409784  543705 memory.go:191] Add success.
I0320 01:04:43.409808  543705 cpu.go:282] Add success.
I0320 01:04:43.419994  543705 net.go:648] Add success.
I0320 01:04:43.422991  543705 net.go:770] primary dev: ETH0
I0320 01:04:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:04:43.423015  543705 net.go:698] Add success.
I0320 01:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:04:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:04:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:04:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:04:53.409794  543705 cpu.go:275] no items to output this cycle
I0320 01:04:53.409796  543705 memory.go:184] no items to output this cycle
E0320 01:05:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:05:03.409781  543705 memory.go:184] no items to output this cycle
I0320 01:05:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 01:05:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:05:13.409833  543705 memory.go:191] Add success.
I0320 01:05:13.409839  543705 cpu.go:282] Add success.
W0320 01:05:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:05:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:05:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:05:13.420184  543705 net.go:648] Add success.
I0320 01:05:13.422937  543705 net.go:770] primary dev: ETH0
I0320 01:05:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:05:13.422962  543705 net.go:698] Add success.
I0320 01:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:05:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:05:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 01:05:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:05:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 01:05:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:05:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:05:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:05:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:05:21.813690  543705 disk_info.go:125] begin check local disk info of client
I0320 01:05:21.816107  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:05:21.816113  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034cac0 0xc00034cb00]
E0320 01:05:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:05:23.409755  543705 memory.go:184] no items to output this cycle
I0320 01:05:23.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:05:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:05:33.409789  543705 memory.go:184] no items to output this cycle
I0320 01:05:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:05:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:05:43.409792  543705 memory.go:191] Add success.
I0320 01:05:43.409793  543705 cpu.go:282] Add success.
I0320 01:05:43.419876  543705 net.go:648] Add success.
I0320 01:05:43.422666  543705 net.go:770] primary dev: ETH0
I0320 01:05:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:05:43.422701  543705 net.go:698] Add success.
I0320 01:05:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:05:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:05:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:05:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:05:53.409812  543705 memory.go:184] no items to output this cycle
I0320 01:05:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 01:06:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:06:03.409788  543705 memory.go:184] no items to output this cycle
I0320 01:06:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:06:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:06:13.409804  543705 memory.go:191] Add success.
I0320 01:06:13.409821  543705 cpu.go:282] Add success.
W0320 01:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:06:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:06:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:06:13.420172  543705 net.go:648] Add success.
I0320 01:06:13.422902  543705 net.go:770] primary dev: ETH0
I0320 01:06:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:06:13.422927  543705 net.go:698] Add success.
I0320 01:06:13.463304  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5d823a03-da05-4f29-8ef6-22bdbdf72998","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:06:13.463337  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:06:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:06:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 01:06:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:06:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 01:06:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:06:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:06:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:06:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:06:21.817675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:06:21.820100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:06:21.820106  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d5540 0xc0004d5580]
E0320 01:06:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:06:23.409802  543705 memory.go:184] no items to output this cycle
I0320 01:06:23.409815  543705 cpu.go:275] no items to output this cycle
E0320 01:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:06:33.409793  543705 memory.go:184] no items to output this cycle
I0320 01:06:33.409794  543705 cpu.go:275] no items to output this cycle
I0320 01:06:38.013729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:06:38.013736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:06:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:06:43.410568  543705 memory.go:191] Add success.
I0320 01:06:43.409798  543705 cpu.go:282] Add success.
I0320 01:06:43.420339  543705 net.go:648] Add success.
I0320 01:06:43.422850  543705 net.go:770] primary dev: ETH0
I0320 01:06:43.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:06:43.422877  543705 net.go:698] Add success.
I0320 01:06:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:06:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:06:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:06:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:06:53.409802  543705 memory.go:184] no items to output this cycle
I0320 01:06:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 01:07:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:07:03.409797  543705 memory.go:184] no items to output this cycle
I0320 01:07:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 01:07:13.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:07:13.409898  543705 memory.go:191] Add success.
W0320 01:07:13.409926  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:07:13.409938  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:07:13.409941  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:07:13.410010  543705 cpu.go:282] Add success.
I0320 01:07:13.419715  543705 net.go:648] Add success.
I0320 01:07:13.422562  543705 net.go:770] primary dev: ETH0
I0320 01:07:13.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:07:13.422605  543705 net.go:698] Add success.
I0320 01:07:13.453137  543705 event_worker.go:152] Polling the log file for events...
W0320 01:07:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:07:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 01:07:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:07:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:07:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:07:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:07:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 01:07:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:07:15.456783  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:07:15.456792  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:07:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:07:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:07:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:07:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:07:16.472312  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:07:21.821681  543705 disk_info.go:125] begin check local disk info of client
I0320 01:07:21.824056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:07:21.824062  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536f00 0xc000536f40]
E0320 01:07:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:07:23.409782  543705 memory.go:184] no items to output this cycle
I0320 01:07:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:07:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:07:33.409762  543705 memory.go:184] no items to output this cycle
I0320 01:07:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 01:07:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:07:43.409811  543705 memory.go:191] Add success.
I0320 01:07:43.409819  543705 cpu.go:282] Add success.
I0320 01:07:43.419974  543705 net.go:648] Add success.
I0320 01:07:43.422773  543705 net.go:770] primary dev: ETH0
I0320 01:07:43.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:07:43.422799  543705 net.go:698] Add success.
I0320 01:07:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:07:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:07:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:07:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:07:53.409803  543705 memory.go:184] no items to output this cycle
I0320 01:07:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 01:08:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:08:03.409777  543705 memory.go:184] no items to output this cycle
I0320 01:08:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 01:08:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:08:13.409820  543705 memory.go:191] Add success.
I0320 01:08:13.409828  543705 cpu.go:282] Add success.
W0320 01:08:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:08:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:08:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:08:13.420369  543705 net.go:648] Add success.
I0320 01:08:13.423044  543705 net.go:770] primary dev: ETH0
I0320 01:08:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:08:13.423074  543705 net.go:698] Add success.
I0320 01:08:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:08:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:08:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 01:08:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:08:14.456569  543705 disk_worker.go:494] system disk:vda1
I0320 01:08:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:08:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:08:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:08:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:08:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:08:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:08:21.825674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:08:21.828053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:08:21.828059  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033bd00 0xc00033bd40]
E0320 01:08:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:08:23.409799  543705 memory.go:184] no items to output this cycle
I0320 01:08:23.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:08:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:08:33.409775  543705 memory.go:184] no items to output this cycle
I0320 01:08:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 01:08:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:08:43.409793  543705 memory.go:191] Add success.
I0320 01:08:43.409795  543705 cpu.go:282] Add success.
I0320 01:08:43.419984  543705 net.go:648] Add success.
I0320 01:08:43.422617  543705 net.go:770] primary dev: ETH0
I0320 01:08:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:08:43.422645  543705 net.go:698] Add success.
I0320 01:08:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:08:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:08:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:08:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:08:53.409784  543705 memory.go:184] no items to output this cycle
I0320 01:08:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 01:09:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:09:03.409764  543705 memory.go:184] no items to output this cycle
I0320 01:09:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:09:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:09:13.409825  543705 memory.go:191] Add success.
I0320 01:09:13.409831  543705 cpu.go:282] Add success.
W0320 01:09:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:09:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:09:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:09:13.420144  543705 net.go:648] Add success.
I0320 01:09:13.423063  543705 net.go:770] primary dev: ETH0
I0320 01:09:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:09:13.423092  543705 net.go:698] Add success.
I0320 01:09:13.464306  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"edc0394f-094d-42c6-9ffd-164162f9c867","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:09:13.464339  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:09:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:09:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 01:09:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:09:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 01:09:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:09:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:09:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:09:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:09:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:09:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:09:21.829673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:09:21.832152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:09:21.832159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000323180 0xc0003231c0]
E0320 01:09:23.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:09:23.409754  543705 memory.go:184] no items to output this cycle
I0320 01:09:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 01:09:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:09:33.409796  543705 memory.go:184] no items to output this cycle
I0320 01:09:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 01:09:38.013873  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:09:38.013879  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:09:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:09:43.410720  543705 memory.go:191] Add success.
I0320 01:09:43.409816  543705 cpu.go:282] Add success.
I0320 01:09:43.420440  543705 net.go:648] Add success.
I0320 01:09:43.422994  543705 net.go:770] primary dev: ETH0
I0320 01:09:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:09:43.423021  543705 net.go:698] Add success.
I0320 01:09:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:09:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:09:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:09:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:09:53.409777  543705 memory.go:184] no items to output this cycle
I0320 01:09:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 01:10:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:10:03.409765  543705 memory.go:184] no items to output this cycle
I0320 01:10:03.409894  543705 cpu.go:275] no items to output this cycle
E0320 01:10:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:10:13.409818  543705 memory.go:191] Add success.
I0320 01:10:13.409830  543705 cpu.go:282] Add success.
W0320 01:10:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:10:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:10:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:10:13.420208  543705 net.go:648] Add success.
I0320 01:10:13.422901  543705 net.go:770] primary dev: ETH0
I0320 01:10:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:10:13.422927  543705 net.go:698] Add success.
I0320 01:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:10:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:10:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 01:10:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:10:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 01:10:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:10:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:10:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:10:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:10:21.833673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:10:21.836142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:10:21.836148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c9540 0xc0004c9580]
E0320 01:10:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:10:23.409759  543705 memory.go:184] no items to output this cycle
I0320 01:10:23.409778  543705 cpu.go:275] no items to output this cycle
E0320 01:10:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:10:33.409767  543705 memory.go:184] no items to output this cycle
I0320 01:10:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 01:10:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:10:43.409809  543705 memory.go:191] Add success.
I0320 01:10:43.409813  543705 cpu.go:282] Add success.
I0320 01:10:43.419838  543705 net.go:648] Add success.
I0320 01:10:43.422237  543705 net.go:770] primary dev: ETH0
I0320 01:10:43.422251  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:10:43.422263  543705 net.go:698] Add success.
I0320 01:10:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:10:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:10:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:10:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:10:53.409777  543705 memory.go:184] no items to output this cycle
I0320 01:10:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 01:11:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:11:03.409769  543705 memory.go:184] no items to output this cycle
I0320 01:11:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 01:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:11:13.409799  543705 memory.go:191] Add success.
W0320 01:11:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:11:13.409834  543705 cpu.go:282] Add success.
W0320 01:11:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:11:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:11:13.420163  543705 net.go:648] Add success.
I0320 01:11:13.422905  543705 net.go:770] primary dev: ETH0
I0320 01:11:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:11:13.422935  543705 net.go:698] Add success.
I0320 01:11:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:11:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:11:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 01:11:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:11:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 01:11:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:11:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:11:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:11:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:11:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:11:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:11:21.837671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:11:21.840094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:11:21.840101  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c8b00 0xc0004c8b40]
E0320 01:11:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:11:23.409767  543705 memory.go:184] no items to output this cycle
I0320 01:11:23.409779  543705 cpu.go:275] no items to output this cycle
E0320 01:11:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:11:33.409794  543705 memory.go:184] no items to output this cycle
I0320 01:11:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 01:11:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:11:43.409783  543705 memory.go:191] Add success.
I0320 01:11:43.409783  543705 cpu.go:282] Add success.
I0320 01:11:43.420284  543705 net.go:648] Add success.
I0320 01:11:43.423081  543705 net.go:770] primary dev: ETH0
I0320 01:11:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:11:43.423107  543705 net.go:698] Add success.
I0320 01:11:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:11:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:11:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:11:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:11:53.409806  543705 memory.go:184] no items to output this cycle
I0320 01:11:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 01:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:12:03.409774  543705 memory.go:184] no items to output this cycle
I0320 01:12:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:12:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:12:13.409871  543705 cpu.go:282] Add success.
I0320 01:12:13.409893  543705 memory.go:191] Add success.
W0320 01:12:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:12:13.409936  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:12:13.409947  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:12:13.419761  543705 net.go:648] Add success.
I0320 01:12:13.422692  543705 net.go:770] primary dev: ETH0
I0320 01:12:13.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:12:13.422723  543705 net.go:698] Add success.
I0320 01:12:13.631403  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31e488d0-3b41-4f64-a1fc-971eb93c0a2e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:12:13.631435  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 01:12:14.454194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:12:14.454206  543705 disk_worker.go:708] disk space is not compliant
W0320 01:12:14.454208  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:12:14.455448  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:12:14.455468  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:12:14.455474  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:12:14.456530  543705 disk_worker.go:494] system disk:vda1
I0320 01:12:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:12:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:12:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:12:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:12:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:12:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:12:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:12:16.472331  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:12:21.841673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:12:21.844056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:12:21.844062  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003527c0 0xc000352800]
E0320 01:12:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:12:23.409782  543705 memory.go:184] no items to output this cycle
I0320 01:12:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:12:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:12:33.409779  543705 cpu.go:275] no items to output this cycle
I0320 01:12:33.409783  543705 memory.go:184] no items to output this cycle
I0320 01:12:38.014018  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:12:38.014025  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:12:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:12:43.410687  543705 memory.go:191] Add success.
I0320 01:12:43.409797  543705 cpu.go:282] Add success.
I0320 01:12:43.420365  543705 net.go:648] Add success.
I0320 01:12:43.422992  543705 net.go:770] primary dev: ETH0
I0320 01:12:43.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:12:43.423021  543705 net.go:698] Add success.
I0320 01:12:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:12:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:12:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:12:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:12:53.409788  543705 memory.go:184] no items to output this cycle
I0320 01:12:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 01:13:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:13:03.409773  543705 memory.go:184] no items to output this cycle
I0320 01:13:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 01:13:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:13:13.409799  543705 memory.go:191] Add success.
I0320 01:13:13.409824  543705 cpu.go:282] Add success.
W0320 01:13:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:13:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:13:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:13:13.420163  543705 net.go:648] Add success.
I0320 01:13:13.422877  543705 net.go:770] primary dev: ETH0
I0320 01:13:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:13:13.422906  543705 net.go:698] Add success.
I0320 01:13:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:13:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:13:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 01:13:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:13:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 01:13:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:13:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:13:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:13:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:13:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:13:21.845675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:13:21.848142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:13:21.848149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be80 0xc00007bec0]
I0320 01:13:23.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:13:23.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:13:23.409831  543705 memory.go:184] no items to output this cycle
E0320 01:13:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:13:33.409785  543705 memory.go:184] no items to output this cycle
I0320 01:13:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 01:13:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:13:43.409817  543705 memory.go:191] Add success.
I0320 01:13:43.409817  543705 cpu.go:282] Add success.
I0320 01:13:43.419958  543705 net.go:648] Add success.
I0320 01:13:43.422706  543705 net.go:770] primary dev: ETH0
I0320 01:13:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:13:43.422732  543705 net.go:698] Add success.
I0320 01:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:13:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:13:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:13:53.409777  543705 memory.go:184] no items to output this cycle
I0320 01:13:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:14:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:14:03.409778  543705 memory.go:184] no items to output this cycle
I0320 01:14:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 01:14:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:14:13.409788  543705 memory.go:191] Add success.
I0320 01:14:13.409789  543705 cpu.go:282] Add success.
W0320 01:14:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:14:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:14:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:14:13.419759  543705 net.go:648] Add success.
I0320 01:14:13.422477  543705 net.go:770] primary dev: ETH0
I0320 01:14:13.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:14:13.422506  543705 net.go:698] Add success.
I0320 01:14:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:14:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:14:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 01:14:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:14:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 01:14:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:14:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:14:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:14:21.849671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:14:21.852147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:14:21.852154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d580 0xc00034d5c0]
E0320 01:14:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:14:23.409771  543705 memory.go:184] no items to output this cycle
I0320 01:14:23.409771  543705 cpu.go:275] no items to output this cycle
E0320 01:14:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:14:33.409775  543705 cpu.go:275] no items to output this cycle
I0320 01:14:33.409784  543705 memory.go:184] no items to output this cycle
E0320 01:14:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:14:43.409820  543705 memory.go:191] Add success.
I0320 01:14:43.409826  543705 cpu.go:282] Add success.
I0320 01:14:43.419958  543705 net.go:648] Add success.
I0320 01:14:43.422731  543705 net.go:770] primary dev: ETH0
I0320 01:14:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:14:43.422757  543705 net.go:698] Add success.
I0320 01:14:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:14:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:14:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:14:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:14:53.409777  543705 memory.go:184] no items to output this cycle
I0320 01:14:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 01:15:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:15:03.409801  543705 memory.go:184] no items to output this cycle
I0320 01:15:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:15:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:15:13.409790  543705 memory.go:191] Add success.
W0320 01:15:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:15:13.409824  543705 cpu.go:282] Add success.
W0320 01:15:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:15:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:15:13.419722  543705 net.go:648] Add success.
I0320 01:15:13.422356  543705 net.go:770] primary dev: ETH0
I0320 01:15:13.422369  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:15:13.422379  543705 net.go:698] Add success.
I0320 01:15:13.468480  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2857b90e-a280-4791-b960-d8303e729b83","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:15:13.468520  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:15:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:15:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 01:15:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:15:14.456514  543705 disk_worker.go:494] system disk:vda1
I0320 01:15:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:15:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:15:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:15:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:15:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:15:21.853673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:15:21.856128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:15:21.856134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bccc0 0xc0004bcd00]
E0320 01:15:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:15:23.409797  543705 memory.go:184] no items to output this cycle
I0320 01:15:23.409813  543705 cpu.go:275] no items to output this cycle
E0320 01:15:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:15:33.409781  543705 memory.go:184] no items to output this cycle
I0320 01:15:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 01:15:38.016777  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:15:38.016783  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:15:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:15:43.410895  543705 memory.go:191] Add success.
I0320 01:15:43.409827  543705 cpu.go:282] Add success.
I0320 01:15:43.420607  543705 net.go:648] Add success.
I0320 01:15:43.423267  543705 net.go:770] primary dev: ETH0
I0320 01:15:43.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:15:43.423294  543705 net.go:698] Add success.
I0320 01:15:46.458246  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:15:46.458320  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:15:46.458347  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:15:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:15:53.409777  543705 memory.go:184] no items to output this cycle
I0320 01:15:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 01:16:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:16:03.409803  543705 memory.go:184] no items to output this cycle
I0320 01:16:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 01:16:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:16:13.409837  543705 memory.go:191] Add success.
I0320 01:16:13.409843  543705 cpu.go:282] Add success.
W0320 01:16:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:16:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:16:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:16:13.420208  543705 net.go:648] Add success.
I0320 01:16:13.422877  543705 net.go:770] primary dev: ETH0
I0320 01:16:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:16:13.422903  543705 net.go:698] Add success.
I0320 01:16:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:16:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:16:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 01:16:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:16:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 01:16:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:16:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:16:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:16:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:16:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:16:16.472432  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:16:21.857674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:16:21.860118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:16:21.860124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae00 0xc0001aae40]
E0320 01:16:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:16:23.409792  543705 memory.go:184] no items to output this cycle
I0320 01:16:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 01:16:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:16:33.409803  543705 memory.go:184] no items to output this cycle
I0320 01:16:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 01:16:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:16:43.409777  543705 memory.go:191] Add success.
I0320 01:16:43.409800  543705 cpu.go:282] Add success.
I0320 01:16:43.419688  543705 net.go:770] primary dev: ETH0
I0320 01:16:43.419702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:16:43.419718  543705 net.go:698] Add success.
I0320 01:16:43.420101  543705 net.go:648] Add success.
I0320 01:16:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:16:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:16:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:16:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:16:53.409770  543705 memory.go:184] no items to output this cycle
I0320 01:16:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:17:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:17:03.409799  543705 memory.go:184] no items to output this cycle
I0320 01:17:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 01:17:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:17:13.409794  543705 memory.go:191] Add success.
I0320 01:17:13.409811  543705 cpu.go:282] Add success.
W0320 01:17:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:17:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:17:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:17:13.420121  543705 net.go:648] Add success.
I0320 01:17:13.422950  543705 net.go:770] primary dev: ETH0
I0320 01:17:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:17:13.422975  543705 net.go:698] Add success.
I0320 01:17:13.453538  543705 event_worker.go:152] Polling the log file for events...
W0320 01:17:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:17:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 01:17:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:17:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:17:14.455898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:17:14.455903  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:17:14.456545  543705 disk_worker.go:494] system disk:vda1
I0320 01:17:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:17:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:17:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:17:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:17:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:17:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:17:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:17:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:17:21.861671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:17:21.864093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:17:21.864099  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c500 0xc00034c540]
E0320 01:17:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:17:23.409787  543705 memory.go:184] no items to output this cycle
I0320 01:17:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 01:17:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:17:33.409767  543705 memory.go:184] no items to output this cycle
I0320 01:17:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:17:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:17:43.409795  543705 memory.go:191] Add success.
I0320 01:17:43.409797  543705 cpu.go:282] Add success.
I0320 01:17:43.419858  543705 net.go:648] Add success.
I0320 01:17:43.422536  543705 net.go:770] primary dev: ETH0
I0320 01:17:43.422549  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:17:43.422562  543705 net.go:698] Add success.
I0320 01:17:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:17:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:17:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:17:53.409770  543705 memory.go:184] no items to output this cycle
I0320 01:17:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:18:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:18:03.409809  543705 memory.go:184] no items to output this cycle
I0320 01:18:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 01:18:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:18:13.409880  543705 memory.go:191] Add success.
W0320 01:18:13.409911  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:18:13.409923  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:18:13.409928  543705 cpu.go:282] Add success.
I0320 01:18:13.409933  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:18:13.419748  543705 net.go:648] Add success.
I0320 01:18:13.422541  543705 net.go:770] primary dev: ETH0
I0320 01:18:13.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:18:13.422570  543705 net.go:698] Add success.
I0320 01:18:13.908825  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"63ad3ce6-0fa2-4349-a7f4-fb9ce9574144","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:18:13.908867  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:18:14.454618  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:18:14.454848  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:18:14.454858  543705 disk_worker.go:708] disk space is not compliant
W0320 01:18:14.454861  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:18:14.456205  543705 disk_worker.go:494] system disk:vda1
I0320 01:18:14.456260  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:18:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:18:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:18:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:18:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:18:21.865671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:18:21.868076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:18:21.868082  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af80 0xc00007afc0]
I0320 01:18:23.409788  543705 cpu.go:275] no items to output this cycle
E0320 01:18:23.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:18:23.409829  543705 memory.go:184] no items to output this cycle
E0320 01:18:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:18:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 01:18:33.409804  543705 memory.go:184] no items to output this cycle
I0320 01:18:38.017748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:18:38.017754  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:18:43.410410  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:18:43.411257  543705 memory.go:191] Add success.
I0320 01:18:43.410449  543705 cpu.go:282] Add success.
I0320 01:18:43.419944  543705 net.go:648] Add success.
I0320 01:18:43.422691  543705 net.go:770] primary dev: ETH0
I0320 01:18:43.422704  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:18:43.422716  543705 net.go:698] Add success.
I0320 01:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:18:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:18:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:18:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:18:53.409796  543705 memory.go:184] no items to output this cycle
I0320 01:18:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:19:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:19:03.409776  543705 memory.go:184] no items to output this cycle
I0320 01:19:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 01:19:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:19:13.409791  543705 memory.go:191] Add success.
I0320 01:19:13.409811  543705 cpu.go:282] Add success.
W0320 01:19:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:19:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:19:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:19:13.419718  543705 net.go:648] Add success.
I0320 01:19:13.422294  543705 net.go:770] primary dev: ETH0
I0320 01:19:13.422309  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:19:13.422323  543705 net.go:698] Add success.
I0320 01:19:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:19:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:19:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 01:19:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:19:14.456529  543705 disk_worker.go:494] system disk:vda1
I0320 01:19:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:19:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:19:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:19:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:19:21.869671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:19:21.872103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:19:21.872109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4380]
E0320 01:19:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:19:23.409795  543705 memory.go:184] no items to output this cycle
I0320 01:19:23.409808  543705 cpu.go:275] no items to output this cycle
E0320 01:19:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:19:33.409802  543705 memory.go:184] no items to output this cycle
I0320 01:19:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 01:19:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:19:43.409794  543705 memory.go:191] Add success.
I0320 01:19:43.409814  543705 cpu.go:282] Add success.
I0320 01:19:43.420009  543705 net.go:648] Add success.
I0320 01:19:43.423505  543705 net.go:770] primary dev: ETH0
I0320 01:19:43.423518  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:19:43.423530  543705 net.go:698] Add success.
I0320 01:19:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:19:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:19:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:19:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:19:53.409771  543705 memory.go:184] no items to output this cycle
I0320 01:19:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:20:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:20:03.409804  543705 memory.go:184] no items to output this cycle
I0320 01:20:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:20:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:20:13.409824  543705 memory.go:191] Add success.
I0320 01:20:13.409833  543705 cpu.go:282] Add success.
W0320 01:20:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:20:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:20:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:20:13.420414  543705 net.go:648] Add success.
I0320 01:20:13.423201  543705 net.go:770] primary dev: ETH0
I0320 01:20:13.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:20:13.423239  543705 net.go:698] Add success.
I0320 01:20:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:20:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:20:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 01:20:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:20:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 01:20:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:20:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:20:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:20:21.873671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:20:21.876114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:20:21.876122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a9f40 0xc00007a000]
E0320 01:20:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:20:23.409790  543705 memory.go:184] no items to output this cycle
I0320 01:20:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 01:20:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:20:33.409783  543705 memory.go:184] no items to output this cycle
I0320 01:20:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 01:20:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:20:43.409818  543705 memory.go:191] Add success.
I0320 01:20:43.409825  543705 cpu.go:282] Add success.
I0320 01:20:43.419971  543705 net.go:648] Add success.
I0320 01:20:43.422490  543705 net.go:770] primary dev: ETH0
I0320 01:20:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:20:43.422520  543705 net.go:698] Add success.
I0320 01:20:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:20:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:20:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:20:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:20:53.409767  543705 memory.go:184] no items to output this cycle
I0320 01:20:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:21:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:21:03.409769  543705 memory.go:184] no items to output this cycle
I0320 01:21:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:21:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:21:13.409821  543705 memory.go:191] Add success.
I0320 01:21:13.409822  543705 cpu.go:282] Add success.
W0320 01:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:21:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:21:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:21:13.420130  543705 net.go:648] Add success.
I0320 01:21:13.422912  543705 net.go:770] primary dev: ETH0
I0320 01:21:13.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:21:13.422939  543705 net.go:698] Add success.
I0320 01:21:13.463016  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f06c1826-afbb-4184-ae47-4aaa4044fc63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:21:13.463051  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:21:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:21:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:21:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 01:21:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:21:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 01:21:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:21:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:21:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:21:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:21:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:21:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:21:21.877673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:21:21.880088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:21:21.880094  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aea00 0xc0003aea40]
E0320 01:21:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:21:23.409758  543705 memory.go:184] no items to output this cycle
I0320 01:21:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:21:33.409794  543705 memory.go:184] no items to output this cycle
I0320 01:21:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 01:21:38.020796  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:21:38.020803  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:21:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:21:43.410705  543705 memory.go:191] Add success.
I0320 01:21:43.409805  543705 cpu.go:282] Add success.
I0320 01:21:43.420432  543705 net.go:648] Add success.
I0320 01:21:43.423185  543705 net.go:770] primary dev: ETH0
I0320 01:21:43.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:21:43.423212  543705 net.go:698] Add success.
I0320 01:21:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:21:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:21:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:21:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:21:53.409782  543705 memory.go:184] no items to output this cycle
I0320 01:21:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 01:22:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:22:03.409773  543705 memory.go:184] no items to output this cycle
I0320 01:22:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:22:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:22:13.409809  543705 memory.go:191] Add success.
I0320 01:22:13.409820  543705 cpu.go:282] Add success.
W0320 01:22:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:22:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:22:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:22:13.420073  543705 net.go:648] Add success.
I0320 01:22:13.423030  543705 net.go:770] primary dev: ETH0
I0320 01:22:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:22:13.423058  543705 net.go:698] Add success.
W0320 01:22:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:22:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 01:22:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:22:14.456786  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:22:14.456795  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:22:14.456800  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:22:14.456844  543705 disk_worker.go:494] system disk:vda1
I0320 01:22:14.456883  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:22:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:22:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:22:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:22:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:22:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:22:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:22:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:22:21.881673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:22:21.884071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:22:21.884077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331540 0xc000331580]
E0320 01:22:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:22:23.409780  543705 memory.go:184] no items to output this cycle
I0320 01:22:23.409793  543705 cpu.go:275] no items to output this cycle
E0320 01:22:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:22:33.409799  543705 memory.go:184] no items to output this cycle
I0320 01:22:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:22:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:22:43.409785  543705 memory.go:191] Add success.
I0320 01:22:43.409806  543705 cpu.go:282] Add success.
I0320 01:22:43.419983  543705 net.go:648] Add success.
I0320 01:22:43.422641  543705 net.go:770] primary dev: ETH0
I0320 01:22:43.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:22:43.422666  543705 net.go:698] Add success.
I0320 01:22:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:22:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:22:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:22:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:22:53.409770  543705 memory.go:184] no items to output this cycle
I0320 01:22:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:23:03.409768  543705 memory.go:184] no items to output this cycle
I0320 01:23:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:23:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:23:13.409819  543705 memory.go:191] Add success.
I0320 01:23:13.409829  543705 cpu.go:282] Add success.
W0320 01:23:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:23:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:23:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:23:13.420377  543705 net.go:648] Add success.
I0320 01:23:13.423464  543705 net.go:770] primary dev: ETH0
I0320 01:23:13.423480  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:23:13.423494  543705 net.go:698] Add success.
I0320 01:23:14.453952  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:23:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:23:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0320 01:23:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:23:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 01:23:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:23:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:23:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:23:16.472353  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:23:21.885673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:23:21.888040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:23:21.888046  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331ac0 0xc000331b00]
E0320 01:23:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:23:23.409780  543705 memory.go:184] no items to output this cycle
I0320 01:23:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:23:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:23:33.409784  543705 memory.go:184] no items to output this cycle
I0320 01:23:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:23:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:23:43.409816  543705 memory.go:191] Add success.
I0320 01:23:43.409827  543705 cpu.go:282] Add success.
I0320 01:23:43.419956  543705 net.go:648] Add success.
I0320 01:23:43.422669  543705 net.go:770] primary dev: ETH0
I0320 01:23:43.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:23:43.422700  543705 net.go:698] Add success.
I0320 01:23:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:23:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:23:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:23:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:23:53.409785  543705 memory.go:184] no items to output this cycle
I0320 01:23:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 01:24:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:24:03.409785  543705 memory.go:184] no items to output this cycle
I0320 01:24:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 01:24:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:24:13.409792  543705 memory.go:191] Add success.
I0320 01:24:13.409812  543705 cpu.go:282] Add success.
W0320 01:24:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:24:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:24:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:24:13.420123  543705 net.go:648] Add success.
I0320 01:24:13.422944  543705 net.go:770] primary dev: ETH0
I0320 01:24:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:24:13.422971  543705 net.go:698] Add success.
I0320 01:24:13.464278  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ad5638c6-4afa-4cb0-8e39-ee12548561d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:24:13.464311  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:24:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:24:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:24:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 01:24:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:24:14.456680  543705 disk_worker.go:494] system disk:vda1
I0320 01:24:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:24:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:24:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:24:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:24:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:24:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:24:21.889680  543705 disk_info.go:125] begin check local disk info of client
I0320 01:24:21.892121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:24:21.892128  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331a80 0xc000331ac0]
E0320 01:24:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:24:23.409784  543705 memory.go:184] no items to output this cycle
I0320 01:24:23.409786  543705 cpu.go:275] no items to output this cycle
E0320 01:24:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:24:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 01:24:33.409790  543705 memory.go:184] no items to output this cycle
I0320 01:24:38.021745  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:24:38.021752  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:24:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:24:43.410656  543705 memory.go:191] Add success.
I0320 01:24:43.409825  543705 cpu.go:282] Add success.
I0320 01:24:43.420367  543705 net.go:648] Add success.
I0320 01:24:43.422960  543705 net.go:770] primary dev: ETH0
I0320 01:24:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:24:43.422986  543705 net.go:698] Add success.
I0320 01:24:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:24:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:24:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:24:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:24:53.409818  543705 memory.go:184] no items to output this cycle
I0320 01:24:53.409829  543705 cpu.go:275] no items to output this cycle
E0320 01:25:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:25:03.409788  543705 memory.go:184] no items to output this cycle
I0320 01:25:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 01:25:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:25:13.409805  543705 memory.go:191] Add success.
I0320 01:25:13.409811  543705 cpu.go:282] Add success.
W0320 01:25:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:25:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:25:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:25:13.420658  543705 net.go:648] Add success.
I0320 01:25:13.423579  543705 net.go:770] primary dev: ETH0
I0320 01:25:13.423591  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:25:13.423602  543705 net.go:698] Add success.
I0320 01:25:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:25:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:25:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 01:25:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:25:14.457636  543705 disk_worker.go:494] system disk:vda1
I0320 01:25:14.457693  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:25:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:25:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:25:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:25:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:25:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:25:21.893671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:25:21.896133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:25:21.896138  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c080 0xc00034c0c0]
E0320 01:25:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:25:23.409772  543705 memory.go:184] no items to output this cycle
I0320 01:25:23.409792  543705 cpu.go:275] no items to output this cycle
E0320 01:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:25:33.409783  543705 memory.go:184] no items to output this cycle
I0320 01:25:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:25:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:25:43.409815  543705 memory.go:191] Add success.
I0320 01:25:43.409821  543705 cpu.go:282] Add success.
I0320 01:25:43.419994  543705 net.go:648] Add success.
I0320 01:25:43.422895  543705 net.go:770] primary dev: ETH0
I0320 01:25:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:25:43.422924  543705 net.go:698] Add success.
I0320 01:25:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:25:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:25:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:25:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:25:53.409793  543705 cpu.go:275] no items to output this cycle
I0320 01:25:53.409798  543705 memory.go:184] no items to output this cycle
E0320 01:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:26:03.409777  543705 memory.go:184] no items to output this cycle
I0320 01:26:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:26:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:26:13.409821  543705 memory.go:191] Add success.
I0320 01:26:13.409826  543705 cpu.go:282] Add success.
W0320 01:26:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:26:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:26:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:26:13.420133  543705 net.go:648] Add success.
I0320 01:26:13.422743  543705 net.go:770] primary dev: ETH0
I0320 01:26:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:26:13.422780  543705 net.go:698] Add success.
I0320 01:26:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:26:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:26:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 01:26:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:26:14.456836  543705 disk_worker.go:494] system disk:vda1
I0320 01:26:14.456865  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:26:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:26:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:26:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:26:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:26:21.897675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:26:21.900127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:26:21.900135  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047fe80 0xc00047fec0]
E0320 01:26:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:26:23.409756  543705 memory.go:184] no items to output this cycle
I0320 01:26:23.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:26:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:26:33.409809  543705 memory.go:184] no items to output this cycle
I0320 01:26:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 01:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:26:43.409780  543705 memory.go:191] Add success.
I0320 01:26:43.409817  543705 cpu.go:282] Add success.
I0320 01:26:43.419854  543705 net.go:648] Add success.
I0320 01:26:43.422592  543705 net.go:770] primary dev: ETH0
I0320 01:26:43.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:26:43.422621  543705 net.go:698] Add success.
I0320 01:26:46.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:26:46.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:26:46.458115  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:26:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:26:53.409781  543705 memory.go:184] no items to output this cycle
I0320 01:26:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 01:27:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:27:03.409781  543705 memory.go:184] no items to output this cycle
I0320 01:27:03.409789  543705 cpu.go:275] no items to output this cycle
W0320 01:27:13.409705  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0320 01:27:13.409718  543705 conf_downlod.go:89] use old conf
E0320 01:27:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:27:13.409796  543705 memory.go:191] Add success.
I0320 01:27:13.409819  543705 cpu.go:282] Add success.
W0320 01:27:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:27:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:27:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:27:13.420111  543705 net.go:648] Add success.
I0320 01:27:13.423116  543705 net.go:770] primary dev: ETH0
I0320 01:27:13.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:27:13.423144  543705 net.go:698] Add success.
I0320 01:27:13.429802  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 01:27:13.453138  543705 event_worker.go:152] Polling the log file for events...
I0320 01:27:13.468491  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe5c1129-a20c-46d1-8c07-df70a2b9f271","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:27:13.468526  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 01:27:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:27:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 01:27:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:27:14.456778  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:27:14.456787  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:27:14.456794  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:27:14.457790  543705 disk_worker.go:494] system disk:vda1
I0320 01:27:14.457847  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:27:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:27:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:27:16.458067  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:27:16.458072  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:27:16.458123  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:27:16.458142  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:27:16.472544  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:27:21.901672  543705 disk_info.go:125] begin check local disk info of client
I0320 01:27:21.904066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:27:21.904072  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002743c0 0xc000274400]
E0320 01:27:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:27:23.409782  543705 memory.go:184] no items to output this cycle
I0320 01:27:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:27:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:27:33.409785  543705 memory.go:184] no items to output this cycle
I0320 01:27:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 01:27:38.024762  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:27:38.024768  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:27:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:27:43.410674  543705 memory.go:191] Add success.
I0320 01:27:43.409785  543705 cpu.go:282] Add success.
I0320 01:27:43.420422  543705 net.go:648] Add success.
I0320 01:27:43.423078  543705 net.go:770] primary dev: ETH0
I0320 01:27:43.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:27:43.423107  543705 net.go:698] Add success.
I0320 01:27:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:27:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:27:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:27:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:27:53.409785  543705 memory.go:184] no items to output this cycle
I0320 01:27:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 01:28:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:28:03.409767  543705 memory.go:184] no items to output this cycle
I0320 01:28:03.409799  543705 cpu.go:275] no items to output this cycle
W0320 01:28:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:28:13.409737  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:28:13.409743  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:28:13.409828  543705 cpu.go:282] Add success.
E0320 01:28:13.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:28:13.409854  543705 memory.go:191] Add success.
I0320 01:28:13.419868  543705 net.go:770] primary dev: ETH0
I0320 01:28:13.419894  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:28:13.419910  543705 net.go:698] Add success.
I0320 01:28:13.420286  543705 net.go:648] Add success.
I0320 01:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:28:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:28:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 01:28:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:28:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 01:28:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:28:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:28:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:28:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:28:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:28:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:28:21.905672  543705 disk_info.go:125] begin check local disk info of client
I0320 01:28:21.908149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:28:21.908155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2b40 0xc0004a2b80]
E0320 01:28:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:28:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 01:28:23.409783  543705 memory.go:184] no items to output this cycle
E0320 01:28:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:28:33.409785  543705 memory.go:184] no items to output this cycle
I0320 01:28:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 01:28:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:28:43.409791  543705 memory.go:191] Add success.
I0320 01:28:43.409791  543705 cpu.go:282] Add success.
I0320 01:28:43.420005  543705 net.go:648] Add success.
I0320 01:28:43.423028  543705 net.go:770] primary dev: ETH0
I0320 01:28:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:28:43.423056  543705 net.go:698] Add success.
I0320 01:28:46.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:28:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:28:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:28:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:28:53.409793  543705 memory.go:184] no items to output this cycle
I0320 01:28:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:29:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:29:03.409778  543705 memory.go:184] no items to output this cycle
I0320 01:29:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 01:29:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:29:13.409796  543705 memory.go:191] Add success.
I0320 01:29:13.409797  543705 cpu.go:282] Add success.
W0320 01:29:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:29:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:29:13.420096  543705 net.go:648] Add success.
I0320 01:29:13.422687  543705 net.go:770] primary dev: ETH0
I0320 01:29:13.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:29:13.422716  543705 net.go:698] Add success.
I0320 01:29:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:29:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:29:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 01:29:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:29:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 01:29:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:29:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:29:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:29:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:29:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:29:21.909676  543705 disk_info.go:125] begin check local disk info of client
I0320 01:29:21.912117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:29:21.912124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fc080 0xc0004fc0c0]
E0320 01:29:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:29:23.409785  543705 memory.go:184] no items to output this cycle
I0320 01:29:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:29:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:29:33.409803  543705 memory.go:184] no items to output this cycle
I0320 01:29:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:29:43.409809  543705 memory.go:191] Add success.
I0320 01:29:43.409820  543705 cpu.go:282] Add success.
I0320 01:29:43.419879  543705 net.go:648] Add success.
I0320 01:29:43.422561  543705 net.go:770] primary dev: ETH0
I0320 01:29:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:29:43.422590  543705 net.go:698] Add success.
I0320 01:29:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:29:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:29:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:29:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:29:53.409782  543705 memory.go:184] no items to output this cycle
I0320 01:29:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 01:30:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:30:03.409802  543705 memory.go:184] no items to output this cycle
I0320 01:30:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 01:30:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:30:13.409821  543705 memory.go:191] Add success.
I0320 01:30:13.409827  543705 cpu.go:282] Add success.
W0320 01:30:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:30:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:30:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:30:13.420191  543705 net.go:648] Add success.
I0320 01:30:13.422939  543705 net.go:770] primary dev: ETH0
I0320 01:30:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:30:13.422963  543705 net.go:698] Add success.
I0320 01:30:13.470288  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6c9906e3-787b-472c-88e4-db5ab018bc19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:30:13.470322  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:30:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:30:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 01:30:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:30:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 01:30:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:30:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:30:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:30:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:30:16.472094  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:30:21.913681  543705 disk_info.go:125] begin check local disk info of client
I0320 01:30:21.916317  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:30:21.916325  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c000 0xc00034c040]
E0320 01:30:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:30:23.409767  543705 memory.go:184] no items to output this cycle
I0320 01:30:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:30:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:30:33.409789  543705 memory.go:184] no items to output this cycle
I0320 01:30:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 01:30:38.025739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:30:38.025745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:30:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:30:43.410848  543705 memory.go:191] Add success.
I0320 01:30:43.409807  543705 cpu.go:282] Add success.
I0320 01:30:43.420592  543705 net.go:648] Add success.
I0320 01:30:43.423622  543705 net.go:770] primary dev: ETH0
I0320 01:30:43.423637  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:30:43.423651  543705 net.go:698] Add success.
I0320 01:30:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:30:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:30:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:30:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:30:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:30:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:31:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:31:03.409777  543705 memory.go:184] no items to output this cycle
I0320 01:31:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 01:31:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:31:13.409786  543705 memory.go:191] Add success.
W0320 01:31:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:31:13.409819  543705 cpu.go:282] Add success.
W0320 01:31:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:31:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:31:13.420283  543705 net.go:648] Add success.
I0320 01:31:13.422929  543705 net.go:770] primary dev: ETH0
I0320 01:31:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:31:13.422955  543705 net.go:698] Add success.
I0320 01:31:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:31:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:31:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0320 01:31:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:31:14.456864  543705 disk_worker.go:494] system disk:vda1
I0320 01:31:14.456909  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:31:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:31:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:31:16.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:31:16.458109  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:31:16.472512  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:31:21.917666  543705 disk_info.go:125] begin check local disk info of client
I0320 01:31:21.920173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:31:21.920179  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381040 0xc000381080]
E0320 01:31:23.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:31:23.410271  543705 memory.go:184] no items to output this cycle
I0320 01:31:23.410284  543705 cpu.go:275] no items to output this cycle
E0320 01:31:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:31:33.409784  543705 memory.go:184] no items to output this cycle
I0320 01:31:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 01:31:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:31:43.409797  543705 memory.go:191] Add success.
I0320 01:31:43.409800  543705 cpu.go:282] Add success.
I0320 01:31:43.419887  543705 net.go:648] Add success.
I0320 01:31:43.422679  543705 net.go:770] primary dev: ETH0
I0320 01:31:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:31:43.422708  543705 net.go:698] Add success.
I0320 01:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:31:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:31:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:31:53.409815  543705 memory.go:184] no items to output this cycle
I0320 01:31:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 01:32:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:32:03.409797  543705 memory.go:184] no items to output this cycle
I0320 01:32:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:32:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:32:13.409820  543705 memory.go:191] Add success.
I0320 01:32:13.409824  543705 cpu.go:282] Add success.
W0320 01:32:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:32:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:32:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:32:13.420204  543705 net.go:648] Add success.
I0320 01:32:13.423328  543705 net.go:770] primary dev: ETH0
I0320 01:32:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:32:13.423358  543705 net.go:698] Add success.
W0320 01:32:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:32:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 01:32:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:32:14.455827  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:32:14.455837  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:32:14.455843  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:32:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 01:32:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:32:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:32:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:32:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:32:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:32:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:32:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:32:16.472353  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:32:21.921674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:32:21.924037  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:32:21.924043  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386ac0 0xc000386b00]
E0320 01:32:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:32:23.409804  543705 memory.go:184] no items to output this cycle
I0320 01:32:23.409815  543705 cpu.go:275] no items to output this cycle
E0320 01:32:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:32:33.409896  543705 cpu.go:275] no items to output this cycle
I0320 01:32:33.409933  543705 memory.go:184] no items to output this cycle
E0320 01:32:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:32:43.409771  543705 memory.go:191] Add success.
I0320 01:32:43.409804  543705 cpu.go:282] Add success.
I0320 01:32:43.419977  543705 net.go:648] Add success.
I0320 01:32:43.422865  543705 net.go:770] primary dev: ETH0
I0320 01:32:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:32:43.422890  543705 net.go:698] Add success.
I0320 01:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:32:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:32:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:32:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:32:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:32:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 01:33:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:33:03.409800  543705 memory.go:184] no items to output this cycle
I0320 01:33:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:33:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:33:13.409819  543705 memory.go:191] Add success.
I0320 01:33:13.409828  543705 cpu.go:282] Add success.
W0320 01:33:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:33:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:33:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:33:13.420297  543705 net.go:648] Add success.
I0320 01:33:13.423313  543705 net.go:770] primary dev: ETH0
I0320 01:33:13.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:33:13.423341  543705 net.go:698] Add success.
I0320 01:33:13.598829  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b26ad963-0df9-416f-bc3a-9aca7815445d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:33:13.598863  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:33:14.455096  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:33:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:33:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 01:33:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:33:14.456780  543705 disk_worker.go:494] system disk:vda1
I0320 01:33:14.456822  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:33:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:33:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:33:16.472478  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:33:21.925673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:33:21.928117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:33:21.928125  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9f40 0xc0001f4000]
E0320 01:33:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:33:23.409761  543705 memory.go:184] no items to output this cycle
I0320 01:33:23.409782  543705 cpu.go:275] no items to output this cycle
E0320 01:33:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:33:33.409785  543705 memory.go:184] no items to output this cycle
I0320 01:33:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 01:33:38.025892  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:33:38.025898  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:33:43.410731  543705 memory.go:191] Add success.
I0320 01:33:43.409795  543705 cpu.go:282] Add success.
I0320 01:33:43.420424  543705 net.go:648] Add success.
I0320 01:33:43.423294  543705 net.go:770] primary dev: ETH0
I0320 01:33:43.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:33:43.423319  543705 net.go:698] Add success.
I0320 01:33:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:33:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:33:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:33:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:33:53.409785  543705 memory.go:184] no items to output this cycle
I0320 01:33:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 01:34:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:34:03.409798  543705 memory.go:184] no items to output this cycle
I0320 01:34:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:34:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:34:13.409781  543705 memory.go:191] Add success.
I0320 01:34:13.409804  543705 cpu.go:282] Add success.
W0320 01:34:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:34:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:34:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:34:13.420255  543705 net.go:648] Add success.
I0320 01:34:13.423279  543705 net.go:770] primary dev: ETH0
I0320 01:34:13.423294  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:34:13.423308  543705 net.go:698] Add success.
I0320 01:34:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:34:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:34:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 01:34:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:34:14.456515  543705 disk_worker.go:494] system disk:vda1
I0320 01:34:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:34:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:34:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:34:21.929675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:34:21.932113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:34:21.932119  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e700 0xc00039e740]
E0320 01:34:23.410209  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:34:23.410227  543705 memory.go:184] no items to output this cycle
I0320 01:34:23.410255  543705 cpu.go:275] no items to output this cycle
E0320 01:34:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:34:33.409776  543705 memory.go:184] no items to output this cycle
I0320 01:34:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:34:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:34:43.409805  543705 memory.go:191] Add success.
I0320 01:34:43.409809  543705 cpu.go:282] Add success.
I0320 01:34:43.419982  543705 net.go:648] Add success.
I0320 01:34:43.422698  543705 net.go:770] primary dev: ETH0
I0320 01:34:43.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:34:43.422722  543705 net.go:698] Add success.
I0320 01:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:34:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:34:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:34:53.409812  543705 memory.go:184] no items to output this cycle
I0320 01:34:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 01:35:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:35:03.409778  543705 memory.go:184] no items to output this cycle
I0320 01:35:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 01:35:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:35:13.409826  543705 memory.go:191] Add success.
I0320 01:35:13.409827  543705 cpu.go:282] Add success.
W0320 01:35:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:35:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:35:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:35:13.420184  543705 net.go:648] Add success.
I0320 01:35:13.422941  543705 net.go:770] primary dev: ETH0
I0320 01:35:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:35:13.422972  543705 net.go:698] Add success.
I0320 01:35:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:35:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:35:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 01:35:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:35:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 01:35:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:35:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:35:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:35:16.472356  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:35:21.933675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:35:21.936073  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:35:21.936079  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034df00 0xc00034df40]
E0320 01:35:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:35:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 01:35:23.409794  543705 memory.go:184] no items to output this cycle
E0320 01:35:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:35:33.409779  543705 memory.go:184] no items to output this cycle
I0320 01:35:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 01:35:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:35:43.409780  543705 memory.go:191] Add success.
I0320 01:35:43.409798  543705 cpu.go:282] Add success.
I0320 01:35:43.419862  543705 net.go:648] Add success.
I0320 01:35:43.423111  543705 net.go:770] primary dev: ETH0
I0320 01:35:43.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:35:43.423136  543705 net.go:698] Add success.
I0320 01:35:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:35:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:35:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:35:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:35:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:35:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 01:36:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:36:03.409782  543705 memory.go:184] no items to output this cycle
I0320 01:36:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 01:36:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:36:13.409791  543705 cpu.go:282] Add success.
I0320 01:36:13.409797  543705 memory.go:191] Add success.
W0320 01:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:36:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:36:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:36:13.420140  543705 net.go:648] Add success.
I0320 01:36:13.423199  543705 net.go:770] primary dev: ETH0
I0320 01:36:13.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:36:13.423224  543705 net.go:698] Add success.
I0320 01:36:13.462438  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"342fdcc9-4a16-470b-9e0f-85a0a0d2e2da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:36:13.462469  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:36:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:36:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 01:36:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:36:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 01:36:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:36:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:36:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:36:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:36:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:36:21.937671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:36:21.940086  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:36:21.940092  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003291c0 0xc000329200]
E0320 01:36:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:36:23.409766  543705 memory.go:184] no items to output this cycle
I0320 01:36:23.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:36:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:36:33.409786  543705 memory.go:184] no items to output this cycle
I0320 01:36:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 01:36:38.026035  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:36:38.026041  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:36:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:36:43.410731  543705 memory.go:191] Add success.
I0320 01:36:43.409820  543705 cpu.go:282] Add success.
I0320 01:36:43.420485  543705 net.go:648] Add success.
I0320 01:36:43.423196  543705 net.go:770] primary dev: ETH0
I0320 01:36:43.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:36:43.423221  543705 net.go:698] Add success.
I0320 01:36:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:36:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:36:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:36:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 01:37:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:37:03.409784  543705 memory.go:184] no items to output this cycle
I0320 01:37:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:37:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:37:13.409803  543705 memory.go:191] Add success.
I0320 01:37:13.409806  543705 cpu.go:282] Add success.
W0320 01:37:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:37:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:37:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:37:13.420096  543705 net.go:648] Add success.
I0320 01:37:13.422945  543705 net.go:770] primary dev: ETH0
I0320 01:37:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:37:13.422970  543705 net.go:698] Add success.
I0320 01:37:13.453578  543705 event_worker.go:152] Polling the log file for events...
W0320 01:37:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:37:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 01:37:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:37:14.455927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:37:14.455935  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:37:14.455941  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:37:14.456557  543705 disk_worker.go:494] system disk:vda1
I0320 01:37:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:37:15.456785  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:37:15.456794  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:37:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:37:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:37:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:37:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:37:16.472343  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:37:21.941680  543705 disk_info.go:125] begin check local disk info of client
I0320 01:37:21.944100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:37:21.944107  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007afc0 0xc00007b000]
E0320 01:37:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:37:23.409782  543705 memory.go:184] no items to output this cycle
I0320 01:37:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:37:33.409794  543705 memory.go:184] no items to output this cycle
I0320 01:37:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:37:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:37:43.409776  543705 memory.go:191] Add success.
I0320 01:37:43.409797  543705 cpu.go:282] Add success.
I0320 01:37:43.419937  543705 net.go:648] Add success.
I0320 01:37:43.422662  543705 net.go:770] primary dev: ETH0
I0320 01:37:43.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:37:43.422686  543705 net.go:698] Add success.
I0320 01:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:37:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:37:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:37:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:37:53.409805  543705 memory.go:184] no items to output this cycle
I0320 01:37:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 01:38:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:38:03.409775  543705 memory.go:184] no items to output this cycle
I0320 01:38:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 01:38:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:38:13.409800  543705 memory.go:191] Add success.
I0320 01:38:13.409821  543705 cpu.go:282] Add success.
W0320 01:38:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:38:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:38:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:38:13.420169  543705 net.go:648] Add success.
I0320 01:38:13.423046  543705 net.go:770] primary dev: ETH0
I0320 01:38:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:38:13.423072  543705 net.go:698] Add success.
I0320 01:38:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:38:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:38:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 01:38:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:38:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 01:38:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:38:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:38:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:38:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:38:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:38:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:38:21.945676  543705 disk_info.go:125] begin check local disk info of client
I0320 01:38:21.948137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:38:21.948143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5040 0xc0000c5080]
E0320 01:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:38:23.409792  543705 memory.go:184] no items to output this cycle
I0320 01:38:23.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:38:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:38:33.409800  543705 memory.go:184] no items to output this cycle
I0320 01:38:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:38:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:38:43.409815  543705 memory.go:191] Add success.
I0320 01:38:43.409823  543705 cpu.go:282] Add success.
I0320 01:38:43.419873  543705 net.go:648] Add success.
I0320 01:38:43.422514  543705 net.go:770] primary dev: ETH0
I0320 01:38:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:38:43.422537  543705 net.go:698] Add success.
I0320 01:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:38:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:38:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:38:53.409785  543705 memory.go:184] no items to output this cycle
I0320 01:38:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:39:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:39:03.409798  543705 memory.go:184] no items to output this cycle
I0320 01:39:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 01:39:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:39:13.409890  543705 memory.go:191] Add success.
W0320 01:39:13.409921  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:39:13.409933  543705 cpu.go:282] Add success.
W0320 01:39:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:39:13.409938  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:39:13.419754  543705 net.go:648] Add success.
I0320 01:39:13.422893  543705 net.go:770] primary dev: ETH0
I0320 01:39:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:39:13.422921  543705 net.go:698] Add success.
I0320 01:39:13.468691  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f8de55dc-a447-4848-9c9e-37f517a9152b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:39:13.468724  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:39:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:39:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:39:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 01:39:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:39:14.456622  543705 disk_worker.go:494] system disk:vda1
I0320 01:39:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:39:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:39:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:39:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:39:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:39:21.949677  543705 disk_info.go:125] begin check local disk info of client
I0320 01:39:21.952163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:39:21.952170  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034de80 0xc00034dec0]
E0320 01:39:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:39:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 01:39:23.409785  543705 memory.go:184] no items to output this cycle
E0320 01:39:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:39:33.409775  543705 cpu.go:275] no items to output this cycle
I0320 01:39:33.409790  543705 memory.go:184] no items to output this cycle
I0320 01:39:38.028832  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:39:38.028838  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:39:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:39:43.410674  543705 memory.go:191] Add success.
I0320 01:39:43.409823  543705 cpu.go:282] Add success.
I0320 01:39:43.420399  543705 net.go:648] Add success.
I0320 01:39:43.423186  543705 net.go:770] primary dev: ETH0
I0320 01:39:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:39:43.423215  543705 net.go:698] Add success.
I0320 01:39:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:39:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:39:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:39:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:39:53.409776  543705 memory.go:184] no items to output this cycle
I0320 01:39:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 01:40:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:40:03.409774  543705 memory.go:184] no items to output this cycle
I0320 01:40:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 01:40:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:40:13.409810  543705 memory.go:191] Add success.
I0320 01:40:13.409817  543705 cpu.go:282] Add success.
W0320 01:40:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:40:13.413258  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:40:13.413263  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:40:13.419773  543705 net.go:648] Add success.
I0320 01:40:13.421437  543705 net.go:770] primary dev: ETH0
I0320 01:40:13.421452  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:40:13.421465  543705 net.go:698] Add success.
I0320 01:40:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:40:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:40:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 01:40:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:40:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 01:40:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:40:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:40:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:40:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:40:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:40:21.953674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:40:21.956122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:40:21.956129  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472400 0xc000472440]
E0320 01:40:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:40:23.409757  543705 memory.go:184] no items to output this cycle
I0320 01:40:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 01:40:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:40:33.409799  543705 memory.go:184] no items to output this cycle
I0320 01:40:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 01:40:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:40:43.409774  543705 memory.go:191] Add success.
I0320 01:40:43.409808  543705 cpu.go:282] Add success.
I0320 01:40:43.419834  543705 net.go:648] Add success.
I0320 01:40:43.422871  543705 net.go:770] primary dev: ETH0
I0320 01:40:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:40:43.422897  543705 net.go:698] Add success.
I0320 01:40:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:40:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:40:53.409784  543705 cpu.go:275] no items to output this cycle
I0320 01:40:53.409789  543705 memory.go:184] no items to output this cycle
E0320 01:41:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:41:03.409803  543705 memory.go:184] no items to output this cycle
I0320 01:41:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 01:41:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:41:13.409785  543705 memory.go:191] Add success.
W0320 01:41:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:41:13.409816  543705 cpu.go:282] Add success.
W0320 01:41:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:41:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:41:13.420311  543705 net.go:648] Add success.
I0320 01:41:13.423057  543705 net.go:770] primary dev: ETH0
I0320 01:41:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:41:13.423082  543705 net.go:698] Add success.
I0320 01:41:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:41:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:41:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 01:41:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:41:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 01:41:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:41:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:41:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:41:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:41:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:41:21.957674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:41:21.960211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:41:21.960217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467c00 0xc000467c40]
E0320 01:41:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:41:23.409779  543705 memory.go:184] no items to output this cycle
I0320 01:41:23.409782  543705 cpu.go:275] no items to output this cycle
E0320 01:41:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:41:33.409798  543705 memory.go:184] no items to output this cycle
I0320 01:41:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:41:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:41:43.409817  543705 memory.go:191] Add success.
I0320 01:41:43.409831  543705 cpu.go:282] Add success.
I0320 01:41:43.420022  543705 net.go:648] Add success.
I0320 01:41:43.422807  543705 net.go:770] primary dev: ETH0
I0320 01:41:43.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:41:43.422837  543705 net.go:698] Add success.
I0320 01:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:41:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:41:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:41:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:41:53.409782  543705 memory.go:184] no items to output this cycle
I0320 01:41:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:42:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:42:03.409773  543705 memory.go:184] no items to output this cycle
I0320 01:42:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:42:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:42:13.409805  543705 memory.go:191] Add success.
I0320 01:42:13.409812  543705 cpu.go:282] Add success.
W0320 01:42:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:42:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:42:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:42:13.420038  543705 net.go:648] Add success.
I0320 01:42:13.422756  543705 net.go:770] primary dev: ETH0
I0320 01:42:13.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:42:13.422781  543705 net.go:698] Add success.
I0320 01:42:13.470099  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ae30a92-0dbb-4c7f-b005-77f27d8921a7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:42:13.470132  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 01:42:14.455710  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:42:14.455726  543705 disk_worker.go:708] disk space is not compliant
W0320 01:42:14.455730  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:42:14.456282  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:42:14.456291  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:42:14.456298  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:42:14.457627  543705 disk_worker.go:494] system disk:vda1
I0320 01:42:14.457675  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:42:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:42:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:42:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:42:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:42:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:42:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:42:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:42:21.961673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:42:21.964164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:42:21.964171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd280 0xc0002bd2c0]
E0320 01:42:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:42:23.409792  543705 memory.go:184] no items to output this cycle
I0320 01:42:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 01:42:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:42:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 01:42:33.409812  543705 memory.go:184] no items to output this cycle
I0320 01:42:38.029727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:42:38.029733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:42:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:42:43.410637  543705 memory.go:191] Add success.
I0320 01:42:43.409782  543705 cpu.go:282] Add success.
I0320 01:42:43.420345  543705 net.go:648] Add success.
I0320 01:42:43.422970  543705 net.go:770] primary dev: ETH0
I0320 01:42:43.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:42:43.423000  543705 net.go:698] Add success.
I0320 01:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:42:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:42:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:42:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:42:53.409775  543705 memory.go:184] no items to output this cycle
I0320 01:42:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 01:43:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:43:03.409782  543705 memory.go:184] no items to output this cycle
I0320 01:43:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 01:43:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:43:13.409791  543705 memory.go:191] Add success.
W0320 01:43:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:43:13.409819  543705 cpu.go:282] Add success.
W0320 01:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:43:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:43:13.420218  543705 net.go:648] Add success.
I0320 01:43:13.422919  543705 net.go:770] primary dev: ETH0
I0320 01:43:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:43:13.422949  543705 net.go:698] Add success.
I0320 01:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:43:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:43:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 01:43:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:43:14.457768  543705 disk_worker.go:494] system disk:vda1
I0320 01:43:14.457804  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:43:15.455946  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:43:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:43:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:43:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:43:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:43:21.965676  543705 disk_info.go:125] begin check local disk info of client
I0320 01:43:21.968608  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:43:21.968614  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394940 0xc000394980]
E0320 01:43:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:43:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 01:43:23.409782  543705 memory.go:184] no items to output this cycle
E0320 01:43:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:43:33.409784  543705 memory.go:184] no items to output this cycle
I0320 01:43:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:43:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:43:43.409815  543705 memory.go:191] Add success.
I0320 01:43:43.409821  543705 cpu.go:282] Add success.
I0320 01:43:43.419883  543705 net.go:648] Add success.
I0320 01:43:43.422830  543705 net.go:770] primary dev: ETH0
I0320 01:43:43.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:43:43.422855  543705 net.go:698] Add success.
I0320 01:43:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:43:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:43:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:43:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:43:53.409787  543705 memory.go:184] no items to output this cycle
I0320 01:43:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:44:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:44:03.409799  543705 memory.go:184] no items to output this cycle
I0320 01:44:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 01:44:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:44:13.409815  543705 memory.go:191] Add success.
I0320 01:44:13.409815  543705 cpu.go:282] Add success.
W0320 01:44:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:44:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:44:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:44:13.420127  543705 net.go:648] Add success.
I0320 01:44:13.422851  543705 net.go:770] primary dev: ETH0
I0320 01:44:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:44:13.422880  543705 net.go:698] Add success.
I0320 01:44:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:44:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:44:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 01:44:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:44:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 01:44:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:44:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:44:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:44:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:44:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:44:21.969673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:44:21.972197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:44:21.972204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005702c0 0xc000570300]
E0320 01:44:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:44:23.409773  543705 memory.go:184] no items to output this cycle
I0320 01:44:23.409778  543705 cpu.go:275] no items to output this cycle
E0320 01:44:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:44:33.409800  543705 memory.go:184] no items to output this cycle
I0320 01:44:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:44:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:44:43.409779  543705 memory.go:191] Add success.
I0320 01:44:43.409799  543705 cpu.go:282] Add success.
I0320 01:44:43.419881  543705 net.go:648] Add success.
I0320 01:44:43.422766  543705 net.go:770] primary dev: ETH0
I0320 01:44:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:44:43.422793  543705 net.go:698] Add success.
I0320 01:44:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:44:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:44:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:44:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:44:53.409777  543705 memory.go:184] no items to output this cycle
I0320 01:44:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:45:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:45:03.409774  543705 memory.go:184] no items to output this cycle
I0320 01:45:03.409777  543705 cpu.go:275] no items to output this cycle
E0320 01:45:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:45:13.409821  543705 memory.go:191] Add success.
I0320 01:45:13.409824  543705 cpu.go:282] Add success.
W0320 01:45:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:45:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:45:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:45:13.420149  543705 net.go:648] Add success.
I0320 01:45:13.423118  543705 net.go:770] primary dev: ETH0
I0320 01:45:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:45:13.423147  543705 net.go:698] Add success.
I0320 01:45:14.069134  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af7acb95-914d-4141-86cd-69d9ea6c1b1c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:45:14.069184  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:45:14.454693  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:45:14.454849  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:45:14.454935  543705 disk_worker.go:708] disk space is not compliant
W0320 01:45:14.454939  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:45:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 01:45:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:45:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:45:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:45:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:45:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:45:21.973668  543705 disk_info.go:125] begin check local disk info of client
I0320 01:45:21.976266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:45:21.976274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353140 0xc000353180]
E0320 01:45:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:45:23.409768  543705 memory.go:184] no items to output this cycle
I0320 01:45:23.409922  543705 cpu.go:275] no items to output this cycle
E0320 01:45:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:45:33.409777  543705 memory.go:184] no items to output this cycle
I0320 01:45:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 01:45:38.029871  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:45:38.029877  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:45:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:45:43.410744  543705 memory.go:191] Add success.
I0320 01:45:43.409808  543705 cpu.go:282] Add success.
I0320 01:45:43.420468  543705 net.go:648] Add success.
I0320 01:45:43.423212  543705 net.go:770] primary dev: ETH0
I0320 01:45:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:45:43.423241  543705 net.go:698] Add success.
I0320 01:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:45:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:45:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:45:53.409816  543705 memory.go:184] no items to output this cycle
I0320 01:45:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 01:46:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:46:03.409787  543705 memory.go:184] no items to output this cycle
I0320 01:46:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 01:46:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:46:13.409796  543705 cpu.go:282] Add success.
I0320 01:46:13.409798  543705 memory.go:191] Add success.
W0320 01:46:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:46:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:46:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:46:13.420065  543705 net.go:648] Add success.
I0320 01:46:13.422753  543705 net.go:770] primary dev: ETH0
I0320 01:46:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:46:13.422780  543705 net.go:698] Add success.
I0320 01:46:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:46:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:46:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 01:46:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:46:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 01:46:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:46:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:46:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:46:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:46:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:46:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:46:21.977673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:46:21.980082  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:46:21.980089  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029ab80 0xc00029abc0]
E0320 01:46:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:46:23.409772  543705 memory.go:184] no items to output this cycle
I0320 01:46:23.409777  543705 cpu.go:275] no items to output this cycle
E0320 01:46:33.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:46:33.409868  543705 memory.go:184] no items to output this cycle
I0320 01:46:33.409997  543705 cpu.go:275] no items to output this cycle
E0320 01:46:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:46:43.409790  543705 memory.go:191] Add success.
I0320 01:46:43.409806  543705 cpu.go:282] Add success.
I0320 01:46:43.419880  543705 net.go:648] Add success.
I0320 01:46:43.422536  543705 net.go:770] primary dev: ETH0
I0320 01:46:43.422551  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:46:43.422565  543705 net.go:698] Add success.
I0320 01:46:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:46:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:46:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:46:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:46:53.409806  543705 memory.go:184] no items to output this cycle
I0320 01:46:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 01:47:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:47:03.409787  543705 memory.go:184] no items to output this cycle
I0320 01:47:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 01:47:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:47:13.409806  543705 memory.go:191] Add success.
I0320 01:47:13.409810  543705 cpu.go:282] Add success.
W0320 01:47:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:47:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:47:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:47:13.420056  543705 net.go:648] Add success.
I0320 01:47:13.423144  543705 net.go:770] primary dev: ETH0
I0320 01:47:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:47:13.423177  543705 net.go:698] Add success.
I0320 01:47:13.453744  543705 event_worker.go:152] Polling the log file for events...
W0320 01:47:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:47:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 01:47:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:47:14.456925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:47:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:47:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:47:14.457010  543705 disk_worker.go:494] system disk:vda1
I0320 01:47:14.457055  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:47:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:47:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:47:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:47:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:47:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:47:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:47:16.472318  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:47:21.981681  543705 disk_info.go:125] begin check local disk info of client
I0320 01:47:21.984083  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:47:21.984089  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270340 0xc000270380]
E0320 01:47:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:47:23.409775  543705 memory.go:184] no items to output this cycle
I0320 01:47:23.409777  543705 cpu.go:275] no items to output this cycle
E0320 01:47:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:47:33.409786  543705 memory.go:184] no items to output this cycle
I0320 01:47:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:47:43.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:47:43.409903  543705 memory.go:191] Add success.
I0320 01:47:43.409983  543705 cpu.go:282] Add success.
I0320 01:47:43.419725  543705 net.go:648] Add success.
I0320 01:47:43.422306  543705 net.go:770] primary dev: ETH0
I0320 01:47:43.422320  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:47:43.422334  543705 net.go:698] Add success.
I0320 01:47:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:47:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:47:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:47:53.409816  543705 memory.go:184] no items to output this cycle
I0320 01:47:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 01:48:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:48:03.409775  543705 memory.go:184] no items to output this cycle
I0320 01:48:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:48:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:48:13.409783  543705 memory.go:191] Add success.
I0320 01:48:13.409804  543705 cpu.go:282] Add success.
W0320 01:48:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:48:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:48:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:48:13.420265  543705 net.go:648] Add success.
I0320 01:48:13.423189  543705 net.go:770] primary dev: ETH0
I0320 01:48:13.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:48:13.423213  543705 net.go:698] Add success.
I0320 01:48:13.585986  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d788d7ad-828e-4204-92de-6461dd5d7ccb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:48:13.586019  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:48:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:48:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 01:48:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:48:14.456678  543705 disk_worker.go:494] system disk:vda1
I0320 01:48:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:48:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:48:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:48:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:48:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:48:21.985676  543705 disk_info.go:125] begin check local disk info of client
I0320 01:48:21.988083  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:48:21.988090  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270dc0 0xc000270e00]
E0320 01:48:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:48:23.409787  543705 memory.go:184] no items to output this cycle
I0320 01:48:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 01:48:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:48:33.409780  543705 memory.go:184] no items to output this cycle
I0320 01:48:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 01:48:38.030020  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:48:38.030027  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:48:43.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:48:43.410836  543705 memory.go:191] Add success.
I0320 01:48:43.410075  543705 cpu.go:282] Add success.
I0320 01:48:43.419754  543705 net.go:648] Add success.
I0320 01:48:43.422555  543705 net.go:770] primary dev: ETH0
I0320 01:48:43.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:48:43.422580  543705 net.go:698] Add success.
I0320 01:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:48:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:48:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:48:53.409805  543705 memory.go:184] no items to output this cycle
I0320 01:48:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 01:49:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:49:03.409806  543705 memory.go:184] no items to output this cycle
I0320 01:49:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 01:49:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:49:13.409798  543705 memory.go:191] Add success.
I0320 01:49:13.409801  543705 cpu.go:282] Add success.
W0320 01:49:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:49:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:49:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:49:13.420111  543705 net.go:648] Add success.
I0320 01:49:13.422902  543705 net.go:770] primary dev: ETH0
I0320 01:49:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:49:13.422928  543705 net.go:698] Add success.
I0320 01:49:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:49:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:49:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 01:49:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:49:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 01:49:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:49:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:49:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:49:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:49:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:49:21.989674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:49:21.992084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:49:21.992090  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d100 0xc00034d140]
E0320 01:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:49:23.409778  543705 memory.go:184] no items to output this cycle
I0320 01:49:23.409791  543705 cpu.go:275] no items to output this cycle
E0320 01:49:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:49:33.409784  543705 memory.go:184] no items to output this cycle
I0320 01:49:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 01:49:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:49:43.409903  543705 cpu.go:282] Add success.
I0320 01:49:43.409905  543705 memory.go:191] Add success.
I0320 01:49:43.419745  543705 net.go:648] Add success.
I0320 01:49:43.422390  543705 net.go:770] primary dev: ETH0
I0320 01:49:43.422403  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:49:43.422414  543705 net.go:698] Add success.
I0320 01:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:49:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:49:53.409795  543705 memory.go:184] no items to output this cycle
I0320 01:49:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:50:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:50:03.409778  543705 memory.go:184] no items to output this cycle
I0320 01:50:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 01:50:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:50:13.409827  543705 memory.go:191] Add success.
I0320 01:50:13.409832  543705 cpu.go:282] Add success.
W0320 01:50:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:50:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:50:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:50:13.420145  543705 net.go:648] Add success.
I0320 01:50:13.423046  543705 net.go:770] primary dev: ETH0
I0320 01:50:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:50:13.423075  543705 net.go:698] Add success.
I0320 01:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:50:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:50:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 01:50:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:50:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 01:50:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:50:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:50:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:50:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:50:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:50:21.993671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:50:21.996129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:50:21.996135  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c100 0xc00057c140]
E0320 01:50:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:50:23.409782  543705 memory.go:184] no items to output this cycle
I0320 01:50:23.409785  543705 cpu.go:275] no items to output this cycle
E0320 01:50:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:50:33.409804  543705 memory.go:184] no items to output this cycle
I0320 01:50:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 01:50:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:50:43.409798  543705 memory.go:191] Add success.
I0320 01:50:43.409798  543705 cpu.go:282] Add success.
I0320 01:50:43.420019  543705 net.go:648] Add success.
I0320 01:50:43.422962  543705 net.go:770] primary dev: ETH0
I0320 01:50:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:50:43.422986  543705 net.go:698] Add success.
I0320 01:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:50:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:50:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:50:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:50:53.409795  543705 memory.go:184] no items to output this cycle
I0320 01:50:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 01:51:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:51:03.409773  543705 memory.go:184] no items to output this cycle
I0320 01:51:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 01:51:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:51:13.409828  543705 memory.go:191] Add success.
I0320 01:51:13.409833  543705 cpu.go:282] Add success.
W0320 01:51:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:51:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:51:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:51:13.420081  543705 net.go:648] Add success.
I0320 01:51:13.422897  543705 net.go:770] primary dev: ETH0
I0320 01:51:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:51:13.422922  543705 net.go:698] Add success.
I0320 01:51:13.469632  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c41fffe-a196-48a8-8441-c67be94060df","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:51:13.469684  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:51:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:51:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:51:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 01:51:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:51:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 01:51:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:51:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:51:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:51:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:51:21.997675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:51:22.000091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:51:22.000098  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376e00 0xc000376e40]
E0320 01:51:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:51:23.409791  543705 memory.go:184] no items to output this cycle
I0320 01:51:23.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:51:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:51:33.409811  543705 memory.go:184] no items to output this cycle
I0320 01:51:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 01:51:38.032863  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:51:38.032870  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:51:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:51:43.410667  543705 memory.go:191] Add success.
I0320 01:51:43.409806  543705 cpu.go:282] Add success.
I0320 01:51:43.420649  543705 net.go:648] Add success.
I0320 01:51:43.423521  543705 net.go:770] primary dev: ETH0
I0320 01:51:43.423536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:51:43.423549  543705 net.go:698] Add success.
I0320 01:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:51:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:51:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:51:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:51:53.409782  543705 memory.go:184] no items to output this cycle
I0320 01:51:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 01:52:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:52:03.409810  543705 memory.go:184] no items to output this cycle
I0320 01:52:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:52:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:52:13.409823  543705 memory.go:191] Add success.
I0320 01:52:13.409825  543705 cpu.go:282] Add success.
W0320 01:52:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:52:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:52:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:52:13.420278  543705 net.go:648] Add success.
I0320 01:52:13.422974  543705 net.go:770] primary dev: ETH0
I0320 01:52:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:52:13.422999  543705 net.go:698] Add success.
W0320 01:52:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:52:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 01:52:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:52:14.456969  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:52:14.456980  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:52:14.456986  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:52:14.457042  543705 disk_worker.go:494] system disk:vda1
I0320 01:52:14.457091  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:52:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:52:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:52:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:52:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:52:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:52:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:52:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:52:22.001674  543705 disk_info.go:125] begin check local disk info of client
I0320 01:52:22.004102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:52:22.004107  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377540 0xc000377580]
E0320 01:52:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:52:23.409790  543705 memory.go:184] no items to output this cycle
I0320 01:52:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 01:52:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:52:33.409811  543705 memory.go:184] no items to output this cycle
I0320 01:52:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 01:52:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:52:43.409788  543705 memory.go:191] Add success.
I0320 01:52:43.409789  543705 cpu.go:282] Add success.
I0320 01:52:43.420256  543705 net.go:648] Add success.
I0320 01:52:43.422984  543705 net.go:770] primary dev: ETH0
I0320 01:52:43.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:52:43.423009  543705 net.go:698] Add success.
I0320 01:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:52:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:52:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:52:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:52:53.409784  543705 memory.go:184] no items to output this cycle
I0320 01:52:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 01:53:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:53:03.409780  543705 memory.go:184] no items to output this cycle
I0320 01:53:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 01:53:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:53:13.409791  543705 memory.go:191] Add success.
I0320 01:53:13.409808  543705 cpu.go:282] Add success.
W0320 01:53:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:53:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:53:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:53:13.420175  543705 net.go:648] Add success.
I0320 01:53:13.422782  543705 net.go:770] primary dev: ETH0
I0320 01:53:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:53:13.422808  543705 net.go:698] Add success.
I0320 01:53:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:53:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:53:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 01:53:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:53:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 01:53:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:53:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:53:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:53:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:53:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:53:22.005671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:53:22.008101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:53:22.008107  543705 disk_info.go:196] parse disk info done, disk is : [0xc000587f00 0xc000587f40]
E0320 01:53:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:53:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 01:53:23.409787  543705 memory.go:184] no items to output this cycle
E0320 01:53:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:53:33.409801  543705 memory.go:184] no items to output this cycle
I0320 01:53:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 01:53:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:53:43.409790  543705 memory.go:191] Add success.
I0320 01:53:43.409791  543705 cpu.go:282] Add success.
I0320 01:53:43.420274  543705 net.go:648] Add success.
I0320 01:53:43.423222  543705 net.go:770] primary dev: ETH0
I0320 01:53:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:53:43.423246  543705 net.go:698] Add success.
I0320 01:53:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:53:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:53:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:53:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:53:53.409809  543705 memory.go:184] no items to output this cycle
I0320 01:53:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 01:54:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:54:03.409783  543705 memory.go:184] no items to output this cycle
I0320 01:54:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 01:54:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:54:13.409822  543705 memory.go:191] Add success.
I0320 01:54:13.409824  543705 cpu.go:282] Add success.
W0320 01:54:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:54:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:54:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:54:13.420154  543705 net.go:648] Add success.
I0320 01:54:13.422891  543705 net.go:770] primary dev: ETH0
I0320 01:54:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:54:13.422916  543705 net.go:698] Add success.
I0320 01:54:13.470052  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"02132184-4292-4262-9a2a-dd493d7152a3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:54:13.470087  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 01:54:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:54:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:54:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 01:54:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:54:14.456504  543705 disk_worker.go:494] system disk:vda1
I0320 01:54:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:54:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:54:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:54:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:54:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:54:22.009675  543705 disk_info.go:125] begin check local disk info of client
I0320 01:54:22.012145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:54:22.012151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8640 0xc0002a8680]
E0320 01:54:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:54:23.409788  543705 memory.go:184] no items to output this cycle
I0320 01:54:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 01:54:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:54:33.409808  543705 memory.go:184] no items to output this cycle
I0320 01:54:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 01:54:38.033740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:54:38.033747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:54:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:54:43.410703  543705 memory.go:191] Add success.
I0320 01:54:43.409794  543705 cpu.go:282] Add success.
I0320 01:54:43.420460  543705 net.go:648] Add success.
I0320 01:54:43.423002  543705 net.go:770] primary dev: ETH0
I0320 01:54:43.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:54:43.423029  543705 net.go:698] Add success.
I0320 01:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:54:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:54:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:54:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:54:53.409814  543705 memory.go:184] no items to output this cycle
I0320 01:54:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 01:55:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:55:03.409781  543705 memory.go:184] no items to output this cycle
I0320 01:55:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 01:55:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:55:13.409790  543705 memory.go:191] Add success.
W0320 01:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:55:13.409820  543705 cpu.go:282] Add success.
W0320 01:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:55:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:55:13.420127  543705 net.go:648] Add success.
I0320 01:55:13.422706  543705 net.go:770] primary dev: ETH0
I0320 01:55:13.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:55:13.422734  543705 net.go:698] Add success.
I0320 01:55:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:55:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:55:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 01:55:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:55:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 01:55:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:55:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:55:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:55:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:55:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:55:22.013672  543705 disk_info.go:125] begin check local disk info of client
I0320 01:55:22.016038  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:55:22.016044  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364380 0xc0003643c0]
E0320 01:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:55:23.409784  543705 memory.go:184] no items to output this cycle
I0320 01:55:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 01:55:33.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:55:33.409887  543705 cpu.go:275] no items to output this cycle
I0320 01:55:33.409921  543705 memory.go:184] no items to output this cycle
E0320 01:55:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:55:43.409815  543705 memory.go:191] Add success.
I0320 01:55:43.409826  543705 cpu.go:282] Add success.
I0320 01:55:43.420012  543705 net.go:648] Add success.
I0320 01:55:43.422752  543705 net.go:770] primary dev: ETH0
I0320 01:55:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:55:43.422781  543705 net.go:698] Add success.
I0320 01:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:55:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:55:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:55:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:55:53.409778  543705 memory.go:184] no items to output this cycle
I0320 01:55:53.409800  543705 cpu.go:275] no items to output this cycle
I0320 01:56:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 01:56:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:56:03.409814  543705 memory.go:184] no items to output this cycle
E0320 01:56:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:56:13.409812  543705 memory.go:191] Add success.
I0320 01:56:13.409821  543705 cpu.go:282] Add success.
W0320 01:56:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:56:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:56:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:56:13.420173  543705 net.go:648] Add success.
I0320 01:56:13.422781  543705 net.go:770] primary dev: ETH0
I0320 01:56:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:56:13.422810  543705 net.go:698] Add success.
I0320 01:56:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:56:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:56:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 01:56:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:56:14.456678  543705 disk_worker.go:494] system disk:vda1
I0320 01:56:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:56:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:56:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:56:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:56:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:56:16.472440  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:56:22.017676  543705 disk_info.go:125] begin check local disk info of client
I0320 01:56:22.020108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:56:22.020114  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003045c0 0xc000304600]
E0320 01:56:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:56:23.409785  543705 memory.go:184] no items to output this cycle
I0320 01:56:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 01:56:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:56:33.409784  543705 memory.go:184] no items to output this cycle
I0320 01:56:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 01:56:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:56:43.409817  543705 memory.go:191] Add success.
I0320 01:56:43.409829  543705 cpu.go:282] Add success.
I0320 01:56:43.419883  543705 net.go:648] Add success.
I0320 01:56:43.422707  543705 net.go:770] primary dev: ETH0
I0320 01:56:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:56:43.422732  543705 net.go:698] Add success.
I0320 01:56:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:56:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:56:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:56:53.409779  543705 memory.go:184] no items to output this cycle
I0320 01:56:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 01:57:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:57:03.409785  543705 memory.go:184] no items to output this cycle
I0320 01:57:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 01:57:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:57:13.409822  543705 memory.go:191] Add success.
I0320 01:57:13.409833  543705 cpu.go:282] Add success.
W0320 01:57:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:57:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:57:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:57:13.420145  543705 net.go:648] Add success.
I0320 01:57:13.422905  543705 net.go:770] primary dev: ETH0
I0320 01:57:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:57:13.422930  543705 net.go:698] Add success.
I0320 01:57:13.429293  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 01:57:13.453479  543705 event_worker.go:152] Polling the log file for events...
I0320 01:57:13.469143  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"732ce3e2-a68e-49c3-b5f1-3040ef3c1b1f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 01:57:13.469180  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 01:57:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:57:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 01:57:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0320 01:57:14.456595  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 01:57:14.456604  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 01:57:14.456609  543705 custom_config.go:64] query custom config with name: gpu
I0320 01:57:14.457164  543705 disk_worker.go:494] system disk:vda1
I0320 01:57:14.457197  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 01:57:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 01:57:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:57:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 01:57:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 01:57:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:57:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:57:16.472301  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:57:22.021673  543705 disk_info.go:125] begin check local disk info of client
I0320 01:57:22.024064  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:57:22.024071  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa340 0xc0001aa380]
E0320 01:57:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:57:23.409759  543705 memory.go:184] no items to output this cycle
I0320 01:57:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 01:57:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:57:33.409798  543705 memory.go:184] no items to output this cycle
I0320 01:57:33.409816  543705 cpu.go:275] no items to output this cycle
I0320 01:57:38.036866  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 01:57:38.036872  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 01:57:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:57:43.410669  543705 memory.go:191] Add success.
I0320 01:57:43.409800  543705 cpu.go:282] Add success.
I0320 01:57:43.420379  543705 net.go:648] Add success.
I0320 01:57:43.423232  543705 net.go:770] primary dev: ETH0
I0320 01:57:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:57:43.423257  543705 net.go:698] Add success.
I0320 01:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:57:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:57:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:57:53.409825  543705 memory.go:184] no items to output this cycle
I0320 01:57:53.409835  543705 cpu.go:275] no items to output this cycle
E0320 01:58:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:58:03.409791  543705 memory.go:184] no items to output this cycle
I0320 01:58:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 01:58:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:58:13.409794  543705 memory.go:191] Add success.
I0320 01:58:13.409795  543705 cpu.go:282] Add success.
W0320 01:58:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 01:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:58:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:58:13.420117  543705 net.go:648] Add success.
I0320 01:58:13.422797  543705 net.go:770] primary dev: ETH0
I0320 01:58:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:58:13.422823  543705 net.go:698] Add success.
I0320 01:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:58:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:58:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 01:58:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:58:14.457387  543705 disk_worker.go:494] system disk:vda1
I0320 01:58:14.457432  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:58:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:58:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:58:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:58:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:58:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:58:22.025671  543705 disk_info.go:125] begin check local disk info of client
I0320 01:58:22.028130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:58:22.028137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3640 0xc0003b3680]
E0320 01:58:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:58:23.409789  543705 memory.go:184] no items to output this cycle
I0320 01:58:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 01:58:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:58:33.409785  543705 memory.go:184] no items to output this cycle
I0320 01:58:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 01:58:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:58:43.409787  543705 memory.go:191] Add success.
I0320 01:58:43.409794  543705 cpu.go:282] Add success.
I0320 01:58:43.419895  543705 net.go:648] Add success.
I0320 01:58:43.422839  543705 net.go:770] primary dev: ETH0
I0320 01:58:43.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:58:43.422865  543705 net.go:698] Add success.
I0320 01:58:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:58:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:58:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:58:53.409812  543705 memory.go:184] no items to output this cycle
I0320 01:58:53.409829  543705 cpu.go:275] no items to output this cycle
E0320 01:59:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:59:03.409784  543705 memory.go:184] no items to output this cycle
I0320 01:59:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 01:59:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:59:13.409787  543705 memory.go:191] Add success.
W0320 01:59:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 01:59:13.409812  543705 cpu.go:282] Add success.
W0320 01:59:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 01:59:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 01:59:13.420083  543705 net.go:648] Add success.
I0320 01:59:13.423245  543705 net.go:770] primary dev: ETH0
I0320 01:59:13.423257  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:59:13.423269  543705 net.go:698] Add success.
I0320 01:59:14.453958  543705 custom_config.go:64] query custom config with name: gpu
W0320 01:59:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 01:59:14.455252  543705 disk_worker.go:708] disk space is not compliant
W0320 01:59:14.455255  543705 disk_worker.go:728] disk inode is not compliant
I0320 01:59:14.456638  543705 disk_worker.go:494] system disk:vda1
I0320 01:59:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 01:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 01:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:59:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:59:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 01:59:16.472480  543705 disk_local_worker.go:436] Get disk info: []
I0320 01:59:22.029672  543705 disk_info.go:125] begin check local disk info of client
I0320 01:59:22.032106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 01:59:22.032112  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b38c0 0xc0003b3900]
E0320 01:59:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:59:23.409772  543705 memory.go:184] no items to output this cycle
I0320 01:59:23.409774  543705 cpu.go:275] no items to output this cycle
E0320 01:59:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:59:33.409804  543705 memory.go:184] no items to output this cycle
I0320 01:59:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 01:59:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:59:43.409788  543705 memory.go:191] Add success.
I0320 01:59:43.409790  543705 cpu.go:282] Add success.
I0320 01:59:43.419889  543705 net.go:648] Add success.
I0320 01:59:43.423010  543705 net.go:770] primary dev: ETH0
I0320 01:59:43.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0320 01:59:43.423040  543705 net.go:698] Add success.
I0320 01:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 01:59:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 01:59:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 01:59:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 01:59:53.409789  543705 memory.go:184] no items to output this cycle
I0320 01:59:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 02:00:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:00:03.409770  543705 memory.go:184] no items to output this cycle
I0320 02:00:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:00:13.409787  543705 memory.go:191] Add success.
I0320 02:00:13.409793  543705 cpu.go:282] Add success.
W0320 02:00:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:00:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:00:13.420313  543705 net.go:648] Add success.
I0320 02:00:13.423212  543705 net.go:770] primary dev: ETH0
I0320 02:00:13.423225  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:00:13.423237  543705 net.go:698] Add success.
I0320 02:00:13.468578  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6cb1c3c2-a3a0-4b17-a313-a3061ed4ed13","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:00:13.468611  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:00:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:00:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:00:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0320 02:00:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:00:14.456821  543705 disk_worker.go:494] system disk:vda1
I0320 02:00:14.456866  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:00:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:00:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:00:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:00:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:00:22.033675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:00:22.036111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:00:22.036117  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483c40 0xc000483c80]
E0320 02:00:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:00:23.409789  543705 memory.go:184] no items to output this cycle
I0320 02:00:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 02:00:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:00:33.409779  543705 memory.go:184] no items to output this cycle
I0320 02:00:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 02:00:38.037753  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:00:38.037760  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:00:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:00:43.410723  543705 memory.go:191] Add success.
I0320 02:00:43.409839  543705 cpu.go:282] Add success.
I0320 02:00:43.420430  543705 net.go:648] Add success.
I0320 02:00:43.423214  543705 net.go:770] primary dev: ETH0
I0320 02:00:43.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:00:43.423242  543705 net.go:698] Add success.
I0320 02:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:00:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:00:53.409826  543705 memory.go:184] no items to output this cycle
I0320 02:00:53.409836  543705 cpu.go:275] no items to output this cycle
E0320 02:01:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:01:03.409809  543705 memory.go:184] no items to output this cycle
I0320 02:01:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 02:01:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:01:13.409893  543705 memory.go:191] Add success.
W0320 02:01:13.409925  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:01:13.409938  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:01:13.409941  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:01:13.409946  543705 cpu.go:282] Add success.
I0320 02:01:13.419757  543705 net.go:648] Add success.
I0320 02:01:13.422515  543705 net.go:770] primary dev: ETH0
I0320 02:01:13.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:01:13.422540  543705 net.go:698] Add success.
I0320 02:01:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:01:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:01:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 02:01:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:01:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 02:01:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:01:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:01:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:01:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:01:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:01:22.037675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:01:22.040087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:01:22.040093  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa6c0 0xc0001aa700]
E0320 02:01:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:01:23.409796  543705 memory.go:184] no items to output this cycle
I0320 02:01:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 02:01:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:01:33.409796  543705 memory.go:184] no items to output this cycle
I0320 02:01:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 02:01:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:01:43.409831  543705 memory.go:191] Add success.
I0320 02:01:43.409832  543705 cpu.go:282] Add success.
I0320 02:01:43.419873  543705 net.go:648] Add success.
I0320 02:01:43.422585  543705 net.go:770] primary dev: ETH0
I0320 02:01:43.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:01:43.422628  543705 net.go:698] Add success.
I0320 02:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:01:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:01:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:01:53.409812  543705 memory.go:184] no items to output this cycle
I0320 02:01:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 02:02:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:02:03.409786  543705 memory.go:184] no items to output this cycle
I0320 02:02:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:02:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:02:13.409791  543705 memory.go:191] Add success.
W0320 02:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 02:02:13.409824  543705 cpu.go:282] Add success.
W0320 02:02:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:02:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:02:13.420235  543705 net.go:648] Add success.
I0320 02:02:13.422975  543705 net.go:770] primary dev: ETH0
I0320 02:02:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:02:13.423000  543705 net.go:698] Add success.
W0320 02:02:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:02:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 02:02:14.455226  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:02:14.456901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:02:14.456911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:02:14.456917  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:02:14.458024  543705 disk_worker.go:494] system disk:vda1
I0320 02:02:14.458064  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:02:15.456894  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:02:15.456907  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:02:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:02:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:02:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:02:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:02:16.472479  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:02:22.041675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:02:22.044138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:02:22.044145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481280 0xc0004812c0]
E0320 02:02:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:02:23.409763  543705 memory.go:184] no items to output this cycle
I0320 02:02:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 02:02:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:02:33.409772  543705 memory.go:184] no items to output this cycle
I0320 02:02:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:02:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:02:43.409802  543705 memory.go:191] Add success.
I0320 02:02:43.409803  543705 cpu.go:282] Add success.
I0320 02:02:43.419968  543705 net.go:648] Add success.
I0320 02:02:43.422484  543705 net.go:770] primary dev: ETH0
I0320 02:02:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:02:43.422509  543705 net.go:698] Add success.
I0320 02:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:02:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:02:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:02:53.409816  543705 memory.go:184] no items to output this cycle
I0320 02:02:53.409824  543705 cpu.go:275] no items to output this cycle
E0320 02:03:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:03:03.409811  543705 memory.go:184] no items to output this cycle
I0320 02:03:03.409829  543705 cpu.go:275] no items to output this cycle
E0320 02:03:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:03:13.409809  543705 memory.go:191] Add success.
I0320 02:03:13.409809  543705 cpu.go:282] Add success.
W0320 02:03:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:03:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:03:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:03:13.420122  543705 net.go:648] Add success.
I0320 02:03:13.423005  543705 net.go:770] primary dev: ETH0
I0320 02:03:13.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:03:13.423034  543705 net.go:698] Add success.
I0320 02:03:13.464221  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e6bc3c8-41ed-482a-a3da-b472937c2c9a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:03:13.464255  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:03:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:03:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:03:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 02:03:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:03:14.456663  543705 disk_worker.go:494] system disk:vda1
I0320 02:03:14.456692  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:03:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:03:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:03:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:03:22.045671  543705 disk_info.go:125] begin check local disk info of client
I0320 02:03:22.048180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:03:22.048187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463180 0xc0004631c0]
E0320 02:03:23.410706  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:03:23.410724  543705 memory.go:184] no items to output this cycle
I0320 02:03:23.410737  543705 cpu.go:275] no items to output this cycle
E0320 02:03:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:03:33.409781  543705 memory.go:184] no items to output this cycle
I0320 02:03:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 02:03:38.040894  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:03:38.040901  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:03:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:03:43.410645  543705 memory.go:191] Add success.
I0320 02:03:43.409808  543705 cpu.go:282] Add success.
I0320 02:03:43.420329  543705 net.go:648] Add success.
I0320 02:03:43.423133  543705 net.go:770] primary dev: ETH0
I0320 02:03:43.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:03:43.423158  543705 net.go:698] Add success.
I0320 02:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:03:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:03:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:03:53.409777  543705 memory.go:184] no items to output this cycle
I0320 02:03:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 02:04:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:04:03.409798  543705 memory.go:184] no items to output this cycle
I0320 02:04:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 02:04:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:04:13.409781  543705 memory.go:191] Add success.
W0320 02:04:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 02:04:13.409811  543705 cpu.go:282] Add success.
W0320 02:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:04:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:04:13.420136  543705 net.go:648] Add success.
I0320 02:04:13.423323  543705 net.go:770] primary dev: ETH0
I0320 02:04:13.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:04:13.423347  543705 net.go:698] Add success.
I0320 02:04:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:04:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:04:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 02:04:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:04:14.456555  543705 disk_worker.go:494] system disk:vda1
I0320 02:04:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:04:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:04:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:04:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:04:16.472484  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:04:22.049675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:04:22.052162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:04:22.052168  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463740 0xc000463780]
E0320 02:04:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:04:23.409796  543705 memory.go:184] no items to output this cycle
I0320 02:04:23.409810  543705 cpu.go:275] no items to output this cycle
E0320 02:04:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:04:33.409799  543705 memory.go:184] no items to output this cycle
I0320 02:04:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 02:04:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:04:43.409809  543705 memory.go:191] Add success.
I0320 02:04:43.409815  543705 cpu.go:282] Add success.
I0320 02:04:43.419864  543705 net.go:648] Add success.
I0320 02:04:43.422678  543705 net.go:770] primary dev: ETH0
I0320 02:04:43.422690  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:04:43.422704  543705 net.go:698] Add success.
I0320 02:04:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:04:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:04:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:04:53.409785  543705 memory.go:184] no items to output this cycle
I0320 02:04:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 02:05:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:05:03.409808  543705 memory.go:184] no items to output this cycle
I0320 02:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 02:05:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:05:13.409823  543705 memory.go:191] Add success.
I0320 02:05:13.409826  543705 cpu.go:282] Add success.
W0320 02:05:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:05:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:05:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:05:13.420563  543705 net.go:648] Add success.
I0320 02:05:13.423073  543705 net.go:770] primary dev: ETH0
I0320 02:05:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:05:13.423099  543705 net.go:698] Add success.
I0320 02:05:14.453950  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:05:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:05:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0320 02:05:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:05:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 02:05:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:05:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:05:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:05:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:05:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:05:22.053674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:05:22.056073  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:05:22.056079  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046d2c0 0xc00046d300]
E0320 02:05:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:05:23.409790  543705 memory.go:184] no items to output this cycle
I0320 02:05:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:05:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:05:33.409771  543705 memory.go:184] no items to output this cycle
I0320 02:05:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 02:05:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:05:43.409794  543705 memory.go:191] Add success.
I0320 02:05:43.409795  543705 cpu.go:282] Add success.
I0320 02:05:43.419868  543705 net.go:648] Add success.
I0320 02:05:43.422621  543705 net.go:770] primary dev: ETH0
I0320 02:05:43.422635  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:05:43.422648  543705 net.go:698] Add success.
I0320 02:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:05:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:05:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:05:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:05:53.409785  543705 memory.go:184] no items to output this cycle
I0320 02:05:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:06:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:06:03.409774  543705 memory.go:184] no items to output this cycle
I0320 02:06:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:06:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:06:13.409814  543705 memory.go:191] Add success.
I0320 02:06:13.409821  543705 cpu.go:282] Add success.
W0320 02:06:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:06:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:06:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:06:13.420063  543705 net.go:648] Add success.
I0320 02:06:13.422813  543705 net.go:770] primary dev: ETH0
I0320 02:06:13.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:06:13.422836  543705 net.go:698] Add success.
I0320 02:06:13.474093  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf61cd08-58ae-4967-bc89-9396689e88fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:06:13.474124  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:06:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:06:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:06:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 02:06:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:06:14.456743  543705 disk_worker.go:494] system disk:vda1
I0320 02:06:14.456770  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:06:15.455629  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:06:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:06:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:06:16.472449  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:06:22.057675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:06:22.060249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:06:22.060255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff600 0xc0003ff640]
E0320 02:06:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:06:23.409781  543705 memory.go:184] no items to output this cycle
I0320 02:06:23.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:06:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:06:33.409799  543705 memory.go:184] no items to output this cycle
I0320 02:06:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 02:06:38.041741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:06:38.041747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:06:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:06:43.410690  543705 memory.go:191] Add success.
I0320 02:06:43.409797  543705 cpu.go:282] Add success.
I0320 02:06:43.420406  543705 net.go:648] Add success.
I0320 02:06:43.423143  543705 net.go:770] primary dev: ETH0
I0320 02:06:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:06:43.423180  543705 net.go:698] Add success.
I0320 02:06:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:06:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:06:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:06:53.409819  543705 memory.go:184] no items to output this cycle
I0320 02:06:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 02:07:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:07:03.409801  543705 memory.go:184] no items to output this cycle
I0320 02:07:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 02:07:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:07:13.409781  543705 memory.go:191] Add success.
W0320 02:07:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 02:07:13.409815  543705 cpu.go:282] Add success.
W0320 02:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:07:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:07:13.420448  543705 net.go:648] Add success.
I0320 02:07:13.423257  543705 net.go:770] primary dev: ETH0
I0320 02:07:13.423269  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:07:13.423281  543705 net.go:698] Add success.
I0320 02:07:13.452772  543705 event_worker.go:152] Polling the log file for events...
W0320 02:07:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:07:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 02:07:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:07:14.456194  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:07:14.456204  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:07:14.456211  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:07:14.457304  543705 disk_worker.go:494] system disk:vda1
I0320 02:07:14.457334  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:07:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:07:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:07:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:07:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:07:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:07:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:07:16.472335  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:07:22.061672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:07:22.064045  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:07:22.064060  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d180 0xc00034d1c0]
E0320 02:07:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:07:23.409790  543705 memory.go:184] no items to output this cycle
I0320 02:07:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 02:07:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:07:33.409779  543705 memory.go:184] no items to output this cycle
I0320 02:07:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 02:07:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:07:43.409775  543705 memory.go:191] Add success.
I0320 02:07:43.409811  543705 cpu.go:282] Add success.
I0320 02:07:43.419824  543705 net.go:648] Add success.
I0320 02:07:43.422926  543705 net.go:770] primary dev: ETH0
I0320 02:07:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:07:43.422952  543705 net.go:698] Add success.
I0320 02:07:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:07:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:07:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:07:53.409788  543705 memory.go:184] no items to output this cycle
I0320 02:07:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 02:08:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:08:03.409781  543705 memory.go:184] no items to output this cycle
I0320 02:08:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:08:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:08:13.409790  543705 memory.go:191] Add success.
I0320 02:08:13.409796  543705 cpu.go:282] Add success.
W0320 02:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:08:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:08:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:08:13.420129  543705 net.go:648] Add success.
I0320 02:08:13.423200  543705 net.go:770] primary dev: ETH0
I0320 02:08:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:08:13.423227  543705 net.go:698] Add success.
I0320 02:08:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:08:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:08:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 02:08:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:08:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 02:08:14.456773  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:08:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:08:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:08:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:08:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:08:16.472433  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:08:22.065672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:08:22.068145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:08:22.068152  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a240 0xc00036a280]
E0320 02:08:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:08:23.409759  543705 memory.go:184] no items to output this cycle
I0320 02:08:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 02:08:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:08:33.409768  543705 memory.go:184] no items to output this cycle
I0320 02:08:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 02:08:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:08:43.409789  543705 memory.go:191] Add success.
I0320 02:08:43.409794  543705 cpu.go:282] Add success.
I0320 02:08:43.419854  543705 net.go:648] Add success.
I0320 02:08:43.422325  543705 net.go:770] primary dev: ETH0
I0320 02:08:43.422338  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:08:43.422352  543705 net.go:698] Add success.
I0320 02:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:08:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:08:53.409801  543705 memory.go:184] no items to output this cycle
I0320 02:08:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 02:09:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:09:03.409771  543705 memory.go:184] no items to output this cycle
I0320 02:09:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 02:09:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:09:13.409797  543705 memory.go:191] Add success.
I0320 02:09:13.409799  543705 cpu.go:282] Add success.
W0320 02:09:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:09:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:09:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:09:13.420208  543705 net.go:648] Add success.
I0320 02:09:13.422811  543705 net.go:770] primary dev: ETH0
I0320 02:09:13.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:09:13.422839  543705 net.go:698] Add success.
I0320 02:09:13.463888  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"379d19d8-1fe5-4df1-8bd0-7f91107facb2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:09:13.463923  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:09:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:09:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:09:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 02:09:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:09:14.456737  543705 disk_worker.go:494] system disk:vda1
I0320 02:09:14.456769  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:09:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:09:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:09:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:09:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:09:16.472427  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:09:22.069674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:09:22.072062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:09:22.072068  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394200 0xc000394240]
E0320 02:09:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:09:23.409794  543705 memory.go:184] no items to output this cycle
I0320 02:09:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:09:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:09:33.409765  543705 memory.go:184] no items to output this cycle
I0320 02:09:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 02:09:38.044915  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:09:38.044923  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:09:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:09:43.410618  543705 memory.go:191] Add success.
I0320 02:09:43.409822  543705 cpu.go:282] Add success.
I0320 02:09:43.420411  543705 net.go:648] Add success.
I0320 02:09:43.423057  543705 net.go:770] primary dev: ETH0
I0320 02:09:43.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:09:43.423083  543705 net.go:698] Add success.
I0320 02:09:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:09:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:09:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:09:53.409785  543705 memory.go:184] no items to output this cycle
I0320 02:09:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 02:10:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:10:03.409775  543705 memory.go:184] no items to output this cycle
I0320 02:10:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 02:10:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:10:13.409792  543705 memory.go:191] Add success.
I0320 02:10:13.409794  543705 cpu.go:282] Add success.
W0320 02:10:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:10:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:10:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:10:13.420052  543705 net.go:648] Add success.
I0320 02:10:13.422852  543705 net.go:770] primary dev: ETH0
I0320 02:10:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:10:13.422882  543705 net.go:698] Add success.
I0320 02:10:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:10:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:10:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 02:10:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:10:14.456587  543705 disk_worker.go:494] system disk:vda1
I0320 02:10:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:10:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:10:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:10:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:10:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:10:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:10:22.073672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:10:22.076131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:10:22.076138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ae540 0xc0002ae580]
E0320 02:10:23.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:10:23.409754  543705 memory.go:184] no items to output this cycle
I0320 02:10:23.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:10:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:10:33.409802  543705 memory.go:184] no items to output this cycle
I0320 02:10:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 02:10:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:10:43.409778  543705 memory.go:191] Add success.
I0320 02:10:43.409802  543705 cpu.go:282] Add success.
I0320 02:10:43.419954  543705 net.go:648] Add success.
I0320 02:10:43.422700  543705 net.go:770] primary dev: ETH0
I0320 02:10:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:10:43.422729  543705 net.go:698] Add success.
I0320 02:10:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:10:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:10:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:10:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:10:53.409790  543705 cpu.go:275] no items to output this cycle
I0320 02:10:53.409801  543705 memory.go:184] no items to output this cycle
E0320 02:11:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:11:03.409767  543705 memory.go:184] no items to output this cycle
I0320 02:11:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 02:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:11:13.409794  543705 memory.go:191] Add success.
I0320 02:11:13.409813  543705 cpu.go:282] Add success.
W0320 02:11:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:11:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:11:13.420144  543705 net.go:648] Add success.
I0320 02:11:13.423068  543705 net.go:770] primary dev: ETH0
I0320 02:11:13.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:11:13.423092  543705 net.go:698] Add success.
I0320 02:11:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:11:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:11:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 02:11:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:11:14.456618  543705 disk_worker.go:494] system disk:vda1
I0320 02:11:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:11:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:11:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:11:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:11:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:11:22.077664  543705 disk_info.go:125] begin check local disk info of client
I0320 02:11:22.080047  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:11:22.080054  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d40 0xc0000c5d80]
E0320 02:11:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:11:23.409773  543705 memory.go:184] no items to output this cycle
I0320 02:11:23.409772  543705 cpu.go:275] no items to output this cycle
E0320 02:11:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:11:33.409779  543705 memory.go:184] no items to output this cycle
I0320 02:11:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 02:11:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:11:43.409791  543705 memory.go:191] Add success.
I0320 02:11:43.409792  543705 cpu.go:282] Add success.
I0320 02:11:43.419941  543705 net.go:648] Add success.
I0320 02:11:43.422805  543705 net.go:770] primary dev: ETH0
I0320 02:11:43.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:11:43.422830  543705 net.go:698] Add success.
I0320 02:11:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:11:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:11:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:11:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:11:53.409776  543705 memory.go:184] no items to output this cycle
I0320 02:11:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:12:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:12:03.409772  543705 memory.go:184] no items to output this cycle
I0320 02:12:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 02:12:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:12:13.409785  543705 memory.go:191] Add success.
I0320 02:12:13.409803  543705 cpu.go:282] Add success.
W0320 02:12:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:12:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:12:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:12:13.420216  543705 net.go:648] Add success.
I0320 02:12:13.422868  543705 net.go:770] primary dev: ETH0
I0320 02:12:13.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:12:13.422893  543705 net.go:698] Add success.
I0320 02:12:13.469267  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1955f021-d33f-412e-a561-cb4b0a730994","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:12:13.469300  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 02:12:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:12:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 02:12:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:12:14.457008  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:12:14.457017  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:12:14.457023  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:12:14.457042  543705 disk_worker.go:494] system disk:vda1
I0320 02:12:14.457081  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:12:15.456938  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:12:15.456958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:12:16.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:12:16.458006  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:12:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:12:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:12:16.472428  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:12:22.081676  543705 disk_info.go:125] begin check local disk info of client
I0320 02:12:22.084108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:12:22.084114  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003420c0 0xc000342100]
E0320 02:12:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:12:23.409790  543705 memory.go:184] no items to output this cycle
I0320 02:12:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 02:12:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:12:33.409781  543705 memory.go:184] no items to output this cycle
I0320 02:12:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 02:12:38.045728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:12:38.045734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:12:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:12:43.410648  543705 memory.go:191] Add success.
I0320 02:12:43.409794  543705 cpu.go:282] Add success.
I0320 02:12:43.420379  543705 net.go:648] Add success.
I0320 02:12:43.423101  543705 net.go:770] primary dev: ETH0
I0320 02:12:43.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:12:43.423126  543705 net.go:698] Add success.
I0320 02:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:12:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:12:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:12:53.410275  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:12:53.410298  543705 memory.go:184] no items to output this cycle
I0320 02:12:53.410307  543705 cpu.go:275] no items to output this cycle
E0320 02:13:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:13:03.409781  543705 cpu.go:275] no items to output this cycle
I0320 02:13:03.409783  543705 memory.go:184] no items to output this cycle
E0320 02:13:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:13:13.409820  543705 memory.go:191] Add success.
I0320 02:13:13.409830  543705 cpu.go:282] Add success.
W0320 02:13:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:13:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:13:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:13:13.420198  543705 net.go:648] Add success.
I0320 02:13:13.422859  543705 net.go:770] primary dev: ETH0
I0320 02:13:13.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:13:13.422885  543705 net.go:698] Add success.
I0320 02:13:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:13:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:13:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 02:13:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:13:14.456621  543705 disk_worker.go:494] system disk:vda1
I0320 02:13:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:13:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:13:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:13:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:13:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:13:22.085675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:13:22.088074  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:13:22.088080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fea80 0xc0003feac0]
E0320 02:13:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:13:23.409785  543705 memory.go:184] no items to output this cycle
I0320 02:13:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 02:13:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:13:33.409774  543705 memory.go:184] no items to output this cycle
I0320 02:13:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 02:13:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:13:43.409807  543705 memory.go:191] Add success.
I0320 02:13:43.409815  543705 cpu.go:282] Add success.
I0320 02:13:43.419865  543705 net.go:648] Add success.
I0320 02:13:43.422790  543705 net.go:770] primary dev: ETH0
I0320 02:13:43.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:13:43.422817  543705 net.go:698] Add success.
I0320 02:13:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:13:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:13:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:13:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:13:53.409803  543705 memory.go:184] no items to output this cycle
I0320 02:13:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 02:14:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:14:03.409782  543705 memory.go:184] no items to output this cycle
I0320 02:14:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:14:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:14:13.409788  543705 memory.go:191] Add success.
I0320 02:14:13.409790  543705 cpu.go:282] Add success.
W0320 02:14:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:14:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:14:13.420062  543705 net.go:648] Add success.
I0320 02:14:13.422697  543705 net.go:770] primary dev: ETH0
I0320 02:14:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:14:13.422721  543705 net.go:698] Add success.
I0320 02:14:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:14:14.455373  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:14:14.455382  543705 disk_worker.go:708] disk space is not compliant
W0320 02:14:14.455389  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:14:14.457510  543705 disk_worker.go:494] system disk:vda1
I0320 02:14:14.457538  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:14:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:14:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:14:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:14:16.472423  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:14:22.089677  543705 disk_info.go:125] begin check local disk info of client
I0320 02:14:22.092159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:14:22.092164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000542340 0xc000542380]
E0320 02:14:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:14:23.409766  543705 memory.go:184] no items to output this cycle
I0320 02:14:23.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:14:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:14:33.409796  543705 memory.go:184] no items to output this cycle
I0320 02:14:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 02:14:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:14:43.409785  543705 memory.go:191] Add success.
I0320 02:14:43.409806  543705 cpu.go:282] Add success.
I0320 02:14:43.419917  543705 net.go:648] Add success.
I0320 02:14:43.422716  543705 net.go:770] primary dev: ETH0
I0320 02:14:43.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:14:43.422743  543705 net.go:698] Add success.
I0320 02:14:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:14:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:14:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:14:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:14:53.409793  543705 memory.go:184] no items to output this cycle
I0320 02:14:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:15:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:15:03.409764  543705 memory.go:184] no items to output this cycle
I0320 02:15:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 02:15:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:15:13.409816  543705 memory.go:191] Add success.
I0320 02:15:13.409826  543705 cpu.go:282] Add success.
W0320 02:15:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:15:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:15:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:15:13.420250  543705 net.go:648] Add success.
I0320 02:15:13.423042  543705 net.go:770] primary dev: ETH0
I0320 02:15:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:15:13.423072  543705 net.go:698] Add success.
I0320 02:15:13.552046  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e38996bd-f6d6-416a-b5d0-0545710e58a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:15:13.552078  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:15:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:15:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:15:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 02:15:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:15:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 02:15:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:15:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:15:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:15:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:15:16.472435  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:15:22.093671  543705 disk_info.go:125] begin check local disk info of client
I0320 02:15:22.096120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:15:22.096126  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025c640 0xc00025c680]
E0320 02:15:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:15:23.409783  543705 memory.go:184] no items to output this cycle
I0320 02:15:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:15:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:15:33.409770  543705 memory.go:184] no items to output this cycle
I0320 02:15:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 02:15:38.045870  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:15:38.045877  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:15:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:15:43.410575  543705 memory.go:191] Add success.
I0320 02:15:43.409804  543705 cpu.go:282] Add success.
I0320 02:15:43.420345  543705 net.go:648] Add success.
I0320 02:15:43.422768  543705 net.go:770] primary dev: ETH0
I0320 02:15:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:15:43.422794  543705 net.go:698] Add success.
I0320 02:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:15:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:15:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:15:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:15:53.409775  543705 memory.go:184] no items to output this cycle
I0320 02:15:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:16:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:16:03.409764  543705 memory.go:184] no items to output this cycle
I0320 02:16:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:16:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:16:13.409786  543705 memory.go:191] Add success.
W0320 02:16:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:16:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:16:13.409822  543705 cpu.go:282] Add success.
I0320 02:16:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:16:13.420109  543705 net.go:648] Add success.
I0320 02:16:13.423197  543705 net.go:770] primary dev: ETH0
I0320 02:16:13.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:16:13.423226  543705 net.go:698] Add success.
I0320 02:16:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:16:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:16:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 02:16:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:16:14.456784  543705 disk_worker.go:494] system disk:vda1
I0320 02:16:14.456812  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:16:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:16:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:16:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:16:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:16:16.472465  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:16:22.097673  543705 disk_info.go:125] begin check local disk info of client
I0320 02:16:22.100179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:16:22.100186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376440 0xc000376480]
E0320 02:16:23.409739  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:16:23.409753  543705 memory.go:184] no items to output this cycle
I0320 02:16:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:16:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:16:33.409796  543705 memory.go:184] no items to output this cycle
I0320 02:16:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 02:16:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:16:43.409785  543705 memory.go:191] Add success.
I0320 02:16:43.409803  543705 cpu.go:282] Add success.
I0320 02:16:43.419844  543705 net.go:648] Add success.
I0320 02:16:43.422577  543705 net.go:770] primary dev: ETH0
I0320 02:16:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:16:43.422601  543705 net.go:698] Add success.
I0320 02:16:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:16:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:16:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:16:53.409773  543705 memory.go:184] no items to output this cycle
I0320 02:16:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:17:03.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:17:03.409895  543705 cpu.go:275] no items to output this cycle
I0320 02:17:03.409899  543705 memory.go:184] no items to output this cycle
E0320 02:17:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:17:13.409821  543705 memory.go:191] Add success.
I0320 02:17:13.409831  543705 cpu.go:282] Add success.
W0320 02:17:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:17:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:17:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:17:13.420206  543705 net.go:648] Add success.
I0320 02:17:13.422936  543705 net.go:770] primary dev: ETH0
I0320 02:17:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:17:13.422961  543705 net.go:698] Add success.
I0320 02:17:13.453613  543705 event_worker.go:152] Polling the log file for events...
W0320 02:17:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:17:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 02:17:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:17:14.456955  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:17:14.456964  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:17:14.456971  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:17:14.457023  543705 disk_worker.go:494] system disk:vda1
I0320 02:17:14.457051  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:17:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:17:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:17:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:17:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:17:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:17:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:17:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:17:22.101671  543705 disk_info.go:125] begin check local disk info of client
I0320 02:17:22.104097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:17:22.104104  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467800 0xc000467840]
E0320 02:17:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:17:23.409764  543705 memory.go:184] no items to output this cycle
I0320 02:17:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 02:17:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:17:33.409803  543705 memory.go:184] no items to output this cycle
I0320 02:17:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 02:17:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:17:43.409791  543705 memory.go:191] Add success.
I0320 02:17:43.409821  543705 cpu.go:282] Add success.
I0320 02:17:43.419876  543705 net.go:648] Add success.
I0320 02:17:43.422639  543705 net.go:770] primary dev: ETH0
I0320 02:17:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:17:43.422675  543705 net.go:698] Add success.
I0320 02:17:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:17:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:17:53.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:17:53.409831  543705 memory.go:184] no items to output this cycle
I0320 02:17:53.409835  543705 cpu.go:275] no items to output this cycle
E0320 02:18:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:18:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 02:18:03.409798  543705 memory.go:184] no items to output this cycle
E0320 02:18:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:18:13.409793  543705 memory.go:191] Add success.
I0320 02:18:13.409810  543705 cpu.go:282] Add success.
W0320 02:18:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:18:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:18:13.420120  543705 net.go:648] Add success.
I0320 02:18:13.422694  543705 net.go:770] primary dev: ETH0
I0320 02:18:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:18:13.422719  543705 net.go:698] Add success.
I0320 02:18:13.468290  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e7d5216b-105f-445a-9a7a-70d9799beb30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:18:13.468323  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:18:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:18:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:18:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 02:18:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:18:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 02:18:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:18:15.455989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:18:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:18:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:18:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:18:16.472534  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:18:22.105674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:18:22.108221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:18:22.108227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3d40 0xc0003b3d80]
E0320 02:18:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:18:23.409768  543705 memory.go:184] no items to output this cycle
I0320 02:18:23.409774  543705 cpu.go:275] no items to output this cycle
E0320 02:18:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:18:33.409815  543705 memory.go:184] no items to output this cycle
I0320 02:18:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 02:18:38.046024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:18:38.046031  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:18:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:18:43.410667  543705 memory.go:191] Add success.
I0320 02:18:43.409830  543705 cpu.go:282] Add success.
I0320 02:18:43.420357  543705 net.go:648] Add success.
I0320 02:18:43.423187  543705 net.go:770] primary dev: ETH0
I0320 02:18:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:18:43.423212  543705 net.go:698] Add success.
I0320 02:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:18:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:18:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:18:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:18:53.409822  543705 memory.go:184] no items to output this cycle
I0320 02:18:53.409831  543705 cpu.go:275] no items to output this cycle
E0320 02:19:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:19:03.409795  543705 memory.go:184] no items to output this cycle
I0320 02:19:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 02:19:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:19:13.409807  543705 cpu.go:282] Add success.
I0320 02:19:13.409820  543705 memory.go:191] Add success.
W0320 02:19:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:19:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:19:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:19:13.420128  543705 net.go:648] Add success.
I0320 02:19:13.422873  543705 net.go:770] primary dev: ETH0
I0320 02:19:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:19:13.422904  543705 net.go:698] Add success.
I0320 02:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:19:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:19:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0320 02:19:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:19:14.456484  543705 disk_worker.go:494] system disk:vda1
I0320 02:19:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:19:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:19:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:19:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:19:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:19:22.109676  543705 disk_info.go:125] begin check local disk info of client
I0320 02:19:22.112098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:19:22.112104  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1b00 0xc0002b1b40]
E0320 02:19:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:19:23.409787  543705 memory.go:184] no items to output this cycle
I0320 02:19:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 02:19:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:19:33.409777  543705 memory.go:184] no items to output this cycle
I0320 02:19:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 02:19:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:19:43.409796  543705 memory.go:191] Add success.
I0320 02:19:43.409797  543705 cpu.go:282] Add success.
I0320 02:19:43.419909  543705 net.go:648] Add success.
I0320 02:19:43.422695  543705 net.go:770] primary dev: ETH0
I0320 02:19:43.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:19:43.422724  543705 net.go:698] Add success.
I0320 02:19:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:19:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:19:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:19:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:19:53.409776  543705 memory.go:184] no items to output this cycle
I0320 02:19:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 02:20:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:20:03.409769  543705 memory.go:184] no items to output this cycle
I0320 02:20:03.409889  543705 cpu.go:275] no items to output this cycle
E0320 02:20:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:20:13.409791  543705 memory.go:191] Add success.
I0320 02:20:13.409792  543705 cpu.go:282] Add success.
W0320 02:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:20:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:20:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:20:13.420084  543705 net.go:648] Add success.
I0320 02:20:13.422790  543705 net.go:770] primary dev: ETH0
I0320 02:20:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:20:13.422817  543705 net.go:698] Add success.
I0320 02:20:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:20:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:20:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 02:20:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:20:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 02:20:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:20:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:20:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:20:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:20:16.472419  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:20:22.113673  543705 disk_info.go:125] begin check local disk info of client
I0320 02:20:22.116176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:20:22.116182  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b080 0xc00007b0c0]
E0320 02:20:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:20:23.409759  543705 memory.go:184] no items to output this cycle
I0320 02:20:23.409788  543705 cpu.go:275] no items to output this cycle
E0320 02:20:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:20:33.409776  543705 cpu.go:275] no items to output this cycle
I0320 02:20:33.409781  543705 memory.go:184] no items to output this cycle
E0320 02:20:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:20:43.409818  543705 memory.go:191] Add success.
I0320 02:20:43.409821  543705 cpu.go:282] Add success.
I0320 02:20:43.420196  543705 net.go:648] Add success.
I0320 02:20:43.423068  543705 net.go:770] primary dev: ETH0
I0320 02:20:43.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:20:43.423094  543705 net.go:698] Add success.
I0320 02:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:20:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:20:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:20:53.409788  543705 memory.go:184] no items to output this cycle
I0320 02:20:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 02:21:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:21:03.409866  543705 memory.go:184] no items to output this cycle
I0320 02:21:03.409949  543705 cpu.go:275] no items to output this cycle
E0320 02:21:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:21:13.409798  543705 memory.go:191] Add success.
I0320 02:21:13.409802  543705 cpu.go:282] Add success.
W0320 02:21:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:21:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:21:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:21:13.420288  543705 net.go:648] Add success.
I0320 02:21:13.423073  543705 net.go:770] primary dev: ETH0
I0320 02:21:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:21:13.423098  543705 net.go:698] Add success.
I0320 02:21:13.700343  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bc830f46-6160-492f-ac3a-ef8def47e51d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:21:13.700381  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:21:14.453978  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:21:14.454236  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:21:14.454247  543705 disk_worker.go:708] disk space is not compliant
W0320 02:21:14.454250  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:21:14.455797  543705 disk_worker.go:494] system disk:vda1
I0320 02:21:14.455829  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:21:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:21:16.457579  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:21:16.457676  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:21:16.457719  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:21:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:21:22.117675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:21:22.120191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:21:22.120198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466cc0 0xc000466d00]
E0320 02:21:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:21:23.409792  543705 memory.go:184] no items to output this cycle
I0320 02:21:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:21:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:21:33.409785  543705 memory.go:184] no items to output this cycle
I0320 02:21:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 02:21:38.048952  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:21:38.048959  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:21:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:21:43.410687  543705 memory.go:191] Add success.
I0320 02:21:43.409837  543705 cpu.go:282] Add success.
I0320 02:21:43.420392  543705 net.go:648] Add success.
I0320 02:21:43.423308  543705 net.go:770] primary dev: ETH0
I0320 02:21:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:21:43.423332  543705 net.go:698] Add success.
I0320 02:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:21:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:21:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:21:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:21:53.409782  543705 memory.go:184] no items to output this cycle
I0320 02:21:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 02:22:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:22:03.409779  543705 memory.go:184] no items to output this cycle
I0320 02:22:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:22:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:22:13.409809  543705 memory.go:191] Add success.
I0320 02:22:13.409817  543705 cpu.go:282] Add success.
W0320 02:22:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:22:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:22:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:22:13.420235  543705 net.go:648] Add success.
I0320 02:22:13.422883  543705 net.go:770] primary dev: ETH0
I0320 02:22:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:22:13.422917  543705 net.go:698] Add success.
W0320 02:22:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:22:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 02:22:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:22:14.456885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:22:14.456894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:22:14.456900  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:22:14.456989  543705 disk_worker.go:494] system disk:vda1
I0320 02:22:14.457031  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:22:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:22:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 02:22:16.457984  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:22:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:22:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:22:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:22:16.472457  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:22:22.121674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:22:22.124133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:22:22.124139  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376080 0xc0003760c0]
E0320 02:22:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:22:23.409787  543705 memory.go:184] no items to output this cycle
I0320 02:22:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 02:22:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:22:33.409773  543705 memory.go:184] no items to output this cycle
I0320 02:22:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 02:22:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:22:43.409795  543705 memory.go:191] Add success.
I0320 02:22:43.409796  543705 cpu.go:282] Add success.
I0320 02:22:43.419865  543705 net.go:648] Add success.
I0320 02:22:43.422534  543705 net.go:770] primary dev: ETH0
I0320 02:22:43.422546  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:22:43.422560  543705 net.go:698] Add success.
I0320 02:22:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:22:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:22:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:22:53.410265  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:22:53.410285  543705 memory.go:184] no items to output this cycle
I0320 02:22:53.410295  543705 cpu.go:275] no items to output this cycle
E0320 02:23:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:23:03.409901  543705 memory.go:184] no items to output this cycle
I0320 02:23:03.409945  543705 cpu.go:275] no items to output this cycle
E0320 02:23:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:23:13.409809  543705 memory.go:191] Add success.
I0320 02:23:13.409811  543705 cpu.go:282] Add success.
W0320 02:23:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:23:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:23:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:23:13.420060  543705 net.go:770] primary dev: ETH0
I0320 02:23:13.420074  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:23:13.420086  543705 net.go:698] Add success.
I0320 02:23:13.420326  543705 net.go:648] Add success.
I0320 02:23:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:23:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:23:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 02:23:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:23:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 02:23:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:23:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:23:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:23:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:23:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:23:16.472454  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:23:22.125674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:23:22.128083  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:23:22.128088  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e900 0xc00039e940]
I0320 02:23:23.409778  543705 cpu.go:275] no items to output this cycle
E0320 02:23:23.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:23:23.409864  543705 memory.go:184] no items to output this cycle
E0320 02:23:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:23:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 02:23:33.409786  543705 memory.go:184] no items to output this cycle
E0320 02:23:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:23:43.409820  543705 memory.go:191] Add success.
I0320 02:23:43.409824  543705 cpu.go:282] Add success.
I0320 02:23:43.419878  543705 net.go:648] Add success.
I0320 02:23:43.422731  543705 net.go:770] primary dev: ETH0
I0320 02:23:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:23:43.422757  543705 net.go:698] Add success.
I0320 02:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:23:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:23:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:23:53.409782  543705 memory.go:184] no items to output this cycle
I0320 02:23:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 02:24:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:24:03.409786  543705 memory.go:184] no items to output this cycle
I0320 02:24:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 02:24:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:24:13.409789  543705 memory.go:191] Add success.
I0320 02:24:13.409806  543705 cpu.go:282] Add success.
W0320 02:24:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:24:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:24:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:24:13.420139  543705 net.go:648] Add success.
I0320 02:24:13.422805  543705 net.go:770] primary dev: ETH0
I0320 02:24:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:24:13.422832  543705 net.go:698] Add success.
I0320 02:24:13.468854  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7801f2d0-052f-481d-9678-7087b3e8e79f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:24:13.468888  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:24:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:24:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:24:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 02:24:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:24:14.456703  543705 disk_worker.go:494] system disk:vda1
I0320 02:24:14.456745  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:24:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:24:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:24:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:24:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:24:16.472482  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:24:22.129673  543705 disk_info.go:125] begin check local disk info of client
I0320 02:24:22.132194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:24:22.132201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa680 0xc0001aa6c0]
E0320 02:24:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:24:23.409769  543705 memory.go:184] no items to output this cycle
I0320 02:24:23.409776  543705 cpu.go:275] no items to output this cycle
E0320 02:24:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:24:33.409769  543705 memory.go:184] no items to output this cycle
I0320 02:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 02:24:38.049741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:24:38.049748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:24:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:24:43.410693  543705 memory.go:191] Add success.
I0320 02:24:43.409828  543705 cpu.go:282] Add success.
I0320 02:24:43.420466  543705 net.go:648] Add success.
I0320 02:24:43.423247  543705 net.go:770] primary dev: ETH0
I0320 02:24:43.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:24:43.423277  543705 net.go:698] Add success.
I0320 02:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:24:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:24:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:24:53.409778  543705 memory.go:184] no items to output this cycle
I0320 02:24:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:25:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:25:03.409772  543705 memory.go:184] no items to output this cycle
I0320 02:25:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:25:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:25:13.409831  543705 memory.go:191] Add success.
I0320 02:25:13.409839  543705 cpu.go:282] Add success.
W0320 02:25:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:25:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:25:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:25:13.420301  543705 net.go:648] Add success.
I0320 02:25:13.422822  543705 net.go:770] primary dev: ETH0
I0320 02:25:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:25:13.422856  543705 net.go:698] Add success.
I0320 02:25:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:25:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:25:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 02:25:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:25:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 02:25:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:25:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:25:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:25:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:25:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:25:22.133672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:25:22.136119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:25:22.136124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003771c0 0xc000377200]
E0320 02:25:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:25:23.409764  543705 memory.go:184] no items to output this cycle
I0320 02:25:23.409784  543705 cpu.go:275] no items to output this cycle
E0320 02:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:25:33.409778  543705 cpu.go:275] no items to output this cycle
I0320 02:25:33.409782  543705 memory.go:184] no items to output this cycle
E0320 02:25:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:25:43.409820  543705 memory.go:191] Add success.
I0320 02:25:43.409830  543705 cpu.go:282] Add success.
I0320 02:25:43.419893  543705 net.go:648] Add success.
I0320 02:25:43.422767  543705 net.go:770] primary dev: ETH0
I0320 02:25:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:25:43.422792  543705 net.go:698] Add success.
I0320 02:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:25:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:25:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:25:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:25:53.409808  543705 memory.go:184] no items to output this cycle
I0320 02:25:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 02:26:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:26:03.409796  543705 memory.go:184] no items to output this cycle
I0320 02:26:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 02:26:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:26:13.409791  543705 cpu.go:282] Add success.
I0320 02:26:13.409792  543705 memory.go:191] Add success.
W0320 02:26:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:26:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:26:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:26:13.420329  543705 net.go:648] Add success.
I0320 02:26:13.423054  543705 net.go:770] primary dev: ETH0
I0320 02:26:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:26:13.423079  543705 net.go:698] Add success.
I0320 02:26:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:26:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:26:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 02:26:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:26:14.456520  543705 disk_worker.go:494] system disk:vda1
I0320 02:26:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:26:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:26:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:26:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:26:16.472435  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:26:22.137679  543705 disk_info.go:125] begin check local disk info of client
I0320 02:26:22.140178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:26:22.140184  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582a40 0xc000582a80]
E0320 02:26:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:26:23.409790  543705 memory.go:184] no items to output this cycle
I0320 02:26:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 02:26:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:26:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 02:26:33.409790  543705 memory.go:184] no items to output this cycle
E0320 02:26:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:26:43.409798  543705 memory.go:191] Add success.
I0320 02:26:43.409800  543705 cpu.go:282] Add success.
I0320 02:26:43.419957  543705 net.go:648] Add success.
I0320 02:26:43.422870  543705 net.go:770] primary dev: ETH0
I0320 02:26:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:26:43.422896  543705 net.go:698] Add success.
I0320 02:26:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:26:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:26:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:26:53.410256  543705 memory.go:184] no items to output this cycle
I0320 02:26:53.410294  543705 cpu.go:275] no items to output this cycle
E0320 02:27:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:27:03.409776  543705 memory.go:184] no items to output this cycle
I0320 02:27:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:27:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:27:13.409801  543705 memory.go:191] Add success.
I0320 02:27:13.409802  543705 cpu.go:282] Add success.
W0320 02:27:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:27:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:27:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:27:13.420316  543705 net.go:648] Add success.
I0320 02:27:13.422973  543705 net.go:770] primary dev: ETH0
I0320 02:27:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:27:13.422998  543705 net.go:698] Add success.
I0320 02:27:13.429667  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 02:27:13.452768  543705 event_worker.go:152] Polling the log file for events...
I0320 02:27:13.467956  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"20f824ee-95aa-4f7e-9183-7767be20eea7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:27:13.467989  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 02:27:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:27:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 02:27:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:27:14.455911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:27:14.455920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:27:14.455925  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:27:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 02:27:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:27:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:27:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:27:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:27:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:27:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:27:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:27:16.472329  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:27:22.141673  543705 disk_info.go:125] begin check local disk info of client
I0320 02:27:22.144095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:27:22.144101  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7800 0xc0004a7840]
E0320 02:27:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:27:23.409775  543705 memory.go:184] no items to output this cycle
I0320 02:27:23.409780  543705 cpu.go:275] no items to output this cycle
E0320 02:27:33.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:27:33.409889  543705 memory.go:184] no items to output this cycle
I0320 02:27:33.409901  543705 cpu.go:275] no items to output this cycle
I0320 02:27:38.052948  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:27:38.052956  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:27:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:27:43.410639  543705 memory.go:191] Add success.
I0320 02:27:43.409821  543705 cpu.go:282] Add success.
I0320 02:27:43.420349  543705 net.go:648] Add success.
I0320 02:27:43.423004  543705 net.go:770] primary dev: ETH0
I0320 02:27:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:27:43.423029  543705 net.go:698] Add success.
I0320 02:27:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:27:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:27:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:27:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:27:53.409812  543705 memory.go:184] no items to output this cycle
I0320 02:27:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 02:28:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:28:03.409789  543705 memory.go:184] no items to output this cycle
I0320 02:28:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:28:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:28:13.409785  543705 memory.go:191] Add success.
I0320 02:28:13.409803  543705 cpu.go:282] Add success.
W0320 02:28:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:28:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:28:13.420370  543705 net.go:648] Add success.
I0320 02:28:13.423190  543705 net.go:770] primary dev: ETH0
I0320 02:28:13.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:28:13.423214  543705 net.go:698] Add success.
I0320 02:28:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:28:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:28:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 02:28:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:28:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 02:28:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:28:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:28:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:28:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:28:16.472438  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:28:22.145678  543705 disk_info.go:125] begin check local disk info of client
I0320 02:28:22.148169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:28:22.148174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6300 0xc0004a6340]
E0320 02:28:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:28:23.409788  543705 memory.go:184] no items to output this cycle
I0320 02:28:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 02:28:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:28:33.409787  543705 memory.go:184] no items to output this cycle
I0320 02:28:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:28:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:28:43.409793  543705 memory.go:191] Add success.
I0320 02:28:43.409794  543705 cpu.go:282] Add success.
I0320 02:28:43.420124  543705 net.go:648] Add success.
I0320 02:28:43.422656  543705 net.go:770] primary dev: ETH0
I0320 02:28:43.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:28:43.422686  543705 net.go:698] Add success.
I0320 02:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:28:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:28:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:28:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:28:53.409786  543705 memory.go:184] no items to output this cycle
I0320 02:28:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 02:29:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:29:03.409783  543705 memory.go:184] no items to output this cycle
I0320 02:29:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:29:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:29:13.409799  543705 memory.go:191] Add success.
I0320 02:29:13.409799  543705 cpu.go:282] Add success.
W0320 02:29:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:29:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:29:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:29:13.420432  543705 net.go:648] Add success.
I0320 02:29:13.423044  543705 net.go:770] primary dev: ETH0
I0320 02:29:13.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:29:13.423067  543705 net.go:698] Add success.
I0320 02:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:29:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:29:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 02:29:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:29:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 02:29:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:29:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:29:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:29:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:29:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:29:16.472526  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:29:22.149675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:29:22.152274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:29:22.152282  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035fbc0 0xc00035fc00]
E0320 02:29:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:29:23.409758  543705 memory.go:184] no items to output this cycle
I0320 02:29:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 02:29:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:29:33.409803  543705 memory.go:184] no items to output this cycle
I0320 02:29:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 02:29:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:29:43.409798  543705 memory.go:191] Add success.
I0320 02:29:43.409801  543705 cpu.go:282] Add success.
I0320 02:29:43.419914  543705 net.go:648] Add success.
I0320 02:29:43.422437  543705 net.go:770] primary dev: ETH0
I0320 02:29:43.422450  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:29:43.422461  543705 net.go:698] Add success.
I0320 02:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:29:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:29:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:29:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:29:53.409779  543705 memory.go:184] no items to output this cycle
I0320 02:29:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 02:30:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:30:03.409766  543705 memory.go:184] no items to output this cycle
I0320 02:30:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 02:30:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:30:13.409792  543705 memory.go:191] Add success.
I0320 02:30:13.409809  543705 cpu.go:282] Add success.
W0320 02:30:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:30:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:30:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:30:13.420117  543705 net.go:648] Add success.
I0320 02:30:13.422836  543705 net.go:770] primary dev: ETH0
I0320 02:30:13.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:30:13.422870  543705 net.go:698] Add success.
I0320 02:30:13.463968  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af6ac447-2896-4dc6-a8e7-8b62adb60251","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:30:13.464001  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:30:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:30:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:30:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 02:30:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:30:14.456620  543705 disk_worker.go:494] system disk:vda1
I0320 02:30:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:30:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:30:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:30:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:30:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:30:22.153674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:30:22.156184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:30:22.156190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352440 0xc000352480]
E0320 02:30:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:30:23.409791  543705 memory.go:184] no items to output this cycle
I0320 02:30:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 02:30:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:30:33.409812  543705 memory.go:184] no items to output this cycle
I0320 02:30:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 02:30:38.053731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:30:38.053738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:30:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:30:43.410695  543705 memory.go:191] Add success.
I0320 02:30:43.409783  543705 cpu.go:282] Add success.
I0320 02:30:43.420398  543705 net.go:648] Add success.
I0320 02:30:43.423085  543705 net.go:770] primary dev: ETH0
I0320 02:30:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:30:43.423112  543705 net.go:698] Add success.
I0320 02:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:30:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:30:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:30:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:30:53.409782  543705 memory.go:184] no items to output this cycle
I0320 02:30:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 02:31:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:31:03.409773  543705 memory.go:184] no items to output this cycle
I0320 02:31:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 02:31:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:31:13.409821  543705 memory.go:191] Add success.
I0320 02:31:13.409826  543705 cpu.go:282] Add success.
W0320 02:31:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:31:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:31:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:31:13.420186  543705 net.go:648] Add success.
I0320 02:31:13.422993  543705 net.go:770] primary dev: ETH0
I0320 02:31:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:31:13.423028  543705 net.go:698] Add success.
I0320 02:31:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:31:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:31:14.455412  543705 disk_worker.go:708] disk space is not compliant
W0320 02:31:14.455419  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:31:14.456997  543705 disk_worker.go:494] system disk:vda1
I0320 02:31:14.457038  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:31:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:31:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:31:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:31:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:31:22.157674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:31:22.160092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:31:22.160097  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be240 0xc0002be280]
E0320 02:31:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:31:23.409784  543705 memory.go:184] no items to output this cycle
I0320 02:31:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:31:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:31:33.409801  543705 memory.go:184] no items to output this cycle
I0320 02:31:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 02:31:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:31:43.409791  543705 memory.go:191] Add success.
I0320 02:31:43.409796  543705 cpu.go:282] Add success.
I0320 02:31:43.419888  543705 net.go:648] Add success.
I0320 02:31:43.422575  543705 net.go:770] primary dev: ETH0
I0320 02:31:43.422590  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:31:43.422604  543705 net.go:698] Add success.
I0320 02:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:31:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:31:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:31:53.409789  543705 cpu.go:275] no items to output this cycle
I0320 02:31:53.409804  543705 memory.go:184] no items to output this cycle
E0320 02:32:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:32:03.409803  543705 memory.go:184] no items to output this cycle
I0320 02:32:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 02:32:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:32:13.409791  543705 memory.go:191] Add success.
I0320 02:32:13.409791  543705 cpu.go:282] Add success.
W0320 02:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:32:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:32:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:32:13.420206  543705 net.go:648] Add success.
I0320 02:32:13.422994  543705 net.go:770] primary dev: ETH0
I0320 02:32:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:32:13.423022  543705 net.go:698] Add success.
W0320 02:32:14.455392  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:32:14.455458  543705 disk_worker.go:708] disk space is not compliant
W0320 02:32:14.455461  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:32:14.456000  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:32:14.456007  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:32:14.456012  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:32:14.457873  543705 disk_worker.go:494] system disk:vda1
I0320 02:32:14.457913  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:32:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:32:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:32:16.458028  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:32:16.458036  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:32:16.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:32:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:32:16.472456  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:32:22.161671  543705 disk_info.go:125] begin check local disk info of client
I0320 02:32:22.164202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:32:22.164207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002617c0 0xc000261800]
E0320 02:32:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:32:23.409775  543705 memory.go:184] no items to output this cycle
I0320 02:32:23.409782  543705 cpu.go:275] no items to output this cycle
E0320 02:32:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:32:33.409803  543705 memory.go:184] no items to output this cycle
I0320 02:32:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 02:32:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:32:43.409784  543705 memory.go:191] Add success.
I0320 02:32:43.409787  543705 cpu.go:282] Add success.
I0320 02:32:43.419919  543705 net.go:648] Add success.
I0320 02:32:43.422725  543705 net.go:770] primary dev: ETH0
I0320 02:32:43.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:32:43.422753  543705 net.go:698] Add success.
I0320 02:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:32:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:32:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:32:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:32:53.409784  543705 memory.go:184] no items to output this cycle
I0320 02:32:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 02:33:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:33:03.409780  543705 memory.go:184] no items to output this cycle
I0320 02:33:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 02:33:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:33:13.409820  543705 memory.go:191] Add success.
I0320 02:33:13.409828  543705 cpu.go:282] Add success.
W0320 02:33:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:33:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:33:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:33:13.420133  543705 net.go:648] Add success.
I0320 02:33:13.422970  543705 net.go:770] primary dev: ETH0
I0320 02:33:13.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:33:13.422996  543705 net.go:698] Add success.
I0320 02:33:13.469070  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9b9e9d9f-48b8-456e-ba4f-e17d45717e7f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:33:13.469105  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:33:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:33:14.455385  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:33:14.455396  543705 disk_worker.go:708] disk space is not compliant
W0320 02:33:14.455403  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:33:14.457583  543705 disk_worker.go:494] system disk:vda1
I0320 02:33:14.457625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:33:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:33:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:33:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:33:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:33:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:33:22.165672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:33:22.168163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:33:22.168169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000518280 0xc0005182c0]
E0320 02:33:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:33:23.409761  543705 memory.go:184] no items to output this cycle
I0320 02:33:23.409783  543705 cpu.go:275] no items to output this cycle
E0320 02:33:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:33:33.409782  543705 memory.go:184] no items to output this cycle
I0320 02:33:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 02:33:38.056969  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:33:38.056975  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:33:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:33:43.410673  543705 memory.go:191] Add success.
I0320 02:33:43.409801  543705 cpu.go:282] Add success.
I0320 02:33:43.420418  543705 net.go:648] Add success.
I0320 02:33:43.423287  543705 net.go:770] primary dev: ETH0
I0320 02:33:43.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:33:43.423312  543705 net.go:698] Add success.
I0320 02:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:33:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:33:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:33:53.409789  543705 memory.go:184] no items to output this cycle
I0320 02:33:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 02:34:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:34:03.409780  543705 memory.go:184] no items to output this cycle
I0320 02:34:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:34:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:34:13.409794  543705 cpu.go:282] Add success.
I0320 02:34:13.409795  543705 memory.go:191] Add success.
W0320 02:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:34:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:34:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:34:13.420062  543705 net.go:648] Add success.
I0320 02:34:13.422707  543705 net.go:770] primary dev: ETH0
I0320 02:34:13.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:34:13.422740  543705 net.go:698] Add success.
I0320 02:34:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:34:14.455304  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:34:14.455372  543705 disk_worker.go:708] disk space is not compliant
W0320 02:34:14.455376  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:34:14.457553  543705 disk_worker.go:494] system disk:vda1
I0320 02:34:14.457597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:34:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:34:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:34:16.472415  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:34:22.169676  543705 disk_info.go:125] begin check local disk info of client
I0320 02:34:22.172194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:34:22.172200  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466100 0xc000466140]
E0320 02:34:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:34:23.409766  543705 memory.go:184] no items to output this cycle
I0320 02:34:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:34:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:34:33.409806  543705 memory.go:184] no items to output this cycle
I0320 02:34:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 02:34:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:34:43.409779  543705 memory.go:191] Add success.
I0320 02:34:43.409800  543705 cpu.go:282] Add success.
I0320 02:34:43.419879  543705 net.go:648] Add success.
I0320 02:34:43.422625  543705 net.go:770] primary dev: ETH0
I0320 02:34:43.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:34:43.422656  543705 net.go:698] Add success.
I0320 02:34:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:34:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:34:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:34:53.410364  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:34:53.410385  543705 memory.go:184] no items to output this cycle
I0320 02:34:53.410398  543705 cpu.go:275] no items to output this cycle
E0320 02:35:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:35:03.409767  543705 memory.go:184] no items to output this cycle
I0320 02:35:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 02:35:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:35:13.409814  543705 memory.go:191] Add success.
I0320 02:35:13.409821  543705 cpu.go:282] Add success.
W0320 02:35:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:35:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:35:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:35:13.420145  543705 net.go:648] Add success.
I0320 02:35:13.423191  543705 net.go:770] primary dev: ETH0
I0320 02:35:13.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:35:13.423220  543705 net.go:698] Add success.
I0320 02:35:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:35:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:35:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 02:35:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:35:14.457573  543705 disk_worker.go:494] system disk:vda1
I0320 02:35:14.457627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:35:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:35:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:35:22.173674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:35:22.176103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:35:22.176109  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a580 0xc00039a5c0]
E0320 02:35:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:35:23.409783  543705 memory.go:184] no items to output this cycle
I0320 02:35:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:35:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:35:33.409768  543705 memory.go:184] no items to output this cycle
I0320 02:35:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:35:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:35:43.409815  543705 memory.go:191] Add success.
I0320 02:35:43.409820  543705 cpu.go:282] Add success.
I0320 02:35:43.419884  543705 net.go:648] Add success.
I0320 02:35:43.422524  543705 net.go:770] primary dev: ETH0
I0320 02:35:43.422537  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:35:43.422548  543705 net.go:698] Add success.
I0320 02:35:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:35:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:35:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:35:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:35:53.409779  543705 memory.go:184] no items to output this cycle
I0320 02:35:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 02:36:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:36:03.409777  543705 memory.go:184] no items to output this cycle
I0320 02:36:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 02:36:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:36:13.409785  543705 cpu.go:282] Add success.
I0320 02:36:13.409787  543705 memory.go:191] Add success.
W0320 02:36:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:36:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:36:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:36:13.420276  543705 net.go:648] Add success.
I0320 02:36:13.422918  543705 net.go:770] primary dev: ETH0
I0320 02:36:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:36:13.422949  543705 net.go:698] Add success.
I0320 02:36:13.466321  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"407a10ad-52c9-448f-a955-5c393a92f08a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:36:13.466354  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:36:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:36:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:36:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 02:36:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:36:14.457242  543705 disk_worker.go:494] system disk:vda1
I0320 02:36:14.457279  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:36:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:36:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:36:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:36:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:36:16.472434  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:36:22.177677  543705 disk_info.go:125] begin check local disk info of client
I0320 02:36:22.180212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:36:22.180218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390300 0xc000390340]
E0320 02:36:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:36:23.409785  543705 memory.go:184] no items to output this cycle
I0320 02:36:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:36:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:36:33.409803  543705 memory.go:184] no items to output this cycle
I0320 02:36:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 02:36:38.057744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:36:38.057750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:36:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:36:43.410690  543705 memory.go:191] Add success.
I0320 02:36:43.409799  543705 cpu.go:282] Add success.
I0320 02:36:43.420435  543705 net.go:648] Add success.
I0320 02:36:43.423159  543705 net.go:770] primary dev: ETH0
I0320 02:36:43.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:36:43.423184  543705 net.go:698] Add success.
I0320 02:36:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:36:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:36:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:36:53.409807  543705 memory.go:184] no items to output this cycle
I0320 02:36:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 02:37:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:37:03.409772  543705 memory.go:184] no items to output this cycle
I0320 02:37:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 02:37:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:37:13.409803  543705 memory.go:191] Add success.
I0320 02:37:13.409821  543705 cpu.go:282] Add success.
W0320 02:37:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:37:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:37:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:37:13.420147  543705 net.go:648] Add success.
I0320 02:37:13.423219  543705 net.go:770] primary dev: ETH0
I0320 02:37:13.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:37:13.423251  543705 net.go:698] Add success.
I0320 02:37:13.452781  543705 event_worker.go:152] Polling the log file for events...
W0320 02:37:14.454316  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:37:14.454331  543705 disk_worker.go:708] disk space is not compliant
W0320 02:37:14.454335  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:37:14.454971  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:37:14.454981  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:37:14.454989  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:37:14.456098  543705 disk_worker.go:494] system disk:vda1
I0320 02:37:14.456127  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:37:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:37:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:37:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:37:16.457984  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:37:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:37:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:37:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:37:22.181677  543705 disk_info.go:125] begin check local disk info of client
I0320 02:37:22.184077  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:37:22.184083  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c41c0 0xc0000c4200]
E0320 02:37:23.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:37:23.410270  543705 memory.go:184] no items to output this cycle
I0320 02:37:23.410285  543705 cpu.go:275] no items to output this cycle
E0320 02:37:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:37:33.409783  543705 memory.go:184] no items to output this cycle
I0320 02:37:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 02:37:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:37:43.409820  543705 memory.go:191] Add success.
I0320 02:37:43.409824  543705 cpu.go:282] Add success.
I0320 02:37:43.419943  543705 net.go:648] Add success.
I0320 02:37:43.422911  543705 net.go:770] primary dev: ETH0
I0320 02:37:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:37:43.422936  543705 net.go:698] Add success.
I0320 02:37:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:37:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:37:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:37:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:37:53.409795  543705 memory.go:184] no items to output this cycle
I0320 02:37:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:38:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:38:03.409780  543705 memory.go:184] no items to output this cycle
I0320 02:38:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 02:38:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:38:13.409800  543705 memory.go:191] Add success.
I0320 02:38:13.409801  543705 cpu.go:282] Add success.
W0320 02:38:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:38:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:38:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:38:13.420118  543705 net.go:648] Add success.
I0320 02:38:13.422875  543705 net.go:770] primary dev: ETH0
I0320 02:38:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:38:13.422901  543705 net.go:698] Add success.
I0320 02:38:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:38:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:38:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 02:38:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:38:14.458958  543705 disk_worker.go:494] system disk:vda1
I0320 02:38:14.458986  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:38:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:38:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:38:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:38:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:38:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:38:22.185675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:38:22.188156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:38:22.188162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e3c0 0xc00039e400]
E0320 02:38:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:38:23.409779  543705 memory.go:184] no items to output this cycle
I0320 02:38:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 02:38:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:38:33.409787  543705 memory.go:184] no items to output this cycle
I0320 02:38:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 02:38:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:38:43.409800  543705 memory.go:191] Add success.
I0320 02:38:43.409805  543705 cpu.go:282] Add success.
I0320 02:38:43.419954  543705 net.go:648] Add success.
I0320 02:38:43.422932  543705 net.go:770] primary dev: ETH0
I0320 02:38:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:38:43.422965  543705 net.go:698] Add success.
I0320 02:38:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:38:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:38:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:38:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:38:53.409816  543705 memory.go:184] no items to output this cycle
I0320 02:38:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 02:39:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:39:03.409792  543705 memory.go:184] no items to output this cycle
I0320 02:39:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 02:39:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:39:13.409838  543705 memory.go:191] Add success.
I0320 02:39:13.409842  543705 cpu.go:282] Add success.
W0320 02:39:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:39:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:39:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:39:13.420171  543705 net.go:648] Add success.
I0320 02:39:13.423002  543705 net.go:770] primary dev: ETH0
I0320 02:39:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:39:13.423029  543705 net.go:698] Add success.
I0320 02:39:13.497584  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"602a7ab7-8bfc-43df-a125-1dd75f5a6f09","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:39:13.497616  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:39:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:39:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:39:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 02:39:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:39:14.456854  543705 disk_worker.go:494] system disk:vda1
I0320 02:39:14.456885  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:39:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:39:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:39:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:39:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:39:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:39:22.189679  543705 disk_info.go:125] begin check local disk info of client
I0320 02:39:22.192080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:39:22.192087  543705 disk_info.go:196] parse disk info done, disk is : [0xc000562300 0xc000562340]
E0320 02:39:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:39:23.409767  543705 memory.go:184] no items to output this cycle
I0320 02:39:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:39:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:39:33.409787  543705 memory.go:184] no items to output this cycle
I0320 02:39:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 02:39:38.061004  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:39:38.061011  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:39:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:39:43.410717  543705 memory.go:191] Add success.
I0320 02:39:43.409809  543705 cpu.go:282] Add success.
I0320 02:39:43.420445  543705 net.go:648] Add success.
I0320 02:39:43.423307  543705 net.go:770] primary dev: ETH0
I0320 02:39:43.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:39:43.423337  543705 net.go:698] Add success.
I0320 02:39:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:39:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:39:53.409801  543705 memory.go:184] no items to output this cycle
I0320 02:39:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:40:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:40:03.409776  543705 memory.go:184] no items to output this cycle
I0320 02:40:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:40:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:40:13.409809  543705 memory.go:191] Add success.
I0320 02:40:13.409810  543705 cpu.go:282] Add success.
W0320 02:40:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:40:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:40:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:40:13.420308  543705 net.go:648] Add success.
I0320 02:40:13.422991  543705 net.go:770] primary dev: ETH0
I0320 02:40:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:40:13.423015  543705 net.go:698] Add success.
I0320 02:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:40:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:40:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 02:40:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:40:14.456499  543705 disk_worker.go:494] system disk:vda1
I0320 02:40:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:40:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:40:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:40:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:40:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:40:16.472454  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:40:22.193679  543705 disk_info.go:125] begin check local disk info of client
I0320 02:40:22.196126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:40:22.196133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2940 0xc0003f2980]
E0320 02:40:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:40:23.409767  543705 memory.go:184] no items to output this cycle
I0320 02:40:23.409811  543705 cpu.go:275] no items to output this cycle
E0320 02:40:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:40:33.409799  543705 memory.go:184] no items to output this cycle
I0320 02:40:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 02:40:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:40:43.409781  543705 memory.go:191] Add success.
I0320 02:40:43.409799  543705 cpu.go:282] Add success.
I0320 02:40:43.419993  543705 net.go:648] Add success.
I0320 02:40:43.423040  543705 net.go:770] primary dev: ETH0
I0320 02:40:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:40:43.423071  543705 net.go:698] Add success.
I0320 02:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:40:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:40:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:40:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:40:53.409778  543705 memory.go:184] no items to output this cycle
I0320 02:40:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 02:41:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:41:03.409804  543705 memory.go:184] no items to output this cycle
I0320 02:41:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 02:41:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:41:13.409786  543705 memory.go:191] Add success.
W0320 02:41:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 02:41:13.409818  543705 cpu.go:282] Add success.
W0320 02:41:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:41:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:41:13.420189  543705 net.go:648] Add success.
I0320 02:41:13.422830  543705 net.go:770] primary dev: ETH0
I0320 02:41:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:41:13.422868  543705 net.go:698] Add success.
I0320 02:41:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:41:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:41:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 02:41:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:41:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 02:41:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:41:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:41:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:41:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:41:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:41:22.197672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:41:22.200136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:41:22.200143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba7c0 0xc0002ba800]
E0320 02:41:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:41:23.409796  543705 memory.go:184] no items to output this cycle
I0320 02:41:23.409808  543705 cpu.go:275] no items to output this cycle
E0320 02:41:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:41:33.409769  543705 memory.go:184] no items to output this cycle
I0320 02:41:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 02:41:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:41:43.409793  543705 memory.go:191] Add success.
I0320 02:41:43.409793  543705 cpu.go:282] Add success.
I0320 02:41:43.419902  543705 net.go:648] Add success.
I0320 02:41:43.422417  543705 net.go:770] primary dev: ETH0
I0320 02:41:43.422436  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:41:43.422452  543705 net.go:698] Add success.
I0320 02:41:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:41:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:41:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:41:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:41:53.409787  543705 memory.go:184] no items to output this cycle
I0320 02:41:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 02:42:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:42:03.409766  543705 memory.go:184] no items to output this cycle
I0320 02:42:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 02:42:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:42:13.409786  543705 memory.go:191] Add success.
I0320 02:42:13.409789  543705 cpu.go:282] Add success.
W0320 02:42:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:42:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:42:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:42:13.420331  543705 net.go:648] Add success.
I0320 02:42:13.422817  543705 net.go:770] primary dev: ETH0
I0320 02:42:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:42:13.422845  543705 net.go:698] Add success.
I0320 02:42:13.468775  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e06dfcd9-4903-4054-9919-143f384cdf59","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:42:13.468808  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 02:42:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:42:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 02:42:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:42:14.456103  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:42:14.456112  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:42:14.456118  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:42:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 02:42:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:42:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:42:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 02:42:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:42:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:42:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:42:16.472123  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:42:22.201681  543705 disk_info.go:125] begin check local disk info of client
I0320 02:42:22.204240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:42:22.204248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec640 0xc0000ec680]
E0320 02:42:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:42:23.409766  543705 memory.go:184] no items to output this cycle
I0320 02:42:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:42:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:42:33.409788  543705 memory.go:184] no items to output this cycle
I0320 02:42:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 02:42:38.061730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:42:38.061736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:42:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:42:43.410655  543705 memory.go:191] Add success.
I0320 02:42:43.409813  543705 cpu.go:282] Add success.
I0320 02:42:43.420447  543705 net.go:648] Add success.
I0320 02:42:43.423084  543705 net.go:770] primary dev: ETH0
I0320 02:42:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:42:43.423113  543705 net.go:698] Add success.
I0320 02:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:42:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:42:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:42:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:42:53.409790  543705 cpu.go:275] no items to output this cycle
I0320 02:42:53.409793  543705 memory.go:184] no items to output this cycle
E0320 02:43:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:43:03.409797  543705 memory.go:184] no items to output this cycle
I0320 02:43:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 02:43:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:43:13.409802  543705 memory.go:191] Add success.
I0320 02:43:13.409803  543705 cpu.go:282] Add success.
W0320 02:43:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:43:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:43:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:43:13.420189  543705 net.go:648] Add success.
I0320 02:43:13.422776  543705 net.go:770] primary dev: ETH0
I0320 02:43:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:43:13.422802  543705 net.go:698] Add success.
I0320 02:43:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:43:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:43:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 02:43:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:43:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 02:43:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:43:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:43:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:43:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:43:16.472419  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:43:22.205672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:43:22.208144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:43:22.208151  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f600 0xc00029f640]
E0320 02:43:23.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:43:23.409894  543705 memory.go:184] no items to output this cycle
I0320 02:43:23.409971  543705 cpu.go:275] no items to output this cycle
E0320 02:43:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:43:33.409798  543705 memory.go:184] no items to output this cycle
I0320 02:43:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 02:43:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:43:43.409782  543705 memory.go:191] Add success.
I0320 02:43:43.409802  543705 cpu.go:282] Add success.
I0320 02:43:43.419944  543705 net.go:648] Add success.
I0320 02:43:43.422707  543705 net.go:770] primary dev: ETH0
I0320 02:43:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:43:43.422732  543705 net.go:698] Add success.
I0320 02:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:43:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:43:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:43:53.409777  543705 memory.go:184] no items to output this cycle
I0320 02:43:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:44:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:44:03.409798  543705 memory.go:184] no items to output this cycle
I0320 02:44:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 02:44:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:44:13.409790  543705 memory.go:191] Add success.
I0320 02:44:13.409793  543705 cpu.go:282] Add success.
W0320 02:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:44:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:44:13.420058  543705 net.go:648] Add success.
I0320 02:44:13.422854  543705 net.go:770] primary dev: ETH0
I0320 02:44:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:44:13.422879  543705 net.go:698] Add success.
I0320 02:44:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:44:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:44:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 02:44:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:44:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 02:44:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:44:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:44:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:44:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:44:16.472457  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:44:22.209680  543705 disk_info.go:125] begin check local disk info of client
I0320 02:44:22.212234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:44:22.212240  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0320 02:44:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:44:23.409767  543705 memory.go:184] no items to output this cycle
I0320 02:44:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:44:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:44:33.409776  543705 memory.go:184] no items to output this cycle
I0320 02:44:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:44:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:44:43.409819  543705 memory.go:191] Add success.
I0320 02:44:43.409821  543705 cpu.go:282] Add success.
I0320 02:44:43.419707  543705 net.go:770] primary dev: ETH0
I0320 02:44:43.419721  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:44:43.419735  543705 net.go:698] Add success.
I0320 02:44:43.420097  543705 net.go:648] Add success.
I0320 02:44:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:44:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:44:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:44:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:44:53.409810  543705 memory.go:184] no items to output this cycle
I0320 02:44:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 02:45:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:45:03.409774  543705 memory.go:184] no items to output this cycle
I0320 02:45:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:45:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:45:13.409798  543705 memory.go:191] Add success.
I0320 02:45:13.409801  543705 cpu.go:282] Add success.
W0320 02:45:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:45:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:45:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:45:13.420064  543705 net.go:648] Add success.
I0320 02:45:13.423122  543705 net.go:770] primary dev: ETH0
I0320 02:45:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:45:13.423151  543705 net.go:698] Add success.
I0320 02:45:13.469567  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d605e169-59df-4d9c-90ce-3e64f26063f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:45:13.469599  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:45:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:45:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:45:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 02:45:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:45:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 02:45:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:45:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:45:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:45:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:45:22.213674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:45:22.216189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:45:22.216196  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326fc0 0xc000327000]
E0320 02:45:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:45:23.409762  543705 memory.go:184] no items to output this cycle
I0320 02:45:23.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:45:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:45:33.409795  543705 memory.go:184] no items to output this cycle
I0320 02:45:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 02:45:38.065017  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:45:38.065023  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:45:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:45:43.410605  543705 memory.go:191] Add success.
I0320 02:45:43.409799  543705 cpu.go:282] Add success.
I0320 02:45:43.420318  543705 net.go:648] Add success.
I0320 02:45:43.423042  543705 net.go:770] primary dev: ETH0
I0320 02:45:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:45:43.423067  543705 net.go:698] Add success.
I0320 02:45:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:45:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:45:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:45:53.409788  543705 memory.go:184] no items to output this cycle
I0320 02:45:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 02:46:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:46:03.409773  543705 memory.go:184] no items to output this cycle
I0320 02:46:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:46:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:46:13.409784  543705 memory.go:191] Add success.
I0320 02:46:13.409808  543705 cpu.go:282] Add success.
W0320 02:46:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:46:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:46:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:46:13.420177  543705 net.go:648] Add success.
I0320 02:46:13.422759  543705 net.go:770] primary dev: ETH0
I0320 02:46:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:46:13.422782  543705 net.go:698] Add success.
I0320 02:46:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:46:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:46:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 02:46:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:46:14.459182  543705 disk_worker.go:494] system disk:vda1
I0320 02:46:14.459211  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:46:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:46:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:46:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:46:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:46:16.472431  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:46:22.217672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:46:22.220157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:46:22.220164  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ed80 0xc00039edc0]
E0320 02:46:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:46:23.409767  543705 memory.go:184] no items to output this cycle
I0320 02:46:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 02:46:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:46:33.409797  543705 memory.go:184] no items to output this cycle
I0320 02:46:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 02:46:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:46:43.409784  543705 memory.go:191] Add success.
I0320 02:46:43.409800  543705 cpu.go:282] Add success.
I0320 02:46:43.419914  543705 net.go:648] Add success.
I0320 02:46:43.422652  543705 net.go:770] primary dev: ETH0
I0320 02:46:43.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:46:43.422685  543705 net.go:698] Add success.
I0320 02:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:46:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:46:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:46:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:46:53.409812  543705 memory.go:184] no items to output this cycle
I0320 02:46:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 02:47:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:47:03.409796  543705 memory.go:184] no items to output this cycle
I0320 02:47:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 02:47:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:47:13.409821  543705 memory.go:191] Add success.
I0320 02:47:13.409826  543705 cpu.go:282] Add success.
W0320 02:47:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:47:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:47:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:47:13.420226  543705 net.go:648] Add success.
I0320 02:47:13.422888  543705 net.go:770] primary dev: ETH0
I0320 02:47:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:47:13.422917  543705 net.go:698] Add success.
I0320 02:47:13.453458  543705 event_worker.go:152] Polling the log file for events...
W0320 02:47:14.454674  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:47:14.454689  543705 disk_worker.go:708] disk space is not compliant
W0320 02:47:14.454694  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:47:14.456902  543705 disk_worker.go:494] system disk:vda1
I0320 02:47:14.456956  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:47:14.457733  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:47:14.457754  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:47:14.457760  543705 custom_config.go:64] query custom config with name: gpu
E0320 02:47:15.456888  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:47:15.456898  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:47:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:47:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:47:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:47:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:47:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:47:22.221692  543705 disk_info.go:125] begin check local disk info of client
I0320 02:47:22.224113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:47:22.224119  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a7c0 0xc00047a800]
E0320 02:47:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:47:23.409792  543705 memory.go:184] no items to output this cycle
I0320 02:47:23.409811  543705 cpu.go:275] no items to output this cycle
E0320 02:47:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:47:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 02:47:33.409794  543705 memory.go:184] no items to output this cycle
E0320 02:47:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:47:43.409808  543705 memory.go:191] Add success.
I0320 02:47:43.409823  543705 cpu.go:282] Add success.
I0320 02:47:43.419997  543705 net.go:648] Add success.
I0320 02:47:43.422514  543705 net.go:770] primary dev: ETH0
I0320 02:47:43.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:47:43.422539  543705 net.go:698] Add success.
I0320 02:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:47:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:47:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:47:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:47:53.409776  543705 memory.go:184] no items to output this cycle
I0320 02:47:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:48:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:48:03.409772  543705 memory.go:184] no items to output this cycle
I0320 02:48:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 02:48:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:48:13.409799  543705 cpu.go:282] Add success.
I0320 02:48:13.409822  543705 memory.go:191] Add success.
W0320 02:48:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:48:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:48:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:48:13.420252  543705 net.go:648] Add success.
I0320 02:48:13.421198  543705 net.go:770] primary dev: ETH0
I0320 02:48:13.421211  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:48:13.421224  543705 net.go:698] Add success.
I0320 02:48:13.559394  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5672636e-de0f-4379-8875-ebe0347a47e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:48:13.559435  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:48:14.453976  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:48:14.454174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:48:14.454346  543705 disk_worker.go:708] disk space is not compliant
W0320 02:48:14.454351  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:48:14.456206  543705 disk_worker.go:494] system disk:vda1
I0320 02:48:14.456237  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:48:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:48:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:48:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:48:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:48:22.225681  543705 disk_info.go:125] begin check local disk info of client
I0320 02:48:22.228158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:48:22.228165  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304080 0xc0003040c0]
E0320 02:48:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:48:23.409793  543705 memory.go:184] no items to output this cycle
I0320 02:48:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 02:48:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:48:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 02:48:33.409802  543705 memory.go:184] no items to output this cycle
I0320 02:48:38.065737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:48:38.065743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:48:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:48:43.410599  543705 memory.go:191] Add success.
I0320 02:48:43.409821  543705 cpu.go:282] Add success.
I0320 02:48:43.420313  543705 net.go:648] Add success.
I0320 02:48:43.422901  543705 net.go:770] primary dev: ETH0
I0320 02:48:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:48:43.422927  543705 net.go:698] Add success.
I0320 02:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:48:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:48:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:48:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:48:53.409782  543705 memory.go:184] no items to output this cycle
I0320 02:48:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:49:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:49:03.409788  543705 memory.go:184] no items to output this cycle
I0320 02:49:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:49:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:49:13.409807  543705 memory.go:191] Add success.
I0320 02:49:13.409820  543705 cpu.go:282] Add success.
W0320 02:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:49:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:49:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:49:13.420175  543705 net.go:648] Add success.
I0320 02:49:13.423148  543705 net.go:770] primary dev: ETH0
I0320 02:49:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:49:13.423176  543705 net.go:698] Add success.
I0320 02:49:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:49:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:49:14.455320  543705 disk_worker.go:708] disk space is not compliant
W0320 02:49:14.455325  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:49:14.457036  543705 disk_worker.go:494] system disk:vda1
I0320 02:49:14.457065  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:49:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:49:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:49:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:49:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:49:22.229674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:49:22.232158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:49:22.232165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003973c0 0xc000397400]
E0320 02:49:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:49:23.409778  543705 memory.go:184] no items to output this cycle
I0320 02:49:23.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:49:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:49:33.409783  543705 memory.go:184] no items to output this cycle
I0320 02:49:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:49:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:49:43.409791  543705 memory.go:191] Add success.
I0320 02:49:43.409814  543705 cpu.go:282] Add success.
I0320 02:49:43.419870  543705 net.go:648] Add success.
I0320 02:49:43.422313  543705 net.go:770] primary dev: ETH0
I0320 02:49:43.422327  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:49:43.422339  543705 net.go:698] Add success.
I0320 02:49:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:49:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:49:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:49:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:49:53.410270  543705 memory.go:184] no items to output this cycle
I0320 02:49:53.410271  543705 cpu.go:275] no items to output this cycle
E0320 02:50:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:50:03.409806  543705 memory.go:184] no items to output this cycle
I0320 02:50:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 02:50:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:50:13.409788  543705 memory.go:191] Add success.
W0320 02:50:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 02:50:13.409819  543705 cpu.go:282] Add success.
W0320 02:50:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:50:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:50:13.420134  543705 net.go:648] Add success.
I0320 02:50:13.422894  543705 net.go:770] primary dev: ETH0
I0320 02:50:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:50:13.422924  543705 net.go:698] Add success.
I0320 02:50:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:50:14.455308  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:50:14.455414  543705 disk_worker.go:708] disk space is not compliant
W0320 02:50:14.455419  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:50:14.456993  543705 disk_worker.go:494] system disk:vda1
I0320 02:50:14.457021  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:50:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:50:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:50:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:50:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:50:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:50:22.233678  543705 disk_info.go:125] begin check local disk info of client
I0320 02:50:22.236224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:50:22.236231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384080 0xc0003840c0]
E0320 02:50:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:50:23.409781  543705 memory.go:184] no items to output this cycle
I0320 02:50:23.409790  543705 cpu.go:275] no items to output this cycle
E0320 02:50:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:50:33.409782  543705 memory.go:184] no items to output this cycle
I0320 02:50:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:50:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:50:43.409797  543705 memory.go:191] Add success.
I0320 02:50:43.409807  543705 cpu.go:282] Add success.
I0320 02:50:43.419901  543705 net.go:648] Add success.
I0320 02:50:43.422693  543705 net.go:770] primary dev: ETH0
I0320 02:50:43.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:50:43.422723  543705 net.go:698] Add success.
I0320 02:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:50:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:50:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:50:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:50:53.409786  543705 memory.go:184] no items to output this cycle
I0320 02:50:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 02:51:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:51:03.409787  543705 memory.go:184] no items to output this cycle
I0320 02:51:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:51:13.409809  543705 memory.go:191] Add success.
I0320 02:51:13.409809  543705 cpu.go:282] Add success.
W0320 02:51:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:51:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:51:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:51:13.420154  543705 net.go:648] Add success.
I0320 02:51:13.423098  543705 net.go:770] primary dev: ETH0
I0320 02:51:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:51:13.423124  543705 net.go:698] Add success.
I0320 02:51:13.469354  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1576fa2b-66e7-4cc0-8c46-cbb674bd3cb1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:51:13.469387  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:51:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:51:14.455457  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:51:14.455478  543705 disk_worker.go:708] disk space is not compliant
W0320 02:51:14.455482  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:51:14.457537  543705 disk_worker.go:494] system disk:vda1
I0320 02:51:14.457563  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:51:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:51:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:51:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:51:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:51:22.237677  543705 disk_info.go:125] begin check local disk info of client
I0320 02:51:22.240114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:51:22.240120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f7e40 0xc0004f7e80]
E0320 02:51:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:51:23.409782  543705 memory.go:184] no items to output this cycle
I0320 02:51:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:51:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:51:33.409766  543705 memory.go:184] no items to output this cycle
I0320 02:51:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 02:51:38.069032  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:51:38.069038  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:51:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:51:43.410648  543705 memory.go:191] Add success.
I0320 02:51:43.409834  543705 cpu.go:282] Add success.
I0320 02:51:43.420289  543705 net.go:648] Add success.
I0320 02:51:43.423115  543705 net.go:770] primary dev: ETH0
I0320 02:51:43.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:51:43.423141  543705 net.go:698] Add success.
I0320 02:51:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:51:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:51:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:51:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:51:53.409777  543705 memory.go:184] no items to output this cycle
I0320 02:51:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 02:52:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:52:03.409783  543705 memory.go:184] no items to output this cycle
I0320 02:52:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 02:52:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:52:13.409792  543705 memory.go:191] Add success.
I0320 02:52:13.409796  543705 cpu.go:282] Add success.
W0320 02:52:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:52:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:52:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:52:13.420132  543705 net.go:648] Add success.
I0320 02:52:13.422974  543705 net.go:770] primary dev: ETH0
I0320 02:52:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:52:13.423005  543705 net.go:698] Add success.
W0320 02:52:14.455324  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:52:14.455339  543705 disk_worker.go:708] disk space is not compliant
W0320 02:52:14.455342  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:52:14.456503  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:52:14.456512  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:52:14.456518  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:52:14.457107  543705 disk_worker.go:494] system disk:vda1
I0320 02:52:14.457140  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:52:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:52:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:52:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:52:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:52:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:52:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:52:16.472330  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:52:22.241674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:52:22.244156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:52:22.244163  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053c640 0xc00053c680]
E0320 02:52:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:52:23.409788  543705 memory.go:184] no items to output this cycle
I0320 02:52:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 02:52:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:52:33.409774  543705 memory.go:184] no items to output this cycle
I0320 02:52:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 02:52:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:52:43.409799  543705 memory.go:191] Add success.
I0320 02:52:43.409800  543705 cpu.go:282] Add success.
I0320 02:52:43.419922  543705 net.go:648] Add success.
I0320 02:52:43.422791  543705 net.go:770] primary dev: ETH0
I0320 02:52:43.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:52:43.422821  543705 net.go:698] Add success.
I0320 02:52:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:52:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:52:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:52:53.409775  543705 memory.go:184] no items to output this cycle
I0320 02:52:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 02:53:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:53:03.409800  543705 memory.go:184] no items to output this cycle
I0320 02:53:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 02:53:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:53:13.409809  543705 memory.go:191] Add success.
I0320 02:53:13.409811  543705 cpu.go:282] Add success.
W0320 02:53:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:53:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:53:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:53:13.420150  543705 net.go:648] Add success.
I0320 02:53:13.422811  543705 net.go:770] primary dev: ETH0
I0320 02:53:13.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:53:13.422838  543705 net.go:698] Add success.
I0320 02:53:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:53:14.455356  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:53:14.455369  543705 disk_worker.go:708] disk space is not compliant
W0320 02:53:14.455373  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:53:14.457006  543705 disk_worker.go:494] system disk:vda1
I0320 02:53:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:53:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:53:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:53:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:53:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:53:22.245675  543705 disk_info.go:125] begin check local disk info of client
I0320 02:53:22.248107  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:53:22.248113  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467480 0xc0004674c0]
E0320 02:53:23.410697  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:53:23.410710  543705 memory.go:184] no items to output this cycle
I0320 02:53:23.410751  543705 cpu.go:275] no items to output this cycle
E0320 02:53:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:53:33.409770  543705 memory.go:184] no items to output this cycle
I0320 02:53:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:53:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:53:43.409816  543705 memory.go:191] Add success.
I0320 02:53:43.409824  543705 cpu.go:282] Add success.
I0320 02:53:43.420018  543705 net.go:648] Add success.
I0320 02:53:43.423003  543705 net.go:770] primary dev: ETH0
I0320 02:53:43.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:53:43.423030  543705 net.go:698] Add success.
I0320 02:53:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:53:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:53:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:53:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:53:53.409779  543705 memory.go:184] no items to output this cycle
I0320 02:53:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 02:54:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:54:03.409776  543705 memory.go:184] no items to output this cycle
I0320 02:54:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 02:54:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:54:13.409810  543705 memory.go:191] Add success.
I0320 02:54:13.409809  543705 cpu.go:282] Add success.
W0320 02:54:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:54:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:54:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:54:13.420341  543705 net.go:648] Add success.
I0320 02:54:13.423071  543705 net.go:770] primary dev: ETH0
I0320 02:54:13.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:54:13.423101  543705 net.go:698] Add success.
I0320 02:54:13.464200  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"03aa716d-a898-45f0-94f8-32e8d4c007b4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:54:13.464232  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 02:54:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:54:14.455350  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:54:14.455364  543705 disk_worker.go:708] disk space is not compliant
W0320 02:54:14.455367  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:54:14.457533  543705 disk_worker.go:494] system disk:vda1
I0320 02:54:14.457576  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:54:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:54:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:54:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:54:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:54:22.249674  543705 disk_info.go:125] begin check local disk info of client
I0320 02:54:22.252171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:54:22.252177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000538600 0xc000538640]
E0320 02:54:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:54:23.409756  543705 memory.go:184] no items to output this cycle
I0320 02:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:54:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:54:33.409801  543705 memory.go:184] no items to output this cycle
I0320 02:54:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 02:54:38.069729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:54:38.069735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:54:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:54:43.410600  543705 memory.go:191] Add success.
I0320 02:54:43.409804  543705 cpu.go:282] Add success.
I0320 02:54:43.420362  543705 net.go:648] Add success.
I0320 02:54:43.422984  543705 net.go:770] primary dev: ETH0
I0320 02:54:43.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:54:43.423011  543705 net.go:698] Add success.
I0320 02:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:54:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:54:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:54:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:54:53.409776  543705 memory.go:184] no items to output this cycle
I0320 02:54:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 02:55:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:55:03.409768  543705 memory.go:184] no items to output this cycle
I0320 02:55:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 02:55:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:55:13.409807  543705 memory.go:191] Add success.
I0320 02:55:13.409808  543705 cpu.go:282] Add success.
W0320 02:55:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:55:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:55:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:55:13.420178  543705 net.go:648] Add success.
I0320 02:55:13.422879  543705 net.go:770] primary dev: ETH0
I0320 02:55:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:55:13.422905  543705 net.go:698] Add success.
I0320 02:55:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:55:14.455328  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:55:14.455427  543705 disk_worker.go:708] disk space is not compliant
W0320 02:55:14.455431  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:55:14.457546  543705 disk_worker.go:494] system disk:vda1
I0320 02:55:14.457586  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:55:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:55:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:55:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:55:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:55:22.253679  543705 disk_info.go:125] begin check local disk info of client
I0320 02:55:22.256089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:55:22.256095  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba380 0xc0003ba3c0]
E0320 02:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:55:23.409783  543705 memory.go:184] no items to output this cycle
I0320 02:55:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 02:55:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:55:33.409769  543705 memory.go:184] no items to output this cycle
I0320 02:55:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 02:55:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:55:43.409792  543705 memory.go:191] Add success.
I0320 02:55:43.409793  543705 cpu.go:282] Add success.
I0320 02:55:43.420012  543705 net.go:648] Add success.
I0320 02:55:43.422906  543705 net.go:770] primary dev: ETH0
I0320 02:55:43.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:55:43.422934  543705 net.go:698] Add success.
I0320 02:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:55:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:55:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:55:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:55:53.409808  543705 memory.go:184] no items to output this cycle
I0320 02:55:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 02:56:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:56:03.409776  543705 memory.go:184] no items to output this cycle
I0320 02:56:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 02:56:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:56:13.409799  543705 memory.go:191] Add success.
I0320 02:56:13.409799  543705 cpu.go:282] Add success.
W0320 02:56:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:56:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:56:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:56:13.420170  543705 net.go:648] Add success.
I0320 02:56:13.423033  543705 net.go:770] primary dev: ETH0
I0320 02:56:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:56:13.423058  543705 net.go:698] Add success.
I0320 02:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:56:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:56:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 02:56:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:56:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 02:56:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:56:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:56:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:56:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:56:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:56:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:56:22.257678  543705 disk_info.go:125] begin check local disk info of client
I0320 02:56:22.260145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:56:22.260151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4300 0xc0004b4340]
E0320 02:56:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:56:23.409790  543705 memory.go:184] no items to output this cycle
I0320 02:56:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 02:56:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:56:33.409774  543705 memory.go:184] no items to output this cycle
I0320 02:56:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 02:56:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:56:43.409773  543705 memory.go:191] Add success.
I0320 02:56:43.409807  543705 cpu.go:282] Add success.
I0320 02:56:43.419996  543705 net.go:648] Add success.
I0320 02:56:43.422610  543705 net.go:770] primary dev: ETH0
I0320 02:56:43.422623  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:56:43.422635  543705 net.go:698] Add success.
I0320 02:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:56:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:56:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:56:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:56:53.409821  543705 memory.go:184] no items to output this cycle
I0320 02:56:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 02:57:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:57:03.409785  543705 memory.go:184] no items to output this cycle
I0320 02:57:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 02:57:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:57:13.409811  543705 memory.go:191] Add success.
I0320 02:57:13.409820  543705 cpu.go:282] Add success.
W0320 02:57:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:57:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:57:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:57:13.420284  543705 net.go:648] Add success.
I0320 02:57:13.429768  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 02:57:13.429843  543705 net.go:770] primary dev: ETH0
I0320 02:57:13.429856  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:57:13.429868  543705 net.go:698] Add success.
I0320 02:57:13.453425  543705 event_worker.go:152] Polling the log file for events...
I0320 02:57:13.464157  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5459d80f-1332-48b2-9f8d-d78713da2b5f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 02:57:13.464192  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 02:57:14.455379  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:57:14.455394  543705 disk_worker.go:708] disk space is not compliant
W0320 02:57:14.455399  543705 disk_worker.go:728] disk inode is not compliant
E0320 02:57:14.457724  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 02:57:14.457731  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 02:57:14.457735  543705 custom_config.go:64] query custom config with name: gpu
I0320 02:57:14.457763  543705 disk_worker.go:494] system disk:vda1
I0320 02:57:14.457798  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 02:57:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 02:57:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:57:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 02:57:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 02:57:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:57:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:57:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:57:22.261672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:57:22.264092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:57:22.264098  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344140 0xc000344180]
E0320 02:57:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:57:23.409786  543705 memory.go:184] no items to output this cycle
I0320 02:57:23.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:57:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:57:33.409780  543705 memory.go:184] no items to output this cycle
I0320 02:57:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 02:57:38.073045  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 02:57:38.073052  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 02:57:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:57:43.410760  543705 memory.go:191] Add success.
I0320 02:57:43.409811  543705 cpu.go:282] Add success.
I0320 02:57:43.420489  543705 net.go:648] Add success.
I0320 02:57:43.423305  543705 net.go:770] primary dev: ETH0
I0320 02:57:43.423318  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:57:43.423331  543705 net.go:698] Add success.
I0320 02:57:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:57:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:57:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:57:53.409788  543705 cpu.go:275] no items to output this cycle
I0320 02:57:53.409793  543705 memory.go:184] no items to output this cycle
E0320 02:58:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:58:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 02:58:03.409791  543705 memory.go:184] no items to output this cycle
E0320 02:58:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:58:13.409829  543705 memory.go:191] Add success.
I0320 02:58:13.409831  543705 cpu.go:282] Add success.
W0320 02:58:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:58:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:58:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:58:13.420278  543705 net.go:648] Add success.
I0320 02:58:13.422824  543705 net.go:770] primary dev: ETH0
I0320 02:58:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:58:13.422850  543705 net.go:698] Add success.
I0320 02:58:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:58:14.455428  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:58:14.455441  543705 disk_worker.go:708] disk space is not compliant
W0320 02:58:14.455461  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:58:14.457066  543705 disk_worker.go:494] system disk:vda1
I0320 02:58:14.457097  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:58:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:58:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:58:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:58:22.265676  543705 disk_info.go:125] begin check local disk info of client
I0320 02:58:22.268159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:58:22.268166  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486180 0xc0004861c0]
E0320 02:58:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:58:23.409782  543705 memory.go:184] no items to output this cycle
I0320 02:58:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 02:58:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:58:33.409783  543705 memory.go:184] no items to output this cycle
I0320 02:58:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 02:58:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:58:43.409793  543705 memory.go:191] Add success.
I0320 02:58:43.409794  543705 cpu.go:282] Add success.
I0320 02:58:43.419993  543705 net.go:648] Add success.
I0320 02:58:43.422715  543705 net.go:770] primary dev: ETH0
I0320 02:58:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:58:43.422740  543705 net.go:698] Add success.
I0320 02:58:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:58:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:58:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:58:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:58:53.409788  543705 cpu.go:275] no items to output this cycle
I0320 02:58:53.409790  543705 memory.go:184] no items to output this cycle
E0320 02:59:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:59:03.409778  543705 memory.go:184] no items to output this cycle
I0320 02:59:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 02:59:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:59:13.409836  543705 memory.go:191] Add success.
I0320 02:59:13.409842  543705 cpu.go:282] Add success.
W0320 02:59:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 02:59:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 02:59:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 02:59:13.420341  543705 net.go:648] Add success.
I0320 02:59:13.423202  543705 net.go:770] primary dev: ETH0
I0320 02:59:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:59:13.423227  543705 net.go:698] Add success.
I0320 02:59:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 02:59:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 02:59:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 02:59:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 02:59:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 02:59:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 02:59:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 02:59:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:59:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:59:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 02:59:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 02:59:22.269672  543705 disk_info.go:125] begin check local disk info of client
I0320 02:59:22.272141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 02:59:22.272147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe340 0xc0003fe380]
E0320 02:59:23.410450  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:59:23.410467  543705 memory.go:184] no items to output this cycle
I0320 02:59:23.410478  543705 cpu.go:275] no items to output this cycle
E0320 02:59:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:59:33.409768  543705 memory.go:184] no items to output this cycle
I0320 02:59:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 02:59:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:59:43.409815  543705 memory.go:191] Add success.
I0320 02:59:43.409819  543705 cpu.go:282] Add success.
I0320 02:59:43.419848  543705 net.go:648] Add success.
I0320 02:59:43.422492  543705 net.go:770] primary dev: ETH0
I0320 02:59:43.422508  543705 net.go:802] Send network stats successfully!,count is 6
I0320 02:59:43.422522  543705 net.go:698] Add success.
I0320 02:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 02:59:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 02:59:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 02:59:53.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 02:59:53.410263  543705 memory.go:184] no items to output this cycle
I0320 02:59:53.410289  543705 cpu.go:275] no items to output this cycle
E0320 03:00:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:00:03.409806  543705 memory.go:184] no items to output this cycle
I0320 03:00:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 03:00:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:00:13.409780  543705 memory.go:191] Add success.
I0320 03:00:13.409805  543705 cpu.go:282] Add success.
W0320 03:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:00:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:00:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:00:13.420222  543705 net.go:648] Add success.
I0320 03:00:13.422771  543705 net.go:770] primary dev: ETH0
I0320 03:00:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:00:13.422796  543705 net.go:698] Add success.
I0320 03:00:13.470207  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"87a90272-2dfa-48aa-a898-165806da00bd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:00:13.470240  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:00:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:00:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:00:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 03:00:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:00:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 03:00:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:00:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:00:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:00:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:00:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:00:22.273677  543705 disk_info.go:125] begin check local disk info of client
I0320 03:00:22.276174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:00:22.276180  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c140 0xc00034c1c0]
E0320 03:00:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:00:23.409790  543705 memory.go:184] no items to output this cycle
I0320 03:00:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:00:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:00:33.409777  543705 memory.go:184] no items to output this cycle
I0320 03:00:33.409781  543705 cpu.go:275] no items to output this cycle
I0320 03:00:38.073732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:00:38.073738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:00:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:00:43.410618  543705 memory.go:191] Add success.
I0320 03:00:43.409822  543705 cpu.go:282] Add success.
I0320 03:00:43.420599  543705 net.go:648] Add success.
I0320 03:00:43.423205  543705 net.go:770] primary dev: ETH0
I0320 03:00:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:00:43.423231  543705 net.go:698] Add success.
I0320 03:00:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:00:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:00:53.410422  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:00:53.410441  543705 memory.go:184] no items to output this cycle
I0320 03:00:53.410445  543705 cpu.go:275] no items to output this cycle
E0320 03:01:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:01:03.409770  543705 memory.go:184] no items to output this cycle
I0320 03:01:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 03:01:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:01:13.409789  543705 memory.go:191] Add success.
W0320 03:01:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:01:13.409824  543705 cpu.go:282] Add success.
W0320 03:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:01:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:01:13.420220  543705 net.go:648] Add success.
I0320 03:01:13.423152  543705 net.go:770] primary dev: ETH0
I0320 03:01:13.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:01:13.423177  543705 net.go:698] Add success.
I0320 03:01:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:01:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:01:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 03:01:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:01:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 03:01:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:01:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:01:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:01:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:01:22.277674  543705 disk_info.go:125] begin check local disk info of client
I0320 03:01:22.280088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:01:22.280095  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370100 0xc000370140]
E0320 03:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:01:23.409781  543705 memory.go:184] no items to output this cycle
I0320 03:01:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:01:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:01:33.409796  543705 memory.go:184] no items to output this cycle
I0320 03:01:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 03:01:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:01:43.409808  543705 memory.go:191] Add success.
I0320 03:01:43.409817  543705 cpu.go:282] Add success.
I0320 03:01:43.420016  543705 net.go:648] Add success.
I0320 03:01:43.422848  543705 net.go:770] primary dev: ETH0
I0320 03:01:43.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:01:43.422874  543705 net.go:698] Add success.
I0320 03:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:01:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:01:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:01:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:01:53.409808  543705 memory.go:184] no items to output this cycle
I0320 03:01:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 03:02:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:02:03.409785  543705 memory.go:184] no items to output this cycle
I0320 03:02:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 03:02:13.410531  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:02:13.410566  543705 memory.go:191] Add success.
I0320 03:02:13.410571  543705 cpu.go:282] Add success.
W0320 03:02:13.410598  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:02:13.410615  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:02:13.410619  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:02:13.419895  543705 net.go:648] Add success.
I0320 03:02:13.422788  543705 net.go:770] primary dev: ETH0
I0320 03:02:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:02:13.422813  543705 net.go:698] Add success.
W0320 03:02:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:02:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 03:02:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:02:14.455873  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:02:14.455882  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:02:14.455888  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:02:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 03:02:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:02:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:02:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:02:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:02:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:02:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:02:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:02:16.472437  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:02:22.281675  543705 disk_info.go:125] begin check local disk info of client
I0320 03:02:22.284177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:02:22.284184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6180 0xc0003b61c0]
E0320 03:02:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:02:23.409771  543705 memory.go:184] no items to output this cycle
I0320 03:02:23.409778  543705 cpu.go:275] no items to output this cycle
E0320 03:02:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:02:33.409769  543705 memory.go:184] no items to output this cycle
I0320 03:02:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:02:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:02:43.409776  543705 memory.go:191] Add success.
I0320 03:02:43.409809  543705 cpu.go:282] Add success.
I0320 03:02:43.420020  543705 net.go:648] Add success.
I0320 03:02:43.422547  543705 net.go:770] primary dev: ETH0
I0320 03:02:43.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:02:43.422572  543705 net.go:698] Add success.
I0320 03:02:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:02:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:02:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:02:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:02:53.409789  543705 memory.go:184] no items to output this cycle
I0320 03:02:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 03:03:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:03:03.409802  543705 memory.go:184] no items to output this cycle
I0320 03:03:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 03:03:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:03:13.409832  543705 memory.go:191] Add success.
I0320 03:03:13.409840  543705 cpu.go:282] Add success.
W0320 03:03:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:03:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:03:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:03:13.420179  543705 net.go:648] Add success.
I0320 03:03:13.423088  543705 net.go:770] primary dev: ETH0
I0320 03:03:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:03:13.423116  543705 net.go:698] Add success.
I0320 03:03:13.464367  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98a2cecb-d212-4f2e-aec7-cb68319b30c5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:03:13.464399  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:03:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:03:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:03:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 03:03:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:03:14.456534  543705 disk_worker.go:494] system disk:vda1
I0320 03:03:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:03:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:03:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:03:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:03:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:03:22.285671  543705 disk_info.go:125] begin check local disk info of client
I0320 03:03:22.288153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:03:22.288159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354340 0xc000354380]
E0320 03:03:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:03:23.409771  543705 memory.go:184] no items to output this cycle
I0320 03:03:23.409777  543705 cpu.go:275] no items to output this cycle
E0320 03:03:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:03:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 03:03:33.409797  543705 memory.go:184] no items to output this cycle
I0320 03:03:38.077063  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:03:38.077071  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:03:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:03:43.410657  543705 memory.go:191] Add success.
I0320 03:03:43.409793  543705 cpu.go:282] Add success.
I0320 03:03:43.420378  543705 net.go:648] Add success.
I0320 03:03:43.422952  543705 net.go:770] primary dev: ETH0
I0320 03:03:43.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:03:43.422977  543705 net.go:698] Add success.
I0320 03:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:03:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:03:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:03:53.409780  543705 memory.go:184] no items to output this cycle
I0320 03:03:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 03:04:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:04:03.409804  543705 memory.go:184] no items to output this cycle
I0320 03:04:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 03:04:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:04:13.409790  543705 memory.go:191] Add success.
I0320 03:04:13.409791  543705 cpu.go:282] Add success.
W0320 03:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:04:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:04:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:04:13.420144  543705 net.go:648] Add success.
I0320 03:04:13.423145  543705 net.go:770] primary dev: ETH0
I0320 03:04:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:04:13.423171  543705 net.go:698] Add success.
I0320 03:04:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:04:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:04:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 03:04:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:04:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 03:04:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:04:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:04:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:04:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:04:22.289676  543705 disk_info.go:125] begin check local disk info of client
I0320 03:04:22.292171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:04:22.292178  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029e2c0 0xc00029e300]
E0320 03:04:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:04:23.409764  543705 memory.go:184] no items to output this cycle
I0320 03:04:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:04:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:04:33.409801  543705 memory.go:184] no items to output this cycle
I0320 03:04:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 03:04:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:04:43.409793  543705 memory.go:191] Add success.
I0320 03:04:43.409804  543705 cpu.go:282] Add success.
I0320 03:04:43.419859  543705 net.go:648] Add success.
I0320 03:04:43.422602  543705 net.go:770] primary dev: ETH0
I0320 03:04:43.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:04:43.422633  543705 net.go:698] Add success.
I0320 03:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:04:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:04:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:04:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:04:53.409815  543705 memory.go:184] no items to output this cycle
I0320 03:04:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 03:05:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:05:03.409781  543705 memory.go:184] no items to output this cycle
I0320 03:05:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 03:05:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:05:13.409809  543705 memory.go:191] Add success.
I0320 03:05:13.409813  543705 cpu.go:282] Add success.
W0320 03:05:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:05:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:05:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:05:13.420169  543705 net.go:648] Add success.
I0320 03:05:13.423062  543705 net.go:770] primary dev: ETH0
I0320 03:05:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:05:13.423087  543705 net.go:698] Add success.
I0320 03:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:05:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:05:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 03:05:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:05:14.456605  543705 disk_worker.go:494] system disk:vda1
I0320 03:05:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:05:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:05:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:05:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:05:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:05:22.293678  543705 disk_info.go:125] begin check local disk info of client
I0320 03:05:22.296053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:05:22.296060  543705 disk_info.go:196] parse disk info done, disk is : [0xc000495e80 0xc000495ec0]
E0320 03:05:23.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:05:23.409878  543705 memory.go:184] no items to output this cycle
I0320 03:05:23.409952  543705 cpu.go:275] no items to output this cycle
E0320 03:05:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:05:33.409776  543705 memory.go:184] no items to output this cycle
I0320 03:05:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 03:05:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:05:43.409818  543705 memory.go:191] Add success.
I0320 03:05:43.409823  543705 cpu.go:282] Add success.
I0320 03:05:43.419940  543705 net.go:648] Add success.
I0320 03:05:43.422883  543705 net.go:770] primary dev: ETH0
I0320 03:05:43.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:05:43.422908  543705 net.go:698] Add success.
I0320 03:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:05:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:05:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:05:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:05:53.409779  543705 memory.go:184] no items to output this cycle
I0320 03:05:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:06:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:06:03.409772  543705 memory.go:184] no items to output this cycle
I0320 03:06:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 03:06:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:06:13.409813  543705 memory.go:191] Add success.
I0320 03:06:13.409821  543705 cpu.go:282] Add success.
W0320 03:06:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:06:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:06:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:06:13.420162  543705 net.go:648] Add success.
I0320 03:06:13.422766  543705 net.go:770] primary dev: ETH0
I0320 03:06:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:06:13.422792  543705 net.go:698] Add success.
I0320 03:06:13.465602  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"46fd14b9-47b9-4aab-8592-e13441e9ea6c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:06:13.465642  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:06:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:06:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:06:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 03:06:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:06:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 03:06:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:06:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:06:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:06:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:06:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:06:22.297679  543705 disk_info.go:125] begin check local disk info of client
I0320 03:06:22.300099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:06:22.300106  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ce80 0xc00047cec0]
E0320 03:06:23.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:06:23.409872  543705 memory.go:184] no items to output this cycle
I0320 03:06:23.409983  543705 cpu.go:275] no items to output this cycle
E0320 03:06:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:06:33.409777  543705 memory.go:184] no items to output this cycle
I0320 03:06:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 03:06:38.077733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:06:38.077740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:06:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:06:43.410734  543705 memory.go:191] Add success.
I0320 03:06:43.409830  543705 cpu.go:282] Add success.
I0320 03:06:43.420244  543705 net.go:770] primary dev: ETH0
I0320 03:06:43.420259  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:06:43.420275  543705 net.go:698] Add success.
I0320 03:06:43.420621  543705 net.go:648] Add success.
I0320 03:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:06:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:06:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:06:53.409790  543705 memory.go:184] no items to output this cycle
I0320 03:06:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 03:07:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:07:03.409807  543705 memory.go:184] no items to output this cycle
I0320 03:07:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 03:07:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:07:13.409837  543705 memory.go:191] Add success.
I0320 03:07:13.409838  543705 cpu.go:282] Add success.
W0320 03:07:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:07:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:07:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:07:13.420229  543705 net.go:648] Add success.
I0320 03:07:13.423006  543705 net.go:770] primary dev: ETH0
I0320 03:07:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:07:13.423030  543705 net.go:698] Add success.
I0320 03:07:13.453561  543705 event_worker.go:152] Polling the log file for events...
W0320 03:07:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:07:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 03:07:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:07:14.456792  543705 disk_worker.go:494] system disk:vda1
I0320 03:07:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:07:14.457224  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:07:14.457232  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:07:14.457237  543705 custom_config.go:64] query custom config with name: gpu
E0320 03:07:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:07:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:07:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:07:16.457979  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:07:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:07:16.458037  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:07:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:07:22.301679  543705 disk_info.go:125] begin check local disk info of client
I0320 03:07:22.304059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:07:22.304065  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b800 0xc00032b840]
E0320 03:07:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:07:23.409789  543705 memory.go:184] no items to output this cycle
I0320 03:07:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:07:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:07:33.409790  543705 memory.go:184] no items to output this cycle
I0320 03:07:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:07:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:07:43.409796  543705 memory.go:191] Add success.
I0320 03:07:43.409796  543705 cpu.go:282] Add success.
I0320 03:07:43.419831  543705 net.go:770] primary dev: ETH0
I0320 03:07:43.419843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:07:43.419855  543705 net.go:698] Add success.
I0320 03:07:43.420083  543705 net.go:648] Add success.
I0320 03:07:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:07:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:07:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:07:53.409797  543705 memory.go:184] no items to output this cycle
I0320 03:07:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 03:08:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:08:03.409793  543705 memory.go:184] no items to output this cycle
I0320 03:08:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 03:08:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:08:13.409808  543705 memory.go:191] Add success.
I0320 03:08:13.409831  543705 cpu.go:282] Add success.
W0320 03:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:08:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:08:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:08:13.420037  543705 net.go:648] Add success.
I0320 03:08:13.422760  543705 net.go:770] primary dev: ETH0
I0320 03:08:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:08:13.422784  543705 net.go:698] Add success.
I0320 03:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:08:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:08:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 03:08:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:08:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 03:08:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:08:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:08:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:08:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:08:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:08:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:08:22.305681  543705 disk_info.go:125] begin check local disk info of client
I0320 03:08:22.308148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:08:22.308154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024e600 0xc00024e640]
E0320 03:08:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:08:23.409797  543705 memory.go:184] no items to output this cycle
I0320 03:08:23.409811  543705 cpu.go:275] no items to output this cycle
E0320 03:08:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:08:33.409799  543705 memory.go:184] no items to output this cycle
I0320 03:08:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 03:08:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:08:43.409799  543705 memory.go:191] Add success.
I0320 03:08:43.409803  543705 cpu.go:282] Add success.
I0320 03:08:43.419984  543705 net.go:648] Add success.
I0320 03:08:43.422582  543705 net.go:770] primary dev: ETH0
I0320 03:08:43.422594  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:08:43.422606  543705 net.go:698] Add success.
I0320 03:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:08:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:08:53.409807  543705 memory.go:184] no items to output this cycle
I0320 03:08:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 03:09:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:09:03.409807  543705 memory.go:184] no items to output this cycle
I0320 03:09:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 03:09:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:09:13.409842  543705 memory.go:191] Add success.
I0320 03:09:13.409850  543705 cpu.go:282] Add success.
W0320 03:09:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:09:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:09:13.409896  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:09:13.420210  543705 net.go:648] Add success.
I0320 03:09:13.423031  543705 net.go:770] primary dev: ETH0
I0320 03:09:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:09:13.423056  543705 net.go:698] Add success.
I0320 03:09:13.469374  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3971057d-2688-4551-abd3-48f3283a5ba3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:09:13.469407  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:09:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:09:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 03:09:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:09:14.456481  543705 disk_worker.go:494] system disk:vda1
I0320 03:09:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:09:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:09:16.472423  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:09:22.309671  543705 disk_info.go:125] begin check local disk info of client
I0320 03:09:22.312070  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:09:22.312076  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344240 0xc000344280]
E0320 03:09:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:09:23.409793  543705 memory.go:184] no items to output this cycle
I0320 03:09:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 03:09:33.409911  543705 cpu.go:275] no items to output this cycle
E0320 03:09:33.410053  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:09:33.410069  543705 memory.go:184] no items to output this cycle
I0320 03:09:38.077880  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:09:38.077887  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:09:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:09:43.410663  543705 memory.go:191] Add success.
I0320 03:09:43.409820  543705 cpu.go:282] Add success.
I0320 03:09:43.420418  543705 net.go:648] Add success.
I0320 03:09:43.422904  543705 net.go:770] primary dev: ETH0
I0320 03:09:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:09:43.422929  543705 net.go:698] Add success.
I0320 03:09:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:09:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:09:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:09:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:09:53.409798  543705 memory.go:184] no items to output this cycle
I0320 03:09:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 03:10:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:10:03.409776  543705 memory.go:184] no items to output this cycle
I0320 03:10:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:10:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:10:13.409826  543705 memory.go:191] Add success.
I0320 03:10:13.409835  543705 cpu.go:282] Add success.
W0320 03:10:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:10:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:10:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:10:13.420184  543705 net.go:648] Add success.
I0320 03:10:13.422882  543705 net.go:770] primary dev: ETH0
I0320 03:10:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:10:13.422906  543705 net.go:698] Add success.
I0320 03:10:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:10:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:10:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 03:10:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:10:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 03:10:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:10:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:10:16.472417  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:10:22.313675  543705 disk_info.go:125] begin check local disk info of client
I0320 03:10:22.316144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:10:22.316150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8dc0 0xc0003b8e00]
E0320 03:10:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:10:23.409807  543705 memory.go:184] no items to output this cycle
I0320 03:10:23.409820  543705 cpu.go:275] no items to output this cycle
E0320 03:10:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:10:33.409808  543705 memory.go:184] no items to output this cycle
I0320 03:10:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 03:10:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:10:43.409788  543705 memory.go:191] Add success.
I0320 03:10:43.409819  543705 cpu.go:282] Add success.
I0320 03:10:43.419853  543705 net.go:648] Add success.
I0320 03:10:43.422571  543705 net.go:770] primary dev: ETH0
I0320 03:10:43.422583  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:10:43.422595  543705 net.go:698] Add success.
I0320 03:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:10:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:10:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:10:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:10:53.409787  543705 memory.go:184] no items to output this cycle
I0320 03:10:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 03:11:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:11:03.409800  543705 memory.go:184] no items to output this cycle
I0320 03:11:03.409821  543705 cpu.go:275] no items to output this cycle
I0320 03:11:13.409809  543705 cpu.go:282] Add success.
E0320 03:11:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:11:13.409847  543705 memory.go:191] Add success.
W0320 03:11:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:11:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:11:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:11:13.420239  543705 net.go:648] Add success.
I0320 03:11:13.423203  543705 net.go:770] primary dev: ETH0
I0320 03:11:13.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:11:13.423229  543705 net.go:698] Add success.
I0320 03:11:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:11:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:11:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 03:11:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:11:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 03:11:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:11:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:11:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:11:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:11:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:11:16.472451  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:11:22.317676  543705 disk_info.go:125] begin check local disk info of client
I0320 03:11:22.320179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:11:22.320186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9600 0xc0002b9640]
E0320 03:11:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:11:23.409786  543705 memory.go:184] no items to output this cycle
I0320 03:11:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 03:11:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:11:33.409772  543705 memory.go:184] no items to output this cycle
I0320 03:11:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 03:11:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:11:43.409779  543705 memory.go:191] Add success.
I0320 03:11:43.409799  543705 cpu.go:282] Add success.
I0320 03:11:43.420223  543705 net.go:648] Add success.
I0320 03:11:43.422932  543705 net.go:770] primary dev: ETH0
I0320 03:11:43.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:11:43.422957  543705 net.go:698] Add success.
I0320 03:11:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:11:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:11:53.409795  543705 memory.go:184] no items to output this cycle
I0320 03:11:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:12:03.409776  543705 memory.go:184] no items to output this cycle
I0320 03:12:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:12:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:12:13.409814  543705 memory.go:191] Add success.
I0320 03:12:13.409816  543705 cpu.go:282] Add success.
W0320 03:12:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:12:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:12:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:12:13.420304  543705 net.go:648] Add success.
I0320 03:12:13.422921  543705 net.go:770] primary dev: ETH0
I0320 03:12:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:12:13.422947  543705 net.go:698] Add success.
I0320 03:12:13.469297  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ebd67f9b-c743-4a08-a276-47e6bdabcfdf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:12:13.469331  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 03:12:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:12:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 03:12:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:12:14.456181  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:12:14.456191  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:12:14.456197  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:12:14.456460  543705 disk_worker.go:494] system disk:vda1
I0320 03:12:14.456490  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:12:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:12:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:12:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:12:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:12:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:12:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:12:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:12:22.321676  543705 disk_info.go:125] begin check local disk info of client
I0320 03:12:22.324219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:12:22.324224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9c80 0xc0002b9cc0]
E0320 03:12:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:12:23.409775  543705 memory.go:184] no items to output this cycle
I0320 03:12:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 03:12:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:12:33.409798  543705 memory.go:184] no items to output this cycle
I0320 03:12:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 03:12:38.081084  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:12:38.081091  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:12:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:12:43.410643  543705 memory.go:191] Add success.
I0320 03:12:43.409792  543705 cpu.go:282] Add success.
I0320 03:12:43.420741  543705 net.go:648] Add success.
I0320 03:12:43.423490  543705 net.go:770] primary dev: ETH0
I0320 03:12:43.423504  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:12:43.423515  543705 net.go:698] Add success.
I0320 03:12:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:12:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:12:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:12:53.410367  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:12:53.410386  543705 memory.go:184] no items to output this cycle
I0320 03:12:53.410399  543705 cpu.go:275] no items to output this cycle
E0320 03:13:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:13:03.409801  543705 memory.go:184] no items to output this cycle
I0320 03:13:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 03:13:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:13:13.409789  543705 memory.go:191] Add success.
W0320 03:13:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:13:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:13:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:13:13.409856  543705 cpu.go:282] Add success.
I0320 03:13:13.420315  543705 net.go:648] Add success.
I0320 03:13:13.421231  543705 net.go:770] primary dev: ETH0
I0320 03:13:13.421246  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:13:13.421259  543705 net.go:698] Add success.
I0320 03:13:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:13:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:13:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 03:13:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:13:14.456545  543705 disk_worker.go:494] system disk:vda1
I0320 03:13:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:13:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:13:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:13:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:13:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:13:22.325683  543705 disk_info.go:125] begin check local disk info of client
I0320 03:13:22.328089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:13:22.328095  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8940 0xc0002b8980]
E0320 03:13:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:13:23.409785  543705 memory.go:184] no items to output this cycle
I0320 03:13:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:13:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:13:33.409784  543705 memory.go:184] no items to output this cycle
I0320 03:13:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 03:13:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:13:43.409813  543705 memory.go:191] Add success.
I0320 03:13:43.409824  543705 cpu.go:282] Add success.
I0320 03:13:43.420056  543705 net.go:648] Add success.
I0320 03:13:43.422922  543705 net.go:770] primary dev: ETH0
I0320 03:13:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:13:43.422947  543705 net.go:698] Add success.
I0320 03:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:13:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:13:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:13:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:13:53.409792  543705 memory.go:184] no items to output this cycle
I0320 03:13:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 03:14:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:14:03.409773  543705 memory.go:184] no items to output this cycle
I0320 03:14:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 03:14:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:14:13.409787  543705 memory.go:191] Add success.
I0320 03:14:13.409806  543705 cpu.go:282] Add success.
W0320 03:14:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:14:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:14:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:14:13.420128  543705 net.go:648] Add success.
I0320 03:14:13.423172  543705 net.go:770] primary dev: ETH0
I0320 03:14:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:14:13.423197  543705 net.go:698] Add success.
I0320 03:14:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:14:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:14:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0320 03:14:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:14:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 03:14:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:14:16.457663  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:14:16.457721  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:14:16.457740  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:14:16.473050  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:14:22.329676  543705 disk_info.go:125] begin check local disk info of client
I0320 03:14:22.332159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:14:22.332166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8d40 0xc0002b8d80]
E0320 03:14:23.410406  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:14:23.410422  543705 memory.go:184] no items to output this cycle
I0320 03:14:23.410431  543705 cpu.go:275] no items to output this cycle
E0320 03:14:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:14:33.409800  543705 memory.go:184] no items to output this cycle
I0320 03:14:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 03:14:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:14:43.409794  543705 memory.go:191] Add success.
I0320 03:14:43.409799  543705 cpu.go:282] Add success.
I0320 03:14:43.419881  543705 net.go:648] Add success.
I0320 03:14:43.422394  543705 net.go:770] primary dev: ETH0
I0320 03:14:43.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:14:43.422424  543705 net.go:698] Add success.
I0320 03:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:14:53.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:14:53.409916  543705 memory.go:184] no items to output this cycle
I0320 03:14:53.409976  543705 cpu.go:275] no items to output this cycle
E0320 03:15:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:15:03.409776  543705 memory.go:184] no items to output this cycle
I0320 03:15:03.409777  543705 cpu.go:275] no items to output this cycle
E0320 03:15:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:15:13.409810  543705 memory.go:191] Add success.
I0320 03:15:13.409822  543705 cpu.go:282] Add success.
W0320 03:15:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:15:13.412516  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:15:13.412521  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:15:13.420208  543705 net.go:648] Add success.
I0320 03:15:13.421978  543705 net.go:770] primary dev: ETH0
I0320 03:15:13.421993  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:15:13.422006  543705 net.go:698] Add success.
I0320 03:15:13.471424  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"76fdfcf5-cd4f-4f75-997b-f461619d135a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:15:13.471468  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:15:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:15:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:15:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 03:15:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:15:14.456737  543705 disk_worker.go:494] system disk:vda1
I0320 03:15:14.456771  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:15:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:15:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:15:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:15:22.333670  543705 disk_info.go:125] begin check local disk info of client
I0320 03:15:22.336116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:15:22.336123  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b880 0xc00007b8c0]
E0320 03:15:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:15:23.409788  543705 memory.go:184] no items to output this cycle
I0320 03:15:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:15:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:15:33.409769  543705 memory.go:184] no items to output this cycle
I0320 03:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 03:15:38.081731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:15:38.081737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:15:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:15:43.410704  543705 memory.go:191] Add success.
I0320 03:15:43.409826  543705 cpu.go:282] Add success.
I0320 03:15:43.420419  543705 net.go:648] Add success.
I0320 03:15:43.423189  543705 net.go:770] primary dev: ETH0
I0320 03:15:43.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:15:43.423216  543705 net.go:698] Add success.
I0320 03:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:15:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:15:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:15:53.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:15:53.409919  543705 memory.go:184] no items to output this cycle
I0320 03:15:53.409951  543705 cpu.go:275] no items to output this cycle
E0320 03:16:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:16:03.409802  543705 memory.go:184] no items to output this cycle
I0320 03:16:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 03:16:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:16:13.409782  543705 memory.go:191] Add success.
I0320 03:16:13.409806  543705 cpu.go:282] Add success.
W0320 03:16:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:16:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:16:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:16:13.420264  543705 net.go:648] Add success.
I0320 03:16:13.422811  543705 net.go:770] primary dev: ETH0
I0320 03:16:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:16:13.422841  543705 net.go:698] Add success.
I0320 03:16:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:16:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:16:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 03:16:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:16:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 03:16:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:16:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:16:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:16:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:16:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:16:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:16:22.337673  543705 disk_info.go:125] begin check local disk info of client
I0320 03:16:22.340232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:16:22.340239  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed100 0xc0000ed140]
E0320 03:16:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:16:23.409771  543705 memory.go:184] no items to output this cycle
I0320 03:16:23.409779  543705 cpu.go:275] no items to output this cycle
E0320 03:16:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:16:33.409798  543705 memory.go:184] no items to output this cycle
I0320 03:16:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 03:16:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:16:43.409817  543705 memory.go:191] Add success.
I0320 03:16:43.409823  543705 cpu.go:282] Add success.
I0320 03:16:43.419983  543705 net.go:648] Add success.
I0320 03:16:43.422959  543705 net.go:770] primary dev: ETH0
I0320 03:16:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:16:43.422989  543705 net.go:698] Add success.
I0320 03:16:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:16:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:16:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:16:53.410532  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:16:53.410553  543705 memory.go:184] no items to output this cycle
I0320 03:16:53.410565  543705 cpu.go:275] no items to output this cycle
E0320 03:17:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:17:03.409773  543705 memory.go:184] no items to output this cycle
I0320 03:17:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 03:17:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:17:13.409777  543705 memory.go:191] Add success.
W0320 03:17:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:17:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:17:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:17:13.409851  543705 cpu.go:282] Add success.
I0320 03:17:13.420505  543705 net.go:648] Add success.
I0320 03:17:13.423345  543705 net.go:770] primary dev: ETH0
I0320 03:17:13.423364  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:17:13.423381  543705 net.go:698] Add success.
I0320 03:17:13.453360  543705 event_worker.go:152] Polling the log file for events...
W0320 03:17:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:17:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 03:17:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:17:14.456631  543705 disk_worker.go:494] system disk:vda1
I0320 03:17:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:17:14.458174  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:17:14.458183  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:17:14.458189  543705 custom_config.go:64] query custom config with name: gpu
E0320 03:17:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:17:15.456864  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:17:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:17:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:17:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:17:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:17:16.472311  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:17:22.341675  543705 disk_info.go:125] begin check local disk info of client
I0320 03:17:22.344097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:17:22.344103  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003148c0 0xc000314900]
E0320 03:17:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:17:23.409787  543705 memory.go:184] no items to output this cycle
I0320 03:17:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 03:17:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:17:33.409800  543705 memory.go:184] no items to output this cycle
I0320 03:17:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 03:17:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:17:43.409776  543705 memory.go:191] Add success.
I0320 03:17:43.409805  543705 cpu.go:282] Add success.
I0320 03:17:43.419861  543705 net.go:648] Add success.
I0320 03:17:43.422501  543705 net.go:770] primary dev: ETH0
I0320 03:17:43.422517  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:17:43.422532  543705 net.go:698] Add success.
I0320 03:17:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:17:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:17:53.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:17:53.409859  543705 memory.go:184] no items to output this cycle
I0320 03:17:53.409941  543705 cpu.go:275] no items to output this cycle
E0320 03:18:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:18:03.409802  543705 memory.go:184] no items to output this cycle
I0320 03:18:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 03:18:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:18:13.409789  543705 memory.go:191] Add success.
I0320 03:18:13.409797  543705 cpu.go:282] Add success.
W0320 03:18:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:18:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:18:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:18:13.420088  543705 net.go:648] Add success.
I0320 03:18:13.422799  543705 net.go:770] primary dev: ETH0
I0320 03:18:13.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:18:13.422824  543705 net.go:698] Add success.
I0320 03:18:13.469318  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"261ac0fd-92db-411f-bcac-a27b2f503ae3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:18:13.469352  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:18:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:18:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:18:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 03:18:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:18:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 03:18:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:18:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:18:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:18:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:18:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:18:22.345672  543705 disk_info.go:125] begin check local disk info of client
I0320 03:18:22.348117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:18:22.348124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003779c0 0xc000377a00]
E0320 03:18:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:18:23.409777  543705 memory.go:184] no items to output this cycle
I0320 03:18:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 03:18:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:18:33.409773  543705 memory.go:184] no items to output this cycle
I0320 03:18:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 03:18:38.081874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:18:38.081881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:18:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:18:43.410774  543705 memory.go:191] Add success.
I0320 03:18:43.409918  543705 cpu.go:282] Add success.
I0320 03:18:43.420544  543705 net.go:648] Add success.
I0320 03:18:43.423409  543705 net.go:770] primary dev: ETH0
I0320 03:18:43.423422  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:18:43.423434  543705 net.go:698] Add success.
I0320 03:18:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:18:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:18:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:18:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:18:53.409800  543705 memory.go:184] no items to output this cycle
I0320 03:18:53.409829  543705 cpu.go:275] no items to output this cycle
E0320 03:19:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:19:03.409783  543705 memory.go:184] no items to output this cycle
I0320 03:19:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:19:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:19:13.409793  543705 memory.go:191] Add success.
W0320 03:19:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:19:13.409821  543705 cpu.go:282] Add success.
W0320 03:19:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:19:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:19:13.420214  543705 net.go:648] Add success.
I0320 03:19:13.422996  543705 net.go:770] primary dev: ETH0
I0320 03:19:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:19:13.423021  543705 net.go:698] Add success.
I0320 03:19:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:19:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:19:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 03:19:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:19:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 03:19:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:19:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:19:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:19:22.349674  543705 disk_info.go:125] begin check local disk info of client
I0320 03:19:22.352125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:19:22.352131  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395300 0xc000395340]
E0320 03:19:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:19:23.409792  543705 memory.go:184] no items to output this cycle
I0320 03:19:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 03:19:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:19:33.409788  543705 memory.go:184] no items to output this cycle
I0320 03:19:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 03:19:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:19:43.409815  543705 memory.go:191] Add success.
I0320 03:19:43.409854  543705 cpu.go:282] Add success.
I0320 03:19:43.420166  543705 net.go:648] Add success.
I0320 03:19:43.423282  543705 net.go:770] primary dev: ETH0
I0320 03:19:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:19:43.423310  543705 net.go:698] Add success.
I0320 03:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:19:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:19:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:19:53.409792  543705 memory.go:184] no items to output this cycle
I0320 03:19:53.409943  543705 cpu.go:275] no items to output this cycle
E0320 03:20:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:20:03.409804  543705 memory.go:184] no items to output this cycle
I0320 03:20:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 03:20:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:20:13.409792  543705 memory.go:191] Add success.
I0320 03:20:13.409795  543705 cpu.go:282] Add success.
W0320 03:20:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:20:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:20:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:20:13.420100  543705 net.go:648] Add success.
I0320 03:20:13.422674  543705 net.go:770] primary dev: ETH0
I0320 03:20:13.422687  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:20:13.422698  543705 net.go:698] Add success.
I0320 03:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:20:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:20:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 03:20:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:20:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 03:20:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:20:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:20:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:20:22.353678  543705 disk_info.go:125] begin check local disk info of client
I0320 03:20:22.356170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:20:22.356176  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5780 0xc0000c57c0]
E0320 03:20:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:20:23.409760  543705 memory.go:184] no items to output this cycle
I0320 03:20:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 03:20:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:20:33.409789  543705 memory.go:184] no items to output this cycle
I0320 03:20:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:20:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:20:43.409834  543705 memory.go:191] Add success.
I0320 03:20:43.409845  543705 cpu.go:282] Add success.
I0320 03:20:43.420035  543705 net.go:648] Add success.
I0320 03:20:43.422754  543705 net.go:770] primary dev: ETH0
I0320 03:20:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:20:43.422794  543705 net.go:698] Add success.
I0320 03:20:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:20:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:20:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:20:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:20:53.409826  543705 memory.go:184] no items to output this cycle
I0320 03:20:53.409834  543705 cpu.go:275] no items to output this cycle
E0320 03:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:21:03.409909  543705 memory.go:184] no items to output this cycle
I0320 03:21:03.409985  543705 cpu.go:275] no items to output this cycle
E0320 03:21:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:21:13.409807  543705 memory.go:191] Add success.
I0320 03:21:13.409809  543705 cpu.go:282] Add success.
W0320 03:21:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:21:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:21:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:21:13.420158  543705 net.go:648] Add success.
I0320 03:21:13.423092  543705 net.go:770] primary dev: ETH0
I0320 03:21:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:21:13.423116  543705 net.go:698] Add success.
I0320 03:21:13.941897  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"be26a690-f320-4b56-aa71-1de4a6facaa4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:21:13.941946  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:21:14.454524  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:21:14.454692  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:21:14.454782  543705 disk_worker.go:708] disk space is not compliant
W0320 03:21:14.454786  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:21:14.456287  543705 disk_worker.go:494] system disk:vda1
I0320 03:21:14.456321  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:21:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:21:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:21:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:21:22.357675  543705 disk_info.go:125] begin check local disk info of client
I0320 03:21:22.360153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:21:22.360160  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ecd80 0xc0000ecdc0]
E0320 03:21:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:21:23.409758  543705 memory.go:184] no items to output this cycle
I0320 03:21:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:21:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:21:33.409798  543705 memory.go:184] no items to output this cycle
I0320 03:21:33.409820  543705 cpu.go:275] no items to output this cycle
I0320 03:21:38.085106  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:21:38.085112  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:21:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:21:43.410675  543705 memory.go:191] Add success.
I0320 03:21:43.409802  543705 cpu.go:282] Add success.
I0320 03:21:43.420392  543705 net.go:648] Add success.
I0320 03:21:43.422956  543705 net.go:770] primary dev: ETH0
I0320 03:21:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:21:43.422983  543705 net.go:698] Add success.
I0320 03:21:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:21:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:21:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:21:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:21:53.409786  543705 memory.go:184] no items to output this cycle
I0320 03:21:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 03:22:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:22:03.409783  543705 memory.go:184] no items to output this cycle
I0320 03:22:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:22:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:22:13.409781  543705 memory.go:191] Add success.
W0320 03:22:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:22:13.409815  543705 cpu.go:282] Add success.
W0320 03:22:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:22:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:22:13.420120  543705 net.go:648] Add success.
I0320 03:22:13.423120  543705 net.go:770] primary dev: ETH0
I0320 03:22:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:22:13.423145  543705 net.go:698] Add success.
W0320 03:22:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:22:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 03:22:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:22:14.456935  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:22:14.456944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:22:14.456950  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:22:14.456990  543705 disk_worker.go:494] system disk:vda1
I0320 03:22:14.457030  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:22:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:22:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:22:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:22:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:22:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:22:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:22:16.472332  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:22:22.361675  543705 disk_info.go:125] begin check local disk info of client
I0320 03:22:22.364230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:22:22.364237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab700 0xc0001ab740]
E0320 03:22:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:22:23.409789  543705 memory.go:184] no items to output this cycle
I0320 03:22:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 03:22:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:22:33.409806  543705 memory.go:184] no items to output this cycle
I0320 03:22:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 03:22:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:22:43.409799  543705 memory.go:191] Add success.
I0320 03:22:43.409801  543705 cpu.go:282] Add success.
I0320 03:22:43.419903  543705 net.go:648] Add success.
I0320 03:22:43.422842  543705 net.go:770] primary dev: ETH0
I0320 03:22:43.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:22:43.422878  543705 net.go:698] Add success.
I0320 03:22:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:22:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:22:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:22:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:22:53.409780  543705 memory.go:184] no items to output this cycle
I0320 03:22:53.409889  543705 cpu.go:275] no items to output this cycle
E0320 03:23:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:23:03.409769  543705 memory.go:184] no items to output this cycle
I0320 03:23:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 03:23:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:23:13.409914  543705 memory.go:191] Add success.
W0320 03:23:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:23:13.409966  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:23:13.409966  543705 cpu.go:282] Add success.
I0320 03:23:13.409969  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:23:13.419717  543705 net.go:648] Add success.
I0320 03:23:13.422832  543705 net.go:770] primary dev: ETH0
I0320 03:23:13.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:23:13.422857  543705 net.go:698] Add success.
I0320 03:23:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:23:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:23:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 03:23:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:23:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 03:23:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:23:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:23:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:23:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:23:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:23:22.365674  543705 disk_info.go:125] begin check local disk info of client
I0320 03:23:22.368127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:23:22.368133  543705 disk_info.go:196] parse disk info done, disk is : [0xc000538f40 0xc000538f80]
E0320 03:23:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:23:23.409783  543705 memory.go:184] no items to output this cycle
I0320 03:23:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 03:23:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:23:33.409773  543705 memory.go:184] no items to output this cycle
I0320 03:23:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:23:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:23:43.409817  543705 memory.go:191] Add success.
I0320 03:23:43.409823  543705 cpu.go:282] Add success.
I0320 03:23:43.419877  543705 net.go:648] Add success.
I0320 03:23:43.422892  543705 net.go:770] primary dev: ETH0
I0320 03:23:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:23:43.422931  543705 net.go:698] Add success.
I0320 03:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:23:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:23:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:23:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:23:53.409782  543705 memory.go:184] no items to output this cycle
I0320 03:23:53.409856  543705 cpu.go:275] no items to output this cycle
E0320 03:24:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:24:03.409782  543705 memory.go:184] no items to output this cycle
I0320 03:24:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:24:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:24:13.409787  543705 memory.go:191] Add success.
I0320 03:24:13.409792  543705 cpu.go:282] Add success.
W0320 03:24:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:24:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:24:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:24:13.420072  543705 net.go:648] Add success.
I0320 03:24:13.422997  543705 net.go:770] primary dev: ETH0
I0320 03:24:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:24:13.423022  543705 net.go:698] Add success.
I0320 03:24:13.469228  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c0897d3-450e-4452-95b9-ccfedbf16aeb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:24:13.469350  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:24:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:24:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:24:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 03:24:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:24:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 03:24:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:24:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:24:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:24:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:24:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:24:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:24:22.369675  543705 disk_info.go:125] begin check local disk info of client
I0320 03:24:22.372203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:24:22.372208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d4c0 0xc00037d500]
E0320 03:24:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:24:23.409777  543705 memory.go:184] no items to output this cycle
I0320 03:24:23.409777  543705 cpu.go:275] no items to output this cycle
E0320 03:24:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:24:33.409794  543705 memory.go:184] no items to output this cycle
I0320 03:24:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 03:24:38.085743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:24:38.085749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:24:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:24:43.410593  543705 memory.go:191] Add success.
I0320 03:24:43.409807  543705 cpu.go:282] Add success.
I0320 03:24:43.420289  543705 net.go:648] Add success.
I0320 03:24:43.422798  543705 net.go:770] primary dev: ETH0
I0320 03:24:43.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:24:43.422827  543705 net.go:698] Add success.
I0320 03:24:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:24:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:24:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:24:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:24:53.409786  543705 memory.go:184] no items to output this cycle
I0320 03:24:53.409851  543705 cpu.go:275] no items to output this cycle
E0320 03:25:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:25:03.409772  543705 memory.go:184] no items to output this cycle
I0320 03:25:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:25:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:25:13.409816  543705 memory.go:191] Add success.
I0320 03:25:13.409822  543705 cpu.go:282] Add success.
W0320 03:25:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:25:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:25:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:25:13.420133  543705 net.go:648] Add success.
I0320 03:25:13.422919  543705 net.go:770] primary dev: ETH0
I0320 03:25:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:25:13.422949  543705 net.go:698] Add success.
I0320 03:25:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:25:14.455234  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:25:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0320 03:25:14.455250  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:25:14.456633  543705 disk_worker.go:494] system disk:vda1
I0320 03:25:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:25:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:25:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:25:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:25:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:25:16.472495  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:25:22.373674  543705 disk_info.go:125] begin check local disk info of client
I0320 03:25:22.376104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:25:22.376110  543705 disk_info.go:196] parse disk info done, disk is : [0xc000534280 0xc0005342c0]
E0320 03:25:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:25:23.409788  543705 memory.go:184] no items to output this cycle
I0320 03:25:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 03:25:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:25:33.409777  543705 memory.go:184] no items to output this cycle
I0320 03:25:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 03:25:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:25:43.409787  543705 memory.go:191] Add success.
I0320 03:25:43.409811  543705 cpu.go:282] Add success.
I0320 03:25:43.419984  543705 net.go:648] Add success.
I0320 03:25:43.422609  543705 net.go:770] primary dev: ETH0
I0320 03:25:43.422622  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:25:43.422636  543705 net.go:698] Add success.
I0320 03:25:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:25:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:25:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:25:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 03:25:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:25:53.409825  543705 memory.go:184] no items to output this cycle
E0320 03:26:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:26:03.409782  543705 memory.go:184] no items to output this cycle
I0320 03:26:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 03:26:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:26:13.409792  543705 memory.go:191] Add success.
I0320 03:26:13.409798  543705 cpu.go:282] Add success.
W0320 03:26:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:26:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:26:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:26:13.420193  543705 net.go:648] Add success.
I0320 03:26:13.422953  543705 net.go:770] primary dev: ETH0
I0320 03:26:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:26:13.422977  543705 net.go:698] Add success.
I0320 03:26:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:26:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:26:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 03:26:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:26:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 03:26:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:26:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:26:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:26:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:26:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:26:22.377677  543705 disk_info.go:125] begin check local disk info of client
I0320 03:26:22.380231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:26:22.380237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f01c0 0xc0003f0200]
E0320 03:26:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:26:23.409774  543705 memory.go:184] no items to output this cycle
I0320 03:26:23.409775  543705 cpu.go:275] no items to output this cycle
E0320 03:26:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:26:33.409775  543705 memory.go:184] no items to output this cycle
I0320 03:26:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:26:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:26:43.409791  543705 memory.go:191] Add success.
I0320 03:26:43.409795  543705 cpu.go:282] Add success.
I0320 03:26:43.420044  543705 net.go:648] Add success.
I0320 03:26:43.422639  543705 net.go:770] primary dev: ETH0
I0320 03:26:43.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:26:43.422667  543705 net.go:698] Add success.
I0320 03:26:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:26:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:26:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:26:53.409809  543705 memory.go:184] no items to output this cycle
I0320 03:26:53.409824  543705 cpu.go:275] no items to output this cycle
E0320 03:27:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:27:03.409804  543705 memory.go:184] no items to output this cycle
I0320 03:27:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 03:27:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:27:13.409812  543705 memory.go:191] Add success.
I0320 03:27:13.409820  543705 cpu.go:282] Add success.
W0320 03:27:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:27:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:27:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:27:13.420050  543705 net.go:648] Add success.
I0320 03:27:13.422748  543705 net.go:770] primary dev: ETH0
I0320 03:27:13.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:27:13.422778  543705 net.go:698] Add success.
I0320 03:27:13.429085  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 03:27:13.453259  543705 event_worker.go:152] Polling the log file for events...
I0320 03:27:13.469075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"effa47fd-490c-428d-b64f-cf4df90cc415","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:27:13.469108  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 03:27:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:27:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0320 03:27:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:27:14.456654  543705 disk_worker.go:494] system disk:vda1
I0320 03:27:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:27:14.458197  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:27:14.458206  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:27:14.458210  543705 custom_config.go:64] query custom config with name: gpu
E0320 03:27:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:27:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:27:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:27:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:27:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:27:16.472337  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:27:22.381676  543705 disk_info.go:125] begin check local disk info of client
I0320 03:27:22.384100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:27:22.384107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee140 0xc0003ee180]
E0320 03:27:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:27:23.409794  543705 memory.go:184] no items to output this cycle
I0320 03:27:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:27:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:27:33.409784  543705 memory.go:184] no items to output this cycle
I0320 03:27:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 03:27:38.089137  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:27:38.089144  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:27:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:27:43.410617  543705 memory.go:191] Add success.
I0320 03:27:43.409805  543705 cpu.go:282] Add success.
I0320 03:27:43.420350  543705 net.go:648] Add success.
I0320 03:27:43.423150  543705 net.go:770] primary dev: ETH0
I0320 03:27:43.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:27:43.423179  543705 net.go:698] Add success.
I0320 03:27:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:27:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:27:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:27:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:27:53.409809  543705 memory.go:184] no items to output this cycle
I0320 03:27:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 03:28:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:28:03.409776  543705 memory.go:184] no items to output this cycle
I0320 03:28:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 03:28:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:28:13.409800  543705 memory.go:191] Add success.
I0320 03:28:13.409805  543705 cpu.go:282] Add success.
W0320 03:28:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:28:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:28:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:28:13.420133  543705 net.go:648] Add success.
I0320 03:28:13.422853  543705 net.go:770] primary dev: ETH0
I0320 03:28:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:28:13.422884  543705 net.go:698] Add success.
I0320 03:28:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:28:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:28:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 03:28:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:28:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 03:28:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:28:15.456021  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:28:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:28:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:28:22.385674  543705 disk_info.go:125] begin check local disk info of client
I0320 03:28:22.388120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:28:22.388126  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029ac40 0xc00029ac80]
E0320 03:28:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:28:23.409791  543705 memory.go:184] no items to output this cycle
I0320 03:28:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 03:28:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:28:33.409774  543705 memory.go:184] no items to output this cycle
I0320 03:28:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 03:28:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:28:43.409815  543705 memory.go:191] Add success.
I0320 03:28:43.409824  543705 cpu.go:282] Add success.
I0320 03:28:43.419888  543705 net.go:648] Add success.
I0320 03:28:43.422678  543705 net.go:770] primary dev: ETH0
I0320 03:28:43.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:28:43.422704  543705 net.go:698] Add success.
I0320 03:28:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:28:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:28:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:28:53.409803  543705 cpu.go:275] no items to output this cycle
I0320 03:28:53.409817  543705 memory.go:184] no items to output this cycle
E0320 03:29:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:29:03.409772  543705 memory.go:184] no items to output this cycle
I0320 03:29:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:29:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:29:13.409817  543705 memory.go:191] Add success.
I0320 03:29:13.409826  543705 cpu.go:282] Add success.
W0320 03:29:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:29:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:29:13.420160  543705 net.go:648] Add success.
I0320 03:29:13.422774  543705 net.go:770] primary dev: ETH0
I0320 03:29:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:29:13.422802  543705 net.go:698] Add success.
I0320 03:29:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:29:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:29:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0320 03:29:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:29:14.456673  543705 disk_worker.go:494] system disk:vda1
I0320 03:29:14.456708  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:29:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:29:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:29:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:29:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:29:16.472432  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:29:22.389667  543705 disk_info.go:125] begin check local disk info of client
I0320 03:29:22.392080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:29:22.392086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fb9c0 0xc0004fba00]
E0320 03:29:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:29:23.409790  543705 memory.go:184] no items to output this cycle
I0320 03:29:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 03:29:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:29:33.409779  543705 memory.go:184] no items to output this cycle
I0320 03:29:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 03:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:29:43.409783  543705 memory.go:191] Add success.
I0320 03:29:43.409807  543705 cpu.go:282] Add success.
I0320 03:29:43.419914  543705 net.go:648] Add success.
I0320 03:29:43.422701  543705 net.go:770] primary dev: ETH0
I0320 03:29:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:29:43.422730  543705 net.go:698] Add success.
I0320 03:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:29:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:29:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:29:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:29:53.409768  543705 memory.go:184] no items to output this cycle
I0320 03:29:53.409859  543705 cpu.go:275] no items to output this cycle
E0320 03:30:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:30:03.409785  543705 memory.go:184] no items to output this cycle
I0320 03:30:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 03:30:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:30:13.409784  543705 memory.go:191] Add success.
I0320 03:30:13.409786  543705 cpu.go:282] Add success.
W0320 03:30:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:30:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:30:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:30:13.420081  543705 net.go:648] Add success.
I0320 03:30:13.422770  543705 net.go:770] primary dev: ETH0
I0320 03:30:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:30:13.422797  543705 net.go:698] Add success.
I0320 03:30:13.481043  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a514af7d-aebc-49db-9716-f864709e7a39","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:30:13.481075  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:30:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:30:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:30:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 03:30:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:30:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 03:30:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:30:15.455602  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:30:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:30:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:30:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:30:22.393677  543705 disk_info.go:125] begin check local disk info of client
I0320 03:30:22.396160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:30:22.396166  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a480 0xc00032a4c0]
E0320 03:30:23.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:30:23.409927  543705 memory.go:184] no items to output this cycle
I0320 03:30:23.409906  543705 cpu.go:275] no items to output this cycle
E0320 03:30:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:30:33.409782  543705 memory.go:184] no items to output this cycle
I0320 03:30:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 03:30:38.089734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:30:38.089741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:30:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:30:43.410642  543705 memory.go:191] Add success.
I0320 03:30:43.409828  543705 cpu.go:282] Add success.
I0320 03:30:43.420323  543705 net.go:648] Add success.
I0320 03:30:43.422771  543705 net.go:770] primary dev: ETH0
I0320 03:30:43.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:30:43.422796  543705 net.go:698] Add success.
I0320 03:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:30:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:30:53.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:30:53.409756  543705 memory.go:184] no items to output this cycle
I0320 03:30:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:31:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:31:03.409786  543705 memory.go:184] no items to output this cycle
I0320 03:31:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 03:31:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:31:13.409788  543705 memory.go:191] Add success.
I0320 03:31:13.409790  543705 cpu.go:282] Add success.
W0320 03:31:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:31:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:31:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:31:13.420056  543705 net.go:648] Add success.
I0320 03:31:13.422688  543705 net.go:770] primary dev: ETH0
I0320 03:31:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:31:13.422717  543705 net.go:698] Add success.
I0320 03:31:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:31:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:31:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 03:31:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:31:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 03:31:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:31:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:31:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:31:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:31:22.397672  543705 disk_info.go:125] begin check local disk info of client
I0320 03:31:22.400075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:31:22.400082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002737c0 0xc000273800]
E0320 03:31:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:31:23.409798  543705 memory.go:184] no items to output this cycle
I0320 03:31:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 03:31:33.409886  543705 cpu.go:275] no items to output this cycle
E0320 03:31:33.410025  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:31:33.410037  543705 memory.go:184] no items to output this cycle
E0320 03:31:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:31:43.409791  543705 memory.go:191] Add success.
I0320 03:31:43.409800  543705 cpu.go:282] Add success.
I0320 03:31:43.419917  543705 net.go:648] Add success.
I0320 03:31:43.422938  543705 net.go:770] primary dev: ETH0
I0320 03:31:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:31:43.422964  543705 net.go:698] Add success.
I0320 03:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:31:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:31:53.409793  543705 memory.go:184] no items to output this cycle
I0320 03:31:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 03:32:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:32:03.409781  543705 memory.go:184] no items to output this cycle
I0320 03:32:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:32:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:32:13.409789  543705 memory.go:191] Add success.
I0320 03:32:13.409790  543705 cpu.go:282] Add success.
W0320 03:32:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:32:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:32:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:32:13.420332  543705 net.go:648] Add success.
I0320 03:32:13.423164  543705 net.go:770] primary dev: ETH0
I0320 03:32:13.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:32:13.423189  543705 net.go:698] Add success.
W0320 03:32:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:32:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 03:32:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:32:14.455865  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:32:14.455873  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:32:14.455880  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:32:14.456587  543705 disk_worker.go:494] system disk:vda1
I0320 03:32:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:32:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:32:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:32:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:32:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:32:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:32:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:32:16.472306  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:32:22.402120  543705 disk_info.go:125] begin check local disk info of client
I0320 03:32:22.404696  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:32:22.404703  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315440 0xc000315480]
E0320 03:32:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:32:23.409759  543705 memory.go:184] no items to output this cycle
I0320 03:32:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:32:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:32:33.409789  543705 memory.go:184] no items to output this cycle
I0320 03:32:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:32:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:32:43.409791  543705 memory.go:191] Add success.
I0320 03:32:43.409793  543705 cpu.go:282] Add success.
I0320 03:32:43.419967  543705 net.go:648] Add success.
I0320 03:32:43.422512  543705 net.go:770] primary dev: ETH0
I0320 03:32:43.422525  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:32:43.422537  543705 net.go:698] Add success.
I0320 03:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:32:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:32:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:32:53.409764  543705 memory.go:184] no items to output this cycle
I0320 03:32:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 03:33:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:33:03.409786  543705 memory.go:184] no items to output this cycle
I0320 03:33:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 03:33:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:33:13.409784  543705 memory.go:191] Add success.
I0320 03:33:13.409806  543705 cpu.go:282] Add success.
W0320 03:33:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:33:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:33:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:33:13.420210  543705 net.go:648] Add success.
I0320 03:33:13.422732  543705 net.go:770] primary dev: ETH0
I0320 03:33:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:33:13.422756  543705 net.go:698] Add success.
I0320 03:33:13.939245  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7bce0dc-1a7f-42e9-918f-8ee007a2507c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:33:13.939279  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:33:14.454889  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:33:14.454901  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:33:14.454978  543705 disk_worker.go:708] disk space is not compliant
W0320 03:33:14.454982  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:33:14.457455  543705 disk_worker.go:494] system disk:vda1
I0320 03:33:14.457498  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:33:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:33:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:33:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:33:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:33:16.472560  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:33:22.405674  543705 disk_info.go:125] begin check local disk info of client
I0320 03:33:22.408219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:33:22.408227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265f40 0xc0003fe000]
E0320 03:33:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:33:23.409785  543705 memory.go:184] no items to output this cycle
I0320 03:33:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:33:33.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:33:33.409914  543705 cpu.go:275] no items to output this cycle
I0320 03:33:33.409932  543705 memory.go:184] no items to output this cycle
I0320 03:33:38.089875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:33:38.089882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:33:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:33:43.410662  543705 memory.go:191] Add success.
I0320 03:33:43.409798  543705 cpu.go:282] Add success.
I0320 03:33:43.420353  543705 net.go:648] Add success.
I0320 03:33:43.422941  543705 net.go:770] primary dev: ETH0
I0320 03:33:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:33:43.422966  543705 net.go:698] Add success.
I0320 03:33:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:33:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:33:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:33:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:33:53.409783  543705 memory.go:184] no items to output this cycle
I0320 03:33:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 03:34:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:34:03.409809  543705 memory.go:184] no items to output this cycle
I0320 03:34:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 03:34:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:34:13.409812  543705 memory.go:191] Add success.
I0320 03:34:13.409819  543705 cpu.go:282] Add success.
W0320 03:34:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:34:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:34:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:34:13.420107  543705 net.go:648] Add success.
I0320 03:34:13.422991  543705 net.go:770] primary dev: ETH0
I0320 03:34:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:34:13.423029  543705 net.go:698] Add success.
I0320 03:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:34:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:34:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 03:34:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:34:14.456482  543705 disk_worker.go:494] system disk:vda1
I0320 03:34:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:34:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:34:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:34:22.409676  543705 disk_info.go:125] begin check local disk info of client
I0320 03:34:22.412184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:34:22.412191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289e00 0xc000289e40]
E0320 03:34:23.409734  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:34:23.409835  543705 memory.go:184] no items to output this cycle
I0320 03:34:23.409924  543705 cpu.go:275] no items to output this cycle
E0320 03:34:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:34:33.409768  543705 memory.go:184] no items to output this cycle
I0320 03:34:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 03:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:34:43.409792  543705 memory.go:191] Add success.
I0320 03:34:43.409801  543705 cpu.go:282] Add success.
I0320 03:34:43.420001  543705 net.go:648] Add success.
I0320 03:34:43.422676  543705 net.go:770] primary dev: ETH0
I0320 03:34:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:34:43.422702  543705 net.go:698] Add success.
I0320 03:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:34:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:34:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:34:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:34:53.409781  543705 memory.go:184] no items to output this cycle
I0320 03:34:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 03:35:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:35:03.409805  543705 memory.go:184] no items to output this cycle
I0320 03:35:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 03:35:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:35:13.409812  543705 memory.go:191] Add success.
I0320 03:35:13.409823  543705 cpu.go:282] Add success.
W0320 03:35:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:35:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:35:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:35:13.420065  543705 net.go:648] Add success.
I0320 03:35:13.422713  543705 net.go:770] primary dev: ETH0
I0320 03:35:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:35:13.422738  543705 net.go:698] Add success.
I0320 03:35:14.453946  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:35:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:35:14.455341  543705 disk_worker.go:708] disk space is not compliant
W0320 03:35:14.455346  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:35:14.457229  543705 disk_worker.go:494] system disk:vda1
I0320 03:35:14.457266  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:35:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:35:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:35:16.472447  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:35:22.412794  543705 disk_info.go:125] begin check local disk info of client
I0320 03:35:22.415349  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:35:22.415356  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474180 0xc0004741c0]
E0320 03:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:35:23.409875  543705 memory.go:184] no items to output this cycle
I0320 03:35:23.409907  543705 cpu.go:275] no items to output this cycle
E0320 03:35:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:35:33.409799  543705 memory.go:184] no items to output this cycle
I0320 03:35:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 03:35:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:35:43.409795  543705 memory.go:191] Add success.
I0320 03:35:43.409808  543705 cpu.go:282] Add success.
I0320 03:35:43.420005  543705 net.go:648] Add success.
I0320 03:35:43.422574  543705 net.go:770] primary dev: ETH0
I0320 03:35:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:35:43.422603  543705 net.go:698] Add success.
I0320 03:35:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:35:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:35:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:35:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:35:53.409779  543705 memory.go:184] no items to output this cycle
I0320 03:35:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 03:36:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:36:03.409782  543705 memory.go:184] no items to output this cycle
I0320 03:36:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 03:36:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:36:13.409802  543705 memory.go:191] Add success.
I0320 03:36:13.409804  543705 cpu.go:282] Add success.
W0320 03:36:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:36:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:36:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:36:13.420068  543705 net.go:648] Add success.
I0320 03:36:13.422714  543705 net.go:770] primary dev: ETH0
I0320 03:36:13.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:36:13.422744  543705 net.go:698] Add success.
I0320 03:36:13.505534  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3691879c-60ca-4b7b-8321-76e542d85a86","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:36:13.505566  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:36:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:36:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:36:14.455232  543705 disk_worker.go:708] disk space is not compliant
W0320 03:36:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:36:14.456721  543705 disk_worker.go:494] system disk:vda1
I0320 03:36:14.456750  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:36:15.456020  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:36:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:36:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:36:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:36:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:36:22.415800  543705 disk_info.go:125] begin check local disk info of client
I0320 03:36:22.418374  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:36:22.418381  543705 disk_info.go:196] parse disk info done, disk is : [0xc000564080 0xc0005640c0]
E0320 03:36:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:36:23.409764  543705 memory.go:184] no items to output this cycle
I0320 03:36:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:36:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:36:33.409780  543705 memory.go:184] no items to output this cycle
I0320 03:36:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 03:36:38.093158  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:36:38.093164  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:36:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:36:43.410673  543705 memory.go:191] Add success.
I0320 03:36:43.409809  543705 cpu.go:282] Add success.
I0320 03:36:43.420455  543705 net.go:648] Add success.
I0320 03:36:43.423164  543705 net.go:770] primary dev: ETH0
I0320 03:36:43.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:36:43.423195  543705 net.go:698] Add success.
I0320 03:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:36:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:36:53.409799  543705 memory.go:184] no items to output this cycle
I0320 03:36:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 03:37:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:37:03.409791  543705 memory.go:184] no items to output this cycle
I0320 03:37:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 03:37:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:37:13.409774  543705 memory.go:191] Add success.
W0320 03:37:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:37:13.409798  543705 cpu.go:282] Add success.
W0320 03:37:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:37:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:37:13.420154  543705 net.go:648] Add success.
I0320 03:37:13.423022  543705 net.go:770] primary dev: ETH0
I0320 03:37:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:37:13.423051  543705 net.go:698] Add success.
I0320 03:37:13.453593  543705 event_worker.go:152] Polling the log file for events...
W0320 03:37:14.454429  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:37:14.454530  543705 disk_worker.go:708] disk space is not compliant
W0320 03:37:14.454535  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:37:14.454936  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:37:14.454945  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:37:14.454951  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:37:14.457081  543705 disk_worker.go:494] system disk:vda1
I0320 03:37:14.457125  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:37:15.457032  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:37:15.457198  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:37:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:37:16.457970  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:37:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:37:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:37:16.472472  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:37:22.418787  543705 disk_info.go:125] begin check local disk info of client
I0320 03:37:22.421339  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:37:22.421346  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037c280 0xc00037c2c0]
E0320 03:37:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:37:23.409791  543705 memory.go:184] no items to output this cycle
I0320 03:37:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 03:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:37:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 03:37:33.409793  543705 memory.go:184] no items to output this cycle
E0320 03:37:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:37:43.409805  543705 memory.go:191] Add success.
I0320 03:37:43.409815  543705 cpu.go:282] Add success.
I0320 03:37:43.419854  543705 net.go:648] Add success.
I0320 03:37:43.422504  543705 net.go:770] primary dev: ETH0
I0320 03:37:43.422518  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:37:43.422531  543705 net.go:698] Add success.
I0320 03:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:37:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:37:53.409782  543705 memory.go:184] no items to output this cycle
I0320 03:37:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:38:03.409784  543705 memory.go:184] no items to output this cycle
I0320 03:38:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 03:38:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:38:13.409775  543705 memory.go:191] Add success.
W0320 03:38:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:38:13.409803  543705 cpu.go:282] Add success.
W0320 03:38:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:38:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:38:13.420105  543705 net.go:648] Add success.
I0320 03:38:13.422749  543705 net.go:770] primary dev: ETH0
I0320 03:38:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:38:13.422776  543705 net.go:698] Add success.
I0320 03:38:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:38:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:38:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 03:38:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:38:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 03:38:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:38:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:38:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:38:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:38:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:38:22.421799  543705 disk_info.go:125] begin check local disk info of client
I0320 03:38:22.424267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:38:22.424276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ce000 0xc0003ce040]
E0320 03:38:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:38:23.409783  543705 memory.go:184] no items to output this cycle
I0320 03:38:23.409808  543705 cpu.go:275] no items to output this cycle
E0320 03:38:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:38:33.409787  543705 memory.go:184] no items to output this cycle
I0320 03:38:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:38:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:38:43.409818  543705 memory.go:191] Add success.
I0320 03:38:43.409818  543705 cpu.go:282] Add success.
I0320 03:38:43.419895  543705 net.go:648] Add success.
I0320 03:38:43.422438  543705 net.go:770] primary dev: ETH0
I0320 03:38:43.422450  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:38:43.422462  543705 net.go:698] Add success.
I0320 03:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:38:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:38:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:38:53.409777  543705 cpu.go:275] no items to output this cycle
E0320 03:38:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:38:53.409795  543705 memory.go:184] no items to output this cycle
E0320 03:39:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:39:03.409778  543705 memory.go:184] no items to output this cycle
I0320 03:39:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 03:39:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:39:13.409787  543705 memory.go:191] Add success.
I0320 03:39:13.409805  543705 cpu.go:282] Add success.
W0320 03:39:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:39:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:39:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:39:13.420190  543705 net.go:648] Add success.
I0320 03:39:13.423368  543705 net.go:770] primary dev: ETH0
I0320 03:39:13.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:39:13.423397  543705 net.go:698] Add success.
I0320 03:39:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:39:14.455280  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:39:14.455294  543705 disk_worker.go:708] disk space is not compliant
W0320 03:39:14.455299  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:39:14.457530  543705 disk_worker.go:494] system disk:vda1
I0320 03:39:14.457577  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:39:14.744667  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ccfd4ee-99fc-4f0d-acbf-df045979eee2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:39:14.744714  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:39:15.455703  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:39:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:39:16.472448  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:39:22.424816  543705 disk_info.go:125] begin check local disk info of client
I0320 03:39:22.427369  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:39:22.427376  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273600 0xc000273640]
E0320 03:39:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:39:23.409790  543705 memory.go:184] no items to output this cycle
I0320 03:39:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 03:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:39:33.409782  543705 memory.go:184] no items to output this cycle
I0320 03:39:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 03:39:38.093732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:39:38.093739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:39:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:39:43.410648  543705 memory.go:191] Add success.
I0320 03:39:43.409824  543705 cpu.go:282] Add success.
I0320 03:39:43.420393  543705 net.go:648] Add success.
I0320 03:39:43.422834  543705 net.go:770] primary dev: ETH0
I0320 03:39:43.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:39:43.422859  543705 net.go:698] Add success.
I0320 03:39:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:39:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:39:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:39:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:39:53.409783  543705 memory.go:184] no items to output this cycle
I0320 03:39:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 03:40:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:40:03.409781  543705 memory.go:184] no items to output this cycle
I0320 03:40:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 03:40:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:40:13.409776  543705 memory.go:191] Add success.
W0320 03:40:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:40:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:40:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:40:13.409821  543705 cpu.go:282] Add success.
I0320 03:40:13.420049  543705 net.go:648] Add success.
I0320 03:40:13.422693  543705 net.go:770] primary dev: ETH0
I0320 03:40:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:40:13.422718  543705 net.go:698] Add success.
I0320 03:40:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:40:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:40:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 03:40:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:40:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 03:40:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:40:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:40:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:40:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:40:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:40:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:40:22.427832  543705 disk_info.go:125] begin check local disk info of client
I0320 03:40:22.430307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:40:22.430314  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0a80 0xc0002a0ac0]
E0320 03:40:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:40:23.409799  543705 memory.go:184] no items to output this cycle
I0320 03:40:23.409808  543705 cpu.go:275] no items to output this cycle
E0320 03:40:33.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:40:33.409945  543705 cpu.go:275] no items to output this cycle
I0320 03:40:33.409948  543705 memory.go:184] no items to output this cycle
E0320 03:40:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:40:43.409810  543705 memory.go:191] Add success.
I0320 03:40:43.409822  543705 cpu.go:282] Add success.
I0320 03:40:43.419962  543705 net.go:648] Add success.
I0320 03:40:43.422752  543705 net.go:770] primary dev: ETH0
I0320 03:40:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:40:43.422776  543705 net.go:698] Add success.
I0320 03:40:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:40:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:40:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:40:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:40:53.409776  543705 memory.go:184] no items to output this cycle
I0320 03:40:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 03:41:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:41:03.409787  543705 memory.go:184] no items to output this cycle
I0320 03:41:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 03:41:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:41:13.409785  543705 memory.go:191] Add success.
I0320 03:41:13.409806  543705 cpu.go:282] Add success.
W0320 03:41:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:41:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:41:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:41:13.420036  543705 net.go:648] Add success.
I0320 03:41:13.422543  543705 net.go:770] primary dev: ETH0
I0320 03:41:13.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:41:13.422568  543705 net.go:698] Add success.
I0320 03:41:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:41:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:41:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 03:41:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:41:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 03:41:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:41:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:41:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:41:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:41:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:41:22.430841  543705 disk_info.go:125] begin check local disk info of client
I0320 03:41:22.433432  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:41:22.433439  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266340 0xc000266380]
E0320 03:41:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:41:23.409805  543705 memory.go:184] no items to output this cycle
I0320 03:41:23.409812  543705 cpu.go:275] no items to output this cycle
E0320 03:41:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:41:33.409779  543705 memory.go:184] no items to output this cycle
I0320 03:41:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:41:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:41:43.409789  543705 memory.go:191] Add success.
I0320 03:41:43.409817  543705 cpu.go:282] Add success.
I0320 03:41:43.419968  543705 net.go:648] Add success.
I0320 03:41:43.422485  543705 net.go:770] primary dev: ETH0
I0320 03:41:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:41:43.422510  543705 net.go:698] Add success.
I0320 03:41:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:41:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:41:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:41:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:41:53.409770  543705 memory.go:184] no items to output this cycle
I0320 03:41:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:42:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:42:03.409776  543705 memory.go:184] no items to output this cycle
I0320 03:42:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:42:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:42:13.409777  543705 memory.go:191] Add success.
W0320 03:42:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:42:13.409807  543705 cpu.go:282] Add success.
W0320 03:42:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:42:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:42:13.420247  543705 net.go:648] Add success.
I0320 03:42:13.422885  543705 net.go:770] primary dev: ETH0
I0320 03:42:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:42:13.422911  543705 net.go:698] Add success.
I0320 03:42:13.659637  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2349ca2-3528-4b39-856a-e1a93ce7434a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:42:13.659671  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 03:42:14.454164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:42:14.454227  543705 disk_worker.go:708] disk space is not compliant
W0320 03:42:14.454230  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:42:14.456076  543705 disk_worker.go:494] system disk:vda1
E0320 03:42:14.456101  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0320 03:42:14.456106  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:42:14.456110  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:42:14.456115  543705 custom_config.go:64] query custom config with name: gpu
E0320 03:42:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:42:15.456884  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:42:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:42:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:42:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:42:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:42:16.472322  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:42:22.433869  543705 disk_info.go:125] begin check local disk info of client
I0320 03:42:22.436285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:42:22.436292  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468b40 0xc000468b80]
E0320 03:42:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:42:23.409792  543705 memory.go:184] no items to output this cycle
I0320 03:42:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 03:42:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:42:33.409763  543705 memory.go:184] no items to output this cycle
I0320 03:42:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 03:42:38.093883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:42:38.093888  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:42:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:42:43.410820  543705 memory.go:191] Add success.
I0320 03:42:43.409825  543705 cpu.go:282] Add success.
I0320 03:42:43.420567  543705 net.go:648] Add success.
I0320 03:42:43.423282  543705 net.go:770] primary dev: ETH0
I0320 03:42:43.423301  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:42:43.423315  543705 net.go:698] Add success.
I0320 03:42:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:42:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:42:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:42:53.409763  543705 memory.go:184] no items to output this cycle
I0320 03:42:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 03:43:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:43:03.409807  543705 memory.go:184] no items to output this cycle
I0320 03:43:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 03:43:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:43:13.409813  543705 memory.go:191] Add success.
I0320 03:43:13.409824  543705 cpu.go:282] Add success.
W0320 03:43:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:43:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:43:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:43:13.420158  543705 net.go:648] Add success.
I0320 03:43:13.422637  543705 net.go:770] primary dev: ETH0
I0320 03:43:13.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:43:13.422662  543705 net.go:698] Add success.
I0320 03:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:43:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:43:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 03:43:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:43:14.456511  543705 disk_worker.go:494] system disk:vda1
I0320 03:43:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:43:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:43:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:43:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:43:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:43:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:43:22.436879  543705 disk_info.go:125] begin check local disk info of client
I0320 03:43:22.439370  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:43:22.439376  543705 disk_info.go:196] parse disk info done, disk is : [0xc000299340 0xc000299380]
E0320 03:43:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:43:23.409782  543705 memory.go:184] no items to output this cycle
I0320 03:43:23.409797  543705 cpu.go:275] no items to output this cycle
E0320 03:43:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:43:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 03:43:33.409791  543705 memory.go:184] no items to output this cycle
E0320 03:43:43.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:43:43.409926  543705 memory.go:191] Add success.
I0320 03:43:43.409980  543705 cpu.go:282] Add success.
I0320 03:43:43.419731  543705 net.go:648] Add success.
I0320 03:43:43.422418  543705 net.go:770] primary dev: ETH0
I0320 03:43:43.422435  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:43:43.422447  543705 net.go:698] Add success.
I0320 03:43:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:43:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:43:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:43:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:43:53.409776  543705 memory.go:184] no items to output this cycle
I0320 03:43:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 03:44:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:44:03.409784  543705 memory.go:184] no items to output this cycle
I0320 03:44:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 03:44:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:44:13.409793  543705 memory.go:191] Add success.
I0320 03:44:13.409793  543705 cpu.go:282] Add success.
W0320 03:44:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:44:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:44:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:44:13.420142  543705 net.go:648] Add success.
I0320 03:44:13.423231  543705 net.go:770] primary dev: ETH0
I0320 03:44:13.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:44:13.423263  543705 net.go:698] Add success.
I0320 03:44:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:44:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:44:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 03:44:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:44:14.456494  543705 disk_worker.go:494] system disk:vda1
I0320 03:44:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:44:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:44:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:44:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:44:22.439895  543705 disk_info.go:125] begin check local disk info of client
I0320 03:44:22.442340  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:44:22.442347  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf500 0xc0002bf540]
E0320 03:44:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:44:23.409781  543705 memory.go:184] no items to output this cycle
I0320 03:44:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:44:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:44:33.409783  543705 memory.go:184] no items to output this cycle
I0320 03:44:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 03:44:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:44:43.409785  543705 memory.go:191] Add success.
I0320 03:44:43.409816  543705 cpu.go:282] Add success.
I0320 03:44:43.419977  543705 net.go:648] Add success.
I0320 03:44:43.422694  543705 net.go:770] primary dev: ETH0
I0320 03:44:43.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:44:43.422718  543705 net.go:698] Add success.
I0320 03:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:44:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:44:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:44:53.409764  543705 memory.go:184] no items to output this cycle
I0320 03:44:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:45:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:45:03.409789  543705 memory.go:184] no items to output this cycle
I0320 03:45:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 03:45:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:45:13.409778  543705 memory.go:191] Add success.
W0320 03:45:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:45:13.409809  543705 cpu.go:282] Add success.
W0320 03:45:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:45:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:45:13.420044  543705 net.go:648] Add success.
I0320 03:45:13.423028  543705 net.go:770] primary dev: ETH0
I0320 03:45:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:45:13.423052  543705 net.go:698] Add success.
I0320 03:45:13.553667  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2ad8fb9-fc15-418a-9263-82a3f7000216","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:45:13.553703  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:45:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:45:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:45:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 03:45:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:45:14.456674  543705 disk_worker.go:494] system disk:vda1
I0320 03:45:14.456705  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:45:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:45:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:45:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:45:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:45:16.472453  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:45:22.442906  543705 disk_info.go:125] begin check local disk info of client
I0320 03:45:22.445413  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:45:22.445419  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376e80 0xc000376ec0]
E0320 03:45:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:45:23.409787  543705 memory.go:184] no items to output this cycle
I0320 03:45:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:45:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:45:33.409768  543705 memory.go:184] no items to output this cycle
I0320 03:45:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 03:45:38.097174  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:45:38.097180  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:45:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:45:43.410632  543705 memory.go:191] Add success.
I0320 03:45:43.409810  543705 cpu.go:282] Add success.
I0320 03:45:43.420347  543705 net.go:648] Add success.
I0320 03:45:43.422935  543705 net.go:770] primary dev: ETH0
I0320 03:45:43.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:45:43.422961  543705 net.go:698] Add success.
I0320 03:45:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:45:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:45:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:45:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:45:53.409772  543705 memory.go:184] no items to output this cycle
I0320 03:45:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:46:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:46:03.409804  543705 memory.go:184] no items to output this cycle
I0320 03:46:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 03:46:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:46:13.409785  543705 memory.go:191] Add success.
I0320 03:46:13.409786  543705 cpu.go:282] Add success.
W0320 03:46:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:46:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:46:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:46:13.420075  543705 net.go:648] Add success.
I0320 03:46:13.422871  543705 net.go:770] primary dev: ETH0
I0320 03:46:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:46:13.422896  543705 net.go:698] Add success.
I0320 03:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:46:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:46:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 03:46:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:46:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 03:46:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:46:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:46:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:46:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:46:16.472406  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:46:22.445924  543705 disk_info.go:125] begin check local disk info of client
I0320 03:46:22.448485  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:46:22.448492  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0320 03:46:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:46:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 03:46:23.409781  543705 memory.go:184] no items to output this cycle
E0320 03:46:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:46:33.409784  543705 memory.go:184] no items to output this cycle
I0320 03:46:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:46:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:46:43.409781  543705 memory.go:191] Add success.
I0320 03:46:43.409802  543705 cpu.go:282] Add success.
I0320 03:46:43.420033  543705 net.go:648] Add success.
I0320 03:46:43.422743  543705 net.go:770] primary dev: ETH0
I0320 03:46:43.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:46:43.422773  543705 net.go:698] Add success.
I0320 03:46:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:46:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:46:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:46:53.409767  543705 memory.go:184] no items to output this cycle
I0320 03:46:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:47:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:47:03.409781  543705 memory.go:184] no items to output this cycle
I0320 03:47:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:47:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:47:13.409810  543705 memory.go:191] Add success.
I0320 03:47:13.409814  543705 cpu.go:282] Add success.
W0320 03:47:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:47:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:47:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:47:13.420244  543705 net.go:648] Add success.
I0320 03:47:13.422850  543705 net.go:770] primary dev: ETH0
I0320 03:47:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:47:13.422877  543705 net.go:698] Add success.
I0320 03:47:13.453411  543705 event_worker.go:152] Polling the log file for events...
W0320 03:47:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:47:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 03:47:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:47:14.456783  543705 disk_worker.go:494] system disk:vda1
I0320 03:47:14.456823  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:47:14.457147  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:47:14.457154  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:47:14.457159  543705 custom_config.go:64] query custom config with name: gpu
E0320 03:47:15.456976  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:47:15.456991  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:47:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:47:16.457998  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:47:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:47:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:47:16.472527  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:47:22.448939  543705 disk_info.go:125] begin check local disk info of client
I0320 03:47:22.451465  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:47:22.451471  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000edd80 0xc0000eddc0]
E0320 03:47:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:47:23.409792  543705 memory.go:184] no items to output this cycle
I0320 03:47:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 03:47:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:47:33.409773  543705 memory.go:184] no items to output this cycle
I0320 03:47:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:47:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:47:43.409787  543705 memory.go:191] Add success.
I0320 03:47:43.409798  543705 cpu.go:282] Add success.
I0320 03:47:43.419837  543705 net.go:648] Add success.
I0320 03:47:43.422465  543705 net.go:770] primary dev: ETH0
I0320 03:47:43.422479  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:47:43.422492  543705 net.go:698] Add success.
I0320 03:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:47:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:47:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:47:53.409795  543705 memory.go:184] no items to output this cycle
I0320 03:47:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 03:48:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:48:03.409786  543705 memory.go:184] no items to output this cycle
I0320 03:48:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:48:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:48:13.409788  543705 memory.go:191] Add success.
I0320 03:48:13.409791  543705 cpu.go:282] Add success.
W0320 03:48:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:48:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:48:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:48:13.420205  543705 net.go:648] Add success.
I0320 03:48:13.423162  543705 net.go:770] primary dev: ETH0
I0320 03:48:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:48:13.423188  543705 net.go:698] Add success.
I0320 03:48:13.469235  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db7c55b8-727e-4966-a8a7-36ecce4b6cb1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:48:13.469274  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:48:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:48:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:48:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 03:48:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:48:14.456540  543705 disk_worker.go:494] system disk:vda1
I0320 03:48:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:48:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:48:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:48:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:48:16.472429  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:48:22.451955  543705 disk_info.go:125] begin check local disk info of client
I0320 03:48:22.454462  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:48:22.454468  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a40 0xc0000c4a80]
E0320 03:48:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:48:23.409783  543705 memory.go:184] no items to output this cycle
I0320 03:48:23.409787  543705 cpu.go:275] no items to output this cycle
E0320 03:48:33.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:48:33.409899  543705 memory.go:184] no items to output this cycle
I0320 03:48:33.409902  543705 cpu.go:275] no items to output this cycle
I0320 03:48:38.097746  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:48:38.097753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:48:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:48:43.410605  543705 memory.go:191] Add success.
I0320 03:48:43.409823  543705 cpu.go:282] Add success.
I0320 03:48:43.420317  543705 net.go:648] Add success.
I0320 03:48:43.422816  543705 net.go:770] primary dev: ETH0
I0320 03:48:43.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:48:43.422842  543705 net.go:698] Add success.
I0320 03:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:48:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:48:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:48:53.409794  543705 memory.go:184] no items to output this cycle
I0320 03:48:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 03:49:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:49:03.409783  543705 memory.go:184] no items to output this cycle
I0320 03:49:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 03:49:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:49:13.409786  543705 memory.go:191] Add success.
I0320 03:49:13.409788  543705 cpu.go:282] Add success.
W0320 03:49:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:49:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:49:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:49:13.420072  543705 net.go:648] Add success.
I0320 03:49:13.422795  543705 net.go:770] primary dev: ETH0
I0320 03:49:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:49:13.422823  543705 net.go:698] Add success.
I0320 03:49:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:49:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:49:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 03:49:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:49:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 03:49:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:49:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:49:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:49:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:49:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:49:16.472483  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:49:22.454972  543705 disk_info.go:125] begin check local disk info of client
I0320 03:49:22.457583  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:49:22.457589  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003534c0 0xc000353500]
E0320 03:49:23.410300  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:49:23.410315  543705 memory.go:184] no items to output this cycle
I0320 03:49:23.410434  543705 cpu.go:275] no items to output this cycle
E0320 03:49:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:49:33.409777  543705 memory.go:184] no items to output this cycle
I0320 03:49:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:49:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:49:43.409787  543705 memory.go:191] Add success.
I0320 03:49:43.409802  543705 cpu.go:282] Add success.
I0320 03:49:43.420293  543705 net.go:648] Add success.
I0320 03:49:43.423552  543705 net.go:770] primary dev: ETH0
I0320 03:49:43.423566  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:49:43.423579  543705 net.go:698] Add success.
I0320 03:49:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:49:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:49:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:49:53.409794  543705 memory.go:184] no items to output this cycle
I0320 03:49:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 03:50:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:50:03.409772  543705 memory.go:184] no items to output this cycle
I0320 03:50:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 03:50:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:50:13.409789  543705 memory.go:191] Add success.
I0320 03:50:13.409793  543705 cpu.go:282] Add success.
W0320 03:50:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:50:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:50:13.420035  543705 net.go:648] Add success.
I0320 03:50:13.422834  543705 net.go:770] primary dev: ETH0
I0320 03:50:13.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:50:13.422859  543705 net.go:698] Add success.
I0320 03:50:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:50:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:50:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 03:50:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:50:14.456628  543705 disk_worker.go:494] system disk:vda1
I0320 03:50:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:50:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:50:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:50:16.472417  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:50:22.457974  543705 disk_info.go:125] begin check local disk info of client
I0320 03:50:22.460483  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:50:22.460488  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375c40 0xc000375c80]
E0320 03:50:23.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:50:23.409890  543705 memory.go:184] no items to output this cycle
I0320 03:50:23.410039  543705 cpu.go:275] no items to output this cycle
E0320 03:50:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:50:33.409797  543705 memory.go:184] no items to output this cycle
I0320 03:50:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 03:50:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:50:43.409795  543705 memory.go:191] Add success.
I0320 03:50:43.409796  543705 cpu.go:282] Add success.
I0320 03:50:43.419980  543705 net.go:648] Add success.
I0320 03:50:43.422850  543705 net.go:770] primary dev: ETH0
I0320 03:50:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:50:43.422880  543705 net.go:698] Add success.
I0320 03:50:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:50:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:50:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:50:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:50:53.409777  543705 memory.go:184] no items to output this cycle
I0320 03:50:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 03:51:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:51:03.409810  543705 memory.go:184] no items to output this cycle
I0320 03:51:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 03:51:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:51:13.409783  543705 memory.go:191] Add success.
W0320 03:51:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 03:51:13.409816  543705 cpu.go:282] Add success.
W0320 03:51:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:51:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:51:13.420127  543705 net.go:648] Add success.
I0320 03:51:13.423087  543705 net.go:770] primary dev: ETH0
I0320 03:51:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:51:13.423114  543705 net.go:698] Add success.
I0320 03:51:13.469244  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea6daa3a-7c2d-43a8-a516-575afaf869b9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:51:13.469286  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:51:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:51:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:51:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 03:51:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:51:14.456520  543705 disk_worker.go:494] system disk:vda1
I0320 03:51:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:51:15.456028  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:51:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:51:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:51:16.472471  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:51:22.461001  543705 disk_info.go:125] begin check local disk info of client
I0320 03:51:22.463578  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:51:22.463584  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374080 0xc0003740c0]
E0320 03:51:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:51:23.409789  543705 memory.go:184] no items to output this cycle
I0320 03:51:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:51:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:51:33.409795  543705 memory.go:184] no items to output this cycle
I0320 03:51:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 03:51:38.101193  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:51:38.101200  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:51:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:51:43.410784  543705 memory.go:191] Add success.
I0320 03:51:43.409806  543705 cpu.go:282] Add success.
I0320 03:51:43.420561  543705 net.go:648] Add success.
I0320 03:51:43.423206  543705 net.go:770] primary dev: ETH0
I0320 03:51:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:51:43.423231  543705 net.go:698] Add success.
I0320 03:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:51:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:51:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:51:53.409778  543705 memory.go:184] no items to output this cycle
I0320 03:51:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 03:52:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:52:03.409784  543705 memory.go:184] no items to output this cycle
I0320 03:52:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 03:52:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:52:13.409819  543705 memory.go:191] Add success.
I0320 03:52:13.409824  543705 cpu.go:282] Add success.
W0320 03:52:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:52:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:52:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:52:13.420149  543705 net.go:648] Add success.
I0320 03:52:13.422813  543705 net.go:770] primary dev: ETH0
I0320 03:52:13.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:52:13.422850  543705 net.go:698] Add success.
W0320 03:52:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:52:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 03:52:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:52:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:52:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:52:14.455891  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:52:14.456645  543705 disk_worker.go:494] system disk:vda1
I0320 03:52:14.456693  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:52:15.456779  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:52:15.456788  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:52:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:52:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:52:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:52:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:52:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:52:22.464007  543705 disk_info.go:125] begin check local disk info of client
I0320 03:52:22.466579  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:52:22.466587  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c43c0 0xc0000c4400]
E0320 03:52:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:52:23.409765  543705 memory.go:184] no items to output this cycle
I0320 03:52:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 03:52:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:52:33.409779  543705 memory.go:184] no items to output this cycle
I0320 03:52:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 03:52:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:52:43.409790  543705 memory.go:191] Add success.
I0320 03:52:43.409792  543705 cpu.go:282] Add success.
I0320 03:52:43.419928  543705 net.go:648] Add success.
I0320 03:52:43.422438  543705 net.go:770] primary dev: ETH0
I0320 03:52:43.422451  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:52:43.422463  543705 net.go:698] Add success.
I0320 03:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:52:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:52:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:52:53.409800  543705 memory.go:184] no items to output this cycle
I0320 03:52:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 03:53:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:53:03.409804  543705 memory.go:184] no items to output this cycle
I0320 03:53:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 03:53:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:53:13.409790  543705 memory.go:191] Add success.
I0320 03:53:13.409812  543705 cpu.go:282] Add success.
W0320 03:53:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:53:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:53:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:53:13.420103  543705 net.go:648] Add success.
I0320 03:53:13.423036  543705 net.go:770] primary dev: ETH0
I0320 03:53:13.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:53:13.423061  543705 net.go:698] Add success.
I0320 03:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:53:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:53:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 03:53:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:53:14.456609  543705 disk_worker.go:494] system disk:vda1
I0320 03:53:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:53:15.455987  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:53:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:53:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:53:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:53:16.472491  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:53:22.467033  543705 disk_info.go:125] begin check local disk info of client
I0320 03:53:22.469614  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:53:22.469621  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260000 0xc000260040]
E0320 03:53:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:53:23.409781  543705 memory.go:184] no items to output this cycle
I0320 03:53:23.409781  543705 cpu.go:275] no items to output this cycle
E0320 03:53:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:53:33.409807  543705 memory.go:184] no items to output this cycle
I0320 03:53:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 03:53:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:53:43.409813  543705 memory.go:191] Add success.
I0320 03:53:43.409829  543705 cpu.go:282] Add success.
I0320 03:53:43.419957  543705 net.go:648] Add success.
I0320 03:53:43.422874  543705 net.go:770] primary dev: ETH0
I0320 03:53:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:53:43.422898  543705 net.go:698] Add success.
I0320 03:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:53:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:53:53.409774  543705 memory.go:184] no items to output this cycle
I0320 03:53:53.409777  543705 cpu.go:275] no items to output this cycle
E0320 03:54:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:54:03.409805  543705 memory.go:184] no items to output this cycle
I0320 03:54:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 03:54:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:54:13.409795  543705 memory.go:191] Add success.
I0320 03:54:13.409798  543705 cpu.go:282] Add success.
W0320 03:54:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:54:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:54:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:54:13.420061  543705 net.go:648] Add success.
I0320 03:54:13.422993  543705 net.go:770] primary dev: ETH0
I0320 03:54:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:54:13.423024  543705 net.go:698] Add success.
I0320 03:54:13.464394  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f0a17ad1-1525-49c8-af71-92d014daa3ca","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:54:13.464427  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 03:54:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:54:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:54:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 03:54:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:54:14.456543  543705 disk_worker.go:494] system disk:vda1
I0320 03:54:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:54:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:54:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:54:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:54:22.470036  543705 disk_info.go:125] begin check local disk info of client
I0320 03:54:22.472482  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:54:22.472488  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364300 0xc000364340]
E0320 03:54:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:54:23.409787  543705 memory.go:184] no items to output this cycle
I0320 03:54:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:54:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:54:33.409807  543705 memory.go:184] no items to output this cycle
I0320 03:54:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 03:54:38.101733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:54:38.101740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:54:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:54:43.410685  543705 memory.go:191] Add success.
I0320 03:54:43.409801  543705 cpu.go:282] Add success.
I0320 03:54:43.420432  543705 net.go:648] Add success.
I0320 03:54:43.422929  543705 net.go:770] primary dev: ETH0
I0320 03:54:43.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:54:43.422956  543705 net.go:698] Add success.
I0320 03:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:54:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:54:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:54:53.410403  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:54:53.410425  543705 memory.go:184] no items to output this cycle
I0320 03:54:53.410431  543705 cpu.go:275] no items to output this cycle
E0320 03:55:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:55:03.409806  543705 memory.go:184] no items to output this cycle
I0320 03:55:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 03:55:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:55:13.409791  543705 memory.go:191] Add success.
I0320 03:55:13.409796  543705 cpu.go:282] Add success.
W0320 03:55:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:55:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:55:13.420044  543705 net.go:648] Add success.
I0320 03:55:13.422727  543705 net.go:770] primary dev: ETH0
I0320 03:55:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:55:13.422752  543705 net.go:698] Add success.
I0320 03:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:55:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:55:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 03:55:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:55:14.456623  543705 disk_worker.go:494] system disk:vda1
I0320 03:55:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:55:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:55:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:55:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:55:16.472460  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:55:22.473063  543705 disk_info.go:125] begin check local disk info of client
I0320 03:55:22.475612  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:55:22.475618  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a340 0xc00035a380]
E0320 03:55:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:55:23.409791  543705 memory.go:184] no items to output this cycle
I0320 03:55:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 03:55:33.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:55:33.409912  543705 cpu.go:275] no items to output this cycle
I0320 03:55:33.409928  543705 memory.go:184] no items to output this cycle
E0320 03:55:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:55:43.409780  543705 memory.go:191] Add success.
I0320 03:55:43.409813  543705 cpu.go:282] Add success.
I0320 03:55:43.419902  543705 net.go:648] Add success.
I0320 03:55:43.422764  543705 net.go:770] primary dev: ETH0
I0320 03:55:43.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:55:43.422789  543705 net.go:698] Add success.
I0320 03:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:55:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:55:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:55:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:55:53.409777  543705 cpu.go:275] no items to output this cycle
I0320 03:55:53.409788  543705 memory.go:184] no items to output this cycle
E0320 03:56:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:56:03.409795  543705 memory.go:184] no items to output this cycle
I0320 03:56:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:56:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:56:13.409795  543705 memory.go:191] Add success.
I0320 03:56:13.409800  543705 cpu.go:282] Add success.
W0320 03:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:56:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:56:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:56:13.420116  543705 net.go:648] Add success.
I0320 03:56:13.422834  543705 net.go:770] primary dev: ETH0
I0320 03:56:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:56:13.422863  543705 net.go:698] Add success.
I0320 03:56:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:56:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:56:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 03:56:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:56:14.456613  543705 disk_worker.go:494] system disk:vda1
I0320 03:56:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:56:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:56:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:56:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:56:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:56:22.476076  543705 disk_info.go:125] begin check local disk info of client
I0320 03:56:22.478635  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:56:22.478641  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
E0320 03:56:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:56:23.409801  543705 memory.go:184] no items to output this cycle
I0320 03:56:23.409812  543705 cpu.go:275] no items to output this cycle
E0320 03:56:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:56:33.409765  543705 memory.go:184] no items to output this cycle
I0320 03:56:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 03:56:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:56:43.409817  543705 memory.go:191] Add success.
I0320 03:56:43.409824  543705 cpu.go:282] Add success.
I0320 03:56:43.420003  543705 net.go:648] Add success.
I0320 03:56:43.423049  543705 net.go:770] primary dev: ETH0
I0320 03:56:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:56:43.423073  543705 net.go:698] Add success.
I0320 03:56:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:56:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:56:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:56:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:56:53.409771  543705 memory.go:184] no items to output this cycle
I0320 03:56:53.409776  543705 cpu.go:275] no items to output this cycle
E0320 03:57:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:57:03.409782  543705 memory.go:184] no items to output this cycle
I0320 03:57:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 03:57:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:57:13.409795  543705 cpu.go:282] Add success.
I0320 03:57:13.409800  543705 memory.go:191] Add success.
W0320 03:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:57:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:57:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:57:13.420194  543705 net.go:648] Add success.
I0320 03:57:13.429714  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 03:57:13.429789  543705 net.go:770] primary dev: ETH0
I0320 03:57:13.429802  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:57:13.429813  543705 net.go:698] Add success.
I0320 03:57:13.453463  543705 event_worker.go:152] Polling the log file for events...
I0320 03:57:13.470047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07003181-9047-4c9d-af5e-745ef109bae2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 03:57:13.470079  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 03:57:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:57:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 03:57:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0320 03:57:14.456135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 03:57:14.456144  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 03:57:14.456150  543705 custom_config.go:64] query custom config with name: gpu
I0320 03:57:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 03:57:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 03:57:15.456553  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 03:57:15.456568  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:57:16.458095  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 03:57:16.458123  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 03:57:16.458157  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:57:16.458178  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:57:16.472537  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:57:22.479085  543705 disk_info.go:125] begin check local disk info of client
I0320 03:57:22.481668  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:57:22.481675  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024d6c0 0xc00024d700]
E0320 03:57:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:57:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 03:57:23.409779  543705 memory.go:184] no items to output this cycle
E0320 03:57:33.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:57:33.409896  543705 memory.go:184] no items to output this cycle
I0320 03:57:33.409973  543705 cpu.go:275] no items to output this cycle
I0320 03:57:38.101886  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 03:57:38.101891  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 03:57:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:57:43.410579  543705 memory.go:191] Add success.
I0320 03:57:43.409801  543705 cpu.go:282] Add success.
I0320 03:57:43.420277  543705 net.go:648] Add success.
I0320 03:57:43.422864  543705 net.go:770] primary dev: ETH0
I0320 03:57:43.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:57:43.422893  543705 net.go:698] Add success.
I0320 03:57:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:57:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:57:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:57:53.409777  543705 memory.go:184] no items to output this cycle
I0320 03:57:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 03:58:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:58:03.409785  543705 memory.go:184] no items to output this cycle
I0320 03:58:03.409786  543705 cpu.go:275] no items to output this cycle
W0320 03:58:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:58:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:58:13.409736  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:58:13.409797  543705 cpu.go:282] Add success.
E0320 03:58:13.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:58:13.409864  543705 memory.go:191] Add success.
I0320 03:58:13.420056  543705 net.go:648] Add success.
I0320 03:58:13.422580  543705 net.go:770] primary dev: ETH0
I0320 03:58:13.422594  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:58:13.422607  543705 net.go:698] Add success.
I0320 03:58:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:58:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:58:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 03:58:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:58:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 03:58:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:58:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:58:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:58:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:58:22.481759  543705 disk_info.go:125] begin check local disk info of client
I0320 03:58:22.484244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:58:22.484254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000edf40 0xc0004fc000]
E0320 03:58:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:58:23.409769  543705 memory.go:184] no items to output this cycle
I0320 03:58:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 03:58:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:58:33.409912  543705 memory.go:184] no items to output this cycle
I0320 03:58:33.410114  543705 cpu.go:275] no items to output this cycle
E0320 03:58:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:58:43.409780  543705 memory.go:191] Add success.
I0320 03:58:43.409811  543705 cpu.go:282] Add success.
I0320 03:58:43.419851  543705 net.go:648] Add success.
I0320 03:58:43.422582  543705 net.go:770] primary dev: ETH0
I0320 03:58:43.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:58:43.422607  543705 net.go:698] Add success.
I0320 03:58:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:58:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:58:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:58:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:58:53.409768  543705 memory.go:184] no items to output this cycle
I0320 03:58:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 03:59:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:59:03.409780  543705 memory.go:184] no items to output this cycle
I0320 03:59:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 03:59:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:59:13.409796  543705 memory.go:191] Add success.
I0320 03:59:13.409797  543705 cpu.go:282] Add success.
W0320 03:59:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 03:59:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 03:59:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 03:59:13.420596  543705 net.go:648] Add success.
I0320 03:59:13.423611  543705 net.go:770] primary dev: ETH0
I0320 03:59:13.423625  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:59:13.423636  543705 net.go:698] Add success.
I0320 03:59:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 03:59:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 03:59:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 03:59:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 03:59:14.456630  543705 disk_worker.go:494] system disk:vda1
I0320 03:59:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 03:59:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 03:59:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:59:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:59:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0320 03:59:16.472417  543705 disk_local_worker.go:436] Get disk info: []
I0320 03:59:22.485117  543705 disk_info.go:125] begin check local disk info of client
I0320 03:59:22.487707  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 03:59:22.487714  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a39c0 0xc0002a3a00]
E0320 03:59:23.409735  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:59:23.409748  543705 memory.go:184] no items to output this cycle
I0320 03:59:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 03:59:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:59:33.409795  543705 memory.go:184] no items to output this cycle
I0320 03:59:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 03:59:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:59:43.409825  543705 memory.go:191] Add success.
I0320 03:59:43.409833  543705 cpu.go:282] Add success.
I0320 03:59:43.419976  543705 net.go:648] Add success.
I0320 03:59:43.423015  543705 net.go:770] primary dev: ETH0
I0320 03:59:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0320 03:59:43.423045  543705 net.go:698] Add success.
I0320 03:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 03:59:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 03:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 03:59:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 03:59:53.409777  543705 memory.go:184] no items to output this cycle
I0320 03:59:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 04:00:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:00:03.409787  543705 memory.go:184] no items to output this cycle
I0320 04:00:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 04:00:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:00:13.409782  543705 memory.go:191] Add success.
W0320 04:00:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 04:00:13.409810  543705 cpu.go:282] Add success.
W0320 04:00:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:00:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:00:13.420057  543705 net.go:648] Add success.
I0320 04:00:13.422726  543705 net.go:770] primary dev: ETH0
I0320 04:00:13.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:00:13.422756  543705 net.go:698] Add success.
I0320 04:00:13.468517  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1293f6e7-f7a0-4bd2-b118-fb0bdaaa9470","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:00:13.468558  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:00:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:00:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:00:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 04:00:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:00:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 04:00:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:00:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:00:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:00:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:00:22.487799  543705 disk_info.go:125] begin check local disk info of client
I0320 04:00:22.490355  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:00:22.490362  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf00 0xc0001abf40]
E0320 04:00:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:00:23.409759  543705 memory.go:184] no items to output this cycle
I0320 04:00:23.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:00:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:00:33.409770  543705 memory.go:184] no items to output this cycle
I0320 04:00:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 04:00:38.105221  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:00:38.105227  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:00:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:00:43.410704  543705 memory.go:191] Add success.
I0320 04:00:43.409803  543705 cpu.go:282] Add success.
I0320 04:00:43.420434  543705 net.go:648] Add success.
I0320 04:00:43.422966  543705 net.go:770] primary dev: ETH0
I0320 04:00:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:00:43.422992  543705 net.go:698] Add success.
I0320 04:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:00:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:00:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:00:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:00:53.409793  543705 memory.go:184] no items to output this cycle
I0320 04:00:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 04:01:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:01:03.409777  543705 memory.go:184] no items to output this cycle
I0320 04:01:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:01:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:01:13.409816  543705 memory.go:191] Add success.
I0320 04:01:13.409827  543705 cpu.go:282] Add success.
W0320 04:01:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:01:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:01:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:01:13.420163  543705 net.go:648] Add success.
I0320 04:01:13.422875  543705 net.go:770] primary dev: ETH0
I0320 04:01:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:01:13.422900  543705 net.go:698] Add success.
I0320 04:01:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:01:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:01:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 04:01:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:01:14.456611  543705 disk_worker.go:494] system disk:vda1
I0320 04:01:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:01:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:01:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:01:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:01:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:01:16.472458  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:01:22.491155  543705 disk_info.go:125] begin check local disk info of client
I0320 04:01:22.493621  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:01:22.493628  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266340 0xc000266380]
E0320 04:01:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:01:23.409792  543705 memory.go:184] no items to output this cycle
I0320 04:01:23.409807  543705 cpu.go:275] no items to output this cycle
E0320 04:01:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:01:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 04:01:33.409781  543705 memory.go:184] no items to output this cycle
E0320 04:01:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:01:43.409785  543705 memory.go:191] Add success.
I0320 04:01:43.409803  543705 cpu.go:282] Add success.
I0320 04:01:43.419956  543705 net.go:648] Add success.
I0320 04:01:43.422364  543705 net.go:770] primary dev: ETH0
I0320 04:01:43.422379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:01:43.422394  543705 net.go:698] Add success.
I0320 04:01:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:01:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:01:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:01:53.409787  543705 memory.go:184] no items to output this cycle
I0320 04:01:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:02:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:02:03.409817  543705 memory.go:184] no items to output this cycle
I0320 04:02:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 04:02:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:02:13.409816  543705 memory.go:191] Add success.
I0320 04:02:13.409820  543705 cpu.go:282] Add success.
W0320 04:02:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:02:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:02:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:02:13.420176  543705 net.go:648] Add success.
I0320 04:02:13.422806  543705 net.go:770] primary dev: ETH0
I0320 04:02:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:02:13.422832  543705 net.go:698] Add success.
W0320 04:02:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:02:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 04:02:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:02:14.456764  543705 disk_worker.go:494] system disk:vda1
I0320 04:02:14.456804  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:02:14.457136  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:02:14.457144  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:02:14.457149  543705 custom_config.go:64] query custom config with name: gpu
E0320 04:02:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:02:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:02:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:02:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:02:16.457957  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:02:16.457976  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:02:16.472323  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:02:22.494164  543705 disk_info.go:125] begin check local disk info of client
I0320 04:02:22.496602  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:02:22.496609  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253240 0xc000253280]
E0320 04:02:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:02:23.409781  543705 memory.go:184] no items to output this cycle
I0320 04:02:23.409805  543705 cpu.go:275] no items to output this cycle
E0320 04:02:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:02:33.409792  543705 memory.go:184] no items to output this cycle
I0320 04:02:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 04:02:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:02:43.409818  543705 memory.go:191] Add success.
I0320 04:02:43.409826  543705 cpu.go:282] Add success.
I0320 04:02:43.420077  543705 net.go:648] Add success.
I0320 04:02:43.423568  543705 net.go:770] primary dev: ETH0
I0320 04:02:43.423582  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:02:43.423596  543705 net.go:698] Add success.
I0320 04:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:02:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:02:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:02:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:02:53.409772  543705 cpu.go:275] no items to output this cycle
I0320 04:02:53.409781  543705 memory.go:184] no items to output this cycle
E0320 04:03:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:03:03.409806  543705 memory.go:184] no items to output this cycle
I0320 04:03:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 04:03:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:03:13.409776  543705 memory.go:191] Add success.
W0320 04:03:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 04:03:13.409805  543705 cpu.go:282] Add success.
W0320 04:03:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:03:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:03:13.420148  543705 net.go:648] Add success.
I0320 04:03:13.422878  543705 net.go:770] primary dev: ETH0
I0320 04:03:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:03:13.422904  543705 net.go:698] Add success.
I0320 04:03:13.464086  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b4b8a64a-5713-4e5f-8a85-312427ea552b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:03:13.464121  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:03:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:03:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:03:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 04:03:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:03:14.456500  543705 disk_worker.go:494] system disk:vda1
I0320 04:03:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:03:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:03:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:03:16.458106  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:03:16.472527  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:03:22.497177  543705 disk_info.go:125] begin check local disk info of client
I0320 04:03:22.499847  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:03:22.499853  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024c200 0xc00024c240]
E0320 04:03:23.409737  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:03:23.409752  543705 memory.go:184] no items to output this cycle
I0320 04:03:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:03:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:03:33.409784  543705 memory.go:184] no items to output this cycle
I0320 04:03:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 04:03:38.105733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:03:38.105740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:03:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:03:43.410583  543705 memory.go:191] Add success.
I0320 04:03:43.409804  543705 cpu.go:282] Add success.
I0320 04:03:43.420330  543705 net.go:648] Add success.
I0320 04:03:43.422772  543705 net.go:770] primary dev: ETH0
I0320 04:03:43.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:03:43.422799  543705 net.go:698] Add success.
I0320 04:03:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:03:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:03:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:03:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:03:53.409798  543705 memory.go:184] no items to output this cycle
I0320 04:03:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:04:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:04:03.409785  543705 memory.go:184] no items to output this cycle
I0320 04:04:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 04:04:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:04:13.409784  543705 memory.go:191] Add success.
I0320 04:04:13.409805  543705 cpu.go:282] Add success.
W0320 04:04:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:04:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:04:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:04:13.420131  543705 net.go:648] Add success.
I0320 04:04:13.422798  543705 net.go:770] primary dev: ETH0
I0320 04:04:13.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:04:13.422822  543705 net.go:698] Add success.
I0320 04:04:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:04:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:04:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 04:04:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:04:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 04:04:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:04:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:04:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:04:16.472464  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:04:22.499938  543705 disk_info.go:125] begin check local disk info of client
I0320 04:04:22.502448  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:04:22.502454  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab000 0xc0001ab040]
E0320 04:04:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:04:23.409763  543705 memory.go:184] no items to output this cycle
I0320 04:04:23.409774  543705 cpu.go:275] no items to output this cycle
E0320 04:04:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:04:33.409777  543705 memory.go:184] no items to output this cycle
I0320 04:04:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 04:04:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:04:43.409869  543705 memory.go:191] Add success.
I0320 04:04:43.409919  543705 cpu.go:282] Add success.
I0320 04:04:43.419708  543705 net.go:648] Add success.
I0320 04:04:43.422626  543705 net.go:770] primary dev: ETH0
I0320 04:04:43.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:04:43.422657  543705 net.go:698] Add success.
I0320 04:04:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:04:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:04:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:04:53.409792  543705 memory.go:184] no items to output this cycle
I0320 04:04:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 04:05:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:05:03.409793  543705 memory.go:184] no items to output this cycle
I0320 04:05:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 04:05:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:05:13.409797  543705 memory.go:191] Add success.
I0320 04:05:13.409797  543705 cpu.go:282] Add success.
W0320 04:05:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:05:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:05:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:05:13.420171  543705 net.go:648] Add success.
I0320 04:05:13.422756  543705 net.go:770] primary dev: ETH0
I0320 04:05:13.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:05:13.422783  543705 net.go:698] Add success.
I0320 04:05:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:05:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:05:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 04:05:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:05:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 04:05:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:05:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:05:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:05:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:05:16.472489  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:05:22.503202  543705 disk_info.go:125] begin check local disk info of client
I0320 04:05:22.505875  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:05:22.505881  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029bbc0 0xc00029bc00]
E0320 04:05:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:05:23.409772  543705 memory.go:184] no items to output this cycle
I0320 04:05:23.409781  543705 cpu.go:275] no items to output this cycle
E0320 04:05:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:05:33.409777  543705 memory.go:184] no items to output this cycle
I0320 04:05:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 04:05:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:05:43.409807  543705 memory.go:191] Add success.
I0320 04:05:43.409815  543705 cpu.go:282] Add success.
I0320 04:05:43.420118  543705 net.go:648] Add success.
I0320 04:05:43.422838  543705 net.go:770] primary dev: ETH0
I0320 04:05:43.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:05:43.422864  543705 net.go:698] Add success.
I0320 04:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:05:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:05:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:05:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:05:53.409795  543705 memory.go:184] no items to output this cycle
I0320 04:05:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 04:06:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:06:03.409810  543705 memory.go:184] no items to output this cycle
I0320 04:06:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 04:06:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:06:13.409791  543705 memory.go:191] Add success.
I0320 04:06:13.409803  543705 cpu.go:282] Add success.
W0320 04:06:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:06:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:06:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:06:13.420335  543705 net.go:648] Add success.
I0320 04:06:13.422917  543705 net.go:770] primary dev: ETH0
I0320 04:06:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:06:13.422946  543705 net.go:698] Add success.
I0320 04:06:13.482877  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"25840856-042d-4156-8681-82a6c384048a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:06:13.482910  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:06:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:06:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:06:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 04:06:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:06:14.456634  543705 disk_worker.go:494] system disk:vda1
I0320 04:06:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:06:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:06:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:06:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:06:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:06:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:06:22.505965  543705 disk_info.go:125] begin check local disk info of client
I0320 04:06:22.508528  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:06:22.508535  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac80 0xc00007acc0]
E0320 04:06:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:06:23.409763  543705 memory.go:184] no items to output this cycle
I0320 04:06:23.409796  543705 cpu.go:275] no items to output this cycle
E0320 04:06:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:06:33.409803  543705 memory.go:184] no items to output this cycle
I0320 04:06:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 04:06:38.109241  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:06:38.109247  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:06:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:06:43.410943  543705 memory.go:191] Add success.
I0320 04:06:43.409792  543705 cpu.go:282] Add success.
I0320 04:06:43.419736  543705 net.go:648] Add success.
I0320 04:06:43.422460  543705 net.go:770] primary dev: ETH0
I0320 04:06:43.422474  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:06:43.422486  543705 net.go:698] Add success.
I0320 04:06:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:06:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:06:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:06:53.410651  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:06:53.410666  543705 memory.go:184] no items to output this cycle
I0320 04:06:53.410672  543705 cpu.go:275] no items to output this cycle
E0320 04:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:07:03.409784  543705 memory.go:184] no items to output this cycle
I0320 04:07:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 04:07:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:07:13.409777  543705 memory.go:191] Add success.
W0320 04:07:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 04:07:13.409806  543705 cpu.go:282] Add success.
W0320 04:07:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:07:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:07:13.420295  543705 net.go:648] Add success.
I0320 04:07:13.422858  543705 net.go:770] primary dev: ETH0
I0320 04:07:13.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:07:13.422882  543705 net.go:698] Add success.
I0320 04:07:13.453617  543705 event_worker.go:152] Polling the log file for events...
W0320 04:07:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:07:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0320 04:07:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:07:14.456923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:07:14.456932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:07:14.456938  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:07:14.456975  543705 disk_worker.go:494] system disk:vda1
I0320 04:07:14.457016  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:07:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:07:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:07:16.458098  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:07:16.458122  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:07:16.458160  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:07:16.458187  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:07:16.472552  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:07:22.509230  543705 disk_info.go:125] begin check local disk info of client
I0320 04:07:22.511716  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:07:22.511722  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002923c0 0xc000292400]
E0320 04:07:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:07:23.409784  543705 memory.go:184] no items to output this cycle
I0320 04:07:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 04:07:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:07:33.409771  543705 memory.go:184] no items to output this cycle
I0320 04:07:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:07:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:07:43.409815  543705 memory.go:191] Add success.
I0320 04:07:43.409822  543705 cpu.go:282] Add success.
I0320 04:07:43.419905  543705 net.go:648] Add success.
I0320 04:07:43.422609  543705 net.go:770] primary dev: ETH0
I0320 04:07:43.422622  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:07:43.422634  543705 net.go:698] Add success.
I0320 04:07:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:07:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:07:53.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:07:53.410254  543705 memory.go:184] no items to output this cycle
I0320 04:07:53.410278  543705 cpu.go:275] no items to output this cycle
E0320 04:08:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:08:03.409783  543705 memory.go:184] no items to output this cycle
I0320 04:08:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 04:08:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:08:13.409795  543705 memory.go:191] Add success.
I0320 04:08:13.409798  543705 cpu.go:282] Add success.
W0320 04:08:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:08:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:08:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:08:13.420119  543705 net.go:648] Add success.
I0320 04:08:13.422905  543705 net.go:770] primary dev: ETH0
I0320 04:08:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:08:13.422930  543705 net.go:698] Add success.
I0320 04:08:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:08:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:08:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0320 04:08:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:08:14.456648  543705 disk_worker.go:494] system disk:vda1
I0320 04:08:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:08:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:08:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:08:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:08:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:08:22.512204  543705 disk_info.go:125] begin check local disk info of client
I0320 04:08:22.514720  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:08:22.514726  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4ec0 0xc0000c4f00]
E0320 04:08:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:08:23.409791  543705 memory.go:184] no items to output this cycle
I0320 04:08:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 04:08:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:08:33.409786  543705 memory.go:184] no items to output this cycle
I0320 04:08:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 04:08:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:08:43.409794  543705 memory.go:191] Add success.
I0320 04:08:43.409801  543705 cpu.go:282] Add success.
I0320 04:08:43.420015  543705 net.go:648] Add success.
I0320 04:08:43.422771  543705 net.go:770] primary dev: ETH0
I0320 04:08:43.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:08:43.422796  543705 net.go:698] Add success.
I0320 04:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:08:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:08:53.409768  543705 memory.go:184] no items to output this cycle
I0320 04:08:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:09:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:09:03.409802  543705 memory.go:184] no items to output this cycle
I0320 04:09:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 04:09:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:09:13.409812  543705 memory.go:191] Add success.
I0320 04:09:13.409825  543705 cpu.go:282] Add success.
W0320 04:09:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:09:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:09:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:09:13.420144  543705 net.go:648] Add success.
I0320 04:09:13.423265  543705 net.go:770] primary dev: ETH0
I0320 04:09:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:09:13.423292  543705 net.go:698] Add success.
I0320 04:09:13.468119  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e3a9009-3e34-4ce2-8153-02756b06f7b6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:09:13.468155  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:09:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:09:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 04:09:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:09:14.456642  543705 disk_worker.go:494] system disk:vda1
I0320 04:09:14.456673  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:09:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:09:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:09:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:09:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:09:16.472609  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:09:22.515217  543705 disk_info.go:125] begin check local disk info of client
I0320 04:09:22.517801  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:09:22.517807  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330140 0xc000330180]
E0320 04:09:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:09:23.409790  543705 memory.go:184] no items to output this cycle
I0320 04:09:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 04:09:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:09:33.409788  543705 memory.go:184] no items to output this cycle
I0320 04:09:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 04:09:38.109731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:09:38.109737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:09:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:09:43.409797  543705 cpu.go:282] Add success.
I0320 04:09:43.410819  543705 memory.go:191] Add success.
I0320 04:09:43.419714  543705 net.go:648] Add success.
I0320 04:09:43.422396  543705 net.go:770] primary dev: ETH0
I0320 04:09:43.422409  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:09:43.422421  543705 net.go:698] Add success.
I0320 04:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:09:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:09:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:09:53.410280  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:09:53.410300  543705 memory.go:184] no items to output this cycle
I0320 04:09:53.410303  543705 cpu.go:275] no items to output this cycle
E0320 04:10:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:10:03.409793  543705 cpu.go:275] no items to output this cycle
I0320 04:10:03.409795  543705 memory.go:184] no items to output this cycle
E0320 04:10:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:10:13.409794  543705 cpu.go:282] Add success.
I0320 04:10:13.409799  543705 memory.go:191] Add success.
W0320 04:10:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:10:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:10:13.420185  543705 net.go:648] Add success.
I0320 04:10:13.423091  543705 net.go:770] primary dev: ETH0
I0320 04:10:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:10:13.423120  543705 net.go:698] Add success.
I0320 04:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:10:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:10:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 04:10:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:10:14.456540  543705 disk_worker.go:494] system disk:vda1
I0320 04:10:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:10:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:10:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:10:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:10:22.518232  543705 disk_info.go:125] begin check local disk info of client
I0320 04:10:22.520668  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:10:22.520675  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002471c0 0xc000247200]
E0320 04:10:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:10:23.409788  543705 memory.go:184] no items to output this cycle
I0320 04:10:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:10:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:10:33.409783  543705 memory.go:184] no items to output this cycle
I0320 04:10:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 04:10:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:10:43.409821  543705 memory.go:191] Add success.
I0320 04:10:43.409823  543705 cpu.go:282] Add success.
I0320 04:10:43.419738  543705 net.go:648] Add success.
I0320 04:10:43.422353  543705 net.go:770] primary dev: ETH0
I0320 04:10:43.422367  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:10:43.422379  543705 net.go:698] Add success.
I0320 04:10:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:10:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:10:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:10:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:10:53.409779  543705 memory.go:184] no items to output this cycle
I0320 04:10:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 04:11:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:11:03.409784  543705 memory.go:184] no items to output this cycle
I0320 04:11:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 04:11:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:11:13.409788  543705 memory.go:191] Add success.
I0320 04:11:13.409794  543705 cpu.go:282] Add success.
W0320 04:11:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:11:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:11:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:11:13.420223  543705 net.go:648] Add success.
I0320 04:11:13.422967  543705 net.go:770] primary dev: ETH0
I0320 04:11:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:11:13.422992  543705 net.go:698] Add success.
I0320 04:11:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:11:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:11:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 04:11:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:11:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 04:11:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:11:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:11:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:11:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:11:16.472525  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:11:22.521249  543705 disk_info.go:125] begin check local disk info of client
I0320 04:11:22.523872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:11:22.523879  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266c80 0xc000266cc0]
E0320 04:11:23.409739  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:11:23.409754  543705 memory.go:184] no items to output this cycle
I0320 04:11:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:11:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:11:33.409804  543705 memory.go:184] no items to output this cycle
I0320 04:11:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 04:11:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:11:43.409794  543705 memory.go:191] Add success.
I0320 04:11:43.409798  543705 cpu.go:282] Add success.
I0320 04:11:43.419706  543705 net.go:648] Add success.
I0320 04:11:43.422174  543705 net.go:770] primary dev: ETH0
I0320 04:11:43.422188  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:11:43.422200  543705 net.go:698] Add success.
I0320 04:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:11:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:11:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:11:53.410262  543705 memory.go:184] no items to output this cycle
I0320 04:11:53.410268  543705 cpu.go:275] no items to output this cycle
E0320 04:12:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:12:03.409783  543705 memory.go:184] no items to output this cycle
I0320 04:12:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 04:12:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:12:13.409789  543705 memory.go:191] Add success.
I0320 04:12:13.409791  543705 cpu.go:282] Add success.
W0320 04:12:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:12:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:12:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:12:13.420037  543705 net.go:648] Add success.
I0320 04:12:13.422505  543705 net.go:770] primary dev: ETH0
I0320 04:12:13.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:12:13.422535  543705 net.go:698] Add success.
I0320 04:12:13.471212  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12b4f6a9-c5da-4295-b225-378916482f15","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:12:13.471245  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 04:12:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:12:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 04:12:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:12:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:12:14.455898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:12:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:12:14.456641  543705 disk_worker.go:494] system disk:vda1
I0320 04:12:14.456683  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:12:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:12:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:12:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:12:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:12:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:12:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:12:16.472331  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:12:22.523964  543705 disk_info.go:125] begin check local disk info of client
I0320 04:12:22.526502  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:12:22.526509  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053fd00 0xc00053fd40]
E0320 04:12:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:12:23.409759  543705 memory.go:184] no items to output this cycle
I0320 04:12:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:12:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:12:33.409802  543705 memory.go:184] no items to output this cycle
I0320 04:12:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 04:12:38.109876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:12:38.109885  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:12:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:12:43.410988  543705 memory.go:191] Add success.
I0320 04:12:43.409920  543705 cpu.go:282] Add success.
I0320 04:12:43.419716  543705 net.go:648] Add success.
I0320 04:12:43.422475  543705 net.go:770] primary dev: ETH0
I0320 04:12:43.422488  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:12:43.422501  543705 net.go:698] Add success.
I0320 04:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:12:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:12:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:12:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:12:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 04:12:53.409797  543705 memory.go:184] no items to output this cycle
E0320 04:13:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:13:03.409780  543705 memory.go:184] no items to output this cycle
I0320 04:13:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 04:13:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:13:13.409784  543705 memory.go:191] Add success.
I0320 04:13:13.409802  543705 cpu.go:282] Add success.
W0320 04:13:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:13:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:13:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:13:13.420170  543705 net.go:648] Add success.
I0320 04:13:13.422784  543705 net.go:770] primary dev: ETH0
I0320 04:13:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:13:13.422813  543705 net.go:698] Add success.
I0320 04:13:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:13:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:13:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 04:13:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:13:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 04:13:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:13:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:13:16.458041  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:13:16.458126  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:13:16.458164  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:13:16.472772  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:13:22.527325  543705 disk_info.go:125] begin check local disk info of client
I0320 04:13:22.529947  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:13:22.529954  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053eac0 0xc00053eb00]
E0320 04:13:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:13:23.409774  543705 memory.go:184] no items to output this cycle
I0320 04:13:23.409776  543705 cpu.go:275] no items to output this cycle
E0320 04:13:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:13:33.409811  543705 memory.go:184] no items to output this cycle
I0320 04:13:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 04:13:43.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:13:43.409893  543705 memory.go:191] Add success.
I0320 04:13:43.409969  543705 cpu.go:282] Add success.
I0320 04:13:43.419717  543705 net.go:648] Add success.
I0320 04:13:43.422113  543705 net.go:770] primary dev: ETH0
I0320 04:13:43.422128  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:13:43.422143  543705 net.go:698] Add success.
I0320 04:13:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:13:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:13:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:13:53.409775  543705 memory.go:184] no items to output this cycle
I0320 04:13:53.409787  543705 cpu.go:275] no items to output this cycle
I0320 04:14:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 04:14:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:14:03.409814  543705 memory.go:184] no items to output this cycle
E0320 04:14:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:14:13.409810  543705 memory.go:191] Add success.
I0320 04:14:13.409819  543705 cpu.go:282] Add success.
W0320 04:14:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:14:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:14:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:14:13.420125  543705 net.go:648] Add success.
I0320 04:14:13.422862  543705 net.go:770] primary dev: ETH0
I0320 04:14:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:14:13.422892  543705 net.go:698] Add success.
I0320 04:14:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:14:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:14:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 04:14:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:14:14.456531  543705 disk_worker.go:494] system disk:vda1
I0320 04:14:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:14:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:14:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:14:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:14:22.530286  543705 disk_info.go:125] begin check local disk info of client
I0320 04:14:22.532774  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:14:22.532780  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253440 0xc000253480]
E0320 04:14:23.409735  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:14:23.409749  543705 memory.go:184] no items to output this cycle
I0320 04:14:23.409791  543705 cpu.go:275] no items to output this cycle
E0320 04:14:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:14:33.409770  543705 memory.go:184] no items to output this cycle
I0320 04:14:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:14:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:14:43.409811  543705 memory.go:191] Add success.
I0320 04:14:43.409817  543705 cpu.go:282] Add success.
I0320 04:14:43.419998  543705 net.go:648] Add success.
I0320 04:14:43.423196  543705 net.go:770] primary dev: ETH0
I0320 04:14:43.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:14:43.423220  543705 net.go:698] Add success.
I0320 04:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:14:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:14:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:14:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:14:53.409809  543705 memory.go:184] no items to output this cycle
I0320 04:14:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 04:15:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:15:03.409782  543705 memory.go:184] no items to output this cycle
I0320 04:15:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 04:15:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:15:13.409777  543705 memory.go:191] Add success.
W0320 04:15:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:15:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:15:13.409814  543705 cpu.go:282] Add success.
I0320 04:15:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:15:13.420103  543705 net.go:648] Add success.
I0320 04:15:13.422560  543705 net.go:770] primary dev: ETH0
I0320 04:15:13.422573  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:15:13.422585  543705 net.go:698] Add success.
I0320 04:15:13.463902  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c739081b-d6fa-45c5-8904-12eafc106887","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:15:13.463934  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:15:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:15:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:15:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 04:15:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:15:14.456684  543705 disk_worker.go:494] system disk:vda1
I0320 04:15:14.456726  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:15:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:15:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:15:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:15:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:15:16.472515  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:15:22.533316  543705 disk_info.go:125] begin check local disk info of client
I0320 04:15:22.535872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:15:22.535878  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed440 0xc0000ed480]
E0320 04:15:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:15:23.409792  543705 memory.go:184] no items to output this cycle
I0320 04:15:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 04:15:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:15:33.409811  543705 memory.go:184] no items to output this cycle
I0320 04:15:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 04:15:38.113256  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:15:38.113262  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:15:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:15:43.410691  543705 memory.go:191] Add success.
I0320 04:15:43.409796  543705 cpu.go:282] Add success.
I0320 04:15:43.420574  543705 net.go:648] Add success.
I0320 04:15:43.422935  543705 net.go:770] primary dev: ETH0
I0320 04:15:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:15:43.422960  543705 net.go:698] Add success.
I0320 04:15:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:15:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:15:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:15:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:15:53.409789  543705 memory.go:184] no items to output this cycle
I0320 04:15:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 04:16:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:16:03.409785  543705 memory.go:184] no items to output this cycle
I0320 04:16:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:16:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:16:13.409795  543705 memory.go:191] Add success.
I0320 04:16:13.409797  543705 cpu.go:282] Add success.
W0320 04:16:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:16:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:16:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:16:13.420248  543705 net.go:648] Add success.
I0320 04:16:13.423172  543705 net.go:770] primary dev: ETH0
I0320 04:16:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:16:13.423201  543705 net.go:698] Add success.
I0320 04:16:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:16:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:16:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 04:16:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:16:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 04:16:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:16:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:16:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:16:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:16:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:16:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:16:22.536318  543705 disk_info.go:125] begin check local disk info of client
I0320 04:16:22.538820  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:16:22.538827  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee100 0xc0003ee140]
E0320 04:16:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:16:23.409799  543705 memory.go:184] no items to output this cycle
I0320 04:16:23.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:16:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:16:33.409776  543705 memory.go:184] no items to output this cycle
I0320 04:16:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 04:16:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:16:43.409809  543705 memory.go:191] Add success.
I0320 04:16:43.409818  543705 cpu.go:282] Add success.
I0320 04:16:43.419903  543705 net.go:648] Add success.
I0320 04:16:43.422506  543705 net.go:770] primary dev: ETH0
I0320 04:16:43.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:16:43.422536  543705 net.go:698] Add success.
I0320 04:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:16:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:16:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:16:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:16:53.409809  543705 cpu.go:275] no items to output this cycle
I0320 04:16:53.409818  543705 memory.go:184] no items to output this cycle
E0320 04:17:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:17:03.409772  543705 memory.go:184] no items to output this cycle
I0320 04:17:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:17:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:17:13.409810  543705 memory.go:191] Add success.
I0320 04:17:13.409815  543705 cpu.go:282] Add success.
W0320 04:17:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:17:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:17:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:17:13.420066  543705 net.go:648] Add success.
I0320 04:17:13.422588  543705 net.go:770] primary dev: ETH0
I0320 04:17:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:17:13.422617  543705 net.go:698] Add success.
I0320 04:17:13.453157  543705 event_worker.go:152] Polling the log file for events...
W0320 04:17:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:17:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 04:17:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:17:14.456403  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:17:14.456414  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:17:14.456420  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:17:14.457453  543705 disk_worker.go:494] system disk:vda1
I0320 04:17:14.457485  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:17:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:17:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:17:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:17:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:17:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:17:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:17:16.472582  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:17:22.538917  543705 disk_info.go:125] begin check local disk info of client
I0320 04:17:22.541443  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:17:22.541450  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484100 0xc000484140]
E0320 04:17:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:17:23.409785  543705 memory.go:184] no items to output this cycle
I0320 04:17:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 04:17:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:17:33.409765  543705 memory.go:184] no items to output this cycle
I0320 04:17:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:17:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:17:43.409808  543705 memory.go:191] Add success.
I0320 04:17:43.409816  543705 cpu.go:282] Add success.
I0320 04:17:43.419861  543705 net.go:648] Add success.
I0320 04:17:43.422484  543705 net.go:770] primary dev: ETH0
I0320 04:17:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:17:43.422510  543705 net.go:698] Add success.
I0320 04:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:17:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:17:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:17:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:17:53.409801  543705 memory.go:184] no items to output this cycle
I0320 04:17:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 04:18:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:18:03.409799  543705 cpu.go:275] no items to output this cycle
I0320 04:18:03.409801  543705 memory.go:184] no items to output this cycle
E0320 04:18:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:18:13.409794  543705 memory.go:191] Add success.
I0320 04:18:13.409799  543705 cpu.go:282] Add success.
W0320 04:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:18:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:18:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:18:13.420261  543705 net.go:648] Add success.
I0320 04:18:13.422818  543705 net.go:770] primary dev: ETH0
I0320 04:18:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:18:13.422842  543705 net.go:698] Add success.
I0320 04:18:13.467823  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"834f8f0f-6551-44c4-a10d-06ec40371728","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:18:13.467853  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:18:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:18:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:18:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 04:18:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:18:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 04:18:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:18:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:18:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:18:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:18:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:18:22.542409  543705 disk_info.go:125] begin check local disk info of client
I0320 04:18:22.544860  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:18:22.544866  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384500 0xc000384540]
E0320 04:18:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:18:23.409784  543705 memory.go:184] no items to output this cycle
I0320 04:18:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:18:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:18:33.409782  543705 memory.go:184] no items to output this cycle
I0320 04:18:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 04:18:38.113730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:18:38.113736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:18:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:18:43.410668  543705 memory.go:191] Add success.
I0320 04:18:43.409810  543705 cpu.go:282] Add success.
I0320 04:18:43.420445  543705 net.go:648] Add success.
I0320 04:18:43.423300  543705 net.go:770] primary dev: ETH0
I0320 04:18:43.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:18:43.423330  543705 net.go:698] Add success.
I0320 04:18:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:18:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:18:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:18:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:18:53.409813  543705 memory.go:184] no items to output this cycle
I0320 04:18:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 04:19:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:19:03.409799  543705 memory.go:184] no items to output this cycle
I0320 04:19:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:19:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:19:13.409789  543705 memory.go:191] Add success.
I0320 04:19:13.409811  543705 cpu.go:282] Add success.
W0320 04:19:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:19:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:19:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:19:13.420153  543705 net.go:648] Add success.
I0320 04:19:13.423140  543705 net.go:770] primary dev: ETH0
I0320 04:19:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:19:13.423169  543705 net.go:698] Add success.
I0320 04:19:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:19:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:19:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 04:19:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:19:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 04:19:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:19:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:19:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:19:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:19:16.472503  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:19:22.545371  543705 disk_info.go:125] begin check local disk info of client
I0320 04:19:22.547922  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:19:22.547928  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab40 0xc00007ab80]
E0320 04:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:19:23.409795  543705 memory.go:184] no items to output this cycle
I0320 04:19:23.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:19:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:19:33.409801  543705 memory.go:184] no items to output this cycle
I0320 04:19:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 04:19:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:19:43.409788  543705 memory.go:191] Add success.
I0320 04:19:43.409799  543705 cpu.go:282] Add success.
I0320 04:19:43.419960  543705 net.go:648] Add success.
I0320 04:19:43.422549  543705 net.go:770] primary dev: ETH0
I0320 04:19:43.422563  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:19:43.422574  543705 net.go:698] Add success.
I0320 04:19:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:19:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:19:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:19:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:19:53.409808  543705 memory.go:184] no items to output this cycle
I0320 04:19:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 04:20:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:20:03.409791  543705 memory.go:184] no items to output this cycle
I0320 04:20:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 04:20:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:20:13.409786  543705 memory.go:191] Add success.
I0320 04:20:13.409788  543705 cpu.go:282] Add success.
W0320 04:20:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:20:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:20:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:20:13.420037  543705 net.go:648] Add success.
I0320 04:20:13.422535  543705 net.go:770] primary dev: ETH0
I0320 04:20:13.422547  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:20:13.422559  543705 net.go:698] Add success.
I0320 04:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:20:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:20:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 04:20:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:20:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 04:20:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:20:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:20:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:20:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:20:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:20:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:20:22.548015  543705 disk_info.go:125] begin check local disk info of client
I0320 04:20:22.550473  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:20:22.550479  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034bc00 0xc00034bc40]
E0320 04:20:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:20:23.409783  543705 memory.go:184] no items to output this cycle
I0320 04:20:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 04:20:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:20:33.409781  543705 memory.go:184] no items to output this cycle
I0320 04:20:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 04:20:43.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:20:43.409897  543705 memory.go:191] Add success.
I0320 04:20:43.410116  543705 cpu.go:282] Add success.
I0320 04:20:43.419718  543705 net.go:648] Add success.
I0320 04:20:43.422333  543705 net.go:770] primary dev: ETH0
I0320 04:20:43.422346  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:20:43.422357  543705 net.go:698] Add success.
I0320 04:20:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:20:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:20:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:20:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:20:53.409776  543705 cpu.go:275] no items to output this cycle
I0320 04:20:53.409786  543705 memory.go:184] no items to output this cycle
E0320 04:21:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:21:03.409804  543705 memory.go:184] no items to output this cycle
I0320 04:21:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 04:21:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:21:13.409809  543705 memory.go:191] Add success.
I0320 04:21:13.409819  543705 cpu.go:282] Add success.
W0320 04:21:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:21:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:21:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:21:13.420099  543705 net.go:648] Add success.
I0320 04:21:13.423133  543705 net.go:770] primary dev: ETH0
I0320 04:21:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:21:13.423157  543705 net.go:698] Add success.
I0320 04:21:13.473262  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a8d9d78-f3eb-4b18-9446-2ee8452ce807","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:21:13.473297  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:21:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:21:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:21:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 04:21:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:21:14.456614  543705 disk_worker.go:494] system disk:vda1
I0320 04:21:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:21:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:21:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:21:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:21:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:21:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:21:22.551453  543705 disk_info.go:125] begin check local disk info of client
I0320 04:21:22.554023  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:21:22.554029  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385300 0xc000385340]
E0320 04:21:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:21:23.409891  543705 memory.go:184] no items to output this cycle
I0320 04:21:23.409928  543705 cpu.go:275] no items to output this cycle
E0320 04:21:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:21:33.409782  543705 memory.go:184] no items to output this cycle
I0320 04:21:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 04:21:38.117286  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:21:38.117292  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:21:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:21:43.410625  543705 memory.go:191] Add success.
I0320 04:21:43.409801  543705 cpu.go:282] Add success.
I0320 04:21:43.420328  543705 net.go:648] Add success.
I0320 04:21:43.422942  543705 net.go:770] primary dev: ETH0
I0320 04:21:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:21:43.422972  543705 net.go:698] Add success.
I0320 04:21:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:21:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:21:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:21:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:21:53.409809  543705 memory.go:184] no items to output this cycle
I0320 04:21:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 04:22:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:22:03.409784  543705 cpu.go:275] no items to output this cycle
I0320 04:22:03.409788  543705 memory.go:184] no items to output this cycle
E0320 04:22:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:22:13.409807  543705 memory.go:191] Add success.
I0320 04:22:13.409815  543705 cpu.go:282] Add success.
W0320 04:22:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:22:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:22:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:22:13.420116  543705 net.go:648] Add success.
I0320 04:22:13.423679  543705 net.go:770] primary dev: ETH0
I0320 04:22:13.423692  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:22:13.423705  543705 net.go:698] Add success.
W0320 04:22:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:22:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 04:22:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:22:14.456908  543705 disk_worker.go:494] system disk:vda1
I0320 04:22:14.456958  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:22:14.457442  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:22:14.457449  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:22:14.457453  543705 custom_config.go:64] query custom config with name: gpu
E0320 04:22:15.456783  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:22:15.456791  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:22:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:22:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:22:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:22:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:22:16.472347  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:22:22.554116  543705 disk_info.go:125] begin check local disk info of client
I0320 04:22:22.556617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:22:22.556624  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368340 0xc000368380]
E0320 04:22:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:22:23.409773  543705 cpu.go:275] no items to output this cycle
I0320 04:22:23.409784  543705 memory.go:184] no items to output this cycle
E0320 04:22:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:22:33.409770  543705 memory.go:184] no items to output this cycle
I0320 04:22:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 04:22:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:22:43.409821  543705 memory.go:191] Add success.
I0320 04:22:43.409826  543705 cpu.go:282] Add success.
I0320 04:22:43.419953  543705 net.go:648] Add success.
I0320 04:22:43.422574  543705 net.go:770] primary dev: ETH0
I0320 04:22:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:22:43.422602  543705 net.go:698] Add success.
I0320 04:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:22:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:22:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:22:53.409776  543705 memory.go:184] no items to output this cycle
I0320 04:22:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 04:23:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:23:03.409790  543705 memory.go:184] no items to output this cycle
I0320 04:23:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 04:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:23:13.409811  543705 memory.go:191] Add success.
I0320 04:23:13.409815  543705 cpu.go:282] Add success.
W0320 04:23:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:23:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:23:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:23:13.420056  543705 net.go:648] Add success.
I0320 04:23:13.422852  543705 net.go:770] primary dev: ETH0
I0320 04:23:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:23:13.422876  543705 net.go:698] Add success.
I0320 04:23:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:23:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:23:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 04:23:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:23:14.456519  543705 disk_worker.go:494] system disk:vda1
I0320 04:23:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:23:16.472091  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:23:22.557473  543705 disk_info.go:125] begin check local disk info of client
I0320 04:23:22.560018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:23:22.560025  543705 disk_info.go:196] parse disk info done, disk is : [0xc000516000 0xc000516040]
E0320 04:23:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:23:23.409788  543705 memory.go:184] no items to output this cycle
I0320 04:23:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 04:23:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:23:33.409775  543705 memory.go:184] no items to output this cycle
I0320 04:23:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 04:23:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:23:43.409820  543705 memory.go:191] Add success.
I0320 04:23:43.409834  543705 cpu.go:282] Add success.
I0320 04:23:43.420066  543705 net.go:648] Add success.
I0320 04:23:43.422601  543705 net.go:770] primary dev: ETH0
I0320 04:23:43.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:23:43.422628  543705 net.go:698] Add success.
I0320 04:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:23:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:23:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:23:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:23:53.409783  543705 memory.go:184] no items to output this cycle
I0320 04:23:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 04:24:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:24:03.409810  543705 memory.go:184] no items to output this cycle
I0320 04:24:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 04:24:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:24:13.409785  543705 memory.go:191] Add success.
I0320 04:24:13.409802  543705 cpu.go:282] Add success.
W0320 04:24:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:24:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:24:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:24:13.420076  543705 net.go:648] Add success.
I0320 04:24:13.422728  543705 net.go:770] primary dev: ETH0
I0320 04:24:13.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:24:13.422754  543705 net.go:698] Add success.
I0320 04:24:13.709606  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d1be6743-3050-44b2-a41e-cf99ee96e750","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:24:13.709638  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:24:14.453970  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:24:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:24:14.455288  543705 disk_worker.go:708] disk space is not compliant
W0320 04:24:14.455291  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:24:14.456823  543705 disk_worker.go:494] system disk:vda1
I0320 04:24:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:24:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:24:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:24:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:24:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:24:22.560441  543705 disk_info.go:125] begin check local disk info of client
I0320 04:24:22.562942  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:24:22.562949  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003962c0 0xc000396300]
E0320 04:24:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:24:23.409887  543705 memory.go:184] no items to output this cycle
I0320 04:24:23.409893  543705 cpu.go:275] no items to output this cycle
E0320 04:24:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:24:33.409761  543705 memory.go:184] no items to output this cycle
I0320 04:24:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 04:24:38.117734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:24:38.117741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:24:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:24:43.410928  543705 memory.go:191] Add success.
I0320 04:24:43.409825  543705 cpu.go:282] Add success.
I0320 04:24:43.420641  543705 net.go:648] Add success.
I0320 04:24:43.422940  543705 net.go:770] primary dev: ETH0
I0320 04:24:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:24:43.422966  543705 net.go:698] Add success.
I0320 04:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:24:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:24:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:24:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:24:53.409780  543705 memory.go:184] no items to output this cycle
I0320 04:24:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 04:25:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:25:03.409775  543705 memory.go:184] no items to output this cycle
I0320 04:25:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 04:25:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:25:13.409806  543705 memory.go:191] Add success.
I0320 04:25:13.409816  543705 cpu.go:282] Add success.
W0320 04:25:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:25:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:25:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:25:13.420158  543705 net.go:648] Add success.
I0320 04:25:13.422654  543705 net.go:770] primary dev: ETH0
I0320 04:25:13.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:25:13.422681  543705 net.go:698] Add success.
I0320 04:25:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:25:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:25:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 04:25:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:25:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 04:25:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:25:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:25:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:25:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:25:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:25:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:25:22.563455  543705 disk_info.go:125] begin check local disk info of client
I0320 04:25:22.566081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:25:22.566089  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033fd80 0xc00033fdc0]
E0320 04:25:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:25:23.409861  543705 memory.go:184] no items to output this cycle
I0320 04:25:23.409927  543705 cpu.go:275] no items to output this cycle
E0320 04:25:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:25:33.409776  543705 memory.go:184] no items to output this cycle
I0320 04:25:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:25:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:25:43.409788  543705 memory.go:191] Add success.
I0320 04:25:43.409807  543705 cpu.go:282] Add success.
I0320 04:25:43.419882  543705 net.go:648] Add success.
I0320 04:25:43.422459  543705 net.go:770] primary dev: ETH0
I0320 04:25:43.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:25:43.422484  543705 net.go:698] Add success.
I0320 04:25:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:25:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:25:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:25:53.409777  543705 memory.go:184] no items to output this cycle
I0320 04:25:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:26:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:26:03.409812  543705 memory.go:184] no items to output this cycle
I0320 04:26:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 04:26:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:26:13.409781  543705 memory.go:191] Add success.
I0320 04:26:13.409803  543705 cpu.go:282] Add success.
W0320 04:26:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:26:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:26:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:26:13.420183  543705 net.go:648] Add success.
I0320 04:26:13.423149  543705 net.go:770] primary dev: ETH0
I0320 04:26:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:26:13.423175  543705 net.go:698] Add success.
I0320 04:26:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:26:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:26:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 04:26:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:26:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 04:26:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:26:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:26:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:26:22.566175  543705 disk_info.go:125] begin check local disk info of client
I0320 04:26:22.568615  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:26:22.568622  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270600 0xc000270640]
E0320 04:26:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:26:23.409780  543705 memory.go:184] no items to output this cycle
I0320 04:26:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 04:26:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:26:33.409780  543705 memory.go:184] no items to output this cycle
I0320 04:26:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 04:26:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:26:43.409800  543705 memory.go:191] Add success.
I0320 04:26:43.409819  543705 cpu.go:282] Add success.
I0320 04:26:43.419911  543705 net.go:648] Add success.
I0320 04:26:43.422696  543705 net.go:770] primary dev: ETH0
I0320 04:26:43.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:26:43.422721  543705 net.go:698] Add success.
I0320 04:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:26:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:26:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:26:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:26:53.409793  543705 memory.go:184] no items to output this cycle
I0320 04:26:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 04:27:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:27:03.409787  543705 memory.go:184] no items to output this cycle
I0320 04:27:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 04:27:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:27:13.409792  543705 memory.go:191] Add success.
I0320 04:27:13.409795  543705 cpu.go:282] Add success.
W0320 04:27:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:27:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:27:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:27:13.420139  543705 net.go:648] Add success.
I0320 04:27:13.422889  543705 net.go:770] primary dev: ETH0
I0320 04:27:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:27:13.422917  543705 net.go:698] Add success.
I0320 04:27:13.428947  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 04:27:13.453134  543705 event_worker.go:152] Polling the log file for events...
I0320 04:27:13.679364  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98d243d5-f083-4cbe-baa1-7e4a53a23dac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:27:13.679400  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 04:27:14.454861  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:27:14.454924  543705 disk_worker.go:708] disk space is not compliant
W0320 04:27:14.454928  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:27:14.455631  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:27:14.455640  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:27:14.455645  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:27:14.456523  543705 disk_worker.go:494] system disk:vda1
I0320 04:27:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:27:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:27:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:27:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:27:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:27:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:27:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:27:22.569532  543705 disk_info.go:125] begin check local disk info of client
I0320 04:27:22.572062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:27:22.572069  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034eb80 0xc00034ebc0]
E0320 04:27:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:27:23.409786  543705 memory.go:184] no items to output this cycle
I0320 04:27:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:27:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:27:33.409777  543705 memory.go:184] no items to output this cycle
I0320 04:27:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 04:27:38.117880  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:27:38.117887  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:27:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:27:43.410588  543705 memory.go:191] Add success.
I0320 04:27:43.409805  543705 cpu.go:282] Add success.
I0320 04:27:43.420292  543705 net.go:648] Add success.
I0320 04:27:43.422897  543705 net.go:770] primary dev: ETH0
I0320 04:27:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:27:43.422923  543705 net.go:698] Add success.
I0320 04:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:27:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:27:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:27:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:27:53.409763  543705 memory.go:184] no items to output this cycle
I0320 04:27:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:28:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:28:03.409791  543705 memory.go:184] no items to output this cycle
I0320 04:28:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:28:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:28:13.409794  543705 memory.go:191] Add success.
I0320 04:28:13.409795  543705 cpu.go:282] Add success.
W0320 04:28:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:28:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:28:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:28:13.420124  543705 net.go:648] Add success.
I0320 04:28:13.422697  543705 net.go:770] primary dev: ETH0
I0320 04:28:13.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:28:13.422726  543705 net.go:698] Add success.
I0320 04:28:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:28:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:28:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 04:28:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:28:14.456494  543705 disk_worker.go:494] system disk:vda1
I0320 04:28:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:28:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:28:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:28:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:28:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:28:22.572503  543705 disk_info.go:125] begin check local disk info of client
I0320 04:28:22.574986  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:28:22.574996  543705 disk_info.go:196] parse disk info done, disk is : [0xc000250000 0xc000250040]
E0320 04:28:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:28:23.409758  543705 memory.go:184] no items to output this cycle
I0320 04:28:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:28:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:28:33.409772  543705 memory.go:184] no items to output this cycle
I0320 04:28:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 04:28:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:28:43.409805  543705 memory.go:191] Add success.
I0320 04:28:43.409806  543705 cpu.go:282] Add success.
I0320 04:28:43.419977  543705 net.go:648] Add success.
I0320 04:28:43.422643  543705 net.go:770] primary dev: ETH0
I0320 04:28:43.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:28:43.422668  543705 net.go:698] Add success.
I0320 04:28:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:28:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:28:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:28:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:28:53.409797  543705 memory.go:184] no items to output this cycle
I0320 04:28:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 04:29:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:29:03.409788  543705 memory.go:184] no items to output this cycle
I0320 04:29:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 04:29:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:29:13.409797  543705 memory.go:191] Add success.
I0320 04:29:13.409798  543705 cpu.go:282] Add success.
W0320 04:29:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:29:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:29:13.420157  543705 net.go:648] Add success.
I0320 04:29:13.423003  543705 net.go:770] primary dev: ETH0
I0320 04:29:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:29:13.423043  543705 net.go:698] Add success.
I0320 04:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:29:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:29:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 04:29:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:29:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 04:29:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:29:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:29:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:29:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:29:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:29:22.575512  543705 disk_info.go:125] begin check local disk info of client
I0320 04:29:22.578080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:29:22.578087  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033a440 0xc00033a480]
E0320 04:29:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:29:23.409788  543705 memory.go:184] no items to output this cycle
I0320 04:29:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:29:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:29:33.409806  543705 memory.go:184] no items to output this cycle
I0320 04:29:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 04:29:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:29:43.409790  543705 memory.go:191] Add success.
I0320 04:29:43.409819  543705 cpu.go:282] Add success.
I0320 04:29:43.419999  543705 net.go:648] Add success.
I0320 04:29:43.423045  543705 net.go:770] primary dev: ETH0
I0320 04:29:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:29:43.423075  543705 net.go:698] Add success.
I0320 04:29:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:29:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:29:53.409786  543705 memory.go:184] no items to output this cycle
I0320 04:29:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 04:30:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:30:03.409784  543705 memory.go:184] no items to output this cycle
I0320 04:30:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 04:30:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:30:13.409827  543705 memory.go:191] Add success.
I0320 04:30:13.409839  543705 cpu.go:282] Add success.
W0320 04:30:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:30:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:30:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:30:13.420134  543705 net.go:648] Add success.
I0320 04:30:13.423082  543705 net.go:770] primary dev: ETH0
I0320 04:30:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:30:13.423113  543705 net.go:698] Add success.
I0320 04:30:13.508584  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0bb93920-19cc-43bd-8a37-61a3daa90d7f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:30:13.508618  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:30:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:30:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:30:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 04:30:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:30:14.456513  543705 disk_worker.go:494] system disk:vda1
I0320 04:30:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:30:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:30:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:30:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:30:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:30:16.472412  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:30:22.578554  543705 disk_info.go:125] begin check local disk info of client
I0320 04:30:22.581069  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:30:22.581077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000575740 0xc000575780]
E0320 04:30:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:30:23.409801  543705 memory.go:184] no items to output this cycle
I0320 04:30:23.409814  543705 cpu.go:275] no items to output this cycle
E0320 04:30:33.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:30:33.409913  543705 memory.go:184] no items to output this cycle
I0320 04:30:33.409934  543705 cpu.go:275] no items to output this cycle
I0320 04:30:38.118025  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:30:38.118033  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:30:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:30:43.410627  543705 memory.go:191] Add success.
I0320 04:30:43.409841  543705 cpu.go:282] Add success.
I0320 04:30:43.420414  543705 net.go:648] Add success.
I0320 04:30:43.423270  543705 net.go:770] primary dev: ETH0
I0320 04:30:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:30:43.423297  543705 net.go:698] Add success.
I0320 04:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:30:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:30:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:30:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:30:53.409778  543705 memory.go:184] no items to output this cycle
I0320 04:30:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:31:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:31:03.409793  543705 memory.go:184] no items to output this cycle
I0320 04:31:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 04:31:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:31:13.409815  543705 memory.go:191] Add success.
I0320 04:31:13.409816  543705 cpu.go:282] Add success.
W0320 04:31:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:31:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:31:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:31:13.420144  543705 net.go:648] Add success.
I0320 04:31:13.422678  543705 net.go:770] primary dev: ETH0
I0320 04:31:13.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:31:13.422711  543705 net.go:698] Add success.
I0320 04:31:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:31:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:31:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 04:31:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:31:14.456541  543705 disk_worker.go:494] system disk:vda1
I0320 04:31:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:31:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:31:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:31:16.472428  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:31:22.581549  543705 disk_info.go:125] begin check local disk info of client
I0320 04:31:22.584095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:31:22.584103  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382640 0xc000382680]
E0320 04:31:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:31:23.409801  543705 memory.go:184] no items to output this cycle
I0320 04:31:23.409816  543705 cpu.go:275] no items to output this cycle
E0320 04:31:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:31:33.409907  543705 memory.go:184] no items to output this cycle
I0320 04:31:33.409944  543705 cpu.go:275] no items to output this cycle
E0320 04:31:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:31:43.409813  543705 memory.go:191] Add success.
I0320 04:31:43.409830  543705 cpu.go:282] Add success.
I0320 04:31:43.420015  543705 net.go:648] Add success.
I0320 04:31:43.423008  543705 net.go:770] primary dev: ETH0
I0320 04:31:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:31:43.423033  543705 net.go:698] Add success.
I0320 04:31:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:31:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:31:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:31:53.409779  543705 memory.go:184] no items to output this cycle
I0320 04:31:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 04:32:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:32:03.409789  543705 memory.go:184] no items to output this cycle
I0320 04:32:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 04:32:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:32:13.409796  543705 memory.go:191] Add success.
I0320 04:32:13.409818  543705 cpu.go:282] Add success.
W0320 04:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:32:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:32:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:32:13.420145  543705 net.go:648] Add success.
I0320 04:32:13.422732  543705 net.go:770] primary dev: ETH0
I0320 04:32:13.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:32:13.422774  543705 net.go:698] Add success.
W0320 04:32:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:32:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 04:32:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:32:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:32:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:32:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:32:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 04:32:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:32:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:32:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:32:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:32:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:32:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:32:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:32:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:32:22.584565  543705 disk_info.go:125] begin check local disk info of client
I0320 04:32:22.587032  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:32:22.587039  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307280 0xc0003072c0]
E0320 04:32:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:32:23.409797  543705 memory.go:184] no items to output this cycle
I0320 04:32:23.409814  543705 cpu.go:275] no items to output this cycle
E0320 04:32:33.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:32:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 04:32:33.409833  543705 memory.go:184] no items to output this cycle
E0320 04:32:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:32:43.409805  543705 memory.go:191] Add success.
I0320 04:32:43.409831  543705 cpu.go:282] Add success.
I0320 04:32:43.419998  543705 net.go:648] Add success.
I0320 04:32:43.422487  543705 net.go:770] primary dev: ETH0
I0320 04:32:43.422499  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:32:43.422512  543705 net.go:698] Add success.
I0320 04:32:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:32:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:32:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:32:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:32:53.409810  543705 memory.go:184] no items to output this cycle
I0320 04:32:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 04:33:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:33:03.409789  543705 memory.go:184] no items to output this cycle
I0320 04:33:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 04:33:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:33:13.409786  543705 memory.go:191] Add success.
I0320 04:33:13.409803  543705 cpu.go:282] Add success.
W0320 04:33:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:33:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:33:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:33:13.420220  543705 net.go:648] Add success.
I0320 04:33:13.423046  543705 net.go:770] primary dev: ETH0
I0320 04:33:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:33:13.423072  543705 net.go:698] Add success.
I0320 04:33:13.469079  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3df17fd0-f897-4d6b-8ce7-3abd43e1f858","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:33:13.469113  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:33:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:33:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:33:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 04:33:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:33:14.456612  543705 disk_worker.go:494] system disk:vda1
I0320 04:33:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:33:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:33:22.587125  543705 disk_info.go:125] begin check local disk info of client
I0320 04:33:22.589773  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:33:22.589781  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba780 0xc0002ba7c0]
E0320 04:33:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:33:23.409763  543705 memory.go:184] no items to output this cycle
I0320 04:33:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:33:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:33:33.409808  543705 memory.go:184] no items to output this cycle
I0320 04:33:33.409823  543705 cpu.go:275] no items to output this cycle
I0320 04:33:38.121291  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:33:38.121297  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0320 04:33:43.409952  543705 cpu.go:282] Add success.
E0320 04:33:43.409949  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:33:43.410653  543705 memory.go:191] Add success.
I0320 04:33:43.419728  543705 net.go:648] Add success.
I0320 04:33:43.422108  543705 net.go:770] primary dev: ETH0
I0320 04:33:43.422122  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:33:43.422135  543705 net.go:698] Add success.
I0320 04:33:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:33:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:33:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:33:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:33:53.409808  543705 memory.go:184] no items to output this cycle
I0320 04:33:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 04:34:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:34:03.409789  543705 memory.go:184] no items to output this cycle
I0320 04:34:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 04:34:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:34:13.409783  543705 memory.go:191] Add success.
I0320 04:34:13.409799  543705 cpu.go:282] Add success.
W0320 04:34:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:34:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:34:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:34:13.420101  543705 net.go:648] Add success.
I0320 04:34:13.422795  543705 net.go:770] primary dev: ETH0
I0320 04:34:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:34:13.422820  543705 net.go:698] Add success.
I0320 04:34:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:34:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:34:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 04:34:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:34:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 04:34:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:34:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:34:16.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:34:16.458092  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:34:16.458112  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:34:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:34:22.590600  543705 disk_info.go:125] begin check local disk info of client
I0320 04:34:22.593026  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:34:22.593032  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328500 0xc000328540]
E0320 04:34:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:34:23.409781  543705 memory.go:184] no items to output this cycle
I0320 04:34:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:34:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:34:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 04:34:33.409807  543705 memory.go:184] no items to output this cycle
E0320 04:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:34:43.409791  543705 memory.go:191] Add success.
I0320 04:34:43.409795  543705 cpu.go:282] Add success.
I0320 04:34:43.419726  543705 net.go:648] Add success.
I0320 04:34:43.422488  543705 net.go:770] primary dev: ETH0
I0320 04:34:43.422501  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:34:43.422528  543705 net.go:698] Add success.
I0320 04:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:34:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:34:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:34:53.409770  543705 memory.go:184] no items to output this cycle
I0320 04:34:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 04:35:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:35:03.409809  543705 memory.go:184] no items to output this cycle
I0320 04:35:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 04:35:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:35:13.409799  543705 memory.go:191] Add success.
I0320 04:35:13.409801  543705 cpu.go:282] Add success.
W0320 04:35:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:35:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:35:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:35:13.420059  543705 net.go:648] Add success.
I0320 04:35:13.422610  543705 net.go:770] primary dev: ETH0
I0320 04:35:13.422625  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:35:13.422636  543705 net.go:698] Add success.
I0320 04:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:35:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:35:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 04:35:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:35:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 04:35:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:35:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:35:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:35:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:35:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:35:16.472421  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:35:22.593603  543705 disk_info.go:125] begin check local disk info of client
I0320 04:35:22.596200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:35:22.596206  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329d40 0xc000329d80]
E0320 04:35:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:35:23.409795  543705 memory.go:184] no items to output this cycle
I0320 04:35:23.409809  543705 cpu.go:275] no items to output this cycle
E0320 04:35:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:35:33.409786  543705 memory.go:184] no items to output this cycle
I0320 04:35:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 04:35:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:35:43.409794  543705 memory.go:191] Add success.
I0320 04:35:43.409816  543705 cpu.go:282] Add success.
I0320 04:35:43.420190  543705 net.go:648] Add success.
I0320 04:35:43.423043  543705 net.go:770] primary dev: ETH0
I0320 04:35:43.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:35:43.423067  543705 net.go:698] Add success.
I0320 04:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:35:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:35:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:35:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:35:53.409810  543705 memory.go:184] no items to output this cycle
I0320 04:35:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 04:36:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:36:03.409801  543705 cpu.go:275] no items to output this cycle
I0320 04:36:03.409810  543705 memory.go:184] no items to output this cycle
E0320 04:36:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:36:13.409822  543705 memory.go:191] Add success.
I0320 04:36:13.409835  543705 cpu.go:282] Add success.
W0320 04:36:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:36:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:36:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:36:13.420276  543705 net.go:648] Add success.
I0320 04:36:13.423252  543705 net.go:770] primary dev: ETH0
I0320 04:36:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:36:13.423285  543705 net.go:698] Add success.
I0320 04:36:13.468740  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1fd70168-bb47-4c17-bbef-fe9aac4e3ce7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:36:13.468777  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:36:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:36:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 04:36:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:36:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 04:36:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:36:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:36:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:36:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:36:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:36:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:36:22.596632  543705 disk_info.go:125] begin check local disk info of client
I0320 04:36:22.599194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:36:22.599200  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028a540 0xc00028a580]
E0320 04:36:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:36:23.409776  543705 memory.go:184] no items to output this cycle
I0320 04:36:23.409793  543705 cpu.go:275] no items to output this cycle
E0320 04:36:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:36:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 04:36:33.409811  543705 memory.go:184] no items to output this cycle
I0320 04:36:38.121742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:36:38.121749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:36:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:36:43.410744  543705 memory.go:191] Add success.
I0320 04:36:43.409812  543705 cpu.go:282] Add success.
I0320 04:36:43.420549  543705 net.go:648] Add success.
I0320 04:36:43.423217  543705 net.go:770] primary dev: ETH0
I0320 04:36:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:36:43.423242  543705 net.go:698] Add success.
I0320 04:36:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:36:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:36:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:36:53.409781  543705 memory.go:184] no items to output this cycle
I0320 04:36:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 04:37:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:37:03.409793  543705 memory.go:184] no items to output this cycle
I0320 04:37:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:37:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:37:13.409795  543705 memory.go:191] Add success.
I0320 04:37:13.409799  543705 cpu.go:282] Add success.
W0320 04:37:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:37:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:37:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:37:13.420151  543705 net.go:648] Add success.
I0320 04:37:13.422711  543705 net.go:770] primary dev: ETH0
I0320 04:37:13.422723  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:37:13.422736  543705 net.go:698] Add success.
I0320 04:37:13.453335  543705 event_worker.go:152] Polling the log file for events...
W0320 04:37:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:37:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 04:37:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:37:14.455866  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:37:14.455875  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:37:14.455881  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:37:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 04:37:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:37:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:37:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:37:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:37:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:37:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:37:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:37:16.472335  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:37:22.599287  543705 disk_info.go:125] begin check local disk info of client
I0320 04:37:22.601906  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:37:22.601915  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037cfc0 0xc00037d000]
E0320 04:37:23.409738  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:37:23.409752  543705 memory.go:184] no items to output this cycle
I0320 04:37:23.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:37:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:37:33.409797  543705 memory.go:184] no items to output this cycle
I0320 04:37:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:37:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:37:43.409802  543705 memory.go:191] Add success.
I0320 04:37:43.409803  543705 cpu.go:282] Add success.
I0320 04:37:43.420042  543705 net.go:648] Add success.
I0320 04:37:43.422871  543705 net.go:770] primary dev: ETH0
I0320 04:37:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:37:43.422897  543705 net.go:698] Add success.
I0320 04:37:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:37:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:37:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:37:53.409910  543705 cpu.go:275] no items to output this cycle
E0320 04:37:53.409932  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:37:53.409947  543705 memory.go:184] no items to output this cycle
E0320 04:38:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:38:03.409784  543705 memory.go:184] no items to output this cycle
I0320 04:38:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 04:38:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:38:13.409821  543705 memory.go:191] Add success.
I0320 04:38:13.409829  543705 cpu.go:282] Add success.
W0320 04:38:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:38:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:38:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:38:13.420110  543705 net.go:648] Add success.
I0320 04:38:13.422675  543705 net.go:770] primary dev: ETH0
I0320 04:38:13.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:38:13.422705  543705 net.go:698] Add success.
I0320 04:38:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:38:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:38:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 04:38:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:38:14.456511  543705 disk_worker.go:494] system disk:vda1
I0320 04:38:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:38:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:38:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:38:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:38:22.602662  543705 disk_info.go:125] begin check local disk info of client
I0320 04:38:22.605121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:38:22.605128  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057b5c0 0xc00057b600]
E0320 04:38:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:38:23.409790  543705 memory.go:184] no items to output this cycle
I0320 04:38:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 04:38:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:38:33.409805  543705 memory.go:184] no items to output this cycle
I0320 04:38:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 04:38:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:38:43.409806  543705 memory.go:191] Add success.
I0320 04:38:43.409814  543705 cpu.go:282] Add success.
I0320 04:38:43.419950  543705 net.go:648] Add success.
I0320 04:38:43.422640  543705 net.go:770] primary dev: ETH0
I0320 04:38:43.422654  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:38:43.422667  543705 net.go:698] Add success.
I0320 04:38:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:38:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:38:53.409802  543705 memory.go:184] no items to output this cycle
I0320 04:38:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 04:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:39:03.409786  543705 memory.go:184] no items to output this cycle
I0320 04:39:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 04:39:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:39:13.409811  543705 memory.go:191] Add success.
I0320 04:39:13.409816  543705 cpu.go:282] Add success.
W0320 04:39:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:39:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:39:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:39:13.420137  543705 net.go:648] Add success.
I0320 04:39:13.422806  543705 net.go:770] primary dev: ETH0
I0320 04:39:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:39:13.422831  543705 net.go:698] Add success.
I0320 04:39:13.523065  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"94e1a022-da2e-4b80-a746-0495c969490e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:39:13.523101  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:39:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:39:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:39:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 04:39:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:39:14.456680  543705 disk_worker.go:494] system disk:vda1
I0320 04:39:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:39:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:39:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:39:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:39:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:39:22.605675  543705 disk_info.go:125] begin check local disk info of client
I0320 04:39:22.608275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:39:22.608282  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f00 0xc0000c4f40]
E0320 04:39:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:39:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 04:39:23.409786  543705 memory.go:184] no items to output this cycle
E0320 04:39:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:39:33.409785  543705 memory.go:184] no items to output this cycle
I0320 04:39:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 04:39:38.125308  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:39:38.125314  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:39:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:39:43.410794  543705 memory.go:191] Add success.
I0320 04:39:43.409814  543705 cpu.go:282] Add success.
I0320 04:39:43.420553  543705 net.go:648] Add success.
I0320 04:39:43.423485  543705 net.go:770] primary dev: ETH0
I0320 04:39:43.423500  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:39:43.423515  543705 net.go:698] Add success.
I0320 04:39:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:39:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:39:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:39:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:39:53.409812  543705 memory.go:184] no items to output this cycle
I0320 04:39:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 04:40:03.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:40:03.409947  543705 memory.go:184] no items to output this cycle
I0320 04:40:03.410073  543705 cpu.go:275] no items to output this cycle
E0320 04:40:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:40:13.409798  543705 memory.go:191] Add success.
I0320 04:40:13.409802  543705 cpu.go:282] Add success.
W0320 04:40:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:40:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:40:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:40:13.419980  543705 net.go:770] primary dev: ETH0
I0320 04:40:13.419991  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:40:13.420003  543705 net.go:698] Add success.
I0320 04:40:13.420372  543705 net.go:648] Add success.
I0320 04:40:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:40:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:40:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 04:40:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:40:14.456513  543705 disk_worker.go:494] system disk:vda1
I0320 04:40:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:40:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:40:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:40:22.608682  543705 disk_info.go:125] begin check local disk info of client
I0320 04:40:22.611085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:40:22.611092  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
E0320 04:40:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:40:23.409786  543705 memory.go:184] no items to output this cycle
I0320 04:40:23.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:40:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:40:33.409781  543705 memory.go:184] no items to output this cycle
I0320 04:40:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 04:40:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:40:43.409790  543705 memory.go:191] Add success.
I0320 04:40:43.409791  543705 cpu.go:282] Add success.
I0320 04:40:43.419861  543705 net.go:648] Add success.
I0320 04:40:43.422736  543705 net.go:770] primary dev: ETH0
I0320 04:40:43.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:40:43.422765  543705 net.go:698] Add success.
I0320 04:40:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:40:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:40:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:40:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:40:53.409797  543705 memory.go:184] no items to output this cycle
I0320 04:40:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 04:41:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:41:03.409787  543705 memory.go:184] no items to output this cycle
I0320 04:41:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:41:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:41:13.409784  543705 memory.go:191] Add success.
I0320 04:41:13.409806  543705 cpu.go:282] Add success.
W0320 04:41:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:41:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:41:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:41:13.420315  543705 net.go:648] Add success.
I0320 04:41:13.422889  543705 net.go:770] primary dev: ETH0
I0320 04:41:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:41:13.422918  543705 net.go:698] Add success.
I0320 04:41:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:41:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:41:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 04:41:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:41:14.456509  543705 disk_worker.go:494] system disk:vda1
I0320 04:41:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:41:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:41:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:41:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:41:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:41:22.611701  543705 disk_info.go:125] begin check local disk info of client
I0320 04:41:22.614286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:41:22.614292  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec400 0xc0000ec440]
E0320 04:41:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:41:23.409786  543705 memory.go:184] no items to output this cycle
I0320 04:41:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:41:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:41:33.409803  543705 memory.go:184] no items to output this cycle
I0320 04:41:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 04:41:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:41:43.409780  543705 memory.go:191] Add success.
I0320 04:41:43.409802  543705 cpu.go:282] Add success.
I0320 04:41:43.419854  543705 net.go:648] Add success.
I0320 04:41:43.422592  543705 net.go:770] primary dev: ETH0
I0320 04:41:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:41:43.422616  543705 net.go:698] Add success.
I0320 04:41:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:41:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:41:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:41:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:41:53.409778  543705 memory.go:184] no items to output this cycle
I0320 04:41:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 04:42:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:42:03.409773  543705 memory.go:184] no items to output this cycle
I0320 04:42:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:42:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:42:13.409810  543705 memory.go:191] Add success.
I0320 04:42:13.409817  543705 cpu.go:282] Add success.
W0320 04:42:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:42:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:42:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:42:13.420074  543705 net.go:648] Add success.
I0320 04:42:13.422470  543705 net.go:770] primary dev: ETH0
I0320 04:42:13.422483  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:42:13.422494  543705 net.go:698] Add success.
I0320 04:42:13.469230  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ad8088a-8c39-45ee-a0c2-361d2ba7e10c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:42:13.469265  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 04:42:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:42:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 04:42:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:42:14.457027  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:42:14.457047  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:42:14.457053  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:42:14.457096  543705 disk_worker.go:494] system disk:vda1
I0320 04:42:14.457142  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:42:15.456449  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:42:15.456458  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:42:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:42:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:42:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:42:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:42:16.472344  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:42:22.614724  543705 disk_info.go:125] begin check local disk info of client
I0320 04:42:22.617118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:42:22.617124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec580 0xc0000ec5c0]
E0320 04:42:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:42:23.409787  543705 memory.go:184] no items to output this cycle
I0320 04:42:23.409801  543705 cpu.go:275] no items to output this cycle
E0320 04:42:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:42:33.409806  543705 memory.go:184] no items to output this cycle
I0320 04:42:33.409815  543705 cpu.go:275] no items to output this cycle
I0320 04:42:38.125734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:42:38.125740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:42:43.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:42:43.410872  543705 memory.go:191] Add success.
I0320 04:42:43.409881  543705 cpu.go:282] Add success.
I0320 04:42:43.419777  543705 net.go:648] Add success.
I0320 04:42:43.422202  543705 net.go:770] primary dev: ETH0
I0320 04:42:43.422215  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:42:43.422227  543705 net.go:698] Add success.
I0320 04:42:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:42:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:42:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:42:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:42:53.409787  543705 cpu.go:275] no items to output this cycle
I0320 04:42:53.409792  543705 memory.go:184] no items to output this cycle
E0320 04:43:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:43:03.409805  543705 memory.go:184] no items to output this cycle
I0320 04:43:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 04:43:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:43:13.409799  543705 memory.go:191] Add success.
I0320 04:43:13.409799  543705 cpu.go:282] Add success.
W0320 04:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:43:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:43:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:43:13.420256  543705 net.go:648] Add success.
I0320 04:43:13.423334  543705 net.go:770] primary dev: ETH0
I0320 04:43:13.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:43:13.423363  543705 net.go:698] Add success.
I0320 04:43:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:43:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:43:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 04:43:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:43:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 04:43:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:43:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:43:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:43:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:43:22.617675  543705 disk_info.go:125] begin check local disk info of client
I0320 04:43:22.620224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:43:22.620231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c59c0 0xc0000c5a00]
E0320 04:43:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:43:23.409793  543705 memory.go:184] no items to output this cycle
I0320 04:43:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:43:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:43:33.409794  543705 memory.go:184] no items to output this cycle
I0320 04:43:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 04:43:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:43:43.409794  543705 memory.go:191] Add success.
I0320 04:43:43.409803  543705 cpu.go:282] Add success.
I0320 04:43:43.419865  543705 net.go:648] Add success.
I0320 04:43:43.422690  543705 net.go:770] primary dev: ETH0
I0320 04:43:43.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:43:43.422714  543705 net.go:698] Add success.
I0320 04:43:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:43:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:43:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:43:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:43:53.409778  543705 memory.go:184] no items to output this cycle
I0320 04:43:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 04:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:44:03.409781  543705 memory.go:184] no items to output this cycle
I0320 04:44:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 04:44:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:44:13.409816  543705 memory.go:191] Add success.
I0320 04:44:13.409823  543705 cpu.go:282] Add success.
W0320 04:44:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:44:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:44:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:44:13.420262  543705 net.go:648] Add success.
I0320 04:44:13.423380  543705 net.go:770] primary dev: ETH0
I0320 04:44:13.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:44:13.423408  543705 net.go:698] Add success.
I0320 04:44:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:44:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:44:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 04:44:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:44:14.456480  543705 disk_worker.go:494] system disk:vda1
I0320 04:44:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:44:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:44:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:44:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:44:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:44:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:44:22.620760  543705 disk_info.go:125] begin check local disk info of client
I0320 04:44:22.623204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:44:22.623211  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377180 0xc0003771c0]
E0320 04:44:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:44:23.409789  543705 memory.go:184] no items to output this cycle
I0320 04:44:23.409992  543705 cpu.go:275] no items to output this cycle
E0320 04:44:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:44:33.409778  543705 memory.go:184] no items to output this cycle
I0320 04:44:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 04:44:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:44:43.409818  543705 memory.go:191] Add success.
I0320 04:44:43.409820  543705 cpu.go:282] Add success.
I0320 04:44:43.420002  543705 net.go:648] Add success.
I0320 04:44:43.422759  543705 net.go:770] primary dev: ETH0
I0320 04:44:43.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:44:43.422785  543705 net.go:698] Add success.
I0320 04:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:44:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:44:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:44:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:44:53.409765  543705 memory.go:184] no items to output this cycle
I0320 04:44:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 04:45:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:45:03.409807  543705 memory.go:184] no items to output this cycle
I0320 04:45:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 04:45:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:45:13.409781  543705 memory.go:191] Add success.
I0320 04:45:13.409804  543705 cpu.go:282] Add success.
W0320 04:45:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:45:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:45:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:45:13.420112  543705 net.go:648] Add success.
I0320 04:45:13.423191  543705 net.go:770] primary dev: ETH0
I0320 04:45:13.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:45:13.423215  543705 net.go:698] Add success.
I0320 04:45:13.463399  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e024982-104c-4eab-8035-e4235e354e4d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:45:13.463434  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:45:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:45:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:45:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 04:45:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:45:14.456531  543705 disk_worker.go:494] system disk:vda1
I0320 04:45:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:45:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:45:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:45:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:45:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:45:16.472494  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:45:22.623761  543705 disk_info.go:125] begin check local disk info of client
I0320 04:45:22.626385  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:45:22.626393  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364d40 0xc000364d80]
E0320 04:45:23.407883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:45:23.407897  543705 memory.go:184] no items to output this cycle
I0320 04:45:23.407898  543705 cpu.go:275] no items to output this cycle
E0320 04:45:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:45:33.409781  543705 memory.go:184] no items to output this cycle
I0320 04:45:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 04:45:38.125879  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:45:38.125886  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:45:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:45:43.410723  543705 memory.go:191] Add success.
I0320 04:45:43.409804  543705 cpu.go:282] Add success.
I0320 04:45:43.420447  543705 net.go:648] Add success.
I0320 04:45:43.423377  543705 net.go:770] primary dev: ETH0
I0320 04:45:43.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:45:43.423403  543705 net.go:698] Add success.
I0320 04:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:45:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:45:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:45:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:45:53.409778  543705 memory.go:184] no items to output this cycle
I0320 04:45:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:46:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:46:03.409785  543705 memory.go:184] no items to output this cycle
I0320 04:46:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 04:46:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:46:13.409773  543705 memory.go:191] Add success.
W0320 04:46:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 04:46:13.409807  543705 cpu.go:282] Add success.
W0320 04:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:46:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:46:13.420104  543705 net.go:648] Add success.
I0320 04:46:13.422836  543705 net.go:770] primary dev: ETH0
I0320 04:46:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:46:13.422865  543705 net.go:698] Add success.
I0320 04:46:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:46:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:46:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0320 04:46:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:46:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 04:46:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:46:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:46:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:46:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:46:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:46:22.626806  543705 disk_info.go:125] begin check local disk info of client
I0320 04:46:22.629222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:46:22.629229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508a40 0xc000508a80]
E0320 04:46:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:46:23.409797  543705 memory.go:184] no items to output this cycle
I0320 04:46:23.409811  543705 cpu.go:275] no items to output this cycle
E0320 04:46:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:46:33.409800  543705 memory.go:184] no items to output this cycle
I0320 04:46:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 04:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:46:43.409783  543705 memory.go:191] Add success.
I0320 04:46:43.409806  543705 cpu.go:282] Add success.
I0320 04:46:43.419866  543705 net.go:648] Add success.
I0320 04:46:43.422815  543705 net.go:770] primary dev: ETH0
I0320 04:46:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:46:43.422846  543705 net.go:698] Add success.
I0320 04:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:46:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:46:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:46:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:46:53.409803  543705 memory.go:184] no items to output this cycle
I0320 04:46:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 04:47:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:47:03.409800  543705 memory.go:184] no items to output this cycle
I0320 04:47:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 04:47:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:47:13.409795  543705 memory.go:191] Add success.
I0320 04:47:13.409795  543705 cpu.go:282] Add success.
W0320 04:47:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:47:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:47:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:47:13.420117  543705 net.go:648] Add success.
I0320 04:47:13.422685  543705 net.go:770] primary dev: ETH0
I0320 04:47:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:47:13.422710  543705 net.go:698] Add success.
I0320 04:47:13.453270  543705 event_worker.go:152] Polling the log file for events...
W0320 04:47:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:47:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 04:47:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:47:14.456146  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:47:14.456155  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:47:14.456161  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:47:14.456475  543705 disk_worker.go:494] system disk:vda1
I0320 04:47:14.456505  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:47:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:47:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:47:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:47:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:47:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:47:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:47:16.472336  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:47:22.629675  543705 disk_info.go:125] begin check local disk info of client
I0320 04:47:22.632201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:47:22.632209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e000 0xc00039ef40]
E0320 04:47:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:47:23.409787  543705 memory.go:184] no items to output this cycle
I0320 04:47:23.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:47:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:47:33.409782  543705 memory.go:184] no items to output this cycle
I0320 04:47:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 04:47:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:47:43.409794  543705 memory.go:191] Add success.
I0320 04:47:43.409795  543705 cpu.go:282] Add success.
I0320 04:47:43.419866  543705 net.go:648] Add success.
I0320 04:47:43.422517  543705 net.go:770] primary dev: ETH0
I0320 04:47:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:47:43.422552  543705 net.go:698] Add success.
I0320 04:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:47:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:47:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:47:53.409773  543705 memory.go:184] no items to output this cycle
I0320 04:47:53.409776  543705 cpu.go:275] no items to output this cycle
E0320 04:48:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:48:03.409808  543705 memory.go:184] no items to output this cycle
I0320 04:48:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 04:48:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:48:13.409786  543705 memory.go:191] Add success.
I0320 04:48:13.409807  543705 cpu.go:282] Add success.
W0320 04:48:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:48:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:48:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:48:13.419941  543705 net.go:770] primary dev: ETH0
I0320 04:48:13.419955  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:48:13.419967  543705 net.go:698] Add success.
I0320 04:48:13.420212  543705 net.go:648] Add success.
I0320 04:48:13.476933  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b16f167e-94a4-4769-a58b-30b980fdd6a0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:48:13.476967  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:48:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:48:14.455222  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:48:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0320 04:48:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:48:14.456810  543705 disk_worker.go:494] system disk:vda1
I0320 04:48:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:48:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:48:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:48:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:48:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:48:22.632825  543705 disk_info.go:125] begin check local disk info of client
I0320 04:48:22.635309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:48:22.635316  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2440 0xc0003b2480]
E0320 04:48:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:48:23.409763  543705 memory.go:184] no items to output this cycle
I0320 04:48:23.409803  543705 cpu.go:275] no items to output this cycle
E0320 04:48:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:48:33.409785  543705 memory.go:184] no items to output this cycle
I0320 04:48:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 04:48:38.129333  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:48:38.129339  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:48:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:48:43.410574  543705 memory.go:191] Add success.
I0320 04:48:43.409800  543705 cpu.go:282] Add success.
I0320 04:48:43.420297  543705 net.go:648] Add success.
I0320 04:48:43.422925  543705 net.go:770] primary dev: ETH0
I0320 04:48:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:48:43.422951  543705 net.go:698] Add success.
I0320 04:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:48:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:48:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:48:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:48:53.409794  543705 memory.go:184] no items to output this cycle
I0320 04:48:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 04:49:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:49:03.409808  543705 memory.go:184] no items to output this cycle
I0320 04:49:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 04:49:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:49:13.409776  543705 memory.go:191] Add success.
I0320 04:49:13.409801  543705 cpu.go:282] Add success.
W0320 04:49:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:49:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:49:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:49:13.420070  543705 net.go:648] Add success.
I0320 04:49:13.422535  543705 net.go:770] primary dev: ETH0
I0320 04:49:13.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:49:13.422563  543705 net.go:698] Add success.
I0320 04:49:14.454611  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:49:14.454781  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:49:14.454867  543705 disk_worker.go:708] disk space is not compliant
W0320 04:49:14.454870  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:49:14.456253  543705 disk_worker.go:494] system disk:vda1
I0320 04:49:14.456285  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:49:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:49:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:49:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:49:16.472448  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:49:22.635396  543705 disk_info.go:125] begin check local disk info of client
I0320 04:49:22.637966  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:49:22.637974  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed4c0 0xc0000ed500]
E0320 04:49:23.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:49:23.409866  543705 memory.go:184] no items to output this cycle
I0320 04:49:23.410002  543705 cpu.go:275] no items to output this cycle
E0320 04:49:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:49:33.409801  543705 memory.go:184] no items to output this cycle
I0320 04:49:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 04:49:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:49:43.409791  543705 memory.go:191] Add success.
I0320 04:49:43.409792  543705 cpu.go:282] Add success.
I0320 04:49:43.420017  543705 net.go:648] Add success.
I0320 04:49:43.422867  543705 net.go:770] primary dev: ETH0
I0320 04:49:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:49:43.422892  543705 net.go:698] Add success.
I0320 04:49:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:49:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:49:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:49:53.409797  543705 memory.go:184] no items to output this cycle
I0320 04:49:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 04:50:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:50:03.409784  543705 cpu.go:275] no items to output this cycle
I0320 04:50:03.409790  543705 memory.go:184] no items to output this cycle
E0320 04:50:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:50:13.409815  543705 memory.go:191] Add success.
I0320 04:50:13.409821  543705 cpu.go:282] Add success.
W0320 04:50:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:50:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:50:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:50:13.420164  543705 net.go:648] Add success.
I0320 04:50:13.423261  543705 net.go:770] primary dev: ETH0
I0320 04:50:13.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:50:13.423289  543705 net.go:698] Add success.
I0320 04:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:50:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:50:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 04:50:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:50:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 04:50:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:50:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:50:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:50:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:50:22.638944  543705 disk_info.go:125] begin check local disk info of client
I0320 04:50:22.641331  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:50:22.641338  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001dd100 0xc0001dd140]
E0320 04:50:23.407892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:50:23.407912  543705 memory.go:184] no items to output this cycle
I0320 04:50:23.407924  543705 cpu.go:275] no items to output this cycle
E0320 04:50:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:50:33.409782  543705 memory.go:184] no items to output this cycle
I0320 04:50:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 04:50:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:50:43.409821  543705 memory.go:191] Add success.
I0320 04:50:43.409823  543705 cpu.go:282] Add success.
I0320 04:50:43.419980  543705 net.go:648] Add success.
I0320 04:50:43.422518  543705 net.go:770] primary dev: ETH0
I0320 04:50:43.422531  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:50:43.422543  543705 net.go:698] Add success.
I0320 04:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:50:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:50:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:50:53.409796  543705 memory.go:184] no items to output this cycle
I0320 04:50:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 04:51:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:51:03.409782  543705 memory.go:184] no items to output this cycle
I0320 04:51:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 04:51:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:51:13.409792  543705 memory.go:191] Add success.
I0320 04:51:13.409797  543705 cpu.go:282] Add success.
W0320 04:51:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:51:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:51:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:51:13.420081  543705 net.go:648] Add success.
I0320 04:51:13.422662  543705 net.go:770] primary dev: ETH0
I0320 04:51:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:51:13.422691  543705 net.go:698] Add success.
I0320 04:51:13.464560  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c419629a-04ea-4cce-9fa4-cadc49021b3a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:51:13.464594  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:51:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:51:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 04:51:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:51:14.456694  543705 disk_worker.go:494] system disk:vda1
I0320 04:51:14.456738  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:51:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:51:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:51:16.458105  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:51:16.472523  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:51:22.641676  543705 disk_info.go:125] begin check local disk info of client
I0320 04:51:22.644151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:51:22.644157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396f00 0xc000396f40]
E0320 04:51:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:51:23.409795  543705 memory.go:184] no items to output this cycle
I0320 04:51:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 04:51:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:51:33.409784  543705 memory.go:184] no items to output this cycle
I0320 04:51:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 04:51:38.129735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:51:38.129741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:51:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:51:43.410631  543705 memory.go:191] Add success.
I0320 04:51:43.409796  543705 cpu.go:282] Add success.
I0320 04:51:43.420341  543705 net.go:648] Add success.
I0320 04:51:43.422970  543705 net.go:770] primary dev: ETH0
I0320 04:51:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:51:43.422998  543705 net.go:698] Add success.
I0320 04:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:51:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:51:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:51:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:51:53.409766  543705 memory.go:184] no items to output this cycle
I0320 04:51:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 04:52:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:52:03.409785  543705 memory.go:184] no items to output this cycle
I0320 04:52:03.409790  543705 cpu.go:275] no items to output this cycle
W0320 04:52:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:52:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:52:13.409728  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:52:13.409801  543705 cpu.go:282] Add success.
E0320 04:52:13.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:52:13.409842  543705 memory.go:191] Add success.
I0320 04:52:13.420058  543705 net.go:648] Add success.
I0320 04:52:13.422683  543705 net.go:770] primary dev: ETH0
I0320 04:52:13.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:52:13.422709  543705 net.go:698] Add success.
W0320 04:52:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:52:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 04:52:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:52:14.455859  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:52:14.455866  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:52:14.455871  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:52:14.456785  543705 disk_worker.go:494] system disk:vda1
I0320 04:52:14.456815  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:52:15.456923  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:52:15.456936  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:52:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:52:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:52:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:52:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:52:16.472369  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:52:22.644863  543705 disk_info.go:125] begin check local disk info of client
I0320 04:52:22.647231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:52:22.647239  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028a7c0 0xc00028a800]
E0320 04:52:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:52:23.409772  543705 memory.go:184] no items to output this cycle
I0320 04:52:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 04:52:33.409887  543705 cpu.go:275] no items to output this cycle
E0320 04:52:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:52:33.409904  543705 memory.go:184] no items to output this cycle
E0320 04:52:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:52:43.409794  543705 memory.go:191] Add success.
I0320 04:52:43.409804  543705 cpu.go:282] Add success.
I0320 04:52:43.420017  543705 net.go:648] Add success.
I0320 04:52:43.422563  543705 net.go:770] primary dev: ETH0
I0320 04:52:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:52:43.422588  543705 net.go:698] Add success.
I0320 04:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:52:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:52:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:52:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:52:53.409777  543705 memory.go:184] no items to output this cycle
I0320 04:52:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 04:53:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:53:03.409805  543705 memory.go:184] no items to output this cycle
I0320 04:53:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 04:53:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:53:13.409781  543705 memory.go:191] Add success.
I0320 04:53:13.409803  543705 cpu.go:282] Add success.
W0320 04:53:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:53:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:53:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:53:13.420117  543705 net.go:648] Add success.
I0320 04:53:13.423136  543705 net.go:770] primary dev: ETH0
I0320 04:53:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:53:13.423175  543705 net.go:698] Add success.
I0320 04:53:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:53:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:53:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 04:53:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:53:14.456532  543705 disk_worker.go:494] system disk:vda1
I0320 04:53:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:53:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:53:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:53:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:53:16.472434  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:53:22.647879  543705 disk_info.go:125] begin check local disk info of client
I0320 04:53:22.650378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:53:22.650384  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b640 0xc00007b680]
E0320 04:53:23.407876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:53:23.407890  543705 memory.go:184] no items to output this cycle
I0320 04:53:23.407898  543705 cpu.go:275] no items to output this cycle
E0320 04:53:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:53:33.409803  543705 memory.go:184] no items to output this cycle
I0320 04:53:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 04:53:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:53:43.409793  543705 cpu.go:282] Add success.
I0320 04:53:43.409801  543705 memory.go:191] Add success.
I0320 04:53:43.419943  543705 net.go:648] Add success.
I0320 04:53:43.422571  543705 net.go:770] primary dev: ETH0
I0320 04:53:43.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:53:43.422595  543705 net.go:698] Add success.
I0320 04:53:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:53:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:53:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:53:53.409768  543705 memory.go:184] no items to output this cycle
I0320 04:53:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 04:54:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:54:03.409787  543705 memory.go:184] no items to output this cycle
I0320 04:54:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 04:54:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:54:13.409779  543705 memory.go:191] Add success.
I0320 04:54:13.409803  543705 cpu.go:282] Add success.
W0320 04:54:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:54:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:54:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:54:13.420120  543705 net.go:648] Add success.
I0320 04:54:13.422705  543705 net.go:770] primary dev: ETH0
I0320 04:54:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:54:13.422732  543705 net.go:698] Add success.
I0320 04:54:13.469075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75c76bf2-1da2-4de6-a56d-ab17746109d1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:54:13.469107  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 04:54:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:54:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:54:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 04:54:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:54:14.456497  543705 disk_worker.go:494] system disk:vda1
I0320 04:54:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:54:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:54:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:54:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:54:22.650470  543705 disk_info.go:125] begin check local disk info of client
I0320 04:54:22.652891  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:54:22.652897  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b780 0xc00032b7c0]
E0320 04:54:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:54:23.409796  543705 memory.go:184] no items to output this cycle
I0320 04:54:23.409806  543705 cpu.go:275] no items to output this cycle
E0320 04:54:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:54:33.409778  543705 memory.go:184] no items to output this cycle
I0320 04:54:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 04:54:38.133363  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:54:38.133369  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:54:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:54:43.410689  543705 memory.go:191] Add success.
I0320 04:54:43.409825  543705 cpu.go:282] Add success.
I0320 04:54:43.420388  543705 net.go:648] Add success.
I0320 04:54:43.423193  543705 net.go:770] primary dev: ETH0
I0320 04:54:43.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:54:43.423218  543705 net.go:698] Add success.
I0320 04:54:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:54:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:54:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:54:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:54:53.409802  543705 memory.go:184] no items to output this cycle
I0320 04:54:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 04:55:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:55:03.409787  543705 memory.go:184] no items to output this cycle
I0320 04:55:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 04:55:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:55:13.409791  543705 memory.go:191] Add success.
I0320 04:55:13.409795  543705 cpu.go:282] Add success.
W0320 04:55:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:55:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:55:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:55:13.420113  543705 net.go:648] Add success.
I0320 04:55:13.422534  543705 net.go:770] primary dev: ETH0
I0320 04:55:13.422547  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:55:13.422558  543705 net.go:698] Add success.
I0320 04:55:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:55:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:55:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 04:55:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:55:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 04:55:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:55:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:55:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:55:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:55:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:55:16.472365  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:55:22.653675  543705 disk_info.go:125] begin check local disk info of client
I0320 04:55:22.656246  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:55:22.656254  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e100 0xc00034e140]
E0320 04:55:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:55:23.409767  543705 cpu.go:275] no items to output this cycle
I0320 04:55:23.409775  543705 memory.go:184] no items to output this cycle
E0320 04:55:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:55:33.409799  543705 memory.go:184] no items to output this cycle
I0320 04:55:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 04:55:43.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:55:43.409908  543705 cpu.go:282] Add success.
I0320 04:55:43.409932  543705 memory.go:191] Add success.
I0320 04:55:43.419713  543705 net.go:648] Add success.
I0320 04:55:43.422481  543705 net.go:770] primary dev: ETH0
I0320 04:55:43.422493  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:55:43.422506  543705 net.go:698] Add success.
I0320 04:55:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:55:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:55:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:55:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:55:53.409767  543705 memory.go:184] no items to output this cycle
I0320 04:55:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 04:56:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:56:03.409788  543705 memory.go:184] no items to output this cycle
I0320 04:56:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 04:56:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:56:13.409791  543705 memory.go:191] Add success.
I0320 04:56:13.409810  543705 cpu.go:282] Add success.
W0320 04:56:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:56:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:56:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:56:13.420222  543705 net.go:648] Add success.
I0320 04:56:13.423234  543705 net.go:770] primary dev: ETH0
I0320 04:56:13.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:56:13.423260  543705 net.go:698] Add success.
I0320 04:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:56:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:56:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 04:56:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:56:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 04:56:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:56:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:56:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:56:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:56:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:56:22.656917  543705 disk_info.go:125] begin check local disk info of client
I0320 04:56:22.659439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:56:22.659446  543705 disk_info.go:196] parse disk info done, disk is : [0xc000228080 0xc0002280c0]
E0320 04:56:23.407879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:56:23.407899  543705 memory.go:184] no items to output this cycle
I0320 04:56:23.407903  543705 cpu.go:275] no items to output this cycle
E0320 04:56:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:56:33.409783  543705 memory.go:184] no items to output this cycle
I0320 04:56:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 04:56:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:56:43.409791  543705 memory.go:191] Add success.
I0320 04:56:43.409804  543705 cpu.go:282] Add success.
I0320 04:56:43.420303  543705 net.go:648] Add success.
I0320 04:56:43.423091  543705 net.go:770] primary dev: ETH0
I0320 04:56:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:56:43.423115  543705 net.go:698] Add success.
I0320 04:56:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:56:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:56:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:56:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:56:53.409779  543705 memory.go:184] no items to output this cycle
I0320 04:56:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 04:57:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:57:03.409810  543705 memory.go:184] no items to output this cycle
I0320 04:57:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 04:57:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:57:13.409815  543705 memory.go:191] Add success.
I0320 04:57:13.409825  543705 cpu.go:282] Add success.
W0320 04:57:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:57:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:57:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:57:13.420349  543705 net.go:648] Add success.
I0320 04:57:13.428764  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 04:57:13.428838  543705 net.go:770] primary dev: ETH0
I0320 04:57:13.428849  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:57:13.428861  543705 net.go:698] Add success.
I0320 04:57:13.453384  543705 event_worker.go:152] Polling the log file for events...
I0320 04:57:14.300172  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a371197b-008f-437c-8538-87a69d5e3e39","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 04:57:14.300207  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 04:57:14.454310  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:57:14.454322  543705 disk_worker.go:708] disk space is not compliant
W0320 04:57:14.454326  543705 disk_worker.go:728] disk inode is not compliant
E0320 04:57:14.454810  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 04:57:14.454820  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 04:57:14.454825  543705 custom_config.go:64] query custom config with name: gpu
I0320 04:57:14.455861  543705 disk_worker.go:494] system disk:vda1
I0320 04:57:14.455891  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 04:57:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 04:57:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:57:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 04:57:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 04:57:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:57:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:57:16.472330  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:57:22.659948  543705 disk_info.go:125] begin check local disk info of client
I0320 04:57:22.662541  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:57:22.662548  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d0800 0xc0003d0840]
E0320 04:57:23.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:57:23.407525  543705 memory.go:184] no items to output this cycle
I0320 04:57:23.407555  543705 cpu.go:275] no items to output this cycle
E0320 04:57:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:57:33.409770  543705 memory.go:184] no items to output this cycle
I0320 04:57:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 04:57:38.133742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 04:57:38.133749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 04:57:43.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:57:43.410600  543705 memory.go:191] Add success.
I0320 04:57:43.409923  543705 cpu.go:282] Add success.
I0320 04:57:43.419732  543705 net.go:648] Add success.
I0320 04:57:43.422536  543705 net.go:770] primary dev: ETH0
I0320 04:57:43.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:57:43.422564  543705 net.go:698] Add success.
I0320 04:57:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:57:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:57:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:57:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:57:53.409774  543705 memory.go:184] no items to output this cycle
I0320 04:57:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 04:58:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:58:03.409807  543705 memory.go:184] no items to output this cycle
I0320 04:58:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 04:58:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:58:13.409799  543705 memory.go:191] Add success.
I0320 04:58:13.409815  543705 cpu.go:282] Add success.
W0320 04:58:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:58:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:58:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:58:13.420142  543705 net.go:648] Add success.
I0320 04:58:13.423051  543705 net.go:770] primary dev: ETH0
I0320 04:58:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:58:13.423075  543705 net.go:698] Add success.
I0320 04:58:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:58:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:58:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0320 04:58:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:58:14.456479  543705 disk_worker.go:494] system disk:vda1
I0320 04:58:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:58:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:58:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:58:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:58:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:58:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:58:22.662944  543705 disk_info.go:125] begin check local disk info of client
I0320 04:58:22.665391  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:58:22.665397  543705 disk_info.go:196] parse disk info done, disk is : [0xc000347c40 0xc000347c80]
E0320 04:58:23.407905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:58:23.407924  543705 memory.go:184] no items to output this cycle
I0320 04:58:23.407937  543705 cpu.go:275] no items to output this cycle
E0320 04:58:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:58:33.409804  543705 memory.go:184] no items to output this cycle
I0320 04:58:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 04:58:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:58:43.409784  543705 memory.go:191] Add success.
I0320 04:58:43.409809  543705 cpu.go:282] Add success.
I0320 04:58:43.419990  543705 net.go:648] Add success.
I0320 04:58:43.422976  543705 net.go:770] primary dev: ETH0
I0320 04:58:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:58:43.423000  543705 net.go:698] Add success.
I0320 04:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:58:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:58:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:58:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:58:53.409804  543705 memory.go:184] no items to output this cycle
I0320 04:58:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 04:59:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:59:03.409787  543705 memory.go:184] no items to output this cycle
I0320 04:59:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 04:59:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:59:13.409783  543705 memory.go:191] Add success.
I0320 04:59:13.409802  543705 cpu.go:282] Add success.
W0320 04:59:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 04:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 04:59:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 04:59:13.420686  543705 net.go:648] Add success.
I0320 04:59:13.423248  543705 net.go:770] primary dev: ETH0
I0320 04:59:13.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:59:13.423270  543705 net.go:698] Add success.
I0320 04:59:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 04:59:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 04:59:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 04:59:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 04:59:14.456562  543705 disk_worker.go:494] system disk:vda1
I0320 04:59:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 04:59:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 04:59:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:59:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:59:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 04:59:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0320 04:59:22.665677  543705 disk_info.go:125] begin check local disk info of client
I0320 04:59:22.668231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 04:59:22.668238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003460c0 0xc000346100]
E0320 04:59:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:59:23.409792  543705 memory.go:184] no items to output this cycle
I0320 04:59:23.409804  543705 cpu.go:275] no items to output this cycle
E0320 04:59:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:59:33.409785  543705 memory.go:184] no items to output this cycle
I0320 04:59:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 04:59:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:59:43.409796  543705 memory.go:191] Add success.
I0320 04:59:43.409801  543705 cpu.go:282] Add success.
I0320 04:59:43.420034  543705 net.go:648] Add success.
I0320 04:59:43.422763  543705 net.go:770] primary dev: ETH0
I0320 04:59:43.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 04:59:43.422787  543705 net.go:698] Add success.
I0320 04:59:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 04:59:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 04:59:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 04:59:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 04:59:53.409794  543705 memory.go:184] no items to output this cycle
I0320 04:59:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:00:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:00:03.409780  543705 memory.go:184] no items to output this cycle
I0320 05:00:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 05:00:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:00:13.409811  543705 memory.go:191] Add success.
I0320 05:00:13.409811  543705 cpu.go:282] Add success.
W0320 05:00:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:00:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:00:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:00:13.420157  543705 net.go:648] Add success.
I0320 05:00:13.422555  543705 net.go:770] primary dev: ETH0
I0320 05:00:13.422573  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:00:13.422589  543705 net.go:698] Add success.
I0320 05:00:13.468911  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a9e2b6c7-9328-47df-a762-b7a17cf7738c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:00:13.468947  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:00:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:00:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:00:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0320 05:00:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:00:14.456629  543705 disk_worker.go:494] system disk:vda1
I0320 05:00:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:00:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:00:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:00:22.668982  543705 disk_info.go:125] begin check local disk info of client
I0320 05:00:22.671434  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:00:22.671440  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc500 0xc0002bc540]
E0320 05:00:23.407541  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:00:23.407559  543705 memory.go:184] no items to output this cycle
I0320 05:00:23.407562  543705 cpu.go:275] no items to output this cycle
E0320 05:00:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:00:33.409783  543705 memory.go:184] no items to output this cycle
I0320 05:00:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 05:00:38.137376  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:00:38.137383  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:00:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:00:43.410661  543705 memory.go:191] Add success.
I0320 05:00:43.409815  543705 cpu.go:282] Add success.
I0320 05:00:43.420539  543705 net.go:648] Add success.
I0320 05:00:43.423305  543705 net.go:770] primary dev: ETH0
I0320 05:00:43.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:00:43.423342  543705 net.go:698] Add success.
I0320 05:00:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:00:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:00:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:00:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:00:53.409781  543705 memory.go:184] no items to output this cycle
I0320 05:00:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 05:01:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:01:03.409796  543705 memory.go:184] no items to output this cycle
I0320 05:01:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 05:01:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:01:13.409804  543705 memory.go:191] Add success.
I0320 05:01:13.409826  543705 cpu.go:282] Add success.
W0320 05:01:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:01:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:01:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:01:13.420147  543705 net.go:648] Add success.
I0320 05:01:13.422757  543705 net.go:770] primary dev: ETH0
I0320 05:01:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:01:13.422791  543705 net.go:698] Add success.
I0320 05:01:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:01:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:01:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 05:01:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:01:14.456616  543705 disk_worker.go:494] system disk:vda1
I0320 05:01:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:01:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:01:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:01:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:01:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:01:22.671997  543705 disk_info.go:125] begin check local disk info of client
I0320 05:01:22.674562  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:01:22.674569  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bdd40 0xc0002bdd80]
E0320 05:01:23.407511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:01:23.407527  543705 memory.go:184] no items to output this cycle
I0320 05:01:23.407549  543705 cpu.go:275] no items to output this cycle
E0320 05:01:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:01:33.409779  543705 memory.go:184] no items to output this cycle
I0320 05:01:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 05:01:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:01:43.409776  543705 memory.go:191] Add success.
I0320 05:01:43.409811  543705 cpu.go:282] Add success.
I0320 05:01:43.419995  543705 net.go:648] Add success.
I0320 05:01:43.422863  543705 net.go:770] primary dev: ETH0
I0320 05:01:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:01:43.422892  543705 net.go:698] Add success.
I0320 05:01:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:01:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:01:53.409782  543705 memory.go:184] no items to output this cycle
I0320 05:01:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 05:02:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:02:03.409774  543705 memory.go:184] no items to output this cycle
I0320 05:02:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 05:02:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:02:13.409794  543705 memory.go:191] Add success.
I0320 05:02:13.409800  543705 cpu.go:282] Add success.
W0320 05:02:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:02:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:02:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:02:13.420166  543705 net.go:648] Add success.
I0320 05:02:13.423330  543705 net.go:770] primary dev: ETH0
I0320 05:02:13.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:02:13.423360  543705 net.go:698] Add success.
W0320 05:02:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:02:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 05:02:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:02:14.456941  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:02:14.456950  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:02:14.456956  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:02:14.457011  543705 disk_worker.go:494] system disk:vda1
I0320 05:02:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:02:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:02:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:02:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:02:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:02:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:02:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:02:16.472315  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:02:22.675013  543705 disk_info.go:125] begin check local disk info of client
I0320 05:02:22.677403  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:02:22.677408  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed7c0 0xc0000ed800]
E0320 05:02:23.407527  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:02:23.407543  543705 memory.go:184] no items to output this cycle
I0320 05:02:23.407561  543705 cpu.go:275] no items to output this cycle
E0320 05:02:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:02:33.409802  543705 memory.go:184] no items to output this cycle
I0320 05:02:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 05:02:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:02:43.409790  543705 memory.go:191] Add success.
I0320 05:02:43.409791  543705 cpu.go:282] Add success.
I0320 05:02:43.420258  543705 net.go:648] Add success.
I0320 05:02:43.423445  543705 net.go:770] primary dev: ETH0
I0320 05:02:43.423460  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:02:43.423471  543705 net.go:698] Add success.
I0320 05:02:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:02:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:02:53.409786  543705 memory.go:184] no items to output this cycle
I0320 05:02:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:03:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:03:03.409792  543705 memory.go:184] no items to output this cycle
I0320 05:03:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 05:03:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:03:13.409801  543705 memory.go:191] Add success.
I0320 05:03:13.409802  543705 cpu.go:282] Add success.
W0320 05:03:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:03:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:03:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:03:13.420200  543705 net.go:648] Add success.
I0320 05:03:13.422786  543705 net.go:770] primary dev: ETH0
I0320 05:03:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:03:13.422811  543705 net.go:698] Add success.
I0320 05:03:13.468791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5408792b-3673-419c-a4df-581289389999","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:03:13.468825  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:03:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:03:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:03:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 05:03:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:03:14.456616  543705 disk_worker.go:494] system disk:vda1
I0320 05:03:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:03:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:03:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:03:22.677677  543705 disk_info.go:125] begin check local disk info of client
I0320 05:03:22.680240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:03:22.680247  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377600 0xc000377640]
E0320 05:03:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:03:23.409781  543705 memory.go:184] no items to output this cycle
I0320 05:03:23.409798  543705 cpu.go:275] no items to output this cycle
E0320 05:03:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:03:33.409787  543705 memory.go:184] no items to output this cycle
I0320 05:03:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 05:03:38.137734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:03:38.137741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:03:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:03:43.410635  543705 memory.go:191] Add success.
I0320 05:03:43.409834  543705 cpu.go:282] Add success.
I0320 05:03:43.420551  543705 net.go:648] Add success.
I0320 05:03:43.423608  543705 net.go:770] primary dev: ETH0
I0320 05:03:43.423621  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:03:43.423633  543705 net.go:698] Add success.
I0320 05:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:03:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:03:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:03:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:03:53.409779  543705 memory.go:184] no items to output this cycle
I0320 05:03:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 05:04:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:04:03.409785  543705 memory.go:184] no items to output this cycle
I0320 05:04:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 05:04:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:04:13.409829  543705 memory.go:191] Add success.
I0320 05:04:13.409833  543705 cpu.go:282] Add success.
W0320 05:04:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:04:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:04:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:04:13.420275  543705 net.go:648] Add success.
I0320 05:04:13.423391  543705 net.go:770] primary dev: ETH0
I0320 05:04:13.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:04:13.423415  543705 net.go:698] Add success.
I0320 05:04:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:04:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:04:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 05:04:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:04:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 05:04:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:04:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:04:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:04:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:04:16.472405  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:04:22.681045  543705 disk_info.go:125] begin check local disk info of client
I0320 05:04:22.683529  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:04:22.683536  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509b00 0xc000509b40]
E0320 05:04:23.407869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:04:23.407887  543705 memory.go:184] no items to output this cycle
I0320 05:04:23.407922  543705 cpu.go:275] no items to output this cycle
E0320 05:04:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:04:33.409770  543705 memory.go:184] no items to output this cycle
I0320 05:04:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 05:04:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:04:43.409783  543705 memory.go:191] Add success.
I0320 05:04:43.409802  543705 cpu.go:282] Add success.
I0320 05:04:43.419956  543705 net.go:648] Add success.
I0320 05:04:43.422849  543705 net.go:770] primary dev: ETH0
I0320 05:04:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:04:43.422874  543705 net.go:698] Add success.
I0320 05:04:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:04:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:04:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:04:53.410262  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:04:53.410278  543705 memory.go:184] no items to output this cycle
I0320 05:04:53.410277  543705 cpu.go:275] no items to output this cycle
E0320 05:05:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:05:03.409779  543705 memory.go:184] no items to output this cycle
I0320 05:05:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 05:05:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:05:13.409791  543705 memory.go:191] Add success.
I0320 05:05:13.409795  543705 cpu.go:282] Add success.
W0320 05:05:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:05:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:05:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:05:13.420094  543705 net.go:648] Add success.
I0320 05:05:13.422811  543705 net.go:770] primary dev: ETH0
I0320 05:05:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:05:13.422837  543705 net.go:698] Add success.
I0320 05:05:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:05:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:05:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 05:05:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:05:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 05:05:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:05:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:05:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:05:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:05:22.683624  543705 disk_info.go:125] begin check local disk info of client
I0320 05:05:22.686209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:05:22.686216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462ac0 0xc000462b00]
E0320 05:05:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:05:23.409754  543705 memory.go:184] no items to output this cycle
I0320 05:05:23.409795  543705 cpu.go:275] no items to output this cycle
E0320 05:05:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:05:33.409799  543705 memory.go:184] no items to output this cycle
I0320 05:05:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 05:05:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:05:43.409795  543705 memory.go:191] Add success.
I0320 05:05:43.409796  543705 cpu.go:282] Add success.
I0320 05:05:43.420119  543705 net.go:648] Add success.
I0320 05:05:43.423133  543705 net.go:770] primary dev: ETH0
I0320 05:05:43.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:05:43.423158  543705 net.go:698] Add success.
I0320 05:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:05:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:05:53.409768  543705 memory.go:184] no items to output this cycle
I0320 05:05:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 05:06:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:06:03.409787  543705 memory.go:184] no items to output this cycle
I0320 05:06:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 05:06:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:06:13.409779  543705 memory.go:191] Add success.
W0320 05:06:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:06:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:06:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:06:13.409829  543705 cpu.go:282] Add success.
I0320 05:06:13.420036  543705 net.go:648] Add success.
I0320 05:06:13.422898  543705 net.go:770] primary dev: ETH0
I0320 05:06:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:06:13.422921  543705 net.go:698] Add success.
I0320 05:06:13.464759  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f728b936-55a0-4a5b-bcf4-f52e90aa56e4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:06:13.464790  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:06:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:06:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 05:06:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:06:14.456914  543705 disk_worker.go:494] system disk:vda1
I0320 05:06:14.456946  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:06:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:06:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:06:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:06:22.686301  543705 disk_info.go:125] begin check local disk info of client
I0320 05:06:22.688812  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:06:22.688818  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba300 0xc0002ba340]
E0320 05:06:23.409485  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:06:23.409501  543705 memory.go:184] no items to output this cycle
I0320 05:06:23.409561  543705 cpu.go:275] no items to output this cycle
E0320 05:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:06:33.409799  543705 memory.go:184] no items to output this cycle
I0320 05:06:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 05:06:38.141395  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:06:38.141402  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:06:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:06:43.410613  543705 memory.go:191] Add success.
I0320 05:06:43.409797  543705 cpu.go:282] Add success.
I0320 05:06:43.420195  543705 net.go:770] primary dev: ETH0
I0320 05:06:43.420208  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:06:43.420223  543705 net.go:698] Add success.
I0320 05:06:43.420575  543705 net.go:648] Add success.
I0320 05:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:06:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:06:53.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:06:53.409882  543705 memory.go:184] no items to output this cycle
I0320 05:06:53.409910  543705 cpu.go:275] no items to output this cycle
E0320 05:07:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:07:03.409786  543705 memory.go:184] no items to output this cycle
I0320 05:07:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 05:07:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:07:13.409816  543705 memory.go:191] Add success.
I0320 05:07:13.409819  543705 cpu.go:282] Add success.
W0320 05:07:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:07:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:07:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:07:13.420162  543705 net.go:648] Add success.
I0320 05:07:13.423052  543705 net.go:770] primary dev: ETH0
I0320 05:07:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:07:13.423078  543705 net.go:698] Add success.
I0320 05:07:13.453667  543705 event_worker.go:152] Polling the log file for events...
W0320 05:07:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:07:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 05:07:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:07:14.455905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:07:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:07:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:07:14.456547  543705 disk_worker.go:494] system disk:vda1
I0320 05:07:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:07:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:07:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:07:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:07:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:07:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:07:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:07:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:07:22.689675  543705 disk_info.go:125] begin check local disk info of client
I0320 05:07:22.692203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:07:22.692210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
E0320 05:07:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:07:23.409796  543705 memory.go:184] no items to output this cycle
I0320 05:07:23.409802  543705 cpu.go:275] no items to output this cycle
E0320 05:07:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:07:33.409779  543705 cpu.go:275] no items to output this cycle
I0320 05:07:33.409780  543705 memory.go:184] no items to output this cycle
E0320 05:07:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:07:43.409816  543705 memory.go:191] Add success.
I0320 05:07:43.409832  543705 cpu.go:282] Add success.
I0320 05:07:43.419973  543705 net.go:648] Add success.
I0320 05:07:43.422568  543705 net.go:770] primary dev: ETH0
I0320 05:07:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:07:43.422592  543705 net.go:698] Add success.
I0320 05:07:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:07:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:07:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:07:53.409774  543705 memory.go:184] no items to output this cycle
I0320 05:07:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 05:08:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:08:03.409795  543705 memory.go:184] no items to output this cycle
I0320 05:08:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 05:08:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:08:13.409795  543705 memory.go:191] Add success.
I0320 05:08:13.409795  543705 cpu.go:282] Add success.
W0320 05:08:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:08:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:08:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:08:13.420136  543705 net.go:648] Add success.
I0320 05:08:13.422991  543705 net.go:770] primary dev: ETH0
I0320 05:08:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:08:13.423019  543705 net.go:698] Add success.
I0320 05:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:08:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:08:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 05:08:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:08:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 05:08:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:08:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:08:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:08:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:08:22.693100  543705 disk_info.go:125] begin check local disk info of client
I0320 05:08:22.695566  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:08:22.695572  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002baac0 0xc0002bab00]
E0320 05:08:23.407869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:08:23.407882  543705 memory.go:184] no items to output this cycle
I0320 05:08:23.407939  543705 cpu.go:275] no items to output this cycle
E0320 05:08:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:08:33.409811  543705 memory.go:184] no items to output this cycle
I0320 05:08:33.409822  543705 cpu.go:275] no items to output this cycle
E0320 05:08:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:08:43.409825  543705 memory.go:191] Add success.
I0320 05:08:43.409830  543705 cpu.go:282] Add success.
I0320 05:08:43.420060  543705 net.go:648] Add success.
I0320 05:08:43.422749  543705 net.go:770] primary dev: ETH0
I0320 05:08:43.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:08:43.422776  543705 net.go:698] Add success.
I0320 05:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:08:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:08:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:08:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:08:53.409785  543705 memory.go:184] no items to output this cycle
I0320 05:08:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 05:09:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:09:03.409794  543705 memory.go:184] no items to output this cycle
I0320 05:09:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 05:09:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:09:13.409806  543705 memory.go:191] Add success.
I0320 05:09:13.409811  543705 cpu.go:282] Add success.
W0320 05:09:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:09:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:09:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:09:13.420135  543705 net.go:648] Add success.
I0320 05:09:13.422632  543705 net.go:770] primary dev: ETH0
I0320 05:09:13.422644  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:09:13.422656  543705 net.go:698] Add success.
I0320 05:09:13.469193  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"74d01eab-e1fe-4562-b910-5dbb3efc0e52","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:09:13.469226  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:09:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:09:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:09:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 05:09:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:09:14.456663  543705 disk_worker.go:494] system disk:vda1
I0320 05:09:14.456694  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:09:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:09:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:09:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:09:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:09:22.696128  543705 disk_info.go:125] begin check local disk info of client
I0320 05:09:22.698711  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:09:22.698718  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed200 0xc0000ed240]
E0320 05:09:23.409354  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:09:23.409370  543705 memory.go:184] no items to output this cycle
I0320 05:09:23.409388  543705 cpu.go:275] no items to output this cycle
E0320 05:09:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:09:33.409810  543705 memory.go:184] no items to output this cycle
I0320 05:09:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 05:09:38.141738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:09:38.141755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:09:43.410560  543705 memory.go:191] Add success.
I0320 05:09:43.409815  543705 cpu.go:282] Add success.
I0320 05:09:43.420254  543705 net.go:648] Add success.
I0320 05:09:43.422843  543705 net.go:770] primary dev: ETH0
I0320 05:09:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:09:43.422873  543705 net.go:698] Add success.
I0320 05:09:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:09:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:09:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:09:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:09:53.409775  543705 memory.go:184] no items to output this cycle
I0320 05:09:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 05:10:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:10:03.409807  543705 cpu.go:275] no items to output this cycle
I0320 05:10:03.409811  543705 memory.go:184] no items to output this cycle
E0320 05:10:13.410041  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:10:13.410075  543705 memory.go:191] Add success.
I0320 05:10:13.410104  543705 cpu.go:282] Add success.
W0320 05:10:13.410141  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:10:13.410200  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:10:13.410203  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:10:13.419707  543705 net.go:648] Add success.
I0320 05:10:13.422114  543705 net.go:770] primary dev: ETH0
I0320 05:10:13.422126  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:10:13.422138  543705 net.go:698] Add success.
I0320 05:10:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:10:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:10:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 05:10:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:10:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 05:10:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:10:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:10:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:10:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:10:22.700135  543705 disk_info.go:125] begin check local disk info of client
I0320 05:10:22.702611  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:10:22.702617  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048f040 0xc00048f080]
E0320 05:10:23.407889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:10:23.407906  543705 memory.go:184] no items to output this cycle
I0320 05:10:23.407920  543705 cpu.go:275] no items to output this cycle
E0320 05:10:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:10:33.409780  543705 memory.go:184] no items to output this cycle
I0320 05:10:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 05:10:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:10:43.409811  543705 memory.go:191] Add success.
I0320 05:10:43.409818  543705 cpu.go:282] Add success.
I0320 05:10:43.419907  543705 net.go:648] Add success.
I0320 05:10:43.422733  543705 net.go:770] primary dev: ETH0
I0320 05:10:43.422746  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:10:43.422760  543705 net.go:698] Add success.
I0320 05:10:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:10:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:10:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:10:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:10:53.409806  543705 cpu.go:275] no items to output this cycle
I0320 05:10:53.409808  543705 memory.go:184] no items to output this cycle
E0320 05:11:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:11:03.409810  543705 memory.go:184] no items to output this cycle
I0320 05:11:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 05:11:13.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:11:13.409907  543705 memory.go:191] Add success.
W0320 05:11:13.409936  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:11:13.409948  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:11:13.409955  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:11:13.409971  543705 cpu.go:282] Add success.
I0320 05:11:13.419728  543705 net.go:648] Add success.
I0320 05:11:13.422458  543705 net.go:770] primary dev: ETH0
I0320 05:11:13.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:11:13.422486  543705 net.go:698] Add success.
I0320 05:11:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:11:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:11:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 05:11:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:11:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 05:11:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:11:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:11:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:11:22.703159  543705 disk_info.go:125] begin check local disk info of client
I0320 05:11:22.705686  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:11:22.705693  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004815c0 0xc000481600]
E0320 05:11:23.409289  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:11:23.409308  543705 memory.go:184] no items to output this cycle
I0320 05:11:23.409323  543705 cpu.go:275] no items to output this cycle
E0320 05:11:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:11:33.409778  543705 memory.go:184] no items to output this cycle
I0320 05:11:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 05:11:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:11:43.409816  543705 memory.go:191] Add success.
I0320 05:11:43.409824  543705 cpu.go:282] Add success.
I0320 05:11:43.420001  543705 net.go:648] Add success.
I0320 05:11:43.422738  543705 net.go:770] primary dev: ETH0
I0320 05:11:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:11:43.422765  543705 net.go:698] Add success.
I0320 05:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:11:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:11:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:11:53.409799  543705 memory.go:184] no items to output this cycle
I0320 05:11:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 05:12:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:12:03.409812  543705 memory.go:184] no items to output this cycle
I0320 05:12:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 05:12:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:12:13.409868  543705 memory.go:191] Add success.
W0320 05:12:13.409897  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:12:13.409916  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:12:13.409920  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:12:13.409990  543705 cpu.go:282] Add success.
I0320 05:12:13.419712  543705 net.go:648] Add success.
I0320 05:12:13.422291  543705 net.go:770] primary dev: ETH0
I0320 05:12:13.422306  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:12:13.422319  543705 net.go:698] Add success.
I0320 05:12:13.468636  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"efc93293-62cb-4099-9d6c-68c04ff04bb5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:12:13.468669  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 05:12:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:12:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 05:12:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:12:14.456808  543705 disk_worker.go:494] system disk:vda1
E0320 05:12:14.456823  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:12:14.456841  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:12:14.456845  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:12:14.456847  543705 custom_config.go:64] query custom config with name: gpu
E0320 05:12:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:12:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:12:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:12:16.457995  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:12:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:12:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:12:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:12:22.707175  543705 disk_info.go:125] begin check local disk info of client
I0320 05:12:22.709660  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:12:22.709669  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab840 0xc0001ab880]
E0320 05:12:23.407849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:12:23.407862  543705 memory.go:184] no items to output this cycle
I0320 05:12:23.407896  543705 cpu.go:275] no items to output this cycle
E0320 05:12:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:12:33.409776  543705 memory.go:184] no items to output this cycle
I0320 05:12:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 05:12:38.145418  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:12:38.145425  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:12:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:12:43.410645  543705 memory.go:191] Add success.
I0320 05:12:43.409807  543705 cpu.go:282] Add success.
I0320 05:12:43.420342  543705 net.go:648] Add success.
I0320 05:12:43.422958  543705 net.go:770] primary dev: ETH0
I0320 05:12:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:12:43.422998  543705 net.go:698] Add success.
I0320 05:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:12:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:12:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:12:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:12:53.409800  543705 memory.go:184] no items to output this cycle
I0320 05:12:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 05:13:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:13:03.409887  543705 cpu.go:275] no items to output this cycle
I0320 05:13:03.409905  543705 memory.go:184] no items to output this cycle
E0320 05:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:13:13.409796  543705 memory.go:191] Add success.
I0320 05:13:13.409798  543705 cpu.go:282] Add success.
W0320 05:13:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:13:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:13:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:13:13.420129  543705 net.go:648] Add success.
I0320 05:13:13.422652  543705 net.go:770] primary dev: ETH0
I0320 05:13:13.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:13:13.422677  543705 net.go:698] Add success.
I0320 05:13:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:13:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:13:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 05:13:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:13:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 05:13:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:13:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:13:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:13:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:13:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:13:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:13:22.711201  543705 disk_info.go:125] begin check local disk info of client
I0320 05:13:22.713806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:13:22.713813  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377580 0xc0003775c0]
E0320 05:13:23.409353  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:13:23.409369  543705 memory.go:184] no items to output this cycle
I0320 05:13:23.409370  543705 cpu.go:275] no items to output this cycle
E0320 05:13:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:13:33.409810  543705 memory.go:184] no items to output this cycle
I0320 05:13:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 05:13:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:13:43.409789  543705 memory.go:191] Add success.
I0320 05:13:43.409808  543705 cpu.go:282] Add success.
I0320 05:13:43.420090  543705 net.go:648] Add success.
I0320 05:13:43.422967  543705 net.go:770] primary dev: ETH0
I0320 05:13:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:13:43.422996  543705 net.go:698] Add success.
I0320 05:13:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:13:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:13:53.409799  543705 memory.go:184] no items to output this cycle
I0320 05:13:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 05:14:03.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:14:03.410016  543705 memory.go:184] no items to output this cycle
I0320 05:14:03.409936  543705 cpu.go:275] no items to output this cycle
E0320 05:14:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:14:13.409784  543705 memory.go:191] Add success.
W0320 05:14:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:14:13.409819  543705 cpu.go:282] Add success.
W0320 05:14:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:14:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:14:13.420331  543705 net.go:648] Add success.
I0320 05:14:13.422802  543705 net.go:770] primary dev: ETH0
I0320 05:14:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:14:13.422830  543705 net.go:698] Add success.
I0320 05:14:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:14:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:14:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 05:14:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:14:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 05:14:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:14:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:14:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:14:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:14:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:14:22.715206  543705 disk_info.go:125] begin check local disk info of client
I0320 05:14:22.717726  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:14:22.717732  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377c40 0xc000377c80]
E0320 05:14:23.409239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:14:23.409255  543705 memory.go:184] no items to output this cycle
I0320 05:14:23.409286  543705 cpu.go:275] no items to output this cycle
E0320 05:14:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:14:33.409803  543705 memory.go:184] no items to output this cycle
I0320 05:14:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 05:14:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:14:43.409782  543705 memory.go:191] Add success.
I0320 05:14:43.409812  543705 cpu.go:282] Add success.
I0320 05:14:43.419697  543705 net.go:770] primary dev: ETH0
I0320 05:14:43.419712  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:14:43.419728  543705 net.go:698] Add success.
I0320 05:14:43.420089  543705 net.go:648] Add success.
I0320 05:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:14:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:14:53.409828  543705 cpu.go:275] no items to output this cycle
E0320 05:14:53.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:14:53.409903  543705 memory.go:184] no items to output this cycle
E0320 05:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:15:03.409783  543705 memory.go:184] no items to output this cycle
I0320 05:15:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 05:15:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:15:13.409795  543705 memory.go:191] Add success.
I0320 05:15:13.409795  543705 cpu.go:282] Add success.
W0320 05:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:15:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:15:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:15:13.420127  543705 net.go:648] Add success.
I0320 05:15:13.422713  543705 net.go:770] primary dev: ETH0
I0320 05:15:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:15:13.422738  543705 net.go:698] Add success.
I0320 05:15:13.468139  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"64f3f2f6-7b8d-4c47-a551-f433dcafc3e4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:15:13.468182  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:15:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:15:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 05:15:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:15:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 05:15:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:15:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:15:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:15:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:15:16.472484  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:15:22.719227  543705 disk_info.go:125] begin check local disk info of client
I0320 05:15:22.721816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:15:22.721823  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a91c0 0xc0004a9200]
E0320 05:15:23.409314  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:15:23.409328  543705 memory.go:184] no items to output this cycle
I0320 05:15:23.409363  543705 cpu.go:275] no items to output this cycle
E0320 05:15:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:15:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 05:15:33.409798  543705 memory.go:184] no items to output this cycle
I0320 05:15:38.145732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:15:38.145739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:15:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:15:43.410594  543705 memory.go:191] Add success.
I0320 05:15:43.409827  543705 cpu.go:282] Add success.
I0320 05:15:43.420366  543705 net.go:648] Add success.
I0320 05:15:43.422924  543705 net.go:770] primary dev: ETH0
I0320 05:15:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:15:43.422954  543705 net.go:698] Add success.
I0320 05:15:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:15:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:15:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:15:53.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:15:53.409856  543705 memory.go:184] no items to output this cycle
I0320 05:15:53.409919  543705 cpu.go:275] no items to output this cycle
E0320 05:16:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:16:03.409776  543705 memory.go:184] no items to output this cycle
I0320 05:16:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 05:16:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:16:13.409787  543705 memory.go:191] Add success.
I0320 05:16:13.409810  543705 cpu.go:282] Add success.
W0320 05:16:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:16:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:16:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:16:13.420169  543705 net.go:648] Add success.
I0320 05:16:13.422920  543705 net.go:770] primary dev: ETH0
I0320 05:16:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:16:13.422948  543705 net.go:698] Add success.
I0320 05:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:16:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:16:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 05:16:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:16:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 05:16:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:16:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:16:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:16:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:16:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:16:16.472482  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:16:22.723264  543705 disk_info.go:125] begin check local disk info of client
I0320 05:16:22.725828  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:16:22.725833  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377e80 0xc000377ec0]
E0320 05:16:23.407518  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:16:23.407532  543705 memory.go:184] no items to output this cycle
I0320 05:16:23.407551  543705 cpu.go:275] no items to output this cycle
E0320 05:16:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:16:33.409772  543705 memory.go:184] no items to output this cycle
I0320 05:16:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 05:16:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:16:43.409812  543705 memory.go:191] Add success.
I0320 05:16:43.409824  543705 cpu.go:282] Add success.
I0320 05:16:43.419962  543705 net.go:648] Add success.
I0320 05:16:43.422926  543705 net.go:770] primary dev: ETH0
I0320 05:16:43.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:16:43.423039  543705 net.go:698] Add success.
I0320 05:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:16:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:16:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:16:53.409771  543705 memory.go:184] no items to output this cycle
I0320 05:16:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 05:17:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:17:03.409803  543705 memory.go:184] no items to output this cycle
I0320 05:17:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 05:17:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:17:13.409780  543705 memory.go:191] Add success.
I0320 05:17:13.409798  543705 cpu.go:282] Add success.
W0320 05:17:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:17:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:17:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:17:13.420065  543705 net.go:648] Add success.
I0320 05:17:13.422661  543705 net.go:770] primary dev: ETH0
I0320 05:17:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:17:13.422686  543705 net.go:698] Add success.
I0320 05:17:13.453222  543705 event_worker.go:152] Polling the log file for events...
W0320 05:17:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:17:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 05:17:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:17:14.456898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:17:14.456908  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:17:14.456914  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:17:14.456965  543705 disk_worker.go:494] system disk:vda1
I0320 05:17:14.457008  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:17:15.456846  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:17:15.456855  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:17:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:17:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:17:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:17:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:17:16.472314  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:17:22.725922  543705 disk_info.go:125] begin check local disk info of client
I0320 05:17:22.728485  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:17:22.728493  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a040 0xc00039a080]
E0320 05:17:23.407882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:17:23.407902  543705 memory.go:184] no items to output this cycle
I0320 05:17:23.407916  543705 cpu.go:275] no items to output this cycle
E0320 05:17:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:17:33.409769  543705 memory.go:184] no items to output this cycle
I0320 05:17:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 05:17:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:17:43.409782  543705 memory.go:191] Add success.
I0320 05:17:43.409805  543705 cpu.go:282] Add success.
I0320 05:17:43.419998  543705 net.go:648] Add success.
I0320 05:17:43.422776  543705 net.go:770] primary dev: ETH0
I0320 05:17:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:17:43.422805  543705 net.go:698] Add success.
I0320 05:17:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:17:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:17:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:17:53.409771  543705 memory.go:184] no items to output this cycle
I0320 05:17:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 05:18:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:18:03.409808  543705 memory.go:184] no items to output this cycle
I0320 05:18:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 05:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:18:13.409812  543705 memory.go:191] Add success.
I0320 05:18:13.409820  543705 cpu.go:282] Add success.
W0320 05:18:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:18:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:18:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:18:13.420140  543705 net.go:648] Add success.
I0320 05:18:13.422762  543705 net.go:770] primary dev: ETH0
I0320 05:18:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:18:13.422787  543705 net.go:698] Add success.
I0320 05:18:13.467952  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"139feac5-5c5c-477a-91c7-858365d3f723","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:18:13.467986  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:18:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:18:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:18:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 05:18:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:18:14.456696  543705 disk_worker.go:494] system disk:vda1
I0320 05:18:14.456728  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:18:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:18:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:18:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:18:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:18:22.728575  543705 disk_info.go:125] begin check local disk info of client
I0320 05:18:22.731104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:18:22.731111  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376480 0xc0003764c0]
E0320 05:18:23.409573  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:18:23.409574  543705 cpu.go:275] no items to output this cycle
I0320 05:18:23.409585  543705 memory.go:184] no items to output this cycle
E0320 05:18:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:18:33.409808  543705 memory.go:184] no items to output this cycle
I0320 05:18:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 05:18:38.145883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:18:38.145890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:18:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:18:43.410771  543705 memory.go:191] Add success.
I0320 05:18:43.409906  543705 cpu.go:282] Add success.
I0320 05:18:43.419746  543705 net.go:648] Add success.
I0320 05:18:43.422291  543705 net.go:770] primary dev: ETH0
I0320 05:18:43.422304  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:18:43.422316  543705 net.go:698] Add success.
I0320 05:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:18:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:18:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:18:53.409768  543705 memory.go:184] no items to output this cycle
I0320 05:18:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 05:19:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:19:03.409781  543705 memory.go:184] no items to output this cycle
I0320 05:19:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 05:19:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:19:13.409808  543705 memory.go:191] Add success.
I0320 05:19:13.409817  543705 cpu.go:282] Add success.
W0320 05:19:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:19:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:19:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:19:13.420147  543705 net.go:648] Add success.
I0320 05:19:13.423053  543705 net.go:770] primary dev: ETH0
I0320 05:19:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:19:13.423078  543705 net.go:698] Add success.
I0320 05:19:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:19:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:19:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 05:19:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:19:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 05:19:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:19:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:19:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:19:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:19:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:19:22.732300  543705 disk_info.go:125] begin check local disk info of client
I0320 05:19:22.734903  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:19:22.734911  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376280 0xc0003762c0]
E0320 05:19:23.409327  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:19:23.409340  543705 memory.go:184] no items to output this cycle
I0320 05:19:23.409379  543705 cpu.go:275] no items to output this cycle
E0320 05:19:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:19:33.409770  543705 memory.go:184] no items to output this cycle
I0320 05:19:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 05:19:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:19:43.409909  543705 memory.go:191] Add success.
I0320 05:19:43.409974  543705 cpu.go:282] Add success.
I0320 05:19:43.419725  543705 net.go:648] Add success.
I0320 05:19:43.422654  543705 net.go:770] primary dev: ETH0
I0320 05:19:43.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:19:43.422678  543705 net.go:698] Add success.
I0320 05:19:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:19:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:19:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:19:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:19:53.409783  543705 memory.go:184] no items to output this cycle
I0320 05:19:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 05:20:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:20:03.409777  543705 memory.go:184] no items to output this cycle
I0320 05:20:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 05:20:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:20:13.409779  543705 memory.go:191] Add success.
W0320 05:20:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:20:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:20:13.409829  543705 cpu.go:282] Add success.
I0320 05:20:13.420078  543705 net.go:648] Add success.
I0320 05:20:13.423011  543705 net.go:770] primary dev: ETH0
I0320 05:20:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:20:13.423043  543705 net.go:698] Add success.
I0320 05:20:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:20:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:20:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 05:20:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:20:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 05:20:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:20:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:20:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:20:22.736320  543705 disk_info.go:125] begin check local disk info of client
I0320 05:20:22.738860  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:20:22.738867  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9240 0xc0004a9280]
E0320 05:20:23.409281  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:20:23.409296  543705 memory.go:184] no items to output this cycle
I0320 05:20:23.409306  543705 cpu.go:275] no items to output this cycle
I0320 05:20:33.409923  543705 cpu.go:275] no items to output this cycle
E0320 05:20:33.409927  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:20:33.409946  543705 memory.go:184] no items to output this cycle
E0320 05:20:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:20:43.409819  543705 memory.go:191] Add success.
I0320 05:20:43.409828  543705 cpu.go:282] Add success.
I0320 05:20:43.419962  543705 net.go:648] Add success.
I0320 05:20:43.422853  543705 net.go:770] primary dev: ETH0
I0320 05:20:43.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:20:43.422882  543705 net.go:698] Add success.
I0320 05:20:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:20:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:20:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:20:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:20:53.409779  543705 memory.go:184] no items to output this cycle
I0320 05:20:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 05:21:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:21:03.409791  543705 memory.go:184] no items to output this cycle
I0320 05:21:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:21:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:21:13.409783  543705 memory.go:191] Add success.
I0320 05:21:13.409783  543705 cpu.go:282] Add success.
W0320 05:21:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:21:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:21:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:21:13.420057  543705 net.go:648] Add success.
I0320 05:21:13.422995  543705 net.go:770] primary dev: ETH0
I0320 05:21:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:21:13.423023  543705 net.go:698] Add success.
I0320 05:21:13.575413  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e8733d17-a07f-4414-9b5b-13ba6f0b5dc5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:21:13.575449  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:21:14.453972  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:21:14.455244  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:21:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0320 05:21:14.455257  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:21:14.456624  543705 disk_worker.go:494] system disk:vda1
I0320 05:21:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:21:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:21:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:21:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:21:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:21:22.740336  543705 disk_info.go:125] begin check local disk info of client
I0320 05:21:22.742876  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:21:22.742882  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb00 0xc0001abb40]
E0320 05:21:23.409377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:21:23.409392  543705 memory.go:184] no items to output this cycle
I0320 05:21:23.409488  543705 cpu.go:275] no items to output this cycle
E0320 05:21:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:21:33.409778  543705 memory.go:184] no items to output this cycle
I0320 05:21:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 05:21:38.149435  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:21:38.149441  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:21:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:21:43.410735  543705 memory.go:191] Add success.
I0320 05:21:43.409805  543705 cpu.go:282] Add success.
I0320 05:21:43.420478  543705 net.go:648] Add success.
I0320 05:21:43.423375  543705 net.go:770] primary dev: ETH0
I0320 05:21:43.423388  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:21:43.423400  543705 net.go:698] Add success.
I0320 05:21:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:21:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:21:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:21:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:21:53.409780  543705 memory.go:184] no items to output this cycle
I0320 05:21:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 05:22:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:22:03.409806  543705 memory.go:184] no items to output this cycle
I0320 05:22:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 05:22:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:22:13.409784  543705 memory.go:191] Add success.
I0320 05:22:13.409803  543705 cpu.go:282] Add success.
W0320 05:22:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:22:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:22:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:22:13.420094  543705 net.go:648] Add success.
I0320 05:22:13.423030  543705 net.go:770] primary dev: ETH0
I0320 05:22:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:22:13.423056  543705 net.go:698] Add success.
W0320 05:22:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:22:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 05:22:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:22:14.455909  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:22:14.455918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:22:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:22:14.456571  543705 disk_worker.go:494] system disk:vda1
I0320 05:22:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:22:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:22:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:22:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:22:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:22:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:22:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:22:16.472345  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:22:22.744352  543705 disk_info.go:125] begin check local disk info of client
I0320 05:22:22.746805  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:22:22.746811  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046de00 0xc00046de40]
E0320 05:22:23.407630  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:22:23.407645  543705 memory.go:184] no items to output this cycle
I0320 05:22:23.407684  543705 cpu.go:275] no items to output this cycle
E0320 05:22:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:22:33.409808  543705 memory.go:184] no items to output this cycle
I0320 05:22:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 05:22:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:22:43.409818  543705 memory.go:191] Add success.
I0320 05:22:43.409826  543705 cpu.go:282] Add success.
I0320 05:22:43.420049  543705 net.go:648] Add success.
I0320 05:22:43.422787  543705 net.go:770] primary dev: ETH0
I0320 05:22:43.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:22:43.422813  543705 net.go:698] Add success.
I0320 05:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:22:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:22:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:22:53.410275  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:22:53.410292  543705 memory.go:184] no items to output this cycle
I0320 05:22:53.410299  543705 cpu.go:275] no items to output this cycle
E0320 05:23:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:23:03.409790  543705 memory.go:184] no items to output this cycle
I0320 05:23:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 05:23:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:23:13.409799  543705 cpu.go:282] Add success.
I0320 05:23:13.409820  543705 memory.go:191] Add success.
W0320 05:23:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:23:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:23:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:23:13.420183  543705 net.go:648] Add success.
I0320 05:23:13.421060  543705 net.go:770] primary dev: ETH0
I0320 05:23:13.421073  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:23:13.421084  543705 net.go:698] Add success.
I0320 05:23:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:23:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:23:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 05:23:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:23:14.456626  543705 disk_worker.go:494] system disk:vda1
I0320 05:23:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:23:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:23:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:23:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:23:22.746901  543705 disk_info.go:125] begin check local disk info of client
I0320 05:23:22.749447  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:23:22.749453  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002718c0 0xc000271900]
E0320 05:23:23.407534  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:23:23.407551  543705 memory.go:184] no items to output this cycle
I0320 05:23:23.407565  543705 cpu.go:275] no items to output this cycle
E0320 05:23:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:23:33.409792  543705 memory.go:184] no items to output this cycle
I0320 05:23:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 05:23:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:23:43.409788  543705 memory.go:191] Add success.
I0320 05:23:43.409810  543705 cpu.go:282] Add success.
I0320 05:23:43.419956  543705 net.go:648] Add success.
I0320 05:23:43.422690  543705 net.go:770] primary dev: ETH0
I0320 05:23:43.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:23:43.422715  543705 net.go:698] Add success.
I0320 05:23:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:23:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:23:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:23:53.409776  543705 memory.go:184] no items to output this cycle
I0320 05:23:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 05:24:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:24:03.409791  543705 cpu.go:275] no items to output this cycle
I0320 05:24:03.409802  543705 memory.go:184] no items to output this cycle
E0320 05:24:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:24:13.409814  543705 memory.go:191] Add success.
I0320 05:24:13.409827  543705 cpu.go:282] Add success.
W0320 05:24:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:24:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:24:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:24:13.420131  543705 net.go:648] Add success.
I0320 05:24:13.422527  543705 net.go:770] primary dev: ETH0
I0320 05:24:13.422540  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:24:13.422551  543705 net.go:698] Add success.
I0320 05:24:13.468308  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"582e5053-326d-4947-a85b-c4fc8f84c725","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:24:13.468343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:24:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:24:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:24:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 05:24:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:24:14.456685  543705 disk_worker.go:494] system disk:vda1
I0320 05:24:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:24:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:24:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:24:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:24:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:24:22.749674  543705 disk_info.go:125] begin check local disk info of client
I0320 05:24:22.752129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:24:22.752135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003850c0 0xc000385100]
E0320 05:24:23.409497  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:24:23.409514  543705 memory.go:184] no items to output this cycle
I0320 05:24:23.409529  543705 cpu.go:275] no items to output this cycle
E0320 05:24:33.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:24:33.409876  543705 memory.go:184] no items to output this cycle
I0320 05:24:33.409877  543705 cpu.go:275] no items to output this cycle
I0320 05:24:38.149735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:24:38.149742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:24:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:24:43.410875  543705 memory.go:191] Add success.
I0320 05:24:43.409823  543705 cpu.go:282] Add success.
I0320 05:24:43.419863  543705 net.go:648] Add success.
I0320 05:24:43.422236  543705 net.go:770] primary dev: ETH0
I0320 05:24:43.422248  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:24:43.422261  543705 net.go:698] Add success.
I0320 05:24:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:24:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:24:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:24:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:24:53.409781  543705 memory.go:184] no items to output this cycle
I0320 05:24:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 05:25:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:25:03.409785  543705 memory.go:184] no items to output this cycle
I0320 05:25:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 05:25:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:25:13.409814  543705 memory.go:191] Add success.
I0320 05:25:13.409830  543705 cpu.go:282] Add success.
W0320 05:25:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:25:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:25:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:25:13.420110  543705 net.go:648] Add success.
I0320 05:25:13.422594  543705 net.go:770] primary dev: ETH0
I0320 05:25:13.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:25:13.422621  543705 net.go:698] Add success.
I0320 05:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:25:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:25:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 05:25:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:25:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 05:25:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:25:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:25:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:25:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:25:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:25:22.753422  543705 disk_info.go:125] begin check local disk info of client
I0320 05:25:22.756040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:25:22.756047  543705 disk_info.go:196] parse disk info done, disk is : [0xc00054a700 0xc00054a740]
E0320 05:25:23.409391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:25:23.409406  543705 memory.go:184] no items to output this cycle
I0320 05:25:23.409443  543705 cpu.go:275] no items to output this cycle
E0320 05:25:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:25:33.409788  543705 memory.go:184] no items to output this cycle
I0320 05:25:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 05:25:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:25:43.409825  543705 memory.go:191] Add success.
I0320 05:25:43.409829  543705 cpu.go:282] Add success.
I0320 05:25:43.419963  543705 net.go:648] Add success.
I0320 05:25:43.422515  543705 net.go:770] primary dev: ETH0
I0320 05:25:43.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:25:43.422539  543705 net.go:698] Add success.
I0320 05:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:25:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:25:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:25:53.409777  543705 memory.go:184] no items to output this cycle
I0320 05:25:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 05:26:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:26:03.409790  543705 memory.go:184] no items to output this cycle
I0320 05:26:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:26:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:26:13.409786  543705 memory.go:191] Add success.
I0320 05:26:13.409788  543705 cpu.go:282] Add success.
W0320 05:26:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:26:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:26:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:26:13.420091  543705 net.go:648] Add success.
I0320 05:26:13.422585  543705 net.go:770] primary dev: ETH0
I0320 05:26:13.422599  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:26:13.422613  543705 net.go:698] Add success.
I0320 05:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:26:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:26:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 05:26:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:26:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 05:26:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:26:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:26:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:26:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:26:22.756131  543705 disk_info.go:125] begin check local disk info of client
I0320 05:26:22.758612  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:26:22.758618  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6fc0 0xc0002a7000]
E0320 05:26:23.407881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:26:23.407899  543705 memory.go:184] no items to output this cycle
I0320 05:26:23.407916  543705 cpu.go:275] no items to output this cycle
E0320 05:26:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:26:33.409780  543705 memory.go:184] no items to output this cycle
I0320 05:26:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 05:26:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:26:43.409826  543705 memory.go:191] Add success.
I0320 05:26:43.409833  543705 cpu.go:282] Add success.
I0320 05:26:43.420156  543705 net.go:648] Add success.
I0320 05:26:43.422628  543705 net.go:770] primary dev: ETH0
I0320 05:26:43.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:26:43.422654  543705 net.go:698] Add success.
I0320 05:26:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:26:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:26:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:26:53.409774  543705 memory.go:184] no items to output this cycle
I0320 05:26:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 05:27:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:27:03.409778  543705 memory.go:184] no items to output this cycle
I0320 05:27:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 05:27:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:27:13.409789  543705 memory.go:191] Add success.
I0320 05:27:13.409807  543705 cpu.go:282] Add success.
W0320 05:27:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:27:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:27:13.420040  543705 net.go:648] Add success.
I0320 05:27:13.422584  543705 net.go:770] primary dev: ETH0
I0320 05:27:13.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:27:13.422608  543705 net.go:698] Add success.
I0320 05:27:13.428608  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 05:27:13.452772  543705 event_worker.go:152] Polling the log file for events...
I0320 05:27:13.464047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f48ecf93-e6d0-458f-94b9-a8acd848692a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:27:13.464080  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 05:27:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:27:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 05:27:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:27:14.455887  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:27:14.455897  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:27:14.455902  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:27:14.456668  543705 disk_worker.go:494] system disk:vda1
I0320 05:27:14.456704  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:27:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:27:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 05:27:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:27:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:27:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:27:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:27:16.472325  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:27:22.759428  543705 disk_info.go:125] begin check local disk info of client
I0320 05:27:22.762061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:27:22.762068  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002712c0 0xc000271300]
E0320 05:27:23.407515  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:27:23.407529  543705 memory.go:184] no items to output this cycle
I0320 05:27:23.407550  543705 cpu.go:275] no items to output this cycle
E0320 05:27:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:27:33.409797  543705 memory.go:184] no items to output this cycle
I0320 05:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 05:27:38.153478  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:27:38.153484  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:27:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:27:43.410608  543705 memory.go:191] Add success.
I0320 05:27:43.409829  543705 cpu.go:282] Add success.
I0320 05:27:43.420333  543705 net.go:648] Add success.
I0320 05:27:43.422694  543705 net.go:770] primary dev: ETH0
I0320 05:27:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:27:43.422725  543705 net.go:698] Add success.
I0320 05:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:27:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:27:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:27:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:27:53.409773  543705 memory.go:184] no items to output this cycle
I0320 05:27:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 05:28:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:28:03.409814  543705 memory.go:184] no items to output this cycle
I0320 05:28:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 05:28:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:28:13.409782  543705 memory.go:191] Add success.
I0320 05:28:13.409804  543705 cpu.go:282] Add success.
W0320 05:28:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:28:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:28:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:28:13.420133  543705 net.go:648] Add success.
I0320 05:28:13.423326  543705 net.go:770] primary dev: ETH0
I0320 05:28:13.423354  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:28:13.423368  543705 net.go:698] Add success.
I0320 05:28:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:28:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:28:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 05:28:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:28:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 05:28:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:28:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:28:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:28:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:28:22.763455  543705 disk_info.go:125] begin check local disk info of client
I0320 05:28:22.765919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:28:22.765925  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5580 0xc0000c55c0]
E0320 05:28:23.409211  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:28:23.409225  543705 memory.go:184] no items to output this cycle
I0320 05:28:23.409232  543705 cpu.go:275] no items to output this cycle
E0320 05:28:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:28:33.409789  543705 memory.go:184] no items to output this cycle
I0320 05:28:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 05:28:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:28:43.409810  543705 memory.go:191] Add success.
I0320 05:28:43.409824  543705 cpu.go:282] Add success.
I0320 05:28:43.419981  543705 net.go:648] Add success.
I0320 05:28:43.422693  543705 net.go:770] primary dev: ETH0
I0320 05:28:43.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:28:43.422718  543705 net.go:698] Add success.
I0320 05:28:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:28:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:28:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:28:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:28:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 05:28:53.409795  543705 memory.go:184] no items to output this cycle
E0320 05:29:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:29:03.409793  543705 memory.go:184] no items to output this cycle
I0320 05:29:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 05:29:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:29:13.409799  543705 cpu.go:282] Add success.
I0320 05:29:13.409807  543705 memory.go:191] Add success.
W0320 05:29:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:29:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:29:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:29:13.420120  543705 net.go:648] Add success.
I0320 05:29:13.422630  543705 net.go:770] primary dev: ETH0
I0320 05:29:13.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:29:13.422655  543705 net.go:698] Add success.
I0320 05:29:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:29:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:29:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 05:29:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:29:14.456492  543705 disk_worker.go:494] system disk:vda1
I0320 05:29:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:29:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:29:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:29:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:29:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:29:22.766012  543705 disk_info.go:125] begin check local disk info of client
I0320 05:29:22.768656  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:29:22.768663  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000eda40 0xc0000eda80]
E0320 05:29:23.407865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:29:23.407881  543705 memory.go:184] no items to output this cycle
I0320 05:29:23.407911  543705 cpu.go:275] no items to output this cycle
E0320 05:29:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:29:33.409781  543705 memory.go:184] no items to output this cycle
I0320 05:29:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 05:29:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:29:43.409797  543705 memory.go:191] Add success.
I0320 05:29:43.409817  543705 cpu.go:282] Add success.
I0320 05:29:43.419959  543705 net.go:648] Add success.
I0320 05:29:43.422606  543705 net.go:770] primary dev: ETH0
I0320 05:29:43.422621  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:29:43.422636  543705 net.go:698] Add success.
I0320 05:29:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:29:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:29:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:29:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:29:53.409808  543705 memory.go:184] no items to output this cycle
I0320 05:29:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 05:30:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:30:03.409814  543705 memory.go:184] no items to output this cycle
I0320 05:30:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 05:30:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:30:13.409821  543705 memory.go:191] Add success.
I0320 05:30:13.409831  543705 cpu.go:282] Add success.
W0320 05:30:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:30:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:30:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:30:13.420206  543705 net.go:648] Add success.
I0320 05:30:13.422714  543705 net.go:770] primary dev: ETH0
I0320 05:30:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:30:13.422745  543705 net.go:698] Add success.
I0320 05:30:13.470360  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"73028b81-61a4-4ff5-b4bb-6848ab9557bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:30:13.470392  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:30:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:30:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:30:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 05:30:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:30:14.456743  543705 disk_worker.go:494] system disk:vda1
I0320 05:30:14.456774  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:30:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:30:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:30:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:30:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:30:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:30:22.769672  543705 disk_info.go:125] begin check local disk info of client
I0320 05:30:22.772179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:30:22.772185  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036ad40 0xc00036ad80]
E0320 05:30:23.409436  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:30:23.409448  543705 memory.go:184] no items to output this cycle
I0320 05:30:23.409451  543705 cpu.go:275] no items to output this cycle
E0320 05:30:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:30:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 05:30:33.409793  543705 memory.go:184] no items to output this cycle
I0320 05:30:38.153741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:30:38.153748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:30:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:30:43.410687  543705 memory.go:191] Add success.
I0320 05:30:43.409803  543705 cpu.go:282] Add success.
I0320 05:30:43.420426  543705 net.go:648] Add success.
I0320 05:30:43.423230  543705 net.go:770] primary dev: ETH0
I0320 05:30:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:30:43.423261  543705 net.go:698] Add success.
I0320 05:30:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:30:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:30:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:30:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:30:53.409784  543705 memory.go:184] no items to output this cycle
I0320 05:30:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 05:31:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:31:03.409789  543705 memory.go:184] no items to output this cycle
I0320 05:31:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 05:31:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:31:13.409786  543705 memory.go:191] Add success.
W0320 05:31:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:31:13.409815  543705 cpu.go:282] Add success.
W0320 05:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:31:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:31:13.420127  543705 net.go:648] Add success.
I0320 05:31:13.422791  543705 net.go:770] primary dev: ETH0
I0320 05:31:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:31:13.422816  543705 net.go:698] Add success.
I0320 05:31:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:31:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:31:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 05:31:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:31:14.456608  543705 disk_worker.go:494] system disk:vda1
I0320 05:31:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:31:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:31:22.773502  543705 disk_info.go:125] begin check local disk info of client
I0320 05:31:22.776046  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:31:22.776053  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed300 0xc0000ed340]
E0320 05:31:23.409302  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:31:23.409319  543705 memory.go:184] no items to output this cycle
I0320 05:31:23.409338  543705 cpu.go:275] no items to output this cycle
I0320 05:31:33.409865  543705 cpu.go:275] no items to output this cycle
E0320 05:31:33.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:31:33.409887  543705 memory.go:184] no items to output this cycle
E0320 05:31:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:31:43.409790  543705 memory.go:191] Add success.
I0320 05:31:43.409809  543705 cpu.go:282] Add success.
I0320 05:31:43.419899  543705 net.go:648] Add success.
I0320 05:31:43.422483  543705 net.go:770] primary dev: ETH0
I0320 05:31:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:31:43.422513  543705 net.go:698] Add success.
I0320 05:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:31:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:31:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:31:53.409772  543705 memory.go:184] no items to output this cycle
I0320 05:31:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 05:32:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:32:03.409808  543705 memory.go:184] no items to output this cycle
I0320 05:32:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 05:32:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:32:13.409779  543705 memory.go:191] Add success.
W0320 05:32:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:32:13.409806  543705 cpu.go:282] Add success.
W0320 05:32:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:32:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:32:13.420071  543705 net.go:648] Add success.
I0320 05:32:13.423137  543705 net.go:770] primary dev: ETH0
I0320 05:32:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:32:13.423162  543705 net.go:698] Add success.
W0320 05:32:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:32:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 05:32:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:32:14.456901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:32:14.456910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:32:14.456916  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:32:14.457011  543705 disk_worker.go:494] system disk:vda1
I0320 05:32:14.457052  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:32:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:32:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:32:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:32:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:32:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:32:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:32:16.472328  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:32:22.777517  543705 disk_info.go:125] begin check local disk info of client
I0320 05:32:22.779978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:32:22.779984  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad00 0xc0001aad40]
E0320 05:32:23.409199  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:32:23.409216  543705 memory.go:184] no items to output this cycle
I0320 05:32:23.409231  543705 cpu.go:275] no items to output this cycle
E0320 05:32:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:32:33.409779  543705 memory.go:184] no items to output this cycle
I0320 05:32:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 05:32:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:32:43.409805  543705 memory.go:191] Add success.
I0320 05:32:43.409806  543705 cpu.go:282] Add success.
I0320 05:32:43.420103  543705 net.go:648] Add success.
I0320 05:32:43.422801  543705 net.go:770] primary dev: ETH0
I0320 05:32:43.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:32:43.422829  543705 net.go:698] Add success.
I0320 05:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:32:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:32:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:32:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:32:53.409793  543705 memory.go:184] no items to output this cycle
I0320 05:32:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 05:33:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:33:03.409785  543705 memory.go:184] no items to output this cycle
I0320 05:33:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 05:33:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:33:13.409788  543705 memory.go:191] Add success.
I0320 05:33:13.409806  543705 cpu.go:282] Add success.
W0320 05:33:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:33:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:33:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:33:13.420124  543705 net.go:648] Add success.
I0320 05:33:13.423321  543705 net.go:770] primary dev: ETH0
I0320 05:33:13.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:33:13.423345  543705 net.go:698] Add success.
I0320 05:33:13.468454  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea7afdc7-2ca7-46b5-817d-1d959fc9a056","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:33:13.468488  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:33:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:33:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 05:33:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:33:14.456695  543705 disk_worker.go:494] system disk:vda1
I0320 05:33:14.456731  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:33:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:33:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:33:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:33:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:33:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:33:22.781551  543705 disk_info.go:125] begin check local disk info of client
I0320 05:33:22.784168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:33:22.784175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000dcc00 0xc0000dcc40]
E0320 05:33:23.409375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:33:23.409394  543705 memory.go:184] no items to output this cycle
I0320 05:33:23.409405  543705 cpu.go:275] no items to output this cycle
E0320 05:33:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:33:33.409784  543705 memory.go:184] no items to output this cycle
I0320 05:33:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 05:33:38.157482  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:33:38.157490  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:33:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:33:43.410725  543705 memory.go:191] Add success.
I0320 05:33:43.409827  543705 cpu.go:282] Add success.
I0320 05:33:43.420413  543705 net.go:648] Add success.
I0320 05:33:43.423009  543705 net.go:770] primary dev: ETH0
I0320 05:33:43.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:33:43.423034  543705 net.go:698] Add success.
I0320 05:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:33:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:33:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:33:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:33:53.409775  543705 memory.go:184] no items to output this cycle
I0320 05:33:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 05:34:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:34:03.409786  543705 memory.go:184] no items to output this cycle
I0320 05:34:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:34:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:34:13.409820  543705 memory.go:191] Add success.
I0320 05:34:13.409823  543705 cpu.go:282] Add success.
W0320 05:34:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:34:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:34:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:34:13.420096  543705 net.go:648] Add success.
I0320 05:34:13.422772  543705 net.go:770] primary dev: ETH0
I0320 05:34:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:34:13.422796  543705 net.go:698] Add success.
I0320 05:34:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:34:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:34:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 05:34:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:34:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 05:34:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:34:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:34:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:34:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:34:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:34:22.785571  543705 disk_info.go:125] begin check local disk info of client
I0320 05:34:22.788030  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:34:22.788037  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305240 0xc000305280]
E0320 05:34:23.407524  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:34:23.407535  543705 memory.go:184] no items to output this cycle
I0320 05:34:23.407535  543705 cpu.go:275] no items to output this cycle
E0320 05:34:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:34:33.409803  543705 memory.go:184] no items to output this cycle
I0320 05:34:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 05:34:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:34:43.409794  543705 memory.go:191] Add success.
I0320 05:34:43.409814  543705 cpu.go:282] Add success.
I0320 05:34:43.419955  543705 net.go:648] Add success.
I0320 05:34:43.422941  543705 net.go:770] primary dev: ETH0
I0320 05:34:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:34:43.422967  543705 net.go:698] Add success.
I0320 05:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:34:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:34:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:34:53.409765  543705 memory.go:184] no items to output this cycle
I0320 05:34:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 05:35:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:35:03.409810  543705 memory.go:184] no items to output this cycle
I0320 05:35:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 05:35:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:35:13.409780  543705 memory.go:191] Add success.
I0320 05:35:13.409799  543705 cpu.go:282] Add success.
W0320 05:35:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:35:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:35:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:35:13.420247  543705 net.go:648] Add success.
I0320 05:35:13.422793  543705 net.go:770] primary dev: ETH0
I0320 05:35:13.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:35:13.422818  543705 net.go:698] Add success.
I0320 05:35:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:35:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:35:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 05:35:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:35:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 05:35:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:35:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:35:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:35:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:35:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:35:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:35:22.789588  543705 disk_info.go:125] begin check local disk info of client
I0320 05:35:22.792112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:35:22.792120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b00 0xc0000c4b40]
E0320 05:35:23.409295  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:35:23.409314  543705 memory.go:184] no items to output this cycle
I0320 05:35:23.409329  543705 cpu.go:275] no items to output this cycle
E0320 05:35:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:35:33.409771  543705 memory.go:184] no items to output this cycle
I0320 05:35:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 05:35:43.409917  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:35:43.409926  543705 cpu.go:282] Add success.
I0320 05:35:43.409969  543705 memory.go:191] Add success.
I0320 05:35:43.419710  543705 net.go:648] Add success.
I0320 05:35:43.422518  543705 net.go:770] primary dev: ETH0
I0320 05:35:43.422531  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:35:43.422542  543705 net.go:698] Add success.
I0320 05:35:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:35:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:35:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:35:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:35:53.409785  543705 memory.go:184] no items to output this cycle
I0320 05:35:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 05:36:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:36:03.409789  543705 memory.go:184] no items to output this cycle
I0320 05:36:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 05:36:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:36:13.409791  543705 memory.go:191] Add success.
I0320 05:36:13.409799  543705 cpu.go:282] Add success.
W0320 05:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:36:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:36:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:36:13.419887  543705 net.go:770] primary dev: ETH0
I0320 05:36:13.419901  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:36:13.419913  543705 net.go:698] Add success.
I0320 05:36:13.420143  543705 net.go:648] Add success.
I0320 05:36:13.470247  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c8b8e07e-38fb-4b27-a978-b2852083dd56","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:36:13.470280  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:36:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:36:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:36:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 05:36:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:36:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 05:36:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:36:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:36:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:36:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:36:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:36:22.792203  543705 disk_info.go:125] begin check local disk info of client
I0320 05:36:22.794740  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:36:22.794747  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305340 0xc000305380]
E0320 05:36:23.408876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:36:23.408891  543705 memory.go:184] no items to output this cycle
I0320 05:36:23.408899  543705 cpu.go:275] no items to output this cycle
E0320 05:36:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:36:33.409775  543705 memory.go:184] no items to output this cycle
I0320 05:36:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 05:36:38.157731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:36:38.157738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:36:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:36:43.409965  543705 cpu.go:282] Add success.
I0320 05:36:43.410696  543705 memory.go:191] Add success.
I0320 05:36:43.419720  543705 net.go:648] Add success.
I0320 05:36:43.422638  543705 net.go:770] primary dev: ETH0
I0320 05:36:43.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:36:43.422662  543705 net.go:698] Add success.
I0320 05:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:36:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:36:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:36:53.409776  543705 memory.go:184] no items to output this cycle
I0320 05:36:53.409776  543705 cpu.go:275] no items to output this cycle
E0320 05:37:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:37:03.409784  543705 memory.go:184] no items to output this cycle
I0320 05:37:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 05:37:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:37:13.409818  543705 memory.go:191] Add success.
I0320 05:37:13.409827  543705 cpu.go:282] Add success.
W0320 05:37:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:37:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:37:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:37:13.420221  543705 net.go:648] Add success.
I0320 05:37:13.422824  543705 net.go:770] primary dev: ETH0
I0320 05:37:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:37:13.422857  543705 net.go:698] Add success.
I0320 05:37:13.453444  543705 event_worker.go:152] Polling the log file for events...
W0320 05:37:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:37:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 05:37:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:37:14.456755  543705 disk_worker.go:494] system disk:vda1
I0320 05:37:14.456794  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:37:14.457119  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:37:14.457127  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:37:14.457132  543705 custom_config.go:64] query custom config with name: gpu
E0320 05:37:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:37:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:37:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:37:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:37:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:37:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:37:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:37:22.796628  543705 disk_info.go:125] begin check local disk info of client
I0320 05:37:22.799175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:37:22.799182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001efac0 0xc0001efb00]
E0320 05:37:23.409333  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:37:23.409349  543705 memory.go:184] no items to output this cycle
I0320 05:37:23.409357  543705 cpu.go:275] no items to output this cycle
E0320 05:37:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:37:33.409781  543705 memory.go:184] no items to output this cycle
I0320 05:37:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 05:37:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:37:43.409795  543705 memory.go:191] Add success.
I0320 05:37:43.409795  543705 cpu.go:282] Add success.
I0320 05:37:43.420072  543705 net.go:648] Add success.
I0320 05:37:43.423287  543705 net.go:770] primary dev: ETH0
I0320 05:37:43.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:37:43.423312  543705 net.go:698] Add success.
I0320 05:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:37:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:37:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:37:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:37:53.409768  543705 memory.go:184] no items to output this cycle
I0320 05:37:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 05:38:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:38:03.409789  543705 memory.go:184] no items to output this cycle
I0320 05:38:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 05:38:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:38:13.409778  543705 memory.go:191] Add success.
W0320 05:38:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:38:13.409808  543705 cpu.go:282] Add success.
W0320 05:38:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:38:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:38:13.420162  543705 net.go:648] Add success.
I0320 05:38:13.422813  543705 net.go:770] primary dev: ETH0
I0320 05:38:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:38:13.422839  543705 net.go:698] Add success.
I0320 05:38:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:38:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:38:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 05:38:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:38:14.456608  543705 disk_worker.go:494] system disk:vda1
I0320 05:38:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:38:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:38:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:38:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:38:22.800629  543705 disk_info.go:125] begin check local disk info of client
I0320 05:38:22.803106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:38:22.803112  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e500 0xc00034e540]
E0320 05:38:23.409189  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:38:23.409201  543705 memory.go:184] no items to output this cycle
I0320 05:38:23.409240  543705 cpu.go:275] no items to output this cycle
E0320 05:38:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:38:33.409795  543705 memory.go:184] no items to output this cycle
I0320 05:38:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 05:38:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:38:43.409795  543705 memory.go:191] Add success.
I0320 05:38:43.409795  543705 cpu.go:282] Add success.
I0320 05:38:43.420054  543705 net.go:648] Add success.
I0320 05:38:43.423124  543705 net.go:770] primary dev: ETH0
I0320 05:38:43.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:38:43.423327  543705 net.go:698] Add success.
I0320 05:38:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:38:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:38:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:38:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:38:53.409781  543705 memory.go:184] no items to output this cycle
I0320 05:38:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 05:39:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:39:03.409796  543705 memory.go:184] no items to output this cycle
I0320 05:39:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 05:39:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:39:13.409827  543705 memory.go:191] Add success.
I0320 05:39:13.409832  543705 cpu.go:282] Add success.
W0320 05:39:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:39:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:39:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:39:13.420113  543705 net.go:648] Add success.
I0320 05:39:13.422960  543705 net.go:770] primary dev: ETH0
I0320 05:39:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:39:13.422986  543705 net.go:698] Add success.
I0320 05:39:13.865943  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3a16e799-f3e4-444b-bd9c-495b49987bde","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:39:13.865978  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:39:14.454731  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:39:14.454903  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:39:14.454913  543705 disk_worker.go:708] disk space is not compliant
W0320 05:39:14.454916  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:39:14.456252  543705 disk_worker.go:494] system disk:vda1
I0320 05:39:14.456297  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:39:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:39:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:39:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:39:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:39:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:39:22.803200  543705 disk_info.go:125] begin check local disk info of client
I0320 05:39:22.805836  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:39:22.805843  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4980 0xc0000c49c0]
E0320 05:39:23.408912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:39:23.408927  543705 memory.go:184] no items to output this cycle
I0320 05:39:23.408963  543705 cpu.go:275] no items to output this cycle
E0320 05:39:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:39:33.409801  543705 memory.go:184] no items to output this cycle
I0320 05:39:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 05:39:38.161488  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:39:38.161495  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:39:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:39:43.410719  543705 memory.go:191] Add success.
I0320 05:39:43.409831  543705 cpu.go:282] Add success.
I0320 05:39:43.420448  543705 net.go:648] Add success.
I0320 05:39:43.423391  543705 net.go:770] primary dev: ETH0
I0320 05:39:43.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:39:43.423420  543705 net.go:698] Add success.
I0320 05:39:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:39:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:39:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:39:53.409808  543705 memory.go:184] no items to output this cycle
I0320 05:39:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 05:40:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:40:03.409803  543705 memory.go:184] no items to output this cycle
I0320 05:40:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 05:40:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:40:13.409797  543705 memory.go:191] Add success.
I0320 05:40:13.409798  543705 cpu.go:282] Add success.
W0320 05:40:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:40:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:40:13.420157  543705 net.go:648] Add success.
I0320 05:40:13.422767  543705 net.go:770] primary dev: ETH0
I0320 05:40:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:40:13.422793  543705 net.go:698] Add success.
I0320 05:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:40:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:40:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 05:40:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:40:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 05:40:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:40:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:40:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:40:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:40:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:40:22.807675  543705 disk_info.go:125] begin check local disk info of client
I0320 05:40:22.810147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:40:22.810153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5480 0xc0000c54c0]
E0320 05:40:23.409232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:40:23.409249  543705 memory.go:184] no items to output this cycle
I0320 05:40:23.409265  543705 cpu.go:275] no items to output this cycle
E0320 05:40:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:40:33.409819  543705 memory.go:184] no items to output this cycle
I0320 05:40:33.409836  543705 cpu.go:275] no items to output this cycle
E0320 05:40:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:40:43.409831  543705 memory.go:191] Add success.
I0320 05:40:43.409838  543705 cpu.go:282] Add success.
I0320 05:40:43.419887  543705 net.go:648] Add success.
I0320 05:40:43.422562  543705 net.go:770] primary dev: ETH0
I0320 05:40:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:40:43.422588  543705 net.go:698] Add success.
I0320 05:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:40:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:40:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:40:53.409817  543705 memory.go:184] no items to output this cycle
I0320 05:40:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 05:41:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:41:03.409820  543705 memory.go:184] no items to output this cycle
I0320 05:41:03.409832  543705 cpu.go:275] no items to output this cycle
E0320 05:41:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:41:13.409818  543705 memory.go:191] Add success.
I0320 05:41:13.409825  543705 cpu.go:282] Add success.
W0320 05:41:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:41:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:41:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:41:13.420158  543705 net.go:648] Add success.
I0320 05:41:13.423089  543705 net.go:770] primary dev: ETH0
I0320 05:41:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:41:13.423117  543705 net.go:698] Add success.
I0320 05:41:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:41:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:41:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 05:41:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:41:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 05:41:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:41:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:41:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:41:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:41:16.472433  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:41:22.811694  543705 disk_info.go:125] begin check local disk info of client
I0320 05:41:22.814279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:41:22.814287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005559c0 0xc000555a00]
E0320 05:41:23.409311  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:41:23.409325  543705 memory.go:184] no items to output this cycle
I0320 05:41:23.409361  543705 cpu.go:275] no items to output this cycle
E0320 05:41:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:41:33.409805  543705 memory.go:184] no items to output this cycle
I0320 05:41:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 05:41:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:41:43.409811  543705 memory.go:191] Add success.
I0320 05:41:43.409820  543705 cpu.go:282] Add success.
I0320 05:41:43.419902  543705 net.go:648] Add success.
I0320 05:41:43.422681  543705 net.go:770] primary dev: ETH0
I0320 05:41:43.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:41:43.422715  543705 net.go:698] Add success.
I0320 05:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:41:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:41:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:41:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:41:53.409773  543705 memory.go:184] no items to output this cycle
I0320 05:41:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 05:42:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:42:03.409785  543705 memory.go:184] no items to output this cycle
I0320 05:42:03.409914  543705 cpu.go:275] no items to output this cycle
E0320 05:42:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:42:13.409798  543705 memory.go:191] Add success.
I0320 05:42:13.409800  543705 cpu.go:282] Add success.
W0320 05:42:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:42:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:42:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:42:13.420186  543705 net.go:648] Add success.
I0320 05:42:13.423018  543705 net.go:770] primary dev: ETH0
I0320 05:42:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:42:13.423043  543705 net.go:698] Add success.
I0320 05:42:13.464556  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"13268bb6-f15b-4491-b2af-081431c34c9e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:42:13.464597  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 05:42:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:42:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 05:42:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:42:14.456854  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0320 05:42:14.456870  543705 disk_worker.go:494] system disk:vda1
E0320 05:42:14.456863  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:42:14.456880  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:42:14.456908  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:42:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:42:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:42:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:42:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:42:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:42:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:42:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:42:22.814370  543705 disk_info.go:125] begin check local disk info of client
I0320 05:42:22.816784  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:42:22.816790  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf80 0xc0001aafc0]
E0320 05:42:23.408826  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:42:23.408841  543705 memory.go:184] no items to output this cycle
I0320 05:42:23.408855  543705 cpu.go:275] no items to output this cycle
E0320 05:42:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:42:33.409795  543705 memory.go:184] no items to output this cycle
I0320 05:42:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 05:42:38.161732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:42:38.161739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:42:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:42:43.410674  543705 memory.go:191] Add success.
I0320 05:42:43.409810  543705 cpu.go:282] Add success.
I0320 05:42:43.420368  543705 net.go:648] Add success.
I0320 05:42:43.423137  543705 net.go:770] primary dev: ETH0
I0320 05:42:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:42:43.423165  543705 net.go:698] Add success.
I0320 05:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:42:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:42:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:42:53.409763  543705 memory.go:184] no items to output this cycle
I0320 05:42:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 05:43:03.409932  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:43:03.410001  543705 cpu.go:275] no items to output this cycle
I0320 05:43:03.410039  543705 memory.go:184] no items to output this cycle
E0320 05:43:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:43:13.409783  543705 memory.go:191] Add success.
W0320 05:43:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:43:13.409815  543705 cpu.go:282] Add success.
W0320 05:43:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:43:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:43:13.419940  543705 net.go:770] primary dev: ETH0
I0320 05:43:13.419951  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:43:13.419964  543705 net.go:698] Add success.
I0320 05:43:13.420205  543705 net.go:648] Add success.
I0320 05:43:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:43:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:43:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 05:43:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:43:14.456492  543705 disk_worker.go:494] system disk:vda1
I0320 05:43:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:43:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:43:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:43:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:43:22.817678  543705 disk_info.go:125] begin check local disk info of client
I0320 05:43:22.820190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:43:22.820197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed600 0xc0000ed640]
E0320 05:43:23.409229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:43:23.409246  543705 memory.go:184] no items to output this cycle
I0320 05:43:23.409260  543705 cpu.go:275] no items to output this cycle
E0320 05:43:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:43:33.409802  543705 memory.go:184] no items to output this cycle
I0320 05:43:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 05:43:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:43:43.409792  543705 memory.go:191] Add success.
I0320 05:43:43.409791  543705 cpu.go:282] Add success.
I0320 05:43:43.419906  543705 net.go:648] Add success.
I0320 05:43:43.422505  543705 net.go:770] primary dev: ETH0
I0320 05:43:43.422520  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:43:43.422533  543705 net.go:698] Add success.
I0320 05:43:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:43:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:43:53.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:43:53.410284  543705 memory.go:184] no items to output this cycle
I0320 05:43:53.410286  543705 cpu.go:275] no items to output this cycle
E0320 05:44:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:44:03.409780  543705 memory.go:184] no items to output this cycle
I0320 05:44:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 05:44:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:44:13.409790  543705 memory.go:191] Add success.
I0320 05:44:13.409791  543705 cpu.go:282] Add success.
W0320 05:44:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:44:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:44:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:44:13.420122  543705 net.go:648] Add success.
I0320 05:44:13.422757  543705 net.go:770] primary dev: ETH0
I0320 05:44:13.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:44:13.422782  543705 net.go:698] Add success.
I0320 05:44:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:44:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:44:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 05:44:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:44:14.456615  543705 disk_worker.go:494] system disk:vda1
I0320 05:44:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:44:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:44:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:44:22.821673  543705 disk_info.go:125] begin check local disk info of client
I0320 05:44:22.824099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:44:22.824105  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b580 0xc00007b5c0]
E0320 05:44:23.409115  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:44:23.409130  543705 memory.go:184] no items to output this cycle
I0320 05:44:23.409144  543705 cpu.go:275] no items to output this cycle
E0320 05:44:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:44:33.409783  543705 memory.go:184] no items to output this cycle
I0320 05:44:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 05:44:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:44:43.409796  543705 memory.go:191] Add success.
I0320 05:44:43.409797  543705 cpu.go:282] Add success.
I0320 05:44:43.419875  543705 net.go:648] Add success.
I0320 05:44:43.422233  543705 net.go:770] primary dev: ETH0
I0320 05:44:43.422246  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:44:43.422261  543705 net.go:698] Add success.
I0320 05:44:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:44:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:44:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:44:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:44:53.409784  543705 memory.go:184] no items to output this cycle
I0320 05:44:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 05:45:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:45:03.409787  543705 memory.go:184] no items to output this cycle
I0320 05:45:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 05:45:13.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:45:13.409912  543705 memory.go:191] Add success.
W0320 05:45:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:45:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:45:13.409963  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:45:13.409970  543705 cpu.go:282] Add success.
I0320 05:45:13.419706  543705 net.go:648] Add success.
I0320 05:45:13.422269  543705 net.go:770] primary dev: ETH0
I0320 05:45:13.422281  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:45:13.422292  543705 net.go:698] Add success.
I0320 05:45:13.467836  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7862e44e-dee9-4d00-b07b-af09fb8f522d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:45:13.467867  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:45:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:45:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:45:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 05:45:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:45:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 05:45:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:45:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:45:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:45:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:45:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:45:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:45:22.825678  543705 disk_info.go:125] begin check local disk info of client
I0320 05:45:22.828220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:45:22.828227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5740 0xc0000c5780]
E0320 05:45:23.407515  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:45:23.407529  543705 memory.go:184] no items to output this cycle
I0320 05:45:23.407550  543705 cpu.go:275] no items to output this cycle
E0320 05:45:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:45:33.409803  543705 memory.go:184] no items to output this cycle
I0320 05:45:33.409815  543705 cpu.go:275] no items to output this cycle
I0320 05:45:38.165507  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:45:38.165514  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:45:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:45:43.410623  543705 memory.go:191] Add success.
I0320 05:45:43.409809  543705 cpu.go:282] Add success.
I0320 05:45:43.420373  543705 net.go:648] Add success.
I0320 05:45:43.423156  543705 net.go:770] primary dev: ETH0
I0320 05:45:43.423168  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:45:43.423181  543705 net.go:698] Add success.
I0320 05:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:45:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:45:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:45:53.409766  543705 memory.go:184] no items to output this cycle
I0320 05:45:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 05:46:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:46:03.409782  543705 memory.go:184] no items to output this cycle
I0320 05:46:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 05:46:13.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:46:13.409888  543705 memory.go:191] Add success.
W0320 05:46:13.409924  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:46:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:46:13.409954  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:46:13.409973  543705 cpu.go:282] Add success.
I0320 05:46:13.419722  543705 net.go:648] Add success.
I0320 05:46:13.422636  543705 net.go:770] primary dev: ETH0
I0320 05:46:13.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:46:13.422660  543705 net.go:698] Add success.
I0320 05:46:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:46:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:46:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 05:46:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:46:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 05:46:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:46:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:46:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:46:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:46:16.472436  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:46:22.829675  543705 disk_info.go:125] begin check local disk info of client
I0320 05:46:22.832158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:46:22.832165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2cc0 0xc0002b2d00]
E0320 05:46:23.409109  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:46:23.409124  543705 memory.go:184] no items to output this cycle
I0320 05:46:23.409149  543705 cpu.go:275] no items to output this cycle
E0320 05:46:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:46:33.409779  543705 memory.go:184] no items to output this cycle
I0320 05:46:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 05:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:46:43.409793  543705 memory.go:191] Add success.
I0320 05:46:43.409793  543705 cpu.go:282] Add success.
I0320 05:46:43.419860  543705 net.go:648] Add success.
I0320 05:46:43.422602  543705 net.go:770] primary dev: ETH0
I0320 05:46:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:46:43.422626  543705 net.go:698] Add success.
I0320 05:46:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:46:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:46:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:46:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:46:53.409789  543705 memory.go:184] no items to output this cycle
I0320 05:46:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 05:47:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:47:03.409789  543705 memory.go:184] no items to output this cycle
I0320 05:47:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:47:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:47:13.409788  543705 memory.go:191] Add success.
I0320 05:47:13.409792  543705 cpu.go:282] Add success.
W0320 05:47:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:47:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:47:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:47:13.420192  543705 net.go:648] Add success.
I0320 05:47:13.423165  543705 net.go:770] primary dev: ETH0
I0320 05:47:13.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:47:13.423189  543705 net.go:698] Add success.
I0320 05:47:13.452880  543705 event_worker.go:152] Polling the log file for events...
W0320 05:47:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 05:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:47:14.455920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:47:14.455928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:47:14.455934  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:47:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 05:47:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:47:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:47:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:47:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:47:16.457989  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:47:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:47:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:47:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:47:22.833678  543705 disk_info.go:125] begin check local disk info of client
I0320 05:47:22.836160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:47:22.836167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8640 0xc0003c8680]
E0320 05:47:23.407525  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:47:23.407539  543705 memory.go:184] no items to output this cycle
I0320 05:47:23.407551  543705 cpu.go:275] no items to output this cycle
E0320 05:47:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:47:33.409768  543705 memory.go:184] no items to output this cycle
I0320 05:47:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 05:47:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:47:43.409808  543705 memory.go:191] Add success.
I0320 05:47:43.409817  543705 cpu.go:282] Add success.
I0320 05:47:43.419852  543705 net.go:648] Add success.
I0320 05:47:43.422378  543705 net.go:770] primary dev: ETH0
I0320 05:47:43.422391  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:47:43.422404  543705 net.go:698] Add success.
I0320 05:47:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:47:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:47:53.409795  543705 memory.go:184] no items to output this cycle
I0320 05:47:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 05:48:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:48:03.409784  543705 memory.go:184] no items to output this cycle
I0320 05:48:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 05:48:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:48:13.409811  543705 memory.go:191] Add success.
I0320 05:48:13.409815  543705 cpu.go:282] Add success.
W0320 05:48:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:48:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:48:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:48:13.420583  543705 net.go:648] Add success.
I0320 05:48:13.423377  543705 net.go:770] primary dev: ETH0
I0320 05:48:13.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:48:13.423402  543705 net.go:698] Add success.
I0320 05:48:13.463098  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe146765-ccd5-4fbe-9116-4b202a2d5f42","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:48:13.463128  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:48:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:48:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:48:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 05:48:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:48:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 05:48:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:48:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:48:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:48:16.472366  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:48:22.837676  543705 disk_info.go:125] begin check local disk info of client
I0320 05:48:22.840133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:48:22.840140  543705 disk_info.go:196] parse disk info done, disk is : [0xc000518b80 0xc000518bc0]
E0320 05:48:23.409069  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:48:23.409087  543705 memory.go:184] no items to output this cycle
I0320 05:48:23.409099  543705 cpu.go:275] no items to output this cycle
E0320 05:48:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:48:33.409781  543705 memory.go:184] no items to output this cycle
I0320 05:48:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 05:48:38.165734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:48:38.165740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:48:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:48:43.410737  543705 memory.go:191] Add success.
I0320 05:48:43.409793  543705 cpu.go:282] Add success.
I0320 05:48:43.420440  543705 net.go:648] Add success.
I0320 05:48:43.423047  543705 net.go:770] primary dev: ETH0
I0320 05:48:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:48:43.423075  543705 net.go:698] Add success.
I0320 05:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:48:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:48:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:48:53.409801  543705 memory.go:184] no items to output this cycle
I0320 05:48:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 05:49:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:49:03.409787  543705 memory.go:184] no items to output this cycle
I0320 05:49:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 05:49:13.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:49:13.409928  543705 memory.go:191] Add success.
I0320 05:49:13.409944  543705 cpu.go:282] Add success.
W0320 05:49:13.410092  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:49:13.410106  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:49:13.410109  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:49:13.419722  543705 net.go:648] Add success.
I0320 05:49:13.422507  543705 net.go:770] primary dev: ETH0
I0320 05:49:13.422522  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:49:13.422536  543705 net.go:698] Add success.
I0320 05:49:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:49:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:49:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 05:49:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:49:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 05:49:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:49:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:49:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:49:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:49:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:49:16.472415  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:49:22.841680  543705 disk_info.go:125] begin check local disk info of client
I0320 05:49:22.844239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:49:22.844246  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464740 0xc000464780]
E0320 05:49:23.409126  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:49:23.409140  543705 memory.go:184] no items to output this cycle
I0320 05:49:23.409174  543705 cpu.go:275] no items to output this cycle
E0320 05:49:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:49:33.409770  543705 memory.go:184] no items to output this cycle
I0320 05:49:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 05:49:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:49:43.409779  543705 memory.go:191] Add success.
I0320 05:49:43.409800  543705 cpu.go:282] Add success.
I0320 05:49:43.419892  543705 net.go:648] Add success.
I0320 05:49:43.422539  543705 net.go:770] primary dev: ETH0
I0320 05:49:43.422553  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:49:43.422566  543705 net.go:698] Add success.
I0320 05:49:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:49:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:49:53.410333  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:49:53.410347  543705 memory.go:184] no items to output this cycle
I0320 05:49:53.410351  543705 cpu.go:275] no items to output this cycle
E0320 05:50:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:50:03.409789  543705 memory.go:184] no items to output this cycle
I0320 05:50:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 05:50:13.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:50:13.409897  543705 cpu.go:282] Add success.
I0320 05:50:13.409911  543705 memory.go:191] Add success.
W0320 05:50:13.409969  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:50:13.409991  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:50:13.409996  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:50:13.419767  543705 net.go:648] Add success.
I0320 05:50:13.422638  543705 net.go:770] primary dev: ETH0
I0320 05:50:13.422652  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:50:13.422675  543705 net.go:698] Add success.
I0320 05:50:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:50:14.455078  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:50:14.455142  543705 disk_worker.go:708] disk space is not compliant
W0320 05:50:14.455145  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:50:14.456460  543705 disk_worker.go:494] system disk:vda1
I0320 05:50:14.456504  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:50:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:50:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:50:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:50:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:50:22.845688  543705 disk_info.go:125] begin check local disk info of client
I0320 05:50:22.848204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:50:22.848211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa280 0xc0001aa2c0]
E0320 05:50:23.409088  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:50:23.409096  543705 cpu.go:275] no items to output this cycle
I0320 05:50:23.409101  543705 memory.go:184] no items to output this cycle
E0320 05:50:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:50:33.409812  543705 memory.go:184] no items to output this cycle
I0320 05:50:33.409827  543705 cpu.go:275] no items to output this cycle
E0320 05:50:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:50:43.409794  543705 memory.go:191] Add success.
I0320 05:50:43.409809  543705 cpu.go:282] Add success.
I0320 05:50:43.419936  543705 net.go:648] Add success.
I0320 05:50:43.422475  543705 net.go:770] primary dev: ETH0
I0320 05:50:43.422487  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:50:43.422499  543705 net.go:698] Add success.
I0320 05:50:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:50:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:50:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:50:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:50:53.409784  543705 memory.go:184] no items to output this cycle
I0320 05:50:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 05:51:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:51:03.409818  543705 memory.go:184] no items to output this cycle
I0320 05:51:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 05:51:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:51:13.409807  543705 memory.go:191] Add success.
I0320 05:51:13.409823  543705 cpu.go:282] Add success.
W0320 05:51:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:51:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:51:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:51:13.420058  543705 net.go:648] Add success.
I0320 05:51:13.422713  543705 net.go:770] primary dev: ETH0
I0320 05:51:13.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:51:13.422751  543705 net.go:698] Add success.
I0320 05:51:13.463791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3e12b123-ad00-4401-b136-8939e4332714","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:51:13.463823  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:51:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:51:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:51:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 05:51:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:51:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 05:51:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:51:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:51:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:51:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:51:22.849680  543705 disk_info.go:125] begin check local disk info of client
I0320 05:51:22.852310  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:51:22.852316  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad40 0xc00007ad80]
E0320 05:51:23.407869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:51:23.407883  543705 memory.go:184] no items to output this cycle
I0320 05:51:23.407894  543705 cpu.go:275] no items to output this cycle
E0320 05:51:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:51:33.409796  543705 memory.go:184] no items to output this cycle
I0320 05:51:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 05:51:38.165882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:51:38.165888  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:51:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:51:43.410590  543705 memory.go:191] Add success.
I0320 05:51:43.409824  543705 cpu.go:282] Add success.
I0320 05:51:43.420260  543705 net.go:648] Add success.
I0320 05:51:43.422701  543705 net.go:770] primary dev: ETH0
I0320 05:51:43.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:51:43.422727  543705 net.go:698] Add success.
I0320 05:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:51:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:51:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:51:53.409777  543705 cpu.go:275] no items to output this cycle
I0320 05:51:53.409783  543705 memory.go:184] no items to output this cycle
E0320 05:52:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:52:03.409794  543705 memory.go:184] no items to output this cycle
I0320 05:52:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 05:52:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:52:13.409797  543705 memory.go:191] Add success.
W0320 05:52:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:52:13.409842  543705 cpu.go:282] Add success.
W0320 05:52:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:52:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:52:13.420008  543705 net.go:770] primary dev: ETH0
I0320 05:52:13.420023  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:52:13.420037  543705 net.go:698] Add success.
I0320 05:52:13.420380  543705 net.go:648] Add success.
W0320 05:52:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:52:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 05:52:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:52:14.456851  543705 disk_worker.go:494] system disk:vda1
I0320 05:52:14.456901  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:52:14.457126  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:52:14.457134  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:52:14.457139  543705 custom_config.go:64] query custom config with name: gpu
E0320 05:52:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:52:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:52:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:52:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:52:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:52:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:52:16.472283  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:52:22.853677  543705 disk_info.go:125] begin check local disk info of client
I0320 05:52:22.856167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:52:22.856174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e880 0xc00037e8c0]
E0320 05:52:23.409020  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:52:23.409037  543705 memory.go:184] no items to output this cycle
I0320 05:52:23.409053  543705 cpu.go:275] no items to output this cycle
E0320 05:52:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:52:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 05:52:33.409803  543705 memory.go:184] no items to output this cycle
E0320 05:52:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:52:43.409793  543705 memory.go:191] Add success.
I0320 05:52:43.409794  543705 cpu.go:282] Add success.
I0320 05:52:43.419854  543705 net.go:648] Add success.
I0320 05:52:43.422527  543705 net.go:770] primary dev: ETH0
I0320 05:52:43.422540  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:52:43.422552  543705 net.go:698] Add success.
I0320 05:52:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:52:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:52:53.409760  543705 memory.go:184] no items to output this cycle
I0320 05:52:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 05:53:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:53:03.409922  543705 memory.go:184] no items to output this cycle
I0320 05:53:03.409934  543705 cpu.go:275] no items to output this cycle
E0320 05:53:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:53:13.409786  543705 memory.go:191] Add success.
I0320 05:53:13.409804  543705 cpu.go:282] Add success.
W0320 05:53:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:53:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:53:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:53:13.420138  543705 net.go:648] Add success.
I0320 05:53:13.422841  543705 net.go:770] primary dev: ETH0
I0320 05:53:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:53:13.422867  543705 net.go:698] Add success.
I0320 05:53:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:53:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:53:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 05:53:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:53:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 05:53:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:53:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:53:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:53:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:53:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:53:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:53:22.857677  543705 disk_info.go:125] begin check local disk info of client
I0320 05:53:22.860308  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:53:22.860314  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f4680 0xc0003f46c0]
E0320 05:53:23.409125  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:53:23.409139  543705 memory.go:184] no items to output this cycle
I0320 05:53:23.409154  543705 cpu.go:275] no items to output this cycle
E0320 05:53:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:53:33.409776  543705 memory.go:184] no items to output this cycle
I0320 05:53:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 05:53:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:53:43.409809  543705 memory.go:191] Add success.
I0320 05:53:43.409822  543705 cpu.go:282] Add success.
I0320 05:53:43.419950  543705 net.go:648] Add success.
I0320 05:53:43.422631  543705 net.go:770] primary dev: ETH0
I0320 05:53:43.422644  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:53:43.422657  543705 net.go:698] Add success.
I0320 05:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:53:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:53:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:53:53.409776  543705 memory.go:184] no items to output this cycle
I0320 05:53:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 05:54:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:54:03.409883  543705 memory.go:184] no items to output this cycle
I0320 05:54:03.409928  543705 cpu.go:275] no items to output this cycle
E0320 05:54:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:54:13.409789  543705 cpu.go:282] Add success.
I0320 05:54:13.409799  543705 memory.go:191] Add success.
W0320 05:54:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:54:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:54:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:54:13.420158  543705 net.go:648] Add success.
I0320 05:54:13.423167  543705 net.go:770] primary dev: ETH0
I0320 05:54:13.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:54:13.423192  543705 net.go:698] Add success.
I0320 05:54:13.532265  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"25352091-a420-4e20-96ff-472b152912fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:54:13.532298  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 05:54:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:54:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:54:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 05:54:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:54:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 05:54:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:54:15.455604  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:54:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:54:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:54:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:54:16.472453  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:54:22.861674  543705 disk_info.go:125] begin check local disk info of client
I0320 05:54:22.864203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:54:22.864209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7080 0xc0001c70c0]
E0320 05:54:23.407519  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:54:23.407531  543705 memory.go:184] no items to output this cycle
I0320 05:54:23.407566  543705 cpu.go:275] no items to output this cycle
E0320 05:54:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:54:33.409810  543705 memory.go:184] no items to output this cycle
I0320 05:54:33.409815  543705 cpu.go:275] no items to output this cycle
I0320 05:54:38.166024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:54:38.166030  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:54:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:54:43.410645  543705 memory.go:191] Add success.
I0320 05:54:43.409815  543705 cpu.go:282] Add success.
I0320 05:54:43.420339  543705 net.go:648] Add success.
I0320 05:54:43.422934  543705 net.go:770] primary dev: ETH0
I0320 05:54:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:54:43.422960  543705 net.go:698] Add success.
I0320 05:54:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:54:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:54:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:54:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:54:53.409789  543705 memory.go:184] no items to output this cycle
I0320 05:54:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 05:55:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:55:03.409792  543705 memory.go:184] no items to output this cycle
I0320 05:55:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 05:55:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:55:13.409779  543705 memory.go:191] Add success.
W0320 05:55:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:55:13.409809  543705 cpu.go:282] Add success.
W0320 05:55:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:55:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:55:13.419953  543705 net.go:770] primary dev: ETH0
I0320 05:55:13.419966  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:55:13.419978  543705 net.go:698] Add success.
I0320 05:55:13.420327  543705 net.go:648] Add success.
I0320 05:55:14.453926  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:55:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:55:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 05:55:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:55:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 05:55:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:55:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:55:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:55:16.472457  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:55:22.865675  543705 disk_info.go:125] begin check local disk info of client
I0320 05:55:22.868222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:55:22.868229  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad40 0xc00007ad80]
E0320 05:55:23.409006  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:55:23.409021  543705 memory.go:184] no items to output this cycle
I0320 05:55:23.409029  543705 cpu.go:275] no items to output this cycle
E0320 05:55:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:55:33.409777  543705 memory.go:184] no items to output this cycle
I0320 05:55:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 05:55:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:55:43.409809  543705 memory.go:191] Add success.
I0320 05:55:43.409816  543705 cpu.go:282] Add success.
I0320 05:55:43.419897  543705 net.go:648] Add success.
I0320 05:55:43.422548  543705 net.go:770] primary dev: ETH0
I0320 05:55:43.422563  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:55:43.422577  543705 net.go:698] Add success.
I0320 05:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:55:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:55:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:55:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:55:53.409776  543705 memory.go:184] no items to output this cycle
I0320 05:55:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 05:56:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:56:03.409799  543705 memory.go:184] no items to output this cycle
I0320 05:56:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 05:56:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:56:13.409797  543705 memory.go:191] Add success.
I0320 05:56:13.409812  543705 cpu.go:282] Add success.
W0320 05:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:56:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:56:13.420163  543705 net.go:648] Add success.
I0320 05:56:13.422974  543705 net.go:770] primary dev: ETH0
I0320 05:56:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:56:13.422999  543705 net.go:698] Add success.
I0320 05:56:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:56:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:56:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 05:56:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:56:14.456528  543705 disk_worker.go:494] system disk:vda1
I0320 05:56:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:56:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:56:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:56:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:56:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:56:22.869675  543705 disk_info.go:125] begin check local disk info of client
I0320 05:56:22.872141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:56:22.872148  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a500 0xc00047a540]
E0320 05:56:23.407522  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:56:23.407539  543705 memory.go:184] no items to output this cycle
I0320 05:56:23.407555  543705 cpu.go:275] no items to output this cycle
E0320 05:56:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:56:33.409795  543705 memory.go:184] no items to output this cycle
I0320 05:56:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 05:56:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:56:43.409827  543705 memory.go:191] Add success.
I0320 05:56:43.409828  543705 cpu.go:282] Add success.
I0320 05:56:43.419899  543705 net.go:648] Add success.
I0320 05:56:43.422620  543705 net.go:770] primary dev: ETH0
I0320 05:56:43.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:56:43.422644  543705 net.go:698] Add success.
I0320 05:56:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:56:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:56:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:56:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:56:53.409808  543705 memory.go:184] no items to output this cycle
I0320 05:56:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 05:57:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:57:03.409800  543705 memory.go:184] no items to output this cycle
I0320 05:57:03.409829  543705 cpu.go:275] no items to output this cycle
E0320 05:57:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:57:13.409817  543705 memory.go:191] Add success.
I0320 05:57:13.409823  543705 cpu.go:282] Add success.
W0320 05:57:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:57:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:57:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:57:13.419770  543705 net.go:648] Add success.
I0320 05:57:13.428685  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 05:57:13.428768  543705 net.go:770] primary dev: ETH0
I0320 05:57:13.428781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:57:13.428792  543705 net.go:698] Add success.
I0320 05:57:13.453393  543705 event_worker.go:152] Polling the log file for events...
I0320 05:57:13.463773  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f0c3e47e-a4fd-4f25-a177-95b29d9f1a40","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 05:57:13.463804  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 05:57:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:57:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 05:57:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0320 05:57:14.456792  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 05:57:14.456802  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 05:57:14.456807  543705 custom_config.go:64] query custom config with name: gpu
I0320 05:57:14.456857  543705 disk_worker.go:494] system disk:vda1
I0320 05:57:14.456897  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 05:57:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 05:57:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:57:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 05:57:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 05:57:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:57:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:57:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:57:22.873678  543705 disk_info.go:125] begin check local disk info of client
I0320 05:57:22.876275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:57:22.876282  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7780 0xc0001c77c0]
E0320 05:57:23.408997  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:57:23.409009  543705 memory.go:184] no items to output this cycle
I0320 05:57:23.409043  543705 cpu.go:275] no items to output this cycle
E0320 05:57:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:57:33.409764  543705 memory.go:184] no items to output this cycle
I0320 05:57:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 05:57:38.169534  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 05:57:38.169540  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 05:57:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:57:43.409778  543705 memory.go:191] Add success.
I0320 05:57:43.409797  543705 cpu.go:282] Add success.
I0320 05:57:43.419866  543705 net.go:648] Add success.
I0320 05:57:43.420764  543705 net.go:770] primary dev: ETH0
I0320 05:57:43.420777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:57:43.420804  543705 net.go:698] Add success.
I0320 05:57:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:57:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:57:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:57:53.409775  543705 memory.go:184] no items to output this cycle
I0320 05:57:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 05:58:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:58:03.409825  543705 memory.go:184] no items to output this cycle
I0320 05:58:03.409838  543705 cpu.go:275] no items to output this cycle
E0320 05:58:13.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:58:13.409882  543705 memory.go:191] Add success.
W0320 05:58:13.409910  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 05:58:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:58:13.409929  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:58:13.409932  543705 cpu.go:282] Add success.
I0320 05:58:13.419706  543705 net.go:648] Add success.
I0320 05:58:13.422383  543705 net.go:770] primary dev: ETH0
I0320 05:58:13.422397  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:58:13.422409  543705 net.go:698] Add success.
I0320 05:58:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:58:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:58:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 05:58:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:58:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 05:58:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:58:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:58:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:58:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:58:16.472413  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:58:22.877674  543705 disk_info.go:125] begin check local disk info of client
I0320 05:58:22.880175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:58:22.880183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa740 0xc0001aa780]
E0320 05:58:23.408880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:58:23.408894  543705 memory.go:184] no items to output this cycle
I0320 05:58:23.408924  543705 cpu.go:275] no items to output this cycle
E0320 05:58:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:58:33.409789  543705 memory.go:184] no items to output this cycle
I0320 05:58:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 05:58:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:58:43.409793  543705 memory.go:191] Add success.
I0320 05:58:43.409797  543705 cpu.go:282] Add success.
I0320 05:58:43.419995  543705 net.go:648] Add success.
I0320 05:58:43.423122  543705 net.go:770] primary dev: ETH0
I0320 05:58:43.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:58:43.423148  543705 net.go:698] Add success.
I0320 05:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:58:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:58:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:58:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:58:53.409769  543705 memory.go:184] no items to output this cycle
I0320 05:58:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 05:59:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:59:03.409787  543705 memory.go:184] no items to output this cycle
I0320 05:59:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 05:59:13.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:59:13.409907  543705 memory.go:191] Add success.
W0320 05:59:13.409944  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 05:59:13.409960  543705 cpu.go:282] Add success.
W0320 05:59:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 05:59:13.409972  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 05:59:13.419711  543705 net.go:648] Add success.
I0320 05:59:13.422760  543705 net.go:770] primary dev: ETH0
I0320 05:59:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:59:13.422784  543705 net.go:698] Add success.
I0320 05:59:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 05:59:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 05:59:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 05:59:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 05:59:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 05:59:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 05:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 05:59:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:59:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:59:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 05:59:16.472449  543705 disk_local_worker.go:436] Get disk info: []
I0320 05:59:22.881675  543705 disk_info.go:125] begin check local disk info of client
I0320 05:59:22.884251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 05:59:22.884258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c5600 0xc0004c5640]
E0320 05:59:23.408980  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:59:23.408997  543705 memory.go:184] no items to output this cycle
I0320 05:59:23.409012  543705 cpu.go:275] no items to output this cycle
E0320 05:59:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:59:33.409768  543705 memory.go:184] no items to output this cycle
I0320 05:59:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 05:59:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:59:43.409811  543705 memory.go:191] Add success.
I0320 05:59:43.409819  543705 cpu.go:282] Add success.
I0320 05:59:43.419960  543705 net.go:648] Add success.
I0320 05:59:43.423033  543705 net.go:770] primary dev: ETH0
I0320 05:59:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0320 05:59:43.423058  543705 net.go:698] Add success.
I0320 05:59:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 05:59:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 05:59:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 05:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 05:59:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 05:59:53.409778  543705 memory.go:184] no items to output this cycle
E0320 06:00:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:00:03.409790  543705 memory.go:184] no items to output this cycle
I0320 06:00:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 06:00:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:00:13.409795  543705 memory.go:191] Add success.
I0320 06:00:13.409797  543705 cpu.go:282] Add success.
W0320 06:00:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:00:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:00:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:00:13.419771  543705 net.go:648] Add success.
I0320 06:00:13.422762  543705 net.go:770] primary dev: ETH0
I0320 06:00:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:00:13.422796  543705 net.go:698] Add success.
I0320 06:00:13.468770  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1001c76c-e783-4d14-9432-118425b75764","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:00:13.468802  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:00:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:00:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:00:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 06:00:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:00:14.456614  543705 disk_worker.go:494] system disk:vda1
I0320 06:00:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:00:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:00:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:00:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:00:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:00:22.885673  543705 disk_info.go:125] begin check local disk info of client
I0320 06:00:22.888125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:00:22.888131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c00 0xc0000c4c40]
E0320 06:00:23.407526  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:00:23.407538  543705 memory.go:184] no items to output this cycle
I0320 06:00:23.407546  543705 cpu.go:275] no items to output this cycle
E0320 06:00:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:00:33.409806  543705 memory.go:184] no items to output this cycle
I0320 06:00:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 06:00:38.169738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:00:38.169745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:00:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:00:43.410911  543705 memory.go:191] Add success.
I0320 06:00:43.409787  543705 cpu.go:282] Add success.
I0320 06:00:43.420658  543705 net.go:648] Add success.
I0320 06:00:43.423461  543705 net.go:770] primary dev: ETH0
I0320 06:00:43.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:00:43.423488  543705 net.go:698] Add success.
I0320 06:00:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:00:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:00:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:00:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:00:53.409778  543705 memory.go:184] no items to output this cycle
I0320 06:00:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 06:01:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:01:03.409788  543705 memory.go:184] no items to output this cycle
I0320 06:01:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 06:01:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:01:13.409783  543705 memory.go:191] Add success.
I0320 06:01:13.409801  543705 cpu.go:282] Add success.
W0320 06:01:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:01:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:01:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:01:13.420550  543705 net.go:648] Add success.
I0320 06:01:13.423217  543705 net.go:770] primary dev: ETH0
I0320 06:01:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:01:13.423240  543705 net.go:698] Add success.
I0320 06:01:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:01:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:01:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 06:01:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:01:14.456494  543705 disk_worker.go:494] system disk:vda1
I0320 06:01:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:01:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:01:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:01:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:01:22.889676  543705 disk_info.go:125] begin check local disk info of client
I0320 06:01:22.892316  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:01:22.892324  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003904c0 0xc000390500]
E0320 06:01:23.408974  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:01:23.408984  543705 cpu.go:275] no items to output this cycle
I0320 06:01:23.408988  543705 memory.go:184] no items to output this cycle
E0320 06:01:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:01:33.409766  543705 memory.go:184] no items to output this cycle
I0320 06:01:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 06:01:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:01:43.409785  543705 memory.go:191] Add success.
I0320 06:01:43.409804  543705 cpu.go:282] Add success.
I0320 06:01:43.419881  543705 net.go:648] Add success.
I0320 06:01:43.422576  543705 net.go:770] primary dev: ETH0
I0320 06:01:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:01:43.422601  543705 net.go:698] Add success.
I0320 06:01:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:01:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:01:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:01:53.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:01:53.410286  543705 memory.go:184] no items to output this cycle
I0320 06:01:53.410289  543705 cpu.go:275] no items to output this cycle
E0320 06:02:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:02:03.409798  543705 memory.go:184] no items to output this cycle
I0320 06:02:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 06:02:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:02:13.409797  543705 cpu.go:282] Add success.
I0320 06:02:13.409808  543705 memory.go:191] Add success.
W0320 06:02:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:02:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:02:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:02:13.420139  543705 net.go:648] Add success.
I0320 06:02:13.422650  543705 net.go:770] primary dev: ETH0
I0320 06:02:13.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:02:13.422675  543705 net.go:698] Add success.
W0320 06:02:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:02:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 06:02:14.455208  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:02:14.455929  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:02:14.455938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:02:14.455943  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:02:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 06:02:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:02:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:02:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:02:16.457909  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:02:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:02:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:02:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:02:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:02:22.893672  543705 disk_info.go:125] begin check local disk info of client
I0320 06:02:22.896070  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:02:22.896076  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e180 0xc00034e1c0]
E0320 06:02:23.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:02:23.407524  543705 memory.go:184] no items to output this cycle
I0320 06:02:23.407556  543705 cpu.go:275] no items to output this cycle
E0320 06:02:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:02:33.409773  543705 memory.go:184] no items to output this cycle
I0320 06:02:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 06:02:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:02:43.409811  543705 memory.go:191] Add success.
I0320 06:02:43.409817  543705 cpu.go:282] Add success.
I0320 06:02:43.419870  543705 net.go:648] Add success.
I0320 06:02:43.422585  543705 net.go:770] primary dev: ETH0
I0320 06:02:43.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:02:43.422638  543705 net.go:698] Add success.
I0320 06:02:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:02:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:02:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:02:53.409793  543705 memory.go:184] no items to output this cycle
I0320 06:02:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 06:03:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:03:03.409780  543705 memory.go:184] no items to output this cycle
I0320 06:03:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 06:03:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:03:13.409818  543705 memory.go:191] Add success.
I0320 06:03:13.409828  543705 cpu.go:282] Add success.
W0320 06:03:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:03:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:03:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:03:13.420227  543705 net.go:648] Add success.
I0320 06:03:13.423266  543705 net.go:770] primary dev: ETH0
I0320 06:03:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:03:13.423293  543705 net.go:698] Add success.
I0320 06:03:13.647138  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c511bbff-04e6-4726-bb4d-94b2764e41c3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:03:13.647175  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:03:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:03:14.455224  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:03:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0320 06:03:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:03:14.456722  543705 disk_worker.go:494] system disk:vda1
I0320 06:03:14.456750  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:03:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:03:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:03:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:03:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:03:22.897683  543705 disk_info.go:125] begin check local disk info of client
I0320 06:03:22.900236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:03:22.900244  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384500 0xc000384540]
E0320 06:03:23.408870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:03:23.408888  543705 memory.go:184] no items to output this cycle
I0320 06:03:23.408901  543705 cpu.go:275] no items to output this cycle
E0320 06:03:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:03:33.409770  543705 memory.go:184] no items to output this cycle
I0320 06:03:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 06:03:38.173563  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:03:38.173569  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:03:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:03:43.410709  543705 memory.go:191] Add success.
I0320 06:03:43.409787  543705 cpu.go:282] Add success.
I0320 06:03:43.420613  543705 net.go:648] Add success.
I0320 06:03:43.423250  543705 net.go:770] primary dev: ETH0
I0320 06:03:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:03:43.423278  543705 net.go:698] Add success.
I0320 06:03:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:03:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:03:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:03:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:03:53.409761  543705 memory.go:184] no items to output this cycle
I0320 06:03:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 06:04:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:04:03.409797  543705 memory.go:184] no items to output this cycle
I0320 06:04:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 06:04:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:04:13.409780  543705 memory.go:191] Add success.
W0320 06:04:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 06:04:13.409812  543705 cpu.go:282] Add success.
W0320 06:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:04:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:04:13.420148  543705 net.go:648] Add success.
I0320 06:04:13.422905  543705 net.go:770] primary dev: ETH0
I0320 06:04:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:04:13.422929  543705 net.go:698] Add success.
I0320 06:04:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:04:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:04:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 06:04:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:04:14.456839  543705 disk_worker.go:494] system disk:vda1
I0320 06:04:14.456868  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:04:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:04:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:04:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:04:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:04:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:04:22.901686  543705 disk_info.go:125] begin check local disk info of client
I0320 06:04:22.904123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:04:22.904129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5240 0xc0000c5280]
E0320 06:04:23.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:04:23.407520  543705 memory.go:184] no items to output this cycle
I0320 06:04:23.407553  543705 cpu.go:275] no items to output this cycle
E0320 06:04:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:04:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 06:04:33.409792  543705 memory.go:184] no items to output this cycle
E0320 06:04:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:04:43.409812  543705 memory.go:191] Add success.
I0320 06:04:43.409819  543705 cpu.go:282] Add success.
I0320 06:04:43.419884  543705 net.go:648] Add success.
I0320 06:04:43.422497  543705 net.go:770] primary dev: ETH0
I0320 06:04:43.422511  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:04:43.422523  543705 net.go:698] Add success.
I0320 06:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:04:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:04:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:04:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:04:53.409769  543705 memory.go:184] no items to output this cycle
I0320 06:04:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 06:05:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:05:03.409796  543705 memory.go:184] no items to output this cycle
I0320 06:05:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 06:05:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:05:13.409817  543705 memory.go:191] Add success.
I0320 06:05:13.409825  543705 cpu.go:282] Add success.
W0320 06:05:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:05:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:05:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:05:13.419953  543705 net.go:770] primary dev: ETH0
I0320 06:05:13.419965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:05:13.419978  543705 net.go:698] Add success.
I0320 06:05:13.420361  543705 net.go:648] Add success.
I0320 06:05:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:05:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:05:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 06:05:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:05:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 06:05:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:05:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:05:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:05:22.905692  543705 disk_info.go:125] begin check local disk info of client
I0320 06:05:22.908245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:05:22.908253  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330040 0xc000330080]
E0320 06:05:23.408841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:05:23.408861  543705 memory.go:184] no items to output this cycle
I0320 06:05:23.408874  543705 cpu.go:275] no items to output this cycle
E0320 06:05:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:05:33.409777  543705 memory.go:184] no items to output this cycle
I0320 06:05:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 06:05:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:05:43.409788  543705 memory.go:191] Add success.
I0320 06:05:43.409788  543705 cpu.go:282] Add success.
I0320 06:05:43.420020  543705 net.go:648] Add success.
I0320 06:05:43.422756  543705 net.go:770] primary dev: ETH0
I0320 06:05:43.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:05:43.422782  543705 net.go:698] Add success.
I0320 06:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:05:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:05:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:05:53.409780  543705 memory.go:184] no items to output this cycle
I0320 06:05:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 06:06:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:06:03.409787  543705 memory.go:184] no items to output this cycle
I0320 06:06:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 06:06:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:06:13.409787  543705 memory.go:191] Add success.
I0320 06:06:13.409806  543705 cpu.go:282] Add success.
W0320 06:06:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:06:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:06:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:06:13.420042  543705 net.go:648] Add success.
I0320 06:06:13.422901  543705 net.go:770] primary dev: ETH0
I0320 06:06:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:06:13.422927  543705 net.go:698] Add success.
I0320 06:06:13.468986  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ba8f4fcb-e0cd-4544-964f-22a093e8456d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:06:13.469025  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:06:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:06:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 06:06:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:06:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 06:06:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:06:15.456009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:06:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:06:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:06:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:06:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:06:22.909671  543705 disk_info.go:125] begin check local disk info of client
I0320 06:06:22.912100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:06:22.912106  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492080 0xc0004920c0]
E0320 06:06:23.407527  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:06:23.407541  543705 memory.go:184] no items to output this cycle
I0320 06:06:23.407550  543705 cpu.go:275] no items to output this cycle
E0320 06:06:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:06:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 06:06:33.409780  543705 memory.go:184] no items to output this cycle
I0320 06:06:38.173735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:06:38.173741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:06:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:06:43.410673  543705 memory.go:191] Add success.
I0320 06:06:43.409806  543705 cpu.go:282] Add success.
I0320 06:06:43.420304  543705 net.go:648] Add success.
I0320 06:06:43.422897  543705 net.go:770] primary dev: ETH0
I0320 06:06:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:06:43.422926  543705 net.go:698] Add success.
I0320 06:06:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:06:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:06:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:06:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:06:53.409794  543705 memory.go:184] no items to output this cycle
I0320 06:06:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 06:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:07:03.409776  543705 memory.go:184] no items to output this cycle
I0320 06:07:03.409845  543705 cpu.go:275] no items to output this cycle
E0320 06:07:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:07:13.409810  543705 memory.go:191] Add success.
I0320 06:07:13.409829  543705 cpu.go:282] Add success.
W0320 06:07:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:07:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:07:13.420107  543705 net.go:648] Add success.
I0320 06:07:13.422881  543705 net.go:770] primary dev: ETH0
I0320 06:07:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:07:13.422907  543705 net.go:698] Add success.
I0320 06:07:13.453452  543705 event_worker.go:152] Polling the log file for events...
W0320 06:07:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:07:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 06:07:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:07:14.455916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:07:14.455925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:07:14.455931  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:07:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 06:07:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:07:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:07:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:07:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:07:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:07:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:07:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:07:16.472351  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:07:22.913683  543705 disk_info.go:125] begin check local disk info of client
I0320 06:07:22.916242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:07:22.916249  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484000 0xc000484040]
E0320 06:07:23.408800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:07:23.408818  543705 memory.go:184] no items to output this cycle
I0320 06:07:23.408833  543705 cpu.go:275] no items to output this cycle
E0320 06:07:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:07:33.409774  543705 memory.go:184] no items to output this cycle
I0320 06:07:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 06:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:07:43.409784  543705 cpu.go:282] Add success.
I0320 06:07:43.409790  543705 memory.go:191] Add success.
I0320 06:07:43.420046  543705 net.go:648] Add success.
I0320 06:07:43.422900  543705 net.go:770] primary dev: ETH0
I0320 06:07:43.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:07:43.422943  543705 net.go:698] Add success.
I0320 06:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:07:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:07:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:07:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:07:53.409769  543705 memory.go:184] no items to output this cycle
I0320 06:07:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 06:08:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:08:03.409783  543705 memory.go:184] no items to output this cycle
I0320 06:08:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 06:08:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:08:13.409811  543705 memory.go:191] Add success.
I0320 06:08:13.409821  543705 cpu.go:282] Add success.
W0320 06:08:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:08:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:08:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:08:13.420171  543705 net.go:648] Add success.
I0320 06:08:13.423304  543705 net.go:770] primary dev: ETH0
I0320 06:08:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:08:13.423330  543705 net.go:698] Add success.
I0320 06:08:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:08:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:08:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 06:08:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:08:14.456571  543705 disk_worker.go:494] system disk:vda1
I0320 06:08:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:08:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:08:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:08:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:08:16.472357  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:08:22.917668  543705 disk_info.go:125] begin check local disk info of client
I0320 06:08:22.920154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:08:22.920162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003846c0 0xc000384700]
E0320 06:08:23.408671  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:08:23.408680  543705 cpu.go:275] no items to output this cycle
I0320 06:08:23.408683  543705 memory.go:184] no items to output this cycle
E0320 06:08:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:08:33.409797  543705 memory.go:184] no items to output this cycle
I0320 06:08:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 06:08:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:08:43.409783  543705 memory.go:191] Add success.
I0320 06:08:43.409784  543705 cpu.go:282] Add success.
I0320 06:08:43.419894  543705 net.go:648] Add success.
I0320 06:08:43.422509  543705 net.go:770] primary dev: ETH0
I0320 06:08:43.422524  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:08:43.422539  543705 net.go:698] Add success.
I0320 06:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:08:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:08:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:08:53.409779  543705 memory.go:184] no items to output this cycle
I0320 06:08:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 06:09:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:09:03.409797  543705 memory.go:184] no items to output this cycle
I0320 06:09:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 06:09:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:09:13.409806  543705 memory.go:191] Add success.
I0320 06:09:13.409813  543705 cpu.go:282] Add success.
W0320 06:09:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:09:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:09:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:09:13.420090  543705 net.go:648] Add success.
I0320 06:09:13.422913  543705 net.go:770] primary dev: ETH0
I0320 06:09:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:09:13.422937  543705 net.go:698] Add success.
I0320 06:09:13.468347  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0ed36070-040c-4b92-ad66-132967d63014","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:09:13.468380  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:09:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:09:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 06:09:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:09:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 06:09:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:09:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:09:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:09:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:09:22.921681  543705 disk_info.go:125] begin check local disk info of client
I0320 06:09:22.924314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:09:22.924322  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033f140 0xc00033f180]
E0320 06:09:23.408791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:09:23.408808  543705 memory.go:184] no items to output this cycle
I0320 06:09:23.408841  543705 cpu.go:275] no items to output this cycle
E0320 06:09:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:09:33.409809  543705 memory.go:184] no items to output this cycle
I0320 06:09:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 06:09:38.173878  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:09:38.173884  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:09:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:09:43.410783  543705 memory.go:191] Add success.
I0320 06:09:43.409786  543705 cpu.go:282] Add success.
I0320 06:09:43.420314  543705 net.go:770] primary dev: ETH0
I0320 06:09:43.420327  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:09:43.420340  543705 net.go:698] Add success.
I0320 06:09:43.420713  543705 net.go:648] Add success.
I0320 06:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:09:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:09:53.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:09:53.409758  543705 memory.go:184] no items to output this cycle
I0320 06:09:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 06:10:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:10:03.409810  543705 memory.go:184] no items to output this cycle
I0320 06:10:03.409963  543705 cpu.go:275] no items to output this cycle
E0320 06:10:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:10:13.409780  543705 memory.go:191] Add success.
I0320 06:10:13.409800  543705 cpu.go:282] Add success.
W0320 06:10:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:10:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:10:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:10:13.420139  543705 net.go:648] Add success.
I0320 06:10:13.422699  543705 net.go:770] primary dev: ETH0
I0320 06:10:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:10:13.422723  543705 net.go:698] Add success.
I0320 06:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:10:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:10:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 06:10:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:10:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 06:10:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:10:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:10:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:10:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:10:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:10:22.925676  543705 disk_info.go:125] begin check local disk info of client
I0320 06:10:22.928128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:10:22.928133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0580 0xc0002a05c0]
E0320 06:10:23.408624  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:10:23.408640  543705 memory.go:184] no items to output this cycle
I0320 06:10:23.408656  543705 cpu.go:275] no items to output this cycle
E0320 06:10:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:10:33.409792  543705 memory.go:184] no items to output this cycle
I0320 06:10:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 06:10:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:10:43.409780  543705 memory.go:191] Add success.
I0320 06:10:43.409810  543705 cpu.go:282] Add success.
I0320 06:10:43.420002  543705 net.go:648] Add success.
I0320 06:10:43.422529  543705 net.go:770] primary dev: ETH0
I0320 06:10:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:10:43.422559  543705 net.go:698] Add success.
I0320 06:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:10:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:10:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:10:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:10:53.409767  543705 memory.go:184] no items to output this cycle
I0320 06:10:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 06:11:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:11:03.409815  543705 memory.go:184] no items to output this cycle
I0320 06:11:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 06:11:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:11:13.409784  543705 memory.go:191] Add success.
I0320 06:11:13.409803  543705 cpu.go:282] Add success.
W0320 06:11:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:11:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:11:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:11:13.420198  543705 net.go:648] Add success.
I0320 06:11:13.423321  543705 net.go:770] primary dev: ETH0
I0320 06:11:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:11:13.423345  543705 net.go:698] Add success.
I0320 06:11:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:11:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:11:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 06:11:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:11:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 06:11:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:11:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:11:16.472410  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:11:22.929677  543705 disk_info.go:125] begin check local disk info of client
I0320 06:11:22.932240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:11:22.932247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e4ac0 0xc0004e4b00]
E0320 06:11:23.408710  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:11:23.408729  543705 memory.go:184] no items to output this cycle
I0320 06:11:23.408742  543705 cpu.go:275] no items to output this cycle
E0320 06:11:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:11:33.409782  543705 memory.go:184] no items to output this cycle
I0320 06:11:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 06:11:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:11:43.409782  543705 memory.go:191] Add success.
I0320 06:11:43.409800  543705 cpu.go:282] Add success.
I0320 06:11:43.419959  543705 net.go:648] Add success.
I0320 06:11:43.422558  543705 net.go:770] primary dev: ETH0
I0320 06:11:43.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:11:43.422583  543705 net.go:698] Add success.
I0320 06:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:11:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:11:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:11:53.409762  543705 memory.go:184] no items to output this cycle
I0320 06:11:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 06:12:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 06:12:03.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:12:03.409829  543705 memory.go:184] no items to output this cycle
E0320 06:12:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:12:13.409795  543705 memory.go:191] Add success.
I0320 06:12:13.409796  543705 cpu.go:282] Add success.
W0320 06:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:12:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:12:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:12:13.420139  543705 net.go:648] Add success.
I0320 06:12:13.423039  543705 net.go:770] primary dev: ETH0
I0320 06:12:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:12:13.423064  543705 net.go:698] Add success.
I0320 06:12:13.551311  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a201e6aa-4253-421f-b224-70cab2828e11","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:12:13.551346  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 06:12:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:12:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 06:12:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:12:14.456834  543705 disk_worker.go:494] system disk:vda1
E0320 06:12:14.456851  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:12:14.456859  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:12:14.456865  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:12:14.456888  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:12:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:12:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:12:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:12:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:12:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:12:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:12:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:12:22.933676  543705 disk_info.go:125] begin check local disk info of client
I0320 06:12:22.936074  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:12:22.936080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7300 0xc0001c7340]
E0320 06:12:23.407530  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:12:23.407546  543705 memory.go:184] no items to output this cycle
I0320 06:12:23.407561  543705 cpu.go:275] no items to output this cycle
E0320 06:12:33.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:12:33.409860  543705 memory.go:184] no items to output this cycle
I0320 06:12:33.409934  543705 cpu.go:275] no items to output this cycle
I0320 06:12:38.174025  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:12:38.174031  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:12:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:12:43.410688  543705 memory.go:191] Add success.
I0320 06:12:43.409810  543705 cpu.go:282] Add success.
I0320 06:12:43.420439  543705 net.go:648] Add success.
I0320 06:12:43.423185  543705 net.go:770] primary dev: ETH0
I0320 06:12:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:12:43.423214  543705 net.go:698] Add success.
I0320 06:12:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:12:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:12:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:12:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:12:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 06:12:53.409788  543705 memory.go:184] no items to output this cycle
E0320 06:13:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:13:03.409814  543705 memory.go:184] no items to output this cycle
I0320 06:13:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 06:13:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:13:13.409798  543705 memory.go:191] Add success.
I0320 06:13:13.409798  543705 cpu.go:282] Add success.
W0320 06:13:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:13:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:13:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:13:13.420120  543705 net.go:648] Add success.
I0320 06:13:13.422711  543705 net.go:770] primary dev: ETH0
I0320 06:13:13.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:13:13.422737  543705 net.go:698] Add success.
I0320 06:13:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:13:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:13:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 06:13:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:13:14.456599  543705 disk_worker.go:494] system disk:vda1
I0320 06:13:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:13:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:13:22.937684  543705 disk_info.go:125] begin check local disk info of client
I0320 06:13:22.940217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:13:22.940224  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025c440 0xc00025c480]
E0320 06:13:23.408655  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:13:23.408672  543705 memory.go:184] no items to output this cycle
I0320 06:13:23.408686  543705 cpu.go:275] no items to output this cycle
E0320 06:13:33.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:13:33.409912  543705 cpu.go:275] no items to output this cycle
I0320 06:13:33.409920  543705 memory.go:184] no items to output this cycle
E0320 06:13:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:13:43.409797  543705 memory.go:191] Add success.
I0320 06:13:43.409801  543705 cpu.go:282] Add success.
I0320 06:13:43.420084  543705 net.go:648] Add success.
I0320 06:13:43.422933  543705 net.go:770] primary dev: ETH0
I0320 06:13:43.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:13:43.422957  543705 net.go:698] Add success.
I0320 06:13:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:13:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:13:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:13:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:13:53.409771  543705 memory.go:184] no items to output this cycle
I0320 06:13:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 06:14:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:14:03.409813  543705 memory.go:184] no items to output this cycle
I0320 06:14:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 06:14:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:14:13.409789  543705 memory.go:191] Add success.
I0320 06:14:13.409812  543705 cpu.go:282] Add success.
W0320 06:14:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:14:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:14:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:14:13.420144  543705 net.go:648] Add success.
I0320 06:14:13.422766  543705 net.go:770] primary dev: ETH0
I0320 06:14:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:14:13.422794  543705 net.go:698] Add success.
I0320 06:14:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:14:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:14:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 06:14:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:14:14.456497  543705 disk_worker.go:494] system disk:vda1
I0320 06:14:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:14:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:14:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:14:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:14:22.941673  543705 disk_info.go:125] begin check local disk info of client
I0320 06:14:22.944097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:14:22.944103  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466340 0xc000466380]
E0320 06:14:23.408512  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:14:23.408529  543705 memory.go:184] no items to output this cycle
I0320 06:14:23.408542  543705 cpu.go:275] no items to output this cycle
E0320 06:14:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:14:33.409787  543705 memory.go:184] no items to output this cycle
I0320 06:14:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 06:14:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:14:43.409779  543705 memory.go:191] Add success.
I0320 06:14:43.409806  543705 cpu.go:282] Add success.
I0320 06:14:43.419902  543705 net.go:648] Add success.
I0320 06:14:43.422770  543705 net.go:770] primary dev: ETH0
I0320 06:14:43.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:14:43.422796  543705 net.go:698] Add success.
I0320 06:14:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:14:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:14:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:14:53.409793  543705 memory.go:184] no items to output this cycle
I0320 06:14:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 06:15:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:15:03.409809  543705 memory.go:184] no items to output this cycle
I0320 06:15:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 06:15:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:15:13.409799  543705 cpu.go:282] Add success.
I0320 06:15:13.409802  543705 memory.go:191] Add success.
W0320 06:15:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:15:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:15:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:15:13.420226  543705 net.go:648] Add success.
I0320 06:15:13.423206  543705 net.go:770] primary dev: ETH0
I0320 06:15:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:15:13.423235  543705 net.go:698] Add success.
I0320 06:15:13.527931  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b6ac6934-bd64-4c97-afa9-737f656fa755","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:15:13.527965  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:15:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:15:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:15:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 06:15:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:15:14.456542  543705 disk_worker.go:494] system disk:vda1
I0320 06:15:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:15:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:15:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:15:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:15:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:15:22.945688  543705 disk_info.go:125] begin check local disk info of client
I0320 06:15:22.948270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:15:22.948276  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028a140 0xc00028a180]
E0320 06:15:23.408659  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:15:23.408678  543705 memory.go:184] no items to output this cycle
I0320 06:15:23.408691  543705 cpu.go:275] no items to output this cycle
E0320 06:15:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:15:33.409761  543705 memory.go:184] no items to output this cycle
I0320 06:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 06:15:38.174171  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:15:38.174177  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:15:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:15:43.410571  543705 memory.go:191] Add success.
I0320 06:15:43.409795  543705 cpu.go:282] Add success.
I0320 06:15:43.420376  543705 net.go:648] Add success.
I0320 06:15:43.422976  543705 net.go:770] primary dev: ETH0
I0320 06:15:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:15:43.423000  543705 net.go:698] Add success.
I0320 06:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:15:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:15:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:15:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:15:53.409774  543705 memory.go:184] no items to output this cycle
I0320 06:15:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 06:16:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 06:16:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:16:03.409807  543705 memory.go:184] no items to output this cycle
E0320 06:16:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:16:13.409819  543705 memory.go:191] Add success.
I0320 06:16:13.409825  543705 cpu.go:282] Add success.
W0320 06:16:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:16:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:16:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:16:13.420129  543705 net.go:648] Add success.
I0320 06:16:13.423130  543705 net.go:770] primary dev: ETH0
I0320 06:16:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:16:13.423154  543705 net.go:698] Add success.
I0320 06:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:16:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:16:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 06:16:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:16:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 06:16:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:16:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:16:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:16:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:16:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:16:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:16:22.949674  543705 disk_info.go:125] begin check local disk info of client
I0320 06:16:22.952106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:16:22.952112  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a440 0xc00039a480]
E0320 06:16:23.407860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:16:23.407864  543705 cpu.go:275] no items to output this cycle
I0320 06:16:23.407872  543705 memory.go:184] no items to output this cycle
E0320 06:16:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:16:33.409804  543705 memory.go:184] no items to output this cycle
I0320 06:16:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 06:16:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:16:43.409779  543705 memory.go:191] Add success.
I0320 06:16:43.409797  543705 cpu.go:282] Add success.
I0320 06:16:43.419859  543705 net.go:648] Add success.
I0320 06:16:43.422652  543705 net.go:770] primary dev: ETH0
I0320 06:16:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:16:43.422678  543705 net.go:698] Add success.
I0320 06:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:16:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:16:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:16:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:16:53.409798  543705 memory.go:184] no items to output this cycle
I0320 06:16:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 06:17:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:17:03.409777  543705 memory.go:184] no items to output this cycle
I0320 06:17:03.409838  543705 cpu.go:275] no items to output this cycle
E0320 06:17:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:17:13.409817  543705 memory.go:191] Add success.
I0320 06:17:13.409824  543705 cpu.go:282] Add success.
W0320 06:17:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:17:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:17:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:17:13.420130  543705 net.go:648] Add success.
I0320 06:17:13.423216  543705 net.go:770] primary dev: ETH0
I0320 06:17:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:17:13.423246  543705 net.go:698] Add success.
I0320 06:17:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0320 06:17:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:17:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 06:17:14.455157  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:17:14.456922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:17:14.456931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:17:14.456937  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:17:14.456992  543705 disk_worker.go:494] system disk:vda1
I0320 06:17:14.457019  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:17:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:17:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:17:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:17:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:17:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:17:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:17:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:17:22.953686  543705 disk_info.go:125] begin check local disk info of client
I0320 06:17:22.956206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:17:22.956213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2280 0xc0003b22c0]
E0320 06:17:23.408581  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:17:23.408600  543705 memory.go:184] no items to output this cycle
I0320 06:17:23.408614  543705 cpu.go:275] no items to output this cycle
E0320 06:17:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:17:33.409769  543705 memory.go:184] no items to output this cycle
I0320 06:17:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 06:17:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:17:43.409813  543705 memory.go:191] Add success.
I0320 06:17:43.409814  543705 cpu.go:282] Add success.
I0320 06:17:43.419953  543705 net.go:648] Add success.
I0320 06:17:43.422824  543705 net.go:770] primary dev: ETH0
I0320 06:17:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:17:43.422856  543705 net.go:698] Add success.
I0320 06:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:17:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:17:53.409773  543705 memory.go:184] no items to output this cycle
I0320 06:17:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 06:18:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:18:03.409779  543705 memory.go:184] no items to output this cycle
I0320 06:18:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 06:18:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:18:13.409803  543705 memory.go:191] Add success.
I0320 06:18:13.409803  543705 cpu.go:282] Add success.
W0320 06:18:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:18:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:18:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:18:13.419733  543705 net.go:648] Add success.
I0320 06:18:13.422236  543705 net.go:770] primary dev: ETH0
I0320 06:18:13.422249  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:18:13.422259  543705 net.go:698] Add success.
I0320 06:18:13.468734  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"de403176-931e-4443-a3ba-b56d510934ee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:18:13.468764  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:18:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:18:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 06:18:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:18:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 06:18:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:18:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:18:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:18:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:18:16.472394  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:18:22.957675  543705 disk_info.go:125] begin check local disk info of client
I0320 06:18:22.960065  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:18:22.960072  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256800 0xc000256840]
E0320 06:18:23.408396  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:18:23.408412  543705 memory.go:184] no items to output this cycle
I0320 06:18:23.408425  543705 cpu.go:275] no items to output this cycle
E0320 06:18:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:18:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 06:18:33.409793  543705 memory.go:184] no items to output this cycle
I0320 06:18:38.177577  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:18:38.177584  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:18:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:18:43.410856  543705 memory.go:191] Add success.
I0320 06:18:43.409822  543705 cpu.go:282] Add success.
I0320 06:18:43.420615  543705 net.go:648] Add success.
I0320 06:18:43.423393  543705 net.go:770] primary dev: ETH0
I0320 06:18:43.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:18:43.423419  543705 net.go:698] Add success.
I0320 06:18:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:18:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:18:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:18:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:18:53.409786  543705 memory.go:184] no items to output this cycle
I0320 06:18:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 06:19:03.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:19:03.409857  543705 memory.go:184] no items to output this cycle
I0320 06:19:03.409929  543705 cpu.go:275] no items to output this cycle
E0320 06:19:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:19:13.409808  543705 memory.go:191] Add success.
I0320 06:19:13.409809  543705 cpu.go:282] Add success.
W0320 06:19:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:19:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:19:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:19:13.420186  543705 net.go:648] Add success.
I0320 06:19:13.422917  543705 net.go:770] primary dev: ETH0
I0320 06:19:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:19:13.422945  543705 net.go:698] Add success.
I0320 06:19:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:19:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:19:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 06:19:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:19:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 06:19:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:19:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:19:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:19:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:19:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:19:16.472419  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:19:22.961683  543705 disk_info.go:125] begin check local disk info of client
I0320 06:19:22.964192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:19:22.964199  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475680 0xc0004756c0]
E0320 06:19:23.407536  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:19:23.407553  543705 memory.go:184] no items to output this cycle
I0320 06:19:23.407569  543705 cpu.go:275] no items to output this cycle
E0320 06:19:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:19:33.409804  543705 memory.go:184] no items to output this cycle
I0320 06:19:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 06:19:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:19:43.409796  543705 memory.go:191] Add success.
I0320 06:19:43.409805  543705 cpu.go:282] Add success.
I0320 06:19:43.420043  543705 net.go:648] Add success.
I0320 06:19:43.422641  543705 net.go:770] primary dev: ETH0
I0320 06:19:43.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:19:43.422672  543705 net.go:698] Add success.
I0320 06:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:19:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:19:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:19:53.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:19:53.409906  543705 memory.go:184] no items to output this cycle
I0320 06:19:53.409907  543705 cpu.go:275] no items to output this cycle
E0320 06:20:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:20:03.409761  543705 memory.go:184] no items to output this cycle
I0320 06:20:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 06:20:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:20:13.409824  543705 memory.go:191] Add success.
I0320 06:20:13.409829  543705 cpu.go:282] Add success.
W0320 06:20:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:20:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:20:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:20:13.420206  543705 net.go:648] Add success.
I0320 06:20:13.422804  543705 net.go:770] primary dev: ETH0
I0320 06:20:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:20:13.422832  543705 net.go:698] Add success.
I0320 06:20:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:20:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:20:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 06:20:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:20:14.456587  543705 disk_worker.go:494] system disk:vda1
I0320 06:20:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:20:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:20:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:20:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:20:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:20:16.472416  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:20:22.965671  543705 disk_info.go:125] begin check local disk info of client
I0320 06:20:22.968103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:20:22.968108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4680 0xc0000c46c0]
E0320 06:20:23.408402  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:20:23.408418  543705 memory.go:184] no items to output this cycle
I0320 06:20:23.408433  543705 cpu.go:275] no items to output this cycle
E0320 06:20:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:20:33.409782  543705 memory.go:184] no items to output this cycle
I0320 06:20:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 06:20:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:20:43.409788  543705 memory.go:191] Add success.
I0320 06:20:43.409804  543705 cpu.go:282] Add success.
I0320 06:20:43.420003  543705 net.go:648] Add success.
I0320 06:20:43.423022  543705 net.go:770] primary dev: ETH0
I0320 06:20:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:20:43.423088  543705 net.go:698] Add success.
I0320 06:20:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:20:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:20:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:20:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:20:53.409783  543705 memory.go:184] no items to output this cycle
I0320 06:20:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 06:21:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:21:03.409775  543705 memory.go:184] no items to output this cycle
I0320 06:21:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 06:21:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:21:13.409803  543705 memory.go:191] Add success.
I0320 06:21:13.409802  543705 cpu.go:282] Add success.
W0320 06:21:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:21:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:21:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:21:13.420046  543705 net.go:648] Add success.
I0320 06:21:13.423095  543705 net.go:770] primary dev: ETH0
I0320 06:21:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:21:13.423120  543705 net.go:698] Add success.
I0320 06:21:13.464318  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2de38162-f71d-4977-ae10-c5369915936d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:21:13.464353  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:21:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:21:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:21:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 06:21:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:21:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 06:21:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:21:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:21:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:21:16.472370  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:21:22.969686  543705 disk_info.go:125] begin check local disk info of client
I0320 06:21:22.972248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:21:22.972256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5780 0xc0004b57c0]
E0320 06:21:23.408501  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:21:23.408515  543705 memory.go:184] no items to output this cycle
I0320 06:21:23.408550  543705 cpu.go:275] no items to output this cycle
E0320 06:21:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:21:33.409767  543705 memory.go:184] no items to output this cycle
I0320 06:21:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 06:21:38.177729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:21:38.177736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:21:43.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:21:43.410734  543705 memory.go:191] Add success.
I0320 06:21:43.409994  543705 cpu.go:282] Add success.
I0320 06:21:43.419723  543705 net.go:648] Add success.
I0320 06:21:43.422488  543705 net.go:770] primary dev: ETH0
I0320 06:21:43.422501  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:21:43.422513  543705 net.go:698] Add success.
I0320 06:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:21:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:21:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:21:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:21:53.409787  543705 memory.go:184] no items to output this cycle
I0320 06:21:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 06:22:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:22:03.409798  543705 memory.go:184] no items to output this cycle
I0320 06:22:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 06:22:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:22:13.409789  543705 memory.go:191] Add success.
I0320 06:22:13.409815  543705 cpu.go:282] Add success.
W0320 06:22:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:22:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:22:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:22:13.420103  543705 net.go:648] Add success.
I0320 06:22:13.422905  543705 net.go:770] primary dev: ETH0
I0320 06:22:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:22:13.422931  543705 net.go:698] Add success.
W0320 06:22:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:22:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 06:22:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:22:14.456866  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:22:14.456875  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:22:14.456881  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:22:14.456952  543705 disk_worker.go:494] system disk:vda1
I0320 06:22:14.456992  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:22:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:22:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:22:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:22:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:22:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:22:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:22:16.472362  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:22:22.973675  543705 disk_info.go:125] begin check local disk info of client
I0320 06:22:22.976068  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:22:22.976075  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4400 0xc0004b4440]
E0320 06:22:23.408318  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:22:23.408334  543705 memory.go:184] no items to output this cycle
I0320 06:22:23.408351  543705 cpu.go:275] no items to output this cycle
E0320 06:22:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:22:33.409773  543705 memory.go:184] no items to output this cycle
I0320 06:22:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 06:22:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:22:43.409797  543705 memory.go:191] Add success.
I0320 06:22:43.409800  543705 cpu.go:282] Add success.
I0320 06:22:43.419970  543705 net.go:648] Add success.
I0320 06:22:43.422941  543705 net.go:770] primary dev: ETH0
I0320 06:22:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:22:43.422969  543705 net.go:698] Add success.
I0320 06:22:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:22:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:22:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:22:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:22:53.409806  543705 memory.go:184] no items to output this cycle
I0320 06:22:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 06:23:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:23:03.409783  543705 memory.go:184] no items to output this cycle
I0320 06:23:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 06:23:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:23:13.409801  543705 cpu.go:282] Add success.
I0320 06:23:13.409814  543705 memory.go:191] Add success.
W0320 06:23:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:23:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:23:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:23:13.420102  543705 net.go:648] Add success.
I0320 06:23:13.423085  543705 net.go:770] primary dev: ETH0
I0320 06:23:13.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:23:13.423115  543705 net.go:698] Add success.
I0320 06:23:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:23:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:23:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 06:23:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:23:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 06:23:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:23:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:23:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:23:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:23:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:23:22.977685  543705 disk_info.go:125] begin check local disk info of client
I0320 06:23:22.980245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:23:22.980253  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003147c0 0xc000314800]
E0320 06:23:23.407539  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:23:23.407557  543705 memory.go:184] no items to output this cycle
I0320 06:23:23.407568  543705 cpu.go:275] no items to output this cycle
E0320 06:23:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:23:33.409785  543705 memory.go:184] no items to output this cycle
I0320 06:23:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 06:23:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:23:43.409786  543705 memory.go:191] Add success.
I0320 06:23:43.409805  543705 cpu.go:282] Add success.
I0320 06:23:43.419966  543705 net.go:648] Add success.
I0320 06:23:43.422768  543705 net.go:770] primary dev: ETH0
I0320 06:23:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:23:43.422794  543705 net.go:698] Add success.
I0320 06:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:23:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:23:53.409781  543705 memory.go:184] no items to output this cycle
I0320 06:23:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 06:24:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:24:03.409778  543705 memory.go:184] no items to output this cycle
I0320 06:24:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 06:24:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:24:13.409817  543705 memory.go:191] Add success.
I0320 06:24:13.409824  543705 cpu.go:282] Add success.
W0320 06:24:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:24:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:24:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:24:13.420147  543705 net.go:648] Add success.
I0320 06:24:13.422979  543705 net.go:770] primary dev: ETH0
I0320 06:24:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:24:13.423008  543705 net.go:698] Add success.
I0320 06:24:13.469711  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a5583943-f79e-4f95-9ea7-5ba6f1a8edd2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:24:13.469745  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:24:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:24:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:24:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 06:24:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:24:14.456641  543705 disk_worker.go:494] system disk:vda1
I0320 06:24:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:24:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:24:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:24:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:24:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:24:22.981672  543705 disk_info.go:125] begin check local disk info of client
I0320 06:24:22.984112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:24:22.984118  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e940 0xc00037e980]
E0320 06:24:23.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:24:23.407522  543705 memory.go:184] no items to output this cycle
I0320 06:24:23.407549  543705 cpu.go:275] no items to output this cycle
E0320 06:24:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:24:33.409793  543705 memory.go:184] no items to output this cycle
I0320 06:24:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 06:24:38.181589  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:24:38.181596  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:24:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:24:43.410609  543705 memory.go:191] Add success.
I0320 06:24:43.409827  543705 cpu.go:282] Add success.
I0320 06:24:43.420319  543705 net.go:648] Add success.
I0320 06:24:43.423375  543705 net.go:770] primary dev: ETH0
I0320 06:24:43.423388  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:24:43.423400  543705 net.go:698] Add success.
I0320 06:24:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:24:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:24:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:24:53.409788  543705 cpu.go:275] no items to output this cycle
I0320 06:24:53.409790  543705 memory.go:184] no items to output this cycle
E0320 06:25:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:25:03.409777  543705 memory.go:184] no items to output this cycle
I0320 06:25:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 06:25:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:25:13.409783  543705 memory.go:191] Add success.
W0320 06:25:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 06:25:13.409812  543705 cpu.go:282] Add success.
W0320 06:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:25:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:25:13.420057  543705 net.go:648] Add success.
I0320 06:25:13.422591  543705 net.go:770] primary dev: ETH0
I0320 06:25:13.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:25:13.422616  543705 net.go:698] Add success.
I0320 06:25:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:25:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:25:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 06:25:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:25:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 06:25:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:25:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:25:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:25:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:25:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:25:22.985683  543705 disk_info.go:125] begin check local disk info of client
I0320 06:25:22.988055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:25:22.988064  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352400 0xc000352440]
E0320 06:25:23.407551  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:25:23.407567  543705 memory.go:184] no items to output this cycle
I0320 06:25:23.407568  543705 cpu.go:275] no items to output this cycle
E0320 06:25:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:25:33.409780  543705 memory.go:184] no items to output this cycle
I0320 06:25:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 06:25:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:25:43.409814  543705 memory.go:191] Add success.
I0320 06:25:43.409828  543705 cpu.go:282] Add success.
I0320 06:25:43.420035  543705 net.go:648] Add success.
I0320 06:25:43.423173  543705 net.go:770] primary dev: ETH0
I0320 06:25:43.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:25:43.423203  543705 net.go:698] Add success.
I0320 06:25:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:25:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:25:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:25:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:25:53.409802  543705 memory.go:184] no items to output this cycle
I0320 06:25:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 06:26:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:26:03.409808  543705 memory.go:184] no items to output this cycle
I0320 06:26:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 06:26:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:26:13.409791  543705 memory.go:191] Add success.
I0320 06:26:13.409811  543705 cpu.go:282] Add success.
W0320 06:26:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:26:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:26:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:26:13.420147  543705 net.go:648] Add success.
I0320 06:26:13.422954  543705 net.go:770] primary dev: ETH0
I0320 06:26:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:26:13.422984  543705 net.go:698] Add success.
I0320 06:26:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:26:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:26:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 06:26:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:26:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 06:26:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:26:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:26:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:26:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:26:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:26:22.989671  543705 disk_info.go:125] begin check local disk info of client
I0320 06:26:22.992083  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:26:22.992089  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024ee40 0xc00024ee80]
E0320 06:26:23.408339  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:26:23.408358  543705 memory.go:184] no items to output this cycle
I0320 06:26:23.408458  543705 cpu.go:275] no items to output this cycle
E0320 06:26:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:26:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 06:26:33.409795  543705 memory.go:184] no items to output this cycle
E0320 06:26:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:26:43.409811  543705 memory.go:191] Add success.
I0320 06:26:43.409820  543705 cpu.go:282] Add success.
I0320 06:26:43.419960  543705 net.go:648] Add success.
I0320 06:26:43.424034  543705 net.go:770] primary dev: ETH0
I0320 06:26:43.424048  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:26:43.424064  543705 net.go:698] Add success.
I0320 06:26:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:26:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:26:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:26:53.410263  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:26:53.410281  543705 memory.go:184] no items to output this cycle
I0320 06:26:53.410294  543705 cpu.go:275] no items to output this cycle
E0320 06:27:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:27:03.409769  543705 memory.go:184] no items to output this cycle
I0320 06:27:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 06:27:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:27:13.409784  543705 memory.go:191] Add success.
I0320 06:27:13.409805  543705 cpu.go:282] Add success.
W0320 06:27:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:27:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:27:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:27:13.420124  543705 net.go:648] Add success.
I0320 06:27:13.422781  543705 net.go:770] primary dev: ETH0
I0320 06:27:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:27:13.422804  543705 net.go:698] Add success.
I0320 06:27:13.428768  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 06:27:13.452935  543705 event_worker.go:152] Polling the log file for events...
I0320 06:27:13.518550  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9b4a7ef8-d135-4bb9-919f-45d03ff8a24f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:27:13.518592  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 06:27:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:27:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 06:27:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:27:14.456815  543705 disk_worker.go:494] system disk:vda1
E0320 06:27:14.456833  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:27:14.456840  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:27:14.456845  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:27:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:27:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:27:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:27:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:27:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:27:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:27:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:27:16.472346  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:27:22.993679  543705 disk_info.go:125] begin check local disk info of client
I0320 06:27:22.996197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:27:22.996205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368480 0xc0003684c0]
E0320 06:27:23.408381  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:27:23.408395  543705 cpu.go:275] no items to output this cycle
I0320 06:27:23.408399  543705 memory.go:184] no items to output this cycle
E0320 06:27:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:27:33.409768  543705 memory.go:184] no items to output this cycle
I0320 06:27:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 06:27:38.181739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:27:38.181746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:27:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:27:43.410744  543705 memory.go:191] Add success.
I0320 06:27:43.409830  543705 cpu.go:282] Add success.
I0320 06:27:43.420505  543705 net.go:648] Add success.
I0320 06:27:43.423156  543705 net.go:770] primary dev: ETH0
I0320 06:27:43.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:27:43.423192  543705 net.go:698] Add success.
I0320 06:27:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:27:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:27:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:27:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:27:53.409778  543705 memory.go:184] no items to output this cycle
I0320 06:27:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 06:28:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:28:03.409773  543705 memory.go:184] no items to output this cycle
I0320 06:28:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 06:28:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:28:13.409826  543705 memory.go:191] Add success.
I0320 06:28:13.409831  543705 cpu.go:282] Add success.
W0320 06:28:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:28:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:28:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:28:13.420147  543705 net.go:648] Add success.
I0320 06:28:13.422879  543705 net.go:770] primary dev: ETH0
I0320 06:28:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:28:13.422905  543705 net.go:698] Add success.
I0320 06:28:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:28:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:28:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 06:28:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:28:14.456840  543705 disk_worker.go:494] system disk:vda1
I0320 06:28:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:28:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:28:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:28:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:28:16.472432  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:28:22.997673  543705 disk_info.go:125] begin check local disk info of client
I0320 06:28:23.000118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:28:23.000125  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047c280 0xc00047c2c0]
E0320 06:28:23.408260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:28:23.408277  543705 memory.go:184] no items to output this cycle
I0320 06:28:23.408285  543705 cpu.go:275] no items to output this cycle
E0320 06:28:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:28:33.409804  543705 memory.go:184] no items to output this cycle
I0320 06:28:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 06:28:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:28:43.409799  543705 memory.go:191] Add success.
I0320 06:28:43.409799  543705 cpu.go:282] Add success.
I0320 06:28:43.419949  543705 net.go:648] Add success.
I0320 06:28:43.422919  543705 net.go:770] primary dev: ETH0
I0320 06:28:43.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:28:43.422945  543705 net.go:698] Add success.
I0320 06:28:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:28:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:28:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:28:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:28:53.409795  543705 memory.go:184] no items to output this cycle
I0320 06:28:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 06:29:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:29:03.409776  543705 memory.go:184] no items to output this cycle
I0320 06:29:03.409777  543705 cpu.go:275] no items to output this cycle
W0320 06:29:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:29:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:29:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:29:13.409799  543705 cpu.go:282] Add success.
E0320 06:29:13.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:29:13.409856  543705 memory.go:191] Add success.
I0320 06:29:13.420201  543705 net.go:648] Add success.
I0320 06:29:13.422977  543705 net.go:770] primary dev: ETH0
I0320 06:29:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:29:13.423003  543705 net.go:698] Add success.
I0320 06:29:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:29:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:29:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 06:29:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:29:14.456526  543705 disk_worker.go:494] system disk:vda1
I0320 06:29:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:29:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:29:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:29:16.472415  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:29:23.001679  543705 disk_info.go:125] begin check local disk info of client
I0320 06:29:23.003989  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:29:23.003996  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a280 0xc00047a2c0]
E0320 06:29:23.407558  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:29:23.407580  543705 memory.go:184] no items to output this cycle
I0320 06:29:23.407587  543705 cpu.go:275] no items to output this cycle
E0320 06:29:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:29:33.409802  543705 memory.go:184] no items to output this cycle
I0320 06:29:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 06:29:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:29:43.409783  543705 memory.go:191] Add success.
I0320 06:29:43.409806  543705 cpu.go:282] Add success.
I0320 06:29:43.419886  543705 net.go:648] Add success.
I0320 06:29:43.422660  543705 net.go:770] primary dev: ETH0
I0320 06:29:43.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:29:43.422685  543705 net.go:698] Add success.
I0320 06:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:29:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:29:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:29:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:29:53.409763  543705 memory.go:184] no items to output this cycle
I0320 06:29:53.409775  543705 cpu.go:275] no items to output this cycle
E0320 06:30:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:30:03.409768  543705 memory.go:184] no items to output this cycle
I0320 06:30:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 06:30:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:30:13.409815  543705 memory.go:191] Add success.
I0320 06:30:13.409823  543705 cpu.go:282] Add success.
W0320 06:30:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:30:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:30:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:30:13.420106  543705 net.go:648] Add success.
I0320 06:30:13.422932  543705 net.go:770] primary dev: ETH0
I0320 06:30:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:30:13.422957  543705 net.go:698] Add success.
I0320 06:30:13.469072  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c79fda38-68f5-4d3c-8145-fe3c2199555a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:30:13.469106  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:30:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:30:14.455355  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:30:14.455500  543705 disk_worker.go:708] disk space is not compliant
W0320 06:30:14.455506  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:30:14.457095  543705 disk_worker.go:494] system disk:vda1
I0320 06:30:14.457124  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:30:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:30:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:30:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:30:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:30:23.005674  543705 disk_info.go:125] begin check local disk info of client
I0320 06:30:23.008187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:30:23.008194  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a180 0xc00039a1c0]
E0320 06:30:23.408239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:30:23.408252  543705 memory.go:184] no items to output this cycle
I0320 06:30:23.408288  543705 cpu.go:275] no items to output this cycle
E0320 06:30:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:30:33.409784  543705 memory.go:184] no items to output this cycle
I0320 06:30:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 06:30:38.185611  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:30:38.185618  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:30:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:30:43.410793  543705 memory.go:191] Add success.
I0320 06:30:43.409789  543705 cpu.go:282] Add success.
I0320 06:30:43.420504  543705 net.go:648] Add success.
I0320 06:30:43.423106  543705 net.go:770] primary dev: ETH0
I0320 06:30:43.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:30:43.423132  543705 net.go:698] Add success.
I0320 06:30:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:30:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:30:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:30:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:30:53.409802  543705 memory.go:184] no items to output this cycle
I0320 06:30:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 06:31:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:31:03.409773  543705 memory.go:184] no items to output this cycle
I0320 06:31:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 06:31:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:31:13.409819  543705 memory.go:191] Add success.
I0320 06:31:13.409824  543705 cpu.go:282] Add success.
W0320 06:31:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:31:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:31:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:31:13.420119  543705 net.go:648] Add success.
I0320 06:31:13.423270  543705 net.go:770] primary dev: ETH0
I0320 06:31:13.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:31:13.423297  543705 net.go:698] Add success.
I0320 06:31:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:31:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:31:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 06:31:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:31:14.457156  543705 disk_worker.go:494] system disk:vda1
I0320 06:31:14.457186  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:31:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:31:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:31:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:31:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:31:16.472442  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:31:23.009677  543705 disk_info.go:125] begin check local disk info of client
I0320 06:31:23.012183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:31:23.012191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000261480 0xc0002614c0]
E0320 06:31:23.407570  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:31:23.407589  543705 memory.go:184] no items to output this cycle
I0320 06:31:23.407601  543705 cpu.go:275] no items to output this cycle
E0320 06:31:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:31:33.409800  543705 memory.go:184] no items to output this cycle
I0320 06:31:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 06:31:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:31:43.409792  543705 cpu.go:282] Add success.
I0320 06:31:43.409799  543705 memory.go:191] Add success.
I0320 06:31:43.420065  543705 net.go:648] Add success.
I0320 06:31:43.423149  543705 net.go:770] primary dev: ETH0
I0320 06:31:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:31:43.423177  543705 net.go:698] Add success.
I0320 06:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:31:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:31:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:31:53.409792  543705 memory.go:184] no items to output this cycle
I0320 06:31:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 06:32:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:32:03.409778  543705 cpu.go:275] no items to output this cycle
I0320 06:32:03.409786  543705 memory.go:184] no items to output this cycle
E0320 06:32:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:32:13.409823  543705 memory.go:191] Add success.
I0320 06:32:13.409834  543705 cpu.go:282] Add success.
W0320 06:32:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:32:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:32:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:32:13.420200  543705 net.go:648] Add success.
I0320 06:32:13.422981  543705 net.go:770] primary dev: ETH0
I0320 06:32:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:32:13.423005  543705 net.go:698] Add success.
W0320 06:32:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:32:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 06:32:14.455199  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:32:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:32:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:32:14.455919  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:32:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 06:32:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:32:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:32:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:32:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:32:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:32:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:32:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:32:16.472315  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:32:23.013676  543705 disk_info.go:125] begin check local disk info of client
I0320 06:32:23.016097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:32:23.016103  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498300 0xc000498340]
E0320 06:32:23.407505  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:32:23.407526  543705 memory.go:184] no items to output this cycle
I0320 06:32:23.407566  543705 cpu.go:275] no items to output this cycle
E0320 06:32:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:32:33.409774  543705 memory.go:184] no items to output this cycle
I0320 06:32:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 06:32:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:32:43.409818  543705 memory.go:191] Add success.
I0320 06:32:43.409824  543705 cpu.go:282] Add success.
I0320 06:32:43.420010  543705 net.go:648] Add success.
I0320 06:32:43.422812  543705 net.go:770] primary dev: ETH0
I0320 06:32:43.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:32:43.422837  543705 net.go:698] Add success.
I0320 06:32:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:32:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:32:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:32:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:32:53.409794  543705 memory.go:184] no items to output this cycle
I0320 06:32:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 06:33:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:33:03.409764  543705 memory.go:184] no items to output this cycle
I0320 06:33:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 06:33:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:33:13.409797  543705 memory.go:191] Add success.
I0320 06:33:13.409797  543705 cpu.go:282] Add success.
W0320 06:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:33:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:33:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:33:13.420151  543705 net.go:648] Add success.
I0320 06:33:13.422741  543705 net.go:770] primary dev: ETH0
I0320 06:33:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:33:13.422771  543705 net.go:698] Add success.
I0320 06:33:13.601737  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c7d8a1f6-539d-4422-9e1e-31e0ff716165","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:33:13.601775  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:33:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:33:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:33:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 06:33:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:33:14.456732  543705 disk_worker.go:494] system disk:vda1
I0320 06:33:14.456764  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:33:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:33:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:33:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:33:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:33:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:33:23.017673  543705 disk_info.go:125] begin check local disk info of client
I0320 06:33:23.020042  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:33:23.020048  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b82c0 0xc0002b8300]
I0320 06:33:23.408101  543705 cpu.go:275] no items to output this cycle
E0320 06:33:23.408118  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:33:23.408137  543705 memory.go:184] no items to output this cycle
E0320 06:33:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:33:33.409784  543705 memory.go:184] no items to output this cycle
I0320 06:33:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 06:33:38.185749  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:33:38.185756  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:33:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:33:43.410777  543705 memory.go:191] Add success.
I0320 06:33:43.409809  543705 cpu.go:282] Add success.
I0320 06:33:43.420486  543705 net.go:648] Add success.
I0320 06:33:43.423015  543705 net.go:770] primary dev: ETH0
I0320 06:33:43.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:33:43.423043  543705 net.go:698] Add success.
I0320 06:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:33:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:33:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:33:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:33:53.409777  543705 memory.go:184] no items to output this cycle
I0320 06:33:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 06:34:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:34:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 06:34:03.409788  543705 memory.go:184] no items to output this cycle
E0320 06:34:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:34:13.409826  543705 memory.go:191] Add success.
I0320 06:34:13.409827  543705 cpu.go:282] Add success.
W0320 06:34:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:34:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:34:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:34:13.420233  543705 net.go:648] Add success.
I0320 06:34:13.423061  543705 net.go:770] primary dev: ETH0
I0320 06:34:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:34:13.423089  543705 net.go:698] Add success.
I0320 06:34:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:34:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:34:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 06:34:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:34:14.456569  543705 disk_worker.go:494] system disk:vda1
I0320 06:34:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:34:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:34:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:34:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:34:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:34:23.021680  543705 disk_info.go:125] begin check local disk info of client
I0320 06:34:23.024135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:34:23.024142  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6300 0xc0001c6340]
E0320 06:34:23.408111  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:34:23.408124  543705 memory.go:184] no items to output this cycle
I0320 06:34:23.408160  543705 cpu.go:275] no items to output this cycle
E0320 06:34:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:34:33.409775  543705 memory.go:184] no items to output this cycle
I0320 06:34:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 06:34:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:34:43.409824  543705 memory.go:191] Add success.
I0320 06:34:43.409835  543705 cpu.go:282] Add success.
I0320 06:34:43.419912  543705 net.go:648] Add success.
I0320 06:34:43.422586  543705 net.go:770] primary dev: ETH0
I0320 06:34:43.422600  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:34:43.422612  543705 net.go:698] Add success.
I0320 06:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:34:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:34:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:34:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:34:53.409810  543705 memory.go:184] no items to output this cycle
I0320 06:34:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 06:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:35:03.409785  543705 memory.go:184] no items to output this cycle
I0320 06:35:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 06:35:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:35:13.409827  543705 memory.go:191] Add success.
I0320 06:35:13.409837  543705 cpu.go:282] Add success.
W0320 06:35:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:35:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:35:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:35:13.420052  543705 net.go:648] Add success.
I0320 06:35:13.423028  543705 net.go:770] primary dev: ETH0
I0320 06:35:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:35:13.423054  543705 net.go:698] Add success.
I0320 06:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:35:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:35:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 06:35:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:35:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 06:35:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:35:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:35:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:35:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:35:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:35:16.472409  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:35:23.025666  543705 disk_info.go:125] begin check local disk info of client
I0320 06:35:23.028122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:35:23.028128  543705 disk_info.go:196] parse disk info done, disk is : [0xc000325f00 0xc000325f40]
E0320 06:35:23.407525  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:35:23.407541  543705 memory.go:184] no items to output this cycle
I0320 06:35:23.407748  543705 cpu.go:275] no items to output this cycle
E0320 06:35:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:35:33.409795  543705 memory.go:184] no items to output this cycle
I0320 06:35:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 06:35:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:35:43.409805  543705 memory.go:191] Add success.
I0320 06:35:43.409812  543705 cpu.go:282] Add success.
I0320 06:35:43.419906  543705 net.go:648] Add success.
I0320 06:35:43.422485  543705 net.go:770] primary dev: ETH0
I0320 06:35:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:35:43.422510  543705 net.go:698] Add success.
I0320 06:35:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:35:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:35:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:35:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:35:53.409812  543705 memory.go:184] no items to output this cycle
I0320 06:35:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 06:36:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:36:03.409787  543705 memory.go:184] no items to output this cycle
I0320 06:36:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 06:36:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:36:13.409828  543705 memory.go:191] Add success.
I0320 06:36:13.409829  543705 cpu.go:282] Add success.
W0320 06:36:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:36:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:36:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:36:13.420147  543705 net.go:648] Add success.
I0320 06:36:13.422996  543705 net.go:770] primary dev: ETH0
I0320 06:36:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:36:13.423022  543705 net.go:698] Add success.
I0320 06:36:13.747558  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"18269ea2-b662-497d-b949-c5c11a20098f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:36:13.747592  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:36:14.453970  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:36:14.455258  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:36:14.455269  543705 disk_worker.go:708] disk space is not compliant
W0320 06:36:14.455272  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:36:14.456850  543705 disk_worker.go:494] system disk:vda1
I0320 06:36:14.456883  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:36:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:36:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:36:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:36:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:36:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:36:23.029687  543705 disk_info.go:125] begin check local disk info of client
I0320 06:36:23.032203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:36:23.032209  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492140 0xc000492180]
E0320 06:36:23.408152  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:36:23.408164  543705 memory.go:184] no items to output this cycle
I0320 06:36:23.408171  543705 cpu.go:275] no items to output this cycle
E0320 06:36:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:36:33.409826  543705 memory.go:184] no items to output this cycle
I0320 06:36:33.409839  543705 cpu.go:275] no items to output this cycle
I0320 06:36:38.189625  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:36:38.189632  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:36:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:36:43.410758  543705 memory.go:191] Add success.
I0320 06:36:43.409807  543705 cpu.go:282] Add success.
I0320 06:36:43.420485  543705 net.go:648] Add success.
I0320 06:36:43.423659  543705 net.go:770] primary dev: ETH0
I0320 06:36:43.423672  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:36:43.423685  543705 net.go:698] Add success.
I0320 06:36:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:36:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:36:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:36:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:36:53.409769  543705 memory.go:184] no items to output this cycle
I0320 06:36:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 06:37:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:37:03.409780  543705 memory.go:184] no items to output this cycle
I0320 06:37:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 06:37:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:37:13.409786  543705 memory.go:191] Add success.
W0320 06:37:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 06:37:13.409818  543705 cpu.go:282] Add success.
W0320 06:37:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:37:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:37:13.420504  543705 net.go:648] Add success.
I0320 06:37:13.423179  543705 net.go:770] primary dev: ETH0
I0320 06:37:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:37:13.423207  543705 net.go:698] Add success.
I0320 06:37:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0320 06:37:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:37:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 06:37:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:37:14.455882  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:37:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:37:14.455896  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:37:14.456549  543705 disk_worker.go:494] system disk:vda1
I0320 06:37:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:37:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:37:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:37:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:37:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:37:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:37:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:37:16.472343  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:37:23.033675  543705 disk_info.go:125] begin check local disk info of client
I0320 06:37:23.036089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:37:23.036095  543705 disk_info.go:196] parse disk info done, disk is : [0xc000259180 0xc0002591c0]
E0320 06:37:23.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:37:23.407519  543705 memory.go:184] no items to output this cycle
I0320 06:37:23.407601  543705 cpu.go:275] no items to output this cycle
E0320 06:37:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:37:33.409792  543705 memory.go:184] no items to output this cycle
I0320 06:37:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 06:37:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:37:43.409795  543705 memory.go:191] Add success.
I0320 06:37:43.409799  543705 cpu.go:282] Add success.
I0320 06:37:43.419982  543705 net.go:648] Add success.
I0320 06:37:43.422684  543705 net.go:770] primary dev: ETH0
I0320 06:37:43.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:37:43.422710  543705 net.go:698] Add success.
I0320 06:37:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:37:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:37:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:37:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:37:53.409795  543705 memory.go:184] no items to output this cycle
I0320 06:37:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 06:38:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:38:03.409784  543705 memory.go:184] no items to output this cycle
I0320 06:38:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 06:38:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:38:13.409789  543705 memory.go:191] Add success.
I0320 06:38:13.409812  543705 cpu.go:282] Add success.
W0320 06:38:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:38:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:38:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:38:13.420098  543705 net.go:648] Add success.
I0320 06:38:13.422687  543705 net.go:770] primary dev: ETH0
I0320 06:38:13.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:38:13.422713  543705 net.go:698] Add success.
I0320 06:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:38:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:38:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 06:38:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:38:14.456491  543705 disk_worker.go:494] system disk:vda1
I0320 06:38:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:38:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:38:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:38:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:38:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:38:23.037678  543705 disk_info.go:125] begin check local disk info of client
I0320 06:38:23.040112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:38:23.040118  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393740 0xc000393780]
E0320 06:38:23.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:38:23.407519  543705 memory.go:184] no items to output this cycle
I0320 06:38:23.407547  543705 cpu.go:275] no items to output this cycle
E0320 06:38:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:38:33.409908  543705 memory.go:184] no items to output this cycle
I0320 06:38:33.409938  543705 cpu.go:275] no items to output this cycle
E0320 06:38:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:38:43.409793  543705 memory.go:191] Add success.
I0320 06:38:43.409797  543705 cpu.go:282] Add success.
I0320 06:38:43.419960  543705 net.go:648] Add success.
I0320 06:38:43.422867  543705 net.go:770] primary dev: ETH0
I0320 06:38:43.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:38:43.422891  543705 net.go:698] Add success.
I0320 06:38:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:38:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:38:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:38:53.409797  543705 memory.go:184] no items to output this cycle
I0320 06:38:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 06:39:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:39:03.409800  543705 memory.go:184] no items to output this cycle
I0320 06:39:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 06:39:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:39:13.409790  543705 memory.go:191] Add success.
I0320 06:39:13.409817  543705 cpu.go:282] Add success.
W0320 06:39:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:39:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:39:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:39:13.420139  543705 net.go:648] Add success.
I0320 06:39:13.422826  543705 net.go:770] primary dev: ETH0
I0320 06:39:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:39:13.422851  543705 net.go:698] Add success.
I0320 06:39:13.468395  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cddcf3ad-1522-4519-bd10-8d963305d14b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:39:13.468427  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:39:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:39:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:39:14.455245  543705 disk_worker.go:708] disk space is not compliant
W0320 06:39:14.455248  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:39:14.456750  543705 disk_worker.go:494] system disk:vda1
I0320 06:39:14.456785  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:39:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:39:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:39:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:39:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:39:23.041678  543705 disk_info.go:125] begin check local disk info of client
I0320 06:39:23.044075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:39:23.044081  543705 disk_info.go:196] parse disk info done, disk is : [0xc000313140 0xc000313180]
E0320 06:39:23.407500  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:39:23.407512  543705 memory.go:184] no items to output this cycle
I0320 06:39:23.407539  543705 cpu.go:275] no items to output this cycle
E0320 06:39:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:39:33.409879  543705 memory.go:184] no items to output this cycle
I0320 06:39:33.409941  543705 cpu.go:275] no items to output this cycle
I0320 06:39:38.189732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:39:38.189739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:39:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:39:43.410583  543705 memory.go:191] Add success.
I0320 06:39:43.409809  543705 cpu.go:282] Add success.
I0320 06:39:43.420287  543705 net.go:648] Add success.
I0320 06:39:43.423158  543705 net.go:770] primary dev: ETH0
I0320 06:39:43.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:39:43.423183  543705 net.go:698] Add success.
I0320 06:39:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:39:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:39:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:39:53.409788  543705 memory.go:184] no items to output this cycle
I0320 06:39:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 06:40:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:40:03.409801  543705 memory.go:184] no items to output this cycle
I0320 06:40:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 06:40:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:40:13.409827  543705 memory.go:191] Add success.
I0320 06:40:13.409835  543705 cpu.go:282] Add success.
W0320 06:40:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:40:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:40:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:40:13.420180  543705 net.go:648] Add success.
I0320 06:40:13.423020  543705 net.go:770] primary dev: ETH0
I0320 06:40:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:40:13.423049  543705 net.go:698] Add success.
I0320 06:40:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:40:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:40:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 06:40:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:40:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 06:40:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:40:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:40:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:40:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:40:23.045672  543705 disk_info.go:125] begin check local disk info of client
I0320 06:40:23.048170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:40:23.048177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329cc0 0xc000329d00]
E0320 06:40:23.407531  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:40:23.407547  543705 memory.go:184] no items to output this cycle
I0320 06:40:23.407562  543705 cpu.go:275] no items to output this cycle
E0320 06:40:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:40:33.409828  543705 memory.go:184] no items to output this cycle
I0320 06:40:33.409841  543705 cpu.go:275] no items to output this cycle
E0320 06:40:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:40:43.409785  543705 memory.go:191] Add success.
I0320 06:40:43.409798  543705 cpu.go:282] Add success.
I0320 06:40:43.420012  543705 net.go:648] Add success.
I0320 06:40:43.422633  543705 net.go:770] primary dev: ETH0
I0320 06:40:43.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:40:43.422657  543705 net.go:698] Add success.
I0320 06:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:40:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:40:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:40:53.409766  543705 memory.go:184] no items to output this cycle
I0320 06:40:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 06:41:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:41:03.409798  543705 memory.go:184] no items to output this cycle
I0320 06:41:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 06:41:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:41:13.409790  543705 memory.go:191] Add success.
I0320 06:41:13.409806  543705 cpu.go:282] Add success.
W0320 06:41:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:41:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:41:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:41:13.420145  543705 net.go:648] Add success.
I0320 06:41:13.422912  543705 net.go:770] primary dev: ETH0
I0320 06:41:13.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:41:13.422938  543705 net.go:698] Add success.
I0320 06:41:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:41:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:41:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 06:41:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:41:14.456560  543705 disk_worker.go:494] system disk:vda1
I0320 06:41:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:41:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:41:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:41:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:41:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:41:23.049677  543705 disk_info.go:125] begin check local disk info of client
I0320 06:41:23.052116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:41:23.052123  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e580 0xc00034e5c0]
E0320 06:41:23.407990  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:41:23.408006  543705 memory.go:184] no items to output this cycle
I0320 06:41:23.408025  543705 cpu.go:275] no items to output this cycle
E0320 06:41:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:41:33.409803  543705 memory.go:184] no items to output this cycle
I0320 06:41:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 06:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:41:43.409814  543705 memory.go:191] Add success.
I0320 06:41:43.409820  543705 cpu.go:282] Add success.
I0320 06:41:43.420006  543705 net.go:648] Add success.
I0320 06:41:43.422975  543705 net.go:770] primary dev: ETH0
I0320 06:41:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:41:43.423008  543705 net.go:698] Add success.
I0320 06:41:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:41:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:41:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:41:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:41:53.409781  543705 memory.go:184] no items to output this cycle
I0320 06:41:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 06:42:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:42:03.409791  543705 memory.go:184] no items to output this cycle
I0320 06:42:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 06:42:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:42:13.409784  543705 memory.go:191] Add success.
W0320 06:42:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 06:42:13.409814  543705 cpu.go:282] Add success.
W0320 06:42:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:42:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:42:13.420305  543705 net.go:648] Add success.
I0320 06:42:13.423161  543705 net.go:770] primary dev: ETH0
I0320 06:42:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:42:13.423191  543705 net.go:698] Add success.
I0320 06:42:13.464562  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e92258d4-c33c-4114-91a0-6b084885cd0b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:42:13.464596  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 06:42:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:42:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 06:42:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:42:14.457008  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:42:14.457017  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:42:14.457023  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:42:14.457120  543705 disk_worker.go:494] system disk:vda1
I0320 06:42:14.457163  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:42:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:42:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:42:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:42:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:42:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:42:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:42:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:42:23.053673  543705 disk_info.go:125] begin check local disk info of client
I0320 06:42:23.056067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:42:23.056073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab6c0 0xc0001ab700]
E0320 06:42:23.407512  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:42:23.407527  543705 memory.go:184] no items to output this cycle
I0320 06:42:23.407534  543705 cpu.go:275] no items to output this cycle
E0320 06:42:33.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:42:33.409892  543705 memory.go:184] no items to output this cycle
I0320 06:42:33.409953  543705 cpu.go:275] no items to output this cycle
I0320 06:42:38.189878  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:42:38.189883  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:42:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:42:43.410807  543705 memory.go:191] Add success.
I0320 06:42:43.409806  543705 cpu.go:282] Add success.
I0320 06:42:43.420541  543705 net.go:648] Add success.
I0320 06:42:43.423458  543705 net.go:770] primary dev: ETH0
I0320 06:42:43.423473  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:42:43.423488  543705 net.go:698] Add success.
I0320 06:42:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:42:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:42:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:42:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:42:53.409765  543705 memory.go:184] no items to output this cycle
I0320 06:42:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 06:43:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:43:03.409800  543705 memory.go:184] no items to output this cycle
I0320 06:43:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 06:43:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:43:13.409785  543705 memory.go:191] Add success.
I0320 06:43:13.409811  543705 cpu.go:282] Add success.
W0320 06:43:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:43:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:43:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:43:13.420076  543705 net.go:648] Add success.
I0320 06:43:13.422779  543705 net.go:770] primary dev: ETH0
I0320 06:43:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:43:13.422808  543705 net.go:698] Add success.
I0320 06:43:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:43:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:43:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 06:43:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:43:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 06:43:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:43:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:43:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:43:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:43:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:43:23.057671  543705 disk_info.go:125] begin check local disk info of client
I0320 06:43:23.060181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:43:23.060190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ec000 0xc0004ec040]
E0320 06:43:23.407534  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:43:23.407550  543705 memory.go:184] no items to output this cycle
I0320 06:43:23.407563  543705 cpu.go:275] no items to output this cycle
E0320 06:43:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:43:33.409774  543705 memory.go:184] no items to output this cycle
I0320 06:43:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 06:43:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:43:43.409918  543705 memory.go:191] Add success.
I0320 06:43:43.409927  543705 cpu.go:282] Add success.
I0320 06:43:43.419735  543705 net.go:648] Add success.
I0320 06:43:43.422819  543705 net.go:770] primary dev: ETH0
I0320 06:43:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:43:43.422852  543705 net.go:698] Add success.
I0320 06:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:43:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:43:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:43:53.409794  543705 memory.go:184] no items to output this cycle
I0320 06:43:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 06:44:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:44:03.409778  543705 memory.go:184] no items to output this cycle
I0320 06:44:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 06:44:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:44:13.409821  543705 memory.go:191] Add success.
I0320 06:44:13.409825  543705 cpu.go:282] Add success.
W0320 06:44:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:44:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:44:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:44:13.420244  543705 net.go:648] Add success.
I0320 06:44:13.422863  543705 net.go:770] primary dev: ETH0
I0320 06:44:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:44:13.422888  543705 net.go:698] Add success.
I0320 06:44:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:44:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:44:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 06:44:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:44:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 06:44:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:44:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:44:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:44:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:44:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:44:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:44:23.061679  543705 disk_info.go:125] begin check local disk info of client
I0320 06:44:23.064050  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:44:23.064056  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab9c0 0xc0001aba00]
E0320 06:44:23.407851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:44:23.407867  543705 memory.go:184] no items to output this cycle
I0320 06:44:23.407880  543705 cpu.go:275] no items to output this cycle
E0320 06:44:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:44:33.409808  543705 memory.go:184] no items to output this cycle
I0320 06:44:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 06:44:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:44:43.409777  543705 memory.go:191] Add success.
I0320 06:44:43.409798  543705 cpu.go:282] Add success.
I0320 06:44:43.419981  543705 net.go:648] Add success.
I0320 06:44:43.422849  543705 net.go:770] primary dev: ETH0
I0320 06:44:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:44:43.422878  543705 net.go:698] Add success.
I0320 06:44:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:44:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:44:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:44:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:44:53.409792  543705 memory.go:184] no items to output this cycle
I0320 06:44:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 06:45:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:45:03.409772  543705 memory.go:184] no items to output this cycle
I0320 06:45:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 06:45:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:45:13.409798  543705 memory.go:191] Add success.
I0320 06:45:13.409801  543705 cpu.go:282] Add success.
W0320 06:45:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:45:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:45:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:45:13.420157  543705 net.go:648] Add success.
I0320 06:45:13.422963  543705 net.go:770] primary dev: ETH0
I0320 06:45:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:45:13.422989  543705 net.go:698] Add success.
I0320 06:45:13.470048  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"592e429a-a482-4e1c-91c8-bb282bcf365b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:45:13.470081  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:45:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:45:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 06:45:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:45:14.456539  543705 disk_worker.go:494] system disk:vda1
I0320 06:45:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:45:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:45:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:45:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:45:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:45:16.472417  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:45:23.065674  543705 disk_info.go:125] begin check local disk info of client
I0320 06:45:23.068147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:45:23.068154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b100 0xc00007b140]
E0320 06:45:23.407903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:45:23.407914  543705 memory.go:184] no items to output this cycle
I0320 06:45:23.407954  543705 cpu.go:275] no items to output this cycle
E0320 06:45:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:45:33.409807  543705 memory.go:184] no items to output this cycle
I0320 06:45:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 06:45:38.193658  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:45:38.193666  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:45:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:45:43.410868  543705 memory.go:191] Add success.
I0320 06:45:43.409817  543705 cpu.go:282] Add success.
I0320 06:45:43.420561  543705 net.go:648] Add success.
I0320 06:45:43.423460  543705 net.go:770] primary dev: ETH0
I0320 06:45:43.423472  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:45:43.423484  543705 net.go:698] Add success.
I0320 06:45:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:45:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:45:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:45:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:45:53.409780  543705 memory.go:184] no items to output this cycle
I0320 06:45:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 06:46:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:46:03.409774  543705 memory.go:184] no items to output this cycle
I0320 06:46:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 06:46:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:46:13.409789  543705 memory.go:191] Add success.
W0320 06:46:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 06:46:13.409822  543705 cpu.go:282] Add success.
W0320 06:46:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:46:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:46:13.420233  543705 net.go:648] Add success.
I0320 06:46:13.423038  543705 net.go:770] primary dev: ETH0
I0320 06:46:13.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:46:13.423066  543705 net.go:698] Add success.
I0320 06:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:46:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:46:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 06:46:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:46:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 06:46:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:46:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:46:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:46:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:46:16.472444  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:46:23.069682  543705 disk_info.go:125] begin check local disk info of client
I0320 06:46:23.072097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:46:23.072102  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae40 0xc0001aae80]
E0320 06:46:23.407889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:46:23.407908  543705 memory.go:184] no items to output this cycle
I0320 06:46:23.407927  543705 cpu.go:275] no items to output this cycle
E0320 06:46:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:46:33.409777  543705 memory.go:184] no items to output this cycle
I0320 06:46:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 06:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:46:43.409792  543705 memory.go:191] Add success.
I0320 06:46:43.409796  543705 cpu.go:282] Add success.
I0320 06:46:43.419945  543705 net.go:648] Add success.
I0320 06:46:43.423032  543705 net.go:770] primary dev: ETH0
I0320 06:46:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:46:43.423058  543705 net.go:698] Add success.
I0320 06:46:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:46:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:46:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:46:53.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:46:53.409927  543705 memory.go:184] no items to output this cycle
I0320 06:46:53.410042  543705 cpu.go:275] no items to output this cycle
E0320 06:47:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:47:03.409776  543705 memory.go:184] no items to output this cycle
I0320 06:47:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 06:47:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:47:13.409791  543705 memory.go:191] Add success.
I0320 06:47:13.409810  543705 cpu.go:282] Add success.
W0320 06:47:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:47:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:47:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:47:13.420079  543705 net.go:648] Add success.
I0320 06:47:13.423368  543705 net.go:770] primary dev: ETH0
I0320 06:47:13.423382  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:47:13.423394  543705 net.go:698] Add success.
I0320 06:47:13.452910  543705 event_worker.go:152] Polling the log file for events...
W0320 06:47:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:47:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 06:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:47:14.455871  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:47:14.455880  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:47:14.455886  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:47:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 06:47:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:47:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:47:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:47:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:47:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:47:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:47:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:47:16.472339  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:47:23.073673  543705 disk_info.go:125] begin check local disk info of client
I0320 06:47:23.076059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:47:23.076065  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312280 0xc0003122c0]
E0320 06:47:23.407814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:47:23.407829  543705 memory.go:184] no items to output this cycle
I0320 06:47:23.407848  543705 cpu.go:275] no items to output this cycle
E0320 06:47:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:47:33.409784  543705 memory.go:184] no items to output this cycle
I0320 06:47:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 06:47:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:47:43.409815  543705 memory.go:191] Add success.
I0320 06:47:43.409820  543705 cpu.go:282] Add success.
I0320 06:47:43.420046  543705 net.go:648] Add success.
I0320 06:47:43.423655  543705 net.go:770] primary dev: ETH0
I0320 06:47:43.423669  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:47:43.423682  543705 net.go:698] Add success.
I0320 06:47:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:47:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:47:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:47:53.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:47:53.409893  543705 memory.go:184] no items to output this cycle
I0320 06:47:53.409951  543705 cpu.go:275] no items to output this cycle
E0320 06:48:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:48:03.409804  543705 memory.go:184] no items to output this cycle
I0320 06:48:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 06:48:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:48:13.409782  543705 memory.go:191] Add success.
W0320 06:48:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 06:48:13.409812  543705 cpu.go:282] Add success.
W0320 06:48:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:48:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:48:13.420173  543705 net.go:648] Add success.
I0320 06:48:13.422671  543705 net.go:770] primary dev: ETH0
I0320 06:48:13.422690  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:48:13.422704  543705 net.go:698] Add success.
I0320 06:48:13.468118  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2b6f5bff-566b-46a7-b0ec-8aee48f60cf8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:48:13.468151  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:48:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:48:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:48:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 06:48:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:48:14.456540  543705 disk_worker.go:494] system disk:vda1
I0320 06:48:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:48:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:48:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:48:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:48:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:48:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:48:23.077680  543705 disk_info.go:125] begin check local disk info of client
I0320 06:48:23.080178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:48:23.080184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa400 0xc0001fa440]
E0320 06:48:23.407518  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:48:23.407535  543705 memory.go:184] no items to output this cycle
I0320 06:48:23.407544  543705 cpu.go:275] no items to output this cycle
E0320 06:48:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:48:33.409791  543705 memory.go:184] no items to output this cycle
I0320 06:48:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 06:48:38.197673  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:48:38.197680  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:48:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:48:43.410701  543705 memory.go:191] Add success.
I0320 06:48:43.409827  543705 cpu.go:282] Add success.
I0320 06:48:43.420395  543705 net.go:648] Add success.
I0320 06:48:43.422784  543705 net.go:770] primary dev: ETH0
I0320 06:48:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:48:43.422814  543705 net.go:698] Add success.
I0320 06:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:48:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:48:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:48:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:48:53.409803  543705 memory.go:184] no items to output this cycle
I0320 06:48:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 06:49:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:49:03.409783  543705 memory.go:184] no items to output this cycle
I0320 06:49:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 06:49:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:49:13.409812  543705 memory.go:191] Add success.
I0320 06:49:13.409813  543705 cpu.go:282] Add success.
W0320 06:49:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:49:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:49:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:49:13.420149  543705 net.go:648] Add success.
I0320 06:49:13.423149  543705 net.go:770] primary dev: ETH0
I0320 06:49:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:49:13.423174  543705 net.go:698] Add success.
I0320 06:49:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:49:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:49:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 06:49:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:49:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 06:49:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:49:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:49:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:49:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:49:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:49:16.472408  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:49:23.081681  543705 disk_info.go:125] begin check local disk info of client
I0320 06:49:23.084060  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:49:23.084066  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003127c0 0xc000312800]
E0320 06:49:23.407528  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:49:23.407542  543705 memory.go:184] no items to output this cycle
I0320 06:49:23.407555  543705 cpu.go:275] no items to output this cycle
E0320 06:49:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:49:33.409817  543705 memory.go:184] no items to output this cycle
I0320 06:49:33.409827  543705 cpu.go:275] no items to output this cycle
E0320 06:49:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:49:43.409829  543705 memory.go:191] Add success.
I0320 06:49:43.409833  543705 cpu.go:282] Add success.
I0320 06:49:43.419982  543705 net.go:648] Add success.
I0320 06:49:43.422845  543705 net.go:770] primary dev: ETH0
I0320 06:49:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:49:43.422870  543705 net.go:698] Add success.
I0320 06:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:49:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:49:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:49:53.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:49:53.409895  543705 memory.go:184] no items to output this cycle
I0320 06:49:53.409975  543705 cpu.go:275] no items to output this cycle
E0320 06:50:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:50:03.409803  543705 cpu.go:275] no items to output this cycle
I0320 06:50:03.409805  543705 memory.go:184] no items to output this cycle
E0320 06:50:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:50:13.409835  543705 memory.go:191] Add success.
I0320 06:50:13.409838  543705 cpu.go:282] Add success.
W0320 06:50:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:50:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:50:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:50:13.420272  543705 net.go:648] Add success.
I0320 06:50:13.423484  543705 net.go:770] primary dev: ETH0
I0320 06:50:13.423496  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:50:13.423508  543705 net.go:698] Add success.
I0320 06:50:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:50:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:50:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 06:50:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:50:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 06:50:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:50:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:50:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:50:23.085677  543705 disk_info.go:125] begin check local disk info of client
I0320 06:50:23.088162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:50:23.088169  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f040 0xc00037f080]
E0320 06:50:23.407849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:50:23.407866  543705 memory.go:184] no items to output this cycle
I0320 06:50:23.407888  543705 cpu.go:275] no items to output this cycle
E0320 06:50:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:50:33.409803  543705 memory.go:184] no items to output this cycle
I0320 06:50:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 06:50:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:50:43.409783  543705 memory.go:191] Add success.
I0320 06:50:43.409801  543705 cpu.go:282] Add success.
I0320 06:50:43.419858  543705 net.go:648] Add success.
I0320 06:50:43.422752  543705 net.go:770] primary dev: ETH0
I0320 06:50:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:50:43.422776  543705 net.go:698] Add success.
I0320 06:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:50:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:50:53.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:50:53.409914  543705 memory.go:184] no items to output this cycle
I0320 06:50:53.409927  543705 cpu.go:275] no items to output this cycle
E0320 06:51:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:51:03.409779  543705 cpu.go:275] no items to output this cycle
I0320 06:51:03.409794  543705 memory.go:184] no items to output this cycle
E0320 06:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:51:13.409797  543705 memory.go:191] Add success.
I0320 06:51:13.409801  543705 cpu.go:282] Add success.
W0320 06:51:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:51:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:51:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:51:13.420223  543705 net.go:648] Add success.
I0320 06:51:13.423109  543705 net.go:770] primary dev: ETH0
I0320 06:51:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:51:13.423139  543705 net.go:698] Add success.
I0320 06:51:13.468774  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"caca9206-7199-49a9-abff-f44606e6aa46","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:51:13.468810  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:51:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:51:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:51:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 06:51:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:51:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 06:51:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:51:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:51:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:51:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:51:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:51:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:51:23.089678  543705 disk_info.go:125] begin check local disk info of client
I0320 06:51:23.092138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:51:23.092145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003804c0 0xc000380500]
E0320 06:51:23.407512  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:51:23.407525  543705 memory.go:184] no items to output this cycle
I0320 06:51:23.407553  543705 cpu.go:275] no items to output this cycle
E0320 06:51:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:51:33.409786  543705 memory.go:184] no items to output this cycle
I0320 06:51:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 06:51:38.201697  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:51:38.201703  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:51:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:51:43.410713  543705 memory.go:191] Add success.
I0320 06:51:43.409814  543705 cpu.go:282] Add success.
I0320 06:51:43.419736  543705 net.go:648] Add success.
I0320 06:51:43.422745  543705 net.go:770] primary dev: ETH0
I0320 06:51:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:51:43.422773  543705 net.go:698] Add success.
I0320 06:51:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:51:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:51:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:51:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:51:53.409773  543705 memory.go:184] no items to output this cycle
I0320 06:51:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 06:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:52:03.409781  543705 cpu.go:275] no items to output this cycle
I0320 06:52:03.409786  543705 memory.go:184] no items to output this cycle
E0320 06:52:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:52:13.409819  543705 memory.go:191] Add success.
I0320 06:52:13.409826  543705 cpu.go:282] Add success.
W0320 06:52:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:52:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:52:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:52:13.420133  543705 net.go:648] Add success.
I0320 06:52:13.422866  543705 net.go:770] primary dev: ETH0
I0320 06:52:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:52:13.422894  543705 net.go:698] Add success.
W0320 06:52:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:52:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 06:52:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:52:14.456785  543705 disk_worker.go:494] system disk:vda1
I0320 06:52:14.456827  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:52:14.457138  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:52:14.457146  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:52:14.457151  543705 custom_config.go:64] query custom config with name: gpu
E0320 06:52:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:52:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:52:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:52:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:52:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:52:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:52:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:52:23.093675  543705 disk_info.go:125] begin check local disk info of client
I0320 06:52:23.096227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:52:23.096234  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032ea40 0xc00032ea80]
E0320 06:52:23.407848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:52:23.407860  543705 memory.go:184] no items to output this cycle
I0320 06:52:23.407894  543705 cpu.go:275] no items to output this cycle
E0320 06:52:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:52:33.409788  543705 memory.go:184] no items to output this cycle
I0320 06:52:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 06:52:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:52:43.409800  543705 memory.go:191] Add success.
I0320 06:52:43.409802  543705 cpu.go:282] Add success.
I0320 06:52:43.419984  543705 net.go:648] Add success.
I0320 06:52:43.422755  543705 net.go:770] primary dev: ETH0
I0320 06:52:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:52:43.422779  543705 net.go:698] Add success.
I0320 06:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:52:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:52:53.409770  543705 memory.go:184] no items to output this cycle
I0320 06:52:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 06:53:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:53:03.409783  543705 cpu.go:275] no items to output this cycle
I0320 06:53:03.409786  543705 memory.go:184] no items to output this cycle
E0320 06:53:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:53:13.409829  543705 memory.go:191] Add success.
I0320 06:53:13.409830  543705 cpu.go:282] Add success.
W0320 06:53:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:53:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:53:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:53:13.420260  543705 net.go:648] Add success.
I0320 06:53:13.423495  543705 net.go:770] primary dev: ETH0
I0320 06:53:13.423509  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:53:13.423520  543705 net.go:698] Add success.
I0320 06:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:53:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:53:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 06:53:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:53:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 06:53:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:53:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:53:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:53:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:53:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:53:23.097677  543705 disk_info.go:125] begin check local disk info of client
I0320 06:53:23.100172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:53:23.100178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b78c0 0xc0002b7900]
E0320 06:53:23.407804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:53:23.407816  543705 memory.go:184] no items to output this cycle
I0320 06:53:23.407825  543705 cpu.go:275] no items to output this cycle
E0320 06:53:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:53:33.409809  543705 memory.go:184] no items to output this cycle
I0320 06:53:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 06:53:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:53:43.409826  543705 memory.go:191] Add success.
I0320 06:53:43.409835  543705 cpu.go:282] Add success.
I0320 06:53:43.420022  543705 net.go:648] Add success.
I0320 06:53:43.422670  543705 net.go:770] primary dev: ETH0
I0320 06:53:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:53:43.422704  543705 net.go:698] Add success.
I0320 06:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:53:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:53:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:53:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:53:53.409804  543705 memory.go:184] no items to output this cycle
I0320 06:53:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 06:54:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:54:03.409778  543705 memory.go:184] no items to output this cycle
I0320 06:54:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 06:54:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:54:13.409802  543705 memory.go:191] Add success.
I0320 06:54:13.409807  543705 cpu.go:282] Add success.
W0320 06:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:54:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:54:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:54:13.420132  543705 net.go:648] Add success.
I0320 06:54:13.422654  543705 net.go:770] primary dev: ETH0
I0320 06:54:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:54:13.422684  543705 net.go:698] Add success.
I0320 06:54:13.464291  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1e743f54-5c76-4cc1-803d-1218406f212d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:54:13.464326  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 06:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:54:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:54:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 06:54:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:54:14.456521  543705 disk_worker.go:494] system disk:vda1
I0320 06:54:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:54:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:54:16.472423  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:54:23.101677  543705 disk_info.go:125] begin check local disk info of client
I0320 06:54:23.104159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:54:23.104167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005455c0 0xc000545600]
E0320 06:54:23.407772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:54:23.407789  543705 memory.go:184] no items to output this cycle
I0320 06:54:23.407804  543705 cpu.go:275] no items to output this cycle
E0320 06:54:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:54:33.409784  543705 memory.go:184] no items to output this cycle
I0320 06:54:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 06:54:38.205717  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:54:38.205723  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:54:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:54:43.410688  543705 memory.go:191] Add success.
I0320 06:54:43.409808  543705 cpu.go:282] Add success.
I0320 06:54:43.420404  543705 net.go:648] Add success.
I0320 06:54:43.423333  543705 net.go:770] primary dev: ETH0
I0320 06:54:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:54:43.423366  543705 net.go:698] Add success.
I0320 06:54:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:54:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:54:53.409766  543705 memory.go:184] no items to output this cycle
I0320 06:54:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 06:55:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:55:03.409772  543705 memory.go:184] no items to output this cycle
I0320 06:55:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 06:55:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:55:13.409805  543705 memory.go:191] Add success.
I0320 06:55:13.409809  543705 cpu.go:282] Add success.
W0320 06:55:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:55:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:55:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:55:13.420127  543705 net.go:648] Add success.
I0320 06:55:13.422875  543705 net.go:770] primary dev: ETH0
I0320 06:55:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:55:13.422901  543705 net.go:698] Add success.
I0320 06:55:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:55:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:55:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 06:55:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:55:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 06:55:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:55:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:55:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:55:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:55:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:55:23.105681  543705 disk_info.go:125] begin check local disk info of client
I0320 06:55:23.108177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:55:23.108198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000564080 0xc0005640c0]
E0320 06:55:23.407744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:55:23.407757  543705 memory.go:184] no items to output this cycle
I0320 06:55:23.407783  543705 cpu.go:275] no items to output this cycle
E0320 06:55:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:55:33.409807  543705 memory.go:184] no items to output this cycle
I0320 06:55:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 06:55:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:55:43.409816  543705 memory.go:191] Add success.
I0320 06:55:43.409826  543705 cpu.go:282] Add success.
I0320 06:55:43.419992  543705 net.go:648] Add success.
I0320 06:55:43.422731  543705 net.go:770] primary dev: ETH0
I0320 06:55:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:55:43.422756  543705 net.go:698] Add success.
I0320 06:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:55:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:55:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:55:53.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:55:53.410266  543705 memory.go:184] no items to output this cycle
I0320 06:55:53.410275  543705 cpu.go:275] no items to output this cycle
E0320 06:56:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:56:03.409778  543705 memory.go:184] no items to output this cycle
I0320 06:56:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 06:56:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:56:13.409803  543705 memory.go:191] Add success.
I0320 06:56:13.409818  543705 cpu.go:282] Add success.
W0320 06:56:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:56:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:56:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:56:13.420137  543705 net.go:648] Add success.
I0320 06:56:13.422818  543705 net.go:770] primary dev: ETH0
I0320 06:56:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:56:13.422843  543705 net.go:698] Add success.
I0320 06:56:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:56:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:56:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 06:56:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:56:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 06:56:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:56:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:56:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:56:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:56:16.472458  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:56:23.109676  543705 disk_info.go:125] begin check local disk info of client
I0320 06:56:23.112239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:56:23.112245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265c00 0xc000265c40]
E0320 06:56:23.407803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:56:23.407818  543705 memory.go:184] no items to output this cycle
I0320 06:56:23.407828  543705 cpu.go:275] no items to output this cycle
E0320 06:56:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:56:33.409801  543705 memory.go:184] no items to output this cycle
I0320 06:56:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 06:56:43.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:56:43.409889  543705 memory.go:191] Add success.
I0320 06:56:43.409963  543705 cpu.go:282] Add success.
I0320 06:56:43.419709  543705 net.go:648] Add success.
I0320 06:56:43.422560  543705 net.go:770] primary dev: ETH0
I0320 06:56:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:56:43.422584  543705 net.go:698] Add success.
I0320 06:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:56:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:56:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:56:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:56:53.409795  543705 memory.go:184] no items to output this cycle
I0320 06:56:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 06:57:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:57:03.409767  543705 memory.go:184] no items to output this cycle
I0320 06:57:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 06:57:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:57:13.409793  543705 memory.go:191] Add success.
I0320 06:57:13.409810  543705 cpu.go:282] Add success.
W0320 06:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:57:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:57:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:57:13.420135  543705 net.go:648] Add success.
I0320 06:57:13.429007  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 06:57:13.429079  543705 net.go:770] primary dev: ETH0
I0320 06:57:13.429093  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:57:13.429108  543705 net.go:698] Add success.
I0320 06:57:13.453658  543705 event_worker.go:152] Polling the log file for events...
I0320 06:57:13.603316  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8f3b8a00-658f-4cd7-9462-318cdb08186b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 06:57:13.603357  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 06:57:14.454890  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:57:14.454903  543705 disk_worker.go:708] disk space is not compliant
W0320 06:57:14.454907  543705 disk_worker.go:728] disk inode is not compliant
E0320 06:57:14.455614  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 06:57:14.455623  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 06:57:14.455628  543705 custom_config.go:64] query custom config with name: gpu
I0320 06:57:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 06:57:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 06:57:15.456892  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 06:57:15.456903  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:57:16.458027  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 06:57:16.458029  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 06:57:16.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:57:16.458104  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:57:16.472506  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:57:23.113680  543705 disk_info.go:125] begin check local disk info of client
I0320 06:57:23.116169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:57:23.116175  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f780 0xc00037f7c0]
E0320 06:57:23.407690  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:57:23.407702  543705 memory.go:184] no items to output this cycle
I0320 06:57:23.407736  543705 cpu.go:275] no items to output this cycle
E0320 06:57:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:57:33.409784  543705 memory.go:184] no items to output this cycle
I0320 06:57:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 06:57:38.209746  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 06:57:38.209751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 06:57:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:57:43.410682  543705 memory.go:191] Add success.
I0320 06:57:43.409826  543705 cpu.go:282] Add success.
I0320 06:57:43.420197  543705 net.go:770] primary dev: ETH0
I0320 06:57:43.420210  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:57:43.420222  543705 net.go:698] Add success.
I0320 06:57:43.420622  543705 net.go:648] Add success.
I0320 06:57:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:57:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:57:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:57:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:57:53.409799  543705 memory.go:184] no items to output this cycle
I0320 06:57:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 06:58:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:58:03.409775  543705 memory.go:184] no items to output this cycle
I0320 06:58:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 06:58:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:58:13.409793  543705 memory.go:191] Add success.
I0320 06:58:13.409810  543705 cpu.go:282] Add success.
W0320 06:58:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:58:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:58:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:58:13.420100  543705 net.go:648] Add success.
I0320 06:58:13.422589  543705 net.go:770] primary dev: ETH0
I0320 06:58:13.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:58:13.422617  543705 net.go:698] Add success.
I0320 06:58:14.453934  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:58:14.455243  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:58:14.455257  543705 disk_worker.go:708] disk space is not compliant
W0320 06:58:14.455261  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:58:14.457272  543705 disk_worker.go:494] system disk:vda1
I0320 06:58:14.457320  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:58:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:58:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:58:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:58:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:58:16.472444  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:58:23.117674  543705 disk_info.go:125] begin check local disk info of client
I0320 06:58:23.120231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:58:23.120238  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256200 0xc000256240]
E0320 06:58:23.407729  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:58:23.407741  543705 memory.go:184] no items to output this cycle
I0320 06:58:23.407777  543705 cpu.go:275] no items to output this cycle
E0320 06:58:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:58:33.409770  543705 memory.go:184] no items to output this cycle
I0320 06:58:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 06:58:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:58:43.409816  543705 memory.go:191] Add success.
I0320 06:58:43.409826  543705 cpu.go:282] Add success.
I0320 06:58:43.419745  543705 net.go:648] Add success.
I0320 06:58:43.422855  543705 net.go:770] primary dev: ETH0
I0320 06:58:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:58:43.422898  543705 net.go:698] Add success.
I0320 06:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:58:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:58:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:58:53.409776  543705 memory.go:184] no items to output this cycle
I0320 06:58:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 06:59:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:59:03.409778  543705 memory.go:184] no items to output this cycle
I0320 06:59:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 06:59:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:59:13.409821  543705 memory.go:191] Add success.
I0320 06:59:13.409824  543705 cpu.go:282] Add success.
W0320 06:59:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 06:59:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 06:59:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 06:59:13.420166  543705 net.go:648] Add success.
I0320 06:59:13.422835  543705 net.go:770] primary dev: ETH0
I0320 06:59:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:59:13.422860  543705 net.go:698] Add success.
I0320 06:59:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 06:59:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 06:59:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 06:59:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 06:59:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 06:59:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 06:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 06:59:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:59:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 06:59:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 06:59:23.121672  543705 disk_info.go:125] begin check local disk info of client
I0320 06:59:23.124169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 06:59:23.124175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fafc0 0xc0001fb000]
E0320 06:59:23.407519  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:59:23.407533  543705 memory.go:184] no items to output this cycle
I0320 06:59:23.407535  543705 cpu.go:275] no items to output this cycle
E0320 06:59:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:59:33.409781  543705 memory.go:184] no items to output this cycle
I0320 06:59:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 06:59:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:59:43.409792  543705 memory.go:191] Add success.
I0320 06:59:43.409793  543705 cpu.go:282] Add success.
I0320 06:59:43.419980  543705 net.go:648] Add success.
I0320 06:59:43.422773  543705 net.go:770] primary dev: ETH0
I0320 06:59:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0320 06:59:43.422797  543705 net.go:698] Add success.
I0320 06:59:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 06:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 06:59:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 06:59:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 06:59:53.409798  543705 memory.go:184] no items to output this cycle
I0320 06:59:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 07:00:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:00:03.409789  543705 memory.go:184] no items to output this cycle
I0320 07:00:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 07:00:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:00:13.409807  543705 memory.go:191] Add success.
I0320 07:00:13.409811  543705 cpu.go:282] Add success.
W0320 07:00:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:00:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:00:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:00:13.420357  543705 net.go:648] Add success.
I0320 07:00:13.423051  543705 net.go:770] primary dev: ETH0
I0320 07:00:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:00:13.423080  543705 net.go:698] Add success.
I0320 07:00:13.469202  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"72f3779f-3b9b-40fe-aada-4960bdf1c08d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:00:13.469236  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:00:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:00:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:00:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 07:00:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:00:14.456747  543705 disk_worker.go:494] system disk:vda1
I0320 07:00:14.456781  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:00:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:00:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:00:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:00:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:00:23.125678  543705 disk_info.go:125] begin check local disk info of client
I0320 07:00:23.128142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:00:23.128148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7180 0xc0001c71c0]
E0320 07:00:23.407612  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:00:23.407624  543705 memory.go:184] no items to output this cycle
I0320 07:00:23.407661  543705 cpu.go:275] no items to output this cycle
E0320 07:00:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:00:33.409803  543705 memory.go:184] no items to output this cycle
I0320 07:00:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 07:00:38.213734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:00:38.213741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:00:43.409824  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:00:43.410735  543705 memory.go:191] Add success.
I0320 07:00:43.409885  543705 cpu.go:282] Add success.
I0320 07:00:43.420451  543705 net.go:648] Add success.
I0320 07:00:43.423106  543705 net.go:770] primary dev: ETH0
I0320 07:00:43.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:00:43.423132  543705 net.go:698] Add success.
I0320 07:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:00:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:00:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:00:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:00:53.409779  543705 memory.go:184] no items to output this cycle
I0320 07:00:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:01:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:01:03.409778  543705 memory.go:184] no items to output this cycle
I0320 07:01:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 07:01:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:01:13.409805  543705 memory.go:191] Add success.
I0320 07:01:13.409817  543705 cpu.go:282] Add success.
W0320 07:01:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:01:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:01:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:01:13.420113  543705 net.go:648] Add success.
I0320 07:01:13.422952  543705 net.go:770] primary dev: ETH0
I0320 07:01:13.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:01:13.422977  543705 net.go:698] Add success.
I0320 07:01:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:01:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:01:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 07:01:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:01:14.456500  543705 disk_worker.go:494] system disk:vda1
I0320 07:01:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:01:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:01:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:01:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:01:16.472357  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:01:23.129672  543705 disk_info.go:125] begin check local disk info of client
I0320 07:01:23.132064  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:01:23.132069  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b480 0xc00007b4c0]
E0320 07:01:23.407546  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:01:23.407564  543705 memory.go:184] no items to output this cycle
I0320 07:01:23.407568  543705 cpu.go:275] no items to output this cycle
E0320 07:01:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:01:33.409797  543705 memory.go:184] no items to output this cycle
I0320 07:01:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 07:01:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:01:43.409795  543705 memory.go:191] Add success.
I0320 07:01:43.409815  543705 cpu.go:282] Add success.
I0320 07:01:43.419952  543705 net.go:648] Add success.
I0320 07:01:43.422699  543705 net.go:770] primary dev: ETH0
I0320 07:01:43.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:01:43.422724  543705 net.go:698] Add success.
I0320 07:01:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:01:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:01:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:01:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:01:53.409859  543705 memory.go:184] no items to output this cycle
I0320 07:01:53.409967  543705 cpu.go:275] no items to output this cycle
E0320 07:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:02:03.409777  543705 memory.go:184] no items to output this cycle
I0320 07:02:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 07:02:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:02:13.409809  543705 memory.go:191] Add success.
I0320 07:02:13.409813  543705 cpu.go:282] Add success.
W0320 07:02:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:02:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:02:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:02:13.420160  543705 net.go:648] Add success.
I0320 07:02:13.422819  543705 net.go:770] primary dev: ETH0
I0320 07:02:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:02:13.422845  543705 net.go:698] Add success.
W0320 07:02:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:02:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 07:02:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:02:14.456796  543705 disk_worker.go:494] system disk:vda1
I0320 07:02:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:02:14.457132  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:02:14.457140  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:02:14.457144  543705 custom_config.go:64] query custom config with name: gpu
E0320 07:02:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:02:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:02:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:02:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:02:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:02:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:02:16.472356  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:02:23.133671  543705 disk_info.go:125] begin check local disk info of client
I0320 07:02:23.136079  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:02:23.136086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c81c0 0xc0003c8200]
E0320 07:02:23.407520  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:02:23.407537  543705 memory.go:184] no items to output this cycle
I0320 07:02:23.407557  543705 cpu.go:275] no items to output this cycle
E0320 07:02:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:02:33.409786  543705 memory.go:184] no items to output this cycle
I0320 07:02:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 07:02:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:02:43.409801  543705 memory.go:191] Add success.
I0320 07:02:43.409810  543705 cpu.go:282] Add success.
I0320 07:02:43.419889  543705 net.go:648] Add success.
I0320 07:02:43.422715  543705 net.go:770] primary dev: ETH0
I0320 07:02:43.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:02:43.422753  543705 net.go:698] Add success.
I0320 07:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:02:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:02:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:02:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:02:53.409806  543705 memory.go:184] no items to output this cycle
I0320 07:02:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 07:03:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:03:03.409798  543705 memory.go:184] no items to output this cycle
I0320 07:03:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 07:03:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:03:13.409812  543705 memory.go:191] Add success.
I0320 07:03:13.409815  543705 cpu.go:282] Add success.
W0320 07:03:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:03:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:03:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:03:13.420250  543705 net.go:648] Add success.
I0320 07:03:13.423191  543705 net.go:770] primary dev: ETH0
I0320 07:03:13.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:03:13.423220  543705 net.go:698] Add success.
I0320 07:03:13.486472  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31ed8cf3-c885-4b6a-8d7c-031e3a1b0a3d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:03:13.486506  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:03:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:03:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:03:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 07:03:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:03:14.456728  543705 disk_worker.go:494] system disk:vda1
I0320 07:03:14.456760  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:03:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:03:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:03:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:03:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:03:23.137673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:03:23.140172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:03:23.140178  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037eac0 0xc00037eb00]
E0320 07:03:23.407570  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:03:23.407583  543705 memory.go:184] no items to output this cycle
I0320 07:03:23.407612  543705 cpu.go:275] no items to output this cycle
E0320 07:03:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:03:33.409786  543705 memory.go:184] no items to output this cycle
I0320 07:03:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 07:03:38.217736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:03:38.217743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:03:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:03:43.410685  543705 memory.go:191] Add success.
I0320 07:03:43.409839  543705 cpu.go:282] Add success.
I0320 07:03:43.420428  543705 net.go:648] Add success.
I0320 07:03:43.423117  543705 net.go:770] primary dev: ETH0
I0320 07:03:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:03:43.423145  543705 net.go:698] Add success.
I0320 07:03:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:03:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:03:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:03:53.409780  543705 memory.go:184] no items to output this cycle
I0320 07:03:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 07:04:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:04:03.409868  543705 memory.go:184] no items to output this cycle
I0320 07:04:03.409974  543705 cpu.go:275] no items to output this cycle
E0320 07:04:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:04:13.409826  543705 memory.go:191] Add success.
I0320 07:04:13.409836  543705 cpu.go:282] Add success.
W0320 07:04:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:04:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:04:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:04:13.420252  543705 net.go:648] Add success.
I0320 07:04:13.422999  543705 net.go:770] primary dev: ETH0
I0320 07:04:13.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:04:13.423028  543705 net.go:698] Add success.
I0320 07:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:04:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:04:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 07:04:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:04:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 07:04:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:04:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:04:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:04:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:04:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:04:23.141674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:04:23.144138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:04:23.144144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fab40 0xc0001fab80]
E0320 07:04:23.407532  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:04:23.407547  543705 memory.go:184] no items to output this cycle
I0320 07:04:23.407563  543705 cpu.go:275] no items to output this cycle
E0320 07:04:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:04:33.409772  543705 memory.go:184] no items to output this cycle
I0320 07:04:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 07:04:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:04:43.409817  543705 memory.go:191] Add success.
I0320 07:04:43.409829  543705 cpu.go:282] Add success.
I0320 07:04:43.419989  543705 net.go:648] Add success.
I0320 07:04:43.422895  543705 net.go:770] primary dev: ETH0
I0320 07:04:43.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:04:43.422921  543705 net.go:698] Add success.
I0320 07:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:04:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:04:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:04:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:04:53.410384  543705 memory.go:184] no items to output this cycle
I0320 07:04:53.410386  543705 cpu.go:275] no items to output this cycle
E0320 07:05:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:05:03.409780  543705 memory.go:184] no items to output this cycle
I0320 07:05:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 07:05:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:05:13.409806  543705 memory.go:191] Add success.
I0320 07:05:13.409807  543705 cpu.go:282] Add success.
W0320 07:05:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:05:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:05:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:05:13.420192  543705 net.go:648] Add success.
I0320 07:05:13.422933  543705 net.go:770] primary dev: ETH0
I0320 07:05:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:05:13.422959  543705 net.go:698] Add success.
I0320 07:05:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:05:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:05:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 07:05:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:05:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 07:05:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:05:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:05:16.472392  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:05:23.145673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:05:23.148115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:05:23.148121  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fc80 0xc00037fcc0]
E0320 07:05:23.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:05:23.407523  543705 memory.go:184] no items to output this cycle
I0320 07:05:23.407553  543705 cpu.go:275] no items to output this cycle
E0320 07:05:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:05:33.409803  543705 memory.go:184] no items to output this cycle
I0320 07:05:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 07:05:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:05:43.409776  543705 memory.go:191] Add success.
I0320 07:05:43.409800  543705 cpu.go:282] Add success.
I0320 07:05:43.420035  543705 net.go:648] Add success.
I0320 07:05:43.423724  543705 net.go:770] primary dev: ETH0
I0320 07:05:43.423739  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:05:43.423753  543705 net.go:698] Add success.
I0320 07:05:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:05:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:05:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:05:53.409780  543705 memory.go:184] no items to output this cycle
I0320 07:05:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 07:06:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:06:03.409795  543705 memory.go:184] no items to output this cycle
I0320 07:06:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 07:06:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:06:13.409907  543705 cpu.go:282] Add success.
I0320 07:06:13.409923  543705 memory.go:191] Add success.
W0320 07:06:13.409954  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:06:13.409979  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:06:13.409983  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:06:13.419725  543705 net.go:648] Add success.
I0320 07:06:13.422371  543705 net.go:770] primary dev: ETH0
I0320 07:06:13.422384  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:06:13.422396  543705 net.go:698] Add success.
I0320 07:06:13.468255  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0093675a-2196-4e9d-b85f-313177df5c30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:06:13.468286  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:06:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:06:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:06:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 07:06:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:06:14.456714  543705 disk_worker.go:494] system disk:vda1
I0320 07:06:14.456742  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:06:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:06:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:06:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:06:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:06:23.149673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:06:23.152116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:06:23.152122  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fd40 0xc00037fd80]
E0320 07:06:23.407480  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:06:23.407497  543705 memory.go:184] no items to output this cycle
I0320 07:06:23.407511  543705 cpu.go:275] no items to output this cycle
E0320 07:06:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:06:33.409801  543705 memory.go:184] no items to output this cycle
I0320 07:06:33.409815  543705 cpu.go:275] no items to output this cycle
I0320 07:06:38.221732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:06:38.221739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:06:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:06:43.410755  543705 memory.go:191] Add success.
I0320 07:06:43.409824  543705 cpu.go:282] Add success.
I0320 07:06:43.420433  543705 net.go:648] Add success.
I0320 07:06:43.423320  543705 net.go:770] primary dev: ETH0
I0320 07:06:43.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:06:43.423344  543705 net.go:698] Add success.
I0320 07:06:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:06:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:06:53.409774  543705 memory.go:184] no items to output this cycle
I0320 07:06:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 07:07:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:07:03.409771  543705 memory.go:184] no items to output this cycle
I0320 07:07:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:07:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:07:13.409817  543705 memory.go:191] Add success.
I0320 07:07:13.409826  543705 cpu.go:282] Add success.
W0320 07:07:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:07:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:07:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:07:13.420464  543705 net.go:648] Add success.
I0320 07:07:13.423850  543705 net.go:770] primary dev: ETH0
I0320 07:07:13.423862  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:07:13.423874  543705 net.go:698] Add success.
I0320 07:07:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0320 07:07:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:07:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 07:07:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:07:14.456536  543705 disk_worker.go:494] system disk:vda1
I0320 07:07:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:07:14.457386  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:07:14.457393  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:07:14.457398  543705 custom_config.go:64] query custom config with name: gpu
E0320 07:07:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:07:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:07:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:07:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:07:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:07:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:07:16.472323  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:07:23.153673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:07:23.156111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:07:23.156117  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007acc0 0xc00007ad00]
E0320 07:07:23.407444  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:07:23.407456  543705 memory.go:184] no items to output this cycle
I0320 07:07:23.407477  543705 cpu.go:275] no items to output this cycle
E0320 07:07:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:07:33.409809  543705 memory.go:184] no items to output this cycle
I0320 07:07:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 07:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:07:43.409785  543705 memory.go:191] Add success.
I0320 07:07:43.409787  543705 cpu.go:282] Add success.
I0320 07:07:43.419853  543705 net.go:648] Add success.
I0320 07:07:43.422494  543705 net.go:770] primary dev: ETH0
I0320 07:07:43.422507  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:07:43.422519  543705 net.go:698] Add success.
I0320 07:07:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:07:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:07:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:07:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:07:53.409767  543705 memory.go:184] no items to output this cycle
I0320 07:07:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 07:08:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:08:03.409772  543705 memory.go:184] no items to output this cycle
I0320 07:08:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 07:08:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:08:13.409829  543705 memory.go:191] Add success.
I0320 07:08:13.409831  543705 cpu.go:282] Add success.
W0320 07:08:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:08:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:08:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:08:13.420349  543705 net.go:648] Add success.
I0320 07:08:13.423080  543705 net.go:770] primary dev: ETH0
I0320 07:08:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:08:13.423104  543705 net.go:698] Add success.
I0320 07:08:14.454943  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:08:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:08:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 07:08:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:08:14.456605  543705 disk_worker.go:494] system disk:vda1
I0320 07:08:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:08:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:08:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:08:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:08:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:08:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:08:23.157674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:08:23.160144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:08:23.160164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465400 0xc000465440]
E0320 07:08:23.407441  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:08:23.407453  543705 memory.go:184] no items to output this cycle
I0320 07:08:23.407489  543705 cpu.go:275] no items to output this cycle
E0320 07:08:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:08:33.409810  543705 memory.go:184] no items to output this cycle
I0320 07:08:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 07:08:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:08:43.409795  543705 memory.go:191] Add success.
I0320 07:08:43.409814  543705 cpu.go:282] Add success.
I0320 07:08:43.419895  543705 net.go:648] Add success.
I0320 07:08:43.422723  543705 net.go:770] primary dev: ETH0
I0320 07:08:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:08:43.422752  543705 net.go:698] Add success.
I0320 07:08:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:08:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:08:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:08:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:08:53.409804  543705 memory.go:184] no items to output this cycle
I0320 07:08:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 07:09:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:09:03.409771  543705 memory.go:184] no items to output this cycle
I0320 07:09:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:09:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:09:13.409820  543705 memory.go:191] Add success.
I0320 07:09:13.409824  543705 cpu.go:282] Add success.
W0320 07:09:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:09:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:09:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:09:13.420066  543705 net.go:648] Add success.
I0320 07:09:13.423215  543705 net.go:770] primary dev: ETH0
I0320 07:09:13.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:09:13.423243  543705 net.go:698] Add success.
I0320 07:09:13.463172  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"095674d6-e40a-4f8d-b0b2-1d3accab5bbf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:09:13.463205  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:09:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:09:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 07:09:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:09:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 07:09:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:09:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:09:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:09:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:09:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:09:23.161681  543705 disk_info.go:125] begin check local disk info of client
I0320 07:09:23.164091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:09:23.164098  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab80 0xc00007abc0]
E0320 07:09:23.408469  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:09:23.408485  543705 memory.go:184] no items to output this cycle
I0320 07:09:23.408497  543705 cpu.go:275] no items to output this cycle
E0320 07:09:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:09:33.409809  543705 memory.go:184] no items to output this cycle
I0320 07:09:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 07:09:38.225739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:09:38.225746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:09:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:09:43.410764  543705 memory.go:191] Add success.
I0320 07:09:43.409795  543705 cpu.go:282] Add success.
I0320 07:09:43.420553  543705 net.go:648] Add success.
I0320 07:09:43.423607  543705 net.go:770] primary dev: ETH0
I0320 07:09:43.423620  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:09:43.423633  543705 net.go:698] Add success.
I0320 07:09:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:09:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:09:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:09:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:09:53.409783  543705 memory.go:184] no items to output this cycle
I0320 07:09:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:10:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:10:03.409787  543705 memory.go:184] no items to output this cycle
I0320 07:10:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:10:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:10:13.409803  543705 memory.go:191] Add success.
I0320 07:10:13.409804  543705 cpu.go:282] Add success.
W0320 07:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:10:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:10:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:10:13.420089  543705 net.go:648] Add success.
I0320 07:10:13.423209  543705 net.go:770] primary dev: ETH0
I0320 07:10:13.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:10:13.423237  543705 net.go:698] Add success.
I0320 07:10:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:10:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:10:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 07:10:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:10:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 07:10:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:10:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:10:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:10:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:10:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:10:23.165673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:10:23.168144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:10:23.168150  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034ac40 0xc00034ac80]
E0320 07:10:23.407523  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:10:23.407540  543705 memory.go:184] no items to output this cycle
I0320 07:10:23.407552  543705 cpu.go:275] no items to output this cycle
E0320 07:10:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:10:33.409782  543705 memory.go:184] no items to output this cycle
I0320 07:10:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:10:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:10:43.409794  543705 memory.go:191] Add success.
I0320 07:10:43.409794  543705 cpu.go:282] Add success.
I0320 07:10:43.419885  543705 net.go:648] Add success.
I0320 07:10:43.422849  543705 net.go:770] primary dev: ETH0
I0320 07:10:43.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:10:43.422875  543705 net.go:698] Add success.
I0320 07:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:10:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:10:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:10:53.409762  543705 memory.go:184] no items to output this cycle
I0320 07:10:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:11:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:11:03.409804  543705 memory.go:184] no items to output this cycle
I0320 07:11:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 07:11:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:11:13.409824  543705 memory.go:191] Add success.
I0320 07:11:13.409829  543705 cpu.go:282] Add success.
W0320 07:11:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:11:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:11:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:11:13.420175  543705 net.go:648] Add success.
I0320 07:11:13.423401  543705 net.go:770] primary dev: ETH0
I0320 07:11:13.423414  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:11:13.423426  543705 net.go:698] Add success.
I0320 07:11:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:11:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:11:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 07:11:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:11:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 07:11:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:11:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:11:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:11:23.169669  543705 disk_info.go:125] begin check local disk info of client
I0320 07:11:23.172105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:11:23.172111  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461cc0 0xc000461d00]
E0320 07:11:23.408442  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:11:23.408457  543705 memory.go:184] no items to output this cycle
I0320 07:11:23.408470  543705 cpu.go:275] no items to output this cycle
E0320 07:11:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:11:33.409800  543705 memory.go:184] no items to output this cycle
I0320 07:11:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 07:11:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:11:43.409791  543705 memory.go:191] Add success.
I0320 07:11:43.409796  543705 cpu.go:282] Add success.
I0320 07:11:43.419886  543705 net.go:648] Add success.
I0320 07:11:43.422969  543705 net.go:770] primary dev: ETH0
I0320 07:11:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:11:43.422995  543705 net.go:698] Add success.
I0320 07:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:11:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:11:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:11:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:11:53.409759  543705 memory.go:184] no items to output this cycle
I0320 07:11:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 07:12:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:12:03.409784  543705 cpu.go:275] no items to output this cycle
I0320 07:12:03.409787  543705 memory.go:184] no items to output this cycle
E0320 07:12:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:12:13.409826  543705 memory.go:191] Add success.
I0320 07:12:13.409829  543705 cpu.go:282] Add success.
W0320 07:12:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:12:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:12:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:12:13.420162  543705 net.go:648] Add success.
I0320 07:12:13.423220  543705 net.go:770] primary dev: ETH0
I0320 07:12:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:12:13.423245  543705 net.go:698] Add success.
I0320 07:12:13.899242  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"016e1c3b-11a4-44c9-9fc8-f3f8f5c148c7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:12:13.899277  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 07:12:14.454842  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:12:14.454912  543705 disk_worker.go:708] disk space is not compliant
W0320 07:12:14.454916  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:12:14.455663  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:12:14.455673  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:12:14.455678  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:12:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 07:12:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:12:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:12:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:12:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:12:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:12:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:12:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:12:16.472321  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:12:23.173676  543705 disk_info.go:125] begin check local disk info of client
I0320 07:12:23.176053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:12:23.176059  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002780c0 0xc000278100]
E0320 07:12:23.408368  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:12:23.408383  543705 memory.go:184] no items to output this cycle
I0320 07:12:23.408397  543705 cpu.go:275] no items to output this cycle
E0320 07:12:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:12:33.409770  543705 memory.go:184] no items to output this cycle
I0320 07:12:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 07:12:38.229747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:12:38.229753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:12:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:12:43.410676  543705 memory.go:191] Add success.
I0320 07:12:43.409809  543705 cpu.go:282] Add success.
I0320 07:12:43.420373  543705 net.go:648] Add success.
I0320 07:12:43.422955  543705 net.go:770] primary dev: ETH0
I0320 07:12:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:12:43.422979  543705 net.go:698] Add success.
I0320 07:12:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:12:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:12:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:12:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:12:53.409796  543705 memory.go:184] no items to output this cycle
I0320 07:12:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 07:13:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:13:03.409806  543705 memory.go:184] no items to output this cycle
I0320 07:13:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 07:13:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:13:13.409822  543705 memory.go:191] Add success.
I0320 07:13:13.409831  543705 cpu.go:282] Add success.
W0320 07:13:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:13:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:13:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:13:13.420143  543705 net.go:648] Add success.
I0320 07:13:13.422778  543705 net.go:770] primary dev: ETH0
I0320 07:13:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:13:13.422809  543705 net.go:698] Add success.
I0320 07:13:14.454222  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:13:14.454422  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:13:14.454432  543705 disk_worker.go:708] disk space is not compliant
W0320 07:13:14.454435  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:13:14.455829  543705 disk_worker.go:494] system disk:vda1
I0320 07:13:14.455859  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:13:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:13:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:13:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:13:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:13:23.177677  543705 disk_info.go:125] begin check local disk info of client
I0320 07:13:23.180169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:13:23.180177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461480 0xc0004614c0]
E0320 07:13:23.408435  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:13:23.408447  543705 memory.go:184] no items to output this cycle
I0320 07:13:23.408472  543705 cpu.go:275] no items to output this cycle
E0320 07:13:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:13:33.409812  543705 memory.go:184] no items to output this cycle
I0320 07:13:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 07:13:43.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:13:43.410008  543705 cpu.go:282] Add success.
I0320 07:13:43.410008  543705 memory.go:191] Add success.
I0320 07:13:43.419720  543705 net.go:648] Add success.
I0320 07:13:43.422564  543705 net.go:770] primary dev: ETH0
I0320 07:13:43.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:13:43.422593  543705 net.go:698] Add success.
I0320 07:13:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:13:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:13:53.409792  543705 memory.go:184] no items to output this cycle
I0320 07:13:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 07:14:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:14:03.409781  543705 memory.go:184] no items to output this cycle
I0320 07:14:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 07:14:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:14:13.409829  543705 memory.go:191] Add success.
I0320 07:14:13.409834  543705 cpu.go:282] Add success.
W0320 07:14:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:14:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:14:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:14:13.420109  543705 net.go:648] Add success.
I0320 07:14:13.422989  543705 net.go:770] primary dev: ETH0
I0320 07:14:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:14:13.423015  543705 net.go:698] Add success.
I0320 07:14:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:14:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:14:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 07:14:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:14:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 07:14:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:14:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:14:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:14:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:14:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:14:23.181672  543705 disk_info.go:125] begin check local disk info of client
I0320 07:14:23.184112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:14:23.184118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa7c0 0xc0001fa800]
E0320 07:14:23.407501  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:14:23.407514  543705 memory.go:184] no items to output this cycle
I0320 07:14:23.407542  543705 cpu.go:275] no items to output this cycle
E0320 07:14:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:14:33.409787  543705 memory.go:184] no items to output this cycle
I0320 07:14:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 07:14:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:14:43.409820  543705 memory.go:191] Add success.
I0320 07:14:43.409828  543705 cpu.go:282] Add success.
I0320 07:14:43.420164  543705 net.go:648] Add success.
I0320 07:14:43.423319  543705 net.go:770] primary dev: ETH0
I0320 07:14:43.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:14:43.423344  543705 net.go:698] Add success.
I0320 07:14:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:14:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:14:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:14:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:14:53.409793  543705 memory.go:184] no items to output this cycle
I0320 07:14:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 07:15:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:15:03.409788  543705 memory.go:184] no items to output this cycle
I0320 07:15:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 07:15:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:15:13.409815  543705 memory.go:191] Add success.
I0320 07:15:13.409818  543705 cpu.go:282] Add success.
W0320 07:15:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:15:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:15:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:15:13.420105  543705 net.go:648] Add success.
I0320 07:15:13.422872  543705 net.go:770] primary dev: ETH0
I0320 07:15:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:15:13.422901  543705 net.go:698] Add success.
I0320 07:15:13.473819  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"245b1b86-fac8-4817-91f4-f5e56db132f7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:15:13.473852  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:15:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:15:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:15:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 07:15:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:15:14.456535  543705 disk_worker.go:494] system disk:vda1
I0320 07:15:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:15:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:15:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:15:16.472403  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:15:23.185674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:15:23.188122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:15:23.188129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003760c0 0xc000376100]
E0320 07:15:23.408379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:15:23.408396  543705 memory.go:184] no items to output this cycle
I0320 07:15:23.408409  543705 cpu.go:275] no items to output this cycle
E0320 07:15:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:15:33.409815  543705 memory.go:184] no items to output this cycle
I0320 07:15:33.409829  543705 cpu.go:275] no items to output this cycle
I0320 07:15:38.233738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:15:38.233744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:15:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:15:43.410853  543705 memory.go:191] Add success.
I0320 07:15:43.409916  543705 cpu.go:282] Add success.
I0320 07:15:43.419762  543705 net.go:648] Add success.
I0320 07:15:43.422991  543705 net.go:770] primary dev: ETH0
I0320 07:15:43.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:15:43.423019  543705 net.go:698] Add success.
I0320 07:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:15:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:15:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:15:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:15:53.409767  543705 memory.go:184] no items to output this cycle
I0320 07:15:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 07:16:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:16:03.409776  543705 memory.go:184] no items to output this cycle
I0320 07:16:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 07:16:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:16:13.409836  543705 memory.go:191] Add success.
I0320 07:16:13.409839  543705 cpu.go:282] Add success.
W0320 07:16:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:16:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:16:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:16:13.420121  543705 net.go:648] Add success.
I0320 07:16:13.423000  543705 net.go:770] primary dev: ETH0
I0320 07:16:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:16:13.423026  543705 net.go:698] Add success.
I0320 07:16:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:16:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:16:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 07:16:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:16:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 07:16:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:16:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:16:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:16:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:16:16.472443  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:16:23.189677  543705 disk_info.go:125] begin check local disk info of client
I0320 07:16:23.192121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:16:23.192127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fc080 0xc0001fc0c0]
E0320 07:16:23.408349  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:16:23.408364  543705 memory.go:184] no items to output this cycle
I0320 07:16:23.408382  543705 cpu.go:275] no items to output this cycle
E0320 07:16:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:16:33.409785  543705 memory.go:184] no items to output this cycle
I0320 07:16:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 07:16:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:16:43.409800  543705 memory.go:191] Add success.
I0320 07:16:43.409817  543705 cpu.go:282] Add success.
I0320 07:16:43.419887  543705 net.go:648] Add success.
I0320 07:16:43.422775  543705 net.go:770] primary dev: ETH0
I0320 07:16:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:16:43.422800  543705 net.go:698] Add success.
I0320 07:16:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:16:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:16:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:16:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:16:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 07:16:53.409786  543705 memory.go:184] no items to output this cycle
E0320 07:17:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:17:03.409805  543705 memory.go:184] no items to output this cycle
I0320 07:17:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 07:17:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:17:13.409787  543705 memory.go:191] Add success.
I0320 07:17:13.409806  543705 cpu.go:282] Add success.
W0320 07:17:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:17:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:17:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:17:13.420143  543705 net.go:648] Add success.
I0320 07:17:13.423081  543705 net.go:770] primary dev: ETH0
I0320 07:17:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:17:13.423107  543705 net.go:698] Add success.
I0320 07:17:13.453664  543705 event_worker.go:152] Polling the log file for events...
W0320 07:17:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:17:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 07:17:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:17:14.456941  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:17:14.456950  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:17:14.456956  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:17:14.457012  543705 disk_worker.go:494] system disk:vda1
I0320 07:17:14.457044  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:17:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:17:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:17:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:17:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:17:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:17:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:17:16.472311  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:17:23.193671  543705 disk_info.go:125] begin check local disk info of client
I0320 07:17:23.196090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:17:23.196095  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368380 0xc0003683c0]
E0320 07:17:23.408297  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:17:23.408309  543705 memory.go:184] no items to output this cycle
I0320 07:17:23.408318  543705 cpu.go:275] no items to output this cycle
E0320 07:17:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:17:33.409790  543705 memory.go:184] no items to output this cycle
I0320 07:17:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:17:43.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:17:43.409886  543705 memory.go:191] Add success.
I0320 07:17:43.409963  543705 cpu.go:282] Add success.
I0320 07:17:43.419717  543705 net.go:648] Add success.
I0320 07:17:43.422836  543705 net.go:770] primary dev: ETH0
I0320 07:17:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:17:43.422861  543705 net.go:698] Add success.
I0320 07:17:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:17:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:17:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:17:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:17:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 07:17:53.409785  543705 memory.go:184] no items to output this cycle
E0320 07:18:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:18:03.409787  543705 memory.go:184] no items to output this cycle
I0320 07:18:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 07:18:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:18:13.409829  543705 memory.go:191] Add success.
I0320 07:18:13.409835  543705 cpu.go:282] Add success.
W0320 07:18:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:18:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:18:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:18:13.420171  543705 net.go:648] Add success.
I0320 07:18:13.423058  543705 net.go:770] primary dev: ETH0
I0320 07:18:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:18:13.423084  543705 net.go:698] Add success.
I0320 07:18:13.468628  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b4b5ce4-2c4c-4680-951c-df05dc8eaf40","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:18:13.468660  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:18:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:18:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:18:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 07:18:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:18:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 07:18:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:18:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:18:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:18:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:18:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:18:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:18:23.197674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:18:23.200186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:18:23.200192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0320 07:18:23.408366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:18:23.408378  543705 memory.go:184] no items to output this cycle
I0320 07:18:23.408401  543705 cpu.go:275] no items to output this cycle
E0320 07:18:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:18:33.409805  543705 memory.go:184] no items to output this cycle
I0320 07:18:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 07:18:38.237737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:18:38.237744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:18:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:18:43.410878  543705 memory.go:191] Add success.
I0320 07:18:43.409819  543705 cpu.go:282] Add success.
I0320 07:18:43.420980  543705 net.go:648] Add success.
I0320 07:18:43.424180  543705 net.go:770] primary dev: ETH0
I0320 07:18:43.424192  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:18:43.424204  543705 net.go:698] Add success.
I0320 07:18:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:18:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:18:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:18:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:18:53.409779  543705 memory.go:184] no items to output this cycle
I0320 07:18:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 07:19:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:19:03.409773  543705 memory.go:184] no items to output this cycle
I0320 07:19:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 07:19:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:19:13.409791  543705 memory.go:191] Add success.
I0320 07:19:13.409792  543705 cpu.go:282] Add success.
W0320 07:19:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:19:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:19:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:19:13.420171  543705 net.go:648] Add success.
I0320 07:19:13.422954  543705 net.go:770] primary dev: ETH0
I0320 07:19:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:19:13.422979  543705 net.go:698] Add success.
I0320 07:19:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:19:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:19:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 07:19:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:19:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 07:19:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:19:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:19:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:19:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:19:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:19:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:19:23.201676  543705 disk_info.go:125] begin check local disk info of client
I0320 07:19:23.204089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:19:23.204095  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c4040 0xc0001c4080]
E0320 07:19:23.408259  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:19:23.408282  543705 memory.go:184] no items to output this cycle
I0320 07:19:23.408294  543705 cpu.go:275] no items to output this cycle
E0320 07:19:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:19:33.409777  543705 memory.go:184] no items to output this cycle
I0320 07:19:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:19:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:19:43.409774  543705 memory.go:191] Add success.
I0320 07:19:43.409808  543705 cpu.go:282] Add success.
I0320 07:19:43.420191  543705 net.go:648] Add success.
I0320 07:19:43.423339  543705 net.go:770] primary dev: ETH0
I0320 07:19:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:19:43.423364  543705 net.go:698] Add success.
I0320 07:19:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:19:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:19:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:19:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:19:53.409799  543705 memory.go:184] no items to output this cycle
I0320 07:19:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 07:20:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:20:03.409785  543705 memory.go:184] no items to output this cycle
I0320 07:20:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:20:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:20:13.409823  543705 memory.go:191] Add success.
I0320 07:20:13.409844  543705 cpu.go:282] Add success.
W0320 07:20:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:20:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:20:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:20:13.420184  543705 net.go:648] Add success.
I0320 07:20:13.422999  543705 net.go:770] primary dev: ETH0
I0320 07:20:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:20:13.423024  543705 net.go:698] Add success.
I0320 07:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:20:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:20:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 07:20:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:20:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 07:20:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:20:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:20:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:20:23.205676  543705 disk_info.go:125] begin check local disk info of client
I0320 07:20:23.208081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:20:23.208087  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa880 0xc0001aa8c0]
E0320 07:20:23.407524  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:20:23.407525  543705 cpu.go:275] no items to output this cycle
I0320 07:20:23.407538  543705 memory.go:184] no items to output this cycle
E0320 07:20:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:20:33.409782  543705 memory.go:184] no items to output this cycle
I0320 07:20:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:20:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:20:43.409792  543705 memory.go:191] Add success.
I0320 07:20:43.409812  543705 cpu.go:282] Add success.
I0320 07:20:43.420102  543705 net.go:648] Add success.
I0320 07:20:43.423307  543705 net.go:770] primary dev: ETH0
I0320 07:20:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:20:43.423332  543705 net.go:698] Add success.
I0320 07:20:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:20:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:20:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:20:53.409782  543705 memory.go:184] no items to output this cycle
I0320 07:20:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 07:21:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:21:03.409793  543705 memory.go:184] no items to output this cycle
I0320 07:21:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 07:21:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:21:13.409788  543705 memory.go:191] Add success.
I0320 07:21:13.409807  543705 cpu.go:282] Add success.
W0320 07:21:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:21:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:21:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:21:13.420163  543705 net.go:648] Add success.
I0320 07:21:13.422839  543705 net.go:770] primary dev: ETH0
I0320 07:21:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:21:13.422863  543705 net.go:698] Add success.
I0320 07:21:13.559584  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a28403bb-719b-4905-9e66-6e27d204b7f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:21:13.559617  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:21:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:21:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:21:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 07:21:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:21:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 07:21:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:21:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:21:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:21:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:21:16.472427  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:21:23.209683  543705 disk_info.go:125] begin check local disk info of client
I0320 07:21:23.212092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:21:23.212099  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb940 0xc0001fb980]
E0320 07:21:23.408220  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:21:23.408237  543705 memory.go:184] no items to output this cycle
I0320 07:21:23.408253  543705 cpu.go:275] no items to output this cycle
E0320 07:21:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:21:33.409786  543705 memory.go:184] no items to output this cycle
I0320 07:21:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 07:21:38.241734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:21:38.241741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:21:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:21:43.410653  543705 memory.go:191] Add success.
I0320 07:21:43.409803  543705 cpu.go:282] Add success.
I0320 07:21:43.420360  543705 net.go:648] Add success.
I0320 07:21:43.423235  543705 net.go:770] primary dev: ETH0
I0320 07:21:43.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:21:43.423289  543705 net.go:698] Add success.
I0320 07:21:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:21:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:21:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:21:53.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:21:53.409834  543705 memory.go:184] no items to output this cycle
I0320 07:21:53.409847  543705 cpu.go:275] no items to output this cycle
E0320 07:22:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:22:03.409768  543705 memory.go:184] no items to output this cycle
I0320 07:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 07:22:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:22:13.409790  543705 memory.go:191] Add success.
I0320 07:22:13.409806  543705 cpu.go:282] Add success.
W0320 07:22:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:22:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:22:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:22:13.420252  543705 net.go:648] Add success.
I0320 07:22:13.422921  543705 net.go:770] primary dev: ETH0
I0320 07:22:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:22:13.422950  543705 net.go:698] Add success.
W0320 07:22:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:22:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 07:22:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:22:14.456918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:22:14.456927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:22:14.456933  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:22:14.456990  543705 disk_worker.go:494] system disk:vda1
I0320 07:22:14.457019  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:22:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:22:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:22:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:22:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:22:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:22:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:22:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:22:23.213679  543705 disk_info.go:125] begin check local disk info of client
I0320 07:22:23.216066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:22:23.216073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1f00 0xc0004c3d00]
E0320 07:22:23.408192  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:22:23.408208  543705 memory.go:184] no items to output this cycle
I0320 07:22:23.408223  543705 cpu.go:275] no items to output this cycle
E0320 07:22:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:22:33.409805  543705 memory.go:184] no items to output this cycle
I0320 07:22:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 07:22:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:22:43.409781  543705 memory.go:191] Add success.
I0320 07:22:43.409804  543705 cpu.go:282] Add success.
I0320 07:22:43.419866  543705 net.go:648] Add success.
I0320 07:22:43.422831  543705 net.go:770] primary dev: ETH0
I0320 07:22:43.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:22:43.422857  543705 net.go:698] Add success.
I0320 07:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:22:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:22:53.409784  543705 memory.go:184] no items to output this cycle
I0320 07:22:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 07:23:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:23:03.409783  543705 memory.go:184] no items to output this cycle
I0320 07:23:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:23:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:23:13.409818  543705 memory.go:191] Add success.
I0320 07:23:13.409824  543705 cpu.go:282] Add success.
W0320 07:23:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:23:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:23:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:23:13.420158  543705 net.go:648] Add success.
I0320 07:23:13.422768  543705 net.go:770] primary dev: ETH0
I0320 07:23:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:23:13.422793  543705 net.go:698] Add success.
I0320 07:23:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:23:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:23:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 07:23:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:23:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 07:23:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:23:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:23:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:23:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:23:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:23:23.217678  543705 disk_info.go:125] begin check local disk info of client
I0320 07:23:23.220049  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:23:23.220055  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
E0320 07:23:23.407526  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:23:23.407543  543705 memory.go:184] no items to output this cycle
I0320 07:23:23.407567  543705 cpu.go:275] no items to output this cycle
E0320 07:23:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:23:33.409781  543705 memory.go:184] no items to output this cycle
I0320 07:23:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 07:23:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:23:43.409809  543705 memory.go:191] Add success.
I0320 07:23:43.409819  543705 cpu.go:282] Add success.
I0320 07:23:43.419892  543705 net.go:648] Add success.
I0320 07:23:43.422760  543705 net.go:770] primary dev: ETH0
I0320 07:23:43.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:23:43.422790  543705 net.go:698] Add success.
I0320 07:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:23:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:23:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:23:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:23:53.409802  543705 memory.go:184] no items to output this cycle
I0320 07:23:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 07:24:03.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:24:03.409906  543705 cpu.go:275] no items to output this cycle
I0320 07:24:03.409951  543705 memory.go:184] no items to output this cycle
E0320 07:24:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:24:13.409796  543705 memory.go:191] Add success.
I0320 07:24:13.409816  543705 cpu.go:282] Add success.
W0320 07:24:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:24:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:24:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:24:13.420121  543705 net.go:648] Add success.
I0320 07:24:13.423100  543705 net.go:770] primary dev: ETH0
I0320 07:24:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:24:13.423125  543705 net.go:698] Add success.
I0320 07:24:13.595750  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0080df41-9d71-44c3-8b4a-0796f7cadf7c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:24:13.595782  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:24:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:24:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:24:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 07:24:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:24:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 07:24:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:24:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:24:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:24:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:24:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:24:23.221674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:24:23.224094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:24:23.224100  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fe00 0xc00037fe40]
E0320 07:24:23.407508  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:24:23.407522  543705 memory.go:184] no items to output this cycle
I0320 07:24:23.407536  543705 cpu.go:275] no items to output this cycle
E0320 07:24:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:24:33.409780  543705 memory.go:184] no items to output this cycle
I0320 07:24:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 07:24:38.245733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:24:38.245739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:24:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:24:43.410608  543705 memory.go:191] Add success.
I0320 07:24:43.409825  543705 cpu.go:282] Add success.
I0320 07:24:43.420438  543705 net.go:648] Add success.
I0320 07:24:43.422986  543705 net.go:770] primary dev: ETH0
I0320 07:24:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:24:43.423012  543705 net.go:698] Add success.
I0320 07:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:24:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:24:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:24:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:24:53.409788  543705 memory.go:184] no items to output this cycle
I0320 07:24:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 07:25:03.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:25:03.409904  543705 memory.go:184] no items to output this cycle
I0320 07:25:03.409932  543705 cpu.go:275] no items to output this cycle
E0320 07:25:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:25:13.409839  543705 memory.go:191] Add success.
I0320 07:25:13.409846  543705 cpu.go:282] Add success.
W0320 07:25:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:25:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:25:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:25:13.420330  543705 net.go:648] Add success.
I0320 07:25:13.423174  543705 net.go:770] primary dev: ETH0
I0320 07:25:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:25:13.423200  543705 net.go:698] Add success.
I0320 07:25:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:25:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:25:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 07:25:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:25:14.456502  543705 disk_worker.go:494] system disk:vda1
I0320 07:25:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:25:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:25:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:25:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:25:16.472364  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:25:23.225674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:25:23.228156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:25:23.228162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a80 0xc0000c4ac0]
E0320 07:25:23.408203  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:25:23.408217  543705 memory.go:184] no items to output this cycle
I0320 07:25:23.408222  543705 cpu.go:275] no items to output this cycle
E0320 07:25:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:25:33.409815  543705 memory.go:184] no items to output this cycle
I0320 07:25:33.409826  543705 cpu.go:275] no items to output this cycle
E0320 07:25:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:25:43.409794  543705 memory.go:191] Add success.
I0320 07:25:43.409820  543705 cpu.go:282] Add success.
I0320 07:25:43.419898  543705 net.go:648] Add success.
I0320 07:25:43.422723  543705 net.go:770] primary dev: ETH0
I0320 07:25:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:25:43.422750  543705 net.go:698] Add success.
I0320 07:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:25:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:25:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:25:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:25:53.409806  543705 memory.go:184] no items to output this cycle
I0320 07:25:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 07:26:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:26:03.409785  543705 memory.go:184] no items to output this cycle
I0320 07:26:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 07:26:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:26:13.409815  543705 memory.go:191] Add success.
I0320 07:26:13.409820  543705 cpu.go:282] Add success.
W0320 07:26:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:26:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:26:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:26:13.420145  543705 net.go:648] Add success.
I0320 07:26:13.423085  543705 net.go:770] primary dev: ETH0
I0320 07:26:13.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:26:13.423110  543705 net.go:698] Add success.
I0320 07:26:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:26:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:26:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 07:26:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:26:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 07:26:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:26:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:26:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:26:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:26:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:26:23.229675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:26:23.232111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:26:23.232118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbf40 0xc00034e000]
E0320 07:26:23.408139  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:26:23.408154  543705 memory.go:184] no items to output this cycle
I0320 07:26:23.408168  543705 cpu.go:275] no items to output this cycle
E0320 07:26:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:26:33.409794  543705 memory.go:184] no items to output this cycle
I0320 07:26:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 07:26:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:26:43.409804  543705 memory.go:191] Add success.
I0320 07:26:43.409808  543705 cpu.go:282] Add success.
I0320 07:26:43.419913  543705 net.go:648] Add success.
I0320 07:26:43.422471  543705 net.go:770] primary dev: ETH0
I0320 07:26:43.422486  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:26:43.422501  543705 net.go:698] Add success.
I0320 07:26:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:26:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:26:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:26:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:26:53.409782  543705 memory.go:184] no items to output this cycle
I0320 07:26:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 07:27:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:27:03.409762  543705 memory.go:184] no items to output this cycle
I0320 07:27:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 07:27:13.409919  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:27:13.409933  543705 cpu.go:282] Add success.
I0320 07:27:13.410104  543705 memory.go:191] Add success.
W0320 07:27:13.410142  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:27:13.410159  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:27:13.410164  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:27:13.419716  543705 net.go:648] Add success.
I0320 07:27:13.422844  543705 net.go:770] primary dev: ETH0
I0320 07:27:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:27:13.422868  543705 net.go:698] Add success.
I0320 07:27:13.429233  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 07:27:13.453474  543705 event_worker.go:152] Polling the log file for events...
I0320 07:27:13.501864  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ad1acc5-248e-4b0b-85da-9d79f45b098d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:27:13.501897  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 07:27:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:27:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 07:27:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:27:14.455836  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:27:14.455845  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:27:14.455850  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:27:14.456649  543705 disk_worker.go:494] system disk:vda1
I0320 07:27:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:27:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:27:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:27:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:27:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:27:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:27:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:27:16.472336  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:27:23.233674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:27:23.236062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:27:23.236068  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3c40 0xc0002b3c80]
E0320 07:27:23.407508  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:27:23.407520  543705 memory.go:184] no items to output this cycle
I0320 07:27:23.407552  543705 cpu.go:275] no items to output this cycle
E0320 07:27:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:27:33.409780  543705 memory.go:184] no items to output this cycle
I0320 07:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 07:27:38.249737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:27:38.249743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:27:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:27:43.410611  543705 memory.go:191] Add success.
I0320 07:27:43.409813  543705 cpu.go:282] Add success.
I0320 07:27:43.420383  543705 net.go:648] Add success.
I0320 07:27:43.423025  543705 net.go:770] primary dev: ETH0
I0320 07:27:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:27:43.423053  543705 net.go:698] Add success.
I0320 07:27:46.458117  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:27:46.458184  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:27:46.458212  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:27:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:27:53.409776  543705 memory.go:184] no items to output this cycle
I0320 07:27:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:28:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:28:03.409765  543705 memory.go:184] no items to output this cycle
I0320 07:28:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 07:28:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:28:13.409834  543705 memory.go:191] Add success.
I0320 07:28:13.409846  543705 cpu.go:282] Add success.
W0320 07:28:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:28:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:28:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:28:13.420165  543705 net.go:648] Add success.
I0320 07:28:13.422771  543705 net.go:770] primary dev: ETH0
I0320 07:28:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:28:13.422795  543705 net.go:698] Add success.
I0320 07:28:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:28:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:28:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 07:28:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:28:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 07:28:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:28:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:28:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:28:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:28:16.472440  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:28:23.237672  543705 disk_info.go:125] begin check local disk info of client
I0320 07:28:23.240227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:28:23.240234  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057db80 0xc00057dbc0]
E0320 07:28:23.408189  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:28:23.408201  543705 memory.go:184] no items to output this cycle
I0320 07:28:23.408240  543705 cpu.go:275] no items to output this cycle
E0320 07:28:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:28:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 07:28:33.409790  543705 memory.go:184] no items to output this cycle
E0320 07:28:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:28:43.409795  543705 memory.go:191] Add success.
I0320 07:28:43.409799  543705 cpu.go:282] Add success.
I0320 07:28:43.419952  543705 net.go:648] Add success.
I0320 07:28:43.422888  543705 net.go:770] primary dev: ETH0
I0320 07:28:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:28:43.422917  543705 net.go:698] Add success.
I0320 07:28:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:28:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:28:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:28:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:28:53.409770  543705 memory.go:184] no items to output this cycle
I0320 07:28:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 07:29:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:29:03.409775  543705 memory.go:184] no items to output this cycle
I0320 07:29:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 07:29:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:29:13.409827  543705 memory.go:191] Add success.
I0320 07:29:13.409833  543705 cpu.go:282] Add success.
W0320 07:29:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:29:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:29:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:29:13.419977  543705 net.go:770] primary dev: ETH0
I0320 07:29:13.419991  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:29:13.420004  543705 net.go:698] Add success.
I0320 07:29:13.420365  543705 net.go:648] Add success.
I0320 07:29:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:29:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:29:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 07:29:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:29:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 07:29:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:29:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:29:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:29:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:29:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:29:23.241674  543705 disk_info.go:125] begin check local disk info of client
I0320 07:29:23.244157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:29:23.244163  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c60c0 0xc0001c6140]
E0320 07:29:23.407508  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:29:23.407522  543705 memory.go:184] no items to output this cycle
I0320 07:29:23.407525  543705 cpu.go:275] no items to output this cycle
E0320 07:29:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:29:33.409808  543705 memory.go:184] no items to output this cycle
I0320 07:29:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 07:29:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:29:43.409775  543705 memory.go:191] Add success.
I0320 07:29:43.409802  543705 cpu.go:282] Add success.
I0320 07:29:43.420048  543705 net.go:648] Add success.
I0320 07:29:43.422898  543705 net.go:770] primary dev: ETH0
I0320 07:29:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:29:43.422929  543705 net.go:698] Add success.
I0320 07:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:29:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:29:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:29:53.409780  543705 memory.go:184] no items to output this cycle
I0320 07:29:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 07:30:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:30:03.409773  543705 memory.go:184] no items to output this cycle
I0320 07:30:03.409805  543705 cpu.go:275] no items to output this cycle
I0320 07:30:13.409967  543705 cpu.go:282] Add success.
E0320 07:30:13.409915  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:30:13.410043  543705 memory.go:191] Add success.
W0320 07:30:13.410073  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:30:13.410085  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:30:13.410088  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:30:13.419734  543705 net.go:648] Add success.
I0320 07:30:13.422578  543705 net.go:770] primary dev: ETH0
I0320 07:30:13.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:30:13.422606  543705 net.go:698] Add success.
I0320 07:30:13.468087  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"66c00345-7d33-4122-987c-8e0fa3c29def","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:30:13.468117  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:30:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:30:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:30:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 07:30:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:30:14.456628  543705 disk_worker.go:494] system disk:vda1
I0320 07:30:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:30:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:30:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:30:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:30:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:30:23.246330  543705 disk_info.go:125] begin check local disk info of client
I0320 07:30:23.248749  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:30:23.248756  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b740 0xc00007b780]
E0320 07:30:23.407505  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:30:23.407518  543705 memory.go:184] no items to output this cycle
I0320 07:30:23.407529  543705 cpu.go:275] no items to output this cycle
E0320 07:30:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:30:33.409805  543705 memory.go:184] no items to output this cycle
I0320 07:30:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 07:30:38.253738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:30:38.253745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:30:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:30:43.410736  543705 memory.go:191] Add success.
I0320 07:30:43.409903  543705 cpu.go:282] Add success.
I0320 07:30:43.420474  543705 net.go:648] Add success.
I0320 07:30:43.423010  543705 net.go:770] primary dev: ETH0
I0320 07:30:43.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:30:43.423037  543705 net.go:698] Add success.
I0320 07:30:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:30:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:30:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:30:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:30:53.409763  543705 memory.go:184] no items to output this cycle
I0320 07:30:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 07:31:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:31:03.409766  543705 memory.go:184] no items to output this cycle
I0320 07:31:03.409807  543705 cpu.go:275] no items to output this cycle
W0320 07:31:13.409712  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:31:13.409731  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:31:13.409737  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:31:13.409899  543705 cpu.go:282] Add success.
E0320 07:31:13.410039  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:31:13.410068  543705 memory.go:191] Add success.
I0320 07:31:13.419758  543705 net.go:648] Add success.
I0320 07:31:13.422665  543705 net.go:770] primary dev: ETH0
I0320 07:31:13.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:31:13.422690  543705 net.go:698] Add success.
I0320 07:31:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:31:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:31:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 07:31:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:31:14.456613  543705 disk_worker.go:494] system disk:vda1
I0320 07:31:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:31:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:31:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:31:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:31:16.472372  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:31:23.249673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:31:23.252111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:31:23.252117  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b4c0 0xc00007b500]
E0320 07:31:23.407522  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:31:23.407538  543705 memory.go:184] no items to output this cycle
I0320 07:31:23.407551  543705 cpu.go:275] no items to output this cycle
E0320 07:31:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:31:33.409787  543705 memory.go:184] no items to output this cycle
I0320 07:31:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:31:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:31:43.409818  543705 memory.go:191] Add success.
I0320 07:31:43.409824  543705 cpu.go:282] Add success.
I0320 07:31:43.419979  543705 net.go:648] Add success.
I0320 07:31:43.422755  543705 net.go:770] primary dev: ETH0
I0320 07:31:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:31:43.422781  543705 net.go:698] Add success.
I0320 07:31:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:31:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:31:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:31:53.409781  543705 memory.go:184] no items to output this cycle
I0320 07:31:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 07:32:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:32:03.409777  543705 memory.go:184] no items to output this cycle
I0320 07:32:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 07:32:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:32:13.409799  543705 memory.go:191] Add success.
I0320 07:32:13.409801  543705 cpu.go:282] Add success.
W0320 07:32:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:32:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:32:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:32:13.420179  543705 net.go:648] Add success.
I0320 07:32:13.423384  543705 net.go:770] primary dev: ETH0
I0320 07:32:13.423397  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:32:13.423416  543705 net.go:698] Add success.
W0320 07:32:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:32:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 07:32:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:32:14.455960  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:32:14.455969  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:32:14.455975  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:32:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 07:32:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:32:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:32:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:32:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:32:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:32:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:32:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:32:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:32:23.253673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:32:23.256055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:32:23.256061  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af80 0xc00007afc0]
E0320 07:32:23.407965  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:32:23.407981  543705 memory.go:184] no items to output this cycle
I0320 07:32:23.407993  543705 cpu.go:275] no items to output this cycle
E0320 07:32:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:32:33.409780  543705 memory.go:184] no items to output this cycle
I0320 07:32:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:32:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:32:43.409796  543705 memory.go:191] Add success.
I0320 07:32:43.409798  543705 cpu.go:282] Add success.
I0320 07:32:43.419883  543705 net.go:648] Add success.
I0320 07:32:43.422920  543705 net.go:770] primary dev: ETH0
I0320 07:32:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:32:43.422949  543705 net.go:698] Add success.
I0320 07:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:32:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:32:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:32:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:32:53.409782  543705 memory.go:184] no items to output this cycle
I0320 07:32:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 07:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:33:03.409784  543705 memory.go:184] no items to output this cycle
I0320 07:33:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 07:33:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:33:13.409806  543705 memory.go:191] Add success.
I0320 07:33:13.409810  543705 cpu.go:282] Add success.
W0320 07:33:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:33:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:33:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:33:13.420385  543705 net.go:648] Add success.
I0320 07:33:13.423173  543705 net.go:770] primary dev: ETH0
I0320 07:33:13.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:33:13.423198  543705 net.go:698] Add success.
I0320 07:33:13.467648  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1162f6c8-b041-4bc5-9732-4f98a383f2e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:33:13.467680  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:33:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:33:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:33:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 07:33:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:33:14.456686  543705 disk_worker.go:494] system disk:vda1
I0320 07:33:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:33:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:33:16.472375  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:33:23.257673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:33:23.260076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:33:23.260083  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035be40 0xc00035be80]
E0320 07:33:23.407969  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:33:23.407986  543705 memory.go:184] no items to output this cycle
I0320 07:33:23.408000  543705 cpu.go:275] no items to output this cycle
E0320 07:33:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:33:33.409815  543705 memory.go:184] no items to output this cycle
I0320 07:33:33.409827  543705 cpu.go:275] no items to output this cycle
I0320 07:33:38.257735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:33:38.257741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:33:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:33:43.410709  543705 memory.go:191] Add success.
I0320 07:33:43.409808  543705 cpu.go:282] Add success.
I0320 07:33:43.420416  543705 net.go:648] Add success.
I0320 07:33:43.423422  543705 net.go:770] primary dev: ETH0
I0320 07:33:43.423436  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:33:43.423450  543705 net.go:698] Add success.
I0320 07:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:33:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:33:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:33:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:33:53.409763  543705 memory.go:184] no items to output this cycle
I0320 07:33:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 07:34:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:34:03.409776  543705 memory.go:184] no items to output this cycle
I0320 07:34:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 07:34:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:34:13.409785  543705 memory.go:191] Add success.
W0320 07:34:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 07:34:13.409823  543705 cpu.go:282] Add success.
W0320 07:34:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:34:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:34:13.420144  543705 net.go:648] Add success.
I0320 07:34:13.423085  543705 net.go:770] primary dev: ETH0
I0320 07:34:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:34:13.423299  543705 net.go:698] Add success.
I0320 07:34:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:34:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:34:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 07:34:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:34:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 07:34:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:34:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:34:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:34:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:34:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:34:16.472368  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:34:23.261672  543705 disk_info.go:125] begin check local disk info of client
I0320 07:34:23.264174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:34:23.264181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004af380 0xc0004af3c0]
E0320 07:34:23.408015  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:34:23.408028  543705 memory.go:184] no items to output this cycle
I0320 07:34:23.408059  543705 cpu.go:275] no items to output this cycle
E0320 07:34:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:34:33.409800  543705 memory.go:184] no items to output this cycle
I0320 07:34:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 07:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:34:43.409787  543705 memory.go:191] Add success.
I0320 07:34:43.409814  543705 cpu.go:282] Add success.
I0320 07:34:43.419894  543705 net.go:648] Add success.
I0320 07:34:43.422703  543705 net.go:770] primary dev: ETH0
I0320 07:34:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:34:43.422731  543705 net.go:698] Add success.
I0320 07:34:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:34:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:34:53.409767  543705 memory.go:184] no items to output this cycle
I0320 07:34:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 07:35:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:35:03.409775  543705 memory.go:184] no items to output this cycle
I0320 07:35:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 07:35:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:35:13.409787  543705 memory.go:191] Add success.
W0320 07:35:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:35:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:35:13.409826  543705 cpu.go:282] Add success.
I0320 07:35:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:35:13.420122  543705 net.go:648] Add success.
I0320 07:35:13.422949  543705 net.go:770] primary dev: ETH0
I0320 07:35:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:35:13.423126  543705 net.go:698] Add success.
I0320 07:35:14.454862  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:35:14.455070  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:35:14.455081  543705 disk_worker.go:708] disk space is not compliant
W0320 07:35:14.455083  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:35:14.456471  543705 disk_worker.go:494] system disk:vda1
I0320 07:35:14.456499  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:35:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:35:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:35:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:35:23.265698  543705 disk_info.go:125] begin check local disk info of client
I0320 07:35:23.268159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:35:23.268165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae1c0 0xc0004ae200]
E0320 07:35:23.408014  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:35:23.408031  543705 memory.go:184] no items to output this cycle
I0320 07:35:23.408043  543705 cpu.go:275] no items to output this cycle
E0320 07:35:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:35:33.409787  543705 memory.go:184] no items to output this cycle
I0320 07:35:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 07:35:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:35:43.409789  543705 memory.go:191] Add success.
I0320 07:35:43.409820  543705 cpu.go:282] Add success.
I0320 07:35:43.419866  543705 net.go:648] Add success.
I0320 07:35:43.422567  543705 net.go:770] primary dev: ETH0
I0320 07:35:43.422582  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:35:43.422597  543705 net.go:698] Add success.
I0320 07:35:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:35:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:35:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:35:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:35:53.409790  543705 memory.go:184] no items to output this cycle
I0320 07:35:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 07:36:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:36:03.409812  543705 memory.go:184] no items to output this cycle
I0320 07:36:03.409824  543705 cpu.go:275] no items to output this cycle
E0320 07:36:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:36:13.409809  543705 memory.go:191] Add success.
I0320 07:36:13.409812  543705 cpu.go:282] Add success.
W0320 07:36:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:36:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:36:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:36:13.420143  543705 net.go:648] Add success.
I0320 07:36:13.422756  543705 net.go:770] primary dev: ETH0
I0320 07:36:13.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:36:13.422780  543705 net.go:698] Add success.
I0320 07:36:13.504771  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1fcbfe61-1564-4081-bde0-89dcb56c99b8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:36:13.504802  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:36:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:36:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:36:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 07:36:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:36:14.456667  543705 disk_worker.go:494] system disk:vda1
I0320 07:36:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:36:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:36:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:36:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:36:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:36:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:36:23.269675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:36:23.272088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:36:23.272094  543705 disk_info.go:196] parse disk info done, disk is : [0xc000538600 0xc000538700]
E0320 07:36:23.407907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:36:23.407922  543705 memory.go:184] no items to output this cycle
I0320 07:36:23.407936  543705 cpu.go:275] no items to output this cycle
E0320 07:36:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:36:33.409797  543705 memory.go:184] no items to output this cycle
I0320 07:36:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 07:36:38.261759  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:36:38.261767  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:36:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:36:43.410759  543705 memory.go:191] Add success.
I0320 07:36:43.409834  543705 cpu.go:282] Add success.
I0320 07:36:43.420470  543705 net.go:648] Add success.
I0320 07:36:43.423226  543705 net.go:770] primary dev: ETH0
I0320 07:36:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:36:43.423256  543705 net.go:698] Add success.
I0320 07:36:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:36:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:36:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:36:53.409815  543705 memory.go:184] no items to output this cycle
I0320 07:36:53.409824  543705 cpu.go:275] no items to output this cycle
E0320 07:37:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:37:03.409809  543705 memory.go:184] no items to output this cycle
I0320 07:37:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 07:37:13.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:37:13.409913  543705 memory.go:191] Add success.
I0320 07:37:13.409914  543705 cpu.go:282] Add success.
W0320 07:37:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:37:13.409972  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:37:13.409976  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:37:13.419729  543705 net.go:648] Add success.
I0320 07:37:13.422637  543705 net.go:770] primary dev: ETH0
I0320 07:37:13.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:37:13.422662  543705 net.go:698] Add success.
I0320 07:37:13.453279  543705 event_worker.go:152] Polling the log file for events...
W0320 07:37:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:37:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 07:37:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:37:14.456788  543705 disk_worker.go:494] system disk:vda1
I0320 07:37:14.456825  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:37:14.457128  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:37:14.457135  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:37:14.457140  543705 custom_config.go:64] query custom config with name: gpu
E0320 07:37:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:37:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:37:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:37:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:37:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:37:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:37:16.472321  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:37:23.273675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:37:23.276081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:37:23.276087  543705 disk_info.go:196] parse disk info done, disk is : [0xc000538340 0xc000538380]
E0320 07:37:23.407886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:37:23.407902  543705 memory.go:184] no items to output this cycle
I0320 07:37:23.407914  543705 cpu.go:275] no items to output this cycle
E0320 07:37:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:37:33.409822  543705 memory.go:184] no items to output this cycle
I0320 07:37:33.409836  543705 cpu.go:275] no items to output this cycle
E0320 07:37:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:37:43.409775  543705 memory.go:191] Add success.
I0320 07:37:43.409816  543705 cpu.go:282] Add success.
I0320 07:37:43.419864  543705 net.go:648] Add success.
I0320 07:37:43.422820  543705 net.go:770] primary dev: ETH0
I0320 07:37:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:37:43.422847  543705 net.go:698] Add success.
I0320 07:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:37:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:37:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:37:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:37:53.409776  543705 memory.go:184] no items to output this cycle
I0320 07:37:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 07:38:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:38:03.409791  543705 memory.go:184] no items to output this cycle
I0320 07:38:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 07:38:13.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:38:13.409926  543705 memory.go:191] Add success.
I0320 07:38:13.409928  543705 cpu.go:282] Add success.
W0320 07:38:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:38:13.409989  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:38:13.409992  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:38:13.419711  543705 net.go:648] Add success.
I0320 07:38:13.422524  543705 net.go:770] primary dev: ETH0
I0320 07:38:13.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:38:13.422547  543705 net.go:698] Add success.
I0320 07:38:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:38:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:38:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 07:38:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:38:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 07:38:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:38:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:38:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:38:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:38:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:38:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:38:23.277675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:38:23.280158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:38:23.280165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0320 07:38:23.407923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:38:23.407935  543705 memory.go:184] no items to output this cycle
I0320 07:38:23.407978  543705 cpu.go:275] no items to output this cycle
E0320 07:38:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:38:33.409797  543705 memory.go:184] no items to output this cycle
I0320 07:38:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:38:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:38:43.409795  543705 memory.go:191] Add success.
I0320 07:38:43.409796  543705 cpu.go:282] Add success.
I0320 07:38:43.419867  543705 net.go:648] Add success.
I0320 07:38:43.422802  543705 net.go:770] primary dev: ETH0
I0320 07:38:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:38:43.422834  543705 net.go:698] Add success.
I0320 07:38:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:38:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:38:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:38:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 07:38:53.409780  543705 memory.go:184] no items to output this cycle
E0320 07:39:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:39:03.409798  543705 memory.go:184] no items to output this cycle
I0320 07:39:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 07:39:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:39:13.409923  543705 memory.go:191] Add success.
I0320 07:39:13.409949  543705 cpu.go:282] Add success.
W0320 07:39:13.409960  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:39:13.409975  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:39:13.409978  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:39:13.419749  543705 net.go:648] Add success.
I0320 07:39:13.422694  543705 net.go:770] primary dev: ETH0
I0320 07:39:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:39:13.422719  543705 net.go:698] Add success.
I0320 07:39:13.470289  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b0e4fca8-0e01-474d-a3b0-e34a1501364b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:39:13.470327  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:39:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:39:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:39:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 07:39:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:39:14.456672  543705 disk_worker.go:494] system disk:vda1
I0320 07:39:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:39:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:39:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:39:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:39:16.472441  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:39:23.281673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:39:23.284102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:39:23.284108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9c80 0xc0003e9cc0]
E0320 07:39:23.407534  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:39:23.407549  543705 memory.go:184] no items to output this cycle
I0320 07:39:23.407565  543705 cpu.go:275] no items to output this cycle
E0320 07:39:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:39:33.409787  543705 memory.go:184] no items to output this cycle
I0320 07:39:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 07:39:38.265758  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:39:38.265765  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:39:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:39:43.410619  543705 memory.go:191] Add success.
I0320 07:39:43.409801  543705 cpu.go:282] Add success.
I0320 07:39:43.420330  543705 net.go:648] Add success.
I0320 07:39:43.422987  543705 net.go:770] primary dev: ETH0
I0320 07:39:43.423001  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:39:43.423017  543705 net.go:698] Add success.
I0320 07:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:39:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:39:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:39:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:39:53.409788  543705 memory.go:184] no items to output this cycle
I0320 07:39:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 07:40:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:40:03.409806  543705 memory.go:184] no items to output this cycle
I0320 07:40:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 07:40:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:40:13.409784  543705 memory.go:191] Add success.
I0320 07:40:13.409808  543705 cpu.go:282] Add success.
W0320 07:40:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:40:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:40:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:40:13.419712  543705 net.go:648] Add success.
I0320 07:40:13.422931  543705 net.go:770] primary dev: ETH0
I0320 07:40:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:40:13.422955  543705 net.go:698] Add success.
I0320 07:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:40:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:40:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 07:40:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:40:14.456867  543705 disk_worker.go:494] system disk:vda1
I0320 07:40:14.456896  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:40:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:40:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:40:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:40:23.285671  543705 disk_info.go:125] begin check local disk info of client
I0320 07:40:23.288110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:40:23.288116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f7000 0xc0001f7040]
E0320 07:40:23.407513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:40:23.407526  543705 memory.go:184] no items to output this cycle
I0320 07:40:23.407539  543705 cpu.go:275] no items to output this cycle
E0320 07:40:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:40:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 07:40:33.409787  543705 memory.go:184] no items to output this cycle
E0320 07:40:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:40:43.409826  543705 memory.go:191] Add success.
I0320 07:40:43.409829  543705 cpu.go:282] Add success.
I0320 07:40:43.419889  543705 net.go:648] Add success.
I0320 07:40:43.422881  543705 net.go:770] primary dev: ETH0
I0320 07:40:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:40:43.422906  543705 net.go:698] Add success.
I0320 07:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:40:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:40:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:40:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:40:53.409777  543705 memory.go:184] no items to output this cycle
I0320 07:40:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 07:41:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:41:03.409768  543705 memory.go:184] no items to output this cycle
I0320 07:41:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 07:41:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:41:13.409790  543705 memory.go:191] Add success.
W0320 07:41:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 07:41:13.409820  543705 cpu.go:282] Add success.
W0320 07:41:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:41:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:41:13.420078  543705 net.go:648] Add success.
I0320 07:41:13.423189  543705 net.go:770] primary dev: ETH0
I0320 07:41:13.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:41:13.423218  543705 net.go:698] Add success.
I0320 07:41:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:41:14.455315  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:41:14.455401  543705 disk_worker.go:708] disk space is not compliant
W0320 07:41:14.455406  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:41:14.457036  543705 disk_worker.go:494] system disk:vda1
I0320 07:41:14.457077  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:41:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:41:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:41:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:41:16.472422  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:41:23.289672  543705 disk_info.go:125] begin check local disk info of client
I0320 07:41:23.292143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:41:23.292149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046c280 0xc00046c2c0]
E0320 07:41:23.407513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:41:23.407524  543705 memory.go:184] no items to output this cycle
I0320 07:41:23.407529  543705 cpu.go:275] no items to output this cycle
E0320 07:41:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:41:33.409783  543705 memory.go:184] no items to output this cycle
I0320 07:41:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 07:41:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:41:43.409814  543705 memory.go:191] Add success.
I0320 07:41:43.409819  543705 cpu.go:282] Add success.
I0320 07:41:43.420049  543705 net.go:648] Add success.
I0320 07:41:43.423149  543705 net.go:770] primary dev: ETH0
I0320 07:41:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:41:43.423175  543705 net.go:698] Add success.
I0320 07:41:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:41:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:41:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:41:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:41:53.409776  543705 cpu.go:275] no items to output this cycle
I0320 07:41:53.409787  543705 memory.go:184] no items to output this cycle
E0320 07:42:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:42:03.409791  543705 memory.go:184] no items to output this cycle
I0320 07:42:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 07:42:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:42:13.409816  543705 memory.go:191] Add success.
I0320 07:42:13.409824  543705 cpu.go:282] Add success.
W0320 07:42:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:42:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:42:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:42:13.420141  543705 net.go:648] Add success.
I0320 07:42:13.423066  543705 net.go:770] primary dev: ETH0
I0320 07:42:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:42:13.423095  543705 net.go:698] Add success.
I0320 07:42:13.481918  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1a5aaf72-ac73-443e-b902-43f1c9dbd413","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:42:13.481951  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 07:42:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:42:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 07:42:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:42:14.456151  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:42:14.456161  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:42:14.456168  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:42:14.457626  543705 disk_worker.go:494] system disk:vda1
I0320 07:42:14.457681  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:42:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:42:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:42:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:42:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:42:16.457955  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:42:16.457974  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:42:16.472301  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:42:23.293676  543705 disk_info.go:125] begin check local disk info of client
I0320 07:42:23.296112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:42:23.296119  543705 disk_info.go:196] parse disk info done, disk is : [0xc000250300 0xc000250340]
E0320 07:42:23.407815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:42:23.407827  543705 memory.go:184] no items to output this cycle
I0320 07:42:23.407829  543705 cpu.go:275] no items to output this cycle
E0320 07:42:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:42:33.409803  543705 memory.go:184] no items to output this cycle
I0320 07:42:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 07:42:38.269732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:42:38.269739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:42:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:42:43.410737  543705 memory.go:191] Add success.
I0320 07:42:43.409790  543705 cpu.go:282] Add success.
I0320 07:42:43.420450  543705 net.go:648] Add success.
I0320 07:42:43.423410  543705 net.go:770] primary dev: ETH0
I0320 07:42:43.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:42:43.423439  543705 net.go:698] Add success.
I0320 07:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:42:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:42:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:42:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:42:53.409777  543705 memory.go:184] no items to output this cycle
I0320 07:42:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 07:43:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:43:03.409764  543705 memory.go:184] no items to output this cycle
I0320 07:43:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 07:43:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:43:13.409797  543705 memory.go:191] Add success.
I0320 07:43:13.409803  543705 cpu.go:282] Add success.
W0320 07:43:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:43:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:43:13.420056  543705 net.go:648] Add success.
I0320 07:43:13.422665  543705 net.go:770] primary dev: ETH0
I0320 07:43:13.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:43:13.422695  543705 net.go:698] Add success.
I0320 07:43:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:43:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:43:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 07:43:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:43:14.457028  543705 disk_worker.go:494] system disk:vda1
I0320 07:43:14.457066  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:43:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:43:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:43:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:43:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:43:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:43:23.297673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:43:23.300102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:43:23.300108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3480 0xc0003f34c0]
E0320 07:43:23.407788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:43:23.407806  543705 memory.go:184] no items to output this cycle
I0320 07:43:23.407818  543705 cpu.go:275] no items to output this cycle
E0320 07:43:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:43:33.409802  543705 memory.go:184] no items to output this cycle
I0320 07:43:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 07:43:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:43:43.409791  543705 memory.go:191] Add success.
I0320 07:43:43.409791  543705 cpu.go:282] Add success.
I0320 07:43:43.419880  543705 net.go:648] Add success.
I0320 07:43:43.422574  543705 net.go:770] primary dev: ETH0
I0320 07:43:43.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:43:43.422600  543705 net.go:698] Add success.
I0320 07:43:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:43:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:43:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:43:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:43:53.409794  543705 memory.go:184] no items to output this cycle
I0320 07:43:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 07:44:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:44:03.409793  543705 memory.go:184] no items to output this cycle
I0320 07:44:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 07:44:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:44:13.409783  543705 memory.go:191] Add success.
I0320 07:44:13.409810  543705 cpu.go:282] Add success.
W0320 07:44:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:44:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:44:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:44:13.420132  543705 net.go:648] Add success.
I0320 07:44:13.422550  543705 net.go:770] primary dev: ETH0
I0320 07:44:13.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:44:13.422578  543705 net.go:698] Add success.
I0320 07:44:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:44:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:44:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 07:44:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:44:14.456561  543705 disk_worker.go:494] system disk:vda1
I0320 07:44:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:44:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:44:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:44:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:44:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:44:23.301676  543705 disk_info.go:125] begin check local disk info of client
I0320 07:44:23.304101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:44:23.304107  543705 disk_info.go:196] parse disk info done, disk is : [0xc000576940 0xc000576980]
E0320 07:44:23.407764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:44:23.407780  543705 memory.go:184] no items to output this cycle
I0320 07:44:23.407794  543705 cpu.go:275] no items to output this cycle
E0320 07:44:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:44:33.409798  543705 memory.go:184] no items to output this cycle
I0320 07:44:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 07:44:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:44:43.409818  543705 memory.go:191] Add success.
I0320 07:44:43.409824  543705 cpu.go:282] Add success.
I0320 07:44:43.419968  543705 net.go:648] Add success.
I0320 07:44:43.422495  543705 net.go:770] primary dev: ETH0
I0320 07:44:43.422511  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:44:43.422525  543705 net.go:698] Add success.
I0320 07:44:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:44:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:44:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:44:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:44:53.409777  543705 memory.go:184] no items to output this cycle
I0320 07:44:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 07:45:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:45:03.409790  543705 memory.go:184] no items to output this cycle
I0320 07:45:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 07:45:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:45:13.409800  543705 cpu.go:282] Add success.
I0320 07:45:13.409803  543705 memory.go:191] Add success.
W0320 07:45:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:45:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:45:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:45:13.420143  543705 net.go:648] Add success.
I0320 07:45:13.423140  543705 net.go:770] primary dev: ETH0
I0320 07:45:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:45:13.423165  543705 net.go:698] Add success.
I0320 07:45:13.573736  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"05171ae8-d8eb-432c-9a3f-bb8d79955521","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:45:13.573776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:45:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:45:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:45:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 07:45:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:45:14.456732  543705 disk_worker.go:494] system disk:vda1
I0320 07:45:14.456769  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:45:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:45:16.457581  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:45:16.457642  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:45:16.457687  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:45:16.473045  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:45:23.305675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:45:23.308130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:45:23.308136  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053a580 0xc00053a5c0]
E0320 07:45:23.407786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:45:23.407801  543705 memory.go:184] no items to output this cycle
I0320 07:45:23.407816  543705 cpu.go:275] no items to output this cycle
E0320 07:45:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:45:33.409785  543705 memory.go:184] no items to output this cycle
I0320 07:45:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 07:45:38.273732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:45:38.273740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:45:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:45:43.410699  543705 memory.go:191] Add success.
I0320 07:45:43.409819  543705 cpu.go:282] Add success.
I0320 07:45:43.420460  543705 net.go:648] Add success.
I0320 07:45:43.423032  543705 net.go:770] primary dev: ETH0
I0320 07:45:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:45:43.423058  543705 net.go:698] Add success.
I0320 07:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:45:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:45:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:45:53.409779  543705 memory.go:184] no items to output this cycle
I0320 07:45:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 07:46:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:46:03.409799  543705 memory.go:184] no items to output this cycle
I0320 07:46:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 07:46:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:46:13.409828  543705 memory.go:191] Add success.
I0320 07:46:13.409834  543705 cpu.go:282] Add success.
W0320 07:46:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:46:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:46:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:46:13.420180  543705 net.go:648] Add success.
I0320 07:46:13.422838  543705 net.go:770] primary dev: ETH0
I0320 07:46:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:46:13.422867  543705 net.go:698] Add success.
I0320 07:46:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:46:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:46:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 07:46:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:46:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 07:46:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:46:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:46:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:46:16.472383  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:46:23.309670  543705 disk_info.go:125] begin check local disk info of client
I0320 07:46:23.312096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:46:23.312102  543705 disk_info.go:196] parse disk info done, disk is : [0xc000322580 0xc0003225c0]
E0320 07:46:23.407840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:46:23.407860  543705 cpu.go:275] no items to output this cycle
I0320 07:46:23.407862  543705 memory.go:184] no items to output this cycle
E0320 07:46:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:46:33.409789  543705 memory.go:184] no items to output this cycle
I0320 07:46:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 07:46:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:46:43.409796  543705 memory.go:191] Add success.
I0320 07:46:43.409796  543705 cpu.go:282] Add success.
I0320 07:46:43.420026  543705 net.go:648] Add success.
I0320 07:46:43.422530  543705 net.go:770] primary dev: ETH0
I0320 07:46:43.422544  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:46:43.422557  543705 net.go:698] Add success.
I0320 07:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:46:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:46:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:46:53.409768  543705 memory.go:184] no items to output this cycle
I0320 07:46:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 07:47:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:47:03.409780  543705 cpu.go:275] no items to output this cycle
I0320 07:47:03.409786  543705 memory.go:184] no items to output this cycle
E0320 07:47:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:47:13.409828  543705 memory.go:191] Add success.
I0320 07:47:13.409834  543705 cpu.go:282] Add success.
W0320 07:47:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:47:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:47:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:47:13.420172  543705 net.go:648] Add success.
I0320 07:47:13.422814  543705 net.go:770] primary dev: ETH0
I0320 07:47:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:47:13.422843  543705 net.go:698] Add success.
I0320 07:47:13.453518  543705 event_worker.go:152] Polling the log file for events...
W0320 07:47:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:47:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 07:47:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:47:14.456914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:47:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:47:14.456929  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:47:14.457000  543705 disk_worker.go:494] system disk:vda1
I0320 07:47:14.457042  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:47:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:47:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:47:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:47:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:47:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:47:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:47:16.472367  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:47:23.313680  543705 disk_info.go:125] begin check local disk info of client
I0320 07:47:23.316125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:47:23.316132  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd9c0 0xc0002bda00]
E0320 07:47:23.407681  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:47:23.407737  543705 cpu.go:275] no items to output this cycle
I0320 07:47:23.407753  543705 memory.go:184] no items to output this cycle
E0320 07:47:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:47:33.409806  543705 memory.go:184] no items to output this cycle
I0320 07:47:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 07:47:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:47:43.409787  543705 memory.go:191] Add success.
I0320 07:47:43.409802  543705 cpu.go:282] Add success.
I0320 07:47:43.419889  543705 net.go:648] Add success.
I0320 07:47:43.422762  543705 net.go:770] primary dev: ETH0
I0320 07:47:43.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:47:43.422788  543705 net.go:698] Add success.
I0320 07:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:47:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:47:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:47:53.409767  543705 memory.go:184] no items to output this cycle
I0320 07:47:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 07:48:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:48:03.409802  543705 memory.go:184] no items to output this cycle
I0320 07:48:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 07:48:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:48:13.409808  543705 cpu.go:282] Add success.
I0320 07:48:13.409824  543705 memory.go:191] Add success.
W0320 07:48:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:48:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:48:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:48:13.420068  543705 net.go:648] Add success.
I0320 07:48:13.422833  543705 net.go:770] primary dev: ETH0
I0320 07:48:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:48:13.422858  543705 net.go:698] Add success.
I0320 07:48:13.470253  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4fc1324-c85d-446b-97ac-49f4b8638485","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:48:13.470288  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:48:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:48:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:48:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 07:48:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:48:14.456695  543705 disk_worker.go:494] system disk:vda1
I0320 07:48:14.456733  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:48:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:48:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:48:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:48:16.472386  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:48:23.317673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:48:23.320077  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:48:23.320083  543705 disk_info.go:196] parse disk info done, disk is : [0xc000207240 0xc000207280]
E0320 07:48:23.407672  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:48:23.407693  543705 memory.go:184] no items to output this cycle
I0320 07:48:23.407698  543705 cpu.go:275] no items to output this cycle
E0320 07:48:33.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:48:33.409919  543705 memory.go:184] no items to output this cycle
I0320 07:48:33.409995  543705 cpu.go:275] no items to output this cycle
I0320 07:48:38.277738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:48:38.277745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:48:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:48:43.410775  543705 memory.go:191] Add success.
I0320 07:48:43.409808  543705 cpu.go:282] Add success.
I0320 07:48:43.420489  543705 net.go:648] Add success.
I0320 07:48:43.423395  543705 net.go:770] primary dev: ETH0
I0320 07:48:43.423408  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:48:43.423420  543705 net.go:698] Add success.
I0320 07:48:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:48:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:48:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:48:53.409765  543705 memory.go:184] no items to output this cycle
I0320 07:48:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 07:49:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:49:03.409798  543705 memory.go:184] no items to output this cycle
I0320 07:49:03.409813  543705 cpu.go:275] no items to output this cycle
W0320 07:49:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:49:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:49:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:49:13.409804  543705 cpu.go:282] Add success.
E0320 07:49:13.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:49:13.409831  543705 memory.go:191] Add success.
I0320 07:49:13.420160  543705 net.go:648] Add success.
I0320 07:49:13.423141  543705 net.go:770] primary dev: ETH0
I0320 07:49:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:49:13.423166  543705 net.go:698] Add success.
I0320 07:49:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:49:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:49:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 07:49:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:49:14.456518  543705 disk_worker.go:494] system disk:vda1
I0320 07:49:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:49:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:49:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:49:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:49:23.321675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:49:23.324086  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:49:23.324092  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384b80 0xc000384bc0]
E0320 07:49:23.407660  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:49:23.407676  543705 memory.go:184] no items to output this cycle
I0320 07:49:23.407686  543705 cpu.go:275] no items to output this cycle
E0320 07:49:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:49:33.409786  543705 memory.go:184] no items to output this cycle
I0320 07:49:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 07:49:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:49:43.409787  543705 memory.go:191] Add success.
I0320 07:49:43.409804  543705 cpu.go:282] Add success.
I0320 07:49:43.419953  543705 net.go:648] Add success.
I0320 07:49:43.422761  543705 net.go:770] primary dev: ETH0
I0320 07:49:43.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:49:43.422786  543705 net.go:698] Add success.
I0320 07:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:49:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:49:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:49:53.409771  543705 memory.go:184] no items to output this cycle
I0320 07:49:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:50:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:50:03.409812  543705 memory.go:184] no items to output this cycle
I0320 07:50:03.409827  543705 cpu.go:275] no items to output this cycle
W0320 07:50:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:50:13.409734  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:50:13.409740  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:50:13.409800  543705 cpu.go:282] Add success.
E0320 07:50:13.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:50:13.409864  543705 memory.go:191] Add success.
I0320 07:50:13.420137  543705 net.go:648] Add success.
I0320 07:50:13.422661  543705 net.go:770] primary dev: ETH0
I0320 07:50:13.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:50:13.422685  543705 net.go:698] Add success.
I0320 07:50:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:50:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:50:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 07:50:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:50:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 07:50:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:50:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:50:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:50:16.472381  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:50:23.325676  543705 disk_info.go:125] begin check local disk info of client
I0320 07:50:23.328100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:50:23.328106  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004faf40 0xc0004faf80]
E0320 07:50:23.407656  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:50:23.407671  543705 memory.go:184] no items to output this cycle
I0320 07:50:23.407685  543705 cpu.go:275] no items to output this cycle
E0320 07:50:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:50:33.409782  543705 memory.go:184] no items to output this cycle
I0320 07:50:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 07:50:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:50:43.409788  543705 memory.go:191] Add success.
I0320 07:50:43.409791  543705 cpu.go:282] Add success.
I0320 07:50:43.420071  543705 net.go:648] Add success.
I0320 07:50:43.423265  543705 net.go:770] primary dev: ETH0
I0320 07:50:43.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:50:43.423295  543705 net.go:698] Add success.
I0320 07:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:50:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:50:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:50:53.409778  543705 memory.go:184] no items to output this cycle
I0320 07:50:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 07:51:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:51:03.409793  543705 memory.go:184] no items to output this cycle
I0320 07:51:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 07:51:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:51:13.409801  543705 memory.go:191] Add success.
I0320 07:51:13.409801  543705 cpu.go:282] Add success.
W0320 07:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:51:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:51:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:51:13.420546  543705 net.go:648] Add success.
I0320 07:51:13.423368  543705 net.go:770] primary dev: ETH0
I0320 07:51:13.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:51:13.423393  543705 net.go:698] Add success.
I0320 07:51:13.658452  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"76567a90-63c6-482b-b47e-d192089180a8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:51:13.658489  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:51:14.453972  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:51:14.455242  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:51:14.455261  543705 disk_worker.go:708] disk space is not compliant
W0320 07:51:14.455263  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:51:14.456637  543705 disk_worker.go:494] system disk:vda1
I0320 07:51:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:51:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:51:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:51:16.472411  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:51:23.329672  543705 disk_info.go:125] begin check local disk info of client
I0320 07:51:23.332124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:51:23.332130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025f1c0 0xc00025f200]
E0320 07:51:23.407612  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:51:23.407624  543705 memory.go:184] no items to output this cycle
I0320 07:51:23.407657  543705 cpu.go:275] no items to output this cycle
E0320 07:51:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:51:33.409809  543705 memory.go:184] no items to output this cycle
I0320 07:51:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 07:51:38.281739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:51:38.281750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:51:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:51:43.410744  543705 memory.go:191] Add success.
I0320 07:51:43.409826  543705 cpu.go:282] Add success.
I0320 07:51:43.420537  543705 net.go:648] Add success.
I0320 07:51:43.423548  543705 net.go:770] primary dev: ETH0
I0320 07:51:43.423561  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:51:43.423573  543705 net.go:698] Add success.
I0320 07:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:51:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:51:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:51:53.409796  543705 memory.go:184] no items to output this cycle
I0320 07:51:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 07:52:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:52:03.409775  543705 memory.go:184] no items to output this cycle
I0320 07:52:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:52:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:52:13.409824  543705 memory.go:191] Add success.
I0320 07:52:13.409832  543705 cpu.go:282] Add success.
W0320 07:52:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:52:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:52:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:52:13.420132  543705 net.go:648] Add success.
I0320 07:52:13.422932  543705 net.go:770] primary dev: ETH0
I0320 07:52:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:52:13.422961  543705 net.go:698] Add success.
W0320 07:52:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:52:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 07:52:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:52:14.456803  543705 disk_worker.go:494] system disk:vda1
I0320 07:52:14.456844  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:52:14.457104  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:52:14.457111  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:52:14.457116  543705 custom_config.go:64] query custom config with name: gpu
E0320 07:52:15.456882  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:52:15.456891  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:52:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:52:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:52:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:52:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:52:16.472348  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:52:23.333680  543705 disk_info.go:125] begin check local disk info of client
I0320 07:52:23.336144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:52:23.336151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c65c0 0xc0001c6600]
E0320 07:52:23.407651  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:52:23.407667  543705 memory.go:184] no items to output this cycle
I0320 07:52:23.407680  543705 cpu.go:275] no items to output this cycle
E0320 07:52:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:52:33.409775  543705 memory.go:184] no items to output this cycle
I0320 07:52:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 07:52:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:52:43.409812  543705 memory.go:191] Add success.
I0320 07:52:43.409820  543705 cpu.go:282] Add success.
I0320 07:52:43.419980  543705 net.go:648] Add success.
I0320 07:52:43.422853  543705 net.go:770] primary dev: ETH0
I0320 07:52:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:52:43.423042  543705 net.go:698] Add success.
I0320 07:52:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:52:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:52:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:52:53.409801  543705 memory.go:184] no items to output this cycle
I0320 07:52:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 07:53:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:53:03.409796  543705 memory.go:184] no items to output this cycle
I0320 07:53:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 07:53:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:53:13.409822  543705 memory.go:191] Add success.
I0320 07:53:13.409832  543705 cpu.go:282] Add success.
W0320 07:53:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:53:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:53:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:53:13.420147  543705 net.go:648] Add success.
I0320 07:53:13.422838  543705 net.go:770] primary dev: ETH0
I0320 07:53:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:53:13.422863  543705 net.go:698] Add success.
I0320 07:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:53:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:53:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 07:53:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:53:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 07:53:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:53:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:53:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:53:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:53:23.337675  543705 disk_info.go:125] begin check local disk info of client
I0320 07:53:23.340145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:53:23.340152  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbcc0 0xc0001fbd00]
E0320 07:53:23.407624  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:53:23.407641  543705 memory.go:184] no items to output this cycle
I0320 07:53:23.407663  543705 cpu.go:275] no items to output this cycle
E0320 07:53:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:53:33.409783  543705 memory.go:184] no items to output this cycle
I0320 07:53:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 07:53:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:53:43.409781  543705 memory.go:191] Add success.
I0320 07:53:43.409805  543705 cpu.go:282] Add success.
I0320 07:53:43.419848  543705 net.go:648] Add success.
I0320 07:53:43.423062  543705 net.go:770] primary dev: ETH0
I0320 07:53:43.423074  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:53:43.423087  543705 net.go:698] Add success.
I0320 07:53:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:53:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:53:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:53:53.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:53:53.409890  543705 cpu.go:275] no items to output this cycle
I0320 07:53:53.409893  543705 memory.go:184] no items to output this cycle
E0320 07:54:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:54:03.409784  543705 memory.go:184] no items to output this cycle
I0320 07:54:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 07:54:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:54:13.409781  543705 memory.go:191] Add success.
W0320 07:54:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 07:54:13.409815  543705 cpu.go:282] Add success.
W0320 07:54:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:54:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:54:13.420123  543705 net.go:648] Add success.
I0320 07:54:13.423122  543705 net.go:770] primary dev: ETH0
I0320 07:54:13.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:54:13.423150  543705 net.go:698] Add success.
I0320 07:54:13.469791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f003605c-5bf3-49d5-9ed8-81a78c78c95c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:54:13.469823  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 07:54:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:54:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:54:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 07:54:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:54:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 07:54:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:54:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:54:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:54:16.472399  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:54:23.341673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:54:23.344116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:54:23.344123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa180 0xc0001fa1c0]
E0320 07:54:23.407521  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:54:23.407533  543705 memory.go:184] no items to output this cycle
I0320 07:54:23.407533  543705 cpu.go:275] no items to output this cycle
E0320 07:54:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:54:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 07:54:33.409792  543705 memory.go:184] no items to output this cycle
I0320 07:54:38.285732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:54:38.285738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:54:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:54:43.410674  543705 memory.go:191] Add success.
I0320 07:54:43.409820  543705 cpu.go:282] Add success.
I0320 07:54:43.420442  543705 net.go:648] Add success.
I0320 07:54:43.423582  543705 net.go:770] primary dev: ETH0
I0320 07:54:43.423595  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:54:43.423608  543705 net.go:698] Add success.
I0320 07:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:54:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:54:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:54:53.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:54:53.409883  543705 memory.go:184] no items to output this cycle
I0320 07:54:53.409955  543705 cpu.go:275] no items to output this cycle
E0320 07:55:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:55:03.409763  543705 memory.go:184] no items to output this cycle
I0320 07:55:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 07:55:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:55:13.409790  543705 memory.go:191] Add success.
W0320 07:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 07:55:13.409820  543705 cpu.go:282] Add success.
W0320 07:55:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:55:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:55:13.420121  543705 net.go:648] Add success.
I0320 07:55:13.423061  543705 net.go:770] primary dev: ETH0
I0320 07:55:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:55:13.423085  543705 net.go:698] Add success.
I0320 07:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:55:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:55:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 07:55:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:55:14.456545  543705 disk_worker.go:494] system disk:vda1
I0320 07:55:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:55:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:55:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:55:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:55:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:55:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:55:23.345677  543705 disk_info.go:125] begin check local disk info of client
I0320 07:55:23.348171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:55:23.348177  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c4c0 0xc00048c500]
E0320 07:55:23.407625  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:55:23.407640  543705 memory.go:184] no items to output this cycle
I0320 07:55:23.407653  543705 cpu.go:275] no items to output this cycle
E0320 07:55:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:55:33.409791  543705 memory.go:184] no items to output this cycle
I0320 07:55:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 07:55:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:55:43.409794  543705 memory.go:191] Add success.
I0320 07:55:43.409796  543705 cpu.go:282] Add success.
I0320 07:55:43.419881  543705 net.go:648] Add success.
I0320 07:55:43.422876  543705 net.go:770] primary dev: ETH0
I0320 07:55:43.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:55:43.422905  543705 net.go:698] Add success.
I0320 07:55:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:55:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:55:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:55:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:55:53.409843  543705 memory.go:184] no items to output this cycle
I0320 07:55:53.409918  543705 cpu.go:275] no items to output this cycle
E0320 07:56:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:56:03.409779  543705 memory.go:184] no items to output this cycle
I0320 07:56:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 07:56:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:56:13.409796  543705 memory.go:191] Add success.
I0320 07:56:13.409798  543705 cpu.go:282] Add success.
W0320 07:56:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:56:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:56:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:56:13.420044  543705 net.go:648] Add success.
I0320 07:56:13.422512  543705 net.go:770] primary dev: ETH0
I0320 07:56:13.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:56:13.422550  543705 net.go:698] Add success.
I0320 07:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:56:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:56:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 07:56:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:56:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 07:56:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:56:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:56:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:56:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:56:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:56:23.349685  543705 disk_info.go:125] begin check local disk info of client
I0320 07:56:23.352152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:56:23.352158  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048ce80 0xc00048cec0]
E0320 07:56:23.407511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:56:23.407525  543705 memory.go:184] no items to output this cycle
I0320 07:56:23.407554  543705 cpu.go:275] no items to output this cycle
E0320 07:56:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:56:33.409778  543705 memory.go:184] no items to output this cycle
I0320 07:56:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 07:56:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:56:43.409789  543705 memory.go:191] Add success.
I0320 07:56:43.409790  543705 cpu.go:282] Add success.
I0320 07:56:43.419882  543705 net.go:648] Add success.
I0320 07:56:43.422699  543705 net.go:770] primary dev: ETH0
I0320 07:56:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:56:43.422727  543705 net.go:698] Add success.
I0320 07:56:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:56:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:56:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:56:53.409793  543705 memory.go:184] no items to output this cycle
I0320 07:56:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 07:57:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:57:03.409788  543705 cpu.go:275] no items to output this cycle
I0320 07:57:03.409790  543705 memory.go:184] no items to output this cycle
E0320 07:57:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:57:13.409822  543705 memory.go:191] Add success.
I0320 07:57:13.409834  543705 cpu.go:282] Add success.
W0320 07:57:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:57:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:57:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:57:13.420168  543705 net.go:648] Add success.
I0320 07:57:13.422972  543705 net.go:770] primary dev: ETH0
I0320 07:57:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:57:13.423000  543705 net.go:698] Add success.
I0320 07:57:13.429689  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 07:57:13.452927  543705 event_worker.go:152] Polling the log file for events...
I0320 07:57:13.470395  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"89b60826-e737-4da2-94e5-6f66f625db4b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 07:57:13.470428  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 07:57:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:57:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 07:57:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0320 07:57:14.455881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 07:57:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 07:57:14.455894  543705 custom_config.go:64] query custom config with name: gpu
I0320 07:57:14.456534  543705 disk_worker.go:494] system disk:vda1
I0320 07:57:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 07:57:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 07:57:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:57:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 07:57:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 07:57:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:57:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:57:16.472361  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:57:23.353678  543705 disk_info.go:125] begin check local disk info of client
I0320 07:57:23.356180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:57:23.356187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
E0320 07:57:23.407589  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:57:23.407613  543705 memory.go:184] no items to output this cycle
I0320 07:57:23.407627  543705 cpu.go:275] no items to output this cycle
E0320 07:57:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:57:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 07:57:33.409792  543705 memory.go:184] no items to output this cycle
I0320 07:57:38.289735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 07:57:38.289742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 07:57:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:57:43.410672  543705 memory.go:191] Add success.
I0320 07:57:43.409799  543705 cpu.go:282] Add success.
I0320 07:57:43.420348  543705 net.go:648] Add success.
I0320 07:57:43.423257  543705 net.go:770] primary dev: ETH0
I0320 07:57:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:57:43.423283  543705 net.go:698] Add success.
I0320 07:57:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:57:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:57:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:57:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:57:53.409800  543705 memory.go:184] no items to output this cycle
I0320 07:57:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 07:58:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:58:03.409781  543705 memory.go:184] no items to output this cycle
I0320 07:58:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:58:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:58:13.409809  543705 cpu.go:282] Add success.
I0320 07:58:13.409815  543705 memory.go:191] Add success.
W0320 07:58:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:58:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:58:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:58:13.420083  543705 net.go:648] Add success.
I0320 07:58:13.423174  543705 net.go:770] primary dev: ETH0
I0320 07:58:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:58:13.423208  543705 net.go:698] Add success.
I0320 07:58:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:58:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:58:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 07:58:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:58:14.456597  543705 disk_worker.go:494] system disk:vda1
I0320 07:58:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:58:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:58:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:58:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:58:16.472382  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:58:23.357680  543705 disk_info.go:125] begin check local disk info of client
I0320 07:58:23.360159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:58:23.360165  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c240 0xc00048c280]
E0320 07:58:23.407557  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:58:23.407573  543705 memory.go:184] no items to output this cycle
I0320 07:58:23.407588  543705 cpu.go:275] no items to output this cycle
E0320 07:58:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:58:33.409798  543705 memory.go:184] no items to output this cycle
I0320 07:58:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 07:58:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:58:43.409807  543705 memory.go:191] Add success.
I0320 07:58:43.409811  543705 cpu.go:282] Add success.
I0320 07:58:43.419987  543705 net.go:648] Add success.
I0320 07:58:43.423211  543705 net.go:770] primary dev: ETH0
I0320 07:58:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:58:43.423238  543705 net.go:698] Add success.
I0320 07:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:58:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:58:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:58:53.409787  543705 memory.go:184] no items to output this cycle
I0320 07:58:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 07:59:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:59:03.409814  543705 memory.go:184] no items to output this cycle
I0320 07:59:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 07:59:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:59:13.409791  543705 memory.go:191] Add success.
I0320 07:59:13.409809  543705 cpu.go:282] Add success.
W0320 07:59:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 07:59:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 07:59:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 07:59:13.420260  543705 net.go:648] Add success.
I0320 07:59:13.423217  543705 net.go:770] primary dev: ETH0
I0320 07:59:13.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:59:13.423242  543705 net.go:698] Add success.
I0320 07:59:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 07:59:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 07:59:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 07:59:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 07:59:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 07:59:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 07:59:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 07:59:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:59:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:59:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 07:59:16.472433  543705 disk_local_worker.go:436] Get disk info: []
I0320 07:59:23.361673  543705 disk_info.go:125] begin check local disk info of client
I0320 07:59:23.364178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 07:59:23.364185  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7ac0 0xc0001c7b00]
E0320 07:59:23.407552  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:59:23.407568  543705 memory.go:184] no items to output this cycle
I0320 07:59:23.407587  543705 cpu.go:275] no items to output this cycle
E0320 07:59:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:59:33.409787  543705 memory.go:184] no items to output this cycle
I0320 07:59:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 07:59:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:59:43.409811  543705 memory.go:191] Add success.
I0320 07:59:43.409818  543705 cpu.go:282] Add success.
I0320 07:59:43.419904  543705 net.go:648] Add success.
I0320 07:59:43.422981  543705 net.go:770] primary dev: ETH0
I0320 07:59:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 07:59:43.423009  543705 net.go:698] Add success.
I0320 07:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 07:59:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 07:59:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 07:59:53.410347  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 07:59:53.410361  543705 memory.go:184] no items to output this cycle
I0320 07:59:53.410394  543705 cpu.go:275] no items to output this cycle
E0320 08:00:03.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:00:03.409888  543705 memory.go:184] no items to output this cycle
I0320 08:00:03.409912  543705 cpu.go:275] no items to output this cycle
E0320 08:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:00:13.409789  543705 memory.go:191] Add success.
W0320 08:00:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 08:00:13.409821  543705 cpu.go:282] Add success.
W0320 08:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:00:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:00:13.420132  543705 net.go:648] Add success.
I0320 08:00:13.422796  543705 net.go:770] primary dev: ETH0
I0320 08:00:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:00:13.422820  543705 net.go:698] Add success.
I0320 08:00:13.469151  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a391497-42dc-4df4-b7a6-06e6c9266cf6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:00:13.469183  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:00:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:00:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:00:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 08:00:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:00:14.456527  543705 disk_worker.go:494] system disk:vda1
I0320 08:00:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:00:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:00:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:00:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:00:16.472419  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:00:23.365674  543705 disk_info.go:125] begin check local disk info of client
I0320 08:00:23.368174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:00:23.368180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb800 0xc0001fb840]
E0320 08:00:23.407533  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:00:23.407551  543705 memory.go:184] no items to output this cycle
I0320 08:00:23.407566  543705 cpu.go:275] no items to output this cycle
E0320 08:00:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:00:33.409788  543705 memory.go:184] no items to output this cycle
I0320 08:00:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 08:00:38.293730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:00:38.293736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:00:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:00:43.410695  543705 memory.go:191] Add success.
I0320 08:00:43.409816  543705 cpu.go:282] Add success.
I0320 08:00:43.420468  543705 net.go:648] Add success.
I0320 08:00:43.423515  543705 net.go:770] primary dev: ETH0
I0320 08:00:43.423530  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:00:43.423543  543705 net.go:698] Add success.
I0320 08:00:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:00:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:00:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:00:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:00:53.409760  543705 memory.go:184] no items to output this cycle
I0320 08:00:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 08:01:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:01:03.409782  543705 memory.go:184] no items to output this cycle
I0320 08:01:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 08:01:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:01:13.409792  543705 cpu.go:282] Add success.
I0320 08:01:13.409797  543705 memory.go:191] Add success.
W0320 08:01:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:01:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:01:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:01:13.420103  543705 net.go:648] Add success.
I0320 08:01:13.422906  543705 net.go:770] primary dev: ETH0
I0320 08:01:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:01:13.422935  543705 net.go:698] Add success.
I0320 08:01:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:01:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:01:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 08:01:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:01:14.456613  543705 disk_worker.go:494] system disk:vda1
I0320 08:01:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:01:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:01:16.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:01:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:01:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:01:16.472458  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:01:23.369674  543705 disk_info.go:125] begin check local disk info of client
I0320 08:01:23.372113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:01:23.372120  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326140 0xc000326180]
E0320 08:01:23.407462  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:01:23.407476  543705 memory.go:184] no items to output this cycle
I0320 08:01:23.407483  543705 cpu.go:275] no items to output this cycle
E0320 08:01:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:01:33.409811  543705 memory.go:184] no items to output this cycle
I0320 08:01:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 08:01:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:01:43.409777  543705 memory.go:191] Add success.
I0320 08:01:43.409803  543705 cpu.go:282] Add success.
I0320 08:01:43.419879  543705 net.go:648] Add success.
I0320 08:01:43.423131  543705 net.go:770] primary dev: ETH0
I0320 08:01:43.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:01:43.423157  543705 net.go:698] Add success.
I0320 08:01:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:01:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:01:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:01:53.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:01:53.409885  543705 memory.go:184] no items to output this cycle
I0320 08:01:53.409972  543705 cpu.go:275] no items to output this cycle
E0320 08:02:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:02:03.409773  543705 memory.go:184] no items to output this cycle
I0320 08:02:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 08:02:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:02:13.409799  543705 memory.go:191] Add success.
I0320 08:02:13.409815  543705 cpu.go:282] Add success.
W0320 08:02:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:02:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:02:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:02:13.420507  543705 net.go:648] Add success.
I0320 08:02:13.423312  543705 net.go:770] primary dev: ETH0
I0320 08:02:13.423331  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:02:13.423352  543705 net.go:698] Add success.
W0320 08:02:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:02:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 08:02:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:02:14.456931  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:02:14.456940  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:02:14.456947  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:02:14.457030  543705 disk_worker.go:494] system disk:vda1
I0320 08:02:14.457061  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:02:15.456776  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:02:15.456786  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:02:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:02:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:02:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:02:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:02:16.472347  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:02:23.373674  543705 disk_info.go:125] begin check local disk info of client
I0320 08:02:23.376116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:02:23.376123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0320 08:02:23.408483  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:02:23.408497  543705 memory.go:184] no items to output this cycle
I0320 08:02:23.408516  543705 cpu.go:275] no items to output this cycle
E0320 08:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:02:33.409784  543705 memory.go:184] no items to output this cycle
I0320 08:02:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 08:02:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:02:43.409772  543705 memory.go:191] Add success.
I0320 08:02:43.409807  543705 cpu.go:282] Add success.
I0320 08:02:43.419945  543705 net.go:648] Add success.
I0320 08:02:43.422996  543705 net.go:770] primary dev: ETH0
I0320 08:02:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:02:43.423054  543705 net.go:698] Add success.
I0320 08:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:02:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:02:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:02:53.409802  543705 memory.go:184] no items to output this cycle
I0320 08:02:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 08:03:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:03:03.409765  543705 memory.go:184] no items to output this cycle
I0320 08:03:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 08:03:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:03:13.409792  543705 memory.go:191] Add success.
I0320 08:03:13.409812  543705 cpu.go:282] Add success.
W0320 08:03:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:03:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:03:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:03:13.420208  543705 net.go:648] Add success.
I0320 08:03:13.422817  543705 net.go:770] primary dev: ETH0
I0320 08:03:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:03:13.422851  543705 net.go:698] Add success.
I0320 08:03:13.468888  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"975e0f8a-ae2a-4b43-ab91-698c30b2c0b6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:03:13.468923  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:03:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:03:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:03:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 08:03:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:03:14.456557  543705 disk_worker.go:494] system disk:vda1
I0320 08:03:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:03:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:03:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:03:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:03:16.472396  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:03:23.377675  543705 disk_info.go:125] begin check local disk info of client
I0320 08:03:23.380180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:03:23.380187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa3c0 0xc0001aa400]
E0320 08:03:23.407443  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:03:23.407451  543705 cpu.go:275] no items to output this cycle
I0320 08:03:23.407454  543705 memory.go:184] no items to output this cycle
E0320 08:03:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:03:33.409780  543705 memory.go:184] no items to output this cycle
I0320 08:03:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 08:03:38.297743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:03:38.297749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:03:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:03:43.411072  543705 memory.go:191] Add success.
I0320 08:03:43.409815  543705 cpu.go:282] Add success.
I0320 08:03:43.419977  543705 net.go:648] Add success.
I0320 08:03:43.423374  543705 net.go:770] primary dev: ETH0
I0320 08:03:43.423388  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:03:43.423399  543705 net.go:698] Add success.
I0320 08:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:03:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:03:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:03:53.409774  543705 memory.go:184] no items to output this cycle
I0320 08:03:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 08:04:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:04:03.409806  543705 memory.go:184] no items to output this cycle
I0320 08:04:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 08:04:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:04:13.409786  543705 memory.go:191] Add success.
W0320 08:04:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 08:04:13.409814  543705 cpu.go:282] Add success.
W0320 08:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:04:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:04:13.420484  543705 net.go:648] Add success.
I0320 08:04:13.424162  543705 net.go:770] primary dev: ETH0
I0320 08:04:13.424178  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:04:13.424192  543705 net.go:698] Add success.
I0320 08:04:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:04:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:04:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 08:04:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:04:14.456520  543705 disk_worker.go:494] system disk:vda1
I0320 08:04:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:04:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:04:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:04:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:04:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:04:16.472402  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:04:23.381683  543705 disk_info.go:125] begin check local disk info of client
I0320 08:04:23.384176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:04:23.384183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab240 0xc0001ab280]
E0320 08:04:23.408470  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:04:23.408482  543705 memory.go:184] no items to output this cycle
I0320 08:04:23.408488  543705 cpu.go:275] no items to output this cycle
E0320 08:04:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:04:33.409808  543705 memory.go:184] no items to output this cycle
I0320 08:04:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 08:04:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:04:43.409792  543705 cpu.go:282] Add success.
I0320 08:04:43.409800  543705 memory.go:191] Add success.
I0320 08:04:43.420045  543705 net.go:648] Add success.
I0320 08:04:43.423090  543705 net.go:770] primary dev: ETH0
I0320 08:04:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:04:43.423115  543705 net.go:698] Add success.
I0320 08:04:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:04:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:04:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:04:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:04:53.409774  543705 memory.go:184] no items to output this cycle
I0320 08:04:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 08:05:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:05:03.409803  543705 memory.go:184] no items to output this cycle
I0320 08:05:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 08:05:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:05:13.409794  543705 cpu.go:282] Add success.
I0320 08:05:13.409801  543705 memory.go:191] Add success.
W0320 08:05:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:05:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:05:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:05:13.420198  543705 net.go:648] Add success.
I0320 08:05:13.422982  543705 net.go:770] primary dev: ETH0
I0320 08:05:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:05:13.423007  543705 net.go:698] Add success.
I0320 08:05:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:05:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:05:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0320 08:05:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:05:14.456493  543705 disk_worker.go:494] system disk:vda1
I0320 08:05:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:05:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:05:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:05:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:05:23.385683  543705 disk_info.go:125] begin check local disk info of client
I0320 08:05:23.388142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:05:23.388148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0080 0xc0004a00c0]
E0320 08:05:23.408438  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:05:23.408451  543705 memory.go:184] no items to output this cycle
I0320 08:05:23.408457  543705 cpu.go:275] no items to output this cycle
E0320 08:05:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:05:33.409809  543705 memory.go:184] no items to output this cycle
I0320 08:05:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 08:05:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:05:43.409795  543705 memory.go:191] Add success.
I0320 08:05:43.409794  543705 cpu.go:282] Add success.
I0320 08:05:43.419984  543705 net.go:648] Add success.
I0320 08:05:43.422720  543705 net.go:770] primary dev: ETH0
I0320 08:05:43.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:05:43.422745  543705 net.go:698] Add success.
I0320 08:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:05:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:05:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:05:53.409787  543705 memory.go:184] no items to output this cycle
I0320 08:05:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 08:06:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:06:03.409796  543705 memory.go:184] no items to output this cycle
I0320 08:06:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 08:06:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:06:13.409822  543705 memory.go:191] Add success.
I0320 08:06:13.409833  543705 cpu.go:282] Add success.
W0320 08:06:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:06:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:06:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:06:13.420434  543705 net.go:648] Add success.
I0320 08:06:13.423364  543705 net.go:770] primary dev: ETH0
I0320 08:06:13.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:06:13.423392  543705 net.go:698] Add success.
I0320 08:06:13.567492  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5d617f98-5e2f-44d1-8ec3-53e34365f2c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:06:13.567530  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:06:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:06:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:06:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 08:06:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:06:14.456625  543705 disk_worker.go:494] system disk:vda1
I0320 08:06:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:06:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:06:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:06:16.472442  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:06:23.389673  543705 disk_info.go:125] begin check local disk info of client
I0320 08:06:23.392131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:06:23.392137  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053cb40 0xc00053cb80]
E0320 08:06:23.408384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:06:23.408396  543705 memory.go:184] no items to output this cycle
I0320 08:06:23.408429  543705 cpu.go:275] no items to output this cycle
E0320 08:06:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:06:33.409770  543705 memory.go:184] no items to output this cycle
I0320 08:06:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 08:06:38.301729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:06:38.301735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:06:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:06:43.410809  543705 memory.go:191] Add success.
I0320 08:06:43.409798  543705 cpu.go:282] Add success.
I0320 08:06:43.420619  543705 net.go:648] Add success.
I0320 08:06:43.423762  543705 net.go:770] primary dev: ETH0
I0320 08:06:43.423775  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:06:43.423788  543705 net.go:698] Add success.
I0320 08:06:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:06:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:06:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:06:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:06:53.409762  543705 memory.go:184] no items to output this cycle
I0320 08:06:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 08:07:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:07:03.409799  543705 memory.go:184] no items to output this cycle
I0320 08:07:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 08:07:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:07:13.409788  543705 memory.go:191] Add success.
I0320 08:07:13.409808  543705 cpu.go:282] Add success.
W0320 08:07:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:07:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:07:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:07:13.420204  543705 net.go:648] Add success.
I0320 08:07:13.423030  543705 net.go:770] primary dev: ETH0
I0320 08:07:13.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:07:13.423054  543705 net.go:698] Add success.
I0320 08:07:13.453623  543705 event_worker.go:152] Polling the log file for events...
W0320 08:07:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 08:07:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:07:14.457005  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:07:14.457015  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:07:14.457021  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:07:14.457046  543705 disk_worker.go:494] system disk:vda1
I0320 08:07:14.457074  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:07:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:07:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:07:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:07:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:07:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:07:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:07:16.472356  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:07:23.393671  543705 disk_info.go:125] begin check local disk info of client
I0320 08:07:23.396139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:07:23.396146  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327300 0xc000327340]
E0320 08:07:23.408375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:07:23.408386  543705 memory.go:184] no items to output this cycle
I0320 08:07:23.408425  543705 cpu.go:275] no items to output this cycle
E0320 08:07:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:07:33.409818  543705 memory.go:184] no items to output this cycle
I0320 08:07:33.409828  543705 cpu.go:275] no items to output this cycle
E0320 08:07:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:07:43.409791  543705 memory.go:191] Add success.
I0320 08:07:43.409794  543705 cpu.go:282] Add success.
I0320 08:07:43.420154  543705 net.go:648] Add success.
I0320 08:07:43.423082  543705 net.go:770] primary dev: ETH0
I0320 08:07:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:07:43.423107  543705 net.go:698] Add success.
I0320 08:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:07:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:07:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:07:53.409779  543705 memory.go:184] no items to output this cycle
I0320 08:07:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 08:08:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:08:03.409769  543705 memory.go:184] no items to output this cycle
I0320 08:08:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:08:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:08:13.409829  543705 memory.go:191] Add success.
I0320 08:08:13.409837  543705 cpu.go:282] Add success.
W0320 08:08:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:08:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:08:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:08:13.420341  543705 net.go:648] Add success.
I0320 08:08:13.423177  543705 net.go:770] primary dev: ETH0
I0320 08:08:13.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:08:13.423202  543705 net.go:698] Add success.
I0320 08:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:08:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:08:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 08:08:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:08:14.456515  543705 disk_worker.go:494] system disk:vda1
I0320 08:08:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:08:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:08:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:08:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:08:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:08:16.472418  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:08:23.397673  543705 disk_info.go:125] begin check local disk info of client
I0320 08:08:23.400081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:08:23.400087  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b400 0xc00007b440]
E0320 08:08:23.408337  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:08:23.408355  543705 memory.go:184] no items to output this cycle
I0320 08:08:23.408370  543705 cpu.go:275] no items to output this cycle
E0320 08:08:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:08:33.409773  543705 memory.go:184] no items to output this cycle
I0320 08:08:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 08:08:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:08:43.409814  543705 memory.go:191] Add success.
I0320 08:08:43.409819  543705 cpu.go:282] Add success.
I0320 08:08:43.419860  543705 net.go:648] Add success.
I0320 08:08:43.422512  543705 net.go:770] primary dev: ETH0
I0320 08:08:43.422528  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:08:43.422540  543705 net.go:698] Add success.
I0320 08:08:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:08:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:08:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:08:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:08:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 08:08:53.409785  543705 memory.go:184] no items to output this cycle
E0320 08:09:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:09:03.409797  543705 memory.go:184] no items to output this cycle
I0320 08:09:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 08:09:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:09:13.409825  543705 memory.go:191] Add success.
I0320 08:09:13.409831  543705 cpu.go:282] Add success.
W0320 08:09:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:09:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:09:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:09:13.420142  543705 net.go:648] Add success.
I0320 08:09:13.423167  543705 net.go:770] primary dev: ETH0
I0320 08:09:13.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:09:13.423198  543705 net.go:698] Add success.
I0320 08:09:13.593141  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00e97b56-d1c4-4e93-a732-7751d368fe62","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:09:13.593175  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:09:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:09:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:09:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0320 08:09:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:09:14.456758  543705 disk_worker.go:494] system disk:vda1
I0320 08:09:14.456786  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:09:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:09:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:09:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:09:16.472380  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:09:23.401672  543705 disk_info.go:125] begin check local disk info of client
I0320 08:09:23.404074  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:09:23.404080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e40 0xc0000c4e80]
E0320 08:09:23.408338  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:09:23.408353  543705 memory.go:184] no items to output this cycle
I0320 08:09:23.408365  543705 cpu.go:275] no items to output this cycle
E0320 08:09:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:09:33.409784  543705 memory.go:184] no items to output this cycle
I0320 08:09:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 08:09:38.305735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:09:38.305743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:09:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:09:43.410858  543705 memory.go:191] Add success.
I0320 08:09:43.409815  543705 cpu.go:282] Add success.
I0320 08:09:43.420718  543705 net.go:648] Add success.
I0320 08:09:43.423697  543705 net.go:770] primary dev: ETH0
I0320 08:09:43.423709  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:09:43.423721  543705 net.go:698] Add success.
I0320 08:09:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:09:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:09:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:09:53.409796  543705 memory.go:184] no items to output this cycle
I0320 08:09:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:10:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:10:03.409767  543705 memory.go:184] no items to output this cycle
I0320 08:10:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:10:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:10:13.409794  543705 memory.go:191] Add success.
I0320 08:10:13.409809  543705 cpu.go:282] Add success.
W0320 08:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:10:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:10:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:10:13.420151  543705 net.go:648] Add success.
I0320 08:10:13.423018  543705 net.go:770] primary dev: ETH0
I0320 08:10:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:10:13.423044  543705 net.go:698] Add success.
I0320 08:10:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:10:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:10:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 08:10:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:10:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 08:10:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:10:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:10:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:10:16.472377  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:10:23.405671  543705 disk_info.go:125] begin check local disk info of client
E0320 08:10:23.407926  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:10:23.407945  543705 memory.go:184] no items to output this cycle
I0320 08:10:23.407958  543705 cpu.go:275] no items to output this cycle
I0320 08:10:23.408123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:10:23.408127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba00 0xc0001aba40]
E0320 08:10:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:10:33.409773  543705 memory.go:184] no items to output this cycle
I0320 08:10:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 08:10:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:10:43.409821  543705 memory.go:191] Add success.
I0320 08:10:43.409822  543705 cpu.go:282] Add success.
I0320 08:10:43.420100  543705 net.go:648] Add success.
I0320 08:10:43.423069  543705 net.go:770] primary dev: ETH0
I0320 08:10:43.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:10:43.423093  543705 net.go:698] Add success.
I0320 08:10:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:10:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:10:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:10:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:10:53.409774  543705 memory.go:184] no items to output this cycle
I0320 08:10:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 08:11:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:11:03.409762  543705 memory.go:184] no items to output this cycle
I0320 08:11:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 08:11:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:11:13.409794  543705 memory.go:191] Add success.
I0320 08:11:13.409815  543705 cpu.go:282] Add success.
W0320 08:11:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:11:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:11:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:11:13.420326  543705 net.go:648] Add success.
I0320 08:11:13.422963  543705 net.go:770] primary dev: ETH0
I0320 08:11:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:11:13.422993  543705 net.go:698] Add success.
I0320 08:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:11:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:11:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 08:11:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:11:14.456539  543705 disk_worker.go:494] system disk:vda1
I0320 08:11:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:11:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:11:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:11:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:11:16.472361  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:11:23.409696  543705 disk_info.go:125] begin check local disk info of client
I0320 08:11:23.409853  543705 cpu.go:275] no items to output this cycle
E0320 08:11:23.409948  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:11:23.409962  543705 memory.go:184] no items to output this cycle
I0320 08:11:23.411981  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:11:23.411987  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab600 0xc0001ab640]
E0320 08:11:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:11:33.409808  543705 memory.go:184] no items to output this cycle
I0320 08:11:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 08:11:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:11:43.409812  543705 memory.go:191] Add success.
I0320 08:11:43.409818  543705 cpu.go:282] Add success.
I0320 08:11:43.419908  543705 net.go:648] Add success.
I0320 08:11:43.422785  543705 net.go:770] primary dev: ETH0
I0320 08:11:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:11:43.422813  543705 net.go:698] Add success.
I0320 08:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:11:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:11:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:11:53.409768  543705 memory.go:184] no items to output this cycle
I0320 08:11:53.409895  543705 cpu.go:275] no items to output this cycle
E0320 08:12:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:12:03.409801  543705 memory.go:184] no items to output this cycle
I0320 08:12:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 08:12:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:12:13.409809  543705 memory.go:191] Add success.
I0320 08:12:13.409815  543705 cpu.go:282] Add success.
W0320 08:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:12:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:12:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:12:13.420299  543705 net.go:648] Add success.
I0320 08:12:13.423428  543705 net.go:770] primary dev: ETH0
I0320 08:12:13.423442  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:12:13.423456  543705 net.go:698] Add success.
I0320 08:12:13.470601  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9611976a-5c5c-4744-a30c-8e8dce64dd95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:12:13.470635  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 08:12:14.455237  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:12:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0320 08:12:14.455255  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:12:14.456105  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:12:14.456114  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:12:14.456120  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:12:14.457117  543705 disk_worker.go:494] system disk:vda1
I0320 08:12:14.457146  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:12:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:12:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:12:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:12:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:12:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:12:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:12:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:12:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:12:23.409777  543705 memory.go:184] no items to output this cycle
I0320 08:12:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 08:12:23.412883  543705 disk_info.go:125] begin check local disk info of client
I0320 08:12:23.415282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:12:23.415287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a09c0 0xc0004a0a00]
E0320 08:12:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:12:33.409805  543705 memory.go:184] no items to output this cycle
I0320 08:12:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 08:12:38.309739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:12:38.309746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:12:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:12:43.410757  543705 memory.go:191] Add success.
I0320 08:12:43.409824  543705 cpu.go:282] Add success.
I0320 08:12:43.420442  543705 net.go:648] Add success.
I0320 08:12:43.423294  543705 net.go:770] primary dev: ETH0
I0320 08:12:43.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:12:43.423321  543705 net.go:698] Add success.
I0320 08:12:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:12:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:12:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:12:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:12:53.409788  543705 memory.go:184] no items to output this cycle
I0320 08:12:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 08:13:03.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:13:03.409828  543705 memory.go:184] no items to output this cycle
I0320 08:13:03.409836  543705 cpu.go:275] no items to output this cycle
E0320 08:13:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:13:13.409806  543705 memory.go:191] Add success.
W0320 08:13:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 08:13:13.409840  543705 cpu.go:282] Add success.
W0320 08:13:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:13:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:13:13.420601  543705 net.go:648] Add success.
I0320 08:13:13.423474  543705 net.go:770] primary dev: ETH0
I0320 08:13:13.423489  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:13:13.423503  543705 net.go:698] Add success.
I0320 08:13:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:13:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:13:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 08:13:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:13:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 08:13:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:13:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:13:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:13:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:13:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:13:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:13:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:13:23.409808  543705 memory.go:184] no items to output this cycle
I0320 08:13:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 08:13:23.416029  543705 disk_info.go:125] begin check local disk info of client
I0320 08:13:23.418375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:13:23.418381  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326880 0xc0003268c0]
E0320 08:13:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:13:33.409823  543705 memory.go:184] no items to output this cycle
I0320 08:13:33.409840  543705 cpu.go:275] no items to output this cycle
E0320 08:13:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:13:43.409800  543705 memory.go:191] Add success.
I0320 08:13:43.409844  543705 cpu.go:282] Add success.
I0320 08:13:43.420074  543705 net.go:648] Add success.
I0320 08:13:43.422728  543705 net.go:770] primary dev: ETH0
I0320 08:13:43.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:13:43.422764  543705 net.go:698] Add success.
I0320 08:13:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:13:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:13:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:13:53.409782  543705 memory.go:184] no items to output this cycle
I0320 08:13:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 08:14:03.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:14:03.409908  543705 memory.go:184] no items to output this cycle
I0320 08:14:03.409983  543705 cpu.go:275] no items to output this cycle
E0320 08:14:13.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:14:13.409847  543705 memory.go:191] Add success.
I0320 08:14:13.409850  543705 cpu.go:282] Add success.
W0320 08:14:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:14:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:14:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:14:13.420217  543705 net.go:648] Add success.
I0320 08:14:13.423299  543705 net.go:770] primary dev: ETH0
I0320 08:14:13.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:14:13.423328  543705 net.go:698] Add success.
I0320 08:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:14:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:14:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 08:14:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:14:14.456492  543705 disk_worker.go:494] system disk:vda1
I0320 08:14:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:14:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:14:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:14:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:14:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:14:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:14:23.409777  543705 memory.go:184] no items to output this cycle
I0320 08:14:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 08:14:23.419016  543705 disk_info.go:125] begin check local disk info of client
I0320 08:14:23.421422  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:14:23.423499  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b7900 0xc0002b7940]
E0320 08:14:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:14:33.409801  543705 memory.go:184] no items to output this cycle
I0320 08:14:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 08:14:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:14:43.409780  543705 memory.go:191] Add success.
I0320 08:14:43.409808  543705 cpu.go:282] Add success.
I0320 08:14:43.420137  543705 net.go:648] Add success.
I0320 08:14:43.423126  543705 net.go:770] primary dev: ETH0
I0320 08:14:43.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:14:43.423154  543705 net.go:698] Add success.
I0320 08:14:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:14:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:14:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:14:53.409791  543705 memory.go:184] no items to output this cycle
I0320 08:14:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 08:15:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:15:03.409779  543705 memory.go:184] no items to output this cycle
I0320 08:15:03.409886  543705 cpu.go:275] no items to output this cycle
E0320 08:15:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:15:13.409805  543705 memory.go:191] Add success.
I0320 08:15:13.409805  543705 cpu.go:282] Add success.
W0320 08:15:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:15:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:15:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:15:13.420180  543705 net.go:648] Add success.
I0320 08:15:13.423092  543705 net.go:770] primary dev: ETH0
I0320 08:15:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:15:13.423122  543705 net.go:698] Add success.
I0320 08:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:15:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:15:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 08:15:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:15:14.456502  543705 disk_worker.go:494] system disk:vda1
I0320 08:15:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:15:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:15:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:15:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:15:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:15:16.472391  543705 disk_local_worker.go:436] Get disk info: []
I0320 08:15:16.711707  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7a80b1b9-02a2-46c6-9152-e946897236e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:15:16.711742  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0320 08:15:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:15:23.409761  543705 memory.go:184] no items to output this cycle
I0320 08:15:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 08:15:23.424343  543705 disk_info.go:125] begin check local disk info of client
I0320 08:15:23.426772  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:15:23.426777  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa4c0 0xc0001fa500]
E0320 08:15:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:15:33.409789  543705 memory.go:184] no items to output this cycle
I0320 08:15:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 08:15:38.313730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:15:38.313736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:15:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:15:43.410769  543705 memory.go:191] Add success.
I0320 08:15:43.409814  543705 cpu.go:282] Add success.
I0320 08:15:43.420462  543705 net.go:648] Add success.
I0320 08:15:43.423377  543705 net.go:770] primary dev: ETH0
I0320 08:15:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:15:43.423409  543705 net.go:698] Add success.
I0320 08:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:15:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:15:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:15:53.409759  543705 memory.go:184] no items to output this cycle
I0320 08:15:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 08:16:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:16:03.409803  543705 memory.go:184] no items to output this cycle
I0320 08:16:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 08:16:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:16:13.409799  543705 memory.go:191] Add success.
I0320 08:16:13.409820  543705 cpu.go:282] Add success.
W0320 08:16:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:16:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:16:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:16:13.419753  543705 net.go:648] Add success.
I0320 08:16:13.422523  543705 net.go:770] primary dev: ETH0
I0320 08:16:13.422547  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:16:13.422559  543705 net.go:698] Add success.
I0320 08:16:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:16:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:16:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 08:16:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:16:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 08:16:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:16:15.455030  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:16:16.458214  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:16:16.458276  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:16:16.458297  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:16:16.472615  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:16:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:16:23.409769  543705 memory.go:184] no items to output this cycle
I0320 08:16:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 08:16:23.427333  543705 disk_info.go:125] begin check local disk info of client
I0320 08:16:23.429795  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:16:23.429801  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fac80 0xc0001facc0]
E0320 08:16:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:16:33.409801  543705 memory.go:184] no items to output this cycle
I0320 08:16:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 08:16:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:16:43.409791  543705 memory.go:191] Add success.
I0320 08:16:43.409801  543705 cpu.go:282] Add success.
I0320 08:16:43.420119  543705 net.go:648] Add success.
I0320 08:16:43.422844  543705 net.go:770] primary dev: ETH0
I0320 08:16:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:16:43.422872  543705 net.go:698] Add success.
I0320 08:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:16:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:16:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:16:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:16:53.409797  543705 memory.go:184] no items to output this cycle
I0320 08:16:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 08:17:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:17:03.409782  543705 memory.go:184] no items to output this cycle
I0320 08:17:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 08:17:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:17:13.409798  543705 memory.go:191] Add success.
I0320 08:17:13.409800  543705 cpu.go:282] Add success.
W0320 08:17:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:17:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:17:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:17:13.420205  543705 net.go:648] Add success.
I0320 08:17:13.423240  543705 net.go:770] primary dev: ETH0
I0320 08:17:13.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:17:13.423265  543705 net.go:698] Add success.
I0320 08:17:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0320 08:17:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:17:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 08:17:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:17:14.456964  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:17:14.456973  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:17:14.456980  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:17:14.457016  543705 disk_worker.go:494] system disk:vda1
I0320 08:17:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:17:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:17:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:17:16.457898  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:17:16.457899  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:17:16.457951  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:17:16.457970  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:17:16.472306  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:17:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:17:23.409768  543705 memory.go:184] no items to output this cycle
I0320 08:17:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 08:17:23.430318  543705 disk_info.go:125] begin check local disk info of client
I0320 08:17:23.432695  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:17:23.432700  543705 disk_info.go:196] parse disk info done, disk is : [0xc000534d80 0xc000534dc0]
E0320 08:17:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:17:33.409807  543705 memory.go:184] no items to output this cycle
I0320 08:17:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 08:17:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:17:43.409783  543705 memory.go:191] Add success.
I0320 08:17:43.409804  543705 cpu.go:282] Add success.
I0320 08:17:43.419983  543705 net.go:648] Add success.
I0320 08:17:43.422991  543705 net.go:770] primary dev: ETH0
I0320 08:17:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:17:43.423016  543705 net.go:698] Add success.
I0320 08:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:17:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:17:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:17:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:17:53.409775  543705 memory.go:184] no items to output this cycle
I0320 08:17:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 08:18:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:18:03.409805  543705 memory.go:184] no items to output this cycle
I0320 08:18:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 08:18:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:18:13.409784  543705 memory.go:191] Add success.
I0320 08:18:13.409809  543705 cpu.go:282] Add success.
W0320 08:18:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:18:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:18:13.420104  543705 net.go:648] Add success.
I0320 08:18:13.423053  543705 net.go:770] primary dev: ETH0
I0320 08:18:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:18:13.423096  543705 net.go:698] Add success.
I0320 08:18:13.467910  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"62f06532-8033-4c31-991e-58b19179ccd5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:18:13.467951  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:18:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:18:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:18:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 08:18:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:18:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 08:18:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:18:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:18:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:18:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:18:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:18:23.409791  543705 memory.go:184] no items to output this cycle
I0320 08:18:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 08:18:23.433337  543705 disk_info.go:125] begin check local disk info of client
I0320 08:18:23.435745  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:18:23.435751  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6300 0xc0001c6340]
E0320 08:18:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:18:33.409809  543705 memory.go:184] no items to output this cycle
I0320 08:18:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 08:18:38.317727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:18:38.317734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:18:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:18:43.410794  543705 memory.go:191] Add success.
I0320 08:18:43.409800  543705 cpu.go:282] Add success.
I0320 08:18:43.420520  543705 net.go:648] Add success.
I0320 08:18:43.423740  543705 net.go:770] primary dev: ETH0
I0320 08:18:43.423755  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:18:43.423770  543705 net.go:698] Add success.
I0320 08:18:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:18:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:18:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:18:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:18:53.409798  543705 memory.go:184] no items to output this cycle
I0320 08:18:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:19:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:19:03.409801  543705 memory.go:184] no items to output this cycle
I0320 08:19:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 08:19:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:19:13.409823  543705 memory.go:191] Add success.
I0320 08:19:13.409826  543705 cpu.go:282] Add success.
W0320 08:19:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:19:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:19:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:19:13.419747  543705 net.go:648] Add success.
I0320 08:19:13.422648  543705 net.go:770] primary dev: ETH0
I0320 08:19:13.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:19:13.422676  543705 net.go:698] Add success.
I0320 08:19:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:19:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:19:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 08:19:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:19:14.456483  543705 disk_worker.go:494] system disk:vda1
I0320 08:19:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:19:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:19:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:19:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:19:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:19:23.409774  543705 memory.go:184] no items to output this cycle
I0320 08:19:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 08:19:23.436367  543705 disk_info.go:125] begin check local disk info of client
I0320 08:19:23.438809  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:19:23.438815  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb4c0 0xc0001fb500]
E0320 08:19:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:19:33.409810  543705 memory.go:184] no items to output this cycle
I0320 08:19:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 08:19:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:19:43.409780  543705 memory.go:191] Add success.
I0320 08:19:43.409800  543705 cpu.go:282] Add success.
I0320 08:19:43.419865  543705 net.go:648] Add success.
I0320 08:19:43.422644  543705 net.go:770] primary dev: ETH0
I0320 08:19:43.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:19:43.422670  543705 net.go:698] Add success.
I0320 08:19:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:19:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:19:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:19:53.409767  543705 memory.go:184] no items to output this cycle
I0320 08:19:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 08:20:03.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:20:03.409870  543705 cpu.go:275] no items to output this cycle
I0320 08:20:03.409874  543705 memory.go:184] no items to output this cycle
E0320 08:20:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:20:13.409797  543705 memory.go:191] Add success.
I0320 08:20:13.409808  543705 cpu.go:282] Add success.
W0320 08:20:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:20:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:20:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:20:13.419750  543705 net.go:648] Add success.
I0320 08:20:13.422773  543705 net.go:770] primary dev: ETH0
I0320 08:20:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:20:13.422797  543705 net.go:698] Add success.
I0320 08:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:20:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:20:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 08:20:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:20:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 08:20:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:20:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:20:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:20:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:20:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:20:23.409777  543705 memory.go:184] no items to output this cycle
I0320 08:20:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 08:20:23.439350  543705 disk_info.go:125] begin check local disk info of client
I0320 08:20:23.441845  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:20:23.441850  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0320 08:20:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:20:33.409806  543705 memory.go:184] no items to output this cycle
I0320 08:20:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 08:20:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:20:43.409796  543705 memory.go:191] Add success.
I0320 08:20:43.409797  543705 cpu.go:282] Add success.
I0320 08:20:43.419854  543705 net.go:648] Add success.
I0320 08:20:43.422731  543705 net.go:770] primary dev: ETH0
I0320 08:20:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:20:43.422756  543705 net.go:698] Add success.
I0320 08:20:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:20:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:20:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:20:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:20:53.409781  543705 memory.go:184] no items to output this cycle
I0320 08:20:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 08:21:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:21:03.409793  543705 memory.go:184] no items to output this cycle
I0320 08:21:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 08:21:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:21:13.409811  543705 memory.go:191] Add success.
I0320 08:21:13.409812  543705 cpu.go:282] Add success.
W0320 08:21:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:21:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:21:13.419737  543705 net.go:648] Add success.
I0320 08:21:13.422108  543705 net.go:770] primary dev: ETH0
I0320 08:21:13.422124  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:21:13.422137  543705 net.go:698] Add success.
I0320 08:21:13.786768  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98e94087-895f-4fe9-968a-94ff8e0edb74","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:21:13.786805  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:21:14.454090  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:21:14.454243  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:21:14.454334  543705 disk_worker.go:708] disk space is not compliant
W0320 08:21:14.454337  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:21:14.455887  543705 disk_worker.go:494] system disk:vda1
I0320 08:21:14.455918  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:21:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:21:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:21:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:21:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:21:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:21:23.409776  543705 memory.go:184] no items to output this cycle
I0320 08:21:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 08:21:23.442393  543705 disk_info.go:125] begin check local disk info of client
I0320 08:21:23.444838  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:21:23.444843  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb340 0xc0001fb380]
E0320 08:21:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:21:33.409794  543705 memory.go:184] no items to output this cycle
I0320 08:21:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 08:21:38.321741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:21:38.321747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:21:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:21:43.411015  543705 memory.go:191] Add success.
I0320 08:21:43.409818  543705 cpu.go:282] Add success.
I0320 08:21:43.419717  543705 net.go:648] Add success.
I0320 08:21:43.423172  543705 net.go:770] primary dev: ETH0
I0320 08:21:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:21:43.423198  543705 net.go:698] Add success.
I0320 08:21:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:21:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:21:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:21:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:21:53.409804  543705 memory.go:184] no items to output this cycle
I0320 08:21:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 08:22:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:22:03.409793  543705 memory.go:184] no items to output this cycle
I0320 08:22:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 08:22:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:22:13.409884  543705 memory.go:191] Add success.
W0320 08:22:13.409919  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:22:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:22:13.409937  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:22:13.409950  543705 cpu.go:282] Add success.
I0320 08:22:13.419754  543705 net.go:648] Add success.
I0320 08:22:13.422503  543705 net.go:770] primary dev: ETH0
I0320 08:22:13.422516  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:22:13.422527  543705 net.go:698] Add success.
W0320 08:22:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:22:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 08:22:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:22:14.456981  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:22:14.456990  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:22:14.456996  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:22:14.457016  543705 disk_worker.go:494] system disk:vda1
I0320 08:22:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:22:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:22:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:22:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:22:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:22:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:22:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:22:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:22:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:22:23.409783  543705 memory.go:184] no items to output this cycle
I0320 08:22:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 08:22:23.445394  543705 disk_info.go:125] begin check local disk info of client
I0320 08:22:23.447898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:22:23.447903  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb040 0xc0001fb080]
E0320 08:22:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:22:33.409815  543705 memory.go:184] no items to output this cycle
I0320 08:22:33.409827  543705 cpu.go:275] no items to output this cycle
E0320 08:22:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:22:43.409800  543705 memory.go:191] Add success.
I0320 08:22:43.409801  543705 cpu.go:282] Add success.
I0320 08:22:43.419869  543705 net.go:648] Add success.
I0320 08:22:43.422559  543705 net.go:770] primary dev: ETH0
I0320 08:22:43.422574  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:22:43.422588  543705 net.go:698] Add success.
I0320 08:22:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:22:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:22:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:22:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:22:53.409776  543705 memory.go:184] no items to output this cycle
I0320 08:22:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 08:23:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:23:03.409811  543705 memory.go:184] no items to output this cycle
I0320 08:23:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 08:23:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:23:13.409782  543705 memory.go:191] Add success.
W0320 08:23:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 08:23:13.409823  543705 cpu.go:282] Add success.
W0320 08:23:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:23:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:23:13.420299  543705 net.go:648] Add success.
I0320 08:23:13.423335  543705 net.go:770] primary dev: ETH0
I0320 08:23:13.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:23:13.423358  543705 net.go:698] Add success.
I0320 08:23:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:23:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:23:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 08:23:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:23:14.456587  543705 disk_worker.go:494] system disk:vda1
I0320 08:23:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:23:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:23:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:23:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:23:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:23:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:23:23.409795  543705 memory.go:184] no items to output this cycle
I0320 08:23:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 08:23:23.448411  543705 disk_info.go:125] begin check local disk info of client
I0320 08:23:23.450806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:23:23.450811  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3700 0xc0002b3740]
E0320 08:23:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:23:33.409792  543705 memory.go:184] no items to output this cycle
I0320 08:23:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 08:23:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:23:43.409819  543705 memory.go:191] Add success.
I0320 08:23:43.409822  543705 cpu.go:282] Add success.
I0320 08:23:43.419968  543705 net.go:648] Add success.
I0320 08:23:43.422753  543705 net.go:770] primary dev: ETH0
I0320 08:23:43.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:23:43.422783  543705 net.go:698] Add success.
I0320 08:23:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:23:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:23:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:23:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:23:53.409783  543705 memory.go:184] no items to output this cycle
I0320 08:23:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 08:24:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:24:03.409768  543705 memory.go:184] no items to output this cycle
I0320 08:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 08:24:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:24:13.409805  543705 memory.go:191] Add success.
I0320 08:24:13.409808  543705 cpu.go:282] Add success.
W0320 08:24:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:24:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:24:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:24:13.420111  543705 net.go:648] Add success.
I0320 08:24:13.423224  543705 net.go:770] primary dev: ETH0
I0320 08:24:13.423239  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:24:13.423250  543705 net.go:698] Add success.
I0320 08:24:13.468525  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bce40aad-b02d-4403-9a7f-cdaced317a59","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:24:13.468574  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:24:14.454063  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:24:14.454254  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:24:14.454265  543705 disk_worker.go:708] disk space is not compliant
W0320 08:24:14.454268  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:24:14.455623  543705 disk_worker.go:494] system disk:vda1
I0320 08:24:14.455666  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:24:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:24:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:24:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:24:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:24:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:24:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:24:23.409797  543705 memory.go:184] no items to output this cycle
I0320 08:24:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 08:24:23.451430  543705 disk_info.go:125] begin check local disk info of client
I0320 08:24:23.453936  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:24:23.453942  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb880 0xc0001fb8c0]
E0320 08:24:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:24:33.409802  543705 memory.go:184] no items to output this cycle
I0320 08:24:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 08:24:38.325737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:24:38.325744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:24:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:24:43.410797  543705 memory.go:191] Add success.
I0320 08:24:43.409807  543705 cpu.go:282] Add success.
I0320 08:24:43.420559  543705 net.go:648] Add success.
I0320 08:24:43.423570  543705 net.go:770] primary dev: ETH0
I0320 08:24:43.423583  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:24:43.423596  543705 net.go:698] Add success.
I0320 08:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:24:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:24:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:24:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:24:53.409772  543705 memory.go:184] no items to output this cycle
I0320 08:24:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 08:25:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:25:03.409766  543705 memory.go:184] no items to output this cycle
I0320 08:25:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 08:25:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:25:13.409818  543705 memory.go:191] Add success.
I0320 08:25:13.409824  543705 cpu.go:282] Add success.
W0320 08:25:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:25:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:25:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:25:13.420324  543705 net.go:648] Add success.
I0320 08:25:13.423202  543705 net.go:770] primary dev: ETH0
I0320 08:25:13.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:25:13.423232  543705 net.go:698] Add success.
I0320 08:25:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:25:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:25:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 08:25:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:25:14.456571  543705 disk_worker.go:494] system disk:vda1
I0320 08:25:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:25:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:25:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:25:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:25:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:25:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:25:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:25:23.409764  543705 memory.go:184] no items to output this cycle
I0320 08:25:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 08:25:23.454469  543705 disk_info.go:125] begin check local disk info of client
I0320 08:25:23.456844  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:25:23.456849  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fc180 0xc0004fc1c0]
E0320 08:25:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:25:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 08:25:33.409793  543705 memory.go:184] no items to output this cycle
E0320 08:25:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:25:43.409780  543705 memory.go:191] Add success.
I0320 08:25:43.409796  543705 cpu.go:282] Add success.
I0320 08:25:43.419897  543705 net.go:648] Add success.
I0320 08:25:43.420849  543705 net.go:770] primary dev: ETH0
I0320 08:25:43.420864  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:25:43.420879  543705 net.go:698] Add success.
I0320 08:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:25:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:25:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:25:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:25:53.409789  543705 memory.go:184] no items to output this cycle
I0320 08:25:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 08:26:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:26:03.409778  543705 memory.go:184] no items to output this cycle
I0320 08:26:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 08:26:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:26:13.409799  543705 memory.go:191] Add success.
I0320 08:26:13.409800  543705 cpu.go:282] Add success.
W0320 08:26:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:26:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:26:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:26:13.420194  543705 net.go:648] Add success.
I0320 08:26:13.422887  543705 net.go:770] primary dev: ETH0
I0320 08:26:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:26:13.422916  543705 net.go:698] Add success.
I0320 08:26:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:26:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:26:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 08:26:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:26:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 08:26:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:26:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:26:16.458163  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:26:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:26:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:26:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 08:26:23.409792  543705 memory.go:184] no items to output this cycle
I0320 08:26:23.457483  543705 disk_info.go:125] begin check local disk info of client
I0320 08:26:23.459916  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:26:23.459921  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327040 0xc000327080]
E0320 08:26:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:26:33.409812  543705 memory.go:184] no items to output this cycle
I0320 08:26:33.409826  543705 cpu.go:275] no items to output this cycle
E0320 08:26:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:26:43.409784  543705 memory.go:191] Add success.
I0320 08:26:43.409795  543705 cpu.go:282] Add success.
I0320 08:26:43.419858  543705 net.go:648] Add success.
I0320 08:26:43.422628  543705 net.go:770] primary dev: ETH0
I0320 08:26:43.422640  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:26:43.422652  543705 net.go:698] Add success.
I0320 08:26:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:26:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:26:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:26:53.409766  543705 memory.go:184] no items to output this cycle
I0320 08:26:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 08:27:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:27:03.409796  543705 memory.go:184] no items to output this cycle
I0320 08:27:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:27:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:27:13.409781  543705 memory.go:191] Add success.
W0320 08:27:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 08:27:13.409810  543705 cpu.go:282] Add success.
W0320 08:27:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:27:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:27:13.420064  543705 net.go:648] Add success.
I0320 08:27:13.422787  543705 net.go:770] primary dev: ETH0
I0320 08:27:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:27:13.422822  543705 net.go:698] Add success.
I0320 08:27:13.429434  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 08:27:13.453610  543705 event_worker.go:152] Polling the log file for events...
I0320 08:27:13.463832  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9e6729f9-ec79-4f51-ad82-99ef5fd342bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:27:13.463868  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 08:27:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:27:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 08:27:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:27:14.455806  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:27:14.455813  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:27:14.455817  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:27:14.456935  543705 disk_worker.go:494] system disk:vda1
I0320 08:27:14.456979  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:27:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:27:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:27:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:27:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:27:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:27:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:27:23.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:27:23.409878  543705 memory.go:184] no items to output this cycle
I0320 08:27:23.409923  543705 cpu.go:275] no items to output this cycle
I0320 08:27:23.460911  543705 disk_info.go:125] begin check local disk info of client
I0320 08:27:23.463353  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:27:23.463358  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b1c0 0xc00048b200]
E0320 08:27:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:27:33.409783  543705 memory.go:184] no items to output this cycle
I0320 08:27:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 08:27:38.329737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:27:38.329744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:27:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:27:43.410516  543705 memory.go:191] Add success.
I0320 08:27:43.409803  543705 cpu.go:282] Add success.
I0320 08:27:43.420216  543705 net.go:648] Add success.
I0320 08:27:43.422752  543705 net.go:770] primary dev: ETH0
I0320 08:27:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:27:43.422778  543705 net.go:698] Add success.
I0320 08:27:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:27:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:27:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:27:53.410273  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:27:53.410294  543705 memory.go:184] no items to output this cycle
I0320 08:27:53.410297  543705 cpu.go:275] no items to output this cycle
E0320 08:28:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:28:03.409766  543705 memory.go:184] no items to output this cycle
I0320 08:28:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 08:28:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:28:13.409830  543705 memory.go:191] Add success.
I0320 08:28:13.409833  543705 cpu.go:282] Add success.
W0320 08:28:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:28:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:28:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:28:13.420195  543705 net.go:648] Add success.
I0320 08:28:13.423213  543705 net.go:770] primary dev: ETH0
I0320 08:28:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:28:13.423238  543705 net.go:698] Add success.
I0320 08:28:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:28:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:28:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 08:28:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:28:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 08:28:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:28:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:28:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:28:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:28:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:28:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:28:23.409775  543705 memory.go:184] no items to output this cycle
I0320 08:28:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 08:28:23.463428  543705 disk_info.go:125] begin check local disk info of client
I0320 08:28:23.465902  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:28:23.465908  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2c80 0xc0004b2cc0]
E0320 08:28:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:28:33.409884  543705 memory.go:184] no items to output this cycle
I0320 08:28:33.410010  543705 cpu.go:275] no items to output this cycle
E0320 08:28:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:28:43.409801  543705 cpu.go:282] Add success.
I0320 08:28:43.409806  543705 memory.go:191] Add success.
I0320 08:28:43.419880  543705 net.go:648] Add success.
I0320 08:28:43.422777  543705 net.go:770] primary dev: ETH0
I0320 08:28:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:28:43.422801  543705 net.go:698] Add success.
I0320 08:28:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:28:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:28:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:28:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:28:53.409824  543705 memory.go:184] no items to output this cycle
I0320 08:28:53.409839  543705 cpu.go:275] no items to output this cycle
E0320 08:29:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:29:03.409781  543705 cpu.go:275] no items to output this cycle
I0320 08:29:03.409784  543705 memory.go:184] no items to output this cycle
E0320 08:29:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:29:13.409809  543705 memory.go:191] Add success.
I0320 08:29:13.409808  543705 cpu.go:282] Add success.
W0320 08:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:29:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:29:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:29:13.420124  543705 net.go:648] Add success.
I0320 08:29:13.422889  543705 net.go:770] primary dev: ETH0
I0320 08:29:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:29:13.422915  543705 net.go:698] Add success.
I0320 08:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:29:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:29:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 08:29:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:29:14.456511  543705 disk_worker.go:494] system disk:vda1
I0320 08:29:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:29:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:29:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:29:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:29:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:29:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:29:23.409779  543705 memory.go:184] no items to output this cycle
I0320 08:29:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 08:29:23.466036  543705 disk_info.go:125] begin check local disk info of client
I0320 08:29:23.468516  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:29:23.468523  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536000 0xc000536040]
E0320 08:29:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:29:33.409815  543705 memory.go:184] no items to output this cycle
I0320 08:29:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 08:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:29:43.409786  543705 memory.go:191] Add success.
I0320 08:29:43.409820  543705 cpu.go:282] Add success.
I0320 08:29:43.419858  543705 net.go:648] Add success.
I0320 08:29:43.422667  543705 net.go:770] primary dev: ETH0
I0320 08:29:43.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:29:43.422694  543705 net.go:698] Add success.
I0320 08:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:29:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:29:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:29:53.409769  543705 memory.go:184] no items to output this cycle
I0320 08:29:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 08:30:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:30:03.409783  543705 memory.go:184] no items to output this cycle
I0320 08:30:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 08:30:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:30:13.409823  543705 memory.go:191] Add success.
I0320 08:30:13.409828  543705 cpu.go:282] Add success.
W0320 08:30:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:30:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:30:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:30:13.420121  543705 net.go:648] Add success.
I0320 08:30:13.423433  543705 net.go:770] primary dev: ETH0
I0320 08:30:13.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:30:13.423457  543705 net.go:698] Add success.
I0320 08:30:13.468048  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0f67d5fe-da0e-46ef-97e2-8921a0203b9f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:30:13.468083  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:30:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:30:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:30:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 08:30:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:30:14.456525  543705 disk_worker.go:494] system disk:vda1
I0320 08:30:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:30:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:30:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:30:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:30:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:30:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:30:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:30:23.409762  543705 memory.go:184] no items to output this cycle
I0320 08:30:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 08:30:23.469537  543705 disk_info.go:125] begin check local disk info of client
I0320 08:30:23.471943  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:30:23.471949  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327200 0xc000327240]
E0320 08:30:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:30:33.409799  543705 memory.go:184] no items to output this cycle
I0320 08:30:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 08:30:38.333730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:30:38.333737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:30:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:30:43.410868  543705 memory.go:191] Add success.
I0320 08:30:43.409795  543705 cpu.go:282] Add success.
I0320 08:30:43.420578  543705 net.go:648] Add success.
I0320 08:30:43.423920  543705 net.go:770] primary dev: ETH0
I0320 08:30:43.423933  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:30:43.423945  543705 net.go:698] Add success.
I0320 08:30:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:30:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:30:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:30:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:30:53.409776  543705 memory.go:184] no items to output this cycle
I0320 08:30:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 08:31:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:31:03.409776  543705 memory.go:184] no items to output this cycle
I0320 08:31:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 08:31:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:31:13.409794  543705 memory.go:191] Add success.
I0320 08:31:13.409814  543705 cpu.go:282] Add success.
W0320 08:31:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:31:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:31:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:31:13.420143  543705 net.go:648] Add success.
I0320 08:31:13.423392  543705 net.go:770] primary dev: ETH0
I0320 08:31:13.423405  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:31:13.423416  543705 net.go:698] Add success.
I0320 08:31:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:31:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:31:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 08:31:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:31:14.456613  543705 disk_worker.go:494] system disk:vda1
I0320 08:31:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:31:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:31:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:31:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:31:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:31:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:31:23.409794  543705 memory.go:184] no items to output this cycle
I0320 08:31:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 08:31:23.472541  543705 disk_info.go:125] begin check local disk info of client
I0320 08:31:23.474956  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:31:23.474961  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b4c0 0xc00007b500]
E0320 08:31:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:31:33.409776  543705 memory.go:184] no items to output this cycle
I0320 08:31:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 08:31:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:31:43.409811  543705 memory.go:191] Add success.
I0320 08:31:43.409822  543705 cpu.go:282] Add success.
I0320 08:31:43.419871  543705 net.go:648] Add success.
I0320 08:31:43.422593  543705 net.go:770] primary dev: ETH0
I0320 08:31:43.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:31:43.422623  543705 net.go:698] Add success.
I0320 08:31:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:31:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:31:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:31:53.409797  543705 memory.go:184] no items to output this cycle
I0320 08:31:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 08:32:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:32:03.409774  543705 memory.go:184] no items to output this cycle
I0320 08:32:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 08:32:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:32:13.409821  543705 memory.go:191] Add success.
I0320 08:32:13.409830  543705 cpu.go:282] Add success.
W0320 08:32:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:32:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:32:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:32:13.420215  543705 net.go:648] Add success.
I0320 08:32:13.423674  543705 net.go:770] primary dev: ETH0
I0320 08:32:13.423687  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:32:13.423704  543705 net.go:698] Add success.
W0320 08:32:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:32:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 08:32:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:32:14.456044  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:32:14.456054  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:32:14.456060  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:32:14.456868  543705 disk_worker.go:494] system disk:vda1
I0320 08:32:14.456908  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:32:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:32:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:32:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:32:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:32:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:32:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:32:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:32:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:32:23.409776  543705 memory.go:184] no items to output this cycle
I0320 08:32:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 08:32:23.475529  543705 disk_info.go:125] begin check local disk info of client
I0320 08:32:23.477987  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:32:23.477993  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4240 0xc0000c4280]
E0320 08:32:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:32:33.409804  543705 memory.go:184] no items to output this cycle
I0320 08:32:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 08:32:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:32:43.409785  543705 memory.go:191] Add success.
I0320 08:32:43.409815  543705 cpu.go:282] Add success.
I0320 08:32:43.419873  543705 net.go:648] Add success.
I0320 08:32:43.422551  543705 net.go:770] primary dev: ETH0
I0320 08:32:43.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:32:43.422576  543705 net.go:698] Add success.
I0320 08:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:32:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:32:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:32:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:32:53.409782  543705 memory.go:184] no items to output this cycle
I0320 08:32:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 08:33:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:33:03.409793  543705 memory.go:184] no items to output this cycle
I0320 08:33:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 08:33:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:33:13.409803  543705 memory.go:191] Add success.
I0320 08:33:13.409803  543705 cpu.go:282] Add success.
W0320 08:33:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:33:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:33:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:33:13.420216  543705 net.go:648] Add success.
I0320 08:33:13.422990  543705 net.go:770] primary dev: ETH0
I0320 08:33:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:33:13.423019  543705 net.go:698] Add success.
I0320 08:33:13.465780  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db7c197f-d7f6-48ce-b934-127d5160d1f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:33:13.465822  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:33:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:33:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 08:33:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:33:14.456523  543705 disk_worker.go:494] system disk:vda1
I0320 08:33:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:33:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:33:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:33:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:33:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:33:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:33:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:33:23.409769  543705 memory.go:184] no items to output this cycle
I0320 08:33:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 08:33:23.478631  543705 disk_info.go:125] begin check local disk info of client
I0320 08:33:23.481046  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:33:23.481051  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
E0320 08:33:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:33:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 08:33:33.409794  543705 memory.go:184] no items to output this cycle
I0320 08:33:38.337730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:33:38.337736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:33:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:33:43.410560  543705 memory.go:191] Add success.
I0320 08:33:43.409806  543705 cpu.go:282] Add success.
I0320 08:33:43.420260  543705 net.go:648] Add success.
I0320 08:33:43.422937  543705 net.go:770] primary dev: ETH0
I0320 08:33:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:33:43.422963  543705 net.go:698] Add success.
I0320 08:33:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:33:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:33:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:33:53.409795  543705 memory.go:184] no items to output this cycle
I0320 08:33:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 08:34:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:34:03.409772  543705 memory.go:184] no items to output this cycle
I0320 08:34:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 08:34:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:34:13.409802  543705 memory.go:191] Add success.
I0320 08:34:13.409816  543705 cpu.go:282] Add success.
W0320 08:34:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:34:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:34:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:34:13.419714  543705 net.go:648] Add success.
I0320 08:34:13.422661  543705 net.go:770] primary dev: ETH0
I0320 08:34:13.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:34:13.422697  543705 net.go:698] Add success.
I0320 08:34:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:34:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:34:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 08:34:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:34:14.456521  543705 disk_worker.go:494] system disk:vda1
I0320 08:34:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:34:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:34:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:34:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:34:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:34:23.409772  543705 cpu.go:275] no items to output this cycle
I0320 08:34:23.409783  543705 memory.go:184] no items to output this cycle
I0320 08:34:23.481569  543705 disk_info.go:125] begin check local disk info of client
I0320 08:34:23.484030  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:34:23.484035  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c51c0 0xc0000c5200]
E0320 08:34:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:34:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 08:34:33.409805  543705 memory.go:184] no items to output this cycle
E0320 08:34:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:34:43.409793  543705 memory.go:191] Add success.
I0320 08:34:43.409809  543705 cpu.go:282] Add success.
I0320 08:34:43.419973  543705 net.go:648] Add success.
I0320 08:34:43.422788  543705 net.go:770] primary dev: ETH0
I0320 08:34:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:34:43.422826  543705 net.go:698] Add success.
I0320 08:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:34:53.410271  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:34:53.410291  543705 memory.go:184] no items to output this cycle
I0320 08:34:53.410304  543705 cpu.go:275] no items to output this cycle
E0320 08:35:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:35:03.409766  543705 memory.go:184] no items to output this cycle
I0320 08:35:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 08:35:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:35:13.409820  543705 memory.go:191] Add success.
I0320 08:35:13.409827  543705 cpu.go:282] Add success.
W0320 08:35:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:35:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:35:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:35:13.420223  543705 net.go:648] Add success.
I0320 08:35:13.422816  543705 net.go:770] primary dev: ETH0
I0320 08:35:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:35:13.422849  543705 net.go:698] Add success.
I0320 08:35:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:35:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:35:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0320 08:35:14.455146  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:35:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 08:35:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:35:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:35:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:35:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:35:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:35:23.409765  543705 memory.go:184] no items to output this cycle
I0320 08:35:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 08:35:23.484648  543705 disk_info.go:125] begin check local disk info of client
I0320 08:35:23.487095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:35:23.487101  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab1c0 0xc0001ab200]
E0320 08:35:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:35:33.409776  543705 memory.go:184] no items to output this cycle
I0320 08:35:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 08:35:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:35:43.409790  543705 memory.go:191] Add success.
I0320 08:35:43.409791  543705 cpu.go:282] Add success.
I0320 08:35:43.419871  543705 net.go:648] Add success.
I0320 08:35:43.422542  543705 net.go:770] primary dev: ETH0
I0320 08:35:43.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:35:43.422572  543705 net.go:698] Add success.
I0320 08:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:35:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:35:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:35:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:35:53.409774  543705 memory.go:184] no items to output this cycle
I0320 08:35:53.409777  543705 cpu.go:275] no items to output this cycle
E0320 08:36:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:36:03.409769  543705 memory.go:184] no items to output this cycle
I0320 08:36:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 08:36:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:36:13.409822  543705 memory.go:191] Add success.
I0320 08:36:13.409832  543705 cpu.go:282] Add success.
W0320 08:36:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:36:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:36:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:36:13.420542  543705 net.go:648] Add success.
I0320 08:36:13.423167  543705 net.go:770] primary dev: ETH0
I0320 08:36:13.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:36:13.423190  543705 net.go:698] Add success.
I0320 08:36:13.464119  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1870935d-b0f0-4291-895d-f039509879ea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:36:13.464154  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:36:14.455356  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:36:14.455369  543705 disk_worker.go:708] disk space is not compliant
W0320 08:36:14.455373  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:36:14.457025  543705 disk_worker.go:494] system disk:vda1
I0320 08:36:14.457054  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:36:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:36:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:36:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:36:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:36:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:36:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 08:36:23.409777  543705 memory.go:184] no items to output this cycle
I0320 08:36:23.487604  543705 disk_info.go:125] begin check local disk info of client
I0320 08:36:23.490017  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:36:23.490023  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faa00 0xc0001faa40]
E0320 08:36:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:36:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 08:36:33.409819  543705 memory.go:184] no items to output this cycle
I0320 08:36:38.341736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:36:38.341743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:36:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:36:43.410744  543705 memory.go:191] Add success.
I0320 08:36:43.409798  543705 cpu.go:282] Add success.
I0320 08:36:43.420458  543705 net.go:648] Add success.
I0320 08:36:43.423542  543705 net.go:770] primary dev: ETH0
I0320 08:36:43.423556  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:36:43.423569  543705 net.go:698] Add success.
I0320 08:36:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:36:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:36:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:36:53.409776  543705 cpu.go:275] no items to output this cycle
I0320 08:36:53.409779  543705 memory.go:184] no items to output this cycle
E0320 08:37:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:37:03.409774  543705 memory.go:184] no items to output this cycle
I0320 08:37:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 08:37:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:37:13.409801  543705 memory.go:191] Add success.
I0320 08:37:13.409803  543705 cpu.go:282] Add success.
W0320 08:37:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:37:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:37:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:37:13.420050  543705 net.go:648] Add success.
I0320 08:37:13.422674  543705 net.go:770] primary dev: ETH0
I0320 08:37:13.422687  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:37:13.422700  543705 net.go:698] Add success.
I0320 08:37:13.453317  543705 event_worker.go:152] Polling the log file for events...
W0320 08:37:14.455295  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:37:14.455309  543705 disk_worker.go:708] disk space is not compliant
W0320 08:37:14.455313  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:37:14.457391  543705 disk_worker.go:494] system disk:vda1
I0320 08:37:14.457432  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:37:14.457611  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:37:14.457618  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:37:14.457622  543705 custom_config.go:64] query custom config with name: gpu
E0320 08:37:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:37:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:37:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:37:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:37:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:37:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:37:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:37:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:37:23.409765  543705 memory.go:184] no items to output this cycle
I0320 08:37:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 08:37:23.490631  543705 disk_info.go:125] begin check local disk info of client
I0320 08:37:23.493019  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:37:23.493024  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492840 0xc000492880]
E0320 08:37:33.410694  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:37:33.410711  543705 memory.go:184] no items to output this cycle
I0320 08:37:33.410718  543705 cpu.go:275] no items to output this cycle
E0320 08:37:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:37:43.409788  543705 cpu.go:282] Add success.
I0320 08:37:43.409793  543705 memory.go:191] Add success.
I0320 08:37:43.419841  543705 net.go:648] Add success.
I0320 08:37:43.422507  543705 net.go:770] primary dev: ETH0
I0320 08:37:43.422520  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:37:43.422533  543705 net.go:698] Add success.
I0320 08:37:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:37:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:37:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:37:53.409783  543705 memory.go:184] no items to output this cycle
I0320 08:37:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 08:38:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:38:03.409802  543705 memory.go:184] no items to output this cycle
I0320 08:38:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 08:38:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:38:13.409804  543705 memory.go:191] Add success.
I0320 08:38:13.409807  543705 cpu.go:282] Add success.
W0320 08:38:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:38:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:38:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:38:13.420258  543705 net.go:648] Add success.
I0320 08:38:13.423419  543705 net.go:770] primary dev: ETH0
I0320 08:38:13.423435  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:38:13.423449  543705 net.go:698] Add success.
I0320 08:38:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:38:14.455291  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:38:14.455487  543705 disk_worker.go:708] disk space is not compliant
W0320 08:38:14.455491  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:38:14.457234  543705 disk_worker.go:494] system disk:vda1
I0320 08:38:14.457265  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:38:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:38:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:38:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:38:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:38:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:38:23.409769  543705 memory.go:184] no items to output this cycle
I0320 08:38:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 08:38:23.493663  543705 disk_info.go:125] begin check local disk info of client
I0320 08:38:23.496060  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:38:23.496065  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad40 0xc0001aad80]
E0320 08:38:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:38:33.409779  543705 memory.go:184] no items to output this cycle
I0320 08:38:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 08:38:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:38:43.409795  543705 cpu.go:282] Add success.
I0320 08:38:43.409803  543705 memory.go:191] Add success.
I0320 08:38:43.420031  543705 net.go:648] Add success.
I0320 08:38:43.423025  543705 net.go:770] primary dev: ETH0
I0320 08:38:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:38:43.423054  543705 net.go:698] Add success.
I0320 08:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:38:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:38:53.410334  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:38:53.410349  543705 memory.go:184] no items to output this cycle
I0320 08:38:53.410366  543705 cpu.go:275] no items to output this cycle
E0320 08:39:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:39:03.409811  543705 memory.go:184] no items to output this cycle
I0320 08:39:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 08:39:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:39:13.409797  543705 memory.go:191] Add success.
I0320 08:39:13.409818  543705 cpu.go:282] Add success.
W0320 08:39:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:39:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:39:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:39:13.420135  543705 net.go:648] Add success.
I0320 08:39:13.423160  543705 net.go:770] primary dev: ETH0
I0320 08:39:13.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:39:13.423185  543705 net.go:698] Add success.
I0320 08:39:13.470124  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"082c9658-5c53-4cfb-a855-31823abe110b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:39:13.470156  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:39:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:39:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:39:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 08:39:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:39:14.456542  543705 disk_worker.go:494] system disk:vda1
I0320 08:39:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:39:15.455612  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:39:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:39:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:39:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:39:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:39:23.409786  543705 memory.go:184] no items to output this cycle
I0320 08:39:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 08:39:23.496675  543705 disk_info.go:125] begin check local disk info of client
I0320 08:39:23.499085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:39:23.499090  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab4c0 0xc0001ab500]
E0320 08:39:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:39:33.409784  543705 memory.go:184] no items to output this cycle
I0320 08:39:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 08:39:38.345743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:39:38.345750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:39:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:39:43.410672  543705 memory.go:191] Add success.
I0320 08:39:43.409835  543705 cpu.go:282] Add success.
I0320 08:39:43.420488  543705 net.go:648] Add success.
I0320 08:39:43.424660  543705 net.go:770] primary dev: ETH0
I0320 08:39:43.424672  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:39:43.424686  543705 net.go:698] Add success.
I0320 08:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:39:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:39:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:39:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:39:53.409779  543705 memory.go:184] no items to output this cycle
I0320 08:39:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 08:40:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:40:03.409806  543705 memory.go:184] no items to output this cycle
I0320 08:40:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 08:40:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:40:13.409835  543705 memory.go:191] Add success.
I0320 08:40:13.409843  543705 cpu.go:282] Add success.
W0320 08:40:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:40:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:40:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:40:13.420415  543705 net.go:648] Add success.
I0320 08:40:13.423231  543705 net.go:770] primary dev: ETH0
I0320 08:40:13.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:40:13.423254  543705 net.go:698] Add success.
I0320 08:40:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:40:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:40:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 08:40:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:40:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 08:40:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:40:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:40:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:40:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:40:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:40:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:40:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:40:23.409801  543705 memory.go:184] no items to output this cycle
I0320 08:40:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 08:40:23.499664  543705 disk_info.go:125] begin check local disk info of client
I0320 08:40:23.502150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:40:23.502156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b9c0 0xc00007ba00]
E0320 08:40:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:40:33.409764  543705 memory.go:184] no items to output this cycle
I0320 08:40:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 08:40:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:40:43.409806  543705 memory.go:191] Add success.
I0320 08:40:43.409821  543705 cpu.go:282] Add success.
I0320 08:40:43.419898  543705 net.go:648] Add success.
I0320 08:40:43.422849  543705 net.go:770] primary dev: ETH0
I0320 08:40:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:40:43.422876  543705 net.go:698] Add success.
I0320 08:40:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:40:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:40:53.409763  543705 memory.go:184] no items to output this cycle
I0320 08:40:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 08:41:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:41:03.409761  543705 memory.go:184] no items to output this cycle
I0320 08:41:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 08:41:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:41:13.409800  543705 memory.go:191] Add success.
I0320 08:41:13.409832  543705 cpu.go:282] Add success.
W0320 08:41:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:41:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:41:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:41:13.420172  543705 net.go:648] Add success.
I0320 08:41:13.423166  543705 net.go:770] primary dev: ETH0
I0320 08:41:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:41:13.423194  543705 net.go:698] Add success.
I0320 08:41:14.453945  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:41:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:41:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 08:41:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:41:14.456620  543705 disk_worker.go:494] system disk:vda1
I0320 08:41:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:41:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:41:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:41:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:41:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:41:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:41:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:41:23.409783  543705 memory.go:184] no items to output this cycle
I0320 08:41:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 08:41:23.502705  543705 disk_info.go:125] begin check local disk info of client
I0320 08:41:23.505090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:41:23.505096  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa680 0xc0001aa6c0]
E0320 08:41:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:41:33.409780  543705 memory.go:184] no items to output this cycle
I0320 08:41:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 08:41:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:41:43.409783  543705 memory.go:191] Add success.
I0320 08:41:43.409803  543705 cpu.go:282] Add success.
I0320 08:41:43.419901  543705 net.go:648] Add success.
I0320 08:41:43.422813  543705 net.go:770] primary dev: ETH0
I0320 08:41:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:41:43.422840  543705 net.go:698] Add success.
I0320 08:41:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:41:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:41:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:41:53.409776  543705 memory.go:184] no items to output this cycle
I0320 08:41:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 08:42:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:42:03.409769  543705 memory.go:184] no items to output this cycle
I0320 08:42:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 08:42:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:42:13.409823  543705 memory.go:191] Add success.
I0320 08:42:13.409830  543705 cpu.go:282] Add success.
W0320 08:42:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:42:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:42:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:42:13.420779  543705 net.go:648] Add success.
I0320 08:42:13.423768  543705 net.go:770] primary dev: ETH0
I0320 08:42:13.423781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:42:13.423792  543705 net.go:698] Add success.
I0320 08:42:13.467684  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab6a0dcf-e6b5-43fd-8abd-8d7b52c8dcfa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:42:13.467715  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 08:42:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:42:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 08:42:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:42:14.455987  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:42:14.455996  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:42:14.456002  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:42:14.456710  543705 disk_worker.go:494] system disk:vda1
I0320 08:42:14.456740  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:42:15.456777  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:42:15.456785  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 08:42:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:42:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:42:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:42:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:42:16.472299  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:42:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:42:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 08:42:23.409780  543705 memory.go:184] no items to output this cycle
I0320 08:42:23.505667  543705 disk_info.go:125] begin check local disk info of client
I0320 08:42:23.508075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:42:23.508080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa140 0xc0001fa180]
E0320 08:42:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:42:33.409774  543705 memory.go:184] no items to output this cycle
I0320 08:42:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 08:42:38.349736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:42:38.349744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:42:43.410739  543705 memory.go:191] Add success.
I0320 08:42:43.409798  543705 cpu.go:282] Add success.
I0320 08:42:43.420467  543705 net.go:648] Add success.
I0320 08:42:43.423378  543705 net.go:770] primary dev: ETH0
I0320 08:42:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:42:43.423409  543705 net.go:698] Add success.
I0320 08:42:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:42:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:42:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:42:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:42:53.409799  543705 memory.go:184] no items to output this cycle
I0320 08:42:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 08:43:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:43:03.409763  543705 memory.go:184] no items to output this cycle
I0320 08:43:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 08:43:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:43:13.409805  543705 memory.go:191] Add success.
I0320 08:43:13.409822  543705 cpu.go:282] Add success.
W0320 08:43:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:43:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:43:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:43:13.420426  543705 net.go:648] Add success.
I0320 08:43:13.423004  543705 net.go:770] primary dev: ETH0
I0320 08:43:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:43:13.423028  543705 net.go:698] Add success.
I0320 08:43:14.453955  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:43:14.455225  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:43:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0320 08:43:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:43:14.456646  543705 disk_worker.go:494] system disk:vda1
I0320 08:43:14.456676  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:43:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:43:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:43:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:43:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:43:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:43:23.409765  543705 memory.go:184] no items to output this cycle
I0320 08:43:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 08:43:23.508731  543705 disk_info.go:125] begin check local disk info of client
I0320 08:43:23.511163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:43:23.511168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1800 0xc0003f1840]
E0320 08:43:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:43:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 08:43:33.409800  543705 memory.go:184] no items to output this cycle
E0320 08:43:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:43:43.409787  543705 memory.go:191] Add success.
I0320 08:43:43.409805  543705 cpu.go:282] Add success.
I0320 08:43:43.419865  543705 net.go:648] Add success.
I0320 08:43:43.422794  543705 net.go:770] primary dev: ETH0
I0320 08:43:43.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:43:43.422822  543705 net.go:698] Add success.
I0320 08:43:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:43:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:43:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:43:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:43:53.409780  543705 cpu.go:275] no items to output this cycle
I0320 08:43:53.409783  543705 memory.go:184] no items to output this cycle
E0320 08:44:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:44:03.409782  543705 memory.go:184] no items to output this cycle
I0320 08:44:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 08:44:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:44:13.409802  543705 memory.go:191] Add success.
I0320 08:44:13.409822  543705 cpu.go:282] Add success.
W0320 08:44:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:44:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:44:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:44:13.420433  543705 net.go:648] Add success.
I0320 08:44:13.423305  543705 net.go:770] primary dev: ETH0
I0320 08:44:13.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:44:13.423334  543705 net.go:698] Add success.
I0320 08:44:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:44:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:44:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 08:44:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:44:14.456620  543705 disk_worker.go:494] system disk:vda1
I0320 08:44:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:44:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:44:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:44:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:44:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:44:23.409798  543705 memory.go:184] no items to output this cycle
I0320 08:44:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 08:44:23.511755  543705 disk_info.go:125] begin check local disk info of client
I0320 08:44:23.514298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:44:23.514303  543705 disk_info.go:196] parse disk info done, disk is : [0xc000546d80 0xc000546dc0]
E0320 08:44:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:44:33.409806  543705 memory.go:184] no items to output this cycle
I0320 08:44:33.409822  543705 cpu.go:275] no items to output this cycle
E0320 08:44:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:44:43.409821  543705 memory.go:191] Add success.
I0320 08:44:43.409825  543705 cpu.go:282] Add success.
I0320 08:44:43.419954  543705 net.go:648] Add success.
I0320 08:44:43.423469  543705 net.go:770] primary dev: ETH0
I0320 08:44:43.423483  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:44:43.423496  543705 net.go:698] Add success.
I0320 08:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:44:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:44:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:44:53.409799  543705 memory.go:184] no items to output this cycle
I0320 08:44:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 08:45:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:45:03.409802  543705 memory.go:184] no items to output this cycle
I0320 08:45:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 08:45:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:45:13.409807  543705 memory.go:191] Add success.
I0320 08:45:13.409811  543705 cpu.go:282] Add success.
W0320 08:45:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:45:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:45:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:45:13.420133  543705 net.go:648] Add success.
I0320 08:45:13.423048  543705 net.go:770] primary dev: ETH0
I0320 08:45:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:45:13.423073  543705 net.go:698] Add success.
I0320 08:45:13.494629  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"504558e1-c4f0-483c-a892-d5f37feda00d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:45:13.494661  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:45:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:45:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:45:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 08:45:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:45:14.456705  543705 disk_worker.go:494] system disk:vda1
I0320 08:45:14.456740  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:45:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:45:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:45:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:45:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:45:23.409801  543705 memory.go:184] no items to output this cycle
I0320 08:45:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 08:45:23.514706  543705 disk_info.go:125] begin check local disk info of client
I0320 08:45:23.517171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:45:23.517177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8480 0xc0003e84c0]
E0320 08:45:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:45:33.409777  543705 memory.go:184] no items to output this cycle
I0320 08:45:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 08:45:38.353737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:45:38.353744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:45:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:45:43.410522  543705 memory.go:191] Add success.
I0320 08:45:43.409917  543705 cpu.go:282] Add success.
I0320 08:45:43.419709  543705 net.go:648] Add success.
I0320 08:45:43.422166  543705 net.go:770] primary dev: ETH0
I0320 08:45:43.422178  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:45:43.422190  543705 net.go:698] Add success.
I0320 08:45:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:45:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:45:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:45:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:45:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 08:45:53.409790  543705 memory.go:184] no items to output this cycle
E0320 08:46:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:46:03.409804  543705 memory.go:184] no items to output this cycle
I0320 08:46:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 08:46:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:46:13.409795  543705 memory.go:191] Add success.
I0320 08:46:13.409816  543705 cpu.go:282] Add success.
W0320 08:46:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:46:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:46:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:46:13.420258  543705 net.go:648] Add success.
I0320 08:46:13.422766  543705 net.go:770] primary dev: ETH0
I0320 08:46:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:46:13.422792  543705 net.go:698] Add success.
I0320 08:46:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:46:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:46:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 08:46:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:46:14.456625  543705 disk_worker.go:494] system disk:vda1
I0320 08:46:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:46:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:46:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:46:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:46:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:46:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:46:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:46:23.409767  543705 memory.go:184] no items to output this cycle
I0320 08:46:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 08:46:23.517771  543705 disk_info.go:125] begin check local disk info of client
I0320 08:46:23.520157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:46:23.520162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004644c0 0xc000464500]
E0320 08:46:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:46:33.409773  543705 memory.go:184] no items to output this cycle
I0320 08:46:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 08:46:43.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:46:43.409875  543705 memory.go:191] Add success.
I0320 08:46:43.409958  543705 cpu.go:282] Add success.
I0320 08:46:43.419709  543705 net.go:648] Add success.
I0320 08:46:43.422286  543705 net.go:770] primary dev: ETH0
I0320 08:46:43.422299  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:46:43.422311  543705 net.go:698] Add success.
I0320 08:46:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:46:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:46:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:46:53.409762  543705 memory.go:184] no items to output this cycle
I0320 08:46:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 08:47:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:47:03.409797  543705 memory.go:184] no items to output this cycle
I0320 08:47:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 08:47:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:47:13.409805  543705 memory.go:191] Add success.
I0320 08:47:13.409807  543705 cpu.go:282] Add success.
W0320 08:47:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:47:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:47:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:47:13.420143  543705 net.go:648] Add success.
I0320 08:47:13.423205  543705 net.go:770] primary dev: ETH0
I0320 08:47:13.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:47:13.423229  543705 net.go:698] Add success.
I0320 08:47:13.453751  543705 event_worker.go:152] Polling the log file for events...
W0320 08:47:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:47:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 08:47:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:47:14.456774  543705 disk_worker.go:494] system disk:vda1
I0320 08:47:14.456813  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:47:14.457153  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:47:14.457161  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:47:14.457166  543705 custom_config.go:64] query custom config with name: gpu
E0320 08:47:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:47:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:47:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:47:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:47:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:47:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:47:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:47:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:47:23.409778  543705 memory.go:184] no items to output this cycle
I0320 08:47:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 08:47:23.520794  543705 disk_info.go:125] begin check local disk info of client
I0320 08:47:23.523171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:47:23.523176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349ec0 0xc000349f00]
E0320 08:47:33.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:47:33.409917  543705 cpu.go:275] no items to output this cycle
I0320 08:47:33.409926  543705 memory.go:184] no items to output this cycle
E0320 08:47:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:47:43.409782  543705 memory.go:191] Add success.
I0320 08:47:43.409811  543705 cpu.go:282] Add success.
I0320 08:47:43.419885  543705 net.go:648] Add success.
I0320 08:47:43.423166  543705 net.go:770] primary dev: ETH0
I0320 08:47:43.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:47:43.423191  543705 net.go:698] Add success.
I0320 08:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:47:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:47:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:47:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:47:53.409798  543705 memory.go:184] no items to output this cycle
I0320 08:47:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 08:48:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:48:03.409776  543705 memory.go:184] no items to output this cycle
I0320 08:48:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 08:48:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:48:13.409808  543705 memory.go:191] Add success.
I0320 08:48:13.409810  543705 cpu.go:282] Add success.
W0320 08:48:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:48:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:48:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:48:13.420356  543705 net.go:648] Add success.
I0320 08:48:13.422833  543705 net.go:770] primary dev: ETH0
I0320 08:48:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:48:13.422859  543705 net.go:698] Add success.
I0320 08:48:13.475430  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f90c77c9-409e-4416-8dfa-be1b36dc4814","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:48:13.475463  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:48:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:48:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:48:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0320 08:48:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:48:14.456645  543705 disk_worker.go:494] system disk:vda1
I0320 08:48:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:48:15.454995  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:48:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:48:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:48:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:48:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:48:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:48:23.409781  543705 memory.go:184] no items to output this cycle
I0320 08:48:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 08:48:23.523773  543705 disk_info.go:125] begin check local disk info of client
I0320 08:48:23.526207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:48:23.526213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000497b00 0xc000497b40]
E0320 08:48:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:48:33.409774  543705 memory.go:184] no items to output this cycle
I0320 08:48:33.409853  543705 cpu.go:275] no items to output this cycle
I0320 08:48:38.357740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:48:38.357747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:48:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:48:43.410519  543705 memory.go:191] Add success.
I0320 08:48:43.409802  543705 cpu.go:282] Add success.
I0320 08:48:43.420224  543705 net.go:648] Add success.
I0320 08:48:43.422657  543705 net.go:770] primary dev: ETH0
I0320 08:48:43.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:48:43.422684  543705 net.go:698] Add success.
I0320 08:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:48:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:48:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:48:53.409797  543705 memory.go:184] no items to output this cycle
I0320 08:48:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:49:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:49:03.409777  543705 memory.go:184] no items to output this cycle
I0320 08:49:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 08:49:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:49:13.409796  543705 memory.go:191] Add success.
W0320 08:49:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 08:49:13.409827  543705 cpu.go:282] Add success.
W0320 08:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:49:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:49:13.420379  543705 net.go:648] Add success.
I0320 08:49:13.423036  543705 net.go:770] primary dev: ETH0
I0320 08:49:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:49:13.423063  543705 net.go:698] Add success.
I0320 08:49:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:49:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:49:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 08:49:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:49:14.456857  543705 disk_worker.go:494] system disk:vda1
I0320 08:49:14.456885  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:49:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:49:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:49:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:49:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:49:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:49:23.409776  543705 memory.go:184] no items to output this cycle
I0320 08:49:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 08:49:23.526290  543705 disk_info.go:125] begin check local disk info of client
I0320 08:49:23.528717  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:49:23.528723  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393b00 0xc000393b40]
E0320 08:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:49:33.409783  543705 memory.go:184] no items to output this cycle
I0320 08:49:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 08:49:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:49:43.409793  543705 memory.go:191] Add success.
I0320 08:49:43.409796  543705 cpu.go:282] Add success.
I0320 08:49:43.420018  543705 net.go:648] Add success.
I0320 08:49:43.423221  543705 net.go:770] primary dev: ETH0
I0320 08:49:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:49:43.423247  543705 net.go:698] Add success.
I0320 08:49:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:49:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:49:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:49:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:49:53.409790  543705 cpu.go:275] no items to output this cycle
I0320 08:49:53.409792  543705 memory.go:184] no items to output this cycle
E0320 08:50:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:50:03.409793  543705 memory.go:184] no items to output this cycle
I0320 08:50:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 08:50:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:50:13.409809  543705 memory.go:191] Add success.
I0320 08:50:13.409814  543705 cpu.go:282] Add success.
W0320 08:50:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:50:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:50:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:50:13.420150  543705 net.go:648] Add success.
I0320 08:50:13.423145  543705 net.go:770] primary dev: ETH0
I0320 08:50:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:50:13.423170  543705 net.go:698] Add success.
I0320 08:50:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:50:14.455312  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:50:14.455420  543705 disk_worker.go:708] disk space is not compliant
W0320 08:50:14.455425  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:50:14.457139  543705 disk_worker.go:494] system disk:vda1
I0320 08:50:14.457181  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:50:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:50:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:50:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:50:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:50:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:50:23.409775  543705 memory.go:184] no items to output this cycle
I0320 08:50:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 08:50:23.528763  543705 disk_info.go:125] begin check local disk info of client
I0320 08:50:23.531244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:50:23.531249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004914c0 0xc000491500]
E0320 08:50:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:50:33.409771  543705 memory.go:184] no items to output this cycle
I0320 08:50:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 08:50:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:50:43.409796  543705 memory.go:191] Add success.
I0320 08:50:43.409798  543705 cpu.go:282] Add success.
I0320 08:50:43.420055  543705 net.go:648] Add success.
I0320 08:50:43.423047  543705 net.go:770] primary dev: ETH0
I0320 08:50:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:50:43.423075  543705 net.go:698] Add success.
I0320 08:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:50:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:50:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:50:53.409794  543705 memory.go:184] no items to output this cycle
I0320 08:50:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 08:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:51:03.409776  543705 memory.go:184] no items to output this cycle
I0320 08:51:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 08:51:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:51:13.409829  543705 memory.go:191] Add success.
I0320 08:51:13.409835  543705 cpu.go:282] Add success.
W0320 08:51:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:51:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:51:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:51:13.420211  543705 net.go:648] Add success.
I0320 08:51:13.423388  543705 net.go:770] primary dev: ETH0
I0320 08:51:13.423401  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:51:13.423413  543705 net.go:698] Add success.
I0320 08:51:13.573137  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31f054c0-c773-4b0d-823c-046b856523e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:51:13.573170  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:51:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:51:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:51:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 08:51:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:51:14.459207  543705 disk_worker.go:494] system disk:vda1
I0320 08:51:14.459247  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:51:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:51:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:51:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:51:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:51:23.409767  543705 memory.go:184] no items to output this cycle
I0320 08:51:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 08:51:23.531810  543705 disk_info.go:125] begin check local disk info of client
I0320 08:51:23.534265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:51:23.534270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004961c0 0xc000496200]
E0320 08:51:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:51:33.409796  543705 memory.go:184] no items to output this cycle
I0320 08:51:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 08:51:38.361733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:51:38.361739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:51:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:51:43.410679  543705 memory.go:191] Add success.
I0320 08:51:43.409802  543705 cpu.go:282] Add success.
I0320 08:51:43.420375  543705 net.go:648] Add success.
I0320 08:51:43.423259  543705 net.go:770] primary dev: ETH0
I0320 08:51:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:51:43.423284  543705 net.go:698] Add success.
I0320 08:51:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:51:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:51:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:51:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:51:53.409771  543705 memory.go:184] no items to output this cycle
I0320 08:51:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 08:52:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:52:03.409778  543705 memory.go:184] no items to output this cycle
I0320 08:52:03.409783  543705 cpu.go:275] no items to output this cycle
W0320 08:52:13.409718  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:52:13.409735  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:52:13.409740  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:52:13.409808  543705 cpu.go:282] Add success.
E0320 08:52:13.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:52:13.409833  543705 memory.go:191] Add success.
I0320 08:52:13.420316  543705 net.go:648] Add success.
I0320 08:52:13.423472  543705 net.go:770] primary dev: ETH0
I0320 08:52:13.423487  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:52:13.423499  543705 net.go:698] Add success.
W0320 08:52:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:52:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 08:52:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:52:14.455945  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:52:14.455954  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:52:14.455961  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:52:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 08:52:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:52:15.455918  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:52:15.455927  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 08:52:16.457080  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:52:16.458133  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:52:16.458195  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:52:16.458216  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:52:16.472533  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:52:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:52:23.409794  543705 memory.go:184] no items to output this cycle
I0320 08:52:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 08:52:23.534803  543705 disk_info.go:125] begin check local disk info of client
I0320 08:52:23.537253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:52:23.537259  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007afc0 0xc00007b000]
E0320 08:52:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:52:33.409809  543705 memory.go:184] no items to output this cycle
I0320 08:52:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 08:52:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:52:43.409785  543705 memory.go:191] Add success.
I0320 08:52:43.409804  543705 cpu.go:282] Add success.
I0320 08:52:43.419980  543705 net.go:648] Add success.
I0320 08:52:43.422660  543705 net.go:770] primary dev: ETH0
I0320 08:52:43.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:52:43.422690  543705 net.go:698] Add success.
I0320 08:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:52:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:52:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:52:53.409791  543705 memory.go:184] no items to output this cycle
I0320 08:52:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 08:53:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:53:03.409772  543705 memory.go:184] no items to output this cycle
I0320 08:53:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 08:53:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:53:13.409830  543705 memory.go:191] Add success.
I0320 08:53:13.409836  543705 cpu.go:282] Add success.
W0320 08:53:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:53:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:53:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:53:13.420152  543705 net.go:648] Add success.
I0320 08:53:13.422878  543705 net.go:770] primary dev: ETH0
I0320 08:53:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:53:13.422903  543705 net.go:698] Add success.
I0320 08:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:53:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:53:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 08:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:53:14.456609  543705 disk_worker.go:494] system disk:vda1
I0320 08:53:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:53:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:53:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:53:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:53:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:53:23.409906  543705 memory.go:184] no items to output this cycle
I0320 08:53:23.409914  543705 cpu.go:275] no items to output this cycle
I0320 08:53:23.538306  543705 disk_info.go:125] begin check local disk info of client
I0320 08:53:23.540696  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:53:23.540701  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb4c0 0xc0001fb500]
E0320 08:53:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:53:33.409769  543705 memory.go:184] no items to output this cycle
I0320 08:53:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 08:53:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:53:43.409815  543705 memory.go:191] Add success.
I0320 08:53:43.409826  543705 cpu.go:282] Add success.
I0320 08:53:43.419955  543705 net.go:648] Add success.
I0320 08:53:43.422804  543705 net.go:770] primary dev: ETH0
I0320 08:53:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:53:43.422833  543705 net.go:698] Add success.
I0320 08:53:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:53:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:53:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:53:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:53:53.409770  543705 memory.go:184] no items to output this cycle
I0320 08:53:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 08:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:54:03.409773  543705 memory.go:184] no items to output this cycle
I0320 08:54:03.409777  543705 cpu.go:275] no items to output this cycle
E0320 08:54:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:54:13.409827  543705 memory.go:191] Add success.
I0320 08:54:13.409837  543705 cpu.go:282] Add success.
W0320 08:54:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:54:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:54:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:54:13.420139  543705 net.go:648] Add success.
I0320 08:54:13.422846  543705 net.go:770] primary dev: ETH0
I0320 08:54:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:54:13.422875  543705 net.go:698] Add success.
I0320 08:54:13.469344  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"873bf6ce-d62b-4c77-8460-0d78f78c4765","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:54:13.469377  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 08:54:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:54:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:54:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0320 08:54:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:54:14.456652  543705 disk_worker.go:494] system disk:vda1
I0320 08:54:14.456686  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:54:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:54:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:54:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:54:23.410864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:54:23.410889  543705 memory.go:184] no items to output this cycle
I0320 08:54:23.410974  543705 cpu.go:275] no items to output this cycle
I0320 08:54:23.541418  543705 disk_info.go:125] begin check local disk info of client
I0320 08:54:23.543807  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:54:23.543814  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bbdc0 0xc0002bbe00]
E0320 08:54:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:54:33.409801  543705 memory.go:184] no items to output this cycle
I0320 08:54:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 08:54:38.365741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:54:38.365749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:54:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:54:43.410575  543705 memory.go:191] Add success.
I0320 08:54:43.409818  543705 cpu.go:282] Add success.
I0320 08:54:43.420307  543705 net.go:648] Add success.
I0320 08:54:43.423327  543705 net.go:770] primary dev: ETH0
I0320 08:54:43.423341  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:54:43.423365  543705 net.go:698] Add success.
I0320 08:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:54:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:54:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:54:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:54:53.409795  543705 memory.go:184] no items to output this cycle
I0320 08:54:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 08:55:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:55:03.409780  543705 memory.go:184] no items to output this cycle
I0320 08:55:03.409783  543705 cpu.go:275] no items to output this cycle
W0320 08:55:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:55:13.409738  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:55:13.409743  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:55:13.409822  543705 cpu.go:282] Add success.
E0320 08:55:13.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:55:13.409867  543705 memory.go:191] Add success.
I0320 08:55:13.420247  543705 net.go:648] Add success.
I0320 08:55:13.422790  543705 net.go:770] primary dev: ETH0
I0320 08:55:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:55:13.422815  543705 net.go:698] Add success.
I0320 08:55:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:55:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:55:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 08:55:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:55:14.456651  543705 disk_worker.go:494] system disk:vda1
I0320 08:55:14.456682  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:55:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:55:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:55:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:55:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:55:23.409802  543705 memory.go:184] no items to output this cycle
I0320 08:55:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 08:55:23.543837  543705 disk_info.go:125] begin check local disk info of client
I0320 08:55:23.546543  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:55:23.546549  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380000 0xc000380040]
E0320 08:55:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:55:33.409778  543705 memory.go:184] no items to output this cycle
I0320 08:55:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 08:55:43.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:55:43.409915  543705 memory.go:191] Add success.
I0320 08:55:43.409932  543705 cpu.go:282] Add success.
I0320 08:55:43.420306  543705 net.go:648] Add success.
I0320 08:55:43.422953  543705 net.go:770] primary dev: ETH0
I0320 08:55:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:55:43.422979  543705 net.go:698] Add success.
I0320 08:55:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:55:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:55:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:55:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:55:53.409787  543705 memory.go:184] no items to output this cycle
I0320 08:55:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 08:56:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:56:03.409779  543705 memory.go:184] no items to output this cycle
I0320 08:56:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 08:56:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:56:13.409807  543705 memory.go:191] Add success.
I0320 08:56:13.409807  543705 cpu.go:282] Add success.
W0320 08:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:56:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:56:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:56:13.420249  543705 net.go:648] Add success.
I0320 08:56:13.423154  543705 net.go:770] primary dev: ETH0
I0320 08:56:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:56:13.423182  543705 net.go:698] Add success.
I0320 08:56:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:56:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:56:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 08:56:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:56:14.456508  543705 disk_worker.go:494] system disk:vda1
I0320 08:56:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:56:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:56:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:56:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:56:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:56:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:56:23.409770  543705 memory.go:184] no items to output this cycle
I0320 08:56:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 08:56:23.546619  543705 disk_info.go:125] begin check local disk info of client
I0320 08:56:23.549131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:56:23.549136  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471c40 0xc000471c80]
E0320 08:56:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:56:33.409806  543705 memory.go:184] no items to output this cycle
I0320 08:56:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 08:56:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:56:43.409920  543705 memory.go:191] Add success.
I0320 08:56:43.409921  543705 cpu.go:282] Add success.
I0320 08:56:43.419727  543705 net.go:648] Add success.
I0320 08:56:43.422562  543705 net.go:770] primary dev: ETH0
I0320 08:56:43.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:56:43.422591  543705 net.go:698] Add success.
I0320 08:56:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:56:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:56:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:56:53.409793  543705 memory.go:184] no items to output this cycle
I0320 08:56:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 08:57:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:57:03.409765  543705 memory.go:184] no items to output this cycle
I0320 08:57:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 08:57:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:57:13.409798  543705 memory.go:191] Add success.
W0320 08:57:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:57:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:57:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:57:13.409847  543705 cpu.go:282] Add success.
I0320 08:57:13.420339  543705 net.go:648] Add success.
I0320 08:57:13.427437  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 08:57:13.427513  543705 net.go:770] primary dev: ETH0
I0320 08:57:13.427536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:57:13.427549  543705 net.go:698] Add success.
I0320 08:57:13.453146  543705 event_worker.go:152] Polling the log file for events...
I0320 08:57:13.469815  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bcdd4107-0d98-44d0-badf-a71b7c7d84c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 08:57:13.469847  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 08:57:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:57:14.455256  543705 disk_worker.go:708] disk space is not compliant
W0320 08:57:14.455261  543705 disk_worker.go:728] disk inode is not compliant
E0320 08:57:14.456063  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 08:57:14.456072  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 08:57:14.456078  543705 custom_config.go:64] query custom config with name: gpu
I0320 08:57:14.457023  543705 disk_worker.go:494] system disk:vda1
I0320 08:57:14.457053  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 08:57:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 08:57:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:57:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 08:57:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 08:57:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:57:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:57:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:57:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:57:23.409775  543705 memory.go:184] no items to output this cycle
I0320 08:57:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 08:57:23.549933  543705 disk_info.go:125] begin check local disk info of client
I0320 08:57:23.552390  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:57:23.552395  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c540 0xc00035c580]
E0320 08:57:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:57:33.409777  543705 memory.go:184] no items to output this cycle
I0320 08:57:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 08:57:38.369728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 08:57:38.369734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 08:57:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:57:43.410689  543705 memory.go:191] Add success.
I0320 08:57:43.409817  543705 cpu.go:282] Add success.
I0320 08:57:43.419720  543705 net.go:648] Add success.
I0320 08:57:43.422592  543705 net.go:770] primary dev: ETH0
I0320 08:57:43.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:57:43.422621  543705 net.go:698] Add success.
I0320 08:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:57:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:57:53.409772  543705 memory.go:184] no items to output this cycle
I0320 08:57:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 08:58:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:58:03.409777  543705 cpu.go:275] no items to output this cycle
I0320 08:58:03.409784  543705 memory.go:184] no items to output this cycle
E0320 08:58:13.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:58:13.409819  543705 cpu.go:282] Add success.
I0320 08:58:13.409843  543705 memory.go:191] Add success.
W0320 08:58:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:58:13.409900  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:58:13.409904  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:58:13.420238  543705 net.go:648] Add success.
I0320 08:58:13.421166  543705 net.go:770] primary dev: ETH0
I0320 08:58:13.421184  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:58:13.421201  543705 net.go:698] Add success.
I0320 08:58:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:58:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:58:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 08:58:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:58:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 08:58:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:58:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:58:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:58:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:58:23.409781  543705 memory.go:184] no items to output this cycle
I0320 08:58:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 08:58:23.552904  543705 disk_info.go:125] begin check local disk info of client
I0320 08:58:23.555382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:58:23.555387  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c500 0xc00035c540]
E0320 08:58:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:58:33.409781  543705 memory.go:184] no items to output this cycle
I0320 08:58:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 08:58:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:58:43.409800  543705 memory.go:191] Add success.
I0320 08:58:43.409800  543705 cpu.go:282] Add success.
I0320 08:58:43.419746  543705 net.go:648] Add success.
I0320 08:58:43.423007  543705 net.go:770] primary dev: ETH0
I0320 08:58:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:58:43.423031  543705 net.go:698] Add success.
I0320 08:58:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:58:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:58:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:58:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:58:53.409795  543705 memory.go:184] no items to output this cycle
I0320 08:58:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 08:59:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:59:03.409788  543705 cpu.go:275] no items to output this cycle
I0320 08:59:03.409801  543705 memory.go:184] no items to output this cycle
E0320 08:59:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:59:13.409821  543705 cpu.go:282] Add success.
I0320 08:59:13.409837  543705 memory.go:191] Add success.
W0320 08:59:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 08:59:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 08:59:13.409900  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 08:59:13.420341  543705 net.go:648] Add success.
I0320 08:59:13.421316  543705 net.go:770] primary dev: ETH0
I0320 08:59:13.421330  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:59:13.421342  543705 net.go:698] Add success.
I0320 08:59:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 08:59:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 08:59:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 08:59:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0320 08:59:14.456633  543705 disk_worker.go:494] system disk:vda1
I0320 08:59:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 08:59:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 08:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:59:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:59:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 08:59:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0320 08:59:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:59:23.409783  543705 memory.go:184] no items to output this cycle
I0320 08:59:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 08:59:23.555463  543705 disk_info.go:125] begin check local disk info of client
I0320 08:59:23.557969  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 08:59:23.557974  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035bdc0 0xc00035be00]
E0320 08:59:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:59:33.409824  543705 memory.go:184] no items to output this cycle
I0320 08:59:33.409828  543705 cpu.go:275] no items to output this cycle
E0320 08:59:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:59:43.409790  543705 memory.go:191] Add success.
I0320 08:59:43.409813  543705 cpu.go:282] Add success.
I0320 08:59:43.419838  543705 net.go:648] Add success.
I0320 08:59:43.422391  543705 net.go:770] primary dev: ETH0
I0320 08:59:43.422405  543705 net.go:802] Send network stats successfully!,count is 6
I0320 08:59:43.422416  543705 net.go:698] Add success.
I0320 08:59:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 08:59:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 08:59:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 08:59:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 08:59:53.409811  543705 memory.go:184] no items to output this cycle
I0320 08:59:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 09:00:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:00:03.409805  543705 memory.go:184] no items to output this cycle
I0320 09:00:03.409818  543705 cpu.go:275] no items to output this cycle
I0320 09:00:13.409821  543705 cpu.go:282] Add success.
E0320 09:00:13.410129  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:00:13.410148  543705 memory.go:191] Add success.
W0320 09:00:13.410172  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:00:13.410184  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:00:13.410186  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:00:13.420389  543705 net.go:648] Add success.
I0320 09:00:13.421480  543705 net.go:770] primary dev: ETH0
I0320 09:00:13.421494  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:00:13.421506  543705 net.go:698] Add success.
I0320 09:00:13.469021  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a49a950-c78e-4091-b64d-54b4b4935b79","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:00:13.469077  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:00:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:00:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:00:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0320 09:00:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:00:14.456642  543705 disk_worker.go:494] system disk:vda1
I0320 09:00:14.456676  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:00:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:00:23.410235  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:00:23.410254  543705 memory.go:184] no items to output this cycle
I0320 09:00:23.410256  543705 cpu.go:275] no items to output this cycle
I0320 09:00:23.558449  543705 disk_info.go:125] begin check local disk info of client
I0320 09:00:23.560901  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:00:23.560906  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003babc0 0xc0003bac00]
E0320 09:00:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:00:33.409774  543705 memory.go:184] no items to output this cycle
I0320 09:00:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 09:00:38.373738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:00:38.373746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:00:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:00:43.410570  543705 memory.go:191] Add success.
I0320 09:00:43.409836  543705 cpu.go:282] Add success.
I0320 09:00:43.420289  543705 net.go:648] Add success.
I0320 09:00:43.423102  543705 net.go:770] primary dev: ETH0
I0320 09:00:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:00:43.423130  543705 net.go:698] Add success.
I0320 09:00:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:00:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:00:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:00:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:00:53.409782  543705 memory.go:184] no items to output this cycle
I0320 09:00:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:01:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:01:03.409761  543705 memory.go:184] no items to output this cycle
I0320 09:01:03.409795  543705 cpu.go:275] no items to output this cycle
W0320 09:01:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:01:13.409749  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:01:13.409755  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:01:13.409842  543705 cpu.go:282] Add success.
E0320 09:01:13.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:01:13.409880  543705 memory.go:191] Add success.
I0320 09:01:13.420502  543705 net.go:648] Add success.
I0320 09:01:13.423300  543705 net.go:770] primary dev: ETH0
I0320 09:01:13.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:01:13.423338  543705 net.go:698] Add success.
I0320 09:01:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:01:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:01:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 09:01:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:01:14.456522  543705 disk_worker.go:494] system disk:vda1
I0320 09:01:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:01:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:01:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:01:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:01:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:01:23.409788  543705 memory.go:184] no items to output this cycle
I0320 09:01:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 09:01:23.560940  543705 disk_info.go:125] begin check local disk info of client
I0320 09:01:23.563419  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:01:23.563424  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003da680 0xc0003da6c0]
E0320 09:01:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:01:33.409811  543705 memory.go:184] no items to output this cycle
I0320 09:01:33.409825  543705 cpu.go:275] no items to output this cycle
E0320 09:01:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:01:43.409796  543705 cpu.go:282] Add success.
I0320 09:01:43.409798  543705 memory.go:191] Add success.
I0320 09:01:43.419835  543705 net.go:648] Add success.
I0320 09:01:43.422431  543705 net.go:770] primary dev: ETH0
I0320 09:01:43.422445  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:01:43.422458  543705 net.go:698] Add success.
I0320 09:01:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:01:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:01:53.409905  543705 cpu.go:275] no items to output this cycle
E0320 09:01:53.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:01:53.409944  543705 memory.go:184] no items to output this cycle
E0320 09:02:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:02:03.409793  543705 memory.go:184] no items to output this cycle
I0320 09:02:03.409807  543705 cpu.go:275] no items to output this cycle
W0320 09:02:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:02:13.409761  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:02:13.409768  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:02:13.409806  543705 cpu.go:282] Add success.
E0320 09:02:13.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:02:13.411440  543705 memory.go:191] Add success.
I0320 09:02:13.420602  543705 net.go:648] Add success.
I0320 09:02:13.422980  543705 net.go:770] primary dev: ETH0
I0320 09:02:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:02:13.423005  543705 net.go:698] Add success.
W0320 09:02:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:02:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0320 09:02:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:02:14.456890  543705 disk_worker.go:494] system disk:vda1
I0320 09:02:14.456936  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:02:14.457310  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:02:14.457319  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:02:14.457324  543705 custom_config.go:64] query custom config with name: gpu
E0320 09:02:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:02:15.456865  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:02:16.458064  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:02:16.458074  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:02:16.458122  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:02:16.458142  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:02:16.472506  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:02:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:02:23.409800  543705 memory.go:184] no items to output this cycle
I0320 09:02:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 09:02:23.563507  543705 disk_info.go:125] begin check local disk info of client
I0320 09:02:23.565983  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:02:23.565989  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb280 0xc0001fb2c0]
E0320 09:02:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:02:33.409796  543705 memory.go:184] no items to output this cycle
I0320 09:02:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 09:02:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:02:43.409805  543705 memory.go:191] Add success.
I0320 09:02:43.409806  543705 cpu.go:282] Add success.
I0320 09:02:43.419914  543705 net.go:648] Add success.
I0320 09:02:43.422594  543705 net.go:770] primary dev: ETH0
I0320 09:02:43.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:02:43.422621  543705 net.go:698] Add success.
I0320 09:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:02:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:02:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:02:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:02:53.409818  543705 memory.go:184] no items to output this cycle
I0320 09:02:53.409831  543705 cpu.go:275] no items to output this cycle
E0320 09:03:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:03:03.409802  543705 memory.go:184] no items to output this cycle
I0320 09:03:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 09:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:03:13.409779  543705 memory.go:191] Add success.
W0320 09:03:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:03:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:03:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:03:13.409852  543705 cpu.go:282] Add success.
I0320 09:03:13.420404  543705 net.go:648] Add success.
I0320 09:03:13.423393  543705 net.go:770] primary dev: ETH0
I0320 09:03:13.423409  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:03:13.423421  543705 net.go:698] Add success.
I0320 09:03:13.731694  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d03af66a-f353-4911-8478-2b58fdbef580","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:03:13.731752  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:03:14.454713  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:03:14.454879  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:03:14.454960  543705 disk_worker.go:708] disk space is not compliant
W0320 09:03:14.454964  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:03:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 09:03:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:03:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:03:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:03:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:03:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:03:23.409809  543705 memory.go:184] no items to output this cycle
I0320 09:03:23.409825  543705 cpu.go:275] no items to output this cycle
I0320 09:03:23.566727  543705 disk_info.go:125] begin check local disk info of client
I0320 09:03:23.569198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:03:23.569204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2c00 0xc0003b2c40]
E0320 09:03:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:03:33.409795  543705 memory.go:184] no items to output this cycle
I0320 09:03:33.409804  543705 cpu.go:275] no items to output this cycle
I0320 09:03:38.377735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:03:38.377741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:03:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:03:43.410722  543705 memory.go:191] Add success.
I0320 09:03:43.409821  543705 cpu.go:282] Add success.
I0320 09:03:43.420506  543705 net.go:648] Add success.
I0320 09:03:43.423343  543705 net.go:770] primary dev: ETH0
I0320 09:03:43.423356  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:03:43.423368  543705 net.go:698] Add success.
I0320 09:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:03:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:03:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:03:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:03:53.409771  543705 memory.go:184] no items to output this cycle
I0320 09:03:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 09:04:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:04:03.409768  543705 memory.go:184] no items to output this cycle
I0320 09:04:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 09:04:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:04:13.409792  543705 memory.go:191] Add success.
I0320 09:04:13.409792  543705 cpu.go:282] Add success.
W0320 09:04:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:04:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:04:13.420246  543705 net.go:648] Add success.
I0320 09:04:13.422883  543705 net.go:770] primary dev: ETH0
I0320 09:04:13.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:04:13.422908  543705 net.go:698] Add success.
I0320 09:04:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:04:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:04:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0320 09:04:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:04:14.456615  543705 disk_worker.go:494] system disk:vda1
I0320 09:04:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:04:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:04:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:04:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:04:23.409772  543705 memory.go:184] no items to output this cycle
I0320 09:04:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 09:04:23.569667  543705 disk_info.go:125] begin check local disk info of client
I0320 09:04:23.572157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:04:23.572162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7c40 0xc0001c7c80]
E0320 09:04:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:04:33.409775  543705 memory.go:184] no items to output this cycle
I0320 09:04:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 09:04:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:04:43.409797  543705 memory.go:191] Add success.
I0320 09:04:43.409803  543705 cpu.go:282] Add success.
I0320 09:04:43.419842  543705 net.go:648] Add success.
I0320 09:04:43.422525  543705 net.go:770] primary dev: ETH0
I0320 09:04:43.422538  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:04:43.422551  543705 net.go:698] Add success.
I0320 09:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:04:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:04:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:04:53.409779  543705 memory.go:184] no items to output this cycle
I0320 09:04:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:05:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:05:03.409804  543705 memory.go:184] no items to output this cycle
I0320 09:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 09:05:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:05:13.409803  543705 memory.go:191] Add success.
I0320 09:05:13.409805  543705 cpu.go:282] Add success.
W0320 09:05:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:05:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:05:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:05:13.420137  543705 net.go:648] Add success.
I0320 09:05:13.422868  543705 net.go:770] primary dev: ETH0
I0320 09:05:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:05:13.422894  543705 net.go:698] Add success.
I0320 09:05:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:05:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:05:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 09:05:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:05:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 09:05:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:05:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:05:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:05:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:05:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:05:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:05:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:05:23.409795  543705 memory.go:184] no items to output this cycle
I0320 09:05:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 09:05:23.573092  543705 disk_info.go:125] begin check local disk info of client
I0320 09:05:23.575518  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:05:23.575524  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7a80 0xc0001c7ac0]
E0320 09:05:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:05:33.409778  543705 memory.go:184] no items to output this cycle
I0320 09:05:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 09:05:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:05:43.409787  543705 memory.go:191] Add success.
I0320 09:05:43.409790  543705 cpu.go:282] Add success.
I0320 09:05:43.419869  543705 net.go:648] Add success.
I0320 09:05:43.422536  543705 net.go:770] primary dev: ETH0
I0320 09:05:43.422549  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:05:43.422563  543705 net.go:698] Add success.
I0320 09:05:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:05:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:05:53.409905  543705 cpu.go:275] no items to output this cycle
I0320 09:05:53.409958  543705 memory.go:184] no items to output this cycle
E0320 09:06:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:06:03.409782  543705 memory.go:184] no items to output this cycle
I0320 09:06:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 09:06:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:06:13.409778  543705 memory.go:191] Add success.
W0320 09:06:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:06:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:06:13.409816  543705 cpu.go:282] Add success.
I0320 09:06:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:06:13.420142  543705 net.go:648] Add success.
I0320 09:06:13.422852  543705 net.go:770] primary dev: ETH0
I0320 09:06:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:06:13.422877  543705 net.go:698] Add success.
I0320 09:06:13.468933  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e70b7c07-5866-4f67-b38f-19acb1fcac36","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:06:13.468967  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:06:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:06:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:06:14.455245  543705 disk_worker.go:708] disk space is not compliant
W0320 09:06:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:06:14.456676  543705 disk_worker.go:494] system disk:vda1
I0320 09:06:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:06:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:06:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:06:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:06:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:06:23.409780  543705 memory.go:184] no items to output this cycle
I0320 09:06:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 09:06:23.576065  543705 disk_info.go:125] begin check local disk info of client
I0320 09:06:23.578476  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:06:23.578482  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480740 0xc000480780]
E0320 09:06:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:06:33.409779  543705 memory.go:184] no items to output this cycle
I0320 09:06:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 09:06:38.381726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:06:38.381733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:06:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:06:43.410630  543705 memory.go:191] Add success.
I0320 09:06:43.409808  543705 cpu.go:282] Add success.
I0320 09:06:43.420472  543705 net.go:648] Add success.
I0320 09:06:43.423049  543705 net.go:770] primary dev: ETH0
I0320 09:06:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:06:43.423074  543705 net.go:698] Add success.
I0320 09:06:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:06:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:06:53.409768  543705 memory.go:184] no items to output this cycle
I0320 09:06:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 09:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:07:03.409776  543705 memory.go:184] no items to output this cycle
I0320 09:07:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:07:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:07:13.409813  543705 memory.go:191] Add success.
I0320 09:07:13.409822  543705 cpu.go:282] Add success.
W0320 09:07:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:07:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:07:13.420238  543705 net.go:648] Add success.
I0320 09:07:13.422788  543705 net.go:770] primary dev: ETH0
I0320 09:07:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:07:13.422815  543705 net.go:698] Add success.
I0320 09:07:13.453366  543705 event_worker.go:152] Polling the log file for events...
W0320 09:07:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:07:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 09:07:14.455216  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:07:14.457150  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0320 09:07:14.457158  543705 disk_worker.go:494] system disk:vda1
E0320 09:07:14.457161  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:07:14.457168  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:07:14.457194  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:07:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:07:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:07:16.458039  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:07:16.458050  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:07:16.458101  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:07:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:07:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:07:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:07:23.409774  543705 memory.go:184] no items to output this cycle
I0320 09:07:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 09:07:23.578993  543705 disk_info.go:125] begin check local disk info of client
I0320 09:07:23.581407  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:07:23.581412  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381040 0xc000381080]
E0320 09:07:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:07:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 09:07:33.409795  543705 memory.go:184] no items to output this cycle
E0320 09:07:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:07:43.409918  543705 memory.go:191] Add success.
I0320 09:07:43.409928  543705 cpu.go:282] Add success.
I0320 09:07:43.419709  543705 net.go:648] Add success.
I0320 09:07:43.422405  543705 net.go:770] primary dev: ETH0
I0320 09:07:43.422419  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:07:43.422431  543705 net.go:698] Add success.
I0320 09:07:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:07:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:07:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:07:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:07:53.409797  543705 memory.go:184] no items to output this cycle
I0320 09:07:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 09:08:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:08:03.409770  543705 memory.go:184] no items to output this cycle
I0320 09:08:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 09:08:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:08:13.409797  543705 memory.go:191] Add success.
I0320 09:08:13.409800  543705 cpu.go:282] Add success.
W0320 09:08:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:08:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:08:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:08:13.420133  543705 net.go:648] Add success.
I0320 09:08:13.422730  543705 net.go:770] primary dev: ETH0
I0320 09:08:13.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:08:13.422757  543705 net.go:698] Add success.
I0320 09:08:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:08:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:08:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0320 09:08:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:08:14.456671  543705 disk_worker.go:494] system disk:vda1
I0320 09:08:14.456705  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:08:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:08:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:08:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:08:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:08:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:08:23.409781  543705 memory.go:184] no items to output this cycle
I0320 09:08:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 09:08:23.581668  543705 disk_info.go:125] begin check local disk info of client
I0320 09:08:23.584098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:08:23.584104  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307c00 0xc000307c40]
E0320 09:08:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:08:33.409812  543705 memory.go:184] no items to output this cycle
I0320 09:08:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 09:08:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:08:43.409805  543705 memory.go:191] Add success.
I0320 09:08:43.409806  543705 cpu.go:282] Add success.
I0320 09:08:43.420318  543705 net.go:648] Add success.
I0320 09:08:43.422806  543705 net.go:770] primary dev: ETH0
I0320 09:08:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:08:43.422831  543705 net.go:698] Add success.
I0320 09:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:08:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:08:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:08:53.409778  543705 memory.go:184] no items to output this cycle
I0320 09:08:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 09:09:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:09:03.409778  543705 memory.go:184] no items to output this cycle
I0320 09:09:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 09:09:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:09:13.409815  543705 memory.go:191] Add success.
I0320 09:09:13.409822  543705 cpu.go:282] Add success.
W0320 09:09:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:09:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:09:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:09:13.420161  543705 net.go:648] Add success.
I0320 09:09:13.423049  543705 net.go:770] primary dev: ETH0
I0320 09:09:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:09:13.423078  543705 net.go:698] Add success.
I0320 09:09:13.464209  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e861b529-13ef-48f2-ac7a-e9836c5a8281","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:09:13.464243  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:09:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:09:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:09:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0320 09:09:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:09:14.456656  543705 disk_worker.go:494] system disk:vda1
I0320 09:09:14.456689  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:09:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:09:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:09:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:09:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:09:23.409798  543705 memory.go:184] no items to output this cycle
I0320 09:09:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 09:09:23.585090  543705 disk_info.go:125] begin check local disk info of client
I0320 09:09:23.587563  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:09:23.587568  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490c00 0xc000490c40]
E0320 09:09:33.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:09:33.409894  543705 memory.go:184] no items to output this cycle
I0320 09:09:33.409941  543705 cpu.go:275] no items to output this cycle
I0320 09:09:38.385741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:09:38.385746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:09:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:09:43.410638  543705 memory.go:191] Add success.
I0320 09:09:43.409824  543705 cpu.go:282] Add success.
I0320 09:09:43.420424  543705 net.go:648] Add success.
I0320 09:09:43.423031  543705 net.go:770] primary dev: ETH0
I0320 09:09:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:09:43.423057  543705 net.go:698] Add success.
I0320 09:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:09:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:09:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:09:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:09:53.409763  543705 memory.go:184] no items to output this cycle
I0320 09:09:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 09:10:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:10:03.409766  543705 memory.go:184] no items to output this cycle
I0320 09:10:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 09:10:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:10:13.409793  543705 memory.go:191] Add success.
I0320 09:10:13.409818  543705 cpu.go:282] Add success.
W0320 09:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:10:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:10:13.420136  543705 net.go:648] Add success.
I0320 09:10:13.422685  543705 net.go:770] primary dev: ETH0
I0320 09:10:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:10:13.422709  543705 net.go:698] Add success.
I0320 09:10:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:10:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:10:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 09:10:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:10:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 09:10:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:10:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:10:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:10:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:10:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:10:23.409812  543705 memory.go:184] no items to output this cycle
I0320 09:10:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 09:10:23.587676  543705 disk_info.go:125] begin check local disk info of client
I0320 09:10:23.590106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:10:23.590112  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b7240 0xc0002b7280]
E0320 09:10:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:10:33.409770  543705 memory.go:184] no items to output this cycle
I0320 09:10:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 09:10:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:10:43.409794  543705 memory.go:191] Add success.
I0320 09:10:43.409799  543705 cpu.go:282] Add success.
I0320 09:10:43.420006  543705 net.go:648] Add success.
I0320 09:10:43.422905  543705 net.go:770] primary dev: ETH0
I0320 09:10:43.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:10:43.422932  543705 net.go:698] Add success.
I0320 09:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:10:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:10:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:10:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:10:53.409768  543705 memory.go:184] no items to output this cycle
I0320 09:10:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 09:11:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:11:03.409772  543705 memory.go:184] no items to output this cycle
I0320 09:11:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 09:11:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:11:13.409782  543705 memory.go:191] Add success.
I0320 09:11:13.409802  543705 cpu.go:282] Add success.
W0320 09:11:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:11:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:11:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:11:13.420122  543705 net.go:648] Add success.
I0320 09:11:13.422910  543705 net.go:770] primary dev: ETH0
I0320 09:11:13.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:11:13.422934  543705 net.go:698] Add success.
I0320 09:11:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:11:14.455224  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:11:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0320 09:11:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:11:14.456689  543705 disk_worker.go:494] system disk:vda1
I0320 09:11:14.456725  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:11:15.454985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:11:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:11:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:11:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:11:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:11:23.409801  543705 memory.go:184] no items to output this cycle
I0320 09:11:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 09:11:23.590723  543705 disk_info.go:125] begin check local disk info of client
I0320 09:11:23.593144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:11:23.593149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005474c0 0xc000547500]
E0320 09:11:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:11:33.409779  543705 memory.go:184] no items to output this cycle
I0320 09:11:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 09:11:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:11:43.409820  543705 memory.go:191] Add success.
I0320 09:11:43.409822  543705 cpu.go:282] Add success.
I0320 09:11:43.419868  543705 net.go:648] Add success.
I0320 09:11:43.422537  543705 net.go:770] primary dev: ETH0
I0320 09:11:43.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:11:43.422563  543705 net.go:698] Add success.
I0320 09:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:11:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:11:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:11:53.409806  543705 memory.go:184] no items to output this cycle
I0320 09:11:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 09:12:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:12:03.409774  543705 memory.go:184] no items to output this cycle
I0320 09:12:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 09:12:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:12:13.409827  543705 memory.go:191] Add success.
I0320 09:12:13.409833  543705 cpu.go:282] Add success.
W0320 09:12:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:12:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:12:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:12:13.420121  543705 net.go:648] Add success.
I0320 09:12:13.423038  543705 net.go:770] primary dev: ETH0
I0320 09:12:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:12:13.423063  543705 net.go:698] Add success.
I0320 09:12:13.467327  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"42a8d73c-0c50-48bc-8348-7d9ad5f2e0d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:12:13.467360  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 09:12:14.455231  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:12:14.455245  543705 disk_worker.go:708] disk space is not compliant
W0320 09:12:14.455247  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:12:14.456557  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:12:14.456568  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:12:14.456575  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:12:14.457080  543705 disk_worker.go:494] system disk:vda1
I0320 09:12:14.457125  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:12:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:12:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:12:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:12:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:12:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:12:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:12:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:12:23.410404  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:12:23.410419  543705 memory.go:184] no items to output this cycle
I0320 09:12:23.410459  543705 cpu.go:275] no items to output this cycle
I0320 09:12:23.593669  543705 disk_info.go:125] begin check local disk info of client
I0320 09:12:23.596040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:12:23.596045  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b5ac0 0xc0002b5b00]
E0320 09:12:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:12:33.409797  543705 memory.go:184] no items to output this cycle
I0320 09:12:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 09:12:38.389735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:12:38.389742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:12:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:12:43.410523  543705 memory.go:191] Add success.
I0320 09:12:43.409829  543705 cpu.go:282] Add success.
I0320 09:12:43.420316  543705 net.go:648] Add success.
I0320 09:12:43.422890  543705 net.go:770] primary dev: ETH0
I0320 09:12:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:12:43.422918  543705 net.go:698] Add success.
I0320 09:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:12:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:12:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:12:53.409781  543705 memory.go:184] no items to output this cycle
I0320 09:12:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 09:13:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:13:03.409798  543705 memory.go:184] no items to output this cycle
I0320 09:13:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 09:13:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:13:13.409817  543705 memory.go:191] Add success.
I0320 09:13:13.409827  543705 cpu.go:282] Add success.
W0320 09:13:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:13:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:13:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:13:13.420166  543705 net.go:648] Add success.
I0320 09:13:13.423259  543705 net.go:770] primary dev: ETH0
I0320 09:13:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:13:13.423284  543705 net.go:698] Add success.
I0320 09:13:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:13:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:13:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0320 09:13:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:13:14.456944  543705 disk_worker.go:494] system disk:vda1
I0320 09:13:14.456978  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:13:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:13:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:13:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:13:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:13:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:13:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:13:23.409799  543705 memory.go:184] no items to output this cycle
I0320 09:13:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 09:13:23.596115  543705 disk_info.go:125] begin check local disk info of client
I0320 09:13:23.598550  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:13:23.598555  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046e280 0xc00046e2c0]
E0320 09:13:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:13:33.409792  543705 memory.go:184] no items to output this cycle
I0320 09:13:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 09:13:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:13:43.409796  543705 memory.go:191] Add success.
I0320 09:13:43.409796  543705 cpu.go:282] Add success.
I0320 09:13:43.419856  543705 net.go:648] Add success.
I0320 09:13:43.422463  543705 net.go:770] primary dev: ETH0
I0320 09:13:43.422478  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:13:43.422493  543705 net.go:698] Add success.
I0320 09:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:13:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:13:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:13:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:13:53.409786  543705 memory.go:184] no items to output this cycle
I0320 09:13:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 09:14:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:14:03.409781  543705 memory.go:184] no items to output this cycle
I0320 09:14:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 09:14:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:14:13.409806  543705 memory.go:191] Add success.
I0320 09:14:13.409808  543705 cpu.go:282] Add success.
W0320 09:14:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:14:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:14:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:14:13.420082  543705 net.go:648] Add success.
I0320 09:14:13.423103  543705 net.go:770] primary dev: ETH0
I0320 09:14:13.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:14:13.423133  543705 net.go:698] Add success.
I0320 09:14:14.453937  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:14:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:14:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 09:14:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:14:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 09:14:14.456733  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:14:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:14:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:14:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:14:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:14:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:14:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:14:23.409779  543705 memory.go:184] no items to output this cycle
I0320 09:14:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 09:14:23.599141  543705 disk_info.go:125] begin check local disk info of client
I0320 09:14:23.601534  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:14:23.601539  543705 disk_info.go:196] parse disk info done, disk is : [0xc000247080 0xc0002470c0]
E0320 09:14:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:14:33.409790  543705 memory.go:184] no items to output this cycle
I0320 09:14:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 09:14:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:14:43.409831  543705 memory.go:191] Add success.
I0320 09:14:43.409836  543705 cpu.go:282] Add success.
I0320 09:14:43.419997  543705 net.go:648] Add success.
I0320 09:14:43.423222  543705 net.go:770] primary dev: ETH0
I0320 09:14:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:14:43.423249  543705 net.go:698] Add success.
I0320 09:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:14:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:14:53.409779  543705 memory.go:184] no items to output this cycle
I0320 09:14:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 09:15:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:15:03.409807  543705 memory.go:184] no items to output this cycle
I0320 09:15:03.409821  543705 cpu.go:275] no items to output this cycle
W0320 09:15:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:15:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:15:13.409742  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 09:15:13.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:15:13.409849  543705 cpu.go:282] Add success.
I0320 09:15:13.409870  543705 memory.go:191] Add success.
I0320 09:15:13.420063  543705 net.go:648] Add success.
I0320 09:15:13.422799  543705 net.go:770] primary dev: ETH0
I0320 09:15:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:15:13.422829  543705 net.go:698] Add success.
I0320 09:15:13.468966  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"34bcd4d7-feae-4376-b1f7-7fc72204dc22","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:15:13.469000  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:15:14.453955  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:15:14.455332  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:15:14.455348  543705 disk_worker.go:708] disk space is not compliant
W0320 09:15:14.455353  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:15:14.457684  543705 disk_worker.go:494] system disk:vda1
I0320 09:15:14.457722  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:15:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:15:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:15:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:15:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:15:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:15:23.409788  543705 memory.go:184] no items to output this cycle
I0320 09:15:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 09:15:23.601669  543705 disk_info.go:125] begin check local disk info of client
I0320 09:15:23.604187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:15:23.604192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c3c80 0xc0004c3cc0]
E0320 09:15:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:15:33.409824  543705 memory.go:184] no items to output this cycle
I0320 09:15:33.409839  543705 cpu.go:275] no items to output this cycle
I0320 09:15:38.393733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:15:38.393740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:15:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:15:43.410649  543705 memory.go:191] Add success.
I0320 09:15:43.409830  543705 cpu.go:282] Add success.
I0320 09:15:43.420415  543705 net.go:648] Add success.
I0320 09:15:43.423041  543705 net.go:770] primary dev: ETH0
I0320 09:15:43.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:15:43.423068  543705 net.go:698] Add success.
I0320 09:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:15:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:15:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:15:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:15:53.409796  543705 memory.go:184] no items to output this cycle
I0320 09:15:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 09:16:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:16:03.409775  543705 memory.go:184] no items to output this cycle
I0320 09:16:03.409803  543705 cpu.go:275] no items to output this cycle
W0320 09:16:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:16:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:16:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:16:13.409796  543705 cpu.go:282] Add success.
E0320 09:16:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:16:13.409824  543705 memory.go:191] Add success.
I0320 09:16:13.420135  543705 net.go:648] Add success.
I0320 09:16:13.422908  543705 net.go:770] primary dev: ETH0
I0320 09:16:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:16:13.422933  543705 net.go:698] Add success.
I0320 09:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:16:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:16:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 09:16:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:16:14.458030  543705 disk_worker.go:494] system disk:vda1
I0320 09:16:14.458062  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:16:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:16:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:16:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:16:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:16:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:16:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:16:23.409800  543705 memory.go:184] no items to output this cycle
I0320 09:16:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 09:16:23.605195  543705 disk_info.go:125] begin check local disk info of client
I0320 09:16:23.607773  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:16:23.607779  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6e00 0xc0001c6e40]
E0320 09:16:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:16:33.409775  543705 memory.go:184] no items to output this cycle
I0320 09:16:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 09:16:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:16:43.409822  543705 memory.go:191] Add success.
I0320 09:16:43.409833  543705 cpu.go:282] Add success.
I0320 09:16:43.419877  543705 net.go:648] Add success.
I0320 09:16:43.422665  543705 net.go:770] primary dev: ETH0
I0320 09:16:43.422679  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:16:43.422693  543705 net.go:698] Add success.
I0320 09:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:16:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:16:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:16:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:16:53.409776  543705 memory.go:184] no items to output this cycle
I0320 09:16:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:17:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:17:03.409783  543705 memory.go:184] no items to output this cycle
I0320 09:17:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:17:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:17:13.409790  543705 memory.go:191] Add success.
I0320 09:17:13.409792  543705 cpu.go:282] Add success.
W0320 09:17:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:17:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:17:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:17:13.420076  543705 net.go:648] Add success.
I0320 09:17:13.422550  543705 net.go:770] primary dev: ETH0
I0320 09:17:13.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:17:13.422576  543705 net.go:698] Add success.
I0320 09:17:13.453100  543705 event_worker.go:152] Polling the log file for events...
W0320 09:17:14.455362  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:17:14.455380  543705 disk_worker.go:708] disk space is not compliant
W0320 09:17:14.455385  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:17:14.458153  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:17:14.458161  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:17:14.458166  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:17:14.459244  543705 disk_worker.go:494] system disk:vda1
I0320 09:17:14.459296  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:17:15.457066  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:17:15.457080  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:17:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:17:16.457996  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:17:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:17:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:17:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:17:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:17:23.409774  543705 memory.go:184] no items to output this cycle
I0320 09:17:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 09:17:23.608143  543705 disk_info.go:125] begin check local disk info of client
I0320 09:17:23.610618  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:17:23.610623  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaac0 0xc0001aab00]
E0320 09:17:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:17:33.409809  543705 memory.go:184] no items to output this cycle
I0320 09:17:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 09:17:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:17:43.409787  543705 memory.go:191] Add success.
I0320 09:17:43.409805  543705 cpu.go:282] Add success.
I0320 09:17:43.420027  543705 net.go:648] Add success.
I0320 09:17:43.422728  543705 net.go:770] primary dev: ETH0
I0320 09:17:43.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:17:43.422754  543705 net.go:698] Add success.
I0320 09:17:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:17:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:17:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:17:53.409775  543705 memory.go:184] no items to output this cycle
I0320 09:17:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 09:18:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:18:03.409784  543705 memory.go:184] no items to output this cycle
I0320 09:18:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:18:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:18:13.409804  543705 memory.go:191] Add success.
I0320 09:18:13.409804  543705 cpu.go:282] Add success.
W0320 09:18:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:18:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:18:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:18:13.420403  543705 net.go:648] Add success.
I0320 09:18:13.423040  543705 net.go:770] primary dev: ETH0
I0320 09:18:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:18:13.423067  543705 net.go:698] Add success.
I0320 09:18:13.605473  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fdffd18c-0093-412e-8888-d0a8a5466285","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:18:13.605504  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:18:14.454806  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:18:14.454957  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:18:14.454968  543705 disk_worker.go:708] disk space is not compliant
W0320 09:18:14.454971  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:18:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 09:18:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:18:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:18:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:18:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:18:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:18:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:18:23.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:18:23.409898  543705 memory.go:184] no items to output this cycle
I0320 09:18:23.409907  543705 cpu.go:275] no items to output this cycle
I0320 09:18:23.611677  543705 disk_info.go:125] begin check local disk info of client
I0320 09:18:23.614276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:18:23.614281  543705 disk_info.go:196] parse disk info done, disk is : [0xc000257080 0xc0002570c0]
E0320 09:18:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:18:33.409777  543705 memory.go:184] no items to output this cycle
I0320 09:18:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 09:18:38.397730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:18:38.397737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:18:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:18:43.410609  543705 memory.go:191] Add success.
I0320 09:18:43.409794  543705 cpu.go:282] Add success.
I0320 09:18:43.420317  543705 net.go:648] Add success.
I0320 09:18:43.423057  543705 net.go:770] primary dev: ETH0
I0320 09:18:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:18:43.423092  543705 net.go:698] Add success.
I0320 09:18:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:18:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:18:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:18:53.409782  543705 memory.go:184] no items to output this cycle
I0320 09:18:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:19:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:19:03.409782  543705 memory.go:184] no items to output this cycle
I0320 09:19:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 09:19:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:19:13.409810  543705 memory.go:191] Add success.
I0320 09:19:13.409816  543705 cpu.go:282] Add success.
W0320 09:19:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:19:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:19:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:19:13.420116  543705 net.go:648] Add success.
I0320 09:19:13.422691  543705 net.go:770] primary dev: ETH0
I0320 09:19:13.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:19:13.422721  543705 net.go:698] Add success.
I0320 09:19:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:19:14.455275  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:19:14.455291  543705 disk_worker.go:708] disk space is not compliant
W0320 09:19:14.455295  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:19:14.457165  543705 disk_worker.go:494] system disk:vda1
I0320 09:19:14.457199  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:19:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:19:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:19:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:19:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:19:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:19:23.409773  543705 memory.go:184] no items to output this cycle
I0320 09:19:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 09:19:23.615210  543705 disk_info.go:125] begin check local disk info of client
I0320 09:19:23.617740  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:19:23.617745  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de6c0 0xc0003de700]
E0320 09:19:33.409954  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:19:33.409977  543705 memory.go:184] no items to output this cycle
I0320 09:19:33.410015  543705 cpu.go:275] no items to output this cycle
E0320 09:19:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:19:43.409790  543705 memory.go:191] Add success.
I0320 09:19:43.409798  543705 cpu.go:282] Add success.
I0320 09:19:43.420056  543705 net.go:648] Add success.
I0320 09:19:43.422606  543705 net.go:770] primary dev: ETH0
I0320 09:19:43.422620  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:19:43.422632  543705 net.go:698] Add success.
I0320 09:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:19:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:19:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:19:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:19:53.409767  543705 memory.go:184] no items to output this cycle
I0320 09:19:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 09:20:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:20:03.409784  543705 memory.go:184] no items to output this cycle
I0320 09:20:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 09:20:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:20:13.409788  543705 memory.go:191] Add success.
I0320 09:20:13.409791  543705 cpu.go:282] Add success.
W0320 09:20:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:20:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:20:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:20:13.420111  543705 net.go:648] Add success.
I0320 09:20:13.423092  543705 net.go:770] primary dev: ETH0
I0320 09:20:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:20:13.423117  543705 net.go:698] Add success.
I0320 09:20:14.453933  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:20:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:20:14.455293  543705 disk_worker.go:708] disk space is not compliant
W0320 09:20:14.455298  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:20:14.457534  543705 disk_worker.go:494] system disk:vda1
I0320 09:20:14.457583  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:20:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:20:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:20:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:20:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:20:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:20:23.409801  543705 memory.go:184] no items to output this cycle
I0320 09:20:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 09:20:23.617825  543705 disk_info.go:125] begin check local disk info of client
I0320 09:20:23.620398  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:20:23.620404  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5340 0xc0004b5380]
E0320 09:20:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:20:33.409765  543705 memory.go:184] no items to output this cycle
I0320 09:20:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 09:20:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:20:43.409801  543705 memory.go:191] Add success.
I0320 09:20:43.409802  543705 cpu.go:282] Add success.
I0320 09:20:43.419881  543705 net.go:648] Add success.
I0320 09:20:43.422496  543705 net.go:770] primary dev: ETH0
I0320 09:20:43.422510  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:20:43.422524  543705 net.go:698] Add success.
I0320 09:20:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:20:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:20:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:20:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:20:53.409785  543705 memory.go:184] no items to output this cycle
I0320 09:20:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 09:21:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:21:03.409785  543705 memory.go:184] no items to output this cycle
I0320 09:21:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 09:21:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:21:13.409798  543705 memory.go:191] Add success.
I0320 09:21:13.409798  543705 cpu.go:282] Add success.
W0320 09:21:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:21:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:21:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:21:13.420172  543705 net.go:648] Add success.
I0320 09:21:13.422835  543705 net.go:770] primary dev: ETH0
I0320 09:21:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:21:13.422859  543705 net.go:698] Add success.
I0320 09:21:13.468818  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"08413161-335c-4137-a175-4f9dc6eda5cd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:21:13.468850  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:21:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:21:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:21:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 09:21:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:21:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 09:21:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:21:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:21:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:21:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:21:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:21:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:21:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:21:23.409786  543705 memory.go:184] no items to output this cycle
I0320 09:21:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 09:21:23.621217  543705 disk_info.go:125] begin check local disk info of client
I0320 09:21:23.623747  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:21:23.623752  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487840 0xc000487880]
E0320 09:21:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:21:33.409793  543705 memory.go:184] no items to output this cycle
I0320 09:21:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 09:21:38.401741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:21:38.401748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:21:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:21:43.410605  543705 memory.go:191] Add success.
I0320 09:21:43.409830  543705 cpu.go:282] Add success.
I0320 09:21:43.420473  543705 net.go:648] Add success.
I0320 09:21:43.423098  543705 net.go:770] primary dev: ETH0
I0320 09:21:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:21:43.423123  543705 net.go:698] Add success.
I0320 09:21:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:21:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:21:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:21:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:21:53.409805  543705 memory.go:184] no items to output this cycle
I0320 09:21:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 09:22:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:22:03.409787  543705 memory.go:184] no items to output this cycle
I0320 09:22:03.409796  543705 cpu.go:275] no items to output this cycle
W0320 09:22:13.409701  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:22:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:22:13.409721  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 09:22:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:22:13.409821  543705 memory.go:191] Add success.
I0320 09:22:13.409835  543705 cpu.go:282] Add success.
I0320 09:22:13.420035  543705 net.go:648] Add success.
I0320 09:22:13.422700  543705 net.go:770] primary dev: ETH0
I0320 09:22:13.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:22:13.422725  543705 net.go:698] Add success.
W0320 09:22:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:22:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 09:22:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:22:14.456913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:22:14.456922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:22:14.456928  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:22:14.456991  543705 disk_worker.go:494] system disk:vda1
I0320 09:22:14.457018  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:22:15.457035  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:22:15.457050  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:22:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:22:16.457995  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:22:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:22:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:22:16.472522  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:22:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:22:23.409774  543705 memory.go:184] no items to output this cycle
I0320 09:22:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 09:22:23.623832  543705 disk_info.go:125] begin check local disk info of client
I0320 09:22:23.626315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:22:23.626320  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487280 0xc0004872c0]
E0320 09:22:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:22:33.409764  543705 memory.go:184] no items to output this cycle
I0320 09:22:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 09:22:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:22:43.409788  543705 memory.go:191] Add success.
I0320 09:22:43.409817  543705 cpu.go:282] Add success.
I0320 09:22:43.420141  543705 net.go:648] Add success.
I0320 09:22:43.422720  543705 net.go:770] primary dev: ETH0
I0320 09:22:43.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:22:43.422748  543705 net.go:698] Add success.
I0320 09:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:22:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:22:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:22:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:22:53.409771  543705 memory.go:184] no items to output this cycle
I0320 09:22:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 09:23:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:23:03.409785  543705 memory.go:184] no items to output this cycle
I0320 09:23:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 09:23:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:23:13.409788  543705 memory.go:191] Add success.
I0320 09:23:13.409798  543705 cpu.go:282] Add success.
W0320 09:23:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:23:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:23:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:23:13.420138  543705 net.go:648] Add success.
I0320 09:23:13.422840  543705 net.go:770] primary dev: ETH0
I0320 09:23:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:23:13.422875  543705 net.go:698] Add success.
I0320 09:23:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:23:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:23:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 09:23:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:23:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 09:23:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:23:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:23:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:23:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:23:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:23:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:23:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:23:23.409780  543705 memory.go:184] no items to output this cycle
I0320 09:23:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 09:23:23.626397  543705 disk_info.go:125] begin check local disk info of client
I0320 09:23:23.628942  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:23:23.628947  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035cec0 0xc00035cf00]
E0320 09:23:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:23:33.409780  543705 memory.go:184] no items to output this cycle
I0320 09:23:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 09:23:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:23:43.409817  543705 memory.go:191] Add success.
I0320 09:23:43.409826  543705 cpu.go:282] Add success.
I0320 09:23:43.419853  543705 net.go:648] Add success.
I0320 09:23:43.422649  543705 net.go:770] primary dev: ETH0
I0320 09:23:43.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:23:43.422674  543705 net.go:698] Add success.
I0320 09:23:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:23:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:23:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:23:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:23:53.409781  543705 memory.go:184] no items to output this cycle
I0320 09:23:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 09:24:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:24:03.409792  543705 memory.go:184] no items to output this cycle
I0320 09:24:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 09:24:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:24:13.409781  543705 memory.go:191] Add success.
W0320 09:24:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:24:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:24:13.409818  543705 cpu.go:282] Add success.
I0320 09:24:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:24:13.420128  543705 net.go:648] Add success.
I0320 09:24:13.423364  543705 net.go:770] primary dev: ETH0
I0320 09:24:13.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:24:13.423394  543705 net.go:698] Add success.
I0320 09:24:13.463557  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a552381-2908-4d4c-b670-da4c73f521c8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:24:13.463591  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:24:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:24:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:24:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 09:24:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:24:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 09:24:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:24:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:24:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:24:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:24:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:24:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:24:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:24:23.409769  543705 memory.go:184] no items to output this cycle
I0320 09:24:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 09:24:23.629285  543705 disk_info.go:125] begin check local disk info of client
I0320 09:24:23.631822  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:24:23.631827  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315e00 0xc000315e40]
E0320 09:24:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:24:33.409778  543705 memory.go:184] no items to output this cycle
I0320 09:24:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 09:24:38.405733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:24:38.405740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:24:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:24:43.410581  543705 memory.go:191] Add success.
I0320 09:24:43.409825  543705 cpu.go:282] Add success.
I0320 09:24:43.420270  543705 net.go:648] Add success.
I0320 09:24:43.422874  543705 net.go:770] primary dev: ETH0
I0320 09:24:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:24:43.422901  543705 net.go:698] Add success.
I0320 09:24:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:24:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:24:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:24:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:24:53.409770  543705 memory.go:184] no items to output this cycle
I0320 09:24:53.409792  543705 cpu.go:275] no items to output this cycle
I0320 09:25:03.409905  543705 cpu.go:275] no items to output this cycle
E0320 09:25:03.409943  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:25:03.409976  543705 memory.go:184] no items to output this cycle
E0320 09:25:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:25:13.409822  543705 memory.go:191] Add success.
I0320 09:25:13.409830  543705 cpu.go:282] Add success.
W0320 09:25:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:25:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:25:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:25:13.420094  543705 net.go:648] Add success.
I0320 09:25:13.422593  543705 net.go:770] primary dev: ETH0
I0320 09:25:13.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:25:13.422623  543705 net.go:698] Add success.
I0320 09:25:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:25:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:25:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 09:25:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:25:14.456775  543705 disk_worker.go:494] system disk:vda1
I0320 09:25:14.456804  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:25:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:25:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:25:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:25:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:25:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:25:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:25:23.409803  543705 memory.go:184] no items to output this cycle
I0320 09:25:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 09:25:23.632298  543705 disk_info.go:125] begin check local disk info of client
I0320 09:25:23.634899  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:25:23.634905  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab000 0xc0001ab040]
E0320 09:25:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:25:33.409809  543705 memory.go:184] no items to output this cycle
I0320 09:25:33.409822  543705 cpu.go:275] no items to output this cycle
E0320 09:25:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:25:43.409802  543705 cpu.go:282] Add success.
I0320 09:25:43.409803  543705 memory.go:191] Add success.
I0320 09:25:43.419902  543705 net.go:648] Add success.
I0320 09:25:43.422968  543705 net.go:770] primary dev: ETH0
I0320 09:25:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:25:43.422994  543705 net.go:698] Add success.
I0320 09:25:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:25:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:25:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:25:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:25:53.409785  543705 memory.go:184] no items to output this cycle
I0320 09:25:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 09:26:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:26:03.409778  543705 memory.go:184] no items to output this cycle
I0320 09:26:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 09:26:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:26:13.409930  543705 memory.go:191] Add success.
I0320 09:26:13.409941  543705 cpu.go:282] Add success.
W0320 09:26:13.409974  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:26:13.409987  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:26:13.409991  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:26:13.419722  543705 net.go:648] Add success.
I0320 09:26:13.422274  543705 net.go:770] primary dev: ETH0
I0320 09:26:13.422287  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:26:13.422298  543705 net.go:698] Add success.
I0320 09:26:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:26:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:26:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 09:26:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:26:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 09:26:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:26:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:26:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:26:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:26:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:26:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:26:23.409771  543705 memory.go:184] no items to output this cycle
I0320 09:26:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 09:26:23.635324  543705 disk_info.go:125] begin check local disk info of client
I0320 09:26:23.637854  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:26:23.637859  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c57c0 0xc0000c5800]
E0320 09:26:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:26:33.409777  543705 memory.go:184] no items to output this cycle
I0320 09:26:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:26:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:26:43.409790  543705 memory.go:191] Add success.
I0320 09:26:43.409812  543705 cpu.go:282] Add success.
I0320 09:26:43.420058  543705 net.go:648] Add success.
I0320 09:26:43.422645  543705 net.go:770] primary dev: ETH0
I0320 09:26:43.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:26:43.422671  543705 net.go:698] Add success.
I0320 09:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:26:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:26:53.409785  543705 memory.go:184] no items to output this cycle
I0320 09:26:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 09:27:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:27:03.409779  543705 memory.go:184] no items to output this cycle
I0320 09:27:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 09:27:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:27:13.409788  543705 memory.go:191] Add success.
I0320 09:27:13.409791  543705 cpu.go:282] Add success.
W0320 09:27:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:27:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:27:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:27:13.420169  543705 net.go:648] Add success.
I0320 09:27:13.423002  543705 net.go:770] primary dev: ETH0
I0320 09:27:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:27:13.423026  543705 net.go:698] Add success.
I0320 09:27:13.429274  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 09:27:13.452783  543705 event_worker.go:152] Polling the log file for events...
I0320 09:27:13.468972  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c3ee894b-4451-48a8-944a-128c0f1c1b23","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:27:13.469003  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 09:27:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:27:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 09:27:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:27:14.455842  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:27:14.455851  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:27:14.455856  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:27:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 09:27:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:27:15.457009  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:27:15.457024  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:27:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:27:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:27:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:27:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:27:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:27:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:27:23.409773  543705 memory.go:184] no items to output this cycle
I0320 09:27:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 09:27:23.637936  543705 disk_info.go:125] begin check local disk info of client
I0320 09:27:23.640450  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:27:23.640456  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052e0c0 0xc00052e100]
E0320 09:27:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:27:33.409778  543705 memory.go:184] no items to output this cycle
I0320 09:27:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 09:27:38.409740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:27:38.409746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:27:43.410610  543705 memory.go:191] Add success.
I0320 09:27:43.409793  543705 cpu.go:282] Add success.
I0320 09:27:43.420312  543705 net.go:648] Add success.
I0320 09:27:43.423122  543705 net.go:770] primary dev: ETH0
I0320 09:27:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:27:43.423147  543705 net.go:698] Add success.
I0320 09:27:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:27:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:27:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:27:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:27:53.409770  543705 memory.go:184] no items to output this cycle
I0320 09:27:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 09:28:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:28:03.409779  543705 memory.go:184] no items to output this cycle
I0320 09:28:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 09:28:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:28:13.409777  543705 memory.go:191] Add success.
W0320 09:28:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 09:28:13.409813  543705 cpu.go:282] Add success.
W0320 09:28:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:28:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:28:13.420440  543705 net.go:648] Add success.
I0320 09:28:13.423103  543705 net.go:770] primary dev: ETH0
I0320 09:28:13.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:28:13.423126  543705 net.go:698] Add success.
I0320 09:28:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:28:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:28:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 09:28:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:28:14.456525  543705 disk_worker.go:494] system disk:vda1
I0320 09:28:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:28:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:28:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:28:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:28:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:28:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:28:23.409772  543705 memory.go:184] no items to output this cycle
I0320 09:28:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 09:28:23.641337  543705 disk_info.go:125] begin check local disk info of client
I0320 09:28:23.643935  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:28:23.643940  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7600 0xc0003b7640]
E0320 09:28:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:28:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 09:28:33.409793  543705 memory.go:184] no items to output this cycle
E0320 09:28:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:28:43.409798  543705 memory.go:191] Add success.
I0320 09:28:43.409808  543705 cpu.go:282] Add success.
I0320 09:28:43.419875  543705 net.go:648] Add success.
I0320 09:28:43.422371  543705 net.go:770] primary dev: ETH0
I0320 09:28:43.422386  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:28:43.422400  543705 net.go:698] Add success.
I0320 09:28:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:28:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:28:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:28:53.409772  543705 memory.go:184] no items to output this cycle
I0320 09:28:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 09:29:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:29:03.409768  543705 memory.go:184] no items to output this cycle
I0320 09:29:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 09:29:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:29:13.409826  543705 memory.go:191] Add success.
I0320 09:29:13.409835  543705 cpu.go:282] Add success.
W0320 09:29:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:29:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:29:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:29:13.420101  543705 net.go:648] Add success.
I0320 09:29:13.422837  543705 net.go:770] primary dev: ETH0
I0320 09:29:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:29:13.422867  543705 net.go:698] Add success.
I0320 09:29:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:29:14.455364  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:29:14.455382  543705 disk_worker.go:708] disk space is not compliant
W0320 09:29:14.455398  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:29:14.456973  543705 disk_worker.go:494] system disk:vda1
I0320 09:29:14.457001  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:29:15.456023  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:29:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:29:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:29:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:29:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:29:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:29:23.409800  543705 memory.go:184] no items to output this cycle
I0320 09:29:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 09:29:23.644416  543705 disk_info.go:125] begin check local disk info of client
I0320 09:29:23.646935  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:29:23.646941  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b100 0xc00007b140]
E0320 09:29:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:29:33.409786  543705 memory.go:184] no items to output this cycle
I0320 09:29:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 09:29:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:29:43.409791  543705 memory.go:191] Add success.
I0320 09:29:43.409794  543705 cpu.go:282] Add success.
I0320 09:29:43.419887  543705 net.go:648] Add success.
I0320 09:29:43.422473  543705 net.go:770] primary dev: ETH0
I0320 09:29:43.422486  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:29:43.422499  543705 net.go:698] Add success.
I0320 09:29:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:29:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:29:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:29:53.409802  543705 memory.go:184] no items to output this cycle
I0320 09:29:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 09:30:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:30:03.409795  543705 memory.go:184] no items to output this cycle
I0320 09:30:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 09:30:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:30:13.409788  543705 memory.go:191] Add success.
I0320 09:30:13.409789  543705 cpu.go:282] Add success.
W0320 09:30:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:30:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:30:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:30:13.420129  543705 net.go:648] Add success.
I0320 09:30:13.422650  543705 net.go:770] primary dev: ETH0
I0320 09:30:13.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:30:13.422676  543705 net.go:698] Add success.
I0320 09:30:13.469362  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e99a6da0-96ba-4fe4-8295-e602eeed6c56","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:30:13.469395  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:30:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:30:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:30:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 09:30:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:30:14.456743  543705 disk_worker.go:494] system disk:vda1
I0320 09:30:14.456772  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:30:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:30:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:30:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:30:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:30:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:30:23.409769  543705 memory.go:184] no items to output this cycle
I0320 09:30:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 09:30:23.647370  543705 disk_info.go:125] begin check local disk info of client
I0320 09:30:23.649925  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:30:23.649931  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329000 0xc000329040]
E0320 09:30:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:30:33.409800  543705 memory.go:184] no items to output this cycle
I0320 09:30:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 09:30:38.410797  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:30:38.410804  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:30:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:30:43.410579  543705 memory.go:191] Add success.
I0320 09:30:43.409808  543705 cpu.go:282] Add success.
I0320 09:30:43.420351  543705 net.go:648] Add success.
I0320 09:30:43.423522  543705 net.go:770] primary dev: ETH0
I0320 09:30:43.423536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:30:43.423550  543705 net.go:698] Add success.
I0320 09:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:30:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:30:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:30:53.409788  543705 memory.go:184] no items to output this cycle
I0320 09:30:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 09:31:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:31:03.409767  543705 memory.go:184] no items to output this cycle
I0320 09:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 09:31:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:31:13.409807  543705 memory.go:191] Add success.
I0320 09:31:13.409819  543705 cpu.go:282] Add success.
W0320 09:31:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:31:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:31:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:31:13.420092  543705 net.go:648] Add success.
I0320 09:31:13.422881  543705 net.go:770] primary dev: ETH0
I0320 09:31:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:31:13.422906  543705 net.go:698] Add success.
I0320 09:31:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:31:14.455350  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:31:14.455364  543705 disk_worker.go:708] disk space is not compliant
W0320 09:31:14.455367  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:31:14.456720  543705 disk_worker.go:494] system disk:vda1
I0320 09:31:14.456764  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:31:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:31:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:31:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:31:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:31:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:31:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:31:23.409765  543705 memory.go:184] no items to output this cycle
I0320 09:31:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 09:31:23.650011  543705 disk_info.go:125] begin check local disk info of client
I0320 09:31:23.652604  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:31:23.652609  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b340 0xc00007b380]
E0320 09:31:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:31:33.409809  543705 memory.go:184] no items to output this cycle
I0320 09:31:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 09:31:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:31:43.409813  543705 memory.go:191] Add success.
I0320 09:31:43.409821  543705 cpu.go:282] Add success.
I0320 09:31:43.419932  543705 net.go:648] Add success.
I0320 09:31:43.422455  543705 net.go:770] primary dev: ETH0
I0320 09:31:43.422470  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:31:43.422482  543705 net.go:698] Add success.
I0320 09:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:31:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:31:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:31:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:31:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 09:31:53.409785  543705 memory.go:184] no items to output this cycle
E0320 09:32:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:32:03.409795  543705 memory.go:184] no items to output this cycle
I0320 09:32:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 09:32:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:32:13.409774  543705 memory.go:191] Add success.
W0320 09:32:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 09:32:13.409806  543705 cpu.go:282] Add success.
W0320 09:32:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:32:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:32:13.420031  543705 net.go:648] Add success.
I0320 09:32:13.422814  543705 net.go:770] primary dev: ETH0
I0320 09:32:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:32:13.422838  543705 net.go:698] Add success.
W0320 09:32:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:32:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 09:32:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:32:14.456627  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:32:14.456635  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:32:14.456641  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:32:14.457406  543705 disk_worker.go:494] system disk:vda1
I0320 09:32:14.457435  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:32:15.456957  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:32:15.456972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:32:16.458092  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:32:16.458155  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:32:16.458174  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:32:16.458171  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:32:16.472557  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:32:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:32:23.409782  543705 memory.go:184] no items to output this cycle
I0320 09:32:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 09:32:23.653377  543705 disk_info.go:125] begin check local disk info of client
I0320 09:32:23.655860  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:32:23.655865  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035b4c0 0xc00035b500]
E0320 09:32:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:32:33.409796  543705 memory.go:184] no items to output this cycle
I0320 09:32:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 09:32:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:32:43.409779  543705 memory.go:191] Add success.
I0320 09:32:43.409808  543705 cpu.go:282] Add success.
I0320 09:32:43.419870  543705 net.go:648] Add success.
I0320 09:32:43.422983  543705 net.go:770] primary dev: ETH0
I0320 09:32:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:32:43.423009  543705 net.go:698] Add success.
I0320 09:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:32:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:32:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:32:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:32:53.409767  543705 memory.go:184] no items to output this cycle
I0320 09:32:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 09:33:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:33:03.409783  543705 cpu.go:275] no items to output this cycle
I0320 09:33:03.409788  543705 memory.go:184] no items to output this cycle
E0320 09:33:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:33:13.409795  543705 memory.go:191] Add success.
I0320 09:33:13.409795  543705 cpu.go:282] Add success.
W0320 09:33:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:33:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:33:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:33:13.420177  543705 net.go:648] Add success.
I0320 09:33:13.422912  543705 net.go:770] primary dev: ETH0
I0320 09:33:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:33:13.422939  543705 net.go:698] Add success.
I0320 09:33:13.554359  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c1a86d5-dc33-40ac-8844-0862c1b78cc8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:33:13.554392  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:33:14.453977  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:33:14.454221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:33:14.454231  543705 disk_worker.go:708] disk space is not compliant
W0320 09:33:14.454233  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:33:14.455756  543705 disk_worker.go:494] system disk:vda1
I0320 09:33:14.455785  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:33:15.455995  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:33:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:33:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:33:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:33:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:33:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:33:23.409788  543705 memory.go:184] no items to output this cycle
I0320 09:33:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 09:33:23.656387  543705 disk_info.go:125] begin check local disk info of client
I0320 09:33:23.658928  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:33:23.658934  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0320 09:33:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:33:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 09:33:33.409804  543705 memory.go:184] no items to output this cycle
I0320 09:33:38.411855  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:33:38.411861  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:33:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:33:43.410674  543705 memory.go:191] Add success.
I0320 09:33:43.409831  543705 cpu.go:282] Add success.
I0320 09:33:43.420427  543705 net.go:648] Add success.
I0320 09:33:43.423252  543705 net.go:770] primary dev: ETH0
I0320 09:33:43.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:33:43.423278  543705 net.go:698] Add success.
I0320 09:33:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:33:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:33:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:33:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:33:53.409786  543705 memory.go:184] no items to output this cycle
I0320 09:33:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 09:34:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:34:03.409807  543705 memory.go:184] no items to output this cycle
I0320 09:34:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 09:34:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:34:13.409785  543705 memory.go:191] Add success.
I0320 09:34:13.409803  543705 cpu.go:282] Add success.
W0320 09:34:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:34:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:34:13.420115  543705 net.go:648] Add success.
I0320 09:34:13.423127  543705 net.go:770] primary dev: ETH0
I0320 09:34:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:34:13.423161  543705 net.go:698] Add success.
I0320 09:34:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:34:14.455309  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:34:14.455319  543705 disk_worker.go:708] disk space is not compliant
W0320 09:34:14.455322  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:34:14.456671  543705 disk_worker.go:494] system disk:vda1
I0320 09:34:14.456713  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:34:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:34:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:34:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:34:16.472536  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:34:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:34:23.409783  543705 memory.go:184] no items to output this cycle
I0320 09:34:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 09:34:23.659415  543705 disk_info.go:125] begin check local disk info of client
I0320 09:34:23.661987  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:34:23.661992  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493540 0xc000493580]
E0320 09:34:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:34:33.409773  543705 memory.go:184] no items to output this cycle
I0320 09:34:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 09:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:34:43.409799  543705 memory.go:191] Add success.
I0320 09:34:43.409801  543705 cpu.go:282] Add success.
I0320 09:34:43.419966  543705 net.go:648] Add success.
I0320 09:34:43.423297  543705 net.go:770] primary dev: ETH0
I0320 09:34:43.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:34:43.423328  543705 net.go:698] Add success.
I0320 09:34:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:34:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:34:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:34:53.409768  543705 memory.go:184] no items to output this cycle
I0320 09:34:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 09:35:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:35:03.409773  543705 memory.go:184] no items to output this cycle
I0320 09:35:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:35:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:35:13.409786  543705 memory.go:191] Add success.
I0320 09:35:13.409805  543705 cpu.go:282] Add success.
W0320 09:35:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:35:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:35:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:35:13.420594  543705 net.go:648] Add success.
I0320 09:35:13.423240  543705 net.go:770] primary dev: ETH0
I0320 09:35:13.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:35:13.423265  543705 net.go:698] Add success.
I0320 09:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:35:14.455425  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:35:14.455438  543705 disk_worker.go:708] disk space is not compliant
W0320 09:35:14.455442  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:35:14.457030  543705 disk_worker.go:494] system disk:vda1
I0320 09:35:14.457059  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:35:15.454995  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:35:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:35:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:35:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:35:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:35:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:35:23.409788  543705 memory.go:184] no items to output this cycle
I0320 09:35:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 09:35:23.662725  543705 disk_info.go:125] begin check local disk info of client
I0320 09:35:23.665252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:35:23.665258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0320 09:35:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:35:33.409781  543705 memory.go:184] no items to output this cycle
I0320 09:35:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 09:35:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:35:43.409809  543705 memory.go:191] Add success.
I0320 09:35:43.409815  543705 cpu.go:282] Add success.
I0320 09:35:43.419853  543705 net.go:648] Add success.
I0320 09:35:43.422786  543705 net.go:770] primary dev: ETH0
I0320 09:35:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:35:43.422831  543705 net.go:698] Add success.
I0320 09:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:35:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:35:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:35:53.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:35:53.410393  543705 memory.go:184] no items to output this cycle
I0320 09:35:53.410405  543705 cpu.go:275] no items to output this cycle
E0320 09:36:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:36:03.409780  543705 memory.go:184] no items to output this cycle
I0320 09:36:03.409789  543705 cpu.go:275] no items to output this cycle
W0320 09:36:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:36:13.409725  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:36:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:36:13.409791  543705 cpu.go:282] Add success.
E0320 09:36:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:36:13.409837  543705 memory.go:191] Add success.
I0320 09:36:13.420179  543705 net.go:648] Add success.
I0320 09:36:13.422974  543705 net.go:770] primary dev: ETH0
I0320 09:36:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:36:13.423000  543705 net.go:698] Add success.
I0320 09:36:13.469801  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"afbef233-f1ff-45f4-b3bb-5f4fd620bf63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:36:13.469832  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:36:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:36:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:36:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 09:36:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:36:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 09:36:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:36:15.455650  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:36:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:36:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:36:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:36:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:36:23.409794  543705 memory.go:184] no items to output this cycle
I0320 09:36:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 09:36:23.665669  543705 disk_info.go:125] begin check local disk info of client
I0320 09:36:23.668240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:36:23.668245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa680 0xc0001fa6c0]
E0320 09:36:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:36:33.409799  543705 memory.go:184] no items to output this cycle
I0320 09:36:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 09:36:38.412852  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:36:38.412859  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:36:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:36:43.410764  543705 memory.go:191] Add success.
I0320 09:36:43.409803  543705 cpu.go:282] Add success.
I0320 09:36:43.420603  543705 net.go:648] Add success.
I0320 09:36:43.423443  543705 net.go:770] primary dev: ETH0
I0320 09:36:43.423456  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:36:43.423470  543705 net.go:698] Add success.
I0320 09:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:36:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:36:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:36:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:36:53.409779  543705 cpu.go:275] no items to output this cycle
I0320 09:36:53.409789  543705 memory.go:184] no items to output this cycle
E0320 09:37:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:37:03.409794  543705 memory.go:184] no items to output this cycle
I0320 09:37:03.409809  543705 cpu.go:275] no items to output this cycle
W0320 09:37:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:37:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:37:13.409732  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:37:13.409782  543705 cpu.go:282] Add success.
E0320 09:37:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:37:13.409854  543705 memory.go:191] Add success.
I0320 09:37:13.419998  543705 net.go:648] Add success.
I0320 09:37:13.423041  543705 net.go:770] primary dev: ETH0
I0320 09:37:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:37:13.423071  543705 net.go:698] Add success.
I0320 09:37:13.453627  543705 event_worker.go:152] Polling the log file for events...
W0320 09:37:14.455283  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:37:14.455361  543705 disk_worker.go:708] disk space is not compliant
W0320 09:37:14.455365  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:37:14.456555  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:37:14.456564  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:37:14.456570  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:37:14.457444  543705 disk_worker.go:494] system disk:vda1
I0320 09:37:14.457486  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:37:15.456779  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:37:15.456789  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:37:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:37:16.458144  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:37:16.458169  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:37:16.458191  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:37:16.472583  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:37:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:37:23.409803  543705 memory.go:184] no items to output this cycle
I0320 09:37:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 09:37:23.668325  543705 disk_info.go:125] begin check local disk info of client
I0320 09:37:23.670848  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:37:23.670854  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6040 0xc0001c60c0]
E0320 09:37:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:37:33.409780  543705 memory.go:184] no items to output this cycle
I0320 09:37:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 09:37:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:37:43.409814  543705 memory.go:191] Add success.
I0320 09:37:43.409821  543705 cpu.go:282] Add success.
I0320 09:37:43.419874  543705 net.go:648] Add success.
I0320 09:37:43.422790  543705 net.go:770] primary dev: ETH0
I0320 09:37:43.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:37:43.422817  543705 net.go:698] Add success.
I0320 09:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:37:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:37:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:37:53.410230  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:37:53.410256  543705 memory.go:184] no items to output this cycle
I0320 09:37:53.410284  543705 cpu.go:275] no items to output this cycle
E0320 09:38:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:38:03.409792  543705 memory.go:184] no items to output this cycle
I0320 09:38:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 09:38:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:38:13.409820  543705 memory.go:191] Add success.
I0320 09:38:13.409823  543705 cpu.go:282] Add success.
W0320 09:38:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:38:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:38:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:38:13.420164  543705 net.go:648] Add success.
I0320 09:38:13.422986  543705 net.go:770] primary dev: ETH0
I0320 09:38:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:38:13.423017  543705 net.go:698] Add success.
I0320 09:38:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:38:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:38:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 09:38:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:38:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 09:38:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:38:15.456016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:38:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:38:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:38:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:38:16.472549  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:38:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:38:23.409799  543705 memory.go:184] no items to output this cycle
I0320 09:38:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 09:38:23.671491  543705 disk_info.go:125] begin check local disk info of client
I0320 09:38:23.674040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:38:23.674046  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3680 0xc0002b36c0]
I0320 09:38:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 09:38:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:38:33.409816  543705 memory.go:184] no items to output this cycle
E0320 09:38:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:38:43.409787  543705 memory.go:191] Add success.
I0320 09:38:43.409811  543705 cpu.go:282] Add success.
I0320 09:38:43.419839  543705 net.go:648] Add success.
I0320 09:38:43.422644  543705 net.go:770] primary dev: ETH0
I0320 09:38:43.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:38:43.422682  543705 net.go:698] Add success.
I0320 09:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:38:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:38:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:38:53.409797  543705 memory.go:184] no items to output this cycle
I0320 09:38:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 09:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:39:03.409782  543705 memory.go:184] no items to output this cycle
I0320 09:39:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 09:39:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:39:13.409781  543705 memory.go:191] Add success.
W0320 09:39:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 09:39:13.409813  543705 cpu.go:282] Add success.
W0320 09:39:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:39:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:39:13.420142  543705 net.go:648] Add success.
I0320 09:39:13.423078  543705 net.go:770] primary dev: ETH0
I0320 09:39:13.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:39:13.423103  543705 net.go:698] Add success.
I0320 09:39:13.487646  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e0b149c8-470b-458f-9f31-09760360f7eb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:39:13.487681  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:39:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:39:14.455088  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:39:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0320 09:39:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:39:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 09:39:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:39:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:39:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:39:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:39:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:39:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:39:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 09:39:23.409789  543705 memory.go:184] no items to output this cycle
I0320 09:39:23.674728  543705 disk_info.go:125] begin check local disk info of client
I0320 09:39:23.677297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:39:23.677303  543705 disk_info.go:196] parse disk info done, disk is : [0xc000299380 0xc0002993c0]
E0320 09:39:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:39:33.409783  543705 memory.go:184] no items to output this cycle
I0320 09:39:33.409815  543705 cpu.go:275] no items to output this cycle
I0320 09:39:38.413858  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:39:38.413865  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:39:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:39:43.410626  543705 memory.go:191] Add success.
I0320 09:39:43.409806  543705 cpu.go:282] Add success.
I0320 09:39:43.420411  543705 net.go:648] Add success.
I0320 09:39:43.423199  543705 net.go:770] primary dev: ETH0
I0320 09:39:43.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:39:43.423231  543705 net.go:698] Add success.
I0320 09:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:39:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:39:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:39:53.409811  543705 memory.go:184] no items to output this cycle
I0320 09:39:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 09:40:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:40:03.409772  543705 memory.go:184] no items to output this cycle
I0320 09:40:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 09:40:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:40:13.409825  543705 memory.go:191] Add success.
I0320 09:40:13.409833  543705 cpu.go:282] Add success.
W0320 09:40:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:40:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:40:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:40:13.420128  543705 net.go:648] Add success.
I0320 09:40:13.422852  543705 net.go:770] primary dev: ETH0
I0320 09:40:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:40:13.422878  543705 net.go:698] Add success.
I0320 09:40:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:40:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:40:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 09:40:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:40:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 09:40:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:40:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:40:16.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:40:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:40:16.472492  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:40:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:40:23.409790  543705 memory.go:184] no items to output this cycle
I0320 09:40:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 09:40:23.677672  543705 disk_info.go:125] begin check local disk info of client
I0320 09:40:23.680218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:40:23.680223  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b6c0 0xc00027b700]
E0320 09:40:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:40:33.409803  543705 memory.go:184] no items to output this cycle
I0320 09:40:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 09:40:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:40:43.409807  543705 memory.go:191] Add success.
I0320 09:40:43.409810  543705 cpu.go:282] Add success.
I0320 09:40:43.419996  543705 net.go:648] Add success.
I0320 09:40:43.423325  543705 net.go:770] primary dev: ETH0
I0320 09:40:43.423339  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:40:43.423354  543705 net.go:698] Add success.
I0320 09:40:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:40:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:40:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:40:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:40:53.409809  543705 memory.go:184] no items to output this cycle
I0320 09:40:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 09:41:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:41:03.409818  543705 memory.go:184] no items to output this cycle
I0320 09:41:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 09:41:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:41:13.409818  543705 memory.go:191] Add success.
I0320 09:41:13.409823  543705 cpu.go:282] Add success.
W0320 09:41:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:41:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:41:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:41:13.420137  543705 net.go:648] Add success.
I0320 09:41:13.422658  543705 net.go:770] primary dev: ETH0
I0320 09:41:13.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:41:13.422691  543705 net.go:698] Add success.
I0320 09:41:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:41:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:41:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 09:41:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:41:14.456493  543705 disk_worker.go:494] system disk:vda1
I0320 09:41:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:41:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:41:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:41:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:41:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:41:16.472525  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:41:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:41:23.409776  543705 memory.go:184] no items to output this cycle
I0320 09:41:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 09:41:23.681551  543705 disk_info.go:125] begin check local disk info of client
I0320 09:41:23.684110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:41:23.684115  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c55c0 0xc0000c5600]
E0320 09:41:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:41:33.409819  543705 memory.go:184] no items to output this cycle
I0320 09:41:33.409836  543705 cpu.go:275] no items to output this cycle
E0320 09:41:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:41:43.409798  543705 cpu.go:282] Add success.
I0320 09:41:43.409803  543705 memory.go:191] Add success.
I0320 09:41:43.419970  543705 net.go:648] Add success.
I0320 09:41:43.422733  543705 net.go:770] primary dev: ETH0
I0320 09:41:43.422746  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:41:43.422758  543705 net.go:698] Add success.
I0320 09:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:41:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:41:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:41:53.409784  543705 memory.go:184] no items to output this cycle
I0320 09:41:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:42:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:42:03.409779  543705 memory.go:184] no items to output this cycle
I0320 09:42:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:42:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:42:13.409806  543705 memory.go:191] Add success.
I0320 09:42:13.409815  543705 cpu.go:282] Add success.
W0320 09:42:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:42:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:42:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:42:13.420116  543705 net.go:648] Add success.
I0320 09:42:13.422944  543705 net.go:770] primary dev: ETH0
I0320 09:42:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:42:13.422976  543705 net.go:698] Add success.
I0320 09:42:13.520710  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"80bd7b86-16d8-4bfe-bb9a-766b8c1730d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:42:13.520742  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 09:42:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:42:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 09:42:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:42:14.456154  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:42:14.456164  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:42:14.456170  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:42:14.456442  543705 disk_worker.go:494] system disk:vda1
I0320 09:42:14.456497  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:42:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:42:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:42:16.458090  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:42:16.458149  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:42:16.458167  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:42:16.458189  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:42:16.472564  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:42:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:42:23.409769  543705 memory.go:184] no items to output this cycle
I0320 09:42:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 09:42:23.684584  543705 disk_info.go:125] begin check local disk info of client
I0320 09:42:23.687052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:42:23.687058  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cfa00 0xc0003cfa40]
E0320 09:42:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:42:33.409771  543705 memory.go:184] no items to output this cycle
I0320 09:42:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 09:42:38.414863  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:42:38.414871  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:42:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:42:43.410819  543705 memory.go:191] Add success.
I0320 09:42:43.409827  543705 cpu.go:282] Add success.
I0320 09:42:43.420565  543705 net.go:648] Add success.
I0320 09:42:43.423541  543705 net.go:770] primary dev: ETH0
I0320 09:42:43.423556  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:42:43.423570  543705 net.go:698] Add success.
I0320 09:42:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:42:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:42:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:42:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:42:53.409765  543705 memory.go:184] no items to output this cycle
I0320 09:42:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 09:43:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:43:03.409800  543705 memory.go:184] no items to output this cycle
I0320 09:43:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 09:43:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:43:13.409782  543705 memory.go:191] Add success.
I0320 09:43:13.409803  543705 cpu.go:282] Add success.
W0320 09:43:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:43:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:43:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:43:13.420146  543705 net.go:648] Add success.
I0320 09:43:13.423036  543705 net.go:770] primary dev: ETH0
I0320 09:43:13.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:43:13.423067  543705 net.go:698] Add success.
I0320 09:43:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:43:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:43:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 09:43:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:43:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 09:43:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:43:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:43:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:43:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:43:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:43:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:43:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:43:23.409771  543705 memory.go:184] no items to output this cycle
I0320 09:43:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 09:43:23.687588  543705 disk_info.go:125] begin check local disk info of client
I0320 09:43:23.690166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:43:23.690171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004933c0 0xc000493400]
E0320 09:43:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:43:33.409811  543705 memory.go:184] no items to output this cycle
I0320 09:43:33.409826  543705 cpu.go:275] no items to output this cycle
E0320 09:43:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:43:43.409791  543705 memory.go:191] Add success.
I0320 09:43:43.409810  543705 cpu.go:282] Add success.
I0320 09:43:43.419972  543705 net.go:648] Add success.
I0320 09:43:43.422894  543705 net.go:770] primary dev: ETH0
I0320 09:43:43.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:43:43.422926  543705 net.go:698] Add success.
I0320 09:43:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:43:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:43:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:43:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:43:53.409782  543705 memory.go:184] no items to output this cycle
I0320 09:43:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:44:03.409777  543705 memory.go:184] no items to output this cycle
I0320 09:44:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 09:44:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:44:13.409777  543705 memory.go:191] Add success.
W0320 09:44:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 09:44:13.409802  543705 cpu.go:282] Add success.
W0320 09:44:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:44:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:44:13.420187  543705 net.go:648] Add success.
I0320 09:44:13.422782  543705 net.go:770] primary dev: ETH0
I0320 09:44:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:44:13.422808  543705 net.go:698] Add success.
I0320 09:44:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:44:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:44:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 09:44:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:44:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 09:44:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:44:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:44:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:44:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:44:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:44:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:44:23.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:44:23.409885  543705 cpu.go:275] no items to output this cycle
I0320 09:44:23.409896  543705 memory.go:184] no items to output this cycle
I0320 09:44:23.691035  543705 disk_info.go:125] begin check local disk info of client
I0320 09:44:23.693564  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:44:23.693570  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c14c0 0xc0003c1500]
E0320 09:44:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:44:33.409796  543705 memory.go:184] no items to output this cycle
I0320 09:44:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 09:44:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:44:43.409825  543705 memory.go:191] Add success.
I0320 09:44:43.409835  543705 cpu.go:282] Add success.
I0320 09:44:43.420022  543705 net.go:648] Add success.
I0320 09:44:43.422673  543705 net.go:770] primary dev: ETH0
I0320 09:44:43.422686  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:44:43.422699  543705 net.go:698] Add success.
I0320 09:44:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:44:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:44:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:44:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:44:53.409794  543705 memory.go:184] no items to output this cycle
I0320 09:44:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 09:45:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:45:03.409764  543705 memory.go:184] no items to output this cycle
I0320 09:45:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 09:45:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:45:13.409816  543705 memory.go:191] Add success.
I0320 09:45:13.409820  543705 cpu.go:282] Add success.
W0320 09:45:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:45:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:45:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:45:13.420135  543705 net.go:648] Add success.
I0320 09:45:13.422892  543705 net.go:770] primary dev: ETH0
I0320 09:45:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:45:13.422920  543705 net.go:698] Add success.
I0320 09:45:13.469164  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"94dec404-846f-4705-9345-00a41e4242be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:45:13.469195  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:45:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:45:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:45:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 09:45:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:45:14.456504  543705 disk_worker.go:494] system disk:vda1
I0320 09:45:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:45:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:45:16.458011  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:45:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:45:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:45:16.472564  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:45:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:45:23.409798  543705 memory.go:184] no items to output this cycle
I0320 09:45:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 09:45:23.693663  543705 disk_info.go:125] begin check local disk info of client
I0320 09:45:23.696225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:45:23.696231  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049f740 0xc00049f780]
E0320 09:45:33.409940  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:45:33.410027  543705 memory.go:184] no items to output this cycle
I0320 09:45:33.410029  543705 cpu.go:275] no items to output this cycle
I0320 09:45:38.415856  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:45:38.415864  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:45:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:45:43.410630  543705 memory.go:191] Add success.
I0320 09:45:43.409810  543705 cpu.go:282] Add success.
I0320 09:45:43.420408  543705 net.go:648] Add success.
I0320 09:45:43.423037  543705 net.go:770] primary dev: ETH0
I0320 09:45:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:45:43.423062  543705 net.go:698] Add success.
I0320 09:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:45:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:45:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:45:53.409798  543705 memory.go:184] no items to output this cycle
I0320 09:45:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 09:46:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:46:03.409776  543705 memory.go:184] no items to output this cycle
I0320 09:46:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 09:46:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:46:13.409811  543705 memory.go:191] Add success.
I0320 09:46:13.409819  543705 cpu.go:282] Add success.
W0320 09:46:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:46:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:46:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:46:13.420266  543705 net.go:648] Add success.
I0320 09:46:13.423111  543705 net.go:770] primary dev: ETH0
I0320 09:46:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:46:13.423136  543705 net.go:698] Add success.
I0320 09:46:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:46:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:46:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 09:46:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:46:14.456584  543705 disk_worker.go:494] system disk:vda1
I0320 09:46:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:46:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:46:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:46:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:46:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:46:16.472528  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:46:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:46:23.409780  543705 memory.go:184] no items to output this cycle
I0320 09:46:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 09:46:23.697599  543705 disk_info.go:125] begin check local disk info of client
I0320 09:46:23.700170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:46:23.700176  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046bb40 0xc00046bb80]
E0320 09:46:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:46:33.409765  543705 memory.go:184] no items to output this cycle
I0320 09:46:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 09:46:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:46:43.409812  543705 memory.go:191] Add success.
I0320 09:46:43.409814  543705 cpu.go:282] Add success.
I0320 09:46:43.419964  543705 net.go:648] Add success.
I0320 09:46:43.422874  543705 net.go:770] primary dev: ETH0
I0320 09:46:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:46:43.422899  543705 net.go:698] Add success.
I0320 09:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:46:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:46:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:46:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:46:53.409764  543705 memory.go:184] no items to output this cycle
I0320 09:46:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 09:47:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:47:03.409765  543705 memory.go:184] no items to output this cycle
I0320 09:47:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 09:47:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:47:13.409783  543705 memory.go:191] Add success.
I0320 09:47:13.409805  543705 cpu.go:282] Add success.
W0320 09:47:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:47:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:47:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:47:13.420173  543705 net.go:648] Add success.
I0320 09:47:13.422869  543705 net.go:770] primary dev: ETH0
I0320 09:47:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:47:13.422894  543705 net.go:698] Add success.
I0320 09:47:13.453440  543705 event_worker.go:152] Polling the log file for events...
W0320 09:47:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:47:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 09:47:14.455156  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:47:14.456881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:47:14.456889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:47:14.456895  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:47:14.456964  543705 disk_worker.go:494] system disk:vda1
I0320 09:47:14.457007  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:47:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:47:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:47:16.458097  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:47:16.458173  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0320 09:47:16.458170  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:47:16.458193  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:47:16.472592  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:47:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:47:23.409787  543705 memory.go:184] no items to output this cycle
I0320 09:47:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 09:47:23.700654  543705 disk_info.go:125] begin check local disk info of client
I0320 09:47:23.703176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:47:23.703182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c3280 0xc0004c32c0]
E0320 09:47:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:47:33.409814  543705 memory.go:184] no items to output this cycle
I0320 09:47:33.409827  543705 cpu.go:275] no items to output this cycle
E0320 09:47:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:47:43.409790  543705 memory.go:191] Add success.
I0320 09:47:43.409808  543705 cpu.go:282] Add success.
I0320 09:47:43.419973  543705 net.go:648] Add success.
I0320 09:47:43.423300  543705 net.go:770] primary dev: ETH0
I0320 09:47:43.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:47:43.423325  543705 net.go:698] Add success.
I0320 09:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:47:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:47:53.409799  543705 memory.go:184] no items to output this cycle
I0320 09:47:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 09:48:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:48:03.409777  543705 memory.go:184] no items to output this cycle
I0320 09:48:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 09:48:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:48:13.409787  543705 memory.go:191] Add success.
I0320 09:48:13.409786  543705 cpu.go:282] Add success.
W0320 09:48:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:48:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:48:13.420107  543705 net.go:648] Add success.
I0320 09:48:13.423251  543705 net.go:770] primary dev: ETH0
I0320 09:48:13.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:48:13.423275  543705 net.go:698] Add success.
I0320 09:48:13.469748  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dcd07bab-512d-427f-a97e-93b195b3e2fe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:48:13.469782  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:48:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:48:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:48:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 09:48:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:48:14.456782  543705 disk_worker.go:494] system disk:vda1
I0320 09:48:14.456813  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:48:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:48:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:48:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:48:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:48:16.472564  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:48:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:48:23.409771  543705 memory.go:184] no items to output this cycle
I0320 09:48:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 09:48:23.703649  543705 disk_info.go:125] begin check local disk info of client
I0320 09:48:23.706244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:48:23.706250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8140 0xc0004a8180]
E0320 09:48:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:48:33.409771  543705 memory.go:184] no items to output this cycle
I0320 09:48:33.409804  543705 cpu.go:275] no items to output this cycle
I0320 09:48:38.416861  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:48:38.416868  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:48:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:48:43.410600  543705 memory.go:191] Add success.
I0320 09:48:43.409808  543705 cpu.go:282] Add success.
I0320 09:48:43.420385  543705 net.go:648] Add success.
I0320 09:48:43.423092  543705 net.go:770] primary dev: ETH0
I0320 09:48:43.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:48:43.423122  543705 net.go:698] Add success.
I0320 09:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:48:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:48:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:48:53.409777  543705 memory.go:184] no items to output this cycle
I0320 09:48:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 09:49:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:49:03.409775  543705 memory.go:184] no items to output this cycle
I0320 09:49:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 09:49:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:49:13.409810  543705 memory.go:191] Add success.
I0320 09:49:13.409821  543705 cpu.go:282] Add success.
W0320 09:49:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:49:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:49:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:49:13.420306  543705 net.go:648] Add success.
I0320 09:49:13.422920  543705 net.go:770] primary dev: ETH0
I0320 09:49:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:49:13.422944  543705 net.go:698] Add success.
I0320 09:49:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:49:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:49:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 09:49:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:49:14.456615  543705 disk_worker.go:494] system disk:vda1
I0320 09:49:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:49:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:49:16.458026  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:49:16.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:49:16.458146  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:49:16.472612  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:49:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:49:23.409780  543705 memory.go:184] no items to output this cycle
I0320 09:49:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 09:49:23.707683  543705 disk_info.go:125] begin check local disk info of client
I0320 09:49:23.710198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:49:23.710204  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375ac0 0xc000375b00]
E0320 09:49:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:49:33.409796  543705 memory.go:184] no items to output this cycle
I0320 09:49:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 09:49:43.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:49:43.409865  543705 memory.go:191] Add success.
I0320 09:49:43.409970  543705 cpu.go:282] Add success.
I0320 09:49:43.419718  543705 net.go:648] Add success.
I0320 09:49:43.422259  543705 net.go:770] primary dev: ETH0
I0320 09:49:43.422274  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:49:43.422287  543705 net.go:698] Add success.
I0320 09:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:49:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:49:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:49:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:49:53.409780  543705 memory.go:184] no items to output this cycle
I0320 09:49:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 09:50:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:50:03.409773  543705 memory.go:184] no items to output this cycle
I0320 09:50:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 09:50:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:50:13.409792  543705 memory.go:191] Add success.
I0320 09:50:13.409795  543705 cpu.go:282] Add success.
W0320 09:50:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:50:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:50:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:50:13.420226  543705 net.go:648] Add success.
I0320 09:50:13.422877  543705 net.go:770] primary dev: ETH0
I0320 09:50:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:50:13.422902  543705 net.go:698] Add success.
I0320 09:50:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:50:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:50:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0320 09:50:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:50:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 09:50:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:50:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:50:16.458039  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:50:16.458114  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:50:16.458153  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:50:16.472757  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:50:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:50:23.409778  543705 memory.go:184] no items to output this cycle
I0320 09:50:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 09:50:23.710726  543705 disk_info.go:125] begin check local disk info of client
I0320 09:50:23.713252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:50:23.713257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b86c0 0xc0003b8700]
E0320 09:50:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:50:33.409787  543705 memory.go:184] no items to output this cycle
I0320 09:50:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 09:50:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:50:43.409806  543705 memory.go:191] Add success.
I0320 09:50:43.409810  543705 cpu.go:282] Add success.
I0320 09:50:43.419964  543705 net.go:648] Add success.
I0320 09:50:43.422451  543705 net.go:770] primary dev: ETH0
I0320 09:50:43.422464  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:50:43.422476  543705 net.go:698] Add success.
I0320 09:50:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:50:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:50:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:50:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:50:53.409765  543705 memory.go:184] no items to output this cycle
I0320 09:50:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 09:51:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:51:03.409798  543705 memory.go:184] no items to output this cycle
I0320 09:51:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 09:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:51:13.409815  543705 memory.go:191] Add success.
I0320 09:51:13.409827  543705 cpu.go:282] Add success.
W0320 09:51:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:51:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:51:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:51:13.420130  543705 net.go:648] Add success.
I0320 09:51:13.422863  543705 net.go:770] primary dev: ETH0
I0320 09:51:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:51:13.422887  543705 net.go:698] Add success.
I0320 09:51:13.469736  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e99289a-ce33-4015-8fa1-906ad0561e1f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:51:13.469770  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:51:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:51:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:51:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 09:51:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:51:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 09:51:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:51:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:51:16.458117  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:51:16.458153  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:51:16.472621  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:51:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:51:23.409779  543705 memory.go:184] no items to output this cycle
I0320 09:51:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 09:51:23.713672  543705 disk_info.go:125] begin check local disk info of client
I0320 09:51:23.716190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:51:23.716195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004aca80 0xc0004acac0]
E0320 09:51:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:51:33.409795  543705 memory.go:184] no items to output this cycle
I0320 09:51:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 09:51:38.417846  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:51:38.417853  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:51:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:51:43.410770  543705 memory.go:191] Add success.
I0320 09:51:43.409917  543705 cpu.go:282] Add success.
I0320 09:51:43.419736  543705 net.go:648] Add success.
I0320 09:51:43.422432  543705 net.go:770] primary dev: ETH0
I0320 09:51:43.422447  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:51:43.422460  543705 net.go:698] Add success.
I0320 09:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:51:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:51:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:51:53.409777  543705 memory.go:184] no items to output this cycle
I0320 09:51:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 09:52:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:52:03.409777  543705 memory.go:184] no items to output this cycle
I0320 09:52:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 09:52:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:52:13.409781  543705 cpu.go:282] Add success.
I0320 09:52:13.409782  543705 memory.go:191] Add success.
W0320 09:52:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:52:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:52:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:52:13.420206  543705 net.go:648] Add success.
I0320 09:52:13.422911  543705 net.go:770] primary dev: ETH0
I0320 09:52:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:52:13.422936  543705 net.go:698] Add success.
W0320 09:52:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:52:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 09:52:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:52:14.455888  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:52:14.455897  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:52:14.455903  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:52:14.456628  543705 disk_worker.go:494] system disk:vda1
I0320 09:52:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:52:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:52:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:52:16.458103  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:52:16.458150  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:52:16.458181  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:52:16.458205  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:52:16.472634  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:52:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:52:23.409807  543705 memory.go:184] no items to output this cycle
I0320 09:52:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 09:52:23.716716  543705 disk_info.go:125] begin check local disk info of client
I0320 09:52:23.719251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:52:23.719256  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032e2c0 0xc00032e300]
E0320 09:52:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:52:33.409809  543705 memory.go:184] no items to output this cycle
I0320 09:52:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 09:52:43.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:52:43.409888  543705 memory.go:191] Add success.
I0320 09:52:43.409981  543705 cpu.go:282] Add success.
I0320 09:52:43.419748  543705 net.go:648] Add success.
I0320 09:52:43.422488  543705 net.go:770] primary dev: ETH0
I0320 09:52:43.422501  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:52:43.422513  543705 net.go:698] Add success.
I0320 09:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:52:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:52:53.410268  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:52:53.410287  543705 memory.go:184] no items to output this cycle
I0320 09:52:53.410298  543705 cpu.go:275] no items to output this cycle
E0320 09:53:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:53:03.409771  543705 memory.go:184] no items to output this cycle
I0320 09:53:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 09:53:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:53:13.409809  543705 memory.go:191] Add success.
I0320 09:53:13.409816  543705 cpu.go:282] Add success.
W0320 09:53:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:53:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:53:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:53:13.420109  543705 net.go:648] Add success.
I0320 09:53:13.422782  543705 net.go:770] primary dev: ETH0
I0320 09:53:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:53:13.422811  543705 net.go:698] Add success.
I0320 09:53:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:53:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:53:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 09:53:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:53:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 09:53:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:53:16.458015  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:53:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:53:16.458122  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:53:16.472525  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:53:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:53:23.409805  543705 memory.go:184] no items to output this cycle
I0320 09:53:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 09:53:23.720730  543705 disk_info.go:125] begin check local disk info of client
I0320 09:53:23.723262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:53:23.723267  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353ac0 0xc000353b00]
I0320 09:53:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 09:53:33.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:53:33.409828  543705 memory.go:184] no items to output this cycle
E0320 09:53:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:53:43.409797  543705 memory.go:191] Add success.
I0320 09:53:43.409817  543705 cpu.go:282] Add success.
I0320 09:53:43.420024  543705 net.go:648] Add success.
I0320 09:53:43.423103  543705 net.go:770] primary dev: ETH0
I0320 09:53:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:53:43.423128  543705 net.go:698] Add success.
I0320 09:53:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:53:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:53:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:53:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:53:53.409782  543705 memory.go:184] no items to output this cycle
I0320 09:53:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 09:54:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:54:03.409776  543705 memory.go:184] no items to output this cycle
I0320 09:54:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 09:54:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:54:13.409809  543705 memory.go:191] Add success.
I0320 09:54:13.409820  543705 cpu.go:282] Add success.
W0320 09:54:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:54:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:54:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:54:13.420269  543705 net.go:648] Add success.
I0320 09:54:13.423105  543705 net.go:770] primary dev: ETH0
I0320 09:54:13.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:54:13.423129  543705 net.go:698] Add success.
I0320 09:54:13.469304  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f3235356-630c-470f-ac97-f875c8a6e35f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:54:13.469339  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 09:54:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:54:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:54:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0320 09:54:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:54:14.456633  543705 disk_worker.go:494] system disk:vda1
I0320 09:54:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:54:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:54:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:54:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:54:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:54:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:54:23.409780  543705 memory.go:184] no items to output this cycle
I0320 09:54:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 09:54:23.724760  543705 disk_info.go:125] begin check local disk info of client
I0320 09:54:23.727300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:54:23.727305  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b200 0xc00007b240]
E0320 09:54:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:54:33.409781  543705 memory.go:184] no items to output this cycle
I0320 09:54:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 09:54:38.418854  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:54:38.418861  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:54:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:54:43.410805  543705 memory.go:191] Add success.
I0320 09:54:43.409806  543705 cpu.go:282] Add success.
I0320 09:54:43.420715  543705 net.go:648] Add success.
I0320 09:54:43.423254  543705 net.go:770] primary dev: ETH0
I0320 09:54:43.423268  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:54:43.423280  543705 net.go:698] Add success.
I0320 09:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:54:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:54:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:54:53.409798  543705 memory.go:184] no items to output this cycle
I0320 09:54:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 09:55:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:55:03.409776  543705 memory.go:184] no items to output this cycle
I0320 09:55:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 09:55:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:55:13.409788  543705 memory.go:191] Add success.
W0320 09:55:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:55:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:55:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:55:13.409838  543705 cpu.go:282] Add success.
I0320 09:55:13.420142  543705 net.go:648] Add success.
I0320 09:55:13.422479  543705 net.go:770] primary dev: ETH0
I0320 09:55:13.422494  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:55:13.422506  543705 net.go:698] Add success.
I0320 09:55:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:55:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:55:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 09:55:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:55:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 09:55:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:55:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:55:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:55:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:55:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:55:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:55:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:55:23.409785  543705 memory.go:184] no items to output this cycle
I0320 09:55:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 09:55:23.728773  543705 disk_info.go:125] begin check local disk info of client
I0320 09:55:23.731304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:55:23.731309  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab500 0xc0001ab540]
I0320 09:55:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 09:55:33.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:55:33.409833  543705 memory.go:184] no items to output this cycle
E0320 09:55:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:55:43.409796  543705 memory.go:191] Add success.
I0320 09:55:43.409817  543705 cpu.go:282] Add success.
I0320 09:55:43.419925  543705 net.go:648] Add success.
I0320 09:55:43.422513  543705 net.go:770] primary dev: ETH0
I0320 09:55:43.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:55:43.422539  543705 net.go:698] Add success.
I0320 09:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:55:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:55:53.409929  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:55:53.409946  543705 memory.go:184] no items to output this cycle
I0320 09:55:53.410146  543705 cpu.go:275] no items to output this cycle
E0320 09:56:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:56:03.409804  543705 memory.go:184] no items to output this cycle
I0320 09:56:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 09:56:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:56:13.409829  543705 memory.go:191] Add success.
I0320 09:56:13.409844  543705 cpu.go:282] Add success.
W0320 09:56:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:56:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:56:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:56:13.420179  543705 net.go:648] Add success.
I0320 09:56:13.423027  543705 net.go:770] primary dev: ETH0
I0320 09:56:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:56:13.423055  543705 net.go:698] Add success.
I0320 09:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:56:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:56:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 09:56:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:56:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 09:56:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:56:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:56:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:56:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:56:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:56:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:56:23.409801  543705 memory.go:184] no items to output this cycle
I0320 09:56:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 09:56:23.732785  543705 disk_info.go:125] begin check local disk info of client
I0320 09:56:23.735327  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:56:23.735332  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af80 0xc00007afc0]
E0320 09:56:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:56:33.409809  543705 memory.go:184] no items to output this cycle
I0320 09:56:33.409826  543705 cpu.go:275] no items to output this cycle
E0320 09:56:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:56:43.409804  543705 memory.go:191] Add success.
I0320 09:56:43.409820  543705 cpu.go:282] Add success.
I0320 09:56:43.419967  543705 net.go:648] Add success.
I0320 09:56:43.422647  543705 net.go:770] primary dev: ETH0
I0320 09:56:43.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:56:43.422676  543705 net.go:698] Add success.
I0320 09:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:56:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:56:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:56:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:56:53.409778  543705 memory.go:184] no items to output this cycle
I0320 09:56:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 09:57:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:57:03.409793  543705 memory.go:184] no items to output this cycle
I0320 09:57:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 09:57:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:57:13.409813  543705 memory.go:191] Add success.
I0320 09:57:13.409814  543705 cpu.go:282] Add success.
W0320 09:57:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:57:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:57:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:57:13.420309  543705 net.go:648] Add success.
I0320 09:57:13.423018  543705 net.go:770] primary dev: ETH0
I0320 09:57:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:57:13.423047  543705 net.go:698] Add success.
I0320 09:57:13.429693  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 09:57:13.452773  543705 event_worker.go:152] Polling the log file for events...
I0320 09:57:13.469359  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"410f77d6-f791-4728-baad-9f2b7b33aa00","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 09:57:13.469392  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 09:57:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:57:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 09:57:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0320 09:57:14.456693  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 09:57:14.456701  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 09:57:14.456704  543705 custom_config.go:64] query custom config with name: gpu
I0320 09:57:14.456919  543705 disk_worker.go:494] system disk:vda1
I0320 09:57:14.456961  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 09:57:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 09:57:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:57:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 09:57:16.457996  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 09:57:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:57:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:57:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:57:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:57:23.409778  543705 memory.go:184] no items to output this cycle
I0320 09:57:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 09:57:23.735413  543705 disk_info.go:125] begin check local disk info of client
I0320 09:57:23.737863  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:57:23.737868  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329380 0xc0003293c0]
I0320 09:57:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 09:57:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:57:33.409823  543705 memory.go:184] no items to output this cycle
I0320 09:57:38.419859  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 09:57:38.419867  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 09:57:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:57:43.410694  543705 memory.go:191] Add success.
I0320 09:57:43.409834  543705 cpu.go:282] Add success.
I0320 09:57:43.420409  543705 net.go:648] Add success.
I0320 09:57:43.423244  543705 net.go:770] primary dev: ETH0
I0320 09:57:43.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:57:43.423272  543705 net.go:698] Add success.
I0320 09:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:57:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:57:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:57:53.409773  543705 memory.go:184] no items to output this cycle
I0320 09:57:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 09:58:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:58:03.409781  543705 memory.go:184] no items to output this cycle
I0320 09:58:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 09:58:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:58:13.409790  543705 memory.go:191] Add success.
I0320 09:58:13.409801  543705 cpu.go:282] Add success.
W0320 09:58:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:58:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:58:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:58:13.420138  543705 net.go:648] Add success.
I0320 09:58:13.422719  543705 net.go:770] primary dev: ETH0
I0320 09:58:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:58:13.422749  543705 net.go:698] Add success.
I0320 09:58:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:58:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:58:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 09:58:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:58:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 09:58:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:58:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:58:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:58:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:58:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:58:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:58:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:58:23.409807  543705 memory.go:184] no items to output this cycle
I0320 09:58:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 09:58:23.738725  543705 disk_info.go:125] begin check local disk info of client
I0320 09:58:23.741240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:58:23.741246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d00 0xc0000c4d40]
E0320 09:58:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:58:33.409772  543705 memory.go:184] no items to output this cycle
I0320 09:58:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 09:58:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:58:43.409795  543705 memory.go:191] Add success.
I0320 09:58:43.409800  543705 cpu.go:282] Add success.
I0320 09:58:43.419956  543705 net.go:648] Add success.
I0320 09:58:43.422683  543705 net.go:770] primary dev: ETH0
I0320 09:58:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:58:43.422708  543705 net.go:698] Add success.
I0320 09:58:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:58:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:58:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:58:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:58:53.409798  543705 memory.go:184] no items to output this cycle
I0320 09:58:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 09:59:03.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:59:03.409889  543705 memory.go:184] no items to output this cycle
I0320 09:59:03.409966  543705 cpu.go:275] no items to output this cycle
E0320 09:59:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:59:13.409821  543705 memory.go:191] Add success.
I0320 09:59:13.409832  543705 cpu.go:282] Add success.
W0320 09:59:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 09:59:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 09:59:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 09:59:13.420309  543705 net.go:648] Add success.
I0320 09:59:13.423134  543705 net.go:770] primary dev: ETH0
I0320 09:59:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:59:13.423159  543705 net.go:698] Add success.
I0320 09:59:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 09:59:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 09:59:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 09:59:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 09:59:14.456565  543705 disk_worker.go:494] system disk:vda1
I0320 09:59:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 09:59:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 09:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:59:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:59:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 09:59:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 09:59:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:59:23.409809  543705 memory.go:184] no items to output this cycle
I0320 09:59:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 09:59:23.741669  543705 disk_info.go:125] begin check local disk info of client
I0320 09:59:23.744128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 09:59:23.744134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b24c0 0xc0003b2500]
E0320 09:59:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:59:33.409772  543705 memory.go:184] no items to output this cycle
I0320 09:59:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 09:59:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:59:43.409804  543705 cpu.go:282] Add success.
I0320 09:59:43.409806  543705 memory.go:191] Add success.
I0320 09:59:43.419889  543705 net.go:648] Add success.
I0320 09:59:43.422585  543705 net.go:770] primary dev: ETH0
I0320 09:59:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0320 09:59:43.422609  543705 net.go:698] Add success.
I0320 09:59:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 09:59:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 09:59:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 09:59:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 09:59:53.409771  543705 memory.go:184] no items to output this cycle
I0320 09:59:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 10:00:03.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:00:03.409907  543705 memory.go:184] no items to output this cycle
I0320 10:00:03.409923  543705 cpu.go:275] no items to output this cycle
E0320 10:00:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:00:13.409818  543705 memory.go:191] Add success.
I0320 10:00:13.409827  543705 cpu.go:282] Add success.
W0320 10:00:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:00:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:00:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:00:13.420192  543705 net.go:648] Add success.
I0320 10:00:13.422963  543705 net.go:770] primary dev: ETH0
I0320 10:00:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:00:13.422990  543705 net.go:698] Add success.
I0320 10:00:13.464537  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1ff8fc91-c517-4e52-9297-653d43b130c3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:00:13.464572  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:00:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:00:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:00:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 10:00:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:00:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 10:00:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:00:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:00:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:00:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:00:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:00:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 10:00:23.409791  543705 memory.go:184] no items to output this cycle
I0320 10:00:23.744214  543705 disk_info.go:125] begin check local disk info of client
I0320 10:00:23.746697  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:00:23.746703  543705 disk_info.go:196] parse disk info done, disk is : [0xc000258ac0 0xc000258b00]
E0320 10:00:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:00:33.409774  543705 memory.go:184] no items to output this cycle
I0320 10:00:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 10:00:38.420861  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:00:38.420868  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:00:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:00:43.410663  543705 memory.go:191] Add success.
I0320 10:00:43.409800  543705 cpu.go:282] Add success.
I0320 10:00:43.420350  543705 net.go:648] Add success.
I0320 10:00:43.423210  543705 net.go:770] primary dev: ETH0
I0320 10:00:43.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:00:43.423236  543705 net.go:698] Add success.
I0320 10:00:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:00:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:00:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:00:53.409770  543705 memory.go:184] no items to output this cycle
I0320 10:00:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 10:01:03.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:01:03.409923  543705 memory.go:184] no items to output this cycle
I0320 10:01:03.409961  543705 cpu.go:275] no items to output this cycle
E0320 10:01:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:01:13.409817  543705 memory.go:191] Add success.
I0320 10:01:13.409822  543705 cpu.go:282] Add success.
W0320 10:01:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:01:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:01:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:01:13.420221  543705 net.go:648] Add success.
I0320 10:01:13.423231  543705 net.go:770] primary dev: ETH0
I0320 10:01:13.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:01:13.423256  543705 net.go:698] Add success.
I0320 10:01:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:01:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:01:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 10:01:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:01:14.456571  543705 disk_worker.go:494] system disk:vda1
I0320 10:01:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:01:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:01:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:01:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:01:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:01:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:01:23.409813  543705 memory.go:184] no items to output this cycle
I0320 10:01:23.409820  543705 cpu.go:275] no items to output this cycle
I0320 10:01:23.747874  543705 disk_info.go:125] begin check local disk info of client
I0320 10:01:23.750449  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:01:23.750454  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
E0320 10:01:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:01:33.409765  543705 memory.go:184] no items to output this cycle
I0320 10:01:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 10:01:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:01:43.409802  543705 memory.go:191] Add success.
I0320 10:01:43.409806  543705 cpu.go:282] Add success.
I0320 10:01:43.420060  543705 net.go:648] Add success.
I0320 10:01:43.422477  543705 net.go:770] primary dev: ETH0
I0320 10:01:43.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:01:43.422501  543705 net.go:698] Add success.
I0320 10:01:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:01:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:01:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:01:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:01:53.409766  543705 memory.go:184] no items to output this cycle
I0320 10:01:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 10:02:03.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:02:03.409944  543705 cpu.go:275] no items to output this cycle
I0320 10:02:03.410011  543705 memory.go:184] no items to output this cycle
E0320 10:02:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:02:13.409800  543705 memory.go:191] Add success.
I0320 10:02:13.409807  543705 cpu.go:282] Add success.
W0320 10:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:02:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:02:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:02:13.420131  543705 net.go:648] Add success.
I0320 10:02:13.423009  543705 net.go:770] primary dev: ETH0
I0320 10:02:13.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:02:13.423033  543705 net.go:698] Add success.
W0320 10:02:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:02:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 10:02:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:02:14.456945  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:02:14.456955  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:02:14.456962  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:02:14.457006  543705 disk_worker.go:494] system disk:vda1
I0320 10:02:14.457032  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:02:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:02:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:02:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:02:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:02:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:02:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:02:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:02:23.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:02:23.409835  543705 memory.go:184] no items to output this cycle
I0320 10:02:23.409844  543705 cpu.go:275] no items to output this cycle
I0320 10:02:23.751923  543705 disk_info.go:125] begin check local disk info of client
I0320 10:02:23.754456  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:02:23.754461  543705 disk_info.go:196] parse disk info done, disk is : [0xc000546cc0 0xc000546d00]
E0320 10:02:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:02:33.409775  543705 memory.go:184] no items to output this cycle
I0320 10:02:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 10:02:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:02:43.409791  543705 memory.go:191] Add success.
I0320 10:02:43.409806  543705 cpu.go:282] Add success.
I0320 10:02:43.420027  543705 net.go:648] Add success.
I0320 10:02:43.422861  543705 net.go:770] primary dev: ETH0
I0320 10:02:43.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:02:43.422889  543705 net.go:698] Add success.
I0320 10:02:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:02:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:02:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:02:53.409784  543705 memory.go:184] no items to output this cycle
I0320 10:02:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:03:03.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:03:03.409920  543705 memory.go:184] no items to output this cycle
I0320 10:03:03.409950  543705 cpu.go:275] no items to output this cycle
E0320 10:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:03:13.409783  543705 memory.go:191] Add success.
W0320 10:03:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:03:13.409814  543705 cpu.go:282] Add success.
W0320 10:03:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:03:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:03:13.420094  543705 net.go:648] Add success.
I0320 10:03:13.422871  543705 net.go:770] primary dev: ETH0
I0320 10:03:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:03:13.422896  543705 net.go:698] Add success.
I0320 10:03:13.468942  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7bb75d9c-f47f-49ab-963e-4888e0b973a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:03:13.468983  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:03:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:03:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:03:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 10:03:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:03:14.456531  543705 disk_worker.go:494] system disk:vda1
I0320 10:03:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:03:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:03:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:03:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:03:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:03:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:03:23.409815  543705 memory.go:184] no items to output this cycle
I0320 10:03:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 10:03:23.754541  543705 disk_info.go:125] begin check local disk info of client
I0320 10:03:23.757027  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:03:23.757033  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005464c0 0xc000546500]
E0320 10:03:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:03:33.409770  543705 memory.go:184] no items to output this cycle
I0320 10:03:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 10:03:38.421872  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:03:38.421880  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:03:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:03:43.410642  543705 memory.go:191] Add success.
I0320 10:03:43.409824  543705 cpu.go:282] Add success.
I0320 10:03:43.420434  543705 net.go:648] Add success.
I0320 10:03:43.422861  543705 net.go:770] primary dev: ETH0
I0320 10:03:43.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:03:43.422889  543705 net.go:698] Add success.
I0320 10:03:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:03:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:03:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:03:53.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:03:53.409888  543705 memory.go:184] no items to output this cycle
I0320 10:03:53.409923  543705 cpu.go:275] no items to output this cycle
E0320 10:04:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:04:03.409776  543705 memory.go:184] no items to output this cycle
I0320 10:04:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:04:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:04:13.409791  543705 memory.go:191] Add success.
I0320 10:04:13.409791  543705 cpu.go:282] Add success.
W0320 10:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:04:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:04:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:04:13.420172  543705 net.go:648] Add success.
I0320 10:04:13.423123  543705 net.go:770] primary dev: ETH0
I0320 10:04:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:04:13.423148  543705 net.go:698] Add success.
I0320 10:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:04:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:04:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 10:04:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:04:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 10:04:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:04:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:04:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:04:23.409820  543705 memory.go:184] no items to output this cycle
I0320 10:04:23.409829  543705 cpu.go:275] no items to output this cycle
I0320 10:04:23.757671  543705 disk_info.go:125] begin check local disk info of client
I0320 10:04:23.760206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:04:23.760212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc440 0xc0003dc480]
E0320 10:04:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:04:33.409805  543705 memory.go:184] no items to output this cycle
I0320 10:04:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 10:04:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:04:43.409792  543705 memory.go:191] Add success.
I0320 10:04:43.409795  543705 cpu.go:282] Add success.
I0320 10:04:43.419873  543705 net.go:648] Add success.
I0320 10:04:43.422555  543705 net.go:770] primary dev: ETH0
I0320 10:04:43.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:04:43.422584  543705 net.go:698] Add success.
I0320 10:04:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:04:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:04:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:04:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:04:53.409779  543705 memory.go:184] no items to output this cycle
I0320 10:04:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 10:05:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:05:03.409804  543705 memory.go:184] no items to output this cycle
I0320 10:05:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 10:05:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:05:13.409784  543705 memory.go:191] Add success.
I0320 10:05:13.409791  543705 cpu.go:282] Add success.
W0320 10:05:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:05:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:05:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:05:13.420061  543705 net.go:648] Add success.
I0320 10:05:13.422688  543705 net.go:770] primary dev: ETH0
I0320 10:05:13.422704  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:05:13.422719  543705 net.go:698] Add success.
I0320 10:05:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:05:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:05:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 10:05:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:05:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 10:05:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:05:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:05:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:05:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:05:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:05:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:05:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 10:05:23.409788  543705 memory.go:184] no items to output this cycle
I0320 10:05:23.761671  543705 disk_info.go:125] begin check local disk info of client
I0320 10:05:23.764171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:05:23.764176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000546580 0xc0005465c0]
E0320 10:05:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:05:33.409799  543705 memory.go:184] no items to output this cycle
I0320 10:05:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 10:05:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:05:43.409800  543705 memory.go:191] Add success.
I0320 10:05:43.409801  543705 cpu.go:282] Add success.
I0320 10:05:43.420047  543705 net.go:648] Add success.
I0320 10:05:43.423057  543705 net.go:770] primary dev: ETH0
I0320 10:05:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:05:43.423084  543705 net.go:698] Add success.
I0320 10:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:05:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:05:53.409767  543705 memory.go:184] no items to output this cycle
I0320 10:05:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 10:06:03.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:06:03.409882  543705 cpu.go:275] no items to output this cycle
I0320 10:06:03.409910  543705 memory.go:184] no items to output this cycle
E0320 10:06:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:06:13.409780  543705 memory.go:191] Add success.
W0320 10:06:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:06:13.409813  543705 cpu.go:282] Add success.
W0320 10:06:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:06:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:06:13.420150  543705 net.go:648] Add success.
I0320 10:06:13.422761  543705 net.go:770] primary dev: ETH0
I0320 10:06:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:06:13.422790  543705 net.go:698] Add success.
I0320 10:06:13.469655  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc75721a-12b6-40b3-8392-94e911e8d3c1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:06:13.469688  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:06:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:06:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:06:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 10:06:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:06:14.456605  543705 disk_worker.go:494] system disk:vda1
I0320 10:06:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:06:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:06:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:06:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:06:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:06:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:06:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:06:23.409777  543705 memory.go:184] no items to output this cycle
I0320 10:06:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 10:06:23.764948  543705 disk_info.go:125] begin check local disk info of client
I0320 10:06:23.767481  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:06:23.767486  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396bc0 0xc000396c00]
E0320 10:06:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:06:33.409795  543705 memory.go:184] no items to output this cycle
I0320 10:06:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 10:06:38.422860  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:06:38.422867  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:06:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:06:43.410759  543705 memory.go:191] Add success.
I0320 10:06:43.409827  543705 cpu.go:282] Add success.
I0320 10:06:43.420524  543705 net.go:648] Add success.
I0320 10:06:43.423234  543705 net.go:770] primary dev: ETH0
I0320 10:06:43.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:06:43.423281  543705 net.go:698] Add success.
I0320 10:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:06:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:06:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:06:53.409764  543705 memory.go:184] no items to output this cycle
I0320 10:06:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 10:07:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:07:03.409896  543705 memory.go:184] no items to output this cycle
I0320 10:07:03.409915  543705 cpu.go:275] no items to output this cycle
E0320 10:07:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:07:13.409797  543705 memory.go:191] Add success.
I0320 10:07:13.409797  543705 cpu.go:282] Add success.
W0320 10:07:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:07:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:07:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:07:13.420176  543705 net.go:648] Add success.
I0320 10:07:13.423064  543705 net.go:770] primary dev: ETH0
I0320 10:07:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:07:13.423087  543705 net.go:698] Add success.
I0320 10:07:13.453661  543705 event_worker.go:152] Polling the log file for events...
W0320 10:07:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:07:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 10:07:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:07:14.456996  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:07:14.457006  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:07:14.457012  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:07:14.457063  543705 disk_worker.go:494] system disk:vda1
I0320 10:07:14.457096  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:07:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:07:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:07:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:07:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:07:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:07:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:07:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:07:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:07:23.409778  543705 memory.go:184] no items to output this cycle
I0320 10:07:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 10:07:23.767566  543705 disk_info.go:125] begin check local disk info of client
I0320 10:07:23.770178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:07:23.770183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6900 0xc0001c6940]
E0320 10:07:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:07:33.409794  543705 memory.go:184] no items to output this cycle
I0320 10:07:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 10:07:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:07:43.409799  543705 memory.go:191] Add success.
I0320 10:07:43.409801  543705 cpu.go:282] Add success.
I0320 10:07:43.419879  543705 net.go:648] Add success.
I0320 10:07:43.422717  543705 net.go:770] primary dev: ETH0
I0320 10:07:43.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:07:43.422745  543705 net.go:698] Add success.
I0320 10:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:07:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:07:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:07:53.409763  543705 memory.go:184] no items to output this cycle
I0320 10:07:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 10:08:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:08:03.409763  543705 memory.go:184] no items to output this cycle
I0320 10:08:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 10:08:13.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:08:13.409946  543705 memory.go:191] Add success.
W0320 10:08:13.409994  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:08:13.410013  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:08:13.410015  543705 cpu.go:282] Add success.
I0320 10:08:13.410020  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:08:13.419713  543705 net.go:648] Add success.
I0320 10:08:13.422263  543705 net.go:770] primary dev: ETH0
I0320 10:08:13.422276  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:08:13.422287  543705 net.go:698] Add success.
I0320 10:08:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:08:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:08:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 10:08:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:08:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 10:08:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:08:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:08:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:08:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:08:23.409806  543705 memory.go:184] no items to output this cycle
I0320 10:08:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 10:08:23.770264  543705 disk_info.go:125] begin check local disk info of client
I0320 10:08:23.772839  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:08:23.772846  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5200 0xc0000c5240]
E0320 10:08:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:08:33.409774  543705 memory.go:184] no items to output this cycle
I0320 10:08:33.409779  543705 cpu.go:275] no items to output this cycle
E0320 10:08:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:08:43.409797  543705 memory.go:191] Add success.
I0320 10:08:43.409797  543705 cpu.go:282] Add success.
I0320 10:08:43.419879  543705 net.go:648] Add success.
I0320 10:08:43.422891  543705 net.go:770] primary dev: ETH0
I0320 10:08:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:08:43.422921  543705 net.go:698] Add success.
I0320 10:08:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:08:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:08:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:08:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:08:53.409767  543705 memory.go:184] no items to output this cycle
I0320 10:08:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 10:09:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:09:03.409801  543705 memory.go:184] no items to output this cycle
I0320 10:09:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 10:09:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:09:13.409790  543705 memory.go:191] Add success.
I0320 10:09:13.409807  543705 cpu.go:282] Add success.
W0320 10:09:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:09:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:09:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:09:13.419736  543705 net.go:648] Add success.
I0320 10:09:13.422571  543705 net.go:770] primary dev: ETH0
I0320 10:09:13.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:09:13.422595  543705 net.go:698] Add success.
I0320 10:09:13.468397  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3d0f52a5-1aa7-412a-b300-4b79bfb5d7be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:09:13.468428  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:09:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:09:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:09:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 10:09:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:09:14.456527  543705 disk_worker.go:494] system disk:vda1
I0320 10:09:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:09:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:09:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:09:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:09:23.409809  543705 memory.go:184] no items to output this cycle
I0320 10:09:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 10:09:23.773672  543705 disk_info.go:125] begin check local disk info of client
I0320 10:09:23.776186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:09:23.776191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6440 0xc0001c6480]
E0320 10:09:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:09:33.409779  543705 memory.go:184] no items to output this cycle
I0320 10:09:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 10:09:38.423868  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:09:38.423876  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:09:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:09:43.410706  543705 memory.go:191] Add success.
I0320 10:09:43.409818  543705 cpu.go:282] Add success.
I0320 10:09:43.420395  543705 net.go:648] Add success.
I0320 10:09:43.423504  543705 net.go:770] primary dev: ETH0
I0320 10:09:43.423518  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:09:43.423545  543705 net.go:698] Add success.
I0320 10:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:09:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:09:53.410274  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:09:53.410294  543705 memory.go:184] no items to output this cycle
I0320 10:09:53.410307  543705 cpu.go:275] no items to output this cycle
E0320 10:10:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:10:03.409760  543705 memory.go:184] no items to output this cycle
I0320 10:10:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 10:10:13.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:10:13.409907  543705 memory.go:191] Add success.
W0320 10:10:13.409939  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:10:13.409949  543705 cpu.go:282] Add success.
W0320 10:10:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:10:13.409958  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:10:13.419755  543705 net.go:648] Add success.
I0320 10:10:13.422632  543705 net.go:770] primary dev: ETH0
I0320 10:10:13.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:10:13.422661  543705 net.go:698] Add success.
I0320 10:10:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:10:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:10:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 10:10:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:10:14.456487  543705 disk_worker.go:494] system disk:vda1
I0320 10:10:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:10:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:10:16.458029  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:10:16.458089  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:10:16.458111  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:10:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:10:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:10:23.409790  543705 memory.go:184] no items to output this cycle
I0320 10:10:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 10:10:23.777673  543705 disk_info.go:125] begin check local disk info of client
I0320 10:10:23.780196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:10:23.780201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b7c0 0xc00048b800]
E0320 10:10:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:10:33.409766  543705 memory.go:184] no items to output this cycle
I0320 10:10:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 10:10:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:10:43.409811  543705 memory.go:191] Add success.
I0320 10:10:43.409816  543705 cpu.go:282] Add success.
I0320 10:10:43.419707  543705 net.go:770] primary dev: ETH0
I0320 10:10:43.419720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:10:43.419733  543705 net.go:698] Add success.
I0320 10:10:43.419964  543705 net.go:648] Add success.
I0320 10:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:10:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:10:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:10:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:10:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:10:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 10:11:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:11:03.409803  543705 memory.go:184] no items to output this cycle
I0320 10:11:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 10:11:13.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:11:13.409914  543705 memory.go:191] Add success.
I0320 10:11:13.409917  543705 cpu.go:282] Add success.
W0320 10:11:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:11:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:11:13.409994  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:11:13.419726  543705 net.go:648] Add success.
I0320 10:11:13.422589  543705 net.go:770] primary dev: ETH0
I0320 10:11:13.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:11:13.422617  543705 net.go:698] Add success.
I0320 10:11:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:11:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:11:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 10:11:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:11:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 10:11:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:11:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:11:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:11:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:11:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:11:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:11:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:11:23.409817  543705 memory.go:184] no items to output this cycle
I0320 10:11:23.409824  543705 cpu.go:275] no items to output this cycle
I0320 10:11:23.781047  543705 disk_info.go:125] begin check local disk info of client
I0320 10:11:23.783588  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:11:23.783594  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328200 0xc000328240]
E0320 10:11:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:11:33.409783  543705 memory.go:184] no items to output this cycle
I0320 10:11:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:11:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:11:43.409826  543705 memory.go:191] Add success.
I0320 10:11:43.409834  543705 cpu.go:282] Add success.
I0320 10:11:43.420002  543705 net.go:648] Add success.
I0320 10:11:43.423116  543705 net.go:770] primary dev: ETH0
I0320 10:11:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:11:43.423146  543705 net.go:698] Add success.
I0320 10:11:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:11:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:11:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:11:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:11:53.409773  543705 memory.go:184] no items to output this cycle
I0320 10:11:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 10:12:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:12:03.409783  543705 memory.go:184] no items to output this cycle
I0320 10:12:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 10:12:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:12:13.409804  543705 memory.go:191] Add success.
I0320 10:12:13.409808  543705 cpu.go:282] Add success.
W0320 10:12:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:12:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:12:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:12:13.419717  543705 net.go:648] Add success.
I0320 10:12:13.422967  543705 net.go:770] primary dev: ETH0
I0320 10:12:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:12:13.422990  543705 net.go:698] Add success.
I0320 10:12:13.469656  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"582f2092-2eae-48d9-ac27-da1b71b11a84","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:12:13.469688  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 10:12:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:12:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 10:12:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:12:14.456840  543705 disk_worker.go:494] system disk:vda1
I0320 10:12:14.456879  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:12:14.457095  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:12:14.457102  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:12:14.457106  543705 custom_config.go:64] query custom config with name: gpu
E0320 10:12:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:12:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:12:16.457894  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:12:16.457894  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:12:16.457948  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:12:16.457967  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:12:16.472301  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:12:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:12:23.409823  543705 memory.go:184] no items to output this cycle
I0320 10:12:23.409832  543705 cpu.go:275] no items to output this cycle
I0320 10:12:23.785082  543705 disk_info.go:125] begin check local disk info of client
I0320 10:12:23.787627  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:12:23.787632  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b400 0xc00007b440]
E0320 10:12:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:12:33.409768  543705 memory.go:184] no items to output this cycle
I0320 10:12:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 10:12:38.424873  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:12:38.424880  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:12:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:12:43.410644  543705 memory.go:191] Add success.
I0320 10:12:43.409813  543705 cpu.go:282] Add success.
I0320 10:12:43.420324  543705 net.go:648] Add success.
I0320 10:12:43.423098  543705 net.go:770] primary dev: ETH0
I0320 10:12:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:12:43.423124  543705 net.go:698] Add success.
I0320 10:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:12:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:12:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:12:53.409793  543705 memory.go:184] no items to output this cycle
I0320 10:12:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 10:13:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:13:03.409782  543705 memory.go:184] no items to output this cycle
I0320 10:13:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 10:13:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:13:13.409778  543705 memory.go:191] Add success.
W0320 10:13:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:13:13.409811  543705 cpu.go:282] Add success.
W0320 10:13:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:13:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:13:13.420292  543705 net.go:648] Add success.
I0320 10:13:13.423233  543705 net.go:770] primary dev: ETH0
I0320 10:13:13.423246  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:13:13.423258  543705 net.go:698] Add success.
I0320 10:13:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:13:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:13:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 10:13:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:13:14.456482  543705 disk_worker.go:494] system disk:vda1
I0320 10:13:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:13:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:13:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:13:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:13:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:13:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:13:23.409776  543705 memory.go:184] no items to output this cycle
I0320 10:13:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 10:13:23.789083  543705 disk_info.go:125] begin check local disk info of client
I0320 10:13:23.791623  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:13:23.791629  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6d80 0xc0001c6dc0]
E0320 10:13:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:13:33.409768  543705 memory.go:184] no items to output this cycle
I0320 10:13:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 10:13:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:13:43.409789  543705 memory.go:191] Add success.
I0320 10:13:43.409816  543705 cpu.go:282] Add success.
I0320 10:13:43.419854  543705 net.go:648] Add success.
I0320 10:13:43.422829  543705 net.go:770] primary dev: ETH0
I0320 10:13:43.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:13:43.422861  543705 net.go:698] Add success.
I0320 10:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:13:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:13:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:13:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:13:53.409794  543705 memory.go:184] no items to output this cycle
I0320 10:13:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 10:14:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:14:03.409781  543705 memory.go:184] no items to output this cycle
I0320 10:14:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 10:14:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:14:13.409799  543705 memory.go:191] Add success.
I0320 10:14:13.409802  543705 cpu.go:282] Add success.
W0320 10:14:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:14:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:14:13.420071  543705 net.go:648] Add success.
I0320 10:14:13.422856  543705 net.go:770] primary dev: ETH0
I0320 10:14:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:14:13.422884  543705 net.go:698] Add success.
I0320 10:14:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:14:14.455432  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:14:14.455443  543705 disk_worker.go:708] disk space is not compliant
W0320 10:14:14.455447  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:14:14.457057  543705 disk_worker.go:494] system disk:vda1
I0320 10:14:14.457086  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:14:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:14:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:14:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:14:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:14:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:14:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:14:23.409787  543705 memory.go:184] no items to output this cycle
I0320 10:14:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 10:14:23.793068  543705 disk_info.go:125] begin check local disk info of client
I0320 10:14:23.795628  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:14:23.795633  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb4c0 0xc0001fb500]
E0320 10:14:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:14:33.409770  543705 memory.go:184] no items to output this cycle
I0320 10:14:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 10:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:14:43.409793  543705 memory.go:191] Add success.
I0320 10:14:43.409799  543705 cpu.go:282] Add success.
I0320 10:14:43.419974  543705 net.go:648] Add success.
I0320 10:14:43.422899  543705 net.go:770] primary dev: ETH0
I0320 10:14:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:14:43.422925  543705 net.go:698] Add success.
I0320 10:14:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:14:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:14:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:14:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:14:53.409764  543705 memory.go:184] no items to output this cycle
I0320 10:14:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 10:15:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:15:03.409776  543705 memory.go:184] no items to output this cycle
I0320 10:15:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 10:15:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:15:13.409800  543705 memory.go:191] Add success.
I0320 10:15:13.409806  543705 cpu.go:282] Add success.
W0320 10:15:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:15:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:15:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:15:13.420111  543705 net.go:648] Add success.
I0320 10:15:13.422646  543705 net.go:770] primary dev: ETH0
I0320 10:15:13.422660  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:15:13.422672  543705 net.go:698] Add success.
I0320 10:15:13.565525  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c5a9617-9293-487e-89d8-cd33819fc473","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:15:13.565559  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:15:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:15:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:15:14.455331  543705 disk_worker.go:708] disk space is not compliant
W0320 10:15:14.455336  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:15:14.456955  543705 disk_worker.go:494] system disk:vda1
I0320 10:15:14.456985  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:15:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:15:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:15:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:15:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:15:23.409778  543705 memory.go:184] no items to output this cycle
I0320 10:15:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 10:15:23.795715  543705 disk_info.go:125] begin check local disk info of client
I0320 10:15:23.798352  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:15:23.798357  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329440 0xc000329480]
E0320 10:15:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:15:33.409801  543705 memory.go:184] no items to output this cycle
I0320 10:15:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 10:15:38.425885  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:15:38.425892  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:15:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:15:43.410935  543705 memory.go:191] Add success.
I0320 10:15:43.409827  543705 cpu.go:282] Add success.
I0320 10:15:43.420704  543705 net.go:648] Add success.
I0320 10:15:43.423376  543705 net.go:770] primary dev: ETH0
I0320 10:15:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:15:43.423403  543705 net.go:698] Add success.
I0320 10:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:15:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:15:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:15:53.409776  543705 memory.go:184] no items to output this cycle
I0320 10:15:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 10:16:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:16:03.409769  543705 memory.go:184] no items to output this cycle
I0320 10:16:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 10:16:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:16:13.409823  543705 memory.go:191] Add success.
I0320 10:16:13.409831  543705 cpu.go:282] Add success.
W0320 10:16:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:16:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:16:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:16:13.420167  543705 net.go:648] Add success.
I0320 10:16:13.422786  543705 net.go:770] primary dev: ETH0
I0320 10:16:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:16:13.422811  543705 net.go:698] Add success.
I0320 10:16:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:16:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:16:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 10:16:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:16:14.459245  543705 disk_worker.go:494] system disk:vda1
I0320 10:16:14.459273  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:16:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:16:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:16:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:16:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:16:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:16:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:16:23.409789  543705 memory.go:184] no items to output this cycle
I0320 10:16:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 10:16:23.800124  543705 disk_info.go:125] begin check local disk info of client
I0320 10:16:23.802666  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:16:23.802671  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329340 0xc000329380]
E0320 10:16:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:16:33.409777  543705 memory.go:184] no items to output this cycle
I0320 10:16:33.409777  543705 cpu.go:275] no items to output this cycle
E0320 10:16:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:16:43.409794  543705 memory.go:191] Add success.
I0320 10:16:43.409795  543705 cpu.go:282] Add success.
I0320 10:16:43.419864  543705 net.go:648] Add success.
I0320 10:16:43.422331  543705 net.go:770] primary dev: ETH0
I0320 10:16:43.422345  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:16:43.422357  543705 net.go:698] Add success.
I0320 10:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:16:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:16:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:16:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:16:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:16:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 10:17:03.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:17:03.409758  543705 memory.go:184] no items to output this cycle
I0320 10:17:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 10:17:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:17:13.409819  543705 memory.go:191] Add success.
I0320 10:17:13.409824  543705 cpu.go:282] Add success.
W0320 10:17:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:17:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:17:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:17:13.420056  543705 net.go:648] Add success.
I0320 10:17:13.422784  543705 net.go:770] primary dev: ETH0
I0320 10:17:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:17:13.422809  543705 net.go:698] Add success.
I0320 10:17:13.453339  543705 event_worker.go:152] Polling the log file for events...
W0320 10:17:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:17:14.455253  543705 disk_worker.go:708] disk space is not compliant
W0320 10:17:14.455258  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:17:14.456059  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:17:14.456069  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:17:14.456075  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:17:14.457017  543705 disk_worker.go:494] system disk:vda1
I0320 10:17:14.457127  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:17:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:17:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 10:17:16.457567  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:17:16.457582  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:17:16.457631  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:17:16.457666  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:17:16.472990  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:17:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:17:23.409807  543705 memory.go:184] no items to output this cycle
I0320 10:17:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 10:17:23.804150  543705 disk_info.go:125] begin check local disk info of client
I0320 10:17:23.806726  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:17:23.806731  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328200 0xc000328240]
E0320 10:17:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:17:33.409777  543705 memory.go:184] no items to output this cycle
I0320 10:17:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 10:17:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:17:43.409806  543705 memory.go:191] Add success.
I0320 10:17:43.409805  543705 cpu.go:282] Add success.
I0320 10:17:43.419867  543705 net.go:648] Add success.
I0320 10:17:43.422169  543705 net.go:770] primary dev: ETH0
I0320 10:17:43.422182  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:17:43.422194  543705 net.go:698] Add success.
I0320 10:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:17:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:17:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:17:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:17:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 10:18:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:18:03.409767  543705 memory.go:184] no items to output this cycle
I0320 10:18:03.409838  543705 cpu.go:275] no items to output this cycle
E0320 10:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:18:13.409813  543705 memory.go:191] Add success.
I0320 10:18:13.409826  543705 cpu.go:282] Add success.
W0320 10:18:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:18:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:18:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:18:13.420163  543705 net.go:648] Add success.
I0320 10:18:13.422883  543705 net.go:770] primary dev: ETH0
I0320 10:18:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:18:13.422909  543705 net.go:698] Add success.
I0320 10:18:13.470666  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"666b6968-a603-4e4c-940c-a83a0ab8379d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:18:13.470698  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:18:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:18:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:18:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 10:18:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:18:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 10:18:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:18:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:18:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:18:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:18:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:18:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:18:23.409821  543705 memory.go:184] no items to output this cycle
I0320 10:18:23.409826  543705 cpu.go:275] no items to output this cycle
I0320 10:18:23.808176  543705 disk_info.go:125] begin check local disk info of client
I0320 10:18:23.810739  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:18:23.810745  543705 disk_info.go:196] parse disk info done, disk is : [0xc000397cc0 0xc000397d00]
E0320 10:18:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:18:33.409772  543705 memory.go:184] no items to output this cycle
I0320 10:18:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 10:18:38.426888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:18:38.426895  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:18:43.410680  543705 memory.go:191] Add success.
I0320 10:18:43.409808  543705 cpu.go:282] Add success.
I0320 10:18:43.420451  543705 net.go:648] Add success.
I0320 10:18:43.423001  543705 net.go:770] primary dev: ETH0
I0320 10:18:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:18:43.423032  543705 net.go:698] Add success.
I0320 10:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:18:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:18:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:18:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:18:53.409780  543705 memory.go:184] no items to output this cycle
I0320 10:18:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 10:19:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:19:03.409775  543705 memory.go:184] no items to output this cycle
I0320 10:19:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 10:19:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:19:13.409795  543705 cpu.go:282] Add success.
I0320 10:19:13.409796  543705 memory.go:191] Add success.
W0320 10:19:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:19:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:19:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:19:13.420082  543705 net.go:648] Add success.
I0320 10:19:13.422830  543705 net.go:770] primary dev: ETH0
I0320 10:19:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:19:13.422863  543705 net.go:698] Add success.
I0320 10:19:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:19:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:19:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 10:19:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:19:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 10:19:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:19:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:19:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:19:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:19:23.409792  543705 memory.go:184] no items to output this cycle
I0320 10:19:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 10:19:23.812167  543705 disk_info.go:125] begin check local disk info of client
I0320 10:19:23.814750  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:19:23.814758  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7700 0xc0001c7740]
E0320 10:19:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:19:33.409779  543705 memory.go:184] no items to output this cycle
I0320 10:19:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 10:19:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:19:43.409801  543705 memory.go:191] Add success.
I0320 10:19:43.409802  543705 cpu.go:282] Add success.
I0320 10:19:43.420258  543705 net.go:648] Add success.
I0320 10:19:43.422777  543705 net.go:770] primary dev: ETH0
I0320 10:19:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:19:43.422803  543705 net.go:698] Add success.
I0320 10:19:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:19:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:19:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:19:53.409768  543705 memory.go:184] no items to output this cycle
I0320 10:19:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 10:20:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:20:03.409802  543705 memory.go:184] no items to output this cycle
I0320 10:20:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 10:20:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:20:13.409813  543705 memory.go:191] Add success.
I0320 10:20:13.409819  543705 cpu.go:282] Add success.
W0320 10:20:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:20:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:20:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:20:13.420144  543705 net.go:648] Add success.
I0320 10:20:13.422855  543705 net.go:770] primary dev: ETH0
I0320 10:20:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:20:13.422884  543705 net.go:698] Add success.
I0320 10:20:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:20:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:20:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 10:20:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:20:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 10:20:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:20:15.456009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:20:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:20:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:20:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:20:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:20:23.409787  543705 memory.go:184] no items to output this cycle
I0320 10:20:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 10:20:23.816183  543705 disk_info.go:125] begin check local disk info of client
I0320 10:20:23.818734  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:20:23.818739  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7e80 0xc0001c7ec0]
E0320 10:20:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:20:33.409803  543705 memory.go:184] no items to output this cycle
I0320 10:20:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 10:20:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:20:43.409815  543705 memory.go:191] Add success.
I0320 10:20:43.409824  543705 cpu.go:282] Add success.
I0320 10:20:43.420062  543705 net.go:648] Add success.
I0320 10:20:43.422691  543705 net.go:770] primary dev: ETH0
I0320 10:20:43.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:20:43.422719  543705 net.go:698] Add success.
I0320 10:20:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:20:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:20:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:20:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:20:53.409799  543705 memory.go:184] no items to output this cycle
I0320 10:20:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 10:21:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:21:03.409781  543705 memory.go:184] no items to output this cycle
I0320 10:21:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 10:21:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:21:13.409810  543705 memory.go:191] Add success.
I0320 10:21:13.409822  543705 cpu.go:282] Add success.
W0320 10:21:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:21:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:21:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:21:13.420280  543705 net.go:648] Add success.
I0320 10:21:13.422973  543705 net.go:770] primary dev: ETH0
I0320 10:21:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:21:13.423000  543705 net.go:698] Add success.
I0320 10:21:13.464174  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6f208b6f-12d5-45e6-b5b0-36994d49c063","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:21:13.464207  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:21:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:21:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:21:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 10:21:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:21:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 10:21:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:21:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:21:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:21:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:21:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:21:23.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:21:23.409825  543705 memory.go:184] no items to output this cycle
I0320 10:21:23.409834  543705 cpu.go:275] no items to output this cycle
I0320 10:21:23.818823  543705 disk_info.go:125] begin check local disk info of client
I0320 10:21:23.821375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:21:23.821381  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c7180 0xc0001c71c0]
E0320 10:21:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:21:33.409766  543705 memory.go:184] no items to output this cycle
I0320 10:21:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 10:21:38.427897  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:21:38.427905  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:21:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:21:43.410847  543705 memory.go:191] Add success.
I0320 10:21:43.409824  543705 cpu.go:282] Add success.
I0320 10:21:43.420614  543705 net.go:648] Add success.
I0320 10:21:43.423215  543705 net.go:770] primary dev: ETH0
I0320 10:21:43.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:21:43.423243  543705 net.go:698] Add success.
I0320 10:21:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:21:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:21:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:21:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:21:53.409768  543705 memory.go:184] no items to output this cycle
I0320 10:21:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 10:22:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:22:03.409782  543705 memory.go:184] no items to output this cycle
I0320 10:22:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 10:22:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:22:13.409788  543705 memory.go:191] Add success.
I0320 10:22:13.409789  543705 cpu.go:282] Add success.
W0320 10:22:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:22:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:22:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:22:13.420314  543705 net.go:648] Add success.
I0320 10:22:13.423132  543705 net.go:770] primary dev: ETH0
I0320 10:22:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:22:13.423158  543705 net.go:698] Add success.
W0320 10:22:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:22:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 10:22:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:22:14.455865  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:22:14.455874  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:22:14.455880  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:22:14.456536  543705 disk_worker.go:494] system disk:vda1
I0320 10:22:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:22:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:22:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:22:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:22:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:22:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:22:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:22:16.472317  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:22:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:22:23.409813  543705 memory.go:184] no items to output this cycle
I0320 10:22:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 10:22:23.821672  543705 disk_info.go:125] begin check local disk info of client
I0320 10:22:23.824255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:22:23.824261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046fc40 0xc00046fc80]
E0320 10:22:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:22:33.409771  543705 memory.go:184] no items to output this cycle
I0320 10:22:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 10:22:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:22:43.409816  543705 memory.go:191] Add success.
I0320 10:22:43.409823  543705 cpu.go:282] Add success.
I0320 10:22:43.419970  543705 net.go:648] Add success.
I0320 10:22:43.422709  543705 net.go:770] primary dev: ETH0
I0320 10:22:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:22:43.422734  543705 net.go:698] Add success.
I0320 10:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:22:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:22:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:22:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:22:53.409797  543705 memory.go:184] no items to output this cycle
I0320 10:22:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 10:23:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:23:03.409785  543705 memory.go:184] no items to output this cycle
I0320 10:23:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 10:23:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:23:13.409784  543705 memory.go:191] Add success.
I0320 10:23:13.409785  543705 cpu.go:282] Add success.
W0320 10:23:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:23:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:23:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:23:13.420216  543705 net.go:648] Add success.
I0320 10:23:13.422979  543705 net.go:770] primary dev: ETH0
I0320 10:23:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:23:13.423023  543705 net.go:698] Add success.
I0320 10:23:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:23:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:23:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 10:23:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:23:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 10:23:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:23:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:23:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:23:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:23:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:23:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:23:23.409894  543705 memory.go:184] no items to output this cycle
I0320 10:23:23.410064  543705 cpu.go:275] no items to output this cycle
I0320 10:23:23.825669  543705 disk_info.go:125] begin check local disk info of client
I0320 10:23:23.828199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:23:23.828206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9140 0xc0002b9180]
E0320 10:23:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:23:33.409767  543705 memory.go:184] no items to output this cycle
I0320 10:23:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 10:23:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:23:43.409818  543705 memory.go:191] Add success.
I0320 10:23:43.409821  543705 cpu.go:282] Add success.
I0320 10:23:43.420085  543705 net.go:648] Add success.
I0320 10:23:43.423007  543705 net.go:770] primary dev: ETH0
I0320 10:23:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:23:43.423046  543705 net.go:698] Add success.
I0320 10:23:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:23:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:23:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:23:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:23:53.409775  543705 cpu.go:275] no items to output this cycle
I0320 10:23:53.409782  543705 memory.go:184] no items to output this cycle
E0320 10:24:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:24:03.409807  543705 memory.go:184] no items to output this cycle
I0320 10:24:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 10:24:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:24:13.409780  543705 memory.go:191] Add success.
I0320 10:24:13.409802  543705 cpu.go:282] Add success.
W0320 10:24:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:24:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:24:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:24:13.420159  543705 net.go:648] Add success.
I0320 10:24:13.422814  543705 net.go:770] primary dev: ETH0
I0320 10:24:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:24:13.422841  543705 net.go:698] Add success.
I0320 10:24:13.463999  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5288ab69-56b6-492d-98ae-dba7aaf5fd21","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:24:13.464032  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:24:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:24:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:24:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 10:24:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:24:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 10:24:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:24:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:24:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:24:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:24:23.409783  543705 memory.go:184] no items to output this cycle
I0320 10:24:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 10:24:23.828294  543705 disk_info.go:125] begin check local disk info of client
I0320 10:24:23.830919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:24:23.830925  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025a000 0xc00025a040]
E0320 10:24:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:24:33.409780  543705 memory.go:184] no items to output this cycle
I0320 10:24:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 10:24:38.428902  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:24:38.428908  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:24:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:24:43.410844  543705 memory.go:191] Add success.
I0320 10:24:43.409825  543705 cpu.go:282] Add success.
I0320 10:24:43.420558  543705 net.go:648] Add success.
I0320 10:24:43.423574  543705 net.go:770] primary dev: ETH0
I0320 10:24:43.423593  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:24:43.423609  543705 net.go:698] Add success.
I0320 10:24:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:24:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:24:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:24:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:24:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:24:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 10:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:25:03.409801  543705 memory.go:184] no items to output this cycle
I0320 10:25:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 10:25:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:25:13.409776  543705 memory.go:191] Add success.
W0320 10:25:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:25:13.409808  543705 cpu.go:282] Add success.
W0320 10:25:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:25:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:25:13.420121  543705 net.go:648] Add success.
I0320 10:25:13.423378  543705 net.go:770] primary dev: ETH0
I0320 10:25:13.423393  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:25:13.423407  543705 net.go:698] Add success.
I0320 10:25:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:25:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:25:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 10:25:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:25:14.456499  543705 disk_worker.go:494] system disk:vda1
I0320 10:25:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:25:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:25:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:25:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:25:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:25:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:25:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:25:23.409872  543705 memory.go:184] no items to output this cycle
I0320 10:25:23.409926  543705 cpu.go:275] no items to output this cycle
I0320 10:25:23.831002  543705 disk_info.go:125] begin check local disk info of client
I0320 10:25:23.833572  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:25:23.833577  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d780 0xc00039d7c0]
E0320 10:25:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:25:33.409801  543705 memory.go:184] no items to output this cycle
I0320 10:25:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 10:25:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:25:43.409791  543705 memory.go:191] Add success.
I0320 10:25:43.409808  543705 cpu.go:282] Add success.
I0320 10:25:43.420058  543705 net.go:648] Add success.
I0320 10:25:43.422851  543705 net.go:770] primary dev: ETH0
I0320 10:25:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:25:43.422876  543705 net.go:698] Add success.
I0320 10:25:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:25:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:25:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:25:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:25:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:26:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:26:03.409781  543705 memory.go:184] no items to output this cycle
I0320 10:26:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 10:26:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:26:13.409801  543705 memory.go:191] Add success.
I0320 10:26:13.409807  543705 cpu.go:282] Add success.
W0320 10:26:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:26:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:26:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:26:13.420149  543705 net.go:648] Add success.
I0320 10:26:13.422969  543705 net.go:770] primary dev: ETH0
I0320 10:26:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:26:13.422994  543705 net.go:698] Add success.
I0320 10:26:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:26:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:26:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 10:26:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:26:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 10:26:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:26:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:26:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:26:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:26:23.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:26:23.409825  543705 memory.go:184] no items to output this cycle
I0320 10:26:23.409830  543705 cpu.go:275] no items to output this cycle
I0320 10:26:23.833672  543705 disk_info.go:125] begin check local disk info of client
I0320 10:26:23.836189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:26:23.836195  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025c000 0xc00025c040]
E0320 10:26:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:26:33.409777  543705 memory.go:184] no items to output this cycle
I0320 10:26:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:26:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:26:43.409798  543705 memory.go:191] Add success.
I0320 10:26:43.409798  543705 cpu.go:282] Add success.
I0320 10:26:43.420074  543705 net.go:648] Add success.
I0320 10:26:43.422824  543705 net.go:770] primary dev: ETH0
I0320 10:26:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:26:43.422853  543705 net.go:698] Add success.
I0320 10:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:26:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:26:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:26:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:26:53.409801  543705 memory.go:184] no items to output this cycle
I0320 10:26:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 10:27:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:27:03.409784  543705 memory.go:184] no items to output this cycle
I0320 10:27:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:27:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:27:13.409786  543705 cpu.go:282] Add success.
I0320 10:27:13.409788  543705 memory.go:191] Add success.
W0320 10:27:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:27:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:27:13.420047  543705 net.go:648] Add success.
I0320 10:27:13.428857  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 10:27:13.428937  543705 net.go:770] primary dev: ETH0
I0320 10:27:13.428950  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:27:13.428961  543705 net.go:698] Add success.
I0320 10:27:13.453494  543705 event_worker.go:152] Polling the log file for events...
I0320 10:27:13.550954  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"52e705e7-28cc-484b-8ad9-ae504019f6d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:27:13.550989  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 10:27:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:27:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 10:27:14.455156  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:27:14.456111  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:27:14.456121  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:27:14.456126  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:27:14.456727  543705 disk_worker.go:494] system disk:vda1
I0320 10:27:14.456757  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:27:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:27:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:27:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:27:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:27:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:27:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:27:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:27:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:27:23.409785  543705 memory.go:184] no items to output this cycle
I0320 10:27:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 10:27:23.836274  543705 disk_info.go:125] begin check local disk info of client
I0320 10:27:23.838808  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:27:23.838814  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac80 0xc0001aacc0]
E0320 10:27:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:27:33.409800  543705 memory.go:184] no items to output this cycle
I0320 10:27:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 10:27:38.429911  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:27:38.429919  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:27:43.410699  543705 memory.go:191] Add success.
I0320 10:27:43.409812  543705 cpu.go:282] Add success.
I0320 10:27:43.420404  543705 net.go:648] Add success.
I0320 10:27:43.423045  543705 net.go:770] primary dev: ETH0
I0320 10:27:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:27:43.423075  543705 net.go:698] Add success.
I0320 10:27:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:27:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:27:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:27:53.409775  543705 memory.go:184] no items to output this cycle
I0320 10:27:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 10:28:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:28:03.409807  543705 memory.go:184] no items to output this cycle
I0320 10:28:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 10:28:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:28:13.409782  543705 memory.go:191] Add success.
I0320 10:28:13.409808  543705 cpu.go:282] Add success.
W0320 10:28:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:28:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:28:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:28:13.420065  543705 net.go:648] Add success.
I0320 10:28:13.422668  543705 net.go:770] primary dev: ETH0
I0320 10:28:13.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:28:13.422693  543705 net.go:698] Add success.
I0320 10:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:28:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:28:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 10:28:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:28:14.456825  543705 disk_worker.go:494] system disk:vda1
I0320 10:28:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:28:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:28:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:28:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:28:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:28:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:28:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:28:23.409789  543705 memory.go:184] no items to output this cycle
I0320 10:28:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 10:28:23.840327  543705 disk_info.go:125] begin check local disk info of client
I0320 10:28:23.842976  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:28:23.842982  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464b40 0xc000464b80]
E0320 10:28:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:28:33.409767  543705 memory.go:184] no items to output this cycle
I0320 10:28:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 10:28:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:28:43.409793  543705 memory.go:191] Add success.
I0320 10:28:43.409797  543705 cpu.go:282] Add success.
I0320 10:28:43.419971  543705 net.go:648] Add success.
I0320 10:28:43.422568  543705 net.go:770] primary dev: ETH0
I0320 10:28:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:28:43.422594  543705 net.go:698] Add success.
I0320 10:28:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:28:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:28:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:28:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:28:53.409785  543705 memory.go:184] no items to output this cycle
I0320 10:28:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 10:29:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:29:03.409780  543705 memory.go:184] no items to output this cycle
I0320 10:29:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 10:29:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:29:13.409814  543705 memory.go:191] Add success.
I0320 10:29:13.409816  543705 cpu.go:282] Add success.
W0320 10:29:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:29:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:29:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:29:13.420213  543705 net.go:648] Add success.
I0320 10:29:13.422895  543705 net.go:770] primary dev: ETH0
I0320 10:29:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:29:13.423085  543705 net.go:698] Add success.
I0320 10:29:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:29:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:29:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 10:29:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:29:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 10:29:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:29:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:29:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:29:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:29:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:29:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:29:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:29:23.409801  543705 memory.go:184] no items to output this cycle
I0320 10:29:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 10:29:23.844341  543705 disk_info.go:125] begin check local disk info of client
I0320 10:29:23.846917  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:29:23.846922  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bac0 0xc00007bb00]
E0320 10:29:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:29:33.409784  543705 memory.go:184] no items to output this cycle
I0320 10:29:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 10:29:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:29:43.409818  543705 memory.go:191] Add success.
I0320 10:29:43.409831  543705 cpu.go:282] Add success.
I0320 10:29:43.419967  543705 net.go:648] Add success.
I0320 10:29:43.422491  543705 net.go:770] primary dev: ETH0
I0320 10:29:43.422504  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:29:43.422516  543705 net.go:698] Add success.
I0320 10:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:29:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:29:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:29:53.409800  543705 memory.go:184] no items to output this cycle
I0320 10:29:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 10:30:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:30:03.409799  543705 memory.go:184] no items to output this cycle
I0320 10:30:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 10:30:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:30:13.409790  543705 memory.go:191] Add success.
I0320 10:30:13.409797  543705 cpu.go:282] Add success.
W0320 10:30:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:30:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:30:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:30:13.419737  543705 net.go:648] Add success.
I0320 10:30:13.422394  543705 net.go:770] primary dev: ETH0
I0320 10:30:13.422407  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:30:13.422418  543705 net.go:698] Add success.
I0320 10:30:13.517518  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4622b4f-f40d-44f2-bc96-9ebca161ccb6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:30:13.517550  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:30:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:30:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:30:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 10:30:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:30:14.456770  543705 disk_worker.go:494] system disk:vda1
I0320 10:30:14.456807  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:30:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:30:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:30:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:30:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:30:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:30:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:30:23.409783  543705 memory.go:184] no items to output this cycle
I0320 10:30:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 10:30:23.847006  543705 disk_info.go:125] begin check local disk info of client
I0320 10:30:23.849569  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:30:23.849575  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fba40 0xc0001fba80]
E0320 10:30:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:30:33.409773  543705 memory.go:184] no items to output this cycle
I0320 10:30:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 10:30:38.430908  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:30:38.430914  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:30:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:30:43.410624  543705 memory.go:191] Add success.
I0320 10:30:43.409801  543705 cpu.go:282] Add success.
I0320 10:30:43.420360  543705 net.go:648] Add success.
I0320 10:30:43.423017  543705 net.go:770] primary dev: ETH0
I0320 10:30:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:30:43.423044  543705 net.go:698] Add success.
I0320 10:30:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:30:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:30:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:30:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:30:53.409801  543705 memory.go:184] no items to output this cycle
I0320 10:30:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 10:31:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:31:03.409789  543705 memory.go:184] no items to output this cycle
I0320 10:31:03.409792  543705 cpu.go:275] no items to output this cycle
W0320 10:31:13.409719  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:31:13.409737  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:31:13.409744  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:31:13.409805  543705 cpu.go:282] Add success.
E0320 10:31:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:31:13.409864  543705 memory.go:191] Add success.
I0320 10:31:13.420090  543705 net.go:648] Add success.
I0320 10:31:13.422736  543705 net.go:770] primary dev: ETH0
I0320 10:31:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:31:13.422762  543705 net.go:698] Add success.
I0320 10:31:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:31:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:31:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 10:31:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:31:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 10:31:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:31:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:31:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:31:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:31:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:31:23.409788  543705 memory.go:184] no items to output this cycle
I0320 10:31:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 10:31:23.849673  543705 disk_info.go:125] begin check local disk info of client
I0320 10:31:23.852197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:31:23.852203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5340 0xc0000c5380]
E0320 10:31:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:31:33.409784  543705 memory.go:184] no items to output this cycle
I0320 10:31:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 10:31:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:31:43.409797  543705 memory.go:191] Add success.
I0320 10:31:43.409824  543705 cpu.go:282] Add success.
I0320 10:31:43.419985  543705 net.go:648] Add success.
I0320 10:31:43.422591  543705 net.go:770] primary dev: ETH0
I0320 10:31:43.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:31:43.422618  543705 net.go:698] Add success.
I0320 10:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:31:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:31:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:31:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:31:53.409790  543705 memory.go:184] no items to output this cycle
I0320 10:31:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 10:32:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:32:03.409813  543705 memory.go:184] no items to output this cycle
I0320 10:32:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 10:32:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:32:13.409793  543705 memory.go:191] Add success.
I0320 10:32:13.409795  543705 cpu.go:282] Add success.
W0320 10:32:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:32:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:32:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:32:13.420281  543705 net.go:648] Add success.
I0320 10:32:13.423318  543705 net.go:770] primary dev: ETH0
I0320 10:32:13.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:32:13.423346  543705 net.go:698] Add success.
W0320 10:32:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:32:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 10:32:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:32:14.456796  543705 disk_worker.go:494] system disk:vda1
I0320 10:32:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:32:14.457052  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:32:14.457060  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:32:14.457064  543705 custom_config.go:64] query custom config with name: gpu
E0320 10:32:15.456879  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:32:15.456889  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:32:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:32:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:32:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:32:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:32:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:32:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:32:23.409802  543705 memory.go:184] no items to output this cycle
I0320 10:32:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 10:32:23.853670  543705 disk_info.go:125] begin check local disk info of client
I0320 10:32:23.856187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:32:23.856192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5000 0xc0000c5040]
E0320 10:32:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:32:33.409780  543705 memory.go:184] no items to output this cycle
I0320 10:32:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 10:32:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:32:43.409787  543705 memory.go:191] Add success.
I0320 10:32:43.409815  543705 cpu.go:282] Add success.
I0320 10:32:43.420144  543705 net.go:648] Add success.
I0320 10:32:43.422693  543705 net.go:770] primary dev: ETH0
I0320 10:32:43.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:32:43.422722  543705 net.go:698] Add success.
I0320 10:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:32:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:32:53.409809  543705 memory.go:184] no items to output this cycle
I0320 10:32:53.409829  543705 cpu.go:275] no items to output this cycle
E0320 10:33:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:33:03.409778  543705 cpu.go:275] no items to output this cycle
I0320 10:33:03.409794  543705 memory.go:184] no items to output this cycle
E0320 10:33:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:33:13.409786  543705 memory.go:191] Add success.
I0320 10:33:13.409788  543705 cpu.go:282] Add success.
W0320 10:33:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:33:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:33:13.420193  543705 net.go:648] Add success.
I0320 10:33:13.423200  543705 net.go:770] primary dev: ETH0
I0320 10:33:13.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:33:13.423230  543705 net.go:698] Add success.
I0320 10:33:13.548301  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"856d2b35-c5d8-468e-a8fe-fa9ba78a8309","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:33:13.548338  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:33:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:33:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:33:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 10:33:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:33:14.456699  543705 disk_worker.go:494] system disk:vda1
I0320 10:33:14.456735  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:33:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:33:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:33:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:33:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:33:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:33:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:33:23.409778  543705 memory.go:184] no items to output this cycle
I0320 10:33:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 10:33:23.856272  543705 disk_info.go:125] begin check local disk info of client
I0320 10:33:23.858830  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:33:23.858835  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046b740 0xc00046b780]
E0320 10:33:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:33:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 10:33:33.409788  543705 memory.go:184] no items to output this cycle
I0320 10:33:38.431933  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:33:38.431941  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:33:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:33:43.410631  543705 memory.go:191] Add success.
I0320 10:33:43.409797  543705 cpu.go:282] Add success.
I0320 10:33:43.420312  543705 net.go:648] Add success.
I0320 10:33:43.422752  543705 net.go:770] primary dev: ETH0
I0320 10:33:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:33:43.422778  543705 net.go:698] Add success.
I0320 10:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:33:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:33:53.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:33:53.409857  543705 memory.go:184] no items to output this cycle
I0320 10:33:53.409980  543705 cpu.go:275] no items to output this cycle
E0320 10:34:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:34:03.409799  543705 memory.go:184] no items to output this cycle
I0320 10:34:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 10:34:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:34:13.409789  543705 memory.go:191] Add success.
I0320 10:34:13.409794  543705 cpu.go:282] Add success.
W0320 10:34:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:34:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:34:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:34:13.420048  543705 net.go:648] Add success.
I0320 10:34:13.422723  543705 net.go:770] primary dev: ETH0
I0320 10:34:13.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:34:13.422747  543705 net.go:698] Add success.
I0320 10:34:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:34:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:34:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 10:34:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:34:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 10:34:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:34:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:34:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:34:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:34:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:34:23.409806  543705 memory.go:184] no items to output this cycle
I0320 10:34:23.409819  543705 cpu.go:275] no items to output this cycle
I0320 10:34:23.860426  543705 disk_info.go:125] begin check local disk info of client
I0320 10:34:23.862957  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:34:23.862962  543705 disk_info.go:196] parse disk info done, disk is : [0xc000254100 0xc000254140]
E0320 10:34:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:34:33.409800  543705 memory.go:184] no items to output this cycle
I0320 10:34:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 10:34:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:34:43.409797  543705 memory.go:191] Add success.
I0320 10:34:43.409797  543705 cpu.go:282] Add success.
I0320 10:34:43.420071  543705 net.go:648] Add success.
I0320 10:34:43.422862  543705 net.go:770] primary dev: ETH0
I0320 10:34:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:34:43.422891  543705 net.go:698] Add success.
I0320 10:34:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:34:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:34:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:34:53.410665  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:34:53.410689  543705 memory.go:184] no items to output this cycle
I0320 10:34:53.410705  543705 cpu.go:275] no items to output this cycle
E0320 10:35:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:35:03.409767  543705 memory.go:184] no items to output this cycle
I0320 10:35:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 10:35:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:35:13.409821  543705 memory.go:191] Add success.
I0320 10:35:13.409830  543705 cpu.go:282] Add success.
W0320 10:35:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:35:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:35:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:35:13.420136  543705 net.go:648] Add success.
I0320 10:35:13.423130  543705 net.go:770] primary dev: ETH0
I0320 10:35:13.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:35:13.423155  543705 net.go:698] Add success.
I0320 10:35:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:35:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:35:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 10:35:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:35:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 10:35:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:35:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:35:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:35:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:35:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:35:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:35:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 10:35:23.409789  543705 memory.go:184] no items to output this cycle
I0320 10:35:23.863053  543705 disk_info.go:125] begin check local disk info of client
I0320 10:35:23.865625  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:35:23.865632  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003425c0 0xc000342600]
E0320 10:35:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:35:33.409762  543705 memory.go:184] no items to output this cycle
I0320 10:35:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 10:35:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:35:43.409813  543705 memory.go:191] Add success.
I0320 10:35:43.409819  543705 cpu.go:282] Add success.
I0320 10:35:43.419997  543705 net.go:648] Add success.
I0320 10:35:43.422641  543705 net.go:770] primary dev: ETH0
I0320 10:35:43.422654  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:35:43.422666  543705 net.go:698] Add success.
I0320 10:35:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:35:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:35:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:35:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:35:53.409781  543705 memory.go:184] no items to output this cycle
I0320 10:35:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 10:36:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:36:03.409779  543705 memory.go:184] no items to output this cycle
I0320 10:36:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 10:36:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:36:13.409815  543705 memory.go:191] Add success.
I0320 10:36:13.409831  543705 cpu.go:282] Add success.
W0320 10:36:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:36:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:36:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:36:13.420163  543705 net.go:648] Add success.
I0320 10:36:13.423174  543705 net.go:770] primary dev: ETH0
I0320 10:36:13.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:36:13.423200  543705 net.go:698] Add success.
I0320 10:36:13.467839  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"26ce0f79-de72-4247-892b-c55cb256054d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:36:13.467873  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:36:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:36:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:36:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 10:36:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:36:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 10:36:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:36:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:36:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:36:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:36:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:36:23.409782  543705 memory.go:184] no items to output this cycle
I0320 10:36:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 10:36:23.865678  543705 disk_info.go:125] begin check local disk info of client
I0320 10:36:23.868229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:36:23.868235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306740 0xc000306780]
E0320 10:36:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:36:33.409799  543705 memory.go:184] no items to output this cycle
I0320 10:36:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 10:36:38.432920  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:36:38.432926  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:36:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:36:43.410744  543705 memory.go:191] Add success.
I0320 10:36:43.409796  543705 cpu.go:282] Add success.
I0320 10:36:43.420761  543705 net.go:648] Add success.
I0320 10:36:43.423598  543705 net.go:770] primary dev: ETH0
I0320 10:36:43.423611  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:36:43.423623  543705 net.go:698] Add success.
I0320 10:36:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:36:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:36:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:36:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:36:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:36:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 10:37:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:37:03.409776  543705 memory.go:184] no items to output this cycle
I0320 10:37:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 10:37:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:37:13.409793  543705 memory.go:191] Add success.
I0320 10:37:13.409794  543705 cpu.go:282] Add success.
W0320 10:37:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:37:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:37:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:37:13.420061  543705 net.go:648] Add success.
I0320 10:37:13.422848  543705 net.go:770] primary dev: ETH0
I0320 10:37:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:37:13.422877  543705 net.go:698] Add success.
I0320 10:37:13.453445  543705 event_worker.go:152] Polling the log file for events...
W0320 10:37:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:37:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 10:37:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:37:14.456881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:37:14.456890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:37:14.456897  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:37:14.456967  543705 disk_worker.go:494] system disk:vda1
I0320 10:37:14.457012  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:37:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:37:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:37:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:37:16.458000  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:37:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:37:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:37:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:37:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:37:23.409804  543705 memory.go:184] no items to output this cycle
I0320 10:37:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 10:37:23.869673  543705 disk_info.go:125] begin check local disk info of client
I0320 10:37:23.872262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:37:23.872268  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305900 0xc000305940]
E0320 10:37:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:37:33.409778  543705 memory.go:184] no items to output this cycle
I0320 10:37:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 10:37:43.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:37:43.409886  543705 memory.go:191] Add success.
I0320 10:37:43.409965  543705 cpu.go:282] Add success.
I0320 10:37:43.419727  543705 net.go:648] Add success.
I0320 10:37:43.422902  543705 net.go:770] primary dev: ETH0
I0320 10:37:43.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:37:43.422926  543705 net.go:698] Add success.
I0320 10:37:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:37:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:37:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:37:53.410699  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:37:53.410715  543705 memory.go:184] no items to output this cycle
I0320 10:37:53.410720  543705 cpu.go:275] no items to output this cycle
E0320 10:38:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:38:03.409766  543705 memory.go:184] no items to output this cycle
I0320 10:38:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 10:38:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:38:13.409823  543705 memory.go:191] Add success.
I0320 10:38:13.409826  543705 cpu.go:282] Add success.
W0320 10:38:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:38:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:38:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:38:13.420110  543705 net.go:648] Add success.
I0320 10:38:13.423265  543705 net.go:770] primary dev: ETH0
I0320 10:38:13.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:38:13.423294  543705 net.go:698] Add success.
I0320 10:38:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:38:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:38:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 10:38:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:38:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 10:38:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:38:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:38:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:38:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:38:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:38:23.409821  543705 memory.go:184] no items to output this cycle
I0320 10:38:23.409826  543705 cpu.go:275] no items to output this cycle
I0320 10:38:23.873676  543705 disk_info.go:125] begin check local disk info of client
I0320 10:38:23.876229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:38:23.876234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003af580 0xc0003af5c0]
E0320 10:38:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:38:33.409813  543705 memory.go:184] no items to output this cycle
I0320 10:38:33.409932  543705 cpu.go:275] no items to output this cycle
E0320 10:38:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:38:43.409804  543705 memory.go:191] Add success.
I0320 10:38:43.409821  543705 cpu.go:282] Add success.
I0320 10:38:43.420002  543705 net.go:648] Add success.
I0320 10:38:43.423060  543705 net.go:770] primary dev: ETH0
I0320 10:38:43.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:38:43.423089  543705 net.go:698] Add success.
I0320 10:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:38:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:38:53.409791  543705 memory.go:184] no items to output this cycle
I0320 10:38:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 10:39:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:39:03.409781  543705 memory.go:184] no items to output this cycle
I0320 10:39:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 10:39:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:39:13.409808  543705 memory.go:191] Add success.
I0320 10:39:13.409808  543705 cpu.go:282] Add success.
W0320 10:39:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:39:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:39:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:39:13.420175  543705 net.go:648] Add success.
I0320 10:39:13.423287  543705 net.go:770] primary dev: ETH0
I0320 10:39:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:39:13.423311  543705 net.go:698] Add success.
I0320 10:39:13.463158  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71d6ceca-c6cf-4408-a85a-3348282194f2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:39:13.463190  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:39:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:39:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:39:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0320 10:39:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:39:14.456768  543705 disk_worker.go:494] system disk:vda1
I0320 10:39:14.456797  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:39:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:39:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:39:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:39:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:39:23.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:39:23.409922  543705 memory.go:184] no items to output this cycle
I0320 10:39:23.409934  543705 cpu.go:275] no items to output this cycle
I0320 10:39:23.877666  543705 disk_info.go:125] begin check local disk info of client
I0320 10:39:23.880182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:39:23.880187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352540 0xc000352580]
E0320 10:39:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:39:33.409809  543705 memory.go:184] no items to output this cycle
I0320 10:39:33.409827  543705 cpu.go:275] no items to output this cycle
I0320 10:39:38.433929  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:39:38.433937  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:39:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:39:43.410653  543705 memory.go:191] Add success.
I0320 10:39:43.409837  543705 cpu.go:282] Add success.
I0320 10:39:43.420366  543705 net.go:648] Add success.
I0320 10:39:43.422948  543705 net.go:770] primary dev: ETH0
I0320 10:39:43.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:39:43.422979  543705 net.go:698] Add success.
I0320 10:39:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:39:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:39:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:39:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:39:53.409766  543705 memory.go:184] no items to output this cycle
I0320 10:39:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 10:40:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:40:03.409782  543705 memory.go:184] no items to output this cycle
I0320 10:40:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 10:40:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:40:13.409785  543705 memory.go:191] Add success.
I0320 10:40:13.409788  543705 cpu.go:282] Add success.
W0320 10:40:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:40:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:40:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:40:13.420078  543705 net.go:648] Add success.
I0320 10:40:13.422986  543705 net.go:770] primary dev: ETH0
I0320 10:40:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:40:13.423018  543705 net.go:698] Add success.
I0320 10:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:40:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:40:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 10:40:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:40:14.456555  543705 disk_worker.go:494] system disk:vda1
I0320 10:40:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:40:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:40:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:40:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:40:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:40:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:40:23.409806  543705 memory.go:184] no items to output this cycle
I0320 10:40:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 10:40:23.880279  543705 disk_info.go:125] begin check local disk info of client
I0320 10:40:23.882816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:40:23.882822  543705 disk_info.go:196] parse disk info done, disk is : [0xc000364000 0xc000364040]
E0320 10:40:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:40:33.409781  543705 memory.go:184] no items to output this cycle
I0320 10:40:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 10:40:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:40:43.409817  543705 memory.go:191] Add success.
I0320 10:40:43.409825  543705 cpu.go:282] Add success.
I0320 10:40:43.420082  543705 net.go:648] Add success.
I0320 10:40:43.423030  543705 net.go:770] primary dev: ETH0
I0320 10:40:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:40:43.423055  543705 net.go:698] Add success.
I0320 10:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:40:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:40:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:40:53.409779  543705 cpu.go:275] no items to output this cycle
I0320 10:40:53.409783  543705 memory.go:184] no items to output this cycle
E0320 10:41:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:41:03.409799  543705 memory.go:184] no items to output this cycle
I0320 10:41:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 10:41:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:41:13.409785  543705 cpu.go:282] Add success.
I0320 10:41:13.409796  543705 memory.go:191] Add success.
W0320 10:41:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:41:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:41:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:41:13.420141  543705 net.go:648] Add success.
I0320 10:41:13.422980  543705 net.go:770] primary dev: ETH0
I0320 10:41:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:41:13.423009  543705 net.go:698] Add success.
I0320 10:41:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:41:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:41:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0320 10:41:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:41:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 10:41:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:41:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:41:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:41:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:41:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:41:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:41:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:41:23.409779  543705 memory.go:184] no items to output this cycle
I0320 10:41:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 10:41:23.884551  543705 disk_info.go:125] begin check local disk info of client
I0320 10:41:23.887081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:41:23.887088  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004987c0 0xc000498800]
E0320 10:41:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:41:33.409783  543705 memory.go:184] no items to output this cycle
I0320 10:41:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 10:41:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:41:43.409833  543705 memory.go:191] Add success.
I0320 10:41:43.409839  543705 cpu.go:282] Add success.
I0320 10:41:43.419977  543705 net.go:648] Add success.
I0320 10:41:43.422964  543705 net.go:770] primary dev: ETH0
I0320 10:41:43.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:41:43.422989  543705 net.go:698] Add success.
I0320 10:41:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:41:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:41:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:41:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:41:53.409796  543705 memory.go:184] no items to output this cycle
I0320 10:41:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 10:42:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:42:03.409779  543705 memory.go:184] no items to output this cycle
I0320 10:42:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 10:42:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:42:13.409787  543705 memory.go:191] Add success.
I0320 10:42:13.409794  543705 cpu.go:282] Add success.
W0320 10:42:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:42:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:42:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:42:13.420206  543705 net.go:648] Add success.
I0320 10:42:13.422936  543705 net.go:770] primary dev: ETH0
I0320 10:42:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:42:13.422959  543705 net.go:698] Add success.
I0320 10:42:13.469516  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dd083173-ef5d-4373-ab49-3b1318d162d0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:42:13.469552  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 10:42:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:42:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 10:42:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:42:14.456856  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0320 10:42:14.456861  543705 disk_worker.go:494] system disk:vda1
E0320 10:42:14.456865  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:42:14.456870  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:42:14.456914  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:42:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:42:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:42:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:42:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:42:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:42:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:42:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:42:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:42:23.409796  543705 memory.go:184] no items to output this cycle
I0320 10:42:23.409835  543705 cpu.go:275] no items to output this cycle
I0320 10:42:23.888646  543705 disk_info.go:125] begin check local disk info of client
I0320 10:42:23.891212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:42:23.891217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486b00 0xc000486b40]
E0320 10:42:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:42:33.409776  543705 memory.go:184] no items to output this cycle
I0320 10:42:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 10:42:38.434926  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:42:38.434932  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:42:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:42:43.410628  543705 memory.go:191] Add success.
I0320 10:42:43.409805  543705 cpu.go:282] Add success.
I0320 10:42:43.420350  543705 net.go:648] Add success.
I0320 10:42:43.422940  543705 net.go:770] primary dev: ETH0
I0320 10:42:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:42:43.422971  543705 net.go:698] Add success.
I0320 10:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:42:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:42:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:42:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:42:53.409776  543705 memory.go:184] no items to output this cycle
I0320 10:42:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 10:43:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:43:03.409778  543705 memory.go:184] no items to output this cycle
I0320 10:43:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 10:43:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:43:13.409793  543705 memory.go:191] Add success.
I0320 10:43:13.409797  543705 cpu.go:282] Add success.
W0320 10:43:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:43:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:43:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:43:13.420063  543705 net.go:648] Add success.
I0320 10:43:13.422793  543705 net.go:770] primary dev: ETH0
I0320 10:43:13.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:43:13.422818  543705 net.go:698] Add success.
I0320 10:43:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:43:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:43:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 10:43:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:43:14.456515  543705 disk_worker.go:494] system disk:vda1
I0320 10:43:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:43:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:43:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:43:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:43:23.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:43:23.410273  543705 memory.go:184] no items to output this cycle
I0320 10:43:23.410292  543705 cpu.go:275] no items to output this cycle
I0320 10:43:23.893109  543705 disk_info.go:125] begin check local disk info of client
I0320 10:43:23.895651  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:43:23.895657  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471040 0xc000471080]
E0320 10:43:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:43:33.409806  543705 memory.go:184] no items to output this cycle
I0320 10:43:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 10:43:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:43:43.409891  543705 memory.go:191] Add success.
I0320 10:43:43.409950  543705 cpu.go:282] Add success.
I0320 10:43:43.419726  543705 net.go:648] Add success.
I0320 10:43:43.422630  543705 net.go:770] primary dev: ETH0
I0320 10:43:43.422646  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:43:43.422660  543705 net.go:698] Add success.
I0320 10:43:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:43:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:43:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:43:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:43:53.409778  543705 memory.go:184] no items to output this cycle
I0320 10:43:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 10:44:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:44:03.409776  543705 memory.go:184] no items to output this cycle
I0320 10:44:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 10:44:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:44:13.409793  543705 memory.go:191] Add success.
I0320 10:44:13.409796  543705 cpu.go:282] Add success.
W0320 10:44:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:44:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:44:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:44:13.420111  543705 net.go:648] Add success.
I0320 10:44:13.422892  543705 net.go:770] primary dev: ETH0
I0320 10:44:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:44:13.422926  543705 net.go:698] Add success.
I0320 10:44:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:44:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:44:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 10:44:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:44:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 10:44:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:44:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:44:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:44:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:44:23.410345  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:44:23.410363  543705 memory.go:184] no items to output this cycle
I0320 10:44:23.410384  543705 cpu.go:275] no items to output this cycle
I0320 10:44:23.897210  543705 disk_info.go:125] begin check local disk info of client
I0320 10:44:23.899738  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:44:23.899744  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384200 0xc000384240]
E0320 10:44:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:44:33.409778  543705 memory.go:184] no items to output this cycle
I0320 10:44:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 10:44:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:44:43.409799  543705 memory.go:191] Add success.
I0320 10:44:43.409799  543705 cpu.go:282] Add success.
I0320 10:44:43.420152  543705 net.go:648] Add success.
I0320 10:44:43.423076  543705 net.go:770] primary dev: ETH0
I0320 10:44:43.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:44:43.423101  543705 net.go:698] Add success.
I0320 10:44:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:44:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:44:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:44:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:44:53.409791  543705 memory.go:184] no items to output this cycle
I0320 10:44:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 10:45:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:45:03.409774  543705 memory.go:184] no items to output this cycle
I0320 10:45:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 10:45:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:45:13.409786  543705 memory.go:191] Add success.
I0320 10:45:13.409800  543705 cpu.go:282] Add success.
W0320 10:45:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:45:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:45:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:45:13.420128  543705 net.go:648] Add success.
I0320 10:45:13.422993  543705 net.go:770] primary dev: ETH0
I0320 10:45:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:45:13.423021  543705 net.go:698] Add success.
I0320 10:45:13.471380  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5b3c22e2-3926-4a9f-8b0f-53e566d8de16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:45:13.471414  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:45:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:45:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:45:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 10:45:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:45:14.456615  543705 disk_worker.go:494] system disk:vda1
I0320 10:45:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:45:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:45:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:45:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:45:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:45:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:45:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:45:23.409794  543705 memory.go:184] no items to output this cycle
I0320 10:45:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 10:45:23.899825  543705 disk_info.go:125] begin check local disk info of client
I0320 10:45:23.902386  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:45:23.902391  543705 disk_info.go:196] parse disk info done, disk is : [0xc000546f40 0xc000546f80]
E0320 10:45:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:45:33.409767  543705 memory.go:184] no items to output this cycle
I0320 10:45:33.409804  543705 cpu.go:275] no items to output this cycle
I0320 10:45:38.435928  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:45:38.435936  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:45:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:45:43.410756  543705 memory.go:191] Add success.
I0320 10:45:43.409814  543705 cpu.go:282] Add success.
I0320 10:45:43.419729  543705 net.go:648] Add success.
I0320 10:45:43.422370  543705 net.go:770] primary dev: ETH0
I0320 10:45:43.422383  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:45:43.422394  543705 net.go:698] Add success.
I0320 10:45:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:45:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:45:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:45:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:45:53.409768  543705 memory.go:184] no items to output this cycle
I0320 10:45:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 10:46:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:46:03.409804  543705 memory.go:184] no items to output this cycle
I0320 10:46:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 10:46:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:46:13.409780  543705 memory.go:191] Add success.
W0320 10:46:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:46:13.409810  543705 cpu.go:282] Add success.
W0320 10:46:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:46:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:46:13.420131  543705 net.go:648] Add success.
I0320 10:46:13.422753  543705 net.go:770] primary dev: ETH0
I0320 10:46:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:46:13.422777  543705 net.go:698] Add success.
I0320 10:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:46:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:46:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 10:46:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:46:14.456521  543705 disk_worker.go:494] system disk:vda1
I0320 10:46:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:46:16.458251  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:46:16.458320  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:46:16.458355  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:46:16.472675  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:46:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:46:23.409787  543705 memory.go:184] no items to output this cycle
I0320 10:46:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 10:46:23.904616  543705 disk_info.go:125] begin check local disk info of client
I0320 10:46:23.907684  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:46:23.907690  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac580 0xc0002ac5c0]
E0320 10:46:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:46:33.409799  543705 memory.go:184] no items to output this cycle
I0320 10:46:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 10:46:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:46:43.409793  543705 memory.go:191] Add success.
I0320 10:46:43.409806  543705 cpu.go:282] Add success.
I0320 10:46:43.419730  543705 net.go:648] Add success.
I0320 10:46:43.422699  543705 net.go:770] primary dev: ETH0
I0320 10:46:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:46:43.422724  543705 net.go:698] Add success.
I0320 10:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:46:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:46:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:46:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:46:53.409779  543705 cpu.go:275] no items to output this cycle
I0320 10:46:53.409784  543705 memory.go:184] no items to output this cycle
E0320 10:47:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:47:03.409792  543705 memory.go:184] no items to output this cycle
I0320 10:47:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 10:47:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:47:13.409779  543705 memory.go:191] Add success.
I0320 10:47:13.409797  543705 cpu.go:282] Add success.
W0320 10:47:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:47:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:47:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:47:13.420218  543705 net.go:648] Add success.
I0320 10:47:13.423907  543705 net.go:770] primary dev: ETH0
I0320 10:47:13.423926  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:47:13.423948  543705 net.go:698] Add success.
I0320 10:47:13.453504  543705 event_worker.go:152] Polling the log file for events...
W0320 10:47:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:47:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 10:47:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:47:14.455895  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:47:14.455904  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:47:14.455910  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:47:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 10:47:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:47:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:47:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:47:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:47:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:47:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:47:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:47:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:47:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:47:23.409815  543705 memory.go:184] no items to output this cycle
I0320 10:47:23.409822  543705 cpu.go:275] no items to output this cycle
I0320 10:47:23.909672  543705 disk_info.go:125] begin check local disk info of client
I0320 10:47:23.912256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:47:23.912262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002acc80 0xc0002accc0]
E0320 10:47:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:47:33.409781  543705 memory.go:184] no items to output this cycle
I0320 10:47:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 10:47:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:47:43.409820  543705 memory.go:191] Add success.
I0320 10:47:43.409842  543705 cpu.go:282] Add success.
I0320 10:47:43.419977  543705 net.go:648] Add success.
I0320 10:47:43.422498  543705 net.go:770] primary dev: ETH0
I0320 10:47:43.422511  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:47:43.422524  543705 net.go:698] Add success.
I0320 10:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:47:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:47:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:47:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:47:53.409799  543705 memory.go:184] no items to output this cycle
I0320 10:47:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 10:48:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:48:03.409778  543705 cpu.go:275] no items to output this cycle
I0320 10:48:03.409788  543705 memory.go:184] no items to output this cycle
E0320 10:48:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:48:13.409816  543705 memory.go:191] Add success.
I0320 10:48:13.409818  543705 cpu.go:282] Add success.
W0320 10:48:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:48:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:48:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:48:13.420297  543705 net.go:648] Add success.
I0320 10:48:13.423342  543705 net.go:770] primary dev: ETH0
I0320 10:48:13.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:48:13.423372  543705 net.go:698] Add success.
I0320 10:48:13.699791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bca8e649-85bf-43a7-b43b-bcad7e52d0cd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:48:13.699834  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:48:14.454679  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:48:14.454928  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:48:14.454937  543705 disk_worker.go:708] disk space is not compliant
W0320 10:48:14.454940  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:48:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 10:48:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:48:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:48:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:48:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:48:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:48:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:48:23.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:48:23.410259  543705 memory.go:184] no items to output this cycle
I0320 10:48:23.410260  543705 cpu.go:275] no items to output this cycle
I0320 10:48:23.913668  543705 disk_info.go:125] begin check local disk info of client
I0320 10:48:23.916191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:48:23.916196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa900 0xc0001aa940]
E0320 10:48:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:48:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 10:48:33.409792  543705 memory.go:184] no items to output this cycle
I0320 10:48:38.436945  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:48:38.436953  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:48:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:48:43.410666  543705 memory.go:191] Add success.
I0320 10:48:43.409802  543705 cpu.go:282] Add success.
I0320 10:48:43.420473  543705 net.go:648] Add success.
I0320 10:48:43.423207  543705 net.go:770] primary dev: ETH0
I0320 10:48:43.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:48:43.423232  543705 net.go:698] Add success.
I0320 10:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:48:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:48:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:48:53.409772  543705 memory.go:184] no items to output this cycle
I0320 10:48:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:49:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:49:03.409788  543705 cpu.go:275] no items to output this cycle
I0320 10:49:03.409791  543705 memory.go:184] no items to output this cycle
E0320 10:49:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:49:13.409832  543705 memory.go:191] Add success.
I0320 10:49:13.409840  543705 cpu.go:282] Add success.
W0320 10:49:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:49:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:49:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:49:13.420142  543705 net.go:648] Add success.
I0320 10:49:13.422795  543705 net.go:770] primary dev: ETH0
I0320 10:49:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:49:13.422820  543705 net.go:698] Add success.
I0320 10:49:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:49:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:49:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 10:49:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:49:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 10:49:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:49:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:49:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:49:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:49:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:49:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:49:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:49:23.409788  543705 memory.go:184] no items to output this cycle
I0320 10:49:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 10:49:23.917669  543705 disk_info.go:125] begin check local disk info of client
I0320 10:49:23.920194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:49:23.920199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faf00 0xc0001faf40]
E0320 10:49:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:49:33.409776  543705 memory.go:184] no items to output this cycle
I0320 10:49:33.409835  543705 cpu.go:275] no items to output this cycle
E0320 10:49:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:49:43.409804  543705 memory.go:191] Add success.
I0320 10:49:43.409807  543705 cpu.go:282] Add success.
I0320 10:49:43.419962  543705 net.go:648] Add success.
I0320 10:49:43.422662  543705 net.go:770] primary dev: ETH0
I0320 10:49:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:49:43.422689  543705 net.go:698] Add success.
I0320 10:49:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:49:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:49:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:49:53.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:49:53.409894  543705 memory.go:184] no items to output this cycle
I0320 10:49:53.410033  543705 cpu.go:275] no items to output this cycle
E0320 10:50:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:50:03.409802  543705 memory.go:184] no items to output this cycle
I0320 10:50:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 10:50:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:50:13.409792  543705 memory.go:191] Add success.
I0320 10:50:13.409793  543705 cpu.go:282] Add success.
W0320 10:50:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:50:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:50:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:50:13.420177  543705 net.go:648] Add success.
I0320 10:50:13.423147  543705 net.go:770] primary dev: ETH0
I0320 10:50:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:50:13.423178  543705 net.go:698] Add success.
I0320 10:50:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:50:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:50:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 10:50:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:50:14.456494  543705 disk_worker.go:494] system disk:vda1
I0320 10:50:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:50:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:50:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:50:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:50:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:50:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:50:23.409808  543705 memory.go:184] no items to output this cycle
I0320 10:50:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 10:50:23.921668  543705 disk_info.go:125] begin check local disk info of client
I0320 10:50:23.924244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:50:23.924250  543705 disk_info.go:196] parse disk info done, disk is : [0xc000392300 0xc000392340]
E0320 10:50:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:50:33.409809  543705 memory.go:184] no items to output this cycle
I0320 10:50:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 10:50:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:50:43.409821  543705 memory.go:191] Add success.
I0320 10:50:43.409829  543705 cpu.go:282] Add success.
I0320 10:50:43.420038  543705 net.go:648] Add success.
I0320 10:50:43.422883  543705 net.go:770] primary dev: ETH0
I0320 10:50:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:50:43.422908  543705 net.go:698] Add success.
I0320 10:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:50:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:50:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:50:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:50:53.409767  543705 memory.go:184] no items to output this cycle
I0320 10:50:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 10:51:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:51:03.409806  543705 memory.go:184] no items to output this cycle
I0320 10:51:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 10:51:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:51:13.409787  543705 memory.go:191] Add success.
I0320 10:51:13.409801  543705 cpu.go:282] Add success.
W0320 10:51:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:51:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:51:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:51:13.420155  543705 net.go:648] Add success.
I0320 10:51:13.422898  543705 net.go:770] primary dev: ETH0
I0320 10:51:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:51:13.422923  543705 net.go:698] Add success.
I0320 10:51:13.469391  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75ba7b4d-b809-434c-9ab8-0f0835f26791","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:51:13.469423  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:51:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:51:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0320 10:51:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:51:14.456768  543705 disk_worker.go:494] system disk:vda1
I0320 10:51:14.456797  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:51:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:51:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:51:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:51:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:51:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:51:23.409779  543705 memory.go:184] no items to output this cycle
I0320 10:51:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 10:51:23.925671  543705 disk_info.go:125] begin check local disk info of client
I0320 10:51:23.928197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:51:23.928202  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492fc0 0xc000493000]
E0320 10:51:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:51:33.409781  543705 memory.go:184] no items to output this cycle
I0320 10:51:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 10:51:38.437958  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:51:38.437966  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:51:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:51:43.410723  543705 memory.go:191] Add success.
I0320 10:51:43.409834  543705 cpu.go:282] Add success.
I0320 10:51:43.420437  543705 net.go:648] Add success.
I0320 10:51:43.423090  543705 net.go:770] primary dev: ETH0
I0320 10:51:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:51:43.423116  543705 net.go:698] Add success.
I0320 10:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:51:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:51:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:51:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:51:53.409768  543705 memory.go:184] no items to output this cycle
I0320 10:51:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 10:52:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:52:03.409898  543705 memory.go:184] no items to output this cycle
I0320 10:52:03.409977  543705 cpu.go:275] no items to output this cycle
E0320 10:52:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:52:13.409781  543705 memory.go:191] Add success.
I0320 10:52:13.409805  543705 cpu.go:282] Add success.
W0320 10:52:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:52:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:52:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:52:13.420104  543705 net.go:648] Add success.
I0320 10:52:13.422581  543705 net.go:770] primary dev: ETH0
I0320 10:52:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:52:13.422610  543705 net.go:698] Add success.
W0320 10:52:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:52:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 10:52:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:52:14.456764  543705 disk_worker.go:494] system disk:vda1
I0320 10:52:14.456805  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:52:14.457105  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:52:14.457112  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:52:14.457117  543705 custom_config.go:64] query custom config with name: gpu
E0320 10:52:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:52:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:52:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 10:52:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:52:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:52:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:52:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:52:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:52:23.409785  543705 memory.go:184] no items to output this cycle
I0320 10:52:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 10:52:23.929672  543705 disk_info.go:125] begin check local disk info of client
I0320 10:52:23.932211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:52:23.932216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393e00 0xc000393e40]
E0320 10:52:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:52:33.409774  543705 memory.go:184] no items to output this cycle
I0320 10:52:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 10:52:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:52:43.409785  543705 memory.go:191] Add success.
I0320 10:52:43.409806  543705 cpu.go:282] Add success.
I0320 10:52:43.420019  543705 net.go:648] Add success.
I0320 10:52:43.422667  543705 net.go:770] primary dev: ETH0
I0320 10:52:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:52:43.422693  543705 net.go:698] Add success.
I0320 10:52:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:52:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:52:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:52:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:52:53.409777  543705 memory.go:184] no items to output this cycle
I0320 10:52:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 10:53:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:53:03.409794  543705 memory.go:184] no items to output this cycle
I0320 10:53:03.409807  543705 cpu.go:275] no items to output this cycle
I0320 10:53:13.409897  543705 cpu.go:282] Add success.
E0320 10:53:13.409959  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:53:13.409985  543705 memory.go:191] Add success.
W0320 10:53:13.410019  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:53:13.410040  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:53:13.410044  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:53:13.419717  543705 net.go:648] Add success.
I0320 10:53:13.422716  543705 net.go:770] primary dev: ETH0
I0320 10:53:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:53:13.422744  543705 net.go:698] Add success.
I0320 10:53:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:53:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:53:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 10:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:53:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 10:53:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:53:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:53:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:53:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:53:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:53:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:53:23.409777  543705 memory.go:184] no items to output this cycle
I0320 10:53:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 10:53:23.933670  543705 disk_info.go:125] begin check local disk info of client
I0320 10:53:23.936251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:53:23.936256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abdc0 0xc0001abe00]
E0320 10:53:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:53:33.409805  543705 memory.go:184] no items to output this cycle
I0320 10:53:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 10:53:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:53:43.409796  543705 memory.go:191] Add success.
I0320 10:53:43.409811  543705 cpu.go:282] Add success.
I0320 10:53:43.419946  543705 net.go:648] Add success.
I0320 10:53:43.422925  543705 net.go:770] primary dev: ETH0
I0320 10:53:43.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:53:43.422963  543705 net.go:698] Add success.
I0320 10:53:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:53:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:53:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:53:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:53:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:53:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 10:54:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:54:03.409803  543705 memory.go:184] no items to output this cycle
I0320 10:54:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 10:54:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:54:13.409783  543705 memory.go:191] Add success.
I0320 10:54:13.409804  543705 cpu.go:282] Add success.
W0320 10:54:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:54:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:54:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:54:13.420245  543705 net.go:648] Add success.
I0320 10:54:13.423360  543705 net.go:770] primary dev: ETH0
I0320 10:54:13.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:54:13.423400  543705 net.go:698] Add success.
I0320 10:54:13.508570  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d54d6228-74a9-4956-8d84-cf55a17173c3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:54:13.508610  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 10:54:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:54:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:54:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 10:54:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:54:14.456678  543705 disk_worker.go:494] system disk:vda1
I0320 10:54:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:54:15.455608  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:54:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:54:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:54:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:54:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:54:23.409780  543705 memory.go:184] no items to output this cycle
I0320 10:54:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 10:54:23.937671  543705 disk_info.go:125] begin check local disk info of client
I0320 10:54:23.940175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:54:23.940181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc40 0xc0001abc80]
E0320 10:54:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:54:33.409806  543705 memory.go:184] no items to output this cycle
I0320 10:54:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 10:54:38.438935  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:54:38.438942  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:54:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:54:43.410724  543705 memory.go:191] Add success.
I0320 10:54:43.409796  543705 cpu.go:282] Add success.
I0320 10:54:43.420430  543705 net.go:648] Add success.
I0320 10:54:43.423111  543705 net.go:770] primary dev: ETH0
I0320 10:54:43.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:54:43.423142  543705 net.go:698] Add success.
I0320 10:54:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:54:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:54:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:54:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:54:53.409778  543705 memory.go:184] no items to output this cycle
I0320 10:54:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 10:55:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:55:03.409792  543705 memory.go:184] no items to output this cycle
I0320 10:55:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 10:55:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:55:13.409791  543705 memory.go:191] Add success.
I0320 10:55:13.409796  543705 cpu.go:282] Add success.
W0320 10:55:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:55:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:55:13.420047  543705 net.go:648] Add success.
I0320 10:55:13.423016  543705 net.go:770] primary dev: ETH0
I0320 10:55:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:55:13.423041  543705 net.go:698] Add success.
I0320 10:55:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:55:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:55:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 10:55:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:55:14.456587  543705 disk_worker.go:494] system disk:vda1
I0320 10:55:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:55:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:55:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:55:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:55:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:55:23.410342  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:55:23.410361  543705 memory.go:184] no items to output this cycle
I0320 10:55:23.410378  543705 cpu.go:275] no items to output this cycle
I0320 10:55:23.941671  543705 disk_info.go:125] begin check local disk info of client
I0320 10:55:23.944188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:55:23.944195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb500 0xc0001fb540]
E0320 10:55:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:55:33.409773  543705 memory.go:184] no items to output this cycle
I0320 10:55:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 10:55:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:55:43.409793  543705 memory.go:191] Add success.
I0320 10:55:43.409817  543705 cpu.go:282] Add success.
I0320 10:55:43.419917  543705 net.go:648] Add success.
I0320 10:55:43.422429  543705 net.go:770] primary dev: ETH0
I0320 10:55:43.422448  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:55:43.422463  543705 net.go:698] Add success.
I0320 10:55:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:55:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:55:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:55:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:55:53.409778  543705 memory.go:184] no items to output this cycle
I0320 10:55:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 10:56:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:56:03.409796  543705 memory.go:184] no items to output this cycle
I0320 10:56:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 10:56:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:56:13.409779  543705 memory.go:191] Add success.
W0320 10:56:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:56:13.409805  543705 cpu.go:282] Add success.
W0320 10:56:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:56:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:56:13.420505  543705 net.go:648] Add success.
I0320 10:56:13.423560  543705 net.go:770] primary dev: ETH0
I0320 10:56:13.423574  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:56:13.423586  543705 net.go:698] Add success.
I0320 10:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:56:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:56:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0320 10:56:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:56:14.456481  543705 disk_worker.go:494] system disk:vda1
I0320 10:56:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:56:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:56:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:56:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:56:23.409812  543705 memory.go:184] no items to output this cycle
I0320 10:56:23.409822  543705 cpu.go:275] no items to output this cycle
I0320 10:56:23.945669  543705 disk_info.go:125] begin check local disk info of client
I0320 10:56:23.948196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:56:23.948201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329c40 0xc000329c80]
E0320 10:56:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:56:33.409806  543705 memory.go:184] no items to output this cycle
I0320 10:56:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 10:56:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:56:43.409823  543705 memory.go:191] Add success.
I0320 10:56:43.409834  543705 cpu.go:282] Add success.
I0320 10:56:43.420014  543705 net.go:648] Add success.
I0320 10:56:43.422826  543705 net.go:770] primary dev: ETH0
I0320 10:56:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:56:43.422856  543705 net.go:698] Add success.
I0320 10:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:56:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:56:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:56:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:56:53.409769  543705 memory.go:184] no items to output this cycle
I0320 10:56:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 10:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:57:03.409777  543705 memory.go:184] no items to output this cycle
I0320 10:57:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 10:57:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:57:13.409810  543705 memory.go:191] Add success.
I0320 10:57:13.409818  543705 cpu.go:282] Add success.
W0320 10:57:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:57:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:57:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:57:13.420132  543705 net.go:648] Add success.
I0320 10:57:13.423017  543705 net.go:770] primary dev: ETH0
I0320 10:57:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:57:13.423043  543705 net.go:698] Add success.
I0320 10:57:13.429345  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 10:57:13.453527  543705 event_worker.go:152] Polling the log file for events...
I0320 10:57:13.468376  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dd329215-d506-4bd5-b97d-3ddf29e4c898","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 10:57:13.468415  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 10:57:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:57:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 10:57:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0320 10:57:14.456161  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 10:57:14.456171  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 10:57:14.456177  543705 custom_config.go:64] query custom config with name: gpu
I0320 10:57:14.456456  543705 disk_worker.go:494] system disk:vda1
I0320 10:57:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 10:57:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 10:57:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 10:57:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 10:57:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:57:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:57:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:57:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:57:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:57:23.409815  543705 memory.go:184] no items to output this cycle
I0320 10:57:23.409830  543705 cpu.go:275] no items to output this cycle
I0320 10:57:23.949677  543705 disk_info.go:125] begin check local disk info of client
I0320 10:57:23.952242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:57:23.952248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e80 0xc0000c4ec0]
E0320 10:57:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:57:33.409785  543705 memory.go:184] no items to output this cycle
I0320 10:57:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 10:57:38.439989  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 10:57:38.439997  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 10:57:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:57:43.410819  543705 memory.go:191] Add success.
I0320 10:57:43.409842  543705 cpu.go:282] Add success.
I0320 10:57:43.420477  543705 net.go:648] Add success.
I0320 10:57:43.423081  543705 net.go:770] primary dev: ETH0
I0320 10:57:43.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:57:43.423105  543705 net.go:698] Add success.
I0320 10:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:57:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:57:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:57:53.409791  543705 cpu.go:275] no items to output this cycle
I0320 10:57:53.409794  543705 memory.go:184] no items to output this cycle
E0320 10:58:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:58:03.409800  543705 cpu.go:275] no items to output this cycle
I0320 10:58:03.409809  543705 memory.go:184] no items to output this cycle
E0320 10:58:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:58:13.409817  543705 memory.go:191] Add success.
I0320 10:58:13.409827  543705 cpu.go:282] Add success.
W0320 10:58:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 10:58:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:58:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:58:13.420125  543705 net.go:648] Add success.
I0320 10:58:13.422952  543705 net.go:770] primary dev: ETH0
I0320 10:58:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:58:13.422977  543705 net.go:698] Add success.
I0320 10:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:58:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:58:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 10:58:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:58:14.456564  543705 disk_worker.go:494] system disk:vda1
I0320 10:58:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:58:15.456012  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:58:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:58:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:58:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:58:23.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:58:23.410269  543705 memory.go:184] no items to output this cycle
I0320 10:58:23.410285  543705 cpu.go:275] no items to output this cycle
I0320 10:58:23.953672  543705 disk_info.go:125] begin check local disk info of client
I0320 10:58:23.956280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:58:23.956286  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490a40 0xc000490a80]
E0320 10:58:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:58:33.409793  543705 memory.go:184] no items to output this cycle
I0320 10:58:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 10:58:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:58:43.409802  543705 memory.go:191] Add success.
I0320 10:58:43.409802  543705 cpu.go:282] Add success.
I0320 10:58:43.419894  543705 net.go:648] Add success.
I0320 10:58:43.422699  543705 net.go:770] primary dev: ETH0
I0320 10:58:43.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:58:43.422723  543705 net.go:698] Add success.
I0320 10:58:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:58:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:58:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:58:53.409779  543705 memory.go:184] no items to output this cycle
I0320 10:58:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 10:59:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:59:03.409804  543705 memory.go:184] no items to output this cycle
I0320 10:59:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 10:59:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:59:13.409775  543705 memory.go:191] Add success.
W0320 10:59:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 10:59:13.409819  543705 cpu.go:282] Add success.
W0320 10:59:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 10:59:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 10:59:13.420076  543705 net.go:648] Add success.
I0320 10:59:13.422741  543705 net.go:770] primary dev: ETH0
I0320 10:59:13.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:59:13.422767  543705 net.go:698] Add success.
I0320 10:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 10:59:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 10:59:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 10:59:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 10:59:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 10:59:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 10:59:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 10:59:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 10:59:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 10:59:23.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:59:23.409884  543705 memory.go:184] no items to output this cycle
I0320 10:59:23.409956  543705 cpu.go:275] no items to output this cycle
I0320 10:59:23.957691  543705 disk_info.go:125] begin check local disk info of client
I0320 10:59:23.960103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 10:59:23.960109  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034af00 0xc00034af40]
E0320 10:59:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:59:33.409779  543705 memory.go:184] no items to output this cycle
I0320 10:59:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 10:59:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:59:43.409823  543705 memory.go:191] Add success.
I0320 10:59:43.409825  543705 cpu.go:282] Add success.
I0320 10:59:43.419994  543705 net.go:648] Add success.
I0320 10:59:43.422824  543705 net.go:770] primary dev: ETH0
I0320 10:59:43.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0320 10:59:43.422858  543705 net.go:698] Add success.
I0320 10:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 10:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 10:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 10:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 10:59:53.409779  543705 memory.go:184] no items to output this cycle
I0320 10:59:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:00:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:00:03.409771  543705 memory.go:184] no items to output this cycle
I0320 11:00:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 11:00:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:00:13.409793  543705 cpu.go:282] Add success.
I0320 11:00:13.409801  543705 memory.go:191] Add success.
W0320 11:00:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:00:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:00:13.419834  543705 net.go:770] primary dev: ETH0
I0320 11:00:13.419846  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:00:13.419858  543705 net.go:698] Add success.
I0320 11:00:13.420208  543705 net.go:648] Add success.
I0320 11:00:13.477260  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bda8dfe1-ce3b-4e25-ad68-dfd01a1f5b74","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:00:13.477295  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:00:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:00:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:00:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 11:00:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:00:14.456470  543705 disk_worker.go:494] system disk:vda1
I0320 11:00:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:00:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:00:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:00:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:00:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:00:23.409774  543705 memory.go:184] no items to output this cycle
I0320 11:00:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 11:00:23.960201  543705 disk_info.go:125] begin check local disk info of client
I0320 11:00:23.962707  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:00:23.962712  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e0900 0xc0004e0940]
E0320 11:00:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:00:33.409777  543705 memory.go:184] no items to output this cycle
I0320 11:00:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 11:00:38.440978  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:00:38.440985  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:00:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:00:43.410662  543705 memory.go:191] Add success.
I0320 11:00:43.409827  543705 cpu.go:282] Add success.
I0320 11:00:43.420366  543705 net.go:648] Add success.
I0320 11:00:43.423420  543705 net.go:770] primary dev: ETH0
I0320 11:00:43.423435  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:00:43.423450  543705 net.go:698] Add success.
I0320 11:00:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:00:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:00:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:00:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:00:53.409793  543705 memory.go:184] no items to output this cycle
I0320 11:00:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 11:01:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:01:03.409766  543705 memory.go:184] no items to output this cycle
I0320 11:01:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 11:01:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:01:13.409806  543705 memory.go:191] Add success.
I0320 11:01:13.409818  543705 cpu.go:282] Add success.
W0320 11:01:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:01:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:01:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:01:13.420079  543705 net.go:648] Add success.
I0320 11:01:13.423154  543705 net.go:770] primary dev: ETH0
I0320 11:01:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:01:13.423184  543705 net.go:698] Add success.
I0320 11:01:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:01:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:01:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 11:01:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:01:14.456569  543705 disk_worker.go:494] system disk:vda1
I0320 11:01:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:01:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:01:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:01:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:01:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:01:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:01:23.409803  543705 memory.go:184] no items to output this cycle
I0320 11:01:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 11:01:23.964939  543705 disk_info.go:125] begin check local disk info of client
I0320 11:01:23.967408  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:01:23.967415  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490000 0xc000490040]
E0320 11:01:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:01:33.409768  543705 memory.go:184] no items to output this cycle
I0320 11:01:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 11:01:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:01:43.409831  543705 memory.go:191] Add success.
I0320 11:01:43.409836  543705 cpu.go:282] Add success.
I0320 11:01:43.420085  543705 net.go:648] Add success.
I0320 11:01:43.423051  543705 net.go:770] primary dev: ETH0
I0320 11:01:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:01:43.423076  543705 net.go:698] Add success.
I0320 11:01:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:01:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:01:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:01:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:01:53.409780  543705 memory.go:184] no items to output this cycle
I0320 11:01:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:02:03.409774  543705 memory.go:184] no items to output this cycle
I0320 11:02:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 11:02:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:02:13.409775  543705 memory.go:191] Add success.
W0320 11:02:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:02:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:02:13.409811  543705 cpu.go:282] Add success.
I0320 11:02:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:02:13.420080  543705 net.go:648] Add success.
I0320 11:02:13.422873  543705 net.go:770] primary dev: ETH0
I0320 11:02:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:02:13.422897  543705 net.go:698] Add success.
W0320 11:02:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:02:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 11:02:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:02:14.455912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:02:14.455921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:02:14.455928  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:02:14.456564  543705 disk_worker.go:494] system disk:vda1
I0320 11:02:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:02:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:02:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:02:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:02:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:02:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:02:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:02:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:02:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:02:23.409819  543705 memory.go:184] no items to output this cycle
I0320 11:02:23.409822  543705 cpu.go:275] no items to output this cycle
I0320 11:02:23.969671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:02:23.972170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:02:23.972175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003406c0 0xc000340700]
E0320 11:02:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:02:33.409769  543705 memory.go:184] no items to output this cycle
I0320 11:02:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 11:02:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:02:43.409793  543705 memory.go:191] Add success.
I0320 11:02:43.409799  543705 cpu.go:282] Add success.
I0320 11:02:43.420107  543705 net.go:648] Add success.
I0320 11:02:43.422868  543705 net.go:770] primary dev: ETH0
I0320 11:02:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:02:43.422892  543705 net.go:698] Add success.
I0320 11:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:02:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:02:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:02:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:02:53.409772  543705 memory.go:184] no items to output this cycle
I0320 11:02:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 11:03:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:03:03.409776  543705 memory.go:184] no items to output this cycle
I0320 11:03:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 11:03:13.410393  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:03:13.410426  543705 memory.go:191] Add success.
I0320 11:03:13.410434  543705 cpu.go:282] Add success.
W0320 11:03:13.410458  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:03:13.410474  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:03:13.410478  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:03:13.420707  543705 net.go:648] Add success.
I0320 11:03:13.423651  543705 net.go:770] primary dev: ETH0
I0320 11:03:13.423665  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:03:13.423676  543705 net.go:698] Add success.
I0320 11:03:13.469297  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"56c1b8fc-5886-4b40-bfbb-8bb470ad132d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:03:13.469332  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:03:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:03:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:03:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 11:03:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:03:14.456776  543705 disk_worker.go:494] system disk:vda1
I0320 11:03:14.456805  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:03:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:03:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:03:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:03:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:03:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:03:23.409785  543705 memory.go:184] no items to output this cycle
I0320 11:03:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 11:03:23.973674  543705 disk_info.go:125] begin check local disk info of client
I0320 11:03:23.976199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:03:23.976204  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340080 0xc0003400c0]
E0320 11:03:33.410575  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:03:33.410592  543705 memory.go:184] no items to output this cycle
I0320 11:03:33.410632  543705 cpu.go:275] no items to output this cycle
I0320 11:03:38.441966  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:03:38.441974  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:03:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:03:43.410666  543705 memory.go:191] Add success.
I0320 11:03:43.409810  543705 cpu.go:282] Add success.
I0320 11:03:43.420191  543705 net.go:770] primary dev: ETH0
I0320 11:03:43.420206  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:03:43.420219  543705 net.go:698] Add success.
I0320 11:03:43.420760  543705 net.go:648] Add success.
I0320 11:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:03:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:03:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:03:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:03:53.409802  543705 memory.go:184] no items to output this cycle
I0320 11:03:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 11:04:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:04:03.409778  543705 memory.go:184] no items to output this cycle
I0320 11:04:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 11:04:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:04:13.409782  543705 memory.go:191] Add success.
W0320 11:04:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:04:13.409808  543705 cpu.go:282] Add success.
W0320 11:04:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:04:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:04:13.420046  543705 net.go:648] Add success.
I0320 11:04:13.422589  543705 net.go:770] primary dev: ETH0
I0320 11:04:13.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:04:13.422613  543705 net.go:698] Add success.
I0320 11:04:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:04:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:04:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 11:04:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:04:14.456504  543705 disk_worker.go:494] system disk:vda1
I0320 11:04:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:04:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:04:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:04:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:04:16.472478  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:04:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:04:23.409777  543705 memory.go:184] no items to output this cycle
I0320 11:04:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 11:04:23.977670  543705 disk_info.go:125] begin check local disk info of client
I0320 11:04:23.980202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:04:23.980207  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ac00 0xc00039ac40]
E0320 11:04:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:04:33.409772  543705 memory.go:184] no items to output this cycle
I0320 11:04:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 11:04:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:04:43.409809  543705 memory.go:191] Add success.
I0320 11:04:43.409820  543705 cpu.go:282] Add success.
I0320 11:04:43.419880  543705 net.go:648] Add success.
I0320 11:04:43.422623  543705 net.go:770] primary dev: ETH0
I0320 11:04:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:04:43.422650  543705 net.go:698] Add success.
I0320 11:04:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:04:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:04:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:04:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:04:53.409767  543705 memory.go:184] no items to output this cycle
I0320 11:04:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 11:05:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:05:03.409980  543705 memory.go:184] no items to output this cycle
I0320 11:05:03.410012  543705 cpu.go:275] no items to output this cycle
E0320 11:05:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:05:13.409811  543705 memory.go:191] Add success.
I0320 11:05:13.409817  543705 cpu.go:282] Add success.
W0320 11:05:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:05:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:05:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:05:13.420155  543705 net.go:648] Add success.
I0320 11:05:13.422659  543705 net.go:770] primary dev: ETH0
I0320 11:05:13.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:05:13.422684  543705 net.go:698] Add success.
I0320 11:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:05:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:05:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 11:05:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:05:14.456492  543705 disk_worker.go:494] system disk:vda1
I0320 11:05:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:05:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:05:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:05:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:05:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:05:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:05:23.409806  543705 memory.go:184] no items to output this cycle
I0320 11:05:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 11:05:23.981669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:05:23.984231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:05:23.984236  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471e00 0xc000471e40]
E0320 11:05:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:05:33.409799  543705 memory.go:184] no items to output this cycle
I0320 11:05:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 11:05:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:05:43.409792  543705 memory.go:191] Add success.
I0320 11:05:43.409812  543705 cpu.go:282] Add success.
I0320 11:05:43.420010  543705 net.go:648] Add success.
I0320 11:05:43.423138  543705 net.go:770] primary dev: ETH0
I0320 11:05:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:05:43.423163  543705 net.go:698] Add success.
I0320 11:05:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:05:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:05:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:05:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:05:53.409797  543705 memory.go:184] no items to output this cycle
I0320 11:05:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 11:06:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:06:03.409780  543705 memory.go:184] no items to output this cycle
I0320 11:06:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 11:06:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:06:13.409787  543705 memory.go:191] Add success.
I0320 11:06:13.409787  543705 cpu.go:282] Add success.
W0320 11:06:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:06:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:06:13.419739  543705 net.go:648] Add success.
I0320 11:06:13.422893  543705 net.go:770] primary dev: ETH0
I0320 11:06:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:06:13.422917  543705 net.go:698] Add success.
I0320 11:06:13.468761  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b3c055d4-54a8-4948-bc86-de93ae891588","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:06:13.468792  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:06:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:06:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:06:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 11:06:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:06:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 11:06:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:06:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:06:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:06:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:06:23.409776  543705 memory.go:184] no items to output this cycle
I0320 11:06:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 11:06:23.985672  543705 disk_info.go:125] begin check local disk info of client
I0320 11:06:23.988205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:06:23.988211  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470540 0xc000470580]
E0320 11:06:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:06:33.409775  543705 memory.go:184] no items to output this cycle
I0320 11:06:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 11:06:38.442971  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:06:38.442978  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:06:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:06:43.410592  543705 memory.go:191] Add success.
I0320 11:06:43.409802  543705 cpu.go:282] Add success.
I0320 11:06:43.420299  543705 net.go:648] Add success.
I0320 11:06:43.422769  543705 net.go:770] primary dev: ETH0
I0320 11:06:43.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:06:43.422796  543705 net.go:698] Add success.
I0320 11:06:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:06:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:06:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:06:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:06:53.409782  543705 memory.go:184] no items to output this cycle
I0320 11:06:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 11:07:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:07:03.409780  543705 memory.go:184] no items to output this cycle
I0320 11:07:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 11:07:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:07:13.409785  543705 memory.go:191] Add success.
I0320 11:07:13.409796  543705 cpu.go:282] Add success.
W0320 11:07:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:07:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:07:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:07:13.419807  543705 net.go:648] Add success.
I0320 11:07:13.422824  543705 net.go:770] primary dev: ETH0
I0320 11:07:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:07:13.422849  543705 net.go:698] Add success.
I0320 11:07:13.453535  543705 event_worker.go:152] Polling the log file for events...
W0320 11:07:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:07:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 11:07:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:07:14.456965  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:07:14.456974  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:07:14.456980  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:07:14.457018  543705 disk_worker.go:494] system disk:vda1
I0320 11:07:14.457048  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:07:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:07:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:07:16.458022  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:07:16.458021  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:07:16.458087  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:07:16.458111  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:07:16.472518  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:07:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:07:23.409787  543705 memory.go:184] no items to output this cycle
I0320 11:07:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 11:07:23.989671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:07:23.992245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:07:23.992251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4740 0xc0000c4780]
E0320 11:07:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:07:33.409775  543705 memory.go:184] no items to output this cycle
I0320 11:07:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 11:07:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:07:43.409801  543705 memory.go:191] Add success.
I0320 11:07:43.409802  543705 cpu.go:282] Add success.
I0320 11:07:43.419894  543705 net.go:648] Add success.
I0320 11:07:43.422868  543705 net.go:770] primary dev: ETH0
I0320 11:07:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:07:43.422894  543705 net.go:698] Add success.
I0320 11:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:07:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:07:53.409798  543705 memory.go:184] no items to output this cycle
I0320 11:07:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 11:08:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:08:03.409777  543705 memory.go:184] no items to output this cycle
I0320 11:08:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 11:08:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:08:13.409815  543705 memory.go:191] Add success.
I0320 11:08:13.409819  543705 cpu.go:282] Add success.
W0320 11:08:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:08:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:08:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:08:13.420250  543705 net.go:648] Add success.
I0320 11:08:13.423055  543705 net.go:770] primary dev: ETH0
I0320 11:08:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:08:13.423079  543705 net.go:698] Add success.
I0320 11:08:14.454008  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:08:14.454227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:08:14.454237  543705 disk_worker.go:708] disk space is not compliant
W0320 11:08:14.454240  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:08:14.455619  543705 disk_worker.go:494] system disk:vda1
I0320 11:08:14.455647  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:08:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:08:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:08:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:08:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:08:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:08:23.409823  543705 memory.go:184] no items to output this cycle
I0320 11:08:23.409826  543705 cpu.go:275] no items to output this cycle
I0320 11:08:23.993669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:08:23.996229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:08:23.996234  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b100 0xc00047b140]
E0320 11:08:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:08:33.409778  543705 memory.go:184] no items to output this cycle
I0320 11:08:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 11:08:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:08:43.409799  543705 memory.go:191] Add success.
I0320 11:08:43.409802  543705 cpu.go:282] Add success.
I0320 11:08:43.419952  543705 net.go:648] Add success.
I0320 11:08:43.423047  543705 net.go:770] primary dev: ETH0
I0320 11:08:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:08:43.423073  543705 net.go:698] Add success.
I0320 11:08:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:08:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:08:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:08:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:08:53.409775  543705 memory.go:184] no items to output this cycle
I0320 11:08:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 11:09:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:09:03.409761  543705 memory.go:184] no items to output this cycle
I0320 11:09:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 11:09:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:09:13.409809  543705 memory.go:191] Add success.
I0320 11:09:13.409821  543705 cpu.go:282] Add success.
W0320 11:09:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:09:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:09:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:09:13.420067  543705 net.go:648] Add success.
I0320 11:09:13.423511  543705 net.go:770] primary dev: ETH0
I0320 11:09:13.423597  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:09:13.423619  543705 net.go:698] Add success.
I0320 11:09:13.470178  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"32ef6a57-4b76-4444-9507-0dd29c5e3ac2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:09:13.470208  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:09:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:09:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:09:14.455250  543705 disk_worker.go:708] disk space is not compliant
W0320 11:09:14.455255  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:09:14.457008  543705 disk_worker.go:494] system disk:vda1
I0320 11:09:14.457038  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:09:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:09:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:09:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:09:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:09:23.409808  543705 memory.go:184] no items to output this cycle
I0320 11:09:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 11:09:23.997672  543705 disk_info.go:125] begin check local disk info of client
I0320 11:09:24.000207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:09:24.000212  543705 disk_info.go:196] parse disk info done, disk is : [0xc000547880 0xc0005478c0]
E0320 11:09:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:09:33.409769  543705 memory.go:184] no items to output this cycle
I0320 11:09:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 11:09:38.443973  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:09:38.443980  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:09:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:09:43.410737  543705 memory.go:191] Add success.
I0320 11:09:43.409825  543705 cpu.go:282] Add success.
I0320 11:09:43.420452  543705 net.go:648] Add success.
I0320 11:09:43.423431  543705 net.go:770] primary dev: ETH0
I0320 11:09:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:09:43.423460  543705 net.go:698] Add success.
I0320 11:09:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:09:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:09:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:09:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:09:53.409786  543705 memory.go:184] no items to output this cycle
I0320 11:09:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 11:10:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:10:03.409775  543705 memory.go:184] no items to output this cycle
I0320 11:10:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 11:10:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:10:13.409813  543705 cpu.go:282] Add success.
I0320 11:10:13.409822  543705 memory.go:191] Add success.
W0320 11:10:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:10:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:10:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:10:13.420222  543705 net.go:648] Add success.
I0320 11:10:13.423186  543705 net.go:770] primary dev: ETH0
I0320 11:10:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:10:13.423215  543705 net.go:698] Add success.
I0320 11:10:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:10:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:10:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 11:10:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:10:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 11:10:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:10:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:10:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:10:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:10:23.409805  543705 memory.go:184] no items to output this cycle
I0320 11:10:23.409824  543705 cpu.go:275] no items to output this cycle
I0320 11:10:24.001673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:10:24.004191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:10:24.004197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd040 0xc0002bd080]
E0320 11:10:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:10:33.409766  543705 memory.go:184] no items to output this cycle
I0320 11:10:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 11:10:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:10:43.409813  543705 memory.go:191] Add success.
I0320 11:10:43.409822  543705 cpu.go:282] Add success.
I0320 11:10:43.419994  543705 net.go:648] Add success.
I0320 11:10:43.422989  543705 net.go:770] primary dev: ETH0
I0320 11:10:43.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:10:43.423016  543705 net.go:698] Add success.
I0320 11:10:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:10:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:10:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:10:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:10:53.409777  543705 memory.go:184] no items to output this cycle
I0320 11:10:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 11:11:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:11:03.409782  543705 memory.go:184] no items to output this cycle
I0320 11:11:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 11:11:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:11:13.409795  543705 memory.go:191] Add success.
I0320 11:11:13.409811  543705 cpu.go:282] Add success.
W0320 11:11:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:11:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:11:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:11:13.420303  543705 net.go:648] Add success.
I0320 11:11:13.422991  543705 net.go:770] primary dev: ETH0
I0320 11:11:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:11:13.423016  543705 net.go:698] Add success.
I0320 11:11:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:11:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:11:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 11:11:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:11:14.457621  543705 disk_worker.go:494] system disk:vda1
I0320 11:11:14.457676  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:11:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:11:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:11:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:11:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:11:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:11:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:11:23.409782  543705 memory.go:184] no items to output this cycle
I0320 11:11:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 11:11:24.005669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:11:24.008184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:11:24.008189  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd440 0xc0002bd480]
E0320 11:11:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:11:33.409806  543705 memory.go:184] no items to output this cycle
I0320 11:11:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 11:11:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:11:43.409810  543705 cpu.go:282] Add success.
I0320 11:11:43.409814  543705 memory.go:191] Add success.
I0320 11:11:43.420015  543705 net.go:648] Add success.
I0320 11:11:43.422849  543705 net.go:770] primary dev: ETH0
I0320 11:11:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:11:43.422877  543705 net.go:698] Add success.
I0320 11:11:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:11:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:11:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:11:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:11:53.409794  543705 memory.go:184] no items to output this cycle
I0320 11:11:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 11:12:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:12:03.409780  543705 memory.go:184] no items to output this cycle
I0320 11:12:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 11:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:12:13.409800  543705 memory.go:191] Add success.
I0320 11:12:13.409816  543705 cpu.go:282] Add success.
W0320 11:12:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:12:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:12:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:12:13.420133  543705 net.go:648] Add success.
I0320 11:12:13.422843  543705 net.go:770] primary dev: ETH0
I0320 11:12:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:12:13.422869  543705 net.go:698] Add success.
I0320 11:12:13.463721  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f37c6eaf-d07c-4078-a95a-59b479bc8b7b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:12:13.463757  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 11:12:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:12:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 11:12:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:12:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:12:14.456929  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:12:14.456934  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:12:14.457108  543705 disk_worker.go:494] system disk:vda1
I0320 11:12:14.457150  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:12:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:12:15.456865  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:12:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:12:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:12:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:12:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:12:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:12:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:12:23.409795  543705 memory.go:184] no items to output this cycle
I0320 11:12:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 11:12:24.009673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:12:24.012208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:12:24.012214  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487cc0 0xc000487d00]
E0320 11:12:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:12:33.409805  543705 memory.go:184] no items to output this cycle
I0320 11:12:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 11:12:38.444973  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:12:38.444980  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:12:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:12:43.410750  543705 memory.go:191] Add success.
I0320 11:12:43.409823  543705 cpu.go:282] Add success.
I0320 11:12:43.420425  543705 net.go:648] Add success.
I0320 11:12:43.423069  543705 net.go:770] primary dev: ETH0
I0320 11:12:43.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:12:43.423094  543705 net.go:698] Add success.
I0320 11:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:12:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:12:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:12:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:12:53.409789  543705 memory.go:184] no items to output this cycle
I0320 11:12:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 11:13:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:13:03.409815  543705 memory.go:184] no items to output this cycle
I0320 11:13:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 11:13:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:13:13.409796  543705 memory.go:191] Add success.
W0320 11:13:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:13:13.409823  543705 cpu.go:282] Add success.
W0320 11:13:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:13:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:13:13.420116  543705 net.go:648] Add success.
I0320 11:13:13.423089  543705 net.go:770] primary dev: ETH0
I0320 11:13:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:13:13.423119  543705 net.go:698] Add success.
I0320 11:13:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:13:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:13:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 11:13:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:13:14.456571  543705 disk_worker.go:494] system disk:vda1
I0320 11:13:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:13:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:13:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:13:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:13:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:13:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:13:23.409790  543705 memory.go:184] no items to output this cycle
I0320 11:13:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 11:13:24.013672  543705 disk_info.go:125] begin check local disk info of client
I0320 11:13:24.016197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:13:24.016204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5cc0 0xc0003d5d00]
E0320 11:13:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:13:33.409812  543705 memory.go:184] no items to output this cycle
I0320 11:13:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 11:13:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:13:43.409793  543705 memory.go:191] Add success.
I0320 11:13:43.409820  543705 cpu.go:282] Add success.
I0320 11:13:43.419911  543705 net.go:648] Add success.
I0320 11:13:43.422629  543705 net.go:770] primary dev: ETH0
I0320 11:13:43.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:13:43.422660  543705 net.go:698] Add success.
I0320 11:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:13:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:13:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:13:53.409779  543705 memory.go:184] no items to output this cycle
I0320 11:13:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:14:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:14:03.409767  543705 memory.go:184] no items to output this cycle
I0320 11:14:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 11:14:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:14:13.409818  543705 memory.go:191] Add success.
I0320 11:14:13.409825  543705 cpu.go:282] Add success.
W0320 11:14:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:14:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:14:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:14:13.420103  543705 net.go:648] Add success.
I0320 11:14:13.423162  543705 net.go:770] primary dev: ETH0
I0320 11:14:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:14:13.423188  543705 net.go:698] Add success.
I0320 11:14:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:14:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:14:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 11:14:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:14:14.456476  543705 disk_worker.go:494] system disk:vda1
I0320 11:14:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:14:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:14:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:14:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:14:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:14:23.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:14:23.409912  543705 cpu.go:275] no items to output this cycle
I0320 11:14:23.409959  543705 memory.go:184] no items to output this cycle
I0320 11:14:24.017669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:14:24.020201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:14:24.020207  543705 disk_info.go:196] parse disk info done, disk is : [0xc000254140 0xc000254180]
E0320 11:14:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:14:33.409775  543705 memory.go:184] no items to output this cycle
I0320 11:14:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 11:14:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:14:43.409784  543705 memory.go:191] Add success.
I0320 11:14:43.409803  543705 cpu.go:282] Add success.
I0320 11:14:43.419987  543705 net.go:648] Add success.
I0320 11:14:43.422811  543705 net.go:770] primary dev: ETH0
I0320 11:14:43.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:14:43.422835  543705 net.go:698] Add success.
I0320 11:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:14:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:14:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:14:53.409783  543705 memory.go:184] no items to output this cycle
I0320 11:14:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 11:15:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:15:03.409770  543705 memory.go:184] no items to output this cycle
I0320 11:15:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 11:15:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:15:13.409791  543705 memory.go:191] Add success.
I0320 11:15:13.409814  543705 cpu.go:282] Add success.
W0320 11:15:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:15:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:15:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:15:13.420167  543705 net.go:648] Add success.
I0320 11:15:13.422900  543705 net.go:770] primary dev: ETH0
I0320 11:15:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:15:13.422925  543705 net.go:698] Add success.
I0320 11:15:13.469482  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b7d8aeb-2b6e-45e9-bba8-d6c7b9e3991d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:15:13.469515  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:15:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:15:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 11:15:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:15:14.456612  543705 disk_worker.go:494] system disk:vda1
I0320 11:15:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:15:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:15:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:15:16.458168  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:15:16.472120  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:15:23.410395  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:15:23.410413  543705 memory.go:184] no items to output this cycle
I0320 11:15:23.410420  543705 cpu.go:275] no items to output this cycle
I0320 11:15:24.021671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:15:24.024273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:15:24.024279  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462fc0 0xc000463000]
E0320 11:15:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:15:33.409782  543705 memory.go:184] no items to output this cycle
I0320 11:15:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 11:15:38.446003  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:15:38.446010  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:15:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:15:43.410704  543705 memory.go:191] Add success.
I0320 11:15:43.409823  543705 cpu.go:282] Add success.
I0320 11:15:43.420393  543705 net.go:648] Add success.
I0320 11:15:43.423331  543705 net.go:770] primary dev: ETH0
I0320 11:15:43.423344  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:15:43.423356  543705 net.go:698] Add success.
I0320 11:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:15:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:15:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:15:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:15:53.409787  543705 memory.go:184] no items to output this cycle
I0320 11:15:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 11:16:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:16:03.409771  543705 memory.go:184] no items to output this cycle
I0320 11:16:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 11:16:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:16:13.409813  543705 memory.go:191] Add success.
I0320 11:16:13.409825  543705 cpu.go:282] Add success.
W0320 11:16:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:16:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:16:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:16:13.420435  543705 net.go:648] Add success.
I0320 11:16:13.423324  543705 net.go:770] primary dev: ETH0
I0320 11:16:13.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:16:13.423351  543705 net.go:698] Add success.
I0320 11:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:16:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:16:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 11:16:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:16:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 11:16:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:16:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:16:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:16:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:16:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:16:23.410406  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:16:23.410425  543705 memory.go:184] no items to output this cycle
I0320 11:16:23.410442  543705 cpu.go:275] no items to output this cycle
I0320 11:16:24.025674  543705 disk_info.go:125] begin check local disk info of client
I0320 11:16:24.028224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:16:24.028230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0320 11:16:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:16:33.409784  543705 memory.go:184] no items to output this cycle
I0320 11:16:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 11:16:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:16:43.409817  543705 memory.go:191] Add success.
I0320 11:16:43.409819  543705 cpu.go:282] Add success.
I0320 11:16:43.419987  543705 net.go:648] Add success.
I0320 11:16:43.423088  543705 net.go:770] primary dev: ETH0
I0320 11:16:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:16:43.423117  543705 net.go:698] Add success.
I0320 11:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:16:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:16:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:16:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:16:53.409799  543705 memory.go:184] no items to output this cycle
I0320 11:16:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 11:17:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:17:03.409773  543705 memory.go:184] no items to output this cycle
I0320 11:17:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 11:17:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:17:13.409813  543705 memory.go:191] Add success.
I0320 11:17:13.409817  543705 cpu.go:282] Add success.
W0320 11:17:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:17:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:17:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:17:13.420250  543705 net.go:648] Add success.
I0320 11:17:13.422888  543705 net.go:770] primary dev: ETH0
I0320 11:17:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:17:13.422914  543705 net.go:698] Add success.
I0320 11:17:13.453452  543705 event_worker.go:152] Polling the log file for events...
W0320 11:17:14.455464  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:17:14.455484  543705 disk_worker.go:708] disk space is not compliant
W0320 11:17:14.455489  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:17:14.456548  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:17:14.456557  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:17:14.456564  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:17:14.457495  543705 disk_worker.go:494] system disk:vda1
I0320 11:17:14.457533  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:17:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:17:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:17:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:17:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:17:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:17:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:17:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:17:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:17:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 11:17:23.409793  543705 memory.go:184] no items to output this cycle
I0320 11:17:24.029674  543705 disk_info.go:125] begin check local disk info of client
I0320 11:17:24.032216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:17:24.032222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa380 0xc0001fa3c0]
E0320 11:17:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:17:33.409795  543705 memory.go:184] no items to output this cycle
I0320 11:17:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 11:17:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:17:43.409800  543705 memory.go:191] Add success.
I0320 11:17:43.409805  543705 cpu.go:282] Add success.
I0320 11:17:43.419976  543705 net.go:648] Add success.
I0320 11:17:43.422740  543705 net.go:770] primary dev: ETH0
I0320 11:17:43.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:17:43.422767  543705 net.go:698] Add success.
I0320 11:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:17:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:17:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:17:53.409776  543705 memory.go:184] no items to output this cycle
I0320 11:17:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 11:18:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:18:03.409776  543705 memory.go:184] no items to output this cycle
I0320 11:18:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 11:18:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:18:13.409812  543705 memory.go:191] Add success.
I0320 11:18:13.409818  543705 cpu.go:282] Add success.
W0320 11:18:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:18:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:18:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:18:13.420099  543705 net.go:648] Add success.
I0320 11:18:13.422906  543705 net.go:770] primary dev: ETH0
I0320 11:18:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:18:13.422932  543705 net.go:698] Add success.
I0320 11:18:13.463611  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"553f1a5f-c339-494d-8ff6-510754dc6120","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:18:13.463641  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:18:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:18:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:18:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 11:18:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:18:14.456605  543705 disk_worker.go:494] system disk:vda1
I0320 11:18:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:18:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:18:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:18:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:18:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:18:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:18:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:18:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 11:18:23.409792  543705 memory.go:184] no items to output this cycle
I0320 11:18:24.033673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:18:24.036203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:18:24.036208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046ec00 0xc00046ec40]
E0320 11:18:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:18:33.409770  543705 memory.go:184] no items to output this cycle
I0320 11:18:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 11:18:38.446982  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:18:38.446989  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:18:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:18:43.410851  543705 memory.go:191] Add success.
I0320 11:18:43.409799  543705 cpu.go:282] Add success.
I0320 11:18:43.420539  543705 net.go:648] Add success.
I0320 11:18:43.423605  543705 net.go:770] primary dev: ETH0
I0320 11:18:43.423618  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:18:43.423631  543705 net.go:698] Add success.
I0320 11:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:18:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:18:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:18:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:18:53.409783  543705 memory.go:184] no items to output this cycle
I0320 11:18:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 11:19:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:19:03.409766  543705 memory.go:184] no items to output this cycle
I0320 11:19:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 11:19:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:19:13.409827  543705 memory.go:191] Add success.
I0320 11:19:13.409840  543705 cpu.go:282] Add success.
W0320 11:19:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:19:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:19:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:19:13.420173  543705 net.go:648] Add success.
I0320 11:19:13.422842  543705 net.go:770] primary dev: ETH0
I0320 11:19:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:19:13.422868  543705 net.go:698] Add success.
I0320 11:19:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:19:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:19:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 11:19:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:19:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 11:19:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:19:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:19:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:19:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:19:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:19:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:19:23.409787  543705 memory.go:184] no items to output this cycle
I0320 11:19:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 11:19:24.037672  543705 disk_info.go:125] begin check local disk info of client
I0320 11:19:24.040205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:19:24.040210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f8cc0 0xc0001f8d00]
E0320 11:19:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:19:33.409772  543705 memory.go:184] no items to output this cycle
I0320 11:19:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 11:19:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:19:43.409825  543705 memory.go:191] Add success.
I0320 11:19:43.409834  543705 cpu.go:282] Add success.
I0320 11:19:43.420022  543705 net.go:648] Add success.
I0320 11:19:43.422990  543705 net.go:770] primary dev: ETH0
I0320 11:19:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:19:43.423018  543705 net.go:698] Add success.
I0320 11:19:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:19:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:19:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:19:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:19:53.409779  543705 memory.go:184] no items to output this cycle
I0320 11:19:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 11:20:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:20:03.409768  543705 memory.go:184] no items to output this cycle
I0320 11:20:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 11:20:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:20:13.409822  543705 memory.go:191] Add success.
I0320 11:20:13.409829  543705 cpu.go:282] Add success.
W0320 11:20:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:20:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:20:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:20:13.420158  543705 net.go:648] Add success.
I0320 11:20:13.422806  543705 net.go:770] primary dev: ETH0
I0320 11:20:13.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:20:13.422833  543705 net.go:698] Add success.
I0320 11:20:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:20:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:20:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 11:20:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:20:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 11:20:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:20:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:20:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:20:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:20:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:20:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:20:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 11:20:23.409789  543705 memory.go:184] no items to output this cycle
I0320 11:20:24.041672  543705 disk_info.go:125] begin check local disk info of client
I0320 11:20:24.044167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:20:24.044172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9000 0xc0001f9040]
E0320 11:20:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:20:33.409765  543705 memory.go:184] no items to output this cycle
I0320 11:20:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 11:20:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:20:43.409795  543705 memory.go:191] Add success.
I0320 11:20:43.409800  543705 cpu.go:282] Add success.
I0320 11:20:43.419894  543705 net.go:648] Add success.
I0320 11:20:43.422551  543705 net.go:770] primary dev: ETH0
I0320 11:20:43.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:20:43.422577  543705 net.go:698] Add success.
I0320 11:20:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:20:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:20:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:20:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:20:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 11:20:53.409789  543705 memory.go:184] no items to output this cycle
E0320 11:21:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:21:03.409772  543705 memory.go:184] no items to output this cycle
I0320 11:21:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 11:21:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:21:13.409791  543705 memory.go:191] Add success.
I0320 11:21:13.409808  543705 cpu.go:282] Add success.
W0320 11:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:21:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:21:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:21:13.420344  543705 net.go:648] Add success.
I0320 11:21:13.423228  543705 net.go:770] primary dev: ETH0
I0320 11:21:13.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:21:13.423252  543705 net.go:698] Add success.
I0320 11:21:13.468648  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5540b5b8-1c03-488c-960f-3791ab041b0f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:21:13.468682  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:21:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:21:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:21:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 11:21:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:21:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 11:21:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:21:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:21:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:21:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:21:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:21:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:21:23.409807  543705 memory.go:184] no items to output this cycle
I0320 11:21:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 11:21:24.045670  543705 disk_info.go:125] begin check local disk info of client
I0320 11:21:24.048208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:21:24.048213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9a00 0xc0001f9a40]
E0320 11:21:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:21:33.409770  543705 memory.go:184] no items to output this cycle
I0320 11:21:33.409794  543705 cpu.go:275] no items to output this cycle
I0320 11:21:38.447999  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:21:38.448007  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:21:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:21:43.410669  543705 memory.go:191] Add success.
I0320 11:21:43.409823  543705 cpu.go:282] Add success.
I0320 11:21:43.420407  543705 net.go:648] Add success.
I0320 11:21:43.423066  543705 net.go:770] primary dev: ETH0
I0320 11:21:43.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:21:43.423094  543705 net.go:698] Add success.
I0320 11:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:21:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:21:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:21:53.409763  543705 memory.go:184] no items to output this cycle
I0320 11:21:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 11:22:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:22:03.409779  543705 memory.go:184] no items to output this cycle
I0320 11:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 11:22:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:22:13.409819  543705 memory.go:191] Add success.
I0320 11:22:13.409823  543705 cpu.go:282] Add success.
W0320 11:22:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:22:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:22:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:22:13.420122  543705 net.go:648] Add success.
I0320 11:22:13.423347  543705 net.go:770] primary dev: ETH0
I0320 11:22:13.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:22:13.423371  543705 net.go:698] Add success.
W0320 11:22:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:22:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0320 11:22:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:22:14.456897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:22:14.456907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:22:14.456913  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:22:14.456985  543705 disk_worker.go:494] system disk:vda1
I0320 11:22:14.457013  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:22:15.456865  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:22:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:22:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:22:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:22:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:22:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:22:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:22:23.410430  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:22:23.410451  543705 memory.go:184] no items to output this cycle
I0320 11:22:23.410458  543705 cpu.go:275] no items to output this cycle
I0320 11:22:24.049669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:22:24.052189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:22:24.052194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd40 0xc0001abd80]
E0320 11:22:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:22:33.409769  543705 memory.go:184] no items to output this cycle
I0320 11:22:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 11:22:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:22:43.409815  543705 memory.go:191] Add success.
I0320 11:22:43.409820  543705 cpu.go:282] Add success.
I0320 11:22:43.419953  543705 net.go:648] Add success.
I0320 11:22:43.422745  543705 net.go:770] primary dev: ETH0
I0320 11:22:43.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:22:43.422773  543705 net.go:698] Add success.
I0320 11:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:22:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:22:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:22:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:22:53.409794  543705 memory.go:184] no items to output this cycle
I0320 11:22:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 11:23:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:23:03.409777  543705 memory.go:184] no items to output this cycle
I0320 11:23:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:23:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:23:13.409879  543705 cpu.go:282] Add success.
I0320 11:23:13.409895  543705 memory.go:191] Add success.
W0320 11:23:13.409930  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:23:13.409945  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:23:13.409950  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:23:13.419733  543705 net.go:648] Add success.
I0320 11:23:13.422603  543705 net.go:770] primary dev: ETH0
I0320 11:23:13.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:23:13.422631  543705 net.go:698] Add success.
I0320 11:23:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:23:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:23:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 11:23:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:23:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 11:23:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:23:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:23:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:23:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:23:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:23:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:23:23.409778  543705 memory.go:184] no items to output this cycle
I0320 11:23:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 11:23:24.053673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:23:24.056203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:23:24.056209  543705 disk_info.go:196] parse disk info done, disk is : [0xc000549d40 0xc000549d80]
E0320 11:23:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:23:33.409765  543705 memory.go:184] no items to output this cycle
I0320 11:23:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 11:23:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:23:43.409797  543705 memory.go:191] Add success.
I0320 11:23:43.409799  543705 cpu.go:282] Add success.
I0320 11:23:43.419871  543705 net.go:648] Add success.
I0320 11:23:43.422918  543705 net.go:770] primary dev: ETH0
I0320 11:23:43.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:23:43.422952  543705 net.go:698] Add success.
I0320 11:23:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:23:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:23:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:23:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:23:53.409772  543705 memory.go:184] no items to output this cycle
I0320 11:23:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 11:24:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:24:03.409784  543705 memory.go:184] no items to output this cycle
I0320 11:24:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 11:24:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:24:13.409814  543705 memory.go:191] Add success.
I0320 11:24:13.409824  543705 cpu.go:282] Add success.
W0320 11:24:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:24:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:24:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:24:13.420510  543705 net.go:648] Add success.
I0320 11:24:13.423109  543705 net.go:770] primary dev: ETH0
I0320 11:24:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:24:13.423133  543705 net.go:698] Add success.
I0320 11:24:13.467953  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"10d41998-f74d-47a7-9039-5b328228f190","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:24:13.467983  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:24:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:24:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:24:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 11:24:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:24:14.456531  543705 disk_worker.go:494] system disk:vda1
I0320 11:24:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:24:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:24:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:24:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:24:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:24:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:24:23.409814  543705 memory.go:184] no items to output this cycle
I0320 11:24:23.409823  543705 cpu.go:275] no items to output this cycle
I0320 11:24:24.057691  543705 disk_info.go:125] begin check local disk info of client
I0320 11:24:24.060225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:24:24.060231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515340 0xc000515380]
E0320 11:24:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:24:33.409795  543705 memory.go:184] no items to output this cycle
I0320 11:24:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 11:24:38.448990  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:24:38.448997  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:24:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:24:43.410766  543705 memory.go:191] Add success.
I0320 11:24:43.409801  543705 cpu.go:282] Add success.
I0320 11:24:43.420572  543705 net.go:648] Add success.
I0320 11:24:43.423574  543705 net.go:770] primary dev: ETH0
I0320 11:24:43.423588  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:24:43.423600  543705 net.go:698] Add success.
I0320 11:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:24:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:24:53.409767  543705 memory.go:184] no items to output this cycle
I0320 11:24:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:25:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:25:03.409803  543705 memory.go:184] no items to output this cycle
I0320 11:25:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 11:25:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:25:13.409797  543705 memory.go:191] Add success.
I0320 11:25:13.409797  543705 cpu.go:282] Add success.
W0320 11:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:25:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:25:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:25:13.420222  543705 net.go:648] Add success.
I0320 11:25:13.423320  543705 net.go:770] primary dev: ETH0
I0320 11:25:13.423335  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:25:13.423348  543705 net.go:698] Add success.
I0320 11:25:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:25:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:25:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 11:25:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:25:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 11:25:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:25:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:25:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:25:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:25:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:25:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:25:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:25:23.409782  543705 memory.go:184] no items to output this cycle
I0320 11:25:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 11:25:24.061673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:25:24.064191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:25:24.064197  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f1c0 0xc00046f200]
E0320 11:25:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:25:33.409799  543705 memory.go:184] no items to output this cycle
I0320 11:25:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 11:25:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:25:43.409815  543705 memory.go:191] Add success.
I0320 11:25:43.409821  543705 cpu.go:282] Add success.
I0320 11:25:43.420000  543705 net.go:648] Add success.
I0320 11:25:43.422749  543705 net.go:770] primary dev: ETH0
I0320 11:25:43.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:25:43.422777  543705 net.go:698] Add success.
I0320 11:25:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:25:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:25:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:25:53.409787  543705 memory.go:184] no items to output this cycle
I0320 11:25:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 11:26:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:26:03.409786  543705 cpu.go:275] no items to output this cycle
I0320 11:26:03.409790  543705 memory.go:184] no items to output this cycle
E0320 11:26:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:26:13.409799  543705 memory.go:191] Add success.
I0320 11:26:13.409800  543705 cpu.go:282] Add success.
W0320 11:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:26:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:26:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:26:13.420401  543705 net.go:648] Add success.
I0320 11:26:13.423248  543705 net.go:770] primary dev: ETH0
I0320 11:26:13.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:26:13.423276  543705 net.go:698] Add success.
I0320 11:26:14.453955  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:26:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:26:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0320 11:26:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:26:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 11:26:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:26:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:26:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:26:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:26:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:26:23.409814  543705 memory.go:184] no items to output this cycle
I0320 11:26:23.409824  543705 cpu.go:275] no items to output this cycle
I0320 11:26:24.065670  543705 disk_info.go:125] begin check local disk info of client
I0320 11:26:24.068241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:26:24.068248  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f3c0 0xc00046f400]
E0320 11:26:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:26:33.409778  543705 memory.go:184] no items to output this cycle
I0320 11:26:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 11:26:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:26:43.409787  543705 memory.go:191] Add success.
I0320 11:26:43.409800  543705 cpu.go:282] Add success.
I0320 11:26:43.420076  543705 net.go:648] Add success.
I0320 11:26:43.422799  543705 net.go:770] primary dev: ETH0
I0320 11:26:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:26:43.422828  543705 net.go:698] Add success.
I0320 11:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:26:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:26:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:26:53.409794  543705 memory.go:184] no items to output this cycle
I0320 11:26:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 11:27:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:27:03.409782  543705 memory.go:184] no items to output this cycle
I0320 11:27:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 11:27:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:27:13.409811  543705 memory.go:191] Add success.
I0320 11:27:13.409813  543705 cpu.go:282] Add success.
W0320 11:27:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:27:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:27:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:27:13.420269  543705 net.go:648] Add success.
I0320 11:27:13.423324  543705 net.go:770] primary dev: ETH0
I0320 11:27:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:27:13.423348  543705 net.go:698] Add success.
I0320 11:27:13.430053  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 11:27:13.453295  543705 event_worker.go:152] Polling the log file for events...
I0320 11:27:13.468647  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1eff5136-9e64-4d3e-9d8a-5216d777b707","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:27:13.468680  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 11:27:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:27:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 11:27:14.455214  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:27:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:27:14.455917  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:27:14.455923  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:27:14.456760  543705 disk_worker.go:494] system disk:vda1
I0320 11:27:14.456795  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:27:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:27:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:27:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:27:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:27:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:27:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:27:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:27:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:27:23.409788  543705 memory.go:184] no items to output this cycle
I0320 11:27:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 11:27:24.069668  543705 disk_info.go:125] begin check local disk info of client
I0320 11:27:24.072273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:27:24.072280  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ee00 0xc00047ee40]
E0320 11:27:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:27:33.409790  543705 memory.go:184] no items to output this cycle
I0320 11:27:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 11:27:38.450021  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:27:38.450028  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:27:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:27:43.410635  543705 memory.go:191] Add success.
I0320 11:27:43.409792  543705 cpu.go:282] Add success.
I0320 11:27:43.420400  543705 net.go:648] Add success.
I0320 11:27:43.423263  543705 net.go:770] primary dev: ETH0
I0320 11:27:43.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:27:43.423291  543705 net.go:698] Add success.
I0320 11:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:27:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:27:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:27:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:27:53.409770  543705 memory.go:184] no items to output this cycle
I0320 11:27:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 11:28:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:28:03.409800  543705 memory.go:184] no items to output this cycle
I0320 11:28:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 11:28:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:28:13.409780  543705 memory.go:191] Add success.
I0320 11:28:13.409803  543705 cpu.go:282] Add success.
W0320 11:28:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:28:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:28:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:28:13.420072  543705 net.go:648] Add success.
I0320 11:28:13.422768  543705 net.go:770] primary dev: ETH0
I0320 11:28:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:28:13.422797  543705 net.go:698] Add success.
I0320 11:28:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:28:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:28:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 11:28:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:28:14.456535  543705 disk_worker.go:494] system disk:vda1
I0320 11:28:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:28:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:28:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:28:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:28:23.409789  543705 memory.go:184] no items to output this cycle
I0320 11:28:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 11:28:24.073671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:28:24.076265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:28:24.076270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e2340 0xc0001e2380]
E0320 11:28:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:28:33.409788  543705 memory.go:184] no items to output this cycle
I0320 11:28:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 11:28:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:28:43.409795  543705 memory.go:191] Add success.
I0320 11:28:43.409796  543705 cpu.go:282] Add success.
I0320 11:28:43.420061  543705 net.go:648] Add success.
I0320 11:28:43.422743  543705 net.go:770] primary dev: ETH0
I0320 11:28:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:28:43.422768  543705 net.go:698] Add success.
I0320 11:28:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:28:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:28:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:28:53.409777  543705 memory.go:184] no items to output this cycle
I0320 11:28:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 11:29:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:29:03.409786  543705 memory.go:184] no items to output this cycle
I0320 11:29:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:29:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:29:13.409805  543705 memory.go:191] Add success.
I0320 11:29:13.409812  543705 cpu.go:282] Add success.
W0320 11:29:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:29:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:29:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:29:13.420271  543705 net.go:648] Add success.
I0320 11:29:13.422918  543705 net.go:770] primary dev: ETH0
I0320 11:29:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:29:13.422945  543705 net.go:698] Add success.
I0320 11:29:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:29:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:29:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 11:29:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:29:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 11:29:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:29:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:29:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:29:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:29:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:29:23.409802  543705 memory.go:184] no items to output this cycle
I0320 11:29:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 11:29:24.077671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:29:24.080184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:29:24.080190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5900 0xc0000c5940]
E0320 11:29:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:29:33.409808  543705 memory.go:184] no items to output this cycle
I0320 11:29:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 11:29:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:29:43.409787  543705 memory.go:191] Add success.
I0320 11:29:43.409810  543705 cpu.go:282] Add success.
I0320 11:29:43.419961  543705 net.go:648] Add success.
I0320 11:29:43.422921  543705 net.go:770] primary dev: ETH0
I0320 11:29:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:29:43.422950  543705 net.go:698] Add success.
I0320 11:29:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:29:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:29:53.409792  543705 memory.go:184] no items to output this cycle
I0320 11:29:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 11:30:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:30:03.409779  543705 memory.go:184] no items to output this cycle
I0320 11:30:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 11:30:13.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:30:13.409884  543705 memory.go:191] Add success.
I0320 11:30:13.409914  543705 cpu.go:282] Add success.
W0320 11:30:13.409921  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:30:13.409935  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:30:13.409942  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:30:13.419722  543705 net.go:648] Add success.
I0320 11:30:13.423270  543705 net.go:770] primary dev: ETH0
I0320 11:30:13.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:30:13.423292  543705 net.go:698] Add success.
I0320 11:30:13.463496  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2d4b875a-5e7d-4a06-8e46-12bb40f63ab6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:30:13.463527  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:30:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:30:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:30:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 11:30:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:30:14.456672  543705 disk_worker.go:494] system disk:vda1
I0320 11:30:14.456701  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:30:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:30:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:30:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:30:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:30:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:30:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:30:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 11:30:23.409800  543705 memory.go:184] no items to output this cycle
I0320 11:30:24.081676  543705 disk_info.go:125] begin check local disk info of client
I0320 11:30:24.084211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:30:24.084217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000287300 0xc000287340]
E0320 11:30:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:30:33.409784  543705 memory.go:184] no items to output this cycle
I0320 11:30:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 11:30:38.451009  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:30:38.451016  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:30:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:30:43.410830  543705 memory.go:191] Add success.
I0320 11:30:43.409803  543705 cpu.go:282] Add success.
I0320 11:30:43.420539  543705 net.go:648] Add success.
I0320 11:30:43.423483  543705 net.go:770] primary dev: ETH0
I0320 11:30:43.423496  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:30:43.423509  543705 net.go:698] Add success.
I0320 11:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:30:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:30:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:30:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:30:53.409771  543705 memory.go:184] no items to output this cycle
I0320 11:30:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 11:31:03.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:31:03.409914  543705 memory.go:184] no items to output this cycle
I0320 11:31:03.409926  543705 cpu.go:275] no items to output this cycle
E0320 11:31:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:31:13.409780  543705 memory.go:191] Add success.
I0320 11:31:13.409804  543705 cpu.go:282] Add success.
W0320 11:31:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:31:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:31:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:31:13.420167  543705 net.go:648] Add success.
I0320 11:31:13.422764  543705 net.go:770] primary dev: ETH0
I0320 11:31:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:31:13.422788  543705 net.go:698] Add success.
I0320 11:31:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:31:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:31:14.455140  543705 disk_worker.go:708] disk space is not compliant
W0320 11:31:14.455142  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:31:14.456432  543705 disk_worker.go:494] system disk:vda1
I0320 11:31:14.456476  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:31:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:31:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:31:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:31:23.409795  543705 memory.go:184] no items to output this cycle
I0320 11:31:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 11:31:24.085669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:31:24.088204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:31:24.088210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1a80 0xc0002a1ac0]
E0320 11:31:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:31:33.409803  543705 memory.go:184] no items to output this cycle
I0320 11:31:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 11:31:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:31:43.409821  543705 memory.go:191] Add success.
I0320 11:31:43.409831  543705 cpu.go:282] Add success.
I0320 11:31:43.420001  543705 net.go:648] Add success.
I0320 11:31:43.422522  543705 net.go:770] primary dev: ETH0
I0320 11:31:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:31:43.422549  543705 net.go:698] Add success.
I0320 11:31:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:31:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:31:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:31:53.409796  543705 memory.go:184] no items to output this cycle
I0320 11:31:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 11:32:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:32:03.409803  543705 memory.go:184] no items to output this cycle
I0320 11:32:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 11:32:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:32:13.409811  543705 memory.go:191] Add success.
I0320 11:32:13.409814  543705 cpu.go:282] Add success.
W0320 11:32:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:32:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:32:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:32:13.420084  543705 net.go:648] Add success.
I0320 11:32:13.423605  543705 net.go:770] primary dev: ETH0
I0320 11:32:13.423618  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:32:13.423629  543705 net.go:698] Add success.
W0320 11:32:14.455077  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:32:14.455133  543705 disk_worker.go:708] disk space is not compliant
W0320 11:32:14.455136  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:32:14.456874  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:32:14.456883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:32:14.456889  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:32:14.456964  543705 disk_worker.go:494] system disk:vda1
I0320 11:32:14.456993  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:32:15.456856  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:32:15.456864  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:32:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:32:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:32:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:32:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:32:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:32:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:32:23.409796  543705 memory.go:184] no items to output this cycle
I0320 11:32:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 11:32:24.089671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:32:24.092268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:32:24.092274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9380 0xc0003b93c0]
E0320 11:32:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:32:33.409782  543705 memory.go:184] no items to output this cycle
I0320 11:32:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 11:32:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:32:43.409794  543705 cpu.go:282] Add success.
I0320 11:32:43.409798  543705 memory.go:191] Add success.
I0320 11:32:43.419900  543705 net.go:648] Add success.
I0320 11:32:43.422757  543705 net.go:770] primary dev: ETH0
I0320 11:32:43.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:32:43.422786  543705 net.go:698] Add success.
I0320 11:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:32:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:32:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:32:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:32:53.409782  543705 memory.go:184] no items to output this cycle
I0320 11:32:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 11:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:33:03.409780  543705 memory.go:184] no items to output this cycle
I0320 11:33:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 11:33:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:33:13.409796  543705 memory.go:191] Add success.
I0320 11:33:13.409798  543705 cpu.go:282] Add success.
W0320 11:33:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:33:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:33:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:33:13.420495  543705 net.go:648] Add success.
I0320 11:33:13.423184  543705 net.go:770] primary dev: ETH0
I0320 11:33:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:33:13.423208  543705 net.go:698] Add success.
I0320 11:33:13.960578  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"64c2451f-1b7d-47b9-b482-2071c2da8108","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:33:13.960623  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:33:14.454679  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:33:14.454853  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:33:14.454864  543705 disk_worker.go:708] disk space is not compliant
W0320 11:33:14.454866  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:33:14.456198  543705 disk_worker.go:494] system disk:vda1
I0320 11:33:14.456252  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:33:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:33:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:33:23.409799  543705 memory.go:184] no items to output this cycle
I0320 11:33:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 11:33:24.093694  543705 disk_info.go:125] begin check local disk info of client
I0320 11:33:24.096200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:33:24.096205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003446c0 0xc000344700]
E0320 11:33:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:33:33.409772  543705 memory.go:184] no items to output this cycle
I0320 11:33:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 11:33:38.452024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:33:38.452032  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:33:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:33:43.410613  543705 memory.go:191] Add success.
I0320 11:33:43.409821  543705 cpu.go:282] Add success.
I0320 11:33:43.420363  543705 net.go:648] Add success.
I0320 11:33:43.422858  543705 net.go:770] primary dev: ETH0
I0320 11:33:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:33:43.422887  543705 net.go:698] Add success.
I0320 11:33:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:33:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:33:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:33:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:33:53.409782  543705 memory.go:184] no items to output this cycle
I0320 11:33:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 11:34:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:34:03.409806  543705 memory.go:184] no items to output this cycle
I0320 11:34:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 11:34:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:34:13.409793  543705 memory.go:191] Add success.
W0320 11:34:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:34:13.409821  543705 cpu.go:282] Add success.
W0320 11:34:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:34:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:34:13.420143  543705 net.go:648] Add success.
I0320 11:34:13.423024  543705 net.go:770] primary dev: ETH0
I0320 11:34:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:34:13.423052  543705 net.go:698] Add success.
I0320 11:34:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:34:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:34:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 11:34:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:34:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 11:34:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:34:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:34:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:34:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:34:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:34:23.409795  543705 memory.go:184] no items to output this cycle
I0320 11:34:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 11:34:24.097675  543705 disk_info.go:125] begin check local disk info of client
I0320 11:34:24.100220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:34:24.100225  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344200 0xc000344240]
E0320 11:34:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:34:33.409806  543705 memory.go:184] no items to output this cycle
I0320 11:34:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 11:34:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:34:43.409791  543705 memory.go:191] Add success.
I0320 11:34:43.409825  543705 cpu.go:282] Add success.
I0320 11:34:43.419893  543705 net.go:648] Add success.
I0320 11:34:43.422877  543705 net.go:770] primary dev: ETH0
I0320 11:34:43.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:34:43.422904  543705 net.go:698] Add success.
I0320 11:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:34:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:34:53.410381  543705 memory.go:184] no items to output this cycle
I0320 11:34:53.410383  543705 cpu.go:275] no items to output this cycle
E0320 11:35:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:35:03.409808  543705 memory.go:184] no items to output this cycle
I0320 11:35:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 11:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:35:13.409789  543705 memory.go:191] Add success.
I0320 11:35:13.409810  543705 cpu.go:282] Add success.
W0320 11:35:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:35:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:35:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:35:13.420369  543705 net.go:648] Add success.
I0320 11:35:13.423097  543705 net.go:770] primary dev: ETH0
I0320 11:35:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:35:13.423122  543705 net.go:698] Add success.
I0320 11:35:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:35:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:35:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 11:35:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:35:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 11:35:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:35:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:35:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:35:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:35:23.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:35:23.409829  543705 memory.go:184] no items to output this cycle
I0320 11:35:23.409833  543705 cpu.go:275] no items to output this cycle
I0320 11:35:24.101671  543705 disk_info.go:125] begin check local disk info of client
I0320 11:35:24.104213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:35:24.104218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c52c0 0xc0000c5300]
E0320 11:35:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:35:33.409791  543705 memory.go:184] no items to output this cycle
I0320 11:35:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 11:35:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:35:43.409828  543705 memory.go:191] Add success.
I0320 11:35:43.409829  543705 cpu.go:282] Add success.
I0320 11:35:43.420036  543705 net.go:648] Add success.
I0320 11:35:43.422935  543705 net.go:770] primary dev: ETH0
I0320 11:35:43.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:35:43.422964  543705 net.go:698] Add success.
I0320 11:35:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:35:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:35:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:35:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:35:53.409794  543705 memory.go:184] no items to output this cycle
I0320 11:35:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 11:36:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:36:03.409782  543705 memory.go:184] no items to output this cycle
I0320 11:36:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 11:36:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:36:13.409793  543705 memory.go:191] Add success.
I0320 11:36:13.409794  543705 cpu.go:282] Add success.
W0320 11:36:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:36:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:36:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:36:13.420245  543705 net.go:648] Add success.
I0320 11:36:13.423304  543705 net.go:770] primary dev: ETH0
I0320 11:36:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:36:13.423328  543705 net.go:698] Add success.
I0320 11:36:13.463037  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5a5ec593-907b-4aa6-8e66-abccbd5e9ed2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:36:13.463069  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:36:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:36:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:36:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 11:36:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:36:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 11:36:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:36:15.455608  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:36:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:36:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:36:16.472425  543705 disk_local_worker.go:436] Get disk info: []
I0320 11:36:23.410514  543705 cpu.go:275] no items to output this cycle
E0320 11:36:23.410522  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:36:23.410540  543705 memory.go:184] no items to output this cycle
I0320 11:36:24.105676  543705 disk_info.go:125] begin check local disk info of client
I0320 11:36:24.108454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:36:24.108460  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0320 11:36:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:36:33.409797  543705 memory.go:184] no items to output this cycle
I0320 11:36:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 11:36:38.453012  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:36:38.453018  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:36:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:36:43.410984  543705 memory.go:191] Add success.
I0320 11:36:43.409797  543705 cpu.go:282] Add success.
I0320 11:36:43.420694  543705 net.go:648] Add success.
I0320 11:36:43.423831  543705 net.go:770] primary dev: ETH0
I0320 11:36:43.423844  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:36:43.423858  543705 net.go:698] Add success.
I0320 11:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:36:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:36:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:36:53.409781  543705 memory.go:184] no items to output this cycle
I0320 11:36:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 11:37:03.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:37:03.409866  543705 memory.go:184] no items to output this cycle
I0320 11:37:03.409893  543705 cpu.go:275] no items to output this cycle
E0320 11:37:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:37:13.409817  543705 memory.go:191] Add success.
I0320 11:37:13.409827  543705 cpu.go:282] Add success.
W0320 11:37:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:37:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:37:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:37:13.420141  543705 net.go:648] Add success.
I0320 11:37:13.422803  543705 net.go:770] primary dev: ETH0
I0320 11:37:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:37:13.422828  543705 net.go:698] Add success.
I0320 11:37:13.453422  543705 event_worker.go:152] Polling the log file for events...
W0320 11:37:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:37:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 11:37:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:37:14.456862  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:37:14.456871  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:37:14.456877  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:37:14.456969  543705 disk_worker.go:494] system disk:vda1
I0320 11:37:14.456998  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:37:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:37:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:37:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:37:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:37:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:37:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:37:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:37:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:37:23.409817  543705 memory.go:184] no items to output this cycle
I0320 11:37:23.409830  543705 cpu.go:275] no items to output this cycle
I0320 11:37:24.109670  543705 disk_info.go:125] begin check local disk info of client
I0320 11:37:24.112232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:37:24.112238  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0320 11:37:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:37:33.409770  543705 memory.go:184] no items to output this cycle
I0320 11:37:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:37:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:37:43.409825  543705 memory.go:191] Add success.
I0320 11:37:43.409832  543705 cpu.go:282] Add success.
I0320 11:37:43.419994  543705 net.go:648] Add success.
I0320 11:37:43.422799  543705 net.go:770] primary dev: ETH0
I0320 11:37:43.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:37:43.422829  543705 net.go:698] Add success.
I0320 11:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:37:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:37:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:37:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:37:53.409802  543705 memory.go:184] no items to output this cycle
I0320 11:37:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 11:38:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:38:03.409897  543705 cpu.go:275] no items to output this cycle
I0320 11:38:03.409924  543705 memory.go:184] no items to output this cycle
E0320 11:38:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:38:13.409825  543705 memory.go:191] Add success.
I0320 11:38:13.409833  543705 cpu.go:282] Add success.
W0320 11:38:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:38:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:38:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:38:13.420201  543705 net.go:648] Add success.
I0320 11:38:13.422999  543705 net.go:770] primary dev: ETH0
I0320 11:38:13.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:38:13.423038  543705 net.go:698] Add success.
I0320 11:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:38:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:38:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 11:38:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:38:14.456520  543705 disk_worker.go:494] system disk:vda1
I0320 11:38:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:38:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:38:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:38:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:38:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:38:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:38:23.409794  543705 memory.go:184] no items to output this cycle
I0320 11:38:23.409827  543705 cpu.go:275] no items to output this cycle
I0320 11:38:24.113675  543705 disk_info.go:125] begin check local disk info of client
I0320 11:38:24.116225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:38:24.116231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c55c0 0xc0000c5600]
E0320 11:38:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:38:33.409800  543705 memory.go:184] no items to output this cycle
I0320 11:38:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 11:38:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:38:43.409791  543705 memory.go:191] Add success.
I0320 11:38:43.409815  543705 cpu.go:282] Add success.
I0320 11:38:43.420016  543705 net.go:648] Add success.
I0320 11:38:43.422929  543705 net.go:770] primary dev: ETH0
I0320 11:38:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:38:43.422973  543705 net.go:698] Add success.
I0320 11:38:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:38:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:38:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:38:53.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:38:53.410392  543705 memory.go:184] no items to output this cycle
I0320 11:38:53.410421  543705 cpu.go:275] no items to output this cycle
E0320 11:39:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:39:03.409775  543705 memory.go:184] no items to output this cycle
I0320 11:39:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 11:39:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:39:13.409797  543705 memory.go:191] Add success.
I0320 11:39:13.409797  543705 cpu.go:282] Add success.
W0320 11:39:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:39:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:39:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:39:13.420163  543705 net.go:648] Add success.
I0320 11:39:13.422778  543705 net.go:770] primary dev: ETH0
I0320 11:39:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:39:13.422804  543705 net.go:698] Add success.
I0320 11:39:13.877834  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1722ee1b-db6b-456a-b2db-4a42dcb91337","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:39:13.877873  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:39:14.454725  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:39:14.454963  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:39:14.454974  543705 disk_worker.go:708] disk space is not compliant
W0320 11:39:14.454976  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:39:14.456461  543705 disk_worker.go:494] system disk:vda1
I0320 11:39:14.456490  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:39:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:39:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:39:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:39:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:39:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:39:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:39:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 11:39:23.409797  543705 memory.go:184] no items to output this cycle
I0320 11:39:24.117674  543705 disk_info.go:125] begin check local disk info of client
I0320 11:39:24.120172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:39:24.120178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba500 0xc0002ba540]
E0320 11:39:33.409828  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:39:33.409857  543705 memory.go:184] no items to output this cycle
I0320 11:39:33.409894  543705 cpu.go:275] no items to output this cycle
I0320 11:39:38.454045  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:39:38.454053  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:39:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:39:43.410730  543705 memory.go:191] Add success.
I0320 11:39:43.409810  543705 cpu.go:282] Add success.
I0320 11:39:43.420420  543705 net.go:648] Add success.
I0320 11:39:43.423196  543705 net.go:770] primary dev: ETH0
I0320 11:39:43.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:39:43.423228  543705 net.go:698] Add success.
I0320 11:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:39:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:39:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:39:53.409781  543705 memory.go:184] no items to output this cycle
I0320 11:39:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 11:40:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:40:03.409766  543705 memory.go:184] no items to output this cycle
I0320 11:40:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 11:40:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:40:13.409781  543705 memory.go:191] Add success.
W0320 11:40:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:40:13.409811  543705 cpu.go:282] Add success.
W0320 11:40:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:40:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:40:13.420092  543705 net.go:648] Add success.
I0320 11:40:13.423108  543705 net.go:770] primary dev: ETH0
I0320 11:40:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:40:13.423133  543705 net.go:698] Add success.
I0320 11:40:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:40:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:40:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 11:40:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:40:14.456584  543705 disk_worker.go:494] system disk:vda1
I0320 11:40:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:40:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:40:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:40:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:40:16.472492  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:40:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:40:23.409812  543705 memory.go:184] no items to output this cycle
I0320 11:40:23.409820  543705 cpu.go:275] no items to output this cycle
I0320 11:40:24.121669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:40:24.124205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:40:24.124210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c80 0xc0000c5cc0]
E0320 11:40:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:40:33.409804  543705 memory.go:184] no items to output this cycle
I0320 11:40:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 11:40:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:40:43.409779  543705 memory.go:191] Add success.
I0320 11:40:43.409802  543705 cpu.go:282] Add success.
I0320 11:40:43.419878  543705 net.go:648] Add success.
I0320 11:40:43.422886  543705 net.go:770] primary dev: ETH0
I0320 11:40:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:40:43.422912  543705 net.go:698] Add success.
I0320 11:40:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:40:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:40:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:40:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:40:53.409781  543705 memory.go:184] no items to output this cycle
I0320 11:40:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 11:41:03.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:41:03.409888  543705 memory.go:184] no items to output this cycle
I0320 11:41:03.409935  543705 cpu.go:275] no items to output this cycle
E0320 11:41:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:41:13.409810  543705 memory.go:191] Add success.
I0320 11:41:13.409824  543705 cpu.go:282] Add success.
W0320 11:41:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:41:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:41:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:41:13.420372  543705 net.go:648] Add success.
I0320 11:41:13.423790  543705 net.go:770] primary dev: ETH0
I0320 11:41:13.423803  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:41:13.423814  543705 net.go:698] Add success.
I0320 11:41:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:41:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:41:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 11:41:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:41:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 11:41:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:41:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:41:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:41:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:41:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:41:23.409777  543705 memory.go:184] no items to output this cycle
I0320 11:41:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 11:41:24.125673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:41:24.128269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:41:24.128274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475000 0xc000475040]
E0320 11:41:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:41:33.409772  543705 memory.go:184] no items to output this cycle
I0320 11:41:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 11:41:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:41:43.409826  543705 memory.go:191] Add success.
I0320 11:41:43.409831  543705 cpu.go:282] Add success.
I0320 11:41:43.419950  543705 net.go:648] Add success.
I0320 11:41:43.422718  543705 net.go:770] primary dev: ETH0
I0320 11:41:43.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:41:43.422743  543705 net.go:698] Add success.
I0320 11:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:41:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:41:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:41:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:41:53.409796  543705 memory.go:184] no items to output this cycle
I0320 11:41:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 11:42:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:42:03.409782  543705 memory.go:184] no items to output this cycle
I0320 11:42:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 11:42:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:42:13.409809  543705 memory.go:191] Add success.
I0320 11:42:13.409819  543705 cpu.go:282] Add success.
W0320 11:42:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:42:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:42:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:42:13.420122  543705 net.go:648] Add success.
I0320 11:42:13.422990  543705 net.go:770] primary dev: ETH0
I0320 11:42:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:42:13.423014  543705 net.go:698] Add success.
I0320 11:42:13.471448  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"effc8314-0db7-4aea-a43a-881fb2d259da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:42:13.471480  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 11:42:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:42:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 11:42:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:42:14.456168  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:42:14.456178  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:42:14.456183  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:42:14.456487  543705 disk_worker.go:494] system disk:vda1
I0320 11:42:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:42:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:42:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:42:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:42:16.457989  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:42:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:42:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:42:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:42:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:42:23.409795  543705 memory.go:184] no items to output this cycle
I0320 11:42:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 11:42:24.129669  543705 disk_info.go:125] begin check local disk info of client
I0320 11:42:24.132296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:42:24.132302  543705 disk_info.go:196] parse disk info done, disk is : [0xc000233700 0xc000233740]
E0320 11:42:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:42:33.409778  543705 memory.go:184] no items to output this cycle
I0320 11:42:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 11:42:38.455027  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:42:38.455034  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:42:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:42:43.410795  543705 memory.go:191] Add success.
I0320 11:42:43.409802  543705 cpu.go:282] Add success.
I0320 11:42:43.420584  543705 net.go:648] Add success.
I0320 11:42:43.423629  543705 net.go:770] primary dev: ETH0
I0320 11:42:43.423644  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:42:43.423658  543705 net.go:698] Add success.
I0320 11:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:42:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:42:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:42:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:42:53.409766  543705 memory.go:184] no items to output this cycle
I0320 11:42:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 11:43:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:43:03.409800  543705 memory.go:184] no items to output this cycle
I0320 11:43:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 11:43:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:43:13.409777  543705 memory.go:191] Add success.
I0320 11:43:13.409797  543705 cpu.go:282] Add success.
W0320 11:43:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:43:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:43:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:43:13.420175  543705 net.go:648] Add success.
I0320 11:43:13.422825  543705 net.go:770] primary dev: ETH0
I0320 11:43:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:43:13.422849  543705 net.go:698] Add success.
I0320 11:43:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:43:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:43:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 11:43:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:43:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 11:43:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:43:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:43:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:43:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:43:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:43:23.409783  543705 memory.go:184] no items to output this cycle
I0320 11:43:23.409854  543705 cpu.go:275] no items to output this cycle
I0320 11:43:24.133673  543705 disk_info.go:125] begin check local disk info of client
I0320 11:43:24.136174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:43:24.136180  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487500 0xc000487540]
E0320 11:43:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:43:33.409768  543705 memory.go:184] no items to output this cycle
I0320 11:43:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 11:43:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:43:43.409830  543705 memory.go:191] Add success.
I0320 11:43:43.409837  543705 cpu.go:282] Add success.
I0320 11:43:43.420076  543705 net.go:648] Add success.
I0320 11:43:43.423089  543705 net.go:770] primary dev: ETH0
I0320 11:43:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:43:43.423120  543705 net.go:698] Add success.
I0320 11:43:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:43:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:43:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:43:53.409799  543705 memory.go:184] no items to output this cycle
I0320 11:43:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 11:44:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:44:03.409785  543705 memory.go:184] no items to output this cycle
I0320 11:44:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 11:44:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:44:13.409806  543705 memory.go:191] Add success.
I0320 11:44:13.409818  543705 cpu.go:282] Add success.
W0320 11:44:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:44:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:44:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:44:13.420415  543705 net.go:648] Add success.
I0320 11:44:13.423121  543705 net.go:770] primary dev: ETH0
I0320 11:44:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:44:13.423145  543705 net.go:698] Add success.
I0320 11:44:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:44:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:44:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 11:44:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:44:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 11:44:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:44:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:44:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:44:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:44:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:44:23.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:44:23.409939  543705 memory.go:184] no items to output this cycle
I0320 11:44:23.409948  543705 cpu.go:275] no items to output this cycle
I0320 11:44:24.137679  543705 disk_info.go:125] begin check local disk info of client
I0320 11:44:24.140226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:44:24.140232  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289300 0xc000289340]
E0320 11:44:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:44:33.409805  543705 memory.go:184] no items to output this cycle
I0320 11:44:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 11:44:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:44:43.409786  543705 memory.go:191] Add success.
I0320 11:44:43.409805  543705 cpu.go:282] Add success.
I0320 11:44:43.419978  543705 net.go:648] Add success.
I0320 11:44:43.422636  543705 net.go:770] primary dev: ETH0
I0320 11:44:43.422649  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:44:43.422662  543705 net.go:698] Add success.
I0320 11:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:44:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:44:53.409795  543705 memory.go:184] no items to output this cycle
I0320 11:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 11:45:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:45:03.409766  543705 memory.go:184] no items to output this cycle
I0320 11:45:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 11:45:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:45:13.409777  543705 memory.go:191] Add success.
I0320 11:45:13.409789  543705 cpu.go:282] Add success.
W0320 11:45:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:45:13.412367  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:45:13.412372  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:45:13.420073  543705 net.go:648] Add success.
I0320 11:45:13.421697  543705 net.go:770] primary dev: ETH0
I0320 11:45:13.421710  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:45:13.421723  543705 net.go:698] Add success.
I0320 11:45:13.464566  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d9546ef2-8b07-43ee-87c2-7c2f5a446147","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:45:13.464600  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:45:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:45:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:45:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 11:45:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:45:14.456842  543705 disk_worker.go:494] system disk:vda1
I0320 11:45:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:45:15.455623  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:45:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:45:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:45:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:45:16.472384  543705 disk_local_worker.go:436] Get disk info: []
I0320 11:45:23.409808  543705 cpu.go:275] no items to output this cycle
E0320 11:45:23.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:45:23.409831  543705 memory.go:184] no items to output this cycle
I0320 11:45:24.141681  543705 disk_info.go:125] begin check local disk info of client
I0320 11:45:24.144228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:45:24.144235  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba40 0xc00007ba80]
E0320 11:45:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:45:33.409770  543705 memory.go:184] no items to output this cycle
I0320 11:45:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 11:45:38.456036  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:45:38.456042  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:45:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:45:43.410826  543705 memory.go:191] Add success.
I0320 11:45:43.409841  543705 cpu.go:282] Add success.
I0320 11:45:43.420670  543705 net.go:648] Add success.
I0320 11:45:43.423507  543705 net.go:770] primary dev: ETH0
I0320 11:45:43.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:45:43.423532  543705 net.go:698] Add success.
I0320 11:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:45:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:45:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:45:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:45:53.409785  543705 cpu.go:275] no items to output this cycle
I0320 11:45:53.409791  543705 memory.go:184] no items to output this cycle
E0320 11:46:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:46:03.409769  543705 memory.go:184] no items to output this cycle
I0320 11:46:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 11:46:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:46:13.409781  543705 memory.go:191] Add success.
W0320 11:46:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:46:13.409816  543705 cpu.go:282] Add success.
W0320 11:46:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:46:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:46:13.420305  543705 net.go:648] Add success.
I0320 11:46:13.423228  543705 net.go:770] primary dev: ETH0
I0320 11:46:13.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:46:13.423251  543705 net.go:698] Add success.
I0320 11:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:46:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:46:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 11:46:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:46:14.456465  543705 disk_worker.go:494] system disk:vda1
I0320 11:46:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:46:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:46:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:46:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:46:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:46:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:46:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:46:23.409785  543705 memory.go:184] no items to output this cycle
I0320 11:46:23.409836  543705 cpu.go:275] no items to output this cycle
I0320 11:46:24.145683  543705 disk_info.go:125] begin check local disk info of client
I0320 11:46:24.148249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:46:24.148255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cf340 0xc0003cf380]
E0320 11:46:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:46:33.409805  543705 memory.go:184] no items to output this cycle
I0320 11:46:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 11:46:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:46:43.409801  543705 memory.go:191] Add success.
I0320 11:46:43.409805  543705 cpu.go:282] Add success.
I0320 11:46:43.419876  543705 net.go:648] Add success.
I0320 11:46:43.422533  543705 net.go:770] primary dev: ETH0
I0320 11:46:43.422561  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:46:43.422576  543705 net.go:698] Add success.
I0320 11:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:46:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:46:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:46:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:46:53.410258  543705 memory.go:184] no items to output this cycle
I0320 11:46:53.410270  543705 cpu.go:275] no items to output this cycle
E0320 11:47:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:47:03.409796  543705 memory.go:184] no items to output this cycle
I0320 11:47:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 11:47:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:47:13.409812  543705 memory.go:191] Add success.
I0320 11:47:13.409827  543705 cpu.go:282] Add success.
W0320 11:47:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:47:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:47:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:47:13.419739  543705 net.go:648] Add success.
I0320 11:47:13.422467  543705 net.go:770] primary dev: ETH0
I0320 11:47:13.422481  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:47:13.422494  543705 net.go:698] Add success.
I0320 11:47:13.453036  543705 event_worker.go:152] Polling the log file for events...
W0320 11:47:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:47:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 11:47:14.455157  543705 disk_worker.go:728] disk inode is not compliant
E0320 11:47:14.456905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:47:14.456915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:47:14.456920  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:47:14.456990  543705 disk_worker.go:494] system disk:vda1
I0320 11:47:14.457031  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:47:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:47:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:47:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:47:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:47:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:47:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:47:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0320 11:47:23.409811  543705 cpu.go:275] no items to output this cycle
E0320 11:47:23.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:47:23.409832  543705 memory.go:184] no items to output this cycle
I0320 11:47:24.149683  543705 disk_info.go:125] begin check local disk info of client
I0320 11:47:24.152247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:47:24.152255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4940 0xc0000c4980]
E0320 11:47:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:47:33.409780  543705 memory.go:184] no items to output this cycle
I0320 11:47:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 11:47:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:47:43.409800  543705 memory.go:191] Add success.
I0320 11:47:43.409800  543705 cpu.go:282] Add success.
I0320 11:47:43.419850  543705 net.go:648] Add success.
I0320 11:47:43.422715  543705 net.go:770] primary dev: ETH0
I0320 11:47:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:47:43.422743  543705 net.go:698] Add success.
I0320 11:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:47:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:47:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:47:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:47:53.409793  543705 memory.go:184] no items to output this cycle
I0320 11:47:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 11:48:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:48:03.409904  543705 memory.go:184] no items to output this cycle
I0320 11:48:03.409915  543705 cpu.go:275] no items to output this cycle
E0320 11:48:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:48:13.409796  543705 memory.go:191] Add success.
I0320 11:48:13.409797  543705 cpu.go:282] Add success.
W0320 11:48:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:48:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:48:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:48:13.420231  543705 net.go:648] Add success.
I0320 11:48:13.422677  543705 net.go:770] primary dev: ETH0
I0320 11:48:13.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:48:13.422706  543705 net.go:698] Add success.
I0320 11:48:13.464356  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f10a341f-ce05-4a3c-81f5-cbc165cf0f3a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:48:13.464389  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:48:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:48:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:48:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 11:48:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:48:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 11:48:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:48:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:48:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:48:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:48:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:48:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:48:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:48:23.409801  543705 memory.go:184] no items to output this cycle
I0320 11:48:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 11:48:24.153686  543705 disk_info.go:125] begin check local disk info of client
I0320 11:48:24.156285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:48:24.156293  543705 disk_info.go:196] parse disk info done, disk is : [0xc000284b80 0xc000284bc0]
E0320 11:48:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:48:33.409801  543705 memory.go:184] no items to output this cycle
I0320 11:48:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 11:48:38.457035  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:48:38.457042  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:48:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:48:43.410549  543705 memory.go:191] Add success.
I0320 11:48:43.409826  543705 cpu.go:282] Add success.
I0320 11:48:43.420240  543705 net.go:648] Add success.
I0320 11:48:43.422962  543705 net.go:770] primary dev: ETH0
I0320 11:48:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:48:43.422991  543705 net.go:698] Add success.
I0320 11:48:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:48:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:48:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:48:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:48:53.409799  543705 memory.go:184] no items to output this cycle
I0320 11:48:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 11:49:03.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:49:03.409914  543705 memory.go:184] no items to output this cycle
I0320 11:49:03.409987  543705 cpu.go:275] no items to output this cycle
E0320 11:49:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:49:13.409819  543705 memory.go:191] Add success.
I0320 11:49:13.409828  543705 cpu.go:282] Add success.
W0320 11:49:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:49:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:49:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:49:13.420138  543705 net.go:648] Add success.
I0320 11:49:13.422783  543705 net.go:770] primary dev: ETH0
I0320 11:49:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:49:13.422808  543705 net.go:698] Add success.
I0320 11:49:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:49:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:49:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 11:49:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:49:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 11:49:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:49:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:49:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:49:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:49:16.472373  543705 disk_local_worker.go:436] Get disk info: []
I0320 11:49:23.409785  543705 cpu.go:275] no items to output this cycle
E0320 11:49:23.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:49:23.409825  543705 memory.go:184] no items to output this cycle
I0320 11:49:24.157823  543705 disk_info.go:125] begin check local disk info of client
I0320 11:49:24.160353  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:49:24.160361  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367000 0xc000367040]
E0320 11:49:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:49:33.409794  543705 cpu.go:275] no items to output this cycle
I0320 11:49:33.409796  543705 memory.go:184] no items to output this cycle
E0320 11:49:43.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:49:43.409839  543705 memory.go:191] Add success.
I0320 11:49:43.409844  543705 cpu.go:282] Add success.
I0320 11:49:43.420081  543705 net.go:648] Add success.
I0320 11:49:43.423365  543705 net.go:770] primary dev: ETH0
I0320 11:49:43.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:49:43.423392  543705 net.go:698] Add success.
I0320 11:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:49:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:49:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:49:53.409779  543705 memory.go:184] no items to output this cycle
I0320 11:49:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 11:50:03.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:50:03.409881  543705 memory.go:184] no items to output this cycle
I0320 11:50:03.409961  543705 cpu.go:275] no items to output this cycle
E0320 11:50:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:50:13.409792  543705 memory.go:191] Add success.
W0320 11:50:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:50:13.409818  543705 cpu.go:282] Add success.
W0320 11:50:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:50:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:50:13.420171  543705 net.go:648] Add success.
I0320 11:50:13.422750  543705 net.go:770] primary dev: ETH0
I0320 11:50:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:50:13.422788  543705 net.go:698] Add success.
I0320 11:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:50:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:50:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 11:50:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:50:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 11:50:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:50:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:50:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:50:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:50:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:50:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:50:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:50:23.409779  543705 memory.go:184] no items to output this cycle
I0320 11:50:23.409841  543705 cpu.go:275] no items to output this cycle
I0320 11:50:24.161684  543705 disk_info.go:125] begin check local disk info of client
I0320 11:50:24.164830  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:50:24.164837  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4bc0 0xc0000c4c00]
E0320 11:50:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:50:33.409821  543705 memory.go:184] no items to output this cycle
I0320 11:50:33.409832  543705 cpu.go:275] no items to output this cycle
E0320 11:50:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:50:43.409835  543705 memory.go:191] Add success.
I0320 11:50:43.409844  543705 cpu.go:282] Add success.
I0320 11:50:43.419898  543705 net.go:648] Add success.
I0320 11:50:43.422947  543705 net.go:770] primary dev: ETH0
I0320 11:50:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:50:43.422988  543705 net.go:698] Add success.
I0320 11:50:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:50:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:50:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:50:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:50:53.409769  543705 memory.go:184] no items to output this cycle
I0320 11:50:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 11:51:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:51:03.409775  543705 memory.go:184] no items to output this cycle
I0320 11:51:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 11:51:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:51:13.409795  543705 memory.go:191] Add success.
I0320 11:51:13.409809  543705 cpu.go:282] Add success.
W0320 11:51:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:51:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:51:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:51:13.420140  543705 net.go:648] Add success.
I0320 11:51:13.423293  543705 net.go:770] primary dev: ETH0
I0320 11:51:13.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:51:13.423320  543705 net.go:698] Add success.
I0320 11:51:13.469775  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"100b57d8-1617-49a9-881d-e14099a3da11","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:51:13.469808  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:51:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:51:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:51:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 11:51:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:51:14.456518  543705 disk_worker.go:494] system disk:vda1
I0320 11:51:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:51:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:51:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:51:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:51:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:51:23.409802  543705 memory.go:184] no items to output this cycle
I0320 11:51:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 11:51:24.165682  543705 disk_info.go:125] begin check local disk info of client
I0320 11:51:24.168238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:51:24.168245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000542780 0xc0005427c0]
E0320 11:51:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:51:33.409783  543705 memory.go:184] no items to output this cycle
I0320 11:51:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 11:51:38.458040  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:51:38.458047  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:51:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:51:43.410774  543705 memory.go:191] Add success.
I0320 11:51:43.409811  543705 cpu.go:282] Add success.
I0320 11:51:43.420471  543705 net.go:648] Add success.
I0320 11:51:43.423568  543705 net.go:770] primary dev: ETH0
I0320 11:51:43.423582  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:51:43.423596  543705 net.go:698] Add success.
I0320 11:51:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:51:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:51:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:51:53.409799  543705 memory.go:184] no items to output this cycle
I0320 11:51:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 11:52:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:52:03.409909  543705 memory.go:184] no items to output this cycle
I0320 11:52:03.409932  543705 cpu.go:275] no items to output this cycle
E0320 11:52:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:52:13.409775  543705 memory.go:191] Add success.
W0320 11:52:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:52:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:52:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:52:13.409824  543705 cpu.go:282] Add success.
I0320 11:52:13.420153  543705 net.go:648] Add success.
I0320 11:52:13.423354  543705 net.go:770] primary dev: ETH0
I0320 11:52:13.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:52:13.423388  543705 net.go:698] Add success.
W0320 11:52:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:52:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 11:52:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:52:14.456793  543705 disk_worker.go:494] system disk:vda1
I0320 11:52:14.456832  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:52:14.457118  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:52:14.457126  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:52:14.457130  543705 custom_config.go:64] query custom config with name: gpu
E0320 11:52:15.456773  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:52:15.456781  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:52:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:52:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:52:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:52:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:52:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:52:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:52:23.409782  543705 memory.go:184] no items to output this cycle
I0320 11:52:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 11:52:24.169680  543705 disk_info.go:125] begin check local disk info of client
I0320 11:52:24.172281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:52:24.172289  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473380 0xc0004733c0]
E0320 11:52:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:52:33.409813  543705 memory.go:184] no items to output this cycle
I0320 11:52:33.409828  543705 cpu.go:275] no items to output this cycle
E0320 11:52:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:52:43.409784  543705 memory.go:191] Add success.
I0320 11:52:43.409805  543705 cpu.go:282] Add success.
I0320 11:52:43.420084  543705 net.go:648] Add success.
I0320 11:52:43.422813  543705 net.go:770] primary dev: ETH0
I0320 11:52:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:52:43.422840  543705 net.go:698] Add success.
I0320 11:52:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:52:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:52:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:52:53.409778  543705 memory.go:184] no items to output this cycle
I0320 11:52:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 11:53:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:53:03.409843  543705 memory.go:184] no items to output this cycle
I0320 11:53:03.409906  543705 cpu.go:275] no items to output this cycle
E0320 11:53:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:53:13.409822  543705 memory.go:191] Add success.
I0320 11:53:13.409830  543705 cpu.go:282] Add success.
W0320 11:53:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:53:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:53:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:53:13.420156  543705 net.go:648] Add success.
I0320 11:53:13.423368  543705 net.go:770] primary dev: ETH0
I0320 11:53:13.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:53:13.423397  543705 net.go:698] Add success.
I0320 11:53:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:53:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:53:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 11:53:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:53:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 11:53:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:53:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:53:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:53:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:53:23.409795  543705 memory.go:184] no items to output this cycle
I0320 11:53:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 11:53:24.173679  543705 disk_info.go:125] begin check local disk info of client
I0320 11:53:24.176273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:53:24.176280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002857c0 0xc000285800]
E0320 11:53:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:53:33.409801  543705 memory.go:184] no items to output this cycle
I0320 11:53:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 11:53:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:53:43.409786  543705 memory.go:191] Add success.
I0320 11:53:43.409790  543705 cpu.go:282] Add success.
I0320 11:53:43.419978  543705 net.go:648] Add success.
I0320 11:53:43.420875  543705 net.go:770] primary dev: ETH0
I0320 11:53:43.420888  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:53:43.420902  543705 net.go:698] Add success.
I0320 11:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:53:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:53:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:53:53.409770  543705 memory.go:184] no items to output this cycle
I0320 11:53:53.409791  543705 cpu.go:275] no items to output this cycle
I0320 11:54:03.409895  543705 cpu.go:275] no items to output this cycle
E0320 11:54:03.409929  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:54:03.410044  543705 memory.go:184] no items to output this cycle
E0320 11:54:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:54:13.409777  543705 memory.go:191] Add success.
W0320 11:54:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 11:54:13.409807  543705 cpu.go:282] Add success.
W0320 11:54:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:54:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:54:13.420194  543705 net.go:648] Add success.
I0320 11:54:13.422970  543705 net.go:770] primary dev: ETH0
I0320 11:54:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:54:13.423010  543705 net.go:698] Add success.
I0320 11:54:13.470515  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d08200df-97dd-4337-8d3c-150fb19376b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:54:13.470548  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 11:54:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:54:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:54:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 11:54:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:54:14.456695  543705 disk_worker.go:494] system disk:vda1
I0320 11:54:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:54:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:54:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:54:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:54:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:54:23.409802  543705 memory.go:184] no items to output this cycle
I0320 11:54:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 11:54:24.177681  543705 disk_info.go:125] begin check local disk info of client
I0320 11:54:24.180247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:54:24.180254  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035cb40 0xc00035cb80]
E0320 11:54:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:54:33.409785  543705 memory.go:184] no items to output this cycle
I0320 11:54:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 11:54:38.459048  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:54:38.459055  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:54:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:54:43.410858  543705 memory.go:191] Add success.
I0320 11:54:43.409793  543705 cpu.go:282] Add success.
I0320 11:54:43.420610  543705 net.go:648] Add success.
I0320 11:54:43.423493  543705 net.go:770] primary dev: ETH0
I0320 11:54:43.423506  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:54:43.423521  543705 net.go:698] Add success.
I0320 11:54:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:54:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:54:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:54:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:54:53.409781  543705 memory.go:184] no items to output this cycle
I0320 11:54:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 11:55:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:55:03.409796  543705 memory.go:184] no items to output this cycle
I0320 11:55:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 11:55:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:55:13.409789  543705 memory.go:191] Add success.
I0320 11:55:13.409810  543705 cpu.go:282] Add success.
W0320 11:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:55:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:55:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:55:13.420246  543705 net.go:648] Add success.
I0320 11:55:13.423088  543705 net.go:770] primary dev: ETH0
I0320 11:55:13.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:55:13.423111  543705 net.go:698] Add success.
I0320 11:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:55:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:55:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 11:55:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:55:14.456565  543705 disk_worker.go:494] system disk:vda1
I0320 11:55:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:55:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:55:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:55:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:55:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:55:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:55:23.409757  543705 memory.go:184] no items to output this cycle
I0320 11:55:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 11:55:24.181688  543705 disk_info.go:125] begin check local disk info of client
I0320 11:55:24.184251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:55:24.184258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faa80 0xc0001faac0]
E0320 11:55:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:55:33.409800  543705 memory.go:184] no items to output this cycle
I0320 11:55:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 11:55:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:55:43.409798  543705 memory.go:191] Add success.
I0320 11:55:43.409802  543705 cpu.go:282] Add success.
I0320 11:55:43.419887  543705 net.go:648] Add success.
I0320 11:55:43.423146  543705 net.go:770] primary dev: ETH0
I0320 11:55:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:55:43.423172  543705 net.go:698] Add success.
I0320 11:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:55:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:55:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:55:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:55:53.409771  543705 memory.go:184] no items to output this cycle
I0320 11:55:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 11:56:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:56:03.409801  543705 memory.go:184] no items to output this cycle
I0320 11:56:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 11:56:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:56:13.409813  543705 memory.go:191] Add success.
I0320 11:56:13.409822  543705 cpu.go:282] Add success.
W0320 11:56:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:56:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:56:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:56:13.420054  543705 net.go:648] Add success.
I0320 11:56:13.422624  543705 net.go:770] primary dev: ETH0
I0320 11:56:13.422638  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:56:13.422652  543705 net.go:698] Add success.
I0320 11:56:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:56:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:56:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 11:56:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:56:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 11:56:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:56:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:56:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:56:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:56:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:56:23.409795  543705 memory.go:184] no items to output this cycle
I0320 11:56:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 11:56:24.185684  543705 disk_info.go:125] begin check local disk info of client
I0320 11:56:24.188223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:56:24.188231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bed00 0xc0002bed40]
E0320 11:56:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:56:33.409815  543705 memory.go:184] no items to output this cycle
I0320 11:56:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 11:56:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:56:43.409787  543705 memory.go:191] Add success.
I0320 11:56:43.409809  543705 cpu.go:282] Add success.
I0320 11:56:43.420011  543705 net.go:648] Add success.
I0320 11:56:43.423560  543705 net.go:770] primary dev: ETH0
I0320 11:56:43.423575  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:56:43.423590  543705 net.go:698] Add success.
I0320 11:56:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:56:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:56:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:56:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:56:53.409774  543705 memory.go:184] no items to output this cycle
I0320 11:56:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:57:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:57:03.409767  543705 memory.go:184] no items to output this cycle
I0320 11:57:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 11:57:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:57:13.409783  543705 memory.go:191] Add success.
I0320 11:57:13.409796  543705 cpu.go:282] Add success.
W0320 11:57:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:57:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:57:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:57:13.420051  543705 net.go:648] Add success.
I0320 11:57:13.428490  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 11:57:13.428566  543705 net.go:770] primary dev: ETH0
I0320 11:57:13.428580  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:57:13.428594  543705 net.go:698] Add success.
I0320 11:57:13.453139  543705 event_worker.go:152] Polling the log file for events...
I0320 11:57:13.469054  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"03ca1274-9241-40bd-a20d-9f0286615121","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 11:57:13.469087  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 11:57:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:57:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 11:57:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:57:14.456846  543705 disk_worker.go:494] system disk:vda1
E0320 11:57:14.456869  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 11:57:14.456877  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 11:57:14.456882  543705 custom_config.go:64] query custom config with name: gpu
I0320 11:57:14.456901  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 11:57:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 11:57:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:57:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 11:57:16.457983  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 11:57:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:57:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:57:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:57:23.410367  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:57:23.410383  543705 memory.go:184] no items to output this cycle
I0320 11:57:23.410393  543705 cpu.go:275] no items to output this cycle
I0320 11:57:24.189694  543705 disk_info.go:125] begin check local disk info of client
I0320 11:57:24.192259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:57:24.192266  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033e2c0 0xc00033e300]
E0320 11:57:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:57:33.409778  543705 memory.go:184] no items to output this cycle
I0320 11:57:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 11:57:38.460057  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 11:57:38.460064  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 11:57:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:57:43.410519  543705 memory.go:191] Add success.
I0320 11:57:43.409799  543705 cpu.go:282] Add success.
I0320 11:57:43.420214  543705 net.go:648] Add success.
I0320 11:57:43.423162  543705 net.go:770] primary dev: ETH0
I0320 11:57:43.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:57:43.423190  543705 net.go:698] Add success.
I0320 11:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:57:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:57:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:57:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:57:53.409777  543705 memory.go:184] no items to output this cycle
I0320 11:57:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 11:58:03.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:58:03.409899  543705 memory.go:184] no items to output this cycle
I0320 11:58:03.409898  543705 cpu.go:275] no items to output this cycle
E0320 11:58:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:58:13.409814  543705 memory.go:191] Add success.
I0320 11:58:13.409826  543705 cpu.go:282] Add success.
W0320 11:58:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:58:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:58:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:58:13.420104  543705 net.go:648] Add success.
I0320 11:58:13.422508  543705 net.go:770] primary dev: ETH0
I0320 11:58:13.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:58:13.422537  543705 net.go:698] Add success.
I0320 11:58:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:58:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:58:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 11:58:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:58:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 11:58:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:58:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:58:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:58:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:58:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:58:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:58:23.409770  543705 memory.go:184] no items to output this cycle
I0320 11:58:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 11:58:24.193696  543705 disk_info.go:125] begin check local disk info of client
I0320 11:58:24.196187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:58:24.196195  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a880 0xc00048a8c0]
E0320 11:58:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:58:33.409807  543705 memory.go:184] no items to output this cycle
I0320 11:58:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 11:58:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:58:43.409824  543705 memory.go:191] Add success.
I0320 11:58:43.409829  543705 cpu.go:282] Add success.
I0320 11:58:43.419982  543705 net.go:648] Add success.
I0320 11:58:43.422538  543705 net.go:770] primary dev: ETH0
I0320 11:58:43.422551  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:58:43.422564  543705 net.go:698] Add success.
I0320 11:58:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:58:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:58:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:58:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:58:53.409769  543705 memory.go:184] no items to output this cycle
I0320 11:58:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 11:59:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:59:03.409814  543705 memory.go:184] no items to output this cycle
I0320 11:59:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 11:59:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:59:13.409786  543705 memory.go:191] Add success.
I0320 11:59:13.409800  543705 cpu.go:282] Add success.
W0320 11:59:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 11:59:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 11:59:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 11:59:13.420060  543705 net.go:648] Add success.
I0320 11:59:13.422866  543705 net.go:770] primary dev: ETH0
I0320 11:59:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:59:13.422891  543705 net.go:698] Add success.
I0320 11:59:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 11:59:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 11:59:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 11:59:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 11:59:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 11:59:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 11:59:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 11:59:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:59:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:59:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 11:59:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 11:59:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:59:23.409803  543705 memory.go:184] no items to output this cycle
I0320 11:59:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 11:59:24.197686  543705 disk_info.go:125] begin check local disk info of client
I0320 11:59:24.200079  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 11:59:24.200087  543705 disk_info.go:196] parse disk info done, disk is : [0xc000294900 0xc000294940]
E0320 11:59:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:59:33.409773  543705 memory.go:184] no items to output this cycle
I0320 11:59:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 11:59:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:59:43.409801  543705 memory.go:191] Add success.
I0320 11:59:43.409802  543705 cpu.go:282] Add success.
I0320 11:59:43.420077  543705 net.go:648] Add success.
I0320 11:59:43.422993  543705 net.go:770] primary dev: ETH0
I0320 11:59:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0320 11:59:43.423019  543705 net.go:698] Add success.
I0320 11:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 11:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 11:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 11:59:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 11:59:53.409775  543705 memory.go:184] no items to output this cycle
I0320 11:59:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 12:00:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:00:03.409793  543705 memory.go:184] no items to output this cycle
I0320 12:00:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 12:00:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:00:13.409797  543705 memory.go:191] Add success.
I0320 12:00:13.409800  543705 cpu.go:282] Add success.
W0320 12:00:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:00:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:00:13.420116  543705 net.go:648] Add success.
I0320 12:00:13.422802  543705 net.go:770] primary dev: ETH0
I0320 12:00:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:00:13.422827  543705 net.go:698] Add success.
I0320 12:00:13.464062  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"627c75f3-53c1-4521-808f-0f68117d4818","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:00:13.464093  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:00:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:00:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 12:00:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:00:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 12:00:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:00:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:00:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:00:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:00:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:00:23.409762  543705 memory.go:184] no items to output this cycle
I0320 12:00:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 12:00:24.201694  543705 disk_info.go:125] begin check local disk info of client
I0320 12:00:24.204112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:00:24.204120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa440 0xc0001fa480]
E0320 12:00:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:00:33.409773  543705 memory.go:184] no items to output this cycle
I0320 12:00:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 12:00:38.461055  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:00:38.461062  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:00:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:00:43.411059  543705 memory.go:191] Add success.
I0320 12:00:43.409813  543705 cpu.go:282] Add success.
I0320 12:00:43.419728  543705 net.go:648] Add success.
I0320 12:00:43.422270  543705 net.go:770] primary dev: ETH0
I0320 12:00:43.422285  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:00:43.422300  543705 net.go:698] Add success.
I0320 12:00:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:00:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:00:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:00:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:00:53.409774  543705 memory.go:184] no items to output this cycle
I0320 12:00:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 12:01:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:01:03.409770  543705 memory.go:184] no items to output this cycle
I0320 12:01:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 12:01:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:01:13.409789  543705 memory.go:191] Add success.
I0320 12:01:13.409808  543705 cpu.go:282] Add success.
W0320 12:01:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:01:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:01:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:01:13.420217  543705 net.go:648] Add success.
I0320 12:01:13.422924  543705 net.go:770] primary dev: ETH0
I0320 12:01:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:01:13.422949  543705 net.go:698] Add success.
I0320 12:01:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:01:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:01:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0320 12:01:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:01:14.456493  543705 disk_worker.go:494] system disk:vda1
I0320 12:01:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:01:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:01:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:01:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:01:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:01:23.409767  543705 memory.go:184] no items to output this cycle
I0320 12:01:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 12:01:24.205677  543705 disk_info.go:125] begin check local disk info of client
I0320 12:01:24.208129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:01:24.208137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb3c0 0xc0001fb400]
E0320 12:01:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:01:33.409778  543705 memory.go:184] no items to output this cycle
I0320 12:01:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 12:01:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:01:43.409802  543705 memory.go:191] Add success.
I0320 12:01:43.409820  543705 cpu.go:282] Add success.
I0320 12:01:43.419912  543705 net.go:648] Add success.
I0320 12:01:43.422660  543705 net.go:770] primary dev: ETH0
I0320 12:01:43.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:01:43.422688  543705 net.go:698] Add success.
I0320 12:01:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:01:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:01:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:01:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:01:53.409804  543705 memory.go:184] no items to output this cycle
I0320 12:01:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 12:02:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:02:03.409786  543705 memory.go:184] no items to output this cycle
I0320 12:02:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 12:02:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:02:13.409838  543705 memory.go:191] Add success.
I0320 12:02:13.409845  543705 cpu.go:282] Add success.
W0320 12:02:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:02:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:02:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:02:13.420200  543705 net.go:648] Add success.
I0320 12:02:13.423212  543705 net.go:770] primary dev: ETH0
I0320 12:02:13.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:02:13.423241  543705 net.go:698] Add success.
W0320 12:02:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:02:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 12:02:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:02:14.455929  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:02:14.455938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:02:14.455944  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:02:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 12:02:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:02:15.456893  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:02:15.456903  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:02:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:02:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:02:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:02:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:02:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:02:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:02:23.409784  543705 memory.go:184] no items to output this cycle
I0320 12:02:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 12:02:24.209684  543705 disk_info.go:125] begin check local disk info of client
I0320 12:02:24.212147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:02:24.212155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f47c0 0xc0004f4800]
E0320 12:02:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:02:33.409781  543705 memory.go:184] no items to output this cycle
I0320 12:02:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 12:02:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:02:43.409843  543705 memory.go:191] Add success.
I0320 12:02:43.409813  543705 cpu.go:282] Add success.
I0320 12:02:43.420104  543705 net.go:648] Add success.
I0320 12:02:43.421062  543705 net.go:770] primary dev: ETH0
I0320 12:02:43.421074  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:02:43.421092  543705 net.go:698] Add success.
I0320 12:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:02:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:02:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:02:53.409776  543705 memory.go:184] no items to output this cycle
I0320 12:02:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 12:03:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:03:03.409805  543705 memory.go:184] no items to output this cycle
I0320 12:03:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 12:03:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:03:13.409791  543705 memory.go:191] Add success.
I0320 12:03:13.409813  543705 cpu.go:282] Add success.
W0320 12:03:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:03:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:03:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:03:13.420246  543705 net.go:648] Add success.
I0320 12:03:13.423209  543705 net.go:770] primary dev: ETH0
I0320 12:03:13.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:03:13.423234  543705 net.go:698] Add success.
I0320 12:03:13.468239  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0f7cc2ab-d590-4f29-b139-9a0b246966c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:03:13.468271  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:03:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:03:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:03:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 12:03:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:03:14.456965  543705 disk_worker.go:494] system disk:vda1
I0320 12:03:14.457002  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:03:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:03:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:03:23.409779  543705 memory.go:184] no items to output this cycle
I0320 12:03:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 12:03:24.213674  543705 disk_info.go:125] begin check local disk info of client
I0320 12:03:24.216041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:03:24.216048  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004704c0 0xc000470500]
E0320 12:03:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:03:33.409775  543705 memory.go:184] no items to output this cycle
I0320 12:03:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 12:03:38.462068  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:03:38.462075  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:03:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:03:43.410683  543705 memory.go:191] Add success.
I0320 12:03:43.409832  543705 cpu.go:282] Add success.
I0320 12:03:43.420401  543705 net.go:648] Add success.
I0320 12:03:43.423264  543705 net.go:770] primary dev: ETH0
I0320 12:03:43.423278  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:03:43.423293  543705 net.go:698] Add success.
I0320 12:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:03:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:03:53.409794  543705 memory.go:184] no items to output this cycle
I0320 12:03:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 12:04:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:04:03.409776  543705 memory.go:184] no items to output this cycle
I0320 12:04:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 12:04:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:04:13.409783  543705 memory.go:191] Add success.
I0320 12:04:13.409787  543705 cpu.go:282] Add success.
W0320 12:04:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:04:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:04:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:04:13.420059  543705 net.go:648] Add success.
I0320 12:04:13.422871  543705 net.go:770] primary dev: ETH0
I0320 12:04:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:04:13.422897  543705 net.go:698] Add success.
I0320 12:04:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:04:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:04:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 12:04:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:04:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 12:04:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:04:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:04:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:04:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:04:23.409775  543705 memory.go:184] no items to output this cycle
I0320 12:04:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 12:04:24.216132  543705 disk_info.go:125] begin check local disk info of client
I0320 12:04:24.218676  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:04:24.218685  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003824c0 0xc000382500]
E0320 12:04:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:04:33.409815  543705 memory.go:184] no items to output this cycle
I0320 12:04:33.409831  543705 cpu.go:275] no items to output this cycle
E0320 12:04:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:04:43.409787  543705 memory.go:191] Add success.
I0320 12:04:43.409813  543705 cpu.go:282] Add success.
I0320 12:04:43.420000  543705 net.go:648] Add success.
I0320 12:04:43.423038  543705 net.go:770] primary dev: ETH0
I0320 12:04:43.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:04:43.423063  543705 net.go:698] Add success.
I0320 12:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:04:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:04:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:04:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:04:53.409775  543705 memory.go:184] no items to output this cycle
I0320 12:04:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 12:05:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:05:03.409765  543705 memory.go:184] no items to output this cycle
I0320 12:05:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 12:05:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:05:13.409808  543705 memory.go:191] Add success.
I0320 12:05:13.409815  543705 cpu.go:282] Add success.
W0320 12:05:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:05:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:05:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:05:13.420108  543705 net.go:648] Add success.
I0320 12:05:13.422655  543705 net.go:770] primary dev: ETH0
I0320 12:05:13.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:05:13.422683  543705 net.go:698] Add success.
I0320 12:05:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:05:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:05:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 12:05:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:05:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 12:05:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:05:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:05:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:05:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:05:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:05:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:05:23.409772  543705 memory.go:184] no items to output this cycle
I0320 12:05:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 12:05:24.218781  543705 disk_info.go:125] begin check local disk info of client
I0320 12:05:24.221297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:05:24.221303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6300 0xc0002a6340]
E0320 12:05:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:05:33.409811  543705 memory.go:184] no items to output this cycle
I0320 12:05:33.409833  543705 cpu.go:275] no items to output this cycle
E0320 12:05:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:05:43.409810  543705 memory.go:191] Add success.
I0320 12:05:43.409811  543705 cpu.go:282] Add success.
I0320 12:05:43.420057  543705 net.go:648] Add success.
I0320 12:05:43.422871  543705 net.go:770] primary dev: ETH0
I0320 12:05:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:05:43.422896  543705 net.go:698] Add success.
I0320 12:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:05:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:05:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:05:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:05:53.409775  543705 memory.go:184] no items to output this cycle
I0320 12:05:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 12:06:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:06:03.409792  543705 memory.go:184] no items to output this cycle
I0320 12:06:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 12:06:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:06:13.409801  543705 cpu.go:282] Add success.
I0320 12:06:13.409806  543705 memory.go:191] Add success.
W0320 12:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:06:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:06:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:06:13.420082  543705 net.go:648] Add success.
I0320 12:06:13.422856  543705 net.go:770] primary dev: ETH0
I0320 12:06:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:06:13.422882  543705 net.go:698] Add success.
I0320 12:06:13.468492  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c0c633d-623b-4ec5-8ef6-ad52256bc1d8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:06:13.468524  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:06:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:06:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:06:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0320 12:06:14.455247  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:06:14.456771  543705 disk_worker.go:494] system disk:vda1
I0320 12:06:14.456802  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:06:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:06:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:06:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:06:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:06:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:06:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:06:23.409771  543705 memory.go:184] no items to output this cycle
I0320 12:06:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 12:06:24.221671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:06:24.224217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:06:24.224223  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bc40 0xc00007bc80]
E0320 12:06:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:06:33.409774  543705 memory.go:184] no items to output this cycle
I0320 12:06:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 12:06:38.463084  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:06:38.463091  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:06:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:06:43.410675  543705 memory.go:191] Add success.
I0320 12:06:43.409796  543705 cpu.go:282] Add success.
I0320 12:06:43.420407  543705 net.go:648] Add success.
I0320 12:06:43.423649  543705 net.go:770] primary dev: ETH0
I0320 12:06:43.423662  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:06:43.423675  543705 net.go:698] Add success.
I0320 12:06:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:06:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:06:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:06:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:06:53.409770  543705 memory.go:184] no items to output this cycle
I0320 12:06:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 12:07:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:07:03.409798  543705 memory.go:184] no items to output this cycle
I0320 12:07:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 12:07:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:07:13.409775  543705 memory.go:191] Add success.
W0320 12:07:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:07:13.409807  543705 cpu.go:282] Add success.
W0320 12:07:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:07:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:07:13.420041  543705 net.go:648] Add success.
I0320 12:07:13.422886  543705 net.go:770] primary dev: ETH0
I0320 12:07:13.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:07:13.422912  543705 net.go:698] Add success.
I0320 12:07:13.453531  543705 event_worker.go:152] Polling the log file for events...
W0320 12:07:14.455329  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:07:14.455425  543705 disk_worker.go:708] disk space is not compliant
W0320 12:07:14.455430  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:07:14.456337  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:07:14.456346  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:07:14.456352  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:07:14.457234  543705 disk_worker.go:494] system disk:vda1
I0320 12:07:14.457267  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:07:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:07:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:07:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:07:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:07:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:07:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:07:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:07:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:07:23.409775  543705 memory.go:184] no items to output this cycle
I0320 12:07:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 12:07:24.225677  543705 disk_info.go:125] begin check local disk info of client
I0320 12:07:24.228165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:07:24.228172  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba80 0xc00007bac0]
E0320 12:07:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:07:33.409807  543705 memory.go:184] no items to output this cycle
I0320 12:07:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 12:07:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:07:43.409792  543705 memory.go:191] Add success.
I0320 12:07:43.409812  543705 cpu.go:282] Add success.
I0320 12:07:43.420062  543705 net.go:648] Add success.
I0320 12:07:43.422889  543705 net.go:770] primary dev: ETH0
I0320 12:07:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:07:43.422916  543705 net.go:698] Add success.
I0320 12:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:07:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:07:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:07:53.409771  543705 memory.go:184] no items to output this cycle
I0320 12:07:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 12:08:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:08:03.409795  543705 memory.go:184] no items to output this cycle
I0320 12:08:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 12:08:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:08:13.409781  543705 memory.go:191] Add success.
W0320 12:08:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:08:13.409820  543705 cpu.go:282] Add success.
I0320 12:08:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:08:13.420224  543705 net.go:648] Add success.
I0320 12:08:13.422987  543705 net.go:770] primary dev: ETH0
I0320 12:08:13.423001  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:08:13.423014  543705 net.go:698] Add success.
I0320 12:08:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:08:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:08:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 12:08:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:08:14.456830  543705 disk_worker.go:494] system disk:vda1
I0320 12:08:14.456859  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:08:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:08:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:08:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:08:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:08:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:08:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:08:23.409809  543705 memory.go:184] no items to output this cycle
I0320 12:08:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 12:08:24.229673  543705 disk_info.go:125] begin check local disk info of client
I0320 12:08:24.232171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:08:24.232177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460d00 0xc000460d40]
E0320 12:08:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:08:33.409776  543705 memory.go:184] no items to output this cycle
I0320 12:08:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 12:08:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:08:43.409792  543705 memory.go:191] Add success.
I0320 12:08:43.409809  543705 cpu.go:282] Add success.
I0320 12:08:43.419988  543705 net.go:648] Add success.
I0320 12:08:43.422551  543705 net.go:770] primary dev: ETH0
I0320 12:08:43.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:08:43.422577  543705 net.go:698] Add success.
I0320 12:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:08:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:08:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:08:53.409784  543705 memory.go:184] no items to output this cycle
I0320 12:08:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 12:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:09:03.409780  543705 memory.go:184] no items to output this cycle
I0320 12:09:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 12:09:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:09:13.409791  543705 memory.go:191] Add success.
I0320 12:09:13.409791  543705 cpu.go:282] Add success.
W0320 12:09:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:09:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:09:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:09:13.420215  543705 net.go:648] Add success.
I0320 12:09:13.423148  543705 net.go:770] primary dev: ETH0
I0320 12:09:13.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:09:13.423175  543705 net.go:698] Add success.
I0320 12:09:13.465153  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7bab7df0-ef38-4e68-847e-e8897be76f47","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:09:13.465205  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:09:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:09:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 12:09:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:09:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 12:09:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:09:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:09:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:09:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:09:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:09:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:09:23.409787  543705 memory.go:184] no items to output this cycle
I0320 12:09:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 12:09:24.233672  543705 disk_info.go:125] begin check local disk info of client
I0320 12:09:24.236133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:09:24.236138  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376c40 0xc000376c80]
E0320 12:09:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:09:33.409775  543705 memory.go:184] no items to output this cycle
I0320 12:09:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 12:09:38.464071  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:09:38.464077  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:09:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:09:43.411065  543705 memory.go:191] Add success.
I0320 12:09:43.409825  543705 cpu.go:282] Add success.
I0320 12:09:43.419711  543705 net.go:648] Add success.
I0320 12:09:43.422846  543705 net.go:770] primary dev: ETH0
I0320 12:09:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:09:43.422872  543705 net.go:698] Add success.
I0320 12:09:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:09:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:09:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:09:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:09:53.409798  543705 memory.go:184] no items to output this cycle
I0320 12:09:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 12:10:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:10:03.409779  543705 memory.go:184] no items to output this cycle
I0320 12:10:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 12:10:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:10:13.409796  543705 memory.go:191] Add success.
I0320 12:10:13.409804  543705 cpu.go:282] Add success.
W0320 12:10:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:10:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:10:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:10:13.420108  543705 net.go:648] Add success.
I0320 12:10:13.422611  543705 net.go:770] primary dev: ETH0
I0320 12:10:13.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:10:13.422635  543705 net.go:698] Add success.
I0320 12:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:10:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:10:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 12:10:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:10:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 12:10:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:10:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:10:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:10:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:10:16.472516  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:10:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:10:23.409813  543705 memory.go:184] no items to output this cycle
I0320 12:10:23.409844  543705 cpu.go:275] no items to output this cycle
I0320 12:10:24.237669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:10:24.240187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:10:24.240193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa3c0 0xc0001aa400]
E0320 12:10:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:10:33.409809  543705 memory.go:184] no items to output this cycle
I0320 12:10:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 12:10:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:10:43.409776  543705 memory.go:191] Add success.
I0320 12:10:43.409812  543705 cpu.go:282] Add success.
I0320 12:10:43.419869  543705 net.go:648] Add success.
I0320 12:10:43.422571  543705 net.go:770] primary dev: ETH0
I0320 12:10:43.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:10:43.422597  543705 net.go:698] Add success.
I0320 12:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:10:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:10:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:10:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:10:53.409769  543705 memory.go:184] no items to output this cycle
I0320 12:10:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 12:11:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:11:03.409781  543705 memory.go:184] no items to output this cycle
I0320 12:11:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 12:11:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:11:13.409796  543705 memory.go:191] Add success.
I0320 12:11:13.409799  543705 cpu.go:282] Add success.
W0320 12:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:11:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:11:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:11:13.420103  543705 net.go:648] Add success.
I0320 12:11:13.423003  543705 net.go:770] primary dev: ETH0
I0320 12:11:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:11:13.423027  543705 net.go:698] Add success.
I0320 12:11:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:11:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:11:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 12:11:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:11:14.456631  543705 disk_worker.go:494] system disk:vda1
I0320 12:11:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:11:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:11:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:11:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:11:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:11:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:11:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:11:23.409778  543705 memory.go:184] no items to output this cycle
I0320 12:11:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 12:11:24.240278  543705 disk_info.go:125] begin check local disk info of client
I0320 12:11:24.242809  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:11:24.242815  543705 disk_info.go:196] parse disk info done, disk is : [0xc000485280 0xc0004852c0]
E0320 12:11:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:11:33.409814  543705 memory.go:184] no items to output this cycle
I0320 12:11:33.409834  543705 cpu.go:275] no items to output this cycle
E0320 12:11:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:11:43.409811  543705 memory.go:191] Add success.
I0320 12:11:43.409811  543705 cpu.go:282] Add success.
I0320 12:11:43.419857  543705 net.go:770] primary dev: ETH0
I0320 12:11:43.419870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:11:43.419882  543705 net.go:698] Add success.
I0320 12:11:43.420243  543705 net.go:648] Add success.
I0320 12:11:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:11:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:11:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:11:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:11:53.409772  543705 memory.go:184] no items to output this cycle
I0320 12:11:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 12:12:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:12:03.409781  543705 memory.go:184] no items to output this cycle
I0320 12:12:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 12:12:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:12:13.409775  543705 memory.go:191] Add success.
W0320 12:12:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:12:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:12:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:12:13.409821  543705 cpu.go:282] Add success.
I0320 12:12:13.420037  543705 net.go:770] primary dev: ETH0
I0320 12:12:13.420051  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:12:13.420063  543705 net.go:698] Add success.
I0320 12:12:13.420404  543705 net.go:648] Add success.
I0320 12:12:13.627678  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a5ca21b7-b309-4819-b3a6-dfc604dd0c18","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:12:13.627714  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 12:12:14.454906  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:12:14.454970  543705 disk_worker.go:708] disk space is not compliant
W0320 12:12:14.454973  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:12:14.455704  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:12:14.455713  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:12:14.455719  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:12:14.456400  543705 disk_worker.go:494] system disk:vda1
I0320 12:12:14.456449  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:12:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:12:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:12:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:12:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:12:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:12:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:12:16.472338  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:12:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:12:23.409802  543705 memory.go:184] no items to output this cycle
I0320 12:12:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 12:12:24.242900  543705 disk_info.go:125] begin check local disk info of client
I0320 12:12:24.245323  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:12:24.245329  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e040 0xc00034e080]
E0320 12:12:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:12:33.409790  543705 memory.go:184] no items to output this cycle
I0320 12:12:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 12:12:38.465096  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:12:38.465102  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:12:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:12:43.410548  543705 memory.go:191] Add success.
I0320 12:12:43.409822  543705 cpu.go:282] Add success.
I0320 12:12:43.420053  543705 net.go:770] primary dev: ETH0
I0320 12:12:43.420072  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:12:43.420088  543705 net.go:698] Add success.
I0320 12:12:43.420469  543705 net.go:648] Add success.
I0320 12:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:12:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:12:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:12:53.409769  543705 memory.go:184] no items to output this cycle
I0320 12:12:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 12:13:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:13:03.409802  543705 memory.go:184] no items to output this cycle
I0320 12:13:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 12:13:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:13:13.409809  543705 memory.go:191] Add success.
I0320 12:13:13.409817  543705 cpu.go:282] Add success.
W0320 12:13:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:13:13.412608  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:13:13.412614  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:13:13.420317  543705 net.go:648] Add success.
I0320 12:13:13.422088  543705 net.go:770] primary dev: ETH0
I0320 12:13:13.422101  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:13:13.422115  543705 net.go:698] Add success.
I0320 12:13:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:13:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:13:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 12:13:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:13:14.456765  543705 disk_worker.go:494] system disk:vda1
I0320 12:13:14.456798  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:13:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:13:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:13:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:13:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:13:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:13:23.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:13:23.409885  543705 cpu.go:275] no items to output this cycle
I0320 12:13:23.409889  543705 memory.go:184] no items to output this cycle
I0320 12:13:24.245673  543705 disk_info.go:125] begin check local disk info of client
I0320 12:13:24.248071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:13:24.248077  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba000 0xc0002ba040]
E0320 12:13:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:13:33.409815  543705 memory.go:184] no items to output this cycle
I0320 12:13:33.409826  543705 cpu.go:275] no items to output this cycle
E0320 12:13:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:13:43.409793  543705 memory.go:191] Add success.
I0320 12:13:43.409811  543705 cpu.go:282] Add success.
I0320 12:13:43.420008  543705 net.go:648] Add success.
I0320 12:13:43.422734  543705 net.go:770] primary dev: ETH0
I0320 12:13:43.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:13:43.422764  543705 net.go:698] Add success.
I0320 12:13:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:13:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:13:53.409794  543705 memory.go:184] no items to output this cycle
I0320 12:13:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 12:14:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:14:03.409765  543705 memory.go:184] no items to output this cycle
I0320 12:14:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 12:14:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:14:13.409803  543705 memory.go:191] Add success.
W0320 12:14:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:14:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:14:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:14:13.409863  543705 cpu.go:282] Add success.
I0320 12:14:13.420382  543705 net.go:648] Add success.
I0320 12:14:13.423194  543705 net.go:770] primary dev: ETH0
I0320 12:14:13.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:14:13.423220  543705 net.go:698] Add success.
I0320 12:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:14:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:14:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 12:14:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:14:14.456611  543705 disk_worker.go:494] system disk:vda1
I0320 12:14:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:14:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:14:23.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:14:23.409898  543705 memory.go:184] no items to output this cycle
I0320 12:14:23.409944  543705 cpu.go:275] no items to output this cycle
I0320 12:14:24.249671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:14:24.252077  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:14:24.252082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be080 0xc0002be0c0]
E0320 12:14:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:14:33.409779  543705 memory.go:184] no items to output this cycle
I0320 12:14:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 12:14:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:14:43.409790  543705 memory.go:191] Add success.
I0320 12:14:43.409799  543705 cpu.go:282] Add success.
I0320 12:14:43.419937  543705 net.go:648] Add success.
I0320 12:14:43.422453  543705 net.go:770] primary dev: ETH0
I0320 12:14:43.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:14:43.422477  543705 net.go:698] Add success.
I0320 12:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:14:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:14:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:14:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:14:53.409784  543705 memory.go:184] no items to output this cycle
I0320 12:14:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 12:15:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:15:03.409797  543705 memory.go:184] no items to output this cycle
I0320 12:15:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 12:15:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:15:13.409787  543705 memory.go:191] Add success.
I0320 12:15:13.409807  543705 cpu.go:282] Add success.
W0320 12:15:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:15:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:15:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:15:13.420151  543705 net.go:648] Add success.
I0320 12:15:13.423272  543705 net.go:770] primary dev: ETH0
I0320 12:15:13.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:15:13.423302  543705 net.go:698] Add success.
I0320 12:15:13.467669  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"85cc3622-cf3b-4553-9133-4204c2b94502","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:15:13.467701  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:15:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:15:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0320 12:15:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:15:14.456612  543705 disk_worker.go:494] system disk:vda1
I0320 12:15:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:15:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:15:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:15:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:15:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:15:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:15:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:15:23.409799  543705 memory.go:184] no items to output this cycle
I0320 12:15:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 12:15:24.253673  543705 disk_info.go:125] begin check local disk info of client
I0320 12:15:24.256171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:15:24.256176  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0320 12:15:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:15:33.409812  543705 memory.go:184] no items to output this cycle
I0320 12:15:33.409826  543705 cpu.go:275] no items to output this cycle
I0320 12:15:38.466086  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:15:38.466092  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:15:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:15:43.410871  543705 memory.go:191] Add success.
I0320 12:15:43.409825  543705 cpu.go:282] Add success.
I0320 12:15:43.420431  543705 net.go:770] primary dev: ETH0
I0320 12:15:43.420450  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:15:43.420465  543705 net.go:698] Add success.
I0320 12:15:43.420822  543705 net.go:648] Add success.
I0320 12:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:15:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:15:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:15:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:15:53.409765  543705 memory.go:184] no items to output this cycle
I0320 12:15:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:16:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:16:03.409769  543705 memory.go:184] no items to output this cycle
I0320 12:16:03.409798  543705 cpu.go:275] no items to output this cycle
W0320 12:16:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:16:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:16:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:16:13.409798  543705 cpu.go:282] Add success.
E0320 12:16:13.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:16:13.409840  543705 memory.go:191] Add success.
I0320 12:16:13.420162  543705 net.go:648] Add success.
I0320 12:16:13.422995  543705 net.go:770] primary dev: ETH0
I0320 12:16:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:16:13.423021  543705 net.go:698] Add success.
I0320 12:16:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:16:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:16:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 12:16:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:16:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 12:16:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:16:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:16:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:16:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:16:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:16:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:16:23.409788  543705 memory.go:184] no items to output this cycle
I0320 12:16:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 12:16:24.257669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:16:24.260172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:16:24.260178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e9240 0xc0000e9280]
E0320 12:16:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:16:33.409785  543705 memory.go:184] no items to output this cycle
I0320 12:16:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 12:16:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:16:43.409792  543705 memory.go:191] Add success.
I0320 12:16:43.409798  543705 cpu.go:282] Add success.
I0320 12:16:43.420091  543705 net.go:648] Add success.
I0320 12:16:43.422957  543705 net.go:770] primary dev: ETH0
I0320 12:16:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:16:43.422982  543705 net.go:698] Add success.
I0320 12:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:16:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:16:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:16:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:16:53.409811  543705 memory.go:184] no items to output this cycle
I0320 12:16:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 12:17:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:17:03.409781  543705 memory.go:184] no items to output this cycle
I0320 12:17:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 12:17:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:17:13.409834  543705 memory.go:191] Add success.
I0320 12:17:13.409838  543705 cpu.go:282] Add success.
W0320 12:17:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:17:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:17:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:17:13.420148  543705 net.go:648] Add success.
I0320 12:17:13.423136  543705 net.go:770] primary dev: ETH0
I0320 12:17:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:17:13.423186  543705 net.go:698] Add success.
I0320 12:17:13.452795  543705 event_worker.go:152] Polling the log file for events...
W0320 12:17:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:17:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 12:17:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:17:14.456935  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:17:14.456944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:17:14.456950  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:17:14.457001  543705 disk_worker.go:494] system disk:vda1
I0320 12:17:14.457046  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:17:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:17:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:17:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:17:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:17:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:17:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:17:16.472304  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:17:23.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:17:23.409900  543705 memory.go:184] no items to output this cycle
I0320 12:17:23.409942  543705 cpu.go:275] no items to output this cycle
I0320 12:17:24.261671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:17:24.264097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:17:24.264103  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0320 12:17:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:17:33.409777  543705 memory.go:184] no items to output this cycle
I0320 12:17:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 12:17:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:17:43.409820  543705 memory.go:191] Add success.
I0320 12:17:43.409825  543705 cpu.go:282] Add success.
I0320 12:17:43.419978  543705 net.go:648] Add success.
I0320 12:17:43.422963  543705 net.go:770] primary dev: ETH0
I0320 12:17:43.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:17:43.422995  543705 net.go:698] Add success.
I0320 12:17:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:17:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:17:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:17:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:17:53.409777  543705 cpu.go:275] no items to output this cycle
I0320 12:17:53.409791  543705 memory.go:184] no items to output this cycle
E0320 12:18:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:18:03.409778  543705 memory.go:184] no items to output this cycle
I0320 12:18:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 12:18:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:18:13.409813  543705 memory.go:191] Add success.
I0320 12:18:13.409823  543705 cpu.go:282] Add success.
W0320 12:18:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:18:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:18:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:18:13.420142  543705 net.go:648] Add success.
I0320 12:18:13.423301  543705 net.go:770] primary dev: ETH0
I0320 12:18:13.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:18:13.423326  543705 net.go:698] Add success.
I0320 12:18:13.955667  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8d228090-a625-4e01-a957-1acdf7897eae","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:18:13.955704  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:18:14.454168  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:18:14.454393  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:18:14.454403  543705 disk_worker.go:708] disk space is not compliant
W0320 12:18:14.454406  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:18:14.455933  543705 disk_worker.go:494] system disk:vda1
I0320 12:18:14.455966  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:18:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:18:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:18:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:18:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:18:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:18:23.409784  543705 memory.go:184] no items to output this cycle
I0320 12:18:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 12:18:24.265667  543705 disk_info.go:125] begin check local disk info of client
I0320 12:18:24.268110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:18:24.268116  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047fa00 0xc00047fa40]
E0320 12:18:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:18:33.409787  543705 memory.go:184] no items to output this cycle
I0320 12:18:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 12:18:38.467102  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:18:38.467110  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:18:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:18:43.410680  543705 memory.go:191] Add success.
I0320 12:18:43.409828  543705 cpu.go:282] Add success.
I0320 12:18:43.420385  543705 net.go:648] Add success.
I0320 12:18:43.423577  543705 net.go:770] primary dev: ETH0
I0320 12:18:43.423590  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:18:43.423603  543705 net.go:698] Add success.
I0320 12:18:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:18:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:18:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:18:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:18:53.409779  543705 memory.go:184] no items to output this cycle
I0320 12:18:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 12:19:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:19:03.409806  543705 memory.go:184] no items to output this cycle
I0320 12:19:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 12:19:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:19:13.409799  543705 memory.go:191] Add success.
I0320 12:19:13.409814  543705 cpu.go:282] Add success.
W0320 12:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:19:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:19:13.420133  543705 net.go:648] Add success.
I0320 12:19:13.423183  543705 net.go:770] primary dev: ETH0
I0320 12:19:13.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:19:13.423206  543705 net.go:698] Add success.
I0320 12:19:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:19:14.455219  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:19:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0320 12:19:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:19:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 12:19:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:19:15.456024  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:19:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:19:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:19:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:19:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:19:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:19:23.409788  543705 memory.go:184] no items to output this cycle
I0320 12:19:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 12:19:24.269671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:19:24.272135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:19:24.272141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb5c0 0xc0001fb600]
E0320 12:19:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:19:33.409790  543705 memory.go:184] no items to output this cycle
I0320 12:19:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 12:19:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:19:43.409828  543705 memory.go:191] Add success.
I0320 12:19:43.409836  543705 cpu.go:282] Add success.
I0320 12:19:43.419908  543705 net.go:648] Add success.
I0320 12:19:43.422872  543705 net.go:770] primary dev: ETH0
I0320 12:19:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:19:43.422900  543705 net.go:698] Add success.
I0320 12:19:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:19:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:19:53.409781  543705 memory.go:184] no items to output this cycle
I0320 12:19:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 12:20:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:20:03.409770  543705 memory.go:184] no items to output this cycle
I0320 12:20:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 12:20:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:20:13.409808  543705 memory.go:191] Add success.
I0320 12:20:13.409811  543705 cpu.go:282] Add success.
W0320 12:20:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:20:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:20:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:20:13.420083  543705 net.go:648] Add success.
I0320 12:20:13.422693  543705 net.go:770] primary dev: ETH0
I0320 12:20:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:20:13.422721  543705 net.go:698] Add success.
I0320 12:20:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:20:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:20:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 12:20:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:20:14.456507  543705 disk_worker.go:494] system disk:vda1
I0320 12:20:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:20:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:20:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:20:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:20:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:20:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:20:23.409804  543705 memory.go:184] no items to output this cycle
I0320 12:20:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 12:20:24.273672  543705 disk_info.go:125] begin check local disk info of client
I0320 12:20:24.276242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:20:24.276248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5c80 0xc0002a5cc0]
E0320 12:20:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:20:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 12:20:33.409809  543705 memory.go:184] no items to output this cycle
E0320 12:20:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:20:43.409786  543705 memory.go:191] Add success.
I0320 12:20:43.409818  543705 cpu.go:282] Add success.
I0320 12:20:43.420046  543705 net.go:648] Add success.
I0320 12:20:43.422689  543705 net.go:770] primary dev: ETH0
I0320 12:20:43.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:20:43.422715  543705 net.go:698] Add success.
I0320 12:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:20:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:20:53.409796  543705 memory.go:184] no items to output this cycle
I0320 12:20:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 12:21:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:21:03.409779  543705 memory.go:184] no items to output this cycle
I0320 12:21:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 12:21:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:21:13.409797  543705 memory.go:191] Add success.
I0320 12:21:13.409801  543705 cpu.go:282] Add success.
W0320 12:21:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:21:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:21:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:21:13.420134  543705 net.go:770] primary dev: ETH0
I0320 12:21:13.420147  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:21:13.420160  543705 net.go:698] Add success.
I0320 12:21:13.420394  543705 net.go:648] Add success.
I0320 12:21:13.468951  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"60530d1b-b7a7-46b2-aac9-de21056f550a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:21:13.468985  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:21:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:21:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:21:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 12:21:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:21:14.456862  543705 disk_worker.go:494] system disk:vda1
I0320 12:21:14.456893  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:21:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:21:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:21:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:21:23.409777  543705 memory.go:184] no items to output this cycle
I0320 12:21:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 12:21:24.277669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:21:24.280104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:21:24.280109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa080 0xc0001fa0c0]
E0320 12:21:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:21:33.409784  543705 memory.go:184] no items to output this cycle
I0320 12:21:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 12:21:38.468091  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:21:38.468098  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:21:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:21:43.410656  543705 memory.go:191] Add success.
I0320 12:21:43.409811  543705 cpu.go:282] Add success.
I0320 12:21:43.420195  543705 net.go:770] primary dev: ETH0
I0320 12:21:43.420208  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:21:43.420221  543705 net.go:698] Add success.
I0320 12:21:43.420585  543705 net.go:648] Add success.
I0320 12:21:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:21:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:21:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:21:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:21:53.409780  543705 memory.go:184] no items to output this cycle
I0320 12:21:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 12:22:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:22:03.409768  543705 memory.go:184] no items to output this cycle
I0320 12:22:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 12:22:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:22:13.409794  543705 memory.go:191] Add success.
I0320 12:22:13.409799  543705 cpu.go:282] Add success.
W0320 12:22:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:22:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:22:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:22:13.420064  543705 net.go:648] Add success.
I0320 12:22:13.422739  543705 net.go:770] primary dev: ETH0
I0320 12:22:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:22:13.422765  543705 net.go:698] Add success.
W0320 12:22:14.455382  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:22:14.455477  543705 disk_worker.go:708] disk space is not compliant
W0320 12:22:14.455482  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:22:14.457508  543705 disk_worker.go:494] system disk:vda1
E0320 12:22:14.457588  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:22:14.457597  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:22:14.457603  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:22:14.457605  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:22:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:22:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:22:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:22:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:22:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:22:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:22:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:22:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:22:23.409765  543705 memory.go:184] no items to output this cycle
I0320 12:22:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 12:22:24.281673  543705 disk_info.go:125] begin check local disk info of client
I0320 12:22:24.284112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:22:24.284119  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1840 0xc0003c1880]
E0320 12:22:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:22:33.409774  543705 memory.go:184] no items to output this cycle
I0320 12:22:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 12:22:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:22:43.409784  543705 memory.go:191] Add success.
I0320 12:22:43.409809  543705 cpu.go:282] Add success.
I0320 12:22:43.420285  543705 net.go:648] Add success.
I0320 12:22:43.422930  543705 net.go:770] primary dev: ETH0
I0320 12:22:43.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:22:43.422967  543705 net.go:698] Add success.
I0320 12:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:22:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:22:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:22:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:22:53.409770  543705 memory.go:184] no items to output this cycle
I0320 12:22:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 12:23:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:23:03.409797  543705 memory.go:184] no items to output this cycle
I0320 12:23:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 12:23:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:23:13.409787  543705 memory.go:191] Add success.
I0320 12:23:13.409810  543705 cpu.go:282] Add success.
W0320 12:23:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:23:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:23:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:23:13.420144  543705 net.go:648] Add success.
I0320 12:23:13.422868  543705 net.go:770] primary dev: ETH0
I0320 12:23:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:23:13.422892  543705 net.go:698] Add success.
I0320 12:23:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:23:14.455458  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:23:14.455471  543705 disk_worker.go:708] disk space is not compliant
W0320 12:23:14.455475  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:23:14.457074  543705 disk_worker.go:494] system disk:vda1
I0320 12:23:14.457103  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:23:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:23:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:23:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:23:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:23:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:23:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:23:23.409800  543705 memory.go:184] no items to output this cycle
I0320 12:23:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 12:23:24.285669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:23:24.288167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:23:24.288174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a480 0xc00047a4c0]
E0320 12:23:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:23:33.409780  543705 memory.go:184] no items to output this cycle
I0320 12:23:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 12:23:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:23:43.409822  543705 memory.go:191] Add success.
I0320 12:23:43.409831  543705 cpu.go:282] Add success.
I0320 12:23:43.419947  543705 net.go:648] Add success.
I0320 12:23:43.422933  543705 net.go:770] primary dev: ETH0
I0320 12:23:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:23:43.422959  543705 net.go:698] Add success.
I0320 12:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:23:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:23:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:23:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:23:53.409774  543705 memory.go:184] no items to output this cycle
I0320 12:23:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 12:24:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:24:03.409772  543705 memory.go:184] no items to output this cycle
I0320 12:24:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 12:24:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:24:13.409817  543705 memory.go:191] Add success.
I0320 12:24:13.409825  543705 cpu.go:282] Add success.
W0320 12:24:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:24:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:24:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:24:13.420046  543705 net.go:648] Add success.
I0320 12:24:13.422810  543705 net.go:770] primary dev: ETH0
I0320 12:24:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:24:13.422837  543705 net.go:698] Add success.
I0320 12:24:13.469118  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"093d2a7e-d2de-4077-a30e-3bb39c34e3ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:24:13.469151  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:24:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:24:14.455320  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:24:14.455333  543705 disk_worker.go:708] disk space is not compliant
W0320 12:24:14.455337  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:24:14.457448  543705 disk_worker.go:494] system disk:vda1
I0320 12:24:14.457489  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:24:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:24:16.472515  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:24:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:24:23.409764  543705 memory.go:184] no items to output this cycle
I0320 12:24:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 12:24:24.289671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:24:24.292088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:24:24.292093  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ba00 0xc00047ba40]
E0320 12:24:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:24:33.409803  543705 memory.go:184] no items to output this cycle
I0320 12:24:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 12:24:38.469107  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:24:38.469115  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:24:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:24:43.410585  543705 memory.go:191] Add success.
I0320 12:24:43.409821  543705 cpu.go:282] Add success.
I0320 12:24:43.420341  543705 net.go:648] Add success.
I0320 12:24:43.423052  543705 net.go:770] primary dev: ETH0
I0320 12:24:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:24:43.423078  543705 net.go:698] Add success.
I0320 12:24:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:24:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:24:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:24:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:24:53.409806  543705 memory.go:184] no items to output this cycle
I0320 12:24:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 12:25:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:25:03.409803  543705 memory.go:184] no items to output this cycle
I0320 12:25:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 12:25:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:25:13.409778  543705 memory.go:191] Add success.
W0320 12:25:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:25:13.409804  543705 cpu.go:282] Add success.
W0320 12:25:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:25:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:25:13.420065  543705 net.go:648] Add success.
I0320 12:25:13.423093  543705 net.go:770] primary dev: ETH0
I0320 12:25:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:25:13.423117  543705 net.go:698] Add success.
I0320 12:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:25:14.455255  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:25:14.455318  543705 disk_worker.go:708] disk space is not compliant
W0320 12:25:14.455320  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:25:14.457553  543705 disk_worker.go:494] system disk:vda1
I0320 12:25:14.457594  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:25:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:25:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:25:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:25:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:25:23.409800  543705 memory.go:184] no items to output this cycle
I0320 12:25:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 12:25:24.293669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:25:24.296110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:25:24.296115  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047af00 0xc00047af40]
E0320 12:25:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:25:33.409778  543705 memory.go:184] no items to output this cycle
I0320 12:25:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 12:25:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:25:43.409802  543705 memory.go:191] Add success.
I0320 12:25:43.409804  543705 cpu.go:282] Add success.
I0320 12:25:43.419965  543705 net.go:648] Add success.
I0320 12:25:43.422979  543705 net.go:770] primary dev: ETH0
I0320 12:25:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:25:43.423009  543705 net.go:698] Add success.
I0320 12:25:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:25:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:25:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:25:53.409765  543705 memory.go:184] no items to output this cycle
I0320 12:25:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 12:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:26:03.409778  543705 memory.go:184] no items to output this cycle
I0320 12:26:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 12:26:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:26:13.409778  543705 memory.go:191] Add success.
W0320 12:26:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:26:13.409811  543705 cpu.go:282] Add success.
W0320 12:26:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:26:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:26:13.420061  543705 net.go:648] Add success.
I0320 12:26:13.423378  543705 net.go:770] primary dev: ETH0
I0320 12:26:13.423391  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:26:13.423403  543705 net.go:698] Add success.
I0320 12:26:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:26:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:26:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 12:26:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:26:14.456655  543705 disk_worker.go:494] system disk:vda1
I0320 12:26:14.456700  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:26:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:26:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:26:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:26:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:26:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:26:23.409773  543705 memory.go:184] no items to output this cycle
I0320 12:26:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 12:26:24.297674  543705 disk_info.go:125] begin check local disk info of client
I0320 12:26:24.300098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:26:24.300104  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029b1c0 0xc00029b200]
E0320 12:26:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:26:33.409797  543705 memory.go:184] no items to output this cycle
I0320 12:26:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 12:26:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:26:43.409794  543705 memory.go:191] Add success.
I0320 12:26:43.409800  543705 cpu.go:282] Add success.
I0320 12:26:43.419923  543705 net.go:648] Add success.
I0320 12:26:43.422641  543705 net.go:770] primary dev: ETH0
I0320 12:26:43.422654  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:26:43.422667  543705 net.go:698] Add success.
I0320 12:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:26:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:26:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:26:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:26:53.409767  543705 memory.go:184] no items to output this cycle
I0320 12:26:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 12:27:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:27:03.409771  543705 memory.go:184] no items to output this cycle
I0320 12:27:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 12:27:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:27:13.409808  543705 memory.go:191] Add success.
I0320 12:27:13.409820  543705 cpu.go:282] Add success.
W0320 12:27:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:27:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:27:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:27:13.420126  543705 net.go:648] Add success.
I0320 12:27:13.422882  543705 net.go:770] primary dev: ETH0
I0320 12:27:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:27:13.422907  543705 net.go:698] Add success.
I0320 12:27:13.428841  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 12:27:13.453078  543705 event_worker.go:152] Polling the log file for events...
I0320 12:27:13.468874  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af08c090-ea97-4430-9b8e-3773364b8bc7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:27:13.468908  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 12:27:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:27:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 12:27:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:27:14.456756  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:27:14.456765  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:27:14.456771  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:27:14.456970  543705 disk_worker.go:494] system disk:vda1
I0320 12:27:14.457025  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:27:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:27:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:27:16.457905  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:27:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:27:16.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:27:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:27:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:27:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:27:23.409761  543705 memory.go:184] no items to output this cycle
I0320 12:27:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 12:27:24.301671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:27:24.304078  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:27:24.304084  543705 disk_info.go:196] parse disk info done, disk is : [0xc000518b40 0xc000518b80]
E0320 12:27:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:27:33.409805  543705 memory.go:184] no items to output this cycle
I0320 12:27:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 12:27:38.470108  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:27:38.470114  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:27:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:27:43.410628  543705 memory.go:191] Add success.
I0320 12:27:43.409815  543705 cpu.go:282] Add success.
I0320 12:27:43.420428  543705 net.go:648] Add success.
I0320 12:27:43.423002  543705 net.go:770] primary dev: ETH0
I0320 12:27:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:27:43.423037  543705 net.go:698] Add success.
I0320 12:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:27:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:27:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:27:53.409773  543705 cpu.go:275] no items to output this cycle
I0320 12:27:53.409780  543705 memory.go:184] no items to output this cycle
E0320 12:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:28:03.409783  543705 memory.go:184] no items to output this cycle
I0320 12:28:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 12:28:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:28:13.409799  543705 memory.go:191] Add success.
I0320 12:28:13.409799  543705 cpu.go:282] Add success.
W0320 12:28:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:28:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:28:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:28:13.420052  543705 net.go:770] primary dev: ETH0
I0320 12:28:13.420067  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:28:13.420083  543705 net.go:698] Add success.
I0320 12:28:13.420450  543705 net.go:648] Add success.
I0320 12:28:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:28:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:28:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 12:28:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:28:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 12:28:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:28:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:28:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:28:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:28:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:28:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:28:23.409773  543705 memory.go:184] no items to output this cycle
I0320 12:28:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 12:28:24.305672  543705 disk_info.go:125] begin check local disk info of client
I0320 12:28:24.308118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:28:24.308124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048d3c0 0xc00048d400]
E0320 12:28:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:28:33.409801  543705 memory.go:184] no items to output this cycle
I0320 12:28:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 12:28:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:28:43.409801  543705 memory.go:191] Add success.
I0320 12:28:43.409802  543705 cpu.go:282] Add success.
I0320 12:28:43.419883  543705 net.go:648] Add success.
I0320 12:28:43.422878  543705 net.go:770] primary dev: ETH0
I0320 12:28:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:28:43.422907  543705 net.go:698] Add success.
I0320 12:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:28:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:28:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:28:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:28:53.409811  543705 memory.go:184] no items to output this cycle
I0320 12:28:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 12:29:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:29:03.409807  543705 memory.go:184] no items to output this cycle
I0320 12:29:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 12:29:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:29:13.409795  543705 memory.go:191] Add success.
I0320 12:29:13.409795  543705 cpu.go:282] Add success.
W0320 12:29:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:29:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:29:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:29:13.420095  543705 net.go:648] Add success.
I0320 12:29:13.423324  543705 net.go:770] primary dev: ETH0
I0320 12:29:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:29:13.423349  543705 net.go:698] Add success.
I0320 12:29:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:29:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:29:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 12:29:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:29:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 12:29:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:29:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:29:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:29:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:29:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:29:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:29:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:29:23.409780  543705 memory.go:184] no items to output this cycle
I0320 12:29:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 12:29:24.309671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:29:24.312053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:29:24.312058  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475b40 0xc000475b80]
E0320 12:29:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:29:33.409796  543705 memory.go:184] no items to output this cycle
I0320 12:29:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 12:29:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:29:43.409808  543705 cpu.go:282] Add success.
I0320 12:29:43.409817  543705 memory.go:191] Add success.
I0320 12:29:43.419991  543705 net.go:648] Add success.
I0320 12:29:43.422813  543705 net.go:770] primary dev: ETH0
I0320 12:29:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:29:43.422840  543705 net.go:698] Add success.
I0320 12:29:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:29:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:29:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:29:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:29:53.409765  543705 memory.go:184] no items to output this cycle
I0320 12:29:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 12:30:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:30:03.409776  543705 memory.go:184] no items to output this cycle
I0320 12:30:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 12:30:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:30:13.409786  543705 memory.go:191] Add success.
I0320 12:30:13.409802  543705 cpu.go:282] Add success.
W0320 12:30:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:30:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:30:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:30:13.420093  543705 net.go:648] Add success.
I0320 12:30:13.422746  543705 net.go:770] primary dev: ETH0
I0320 12:30:13.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:30:13.422775  543705 net.go:698] Add success.
I0320 12:30:13.763318  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"30c4eb2c-0fb6-4547-86fe-1e5f306fb58e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:30:13.763351  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:30:14.453982  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:30:14.454184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:30:14.454195  543705 disk_worker.go:708] disk space is not compliant
W0320 12:30:14.454197  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:30:14.455547  543705 disk_worker.go:494] system disk:vda1
I0320 12:30:14.455593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:30:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:30:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:30:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:30:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:30:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:30:23.409797  543705 memory.go:184] no items to output this cycle
I0320 12:30:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 12:30:24.312149  543705 disk_info.go:125] begin check local disk info of client
I0320 12:30:24.314611  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:30:24.314617  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a200 0xc00046a240]
E0320 12:30:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:30:33.409784  543705 memory.go:184] no items to output this cycle
I0320 12:30:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 12:30:38.471107  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:30:38.471114  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:30:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:30:43.410719  543705 memory.go:191] Add success.
I0320 12:30:43.409818  543705 cpu.go:282] Add success.
I0320 12:30:43.420444  543705 net.go:648] Add success.
I0320 12:30:43.423068  543705 net.go:770] primary dev: ETH0
I0320 12:30:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:30:43.423099  543705 net.go:698] Add success.
I0320 12:30:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:30:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:30:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:30:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:30:53.409800  543705 memory.go:184] no items to output this cycle
I0320 12:30:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 12:31:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:31:03.409783  543705 memory.go:184] no items to output this cycle
I0320 12:31:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 12:31:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:31:13.409814  543705 memory.go:191] Add success.
I0320 12:31:13.409820  543705 cpu.go:282] Add success.
W0320 12:31:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:31:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:31:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:31:13.420128  543705 net.go:648] Add success.
I0320 12:31:13.422894  543705 net.go:770] primary dev: ETH0
I0320 12:31:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:31:13.422935  543705 net.go:698] Add success.
I0320 12:31:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:31:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:31:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 12:31:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:31:14.456473  543705 disk_worker.go:494] system disk:vda1
I0320 12:31:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:31:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:31:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:31:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:31:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:31:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:31:23.409893  543705 memory.go:184] no items to output this cycle
I0320 12:31:23.409948  543705 cpu.go:275] no items to output this cycle
I0320 12:31:24.317667  543705 disk_info.go:125] begin check local disk info of client
I0320 12:31:24.320138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:31:24.320144  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474000 0xc000474040]
E0320 12:31:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:31:33.409809  543705 memory.go:184] no items to output this cycle
I0320 12:31:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 12:31:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:31:43.409794  543705 memory.go:191] Add success.
I0320 12:31:43.409799  543705 cpu.go:282] Add success.
I0320 12:31:43.420085  543705 net.go:648] Add success.
I0320 12:31:43.423137  543705 net.go:770] primary dev: ETH0
I0320 12:31:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:31:43.423168  543705 net.go:698] Add success.
I0320 12:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:31:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:31:53.410380  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:31:53.410395  543705 memory.go:184] no items to output this cycle
I0320 12:31:53.410398  543705 cpu.go:275] no items to output this cycle
E0320 12:32:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:32:03.409801  543705 memory.go:184] no items to output this cycle
I0320 12:32:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 12:32:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:32:13.409776  543705 memory.go:191] Add success.
I0320 12:32:13.409799  543705 cpu.go:282] Add success.
W0320 12:32:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:32:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:32:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:32:13.420056  543705 net.go:648] Add success.
I0320 12:32:13.423148  543705 net.go:770] primary dev: ETH0
I0320 12:32:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:32:13.423171  543705 net.go:698] Add success.
W0320 12:32:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:32:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 12:32:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:32:14.456912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:32:14.456921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:32:14.456927  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:32:14.457018  543705 disk_worker.go:494] system disk:vda1
I0320 12:32:14.457062  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:32:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:32:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:32:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:32:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:32:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:32:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:32:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:32:23.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:32:23.409901  543705 memory.go:184] no items to output this cycle
I0320 12:32:23.409905  543705 cpu.go:275] no items to output this cycle
I0320 12:32:24.321670  543705 disk_info.go:125] begin check local disk info of client
I0320 12:32:24.324186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:32:24.324192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b27c0 0xc0003b29c0]
E0320 12:32:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:32:33.409802  543705 memory.go:184] no items to output this cycle
I0320 12:32:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 12:32:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:32:43.409812  543705 memory.go:191] Add success.
I0320 12:32:43.409814  543705 cpu.go:282] Add success.
I0320 12:32:43.419965  543705 net.go:648] Add success.
I0320 12:32:43.422756  543705 net.go:770] primary dev: ETH0
I0320 12:32:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:32:43.422781  543705 net.go:698] Add success.
I0320 12:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:32:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:32:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:32:53.409805  543705 memory.go:184] no items to output this cycle
I0320 12:32:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 12:33:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:33:03.409780  543705 memory.go:184] no items to output this cycle
I0320 12:33:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 12:33:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:33:13.409783  543705 cpu.go:282] Add success.
I0320 12:33:13.409793  543705 memory.go:191] Add success.
W0320 12:33:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:33:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:33:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:33:13.420234  543705 net.go:648] Add success.
I0320 12:33:13.422854  543705 net.go:770] primary dev: ETH0
I0320 12:33:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:33:13.422883  543705 net.go:698] Add success.
I0320 12:33:13.469589  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0707ead8-2365-45cf-b645-6850b2c07903","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:33:13.469630  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:33:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:33:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:33:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 12:33:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:33:14.456689  543705 disk_worker.go:494] system disk:vda1
I0320 12:33:14.456720  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:33:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:33:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:33:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:33:23.409777  543705 memory.go:184] no items to output this cycle
I0320 12:33:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 12:33:24.325668  543705 disk_info.go:125] begin check local disk info of client
I0320 12:33:24.328093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:33:24.328099  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b180 0xc00007b1c0]
E0320 12:33:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:33:33.409805  543705 memory.go:184] no items to output this cycle
I0320 12:33:33.409820  543705 cpu.go:275] no items to output this cycle
I0320 12:33:38.472118  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:33:38.472136  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:33:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:33:43.410657  543705 memory.go:191] Add success.
I0320 12:33:43.409795  543705 cpu.go:282] Add success.
I0320 12:33:43.420365  543705 net.go:648] Add success.
I0320 12:33:43.422870  543705 net.go:770] primary dev: ETH0
I0320 12:33:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:33:43.422896  543705 net.go:698] Add success.
I0320 12:33:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:33:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:33:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:33:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:33:53.409796  543705 memory.go:184] no items to output this cycle
I0320 12:33:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 12:34:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:34:03.409774  543705 memory.go:184] no items to output this cycle
I0320 12:34:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 12:34:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:34:13.409807  543705 memory.go:191] Add success.
I0320 12:34:13.409817  543705 cpu.go:282] Add success.
W0320 12:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:34:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:34:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:34:13.420050  543705 net.go:648] Add success.
I0320 12:34:13.422734  543705 net.go:770] primary dev: ETH0
I0320 12:34:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:34:13.422764  543705 net.go:698] Add success.
I0320 12:34:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:34:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:34:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 12:34:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:34:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 12:34:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:34:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:34:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:34:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:34:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:34:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:34:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:34:23.409796  543705 memory.go:184] no items to output this cycle
I0320 12:34:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 12:34:24.329678  543705 disk_info.go:125] begin check local disk info of client
I0320 12:34:24.332102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:34:24.332107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5240 0xc0000c5280]
E0320 12:34:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:34:33.409767  543705 memory.go:184] no items to output this cycle
I0320 12:34:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 12:34:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:34:43.409814  543705 memory.go:191] Add success.
I0320 12:34:43.409820  543705 cpu.go:282] Add success.
I0320 12:34:43.419991  543705 net.go:648] Add success.
I0320 12:34:43.422774  543705 net.go:770] primary dev: ETH0
I0320 12:34:43.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:34:43.422799  543705 net.go:698] Add success.
I0320 12:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:34:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:34:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:34:53.409781  543705 memory.go:184] no items to output this cycle
I0320 12:34:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 12:35:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:35:03.409778  543705 memory.go:184] no items to output this cycle
I0320 12:35:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 12:35:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:35:13.409810  543705 memory.go:191] Add success.
I0320 12:35:13.409821  543705 cpu.go:282] Add success.
W0320 12:35:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:35:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:35:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:35:13.420153  543705 net.go:648] Add success.
I0320 12:35:13.422790  543705 net.go:770] primary dev: ETH0
I0320 12:35:13.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:35:13.422815  543705 net.go:698] Add success.
I0320 12:35:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:35:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:35:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 12:35:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:35:14.456525  543705 disk_worker.go:494] system disk:vda1
I0320 12:35:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:35:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:35:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:35:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:35:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:35:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:35:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:35:23.409777  543705 memory.go:184] no items to output this cycle
I0320 12:35:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 12:35:24.333669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:35:24.336104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:35:24.336109  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386940 0xc000386980]
E0320 12:35:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:35:33.409777  543705 memory.go:184] no items to output this cycle
I0320 12:35:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 12:35:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:35:43.409820  543705 memory.go:191] Add success.
I0320 12:35:43.409826  543705 cpu.go:282] Add success.
I0320 12:35:43.419906  543705 net.go:648] Add success.
I0320 12:35:43.422643  543705 net.go:770] primary dev: ETH0
I0320 12:35:43.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:35:43.422670  543705 net.go:698] Add success.
I0320 12:35:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:35:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:35:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:35:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:35:53.409768  543705 memory.go:184] no items to output this cycle
I0320 12:35:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 12:36:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:36:03.409779  543705 cpu.go:275] no items to output this cycle
I0320 12:36:03.409781  543705 memory.go:184] no items to output this cycle
E0320 12:36:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:36:13.409806  543705 memory.go:191] Add success.
I0320 12:36:13.409827  543705 cpu.go:282] Add success.
W0320 12:36:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:36:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:36:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:36:13.420186  543705 net.go:648] Add success.
I0320 12:36:13.422662  543705 net.go:770] primary dev: ETH0
I0320 12:36:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:36:13.422691  543705 net.go:698] Add success.
I0320 12:36:13.464618  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe32ae03-afad-40c4-9828-eb56e1ffad07","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:36:13.464655  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:36:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:36:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 12:36:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:36:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 12:36:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:36:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:36:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:36:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:36:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:36:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:36:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:36:23.409770  543705 memory.go:184] no items to output this cycle
I0320 12:36:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 12:36:24.338287  543705 disk_info.go:125] begin check local disk info of client
I0320 12:36:24.340790  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:36:24.340797  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474c40 0xc000474c80]
E0320 12:36:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:36:33.409783  543705 memory.go:184] no items to output this cycle
I0320 12:36:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 12:36:38.473112  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:36:38.473119  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:36:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:36:43.410672  543705 memory.go:191] Add success.
I0320 12:36:43.409805  543705 cpu.go:282] Add success.
I0320 12:36:43.420347  543705 net.go:648] Add success.
I0320 12:36:43.423448  543705 net.go:770] primary dev: ETH0
I0320 12:36:43.423463  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:36:43.423477  543705 net.go:698] Add success.
I0320 12:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:36:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:36:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:36:53.409777  543705 memory.go:184] no items to output this cycle
I0320 12:36:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:37:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:37:03.409780  543705 memory.go:184] no items to output this cycle
I0320 12:37:03.409789  543705 cpu.go:275] no items to output this cycle
W0320 12:37:13.409703  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:37:13.409718  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:37:13.409722  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 12:37:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:37:13.409806  543705 cpu.go:282] Add success.
I0320 12:37:13.409812  543705 memory.go:191] Add success.
I0320 12:37:13.420049  543705 net.go:648] Add success.
I0320 12:37:13.422707  543705 net.go:770] primary dev: ETH0
I0320 12:37:13.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:37:13.422732  543705 net.go:698] Add success.
I0320 12:37:13.453303  543705 event_worker.go:152] Polling the log file for events...
W0320 12:37:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:37:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 12:37:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:37:14.455857  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:37:14.455866  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:37:14.455873  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:37:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 12:37:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:37:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:37:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:37:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:37:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:37:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:37:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:37:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:37:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:37:23.409799  543705 memory.go:184] no items to output this cycle
I0320 12:37:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 12:37:24.341669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:37:24.344109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:37:24.344116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa340 0xc0001aa380]
E0320 12:37:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:37:33.409803  543705 memory.go:184] no items to output this cycle
I0320 12:37:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 12:37:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:37:43.409800  543705 memory.go:191] Add success.
I0320 12:37:43.409804  543705 cpu.go:282] Add success.
I0320 12:37:43.419976  543705 net.go:648] Add success.
I0320 12:37:43.422377  543705 net.go:770] primary dev: ETH0
I0320 12:37:43.422393  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:37:43.422407  543705 net.go:698] Add success.
I0320 12:37:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:37:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:37:53.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:37:53.410382  543705 memory.go:184] no items to output this cycle
I0320 12:37:53.410385  543705 cpu.go:275] no items to output this cycle
E0320 12:38:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:38:03.409777  543705 memory.go:184] no items to output this cycle
I0320 12:38:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 12:38:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:38:13.409776  543705 memory.go:191] Add success.
W0320 12:38:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:38:13.409809  543705 cpu.go:282] Add success.
W0320 12:38:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:38:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:38:13.420054  543705 net.go:648] Add success.
I0320 12:38:13.422862  543705 net.go:770] primary dev: ETH0
I0320 12:38:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:38:13.422888  543705 net.go:698] Add success.
I0320 12:38:14.455217  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:38:14.455236  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:38:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0320 12:38:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:38:14.456648  543705 disk_worker.go:494] system disk:vda1
I0320 12:38:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:38:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:38:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:38:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:38:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:38:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:38:23.409780  543705 memory.go:184] no items to output this cycle
I0320 12:38:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 12:38:24.345669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:38:24.348089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:38:24.348094  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376e00 0xc000376e40]
E0320 12:38:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:38:33.409794  543705 memory.go:184] no items to output this cycle
I0320 12:38:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 12:38:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:38:43.409791  543705 memory.go:191] Add success.
I0320 12:38:43.409796  543705 cpu.go:282] Add success.
I0320 12:38:43.419919  543705 net.go:648] Add success.
I0320 12:38:43.422659  543705 net.go:770] primary dev: ETH0
I0320 12:38:43.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:38:43.422688  543705 net.go:698] Add success.
I0320 12:38:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:38:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:38:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:38:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:38:53.409775  543705 memory.go:184] no items to output this cycle
I0320 12:38:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 12:39:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:39:03.409803  543705 memory.go:184] no items to output this cycle
I0320 12:39:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 12:39:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:39:13.409788  543705 memory.go:191] Add success.
I0320 12:39:13.409790  543705 cpu.go:282] Add success.
W0320 12:39:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:39:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:39:13.420048  543705 net.go:648] Add success.
I0320 12:39:13.422789  543705 net.go:770] primary dev: ETH0
I0320 12:39:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:39:13.422814  543705 net.go:698] Add success.
I0320 12:39:13.468850  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d8c55910-3126-4cac-a355-131716e4c574","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:39:13.468884  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:39:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:39:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:39:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 12:39:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:39:14.457036  543705 disk_worker.go:494] system disk:vda1
I0320 12:39:14.457064  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:39:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:39:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:39:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:39:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:39:23.409776  543705 memory.go:184] no items to output this cycle
I0320 12:39:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 12:39:24.349670  543705 disk_info.go:125] begin check local disk info of client
I0320 12:39:24.352082  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:39:24.352088  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fd240 0xc0001fd280]
E0320 12:39:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:39:33.409814  543705 memory.go:184] no items to output this cycle
I0320 12:39:33.409836  543705 cpu.go:275] no items to output this cycle
I0320 12:39:38.474123  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:39:38.474129  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:39:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:39:43.410675  543705 memory.go:191] Add success.
I0320 12:39:43.409821  543705 cpu.go:282] Add success.
I0320 12:39:43.420382  543705 net.go:648] Add success.
I0320 12:39:43.423380  543705 net.go:770] primary dev: ETH0
I0320 12:39:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:39:43.423411  543705 net.go:698] Add success.
I0320 12:39:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:39:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:39:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:39:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:39:53.409787  543705 memory.go:184] no items to output this cycle
I0320 12:39:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 12:40:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:40:03.409767  543705 memory.go:184] no items to output this cycle
I0320 12:40:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 12:40:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:40:13.409827  543705 memory.go:191] Add success.
I0320 12:40:13.409842  543705 cpu.go:282] Add success.
W0320 12:40:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:40:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:40:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:40:13.420136  543705 net.go:648] Add success.
I0320 12:40:13.423010  543705 net.go:770] primary dev: ETH0
I0320 12:40:13.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:40:13.423039  543705 net.go:698] Add success.
I0320 12:40:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:40:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:40:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 12:40:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:40:14.456482  543705 disk_worker.go:494] system disk:vda1
I0320 12:40:14.456508  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:40:15.456025  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:40:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:40:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:40:23.409787  543705 memory.go:184] no items to output this cycle
I0320 12:40:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 12:40:24.353670  543705 disk_info.go:125] begin check local disk info of client
I0320 12:40:24.356140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:40:24.356145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002908c0 0xc000290900]
E0320 12:40:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:40:33.409794  543705 memory.go:184] no items to output this cycle
I0320 12:40:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 12:40:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:40:43.409800  543705 memory.go:191] Add success.
I0320 12:40:43.409818  543705 cpu.go:282] Add success.
I0320 12:40:43.419868  543705 net.go:648] Add success.
I0320 12:40:43.422630  543705 net.go:770] primary dev: ETH0
I0320 12:40:43.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:40:43.422656  543705 net.go:698] Add success.
I0320 12:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:40:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:40:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:40:53.409812  543705 memory.go:184] no items to output this cycle
I0320 12:40:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 12:41:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:41:03.409808  543705 memory.go:184] no items to output this cycle
I0320 12:41:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 12:41:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:41:13.409823  543705 memory.go:191] Add success.
I0320 12:41:13.409832  543705 cpu.go:282] Add success.
W0320 12:41:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:41:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:41:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:41:13.420062  543705 net.go:648] Add success.
I0320 12:41:13.423202  543705 net.go:770] primary dev: ETH0
I0320 12:41:13.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:41:13.423226  543705 net.go:698] Add success.
I0320 12:41:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:41:14.455333  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:41:14.455446  543705 disk_worker.go:708] disk space is not compliant
W0320 12:41:14.455451  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:41:14.457568  543705 disk_worker.go:494] system disk:vda1
I0320 12:41:14.457602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:41:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:41:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:41:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:41:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:41:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:41:23.409769  543705 memory.go:184] no items to output this cycle
I0320 12:41:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 12:41:24.357671  543705 disk_info.go:125] begin check local disk info of client
I0320 12:41:24.360139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:41:24.360145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb040 0xc0001fb080]
E0320 12:41:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:41:33.409810  543705 memory.go:184] no items to output this cycle
I0320 12:41:33.409837  543705 cpu.go:275] no items to output this cycle
E0320 12:41:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:41:43.409825  543705 memory.go:191] Add success.
I0320 12:41:43.409853  543705 cpu.go:282] Add success.
I0320 12:41:43.420010  543705 net.go:648] Add success.
I0320 12:41:43.423043  543705 net.go:770] primary dev: ETH0
I0320 12:41:43.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:41:43.423068  543705 net.go:698] Add success.
I0320 12:41:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:41:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:41:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:41:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:41:53.409782  543705 memory.go:184] no items to output this cycle
I0320 12:41:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 12:42:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:42:03.409772  543705 memory.go:184] no items to output this cycle
I0320 12:42:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:42:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:42:13.409809  543705 memory.go:191] Add success.
I0320 12:42:13.409818  543705 cpu.go:282] Add success.
W0320 12:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:42:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:42:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:42:13.420229  543705 net.go:648] Add success.
I0320 12:42:13.423095  543705 net.go:770] primary dev: ETH0
I0320 12:42:13.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:42:13.423121  543705 net.go:698] Add success.
I0320 12:42:13.470887  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fb3c1a60-170a-4b7d-bd50-c4972dc7b522","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:42:13.470923  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 12:42:14.455272  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:42:14.455286  543705 disk_worker.go:708] disk space is not compliant
W0320 12:42:14.455289  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:42:14.456709  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:42:14.456719  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:42:14.456725  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:42:14.456764  543705 disk_worker.go:494] system disk:vda1
I0320 12:42:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:42:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:42:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:42:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:42:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:42:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:42:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:42:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:42:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:42:23.409777  543705 memory.go:184] no items to output this cycle
I0320 12:42:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 12:42:24.361670  543705 disk_info.go:125] begin check local disk info of client
I0320 12:42:24.364109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:42:24.364115  543705 disk_info.go:196] parse disk info done, disk is : [0xc000324480 0xc0003244c0]
E0320 12:42:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:42:33.409783  543705 memory.go:184] no items to output this cycle
I0320 12:42:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 12:42:38.475126  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:42:38.475132  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:42:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:42:43.410678  543705 memory.go:191] Add success.
I0320 12:42:43.409821  543705 cpu.go:282] Add success.
I0320 12:42:43.420388  543705 net.go:648] Add success.
I0320 12:42:43.423352  543705 net.go:770] primary dev: ETH0
I0320 12:42:43.423370  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:42:43.423387  543705 net.go:698] Add success.
I0320 12:42:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:42:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:42:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:42:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:42:53.409781  543705 memory.go:184] no items to output this cycle
I0320 12:42:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 12:43:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:43:03.409774  543705 memory.go:184] no items to output this cycle
I0320 12:43:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 12:43:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:43:13.409784  543705 memory.go:191] Add success.
I0320 12:43:13.409806  543705 cpu.go:282] Add success.
W0320 12:43:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:43:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:43:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:43:13.420290  543705 net.go:648] Add success.
I0320 12:43:13.422976  543705 net.go:770] primary dev: ETH0
I0320 12:43:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:43:13.423002  543705 net.go:698] Add success.
I0320 12:43:14.453958  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:43:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:43:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 12:43:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:43:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 12:43:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:43:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:43:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:43:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:43:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:43:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:43:23.409800  543705 memory.go:184] no items to output this cycle
I0320 12:43:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 12:43:24.365669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:43:24.368101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:43:24.368107  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bcc0 0xc00007bd00]
E0320 12:43:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:43:33.409812  543705 memory.go:184] no items to output this cycle
I0320 12:43:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 12:43:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:43:43.409802  543705 memory.go:191] Add success.
I0320 12:43:43.409804  543705 cpu.go:282] Add success.
I0320 12:43:43.419993  543705 net.go:648] Add success.
I0320 12:43:43.422782  543705 net.go:770] primary dev: ETH0
I0320 12:43:43.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:43:43.422808  543705 net.go:698] Add success.
I0320 12:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:43:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:43:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:43:53.409780  543705 memory.go:184] no items to output this cycle
I0320 12:43:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 12:44:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:44:03.409762  543705 memory.go:184] no items to output this cycle
I0320 12:44:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 12:44:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:44:13.409783  543705 memory.go:191] Add success.
I0320 12:44:13.409804  543705 cpu.go:282] Add success.
W0320 12:44:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:44:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:44:13.420078  543705 net.go:648] Add success.
I0320 12:44:13.422974  543705 net.go:770] primary dev: ETH0
I0320 12:44:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:44:13.423000  543705 net.go:698] Add success.
I0320 12:44:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:44:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:44:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 12:44:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:44:14.456505  543705 disk_worker.go:494] system disk:vda1
I0320 12:44:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:44:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:44:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:44:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:44:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:44:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:44:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:44:23.409775  543705 memory.go:184] no items to output this cycle
I0320 12:44:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 12:44:24.369672  543705 disk_info.go:125] begin check local disk info of client
I0320 12:44:24.372147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:44:24.372153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb6c0 0xc0001fb700]
E0320 12:44:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:44:33.409810  543705 memory.go:184] no items to output this cycle
I0320 12:44:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 12:44:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:44:43.409830  543705 memory.go:191] Add success.
I0320 12:44:43.409843  543705 cpu.go:282] Add success.
I0320 12:44:43.420002  543705 net.go:648] Add success.
I0320 12:44:43.422665  543705 net.go:770] primary dev: ETH0
I0320 12:44:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:44:43.422692  543705 net.go:698] Add success.
I0320 12:44:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:44:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:44:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:44:53.409786  543705 memory.go:184] no items to output this cycle
I0320 12:44:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 12:45:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:45:03.409764  543705 memory.go:184] no items to output this cycle
I0320 12:45:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 12:45:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:45:13.409802  543705 memory.go:191] Add success.
I0320 12:45:13.409809  543705 cpu.go:282] Add success.
W0320 12:45:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:45:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:45:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:45:13.420041  543705 net.go:648] Add success.
I0320 12:45:13.422953  543705 net.go:770] primary dev: ETH0
I0320 12:45:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:45:13.422980  543705 net.go:698] Add success.
I0320 12:45:13.463419  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49ee4eea-4208-4f10-993f-fcd4c234b924","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:45:13.463450  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:45:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:45:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:45:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 12:45:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:45:14.456655  543705 disk_worker.go:494] system disk:vda1
I0320 12:45:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:45:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:45:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:45:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:45:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:45:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:45:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:45:23.409793  543705 memory.go:184] no items to output this cycle
I0320 12:45:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 12:45:24.373669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:45:24.376152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:45:24.376159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbb40 0xc0001fbb80]
E0320 12:45:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:45:33.409782  543705 memory.go:184] no items to output this cycle
I0320 12:45:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 12:45:38.476139  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:45:38.476146  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:45:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:45:43.410753  543705 memory.go:191] Add success.
I0320 12:45:43.409826  543705 cpu.go:282] Add success.
I0320 12:45:43.420459  543705 net.go:648] Add success.
I0320 12:45:43.423194  543705 net.go:770] primary dev: ETH0
I0320 12:45:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:45:43.423220  543705 net.go:698] Add success.
I0320 12:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:45:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:45:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:45:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:45:53.409774  543705 memory.go:184] no items to output this cycle
I0320 12:45:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 12:46:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:46:03.409798  543705 memory.go:184] no items to output this cycle
I0320 12:46:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 12:46:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:46:13.409775  543705 memory.go:191] Add success.
W0320 12:46:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:46:13.409802  543705 cpu.go:282] Add success.
W0320 12:46:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:46:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:46:13.419743  543705 net.go:648] Add success.
I0320 12:46:13.422340  543705 net.go:770] primary dev: ETH0
I0320 12:46:13.422355  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:46:13.422368  543705 net.go:698] Add success.
I0320 12:46:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:46:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:46:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 12:46:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:46:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 12:46:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:46:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:46:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:46:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:46:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:46:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:46:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:46:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 12:46:23.409788  543705 memory.go:184] no items to output this cycle
I0320 12:46:24.377676  543705 disk_info.go:125] begin check local disk info of client
I0320 12:46:24.380095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:46:24.380100  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474800 0xc000474840]
E0320 12:46:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:46:33.409805  543705 memory.go:184] no items to output this cycle
I0320 12:46:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 12:46:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:46:43.409824  543705 memory.go:191] Add success.
I0320 12:46:43.409827  543705 cpu.go:282] Add success.
I0320 12:46:43.420043  543705 net.go:648] Add success.
I0320 12:46:43.422684  543705 net.go:770] primary dev: ETH0
I0320 12:46:43.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:46:43.422712  543705 net.go:698] Add success.
I0320 12:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:46:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:46:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:46:53.409784  543705 cpu.go:275] no items to output this cycle
I0320 12:46:53.409790  543705 memory.go:184] no items to output this cycle
E0320 12:47:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:47:03.409781  543705 memory.go:184] no items to output this cycle
I0320 12:47:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 12:47:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:47:13.409782  543705 memory.go:191] Add success.
I0320 12:47:13.409792  543705 cpu.go:282] Add success.
W0320 12:47:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:47:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:47:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:47:13.420595  543705 net.go:648] Add success.
I0320 12:47:13.423404  543705 net.go:770] primary dev: ETH0
I0320 12:47:13.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:47:13.423438  543705 net.go:698] Add success.
I0320 12:47:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0320 12:47:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:47:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 12:47:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:47:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:47:14.455908  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:47:14.455914  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:47:14.456552  543705 disk_worker.go:494] system disk:vda1
I0320 12:47:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:47:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:47:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:47:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:47:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:47:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:47:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:47:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:47:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:47:23.409797  543705 memory.go:184] no items to output this cycle
I0320 12:47:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 12:47:24.381670  543705 disk_info.go:125] begin check local disk info of client
I0320 12:47:24.384156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:47:24.384161  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394800 0xc000394840]
E0320 12:47:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:47:33.409776  543705 memory.go:184] no items to output this cycle
I0320 12:47:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 12:47:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:47:43.409790  543705 memory.go:191] Add success.
I0320 12:47:43.409812  543705 cpu.go:282] Add success.
I0320 12:47:43.420000  543705 net.go:648] Add success.
I0320 12:47:43.423000  543705 net.go:770] primary dev: ETH0
I0320 12:47:43.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:47:43.423028  543705 net.go:698] Add success.
I0320 12:47:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:47:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:47:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:47:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:47:53.409777  543705 memory.go:184] no items to output this cycle
I0320 12:47:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:48:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:48:03.409799  543705 memory.go:184] no items to output this cycle
I0320 12:48:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 12:48:13.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:48:13.409925  543705 memory.go:191] Add success.
I0320 12:48:13.409939  543705 cpu.go:282] Add success.
W0320 12:48:13.410075  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:48:13.410089  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:48:13.410092  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:48:13.419763  543705 net.go:648] Add success.
I0320 12:48:13.422452  543705 net.go:770] primary dev: ETH0
I0320 12:48:13.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:48:13.422476  543705 net.go:698] Add success.
I0320 12:48:13.472435  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d61c4e7b-edad-4eeb-9526-b29eae2d7598","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:48:13.472468  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:48:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:48:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:48:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 12:48:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:48:14.456814  543705 disk_worker.go:494] system disk:vda1
I0320 12:48:14.456844  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:48:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:48:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:48:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:48:23.409775  543705 memory.go:184] no items to output this cycle
I0320 12:48:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 12:48:24.385669  543705 disk_info.go:125] begin check local disk info of client
I0320 12:48:24.388090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:48:24.388096  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b300 0xc00007b340]
E0320 12:48:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:48:33.409813  543705 memory.go:184] no items to output this cycle
I0320 12:48:33.409824  543705 cpu.go:275] no items to output this cycle
I0320 12:48:38.477139  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:48:38.477146  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:48:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:48:43.410661  543705 memory.go:191] Add success.
I0320 12:48:43.409828  543705 cpu.go:282] Add success.
I0320 12:48:43.420354  543705 net.go:648] Add success.
I0320 12:48:43.422919  543705 net.go:770] primary dev: ETH0
I0320 12:48:43.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:48:43.422944  543705 net.go:698] Add success.
I0320 12:48:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:48:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:48:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:48:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:48:53.409787  543705 memory.go:184] no items to output this cycle
I0320 12:48:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 12:49:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:49:03.409780  543705 memory.go:184] no items to output this cycle
I0320 12:49:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 12:49:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:49:13.409817  543705 memory.go:191] Add success.
I0320 12:49:13.409827  543705 cpu.go:282] Add success.
W0320 12:49:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:49:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:49:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:49:13.420166  543705 net.go:648] Add success.
I0320 12:49:13.423269  543705 net.go:770] primary dev: ETH0
I0320 12:49:13.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:49:13.423294  543705 net.go:698] Add success.
I0320 12:49:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:49:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:49:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 12:49:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:49:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 12:49:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:49:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:49:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:49:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:49:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:49:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:49:23.409762  543705 memory.go:184] no items to output this cycle
I0320 12:49:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 12:49:24.392002  543705 disk_info.go:125] begin check local disk info of client
I0320 12:49:24.394471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:49:24.394477  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faac0 0xc0001fab00]
E0320 12:49:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:49:33.409818  543705 memory.go:184] no items to output this cycle
I0320 12:49:33.409836  543705 cpu.go:275] no items to output this cycle
E0320 12:49:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:49:43.409799  543705 memory.go:191] Add success.
I0320 12:49:43.409812  543705 cpu.go:282] Add success.
I0320 12:49:43.419913  543705 net.go:648] Add success.
I0320 12:49:43.422586  543705 net.go:770] primary dev: ETH0
I0320 12:49:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:49:43.422615  543705 net.go:698] Add success.
I0320 12:49:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:49:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:49:53.409797  543705 memory.go:184] no items to output this cycle
I0320 12:49:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 12:50:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:50:03.409907  543705 memory.go:184] no items to output this cycle
I0320 12:50:03.409908  543705 cpu.go:275] no items to output this cycle
E0320 12:50:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:50:13.409781  543705 memory.go:191] Add success.
W0320 12:50:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:50:13.409806  543705 cpu.go:282] Add success.
W0320 12:50:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:50:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:50:13.420072  543705 net.go:648] Add success.
I0320 12:50:13.423317  543705 net.go:770] primary dev: ETH0
I0320 12:50:13.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:50:13.423346  543705 net.go:698] Add success.
I0320 12:50:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:50:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:50:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 12:50:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:50:14.456485  543705 disk_worker.go:494] system disk:vda1
I0320 12:50:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:50:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:50:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:50:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:50:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:50:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:50:23.409803  543705 memory.go:184] no items to output this cycle
I0320 12:50:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 12:50:24.397670  543705 disk_info.go:125] begin check local disk info of client
I0320 12:50:24.400156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:50:24.400161  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289580 0xc0002895c0]
E0320 12:50:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:50:33.409784  543705 memory.go:184] no items to output this cycle
I0320 12:50:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:50:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:50:43.409787  543705 memory.go:191] Add success.
I0320 12:50:43.409810  543705 cpu.go:282] Add success.
I0320 12:50:43.419960  543705 net.go:648] Add success.
I0320 12:50:43.422814  543705 net.go:770] primary dev: ETH0
I0320 12:50:43.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:50:43.422841  543705 net.go:698] Add success.
I0320 12:50:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:50:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:50:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:50:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:50:53.409768  543705 memory.go:184] no items to output this cycle
I0320 12:50:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 12:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:51:03.409777  543705 memory.go:184] no items to output this cycle
I0320 12:51:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 12:51:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:51:13.409787  543705 memory.go:191] Add success.
W0320 12:51:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 12:51:13.409818  543705 cpu.go:282] Add success.
W0320 12:51:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:51:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:51:13.420100  543705 net.go:648] Add success.
I0320 12:51:13.422898  543705 net.go:770] primary dev: ETH0
I0320 12:51:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:51:13.422923  543705 net.go:698] Add success.
I0320 12:51:13.469676  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"be9fc4ec-b1bd-46a1-9b26-00a167ce8845","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:51:13.469707  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:51:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:51:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:51:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 12:51:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:51:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 12:51:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:51:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:51:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:51:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:51:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:51:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:51:23.409774  543705 memory.go:184] no items to output this cycle
I0320 12:51:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 12:51:24.401668  543705 disk_info.go:125] begin check local disk info of client
I0320 12:51:24.404094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:51:24.404100  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa780 0xc0001aa7c0]
E0320 12:51:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:51:33.409822  543705 memory.go:184] no items to output this cycle
I0320 12:51:33.409834  543705 cpu.go:275] no items to output this cycle
I0320 12:51:38.478140  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:51:38.478147  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:51:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:51:43.410682  543705 memory.go:191] Add success.
I0320 12:51:43.409847  543705 cpu.go:282] Add success.
I0320 12:51:43.420469  543705 net.go:648] Add success.
I0320 12:51:43.423152  543705 net.go:770] primary dev: ETH0
I0320 12:51:43.423167  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:51:43.423181  543705 net.go:698] Add success.
I0320 12:51:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:51:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:51:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:51:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:51:53.409778  543705 memory.go:184] no items to output this cycle
I0320 12:51:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 12:52:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:52:03.409798  543705 memory.go:184] no items to output this cycle
I0320 12:52:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 12:52:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:52:13.409816  543705 memory.go:191] Add success.
I0320 12:52:13.409827  543705 cpu.go:282] Add success.
W0320 12:52:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:52:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:52:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:52:13.420131  543705 net.go:648] Add success.
I0320 12:52:13.423121  543705 net.go:770] primary dev: ETH0
I0320 12:52:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:52:13.423147  543705 net.go:698] Add success.
W0320 12:52:14.455083  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:52:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0320 12:52:14.455146  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:52:14.456949  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:52:14.456959  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:52:14.456965  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:52:14.457010  543705 disk_worker.go:494] system disk:vda1
I0320 12:52:14.457051  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:52:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:52:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:52:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:52:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:52:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:52:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:52:16.472344  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:52:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:52:23.409775  543705 memory.go:184] no items to output this cycle
I0320 12:52:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 12:52:24.405673  543705 disk_info.go:125] begin check local disk info of client
I0320 12:52:24.408090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:52:24.408096  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1340 0xc0003b1380]
E0320 12:52:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:52:33.409796  543705 memory.go:184] no items to output this cycle
I0320 12:52:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:52:43.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:52:43.409835  543705 memory.go:191] Add success.
I0320 12:52:43.409843  543705 cpu.go:282] Add success.
I0320 12:52:43.419972  543705 net.go:648] Add success.
I0320 12:52:43.422882  543705 net.go:770] primary dev: ETH0
I0320 12:52:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:52:43.422911  543705 net.go:698] Add success.
I0320 12:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:52:53.410273  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:52:53.410294  543705 memory.go:184] no items to output this cycle
I0320 12:52:53.410311  543705 cpu.go:275] no items to output this cycle
E0320 12:53:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:53:03.409797  543705 memory.go:184] no items to output this cycle
I0320 12:53:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 12:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:53:13.409783  543705 memory.go:191] Add success.
W0320 12:53:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:53:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:53:13.409855  543705 cpu.go:282] Add success.
I0320 12:53:13.420187  543705 net.go:648] Add success.
I0320 12:53:13.423202  543705 net.go:770] primary dev: ETH0
I0320 12:53:13.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:53:13.423227  543705 net.go:698] Add success.
I0320 12:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:53:14.455082  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:53:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0320 12:53:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:53:14.456477  543705 disk_worker.go:494] system disk:vda1
I0320 12:53:14.456522  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:53:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:53:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:53:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:53:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:53:23.409797  543705 memory.go:184] no items to output this cycle
I0320 12:53:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 12:53:24.409676  543705 disk_info.go:125] begin check local disk info of client
I0320 12:53:24.412169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:53:24.412174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b700 0xc00007b740]
E0320 12:53:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:53:33.409792  543705 memory.go:184] no items to output this cycle
I0320 12:53:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 12:53:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:53:43.409799  543705 memory.go:191] Add success.
I0320 12:53:43.409816  543705 cpu.go:282] Add success.
I0320 12:53:43.420007  543705 net.go:648] Add success.
I0320 12:53:43.422754  543705 net.go:770] primary dev: ETH0
I0320 12:53:43.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:53:43.422781  543705 net.go:698] Add success.
I0320 12:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:53:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:53:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:53:53.409780  543705 memory.go:184] no items to output this cycle
I0320 12:53:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 12:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:54:03.409873  543705 memory.go:184] no items to output this cycle
I0320 12:54:03.409947  543705 cpu.go:275] no items to output this cycle
E0320 12:54:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:54:13.409821  543705 memory.go:191] Add success.
I0320 12:54:13.409828  543705 cpu.go:282] Add success.
W0320 12:54:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:54:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:54:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:54:13.420120  543705 net.go:648] Add success.
I0320 12:54:13.423112  543705 net.go:770] primary dev: ETH0
I0320 12:54:13.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:54:13.423137  543705 net.go:698] Add success.
I0320 12:54:13.470385  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cb34fea9-9de2-4201-a42a-ca4714fadcfa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:54:13.470420  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 12:54:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:54:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:54:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 12:54:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:54:14.456530  543705 disk_worker.go:494] system disk:vda1
I0320 12:54:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:54:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:54:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:54:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:54:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:54:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:54:23.409801  543705 memory.go:184] no items to output this cycle
I0320 12:54:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 12:54:24.412789  543705 disk_info.go:125] begin check local disk info of client
I0320 12:54:24.415241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:54:24.415246  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474f00 0xc000474f40]
E0320 12:54:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:54:33.409818  543705 memory.go:184] no items to output this cycle
I0320 12:54:33.409832  543705 cpu.go:275] no items to output this cycle
I0320 12:54:38.479137  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:54:38.479144  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:54:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:54:43.410864  543705 memory.go:191] Add success.
I0320 12:54:43.409836  543705 cpu.go:282] Add success.
I0320 12:54:43.420551  543705 net.go:648] Add success.
I0320 12:54:43.423376  543705 net.go:770] primary dev: ETH0
I0320 12:54:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:54:43.423403  543705 net.go:698] Add success.
I0320 12:54:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:54:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:54:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:54:53.409800  543705 memory.go:184] no items to output this cycle
I0320 12:54:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 12:55:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:55:03.409782  543705 memory.go:184] no items to output this cycle
I0320 12:55:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 12:55:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:55:13.409783  543705 memory.go:191] Add success.
I0320 12:55:13.409788  543705 cpu.go:282] Add success.
W0320 12:55:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:55:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:55:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:55:13.420076  543705 net.go:648] Add success.
I0320 12:55:13.422770  543705 net.go:770] primary dev: ETH0
I0320 12:55:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:55:13.422795  543705 net.go:698] Add success.
I0320 12:55:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:55:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:55:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 12:55:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:55:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 12:55:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:55:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:55:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:55:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:55:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:55:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:55:23.409781  543705 memory.go:184] no items to output this cycle
I0320 12:55:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 12:55:24.415790  543705 disk_info.go:125] begin check local disk info of client
I0320 12:55:24.418250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:55:24.418256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf80 0xc0001aafc0]
E0320 12:55:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:55:33.409806  543705 memory.go:184] no items to output this cycle
I0320 12:55:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 12:55:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:55:43.409787  543705 memory.go:191] Add success.
I0320 12:55:43.409817  543705 cpu.go:282] Add success.
I0320 12:55:43.419872  543705 net.go:648] Add success.
I0320 12:55:43.422578  543705 net.go:770] primary dev: ETH0
I0320 12:55:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:55:43.422604  543705 net.go:698] Add success.
I0320 12:55:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:55:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:55:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:55:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:55:53.409799  543705 memory.go:184] no items to output this cycle
I0320 12:55:53.409807  543705 cpu.go:275] no items to output this cycle
I0320 12:56:03.409875  543705 cpu.go:275] no items to output this cycle
E0320 12:56:03.409949  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:56:03.409964  543705 memory.go:184] no items to output this cycle
E0320 12:56:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:56:13.409796  543705 cpu.go:282] Add success.
I0320 12:56:13.409805  543705 memory.go:191] Add success.
W0320 12:56:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:56:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:56:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:56:13.420049  543705 net.go:648] Add success.
I0320 12:56:13.422798  543705 net.go:770] primary dev: ETH0
I0320 12:56:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:56:13.422822  543705 net.go:698] Add success.
I0320 12:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:56:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:56:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 12:56:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:56:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 12:56:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:56:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:56:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:56:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:56:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:56:23.409790  543705 memory.go:184] no items to output this cycle
I0320 12:56:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 12:56:24.418793  543705 disk_info.go:125] begin check local disk info of client
I0320 12:56:24.421211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:56:24.421216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475640 0xc000475680]
E0320 12:56:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:56:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 12:56:33.409794  543705 memory.go:184] no items to output this cycle
E0320 12:56:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:56:43.409813  543705 memory.go:191] Add success.
I0320 12:56:43.409817  543705 cpu.go:282] Add success.
I0320 12:56:43.420106  543705 net.go:648] Add success.
I0320 12:56:43.422906  543705 net.go:770] primary dev: ETH0
I0320 12:56:43.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:56:43.422934  543705 net.go:698] Add success.
I0320 12:56:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:56:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:56:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:56:53.409778  543705 memory.go:184] no items to output this cycle
I0320 12:56:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 12:57:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:57:03.409793  543705 memory.go:184] no items to output this cycle
I0320 12:57:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 12:57:13.410462  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:57:13.410488  543705 memory.go:191] Add success.
W0320 12:57:13.410517  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:57:13.410530  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:57:13.410538  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:57:13.410672  543705 cpu.go:282] Add success.
I0320 12:57:13.419734  543705 net.go:648] Add success.
I0320 12:57:13.428636  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 12:57:13.428721  543705 net.go:770] primary dev: ETH0
I0320 12:57:13.428734  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:57:13.428749  543705 net.go:698] Add success.
I0320 12:57:13.453286  543705 event_worker.go:152] Polling the log file for events...
I0320 12:57:13.581887  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ce70a01f-c668-4a19-b024-831d6620bf47","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 12:57:13.581918  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 12:57:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:57:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 12:57:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0320 12:57:14.455874  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 12:57:14.455883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 12:57:14.455888  543705 custom_config.go:64] query custom config with name: gpu
I0320 12:57:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 12:57:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 12:57:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 12:57:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:57:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 12:57:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 12:57:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:57:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:57:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:57:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:57:23.409767  543705 memory.go:184] no items to output this cycle
I0320 12:57:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 12:57:24.421799  543705 disk_info.go:125] begin check local disk info of client
I0320 12:57:24.424314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:57:24.424319  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4680 0xc0000c46c0]
E0320 12:57:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:57:33.409783  543705 memory.go:184] no items to output this cycle
I0320 12:57:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 12:57:38.480155  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 12:57:38.480161  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 12:57:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:57:43.410681  543705 memory.go:191] Add success.
I0320 12:57:43.409816  543705 cpu.go:282] Add success.
I0320 12:57:43.420433  543705 net.go:648] Add success.
I0320 12:57:43.423086  543705 net.go:770] primary dev: ETH0
I0320 12:57:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:57:43.423117  543705 net.go:698] Add success.
I0320 12:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:57:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:57:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:57:53.410276  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:57:53.410302  543705 memory.go:184] no items to output this cycle
I0320 12:57:53.410303  543705 cpu.go:275] no items to output this cycle
E0320 12:58:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:58:03.409760  543705 memory.go:184] no items to output this cycle
I0320 12:58:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 12:58:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:58:13.409794  543705 memory.go:191] Add success.
I0320 12:58:13.409795  543705 cpu.go:282] Add success.
W0320 12:58:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:58:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:58:13.420124  543705 net.go:648] Add success.
I0320 12:58:13.423605  543705 net.go:770] primary dev: ETH0
I0320 12:58:13.423618  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:58:13.423629  543705 net.go:698] Add success.
I0320 12:58:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:58:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:58:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 12:58:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:58:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 12:58:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:58:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:58:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:58:16.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:58:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:58:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:58:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:58:23.409762  543705 memory.go:184] no items to output this cycle
I0320 12:58:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 12:58:24.424810  543705 disk_info.go:125] begin check local disk info of client
I0320 12:58:24.427259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:58:24.427264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa140 0xc0001fa180]
E0320 12:58:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:58:33.409808  543705 memory.go:184] no items to output this cycle
I0320 12:58:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 12:58:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:58:43.409788  543705 memory.go:191] Add success.
I0320 12:58:43.409809  543705 cpu.go:282] Add success.
I0320 12:58:43.419881  543705 net.go:648] Add success.
I0320 12:58:43.422221  543705 net.go:770] primary dev: ETH0
I0320 12:58:43.422235  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:58:43.422249  543705 net.go:698] Add success.
I0320 12:58:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:58:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:58:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:58:53.409767  543705 memory.go:184] no items to output this cycle
I0320 12:58:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 12:59:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:59:03.409769  543705 memory.go:184] no items to output this cycle
I0320 12:59:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 12:59:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:59:13.409793  543705 memory.go:191] Add success.
I0320 12:59:13.409794  543705 cpu.go:282] Add success.
W0320 12:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 12:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 12:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 12:59:13.420103  543705 net.go:648] Add success.
I0320 12:59:13.422718  543705 net.go:770] primary dev: ETH0
I0320 12:59:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:59:13.422746  543705 net.go:698] Add success.
I0320 12:59:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 12:59:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 12:59:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 12:59:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 12:59:14.456561  543705 disk_worker.go:494] system disk:vda1
I0320 12:59:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 12:59:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 12:59:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:59:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 12:59:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 12:59:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:59:23.409768  543705 memory.go:184] no items to output this cycle
I0320 12:59:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 12:59:24.427829  543705 disk_info.go:125] begin check local disk info of client
I0320 12:59:24.430303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 12:59:24.430308  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327200 0xc000327240]
E0320 12:59:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:59:33.409774  543705 memory.go:184] no items to output this cycle
I0320 12:59:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 12:59:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:59:43.409807  543705 memory.go:191] Add success.
I0320 12:59:43.409809  543705 cpu.go:282] Add success.
I0320 12:59:43.419944  543705 net.go:648] Add success.
I0320 12:59:43.422706  543705 net.go:770] primary dev: ETH0
I0320 12:59:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 12:59:43.422732  543705 net.go:698] Add success.
I0320 12:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 12:59:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 12:59:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 12:59:53.410275  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 12:59:53.410293  543705 memory.go:184] no items to output this cycle
I0320 12:59:53.410294  543705 cpu.go:275] no items to output this cycle
E0320 13:00:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:00:03.409776  543705 memory.go:184] no items to output this cycle
I0320 13:00:03.409779  543705 cpu.go:275] no items to output this cycle
E0320 13:00:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:00:13.409815  543705 memory.go:191] Add success.
I0320 13:00:13.409823  543705 cpu.go:282] Add success.
W0320 13:00:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:00:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:00:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:00:13.420140  543705 net.go:648] Add success.
I0320 13:00:13.422901  543705 net.go:770] primary dev: ETH0
I0320 13:00:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:00:13.422926  543705 net.go:698] Add success.
I0320 13:00:13.464047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7112539b-ad56-4dbb-b13a-9d150721c68b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:00:13.464081  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:00:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:00:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:00:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 13:00:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:00:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 13:00:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:00:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:00:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:00:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:00:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:00:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:00:23.409811  543705 memory.go:184] no items to output this cycle
I0320 13:00:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 13:00:24.430853  543705 disk_info.go:125] begin check local disk info of client
I0320 13:00:24.433278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:00:24.433283  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395e40 0xc000395e80]
E0320 13:00:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:00:33.409820  543705 memory.go:184] no items to output this cycle
I0320 13:00:33.409832  543705 cpu.go:275] no items to output this cycle
I0320 13:00:38.481158  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:00:38.481165  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:00:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:00:43.410587  543705 memory.go:191] Add success.
I0320 13:00:43.409805  543705 cpu.go:282] Add success.
I0320 13:00:43.420327  543705 net.go:648] Add success.
I0320 13:00:43.422864  543705 net.go:770] primary dev: ETH0
I0320 13:00:43.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:00:43.422889  543705 net.go:698] Add success.
I0320 13:00:46.458032  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:00:46.458095  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:00:46.458121  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:00:53.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:00:53.410394  543705 memory.go:184] no items to output this cycle
I0320 13:00:53.410401  543705 cpu.go:275] no items to output this cycle
E0320 13:01:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:01:03.409780  543705 memory.go:184] no items to output this cycle
I0320 13:01:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 13:01:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:01:13.409821  543705 memory.go:191] Add success.
I0320 13:01:13.409835  543705 cpu.go:282] Add success.
W0320 13:01:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:01:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:01:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:01:13.420157  543705 net.go:648] Add success.
I0320 13:01:13.422952  543705 net.go:770] primary dev: ETH0
I0320 13:01:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:01:13.422977  543705 net.go:698] Add success.
I0320 13:01:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:01:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:01:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 13:01:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:01:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 13:01:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:01:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:01:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:01:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:01:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:01:23.409783  543705 memory.go:184] no items to output this cycle
I0320 13:01:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 13:01:24.433856  543705 disk_info.go:125] begin check local disk info of client
I0320 13:01:24.436258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:01:24.436264  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353bc0 0xc000353c00]
E0320 13:01:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:01:33.409811  543705 memory.go:184] no items to output this cycle
I0320 13:01:33.409822  543705 cpu.go:275] no items to output this cycle
E0320 13:01:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:01:43.409812  543705 memory.go:191] Add success.
I0320 13:01:43.409826  543705 cpu.go:282] Add success.
I0320 13:01:43.419978  543705 net.go:648] Add success.
I0320 13:01:43.422702  543705 net.go:770] primary dev: ETH0
I0320 13:01:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:01:43.422729  543705 net.go:698] Add success.
I0320 13:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:01:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:01:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:01:53.409771  543705 memory.go:184] no items to output this cycle
I0320 13:01:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 13:02:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:02:03.409801  543705 memory.go:184] no items to output this cycle
I0320 13:02:03.409836  543705 cpu.go:275] no items to output this cycle
E0320 13:02:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:02:13.409788  543705 memory.go:191] Add success.
I0320 13:02:13.409800  543705 cpu.go:282] Add success.
W0320 13:02:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:02:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:02:13.420147  543705 net.go:648] Add success.
I0320 13:02:13.422685  543705 net.go:770] primary dev: ETH0
I0320 13:02:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:02:13.422710  543705 net.go:698] Add success.
W0320 13:02:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:02:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 13:02:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:02:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:02:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:02:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:02:14.456987  543705 disk_worker.go:494] system disk:vda1
I0320 13:02:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:02:15.456904  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:02:15.456917  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:02:16.458026  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:02:16.458033  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:02:16.458079  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:02:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:02:16.472492  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:02:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:02:23.409764  543705 memory.go:184] no items to output this cycle
I0320 13:02:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 13:02:24.436877  543705 disk_info.go:125] begin check local disk info of client
I0320 13:02:24.439341  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:02:24.439347  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358300 0xc000358340]
E0320 13:02:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:02:33.409811  543705 memory.go:184] no items to output this cycle
I0320 13:02:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 13:02:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:02:43.409788  543705 memory.go:191] Add success.
I0320 13:02:43.409813  543705 cpu.go:282] Add success.
I0320 13:02:43.419874  543705 net.go:648] Add success.
I0320 13:02:43.423252  543705 net.go:770] primary dev: ETH0
I0320 13:02:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:02:43.423277  543705 net.go:698] Add success.
I0320 13:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:02:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:02:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:02:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:02:53.409785  543705 memory.go:184] no items to output this cycle
I0320 13:02:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 13:03:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:03:03.409765  543705 memory.go:184] no items to output this cycle
I0320 13:03:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 13:03:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:03:13.409792  543705 memory.go:191] Add success.
I0320 13:03:13.409800  543705 cpu.go:282] Add success.
W0320 13:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:03:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:03:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:03:13.420310  543705 net.go:648] Add success.
I0320 13:03:13.423109  543705 net.go:770] primary dev: ETH0
I0320 13:03:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:03:13.423133  543705 net.go:698] Add success.
I0320 13:03:13.469603  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f0309c7e-bab0-4dfd-83d5-ff00158536bf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:03:13.469635  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:03:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:03:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 13:03:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:03:14.456615  543705 disk_worker.go:494] system disk:vda1
I0320 13:03:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:03:15.455604  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:03:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:03:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:03:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:03:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:03:23.409778  543705 memory.go:184] no items to output this cycle
I0320 13:03:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 13:03:24.439889  543705 disk_info.go:125] begin check local disk info of client
I0320 13:03:24.442332  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:03:24.442337  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb00 0xc0001abb40]
E0320 13:03:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:03:33.409780  543705 memory.go:184] no items to output this cycle
I0320 13:03:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 13:03:38.482167  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:03:38.482174  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:03:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:03:43.410672  543705 memory.go:191] Add success.
I0320 13:03:43.409822  543705 cpu.go:282] Add success.
I0320 13:03:43.420546  543705 net.go:648] Add success.
I0320 13:03:43.423452  543705 net.go:770] primary dev: ETH0
I0320 13:03:43.423467  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:03:43.423481  543705 net.go:698] Add success.
I0320 13:03:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:03:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:03:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:03:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:03:53.409800  543705 memory.go:184] no items to output this cycle
I0320 13:03:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 13:04:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:04:03.409799  543705 memory.go:184] no items to output this cycle
I0320 13:04:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 13:04:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:04:13.409810  543705 memory.go:191] Add success.
I0320 13:04:13.409819  543705 cpu.go:282] Add success.
W0320 13:04:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:04:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:04:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:04:13.420113  543705 net.go:648] Add success.
I0320 13:04:13.423134  543705 net.go:770] primary dev: ETH0
I0320 13:04:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:04:13.423159  543705 net.go:698] Add success.
I0320 13:04:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:04:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:04:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0320 13:04:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:04:14.456608  543705 disk_worker.go:494] system disk:vda1
I0320 13:04:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:04:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:04:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:04:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:04:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:04:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:04:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:04:23.409771  543705 memory.go:184] no items to output this cycle
I0320 13:04:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 13:04:24.442908  543705 disk_info.go:125] begin check local disk info of client
I0320 13:04:24.445345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:04:24.445352  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fae40 0xc0001faec0]
E0320 13:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:04:33.409788  543705 memory.go:184] no items to output this cycle
I0320 13:04:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 13:04:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:04:43.409780  543705 memory.go:191] Add success.
I0320 13:04:43.409801  543705 cpu.go:282] Add success.
I0320 13:04:43.420204  543705 net.go:648] Add success.
I0320 13:04:43.423014  543705 net.go:770] primary dev: ETH0
I0320 13:04:43.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:04:43.423038  543705 net.go:698] Add success.
I0320 13:04:46.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:04:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:04:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:04:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:04:53.409782  543705 memory.go:184] no items to output this cycle
I0320 13:04:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 13:05:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:05:03.409775  543705 memory.go:184] no items to output this cycle
I0320 13:05:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 13:05:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:05:13.409788  543705 memory.go:191] Add success.
I0320 13:05:13.409789  543705 cpu.go:282] Add success.
W0320 13:05:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:05:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:05:13.420147  543705 net.go:648] Add success.
I0320 13:05:13.422955  543705 net.go:770] primary dev: ETH0
I0320 13:05:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:05:13.422979  543705 net.go:698] Add success.
I0320 13:05:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:05:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:05:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 13:05:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:05:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 13:05:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:05:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:05:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:05:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:05:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:05:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:05:23.410409  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:05:23.410424  543705 memory.go:184] no items to output this cycle
I0320 13:05:23.410446  543705 cpu.go:275] no items to output this cycle
I0320 13:05:24.445930  543705 disk_info.go:125] begin check local disk info of client
I0320 13:05:24.448392  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:05:24.448398  543705 disk_info.go:196] parse disk info done, disk is : [0xc000533180 0xc0005331c0]
E0320 13:05:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:05:33.409784  543705 memory.go:184] no items to output this cycle
I0320 13:05:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 13:05:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:05:43.409831  543705 memory.go:191] Add success.
I0320 13:05:43.409835  543705 cpu.go:282] Add success.
I0320 13:05:43.419719  543705 net.go:648] Add success.
I0320 13:05:43.422530  543705 net.go:770] primary dev: ETH0
I0320 13:05:43.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:05:43.422554  543705 net.go:698] Add success.
I0320 13:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:05:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:05:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:05:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:05:53.409776  543705 memory.go:184] no items to output this cycle
I0320 13:05:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 13:06:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:06:03.409778  543705 memory.go:184] no items to output this cycle
I0320 13:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 13:06:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:06:13.409808  543705 memory.go:191] Add success.
I0320 13:06:13.409816  543705 cpu.go:282] Add success.
W0320 13:06:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:06:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:06:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:06:13.420059  543705 net.go:648] Add success.
I0320 13:06:13.422692  543705 net.go:770] primary dev: ETH0
I0320 13:06:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:06:13.422721  543705 net.go:698] Add success.
I0320 13:06:13.469840  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"657f17c0-ee99-4c7d-9406-49f240d7e7e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:06:13.469874  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:06:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:06:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:06:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 13:06:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:06:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 13:06:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:06:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:06:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:06:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:06:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:06:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:06:23.409793  543705 memory.go:184] no items to output this cycle
I0320 13:06:24.448934  543705 disk_info.go:125] begin check local disk info of client
I0320 13:06:24.451420  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:06:24.451425  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6500 0xc0002b6540]
E0320 13:06:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:06:33.409775  543705 memory.go:184] no items to output this cycle
I0320 13:06:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 13:06:38.483177  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:06:38.483183  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:06:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:06:43.410650  543705 memory.go:191] Add success.
I0320 13:06:43.409816  543705 cpu.go:282] Add success.
I0320 13:06:43.420642  543705 net.go:648] Add success.
I0320 13:06:43.423377  543705 net.go:770] primary dev: ETH0
I0320 13:06:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:06:43.423402  543705 net.go:698] Add success.
I0320 13:06:46.458015  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:06:46.458089  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:06:46.458125  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:06:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:06:53.409778  543705 memory.go:184] no items to output this cycle
I0320 13:06:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 13:07:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:07:03.409783  543705 memory.go:184] no items to output this cycle
I0320 13:07:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 13:07:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:07:13.409806  543705 memory.go:191] Add success.
I0320 13:07:13.409815  543705 cpu.go:282] Add success.
W0320 13:07:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:07:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:07:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:07:13.420085  543705 net.go:648] Add success.
I0320 13:07:13.422873  543705 net.go:770] primary dev: ETH0
I0320 13:07:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:07:13.422901  543705 net.go:698] Add success.
I0320 13:07:13.453446  543705 event_worker.go:152] Polling the log file for events...
W0320 13:07:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:07:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 13:07:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:07:14.456951  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:07:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:07:14.456967  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:07:14.457006  543705 disk_worker.go:494] system disk:vda1
I0320 13:07:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:07:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:07:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:07:16.457909  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:07:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:07:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:07:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:07:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:07:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:07:23.409785  543705 memory.go:184] no items to output this cycle
I0320 13:07:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 13:07:24.451941  543705 disk_info.go:125] begin check local disk info of client
I0320 13:07:24.454428  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:07:24.454434  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260b40 0xc000260b80]
E0320 13:07:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:07:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:07:33.409792  543705 memory.go:184] no items to output this cycle
E0320 13:07:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:07:43.409807  543705 memory.go:191] Add success.
I0320 13:07:43.409807  543705 cpu.go:282] Add success.
I0320 13:07:43.419756  543705 net.go:648] Add success.
I0320 13:07:43.422262  543705 net.go:770] primary dev: ETH0
I0320 13:07:43.422275  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:07:43.422288  543705 net.go:698] Add success.
I0320 13:07:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:07:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:07:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:07:53.409798  543705 memory.go:184] no items to output this cycle
I0320 13:07:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 13:08:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:08:03.409776  543705 cpu.go:275] no items to output this cycle
I0320 13:08:03.409780  543705 memory.go:184] no items to output this cycle
E0320 13:08:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:08:13.409787  543705 memory.go:191] Add success.
I0320 13:08:13.409786  543705 cpu.go:282] Add success.
W0320 13:08:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:08:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:08:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:08:13.420215  543705 net.go:648] Add success.
I0320 13:08:13.423285  543705 net.go:770] primary dev: ETH0
I0320 13:08:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:08:13.423310  543705 net.go:698] Add success.
I0320 13:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:08:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:08:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 13:08:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:08:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 13:08:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:08:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:08:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:08:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:08:23.409779  543705 memory.go:184] no items to output this cycle
I0320 13:08:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 13:08:24.454964  543705 disk_info.go:125] begin check local disk info of client
I0320 13:08:24.457455  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:08:24.457461  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b64c0 0xc0002b6500]
E0320 13:08:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:08:33.409782  543705 memory.go:184] no items to output this cycle
I0320 13:08:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 13:08:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:08:43.409790  543705 memory.go:191] Add success.
I0320 13:08:43.409821  543705 cpu.go:282] Add success.
I0320 13:08:43.420223  543705 net.go:648] Add success.
I0320 13:08:43.422894  543705 net.go:770] primary dev: ETH0
I0320 13:08:43.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:08:43.422923  543705 net.go:698] Add success.
I0320 13:08:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:08:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:08:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:08:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:08:53.409773  543705 memory.go:184] no items to output this cycle
I0320 13:08:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 13:09:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:09:03.409799  543705 memory.go:184] no items to output this cycle
I0320 13:09:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 13:09:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:09:13.409786  543705 memory.go:191] Add success.
I0320 13:09:13.409787  543705 cpu.go:282] Add success.
W0320 13:09:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:09:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:09:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:09:13.420122  543705 net.go:648] Add success.
I0320 13:09:13.423035  543705 net.go:770] primary dev: ETH0
I0320 13:09:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:09:13.423059  543705 net.go:698] Add success.
I0320 13:09:13.470626  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6402604d-f1bd-497c-a7fb-611381a46881","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:09:13.470660  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:09:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:09:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 13:09:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:09:14.456515  543705 disk_worker.go:494] system disk:vda1
I0320 13:09:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:09:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:09:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:09:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:09:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:09:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:09:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:09:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 13:09:23.409791  543705 memory.go:184] no items to output this cycle
I0320 13:09:24.457975  543705 disk_info.go:125] begin check local disk info of client
I0320 13:09:24.460451  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:09:24.460457  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004af800 0xc0004af840]
E0320 13:09:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:09:33.409790  543705 memory.go:184] no items to output this cycle
I0320 13:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 13:09:38.484166  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:09:38.484173  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:09:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:09:43.410571  543705 memory.go:191] Add success.
I0320 13:09:43.409817  543705 cpu.go:282] Add success.
I0320 13:09:43.420287  543705 net.go:648] Add success.
I0320 13:09:43.422853  543705 net.go:770] primary dev: ETH0
I0320 13:09:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:09:43.422879  543705 net.go:698] Add success.
I0320 13:09:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:09:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:09:53.409906  543705 cpu.go:275] no items to output this cycle
E0320 13:09:53.409921  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:09:53.409973  543705 memory.go:184] no items to output this cycle
E0320 13:10:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:10:03.409782  543705 memory.go:184] no items to output this cycle
I0320 13:10:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 13:10:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:10:13.409779  543705 memory.go:191] Add success.
I0320 13:10:13.409802  543705 cpu.go:282] Add success.
W0320 13:10:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:10:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:10:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:10:13.420122  543705 net.go:648] Add success.
I0320 13:10:13.422930  543705 net.go:770] primary dev: ETH0
I0320 13:10:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:10:13.422955  543705 net.go:698] Add success.
I0320 13:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:10:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:10:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 13:10:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:10:14.456561  543705 disk_worker.go:494] system disk:vda1
I0320 13:10:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:10:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:10:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:10:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:10:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:10:23.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:10:23.410400  543705 cpu.go:275] no items to output this cycle
I0320 13:10:23.410404  543705 memory.go:184] no items to output this cycle
I0320 13:10:24.460994  543705 disk_info.go:125] begin check local disk info of client
I0320 13:10:24.463473  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:10:24.463480  543705 disk_info.go:196] parse disk info done, disk is : [0xc000347740 0xc000347780]
E0320 13:10:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:10:33.409778  543705 memory.go:184] no items to output this cycle
I0320 13:10:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 13:10:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:10:43.409816  543705 memory.go:191] Add success.
I0320 13:10:43.409817  543705 cpu.go:282] Add success.
I0320 13:10:43.419895  543705 net.go:648] Add success.
I0320 13:10:43.423006  543705 net.go:770] primary dev: ETH0
I0320 13:10:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:10:43.423032  543705 net.go:698] Add success.
I0320 13:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:10:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:10:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:10:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:10:53.409786  543705 cpu.go:275] no items to output this cycle
I0320 13:10:53.409795  543705 memory.go:184] no items to output this cycle
E0320 13:11:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:11:03.409768  543705 memory.go:184] no items to output this cycle
I0320 13:11:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 13:11:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:11:13.409811  543705 memory.go:191] Add success.
I0320 13:11:13.409825  543705 cpu.go:282] Add success.
W0320 13:11:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:11:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:11:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:11:13.420147  543705 net.go:648] Add success.
I0320 13:11:13.422850  543705 net.go:770] primary dev: ETH0
I0320 13:11:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:11:13.422874  543705 net.go:698] Add success.
I0320 13:11:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:11:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:11:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 13:11:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:11:14.456584  543705 disk_worker.go:494] system disk:vda1
I0320 13:11:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:11:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:11:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:11:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:11:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:11:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:11:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 13:11:23.409796  543705 memory.go:184] no items to output this cycle
I0320 13:11:24.464015  543705 disk_info.go:125] begin check local disk info of client
I0320 13:11:24.466494  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:11:24.466500  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb080 0xc0001fb0c0]
E0320 13:11:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:11:33.409785  543705 memory.go:184] no items to output this cycle
I0320 13:11:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 13:11:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:11:43.409840  543705 memory.go:191] Add success.
I0320 13:11:43.409842  543705 cpu.go:282] Add success.
I0320 13:11:43.419995  543705 net.go:648] Add success.
I0320 13:11:43.423080  543705 net.go:770] primary dev: ETH0
I0320 13:11:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:11:43.423109  543705 net.go:698] Add success.
I0320 13:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:11:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:11:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:11:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:11:53.409791  543705 memory.go:184] no items to output this cycle
I0320 13:11:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 13:12:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:12:03.409806  543705 memory.go:184] no items to output this cycle
I0320 13:12:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 13:12:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:12:13.409792  543705 memory.go:191] Add success.
I0320 13:12:13.409811  543705 cpu.go:282] Add success.
W0320 13:12:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:12:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:12:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:12:13.420054  543705 net.go:648] Add success.
I0320 13:12:13.422624  543705 net.go:770] primary dev: ETH0
I0320 13:12:13.422638  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:12:13.422652  543705 net.go:698] Add success.
I0320 13:12:13.464598  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"45484058-aea0-431d-b8f2-62f8b8b0be91","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:12:13.464632  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 13:12:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:12:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 13:12:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:12:14.455899  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:12:14.455908  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:12:14.455913  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:12:14.456555  543705 disk_worker.go:494] system disk:vda1
I0320 13:12:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:12:15.456886  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:12:15.456894  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:12:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:12:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:12:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:12:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:12:16.472312  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:12:23.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:12:23.409890  543705 memory.go:184] no items to output this cycle
I0320 13:12:23.409916  543705 cpu.go:275] no items to output this cycle
I0320 13:12:24.467028  543705 disk_info.go:125] begin check local disk info of client
I0320 13:12:24.469543  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:12:24.469549  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae00 0xc0001aae40]
E0320 13:12:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:12:33.409802  543705 memory.go:184] no items to output this cycle
I0320 13:12:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 13:12:38.485178  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:12:38.485185  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:12:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:12:43.410994  543705 memory.go:191] Add success.
I0320 13:12:43.409828  543705 cpu.go:282] Add success.
I0320 13:12:43.419946  543705 net.go:648] Add success.
I0320 13:12:43.422553  543705 net.go:770] primary dev: ETH0
I0320 13:12:43.422567  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:12:43.422579  543705 net.go:698] Add success.
I0320 13:12:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:12:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:12:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:12:53.409772  543705 memory.go:184] no items to output this cycle
I0320 13:12:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 13:13:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:13:03.409807  543705 memory.go:184] no items to output this cycle
I0320 13:13:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 13:13:13.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:13:13.409775  543705 memory.go:191] Add success.
W0320 13:13:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:13:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:13:13.409824  543705 cpu.go:282] Add success.
I0320 13:13:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:13:13.420067  543705 net.go:648] Add success.
I0320 13:13:13.423067  543705 net.go:770] primary dev: ETH0
I0320 13:13:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:13:13.423093  543705 net.go:698] Add success.
I0320 13:13:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:13:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:13:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 13:13:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:13:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 13:13:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:13:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:13:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:13:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:13:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:13:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:13:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:13:23.409773  543705 memory.go:184] no items to output this cycle
I0320 13:13:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 13:13:24.470046  543705 disk_info.go:125] begin check local disk info of client
I0320 13:13:24.472532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:13:24.472537  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ea00 0xc00039ea40]
I0320 13:13:33.409906  543705 cpu.go:275] no items to output this cycle
E0320 13:13:33.410045  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:13:33.410059  543705 memory.go:184] no items to output this cycle
E0320 13:13:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:13:43.409826  543705 memory.go:191] Add success.
I0320 13:13:43.409886  543705 cpu.go:282] Add success.
I0320 13:13:43.420229  543705 net.go:648] Add success.
I0320 13:13:43.423086  543705 net.go:770] primary dev: ETH0
I0320 13:13:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:13:43.423114  543705 net.go:698] Add success.
I0320 13:13:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:13:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:13:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:13:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:13:53.410264  543705 memory.go:184] no items to output this cycle
I0320 13:13:53.410269  543705 cpu.go:275] no items to output this cycle
E0320 13:14:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:14:03.409769  543705 memory.go:184] no items to output this cycle
I0320 13:14:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 13:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:14:13.409791  543705 memory.go:191] Add success.
I0320 13:14:13.409793  543705 cpu.go:282] Add success.
W0320 13:14:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:14:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:14:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:14:13.420056  543705 net.go:648] Add success.
I0320 13:14:13.422738  543705 net.go:770] primary dev: ETH0
I0320 13:14:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:14:13.422764  543705 net.go:698] Add success.
I0320 13:14:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:14:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:14:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 13:14:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:14:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 13:14:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:14:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:14:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:14:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:14:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:14:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:14:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:14:23.409777  543705 memory.go:184] no items to output this cycle
I0320 13:14:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 13:14:24.473101  543705 disk_info.go:125] begin check local disk info of client
I0320 13:14:24.475676  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:14:24.475683  543705 disk_info.go:196] parse disk info done, disk is : [0xc000320000 0xc000320040]
E0320 13:14:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:14:33.409789  543705 memory.go:184] no items to output this cycle
I0320 13:14:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 13:14:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:14:43.409815  543705 memory.go:191] Add success.
I0320 13:14:43.409819  543705 cpu.go:282] Add success.
I0320 13:14:43.419986  543705 net.go:648] Add success.
I0320 13:14:43.422683  543705 net.go:770] primary dev: ETH0
I0320 13:14:43.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:14:43.422709  543705 net.go:698] Add success.
I0320 13:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:14:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:14:53.409800  543705 memory.go:184] no items to output this cycle
I0320 13:14:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 13:15:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:15:03.409770  543705 memory.go:184] no items to output this cycle
I0320 13:15:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 13:15:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:15:13.409806  543705 memory.go:191] Add success.
I0320 13:15:13.409813  543705 cpu.go:282] Add success.
W0320 13:15:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:15:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:15:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:15:13.420133  543705 net.go:648] Add success.
I0320 13:15:13.422989  543705 net.go:770] primary dev: ETH0
I0320 13:15:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:15:13.423017  543705 net.go:698] Add success.
I0320 13:15:13.469522  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"64bbc47e-ec84-4abf-831e-12fe511cfbf8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:15:13.469555  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:15:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:15:14.455232  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:15:14.455243  543705 disk_worker.go:708] disk space is not compliant
W0320 13:15:14.455246  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:15:14.456948  543705 disk_worker.go:494] system disk:vda1
I0320 13:15:14.457006  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:15:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:15:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:15:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:15:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:15:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:15:23.409773  543705 memory.go:184] no items to output this cycle
I0320 13:15:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 13:15:24.475761  543705 disk_info.go:125] begin check local disk info of client
I0320 13:15:24.478289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:15:24.478294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002831c0 0xc000283200]
E0320 13:15:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:15:33.409777  543705 memory.go:184] no items to output this cycle
I0320 13:15:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 13:15:38.486187  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:15:38.486194  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:15:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:15:43.409822  543705 memory.go:191] Add success.
I0320 13:15:43.409825  543705 cpu.go:282] Add success.
I0320 13:15:43.420224  543705 net.go:648] Add success.
I0320 13:15:43.421285  543705 net.go:770] primary dev: ETH0
I0320 13:15:43.421301  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:15:43.421314  543705 net.go:698] Add success.
I0320 13:15:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:15:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:15:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:15:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:15:53.409786  543705 cpu.go:275] no items to output this cycle
I0320 13:15:53.409794  543705 memory.go:184] no items to output this cycle
E0320 13:16:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:16:03.409780  543705 memory.go:184] no items to output this cycle
I0320 13:16:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 13:16:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:16:13.409782  543705 memory.go:191] Add success.
W0320 13:16:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 13:16:13.409809  543705 cpu.go:282] Add success.
W0320 13:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:16:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:16:13.420142  543705 net.go:648] Add success.
I0320 13:16:13.422940  543705 net.go:770] primary dev: ETH0
I0320 13:16:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:16:13.422966  543705 net.go:698] Add success.
I0320 13:16:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:16:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:16:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 13:16:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:16:14.456829  543705 disk_worker.go:494] system disk:vda1
I0320 13:16:14.456877  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:16:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:16:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:16:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:16:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:16:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:16:23.409775  543705 memory.go:184] no items to output this cycle
I0320 13:16:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 13:16:24.479083  543705 disk_info.go:125] begin check local disk info of client
I0320 13:16:24.481489  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:16:24.481496  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0320 13:16:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:16:33.409807  543705 memory.go:184] no items to output this cycle
I0320 13:16:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 13:16:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:16:43.409787  543705 memory.go:191] Add success.
I0320 13:16:43.409799  543705 cpu.go:282] Add success.
I0320 13:16:43.419905  543705 net.go:648] Add success.
I0320 13:16:43.422878  543705 net.go:770] primary dev: ETH0
I0320 13:16:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:16:43.422907  543705 net.go:698] Add success.
I0320 13:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:16:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:16:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:16:53.410332  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:16:53.410348  543705 memory.go:184] no items to output this cycle
I0320 13:16:53.410354  543705 cpu.go:275] no items to output this cycle
E0320 13:17:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:17:03.409783  543705 memory.go:184] no items to output this cycle
I0320 13:17:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 13:17:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:17:13.409796  543705 memory.go:191] Add success.
I0320 13:17:13.409797  543705 cpu.go:282] Add success.
W0320 13:17:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:17:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:17:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:17:13.420490  543705 net.go:648] Add success.
I0320 13:17:13.423680  543705 net.go:770] primary dev: ETH0
I0320 13:17:13.423694  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:17:13.423705  543705 net.go:698] Add success.
I0320 13:17:13.452772  543705 event_worker.go:152] Polling the log file for events...
W0320 13:17:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:17:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 13:17:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:17:14.455945  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:17:14.455954  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:17:14.455960  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:17:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 13:17:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:17:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:17:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:17:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:17:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:17:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:17:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:17:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:17:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:17:23.409775  543705 memory.go:184] no items to output this cycle
I0320 13:17:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 13:17:24.482092  543705 disk_info.go:125] begin check local disk info of client
I0320 13:17:24.484505  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:17:24.484511  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004744c0 0xc000474500]
E0320 13:17:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:17:33.409818  543705 memory.go:184] no items to output this cycle
I0320 13:17:33.409833  543705 cpu.go:275] no items to output this cycle
E0320 13:17:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:17:43.409804  543705 memory.go:191] Add success.
I0320 13:17:43.409866  543705 cpu.go:282] Add success.
I0320 13:17:43.420264  543705 net.go:648] Add success.
I0320 13:17:43.423301  543705 net.go:770] primary dev: ETH0
I0320 13:17:43.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:17:43.423341  543705 net.go:698] Add success.
I0320 13:17:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:17:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:17:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:17:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:17:53.409769  543705 memory.go:184] no items to output this cycle
I0320 13:17:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 13:18:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:18:03.409784  543705 memory.go:184] no items to output this cycle
I0320 13:18:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 13:18:13.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:18:13.409929  543705 memory.go:191] Add success.
W0320 13:18:13.409979  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 13:18:13.409979  543705 cpu.go:282] Add success.
W0320 13:18:13.409997  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:18:13.410002  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:18:13.419734  543705 net.go:648] Add success.
I0320 13:18:13.422404  543705 net.go:770] primary dev: ETH0
I0320 13:18:13.422417  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:18:13.422429  543705 net.go:698] Add success.
I0320 13:18:13.469172  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aa0669ec-6d41-4fa9-8f40-6fb256539820","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:18:13.469212  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:18:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:18:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:18:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 13:18:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:18:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 13:18:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:18:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:18:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:18:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:18:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:18:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:18:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:18:23.409785  543705 memory.go:184] no items to output this cycle
I0320 13:18:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 13:18:24.485112  543705 disk_info.go:125] begin check local disk info of client
I0320 13:18:24.487540  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:18:24.487545  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac3c0 0xc0002ac440]
E0320 13:18:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:18:33.409794  543705 memory.go:184] no items to output this cycle
I0320 13:18:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 13:18:38.487187  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:18:38.487194  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:18:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:18:43.410757  543705 memory.go:191] Add success.
I0320 13:18:43.409846  543705 cpu.go:282] Add success.
I0320 13:18:43.420322  543705 net.go:770] primary dev: ETH0
I0320 13:18:43.420336  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:18:43.420348  543705 net.go:698] Add success.
I0320 13:18:43.420588  543705 net.go:648] Add success.
I0320 13:18:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:18:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:18:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:18:53.409781  543705 memory.go:184] no items to output this cycle
I0320 13:18:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 13:19:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:19:03.409814  543705 memory.go:184] no items to output this cycle
I0320 13:19:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 13:19:13.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:19:13.409893  543705 memory.go:191] Add success.
W0320 13:19:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:19:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:19:13.409937  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:19:13.409953  543705 cpu.go:282] Add success.
I0320 13:19:13.419756  543705 net.go:648] Add success.
I0320 13:19:13.422483  543705 net.go:770] primary dev: ETH0
I0320 13:19:13.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:19:13.422511  543705 net.go:698] Add success.
I0320 13:19:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:19:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:19:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 13:19:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:19:14.456557  543705 disk_worker.go:494] system disk:vda1
I0320 13:19:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:19:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:19:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:19:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:19:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:19:23.410675  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:19:23.410691  543705 memory.go:184] no items to output this cycle
I0320 13:19:23.410720  543705 cpu.go:275] no items to output this cycle
I0320 13:19:24.488135  543705 disk_info.go:125] begin check local disk info of client
I0320 13:19:24.490561  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:19:24.490566  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048db80 0xc00048dbc0]
E0320 13:19:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:19:33.409816  543705 memory.go:184] no items to output this cycle
I0320 13:19:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 13:19:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:19:43.409780  543705 memory.go:191] Add success.
I0320 13:19:43.409808  543705 cpu.go:282] Add success.
I0320 13:19:43.419901  543705 net.go:648] Add success.
I0320 13:19:43.422833  543705 net.go:770] primary dev: ETH0
I0320 13:19:43.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:19:43.422861  543705 net.go:698] Add success.
I0320 13:19:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:19:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:19:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:19:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:19:53.409773  543705 memory.go:184] no items to output this cycle
I0320 13:19:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 13:20:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:20:03.409796  543705 memory.go:184] no items to output this cycle
I0320 13:20:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 13:20:13.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:20:13.409941  543705 memory.go:191] Add success.
I0320 13:20:13.409967  543705 cpu.go:282] Add success.
W0320 13:20:13.409980  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:20:13.410003  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:20:13.410008  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:20:13.419714  543705 net.go:648] Add success.
I0320 13:20:13.422400  543705 net.go:770] primary dev: ETH0
I0320 13:20:13.422413  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:20:13.422424  543705 net.go:698] Add success.
I0320 13:20:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:20:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:20:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 13:20:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:20:14.456561  543705 disk_worker.go:494] system disk:vda1
I0320 13:20:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:20:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:20:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:20:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:20:23.409800  543705 memory.go:184] no items to output this cycle
I0320 13:20:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 13:20:24.491144  543705 disk_info.go:125] begin check local disk info of client
I0320 13:20:24.493565  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:20:24.493570  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048cc40 0xc00048cc80]
E0320 13:20:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:20:33.409807  543705 memory.go:184] no items to output this cycle
I0320 13:20:33.409822  543705 cpu.go:275] no items to output this cycle
E0320 13:20:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:20:43.409778  543705 memory.go:191] Add success.
I0320 13:20:43.409806  543705 cpu.go:282] Add success.
I0320 13:20:43.419900  543705 net.go:648] Add success.
I0320 13:20:43.422615  543705 net.go:770] primary dev: ETH0
I0320 13:20:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:20:43.422644  543705 net.go:698] Add success.
I0320 13:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:20:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:20:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:20:53.409806  543705 memory.go:184] no items to output this cycle
I0320 13:20:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 13:21:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:21:03.409770  543705 memory.go:184] no items to output this cycle
I0320 13:21:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 13:21:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:21:13.409788  543705 memory.go:191] Add success.
I0320 13:21:13.409803  543705 cpu.go:282] Add success.
W0320 13:21:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:21:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:21:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:21:13.419736  543705 net.go:648] Add success.
I0320 13:21:13.422666  543705 net.go:770] primary dev: ETH0
I0320 13:21:13.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:21:13.422690  543705 net.go:698] Add success.
I0320 13:21:13.469123  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"90c2c9db-c422-4d79-bce5-94fecd47114f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:21:13.469155  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:21:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:21:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:21:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 13:21:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:21:14.456539  543705 disk_worker.go:494] system disk:vda1
I0320 13:21:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:21:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:21:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:21:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:21:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:21:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:21:23.409800  543705 memory.go:184] no items to output this cycle
I0320 13:21:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 13:21:24.494162  543705 disk_info.go:125] begin check local disk info of client
I0320 13:21:24.496582  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:21:24.496587  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ca80 0xc00035cac0]
E0320 13:21:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:21:33.409783  543705 memory.go:184] no items to output this cycle
I0320 13:21:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 13:21:38.488198  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:21:38.488205  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:21:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:21:43.410557  543705 memory.go:191] Add success.
I0320 13:21:43.409808  543705 cpu.go:282] Add success.
I0320 13:21:43.420298  543705 net.go:648] Add success.
I0320 13:21:43.422982  543705 net.go:770] primary dev: ETH0
I0320 13:21:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:21:43.423008  543705 net.go:698] Add success.
I0320 13:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:21:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:21:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:21:53.410214  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:21:53.410230  543705 memory.go:184] no items to output this cycle
I0320 13:21:53.410237  543705 cpu.go:275] no items to output this cycle
E0320 13:22:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:22:03.409770  543705 memory.go:184] no items to output this cycle
I0320 13:22:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 13:22:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:22:13.409791  543705 memory.go:191] Add success.
I0320 13:22:13.409812  543705 cpu.go:282] Add success.
W0320 13:22:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:22:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:22:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:22:13.419708  543705 net.go:648] Add success.
I0320 13:22:13.422398  543705 net.go:770] primary dev: ETH0
I0320 13:22:13.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:22:13.422422  543705 net.go:698] Add success.
W0320 13:22:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:22:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 13:22:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:22:14.455855  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:22:14.455864  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:22:14.455870  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:22:14.456612  543705 disk_worker.go:494] system disk:vda1
I0320 13:22:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:22:15.456855  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:22:15.456864  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:22:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:22:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:22:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:22:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:22:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:22:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:22:23.409775  543705 memory.go:184] no items to output this cycle
I0320 13:22:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 13:22:24.496856  543705 disk_info.go:125] begin check local disk info of client
I0320 13:22:24.499352  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:22:24.499357  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c47c0 0xc0000c4800]
E0320 13:22:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:22:33.409799  543705 memory.go:184] no items to output this cycle
I0320 13:22:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 13:22:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:22:43.409823  543705 memory.go:191] Add success.
I0320 13:22:43.409827  543705 cpu.go:282] Add success.
I0320 13:22:43.419979  543705 net.go:648] Add success.
I0320 13:22:43.422532  543705 net.go:770] primary dev: ETH0
I0320 13:22:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:22:43.422557  543705 net.go:698] Add success.
I0320 13:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:22:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:22:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:22:53.410411  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:22:53.410428  543705 memory.go:184] no items to output this cycle
I0320 13:22:53.410450  543705 cpu.go:275] no items to output this cycle
E0320 13:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:23:03.409773  543705 memory.go:184] no items to output this cycle
I0320 13:23:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 13:23:13.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:23:13.409908  543705 memory.go:191] Add success.
W0320 13:23:13.409935  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:23:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:23:13.409955  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:23:13.409990  543705 cpu.go:282] Add success.
I0320 13:23:13.419717  543705 net.go:648] Add success.
I0320 13:23:13.422640  543705 net.go:770] primary dev: ETH0
I0320 13:23:13.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:23:13.422668  543705 net.go:698] Add success.
I0320 13:23:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:23:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:23:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 13:23:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:23:14.456502  543705 disk_worker.go:494] system disk:vda1
I0320 13:23:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:23:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:23:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:23:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:23:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:23:23.409784  543705 memory.go:184] no items to output this cycle
I0320 13:23:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:23:24.500187  543705 disk_info.go:125] begin check local disk info of client
I0320 13:23:24.502627  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:23:24.502633  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475240 0xc000475280]
E0320 13:23:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:23:33.409806  543705 memory.go:184] no items to output this cycle
I0320 13:23:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 13:23:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:23:43.409825  543705 memory.go:191] Add success.
I0320 13:23:43.409830  543705 cpu.go:282] Add success.
I0320 13:23:43.419978  543705 net.go:648] Add success.
I0320 13:23:43.422935  543705 net.go:770] primary dev: ETH0
I0320 13:23:43.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:23:43.422962  543705 net.go:698] Add success.
I0320 13:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:23:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:23:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:23:53.409783  543705 memory.go:184] no items to output this cycle
I0320 13:23:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 13:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:24:03.409773  543705 memory.go:184] no items to output this cycle
I0320 13:24:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 13:24:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:24:13.409900  543705 memory.go:191] Add success.
I0320 13:24:13.409901  543705 cpu.go:282] Add success.
W0320 13:24:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:24:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:24:13.409957  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:24:13.419715  543705 net.go:648] Add success.
I0320 13:24:13.422426  543705 net.go:770] primary dev: ETH0
I0320 13:24:13.422438  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:24:13.422450  543705 net.go:698] Add success.
I0320 13:24:13.543883  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"173252a4-ebcb-46c3-bd5b-6545a3170a3b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:24:13.543914  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:24:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:24:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:24:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 13:24:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:24:14.456662  543705 disk_worker.go:494] system disk:vda1
I0320 13:24:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:24:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:24:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:24:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:24:23.410689  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:24:23.410705  543705 memory.go:184] no items to output this cycle
I0320 13:24:23.410734  543705 cpu.go:275] no items to output this cycle
I0320 13:24:24.503189  543705 disk_info.go:125] begin check local disk info of client
I0320 13:24:24.505688  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:24:24.505694  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f900 0xc00039f940]
E0320 13:24:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:24:33.409812  543705 memory.go:184] no items to output this cycle
I0320 13:24:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 13:24:38.489199  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:24:38.489206  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:24:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:24:43.410739  543705 memory.go:191] Add success.
I0320 13:24:43.409808  543705 cpu.go:282] Add success.
I0320 13:24:43.420447  543705 net.go:648] Add success.
I0320 13:24:43.423458  543705 net.go:770] primary dev: ETH0
I0320 13:24:43.423471  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:24:43.423484  543705 net.go:698] Add success.
I0320 13:24:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:24:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:24:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:24:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:24:53.409774  543705 memory.go:184] no items to output this cycle
I0320 13:24:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 13:25:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:25:03.409804  543705 memory.go:184] no items to output this cycle
I0320 13:25:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 13:25:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:25:13.409784  543705 memory.go:191] Add success.
I0320 13:25:13.409807  543705 cpu.go:282] Add success.
W0320 13:25:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:25:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:25:13.419749  543705 net.go:648] Add success.
I0320 13:25:13.422706  543705 net.go:770] primary dev: ETH0
I0320 13:25:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:25:13.422730  543705 net.go:698] Add success.
I0320 13:25:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:25:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:25:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 13:25:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:25:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 13:25:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:25:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:25:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:25:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:25:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:25:23.410643  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:25:23.410658  543705 memory.go:184] no items to output this cycle
I0320 13:25:23.410687  543705 cpu.go:275] no items to output this cycle
I0320 13:25:24.506173  543705 disk_info.go:125] begin check local disk info of client
I0320 13:25:24.508593  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:25:24.508599  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be80 0xc00007bec0]
E0320 13:25:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:25:33.409785  543705 memory.go:184] no items to output this cycle
I0320 13:25:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 13:25:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:25:43.409813  543705 memory.go:191] Add success.
I0320 13:25:43.409820  543705 cpu.go:282] Add success.
I0320 13:25:43.420026  543705 net.go:648] Add success.
I0320 13:25:43.422815  543705 net.go:770] primary dev: ETH0
I0320 13:25:43.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:25:43.422840  543705 net.go:698] Add success.
I0320 13:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:25:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:25:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:25:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:25:53.409782  543705 memory.go:184] no items to output this cycle
I0320 13:25:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 13:26:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:26:03.409779  543705 memory.go:184] no items to output this cycle
I0320 13:26:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 13:26:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:26:13.409823  543705 memory.go:191] Add success.
I0320 13:26:13.409832  543705 cpu.go:282] Add success.
W0320 13:26:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:26:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:26:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:26:13.420182  543705 net.go:648] Add success.
I0320 13:26:13.422972  543705 net.go:770] primary dev: ETH0
I0320 13:26:13.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:26:13.422997  543705 net.go:698] Add success.
I0320 13:26:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:26:14.455311  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:26:14.455322  543705 disk_worker.go:708] disk space is not compliant
W0320 13:26:14.455329  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:26:14.457486  543705 disk_worker.go:494] system disk:vda1
I0320 13:26:14.457529  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:26:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:26:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:26:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:26:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:26:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:26:23.410256  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:26:23.410266  543705 cpu.go:275] no items to output this cycle
I0320 13:26:23.410273  543705 memory.go:184] no items to output this cycle
I0320 13:26:24.509241  543705 disk_info.go:125] begin check local disk info of client
I0320 13:26:24.511752  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:26:24.511758  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a000 0xc00046a040]
E0320 13:26:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:26:33.409792  543705 memory.go:184] no items to output this cycle
I0320 13:26:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 13:26:43.410426  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:26:43.410453  543705 memory.go:191] Add success.
I0320 13:26:43.410456  543705 cpu.go:282] Add success.
I0320 13:26:43.420606  543705 net.go:648] Add success.
I0320 13:26:43.423516  543705 net.go:770] primary dev: ETH0
I0320 13:26:43.423530  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:26:43.423543  543705 net.go:698] Add success.
I0320 13:26:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:26:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:26:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:26:53.409784  543705 cpu.go:275] no items to output this cycle
I0320 13:26:53.409788  543705 memory.go:184] no items to output this cycle
E0320 13:27:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:27:03.409802  543705 memory.go:184] no items to output this cycle
I0320 13:27:03.409810  543705 cpu.go:275] no items to output this cycle
W0320 13:27:13.409704  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0320 13:27:13.409713  543705 conf_downlod.go:89] use old conf
E0320 13:27:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:27:13.409808  543705 memory.go:191] Add success.
I0320 13:27:13.409820  543705 cpu.go:282] Add success.
W0320 13:27:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:27:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:27:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:27:13.420252  543705 net.go:648] Add success.
I0320 13:27:13.429416  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 13:27:13.429517  543705 net.go:770] primary dev: ETH0
I0320 13:27:13.429530  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:27:13.429540  543705 net.go:698] Add success.
I0320 13:27:13.453149  543705 event_worker.go:152] Polling the log file for events...
I0320 13:27:13.468570  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef0eca33-ec42-420c-864c-7335e4f8102a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:27:13.468607  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 13:27:14.455358  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:27:14.455372  543705 disk_worker.go:708] disk space is not compliant
W0320 13:27:14.455377  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:27:14.456228  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:27:14.456991  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:27:14.456998  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:27:14.458001  543705 disk_worker.go:494] system disk:vda1
I0320 13:27:14.458033  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:27:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:27:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:27:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:27:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:27:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:27:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:27:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:27:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:27:23.409773  543705 cpu.go:275] no items to output this cycle
I0320 13:27:23.409785  543705 memory.go:184] no items to output this cycle
I0320 13:27:24.511836  543705 disk_info.go:125] begin check local disk info of client
I0320 13:27:24.514300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:27:24.514305  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa640 0xc0001fa680]
E0320 13:27:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:27:33.409806  543705 memory.go:184] no items to output this cycle
I0320 13:27:33.409816  543705 cpu.go:275] no items to output this cycle
I0320 13:27:38.490206  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:27:38.490213  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:27:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:27:43.410742  543705 memory.go:191] Add success.
I0320 13:27:43.409791  543705 cpu.go:282] Add success.
I0320 13:27:43.420436  543705 net.go:648] Add success.
I0320 13:27:43.423440  543705 net.go:770] primary dev: ETH0
I0320 13:27:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:27:43.423465  543705 net.go:698] Add success.
I0320 13:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:27:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:27:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:27:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:27:53.409779  543705 memory.go:184] no items to output this cycle
I0320 13:27:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 13:28:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:28:03.409780  543705 memory.go:184] no items to output this cycle
I0320 13:28:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 13:28:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:28:13.409811  543705 memory.go:191] Add success.
I0320 13:28:13.409820  543705 cpu.go:282] Add success.
W0320 13:28:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:28:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:28:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:28:13.420075  543705 net.go:648] Add success.
I0320 13:28:13.423210  543705 net.go:770] primary dev: ETH0
I0320 13:28:13.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:28:13.423240  543705 net.go:698] Add success.
I0320 13:28:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:28:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:28:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 13:28:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:28:14.459278  543705 disk_worker.go:494] system disk:vda1
I0320 13:28:14.459307  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:28:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:28:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:28:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:28:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:28:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:28:23.409786  543705 memory.go:184] no items to output this cycle
I0320 13:28:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:28:24.515267  543705 disk_info.go:125] begin check local disk info of client
I0320 13:28:24.517778  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:28:24.517784  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5680 0xc0000c56c0]
E0320 13:28:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:28:33.409779  543705 memory.go:184] no items to output this cycle
I0320 13:28:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 13:28:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:28:43.409798  543705 memory.go:191] Add success.
I0320 13:28:43.409821  543705 cpu.go:282] Add success.
I0320 13:28:43.419910  543705 net.go:648] Add success.
I0320 13:28:43.422480  543705 net.go:770] primary dev: ETH0
I0320 13:28:43.422496  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:28:43.422511  543705 net.go:698] Add success.
I0320 13:28:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:28:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:28:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:28:53.409792  543705 memory.go:184] no items to output this cycle
I0320 13:28:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 13:29:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:29:03.409781  543705 memory.go:184] no items to output this cycle
I0320 13:29:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 13:29:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:29:13.409781  543705 memory.go:191] Add success.
W0320 13:29:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 13:29:13.409806  543705 cpu.go:282] Add success.
W0320 13:29:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:29:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:29:13.420158  543705 net.go:648] Add success.
I0320 13:29:13.422780  543705 net.go:770] primary dev: ETH0
I0320 13:29:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:29:13.422805  543705 net.go:698] Add success.
I0320 13:29:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:29:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:29:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 13:29:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:29:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 13:29:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:29:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:29:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:29:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:29:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:29:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:29:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:29:23.409775  543705 memory.go:184] no items to output this cycle
I0320 13:29:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 13:29:24.517864  543705 disk_info.go:125] begin check local disk info of client
I0320 13:29:24.520379  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:29:24.520386  543705 disk_info.go:196] parse disk info done, disk is : [0xc000257440 0xc000257480]
E0320 13:29:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:29:33.409778  543705 memory.go:184] no items to output this cycle
I0320 13:29:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 13:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:29:43.409805  543705 memory.go:191] Add success.
I0320 13:29:43.409819  543705 cpu.go:282] Add success.
I0320 13:29:43.419966  543705 net.go:648] Add success.
I0320 13:29:43.422632  543705 net.go:770] primary dev: ETH0
I0320 13:29:43.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:29:43.422674  543705 net.go:698] Add success.
I0320 13:29:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:29:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:29:46.458107  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:29:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:29:53.409818  543705 memory.go:184] no items to output this cycle
I0320 13:29:53.409830  543705 cpu.go:275] no items to output this cycle
E0320 13:30:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:30:03.409796  543705 memory.go:184] no items to output this cycle
I0320 13:30:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 13:30:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:30:13.409794  543705 memory.go:191] Add success.
W0320 13:30:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 13:30:13.409826  543705 cpu.go:282] Add success.
W0320 13:30:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:30:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:30:13.420192  543705 net.go:648] Add success.
I0320 13:30:13.423195  543705 net.go:770] primary dev: ETH0
I0320 13:30:13.423217  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:30:13.423231  543705 net.go:698] Add success.
I0320 13:30:13.467841  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3fe4d411-a19f-4ac8-8bb5-41c7a09a8829","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:30:13.467874  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:30:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:30:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:30:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 13:30:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:30:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 13:30:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:30:15.456016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:30:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:30:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:30:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:30:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:30:23.409777  543705 memory.go:184] no items to output this cycle
I0320 13:30:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 13:30:24.521300  543705 disk_info.go:125] begin check local disk info of client
I0320 13:30:24.523769  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:30:24.523774  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0320 13:30:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:30:33.409797  543705 memory.go:184] no items to output this cycle
I0320 13:30:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 13:30:38.491209  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:30:38.491216  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:30:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:30:43.410654  543705 memory.go:191] Add success.
I0320 13:30:43.409806  543705 cpu.go:282] Add success.
I0320 13:30:43.420389  543705 net.go:648] Add success.
I0320 13:30:43.423270  543705 net.go:770] primary dev: ETH0
I0320 13:30:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:30:43.423296  543705 net.go:698] Add success.
I0320 13:30:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:30:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:30:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:30:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:30:53.409823  543705 memory.go:184] no items to output this cycle
I0320 13:30:53.409835  543705 cpu.go:275] no items to output this cycle
E0320 13:31:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:31:03.409787  543705 memory.go:184] no items to output this cycle
I0320 13:31:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 13:31:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:31:13.409822  543705 memory.go:191] Add success.
I0320 13:31:13.409832  543705 cpu.go:282] Add success.
W0320 13:31:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:31:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:31:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:31:13.420071  543705 net.go:648] Add success.
I0320 13:31:13.422906  543705 net.go:770] primary dev: ETH0
I0320 13:31:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:31:13.422931  543705 net.go:698] Add success.
I0320 13:31:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:31:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:31:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 13:31:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:31:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 13:31:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:31:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:31:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:31:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:31:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:31:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:31:23.409762  543705 memory.go:184] no items to output this cycle
I0320 13:31:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 13:31:24.524256  543705 disk_info.go:125] begin check local disk info of client
I0320 13:31:24.526692  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:31:24.526698  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abc40 0xc0002abc80]
E0320 13:31:33.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:31:33.409929  543705 memory.go:184] no items to output this cycle
I0320 13:31:33.410060  543705 cpu.go:275] no items to output this cycle
E0320 13:31:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:31:43.409798  543705 memory.go:191] Add success.
I0320 13:31:43.409808  543705 cpu.go:282] Add success.
I0320 13:31:43.420053  543705 net.go:648] Add success.
I0320 13:31:43.423171  543705 net.go:770] primary dev: ETH0
I0320 13:31:43.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:31:43.423198  543705 net.go:698] Add success.
I0320 13:31:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:31:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:31:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:31:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:31:53.409801  543705 memory.go:184] no items to output this cycle
I0320 13:31:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 13:32:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:32:03.409778  543705 memory.go:184] no items to output this cycle
I0320 13:32:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 13:32:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:32:13.409787  543705 memory.go:191] Add success.
I0320 13:32:13.409793  543705 cpu.go:282] Add success.
W0320 13:32:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:32:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:32:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:32:13.420245  543705 net.go:648] Add success.
I0320 13:32:13.422838  543705 net.go:770] primary dev: ETH0
I0320 13:32:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:32:13.422865  543705 net.go:698] Add success.
W0320 13:32:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:32:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 13:32:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:32:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:32:14.456953  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:32:14.456959  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:32:14.457029  543705 disk_worker.go:494] system disk:vda1
I0320 13:32:14.457071  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:32:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:32:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:32:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:32:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:32:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:32:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:32:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:32:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:32:23.409774  543705 memory.go:184] no items to output this cycle
I0320 13:32:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 13:32:24.527266  543705 disk_info.go:125] begin check local disk info of client
I0320 13:32:24.529747  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:32:24.529753  543705 disk_info.go:196] parse disk info done, disk is : [0xc000217300 0xc000217340]
E0320 13:32:33.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:32:33.409889  543705 memory.go:184] no items to output this cycle
I0320 13:32:33.409917  543705 cpu.go:275] no items to output this cycle
E0320 13:32:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:32:43.409786  543705 memory.go:191] Add success.
I0320 13:32:43.409797  543705 cpu.go:282] Add success.
I0320 13:32:43.420072  543705 net.go:648] Add success.
I0320 13:32:43.422817  543705 net.go:770] primary dev: ETH0
I0320 13:32:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:32:43.422843  543705 net.go:698] Add success.
I0320 13:32:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:32:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:32:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:32:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:32:53.409782  543705 memory.go:184] no items to output this cycle
I0320 13:32:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 13:33:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:33:03.409771  543705 memory.go:184] no items to output this cycle
I0320 13:33:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 13:33:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:33:13.409813  543705 memory.go:191] Add success.
I0320 13:33:13.409823  543705 cpu.go:282] Add success.
W0320 13:33:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:33:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:33:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:33:13.420161  543705 net.go:648] Add success.
I0320 13:33:13.423224  543705 net.go:770] primary dev: ETH0
I0320 13:33:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:33:13.423248  543705 net.go:698] Add success.
I0320 13:33:13.463697  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ef1b5b2-447f-41c8-b601-a0dd34fce657","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:33:13.463732  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:33:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:33:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:33:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 13:33:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:33:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 13:33:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:33:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:33:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:33:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:33:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:33:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:33:23.409764  543705 memory.go:184] no items to output this cycle
I0320 13:33:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 13:33:24.530294  543705 disk_info.go:125] begin check local disk info of client
I0320 13:33:24.532727  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:33:24.532732  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265900 0xc000265940]
I0320 13:33:33.409931  543705 cpu.go:275] no items to output this cycle
E0320 13:33:33.409969  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:33:33.409985  543705 memory.go:184] no items to output this cycle
I0320 13:33:38.492221  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:33:38.492228  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:33:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:33:43.410558  543705 memory.go:191] Add success.
I0320 13:33:43.409813  543705 cpu.go:282] Add success.
I0320 13:33:43.420290  543705 net.go:648] Add success.
I0320 13:33:43.422776  543705 net.go:770] primary dev: ETH0
I0320 13:33:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:33:43.422803  543705 net.go:698] Add success.
I0320 13:33:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:33:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:33:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:33:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:33:53.409785  543705 memory.go:184] no items to output this cycle
I0320 13:33:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 13:34:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:34:03.409769  543705 memory.go:184] no items to output this cycle
I0320 13:34:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 13:34:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:34:13.409800  543705 memory.go:191] Add success.
I0320 13:34:13.409799  543705 cpu.go:282] Add success.
W0320 13:34:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:34:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:34:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:34:13.420106  543705 net.go:648] Add success.
I0320 13:34:13.422756  543705 net.go:770] primary dev: ETH0
I0320 13:34:13.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:34:13.422780  543705 net.go:698] Add success.
I0320 13:34:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:34:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:34:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 13:34:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:34:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 13:34:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:34:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:34:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:34:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:34:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:34:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:34:23.409766  543705 memory.go:184] no items to output this cycle
I0320 13:34:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 13:34:24.532810  543705 disk_info.go:125] begin check local disk info of client
I0320 13:34:24.535296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:34:24.535302  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395d00 0xc000395d40]
E0320 13:34:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:34:33.409776  543705 memory.go:184] no items to output this cycle
I0320 13:34:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 13:34:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:34:43.409789  543705 memory.go:191] Add success.
I0320 13:34:43.409817  543705 cpu.go:282] Add success.
I0320 13:34:43.419883  543705 net.go:648] Add success.
I0320 13:34:43.422818  543705 net.go:770] primary dev: ETH0
I0320 13:34:43.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:34:43.422843  543705 net.go:698] Add success.
I0320 13:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:34:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:34:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:34:53.409774  543705 memory.go:184] no items to output this cycle
I0320 13:34:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 13:35:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:35:03.409804  543705 memory.go:184] no items to output this cycle
I0320 13:35:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 13:35:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:35:13.409785  543705 memory.go:191] Add success.
I0320 13:35:13.409805  543705 cpu.go:282] Add success.
W0320 13:35:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:35:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:35:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:35:13.420181  543705 net.go:648] Add success.
I0320 13:35:13.422759  543705 net.go:770] primary dev: ETH0
I0320 13:35:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:35:13.422788  543705 net.go:698] Add success.
I0320 13:35:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:35:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:35:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 13:35:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:35:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 13:35:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:35:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:35:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:35:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:35:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:35:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:35:23.409777  543705 memory.go:184] no items to output this cycle
I0320 13:35:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 13:35:24.536374  543705 disk_info.go:125] begin check local disk info of client
I0320 13:35:24.538855  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:35:24.538861  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027ae40 0xc00027ae80]
E0320 13:35:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:35:33.409813  543705 memory.go:184] no items to output this cycle
I0320 13:35:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 13:35:43.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:35:43.409907  543705 memory.go:191] Add success.
I0320 13:35:43.409958  543705 cpu.go:282] Add success.
I0320 13:35:43.419721  543705 net.go:648] Add success.
I0320 13:35:43.422361  543705 net.go:770] primary dev: ETH0
I0320 13:35:43.422376  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:35:43.422391  543705 net.go:698] Add success.
I0320 13:35:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:35:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:35:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:35:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:35:53.409810  543705 memory.go:184] no items to output this cycle
I0320 13:35:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 13:36:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:36:03.409774  543705 memory.go:184] no items to output this cycle
I0320 13:36:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 13:36:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:36:13.409822  543705 memory.go:191] Add success.
I0320 13:36:13.409833  543705 cpu.go:282] Add success.
W0320 13:36:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:36:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:36:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:36:13.420272  543705 net.go:648] Add success.
I0320 13:36:13.423048  543705 net.go:770] primary dev: ETH0
I0320 13:36:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:36:13.423072  543705 net.go:698] Add success.
I0320 13:36:13.470185  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c7f3e158-8136-44b3-868d-9bb724bf87cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:36:13.470226  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:36:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:36:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:36:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 13:36:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:36:14.456520  543705 disk_worker.go:494] system disk:vda1
I0320 13:36:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:36:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:36:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:36:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:36:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:36:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:36:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:36:23.409810  543705 memory.go:184] no items to output this cycle
I0320 13:36:23.409820  543705 cpu.go:275] no items to output this cycle
I0320 13:36:24.538942  543705 disk_info.go:125] begin check local disk info of client
I0320 13:36:24.541362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:36:24.541367  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002586c0 0xc000258700]
E0320 13:36:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:36:33.409829  543705 memory.go:184] no items to output this cycle
I0320 13:36:33.409845  543705 cpu.go:275] no items to output this cycle
I0320 13:36:38.493218  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:36:38.493225  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:36:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:36:43.410596  543705 memory.go:191] Add success.
I0320 13:36:43.409842  543705 cpu.go:282] Add success.
I0320 13:36:43.420518  543705 net.go:648] Add success.
I0320 13:36:43.423095  543705 net.go:770] primary dev: ETH0
I0320 13:36:43.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:36:43.423120  543705 net.go:698] Add success.
I0320 13:36:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:36:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:36:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:36:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:36:53.409819  543705 memory.go:184] no items to output this cycle
I0320 13:36:53.409830  543705 cpu.go:275] no items to output this cycle
E0320 13:37:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:37:03.409807  543705 memory.go:184] no items to output this cycle
I0320 13:37:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 13:37:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:37:13.409797  543705 memory.go:191] Add success.
I0320 13:37:13.409803  543705 cpu.go:282] Add success.
W0320 13:37:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:37:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:37:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:37:13.420037  543705 net.go:648] Add success.
I0320 13:37:13.423014  543705 net.go:770] primary dev: ETH0
I0320 13:37:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:37:13.423040  543705 net.go:698] Add success.
I0320 13:37:13.453663  543705 event_worker.go:152] Polling the log file for events...
W0320 13:37:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:37:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 13:37:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:37:14.456757  543705 disk_worker.go:494] system disk:vda1
I0320 13:37:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:37:14.457108  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:37:14.457116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:37:14.457120  543705 custom_config.go:64] query custom config with name: gpu
E0320 13:37:15.456872  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:37:15.456880  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:37:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:37:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:37:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:37:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:37:16.472318  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:37:23.410273  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:37:23.410296  543705 memory.go:184] no items to output this cycle
I0320 13:37:23.410295  543705 cpu.go:275] no items to output this cycle
I0320 13:37:24.542402  543705 disk_info.go:125] begin check local disk info of client
I0320 13:37:24.544819  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:37:24.544825  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c57c0 0xc0000c5800]
E0320 13:37:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:37:33.409821  543705 memory.go:184] no items to output this cycle
I0320 13:37:33.409828  543705 cpu.go:275] no items to output this cycle
E0320 13:37:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:37:43.409808  543705 memory.go:191] Add success.
I0320 13:37:43.409818  543705 cpu.go:282] Add success.
I0320 13:37:43.420045  543705 net.go:648] Add success.
I0320 13:37:43.422617  543705 net.go:770] primary dev: ETH0
I0320 13:37:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:37:43.422642  543705 net.go:698] Add success.
I0320 13:37:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:37:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:37:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:37:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:37:53.409785  543705 memory.go:184] no items to output this cycle
I0320 13:37:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 13:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:38:03.409782  543705 memory.go:184] no items to output this cycle
I0320 13:38:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 13:38:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:38:13.409805  543705 memory.go:191] Add success.
I0320 13:38:13.409815  543705 cpu.go:282] Add success.
W0320 13:38:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:38:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:38:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:38:13.420156  543705 net.go:648] Add success.
I0320 13:38:13.422660  543705 net.go:770] primary dev: ETH0
I0320 13:38:13.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:38:13.422686  543705 net.go:698] Add success.
I0320 13:38:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:38:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:38:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 13:38:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:38:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 13:38:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:38:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:38:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:38:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:38:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:38:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:38:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:38:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 13:38:23.409790  543705 memory.go:184] no items to output this cycle
I0320 13:38:24.545368  543705 disk_info.go:125] begin check local disk info of client
I0320 13:38:24.547862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:38:24.547868  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353300 0xc000353340]
E0320 13:38:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:38:33.409807  543705 memory.go:184] no items to output this cycle
I0320 13:38:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 13:38:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:38:43.409793  543705 memory.go:191] Add success.
I0320 13:38:43.409796  543705 cpu.go:282] Add success.
I0320 13:38:43.419758  543705 net.go:648] Add success.
I0320 13:38:43.422487  543705 net.go:770] primary dev: ETH0
I0320 13:38:43.422503  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:38:43.422518  543705 net.go:698] Add success.
I0320 13:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:38:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:38:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:38:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:38:53.409812  543705 memory.go:184] no items to output this cycle
I0320 13:38:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 13:39:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:39:03.409783  543705 memory.go:184] no items to output this cycle
I0320 13:39:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 13:39:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:39:13.409807  543705 memory.go:191] Add success.
I0320 13:39:13.409815  543705 cpu.go:282] Add success.
W0320 13:39:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:39:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:39:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:39:13.420086  543705 net.go:648] Add success.
I0320 13:39:13.422695  543705 net.go:770] primary dev: ETH0
I0320 13:39:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:39:13.422719  543705 net.go:698] Add success.
I0320 13:39:13.468325  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ba69651e-b19b-4076-ac31-367afe829ae5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:39:13.468359  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:39:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:39:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:39:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 13:39:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:39:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 13:39:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:39:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:39:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:39:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:39:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:39:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:39:23.409792  543705 memory.go:184] no items to output this cycle
I0320 13:39:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 13:39:24.548382  543705 disk_info.go:125] begin check local disk info of client
I0320 13:39:24.550830  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:39:24.550836  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352e00 0xc000352e40]
E0320 13:39:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:39:33.409811  543705 memory.go:184] no items to output this cycle
I0320 13:39:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 13:39:38.494232  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:39:38.494240  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:39:43.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:39:43.410673  543705 memory.go:191] Add success.
I0320 13:39:43.409983  543705 cpu.go:282] Add success.
I0320 13:39:43.419708  543705 net.go:648] Add success.
I0320 13:39:43.422543  543705 net.go:770] primary dev: ETH0
I0320 13:39:43.422556  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:39:43.422569  543705 net.go:698] Add success.
I0320 13:39:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:39:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:39:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:39:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:39:53.409809  543705 memory.go:184] no items to output this cycle
I0320 13:39:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 13:40:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:40:03.409764  543705 memory.go:184] no items to output this cycle
I0320 13:40:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 13:40:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:40:13.409806  543705 memory.go:191] Add success.
I0320 13:40:13.409816  543705 cpu.go:282] Add success.
W0320 13:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:40:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:40:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:40:13.420063  543705 net.go:648] Add success.
I0320 13:40:13.422518  543705 net.go:770] primary dev: ETH0
I0320 13:40:13.422531  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:40:13.422543  543705 net.go:698] Add success.
I0320 13:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:40:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:40:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 13:40:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:40:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 13:40:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:40:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:40:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:40:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:40:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:40:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:40:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:40:23.409793  543705 memory.go:184] no items to output this cycle
I0320 13:40:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 13:40:24.550915  543705 disk_info.go:125] begin check local disk info of client
I0320 13:40:24.553357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:40:24.553365  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273dc0 0xc000273e00]
E0320 13:40:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:40:33.409806  543705 memory.go:184] no items to output this cycle
I0320 13:40:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 13:40:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:40:43.409786  543705 memory.go:191] Add success.
I0320 13:40:43.409803  543705 cpu.go:282] Add success.
I0320 13:40:43.419915  543705 net.go:648] Add success.
I0320 13:40:43.422650  543705 net.go:770] primary dev: ETH0
I0320 13:40:43.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:40:43.422677  543705 net.go:698] Add success.
I0320 13:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:40:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:40:53.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:40:53.410261  543705 memory.go:184] no items to output this cycle
I0320 13:40:53.410283  543705 cpu.go:275] no items to output this cycle
E0320 13:41:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:41:03.409775  543705 memory.go:184] no items to output this cycle
I0320 13:41:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 13:41:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:41:13.409781  543705 memory.go:191] Add success.
I0320 13:41:13.409799  543705 cpu.go:282] Add success.
W0320 13:41:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:41:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:41:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:41:13.420097  543705 net.go:648] Add success.
I0320 13:41:13.422966  543705 net.go:770] primary dev: ETH0
I0320 13:41:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:41:13.422992  543705 net.go:698] Add success.
I0320 13:41:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:41:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:41:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 13:41:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:41:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 13:41:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:41:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:41:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:41:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:41:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:41:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:41:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:41:23.409773  543705 memory.go:184] no items to output this cycle
I0320 13:41:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:41:24.553402  543705 disk_info.go:125] begin check local disk info of client
I0320 13:41:24.555879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:41:24.555885  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004861c0 0xc000486200]
E0320 13:41:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:41:33.409803  543705 memory.go:184] no items to output this cycle
I0320 13:41:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 13:41:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:41:43.409798  543705 memory.go:191] Add success.
I0320 13:41:43.409819  543705 cpu.go:282] Add success.
I0320 13:41:43.419995  543705 net.go:648] Add success.
I0320 13:41:43.422829  543705 net.go:770] primary dev: ETH0
I0320 13:41:43.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:41:43.422855  543705 net.go:698] Add success.
I0320 13:41:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:41:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:41:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:41:53.409806  543705 memory.go:184] no items to output this cycle
I0320 13:41:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 13:42:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:42:03.409797  543705 memory.go:184] no items to output this cycle
I0320 13:42:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 13:42:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:42:13.409788  543705 memory.go:191] Add success.
I0320 13:42:13.409790  543705 cpu.go:282] Add success.
W0320 13:42:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:42:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:42:13.420151  543705 net.go:648] Add success.
I0320 13:42:13.423117  543705 net.go:770] primary dev: ETH0
I0320 13:42:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:42:13.423145  543705 net.go:698] Add success.
I0320 13:42:13.486300  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b2b0e00-a602-4c58-9ba8-ac7dab9c72db","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:42:13.486335  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 13:42:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:42:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 13:42:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:42:14.455908  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:42:14.455917  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:42:14.455922  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:42:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 13:42:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:42:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:42:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:42:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:42:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:42:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:42:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:42:23.410270  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:42:23.410289  543705 memory.go:184] no items to output this cycle
I0320 13:42:23.410307  543705 cpu.go:275] no items to output this cycle
I0320 13:42:24.556420  543705 disk_info.go:125] begin check local disk info of client
I0320 13:42:24.558870  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:42:24.558875  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377200 0xc000377240]
E0320 13:42:33.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:42:33.409875  543705 memory.go:184] no items to output this cycle
I0320 13:42:33.409958  543705 cpu.go:275] no items to output this cycle
I0320 13:42:38.495236  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:42:38.495242  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:42:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:42:43.410520  543705 memory.go:191] Add success.
I0320 13:42:43.409822  543705 cpu.go:282] Add success.
I0320 13:42:43.420202  543705 net.go:648] Add success.
I0320 13:42:43.423091  543705 net.go:770] primary dev: ETH0
I0320 13:42:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:42:43.423115  543705 net.go:698] Add success.
I0320 13:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:42:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:42:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:42:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:42:53.409780  543705 memory.go:184] no items to output this cycle
I0320 13:42:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 13:43:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:43:03.409772  543705 memory.go:184] no items to output this cycle
I0320 13:43:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 13:43:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:43:13.409792  543705 memory.go:191] Add success.
I0320 13:43:13.409792  543705 cpu.go:282] Add success.
W0320 13:43:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:43:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:43:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:43:13.420159  543705 net.go:648] Add success.
I0320 13:43:13.423255  543705 net.go:770] primary dev: ETH0
I0320 13:43:13.423268  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:43:13.423290  543705 net.go:698] Add success.
I0320 13:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:43:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:43:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 13:43:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:43:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 13:43:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:43:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:43:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:43:23.409773  543705 memory.go:184] no items to output this cycle
I0320 13:43:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 13:43:24.558953  543705 disk_info.go:125] begin check local disk info of client
I0320 13:43:24.561522  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:43:24.561528  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385d40 0xc000385d80]
E0320 13:43:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:43:33.409815  543705 memory.go:184] no items to output this cycle
I0320 13:43:33.409829  543705 cpu.go:275] no items to output this cycle
E0320 13:43:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:43:43.409793  543705 memory.go:191] Add success.
I0320 13:43:43.409808  543705 cpu.go:282] Add success.
I0320 13:43:43.420067  543705 net.go:648] Add success.
I0320 13:43:43.422744  543705 net.go:770] primary dev: ETH0
I0320 13:43:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:43:43.422770  543705 net.go:698] Add success.
I0320 13:43:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:43:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:43:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:43:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:43:53.409775  543705 memory.go:184] no items to output this cycle
I0320 13:43:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 13:44:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:44:03.409769  543705 memory.go:184] no items to output this cycle
I0320 13:44:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 13:44:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:44:13.409779  543705 memory.go:191] Add success.
I0320 13:44:13.409798  543705 cpu.go:282] Add success.
W0320 13:44:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:44:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:44:13.412388  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:44:13.419987  543705 net.go:648] Add success.
I0320 13:44:13.421664  543705 net.go:770] primary dev: ETH0
I0320 13:44:13.421677  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:44:13.421690  543705 net.go:698] Add success.
I0320 13:44:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:44:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:44:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 13:44:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:44:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 13:44:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:44:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:44:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:44:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:44:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:44:23.409812  543705 memory.go:184] no items to output this cycle
I0320 13:44:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 13:44:24.562498  543705 disk_info.go:125] begin check local disk info of client
I0320 13:44:24.564935  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:44:24.564941  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb480 0xc0001fb4c0]
E0320 13:44:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:44:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:44:33.409790  543705 memory.go:184] no items to output this cycle
E0320 13:44:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:44:43.409924  543705 memory.go:191] Add success.
I0320 13:44:43.410058  543705 cpu.go:282] Add success.
I0320 13:44:43.419735  543705 net.go:648] Add success.
I0320 13:44:43.422321  543705 net.go:770] primary dev: ETH0
I0320 13:44:43.422337  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:44:43.422350  543705 net.go:698] Add success.
I0320 13:44:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:44:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:44:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:44:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:44:53.409772  543705 memory.go:184] no items to output this cycle
I0320 13:44:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 13:45:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:45:03.409760  543705 memory.go:184] no items to output this cycle
I0320 13:45:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 13:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:45:13.409812  543705 memory.go:191] Add success.
I0320 13:45:13.409829  543705 cpu.go:282] Add success.
W0320 13:45:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:45:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:45:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:45:13.420130  543705 net.go:648] Add success.
I0320 13:45:13.422760  543705 net.go:770] primary dev: ETH0
I0320 13:45:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:45:13.422786  543705 net.go:698] Add success.
I0320 13:45:13.492277  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6a03445c-11d3-4262-91d8-e16a49e56560","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:45:13.492314  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:45:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:45:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:45:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 13:45:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:45:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 13:45:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:45:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:45:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:45:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:45:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:45:23.409777  543705 memory.go:184] no items to output this cycle
I0320 13:45:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 13:45:24.565469  543705 disk_info.go:125] begin check local disk info of client
I0320 13:45:24.567893  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:45:24.567898  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c54c0 0xc0000c5500]
E0320 13:45:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:45:33.409799  543705 memory.go:184] no items to output this cycle
I0320 13:45:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 13:45:38.496229  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:45:38.496235  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:45:43.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:45:43.410706  543705 memory.go:191] Add success.
I0320 13:45:43.410024  543705 cpu.go:282] Add success.
I0320 13:45:43.419716  543705 net.go:648] Add success.
I0320 13:45:43.422228  543705 net.go:770] primary dev: ETH0
I0320 13:45:43.422241  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:45:43.422253  543705 net.go:698] Add success.
I0320 13:45:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:45:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:45:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:45:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:45:53.409781  543705 memory.go:184] no items to output this cycle
I0320 13:45:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 13:46:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:46:03.409795  543705 memory.go:184] no items to output this cycle
I0320 13:46:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 13:46:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:46:13.409783  543705 memory.go:191] Add success.
I0320 13:46:13.409803  543705 cpu.go:282] Add success.
W0320 13:46:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:46:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:46:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:46:13.420055  543705 net.go:648] Add success.
I0320 13:46:13.423013  543705 net.go:770] primary dev: ETH0
I0320 13:46:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:46:13.423040  543705 net.go:698] Add success.
I0320 13:46:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:46:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:46:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 13:46:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:46:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 13:46:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:46:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:46:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:46:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:46:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:46:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:46:23.409794  543705 memory.go:184] no items to output this cycle
I0320 13:46:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 13:46:24.568481  543705 disk_info.go:125] begin check local disk info of client
I0320 13:46:24.570963  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:46:24.570969  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032eb00 0xc00032eb40]
E0320 13:46:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:46:33.409807  543705 memory.go:184] no items to output this cycle
I0320 13:46:33.409822  543705 cpu.go:275] no items to output this cycle
E0320 13:46:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:46:43.409793  543705 memory.go:191] Add success.
I0320 13:46:43.409796  543705 cpu.go:282] Add success.
I0320 13:46:43.420172  543705 net.go:648] Add success.
I0320 13:46:43.422768  543705 net.go:770] primary dev: ETH0
I0320 13:46:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:46:43.422792  543705 net.go:698] Add success.
I0320 13:46:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:46:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:46:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:46:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:46:53.409772  543705 memory.go:184] no items to output this cycle
I0320 13:46:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 13:47:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:47:03.409797  543705 memory.go:184] no items to output this cycle
I0320 13:47:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 13:47:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:47:13.409786  543705 memory.go:191] Add success.
I0320 13:47:13.409788  543705 cpu.go:282] Add success.
W0320 13:47:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:47:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:47:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:47:13.420246  543705 net.go:648] Add success.
I0320 13:47:13.423257  543705 net.go:770] primary dev: ETH0
I0320 13:47:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:47:13.423283  543705 net.go:698] Add success.
I0320 13:47:13.452774  543705 event_worker.go:152] Polling the log file for events...
W0320 13:47:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:47:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 13:47:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:47:14.456784  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:47:14.456794  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:47:14.456800  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:47:14.456842  543705 disk_worker.go:494] system disk:vda1
I0320 13:47:14.456885  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:47:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:47:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:47:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:47:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:47:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:47:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:47:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:47:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:47:23.409783  543705 memory.go:184] no items to output this cycle
I0320 13:47:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 13:47:24.571047  543705 disk_info.go:125] begin check local disk info of client
I0320 13:47:24.573449  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:47:24.573455  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395780 0xc0003957c0]
E0320 13:47:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:47:33.409774  543705 memory.go:184] no items to output this cycle
I0320 13:47:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 13:47:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:47:43.409811  543705 memory.go:191] Add success.
I0320 13:47:43.409822  543705 cpu.go:282] Add success.
I0320 13:47:43.419998  543705 net.go:648] Add success.
I0320 13:47:43.422609  543705 net.go:770] primary dev: ETH0
I0320 13:47:43.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:47:43.422657  543705 net.go:698] Add success.
I0320 13:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:47:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:47:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:47:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:47:53.409784  543705 memory.go:184] no items to output this cycle
I0320 13:47:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 13:48:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:48:03.409775  543705 memory.go:184] no items to output this cycle
I0320 13:48:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 13:48:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:48:13.409786  543705 cpu.go:282] Add success.
I0320 13:48:13.409800  543705 memory.go:191] Add success.
W0320 13:48:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:48:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:48:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:48:13.420048  543705 net.go:648] Add success.
I0320 13:48:13.423039  543705 net.go:770] primary dev: ETH0
I0320 13:48:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:48:13.423069  543705 net.go:698] Add success.
I0320 13:48:13.469041  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"716ee91f-db02-4c88-9117-b82e41292d82","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:48:13.469076  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:48:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:48:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:48:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 13:48:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:48:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 13:48:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:48:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:48:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:48:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:48:23.409773  543705 memory.go:184] no items to output this cycle
I0320 13:48:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 13:48:24.573505  543705 disk_info.go:125] begin check local disk info of client
I0320 13:48:24.575971  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:48:24.575977  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b80 0xc0000c4bc0]
E0320 13:48:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:48:33.409802  543705 memory.go:184] no items to output this cycle
I0320 13:48:33.409823  543705 cpu.go:275] no items to output this cycle
I0320 13:48:38.497241  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:48:38.497248  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:48:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:48:43.410628  543705 memory.go:191] Add success.
I0320 13:48:43.409826  543705 cpu.go:282] Add success.
I0320 13:48:43.420350  543705 net.go:648] Add success.
I0320 13:48:43.422981  543705 net.go:770] primary dev: ETH0
I0320 13:48:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:48:43.423009  543705 net.go:698] Add success.
I0320 13:48:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:48:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:48:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:48:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:48:53.409787  543705 memory.go:184] no items to output this cycle
I0320 13:48:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 13:49:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:49:03.409783  543705 cpu.go:275] no items to output this cycle
I0320 13:49:03.409788  543705 memory.go:184] no items to output this cycle
E0320 13:49:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:49:13.409801  543705 memory.go:191] Add success.
I0320 13:49:13.409801  543705 cpu.go:282] Add success.
W0320 13:49:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:49:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:49:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:49:13.420127  543705 net.go:648] Add success.
I0320 13:49:13.422781  543705 net.go:770] primary dev: ETH0
I0320 13:49:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:49:13.422804  543705 net.go:698] Add success.
I0320 13:49:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:49:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:49:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 13:49:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:49:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 13:49:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:49:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:49:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:49:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:49:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:49:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:49:23.409777  543705 memory.go:184] no items to output this cycle
I0320 13:49:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 13:49:24.576511  543705 disk_info.go:125] begin check local disk info of client
I0320 13:49:24.579001  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:49:24.579007  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa80 0xc0001aaac0]
E0320 13:49:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:49:33.409797  543705 memory.go:184] no items to output this cycle
I0320 13:49:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 13:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:49:43.409802  543705 memory.go:191] Add success.
I0320 13:49:43.409803  543705 cpu.go:282] Add success.
I0320 13:49:43.419869  543705 net.go:648] Add success.
I0320 13:49:43.422804  543705 net.go:770] primary dev: ETH0
I0320 13:49:43.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:49:43.422832  543705 net.go:698] Add success.
I0320 13:49:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:49:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:49:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:49:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:49:53.409789  543705 memory.go:184] no items to output this cycle
I0320 13:49:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 13:50:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:50:03.409879  543705 cpu.go:275] no items to output this cycle
I0320 13:50:03.409886  543705 memory.go:184] no items to output this cycle
E0320 13:50:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:50:13.409826  543705 memory.go:191] Add success.
I0320 13:50:13.409832  543705 cpu.go:282] Add success.
W0320 13:50:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:50:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:50:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:50:13.420158  543705 net.go:648] Add success.
I0320 13:50:13.422990  543705 net.go:770] primary dev: ETH0
I0320 13:50:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:50:13.423015  543705 net.go:698] Add success.
I0320 13:50:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:50:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:50:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 13:50:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:50:14.456557  543705 disk_worker.go:494] system disk:vda1
I0320 13:50:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:50:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:50:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:50:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:50:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:50:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:50:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:50:23.409795  543705 memory.go:184] no items to output this cycle
I0320 13:50:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 13:50:24.579541  543705 disk_info.go:125] begin check local disk info of client
I0320 13:50:24.582059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:50:24.582064  543705 disk_info.go:196] parse disk info done, disk is : [0xc000551480 0xc0005514c0]
E0320 13:50:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:50:33.409816  543705 memory.go:184] no items to output this cycle
I0320 13:50:33.409828  543705 cpu.go:275] no items to output this cycle
E0320 13:50:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:50:43.409800  543705 memory.go:191] Add success.
I0320 13:50:43.409803  543705 cpu.go:282] Add success.
I0320 13:50:43.419896  543705 net.go:648] Add success.
I0320 13:50:43.422907  543705 net.go:770] primary dev: ETH0
I0320 13:50:43.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:50:43.422932  543705 net.go:698] Add success.
I0320 13:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:50:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:50:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:50:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:50:53.409809  543705 memory.go:184] no items to output this cycle
I0320 13:50:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 13:51:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:51:03.409771  543705 memory.go:184] no items to output this cycle
I0320 13:51:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 13:51:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:51:13.409781  543705 memory.go:191] Add success.
I0320 13:51:13.409802  543705 cpu.go:282] Add success.
W0320 13:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:51:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:51:13.420260  543705 net.go:648] Add success.
I0320 13:51:13.423429  543705 net.go:770] primary dev: ETH0
I0320 13:51:13.423443  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:51:13.423454  543705 net.go:698] Add success.
I0320 13:51:13.463884  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"430d040e-5689-4914-8eee-5f9a935f571b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:51:13.463915  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:51:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:51:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 13:51:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:51:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 13:51:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:51:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:51:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:51:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:51:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:51:23.409775  543705 memory.go:184] no items to output this cycle
I0320 13:51:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 13:51:24.582541  543705 disk_info.go:125] begin check local disk info of client
I0320 13:51:24.585072  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:51:24.585078  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465740 0xc000465780]
E0320 13:51:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:51:33.409786  543705 memory.go:184] no items to output this cycle
I0320 13:51:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 13:51:38.498239  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:51:38.498246  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:51:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:51:43.410719  543705 memory.go:191] Add success.
I0320 13:51:43.409799  543705 cpu.go:282] Add success.
I0320 13:51:43.420448  543705 net.go:648] Add success.
I0320 13:51:43.423588  543705 net.go:770] primary dev: ETH0
I0320 13:51:43.423602  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:51:43.423614  543705 net.go:698] Add success.
I0320 13:51:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:51:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:51:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:51:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:51:53.409787  543705 memory.go:184] no items to output this cycle
I0320 13:51:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 13:52:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:52:03.409783  543705 memory.go:184] no items to output this cycle
I0320 13:52:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 13:52:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:52:13.409819  543705 memory.go:191] Add success.
I0320 13:52:13.409820  543705 cpu.go:282] Add success.
W0320 13:52:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:52:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:52:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:52:13.420501  543705 net.go:648] Add success.
I0320 13:52:13.423285  543705 net.go:770] primary dev: ETH0
I0320 13:52:13.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:52:13.423313  543705 net.go:698] Add success.
W0320 13:52:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:52:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 13:52:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:52:14.455870  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:52:14.455879  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:52:14.455901  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:52:14.456547  543705 disk_worker.go:494] system disk:vda1
I0320 13:52:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:52:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:52:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:52:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:52:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:52:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:52:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:52:16.472314  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:52:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:52:23.409794  543705 memory.go:184] no items to output this cycle
I0320 13:52:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 13:52:24.585564  543705 disk_info.go:125] begin check local disk info of client
I0320 13:52:24.588007  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:52:24.588013  543705 disk_info.go:196] parse disk info done, disk is : [0xc000594dc0 0xc000594e00]
E0320 13:52:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:52:33.409787  543705 memory.go:184] no items to output this cycle
I0320 13:52:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 13:52:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:52:43.409814  543705 memory.go:191] Add success.
I0320 13:52:43.409815  543705 cpu.go:282] Add success.
I0320 13:52:43.419981  543705 net.go:648] Add success.
I0320 13:52:43.422730  543705 net.go:770] primary dev: ETH0
I0320 13:52:43.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:52:43.422756  543705 net.go:698] Add success.
I0320 13:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:52:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:52:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:52:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:52:53.409808  543705 memory.go:184] no items to output this cycle
I0320 13:52:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 13:53:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:53:03.409798  543705 memory.go:184] no items to output this cycle
I0320 13:53:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 13:53:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:53:13.409784  543705 memory.go:191] Add success.
I0320 13:53:13.409810  543705 cpu.go:282] Add success.
W0320 13:53:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:53:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:53:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:53:13.420585  543705 net.go:648] Add success.
I0320 13:53:13.423285  543705 net.go:770] primary dev: ETH0
I0320 13:53:13.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:53:13.423313  543705 net.go:698] Add success.
I0320 13:53:14.453951  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:53:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:53:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 13:53:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:53:14.456549  543705 disk_worker.go:494] system disk:vda1
I0320 13:53:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:53:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:53:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:53:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:53:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:53:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:53:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:53:23.409796  543705 memory.go:184] no items to output this cycle
I0320 13:53:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 13:53:24.588090  543705 disk_info.go:125] begin check local disk info of client
I0320 13:53:24.590532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:53:24.590538  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b5c0 0xc00007b600]
E0320 13:53:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:53:33.409788  543705 memory.go:184] no items to output this cycle
I0320 13:53:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 13:53:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:53:43.409818  543705 memory.go:191] Add success.
I0320 13:53:43.409826  543705 cpu.go:282] Add success.
I0320 13:53:43.419969  543705 net.go:648] Add success.
I0320 13:53:43.423021  543705 net.go:770] primary dev: ETH0
I0320 13:53:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:53:43.423052  543705 net.go:698] Add success.
I0320 13:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:53:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:53:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:53:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:53:53.409784  543705 memory.go:184] no items to output this cycle
I0320 13:53:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 13:54:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:54:03.409808  543705 memory.go:184] no items to output this cycle
I0320 13:54:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 13:54:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:54:13.409778  543705 memory.go:191] Add success.
W0320 13:54:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 13:54:13.409803  543705 cpu.go:282] Add success.
W0320 13:54:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:54:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:54:13.420053  543705 net.go:648] Add success.
I0320 13:54:13.422919  543705 net.go:770] primary dev: ETH0
I0320 13:54:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:54:13.423107  543705 net.go:698] Add success.
I0320 13:54:13.464110  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7a460e7f-8cff-4958-b835-85d3f7a03b33","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:54:13.464141  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 13:54:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:54:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:54:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 13:54:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:54:14.456682  543705 disk_worker.go:494] system disk:vda1
I0320 13:54:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:54:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:54:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:54:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:54:23.409822  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:54:23.409847  543705 memory.go:184] no items to output this cycle
I0320 13:54:23.409890  543705 cpu.go:275] no items to output this cycle
I0320 13:54:24.590592  543705 disk_info.go:125] begin check local disk info of client
I0320 13:54:24.593033  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:54:24.593039  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483240 0xc000483280]
E0320 13:54:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:54:33.409790  543705 memory.go:184] no items to output this cycle
I0320 13:54:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 13:54:38.499245  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:54:38.499252  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:54:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:54:43.410701  543705 memory.go:191] Add success.
I0320 13:54:43.409827  543705 cpu.go:282] Add success.
I0320 13:54:43.420412  543705 net.go:648] Add success.
I0320 13:54:43.423471  543705 net.go:770] primary dev: ETH0
I0320 13:54:43.423483  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:54:43.423496  543705 net.go:698] Add success.
I0320 13:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:54:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:54:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:54:53.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:54:53.410265  543705 memory.go:184] no items to output this cycle
I0320 13:54:53.410267  543705 cpu.go:275] no items to output this cycle
E0320 13:55:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:55:03.409767  543705 memory.go:184] no items to output this cycle
I0320 13:55:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 13:55:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:55:13.409813  543705 memory.go:191] Add success.
I0320 13:55:13.409821  543705 cpu.go:282] Add success.
W0320 13:55:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:55:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:55:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:55:13.420042  543705 net.go:648] Add success.
I0320 13:55:13.422632  543705 net.go:770] primary dev: ETH0
I0320 13:55:13.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:55:13.422657  543705 net.go:698] Add success.
I0320 13:55:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:55:14.455500  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:55:14.455521  543705 disk_worker.go:708] disk space is not compliant
W0320 13:55:14.455525  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:55:14.457038  543705 disk_worker.go:494] system disk:vda1
I0320 13:55:14.457066  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:55:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:55:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:55:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:55:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:55:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:55:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:55:23.409775  543705 memory.go:184] no items to output this cycle
I0320 13:55:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 13:55:24.593118  543705 disk_info.go:125] begin check local disk info of client
I0320 13:55:24.595587  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:55:24.595593  543705 disk_info.go:196] parse disk info done, disk is : [0xc000551300 0xc000551340]
E0320 13:55:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:55:33.409818  543705 memory.go:184] no items to output this cycle
I0320 13:55:33.409833  543705 cpu.go:275] no items to output this cycle
E0320 13:55:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:55:43.409783  543705 memory.go:191] Add success.
I0320 13:55:43.409816  543705 cpu.go:282] Add success.
I0320 13:55:43.419894  543705 net.go:648] Add success.
I0320 13:55:43.422596  543705 net.go:770] primary dev: ETH0
I0320 13:55:43.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:55:43.422627  543705 net.go:698] Add success.
I0320 13:55:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:55:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:55:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:55:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:55:53.409798  543705 memory.go:184] no items to output this cycle
I0320 13:55:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 13:56:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:56:03.409775  543705 memory.go:184] no items to output this cycle
I0320 13:56:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 13:56:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:56:13.409806  543705 memory.go:191] Add success.
I0320 13:56:13.409816  543705 cpu.go:282] Add success.
W0320 13:56:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:56:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:56:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:56:13.420221  543705 net.go:648] Add success.
I0320 13:56:13.422905  543705 net.go:770] primary dev: ETH0
I0320 13:56:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:56:13.422932  543705 net.go:698] Add success.
I0320 13:56:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:56:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:56:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 13:56:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:56:14.456809  543705 disk_worker.go:494] system disk:vda1
I0320 13:56:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:56:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:56:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:56:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:56:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:56:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:56:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:56:23.409769  543705 memory.go:184] no items to output this cycle
I0320 13:56:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 13:56:24.596666  543705 disk_info.go:125] begin check local disk info of client
I0320 13:56:24.599159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:56:24.599164  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c640 0xc00039c680]
E0320 13:56:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:56:33.409806  543705 memory.go:184] no items to output this cycle
I0320 13:56:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 13:56:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:56:43.409814  543705 memory.go:191] Add success.
I0320 13:56:43.409821  543705 cpu.go:282] Add success.
I0320 13:56:43.419888  543705 net.go:648] Add success.
I0320 13:56:43.422464  543705 net.go:770] primary dev: ETH0
I0320 13:56:43.422477  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:56:43.422490  543705 net.go:698] Add success.
I0320 13:56:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:56:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:56:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:56:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:56:53.409801  543705 memory.go:184] no items to output this cycle
I0320 13:56:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 13:57:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:57:03.409779  543705 memory.go:184] no items to output this cycle
I0320 13:57:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 13:57:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:57:13.409779  543705 memory.go:191] Add success.
W0320 13:57:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 13:57:13.409809  543705 cpu.go:282] Add success.
W0320 13:57:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:57:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:57:13.425860  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 13:57:13.425945  543705 net.go:770] primary dev: ETH0
I0320 13:57:13.425957  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:57:13.425968  543705 net.go:698] Add success.
I0320 13:57:13.426330  543705 net.go:648] Add success.
I0320 13:57:13.453722  543705 event_worker.go:152] Polling the log file for events...
I0320 13:57:13.464609  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a9a1a123-db3f-455e-9540-b29ad7f6c465","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 13:57:13.464647  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 13:57:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:57:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0320 13:57:14.455151  543705 disk_worker.go:728] disk inode is not compliant
E0320 13:57:14.456261  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 13:57:14.456270  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 13:57:14.456285  543705 custom_config.go:64] query custom config with name: gpu
I0320 13:57:14.457026  543705 disk_worker.go:494] system disk:vda1
I0320 13:57:14.457066  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 13:57:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 13:57:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:57:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 13:57:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 13:57:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:57:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:57:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:57:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:57:23.409791  543705 memory.go:184] no items to output this cycle
I0320 13:57:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 13:57:24.599637  543705 disk_info.go:125] begin check local disk info of client
I0320 13:57:24.602081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:57:24.602086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5680 0xc0000c56c0]
E0320 13:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:57:33.409779  543705 memory.go:184] no items to output this cycle
I0320 13:57:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 13:57:38.500246  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 13:57:38.500253  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 13:57:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:57:43.410714  543705 memory.go:191] Add success.
I0320 13:57:43.409801  543705 cpu.go:282] Add success.
I0320 13:57:43.420431  543705 net.go:648] Add success.
I0320 13:57:43.423034  543705 net.go:770] primary dev: ETH0
I0320 13:57:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:57:43.423059  543705 net.go:698] Add success.
I0320 13:57:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:57:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:57:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:57:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:57:53.409780  543705 memory.go:184] no items to output this cycle
I0320 13:57:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 13:58:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:58:03.409803  543705 cpu.go:275] no items to output this cycle
I0320 13:58:03.409809  543705 memory.go:184] no items to output this cycle
E0320 13:58:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:58:13.409827  543705 memory.go:191] Add success.
I0320 13:58:13.409833  543705 cpu.go:282] Add success.
W0320 13:58:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:58:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:58:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:58:13.420285  543705 net.go:648] Add success.
I0320 13:58:13.423306  543705 net.go:770] primary dev: ETH0
I0320 13:58:13.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:58:13.423337  543705 net.go:698] Add success.
I0320 13:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:58:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:58:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 13:58:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:58:14.456626  543705 disk_worker.go:494] system disk:vda1
I0320 13:58:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:58:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:58:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:58:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:58:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:58:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:58:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 13:58:23.409786  543705 memory.go:184] no items to output this cycle
I0320 13:58:24.602651  543705 disk_info.go:125] begin check local disk info of client
I0320 13:58:24.605110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:58:24.605118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003912c0 0xc000391300]
E0320 13:58:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:58:33.409781  543705 memory.go:184] no items to output this cycle
I0320 13:58:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 13:58:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:58:43.409784  543705 memory.go:191] Add success.
I0320 13:58:43.409800  543705 cpu.go:282] Add success.
I0320 13:58:43.420014  543705 net.go:648] Add success.
I0320 13:58:43.422871  543705 net.go:770] primary dev: ETH0
I0320 13:58:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:58:43.422896  543705 net.go:698] Add success.
I0320 13:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:58:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:58:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:58:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 13:58:53.409783  543705 memory.go:184] no items to output this cycle
E0320 13:59:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:59:03.409768  543705 memory.go:184] no items to output this cycle
I0320 13:59:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 13:59:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:59:13.409779  543705 memory.go:191] Add success.
I0320 13:59:13.409799  543705 cpu.go:282] Add success.
W0320 13:59:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 13:59:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 13:59:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 13:59:13.420126  543705 net.go:648] Add success.
I0320 13:59:13.423248  543705 net.go:770] primary dev: ETH0
I0320 13:59:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:59:13.423273  543705 net.go:698] Add success.
I0320 13:59:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0320 13:59:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 13:59:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 13:59:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 13:59:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 13:59:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 13:59:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 13:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:59:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:59:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 13:59:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 13:59:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:59:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 13:59:23.409783  543705 memory.go:184] no items to output this cycle
I0320 13:59:24.605198  543705 disk_info.go:125] begin check local disk info of client
I0320 13:59:24.607667  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 13:59:24.607672  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8540 0xc0004a8580]
E0320 13:59:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:59:33.409776  543705 memory.go:184] no items to output this cycle
I0320 13:59:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 13:59:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:59:43.409792  543705 memory.go:191] Add success.
I0320 13:59:43.409805  543705 cpu.go:282] Add success.
I0320 13:59:43.420002  543705 net.go:648] Add success.
I0320 13:59:43.422623  543705 net.go:770] primary dev: ETH0
I0320 13:59:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0320 13:59:43.422648  543705 net.go:698] Add success.
I0320 13:59:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 13:59:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 13:59:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 13:59:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 13:59:53.409802  543705 memory.go:184] no items to output this cycle
I0320 13:59:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 14:00:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:00:03.409770  543705 memory.go:184] no items to output this cycle
I0320 14:00:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 14:00:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:00:13.409812  543705 memory.go:191] Add success.
I0320 14:00:13.409816  543705 cpu.go:282] Add success.
W0320 14:00:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:00:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:00:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:00:13.420488  543705 net.go:648] Add success.
I0320 14:00:13.423393  543705 net.go:770] primary dev: ETH0
I0320 14:00:13.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:00:13.423418  543705 net.go:698] Add success.
I0320 14:00:13.463926  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"73a2dfe5-6c99-4bc7-9575-ff78559abaa2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:00:13.463965  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:00:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:00:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 14:00:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:00:14.456534  543705 disk_worker.go:494] system disk:vda1
I0320 14:00:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:00:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:00:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:00:16.472427  543705 disk_local_worker.go:436] Get disk info: []
I0320 14:00:23.409914  543705 cpu.go:275] no items to output this cycle
E0320 14:00:23.409946  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:00:23.409978  543705 memory.go:184] no items to output this cycle
I0320 14:00:24.608683  543705 disk_info.go:125] begin check local disk info of client
I0320 14:00:24.611234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:00:24.611240  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052aec0 0xc00052af00]
E0320 14:00:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:00:33.409788  543705 memory.go:184] no items to output this cycle
I0320 14:00:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 14:00:38.501258  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:00:38.501265  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:00:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:00:43.410924  543705 memory.go:191] Add success.
I0320 14:00:43.409816  543705 cpu.go:282] Add success.
I0320 14:00:43.420636  543705 net.go:648] Add success.
I0320 14:00:43.423479  543705 net.go:770] primary dev: ETH0
I0320 14:00:43.423491  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:00:43.423505  543705 net.go:698] Add success.
I0320 14:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:00:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:00:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:00:53.409773  543705 memory.go:184] no items to output this cycle
I0320 14:00:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 14:01:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:01:03.409775  543705 memory.go:184] no items to output this cycle
I0320 14:01:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 14:01:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:01:13.409812  543705 memory.go:191] Add success.
I0320 14:01:13.409818  543705 cpu.go:282] Add success.
W0320 14:01:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:01:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:01:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:01:13.420109  543705 net.go:648] Add success.
I0320 14:01:13.423104  543705 net.go:770] primary dev: ETH0
I0320 14:01:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:01:13.423128  543705 net.go:698] Add success.
I0320 14:01:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:01:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:01:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 14:01:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:01:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 14:01:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:01:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:01:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:01:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:01:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:01:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:01:23.409795  543705 memory.go:184] no items to output this cycle
I0320 14:01:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 14:01:24.611691  543705 disk_info.go:125] begin check local disk info of client
I0320 14:01:24.614225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:01:24.614231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289980 0xc0002899c0]
E0320 14:01:33.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:01:33.409888  543705 memory.go:184] no items to output this cycle
I0320 14:01:33.409958  543705 cpu.go:275] no items to output this cycle
E0320 14:01:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:01:43.409786  543705 memory.go:191] Add success.
I0320 14:01:43.409795  543705 cpu.go:282] Add success.
I0320 14:01:43.420039  543705 net.go:648] Add success.
I0320 14:01:43.422954  543705 net.go:770] primary dev: ETH0
I0320 14:01:43.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:01:43.422987  543705 net.go:698] Add success.
I0320 14:01:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:01:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:01:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:01:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:01:53.409781  543705 memory.go:184] no items to output this cycle
I0320 14:01:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 14:02:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:02:03.409798  543705 memory.go:184] no items to output this cycle
I0320 14:02:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 14:02:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:02:13.409786  543705 memory.go:191] Add success.
I0320 14:02:13.409810  543705 cpu.go:282] Add success.
W0320 14:02:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:02:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:02:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:02:13.420080  543705 net.go:648] Add success.
I0320 14:02:13.422863  543705 net.go:770] primary dev: ETH0
I0320 14:02:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:02:13.422889  543705 net.go:698] Add success.
W0320 14:02:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:02:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 14:02:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:02:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:02:14.455916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:02:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:02:14.456561  543705 disk_worker.go:494] system disk:vda1
I0320 14:02:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:02:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:02:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:02:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:02:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:02:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:02:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:02:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:02:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:02:23.409820  543705 memory.go:184] no items to output this cycle
I0320 14:02:23.409823  543705 cpu.go:275] no items to output this cycle
I0320 14:02:24.614716  543705 disk_info.go:125] begin check local disk info of client
I0320 14:02:24.617297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:02:24.617303  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298480 0xc0002984c0]
E0320 14:02:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:02:33.409784  543705 memory.go:184] no items to output this cycle
I0320 14:02:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 14:02:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:02:43.409949  543705 memory.go:191] Add success.
I0320 14:02:43.410091  543705 cpu.go:282] Add success.
I0320 14:02:43.419730  543705 net.go:648] Add success.
I0320 14:02:43.422391  543705 net.go:770] primary dev: ETH0
I0320 14:02:43.422409  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:02:43.422424  543705 net.go:698] Add success.
I0320 14:02:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:02:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:02:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:02:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:02:53.409809  543705 memory.go:184] no items to output this cycle
I0320 14:02:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:03:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:03:03.409789  543705 memory.go:184] no items to output this cycle
I0320 14:03:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 14:03:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:03:13.409793  543705 memory.go:191] Add success.
I0320 14:03:13.409796  543705 cpu.go:282] Add success.
W0320 14:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:03:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:03:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:03:13.420234  543705 net.go:648] Add success.
I0320 14:03:13.423321  543705 net.go:770] primary dev: ETH0
I0320 14:03:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:03:13.423346  543705 net.go:698] Add success.
I0320 14:03:13.470347  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9f6e0dde-6328-4613-b2e7-684e250fb700","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:03:13.470381  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:03:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:03:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:03:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 14:03:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:03:14.456760  543705 disk_worker.go:494] system disk:vda1
I0320 14:03:14.456791  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:03:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:03:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:03:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:03:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:03:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:03:23.409808  543705 memory.go:184] no items to output this cycle
I0320 14:03:23.409823  543705 cpu.go:275] no items to output this cycle
I0320 14:03:24.617670  543705 disk_info.go:125] begin check local disk info of client
I0320 14:03:24.620109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:03:24.620114  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a180 0xc00048a1c0]
E0320 14:03:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:03:33.409789  543705 memory.go:184] no items to output this cycle
I0320 14:03:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 14:03:38.502291  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:03:38.502298  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:03:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:03:43.410724  543705 memory.go:191] Add success.
I0320 14:03:43.409820  543705 cpu.go:282] Add success.
I0320 14:03:43.420469  543705 net.go:648] Add success.
I0320 14:03:43.423242  543705 net.go:770] primary dev: ETH0
I0320 14:03:43.423255  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:03:43.423267  543705 net.go:698] Add success.
I0320 14:03:46.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:03:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:03:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:03:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:03:53.409804  543705 memory.go:184] no items to output this cycle
I0320 14:03:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:04:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:04:03.409776  543705 memory.go:184] no items to output this cycle
I0320 14:04:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 14:04:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:04:13.409809  543705 memory.go:191] Add success.
I0320 14:04:13.409821  543705 cpu.go:282] Add success.
W0320 14:04:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:04:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:04:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:04:13.420155  543705 net.go:648] Add success.
I0320 14:04:13.422917  543705 net.go:770] primary dev: ETH0
I0320 14:04:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:04:13.422945  543705 net.go:698] Add success.
I0320 14:04:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:04:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:04:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 14:04:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:04:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 14:04:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:04:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:04:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:04:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:04:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:04:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 14:04:23.409780  543705 memory.go:184] no items to output this cycle
I0320 14:04:24.620733  543705 disk_info.go:125] begin check local disk info of client
I0320 14:04:24.623153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:04:24.623159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386c80 0xc000386cc0]
E0320 14:04:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:04:33.409785  543705 memory.go:184] no items to output this cycle
I0320 14:04:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 14:04:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:04:43.409909  543705 memory.go:191] Add success.
I0320 14:04:43.409944  543705 cpu.go:282] Add success.
I0320 14:04:43.419740  543705 net.go:648] Add success.
I0320 14:04:43.422973  543705 net.go:770] primary dev: ETH0
I0320 14:04:43.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:04:43.422998  543705 net.go:698] Add success.
I0320 14:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:04:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:04:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:04:53.409801  543705 memory.go:184] no items to output this cycle
I0320 14:04:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 14:05:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:05:03.409783  543705 cpu.go:275] no items to output this cycle
I0320 14:05:03.409790  543705 memory.go:184] no items to output this cycle
E0320 14:05:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:05:13.409779  543705 memory.go:191] Add success.
I0320 14:05:13.409798  543705 cpu.go:282] Add success.
W0320 14:05:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:05:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:05:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:05:13.420138  543705 net.go:648] Add success.
I0320 14:05:13.423265  543705 net.go:770] primary dev: ETH0
I0320 14:05:13.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:05:13.423293  543705 net.go:698] Add success.
I0320 14:05:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:05:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:05:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 14:05:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:05:14.456476  543705 disk_worker.go:494] system disk:vda1
I0320 14:05:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:05:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:05:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:05:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:05:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:05:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:05:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 14:05:23.409785  543705 memory.go:184] no items to output this cycle
I0320 14:05:24.623748  543705 disk_info.go:125] begin check local disk info of client
I0320 14:05:24.626236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:05:24.626242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331980 0xc0003319c0]
E0320 14:05:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:05:33.409803  543705 memory.go:184] no items to output this cycle
I0320 14:05:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 14:05:43.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:05:43.409892  543705 memory.go:191] Add success.
I0320 14:05:43.409894  543705 cpu.go:282] Add success.
I0320 14:05:43.419723  543705 net.go:648] Add success.
I0320 14:05:43.422991  543705 net.go:770] primary dev: ETH0
I0320 14:05:43.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:05:43.423014  543705 net.go:698] Add success.
I0320 14:05:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:05:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:05:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:05:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:05:53.409780  543705 memory.go:184] no items to output this cycle
I0320 14:05:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 14:06:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:06:03.409789  543705 memory.go:184] no items to output this cycle
I0320 14:06:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 14:06:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:06:13.409806  543705 memory.go:191] Add success.
I0320 14:06:13.409812  543705 cpu.go:282] Add success.
W0320 14:06:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:06:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:06:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:06:13.420063  543705 net.go:648] Add success.
I0320 14:06:13.423097  543705 net.go:770] primary dev: ETH0
I0320 14:06:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:06:13.423122  543705 net.go:698] Add success.
I0320 14:06:13.469119  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"063fb934-58de-44bd-96ac-3104517b4175","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:06:13.469151  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:06:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:06:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:06:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 14:06:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:06:14.456598  543705 disk_worker.go:494] system disk:vda1
I0320 14:06:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:06:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:06:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:06:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:06:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:06:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:06:23.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:06:23.410260  543705 memory.go:184] no items to output this cycle
I0320 14:06:23.410288  543705 cpu.go:275] no items to output this cycle
I0320 14:06:24.626764  543705 disk_info.go:125] begin check local disk info of client
I0320 14:06:24.629200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:06:24.629206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d40 0xc0000c5d80]
E0320 14:06:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:06:33.409816  543705 memory.go:184] no items to output this cycle
I0320 14:06:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 14:06:38.503265  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:06:38.503271  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:06:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:06:43.410654  543705 memory.go:191] Add success.
I0320 14:06:43.409808  543705 cpu.go:282] Add success.
I0320 14:06:43.420353  543705 net.go:648] Add success.
I0320 14:06:43.422927  543705 net.go:770] primary dev: ETH0
I0320 14:06:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:06:43.422954  543705 net.go:698] Add success.
I0320 14:06:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:06:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:06:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:06:53.409777  543705 memory.go:184] no items to output this cycle
I0320 14:06:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 14:07:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:07:03.409803  543705 memory.go:184] no items to output this cycle
I0320 14:07:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:07:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:07:13.409804  543705 memory.go:191] Add success.
I0320 14:07:13.409815  543705 cpu.go:282] Add success.
W0320 14:07:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:07:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:07:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:07:13.420068  543705 net.go:648] Add success.
I0320 14:07:13.422719  543705 net.go:770] primary dev: ETH0
I0320 14:07:13.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:07:13.422744  543705 net.go:698] Add success.
I0320 14:07:13.453367  543705 event_worker.go:152] Polling the log file for events...
W0320 14:07:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:07:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0320 14:07:14.455232  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:07:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:07:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:07:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:07:14.456812  543705 disk_worker.go:494] system disk:vda1
I0320 14:07:14.456841  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:07:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:07:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:07:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:07:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:07:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:07:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:07:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:07:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:07:23.409774  543705 memory.go:184] no items to output this cycle
I0320 14:07:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 14:07:24.629680  543705 disk_info.go:125] begin check local disk info of client
I0320 14:07:24.632071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:07:24.632077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329780 0xc0003297c0]
E0320 14:07:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:07:33.409787  543705 memory.go:184] no items to output this cycle
I0320 14:07:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 14:07:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:07:43.409789  543705 memory.go:191] Add success.
I0320 14:07:43.409819  543705 cpu.go:282] Add success.
I0320 14:07:43.419968  543705 net.go:648] Add success.
I0320 14:07:43.422753  543705 net.go:770] primary dev: ETH0
I0320 14:07:43.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:07:43.422778  543705 net.go:698] Add success.
I0320 14:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:07:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:07:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:07:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:07:53.409806  543705 memory.go:184] no items to output this cycle
I0320 14:07:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:08:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:08:03.409784  543705 memory.go:184] no items to output this cycle
I0320 14:08:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 14:08:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:08:13.409803  543705 memory.go:191] Add success.
I0320 14:08:13.409804  543705 cpu.go:282] Add success.
W0320 14:08:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:08:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:08:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:08:13.420134  543705 net.go:648] Add success.
I0320 14:08:13.423558  543705 net.go:770] primary dev: ETH0
I0320 14:08:13.423574  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:08:13.423588  543705 net.go:698] Add success.
I0320 14:08:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:08:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:08:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 14:08:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:08:14.456504  543705 disk_worker.go:494] system disk:vda1
I0320 14:08:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:08:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:08:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:08:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:08:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:08:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:08:23.409796  543705 memory.go:184] no items to output this cycle
I0320 14:08:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 14:08:24.632801  543705 disk_info.go:125] begin check local disk info of client
I0320 14:08:24.635235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:08:24.635241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296480 0xc0002964c0]
E0320 14:08:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:08:33.409821  543705 memory.go:184] no items to output this cycle
I0320 14:08:33.409831  543705 cpu.go:275] no items to output this cycle
E0320 14:08:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:08:43.409834  543705 memory.go:191] Add success.
I0320 14:08:43.409843  543705 cpu.go:282] Add success.
I0320 14:08:43.420003  543705 net.go:648] Add success.
I0320 14:08:43.423333  543705 net.go:770] primary dev: ETH0
I0320 14:08:43.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:08:43.423362  543705 net.go:698] Add success.
I0320 14:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:08:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:08:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:08:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:08:53.409818  543705 memory.go:184] no items to output this cycle
I0320 14:08:53.409827  543705 cpu.go:275] no items to output this cycle
E0320 14:09:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:09:03.409785  543705 memory.go:184] no items to output this cycle
I0320 14:09:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 14:09:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:09:13.409809  543705 memory.go:191] Add success.
I0320 14:09:13.409817  543705 cpu.go:282] Add success.
W0320 14:09:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:09:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:09:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:09:13.420139  543705 net.go:648] Add success.
I0320 14:09:13.423044  543705 net.go:770] primary dev: ETH0
I0320 14:09:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:09:13.423070  543705 net.go:698] Add success.
I0320 14:09:13.471158  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cbf034a6-653a-4a32-bc5f-0d789ef92578","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:09:13.471192  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:09:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:09:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 14:09:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:09:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 14:09:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:09:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:09:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:09:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:09:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:09:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:09:23.409770  543705 memory.go:184] no items to output this cycle
I0320 14:09:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 14:09:24.635322  543705 disk_info.go:125] begin check local disk info of client
I0320 14:09:24.637863  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:09:24.637869  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481600 0xc000481640]
E0320 14:09:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:09:33.409880  543705 memory.go:184] no items to output this cycle
I0320 14:09:33.409957  543705 cpu.go:275] no items to output this cycle
I0320 14:09:38.504268  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:09:38.504274  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:09:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:09:43.410604  543705 memory.go:191] Add success.
I0320 14:09:43.409838  543705 cpu.go:282] Add success.
I0320 14:09:43.420297  543705 net.go:648] Add success.
I0320 14:09:43.422959  543705 net.go:770] primary dev: ETH0
I0320 14:09:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:09:43.422985  543705 net.go:698] Add success.
I0320 14:09:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:09:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:09:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:09:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:09:53.409798  543705 memory.go:184] no items to output this cycle
I0320 14:09:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 14:10:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:10:03.409771  543705 memory.go:184] no items to output this cycle
I0320 14:10:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 14:10:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:10:13.409807  543705 memory.go:191] Add success.
I0320 14:10:13.409817  543705 cpu.go:282] Add success.
W0320 14:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:10:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:10:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:10:13.420056  543705 net.go:648] Add success.
I0320 14:10:13.423092  543705 net.go:770] primary dev: ETH0
I0320 14:10:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:10:13.423118  543705 net.go:698] Add success.
I0320 14:10:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:10:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:10:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 14:10:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:10:14.456491  543705 disk_worker.go:494] system disk:vda1
I0320 14:10:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:10:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:10:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:10:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:10:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:10:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:10:23.409780  543705 memory.go:184] no items to output this cycle
I0320 14:10:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 14:10:24.638832  543705 disk_info.go:125] begin check local disk info of client
I0320 14:10:24.641291  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:10:24.641297  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbb00 0xc0001fbb40]
E0320 14:10:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:10:33.409808  543705 memory.go:184] no items to output this cycle
I0320 14:10:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 14:10:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:10:43.409803  543705 memory.go:191] Add success.
I0320 14:10:43.409805  543705 cpu.go:282] Add success.
I0320 14:10:43.420004  543705 net.go:648] Add success.
I0320 14:10:43.422599  543705 net.go:770] primary dev: ETH0
I0320 14:10:43.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:10:43.422628  543705 net.go:698] Add success.
I0320 14:10:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:10:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:10:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:10:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:10:53.409766  543705 memory.go:184] no items to output this cycle
I0320 14:10:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 14:11:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:11:03.409777  543705 memory.go:184] no items to output this cycle
I0320 14:11:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 14:11:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:11:13.409780  543705 memory.go:191] Add success.
W0320 14:11:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 14:11:13.409813  543705 cpu.go:282] Add success.
W0320 14:11:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:11:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:11:13.420184  543705 net.go:648] Add success.
I0320 14:11:13.422888  543705 net.go:770] primary dev: ETH0
I0320 14:11:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:11:13.422916  543705 net.go:698] Add success.
I0320 14:11:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:11:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:11:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 14:11:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:11:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 14:11:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:11:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:11:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:11:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:11:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:11:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:11:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:11:23.409767  543705 memory.go:184] no items to output this cycle
I0320 14:11:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 14:11:24.641674  543705 disk_info.go:125] begin check local disk info of client
I0320 14:11:24.644142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:11:24.644148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002791c0 0xc000279200]
E0320 14:11:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:11:33.409782  543705 memory.go:184] no items to output this cycle
I0320 14:11:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 14:11:43.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:11:43.409899  543705 memory.go:191] Add success.
I0320 14:11:43.410045  543705 cpu.go:282] Add success.
I0320 14:11:43.419710  543705 net.go:648] Add success.
I0320 14:11:43.422715  543705 net.go:770] primary dev: ETH0
I0320 14:11:43.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:11:43.422739  543705 net.go:698] Add success.
I0320 14:11:46.458156  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:11:46.458229  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:11:46.458259  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:11:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:11:53.409773  543705 memory.go:184] no items to output this cycle
I0320 14:11:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 14:12:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:12:03.409776  543705 memory.go:184] no items to output this cycle
I0320 14:12:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 14:12:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:12:13.409814  543705 memory.go:191] Add success.
I0320 14:12:13.409828  543705 cpu.go:282] Add success.
W0320 14:12:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:12:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:12:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:12:13.420183  543705 net.go:648] Add success.
I0320 14:12:13.423269  543705 net.go:770] primary dev: ETH0
I0320 14:12:13.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:12:13.423295  543705 net.go:698] Add success.
I0320 14:12:13.567725  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3a4a1e0e-b70b-4cb9-bd63-fe07be62e10b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:12:13.567760  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 14:12:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:12:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 14:12:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:12:14.456866  543705 disk_worker.go:494] system disk:vda1
I0320 14:12:14.456905  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:12:14.457117  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:12:14.457125  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:12:14.457129  543705 custom_config.go:64] query custom config with name: gpu
E0320 14:12:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:12:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:12:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:12:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:12:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:12:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:12:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:12:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:12:23.409808  543705 memory.go:184] no items to output this cycle
I0320 14:12:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 14:12:24.644230  543705 disk_info.go:125] begin check local disk info of client
I0320 14:12:24.646733  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:12:24.646738  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024bcc0 0xc00024bd00]
E0320 14:12:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:12:33.409790  543705 memory.go:184] no items to output this cycle
I0320 14:12:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 14:12:38.505283  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:12:38.505290  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:12:43.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:12:43.410789  543705 memory.go:191] Add success.
I0320 14:12:43.409964  543705 cpu.go:282] Add success.
I0320 14:12:43.419757  543705 net.go:648] Add success.
I0320 14:12:43.422578  543705 net.go:770] primary dev: ETH0
I0320 14:12:43.422593  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:12:43.422608  543705 net.go:698] Add success.
I0320 14:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:12:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:12:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:12:53.409798  543705 memory.go:184] no items to output this cycle
I0320 14:12:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 14:13:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:13:03.409765  543705 memory.go:184] no items to output this cycle
I0320 14:13:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 14:13:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:13:13.409810  543705 memory.go:191] Add success.
I0320 14:13:13.409813  543705 cpu.go:282] Add success.
W0320 14:13:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:13:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:13:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:13:13.420531  543705 net.go:648] Add success.
I0320 14:13:13.423109  543705 net.go:770] primary dev: ETH0
I0320 14:13:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:13:13.423138  543705 net.go:698] Add success.
I0320 14:13:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:13:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:13:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0320 14:13:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:13:14.456617  543705 disk_worker.go:494] system disk:vda1
I0320 14:13:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:13:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:13:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:13:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:13:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:13:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:13:23.409803  543705 memory.go:184] no items to output this cycle
I0320 14:13:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 14:13:24.647876  543705 disk_info.go:125] begin check local disk info of client
I0320 14:13:24.650497  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:13:24.650504  543705 disk_info.go:196] parse disk info done, disk is : [0xc000216940 0xc000216980]
E0320 14:13:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:13:33.409789  543705 memory.go:184] no items to output this cycle
I0320 14:13:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 14:13:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:13:43.409790  543705 memory.go:191] Add success.
I0320 14:13:43.409793  543705 cpu.go:282] Add success.
I0320 14:13:43.419723  543705 net.go:648] Add success.
I0320 14:13:43.422506  543705 net.go:770] primary dev: ETH0
I0320 14:13:43.422519  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:13:43.422530  543705 net.go:698] Add success.
I0320 14:13:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:13:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:13:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:13:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:13:53.409777  543705 memory.go:184] no items to output this cycle
I0320 14:13:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 14:14:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:14:03.409771  543705 memory.go:184] no items to output this cycle
I0320 14:14:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 14:14:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:14:13.409807  543705 memory.go:191] Add success.
I0320 14:14:13.409810  543705 cpu.go:282] Add success.
W0320 14:14:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:14:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:14:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:14:13.420053  543705 net.go:648] Add success.
I0320 14:14:13.423411  543705 net.go:770] primary dev: ETH0
I0320 14:14:13.423423  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:14:13.423436  543705 net.go:698] Add success.
I0320 14:14:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:14:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:14:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 14:14:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:14:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 14:14:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:14:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:14:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:14:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:14:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:14:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:14:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:14:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 14:14:23.409786  543705 memory.go:184] no items to output this cycle
I0320 14:14:24.650883  543705 disk_info.go:125] begin check local disk info of client
I0320 14:14:24.653346  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:14:24.653353  543705 disk_info.go:196] parse disk info done, disk is : [0xc000241a40 0xc000241a80]
E0320 14:14:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:14:33.409781  543705 memory.go:184] no items to output this cycle
I0320 14:14:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 14:14:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:14:43.409808  543705 memory.go:191] Add success.
I0320 14:14:43.409814  543705 cpu.go:282] Add success.
I0320 14:14:43.420042  543705 net.go:648] Add success.
I0320 14:14:43.423295  543705 net.go:770] primary dev: ETH0
I0320 14:14:43.423308  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:14:43.423320  543705 net.go:698] Add success.
I0320 14:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:14:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:14:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:14:53.409769  543705 memory.go:184] no items to output this cycle
I0320 14:14:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 14:15:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:15:03.409781  543705 memory.go:184] no items to output this cycle
I0320 14:15:03.409784  543705 cpu.go:275] no items to output this cycle
E0320 14:15:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:15:13.409771  543705 memory.go:191] Add success.
W0320 14:15:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 14:15:13.409804  543705 cpu.go:282] Add success.
W0320 14:15:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:15:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:15:13.420122  543705 net.go:648] Add success.
I0320 14:15:13.423179  543705 net.go:770] primary dev: ETH0
I0320 14:15:13.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:15:13.423204  543705 net.go:698] Add success.
I0320 14:15:13.468322  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9576cbae-fb28-4970-b688-d45577f5d98d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:15:13.468357  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:15:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:15:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:15:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 14:15:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:15:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 14:15:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:15:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:15:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:15:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:15:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:15:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:15:23.409773  543705 memory.go:184] no items to output this cycle
I0320 14:15:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 14:15:24.653672  543705 disk_info.go:125] begin check local disk info of client
I0320 14:15:24.656164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:15:24.656170  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bac0 0xc00007bb00]
E0320 14:15:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:15:33.409814  543705 memory.go:184] no items to output this cycle
I0320 14:15:33.409827  543705 cpu.go:275] no items to output this cycle
I0320 14:15:38.506276  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:15:38.506296  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:15:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:15:43.410728  543705 memory.go:191] Add success.
I0320 14:15:43.409841  543705 cpu.go:282] Add success.
I0320 14:15:43.420430  543705 net.go:648] Add success.
I0320 14:15:43.423311  543705 net.go:770] primary dev: ETH0
I0320 14:15:43.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:15:43.423337  543705 net.go:698] Add success.
I0320 14:15:46.458013  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:15:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:15:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:15:53.410554  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:15:53.410568  543705 cpu.go:275] no items to output this cycle
I0320 14:15:53.410573  543705 memory.go:184] no items to output this cycle
E0320 14:16:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:16:03.409773  543705 memory.go:184] no items to output this cycle
I0320 14:16:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 14:16:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:16:13.409793  543705 memory.go:191] Add success.
I0320 14:16:13.409793  543705 cpu.go:282] Add success.
W0320 14:16:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:16:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:16:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:16:13.420180  543705 net.go:648] Add success.
I0320 14:16:13.422943  543705 net.go:770] primary dev: ETH0
I0320 14:16:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:16:13.422969  543705 net.go:698] Add success.
I0320 14:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:16:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:16:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 14:16:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:16:14.456513  543705 disk_worker.go:494] system disk:vda1
I0320 14:16:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:16:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:16:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:16:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:16:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:16:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:16:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:16:23.409771  543705 memory.go:184] no items to output this cycle
I0320 14:16:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 14:16:24.656250  543705 disk_info.go:125] begin check local disk info of client
I0320 14:16:24.658814  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:16:24.658820  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331240 0xc000331280]
E0320 14:16:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:16:33.409794  543705 memory.go:184] no items to output this cycle
I0320 14:16:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 14:16:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:16:43.409778  543705 memory.go:191] Add success.
I0320 14:16:43.409800  543705 cpu.go:282] Add success.
I0320 14:16:43.420089  543705 net.go:648] Add success.
I0320 14:16:43.422961  543705 net.go:770] primary dev: ETH0
I0320 14:16:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:16:43.422987  543705 net.go:698] Add success.
I0320 14:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:16:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:16:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:16:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:16:53.409804  543705 memory.go:184] no items to output this cycle
I0320 14:16:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 14:17:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:17:03.409785  543705 memory.go:184] no items to output this cycle
I0320 14:17:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 14:17:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:17:13.409779  543705 cpu.go:282] Add success.
I0320 14:17:13.409788  543705 memory.go:191] Add success.
W0320 14:17:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:17:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:17:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:17:13.420055  543705 net.go:648] Add success.
I0320 14:17:13.422846  543705 net.go:770] primary dev: ETH0
I0320 14:17:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:17:13.422870  543705 net.go:698] Add success.
I0320 14:17:13.453392  543705 event_worker.go:152] Polling the log file for events...
W0320 14:17:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:17:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 14:17:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:17:14.455877  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:17:14.455886  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:17:14.455892  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:17:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 14:17:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:17:15.456942  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:17:15.456955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:17:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:17:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:17:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:17:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:17:16.472506  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:17:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:17:23.409796  543705 memory.go:184] no items to output this cycle
I0320 14:17:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 14:17:24.659929  543705 disk_info.go:125] begin check local disk info of client
I0320 14:17:24.662488  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:17:24.662493  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5180 0xc0000c51c0]
E0320 14:17:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:17:33.409790  543705 memory.go:184] no items to output this cycle
I0320 14:17:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:17:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:17:43.409814  543705 memory.go:191] Add success.
I0320 14:17:43.409824  543705 cpu.go:282] Add success.
I0320 14:17:43.419907  543705 net.go:648] Add success.
I0320 14:17:43.422648  543705 net.go:770] primary dev: ETH0
I0320 14:17:43.422660  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:17:43.422672  543705 net.go:698] Add success.
I0320 14:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:17:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:17:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:17:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:17:53.409787  543705 memory.go:184] no items to output this cycle
I0320 14:17:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 14:18:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:18:03.409763  543705 memory.go:184] no items to output this cycle
I0320 14:18:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 14:18:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:18:13.409815  543705 memory.go:191] Add success.
I0320 14:18:13.409822  543705 cpu.go:282] Add success.
W0320 14:18:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:18:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:18:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:18:13.420298  543705 net.go:648] Add success.
I0320 14:18:13.423099  543705 net.go:770] primary dev: ETH0
I0320 14:18:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:18:13.423124  543705 net.go:698] Add success.
I0320 14:18:13.469502  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"51a6d140-63d8-49d1-abfc-36379f5ed4f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:18:13.469534  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:18:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:18:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:18:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 14:18:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:18:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 14:18:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:18:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:18:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:18:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:18:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:18:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:18:23.409783  543705 memory.go:184] no items to output this cycle
I0320 14:18:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 14:18:24.662945  543705 disk_info.go:125] begin check local disk info of client
I0320 14:18:24.665382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:18:24.665388  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003288c0 0xc000328900]
E0320 14:18:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:18:33.409814  543705 memory.go:184] no items to output this cycle
I0320 14:18:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 14:18:38.507283  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:18:38.507290  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:18:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:18:43.410580  543705 memory.go:191] Add success.
I0320 14:18:43.409839  543705 cpu.go:282] Add success.
I0320 14:18:43.420250  543705 net.go:648] Add success.
I0320 14:18:43.422885  543705 net.go:770] primary dev: ETH0
I0320 14:18:43.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:18:43.422910  543705 net.go:698] Add success.
I0320 14:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:18:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:18:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:18:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:18:53.409790  543705 memory.go:184] no items to output this cycle
I0320 14:18:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 14:19:03.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:19:03.409884  543705 memory.go:184] no items to output this cycle
I0320 14:19:03.409949  543705 cpu.go:275] no items to output this cycle
E0320 14:19:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:19:13.409835  543705 memory.go:191] Add success.
I0320 14:19:13.409839  543705 cpu.go:282] Add success.
W0320 14:19:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:19:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:19:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:19:13.420161  543705 net.go:648] Add success.
I0320 14:19:13.423359  543705 net.go:770] primary dev: ETH0
I0320 14:19:13.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:19:13.423383  543705 net.go:698] Add success.
I0320 14:19:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:19:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:19:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 14:19:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:19:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 14:19:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:19:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:19:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:19:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:19:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:19:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:19:23.409779  543705 memory.go:184] no items to output this cycle
I0320 14:19:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 14:19:24.665671  543705 disk_info.go:125] begin check local disk info of client
I0320 14:19:24.668113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:19:24.668119  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003318c0 0xc000331900]
E0320 14:19:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:19:33.409807  543705 memory.go:184] no items to output this cycle
I0320 14:19:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:19:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:19:43.409789  543705 memory.go:191] Add success.
I0320 14:19:43.409817  543705 cpu.go:282] Add success.
I0320 14:19:43.419886  543705 net.go:648] Add success.
I0320 14:19:43.422958  543705 net.go:770] primary dev: ETH0
I0320 14:19:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:19:43.422983  543705 net.go:698] Add success.
I0320 14:19:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:19:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:19:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:19:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:19:53.409792  543705 memory.go:184] no items to output this cycle
I0320 14:19:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:20:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:20:03.409772  543705 memory.go:184] no items to output this cycle
I0320 14:20:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 14:20:13.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:20:13.409907  543705 memory.go:191] Add success.
I0320 14:20:13.409981  543705 cpu.go:282] Add success.
W0320 14:20:13.410025  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:20:13.410047  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:20:13.410052  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:20:13.419754  543705 net.go:648] Add success.
I0320 14:20:13.422548  543705 net.go:770] primary dev: ETH0
I0320 14:20:13.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:20:13.422577  543705 net.go:698] Add success.
I0320 14:20:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:20:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:20:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 14:20:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:20:14.456471  543705 disk_worker.go:494] system disk:vda1
I0320 14:20:14.456513  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:20:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:20:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:20:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:20:23.409766  543705 memory.go:184] no items to output this cycle
I0320 14:20:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 14:20:24.668984  543705 disk_info.go:125] begin check local disk info of client
I0320 14:20:24.671420  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:20:24.671425  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a22c0 0xc0004a2300]
E0320 14:20:33.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:20:33.409826  543705 memory.go:184] no items to output this cycle
I0320 14:20:33.409834  543705 cpu.go:275] no items to output this cycle
E0320 14:20:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:20:43.409825  543705 memory.go:191] Add success.
I0320 14:20:43.409832  543705 cpu.go:282] Add success.
I0320 14:20:43.420002  543705 net.go:648] Add success.
I0320 14:20:43.422614  543705 net.go:770] primary dev: ETH0
I0320 14:20:43.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:20:43.422643  543705 net.go:698] Add success.
I0320 14:20:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:20:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:20:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:20:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:20:53.409805  543705 memory.go:184] no items to output this cycle
I0320 14:20:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 14:21:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:21:03.409767  543705 memory.go:184] no items to output this cycle
I0320 14:21:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 14:21:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:21:13.409799  543705 memory.go:191] Add success.
I0320 14:21:13.409802  543705 cpu.go:282] Add success.
W0320 14:21:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:21:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:21:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:21:13.420433  543705 net.go:648] Add success.
I0320 14:21:13.423143  543705 net.go:770] primary dev: ETH0
I0320 14:21:13.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:21:13.423169  543705 net.go:698] Add success.
I0320 14:21:13.463180  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"120abda2-aa52-4368-b912-d60574b946d1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:21:13.463215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:21:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:21:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:21:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 14:21:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:21:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 14:21:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:21:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:21:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:21:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:21:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:21:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:21:23.409776  543705 memory.go:184] no items to output this cycle
I0320 14:21:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 14:21:24.671991  543705 disk_info.go:125] begin check local disk info of client
I0320 14:21:24.674491  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:21:24.674497  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faa80 0xc0001faac0]
E0320 14:21:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:21:33.409782  543705 memory.go:184] no items to output this cycle
I0320 14:21:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 14:21:38.508292  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:21:38.508298  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:21:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:21:43.410579  543705 memory.go:191] Add success.
I0320 14:21:43.409814  543705 cpu.go:282] Add success.
I0320 14:21:43.420159  543705 net.go:770] primary dev: ETH0
I0320 14:21:43.420175  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:21:43.420191  543705 net.go:698] Add success.
I0320 14:21:43.420565  543705 net.go:648] Add success.
I0320 14:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:21:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:21:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:21:53.409785  543705 memory.go:184] no items to output this cycle
I0320 14:21:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 14:22:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:22:03.409770  543705 memory.go:184] no items to output this cycle
I0320 14:22:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 14:22:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:22:13.409816  543705 memory.go:191] Add success.
I0320 14:22:13.409822  543705 cpu.go:282] Add success.
W0320 14:22:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:22:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:22:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:22:13.420463  543705 net.go:648] Add success.
I0320 14:22:13.423621  543705 net.go:770] primary dev: ETH0
I0320 14:22:13.423634  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:22:13.423645  543705 net.go:698] Add success.
W0320 14:22:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:22:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 14:22:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:22:14.456798  543705 disk_worker.go:494] system disk:vda1
I0320 14:22:14.456837  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:22:14.457158  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:22:14.457165  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:22:14.457170  543705 custom_config.go:64] query custom config with name: gpu
E0320 14:22:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:22:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:22:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:22:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:22:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:22:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:22:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:22:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:22:23.409801  543705 memory.go:184] no items to output this cycle
I0320 14:22:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 14:22:24.675011  543705 disk_info.go:125] begin check local disk info of client
I0320 14:22:24.677496  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:22:24.677503  543705 disk_info.go:196] parse disk info done, disk is : [0xc00054a840 0xc00054a880]
E0320 14:22:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:22:33.409791  543705 memory.go:184] no items to output this cycle
I0320 14:22:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 14:22:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:22:43.409809  543705 memory.go:191] Add success.
I0320 14:22:43.409819  543705 cpu.go:282] Add success.
I0320 14:22:43.419892  543705 net.go:648] Add success.
I0320 14:22:43.422663  543705 net.go:770] primary dev: ETH0
I0320 14:22:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:22:43.422688  543705 net.go:698] Add success.
I0320 14:22:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:22:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:22:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:22:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:22:53.409805  543705 memory.go:184] no items to output this cycle
I0320 14:22:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 14:23:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:23:03.409776  543705 memory.go:184] no items to output this cycle
I0320 14:23:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 14:23:13.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:23:13.409924  543705 memory.go:191] Add success.
I0320 14:23:13.409937  543705 cpu.go:282] Add success.
W0320 14:23:13.409967  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:23:13.409985  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:23:13.409990  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:23:13.419741  543705 net.go:648] Add success.
I0320 14:23:13.422884  543705 net.go:770] primary dev: ETH0
I0320 14:23:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:23:13.422913  543705 net.go:698] Add success.
I0320 14:23:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:23:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:23:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 14:23:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:23:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 14:23:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:23:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:23:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:23:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:23:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:23:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:23:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:23:23.409776  543705 memory.go:184] no items to output this cycle
I0320 14:23:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 14:23:24.677673  543705 disk_info.go:125] begin check local disk info of client
I0320 14:23:24.680125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:23:24.680131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6280 0xc0003b62c0]
E0320 14:23:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:23:33.409796  543705 memory.go:184] no items to output this cycle
I0320 14:23:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 14:23:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:23:43.409787  543705 memory.go:191] Add success.
I0320 14:23:43.409809  543705 cpu.go:282] Add success.
I0320 14:23:43.420022  543705 net.go:648] Add success.
I0320 14:23:43.422864  543705 net.go:770] primary dev: ETH0
I0320 14:23:43.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:23:43.422892  543705 net.go:698] Add success.
I0320 14:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:23:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:23:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:23:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:23:53.409790  543705 memory.go:184] no items to output this cycle
I0320 14:23:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 14:24:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:24:03.409777  543705 memory.go:184] no items to output this cycle
I0320 14:24:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 14:24:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:24:13.409881  543705 memory.go:191] Add success.
W0320 14:24:13.409912  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:24:13.409925  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:24:13.409928  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:24:13.409940  543705 cpu.go:282] Add success.
I0320 14:24:13.419703  543705 net.go:648] Add success.
I0320 14:24:13.422181  543705 net.go:770] primary dev: ETH0
I0320 14:24:13.422195  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:24:13.422206  543705 net.go:698] Add success.
I0320 14:24:13.463844  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1ae20273-7fcb-44da-8efc-7924bdbf156e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:24:13.463877  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:24:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:24:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:24:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 14:24:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:24:14.456597  543705 disk_worker.go:494] system disk:vda1
I0320 14:24:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:24:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:24:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:24:23.409803  543705 memory.go:184] no items to output this cycle
I0320 14:24:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 14:24:24.681030  543705 disk_info.go:125] begin check local disk info of client
I0320 14:24:24.683546  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:24:24.683551  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e580 0xc00028e5c0]
E0320 14:24:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:24:33.409796  543705 memory.go:184] no items to output this cycle
I0320 14:24:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 14:24:38.509290  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:24:38.509297  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:24:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:24:43.410579  543705 memory.go:191] Add success.
I0320 14:24:43.409815  543705 cpu.go:282] Add success.
I0320 14:24:43.420344  543705 net.go:648] Add success.
I0320 14:24:43.423153  543705 net.go:770] primary dev: ETH0
I0320 14:24:43.423167  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:24:43.423180  543705 net.go:698] Add success.
I0320 14:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:24:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:24:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:24:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:24:53.409800  543705 memory.go:184] no items to output this cycle
I0320 14:24:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 14:25:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:25:03.409779  543705 memory.go:184] no items to output this cycle
I0320 14:25:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 14:25:13.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:25:13.409908  543705 memory.go:191] Add success.
W0320 14:25:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 14:25:13.409981  543705 cpu.go:282] Add success.
W0320 14:25:13.409984  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:25:13.409989  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:25:13.419730  543705 net.go:648] Add success.
I0320 14:25:13.422498  543705 net.go:770] primary dev: ETH0
I0320 14:25:13.422511  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:25:13.422522  543705 net.go:698] Add success.
I0320 14:25:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:25:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:25:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 14:25:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:25:14.456521  543705 disk_worker.go:494] system disk:vda1
I0320 14:25:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:25:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:25:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:25:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:25:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:25:23.409777  543705 memory.go:184] no items to output this cycle
I0320 14:25:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 14:25:24.683631  543705 disk_info.go:125] begin check local disk info of client
I0320 14:25:24.686126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:25:24.686132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331880 0xc0003318c0]
E0320 14:25:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:25:33.409784  543705 memory.go:184] no items to output this cycle
I0320 14:25:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 14:25:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:25:43.409792  543705 memory.go:191] Add success.
I0320 14:25:43.409813  543705 cpu.go:282] Add success.
I0320 14:25:43.419903  543705 net.go:648] Add success.
I0320 14:25:43.422591  543705 net.go:770] primary dev: ETH0
I0320 14:25:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:25:43.422620  543705 net.go:698] Add success.
I0320 14:25:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:25:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:25:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:25:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:25:53.409805  543705 memory.go:184] no items to output this cycle
I0320 14:25:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:26:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:26:03.409779  543705 memory.go:184] no items to output this cycle
I0320 14:26:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 14:26:13.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:26:13.409933  543705 memory.go:191] Add success.
I0320 14:26:13.409937  543705 cpu.go:282] Add success.
W0320 14:26:13.409976  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:26:13.409995  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:26:13.409999  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:26:13.419727  543705 net.go:648] Add success.
I0320 14:26:13.422668  543705 net.go:770] primary dev: ETH0
I0320 14:26:13.422683  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:26:13.422697  543705 net.go:698] Add success.
I0320 14:26:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:26:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:26:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 14:26:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:26:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 14:26:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:26:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:26:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:26:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:26:23.409803  543705 memory.go:184] no items to output this cycle
I0320 14:26:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 14:26:24.686214  543705 disk_info.go:125] begin check local disk info of client
I0320 14:26:24.688793  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:26:24.688799  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513980 0xc000513a00]
E0320 14:26:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:26:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 14:26:33.409800  543705 memory.go:184] no items to output this cycle
E0320 14:26:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:26:43.409808  543705 memory.go:191] Add success.
I0320 14:26:43.409809  543705 cpu.go:282] Add success.
I0320 14:26:43.419878  543705 net.go:648] Add success.
I0320 14:26:43.423298  543705 net.go:770] primary dev: ETH0
I0320 14:26:43.423314  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:26:43.423328  543705 net.go:698] Add success.
I0320 14:26:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:26:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:26:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:26:53.409780  543705 memory.go:184] no items to output this cycle
I0320 14:26:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 14:27:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:27:03.409775  543705 memory.go:184] no items to output this cycle
I0320 14:27:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 14:27:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:27:13.409802  543705 cpu.go:282] Add success.
I0320 14:27:13.409806  543705 memory.go:191] Add success.
W0320 14:27:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:27:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:27:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:27:13.420116  543705 net.go:648] Add success.
I0320 14:27:13.422963  543705 net.go:770] primary dev: ETH0
I0320 14:27:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:27:13.422994  543705 net.go:698] Add success.
I0320 14:27:13.429348  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 14:27:13.453528  543705 event_worker.go:152] Polling the log file for events...
I0320 14:27:13.469775  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84fe879e-e9d4-4eb4-89a1-f7f8c32a9bc9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:27:13.469810  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 14:27:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:27:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 14:27:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:27:14.456793  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:27:14.456802  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:27:14.456807  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:27:14.456849  543705 disk_worker.go:494] system disk:vda1
I0320 14:27:14.456876  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:27:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:27:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:27:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:27:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:27:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:27:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:27:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:27:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:27:23.409768  543705 memory.go:184] no items to output this cycle
I0320 14:27:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 14:27:24.689672  543705 disk_info.go:125] begin check local disk info of client
I0320 14:27:24.692095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:27:24.692100  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513b40 0xc000513b80]
E0320 14:27:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:27:33.409813  543705 memory.go:184] no items to output this cycle
I0320 14:27:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 14:27:38.510317  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:27:38.510324  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:27:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:27:43.410711  543705 memory.go:191] Add success.
I0320 14:27:43.409815  543705 cpu.go:282] Add success.
I0320 14:27:43.420483  543705 net.go:648] Add success.
I0320 14:27:43.423110  543705 net.go:770] primary dev: ETH0
I0320 14:27:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:27:43.423134  543705 net.go:698] Add success.
I0320 14:27:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:27:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:27:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:27:53.410397  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:27:53.410415  543705 memory.go:184] no items to output this cycle
I0320 14:27:53.410427  543705 cpu.go:275] no items to output this cycle
E0320 14:28:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:28:03.409779  543705 memory.go:184] no items to output this cycle
I0320 14:28:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 14:28:13.409971  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:28:13.410058  543705 memory.go:191] Add success.
I0320 14:28:13.410085  543705 cpu.go:282] Add success.
W0320 14:28:13.410093  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:28:13.410115  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:28:13.410119  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:28:13.419750  543705 net.go:648] Add success.
I0320 14:28:13.422494  543705 net.go:770] primary dev: ETH0
I0320 14:28:13.422509  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:28:13.422522  543705 net.go:698] Add success.
I0320 14:28:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:28:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:28:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 14:28:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:28:14.456562  543705 disk_worker.go:494] system disk:vda1
I0320 14:28:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:28:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:28:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:28:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:28:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:28:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:28:23.409798  543705 memory.go:184] no items to output this cycle
I0320 14:28:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 14:28:24.693098  543705 disk_info.go:125] begin check local disk info of client
I0320 14:28:24.695542  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:28:24.695547  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5ec0 0xc0000c5f00]
E0320 14:28:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:28:33.409783  543705 memory.go:184] no items to output this cycle
I0320 14:28:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 14:28:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:28:43.409794  543705 memory.go:191] Add success.
I0320 14:28:43.409805  543705 cpu.go:282] Add success.
I0320 14:28:43.419859  543705 net.go:648] Add success.
I0320 14:28:43.423217  543705 net.go:770] primary dev: ETH0
I0320 14:28:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:28:43.423242  543705 net.go:698] Add success.
I0320 14:28:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:28:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:28:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:28:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:28:53.409774  543705 memory.go:184] no items to output this cycle
I0320 14:28:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 14:29:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:29:03.409770  543705 memory.go:184] no items to output this cycle
I0320 14:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 14:29:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:29:13.409817  543705 memory.go:191] Add success.
I0320 14:29:13.409819  543705 cpu.go:282] Add success.
W0320 14:29:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:29:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:29:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:29:13.419744  543705 net.go:648] Add success.
I0320 14:29:13.422746  543705 net.go:770] primary dev: ETH0
I0320 14:29:13.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:29:13.422770  543705 net.go:698] Add success.
I0320 14:29:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:29:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:29:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 14:29:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:29:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 14:29:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:29:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:29:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:29:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:29:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:29:23.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:29:23.410266  543705 memory.go:184] no items to output this cycle
I0320 14:29:23.410267  543705 cpu.go:275] no items to output this cycle
I0320 14:29:24.696121  543705 disk_info.go:125] begin check local disk info of client
I0320 14:29:24.698578  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:29:24.698583  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381980 0xc0003819c0]
E0320 14:29:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:29:33.409780  543705 memory.go:184] no items to output this cycle
I0320 14:29:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 14:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:29:43.409806  543705 memory.go:191] Add success.
I0320 14:29:43.409813  543705 cpu.go:282] Add success.
I0320 14:29:43.419939  543705 net.go:648] Add success.
I0320 14:29:43.422636  543705 net.go:770] primary dev: ETH0
I0320 14:29:43.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:29:43.422659  543705 net.go:698] Add success.
I0320 14:29:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:29:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:29:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:29:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:29:53.409805  543705 memory.go:184] no items to output this cycle
I0320 14:29:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:30:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:30:03.409805  543705 memory.go:184] no items to output this cycle
I0320 14:30:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:30:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:30:13.409909  543705 memory.go:191] Add success.
I0320 14:30:13.409937  543705 cpu.go:282] Add success.
W0320 14:30:13.409945  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:30:13.409959  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:30:13.409962  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:30:13.419741  543705 net.go:648] Add success.
I0320 14:30:13.422677  543705 net.go:770] primary dev: ETH0
I0320 14:30:13.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:30:13.422705  543705 net.go:698] Add success.
I0320 14:30:13.468003  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3d2ca136-df07-4192-befa-34ecf4702053","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:30:13.468042  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:30:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:30:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 14:30:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:30:14.456739  543705 disk_worker.go:494] system disk:vda1
I0320 14:30:14.456768  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:30:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:30:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:30:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:30:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:30:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:30:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:30:23.409775  543705 memory.go:184] no items to output this cycle
I0320 14:30:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 14:30:24.698663  543705 disk_info.go:125] begin check local disk info of client
I0320 14:30:24.701179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:30:24.701186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa9c0 0xc0001fb040]
E0320 14:30:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:30:33.409786  543705 memory.go:184] no items to output this cycle
I0320 14:30:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 14:30:38.511300  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:30:38.511307  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:30:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:30:43.410620  543705 memory.go:191] Add success.
I0320 14:30:43.409802  543705 cpu.go:282] Add success.
I0320 14:30:43.420316  543705 net.go:648] Add success.
I0320 14:30:43.423396  543705 net.go:770] primary dev: ETH0
I0320 14:30:43.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:30:43.423422  543705 net.go:698] Add success.
I0320 14:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:30:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:30:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:30:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:30:53.409779  543705 memory.go:184] no items to output this cycle
I0320 14:30:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 14:31:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:31:03.409772  543705 memory.go:184] no items to output this cycle
I0320 14:31:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 14:31:13.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:31:13.409919  543705 memory.go:191] Add success.
I0320 14:31:13.409924  543705 cpu.go:282] Add success.
W0320 14:31:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:31:13.409966  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:31:13.409972  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:31:13.419724  543705 net.go:648] Add success.
I0320 14:31:13.422186  543705 net.go:770] primary dev: ETH0
I0320 14:31:13.422200  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:31:13.422212  543705 net.go:698] Add success.
I0320 14:31:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:31:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:31:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 14:31:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:31:14.456545  543705 disk_worker.go:494] system disk:vda1
I0320 14:31:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:31:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:31:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:31:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:31:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:31:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:31:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:31:23.409763  543705 memory.go:184] no items to output this cycle
I0320 14:31:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 14:31:24.701668  543705 disk_info.go:125] begin check local disk info of client
I0320 14:31:24.704126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:31:24.704132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468980 0xc0004689c0]
E0320 14:31:33.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:31:33.409820  543705 cpu.go:275] no items to output this cycle
I0320 14:31:33.409836  543705 memory.go:184] no items to output this cycle
E0320 14:31:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:31:43.409801  543705 memory.go:191] Add success.
I0320 14:31:43.409824  543705 cpu.go:282] Add success.
I0320 14:31:43.419702  543705 net.go:770] primary dev: ETH0
I0320 14:31:43.419718  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:31:43.419732  543705 net.go:698] Add success.
I0320 14:31:43.420077  543705 net.go:648] Add success.
I0320 14:31:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:31:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:31:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:31:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:31:53.409816  543705 memory.go:184] no items to output this cycle
I0320 14:31:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 14:32:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:32:03.409789  543705 memory.go:184] no items to output this cycle
I0320 14:32:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 14:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:32:13.409792  543705 memory.go:191] Add success.
I0320 14:32:13.409800  543705 cpu.go:282] Add success.
W0320 14:32:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:32:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:32:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:32:13.420241  543705 net.go:648] Add success.
I0320 14:32:13.423028  543705 net.go:770] primary dev: ETH0
I0320 14:32:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:32:13.423052  543705 net.go:698] Add success.
W0320 14:32:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:32:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 14:32:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:32:14.456781  543705 disk_worker.go:494] system disk:vda1
I0320 14:32:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:32:14.457119  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:32:14.457127  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:32:14.457131  543705 custom_config.go:64] query custom config with name: gpu
E0320 14:32:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:32:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:32:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:32:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:32:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:32:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:32:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:32:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:32:23.409800  543705 memory.go:184] no items to output this cycle
I0320 14:32:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 14:32:24.704212  543705 disk_info.go:125] begin check local disk info of client
I0320 14:32:24.706692  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:32:24.706698  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004af6c0 0xc0004af700]
E0320 14:32:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:32:33.409813  543705 memory.go:184] no items to output this cycle
I0320 14:32:33.409857  543705 cpu.go:275] no items to output this cycle
E0320 14:32:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:32:43.409801  543705 memory.go:191] Add success.
I0320 14:32:43.409802  543705 cpu.go:282] Add success.
I0320 14:32:43.419879  543705 net.go:648] Add success.
I0320 14:32:43.422468  543705 net.go:770] primary dev: ETH0
I0320 14:32:43.422482  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:32:43.422498  543705 net.go:698] Add success.
I0320 14:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:32:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:32:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:32:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:32:53.409782  543705 memory.go:184] no items to output this cycle
I0320 14:32:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 14:33:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:33:03.409819  543705 memory.go:184] no items to output this cycle
I0320 14:33:03.409833  543705 cpu.go:275] no items to output this cycle
E0320 14:33:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:33:13.409789  543705 memory.go:191] Add success.
I0320 14:33:13.409808  543705 cpu.go:282] Add success.
W0320 14:33:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:33:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:33:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:33:13.420357  543705 net.go:648] Add success.
I0320 14:33:13.422973  543705 net.go:770] primary dev: ETH0
I0320 14:33:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:33:13.423002  543705 net.go:698] Add success.
I0320 14:33:13.463348  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00291b22-8ee3-48ef-8bcc-5ab25a007485","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:33:13.463378  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:33:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:33:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:33:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 14:33:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:33:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 14:33:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:33:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:33:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:33:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:33:23.409767  543705 memory.go:184] no items to output this cycle
I0320 14:33:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 14:33:24.708176  543705 disk_info.go:125] begin check local disk info of client
I0320 14:33:24.710642  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:33:24.710648  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb00 0xc0001abb40]
I0320 14:33:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:33:33.409826  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:33:33.409845  543705 memory.go:184] no items to output this cycle
I0320 14:33:38.512326  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:33:38.512333  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:33:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:33:43.410831  543705 memory.go:191] Add success.
I0320 14:33:43.409849  543705 cpu.go:282] Add success.
I0320 14:33:43.420569  543705 net.go:648] Add success.
I0320 14:33:43.423454  543705 net.go:770] primary dev: ETH0
I0320 14:33:43.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:33:43.423480  543705 net.go:698] Add success.
I0320 14:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:33:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:33:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:33:53.409817  543705 memory.go:184] no items to output this cycle
I0320 14:33:53.409824  543705 cpu.go:275] no items to output this cycle
E0320 14:34:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:34:03.409786  543705 memory.go:184] no items to output this cycle
I0320 14:34:03.409791  543705 cpu.go:275] no items to output this cycle
W0320 14:34:13.409712  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:34:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:34:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:34:13.409800  543705 cpu.go:282] Add success.
E0320 14:34:13.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:34:13.409874  543705 memory.go:191] Add success.
I0320 14:34:13.420338  543705 net.go:648] Add success.
I0320 14:34:13.423019  543705 net.go:770] primary dev: ETH0
I0320 14:34:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:34:13.423042  543705 net.go:698] Add success.
I0320 14:34:14.454942  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:34:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:34:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 14:34:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:34:14.456543  543705 disk_worker.go:494] system disk:vda1
I0320 14:34:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:34:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:34:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:34:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:34:23.409765  543705 memory.go:184] no items to output this cycle
I0320 14:34:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 14:34:24.711192  543705 disk_info.go:125] begin check local disk info of client
I0320 14:34:24.713637  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:34:24.713658  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbec0 0xc0001fbf00]
E0320 14:34:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:34:33.409779  543705 memory.go:184] no items to output this cycle
I0320 14:34:33.409841  543705 cpu.go:275] no items to output this cycle
E0320 14:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:34:43.409795  543705 memory.go:191] Add success.
I0320 14:34:43.409822  543705 cpu.go:282] Add success.
I0320 14:34:43.420075  543705 net.go:648] Add success.
I0320 14:34:43.423132  543705 net.go:770] primary dev: ETH0
I0320 14:34:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:34:43.423162  543705 net.go:698] Add success.
I0320 14:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:34:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:34:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:34:53.409808  543705 memory.go:184] no items to output this cycle
I0320 14:34:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 14:35:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:35:03.409800  543705 memory.go:184] no items to output this cycle
I0320 14:35:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 14:35:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:35:13.409807  543705 memory.go:191] Add success.
I0320 14:35:13.409808  543705 cpu.go:282] Add success.
W0320 14:35:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:35:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:35:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:35:13.420126  543705 net.go:648] Add success.
I0320 14:35:13.422830  543705 net.go:770] primary dev: ETH0
I0320 14:35:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:35:13.422853  543705 net.go:698] Add success.
I0320 14:35:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:35:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:35:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 14:35:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:35:14.459088  543705 disk_worker.go:494] system disk:vda1
I0320 14:35:14.459115  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:35:15.455919  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:35:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:35:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:35:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:35:23.409803  543705 memory.go:184] no items to output this cycle
I0320 14:35:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 14:35:24.714726  543705 disk_info.go:125] begin check local disk info of client
I0320 14:35:24.717159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:35:24.717164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000234540 0xc000234580]
E0320 14:35:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:35:33.409805  543705 memory.go:184] no items to output this cycle
I0320 14:35:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:35:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:35:43.409831  543705 memory.go:191] Add success.
I0320 14:35:43.409836  543705 cpu.go:282] Add success.
I0320 14:35:43.420004  543705 net.go:648] Add success.
I0320 14:35:43.422759  543705 net.go:770] primary dev: ETH0
I0320 14:35:43.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:35:43.422785  543705 net.go:698] Add success.
I0320 14:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:35:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:35:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:35:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:35:53.409807  543705 memory.go:184] no items to output this cycle
I0320 14:35:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:36:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:36:03.409793  543705 memory.go:184] no items to output this cycle
I0320 14:36:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 14:36:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:36:13.409792  543705 memory.go:191] Add success.
I0320 14:36:13.409791  543705 cpu.go:282] Add success.
W0320 14:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:36:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:36:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:36:13.420114  543705 net.go:648] Add success.
I0320 14:36:13.423220  543705 net.go:770] primary dev: ETH0
I0320 14:36:13.423239  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:36:13.423268  543705 net.go:698] Add success.
I0320 14:36:13.935066  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a4fc6d2e-271f-4299-afe9-cc38fe4048a6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:36:13.935099  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:36:14.454731  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:36:14.454990  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:36:14.455091  543705 disk_worker.go:708] disk space is not compliant
W0320 14:36:14.455096  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:36:14.456783  543705 disk_worker.go:494] system disk:vda1
I0320 14:36:14.456811  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:36:15.455605  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:36:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:36:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:36:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:36:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:36:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:36:23.409778  543705 memory.go:184] no items to output this cycle
I0320 14:36:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 14:36:24.717669  543705 disk_info.go:125] begin check local disk info of client
I0320 14:36:24.720101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:36:24.720106  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d9440 0xc0003d9480]
E0320 14:36:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:36:33.409794  543705 cpu.go:275] no items to output this cycle
I0320 14:36:33.409811  543705 memory.go:184] no items to output this cycle
I0320 14:36:38.513331  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:36:38.513339  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:36:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:36:43.410638  543705 memory.go:191] Add success.
I0320 14:36:43.409831  543705 cpu.go:282] Add success.
I0320 14:36:43.420386  543705 net.go:648] Add success.
I0320 14:36:43.423328  543705 net.go:770] primary dev: ETH0
I0320 14:36:43.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:36:43.423353  543705 net.go:698] Add success.
I0320 14:36:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:36:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:36:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:36:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:36:53.409773  543705 memory.go:184] no items to output this cycle
I0320 14:36:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 14:37:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:37:03.409809  543705 memory.go:184] no items to output this cycle
I0320 14:37:03.409824  543705 cpu.go:275] no items to output this cycle
W0320 14:37:13.409699  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:37:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:37:13.409718  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 14:37:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:37:13.409811  543705 memory.go:191] Add success.
I0320 14:37:13.409816  543705 cpu.go:282] Add success.
I0320 14:37:13.420052  543705 net.go:648] Add success.
I0320 14:37:13.422896  543705 net.go:770] primary dev: ETH0
I0320 14:37:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:37:13.422921  543705 net.go:698] Add success.
I0320 14:37:13.453443  543705 event_worker.go:152] Polling the log file for events...
W0320 14:37:14.455322  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:37:14.455457  543705 disk_worker.go:708] disk space is not compliant
W0320 14:37:14.455462  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:37:14.456126  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:37:14.456134  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:37:14.456138  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:37:14.457249  543705 disk_worker.go:494] system disk:vda1
I0320 14:37:14.457290  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:37:15.456867  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:37:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:37:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:37:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:37:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:37:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:37:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:37:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:37:23.409773  543705 cpu.go:275] no items to output this cycle
I0320 14:37:23.409784  543705 memory.go:184] no items to output this cycle
I0320 14:37:24.721247  543705 disk_info.go:125] begin check local disk info of client
I0320 14:37:24.723711  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:37:24.723717  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004700c0 0xc000470100]
E0320 14:37:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:37:33.409790  543705 memory.go:184] no items to output this cycle
I0320 14:37:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 14:37:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:37:43.409789  543705 memory.go:191] Add success.
I0320 14:37:43.409811  543705 cpu.go:282] Add success.
I0320 14:37:43.420038  543705 net.go:648] Add success.
I0320 14:37:43.422939  543705 net.go:770] primary dev: ETH0
I0320 14:37:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:37:43.422967  543705 net.go:698] Add success.
I0320 14:37:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:37:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:37:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:37:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:37:53.409784  543705 memory.go:184] no items to output this cycle
I0320 14:37:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 14:38:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:38:03.409772  543705 memory.go:184] no items to output this cycle
I0320 14:38:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 14:38:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:38:13.409811  543705 memory.go:191] Add success.
I0320 14:38:13.409816  543705 cpu.go:282] Add success.
W0320 14:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:38:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:38:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:38:13.420072  543705 net.go:648] Add success.
I0320 14:38:13.423100  543705 net.go:770] primary dev: ETH0
I0320 14:38:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:38:13.423126  543705 net.go:698] Add success.
I0320 14:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:38:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:38:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 14:38:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:38:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 14:38:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:38:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:38:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:38:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:38:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:38:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:38:23.409770  543705 memory.go:184] no items to output this cycle
I0320 14:38:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 14:38:24.725260  543705 disk_info.go:125] begin check local disk info of client
I0320 14:38:24.727782  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:38:24.727787  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e8c0 0xc00049e900]
E0320 14:38:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:38:33.409798  543705 memory.go:184] no items to output this cycle
I0320 14:38:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 14:38:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:38:43.409800  543705 memory.go:191] Add success.
I0320 14:38:43.409800  543705 cpu.go:282] Add success.
I0320 14:38:43.419977  543705 net.go:648] Add success.
I0320 14:38:43.422702  543705 net.go:770] primary dev: ETH0
I0320 14:38:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:38:43.422730  543705 net.go:698] Add success.
I0320 14:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:38:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:38:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:38:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:38:53.410269  543705 memory.go:184] no items to output this cycle
I0320 14:38:53.410279  543705 cpu.go:275] no items to output this cycle
E0320 14:39:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:39:03.409786  543705 memory.go:184] no items to output this cycle
I0320 14:39:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 14:39:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:39:13.409810  543705 memory.go:191] Add success.
I0320 14:39:13.409818  543705 cpu.go:282] Add success.
W0320 14:39:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:39:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:39:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:39:13.420097  543705 net.go:648] Add success.
I0320 14:39:13.422896  543705 net.go:770] primary dev: ETH0
I0320 14:39:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:39:13.422927  543705 net.go:698] Add success.
I0320 14:39:13.469717  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eb53bf53-338e-4e28-befa-97b866615c09","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:39:13.469751  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:39:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:39:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:39:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 14:39:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:39:14.456899  543705 disk_worker.go:494] system disk:vda1
I0320 14:39:14.456928  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:39:15.455613  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:39:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:39:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:39:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:39:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:39:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:39:23.409781  543705 memory.go:184] no items to output this cycle
I0320 14:39:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 14:39:24.729280  543705 disk_info.go:125] begin check local disk info of client
I0320 14:39:24.731718  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:39:24.731724  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0320 14:39:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:39:33.409782  543705 cpu.go:275] no items to output this cycle
I0320 14:39:33.409784  543705 memory.go:184] no items to output this cycle
I0320 14:39:38.514329  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:39:38.514336  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:39:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:39:43.410920  543705 memory.go:191] Add success.
I0320 14:39:43.409803  543705 cpu.go:282] Add success.
I0320 14:39:43.420651  543705 net.go:648] Add success.
I0320 14:39:43.423575  543705 net.go:770] primary dev: ETH0
I0320 14:39:43.423588  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:39:43.423600  543705 net.go:698] Add success.
I0320 14:39:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:39:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:39:46.458050  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:39:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:39:53.409784  543705 memory.go:184] no items to output this cycle
I0320 14:39:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 14:40:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:40:03.409771  543705 memory.go:184] no items to output this cycle
I0320 14:40:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 14:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:40:13.409807  543705 memory.go:191] Add success.
I0320 14:40:13.409827  543705 cpu.go:282] Add success.
W0320 14:40:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:40:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:40:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:40:13.420195  543705 net.go:648] Add success.
I0320 14:40:13.422998  543705 net.go:770] primary dev: ETH0
I0320 14:40:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:40:13.423033  543705 net.go:698] Add success.
I0320 14:40:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:40:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:40:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 14:40:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:40:14.456564  543705 disk_worker.go:494] system disk:vda1
I0320 14:40:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:40:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:40:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:40:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:40:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:40:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:40:23.409763  543705 memory.go:184] no items to output this cycle
I0320 14:40:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 14:40:24.733301  543705 disk_info.go:125] begin check local disk info of client
I0320 14:40:24.735745  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:40:24.735751  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483000 0xc000483040]
E0320 14:40:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:40:33.409764  543705 memory.go:184] no items to output this cycle
I0320 14:40:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 14:40:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:40:43.409828  543705 memory.go:191] Add success.
I0320 14:40:43.409835  543705 cpu.go:282] Add success.
I0320 14:40:43.419912  543705 net.go:648] Add success.
I0320 14:40:43.422917  543705 net.go:770] primary dev: ETH0
I0320 14:40:43.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:40:43.422946  543705 net.go:698] Add success.
I0320 14:40:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:40:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:40:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:40:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:40:53.409774  543705 memory.go:184] no items to output this cycle
I0320 14:40:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 14:41:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:41:03.409795  543705 memory.go:184] no items to output this cycle
I0320 14:41:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 14:41:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:41:13.409796  543705 memory.go:191] Add success.
I0320 14:41:13.409801  543705 cpu.go:282] Add success.
W0320 14:41:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:41:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:41:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:41:13.420241  543705 net.go:648] Add success.
I0320 14:41:13.422942  543705 net.go:770] primary dev: ETH0
I0320 14:41:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:41:13.422969  543705 net.go:698] Add success.
I0320 14:41:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:41:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:41:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 14:41:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:41:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 14:41:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:41:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:41:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:41:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:41:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:41:23.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:41:23.409895  543705 memory.go:184] no items to output this cycle
I0320 14:41:23.409954  543705 cpu.go:275] no items to output this cycle
I0320 14:41:24.735834  543705 disk_info.go:125] begin check local disk info of client
I0320 14:41:24.738326  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:41:24.738332  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390000 0xc000390040]
E0320 14:41:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:41:33.409794  543705 memory.go:184] no items to output this cycle
I0320 14:41:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 14:41:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:41:43.409792  543705 memory.go:191] Add success.
I0320 14:41:43.409809  543705 cpu.go:282] Add success.
I0320 14:41:43.419902  543705 net.go:648] Add success.
I0320 14:41:43.422715  543705 net.go:770] primary dev: ETH0
I0320 14:41:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:41:43.422744  543705 net.go:698] Add success.
I0320 14:41:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:41:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:41:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:41:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:41:53.409774  543705 memory.go:184] no items to output this cycle
I0320 14:41:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 14:42:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:42:03.409775  543705 memory.go:184] no items to output this cycle
I0320 14:42:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 14:42:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:42:13.409805  543705 memory.go:191] Add success.
I0320 14:42:13.409813  543705 cpu.go:282] Add success.
W0320 14:42:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:42:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:42:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:42:13.420108  543705 net.go:648] Add success.
I0320 14:42:13.422913  543705 net.go:770] primary dev: ETH0
I0320 14:42:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:42:13.422943  543705 net.go:698] Add success.
I0320 14:42:13.470176  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"81eb2364-6bc0-4456-afeb-54526256c564","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:42:13.470209  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 14:42:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:42:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 14:42:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:42:14.455869  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:42:14.455876  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:42:14.455880  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:42:14.456836  543705 disk_worker.go:494] system disk:vda1
I0320 14:42:14.456867  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:42:15.456878  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:42:15.456887  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:42:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:42:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:42:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:42:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:42:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:42:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:42:23.409784  543705 memory.go:184] no items to output this cycle
I0320 14:42:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 14:42:24.739340  543705 disk_info.go:125] begin check local disk info of client
I0320 14:42:24.741773  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:42:24.741778  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa080 0xc0001aa0c0]
E0320 14:42:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:42:33.409782  543705 memory.go:184] no items to output this cycle
I0320 14:42:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 14:42:38.515344  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:42:38.515352  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:42:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:42:43.410818  543705 memory.go:191] Add success.
I0320 14:42:43.409836  543705 cpu.go:282] Add success.
I0320 14:42:43.420505  543705 net.go:648] Add success.
I0320 14:42:43.423472  543705 net.go:770] primary dev: ETH0
I0320 14:42:43.423485  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:42:43.423498  543705 net.go:698] Add success.
I0320 14:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:42:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:42:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:42:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:42:53.409797  543705 memory.go:184] no items to output this cycle
I0320 14:42:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 14:43:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:43:03.409782  543705 memory.go:184] no items to output this cycle
I0320 14:43:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 14:43:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:43:13.409797  543705 memory.go:191] Add success.
I0320 14:43:13.409798  543705 cpu.go:282] Add success.
W0320 14:43:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:43:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:43:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:43:13.420069  543705 net.go:648] Add success.
I0320 14:43:13.422718  543705 net.go:770] primary dev: ETH0
I0320 14:43:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:43:13.422745  543705 net.go:698] Add success.
I0320 14:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:43:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:43:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 14:43:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:43:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 14:43:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:43:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:43:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:43:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:43:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:43:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:43:23.410331  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:43:23.410345  543705 memory.go:184] no items to output this cycle
I0320 14:43:23.410345  543705 cpu.go:275] no items to output this cycle
I0320 14:43:24.743340  543705 disk_info.go:125] begin check local disk info of client
I0320 14:43:24.745779  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:43:24.745785  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5840 0xc0003d5880]
E0320 14:43:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:43:33.409788  543705 memory.go:184] no items to output this cycle
I0320 14:43:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 14:43:43.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:43:43.409830  543705 memory.go:191] Add success.
I0320 14:43:43.409831  543705 cpu.go:282] Add success.
I0320 14:43:43.419914  543705 net.go:648] Add success.
I0320 14:43:43.422756  543705 net.go:770] primary dev: ETH0
I0320 14:43:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:43:43.422782  543705 net.go:698] Add success.
I0320 14:43:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:43:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:43:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:43:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:43:53.409791  543705 memory.go:184] no items to output this cycle
I0320 14:43:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 14:44:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:44:03.409802  543705 memory.go:184] no items to output this cycle
I0320 14:44:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 14:44:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:44:13.409794  543705 memory.go:191] Add success.
I0320 14:44:13.409812  543705 cpu.go:282] Add success.
W0320 14:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:44:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:44:13.420061  543705 net.go:648] Add success.
I0320 14:44:13.423454  543705 net.go:770] primary dev: ETH0
I0320 14:44:13.423469  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:44:13.423484  543705 net.go:698] Add success.
I0320 14:44:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:44:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:44:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 14:44:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:44:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 14:44:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:44:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:44:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:44:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:44:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:44:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:44:23.409769  543705 memory.go:184] no items to output this cycle
I0320 14:44:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 14:44:24.747372  543705 disk_info.go:125] begin check local disk info of client
I0320 14:44:24.749839  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:44:24.749846  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae000 0xc0004ae040]
E0320 14:44:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:44:33.409772  543705 memory.go:184] no items to output this cycle
I0320 14:44:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 14:44:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:44:43.409807  543705 memory.go:191] Add success.
I0320 14:44:43.409813  543705 cpu.go:282] Add success.
I0320 14:44:43.419931  543705 net.go:648] Add success.
I0320 14:44:43.422912  543705 net.go:770] primary dev: ETH0
I0320 14:44:43.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:44:43.422937  543705 net.go:698] Add success.
I0320 14:44:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:44:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:44:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:44:53.409787  543705 cpu.go:275] no items to output this cycle
I0320 14:44:53.409789  543705 memory.go:184] no items to output this cycle
E0320 14:45:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:45:03.409780  543705 cpu.go:275] no items to output this cycle
I0320 14:45:03.409783  543705 memory.go:184] no items to output this cycle
E0320 14:45:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:45:13.409782  543705 memory.go:191] Add success.
I0320 14:45:13.409785  543705 cpu.go:282] Add success.
W0320 14:45:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:45:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:45:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:45:13.420185  543705 net.go:648] Add success.
I0320 14:45:13.422730  543705 net.go:770] primary dev: ETH0
I0320 14:45:13.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:45:13.422753  543705 net.go:698] Add success.
I0320 14:45:13.687295  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1d2d2433-f534-4421-b87b-641f45791942","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:45:13.687333  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:45:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:45:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:45:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 14:45:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:45:14.456860  543705 disk_worker.go:494] system disk:vda1
I0320 14:45:14.456889  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:45:16.457564  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:45:16.457625  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:45:16.457716  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:45:16.472971  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:45:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:45:23.409778  543705 memory.go:184] no items to output this cycle
I0320 14:45:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 14:45:24.751391  543705 disk_info.go:125] begin check local disk info of client
I0320 14:45:24.753822  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:45:24.753828  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057a400 0xc00057a440]
E0320 14:45:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:45:33.409772  543705 memory.go:184] no items to output this cycle
I0320 14:45:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 14:45:38.516335  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:45:38.516344  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:45:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:45:43.410786  543705 memory.go:191] Add success.
I0320 14:45:43.409826  543705 cpu.go:282] Add success.
I0320 14:45:43.420533  543705 net.go:648] Add success.
I0320 14:45:43.423171  543705 net.go:770] primary dev: ETH0
I0320 14:45:43.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:45:43.423201  543705 net.go:698] Add success.
I0320 14:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:45:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:45:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:45:53.410267  543705 memory.go:184] no items to output this cycle
I0320 14:45:53.410274  543705 cpu.go:275] no items to output this cycle
E0320 14:46:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:46:03.409801  543705 memory.go:184] no items to output this cycle
I0320 14:46:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 14:46:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:46:13.409787  543705 cpu.go:282] Add success.
I0320 14:46:13.409792  543705 memory.go:191] Add success.
W0320 14:46:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:46:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:46:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:46:13.420109  543705 net.go:648] Add success.
I0320 14:46:13.422732  543705 net.go:770] primary dev: ETH0
I0320 14:46:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:46:13.422757  543705 net.go:698] Add success.
I0320 14:46:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:46:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:46:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 14:46:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:46:14.456518  543705 disk_worker.go:494] system disk:vda1
I0320 14:46:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:46:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:46:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:46:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:46:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:46:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:46:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:46:23.409766  543705 memory.go:184] no items to output this cycle
I0320 14:46:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 14:46:24.755421  543705 disk_info.go:125] begin check local disk info of client
I0320 14:46:24.757883  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:46:24.757890  543705 disk_info.go:196] parse disk info done, disk is : [0xc000586000 0xc000586040]
E0320 14:46:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:46:33.409801  543705 memory.go:184] no items to output this cycle
I0320 14:46:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:46:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:46:43.409827  543705 memory.go:191] Add success.
I0320 14:46:43.409836  543705 cpu.go:282] Add success.
I0320 14:46:43.420125  543705 net.go:648] Add success.
I0320 14:46:43.422952  543705 net.go:770] primary dev: ETH0
I0320 14:46:43.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:46:43.422978  543705 net.go:698] Add success.
I0320 14:46:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:46:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:46:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:46:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:46:53.409784  543705 cpu.go:275] no items to output this cycle
I0320 14:46:53.409792  543705 memory.go:184] no items to output this cycle
E0320 14:47:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:47:03.409779  543705 memory.go:184] no items to output this cycle
I0320 14:47:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 14:47:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:47:13.409805  543705 memory.go:191] Add success.
I0320 14:47:13.409817  543705 cpu.go:282] Add success.
W0320 14:47:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:47:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:47:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:47:13.420062  543705 net.go:648] Add success.
I0320 14:47:13.422771  543705 net.go:770] primary dev: ETH0
I0320 14:47:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:47:13.422795  543705 net.go:698] Add success.
I0320 14:47:13.453356  543705 event_worker.go:152] Polling the log file for events...
W0320 14:47:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:47:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 14:47:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:47:14.456903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:47:14.456913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:47:14.456919  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:47:14.456985  543705 disk_worker.go:494] system disk:vda1
I0320 14:47:14.457028  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:47:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:47:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:47:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:47:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:47:16.457946  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:47:16.457963  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:47:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:47:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:47:23.409779  543705 memory.go:184] no items to output this cycle
I0320 14:47:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 14:47:24.757971  543705 disk_info.go:125] begin check local disk info of client
I0320 14:47:24.760425  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:47:24.760431  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390980 0xc0003909c0]
E0320 14:47:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:47:33.409769  543705 memory.go:184] no items to output this cycle
I0320 14:47:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 14:47:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:47:43.409799  543705 memory.go:191] Add success.
I0320 14:47:43.409799  543705 cpu.go:282] Add success.
I0320 14:47:43.420067  543705 net.go:648] Add success.
I0320 14:47:43.422860  543705 net.go:770] primary dev: ETH0
I0320 14:47:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:47:43.422885  543705 net.go:698] Add success.
I0320 14:47:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:47:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:47:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:47:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:47:53.409776  543705 memory.go:184] no items to output this cycle
I0320 14:47:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 14:48:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:48:03.409799  543705 memory.go:184] no items to output this cycle
I0320 14:48:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 14:48:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:48:13.409789  543705 memory.go:191] Add success.
I0320 14:48:13.409813  543705 cpu.go:282] Add success.
W0320 14:48:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:48:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:48:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:48:13.420033  543705 net.go:648] Add success.
I0320 14:48:13.422850  543705 net.go:770] primary dev: ETH0
I0320 14:48:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:48:13.422875  543705 net.go:698] Add success.
I0320 14:48:13.464644  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"effec298-9aaa-461c-a21d-c788419aa290","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:48:13.464679  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:48:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:48:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:48:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 14:48:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:48:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 14:48:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:48:15.455610  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:48:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:48:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:48:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:48:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:48:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:48:23.409792  543705 memory.go:184] no items to output this cycle
I0320 14:48:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 14:48:24.761432  543705 disk_info.go:125] begin check local disk info of client
I0320 14:48:24.763861  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:48:24.763866  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046b480 0xc00046b4c0]
E0320 14:48:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:48:33.409791  543705 memory.go:184] no items to output this cycle
I0320 14:48:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 14:48:38.517342  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:48:38.517350  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:48:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:48:43.410672  543705 memory.go:191] Add success.
I0320 14:48:43.409811  543705 cpu.go:282] Add success.
I0320 14:48:43.420437  543705 net.go:648] Add success.
I0320 14:48:43.423284  543705 net.go:770] primary dev: ETH0
I0320 14:48:43.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:48:43.423315  543705 net.go:698] Add success.
I0320 14:48:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:48:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:48:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:48:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:48:53.409810  543705 memory.go:184] no items to output this cycle
I0320 14:48:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 14:49:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:49:03.409769  543705 memory.go:184] no items to output this cycle
I0320 14:49:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 14:49:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:49:13.409789  543705 memory.go:191] Add success.
I0320 14:49:13.409792  543705 cpu.go:282] Add success.
W0320 14:49:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:49:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:49:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:49:13.420055  543705 net.go:648] Add success.
I0320 14:49:13.422871  543705 net.go:770] primary dev: ETH0
I0320 14:49:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:49:13.422895  543705 net.go:698] Add success.
I0320 14:49:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:49:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:49:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 14:49:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:49:14.456994  543705 disk_worker.go:494] system disk:vda1
I0320 14:49:14.457023  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:49:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:49:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:49:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:49:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:49:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:49:23.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:49:23.410260  543705 memory.go:184] no items to output this cycle
I0320 14:49:23.410266  543705 cpu.go:275] no items to output this cycle
I0320 14:49:24.765463  543705 disk_info.go:125] begin check local disk info of client
I0320 14:49:24.767928  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:49:24.767934  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005192c0 0xc000519300]
E0320 14:49:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:49:33.409774  543705 memory.go:184] no items to output this cycle
I0320 14:49:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 14:49:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:49:43.409790  543705 memory.go:191] Add success.
I0320 14:49:43.409808  543705 cpu.go:282] Add success.
I0320 14:49:43.419879  543705 net.go:648] Add success.
I0320 14:49:43.422731  543705 net.go:770] primary dev: ETH0
I0320 14:49:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:49:43.422756  543705 net.go:698] Add success.
I0320 14:49:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:49:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:49:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:49:53.409809  543705 memory.go:184] no items to output this cycle
I0320 14:49:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 14:50:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:50:03.409777  543705 memory.go:184] no items to output this cycle
I0320 14:50:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 14:50:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:50:13.409819  543705 memory.go:191] Add success.
I0320 14:50:13.409829  543705 cpu.go:282] Add success.
W0320 14:50:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:50:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:50:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:50:13.420062  543705 net.go:648] Add success.
I0320 14:50:13.423001  543705 net.go:770] primary dev: ETH0
I0320 14:50:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:50:13.423029  543705 net.go:698] Add success.
I0320 14:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:50:14.455247  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:50:14.455335  543705 disk_worker.go:708] disk space is not compliant
W0320 14:50:14.455340  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:50:14.456969  543705 disk_worker.go:494] system disk:vda1
I0320 14:50:14.457002  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:50:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:50:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:50:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:50:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:50:23.409784  543705 memory.go:184] no items to output this cycle
I0320 14:50:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 14:50:24.769492  543705 disk_info.go:125] begin check local disk info of client
I0320 14:50:24.771950  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:50:24.771956  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262300 0xc000262340]
E0320 14:50:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:50:33.409771  543705 memory.go:184] no items to output this cycle
I0320 14:50:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 14:50:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:50:43.409792  543705 memory.go:191] Add success.
I0320 14:50:43.409821  543705 cpu.go:282] Add success.
I0320 14:50:43.419910  543705 net.go:648] Add success.
I0320 14:50:43.422459  543705 net.go:770] primary dev: ETH0
I0320 14:50:43.422475  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:50:43.422490  543705 net.go:698] Add success.
I0320 14:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:50:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:50:53.409812  543705 memory.go:184] no items to output this cycle
I0320 14:50:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 14:51:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:51:03.409810  543705 memory.go:184] no items to output this cycle
I0320 14:51:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 14:51:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:51:13.409792  543705 memory.go:191] Add success.
I0320 14:51:13.409811  543705 cpu.go:282] Add success.
W0320 14:51:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:51:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:51:13.420071  543705 net.go:648] Add success.
I0320 14:51:13.422734  543705 net.go:770] primary dev: ETH0
I0320 14:51:13.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:51:13.422763  543705 net.go:698] Add success.
I0320 14:51:13.469437  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07381519-2296-4bdc-b06f-11baa0ddc5b9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:51:13.469470  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:51:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:51:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:51:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 14:51:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:51:14.457034  543705 disk_worker.go:494] system disk:vda1
I0320 14:51:14.457063  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:51:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:51:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:51:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:51:23.409779  543705 memory.go:184] no items to output this cycle
I0320 14:51:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 14:51:24.773501  543705 disk_info.go:125] begin check local disk info of client
I0320 14:51:24.775950  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:51:24.775956  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b680 0xc00036b6c0]
E0320 14:51:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:51:33.409776  543705 memory.go:184] no items to output this cycle
I0320 14:51:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 14:51:38.518357  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:51:38.518364  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:51:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:51:43.410737  543705 memory.go:191] Add success.
I0320 14:51:43.409816  543705 cpu.go:282] Add success.
I0320 14:51:43.420489  543705 net.go:648] Add success.
I0320 14:51:43.423266  543705 net.go:770] primary dev: ETH0
I0320 14:51:43.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:51:43.423293  543705 net.go:698] Add success.
I0320 14:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:51:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:51:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:51:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:51:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 14:51:53.409786  543705 memory.go:184] no items to output this cycle
E0320 14:52:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:52:03.409768  543705 memory.go:184] no items to output this cycle
I0320 14:52:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 14:52:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:52:13.409806  543705 memory.go:191] Add success.
I0320 14:52:13.409807  543705 cpu.go:282] Add success.
W0320 14:52:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:52:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:52:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:52:13.420058  543705 net.go:648] Add success.
I0320 14:52:13.422832  543705 net.go:770] primary dev: ETH0
I0320 14:52:13.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:52:13.422858  543705 net.go:698] Add success.
W0320 14:52:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:52:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 14:52:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:52:14.455921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:52:14.455930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:52:14.455936  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:52:14.456843  543705 disk_worker.go:494] system disk:vda1
I0320 14:52:14.456880  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:52:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:52:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:52:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:52:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:52:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:52:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:52:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:52:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:52:23.409774  543705 memory.go:184] no items to output this cycle
I0320 14:52:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 14:52:24.776040  543705 disk_info.go:125] begin check local disk info of client
I0320 14:52:24.778480  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:52:24.778485  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470040 0xc000470080]
E0320 14:52:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:52:33.409778  543705 memory.go:184] no items to output this cycle
I0320 14:52:33.409778  543705 cpu.go:275] no items to output this cycle
E0320 14:52:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:52:43.409797  543705 memory.go:191] Add success.
I0320 14:52:43.409801  543705 cpu.go:282] Add success.
I0320 14:52:43.419888  543705 net.go:648] Add success.
I0320 14:52:43.422841  543705 net.go:770] primary dev: ETH0
I0320 14:52:43.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:52:43.422871  543705 net.go:698] Add success.
I0320 14:52:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:52:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:52:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:52:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:52:53.409808  543705 memory.go:184] no items to output this cycle
I0320 14:52:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 14:53:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:53:03.409777  543705 memory.go:184] no items to output this cycle
I0320 14:53:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 14:53:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:53:13.409786  543705 memory.go:191] Add success.
I0320 14:53:13.409786  543705 cpu.go:282] Add success.
W0320 14:53:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:53:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:53:13.420037  543705 net.go:648] Add success.
I0320 14:53:13.422841  543705 net.go:770] primary dev: ETH0
I0320 14:53:13.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:53:13.422865  543705 net.go:698] Add success.
I0320 14:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:53:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:53:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 14:53:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:53:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 14:53:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:53:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:53:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:53:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:53:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:53:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:53:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:53:23.409773  543705 memory.go:184] no items to output this cycle
I0320 14:53:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 14:53:24.779531  543705 disk_info.go:125] begin check local disk info of client
I0320 14:53:24.781978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:53:24.781984  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004700c0 0xc000470100]
E0320 14:53:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:53:33.409773  543705 memory.go:184] no items to output this cycle
I0320 14:53:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 14:53:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:53:43.409789  543705 memory.go:191] Add success.
I0320 14:53:43.409806  543705 cpu.go:282] Add success.
I0320 14:53:43.419982  543705 net.go:648] Add success.
I0320 14:53:43.422661  543705 net.go:770] primary dev: ETH0
I0320 14:53:43.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:53:43.422702  543705 net.go:698] Add success.
I0320 14:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:53:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:53:53.409785  543705 memory.go:184] no items to output this cycle
I0320 14:53:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 14:54:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:54:03.409768  543705 memory.go:184] no items to output this cycle
I0320 14:54:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 14:54:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:54:13.409813  543705 memory.go:191] Add success.
I0320 14:54:13.409821  543705 cpu.go:282] Add success.
W0320 14:54:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:54:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:54:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:54:13.420067  543705 net.go:648] Add success.
I0320 14:54:13.422843  543705 net.go:770] primary dev: ETH0
I0320 14:54:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:54:13.422874  543705 net.go:698] Add success.
I0320 14:54:13.578436  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c142ea5c-be98-4135-b1f4-cac3a1bfde9f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:54:13.578481  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 14:54:14.453975  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:54:14.454129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:54:14.454192  543705 disk_worker.go:708] disk space is not compliant
W0320 14:54:14.454195  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:54:14.455530  543705 disk_worker.go:494] system disk:vda1
I0320 14:54:14.455575  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:54:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:54:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:54:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:54:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:54:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:54:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:54:23.409766  543705 memory.go:184] no items to output this cycle
I0320 14:54:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 14:54:24.783546  543705 disk_info.go:125] begin check local disk info of client
I0320 14:54:24.785995  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:54:24.786000  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004633c0 0xc000463400]
E0320 14:54:33.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:54:33.410389  543705 memory.go:184] no items to output this cycle
I0320 14:54:33.410424  543705 cpu.go:275] no items to output this cycle
I0320 14:54:38.519350  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:54:38.519358  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:54:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:54:43.410697  543705 memory.go:191] Add success.
I0320 14:54:43.409825  543705 cpu.go:282] Add success.
I0320 14:54:43.420479  543705 net.go:648] Add success.
I0320 14:54:43.423085  543705 net.go:770] primary dev: ETH0
I0320 14:54:43.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:54:43.423111  543705 net.go:698] Add success.
I0320 14:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:54:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:54:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:54:53.409778  543705 memory.go:184] no items to output this cycle
I0320 14:54:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 14:55:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:55:03.409793  543705 memory.go:184] no items to output this cycle
I0320 14:55:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 14:55:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:55:13.409781  543705 memory.go:191] Add success.
W0320 14:55:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:55:13.409818  543705 cpu.go:282] Add success.
I0320 14:55:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:55:13.420052  543705 net.go:648] Add success.
I0320 14:55:13.422862  543705 net.go:770] primary dev: ETH0
I0320 14:55:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:55:13.422891  543705 net.go:698] Add success.
I0320 14:55:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:55:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:55:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 14:55:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:55:14.456572  543705 disk_worker.go:494] system disk:vda1
I0320 14:55:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:55:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:55:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:55:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:55:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:55:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:55:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:55:23.409771  543705 memory.go:184] no items to output this cycle
I0320 14:55:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 14:55:24.787591  543705 disk_info.go:125] begin check local disk info of client
I0320 14:55:24.790037  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:55:24.790043  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae000 0xc0004ae040]
E0320 14:55:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:55:33.409773  543705 memory.go:184] no items to output this cycle
I0320 14:55:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 14:55:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:55:43.409793  543705 memory.go:191] Add success.
I0320 14:55:43.409796  543705 cpu.go:282] Add success.
I0320 14:55:43.420302  543705 net.go:648] Add success.
I0320 14:55:43.423091  543705 net.go:770] primary dev: ETH0
I0320 14:55:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:55:43.423117  543705 net.go:698] Add success.
I0320 14:55:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:55:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:55:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:55:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:55:53.409781  543705 memory.go:184] no items to output this cycle
I0320 14:55:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 14:56:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:56:03.409798  543705 memory.go:184] no items to output this cycle
I0320 14:56:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 14:56:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:56:13.409792  543705 memory.go:191] Add success.
I0320 14:56:13.409793  543705 cpu.go:282] Add success.
W0320 14:56:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:56:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:56:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:56:13.420061  543705 net.go:648] Add success.
I0320 14:56:13.423008  543705 net.go:770] primary dev: ETH0
I0320 14:56:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:56:13.423033  543705 net.go:698] Add success.
I0320 14:56:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:56:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:56:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 14:56:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:56:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 14:56:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:56:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:56:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:56:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:56:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:56:23.409782  543705 memory.go:184] no items to output this cycle
I0320 14:56:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 14:56:24.790126  543705 disk_info.go:125] begin check local disk info of client
I0320 14:56:24.792549  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:56:24.792554  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483140 0xc000483180]
E0320 14:56:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:56:33.409781  543705 memory.go:184] no items to output this cycle
I0320 14:56:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 14:56:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:56:43.409794  543705 memory.go:191] Add success.
I0320 14:56:43.409814  543705 cpu.go:282] Add success.
I0320 14:56:43.419885  543705 net.go:648] Add success.
I0320 14:56:43.422933  543705 net.go:770] primary dev: ETH0
I0320 14:56:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:56:43.422979  543705 net.go:698] Add success.
I0320 14:56:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:56:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:56:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:56:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:56:53.409771  543705 memory.go:184] no items to output this cycle
I0320 14:56:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 14:57:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:57:03.409774  543705 memory.go:184] no items to output this cycle
I0320 14:57:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 14:57:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:57:13.409793  543705 memory.go:191] Add success.
I0320 14:57:13.409798  543705 cpu.go:282] Add success.
W0320 14:57:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:57:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:57:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:57:13.419894  543705 net.go:770] primary dev: ETH0
I0320 14:57:13.419909  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:57:13.419924  543705 net.go:698] Add success.
I0320 14:57:13.426707  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 14:57:13.426949  543705 net.go:648] Add success.
I0320 14:57:13.453352  543705 event_worker.go:152] Polling the log file for events...
I0320 14:57:13.469083  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f784f0cc-5e44-4530-b798-0db705f7ba73","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 14:57:13.469115  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 14:57:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:57:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 14:57:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 14:57:14.456747  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 14:57:14.456756  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 14:57:14.456761  543705 custom_config.go:64] query custom config with name: gpu
I0320 14:57:14.456818  543705 disk_worker.go:494] system disk:vda1
I0320 14:57:14.456849  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 14:57:15.456880  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 14:57:15.456889  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:57:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 14:57:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 14:57:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:57:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:57:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:57:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:57:23.409806  543705 memory.go:184] no items to output this cycle
I0320 14:57:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 14:57:24.793601  543705 disk_info.go:125] begin check local disk info of client
I0320 14:57:24.796035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:57:24.796041  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae080 0xc0004ae0c0]
E0320 14:57:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:57:33.409782  543705 memory.go:184] no items to output this cycle
I0320 14:57:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 14:57:38.520353  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 14:57:38.520361  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 14:57:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:57:43.410642  543705 memory.go:191] Add success.
I0320 14:57:43.409820  543705 cpu.go:282] Add success.
I0320 14:57:43.420346  543705 net.go:648] Add success.
I0320 14:57:43.422690  543705 net.go:770] primary dev: ETH0
I0320 14:57:43.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:57:43.422727  543705 net.go:698] Add success.
I0320 14:57:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:57:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:57:53.410280  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:57:53.410301  543705 memory.go:184] no items to output this cycle
I0320 14:57:53.410316  543705 cpu.go:275] no items to output this cycle
E0320 14:58:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:58:03.409786  543705 memory.go:184] no items to output this cycle
I0320 14:58:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 14:58:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:58:13.409794  543705 memory.go:191] Add success.
I0320 14:58:13.409797  543705 cpu.go:282] Add success.
W0320 14:58:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:58:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:58:13.420126  543705 net.go:648] Add success.
I0320 14:58:13.423091  543705 net.go:770] primary dev: ETH0
I0320 14:58:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:58:13.423116  543705 net.go:698] Add success.
I0320 14:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:58:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:58:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 14:58:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:58:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 14:58:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:58:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:58:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:58:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:58:16.472109  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:58:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:58:23.409819  543705 memory.go:184] no items to output this cycle
I0320 14:58:23.409831  543705 cpu.go:275] no items to output this cycle
I0320 14:58:24.797617  543705 disk_info.go:125] begin check local disk info of client
I0320 14:58:24.800055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:58:24.800060  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484440 0xc000484480]
E0320 14:58:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:58:33.409780  543705 memory.go:184] no items to output this cycle
I0320 14:58:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 14:58:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:58:43.409800  543705 memory.go:191] Add success.
I0320 14:58:43.409822  543705 cpu.go:282] Add success.
I0320 14:58:43.419894  543705 net.go:648] Add success.
I0320 14:58:43.422736  543705 net.go:770] primary dev: ETH0
I0320 14:58:43.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:58:43.422761  543705 net.go:698] Add success.
I0320 14:58:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:58:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:58:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:58:53.409772  543705 memory.go:184] no items to output this cycle
I0320 14:58:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 14:59:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:59:03.409777  543705 memory.go:184] no items to output this cycle
I0320 14:59:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 14:59:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:59:13.409785  543705 memory.go:191] Add success.
I0320 14:59:13.409785  543705 cpu.go:282] Add success.
W0320 14:59:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 14:59:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 14:59:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 14:59:13.420120  543705 net.go:648] Add success.
I0320 14:59:13.422954  543705 net.go:770] primary dev: ETH0
I0320 14:59:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:59:13.422983  543705 net.go:698] Add success.
I0320 14:59:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 14:59:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 14:59:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 14:59:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 14:59:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 14:59:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 14:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 14:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:59:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 14:59:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 14:59:23.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:59:23.410276  543705 memory.go:184] no items to output this cycle
I0320 14:59:23.410276  543705 cpu.go:275] no items to output this cycle
I0320 14:59:24.800139  543705 disk_info.go:125] begin check local disk info of client
I0320 14:59:24.802657  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 14:59:24.802662  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376b80 0xc000376bc0]
E0320 14:59:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:59:33.409778  543705 memory.go:184] no items to output this cycle
I0320 14:59:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 14:59:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:59:43.409783  543705 memory.go:191] Add success.
I0320 14:59:43.409813  543705 cpu.go:282] Add success.
I0320 14:59:43.420009  543705 net.go:648] Add success.
I0320 14:59:43.422871  543705 net.go:770] primary dev: ETH0
I0320 14:59:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0320 14:59:43.422899  543705 net.go:698] Add success.
I0320 14:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 14:59:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 14:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 14:59:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 14:59:53.409775  543705 memory.go:184] no items to output this cycle
I0320 14:59:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 15:00:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:00:03.409774  543705 memory.go:184] no items to output this cycle
I0320 15:00:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 15:00:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:00:13.409786  543705 memory.go:191] Add success.
I0320 15:00:13.409786  543705 cpu.go:282] Add success.
W0320 15:00:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:00:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:00:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:00:13.420053  543705 net.go:648] Add success.
I0320 15:00:13.422566  543705 net.go:770] primary dev: ETH0
I0320 15:00:13.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:00:13.422589  543705 net.go:698] Add success.
I0320 15:00:13.468487  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ada63591-86cf-4b9c-ab34-ebe15726cc21","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:00:13.468526  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:00:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:00:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:00:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 15:00:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:00:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 15:00:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:00:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:00:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:00:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:00:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:00:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:00:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:00:23.409887  543705 memory.go:184] no items to output this cycle
I0320 15:00:23.409918  543705 cpu.go:275] no items to output this cycle
I0320 15:00:24.804652  543705 disk_info.go:125] begin check local disk info of client
I0320 15:00:24.807112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:00:24.807120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae000 0xc0004ae040]
I0320 15:00:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 15:00:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:00:33.409808  543705 memory.go:184] no items to output this cycle
I0320 15:00:38.521369  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:00:38.521377  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:00:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:00:43.410578  543705 memory.go:191] Add success.
I0320 15:00:43.409818  543705 cpu.go:282] Add success.
I0320 15:00:43.420273  543705 net.go:648] Add success.
I0320 15:00:43.423025  543705 net.go:770] primary dev: ETH0
I0320 15:00:43.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:00:43.423050  543705 net.go:698] Add success.
I0320 15:00:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:00:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:00:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:00:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:00:53.409785  543705 memory.go:184] no items to output this cycle
I0320 15:00:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 15:01:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:01:03.409780  543705 memory.go:184] no items to output this cycle
I0320 15:01:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 15:01:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:01:13.409783  543705 memory.go:191] Add success.
I0320 15:01:13.409786  543705 cpu.go:282] Add success.
W0320 15:01:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:01:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:01:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:01:13.420204  543705 net.go:648] Add success.
I0320 15:01:13.423136  543705 net.go:770] primary dev: ETH0
I0320 15:01:13.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:01:13.423167  543705 net.go:698] Add success.
I0320 15:01:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:01:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:01:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 15:01:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:01:14.456623  543705 disk_worker.go:494] system disk:vda1
I0320 15:01:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:01:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:01:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:01:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:01:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:01:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:01:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:01:23.409791  543705 memory.go:184] no items to output this cycle
I0320 15:01:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 15:01:24.808665  543705 disk_info.go:125] begin check local disk info of client
I0320 15:01:24.811099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:01:24.811106  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331180 0xc0003311c0]
E0320 15:01:33.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:01:33.409864  543705 memory.go:184] no items to output this cycle
I0320 15:01:33.410026  543705 cpu.go:275] no items to output this cycle
E0320 15:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:01:43.409795  543705 memory.go:191] Add success.
I0320 15:01:43.409814  543705 cpu.go:282] Add success.
I0320 15:01:43.419978  543705 net.go:648] Add success.
I0320 15:01:43.422849  543705 net.go:770] primary dev: ETH0
I0320 15:01:43.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:01:43.422884  543705 net.go:698] Add success.
I0320 15:01:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:01:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:01:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:01:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:01:53.409788  543705 memory.go:184] no items to output this cycle
I0320 15:01:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 15:02:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:02:03.409803  543705 memory.go:184] no items to output this cycle
I0320 15:02:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 15:02:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:02:13.409777  543705 memory.go:191] Add success.
W0320 15:02:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:02:13.409816  543705 cpu.go:282] Add success.
W0320 15:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:02:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:02:13.420317  543705 net.go:648] Add success.
I0320 15:02:13.423149  543705 net.go:770] primary dev: ETH0
I0320 15:02:13.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:02:13.423178  543705 net.go:698] Add success.
W0320 15:02:14.454283  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:02:14.454296  543705 disk_worker.go:708] disk space is not compliant
W0320 15:02:14.454300  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:02:14.454922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:02:14.454930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:02:14.454936  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:02:14.455864  543705 disk_worker.go:494] system disk:vda1
I0320 15:02:14.455908  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:02:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:02:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:02:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:02:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:02:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:02:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:02:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:02:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:02:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 15:02:23.409791  543705 memory.go:184] no items to output this cycle
I0320 15:02:24.812745  543705 disk_info.go:125] begin check local disk info of client
I0320 15:02:24.815291  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:02:24.815297  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b080 0xc00032b0c0]
E0320 15:02:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:02:33.409780  543705 memory.go:184] no items to output this cycle
I0320 15:02:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 15:02:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:02:43.409823  543705 memory.go:191] Add success.
I0320 15:02:43.409835  543705 cpu.go:282] Add success.
I0320 15:02:43.420022  543705 net.go:648] Add success.
I0320 15:02:43.422900  543705 net.go:770] primary dev: ETH0
I0320 15:02:43.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:02:43.422925  543705 net.go:698] Add success.
I0320 15:02:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:02:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:02:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:02:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:02:53.409800  543705 memory.go:184] no items to output this cycle
I0320 15:02:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 15:03:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:03:03.409780  543705 memory.go:184] no items to output this cycle
I0320 15:03:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 15:03:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:03:13.409785  543705 memory.go:191] Add success.
I0320 15:03:13.409802  543705 cpu.go:282] Add success.
W0320 15:03:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:03:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:03:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:03:13.420260  543705 net.go:648] Add success.
I0320 15:03:13.422893  543705 net.go:770] primary dev: ETH0
I0320 15:03:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:03:13.422918  543705 net.go:698] Add success.
I0320 15:03:13.468924  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"634a3010-b6e9-4e8b-afcf-b6f9be5b7f97","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:03:13.468958  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:03:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:03:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:03:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 15:03:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:03:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 15:03:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:03:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:03:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:03:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:03:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:03:23.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:03:23.409899  543705 memory.go:184] no items to output this cycle
I0320 15:03:23.409903  543705 cpu.go:275] no items to output this cycle
I0320 15:03:24.815378  543705 disk_info.go:125] begin check local disk info of client
I0320 15:03:24.817874  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:03:24.817879  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0320 15:03:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:03:33.409780  543705 memory.go:184] no items to output this cycle
I0320 15:03:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 15:03:38.522367  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:03:38.522375  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:03:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:03:43.410770  543705 memory.go:191] Add success.
I0320 15:03:43.409823  543705 cpu.go:282] Add success.
I0320 15:03:43.420548  543705 net.go:648] Add success.
I0320 15:03:43.423508  543705 net.go:770] primary dev: ETH0
I0320 15:03:43.423521  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:03:43.423533  543705 net.go:698] Add success.
I0320 15:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:03:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:03:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:03:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:03:53.409787  543705 memory.go:184] no items to output this cycle
I0320 15:03:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 15:04:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:04:03.409778  543705 memory.go:184] no items to output this cycle
I0320 15:04:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 15:04:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:04:13.409778  543705 memory.go:191] Add success.
W0320 15:04:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:04:13.409810  543705 cpu.go:282] Add success.
W0320 15:04:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:04:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:04:13.420080  543705 net.go:770] primary dev: ETH0
I0320 15:04:13.420096  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:04:13.420111  543705 net.go:698] Add success.
I0320 15:04:13.420468  543705 net.go:648] Add success.
I0320 15:04:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:04:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:04:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 15:04:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:04:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 15:04:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:04:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:04:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:04:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:04:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:04:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:04:23.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:04:23.409880  543705 memory.go:184] no items to output this cycle
I0320 15:04:23.409982  543705 cpu.go:275] no items to output this cycle
I0320 15:04:24.819740  543705 disk_info.go:125] begin check local disk info of client
I0320 15:04:24.822223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:04:24.822229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472180 0xc0004721c0]
E0320 15:04:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:04:33.409773  543705 memory.go:184] no items to output this cycle
I0320 15:04:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 15:04:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:04:43.409827  543705 memory.go:191] Add success.
I0320 15:04:43.409838  543705 cpu.go:282] Add success.
I0320 15:04:43.420059  543705 net.go:648] Add success.
I0320 15:04:43.422431  543705 net.go:770] primary dev: ETH0
I0320 15:04:43.422444  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:04:43.422458  543705 net.go:698] Add success.
I0320 15:04:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:04:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:04:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:04:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:04:53.409780  543705 cpu.go:275] no items to output this cycle
I0320 15:04:53.409789  543705 memory.go:184] no items to output this cycle
E0320 15:05:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:05:03.409778  543705 memory.go:184] no items to output this cycle
I0320 15:05:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 15:05:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:05:13.409790  543705 memory.go:191] Add success.
I0320 15:05:13.409794  543705 cpu.go:282] Add success.
W0320 15:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:05:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:05:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:05:13.420046  543705 net.go:648] Add success.
I0320 15:05:13.423218  543705 net.go:770] primary dev: ETH0
I0320 15:05:13.423239  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:05:13.423251  543705 net.go:698] Add success.
I0320 15:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:05:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:05:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 15:05:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:05:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 15:05:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:05:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:05:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:05:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:05:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:05:23.409773  543705 memory.go:184] no items to output this cycle
I0320 15:05:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 15:05:24.823737  543705 disk_info.go:125] begin check local disk info of client
I0320 15:05:24.826176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:05:24.826182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8cc0 0xc0003e8d00]
E0320 15:05:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:05:33.409776  543705 memory.go:184] no items to output this cycle
I0320 15:05:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 15:05:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:05:43.409822  543705 memory.go:191] Add success.
I0320 15:05:43.409823  543705 cpu.go:282] Add success.
I0320 15:05:43.419955  543705 net.go:648] Add success.
I0320 15:05:43.422772  543705 net.go:770] primary dev: ETH0
I0320 15:05:43.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:05:43.422798  543705 net.go:698] Add success.
I0320 15:05:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:05:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:05:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:05:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:05:53.409777  543705 memory.go:184] no items to output this cycle
I0320 15:05:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 15:06:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:06:03.409793  543705 memory.go:184] no items to output this cycle
I0320 15:06:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 15:06:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:06:13.409784  543705 memory.go:191] Add success.
I0320 15:06:13.409806  543705 cpu.go:282] Add success.
W0320 15:06:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:06:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:06:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:06:13.420060  543705 net.go:648] Add success.
I0320 15:06:13.422782  543705 net.go:770] primary dev: ETH0
I0320 15:06:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:06:13.422810  543705 net.go:698] Add success.
I0320 15:06:13.504405  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f95c362a-fcff-4bcb-97a7-4e2aebabdc66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:06:13.504440  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:06:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:06:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:06:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 15:06:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:06:14.456855  543705 disk_worker.go:494] system disk:vda1
I0320 15:06:14.456884  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:06:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:06:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:06:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:06:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:06:23.409769  543705 memory.go:184] no items to output this cycle
I0320 15:06:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 15:06:24.827763  543705 disk_info.go:125] begin check local disk info of client
I0320 15:06:24.830190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:06:24.830196  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025b180 0xc00025b1c0]
E0320 15:06:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:06:33.409807  543705 memory.go:184] no items to output this cycle
I0320 15:06:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 15:06:38.523383  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:06:38.523392  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:06:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:06:43.410543  543705 memory.go:191] Add success.
I0320 15:06:43.409838  543705 cpu.go:282] Add success.
I0320 15:06:43.420235  543705 net.go:648] Add success.
I0320 15:06:43.422926  543705 net.go:770] primary dev: ETH0
I0320 15:06:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:06:43.422957  543705 net.go:698] Add success.
I0320 15:06:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:06:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:06:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:06:53.409775  543705 memory.go:184] no items to output this cycle
I0320 15:06:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 15:07:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:07:03.409780  543705 cpu.go:275] no items to output this cycle
I0320 15:07:03.409783  543705 memory.go:184] no items to output this cycle
W0320 15:07:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:07:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:07:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 15:07:13.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:07:13.409830  543705 cpu.go:282] Add success.
I0320 15:07:13.409847  543705 memory.go:191] Add success.
I0320 15:07:13.420390  543705 net.go:648] Add success.
I0320 15:07:13.423248  543705 net.go:770] primary dev: ETH0
I0320 15:07:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:07:13.423273  543705 net.go:698] Add success.
I0320 15:07:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0320 15:07:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:07:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 15:07:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:07:14.457067  543705 disk_worker.go:494] system disk:vda1
I0320 15:07:14.457092  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:07:14.457229  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:07:14.457234  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:07:14.457238  543705 custom_config.go:64] query custom config with name: gpu
E0320 15:07:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:07:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:07:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:07:16.457894  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:07:16.457947  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:07:16.457965  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:07:16.472275  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:07:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:07:23.409801  543705 memory.go:184] no items to output this cycle
I0320 15:07:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 15:07:24.830277  543705 disk_info.go:125] begin check local disk info of client
I0320 15:07:24.832758  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:07:24.832764  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae900 0xc0004ae940]
E0320 15:07:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:07:33.409782  543705 memory.go:184] no items to output this cycle
I0320 15:07:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 15:07:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:07:43.409788  543705 memory.go:191] Add success.
I0320 15:07:43.409821  543705 cpu.go:282] Add success.
I0320 15:07:43.419869  543705 net.go:648] Add success.
I0320 15:07:43.422623  543705 net.go:770] primary dev: ETH0
I0320 15:07:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:07:43.422648  543705 net.go:698] Add success.
I0320 15:07:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:07:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:07:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:07:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:07:53.409809  543705 memory.go:184] no items to output this cycle
I0320 15:07:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 15:08:03.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:08:03.409850  543705 memory.go:184] no items to output this cycle
I0320 15:08:03.409909  543705 cpu.go:275] no items to output this cycle
E0320 15:08:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:08:13.409778  543705 memory.go:191] Add success.
W0320 15:08:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:08:13.409811  543705 cpu.go:282] Add success.
W0320 15:08:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:08:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:08:13.420209  543705 net.go:648] Add success.
I0320 15:08:13.423218  543705 net.go:770] primary dev: ETH0
I0320 15:08:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:08:13.423251  543705 net.go:698] Add success.
I0320 15:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:08:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:08:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 15:08:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:08:14.456820  543705 disk_worker.go:494] system disk:vda1
I0320 15:08:14.456848  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:08:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:08:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:08:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:08:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:08:23.409768  543705 memory.go:184] no items to output this cycle
I0320 15:08:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 15:08:24.833672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:08:24.836087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:08:24.836093  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509bc0 0xc000509c00]
E0320 15:08:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:08:33.409806  543705 memory.go:184] no items to output this cycle
I0320 15:08:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 15:08:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:08:43.409829  543705 memory.go:191] Add success.
I0320 15:08:43.409830  543705 cpu.go:282] Add success.
I0320 15:08:43.419969  543705 net.go:648] Add success.
I0320 15:08:43.423096  543705 net.go:770] primary dev: ETH0
I0320 15:08:43.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:08:43.423121  543705 net.go:698] Add success.
I0320 15:08:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:08:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:08:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:08:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:08:53.409804  543705 memory.go:184] no items to output this cycle
I0320 15:08:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 15:09:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:09:03.409795  543705 memory.go:184] no items to output this cycle
I0320 15:09:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 15:09:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:09:13.409796  543705 memory.go:191] Add success.
W0320 15:09:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:09:13.409826  543705 cpu.go:282] Add success.
W0320 15:09:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:09:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:09:13.420301  543705 net.go:648] Add success.
I0320 15:09:13.423146  543705 net.go:770] primary dev: ETH0
I0320 15:09:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:09:13.423171  543705 net.go:698] Add success.
I0320 15:09:13.588163  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f01033e-3df3-43e8-bb70-3f3dc967a5dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:09:13.588203  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:09:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:09:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:09:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 15:09:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:09:14.456720  543705 disk_worker.go:494] system disk:vda1
I0320 15:09:14.456751  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:09:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:09:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:09:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:09:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:09:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:09:23.409765  543705 memory.go:184] no items to output this cycle
I0320 15:09:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 15:09:24.837673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:09:24.840158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:09:24.840165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0b40 0xc0003c0b80]
E0320 15:09:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:09:33.409785  543705 memory.go:184] no items to output this cycle
I0320 15:09:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 15:09:38.524388  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:09:38.524395  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:09:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:09:43.410674  543705 memory.go:191] Add success.
I0320 15:09:43.409819  543705 cpu.go:282] Add success.
I0320 15:09:43.420384  543705 net.go:648] Add success.
I0320 15:09:43.422958  543705 net.go:770] primary dev: ETH0
I0320 15:09:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:09:43.422984  543705 net.go:698] Add success.
I0320 15:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:09:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:09:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:09:53.409785  543705 memory.go:184] no items to output this cycle
I0320 15:09:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 15:10:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:10:03.409800  543705 memory.go:184] no items to output this cycle
I0320 15:10:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 15:10:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:10:13.409790  543705 memory.go:191] Add success.
I0320 15:10:13.409791  543705 cpu.go:282] Add success.
W0320 15:10:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:10:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:10:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:10:13.420255  543705 net.go:648] Add success.
I0320 15:10:13.422978  543705 net.go:770] primary dev: ETH0
I0320 15:10:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:10:13.423004  543705 net.go:698] Add success.
I0320 15:10:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:10:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:10:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 15:10:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:10:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 15:10:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:10:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:10:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:10:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:10:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:10:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:10:23.409763  543705 memory.go:184] no items to output this cycle
I0320 15:10:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 15:10:24.841678  543705 disk_info.go:125] begin check local disk info of client
I0320 15:10:24.844083  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:10:24.844090  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370d80 0xc000370dc0]
E0320 15:10:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:10:33.409812  543705 memory.go:184] no items to output this cycle
I0320 15:10:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 15:10:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:10:43.409803  543705 memory.go:191] Add success.
I0320 15:10:43.409804  543705 cpu.go:282] Add success.
I0320 15:10:43.420000  543705 net.go:648] Add success.
I0320 15:10:43.422759  543705 net.go:770] primary dev: ETH0
I0320 15:10:43.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:10:43.422788  543705 net.go:698] Add success.
I0320 15:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:10:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:10:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:10:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:10:53.409781  543705 memory.go:184] no items to output this cycle
I0320 15:10:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 15:11:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:11:03.409776  543705 memory.go:184] no items to output this cycle
I0320 15:11:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 15:11:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:11:13.409782  543705 memory.go:191] Add success.
I0320 15:11:13.409803  543705 cpu.go:282] Add success.
W0320 15:11:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:11:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:11:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:11:13.420364  543705 net.go:648] Add success.
I0320 15:11:13.423268  543705 net.go:770] primary dev: ETH0
I0320 15:11:13.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:11:13.423293  543705 net.go:698] Add success.
I0320 15:11:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:11:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:11:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 15:11:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:11:14.456552  543705 disk_worker.go:494] system disk:vda1
I0320 15:11:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:11:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:11:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:11:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:11:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:11:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:11:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:11:23.409788  543705 memory.go:184] no items to output this cycle
I0320 15:11:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 15:11:24.845674  543705 disk_info.go:125] begin check local disk info of client
I0320 15:11:24.848162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:11:24.848168  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad80 0xc00007adc0]
E0320 15:11:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:11:33.409796  543705 memory.go:184] no items to output this cycle
I0320 15:11:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 15:11:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:11:43.409792  543705 memory.go:191] Add success.
I0320 15:11:43.409812  543705 cpu.go:282] Add success.
I0320 15:11:43.420062  543705 net.go:648] Add success.
I0320 15:11:43.423390  543705 net.go:770] primary dev: ETH0
I0320 15:11:43.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:11:43.423414  543705 net.go:698] Add success.
I0320 15:11:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:11:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:11:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:11:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:11:53.409819  543705 memory.go:184] no items to output this cycle
I0320 15:11:53.409829  543705 cpu.go:275] no items to output this cycle
E0320 15:12:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:12:03.409779  543705 memory.go:184] no items to output this cycle
I0320 15:12:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 15:12:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:12:13.409825  543705 memory.go:191] Add success.
I0320 15:12:13.409828  543705 cpu.go:282] Add success.
W0320 15:12:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:12:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:12:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:12:13.420236  543705 net.go:648] Add success.
I0320 15:12:13.422903  543705 net.go:770] primary dev: ETH0
I0320 15:12:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:12:13.422927  543705 net.go:698] Add success.
I0320 15:12:13.474027  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75841e96-7fb5-45ea-9983-55107fc01553","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:12:13.474061  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 15:12:14.455373  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:12:14.455394  543705 disk_worker.go:708] disk space is not compliant
W0320 15:12:14.455399  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:12:14.456639  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:12:14.456649  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:12:14.456670  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:12:14.456929  543705 disk_worker.go:494] system disk:vda1
I0320 15:12:14.456965  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:12:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:12:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:12:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:12:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:12:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:12:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:12:16.472307  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:12:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:12:23.409821  543705 memory.go:184] no items to output this cycle
I0320 15:12:23.409824  543705 cpu.go:275] no items to output this cycle
I0320 15:12:24.849680  543705 disk_info.go:125] begin check local disk info of client
I0320 15:12:24.852217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:12:24.852224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5180 0xc0000c51c0]
E0320 15:12:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:12:33.409777  543705 memory.go:184] no items to output this cycle
I0320 15:12:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 15:12:38.525375  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:12:38.525383  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:12:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:12:43.410648  543705 memory.go:191] Add success.
I0320 15:12:43.409832  543705 cpu.go:282] Add success.
I0320 15:12:43.420358  543705 net.go:648] Add success.
I0320 15:12:43.422955  543705 net.go:770] primary dev: ETH0
I0320 15:12:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:12:43.422984  543705 net.go:698] Add success.
I0320 15:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:12:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:12:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:12:53.409813  543705 memory.go:184] no items to output this cycle
I0320 15:12:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 15:13:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:13:03.409780  543705 memory.go:184] no items to output this cycle
I0320 15:13:03.409824  543705 cpu.go:275] no items to output this cycle
E0320 15:13:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:13:13.409802  543705 memory.go:191] Add success.
I0320 15:13:13.409802  543705 cpu.go:282] Add success.
W0320 15:13:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:13:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:13:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:13:13.420439  543705 net.go:648] Add success.
I0320 15:13:13.423081  543705 net.go:770] primary dev: ETH0
I0320 15:13:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:13:13.423107  543705 net.go:698] Add success.
I0320 15:13:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:13:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:13:14.455138  543705 disk_worker.go:708] disk space is not compliant
W0320 15:13:14.455141  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:13:14.456457  543705 disk_worker.go:494] system disk:vda1
I0320 15:13:14.456498  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:13:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:13:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:13:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:13:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:13:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:13:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:13:23.409811  543705 memory.go:184] no items to output this cycle
I0320 15:13:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 15:13:24.853676  543705 disk_info.go:125] begin check local disk info of client
I0320 15:13:24.856208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:13:24.856214  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256fc0 0xc000257000]
E0320 15:13:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:13:33.409808  543705 memory.go:184] no items to output this cycle
I0320 15:13:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 15:13:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:13:43.409804  543705 memory.go:191] Add success.
I0320 15:13:43.409821  543705 cpu.go:282] Add success.
I0320 15:13:43.419993  543705 net.go:648] Add success.
I0320 15:13:43.422835  543705 net.go:770] primary dev: ETH0
I0320 15:13:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:13:43.422864  543705 net.go:698] Add success.
I0320 15:13:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:13:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:13:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:13:53.409811  543705 memory.go:184] no items to output this cycle
I0320 15:13:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 15:14:03.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:14:03.409902  543705 memory.go:184] no items to output this cycle
I0320 15:14:03.409958  543705 cpu.go:275] no items to output this cycle
E0320 15:14:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:14:13.409804  543705 memory.go:191] Add success.
I0320 15:14:13.409806  543705 cpu.go:282] Add success.
W0320 15:14:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:14:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:14:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:14:13.420158  543705 net.go:648] Add success.
I0320 15:14:13.423026  543705 net.go:770] primary dev: ETH0
I0320 15:14:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:14:13.423051  543705 net.go:698] Add success.
I0320 15:14:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:14:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:14:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 15:14:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:14:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 15:14:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:14:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:14:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:14:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:14:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:14:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:14:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:14:23.409788  543705 memory.go:184] no items to output this cycle
I0320 15:14:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 15:14:24.857673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:14:24.860181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:14:24.860187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000587c40 0xc000587c80]
E0320 15:14:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:14:33.409808  543705 memory.go:184] no items to output this cycle
I0320 15:14:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 15:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:14:43.409816  543705 memory.go:191] Add success.
I0320 15:14:43.409823  543705 cpu.go:282] Add success.
I0320 15:14:43.419977  543705 net.go:648] Add success.
I0320 15:14:43.423004  543705 net.go:770] primary dev: ETH0
I0320 15:14:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:14:43.423035  543705 net.go:698] Add success.
I0320 15:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:14:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:14:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:14:53.409777  543705 memory.go:184] no items to output this cycle
I0320 15:14:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 15:15:03.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:15:03.409910  543705 memory.go:184] no items to output this cycle
I0320 15:15:03.409963  543705 cpu.go:275] no items to output this cycle
E0320 15:15:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:15:13.409784  543705 memory.go:191] Add success.
W0320 15:15:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:15:13.409818  543705 cpu.go:282] Add success.
W0320 15:15:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:15:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:15:13.420255  543705 net.go:648] Add success.
I0320 15:15:13.423266  543705 net.go:770] primary dev: ETH0
I0320 15:15:13.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:15:13.423295  543705 net.go:698] Add success.
I0320 15:15:13.527951  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d70f2f5f-ca49-481a-8808-7e8e0eba0f31","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:15:13.527986  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:15:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:15:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:15:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 15:15:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:15:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 15:15:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:15:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:15:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:15:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:15:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:15:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:15:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:15:23.409804  543705 memory.go:184] no items to output this cycle
I0320 15:15:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 15:15:24.861674  543705 disk_info.go:125] begin check local disk info of client
I0320 15:15:24.864217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:15:24.864224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa3c0 0xc0001aa400]
E0320 15:15:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:15:33.409772  543705 memory.go:184] no items to output this cycle
I0320 15:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 15:15:38.526380  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:15:38.526388  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:15:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:15:43.410603  543705 memory.go:191] Add success.
I0320 15:15:43.409816  543705 cpu.go:282] Add success.
I0320 15:15:43.420358  543705 net.go:648] Add success.
I0320 15:15:43.422906  543705 net.go:770] primary dev: ETH0
I0320 15:15:43.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:15:43.422933  543705 net.go:698] Add success.
I0320 15:15:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:15:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:15:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:15:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:15:53.409774  543705 memory.go:184] no items to output this cycle
I0320 15:15:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 15:16:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:16:03.409779  543705 memory.go:184] no items to output this cycle
I0320 15:16:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 15:16:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:16:13.409787  543705 memory.go:191] Add success.
W0320 15:16:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:16:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:16:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:16:13.409841  543705 cpu.go:282] Add success.
I0320 15:16:13.420064  543705 net.go:648] Add success.
I0320 15:16:13.422836  543705 net.go:770] primary dev: ETH0
I0320 15:16:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:16:13.422864  543705 net.go:698] Add success.
I0320 15:16:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:16:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:16:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 15:16:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:16:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 15:16:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:16:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:16:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:16:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:16:23.409778  543705 memory.go:184] no items to output this cycle
I0320 15:16:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 15:16:24.865673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:16:24.868149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:16:24.868154  543705 disk_info.go:196] parse disk info done, disk is : [0xc000586180 0xc0005861c0]
E0320 15:16:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:16:33.409772  543705 memory.go:184] no items to output this cycle
I0320 15:16:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 15:16:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:16:43.409821  543705 memory.go:191] Add success.
I0320 15:16:43.409832  543705 cpu.go:282] Add success.
I0320 15:16:43.420257  543705 net.go:648] Add success.
I0320 15:16:43.423279  543705 net.go:770] primary dev: ETH0
I0320 15:16:43.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:16:43.423305  543705 net.go:698] Add success.
I0320 15:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:16:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:16:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:16:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:16:53.409786  543705 memory.go:184] no items to output this cycle
I0320 15:16:53.409824  543705 cpu.go:275] no items to output this cycle
E0320 15:17:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:17:03.409785  543705 memory.go:184] no items to output this cycle
I0320 15:17:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 15:17:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:17:13.409793  543705 memory.go:191] Add success.
I0320 15:17:13.409800  543705 cpu.go:282] Add success.
W0320 15:17:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:17:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:17:13.420155  543705 net.go:648] Add success.
I0320 15:17:13.423302  543705 net.go:770] primary dev: ETH0
I0320 15:17:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:17:13.423330  543705 net.go:698] Add success.
I0320 15:17:13.452804  543705 event_worker.go:152] Polling the log file for events...
W0320 15:17:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:17:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 15:17:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:17:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:17:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:17:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:17:14.456530  543705 disk_worker.go:494] system disk:vda1
I0320 15:17:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:17:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:17:15.456800  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:17:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:17:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:17:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:17:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:17:16.472338  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:17:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:17:23.409770  543705 memory.go:184] no items to output this cycle
I0320 15:17:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 15:17:24.869673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:17:24.872152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:17:24.872157  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb700 0xc0001fb740]
E0320 15:17:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:17:33.409797  543705 memory.go:184] no items to output this cycle
I0320 15:17:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 15:17:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:17:43.409808  543705 memory.go:191] Add success.
I0320 15:17:43.409827  543705 cpu.go:282] Add success.
I0320 15:17:43.419923  543705 net.go:648] Add success.
I0320 15:17:43.422524  543705 net.go:770] primary dev: ETH0
I0320 15:17:43.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:17:43.422553  543705 net.go:698] Add success.
I0320 15:17:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:17:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:17:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:17:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:17:53.409773  543705 memory.go:184] no items to output this cycle
I0320 15:17:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 15:18:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:18:03.409791  543705 memory.go:184] no items to output this cycle
I0320 15:18:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 15:18:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:18:13.409830  543705 memory.go:191] Add success.
I0320 15:18:13.409842  543705 cpu.go:282] Add success.
W0320 15:18:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:18:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:18:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:18:13.420127  543705 net.go:648] Add success.
I0320 15:18:13.423039  543705 net.go:770] primary dev: ETH0
I0320 15:18:13.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:18:13.423063  543705 net.go:698] Add success.
I0320 15:18:13.663302  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a88c284c-da0b-474a-a862-1ce56b8ecbe2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:18:13.663343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:18:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:18:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:18:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 15:18:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:18:14.456661  543705 disk_worker.go:494] system disk:vda1
I0320 15:18:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:18:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:18:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:18:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:18:16.473061  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:18:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:18:23.409796  543705 memory.go:184] no items to output this cycle
I0320 15:18:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 15:18:24.873672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:18:24.876155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:18:24.876161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa5c0 0xc0001fa600]
E0320 15:18:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:18:33.409776  543705 memory.go:184] no items to output this cycle
I0320 15:18:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 15:18:38.527401  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:18:38.527409  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:18:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:18:43.410598  543705 memory.go:191] Add success.
I0320 15:18:43.409818  543705 cpu.go:282] Add success.
I0320 15:18:43.420303  543705 net.go:648] Add success.
I0320 15:18:43.423022  543705 net.go:770] primary dev: ETH0
I0320 15:18:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:18:43.423054  543705 net.go:698] Add success.
I0320 15:18:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:18:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:18:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:18:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:18:53.409800  543705 memory.go:184] no items to output this cycle
I0320 15:18:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 15:19:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:19:03.409778  543705 memory.go:184] no items to output this cycle
I0320 15:19:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 15:19:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:19:13.409792  543705 memory.go:191] Add success.
I0320 15:19:13.409803  543705 cpu.go:282] Add success.
W0320 15:19:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:19:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:19:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:19:13.420133  543705 net.go:648] Add success.
I0320 15:19:13.422785  543705 net.go:770] primary dev: ETH0
I0320 15:19:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:19:13.422809  543705 net.go:698] Add success.
I0320 15:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:19:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:19:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 15:19:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:19:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 15:19:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:19:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:19:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:19:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:19:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:19:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:19:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:19:23.409769  543705 memory.go:184] no items to output this cycle
I0320 15:19:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 15:19:24.877672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:19:24.880187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:19:24.880193  543705 disk_info.go:196] parse disk info done, disk is : [0xc000586cc0 0xc000586d00]
E0320 15:19:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:19:33.409768  543705 memory.go:184] no items to output this cycle
I0320 15:19:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 15:19:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:19:43.409829  543705 memory.go:191] Add success.
I0320 15:19:43.409832  543705 cpu.go:282] Add success.
I0320 15:19:43.420006  543705 net.go:648] Add success.
I0320 15:19:43.423392  543705 net.go:770] primary dev: ETH0
I0320 15:19:43.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:19:43.423426  543705 net.go:698] Add success.
I0320 15:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:19:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:19:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:19:53.410281  543705 cpu.go:275] no items to output this cycle
I0320 15:19:53.410288  543705 memory.go:184] no items to output this cycle
E0320 15:20:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:20:03.409902  543705 memory.go:184] no items to output this cycle
I0320 15:20:03.409923  543705 cpu.go:275] no items to output this cycle
E0320 15:20:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:20:13.409822  543705 memory.go:191] Add success.
I0320 15:20:13.409832  543705 cpu.go:282] Add success.
W0320 15:20:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:20:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:20:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:20:13.420295  543705 net.go:648] Add success.
I0320 15:20:13.422983  543705 net.go:770] primary dev: ETH0
I0320 15:20:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:20:13.423007  543705 net.go:698] Add success.
I0320 15:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:20:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:20:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 15:20:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:20:14.456538  543705 disk_worker.go:494] system disk:vda1
I0320 15:20:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:20:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:20:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:20:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:20:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:20:23.409770  543705 memory.go:184] no items to output this cycle
I0320 15:20:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 15:20:24.881671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:20:24.884172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:20:24.884178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa480 0xc0001fa4c0]
E0320 15:20:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:20:33.409779  543705 memory.go:184] no items to output this cycle
I0320 15:20:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 15:20:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:20:43.409800  543705 memory.go:191] Add success.
I0320 15:20:43.409805  543705 cpu.go:282] Add success.
I0320 15:20:43.419985  543705 net.go:648] Add success.
I0320 15:20:43.423511  543705 net.go:770] primary dev: ETH0
I0320 15:20:43.423527  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:20:43.423542  543705 net.go:698] Add success.
I0320 15:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:20:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:20:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:20:53.409803  543705 memory.go:184] no items to output this cycle
I0320 15:20:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 15:21:03.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:21:03.409890  543705 memory.go:184] no items to output this cycle
I0320 15:21:03.409924  543705 cpu.go:275] no items to output this cycle
E0320 15:21:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:21:13.409818  543705 memory.go:191] Add success.
I0320 15:21:13.409827  543705 cpu.go:282] Add success.
W0320 15:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:21:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:21:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:21:13.420239  543705 net.go:648] Add success.
I0320 15:21:13.423041  543705 net.go:770] primary dev: ETH0
I0320 15:21:13.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:21:13.423069  543705 net.go:698] Add success.
I0320 15:21:13.463392  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ffa63386-154c-43a0-b3bf-8f3d20b0c8d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:21:13.463423  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:21:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:21:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:21:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 15:21:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:21:14.456654  543705 disk_worker.go:494] system disk:vda1
I0320 15:21:14.456688  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:21:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:21:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:21:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:21:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:21:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:21:23.409801  543705 memory.go:184] no items to output this cycle
I0320 15:21:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 15:21:24.885669  543705 disk_info.go:125] begin check local disk info of client
I0320 15:21:24.888098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:21:24.888104  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb280 0xc0001fb2c0]
E0320 15:21:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:21:33.409762  543705 memory.go:184] no items to output this cycle
I0320 15:21:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 15:21:38.528401  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:21:38.528408  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:21:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:21:43.410750  543705 memory.go:191] Add success.
I0320 15:21:43.409813  543705 cpu.go:282] Add success.
I0320 15:21:43.420230  543705 net.go:770] primary dev: ETH0
I0320 15:21:43.420246  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:21:43.420261  543705 net.go:698] Add success.
I0320 15:21:43.420611  543705 net.go:648] Add success.
I0320 15:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:21:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:21:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:21:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:21:53.409810  543705 memory.go:184] no items to output this cycle
I0320 15:21:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 15:22:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:22:03.409786  543705 cpu.go:275] no items to output this cycle
I0320 15:22:03.409787  543705 memory.go:184] no items to output this cycle
E0320 15:22:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:22:13.409798  543705 cpu.go:282] Add success.
I0320 15:22:13.409803  543705 memory.go:191] Add success.
W0320 15:22:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:22:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:22:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:22:13.420076  543705 net.go:648] Add success.
I0320 15:22:13.422970  543705 net.go:770] primary dev: ETH0
I0320 15:22:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:22:13.422996  543705 net.go:698] Add success.
W0320 15:22:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:22:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 15:22:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:22:14.456818  543705 disk_worker.go:494] system disk:vda1
I0320 15:22:14.456861  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:22:14.457134  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:22:14.457142  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:22:14.457147  543705 custom_config.go:64] query custom config with name: gpu
E0320 15:22:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:22:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:22:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:22:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:22:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:22:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:22:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:22:23.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:22:23.410265  543705 memory.go:184] no items to output this cycle
I0320 15:22:23.410267  543705 cpu.go:275] no items to output this cycle
I0320 15:22:24.889668  543705 disk_info.go:125] begin check local disk info of client
I0320 15:22:24.892102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:22:24.892107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb440 0xc0001fb480]
E0320 15:22:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:22:33.409777  543705 memory.go:184] no items to output this cycle
I0320 15:22:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 15:22:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:22:43.409821  543705 memory.go:191] Add success.
I0320 15:22:43.409825  543705 cpu.go:282] Add success.
I0320 15:22:43.420069  543705 net.go:648] Add success.
I0320 15:22:43.422724  543705 net.go:770] primary dev: ETH0
I0320 15:22:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:22:43.422750  543705 net.go:698] Add success.
I0320 15:22:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:22:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:22:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:22:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:22:53.409802  543705 memory.go:184] no items to output this cycle
I0320 15:22:53.409898  543705 cpu.go:275] no items to output this cycle
E0320 15:23:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:23:03.409778  543705 memory.go:184] no items to output this cycle
I0320 15:23:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 15:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:23:13.409810  543705 memory.go:191] Add success.
I0320 15:23:13.409817  543705 cpu.go:282] Add success.
W0320 15:23:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:23:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:23:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:23:13.420050  543705 net.go:648] Add success.
I0320 15:23:13.422979  543705 net.go:770] primary dev: ETH0
I0320 15:23:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:23:13.423003  543705 net.go:698] Add success.
I0320 15:23:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:23:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:23:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 15:23:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:23:14.456612  543705 disk_worker.go:494] system disk:vda1
I0320 15:23:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:23:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:23:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:23:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:23:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:23:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:23:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:23:23.409766  543705 memory.go:184] no items to output this cycle
I0320 15:23:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 15:23:24.893673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:23:24.896114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:23:24.896120  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340c80 0xc000340cc0]
E0320 15:23:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:23:33.409772  543705 memory.go:184] no items to output this cycle
I0320 15:23:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 15:23:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:23:43.409794  543705 memory.go:191] Add success.
I0320 15:23:43.409809  543705 cpu.go:282] Add success.
I0320 15:23:43.419897  543705 net.go:648] Add success.
I0320 15:23:43.422685  543705 net.go:770] primary dev: ETH0
I0320 15:23:43.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:23:43.422715  543705 net.go:698] Add success.
I0320 15:23:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:23:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:23:53.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:23:53.409929  543705 memory.go:184] no items to output this cycle
I0320 15:23:53.409931  543705 cpu.go:275] no items to output this cycle
E0320 15:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:24:03.409780  543705 memory.go:184] no items to output this cycle
I0320 15:24:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 15:24:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:24:13.409784  543705 memory.go:191] Add success.
W0320 15:24:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:24:13.409818  543705 cpu.go:282] Add success.
W0320 15:24:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:24:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:24:13.420293  543705 net.go:648] Add success.
I0320 15:24:13.422789  543705 net.go:770] primary dev: ETH0
I0320 15:24:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:24:13.422814  543705 net.go:698] Add success.
I0320 15:24:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:24:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:24:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0320 15:24:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:24:14.456486  543705 disk_worker.go:494] system disk:vda1
I0320 15:24:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:24:15.494118  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9508922c-b18d-49cb-bdf5-a00ab90bfeb2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:24:15.494157  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:24:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:24:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:24:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:24:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:24:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:24:23.409783  543705 memory.go:184] no items to output this cycle
I0320 15:24:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 15:24:24.897677  543705 disk_info.go:125] begin check local disk info of client
I0320 15:24:24.900098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:24:24.900104  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc40 0xc0001abc80]
E0320 15:24:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:24:33.409804  543705 memory.go:184] no items to output this cycle
I0320 15:24:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 15:24:38.528873  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:24:38.528880  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:24:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:24:43.410755  543705 memory.go:191] Add success.
I0320 15:24:43.409815  543705 cpu.go:282] Add success.
I0320 15:24:43.420463  543705 net.go:648] Add success.
I0320 15:24:43.423289  543705 net.go:770] primary dev: ETH0
I0320 15:24:43.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:24:43.423315  543705 net.go:698] Add success.
I0320 15:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:24:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:24:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:24:53.409895  543705 cpu.go:275] no items to output this cycle
E0320 15:24:53.409968  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:24:53.409978  543705 memory.go:184] no items to output this cycle
E0320 15:25:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:25:03.409796  543705 memory.go:184] no items to output this cycle
I0320 15:25:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 15:25:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:25:13.409804  543705 memory.go:191] Add success.
I0320 15:25:13.409805  543705 cpu.go:282] Add success.
W0320 15:25:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:25:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:25:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:25:13.420136  543705 net.go:648] Add success.
I0320 15:25:13.422704  543705 net.go:770] primary dev: ETH0
I0320 15:25:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:25:13.422743  543705 net.go:698] Add success.
I0320 15:25:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:25:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:25:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 15:25:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:25:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 15:25:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:25:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:25:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:25:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:25:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:25:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:25:23.409776  543705 memory.go:184] no items to output this cycle
I0320 15:25:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 15:25:24.901674  543705 disk_info.go:125] begin check local disk info of client
I0320 15:25:24.904092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:25:24.904097  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbcc0 0xc0001fbd00]
E0320 15:25:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:25:33.409802  543705 memory.go:184] no items to output this cycle
I0320 15:25:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 15:25:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:25:43.409831  543705 memory.go:191] Add success.
I0320 15:25:43.409836  543705 cpu.go:282] Add success.
I0320 15:25:43.419983  543705 net.go:648] Add success.
I0320 15:25:43.422858  543705 net.go:770] primary dev: ETH0
I0320 15:25:43.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:25:43.422883  543705 net.go:698] Add success.
I0320 15:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:25:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:25:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:25:53.409796  543705 memory.go:184] no items to output this cycle
I0320 15:25:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 15:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:26:03.409794  543705 memory.go:184] no items to output this cycle
I0320 15:26:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 15:26:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:26:13.409811  543705 cpu.go:282] Add success.
I0320 15:26:13.409813  543705 memory.go:191] Add success.
W0320 15:26:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:26:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:26:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:26:13.420249  543705 net.go:648] Add success.
I0320 15:26:13.423361  543705 net.go:770] primary dev: ETH0
I0320 15:26:13.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:26:13.423385  543705 net.go:698] Add success.
I0320 15:26:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:26:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:26:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 15:26:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:26:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 15:26:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:26:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:26:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:26:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:26:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:26:23.409770  543705 memory.go:184] no items to output this cycle
I0320 15:26:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 15:26:24.905673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:26:24.908133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:26:24.908140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004719c0 0xc000471a00]
E0320 15:26:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:26:33.409783  543705 memory.go:184] no items to output this cycle
I0320 15:26:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 15:26:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:26:43.409799  543705 memory.go:191] Add success.
I0320 15:26:43.409800  543705 cpu.go:282] Add success.
I0320 15:26:43.419973  543705 net.go:648] Add success.
I0320 15:26:43.423163  543705 net.go:770] primary dev: ETH0
I0320 15:26:43.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:26:43.423188  543705 net.go:698] Add success.
I0320 15:26:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:26:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:26:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:26:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:26:53.409784  543705 cpu.go:275] no items to output this cycle
I0320 15:26:53.409788  543705 memory.go:184] no items to output this cycle
E0320 15:27:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:27:03.409806  543705 memory.go:184] no items to output this cycle
I0320 15:27:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 15:27:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:27:13.409797  543705 memory.go:191] Add success.
I0320 15:27:13.409804  543705 cpu.go:282] Add success.
W0320 15:27:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:27:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:27:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:27:13.420195  543705 net.go:648] Add success.
I0320 15:27:13.429247  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 15:27:13.429322  543705 net.go:770] primary dev: ETH0
I0320 15:27:13.429334  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:27:13.429345  543705 net.go:698] Add success.
I0320 15:27:13.452939  543705 event_worker.go:152] Polling the log file for events...
I0320 15:27:13.463217  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0fcf64cf-706f-46f9-8497-c795fc3bfedc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:27:13.463251  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 15:27:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:27:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0320 15:27:14.455238  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:27:14.456068  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:27:14.456078  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:27:14.456084  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:27:14.456928  543705 disk_worker.go:494] system disk:vda1
I0320 15:27:14.456955  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:27:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:27:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:27:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:27:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:27:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:27:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:27:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:27:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:27:23.409763  543705 memory.go:184] no items to output this cycle
I0320 15:27:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 15:27:24.909671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:27:24.912112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:27:24.912117  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536e80 0xc000536ec0]
E0320 15:27:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:27:33.409782  543705 memory.go:184] no items to output this cycle
I0320 15:27:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 15:27:38.529025  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:27:38.529033  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:27:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:27:43.410788  543705 memory.go:191] Add success.
I0320 15:27:43.409808  543705 cpu.go:282] Add success.
I0320 15:27:43.420540  543705 net.go:648] Add success.
I0320 15:27:43.423410  543705 net.go:770] primary dev: ETH0
I0320 15:27:43.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:27:43.423436  543705 net.go:698] Add success.
I0320 15:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:27:53.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:27:53.409942  543705 cpu.go:275] no items to output this cycle
I0320 15:27:53.409967  543705 memory.go:184] no items to output this cycle
E0320 15:28:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:28:03.409794  543705 memory.go:184] no items to output this cycle
I0320 15:28:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 15:28:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:28:13.409786  543705 memory.go:191] Add success.
I0320 15:28:13.409805  543705 cpu.go:282] Add success.
W0320 15:28:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:28:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:28:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:28:13.420233  543705 net.go:648] Add success.
I0320 15:28:13.423079  543705 net.go:770] primary dev: ETH0
I0320 15:28:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:28:13.423105  543705 net.go:698] Add success.
I0320 15:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:28:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:28:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 15:28:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:28:14.456497  543705 disk_worker.go:494] system disk:vda1
I0320 15:28:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:28:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:28:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:28:23.409774  543705 memory.go:184] no items to output this cycle
I0320 15:28:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 15:28:24.913672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:28:24.916103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:28:24.916109  543705 disk_info.go:196] parse disk info done, disk is : [0xc000587100 0xc000587140]
E0320 15:28:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:28:33.409801  543705 memory.go:184] no items to output this cycle
I0320 15:28:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 15:28:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:28:43.409804  543705 memory.go:191] Add success.
I0320 15:28:43.409803  543705 cpu.go:282] Add success.
I0320 15:28:43.420061  543705 net.go:648] Add success.
I0320 15:28:43.422635  543705 net.go:770] primary dev: ETH0
I0320 15:28:43.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:28:43.422665  543705 net.go:698] Add success.
I0320 15:28:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:28:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:28:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:28:53.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:28:53.409877  543705 memory.go:184] no items to output this cycle
I0320 15:28:53.410022  543705 cpu.go:275] no items to output this cycle
E0320 15:29:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:29:03.409763  543705 memory.go:184] no items to output this cycle
I0320 15:29:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 15:29:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:29:13.409794  543705 memory.go:191] Add success.
I0320 15:29:13.409804  543705 cpu.go:282] Add success.
W0320 15:29:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:29:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:29:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:29:13.420248  543705 net.go:648] Add success.
I0320 15:29:13.423195  543705 net.go:770] primary dev: ETH0
I0320 15:29:13.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:29:13.423223  543705 net.go:698] Add success.
I0320 15:29:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:29:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:29:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 15:29:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:29:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 15:29:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:29:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:29:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:29:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:29:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:29:23.409791  543705 memory.go:184] no items to output this cycle
I0320 15:29:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 15:29:24.917675  543705 disk_info.go:125] begin check local disk info of client
I0320 15:29:24.920134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:29:24.920140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa880 0xc0001aa8c0]
E0320 15:29:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:29:33.409798  543705 memory.go:184] no items to output this cycle
I0320 15:29:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 15:29:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:29:43.409798  543705 memory.go:191] Add success.
I0320 15:29:43.409834  543705 cpu.go:282] Add success.
I0320 15:29:43.420061  543705 net.go:648] Add success.
I0320 15:29:43.422811  543705 net.go:770] primary dev: ETH0
I0320 15:29:43.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:29:43.422835  543705 net.go:698] Add success.
I0320 15:29:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:29:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:29:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:29:53.409811  543705 memory.go:184] no items to output this cycle
I0320 15:29:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 15:30:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:30:03.409775  543705 memory.go:184] no items to output this cycle
I0320 15:30:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 15:30:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:30:13.409787  543705 memory.go:191] Add success.
I0320 15:30:13.409809  543705 cpu.go:282] Add success.
W0320 15:30:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:30:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:30:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:30:13.420282  543705 net.go:648] Add success.
I0320 15:30:13.422966  543705 net.go:770] primary dev: ETH0
I0320 15:30:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:30:13.422990  543705 net.go:698] Add success.
I0320 15:30:13.469138  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ba5a2913-e7a4-4779-8591-4fbd10f6516f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:30:13.469173  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:30:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:30:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:30:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 15:30:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:30:14.456698  543705 disk_worker.go:494] system disk:vda1
I0320 15:30:14.456728  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:30:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:30:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:30:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:30:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:30:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:30:23.409797  543705 memory.go:184] no items to output this cycle
I0320 15:30:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 15:30:24.921672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:30:24.924121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:30:24.924127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab100 0xc0001ab140]
E0320 15:30:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:30:33.409774  543705 memory.go:184] no items to output this cycle
I0320 15:30:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 15:30:38.529180  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:30:38.529187  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:30:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:30:43.410876  543705 memory.go:191] Add success.
I0320 15:30:43.409823  543705 cpu.go:282] Add success.
I0320 15:30:43.420676  543705 net.go:648] Add success.
I0320 15:30:43.423383  543705 net.go:770] primary dev: ETH0
I0320 15:30:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:30:43.423408  543705 net.go:698] Add success.
I0320 15:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:30:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:30:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:30:53.409794  543705 memory.go:184] no items to output this cycle
I0320 15:30:53.409806  543705 cpu.go:275] no items to output this cycle
I0320 15:31:03.409876  543705 cpu.go:275] no items to output this cycle
E0320 15:31:03.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:31:03.409893  543705 memory.go:184] no items to output this cycle
E0320 15:31:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:31:13.409789  543705 memory.go:191] Add success.
W0320 15:31:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:31:13.409817  543705 cpu.go:282] Add success.
W0320 15:31:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:31:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:31:13.420131  543705 net.go:648] Add success.
I0320 15:31:13.422815  543705 net.go:770] primary dev: ETH0
I0320 15:31:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:31:13.422845  543705 net.go:698] Add success.
I0320 15:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:31:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:31:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 15:31:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:31:14.456572  543705 disk_worker.go:494] system disk:vda1
I0320 15:31:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:31:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:31:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:31:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:31:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:31:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:31:23.409777  543705 memory.go:184] no items to output this cycle
I0320 15:31:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 15:31:24.925672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:31:24.928075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:31:24.928081  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470940 0xc000470980]
E0320 15:31:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:31:33.409771  543705 memory.go:184] no items to output this cycle
I0320 15:31:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 15:31:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:31:43.409819  543705 memory.go:191] Add success.
I0320 15:31:43.409828  543705 cpu.go:282] Add success.
I0320 15:31:43.419960  543705 net.go:648] Add success.
I0320 15:31:43.422774  543705 net.go:770] primary dev: ETH0
I0320 15:31:43.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:31:43.422802  543705 net.go:698] Add success.
I0320 15:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:31:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:31:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:31:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:31:53.409806  543705 memory.go:184] no items to output this cycle
I0320 15:31:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 15:32:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:32:03.409808  543705 memory.go:184] no items to output this cycle
I0320 15:32:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 15:32:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:32:13.409883  543705 memory.go:191] Add success.
W0320 15:32:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:32:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:32:13.409928  543705 cpu.go:282] Add success.
I0320 15:32:13.409930  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:32:13.419710  543705 net.go:648] Add success.
I0320 15:32:13.422218  543705 net.go:770] primary dev: ETH0
I0320 15:32:13.422243  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:32:13.422254  543705 net.go:698] Add success.
W0320 15:32:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:32:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0320 15:32:14.455153  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:32:14.456911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:32:14.456920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:32:14.456926  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:32:14.456994  543705 disk_worker.go:494] system disk:vda1
I0320 15:32:14.457023  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:32:15.456855  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:32:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:32:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:32:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:32:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:32:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:32:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:32:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:32:23.409777  543705 memory.go:184] no items to output this cycle
I0320 15:32:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 15:32:24.929670  543705 disk_info.go:125] begin check local disk info of client
I0320 15:32:24.932092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:32:24.932097  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053a440 0xc00053a480]
E0320 15:32:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:32:33.409793  543705 memory.go:184] no items to output this cycle
I0320 15:32:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 15:32:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:32:43.409790  543705 memory.go:191] Add success.
I0320 15:32:43.409808  543705 cpu.go:282] Add success.
I0320 15:32:43.419937  543705 net.go:648] Add success.
I0320 15:32:43.422508  543705 net.go:770] primary dev: ETH0
I0320 15:32:43.422520  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:32:43.422532  543705 net.go:698] Add success.
I0320 15:32:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:32:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:32:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:32:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:32:53.409763  543705 memory.go:184] no items to output this cycle
I0320 15:32:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 15:33:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:33:03.409779  543705 memory.go:184] no items to output this cycle
I0320 15:33:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 15:33:13.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:33:13.409913  543705 memory.go:191] Add success.
I0320 15:33:13.409933  543705 cpu.go:282] Add success.
W0320 15:33:13.409950  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:33:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:33:13.409978  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:33:13.419708  543705 net.go:648] Add success.
I0320 15:33:13.422684  543705 net.go:770] primary dev: ETH0
I0320 15:33:13.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:33:13.422708  543705 net.go:698] Add success.
I0320 15:33:13.469096  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"55d2669b-b5e9-45bd-86ea-c6342fd70c84","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:33:13.469135  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:33:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:33:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:33:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 15:33:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:33:14.456679  543705 disk_worker.go:494] system disk:vda1
I0320 15:33:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:33:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:33:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:33:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:33:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:33:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:33:23.409776  543705 memory.go:184] no items to output this cycle
I0320 15:33:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 15:33:24.933671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:33:24.936168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:33:24.936175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba80 0xc0001abac0]
E0320 15:33:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:33:33.409788  543705 memory.go:184] no items to output this cycle
I0320 15:33:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 15:33:38.529358  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:33:38.529365  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:33:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:33:43.410547  543705 memory.go:191] Add success.
I0320 15:33:43.409805  543705 cpu.go:282] Add success.
I0320 15:33:43.420258  543705 net.go:648] Add success.
I0320 15:33:43.422574  543705 net.go:770] primary dev: ETH0
I0320 15:33:43.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:33:43.422600  543705 net.go:698] Add success.
I0320 15:33:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:33:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:33:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:33:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:33:53.409778  543705 memory.go:184] no items to output this cycle
I0320 15:33:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 15:34:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:34:03.409800  543705 memory.go:184] no items to output this cycle
I0320 15:34:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 15:34:13.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:34:13.409892  543705 memory.go:191] Add success.
W0320 15:34:13.409921  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:34:13.409933  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:34:13.409940  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:34:13.410051  543705 cpu.go:282] Add success.
I0320 15:34:13.419705  543705 net.go:648] Add success.
I0320 15:34:13.422690  543705 net.go:770] primary dev: ETH0
I0320 15:34:13.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:34:13.422715  543705 net.go:698] Add success.
I0320 15:34:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:34:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:34:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 15:34:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:34:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 15:34:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:34:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:34:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:34:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:34:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:34:23.409795  543705 memory.go:184] no items to output this cycle
I0320 15:34:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 15:34:24.937669  543705 disk_info.go:125] begin check local disk info of client
I0320 15:34:24.940094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:34:24.940100  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b76c0 0xc0002b7700]
E0320 15:34:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:34:33.409761  543705 memory.go:184] no items to output this cycle
I0320 15:34:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 15:34:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:34:43.409800  543705 memory.go:191] Add success.
I0320 15:34:43.409805  543705 cpu.go:282] Add success.
I0320 15:34:43.419956  543705 net.go:648] Add success.
I0320 15:34:43.422853  543705 net.go:770] primary dev: ETH0
I0320 15:34:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:34:43.422890  543705 net.go:698] Add success.
I0320 15:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:34:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:34:53.409771  543705 memory.go:184] no items to output this cycle
I0320 15:34:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 15:35:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:35:03.409788  543705 memory.go:184] no items to output this cycle
I0320 15:35:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 15:35:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:35:13.409794  543705 memory.go:191] Add success.
I0320 15:35:13.409799  543705 cpu.go:282] Add success.
W0320 15:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:35:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:35:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:35:13.420062  543705 net.go:648] Add success.
I0320 15:35:13.422681  543705 net.go:770] primary dev: ETH0
I0320 15:35:13.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:35:13.422707  543705 net.go:698] Add success.
I0320 15:35:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:35:14.455294  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:35:14.455387  543705 disk_worker.go:708] disk space is not compliant
W0320 15:35:14.455391  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:35:14.457042  543705 disk_worker.go:494] system disk:vda1
I0320 15:35:14.457084  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:35:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:35:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:35:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:35:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:35:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:35:23.409801  543705 memory.go:184] no items to output this cycle
I0320 15:35:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 15:35:24.941668  543705 disk_info.go:125] begin check local disk info of client
I0320 15:35:24.944086  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:35:24.944092  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4200 0xc0000c4240]
E0320 15:35:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:35:33.409781  543705 memory.go:184] no items to output this cycle
I0320 15:35:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 15:35:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:35:43.409800  543705 memory.go:191] Add success.
I0320 15:35:43.409801  543705 cpu.go:282] Add success.
I0320 15:35:43.420122  543705 net.go:648] Add success.
I0320 15:35:43.423222  543705 net.go:770] primary dev: ETH0
I0320 15:35:43.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:35:43.423250  543705 net.go:698] Add success.
I0320 15:35:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:35:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:35:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:35:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:35:53.409789  543705 memory.go:184] no items to output this cycle
I0320 15:35:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 15:36:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:36:03.409800  543705 memory.go:184] no items to output this cycle
I0320 15:36:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 15:36:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:36:13.409815  543705 memory.go:191] Add success.
I0320 15:36:13.409825  543705 cpu.go:282] Add success.
W0320 15:36:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:36:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:36:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:36:13.420212  543705 net.go:648] Add success.
I0320 15:36:13.423340  543705 net.go:770] primary dev: ETH0
I0320 15:36:13.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:36:13.423366  543705 net.go:698] Add success.
I0320 15:36:13.468524  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a68f5021-5e33-44ae-b9c7-1c58f4c614ea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:36:13.468573  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:36:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:36:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:36:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 15:36:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:36:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 15:36:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:36:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:36:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:36:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:36:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:36:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:36:23.409795  543705 memory.go:184] no items to output this cycle
I0320 15:36:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 15:36:24.945673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:36:24.948206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:36:24.948213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa000 0xc0001fa040]
E0320 15:36:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:36:33.409767  543705 memory.go:184] no items to output this cycle
I0320 15:36:33.409804  543705 cpu.go:275] no items to output this cycle
I0320 15:36:38.530408  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:36:38.530416  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:36:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:36:43.410616  543705 memory.go:191] Add success.
I0320 15:36:43.409801  543705 cpu.go:282] Add success.
I0320 15:36:43.420320  543705 net.go:648] Add success.
I0320 15:36:43.422894  543705 net.go:770] primary dev: ETH0
I0320 15:36:43.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:36:43.422922  543705 net.go:698] Add success.
I0320 15:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:36:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:36:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:36:53.409796  543705 memory.go:184] no items to output this cycle
I0320 15:36:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 15:37:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:37:03.409788  543705 memory.go:184] no items to output this cycle
I0320 15:37:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 15:37:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:37:13.409796  543705 memory.go:191] Add success.
I0320 15:37:13.409796  543705 cpu.go:282] Add success.
W0320 15:37:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:37:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:37:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:37:13.420070  543705 net.go:648] Add success.
I0320 15:37:13.422683  543705 net.go:770] primary dev: ETH0
I0320 15:37:13.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:37:13.422710  543705 net.go:698] Add success.
I0320 15:37:13.453234  543705 event_worker.go:152] Polling the log file for events...
W0320 15:37:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:37:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 15:37:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:37:14.455870  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:37:14.455878  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:37:14.455884  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:37:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 15:37:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:37:15.456890  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:37:15.456900  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 15:37:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:37:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:37:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:37:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:37:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:37:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:37:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 15:37:23.409805  543705 memory.go:184] no items to output this cycle
I0320 15:37:24.949671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:37:24.952177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:37:24.952184  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a640 0xc00039a680]
E0320 15:37:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:37:33.409765  543705 memory.go:184] no items to output this cycle
I0320 15:37:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 15:37:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:37:43.409820  543705 memory.go:191] Add success.
I0320 15:37:43.409834  543705 cpu.go:282] Add success.
I0320 15:37:43.419960  543705 net.go:648] Add success.
I0320 15:37:43.422596  543705 net.go:770] primary dev: ETH0
I0320 15:37:43.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:37:43.422622  543705 net.go:698] Add success.
I0320 15:37:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:37:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:37:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:37:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:37:53.409796  543705 cpu.go:275] no items to output this cycle
I0320 15:37:53.409803  543705 memory.go:184] no items to output this cycle
E0320 15:38:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:38:03.409809  543705 memory.go:184] no items to output this cycle
I0320 15:38:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 15:38:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:38:13.409813  543705 memory.go:191] Add success.
I0320 15:38:13.409820  543705 cpu.go:282] Add success.
W0320 15:38:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:38:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:38:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:38:13.420042  543705 net.go:648] Add success.
I0320 15:38:13.422691  543705 net.go:770] primary dev: ETH0
I0320 15:38:13.422704  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:38:13.422715  543705 net.go:698] Add success.
I0320 15:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:38:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:38:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 15:38:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:38:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 15:38:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:38:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:38:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:38:23.410480  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:38:23.410500  543705 memory.go:184] no items to output this cycle
I0320 15:38:23.410499  543705 cpu.go:275] no items to output this cycle
I0320 15:38:24.953695  543705 disk_info.go:125] begin check local disk info of client
I0320 15:38:24.956128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:38:24.956134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa000 0xc0001fa040]
E0320 15:38:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:38:33.409786  543705 memory.go:184] no items to output this cycle
I0320 15:38:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 15:38:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:38:43.409805  543705 cpu.go:282] Add success.
I0320 15:38:43.409816  543705 memory.go:191] Add success.
I0320 15:38:43.419974  543705 net.go:648] Add success.
I0320 15:38:43.422348  543705 net.go:770] primary dev: ETH0
I0320 15:38:43.422361  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:38:43.422374  543705 net.go:698] Add success.
I0320 15:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:38:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:38:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:38:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:38:53.409803  543705 memory.go:184] no items to output this cycle
I0320 15:38:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 15:39:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:39:03.409807  543705 memory.go:184] no items to output this cycle
I0320 15:39:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 15:39:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:39:13.409776  543705 memory.go:191] Add success.
I0320 15:39:13.409798  543705 cpu.go:282] Add success.
W0320 15:39:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:39:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:39:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:39:13.420237  543705 net.go:648] Add success.
I0320 15:39:13.423137  543705 net.go:770] primary dev: ETH0
I0320 15:39:13.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:39:13.423163  543705 net.go:698] Add success.
I0320 15:39:13.469468  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ec2d6c55-3538-4809-9b5c-7ea05f2f7927","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:39:13.469501  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:39:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:39:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:39:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 15:39:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:39:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 15:39:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:39:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:39:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:39:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:39:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:39:23.409765  543705 memory.go:184] no items to output this cycle
I0320 15:39:23.409918  543705 cpu.go:275] no items to output this cycle
I0320 15:39:24.957917  543705 disk_info.go:125] begin check local disk info of client
I0320 15:39:24.960439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:39:24.960445  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0320 15:39:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:39:33.409774  543705 memory.go:184] no items to output this cycle
I0320 15:39:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 15:39:38.531428  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:39:38.531435  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:39:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:39:43.410619  543705 memory.go:191] Add success.
I0320 15:39:43.409802  543705 cpu.go:282] Add success.
I0320 15:39:43.420329  543705 net.go:648] Add success.
I0320 15:39:43.422844  543705 net.go:770] primary dev: ETH0
I0320 15:39:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:39:43.422871  543705 net.go:698] Add success.
I0320 15:39:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:39:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:39:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:39:53.409779  543705 memory.go:184] no items to output this cycle
I0320 15:39:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 15:40:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:40:03.409807  543705 memory.go:184] no items to output this cycle
I0320 15:40:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 15:40:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:40:13.409786  543705 memory.go:191] Add success.
I0320 15:40:13.409809  543705 cpu.go:282] Add success.
W0320 15:40:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:40:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:40:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:40:13.420128  543705 net.go:648] Add success.
I0320 15:40:13.422787  543705 net.go:770] primary dev: ETH0
I0320 15:40:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:40:13.422815  543705 net.go:698] Add success.
I0320 15:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:40:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:40:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 15:40:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:40:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 15:40:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:40:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:40:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:40:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:40:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:40:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:40:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:40:23.409782  543705 memory.go:184] no items to output this cycle
I0320 15:40:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 15:40:24.961670  543705 disk_info.go:125] begin check local disk info of client
I0320 15:40:24.964177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:40:24.964184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc000 0xc0002bc040]
E0320 15:40:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:40:33.409780  543705 memory.go:184] no items to output this cycle
I0320 15:40:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 15:40:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:40:43.409837  543705 memory.go:191] Add success.
I0320 15:40:43.409839  543705 cpu.go:282] Add success.
I0320 15:40:43.419797  543705 net.go:770] primary dev: ETH0
I0320 15:40:43.419813  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:40:43.419829  543705 net.go:698] Add success.
I0320 15:40:43.420181  543705 net.go:648] Add success.
I0320 15:40:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:40:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:40:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:40:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:40:53.409777  543705 memory.go:184] no items to output this cycle
I0320 15:40:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 15:41:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:41:03.409798  543705 memory.go:184] no items to output this cycle
I0320 15:41:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 15:41:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:41:13.409794  543705 memory.go:191] Add success.
I0320 15:41:13.409799  543705 cpu.go:282] Add success.
W0320 15:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:41:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:41:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:41:13.420147  543705 net.go:648] Add success.
I0320 15:41:13.422852  543705 net.go:770] primary dev: ETH0
I0320 15:41:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:41:13.422878  543705 net.go:698] Add success.
I0320 15:41:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:41:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:41:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 15:41:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:41:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 15:41:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:41:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:41:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:41:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:41:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:41:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:41:23.409787  543705 memory.go:184] no items to output this cycle
I0320 15:41:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 15:41:24.965664  543705 disk_info.go:125] begin check local disk info of client
I0320 15:41:24.968108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:41:24.968114  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387d00 0xc000387d40]
E0320 15:41:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:41:33.409894  543705 memory.go:184] no items to output this cycle
I0320 15:41:33.409964  543705 cpu.go:275] no items to output this cycle
E0320 15:41:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:41:43.409812  543705 memory.go:191] Add success.
I0320 15:41:43.409848  543705 cpu.go:282] Add success.
I0320 15:41:43.419992  543705 net.go:648] Add success.
I0320 15:41:43.423103  543705 net.go:770] primary dev: ETH0
I0320 15:41:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:41:43.423129  543705 net.go:698] Add success.
I0320 15:41:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:41:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:41:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:41:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:41:53.409775  543705 memory.go:184] no items to output this cycle
I0320 15:41:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 15:42:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:42:03.409819  543705 memory.go:184] no items to output this cycle
I0320 15:42:03.409830  543705 cpu.go:275] no items to output this cycle
E0320 15:42:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:42:13.409795  543705 memory.go:191] Add success.
I0320 15:42:13.409797  543705 cpu.go:282] Add success.
W0320 15:42:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:42:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:42:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:42:13.420103  543705 net.go:648] Add success.
I0320 15:42:13.422699  543705 net.go:770] primary dev: ETH0
I0320 15:42:13.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:42:13.422731  543705 net.go:698] Add success.
I0320 15:42:13.468777  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2685224a-c7a9-4f3c-9119-c4da11c2df96","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:42:13.468809  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 15:42:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:42:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0320 15:42:14.455242  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:42:14.456066  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:42:14.456075  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:42:14.456081  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:42:14.457037  543705 disk_worker.go:494] system disk:vda1
I0320 15:42:14.457067  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:42:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:42:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:42:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:42:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:42:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:42:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:42:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:42:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:42:23.409780  543705 memory.go:184] no items to output this cycle
I0320 15:42:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 15:42:24.969672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:42:24.972155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:42:24.972161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de900 0xc0003de940]
E0320 15:42:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:42:33.409781  543705 memory.go:184] no items to output this cycle
I0320 15:42:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 15:42:38.532431  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:42:38.532438  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:42:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:42:43.410513  543705 memory.go:191] Add success.
I0320 15:42:43.409805  543705 cpu.go:282] Add success.
I0320 15:42:43.420286  543705 net.go:648] Add success.
I0320 15:42:43.422678  543705 net.go:770] primary dev: ETH0
I0320 15:42:43.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:42:43.422708  543705 net.go:698] Add success.
I0320 15:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:42:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:42:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:42:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:42:53.409806  543705 memory.go:184] no items to output this cycle
I0320 15:42:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 15:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:43:03.409786  543705 memory.go:184] no items to output this cycle
I0320 15:43:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 15:43:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:43:13.409784  543705 memory.go:191] Add success.
W0320 15:43:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:43:13.409809  543705 cpu.go:282] Add success.
W0320 15:43:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:43:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:43:13.420124  543705 net.go:648] Add success.
I0320 15:43:13.423064  543705 net.go:770] primary dev: ETH0
I0320 15:43:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:43:13.423093  543705 net.go:698] Add success.
I0320 15:43:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:43:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:43:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 15:43:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:43:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 15:43:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:43:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:43:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:43:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:43:23.409780  543705 memory.go:184] no items to output this cycle
I0320 15:43:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 15:43:24.973664  543705 disk_info.go:125] begin check local disk info of client
I0320 15:43:24.976197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:43:24.976203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a180 0xc00036a1c0]
E0320 15:43:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:43:33.409795  543705 memory.go:184] no items to output this cycle
I0320 15:43:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 15:43:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:43:43.409792  543705 memory.go:191] Add success.
I0320 15:43:43.409972  543705 cpu.go:282] Add success.
I0320 15:43:43.419741  543705 net.go:648] Add success.
I0320 15:43:43.422776  543705 net.go:770] primary dev: ETH0
I0320 15:43:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:43:43.422805  543705 net.go:698] Add success.
I0320 15:43:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:43:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:43:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:43:53.409795  543705 memory.go:184] no items to output this cycle
I0320 15:43:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 15:44:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:44:03.409777  543705 memory.go:184] no items to output this cycle
I0320 15:44:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 15:44:13.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:44:13.409770  543705 memory.go:191] Add success.
W0320 15:44:13.409796  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:44:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:44:13.409810  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:44:13.409819  543705 cpu.go:282] Add success.
I0320 15:44:13.420158  543705 net.go:648] Add success.
I0320 15:44:13.422745  543705 net.go:770] primary dev: ETH0
I0320 15:44:13.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:44:13.422774  543705 net.go:698] Add success.
I0320 15:44:14.454219  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:44:14.454394  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:44:14.454404  543705 disk_worker.go:708] disk space is not compliant
W0320 15:44:14.454407  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:44:14.455747  543705 disk_worker.go:494] system disk:vda1
I0320 15:44:14.455794  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:44:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:44:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:44:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:44:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:44:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:44:23.409772  543705 memory.go:184] no items to output this cycle
I0320 15:44:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 15:44:24.977672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:44:24.980112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:44:24.980118  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329640 0xc000329680]
E0320 15:44:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:44:33.409781  543705 memory.go:184] no items to output this cycle
I0320 15:44:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 15:44:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:44:43.409789  543705 memory.go:191] Add success.
I0320 15:44:43.409811  543705 cpu.go:282] Add success.
I0320 15:44:43.419955  543705 net.go:648] Add success.
I0320 15:44:43.422609  543705 net.go:770] primary dev: ETH0
I0320 15:44:43.422620  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:44:43.422632  543705 net.go:698] Add success.
I0320 15:44:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:44:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:44:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:44:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:44:53.409784  543705 memory.go:184] no items to output this cycle
I0320 15:44:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 15:45:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:45:03.409773  543705 memory.go:184] no items to output this cycle
I0320 15:45:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 15:45:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:45:13.409798  543705 cpu.go:282] Add success.
I0320 15:45:13.409799  543705 memory.go:191] Add success.
W0320 15:45:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:45:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:45:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:45:13.420583  543705 net.go:648] Add success.
I0320 15:45:13.423444  543705 net.go:770] primary dev: ETH0
I0320 15:45:13.423457  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:45:13.423469  543705 net.go:698] Add success.
I0320 15:45:13.470171  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"36febf27-4fdf-434a-9215-c04167b0584f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:45:13.470208  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:45:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:45:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:45:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 15:45:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:45:14.456532  543705 disk_worker.go:494] system disk:vda1
I0320 15:45:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:45:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:45:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:45:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:45:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:45:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:45:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:45:23.409797  543705 memory.go:184] no items to output this cycle
I0320 15:45:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 15:45:24.981675  543705 disk_info.go:125] begin check local disk info of client
I0320 15:45:24.984123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:45:24.984129  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314100 0xc000314140]
E0320 15:45:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:45:33.409807  543705 memory.go:184] no items to output this cycle
I0320 15:45:33.409820  543705 cpu.go:275] no items to output this cycle
I0320 15:45:38.533433  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:45:38.533440  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:45:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:45:43.410679  543705 memory.go:191] Add success.
I0320 15:45:43.409807  543705 cpu.go:282] Add success.
I0320 15:45:43.420399  543705 net.go:648] Add success.
I0320 15:45:43.423206  543705 net.go:770] primary dev: ETH0
I0320 15:45:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:45:43.423252  543705 net.go:698] Add success.
I0320 15:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:45:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:45:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:45:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:45:53.409787  543705 memory.go:184] no items to output this cycle
I0320 15:45:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 15:46:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:46:03.409774  543705 memory.go:184] no items to output this cycle
I0320 15:46:03.409788  543705 cpu.go:275] no items to output this cycle
W0320 15:46:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:46:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:46:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:46:13.409826  543705 cpu.go:282] Add success.
E0320 15:46:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:46:13.409845  543705 memory.go:191] Add success.
I0320 15:46:13.419988  543705 net.go:648] Add success.
I0320 15:46:13.422853  543705 net.go:770] primary dev: ETH0
I0320 15:46:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:46:13.422882  543705 net.go:698] Add success.
I0320 15:46:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:46:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:46:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 15:46:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:46:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 15:46:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:46:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:46:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:46:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:46:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:46:23.409793  543705 memory.go:184] no items to output this cycle
I0320 15:46:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 15:46:24.985673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:46:24.988132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:46:24.988138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4dc0 0xc0000c4e00]
E0320 15:46:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:46:33.409798  543705 memory.go:184] no items to output this cycle
I0320 15:46:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 15:46:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:46:43.409788  543705 memory.go:191] Add success.
I0320 15:46:43.409811  543705 cpu.go:282] Add success.
I0320 15:46:43.419874  543705 net.go:648] Add success.
I0320 15:46:43.422814  543705 net.go:770] primary dev: ETH0
I0320 15:46:43.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:46:43.422841  543705 net.go:698] Add success.
I0320 15:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:46:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:46:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:46:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:46:53.409772  543705 memory.go:184] no items to output this cycle
I0320 15:46:53.409877  543705 cpu.go:275] no items to output this cycle
E0320 15:47:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:47:03.409784  543705 cpu.go:275] no items to output this cycle
I0320 15:47:03.409793  543705 memory.go:184] no items to output this cycle
E0320 15:47:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:47:13.409788  543705 memory.go:191] Add success.
I0320 15:47:13.409804  543705 cpu.go:282] Add success.
W0320 15:47:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:47:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:47:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:47:13.420464  543705 net.go:648] Add success.
I0320 15:47:13.423251  543705 net.go:770] primary dev: ETH0
I0320 15:47:13.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:47:13.423276  543705 net.go:698] Add success.
I0320 15:47:13.452940  543705 event_worker.go:152] Polling the log file for events...
W0320 15:47:14.455079  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:47:14.455138  543705 disk_worker.go:708] disk space is not compliant
W0320 15:47:14.455141  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:47:14.456885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:47:14.456893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:47:14.456900  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:47:14.456972  543705 disk_worker.go:494] system disk:vda1
I0320 15:47:14.457014  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:47:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:47:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:47:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:47:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:47:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:47:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:47:16.472304  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:47:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:47:23.409793  543705 memory.go:184] no items to output this cycle
I0320 15:47:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 15:47:24.989671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:47:24.992095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:47:24.992101  543705 disk_info.go:196] parse disk info done, disk is : [0xc000587ac0 0xc000587b00]
E0320 15:47:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:47:33.409783  543705 memory.go:184] no items to output this cycle
I0320 15:47:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 15:47:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:47:43.409794  543705 memory.go:191] Add success.
I0320 15:47:43.409812  543705 cpu.go:282] Add success.
I0320 15:47:43.419885  543705 net.go:648] Add success.
I0320 15:47:43.422542  543705 net.go:770] primary dev: ETH0
I0320 15:47:43.422555  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:47:43.422567  543705 net.go:698] Add success.
I0320 15:47:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:47:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:47:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:47:53.409777  543705 memory.go:184] no items to output this cycle
I0320 15:47:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 15:48:03.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:48:03.409862  543705 memory.go:184] no items to output this cycle
I0320 15:48:03.409972  543705 cpu.go:275] no items to output this cycle
E0320 15:48:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:48:13.409786  543705 memory.go:191] Add success.
I0320 15:48:13.409791  543705 cpu.go:282] Add success.
W0320 15:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:48:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:48:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:48:13.420088  543705 net.go:648] Add success.
I0320 15:48:13.422691  543705 net.go:770] primary dev: ETH0
I0320 15:48:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:48:13.422715  543705 net.go:698] Add success.
I0320 15:48:13.967068  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3cfac21e-2f48-421a-85fc-6d8611001870","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:48:13.967102  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:48:14.454634  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:48:14.454865  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:48:14.454875  543705 disk_worker.go:708] disk space is not compliant
W0320 15:48:14.454878  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:48:14.456305  543705 disk_worker.go:494] system disk:vda1
I0320 15:48:14.456351  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:48:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:48:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:48:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:48:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:48:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:48:23.409776  543705 memory.go:184] no items to output this cycle
I0320 15:48:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 15:48:24.993668  543705 disk_info.go:125] begin check local disk info of client
I0320 15:48:24.996116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:48:24.996121  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471500 0xc000471540]
E0320 15:48:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:48:33.409800  543705 memory.go:184] no items to output this cycle
I0320 15:48:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 15:48:38.534441  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:48:38.534449  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:48:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:48:43.410852  543705 memory.go:191] Add success.
I0320 15:48:43.409801  543705 cpu.go:282] Add success.
I0320 15:48:43.420599  543705 net.go:648] Add success.
I0320 15:48:43.423688  543705 net.go:770] primary dev: ETH0
I0320 15:48:43.423701  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:48:43.423715  543705 net.go:698] Add success.
I0320 15:48:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:48:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:48:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:48:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:48:53.409771  543705 memory.go:184] no items to output this cycle
I0320 15:48:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 15:49:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:49:03.409778  543705 memory.go:184] no items to output this cycle
I0320 15:49:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 15:49:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:49:13.409809  543705 memory.go:191] Add success.
I0320 15:49:13.409813  543705 cpu.go:282] Add success.
W0320 15:49:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:49:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:49:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:49:13.420146  543705 net.go:648] Add success.
I0320 15:49:13.422919  543705 net.go:770] primary dev: ETH0
I0320 15:49:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:49:13.422943  543705 net.go:698] Add success.
I0320 15:49:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:49:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:49:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 15:49:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:49:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 15:49:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:49:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:49:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:49:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:49:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:49:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:49:23.409765  543705 memory.go:184] no items to output this cycle
I0320 15:49:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 15:49:24.997671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:49:25.000105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:49:25.000110  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab8c0 0xc0001ab900]
E0320 15:49:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:49:33.409801  543705 memory.go:184] no items to output this cycle
I0320 15:49:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 15:49:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:49:43.409788  543705 memory.go:191] Add success.
I0320 15:49:43.409819  543705 cpu.go:282] Add success.
I0320 15:49:43.419988  543705 net.go:648] Add success.
I0320 15:49:43.422588  543705 net.go:770] primary dev: ETH0
I0320 15:49:43.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:49:43.422613  543705 net.go:698] Add success.
I0320 15:49:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:49:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:49:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:49:53.409811  543705 memory.go:184] no items to output this cycle
I0320 15:49:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 15:50:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:50:03.409878  543705 cpu.go:275] no items to output this cycle
I0320 15:50:03.409894  543705 memory.go:184] no items to output this cycle
E0320 15:50:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:50:13.409809  543705 memory.go:191] Add success.
I0320 15:50:13.409823  543705 cpu.go:282] Add success.
W0320 15:50:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:50:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:50:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:50:13.420187  543705 net.go:648] Add success.
I0320 15:50:13.422815  543705 net.go:770] primary dev: ETH0
I0320 15:50:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:50:13.422844  543705 net.go:698] Add success.
I0320 15:50:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:50:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:50:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0320 15:50:14.455146  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:50:14.456488  543705 disk_worker.go:494] system disk:vda1
I0320 15:50:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:50:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:50:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:50:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:50:23.409777  543705 memory.go:184] no items to output this cycle
I0320 15:50:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 15:50:25.001672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:50:25.004153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:50:25.004159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005867c0 0xc000586800]
E0320 15:50:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:50:33.409791  543705 memory.go:184] no items to output this cycle
I0320 15:50:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 15:50:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:50:43.409787  543705 memory.go:191] Add success.
I0320 15:50:43.409809  543705 cpu.go:282] Add success.
I0320 15:50:43.419869  543705 net.go:648] Add success.
I0320 15:50:43.422591  543705 net.go:770] primary dev: ETH0
I0320 15:50:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:50:43.422617  543705 net.go:698] Add success.
I0320 15:50:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:50:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:50:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:50:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:50:53.409784  543705 memory.go:184] no items to output this cycle
I0320 15:50:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 15:51:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:51:03.409782  543705 memory.go:184] no items to output this cycle
I0320 15:51:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 15:51:13.409890  543705 cpu.go:282] Add success.
E0320 15:51:13.410031  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:51:13.410049  543705 memory.go:191] Add success.
W0320 15:51:13.410076  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:51:13.410089  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:51:13.410092  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:51:13.419703  543705 net.go:648] Add success.
I0320 15:51:13.423007  543705 net.go:770] primary dev: ETH0
I0320 15:51:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:51:13.423030  543705 net.go:698] Add success.
I0320 15:51:13.472370  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3e30de48-d97b-41c4-a2e4-4b5b867907b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:51:13.472404  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:51:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:51:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:51:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 15:51:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:51:14.456679  543705 disk_worker.go:494] system disk:vda1
I0320 15:51:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:51:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:51:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:51:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:51:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:51:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:51:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:51:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 15:51:23.409787  543705 memory.go:184] no items to output this cycle
I0320 15:51:25.005671  543705 disk_info.go:125] begin check local disk info of client
I0320 15:51:25.008133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:51:25.008139  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007acc0 0xc00007ad00]
E0320 15:51:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:51:33.409799  543705 memory.go:184] no items to output this cycle
I0320 15:51:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 15:51:38.535450  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:51:38.535457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:51:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:51:43.410715  543705 memory.go:191] Add success.
I0320 15:51:43.409793  543705 cpu.go:282] Add success.
I0320 15:51:43.420413  543705 net.go:648] Add success.
I0320 15:51:43.423201  543705 net.go:770] primary dev: ETH0
I0320 15:51:43.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:51:43.423228  543705 net.go:698] Add success.
I0320 15:51:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:51:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:51:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:51:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:51:53.409791  543705 cpu.go:275] no items to output this cycle
I0320 15:51:53.409793  543705 memory.go:184] no items to output this cycle
E0320 15:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:52:03.409781  543705 memory.go:184] no items to output this cycle
I0320 15:52:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 15:52:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:52:13.409796  543705 memory.go:191] Add success.
I0320 15:52:13.409796  543705 cpu.go:282] Add success.
W0320 15:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:52:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:52:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:52:13.420150  543705 net.go:648] Add success.
I0320 15:52:13.422710  543705 net.go:770] primary dev: ETH0
I0320 15:52:13.422723  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:52:13.422735  543705 net.go:698] Add success.
W0320 15:52:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:52:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 15:52:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0320 15:52:14.456948  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:52:14.456957  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:52:14.456963  543705 custom_config.go:64] query custom config with name: gpu
I0320 15:52:14.457009  543705 disk_worker.go:494] system disk:vda1
I0320 15:52:14.457051  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:52:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:52:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:52:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:52:16.457999  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:52:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:52:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:52:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:52:23.409778  543705 memory.go:184] no items to output this cycle
I0320 15:52:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 15:52:25.009670  543705 disk_info.go:125] begin check local disk info of client
I0320 15:52:25.012067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:52:25.012072  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053c200 0xc00053c240]
E0320 15:52:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:52:33.409785  543705 memory.go:184] no items to output this cycle
I0320 15:52:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 15:52:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:52:43.409794  543705 memory.go:191] Add success.
I0320 15:52:43.409799  543705 cpu.go:282] Add success.
I0320 15:52:43.419783  543705 net.go:770] primary dev: ETH0
I0320 15:52:43.419796  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:52:43.419809  543705 net.go:698] Add success.
I0320 15:52:43.420154  543705 net.go:648] Add success.
I0320 15:52:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:52:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:52:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:52:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:52:53.409771  543705 memory.go:184] no items to output this cycle
I0320 15:52:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 15:53:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:53:03.409808  543705 memory.go:184] no items to output this cycle
I0320 15:53:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 15:53:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:53:13.409784  543705 memory.go:191] Add success.
I0320 15:53:13.409804  543705 cpu.go:282] Add success.
W0320 15:53:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:53:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:53:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:53:13.420166  543705 net.go:648] Add success.
I0320 15:53:13.423317  543705 net.go:770] primary dev: ETH0
I0320 15:53:13.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:53:13.423346  543705 net.go:698] Add success.
I0320 15:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:53:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:53:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 15:53:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:53:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 15:53:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:53:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:53:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:53:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:53:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:53:23.409794  543705 memory.go:184] no items to output this cycle
I0320 15:53:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 15:53:25.013677  543705 disk_info.go:125] begin check local disk info of client
I0320 15:53:25.016097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:53:25.016102  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab1c0 0xc0001ab200]
E0320 15:53:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:53:33.409799  543705 memory.go:184] no items to output this cycle
I0320 15:53:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 15:53:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:53:43.409786  543705 memory.go:191] Add success.
I0320 15:53:43.409809  543705 cpu.go:282] Add success.
I0320 15:53:43.419994  543705 net.go:648] Add success.
I0320 15:53:43.422540  543705 net.go:770] primary dev: ETH0
I0320 15:53:43.422553  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:53:43.422566  543705 net.go:698] Add success.
I0320 15:53:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:53:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:53:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:53:53.409779  543705 memory.go:184] no items to output this cycle
I0320 15:53:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 15:54:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:54:03.409775  543705 memory.go:184] no items to output this cycle
I0320 15:54:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 15:54:13.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:54:13.409895  543705 memory.go:191] Add success.
W0320 15:54:13.409957  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 15:54:13.409970  543705 cpu.go:282] Add success.
W0320 15:54:13.409982  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:54:13.409986  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:54:13.419742  543705 net.go:648] Add success.
I0320 15:54:13.422222  543705 net.go:770] primary dev: ETH0
I0320 15:54:13.422235  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:54:13.422246  543705 net.go:698] Add success.
I0320 15:54:13.577306  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"519067a3-eb52-43ca-94e1-a19ca95446da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:54:13.577338  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 15:54:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:54:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:54:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 15:54:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:54:14.456486  543705 disk_worker.go:494] system disk:vda1
I0320 15:54:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:54:15.455605  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:54:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:54:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:54:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:54:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:54:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:54:23.409793  543705 memory.go:184] no items to output this cycle
I0320 15:54:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 15:54:25.017685  543705 disk_info.go:125] begin check local disk info of client
I0320 15:54:25.020193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:54:25.020199  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464e40 0xc000464e80]
E0320 15:54:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:54:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 15:54:33.409788  543705 memory.go:184] no items to output this cycle
I0320 15:54:38.536444  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:54:38.536452  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:54:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:54:43.410669  543705 memory.go:191] Add success.
I0320 15:54:43.409799  543705 cpu.go:282] Add success.
I0320 15:54:43.420542  543705 net.go:648] Add success.
I0320 15:54:43.423172  543705 net.go:770] primary dev: ETH0
I0320 15:54:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:54:43.423197  543705 net.go:698] Add success.
I0320 15:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:54:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:54:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:54:53.409781  543705 memory.go:184] no items to output this cycle
I0320 15:54:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 15:55:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:55:03.409777  543705 memory.go:184] no items to output this cycle
I0320 15:55:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 15:55:13.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:55:13.409966  543705 cpu.go:282] Add success.
I0320 15:55:13.410041  543705 memory.go:191] Add success.
W0320 15:55:13.410077  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:55:13.410095  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:55:13.410099  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:55:13.419705  543705 net.go:648] Add success.
I0320 15:55:13.422320  543705 net.go:770] primary dev: ETH0
I0320 15:55:13.422333  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:55:13.422345  543705 net.go:698] Add success.
I0320 15:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:55:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:55:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0320 15:55:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:55:14.456478  543705 disk_worker.go:494] system disk:vda1
I0320 15:55:14.456522  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:55:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:55:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:55:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:55:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:55:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:55:23.409760  543705 memory.go:184] no items to output this cycle
I0320 15:55:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 15:55:25.021672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:55:25.024048  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:55:25.024054  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499bc0 0xc000499c00]
E0320 15:55:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:55:33.409792  543705 memory.go:184] no items to output this cycle
I0320 15:55:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 15:55:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:55:43.409792  543705 memory.go:191] Add success.
I0320 15:55:43.409812  543705 cpu.go:282] Add success.
I0320 15:55:43.419946  543705 net.go:648] Add success.
I0320 15:55:43.422562  543705 net.go:770] primary dev: ETH0
I0320 15:55:43.422575  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:55:43.422587  543705 net.go:698] Add success.
I0320 15:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:55:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:55:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:55:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:55:53.409810  543705 memory.go:184] no items to output this cycle
I0320 15:55:53.409831  543705 cpu.go:275] no items to output this cycle
E0320 15:56:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:56:03.409781  543705 memory.go:184] no items to output this cycle
I0320 15:56:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 15:56:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:56:13.409794  543705 memory.go:191] Add success.
I0320 15:56:13.409794  543705 cpu.go:282] Add success.
W0320 15:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:56:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:56:13.420510  543705 net.go:648] Add success.
I0320 15:56:13.423379  543705 net.go:770] primary dev: ETH0
I0320 15:56:13.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:56:13.423403  543705 net.go:698] Add success.
I0320 15:56:14.453953  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:56:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:56:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0320 15:56:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:56:14.456625  543705 disk_worker.go:494] system disk:vda1
I0320 15:56:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:56:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:56:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:56:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:56:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:56:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:56:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:56:23.409777  543705 memory.go:184] no items to output this cycle
I0320 15:56:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 15:56:25.025674  543705 disk_info.go:125] begin check local disk info of client
I0320 15:56:25.028207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:56:25.028213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0320 15:56:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:56:33.409781  543705 memory.go:184] no items to output this cycle
I0320 15:56:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 15:56:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:56:43.409791  543705 memory.go:191] Add success.
I0320 15:56:43.409812  543705 cpu.go:282] Add success.
I0320 15:56:43.420017  543705 net.go:648] Add success.
I0320 15:56:43.423099  543705 net.go:770] primary dev: ETH0
I0320 15:56:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:56:43.423129  543705 net.go:698] Add success.
I0320 15:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:56:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:56:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:56:53.409768  543705 memory.go:184] no items to output this cycle
I0320 15:56:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 15:57:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:57:03.409770  543705 memory.go:184] no items to output this cycle
I0320 15:57:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 15:57:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:57:13.409793  543705 memory.go:191] Add success.
I0320 15:57:13.409796  543705 cpu.go:282] Add success.
W0320 15:57:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:57:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:57:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:57:13.420182  543705 net.go:648] Add success.
I0320 15:57:13.429367  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 15:57:13.429442  543705 net.go:770] primary dev: ETH0
I0320 15:57:13.429453  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:57:13.429464  543705 net.go:698] Add success.
I0320 15:57:13.453016  543705 event_worker.go:152] Polling the log file for events...
I0320 15:57:13.463495  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9bde6078-2547-4b42-85ec-1523f653adc3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 15:57:13.463525  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 15:57:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:57:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 15:57:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:57:14.456764  543705 disk_worker.go:494] system disk:vda1
I0320 15:57:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 15:57:14.457099  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 15:57:14.457107  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 15:57:14.457111  543705 custom_config.go:64] query custom config with name: gpu
E0320 15:57:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 15:57:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:57:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 15:57:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 15:57:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:57:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:57:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:57:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:57:23.409778  543705 memory.go:184] no items to output this cycle
I0320 15:57:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 15:57:25.029670  543705 disk_info.go:125] begin check local disk info of client
I0320 15:57:25.032092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:57:25.032097  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0320 15:57:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:57:33.409800  543705 memory.go:184] no items to output this cycle
I0320 15:57:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 15:57:38.537481  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 15:57:38.537491  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 15:57:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:57:43.410606  543705 memory.go:191] Add success.
I0320 15:57:43.409805  543705 cpu.go:282] Add success.
I0320 15:57:43.420301  543705 net.go:648] Add success.
I0320 15:57:43.422817  543705 net.go:770] primary dev: ETH0
I0320 15:57:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:57:43.422842  543705 net.go:698] Add success.
I0320 15:57:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:57:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:57:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:57:53.409788  543705 memory.go:184] no items to output this cycle
I0320 15:57:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 15:58:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:58:03.409783  543705 memory.go:184] no items to output this cycle
I0320 15:58:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 15:58:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:58:13.409817  543705 memory.go:191] Add success.
I0320 15:58:13.409824  543705 cpu.go:282] Add success.
W0320 15:58:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:58:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:58:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:58:13.420151  543705 net.go:648] Add success.
I0320 15:58:13.422847  543705 net.go:770] primary dev: ETH0
I0320 15:58:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:58:13.422872  543705 net.go:698] Add success.
I0320 15:58:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:58:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:58:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0320 15:58:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:58:14.456476  543705 disk_worker.go:494] system disk:vda1
I0320 15:58:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:58:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:58:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:58:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:58:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:58:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:58:23.409798  543705 memory.go:184] no items to output this cycle
I0320 15:58:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 15:58:25.033673  543705 disk_info.go:125] begin check local disk info of client
I0320 15:58:25.036191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:58:25.036198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487ec0 0xc000487f00]
E0320 15:58:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:58:33.409776  543705 memory.go:184] no items to output this cycle
I0320 15:58:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 15:58:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:58:43.409818  543705 memory.go:191] Add success.
I0320 15:58:43.409825  543705 cpu.go:282] Add success.
I0320 15:58:43.419994  543705 net.go:648] Add success.
I0320 15:58:43.423317  543705 net.go:770] primary dev: ETH0
I0320 15:58:43.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:58:43.423347  543705 net.go:698] Add success.
I0320 15:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:58:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:58:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:58:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:58:53.409805  543705 memory.go:184] no items to output this cycle
I0320 15:58:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 15:59:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:59:03.409774  543705 memory.go:184] no items to output this cycle
I0320 15:59:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 15:59:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:59:13.409817  543705 memory.go:191] Add success.
I0320 15:59:13.409827  543705 cpu.go:282] Add success.
W0320 15:59:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 15:59:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 15:59:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 15:59:13.420145  543705 net.go:648] Add success.
I0320 15:59:13.422664  543705 net.go:770] primary dev: ETH0
I0320 15:59:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:59:13.422688  543705 net.go:698] Add success.
I0320 15:59:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 15:59:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 15:59:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 15:59:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 15:59:14.456486  543705 disk_worker.go:494] system disk:vda1
I0320 15:59:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 15:59:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 15:59:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:59:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:59:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 15:59:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 15:59:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:59:23.409780  543705 memory.go:184] no items to output this cycle
I0320 15:59:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 15:59:25.037672  543705 disk_info.go:125] begin check local disk info of client
I0320 15:59:25.040110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 15:59:25.040115  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4780 0xc0000c47c0]
E0320 15:59:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:59:33.409794  543705 memory.go:184] no items to output this cycle
I0320 15:59:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 15:59:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:59:43.409790  543705 memory.go:191] Add success.
I0320 15:59:43.409792  543705 cpu.go:282] Add success.
I0320 15:59:43.419995  543705 net.go:648] Add success.
I0320 15:59:43.423304  543705 net.go:770] primary dev: ETH0
I0320 15:59:43.423318  543705 net.go:802] Send network stats successfully!,count is 6
I0320 15:59:43.423330  543705 net.go:698] Add success.
I0320 15:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 15:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 15:59:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 15:59:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 15:59:53.409790  543705 cpu.go:275] no items to output this cycle
I0320 15:59:53.409793  543705 memory.go:184] no items to output this cycle
E0320 16:00:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:00:03.409886  543705 memory.go:184] no items to output this cycle
I0320 16:00:03.409922  543705 cpu.go:275] no items to output this cycle
E0320 16:00:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:00:13.409777  543705 memory.go:191] Add success.
W0320 16:00:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:00:13.409810  543705 cpu.go:282] Add success.
W0320 16:00:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:00:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:00:13.420221  543705 net.go:648] Add success.
I0320 16:00:13.423182  543705 net.go:770] primary dev: ETH0
I0320 16:00:13.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:00:13.423209  543705 net.go:698] Add success.
I0320 16:00:13.469685  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4fe9cd23-4412-4e26-b897-4de3bbd905d0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:00:13.469719  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:00:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:00:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:00:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 16:00:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:00:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 16:00:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:00:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:00:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:00:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:00:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:00:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:00:23.409795  543705 memory.go:184] no items to output this cycle
I0320 16:00:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 16:00:25.041671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:00:25.044121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:00:25.044126  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d40 0xc0000c4d80]
E0320 16:00:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:00:33.409770  543705 memory.go:184] no items to output this cycle
I0320 16:00:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 16:00:38.538452  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:00:38.538459  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:00:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:00:43.411026  543705 memory.go:191] Add success.
I0320 16:00:43.409829  543705 cpu.go:282] Add success.
I0320 16:00:43.419702  543705 net.go:648] Add success.
I0320 16:00:43.422808  543705 net.go:770] primary dev: ETH0
I0320 16:00:43.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:00:43.422836  543705 net.go:698] Add success.
I0320 16:00:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:00:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:00:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:00:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:00:53.409891  543705 memory.go:184] no items to output this cycle
I0320 16:00:53.409914  543705 cpu.go:275] no items to output this cycle
E0320 16:01:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:01:03.409777  543705 memory.go:184] no items to output this cycle
I0320 16:01:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 16:01:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:01:13.409812  543705 memory.go:191] Add success.
I0320 16:01:13.409819  543705 cpu.go:282] Add success.
W0320 16:01:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:01:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:01:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:01:13.420090  543705 net.go:648] Add success.
I0320 16:01:13.422776  543705 net.go:770] primary dev: ETH0
I0320 16:01:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:01:13.422801  543705 net.go:698] Add success.
I0320 16:01:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:01:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:01:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 16:01:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:01:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 16:01:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:01:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:01:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:01:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:01:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:01:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:01:23.409778  543705 memory.go:184] no items to output this cycle
I0320 16:01:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 16:01:25.045678  543705 disk_info.go:125] begin check local disk info of client
I0320 16:01:25.048094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:01:25.048099  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471d40 0xc000471d80]
E0320 16:01:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:01:33.409804  543705 memory.go:184] no items to output this cycle
I0320 16:01:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 16:01:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:01:43.409804  543705 cpu.go:282] Add success.
I0320 16:01:43.409811  543705 memory.go:191] Add success.
I0320 16:01:43.419895  543705 net.go:648] Add success.
I0320 16:01:43.422492  543705 net.go:770] primary dev: ETH0
I0320 16:01:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:01:43.422517  543705 net.go:698] Add success.
I0320 16:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:01:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:01:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:01:53.410316  543705 memory.go:184] no items to output this cycle
I0320 16:01:53.410429  543705 cpu.go:275] no items to output this cycle
E0320 16:02:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:02:03.409792  543705 memory.go:184] no items to output this cycle
I0320 16:02:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 16:02:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:02:13.409795  543705 memory.go:191] Add success.
I0320 16:02:13.409795  543705 cpu.go:282] Add success.
W0320 16:02:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:02:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:02:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:02:13.420141  543705 net.go:648] Add success.
I0320 16:02:13.422933  543705 net.go:770] primary dev: ETH0
I0320 16:02:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:02:13.422958  543705 net.go:698] Add success.
W0320 16:02:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:02:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 16:02:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:02:14.456794  543705 disk_worker.go:494] system disk:vda1
I0320 16:02:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:02:14.457085  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:02:14.457093  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:02:14.457098  543705 custom_config.go:64] query custom config with name: gpu
E0320 16:02:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:02:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:02:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:02:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:02:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:02:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:02:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:02:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:02:23.409809  543705 memory.go:184] no items to output this cycle
I0320 16:02:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 16:02:25.049674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:02:25.052124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:02:25.052130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0320 16:02:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:02:33.409803  543705 memory.go:184] no items to output this cycle
I0320 16:02:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 16:02:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:02:43.409831  543705 memory.go:191] Add success.
I0320 16:02:43.409834  543705 cpu.go:282] Add success.
I0320 16:02:43.419974  543705 net.go:648] Add success.
I0320 16:02:43.423194  543705 net.go:770] primary dev: ETH0
I0320 16:02:43.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:02:43.423219  543705 net.go:698] Add success.
I0320 16:02:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:02:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:02:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:02:53.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:02:53.409888  543705 memory.go:184] no items to output this cycle
I0320 16:02:53.409969  543705 cpu.go:275] no items to output this cycle
E0320 16:03:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:03:03.409768  543705 memory.go:184] no items to output this cycle
I0320 16:03:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 16:03:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:03:13.409819  543705 memory.go:191] Add success.
I0320 16:03:13.409826  543705 cpu.go:282] Add success.
W0320 16:03:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:03:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:03:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:03:13.420150  543705 net.go:648] Add success.
I0320 16:03:13.422706  543705 net.go:770] primary dev: ETH0
I0320 16:03:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:03:13.422730  543705 net.go:698] Add success.
I0320 16:03:13.469007  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4d5ea5c8-01c3-4657-892c-2b621891d0fe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:03:13.469042  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:03:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:03:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:03:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 16:03:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:03:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 16:03:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:03:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:03:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:03:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:03:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:03:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:03:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:03:23.409794  543705 memory.go:184] no items to output this cycle
I0320 16:03:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 16:03:25.053671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:03:25.056116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:03:25.056122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005878c0 0xc000587900]
E0320 16:03:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:03:33.409771  543705 memory.go:184] no items to output this cycle
I0320 16:03:33.409804  543705 cpu.go:275] no items to output this cycle
I0320 16:03:38.539450  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:03:38.539457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:03:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:03:43.410902  543705 memory.go:191] Add success.
I0320 16:03:43.409839  543705 cpu.go:282] Add success.
I0320 16:03:43.420583  543705 net.go:648] Add success.
I0320 16:03:43.423366  543705 net.go:770] primary dev: ETH0
I0320 16:03:43.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:03:43.423394  543705 net.go:698] Add success.
I0320 16:03:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:03:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:03:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:03:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:03:53.409779  543705 memory.go:184] no items to output this cycle
I0320 16:03:53.409799  543705 cpu.go:275] no items to output this cycle
I0320 16:04:03.409910  543705 cpu.go:275] no items to output this cycle
E0320 16:04:03.409930  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:04:03.409951  543705 memory.go:184] no items to output this cycle
E0320 16:04:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:04:13.409785  543705 memory.go:191] Add success.
I0320 16:04:13.409802  543705 cpu.go:282] Add success.
W0320 16:04:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:04:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:04:13.420183  543705 net.go:648] Add success.
I0320 16:04:13.422873  543705 net.go:770] primary dev: ETH0
I0320 16:04:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:04:13.422902  543705 net.go:698] Add success.
I0320 16:04:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:04:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:04:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 16:04:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:04:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 16:04:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:04:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:04:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:04:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:04:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:04:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:04:23.409786  543705 memory.go:184] no items to output this cycle
I0320 16:04:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 16:04:25.057671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:04:25.060095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:04:25.060101  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
E0320 16:04:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:04:33.409806  543705 memory.go:184] no items to output this cycle
I0320 16:04:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 16:04:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:04:43.409802  543705 memory.go:191] Add success.
I0320 16:04:43.409803  543705 cpu.go:282] Add success.
I0320 16:04:43.420058  543705 net.go:648] Add success.
I0320 16:04:43.422759  543705 net.go:770] primary dev: ETH0
I0320 16:04:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:04:43.422788  543705 net.go:698] Add success.
I0320 16:04:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:04:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:04:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:04:53.409770  543705 memory.go:184] no items to output this cycle
I0320 16:04:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 16:05:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:05:03.409771  543705 memory.go:184] no items to output this cycle
I0320 16:05:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 16:05:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:05:13.409813  543705 memory.go:191] Add success.
I0320 16:05:13.409815  543705 cpu.go:282] Add success.
W0320 16:05:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:05:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:05:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:05:13.419766  543705 net.go:648] Add success.
I0320 16:05:13.422648  543705 net.go:770] primary dev: ETH0
I0320 16:05:13.422660  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:05:13.422672  543705 net.go:698] Add success.
I0320 16:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:05:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:05:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0320 16:05:14.455148  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:05:14.456467  543705 disk_worker.go:494] system disk:vda1
I0320 16:05:14.456511  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:05:15.456018  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:05:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:05:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:05:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:05:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:05:23.409774  543705 memory.go:184] no items to output this cycle
I0320 16:05:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 16:05:25.061670  543705 disk_info.go:125] begin check local disk info of client
I0320 16:05:25.064099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:05:25.064105  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498980 0xc000498a00]
E0320 16:05:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:05:33.409773  543705 memory.go:184] no items to output this cycle
I0320 16:05:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 16:05:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:05:43.409821  543705 memory.go:191] Add success.
I0320 16:05:43.409822  543705 cpu.go:282] Add success.
I0320 16:05:43.419825  543705 net.go:770] primary dev: ETH0
I0320 16:05:43.419838  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:05:43.419851  543705 net.go:698] Add success.
I0320 16:05:43.420219  543705 net.go:648] Add success.
I0320 16:05:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:05:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:05:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:05:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:05:53.409816  543705 memory.go:184] no items to output this cycle
I0320 16:05:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 16:06:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:06:03.409800  543705 memory.go:184] no items to output this cycle
I0320 16:06:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 16:06:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:06:13.409778  543705 memory.go:191] Add success.
W0320 16:06:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:06:13.409808  543705 cpu.go:282] Add success.
W0320 16:06:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:06:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:06:13.419750  543705 net.go:648] Add success.
I0320 16:06:13.422608  543705 net.go:770] primary dev: ETH0
I0320 16:06:13.422622  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:06:13.422633  543705 net.go:698] Add success.
I0320 16:06:13.469675  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db252f21-e28a-4f08-9969-6b53c6fe0b7d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:06:13.469705  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:06:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:06:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:06:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 16:06:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:06:14.456599  543705 disk_worker.go:494] system disk:vda1
I0320 16:06:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:06:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:06:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:06:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:06:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:06:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:06:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:06:23.409771  543705 memory.go:184] no items to output this cycle
I0320 16:06:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 16:06:25.065673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:06:25.068100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:06:25.068105  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0320 16:06:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:06:33.409774  543705 memory.go:184] no items to output this cycle
I0320 16:06:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 16:06:38.540457  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:06:38.540464  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:06:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:06:43.410762  543705 memory.go:191] Add success.
I0320 16:06:43.409802  543705 cpu.go:282] Add success.
I0320 16:06:43.420472  543705 net.go:648] Add success.
I0320 16:06:43.423140  543705 net.go:770] primary dev: ETH0
I0320 16:06:43.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:06:43.423167  543705 net.go:698] Add success.
I0320 16:06:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:06:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:06:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:06:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:06:53.409780  543705 memory.go:184] no items to output this cycle
I0320 16:06:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 16:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:07:03.409784  543705 memory.go:184] no items to output this cycle
I0320 16:07:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:07:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:07:13.409810  543705 memory.go:191] Add success.
I0320 16:07:13.409816  543705 cpu.go:282] Add success.
W0320 16:07:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:07:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:07:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:07:13.420300  543705 net.go:648] Add success.
I0320 16:07:13.422924  543705 net.go:770] primary dev: ETH0
I0320 16:07:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:07:13.422953  543705 net.go:698] Add success.
I0320 16:07:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0320 16:07:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:07:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 16:07:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:07:14.456784  543705 disk_worker.go:494] system disk:vda1
I0320 16:07:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:07:14.456990  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:07:14.456999  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:07:14.457004  543705 custom_config.go:64] query custom config with name: gpu
E0320 16:07:15.456681  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:07:15.456690  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:07:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:07:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:07:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:07:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:07:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:07:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:07:23.409793  543705 memory.go:184] no items to output this cycle
I0320 16:07:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 16:07:25.069673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:07:25.072056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:07:25.072062  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8a40 0xc0003d8a80]
E0320 16:07:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:07:33.409768  543705 memory.go:184] no items to output this cycle
I0320 16:07:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 16:07:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:07:43.409803  543705 memory.go:191] Add success.
I0320 16:07:43.409805  543705 cpu.go:282] Add success.
I0320 16:07:43.419974  543705 net.go:648] Add success.
I0320 16:07:43.422587  543705 net.go:770] primary dev: ETH0
I0320 16:07:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:07:43.422618  543705 net.go:698] Add success.
I0320 16:07:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:07:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:07:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:07:53.409788  543705 memory.go:184] no items to output this cycle
I0320 16:07:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 16:08:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:08:03.409782  543705 memory.go:184] no items to output this cycle
I0320 16:08:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:08:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:08:13.409792  543705 memory.go:191] Add success.
I0320 16:08:13.409797  543705 cpu.go:282] Add success.
W0320 16:08:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:08:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:08:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:08:13.420068  543705 net.go:648] Add success.
I0320 16:08:13.422954  543705 net.go:770] primary dev: ETH0
I0320 16:08:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:08:13.422984  543705 net.go:698] Add success.
I0320 16:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:08:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:08:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 16:08:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:08:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 16:08:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:08:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:08:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:08:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:08:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:08:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:08:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:08:23.409781  543705 memory.go:184] no items to output this cycle
I0320 16:08:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 16:08:25.073672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:08:25.076131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:08:25.076137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dcd40 0xc0003dcd80]
E0320 16:08:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:08:33.409786  543705 memory.go:184] no items to output this cycle
I0320 16:08:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 16:08:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:08:43.409788  543705 memory.go:191] Add success.
I0320 16:08:43.409818  543705 cpu.go:282] Add success.
I0320 16:08:43.419865  543705 net.go:648] Add success.
I0320 16:08:43.422875  543705 net.go:770] primary dev: ETH0
I0320 16:08:43.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:08:43.422901  543705 net.go:698] Add success.
I0320 16:08:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:08:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:08:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:08:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:08:53.409767  543705 memory.go:184] no items to output this cycle
I0320 16:08:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 16:09:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:09:03.409814  543705 memory.go:184] no items to output this cycle
I0320 16:09:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 16:09:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:09:13.409812  543705 memory.go:191] Add success.
I0320 16:09:13.409818  543705 cpu.go:282] Add success.
W0320 16:09:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:09:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:09:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:09:13.420098  543705 net.go:648] Add success.
I0320 16:09:13.422567  543705 net.go:770] primary dev: ETH0
I0320 16:09:13.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:09:13.422592  543705 net.go:698] Add success.
I0320 16:09:13.468367  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ab55eec-13ef-40aa-88d9-afea44b473cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:09:13.468399  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:09:14.455315  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:09:14.455413  543705 disk_worker.go:708] disk space is not compliant
W0320 16:09:14.455418  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:09:14.457000  543705 disk_worker.go:494] system disk:vda1
I0320 16:09:14.457030  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:09:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:09:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:09:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:09:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:09:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:09:23.409796  543705 memory.go:184] no items to output this cycle
I0320 16:09:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 16:09:25.077677  543705 disk_info.go:125] begin check local disk info of client
I0320 16:09:25.080190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:09:25.080196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ac080 0xc0004ac0c0]
E0320 16:09:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:09:33.409788  543705 memory.go:184] no items to output this cycle
I0320 16:09:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 16:09:38.541450  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:09:38.541457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:09:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:09:43.410707  543705 memory.go:191] Add success.
I0320 16:09:43.409824  543705 cpu.go:282] Add success.
I0320 16:09:43.420426  543705 net.go:648] Add success.
I0320 16:09:43.423364  543705 net.go:770] primary dev: ETH0
I0320 16:09:43.423377  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:09:43.423390  543705 net.go:698] Add success.
I0320 16:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:09:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:09:53.409781  543705 memory.go:184] no items to output this cycle
I0320 16:09:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 16:10:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:10:03.409794  543705 memory.go:184] no items to output this cycle
I0320 16:10:03.409804  543705 cpu.go:275] no items to output this cycle
W0320 16:10:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:10:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:10:13.409742  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:10:13.409835  543705 cpu.go:282] Add success.
E0320 16:10:13.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:10:13.409856  543705 memory.go:191] Add success.
I0320 16:10:13.420048  543705 net.go:648] Add success.
I0320 16:10:13.422595  543705 net.go:770] primary dev: ETH0
I0320 16:10:13.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:10:13.422620  543705 net.go:698] Add success.
I0320 16:10:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:10:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:10:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 16:10:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:10:14.456481  543705 disk_worker.go:494] system disk:vda1
I0320 16:10:14.456509  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:10:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:10:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:10:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:10:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:10:23.410235  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:10:23.410251  543705 memory.go:184] no items to output this cycle
I0320 16:10:23.410275  543705 cpu.go:275] no items to output this cycle
I0320 16:10:25.081676  543705 disk_info.go:125] begin check local disk info of client
I0320 16:10:25.084123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:10:25.084129  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315140 0xc000315180]
E0320 16:10:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:10:33.409779  543705 memory.go:184] no items to output this cycle
I0320 16:10:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:10:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:10:43.409819  543705 memory.go:191] Add success.
I0320 16:10:43.409828  543705 cpu.go:282] Add success.
I0320 16:10:43.420096  543705 net.go:648] Add success.
I0320 16:10:43.423605  543705 net.go:770] primary dev: ETH0
I0320 16:10:43.423619  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:10:43.423632  543705 net.go:698] Add success.
I0320 16:10:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:10:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:10:53.410410  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:10:53.410429  543705 memory.go:184] no items to output this cycle
I0320 16:10:53.410439  543705 cpu.go:275] no items to output this cycle
E0320 16:11:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:11:03.409775  543705 memory.go:184] no items to output this cycle
I0320 16:11:03.409796  543705 cpu.go:275] no items to output this cycle
W0320 16:11:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:11:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:11:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:11:13.409800  543705 cpu.go:282] Add success.
E0320 16:11:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:11:13.409831  543705 memory.go:191] Add success.
I0320 16:11:13.420246  543705 net.go:648] Add success.
I0320 16:11:13.422909  543705 net.go:770] primary dev: ETH0
I0320 16:11:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:11:13.422936  543705 net.go:698] Add success.
I0320 16:11:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:11:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:11:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 16:11:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:11:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 16:11:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:11:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:11:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:11:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:11:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:11:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:11:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:11:23.409794  543705 memory.go:184] no items to output this cycle
I0320 16:11:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 16:11:25.085671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:11:25.088114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:11:25.088120  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461b80 0xc000461bc0]
E0320 16:11:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:11:33.409803  543705 memory.go:184] no items to output this cycle
I0320 16:11:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 16:11:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:11:43.409782  543705 memory.go:191] Add success.
I0320 16:11:43.409814  543705 cpu.go:282] Add success.
I0320 16:11:43.419885  543705 net.go:648] Add success.
I0320 16:11:43.422554  543705 net.go:770] primary dev: ETH0
I0320 16:11:43.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:11:43.422592  543705 net.go:698] Add success.
I0320 16:11:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:11:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:11:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:11:53.409781  543705 memory.go:184] no items to output this cycle
I0320 16:11:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:12:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:12:03.409781  543705 memory.go:184] no items to output this cycle
I0320 16:12:03.409786  543705 cpu.go:275] no items to output this cycle
W0320 16:12:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:12:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:12:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 16:12:13.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:12:13.409840  543705 cpu.go:282] Add success.
I0320 16:12:13.409850  543705 memory.go:191] Add success.
I0320 16:12:13.419871  543705 net.go:770] primary dev: ETH0
I0320 16:12:13.419883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:12:13.419897  543705 net.go:698] Add success.
I0320 16:12:13.420121  543705 net.go:648] Add success.
I0320 16:12:13.908582  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bfe2a3c7-eaa7-4540-94f5-d4c6afae503b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:12:13.908625  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 16:12:14.454253  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:12:14.454264  543705 disk_worker.go:708] disk space is not compliant
W0320 16:12:14.454266  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:12:14.455610  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:12:14.455619  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:12:14.455635  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:12:14.456238  543705 disk_worker.go:494] system disk:vda1
I0320 16:12:14.456268  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:12:15.457003  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:12:15.457012  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 16:12:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:12:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:12:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:12:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:12:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:12:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:12:23.409795  543705 memory.go:184] no items to output this cycle
I0320 16:12:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 16:12:25.089679  543705 disk_info.go:125] begin check local disk info of client
I0320 16:12:25.092214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:12:25.092222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0320 16:12:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:12:33.409802  543705 memory.go:184] no items to output this cycle
I0320 16:12:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 16:12:38.542462  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:12:38.542468  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:12:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:12:43.410721  543705 memory.go:191] Add success.
I0320 16:12:43.409800  543705 cpu.go:282] Add success.
I0320 16:12:43.420502  543705 net.go:648] Add success.
I0320 16:12:43.423175  543705 net.go:770] primary dev: ETH0
I0320 16:12:43.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:12:43.423205  543705 net.go:698] Add success.
I0320 16:12:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:12:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:12:53.409797  543705 memory.go:184] no items to output this cycle
I0320 16:12:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 16:13:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:13:03.409790  543705 memory.go:184] no items to output this cycle
I0320 16:13:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 16:13:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:13:13.409808  543705 memory.go:191] Add success.
I0320 16:13:13.409818  543705 cpu.go:282] Add success.
W0320 16:13:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:13:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:13:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:13:13.420136  543705 net.go:648] Add success.
I0320 16:13:13.423430  543705 net.go:770] primary dev: ETH0
I0320 16:13:13.423443  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:13:13.423455  543705 net.go:698] Add success.
I0320 16:13:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:13:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:13:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 16:13:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:13:14.456564  543705 disk_worker.go:494] system disk:vda1
I0320 16:13:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:13:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:13:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:13:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:13:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:13:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:13:23.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:13:23.409887  543705 memory.go:184] no items to output this cycle
I0320 16:13:23.409947  543705 cpu.go:275] no items to output this cycle
I0320 16:13:25.093675  543705 disk_info.go:125] begin check local disk info of client
I0320 16:13:25.096110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:13:25.096117  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa000 0xc0001fa040]
E0320 16:13:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:13:33.409787  543705 memory.go:184] no items to output this cycle
I0320 16:13:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 16:13:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:13:43.409810  543705 memory.go:191] Add success.
I0320 16:13:43.409815  543705 cpu.go:282] Add success.
I0320 16:13:43.419913  543705 net.go:648] Add success.
I0320 16:13:43.422714  543705 net.go:770] primary dev: ETH0
I0320 16:13:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:13:43.422739  543705 net.go:698] Add success.
I0320 16:13:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:13:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:13:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:13:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:13:53.409794  543705 memory.go:184] no items to output this cycle
I0320 16:13:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 16:14:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:14:03.409794  543705 memory.go:184] no items to output this cycle
I0320 16:14:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 16:14:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:14:13.409825  543705 memory.go:191] Add success.
I0320 16:14:13.409828  543705 cpu.go:282] Add success.
W0320 16:14:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:14:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:14:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:14:13.420110  543705 net.go:648] Add success.
I0320 16:14:13.422942  543705 net.go:770] primary dev: ETH0
I0320 16:14:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:14:13.422967  543705 net.go:698] Add success.
I0320 16:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:14:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:14:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0320 16:14:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:14:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 16:14:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:14:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:14:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:14:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:14:23.409912  543705 memory.go:184] no items to output this cycle
I0320 16:14:23.409921  543705 cpu.go:275] no items to output this cycle
I0320 16:14:25.097674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:14:25.100207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:14:25.100213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304000 0xc000304040]
E0320 16:14:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:14:33.409814  543705 memory.go:184] no items to output this cycle
I0320 16:14:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 16:14:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:14:43.409830  543705 memory.go:191] Add success.
I0320 16:14:43.409834  543705 cpu.go:282] Add success.
I0320 16:14:43.420020  543705 net.go:648] Add success.
I0320 16:14:43.422604  543705 net.go:770] primary dev: ETH0
I0320 16:14:43.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:14:43.422629  543705 net.go:698] Add success.
I0320 16:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:14:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:14:53.410247  543705 memory.go:184] no items to output this cycle
I0320 16:14:53.410257  543705 cpu.go:275] no items to output this cycle
E0320 16:15:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:15:03.409816  543705 memory.go:184] no items to output this cycle
I0320 16:15:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 16:15:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:15:13.409786  543705 memory.go:191] Add success.
I0320 16:15:13.409788  543705 cpu.go:282] Add success.
W0320 16:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:15:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:15:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:15:13.420053  543705 net.go:648] Add success.
I0320 16:15:13.422960  543705 net.go:770] primary dev: ETH0
I0320 16:15:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:15:13.422985  543705 net.go:698] Add success.
I0320 16:15:13.470270  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"621884d8-f88c-4ea7-93af-ea78bc1e24b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:15:13.470305  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:15:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:15:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:15:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 16:15:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:15:14.456673  543705 disk_worker.go:494] system disk:vda1
I0320 16:15:14.456713  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:15:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:15:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:15:23.409784  543705 memory.go:184] no items to output this cycle
I0320 16:15:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 16:15:25.101672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:15:25.104200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:15:25.104205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9100 0xc0003b9140]
E0320 16:15:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:15:33.409784  543705 memory.go:184] no items to output this cycle
I0320 16:15:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 16:15:38.543470  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:15:38.543476  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:15:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:15:43.410666  543705 memory.go:191] Add success.
I0320 16:15:43.409799  543705 cpu.go:282] Add success.
I0320 16:15:43.420378  543705 net.go:648] Add success.
I0320 16:15:43.423424  543705 net.go:770] primary dev: ETH0
I0320 16:15:43.423437  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:15:43.423450  543705 net.go:698] Add success.
I0320 16:15:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:15:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:15:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:15:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:15:53.409786  543705 memory.go:184] no items to output this cycle
I0320 16:15:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 16:16:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:16:03.409807  543705 memory.go:184] no items to output this cycle
I0320 16:16:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 16:16:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:16:13.409780  543705 memory.go:191] Add success.
I0320 16:16:13.409805  543705 cpu.go:282] Add success.
W0320 16:16:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:16:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:16:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:16:13.420191  543705 net.go:648] Add success.
I0320 16:16:13.423288  543705 net.go:770] primary dev: ETH0
I0320 16:16:13.423304  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:16:13.423324  543705 net.go:698] Add success.
I0320 16:16:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:16:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:16:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 16:16:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:16:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 16:16:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:16:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:16:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:16:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:16:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:16:23.409769  543705 memory.go:184] no items to output this cycle
I0320 16:16:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 16:16:25.105675  543705 disk_info.go:125] begin check local disk info of client
I0320 16:16:25.108167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:16:25.108173  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f980 0xc00032f9c0]
E0320 16:16:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:16:33.409795  543705 memory.go:184] no items to output this cycle
I0320 16:16:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 16:16:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:16:43.409792  543705 memory.go:191] Add success.
I0320 16:16:43.409831  543705 cpu.go:282] Add success.
I0320 16:16:43.419884  543705 net.go:648] Add success.
I0320 16:16:43.422803  543705 net.go:770] primary dev: ETH0
I0320 16:16:43.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:16:43.422829  543705 net.go:698] Add success.
I0320 16:16:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:16:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:16:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:16:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:16:53.409763  543705 memory.go:184] no items to output this cycle
I0320 16:16:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 16:17:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:17:03.409782  543705 memory.go:184] no items to output this cycle
I0320 16:17:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 16:17:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:17:13.409810  543705 memory.go:191] Add success.
I0320 16:17:13.409820  543705 cpu.go:282] Add success.
W0320 16:17:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:17:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:17:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:17:13.420124  543705 net.go:648] Add success.
I0320 16:17:13.422802  543705 net.go:770] primary dev: ETH0
I0320 16:17:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:17:13.422830  543705 net.go:698] Add success.
I0320 16:17:13.453396  543705 event_worker.go:152] Polling the log file for events...
W0320 16:17:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:17:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 16:17:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:17:14.456434  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:17:14.456444  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:17:14.456450  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:17:14.456947  543705 disk_worker.go:494] system disk:vda1
I0320 16:17:14.457005  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:17:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:17:15.456792  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 16:17:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:17:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:17:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:17:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:17:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:17:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:17:23.409803  543705 memory.go:184] no items to output this cycle
I0320 16:17:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 16:17:25.109674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:17:25.112209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:17:25.112214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0320 16:17:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:17:33.409801  543705 memory.go:184] no items to output this cycle
I0320 16:17:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 16:17:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:17:43.409823  543705 memory.go:191] Add success.
I0320 16:17:43.409829  543705 cpu.go:282] Add success.
I0320 16:17:43.419969  543705 net.go:648] Add success.
I0320 16:17:43.423135  543705 net.go:770] primary dev: ETH0
I0320 16:17:43.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:17:43.423160  543705 net.go:698] Add success.
I0320 16:17:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:17:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:17:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:17:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:17:53.409795  543705 memory.go:184] no items to output this cycle
I0320 16:17:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 16:18:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:18:03.409801  543705 memory.go:184] no items to output this cycle
I0320 16:18:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 16:18:13.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:18:13.409774  543705 memory.go:191] Add success.
W0320 16:18:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:18:13.409807  543705 cpu.go:282] Add success.
W0320 16:18:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:18:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:18:13.420041  543705 net.go:648] Add success.
I0320 16:18:13.423094  543705 net.go:770] primary dev: ETH0
I0320 16:18:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:18:13.423119  543705 net.go:698] Add success.
I0320 16:18:13.971391  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7b748109-66ad-4ddf-9208-fec9aabe0f47","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:18:13.971427  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:18:14.453974  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:18:14.455279  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:18:14.455289  543705 disk_worker.go:708] disk space is not compliant
W0320 16:18:14.455292  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:18:14.457056  543705 disk_worker.go:494] system disk:vda1
I0320 16:18:14.457085  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:18:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:18:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:18:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:18:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:18:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:18:23.409791  543705 memory.go:184] no items to output this cycle
I0320 16:18:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 16:18:25.113674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:18:25.116166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:18:25.116172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa3c0 0xc0001fa400]
E0320 16:18:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:18:33.409797  543705 memory.go:184] no items to output this cycle
I0320 16:18:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 16:18:38.544472  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:18:38.544478  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:18:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:18:43.410847  543705 memory.go:191] Add success.
I0320 16:18:43.409811  543705 cpu.go:282] Add success.
I0320 16:18:43.420565  543705 net.go:648] Add success.
I0320 16:18:43.423360  543705 net.go:770] primary dev: ETH0
I0320 16:18:43.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:18:43.423392  543705 net.go:698] Add success.
I0320 16:18:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:18:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:18:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:18:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:18:53.409768  543705 memory.go:184] no items to output this cycle
I0320 16:18:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 16:19:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:19:03.409809  543705 memory.go:184] no items to output this cycle
I0320 16:19:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 16:19:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:19:13.409780  543705 memory.go:191] Add success.
W0320 16:19:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:19:13.409811  543705 cpu.go:282] Add success.
W0320 16:19:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:19:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:19:13.420112  543705 net.go:648] Add success.
I0320 16:19:13.422751  543705 net.go:770] primary dev: ETH0
I0320 16:19:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:19:13.422780  543705 net.go:698] Add success.
I0320 16:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:19:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:19:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 16:19:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:19:14.456528  543705 disk_worker.go:494] system disk:vda1
I0320 16:19:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:19:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:19:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:19:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:19:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:19:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:19:23.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:19:23.409906  543705 memory.go:184] no items to output this cycle
I0320 16:19:23.409961  543705 cpu.go:275] no items to output this cycle
I0320 16:19:25.117671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:19:25.120273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:19:25.120279  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032e000 0xc00032e040]
E0320 16:19:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:19:33.409774  543705 memory.go:184] no items to output this cycle
I0320 16:19:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 16:19:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:19:43.409798  543705 memory.go:191] Add success.
I0320 16:19:43.409806  543705 cpu.go:282] Add success.
I0320 16:19:43.419962  543705 net.go:648] Add success.
I0320 16:19:43.422676  543705 net.go:770] primary dev: ETH0
I0320 16:19:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:19:43.422702  543705 net.go:698] Add success.
I0320 16:19:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:19:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:19:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:19:53.409817  543705 memory.go:184] no items to output this cycle
I0320 16:19:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 16:20:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:20:03.409773  543705 memory.go:184] no items to output this cycle
I0320 16:20:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 16:20:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:20:13.409779  543705 memory.go:191] Add success.
W0320 16:20:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:20:13.409810  543705 cpu.go:282] Add success.
W0320 16:20:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:20:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:20:13.420192  543705 net.go:648] Add success.
I0320 16:20:13.423263  543705 net.go:770] primary dev: ETH0
I0320 16:20:13.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:20:13.423291  543705 net.go:698] Add success.
I0320 16:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:20:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:20:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 16:20:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:20:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 16:20:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:20:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:20:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:20:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:20:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:20:23.409778  543705 memory.go:184] no items to output this cycle
I0320 16:20:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 16:20:25.121673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:20:25.124192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:20:25.124199  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272000 0xc000272040]
E0320 16:20:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:20:33.409804  543705 memory.go:184] no items to output this cycle
I0320 16:20:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 16:20:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:20:43.409819  543705 memory.go:191] Add success.
I0320 16:20:43.409832  543705 cpu.go:282] Add success.
I0320 16:20:43.420004  543705 net.go:648] Add success.
I0320 16:20:43.422670  543705 net.go:770] primary dev: ETH0
I0320 16:20:43.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:20:43.422697  543705 net.go:698] Add success.
I0320 16:20:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:20:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:20:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:20:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:20:53.410258  543705 memory.go:184] no items to output this cycle
I0320 16:20:53.410289  543705 cpu.go:275] no items to output this cycle
E0320 16:21:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:21:03.409810  543705 memory.go:184] no items to output this cycle
I0320 16:21:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 16:21:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:21:13.409817  543705 memory.go:191] Add success.
I0320 16:21:13.409822  543705 cpu.go:282] Add success.
W0320 16:21:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:21:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:21:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:21:13.420077  543705 net.go:648] Add success.
I0320 16:21:13.422644  543705 net.go:770] primary dev: ETH0
I0320 16:21:13.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:21:13.422671  543705 net.go:698] Add success.
I0320 16:21:13.463413  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"60e92f49-324b-4d27-965f-0d50f0510254","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:21:13.463447  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:21:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:21:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:21:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 16:21:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:21:14.456530  543705 disk_worker.go:494] system disk:vda1
I0320 16:21:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:21:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:21:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:21:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:21:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:21:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:21:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:21:23.409768  543705 memory.go:184] no items to output this cycle
I0320 16:21:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 16:21:25.125673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:21:25.128240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:21:25.128246  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396400 0xc000396440]
E0320 16:21:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:21:33.409782  543705 memory.go:184] no items to output this cycle
I0320 16:21:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 16:21:38.545489  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:21:38.545495  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:21:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:21:43.410501  543705 memory.go:191] Add success.
I0320 16:21:43.409832  543705 cpu.go:282] Add success.
I0320 16:21:43.420392  543705 net.go:648] Add success.
I0320 16:21:43.422924  543705 net.go:770] primary dev: ETH0
I0320 16:21:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:21:43.422954  543705 net.go:698] Add success.
I0320 16:21:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:21:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:21:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:21:53.409785  543705 memory.go:184] no items to output this cycle
I0320 16:21:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 16:22:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:22:03.409796  543705 memory.go:184] no items to output this cycle
I0320 16:22:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 16:22:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:22:13.409815  543705 memory.go:191] Add success.
I0320 16:22:13.409827  543705 cpu.go:282] Add success.
W0320 16:22:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:22:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:22:13.420283  543705 net.go:648] Add success.
I0320 16:22:13.422937  543705 net.go:770] primary dev: ETH0
I0320 16:22:13.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:22:13.422965  543705 net.go:698] Add success.
W0320 16:22:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:22:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 16:22:14.455159  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:22:14.456931  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:22:14.456941  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:22:14.456947  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:22:14.456993  543705 disk_worker.go:494] system disk:vda1
I0320 16:22:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:22:15.456926  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:22:15.456940  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:22:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:22:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:22:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:22:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:22:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:22:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:22:23.409806  543705 memory.go:184] no items to output this cycle
I0320 16:22:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 16:22:25.129671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:22:25.132115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:22:25.132121  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f680 0xc00032f6c0]
E0320 16:22:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:22:33.409767  543705 memory.go:184] no items to output this cycle
I0320 16:22:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 16:22:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:22:43.409803  543705 memory.go:191] Add success.
I0320 16:22:43.409805  543705 cpu.go:282] Add success.
I0320 16:22:43.419975  543705 net.go:648] Add success.
I0320 16:22:43.423086  543705 net.go:770] primary dev: ETH0
I0320 16:22:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:22:43.423118  543705 net.go:698] Add success.
I0320 16:22:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:22:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:22:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:22:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:22:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 16:22:53.409781  543705 memory.go:184] no items to output this cycle
E0320 16:23:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:23:03.409805  543705 memory.go:184] no items to output this cycle
I0320 16:23:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 16:23:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:23:13.409794  543705 memory.go:191] Add success.
I0320 16:23:13.409795  543705 cpu.go:282] Add success.
W0320 16:23:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:23:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:23:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:23:13.420123  543705 net.go:648] Add success.
I0320 16:23:13.423209  543705 net.go:770] primary dev: ETH0
I0320 16:23:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:23:13.423240  543705 net.go:698] Add success.
I0320 16:23:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:23:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:23:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 16:23:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:23:14.456627  543705 disk_worker.go:494] system disk:vda1
I0320 16:23:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:23:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:23:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:23:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:23:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:23:23.409807  543705 memory.go:184] no items to output this cycle
I0320 16:23:23.409819  543705 cpu.go:275] no items to output this cycle
I0320 16:23:25.133677  543705 disk_info.go:125] begin check local disk info of client
I0320 16:23:25.136123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:23:25.136129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fd540 0xc0001fd580]
E0320 16:23:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:23:33.409775  543705 memory.go:184] no items to output this cycle
I0320 16:23:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 16:23:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:23:43.409792  543705 memory.go:191] Add success.
I0320 16:23:43.409810  543705 cpu.go:282] Add success.
I0320 16:23:43.419955  543705 net.go:648] Add success.
I0320 16:23:43.422650  543705 net.go:770] primary dev: ETH0
I0320 16:23:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:23:43.422680  543705 net.go:698] Add success.
I0320 16:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:23:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:23:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:23:53.409793  543705 memory.go:184] no items to output this cycle
I0320 16:23:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 16:24:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:24:03.409784  543705 memory.go:184] no items to output this cycle
I0320 16:24:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:24:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:24:13.409829  543705 memory.go:191] Add success.
I0320 16:24:13.409840  543705 cpu.go:282] Add success.
W0320 16:24:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:24:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:24:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:24:13.420421  543705 net.go:648] Add success.
I0320 16:24:13.423733  543705 net.go:770] primary dev: ETH0
I0320 16:24:13.423751  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:24:13.423769  543705 net.go:698] Add success.
I0320 16:24:13.468249  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ee83008-9af2-47f7-85c2-a6f8adc50191","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:24:13.468284  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:24:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:24:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:24:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 16:24:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:24:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 16:24:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:24:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:24:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:24:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:24:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:24:23.409783  543705 memory.go:184] no items to output this cycle
I0320 16:24:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 16:24:25.137674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:24:25.140100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:24:25.140106  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470040 0xc000470080]
E0320 16:24:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:24:33.409787  543705 memory.go:184] no items to output this cycle
I0320 16:24:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 16:24:38.546494  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:24:38.546501  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:24:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:24:43.410835  543705 memory.go:191] Add success.
I0320 16:24:43.409816  543705 cpu.go:282] Add success.
I0320 16:24:43.420513  543705 net.go:648] Add success.
I0320 16:24:43.423339  543705 net.go:770] primary dev: ETH0
I0320 16:24:43.423354  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:24:43.423368  543705 net.go:698] Add success.
I0320 16:24:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:24:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:24:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:24:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:24:53.409786  543705 memory.go:184] no items to output this cycle
I0320 16:24:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:25:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:25:03.409806  543705 memory.go:184] no items to output this cycle
I0320 16:25:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 16:25:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:25:13.409832  543705 memory.go:191] Add success.
I0320 16:25:13.409839  543705 cpu.go:282] Add success.
W0320 16:25:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:25:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:25:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:25:13.420311  543705 net.go:648] Add success.
I0320 16:25:13.423290  543705 net.go:770] primary dev: ETH0
I0320 16:25:13.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:25:13.423321  543705 net.go:698] Add success.
I0320 16:25:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:25:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:25:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 16:25:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:25:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 16:25:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:25:15.455947  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:25:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:25:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:25:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:25:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:25:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:25:23.409787  543705 memory.go:184] no items to output this cycle
I0320 16:25:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 16:25:25.141675  543705 disk_info.go:125] begin check local disk info of client
I0320 16:25:25.144112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:25:25.144118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000dc000 0xc0000dc040]
E0320 16:25:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:25:33.409777  543705 memory.go:184] no items to output this cycle
I0320 16:25:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 16:25:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:25:43.409844  543705 memory.go:191] Add success.
I0320 16:25:43.409844  543705 cpu.go:282] Add success.
I0320 16:25:43.420057  543705 net.go:648] Add success.
I0320 16:25:43.422819  543705 net.go:770] primary dev: ETH0
I0320 16:25:43.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:25:43.422846  543705 net.go:698] Add success.
I0320 16:25:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:25:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:25:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:25:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:25:53.409793  543705 memory.go:184] no items to output this cycle
I0320 16:25:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 16:26:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:26:03.409812  543705 memory.go:184] no items to output this cycle
I0320 16:26:03.409824  543705 cpu.go:275] no items to output this cycle
E0320 16:26:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:26:13.409827  543705 memory.go:191] Add success.
I0320 16:26:13.409836  543705 cpu.go:282] Add success.
W0320 16:26:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:26:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:26:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:26:13.420149  543705 net.go:648] Add success.
I0320 16:26:13.422768  543705 net.go:770] primary dev: ETH0
I0320 16:26:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:26:13.422808  543705 net.go:698] Add success.
I0320 16:26:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:26:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:26:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 16:26:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:26:14.456817  543705 disk_worker.go:494] system disk:vda1
I0320 16:26:14.456846  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:26:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:26:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:26:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:26:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:26:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:26:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:26:23.409799  543705 memory.go:184] no items to output this cycle
I0320 16:26:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 16:26:25.145674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:26:25.148132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:26:25.148138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004924c0 0xc000492500]
E0320 16:26:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:26:33.409776  543705 memory.go:184] no items to output this cycle
I0320 16:26:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 16:26:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:26:43.409829  543705 memory.go:191] Add success.
I0320 16:26:43.409832  543705 cpu.go:282] Add success.
I0320 16:26:43.419884  543705 net.go:648] Add success.
I0320 16:26:43.422898  543705 net.go:770] primary dev: ETH0
I0320 16:26:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:26:43.422924  543705 net.go:698] Add success.
I0320 16:26:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:26:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:26:53.409776  543705 memory.go:184] no items to output this cycle
I0320 16:26:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 16:27:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:27:03.409780  543705 memory.go:184] no items to output this cycle
I0320 16:27:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:27:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:27:13.409778  543705 memory.go:191] Add success.
W0320 16:27:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:27:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:27:13.409815  543705 cpu.go:282] Add success.
I0320 16:27:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:27:13.426076  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 16:27:13.426154  543705 net.go:770] primary dev: ETH0
I0320 16:27:13.426173  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:27:13.426189  543705 net.go:698] Add success.
I0320 16:27:13.426549  543705 net.go:648] Add success.
I0320 16:27:13.453055  543705 event_worker.go:152] Polling the log file for events...
I0320 16:27:13.477286  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b5347e2-a208-4504-a081-8a0f39c3ea75","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:27:13.477317  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 16:27:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:27:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 16:27:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:27:14.455921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:27:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:27:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:27:14.456684  543705 disk_worker.go:494] system disk:vda1
I0320 16:27:14.456715  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:27:15.456862  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:27:15.456872  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:27:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:27:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:27:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:27:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:27:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:27:23.409782  543705 memory.go:184] no items to output this cycle
I0320 16:27:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 16:27:25.149669  543705 disk_info.go:125] begin check local disk info of client
I0320 16:27:25.152142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:27:25.152148  543705 disk_info.go:196] parse disk info done, disk is : [0xc000322080 0xc0003220c0]
E0320 16:27:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:27:33.409802  543705 memory.go:184] no items to output this cycle
I0320 16:27:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 16:27:38.547493  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:27:38.547499  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:27:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:27:43.410674  543705 memory.go:191] Add success.
I0320 16:27:43.409820  543705 cpu.go:282] Add success.
I0320 16:27:43.420373  543705 net.go:648] Add success.
I0320 16:27:43.423202  543705 net.go:770] primary dev: ETH0
I0320 16:27:43.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:27:43.423231  543705 net.go:698] Add success.
I0320 16:27:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:27:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:27:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:27:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:27:53.409808  543705 memory.go:184] no items to output this cycle
I0320 16:27:53.409824  543705 cpu.go:275] no items to output this cycle
E0320 16:28:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:28:03.409781  543705 memory.go:184] no items to output this cycle
I0320 16:28:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 16:28:13.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:28:13.409936  543705 memory.go:191] Add success.
I0320 16:28:13.409978  543705 cpu.go:282] Add success.
W0320 16:28:13.410058  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:28:13.410090  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:28:13.410095  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:28:13.419725  543705 net.go:648] Add success.
I0320 16:28:13.422619  543705 net.go:770] primary dev: ETH0
I0320 16:28:13.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:28:13.422644  543705 net.go:698] Add success.
I0320 16:28:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:28:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:28:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 16:28:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:28:14.456646  543705 disk_worker.go:494] system disk:vda1
I0320 16:28:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:28:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:28:16.458040  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:28:16.458101  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:28:16.458124  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:28:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:28:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:28:23.409774  543705 memory.go:184] no items to output this cycle
I0320 16:28:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 16:28:25.153673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:28:25.156118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:28:25.156124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d740 0xc00037d780]
E0320 16:28:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:28:33.409805  543705 memory.go:184] no items to output this cycle
I0320 16:28:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 16:28:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:28:43.409795  543705 memory.go:191] Add success.
I0320 16:28:43.409815  543705 cpu.go:282] Add success.
I0320 16:28:43.419882  543705 net.go:648] Add success.
I0320 16:28:43.422649  543705 net.go:770] primary dev: ETH0
I0320 16:28:43.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:28:43.422676  543705 net.go:698] Add success.
I0320 16:28:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:28:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:28:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:28:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:28:53.409771  543705 memory.go:184] no items to output this cycle
I0320 16:28:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 16:29:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:29:03.409814  543705 memory.go:184] no items to output this cycle
I0320 16:29:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 16:29:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:29:13.409873  543705 memory.go:191] Add success.
W0320 16:29:13.409905  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:29:13.409918  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:29:13.409922  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:29:13.409951  543705 cpu.go:282] Add success.
I0320 16:29:13.419741  543705 net.go:648] Add success.
I0320 16:29:13.422685  543705 net.go:770] primary dev: ETH0
I0320 16:29:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:29:13.422713  543705 net.go:698] Add success.
I0320 16:29:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:29:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:29:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 16:29:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:29:14.456526  543705 disk_worker.go:494] system disk:vda1
I0320 16:29:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:29:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:29:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:29:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:29:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:29:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:29:23.409795  543705 memory.go:184] no items to output this cycle
I0320 16:29:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 16:29:25.157675  543705 disk_info.go:125] begin check local disk info of client
I0320 16:29:25.160241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:29:25.160247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
E0320 16:29:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:29:33.409811  543705 memory.go:184] no items to output this cycle
I0320 16:29:33.409825  543705 cpu.go:275] no items to output this cycle
E0320 16:29:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:29:43.409784  543705 memory.go:191] Add success.
I0320 16:29:43.409827  543705 cpu.go:282] Add success.
I0320 16:29:43.420024  543705 net.go:648] Add success.
I0320 16:29:43.423355  543705 net.go:770] primary dev: ETH0
I0320 16:29:43.423368  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:29:43.423383  543705 net.go:698] Add success.
I0320 16:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:29:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:29:53.409817  543705 memory.go:184] no items to output this cycle
I0320 16:29:53.409837  543705 cpu.go:275] no items to output this cycle
E0320 16:30:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:30:03.409781  543705 memory.go:184] no items to output this cycle
I0320 16:30:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 16:30:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:30:13.409826  543705 memory.go:191] Add success.
I0320 16:30:13.409840  543705 cpu.go:282] Add success.
W0320 16:30:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:30:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:30:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:30:13.420258  543705 net.go:648] Add success.
I0320 16:30:13.422859  543705 net.go:770] primary dev: ETH0
I0320 16:30:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:30:13.422885  543705 net.go:698] Add success.
I0320 16:30:13.468908  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d7f05ab-0772-4a9e-9cad-bf2c243d8a6e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:30:13.468942  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:30:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:30:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 16:30:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:30:14.456499  543705 disk_worker.go:494] system disk:vda1
I0320 16:30:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:30:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:30:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:30:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:30:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:30:23.410576  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:30:23.410583  543705 cpu.go:275] no items to output this cycle
I0320 16:30:23.410592  543705 memory.go:184] no items to output this cycle
I0320 16:30:25.161676  543705 disk_info.go:125] begin check local disk info of client
I0320 16:30:25.164135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:30:25.164143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9b00 0xc0001fa000]
E0320 16:30:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:30:33.409781  543705 memory.go:184] no items to output this cycle
I0320 16:30:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 16:30:38.548488  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:30:38.548495  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:30:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:30:43.410717  543705 memory.go:191] Add success.
I0320 16:30:43.409830  543705 cpu.go:282] Add success.
I0320 16:30:43.420502  543705 net.go:648] Add success.
I0320 16:30:43.423236  543705 net.go:770] primary dev: ETH0
I0320 16:30:43.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:30:43.423261  543705 net.go:698] Add success.
I0320 16:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:30:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:30:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:30:53.409769  543705 memory.go:184] no items to output this cycle
I0320 16:30:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 16:31:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:31:03.409782  543705 memory.go:184] no items to output this cycle
I0320 16:31:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 16:31:13.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:31:13.410016  543705 cpu.go:282] Add success.
I0320 16:31:13.410078  543705 memory.go:191] Add success.
W0320 16:31:13.410114  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:31:13.410143  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:31:13.410148  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:31:13.419731  543705 net.go:648] Add success.
I0320 16:31:13.423067  543705 net.go:770] primary dev: ETH0
I0320 16:31:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:31:13.423096  543705 net.go:698] Add success.
I0320 16:31:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:31:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:31:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 16:31:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:31:14.456560  543705 disk_worker.go:494] system disk:vda1
I0320 16:31:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:31:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:31:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:31:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:31:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:31:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:31:23.409763  543705 memory.go:184] no items to output this cycle
I0320 16:31:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 16:31:25.165674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:31:25.168119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:31:25.168125  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a77c0 0xc0004a7800]
I0320 16:31:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 16:31:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:31:33.409813  543705 memory.go:184] no items to output this cycle
E0320 16:31:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:31:43.409784  543705 memory.go:191] Add success.
I0320 16:31:43.409813  543705 cpu.go:282] Add success.
I0320 16:31:43.419999  543705 net.go:648] Add success.
I0320 16:31:43.422581  543705 net.go:770] primary dev: ETH0
I0320 16:31:43.422595  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:31:43.422609  543705 net.go:698] Add success.
I0320 16:31:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:31:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:31:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:31:53.410370  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:31:53.410389  543705 memory.go:184] no items to output this cycle
I0320 16:31:53.410407  543705 cpu.go:275] no items to output this cycle
E0320 16:32:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:32:03.409769  543705 memory.go:184] no items to output this cycle
I0320 16:32:03.409792  543705 cpu.go:275] no items to output this cycle
W0320 16:32:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:32:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:32:13.409743  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:32:13.409832  543705 cpu.go:282] Add success.
E0320 16:32:13.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:32:13.409856  543705 memory.go:191] Add success.
I0320 16:32:13.420263  543705 net.go:648] Add success.
I0320 16:32:13.423065  543705 net.go:770] primary dev: ETH0
I0320 16:32:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:32:13.423090  543705 net.go:698] Add success.
W0320 16:32:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:32:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 16:32:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:32:14.456802  543705 disk_worker.go:494] system disk:vda1
I0320 16:32:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:32:14.457004  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:32:14.457028  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:32:14.457033  543705 custom_config.go:64] query custom config with name: gpu
E0320 16:32:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:32:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:32:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:32:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:32:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:32:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:32:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:32:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:32:23.409792  543705 memory.go:184] no items to output this cycle
I0320 16:32:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 16:32:25.169671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:32:25.172182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:32:25.172189  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003694c0 0xc000369500]
E0320 16:32:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:32:33.409773  543705 memory.go:184] no items to output this cycle
I0320 16:32:33.409775  543705 cpu.go:275] no items to output this cycle
E0320 16:32:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:32:43.409819  543705 memory.go:191] Add success.
I0320 16:32:43.409829  543705 cpu.go:282] Add success.
I0320 16:32:43.419976  543705 net.go:648] Add success.
I0320 16:32:43.422931  543705 net.go:770] primary dev: ETH0
I0320 16:32:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:32:43.422956  543705 net.go:698] Add success.
I0320 16:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:32:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:32:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:32:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:32:53.409778  543705 memory.go:184] no items to output this cycle
I0320 16:32:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 16:33:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:33:03.409778  543705 memory.go:184] no items to output this cycle
I0320 16:33:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 16:33:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:33:13.409792  543705 memory.go:191] Add success.
I0320 16:33:13.409810  543705 cpu.go:282] Add success.
W0320 16:33:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:33:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:33:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:33:13.420288  543705 net.go:648] Add success.
I0320 16:33:13.422796  543705 net.go:770] primary dev: ETH0
I0320 16:33:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:33:13.422821  543705 net.go:698] Add success.
I0320 16:33:13.468155  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"846895ce-5c25-488e-9f16-2cb1b19b4a90","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:33:13.468187  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:33:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:33:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:33:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 16:33:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:33:14.456511  543705 disk_worker.go:494] system disk:vda1
I0320 16:33:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:33:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:33:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:33:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:33:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:33:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:33:23.409777  543705 memory.go:184] no items to output this cycle
I0320 16:33:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 16:33:25.173674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:33:25.176130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:33:25.176136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae1c0 0xc0004ae200]
E0320 16:33:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:33:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 16:33:33.409790  543705 memory.go:184] no items to output this cycle
I0320 16:33:38.549496  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:33:38.549502  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:33:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:33:43.410698  543705 memory.go:191] Add success.
I0320 16:33:43.409829  543705 cpu.go:282] Add success.
I0320 16:33:43.420434  543705 net.go:648] Add success.
I0320 16:33:43.423261  543705 net.go:770] primary dev: ETH0
I0320 16:33:43.423274  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:33:43.423288  543705 net.go:698] Add success.
I0320 16:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:33:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:33:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:33:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:33:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:33:53.409822  543705 memory.go:184] no items to output this cycle
E0320 16:34:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:34:03.409783  543705 memory.go:184] no items to output this cycle
I0320 16:34:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 16:34:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:34:13.409793  543705 memory.go:191] Add success.
I0320 16:34:13.409796  543705 cpu.go:282] Add success.
W0320 16:34:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:34:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:34:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:34:13.420223  543705 net.go:648] Add success.
I0320 16:34:13.423304  543705 net.go:770] primary dev: ETH0
I0320 16:34:13.423321  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:34:13.423334  543705 net.go:698] Add success.
I0320 16:34:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:34:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:34:14.455135  543705 disk_worker.go:708] disk space is not compliant
W0320 16:34:14.455138  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:34:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 16:34:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:34:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:34:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:34:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:34:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:34:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:34:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:34:23.409778  543705 memory.go:184] no items to output this cycle
I0320 16:34:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 16:34:25.177670  543705 disk_info.go:125] begin check local disk info of client
I0320 16:34:25.180194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:34:25.180200  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f940 0xc00035f980]
E0320 16:34:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:34:33.409773  543705 memory.go:184] no items to output this cycle
I0320 16:34:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 16:34:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:34:43.409785  543705 memory.go:191] Add success.
I0320 16:34:43.409817  543705 cpu.go:282] Add success.
I0320 16:34:43.419856  543705 net.go:648] Add success.
I0320 16:34:43.422964  543705 net.go:770] primary dev: ETH0
I0320 16:34:43.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:34:43.423008  543705 net.go:698] Add success.
I0320 16:34:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:34:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:34:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:34:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:34:53.409774  543705 memory.go:184] no items to output this cycle
I0320 16:34:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 16:35:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:35:03.409783  543705 memory.go:184] no items to output this cycle
I0320 16:35:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:35:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:35:13.409782  543705 memory.go:191] Add success.
I0320 16:35:13.409799  543705 cpu.go:282] Add success.
W0320 16:35:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:35:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:35:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:35:13.420340  543705 net.go:648] Add success.
I0320 16:35:13.423058  543705 net.go:770] primary dev: ETH0
I0320 16:35:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:35:13.423082  543705 net.go:698] Add success.
I0320 16:35:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:35:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:35:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 16:35:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:35:14.456499  543705 disk_worker.go:494] system disk:vda1
I0320 16:35:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:35:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:35:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:35:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:35:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:35:23.410363  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:35:23.410379  543705 memory.go:184] no items to output this cycle
I0320 16:35:23.410413  543705 cpu.go:275] no items to output this cycle
I0320 16:35:25.181676  543705 disk_info.go:125] begin check local disk info of client
I0320 16:35:25.184160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:35:25.184167  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b700 0xc00047b740]
E0320 16:35:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:35:33.409797  543705 memory.go:184] no items to output this cycle
I0320 16:35:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 16:35:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:35:43.409796  543705 memory.go:191] Add success.
I0320 16:35:43.409797  543705 cpu.go:282] Add success.
I0320 16:35:43.419872  543705 net.go:648] Add success.
I0320 16:35:43.422594  543705 net.go:770] primary dev: ETH0
I0320 16:35:43.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:35:43.422619  543705 net.go:698] Add success.
I0320 16:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:35:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:35:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:35:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:35:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:35:53.409821  543705 memory.go:184] no items to output this cycle
E0320 16:36:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:36:03.409785  543705 memory.go:184] no items to output this cycle
I0320 16:36:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 16:36:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:36:13.409784  543705 memory.go:191] Add success.
W0320 16:36:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:36:13.409817  543705 cpu.go:282] Add success.
W0320 16:36:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:36:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:36:13.420368  543705 net.go:648] Add success.
I0320 16:36:13.423452  543705 net.go:770] primary dev: ETH0
I0320 16:36:13.423466  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:36:13.423477  543705 net.go:698] Add success.
I0320 16:36:13.468938  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f03743fd-53d9-49b4-b151-054000a11ec2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:36:13.468969  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:36:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:36:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 16:36:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:36:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 16:36:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:36:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:36:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:36:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:36:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:36:23.409766  543705 memory.go:184] no items to output this cycle
I0320 16:36:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 16:36:25.185671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:36:25.188108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:36:25.188114  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b040 0xc00036b080]
E0320 16:36:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:36:33.409767  543705 memory.go:184] no items to output this cycle
I0320 16:36:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 16:36:38.550506  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:36:38.550513  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:36:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:36:43.410620  543705 memory.go:191] Add success.
I0320 16:36:43.409828  543705 cpu.go:282] Add success.
I0320 16:36:43.420384  543705 net.go:648] Add success.
I0320 16:36:43.422828  543705 net.go:770] primary dev: ETH0
I0320 16:36:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:36:43.422859  543705 net.go:698] Add success.
I0320 16:36:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:36:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:36:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:36:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:36:53.409784  543705 memory.go:184] no items to output this cycle
I0320 16:36:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:37:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:37:03.409779  543705 memory.go:184] no items to output this cycle
I0320 16:37:03.409811  543705 cpu.go:275] no items to output this cycle
W0320 16:37:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:37:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:37:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 16:37:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:37:13.409813  543705 cpu.go:282] Add success.
I0320 16:37:13.409819  543705 memory.go:191] Add success.
I0320 16:37:13.420154  543705 net.go:648] Add success.
I0320 16:37:13.422940  543705 net.go:770] primary dev: ETH0
I0320 16:37:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:37:13.422965  543705 net.go:698] Add success.
I0320 16:37:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0320 16:37:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:37:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0320 16:37:14.455157  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:37:14.456929  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:37:14.456938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:37:14.456945  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:37:14.456994  543705 disk_worker.go:494] system disk:vda1
I0320 16:37:14.457024  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:37:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:37:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:37:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:37:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:37:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:37:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:37:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:37:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:37:23.409790  543705 memory.go:184] no items to output this cycle
I0320 16:37:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 16:37:25.189673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:37:25.192142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:37:25.192149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032af80 0xc00032afc0]
E0320 16:37:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:37:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 16:37:33.409790  543705 memory.go:184] no items to output this cycle
E0320 16:37:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:37:43.409817  543705 memory.go:191] Add success.
I0320 16:37:43.409829  543705 cpu.go:282] Add success.
I0320 16:37:43.420066  543705 net.go:648] Add success.
I0320 16:37:43.422836  543705 net.go:770] primary dev: ETH0
I0320 16:37:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:37:43.422862  543705 net.go:698] Add success.
I0320 16:37:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:37:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:37:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:37:53.410399  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:37:53.410416  543705 memory.go:184] no items to output this cycle
I0320 16:37:53.410472  543705 cpu.go:275] no items to output this cycle
E0320 16:38:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:38:03.409783  543705 memory.go:184] no items to output this cycle
I0320 16:38:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 16:38:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:38:13.409799  543705 memory.go:191] Add success.
I0320 16:38:13.409800  543705 cpu.go:282] Add success.
W0320 16:38:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:38:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:38:13.420304  543705 net.go:648] Add success.
I0320 16:38:13.423064  543705 net.go:770] primary dev: ETH0
I0320 16:38:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:38:13.423089  543705 net.go:698] Add success.
I0320 16:38:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:38:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:38:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 16:38:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:38:14.456532  543705 disk_worker.go:494] system disk:vda1
I0320 16:38:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:38:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:38:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:38:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:38:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:38:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:38:23.409766  543705 memory.go:184] no items to output this cycle
I0320 16:38:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 16:38:25.193681  543705 disk_info.go:125] begin check local disk info of client
I0320 16:38:25.196132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:38:25.196139  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9dc0 0xc0003e9e00]
E0320 16:38:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:38:33.409801  543705 memory.go:184] no items to output this cycle
I0320 16:38:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 16:38:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:38:43.409827  543705 memory.go:191] Add success.
I0320 16:38:43.409832  543705 cpu.go:282] Add success.
I0320 16:38:43.419992  543705 net.go:648] Add success.
I0320 16:38:43.422809  543705 net.go:770] primary dev: ETH0
I0320 16:38:43.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:38:43.422835  543705 net.go:698] Add success.
I0320 16:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:38:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:38:53.410278  543705 memory.go:184] no items to output this cycle
I0320 16:38:53.410290  543705 cpu.go:275] no items to output this cycle
E0320 16:39:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:39:03.409779  543705 memory.go:184] no items to output this cycle
I0320 16:39:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:39:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:39:13.409801  543705 memory.go:191] Add success.
I0320 16:39:13.409820  543705 cpu.go:282] Add success.
W0320 16:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:39:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:39:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:39:13.420172  543705 net.go:648] Add success.
I0320 16:39:13.422971  543705 net.go:770] primary dev: ETH0
I0320 16:39:13.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:39:13.422996  543705 net.go:698] Add success.
I0320 16:39:13.463868  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e03d522e-c5ec-45a8-b0c9-0443e74fdb3e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:39:13.463902  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:39:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:39:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:39:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 16:39:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:39:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 16:39:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:39:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:39:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:39:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:39:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:39:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:39:23.409794  543705 memory.go:184] no items to output this cycle
I0320 16:39:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 16:39:25.197672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:39:25.200145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:39:25.200151  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470340 0xc000470380]
E0320 16:39:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:39:33.409775  543705 memory.go:184] no items to output this cycle
I0320 16:39:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 16:39:38.551506  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:39:38.551513  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:39:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:39:43.410731  543705 memory.go:191] Add success.
I0320 16:39:43.409835  543705 cpu.go:282] Add success.
I0320 16:39:43.420482  543705 net.go:648] Add success.
I0320 16:39:43.423306  543705 net.go:770] primary dev: ETH0
I0320 16:39:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:39:43.423332  543705 net.go:698] Add success.
I0320 16:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:39:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:39:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:39:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:39:53.409873  543705 cpu.go:275] no items to output this cycle
I0320 16:39:53.409888  543705 memory.go:184] no items to output this cycle
E0320 16:40:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:40:03.409771  543705 memory.go:184] no items to output this cycle
I0320 16:40:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 16:40:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:40:13.409803  543705 memory.go:191] Add success.
I0320 16:40:13.409805  543705 cpu.go:282] Add success.
W0320 16:40:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:40:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:40:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:40:13.420062  543705 net.go:648] Add success.
I0320 16:40:13.422891  543705 net.go:770] primary dev: ETH0
I0320 16:40:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:40:13.422916  543705 net.go:698] Add success.
I0320 16:40:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:40:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:40:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 16:40:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:40:14.456525  543705 disk_worker.go:494] system disk:vda1
I0320 16:40:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:40:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:40:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:40:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:40:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:40:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:40:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:40:23.409781  543705 memory.go:184] no items to output this cycle
I0320 16:40:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 16:40:25.201684  543705 disk_info.go:125] begin check local disk info of client
I0320 16:40:25.204135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:40:25.204140  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471c00 0xc000471c40]
E0320 16:40:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:40:33.409775  543705 memory.go:184] no items to output this cycle
I0320 16:40:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 16:40:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:40:43.409827  543705 memory.go:191] Add success.
I0320 16:40:43.409841  543705 cpu.go:282] Add success.
I0320 16:40:43.419960  543705 net.go:648] Add success.
I0320 16:40:43.423045  543705 net.go:770] primary dev: ETH0
I0320 16:40:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:40:43.423232  543705 net.go:698] Add success.
I0320 16:40:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:40:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:40:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:40:53.409812  543705 memory.go:184] no items to output this cycle
I0320 16:40:53.409828  543705 cpu.go:275] no items to output this cycle
E0320 16:41:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:41:03.409788  543705 memory.go:184] no items to output this cycle
I0320 16:41:03.409826  543705 cpu.go:275] no items to output this cycle
E0320 16:41:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:41:13.409818  543705 memory.go:191] Add success.
I0320 16:41:13.409818  543705 cpu.go:282] Add success.
W0320 16:41:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:41:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:41:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:41:13.420151  543705 net.go:648] Add success.
I0320 16:41:13.423010  543705 net.go:770] primary dev: ETH0
I0320 16:41:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:41:13.423035  543705 net.go:698] Add success.
I0320 16:41:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:41:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:41:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 16:41:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:41:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 16:41:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:41:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:41:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:41:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:41:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:41:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:41:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 16:41:23.409795  543705 memory.go:184] no items to output this cycle
I0320 16:41:25.205672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:41:25.208129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:41:25.208135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb600 0xc0001fb640]
E0320 16:41:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:41:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 16:41:33.409795  543705 memory.go:184] no items to output this cycle
E0320 16:41:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:41:43.409808  543705 memory.go:191] Add success.
I0320 16:41:43.409808  543705 cpu.go:282] Add success.
I0320 16:41:43.419969  543705 net.go:648] Add success.
I0320 16:41:43.422462  543705 net.go:770] primary dev: ETH0
I0320 16:41:43.422474  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:41:43.422649  543705 net.go:698] Add success.
I0320 16:41:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:41:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:41:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:41:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:41:53.409782  543705 memory.go:184] no items to output this cycle
I0320 16:41:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 16:42:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:42:03.409813  543705 memory.go:184] no items to output this cycle
I0320 16:42:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 16:42:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:42:13.409780  543705 memory.go:191] Add success.
W0320 16:42:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:42:13.409809  543705 cpu.go:282] Add success.
W0320 16:42:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:42:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:42:13.420159  543705 net.go:648] Add success.
I0320 16:42:13.422753  543705 net.go:770] primary dev: ETH0
I0320 16:42:13.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:42:13.422778  543705 net.go:698] Add success.
I0320 16:42:13.463559  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8d13ef04-0f33-48cf-88f9-27ab103e34e9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:42:13.463591  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 16:42:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:42:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0320 16:42:14.455241  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:42:14.456116  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:42:14.456126  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:42:14.456131  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:42:14.456981  543705 disk_worker.go:494] system disk:vda1
I0320 16:42:14.457009  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:42:15.456867  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:42:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:42:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:42:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:42:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:42:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:42:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:42:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:42:23.409774  543705 memory.go:184] no items to output this cycle
I0320 16:42:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 16:42:25.209668  543705 disk_info.go:125] begin check local disk info of client
I0320 16:42:25.212063  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:42:25.212069  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa500 0xc0001fa540]
E0320 16:42:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:42:33.409795  543705 memory.go:184] no items to output this cycle
I0320 16:42:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 16:42:38.552513  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:42:38.552519  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:42:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:42:43.410887  543705 memory.go:191] Add success.
I0320 16:42:43.409933  543705 cpu.go:282] Add success.
I0320 16:42:43.419725  543705 net.go:648] Add success.
I0320 16:42:43.422202  543705 net.go:770] primary dev: ETH0
I0320 16:42:43.422215  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:42:43.422228  543705 net.go:698] Add success.
I0320 16:42:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:42:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:42:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:42:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:42:53.409778  543705 memory.go:184] no items to output this cycle
I0320 16:42:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 16:43:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:43:03.409782  543705 memory.go:184] no items to output this cycle
I0320 16:43:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:43:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:43:13.409792  543705 memory.go:191] Add success.
I0320 16:43:13.409798  543705 cpu.go:282] Add success.
W0320 16:43:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:43:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:43:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:43:13.420230  543705 net.go:648] Add success.
I0320 16:43:13.422920  543705 net.go:770] primary dev: ETH0
I0320 16:43:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:43:13.422951  543705 net.go:698] Add success.
I0320 16:43:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:43:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:43:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0320 16:43:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:43:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 16:43:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:43:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:43:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:43:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:43:23.409801  543705 memory.go:184] no items to output this cycle
I0320 16:43:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 16:43:25.213672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:43:25.216168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:43:25.216174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002044c0 0xc000204500]
E0320 16:43:33.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:43:33.409880  543705 cpu.go:275] no items to output this cycle
I0320 16:43:33.409912  543705 memory.go:184] no items to output this cycle
E0320 16:43:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:43:43.409825  543705 memory.go:191] Add success.
I0320 16:43:43.409832  543705 cpu.go:282] Add success.
I0320 16:43:43.420027  543705 net.go:648] Add success.
I0320 16:43:43.422661  543705 net.go:770] primary dev: ETH0
I0320 16:43:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:43:43.422690  543705 net.go:698] Add success.
I0320 16:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:43:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:43:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:43:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:43:53.409799  543705 memory.go:184] no items to output this cycle
I0320 16:43:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 16:44:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:44:03.409784  543705 memory.go:184] no items to output this cycle
I0320 16:44:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 16:44:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:44:13.409791  543705 cpu.go:282] Add success.
I0320 16:44:13.409793  543705 memory.go:191] Add success.
W0320 16:44:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:44:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:44:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:44:13.420099  543705 net.go:648] Add success.
I0320 16:44:13.422878  543705 net.go:770] primary dev: ETH0
I0320 16:44:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:44:13.422902  543705 net.go:698] Add success.
I0320 16:44:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:44:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:44:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 16:44:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:44:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 16:44:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:44:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:44:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:44:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 16:44:23.409784  543705 memory.go:184] no items to output this cycle
I0320 16:44:25.217672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:44:25.220122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:44:25.220128  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391580 0xc0003915c0]
E0320 16:44:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:44:33.409799  543705 memory.go:184] no items to output this cycle
I0320 16:44:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 16:44:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:44:43.409917  543705 memory.go:191] Add success.
I0320 16:44:43.410014  543705 cpu.go:282] Add success.
I0320 16:44:43.419731  543705 net.go:648] Add success.
I0320 16:44:43.422835  543705 net.go:770] primary dev: ETH0
I0320 16:44:43.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:44:43.422859  543705 net.go:698] Add success.
I0320 16:44:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:44:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:44:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:44:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:44:53.409779  543705 memory.go:184] no items to output this cycle
I0320 16:44:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 16:45:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:45:03.409788  543705 memory.go:184] no items to output this cycle
I0320 16:45:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 16:45:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:45:13.409784  543705 memory.go:191] Add success.
I0320 16:45:13.409789  543705 cpu.go:282] Add success.
W0320 16:45:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:45:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:45:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:45:13.420122  543705 net.go:648] Add success.
I0320 16:45:13.422794  543705 net.go:770] primary dev: ETH0
I0320 16:45:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:45:13.422819  543705 net.go:698] Add success.
I0320 16:45:13.469663  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9b469a6c-d59a-4c5d-a281-4b6078ee9b60","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:45:13.469695  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:45:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:45:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:45:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 16:45:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:45:14.456525  543705 disk_worker.go:494] system disk:vda1
I0320 16:45:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:45:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:45:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:45:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:45:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:45:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:45:23.409765  543705 memory.go:184] no items to output this cycle
I0320 16:45:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 16:45:25.221675  543705 disk_info.go:125] begin check local disk info of client
I0320 16:45:25.224119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:45:25.224125  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278040 0xc000278080]
E0320 16:45:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:45:33.409809  543705 memory.go:184] no items to output this cycle
I0320 16:45:33.409822  543705 cpu.go:275] no items to output this cycle
I0320 16:45:38.553525  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:45:38.553532  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:45:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:45:43.410610  543705 memory.go:191] Add success.
I0320 16:45:43.409799  543705 cpu.go:282] Add success.
I0320 16:45:43.420318  543705 net.go:648] Add success.
I0320 16:45:43.423083  543705 net.go:770] primary dev: ETH0
I0320 16:45:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:45:43.423115  543705 net.go:698] Add success.
I0320 16:45:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:45:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:45:46.458154  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:45:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:45:53.409785  543705 memory.go:184] no items to output this cycle
I0320 16:45:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 16:46:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:46:03.409818  543705 memory.go:184] no items to output this cycle
I0320 16:46:03.409834  543705 cpu.go:275] no items to output this cycle
E0320 16:46:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:46:13.409820  543705 memory.go:191] Add success.
I0320 16:46:13.409830  543705 cpu.go:282] Add success.
W0320 16:46:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:46:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:46:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:46:13.420203  543705 net.go:648] Add success.
I0320 16:46:13.422930  543705 net.go:770] primary dev: ETH0
I0320 16:46:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:46:13.422955  543705 net.go:698] Add success.
I0320 16:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:46:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:46:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 16:46:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:46:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 16:46:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:46:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:46:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:46:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:46:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:46:23.409773  543705 memory.go:184] no items to output this cycle
I0320 16:46:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 16:46:25.225673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:46:25.228106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:46:25.228112  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d40 0xc0000c4d80]
E0320 16:46:33.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:46:33.409830  543705 memory.go:184] no items to output this cycle
I0320 16:46:33.409843  543705 cpu.go:275] no items to output this cycle
E0320 16:46:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:46:43.409838  543705 memory.go:191] Add success.
I0320 16:46:43.409839  543705 cpu.go:282] Add success.
I0320 16:46:43.420024  543705 net.go:648] Add success.
I0320 16:46:43.422701  543705 net.go:770] primary dev: ETH0
I0320 16:46:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:46:43.422732  543705 net.go:698] Add success.
I0320 16:46:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:46:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:46:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:46:53.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:46:53.409916  543705 memory.go:184] no items to output this cycle
I0320 16:46:53.409968  543705 cpu.go:275] no items to output this cycle
E0320 16:47:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:47:03.409790  543705 memory.go:184] no items to output this cycle
I0320 16:47:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 16:47:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:47:13.409818  543705 memory.go:191] Add success.
I0320 16:47:13.409832  543705 cpu.go:282] Add success.
W0320 16:47:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:47:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:47:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:47:13.420173  543705 net.go:648] Add success.
I0320 16:47:13.423318  543705 net.go:770] primary dev: ETH0
I0320 16:47:13.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:47:13.423345  543705 net.go:698] Add success.
I0320 16:47:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0320 16:47:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:47:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 16:47:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:47:14.456909  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:47:14.456918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:47:14.456924  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:47:14.456972  543705 disk_worker.go:494] system disk:vda1
I0320 16:47:14.457011  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:47:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:47:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:47:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:47:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:47:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:47:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:47:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:47:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:47:23.409762  543705 memory.go:184] no items to output this cycle
I0320 16:47:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 16:47:25.229671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:47:25.232124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:47:25.232130  543705 disk_info.go:196] parse disk info done, disk is : [0xc000586b40 0xc000586b80]
E0320 16:47:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:47:33.409795  543705 memory.go:184] no items to output this cycle
I0320 16:47:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 16:47:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:47:43.409820  543705 memory.go:191] Add success.
I0320 16:47:43.409837  543705 cpu.go:282] Add success.
I0320 16:47:43.419896  543705 net.go:648] Add success.
I0320 16:47:43.422513  543705 net.go:770] primary dev: ETH0
I0320 16:47:43.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:47:43.422542  543705 net.go:698] Add success.
I0320 16:47:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:47:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:47:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:47:53.409767  543705 memory.go:184] no items to output this cycle
I0320 16:47:53.409895  543705 cpu.go:275] no items to output this cycle
E0320 16:48:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:48:03.409787  543705 memory.go:184] no items to output this cycle
I0320 16:48:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 16:48:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:48:13.409784  543705 memory.go:191] Add success.
I0320 16:48:13.409795  543705 cpu.go:282] Add success.
W0320 16:48:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:48:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:48:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:48:13.420288  543705 net.go:648] Add success.
I0320 16:48:13.423312  543705 net.go:770] primary dev: ETH0
I0320 16:48:13.423325  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:48:13.423337  543705 net.go:698] Add success.
I0320 16:48:13.732066  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"253a2be5-43aa-44f6-8306-b194508758ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:48:13.732108  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:48:14.454736  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:48:14.454952  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:48:14.454963  543705 disk_worker.go:708] disk space is not compliant
W0320 16:48:14.454965  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:48:14.456385  543705 disk_worker.go:494] system disk:vda1
I0320 16:48:14.456416  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:48:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:48:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:48:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:48:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:48:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:48:23.409767  543705 memory.go:184] no items to output this cycle
I0320 16:48:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 16:48:25.233671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:48:25.236102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:48:25.236107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003124c0 0xc000312500]
E0320 16:48:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:48:33.409797  543705 memory.go:184] no items to output this cycle
I0320 16:48:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 16:48:38.554509  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:48:38.554516  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:48:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:48:43.410683  543705 memory.go:191] Add success.
I0320 16:48:43.409803  543705 cpu.go:282] Add success.
I0320 16:48:43.420373  543705 net.go:648] Add success.
I0320 16:48:43.423206  543705 net.go:770] primary dev: ETH0
I0320 16:48:43.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:48:43.423231  543705 net.go:698] Add success.
I0320 16:48:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:48:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:48:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:48:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:48:53.409772  543705 memory.go:184] no items to output this cycle
I0320 16:48:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 16:49:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:49:03.409784  543705 memory.go:184] no items to output this cycle
I0320 16:49:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 16:49:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:49:13.409812  543705 memory.go:191] Add success.
I0320 16:49:13.409817  543705 cpu.go:282] Add success.
W0320 16:49:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:49:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:49:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:49:13.420138  543705 net.go:648] Add success.
I0320 16:49:13.422998  543705 net.go:770] primary dev: ETH0
I0320 16:49:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:49:13.423022  543705 net.go:698] Add success.
I0320 16:49:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:49:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:49:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 16:49:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:49:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 16:49:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:49:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:49:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:49:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:49:16.472479  543705 disk_local_worker.go:436] Get disk info: []
I0320 16:49:23.409774  543705 cpu.go:275] no items to output this cycle
E0320 16:49:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:49:23.409789  543705 memory.go:184] no items to output this cycle
I0320 16:49:25.237676  543705 disk_info.go:125] begin check local disk info of client
I0320 16:49:25.240087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:49:25.240093  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa80 0xc0001aaac0]
E0320 16:49:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:49:33.409779  543705 memory.go:184] no items to output this cycle
I0320 16:49:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 16:49:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:49:43.409817  543705 memory.go:191] Add success.
I0320 16:49:43.409827  543705 cpu.go:282] Add success.
I0320 16:49:43.419987  543705 net.go:648] Add success.
I0320 16:49:43.422709  543705 net.go:770] primary dev: ETH0
I0320 16:49:43.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:49:43.422740  543705 net.go:698] Add success.
I0320 16:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:49:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:49:53.409776  543705 memory.go:184] no items to output this cycle
I0320 16:49:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 16:50:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:50:03.409899  543705 cpu.go:275] no items to output this cycle
I0320 16:50:03.409908  543705 memory.go:184] no items to output this cycle
E0320 16:50:13.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:50:13.409768  543705 memory.go:191] Add success.
W0320 16:50:13.409793  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:50:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:50:13.409808  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:50:13.409814  543705 cpu.go:282] Add success.
I0320 16:50:13.420058  543705 net.go:648] Add success.
I0320 16:50:13.423330  543705 net.go:770] primary dev: ETH0
I0320 16:50:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:50:13.423355  543705 net.go:698] Add success.
I0320 16:50:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:50:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:50:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 16:50:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:50:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 16:50:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:50:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:50:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:50:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:50:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:50:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:50:23.409783  543705 memory.go:184] no items to output this cycle
I0320 16:50:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 16:50:25.241676  543705 disk_info.go:125] begin check local disk info of client
I0320 16:50:25.244123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:50:25.244130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bdc0 0xc00007be00]
E0320 16:50:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:50:33.409797  543705 memory.go:184] no items to output this cycle
I0320 16:50:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 16:50:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:50:43.409800  543705 cpu.go:282] Add success.
I0320 16:50:43.409802  543705 memory.go:191] Add success.
I0320 16:50:43.419959  543705 net.go:648] Add success.
I0320 16:50:43.422754  543705 net.go:770] primary dev: ETH0
I0320 16:50:43.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:50:43.422779  543705 net.go:698] Add success.
I0320 16:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:50:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:50:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:50:53.409803  543705 memory.go:184] no items to output this cycle
I0320 16:50:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 16:51:03.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:51:03.409890  543705 cpu.go:275] no items to output this cycle
I0320 16:51:03.409902  543705 memory.go:184] no items to output this cycle
E0320 16:51:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:51:13.409796  543705 memory.go:191] Add success.
W0320 16:51:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 16:51:13.409830  543705 cpu.go:282] Add success.
W0320 16:51:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:51:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:51:13.420131  543705 net.go:648] Add success.
I0320 16:51:13.422760  543705 net.go:770] primary dev: ETH0
I0320 16:51:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:51:13.422785  543705 net.go:698] Add success.
I0320 16:51:13.518440  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b3f2401f-2fa5-479c-a908-8d0d8e5b9b1b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:51:13.518472  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:51:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:51:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0320 16:51:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:51:14.456729  543705 disk_worker.go:494] system disk:vda1
I0320 16:51:14.456758  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:51:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:51:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:51:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:51:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:51:23.409772  543705 memory.go:184] no items to output this cycle
I0320 16:51:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 16:51:25.245674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:51:25.248108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:51:25.248113  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0320 16:51:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:51:33.409778  543705 cpu.go:275] no items to output this cycle
I0320 16:51:33.409779  543705 memory.go:184] no items to output this cycle
I0320 16:51:38.555518  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:51:38.555525  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:51:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:51:43.410667  543705 memory.go:191] Add success.
I0320 16:51:43.409831  543705 cpu.go:282] Add success.
I0320 16:51:43.420375  543705 net.go:648] Add success.
I0320 16:51:43.423011  543705 net.go:770] primary dev: ETH0
I0320 16:51:43.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:51:43.423037  543705 net.go:698] Add success.
I0320 16:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:51:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:51:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:51:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:51:53.409809  543705 memory.go:184] no items to output this cycle
I0320 16:51:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 16:52:03.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:52:03.409930  543705 memory.go:184] no items to output this cycle
I0320 16:52:03.409979  543705 cpu.go:275] no items to output this cycle
E0320 16:52:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:52:13.409821  543705 memory.go:191] Add success.
I0320 16:52:13.409830  543705 cpu.go:282] Add success.
W0320 16:52:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:52:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:52:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:52:13.420260  543705 net.go:648] Add success.
I0320 16:52:13.422973  543705 net.go:770] primary dev: ETH0
I0320 16:52:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:52:13.423005  543705 net.go:698] Add success.
W0320 16:52:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:52:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 16:52:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:52:14.455905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:52:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:52:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:52:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 16:52:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:52:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:52:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:52:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:52:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:52:16.457959  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:52:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:52:16.472300  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:52:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:52:23.409796  543705 memory.go:184] no items to output this cycle
I0320 16:52:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 16:52:25.249671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:52:25.252106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:52:25.252111  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa980 0xc0001aa9c0]
E0320 16:52:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:52:33.409770  543705 memory.go:184] no items to output this cycle
I0320 16:52:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 16:52:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:52:43.409820  543705 memory.go:191] Add success.
I0320 16:52:43.409829  543705 cpu.go:282] Add success.
I0320 16:52:43.420008  543705 net.go:648] Add success.
I0320 16:52:43.422652  543705 net.go:770] primary dev: ETH0
I0320 16:52:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:52:43.422679  543705 net.go:698] Add success.
I0320 16:52:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:52:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:52:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:52:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:52:53.409783  543705 memory.go:184] no items to output this cycle
I0320 16:52:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 16:53:03.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:53:03.409866  543705 memory.go:184] no items to output this cycle
I0320 16:53:03.409976  543705 cpu.go:275] no items to output this cycle
E0320 16:53:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:53:13.409791  543705 memory.go:191] Add success.
I0320 16:53:13.409792  543705 cpu.go:282] Add success.
W0320 16:53:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:53:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:53:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:53:13.420173  543705 net.go:648] Add success.
I0320 16:53:13.423328  543705 net.go:770] primary dev: ETH0
I0320 16:53:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:53:13.423357  543705 net.go:698] Add success.
I0320 16:53:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:53:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:53:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 16:53:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:53:14.456500  543705 disk_worker.go:494] system disk:vda1
I0320 16:53:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:53:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:53:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:53:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:53:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:53:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:53:23.409778  543705 memory.go:184] no items to output this cycle
I0320 16:53:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 16:53:25.253674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:53:25.256101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:53:25.256107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c47c0 0xc0000c4800]
E0320 16:53:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:53:33.409772  543705 memory.go:184] no items to output this cycle
I0320 16:53:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 16:53:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:53:43.409820  543705 memory.go:191] Add success.
I0320 16:53:43.409829  543705 cpu.go:282] Add success.
I0320 16:53:43.419968  543705 net.go:648] Add success.
I0320 16:53:43.422685  543705 net.go:770] primary dev: ETH0
I0320 16:53:43.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:53:43.422711  543705 net.go:698] Add success.
I0320 16:53:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:53:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:53:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:53:53.409805  543705 memory.go:184] no items to output this cycle
I0320 16:53:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 16:54:03.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:54:03.409913  543705 cpu.go:275] no items to output this cycle
I0320 16:54:03.409919  543705 memory.go:184] no items to output this cycle
E0320 16:54:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:54:13.409783  543705 memory.go:191] Add success.
I0320 16:54:13.409789  543705 cpu.go:282] Add success.
W0320 16:54:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:54:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:54:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:54:13.420296  543705 net.go:648] Add success.
I0320 16:54:13.422933  543705 net.go:770] primary dev: ETH0
I0320 16:54:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:54:13.422958  543705 net.go:698] Add success.
I0320 16:54:13.464183  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a946051-42ab-423a-8550-c9726bdbe977","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:54:13.464215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 16:54:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:54:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:54:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 16:54:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:54:14.456683  543705 disk_worker.go:494] system disk:vda1
I0320 16:54:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:54:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:54:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:54:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:54:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:54:23.410356  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:54:23.410372  543705 memory.go:184] no items to output this cycle
I0320 16:54:23.410402  543705 cpu.go:275] no items to output this cycle
I0320 16:54:25.257669  543705 disk_info.go:125] begin check local disk info of client
I0320 16:54:25.260103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:54:25.260108  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b780 0xc00007b7c0]
E0320 16:54:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:54:33.409777  543705 cpu.go:275] no items to output this cycle
I0320 16:54:33.409782  543705 memory.go:184] no items to output this cycle
I0320 16:54:38.556544  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:54:38.556551  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:54:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:54:43.410660  543705 memory.go:191] Add success.
I0320 16:54:43.409816  543705 cpu.go:282] Add success.
I0320 16:54:43.420359  543705 net.go:648] Add success.
I0320 16:54:43.423174  543705 net.go:770] primary dev: ETH0
I0320 16:54:43.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:54:43.423199  543705 net.go:698] Add success.
I0320 16:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:54:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:54:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:54:53.409772  543705 memory.go:184] no items to output this cycle
I0320 16:54:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 16:55:03.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:55:03.409891  543705 memory.go:184] no items to output this cycle
I0320 16:55:03.410057  543705 cpu.go:275] no items to output this cycle
E0320 16:55:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:55:13.409823  543705 memory.go:191] Add success.
I0320 16:55:13.409825  543705 cpu.go:282] Add success.
W0320 16:55:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:55:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:55:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:55:13.420238  543705 net.go:648] Add success.
I0320 16:55:13.423038  543705 net.go:770] primary dev: ETH0
I0320 16:55:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:55:13.423063  543705 net.go:698] Add success.
I0320 16:55:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:55:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:55:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 16:55:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:55:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 16:55:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:55:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:55:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:55:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:55:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:55:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:55:23.409767  543705 memory.go:184] no items to output this cycle
I0320 16:55:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 16:55:25.261672  543705 disk_info.go:125] begin check local disk info of client
I0320 16:55:25.264112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:55:25.264118  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471d00 0xc000471d40]
E0320 16:55:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:55:33.409774  543705 memory.go:184] no items to output this cycle
I0320 16:55:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 16:55:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:55:43.409806  543705 cpu.go:282] Add success.
I0320 16:55:43.409808  543705 memory.go:191] Add success.
I0320 16:55:43.420048  543705 net.go:648] Add success.
I0320 16:55:43.422685  543705 net.go:770] primary dev: ETH0
I0320 16:55:43.422701  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:55:43.422717  543705 net.go:698] Add success.
I0320 16:55:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:55:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:55:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:55:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:55:53.409802  543705 memory.go:184] no items to output this cycle
I0320 16:55:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 16:56:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:56:03.409779  543705 memory.go:184] no items to output this cycle
I0320 16:56:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 16:56:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:56:13.409797  543705 memory.go:191] Add success.
I0320 16:56:13.409799  543705 cpu.go:282] Add success.
W0320 16:56:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:56:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:56:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:56:13.420042  543705 net.go:648] Add success.
I0320 16:56:13.422859  543705 net.go:770] primary dev: ETH0
I0320 16:56:13.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:56:13.422884  543705 net.go:698] Add success.
I0320 16:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:56:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:56:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 16:56:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:56:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 16:56:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:56:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:56:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:56:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:56:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:56:23.409776  543705 memory.go:184] no items to output this cycle
I0320 16:56:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 16:56:25.265671  543705 disk_info.go:125] begin check local disk info of client
I0320 16:56:25.268076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:56:25.268082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae40 0xc0001aae80]
E0320 16:56:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:56:33.409782  543705 memory.go:184] no items to output this cycle
I0320 16:56:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 16:56:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:56:43.409821  543705 memory.go:191] Add success.
I0320 16:56:43.409830  543705 cpu.go:282] Add success.
I0320 16:56:43.420009  543705 net.go:648] Add success.
I0320 16:56:43.422958  543705 net.go:770] primary dev: ETH0
I0320 16:56:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:56:43.422987  543705 net.go:698] Add success.
I0320 16:56:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:56:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:56:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:56:53.409773  543705 memory.go:184] no items to output this cycle
I0320 16:56:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 16:57:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:57:03.409795  543705 memory.go:184] no items to output this cycle
I0320 16:57:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 16:57:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:57:13.409787  543705 memory.go:191] Add success.
I0320 16:57:13.409808  543705 cpu.go:282] Add success.
W0320 16:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:57:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:57:13.419734  543705 net.go:648] Add success.
I0320 16:57:13.422773  543705 net.go:770] primary dev: ETH0
I0320 16:57:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:57:13.422798  543705 net.go:698] Add success.
I0320 16:57:13.429360  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 16:57:13.453538  543705 event_worker.go:152] Polling the log file for events...
I0320 16:57:13.464004  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98242cf6-28dd-4d92-9616-ff49e9a2cbe7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 16:57:13.464034  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 16:57:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:57:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 16:57:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0320 16:57:14.456941  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 16:57:14.456950  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 16:57:14.456956  543705 custom_config.go:64] query custom config with name: gpu
I0320 16:57:14.456989  543705 disk_worker.go:494] system disk:vda1
I0320 16:57:14.457017  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 16:57:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 16:57:15.456873  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:57:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 16:57:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 16:57:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:57:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:57:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:57:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:57:23.409776  543705 memory.go:184] no items to output this cycle
I0320 16:57:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 16:57:25.269670  543705 disk_info.go:125] begin check local disk info of client
I0320 16:57:25.272189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:57:25.272196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004eb300 0xc0004eb340]
E0320 16:57:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:57:33.409775  543705 memory.go:184] no items to output this cycle
I0320 16:57:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 16:57:38.557526  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 16:57:38.557531  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 16:57:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:57:43.410792  543705 memory.go:191] Add success.
I0320 16:57:43.409822  543705 cpu.go:282] Add success.
I0320 16:57:43.420503  543705 net.go:648] Add success.
I0320 16:57:43.423737  543705 net.go:770] primary dev: ETH0
I0320 16:57:43.423752  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:57:43.423765  543705 net.go:698] Add success.
I0320 16:57:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:57:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:57:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:57:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:57:53.409802  543705 memory.go:184] no items to output this cycle
I0320 16:57:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 16:58:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:58:03.409788  543705 memory.go:184] no items to output this cycle
I0320 16:58:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 16:58:13.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:58:13.409904  543705 memory.go:191] Add success.
W0320 16:58:13.410023  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:58:13.410042  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:58:13.410047  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:58:13.410063  543705 cpu.go:282] Add success.
I0320 16:58:13.419710  543705 net.go:648] Add success.
I0320 16:58:13.422372  543705 net.go:770] primary dev: ETH0
I0320 16:58:13.422385  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:58:13.422396  543705 net.go:698] Add success.
I0320 16:58:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:58:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:58:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 16:58:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:58:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 16:58:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:58:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:58:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:58:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:58:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:58:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:58:23.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:58:23.410272  543705 memory.go:184] no items to output this cycle
I0320 16:58:23.410284  543705 cpu.go:275] no items to output this cycle
I0320 16:58:25.273674  543705 disk_info.go:125] begin check local disk info of client
I0320 16:58:25.276130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:58:25.276136  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be80 0xc00007bec0]
E0320 16:58:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:58:33.409774  543705 memory.go:184] no items to output this cycle
I0320 16:58:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 16:58:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:58:43.409794  543705 memory.go:191] Add success.
I0320 16:58:43.409800  543705 cpu.go:282] Add success.
I0320 16:58:43.419865  543705 net.go:648] Add success.
I0320 16:58:43.422997  543705 net.go:770] primary dev: ETH0
I0320 16:58:43.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:58:43.423031  543705 net.go:698] Add success.
I0320 16:58:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:58:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:58:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:58:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:58:53.409763  543705 memory.go:184] no items to output this cycle
I0320 16:58:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 16:59:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:59:03.409773  543705 memory.go:184] no items to output this cycle
I0320 16:59:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 16:59:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:59:13.409806  543705 memory.go:191] Add success.
I0320 16:59:13.409820  543705 cpu.go:282] Add success.
W0320 16:59:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 16:59:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 16:59:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 16:59:13.419741  543705 net.go:648] Add success.
I0320 16:59:13.423012  543705 net.go:770] primary dev: ETH0
I0320 16:59:13.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:59:13.423036  543705 net.go:698] Add success.
I0320 16:59:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 16:59:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 16:59:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 16:59:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 16:59:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 16:59:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 16:59:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 16:59:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:59:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 16:59:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 16:59:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:59:23.409779  543705 memory.go:184] no items to output this cycle
I0320 16:59:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 16:59:25.277673  543705 disk_info.go:125] begin check local disk info of client
I0320 16:59:25.280097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 16:59:25.280103  543705 disk_info.go:196] parse disk info done, disk is : [0xc000586100 0xc000586140]
E0320 16:59:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:59:33.409777  543705 cpu.go:275] no items to output this cycle
I0320 16:59:33.409784  543705 memory.go:184] no items to output this cycle
E0320 16:59:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:59:43.409822  543705 memory.go:191] Add success.
I0320 16:59:43.409826  543705 cpu.go:282] Add success.
I0320 16:59:43.419985  543705 net.go:648] Add success.
I0320 16:59:43.422928  543705 net.go:770] primary dev: ETH0
I0320 16:59:43.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0320 16:59:43.422966  543705 net.go:698] Add success.
I0320 16:59:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 16:59:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 16:59:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 16:59:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 16:59:53.409800  543705 memory.go:184] no items to output this cycle
I0320 16:59:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 17:00:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:00:03.409790  543705 memory.go:184] no items to output this cycle
I0320 17:00:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 17:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:00:13.409784  543705 memory.go:191] Add success.
W0320 17:00:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:00:13.409810  543705 cpu.go:282] Add success.
W0320 17:00:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:00:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:00:13.420306  543705 net.go:648] Add success.
I0320 17:00:13.423293  543705 net.go:770] primary dev: ETH0
I0320 17:00:13.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:00:13.423321  543705 net.go:698] Add success.
I0320 17:00:13.684815  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cc380024-9354-4c09-94b9-f2268cf2a833","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:00:13.684847  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:00:14.454719  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:00:14.454917  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:00:14.454927  543705 disk_worker.go:708] disk space is not compliant
W0320 17:00:14.454930  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:00:14.456285  543705 disk_worker.go:494] system disk:vda1
I0320 17:00:14.456328  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:00:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:00:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:00:23.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:00:23.410394  543705 memory.go:184] no items to output this cycle
I0320 17:00:23.410404  543705 cpu.go:275] no items to output this cycle
I0320 17:00:25.281671  543705 disk_info.go:125] begin check local disk info of client
I0320 17:00:25.284123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:00:25.284128  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390a00 0xc000390a40]
E0320 17:00:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:00:33.409779  543705 memory.go:184] no items to output this cycle
I0320 17:00:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 17:00:38.558556  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:00:38.558563  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:00:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:00:43.410685  543705 memory.go:191] Add success.
I0320 17:00:43.409835  543705 cpu.go:282] Add success.
I0320 17:00:43.420404  543705 net.go:648] Add success.
I0320 17:00:43.423376  543705 net.go:770] primary dev: ETH0
I0320 17:00:43.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:00:43.423401  543705 net.go:698] Add success.
I0320 17:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:00:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:00:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:00:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:00:53.409795  543705 memory.go:184] no items to output this cycle
I0320 17:00:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 17:01:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:01:03.409782  543705 memory.go:184] no items to output this cycle
I0320 17:01:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:01:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:01:13.409783  543705 memory.go:191] Add success.
I0320 17:01:13.409801  543705 cpu.go:282] Add success.
W0320 17:01:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:01:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:01:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:01:13.419719  543705 net.go:648] Add success.
I0320 17:01:13.422509  543705 net.go:770] primary dev: ETH0
I0320 17:01:13.422524  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:01:13.422537  543705 net.go:698] Add success.
I0320 17:01:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:01:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:01:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 17:01:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:01:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 17:01:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:01:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:01:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:01:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 17:01:23.409778  543705 memory.go:184] no items to output this cycle
I0320 17:01:25.285672  543705 disk_info.go:125] begin check local disk info of client
I0320 17:01:25.288086  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:01:25.288092  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d35c0 0xc0003d3600]
E0320 17:01:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:01:33.409761  543705 memory.go:184] no items to output this cycle
I0320 17:01:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 17:01:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:01:43.409830  543705 memory.go:191] Add success.
I0320 17:01:43.409834  543705 cpu.go:282] Add success.
I0320 17:01:43.419959  543705 net.go:648] Add success.
I0320 17:01:43.423238  543705 net.go:770] primary dev: ETH0
I0320 17:01:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:01:43.423266  543705 net.go:698] Add success.
I0320 17:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:01:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:01:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:01:53.409800  543705 memory.go:184] no items to output this cycle
I0320 17:01:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 17:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:02:03.409777  543705 memory.go:184] no items to output this cycle
I0320 17:02:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 17:02:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:02:13.409806  543705 memory.go:191] Add success.
I0320 17:02:13.409814  543705 cpu.go:282] Add success.
W0320 17:02:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:02:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:02:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:02:13.419710  543705 net.go:648] Add success.
I0320 17:02:13.422685  543705 net.go:770] primary dev: ETH0
I0320 17:02:13.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:02:13.422713  543705 net.go:698] Add success.
W0320 17:02:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:02:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 17:02:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:02:14.456749  543705 disk_worker.go:494] system disk:vda1
I0320 17:02:14.456788  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:02:14.457184  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:02:14.457192  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:02:14.457196  543705 custom_config.go:64] query custom config with name: gpu
E0320 17:02:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:02:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:02:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:02:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:02:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:02:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:02:16.472338  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:02:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:02:23.409792  543705 memory.go:184] no items to output this cycle
I0320 17:02:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 17:02:25.289683  543705 disk_info.go:125] begin check local disk info of client
I0320 17:02:25.292245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:02:25.292253  543705 disk_info.go:196] parse disk info done, disk is : [0xc000587300 0xc000587340]
E0320 17:02:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:02:33.409775  543705 memory.go:184] no items to output this cycle
I0320 17:02:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 17:02:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:02:43.409792  543705 memory.go:191] Add success.
I0320 17:02:43.409815  543705 cpu.go:282] Add success.
I0320 17:02:43.419870  543705 net.go:648] Add success.
I0320 17:02:43.422711  543705 net.go:770] primary dev: ETH0
I0320 17:02:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:02:43.422743  543705 net.go:698] Add success.
I0320 17:02:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:02:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:02:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:02:53.410353  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:02:53.410366  543705 cpu.go:275] no items to output this cycle
I0320 17:02:53.410368  543705 memory.go:184] no items to output this cycle
E0320 17:03:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:03:03.409801  543705 memory.go:184] no items to output this cycle
I0320 17:03:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 17:03:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:03:13.409815  543705 memory.go:191] Add success.
I0320 17:03:13.409820  543705 cpu.go:282] Add success.
W0320 17:03:13.409978  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:03:13.410001  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:03:13.410006  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:03:13.419703  543705 net.go:648] Add success.
I0320 17:03:13.422848  543705 net.go:770] primary dev: ETH0
I0320 17:03:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:03:13.422878  543705 net.go:698] Add success.
I0320 17:03:13.464054  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5320d097-6613-4d08-99bb-dfa5b80eb94c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:03:13.464085  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:03:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:03:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 17:03:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:03:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 17:03:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:03:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:03:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:03:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:03:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:03:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:03:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:03:23.409762  543705 memory.go:184] no items to output this cycle
I0320 17:03:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 17:03:25.293670  543705 disk_info.go:125] begin check local disk info of client
I0320 17:03:25.296108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:03:25.296114  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cb340 0xc0004cb380]
E0320 17:03:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:03:33.409797  543705 memory.go:184] no items to output this cycle
I0320 17:03:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 17:03:38.559545  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:03:38.559552  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:03:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:03:43.410573  543705 memory.go:191] Add success.
I0320 17:03:43.409818  543705 cpu.go:282] Add success.
I0320 17:03:43.420326  543705 net.go:648] Add success.
I0320 17:03:43.423315  543705 net.go:770] primary dev: ETH0
I0320 17:03:43.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:03:43.423343  543705 net.go:698] Add success.
I0320 17:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:03:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:03:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:03:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:03:53.409795  543705 memory.go:184] no items to output this cycle
I0320 17:03:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 17:04:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:04:03.409786  543705 memory.go:184] no items to output this cycle
I0320 17:04:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:04:13.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:04:13.409897  543705 cpu.go:282] Add success.
I0320 17:04:13.409906  543705 memory.go:191] Add success.
W0320 17:04:13.409941  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:04:13.409960  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:04:13.409969  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:04:13.419725  543705 net.go:648] Add success.
I0320 17:04:13.422616  543705 net.go:770] primary dev: ETH0
I0320 17:04:13.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:04:13.422640  543705 net.go:698] Add success.
I0320 17:04:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:04:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:04:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 17:04:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:04:14.456536  543705 disk_worker.go:494] system disk:vda1
I0320 17:04:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:04:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:04:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:04:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:04:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:04:23.409792  543705 memory.go:184] no items to output this cycle
I0320 17:04:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 17:04:25.297674  543705 disk_info.go:125] begin check local disk info of client
I0320 17:04:25.300175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:04:25.300180  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab80 0xc00007abc0]
E0320 17:04:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:04:33.409776  543705 memory.go:184] no items to output this cycle
I0320 17:04:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 17:04:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:04:43.409800  543705 memory.go:191] Add success.
I0320 17:04:43.409801  543705 cpu.go:282] Add success.
I0320 17:04:43.420015  543705 net.go:648] Add success.
I0320 17:04:43.423086  543705 net.go:770] primary dev: ETH0
I0320 17:04:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:04:43.423113  543705 net.go:698] Add success.
I0320 17:04:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:04:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:04:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:04:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:04:53.409779  543705 memory.go:184] no items to output this cycle
I0320 17:04:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 17:05:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:05:03.409784  543705 memory.go:184] no items to output this cycle
I0320 17:05:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:05:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:05:13.409787  543705 memory.go:191] Add success.
I0320 17:05:13.409786  543705 cpu.go:282] Add success.
W0320 17:05:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:05:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:05:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:05:13.420254  543705 net.go:648] Add success.
I0320 17:05:13.423156  543705 net.go:770] primary dev: ETH0
I0320 17:05:13.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:05:13.423181  543705 net.go:698] Add success.
I0320 17:05:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:05:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:05:14.455142  543705 disk_worker.go:708] disk space is not compliant
W0320 17:05:14.455144  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:05:14.456476  543705 disk_worker.go:494] system disk:vda1
I0320 17:05:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:05:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:05:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:05:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:05:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:05:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:05:23.409779  543705 memory.go:184] no items to output this cycle
I0320 17:05:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 17:05:25.302968  543705 disk_info.go:125] begin check local disk info of client
I0320 17:05:25.305423  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:05:25.305441  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340840 0xc000340880]
E0320 17:05:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:05:33.409775  543705 memory.go:184] no items to output this cycle
I0320 17:05:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 17:05:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:05:43.409806  543705 memory.go:191] Add success.
I0320 17:05:43.409807  543705 cpu.go:282] Add success.
I0320 17:05:43.420011  543705 net.go:648] Add success.
I0320 17:05:43.422835  543705 net.go:770] primary dev: ETH0
I0320 17:05:43.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:05:43.422861  543705 net.go:698] Add success.
I0320 17:05:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:05:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:05:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:05:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:05:53.409790  543705 memory.go:184] no items to output this cycle
I0320 17:05:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 17:06:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:06:03.409778  543705 memory.go:184] no items to output this cycle
I0320 17:06:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 17:06:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:06:13.409783  543705 memory.go:191] Add success.
I0320 17:06:13.409800  543705 cpu.go:282] Add success.
W0320 17:06:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:06:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:06:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:06:13.420333  543705 net.go:648] Add success.
I0320 17:06:13.423625  543705 net.go:770] primary dev: ETH0
I0320 17:06:13.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:06:13.423649  543705 net.go:698] Add success.
I0320 17:06:13.470249  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f2661989-bc19-4e5c-ba4b-bfaa76caa3b1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:06:13.470280  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:06:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:06:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:06:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 17:06:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:06:14.456689  543705 disk_worker.go:494] system disk:vda1
I0320 17:06:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:06:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:06:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:06:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:06:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:06:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:06:23.409775  543705 memory.go:184] no items to output this cycle
I0320 17:06:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 17:06:25.305669  543705 disk_info.go:125] begin check local disk info of client
I0320 17:06:25.308139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:06:25.308145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8940 0xc0004a8980]
E0320 17:06:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:06:33.409784  543705 memory.go:184] no items to output this cycle
I0320 17:06:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 17:06:38.560564  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:06:38.560570  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:06:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:06:43.410644  543705 memory.go:191] Add success.
I0320 17:06:43.409832  543705 cpu.go:282] Add success.
I0320 17:06:43.420370  543705 net.go:648] Add success.
I0320 17:06:43.422875  543705 net.go:770] primary dev: ETH0
I0320 17:06:43.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:06:43.422905  543705 net.go:698] Add success.
I0320 17:06:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:06:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:06:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:06:53.409793  543705 memory.go:184] no items to output this cycle
I0320 17:06:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 17:07:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:07:03.409788  543705 memory.go:184] no items to output this cycle
I0320 17:07:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:07:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:07:13.409790  543705 memory.go:191] Add success.
I0320 17:07:13.409791  543705 cpu.go:282] Add success.
W0320 17:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:07:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:07:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:07:13.420070  543705 net.go:648] Add success.
I0320 17:07:13.423303  543705 net.go:770] primary dev: ETH0
I0320 17:07:13.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:07:13.423333  543705 net.go:698] Add success.
I0320 17:07:13.452838  543705 event_worker.go:152] Polling the log file for events...
W0320 17:07:14.456362  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:07:14.456375  543705 disk_worker.go:708] disk space is not compliant
W0320 17:07:14.456379  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:07:14.457107  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:07:14.457116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:07:14.457122  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:07:14.457996  543705 disk_worker.go:494] system disk:vda1
I0320 17:07:14.458022  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:07:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:07:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:07:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:07:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:07:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:07:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:07:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:07:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:07:23.409769  543705 memory.go:184] no items to output this cycle
I0320 17:07:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 17:07:25.309672  543705 disk_info.go:125] begin check local disk info of client
I0320 17:07:25.312101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:07:25.312107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbec0 0xc0001fbf00]
E0320 17:07:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:07:33.409782  543705 memory.go:184] no items to output this cycle
I0320 17:07:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:07:43.410023  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:07:43.410052  543705 memory.go:191] Add success.
I0320 17:07:43.410083  543705 cpu.go:282] Add success.
I0320 17:07:43.420337  543705 net.go:648] Add success.
I0320 17:07:43.423126  543705 net.go:770] primary dev: ETH0
I0320 17:07:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:07:43.423151  543705 net.go:698] Add success.
I0320 17:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:07:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:07:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:07:53.409766  543705 memory.go:184] no items to output this cycle
I0320 17:07:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 17:08:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:08:03.409786  543705 memory.go:184] no items to output this cycle
I0320 17:08:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:08:13.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:08:13.409815  543705 cpu.go:282] Add success.
I0320 17:08:13.409830  543705 memory.go:191] Add success.
W0320 17:08:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:08:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:08:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:08:13.420058  543705 net.go:648] Add success.
I0320 17:08:13.422833  543705 net.go:770] primary dev: ETH0
I0320 17:08:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:08:13.422877  543705 net.go:698] Add success.
I0320 17:08:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:08:14.455293  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:08:14.455391  543705 disk_worker.go:708] disk space is not compliant
W0320 17:08:14.455412  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:08:14.457016  543705 disk_worker.go:494] system disk:vda1
I0320 17:08:14.457046  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:08:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:08:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:08:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:08:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:08:23.409787  543705 memory.go:184] no items to output this cycle
I0320 17:08:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 17:08:25.313683  543705 disk_info.go:125] begin check local disk info of client
I0320 17:08:25.316124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:08:25.316130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b7c0 0xc00007b800]
E0320 17:08:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:08:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 17:08:33.409805  543705 memory.go:184] no items to output this cycle
E0320 17:08:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:08:43.409836  543705 memory.go:191] Add success.
I0320 17:08:43.409843  543705 cpu.go:282] Add success.
I0320 17:08:43.420008  543705 net.go:648] Add success.
I0320 17:08:43.422736  543705 net.go:770] primary dev: ETH0
I0320 17:08:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:08:43.422763  543705 net.go:698] Add success.
I0320 17:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:08:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:08:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:08:53.409789  543705 memory.go:184] no items to output this cycle
I0320 17:08:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:09:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:09:03.409791  543705 memory.go:184] no items to output this cycle
I0320 17:09:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:09:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:09:13.409802  543705 memory.go:191] Add success.
I0320 17:09:13.409804  543705 cpu.go:282] Add success.
W0320 17:09:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:09:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:09:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:09:13.420043  543705 net.go:648] Add success.
I0320 17:09:13.422912  543705 net.go:770] primary dev: ETH0
I0320 17:09:13.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:09:13.422937  543705 net.go:698] Add success.
I0320 17:09:13.472937  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0982ed41-24d2-4bb6-91a6-09e485776676","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:09:13.472972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:09:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:09:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:09:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 17:09:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:09:14.456806  543705 disk_worker.go:494] system disk:vda1
I0320 17:09:14.456847  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:09:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:09:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:09:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:09:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:09:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:09:23.409816  543705 memory.go:184] no items to output this cycle
I0320 17:09:23.409827  543705 cpu.go:275] no items to output this cycle
I0320 17:09:25.317675  543705 disk_info.go:125] begin check local disk info of client
I0320 17:09:25.320135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:09:25.320141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be200 0xc0002be240]
E0320 17:09:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:09:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 17:09:33.409796  543705 memory.go:184] no items to output this cycle
I0320 17:09:38.561566  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:09:38.561573  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:09:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:09:43.410641  543705 memory.go:191] Add success.
I0320 17:09:43.409848  543705 cpu.go:282] Add success.
I0320 17:09:43.420338  543705 net.go:648] Add success.
I0320 17:09:43.422869  543705 net.go:770] primary dev: ETH0
I0320 17:09:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:09:43.422894  543705 net.go:698] Add success.
I0320 17:09:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:09:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:09:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:09:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:09:53.409776  543705 memory.go:184] no items to output this cycle
I0320 17:09:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 17:10:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:10:03.409780  543705 memory.go:184] no items to output this cycle
I0320 17:10:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 17:10:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:10:13.409827  543705 memory.go:191] Add success.
I0320 17:10:13.409835  543705 cpu.go:282] Add success.
W0320 17:10:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:10:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:10:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:10:13.420151  543705 net.go:648] Add success.
I0320 17:10:13.422680  543705 net.go:770] primary dev: ETH0
I0320 17:10:13.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:10:13.422705  543705 net.go:698] Add success.
I0320 17:10:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:10:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:10:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 17:10:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:10:14.456474  543705 disk_worker.go:494] system disk:vda1
I0320 17:10:14.456503  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:10:15.454989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:10:16.458036  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:10:16.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:10:16.458119  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:10:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:10:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:10:23.409786  543705 memory.go:184] no items to output this cycle
I0320 17:10:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 17:10:25.321674  543705 disk_info.go:125] begin check local disk info of client
I0320 17:10:25.324136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:10:25.324142  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be780 0xc0002be7c0]
E0320 17:10:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:10:33.409784  543705 memory.go:184] no items to output this cycle
I0320 17:10:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 17:10:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:10:43.409809  543705 memory.go:191] Add success.
I0320 17:10:43.409827  543705 cpu.go:282] Add success.
I0320 17:10:43.419926  543705 net.go:648] Add success.
I0320 17:10:43.422652  543705 net.go:770] primary dev: ETH0
I0320 17:10:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:10:43.422679  543705 net.go:698] Add success.
I0320 17:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:10:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:10:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:10:53.409766  543705 memory.go:184] no items to output this cycle
I0320 17:10:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 17:11:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:11:03.409777  543705 memory.go:184] no items to output this cycle
I0320 17:11:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 17:11:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:11:13.409809  543705 memory.go:191] Add success.
I0320 17:11:13.409819  543705 cpu.go:282] Add success.
W0320 17:11:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:11:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:11:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:11:13.420114  543705 net.go:648] Add success.
I0320 17:11:13.422705  543705 net.go:770] primary dev: ETH0
I0320 17:11:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:11:13.422729  543705 net.go:698] Add success.
I0320 17:11:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:11:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:11:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 17:11:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:11:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 17:11:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:11:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:11:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:11:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:11:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:11:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:11:23.409771  543705 memory.go:184] no items to output this cycle
I0320 17:11:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 17:11:25.325674  543705 disk_info.go:125] begin check local disk info of client
I0320 17:11:25.328112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:11:25.328118  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472740 0xc000472780]
E0320 17:11:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:11:33.409785  543705 memory.go:184] no items to output this cycle
I0320 17:11:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 17:11:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:11:43.409798  543705 memory.go:191] Add success.
I0320 17:11:43.409816  543705 cpu.go:282] Add success.
I0320 17:11:43.420008  543705 net.go:648] Add success.
I0320 17:11:43.423247  543705 net.go:770] primary dev: ETH0
I0320 17:11:43.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:11:43.423274  543705 net.go:698] Add success.
I0320 17:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:11:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:11:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:11:53.409792  543705 memory.go:184] no items to output this cycle
I0320 17:11:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 17:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:12:03.409777  543705 memory.go:184] no items to output this cycle
I0320 17:12:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 17:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:12:13.409807  543705 memory.go:191] Add success.
I0320 17:12:13.409816  543705 cpu.go:282] Add success.
W0320 17:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:12:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:12:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:12:13.420269  543705 net.go:648] Add success.
I0320 17:12:13.423094  543705 net.go:770] primary dev: ETH0
I0320 17:12:13.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:12:13.423123  543705 net.go:698] Add success.
I0320 17:12:13.581250  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"78e03e9e-ea4b-421c-b300-6d87529a5637","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:12:13.581284  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 17:12:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:12:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0320 17:12:14.455244  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:12:14.456079  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:12:14.456088  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:12:14.456094  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:12:14.457047  543705 disk_worker.go:494] system disk:vda1
I0320 17:12:14.457087  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:12:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:12:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:12:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:12:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:12:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:12:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:12:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:12:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:12:23.409793  543705 memory.go:184] no items to output this cycle
I0320 17:12:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 17:12:25.329680  543705 disk_info.go:125] begin check local disk info of client
I0320 17:12:25.332111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:12:25.332117  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a000 0xc00048a040]
E0320 17:12:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:12:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 17:12:33.409798  543705 memory.go:184] no items to output this cycle
I0320 17:12:38.562570  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:12:38.562577  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:12:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:12:43.410818  543705 memory.go:191] Add success.
I0320 17:12:43.409834  543705 cpu.go:282] Add success.
I0320 17:12:43.420535  543705 net.go:648] Add success.
I0320 17:12:43.423613  543705 net.go:770] primary dev: ETH0
I0320 17:12:43.423626  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:12:43.423638  543705 net.go:698] Add success.
I0320 17:12:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:12:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:12:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:12:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:12:53.409774  543705 memory.go:184] no items to output this cycle
I0320 17:12:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 17:13:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:13:03.409778  543705 memory.go:184] no items to output this cycle
I0320 17:13:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 17:13:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:13:13.409785  543705 memory.go:191] Add success.
I0320 17:13:13.409804  543705 cpu.go:282] Add success.
W0320 17:13:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:13:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:13:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:13:13.420062  543705 net.go:648] Add success.
I0320 17:13:13.422488  543705 net.go:770] primary dev: ETH0
I0320 17:13:13.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:13:13.422520  543705 net.go:698] Add success.
I0320 17:13:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:13:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:13:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 17:13:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:13:14.456517  543705 disk_worker.go:494] system disk:vda1
I0320 17:13:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:13:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:13:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:13:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:13:23.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:13:23.409881  543705 cpu.go:275] no items to output this cycle
I0320 17:13:23.409930  543705 memory.go:184] no items to output this cycle
I0320 17:13:25.333671  543705 disk_info.go:125] begin check local disk info of client
I0320 17:13:25.336191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:13:25.336198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348000 0xc000348040]
E0320 17:13:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:13:33.409781  543705 memory.go:184] no items to output this cycle
I0320 17:13:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 17:13:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:13:43.409799  543705 memory.go:191] Add success.
I0320 17:13:43.409821  543705 cpu.go:282] Add success.
I0320 17:13:43.419981  543705 net.go:648] Add success.
I0320 17:13:43.422551  543705 net.go:770] primary dev: ETH0
I0320 17:13:43.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:13:43.422577  543705 net.go:698] Add success.
I0320 17:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:13:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:13:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:13:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:13:53.409774  543705 memory.go:184] no items to output this cycle
I0320 17:13:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:14:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:14:03.409795  543705 cpu.go:275] no items to output this cycle
I0320 17:14:03.409798  543705 memory.go:184] no items to output this cycle
E0320 17:14:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:14:13.409785  543705 memory.go:191] Add success.
W0320 17:14:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:14:13.409818  543705 cpu.go:282] Add success.
W0320 17:14:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:14:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:14:13.420093  543705 net.go:648] Add success.
I0320 17:14:13.423293  543705 net.go:770] primary dev: ETH0
I0320 17:14:13.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:14:13.423321  543705 net.go:698] Add success.
I0320 17:14:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:14:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:14:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 17:14:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:14:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 17:14:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:14:15.454995  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:14:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:14:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:14:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:14:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:14:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:14:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 17:14:23.409797  543705 memory.go:184] no items to output this cycle
I0320 17:14:25.337688  543705 disk_info.go:125] begin check local disk info of client
I0320 17:14:25.340021  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:14:25.340028  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007adc0 0xc00007ae00]
E0320 17:14:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:14:33.409819  543705 memory.go:184] no items to output this cycle
I0320 17:14:33.409831  543705 cpu.go:275] no items to output this cycle
E0320 17:14:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:14:43.409815  543705 memory.go:191] Add success.
I0320 17:14:43.409817  543705 cpu.go:282] Add success.
I0320 17:14:43.420011  543705 net.go:648] Add success.
I0320 17:14:43.422819  543705 net.go:770] primary dev: ETH0
I0320 17:14:43.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:14:43.422849  543705 net.go:698] Add success.
I0320 17:14:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:14:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:14:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:14:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:14:53.409771  543705 memory.go:184] no items to output this cycle
I0320 17:14:53.409776  543705 cpu.go:275] no items to output this cycle
E0320 17:15:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:15:03.409783  543705 memory.go:184] no items to output this cycle
I0320 17:15:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:15:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:15:13.409814  543705 memory.go:191] Add success.
I0320 17:15:13.409819  543705 cpu.go:282] Add success.
W0320 17:15:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:15:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:15:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:15:13.420098  543705 net.go:648] Add success.
I0320 17:15:13.423068  543705 net.go:770] primary dev: ETH0
I0320 17:15:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:15:13.423093  543705 net.go:698] Add success.
I0320 17:15:13.514102  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"61d1750f-c590-4aa4-9456-2a89cb21e1bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:15:13.514136  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:15:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:15:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:15:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0320 17:15:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:15:14.456499  543705 disk_worker.go:494] system disk:vda1
I0320 17:15:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:15:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:15:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:15:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:15:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:15:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:15:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:15:23.409772  543705 memory.go:184] no items to output this cycle
I0320 17:15:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 17:15:25.341678  543705 disk_info.go:125] begin check local disk info of client
I0320 17:15:25.344098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:15:25.344105  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0320 17:15:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:15:33.409783  543705 memory.go:184] no items to output this cycle
I0320 17:15:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 17:15:38.563568  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:15:38.563574  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:15:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:15:43.410644  543705 memory.go:191] Add success.
I0320 17:15:43.409803  543705 cpu.go:282] Add success.
I0320 17:15:43.420308  543705 net.go:648] Add success.
I0320 17:15:43.422971  543705 net.go:770] primary dev: ETH0
I0320 17:15:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:15:43.423004  543705 net.go:698] Add success.
I0320 17:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:15:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:15:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:15:53.409771  543705 memory.go:184] no items to output this cycle
I0320 17:15:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:16:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:16:03.409781  543705 memory.go:184] no items to output this cycle
I0320 17:16:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:16:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:16:13.409809  543705 memory.go:191] Add success.
I0320 17:16:13.409816  543705 cpu.go:282] Add success.
W0320 17:16:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:16:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:16:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:16:13.420328  543705 net.go:648] Add success.
I0320 17:16:13.423156  543705 net.go:770] primary dev: ETH0
I0320 17:16:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:16:13.423182  543705 net.go:698] Add success.
I0320 17:16:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:16:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:16:14.455264  543705 disk_worker.go:708] disk space is not compliant
W0320 17:16:14.455268  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:16:14.457509  543705 disk_worker.go:494] system disk:vda1
I0320 17:16:14.457537  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:16:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:16:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:16:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:16:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:16:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:16:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:16:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 17:16:23.409777  543705 memory.go:184] no items to output this cycle
I0320 17:16:25.345675  543705 disk_info.go:125] begin check local disk info of client
I0320 17:16:25.348200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:16:25.348207  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270d00 0xc000270d40]
E0320 17:16:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:16:33.409781  543705 memory.go:184] no items to output this cycle
I0320 17:16:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:16:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:16:43.409795  543705 memory.go:191] Add success.
I0320 17:16:43.409830  543705 cpu.go:282] Add success.
I0320 17:16:43.420010  543705 net.go:648] Add success.
I0320 17:16:43.423233  543705 net.go:770] primary dev: ETH0
I0320 17:16:43.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:16:43.423263  543705 net.go:698] Add success.
I0320 17:16:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:16:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:16:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:16:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:16:53.409766  543705 memory.go:184] no items to output this cycle
I0320 17:16:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 17:17:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:17:03.409775  543705 memory.go:184] no items to output this cycle
I0320 17:17:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 17:17:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:17:13.409806  543705 memory.go:191] Add success.
I0320 17:17:13.409814  543705 cpu.go:282] Add success.
W0320 17:17:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:17:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:17:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:17:13.420442  543705 net.go:648] Add success.
I0320 17:17:13.423553  543705 net.go:770] primary dev: ETH0
I0320 17:17:13.423568  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:17:13.423581  543705 net.go:698] Add success.
I0320 17:17:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0320 17:17:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:17:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 17:17:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:17:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:17:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:17:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:17:14.456989  543705 disk_worker.go:494] system disk:vda1
I0320 17:17:14.457042  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:17:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:17:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:17:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:17:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:17:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:17:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:17:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:17:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:17:23.409789  543705 memory.go:184] no items to output this cycle
I0320 17:17:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 17:17:25.349674  543705 disk_info.go:125] begin check local disk info of client
I0320 17:17:25.352148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:17:25.352155  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a940 0xc00007aa40]
E0320 17:17:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:17:33.409760  543705 memory.go:184] no items to output this cycle
I0320 17:17:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 17:17:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:17:43.409839  543705 memory.go:191] Add success.
I0320 17:17:43.409841  543705 cpu.go:282] Add success.
I0320 17:17:43.420241  543705 net.go:648] Add success.
I0320 17:17:43.421212  543705 net.go:770] primary dev: ETH0
I0320 17:17:43.421225  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:17:43.421238  543705 net.go:698] Add success.
I0320 17:17:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:17:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:17:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:17:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:17:53.409798  543705 memory.go:184] no items to output this cycle
I0320 17:17:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 17:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:18:03.409778  543705 memory.go:184] no items to output this cycle
I0320 17:18:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 17:18:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:18:13.409783  543705 memory.go:191] Add success.
I0320 17:18:13.409808  543705 cpu.go:282] Add success.
W0320 17:18:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:18:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:18:13.419728  543705 net.go:648] Add success.
I0320 17:18:13.422355  543705 net.go:770] primary dev: ETH0
I0320 17:18:13.422367  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:18:13.422379  543705 net.go:698] Add success.
I0320 17:18:13.468591  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00fa8571-f89c-4653-af1e-317b08fa5189","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:18:13.468622  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:18:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:18:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:18:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 17:18:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:18:14.456630  543705 disk_worker.go:494] system disk:vda1
I0320 17:18:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:18:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:18:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:18:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:18:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:18:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:18:23.409808  543705 memory.go:184] no items to output this cycle
I0320 17:18:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 17:18:25.353671  543705 disk_info.go:125] begin check local disk info of client
I0320 17:18:25.356130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:18:25.356136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5380 0xc0000c53c0]
E0320 17:18:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:18:33.409786  543705 memory.go:184] no items to output this cycle
I0320 17:18:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 17:18:38.564564  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:18:38.564571  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:18:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:18:43.409825  543705 cpu.go:282] Add success.
I0320 17:18:43.409839  543705 memory.go:191] Add success.
I0320 17:18:43.420297  543705 net.go:648] Add success.
I0320 17:18:43.421489  543705 net.go:770] primary dev: ETH0
I0320 17:18:43.421504  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:18:43.421518  543705 net.go:698] Add success.
I0320 17:18:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:18:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:18:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:18:53.410749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:18:53.410766  543705 memory.go:184] no items to output this cycle
I0320 17:18:53.410772  543705 cpu.go:275] no items to output this cycle
E0320 17:19:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:19:03.409778  543705 memory.go:184] no items to output this cycle
I0320 17:19:03.409804  543705 cpu.go:275] no items to output this cycle
I0320 17:19:13.409898  543705 cpu.go:282] Add success.
E0320 17:19:13.410036  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:19:13.410054  543705 memory.go:191] Add success.
W0320 17:19:13.410081  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:19:13.410094  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:19:13.410097  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:19:13.419742  543705 net.go:648] Add success.
I0320 17:19:13.422363  543705 net.go:770] primary dev: ETH0
I0320 17:19:13.422377  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:19:13.422390  543705 net.go:698] Add success.
I0320 17:19:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:19:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:19:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 17:19:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:19:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 17:19:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:19:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:19:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:19:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:19:23.409768  543705 memory.go:184] no items to output this cycle
I0320 17:19:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 17:19:25.357676  543705 disk_info.go:125] begin check local disk info of client
I0320 17:19:25.360171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:19:25.360178  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c540 0xc00034c580]
E0320 17:19:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:19:33.409778  543705 memory.go:184] no items to output this cycle
I0320 17:19:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 17:19:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:19:43.409789  543705 memory.go:191] Add success.
I0320 17:19:43.409850  543705 cpu.go:282] Add success.
I0320 17:19:43.420412  543705 net.go:648] Add success.
I0320 17:19:43.421366  543705 net.go:770] primary dev: ETH0
I0320 17:19:43.421384  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:19:43.421404  543705 net.go:698] Add success.
I0320 17:19:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:19:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:19:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:19:53.409800  543705 memory.go:184] no items to output this cycle
I0320 17:19:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 17:20:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:20:03.409781  543705 memory.go:184] no items to output this cycle
I0320 17:20:03.409891  543705 cpu.go:275] no items to output this cycle
E0320 17:20:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:20:13.409785  543705 memory.go:191] Add success.
I0320 17:20:13.409802  543705 cpu.go:282] Add success.
W0320 17:20:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:20:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:20:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:20:13.420370  543705 net.go:648] Add success.
I0320 17:20:13.422947  543705 net.go:770] primary dev: ETH0
I0320 17:20:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:20:13.422972  543705 net.go:698] Add success.
I0320 17:20:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:20:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:20:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 17:20:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:20:14.456508  543705 disk_worker.go:494] system disk:vda1
I0320 17:20:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:20:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:20:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:20:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:20:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:20:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:20:23.409800  543705 memory.go:184] no items to output this cycle
I0320 17:20:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 17:20:25.361691  543705 disk_info.go:125] begin check local disk info of client
I0320 17:20:25.364164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:20:25.364171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4940 0xc0000c4980]
E0320 17:20:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:20:33.409782  543705 memory.go:184] no items to output this cycle
I0320 17:20:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 17:20:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:20:43.409818  543705 memory.go:191] Add success.
I0320 17:20:43.409857  543705 cpu.go:282] Add success.
I0320 17:20:43.420033  543705 net.go:648] Add success.
I0320 17:20:43.422826  543705 net.go:770] primary dev: ETH0
I0320 17:20:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:20:43.422855  543705 net.go:698] Add success.
I0320 17:20:46.457671  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:20:46.457744  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:20:46.457770  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:20:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:20:53.409787  543705 cpu.go:275] no items to output this cycle
I0320 17:20:53.409793  543705 memory.go:184] no items to output this cycle
E0320 17:21:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:21:03.409802  543705 memory.go:184] no items to output this cycle
I0320 17:21:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 17:21:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:21:13.409795  543705 memory.go:191] Add success.
I0320 17:21:13.409796  543705 cpu.go:282] Add success.
W0320 17:21:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:21:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:21:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:21:13.420212  543705 net.go:648] Add success.
I0320 17:21:13.422923  543705 net.go:770] primary dev: ETH0
I0320 17:21:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:21:13.422947  543705 net.go:698] Add success.
I0320 17:21:13.469756  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4198d147-2a8d-4174-99af-b8108b485af4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:21:13.469789  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:21:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:21:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:21:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 17:21:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:21:14.456659  543705 disk_worker.go:494] system disk:vda1
I0320 17:21:14.456689  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:21:15.455604  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:21:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:21:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:21:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:21:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:21:23.409766  543705 memory.go:184] no items to output this cycle
I0320 17:21:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 17:21:25.365676  543705 disk_info.go:125] begin check local disk info of client
I0320 17:21:25.368146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:21:25.368151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb6c0 0xc0001fb700]
E0320 17:21:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:21:33.409783  543705 memory.go:184] no items to output this cycle
I0320 17:21:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 17:21:38.565574  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:21:38.565582  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:21:43.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:21:43.409815  543705 cpu.go:282] Add success.
I0320 17:21:43.409837  543705 memory.go:191] Add success.
I0320 17:21:43.420219  543705 net.go:648] Add success.
I0320 17:21:43.421103  543705 net.go:770] primary dev: ETH0
I0320 17:21:43.421118  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:21:43.421131  543705 net.go:698] Add success.
I0320 17:21:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:21:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:21:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:21:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:21:53.409775  543705 memory.go:184] no items to output this cycle
I0320 17:21:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 17:22:03.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:22:03.409917  543705 cpu.go:275] no items to output this cycle
I0320 17:22:03.409922  543705 memory.go:184] no items to output this cycle
E0320 17:22:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:22:13.409834  543705 memory.go:191] Add success.
I0320 17:22:13.409838  543705 cpu.go:282] Add success.
W0320 17:22:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:22:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:22:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:22:13.420133  543705 net.go:648] Add success.
I0320 17:22:13.422992  543705 net.go:770] primary dev: ETH0
I0320 17:22:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:22:13.423018  543705 net.go:698] Add success.
W0320 17:22:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:22:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0320 17:22:14.455222  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:22:14.456124  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:22:14.456135  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:22:14.456141  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:22:14.456687  543705 disk_worker.go:494] system disk:vda1
I0320 17:22:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:22:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:22:15.456885  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:22:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:22:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:22:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:22:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:22:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:22:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:22:23.409800  543705 memory.go:184] no items to output this cycle
I0320 17:22:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 17:22:25.369671  543705 disk_info.go:125] begin check local disk info of client
I0320 17:22:25.372144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:22:25.372150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb40 0xc0001abb80]
E0320 17:22:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:22:33.409782  543705 memory.go:184] no items to output this cycle
I0320 17:22:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:22:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:22:43.409802  543705 memory.go:191] Add success.
I0320 17:22:43.409843  543705 cpu.go:282] Add success.
I0320 17:22:43.420569  543705 net.go:648] Add success.
I0320 17:22:43.423314  543705 net.go:770] primary dev: ETH0
I0320 17:22:43.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:22:43.423354  543705 net.go:698] Add success.
I0320 17:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:22:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:22:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:22:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:22:53.409779  543705 memory.go:184] no items to output this cycle
I0320 17:22:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 17:23:03.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:23:03.409874  543705 memory.go:184] no items to output this cycle
I0320 17:23:03.409950  543705 cpu.go:275] no items to output this cycle
E0320 17:23:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:23:13.409780  543705 memory.go:191] Add success.
W0320 17:23:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:23:13.409812  543705 cpu.go:282] Add success.
W0320 17:23:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:23:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:23:13.420241  543705 net.go:648] Add success.
I0320 17:23:13.423072  543705 net.go:770] primary dev: ETH0
I0320 17:23:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:23:13.423096  543705 net.go:698] Add success.
I0320 17:23:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:23:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:23:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 17:23:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:23:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 17:23:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:23:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:23:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:23:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:23:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:23:23.409782  543705 memory.go:184] no items to output this cycle
I0320 17:23:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 17:23:25.373672  543705 disk_info.go:125] begin check local disk info of client
I0320 17:23:25.376236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:23:25.376242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470140 0xc000470180]
E0320 17:23:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:23:33.409773  543705 memory.go:184] no items to output this cycle
I0320 17:23:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 17:23:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:23:43.409826  543705 memory.go:191] Add success.
I0320 17:23:43.409836  543705 cpu.go:282] Add success.
I0320 17:23:43.420138  543705 net.go:648] Add success.
I0320 17:23:43.422886  543705 net.go:770] primary dev: ETH0
I0320 17:23:43.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:23:43.422916  543705 net.go:698] Add success.
I0320 17:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:23:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:23:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:23:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:23:53.409800  543705 memory.go:184] no items to output this cycle
I0320 17:23:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 17:24:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:24:03.409798  543705 memory.go:184] no items to output this cycle
I0320 17:24:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 17:24:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:24:13.409817  543705 memory.go:191] Add success.
I0320 17:24:13.409824  543705 cpu.go:282] Add success.
W0320 17:24:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:24:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:24:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:24:13.420138  543705 net.go:648] Add success.
I0320 17:24:13.422757  543705 net.go:770] primary dev: ETH0
I0320 17:24:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:24:13.422783  543705 net.go:698] Add success.
I0320 17:24:13.464002  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"18f74db0-96a8-47a3-bdba-27190c3a7d1f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:24:13.464034  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:24:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:24:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:24:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 17:24:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:24:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 17:24:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:24:15.455618  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:24:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:24:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:24:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:24:23.410407  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:24:23.410425  543705 memory.go:184] no items to output this cycle
I0320 17:24:23.410438  543705 cpu.go:275] no items to output this cycle
I0320 17:24:25.377671  543705 disk_info.go:125] begin check local disk info of client
I0320 17:24:25.380130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:24:25.380136  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492ec0 0xc000492f00]
E0320 17:24:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:24:33.409791  543705 memory.go:184] no items to output this cycle
I0320 17:24:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 17:24:38.566585  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:24:38.566592  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:24:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:24:43.410664  543705 memory.go:191] Add success.
I0320 17:24:43.409815  543705 cpu.go:282] Add success.
I0320 17:24:43.420495  543705 net.go:648] Add success.
I0320 17:24:43.423189  543705 net.go:770] primary dev: ETH0
I0320 17:24:43.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:24:43.423228  543705 net.go:698] Add success.
I0320 17:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:24:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:24:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:24:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:24:53.409776  543705 cpu.go:275] no items to output this cycle
I0320 17:24:53.409788  543705 memory.go:184] no items to output this cycle
E0320 17:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:25:03.409800  543705 memory.go:184] no items to output this cycle
I0320 17:25:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 17:25:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:25:13.409786  543705 memory.go:191] Add success.
I0320 17:25:13.409794  543705 cpu.go:282] Add success.
W0320 17:25:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:25:13.412650  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:25:13.412654  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:25:13.420292  543705 net.go:648] Add success.
I0320 17:25:13.422002  543705 net.go:770] primary dev: ETH0
I0320 17:25:13.422016  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:25:13.422028  543705 net.go:698] Add success.
I0320 17:25:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:25:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:25:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 17:25:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:25:14.456492  543705 disk_worker.go:494] system disk:vda1
I0320 17:25:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:25:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:25:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:25:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:25:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:25:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:25:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:25:23.409807  543705 memory.go:184] no items to output this cycle
I0320 17:25:23.409819  543705 cpu.go:275] no items to output this cycle
I0320 17:25:25.381671  543705 disk_info.go:125] begin check local disk info of client
I0320 17:25:25.384168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:25:25.384174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bd00 0xc00007bd80]
E0320 17:25:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:25:33.409780  543705 memory.go:184] no items to output this cycle
I0320 17:25:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 17:25:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:25:43.409787  543705 memory.go:191] Add success.
I0320 17:25:43.409788  543705 cpu.go:282] Add success.
I0320 17:25:43.419873  543705 net.go:648] Add success.
I0320 17:25:43.422890  543705 net.go:770] primary dev: ETH0
I0320 17:25:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:25:43.422915  543705 net.go:698] Add success.
I0320 17:25:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:25:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:25:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:25:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:25:53.409781  543705 memory.go:184] no items to output this cycle
I0320 17:25:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 17:26:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:26:03.409804  543705 memory.go:184] no items to output this cycle
I0320 17:26:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 17:26:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:26:13.409828  543705 memory.go:191] Add success.
I0320 17:26:13.409836  543705 cpu.go:282] Add success.
W0320 17:26:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:26:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:26:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:26:13.420176  543705 net.go:648] Add success.
I0320 17:26:13.422841  543705 net.go:770] primary dev: ETH0
I0320 17:26:13.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:26:13.422865  543705 net.go:698] Add success.
I0320 17:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:26:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:26:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0320 17:26:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:26:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 17:26:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:26:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:26:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:26:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:26:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:26:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 17:26:23.409789  543705 memory.go:184] no items to output this cycle
I0320 17:26:25.385672  543705 disk_info.go:125] begin check local disk info of client
I0320 17:26:25.387891  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:26:25.387897  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa840 0xc0001aa880]
E0320 17:26:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:26:33.409815  543705 memory.go:184] no items to output this cycle
I0320 17:26:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 17:26:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:26:43.409795  543705 memory.go:191] Add success.
I0320 17:26:43.409824  543705 cpu.go:282] Add success.
I0320 17:26:43.419977  543705 net.go:648] Add success.
I0320 17:26:43.422476  543705 net.go:770] primary dev: ETH0
I0320 17:26:43.422491  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:26:43.422506  543705 net.go:698] Add success.
I0320 17:26:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:26:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:26:53.409800  543705 memory.go:184] no items to output this cycle
I0320 17:26:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 17:27:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:27:03.409790  543705 memory.go:184] no items to output this cycle
I0320 17:27:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 17:27:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:27:13.409821  543705 cpu.go:282] Add success.
I0320 17:27:13.409829  543705 memory.go:191] Add success.
W0320 17:27:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:27:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:27:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:27:13.420138  543705 net.go:648] Add success.
I0320 17:27:13.429191  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 17:27:13.429268  543705 net.go:770] primary dev: ETH0
I0320 17:27:13.429282  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:27:13.429296  543705 net.go:698] Add success.
I0320 17:27:13.452770  543705 event_worker.go:152] Polling the log file for events...
I0320 17:27:13.467633  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c2a9952-ca3f-4a1b-851b-8a5b200ef079","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:27:13.467668  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 17:27:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:27:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 17:27:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:27:14.455942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:27:14.455951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:27:14.455957  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:27:14.456558  543705 disk_worker.go:494] system disk:vda1
I0320 17:27:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:27:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:27:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:27:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:27:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:27:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:27:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:27:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:27:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:27:23.409788  543705 memory.go:184] no items to output this cycle
I0320 17:27:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 17:27:25.392063  543705 disk_info.go:125] begin check local disk info of client
I0320 17:27:25.394547  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:27:25.394553  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa3c0 0xc0001aa400]
E0320 17:27:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:27:33.409782  543705 memory.go:184] no items to output this cycle
I0320 17:27:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 17:27:38.567587  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:27:38.567594  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:27:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:27:43.410823  543705 memory.go:191] Add success.
I0320 17:27:43.409813  543705 cpu.go:282] Add success.
I0320 17:27:43.420550  543705 net.go:648] Add success.
I0320 17:27:43.423205  543705 net.go:770] primary dev: ETH0
I0320 17:27:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:27:43.423230  543705 net.go:698] Add success.
I0320 17:27:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:27:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:27:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:27:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:27:53.409802  543705 memory.go:184] no items to output this cycle
I0320 17:27:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 17:28:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:28:03.409789  543705 memory.go:184] no items to output this cycle
I0320 17:28:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:28:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:28:13.409794  543705 memory.go:191] Add success.
I0320 17:28:13.409797  543705 cpu.go:282] Add success.
W0320 17:28:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:28:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:28:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:28:13.420095  543705 net.go:648] Add success.
I0320 17:28:13.422962  543705 net.go:770] primary dev: ETH0
I0320 17:28:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:28:13.422991  543705 net.go:698] Add success.
I0320 17:28:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:28:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:28:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 17:28:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:28:14.456779  543705 disk_worker.go:494] system disk:vda1
I0320 17:28:14.456808  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:28:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:28:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:28:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:28:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:28:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:28:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:28:23.409778  543705 memory.go:184] no items to output this cycle
I0320 17:28:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 17:28:25.397674  543705 disk_info.go:125] begin check local disk info of client
I0320 17:28:25.400098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:28:25.400105  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0740 0xc0003c0780]
E0320 17:28:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:28:33.409768  543705 memory.go:184] no items to output this cycle
I0320 17:28:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 17:28:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:28:43.409781  543705 memory.go:191] Add success.
I0320 17:28:43.409814  543705 cpu.go:282] Add success.
I0320 17:28:43.419985  543705 net.go:648] Add success.
I0320 17:28:43.422547  543705 net.go:770] primary dev: ETH0
I0320 17:28:43.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:28:43.422578  543705 net.go:698] Add success.
I0320 17:28:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:28:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:28:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:28:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:28:53.409798  543705 memory.go:184] no items to output this cycle
I0320 17:28:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 17:29:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:29:03.409807  543705 memory.go:184] no items to output this cycle
I0320 17:29:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 17:29:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:29:13.409800  543705 cpu.go:282] Add success.
I0320 17:29:13.409803  543705 memory.go:191] Add success.
W0320 17:29:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:29:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:29:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:29:13.420291  543705 net.go:648] Add success.
I0320 17:29:13.423188  543705 net.go:770] primary dev: ETH0
I0320 17:29:13.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:29:13.423217  543705 net.go:698] Add success.
I0320 17:29:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:29:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:29:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 17:29:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:29:14.456529  543705 disk_worker.go:494] system disk:vda1
I0320 17:29:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:29:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:29:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:29:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:29:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:29:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:29:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:29:23.409792  543705 memory.go:184] no items to output this cycle
I0320 17:29:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 17:29:25.402118  543705 disk_info.go:125] begin check local disk info of client
I0320 17:29:25.404592  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:29:25.404599  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c40 0xc0000c4c80]
E0320 17:29:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:29:33.409774  543705 memory.go:184] no items to output this cycle
I0320 17:29:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 17:29:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:29:43.409779  543705 memory.go:191] Add success.
I0320 17:29:43.409802  543705 cpu.go:282] Add success.
I0320 17:29:43.419853  543705 net.go:648] Add success.
I0320 17:29:43.422410  543705 net.go:770] primary dev: ETH0
I0320 17:29:43.422422  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:29:43.422435  543705 net.go:698] Add success.
I0320 17:29:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:29:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:29:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:29:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:29:53.409765  543705 memory.go:184] no items to output this cycle
I0320 17:29:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 17:30:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:30:03.409893  543705 memory.go:184] no items to output this cycle
I0320 17:30:03.409950  543705 cpu.go:275] no items to output this cycle
E0320 17:30:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:30:13.409802  543705 memory.go:191] Add success.
I0320 17:30:13.409806  543705 cpu.go:282] Add success.
W0320 17:30:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:30:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:30:13.420071  543705 net.go:648] Add success.
I0320 17:30:13.423217  543705 net.go:770] primary dev: ETH0
I0320 17:30:13.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:30:13.423242  543705 net.go:698] Add success.
I0320 17:30:13.467639  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3a66bbac-9a0d-49ec-bc5d-811df56d0430","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:30:13.467673  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:30:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:30:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:30:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 17:30:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:30:14.456527  543705 disk_worker.go:494] system disk:vda1
I0320 17:30:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:30:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:30:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:30:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:30:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:30:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:30:23.409779  543705 memory.go:184] no items to output this cycle
I0320 17:30:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 17:30:25.405670  543705 disk_info.go:125] begin check local disk info of client
I0320 17:30:25.408151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:30:25.408157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471640 0xc000471680]
E0320 17:30:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:30:33.409767  543705 memory.go:184] no items to output this cycle
I0320 17:30:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 17:30:38.568602  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:30:38.568608  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:30:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:30:43.410756  543705 memory.go:191] Add success.
I0320 17:30:43.409817  543705 cpu.go:282] Add success.
I0320 17:30:43.420464  543705 net.go:648] Add success.
I0320 17:30:43.423664  543705 net.go:770] primary dev: ETH0
I0320 17:30:43.423680  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:30:43.423694  543705 net.go:698] Add success.
I0320 17:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:30:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:30:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:30:53.409785  543705 memory.go:184] no items to output this cycle
I0320 17:30:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 17:31:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:31:03.409807  543705 memory.go:184] no items to output this cycle
I0320 17:31:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 17:31:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:31:13.409810  543705 memory.go:191] Add success.
I0320 17:31:13.409818  543705 cpu.go:282] Add success.
W0320 17:31:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:31:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:31:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:31:13.420143  543705 net.go:648] Add success.
I0320 17:31:13.422765  543705 net.go:770] primary dev: ETH0
I0320 17:31:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:31:13.422793  543705 net.go:698] Add success.
I0320 17:31:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:31:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:31:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 17:31:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:31:14.456497  543705 disk_worker.go:494] system disk:vda1
I0320 17:31:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:31:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:31:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:31:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:31:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:31:23.409763  543705 memory.go:184] no items to output this cycle
I0320 17:31:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 17:31:25.409674  543705 disk_info.go:125] begin check local disk info of client
I0320 17:31:25.412140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:31:25.412147  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b400 0xc00048b440]
E0320 17:31:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:31:33.409768  543705 memory.go:184] no items to output this cycle
I0320 17:31:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 17:31:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:31:43.409787  543705 memory.go:191] Add success.
I0320 17:31:43.409801  543705 cpu.go:282] Add success.
I0320 17:31:43.420036  543705 net.go:648] Add success.
I0320 17:31:43.422680  543705 net.go:770] primary dev: ETH0
I0320 17:31:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:31:43.422709  543705 net.go:698] Add success.
I0320 17:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:31:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:31:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:31:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:31:53.409769  543705 memory.go:184] no items to output this cycle
I0320 17:31:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 17:32:03.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:32:03.409904  543705 cpu.go:275] no items to output this cycle
I0320 17:32:03.409916  543705 memory.go:184] no items to output this cycle
E0320 17:32:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:32:13.409797  543705 cpu.go:282] Add success.
I0320 17:32:13.409818  543705 memory.go:191] Add success.
W0320 17:32:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:32:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:32:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:32:13.420163  543705 net.go:648] Add success.
I0320 17:32:13.423378  543705 net.go:770] primary dev: ETH0
I0320 17:32:13.423391  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:32:13.423402  543705 net.go:698] Add success.
W0320 17:32:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:32:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 17:32:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:32:14.456950  543705 disk_worker.go:494] system disk:vda1
I0320 17:32:14.457005  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:32:14.457012  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:32:14.457019  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:32:14.457024  543705 custom_config.go:64] query custom config with name: gpu
E0320 17:32:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:32:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:32:16.457896  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:32:16.457896  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:32:16.457949  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:32:16.457969  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:32:16.472318  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:32:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:32:23.409757  543705 memory.go:184] no items to output this cycle
I0320 17:32:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 17:32:25.412785  543705 disk_info.go:125] begin check local disk info of client
I0320 17:32:25.415232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:32:25.415238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faf00 0xc0001faf40]
E0320 17:32:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:32:33.409778  543705 memory.go:184] no items to output this cycle
I0320 17:32:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 17:32:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:32:43.409811  543705 memory.go:191] Add success.
I0320 17:32:43.409821  543705 cpu.go:282] Add success.
I0320 17:32:43.419922  543705 net.go:648] Add success.
I0320 17:32:43.422413  543705 net.go:770] primary dev: ETH0
I0320 17:32:43.422426  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:32:43.422439  543705 net.go:698] Add success.
I0320 17:32:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:32:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:32:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:32:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:32:53.409785  543705 memory.go:184] no items to output this cycle
I0320 17:32:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 17:33:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:33:03.409774  543705 memory.go:184] no items to output this cycle
I0320 17:33:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 17:33:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:33:13.409800  543705 memory.go:191] Add success.
I0320 17:33:13.409802  543705 cpu.go:282] Add success.
W0320 17:33:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:33:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:33:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:33:13.420156  543705 net.go:648] Add success.
I0320 17:33:13.423095  543705 net.go:770] primary dev: ETH0
I0320 17:33:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:33:13.423123  543705 net.go:698] Add success.
I0320 17:33:13.660878  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bcdec7f9-b8ae-43fd-b369-129a4b90edd5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:33:13.660917  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:33:14.454728  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:33:14.454906  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:33:14.454917  543705 disk_worker.go:708] disk space is not compliant
W0320 17:33:14.454919  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:33:14.456289  543705 disk_worker.go:494] system disk:vda1
I0320 17:33:14.456334  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:33:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:33:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:33:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:33:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:33:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:33:23.409797  543705 memory.go:184] no items to output this cycle
I0320 17:33:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 17:33:25.415783  543705 disk_info.go:125] begin check local disk info of client
I0320 17:33:25.418249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:33:25.418254  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b140 0xc00027b180]
E0320 17:33:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:33:33.409770  543705 memory.go:184] no items to output this cycle
I0320 17:33:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 17:33:38.569597  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:33:38.569604  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:33:43.409784  543705 memory.go:191] Add success.
I0320 17:33:43.409791  543705 cpu.go:282] Add success.
I0320 17:33:43.419955  543705 net.go:648] Add success.
I0320 17:33:43.420948  543705 net.go:770] primary dev: ETH0
I0320 17:33:43.420964  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:33:43.420977  543705 net.go:698] Add success.
I0320 17:33:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:33:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:33:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:33:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:33:53.409905  543705 memory.go:184] no items to output this cycle
I0320 17:33:53.409941  543705 cpu.go:275] no items to output this cycle
E0320 17:34:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:34:03.409784  543705 memory.go:184] no items to output this cycle
I0320 17:34:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 17:34:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:34:13.409781  543705 memory.go:191] Add success.
I0320 17:34:13.409806  543705 cpu.go:282] Add success.
W0320 17:34:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:34:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:34:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:34:13.420182  543705 net.go:648] Add success.
I0320 17:34:13.423023  543705 net.go:770] primary dev: ETH0
I0320 17:34:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:34:13.423051  543705 net.go:698] Add success.
I0320 17:34:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:34:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:34:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 17:34:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:34:14.456521  543705 disk_worker.go:494] system disk:vda1
I0320 17:34:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:34:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:34:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:34:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:34:23.409793  543705 memory.go:184] no items to output this cycle
I0320 17:34:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 17:34:25.418785  543705 disk_info.go:125] begin check local disk info of client
I0320 17:34:25.421231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:34:25.421237  543705 disk_info.go:196] parse disk info done, disk is : [0xc000261a40 0xc000261a80]
E0320 17:34:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:34:33.409801  543705 memory.go:184] no items to output this cycle
I0320 17:34:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 17:34:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:34:43.409788  543705 memory.go:191] Add success.
I0320 17:34:43.409789  543705 cpu.go:282] Add success.
I0320 17:34:43.420019  543705 net.go:648] Add success.
I0320 17:34:43.422918  543705 net.go:770] primary dev: ETH0
I0320 17:34:43.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:34:43.422945  543705 net.go:698] Add success.
I0320 17:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:34:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:34:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:34:53.409771  543705 memory.go:184] no items to output this cycle
I0320 17:34:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:35:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:35:03.409791  543705 memory.go:184] no items to output this cycle
I0320 17:35:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 17:35:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:35:13.409817  543705 memory.go:191] Add success.
I0320 17:35:13.409824  543705 cpu.go:282] Add success.
W0320 17:35:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:35:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:35:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:35:13.420197  543705 net.go:648] Add success.
I0320 17:35:13.422984  543705 net.go:770] primary dev: ETH0
I0320 17:35:13.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:35:13.423009  543705 net.go:698] Add success.
I0320 17:35:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:35:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:35:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 17:35:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:35:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 17:35:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:35:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:35:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:35:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:35:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:35:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:35:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:35:23.409799  543705 memory.go:184] no items to output this cycle
I0320 17:35:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 17:35:25.421809  543705 disk_info.go:125] begin check local disk info of client
I0320 17:35:25.424249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:35:25.424255  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0320 17:35:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:35:33.409794  543705 memory.go:184] no items to output this cycle
I0320 17:35:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 17:35:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:35:43.409786  543705 memory.go:191] Add success.
I0320 17:35:43.409789  543705 cpu.go:282] Add success.
I0320 17:35:43.420001  543705 net.go:648] Add success.
I0320 17:35:43.422620  543705 net.go:770] primary dev: ETH0
I0320 17:35:43.422633  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:35:43.422645  543705 net.go:698] Add success.
I0320 17:35:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:35:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:35:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:35:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:35:53.409783  543705 memory.go:184] no items to output this cycle
I0320 17:35:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 17:36:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:36:03.409899  543705 memory.go:184] no items to output this cycle
I0320 17:36:03.410038  543705 cpu.go:275] no items to output this cycle
E0320 17:36:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:36:13.409815  543705 memory.go:191] Add success.
I0320 17:36:13.409829  543705 cpu.go:282] Add success.
W0320 17:36:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:36:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:36:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:36:13.420175  543705 net.go:648] Add success.
I0320 17:36:13.423037  543705 net.go:770] primary dev: ETH0
I0320 17:36:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:36:13.423062  543705 net.go:698] Add success.
I0320 17:36:13.469655  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a25599f-4839-40b3-a45f-1fbfd7ea8cd3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:36:13.469691  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:36:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:36:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:36:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 17:36:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:36:14.456680  543705 disk_worker.go:494] system disk:vda1
I0320 17:36:14.456720  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:36:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:36:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:36:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:36:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:36:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:36:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:36:23.409765  543705 memory.go:184] no items to output this cycle
I0320 17:36:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 17:36:25.424807  543705 disk_info.go:125] begin check local disk info of client
I0320 17:36:25.427274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:36:25.427280  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370480 0xc0003704c0]
E0320 17:36:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:36:33.409784  543705 memory.go:184] no items to output this cycle
I0320 17:36:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 17:36:38.570604  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:36:38.570610  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:36:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:36:43.410804  543705 memory.go:191] Add success.
I0320 17:36:43.409801  543705 cpu.go:282] Add success.
I0320 17:36:43.420510  543705 net.go:648] Add success.
I0320 17:36:43.423591  543705 net.go:770] primary dev: ETH0
I0320 17:36:43.423604  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:36:43.423617  543705 net.go:698] Add success.
I0320 17:36:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:36:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:36:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:36:53.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:36:53.409891  543705 cpu.go:275] no items to output this cycle
I0320 17:36:53.409899  543705 memory.go:184] no items to output this cycle
E0320 17:37:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:37:03.409781  543705 memory.go:184] no items to output this cycle
I0320 17:37:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:37:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:37:13.409783  543705 memory.go:191] Add success.
W0320 17:37:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:37:13.409810  543705 cpu.go:282] Add success.
W0320 17:37:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:37:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:37:13.420193  543705 net.go:648] Add success.
I0320 17:37:13.422947  543705 net.go:770] primary dev: ETH0
I0320 17:37:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:37:13.422976  543705 net.go:698] Add success.
I0320 17:37:13.453520  543705 event_worker.go:152] Polling the log file for events...
W0320 17:37:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:37:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 17:37:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:37:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:37:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:37:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:37:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 17:37:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:37:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:37:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:37:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:37:16.457994  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:37:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:37:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:37:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:37:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:37:23.409776  543705 memory.go:184] no items to output this cycle
I0320 17:37:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 17:37:25.427828  543705 disk_info.go:125] begin check local disk info of client
I0320 17:37:25.430286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:37:25.430292  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391c40 0xc000391c80]
E0320 17:37:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:37:33.409767  543705 memory.go:184] no items to output this cycle
I0320 17:37:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 17:37:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:37:43.409784  543705 memory.go:191] Add success.
I0320 17:37:43.409801  543705 cpu.go:282] Add success.
I0320 17:37:43.420399  543705 net.go:648] Add success.
I0320 17:37:43.423118  543705 net.go:770] primary dev: ETH0
I0320 17:37:43.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:37:43.423148  543705 net.go:698] Add success.
I0320 17:37:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:37:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:37:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:37:53.409884  543705 cpu.go:275] no items to output this cycle
E0320 17:37:53.409972  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:37:53.409993  543705 memory.go:184] no items to output this cycle
E0320 17:38:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:38:03.409794  543705 memory.go:184] no items to output this cycle
I0320 17:38:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:38:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:38:13.409797  543705 memory.go:191] Add success.
I0320 17:38:13.409812  543705 cpu.go:282] Add success.
W0320 17:38:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:38:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:38:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:38:13.420119  543705 net.go:648] Add success.
I0320 17:38:13.422965  543705 net.go:770] primary dev: ETH0
I0320 17:38:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:38:13.422991  543705 net.go:698] Add success.
I0320 17:38:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:38:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:38:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 17:38:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:38:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 17:38:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:38:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:38:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:38:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:38:23.409791  543705 memory.go:184] no items to output this cycle
I0320 17:38:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 17:38:25.430832  543705 disk_info.go:125] begin check local disk info of client
I0320 17:38:25.433317  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:38:25.433324  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b29c0 0xc0002b2a00]
E0320 17:38:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:38:33.409804  543705 memory.go:184] no items to output this cycle
I0320 17:38:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 17:38:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:38:43.409794  543705 memory.go:191] Add success.
I0320 17:38:43.409797  543705 cpu.go:282] Add success.
I0320 17:38:43.419864  543705 net.go:648] Add success.
I0320 17:38:43.422869  543705 net.go:770] primary dev: ETH0
I0320 17:38:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:38:43.422895  543705 net.go:698] Add success.
I0320 17:38:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:38:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:38:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:38:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:38:53.409773  543705 memory.go:184] no items to output this cycle
I0320 17:38:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 17:39:03.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:39:03.409898  543705 memory.go:184] no items to output this cycle
I0320 17:39:03.409919  543705 cpu.go:275] no items to output this cycle
E0320 17:39:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:39:13.409801  543705 memory.go:191] Add success.
I0320 17:39:13.409807  543705 cpu.go:282] Add success.
W0320 17:39:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:39:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:39:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:39:13.420141  543705 net.go:648] Add success.
I0320 17:39:13.422918  543705 net.go:770] primary dev: ETH0
I0320 17:39:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:39:13.422943  543705 net.go:698] Add success.
I0320 17:39:13.464474  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3cb8c28d-0d72-4131-b6f3-bf1d8981d4de","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:39:13.464509  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:39:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:39:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:39:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 17:39:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:39:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 17:39:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:39:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:39:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:39:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:39:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:39:23.409801  543705 memory.go:184] no items to output this cycle
I0320 17:39:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 17:39:25.433866  543705 disk_info.go:125] begin check local disk info of client
I0320 17:39:25.436362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:39:25.436368  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2f00 0xc0002b2f40]
E0320 17:39:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:39:33.409796  543705 memory.go:184] no items to output this cycle
I0320 17:39:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 17:39:38.571611  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:39:38.571617  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:39:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:39:43.410852  543705 memory.go:191] Add success.
I0320 17:39:43.409812  543705 cpu.go:282] Add success.
I0320 17:39:43.420589  543705 net.go:648] Add success.
I0320 17:39:43.423649  543705 net.go:770] primary dev: ETH0
I0320 17:39:43.423662  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:39:43.423673  543705 net.go:698] Add success.
I0320 17:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:39:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:39:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:39:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:39:53.409785  543705 memory.go:184] no items to output this cycle
I0320 17:39:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 17:40:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:40:03.409907  543705 cpu.go:275] no items to output this cycle
I0320 17:40:03.409911  543705 memory.go:184] no items to output this cycle
E0320 17:40:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:40:13.409786  543705 memory.go:191] Add success.
W0320 17:40:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:40:13.409816  543705 cpu.go:282] Add success.
W0320 17:40:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:40:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:40:13.420052  543705 net.go:648] Add success.
I0320 17:40:13.422837  543705 net.go:770] primary dev: ETH0
I0320 17:40:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:40:13.422862  543705 net.go:698] Add success.
I0320 17:40:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:40:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:40:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 17:40:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:40:14.456552  543705 disk_worker.go:494] system disk:vda1
I0320 17:40:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:40:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:40:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:40:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:40:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:40:23.409793  543705 memory.go:184] no items to output this cycle
I0320 17:40:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 17:40:25.436873  543705 disk_info.go:125] begin check local disk info of client
I0320 17:40:25.439343  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:40:25.439349  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb800 0xc0001fb840]
E0320 17:40:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:40:33.409776  543705 memory.go:184] no items to output this cycle
I0320 17:40:33.409779  543705 cpu.go:275] no items to output this cycle
E0320 17:40:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:40:43.409783  543705 memory.go:191] Add success.
I0320 17:40:43.409786  543705 cpu.go:282] Add success.
I0320 17:40:43.420002  543705 net.go:648] Add success.
I0320 17:40:43.423082  543705 net.go:770] primary dev: ETH0
I0320 17:40:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:40:43.423107  543705 net.go:698] Add success.
I0320 17:40:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:40:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:40:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:40:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:40:53.409781  543705 cpu.go:275] no items to output this cycle
I0320 17:40:53.409792  543705 memory.go:184] no items to output this cycle
E0320 17:41:03.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:41:03.409885  543705 memory.go:184] no items to output this cycle
I0320 17:41:03.409946  543705 cpu.go:275] no items to output this cycle
E0320 17:41:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:41:13.409813  543705 memory.go:191] Add success.
I0320 17:41:13.409822  543705 cpu.go:282] Add success.
W0320 17:41:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:41:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:41:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:41:13.420115  543705 net.go:648] Add success.
I0320 17:41:13.422720  543705 net.go:770] primary dev: ETH0
I0320 17:41:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:41:13.422748  543705 net.go:698] Add success.
I0320 17:41:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:41:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:41:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 17:41:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:41:14.456496  543705 disk_worker.go:494] system disk:vda1
I0320 17:41:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:41:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:41:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:41:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:41:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:41:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:41:23.409792  543705 memory.go:184] no items to output this cycle
I0320 17:41:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 17:41:25.439896  543705 disk_info.go:125] begin check local disk info of client
I0320 17:41:25.442367  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:41:25.442373  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0a80 0xc0003b0ac0]
E0320 17:41:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:41:33.409773  543705 memory.go:184] no items to output this cycle
I0320 17:41:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 17:41:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:41:43.409788  543705 memory.go:191] Add success.
I0320 17:41:43.409817  543705 cpu.go:282] Add success.
I0320 17:41:43.420442  543705 net.go:648] Add success.
I0320 17:41:43.423346  543705 net.go:770] primary dev: ETH0
I0320 17:41:43.423360  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:41:43.423375  543705 net.go:698] Add success.
I0320 17:41:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:41:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:41:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:41:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:41:53.409801  543705 memory.go:184] no items to output this cycle
I0320 17:41:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 17:42:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:42:03.409785  543705 memory.go:184] no items to output this cycle
I0320 17:42:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 17:42:13.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:42:13.409960  543705 memory.go:191] Add success.
W0320 17:42:13.409999  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:42:13.410001  543705 cpu.go:282] Add success.
W0320 17:42:13.410016  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:42:13.410021  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:42:13.419720  543705 net.go:648] Add success.
I0320 17:42:13.422590  543705 net.go:770] primary dev: ETH0
I0320 17:42:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:42:13.422615  543705 net.go:698] Add success.
I0320 17:42:13.463564  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"83ab9779-4ef5-40bf-b2b3-531db7ed5501","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:42:13.463596  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 17:42:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:42:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 17:42:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:42:14.456079  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:42:14.456089  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:42:14.456094  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:42:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 17:42:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:42:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:42:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:42:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:42:16.457965  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:42:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:42:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:42:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:42:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:42:23.409773  543705 memory.go:184] no items to output this cycle
I0320 17:42:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 17:42:25.442902  543705 disk_info.go:125] begin check local disk info of client
I0320 17:42:25.445345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:42:25.445351  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0320 17:42:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:42:33.409770  543705 memory.go:184] no items to output this cycle
I0320 17:42:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 17:42:38.572609  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:42:38.572616  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:42:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:42:43.410744  543705 memory.go:191] Add success.
I0320 17:42:43.409814  543705 cpu.go:282] Add success.
I0320 17:42:43.420524  543705 net.go:648] Add success.
I0320 17:42:43.423319  543705 net.go:770] primary dev: ETH0
I0320 17:42:43.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:42:43.423347  543705 net.go:698] Add success.
I0320 17:42:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:42:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:42:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:42:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:42:53.409783  543705 memory.go:184] no items to output this cycle
I0320 17:42:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 17:43:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:43:03.409774  543705 memory.go:184] no items to output this cycle
I0320 17:43:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:43:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:43:13.409792  543705 memory.go:191] Add success.
W0320 17:43:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:43:13.409826  543705 cpu.go:282] Add success.
W0320 17:43:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:43:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:43:13.420156  543705 net.go:648] Add success.
I0320 17:43:13.422994  543705 net.go:770] primary dev: ETH0
I0320 17:43:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:43:13.423019  543705 net.go:698] Add success.
I0320 17:43:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:43:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:43:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 17:43:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:43:14.456513  543705 disk_worker.go:494] system disk:vda1
I0320 17:43:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:43:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:43:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:43:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:43:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:43:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:43:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:43:23.409773  543705 memory.go:184] no items to output this cycle
I0320 17:43:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 17:43:25.445915  543705 disk_info.go:125] begin check local disk info of client
I0320 17:43:25.448376  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:43:25.448381  543705 disk_info.go:196] parse disk info done, disk is : [0xc000514500 0xc000514540]
E0320 17:43:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:43:33.409773  543705 memory.go:184] no items to output this cycle
I0320 17:43:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:43:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:43:43.409794  543705 memory.go:191] Add success.
I0320 17:43:43.409810  543705 cpu.go:282] Add success.
I0320 17:43:43.420384  543705 net.go:648] Add success.
I0320 17:43:43.423450  543705 net.go:770] primary dev: ETH0
I0320 17:43:43.423462  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:43:43.423476  543705 net.go:698] Add success.
I0320 17:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:43:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:43:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:43:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:43:53.409768  543705 memory.go:184] no items to output this cycle
I0320 17:43:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 17:44:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:44:03.409800  543705 cpu.go:275] no items to output this cycle
I0320 17:44:03.409808  543705 memory.go:184] no items to output this cycle
E0320 17:44:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:44:13.409790  543705 memory.go:191] Add success.
I0320 17:44:13.409790  543705 cpu.go:282] Add success.
W0320 17:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:44:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:44:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:44:13.419713  543705 net.go:648] Add success.
I0320 17:44:13.422847  543705 net.go:770] primary dev: ETH0
I0320 17:44:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:44:13.422871  543705 net.go:698] Add success.
I0320 17:44:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:44:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:44:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 17:44:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:44:14.456538  543705 disk_worker.go:494] system disk:vda1
I0320 17:44:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:44:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:44:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:44:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:44:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:44:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:44:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:44:23.409766  543705 memory.go:184] no items to output this cycle
I0320 17:44:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 17:44:25.448937  543705 disk_info.go:125] begin check local disk info of client
I0320 17:44:25.451390  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:44:25.451396  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515300 0xc000515340]
E0320 17:44:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:44:33.409780  543705 memory.go:184] no items to output this cycle
I0320 17:44:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 17:44:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:44:43.409815  543705 memory.go:191] Add success.
I0320 17:44:43.409826  543705 cpu.go:282] Add success.
I0320 17:44:43.419886  543705 net.go:648] Add success.
I0320 17:44:43.422798  543705 net.go:770] primary dev: ETH0
I0320 17:44:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:44:43.422828  543705 net.go:698] Add success.
I0320 17:44:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:44:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:44:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:44:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:44:53.409770  543705 memory.go:184] no items to output this cycle
I0320 17:44:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 17:45:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:45:03.409773  543705 memory.go:184] no items to output this cycle
I0320 17:45:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:45:13.409812  543705 memory.go:191] Add success.
I0320 17:45:13.409813  543705 cpu.go:282] Add success.
W0320 17:45:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:45:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:45:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:45:13.419708  543705 net.go:648] Add success.
I0320 17:45:13.422410  543705 net.go:770] primary dev: ETH0
I0320 17:45:13.422426  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:45:13.422439  543705 net.go:698] Add success.
I0320 17:45:13.469077  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9e5c0ba3-df38-4c79-8570-c9a9f99fec55","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:45:13.469107  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:45:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:45:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:45:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 17:45:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:45:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 17:45:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:45:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:45:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:45:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:45:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:45:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:45:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:45:23.409793  543705 memory.go:184] no items to output this cycle
I0320 17:45:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 17:45:25.451941  543705 disk_info.go:125] begin check local disk info of client
I0320 17:45:25.454396  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:45:25.454402  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa940 0xc0001aa980]
E0320 17:45:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:45:33.409774  543705 memory.go:184] no items to output this cycle
I0320 17:45:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 17:45:38.573620  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:45:38.573626  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:45:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:45:43.410648  543705 memory.go:191] Add success.
I0320 17:45:43.409811  543705 cpu.go:282] Add success.
I0320 17:45:43.420419  543705 net.go:648] Add success.
I0320 17:45:43.422957  543705 net.go:770] primary dev: ETH0
I0320 17:45:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:45:43.422984  543705 net.go:698] Add success.
I0320 17:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:45:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:45:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:45:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:45:53.409770  543705 memory.go:184] no items to output this cycle
I0320 17:45:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 17:46:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:46:03.409790  543705 memory.go:184] no items to output this cycle
I0320 17:46:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:46:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:46:13.409778  543705 memory.go:191] Add success.
I0320 17:46:13.409802  543705 cpu.go:282] Add success.
W0320 17:46:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:46:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:46:13.419730  543705 net.go:648] Add success.
I0320 17:46:13.422678  543705 net.go:770] primary dev: ETH0
I0320 17:46:13.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:46:13.422705  543705 net.go:698] Add success.
I0320 17:46:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:46:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:46:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 17:46:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:46:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 17:46:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:46:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:46:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:46:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:46:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:46:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:46:23.409773  543705 cpu.go:275] no items to output this cycle
I0320 17:46:23.409777  543705 memory.go:184] no items to output this cycle
I0320 17:46:25.454964  543705 disk_info.go:125] begin check local disk info of client
I0320 17:46:25.457382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:46:25.457388  543705 disk_info.go:196] parse disk info done, disk is : [0xc000514e40 0xc000514e80]
E0320 17:46:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:46:33.409764  543705 memory.go:184] no items to output this cycle
I0320 17:46:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 17:46:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:46:43.409812  543705 memory.go:191] Add success.
I0320 17:46:43.409817  543705 cpu.go:282] Add success.
I0320 17:46:43.419869  543705 net.go:770] primary dev: ETH0
I0320 17:46:43.419882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:46:43.419894  543705 net.go:698] Add success.
I0320 17:46:43.420243  543705 net.go:648] Add success.
I0320 17:46:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:46:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:46:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:46:53.409784  543705 memory.go:184] no items to output this cycle
I0320 17:46:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 17:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:47:03.409786  543705 memory.go:184] no items to output this cycle
I0320 17:47:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 17:47:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:47:13.409818  543705 memory.go:191] Add success.
I0320 17:47:13.409823  543705 cpu.go:282] Add success.
W0320 17:47:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:47:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:47:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:47:13.419718  543705 net.go:648] Add success.
I0320 17:47:13.422342  543705 net.go:770] primary dev: ETH0
I0320 17:47:13.422356  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:47:13.422369  543705 net.go:698] Add success.
I0320 17:47:13.452920  543705 event_worker.go:152] Polling the log file for events...
W0320 17:47:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:47:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 17:47:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:47:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:47:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:47:14.456939  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:47:14.456981  543705 disk_worker.go:494] system disk:vda1
I0320 17:47:14.457007  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:47:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:47:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:47:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:47:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:47:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:47:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:47:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:47:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:47:23.409793  543705 memory.go:184] no items to output this cycle
I0320 17:47:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 17:47:25.457982  543705 disk_info.go:125] begin check local disk info of client
I0320 17:47:25.460378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:47:25.460384  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab200 0xc0001ab240]
E0320 17:47:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:47:33.409768  543705 memory.go:184] no items to output this cycle
I0320 17:47:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 17:47:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:47:43.409809  543705 memory.go:191] Add success.
I0320 17:47:43.409813  543705 cpu.go:282] Add success.
I0320 17:47:43.419895  543705 net.go:648] Add success.
I0320 17:47:43.422940  543705 net.go:770] primary dev: ETH0
I0320 17:47:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:47:43.422966  543705 net.go:698] Add success.
I0320 17:47:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:47:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:47:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:47:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:47:53.409803  543705 memory.go:184] no items to output this cycle
I0320 17:47:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 17:48:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:48:03.409780  543705 memory.go:184] no items to output this cycle
I0320 17:48:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 17:48:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:48:13.409782  543705 memory.go:191] Add success.
W0320 17:48:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:48:13.409812  543705 cpu.go:282] Add success.
W0320 17:48:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:48:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:48:13.420225  543705 net.go:648] Add success.
I0320 17:48:13.422933  543705 net.go:770] primary dev: ETH0
I0320 17:48:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:48:13.422956  543705 net.go:698] Add success.
I0320 17:48:13.568093  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1e835470-164e-4336-b1ea-bb34315be429","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:48:13.568136  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:48:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:48:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 17:48:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:48:14.456519  543705 disk_worker.go:494] system disk:vda1
I0320 17:48:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:48:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:48:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:48:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:48:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:48:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 17:48:23.409785  543705 memory.go:184] no items to output this cycle
I0320 17:48:25.460992  543705 disk_info.go:125] begin check local disk info of client
I0320 17:48:25.463466  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:48:25.463472  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5dc0 0xc0000c5e00]
E0320 17:48:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:48:33.409798  543705 memory.go:184] no items to output this cycle
I0320 17:48:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 17:48:38.574617  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:48:38.574624  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:48:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:48:43.410834  543705 memory.go:191] Add success.
I0320 17:48:43.409836  543705 cpu.go:282] Add success.
I0320 17:48:43.420542  543705 net.go:648] Add success.
I0320 17:48:43.423404  543705 net.go:770] primary dev: ETH0
I0320 17:48:43.423416  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:48:43.423429  543705 net.go:698] Add success.
I0320 17:48:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:48:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:48:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:48:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:48:53.409809  543705 memory.go:184] no items to output this cycle
I0320 17:48:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 17:49:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:49:03.409776  543705 memory.go:184] no items to output this cycle
I0320 17:49:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 17:49:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:49:13.409804  543705 memory.go:191] Add success.
I0320 17:49:13.409813  543705 cpu.go:282] Add success.
W0320 17:49:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:49:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:49:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:49:13.419711  543705 net.go:648] Add success.
I0320 17:49:13.422805  543705 net.go:770] primary dev: ETH0
I0320 17:49:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:49:13.422829  543705 net.go:698] Add success.
I0320 17:49:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:49:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:49:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 17:49:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:49:14.456509  543705 disk_worker.go:494] system disk:vda1
I0320 17:49:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:49:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:49:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:49:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:49:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:49:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 17:49:23.409780  543705 memory.go:184] no items to output this cycle
I0320 17:49:25.464010  543705 disk_info.go:125] begin check local disk info of client
I0320 17:49:25.466479  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:49:25.466485  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a3c0 0xc00048a400]
E0320 17:49:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:49:33.409766  543705 memory.go:184] no items to output this cycle
I0320 17:49:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 17:49:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:49:43.409811  543705 memory.go:191] Add success.
I0320 17:49:43.409821  543705 cpu.go:282] Add success.
I0320 17:49:43.419868  543705 net.go:648] Add success.
I0320 17:49:43.422748  543705 net.go:770] primary dev: ETH0
I0320 17:49:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:49:43.422773  543705 net.go:698] Add success.
I0320 17:49:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:49:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:49:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:49:53.410342  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:49:53.410359  543705 memory.go:184] no items to output this cycle
I0320 17:49:53.410372  543705 cpu.go:275] no items to output this cycle
E0320 17:50:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:50:03.409803  543705 memory.go:184] no items to output this cycle
I0320 17:50:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 17:50:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:50:13.409779  543705 memory.go:191] Add success.
I0320 17:50:13.409797  543705 cpu.go:282] Add success.
W0320 17:50:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:50:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:50:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:50:13.420351  543705 net.go:648] Add success.
I0320 17:50:13.422997  543705 net.go:770] primary dev: ETH0
I0320 17:50:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:50:13.423021  543705 net.go:698] Add success.
I0320 17:50:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:50:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:50:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 17:50:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:50:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 17:50:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:50:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:50:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:50:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:50:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:50:23.409769  543705 memory.go:184] no items to output this cycle
I0320 17:50:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 17:50:25.467027  543705 disk_info.go:125] begin check local disk info of client
I0320 17:50:25.469454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:50:25.469460  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5600 0xc0003d5640]
E0320 17:50:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:50:33.409780  543705 memory.go:184] no items to output this cycle
I0320 17:50:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 17:50:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:50:43.409828  543705 memory.go:191] Add success.
I0320 17:50:43.409830  543705 cpu.go:282] Add success.
I0320 17:50:43.420009  543705 net.go:648] Add success.
I0320 17:50:43.423338  543705 net.go:770] primary dev: ETH0
I0320 17:50:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:50:43.423363  543705 net.go:698] Add success.
I0320 17:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:50:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:50:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:50:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:50:53.409774  543705 memory.go:184] no items to output this cycle
I0320 17:50:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 17:51:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:51:03.409802  543705 memory.go:184] no items to output this cycle
I0320 17:51:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 17:51:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:51:13.409811  543705 memory.go:191] Add success.
I0320 17:51:13.409822  543705 cpu.go:282] Add success.
W0320 17:51:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:51:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:51:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:51:13.420044  543705 net.go:648] Add success.
I0320 17:51:13.423077  543705 net.go:770] primary dev: ETH0
I0320 17:51:13.423090  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:51:13.423103  543705 net.go:698] Add success.
I0320 17:51:13.463998  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"64a608cf-cb22-419b-ac32-f5fca4ff6edb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:51:13.464031  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:51:14.455386  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:51:14.455420  543705 disk_worker.go:708] disk space is not compliant
W0320 17:51:14.455424  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:51:14.457055  543705 disk_worker.go:494] system disk:vda1
I0320 17:51:14.457088  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:51:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:51:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:51:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:51:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:51:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:51:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 17:51:23.409778  543705 memory.go:184] no items to output this cycle
I0320 17:51:25.470044  543705 disk_info.go:125] begin check local disk info of client
I0320 17:51:25.472477  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:51:25.472483  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4880 0xc0000c48c0]
E0320 17:51:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:51:33.409771  543705 memory.go:184] no items to output this cycle
I0320 17:51:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 17:51:38.575629  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:51:38.575636  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:51:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:51:43.410651  543705 memory.go:191] Add success.
I0320 17:51:43.409791  543705 cpu.go:282] Add success.
I0320 17:51:43.420341  543705 net.go:648] Add success.
I0320 17:51:43.423115  543705 net.go:770] primary dev: ETH0
I0320 17:51:43.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:51:43.423143  543705 net.go:698] Add success.
I0320 17:51:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:51:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:51:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:51:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:51:53.409785  543705 memory.go:184] no items to output this cycle
I0320 17:51:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:52:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:52:03.409816  543705 memory.go:184] no items to output this cycle
I0320 17:52:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 17:52:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:52:13.409816  543705 memory.go:191] Add success.
I0320 17:52:13.409821  543705 cpu.go:282] Add success.
W0320 17:52:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:52:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:52:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:52:13.420110  543705 net.go:648] Add success.
I0320 17:52:13.422846  543705 net.go:770] primary dev: ETH0
I0320 17:52:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:52:13.422871  543705 net.go:698] Add success.
W0320 17:52:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:52:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 17:52:14.455159  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:52:14.456165  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:52:14.456173  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:52:14.456178  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:52:14.457594  543705 disk_worker.go:494] system disk:vda1
I0320 17:52:14.457622  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:52:15.456769  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:52:15.456778  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:52:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:52:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:52:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:52:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:52:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:52:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:52:23.409797  543705 memory.go:184] no items to output this cycle
I0320 17:52:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 17:52:25.473047  543705 disk_info.go:125] begin check local disk info of client
I0320 17:52:25.475555  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:52:25.475561  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bd80 0xc00007bdc0]
E0320 17:52:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:52:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 17:52:33.409798  543705 memory.go:184] no items to output this cycle
E0320 17:52:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:52:43.409799  543705 memory.go:191] Add success.
I0320 17:52:43.409806  543705 cpu.go:282] Add success.
I0320 17:52:43.419892  543705 net.go:648] Add success.
I0320 17:52:43.423177  543705 net.go:770] primary dev: ETH0
I0320 17:52:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:52:43.423206  543705 net.go:698] Add success.
I0320 17:52:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:52:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:52:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:52:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:52:53.409826  543705 memory.go:184] no items to output this cycle
I0320 17:52:53.409839  543705 cpu.go:275] no items to output this cycle
E0320 17:53:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:53:03.409789  543705 memory.go:184] no items to output this cycle
I0320 17:53:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:53:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:53:13.409788  543705 memory.go:191] Add success.
I0320 17:53:13.409813  543705 cpu.go:282] Add success.
W0320 17:53:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:53:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:53:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:53:13.420165  543705 net.go:648] Add success.
I0320 17:53:13.422994  543705 net.go:770] primary dev: ETH0
I0320 17:53:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:53:13.423024  543705 net.go:698] Add success.
I0320 17:53:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:53:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:53:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 17:53:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:53:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 17:53:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:53:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:53:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:53:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:53:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:53:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:53:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:53:23.409803  543705 memory.go:184] no items to output this cycle
I0320 17:53:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 17:53:25.476071  543705 disk_info.go:125] begin check local disk info of client
I0320 17:53:25.478583  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:53:25.478588  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464440 0xc000464480]
E0320 17:53:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:53:33.409810  543705 memory.go:184] no items to output this cycle
I0320 17:53:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 17:53:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:53:43.409827  543705 memory.go:191] Add success.
I0320 17:53:43.409836  543705 cpu.go:282] Add success.
I0320 17:53:43.420009  543705 net.go:648] Add success.
I0320 17:53:43.422719  543705 net.go:770] primary dev: ETH0
I0320 17:53:43.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:53:43.422749  543705 net.go:698] Add success.
I0320 17:53:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:53:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:53:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:53:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:53:53.409777  543705 memory.go:184] no items to output this cycle
I0320 17:53:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 17:54:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:54:03.409806  543705 memory.go:184] no items to output this cycle
I0320 17:54:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 17:54:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:54:13.409774  543705 memory.go:191] Add success.
W0320 17:54:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 17:54:13.409806  543705 cpu.go:282] Add success.
W0320 17:54:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:54:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:54:13.420085  543705 net.go:648] Add success.
I0320 17:54:13.422873  543705 net.go:770] primary dev: ETH0
I0320 17:54:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:54:13.422903  543705 net.go:698] Add success.
I0320 17:54:13.469013  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ed705db-6996-4f4c-bec2-2d5311c7921e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:54:13.469047  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 17:54:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:54:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:54:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 17:54:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:54:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 17:54:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:54:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:54:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:54:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:54:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:54:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:54:23.409771  543705 memory.go:184] no items to output this cycle
I0320 17:54:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 17:54:25.479087  543705 disk_info.go:125] begin check local disk info of client
I0320 17:54:25.481531  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:54:25.481537  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b3c0 0xc00007b400]
E0320 17:54:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:54:33.409797  543705 memory.go:184] no items to output this cycle
I0320 17:54:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 17:54:38.576623  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:54:38.576629  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:54:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:54:43.410652  543705 memory.go:191] Add success.
I0320 17:54:43.409794  543705 cpu.go:282] Add success.
I0320 17:54:43.420360  543705 net.go:648] Add success.
I0320 17:54:43.423222  543705 net.go:770] primary dev: ETH0
I0320 17:54:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:54:43.423248  543705 net.go:698] Add success.
I0320 17:54:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:54:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:54:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:54:53.409784  543705 memory.go:184] no items to output this cycle
I0320 17:54:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 17:55:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:55:03.409797  543705 memory.go:184] no items to output this cycle
I0320 17:55:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 17:55:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:55:13.409777  543705 memory.go:191] Add success.
I0320 17:55:13.409801  543705 cpu.go:282] Add success.
W0320 17:55:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:55:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:55:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:55:13.419993  543705 net.go:770] primary dev: ETH0
I0320 17:55:13.420009  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:55:13.420023  543705 net.go:698] Add success.
I0320 17:55:13.420375  543705 net.go:648] Add success.
I0320 17:55:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:55:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:55:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 17:55:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:55:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 17:55:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:55:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:55:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:55:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:55:16.472388  543705 disk_local_worker.go:436] Get disk info: []
I0320 17:55:23.409870  543705 cpu.go:275] no items to output this cycle
E0320 17:55:23.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:55:23.409888  543705 memory.go:184] no items to output this cycle
I0320 17:55:25.482102  543705 disk_info.go:125] begin check local disk info of client
I0320 17:55:25.484531  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:55:25.484537  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae780 0xc0003ae7c0]
E0320 17:55:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:55:33.409773  543705 memory.go:184] no items to output this cycle
I0320 17:55:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 17:55:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:55:43.409817  543705 memory.go:191] Add success.
I0320 17:55:43.409817  543705 cpu.go:282] Add success.
I0320 17:55:43.419973  543705 net.go:648] Add success.
I0320 17:55:43.422625  543705 net.go:770] primary dev: ETH0
I0320 17:55:43.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:55:43.422650  543705 net.go:698] Add success.
I0320 17:55:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:55:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:55:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:55:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:55:53.409770  543705 memory.go:184] no items to output this cycle
I0320 17:55:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 17:56:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:56:03.409778  543705 memory.go:184] no items to output this cycle
I0320 17:56:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 17:56:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:56:13.409789  543705 memory.go:191] Add success.
I0320 17:56:13.409792  543705 cpu.go:282] Add success.
W0320 17:56:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:56:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:56:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:56:13.420079  543705 net.go:648] Add success.
I0320 17:56:13.422900  543705 net.go:770] primary dev: ETH0
I0320 17:56:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:56:13.422929  543705 net.go:698] Add success.
I0320 17:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:56:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:56:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 17:56:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:56:14.456504  543705 disk_worker.go:494] system disk:vda1
I0320 17:56:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:56:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:56:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:56:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:56:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:56:23.409774  543705 memory.go:184] no items to output this cycle
I0320 17:56:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 17:56:25.485105  543705 disk_info.go:125] begin check local disk info of client
I0320 17:56:25.487556  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:56:25.487563  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c000 0xc00035c040]
E0320 17:56:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:56:33.409795  543705 memory.go:184] no items to output this cycle
I0320 17:56:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 17:56:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:56:43.409797  543705 memory.go:191] Add success.
I0320 17:56:43.409812  543705 cpu.go:282] Add success.
I0320 17:56:43.419949  543705 net.go:648] Add success.
I0320 17:56:43.422712  543705 net.go:770] primary dev: ETH0
I0320 17:56:43.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:56:43.422736  543705 net.go:698] Add success.
I0320 17:56:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:56:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:56:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:56:53.409784  543705 memory.go:184] no items to output this cycle
I0320 17:56:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 17:57:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:57:03.409772  543705 memory.go:184] no items to output this cycle
I0320 17:57:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 17:57:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:57:13.409805  543705 memory.go:191] Add success.
I0320 17:57:13.409815  543705 cpu.go:282] Add success.
W0320 17:57:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:57:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:57:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:57:13.420187  543705 net.go:648] Add success.
I0320 17:57:13.422823  543705 net.go:770] primary dev: ETH0
I0320 17:57:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:57:13.422853  543705 net.go:698] Add success.
I0320 17:57:13.429154  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 17:57:13.453327  543705 event_worker.go:152] Polling the log file for events...
I0320 17:57:13.468946  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"63f21e38-ddc2-4c37-8e3e-697da1673ee2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 17:57:13.468996  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 17:57:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:57:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 17:57:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 17:57:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 17:57:14.455888  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 17:57:14.455893  543705 custom_config.go:64] query custom config with name: gpu
I0320 17:57:14.456547  543705 disk_worker.go:494] system disk:vda1
I0320 17:57:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 17:57:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 17:57:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:57:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 17:57:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 17:57:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:57:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:57:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:57:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:57:23.409800  543705 memory.go:184] no items to output this cycle
I0320 17:57:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 17:57:25.488126  543705 disk_info.go:125] begin check local disk info of client
I0320 17:57:25.490649  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:57:25.490656  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000da100 0xc0000da180]
E0320 17:57:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:57:33.409781  543705 memory.go:184] no items to output this cycle
I0320 17:57:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 17:57:38.577639  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 17:57:38.577659  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 17:57:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:57:43.410874  543705 memory.go:191] Add success.
I0320 17:57:43.409821  543705 cpu.go:282] Add success.
I0320 17:57:43.420621  543705 net.go:648] Add success.
I0320 17:57:43.423149  543705 net.go:770] primary dev: ETH0
I0320 17:57:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:57:43.423174  543705 net.go:698] Add success.
I0320 17:57:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:57:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:57:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:57:53.409773  543705 memory.go:184] no items to output this cycle
I0320 17:57:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 17:58:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:58:03.409783  543705 memory.go:184] no items to output this cycle
I0320 17:58:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 17:58:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:58:13.409800  543705 memory.go:191] Add success.
I0320 17:58:13.409818  543705 cpu.go:282] Add success.
W0320 17:58:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:58:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:58:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:58:13.420109  543705 net.go:648] Add success.
I0320 17:58:13.423111  543705 net.go:770] primary dev: ETH0
I0320 17:58:13.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:58:13.423138  543705 net.go:698] Add success.
I0320 17:58:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:58:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:58:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 17:58:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:58:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 17:58:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:58:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:58:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:58:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:58:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:58:23.409763  543705 memory.go:184] no items to output this cycle
I0320 17:58:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 17:58:25.490731  543705 disk_info.go:125] begin check local disk info of client
I0320 17:58:25.493434  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:58:25.493441  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304000 0xc000304040]
E0320 17:58:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:58:33.409795  543705 memory.go:184] no items to output this cycle
I0320 17:58:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 17:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:58:43.409811  543705 memory.go:191] Add success.
I0320 17:58:43.409858  543705 cpu.go:282] Add success.
I0320 17:58:43.420179  543705 net.go:648] Add success.
I0320 17:58:43.423047  543705 net.go:770] primary dev: ETH0
I0320 17:58:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:58:43.423073  543705 net.go:698] Add success.
I0320 17:58:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:58:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:58:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:58:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:58:53.409772  543705 memory.go:184] no items to output this cycle
I0320 17:58:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 17:59:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:59:03.409808  543705 memory.go:184] no items to output this cycle
I0320 17:59:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 17:59:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:59:13.409792  543705 cpu.go:282] Add success.
I0320 17:59:13.409801  543705 memory.go:191] Add success.
W0320 17:59:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 17:59:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 17:59:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 17:59:13.420046  543705 net.go:648] Add success.
I0320 17:59:13.422764  543705 net.go:770] primary dev: ETH0
I0320 17:59:13.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:59:13.422790  543705 net.go:698] Add success.
I0320 17:59:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 17:59:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 17:59:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 17:59:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 17:59:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 17:59:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 17:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 17:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:59:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:59:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 17:59:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0320 17:59:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:59:23.409772  543705 memory.go:184] no items to output this cycle
I0320 17:59:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 17:59:25.494164  543705 disk_info.go:125] begin check local disk info of client
I0320 17:59:25.496639  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 17:59:25.496645  543705 disk_info.go:196] parse disk info done, disk is : [0xc000322000 0xc000322040]
E0320 17:59:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:59:33.409807  543705 memory.go:184] no items to output this cycle
I0320 17:59:33.409824  543705 cpu.go:275] no items to output this cycle
E0320 17:59:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:59:43.409790  543705 memory.go:191] Add success.
I0320 17:59:43.409801  543705 cpu.go:282] Add success.
I0320 17:59:43.419948  543705 net.go:648] Add success.
I0320 17:59:43.422659  543705 net.go:770] primary dev: ETH0
I0320 17:59:43.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0320 17:59:43.422684  543705 net.go:698] Add success.
I0320 17:59:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 17:59:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 17:59:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 17:59:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 17:59:53.409777  543705 memory.go:184] no items to output this cycle
I0320 17:59:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 18:00:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:00:03.409778  543705 memory.go:184] no items to output this cycle
I0320 18:00:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:00:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:00:13.409781  543705 memory.go:191] Add success.
I0320 18:00:13.409798  543705 cpu.go:282] Add success.
W0320 18:00:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:00:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:00:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:00:13.420342  543705 net.go:648] Add success.
I0320 18:00:13.423043  543705 net.go:770] primary dev: ETH0
I0320 18:00:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:00:13.423067  543705 net.go:698] Add success.
I0320 18:00:13.469107  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f285814-3ac8-4e86-8228-02e751ee204a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:00:13.469146  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:00:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:00:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 18:00:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:00:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 18:00:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:00:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:00:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:00:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:00:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:00:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:00:23.409883  543705 memory.go:184] no items to output this cycle
I0320 18:00:23.409860  543705 cpu.go:275] no items to output this cycle
I0320 18:00:25.497175  543705 disk_info.go:125] begin check local disk info of client
I0320 18:00:25.499660  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:00:25.499666  543705 disk_info.go:196] parse disk info done, disk is : [0xc000279b00 0xc000279b40]
E0320 18:00:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:00:33.409799  543705 memory.go:184] no items to output this cycle
I0320 18:00:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 18:00:38.578650  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:00:38.578657  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:00:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:00:43.410777  543705 memory.go:191] Add success.
I0320 18:00:43.409793  543705 cpu.go:282] Add success.
I0320 18:00:43.420482  543705 net.go:648] Add success.
I0320 18:00:43.423050  543705 net.go:770] primary dev: ETH0
I0320 18:00:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:00:43.423077  543705 net.go:698] Add success.
I0320 18:00:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:00:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:00:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:00:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:00:53.409786  543705 memory.go:184] no items to output this cycle
I0320 18:00:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 18:01:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:01:03.409802  543705 memory.go:184] no items to output this cycle
I0320 18:01:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 18:01:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:01:13.409790  543705 memory.go:191] Add success.
I0320 18:01:13.409811  543705 cpu.go:282] Add success.
W0320 18:01:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:01:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:01:13.420192  543705 net.go:648] Add success.
I0320 18:01:13.422704  543705 net.go:770] primary dev: ETH0
I0320 18:01:13.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:01:13.422735  543705 net.go:698] Add success.
I0320 18:01:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:01:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:01:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 18:01:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:01:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 18:01:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:01:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:01:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:01:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:01:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:01:23.409938  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:01:23.409957  543705 memory.go:184] no items to output this cycle
I0320 18:01:23.409938  543705 cpu.go:275] no items to output this cycle
I0320 18:01:25.499748  543705 disk_info.go:125] begin check local disk info of client
I0320 18:01:25.502211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:01:25.502217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000545e00 0xc000545e40]
E0320 18:01:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:01:33.409784  543705 memory.go:184] no items to output this cycle
I0320 18:01:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 18:01:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:01:43.409812  543705 memory.go:191] Add success.
I0320 18:01:43.409825  543705 cpu.go:282] Add success.
I0320 18:01:43.420051  543705 net.go:648] Add success.
I0320 18:01:43.422897  543705 net.go:770] primary dev: ETH0
I0320 18:01:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:01:43.422925  543705 net.go:698] Add success.
I0320 18:01:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:01:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:01:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:01:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:01:53.409778  543705 memory.go:184] no items to output this cycle
I0320 18:01:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 18:02:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:02:03.409811  543705 memory.go:184] no items to output this cycle
I0320 18:02:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 18:02:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:02:13.409809  543705 memory.go:191] Add success.
I0320 18:02:13.409814  543705 cpu.go:282] Add success.
W0320 18:02:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:02:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:02:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:02:13.420029  543705 net.go:648] Add success.
I0320 18:02:13.422504  543705 net.go:770] primary dev: ETH0
I0320 18:02:13.422516  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:02:13.422529  543705 net.go:698] Add success.
W0320 18:02:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:02:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 18:02:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:02:14.456916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:02:14.456926  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:02:14.456932  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:02:14.456984  543705 disk_worker.go:494] system disk:vda1
I0320 18:02:14.457014  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:02:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:02:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:02:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:02:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:02:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:02:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:02:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:02:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:02:23.409791  543705 memory.go:184] no items to output this cycle
I0320 18:02:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 18:02:25.503203  543705 disk_info.go:125] begin check local disk info of client
I0320 18:02:25.505704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:02:25.505710  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa200 0xc0001fa240]
E0320 18:02:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:02:33.409782  543705 memory.go:184] no items to output this cycle
I0320 18:02:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 18:02:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:02:43.409791  543705 memory.go:191] Add success.
I0320 18:02:43.409797  543705 cpu.go:282] Add success.
I0320 18:02:43.419975  543705 net.go:648] Add success.
I0320 18:02:43.422937  543705 net.go:770] primary dev: ETH0
I0320 18:02:43.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:02:43.422963  543705 net.go:698] Add success.
I0320 18:02:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:02:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:02:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:02:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:02:53.409818  543705 memory.go:184] no items to output this cycle
I0320 18:02:53.409828  543705 cpu.go:275] no items to output this cycle
E0320 18:03:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:03:03.409805  543705 memory.go:184] no items to output this cycle
I0320 18:03:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 18:03:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:03:13.409787  543705 memory.go:191] Add success.
I0320 18:03:13.409809  543705 cpu.go:282] Add success.
W0320 18:03:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:03:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:03:13.420080  543705 net.go:648] Add success.
I0320 18:03:13.423436  543705 net.go:770] primary dev: ETH0
I0320 18:03:13.423450  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:03:13.423464  543705 net.go:698] Add success.
I0320 18:03:13.468453  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc8fe5dc-35fa-4c0f-ad56-88b225efd54e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:03:13.468485  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:03:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:03:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:03:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0320 18:03:14.455248  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:03:14.456769  543705 disk_worker.go:494] system disk:vda1
I0320 18:03:14.456802  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:03:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:03:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:03:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:03:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:03:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:03:23.409799  543705 memory.go:184] no items to output this cycle
I0320 18:03:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 18:03:25.506171  543705 disk_info.go:125] begin check local disk info of client
I0320 18:03:25.508644  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:03:25.508651  543705 disk_info.go:196] parse disk info done, disk is : [0xc000323c80 0xc000323cc0]
E0320 18:03:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:03:33.409786  543705 memory.go:184] no items to output this cycle
I0320 18:03:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 18:03:38.579705  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:03:38.579713  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:03:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:03:43.410743  543705 memory.go:191] Add success.
I0320 18:03:43.409801  543705 cpu.go:282] Add success.
I0320 18:03:43.420663  543705 net.go:648] Add success.
I0320 18:03:43.423321  543705 net.go:770] primary dev: ETH0
I0320 18:03:43.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:03:43.423346  543705 net.go:698] Add success.
I0320 18:03:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:03:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:03:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:03:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:03:53.409781  543705 memory.go:184] no items to output this cycle
I0320 18:03:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 18:04:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:04:03.409789  543705 memory.go:184] no items to output this cycle
I0320 18:04:03.409799  543705 cpu.go:275] no items to output this cycle
W0320 18:04:13.409704  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:04:13.409719  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:04:13.409724  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 18:04:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:04:13.409815  543705 memory.go:191] Add success.
I0320 18:04:13.409824  543705 cpu.go:282] Add success.
I0320 18:04:13.419970  543705 net.go:648] Add success.
I0320 18:04:13.422481  543705 net.go:770] primary dev: ETH0
I0320 18:04:13.422494  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:04:13.422506  543705 net.go:698] Add success.
I0320 18:04:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:04:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:04:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 18:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:04:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 18:04:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:04:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:04:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:04:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:04:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:04:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:04:23.409796  543705 memory.go:184] no items to output this cycle
I0320 18:04:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 18:04:25.509239  543705 disk_info.go:125] begin check local disk info of client
I0320 18:04:25.511813  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:04:25.511820  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4000 0xc0003d4040]
E0320 18:04:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:04:33.409769  543705 memory.go:184] no items to output this cycle
I0320 18:04:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 18:04:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:04:43.409789  543705 memory.go:191] Add success.
I0320 18:04:43.409825  543705 cpu.go:282] Add success.
I0320 18:04:43.419994  543705 net.go:648] Add success.
I0320 18:04:43.423036  543705 net.go:770] primary dev: ETH0
I0320 18:04:43.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:04:43.423064  543705 net.go:698] Add success.
I0320 18:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:04:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:04:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:04:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:04:53.409777  543705 cpu.go:275] no items to output this cycle
I0320 18:04:53.409787  543705 memory.go:184] no items to output this cycle
E0320 18:05:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:05:03.409790  543705 memory.go:184] no items to output this cycle
I0320 18:05:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 18:05:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:05:13.409789  543705 memory.go:191] Add success.
I0320 18:05:13.409790  543705 cpu.go:282] Add success.
W0320 18:05:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:05:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:05:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:05:13.420039  543705 net.go:648] Add success.
I0320 18:05:13.422905  543705 net.go:770] primary dev: ETH0
I0320 18:05:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:05:13.422931  543705 net.go:698] Add success.
I0320 18:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:05:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:05:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 18:05:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:05:14.456560  543705 disk_worker.go:494] system disk:vda1
I0320 18:05:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:05:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:05:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:05:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:05:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:05:23.409778  543705 memory.go:184] no items to output this cycle
I0320 18:05:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 18:05:25.512199  543705 disk_info.go:125] begin check local disk info of client
I0320 18:05:25.514657  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:05:25.514663  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9ac0 0xc0003c9b00]
E0320 18:05:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:05:33.409800  543705 memory.go:184] no items to output this cycle
I0320 18:05:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 18:05:43.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:05:43.409912  543705 memory.go:191] Add success.
I0320 18:05:43.409957  543705 cpu.go:282] Add success.
I0320 18:05:43.419755  543705 net.go:648] Add success.
I0320 18:05:43.422821  543705 net.go:770] primary dev: ETH0
I0320 18:05:43.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:05:43.422847  543705 net.go:698] Add success.
I0320 18:05:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:05:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:05:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:05:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:05:53.409785  543705 memory.go:184] no items to output this cycle
I0320 18:05:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 18:06:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:06:03.409786  543705 memory.go:184] no items to output this cycle
I0320 18:06:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 18:06:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:06:13.409816  543705 memory.go:191] Add success.
I0320 18:06:13.409825  543705 cpu.go:282] Add success.
W0320 18:06:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:06:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:06:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:06:13.420133  543705 net.go:648] Add success.
I0320 18:06:13.423282  543705 net.go:770] primary dev: ETH0
I0320 18:06:13.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:06:13.423307  543705 net.go:698] Add success.
I0320 18:06:13.536752  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"46350abc-16b9-426a-9d46-152add540df1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:06:13.536786  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:06:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:06:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:06:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 18:06:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:06:14.456741  543705 disk_worker.go:494] system disk:vda1
I0320 18:06:14.456776  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:06:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:06:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:06:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:06:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:06:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:06:23.409790  543705 memory.go:184] no items to output this cycle
I0320 18:06:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 18:06:25.515265  543705 disk_info.go:125] begin check local disk info of client
I0320 18:06:25.517826  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:06:25.517834  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dfe80 0xc0003b8000]
E0320 18:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:06:33.409800  543705 memory.go:184] no items to output this cycle
I0320 18:06:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 18:06:38.579862  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:06:38.579869  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:06:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:06:43.410944  543705 memory.go:191] Add success.
I0320 18:06:43.409824  543705 cpu.go:282] Add success.
I0320 18:06:43.420660  543705 net.go:648] Add success.
I0320 18:06:43.423601  543705 net.go:770] primary dev: ETH0
I0320 18:06:43.423615  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:06:43.423627  543705 net.go:698] Add success.
I0320 18:06:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:06:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:06:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:06:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:06:53.409784  543705 memory.go:184] no items to output this cycle
I0320 18:06:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 18:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:07:03.409783  543705 memory.go:184] no items to output this cycle
I0320 18:07:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 18:07:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:07:13.409803  543705 memory.go:191] Add success.
I0320 18:07:13.409813  543705 cpu.go:282] Add success.
W0320 18:07:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:07:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:07:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:07:13.420112  543705 net.go:648] Add success.
I0320 18:07:13.422708  543705 net.go:770] primary dev: ETH0
I0320 18:07:13.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:07:13.422738  543705 net.go:698] Add success.
I0320 18:07:13.453299  543705 event_worker.go:152] Polling the log file for events...
W0320 18:07:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:07:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 18:07:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:07:14.456762  543705 disk_worker.go:494] system disk:vda1
I0320 18:07:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:07:14.457135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:07:14.457143  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:07:14.457148  543705 custom_config.go:64] query custom config with name: gpu
E0320 18:07:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:07:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:07:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:07:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:07:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:07:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:07:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:07:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:07:23.409794  543705 memory.go:184] no items to output this cycle
I0320 18:07:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 18:07:25.518229  543705 disk_info.go:125] begin check local disk info of client
I0320 18:07:25.520716  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:07:25.520723  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049cfc0 0xc00049d000]
E0320 18:07:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:07:33.409772  543705 memory.go:184] no items to output this cycle
I0320 18:07:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 18:07:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:07:43.409794  543705 memory.go:191] Add success.
I0320 18:07:43.409823  543705 cpu.go:282] Add success.
I0320 18:07:43.419723  543705 net.go:648] Add success.
I0320 18:07:43.422259  543705 net.go:770] primary dev: ETH0
I0320 18:07:43.422273  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:07:43.422283  543705 net.go:698] Add success.
I0320 18:07:46.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:07:46.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:07:46.458119  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:07:53.410528  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:07:53.410546  543705 memory.go:184] no items to output this cycle
I0320 18:07:53.410571  543705 cpu.go:275] no items to output this cycle
E0320 18:08:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:08:03.409817  543705 memory.go:184] no items to output this cycle
I0320 18:08:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 18:08:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:08:13.409805  543705 memory.go:191] Add success.
I0320 18:08:13.409805  543705 cpu.go:282] Add success.
W0320 18:08:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:08:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:08:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:08:13.420519  543705 net.go:648] Add success.
I0320 18:08:13.423082  543705 net.go:770] primary dev: ETH0
I0320 18:08:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:08:13.423110  543705 net.go:698] Add success.
I0320 18:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:08:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:08:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 18:08:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:08:14.456480  543705 disk_worker.go:494] system disk:vda1
I0320 18:08:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:08:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:08:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:08:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:08:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:08:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:08:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:08:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 18:08:23.409789  543705 memory.go:184] no items to output this cycle
I0320 18:08:25.521243  543705 disk_info.go:125] begin check local disk info of client
I0320 18:08:25.523714  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:08:25.523720  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329980 0xc0003299c0]
E0320 18:08:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:08:33.409806  543705 memory.go:184] no items to output this cycle
I0320 18:08:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 18:08:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:08:43.409791  543705 memory.go:191] Add success.
I0320 18:08:43.409816  543705 cpu.go:282] Add success.
I0320 18:08:43.419741  543705 net.go:648] Add success.
I0320 18:08:43.422297  543705 net.go:770] primary dev: ETH0
I0320 18:08:43.422312  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:08:43.422325  543705 net.go:698] Add success.
I0320 18:08:46.458016  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:08:46.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:08:46.458136  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:08:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:08:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 18:08:53.409798  543705 memory.go:184] no items to output this cycle
E0320 18:09:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:09:03.409797  543705 memory.go:184] no items to output this cycle
I0320 18:09:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 18:09:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:09:13.409819  543705 memory.go:191] Add success.
I0320 18:09:13.409840  543705 cpu.go:282] Add success.
W0320 18:09:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:09:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:09:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:09:13.420108  543705 net.go:648] Add success.
I0320 18:09:13.423118  543705 net.go:770] primary dev: ETH0
I0320 18:09:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:09:13.423149  543705 net.go:698] Add success.
I0320 18:09:13.514788  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"821aa077-522f-4537-891b-befb3e6c0587","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:09:13.514823  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:09:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:09:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:09:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 18:09:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:09:14.456654  543705 disk_worker.go:494] system disk:vda1
I0320 18:09:14.456684  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:09:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:09:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:09:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:09:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:09:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:09:23.409777  543705 memory.go:184] no items to output this cycle
I0320 18:09:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 18:09:25.523799  543705 disk_info.go:125] begin check local disk info of client
I0320 18:09:25.526226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:09:25.526233  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396800 0xc000396840]
E0320 18:09:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:09:33.409808  543705 memory.go:184] no items to output this cycle
I0320 18:09:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 18:09:38.580008  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:09:38.580015  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:09:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:09:43.410631  543705 memory.go:191] Add success.
I0320 18:09:43.409816  543705 cpu.go:282] Add success.
I0320 18:09:43.420339  543705 net.go:648] Add success.
I0320 18:09:43.423182  543705 net.go:770] primary dev: ETH0
I0320 18:09:43.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:09:43.423211  543705 net.go:698] Add success.
I0320 18:09:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:09:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:09:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:09:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:09:53.409779  543705 memory.go:184] no items to output this cycle
I0320 18:09:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 18:10:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:10:03.409812  543705 memory.go:184] no items to output this cycle
I0320 18:10:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 18:10:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:10:13.409781  543705 memory.go:191] Add success.
W0320 18:10:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:10:13.409809  543705 cpu.go:282] Add success.
W0320 18:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:10:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:10:13.420125  543705 net.go:648] Add success.
I0320 18:10:13.422793  543705 net.go:770] primary dev: ETH0
I0320 18:10:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:10:13.422824  543705 net.go:698] Add success.
I0320 18:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:10:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:10:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 18:10:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:10:14.456599  543705 disk_worker.go:494] system disk:vda1
I0320 18:10:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:10:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:10:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:10:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:10:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:10:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:10:23.409772  543705 memory.go:184] no items to output this cycle
I0320 18:10:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 18:10:25.526270  543705 disk_info.go:125] begin check local disk info of client
I0320 18:10:25.528683  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:10:25.528688  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003db640 0xc0003db680]
E0320 18:10:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:10:33.409803  543705 memory.go:184] no items to output this cycle
I0320 18:10:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 18:10:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:10:43.409774  543705 memory.go:191] Add success.
I0320 18:10:43.409806  543705 cpu.go:282] Add success.
I0320 18:10:43.419842  543705 net.go:648] Add success.
I0320 18:10:43.422710  543705 net.go:770] primary dev: ETH0
I0320 18:10:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:10:43.422741  543705 net.go:698] Add success.
I0320 18:10:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:10:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:10:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:10:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:10:53.409815  543705 memory.go:184] no items to output this cycle
I0320 18:10:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 18:11:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:11:03.409809  543705 memory.go:184] no items to output this cycle
I0320 18:11:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 18:11:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:11:13.409780  543705 memory.go:191] Add success.
I0320 18:11:13.409796  543705 cpu.go:282] Add success.
W0320 18:11:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:11:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:11:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:11:13.420084  543705 net.go:648] Add success.
I0320 18:11:13.423153  543705 net.go:770] primary dev: ETH0
I0320 18:11:13.423167  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:11:13.423179  543705 net.go:698] Add success.
I0320 18:11:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:11:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:11:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 18:11:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:11:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 18:11:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:11:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:11:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:11:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:11:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:11:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:11:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:11:23.409773  543705 memory.go:184] no items to output this cycle
I0320 18:11:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 18:11:25.529287  543705 disk_info.go:125] begin check local disk info of client
I0320 18:11:25.531909  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:11:25.531916  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a5980 0xc0004a59c0]
E0320 18:11:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:11:33.409778  543705 memory.go:184] no items to output this cycle
I0320 18:11:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 18:11:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:11:43.409816  543705 memory.go:191] Add success.
I0320 18:11:43.409825  543705 cpu.go:282] Add success.
I0320 18:11:43.420159  543705 net.go:648] Add success.
I0320 18:11:43.422974  543705 net.go:770] primary dev: ETH0
I0320 18:11:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:11:43.423010  543705 net.go:698] Add success.
I0320 18:11:46.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:11:46.458123  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:11:46.458162  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:11:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:11:53.409789  543705 memory.go:184] no items to output this cycle
I0320 18:11:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 18:12:03.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:12:03.409918  543705 memory.go:184] no items to output this cycle
I0320 18:12:03.409989  543705 cpu.go:275] no items to output this cycle
E0320 18:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:12:13.409809  543705 memory.go:191] Add success.
I0320 18:12:13.409813  543705 cpu.go:282] Add success.
W0320 18:12:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:12:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:12:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:12:13.420068  543705 net.go:648] Add success.
I0320 18:12:13.423163  543705 net.go:770] primary dev: ETH0
I0320 18:12:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:12:13.423190  543705 net.go:698] Add success.
I0320 18:12:13.463982  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0f866a0f-eaf0-43bb-909f-24ab4283998f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:12:13.464013  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 18:12:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:12:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 18:12:14.455204  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:12:14.456810  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:12:14.456819  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:12:14.456825  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:12:14.456834  543705 disk_worker.go:494] system disk:vda1
I0320 18:12:14.456863  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:12:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:12:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:12:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:12:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:12:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:12:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:12:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:12:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:12:23.409795  543705 memory.go:184] no items to output this cycle
I0320 18:12:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 18:12:25.532299  543705 disk_info.go:125] begin check local disk info of client
I0320 18:12:25.534720  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:12:25.534728  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048aac0 0xc00048ab00]
E0320 18:12:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:12:33.409811  543705 memory.go:184] no items to output this cycle
I0320 18:12:33.409824  543705 cpu.go:275] no items to output this cycle
I0320 18:12:38.580606  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:12:38.580613  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:12:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:12:43.410563  543705 memory.go:191] Add success.
I0320 18:12:43.409791  543705 cpu.go:282] Add success.
I0320 18:12:43.420330  543705 net.go:648] Add success.
I0320 18:12:43.422949  543705 net.go:770] primary dev: ETH0
I0320 18:12:43.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:12:43.422974  543705 net.go:698] Add success.
I0320 18:12:46.458022  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:12:46.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:12:46.458138  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:12:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:12:53.409779  543705 memory.go:184] no items to output this cycle
I0320 18:12:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 18:13:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:13:03.409818  543705 memory.go:184] no items to output this cycle
I0320 18:13:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 18:13:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:13:13.409792  543705 memory.go:191] Add success.
I0320 18:13:13.409804  543705 cpu.go:282] Add success.
W0320 18:13:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:13:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:13:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:13:13.420119  543705 net.go:648] Add success.
I0320 18:13:13.422708  543705 net.go:770] primary dev: ETH0
I0320 18:13:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:13:13.422733  543705 net.go:698] Add success.
I0320 18:13:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:13:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:13:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 18:13:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:13:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 18:13:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:13:16.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:13:16.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:13:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:13:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:13:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:13:23.409761  543705 memory.go:184] no items to output this cycle
I0320 18:13:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 18:13:25.534807  543705 disk_info.go:125] begin check local disk info of client
I0320 18:13:25.537270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:13:25.537287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa900 0xc0001aa940]
E0320 18:13:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:13:33.409781  543705 memory.go:184] no items to output this cycle
I0320 18:13:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 18:13:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:13:43.409790  543705 memory.go:191] Add success.
I0320 18:13:43.409791  543705 cpu.go:282] Add success.
I0320 18:13:43.419956  543705 net.go:648] Add success.
I0320 18:13:43.422465  543705 net.go:770] primary dev: ETH0
I0320 18:13:43.422479  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:13:43.422492  543705 net.go:698] Add success.
I0320 18:13:46.457608  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:13:46.457686  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:13:46.457716  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:13:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:13:53.409783  543705 memory.go:184] no items to output this cycle
I0320 18:13:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 18:14:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:14:03.409810  543705 memory.go:184] no items to output this cycle
I0320 18:14:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 18:14:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:14:13.409815  543705 memory.go:191] Add success.
I0320 18:14:13.409821  543705 cpu.go:282] Add success.
W0320 18:14:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:14:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:14:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:14:13.420131  543705 net.go:648] Add success.
I0320 18:14:13.423171  543705 net.go:770] primary dev: ETH0
I0320 18:14:13.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:14:13.423201  543705 net.go:698] Add success.
I0320 18:14:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:14:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:14:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 18:14:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:14:14.456572  543705 disk_worker.go:494] system disk:vda1
I0320 18:14:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:14:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:14:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:14:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:14:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:14:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:14:23.409792  543705 memory.go:184] no items to output this cycle
I0320 18:14:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 18:14:25.537325  543705 disk_info.go:125] begin check local disk info of client
I0320 18:14:25.539893  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:14:25.539899  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f48c0 0xc0003f4900]
E0320 18:14:33.410016  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:14:33.410033  543705 memory.go:184] no items to output this cycle
I0320 18:14:33.410034  543705 cpu.go:275] no items to output this cycle
E0320 18:14:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:14:43.409794  543705 memory.go:191] Add success.
I0320 18:14:43.409794  543705 cpu.go:282] Add success.
I0320 18:14:43.419978  543705 net.go:648] Add success.
I0320 18:14:43.422807  543705 net.go:770] primary dev: ETH0
I0320 18:14:43.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:14:43.422838  543705 net.go:698] Add success.
I0320 18:14:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:14:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:14:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:14:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:14:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 18:14:53.409800  543705 memory.go:184] no items to output this cycle
E0320 18:15:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:15:03.409788  543705 memory.go:184] no items to output this cycle
I0320 18:15:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 18:15:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:15:13.409817  543705 memory.go:191] Add success.
I0320 18:15:13.409825  543705 cpu.go:282] Add success.
W0320 18:15:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:15:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:15:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:15:13.420139  543705 net.go:648] Add success.
I0320 18:15:13.422924  543705 net.go:770] primary dev: ETH0
I0320 18:15:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:15:13.422952  543705 net.go:698] Add success.
I0320 18:15:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:15:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:15:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0320 18:15:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:15:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 18:15:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:15:14.552997  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9a01fe42-f3a8-485a-97ab-019a8be009a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:15:14.553030  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:15:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:15:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:15:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:15:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:15:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:15:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:15:23.409770  543705 memory.go:184] no items to output this cycle
I0320 18:15:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 18:15:25.540346  543705 disk_info.go:125] begin check local disk info of client
I0320 18:15:25.542806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:15:25.542811  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470780 0xc0004707c0]
E0320 18:15:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:15:33.409776  543705 memory.go:184] no items to output this cycle
I0320 18:15:33.409794  543705 cpu.go:275] no items to output this cycle
I0320 18:15:38.581673  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:15:38.581680  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:15:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:15:43.410746  543705 memory.go:191] Add success.
I0320 18:15:43.409819  543705 cpu.go:282] Add success.
I0320 18:15:43.420435  543705 net.go:648] Add success.
I0320 18:15:43.423690  543705 net.go:770] primary dev: ETH0
I0320 18:15:43.423703  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:15:43.423715  543705 net.go:698] Add success.
I0320 18:15:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:15:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:15:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:15:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:15:53.409788  543705 memory.go:184] no items to output this cycle
I0320 18:15:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:16:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:16:03.409776  543705 memory.go:184] no items to output this cycle
I0320 18:16:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 18:16:13.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:16:13.409922  543705 memory.go:191] Add success.
W0320 18:16:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:16:13.409964  543705 cpu.go:282] Add success.
W0320 18:16:13.409971  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:16:13.409974  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:16:13.419748  543705 net.go:648] Add success.
I0320 18:16:13.422561  543705 net.go:770] primary dev: ETH0
I0320 18:16:13.422583  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:16:13.422596  543705 net.go:698] Add success.
I0320 18:16:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:16:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:16:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 18:16:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:16:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 18:16:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:16:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:16:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:16:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:16:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:16:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:16:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:16:23.409771  543705 memory.go:184] no items to output this cycle
I0320 18:16:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 18:16:25.543360  543705 disk_info.go:125] begin check local disk info of client
I0320 18:16:25.545816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:16:25.545822  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470440 0xc000470480]
E0320 18:16:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:16:33.409780  543705 memory.go:184] no items to output this cycle
I0320 18:16:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 18:16:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:16:43.409786  543705 memory.go:191] Add success.
I0320 18:16:43.409805  543705 cpu.go:282] Add success.
I0320 18:16:43.419844  543705 net.go:648] Add success.
I0320 18:16:43.422723  543705 net.go:770] primary dev: ETH0
I0320 18:16:43.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:16:43.422747  543705 net.go:698] Add success.
I0320 18:16:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:16:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:16:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:16:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:16:53.409816  543705 memory.go:184] no items to output this cycle
I0320 18:16:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 18:17:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:17:03.409791  543705 memory.go:184] no items to output this cycle
I0320 18:17:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 18:17:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:17:13.409794  543705 memory.go:191] Add success.
I0320 18:17:13.409794  543705 cpu.go:282] Add success.
W0320 18:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:17:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:17:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:17:13.420356  543705 net.go:648] Add success.
I0320 18:17:13.422980  543705 net.go:770] primary dev: ETH0
I0320 18:17:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:17:13.423004  543705 net.go:698] Add success.
I0320 18:17:13.452768  543705 event_worker.go:152] Polling the log file for events...
W0320 18:17:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:17:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 18:17:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:17:14.456923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:17:14.456931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:17:14.456937  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:17:14.456985  543705 disk_worker.go:494] system disk:vda1
I0320 18:17:14.457025  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:17:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:17:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:17:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:17:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:17:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:17:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:17:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:17:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:17:23.409788  543705 memory.go:184] no items to output this cycle
I0320 18:17:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 18:17:25.545904  543705 disk_info.go:125] begin check local disk info of client
I0320 18:17:25.548354  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:17:25.548361  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab780 0xc0001ab7c0]
E0320 18:17:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:17:33.409800  543705 memory.go:184] no items to output this cycle
I0320 18:17:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 18:17:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:17:43.409783  543705 memory.go:191] Add success.
I0320 18:17:43.409797  543705 cpu.go:282] Add success.
I0320 18:17:43.419899  543705 net.go:648] Add success.
I0320 18:17:43.422602  543705 net.go:770] primary dev: ETH0
I0320 18:17:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:17:43.422627  543705 net.go:698] Add success.
I0320 18:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:17:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:17:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:17:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:17:53.409788  543705 memory.go:184] no items to output this cycle
I0320 18:17:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 18:18:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:18:03.409781  543705 memory.go:184] no items to output this cycle
I0320 18:18:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 18:18:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:18:13.409805  543705 memory.go:191] Add success.
I0320 18:18:13.409810  543705 cpu.go:282] Add success.
W0320 18:18:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:18:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:18:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:18:13.419737  543705 net.go:648] Add success.
I0320 18:18:13.422999  543705 net.go:770] primary dev: ETH0
I0320 18:18:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:18:13.423024  543705 net.go:698] Add success.
I0320 18:18:13.469518  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a496fef-0c23-41c3-aa28-ab845115cdd3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:18:13.469549  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:18:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:18:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:18:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0320 18:18:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:18:14.456483  543705 disk_worker.go:494] system disk:vda1
I0320 18:18:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:18:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:18:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:18:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:18:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:18:23.409773  543705 cpu.go:275] no items to output this cycle
I0320 18:18:23.409774  543705 memory.go:184] no items to output this cycle
I0320 18:18:25.548389  543705 disk_info.go:125] begin check local disk info of client
I0320 18:18:25.550857  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:18:25.550863  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048ac00 0xc00048ac40]
E0320 18:18:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:18:33.409798  543705 memory.go:184] no items to output this cycle
I0320 18:18:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 18:18:38.582621  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:18:38.582628  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:18:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:18:43.410621  543705 memory.go:191] Add success.
I0320 18:18:43.409782  543705 cpu.go:282] Add success.
I0320 18:18:43.420311  543705 net.go:648] Add success.
I0320 18:18:43.422805  543705 net.go:770] primary dev: ETH0
I0320 18:18:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:18:43.422831  543705 net.go:698] Add success.
I0320 18:18:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:18:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:18:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:18:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:18:53.409776  543705 memory.go:184] no items to output this cycle
I0320 18:18:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 18:19:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:19:03.409802  543705 memory.go:184] no items to output this cycle
I0320 18:19:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 18:19:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:19:13.409778  543705 memory.go:191] Add success.
I0320 18:19:13.409796  543705 cpu.go:282] Add success.
W0320 18:19:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:19:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:19:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:19:13.420224  543705 net.go:648] Add success.
I0320 18:19:13.422868  543705 net.go:770] primary dev: ETH0
I0320 18:19:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:19:13.422896  543705 net.go:698] Add success.
I0320 18:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:19:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:19:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 18:19:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:19:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 18:19:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:19:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:19:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:19:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:19:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:19:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:19:23.409804  543705 memory.go:184] no items to output this cycle
I0320 18:19:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 18:19:25.551397  543705 disk_info.go:125] begin check local disk info of client
I0320 18:19:25.553864  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:19:25.553870  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a840 0xc00048a880]
E0320 18:19:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:19:33.409768  543705 memory.go:184] no items to output this cycle
I0320 18:19:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 18:19:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:19:43.409788  543705 memory.go:191] Add success.
I0320 18:19:43.409804  543705 cpu.go:282] Add success.
I0320 18:19:43.419905  543705 net.go:648] Add success.
I0320 18:19:43.423005  543705 net.go:770] primary dev: ETH0
I0320 18:19:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:19:43.423032  543705 net.go:698] Add success.
I0320 18:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:19:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:19:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:19:53.409783  543705 memory.go:184] no items to output this cycle
I0320 18:19:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 18:20:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:20:03.409785  543705 memory.go:184] no items to output this cycle
I0320 18:20:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 18:20:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:20:13.409788  543705 cpu.go:282] Add success.
I0320 18:20:13.409794  543705 memory.go:191] Add success.
W0320 18:20:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:20:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:20:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:20:13.420203  543705 net.go:648] Add success.
I0320 18:20:13.423259  543705 net.go:770] primary dev: ETH0
I0320 18:20:13.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:20:13.423284  543705 net.go:698] Add success.
I0320 18:20:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:20:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:20:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 18:20:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:20:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 18:20:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:20:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:20:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:20:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:20:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:20:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:20:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:20:23.409800  543705 memory.go:184] no items to output this cycle
I0320 18:20:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 18:20:25.554414  543705 disk_info.go:125] begin check local disk info of client
I0320 18:20:25.556872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:20:25.556878  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007abc0 0xc00007ac00]
E0320 18:20:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:20:33.409769  543705 memory.go:184] no items to output this cycle
I0320 18:20:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 18:20:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:20:43.409778  543705 memory.go:191] Add success.
I0320 18:20:43.409795  543705 cpu.go:282] Add success.
I0320 18:20:43.419885  543705 net.go:648] Add success.
I0320 18:20:43.422653  543705 net.go:770] primary dev: ETH0
I0320 18:20:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:20:43.422680  543705 net.go:698] Add success.
I0320 18:20:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:20:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:20:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:20:53.410344  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:20:53.410347  543705 cpu.go:275] no items to output this cycle
I0320 18:20:53.410361  543705 memory.go:184] no items to output this cycle
E0320 18:21:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:21:03.409785  543705 memory.go:184] no items to output this cycle
I0320 18:21:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:21:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:21:13.409788  543705 memory.go:191] Add success.
I0320 18:21:13.409788  543705 cpu.go:282] Add success.
W0320 18:21:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:21:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:21:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:21:13.420159  543705 net.go:648] Add success.
I0320 18:21:13.423450  543705 net.go:770] primary dev: ETH0
I0320 18:21:13.423463  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:21:13.423475  543705 net.go:698] Add success.
I0320 18:21:13.480747  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0d902f15-6cbf-4e29-8fce-26d62e9894a3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:21:13.480781  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:21:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:21:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:21:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 18:21:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:21:14.456674  543705 disk_worker.go:494] system disk:vda1
I0320 18:21:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:21:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:21:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:21:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:21:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:21:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:21:23.410280  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:21:23.410300  543705 memory.go:184] no items to output this cycle
I0320 18:21:23.410301  543705 cpu.go:275] no items to output this cycle
I0320 18:21:25.557425  543705 disk_info.go:125] begin check local disk info of client
I0320 18:21:25.559947  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:21:25.559953  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaec0 0xc0001aaf00]
E0320 18:21:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:21:33.409781  543705 memory.go:184] no items to output this cycle
I0320 18:21:33.409782  543705 cpu.go:275] no items to output this cycle
I0320 18:21:38.583671  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:21:38.583678  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:21:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:21:43.410794  543705 memory.go:191] Add success.
I0320 18:21:43.409819  543705 cpu.go:282] Add success.
I0320 18:21:43.420543  543705 net.go:648] Add success.
I0320 18:21:43.423082  543705 net.go:770] primary dev: ETH0
I0320 18:21:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:21:43.423112  543705 net.go:698] Add success.
I0320 18:21:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:21:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:21:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:21:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:21:53.409789  543705 memory.go:184] no items to output this cycle
I0320 18:21:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 18:22:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:22:03.409782  543705 memory.go:184] no items to output this cycle
I0320 18:22:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 18:22:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:22:13.409789  543705 memory.go:191] Add success.
I0320 18:22:13.409790  543705 cpu.go:282] Add success.
W0320 18:22:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:22:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:22:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:22:13.420183  543705 net.go:648] Add success.
I0320 18:22:13.423637  543705 net.go:770] primary dev: ETH0
I0320 18:22:13.423649  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:22:13.423660  543705 net.go:698] Add success.
W0320 18:22:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:22:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 18:22:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:22:14.456908  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:22:14.456918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:22:14.456923  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:22:14.456991  543705 disk_worker.go:494] system disk:vda1
I0320 18:22:14.457032  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:22:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:22:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:22:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:22:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:22:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:22:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:22:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:22:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:22:23.409805  543705 memory.go:184] no items to output this cycle
I0320 18:22:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 18:22:25.560433  543705 disk_info.go:125] begin check local disk info of client
I0320 18:22:25.562919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:22:25.562925  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c47c0 0xc0000c4800]
E0320 18:22:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:22:33.409768  543705 memory.go:184] no items to output this cycle
I0320 18:22:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 18:22:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:22:43.409792  543705 memory.go:191] Add success.
I0320 18:22:43.409794  543705 cpu.go:282] Add success.
I0320 18:22:43.419854  543705 net.go:648] Add success.
I0320 18:22:43.422767  543705 net.go:770] primary dev: ETH0
I0320 18:22:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:22:43.422792  543705 net.go:698] Add success.
I0320 18:22:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:22:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:22:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:22:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:22:53.410259  543705 cpu.go:275] no items to output this cycle
I0320 18:22:53.410261  543705 memory.go:184] no items to output this cycle
E0320 18:23:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:23:03.409791  543705 cpu.go:275] no items to output this cycle
I0320 18:23:03.409794  543705 memory.go:184] no items to output this cycle
E0320 18:23:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:23:13.409779  543705 memory.go:191] Add success.
W0320 18:23:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:23:13.409812  543705 cpu.go:282] Add success.
W0320 18:23:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:23:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:23:13.420337  543705 net.go:648] Add success.
I0320 18:23:13.422904  543705 net.go:770] primary dev: ETH0
I0320 18:23:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:23:13.422928  543705 net.go:698] Add success.
I0320 18:23:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:23:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:23:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 18:23:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:23:14.456567  543705 disk_worker.go:494] system disk:vda1
I0320 18:23:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:23:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:23:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:23:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:23:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:23:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:23:23.409806  543705 memory.go:184] no items to output this cycle
I0320 18:23:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 18:23:25.563456  543705 disk_info.go:125] begin check local disk info of client
I0320 18:23:25.565987  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:23:25.565993  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe0c0 0xc0003fe100]
E0320 18:23:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:23:33.409771  543705 memory.go:184] no items to output this cycle
I0320 18:23:33.409779  543705 cpu.go:275] no items to output this cycle
E0320 18:23:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:23:43.409813  543705 memory.go:191] Add success.
I0320 18:23:43.409816  543705 cpu.go:282] Add success.
I0320 18:23:43.419925  543705 net.go:648] Add success.
I0320 18:23:43.422591  543705 net.go:770] primary dev: ETH0
I0320 18:23:43.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:23:43.422617  543705 net.go:698] Add success.
I0320 18:23:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:23:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:23:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:23:53.409785  543705 memory.go:184] no items to output this cycle
I0320 18:23:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 18:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:24:03.409777  543705 memory.go:184] no items to output this cycle
I0320 18:24:03.409826  543705 cpu.go:275] no items to output this cycle
E0320 18:24:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:24:13.409779  543705 memory.go:191] Add success.
W0320 18:24:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:24:13.409811  543705 cpu.go:282] Add success.
W0320 18:24:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:24:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:24:13.420396  543705 net.go:648] Add success.
I0320 18:24:13.423172  543705 net.go:770] primary dev: ETH0
I0320 18:24:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:24:13.423196  543705 net.go:698] Add success.
I0320 18:24:13.463231  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8d95c754-579e-4089-a4cd-7e2242bf75bb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:24:13.463262  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:24:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:24:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:24:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 18:24:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:24:14.456673  543705 disk_worker.go:494] system disk:vda1
I0320 18:24:14.456701  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:24:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:24:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:24:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:24:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:24:23.409804  543705 memory.go:184] no items to output this cycle
I0320 18:24:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 18:24:25.566076  543705 disk_info.go:125] begin check local disk info of client
I0320 18:24:25.568523  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:24:25.568528  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5ac0 0xc0000c5b00]
E0320 18:24:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:24:33.409773  543705 memory.go:184] no items to output this cycle
I0320 18:24:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 18:24:38.583819  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:24:38.583825  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:24:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:24:43.410692  543705 memory.go:191] Add success.
I0320 18:24:43.409827  543705 cpu.go:282] Add success.
I0320 18:24:43.420423  543705 net.go:648] Add success.
I0320 18:24:43.423134  543705 net.go:770] primary dev: ETH0
I0320 18:24:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:24:43.423159  543705 net.go:698] Add success.
I0320 18:24:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:24:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:24:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:24:53.409787  543705 memory.go:184] no items to output this cycle
I0320 18:24:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 18:25:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:25:03.409804  543705 memory.go:184] no items to output this cycle
I0320 18:25:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 18:25:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:25:13.409794  543705 memory.go:191] Add success.
I0320 18:25:13.409816  543705 cpu.go:282] Add success.
W0320 18:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:25:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:25:13.420084  543705 net.go:648] Add success.
I0320 18:25:13.422916  543705 net.go:770] primary dev: ETH0
I0320 18:25:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:25:13.422953  543705 net.go:698] Add success.
I0320 18:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:25:14.455244  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:25:14.455316  543705 disk_worker.go:708] disk space is not compliant
W0320 18:25:14.455323  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:25:14.457492  543705 disk_worker.go:494] system disk:vda1
I0320 18:25:14.457534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:25:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:25:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:25:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:25:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:25:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:25:23.409818  543705 memory.go:184] no items to output this cycle
I0320 18:25:23.409830  543705 cpu.go:275] no items to output this cycle
I0320 18:25:25.569527  543705 disk_info.go:125] begin check local disk info of client
I0320 18:25:25.572392  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:25:25.572399  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac00 0xc00007ac40]
E0320 18:25:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:25:33.409774  543705 memory.go:184] no items to output this cycle
I0320 18:25:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 18:25:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:25:43.409802  543705 memory.go:191] Add success.
I0320 18:25:43.409805  543705 cpu.go:282] Add success.
I0320 18:25:43.419990  543705 net.go:648] Add success.
I0320 18:25:43.422697  543705 net.go:770] primary dev: ETH0
I0320 18:25:43.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:25:43.422726  543705 net.go:698] Add success.
I0320 18:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:25:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:25:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:25:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:25:53.409797  543705 cpu.go:275] no items to output this cycle
I0320 18:25:53.409805  543705 memory.go:184] no items to output this cycle
E0320 18:26:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:26:03.409784  543705 memory.go:184] no items to output this cycle
I0320 18:26:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 18:26:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:26:13.409820  543705 memory.go:191] Add success.
I0320 18:26:13.409830  543705 cpu.go:282] Add success.
W0320 18:26:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:26:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:26:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:26:13.420123  543705 net.go:648] Add success.
I0320 18:26:13.422859  543705 net.go:770] primary dev: ETH0
I0320 18:26:13.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:26:13.422884  543705 net.go:698] Add success.
I0320 18:26:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:26:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:26:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 18:26:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:26:14.456828  543705 disk_worker.go:494] system disk:vda1
I0320 18:26:14.456859  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:26:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:26:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:26:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:26:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:26:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:26:23.409782  543705 memory.go:184] no items to output this cycle
I0320 18:26:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 18:26:25.572485  543705 disk_info.go:125] begin check local disk info of client
I0320 18:26:25.575117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:26:25.575123  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025c080 0xc00025c0c0]
E0320 18:26:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:26:33.409805  543705 memory.go:184] no items to output this cycle
I0320 18:26:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 18:26:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:26:43.409787  543705 memory.go:191] Add success.
I0320 18:26:43.409786  543705 cpu.go:282] Add success.
I0320 18:26:43.419876  543705 net.go:648] Add success.
I0320 18:26:43.422868  543705 net.go:770] primary dev: ETH0
I0320 18:26:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:26:43.422893  543705 net.go:698] Add success.
I0320 18:26:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:26:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:26:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:26:53.409789  543705 memory.go:184] no items to output this cycle
I0320 18:26:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 18:27:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:27:03.409777  543705 memory.go:184] no items to output this cycle
I0320 18:27:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 18:27:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:27:13.409808  543705 memory.go:191] Add success.
I0320 18:27:13.409815  543705 cpu.go:282] Add success.
W0320 18:27:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:27:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:27:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:27:13.420133  543705 net.go:648] Add success.
I0320 18:27:13.422985  543705 net.go:770] primary dev: ETH0
I0320 18:27:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:27:13.423011  543705 net.go:698] Add success.
I0320 18:27:13.429241  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 18:27:13.453414  543705 event_worker.go:152] Polling the log file for events...
I0320 18:27:13.464295  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7dfcafc1-e6fc-414c-91b5-17abccf522f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:27:13.464337  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 18:27:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:27:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 18:27:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:27:14.456117  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:27:14.456138  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:27:14.456144  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:27:14.456428  543705 disk_worker.go:494] system disk:vda1
I0320 18:27:14.456459  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:27:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:27:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 18:27:16.458092  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:27:16.458097  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:27:16.458154  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:27:16.458172  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:27:16.472534  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:27:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:27:23.409777  543705 memory.go:184] no items to output this cycle
I0320 18:27:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 18:27:25.575509  543705 disk_info.go:125] begin check local disk info of client
I0320 18:27:25.578015  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:27:25.578020  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbf00 0xc0001fbf40]
E0320 18:27:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:27:33.409797  543705 memory.go:184] no items to output this cycle
I0320 18:27:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 18:27:38.584616  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:27:38.584623  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:27:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:27:43.410689  543705 memory.go:191] Add success.
I0320 18:27:43.409784  543705 cpu.go:282] Add success.
I0320 18:27:43.420377  543705 net.go:648] Add success.
I0320 18:27:43.423345  543705 net.go:770] primary dev: ETH0
I0320 18:27:43.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:27:43.423378  543705 net.go:698] Add success.
I0320 18:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:27:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:27:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:27:53.409776  543705 memory.go:184] no items to output this cycle
I0320 18:27:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 18:28:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:28:03.409788  543705 memory.go:184] no items to output this cycle
I0320 18:28:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 18:28:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:28:13.409811  543705 memory.go:191] Add success.
I0320 18:28:13.409819  543705 cpu.go:282] Add success.
W0320 18:28:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:28:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:28:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:28:13.420132  543705 net.go:648] Add success.
I0320 18:28:13.422612  543705 net.go:770] primary dev: ETH0
I0320 18:28:13.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:28:13.422639  543705 net.go:698] Add success.
I0320 18:28:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:28:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:28:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 18:28:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:28:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 18:28:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:28:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:28:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:28:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:28:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:28:23.409780  543705 memory.go:184] no items to output this cycle
I0320 18:28:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 18:28:25.578524  543705 disk_info.go:125] begin check local disk info of client
I0320 18:28:25.581084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:28:25.581091  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb380 0xc0001fb3c0]
E0320 18:28:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:28:33.409781  543705 memory.go:184] no items to output this cycle
I0320 18:28:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 18:28:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:28:43.409814  543705 memory.go:191] Add success.
I0320 18:28:43.409828  543705 cpu.go:282] Add success.
I0320 18:28:43.420072  543705 net.go:648] Add success.
I0320 18:28:43.422873  543705 net.go:770] primary dev: ETH0
I0320 18:28:43.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:28:43.422902  543705 net.go:698] Add success.
I0320 18:28:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:28:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:28:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:28:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:28:53.409779  543705 memory.go:184] no items to output this cycle
I0320 18:28:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 18:29:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:29:03.409773  543705 memory.go:184] no items to output this cycle
I0320 18:29:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 18:29:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:29:13.409789  543705 memory.go:191] Add success.
I0320 18:29:13.409795  543705 cpu.go:282] Add success.
W0320 18:29:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:29:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:29:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:29:13.420068  543705 net.go:648] Add success.
I0320 18:29:13.422771  543705 net.go:770] primary dev: ETH0
I0320 18:29:13.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:29:13.422799  543705 net.go:698] Add success.
I0320 18:29:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:29:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:29:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 18:29:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:29:14.456606  543705 disk_worker.go:494] system disk:vda1
I0320 18:29:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:29:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:29:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:29:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:29:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:29:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:29:23.409802  543705 memory.go:184] no items to output this cycle
I0320 18:29:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 18:29:25.581173  543705 disk_info.go:125] begin check local disk info of client
I0320 18:29:25.583641  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:29:25.583647  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387680 0xc0003876c0]
E0320 18:29:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:29:33.409797  543705 memory.go:184] no items to output this cycle
I0320 18:29:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 18:29:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:29:43.409812  543705 memory.go:191] Add success.
I0320 18:29:43.409829  543705 cpu.go:282] Add success.
I0320 18:29:43.420046  543705 net.go:648] Add success.
I0320 18:29:43.422763  543705 net.go:770] primary dev: ETH0
I0320 18:29:43.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:29:43.422788  543705 net.go:698] Add success.
I0320 18:29:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:29:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:29:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:29:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:29:53.409786  543705 memory.go:184] no items to output this cycle
I0320 18:29:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 18:30:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:30:03.409778  543705 memory.go:184] no items to output this cycle
I0320 18:30:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 18:30:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:30:13.409799  543705 memory.go:191] Add success.
I0320 18:30:13.409802  543705 cpu.go:282] Add success.
W0320 18:30:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:30:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:30:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:30:13.420061  543705 net.go:648] Add success.
I0320 18:30:13.422962  543705 net.go:770] primary dev: ETH0
I0320 18:30:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:30:13.422988  543705 net.go:698] Add success.
I0320 18:30:13.464790  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3423e485-1b24-4839-ab67-a7e0ce8a211f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:30:13.464825  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:30:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:30:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:30:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 18:30:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:30:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 18:30:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:30:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:30:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:30:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:30:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:30:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:30:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:30:23.409783  543705 memory.go:184] no items to output this cycle
I0320 18:30:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 18:30:25.584611  543705 disk_info.go:125] begin check local disk info of client
I0320 18:30:25.587230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:30:25.587237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b000 0xc00007b040]
E0320 18:30:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:30:33.409764  543705 memory.go:184] no items to output this cycle
I0320 18:30:33.409803  543705 cpu.go:275] no items to output this cycle
I0320 18:30:38.585692  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:30:38.585699  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:30:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:30:43.410638  543705 memory.go:191] Add success.
I0320 18:30:43.409821  543705 cpu.go:282] Add success.
I0320 18:30:43.420318  543705 net.go:648] Add success.
I0320 18:30:43.423032  543705 net.go:770] primary dev: ETH0
I0320 18:30:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:30:43.423057  543705 net.go:698] Add success.
I0320 18:30:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:30:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:30:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:30:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:30:53.409789  543705 memory.go:184] no items to output this cycle
I0320 18:30:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:31:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:31:03.409785  543705 memory.go:184] no items to output this cycle
I0320 18:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 18:31:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:31:13.409823  543705 memory.go:191] Add success.
I0320 18:31:13.409823  543705 cpu.go:282] Add success.
W0320 18:31:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:31:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:31:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:31:13.420156  543705 net.go:648] Add success.
I0320 18:31:13.422732  543705 net.go:770] primary dev: ETH0
I0320 18:31:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:31:13.422757  543705 net.go:698] Add success.
I0320 18:31:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:31:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:31:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 18:31:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:31:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 18:31:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:31:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:31:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:31:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:31:23.409771  543705 memory.go:184] no items to output this cycle
I0320 18:31:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 18:31:25.587598  543705 disk_info.go:125] begin check local disk info of client
I0320 18:31:25.590166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:31:25.590173  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328000 0xc000328040]
E0320 18:31:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:31:33.409780  543705 memory.go:184] no items to output this cycle
I0320 18:31:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:31:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:31:43.409780  543705 memory.go:191] Add success.
I0320 18:31:43.409819  543705 cpu.go:282] Add success.
I0320 18:31:43.419857  543705 net.go:648] Add success.
I0320 18:31:43.422999  543705 net.go:770] primary dev: ETH0
I0320 18:31:43.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:31:43.423024  543705 net.go:698] Add success.
I0320 18:31:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:31:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:31:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:31:53.409791  543705 memory.go:184] no items to output this cycle
I0320 18:31:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 18:32:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:32:03.409805  543705 memory.go:184] no items to output this cycle
I0320 18:32:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 18:32:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:32:13.409779  543705 memory.go:191] Add success.
W0320 18:32:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:32:13.409813  543705 cpu.go:282] Add success.
W0320 18:32:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:32:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:32:13.420093  543705 net.go:648] Add success.
I0320 18:32:13.422984  543705 net.go:770] primary dev: ETH0
I0320 18:32:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:32:13.423009  543705 net.go:698] Add success.
W0320 18:32:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:32:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 18:32:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:32:14.456937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:32:14.456946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:32:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:32:14.457001  543705 disk_worker.go:494] system disk:vda1
I0320 18:32:14.457043  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:32:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:32:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:32:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:32:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:32:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:32:16.458037  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:32:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:32:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:32:23.409781  543705 memory.go:184] no items to output this cycle
I0320 18:32:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 18:32:25.590245  543705 disk_info.go:125] begin check local disk info of client
I0320 18:32:25.592754  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:32:25.592762  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a90c0 0xc0004a9100]
E0320 18:32:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:32:33.409835  543705 memory.go:184] no items to output this cycle
I0320 18:32:33.409908  543705 cpu.go:275] no items to output this cycle
E0320 18:32:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:32:43.409817  543705 memory.go:191] Add success.
I0320 18:32:43.409826  543705 cpu.go:282] Add success.
I0320 18:32:43.419962  543705 net.go:648] Add success.
I0320 18:32:43.422602  543705 net.go:770] primary dev: ETH0
I0320 18:32:43.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:32:43.422626  543705 net.go:698] Add success.
I0320 18:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:32:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:32:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:32:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:32:53.409809  543705 memory.go:184] no items to output this cycle
I0320 18:32:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 18:33:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:33:03.409806  543705 memory.go:184] no items to output this cycle
I0320 18:33:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 18:33:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:33:13.409819  543705 memory.go:191] Add success.
I0320 18:33:13.409831  543705 cpu.go:282] Add success.
W0320 18:33:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:33:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:33:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:33:13.420205  543705 net.go:648] Add success.
I0320 18:33:13.422795  543705 net.go:770] primary dev: ETH0
I0320 18:33:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:33:13.422824  543705 net.go:698] Add success.
I0320 18:33:13.839389  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"88965d1d-e74f-4740-bfd8-42b2d0982577","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:33:13.839422  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:33:14.454684  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:33:14.454895  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:33:14.454906  543705 disk_worker.go:708] disk space is not compliant
W0320 18:33:14.454909  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:33:14.456240  543705 disk_worker.go:494] system disk:vda1
I0320 18:33:14.456286  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:33:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:33:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:33:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:33:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:33:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:33:23.409776  543705 memory.go:184] no items to output this cycle
I0320 18:33:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 18:33:25.593596  543705 disk_info.go:125] begin check local disk info of client
I0320 18:33:25.596015  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:33:25.596021  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486580 0xc0004865c0]
E0320 18:33:33.409901  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:33:33.409922  543705 memory.go:184] no items to output this cycle
I0320 18:33:33.410013  543705 cpu.go:275] no items to output this cycle
I0320 18:33:38.586634  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:33:38.586641  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:33:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:33:43.410683  543705 memory.go:191] Add success.
I0320 18:33:43.409812  543705 cpu.go:282] Add success.
I0320 18:33:43.420370  543705 net.go:648] Add success.
I0320 18:33:43.423119  543705 net.go:770] primary dev: ETH0
I0320 18:33:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:33:43.423148  543705 net.go:698] Add success.
I0320 18:33:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:33:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:33:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:33:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:33:53.409807  543705 memory.go:184] no items to output this cycle
I0320 18:33:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 18:34:03.409989  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:34:03.410007  543705 memory.go:184] no items to output this cycle
I0320 18:34:03.410018  543705 cpu.go:275] no items to output this cycle
E0320 18:34:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:34:13.409782  543705 memory.go:191] Add success.
W0320 18:34:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:34:13.409809  543705 cpu.go:282] Add success.
W0320 18:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:34:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:34:13.420104  543705 net.go:648] Add success.
I0320 18:34:13.422769  543705 net.go:770] primary dev: ETH0
I0320 18:34:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:34:13.422795  543705 net.go:698] Add success.
I0320 18:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:34:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:34:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 18:34:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:34:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 18:34:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:34:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:34:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:34:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:34:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:34:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:34:23.409805  543705 memory.go:184] no items to output this cycle
I0320 18:34:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 18:34:25.596117  543705 disk_info.go:125] begin check local disk info of client
I0320 18:34:25.598582  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:34:25.598589  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cc2c0 0xc0004cc300]
E0320 18:34:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:34:33.409795  543705 memory.go:184] no items to output this cycle
I0320 18:34:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 18:34:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:34:43.409785  543705 memory.go:191] Add success.
I0320 18:34:43.409804  543705 cpu.go:282] Add success.
I0320 18:34:43.419885  543705 net.go:648] Add success.
I0320 18:34:43.422406  543705 net.go:770] primary dev: ETH0
I0320 18:34:43.422420  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:34:43.422432  543705 net.go:698] Add success.
I0320 18:34:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:34:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:34:53.409777  543705 memory.go:184] no items to output this cycle
I0320 18:34:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 18:35:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:35:03.409779  543705 memory.go:184] no items to output this cycle
I0320 18:35:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 18:35:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:35:13.409786  543705 memory.go:191] Add success.
I0320 18:35:13.409803  543705 cpu.go:282] Add success.
W0320 18:35:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:35:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:35:13.420064  543705 net.go:648] Add success.
I0320 18:35:13.422999  543705 net.go:770] primary dev: ETH0
I0320 18:35:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:35:13.423027  543705 net.go:698] Add success.
I0320 18:35:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:35:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:35:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0320 18:35:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:35:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 18:35:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:35:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:35:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:35:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:35:23.409793  543705 memory.go:184] no items to output this cycle
I0320 18:35:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 18:35:25.598629  543705 disk_info.go:125] begin check local disk info of client
I0320 18:35:25.601071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:35:25.601078  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5740 0xc0004b5780]
E0320 18:35:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:35:33.409809  543705 memory.go:184] no items to output this cycle
I0320 18:35:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 18:35:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:35:43.409807  543705 memory.go:191] Add success.
I0320 18:35:43.409817  543705 cpu.go:282] Add success.
I0320 18:35:43.419865  543705 net.go:648] Add success.
I0320 18:35:43.422502  543705 net.go:770] primary dev: ETH0
I0320 18:35:43.422515  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:35:43.422527  543705 net.go:698] Add success.
I0320 18:35:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:35:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:35:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:35:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:35:53.409785  543705 memory.go:184] no items to output this cycle
I0320 18:35:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 18:36:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:36:03.409788  543705 memory.go:184] no items to output this cycle
I0320 18:36:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 18:36:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:36:13.409819  543705 memory.go:191] Add success.
I0320 18:36:13.409827  543705 cpu.go:282] Add success.
W0320 18:36:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:36:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:36:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:36:13.420049  543705 net.go:648] Add success.
I0320 18:36:13.422725  543705 net.go:770] primary dev: ETH0
I0320 18:36:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:36:13.422752  543705 net.go:698] Add success.
I0320 18:36:13.463305  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31a5b4b6-7243-4d49-adca-b732358e60d8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:36:13.463340  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:36:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:36:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0320 18:36:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:36:14.456699  543705 disk_worker.go:494] system disk:vda1
I0320 18:36:14.456736  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:36:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:36:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:36:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:36:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:36:23.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:36:23.409918  543705 memory.go:184] no items to output this cycle
I0320 18:36:23.410075  543705 cpu.go:275] no items to output this cycle
I0320 18:36:25.601651  543705 disk_info.go:125] begin check local disk info of client
I0320 18:36:25.604059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:36:25.604065  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c140 0xc00034c180]
E0320 18:36:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:36:33.409795  543705 memory.go:184] no items to output this cycle
I0320 18:36:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 18:36:38.587684  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:36:38.587690  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:36:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:36:43.410679  543705 memory.go:191] Add success.
I0320 18:36:43.409824  543705 cpu.go:282] Add success.
I0320 18:36:43.420439  543705 net.go:648] Add success.
I0320 18:36:43.423022  543705 net.go:770] primary dev: ETH0
I0320 18:36:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:36:43.423047  543705 net.go:698] Add success.
I0320 18:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:36:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:36:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:36:53.409782  543705 memory.go:184] no items to output this cycle
I0320 18:36:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:37:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:37:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 18:37:03.409792  543705 memory.go:184] no items to output this cycle
E0320 18:37:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:37:13.409793  543705 memory.go:191] Add success.
I0320 18:37:13.409796  543705 cpu.go:282] Add success.
W0320 18:37:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:37:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:37:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:37:13.420059  543705 net.go:648] Add success.
I0320 18:37:13.422899  543705 net.go:770] primary dev: ETH0
I0320 18:37:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:37:13.422926  543705 net.go:698] Add success.
I0320 18:37:13.453489  543705 event_worker.go:152] Polling the log file for events...
W0320 18:37:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:37:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 18:37:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:37:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:37:14.455888  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:37:14.455895  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:37:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 18:37:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:37:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:37:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:37:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:37:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:37:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:37:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:37:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:37:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:37:23.409872  543705 cpu.go:275] no items to output this cycle
I0320 18:37:23.409890  543705 memory.go:184] no items to output this cycle
I0320 18:37:25.604661  543705 disk_info.go:125] begin check local disk info of client
I0320 18:37:25.607118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:37:25.607124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9240 0xc0004a9280]
E0320 18:37:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:37:33.409774  543705 memory.go:184] no items to output this cycle
I0320 18:37:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 18:37:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:37:43.409790  543705 memory.go:191] Add success.
I0320 18:37:43.409791  543705 cpu.go:282] Add success.
I0320 18:37:43.420056  543705 net.go:648] Add success.
I0320 18:37:43.422748  543705 net.go:770] primary dev: ETH0
I0320 18:37:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:37:43.422774  543705 net.go:698] Add success.
I0320 18:37:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:37:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:37:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:37:53.409810  543705 memory.go:184] no items to output this cycle
I0320 18:37:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 18:38:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:38:03.409789  543705 cpu.go:275] no items to output this cycle
I0320 18:38:03.409801  543705 memory.go:184] no items to output this cycle
E0320 18:38:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:38:13.409792  543705 memory.go:191] Add success.
I0320 18:38:13.409792  543705 cpu.go:282] Add success.
W0320 18:38:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:38:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:38:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:38:13.420056  543705 net.go:648] Add success.
I0320 18:38:13.422825  543705 net.go:770] primary dev: ETH0
I0320 18:38:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:38:13.422850  543705 net.go:698] Add success.
I0320 18:38:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:38:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:38:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 18:38:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:38:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 18:38:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:38:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:38:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:38:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:38:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:38:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:38:23.409772  543705 memory.go:184] no items to output this cycle
I0320 18:38:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 18:38:25.607205  543705 disk_info.go:125] begin check local disk info of client
I0320 18:38:25.609668  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:38:25.609674  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4000 0xc0002a4040]
E0320 18:38:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:38:33.409804  543705 memory.go:184] no items to output this cycle
I0320 18:38:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 18:38:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:38:43.409779  543705 memory.go:191] Add success.
I0320 18:38:43.409807  543705 cpu.go:282] Add success.
I0320 18:38:43.419881  543705 net.go:648] Add success.
I0320 18:38:43.422398  543705 net.go:770] primary dev: ETH0
I0320 18:38:43.422410  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:38:43.422422  543705 net.go:698] Add success.
I0320 18:38:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:38:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:38:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:38:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:38:53.409808  543705 memory.go:184] no items to output this cycle
I0320 18:38:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 18:39:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:39:03.409809  543705 memory.go:184] no items to output this cycle
I0320 18:39:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 18:39:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:39:13.409793  543705 memory.go:191] Add success.
I0320 18:39:13.409807  543705 cpu.go:282] Add success.
W0320 18:39:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:39:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:39:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:39:13.420096  543705 net.go:648] Add success.
I0320 18:39:13.422673  543705 net.go:770] primary dev: ETH0
I0320 18:39:13.422690  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:39:13.422705  543705 net.go:698] Add success.
I0320 18:39:13.469077  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"13d45302-7618-4d71-8800-3a5e7e0c20b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:39:13.469112  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:39:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:39:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 18:39:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:39:14.456533  543705 disk_worker.go:494] system disk:vda1
I0320 18:39:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:39:15.455624  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:39:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:39:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:39:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:39:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:39:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:39:23.409773  543705 memory.go:184] no items to output this cycle
I0320 18:39:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 18:39:25.610695  543705 disk_info.go:125] begin check local disk info of client
I0320 18:39:25.613130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:39:25.613136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0320 18:39:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:39:33.409764  543705 memory.go:184] no items to output this cycle
I0320 18:39:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 18:39:38.588673  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:39:38.588680  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:39:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:39:43.410632  543705 memory.go:191] Add success.
I0320 18:39:43.409824  543705 cpu.go:282] Add success.
I0320 18:39:43.420378  543705 net.go:648] Add success.
I0320 18:39:43.423118  543705 net.go:770] primary dev: ETH0
I0320 18:39:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:39:43.423144  543705 net.go:698] Add success.
I0320 18:39:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:39:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:39:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:39:53.409784  543705 memory.go:184] no items to output this cycle
I0320 18:39:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 18:40:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:40:03.409779  543705 memory.go:184] no items to output this cycle
I0320 18:40:03.409848  543705 cpu.go:275] no items to output this cycle
E0320 18:40:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:40:13.409795  543705 memory.go:191] Add success.
I0320 18:40:13.409815  543705 cpu.go:282] Add success.
W0320 18:40:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:40:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:40:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:40:13.420245  543705 net.go:648] Add success.
I0320 18:40:13.423398  543705 net.go:770] primary dev: ETH0
I0320 18:40:13.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:40:13.423422  543705 net.go:698] Add success.
I0320 18:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:40:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:40:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 18:40:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:40:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 18:40:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:40:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:40:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:40:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:40:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:40:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:40:23.409780  543705 memory.go:184] no items to output this cycle
I0320 18:40:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 18:40:25.613671  543705 disk_info.go:125] begin check local disk info of client
I0320 18:40:25.616136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:40:25.616143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004da080 0xc0004da0c0]
E0320 18:40:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:40:33.409774  543705 memory.go:184] no items to output this cycle
I0320 18:40:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 18:40:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:40:43.409799  543705 memory.go:191] Add success.
I0320 18:40:43.409810  543705 cpu.go:282] Add success.
I0320 18:40:43.420066  543705 net.go:648] Add success.
I0320 18:40:43.422828  543705 net.go:770] primary dev: ETH0
I0320 18:40:43.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:40:43.422856  543705 net.go:698] Add success.
I0320 18:40:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:40:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:40:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:40:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:40:53.409799  543705 memory.go:184] no items to output this cycle
I0320 18:40:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 18:41:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:41:03.409823  543705 memory.go:184] no items to output this cycle
I0320 18:41:03.409830  543705 cpu.go:275] no items to output this cycle
E0320 18:41:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:41:13.409778  543705 memory.go:191] Add success.
W0320 18:41:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:41:13.409811  543705 cpu.go:282] Add success.
W0320 18:41:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:41:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:41:13.420055  543705 net.go:648] Add success.
I0320 18:41:13.422582  543705 net.go:770] primary dev: ETH0
I0320 18:41:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:41:13.422609  543705 net.go:698] Add success.
I0320 18:41:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:41:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:41:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 18:41:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:41:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 18:41:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:41:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:41:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:41:23.410613  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:41:23.410629  543705 memory.go:184] no items to output this cycle
I0320 18:41:23.410646  543705 cpu.go:275] no items to output this cycle
I0320 18:41:25.616718  543705 disk_info.go:125] begin check local disk info of client
I0320 18:41:25.619185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:41:25.619192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004863c0 0xc000486400]
E0320 18:41:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:41:33.409773  543705 memory.go:184] no items to output this cycle
I0320 18:41:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 18:41:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:41:43.409824  543705 memory.go:191] Add success.
I0320 18:41:43.409833  543705 cpu.go:282] Add success.
I0320 18:41:43.420056  543705 net.go:648] Add success.
I0320 18:41:43.425606  543705 net.go:770] primary dev: ETH0
I0320 18:41:43.425619  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:41:43.425630  543705 net.go:698] Add success.
I0320 18:41:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:41:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:41:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:41:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:41:53.409809  543705 memory.go:184] no items to output this cycle
I0320 18:41:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 18:42:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:42:03.409806  543705 memory.go:184] no items to output this cycle
I0320 18:42:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 18:42:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:42:13.409781  543705 memory.go:191] Add success.
I0320 18:42:13.409805  543705 cpu.go:282] Add success.
W0320 18:42:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:42:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:42:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:42:13.420112  543705 net.go:648] Add success.
I0320 18:42:13.423314  543705 net.go:770] primary dev: ETH0
I0320 18:42:13.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:42:13.423340  543705 net.go:698] Add success.
I0320 18:42:13.463795  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ec839d9-c2f0-431a-80b6-c302562f8f67","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:42:13.463829  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 18:42:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:42:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 18:42:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:42:14.455944  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:42:14.455953  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:42:14.455958  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:42:14.456444  543705 disk_worker.go:494] system disk:vda1
I0320 18:42:14.456471  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:42:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:42:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 18:42:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:42:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:42:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:42:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:42:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:42:23.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:42:23.410454  543705 cpu.go:275] no items to output this cycle
I0320 18:42:23.410508  543705 memory.go:184] no items to output this cycle
I0320 18:42:25.619734  543705 disk_info.go:125] begin check local disk info of client
I0320 18:42:25.622248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:42:25.622254  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b1c0 0xc00048b200]
E0320 18:42:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:42:33.409774  543705 memory.go:184] no items to output this cycle
I0320 18:42:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 18:42:38.589673  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:42:38.589679  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:42:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:42:43.410654  543705 memory.go:191] Add success.
I0320 18:42:43.409823  543705 cpu.go:282] Add success.
I0320 18:42:43.420338  543705 net.go:648] Add success.
I0320 18:42:43.423197  543705 net.go:770] primary dev: ETH0
I0320 18:42:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:42:43.423221  543705 net.go:698] Add success.
I0320 18:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:42:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:42:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:42:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:42:53.409773  543705 memory.go:184] no items to output this cycle
I0320 18:42:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 18:43:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:43:03.409811  543705 memory.go:184] no items to output this cycle
I0320 18:43:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 18:43:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:43:13.409790  543705 memory.go:191] Add success.
I0320 18:43:13.409790  543705 cpu.go:282] Add success.
W0320 18:43:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:43:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:43:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:43:13.420104  543705 net.go:648] Add success.
I0320 18:43:13.423314  543705 net.go:770] primary dev: ETH0
I0320 18:43:13.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:43:13.423340  543705 net.go:698] Add success.
I0320 18:43:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:43:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:43:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 18:43:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:43:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 18:43:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:43:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:43:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:43:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:43:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:43:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:43:23.409886  543705 cpu.go:275] no items to output this cycle
I0320 18:43:23.409889  543705 memory.go:184] no items to output this cycle
I0320 18:43:25.622753  543705 disk_info.go:125] begin check local disk info of client
I0320 18:43:25.625187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:43:25.625193  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329440 0xc000329480]
E0320 18:43:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:43:33.409795  543705 memory.go:184] no items to output this cycle
I0320 18:43:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 18:43:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:43:43.409794  543705 memory.go:191] Add success.
I0320 18:43:43.409800  543705 cpu.go:282] Add success.
I0320 18:43:43.419962  543705 net.go:648] Add success.
I0320 18:43:43.422762  543705 net.go:770] primary dev: ETH0
I0320 18:43:43.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:43:43.422792  543705 net.go:698] Add success.
I0320 18:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:43:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:43:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:43:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:43:53.409769  543705 memory.go:184] no items to output this cycle
I0320 18:43:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 18:44:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:44:03.409787  543705 memory.go:184] no items to output this cycle
I0320 18:44:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 18:44:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:44:13.409806  543705 memory.go:191] Add success.
I0320 18:44:13.409812  543705 cpu.go:282] Add success.
W0320 18:44:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:44:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:44:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:44:13.420070  543705 net.go:648] Add success.
I0320 18:44:13.422751  543705 net.go:770] primary dev: ETH0
I0320 18:44:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:44:13.422777  543705 net.go:698] Add success.
I0320 18:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:44:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:44:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 18:44:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:44:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 18:44:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:44:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:44:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:44:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:44:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:44:23.409763  543705 memory.go:184] no items to output this cycle
I0320 18:44:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 18:44:25.625673  543705 disk_info.go:125] begin check local disk info of client
I0320 18:44:25.628132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:44:25.628138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba000 0xc0003ba040]
E0320 18:44:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:44:33.409762  543705 memory.go:184] no items to output this cycle
I0320 18:44:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 18:44:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:44:43.409811  543705 memory.go:191] Add success.
I0320 18:44:43.409816  543705 cpu.go:282] Add success.
I0320 18:44:43.419970  543705 net.go:648] Add success.
I0320 18:44:43.422783  543705 net.go:770] primary dev: ETH0
I0320 18:44:43.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:44:43.422807  543705 net.go:698] Add success.
I0320 18:44:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:44:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:44:53.409808  543705 memory.go:184] no items to output this cycle
I0320 18:44:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 18:45:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:45:03.409782  543705 memory.go:184] no items to output this cycle
I0320 18:45:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 18:45:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:45:13.409808  543705 memory.go:191] Add success.
I0320 18:45:13.409817  543705 cpu.go:282] Add success.
W0320 18:45:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:45:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:45:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:45:13.420048  543705 net.go:648] Add success.
I0320 18:45:13.422870  543705 net.go:770] primary dev: ETH0
I0320 18:45:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:45:13.422895  543705 net.go:698] Add success.
I0320 18:45:13.469156  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c78c04d-9fef-4468-98bd-471a0d423560","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:45:13.469188  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:45:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:45:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:45:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 18:45:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:45:14.456599  543705 disk_worker.go:494] system disk:vda1
I0320 18:45:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:45:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:45:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:45:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:45:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:45:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:45:23.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:45:23.409848  543705 memory.go:184] no items to output this cycle
I0320 18:45:23.409963  543705 cpu.go:275] no items to output this cycle
I0320 18:45:25.628219  543705 disk_info.go:125] begin check local disk info of client
I0320 18:45:25.630679  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:45:25.630686  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032fcc0 0xc00032fd00]
E0320 18:45:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:45:33.409799  543705 memory.go:184] no items to output this cycle
I0320 18:45:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 18:45:38.589824  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:45:38.589831  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:45:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:45:43.410578  543705 memory.go:191] Add success.
I0320 18:45:43.409800  543705 cpu.go:282] Add success.
I0320 18:45:43.420304  543705 net.go:648] Add success.
I0320 18:45:43.422837  543705 net.go:770] primary dev: ETH0
I0320 18:45:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:45:43.422862  543705 net.go:698] Add success.
I0320 18:45:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:45:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:45:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:45:53.409772  543705 memory.go:184] no items to output this cycle
I0320 18:45:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 18:46:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:46:03.409810  543705 memory.go:184] no items to output this cycle
I0320 18:46:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 18:46:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:46:13.409785  543705 memory.go:191] Add success.
I0320 18:46:13.409803  543705 cpu.go:282] Add success.
W0320 18:46:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:46:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:46:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:46:13.420137  543705 net.go:648] Add success.
I0320 18:46:13.422847  543705 net.go:770] primary dev: ETH0
I0320 18:46:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:46:13.422874  543705 net.go:698] Add success.
I0320 18:46:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:46:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:46:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 18:46:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:46:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 18:46:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:46:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:46:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:46:23.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:46:23.409864  543705 cpu.go:275] no items to output this cycle
I0320 18:46:23.409872  543705 memory.go:184] no items to output this cycle
I0320 18:46:25.630768  543705 disk_info.go:125] begin check local disk info of client
I0320 18:46:25.633218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:46:25.633224  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386d00 0xc000386d40]
E0320 18:46:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:46:33.409792  543705 memory.go:184] no items to output this cycle
I0320 18:46:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 18:46:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:46:43.409793  543705 memory.go:191] Add success.
I0320 18:46:43.409798  543705 cpu.go:282] Add success.
I0320 18:46:43.419864  543705 net.go:648] Add success.
I0320 18:46:43.422574  543705 net.go:770] primary dev: ETH0
I0320 18:46:43.422586  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:46:43.422598  543705 net.go:698] Add success.
I0320 18:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:46:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:46:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:46:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:46:53.409779  543705 memory.go:184] no items to output this cycle
I0320 18:46:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 18:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:47:03.409786  543705 memory.go:184] no items to output this cycle
I0320 18:47:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 18:47:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:47:13.409790  543705 memory.go:191] Add success.
I0320 18:47:13.409791  543705 cpu.go:282] Add success.
W0320 18:47:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:47:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:47:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:47:13.420215  543705 net.go:648] Add success.
I0320 18:47:13.422933  543705 net.go:770] primary dev: ETH0
I0320 18:47:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:47:13.422961  543705 net.go:698] Add success.
I0320 18:47:13.453509  543705 event_worker.go:152] Polling the log file for events...
W0320 18:47:14.455238  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:47:14.455255  543705 disk_worker.go:708] disk space is not compliant
W0320 18:47:14.455259  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:47:14.455915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:47:14.455924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:47:14.455930  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:47:14.456839  543705 disk_worker.go:494] system disk:vda1
I0320 18:47:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:47:15.456855  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:47:15.456864  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:47:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:47:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:47:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:47:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:47:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:47:23.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:47:23.409899  543705 cpu.go:275] no items to output this cycle
I0320 18:47:23.409973  543705 memory.go:184] no items to output this cycle
I0320 18:47:25.633673  543705 disk_info.go:125] begin check local disk info of client
I0320 18:47:25.636091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:47:25.636096  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331a00 0xc000331a40]
E0320 18:47:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:47:33.409772  543705 memory.go:184] no items to output this cycle
I0320 18:47:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 18:47:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:47:43.409788  543705 memory.go:191] Add success.
I0320 18:47:43.409788  543705 cpu.go:282] Add success.
I0320 18:47:43.419974  543705 net.go:648] Add success.
I0320 18:47:43.423275  543705 net.go:770] primary dev: ETH0
I0320 18:47:43.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:47:43.423301  543705 net.go:698] Add success.
I0320 18:47:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:47:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:47:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:47:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:47:53.409779  543705 memory.go:184] no items to output this cycle
I0320 18:47:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 18:48:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:48:03.409805  543705 memory.go:184] no items to output this cycle
I0320 18:48:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 18:48:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:48:13.409800  543705 memory.go:191] Add success.
I0320 18:48:13.409800  543705 cpu.go:282] Add success.
W0320 18:48:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:48:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:48:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:48:13.420183  543705 net.go:648] Add success.
I0320 18:48:13.422818  543705 net.go:770] primary dev: ETH0
I0320 18:48:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:48:13.422845  543705 net.go:698] Add success.
I0320 18:48:13.470047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a700ff0c-93b0-4806-b1e7-113c75185ffa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:48:13.470079  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:48:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:48:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:48:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 18:48:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:48:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 18:48:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:48:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:48:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:48:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:48:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:48:23.409766  543705 memory.go:184] no items to output this cycle
I0320 18:48:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 18:48:25.636822  543705 disk_info.go:125] begin check local disk info of client
I0320 18:48:25.639298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:48:25.639304  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e3d40 0xc0004e3d80]
E0320 18:48:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:48:33.409777  543705 memory.go:184] no items to output this cycle
I0320 18:48:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 18:48:38.590650  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:48:38.590656  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:48:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:48:43.410836  543705 memory.go:191] Add success.
I0320 18:48:43.409817  543705 cpu.go:282] Add success.
I0320 18:48:43.420339  543705 net.go:770] primary dev: ETH0
I0320 18:48:43.420351  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:48:43.420364  543705 net.go:698] Add success.
I0320 18:48:43.420704  543705 net.go:648] Add success.
I0320 18:48:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:48:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:48:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:48:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:48:53.409785  543705 memory.go:184] no items to output this cycle
I0320 18:48:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 18:49:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:49:03.409778  543705 memory.go:184] no items to output this cycle
I0320 18:49:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 18:49:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:49:13.409806  543705 memory.go:191] Add success.
I0320 18:49:13.409810  543705 cpu.go:282] Add success.
W0320 18:49:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:49:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:49:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:49:13.419869  543705 net.go:770] primary dev: ETH0
I0320 18:49:13.419884  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:49:13.419898  543705 net.go:698] Add success.
I0320 18:49:13.420265  543705 net.go:648] Add success.
I0320 18:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:49:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:49:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 18:49:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:49:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 18:49:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:49:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:49:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:49:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:49:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:49:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:49:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:49:23.409781  543705 memory.go:184] no items to output this cycle
I0320 18:49:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 18:49:25.639389  543705 disk_info.go:125] begin check local disk info of client
I0320 18:49:25.641916  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:49:25.641923  543705 disk_info.go:196] parse disk info done, disk is : [0xc000325000 0xc000325040]
E0320 18:49:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:49:33.409775  543705 memory.go:184] no items to output this cycle
I0320 18:49:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 18:49:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:49:43.409788  543705 memory.go:191] Add success.
I0320 18:49:43.409803  543705 cpu.go:282] Add success.
I0320 18:49:43.419859  543705 net.go:648] Add success.
I0320 18:49:43.422614  543705 net.go:770] primary dev: ETH0
I0320 18:49:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:49:43.422652  543705 net.go:698] Add success.
I0320 18:49:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:49:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:49:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:49:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:49:53.409796  543705 memory.go:184] no items to output this cycle
I0320 18:49:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 18:50:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:50:03.409809  543705 memory.go:184] no items to output this cycle
I0320 18:50:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 18:50:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:50:13.409824  543705 memory.go:191] Add success.
I0320 18:50:13.409828  543705 cpu.go:282] Add success.
W0320 18:50:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:50:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:50:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:50:13.420069  543705 net.go:648] Add success.
I0320 18:50:13.423122  543705 net.go:770] primary dev: ETH0
I0320 18:50:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:50:13.423151  543705 net.go:698] Add success.
I0320 18:50:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:50:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:50:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 18:50:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:50:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 18:50:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:50:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:50:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:50:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:50:23.409780  543705 memory.go:184] no items to output this cycle
I0320 18:50:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 18:50:25.642011  543705 disk_info.go:125] begin check local disk info of client
I0320 18:50:25.644457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:50:25.644464  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266000 0xc000266040]
E0320 18:50:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:50:33.409820  543705 memory.go:184] no items to output this cycle
I0320 18:50:33.409827  543705 cpu.go:275] no items to output this cycle
E0320 18:50:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:50:43.409797  543705 memory.go:191] Add success.
I0320 18:50:43.409799  543705 cpu.go:282] Add success.
I0320 18:50:43.419990  543705 net.go:648] Add success.
I0320 18:50:43.422823  543705 net.go:770] primary dev: ETH0
I0320 18:50:43.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:50:43.422853  543705 net.go:698] Add success.
I0320 18:50:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:50:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:50:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:50:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:50:53.409820  543705 memory.go:184] no items to output this cycle
I0320 18:50:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 18:51:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:51:03.409789  543705 memory.go:184] no items to output this cycle
I0320 18:51:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 18:51:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:51:13.409794  543705 memory.go:191] Add success.
I0320 18:51:13.409805  543705 cpu.go:282] Add success.
W0320 18:51:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:51:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:51:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:51:13.420142  543705 net.go:648] Add success.
I0320 18:51:13.423173  543705 net.go:770] primary dev: ETH0
I0320 18:51:13.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:51:13.423199  543705 net.go:698] Add success.
I0320 18:51:13.464652  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"026ee7fe-ec02-41c2-9c08-b2acd676caea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:51:13.464685  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:51:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:51:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:51:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 18:51:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:51:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 18:51:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:51:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:51:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:51:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:51:23.410356  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:51:23.410370  543705 memory.go:184] no items to output this cycle
I0320 18:51:23.410372  543705 cpu.go:275] no items to output this cycle
I0320 18:51:25.644865  543705 disk_info.go:125] begin check local disk info of client
I0320 18:51:25.647325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:51:25.647332  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048df00 0xc00048df40]
E0320 18:51:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:51:33.409774  543705 memory.go:184] no items to output this cycle
I0320 18:51:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 18:51:38.591703  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:51:38.591710  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:51:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:51:43.410744  543705 memory.go:191] Add success.
I0320 18:51:43.409803  543705 cpu.go:282] Add success.
I0320 18:51:43.420506  543705 net.go:648] Add success.
I0320 18:51:43.423283  543705 net.go:770] primary dev: ETH0
I0320 18:51:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:51:43.423308  543705 net.go:698] Add success.
I0320 18:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:51:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:51:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:51:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:51:53.409792  543705 memory.go:184] no items to output this cycle
I0320 18:51:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 18:52:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:52:03.409777  543705 memory.go:184] no items to output this cycle
I0320 18:52:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 18:52:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:52:13.409818  543705 memory.go:191] Add success.
I0320 18:52:13.409818  543705 cpu.go:282] Add success.
W0320 18:52:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:52:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:52:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:52:13.420308  543705 net.go:648] Add success.
I0320 18:52:13.423053  543705 net.go:770] primary dev: ETH0
I0320 18:52:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:52:13.423078  543705 net.go:698] Add success.
W0320 18:52:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:52:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 18:52:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:52:14.456786  543705 disk_worker.go:494] system disk:vda1
I0320 18:52:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:52:14.457133  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:52:14.457140  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:52:14.457145  543705 custom_config.go:64] query custom config with name: gpu
E0320 18:52:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:52:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:52:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:52:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:52:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:52:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:52:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:52:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:52:23.409770  543705 memory.go:184] no items to output this cycle
I0320 18:52:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 18:52:25.647414  543705 disk_info.go:125] begin check local disk info of client
I0320 18:52:25.649896  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:52:25.649902  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb080 0xc0001fb0c0]
E0320 18:52:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:52:33.409773  543705 memory.go:184] no items to output this cycle
I0320 18:52:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 18:52:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:52:43.409790  543705 memory.go:191] Add success.
I0320 18:52:43.409794  543705 cpu.go:282] Add success.
I0320 18:52:43.419887  543705 net.go:648] Add success.
I0320 18:52:43.422634  543705 net.go:770] primary dev: ETH0
I0320 18:52:43.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:52:43.422659  543705 net.go:698] Add success.
I0320 18:52:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:52:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:52:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:52:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:52:53.409786  543705 memory.go:184] no items to output this cycle
I0320 18:52:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 18:53:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:53:03.409805  543705 memory.go:184] no items to output this cycle
I0320 18:53:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 18:53:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:53:13.409783  543705 memory.go:191] Add success.
W0320 18:53:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:53:13.409807  543705 cpu.go:282] Add success.
W0320 18:53:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:53:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:53:13.420213  543705 net.go:648] Add success.
I0320 18:53:13.422954  543705 net.go:770] primary dev: ETH0
I0320 18:53:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:53:13.422979  543705 net.go:698] Add success.
I0320 18:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:53:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:53:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 18:53:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:53:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 18:53:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:53:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:53:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:53:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:53:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:53:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:53:23.409810  543705 memory.go:184] no items to output this cycle
I0320 18:53:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 18:53:25.649982  543705 disk_info.go:125] begin check local disk info of client
I0320 18:53:25.652424  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:53:25.652430  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b400 0xc00007b440]
E0320 18:53:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:53:33.409795  543705 memory.go:184] no items to output this cycle
I0320 18:53:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 18:53:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:53:43.409790  543705 memory.go:191] Add success.
I0320 18:53:43.409791  543705 cpu.go:282] Add success.
I0320 18:53:43.419974  543705 net.go:648] Add success.
I0320 18:53:43.422579  543705 net.go:770] primary dev: ETH0
I0320 18:53:43.422594  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:53:43.422608  543705 net.go:698] Add success.
I0320 18:53:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:53:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:53:53.410590  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:53:53.410613  543705 memory.go:184] no items to output this cycle
I0320 18:53:53.410627  543705 cpu.go:275] no items to output this cycle
E0320 18:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:54:03.409777  543705 memory.go:184] no items to output this cycle
I0320 18:54:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 18:54:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:54:13.409813  543705 memory.go:191] Add success.
I0320 18:54:13.409818  543705 cpu.go:282] Add success.
W0320 18:54:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:54:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:54:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:54:13.420241  543705 net.go:648] Add success.
I0320 18:54:13.423213  543705 net.go:770] primary dev: ETH0
I0320 18:54:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:54:13.423238  543705 net.go:698] Add success.
I0320 18:54:13.545348  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"92a0a95a-8238-4933-9db1-8a9eaf8437ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:54:13.545378  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 18:54:14.453983  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:54:14.454170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:54:14.454257  543705 disk_worker.go:708] disk space is not compliant
W0320 18:54:14.454261  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:54:14.455797  543705 disk_worker.go:494] system disk:vda1
I0320 18:54:14.455828  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:54:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:54:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:54:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:54:23.409824  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:54:23.409846  543705 memory.go:184] no items to output this cycle
I0320 18:54:23.409910  543705 cpu.go:275] no items to output this cycle
I0320 18:54:25.652509  543705 disk_info.go:125] begin check local disk info of client
I0320 18:54:25.654978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:54:25.654984  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002af9c0 0xc0002afa00]
E0320 18:54:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:54:33.409758  543705 memory.go:184] no items to output this cycle
I0320 18:54:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 18:54:38.592667  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:54:38.592673  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:54:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:54:43.410691  543705 memory.go:191] Add success.
I0320 18:54:43.409804  543705 cpu.go:282] Add success.
I0320 18:54:43.420439  543705 net.go:648] Add success.
I0320 18:54:43.423152  543705 net.go:770] primary dev: ETH0
I0320 18:54:43.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:54:43.423195  543705 net.go:698] Add success.
I0320 18:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:54:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:54:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:54:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:54:53.409809  543705 memory.go:184] no items to output this cycle
I0320 18:54:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 18:55:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:55:03.409775  543705 memory.go:184] no items to output this cycle
I0320 18:55:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 18:55:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:55:13.409808  543705 memory.go:191] Add success.
I0320 18:55:13.409817  543705 cpu.go:282] Add success.
W0320 18:55:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:55:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:55:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:55:13.420099  543705 net.go:648] Add success.
I0320 18:55:13.422698  543705 net.go:770] primary dev: ETH0
I0320 18:55:13.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:55:13.422724  543705 net.go:698] Add success.
I0320 18:55:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:55:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:55:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 18:55:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:55:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 18:55:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:55:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:55:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:55:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:55:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:55:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:55:23.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:55:23.409864  543705 memory.go:184] no items to output this cycle
I0320 18:55:23.409943  543705 cpu.go:275] no items to output this cycle
I0320 18:55:25.655911  543705 disk_info.go:125] begin check local disk info of client
I0320 18:55:25.658363  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:55:25.658369  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b4240 0xc0002b4280]
E0320 18:55:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:55:33.409759  543705 memory.go:184] no items to output this cycle
I0320 18:55:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 18:55:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:55:43.409815  543705 memory.go:191] Add success.
I0320 18:55:43.409819  543705 cpu.go:282] Add success.
I0320 18:55:43.420011  543705 net.go:648] Add success.
I0320 18:55:43.422753  543705 net.go:770] primary dev: ETH0
I0320 18:55:43.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:55:43.422782  543705 net.go:698] Add success.
I0320 18:55:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:55:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:55:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:55:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:55:53.409784  543705 memory.go:184] no items to output this cycle
I0320 18:55:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 18:56:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:56:03.409805  543705 memory.go:184] no items to output this cycle
I0320 18:56:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 18:56:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:56:13.409821  543705 memory.go:191] Add success.
I0320 18:56:13.409825  543705 cpu.go:282] Add success.
W0320 18:56:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:56:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:56:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:56:13.420260  543705 net.go:648] Add success.
I0320 18:56:13.422982  543705 net.go:770] primary dev: ETH0
I0320 18:56:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:56:13.423010  543705 net.go:698] Add success.
I0320 18:56:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:56:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:56:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 18:56:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:56:14.456505  543705 disk_worker.go:494] system disk:vda1
I0320 18:56:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:56:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:56:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:56:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:56:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:56:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:56:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 18:56:23.409784  543705 memory.go:184] no items to output this cycle
I0320 18:56:25.658926  543705 disk_info.go:125] begin check local disk info of client
I0320 18:56:25.661371  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:56:25.661468  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052ff40 0xc000530000]
E0320 18:56:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:56:33.409800  543705 memory.go:184] no items to output this cycle
I0320 18:56:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 18:56:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:56:43.409782  543705 memory.go:191] Add success.
I0320 18:56:43.409799  543705 cpu.go:282] Add success.
I0320 18:56:43.419972  543705 net.go:648] Add success.
I0320 18:56:43.422517  543705 net.go:770] primary dev: ETH0
I0320 18:56:43.422530  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:56:43.422543  543705 net.go:698] Add success.
I0320 18:56:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:56:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:56:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:56:53.409786  543705 memory.go:184] no items to output this cycle
I0320 18:56:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 18:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:57:03.409779  543705 memory.go:184] no items to output this cycle
I0320 18:57:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 18:57:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:57:13.409789  543705 memory.go:191] Add success.
I0320 18:57:13.409805  543705 cpu.go:282] Add success.
W0320 18:57:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:57:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:57:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:57:13.420210  543705 net.go:648] Add success.
I0320 18:57:13.428726  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 18:57:13.428801  543705 net.go:770] primary dev: ETH0
I0320 18:57:13.428813  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:57:13.428825  543705 net.go:698] Add success.
I0320 18:57:13.453383  543705 event_worker.go:152] Polling the log file for events...
I0320 18:57:13.463994  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"66cb085d-f745-4fb5-8067-b973c26a1ac5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 18:57:13.464026  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 18:57:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:57:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 18:57:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0320 18:57:14.455897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 18:57:14.455906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 18:57:14.455911  543705 custom_config.go:64] query custom config with name: gpu
I0320 18:57:14.456540  543705 disk_worker.go:494] system disk:vda1
I0320 18:57:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 18:57:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 18:57:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:57:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 18:57:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 18:57:16.457954  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:57:16.457973  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:57:16.472291  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:57:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:57:23.409794  543705 memory.go:184] no items to output this cycle
I0320 18:57:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 18:57:25.661672  543705 disk_info.go:125] begin check local disk info of client
I0320 18:57:25.664089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:57:25.664095  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034fec0 0xc00034ff00]
E0320 18:57:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:57:33.409767  543705 memory.go:184] no items to output this cycle
I0320 18:57:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 18:57:38.593683  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 18:57:38.593690  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 18:57:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:57:43.410901  543705 memory.go:191] Add success.
I0320 18:57:43.409804  543705 cpu.go:282] Add success.
I0320 18:57:43.420677  543705 net.go:648] Add success.
I0320 18:57:43.423573  543705 net.go:770] primary dev: ETH0
I0320 18:57:43.423587  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:57:43.423601  543705 net.go:698] Add success.
I0320 18:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:57:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:57:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:57:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:57:53.409782  543705 memory.go:184] no items to output this cycle
I0320 18:57:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 18:58:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:58:03.409809  543705 memory.go:184] no items to output this cycle
I0320 18:58:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 18:58:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:58:13.409784  543705 memory.go:191] Add success.
W0320 18:58:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 18:58:13.409811  543705 cpu.go:282] Add success.
W0320 18:58:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:58:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:58:13.420078  543705 net.go:648] Add success.
I0320 18:58:13.423003  543705 net.go:770] primary dev: ETH0
I0320 18:58:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:58:13.423029  543705 net.go:698] Add success.
I0320 18:58:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:58:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:58:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 18:58:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:58:14.456573  543705 disk_worker.go:494] system disk:vda1
I0320 18:58:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:58:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:58:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:58:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:58:23.409792  543705 memory.go:184] no items to output this cycle
I0320 18:58:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 18:58:25.664968  543705 disk_info.go:125] begin check local disk info of client
I0320 18:58:25.667429  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:58:25.667434  543705 disk_info.go:196] parse disk info done, disk is : [0xc000543480 0xc0005434c0]
E0320 18:58:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:58:33.409796  543705 memory.go:184] no items to output this cycle
I0320 18:58:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 18:58:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:58:43.409786  543705 memory.go:191] Add success.
I0320 18:58:43.409818  543705 cpu.go:282] Add success.
I0320 18:58:43.419848  543705 net.go:648] Add success.
I0320 18:58:43.422488  543705 net.go:770] primary dev: ETH0
I0320 18:58:43.422500  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:58:43.422513  543705 net.go:698] Add success.
I0320 18:58:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:58:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:58:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:58:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:58:53.409776  543705 memory.go:184] no items to output this cycle
I0320 18:58:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 18:59:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:59:03.409796  543705 cpu.go:275] no items to output this cycle
I0320 18:59:03.409805  543705 memory.go:184] no items to output this cycle
E0320 18:59:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:59:13.409818  543705 memory.go:191] Add success.
I0320 18:59:13.409826  543705 cpu.go:282] Add success.
W0320 18:59:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 18:59:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 18:59:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 18:59:13.420088  543705 net.go:648] Add success.
I0320 18:59:13.422907  543705 net.go:770] primary dev: ETH0
I0320 18:59:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:59:13.422933  543705 net.go:698] Add success.
I0320 18:59:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 18:59:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 18:59:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 18:59:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 18:59:14.456500  543705 disk_worker.go:494] system disk:vda1
I0320 18:59:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 18:59:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 18:59:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:59:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:59:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 18:59:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 18:59:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:59:23.409800  543705 memory.go:184] no items to output this cycle
I0320 18:59:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 18:59:25.667969  543705 disk_info.go:125] begin check local disk info of client
I0320 18:59:25.670626  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 18:59:25.670632  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046c640 0xc00046c680]
E0320 18:59:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:59:33.409902  543705 memory.go:184] no items to output this cycle
I0320 18:59:33.409916  543705 cpu.go:275] no items to output this cycle
E0320 18:59:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:59:43.409824  543705 memory.go:191] Add success.
I0320 18:59:43.409834  543705 cpu.go:282] Add success.
I0320 18:59:43.420037  543705 net.go:648] Add success.
I0320 18:59:43.422866  543705 net.go:770] primary dev: ETH0
I0320 18:59:43.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0320 18:59:43.422890  543705 net.go:698] Add success.
I0320 18:59:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 18:59:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 18:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 18:59:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 18:59:53.409778  543705 memory.go:184] no items to output this cycle
I0320 18:59:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 19:00:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:00:03.409795  543705 memory.go:184] no items to output this cycle
I0320 19:00:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:00:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:00:13.409817  543705 memory.go:191] Add success.
I0320 19:00:13.409827  543705 cpu.go:282] Add success.
W0320 19:00:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:00:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:00:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:00:13.420101  543705 net.go:648] Add success.
I0320 19:00:13.423115  543705 net.go:770] primary dev: ETH0
I0320 19:00:13.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:00:13.423141  543705 net.go:698] Add success.
I0320 19:00:13.471427  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5565a369-1e03-4d9a-921e-3b95897419c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:00:13.471460  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:00:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:00:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:00:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 19:00:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:00:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 19:00:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:00:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:00:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:00:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:00:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:00:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:00:23.409774  543705 memory.go:184] no items to output this cycle
I0320 19:00:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 19:00:25.670712  543705 disk_info.go:125] begin check local disk info of client
I0320 19:00:25.673147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:00:25.673153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ca100 0xc0004ca140]
E0320 19:00:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:00:33.409919  543705 memory.go:184] no items to output this cycle
I0320 19:00:33.409938  543705 cpu.go:275] no items to output this cycle
I0320 19:00:38.593839  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:00:38.593845  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:00:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:00:43.410670  543705 memory.go:191] Add success.
I0320 19:00:43.409814  543705 cpu.go:282] Add success.
I0320 19:00:43.420402  543705 net.go:648] Add success.
I0320 19:00:43.423168  543705 net.go:770] primary dev: ETH0
I0320 19:00:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:00:43.423200  543705 net.go:698] Add success.
I0320 19:00:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:00:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:00:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:00:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:00:53.409781  543705 memory.go:184] no items to output this cycle
I0320 19:00:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 19:01:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:01:03.409787  543705 cpu.go:275] no items to output this cycle
I0320 19:01:03.409802  543705 memory.go:184] no items to output this cycle
E0320 19:01:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:01:13.409795  543705 memory.go:191] Add success.
I0320 19:01:13.409809  543705 cpu.go:282] Add success.
W0320 19:01:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:01:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:01:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:01:13.420080  543705 net.go:648] Add success.
I0320 19:01:13.422898  543705 net.go:770] primary dev: ETH0
I0320 19:01:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:01:13.422926  543705 net.go:698] Add success.
I0320 19:01:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:01:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:01:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 19:01:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:01:14.456486  543705 disk_worker.go:494] system disk:vda1
I0320 19:01:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:01:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:01:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:01:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:01:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:01:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:01:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:01:23.409792  543705 memory.go:184] no items to output this cycle
I0320 19:01:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 19:01:25.673670  543705 disk_info.go:125] begin check local disk info of client
I0320 19:01:25.676143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:01:25.676148  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358700 0xc000358740]
E0320 19:01:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:01:33.409790  543705 memory.go:184] no items to output this cycle
I0320 19:01:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:01:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:01:43.409831  543705 memory.go:191] Add success.
I0320 19:01:43.409838  543705 cpu.go:282] Add success.
I0320 19:01:43.420010  543705 net.go:648] Add success.
I0320 19:01:43.422705  543705 net.go:770] primary dev: ETH0
I0320 19:01:43.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:01:43.422732  543705 net.go:698] Add success.
I0320 19:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:01:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:01:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:01:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:01:53.409807  543705 memory.go:184] no items to output this cycle
I0320 19:01:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 19:02:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:02:03.409790  543705 memory.go:184] no items to output this cycle
I0320 19:02:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 19:02:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:02:13.409789  543705 memory.go:191] Add success.
I0320 19:02:13.409791  543705 cpu.go:282] Add success.
W0320 19:02:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:02:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:02:13.420163  543705 net.go:648] Add success.
I0320 19:02:13.423024  543705 net.go:770] primary dev: ETH0
I0320 19:02:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:02:13.423051  543705 net.go:698] Add success.
W0320 19:02:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:02:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 19:02:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:02:14.455909  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:02:14.455918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:02:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:02:14.456555  543705 disk_worker.go:494] system disk:vda1
I0320 19:02:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:02:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:02:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:02:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:02:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:02:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:02:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:02:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:02:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:02:23.409763  543705 memory.go:184] no items to output this cycle
I0320 19:02:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 19:02:25.676228  543705 disk_info.go:125] begin check local disk info of client
I0320 19:02:25.678675  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:02:25.678681  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d7c0 0xc00035d800]
E0320 19:02:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:02:33.409791  543705 memory.go:184] no items to output this cycle
I0320 19:02:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 19:02:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:02:43.409890  543705 memory.go:191] Add success.
I0320 19:02:43.409929  543705 cpu.go:282] Add success.
I0320 19:02:43.419728  543705 net.go:648] Add success.
I0320 19:02:43.422695  543705 net.go:770] primary dev: ETH0
I0320 19:02:43.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:02:43.422724  543705 net.go:698] Add success.
I0320 19:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:02:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:02:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:02:53.410257  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:02:53.410275  543705 memory.go:184] no items to output this cycle
I0320 19:02:53.410279  543705 cpu.go:275] no items to output this cycle
E0320 19:03:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:03:03.409779  543705 memory.go:184] no items to output this cycle
I0320 19:03:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 19:03:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:03:13.409815  543705 memory.go:191] Add success.
I0320 19:03:13.409828  543705 cpu.go:282] Add success.
W0320 19:03:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:03:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:03:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:03:13.420147  543705 net.go:648] Add success.
I0320 19:03:13.423147  543705 net.go:770] primary dev: ETH0
I0320 19:03:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:03:13.423174  543705 net.go:698] Add success.
I0320 19:03:13.469922  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea546bf6-8842-4184-a389-a73105332933","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:03:13.469957  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:03:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:03:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 19:03:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:03:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 19:03:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:03:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:03:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:03:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:03:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:03:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:03:23.410414  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:03:23.410426  543705 cpu.go:275] no items to output this cycle
I0320 19:03:23.410429  543705 memory.go:184] no items to output this cycle
I0320 19:03:25.678761  543705 disk_info.go:125] begin check local disk info of client
I0320 19:03:25.681243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:03:25.681248  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037c5c0 0xc00037c600]
E0320 19:03:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:03:33.409797  543705 memory.go:184] no items to output this cycle
I0320 19:03:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 19:03:38.593980  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:03:38.593986  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:03:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:03:43.410932  543705 memory.go:191] Add success.
I0320 19:03:43.409823  543705 cpu.go:282] Add success.
I0320 19:03:43.419940  543705 net.go:648] Add success.
I0320 19:03:43.423120  543705 net.go:770] primary dev: ETH0
I0320 19:03:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:03:43.423145  543705 net.go:698] Add success.
I0320 19:03:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:03:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:03:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:03:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:03:53.409773  543705 memory.go:184] no items to output this cycle
I0320 19:03:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 19:04:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:04:03.409777  543705 memory.go:184] no items to output this cycle
I0320 19:04:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 19:04:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:04:13.409817  543705 memory.go:191] Add success.
I0320 19:04:13.409822  543705 cpu.go:282] Add success.
W0320 19:04:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:04:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:04:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:04:13.420578  543705 net.go:648] Add success.
I0320 19:04:13.423606  543705 net.go:770] primary dev: ETH0
I0320 19:04:13.423618  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:04:13.423630  543705 net.go:698] Add success.
I0320 19:04:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:04:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:04:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 19:04:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:04:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 19:04:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:04:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:04:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:04:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:04:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:04:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:04:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:04:23.409759  543705 memory.go:184] no items to output this cycle
I0320 19:04:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 19:04:25.681670  543705 disk_info.go:125] begin check local disk info of client
I0320 19:04:25.684084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:04:25.684090  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be080 0xc0003be0c0]
E0320 19:04:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:04:33.409802  543705 memory.go:184] no items to output this cycle
I0320 19:04:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 19:04:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:04:43.409779  543705 memory.go:191] Add success.
I0320 19:04:43.409809  543705 cpu.go:282] Add success.
I0320 19:04:43.419875  543705 net.go:648] Add success.
I0320 19:04:43.422564  543705 net.go:770] primary dev: ETH0
I0320 19:04:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:04:43.422588  543705 net.go:698] Add success.
I0320 19:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:04:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:04:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:04:53.409812  543705 memory.go:184] no items to output this cycle
I0320 19:04:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 19:05:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:05:03.409805  543705 memory.go:184] no items to output this cycle
I0320 19:05:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 19:05:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:05:13.409786  543705 memory.go:191] Add success.
I0320 19:05:13.409807  543705 cpu.go:282] Add success.
W0320 19:05:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:05:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:05:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:05:13.420310  543705 net.go:648] Add success.
I0320 19:05:13.423307  543705 net.go:770] primary dev: ETH0
I0320 19:05:13.423321  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:05:13.423334  543705 net.go:698] Add success.
I0320 19:05:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:05:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:05:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 19:05:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:05:14.456502  543705 disk_worker.go:494] system disk:vda1
I0320 19:05:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:05:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:05:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:05:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:05:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:05:23.409776  543705 memory.go:184] no items to output this cycle
I0320 19:05:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 19:05:25.685061  543705 disk_info.go:125] begin check local disk info of client
I0320 19:05:25.687522  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:05:25.687528  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be0c0 0xc0003be100]
E0320 19:05:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:05:33.409774  543705 memory.go:184] no items to output this cycle
I0320 19:05:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 19:05:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:05:43.409798  543705 memory.go:191] Add success.
I0320 19:05:43.409803  543705 cpu.go:282] Add success.
I0320 19:05:43.419892  543705 net.go:648] Add success.
I0320 19:05:43.422859  543705 net.go:770] primary dev: ETH0
I0320 19:05:43.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:05:43.422889  543705 net.go:698] Add success.
I0320 19:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:05:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:05:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:05:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:05:53.409810  543705 memory.go:184] no items to output this cycle
I0320 19:05:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 19:06:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:06:03.409775  543705 memory.go:184] no items to output this cycle
I0320 19:06:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 19:06:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:06:13.409813  543705 memory.go:191] Add success.
I0320 19:06:13.409821  543705 cpu.go:282] Add success.
W0320 19:06:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:06:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:06:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:06:13.420312  543705 net.go:648] Add success.
I0320 19:06:13.423160  543705 net.go:770] primary dev: ETH0
I0320 19:06:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:06:13.423184  543705 net.go:698] Add success.
I0320 19:06:14.014072  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"059b32e5-21bd-4465-95e2-8ab22d183b5a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:06:14.014110  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:06:14.454681  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:06:14.454858  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:06:14.454936  543705 disk_worker.go:708] disk space is not compliant
W0320 19:06:14.454939  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:06:14.456427  543705 disk_worker.go:494] system disk:vda1
I0320 19:06:14.456461  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:06:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:06:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:06:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:06:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:06:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:06:23.409767  543705 memory.go:184] no items to output this cycle
I0320 19:06:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 19:06:25.688082  543705 disk_info.go:125] begin check local disk info of client
I0320 19:06:25.690535  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:06:25.690541  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be600 0xc0003be640]
E0320 19:06:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:06:33.409773  543705 memory.go:184] no items to output this cycle
I0320 19:06:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 19:06:38.594122  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:06:38.594129  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:06:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:06:43.410686  543705 memory.go:191] Add success.
I0320 19:06:43.409807  543705 cpu.go:282] Add success.
I0320 19:06:43.420369  543705 net.go:648] Add success.
I0320 19:06:43.423026  543705 net.go:770] primary dev: ETH0
I0320 19:06:43.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:06:43.423051  543705 net.go:698] Add success.
I0320 19:06:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:06:46.458155  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:06:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:06:53.410389  543705 memory.go:184] no items to output this cycle
I0320 19:06:53.410392  543705 cpu.go:275] no items to output this cycle
E0320 19:07:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:07:03.409818  543705 memory.go:184] no items to output this cycle
I0320 19:07:03.409841  543705 cpu.go:275] no items to output this cycle
E0320 19:07:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:07:13.409809  543705 memory.go:191] Add success.
I0320 19:07:13.409817  543705 cpu.go:282] Add success.
W0320 19:07:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:07:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:07:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:07:13.420226  543705 net.go:648] Add success.
I0320 19:07:13.422880  543705 net.go:770] primary dev: ETH0
I0320 19:07:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:07:13.422904  543705 net.go:698] Add success.
I0320 19:07:13.453461  543705 event_worker.go:152] Polling the log file for events...
W0320 19:07:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:07:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 19:07:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:07:14.456952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:07:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:07:14.456967  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:07:14.457012  543705 disk_worker.go:494] system disk:vda1
I0320 19:07:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:07:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:07:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:07:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:07:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:07:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:07:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:07:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:07:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:07:23.409814  543705 memory.go:184] no items to output this cycle
I0320 19:07:23.409825  543705 cpu.go:275] no items to output this cycle
I0320 19:07:25.690696  543705 disk_info.go:125] begin check local disk info of client
I0320 19:07:25.693105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:07:25.693111  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266a00 0xc000266a40]
E0320 19:07:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:07:33.409772  543705 memory.go:184] no items to output this cycle
I0320 19:07:33.409778  543705 cpu.go:275] no items to output this cycle
E0320 19:07:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:07:43.409798  543705 memory.go:191] Add success.
I0320 19:07:43.409811  543705 cpu.go:282] Add success.
I0320 19:07:43.419882  543705 net.go:648] Add success.
I0320 19:07:43.422657  543705 net.go:770] primary dev: ETH0
I0320 19:07:43.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:07:43.422683  543705 net.go:698] Add success.
I0320 19:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:07:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:07:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:07:53.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:07:53.409913  543705 memory.go:184] no items to output this cycle
I0320 19:07:53.409972  543705 cpu.go:275] no items to output this cycle
E0320 19:08:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:08:03.409783  543705 memory.go:184] no items to output this cycle
I0320 19:08:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 19:08:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:08:13.409796  543705 memory.go:191] Add success.
I0320 19:08:13.409795  543705 cpu.go:282] Add success.
W0320 19:08:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:08:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:08:13.420180  543705 net.go:648] Add success.
I0320 19:08:13.422684  543705 net.go:770] primary dev: ETH0
I0320 19:08:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:08:13.422713  543705 net.go:698] Add success.
I0320 19:08:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:08:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:08:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 19:08:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:08:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 19:08:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:08:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:08:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:08:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:08:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:08:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:08:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:08:23.409782  543705 memory.go:184] no items to output this cycle
I0320 19:08:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 19:08:25.693670  543705 disk_info.go:125] begin check local disk info of client
I0320 19:08:25.696152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:08:25.696158  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2180 0xc0003b21c0]
E0320 19:08:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:08:33.409794  543705 memory.go:184] no items to output this cycle
I0320 19:08:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 19:08:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:08:43.409801  543705 memory.go:191] Add success.
I0320 19:08:43.409801  543705 cpu.go:282] Add success.
I0320 19:08:43.419936  543705 net.go:648] Add success.
I0320 19:08:43.422561  543705 net.go:770] primary dev: ETH0
I0320 19:08:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:08:43.422590  543705 net.go:698] Add success.
I0320 19:08:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:08:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:08:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:08:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:08:53.409779  543705 memory.go:184] no items to output this cycle
I0320 19:08:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 19:09:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:09:03.409805  543705 memory.go:184] no items to output this cycle
I0320 19:09:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 19:09:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:09:13.409778  543705 memory.go:191] Add success.
W0320 19:09:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:09:13.409805  543705 cpu.go:282] Add success.
W0320 19:09:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:09:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:09:13.420233  543705 net.go:648] Add success.
I0320 19:09:13.422731  543705 net.go:770] primary dev: ETH0
I0320 19:09:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:09:13.422758  543705 net.go:698] Add success.
I0320 19:09:13.604133  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"87f8270a-6c51-4afa-b8fc-5ee8e76bbcf9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:09:13.604167  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:09:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:09:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:09:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 19:09:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:09:14.456514  543705 disk_worker.go:494] system disk:vda1
I0320 19:09:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:09:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:09:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:09:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:09:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:09:23.410287  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:09:23.410307  543705 memory.go:184] no items to output this cycle
I0320 19:09:23.410314  543705 cpu.go:275] no items to output this cycle
I0320 19:09:25.697114  543705 disk_info.go:125] begin check local disk info of client
I0320 19:09:25.699600  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:09:25.699606  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375ec0 0xc000375f00]
E0320 19:09:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:09:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 19:09:33.409790  543705 memory.go:184] no items to output this cycle
I0320 19:09:38.594668  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:09:38.594675  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:09:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:09:43.410784  543705 memory.go:191] Add success.
I0320 19:09:43.409797  543705 cpu.go:282] Add success.
I0320 19:09:43.420495  543705 net.go:648] Add success.
I0320 19:09:43.423447  543705 net.go:770] primary dev: ETH0
I0320 19:09:43.423460  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:09:43.423472  543705 net.go:698] Add success.
I0320 19:09:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:09:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:09:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:09:53.409788  543705 memory.go:184] no items to output this cycle
I0320 19:09:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 19:10:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:10:03.409894  543705 cpu.go:275] no items to output this cycle
I0320 19:10:03.409900  543705 memory.go:184] no items to output this cycle
E0320 19:10:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:10:13.409775  543705 memory.go:191] Add success.
W0320 19:10:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:10:13.409801  543705 cpu.go:282] Add success.
W0320 19:10:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:10:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:10:13.420326  543705 net.go:648] Add success.
I0320 19:10:13.422983  543705 net.go:770] primary dev: ETH0
I0320 19:10:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:10:13.423007  543705 net.go:698] Add success.
I0320 19:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:10:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:10:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 19:10:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:10:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 19:10:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:10:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:10:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:10:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:10:23.409794  543705 memory.go:184] no items to output this cycle
I0320 19:10:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 19:10:25.699689  543705 disk_info.go:125] begin check local disk info of client
I0320 19:10:25.702202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:10:25.702208  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270380 0xc0002703c0]
E0320 19:10:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:10:33.409781  543705 memory.go:184] no items to output this cycle
I0320 19:10:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 19:10:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:10:43.409795  543705 cpu.go:282] Add success.
I0320 19:10:43.409800  543705 memory.go:191] Add success.
I0320 19:10:43.419868  543705 net.go:648] Add success.
I0320 19:10:43.422351  543705 net.go:770] primary dev: ETH0
I0320 19:10:43.422363  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:10:43.422375  543705 net.go:698] Add success.
I0320 19:10:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:10:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:10:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:10:53.409785  543705 memory.go:184] no items to output this cycle
I0320 19:10:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 19:11:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:11:03.409795  543705 memory.go:184] no items to output this cycle
I0320 19:11:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 19:11:13.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:11:13.409978  543705 memory.go:191] Add success.
W0320 19:11:13.410006  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:11:13.410020  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:11:13.410023  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:11:13.410126  543705 cpu.go:282] Add success.
I0320 19:11:13.419716  543705 net.go:648] Add success.
I0320 19:11:13.422981  543705 net.go:770] primary dev: ETH0
I0320 19:11:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:11:13.423005  543705 net.go:698] Add success.
I0320 19:11:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:11:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:11:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 19:11:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:11:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 19:11:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:11:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:11:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:11:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:11:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:11:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:11:23.409792  543705 memory.go:184] no items to output this cycle
I0320 19:11:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 19:11:25.703155  543705 disk_info.go:125] begin check local disk info of client
I0320 19:11:25.705606  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:11:25.705612  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048ad00 0xc00048ad40]
E0320 19:11:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:11:33.409787  543705 memory.go:184] no items to output this cycle
I0320 19:11:33.409838  543705 cpu.go:275] no items to output this cycle
E0320 19:11:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:11:43.409778  543705 memory.go:191] Add success.
I0320 19:11:43.409801  543705 cpu.go:282] Add success.
I0320 19:11:43.419871  543705 net.go:648] Add success.
I0320 19:11:43.422845  543705 net.go:770] primary dev: ETH0
I0320 19:11:43.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:11:43.422879  543705 net.go:698] Add success.
I0320 19:11:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:11:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:11:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:11:53.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:11:53.409881  543705 cpu.go:275] no items to output this cycle
I0320 19:11:53.409888  543705 memory.go:184] no items to output this cycle
E0320 19:12:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:12:03.409789  543705 memory.go:184] no items to output this cycle
I0320 19:12:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 19:12:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:12:13.409786  543705 memory.go:191] Add success.
I0320 19:12:13.409808  543705 cpu.go:282] Add success.
W0320 19:12:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:12:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:12:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:12:13.420178  543705 net.go:648] Add success.
I0320 19:12:13.422876  543705 net.go:770] primary dev: ETH0
I0320 19:12:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:12:13.422908  543705 net.go:698] Add success.
I0320 19:12:13.469936  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5b32a110-03d6-4e0a-94ac-d5777233e34b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:12:13.469970  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 19:12:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:12:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 19:12:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:12:14.455945  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:12:14.455954  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:12:14.455959  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:12:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 19:12:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:12:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:12:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:12:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:12:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:12:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:12:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:12:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:12:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:12:23.409771  543705 cpu.go:275] no items to output this cycle
I0320 19:12:23.409785  543705 memory.go:184] no items to output this cycle
I0320 19:12:25.705676  543705 disk_info.go:125] begin check local disk info of client
I0320 19:12:25.708167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:12:25.708176  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c000 0xc00039c040]
E0320 19:12:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:12:33.409806  543705 memory.go:184] no items to output this cycle
I0320 19:12:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 19:12:38.594819  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:12:38.594825  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:12:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:12:43.410685  543705 memory.go:191] Add success.
I0320 19:12:43.409804  543705 cpu.go:282] Add success.
I0320 19:12:43.420656  543705 net.go:648] Add success.
I0320 19:12:43.423366  543705 net.go:770] primary dev: ETH0
I0320 19:12:43.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:12:43.423391  543705 net.go:698] Add success.
I0320 19:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:12:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:12:53.409808  543705 memory.go:184] no items to output this cycle
I0320 19:12:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 19:13:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:13:03.409775  543705 memory.go:184] no items to output this cycle
I0320 19:13:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 19:13:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:13:13.409793  543705 memory.go:191] Add success.
I0320 19:13:13.409798  543705 cpu.go:282] Add success.
W0320 19:13:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:13:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:13:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:13:13.420169  543705 net.go:648] Add success.
I0320 19:13:13.422913  543705 net.go:770] primary dev: ETH0
I0320 19:13:13.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:13:13.422939  543705 net.go:698] Add success.
I0320 19:13:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:13:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:13:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 19:13:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:13:14.456507  543705 disk_worker.go:494] system disk:vda1
I0320 19:13:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:13:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:13:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:13:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:13:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:13:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:13:23.409760  543705 memory.go:184] no items to output this cycle
I0320 19:13:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 19:13:25.709172  543705 disk_info.go:125] begin check local disk info of client
I0320 19:13:25.711718  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:13:25.711725  543705 disk_info.go:196] parse disk info done, disk is : [0xc000282800 0xc000282840]
E0320 19:13:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:13:33.409773  543705 memory.go:184] no items to output this cycle
I0320 19:13:33.409913  543705 cpu.go:275] no items to output this cycle
E0320 19:13:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:13:43.409786  543705 memory.go:191] Add success.
I0320 19:13:43.409810  543705 cpu.go:282] Add success.
I0320 19:13:43.419936  543705 net.go:648] Add success.
I0320 19:13:43.422465  543705 net.go:770] primary dev: ETH0
I0320 19:13:43.422478  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:13:43.422490  543705 net.go:698] Add success.
I0320 19:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:13:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:13:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:13:53.409785  543705 cpu.go:275] no items to output this cycle
I0320 19:13:53.409791  543705 memory.go:184] no items to output this cycle
E0320 19:14:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:14:03.409809  543705 memory.go:184] no items to output this cycle
I0320 19:14:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 19:14:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:14:13.409786  543705 memory.go:191] Add success.
I0320 19:14:13.409807  543705 cpu.go:282] Add success.
W0320 19:14:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:14:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:14:13.420074  543705 net.go:648] Add success.
I0320 19:14:13.422726  543705 net.go:770] primary dev: ETH0
I0320 19:14:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:14:13.422753  543705 net.go:698] Add success.
I0320 19:14:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:14:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:14:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 19:14:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:14:14.456523  543705 disk_worker.go:494] system disk:vda1
I0320 19:14:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:14:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:14:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:14:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:14:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:14:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:14:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:14:23.409769  543705 memory.go:184] no items to output this cycle
I0320 19:14:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 19:14:25.711806  543705 disk_info.go:125] begin check local disk info of client
I0320 19:14:25.714348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:14:25.714354  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab200 0xc0001ab240]
E0320 19:14:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:14:33.409783  543705 memory.go:184] no items to output this cycle
I0320 19:14:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 19:14:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:14:43.409782  543705 memory.go:191] Add success.
I0320 19:14:43.409785  543705 cpu.go:282] Add success.
I0320 19:14:43.419885  543705 net.go:648] Add success.
I0320 19:14:43.422510  543705 net.go:770] primary dev: ETH0
I0320 19:14:43.422524  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:14:43.422539  543705 net.go:698] Add success.
I0320 19:14:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:14:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:14:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:14:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:14:53.409805  543705 memory.go:184] no items to output this cycle
I0320 19:14:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 19:15:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:15:03.409780  543705 memory.go:184] no items to output this cycle
I0320 19:15:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 19:15:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:15:13.409812  543705 memory.go:191] Add success.
I0320 19:15:13.409818  543705 cpu.go:282] Add success.
W0320 19:15:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:15:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:15:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:15:13.420099  543705 net.go:648] Add success.
I0320 19:15:13.422644  543705 net.go:770] primary dev: ETH0
I0320 19:15:13.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:15:13.422670  543705 net.go:698] Add success.
I0320 19:15:13.477139  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eb006d29-f71c-4ad9-986c-ecfe00b999ba","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:15:13.477174  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:15:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:15:14.455347  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:15:14.455447  543705 disk_worker.go:708] disk space is not compliant
W0320 19:15:14.455457  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:15:14.457541  543705 disk_worker.go:494] system disk:vda1
I0320 19:15:14.457570  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:15:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:15:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:15:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:15:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:15:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:15:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:15:23.409769  543705 memory.go:184] no items to output this cycle
I0320 19:15:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 19:15:25.715215  543705 disk_info.go:125] begin check local disk info of client
I0320 19:15:25.717690  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:15:25.717696  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa0c0 0xc0001aa100]
E0320 19:15:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:15:33.409775  543705 memory.go:184] no items to output this cycle
I0320 19:15:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 19:15:38.595692  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:15:38.595699  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:15:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:15:43.409788  543705 memory.go:191] Add success.
I0320 19:15:43.409796  543705 cpu.go:282] Add success.
I0320 19:15:43.419889  543705 net.go:648] Add success.
I0320 19:15:43.420854  543705 net.go:770] primary dev: ETH0
I0320 19:15:43.420868  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:15:43.420880  543705 net.go:698] Add success.
I0320 19:15:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:15:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:15:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:15:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:15:53.409827  543705 memory.go:184] no items to output this cycle
I0320 19:15:53.409845  543705 cpu.go:275] no items to output this cycle
E0320 19:16:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:16:03.409794  543705 memory.go:184] no items to output this cycle
I0320 19:16:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 19:16:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:16:13.409835  543705 memory.go:191] Add success.
I0320 19:16:13.409838  543705 cpu.go:282] Add success.
W0320 19:16:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:16:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:16:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:16:13.420123  543705 net.go:648] Add success.
I0320 19:16:13.422855  543705 net.go:770] primary dev: ETH0
I0320 19:16:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:16:13.422884  543705 net.go:698] Add success.
I0320 19:16:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:16:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:16:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 19:16:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:16:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 19:16:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:16:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:16:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:16:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:16:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:16:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:16:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:16:23.409783  543705 memory.go:184] no items to output this cycle
I0320 19:16:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 19:16:25.717778  543705 disk_info.go:125] begin check local disk info of client
I0320 19:16:25.720271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:16:25.720278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aabc0 0xc0001aac00]
E0320 19:16:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:16:33.409788  543705 memory.go:184] no items to output this cycle
I0320 19:16:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 19:16:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:16:43.409797  543705 memory.go:191] Add success.
I0320 19:16:43.409799  543705 cpu.go:282] Add success.
I0320 19:16:43.419903  543705 net.go:648] Add success.
I0320 19:16:43.422653  543705 net.go:770] primary dev: ETH0
I0320 19:16:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:16:43.422678  543705 net.go:698] Add success.
I0320 19:16:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:16:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:16:53.409782  543705 memory.go:184] no items to output this cycle
I0320 19:16:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 19:17:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:17:03.409793  543705 memory.go:184] no items to output this cycle
I0320 19:17:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 19:17:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:17:13.409801  543705 cpu.go:282] Add success.
I0320 19:17:13.409809  543705 memory.go:191] Add success.
W0320 19:17:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:17:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:17:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:17:13.420142  543705 net.go:648] Add success.
I0320 19:17:13.422892  543705 net.go:770] primary dev: ETH0
I0320 19:17:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:17:13.422916  543705 net.go:698] Add success.
I0320 19:17:13.453448  543705 event_worker.go:152] Polling the log file for events...
W0320 19:17:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:17:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 19:17:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:17:14.456115  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:17:14.456124  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:17:14.456129  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:17:14.456556  543705 disk_worker.go:494] system disk:vda1
I0320 19:17:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:17:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:17:15.456863  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:17:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:17:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:17:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:17:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:17:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:17:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:17:23.409792  543705 memory.go:184] no items to output this cycle
I0320 19:17:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 19:17:25.721250  543705 disk_info.go:125] begin check local disk info of client
I0320 19:17:25.723663  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:17:25.723669  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e6700 0xc0001e6740]
E0320 19:17:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:17:33.409781  543705 memory.go:184] no items to output this cycle
I0320 19:17:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 19:17:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:17:43.409787  543705 cpu.go:282] Add success.
I0320 19:17:43.409787  543705 memory.go:191] Add success.
I0320 19:17:43.419993  543705 net.go:648] Add success.
I0320 19:17:43.423164  543705 net.go:770] primary dev: ETH0
I0320 19:17:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:17:43.423194  543705 net.go:698] Add success.
I0320 19:17:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:17:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:17:53.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:17:53.409927  543705 memory.go:184] no items to output this cycle
I0320 19:17:53.409963  543705 cpu.go:275] no items to output this cycle
E0320 19:18:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:18:03.409788  543705 cpu.go:275] no items to output this cycle
I0320 19:18:03.409797  543705 memory.go:184] no items to output this cycle
E0320 19:18:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:18:13.409791  543705 memory.go:191] Add success.
I0320 19:18:13.409804  543705 cpu.go:282] Add success.
W0320 19:18:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:18:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:18:13.420158  543705 net.go:648] Add success.
I0320 19:18:13.422741  543705 net.go:770] primary dev: ETH0
I0320 19:18:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:18:13.422766  543705 net.go:698] Add success.
I0320 19:18:13.464560  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d9e3eccc-1821-482b-a668-1786be526a2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:18:13.464602  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:18:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:18:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:18:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 19:18:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:18:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 19:18:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:18:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:18:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:18:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:18:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:18:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:18:23.409769  543705 memory.go:184] no items to output this cycle
I0320 19:18:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 19:18:25.725261  543705 disk_info.go:125] begin check local disk info of client
I0320 19:18:25.727749  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:18:25.727754  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb7c0 0xc0001fb800]
E0320 19:18:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:18:33.409788  543705 memory.go:184] no items to output this cycle
I0320 19:18:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 19:18:38.596693  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:18:38.596699  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:18:43.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:18:43.410603  543705 memory.go:191] Add success.
I0320 19:18:43.409900  543705 cpu.go:282] Add success.
I0320 19:18:43.419726  543705 net.go:648] Add success.
I0320 19:18:43.422238  543705 net.go:770] primary dev: ETH0
I0320 19:18:43.422252  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:18:43.422264  543705 net.go:698] Add success.
I0320 19:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:18:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:18:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:18:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:18:53.409777  543705 memory.go:184] no items to output this cycle
I0320 19:18:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 19:19:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:19:03.409804  543705 memory.go:184] no items to output this cycle
I0320 19:19:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 19:19:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:19:13.409823  543705 memory.go:191] Add success.
I0320 19:19:13.409829  543705 cpu.go:282] Add success.
W0320 19:19:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:19:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:19:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:19:13.420122  543705 net.go:648] Add success.
I0320 19:19:13.422715  543705 net.go:770] primary dev: ETH0
I0320 19:19:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:19:13.422745  543705 net.go:698] Add success.
I0320 19:19:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:19:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:19:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 19:19:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:19:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 19:19:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:19:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:19:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:19:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:19:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:19:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:19:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:19:23.409800  543705 memory.go:184] no items to output this cycle
I0320 19:19:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 19:19:25.727836  543705 disk_info.go:125] begin check local disk info of client
I0320 19:19:25.730302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:19:25.730308  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c8c0 0xc00057c900]
E0320 19:19:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:19:33.409775  543705 memory.go:184] no items to output this cycle
I0320 19:19:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 19:19:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:19:43.409815  543705 memory.go:191] Add success.
I0320 19:19:43.409823  543705 cpu.go:282] Add success.
I0320 19:19:43.419958  543705 net.go:648] Add success.
I0320 19:19:43.423146  543705 net.go:770] primary dev: ETH0
I0320 19:19:43.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:19:43.423171  543705 net.go:698] Add success.
I0320 19:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:19:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:19:53.410382  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:19:53.410403  543705 memory.go:184] no items to output this cycle
I0320 19:19:53.410416  543705 cpu.go:275] no items to output this cycle
E0320 19:20:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:20:03.409787  543705 memory.go:184] no items to output this cycle
I0320 19:20:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 19:20:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:20:13.409778  543705 memory.go:191] Add success.
W0320 19:20:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:20:13.409808  543705 cpu.go:282] Add success.
W0320 19:20:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:20:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:20:13.420104  543705 net.go:648] Add success.
I0320 19:20:13.423141  543705 net.go:770] primary dev: ETH0
I0320 19:20:13.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:20:13.423170  543705 net.go:698] Add success.
I0320 19:20:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:20:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:20:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 19:20:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:20:14.456557  543705 disk_worker.go:494] system disk:vda1
I0320 19:20:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:20:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:20:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:20:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:20:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:20:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:20:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:20:23.409792  543705 memory.go:184] no items to output this cycle
I0320 19:20:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 19:20:25.731297  543705 disk_info.go:125] begin check local disk info of client
I0320 19:20:25.733797  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:20:25.733804  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe680 0xc0003fe6c0]
E0320 19:20:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:20:33.409767  543705 memory.go:184] no items to output this cycle
I0320 19:20:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 19:20:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:20:43.409806  543705 memory.go:191] Add success.
I0320 19:20:43.409814  543705 cpu.go:282] Add success.
I0320 19:20:43.419707  543705 net.go:648] Add success.
I0320 19:20:43.422626  543705 net.go:770] primary dev: ETH0
I0320 19:20:43.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:20:43.422651  543705 net.go:698] Add success.
I0320 19:20:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:20:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:20:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:20:53.409783  543705 memory.go:184] no items to output this cycle
I0320 19:20:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 19:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:21:03.409798  543705 memory.go:184] no items to output this cycle
I0320 19:21:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 19:21:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:21:13.409793  543705 memory.go:191] Add success.
I0320 19:21:13.409794  543705 cpu.go:282] Add success.
W0320 19:21:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:21:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:21:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:21:13.420186  543705 net.go:648] Add success.
I0320 19:21:13.422829  543705 net.go:770] primary dev: ETH0
I0320 19:21:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:21:13.422854  543705 net.go:698] Add success.
I0320 19:21:13.463644  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29489938-0bb7-42eb-843c-6919b9f767d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:21:13.463679  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:21:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:21:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:21:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 19:21:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:21:14.456514  543705 disk_worker.go:494] system disk:vda1
I0320 19:21:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:21:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:21:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:21:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:21:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:21:23.409762  543705 memory.go:184] no items to output this cycle
I0320 19:21:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 19:21:25.735314  543705 disk_info.go:125] begin check local disk info of client
I0320 19:21:25.737800  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:21:25.737807  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002950c0 0xc000295100]
E0320 19:21:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:21:33.409776  543705 memory.go:184] no items to output this cycle
I0320 19:21:33.409787  543705 cpu.go:275] no items to output this cycle
I0320 19:21:38.596839  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:21:38.596846  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:21:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:21:43.410766  543705 memory.go:191] Add success.
I0320 19:21:43.409951  543705 cpu.go:282] Add success.
I0320 19:21:43.419744  543705 net.go:648] Add success.
I0320 19:21:43.422503  543705 net.go:770] primary dev: ETH0
I0320 19:21:43.422517  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:21:43.422531  543705 net.go:698] Add success.
I0320 19:21:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:21:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:21:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:21:53.409811  543705 memory.go:184] no items to output this cycle
I0320 19:21:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 19:22:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:22:03.409795  543705 memory.go:184] no items to output this cycle
I0320 19:22:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:22:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:22:13.409784  543705 memory.go:191] Add success.
I0320 19:22:13.409784  543705 cpu.go:282] Add success.
W0320 19:22:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:22:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:22:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:22:13.420161  543705 net.go:648] Add success.
I0320 19:22:13.423032  543705 net.go:770] primary dev: ETH0
I0320 19:22:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:22:13.423060  543705 net.go:698] Add success.
W0320 19:22:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:22:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 19:22:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:22:14.456798  543705 disk_worker.go:494] system disk:vda1
I0320 19:22:14.456837  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:22:14.457123  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:22:14.457130  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:22:14.457135  543705 custom_config.go:64] query custom config with name: gpu
E0320 19:22:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:22:15.456873  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:22:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:22:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:22:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:22:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:22:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:22:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:22:23.409778  543705 memory.go:184] no items to output this cycle
I0320 19:22:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 19:22:25.739331  543705 disk_info.go:125] begin check local disk info of client
I0320 19:22:25.741787  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:22:25.741793  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8600 0xc0004a8640]
E0320 19:22:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:22:33.409795  543705 memory.go:184] no items to output this cycle
I0320 19:22:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 19:22:43.409925  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:22:43.409925  543705 cpu.go:282] Add success.
I0320 19:22:43.410074  543705 memory.go:191] Add success.
I0320 19:22:43.419732  543705 net.go:648] Add success.
I0320 19:22:43.422400  543705 net.go:770] primary dev: ETH0
I0320 19:22:43.422415  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:22:43.422430  543705 net.go:698] Add success.
I0320 19:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:22:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:22:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:22:53.409785  543705 memory.go:184] no items to output this cycle
I0320 19:22:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 19:23:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:23:03.409778  543705 memory.go:184] no items to output this cycle
I0320 19:23:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 19:23:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:23:13.409791  543705 memory.go:191] Add success.
W0320 19:23:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:23:13.409818  543705 cpu.go:282] Add success.
W0320 19:23:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:23:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:23:13.420161  543705 net.go:648] Add success.
I0320 19:23:13.422775  543705 net.go:770] primary dev: ETH0
I0320 19:23:13.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:23:13.422800  543705 net.go:698] Add success.
I0320 19:23:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:23:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:23:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 19:23:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:23:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 19:23:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:23:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:23:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:23:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:23:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:23:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:23:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 19:23:23.409791  543705 memory.go:184] no items to output this cycle
I0320 19:23:25.743356  543705 disk_info.go:125] begin check local disk info of client
I0320 19:23:25.745806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:23:25.745813  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312680 0xc0003126c0]
E0320 19:23:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:23:33.409802  543705 memory.go:184] no items to output this cycle
I0320 19:23:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 19:23:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:23:43.409778  543705 memory.go:191] Add success.
I0320 19:23:43.409799  543705 cpu.go:282] Add success.
I0320 19:23:43.419849  543705 net.go:648] Add success.
I0320 19:23:43.422581  543705 net.go:770] primary dev: ETH0
I0320 19:23:43.422594  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:23:43.422604  543705 net.go:698] Add success.
I0320 19:23:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:23:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:23:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:23:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:23:53.409792  543705 memory.go:184] no items to output this cycle
I0320 19:23:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 19:24:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:24:03.409777  543705 memory.go:184] no items to output this cycle
I0320 19:24:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:24:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:24:13.409812  543705 memory.go:191] Add success.
I0320 19:24:13.409816  543705 cpu.go:282] Add success.
W0320 19:24:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:24:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:24:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:24:13.420067  543705 net.go:648] Add success.
I0320 19:24:13.422591  543705 net.go:770] primary dev: ETH0
I0320 19:24:13.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:24:13.422617  543705 net.go:698] Add success.
I0320 19:24:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:24:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:24:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 19:24:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:24:14.456599  543705 disk_worker.go:494] system disk:vda1
I0320 19:24:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:24:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:24:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:24:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:24:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:24:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0320 19:24:16.736348  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"770da961-9abd-4ec9-88a2-c910e4e36c7c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:24:16.736385  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0320 19:24:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:24:23.409777  543705 memory.go:184] no items to output this cycle
I0320 19:24:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 19:24:25.745896  543705 disk_info.go:125] begin check local disk info of client
I0320 19:24:25.748381  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:24:25.748388  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4ac0 0xc0004a4b00]
E0320 19:24:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:24:33.409762  543705 memory.go:184] no items to output this cycle
I0320 19:24:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 19:24:38.597000  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:24:38.597006  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:24:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:24:43.409804  543705 cpu.go:282] Add success.
I0320 19:24:43.410722  543705 memory.go:191] Add success.
I0320 19:24:43.419718  543705 net.go:648] Add success.
I0320 19:24:43.422332  543705 net.go:770] primary dev: ETH0
I0320 19:24:43.422348  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:24:43.422362  543705 net.go:698] Add success.
I0320 19:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:24:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:24:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:24:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:24:53.409773  543705 memory.go:184] no items to output this cycle
I0320 19:24:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 19:25:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:25:03.409776  543705 memory.go:184] no items to output this cycle
I0320 19:25:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 19:25:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:25:13.409790  543705 memory.go:191] Add success.
I0320 19:25:13.409806  543705 cpu.go:282] Add success.
W0320 19:25:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:25:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:25:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:25:13.420048  543705 net.go:648] Add success.
I0320 19:25:13.422814  543705 net.go:770] primary dev: ETH0
I0320 19:25:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:25:13.422838  543705 net.go:698] Add success.
I0320 19:25:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:25:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:25:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 19:25:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:25:14.456466  543705 disk_worker.go:494] system disk:vda1
I0320 19:25:14.456509  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:25:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:25:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:25:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:25:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:25:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:25:23.409807  543705 memory.go:184] no items to output this cycle
I0320 19:25:23.409829  543705 cpu.go:275] no items to output this cycle
I0320 19:25:25.748472  543705 disk_info.go:125] begin check local disk info of client
I0320 19:25:25.751033  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:25:25.751038  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035fb80 0xc00035fbc0]
E0320 19:25:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:25:33.409761  543705 memory.go:184] no items to output this cycle
I0320 19:25:33.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:25:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:25:43.409804  543705 memory.go:191] Add success.
I0320 19:25:43.409811  543705 cpu.go:282] Add success.
I0320 19:25:43.420004  543705 net.go:648] Add success.
I0320 19:25:43.422707  543705 net.go:770] primary dev: ETH0
I0320 19:25:43.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:25:43.422736  543705 net.go:698] Add success.
I0320 19:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:25:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:25:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:25:53.410257  543705 memory.go:184] no items to output this cycle
I0320 19:25:53.410270  543705 cpu.go:275] no items to output this cycle
E0320 19:26:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:26:03.409796  543705 memory.go:184] no items to output this cycle
I0320 19:26:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:26:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:26:13.409831  543705 memory.go:191] Add success.
I0320 19:26:13.409835  543705 cpu.go:282] Add success.
W0320 19:26:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:26:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:26:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:26:13.420136  543705 net.go:648] Add success.
I0320 19:26:13.423020  543705 net.go:770] primary dev: ETH0
I0320 19:26:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:26:13.423045  543705 net.go:698] Add success.
I0320 19:26:14.454599  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:26:14.454828  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:26:14.454838  543705 disk_worker.go:708] disk space is not compliant
W0320 19:26:14.454840  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:26:14.456215  543705 disk_worker.go:494] system disk:vda1
I0320 19:26:14.456245  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:26:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:26:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:26:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:26:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:26:23.409795  543705 memory.go:184] no items to output this cycle
I0320 19:26:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 19:26:25.752402  543705 disk_info.go:125] begin check local disk info of client
I0320 19:26:25.754919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:26:25.754926  543705 disk_info.go:196] parse disk info done, disk is : [0xc000577800 0xc000577840]
E0320 19:26:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:26:33.409796  543705 memory.go:184] no items to output this cycle
I0320 19:26:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 19:26:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:26:43.409819  543705 memory.go:191] Add success.
I0320 19:26:43.409827  543705 cpu.go:282] Add success.
I0320 19:26:43.419691  543705 net.go:770] primary dev: ETH0
I0320 19:26:43.419703  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:26:43.419718  543705 net.go:698] Add success.
I0320 19:26:43.420112  543705 net.go:648] Add success.
I0320 19:26:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:26:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:26:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:26:53.409791  543705 memory.go:184] no items to output this cycle
I0320 19:26:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 19:27:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:27:03.409777  543705 memory.go:184] no items to output this cycle
I0320 19:27:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 19:27:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:27:13.409827  543705 memory.go:191] Add success.
I0320 19:27:13.409835  543705 cpu.go:282] Add success.
W0320 19:27:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:27:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:27:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:27:13.420153  543705 net.go:648] Add success.
I0320 19:27:13.422643  543705 net.go:770] primary dev: ETH0
I0320 19:27:13.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:27:13.422672  543705 net.go:698] Add success.
I0320 19:27:13.428995  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 19:27:13.453168  543705 event_worker.go:152] Polling the log file for events...
I0320 19:27:13.900592  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f27a3251-ed18-43e8-8532-ce8eb2fd8a5d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:27:13.900628  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 19:27:14.454159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:27:14.454231  543705 disk_worker.go:708] disk space is not compliant
W0320 19:27:14.454234  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:27:14.454949  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:27:14.454959  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:27:14.454964  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:27:14.455760  543705 disk_worker.go:494] system disk:vda1
I0320 19:27:14.455790  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:27:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:27:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:27:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:27:16.457978  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:27:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:27:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:27:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:27:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:27:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 19:27:23.409782  543705 memory.go:184] no items to output this cycle
I0320 19:27:25.756410  543705 disk_info.go:125] begin check local disk info of client
I0320 19:27:25.758920  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:27:25.758926  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6fc0 0xc0004a7000]
E0320 19:27:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:27:33.409777  543705 memory.go:184] no items to output this cycle
I0320 19:27:33.409789  543705 cpu.go:275] no items to output this cycle
I0320 19:27:38.597697  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:27:38.597703  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:27:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:27:43.410721  543705 memory.go:191] Add success.
I0320 19:27:43.409816  543705 cpu.go:282] Add success.
I0320 19:27:43.419544  543705 net.go:770] primary dev: ETH0
I0320 19:27:43.419557  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:27:43.419570  543705 net.go:698] Add success.
I0320 19:27:43.419794  543705 net.go:648] Add success.
I0320 19:27:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:27:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:27:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:27:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:27:53.409790  543705 memory.go:184] no items to output this cycle
I0320 19:27:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 19:28:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:28:03.409817  543705 memory.go:184] no items to output this cycle
I0320 19:28:03.409824  543705 cpu.go:275] no items to output this cycle
E0320 19:28:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:28:13.409810  543705 memory.go:191] Add success.
I0320 19:28:13.409816  543705 cpu.go:282] Add success.
W0320 19:28:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:28:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:28:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:28:13.420154  543705 net.go:648] Add success.
I0320 19:28:13.422922  543705 net.go:770] primary dev: ETH0
I0320 19:28:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:28:13.422947  543705 net.go:698] Add success.
I0320 19:28:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:28:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:28:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 19:28:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:28:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 19:28:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:28:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:28:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:28:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:28:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:28:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:28:23.410206  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:28:23.410223  543705 memory.go:184] no items to output this cycle
I0320 19:28:23.410228  543705 cpu.go:275] no items to output this cycle
I0320 19:28:25.759010  543705 disk_info.go:125] begin check local disk info of client
I0320 19:28:25.761494  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:28:25.761499  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1dc0 0xc0002b1e00]
E0320 19:28:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:28:33.409780  543705 memory.go:184] no items to output this cycle
I0320 19:28:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 19:28:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:28:43.409780  543705 memory.go:191] Add success.
I0320 19:28:43.409805  543705 cpu.go:282] Add success.
I0320 19:28:43.419990  543705 net.go:648] Add success.
I0320 19:28:43.422708  543705 net.go:770] primary dev: ETH0
I0320 19:28:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:28:43.422733  543705 net.go:698] Add success.
I0320 19:28:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:28:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:28:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:28:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:28:53.409780  543705 memory.go:184] no items to output this cycle
I0320 19:28:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 19:29:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:29:03.409801  543705 memory.go:184] no items to output this cycle
I0320 19:29:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 19:29:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:29:13.409815  543705 memory.go:191] Add success.
I0320 19:29:13.409821  543705 cpu.go:282] Add success.
W0320 19:29:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:29:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:29:13.420135  543705 net.go:648] Add success.
I0320 19:29:13.422730  543705 net.go:770] primary dev: ETH0
I0320 19:29:13.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:29:13.422754  543705 net.go:698] Add success.
I0320 19:29:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:29:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:29:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 19:29:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:29:14.456494  543705 disk_worker.go:494] system disk:vda1
I0320 19:29:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:29:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:29:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:29:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:29:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:29:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:29:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:29:23.409781  543705 memory.go:184] no items to output this cycle
I0320 19:29:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 19:29:25.761668  543705 disk_info.go:125] begin check local disk info of client
I0320 19:29:25.764187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:29:25.764193  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025ad40 0xc00025ad80]
E0320 19:29:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:29:33.409801  543705 memory.go:184] no items to output this cycle
I0320 19:29:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 19:29:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:29:43.409782  543705 memory.go:191] Add success.
I0320 19:29:43.409805  543705 cpu.go:282] Add success.
I0320 19:29:43.419905  543705 net.go:648] Add success.
I0320 19:29:43.422471  543705 net.go:770] primary dev: ETH0
I0320 19:29:43.422484  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:29:43.422660  543705 net.go:698] Add success.
I0320 19:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:29:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:29:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:29:53.409778  543705 memory.go:184] no items to output this cycle
I0320 19:29:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 19:30:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:30:03.409785  543705 memory.go:184] no items to output this cycle
I0320 19:30:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 19:30:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:30:13.409812  543705 memory.go:191] Add success.
I0320 19:30:13.409824  543705 cpu.go:282] Add success.
W0320 19:30:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:30:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:30:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:30:13.420277  543705 net.go:648] Add success.
I0320 19:30:13.423163  543705 net.go:770] primary dev: ETH0
I0320 19:30:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:30:13.423188  543705 net.go:698] Add success.
I0320 19:30:13.550442  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db30b28a-4715-4e7a-9719-019a6b46133d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:30:13.550476  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:30:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:30:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:30:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 19:30:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:30:14.456517  543705 disk_worker.go:494] system disk:vda1
I0320 19:30:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:30:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:30:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:30:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:30:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:30:23.409779  543705 memory.go:184] no items to output this cycle
I0320 19:30:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 19:30:25.764275  543705 disk_info.go:125] begin check local disk info of client
I0320 19:30:25.766758  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:30:25.766764  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272640 0xc000272680]
E0320 19:30:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:30:33.409761  543705 memory.go:184] no items to output this cycle
I0320 19:30:33.409800  543705 cpu.go:275] no items to output this cycle
I0320 19:30:38.598547  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:30:38.598553  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:30:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:30:43.410638  543705 memory.go:191] Add success.
I0320 19:30:43.409814  543705 cpu.go:282] Add success.
I0320 19:30:43.420365  543705 net.go:648] Add success.
I0320 19:30:43.422705  543705 net.go:770] primary dev: ETH0
I0320 19:30:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:30:43.422732  543705 net.go:698] Add success.
I0320 19:30:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:30:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:30:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:30:53.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:30:53.409868  543705 memory.go:184] no items to output this cycle
I0320 19:30:53.409996  543705 cpu.go:275] no items to output this cycle
E0320 19:31:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:31:03.409773  543705 memory.go:184] no items to output this cycle
I0320 19:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 19:31:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:31:13.409819  543705 memory.go:191] Add success.
I0320 19:31:13.409823  543705 cpu.go:282] Add success.
W0320 19:31:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:31:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:31:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:31:13.420305  543705 net.go:648] Add success.
I0320 19:31:13.422860  543705 net.go:770] primary dev: ETH0
I0320 19:31:13.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:31:13.422885  543705 net.go:698] Add success.
I0320 19:31:14.454997  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:31:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:31:14.455259  543705 disk_worker.go:708] disk space is not compliant
W0320 19:31:14.455264  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:31:14.457247  543705 disk_worker.go:494] system disk:vda1
I0320 19:31:14.457297  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:31:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:31:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:31:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:31:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:31:16.472475  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:31:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:31:23.409778  543705 memory.go:184] no items to output this cycle
I0320 19:31:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 19:31:25.768477  543705 disk_info.go:125] begin check local disk info of client
I0320 19:31:25.770977  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:31:25.770984  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046dcc0 0xc00046dd00]
E0320 19:31:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:31:33.409775  543705 memory.go:184] no items to output this cycle
I0320 19:31:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 19:31:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:31:43.409812  543705 memory.go:191] Add success.
I0320 19:31:43.409819  543705 cpu.go:282] Add success.
I0320 19:31:43.419851  543705 net.go:648] Add success.
I0320 19:31:43.422515  543705 net.go:770] primary dev: ETH0
I0320 19:31:43.422529  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:31:43.422544  543705 net.go:698] Add success.
I0320 19:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:31:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:31:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:31:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:31:53.409781  543705 memory.go:184] no items to output this cycle
I0320 19:31:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:32:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:32:03.409778  543705 memory.go:184] no items to output this cycle
I0320 19:32:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 19:32:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:32:13.409782  543705 memory.go:191] Add success.
W0320 19:32:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:32:13.409809  543705 cpu.go:282] Add success.
W0320 19:32:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:32:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:32:13.420133  543705 net.go:648] Add success.
I0320 19:32:13.423125  543705 net.go:770] primary dev: ETH0
I0320 19:32:13.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:32:13.423151  543705 net.go:698] Add success.
W0320 19:32:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:32:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 19:32:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:32:14.456819  543705 disk_worker.go:494] system disk:vda1
I0320 19:32:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:32:14.457177  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:32:14.457185  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:32:14.457190  543705 custom_config.go:64] query custom config with name: gpu
E0320 19:32:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:32:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:32:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:32:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:32:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:32:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:32:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:32:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:32:23.409764  543705 memory.go:184] no items to output this cycle
I0320 19:32:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 19:32:25.771084  543705 disk_info.go:125] begin check local disk info of client
I0320 19:32:25.773862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:32:25.773870  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b18c0 0xc0003b1900]
E0320 19:32:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:32:33.409812  543705 memory.go:184] no items to output this cycle
I0320 19:32:33.409826  543705 cpu.go:275] no items to output this cycle
E0320 19:32:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:32:43.409774  543705 memory.go:191] Add success.
I0320 19:32:43.409811  543705 cpu.go:282] Add success.
I0320 19:32:43.419820  543705 net.go:648] Add success.
I0320 19:32:43.422480  543705 net.go:770] primary dev: ETH0
I0320 19:32:43.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:32:43.422505  543705 net.go:698] Add success.
I0320 19:32:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:32:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:32:53.409788  543705 memory.go:184] no items to output this cycle
I0320 19:32:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 19:33:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:33:03.409781  543705 memory.go:184] no items to output this cycle
I0320 19:33:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 19:33:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:33:13.409827  543705 memory.go:191] Add success.
I0320 19:33:13.409828  543705 cpu.go:282] Add success.
W0320 19:33:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:33:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:33:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:33:13.420189  543705 net.go:648] Add success.
I0320 19:33:13.423407  543705 net.go:770] primary dev: ETH0
I0320 19:33:13.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:33:13.423450  543705 net.go:698] Add success.
I0320 19:33:13.468730  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f40a441d-ae88-42a2-a88f-a4264cf4148f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:33:13.468763  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:33:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:33:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:33:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 19:33:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:33:14.456526  543705 disk_worker.go:494] system disk:vda1
I0320 19:33:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:33:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:33:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:33:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:33:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:33:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:33:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 19:33:23.409792  543705 memory.go:184] no items to output this cycle
I0320 19:33:25.775506  543705 disk_info.go:125] begin check local disk info of client
I0320 19:33:25.778002  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:33:25.778007  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002666c0 0xc000266700]
E0320 19:33:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:33:33.409798  543705 memory.go:184] no items to output this cycle
I0320 19:33:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 19:33:38.598694  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:33:38.598700  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:33:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:33:43.410651  543705 memory.go:191] Add success.
I0320 19:33:43.409827  543705 cpu.go:282] Add success.
I0320 19:33:43.420358  543705 net.go:648] Add success.
I0320 19:33:43.422816  543705 net.go:770] primary dev: ETH0
I0320 19:33:43.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:33:43.422845  543705 net.go:698] Add success.
I0320 19:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:33:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:33:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:33:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:33:53.409784  543705 memory.go:184] no items to output this cycle
I0320 19:33:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 19:34:03.409922  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:34:03.409961  543705 memory.go:184] no items to output this cycle
I0320 19:34:03.409989  543705 cpu.go:275] no items to output this cycle
E0320 19:34:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:34:13.409785  543705 memory.go:191] Add success.
W0320 19:34:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:34:13.409819  543705 cpu.go:282] Add success.
W0320 19:34:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:34:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:34:13.420179  543705 net.go:648] Add success.
I0320 19:34:13.423134  543705 net.go:770] primary dev: ETH0
I0320 19:34:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:34:13.423161  543705 net.go:698] Add success.
I0320 19:34:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:34:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:34:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 19:34:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:34:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 19:34:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:34:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:34:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:34:23.410416  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:34:23.410434  543705 memory.go:184] no items to output this cycle
I0320 19:34:23.410443  543705 cpu.go:275] no items to output this cycle
I0320 19:34:25.779536  543705 disk_info.go:125] begin check local disk info of client
I0320 19:34:25.782090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:34:25.782096  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b640 0xc00007b680]
E0320 19:34:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:34:33.409768  543705 memory.go:184] no items to output this cycle
I0320 19:34:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 19:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:34:43.409795  543705 memory.go:191] Add success.
I0320 19:34:43.409796  543705 cpu.go:282] Add success.
I0320 19:34:43.420054  543705 net.go:648] Add success.
I0320 19:34:43.422766  543705 net.go:770] primary dev: ETH0
I0320 19:34:43.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:34:43.422792  543705 net.go:698] Add success.
I0320 19:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:34:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:34:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:34:53.409810  543705 memory.go:184] no items to output this cycle
I0320 19:34:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 19:35:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:35:03.409767  543705 memory.go:184] no items to output this cycle
I0320 19:35:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 19:35:13.409940  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:35:13.409977  543705 memory.go:191] Add success.
W0320 19:35:13.410018  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:35:13.409941  543705 cpu.go:282] Add success.
W0320 19:35:13.410033  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:35:13.410036  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:35:13.419755  543705 net.go:648] Add success.
I0320 19:35:13.422316  543705 net.go:770] primary dev: ETH0
I0320 19:35:13.422329  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:35:13.422343  543705 net.go:698] Add success.
I0320 19:35:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:35:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:35:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 19:35:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:35:14.456507  543705 disk_worker.go:494] system disk:vda1
I0320 19:35:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:35:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:35:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:35:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:35:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:35:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:35:23.409785  543705 memory.go:184] no items to output this cycle
I0320 19:35:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 19:35:25.782180  543705 disk_info.go:125] begin check local disk info of client
I0320 19:35:25.784705  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:35:25.784711  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb740 0xc0001fb780]
E0320 19:35:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:35:33.409775  543705 memory.go:184] no items to output this cycle
I0320 19:35:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 19:35:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:35:43.409804  543705 memory.go:191] Add success.
I0320 19:35:43.409812  543705 cpu.go:282] Add success.
I0320 19:35:43.419875  543705 net.go:648] Add success.
I0320 19:35:43.422649  543705 net.go:770] primary dev: ETH0
I0320 19:35:43.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:35:43.422674  543705 net.go:698] Add success.
I0320 19:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:35:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:35:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:35:53.409795  543705 memory.go:184] no items to output this cycle
I0320 19:35:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 19:36:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:36:03.409791  543705 memory.go:184] no items to output this cycle
I0320 19:36:03.409843  543705 cpu.go:275] no items to output this cycle
E0320 19:36:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:36:13.409813  543705 memory.go:191] Add success.
I0320 19:36:13.409814  543705 cpu.go:282] Add success.
W0320 19:36:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:36:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:36:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:36:13.420504  543705 net.go:648] Add success.
I0320 19:36:13.423339  543705 net.go:770] primary dev: ETH0
I0320 19:36:13.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:36:13.423364  543705 net.go:698] Add success.
I0320 19:36:13.469121  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"62748330-f46e-4263-9458-eb95c428701f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:36:13.469152  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:36:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:36:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:36:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 19:36:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:36:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 19:36:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:36:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:36:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:36:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:36:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:36:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:36:23.409807  543705 memory.go:184] no items to output this cycle
I0320 19:36:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 19:36:25.785671  543705 disk_info.go:125] begin check local disk info of client
I0320 19:36:25.788093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:36:25.788099  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c700 0xc00039c740]
E0320 19:36:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:36:33.409765  543705 memory.go:184] no items to output this cycle
I0320 19:36:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 19:36:38.599705  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:36:38.599712  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:36:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:36:43.410521  543705 memory.go:191] Add success.
I0320 19:36:43.409812  543705 cpu.go:282] Add success.
I0320 19:36:43.420270  543705 net.go:648] Add success.
I0320 19:36:43.422914  543705 net.go:770] primary dev: ETH0
I0320 19:36:43.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:36:43.422939  543705 net.go:698] Add success.
I0320 19:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:36:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:36:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:36:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:36:53.409779  543705 memory.go:184] no items to output this cycle
I0320 19:36:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 19:37:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:37:03.409800  543705 memory.go:184] no items to output this cycle
I0320 19:37:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 19:37:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:37:13.409819  543705 memory.go:191] Add success.
I0320 19:37:13.409821  543705 cpu.go:282] Add success.
W0320 19:37:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:37:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:37:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:37:13.420195  543705 net.go:648] Add success.
I0320 19:37:13.422825  543705 net.go:770] primary dev: ETH0
I0320 19:37:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:37:13.422854  543705 net.go:698] Add success.
I0320 19:37:13.453427  543705 event_worker.go:152] Polling the log file for events...
W0320 19:37:14.455399  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:37:14.455415  543705 disk_worker.go:708] disk space is not compliant
W0320 19:37:14.455419  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:37:14.456135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:37:14.456144  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:37:14.456150  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:37:14.457336  543705 disk_worker.go:494] system disk:vda1
I0320 19:37:14.457361  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:37:15.456775  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:37:15.456784  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:37:16.457899  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:37:16.457899  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:37:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:37:16.457970  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:37:16.472270  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:37:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:37:23.409790  543705 memory.go:184] no items to output this cycle
I0320 19:37:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 19:37:25.788180  543705 disk_info.go:125] begin check local disk info of client
I0320 19:37:25.790627  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:37:25.790635  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314000 0xc000314040]
E0320 19:37:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:37:33.409798  543705 memory.go:184] no items to output this cycle
I0320 19:37:33.409832  543705 cpu.go:275] no items to output this cycle
E0320 19:37:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:37:43.409779  543705 memory.go:191] Add success.
I0320 19:37:43.409815  543705 cpu.go:282] Add success.
I0320 19:37:43.420011  543705 net.go:648] Add success.
I0320 19:37:43.422891  543705 net.go:770] primary dev: ETH0
I0320 19:37:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:37:43.422920  543705 net.go:698] Add success.
I0320 19:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:37:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:37:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:37:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:37:53.409791  543705 memory.go:184] no items to output this cycle
I0320 19:37:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 19:38:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:38:03.409816  543705 memory.go:184] no items to output this cycle
I0320 19:38:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 19:38:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:38:13.409798  543705 memory.go:191] Add success.
I0320 19:38:13.409804  543705 cpu.go:282] Add success.
W0320 19:38:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:38:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:38:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:38:13.420046  543705 net.go:648] Add success.
I0320 19:38:13.422903  543705 net.go:770] primary dev: ETH0
I0320 19:38:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:38:13.422931  543705 net.go:698] Add success.
I0320 19:38:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:38:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:38:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 19:38:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:38:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 19:38:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:38:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:38:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:38:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:38:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:38:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:38:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:38:23.409778  543705 memory.go:184] no items to output this cycle
I0320 19:38:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 19:38:25.791591  543705 disk_info.go:125] begin check local disk info of client
I0320 19:38:25.794061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:38:25.794067  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa440 0xc0001fa480]
E0320 19:38:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:38:33.409771  543705 memory.go:184] no items to output this cycle
I0320 19:38:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:38:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:38:43.409815  543705 memory.go:191] Add success.
I0320 19:38:43.409825  543705 cpu.go:282] Add success.
I0320 19:38:43.419989  543705 net.go:648] Add success.
I0320 19:38:43.422957  543705 net.go:770] primary dev: ETH0
I0320 19:38:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:38:43.422982  543705 net.go:698] Add success.
I0320 19:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:38:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:38:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:38:53.409777  543705 memory.go:184] no items to output this cycle
I0320 19:38:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 19:39:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:39:03.409776  543705 memory.go:184] no items to output this cycle
I0320 19:39:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 19:39:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:39:13.409797  543705 memory.go:191] Add success.
I0320 19:39:13.409798  543705 cpu.go:282] Add success.
W0320 19:39:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:39:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:39:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:39:13.420238  543705 net.go:648] Add success.
I0320 19:39:13.423067  543705 net.go:770] primary dev: ETH0
I0320 19:39:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:39:13.423091  543705 net.go:698] Add success.
I0320 19:39:13.683216  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"317c83fe-b034-4e43-a328-fa89ca39c0b1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:39:13.683256  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:39:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:39:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 19:39:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:39:14.456612  543705 disk_worker.go:494] system disk:vda1
I0320 19:39:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:39:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:39:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:39:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:39:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:39:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:39:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:39:23.409799  543705 memory.go:184] no items to output this cycle
I0320 19:39:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 19:39:25.794148  543705 disk_info.go:125] begin check local disk info of client
I0320 19:39:25.796610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:39:25.796616  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
E0320 19:39:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:39:33.409797  543705 memory.go:184] no items to output this cycle
I0320 19:39:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 19:39:38.599855  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:39:38.599861  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:39:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:39:43.410639  543705 memory.go:191] Add success.
I0320 19:39:43.409798  543705 cpu.go:282] Add success.
I0320 19:39:43.420312  543705 net.go:648] Add success.
I0320 19:39:43.422718  543705 net.go:770] primary dev: ETH0
I0320 19:39:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:39:43.422742  543705 net.go:698] Add success.
I0320 19:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:39:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:39:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:39:53.409804  543705 memory.go:184] no items to output this cycle
I0320 19:39:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 19:40:03.410718  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:40:03.410736  543705 memory.go:184] no items to output this cycle
I0320 19:40:03.410767  543705 cpu.go:275] no items to output this cycle
E0320 19:40:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:40:13.409814  543705 memory.go:191] Add success.
I0320 19:40:13.409822  543705 cpu.go:282] Add success.
W0320 19:40:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:40:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:40:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:40:13.420068  543705 net.go:648] Add success.
I0320 19:40:13.422665  543705 net.go:770] primary dev: ETH0
I0320 19:40:13.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:40:13.422694  543705 net.go:698] Add success.
I0320 19:40:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:40:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:40:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 19:40:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:40:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 19:40:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:40:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:40:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:40:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:40:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:40:23.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:40:23.409860  543705 memory.go:184] no items to output this cycle
I0320 19:40:23.409952  543705 cpu.go:275] no items to output this cycle
I0320 19:40:25.797622  543705 disk_info.go:125] begin check local disk info of client
I0320 19:40:25.800122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:40:25.800128  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c500 0xc00035c540]
E0320 19:40:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:40:33.409781  543705 cpu.go:275] no items to output this cycle
I0320 19:40:33.409786  543705 memory.go:184] no items to output this cycle
E0320 19:40:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:40:43.409809  543705 memory.go:191] Add success.
I0320 19:40:43.409820  543705 cpu.go:282] Add success.
I0320 19:40:43.419900  543705 net.go:648] Add success.
I0320 19:40:43.422712  543705 net.go:770] primary dev: ETH0
I0320 19:40:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:40:43.422743  543705 net.go:698] Add success.
I0320 19:40:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:40:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:40:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:40:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:40:53.409777  543705 memory.go:184] no items to output this cycle
I0320 19:40:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 19:41:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:41:03.409797  543705 memory.go:184] no items to output this cycle
I0320 19:41:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 19:41:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:41:13.409783  543705 memory.go:191] Add success.
I0320 19:41:13.409795  543705 cpu.go:282] Add success.
W0320 19:41:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:41:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:41:13.420146  543705 net.go:648] Add success.
I0320 19:41:13.423113  543705 net.go:770] primary dev: ETH0
I0320 19:41:13.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:41:13.423137  543705 net.go:698] Add success.
I0320 19:41:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:41:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:41:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0320 19:41:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:41:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 19:41:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:41:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:41:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:41:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:41:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:41:23.410737  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:41:23.410750  543705 memory.go:184] no items to output this cycle
I0320 19:41:23.410753  543705 cpu.go:275] no items to output this cycle
I0320 19:41:25.801652  543705 disk_info.go:125] begin check local disk info of client
I0320 19:41:25.804085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:41:25.804090  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cb9c0 0xc0004cba00]
E0320 19:41:33.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:41:33.409898  543705 memory.go:184] no items to output this cycle
I0320 19:41:33.409965  543705 cpu.go:275] no items to output this cycle
E0320 19:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:41:43.409804  543705 memory.go:191] Add success.
I0320 19:41:43.409833  543705 cpu.go:282] Add success.
I0320 19:41:43.419910  543705 net.go:648] Add success.
I0320 19:41:43.422847  543705 net.go:770] primary dev: ETH0
I0320 19:41:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:41:43.422876  543705 net.go:698] Add success.
I0320 19:41:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:41:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:41:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:41:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:41:53.409796  543705 memory.go:184] no items to output this cycle
I0320 19:41:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 19:42:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:42:03.409794  543705 memory.go:184] no items to output this cycle
I0320 19:42:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 19:42:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:42:13.409791  543705 memory.go:191] Add success.
W0320 19:42:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:42:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:42:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:42:13.409835  543705 cpu.go:282] Add success.
I0320 19:42:13.420122  543705 net.go:648] Add success.
I0320 19:42:13.423069  543705 net.go:770] primary dev: ETH0
I0320 19:42:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:42:13.423094  543705 net.go:698] Add success.
I0320 19:42:13.468820  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"85e163f5-b92a-415a-ac53-2ef3ea1b2feb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:42:13.468855  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 19:42:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:42:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 19:42:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:42:14.456825  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:42:14.456833  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:42:14.456838  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:42:14.457106  543705 disk_worker.go:494] system disk:vda1
I0320 19:42:14.457132  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:42:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:42:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:42:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:42:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:42:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:42:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:42:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:42:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:42:23.409790  543705 memory.go:184] no items to output this cycle
I0320 19:42:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 19:42:25.805667  543705 disk_info.go:125] begin check local disk info of client
I0320 19:42:25.808160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:42:25.808166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bcb40 0xc0002bcb80]
E0320 19:42:33.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:42:33.409895  543705 memory.go:184] no items to output this cycle
I0320 19:42:33.409977  543705 cpu.go:275] no items to output this cycle
I0320 19:42:38.600702  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:42:38.600709  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:42:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:42:43.410647  543705 memory.go:191] Add success.
I0320 19:42:43.409827  543705 cpu.go:282] Add success.
I0320 19:42:43.420342  543705 net.go:648] Add success.
I0320 19:42:43.422951  543705 net.go:770] primary dev: ETH0
I0320 19:42:43.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:42:43.422976  543705 net.go:698] Add success.
I0320 19:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:42:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:42:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:42:53.409816  543705 memory.go:184] no items to output this cycle
I0320 19:42:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 19:43:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:43:03.409775  543705 memory.go:184] no items to output this cycle
I0320 19:43:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 19:43:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:43:13.409786  543705 memory.go:191] Add success.
I0320 19:43:13.409808  543705 cpu.go:282] Add success.
W0320 19:43:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:43:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:43:13.420304  543705 net.go:648] Add success.
I0320 19:43:13.423122  543705 net.go:770] primary dev: ETH0
I0320 19:43:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:43:13.423148  543705 net.go:698] Add success.
I0320 19:43:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:43:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:43:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 19:43:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:43:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 19:43:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:43:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:43:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:43:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:43:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:43:23.409780  543705 memory.go:184] no items to output this cycle
I0320 19:43:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 19:43:25.809670  543705 disk_info.go:125] begin check local disk info of client
I0320 19:43:25.812117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:43:25.812122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f43c0 0xc0003f4400]
E0320 19:43:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:43:33.409874  543705 memory.go:184] no items to output this cycle
I0320 19:43:33.409952  543705 cpu.go:275] no items to output this cycle
E0320 19:43:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:43:43.409797  543705 memory.go:191] Add success.
I0320 19:43:43.409798  543705 cpu.go:282] Add success.
I0320 19:43:43.420003  543705 net.go:648] Add success.
I0320 19:43:43.422750  543705 net.go:770] primary dev: ETH0
I0320 19:43:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:43:43.422774  543705 net.go:698] Add success.
I0320 19:43:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:43:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:43:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:43:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:43:53.409778  543705 memory.go:184] no items to output this cycle
I0320 19:43:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 19:44:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:44:03.409816  543705 memory.go:184] no items to output this cycle
I0320 19:44:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 19:44:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:44:13.409785  543705 memory.go:191] Add success.
I0320 19:44:13.409800  543705 cpu.go:282] Add success.
W0320 19:44:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:44:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:44:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:44:13.420043  543705 net.go:648] Add success.
I0320 19:44:13.422589  543705 net.go:770] primary dev: ETH0
I0320 19:44:13.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:44:13.422614  543705 net.go:698] Add success.
I0320 19:44:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:44:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:44:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 19:44:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:44:14.456514  543705 disk_worker.go:494] system disk:vda1
I0320 19:44:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:44:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:44:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:44:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:44:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:44:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:44:23.409792  543705 memory.go:184] no items to output this cycle
I0320 19:44:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 19:44:25.813678  543705 disk_info.go:125] begin check local disk info of client
I0320 19:44:25.816170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:44:25.816178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb640 0xc0002bb680]
E0320 19:44:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:44:33.409778  543705 memory.go:184] no items to output this cycle
I0320 19:44:33.409779  543705 cpu.go:275] no items to output this cycle
E0320 19:44:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:44:43.409890  543705 cpu.go:282] Add success.
I0320 19:44:43.409914  543705 memory.go:191] Add success.
I0320 19:44:43.419705  543705 net.go:648] Add success.
I0320 19:44:43.422302  543705 net.go:770] primary dev: ETH0
I0320 19:44:43.422315  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:44:43.422327  543705 net.go:698] Add success.
I0320 19:44:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:44:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:44:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:44:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:44:53.409777  543705 memory.go:184] no items to output this cycle
I0320 19:44:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:45:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:45:03.409785  543705 cpu.go:275] no items to output this cycle
I0320 19:45:03.409789  543705 memory.go:184] no items to output this cycle
E0320 19:45:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:45:13.409815  543705 memory.go:191] Add success.
I0320 19:45:13.409821  543705 cpu.go:282] Add success.
W0320 19:45:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:45:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:45:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:45:13.420138  543705 net.go:648] Add success.
I0320 19:45:13.423518  543705 net.go:770] primary dev: ETH0
I0320 19:45:13.423531  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:45:13.423543  543705 net.go:698] Add success.
I0320 19:45:13.463592  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"05d743c6-c312-4f23-9cdb-8aa741791dfd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:45:13.463627  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:45:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:45:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:45:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 19:45:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:45:14.456484  543705 disk_worker.go:494] system disk:vda1
I0320 19:45:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:45:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:45:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:45:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:45:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:45:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:45:23.409773  543705 memory.go:184] no items to output this cycle
I0320 19:45:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 19:45:25.817674  543705 disk_info.go:125] begin check local disk info of client
I0320 19:45:25.820117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:45:25.820123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f4e40 0xc0003f4e80]
E0320 19:45:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:45:33.409793  543705 memory.go:184] no items to output this cycle
I0320 19:45:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 19:45:38.601714  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:45:38.601721  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:45:43.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:45:43.410670  543705 memory.go:191] Add success.
I0320 19:45:43.409923  543705 cpu.go:282] Add success.
I0320 19:45:43.419732  543705 net.go:648] Add success.
I0320 19:45:43.422170  543705 net.go:770] primary dev: ETH0
I0320 19:45:43.422184  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:45:43.422197  543705 net.go:698] Add success.
I0320 19:45:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:45:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:45:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:45:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:45:53.409788  543705 cpu.go:275] no items to output this cycle
I0320 19:45:53.409790  543705 memory.go:184] no items to output this cycle
E0320 19:46:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:46:03.409790  543705 memory.go:184] no items to output this cycle
I0320 19:46:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 19:46:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:46:13.409784  543705 cpu.go:282] Add success.
I0320 19:46:13.409787  543705 memory.go:191] Add success.
W0320 19:46:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:46:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:46:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:46:13.420111  543705 net.go:648] Add success.
I0320 19:46:13.422931  543705 net.go:770] primary dev: ETH0
I0320 19:46:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:46:13.422958  543705 net.go:698] Add success.
I0320 19:46:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:46:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:46:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 19:46:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:46:14.456582  543705 disk_worker.go:494] system disk:vda1
I0320 19:46:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:46:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:46:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:46:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:46:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:46:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:46:23.410248  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:46:23.410264  543705 memory.go:184] no items to output this cycle
I0320 19:46:23.410288  543705 cpu.go:275] no items to output this cycle
I0320 19:46:25.821676  543705 disk_info.go:125] begin check local disk info of client
I0320 19:46:25.824126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:46:25.824131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b28c0 0xc0003b2900]
E0320 19:46:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:46:33.409795  543705 memory.go:184] no items to output this cycle
I0320 19:46:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 19:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:46:43.409789  543705 memory.go:191] Add success.
I0320 19:46:43.409789  543705 cpu.go:282] Add success.
I0320 19:46:43.419754  543705 net.go:648] Add success.
I0320 19:46:43.422315  543705 net.go:770] primary dev: ETH0
I0320 19:46:43.422327  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:46:43.422338  543705 net.go:698] Add success.
I0320 19:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:46:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:46:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:46:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:46:53.409792  543705 memory.go:184] no items to output this cycle
I0320 19:46:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 19:47:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:47:03.409779  543705 memory.go:184] no items to output this cycle
I0320 19:47:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:47:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:47:13.409812  543705 memory.go:191] Add success.
I0320 19:47:13.409823  543705 cpu.go:282] Add success.
W0320 19:47:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:47:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:47:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:47:13.420262  543705 net.go:648] Add success.
I0320 19:47:13.422989  543705 net.go:770] primary dev: ETH0
I0320 19:47:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:47:13.423018  543705 net.go:698] Add success.
I0320 19:47:13.453574  543705 event_worker.go:152] Polling the log file for events...
W0320 19:47:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:47:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 19:47:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:47:14.455854  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:47:14.455863  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:47:14.455868  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:47:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 19:47:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:47:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:47:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:47:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:47:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:47:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:47:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:47:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:47:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:47:23.409769  543705 memory.go:184] no items to output this cycle
I0320 19:47:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 19:47:25.825672  543705 disk_info.go:125] begin check local disk info of client
I0320 19:47:25.828098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:47:25.828103  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004876c0 0xc000487700]
E0320 19:47:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:47:33.409795  543705 memory.go:184] no items to output this cycle
I0320 19:47:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 19:47:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:47:43.409776  543705 memory.go:191] Add success.
I0320 19:47:43.409800  543705 cpu.go:282] Add success.
I0320 19:47:43.419711  543705 net.go:648] Add success.
I0320 19:47:43.422743  543705 net.go:770] primary dev: ETH0
I0320 19:47:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:47:43.422773  543705 net.go:698] Add success.
I0320 19:47:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:47:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:47:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:47:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:47:53.409791  543705 memory.go:184] no items to output this cycle
I0320 19:47:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 19:48:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:48:03.409810  543705 memory.go:184] no items to output this cycle
I0320 19:48:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 19:48:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:48:13.409786  543705 cpu.go:282] Add success.
I0320 19:48:13.409800  543705 memory.go:191] Add success.
W0320 19:48:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:48:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:48:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:48:13.420063  543705 net.go:648] Add success.
I0320 19:48:13.422819  543705 net.go:770] primary dev: ETH0
I0320 19:48:13.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:48:13.422848  543705 net.go:698] Add success.
I0320 19:48:13.471825  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"17686cc3-2298-4cdb-adc4-ec7bf3e790c3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:48:13.471857  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:48:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:48:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:48:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 19:48:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:48:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 19:48:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:48:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:48:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:48:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:48:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:48:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:48:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 19:48:23.409782  543705 memory.go:184] no items to output this cycle
I0320 19:48:25.829671  543705 disk_info.go:125] begin check local disk info of client
I0320 19:48:25.832129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:48:25.832134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9080 0xc0003b90c0]
E0320 19:48:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:48:33.409776  543705 cpu.go:275] no items to output this cycle
I0320 19:48:33.409781  543705 memory.go:184] no items to output this cycle
I0320 19:48:38.602708  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:48:38.602714  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:48:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:48:43.409798  543705 cpu.go:282] Add success.
I0320 19:48:43.410752  543705 memory.go:191] Add success.
I0320 19:48:43.419707  543705 net.go:648] Add success.
I0320 19:48:43.422091  543705 net.go:770] primary dev: ETH0
I0320 19:48:43.422104  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:48:43.422115  543705 net.go:698] Add success.
I0320 19:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:48:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:48:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:48:53.409790  543705 memory.go:184] no items to output this cycle
I0320 19:48:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 19:49:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:49:03.409803  543705 memory.go:184] no items to output this cycle
I0320 19:49:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 19:49:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:49:13.409783  543705 memory.go:191] Add success.
I0320 19:49:13.409803  543705 cpu.go:282] Add success.
W0320 19:49:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:49:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:49:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:49:13.420339  543705 net.go:648] Add success.
I0320 19:49:13.423207  543705 net.go:770] primary dev: ETH0
I0320 19:49:13.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:49:13.423232  543705 net.go:698] Add success.
I0320 19:49:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:49:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:49:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 19:49:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:49:14.456584  543705 disk_worker.go:494] system disk:vda1
I0320 19:49:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:49:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:49:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:49:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:49:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:49:23.409765  543705 memory.go:184] no items to output this cycle
I0320 19:49:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 19:49:25.833674  543705 disk_info.go:125] begin check local disk info of client
I0320 19:49:25.836082  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:49:25.836089  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486e00 0xc000486e40]
E0320 19:49:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:49:33.409789  543705 memory.go:184] no items to output this cycle
I0320 19:49:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:49:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:49:43.409839  543705 memory.go:191] Add success.
I0320 19:49:43.409849  543705 cpu.go:282] Add success.
I0320 19:49:43.420090  543705 net.go:770] primary dev: ETH0
I0320 19:49:43.420108  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:49:43.420122  543705 net.go:698] Add success.
I0320 19:49:43.420680  543705 net.go:648] Add success.
I0320 19:49:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:49:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:49:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:49:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:49:53.409794  543705 memory.go:184] no items to output this cycle
I0320 19:49:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 19:50:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:50:03.409823  543705 memory.go:184] no items to output this cycle
I0320 19:50:03.409834  543705 cpu.go:275] no items to output this cycle
E0320 19:50:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:50:13.409788  543705 memory.go:191] Add success.
I0320 19:50:13.409811  543705 cpu.go:282] Add success.
W0320 19:50:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:50:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:50:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:50:13.420261  543705 net.go:648] Add success.
I0320 19:50:13.423009  543705 net.go:770] primary dev: ETH0
I0320 19:50:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:50:13.423052  543705 net.go:698] Add success.
I0320 19:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:50:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:50:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 19:50:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:50:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 19:50:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:50:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:50:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:50:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:50:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:50:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:50:23.409790  543705 memory.go:184] no items to output this cycle
I0320 19:50:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 19:50:25.837669  543705 disk_info.go:125] begin check local disk info of client
I0320 19:50:25.840164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:50:25.840169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486e40 0xc000486e80]
E0320 19:50:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:50:33.409792  543705 memory.go:184] no items to output this cycle
I0320 19:50:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 19:50:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:50:43.409792  543705 memory.go:191] Add success.
I0320 19:50:43.409819  543705 cpu.go:282] Add success.
I0320 19:50:43.419888  543705 net.go:648] Add success.
I0320 19:50:43.422690  543705 net.go:770] primary dev: ETH0
I0320 19:50:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:50:43.422719  543705 net.go:698] Add success.
I0320 19:50:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:50:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:50:46.458177  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:50:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:50:53.409805  543705 cpu.go:275] no items to output this cycle
I0320 19:50:53.409818  543705 memory.go:184] no items to output this cycle
E0320 19:51:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:51:03.409780  543705 memory.go:184] no items to output this cycle
I0320 19:51:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 19:51:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:51:13.409794  543705 memory.go:191] Add success.
I0320 19:51:13.409795  543705 cpu.go:282] Add success.
W0320 19:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:51:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:51:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:51:13.420095  543705 net.go:648] Add success.
I0320 19:51:13.422985  543705 net.go:770] primary dev: ETH0
I0320 19:51:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:51:13.423010  543705 net.go:698] Add success.
I0320 19:51:13.474951  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a58cbac5-4ab6-43e2-a4c6-f5ef333e89a2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:51:13.474992  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:51:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:51:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:51:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 19:51:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:51:14.456537  543705 disk_worker.go:494] system disk:vda1
I0320 19:51:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:51:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:51:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:51:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:51:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:51:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:51:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:51:23.409797  543705 memory.go:184] no items to output this cycle
I0320 19:51:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 19:51:25.841672  543705 disk_info.go:125] begin check local disk info of client
I0320 19:51:25.844241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:51:25.844247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9bc0 0xc0001f9c00]
E0320 19:51:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:51:33.409792  543705 memory.go:184] no items to output this cycle
I0320 19:51:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 19:51:38.603734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:51:38.603742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:51:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:51:43.410598  543705 memory.go:191] Add success.
I0320 19:51:43.409792  543705 cpu.go:282] Add success.
I0320 19:51:43.420412  543705 net.go:648] Add success.
I0320 19:51:43.423198  543705 net.go:770] primary dev: ETH0
I0320 19:51:43.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:51:43.423224  543705 net.go:698] Add success.
I0320 19:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:51:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:51:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:51:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:51:53.409792  543705 memory.go:184] no items to output this cycle
I0320 19:51:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 19:52:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:52:03.409788  543705 memory.go:184] no items to output this cycle
I0320 19:52:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:52:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:52:13.409807  543705 memory.go:191] Add success.
I0320 19:52:13.409817  543705 cpu.go:282] Add success.
W0320 19:52:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:52:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:52:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:52:13.420149  543705 net.go:648] Add success.
I0320 19:52:13.422856  543705 net.go:770] primary dev: ETH0
I0320 19:52:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:52:13.422882  543705 net.go:698] Add success.
W0320 19:52:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:52:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 19:52:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:52:14.456935  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:52:14.456944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:52:14.456950  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:52:14.457025  543705 disk_worker.go:494] system disk:vda1
I0320 19:52:14.457069  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:52:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:52:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:52:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:52:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:52:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:52:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:52:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:52:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:52:23.409774  543705 memory.go:184] no items to output this cycle
I0320 19:52:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 19:52:25.845674  543705 disk_info.go:125] begin check local disk info of client
I0320 19:52:25.848121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:52:25.848126  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349bc0 0xc000349c00]
E0320 19:52:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:52:33.409775  543705 memory.go:184] no items to output this cycle
I0320 19:52:33.409800  543705 cpu.go:275] no items to output this cycle
E0320 19:52:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:52:43.409786  543705 memory.go:191] Add success.
I0320 19:52:43.409788  543705 cpu.go:282] Add success.
I0320 19:52:43.419922  543705 net.go:648] Add success.
I0320 19:52:43.422811  543705 net.go:770] primary dev: ETH0
I0320 19:52:43.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:52:43.422846  543705 net.go:698] Add success.
I0320 19:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:52:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:52:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:52:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:52:53.409797  543705 memory.go:184] no items to output this cycle
I0320 19:52:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 19:53:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:53:03.409772  543705 memory.go:184] no items to output this cycle
I0320 19:53:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 19:53:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:53:13.409797  543705 memory.go:191] Add success.
I0320 19:53:13.409804  543705 cpu.go:282] Add success.
W0320 19:53:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:53:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:53:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:53:13.420296  543705 net.go:648] Add success.
I0320 19:53:13.423068  543705 net.go:770] primary dev: ETH0
I0320 19:53:13.423090  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:53:13.423103  543705 net.go:698] Add success.
I0320 19:53:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:53:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:53:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 19:53:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:53:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 19:53:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:53:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:53:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:53:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:53:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:53:23.409775  543705 memory.go:184] no items to output this cycle
I0320 19:53:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 19:53:25.849669  543705 disk_info.go:125] begin check local disk info of client
I0320 19:53:25.852116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:53:25.852122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8700 0xc0002a8740]
E0320 19:53:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:53:33.409802  543705 memory.go:184] no items to output this cycle
I0320 19:53:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 19:53:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:53:43.409815  543705 memory.go:191] Add success.
I0320 19:53:43.409827  543705 cpu.go:282] Add success.
I0320 19:53:43.419877  543705 net.go:648] Add success.
I0320 19:53:43.423004  543705 net.go:770] primary dev: ETH0
I0320 19:53:43.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:53:43.423029  543705 net.go:698] Add success.
I0320 19:53:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:53:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:53:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:53:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:53:53.409814  543705 memory.go:184] no items to output this cycle
I0320 19:53:53.409831  543705 cpu.go:275] no items to output this cycle
E0320 19:54:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:54:03.409824  543705 memory.go:184] no items to output this cycle
I0320 19:54:03.409835  543705 cpu.go:275] no items to output this cycle
E0320 19:54:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:54:13.409802  543705 memory.go:191] Add success.
I0320 19:54:13.409806  543705 cpu.go:282] Add success.
W0320 19:54:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:54:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:54:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:54:13.420134  543705 net.go:648] Add success.
I0320 19:54:13.422930  543705 net.go:770] primary dev: ETH0
I0320 19:54:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:54:13.422960  543705 net.go:698] Add success.
I0320 19:54:13.586777  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f6c0ec84-fadc-4e55-a9ac-1cf5201cefc7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 19:54:13.586812  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 19:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:54:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:54:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 19:54:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:54:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 19:54:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:54:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:54:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:54:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:54:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:54:23.409800  543705 memory.go:184] no items to output this cycle
I0320 19:54:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 19:54:25.853670  543705 disk_info.go:125] begin check local disk info of client
I0320 19:54:25.856158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:54:25.856165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6480 0xc0002a64c0]
E0320 19:54:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:54:33.409797  543705 memory.go:184] no items to output this cycle
I0320 19:54:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 19:54:38.604726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:54:38.604733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:54:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:54:43.410668  543705 memory.go:191] Add success.
I0320 19:54:43.409793  543705 cpu.go:282] Add success.
I0320 19:54:43.420361  543705 net.go:648] Add success.
I0320 19:54:43.423298  543705 net.go:770] primary dev: ETH0
I0320 19:54:43.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:54:43.423325  543705 net.go:698] Add success.
I0320 19:54:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:54:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:54:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:54:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:54:53.409800  543705 memory.go:184] no items to output this cycle
I0320 19:54:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 19:55:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:55:03.409787  543705 memory.go:184] no items to output this cycle
I0320 19:55:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 19:55:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:55:13.409827  543705 memory.go:191] Add success.
I0320 19:55:13.409837  543705 cpu.go:282] Add success.
W0320 19:55:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:55:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:55:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:55:13.420211  543705 net.go:648] Add success.
I0320 19:55:13.423399  543705 net.go:770] primary dev: ETH0
I0320 19:55:13.423413  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:55:13.423425  543705 net.go:698] Add success.
I0320 19:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:55:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:55:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 19:55:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:55:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 19:55:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:55:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:55:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:55:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:55:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:55:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:55:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:55:23.409771  543705 memory.go:184] no items to output this cycle
I0320 19:55:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 19:55:25.857674  543705 disk_info.go:125] begin check local disk info of client
I0320 19:55:25.860111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:55:25.860116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb7c0 0xc0001fb800]
E0320 19:55:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:55:33.409781  543705 memory.go:184] no items to output this cycle
I0320 19:55:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 19:55:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:55:43.409776  543705 memory.go:191] Add success.
I0320 19:55:43.409809  543705 cpu.go:282] Add success.
I0320 19:55:43.419843  543705 net.go:648] Add success.
I0320 19:55:43.422977  543705 net.go:770] primary dev: ETH0
I0320 19:55:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:55:43.423002  543705 net.go:698] Add success.
I0320 19:55:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:55:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:55:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:55:53.409793  543705 memory.go:184] no items to output this cycle
I0320 19:55:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 19:56:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:56:03.409788  543705 memory.go:184] no items to output this cycle
I0320 19:56:03.409835  543705 cpu.go:275] no items to output this cycle
E0320 19:56:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:56:13.409817  543705 memory.go:191] Add success.
I0320 19:56:13.409828  543705 cpu.go:282] Add success.
W0320 19:56:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:56:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:56:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:56:13.420165  543705 net.go:648] Add success.
I0320 19:56:13.422739  543705 net.go:770] primary dev: ETH0
I0320 19:56:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:56:13.422764  543705 net.go:698] Add success.
I0320 19:56:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:56:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:56:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 19:56:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:56:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 19:56:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:56:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:56:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:56:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:56:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:56:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:56:23.409764  543705 memory.go:184] no items to output this cycle
I0320 19:56:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 19:56:25.861673  543705 disk_info.go:125] begin check local disk info of client
I0320 19:56:25.864115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:56:25.864121  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a380 0xc00036a400]
E0320 19:56:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:56:33.409763  543705 memory.go:184] no items to output this cycle
I0320 19:56:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 19:56:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:56:43.409791  543705 memory.go:191] Add success.
I0320 19:56:43.409794  543705 cpu.go:282] Add success.
I0320 19:56:43.420068  543705 net.go:648] Add success.
I0320 19:56:43.422837  543705 net.go:770] primary dev: ETH0
I0320 19:56:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:56:43.422862  543705 net.go:698] Add success.
I0320 19:56:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:56:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:56:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:56:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:56:53.409791  543705 memory.go:184] no items to output this cycle
I0320 19:56:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 19:57:03.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:57:03.409867  543705 cpu.go:275] no items to output this cycle
I0320 19:57:03.409870  543705 memory.go:184] no items to output this cycle
E0320 19:57:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:57:13.409789  543705 memory.go:191] Add success.
I0320 19:57:13.409810  543705 cpu.go:282] Add success.
W0320 19:57:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:57:13.420166  543705 net.go:648] Add success.
I0320 19:57:13.429093  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 19:57:13.429171  543705 net.go:770] primary dev: ETH0
I0320 19:57:13.429184  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:57:13.429196  543705 net.go:698] Add success.
I0320 19:57:13.453714  543705 event_worker.go:152] Polling the log file for events...
W0320 19:57:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:57:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 19:57:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0320 19:57:14.455898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 19:57:14.455907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 19:57:14.455913  543705 custom_config.go:64] query custom config with name: gpu
I0320 19:57:14.456547  543705 disk_worker.go:494] system disk:vda1
I0320 19:57:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 19:57:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 19:57:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:57:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 19:57:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 19:57:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:57:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:57:16.472330  543705 disk_local_worker.go:436] Get disk info: []
W0320 19:57:18.453958  543705 custom_config.go:80] failed to get custom config
I0320 19:57:18.453977  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0320 19:57:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:57:23.409763  543705 memory.go:184] no items to output this cycle
I0320 19:57:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 19:57:25.865670  543705 disk_info.go:125] begin check local disk info of client
I0320 19:57:25.868085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:57:25.868091  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492d80 0xc000492dc0]
E0320 19:57:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:57:33.409765  543705 memory.go:184] no items to output this cycle
I0320 19:57:33.409795  543705 cpu.go:275] no items to output this cycle
I0320 19:57:38.604876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 19:57:38.604883  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 19:57:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:57:43.410700  543705 memory.go:191] Add success.
I0320 19:57:43.409797  543705 cpu.go:282] Add success.
I0320 19:57:43.420380  543705 net.go:648] Add success.
I0320 19:57:43.423472  543705 net.go:770] primary dev: ETH0
I0320 19:57:43.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:57:43.423497  543705 net.go:698] Add success.
I0320 19:57:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:57:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:57:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:57:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:57:53.409789  543705 memory.go:184] no items to output this cycle
I0320 19:57:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 19:58:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:58:03.409770  543705 memory.go:184] no items to output this cycle
I0320 19:58:03.409840  543705 cpu.go:275] no items to output this cycle
E0320 19:58:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:58:13.409812  543705 memory.go:191] Add success.
I0320 19:58:13.409813  543705 cpu.go:282] Add success.
W0320 19:58:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 19:58:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:58:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:58:13.420542  543705 net.go:648] Add success.
I0320 19:58:13.423445  543705 net.go:770] primary dev: ETH0
I0320 19:58:13.423460  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:58:13.423474  543705 net.go:698] Add success.
I0320 19:58:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:58:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:58:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 19:58:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:58:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 19:58:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:58:16.458034  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:58:16.458091  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:58:16.458112  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:58:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:58:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:58:23.409775  543705 memory.go:184] no items to output this cycle
I0320 19:58:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 19:58:25.869672  543705 disk_info.go:125] begin check local disk info of client
I0320 19:58:25.872125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:58:25.872131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0320 19:58:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:58:33.409780  543705 memory.go:184] no items to output this cycle
I0320 19:58:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 19:58:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:58:43.409797  543705 memory.go:191] Add success.
I0320 19:58:43.409803  543705 cpu.go:282] Add success.
I0320 19:58:43.419841  543705 net.go:648] Add success.
I0320 19:58:43.422660  543705 net.go:770] primary dev: ETH0
I0320 19:58:43.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:58:43.422685  543705 net.go:698] Add success.
I0320 19:58:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:58:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:58:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:58:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:58:53.409803  543705 memory.go:184] no items to output this cycle
I0320 19:58:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 19:59:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:59:03.409806  543705 memory.go:184] no items to output this cycle
I0320 19:59:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 19:59:13.409905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:59:13.409933  543705 memory.go:191] Add success.
W0320 19:59:13.409971  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 19:59:13.409982  543705 cpu.go:282] Add success.
W0320 19:59:13.409991  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 19:59:13.410000  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 19:59:13.419726  543705 net.go:648] Add success.
I0320 19:59:13.422729  543705 net.go:770] primary dev: ETH0
I0320 19:59:13.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:59:13.422765  543705 net.go:698] Add success.
I0320 19:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 19:59:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 19:59:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 19:59:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 19:59:14.456497  543705 disk_worker.go:494] system disk:vda1
I0320 19:59:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 19:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 19:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 19:59:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 19:59:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:59:23.409784  543705 memory.go:184] no items to output this cycle
I0320 19:59:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 19:59:25.873673  543705 disk_info.go:125] begin check local disk info of client
I0320 19:59:25.876124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 19:59:25.876130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aab40 0xc0001aab80]
E0320 19:59:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:59:33.409786  543705 memory.go:184] no items to output this cycle
I0320 19:59:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 19:59:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:59:43.409809  543705 cpu.go:282] Add success.
I0320 19:59:43.409811  543705 memory.go:191] Add success.
I0320 19:59:43.419921  543705 net.go:648] Add success.
I0320 19:59:43.422556  543705 net.go:770] primary dev: ETH0
I0320 19:59:43.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0320 19:59:43.422585  543705 net.go:698] Add success.
I0320 19:59:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 19:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 19:59:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 19:59:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 19:59:53.409816  543705 cpu.go:275] no items to output this cycle
I0320 19:59:53.409822  543705 memory.go:184] no items to output this cycle
E0320 20:00:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:00:03.409807  543705 memory.go:184] no items to output this cycle
I0320 20:00:03.409826  543705 cpu.go:275] no items to output this cycle
E0320 20:00:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:00:13.409838  543705 memory.go:191] Add success.
I0320 20:00:13.409838  543705 cpu.go:282] Add success.
W0320 20:00:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:00:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:00:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:00:13.420511  543705 net.go:648] Add success.
I0320 20:00:13.423050  543705 net.go:770] primary dev: ETH0
I0320 20:00:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:00:13.423075  543705 net.go:698] Add success.
I0320 20:00:13.469290  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dbc86c47-dfe2-4e1e-b5c5-cf469daafc5f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:00:13.469321  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:00:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:00:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 20:00:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:00:14.456636  543705 disk_worker.go:494] system disk:vda1
I0320 20:00:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:00:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:00:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:00:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:00:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:00:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:00:23.409796  543705 memory.go:184] no items to output this cycle
I0320 20:00:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 20:00:25.877670  543705 disk_info.go:125] begin check local disk info of client
I0320 20:00:25.880235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:00:25.880241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1180 0xc0004b11c0]
E0320 20:00:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:00:33.409797  543705 memory.go:184] no items to output this cycle
I0320 20:00:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 20:00:38.605732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:00:38.605738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:00:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:00:43.410644  543705 memory.go:191] Add success.
I0320 20:00:43.409807  543705 cpu.go:282] Add success.
I0320 20:00:43.420323  543705 net.go:648] Add success.
I0320 20:00:43.422798  543705 net.go:770] primary dev: ETH0
I0320 20:00:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:00:43.422823  543705 net.go:698] Add success.
I0320 20:00:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:00:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:00:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:00:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:00:53.409782  543705 memory.go:184] no items to output this cycle
I0320 20:00:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 20:01:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:01:03.409776  543705 memory.go:184] no items to output this cycle
I0320 20:01:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 20:01:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:01:13.409784  543705 memory.go:191] Add success.
I0320 20:01:13.409809  543705 cpu.go:282] Add success.
W0320 20:01:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:01:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:01:13.420124  543705 net.go:648] Add success.
I0320 20:01:13.423003  543705 net.go:770] primary dev: ETH0
I0320 20:01:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:01:13.423033  543705 net.go:698] Add success.
I0320 20:01:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:01:14.455348  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:01:14.455512  543705 disk_worker.go:708] disk space is not compliant
W0320 20:01:14.455516  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:01:14.457510  543705 disk_worker.go:494] system disk:vda1
I0320 20:01:14.457548  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:01:15.456018  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:01:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:01:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:01:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:01:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:01:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:01:23.409765  543705 memory.go:184] no items to output this cycle
I0320 20:01:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 20:01:25.881675  543705 disk_info.go:125] begin check local disk info of client
I0320 20:01:25.884117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:01:25.884123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b12c0 0xc0004b1300]
E0320 20:01:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:01:33.409782  543705 cpu.go:275] no items to output this cycle
I0320 20:01:33.409786  543705 memory.go:184] no items to output this cycle
E0320 20:01:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:01:43.409788  543705 memory.go:191] Add success.
I0320 20:01:43.409804  543705 cpu.go:282] Add success.
I0320 20:01:43.419857  543705 net.go:648] Add success.
I0320 20:01:43.422647  543705 net.go:770] primary dev: ETH0
I0320 20:01:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:01:43.422670  543705 net.go:698] Add success.
I0320 20:01:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:01:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:01:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:01:53.409782  543705 memory.go:184] no items to output this cycle
I0320 20:01:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 20:02:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:02:03.409804  543705 memory.go:184] no items to output this cycle
I0320 20:02:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 20:02:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:02:13.409797  543705 memory.go:191] Add success.
I0320 20:02:13.409796  543705 cpu.go:282] Add success.
W0320 20:02:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:02:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:02:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:02:13.420673  543705 net.go:648] Add success.
I0320 20:02:13.423809  543705 net.go:770] primary dev: ETH0
I0320 20:02:13.423822  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:02:13.423833  543705 net.go:698] Add success.
W0320 20:02:14.455286  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:02:14.455421  543705 disk_worker.go:708] disk space is not compliant
W0320 20:02:14.455426  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:02:14.456286  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:02:14.456296  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:02:14.456303  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:02:14.457268  543705 disk_worker.go:494] system disk:vda1
I0320 20:02:14.457297  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:02:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:02:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:02:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:02:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:02:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:02:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:02:16.472305  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:02:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:02:23.409770  543705 memory.go:184] no items to output this cycle
I0320 20:02:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 20:02:25.885674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:02:25.888109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:02:25.888116  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034de80 0xc00034dec0]
E0320 20:02:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:02:33.409785  543705 memory.go:184] no items to output this cycle
I0320 20:02:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 20:02:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:02:43.409779  543705 memory.go:191] Add success.
I0320 20:02:43.409815  543705 cpu.go:282] Add success.
I0320 20:02:43.419870  543705 net.go:648] Add success.
I0320 20:02:43.422702  543705 net.go:770] primary dev: ETH0
I0320 20:02:43.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:02:43.422743  543705 net.go:698] Add success.
I0320 20:02:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:02:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:02:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:02:53.409794  543705 memory.go:184] no items to output this cycle
I0320 20:02:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 20:03:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:03:03.409781  543705 memory.go:184] no items to output this cycle
I0320 20:03:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 20:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:03:13.409785  543705 memory.go:191] Add success.
I0320 20:03:13.409803  543705 cpu.go:282] Add success.
W0320 20:03:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:03:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:03:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:03:13.420054  543705 net.go:648] Add success.
I0320 20:03:13.422778  543705 net.go:770] primary dev: ETH0
I0320 20:03:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:03:13.422808  543705 net.go:698] Add success.
I0320 20:03:13.463707  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c5fb7e51-612a-4744-aa6f-f51dc1d5b7f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:03:13.463738  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:03:14.455569  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:03:14.455583  543705 disk_worker.go:708] disk space is not compliant
W0320 20:03:14.455587  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:03:14.457159  543705 disk_worker.go:494] system disk:vda1
I0320 20:03:14.457210  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:03:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:03:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:03:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:03:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:03:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:03:23.409800  543705 memory.go:184] no items to output this cycle
I0320 20:03:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 20:03:25.889673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:03:25.892138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:03:25.892144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004901c0 0xc000490200]
E0320 20:03:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:03:33.409766  543705 memory.go:184] no items to output this cycle
I0320 20:03:33.409802  543705 cpu.go:275] no items to output this cycle
I0320 20:03:38.606736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:03:38.606743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:03:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:03:43.410554  543705 memory.go:191] Add success.
I0320 20:03:43.409855  543705 cpu.go:282] Add success.
I0320 20:03:43.420291  543705 net.go:648] Add success.
I0320 20:03:43.422788  543705 net.go:770] primary dev: ETH0
I0320 20:03:43.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:03:43.422817  543705 net.go:698] Add success.
I0320 20:03:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:03:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:03:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:03:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:03:53.409798  543705 memory.go:184] no items to output this cycle
I0320 20:03:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 20:04:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:04:03.409776  543705 memory.go:184] no items to output this cycle
I0320 20:04:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 20:04:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:04:13.409793  543705 memory.go:191] Add success.
I0320 20:04:13.409808  543705 cpu.go:282] Add success.
W0320 20:04:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:04:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:04:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:04:13.420073  543705 net.go:648] Add success.
I0320 20:04:13.422943  543705 net.go:770] primary dev: ETH0
I0320 20:04:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:04:13.422971  543705 net.go:698] Add success.
I0320 20:04:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:04:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:04:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 20:04:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:04:14.456564  543705 disk_worker.go:494] system disk:vda1
I0320 20:04:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:04:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:04:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:04:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:04:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:04:23.409795  543705 memory.go:184] no items to output this cycle
I0320 20:04:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 20:04:25.893672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:04:25.896118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:04:25.896124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fba80 0xc0001fbac0]
E0320 20:04:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:04:33.409778  543705 cpu.go:275] no items to output this cycle
I0320 20:04:33.409780  543705 memory.go:184] no items to output this cycle
E0320 20:04:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:04:43.409791  543705 memory.go:191] Add success.
I0320 20:04:43.409813  543705 cpu.go:282] Add success.
I0320 20:04:43.420032  543705 net.go:648] Add success.
I0320 20:04:43.422758  543705 net.go:770] primary dev: ETH0
I0320 20:04:43.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:04:43.422785  543705 net.go:698] Add success.
I0320 20:04:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:04:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:04:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:04:53.409812  543705 memory.go:184] no items to output this cycle
I0320 20:04:53.409823  543705 cpu.go:275] no items to output this cycle
E0320 20:05:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:05:03.409815  543705 memory.go:184] no items to output this cycle
I0320 20:05:03.409829  543705 cpu.go:275] no items to output this cycle
E0320 20:05:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:05:13.409787  543705 memory.go:191] Add success.
I0320 20:05:13.409805  543705 cpu.go:282] Add success.
W0320 20:05:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:05:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:05:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:05:13.420190  543705 net.go:648] Add success.
I0320 20:05:13.423478  543705 net.go:770] primary dev: ETH0
I0320 20:05:13.423492  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:05:13.423505  543705 net.go:698] Add success.
I0320 20:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:05:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:05:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 20:05:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:05:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 20:05:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:05:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:05:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:05:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:05:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:05:16.472893  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:05:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:05:23.409767  543705 memory.go:184] no items to output this cycle
I0320 20:05:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 20:05:25.897674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:05:25.900123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:05:25.900129  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb540 0xc0001fb580]
E0320 20:05:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:05:33.409765  543705 memory.go:184] no items to output this cycle
I0320 20:05:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 20:05:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:05:43.409786  543705 memory.go:191] Add success.
I0320 20:05:43.409802  543705 cpu.go:282] Add success.
I0320 20:05:43.419875  543705 net.go:648] Add success.
I0320 20:05:43.422643  543705 net.go:770] primary dev: ETH0
I0320 20:05:43.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:05:43.422668  543705 net.go:698] Add success.
I0320 20:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:05:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:05:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:05:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:05:53.409784  543705 memory.go:184] no items to output this cycle
I0320 20:05:53.409846  543705 cpu.go:275] no items to output this cycle
E0320 20:06:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:06:03.409806  543705 memory.go:184] no items to output this cycle
I0320 20:06:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 20:06:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:06:13.409824  543705 memory.go:191] Add success.
I0320 20:06:13.409828  543705 cpu.go:282] Add success.
W0320 20:06:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:06:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:06:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:06:13.420145  543705 net.go:648] Add success.
I0320 20:06:13.422738  543705 net.go:770] primary dev: ETH0
I0320 20:06:13.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:06:13.422766  543705 net.go:698] Add success.
I0320 20:06:13.643144  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d47868a0-06ed-4b49-ab7d-40f4c3356b3b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:06:13.643178  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:06:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:06:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:06:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 20:06:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:06:14.456775  543705 disk_worker.go:494] system disk:vda1
I0320 20:06:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:06:15.455602  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:06:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:06:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:06:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:06:23.409781  543705 memory.go:184] no items to output this cycle
I0320 20:06:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 20:06:25.901673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:06:25.904131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:06:25.904137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4580 0xc0000c45c0]
E0320 20:06:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:06:33.409781  543705 memory.go:184] no items to output this cycle
I0320 20:06:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 20:06:38.607741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:06:38.607748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:06:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:06:43.409790  543705 memory.go:191] Add success.
I0320 20:06:43.409796  543705 cpu.go:282] Add success.
I0320 20:06:43.420004  543705 net.go:648] Add success.
I0320 20:06:43.420975  543705 net.go:770] primary dev: ETH0
I0320 20:06:43.420991  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:06:43.421005  543705 net.go:698] Add success.
I0320 20:06:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:06:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:06:53.409818  543705 memory.go:184] no items to output this cycle
I0320 20:06:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 20:07:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:07:03.409788  543705 memory.go:184] no items to output this cycle
I0320 20:07:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 20:07:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:07:13.409815  543705 memory.go:191] Add success.
I0320 20:07:13.409820  543705 cpu.go:282] Add success.
W0320 20:07:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:07:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:07:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:07:13.420147  543705 net.go:648] Add success.
I0320 20:07:13.422818  543705 net.go:770] primary dev: ETH0
I0320 20:07:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:07:13.422845  543705 net.go:698] Add success.
I0320 20:07:13.453409  543705 event_worker.go:152] Polling the log file for events...
W0320 20:07:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:07:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 20:07:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:07:14.455903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:07:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:07:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:07:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 20:07:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:07:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:07:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:07:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:07:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:07:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:07:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:07:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:07:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:07:23.409776  543705 memory.go:184] no items to output this cycle
I0320 20:07:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 20:07:25.905675  543705 disk_info.go:125] begin check local disk info of client
I0320 20:07:25.908093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:07:25.908099  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2800 0xc0003b2840]
E0320 20:07:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:07:33.409773  543705 memory.go:184] no items to output this cycle
I0320 20:07:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 20:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:07:43.409786  543705 memory.go:191] Add success.
I0320 20:07:43.409804  543705 cpu.go:282] Add success.
I0320 20:07:43.419959  543705 net.go:648] Add success.
I0320 20:07:43.422845  543705 net.go:770] primary dev: ETH0
I0320 20:07:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:07:43.422870  543705 net.go:698] Add success.
I0320 20:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:07:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:07:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:07:53.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:07:53.409825  543705 memory.go:184] no items to output this cycle
I0320 20:07:53.409826  543705 cpu.go:275] no items to output this cycle
E0320 20:08:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:08:03.409805  543705 memory.go:184] no items to output this cycle
I0320 20:08:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 20:08:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:08:13.409786  543705 memory.go:191] Add success.
I0320 20:08:13.409805  543705 cpu.go:282] Add success.
W0320 20:08:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:08:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:08:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:08:13.420199  543705 net.go:648] Add success.
I0320 20:08:13.422918  543705 net.go:770] primary dev: ETH0
I0320 20:08:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:08:13.422944  543705 net.go:698] Add success.
I0320 20:08:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:08:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:08:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 20:08:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:08:14.456554  543705 disk_worker.go:494] system disk:vda1
I0320 20:08:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:08:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:08:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:08:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:08:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:08:23.409801  543705 memory.go:184] no items to output this cycle
I0320 20:08:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 20:08:25.909665  543705 disk_info.go:125] begin check local disk info of client
I0320 20:08:25.912141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:08:25.912147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004df780 0xc0004df7c0]
E0320 20:08:33.409901  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:08:33.409919  543705 memory.go:184] no items to output this cycle
I0320 20:08:33.409925  543705 cpu.go:275] no items to output this cycle
E0320 20:08:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:08:43.409825  543705 memory.go:191] Add success.
I0320 20:08:43.409829  543705 cpu.go:282] Add success.
I0320 20:08:43.420045  543705 net.go:648] Add success.
I0320 20:08:43.422583  543705 net.go:770] primary dev: ETH0
I0320 20:08:43.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:08:43.422612  543705 net.go:698] Add success.
I0320 20:08:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:08:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:08:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:08:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 20:08:53.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:08:53.409883  543705 memory.go:184] no items to output this cycle
E0320 20:09:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:09:03.409773  543705 memory.go:184] no items to output this cycle
I0320 20:09:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 20:09:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:09:13.409825  543705 memory.go:191] Add success.
I0320 20:09:13.409826  543705 cpu.go:282] Add success.
W0320 20:09:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:09:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:09:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:09:13.420141  543705 net.go:648] Add success.
I0320 20:09:13.422668  543705 net.go:770] primary dev: ETH0
I0320 20:09:13.422682  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:09:13.422695  543705 net.go:698] Add success.
I0320 20:09:13.468745  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"90169a8b-86b7-49c0-a7bf-555913c58040","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:09:13.468779  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:09:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:09:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0320 20:09:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:09:14.456484  543705 disk_worker.go:494] system disk:vda1
I0320 20:09:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:09:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:09:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:09:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:09:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:09:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:09:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:09:23.409767  543705 memory.go:184] no items to output this cycle
I0320 20:09:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 20:09:25.913676  543705 disk_info.go:125] begin check local disk info of client
I0320 20:09:25.916104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:09:25.916109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2400 0xc0003b2440]
E0320 20:09:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:09:33.409769  543705 memory.go:184] no items to output this cycle
I0320 20:09:33.409900  543705 cpu.go:275] no items to output this cycle
I0320 20:09:38.608752  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:09:38.608760  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:09:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:09:43.410698  543705 memory.go:191] Add success.
I0320 20:09:43.409810  543705 cpu.go:282] Add success.
I0320 20:09:43.420405  543705 net.go:648] Add success.
I0320 20:09:43.423055  543705 net.go:770] primary dev: ETH0
I0320 20:09:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:09:43.423081  543705 net.go:698] Add success.
I0320 20:09:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:09:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:09:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:09:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 20:09:53.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:09:53.409830  543705 memory.go:184] no items to output this cycle
E0320 20:10:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:10:03.409786  543705 memory.go:184] no items to output this cycle
I0320 20:10:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 20:10:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:10:13.409798  543705 memory.go:191] Add success.
I0320 20:10:13.409816  543705 cpu.go:282] Add success.
W0320 20:10:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:10:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:10:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:10:13.420328  543705 net.go:648] Add success.
I0320 20:10:13.423081  543705 net.go:770] primary dev: ETH0
I0320 20:10:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:10:13.423105  543705 net.go:698] Add success.
I0320 20:10:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:10:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:10:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0320 20:10:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:10:14.456482  543705 disk_worker.go:494] system disk:vda1
I0320 20:10:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:10:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:10:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:10:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:10:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:10:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:10:23.409784  543705 memory.go:184] no items to output this cycle
I0320 20:10:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 20:10:25.917674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:10:25.920107  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:10:25.920112  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de900 0xc0003de940]
E0320 20:10:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:10:33.409797  543705 memory.go:184] no items to output this cycle
I0320 20:10:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 20:10:43.409922  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:10:43.410022  543705 cpu.go:282] Add success.
I0320 20:10:43.410049  543705 memory.go:191] Add success.
I0320 20:10:43.419707  543705 net.go:648] Add success.
I0320 20:10:43.422452  543705 net.go:770] primary dev: ETH0
I0320 20:10:43.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:10:43.422477  543705 net.go:698] Add success.
I0320 20:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:10:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:10:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:10:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:10:53.409795  543705 memory.go:184] no items to output this cycle
I0320 20:10:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:11:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:11:03.409777  543705 memory.go:184] no items to output this cycle
I0320 20:11:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 20:11:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:11:13.409835  543705 memory.go:191] Add success.
I0320 20:11:13.409836  543705 cpu.go:282] Add success.
W0320 20:11:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:11:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:11:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:11:13.420643  543705 net.go:648] Add success.
I0320 20:11:13.423475  543705 net.go:770] primary dev: ETH0
I0320 20:11:13.423489  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:11:13.423504  543705 net.go:698] Add success.
I0320 20:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:11:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:11:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 20:11:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:11:14.456473  543705 disk_worker.go:494] system disk:vda1
I0320 20:11:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:11:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:11:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:11:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:11:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:11:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:11:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:11:23.409777  543705 memory.go:184] no items to output this cycle
I0320 20:11:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 20:11:25.921673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:11:25.924135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:11:25.924142  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033fa40 0xc00033fa80]
E0320 20:11:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:11:33.409809  543705 memory.go:184] no items to output this cycle
I0320 20:11:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 20:11:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:11:43.409787  543705 memory.go:191] Add success.
I0320 20:11:43.409805  543705 cpu.go:282] Add success.
I0320 20:11:43.419751  543705 net.go:648] Add success.
I0320 20:11:43.422549  543705 net.go:770] primary dev: ETH0
I0320 20:11:43.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:11:43.422573  543705 net.go:698] Add success.
I0320 20:11:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:11:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:11:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:11:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:11:53.409778  543705 memory.go:184] no items to output this cycle
I0320 20:11:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 20:12:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:12:03.409777  543705 memory.go:184] no items to output this cycle
I0320 20:12:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 20:12:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:12:13.409813  543705 memory.go:191] Add success.
I0320 20:12:13.409820  543705 cpu.go:282] Add success.
W0320 20:12:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:12:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:12:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:12:13.420155  543705 net.go:648] Add success.
I0320 20:12:13.422658  543705 net.go:770] primary dev: ETH0
I0320 20:12:13.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:12:13.422683  543705 net.go:698] Add success.
I0320 20:12:14.018887  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea5e1733-4fa7-4679-a16c-671af76ecc2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:12:14.018929  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 20:12:14.454897  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:12:14.454924  543705 disk_worker.go:708] disk space is not compliant
W0320 20:12:14.454929  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:12:14.455662  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:12:14.455671  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:12:14.455676  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:12:14.456475  543705 disk_worker.go:494] system disk:vda1
I0320 20:12:14.456506  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:12:15.456775  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:12:15.456785  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:12:16.457898  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:12:16.457897  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:12:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:12:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:12:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:12:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:12:23.409778  543705 memory.go:184] no items to output this cycle
I0320 20:12:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 20:12:25.925688  543705 disk_info.go:125] begin check local disk info of client
I0320 20:12:25.928119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:12:25.928124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047d600 0xc00047d640]
E0320 20:12:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:12:33.409798  543705 memory.go:184] no items to output this cycle
I0320 20:12:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 20:12:38.609736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:12:38.609743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:12:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:12:43.410690  543705 memory.go:191] Add success.
I0320 20:12:43.409807  543705 cpu.go:282] Add success.
I0320 20:12:43.420818  543705 net.go:648] Add success.
I0320 20:12:43.424148  543705 net.go:770] primary dev: ETH0
I0320 20:12:43.424163  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:12:43.424176  543705 net.go:698] Add success.
I0320 20:12:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:12:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:12:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:12:53.409762  543705 memory.go:184] no items to output this cycle
I0320 20:12:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 20:13:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:13:03.409805  543705 memory.go:184] no items to output this cycle
I0320 20:13:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 20:13:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:13:13.409783  543705 memory.go:191] Add success.
W0320 20:13:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 20:13:13.409808  543705 cpu.go:282] Add success.
W0320 20:13:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:13:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:13:13.420108  543705 net.go:648] Add success.
I0320 20:13:13.422870  543705 net.go:770] primary dev: ETH0
I0320 20:13:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:13:13.422895  543705 net.go:698] Add success.
I0320 20:13:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:13:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:13:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0320 20:13:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:13:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 20:13:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:13:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:13:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:13:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:13:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:13:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:13:23.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:13:23.410386  543705 memory.go:184] no items to output this cycle
I0320 20:13:23.410388  543705 cpu.go:275] no items to output this cycle
I0320 20:13:25.929673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:13:25.932112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:13:25.932118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8c40 0xc0003c8c80]
E0320 20:13:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:13:33.409774  543705 memory.go:184] no items to output this cycle
I0320 20:13:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 20:13:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:13:43.409927  543705 memory.go:191] Add success.
I0320 20:13:43.409936  543705 cpu.go:282] Add success.
I0320 20:13:43.419773  543705 net.go:648] Add success.
I0320 20:13:43.422515  543705 net.go:770] primary dev: ETH0
I0320 20:13:43.422530  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:13:43.422543  543705 net.go:698] Add success.
I0320 20:13:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:13:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:13:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:13:53.409775  543705 memory.go:184] no items to output this cycle
I0320 20:13:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 20:14:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:14:03.409806  543705 memory.go:184] no items to output this cycle
I0320 20:14:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 20:14:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:14:13.409805  543705 memory.go:191] Add success.
I0320 20:14:13.409805  543705 cpu.go:282] Add success.
W0320 20:14:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:14:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:14:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:14:13.420140  543705 net.go:648] Add success.
I0320 20:14:13.422782  543705 net.go:770] primary dev: ETH0
I0320 20:14:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:14:13.422808  543705 net.go:698] Add success.
I0320 20:14:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:14:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:14:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 20:14:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:14:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 20:14:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:14:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:14:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:14:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:14:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:14:23.409780  543705 memory.go:184] no items to output this cycle
I0320 20:14:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 20:14:25.933670  543705 disk_info.go:125] begin check local disk info of client
I0320 20:14:25.936206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:14:25.936212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2740 0xc0004a2780]
E0320 20:14:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:14:33.409790  543705 memory.go:184] no items to output this cycle
I0320 20:14:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 20:14:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:14:43.409797  543705 memory.go:191] Add success.
I0320 20:14:43.409810  543705 cpu.go:282] Add success.
I0320 20:14:43.420339  543705 net.go:648] Add success.
I0320 20:14:43.423091  543705 net.go:770] primary dev: ETH0
I0320 20:14:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:14:43.423115  543705 net.go:698] Add success.
I0320 20:14:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:14:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:14:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:14:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:14:53.409780  543705 cpu.go:275] no items to output this cycle
I0320 20:14:53.409786  543705 memory.go:184] no items to output this cycle
E0320 20:15:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:15:03.409791  543705 cpu.go:275] no items to output this cycle
I0320 20:15:03.409795  543705 memory.go:184] no items to output this cycle
E0320 20:15:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:15:13.409798  543705 memory.go:191] Add success.
I0320 20:15:13.409799  543705 cpu.go:282] Add success.
W0320 20:15:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:15:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:15:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:15:13.420051  543705 net.go:648] Add success.
I0320 20:15:13.422806  543705 net.go:770] primary dev: ETH0
I0320 20:15:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:15:13.422831  543705 net.go:698] Add success.
I0320 20:15:13.468807  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"adfdd83d-fcfb-477e-9e3b-ba1c8ae5024c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:15:13.468842  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:15:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:15:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:15:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 20:15:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:15:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 20:15:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:15:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:15:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:15:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:15:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:15:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:15:23.409766  543705 memory.go:184] no items to output this cycle
I0320 20:15:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 20:15:25.937674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:15:25.940127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:15:25.940134  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a640 0xc00039a680]
E0320 20:15:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:15:33.409771  543705 memory.go:184] no items to output this cycle
I0320 20:15:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 20:15:38.610759  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:15:38.610765  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:15:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:15:43.410599  543705 memory.go:191] Add success.
I0320 20:15:43.409803  543705 cpu.go:282] Add success.
I0320 20:15:43.420341  543705 net.go:648] Add success.
I0320 20:15:43.423215  543705 net.go:770] primary dev: ETH0
I0320 20:15:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:15:43.423245  543705 net.go:698] Add success.
I0320 20:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:15:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:15:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:15:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:15:53.409801  543705 memory.go:184] no items to output this cycle
I0320 20:15:53.409811  543705 cpu.go:275] no items to output this cycle
E0320 20:16:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:16:03.409781  543705 memory.go:184] no items to output this cycle
I0320 20:16:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 20:16:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:16:13.409818  543705 memory.go:191] Add success.
I0320 20:16:13.409820  543705 cpu.go:282] Add success.
W0320 20:16:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:16:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:16:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:16:13.420269  543705 net.go:648] Add success.
I0320 20:16:13.423120  543705 net.go:770] primary dev: ETH0
I0320 20:16:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:16:13.423146  543705 net.go:698] Add success.
I0320 20:16:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:16:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:16:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 20:16:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:16:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 20:16:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:16:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:16:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:16:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:16:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:16:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:16:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:16:23.409797  543705 memory.go:184] no items to output this cycle
I0320 20:16:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 20:16:25.941672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:16:25.944171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:16:25.944177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004aedc0 0xc0004aee00]
E0320 20:16:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:16:33.409772  543705 memory.go:184] no items to output this cycle
I0320 20:16:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 20:16:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:16:43.409792  543705 memory.go:191] Add success.
I0320 20:16:43.409807  543705 cpu.go:282] Add success.
I0320 20:16:43.419953  543705 net.go:648] Add success.
I0320 20:16:43.422763  543705 net.go:770] primary dev: ETH0
I0320 20:16:43.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:16:43.422792  543705 net.go:698] Add success.
I0320 20:16:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:16:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:16:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:16:53.410657  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:16:53.410671  543705 memory.go:184] no items to output this cycle
I0320 20:16:53.410672  543705 cpu.go:275] no items to output this cycle
E0320 20:17:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:17:03.409784  543705 memory.go:184] no items to output this cycle
I0320 20:17:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 20:17:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:17:13.409789  543705 memory.go:191] Add success.
I0320 20:17:13.409790  543705 cpu.go:282] Add success.
W0320 20:17:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:17:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:17:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:17:13.420045  543705 net.go:648] Add success.
I0320 20:17:13.422706  543705 net.go:770] primary dev: ETH0
I0320 20:17:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:17:13.422731  543705 net.go:698] Add success.
I0320 20:17:13.453387  543705 event_worker.go:152] Polling the log file for events...
W0320 20:17:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:17:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 20:17:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:17:14.456470  543705 disk_worker.go:494] system disk:vda1
I0320 20:17:14.456497  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:17:14.456900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:17:14.456909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:17:14.456915  543705 custom_config.go:64] query custom config with name: gpu
E0320 20:17:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:17:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:17:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:17:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:17:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:17:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:17:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:17:23.410356  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:17:23.410371  543705 memory.go:184] no items to output this cycle
I0320 20:17:23.410379  543705 cpu.go:275] no items to output this cycle
I0320 20:17:25.945670  543705 disk_info.go:125] begin check local disk info of client
I0320 20:17:25.948133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:17:25.948141  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484000 0xc000484040]
E0320 20:17:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:17:33.409799  543705 memory.go:184] no items to output this cycle
I0320 20:17:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:17:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:17:43.409802  543705 cpu.go:282] Add success.
I0320 20:17:43.409807  543705 memory.go:191] Add success.
I0320 20:17:43.420002  543705 net.go:648] Add success.
I0320 20:17:43.422688  543705 net.go:770] primary dev: ETH0
I0320 20:17:43.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:17:43.422716  543705 net.go:698] Add success.
I0320 20:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:17:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:17:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:17:53.409784  543705 memory.go:184] no items to output this cycle
I0320 20:17:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 20:18:03.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:18:03.409906  543705 memory.go:184] no items to output this cycle
I0320 20:18:03.409923  543705 cpu.go:275] no items to output this cycle
E0320 20:18:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:18:13.409813  543705 memory.go:191] Add success.
I0320 20:18:13.409820  543705 cpu.go:282] Add success.
W0320 20:18:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:18:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:18:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:18:13.420126  543705 net.go:648] Add success.
I0320 20:18:13.422844  543705 net.go:770] primary dev: ETH0
I0320 20:18:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:18:13.422868  543705 net.go:698] Add success.
I0320 20:18:13.468796  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e8bf7550-ff7d-439e-b597-66b789996ca8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:18:13.468831  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:18:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:18:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 20:18:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:18:14.456518  543705 disk_worker.go:494] system disk:vda1
I0320 20:18:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:18:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:18:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:18:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:18:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:18:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:18:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 20:18:23.409780  543705 memory.go:184] no items to output this cycle
I0320 20:18:25.949671  543705 disk_info.go:125] begin check local disk info of client
I0320 20:18:25.952134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:18:25.952140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003efc00 0xc0003efc40]
E0320 20:18:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:18:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 20:18:33.409794  543705 memory.go:184] no items to output this cycle
I0320 20:18:38.611760  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:18:38.611767  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:18:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:18:43.410813  543705 memory.go:191] Add success.
I0320 20:18:43.409805  543705 cpu.go:282] Add success.
I0320 20:18:43.420524  543705 net.go:648] Add success.
I0320 20:18:43.423092  543705 net.go:770] primary dev: ETH0
I0320 20:18:43.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:18:43.423122  543705 net.go:698] Add success.
I0320 20:18:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:18:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:18:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:18:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:18:53.409771  543705 memory.go:184] no items to output this cycle
I0320 20:18:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 20:19:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:19:03.409783  543705 memory.go:184] no items to output this cycle
I0320 20:19:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 20:19:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:19:13.409800  543705 memory.go:191] Add success.
I0320 20:19:13.409801  543705 cpu.go:282] Add success.
W0320 20:19:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:19:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:19:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:19:13.420264  543705 net.go:648] Add success.
I0320 20:19:13.422977  543705 net.go:770] primary dev: ETH0
I0320 20:19:13.423001  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:19:13.423015  543705 net.go:698] Add success.
I0320 20:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:19:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:19:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 20:19:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:19:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 20:19:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:19:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:19:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:19:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:19:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:19:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:19:23.409792  543705 memory.go:184] no items to output this cycle
I0320 20:19:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 20:19:25.953671  543705 disk_info.go:125] begin check local disk info of client
I0320 20:19:25.956089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:19:25.956095  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f440 0xc00037f480]
E0320 20:19:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:19:33.409777  543705 memory.go:184] no items to output this cycle
I0320 20:19:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 20:19:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:19:43.409788  543705 memory.go:191] Add success.
I0320 20:19:43.409788  543705 cpu.go:282] Add success.
I0320 20:19:43.419718  543705 net.go:648] Add success.
I0320 20:19:43.422894  543705 net.go:770] primary dev: ETH0
I0320 20:19:43.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:19:43.422922  543705 net.go:698] Add success.
I0320 20:19:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:19:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:19:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:19:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:19:53.409777  543705 memory.go:184] no items to output this cycle
I0320 20:19:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 20:20:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:20:03.409776  543705 memory.go:184] no items to output this cycle
I0320 20:20:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 20:20:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:20:13.409820  543705 memory.go:191] Add success.
I0320 20:20:13.409827  543705 cpu.go:282] Add success.
W0320 20:20:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:20:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:20:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:20:13.420139  543705 net.go:648] Add success.
I0320 20:20:13.422909  543705 net.go:770] primary dev: ETH0
I0320 20:20:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:20:13.422936  543705 net.go:698] Add success.
I0320 20:20:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:20:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:20:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 20:20:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:20:14.456514  543705 disk_worker.go:494] system disk:vda1
I0320 20:20:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:20:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:20:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:20:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:20:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:20:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:20:23.409780  543705 memory.go:184] no items to output this cycle
I0320 20:20:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 20:20:25.957672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:20:25.960144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:20:25.960150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae140 0xc0003ae180]
E0320 20:20:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:20:33.409793  543705 memory.go:184] no items to output this cycle
I0320 20:20:33.409821  543705 cpu.go:275] no items to output this cycle
E0320 20:20:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:20:43.409797  543705 memory.go:191] Add success.
I0320 20:20:43.409798  543705 cpu.go:282] Add success.
I0320 20:20:43.420092  543705 net.go:648] Add success.
I0320 20:20:43.423644  543705 net.go:770] primary dev: ETH0
I0320 20:20:43.423657  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:20:43.423669  543705 net.go:698] Add success.
I0320 20:20:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:20:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:20:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:20:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:20:53.410269  543705 memory.go:184] no items to output this cycle
I0320 20:20:53.410268  543705 cpu.go:275] no items to output this cycle
E0320 20:21:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:21:03.409781  543705 memory.go:184] no items to output this cycle
I0320 20:21:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 20:21:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:21:13.409794  543705 memory.go:191] Add success.
I0320 20:21:13.409794  543705 cpu.go:282] Add success.
W0320 20:21:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:21:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:21:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:21:13.420132  543705 net.go:648] Add success.
I0320 20:21:13.422892  543705 net.go:770] primary dev: ETH0
I0320 20:21:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:21:13.422916  543705 net.go:698] Add success.
I0320 20:21:13.469233  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4466d0e9-831e-49ca-99b1-24d140a5eec3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:21:13.469268  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:21:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:21:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:21:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 20:21:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:21:14.456623  543705 disk_worker.go:494] system disk:vda1
I0320 20:21:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:21:16.472476  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:21:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:21:23.409773  543705 memory.go:184] no items to output this cycle
I0320 20:21:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 20:21:25.961672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:21:25.964141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:21:25.964148  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f440 0xc00039f480]
E0320 20:21:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:21:33.409810  543705 memory.go:184] no items to output this cycle
I0320 20:21:33.409823  543705 cpu.go:275] no items to output this cycle
I0320 20:21:38.612762  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:21:38.612769  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:21:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:21:43.410705  543705 memory.go:191] Add success.
I0320 20:21:43.409786  543705 cpu.go:282] Add success.
I0320 20:21:43.420638  543705 net.go:648] Add success.
I0320 20:21:43.423133  543705 net.go:770] primary dev: ETH0
I0320 20:21:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:21:43.423158  543705 net.go:698] Add success.
I0320 20:21:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:21:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:21:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:21:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:21:53.409781  543705 memory.go:184] no items to output this cycle
I0320 20:21:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 20:22:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:22:03.409771  543705 memory.go:184] no items to output this cycle
I0320 20:22:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 20:22:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:22:13.409821  543705 memory.go:191] Add success.
I0320 20:22:13.409830  543705 cpu.go:282] Add success.
W0320 20:22:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:22:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:22:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:22:13.420111  543705 net.go:648] Add success.
I0320 20:22:13.422745  543705 net.go:770] primary dev: ETH0
I0320 20:22:13.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:22:13.422771  543705 net.go:698] Add success.
W0320 20:22:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:22:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0320 20:22:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:22:14.456893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:22:14.456902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:22:14.456909  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:22:14.456974  543705 disk_worker.go:494] system disk:vda1
I0320 20:22:14.457015  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:22:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:22:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:22:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:22:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:22:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:22:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:22:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:22:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:22:23.409800  543705 memory.go:184] no items to output this cycle
I0320 20:22:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 20:22:25.965680  543705 disk_info.go:125] begin check local disk info of client
I0320 20:22:25.968169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:22:25.968176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272680 0xc0002726c0]
E0320 20:22:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:22:33.409777  543705 memory.go:184] no items to output this cycle
I0320 20:22:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 20:22:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:22:43.409801  543705 memory.go:191] Add success.
I0320 20:22:43.409802  543705 cpu.go:282] Add success.
I0320 20:22:43.420243  543705 net.go:648] Add success.
I0320 20:22:43.422776  543705 net.go:770] primary dev: ETH0
I0320 20:22:43.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:22:43.422802  543705 net.go:698] Add success.
I0320 20:22:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:22:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:22:53.409764  543705 memory.go:184] no items to output this cycle
I0320 20:22:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 20:23:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:23:03.409807  543705 memory.go:184] no items to output this cycle
I0320 20:23:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 20:23:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:23:13.409796  543705 memory.go:191] Add success.
I0320 20:23:13.409798  543705 cpu.go:282] Add success.
W0320 20:23:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:23:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:23:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:23:13.420098  543705 net.go:648] Add success.
I0320 20:23:13.422858  543705 net.go:770] primary dev: ETH0
I0320 20:23:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:23:13.422882  543705 net.go:698] Add success.
I0320 20:23:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:23:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:23:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 20:23:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:23:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 20:23:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:23:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:23:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:23:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:23:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:23:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:23:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:23:23.409800  543705 memory.go:184] no items to output this cycle
I0320 20:23:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 20:23:25.969676  543705 disk_info.go:125] begin check local disk info of client
I0320 20:23:25.972091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:23:25.972098  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bbb00 0xc0003bbb40]
E0320 20:23:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:23:33.409771  543705 memory.go:184] no items to output this cycle
I0320 20:23:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 20:23:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:23:43.409812  543705 memory.go:191] Add success.
I0320 20:23:43.409818  543705 cpu.go:282] Add success.
I0320 20:23:43.419871  543705 net.go:648] Add success.
I0320 20:23:43.423038  543705 net.go:770] primary dev: ETH0
I0320 20:23:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:23:43.423068  543705 net.go:698] Add success.
I0320 20:23:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:23:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:23:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:23:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:23:53.409810  543705 memory.go:184] no items to output this cycle
I0320 20:23:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 20:24:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:24:03.409810  543705 memory.go:184] no items to output this cycle
I0320 20:24:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 20:24:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:24:13.409788  543705 memory.go:191] Add success.
I0320 20:24:13.409808  543705 cpu.go:282] Add success.
W0320 20:24:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:24:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:24:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:24:13.420042  543705 net.go:648] Add success.
I0320 20:24:13.422869  543705 net.go:770] primary dev: ETH0
I0320 20:24:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:24:13.422894  543705 net.go:698] Add success.
I0320 20:24:13.487666  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"085f606c-7d8f-4d7c-abe2-9b82f5d79d90","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:24:13.487701  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:24:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:24:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:24:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 20:24:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:24:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 20:24:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:24:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:24:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:24:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:24:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:24:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:24:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:24:23.409769  543705 memory.go:184] no items to output this cycle
I0320 20:24:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 20:24:25.973684  543705 disk_info.go:125] begin check local disk info of client
I0320 20:24:25.976180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:24:25.976186  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039fdc0 0xc00039fe00]
E0320 20:24:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:24:33.409773  543705 memory.go:184] no items to output this cycle
I0320 20:24:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 20:24:38.613733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:24:38.613740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:24:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:24:43.410529  543705 memory.go:191] Add success.
I0320 20:24:43.409793  543705 cpu.go:282] Add success.
I0320 20:24:43.420314  543705 net.go:648] Add success.
I0320 20:24:43.422803  543705 net.go:770] primary dev: ETH0
I0320 20:24:43.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:24:43.422828  543705 net.go:698] Add success.
I0320 20:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:24:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:24:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:24:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:24:53.409771  543705 memory.go:184] no items to output this cycle
I0320 20:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 20:25:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:25:03.409785  543705 memory.go:184] no items to output this cycle
I0320 20:25:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 20:25:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:25:13.409799  543705 cpu.go:282] Add success.
I0320 20:25:13.409807  543705 memory.go:191] Add success.
W0320 20:25:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:25:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:25:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:25:13.420158  543705 net.go:648] Add success.
I0320 20:25:13.422894  543705 net.go:770] primary dev: ETH0
I0320 20:25:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:25:13.422919  543705 net.go:698] Add success.
I0320 20:25:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:25:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:25:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 20:25:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:25:14.456529  543705 disk_worker.go:494] system disk:vda1
I0320 20:25:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:25:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:25:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:25:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:25:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:25:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:25:23.409806  543705 memory.go:184] no items to output this cycle
I0320 20:25:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 20:25:25.977671  543705 disk_info.go:125] begin check local disk info of client
I0320 20:25:25.980120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:25:25.980127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6340 0xc0004a6380]
E0320 20:25:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:25:33.409777  543705 memory.go:184] no items to output this cycle
I0320 20:25:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 20:25:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:25:43.409792  543705 memory.go:191] Add success.
I0320 20:25:43.409806  543705 cpu.go:282] Add success.
I0320 20:25:43.419928  543705 net.go:648] Add success.
I0320 20:25:43.422509  543705 net.go:770] primary dev: ETH0
I0320 20:25:43.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:25:43.422533  543705 net.go:698] Add success.
I0320 20:25:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:25:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:25:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:25:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:25:53.409801  543705 memory.go:184] no items to output this cycle
I0320 20:25:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:26:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:26:03.409812  543705 memory.go:184] no items to output this cycle
I0320 20:26:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 20:26:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:26:13.409787  543705 memory.go:191] Add success.
W0320 20:26:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 20:26:13.409816  543705 cpu.go:282] Add success.
W0320 20:26:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:26:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:26:13.420156  543705 net.go:648] Add success.
I0320 20:26:13.422849  543705 net.go:770] primary dev: ETH0
I0320 20:26:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:26:13.422878  543705 net.go:698] Add success.
I0320 20:26:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:26:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:26:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 20:26:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:26:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 20:26:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:26:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:26:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:26:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:26:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:26:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:26:23.409774  543705 memory.go:184] no items to output this cycle
I0320 20:26:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 20:26:25.981674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:26:25.984179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:26:25.984185  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba340 0xc0003ba380]
E0320 20:26:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:26:33.409792  543705 memory.go:184] no items to output this cycle
I0320 20:26:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 20:26:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:26:43.409793  543705 memory.go:191] Add success.
I0320 20:26:43.409808  543705 cpu.go:282] Add success.
I0320 20:26:43.419876  543705 net.go:648] Add success.
I0320 20:26:43.422673  543705 net.go:770] primary dev: ETH0
I0320 20:26:43.422687  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:26:43.422700  543705 net.go:698] Add success.
I0320 20:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:26:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:26:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:26:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:26:53.409785  543705 memory.go:184] no items to output this cycle
I0320 20:26:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 20:27:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:27:03.409807  543705 memory.go:184] no items to output this cycle
I0320 20:27:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 20:27:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:27:13.409785  543705 memory.go:191] Add success.
I0320 20:27:13.409808  543705 cpu.go:282] Add success.
W0320 20:27:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:27:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:27:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:27:13.420204  543705 net.go:648] Add success.
I0320 20:27:13.429063  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 20:27:13.429137  543705 net.go:770] primary dev: ETH0
I0320 20:27:13.429150  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:27:13.429169  543705 net.go:698] Add success.
I0320 20:27:13.453717  543705 event_worker.go:152] Polling the log file for events...
I0320 20:27:13.485743  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"851aef01-b83d-4b58-b80e-7f0e01ce276b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:27:13.485776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 20:27:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:27:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 20:27:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:27:14.456904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:27:14.456913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:27:14.456918  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:27:14.456990  543705 disk_worker.go:494] system disk:vda1
I0320 20:27:14.457018  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:27:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:27:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:27:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:27:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:27:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:27:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:27:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:27:23.409774  543705 memory.go:184] no items to output this cycle
I0320 20:27:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 20:27:25.985673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:27:25.988140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:27:25.988147  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357000 0xc000357040]
E0320 20:27:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:27:33.409770  543705 memory.go:184] no items to output this cycle
I0320 20:27:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 20:27:38.613883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:27:38.613890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:27:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:27:43.410675  543705 memory.go:191] Add success.
I0320 20:27:43.409806  543705 cpu.go:282] Add success.
I0320 20:27:43.420354  543705 net.go:648] Add success.
I0320 20:27:43.422977  543705 net.go:770] primary dev: ETH0
I0320 20:27:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:27:43.423001  543705 net.go:698] Add success.
I0320 20:27:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:27:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:27:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:27:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:27:53.409797  543705 memory.go:184] no items to output this cycle
I0320 20:27:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 20:28:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:28:03.409813  543705 memory.go:184] no items to output this cycle
I0320 20:28:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 20:28:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:28:13.409784  543705 memory.go:191] Add success.
W0320 20:28:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 20:28:13.409814  543705 cpu.go:282] Add success.
W0320 20:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:28:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:28:13.420123  543705 net.go:648] Add success.
I0320 20:28:13.422806  543705 net.go:770] primary dev: ETH0
I0320 20:28:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:28:13.422834  543705 net.go:698] Add success.
I0320 20:28:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:28:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:28:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 20:28:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:28:14.456587  543705 disk_worker.go:494] system disk:vda1
I0320 20:28:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:28:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:28:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:28:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:28:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:28:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:28:23.409779  543705 memory.go:184] no items to output this cycle
I0320 20:28:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 20:28:25.989676  543705 disk_info.go:125] begin check local disk info of client
I0320 20:28:25.992174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:28:25.992180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5d80 0xc0003d5dc0]
E0320 20:28:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:28:33.409800  543705 memory.go:184] no items to output this cycle
I0320 20:28:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 20:28:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:28:43.409793  543705 cpu.go:282] Add success.
I0320 20:28:43.409797  543705 memory.go:191] Add success.
I0320 20:28:43.420051  543705 net.go:648] Add success.
I0320 20:28:43.422830  543705 net.go:770] primary dev: ETH0
I0320 20:28:43.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:28:43.422854  543705 net.go:698] Add success.
I0320 20:28:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:28:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:28:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:28:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:28:53.409798  543705 memory.go:184] no items to output this cycle
I0320 20:28:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:29:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:29:03.409789  543705 cpu.go:275] no items to output this cycle
I0320 20:29:03.409796  543705 memory.go:184] no items to output this cycle
E0320 20:29:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:29:13.409823  543705 memory.go:191] Add success.
I0320 20:29:13.409832  543705 cpu.go:282] Add success.
W0320 20:29:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:29:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:29:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:29:13.420271  543705 net.go:648] Add success.
I0320 20:29:13.422957  543705 net.go:770] primary dev: ETH0
I0320 20:29:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:29:13.422982  543705 net.go:698] Add success.
I0320 20:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:29:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:29:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0320 20:29:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:29:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 20:29:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:29:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:29:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:29:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:29:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:29:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:29:23.409813  543705 memory.go:184] no items to output this cycle
I0320 20:29:23.409824  543705 cpu.go:275] no items to output this cycle
I0320 20:29:25.993673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:29:25.996158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:29:25.996165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5840 0xc0000c5880]
E0320 20:29:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:29:33.409795  543705 memory.go:184] no items to output this cycle
I0320 20:29:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 20:29:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:29:43.409792  543705 cpu.go:282] Add success.
I0320 20:29:43.409802  543705 memory.go:191] Add success.
I0320 20:29:43.419983  543705 net.go:648] Add success.
I0320 20:29:43.422715  543705 net.go:770] primary dev: ETH0
I0320 20:29:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:29:43.422744  543705 net.go:698] Add success.
I0320 20:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:29:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:29:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:29:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:29:53.409774  543705 memory.go:184] no items to output this cycle
I0320 20:29:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 20:30:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:30:03.409796  543705 memory.go:184] no items to output this cycle
I0320 20:30:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:30:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:30:13.409787  543705 memory.go:191] Add success.
I0320 20:30:13.409804  543705 cpu.go:282] Add success.
W0320 20:30:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:30:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:30:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:30:13.420129  543705 net.go:648] Add success.
I0320 20:30:13.422876  543705 net.go:770] primary dev: ETH0
I0320 20:30:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:30:13.422905  543705 net.go:698] Add success.
I0320 20:30:14.203541  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d7efcc6b-e6c7-434a-9767-f99f9375396f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:30:14.203581  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:30:14.453981  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:30:14.454251  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:30:14.454263  543705 disk_worker.go:708] disk space is not compliant
W0320 20:30:14.454267  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:30:14.455819  543705 disk_worker.go:494] system disk:vda1
I0320 20:30:14.455850  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:30:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:30:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:30:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:30:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:30:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:30:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:30:23.409780  543705 memory.go:184] no items to output this cycle
I0320 20:30:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 20:30:25.997675  543705 disk_info.go:125] begin check local disk info of client
I0320 20:30:26.000187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:30:26.000193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d00 0xc0000c4d40]
E0320 20:30:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:30:33.409800  543705 memory.go:184] no items to output this cycle
I0320 20:30:33.409814  543705 cpu.go:275] no items to output this cycle
I0320 20:30:38.614032  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:30:38.614039  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:30:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:30:43.410534  543705 memory.go:191] Add success.
I0320 20:30:43.409824  543705 cpu.go:282] Add success.
I0320 20:30:43.420305  543705 net.go:648] Add success.
I0320 20:30:43.423067  543705 net.go:770] primary dev: ETH0
I0320 20:30:43.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:30:43.423097  543705 net.go:698] Add success.
I0320 20:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:30:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:30:53.409800  543705 memory.go:184] no items to output this cycle
I0320 20:30:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:31:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:31:03.409771  543705 memory.go:184] no items to output this cycle
I0320 20:31:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 20:31:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:31:13.409813  543705 memory.go:191] Add success.
I0320 20:31:13.409816  543705 cpu.go:282] Add success.
W0320 20:31:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:31:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:31:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:31:13.420098  543705 net.go:648] Add success.
I0320 20:31:13.423150  543705 net.go:770] primary dev: ETH0
I0320 20:31:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:31:13.423174  543705 net.go:698] Add success.
I0320 20:31:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:31:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:31:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 20:31:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:31:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 20:31:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:31:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:31:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:31:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:31:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:31:23.409788  543705 memory.go:184] no items to output this cycle
I0320 20:31:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 20:31:26.001680  543705 disk_info.go:125] begin check local disk info of client
I0320 20:31:26.004183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:31:26.004188  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340d80 0xc000340dc0]
E0320 20:31:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:31:33.409782  543705 cpu.go:275] no items to output this cycle
I0320 20:31:33.409786  543705 memory.go:184] no items to output this cycle
E0320 20:31:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:31:43.409825  543705 memory.go:191] Add success.
I0320 20:31:43.409826  543705 cpu.go:282] Add success.
I0320 20:31:43.419956  543705 net.go:648] Add success.
I0320 20:31:43.422554  543705 net.go:770] primary dev: ETH0
I0320 20:31:43.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:31:43.422583  543705 net.go:698] Add success.
I0320 20:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:31:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:31:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:31:53.409807  543705 memory.go:184] no items to output this cycle
I0320 20:31:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 20:32:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:32:03.409795  543705 memory.go:184] no items to output this cycle
I0320 20:32:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 20:32:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:32:13.409801  543705 memory.go:191] Add success.
I0320 20:32:13.409804  543705 cpu.go:282] Add success.
W0320 20:32:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:32:13.412395  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:32:13.412400  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:32:13.420045  543705 net.go:648] Add success.
I0320 20:32:13.421802  543705 net.go:770] primary dev: ETH0
I0320 20:32:13.421815  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:32:13.421827  543705 net.go:698] Add success.
W0320 20:32:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:32:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 20:32:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:32:14.455936  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:32:14.455944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:32:14.455950  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:32:14.456543  543705 disk_worker.go:494] system disk:vda1
I0320 20:32:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:32:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:32:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:32:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:32:16.457991  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:32:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:32:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:32:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:32:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:32:23.409777  543705 memory.go:184] no items to output this cycle
I0320 20:32:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 20:32:26.005672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:32:26.008182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:32:26.008188  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e500 0xc00035e540]
E0320 20:32:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:32:33.409806  543705 memory.go:184] no items to output this cycle
I0320 20:32:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 20:32:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:32:43.409802  543705 memory.go:191] Add success.
I0320 20:32:43.409806  543705 cpu.go:282] Add success.
I0320 20:32:43.420196  543705 net.go:648] Add success.
I0320 20:32:43.423143  543705 net.go:770] primary dev: ETH0
I0320 20:32:43.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:32:43.423195  543705 net.go:698] Add success.
I0320 20:32:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:32:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:32:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:32:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:32:53.409786  543705 memory.go:184] no items to output this cycle
I0320 20:32:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 20:33:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:33:03.409775  543705 memory.go:184] no items to output this cycle
I0320 20:33:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 20:33:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:33:13.409798  543705 memory.go:191] Add success.
I0320 20:33:13.409804  543705 cpu.go:282] Add success.
W0320 20:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:33:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:33:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:33:13.420066  543705 net.go:648] Add success.
I0320 20:33:13.423061  543705 net.go:770] primary dev: ETH0
I0320 20:33:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:33:13.423091  543705 net.go:698] Add success.
I0320 20:33:13.471757  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f44fee3a-1f46-4a63-8da4-88f1d01de75d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:33:13.471792  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:33:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:33:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:33:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0320 20:33:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:33:14.456614  543705 disk_worker.go:494] system disk:vda1
I0320 20:33:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:33:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:33:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:33:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:33:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:33:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:33:23.409801  543705 memory.go:184] no items to output this cycle
I0320 20:33:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 20:33:26.009673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:33:26.012224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:33:26.012229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390580 0xc0003905c0]
E0320 20:33:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:33:33.409808  543705 memory.go:184] no items to output this cycle
I0320 20:33:33.409808  543705 cpu.go:275] no items to output this cycle
I0320 20:33:38.614180  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:33:38.614187  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:33:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:33:43.410747  543705 memory.go:191] Add success.
I0320 20:33:43.409816  543705 cpu.go:282] Add success.
I0320 20:33:43.420695  543705 net.go:648] Add success.
I0320 20:33:43.423699  543705 net.go:770] primary dev: ETH0
I0320 20:33:43.423714  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:33:43.423727  543705 net.go:698] Add success.
I0320 20:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:33:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:33:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:33:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:33:53.409785  543705 memory.go:184] no items to output this cycle
I0320 20:33:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 20:34:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:34:03.409814  543705 memory.go:184] no items to output this cycle
I0320 20:34:03.409832  543705 cpu.go:275] no items to output this cycle
E0320 20:34:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:34:13.409808  543705 memory.go:191] Add success.
I0320 20:34:13.409809  543705 cpu.go:282] Add success.
W0320 20:34:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:34:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:34:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:34:13.420195  543705 net.go:648] Add success.
I0320 20:34:13.423064  543705 net.go:770] primary dev: ETH0
I0320 20:34:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:34:13.423088  543705 net.go:698] Add success.
I0320 20:34:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:34:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:34:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 20:34:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:34:14.456632  543705 disk_worker.go:494] system disk:vda1
I0320 20:34:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:34:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:34:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:34:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:34:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:34:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:34:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:34:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 20:34:23.409788  543705 memory.go:184] no items to output this cycle
I0320 20:34:26.013675  543705 disk_info.go:125] begin check local disk info of client
I0320 20:34:26.016150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:34:26.016156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470640 0xc000470680]
E0320 20:34:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:34:33.409775  543705 memory.go:184] no items to output this cycle
I0320 20:34:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 20:34:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:34:43.409799  543705 memory.go:191] Add success.
I0320 20:34:43.409802  543705 cpu.go:282] Add success.
I0320 20:34:43.420289  543705 net.go:648] Add success.
I0320 20:34:43.423016  543705 net.go:770] primary dev: ETH0
I0320 20:34:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:34:43.423045  543705 net.go:698] Add success.
I0320 20:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:34:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:34:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:34:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:34:53.409776  543705 memory.go:184] no items to output this cycle
I0320 20:34:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 20:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:35:03.409784  543705 memory.go:184] no items to output this cycle
I0320 20:35:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 20:35:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:35:13.409789  543705 memory.go:191] Add success.
W0320 20:35:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:35:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:35:13.409826  543705 cpu.go:282] Add success.
I0320 20:35:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:35:13.420331  543705 net.go:648] Add success.
I0320 20:35:13.423096  543705 net.go:770] primary dev: ETH0
I0320 20:35:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:35:13.423127  543705 net.go:698] Add success.
I0320 20:35:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:35:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:35:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 20:35:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:35:14.456524  543705 disk_worker.go:494] system disk:vda1
I0320 20:35:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:35:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:35:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:35:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:35:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:35:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:35:23.409795  543705 memory.go:184] no items to output this cycle
I0320 20:35:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 20:35:26.017669  543705 disk_info.go:125] begin check local disk info of client
I0320 20:35:26.020143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:35:26.020149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d99c0 0xc0003d9a00]
E0320 20:35:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:35:33.409769  543705 memory.go:184] no items to output this cycle
I0320 20:35:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 20:35:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:35:43.409817  543705 memory.go:191] Add success.
I0320 20:35:43.409826  543705 cpu.go:282] Add success.
I0320 20:35:43.420065  543705 net.go:648] Add success.
I0320 20:35:43.422825  543705 net.go:770] primary dev: ETH0
I0320 20:35:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:35:43.422854  543705 net.go:698] Add success.
I0320 20:35:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:35:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:35:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:35:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:35:53.409799  543705 memory.go:184] no items to output this cycle
I0320 20:35:53.409812  543705 cpu.go:275] no items to output this cycle
E0320 20:36:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:36:03.409779  543705 memory.go:184] no items to output this cycle
I0320 20:36:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 20:36:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:36:13.409801  543705 memory.go:191] Add success.
I0320 20:36:13.409805  543705 cpu.go:282] Add success.
W0320 20:36:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:36:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:36:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:36:13.420206  543705 net.go:648] Add success.
I0320 20:36:13.423098  543705 net.go:770] primary dev: ETH0
I0320 20:36:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:36:13.423124  543705 net.go:698] Add success.
I0320 20:36:13.566317  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"389539af-c54b-4cc3-be64-df4f83e8cbc1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:36:13.566352  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:36:14.454673  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:36:14.454828  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:36:14.454889  543705 disk_worker.go:708] disk space is not compliant
W0320 20:36:14.454892  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:36:14.456248  543705 disk_worker.go:494] system disk:vda1
I0320 20:36:14.456280  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:36:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:36:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:36:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:36:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:36:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:36:23.409782  543705 memory.go:184] no items to output this cycle
I0320 20:36:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 20:36:26.021675  543705 disk_info.go:125] begin check local disk info of client
I0320 20:36:26.024115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:36:26.024122  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266a80 0xc000266ac0]
E0320 20:36:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:36:33.409781  543705 memory.go:184] no items to output this cycle
I0320 20:36:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 20:36:38.614328  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:36:38.614334  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:36:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:36:43.410692  543705 memory.go:191] Add success.
I0320 20:36:43.409822  543705 cpu.go:282] Add success.
I0320 20:36:43.420430  543705 net.go:648] Add success.
I0320 20:36:43.423244  543705 net.go:770] primary dev: ETH0
I0320 20:36:43.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:36:43.423272  543705 net.go:698] Add success.
I0320 20:36:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:36:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:36:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:36:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:36:53.409780  543705 memory.go:184] no items to output this cycle
I0320 20:36:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 20:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:37:03.409785  543705 memory.go:184] no items to output this cycle
I0320 20:37:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 20:37:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:37:13.409816  543705 memory.go:191] Add success.
I0320 20:37:13.409824  543705 cpu.go:282] Add success.
W0320 20:37:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:37:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:37:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:37:13.420132  543705 net.go:648] Add success.
I0320 20:37:13.422963  543705 net.go:770] primary dev: ETH0
I0320 20:37:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:37:13.422988  543705 net.go:698] Add success.
I0320 20:37:13.453515  543705 event_worker.go:152] Polling the log file for events...
W0320 20:37:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:37:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 20:37:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:37:14.456922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:37:14.456931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:37:14.456937  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:37:14.457006  543705 disk_worker.go:494] system disk:vda1
I0320 20:37:14.457037  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:37:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:37:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:37:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:37:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:37:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:37:16.458148  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:37:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:37:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:37:23.409773  543705 memory.go:184] no items to output this cycle
I0320 20:37:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 20:37:26.025672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:37:26.028103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:37:26.028109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad40 0xc0001aad80]
E0320 20:37:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:37:33.409774  543705 memory.go:184] no items to output this cycle
I0320 20:37:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 20:37:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:37:43.409816  543705 memory.go:191] Add success.
I0320 20:37:43.409827  543705 cpu.go:282] Add success.
I0320 20:37:43.420017  543705 net.go:648] Add success.
I0320 20:37:43.422798  543705 net.go:770] primary dev: ETH0
I0320 20:37:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:37:43.422827  543705 net.go:698] Add success.
I0320 20:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:37:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:37:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:37:53.409801  543705 memory.go:184] no items to output this cycle
I0320 20:37:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 20:38:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:38:03.409805  543705 memory.go:184] no items to output this cycle
I0320 20:38:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 20:38:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:38:13.409820  543705 memory.go:191] Add success.
I0320 20:38:13.409821  543705 cpu.go:282] Add success.
W0320 20:38:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:38:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:38:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:38:13.420196  543705 net.go:648] Add success.
I0320 20:38:13.422948  543705 net.go:770] primary dev: ETH0
I0320 20:38:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:38:13.422975  543705 net.go:698] Add success.
I0320 20:38:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:38:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:38:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 20:38:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:38:14.456523  543705 disk_worker.go:494] system disk:vda1
I0320 20:38:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:38:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:38:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:38:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:38:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:38:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:38:23.409797  543705 memory.go:184] no items to output this cycle
I0320 20:38:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 20:38:26.029671  543705 disk_info.go:125] begin check local disk info of client
I0320 20:38:26.032124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:38:26.032130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b0140 0xc0004b0180]
E0320 20:38:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:38:33.409777  543705 memory.go:184] no items to output this cycle
I0320 20:38:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 20:38:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:38:43.409791  543705 memory.go:191] Add success.
I0320 20:38:43.409792  543705 cpu.go:282] Add success.
I0320 20:38:43.419893  543705 net.go:648] Add success.
I0320 20:38:43.422494  543705 net.go:770] primary dev: ETH0
I0320 20:38:43.422508  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:38:43.422522  543705 net.go:698] Add success.
I0320 20:38:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:38:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:38:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:38:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:38:53.409775  543705 memory.go:184] no items to output this cycle
I0320 20:38:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 20:39:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:39:03.409809  543705 memory.go:184] no items to output this cycle
I0320 20:39:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 20:39:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:39:13.409820  543705 memory.go:191] Add success.
I0320 20:39:13.409831  543705 cpu.go:282] Add success.
W0320 20:39:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:39:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:39:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:39:13.420133  543705 net.go:648] Add success.
I0320 20:39:13.423091  543705 net.go:770] primary dev: ETH0
I0320 20:39:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:39:13.423127  543705 net.go:698] Add success.
I0320 20:39:13.469600  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"19362160-2ce4-41c3-a40c-2e7285775958","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:39:13.469634  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:39:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:39:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:39:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 20:39:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:39:14.456548  543705 disk_worker.go:494] system disk:vda1
I0320 20:39:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:39:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:39:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:39:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:39:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:39:23.409785  543705 memory.go:184] no items to output this cycle
I0320 20:39:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 20:39:26.033670  543705 disk_info.go:125] begin check local disk info of client
I0320 20:39:26.036156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:39:26.036162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003813c0 0xc000381400]
E0320 20:39:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:39:33.409793  543705 memory.go:184] no items to output this cycle
I0320 20:39:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 20:39:38.614779  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:39:38.614785  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:39:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:39:43.410673  543705 memory.go:191] Add success.
I0320 20:39:43.409816  543705 cpu.go:282] Add success.
I0320 20:39:43.420427  543705 net.go:648] Add success.
I0320 20:39:43.423387  543705 net.go:770] primary dev: ETH0
I0320 20:39:43.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:39:43.423412  543705 net.go:698] Add success.
I0320 20:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:39:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:39:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:39:53.409769  543705 memory.go:184] no items to output this cycle
I0320 20:39:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 20:40:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:40:03.409778  543705 memory.go:184] no items to output this cycle
I0320 20:40:03.409787  543705 cpu.go:275] no items to output this cycle
W0320 20:40:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:40:13.409728  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:40:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:40:13.409804  543705 cpu.go:282] Add success.
E0320 20:40:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:40:13.409829  543705 memory.go:191] Add success.
I0320 20:40:13.420546  543705 net.go:648] Add success.
I0320 20:40:13.423338  543705 net.go:770] primary dev: ETH0
I0320 20:40:13.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:40:13.423368  543705 net.go:698] Add success.
I0320 20:40:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:40:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:40:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 20:40:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:40:14.456608  543705 disk_worker.go:494] system disk:vda1
I0320 20:40:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:40:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:40:16.458019  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:40:16.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:40:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:40:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:40:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:40:23.409776  543705 memory.go:184] no items to output this cycle
I0320 20:40:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 20:40:26.037673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:40:26.040146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:40:26.040152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471a80 0xc000471ac0]
I0320 20:40:33.409777  543705 cpu.go:275] no items to output this cycle
E0320 20:40:33.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:40:33.409873  543705 memory.go:184] no items to output this cycle
E0320 20:40:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:40:43.409780  543705 memory.go:191] Add success.
I0320 20:40:43.409803  543705 cpu.go:282] Add success.
I0320 20:40:43.419903  543705 net.go:648] Add success.
I0320 20:40:43.422681  543705 net.go:770] primary dev: ETH0
I0320 20:40:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:40:43.422727  543705 net.go:698] Add success.
I0320 20:40:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:40:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:40:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:40:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:40:53.409800  543705 memory.go:184] no items to output this cycle
I0320 20:40:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 20:41:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:41:03.409789  543705 memory.go:184] no items to output this cycle
I0320 20:41:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 20:41:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:41:13.409795  543705 memory.go:191] Add success.
I0320 20:41:13.409796  543705 cpu.go:282] Add success.
W0320 20:41:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:41:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:41:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:41:13.420388  543705 net.go:648] Add success.
I0320 20:41:13.423160  543705 net.go:770] primary dev: ETH0
I0320 20:41:13.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:41:13.423185  543705 net.go:698] Add success.
I0320 20:41:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:41:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:41:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 20:41:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:41:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 20:41:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:41:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:41:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:41:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:41:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:41:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:41:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:41:23.409769  543705 memory.go:184] no items to output this cycle
I0320 20:41:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 20:41:26.041672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:41:26.044151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:41:26.044157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368a80 0xc000368ac0]
E0320 20:41:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:41:33.409800  543705 memory.go:184] no items to output this cycle
I0320 20:41:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 20:41:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:41:43.409784  543705 memory.go:191] Add success.
I0320 20:41:43.409809  543705 cpu.go:282] Add success.
I0320 20:41:43.420014  543705 net.go:648] Add success.
I0320 20:41:43.422925  543705 net.go:770] primary dev: ETH0
I0320 20:41:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:41:43.422959  543705 net.go:698] Add success.
I0320 20:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:41:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:41:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:41:53.409801  543705 memory.go:184] no items to output this cycle
I0320 20:41:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:42:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:42:03.409786  543705 memory.go:184] no items to output this cycle
I0320 20:42:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 20:42:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:42:13.409795  543705 memory.go:191] Add success.
W0320 20:42:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 20:42:13.409828  543705 cpu.go:282] Add success.
W0320 20:42:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:42:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:42:13.420141  543705 net.go:648] Add success.
I0320 20:42:13.422953  543705 net.go:770] primary dev: ETH0
I0320 20:42:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:42:13.422978  543705 net.go:698] Add success.
I0320 20:42:13.470082  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b148ddac-b6a3-4d68-878d-35661d88f438","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:42:13.470114  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 20:42:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:42:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 20:42:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:42:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:42:14.455930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:42:14.455935  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:42:14.456592  543705 disk_worker.go:494] system disk:vda1
I0320 20:42:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:42:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:42:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:42:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:42:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:42:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:42:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:42:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:42:23.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:42:23.410384  543705 memory.go:184] no items to output this cycle
I0320 20:42:23.410411  543705 cpu.go:275] no items to output this cycle
I0320 20:42:26.045673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:42:26.048116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:42:26.048122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002adb80 0xc0002adbc0]
E0320 20:42:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:42:33.409779  543705 memory.go:184] no items to output this cycle
I0320 20:42:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 20:42:38.615775  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:42:38.615782  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:42:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:42:43.410644  543705 memory.go:191] Add success.
I0320 20:42:43.409822  543705 cpu.go:282] Add success.
I0320 20:42:43.420385  543705 net.go:648] Add success.
I0320 20:42:43.422935  543705 net.go:770] primary dev: ETH0
I0320 20:42:43.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:42:43.422965  543705 net.go:698] Add success.
I0320 20:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:42:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:42:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:42:53.409764  543705 memory.go:184] no items to output this cycle
I0320 20:42:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 20:43:03.410300  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:43:03.410321  543705 memory.go:184] no items to output this cycle
I0320 20:43:03.410336  543705 cpu.go:275] no items to output this cycle
E0320 20:43:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:43:13.409821  543705 memory.go:191] Add success.
I0320 20:43:13.409836  543705 cpu.go:282] Add success.
W0320 20:43:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:43:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:43:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:43:13.420184  543705 net.go:648] Add success.
I0320 20:43:13.422716  543705 net.go:770] primary dev: ETH0
I0320 20:43:13.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:43:13.422744  543705 net.go:698] Add success.
I0320 20:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:43:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:43:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 20:43:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:43:14.456605  543705 disk_worker.go:494] system disk:vda1
I0320 20:43:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:43:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:43:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:43:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:43:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:43:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:43:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:43:23.409795  543705 memory.go:184] no items to output this cycle
I0320 20:43:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 20:43:26.049670  543705 disk_info.go:125] begin check local disk info of client
I0320 20:43:26.052148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:43:26.052154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b3600 0xc0004b3640]
E0320 20:43:33.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:43:33.409887  543705 memory.go:184] no items to output this cycle
I0320 20:43:33.409967  543705 cpu.go:275] no items to output this cycle
E0320 20:43:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:43:43.409830  543705 memory.go:191] Add success.
I0320 20:43:43.409840  543705 cpu.go:282] Add success.
I0320 20:43:43.420127  543705 net.go:648] Add success.
I0320 20:43:43.422823  543705 net.go:770] primary dev: ETH0
I0320 20:43:43.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:43:43.422848  543705 net.go:698] Add success.
I0320 20:43:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:43:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:43:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:43:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:43:53.409817  543705 memory.go:184] no items to output this cycle
I0320 20:43:53.409821  543705 cpu.go:275] no items to output this cycle
E0320 20:44:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:44:03.409792  543705 memory.go:184] no items to output this cycle
I0320 20:44:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 20:44:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:44:13.409808  543705 memory.go:191] Add success.
I0320 20:44:13.409823  543705 cpu.go:282] Add success.
W0320 20:44:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:44:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:44:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:44:13.420160  543705 net.go:648] Add success.
I0320 20:44:13.423577  543705 net.go:770] primary dev: ETH0
I0320 20:44:13.423592  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:44:13.423607  543705 net.go:698] Add success.
I0320 20:44:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:44:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:44:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 20:44:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:44:14.456510  543705 disk_worker.go:494] system disk:vda1
I0320 20:44:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:44:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:44:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:44:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:44:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:44:23.409768  543705 memory.go:184] no items to output this cycle
I0320 20:44:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 20:44:26.053677  543705 disk_info.go:125] begin check local disk info of client
I0320 20:44:26.056131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:44:26.056137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f0100 0xc0003f0140]
I0320 20:44:33.409922  543705 cpu.go:275] no items to output this cycle
E0320 20:44:33.409942  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:44:33.409960  543705 memory.go:184] no items to output this cycle
E0320 20:44:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:44:43.409826  543705 memory.go:191] Add success.
I0320 20:44:43.409829  543705 cpu.go:282] Add success.
I0320 20:44:43.419882  543705 net.go:648] Add success.
I0320 20:44:43.423206  543705 net.go:770] primary dev: ETH0
I0320 20:44:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:44:43.423230  543705 net.go:698] Add success.
I0320 20:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:44:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:44:53.409809  543705 memory.go:184] no items to output this cycle
I0320 20:44:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 20:45:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:45:03.409781  543705 memory.go:184] no items to output this cycle
I0320 20:45:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 20:45:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:45:13.409817  543705 memory.go:191] Add success.
I0320 20:45:13.409825  543705 cpu.go:282] Add success.
W0320 20:45:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:45:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:45:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:45:13.420177  543705 net.go:648] Add success.
I0320 20:45:13.423181  543705 net.go:770] primary dev: ETH0
I0320 20:45:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:45:13.423210  543705 net.go:698] Add success.
I0320 20:45:13.469072  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5884ec59-4fc0-4b26-ad88-46b5a8c9792e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:45:13.469105  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:45:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:45:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:45:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 20:45:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:45:14.456605  543705 disk_worker.go:494] system disk:vda1
I0320 20:45:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:45:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:45:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:45:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:45:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:45:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:45:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:45:23.409766  543705 memory.go:184] no items to output this cycle
I0320 20:45:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 20:45:26.057674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:45:26.060165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:45:26.060171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8900 0xc0004d8940]
E0320 20:45:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:45:33.409909  543705 memory.go:184] no items to output this cycle
I0320 20:45:33.409978  543705 cpu.go:275] no items to output this cycle
I0320 20:45:38.616783  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:45:38.616789  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:45:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:45:43.410744  543705 memory.go:191] Add success.
I0320 20:45:43.409828  543705 cpu.go:282] Add success.
I0320 20:45:43.420554  543705 net.go:648] Add success.
I0320 20:45:43.423420  543705 net.go:770] primary dev: ETH0
I0320 20:45:43.423432  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:45:43.423444  543705 net.go:698] Add success.
I0320 20:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:45:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:45:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:45:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:45:53.409798  543705 memory.go:184] no items to output this cycle
I0320 20:45:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 20:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:46:03.409783  543705 memory.go:184] no items to output this cycle
I0320 20:46:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 20:46:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:46:13.409818  543705 memory.go:191] Add success.
I0320 20:46:13.409829  543705 cpu.go:282] Add success.
W0320 20:46:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:46:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:46:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:46:13.420187  543705 net.go:648] Add success.
I0320 20:46:13.422926  543705 net.go:770] primary dev: ETH0
I0320 20:46:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:46:13.422966  543705 net.go:698] Add success.
I0320 20:46:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:46:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:46:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0320 20:46:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:46:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 20:46:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:46:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:46:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:46:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:46:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:46:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:46:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:46:23.409775  543705 memory.go:184] no items to output this cycle
I0320 20:46:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 20:46:26.061671  543705 disk_info.go:125] begin check local disk info of client
I0320 20:46:26.064124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:46:26.064131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6800 0xc0004a6840]
I0320 20:46:33.409924  543705 cpu.go:275] no items to output this cycle
E0320 20:46:33.409993  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:46:33.410016  543705 memory.go:184] no items to output this cycle
E0320 20:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:46:43.409791  543705 memory.go:191] Add success.
I0320 20:46:43.409793  543705 cpu.go:282] Add success.
I0320 20:46:43.420015  543705 net.go:648] Add success.
I0320 20:46:43.422672  543705 net.go:770] primary dev: ETH0
I0320 20:46:43.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:46:43.422697  543705 net.go:698] Add success.
I0320 20:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:46:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:46:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:46:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:46:53.409770  543705 memory.go:184] no items to output this cycle
I0320 20:46:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 20:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:47:03.409785  543705 memory.go:184] no items to output this cycle
I0320 20:47:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 20:47:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:47:13.409794  543705 memory.go:191] Add success.
I0320 20:47:13.409798  543705 cpu.go:282] Add success.
W0320 20:47:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:47:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:47:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:47:13.420082  543705 net.go:648] Add success.
I0320 20:47:13.422923  543705 net.go:770] primary dev: ETH0
I0320 20:47:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:47:13.422949  543705 net.go:698] Add success.
I0320 20:47:13.453495  543705 event_worker.go:152] Polling the log file for events...
W0320 20:47:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:47:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 20:47:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:47:14.456970  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:47:14.456979  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:47:14.456985  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:47:14.457028  543705 disk_worker.go:494] system disk:vda1
I0320 20:47:14.457068  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:47:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:47:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:47:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:47:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:47:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:47:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:47:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:47:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:47:23.409794  543705 memory.go:184] no items to output this cycle
I0320 20:47:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 20:47:26.065677  543705 disk_info.go:125] begin check local disk info of client
I0320 20:47:26.068117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:47:26.068124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae500 0xc0003ae540]
E0320 20:47:33.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:47:33.409874  543705 memory.go:184] no items to output this cycle
I0320 20:47:33.409878  543705 cpu.go:275] no items to output this cycle
E0320 20:47:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:47:43.409786  543705 memory.go:191] Add success.
I0320 20:47:43.409797  543705 cpu.go:282] Add success.
I0320 20:47:43.419895  543705 net.go:648] Add success.
I0320 20:47:43.422473  543705 net.go:770] primary dev: ETH0
I0320 20:47:43.422486  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:47:43.422497  543705 net.go:698] Add success.
I0320 20:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:47:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:47:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:47:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:47:53.409798  543705 memory.go:184] no items to output this cycle
I0320 20:47:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 20:48:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:48:03.409784  543705 memory.go:184] no items to output this cycle
I0320 20:48:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 20:48:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:48:13.409825  543705 memory.go:191] Add success.
I0320 20:48:13.409828  543705 cpu.go:282] Add success.
W0320 20:48:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:48:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:48:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:48:13.420333  543705 net.go:648] Add success.
I0320 20:48:13.423623  543705 net.go:770] primary dev: ETH0
I0320 20:48:13.423639  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:48:13.423653  543705 net.go:698] Add success.
I0320 20:48:13.469289  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ff82b88-bdba-46e1-8430-3fa0b99b1cf3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:48:13.469322  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:48:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:48:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:48:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 20:48:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:48:14.456757  543705 disk_worker.go:494] system disk:vda1
I0320 20:48:14.456795  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:48:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:48:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:48:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:48:23.409776  543705 memory.go:184] no items to output this cycle
I0320 20:48:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 20:48:26.069673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:48:26.072181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:48:26.072188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fd480 0xc0004fd4c0]
E0320 20:48:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:48:33.409799  543705 memory.go:184] no items to output this cycle
I0320 20:48:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 20:48:38.617733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:48:38.617739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:48:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:48:43.410678  543705 memory.go:191] Add success.
I0320 20:48:43.409797  543705 cpu.go:282] Add success.
I0320 20:48:43.420362  543705 net.go:648] Add success.
I0320 20:48:43.423065  543705 net.go:770] primary dev: ETH0
I0320 20:48:43.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:48:43.423090  543705 net.go:698] Add success.
I0320 20:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:48:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:48:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:48:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:48:53.409802  543705 memory.go:184] no items to output this cycle
I0320 20:48:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 20:49:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:49:03.409798  543705 memory.go:184] no items to output this cycle
I0320 20:49:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 20:49:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:49:13.409838  543705 memory.go:191] Add success.
I0320 20:49:13.409848  543705 cpu.go:282] Add success.
W0320 20:49:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:49:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:49:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:49:13.420202  543705 net.go:648] Add success.
I0320 20:49:13.423081  543705 net.go:770] primary dev: ETH0
I0320 20:49:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:49:13.423109  543705 net.go:698] Add success.
I0320 20:49:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:49:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:49:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 20:49:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:49:14.456531  543705 disk_worker.go:494] system disk:vda1
I0320 20:49:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:49:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:49:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:49:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:49:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:49:16.472094  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:49:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:49:23.409788  543705 memory.go:184] no items to output this cycle
I0320 20:49:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 20:49:26.073679  543705 disk_info.go:125] begin check local disk info of client
I0320 20:49:26.076122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:49:26.076128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b80 0xc0000c5bc0]
E0320 20:49:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:49:33.409798  543705 memory.go:184] no items to output this cycle
I0320 20:49:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 20:49:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:49:43.409791  543705 memory.go:191] Add success.
I0320 20:49:43.409808  543705 cpu.go:282] Add success.
I0320 20:49:43.419866  543705 net.go:648] Add success.
I0320 20:49:43.423453  543705 net.go:770] primary dev: ETH0
I0320 20:49:43.423467  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:49:43.423482  543705 net.go:698] Add success.
I0320 20:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:49:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:49:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:49:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:49:53.409778  543705 memory.go:184] no items to output this cycle
I0320 20:49:53.409777  543705 cpu.go:275] no items to output this cycle
E0320 20:50:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:50:03.409809  543705 memory.go:184] no items to output this cycle
I0320 20:50:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 20:50:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:50:13.409818  543705 memory.go:191] Add success.
I0320 20:50:13.409825  543705 cpu.go:282] Add success.
W0320 20:50:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:50:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:50:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:50:13.420322  543705 net.go:648] Add success.
I0320 20:50:13.423482  543705 net.go:770] primary dev: ETH0
I0320 20:50:13.423495  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:50:13.423507  543705 net.go:698] Add success.
I0320 20:50:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:50:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:50:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 20:50:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:50:14.456522  543705 disk_worker.go:494] system disk:vda1
I0320 20:50:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:50:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:50:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:50:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:50:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:50:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:50:23.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:50:23.409840  543705 memory.go:184] no items to output this cycle
I0320 20:50:23.409909  543705 cpu.go:275] no items to output this cycle
I0320 20:50:26.077679  543705 disk_info.go:125] begin check local disk info of client
I0320 20:50:26.080126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:50:26.080133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa840 0xc0001fa880]
E0320 20:50:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:50:33.409779  543705 memory.go:184] no items to output this cycle
I0320 20:50:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 20:50:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:50:43.409823  543705 memory.go:191] Add success.
I0320 20:50:43.409827  543705 cpu.go:282] Add success.
I0320 20:50:43.419897  543705 net.go:648] Add success.
I0320 20:50:43.422736  543705 net.go:770] primary dev: ETH0
I0320 20:50:43.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:50:43.422766  543705 net.go:698] Add success.
I0320 20:50:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:50:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:50:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:50:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:50:53.409794  543705 memory.go:184] no items to output this cycle
I0320 20:50:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 20:51:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:51:03.409772  543705 memory.go:184] no items to output this cycle
I0320 20:51:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 20:51:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:51:13.409818  543705 memory.go:191] Add success.
I0320 20:51:13.409825  543705 cpu.go:282] Add success.
W0320 20:51:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:51:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:51:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:51:13.420135  543705 net.go:648] Add success.
I0320 20:51:13.423166  543705 net.go:770] primary dev: ETH0
I0320 20:51:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:51:13.423196  543705 net.go:698] Add success.
I0320 20:51:13.470110  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31389829-504a-48a1-893d-516900ac640d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:51:13.470145  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:51:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:51:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:51:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 20:51:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:51:14.456602  543705 disk_worker.go:494] system disk:vda1
I0320 20:51:14.456717  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:51:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:51:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:51:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:51:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:51:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:51:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:51:23.409771  543705 memory.go:184] no items to output this cycle
I0320 20:51:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 20:51:26.081675  543705 disk_info.go:125] begin check local disk info of client
I0320 20:51:26.084107  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:51:26.084114  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470d80 0xc000470dc0]
E0320 20:51:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:51:33.409800  543705 memory.go:184] no items to output this cycle
I0320 20:51:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 20:51:38.618797  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:51:38.618804  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:51:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:51:43.410662  543705 memory.go:191] Add success.
I0320 20:51:43.409824  543705 cpu.go:282] Add success.
I0320 20:51:43.420345  543705 net.go:648] Add success.
I0320 20:51:43.423415  543705 net.go:770] primary dev: ETH0
I0320 20:51:43.423439  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:51:43.423453  543705 net.go:698] Add success.
I0320 20:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:51:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:51:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:51:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:51:53.409776  543705 memory.go:184] no items to output this cycle
I0320 20:51:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 20:52:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:52:03.409807  543705 memory.go:184] no items to output this cycle
I0320 20:52:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 20:52:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:52:13.409789  543705 memory.go:191] Add success.
I0320 20:52:13.409814  543705 cpu.go:282] Add success.
W0320 20:52:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:52:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:52:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:52:13.420218  543705 net.go:648] Add success.
I0320 20:52:13.422936  543705 net.go:770] primary dev: ETH0
I0320 20:52:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:52:13.422960  543705 net.go:698] Add success.
W0320 20:52:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:52:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 20:52:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:52:14.455855  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:52:14.455864  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:52:14.455870  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:52:14.456615  543705 disk_worker.go:494] system disk:vda1
I0320 20:52:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:52:15.456873  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:52:15.456882  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:52:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:52:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:52:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:52:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:52:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:52:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:52:23.409768  543705 memory.go:184] no items to output this cycle
I0320 20:52:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 20:52:26.085674  543705 disk_info.go:125] begin check local disk info of client
I0320 20:52:26.088147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:52:26.088154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0320 20:52:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:52:33.409766  543705 memory.go:184] no items to output this cycle
I0320 20:52:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 20:52:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:52:43.409788  543705 memory.go:191] Add success.
I0320 20:52:43.409806  543705 cpu.go:282] Add success.
I0320 20:52:43.419842  543705 net.go:648] Add success.
I0320 20:52:43.422638  543705 net.go:770] primary dev: ETH0
I0320 20:52:43.422654  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:52:43.422669  543705 net.go:698] Add success.
I0320 20:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:52:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:52:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:52:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:52:53.409799  543705 memory.go:184] no items to output this cycle
I0320 20:52:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 20:53:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:53:03.409783  543705 memory.go:184] no items to output this cycle
I0320 20:53:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 20:53:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:53:13.409787  543705 memory.go:191] Add success.
I0320 20:53:13.409792  543705 cpu.go:282] Add success.
W0320 20:53:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:53:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:53:13.420315  543705 net.go:648] Add success.
I0320 20:53:13.422958  543705 net.go:770] primary dev: ETH0
I0320 20:53:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:53:13.422982  543705 net.go:698] Add success.
I0320 20:53:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:53:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:53:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 20:53:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:53:14.456544  543705 disk_worker.go:494] system disk:vda1
I0320 20:53:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:53:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:53:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:53:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:53:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:53:23.409797  543705 memory.go:184] no items to output this cycle
I0320 20:53:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 20:53:26.089672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:53:26.092256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:53:26.092262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbbc0 0xc0001fbc00]
E0320 20:53:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:53:33.409765  543705 memory.go:184] no items to output this cycle
I0320 20:53:33.409795  543705 cpu.go:275] no items to output this cycle
E0320 20:53:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:53:43.409815  543705 memory.go:191] Add success.
I0320 20:53:43.409818  543705 cpu.go:282] Add success.
I0320 20:53:43.419954  543705 net.go:648] Add success.
I0320 20:53:43.422674  543705 net.go:770] primary dev: ETH0
I0320 20:53:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:53:43.422704  543705 net.go:698] Add success.
I0320 20:53:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:53:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:53:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:53:53.409815  543705 memory.go:184] no items to output this cycle
I0320 20:53:53.409825  543705 cpu.go:275] no items to output this cycle
E0320 20:54:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:54:03.409792  543705 memory.go:184] no items to output this cycle
I0320 20:54:03.409795  543705 cpu.go:275] no items to output this cycle
W0320 20:54:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:54:13.409731  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:54:13.409736  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 20:54:13.409940  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:54:13.409964  543705 memory.go:191] Add success.
I0320 20:54:13.409973  543705 cpu.go:282] Add success.
I0320 20:54:13.419713  543705 net.go:648] Add success.
I0320 20:54:13.422389  543705 net.go:770] primary dev: ETH0
I0320 20:54:13.422402  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:54:13.422413  543705 net.go:698] Add success.
I0320 20:54:13.468551  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00d99631-3db6-4f6f-8bdd-e334fa8f36b6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:54:13.468582  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 20:54:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:54:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:54:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 20:54:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:54:14.456678  543705 disk_worker.go:494] system disk:vda1
I0320 20:54:14.456734  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:54:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:54:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:54:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:54:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:54:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:54:23.409807  543705 memory.go:184] no items to output this cycle
I0320 20:54:23.409819  543705 cpu.go:275] no items to output this cycle
I0320 20:54:26.093671  543705 disk_info.go:125] begin check local disk info of client
I0320 20:54:26.096201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:54:26.096208  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002738c0 0xc000273900]
E0320 20:54:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:54:33.409796  543705 memory.go:184] no items to output this cycle
I0320 20:54:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 20:54:38.619802  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:54:38.619809  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:54:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:54:43.410603  543705 memory.go:191] Add success.
I0320 20:54:43.409812  543705 cpu.go:282] Add success.
I0320 20:54:43.420362  543705 net.go:648] Add success.
I0320 20:54:43.423097  543705 net.go:770] primary dev: ETH0
I0320 20:54:43.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:54:43.423128  543705 net.go:698] Add success.
I0320 20:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:54:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:54:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:54:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:54:53.409789  543705 memory.go:184] no items to output this cycle
I0320 20:54:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 20:55:03.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:55:03.409880  543705 memory.go:184] no items to output this cycle
I0320 20:55:03.409940  543705 cpu.go:275] no items to output this cycle
E0320 20:55:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:55:13.409803  543705 memory.go:191] Add success.
I0320 20:55:13.409809  543705 cpu.go:282] Add success.
W0320 20:55:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:55:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:55:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:55:13.420113  543705 net.go:648] Add success.
I0320 20:55:13.423405  543705 net.go:770] primary dev: ETH0
I0320 20:55:13.423418  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:55:13.423430  543705 net.go:698] Add success.
I0320 20:55:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:55:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:55:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 20:55:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:55:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 20:55:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:55:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:55:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:55:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:55:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:55:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:55:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:55:23.409770  543705 memory.go:184] no items to output this cycle
I0320 20:55:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 20:55:26.097672  543705 disk_info.go:125] begin check local disk info of client
I0320 20:55:26.100128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:55:26.100134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004717c0 0xc000471800]
E0320 20:55:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:55:33.409783  543705 memory.go:184] no items to output this cycle
I0320 20:55:33.409798  543705 cpu.go:275] no items to output this cycle
E0320 20:55:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:55:43.409812  543705 memory.go:191] Add success.
I0320 20:55:43.409821  543705 cpu.go:282] Add success.
I0320 20:55:43.419876  543705 net.go:648] Add success.
I0320 20:55:43.422426  543705 net.go:770] primary dev: ETH0
I0320 20:55:43.422440  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:55:43.422453  543705 net.go:698] Add success.
I0320 20:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:55:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:55:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:55:53.409779  543705 cpu.go:275] no items to output this cycle
I0320 20:55:53.409782  543705 memory.go:184] no items to output this cycle
E0320 20:56:03.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:56:03.409891  543705 memory.go:184] no items to output this cycle
I0320 20:56:03.409939  543705 cpu.go:275] no items to output this cycle
E0320 20:56:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:56:13.409794  543705 memory.go:191] Add success.
I0320 20:56:13.409811  543705 cpu.go:282] Add success.
W0320 20:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:56:13.420185  543705 net.go:648] Add success.
I0320 20:56:13.422995  543705 net.go:770] primary dev: ETH0
I0320 20:56:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:56:13.423025  543705 net.go:698] Add success.
I0320 20:56:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:56:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:56:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 20:56:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:56:14.456611  543705 disk_worker.go:494] system disk:vda1
I0320 20:56:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:56:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:56:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:56:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:56:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:56:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:56:23.409803  543705 memory.go:184] no items to output this cycle
I0320 20:56:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 20:56:26.101676  543705 disk_info.go:125] begin check local disk info of client
I0320 20:56:26.104207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:56:26.104213  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b8c0 0xc00007b900]
E0320 20:56:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:56:33.409765  543705 memory.go:184] no items to output this cycle
I0320 20:56:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 20:56:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:56:43.409810  543705 memory.go:191] Add success.
I0320 20:56:43.409818  543705 cpu.go:282] Add success.
I0320 20:56:43.420019  543705 net.go:648] Add success.
I0320 20:56:43.422512  543705 net.go:770] primary dev: ETH0
I0320 20:56:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:56:43.422541  543705 net.go:698] Add success.
I0320 20:56:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:56:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:56:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:56:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:56:53.409782  543705 memory.go:184] no items to output this cycle
I0320 20:56:53.409798  543705 cpu.go:275] no items to output this cycle
I0320 20:57:03.409912  543705 cpu.go:275] no items to output this cycle
E0320 20:57:03.409912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:57:03.409933  543705 memory.go:184] no items to output this cycle
E0320 20:57:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:57:13.409816  543705 memory.go:191] Add success.
I0320 20:57:13.409827  543705 cpu.go:282] Add success.
W0320 20:57:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:57:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:57:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:57:13.420495  543705 net.go:648] Add success.
I0320 20:57:13.428951  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 20:57:13.429029  543705 net.go:770] primary dev: ETH0
I0320 20:57:13.429040  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:57:13.429051  543705 net.go:698] Add success.
I0320 20:57:13.453602  543705 event_worker.go:152] Polling the log file for events...
I0320 20:57:13.464730  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"04a35aa3-8fb9-49e1-abad-364bd3909b9e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 20:57:13.464765  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 20:57:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:57:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 20:57:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0320 20:57:14.456099  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 20:57:14.456109  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 20:57:14.456115  543705 custom_config.go:64] query custom config with name: gpu
I0320 20:57:14.456473  543705 disk_worker.go:494] system disk:vda1
I0320 20:57:14.456502  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 20:57:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 20:57:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:57:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 20:57:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 20:57:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:57:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:57:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:57:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:57:23.409791  543705 memory.go:184] no items to output this cycle
I0320 20:57:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 20:57:26.105665  543705 disk_info.go:125] begin check local disk info of client
I0320 20:57:26.108102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:57:26.108108  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b400 0xc00007b440]
E0320 20:57:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:57:33.409799  543705 memory.go:184] no items to output this cycle
I0320 20:57:33.409812  543705 cpu.go:275] no items to output this cycle
I0320 20:57:38.619954  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 20:57:38.619961  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 20:57:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:57:43.410638  543705 memory.go:191] Add success.
I0320 20:57:43.409837  543705 cpu.go:282] Add success.
I0320 20:57:43.420421  543705 net.go:648] Add success.
I0320 20:57:43.423268  543705 net.go:770] primary dev: ETH0
I0320 20:57:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:57:43.423296  543705 net.go:698] Add success.
I0320 20:57:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:57:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:57:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:57:53.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:57:53.409908  543705 memory.go:184] no items to output this cycle
I0320 20:57:53.409965  543705 cpu.go:275] no items to output this cycle
E0320 20:58:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:58:03.409795  543705 memory.go:184] no items to output this cycle
I0320 20:58:03.409803  543705 cpu.go:275] no items to output this cycle
E0320 20:58:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:58:13.409830  543705 memory.go:191] Add success.
I0320 20:58:13.409843  543705 cpu.go:282] Add success.
W0320 20:58:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:58:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:58:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:58:13.420146  543705 net.go:648] Add success.
I0320 20:58:13.422985  543705 net.go:770] primary dev: ETH0
I0320 20:58:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:58:13.423009  543705 net.go:698] Add success.
I0320 20:58:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:58:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:58:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 20:58:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:58:14.456509  543705 disk_worker.go:494] system disk:vda1
I0320 20:58:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:58:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:58:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:58:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:58:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:58:23.409797  543705 memory.go:184] no items to output this cycle
I0320 20:58:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 20:58:26.109678  543705 disk_info.go:125] begin check local disk info of client
I0320 20:58:26.112133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:58:26.112139  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8ec0 0xc0003c8f00]
E0320 20:58:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:58:33.409778  543705 memory.go:184] no items to output this cycle
I0320 20:58:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 20:58:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:58:43.409785  543705 memory.go:191] Add success.
I0320 20:58:43.409798  543705 cpu.go:282] Add success.
I0320 20:58:43.419867  543705 net.go:648] Add success.
I0320 20:58:43.422537  543705 net.go:770] primary dev: ETH0
I0320 20:58:43.422549  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:58:43.422576  543705 net.go:698] Add success.
I0320 20:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:58:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:58:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:58:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:58:53.409802  543705 memory.go:184] no items to output this cycle
I0320 20:58:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 20:59:03.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:59:03.410011  543705 memory.go:184] no items to output this cycle
I0320 20:59:03.410021  543705 cpu.go:275] no items to output this cycle
E0320 20:59:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:59:13.409799  543705 memory.go:191] Add success.
I0320 20:59:13.409806  543705 cpu.go:282] Add success.
W0320 20:59:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 20:59:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 20:59:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 20:59:13.420055  543705 net.go:648] Add success.
I0320 20:59:13.422877  543705 net.go:770] primary dev: ETH0
I0320 20:59:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:59:13.422910  543705 net.go:698] Add success.
I0320 20:59:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 20:59:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 20:59:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 20:59:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 20:59:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 20:59:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 20:59:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 20:59:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:59:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 20:59:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0320 20:59:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:59:23.409795  543705 memory.go:184] no items to output this cycle
I0320 20:59:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 20:59:26.113673  543705 disk_info.go:125] begin check local disk info of client
I0320 20:59:26.116239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 20:59:26.116245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbbc0 0xc0001fbc00]
E0320 20:59:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:59:33.409805  543705 memory.go:184] no items to output this cycle
I0320 20:59:33.409823  543705 cpu.go:275] no items to output this cycle
E0320 20:59:43.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:59:43.409838  543705 memory.go:191] Add success.
I0320 20:59:43.409849  543705 cpu.go:282] Add success.
I0320 20:59:43.420076  543705 net.go:648] Add success.
I0320 20:59:43.422857  543705 net.go:770] primary dev: ETH0
I0320 20:59:43.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0320 20:59:43.422884  543705 net.go:698] Add success.
I0320 20:59:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 20:59:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 20:59:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 20:59:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 20:59:53.409800  543705 memory.go:184] no items to output this cycle
I0320 20:59:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 21:00:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:00:03.409815  543705 memory.go:184] no items to output this cycle
I0320 21:00:03.409828  543705 cpu.go:275] no items to output this cycle
E0320 21:00:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:00:13.409800  543705 memory.go:191] Add success.
W0320 21:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 21:00:13.409837  543705 cpu.go:282] Add success.
W0320 21:00:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:00:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:00:13.420180  543705 net.go:648] Add success.
I0320 21:00:13.422784  543705 net.go:770] primary dev: ETH0
I0320 21:00:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:00:13.422814  543705 net.go:698] Add success.
I0320 21:00:13.468699  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cb474ff5-f735-46df-bde9-c9e7b436d36f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:00:13.468743  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:00:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:00:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:00:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 21:00:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:00:14.456660  543705 disk_worker.go:494] system disk:vda1
I0320 21:00:14.456690  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:00:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:00:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:00:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:00:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:00:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:00:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:00:23.409781  543705 memory.go:184] no items to output this cycle
I0320 21:00:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 21:00:26.117676  543705 disk_info.go:125] begin check local disk info of client
I0320 21:00:26.120125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:00:26.120131  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471d80 0xc000471dc0]
E0320 21:00:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:00:33.409773  543705 memory.go:184] no items to output this cycle
I0320 21:00:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 21:00:38.620100  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:00:38.620107  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:00:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:00:43.410563  543705 memory.go:191] Add success.
I0320 21:00:43.409799  543705 cpu.go:282] Add success.
I0320 21:00:43.420280  543705 net.go:648] Add success.
I0320 21:00:43.423377  543705 net.go:770] primary dev: ETH0
I0320 21:00:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:00:43.423403  543705 net.go:698] Add success.
I0320 21:00:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:00:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:00:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:00:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:00:53.409785  543705 memory.go:184] no items to output this cycle
I0320 21:00:53.409813  543705 cpu.go:275] no items to output this cycle
I0320 21:01:03.409909  543705 cpu.go:275] no items to output this cycle
E0320 21:01:03.409938  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:01:03.409973  543705 memory.go:184] no items to output this cycle
E0320 21:01:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:01:13.409795  543705 memory.go:191] Add success.
I0320 21:01:13.409811  543705 cpu.go:282] Add success.
W0320 21:01:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:01:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:01:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:01:13.420171  543705 net.go:648] Add success.
I0320 21:01:13.422816  543705 net.go:770] primary dev: ETH0
I0320 21:01:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:01:13.422854  543705 net.go:698] Add success.
I0320 21:01:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:01:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:01:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 21:01:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:01:14.456490  543705 disk_worker.go:494] system disk:vda1
I0320 21:01:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:01:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:01:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:01:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:01:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:01:23.409798  543705 memory.go:184] no items to output this cycle
I0320 21:01:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 21:01:26.121674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:01:26.124120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:01:26.124127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5480 0xc0000c54c0]
E0320 21:01:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:01:33.409765  543705 memory.go:184] no items to output this cycle
I0320 21:01:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 21:01:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:01:43.409786  543705 memory.go:191] Add success.
I0320 21:01:43.409791  543705 cpu.go:282] Add success.
I0320 21:01:43.419835  543705 net.go:648] Add success.
I0320 21:01:43.422446  543705 net.go:770] primary dev: ETH0
I0320 21:01:43.422459  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:01:43.422471  543705 net.go:698] Add success.
I0320 21:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:01:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:01:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:01:53.409807  543705 memory.go:184] no items to output this cycle
I0320 21:01:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 21:02:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:02:03.409819  543705 memory.go:184] no items to output this cycle
I0320 21:02:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 21:02:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:02:13.409801  543705 cpu.go:282] Add success.
I0320 21:02:13.409809  543705 memory.go:191] Add success.
W0320 21:02:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:02:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:02:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:02:13.420091  543705 net.go:648] Add success.
I0320 21:02:13.423185  543705 net.go:770] primary dev: ETH0
I0320 21:02:13.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:02:13.423213  543705 net.go:698] Add success.
W0320 21:02:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:02:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 21:02:14.455206  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:02:14.456995  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:02:14.457004  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:02:14.457010  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:02:14.457042  543705 disk_worker.go:494] system disk:vda1
I0320 21:02:14.457083  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:02:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:02:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:02:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:02:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:02:16.458018  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:02:16.458040  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:02:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:02:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:02:23.409795  543705 memory.go:184] no items to output this cycle
I0320 21:02:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 21:02:26.125672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:02:26.128162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:02:26.128169  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9d00 0xc0003b9d40]
E0320 21:02:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:02:33.409796  543705 memory.go:184] no items to output this cycle
I0320 21:02:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 21:02:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:02:43.409822  543705 memory.go:191] Add success.
I0320 21:02:43.409835  543705 cpu.go:282] Add success.
I0320 21:02:43.419963  543705 net.go:648] Add success.
I0320 21:02:43.422388  543705 net.go:770] primary dev: ETH0
I0320 21:02:43.422403  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:02:43.422418  543705 net.go:698] Add success.
I0320 21:02:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:02:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:02:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:02:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:02:53.409784  543705 memory.go:184] no items to output this cycle
I0320 21:02:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 21:03:03.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:03:03.409891  543705 memory.go:184] no items to output this cycle
I0320 21:03:03.409919  543705 cpu.go:275] no items to output this cycle
E0320 21:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:03:13.409790  543705 memory.go:191] Add success.
I0320 21:03:13.409805  543705 cpu.go:282] Add success.
W0320 21:03:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:03:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:03:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:03:13.420175  543705 net.go:648] Add success.
I0320 21:03:13.423012  543705 net.go:770] primary dev: ETH0
I0320 21:03:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:03:13.423046  543705 net.go:698] Add success.
I0320 21:03:13.469367  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4f38816a-eec6-482c-9c42-1041c81c4208","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:03:13.469399  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:03:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:03:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:03:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0320 21:03:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:03:14.456713  543705 disk_worker.go:494] system disk:vda1
I0320 21:03:14.456743  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:03:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:03:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:03:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:03:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:03:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:03:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:03:23.409777  543705 cpu.go:275] no items to output this cycle
I0320 21:03:23.409786  543705 memory.go:184] no items to output this cycle
I0320 21:03:26.129672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:03:26.132232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:03:26.132238  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492600 0xc000492640]
E0320 21:03:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:03:33.409775  543705 memory.go:184] no items to output this cycle
I0320 21:03:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 21:03:38.620247  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:03:38.620254  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:03:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:03:43.410659  543705 memory.go:191] Add success.
I0320 21:03:43.409805  543705 cpu.go:282] Add success.
I0320 21:03:43.420422  543705 net.go:648] Add success.
I0320 21:03:43.423594  543705 net.go:770] primary dev: ETH0
I0320 21:03:43.423605  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:03:43.423618  543705 net.go:698] Add success.
I0320 21:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:03:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:03:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:03:53.409787  543705 memory.go:184] no items to output this cycle
I0320 21:03:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 21:04:03.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:04:03.409882  543705 memory.go:184] no items to output this cycle
I0320 21:04:03.409956  543705 cpu.go:275] no items to output this cycle
E0320 21:04:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:04:13.409795  543705 memory.go:191] Add success.
I0320 21:04:13.409805  543705 cpu.go:282] Add success.
W0320 21:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:04:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:04:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:04:13.420242  543705 net.go:648] Add success.
I0320 21:04:13.422911  543705 net.go:770] primary dev: ETH0
I0320 21:04:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:04:13.422936  543705 net.go:698] Add success.
I0320 21:04:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:04:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:04:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 21:04:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:04:14.456517  543705 disk_worker.go:494] system disk:vda1
I0320 21:04:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:04:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:04:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:04:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:04:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:04:23.409790  543705 memory.go:184] no items to output this cycle
I0320 21:04:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 21:04:26.133677  543705 disk_info.go:125] begin check local disk info of client
I0320 21:04:26.136192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:04:26.136197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1080 0xc0004b10c0]
E0320 21:04:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:04:33.409775  543705 memory.go:184] no items to output this cycle
I0320 21:04:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 21:04:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:04:43.409819  543705 memory.go:191] Add success.
I0320 21:04:43.409830  543705 cpu.go:282] Add success.
I0320 21:04:43.419956  543705 net.go:648] Add success.
I0320 21:04:43.422585  543705 net.go:770] primary dev: ETH0
I0320 21:04:43.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:04:43.422611  543705 net.go:698] Add success.
I0320 21:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:04:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:04:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:04:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:04:53.409869  543705 cpu.go:275] no items to output this cycle
I0320 21:04:53.409888  543705 memory.go:184] no items to output this cycle
E0320 21:05:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:05:03.409810  543705 memory.go:184] no items to output this cycle
I0320 21:05:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 21:05:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:05:13.409790  543705 memory.go:191] Add success.
I0320 21:05:13.409793  543705 cpu.go:282] Add success.
W0320 21:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:05:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:05:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:05:13.420167  543705 net.go:648] Add success.
I0320 21:05:13.423336  543705 net.go:770] primary dev: ETH0
I0320 21:05:13.423349  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:05:13.423363  543705 net.go:698] Add success.
I0320 21:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:05:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:05:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 21:05:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:05:14.456521  543705 disk_worker.go:494] system disk:vda1
I0320 21:05:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:05:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:05:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:05:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:05:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:05:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:05:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:05:23.409769  543705 memory.go:184] no items to output this cycle
I0320 21:05:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 21:05:26.137675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:05:26.140099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:05:26.140105  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b8c0 0xc00032b900]
E0320 21:05:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:05:33.409793  543705 memory.go:184] no items to output this cycle
I0320 21:05:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 21:05:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:05:43.409790  543705 memory.go:191] Add success.
I0320 21:05:43.409793  543705 cpu.go:282] Add success.
I0320 21:05:43.419930  543705 net.go:648] Add success.
I0320 21:05:43.423144  543705 net.go:770] primary dev: ETH0
I0320 21:05:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:05:43.423168  543705 net.go:698] Add success.
I0320 21:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:05:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:05:53.410334  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:05:53.410351  543705 memory.go:184] no items to output this cycle
I0320 21:05:53.410362  543705 cpu.go:275] no items to output this cycle
E0320 21:06:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:06:03.409801  543705 memory.go:184] no items to output this cycle
I0320 21:06:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 21:06:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:06:13.409792  543705 memory.go:191] Add success.
W0320 21:06:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:06:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:06:13.409831  543705 cpu.go:282] Add success.
I0320 21:06:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:06:13.420577  543705 net.go:648] Add success.
I0320 21:06:13.423476  543705 net.go:770] primary dev: ETH0
I0320 21:06:13.423494  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:06:13.423510  543705 net.go:698] Add success.
I0320 21:06:13.468388  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"28b60117-f0e8-4a33-a028-acbf568387a4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:06:13.468423  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:06:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:06:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:06:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 21:06:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:06:14.456631  543705 disk_worker.go:494] system disk:vda1
I0320 21:06:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:06:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:06:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:06:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:06:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:06:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:06:23.409779  543705 memory.go:184] no items to output this cycle
I0320 21:06:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 21:06:26.141671  543705 disk_info.go:125] begin check local disk info of client
I0320 21:06:26.144096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:06:26.144101  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473380 0xc0004733c0]
E0320 21:06:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:06:33.409779  543705 memory.go:184] no items to output this cycle
I0320 21:06:33.409784  543705 cpu.go:275] no items to output this cycle
I0320 21:06:38.620394  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:06:38.620408  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:06:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:06:43.410583  543705 memory.go:191] Add success.
I0320 21:06:43.409806  543705 cpu.go:282] Add success.
I0320 21:06:43.420315  543705 net.go:648] Add success.
I0320 21:06:43.422950  543705 net.go:770] primary dev: ETH0
I0320 21:06:43.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:06:43.422977  543705 net.go:698] Add success.
I0320 21:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:06:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:06:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:06:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:06:53.409800  543705 memory.go:184] no items to output this cycle
I0320 21:06:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 21:07:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:07:03.409794  543705 memory.go:184] no items to output this cycle
I0320 21:07:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 21:07:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:07:13.409780  543705 memory.go:191] Add success.
W0320 21:07:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:07:13.409819  543705 cpu.go:282] Add success.
I0320 21:07:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:07:13.420125  543705 net.go:648] Add success.
I0320 21:07:13.422845  543705 net.go:770] primary dev: ETH0
I0320 21:07:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:07:13.422870  543705 net.go:698] Add success.
I0320 21:07:13.453404  543705 event_worker.go:152] Polling the log file for events...
W0320 21:07:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:07:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 21:07:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:07:14.455878  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:07:14.455887  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:07:14.455892  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:07:14.456555  543705 disk_worker.go:494] system disk:vda1
I0320 21:07:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:07:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:07:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:07:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:07:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:07:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:07:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:07:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:07:23.409795  543705 memory.go:184] no items to output this cycle
I0320 21:07:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 21:07:26.145674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:07:26.148124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:07:26.148130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a9280 0xc0002a92c0]
E0320 21:07:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:07:33.409796  543705 memory.go:184] no items to output this cycle
I0320 21:07:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 21:07:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:07:43.409801  543705 memory.go:191] Add success.
I0320 21:07:43.409801  543705 cpu.go:282] Add success.
I0320 21:07:43.419848  543705 net.go:648] Add success.
I0320 21:07:43.422521  543705 net.go:770] primary dev: ETH0
I0320 21:07:43.422534  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:07:43.422547  543705 net.go:698] Add success.
I0320 21:07:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:07:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:07:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:07:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:07:53.409770  543705 memory.go:184] no items to output this cycle
I0320 21:07:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 21:08:03.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:08:03.409898  543705 memory.go:184] no items to output this cycle
I0320 21:08:03.409977  543705 cpu.go:275] no items to output this cycle
E0320 21:08:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:08:13.409832  543705 memory.go:191] Add success.
I0320 21:08:13.409846  543705 cpu.go:282] Add success.
W0320 21:08:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:08:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:08:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:08:13.420156  543705 net.go:648] Add success.
I0320 21:08:13.422887  543705 net.go:770] primary dev: ETH0
I0320 21:08:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:08:13.422916  543705 net.go:698] Add success.
I0320 21:08:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:08:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:08:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 21:08:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:08:14.456597  543705 disk_worker.go:494] system disk:vda1
I0320 21:08:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:08:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:08:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:08:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:08:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:08:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:08:23.409775  543705 memory.go:184] no items to output this cycle
I0320 21:08:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 21:08:26.149673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:08:26.152107  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:08:26.152113  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abac0 0xc0001abb00]
E0320 21:08:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:08:33.409768  543705 memory.go:184] no items to output this cycle
I0320 21:08:33.409787  543705 cpu.go:275] no items to output this cycle
E0320 21:08:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:08:43.409818  543705 memory.go:191] Add success.
I0320 21:08:43.409821  543705 cpu.go:282] Add success.
I0320 21:08:43.419939  543705 net.go:648] Add success.
I0320 21:08:43.422771  543705 net.go:770] primary dev: ETH0
I0320 21:08:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:08:43.422799  543705 net.go:698] Add success.
I0320 21:08:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:08:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:08:53.409790  543705 memory.go:184] no items to output this cycle
I0320 21:08:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 21:09:03.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:09:03.409884  543705 memory.go:184] no items to output this cycle
I0320 21:09:03.409958  543705 cpu.go:275] no items to output this cycle
E0320 21:09:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:09:13.409787  543705 memory.go:191] Add success.
W0320 21:09:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 21:09:13.409814  543705 cpu.go:282] Add success.
W0320 21:09:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:09:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:09:13.420081  543705 net.go:648] Add success.
I0320 21:09:13.422619  543705 net.go:770] primary dev: ETH0
I0320 21:09:13.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:09:13.422644  543705 net.go:698] Add success.
I0320 21:09:13.468635  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3f1a29c2-eae4-4179-804a-67f9f6ceac86","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:09:13.468680  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:09:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:09:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:09:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 21:09:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:09:14.456536  543705 disk_worker.go:494] system disk:vda1
I0320 21:09:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:09:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:09:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:09:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:09:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:09:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:09:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:09:23.409780  543705 memory.go:184] no items to output this cycle
I0320 21:09:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 21:09:26.153675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:09:26.156137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:09:26.156143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad00 0xc0001aad40]
E0320 21:09:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:09:33.409791  543705 memory.go:184] no items to output this cycle
I0320 21:09:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 21:09:38.620804  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:09:38.620811  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:09:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:09:43.410623  543705 memory.go:191] Add success.
I0320 21:09:43.409806  543705 cpu.go:282] Add success.
I0320 21:09:43.420334  543705 net.go:648] Add success.
I0320 21:09:43.422778  543705 net.go:770] primary dev: ETH0
I0320 21:09:43.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:09:43.422806  543705 net.go:698] Add success.
I0320 21:09:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:09:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:09:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:09:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:09:53.410395  543705 memory.go:184] no items to output this cycle
I0320 21:09:53.410408  543705 cpu.go:275] no items to output this cycle
E0320 21:10:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:10:03.409795  543705 memory.go:184] no items to output this cycle
I0320 21:10:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 21:10:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:10:13.409808  543705 memory.go:191] Add success.
I0320 21:10:13.409818  543705 cpu.go:282] Add success.
W0320 21:10:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:10:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:10:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:10:13.420247  543705 net.go:648] Add success.
I0320 21:10:13.422904  543705 net.go:770] primary dev: ETH0
I0320 21:10:13.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:10:13.422937  543705 net.go:698] Add success.
I0320 21:10:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:10:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:10:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 21:10:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:10:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 21:10:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:10:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:10:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:10:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:10:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:10:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:10:23.409775  543705 memory.go:184] no items to output this cycle
I0320 21:10:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 21:10:26.157669  543705 disk_info.go:125] begin check local disk info of client
I0320 21:10:26.160118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:10:26.160124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab740 0xc0001ab780]
E0320 21:10:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:10:33.409784  543705 memory.go:184] no items to output this cycle
I0320 21:10:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 21:10:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:10:43.409799  543705 cpu.go:282] Add success.
I0320 21:10:43.409803  543705 memory.go:191] Add success.
I0320 21:10:43.420001  543705 net.go:648] Add success.
I0320 21:10:43.423255  543705 net.go:770] primary dev: ETH0
I0320 21:10:43.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:10:43.423280  543705 net.go:698] Add success.
I0320 21:10:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:10:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:10:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:10:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:10:53.409867  543705 memory.go:184] no items to output this cycle
I0320 21:10:53.409924  543705 cpu.go:275] no items to output this cycle
E0320 21:11:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:11:03.409811  543705 memory.go:184] no items to output this cycle
I0320 21:11:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 21:11:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:11:13.409787  543705 memory.go:191] Add success.
I0320 21:11:13.409804  543705 cpu.go:282] Add success.
W0320 21:11:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:11:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:11:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:11:13.420195  543705 net.go:648] Add success.
I0320 21:11:13.423310  543705 net.go:770] primary dev: ETH0
I0320 21:11:13.423325  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:11:13.423339  543705 net.go:698] Add success.
I0320 21:11:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:11:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:11:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 21:11:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:11:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 21:11:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:11:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:11:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:11:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:11:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:11:23.409776  543705 memory.go:184] no items to output this cycle
I0320 21:11:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 21:11:26.161671  543705 disk_info.go:125] begin check local disk info of client
I0320 21:11:26.164188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:11:26.164194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee1c0 0xc0003ee200]
E0320 21:11:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:11:33.409795  543705 memory.go:184] no items to output this cycle
I0320 21:11:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 21:11:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:11:43.409788  543705 memory.go:191] Add success.
I0320 21:11:43.409803  543705 cpu.go:282] Add success.
I0320 21:11:43.420193  543705 net.go:648] Add success.
I0320 21:11:43.423272  543705 net.go:770] primary dev: ETH0
I0320 21:11:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:11:43.423298  543705 net.go:698] Add success.
I0320 21:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:11:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:11:53.409765  543705 memory.go:184] no items to output this cycle
I0320 21:11:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 21:12:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:12:03.409819  543705 memory.go:184] no items to output this cycle
I0320 21:12:03.409832  543705 cpu.go:275] no items to output this cycle
E0320 21:12:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:12:13.409801  543705 memory.go:191] Add success.
I0320 21:12:13.409808  543705 cpu.go:282] Add success.
W0320 21:12:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:12:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:12:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:12:13.420072  543705 net.go:648] Add success.
I0320 21:12:13.422957  543705 net.go:770] primary dev: ETH0
I0320 21:12:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:12:13.422983  543705 net.go:698] Add success.
I0320 21:12:13.470268  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4cf2026e-d948-4a07-928b-67f22d8154c7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:12:13.470300  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 21:12:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:12:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 21:12:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:12:14.455965  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:12:14.455974  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:12:14.455980  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:12:14.456793  543705 disk_worker.go:494] system disk:vda1
I0320 21:12:14.456825  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:12:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:12:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:12:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:12:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:12:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:12:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:12:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:12:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:12:23.409765  543705 memory.go:184] no items to output this cycle
I0320 21:12:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 21:12:26.165672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:12:26.168067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:12:26.168072  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faa80 0xc0001faac0]
E0320 21:12:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:12:33.409760  543705 memory.go:184] no items to output this cycle
I0320 21:12:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 21:12:38.621733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:12:38.621740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:12:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:12:43.410624  543705 memory.go:191] Add success.
I0320 21:12:43.409799  543705 cpu.go:282] Add success.
I0320 21:12:43.420377  543705 net.go:648] Add success.
I0320 21:12:43.423293  543705 net.go:770] primary dev: ETH0
I0320 21:12:43.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:12:43.423316  543705 net.go:698] Add success.
I0320 21:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:12:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:12:53.409796  543705 memory.go:184] no items to output this cycle
I0320 21:12:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 21:13:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:13:03.409775  543705 memory.go:184] no items to output this cycle
I0320 21:13:03.409796  543705 cpu.go:275] no items to output this cycle
I0320 21:13:13.409980  543705 cpu.go:282] Add success.
E0320 21:13:13.410016  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:13:13.410040  543705 memory.go:191] Add success.
W0320 21:13:13.410069  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:13:13.410240  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:13:13.410246  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:13:13.419746  543705 net.go:648] Add success.
I0320 21:13:13.422639  543705 net.go:770] primary dev: ETH0
I0320 21:13:13.422652  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:13:13.422663  543705 net.go:698] Add success.
I0320 21:13:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:13:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:13:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0320 21:13:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:13:14.456489  543705 disk_worker.go:494] system disk:vda1
I0320 21:13:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:13:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:13:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:13:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:13:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:13:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:13:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:13:23.409773  543705 memory.go:184] no items to output this cycle
I0320 21:13:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 21:13:26.169675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:13:26.172119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:13:26.172125  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abec0 0xc0001abf00]
E0320 21:13:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:13:33.409765  543705 memory.go:184] no items to output this cycle
I0320 21:13:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 21:13:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:13:43.409815  543705 memory.go:191] Add success.
I0320 21:13:43.409823  543705 cpu.go:282] Add success.
I0320 21:13:43.419849  543705 net.go:648] Add success.
I0320 21:13:43.422523  543705 net.go:770] primary dev: ETH0
I0320 21:13:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:13:43.422548  543705 net.go:698] Add success.
I0320 21:13:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:13:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:13:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:13:53.409804  543705 memory.go:184] no items to output this cycle
I0320 21:13:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 21:14:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:14:03.409785  543705 memory.go:184] no items to output this cycle
I0320 21:14:03.409792  543705 cpu.go:275] no items to output this cycle
W0320 21:14:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:14:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:14:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:14:13.409851  543705 cpu.go:282] Add success.
E0320 21:14:13.409958  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:14:13.409996  543705 memory.go:191] Add success.
I0320 21:14:13.419732  543705 net.go:648] Add success.
I0320 21:14:13.422348  543705 net.go:770] primary dev: ETH0
I0320 21:14:13.422360  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:14:13.422372  543705 net.go:698] Add success.
I0320 21:14:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:14:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:14:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 21:14:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:14:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 21:14:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:14:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:14:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:14:23.410742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:14:23.410757  543705 memory.go:184] no items to output this cycle
I0320 21:14:23.410760  543705 cpu.go:275] no items to output this cycle
I0320 21:14:26.173682  543705 disk_info.go:125] begin check local disk info of client
I0320 21:14:26.176202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:14:26.176208  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab600 0xc0001ab640]
E0320 21:14:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:14:33.409801  543705 memory.go:184] no items to output this cycle
I0320 21:14:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 21:14:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:14:43.409803  543705 memory.go:191] Add success.
I0320 21:14:43.409823  543705 cpu.go:282] Add success.
I0320 21:14:43.419886  543705 net.go:648] Add success.
I0320 21:14:43.422705  543705 net.go:770] primary dev: ETH0
I0320 21:14:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:14:43.422736  543705 net.go:698] Add success.
I0320 21:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:14:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:14:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:14:53.410343  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:14:53.410358  543705 memory.go:184] no items to output this cycle
I0320 21:14:53.410379  543705 cpu.go:275] no items to output this cycle
E0320 21:15:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:15:03.409786  543705 memory.go:184] no items to output this cycle
I0320 21:15:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 21:15:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:15:13.409803  543705 memory.go:191] Add success.
I0320 21:15:13.409809  543705 cpu.go:282] Add success.
W0320 21:15:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:15:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:15:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:15:13.420157  543705 net.go:648] Add success.
I0320 21:15:13.423520  543705 net.go:770] primary dev: ETH0
I0320 21:15:13.423536  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:15:13.423549  543705 net.go:698] Add success.
I0320 21:15:13.463566  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"82664d1a-b401-49f1-b518-4619e8582181","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:15:13.463599  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:15:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:15:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 21:15:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:15:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 21:15:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:15:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:15:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:15:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:15:23.409783  543705 memory.go:184] no items to output this cycle
I0320 21:15:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 21:15:26.177676  543705 disk_info.go:125] begin check local disk info of client
I0320 21:15:26.180144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:15:26.180152  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8b80 0xc0002a8bc0]
E0320 21:15:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:15:33.409768  543705 memory.go:184] no items to output this cycle
I0320 21:15:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 21:15:38.622820  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:15:38.622827  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:15:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:15:43.410716  543705 memory.go:191] Add success.
I0320 21:15:43.409803  543705 cpu.go:282] Add success.
I0320 21:15:43.420498  543705 net.go:648] Add success.
I0320 21:15:43.423394  543705 net.go:770] primary dev: ETH0
I0320 21:15:43.423408  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:15:43.423422  543705 net.go:698] Add success.
I0320 21:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:15:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:15:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:15:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:15:53.409772  543705 memory.go:184] no items to output this cycle
I0320 21:15:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 21:16:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:16:03.409814  543705 memory.go:184] no items to output this cycle
I0320 21:16:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 21:16:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:16:13.409782  543705 memory.go:191] Add success.
W0320 21:16:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 21:16:13.409811  543705 cpu.go:282] Add success.
W0320 21:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:16:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:16:13.420194  543705 net.go:648] Add success.
I0320 21:16:13.423066  543705 net.go:770] primary dev: ETH0
I0320 21:16:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:16:13.423091  543705 net.go:698] Add success.
I0320 21:16:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:16:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:16:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 21:16:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:16:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 21:16:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:16:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:16:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:16:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:16:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:16:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:16:23.410475  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:16:23.410492  543705 memory.go:184] no items to output this cycle
I0320 21:16:23.410511  543705 cpu.go:275] no items to output this cycle
I0320 21:16:26.181673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:16:26.184114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:16:26.184119  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a9240 0xc0002a9280]
E0320 21:16:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:16:33.409780  543705 cpu.go:275] no items to output this cycle
I0320 21:16:33.409788  543705 memory.go:184] no items to output this cycle
E0320 21:16:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:16:43.409819  543705 memory.go:191] Add success.
I0320 21:16:43.409833  543705 cpu.go:282] Add success.
I0320 21:16:43.420006  543705 net.go:648] Add success.
I0320 21:16:43.422691  543705 net.go:770] primary dev: ETH0
I0320 21:16:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:16:43.422720  543705 net.go:698] Add success.
I0320 21:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:16:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:16:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:16:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:16:53.409773  543705 memory.go:184] no items to output this cycle
I0320 21:16:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 21:17:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:17:03.409777  543705 memory.go:184] no items to output this cycle
I0320 21:17:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 21:17:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:17:13.409813  543705 memory.go:191] Add success.
I0320 21:17:13.409819  543705 cpu.go:282] Add success.
W0320 21:17:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:17:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:17:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:17:13.420057  543705 net.go:648] Add success.
I0320 21:17:13.422711  543705 net.go:770] primary dev: ETH0
I0320 21:17:13.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:17:13.422737  543705 net.go:698] Add success.
I0320 21:17:13.452865  543705 event_worker.go:152] Polling the log file for events...
W0320 21:17:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:17:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 21:17:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:17:14.455911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:17:14.455920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:17:14.455926  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:17:14.456552  543705 disk_worker.go:494] system disk:vda1
I0320 21:17:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:17:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:17:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:17:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:17:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:17:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:17:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:17:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:17:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:17:23.409792  543705 memory.go:184] no items to output this cycle
I0320 21:17:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 21:17:26.185672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:17:26.188177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:17:26.188183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b80 0xc0000c5bc0]
E0320 21:17:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:17:33.409805  543705 memory.go:184] no items to output this cycle
I0320 21:17:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 21:17:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:17:43.409778  543705 memory.go:191] Add success.
I0320 21:17:43.409806  543705 cpu.go:282] Add success.
I0320 21:17:43.419864  543705 net.go:648] Add success.
I0320 21:17:43.423228  543705 net.go:770] primary dev: ETH0
I0320 21:17:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:17:43.423255  543705 net.go:698] Add success.
I0320 21:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:17:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:17:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:17:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:17:53.409773  543705 memory.go:184] no items to output this cycle
I0320 21:17:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 21:18:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:18:03.409782  543705 memory.go:184] no items to output this cycle
I0320 21:18:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 21:18:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:18:13.409791  543705 memory.go:191] Add success.
I0320 21:18:13.409801  543705 cpu.go:282] Add success.
W0320 21:18:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:18:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:18:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:18:13.420049  543705 net.go:648] Add success.
I0320 21:18:13.423256  543705 net.go:770] primary dev: ETH0
I0320 21:18:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:18:13.423295  543705 net.go:698] Add success.
I0320 21:18:13.481709  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2eec28f2-0260-455e-a9a5-ee13c6970c9a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:18:13.481751  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:18:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:18:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 21:18:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:18:14.456729  543705 disk_worker.go:494] system disk:vda1
I0320 21:18:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:18:15.455613  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:18:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:18:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:18:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:18:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:18:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:18:23.409800  543705 memory.go:184] no items to output this cycle
I0320 21:18:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 21:18:26.189669  543705 disk_info.go:125] begin check local disk info of client
I0320 21:18:26.192169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:18:26.192176  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8080 0xc0002a80c0]
E0320 21:18:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:18:33.409778  543705 memory.go:184] no items to output this cycle
I0320 21:18:33.409798  543705 cpu.go:275] no items to output this cycle
I0320 21:18:38.622970  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:18:38.622977  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:18:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:18:43.410734  543705 memory.go:191] Add success.
I0320 21:18:43.409820  543705 cpu.go:282] Add success.
I0320 21:18:43.420430  543705 net.go:648] Add success.
I0320 21:18:43.423019  543705 net.go:770] primary dev: ETH0
I0320 21:18:43.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:18:43.423046  543705 net.go:698] Add success.
I0320 21:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:18:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:18:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:18:53.409783  543705 memory.go:184] no items to output this cycle
I0320 21:18:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 21:19:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:19:03.409777  543705 memory.go:184] no items to output this cycle
I0320 21:19:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 21:19:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:19:13.409793  543705 memory.go:191] Add success.
I0320 21:19:13.409811  543705 cpu.go:282] Add success.
W0320 21:19:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:19:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:19:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:19:13.420140  543705 net.go:648] Add success.
I0320 21:19:13.422878  543705 net.go:770] primary dev: ETH0
I0320 21:19:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:19:13.422904  543705 net.go:698] Add success.
I0320 21:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:19:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:19:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 21:19:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:19:14.456505  543705 disk_worker.go:494] system disk:vda1
I0320 21:19:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:19:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:19:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:19:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:19:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:19:23.409791  543705 memory.go:184] no items to output this cycle
I0320 21:19:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 21:19:26.193675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:19:26.196149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:19:26.196155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a84c0 0xc0002a8500]
E0320 21:19:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:19:33.409769  543705 memory.go:184] no items to output this cycle
I0320 21:19:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 21:19:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:19:43.409794  543705 memory.go:191] Add success.
I0320 21:19:43.409806  543705 cpu.go:282] Add success.
I0320 21:19:43.419980  543705 net.go:648] Add success.
I0320 21:19:43.422898  543705 net.go:770] primary dev: ETH0
I0320 21:19:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:19:43.422925  543705 net.go:698] Add success.
I0320 21:19:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:19:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:19:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:19:53.409804  543705 memory.go:184] no items to output this cycle
I0320 21:19:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 21:20:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:20:03.409784  543705 memory.go:184] no items to output this cycle
I0320 21:20:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 21:20:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:20:13.409789  543705 memory.go:191] Add success.
I0320 21:20:13.409813  543705 cpu.go:282] Add success.
W0320 21:20:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:20:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:20:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:20:13.420178  543705 net.go:648] Add success.
I0320 21:20:13.422756  543705 net.go:770] primary dev: ETH0
I0320 21:20:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:20:13.422784  543705 net.go:698] Add success.
I0320 21:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:20:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:20:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 21:20:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:20:14.456581  543705 disk_worker.go:494] system disk:vda1
I0320 21:20:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:20:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:20:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:20:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:20:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:20:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:20:23.409763  543705 memory.go:184] no items to output this cycle
I0320 21:20:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 21:20:26.197678  543705 disk_info.go:125] begin check local disk info of client
I0320 21:20:26.200128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:20:26.200135  543705 disk_info.go:196] parse disk info done, disk is : [0xc00058c7c0 0xc00058c800]
E0320 21:20:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:20:33.409817  543705 memory.go:184] no items to output this cycle
I0320 21:20:33.409827  543705 cpu.go:275] no items to output this cycle
E0320 21:20:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:20:43.409794  543705 memory.go:191] Add success.
I0320 21:20:43.409808  543705 cpu.go:282] Add success.
I0320 21:20:43.419910  543705 net.go:648] Add success.
I0320 21:20:43.422665  543705 net.go:770] primary dev: ETH0
I0320 21:20:43.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:20:43.422695  543705 net.go:698] Add success.
I0320 21:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:20:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:20:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:20:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:20:53.409784  543705 memory.go:184] no items to output this cycle
I0320 21:20:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 21:21:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:21:03.409814  543705 memory.go:184] no items to output this cycle
I0320 21:21:03.409824  543705 cpu.go:275] no items to output this cycle
E0320 21:21:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:21:13.409795  543705 cpu.go:282] Add success.
I0320 21:21:13.409800  543705 memory.go:191] Add success.
W0320 21:21:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:21:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:21:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:21:13.419704  543705 net.go:648] Add success.
I0320 21:21:13.422145  543705 net.go:770] primary dev: ETH0
I0320 21:21:13.422158  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:21:13.422174  543705 net.go:698] Add success.
I0320 21:21:13.469147  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6802158b-db72-4963-9b10-aeabb65fdef3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:21:13.469179  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:21:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:21:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:21:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0320 21:21:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:21:14.456543  543705 disk_worker.go:494] system disk:vda1
I0320 21:21:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:21:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:21:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:21:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:21:16.472516  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:21:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:21:23.409784  543705 memory.go:184] no items to output this cycle
I0320 21:21:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 21:21:26.201679  543705 disk_info.go:125] begin check local disk info of client
I0320 21:21:26.204126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:21:26.204133  543705 disk_info.go:196] parse disk info done, disk is : [0xc000280000 0xc000280040]
E0320 21:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:21:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 21:21:33.409794  543705 memory.go:184] no items to output this cycle
I0320 21:21:38.623824  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:21:38.623830  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:21:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:21:43.410700  543705 memory.go:191] Add success.
I0320 21:21:43.409817  543705 cpu.go:282] Add success.
I0320 21:21:43.420395  543705 net.go:648] Add success.
I0320 21:21:43.423202  543705 net.go:770] primary dev: ETH0
I0320 21:21:43.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:21:43.423227  543705 net.go:698] Add success.
I0320 21:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:21:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:21:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:21:53.409773  543705 memory.go:184] no items to output this cycle
I0320 21:21:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 21:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:22:03.409784  543705 memory.go:184] no items to output this cycle
I0320 21:22:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 21:22:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:22:13.409789  543705 memory.go:191] Add success.
I0320 21:22:13.409808  543705 cpu.go:282] Add success.
W0320 21:22:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:22:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:22:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:22:13.420120  543705 net.go:648] Add success.
I0320 21:22:13.422753  543705 net.go:770] primary dev: ETH0
I0320 21:22:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:22:13.422778  543705 net.go:698] Add success.
W0320 21:22:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:22:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 21:22:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:22:14.456912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:22:14.456921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:22:14.456927  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:22:14.456999  543705 disk_worker.go:494] system disk:vda1
I0320 21:22:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:22:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:22:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:22:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:22:16.458011  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:22:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:22:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:22:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:22:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:22:23.409779  543705 memory.go:184] no items to output this cycle
I0320 21:22:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 21:22:26.205674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:22:26.208323  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:22:26.208334  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bc000 0xc0004bc040]
E0320 21:22:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:22:33.409790  543705 memory.go:184] no items to output this cycle
I0320 21:22:33.409789  543705 cpu.go:275] no items to output this cycle
E0320 21:22:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:22:43.409804  543705 memory.go:191] Add success.
I0320 21:22:43.409807  543705 cpu.go:282] Add success.
I0320 21:22:43.419891  543705 net.go:648] Add success.
I0320 21:22:43.422705  543705 net.go:770] primary dev: ETH0
I0320 21:22:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:22:43.422736  543705 net.go:698] Add success.
I0320 21:22:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:22:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:22:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:22:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:22:53.409799  543705 memory.go:184] no items to output this cycle
I0320 21:22:53.409847  543705 cpu.go:275] no items to output this cycle
E0320 21:23:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:23:03.409804  543705 memory.go:184] no items to output this cycle
I0320 21:23:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 21:23:13.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:23:13.409851  543705 memory.go:191] Add success.
I0320 21:23:13.409856  543705 cpu.go:282] Add success.
W0320 21:23:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:23:13.409901  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:23:13.409905  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:23:13.420203  543705 net.go:648] Add success.
I0320 21:23:13.423026  543705 net.go:770] primary dev: ETH0
I0320 21:23:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:23:13.423051  543705 net.go:698] Add success.
I0320 21:23:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:23:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:23:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0320 21:23:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:23:14.456600  543705 disk_worker.go:494] system disk:vda1
I0320 21:23:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:23:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:23:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:23:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:23:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:23:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:23:23.409806  543705 memory.go:184] no items to output this cycle
I0320 21:23:23.409819  543705 cpu.go:275] no items to output this cycle
I0320 21:23:26.209671  543705 disk_info.go:125] begin check local disk info of client
I0320 21:23:26.212151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:23:26.212157  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8600 0xc0003e8640]
E0320 21:23:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:23:33.409784  543705 memory.go:184] no items to output this cycle
I0320 21:23:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 21:23:43.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:23:43.409923  543705 memory.go:191] Add success.
I0320 21:23:43.410074  543705 cpu.go:282] Add success.
I0320 21:23:43.419708  543705 net.go:648] Add success.
I0320 21:23:43.422490  543705 net.go:770] primary dev: ETH0
I0320 21:23:43.422502  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:23:43.422514  543705 net.go:698] Add success.
I0320 21:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:23:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:23:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:23:53.409790  543705 memory.go:184] no items to output this cycle
I0320 21:23:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 21:24:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:24:03.409785  543705 memory.go:184] no items to output this cycle
I0320 21:24:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 21:24:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:24:13.409793  543705 memory.go:191] Add success.
I0320 21:24:13.409814  543705 cpu.go:282] Add success.
W0320 21:24:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:24:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:24:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:24:13.420152  543705 net.go:648] Add success.
I0320 21:24:13.422832  543705 net.go:770] primary dev: ETH0
I0320 21:24:13.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:24:13.422861  543705 net.go:698] Add success.
I0320 21:24:13.463361  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4095ec15-91c5-4e53-a092-c36baada3007","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:24:13.463394  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:24:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:24:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:24:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 21:24:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:24:14.456702  543705 disk_worker.go:494] system disk:vda1
I0320 21:24:14.456737  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:24:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:24:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:24:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:24:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:24:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:24:23.409775  543705 memory.go:184] no items to output this cycle
I0320 21:24:23.409794  543705 cpu.go:275] no items to output this cycle
I0320 21:24:26.213685  543705 disk_info.go:125] begin check local disk info of client
I0320 21:24:26.215994  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:24:26.216001  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d040 0xc00034d080]
E0320 21:24:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:24:33.409820  543705 memory.go:184] no items to output this cycle
I0320 21:24:33.409828  543705 cpu.go:275] no items to output this cycle
I0320 21:24:38.624824  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:24:38.624831  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:24:43.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:24:43.410543  543705 memory.go:191] Add success.
I0320 21:24:43.409927  543705 cpu.go:282] Add success.
I0320 21:24:43.419755  543705 net.go:648] Add success.
I0320 21:24:43.422355  543705 net.go:770] primary dev: ETH0
I0320 21:24:43.422369  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:24:43.422383  543705 net.go:698] Add success.
I0320 21:24:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:24:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:24:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:24:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:24:53.409777  543705 memory.go:184] no items to output this cycle
I0320 21:24:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 21:25:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:25:03.409777  543705 memory.go:184] no items to output this cycle
I0320 21:25:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 21:25:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:25:13.409791  543705 memory.go:191] Add success.
I0320 21:25:13.409811  543705 cpu.go:282] Add success.
W0320 21:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:25:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:25:13.420182  543705 net.go:648] Add success.
I0320 21:25:13.422942  543705 net.go:770] primary dev: ETH0
I0320 21:25:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:25:13.422969  543705 net.go:698] Add success.
I0320 21:25:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:25:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:25:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 21:25:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:25:14.456501  543705 disk_worker.go:494] system disk:vda1
I0320 21:25:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:25:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:25:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:25:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:25:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:25:23.409765  543705 memory.go:184] no items to output this cycle
I0320 21:25:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 21:25:26.217678  543705 disk_info.go:125] begin check local disk info of client
I0320 21:25:26.220118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:25:26.220124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037cb00 0xc00037cb40]
E0320 21:25:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:25:33.409801  543705 memory.go:184] no items to output this cycle
I0320 21:25:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 21:25:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:25:43.409920  543705 memory.go:191] Add success.
I0320 21:25:43.409936  543705 cpu.go:282] Add success.
I0320 21:25:43.419754  543705 net.go:648] Add success.
I0320 21:25:43.422269  543705 net.go:770] primary dev: ETH0
I0320 21:25:43.422284  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:25:43.422297  543705 net.go:698] Add success.
I0320 21:25:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:25:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:25:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:25:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:25:53.409782  543705 memory.go:184] no items to output this cycle
I0320 21:25:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 21:26:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:26:03.409781  543705 memory.go:184] no items to output this cycle
I0320 21:26:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 21:26:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:26:13.409799  543705 memory.go:191] Add success.
I0320 21:26:13.409821  543705 cpu.go:282] Add success.
W0320 21:26:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:26:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:26:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:26:13.420208  543705 net.go:648] Add success.
I0320 21:26:13.423142  543705 net.go:770] primary dev: ETH0
I0320 21:26:13.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:26:13.423171  543705 net.go:698] Add success.
I0320 21:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:26:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:26:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 21:26:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:26:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 21:26:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:26:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:26:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:26:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:26:23.409770  543705 memory.go:184] no items to output this cycle
I0320 21:26:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 21:26:26.221675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:26:26.224213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:26:26.224219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5680 0xc0000c56c0]
E0320 21:26:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:26:33.409777  543705 memory.go:184] no items to output this cycle
I0320 21:26:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 21:26:43.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:26:43.409913  543705 cpu.go:282] Add success.
I0320 21:26:43.409945  543705 memory.go:191] Add success.
I0320 21:26:43.419739  543705 net.go:648] Add success.
I0320 21:26:43.422476  543705 net.go:770] primary dev: ETH0
I0320 21:26:43.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:26:43.422501  543705 net.go:698] Add success.
I0320 21:26:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:26:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:26:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:26:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:26:53.409794  543705 memory.go:184] no items to output this cycle
I0320 21:26:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 21:27:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:27:03.409781  543705 memory.go:184] no items to output this cycle
I0320 21:27:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 21:27:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:27:13.409812  543705 memory.go:191] Add success.
I0320 21:27:13.409815  543705 cpu.go:282] Add success.
W0320 21:27:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:27:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:27:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:27:13.420137  543705 net.go:648] Add success.
I0320 21:27:13.422865  543705 net.go:770] primary dev: ETH0
I0320 21:27:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:27:13.422895  543705 net.go:698] Add success.
I0320 21:27:13.428871  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 21:27:13.453046  543705 event_worker.go:152] Polling the log file for events...
I0320 21:27:13.463283  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d2d348c0-4c64-4492-a2f0-7218a20c57f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:27:13.463315  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 21:27:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:27:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 21:27:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:27:14.456868  543705 disk_worker.go:494] system disk:vda1
E0320 21:27:14.456888  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:27:14.456896  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:27:14.456900  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:27:14.456914  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:27:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:27:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:27:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:27:16.457980  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:27:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:27:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:27:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:27:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:27:23.409793  543705 memory.go:184] no items to output this cycle
I0320 21:27:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 21:27:26.225677  543705 disk_info.go:125] begin check local disk info of client
I0320 21:27:26.228265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:27:26.228271  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037dc00 0xc00037dc40]
E0320 21:27:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:27:33.409770  543705 memory.go:184] no items to output this cycle
I0320 21:27:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 21:27:38.625731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:27:38.625737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:27:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:27:43.410787  543705 memory.go:191] Add success.
I0320 21:27:43.409971  543705 cpu.go:282] Add success.
I0320 21:27:43.419701  543705 net.go:648] Add success.
I0320 21:27:43.422194  543705 net.go:770] primary dev: ETH0
I0320 21:27:43.422206  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:27:43.422218  543705 net.go:698] Add success.
I0320 21:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:27:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:27:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:27:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:27:53.409771  543705 memory.go:184] no items to output this cycle
I0320 21:27:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 21:28:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:28:03.409788  543705 memory.go:184] no items to output this cycle
I0320 21:28:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 21:28:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:28:13.409839  543705 memory.go:191] Add success.
I0320 21:28:13.409844  543705 cpu.go:282] Add success.
W0320 21:28:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:28:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:28:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:28:13.420177  543705 net.go:648] Add success.
I0320 21:28:13.423314  543705 net.go:770] primary dev: ETH0
I0320 21:28:13.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:28:13.423339  543705 net.go:698] Add success.
I0320 21:28:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:28:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:28:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 21:28:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:28:14.456569  543705 disk_worker.go:494] system disk:vda1
I0320 21:28:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:28:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:28:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:28:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:28:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:28:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:28:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:28:23.409783  543705 memory.go:184] no items to output this cycle
I0320 21:28:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 21:28:26.229692  543705 disk_info.go:125] begin check local disk info of client
I0320 21:28:26.232182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:28:26.232188  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f200 0xc00029f240]
E0320 21:28:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:28:33.409776  543705 memory.go:184] no items to output this cycle
I0320 21:28:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 21:28:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:28:43.409912  543705 cpu.go:282] Add success.
I0320 21:28:43.409921  543705 memory.go:191] Add success.
I0320 21:28:43.419728  543705 net.go:648] Add success.
I0320 21:28:43.422413  543705 net.go:770] primary dev: ETH0
I0320 21:28:43.422428  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:28:43.422442  543705 net.go:698] Add success.
I0320 21:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:28:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:28:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:28:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 21:28:53.409793  543705 memory.go:184] no items to output this cycle
E0320 21:29:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:29:03.409804  543705 memory.go:184] no items to output this cycle
I0320 21:29:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 21:29:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:29:13.409799  543705 cpu.go:282] Add success.
I0320 21:29:13.409805  543705 memory.go:191] Add success.
W0320 21:29:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:29:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:29:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:29:13.420296  543705 net.go:648] Add success.
I0320 21:29:13.422895  543705 net.go:770] primary dev: ETH0
I0320 21:29:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:29:13.422924  543705 net.go:698] Add success.
I0320 21:29:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:29:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:29:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0320 21:29:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:29:14.456553  543705 disk_worker.go:494] system disk:vda1
I0320 21:29:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:29:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:29:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:29:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:29:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:29:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:29:23.409775  543705 memory.go:184] no items to output this cycle
I0320 21:29:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 21:29:26.233678  543705 disk_info.go:125] begin check local disk info of client
I0320 21:29:26.236155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:29:26.236161  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460140 0xc000460180]
E0320 21:29:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:29:33.409794  543705 memory.go:184] no items to output this cycle
I0320 21:29:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 21:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:29:43.409796  543705 memory.go:191] Add success.
I0320 21:29:43.409799  543705 cpu.go:282] Add success.
I0320 21:29:43.419830  543705 net.go:648] Add success.
I0320 21:29:43.422572  543705 net.go:770] primary dev: ETH0
I0320 21:29:43.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:29:43.422601  543705 net.go:698] Add success.
I0320 21:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:29:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:29:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:29:53.410332  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:29:53.410349  543705 memory.go:184] no items to output this cycle
I0320 21:29:53.410377  543705 cpu.go:275] no items to output this cycle
E0320 21:30:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:30:03.409785  543705 memory.go:184] no items to output this cycle
I0320 21:30:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 21:30:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:30:13.409823  543705 memory.go:191] Add success.
I0320 21:30:13.409830  543705 cpu.go:282] Add success.
W0320 21:30:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:30:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:30:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:30:13.420186  543705 net.go:648] Add success.
I0320 21:30:13.422817  543705 net.go:770] primary dev: ETH0
I0320 21:30:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:30:13.422842  543705 net.go:698] Add success.
I0320 21:30:13.469313  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b441df17-4a61-4fbe-8886-c8d6b17310cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:30:13.469347  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:30:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:30:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:30:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 21:30:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:30:14.456569  543705 disk_worker.go:494] system disk:vda1
I0320 21:30:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:30:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:30:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:30:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:30:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:30:23.409810  543705 memory.go:184] no items to output this cycle
I0320 21:30:23.409822  543705 cpu.go:275] no items to output this cycle
I0320 21:30:26.237672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:30:26.240070  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:30:26.240077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486a40 0xc000486a80]
E0320 21:30:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:30:33.409788  543705 cpu.go:275] no items to output this cycle
I0320 21:30:33.409792  543705 memory.go:184] no items to output this cycle
I0320 21:30:38.626830  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:30:38.626837  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:30:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:30:43.410765  543705 memory.go:191] Add success.
I0320 21:30:43.409812  543705 cpu.go:282] Add success.
I0320 21:30:43.420444  543705 net.go:648] Add success.
I0320 21:30:43.423477  543705 net.go:770] primary dev: ETH0
I0320 21:30:43.423491  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:30:43.423503  543705 net.go:698] Add success.
I0320 21:30:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:30:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:30:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:30:53.409767  543705 memory.go:184] no items to output this cycle
I0320 21:30:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 21:31:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:31:03.409791  543705 memory.go:184] no items to output this cycle
I0320 21:31:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 21:31:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:31:13.409784  543705 memory.go:191] Add success.
I0320 21:31:13.409806  543705 cpu.go:282] Add success.
W0320 21:31:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:31:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:31:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:31:13.420136  543705 net.go:648] Add success.
I0320 21:31:13.422887  543705 net.go:770] primary dev: ETH0
I0320 21:31:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:31:13.422911  543705 net.go:698] Add success.
I0320 21:31:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:31:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:31:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 21:31:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:31:14.456516  543705 disk_worker.go:494] system disk:vda1
I0320 21:31:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:31:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:31:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:31:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:31:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:31:23.409810  543705 memory.go:184] no items to output this cycle
I0320 21:31:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 21:31:26.241674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:31:26.244143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:31:26.244149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f700 0xc00029f740]
E0320 21:31:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:31:33.409800  543705 memory.go:184] no items to output this cycle
I0320 21:31:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 21:31:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:31:43.409796  543705 memory.go:191] Add success.
I0320 21:31:43.409800  543705 cpu.go:282] Add success.
I0320 21:31:43.419980  543705 net.go:648] Add success.
I0320 21:31:43.422760  543705 net.go:770] primary dev: ETH0
I0320 21:31:43.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:31:43.422786  543705 net.go:698] Add success.
I0320 21:31:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:31:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:31:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:31:53.409781  543705 memory.go:184] no items to output this cycle
I0320 21:31:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 21:32:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:32:03.409781  543705 memory.go:184] no items to output this cycle
I0320 21:32:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 21:32:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:32:13.409818  543705 memory.go:191] Add success.
I0320 21:32:13.409821  543705 cpu.go:282] Add success.
W0320 21:32:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:32:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:32:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:32:13.420249  543705 net.go:648] Add success.
I0320 21:32:13.423104  543705 net.go:770] primary dev: ETH0
I0320 21:32:13.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:32:13.423133  543705 net.go:698] Add success.
W0320 21:32:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:32:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 21:32:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:32:14.456806  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:32:14.456815  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:32:14.456822  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:32:14.456867  543705 disk_worker.go:494] system disk:vda1
I0320 21:32:14.456909  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:32:15.456846  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:32:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:32:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:32:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:32:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:32:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:32:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:32:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:32:23.409803  543705 memory.go:184] no items to output this cycle
I0320 21:32:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 21:32:26.245674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:32:26.248196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:32:26.248203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab040 0xc0001ab080]
E0320 21:32:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:32:33.409803  543705 memory.go:184] no items to output this cycle
I0320 21:32:33.409809  543705 cpu.go:275] no items to output this cycle
E0320 21:32:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:32:43.409774  543705 memory.go:191] Add success.
I0320 21:32:43.409801  543705 cpu.go:282] Add success.
I0320 21:32:43.419876  543705 net.go:648] Add success.
I0320 21:32:43.422523  543705 net.go:770] primary dev: ETH0
I0320 21:32:43.422537  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:32:43.422548  543705 net.go:698] Add success.
I0320 21:32:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:32:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:32:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:32:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:32:53.409768  543705 memory.go:184] no items to output this cycle
I0320 21:32:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 21:33:03.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:33:03.409938  543705 cpu.go:275] no items to output this cycle
I0320 21:33:03.409967  543705 memory.go:184] no items to output this cycle
E0320 21:33:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:33:13.409818  543705 memory.go:191] Add success.
I0320 21:33:13.409829  543705 cpu.go:282] Add success.
W0320 21:33:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:33:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:33:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:33:13.420169  543705 net.go:648] Add success.
I0320 21:33:13.422734  543705 net.go:770] primary dev: ETH0
I0320 21:33:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:33:13.422768  543705 net.go:698] Add success.
I0320 21:33:13.468695  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e9df1ee-5e2c-4c3f-9582-ec32ed4015fd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:33:13.468727  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:33:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:33:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 21:33:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:33:14.456508  543705 disk_worker.go:494] system disk:vda1
I0320 21:33:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:33:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:33:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:33:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:33:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:33:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 21:33:23.409791  543705 memory.go:184] no items to output this cycle
I0320 21:33:26.249675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:33:26.252175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:33:26.252181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5cc0 0xc0000c5d00]
E0320 21:33:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:33:33.409805  543705 memory.go:184] no items to output this cycle
I0320 21:33:33.409816  543705 cpu.go:275] no items to output this cycle
I0320 21:33:38.627844  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:33:38.627852  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:33:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:33:43.410838  543705 memory.go:191] Add success.
I0320 21:33:43.409820  543705 cpu.go:282] Add success.
I0320 21:33:43.420561  543705 net.go:648] Add success.
I0320 21:33:43.423199  543705 net.go:770] primary dev: ETH0
I0320 21:33:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:33:43.423224  543705 net.go:698] Add success.
I0320 21:33:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:33:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:33:53.410386  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:33:53.410403  543705 cpu.go:275] no items to output this cycle
I0320 21:33:53.410415  543705 memory.go:184] no items to output this cycle
E0320 21:34:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:34:03.409785  543705 memory.go:184] no items to output this cycle
I0320 21:34:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 21:34:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:34:13.409824  543705 memory.go:191] Add success.
I0320 21:34:13.409828  543705 cpu.go:282] Add success.
W0320 21:34:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:34:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:34:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:34:13.420227  543705 net.go:648] Add success.
I0320 21:34:13.422884  543705 net.go:770] primary dev: ETH0
I0320 21:34:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:34:13.422914  543705 net.go:698] Add success.
I0320 21:34:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:34:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:34:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 21:34:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:34:14.456579  543705 disk_worker.go:494] system disk:vda1
I0320 21:34:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:34:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:34:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:34:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:34:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:34:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:34:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:34:23.409800  543705 memory.go:184] no items to output this cycle
I0320 21:34:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 21:34:26.253673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:34:26.256235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:34:26.256241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2f00 0xc0002b2f40]
E0320 21:34:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:34:33.409782  543705 memory.go:184] no items to output this cycle
I0320 21:34:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 21:34:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:34:43.409809  543705 memory.go:191] Add success.
I0320 21:34:43.409819  543705 cpu.go:282] Add success.
I0320 21:34:43.419857  543705 net.go:648] Add success.
I0320 21:34:43.422677  543705 net.go:770] primary dev: ETH0
I0320 21:34:43.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:34:43.422709  543705 net.go:698] Add success.
I0320 21:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:34:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:34:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:34:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:34:53.409795  543705 memory.go:184] no items to output this cycle
I0320 21:34:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 21:35:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:35:03.409791  543705 cpu.go:275] no items to output this cycle
I0320 21:35:03.409794  543705 memory.go:184] no items to output this cycle
E0320 21:35:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:35:13.409781  543705 memory.go:191] Add success.
W0320 21:35:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 21:35:13.409809  543705 cpu.go:282] Add success.
W0320 21:35:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:35:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:35:13.420057  543705 net.go:648] Add success.
I0320 21:35:13.422686  543705 net.go:770] primary dev: ETH0
I0320 21:35:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:35:13.422710  543705 net.go:698] Add success.
I0320 21:35:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:35:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:35:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 21:35:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:35:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 21:35:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:35:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:35:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:35:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:35:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:35:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:35:23.409797  543705 memory.go:184] no items to output this cycle
I0320 21:35:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 21:35:26.257673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:35:26.260188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:35:26.260194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2ec0 0xc0002b2f00]
E0320 21:35:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:35:33.409764  543705 memory.go:184] no items to output this cycle
I0320 21:35:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 21:35:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:35:43.409823  543705 memory.go:191] Add success.
I0320 21:35:43.409827  543705 cpu.go:282] Add success.
I0320 21:35:43.419871  543705 net.go:648] Add success.
I0320 21:35:43.422687  543705 net.go:770] primary dev: ETH0
I0320 21:35:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:35:43.422710  543705 net.go:698] Add success.
I0320 21:35:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:35:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:35:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:35:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:35:53.409793  543705 memory.go:184] no items to output this cycle
I0320 21:35:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 21:36:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:36:03.409785  543705 memory.go:184] no items to output this cycle
I0320 21:36:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 21:36:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:36:13.409803  543705 cpu.go:282] Add success.
I0320 21:36:13.409805  543705 memory.go:191] Add success.
W0320 21:36:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:36:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:36:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:36:13.420139  543705 net.go:648] Add success.
I0320 21:36:13.422935  543705 net.go:770] primary dev: ETH0
I0320 21:36:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:36:13.422962  543705 net.go:698] Add success.
I0320 21:36:13.469471  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1a1faa47-f056-4699-93c2-ea89a653f508","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:36:13.469504  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:36:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:36:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:36:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0320 21:36:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:36:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 21:36:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:36:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:36:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:36:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:36:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:36:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:36:23.409778  543705 memory.go:184] no items to output this cycle
I0320 21:36:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 21:36:26.261674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:36:26.264207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:36:26.264213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352f00 0xc000352f40]
E0320 21:36:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:36:33.409809  543705 memory.go:184] no items to output this cycle
I0320 21:36:33.409823  543705 cpu.go:275] no items to output this cycle
I0320 21:36:38.628847  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:36:38.628854  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:36:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:36:43.410638  543705 memory.go:191] Add success.
I0320 21:36:43.409820  543705 cpu.go:282] Add success.
I0320 21:36:43.420339  543705 net.go:648] Add success.
I0320 21:36:43.422882  543705 net.go:770] primary dev: ETH0
I0320 21:36:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:36:43.422910  543705 net.go:698] Add success.
I0320 21:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:36:53.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:36:53.409880  543705 memory.go:184] no items to output this cycle
I0320 21:36:53.409948  543705 cpu.go:275] no items to output this cycle
E0320 21:37:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:37:03.409821  543705 memory.go:184] no items to output this cycle
I0320 21:37:03.409836  543705 cpu.go:275] no items to output this cycle
E0320 21:37:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:37:13.409796  543705 cpu.go:282] Add success.
I0320 21:37:13.409799  543705 memory.go:191] Add success.
W0320 21:37:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:37:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:37:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:37:13.420110  543705 net.go:648] Add success.
I0320 21:37:13.423145  543705 net.go:770] primary dev: ETH0
I0320 21:37:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:37:13.423171  543705 net.go:698] Add success.
I0320 21:37:13.453669  543705 event_worker.go:152] Polling the log file for events...
W0320 21:37:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:37:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 21:37:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:37:14.457030  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:37:14.457040  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:37:14.457047  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:37:14.457095  543705 disk_worker.go:494] system disk:vda1
I0320 21:37:14.457141  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:37:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:37:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:37:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:37:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:37:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:37:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:37:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:37:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:37:23.409797  543705 memory.go:184] no items to output this cycle
I0320 21:37:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 21:37:26.265672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:37:26.268160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:37:26.268165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b35c0 0xc0002b3600]
E0320 21:37:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:37:33.409795  543705 memory.go:184] no items to output this cycle
I0320 21:37:33.409807  543705 cpu.go:275] no items to output this cycle
E0320 21:37:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:37:43.409773  543705 memory.go:191] Add success.
I0320 21:37:43.409806  543705 cpu.go:282] Add success.
I0320 21:37:43.419854  543705 net.go:648] Add success.
I0320 21:37:43.422497  543705 net.go:770] primary dev: ETH0
I0320 21:37:43.422509  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:37:43.422523  543705 net.go:698] Add success.
I0320 21:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:37:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:37:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:37:53.409778  543705 memory.go:184] no items to output this cycle
I0320 21:37:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 21:38:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:38:03.409815  543705 memory.go:184] no items to output this cycle
I0320 21:38:03.409826  543705 cpu.go:275] no items to output this cycle
E0320 21:38:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:38:13.409788  543705 memory.go:191] Add success.
I0320 21:38:13.409807  543705 cpu.go:282] Add success.
W0320 21:38:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:38:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:38:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:38:13.420243  543705 net.go:648] Add success.
I0320 21:38:13.423017  543705 net.go:770] primary dev: ETH0
I0320 21:38:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:38:13.423046  543705 net.go:698] Add success.
I0320 21:38:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:38:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:38:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 21:38:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:38:14.456591  543705 disk_worker.go:494] system disk:vda1
I0320 21:38:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:38:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:38:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:38:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:38:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:38:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:38:23.409785  543705 memory.go:184] no items to output this cycle
I0320 21:38:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 21:38:26.269677  543705 disk_info.go:125] begin check local disk info of client
I0320 21:38:26.272150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:38:26.272156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278ac0 0xc000278b00]
E0320 21:38:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:38:33.409770  543705 memory.go:184] no items to output this cycle
I0320 21:38:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 21:38:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:38:43.409820  543705 memory.go:191] Add success.
I0320 21:38:43.409826  543705 cpu.go:282] Add success.
I0320 21:38:43.419954  543705 net.go:648] Add success.
I0320 21:38:43.422606  543705 net.go:770] primary dev: ETH0
I0320 21:38:43.422619  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:38:43.422631  543705 net.go:698] Add success.
I0320 21:38:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:38:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:38:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:38:53.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:38:53.410265  543705 memory.go:184] no items to output this cycle
I0320 21:38:53.410289  543705 cpu.go:275] no items to output this cycle
E0320 21:39:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:39:03.409813  543705 memory.go:184] no items to output this cycle
I0320 21:39:03.409826  543705 cpu.go:275] no items to output this cycle
E0320 21:39:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:39:13.409822  543705 memory.go:191] Add success.
I0320 21:39:13.409822  543705 cpu.go:282] Add success.
W0320 21:39:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:39:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:39:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:39:13.420219  543705 net.go:648] Add success.
I0320 21:39:13.422822  543705 net.go:770] primary dev: ETH0
I0320 21:39:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:39:13.422851  543705 net.go:698] Add success.
I0320 21:39:13.469755  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5fc8f91d-21cb-44d1-935b-f514917a1f96","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:39:13.469788  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:39:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:39:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:39:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 21:39:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:39:14.456627  543705 disk_worker.go:494] system disk:vda1
I0320 21:39:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:39:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:39:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:39:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:39:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:39:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:39:23.409766  543705 memory.go:184] no items to output this cycle
I0320 21:39:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 21:39:26.273670  543705 disk_info.go:125] begin check local disk info of client
I0320 21:39:26.276162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:39:26.276167  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046d900 0xc00046d940]
E0320 21:39:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:39:33.409797  543705 memory.go:184] no items to output this cycle
I0320 21:39:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 21:39:38.629744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:39:38.629752  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:39:43.409924  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:39:43.410680  543705 memory.go:191] Add success.
I0320 21:39:43.410064  543705 cpu.go:282] Add success.
I0320 21:39:43.419719  543705 net.go:648] Add success.
I0320 21:39:43.422578  543705 net.go:770] primary dev: ETH0
I0320 21:39:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:39:43.422602  543705 net.go:698] Add success.
I0320 21:39:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:39:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:39:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:39:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:39:53.409770  543705 memory.go:184] no items to output this cycle
I0320 21:39:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 21:40:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:40:03.409775  543705 memory.go:184] no items to output this cycle
I0320 21:40:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 21:40:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:40:13.409818  543705 memory.go:191] Add success.
I0320 21:40:13.409827  543705 cpu.go:282] Add success.
W0320 21:40:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:40:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:40:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:40:13.420347  543705 net.go:648] Add success.
I0320 21:40:13.423018  543705 net.go:770] primary dev: ETH0
I0320 21:40:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:40:13.423043  543705 net.go:698] Add success.
I0320 21:40:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:40:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:40:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0320 21:40:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:40:14.456618  543705 disk_worker.go:494] system disk:vda1
I0320 21:40:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:40:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:40:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:40:16.472478  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:40:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:40:23.409786  543705 memory.go:184] no items to output this cycle
I0320 21:40:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 21:40:26.277674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:40:26.280194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:40:26.280201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000346380 0xc0003463c0]
E0320 21:40:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:40:33.409772  543705 memory.go:184] no items to output this cycle
I0320 21:40:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 21:40:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:40:43.409793  543705 cpu.go:282] Add success.
I0320 21:40:43.409794  543705 memory.go:191] Add success.
I0320 21:40:43.419979  543705 net.go:648] Add success.
I0320 21:40:43.422889  543705 net.go:770] primary dev: ETH0
I0320 21:40:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:40:43.422918  543705 net.go:698] Add success.
I0320 21:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:40:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:40:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:40:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:40:53.409763  543705 memory.go:184] no items to output this cycle
I0320 21:40:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 21:41:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:41:03.409813  543705 memory.go:184] no items to output this cycle
I0320 21:41:03.409824  543705 cpu.go:275] no items to output this cycle
E0320 21:41:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:41:13.409798  543705 memory.go:191] Add success.
I0320 21:41:13.409799  543705 cpu.go:282] Add success.
W0320 21:41:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:41:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:41:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:41:13.420143  543705 net.go:648] Add success.
I0320 21:41:13.422712  543705 net.go:770] primary dev: ETH0
I0320 21:41:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:41:13.422741  543705 net.go:698] Add success.
I0320 21:41:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:41:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:41:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 21:41:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:41:14.456596  543705 disk_worker.go:494] system disk:vda1
I0320 21:41:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:41:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:41:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:41:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:41:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:41:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:41:23.409768  543705 memory.go:184] no items to output this cycle
I0320 21:41:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 21:41:26.281673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:41:26.284125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:41:26.284132  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa0c0 0xc0001aa100]
E0320 21:41:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:41:33.409767  543705 memory.go:184] no items to output this cycle
I0320 21:41:33.409806  543705 cpu.go:275] no items to output this cycle
E0320 21:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:41:43.409811  543705 memory.go:191] Add success.
I0320 21:41:43.409824  543705 cpu.go:282] Add success.
I0320 21:41:43.419934  543705 net.go:648] Add success.
I0320 21:41:43.422761  543705 net.go:770] primary dev: ETH0
I0320 21:41:43.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:41:43.422786  543705 net.go:698] Add success.
I0320 21:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:41:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:41:53.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:41:53.410380  543705 memory.go:184] no items to output this cycle
I0320 21:41:53.410383  543705 cpu.go:275] no items to output this cycle
E0320 21:42:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:42:03.409777  543705 memory.go:184] no items to output this cycle
I0320 21:42:03.409800  543705 cpu.go:275] no items to output this cycle
E0320 21:42:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:42:13.409815  543705 memory.go:191] Add success.
I0320 21:42:13.409822  543705 cpu.go:282] Add success.
W0320 21:42:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:42:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:42:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:42:13.420108  543705 net.go:648] Add success.
I0320 21:42:13.423018  543705 net.go:770] primary dev: ETH0
I0320 21:42:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:42:13.423056  543705 net.go:698] Add success.
I0320 21:42:13.469917  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e1cfd884-7c8f-4a38-b00f-0cf48e0f86e0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:42:13.469955  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 21:42:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:42:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0320 21:42:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:42:14.456910  543705 disk_worker.go:494] system disk:vda1
E0320 21:42:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:42:14.456927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:42:14.456931  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:42:14.456954  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:42:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:42:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:42:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:42:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:42:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:42:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:42:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:42:23.410380  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:42:23.410405  543705 memory.go:184] no items to output this cycle
I0320 21:42:23.410408  543705 cpu.go:275] no items to output this cycle
I0320 21:42:26.285674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:42:26.288145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:42:26.288151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa8c0 0xc0001aa900]
E0320 21:42:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:42:33.409774  543705 memory.go:184] no items to output this cycle
I0320 21:42:33.409776  543705 cpu.go:275] no items to output this cycle
I0320 21:42:38.630845  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:42:38.630851  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:42:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:42:43.410777  543705 memory.go:191] Add success.
I0320 21:42:43.409789  543705 cpu.go:282] Add success.
I0320 21:42:43.420460  543705 net.go:648] Add success.
I0320 21:42:43.423078  543705 net.go:770] primary dev: ETH0
I0320 21:42:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:42:43.423104  543705 net.go:698] Add success.
I0320 21:42:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:42:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:42:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:42:53.409778  543705 memory.go:184] no items to output this cycle
I0320 21:42:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 21:43:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:43:03.409804  543705 memory.go:184] no items to output this cycle
I0320 21:43:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 21:43:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:43:13.409785  543705 memory.go:191] Add success.
I0320 21:43:13.409802  543705 cpu.go:282] Add success.
W0320 21:43:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:43:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:43:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:43:13.420132  543705 net.go:648] Add success.
I0320 21:43:13.423133  543705 net.go:770] primary dev: ETH0
I0320 21:43:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:43:13.423157  543705 net.go:698] Add success.
I0320 21:43:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:43:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:43:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 21:43:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:43:14.456860  543705 disk_worker.go:494] system disk:vda1
I0320 21:43:14.456894  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:43:15.455947  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:43:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:43:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:43:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:43:23.409762  543705 memory.go:184] no items to output this cycle
I0320 21:43:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 21:43:26.289677  543705 disk_info.go:125] begin check local disk info of client
I0320 21:43:26.292090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:43:26.292096  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a9080 0xc0002a90c0]
E0320 21:43:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:43:33.409799  543705 memory.go:184] no items to output this cycle
I0320 21:43:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 21:43:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:43:43.409811  543705 memory.go:191] Add success.
I0320 21:43:43.409817  543705 cpu.go:282] Add success.
I0320 21:43:43.419905  543705 net.go:648] Add success.
I0320 21:43:43.423076  543705 net.go:770] primary dev: ETH0
I0320 21:43:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:43:43.423107  543705 net.go:698] Add success.
I0320 21:43:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:43:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:43:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:43:53.410202  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:43:53.410229  543705 memory.go:184] no items to output this cycle
I0320 21:43:53.410235  543705 cpu.go:275] no items to output this cycle
E0320 21:44:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:44:03.409805  543705 memory.go:184] no items to output this cycle
I0320 21:44:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 21:44:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:44:13.409788  543705 memory.go:191] Add success.
I0320 21:44:13.409807  543705 cpu.go:282] Add success.
W0320 21:44:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:44:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:44:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:44:13.420219  543705 net.go:648] Add success.
I0320 21:44:13.422832  543705 net.go:770] primary dev: ETH0
I0320 21:44:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:44:13.422855  543705 net.go:698] Add success.
I0320 21:44:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:44:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:44:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 21:44:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:44:14.456614  543705 disk_worker.go:494] system disk:vda1
I0320 21:44:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:44:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:44:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:44:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:44:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:44:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 21:44:23.409782  543705 memory.go:184] no items to output this cycle
I0320 21:44:26.293676  543705 disk_info.go:125] begin check local disk info of client
I0320 21:44:26.296153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:44:26.296159  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b700 0xc00007b740]
E0320 21:44:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:44:33.409770  543705 memory.go:184] no items to output this cycle
I0320 21:44:33.409777  543705 cpu.go:275] no items to output this cycle
E0320 21:44:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:44:43.409789  543705 memory.go:191] Add success.
I0320 21:44:43.409791  543705 cpu.go:282] Add success.
I0320 21:44:43.419851  543705 net.go:648] Add success.
I0320 21:44:43.422734  543705 net.go:770] primary dev: ETH0
I0320 21:44:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:44:43.422759  543705 net.go:698] Add success.
I0320 21:44:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:44:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:44:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:44:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:44:53.409775  543705 memory.go:184] no items to output this cycle
I0320 21:44:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 21:45:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:45:03.409793  543705 memory.go:184] no items to output this cycle
I0320 21:45:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 21:45:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:45:13.409903  543705 cpu.go:282] Add success.
I0320 21:45:13.409914  543705 memory.go:191] Add success.
W0320 21:45:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:45:13.409987  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:45:13.409992  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:45:13.419746  543705 net.go:648] Add success.
I0320 21:45:13.422364  543705 net.go:770] primary dev: ETH0
I0320 21:45:13.422379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:45:13.422392  543705 net.go:698] Add success.
I0320 21:45:13.464030  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d17b432-c0cd-4dcb-a9d8-04539dbf695c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:45:13.464062  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:45:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:45:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:45:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 21:45:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:45:14.456485  543705 disk_worker.go:494] system disk:vda1
I0320 21:45:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:45:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:45:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:45:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:45:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:45:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:45:23.409786  543705 memory.go:184] no items to output this cycle
I0320 21:45:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 21:45:26.297675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:45:26.300161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:45:26.300167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1840 0xc0004b1880]
E0320 21:45:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:45:33.409784  543705 memory.go:184] no items to output this cycle
I0320 21:45:33.409804  543705 cpu.go:275] no items to output this cycle
I0320 21:45:38.631860  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:45:38.631867  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:45:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:45:43.410718  543705 memory.go:191] Add success.
I0320 21:45:43.409830  543705 cpu.go:282] Add success.
I0320 21:45:43.420442  543705 net.go:648] Add success.
I0320 21:45:43.423153  543705 net.go:770] primary dev: ETH0
I0320 21:45:43.423167  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:45:43.423179  543705 net.go:698] Add success.
I0320 21:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:45:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:45:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:45:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:45:53.409800  543705 memory.go:184] no items to output this cycle
I0320 21:45:53.409801  543705 cpu.go:275] no items to output this cycle
I0320 21:46:03.409894  543705 cpu.go:275] no items to output this cycle
E0320 21:46:03.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:46:03.409933  543705 memory.go:184] no items to output this cycle
E0320 21:46:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:46:13.409789  543705 memory.go:191] Add success.
I0320 21:46:13.409816  543705 cpu.go:282] Add success.
W0320 21:46:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:46:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:46:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:46:13.420174  543705 net.go:648] Add success.
I0320 21:46:13.423417  543705 net.go:770] primary dev: ETH0
I0320 21:46:13.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:46:13.423442  543705 net.go:698] Add success.
I0320 21:46:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:46:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:46:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0320 21:46:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:46:14.456589  543705 disk_worker.go:494] system disk:vda1
I0320 21:46:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:46:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:46:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:46:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:46:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:46:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:46:23.409793  543705 memory.go:184] no items to output this cycle
I0320 21:46:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 21:46:26.301673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:46:26.304249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:46:26.304256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbd40 0xc0001fbd80]
E0320 21:46:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:46:33.409771  543705 memory.go:184] no items to output this cycle
I0320 21:46:33.409791  543705 cpu.go:275] no items to output this cycle
E0320 21:46:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:46:43.409821  543705 memory.go:191] Add success.
I0320 21:46:43.409822  543705 cpu.go:282] Add success.
I0320 21:46:43.420030  543705 net.go:648] Add success.
I0320 21:46:43.422602  543705 net.go:770] primary dev: ETH0
I0320 21:46:43.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:46:43.422631  543705 net.go:698] Add success.
I0320 21:46:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:46:53.409858  543705 cpu.go:275] no items to output this cycle
E0320 21:46:53.409915  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:46:53.409927  543705 memory.go:184] no items to output this cycle
E0320 21:47:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:47:03.409788  543705 memory.go:184] no items to output this cycle
I0320 21:47:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 21:47:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:47:13.409818  543705 memory.go:191] Add success.
I0320 21:47:13.409822  543705 cpu.go:282] Add success.
W0320 21:47:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:47:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:47:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:47:13.420148  543705 net.go:648] Add success.
I0320 21:47:13.422970  543705 net.go:770] primary dev: ETH0
I0320 21:47:13.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:47:13.422997  543705 net.go:698] Add success.
I0320 21:47:13.453531  543705 event_worker.go:152] Polling the log file for events...
W0320 21:47:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:47:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 21:47:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:47:14.456807  543705 disk_worker.go:494] system disk:vda1
I0320 21:47:14.456847  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:47:14.457136  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:47:14.457144  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:47:14.457149  543705 custom_config.go:64] query custom config with name: gpu
E0320 21:47:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:47:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:47:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:47:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:47:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:47:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:47:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:47:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:47:23.409789  543705 memory.go:184] no items to output this cycle
I0320 21:47:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 21:47:26.305669  543705 disk_info.go:125] begin check local disk info of client
I0320 21:47:26.308166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:47:26.308172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370140 0xc000370180]
E0320 21:47:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:47:33.409798  543705 memory.go:184] no items to output this cycle
I0320 21:47:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 21:47:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:47:43.409809  543705 memory.go:191] Add success.
I0320 21:47:43.409818  543705 cpu.go:282] Add success.
I0320 21:47:43.419989  543705 net.go:648] Add success.
I0320 21:47:43.422814  543705 net.go:770] primary dev: ETH0
I0320 21:47:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:47:43.422839  543705 net.go:698] Add success.
I0320 21:47:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:47:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:47:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:47:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:47:53.409768  543705 memory.go:184] no items to output this cycle
I0320 21:47:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 21:48:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:48:03.409800  543705 cpu.go:275] no items to output this cycle
I0320 21:48:03.409811  543705 memory.go:184] no items to output this cycle
E0320 21:48:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:48:13.409831  543705 memory.go:191] Add success.
I0320 21:48:13.409840  543705 cpu.go:282] Add success.
W0320 21:48:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:48:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:48:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:48:13.420224  543705 net.go:648] Add success.
I0320 21:48:13.423181  543705 net.go:770] primary dev: ETH0
I0320 21:48:13.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:48:13.423224  543705 net.go:698] Add success.
I0320 21:48:13.471085  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"57f910c1-e8f8-4a8d-8710-56dd1c582052","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:48:13.471119  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:48:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:48:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:48:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0320 21:48:14.455247  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:48:14.456763  543705 disk_worker.go:494] system disk:vda1
I0320 21:48:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:48:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:48:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:48:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:48:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:48:23.409765  543705 memory.go:184] no items to output this cycle
I0320 21:48:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 21:48:26.309673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:48:26.312146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:48:26.312153  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461640 0xc000461680]
E0320 21:48:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:48:33.409797  543705 memory.go:184] no items to output this cycle
I0320 21:48:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 21:48:38.632008  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:48:38.632014  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:48:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:48:43.410566  543705 memory.go:191] Add success.
I0320 21:48:43.409804  543705 cpu.go:282] Add success.
I0320 21:48:43.420297  543705 net.go:648] Add success.
I0320 21:48:43.422793  543705 net.go:770] primary dev: ETH0
I0320 21:48:43.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:48:43.422818  543705 net.go:698] Add success.
I0320 21:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:48:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:48:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:48:53.409786  543705 memory.go:184] no items to output this cycle
I0320 21:48:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 21:49:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:49:03.409796  543705 memory.go:184] no items to output this cycle
I0320 21:49:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 21:49:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:49:13.409802  543705 memory.go:191] Add success.
I0320 21:49:13.409805  543705 cpu.go:282] Add success.
W0320 21:49:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:49:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:49:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:49:13.420204  543705 net.go:648] Add success.
I0320 21:49:13.423080  543705 net.go:770] primary dev: ETH0
I0320 21:49:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:49:13.423109  543705 net.go:698] Add success.
I0320 21:49:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:49:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:49:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 21:49:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:49:14.456566  543705 disk_worker.go:494] system disk:vda1
I0320 21:49:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:49:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:49:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:49:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:49:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:49:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:49:23.409782  543705 memory.go:184] no items to output this cycle
I0320 21:49:23.409786  543705 cpu.go:275] no items to output this cycle
I0320 21:49:26.313672  543705 disk_info.go:125] begin check local disk info of client
I0320 21:49:26.316149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:49:26.316155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004af3c0 0xc0004af400]
E0320 21:49:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:49:33.409779  543705 memory.go:184] no items to output this cycle
I0320 21:49:33.409792  543705 cpu.go:275] no items to output this cycle
E0320 21:49:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:49:43.409783  543705 memory.go:191] Add success.
I0320 21:49:43.409807  543705 cpu.go:282] Add success.
I0320 21:49:43.419874  543705 net.go:648] Add success.
I0320 21:49:43.422735  543705 net.go:770] primary dev: ETH0
I0320 21:49:43.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:49:43.422761  543705 net.go:698] Add success.
I0320 21:49:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:49:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:49:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:49:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:49:53.409773  543705 memory.go:184] no items to output this cycle
I0320 21:49:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 21:50:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:50:03.409803  543705 memory.go:184] no items to output this cycle
I0320 21:50:03.409917  543705 cpu.go:275] no items to output this cycle
E0320 21:50:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:50:13.409801  543705 memory.go:191] Add success.
I0320 21:50:13.409803  543705 cpu.go:282] Add success.
W0320 21:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:50:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:50:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:50:13.420072  543705 net.go:648] Add success.
I0320 21:50:13.423192  543705 net.go:770] primary dev: ETH0
I0320 21:50:13.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:50:13.423223  543705 net.go:698] Add success.
I0320 21:50:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:50:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:50:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 21:50:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:50:14.456578  543705 disk_worker.go:494] system disk:vda1
I0320 21:50:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:50:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:50:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:50:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:50:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:50:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:50:23.409799  543705 memory.go:184] no items to output this cycle
I0320 21:50:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 21:50:26.317678  543705 disk_info.go:125] begin check local disk info of client
I0320 21:50:26.320184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:50:26.320191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b02c0 0xc0003b0300]
E0320 21:50:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:50:33.409781  543705 memory.go:184] no items to output this cycle
I0320 21:50:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 21:50:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:50:43.409802  543705 memory.go:191] Add success.
I0320 21:50:43.409803  543705 cpu.go:282] Add success.
I0320 21:50:43.419983  543705 net.go:648] Add success.
I0320 21:50:43.422628  543705 net.go:770] primary dev: ETH0
I0320 21:50:43.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:50:43.422654  543705 net.go:698] Add success.
I0320 21:50:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:50:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:50:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:50:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:50:53.409782  543705 memory.go:184] no items to output this cycle
I0320 21:50:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 21:51:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:51:03.409802  543705 memory.go:184] no items to output this cycle
I0320 21:51:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 21:51:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:51:13.409807  543705 memory.go:191] Add success.
I0320 21:51:13.409808  543705 cpu.go:282] Add success.
W0320 21:51:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:51:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:51:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:51:13.420137  543705 net.go:648] Add success.
I0320 21:51:13.422894  543705 net.go:770] primary dev: ETH0
I0320 21:51:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:51:13.422920  543705 net.go:698] Add success.
I0320 21:51:13.488104  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6a19251c-6362-45ad-a55b-70f7a5162382","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:51:13.488138  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:51:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:51:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:51:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 21:51:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:51:14.456676  543705 disk_worker.go:494] system disk:vda1
I0320 21:51:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:51:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:51:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:51:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:51:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:51:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:51:23.409782  543705 memory.go:184] no items to output this cycle
I0320 21:51:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 21:51:26.321675  543705 disk_info.go:125] begin check local disk info of client
I0320 21:51:26.324163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:51:26.324169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342440 0xc000342480]
E0320 21:51:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:51:33.409778  543705 memory.go:184] no items to output this cycle
I0320 21:51:33.409782  543705 cpu.go:275] no items to output this cycle
I0320 21:51:38.632864  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:51:38.632870  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:51:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:51:43.410606  543705 memory.go:191] Add success.
I0320 21:51:43.409833  543705 cpu.go:282] Add success.
I0320 21:51:43.420367  543705 net.go:648] Add success.
I0320 21:51:43.423047  543705 net.go:770] primary dev: ETH0
I0320 21:51:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:51:43.423075  543705 net.go:698] Add success.
I0320 21:51:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:51:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:51:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:51:53.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:51:53.409917  543705 memory.go:184] no items to output this cycle
I0320 21:51:53.409991  543705 cpu.go:275] no items to output this cycle
E0320 21:52:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:52:03.409786  543705 memory.go:184] no items to output this cycle
I0320 21:52:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 21:52:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:52:13.409816  543705 memory.go:191] Add success.
I0320 21:52:13.409819  543705 cpu.go:282] Add success.
W0320 21:52:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:52:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:52:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:52:13.420222  543705 net.go:648] Add success.
I0320 21:52:13.423182  543705 net.go:770] primary dev: ETH0
I0320 21:52:13.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:52:13.423214  543705 net.go:698] Add success.
W0320 21:52:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:52:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 21:52:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:52:14.456800  543705 disk_worker.go:494] system disk:vda1
I0320 21:52:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:52:14.457123  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:52:14.457131  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:52:14.457136  543705 custom_config.go:64] query custom config with name: gpu
E0320 21:52:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:52:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:52:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:52:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:52:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:52:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:52:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:52:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:52:23.409799  543705 memory.go:184] no items to output this cycle
I0320 21:52:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 21:52:26.325676  543705 disk_info.go:125] begin check local disk info of client
I0320 21:52:26.328185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:52:26.328192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf380 0xc0003bf3c0]
E0320 21:52:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:52:33.409778  543705 cpu.go:275] no items to output this cycle
I0320 21:52:33.409786  543705 memory.go:184] no items to output this cycle
E0320 21:52:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:52:43.409807  543705 memory.go:191] Add success.
I0320 21:52:43.409818  543705 cpu.go:282] Add success.
I0320 21:52:43.420174  543705 net.go:648] Add success.
I0320 21:52:43.422796  543705 net.go:770] primary dev: ETH0
I0320 21:52:43.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:52:43.422821  543705 net.go:698] Add success.
I0320 21:52:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:52:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:52:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:52:53.410354  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:52:53.410371  543705 memory.go:184] no items to output this cycle
I0320 21:52:53.410388  543705 cpu.go:275] no items to output this cycle
E0320 21:53:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:53:03.409788  543705 memory.go:184] no items to output this cycle
I0320 21:53:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 21:53:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:53:13.409824  543705 memory.go:191] Add success.
I0320 21:53:13.409829  543705 cpu.go:282] Add success.
W0320 21:53:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:53:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:53:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:53:13.420155  543705 net.go:648] Add success.
I0320 21:53:13.422999  543705 net.go:770] primary dev: ETH0
I0320 21:53:13.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:53:13.423039  543705 net.go:698] Add success.
I0320 21:53:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:53:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:53:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 21:53:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:53:14.456483  543705 disk_worker.go:494] system disk:vda1
I0320 21:53:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:53:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:53:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:53:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:53:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:53:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:53:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:53:23.409805  543705 memory.go:184] no items to output this cycle
I0320 21:53:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 21:53:26.329674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:53:26.332198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:53:26.332204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002988c0 0xc000298900]
E0320 21:53:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:53:33.409804  543705 memory.go:184] no items to output this cycle
I0320 21:53:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 21:53:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:53:43.409787  543705 memory.go:191] Add success.
I0320 21:53:43.409806  543705 cpu.go:282] Add success.
I0320 21:53:43.420135  543705 net.go:648] Add success.
I0320 21:53:43.422830  543705 net.go:770] primary dev: ETH0
I0320 21:53:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:53:43.422855  543705 net.go:698] Add success.
I0320 21:53:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:53:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:53:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:53:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:53:53.409807  543705 memory.go:184] no items to output this cycle
I0320 21:53:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 21:54:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:54:03.409789  543705 memory.go:184] no items to output this cycle
I0320 21:54:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 21:54:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:54:13.409795  543705 memory.go:191] Add success.
W0320 21:54:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 21:54:13.409823  543705 cpu.go:282] Add success.
W0320 21:54:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:54:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:54:13.420126  543705 net.go:648] Add success.
I0320 21:54:13.422920  543705 net.go:770] primary dev: ETH0
I0320 21:54:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:54:13.422944  543705 net.go:698] Add success.
I0320 21:54:13.470943  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b03b9a2a-c85c-4002-86a1-7ed658fdf110","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:54:13.470977  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 21:54:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:54:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:54:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 21:54:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:54:14.456539  543705 disk_worker.go:494] system disk:vda1
I0320 21:54:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:54:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:54:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:54:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:54:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:54:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:54:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 21:54:23.409787  543705 memory.go:184] no items to output this cycle
I0320 21:54:26.333678  543705 disk_info.go:125] begin check local disk info of client
I0320 21:54:26.336130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:54:26.336136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003acc80 0xc0003accc0]
E0320 21:54:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:54:33.409782  543705 cpu.go:275] no items to output this cycle
I0320 21:54:33.409788  543705 memory.go:184] no items to output this cycle
I0320 21:54:38.633740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:54:38.633747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:54:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:54:43.410816  543705 memory.go:191] Add success.
I0320 21:54:43.409790  543705 cpu.go:282] Add success.
I0320 21:54:43.420594  543705 net.go:648] Add success.
I0320 21:54:43.423444  543705 net.go:770] primary dev: ETH0
I0320 21:54:43.423457  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:54:43.423468  543705 net.go:698] Add success.
I0320 21:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:54:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:54:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:54:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:54:53.409764  543705 memory.go:184] no items to output this cycle
I0320 21:54:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 21:55:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:55:03.409779  543705 memory.go:184] no items to output this cycle
I0320 21:55:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 21:55:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:55:13.409816  543705 memory.go:191] Add success.
I0320 21:55:13.409820  543705 cpu.go:282] Add success.
W0320 21:55:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:55:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:55:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:55:13.420191  543705 net.go:648] Add success.
I0320 21:55:13.422894  543705 net.go:770] primary dev: ETH0
I0320 21:55:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:55:13.422919  543705 net.go:698] Add success.
I0320 21:55:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:55:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:55:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 21:55:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:55:14.456590  543705 disk_worker.go:494] system disk:vda1
I0320 21:55:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:55:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:55:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:55:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:55:23.409794  543705 memory.go:184] no items to output this cycle
I0320 21:55:23.409805  543705 cpu.go:275] no items to output this cycle
I0320 21:55:26.337670  543705 disk_info.go:125] begin check local disk info of client
I0320 21:55:26.340195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:55:26.340203  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369cc0 0xc000369d00]
E0320 21:55:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:55:33.409767  543705 memory.go:184] no items to output this cycle
I0320 21:55:33.409785  543705 cpu.go:275] no items to output this cycle
E0320 21:55:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:55:43.409796  543705 memory.go:191] Add success.
I0320 21:55:43.409798  543705 cpu.go:282] Add success.
I0320 21:55:43.419877  543705 net.go:648] Add success.
I0320 21:55:43.422451  543705 net.go:770] primary dev: ETH0
I0320 21:55:43.422464  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:55:43.422475  543705 net.go:698] Add success.
I0320 21:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:55:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:55:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:55:53.409767  543705 memory.go:184] no items to output this cycle
I0320 21:55:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 21:56:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:56:03.409788  543705 memory.go:184] no items to output this cycle
I0320 21:56:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 21:56:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:56:13.409795  543705 memory.go:191] Add success.
I0320 21:56:13.409799  543705 cpu.go:282] Add success.
W0320 21:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:56:13.420069  543705 net.go:648] Add success.
I0320 21:56:13.423084  543705 net.go:770] primary dev: ETH0
I0320 21:56:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:56:13.423110  543705 net.go:698] Add success.
I0320 21:56:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:56:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:56:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 21:56:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:56:14.456560  543705 disk_worker.go:494] system disk:vda1
I0320 21:56:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:56:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:56:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:56:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:56:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:56:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:56:23.409778  543705 cpu.go:275] no items to output this cycle
I0320 21:56:23.409781  543705 memory.go:184] no items to output this cycle
I0320 21:56:26.341674  543705 disk_info.go:125] begin check local disk info of client
I0320 21:56:26.344123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:56:26.344129  543705 disk_info.go:196] parse disk info done, disk is : [0xc000497b40 0xc000497b80]
E0320 21:56:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:56:33.409773  543705 cpu.go:275] no items to output this cycle
I0320 21:56:33.409775  543705 memory.go:184] no items to output this cycle
E0320 21:56:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:56:43.409793  543705 memory.go:191] Add success.
I0320 21:56:43.409797  543705 cpu.go:282] Add success.
I0320 21:56:43.419992  543705 net.go:648] Add success.
I0320 21:56:43.422711  543705 net.go:770] primary dev: ETH0
I0320 21:56:43.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:56:43.422737  543705 net.go:698] Add success.
I0320 21:56:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:56:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:56:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:56:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:56:53.409771  543705 memory.go:184] no items to output this cycle
I0320 21:56:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 21:57:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:57:03.409786  543705 memory.go:184] no items to output this cycle
I0320 21:57:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 21:57:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:57:13.409785  543705 memory.go:191] Add success.
I0320 21:57:13.409789  543705 cpu.go:282] Add success.
W0320 21:57:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:57:13.412506  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:57:13.412512  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:57:13.420104  543705 net.go:648] Add success.
I0320 21:57:13.428176  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 21:57:13.428259  543705 net.go:770] primary dev: ETH0
I0320 21:57:13.428272  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:57:13.428284  543705 net.go:698] Add success.
I0320 21:57:13.452772  543705 event_worker.go:152] Polling the log file for events...
I0320 21:57:13.468883  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2874e436-cfb2-49ba-bff5-787d430486eb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 21:57:13.468917  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 21:57:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:57:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 21:57:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0320 21:57:14.456796  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 21:57:14.456805  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 21:57:14.456811  543705 custom_config.go:64] query custom config with name: gpu
I0320 21:57:14.456840  543705 disk_worker.go:494] system disk:vda1
I0320 21:57:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 21:57:15.456891  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 21:57:15.456900  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:57:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 21:57:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 21:57:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:57:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:57:16.472414  543705 disk_local_worker.go:436] Get disk info: []
I0320 21:57:23.409930  543705 cpu.go:275] no items to output this cycle
E0320 21:57:23.409931  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:57:23.409982  543705 memory.go:184] no items to output this cycle
I0320 21:57:26.345670  543705 disk_info.go:125] begin check local disk info of client
I0320 21:57:26.348112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:57:26.348118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004de4c0 0xc0004de500]
E0320 21:57:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:57:33.409778  543705 memory.go:184] no items to output this cycle
I0320 21:57:33.409781  543705 cpu.go:275] no items to output this cycle
I0320 21:57:38.633901  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 21:57:38.633907  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 21:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:57:43.410587  543705 memory.go:191] Add success.
I0320 21:57:43.409794  543705 cpu.go:282] Add success.
I0320 21:57:43.420322  543705 net.go:648] Add success.
I0320 21:57:43.423031  543705 net.go:770] primary dev: ETH0
I0320 21:57:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:57:43.423056  543705 net.go:698] Add success.
I0320 21:57:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:57:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:57:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:57:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:57:53.409759  543705 memory.go:184] no items to output this cycle
I0320 21:57:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 21:58:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:58:03.409793  543705 memory.go:184] no items to output this cycle
I0320 21:58:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 21:58:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:58:13.409806  543705 memory.go:191] Add success.
I0320 21:58:13.409807  543705 cpu.go:282] Add success.
W0320 21:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:58:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:58:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:58:13.420090  543705 net.go:648] Add success.
I0320 21:58:13.422923  543705 net.go:770] primary dev: ETH0
I0320 21:58:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:58:13.422949  543705 net.go:698] Add success.
I0320 21:58:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:58:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:58:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 21:58:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:58:14.456597  543705 disk_worker.go:494] system disk:vda1
I0320 21:58:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:58:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:58:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:58:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:58:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:58:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:58:23.409785  543705 memory.go:184] no items to output this cycle
I0320 21:58:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 21:58:26.349676  543705 disk_info.go:125] begin check local disk info of client
I0320 21:58:26.352148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:58:26.352154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dfac0 0xc0004dfb00]
E0320 21:58:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:58:33.409788  543705 memory.go:184] no items to output this cycle
I0320 21:58:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 21:58:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:58:43.409798  543705 memory.go:191] Add success.
I0320 21:58:43.409801  543705 cpu.go:282] Add success.
I0320 21:58:43.419985  543705 net.go:648] Add success.
I0320 21:58:43.422637  543705 net.go:770] primary dev: ETH0
I0320 21:58:43.422649  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:58:43.422662  543705 net.go:698] Add success.
I0320 21:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:58:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:58:53.409797  543705 memory.go:184] no items to output this cycle
I0320 21:58:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 21:59:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:59:03.409822  543705 memory.go:184] no items to output this cycle
I0320 21:59:03.409832  543705 cpu.go:275] no items to output this cycle
E0320 21:59:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:59:13.409779  543705 memory.go:191] Add success.
I0320 21:59:13.409814  543705 cpu.go:282] Add success.
W0320 21:59:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 21:59:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 21:59:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 21:59:13.420116  543705 net.go:648] Add success.
I0320 21:59:13.422857  543705 net.go:770] primary dev: ETH0
I0320 21:59:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:59:13.422882  543705 net.go:698] Add success.
I0320 21:59:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 21:59:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 21:59:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 21:59:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0320 21:59:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 21:59:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 21:59:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 21:59:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:59:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:59:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 21:59:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0320 21:59:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:59:23.409769  543705 memory.go:184] no items to output this cycle
I0320 21:59:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 21:59:26.353673  543705 disk_info.go:125] begin check local disk info of client
I0320 21:59:26.356103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 21:59:26.356109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb380 0xc0001fb3c0]
E0320 21:59:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:59:33.409776  543705 memory.go:184] no items to output this cycle
I0320 21:59:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 21:59:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:59:43.409812  543705 memory.go:191] Add success.
I0320 21:59:43.409822  543705 cpu.go:282] Add success.
I0320 21:59:43.419884  543705 net.go:648] Add success.
I0320 21:59:43.422870  543705 net.go:770] primary dev: ETH0
I0320 21:59:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0320 21:59:43.422895  543705 net.go:698] Add success.
I0320 21:59:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 21:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 21:59:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 21:59:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 21:59:53.410397  543705 memory.go:184] no items to output this cycle
I0320 21:59:53.410409  543705 cpu.go:275] no items to output this cycle
E0320 22:00:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:00:03.409797  543705 memory.go:184] no items to output this cycle
I0320 22:00:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 22:00:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:00:13.409807  543705 memory.go:191] Add success.
I0320 22:00:13.409808  543705 cpu.go:282] Add success.
W0320 22:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:00:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:00:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:00:13.420117  543705 net.go:648] Add success.
I0320 22:00:13.423034  543705 net.go:770] primary dev: ETH0
I0320 22:00:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:00:13.423059  543705 net.go:698] Add success.
I0320 22:00:13.463711  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"711cf543-89cd-4520-91b4-25c0ab28e286","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:00:13.463757  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:00:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:00:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:00:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 22:00:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:00:14.456948  543705 disk_worker.go:494] system disk:vda1
I0320 22:00:14.456985  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:00:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:00:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:00:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:00:23.409805  543705 memory.go:184] no items to output this cycle
I0320 22:00:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 22:00:26.357672  543705 disk_info.go:125] begin check local disk info of client
I0320 22:00:26.360166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:00:26.360171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004deac0 0xc0004deb00]
E0320 22:00:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:00:33.409803  543705 memory.go:184] no items to output this cycle
I0320 22:00:33.409813  543705 cpu.go:275] no items to output this cycle
I0320 22:00:38.634871  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:00:38.634878  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:00:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:00:43.410802  543705 memory.go:191] Add success.
I0320 22:00:43.409785  543705 cpu.go:282] Add success.
I0320 22:00:43.420467  543705 net.go:648] Add success.
I0320 22:00:43.423229  543705 net.go:770] primary dev: ETH0
I0320 22:00:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:00:43.423255  543705 net.go:698] Add success.
I0320 22:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:00:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:00:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:00:53.409799  543705 memory.go:184] no items to output this cycle
I0320 22:00:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 22:01:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:01:03.409793  543705 memory.go:184] no items to output this cycle
I0320 22:01:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 22:01:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:01:13.409787  543705 cpu.go:282] Add success.
I0320 22:01:13.409796  543705 memory.go:191] Add success.
W0320 22:01:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:01:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:01:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:01:13.420062  543705 net.go:648] Add success.
I0320 22:01:13.422885  543705 net.go:770] primary dev: ETH0
I0320 22:01:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:01:13.422912  543705 net.go:698] Add success.
I0320 22:01:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:01:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:01:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0320 22:01:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:01:14.459129  543705 disk_worker.go:494] system disk:vda1
I0320 22:01:14.459157  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:01:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:01:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:01:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:01:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:01:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:01:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:01:23.409769  543705 memory.go:184] no items to output this cycle
I0320 22:01:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 22:01:26.361675  543705 disk_info.go:125] begin check local disk info of client
I0320 22:01:26.364117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:01:26.364122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb440 0xc0001fb480]
E0320 22:01:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:01:33.409792  543705 memory.go:184] no items to output this cycle
I0320 22:01:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 22:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:01:43.409793  543705 memory.go:191] Add success.
I0320 22:01:43.409794  543705 cpu.go:282] Add success.
I0320 22:01:43.419875  543705 net.go:648] Add success.
I0320 22:01:43.422508  543705 net.go:770] primary dev: ETH0
I0320 22:01:43.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:01:43.422535  543705 net.go:698] Add success.
I0320 22:01:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:01:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:01:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:01:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:01:53.409784  543705 memory.go:184] no items to output this cycle
I0320 22:01:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 22:02:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:02:03.409791  543705 memory.go:184] no items to output this cycle
I0320 22:02:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 22:02:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:02:13.409794  543705 memory.go:191] Add success.
I0320 22:02:13.409798  543705 cpu.go:282] Add success.
W0320 22:02:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:02:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:02:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:02:13.420060  543705 net.go:648] Add success.
I0320 22:02:13.422536  543705 net.go:770] primary dev: ETH0
I0320 22:02:13.422548  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:02:13.422560  543705 net.go:698] Add success.
W0320 22:02:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:02:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0320 22:02:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:02:14.456159  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:02:14.456168  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:02:14.456175  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:02:14.456466  543705 disk_worker.go:494] system disk:vda1
I0320 22:02:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:02:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:02:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:02:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:02:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:02:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:02:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:02:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:02:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:02:23.409791  543705 memory.go:184] no items to output this cycle
I0320 22:02:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 22:02:26.365672  543705 disk_info.go:125] begin check local disk info of client
I0320 22:02:26.368111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:02:26.368118  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1300 0xc0004a1340]
E0320 22:02:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:02:33.409805  543705 memory.go:184] no items to output this cycle
I0320 22:02:33.409820  543705 cpu.go:275] no items to output this cycle
E0320 22:02:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:02:43.409823  543705 memory.go:191] Add success.
I0320 22:02:43.409825  543705 cpu.go:282] Add success.
I0320 22:02:43.419963  543705 net.go:648] Add success.
I0320 22:02:43.422482  543705 net.go:770] primary dev: ETH0
I0320 22:02:43.422496  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:02:43.422512  543705 net.go:698] Add success.
I0320 22:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:02:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:02:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:02:53.409781  543705 memory.go:184] no items to output this cycle
I0320 22:02:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 22:03:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:03:03.409776  543705 memory.go:184] no items to output this cycle
I0320 22:03:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 22:03:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:03:13.409786  543705 memory.go:191] Add success.
I0320 22:03:13.409807  543705 cpu.go:282] Add success.
W0320 22:03:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:03:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:03:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:03:13.420156  543705 net.go:648] Add success.
I0320 22:03:13.422750  543705 net.go:770] primary dev: ETH0
I0320 22:03:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:03:13.422778  543705 net.go:698] Add success.
I0320 22:03:13.468997  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"59192428-3630-4b7a-9816-8bf25288bd5d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:03:13.469032  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:03:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:03:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:03:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 22:03:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:03:14.456494  543705 disk_worker.go:494] system disk:vda1
I0320 22:03:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:03:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:03:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:03:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:03:23.409771  543705 memory.go:184] no items to output this cycle
I0320 22:03:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 22:03:26.369672  543705 disk_info.go:125] begin check local disk info of client
I0320 22:03:26.372157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:03:26.372162  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359780 0xc0003597c0]
E0320 22:03:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:03:33.409806  543705 memory.go:184] no items to output this cycle
I0320 22:03:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 22:03:38.635890  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:03:38.635897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:03:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:03:43.410573  543705 memory.go:191] Add success.
I0320 22:03:43.409785  543705 cpu.go:282] Add success.
I0320 22:03:43.420266  543705 net.go:648] Add success.
I0320 22:03:43.422937  543705 net.go:770] primary dev: ETH0
I0320 22:03:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:03:43.422967  543705 net.go:698] Add success.
I0320 22:03:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:03:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:03:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:03:53.409787  543705 memory.go:184] no items to output this cycle
I0320 22:03:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 22:04:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:04:03.409786  543705 memory.go:184] no items to output this cycle
I0320 22:04:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 22:04:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:04:13.409818  543705 memory.go:191] Add success.
I0320 22:04:13.409825  543705 cpu.go:282] Add success.
W0320 22:04:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:04:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:04:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:04:13.420179  543705 net.go:648] Add success.
I0320 22:04:13.423322  543705 net.go:770] primary dev: ETH0
I0320 22:04:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:04:13.423353  543705 net.go:698] Add success.
I0320 22:04:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:04:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:04:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0320 22:04:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:04:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 22:04:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:04:16.458041  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:04:16.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:04:16.458125  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:04:16.472470  543705 disk_local_worker.go:436] Get disk info: []
I0320 22:04:23.409871  543705 cpu.go:275] no items to output this cycle
E0320 22:04:23.409945  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:04:23.409961  543705 memory.go:184] no items to output this cycle
I0320 22:04:26.373672  543705 disk_info.go:125] begin check local disk info of client
I0320 22:04:26.376174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:04:26.376181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d1d80 0xc0003d1dc0]
E0320 22:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:04:33.409778  543705 cpu.go:275] no items to output this cycle
I0320 22:04:33.409785  543705 memory.go:184] no items to output this cycle
E0320 22:04:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:04:43.409777  543705 memory.go:191] Add success.
I0320 22:04:43.409800  543705 cpu.go:282] Add success.
I0320 22:04:43.419899  543705 net.go:648] Add success.
I0320 22:04:43.422653  543705 net.go:770] primary dev: ETH0
I0320 22:04:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:04:43.422678  543705 net.go:698] Add success.
I0320 22:04:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:04:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:04:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:04:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:04:53.409781  543705 memory.go:184] no items to output this cycle
I0320 22:04:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 22:05:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:05:03.409788  543705 memory.go:184] no items to output this cycle
I0320 22:05:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 22:05:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:05:13.409801  543705 memory.go:191] Add success.
I0320 22:05:13.409806  543705 cpu.go:282] Add success.
W0320 22:05:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:05:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:05:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:05:13.420199  543705 net.go:648] Add success.
I0320 22:05:13.422989  543705 net.go:770] primary dev: ETH0
I0320 22:05:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:05:13.423014  543705 net.go:698] Add success.
I0320 22:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:05:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:05:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 22:05:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:05:14.456486  543705 disk_worker.go:494] system disk:vda1
I0320 22:05:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:05:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:05:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:05:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:05:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:05:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:05:23.409803  543705 memory.go:184] no items to output this cycle
I0320 22:05:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 22:05:26.377675  543705 disk_info.go:125] begin check local disk info of client
I0320 22:05:26.380156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:05:26.380163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354800 0xc000354840]
E0320 22:05:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:05:33.409785  543705 memory.go:184] no items to output this cycle
I0320 22:05:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 22:05:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:05:43.409789  543705 memory.go:191] Add success.
I0320 22:05:43.409816  543705 cpu.go:282] Add success.
I0320 22:05:43.419952  543705 net.go:648] Add success.
I0320 22:05:43.422839  543705 net.go:770] primary dev: ETH0
I0320 22:05:43.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:05:43.422863  543705 net.go:698] Add success.
I0320 22:05:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:05:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:05:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:05:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:05:53.409811  543705 memory.go:184] no items to output this cycle
I0320 22:05:53.409822  543705 cpu.go:275] no items to output this cycle
E0320 22:06:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:06:03.409784  543705 memory.go:184] no items to output this cycle
I0320 22:06:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 22:06:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:06:13.409819  543705 memory.go:191] Add success.
I0320 22:06:13.409826  543705 cpu.go:282] Add success.
W0320 22:06:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:06:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:06:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:06:13.420127  543705 net.go:648] Add success.
I0320 22:06:13.422916  543705 net.go:770] primary dev: ETH0
I0320 22:06:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:06:13.422940  543705 net.go:698] Add success.
I0320 22:06:13.469202  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"45ff0129-96aa-48a4-93fe-76d5d306b648","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:06:13.469237  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:06:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:06:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:06:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 22:06:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:06:14.456544  543705 disk_worker.go:494] system disk:vda1
I0320 22:06:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:06:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:06:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:06:23.409781  543705 memory.go:184] no items to output this cycle
I0320 22:06:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 22:06:26.381668  543705 disk_info.go:125] begin check local disk info of client
I0320 22:06:26.384111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:06:26.384117  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de380 0xc0003de3c0]
E0320 22:06:33.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:06:33.409909  543705 memory.go:184] no items to output this cycle
I0320 22:06:33.409960  543705 cpu.go:275] no items to output this cycle
I0320 22:06:38.636885  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:06:38.636891  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:06:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:06:43.410868  543705 memory.go:191] Add success.
I0320 22:06:43.409823  543705 cpu.go:282] Add success.
I0320 22:06:43.420621  543705 net.go:648] Add success.
I0320 22:06:43.423624  543705 net.go:770] primary dev: ETH0
I0320 22:06:43.423637  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:06:43.423649  543705 net.go:698] Add success.
I0320 22:06:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:06:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:06:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:06:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:06:53.409777  543705 memory.go:184] no items to output this cycle
I0320 22:06:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 22:07:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:07:03.409817  543705 memory.go:184] no items to output this cycle
I0320 22:07:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 22:07:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:07:13.409789  543705 memory.go:191] Add success.
I0320 22:07:13.409806  543705 cpu.go:282] Add success.
W0320 22:07:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:07:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:07:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:07:13.420115  543705 net.go:648] Add success.
I0320 22:07:13.422982  543705 net.go:770] primary dev: ETH0
I0320 22:07:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:07:13.423008  543705 net.go:698] Add success.
I0320 22:07:13.453558  543705 event_worker.go:152] Polling the log file for events...
W0320 22:07:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:07:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 22:07:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:07:14.455884  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:07:14.455893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:07:14.455899  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:07:14.456534  543705 disk_worker.go:494] system disk:vda1
I0320 22:07:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:07:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:07:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:07:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:07:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:07:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:07:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:07:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:07:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:07:23.409807  543705 memory.go:184] no items to output this cycle
I0320 22:07:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 22:07:26.385673  543705 disk_info.go:125] begin check local disk info of client
I0320 22:07:26.388105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:07:26.388111  543705 disk_info.go:196] parse disk info done, disk is : [0xc00054b100 0xc00054b140]
E0320 22:07:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:07:33.409910  543705 memory.go:184] no items to output this cycle
I0320 22:07:33.409927  543705 cpu.go:275] no items to output this cycle
E0320 22:07:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:07:43.409797  543705 memory.go:191] Add success.
I0320 22:07:43.409814  543705 cpu.go:282] Add success.
I0320 22:07:43.419893  543705 net.go:648] Add success.
I0320 22:07:43.423015  543705 net.go:770] primary dev: ETH0
I0320 22:07:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:07:43.423044  543705 net.go:698] Add success.
I0320 22:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:07:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:07:53.409781  543705 memory.go:184] no items to output this cycle
I0320 22:07:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 22:08:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:08:03.409818  543705 memory.go:184] no items to output this cycle
I0320 22:08:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 22:08:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:08:13.409811  543705 memory.go:191] Add success.
I0320 22:08:13.409815  543705 cpu.go:282] Add success.
W0320 22:08:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:08:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:08:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:08:13.420121  543705 net.go:648] Add success.
I0320 22:08:13.423362  543705 net.go:770] primary dev: ETH0
I0320 22:08:13.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:08:13.423391  543705 net.go:698] Add success.
I0320 22:08:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:08:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:08:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0320 22:08:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:08:14.456593  543705 disk_worker.go:494] system disk:vda1
I0320 22:08:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:08:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:08:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:08:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:08:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:08:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:08:23.409772  543705 memory.go:184] no items to output this cycle
I0320 22:08:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 22:08:26.391997  543705 disk_info.go:125] begin check local disk info of client
I0320 22:08:26.394553  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:08:26.394559  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba6c0 0xc0003ba700]
E0320 22:08:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:08:33.409765  543705 memory.go:184] no items to output this cycle
I0320 22:08:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 22:08:43.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:08:43.409907  543705 memory.go:191] Add success.
I0320 22:08:43.409971  543705 cpu.go:282] Add success.
I0320 22:08:43.419708  543705 net.go:648] Add success.
I0320 22:08:43.422479  543705 net.go:770] primary dev: ETH0
I0320 22:08:43.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:08:43.422503  543705 net.go:698] Add success.
I0320 22:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:08:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:08:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:08:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:08:53.409782  543705 memory.go:184] no items to output this cycle
I0320 22:08:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 22:09:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:09:03.409807  543705 memory.go:184] no items to output this cycle
I0320 22:09:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 22:09:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:09:13.409792  543705 memory.go:191] Add success.
I0320 22:09:13.409809  543705 cpu.go:282] Add success.
W0320 22:09:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:09:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:09:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:09:13.420194  543705 net.go:648] Add success.
I0320 22:09:13.422962  543705 net.go:770] primary dev: ETH0
I0320 22:09:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:09:13.422987  543705 net.go:698] Add success.
I0320 22:09:13.463795  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c97ff5e0-7236-4989-8dba-8a8bbe8721ce","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:09:13.463838  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:09:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:09:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:09:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0320 22:09:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:09:14.456746  543705 disk_worker.go:494] system disk:vda1
I0320 22:09:14.456776  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:09:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:09:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:09:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:09:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:09:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:09:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:09:23.409785  543705 memory.go:184] no items to output this cycle
I0320 22:09:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 22:09:26.398161  543705 disk_info.go:125] begin check local disk info of client
I0320 22:09:26.400645  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:09:26.400651  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f0440 0xc0003f0480]
E0320 22:09:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:09:33.409797  543705 memory.go:184] no items to output this cycle
I0320 22:09:33.409810  543705 cpu.go:275] no items to output this cycle
I0320 22:09:38.637739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:09:38.637746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:09:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:09:43.410675  543705 memory.go:191] Add success.
I0320 22:09:43.409808  543705 cpu.go:282] Add success.
I0320 22:09:43.420668  543705 net.go:648] Add success.
I0320 22:09:43.423665  543705 net.go:770] primary dev: ETH0
I0320 22:09:43.423678  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:09:43.423691  543705 net.go:698] Add success.
I0320 22:09:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:09:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:09:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:09:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:09:53.409797  543705 memory.go:184] no items to output this cycle
I0320 22:09:53.409813  543705 cpu.go:275] no items to output this cycle
E0320 22:10:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:10:03.409791  543705 memory.go:184] no items to output this cycle
I0320 22:10:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 22:10:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:10:13.409799  543705 memory.go:191] Add success.
W0320 22:10:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:10:13.409832  543705 cpu.go:282] Add success.
W0320 22:10:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:10:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:10:13.420262  543705 net.go:648] Add success.
I0320 22:10:13.422920  543705 net.go:770] primary dev: ETH0
I0320 22:10:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:10:13.422947  543705 net.go:698] Add success.
I0320 22:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:10:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:10:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0320 22:10:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:10:14.456586  543705 disk_worker.go:494] system disk:vda1
I0320 22:10:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:10:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:10:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:10:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:10:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:10:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:10:23.409770  543705 memory.go:184] no items to output this cycle
I0320 22:10:23.409791  543705 cpu.go:275] no items to output this cycle
I0320 22:10:26.401675  543705 disk_info.go:125] begin check local disk info of client
I0320 22:10:26.404151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:10:26.404157  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003664c0 0xc000366500]
E0320 22:10:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:10:33.409807  543705 memory.go:184] no items to output this cycle
I0320 22:10:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 22:10:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:10:43.409796  543705 memory.go:191] Add success.
I0320 22:10:43.409797  543705 cpu.go:282] Add success.
I0320 22:10:43.419996  543705 net.go:648] Add success.
I0320 22:10:43.422605  543705 net.go:770] primary dev: ETH0
I0320 22:10:43.422620  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:10:43.422634  543705 net.go:698] Add success.
I0320 22:10:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:10:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:10:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:10:53.409782  543705 memory.go:184] no items to output this cycle
I0320 22:10:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 22:11:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:11:03.409811  543705 memory.go:184] no items to output this cycle
I0320 22:11:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 22:11:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:11:13.409824  543705 memory.go:191] Add success.
I0320 22:11:13.409838  543705 cpu.go:282] Add success.
W0320 22:11:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:11:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:11:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:11:13.420286  543705 net.go:648] Add success.
I0320 22:11:13.423506  543705 net.go:770] primary dev: ETH0
I0320 22:11:13.423521  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:11:13.423535  543705 net.go:698] Add success.
I0320 22:11:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:11:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:11:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 22:11:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:11:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 22:11:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:11:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:11:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:11:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:11:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:11:23.409768  543705 memory.go:184] no items to output this cycle
I0320 22:11:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 22:11:26.405674  543705 disk_info.go:125] begin check local disk info of client
I0320 22:11:26.408180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:11:26.408188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003eea80 0xc0003eeac0]
E0320 22:11:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:11:33.409792  543705 memory.go:184] no items to output this cycle
I0320 22:11:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 22:11:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:11:43.409822  543705 memory.go:191] Add success.
I0320 22:11:43.409830  543705 cpu.go:282] Add success.
I0320 22:11:43.419956  543705 net.go:648] Add success.
I0320 22:11:43.423419  543705 net.go:770] primary dev: ETH0
I0320 22:11:43.423433  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:11:43.423447  543705 net.go:698] Add success.
I0320 22:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:11:53.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:11:53.409866  543705 memory.go:184] no items to output this cycle
I0320 22:11:53.410020  543705 cpu.go:275] no items to output this cycle
E0320 22:12:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:12:03.409789  543705 memory.go:184] no items to output this cycle
I0320 22:12:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 22:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:12:13.409802  543705 memory.go:191] Add success.
I0320 22:12:13.409806  543705 cpu.go:282] Add success.
W0320 22:12:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:12:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:12:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:12:13.420136  543705 net.go:648] Add success.
I0320 22:12:13.422752  543705 net.go:770] primary dev: ETH0
I0320 22:12:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:12:13.422781  543705 net.go:698] Add success.
I0320 22:12:13.469401  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"13612a44-fc45-42b5-894f-9129459fc1c1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:12:13.469436  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 22:12:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:12:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 22:12:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:12:14.455949  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:12:14.455958  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:12:14.455963  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:12:14.456574  543705 disk_worker.go:494] system disk:vda1
I0320 22:12:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:12:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:12:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:12:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:12:16.457990  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:12:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:12:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:12:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:12:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:12:23.409791  543705 memory.go:184] no items to output this cycle
I0320 22:12:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 22:12:26.409674  543705 disk_info.go:125] begin check local disk info of client
I0320 22:12:26.412140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:12:26.412147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa8c0 0xc0001fa900]
E0320 22:12:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:12:33.409779  543705 memory.go:184] no items to output this cycle
I0320 22:12:33.409805  543705 cpu.go:275] no items to output this cycle
I0320 22:12:38.638896  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:12:38.638902  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:12:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:12:43.410793  543705 memory.go:191] Add success.
I0320 22:12:43.409804  543705 cpu.go:282] Add success.
I0320 22:12:43.420717  543705 net.go:648] Add success.
I0320 22:12:43.423459  543705 net.go:770] primary dev: ETH0
I0320 22:12:43.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:12:43.423487  543705 net.go:698] Add success.
I0320 22:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:12:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:12:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:12:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:12:53.409792  543705 memory.go:184] no items to output this cycle
I0320 22:12:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 22:13:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:13:03.409790  543705 memory.go:184] no items to output this cycle
I0320 22:13:03.409798  543705 cpu.go:275] no items to output this cycle
E0320 22:13:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:13:13.409792  543705 memory.go:191] Add success.
W0320 22:13:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:13:13.409818  543705 cpu.go:282] Add success.
W0320 22:13:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:13:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:13:13.420162  543705 net.go:648] Add success.
I0320 22:13:13.422765  543705 net.go:770] primary dev: ETH0
I0320 22:13:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:13:13.422794  543705 net.go:698] Add success.
I0320 22:13:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:13:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:13:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0320 22:13:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:13:14.456507  543705 disk_worker.go:494] system disk:vda1
I0320 22:13:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:13:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:13:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:13:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:13:23.410502  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:13:23.410517  543705 memory.go:184] no items to output this cycle
I0320 22:13:23.410520  543705 cpu.go:275] no items to output this cycle
I0320 22:13:26.412797  543705 disk_info.go:125] begin check local disk info of client
I0320 22:13:26.415413  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:13:26.415421  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004eec00 0xc0004eec40]
E0320 22:13:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:13:33.409813  543705 memory.go:184] no items to output this cycle
I0320 22:13:33.409830  543705 cpu.go:275] no items to output this cycle
E0320 22:13:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:13:43.409866  543705 memory.go:191] Add success.
I0320 22:13:43.409921  543705 cpu.go:282] Add success.
I0320 22:13:43.419736  543705 net.go:648] Add success.
I0320 22:13:43.422548  543705 net.go:770] primary dev: ETH0
I0320 22:13:43.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:13:43.422572  543705 net.go:698] Add success.
I0320 22:13:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:13:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:13:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:13:53.409776  543705 memory.go:184] no items to output this cycle
I0320 22:13:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 22:14:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:14:03.409780  543705 memory.go:184] no items to output this cycle
I0320 22:14:03.409787  543705 cpu.go:275] no items to output this cycle
E0320 22:14:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:14:13.409801  543705 memory.go:191] Add success.
I0320 22:14:13.409824  543705 cpu.go:282] Add success.
W0320 22:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:14:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:14:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:14:13.420194  543705 net.go:648] Add success.
I0320 22:14:13.422764  543705 net.go:770] primary dev: ETH0
I0320 22:14:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:14:13.422789  543705 net.go:698] Add success.
I0320 22:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:14:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:14:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0320 22:14:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:14:14.456609  543705 disk_worker.go:494] system disk:vda1
I0320 22:14:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:14:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:14:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:14:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:14:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:14:23.409803  543705 memory.go:184] no items to output this cycle
I0320 22:14:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 22:14:26.415795  543705 disk_info.go:125] begin check local disk info of client
I0320 22:14:26.418290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:14:26.418297  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002648c0 0xc000264900]
E0320 22:14:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:14:33.409758  543705 memory.go:184] no items to output this cycle
I0320 22:14:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:14:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:14:43.409802  543705 memory.go:191] Add success.
I0320 22:14:43.409819  543705 cpu.go:282] Add success.
I0320 22:14:43.419988  543705 net.go:648] Add success.
I0320 22:14:43.423240  543705 net.go:770] primary dev: ETH0
I0320 22:14:43.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:14:43.423266  543705 net.go:698] Add success.
I0320 22:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:14:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:14:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:14:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:14:53.409768  543705 memory.go:184] no items to output this cycle
I0320 22:14:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 22:15:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:15:03.409780  543705 memory.go:184] no items to output this cycle
I0320 22:15:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 22:15:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:15:13.409780  543705 memory.go:191] Add success.
W0320 22:15:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:15:13.409806  543705 cpu.go:282] Add success.
W0320 22:15:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:15:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:15:13.420139  543705 net.go:648] Add success.
I0320 22:15:13.422940  543705 net.go:770] primary dev: ETH0
I0320 22:15:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:15:13.422967  543705 net.go:698] Add success.
I0320 22:15:13.472146  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"47f9314c-d176-4966-b179-6639f8025557","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:15:13.472178  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:15:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:15:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0320 22:15:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:15:14.456770  543705 disk_worker.go:494] system disk:vda1
I0320 22:15:14.456803  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:15:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:15:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:15:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:15:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:15:23.409771  543705 memory.go:184] no items to output this cycle
I0320 22:15:23.409797  543705 cpu.go:275] no items to output this cycle
I0320 22:15:26.418784  543705 disk_info.go:125] begin check local disk info of client
I0320 22:15:26.421274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:15:26.421281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa440 0xc0001fa480]
E0320 22:15:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:15:33.409771  543705 memory.go:184] no items to output this cycle
I0320 22:15:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 22:15:38.639921  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:15:38.639928  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:15:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:15:43.410684  543705 memory.go:191] Add success.
I0320 22:15:43.409821  543705 cpu.go:282] Add success.
I0320 22:15:43.420439  543705 net.go:648] Add success.
I0320 22:15:43.423066  543705 net.go:770] primary dev: ETH0
I0320 22:15:43.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:15:43.423095  543705 net.go:698] Add success.
I0320 22:15:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:15:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:15:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:15:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:15:53.409771  543705 memory.go:184] no items to output this cycle
I0320 22:15:53.409786  543705 cpu.go:275] no items to output this cycle
E0320 22:16:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:16:03.409784  543705 memory.go:184] no items to output this cycle
I0320 22:16:03.409802  543705 cpu.go:275] no items to output this cycle
E0320 22:16:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:16:13.409799  543705 memory.go:191] Add success.
I0320 22:16:13.409811  543705 cpu.go:282] Add success.
W0320 22:16:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:16:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:16:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:16:13.420258  543705 net.go:648] Add success.
I0320 22:16:13.422897  543705 net.go:770] primary dev: ETH0
I0320 22:16:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:16:13.422924  543705 net.go:698] Add success.
I0320 22:16:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:16:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:16:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 22:16:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:16:14.456575  543705 disk_worker.go:494] system disk:vda1
I0320 22:16:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:16:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:16:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:16:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:16:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:16:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:16:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:16:23.409767  543705 memory.go:184] no items to output this cycle
I0320 22:16:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 22:16:26.421795  543705 disk_info.go:125] begin check local disk info of client
I0320 22:16:26.424214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:16:26.424220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264980 0xc0002649c0]
E0320 22:16:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:16:33.409779  543705 memory.go:184] no items to output this cycle
I0320 22:16:33.409780  543705 cpu.go:275] no items to output this cycle
E0320 22:16:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:16:43.409911  543705 cpu.go:282] Add success.
I0320 22:16:43.409928  543705 memory.go:191] Add success.
I0320 22:16:43.419721  543705 net.go:648] Add success.
I0320 22:16:43.423075  543705 net.go:770] primary dev: ETH0
I0320 22:16:43.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:16:43.423099  543705 net.go:698] Add success.
I0320 22:16:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:16:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:16:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:16:53.410235  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:16:53.410250  543705 memory.go:184] no items to output this cycle
I0320 22:16:53.410267  543705 cpu.go:275] no items to output this cycle
E0320 22:17:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:17:03.409793  543705 cpu.go:275] no items to output this cycle
I0320 22:17:03.409800  543705 memory.go:184] no items to output this cycle
E0320 22:17:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:17:13.409807  543705 memory.go:191] Add success.
I0320 22:17:13.409808  543705 cpu.go:282] Add success.
W0320 22:17:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:17:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:17:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:17:13.420154  543705 net.go:648] Add success.
I0320 22:17:13.422878  543705 net.go:770] primary dev: ETH0
I0320 22:17:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:17:13.422903  543705 net.go:698] Add success.
I0320 22:17:13.453493  543705 event_worker.go:152] Polling the log file for events...
W0320 22:17:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:17:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 22:17:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:17:14.455928  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:17:14.455937  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:17:14.455943  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:17:14.456580  543705 disk_worker.go:494] system disk:vda1
I0320 22:17:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:17:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:17:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:17:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:17:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:17:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:17:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:17:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:17:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:17:23.409801  543705 memory.go:184] no items to output this cycle
I0320 22:17:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 22:17:26.424811  543705 disk_info.go:125] begin check local disk info of client
I0320 22:17:26.427325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:17:26.427331  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290080 0xc0002900c0]
E0320 22:17:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:17:33.409784  543705 memory.go:184] no items to output this cycle
I0320 22:17:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 22:17:43.409894  543705 cpu.go:282] Add success.
E0320 22:17:43.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:17:43.409921  543705 memory.go:191] Add success.
I0320 22:17:43.419713  543705 net.go:648] Add success.
I0320 22:17:43.422969  543705 net.go:770] primary dev: ETH0
I0320 22:17:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:17:43.422995  543705 net.go:698] Add success.
I0320 22:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:17:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:17:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:17:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:17:53.409775  543705 memory.go:184] no items to output this cycle
I0320 22:17:53.409775  543705 cpu.go:275] no items to output this cycle
E0320 22:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:18:03.409778  543705 memory.go:184] no items to output this cycle
I0320 22:18:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 22:18:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:18:13.409785  543705 memory.go:191] Add success.
W0320 22:18:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:18:13.409819  543705 cpu.go:282] Add success.
W0320 22:18:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:18:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:18:13.420140  543705 net.go:648] Add success.
I0320 22:18:13.423014  543705 net.go:770] primary dev: ETH0
I0320 22:18:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:18:13.423044  543705 net.go:698] Add success.
I0320 22:18:13.469694  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aa7fe69d-c4c3-46ae-9534-76a238dc0ea2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:18:13.469727  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:18:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:18:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 22:18:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:18:14.456769  543705 disk_worker.go:494] system disk:vda1
I0320 22:18:14.456799  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:18:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:18:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:18:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:18:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:18:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:18:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:18:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 22:18:23.409774  543705 memory.go:184] no items to output this cycle
I0320 22:18:26.427829  543705 disk_info.go:125] begin check local disk info of client
I0320 22:18:26.430309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:18:26.430315  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f8780 0xc0001f87c0]
E0320 22:18:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:18:33.409766  543705 memory.go:184] no items to output this cycle
I0320 22:18:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 22:18:38.640906  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:18:38.640913  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:18:43.410710  543705 memory.go:191] Add success.
I0320 22:18:43.409809  543705 cpu.go:282] Add success.
I0320 22:18:43.420420  543705 net.go:648] Add success.
I0320 22:18:43.423475  543705 net.go:770] primary dev: ETH0
I0320 22:18:43.423490  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:18:43.423504  543705 net.go:698] Add success.
I0320 22:18:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:18:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:18:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:18:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:18:53.409773  543705 memory.go:184] no items to output this cycle
I0320 22:18:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 22:19:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:19:03.409808  543705 memory.go:184] no items to output this cycle
I0320 22:19:03.409823  543705 cpu.go:275] no items to output this cycle
E0320 22:19:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:19:13.409819  543705 memory.go:191] Add success.
I0320 22:19:13.409822  543705 cpu.go:282] Add success.
W0320 22:19:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:19:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:19:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:19:13.420136  543705 net.go:648] Add success.
I0320 22:19:13.422693  543705 net.go:770] primary dev: ETH0
I0320 22:19:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:19:13.422720  543705 net.go:698] Add success.
I0320 22:19:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:19:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:19:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0320 22:19:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:19:14.456507  543705 disk_worker.go:494] system disk:vda1
I0320 22:19:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:19:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:19:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:19:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:19:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 22:19:23.409783  543705 memory.go:184] no items to output this cycle
I0320 22:19:26.430850  543705 disk_info.go:125] begin check local disk info of client
I0320 22:19:26.433348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:19:26.433355  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4800 0xc0004a4840]
E0320 22:19:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:19:33.409775  543705 memory.go:184] no items to output this cycle
I0320 22:19:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 22:19:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:19:43.409790  543705 memory.go:191] Add success.
I0320 22:19:43.409794  543705 cpu.go:282] Add success.
I0320 22:19:43.419977  543705 net.go:648] Add success.
I0320 22:19:43.422927  543705 net.go:770] primary dev: ETH0
I0320 22:19:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:19:43.422956  543705 net.go:698] Add success.
I0320 22:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:19:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:19:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:19:53.409795  543705 memory.go:184] no items to output this cycle
I0320 22:19:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 22:20:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:20:03.409787  543705 cpu.go:275] no items to output this cycle
I0320 22:20:03.409794  543705 memory.go:184] no items to output this cycle
E0320 22:20:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:20:13.409816  543705 memory.go:191] Add success.
I0320 22:20:13.409830  543705 cpu.go:282] Add success.
W0320 22:20:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:20:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:20:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:20:13.420187  543705 net.go:648] Add success.
I0320 22:20:13.422795  543705 net.go:770] primary dev: ETH0
I0320 22:20:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:20:13.422825  543705 net.go:698] Add success.
I0320 22:20:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:20:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:20:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 22:20:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:20:14.456610  543705 disk_worker.go:494] system disk:vda1
I0320 22:20:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:20:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:20:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:20:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:20:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:20:23.410383  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:20:23.410398  543705 memory.go:184] no items to output this cycle
I0320 22:20:23.410419  543705 cpu.go:275] no items to output this cycle
I0320 22:20:26.433864  543705 disk_info.go:125] begin check local disk info of client
I0320 22:20:26.436348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:20:26.436354  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
E0320 22:20:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:20:33.409804  543705 memory.go:184] no items to output this cycle
I0320 22:20:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 22:20:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:20:43.409776  543705 memory.go:191] Add success.
I0320 22:20:43.409797  543705 cpu.go:282] Add success.
I0320 22:20:43.419908  543705 net.go:648] Add success.
I0320 22:20:43.422705  543705 net.go:770] primary dev: ETH0
I0320 22:20:43.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:20:43.422730  543705 net.go:698] Add success.
I0320 22:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:20:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:20:53.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:20:53.410011  543705 cpu.go:275] no items to output this cycle
I0320 22:20:53.410018  543705 memory.go:184] no items to output this cycle
E0320 22:21:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:21:03.409783  543705 memory.go:184] no items to output this cycle
I0320 22:21:03.409795  543705 cpu.go:275] no items to output this cycle
E0320 22:21:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:21:13.409794  543705 memory.go:191] Add success.
I0320 22:21:13.409815  543705 cpu.go:282] Add success.
W0320 22:21:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:21:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:21:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:21:13.420198  543705 net.go:648] Add success.
I0320 22:21:13.422803  543705 net.go:770] primary dev: ETH0
I0320 22:21:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:21:13.422832  543705 net.go:698] Add success.
I0320 22:21:13.468648  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2c818ea1-80bd-4e7a-b3f1-097d889c1069","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:21:13.468682  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:21:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:21:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:21:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0320 22:21:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:21:14.456562  543705 disk_worker.go:494] system disk:vda1
I0320 22:21:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:21:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:21:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:21:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:21:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:21:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:21:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:21:23.409780  543705 memory.go:184] no items to output this cycle
I0320 22:21:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 22:21:26.436867  543705 disk_info.go:125] begin check local disk info of client
I0320 22:21:26.439375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:21:26.439381  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290ac0 0xc000290b00]
E0320 22:21:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:21:33.409806  543705 memory.go:184] no items to output this cycle
I0320 22:21:33.409817  543705 cpu.go:275] no items to output this cycle
I0320 22:21:38.641737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:21:38.641744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:21:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:21:43.410602  543705 memory.go:191] Add success.
I0320 22:21:43.409838  543705 cpu.go:282] Add success.
I0320 22:21:43.420511  543705 net.go:648] Add success.
I0320 22:21:43.423140  543705 net.go:770] primary dev: ETH0
I0320 22:21:43.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:21:43.423165  543705 net.go:698] Add success.
I0320 22:21:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:21:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:21:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:21:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:21:53.409789  543705 memory.go:184] no items to output this cycle
I0320 22:21:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 22:22:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:22:03.409792  543705 memory.go:184] no items to output this cycle
I0320 22:22:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 22:22:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:22:13.409825  543705 memory.go:191] Add success.
I0320 22:22:13.409832  543705 cpu.go:282] Add success.
W0320 22:22:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:22:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:22:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:22:13.420198  543705 net.go:648] Add success.
I0320 22:22:13.422978  543705 net.go:770] primary dev: ETH0
I0320 22:22:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:22:13.423001  543705 net.go:698] Add success.
W0320 22:22:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:22:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 22:22:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:22:14.456678  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:22:14.456685  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:22:14.456690  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:22:14.456810  543705 disk_worker.go:494] system disk:vda1
I0320 22:22:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:22:15.456884  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:22:15.456893  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:22:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:22:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:22:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:22:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:22:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:22:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:22:23.409802  543705 memory.go:184] no items to output this cycle
I0320 22:22:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 22:22:26.439890  543705 disk_info.go:125] begin check local disk info of client
I0320 22:22:26.442361  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:22:26.442368  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464e80 0xc000464ec0]
E0320 22:22:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:22:33.409779  543705 memory.go:184] no items to output this cycle
I0320 22:22:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 22:22:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:22:43.409904  543705 cpu.go:282] Add success.
I0320 22:22:43.409963  543705 memory.go:191] Add success.
I0320 22:22:43.419730  543705 net.go:648] Add success.
I0320 22:22:43.420687  543705 net.go:770] primary dev: ETH0
I0320 22:22:43.420702  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:22:43.420715  543705 net.go:698] Add success.
I0320 22:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:22:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:22:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:22:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:22:53.409781  543705 memory.go:184] no items to output this cycle
I0320 22:22:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 22:23:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:23:03.409792  543705 memory.go:184] no items to output this cycle
I0320 22:23:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:23:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:23:13.409814  543705 memory.go:191] Add success.
I0320 22:23:13.409819  543705 cpu.go:282] Add success.
W0320 22:23:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:23:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:23:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:23:13.420229  543705 net.go:648] Add success.
I0320 22:23:13.423329  543705 net.go:770] primary dev: ETH0
I0320 22:23:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:23:13.423355  543705 net.go:698] Add success.
I0320 22:23:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:23:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:23:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0320 22:23:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:23:14.456505  543705 disk_worker.go:494] system disk:vda1
I0320 22:23:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:23:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:23:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:23:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:23:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:23:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:23:23.409772  543705 memory.go:184] no items to output this cycle
I0320 22:23:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 22:23:26.442908  543705 disk_info.go:125] begin check local disk info of client
I0320 22:23:26.445371  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:23:26.445378  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fec40 0xc0003fec80]
E0320 22:23:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:23:33.409776  543705 memory.go:184] no items to output this cycle
I0320 22:23:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 22:23:43.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:23:43.409882  543705 memory.go:191] Add success.
I0320 22:23:43.409952  543705 cpu.go:282] Add success.
I0320 22:23:43.419733  543705 net.go:648] Add success.
I0320 22:23:43.422439  543705 net.go:770] primary dev: ETH0
I0320 22:23:43.422453  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:23:43.422467  543705 net.go:698] Add success.
I0320 22:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:23:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:23:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:23:53.409781  543705 cpu.go:275] no items to output this cycle
I0320 22:23:53.409791  543705 memory.go:184] no items to output this cycle
E0320 22:24:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:24:03.409804  543705 memory.go:184] no items to output this cycle
I0320 22:24:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 22:24:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:24:13.409790  543705 memory.go:191] Add success.
I0320 22:24:13.409809  543705 cpu.go:282] Add success.
W0320 22:24:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:24:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:24:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:24:13.420274  543705 net.go:648] Add success.
I0320 22:24:13.423171  543705 net.go:770] primary dev: ETH0
I0320 22:24:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:24:13.423197  543705 net.go:698] Add success.
I0320 22:24:13.463875  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2b5b592b-dcaa-44ee-9c3e-dadb100cbeda","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:24:13.463910  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:24:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:24:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:24:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 22:24:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:24:14.456717  543705 disk_worker.go:494] system disk:vda1
I0320 22:24:14.456750  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:24:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:24:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:24:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:24:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:24:23.409776  543705 memory.go:184] no items to output this cycle
I0320 22:24:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 22:24:26.445920  543705 disk_info.go:125] begin check local disk info of client
I0320 22:24:26.448365  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:24:26.448371  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290780 0xc0002907c0]
E0320 22:24:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:24:33.409799  543705 memory.go:184] no items to output this cycle
I0320 22:24:33.409816  543705 cpu.go:275] no items to output this cycle
I0320 22:24:38.641888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:24:38.641894  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:24:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:24:43.410647  543705 memory.go:191] Add success.
I0320 22:24:43.409820  543705 cpu.go:282] Add success.
I0320 22:24:43.420347  543705 net.go:648] Add success.
I0320 22:24:43.423430  543705 net.go:770] primary dev: ETH0
I0320 22:24:43.423445  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:24:43.423459  543705 net.go:698] Add success.
I0320 22:24:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:24:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:24:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:24:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:24:53.409778  543705 memory.go:184] no items to output this cycle
I0320 22:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 22:25:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:25:03.409773  543705 memory.go:184] no items to output this cycle
I0320 22:25:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:25:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:25:13.409785  543705 memory.go:191] Add success.
W0320 22:25:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:25:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:25:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:25:13.409832  543705 cpu.go:282] Add success.
I0320 22:25:13.420206  543705 net.go:648] Add success.
I0320 22:25:13.421295  543705 net.go:770] primary dev: ETH0
I0320 22:25:13.421310  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:25:13.421324  543705 net.go:698] Add success.
I0320 22:25:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:25:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:25:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 22:25:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:25:14.456487  543705 disk_worker.go:494] system disk:vda1
I0320 22:25:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:25:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:25:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:25:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:25:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:25:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:25:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:25:23.409775  543705 cpu.go:275] no items to output this cycle
I0320 22:25:23.409783  543705 memory.go:184] no items to output this cycle
I0320 22:25:26.448927  543705 disk_info.go:125] begin check local disk info of client
I0320 22:25:26.451383  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:25:26.451389  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272d40 0xc000272d80]
E0320 22:25:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:25:33.409776  543705 memory.go:184] no items to output this cycle
I0320 22:25:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:25:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:25:43.409821  543705 memory.go:191] Add success.
I0320 22:25:43.409831  543705 cpu.go:282] Add success.
I0320 22:25:43.420020  543705 net.go:648] Add success.
I0320 22:25:43.423115  543705 net.go:770] primary dev: ETH0
I0320 22:25:43.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:25:43.423140  543705 net.go:698] Add success.
I0320 22:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:25:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:25:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:25:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:25:53.409796  543705 memory.go:184] no items to output this cycle
I0320 22:25:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 22:26:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:26:03.409809  543705 memory.go:184] no items to output this cycle
I0320 22:26:03.409822  543705 cpu.go:275] no items to output this cycle
E0320 22:26:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:26:13.409792  543705 memory.go:191] Add success.
I0320 22:26:13.409814  543705 cpu.go:282] Add success.
W0320 22:26:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:26:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:26:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:26:13.420699  543705 net.go:648] Add success.
I0320 22:26:13.423950  543705 net.go:770] primary dev: ETH0
I0320 22:26:13.423963  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:26:13.423975  543705 net.go:698] Add success.
I0320 22:26:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:26:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:26:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0320 22:26:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:26:14.456520  543705 disk_worker.go:494] system disk:vda1
I0320 22:26:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:26:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:26:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:26:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:26:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:26:23.409774  543705 cpu.go:275] no items to output this cycle
I0320 22:26:23.409775  543705 memory.go:184] no items to output this cycle
I0320 22:26:26.451956  543705 disk_info.go:125] begin check local disk info of client
I0320 22:26:26.454439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:26:26.454445  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0320 22:26:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:26:33.409799  543705 memory.go:184] no items to output this cycle
I0320 22:26:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 22:26:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:26:43.409800  543705 memory.go:191] Add success.
I0320 22:26:43.409801  543705 cpu.go:282] Add success.
I0320 22:26:43.419878  543705 net.go:648] Add success.
I0320 22:26:43.423005  543705 net.go:770] primary dev: ETH0
I0320 22:26:43.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:26:43.423031  543705 net.go:698] Add success.
I0320 22:26:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:26:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:26:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:26:53.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:26:53.410276  543705 memory.go:184] no items to output this cycle
I0320 22:26:53.410277  543705 cpu.go:275] no items to output this cycle
E0320 22:27:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:27:03.409776  543705 memory.go:184] no items to output this cycle
I0320 22:27:03.409811  543705 cpu.go:275] no items to output this cycle
I0320 22:27:13.409790  543705 cpu.go:282] Add success.
E0320 22:27:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:27:13.409820  543705 memory.go:191] Add success.
W0320 22:27:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:27:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:27:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:27:13.420116  543705 net.go:648] Add success.
I0320 22:27:13.427418  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 22:27:13.427491  543705 net.go:770] primary dev: ETH0
I0320 22:27:13.427502  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:27:13.427514  543705 net.go:698] Add success.
I0320 22:27:13.453279  543705 event_worker.go:152] Polling the log file for events...
I0320 22:27:13.468582  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f8873223-0f5d-4960-a7f8-55b78b68056e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:27:13.468626  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 22:27:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:27:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 22:27:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:27:14.456906  543705 disk_worker.go:494] system disk:vda1
E0320 22:27:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:27:14.456935  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:27:14.456941  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:27:14.456962  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:27:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:27:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:27:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:27:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:27:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:27:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:27:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:27:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:27:23.409793  543705 memory.go:184] no items to output this cycle
I0320 22:27:23.409806  543705 cpu.go:275] no items to output this cycle
I0320 22:27:26.454970  543705 disk_info.go:125] begin check local disk info of client
I0320 22:27:26.457435  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:27:26.457444  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0320 22:27:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:27:33.409787  543705 memory.go:184] no items to output this cycle
I0320 22:27:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 22:27:38.642916  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:27:38.642923  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:27:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:27:43.410696  543705 memory.go:191] Add success.
I0320 22:27:43.409798  543705 cpu.go:282] Add success.
I0320 22:27:43.420409  543705 net.go:648] Add success.
I0320 22:27:43.423303  543705 net.go:770] primary dev: ETH0
I0320 22:27:43.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:27:43.423330  543705 net.go:698] Add success.
I0320 22:27:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:27:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:27:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:27:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:27:53.409764  543705 memory.go:184] no items to output this cycle
I0320 22:27:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 22:28:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:28:03.409811  543705 memory.go:184] no items to output this cycle
I0320 22:28:03.409819  543705 cpu.go:275] no items to output this cycle
E0320 22:28:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:28:13.409806  543705 memory.go:191] Add success.
I0320 22:28:13.409807  543705 cpu.go:282] Add success.
W0320 22:28:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:28:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:28:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:28:13.420154  543705 net.go:648] Add success.
I0320 22:28:13.423005  543705 net.go:770] primary dev: ETH0
I0320 22:28:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:28:13.423031  543705 net.go:698] Add success.
I0320 22:28:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:28:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:28:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0320 22:28:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:28:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 22:28:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:28:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:28:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:28:23.410634  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:28:23.410650  543705 memory.go:184] no items to output this cycle
I0320 22:28:23.410658  543705 cpu.go:275] no items to output this cycle
I0320 22:28:26.457973  543705 disk_info.go:125] begin check local disk info of client
I0320 22:28:26.460426  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:28:26.460434  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba640 0xc0002ba680]
E0320 22:28:33.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:28:33.409897  543705 memory.go:184] no items to output this cycle
I0320 22:28:33.409914  543705 cpu.go:275] no items to output this cycle
E0320 22:28:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:28:43.409778  543705 memory.go:191] Add success.
I0320 22:28:43.409815  543705 cpu.go:282] Add success.
I0320 22:28:43.419983  543705 net.go:648] Add success.
I0320 22:28:43.422551  543705 net.go:770] primary dev: ETH0
I0320 22:28:43.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:28:43.422588  543705 net.go:698] Add success.
I0320 22:28:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:28:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:28:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:28:53.409807  543705 memory.go:184] no items to output this cycle
I0320 22:28:53.409818  543705 cpu.go:275] no items to output this cycle
E0320 22:29:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:29:03.409810  543705 memory.go:184] no items to output this cycle
I0320 22:29:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 22:29:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:29:13.409821  543705 memory.go:191] Add success.
I0320 22:29:13.409831  543705 cpu.go:282] Add success.
W0320 22:29:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:29:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:29:13.420111  543705 net.go:648] Add success.
I0320 22:29:13.422821  543705 net.go:770] primary dev: ETH0
I0320 22:29:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:29:13.422847  543705 net.go:698] Add success.
I0320 22:29:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:29:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:29:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 22:29:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:29:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 22:29:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:29:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:29:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:29:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:29:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:29:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:29:23.410433  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:29:23.410453  543705 memory.go:184] no items to output this cycle
I0320 22:29:23.410468  543705 cpu.go:275] no items to output this cycle
I0320 22:29:26.460641  543705 disk_info.go:125] begin check local disk info of client
I0320 22:29:26.463148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:29:26.463155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e12c0 0xc0003e1300]
E0320 22:29:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:29:33.409897  543705 memory.go:184] no items to output this cycle
I0320 22:29:33.409954  543705 cpu.go:275] no items to output this cycle
E0320 22:29:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:29:43.409802  543705 memory.go:191] Add success.
I0320 22:29:43.409819  543705 cpu.go:282] Add success.
I0320 22:29:43.419950  543705 net.go:648] Add success.
I0320 22:29:43.422776  543705 net.go:770] primary dev: ETH0
I0320 22:29:43.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:29:43.422802  543705 net.go:698] Add success.
I0320 22:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:29:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:29:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:29:53.410477  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:29:53.410494  543705 memory.go:184] no items to output this cycle
I0320 22:29:53.410515  543705 cpu.go:275] no items to output this cycle
E0320 22:30:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:30:03.409811  543705 memory.go:184] no items to output this cycle
I0320 22:30:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 22:30:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:30:13.409814  543705 memory.go:191] Add success.
I0320 22:30:13.409816  543705 cpu.go:282] Add success.
W0320 22:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:30:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:30:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:30:13.420136  543705 net.go:648] Add success.
I0320 22:30:13.422718  543705 net.go:770] primary dev: ETH0
I0320 22:30:13.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:30:13.422743  543705 net.go:698] Add success.
I0320 22:30:13.464215  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9d122166-eb45-4ecf-91cc-6517bc5f4a39","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:30:13.464250  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:30:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:30:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:30:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0320 22:30:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:30:14.456529  543705 disk_worker.go:494] system disk:vda1
I0320 22:30:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:30:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:30:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:30:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:30:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:30:23.409783  543705 memory.go:184] no items to output this cycle
I0320 22:30:23.409807  543705 cpu.go:275] no items to output this cycle
I0320 22:30:26.463603  543705 disk_info.go:125] begin check local disk info of client
I0320 22:30:26.466387  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:30:26.466395  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0320 22:30:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:30:33.409776  543705 memory.go:184] no items to output this cycle
I0320 22:30:33.409785  543705 cpu.go:275] no items to output this cycle
I0320 22:30:38.643925  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:30:38.643931  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:30:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:30:43.410720  543705 memory.go:191] Add success.
I0320 22:30:43.409804  543705 cpu.go:282] Add success.
I0320 22:30:43.420431  543705 net.go:648] Add success.
I0320 22:30:43.423181  543705 net.go:770] primary dev: ETH0
I0320 22:30:43.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:30:43.423208  543705 net.go:698] Add success.
I0320 22:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:30:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:30:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:30:53.409778  543705 memory.go:184] no items to output this cycle
I0320 22:30:53.409780  543705 cpu.go:275] no items to output this cycle
E0320 22:31:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:31:03.409809  543705 memory.go:184] no items to output this cycle
I0320 22:31:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 22:31:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:31:13.409784  543705 memory.go:191] Add success.
I0320 22:31:13.409789  543705 cpu.go:282] Add success.
W0320 22:31:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:31:13.412569  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:31:13.412573  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:31:13.420204  543705 net.go:648] Add success.
I0320 22:31:13.421926  543705 net.go:770] primary dev: ETH0
I0320 22:31:13.421940  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:31:13.421964  543705 net.go:698] Add success.
I0320 22:31:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:31:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:31:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 22:31:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:31:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 22:31:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:31:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:31:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:31:23.410207  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:31:23.410223  543705 memory.go:184] no items to output this cycle
I0320 22:31:23.410232  543705 cpu.go:275] no items to output this cycle
I0320 22:31:26.466603  543705 disk_info.go:125] begin check local disk info of client
I0320 22:31:26.469134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:31:26.469140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab740 0xc0003ab780]
E0320 22:31:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:31:33.409776  543705 memory.go:184] no items to output this cycle
I0320 22:31:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:31:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:31:43.409778  543705 memory.go:191] Add success.
I0320 22:31:43.409806  543705 cpu.go:282] Add success.
I0320 22:31:43.420054  543705 net.go:648] Add success.
I0320 22:31:43.422981  543705 net.go:770] primary dev: ETH0
I0320 22:31:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:31:43.423008  543705 net.go:698] Add success.
I0320 22:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:31:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:31:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:31:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:31:53.409794  543705 memory.go:184] no items to output this cycle
I0320 22:31:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 22:32:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:32:03.409787  543705 memory.go:184] no items to output this cycle
I0320 22:32:03.409790  543705 cpu.go:275] no items to output this cycle
E0320 22:32:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:32:13.409820  543705 memory.go:191] Add success.
I0320 22:32:13.409823  543705 cpu.go:282] Add success.
W0320 22:32:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:32:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:32:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:32:13.420202  543705 net.go:648] Add success.
I0320 22:32:13.423459  543705 net.go:770] primary dev: ETH0
I0320 22:32:13.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:32:13.423488  543705 net.go:698] Add success.
W0320 22:32:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:32:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0320 22:32:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:32:14.456915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:32:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:32:14.456931  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:32:14.457004  543705 disk_worker.go:494] system disk:vda1
I0320 22:32:14.457048  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:32:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:32:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:32:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:32:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:32:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:32:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:32:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:32:23.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:32:23.409905  543705 memory.go:184] no items to output this cycle
I0320 22:32:23.409968  543705 cpu.go:275] no items to output this cycle
I0320 22:32:26.469592  543705 disk_info.go:125] begin check local disk info of client
I0320 22:32:26.472175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:32:26.472181  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e2c0 0xc00034e300]
E0320 22:32:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:32:33.409779  543705 memory.go:184] no items to output this cycle
I0320 22:32:33.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:32:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:32:43.409782  543705 memory.go:191] Add success.
I0320 22:32:43.409805  543705 cpu.go:282] Add success.
I0320 22:32:43.420042  543705 net.go:648] Add success.
I0320 22:32:43.422791  543705 net.go:770] primary dev: ETH0
I0320 22:32:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:32:43.422817  543705 net.go:698] Add success.
I0320 22:32:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:32:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:32:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:32:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:32:53.409776  543705 memory.go:184] no items to output this cycle
I0320 22:32:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 22:33:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:33:03.409799  543705 memory.go:184] no items to output this cycle
I0320 22:33:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 22:33:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:33:13.409777  543705 memory.go:191] Add success.
W0320 22:33:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:33:13.409806  543705 cpu.go:282] Add success.
W0320 22:33:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:33:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:33:13.420146  543705 net.go:648] Add success.
I0320 22:33:13.422769  543705 net.go:770] primary dev: ETH0
I0320 22:33:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:33:13.422800  543705 net.go:698] Add success.
I0320 22:33:13.464345  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b099ddfb-dd1f-45c3-b290-cf55c6b22d92","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:33:13.464378  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:33:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:33:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 22:33:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:33:14.456576  543705 disk_worker.go:494] system disk:vda1
I0320 22:33:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:33:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:33:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:33:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:33:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:33:23.409789  543705 memory.go:184] no items to output this cycle
I0320 22:33:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 22:33:26.472707  543705 disk_info.go:125] begin check local disk info of client
I0320 22:33:26.475294  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:33:26.475301  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa400 0xc0001fa440]
E0320 22:33:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:33:33.409778  543705 cpu.go:275] no items to output this cycle
I0320 22:33:33.409785  543705 memory.go:184] no items to output this cycle
I0320 22:33:38.644922  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:33:38.644929  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:33:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:33:43.410898  543705 memory.go:191] Add success.
I0320 22:33:43.409811  543705 cpu.go:282] Add success.
I0320 22:33:43.420597  543705 net.go:648] Add success.
I0320 22:33:43.423308  543705 net.go:770] primary dev: ETH0
I0320 22:33:43.423321  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:33:43.423334  543705 net.go:698] Add success.
I0320 22:33:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:33:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:33:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:33:53.410389  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:33:53.410406  543705 memory.go:184] no items to output this cycle
I0320 22:33:53.410409  543705 cpu.go:275] no items to output this cycle
E0320 22:34:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:34:03.409781  543705 memory.go:184] no items to output this cycle
I0320 22:34:03.409802  543705 cpu.go:275] no items to output this cycle
W0320 22:34:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:34:13.409739  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:34:13.409745  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 22:34:13.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:34:13.409862  543705 memory.go:191] Add success.
I0320 22:34:13.409863  543705 cpu.go:282] Add success.
I0320 22:34:13.420224  543705 net.go:648] Add success.
I0320 22:34:13.423194  543705 net.go:770] primary dev: ETH0
I0320 22:34:13.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:34:13.423223  543705 net.go:698] Add success.
I0320 22:34:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:34:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:34:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 22:34:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:34:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 22:34:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:34:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:34:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:34:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:34:23.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:34:23.409859  543705 memory.go:184] no items to output this cycle
I0320 22:34:23.409929  543705 cpu.go:275] no items to output this cycle
I0320 22:34:26.475678  543705 disk_info.go:125] begin check local disk info of client
I0320 22:34:26.478448  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:34:26.478454  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e000 0xc00034e040]
E0320 22:34:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:34:33.409767  543705 memory.go:184] no items to output this cycle
I0320 22:34:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 22:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:34:43.409784  543705 memory.go:191] Add success.
I0320 22:34:43.409812  543705 cpu.go:282] Add success.
I0320 22:34:43.419928  543705 net.go:648] Add success.
I0320 22:34:43.422682  543705 net.go:770] primary dev: ETH0
I0320 22:34:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:34:43.422710  543705 net.go:698] Add success.
I0320 22:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:34:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:34:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:34:53.409774  543705 memory.go:184] no items to output this cycle
I0320 22:34:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 22:35:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:35:03.409813  543705 memory.go:184] no items to output this cycle
I0320 22:35:03.409821  543705 cpu.go:275] no items to output this cycle
E0320 22:35:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:35:13.409819  543705 memory.go:191] Add success.
I0320 22:35:13.409831  543705 cpu.go:282] Add success.
W0320 22:35:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:35:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:35:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:35:13.420214  543705 net.go:648] Add success.
I0320 22:35:13.422808  543705 net.go:770] primary dev: ETH0
I0320 22:35:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:35:13.422840  543705 net.go:698] Add success.
I0320 22:35:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:35:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:35:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0320 22:35:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:35:14.456603  543705 disk_worker.go:494] system disk:vda1
I0320 22:35:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:35:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:35:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:35:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:35:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:35:23.409787  543705 memory.go:184] no items to output this cycle
I0320 22:35:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 22:35:26.478667  543705 disk_info.go:125] begin check local disk info of client
I0320 22:35:26.481410  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:35:26.481417  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004917c0 0xc000491800]
E0320 22:35:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:35:33.409781  543705 cpu.go:275] no items to output this cycle
I0320 22:35:33.409789  543705 memory.go:184] no items to output this cycle
E0320 22:35:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:35:43.409809  543705 memory.go:191] Add success.
I0320 22:35:43.409819  543705 cpu.go:282] Add success.
I0320 22:35:43.420432  543705 net.go:648] Add success.
I0320 22:35:43.423530  543705 net.go:770] primary dev: ETH0
I0320 22:35:43.423554  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:35:43.423566  543705 net.go:698] Add success.
I0320 22:35:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:35:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:35:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:35:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:35:53.409777  543705 memory.go:184] no items to output this cycle
I0320 22:35:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 22:36:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:36:03.409787  543705 memory.go:184] no items to output this cycle
I0320 22:36:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 22:36:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:36:13.409800  543705 memory.go:191] Add success.
I0320 22:36:13.409802  543705 cpu.go:282] Add success.
W0320 22:36:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:36:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:36:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:36:13.420160  543705 net.go:648] Add success.
I0320 22:36:13.422771  543705 net.go:770] primary dev: ETH0
I0320 22:36:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:36:13.422795  543705 net.go:698] Add success.
I0320 22:36:13.468317  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3429a258-826e-423d-b211-ff35fc6d00bb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:36:13.468350  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:36:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:36:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 22:36:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:36:14.456621  543705 disk_worker.go:494] system disk:vda1
I0320 22:36:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:36:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:36:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:36:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:36:23.409792  543705 memory.go:184] no items to output this cycle
I0320 22:36:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 22:36:26.481621  543705 disk_info.go:125] begin check local disk info of client
I0320 22:36:26.484112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:36:26.484119  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470e80 0xc000470ec0]
E0320 22:36:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:36:33.409807  543705 memory.go:184] no items to output this cycle
I0320 22:36:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 22:36:38.645844  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:36:38.645850  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:36:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:36:43.410609  543705 memory.go:191] Add success.
I0320 22:36:43.409827  543705 cpu.go:282] Add success.
I0320 22:36:43.420268  543705 net.go:648] Add success.
I0320 22:36:43.422797  543705 net.go:770] primary dev: ETH0
I0320 22:36:43.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:36:43.422822  543705 net.go:698] Add success.
I0320 22:36:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:36:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:36:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:36:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:36:53.409785  543705 memory.go:184] no items to output this cycle
I0320 22:36:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 22:37:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:37:03.409798  543705 memory.go:184] no items to output this cycle
I0320 22:37:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 22:37:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:37:13.409793  543705 memory.go:191] Add success.
I0320 22:37:13.409793  543705 cpu.go:282] Add success.
W0320 22:37:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:37:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:37:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:37:13.420036  543705 net.go:648] Add success.
I0320 22:37:13.422613  543705 net.go:770] primary dev: ETH0
I0320 22:37:13.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:37:13.422641  543705 net.go:698] Add success.
I0320 22:37:13.453179  543705 event_worker.go:152] Polling the log file for events...
W0320 22:37:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:37:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0320 22:37:14.455206  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:37:14.455972  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:37:14.455981  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:37:14.455988  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:37:14.456601  543705 disk_worker.go:494] system disk:vda1
I0320 22:37:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:37:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:37:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:37:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:37:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:37:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:37:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:37:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:37:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:37:23.409811  543705 memory.go:184] no items to output this cycle
I0320 22:37:23.409822  543705 cpu.go:275] no items to output this cycle
I0320 22:37:26.484572  543705 disk_info.go:125] begin check local disk info of client
I0320 22:37:26.487082  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:37:26.487088  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6440 0xc0003e6480]
E0320 22:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:37:33.409803  543705 memory.go:184] no items to output this cycle
I0320 22:37:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 22:37:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:37:43.409819  543705 memory.go:191] Add success.
I0320 22:37:43.409824  543705 cpu.go:282] Add success.
I0320 22:37:43.420689  543705 net.go:648] Add success.
I0320 22:37:43.423433  543705 net.go:770] primary dev: ETH0
I0320 22:37:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:37:43.423457  543705 net.go:698] Add success.
I0320 22:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:37:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:37:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:37:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:37:53.409803  543705 memory.go:184] no items to output this cycle
I0320 22:37:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 22:38:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:38:03.409793  543705 memory.go:184] no items to output this cycle
I0320 22:38:03.409816  543705 cpu.go:275] no items to output this cycle
E0320 22:38:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:38:13.409789  543705 memory.go:191] Add success.
I0320 22:38:13.409814  543705 cpu.go:282] Add success.
W0320 22:38:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:38:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:38:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:38:13.420145  543705 net.go:648] Add success.
I0320 22:38:13.422713  543705 net.go:770] primary dev: ETH0
I0320 22:38:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:38:13.422738  543705 net.go:698] Add success.
I0320 22:38:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:38:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:38:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0320 22:38:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:38:14.456572  543705 disk_worker.go:494] system disk:vda1
I0320 22:38:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:38:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:38:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:38:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:38:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:38:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:38:23.409795  543705 memory.go:184] no items to output this cycle
I0320 22:38:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 22:38:26.487724  543705 disk_info.go:125] begin check local disk info of client
I0320 22:38:26.490330  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:38:26.490336  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003af280 0xc0003af2c0]
E0320 22:38:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:38:33.409799  543705 memory.go:184] no items to output this cycle
I0320 22:38:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 22:38:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:38:43.409885  543705 memory.go:191] Add success.
I0320 22:38:43.409940  543705 cpu.go:282] Add success.
I0320 22:38:43.419711  543705 net.go:648] Add success.
I0320 22:38:43.422275  543705 net.go:770] primary dev: ETH0
I0320 22:38:43.422288  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:38:43.422301  543705 net.go:698] Add success.
I0320 22:38:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:38:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:38:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:38:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:38:53.409782  543705 memory.go:184] no items to output this cycle
I0320 22:38:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 22:39:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:39:03.409815  543705 memory.go:184] no items to output this cycle
I0320 22:39:03.409827  543705 cpu.go:275] no items to output this cycle
E0320 22:39:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:39:13.409786  543705 memory.go:191] Add success.
I0320 22:39:13.409811  543705 cpu.go:282] Add success.
W0320 22:39:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:39:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:39:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:39:13.420172  543705 net.go:648] Add success.
I0320 22:39:13.422766  543705 net.go:770] primary dev: ETH0
I0320 22:39:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:39:13.422793  543705 net.go:698] Add success.
I0320 22:39:13.498075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0b90e8d4-6c95-4362-9ae2-e348afc3dc3a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:39:13.498109  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:39:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:39:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:39:14.455250  543705 disk_worker.go:708] disk space is not compliant
W0320 22:39:14.455254  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:39:14.456776  543705 disk_worker.go:494] system disk:vda1
I0320 22:39:14.456812  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:39:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:39:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:39:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:39:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:39:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:39:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:39:23.409798  543705 memory.go:184] no items to output this cycle
I0320 22:39:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 22:39:26.490682  543705 disk_info.go:125] begin check local disk info of client
I0320 22:39:26.493213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:39:26.493220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381b00 0xc000381b40]
E0320 22:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:39:33.409780  543705 memory.go:184] no items to output this cycle
I0320 22:39:33.409797  543705 cpu.go:275] no items to output this cycle
I0320 22:39:38.646939  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:39:38.646945  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:39:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:39:43.409781  543705 memory.go:191] Add success.
I0320 22:39:43.409792  543705 cpu.go:282] Add success.
I0320 22:39:43.419739  543705 net.go:648] Add success.
I0320 22:39:43.420663  543705 net.go:770] primary dev: ETH0
I0320 22:39:43.420678  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:39:43.420690  543705 net.go:698] Add success.
I0320 22:39:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:39:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:39:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:39:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:39:53.409782  543705 memory.go:184] no items to output this cycle
I0320 22:39:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 22:40:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:40:03.409789  543705 cpu.go:275] no items to output this cycle
I0320 22:40:03.409801  543705 memory.go:184] no items to output this cycle
E0320 22:40:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:40:13.409822  543705 memory.go:191] Add success.
I0320 22:40:13.409829  543705 cpu.go:282] Add success.
W0320 22:40:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:40:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:40:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:40:13.420289  543705 net.go:648] Add success.
I0320 22:40:13.423110  543705 net.go:770] primary dev: ETH0
I0320 22:40:13.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:40:13.423149  543705 net.go:698] Add success.
I0320 22:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:40:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:40:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 22:40:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:40:14.456609  543705 disk_worker.go:494] system disk:vda1
I0320 22:40:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:40:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:40:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:40:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:40:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 22:40:23.409786  543705 memory.go:184] no items to output this cycle
I0320 22:40:26.493747  543705 disk_info.go:125] begin check local disk info of client
I0320 22:40:26.496269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:40:26.496275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f0e00 0xc0003f0e40]
E0320 22:40:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:40:33.409767  543705 memory.go:184] no items to output this cycle
I0320 22:40:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 22:40:43.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:40:43.409912  543705 memory.go:191] Add success.
I0320 22:40:43.409958  543705 cpu.go:282] Add success.
I0320 22:40:43.419744  543705 net.go:648] Add success.
I0320 22:40:43.422348  543705 net.go:770] primary dev: ETH0
I0320 22:40:43.422361  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:40:43.422372  543705 net.go:698] Add success.
I0320 22:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:40:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:40:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:40:53.409768  543705 memory.go:184] no items to output this cycle
I0320 22:40:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 22:41:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:41:03.409790  543705 memory.go:184] no items to output this cycle
I0320 22:41:03.409829  543705 cpu.go:275] no items to output this cycle
E0320 22:41:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:41:13.409832  543705 memory.go:191] Add success.
I0320 22:41:13.409835  543705 cpu.go:282] Add success.
W0320 22:41:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:41:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:41:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:41:13.420244  543705 net.go:648] Add success.
I0320 22:41:13.422727  543705 net.go:770] primary dev: ETH0
I0320 22:41:13.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:41:13.422752  543705 net.go:698] Add success.
I0320 22:41:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:41:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:41:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 22:41:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:41:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 22:41:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:41:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:41:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:41:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:41:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:41:23.409800  543705 memory.go:184] no items to output this cycle
I0320 22:41:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 22:41:26.496796  543705 disk_info.go:125] begin check local disk info of client
I0320 22:41:26.499251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:41:26.499258  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f580 0xc00039f5c0]
E0320 22:41:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:41:33.409777  543705 memory.go:184] no items to output this cycle
I0320 22:41:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 22:41:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:41:43.409905  543705 cpu.go:282] Add success.
I0320 22:41:43.409931  543705 memory.go:191] Add success.
I0320 22:41:43.419709  543705 net.go:648] Add success.
I0320 22:41:43.422362  543705 net.go:770] primary dev: ETH0
I0320 22:41:43.422375  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:41:43.422387  543705 net.go:698] Add success.
I0320 22:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:41:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:41:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:41:53.409763  543705 memory.go:184] no items to output this cycle
I0320 22:41:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 22:42:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:42:03.409785  543705 memory.go:184] no items to output this cycle
I0320 22:42:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 22:42:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:42:13.409798  543705 memory.go:191] Add success.
I0320 22:42:13.409798  543705 cpu.go:282] Add success.
W0320 22:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:42:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:42:13.420110  543705 net.go:648] Add success.
I0320 22:42:13.422983  543705 net.go:770] primary dev: ETH0
I0320 22:42:13.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:42:13.423010  543705 net.go:698] Add success.
I0320 22:42:13.465298  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dab6900a-5225-496d-b9a3-e59d46a5be16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:42:13.465337  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 22:42:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:42:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0320 22:42:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:42:14.456967  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:42:14.456976  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:42:14.456982  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:42:14.457027  543705 disk_worker.go:494] system disk:vda1
I0320 22:42:14.457059  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:42:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:42:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 22:42:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:42:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:42:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:42:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:42:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:42:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:42:23.409793  543705 memory.go:184] no items to output this cycle
I0320 22:42:23.409804  543705 cpu.go:275] no items to output this cycle
I0320 22:42:26.499786  543705 disk_info.go:125] begin check local disk info of client
I0320 22:42:26.502257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:42:26.502263  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c600 0xc00048c640]
E0320 22:42:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:42:33.409770  543705 memory.go:184] no items to output this cycle
I0320 22:42:33.409790  543705 cpu.go:275] no items to output this cycle
I0320 22:42:38.647942  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:42:38.647948  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:42:43.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:42:43.410782  543705 memory.go:191] Add success.
I0320 22:42:43.409984  543705 cpu.go:282] Add success.
I0320 22:42:43.419745  543705 net.go:648] Add success.
I0320 22:42:43.422327  543705 net.go:770] primary dev: ETH0
I0320 22:42:43.422341  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:42:43.422354  543705 net.go:698] Add success.
I0320 22:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:42:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:42:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:42:53.409798  543705 memory.go:184] no items to output this cycle
I0320 22:42:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 22:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:43:03.409789  543705 memory.go:184] no items to output this cycle
I0320 22:43:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 22:43:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:43:13.409778  543705 memory.go:191] Add success.
W0320 22:43:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:43:13.409809  543705 cpu.go:282] Add success.
W0320 22:43:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:43:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:43:13.420124  543705 net.go:648] Add success.
I0320 22:43:13.422584  543705 net.go:770] primary dev: ETH0
I0320 22:43:13.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:43:13.422610  543705 net.go:698] Add success.
I0320 22:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:43:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:43:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0320 22:43:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:43:14.456623  543705 disk_worker.go:494] system disk:vda1
I0320 22:43:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:43:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:43:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:43:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:43:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:43:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:43:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:43:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 22:43:23.409784  543705 memory.go:184] no items to output this cycle
I0320 22:43:26.502723  543705 disk_info.go:125] begin check local disk info of client
I0320 22:43:26.505189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:43:26.505195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460d00 0xc000460d40]
E0320 22:43:33.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:43:33.409757  543705 memory.go:184] no items to output this cycle
I0320 22:43:33.409797  543705 cpu.go:275] no items to output this cycle
E0320 22:43:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:43:43.409887  543705 cpu.go:282] Add success.
I0320 22:43:43.409911  543705 memory.go:191] Add success.
I0320 22:43:43.419719  543705 net.go:648] Add success.
I0320 22:43:43.422319  543705 net.go:770] primary dev: ETH0
I0320 22:43:43.422334  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:43:43.422348  543705 net.go:698] Add success.
I0320 22:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:43:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:43:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:43:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:43:53.409797  543705 memory.go:184] no items to output this cycle
I0320 22:43:53.409801  543705 cpu.go:275] no items to output this cycle
E0320 22:44:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:44:03.409795  543705 memory.go:184] no items to output this cycle
I0320 22:44:03.409799  543705 cpu.go:275] no items to output this cycle
E0320 22:44:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:44:13.409831  543705 memory.go:191] Add success.
I0320 22:44:13.409833  543705 cpu.go:282] Add success.
W0320 22:44:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:44:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:44:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:44:13.420368  543705 net.go:648] Add success.
I0320 22:44:13.423447  543705 net.go:770] primary dev: ETH0
I0320 22:44:13.423462  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:44:13.423478  543705 net.go:698] Add success.
I0320 22:44:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:44:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:44:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0320 22:44:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:44:14.456491  543705 disk_worker.go:494] system disk:vda1
I0320 22:44:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:44:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:44:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:44:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:44:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:44:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:44:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:44:23.409801  543705 memory.go:184] no items to output this cycle
I0320 22:44:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 22:44:26.505826  543705 disk_info.go:125] begin check local disk info of client
I0320 22:44:26.508551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:44:26.508557  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046ce00 0xc00046ce40]
E0320 22:44:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:44:33.409789  543705 memory.go:184] no items to output this cycle
I0320 22:44:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 22:44:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:44:43.409810  543705 memory.go:191] Add success.
I0320 22:44:43.409811  543705 cpu.go:282] Add success.
I0320 22:44:43.419961  543705 net.go:648] Add success.
I0320 22:44:43.422572  543705 net.go:770] primary dev: ETH0
I0320 22:44:43.422586  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:44:43.422597  543705 net.go:698] Add success.
I0320 22:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:44:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:44:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:44:53.409788  543705 memory.go:184] no items to output this cycle
I0320 22:44:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 22:45:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:45:03.409794  543705 memory.go:184] no items to output this cycle
I0320 22:45:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 22:45:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:45:13.409821  543705 memory.go:191] Add success.
I0320 22:45:13.409829  543705 cpu.go:282] Add success.
W0320 22:45:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:45:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:45:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:45:13.420207  543705 net.go:648] Add success.
I0320 22:45:13.423031  543705 net.go:770] primary dev: ETH0
I0320 22:45:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:45:13.423061  543705 net.go:698] Add success.
I0320 22:45:13.469245  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7e63d0f8-3f93-42b9-9ef7-8e4be49074e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:45:13.469277  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:45:14.454061  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:45:14.454255  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:45:14.454265  543705 disk_worker.go:708] disk space is not compliant
W0320 22:45:14.454268  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:45:14.455600  543705 disk_worker.go:494] system disk:vda1
I0320 22:45:14.455647  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:45:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:45:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:45:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:45:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:45:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:45:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:45:23.409780  543705 memory.go:184] no items to output this cycle
I0320 22:45:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 22:45:26.508757  543705 disk_info.go:125] begin check local disk info of client
I0320 22:45:26.511494  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:45:26.511501  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340600 0xc000340640]
E0320 22:45:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:45:33.409807  543705 memory.go:184] no items to output this cycle
I0320 22:45:33.409825  543705 cpu.go:275] no items to output this cycle
I0320 22:45:38.648938  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:45:38.648950  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:45:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:45:43.410644  543705 memory.go:191] Add success.
I0320 22:45:43.409815  543705 cpu.go:282] Add success.
I0320 22:45:43.420329  543705 net.go:648] Add success.
I0320 22:45:43.422836  543705 net.go:770] primary dev: ETH0
I0320 22:45:43.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:45:43.422874  543705 net.go:698] Add success.
I0320 22:45:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:45:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:45:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:45:53.409781  543705 memory.go:184] no items to output this cycle
I0320 22:45:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 22:46:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:46:03.409779  543705 memory.go:184] no items to output this cycle
I0320 22:46:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 22:46:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:46:13.409826  543705 memory.go:191] Add success.
I0320 22:46:13.409832  543705 cpu.go:282] Add success.
W0320 22:46:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:46:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:46:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:46:13.420202  543705 net.go:648] Add success.
I0320 22:46:13.422853  543705 net.go:770] primary dev: ETH0
I0320 22:46:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:46:13.422878  543705 net.go:698] Add success.
I0320 22:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:46:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:46:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0320 22:46:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:46:14.456613  543705 disk_worker.go:494] system disk:vda1
I0320 22:46:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:46:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:46:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:46:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:46:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:46:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:46:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:46:23.409773  543705 memory.go:184] no items to output this cycle
I0320 22:46:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 22:46:26.511761  543705 disk_info.go:125] begin check local disk info of client
I0320 22:46:26.514256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:46:26.514262  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264e80 0xc000264ec0]
E0320 22:46:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:46:33.409796  543705 memory.go:184] no items to output this cycle
I0320 22:46:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 22:46:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:46:43.409775  543705 memory.go:191] Add success.
I0320 22:46:43.409822  543705 cpu.go:282] Add success.
I0320 22:46:43.419896  543705 net.go:648] Add success.
I0320 22:46:43.422956  543705 net.go:770] primary dev: ETH0
I0320 22:46:43.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:46:43.423001  543705 net.go:698] Add success.
I0320 22:46:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:46:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:46:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:46:53.409798  543705 memory.go:184] no items to output this cycle
I0320 22:46:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 22:47:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:47:03.409819  543705 memory.go:184] no items to output this cycle
I0320 22:47:03.409831  543705 cpu.go:275] no items to output this cycle
E0320 22:47:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:47:13.409795  543705 memory.go:191] Add success.
I0320 22:47:13.409814  543705 cpu.go:282] Add success.
W0320 22:47:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:47:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:47:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:47:13.420176  543705 net.go:648] Add success.
I0320 22:47:13.422813  543705 net.go:770] primary dev: ETH0
I0320 22:47:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:47:13.422838  543705 net.go:698] Add success.
I0320 22:47:13.453369  543705 event_worker.go:152] Polling the log file for events...
W0320 22:47:14.454299  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:47:14.454395  543705 disk_worker.go:708] disk space is not compliant
W0320 22:47:14.454400  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:47:14.454901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:47:14.454910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:47:14.454917  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:47:14.455970  543705 disk_worker.go:494] system disk:vda1
I0320 22:47:14.456028  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:47:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:47:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:47:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:47:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:47:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:47:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:47:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:47:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:47:23.409778  543705 memory.go:184] no items to output this cycle
I0320 22:47:23.409780  543705 cpu.go:275] no items to output this cycle
I0320 22:47:26.514726  543705 disk_info.go:125] begin check local disk info of client
I0320 22:47:26.517179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:47:26.517185  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae540 0xc0004ae580]
E0320 22:47:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:47:33.409805  543705 memory.go:184] no items to output this cycle
I0320 22:47:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 22:47:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:47:43.409795  543705 memory.go:191] Add success.
I0320 22:47:43.409797  543705 cpu.go:282] Add success.
I0320 22:47:43.419848  543705 net.go:648] Add success.
I0320 22:47:43.422225  543705 net.go:770] primary dev: ETH0
I0320 22:47:43.422240  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:47:43.422255  543705 net.go:698] Add success.
I0320 22:47:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:47:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:47:53.409775  543705 memory.go:184] no items to output this cycle
I0320 22:47:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 22:48:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:48:03.409788  543705 memory.go:184] no items to output this cycle
I0320 22:48:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 22:48:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:48:13.409821  543705 memory.go:191] Add success.
I0320 22:48:13.409836  543705 cpu.go:282] Add success.
W0320 22:48:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:48:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:48:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:48:13.420470  543705 net.go:648] Add success.
I0320 22:48:13.423636  543705 net.go:770] primary dev: ETH0
I0320 22:48:13.423649  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:48:13.423661  543705 net.go:698] Add success.
I0320 22:48:13.469762  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bab4a59f-de75-47ef-bc6b-4e7a08ef4c1b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:48:13.469795  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:48:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:48:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:48:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0320 22:48:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:48:14.456533  543705 disk_worker.go:494] system disk:vda1
I0320 22:48:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:48:15.455612  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:48:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:48:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:48:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:48:23.409797  543705 memory.go:184] no items to output this cycle
I0320 22:48:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 22:48:26.517778  543705 disk_info.go:125] begin check local disk info of client
I0320 22:48:26.520225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:48:26.520231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbd00 0xc0001fbd40]
E0320 22:48:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:48:33.409765  543705 memory.go:184] no items to output this cycle
I0320 22:48:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 22:48:38.649735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:48:38.649742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:48:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:48:43.410656  543705 memory.go:191] Add success.
I0320 22:48:43.409797  543705 cpu.go:282] Add success.
I0320 22:48:43.420458  543705 net.go:648] Add success.
I0320 22:48:43.423130  543705 net.go:770] primary dev: ETH0
I0320 22:48:43.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:48:43.423156  543705 net.go:698] Add success.
I0320 22:48:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:48:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:48:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:48:53.409793  543705 memory.go:184] no items to output this cycle
I0320 22:48:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 22:49:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:49:03.409784  543705 memory.go:184] no items to output this cycle
I0320 22:49:03.409813  543705 cpu.go:275] no items to output this cycle
E0320 22:49:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:49:13.409797  543705 memory.go:191] Add success.
I0320 22:49:13.409800  543705 cpu.go:282] Add success.
W0320 22:49:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:49:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:49:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:49:13.420155  543705 net.go:648] Add success.
I0320 22:49:13.422708  543705 net.go:770] primary dev: ETH0
I0320 22:49:13.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:49:13.422736  543705 net.go:698] Add success.
W0320 22:49:14.455257  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:49:14.455277  543705 disk_worker.go:708] disk space is not compliant
W0320 22:49:14.455281  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:49:14.455633  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:49:14.457451  543705 disk_worker.go:494] system disk:vda1
I0320 22:49:14.457497  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:49:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:49:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:49:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:49:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:49:16.472491  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:49:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:49:23.409784  543705 memory.go:184] no items to output this cycle
I0320 22:49:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 22:49:26.520904  543705 disk_info.go:125] begin check local disk info of client
I0320 22:49:26.523427  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:49:26.523434  543705 disk_info.go:196] parse disk info done, disk is : [0xc000590580 0xc0005905c0]
E0320 22:49:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:49:33.409807  543705 memory.go:184] no items to output this cycle
I0320 22:49:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 22:49:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:49:43.409788  543705 memory.go:191] Add success.
I0320 22:49:43.409804  543705 cpu.go:282] Add success.
I0320 22:49:43.419857  543705 net.go:648] Add success.
I0320 22:49:43.422484  543705 net.go:770] primary dev: ETH0
I0320 22:49:43.422497  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:49:43.422509  543705 net.go:698] Add success.
I0320 22:49:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:49:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:49:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:49:53.410272  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:49:53.410287  543705 memory.go:184] no items to output this cycle
I0320 22:49:53.410292  543705 cpu.go:275] no items to output this cycle
E0320 22:50:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:50:03.409808  543705 memory.go:184] no items to output this cycle
I0320 22:50:03.409825  543705 cpu.go:275] no items to output this cycle
E0320 22:50:13.409940  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:50:13.409969  543705 memory.go:191] Add success.
W0320 22:50:13.410006  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:50:13.410008  543705 cpu.go:282] Add success.
W0320 22:50:13.410020  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:50:13.410023  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:50:13.419712  543705 net.go:648] Add success.
I0320 22:50:13.422228  543705 net.go:770] primary dev: ETH0
I0320 22:50:13.422251  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:50:13.422263  543705 net.go:698] Add success.
I0320 22:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:50:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:50:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 22:50:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:50:14.456594  543705 disk_worker.go:494] system disk:vda1
I0320 22:50:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:50:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:50:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:50:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:50:23.409775  543705 memory.go:184] no items to output this cycle
I0320 22:50:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 22:50:26.523515  543705 disk_info.go:125] begin check local disk info of client
I0320 22:50:26.525975  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:50:26.525981  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001facc0 0xc0001fad00]
E0320 22:50:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:50:33.409777  543705 memory.go:184] no items to output this cycle
I0320 22:50:33.409782  543705 cpu.go:275] no items to output this cycle
E0320 22:50:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:50:43.409813  543705 memory.go:191] Add success.
I0320 22:50:43.409822  543705 cpu.go:282] Add success.
I0320 22:50:43.420037  543705 net.go:648] Add success.
I0320 22:50:43.423144  543705 net.go:770] primary dev: ETH0
I0320 22:50:43.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:50:43.423170  543705 net.go:698] Add success.
I0320 22:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:50:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:50:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:50:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:50:53.409800  543705 memory.go:184] no items to output this cycle
I0320 22:50:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 22:51:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:51:03.409813  543705 memory.go:184] no items to output this cycle
I0320 22:51:03.409826  543705 cpu.go:275] no items to output this cycle
E0320 22:51:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:51:13.409805  543705 memory.go:191] Add success.
I0320 22:51:13.409807  543705 cpu.go:282] Add success.
W0320 22:51:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:51:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:51:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:51:13.420280  543705 net.go:648] Add success.
I0320 22:51:13.422712  543705 net.go:770] primary dev: ETH0
I0320 22:51:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:51:13.422737  543705 net.go:698] Add success.
I0320 22:51:13.548896  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c3c27e22-1a37-45ad-8227-c9253af49e99","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:51:13.548928  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:51:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:51:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:51:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0320 22:51:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:51:14.456506  543705 disk_worker.go:494] system disk:vda1
I0320 22:51:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:51:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:51:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:51:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:51:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:51:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:51:23.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:51:23.410260  543705 memory.go:184] no items to output this cycle
I0320 22:51:23.410272  543705 cpu.go:275] no items to output this cycle
I0320 22:51:26.526728  543705 disk_info.go:125] begin check local disk info of client
I0320 22:51:26.529204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:51:26.529209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa180 0xc0001fa1c0]
E0320 22:51:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:51:33.409800  543705 memory.go:184] no items to output this cycle
I0320 22:51:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 22:51:38.650963  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:51:38.650970  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:51:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:51:43.410730  543705 memory.go:191] Add success.
I0320 22:51:43.409830  543705 cpu.go:282] Add success.
I0320 22:51:43.420288  543705 net.go:770] primary dev: ETH0
I0320 22:51:43.420301  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:51:43.420313  543705 net.go:698] Add success.
I0320 22:51:43.420642  543705 net.go:648] Add success.
I0320 22:51:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:51:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:51:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:51:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:51:53.409777  543705 memory.go:184] no items to output this cycle
I0320 22:51:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 22:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:52:03.409784  543705 memory.go:184] no items to output this cycle
I0320 22:52:03.409860  543705 cpu.go:275] no items to output this cycle
W0320 22:52:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:52:13.409737  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:52:13.409743  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:52:13.409833  543705 cpu.go:282] Add success.
E0320 22:52:13.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:52:13.409856  543705 memory.go:191] Add success.
I0320 22:52:13.420370  543705 net.go:648] Add success.
I0320 22:52:13.423569  543705 net.go:770] primary dev: ETH0
I0320 22:52:13.423582  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:52:13.423594  543705 net.go:698] Add success.
W0320 22:52:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:52:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 22:52:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0320 22:52:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:52:14.455900  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:52:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0320 22:52:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 22:52:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:52:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:52:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:52:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 22:52:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:52:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:52:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:52:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:52:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:52:23.409783  543705 memory.go:184] no items to output this cycle
I0320 22:52:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 22:52:26.529934  543705 disk_info.go:125] begin check local disk info of client
I0320 22:52:26.532452  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:52:26.532458  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5940 0xc0000c5980]
E0320 22:52:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:52:33.409775  543705 memory.go:184] no items to output this cycle
I0320 22:52:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 22:52:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:52:43.409787  543705 memory.go:191] Add success.
I0320 22:52:43.409813  543705 cpu.go:282] Add success.
I0320 22:52:43.420136  543705 net.go:648] Add success.
I0320 22:52:43.422660  543705 net.go:770] primary dev: ETH0
I0320 22:52:43.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:52:43.422686  543705 net.go:698] Add success.
I0320 22:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:52:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:52:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:52:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:52:53.409766  543705 memory.go:184] no items to output this cycle
I0320 22:52:53.409798  543705 cpu.go:275] no items to output this cycle
E0320 22:53:03.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:53:03.409829  543705 cpu.go:275] no items to output this cycle
I0320 22:53:03.409837  543705 memory.go:184] no items to output this cycle
E0320 22:53:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:53:13.409829  543705 memory.go:191] Add success.
I0320 22:53:13.409836  543705 cpu.go:282] Add success.
W0320 22:53:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:53:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:53:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:53:13.420133  543705 net.go:648] Add success.
I0320 22:53:13.422907  543705 net.go:770] primary dev: ETH0
I0320 22:53:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:53:13.422931  543705 net.go:698] Add success.
I0320 22:53:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:53:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:53:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 22:53:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:53:14.456559  543705 disk_worker.go:494] system disk:vda1
I0320 22:53:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:53:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:53:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:53:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:53:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:53:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:53:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:53:23.409781  543705 memory.go:184] no items to output this cycle
I0320 22:53:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 22:53:26.532985  543705 disk_info.go:125] begin check local disk info of client
I0320 22:53:26.535509  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:53:26.535516  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049f980 0xc00049f9c0]
E0320 22:53:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:53:33.409789  543705 memory.go:184] no items to output this cycle
I0320 22:53:33.409793  543705 cpu.go:275] no items to output this cycle
E0320 22:53:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:53:43.409801  543705 memory.go:191] Add success.
I0320 22:53:43.409808  543705 cpu.go:282] Add success.
I0320 22:53:43.419890  543705 net.go:648] Add success.
I0320 22:53:43.422477  543705 net.go:770] primary dev: ETH0
I0320 22:53:43.422491  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:53:43.422503  543705 net.go:698] Add success.
I0320 22:53:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:53:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:53:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:53:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:53:53.409817  543705 memory.go:184] no items to output this cycle
I0320 22:53:53.409827  543705 cpu.go:275] no items to output this cycle
E0320 22:54:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:54:03.409793  543705 memory.go:184] no items to output this cycle
I0320 22:54:03.409862  543705 cpu.go:275] no items to output this cycle
E0320 22:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:54:13.409795  543705 memory.go:191] Add success.
W0320 22:54:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:54:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:54:13.409845  543705 cpu.go:282] Add success.
I0320 22:54:13.420377  543705 net.go:648] Add success.
I0320 22:54:13.423199  543705 net.go:770] primary dev: ETH0
I0320 22:54:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:54:13.423225  543705 net.go:698] Add success.
I0320 22:54:13.463763  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd9b646a-7f46-408c-984d-de7ddacd1e12","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:54:13.463794  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 22:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:54:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:54:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 22:54:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:54:14.456498  543705 disk_worker.go:494] system disk:vda1
I0320 22:54:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:54:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:54:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:54:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:54:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:54:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:54:23.409791  543705 memory.go:184] no items to output this cycle
I0320 22:54:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 22:54:26.535930  543705 disk_info.go:125] begin check local disk info of client
I0320 22:54:26.538426  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:54:26.538432  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c54c0 0xc0000c5500]
E0320 22:54:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:54:33.409804  543705 memory.go:184] no items to output this cycle
I0320 22:54:33.409818  543705 cpu.go:275] no items to output this cycle
I0320 22:54:38.651960  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:54:38.651966  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:54:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:54:43.410769  543705 memory.go:191] Add success.
I0320 22:54:43.409794  543705 cpu.go:282] Add success.
I0320 22:54:43.420486  543705 net.go:648] Add success.
I0320 22:54:43.423283  543705 net.go:770] primary dev: ETH0
I0320 22:54:43.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:54:43.423315  543705 net.go:698] Add success.
I0320 22:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:54:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:54:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:54:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:54:53.409781  543705 memory.go:184] no items to output this cycle
I0320 22:54:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 22:55:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:55:03.409781  543705 memory.go:184] no items to output this cycle
I0320 22:55:03.409881  543705 cpu.go:275] no items to output this cycle
E0320 22:55:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:55:13.409799  543705 memory.go:191] Add success.
I0320 22:55:13.409818  543705 cpu.go:282] Add success.
W0320 22:55:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:55:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:55:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:55:13.420338  543705 net.go:648] Add success.
I0320 22:55:13.422949  543705 net.go:770] primary dev: ETH0
I0320 22:55:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:55:13.422973  543705 net.go:698] Add success.
I0320 22:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:55:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:55:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0320 22:55:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:55:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 22:55:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:55:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:55:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:55:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:55:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:55:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:55:23.409786  543705 memory.go:184] no items to output this cycle
I0320 22:55:23.409789  543705 cpu.go:275] no items to output this cycle
I0320 22:55:26.538735  543705 disk_info.go:125] begin check local disk info of client
I0320 22:55:26.541202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:55:26.541209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032eac0 0xc00032eb00]
E0320 22:55:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:55:33.409805  543705 memory.go:184] no items to output this cycle
I0320 22:55:33.409816  543705 cpu.go:275] no items to output this cycle
E0320 22:55:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:55:43.409785  543705 memory.go:191] Add success.
I0320 22:55:43.409806  543705 cpu.go:282] Add success.
I0320 22:55:43.420054  543705 net.go:648] Add success.
I0320 22:55:43.422866  543705 net.go:770] primary dev: ETH0
I0320 22:55:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:55:43.422895  543705 net.go:698] Add success.
I0320 22:55:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:55:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:55:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:55:53.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:55:53.410263  543705 cpu.go:275] no items to output this cycle
I0320 22:55:53.410269  543705 memory.go:184] no items to output this cycle
E0320 22:56:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:56:03.409774  543705 memory.go:184] no items to output this cycle
I0320 22:56:03.409839  543705 cpu.go:275] no items to output this cycle
E0320 22:56:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:56:13.409824  543705 memory.go:191] Add success.
I0320 22:56:13.409825  543705 cpu.go:282] Add success.
W0320 22:56:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:56:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:56:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:56:13.420288  543705 net.go:648] Add success.
I0320 22:56:13.423147  543705 net.go:770] primary dev: ETH0
I0320 22:56:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:56:13.423172  543705 net.go:698] Add success.
I0320 22:56:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:56:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:56:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 22:56:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:56:14.456624  543705 disk_worker.go:494] system disk:vda1
I0320 22:56:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:56:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:56:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:56:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:56:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:56:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:56:23.409766  543705 memory.go:184] no items to output this cycle
I0320 22:56:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 22:56:26.541969  543705 disk_info.go:125] begin check local disk info of client
I0320 22:56:26.544484  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:56:26.544491  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0320 22:56:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:56:33.409799  543705 memory.go:184] no items to output this cycle
I0320 22:56:33.409811  543705 cpu.go:275] no items to output this cycle
E0320 22:56:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:56:43.409801  543705 memory.go:191] Add success.
I0320 22:56:43.409809  543705 cpu.go:282] Add success.
I0320 22:56:43.419951  543705 net.go:648] Add success.
I0320 22:56:43.422489  543705 net.go:770] primary dev: ETH0
I0320 22:56:43.422504  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:56:43.422518  543705 net.go:698] Add success.
I0320 22:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:56:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:56:53.410476  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:56:53.410490  543705 memory.go:184] no items to output this cycle
I0320 22:56:53.410493  543705 cpu.go:275] no items to output this cycle
E0320 22:57:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:57:03.409796  543705 memory.go:184] no items to output this cycle
I0320 22:57:03.409869  543705 cpu.go:275] no items to output this cycle
E0320 22:57:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:57:13.409791  543705 memory.go:191] Add success.
I0320 22:57:13.409793  543705 cpu.go:282] Add success.
W0320 22:57:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:57:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:57:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:57:13.420167  543705 net.go:648] Add success.
I0320 22:57:13.429157  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 22:57:13.429232  543705 net.go:770] primary dev: ETH0
I0320 22:57:13.429243  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:57:13.429255  543705 net.go:698] Add success.
I0320 22:57:13.452772  543705 event_worker.go:152] Polling the log file for events...
I0320 22:57:13.463316  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6de66bad-cdb3-4b23-9b9c-06ef7d3536c8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 22:57:13.463352  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 22:57:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:57:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 22:57:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:57:14.456819  543705 disk_worker.go:494] system disk:vda1
I0320 22:57:14.456856  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 22:57:14.457053  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 22:57:14.457061  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 22:57:14.457065  543705 custom_config.go:64] query custom config with name: gpu
E0320 22:57:15.456926  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 22:57:15.456938  543705 custom_config.go:64] query custom config with name: huawei_npu
E0320 22:57:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 22:57:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:57:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:57:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:57:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:57:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:57:23.409801  543705 memory.go:184] no items to output this cycle
I0320 22:57:23.409814  543705 cpu.go:275] no items to output this cycle
I0320 22:57:26.544930  543705 disk_info.go:125] begin check local disk info of client
I0320 22:57:26.547447  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:57:26.547454  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8700 0xc0002a8740]
E0320 22:57:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:57:33.409768  543705 memory.go:184] no items to output this cycle
I0320 22:57:33.409791  543705 cpu.go:275] no items to output this cycle
I0320 22:57:38.652106  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 22:57:38.652113  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 22:57:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:57:43.410694  543705 memory.go:191] Add success.
I0320 22:57:43.409810  543705 cpu.go:282] Add success.
I0320 22:57:43.419712  543705 net.go:648] Add success.
I0320 22:57:43.422346  543705 net.go:770] primary dev: ETH0
I0320 22:57:43.422358  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:57:43.422371  543705 net.go:698] Add success.
I0320 22:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:57:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:57:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:57:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:57:53.409793  543705 memory.go:184] no items to output this cycle
I0320 22:57:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 22:58:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:58:03.409796  543705 memory.go:184] no items to output this cycle
I0320 22:58:03.409815  543705 cpu.go:275] no items to output this cycle
E0320 22:58:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:58:13.409820  543705 memory.go:191] Add success.
I0320 22:58:13.409835  543705 cpu.go:282] Add success.
W0320 22:58:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 22:58:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:58:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:58:13.420180  543705 net.go:648] Add success.
I0320 22:58:13.423105  543705 net.go:770] primary dev: ETH0
I0320 22:58:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:58:13.423133  543705 net.go:698] Add success.
I0320 22:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:58:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:58:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 22:58:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:58:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 22:58:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:58:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:58:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:58:23.409811  543705 memory.go:184] no items to output this cycle
I0320 22:58:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 22:58:26.548055  543705 disk_info.go:125] begin check local disk info of client
I0320 22:58:26.550564  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:58:26.550570  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004632c0 0xc000463300]
E0320 22:58:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:58:33.409901  543705 memory.go:184] no items to output this cycle
I0320 22:58:33.409940  543705 cpu.go:275] no items to output this cycle
E0320 22:58:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:58:43.409794  543705 memory.go:191] Add success.
I0320 22:58:43.409799  543705 cpu.go:282] Add success.
I0320 22:58:43.419993  543705 net.go:648] Add success.
I0320 22:58:43.422890  543705 net.go:770] primary dev: ETH0
I0320 22:58:43.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:58:43.422923  543705 net.go:698] Add success.
I0320 22:58:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:58:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:58:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:58:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:58:53.409766  543705 memory.go:184] no items to output this cycle
I0320 22:58:53.409792  543705 cpu.go:275] no items to output this cycle
E0320 22:59:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:59:03.409773  543705 memory.go:184] no items to output this cycle
I0320 22:59:03.409792  543705 cpu.go:275] no items to output this cycle
E0320 22:59:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:59:13.409784  543705 memory.go:191] Add success.
W0320 22:59:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 22:59:13.409812  543705 cpu.go:282] Add success.
W0320 22:59:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 22:59:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 22:59:13.420043  543705 net.go:648] Add success.
I0320 22:59:13.422702  543705 net.go:770] primary dev: ETH0
I0320 22:59:13.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:59:13.422726  543705 net.go:698] Add success.
I0320 22:59:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 22:59:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 22:59:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 22:59:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 22:59:14.456585  543705 disk_worker.go:494] system disk:vda1
I0320 22:59:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 22:59:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 22:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:59:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:59:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 22:59:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0320 22:59:23.410487  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:59:23.410506  543705 memory.go:184] no items to output this cycle
I0320 22:59:23.410518  543705 cpu.go:275] no items to output this cycle
I0320 22:59:26.550918  543705 disk_info.go:125] begin check local disk info of client
I0320 22:59:26.553404  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 22:59:26.553410  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032e000 0xc00032e040]
E0320 22:59:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:59:33.409805  543705 memory.go:184] no items to output this cycle
I0320 22:59:33.409817  543705 cpu.go:275] no items to output this cycle
E0320 22:59:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:59:43.409797  543705 memory.go:191] Add success.
I0320 22:59:43.409800  543705 cpu.go:282] Add success.
I0320 22:59:43.419949  543705 net.go:648] Add success.
I0320 22:59:43.423042  543705 net.go:770] primary dev: ETH0
I0320 22:59:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0320 22:59:43.423067  543705 net.go:698] Add success.
I0320 22:59:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 22:59:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 22:59:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 22:59:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 22:59:53.409769  543705 memory.go:184] no items to output this cycle
I0320 22:59:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 23:00:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:00:03.409783  543705 memory.go:184] no items to output this cycle
I0320 23:00:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 23:00:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:00:13.409820  543705 memory.go:191] Add success.
I0320 23:00:13.409821  543705 cpu.go:282] Add success.
W0320 23:00:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:00:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:00:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:00:13.420166  543705 net.go:648] Add success.
I0320 23:00:13.423368  543705 net.go:770] primary dev: ETH0
I0320 23:00:13.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:00:13.423397  543705 net.go:698] Add success.
I0320 23:00:13.564123  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"247db265-1b69-44fe-8d56-9452d3b249a7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:00:13.564156  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:00:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:00:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 23:00:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:00:14.456619  543705 disk_worker.go:494] system disk:vda1
I0320 23:00:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:00:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:00:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:00:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:00:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:00:23.409776  543705 memory.go:184] no items to output this cycle
I0320 23:00:23.409779  543705 cpu.go:275] no items to output this cycle
I0320 23:00:26.553975  543705 disk_info.go:125] begin check local disk info of client
I0320 23:00:26.556413  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:00:26.556420  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a540 0xc00027a580]
E0320 23:00:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:00:33.409791  543705 memory.go:184] no items to output this cycle
I0320 23:00:33.409806  543705 cpu.go:275] no items to output this cycle
I0320 23:00:38.652248  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:00:38.652255  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:00:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:00:43.410630  543705 memory.go:191] Add success.
I0320 23:00:43.409822  543705 cpu.go:282] Add success.
I0320 23:00:43.420360  543705 net.go:648] Add success.
I0320 23:00:43.423312  543705 net.go:770] primary dev: ETH0
I0320 23:00:43.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:00:43.423342  543705 net.go:698] Add success.
I0320 23:00:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:00:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:00:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:00:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:00:53.409794  543705 memory.go:184] no items to output this cycle
I0320 23:00:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 23:01:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:01:03.409770  543705 memory.go:184] no items to output this cycle
I0320 23:01:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 23:01:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:01:13.409805  543705 memory.go:191] Add success.
I0320 23:01:13.409805  543705 cpu.go:282] Add success.
W0320 23:01:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:01:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:01:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:01:13.420151  543705 net.go:648] Add success.
I0320 23:01:13.422779  543705 net.go:770] primary dev: ETH0
I0320 23:01:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:01:13.422808  543705 net.go:698] Add success.
I0320 23:01:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:01:14.455359  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:01:14.455458  543705 disk_worker.go:708] disk space is not compliant
W0320 23:01:14.455468  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:01:14.457051  543705 disk_worker.go:494] system disk:vda1
I0320 23:01:14.457080  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:01:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:01:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:01:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:01:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:01:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:01:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:01:23.409787  543705 memory.go:184] no items to output this cycle
I0320 23:01:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 23:01:26.556947  543705 disk_info.go:125] begin check local disk info of client
I0320 23:01:26.559417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:01:26.559431  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056c200 0xc00056c240]
E0320 23:01:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:01:33.409771  543705 memory.go:184] no items to output this cycle
I0320 23:01:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:01:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:01:43.409807  543705 cpu.go:282] Add success.
I0320 23:01:43.409811  543705 memory.go:191] Add success.
I0320 23:01:43.419904  543705 net.go:648] Add success.
I0320 23:01:43.422617  543705 net.go:770] primary dev: ETH0
I0320 23:01:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:01:43.422642  543705 net.go:698] Add success.
I0320 23:01:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:01:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:01:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:01:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:01:53.409799  543705 memory.go:184] no items to output this cycle
I0320 23:01:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 23:02:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:02:03.409787  543705 memory.go:184] no items to output this cycle
I0320 23:02:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 23:02:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:02:13.409835  543705 memory.go:191] Add success.
I0320 23:02:13.409842  543705 cpu.go:282] Add success.
W0320 23:02:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:02:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:02:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:02:13.420239  543705 net.go:648] Add success.
I0320 23:02:13.422871  543705 net.go:770] primary dev: ETH0
I0320 23:02:13.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:02:13.422899  543705 net.go:698] Add success.
W0320 23:02:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:02:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 23:02:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:02:14.455903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:02:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:02:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:02:14.456819  543705 disk_worker.go:494] system disk:vda1
I0320 23:02:14.456947  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:02:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:02:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:02:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:02:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:02:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:02:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:02:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:02:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:02:23.409806  543705 memory.go:184] no items to output this cycle
I0320 23:02:23.409818  543705 cpu.go:275] no items to output this cycle
I0320 23:02:26.560037  543705 disk_info.go:125] begin check local disk info of client
I0320 23:02:26.562486  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:02:26.562492  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387300 0xc000387340]
E0320 23:02:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:02:33.409807  543705 memory.go:184] no items to output this cycle
I0320 23:02:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 23:02:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:02:43.409796  543705 memory.go:191] Add success.
I0320 23:02:43.409823  543705 cpu.go:282] Add success.
I0320 23:02:43.419979  543705 net.go:648] Add success.
I0320 23:02:43.422865  543705 net.go:770] primary dev: ETH0
I0320 23:02:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:02:43.422891  543705 net.go:698] Add success.
I0320 23:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:02:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:02:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:02:53.409810  543705 memory.go:184] no items to output this cycle
I0320 23:02:53.409820  543705 cpu.go:275] no items to output this cycle
E0320 23:03:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:03:03.409788  543705 memory.go:184] no items to output this cycle
I0320 23:03:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 23:03:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:03:13.409831  543705 memory.go:191] Add success.
I0320 23:03:13.409844  543705 cpu.go:282] Add success.
W0320 23:03:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:03:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:03:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:03:13.420134  543705 net.go:648] Add success.
I0320 23:03:13.422913  543705 net.go:770] primary dev: ETH0
I0320 23:03:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:03:13.422943  543705 net.go:698] Add success.
I0320 23:03:13.507211  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7034b473-324a-4a68-ab62-26055e1696f9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:03:13.507243  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:03:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:03:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0320 23:03:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:03:14.456848  543705 disk_worker.go:494] system disk:vda1
I0320 23:03:14.456877  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:03:15.455631  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:03:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:03:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:03:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:03:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:03:23.410456  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:03:23.410475  543705 memory.go:184] no items to output this cycle
I0320 23:03:23.410490  543705 cpu.go:275] no items to output this cycle
I0320 23:03:26.563001  543705 disk_info.go:125] begin check local disk info of client
I0320 23:03:26.565482  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:03:26.565488  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046d340 0xc00046d380]
E0320 23:03:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:03:33.409795  543705 memory.go:184] no items to output this cycle
I0320 23:03:33.409821  543705 cpu.go:275] no items to output this cycle
I0320 23:03:38.652966  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:03:38.652973  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:03:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:03:43.410746  543705 memory.go:191] Add success.
I0320 23:03:43.409847  543705 cpu.go:282] Add success.
I0320 23:03:43.420455  543705 net.go:648] Add success.
I0320 23:03:43.423255  543705 net.go:770] primary dev: ETH0
I0320 23:03:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:03:43.423285  543705 net.go:698] Add success.
I0320 23:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:03:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:03:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:03:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:03:53.409781  543705 memory.go:184] no items to output this cycle
I0320 23:03:53.409784  543705 cpu.go:275] no items to output this cycle
E0320 23:04:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:04:03.409763  543705 memory.go:184] no items to output this cycle
I0320 23:04:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 23:04:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:04:13.409805  543705 memory.go:191] Add success.
I0320 23:04:13.409808  543705 cpu.go:282] Add success.
W0320 23:04:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:04:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:04:13.420159  543705 net.go:648] Add success.
I0320 23:04:13.423175  543705 net.go:770] primary dev: ETH0
I0320 23:04:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:04:13.423201  543705 net.go:698] Add success.
I0320 23:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:04:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:04:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0320 23:04:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:04:14.456619  543705 disk_worker.go:494] system disk:vda1
I0320 23:04:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:04:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:04:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:04:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:04:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:04:23.409774  543705 memory.go:184] no items to output this cycle
I0320 23:04:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 23:04:26.565670  543705 disk_info.go:125] begin check local disk info of client
I0320 23:04:26.568418  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:04:26.568424  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae40 0xc00007ae80]
E0320 23:04:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:04:33.409770  543705 memory.go:184] no items to output this cycle
I0320 23:04:33.409779  543705 cpu.go:275] no items to output this cycle
E0320 23:04:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:04:43.409815  543705 memory.go:191] Add success.
I0320 23:04:43.409823  543705 cpu.go:282] Add success.
I0320 23:04:43.419972  543705 net.go:648] Add success.
I0320 23:04:43.423004  543705 net.go:770] primary dev: ETH0
I0320 23:04:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:04:43.423034  543705 net.go:698] Add success.
I0320 23:04:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:04:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:04:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:04:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:04:53.409784  543705 memory.go:184] no items to output this cycle
I0320 23:04:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 23:05:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:05:03.409804  543705 memory.go:184] no items to output this cycle
I0320 23:05:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 23:05:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:05:13.409794  543705 memory.go:191] Add success.
I0320 23:05:13.409809  543705 cpu.go:282] Add success.
W0320 23:05:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:05:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:05:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:05:13.420227  543705 net.go:648] Add success.
I0320 23:05:13.422850  543705 net.go:770] primary dev: ETH0
I0320 23:05:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:05:13.422876  543705 net.go:698] Add success.
I0320 23:05:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:05:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:05:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0320 23:05:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:05:14.456512  543705 disk_worker.go:494] system disk:vda1
I0320 23:05:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:05:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:05:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:05:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:05:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:05:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:05:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:05:23.409804  543705 memory.go:184] no items to output this cycle
I0320 23:05:23.409815  543705 cpu.go:275] no items to output this cycle
I0320 23:05:26.569104  543705 disk_info.go:125] begin check local disk info of client
I0320 23:05:26.571634  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:05:26.571641  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e80 0xc0000c5ec0]
E0320 23:05:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:05:33.409794  543705 memory.go:184] no items to output this cycle
I0320 23:05:33.409805  543705 cpu.go:275] no items to output this cycle
E0320 23:05:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:05:43.409825  543705 memory.go:191] Add success.
I0320 23:05:43.409828  543705 cpu.go:282] Add success.
I0320 23:05:43.419875  543705 net.go:648] Add success.
I0320 23:05:43.422792  543705 net.go:770] primary dev: ETH0
I0320 23:05:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:05:43.422817  543705 net.go:698] Add success.
I0320 23:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:05:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:05:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:05:53.409773  543705 memory.go:184] no items to output this cycle
I0320 23:05:53.409809  543705 cpu.go:275] no items to output this cycle
E0320 23:06:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:06:03.409794  543705 memory.go:184] no items to output this cycle
I0320 23:06:03.409807  543705 cpu.go:275] no items to output this cycle
E0320 23:06:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:06:13.409804  543705 memory.go:191] Add success.
I0320 23:06:13.409820  543705 cpu.go:282] Add success.
W0320 23:06:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:06:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:06:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:06:13.420166  543705 net.go:648] Add success.
I0320 23:06:13.423227  543705 net.go:770] primary dev: ETH0
I0320 23:06:13.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:06:13.423256  543705 net.go:698] Add success.
I0320 23:06:13.777487  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75dfdbf0-6953-408d-a8b8-f34501662a05","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:06:13.777522  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:06:14.453971  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:06:14.455237  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:06:14.455309  543705 disk_worker.go:708] disk space is not compliant
W0320 23:06:14.455313  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:06:14.456882  543705 disk_worker.go:494] system disk:vda1
I0320 23:06:14.456914  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:06:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:06:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:06:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:06:23.409779  543705 memory.go:184] no items to output this cycle
I0320 23:06:23.409782  543705 cpu.go:275] no items to output this cycle
I0320 23:06:26.572015  543705 disk_info.go:125] begin check local disk info of client
I0320 23:06:26.574746  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:06:26.574754  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0320 23:06:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:06:33.409763  543705 memory.go:184] no items to output this cycle
I0320 23:06:33.409796  543705 cpu.go:275] no items to output this cycle
I0320 23:06:38.653730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:06:38.653736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:06:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:06:43.410782  543705 memory.go:191] Add success.
I0320 23:06:43.409804  543705 cpu.go:282] Add success.
I0320 23:06:43.420473  543705 net.go:648] Add success.
I0320 23:06:43.423280  543705 net.go:770] primary dev: ETH0
I0320 23:06:43.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:06:43.423305  543705 net.go:698] Add success.
I0320 23:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:06:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:06:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:06:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:06:53.409780  543705 memory.go:184] no items to output this cycle
I0320 23:06:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 23:07:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:07:03.409780  543705 memory.go:184] no items to output this cycle
I0320 23:07:03.409782  543705 cpu.go:275] no items to output this cycle
E0320 23:07:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:07:13.409797  543705 memory.go:191] Add success.
I0320 23:07:13.409797  543705 cpu.go:282] Add success.
W0320 23:07:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:07:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:07:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:07:13.420112  543705 net.go:648] Add success.
I0320 23:07:13.423359  543705 net.go:770] primary dev: ETH0
I0320 23:07:13.423373  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:07:13.423385  543705 net.go:698] Add success.
I0320 23:07:13.452859  543705 event_worker.go:152] Polling the log file for events...
W0320 23:07:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:07:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0320 23:07:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:07:14.455915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:07:14.455924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:07:14.455930  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:07:14.456552  543705 disk_worker.go:494] system disk:vda1
I0320 23:07:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:07:15.456938  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:07:15.456951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:07:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:07:16.457991  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:07:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:07:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:07:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:07:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:07:23.409783  543705 memory.go:184] no items to output this cycle
I0320 23:07:23.409783  543705 cpu.go:275] no items to output this cycle
I0320 23:07:26.575125  543705 disk_info.go:125] begin check local disk info of client
I0320 23:07:26.577866  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:07:26.577873  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5380 0xc0000c53c0]
E0320 23:07:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:07:33.409793  543705 memory.go:184] no items to output this cycle
I0320 23:07:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 23:07:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:07:43.409780  543705 memory.go:191] Add success.
I0320 23:07:43.409804  543705 cpu.go:282] Add success.
I0320 23:07:43.419853  543705 net.go:648] Add success.
I0320 23:07:43.422883  543705 net.go:770] primary dev: ETH0
I0320 23:07:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:07:43.422912  543705 net.go:698] Add success.
I0320 23:07:46.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:07:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:07:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:07:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:07:53.409776  543705 memory.go:184] no items to output this cycle
I0320 23:07:53.409796  543705 cpu.go:275] no items to output this cycle
E0320 23:08:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:08:03.409768  543705 memory.go:184] no items to output this cycle
I0320 23:08:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 23:08:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:08:13.409786  543705 memory.go:191] Add success.
W0320 23:08:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:08:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:08:13.409821  543705 cpu.go:282] Add success.
I0320 23:08:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:08:13.420159  543705 net.go:648] Add success.
I0320 23:08:13.422818  543705 net.go:770] primary dev: ETH0
I0320 23:08:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:08:13.422842  543705 net.go:698] Add success.
I0320 23:08:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:08:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:08:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 23:08:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:08:14.456595  543705 disk_worker.go:494] system disk:vda1
I0320 23:08:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:08:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:08:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:08:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:08:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:08:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:08:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:08:23.409772  543705 memory.go:184] no items to output this cycle
I0320 23:08:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 23:08:26.578726  543705 disk_info.go:125] begin check local disk info of client
I0320 23:08:26.581511  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:08:26.581517  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366fc0 0xc000367000]
E0320 23:08:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:08:33.409776  543705 memory.go:184] no items to output this cycle
I0320 23:08:33.409783  543705 cpu.go:275] no items to output this cycle
E0320 23:08:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:08:43.409788  543705 cpu.go:282] Add success.
I0320 23:08:43.409795  543705 memory.go:191] Add success.
I0320 23:08:43.419877  543705 net.go:648] Add success.
I0320 23:08:43.422507  543705 net.go:770] primary dev: ETH0
I0320 23:08:43.422519  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:08:43.422533  543705 net.go:698] Add success.
I0320 23:08:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:08:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:08:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:08:53.410458  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:08:53.410464  543705 cpu.go:275] no items to output this cycle
I0320 23:08:53.410477  543705 memory.go:184] no items to output this cycle
E0320 23:09:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:09:03.409777  543705 memory.go:184] no items to output this cycle
I0320 23:09:03.409788  543705 cpu.go:275] no items to output this cycle
E0320 23:09:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:09:13.409816  543705 memory.go:191] Add success.
I0320 23:09:13.409820  543705 cpu.go:282] Add success.
W0320 23:09:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:09:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:09:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:09:13.420135  543705 net.go:648] Add success.
I0320 23:09:13.422638  543705 net.go:770] primary dev: ETH0
I0320 23:09:13.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:09:13.422662  543705 net.go:698] Add success.
I0320 23:09:13.469811  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"063a3a44-5b7a-4373-a901-7c790edcaa19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:09:13.469844  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:09:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:09:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0320 23:09:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:09:14.456607  543705 disk_worker.go:494] system disk:vda1
I0320 23:09:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:09:15.455649  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:09:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:09:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:09:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:09:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:09:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:09:23.409797  543705 memory.go:184] no items to output this cycle
I0320 23:09:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 23:09:26.582213  543705 disk_info.go:125] begin check local disk info of client
I0320 23:09:26.584724  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:09:26.584730  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056dbc0 0xc00056dc00]
E0320 23:09:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:09:33.409798  543705 memory.go:184] no items to output this cycle
I0320 23:09:33.409809  543705 cpu.go:275] no items to output this cycle
I0320 23:09:38.653875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:09:38.653882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:09:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:09:43.410651  543705 memory.go:191] Add success.
I0320 23:09:43.409781  543705 cpu.go:282] Add success.
I0320 23:09:43.420374  543705 net.go:648] Add success.
I0320 23:09:43.422969  543705 net.go:770] primary dev: ETH0
I0320 23:09:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:09:43.422995  543705 net.go:698] Add success.
I0320 23:09:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:09:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:09:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:09:53.409805  543705 memory.go:184] no items to output this cycle
I0320 23:09:53.409817  543705 cpu.go:275] no items to output this cycle
E0320 23:10:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:10:03.409765  543705 memory.go:184] no items to output this cycle
I0320 23:10:03.409797  543705 cpu.go:275] no items to output this cycle
E0320 23:10:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:10:13.409806  543705 memory.go:191] Add success.
I0320 23:10:13.409807  543705 cpu.go:282] Add success.
W0320 23:10:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:10:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:10:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:10:13.420160  543705 net.go:648] Add success.
I0320 23:10:13.422898  543705 net.go:770] primary dev: ETH0
I0320 23:10:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:10:13.422926  543705 net.go:698] Add success.
I0320 23:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:10:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:10:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 23:10:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:10:14.456588  543705 disk_worker.go:494] system disk:vda1
I0320 23:10:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:10:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:10:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:10:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:10:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:10:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:10:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:10:23.409798  543705 memory.go:184] no items to output this cycle
I0320 23:10:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 23:10:26.585168  543705 disk_info.go:125] begin check local disk info of client
I0320 23:10:26.587665  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:10:26.587671  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa5c0 0xc0001fa600]
E0320 23:10:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:10:33.409804  543705 memory.go:184] no items to output this cycle
I0320 23:10:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 23:10:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:10:43.409795  543705 memory.go:191] Add success.
I0320 23:10:43.409820  543705 cpu.go:282] Add success.
I0320 23:10:43.419989  543705 net.go:648] Add success.
I0320 23:10:43.422801  543705 net.go:770] primary dev: ETH0
I0320 23:10:43.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:10:43.422830  543705 net.go:698] Add success.
I0320 23:10:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:10:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:10:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:10:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:10:53.409809  543705 memory.go:184] no items to output this cycle
I0320 23:10:53.409819  543705 cpu.go:275] no items to output this cycle
E0320 23:11:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:11:03.409785  543705 memory.go:184] no items to output this cycle
I0320 23:11:03.409809  543705 cpu.go:275] no items to output this cycle
E0320 23:11:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:11:13.409811  543705 memory.go:191] Add success.
I0320 23:11:13.409813  543705 cpu.go:282] Add success.
W0320 23:11:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:11:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:11:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:11:13.420125  543705 net.go:648] Add success.
I0320 23:11:13.422687  543705 net.go:770] primary dev: ETH0
I0320 23:11:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:11:13.422730  543705 net.go:698] Add success.
I0320 23:11:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:11:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:11:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0320 23:11:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:11:14.456563  543705 disk_worker.go:494] system disk:vda1
I0320 23:11:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:11:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:11:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:11:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:11:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:11:16.472552  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:11:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:11:23.409791  543705 memory.go:184] no items to output this cycle
I0320 23:11:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 23:11:26.588103  543705 disk_info.go:125] begin check local disk info of client
I0320 23:11:26.590613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:11:26.590620  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bc300 0xc0004bc340]
E0320 23:11:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:11:33.409781  543705 memory.go:184] no items to output this cycle
I0320 23:11:33.409786  543705 cpu.go:275] no items to output this cycle
E0320 23:11:43.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:11:43.409860  543705 memory.go:191] Add success.
I0320 23:11:43.409874  543705 cpu.go:282] Add success.
I0320 23:11:43.420061  543705 net.go:648] Add success.
I0320 23:11:43.422994  543705 net.go:770] primary dev: ETH0
I0320 23:11:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:11:43.423020  543705 net.go:698] Add success.
I0320 23:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:11:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:11:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:11:53.409768  543705 memory.go:184] no items to output this cycle
I0320 23:11:53.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:12:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:12:03.409797  543705 memory.go:184] no items to output this cycle
I0320 23:12:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 23:12:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:12:13.409791  543705 memory.go:191] Add success.
W0320 23:12:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:12:13.412259  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:12:13.412264  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:12:13.409923  543705 cpu.go:282] Add success.
I0320 23:12:13.419937  543705 net.go:648] Add success.
I0320 23:12:13.421838  543705 net.go:770] primary dev: ETH0
I0320 23:12:13.421850  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:12:13.421863  543705 net.go:698] Add success.
I0320 23:12:13.468664  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b1ba4977-155b-4924-9961-17925447f00d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:12:13.468700  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 23:12:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:12:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0320 23:12:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:12:14.456817  543705 disk_worker.go:494] system disk:vda1
I0320 23:12:14.456869  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:12:14.457149  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:12:14.457157  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:12:14.457162  543705 custom_config.go:64] query custom config with name: gpu
E0320 23:12:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:12:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:12:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:12:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:12:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:12:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:12:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:12:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:12:23.409771  543705 memory.go:184] no items to output this cycle
I0320 23:12:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 23:12:26.591179  543705 disk_info.go:125] begin check local disk info of client
I0320 23:12:26.593630  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:12:26.593638  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b39c0 0xc0003b3a00]
E0320 23:12:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:12:33.409792  543705 memory.go:184] no items to output this cycle
I0320 23:12:33.409807  543705 cpu.go:275] no items to output this cycle
I0320 23:12:38.654973  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:12:38.654979  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:12:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:12:43.410601  543705 memory.go:191] Add success.
I0320 23:12:43.409824  543705 cpu.go:282] Add success.
I0320 23:12:43.420354  543705 net.go:648] Add success.
I0320 23:12:43.422801  543705 net.go:770] primary dev: ETH0
I0320 23:12:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:12:43.422827  543705 net.go:698] Add success.
I0320 23:12:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:12:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:12:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:12:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:12:53.409770  543705 memory.go:184] no items to output this cycle
I0320 23:12:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:13:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:13:03.409775  543705 memory.go:184] no items to output this cycle
I0320 23:13:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 23:13:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:13:13.409826  543705 memory.go:191] Add success.
I0320 23:13:13.409832  543705 cpu.go:282] Add success.
W0320 23:13:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:13:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:13:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:13:13.420164  543705 net.go:648] Add success.
I0320 23:13:13.423075  543705 net.go:770] primary dev: ETH0
I0320 23:13:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:13:13.423119  543705 net.go:698] Add success.
I0320 23:13:14.454488  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:13:14.454643  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:13:14.454719  543705 disk_worker.go:708] disk space is not compliant
W0320 23:13:14.454722  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:13:14.456338  543705 disk_worker.go:494] system disk:vda1
I0320 23:13:14.456368  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:13:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:13:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:13:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:13:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:13:16.472522  543705 disk_local_worker.go:436] Get disk info: []
I0320 23:13:23.409916  543705 cpu.go:275] no items to output this cycle
E0320 23:13:23.410090  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:13:23.410102  543705 memory.go:184] no items to output this cycle
I0320 23:13:26.594726  543705 disk_info.go:125] begin check local disk info of client
I0320 23:13:26.597248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:13:26.597262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5c40 0xc0002a5c80]
E0320 23:13:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:13:33.409776  543705 memory.go:184] no items to output this cycle
I0320 23:13:33.409801  543705 cpu.go:275] no items to output this cycle
E0320 23:13:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:13:43.409826  543705 memory.go:191] Add success.
I0320 23:13:43.409831  543705 cpu.go:282] Add success.
I0320 23:13:43.420083  543705 net.go:648] Add success.
I0320 23:13:43.422728  543705 net.go:770] primary dev: ETH0
I0320 23:13:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:13:43.422759  543705 net.go:698] Add success.
I0320 23:13:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:13:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:13:53.410401  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:13:53.410418  543705 memory.go:184] no items to output this cycle
I0320 23:13:53.410437  543705 cpu.go:275] no items to output this cycle
E0320 23:14:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:14:03.409795  543705 memory.go:184] no items to output this cycle
I0320 23:14:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 23:14:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:14:13.409825  543705 cpu.go:282] Add success.
I0320 23:14:13.409847  543705 memory.go:191] Add success.
W0320 23:14:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:14:13.409906  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:14:13.409911  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:14:13.420254  543705 net.go:648] Add success.
I0320 23:14:13.423035  543705 net.go:770] primary dev: ETH0
I0320 23:14:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:14:13.423082  543705 net.go:698] Add success.
I0320 23:14:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:14:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:14:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0320 23:14:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:14:14.456633  543705 disk_worker.go:494] system disk:vda1
I0320 23:14:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:14:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:14:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:14:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:14:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:14:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:14:23.409774  543705 memory.go:184] no items to output this cycle
I0320 23:14:23.409792  543705 cpu.go:275] no items to output this cycle
I0320 23:14:26.598238  543705 disk_info.go:125] begin check local disk info of client
I0320 23:14:26.600968  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:14:26.600976  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c97c0 0xc0004c9800]
E0320 23:14:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:14:33.409766  543705 memory.go:184] no items to output this cycle
I0320 23:14:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 23:14:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:14:43.409806  543705 memory.go:191] Add success.
I0320 23:14:43.409807  543705 cpu.go:282] Add success.
I0320 23:14:43.420075  543705 net.go:648] Add success.
I0320 23:14:43.422690  543705 net.go:770] primary dev: ETH0
I0320 23:14:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:14:43.422720  543705 net.go:698] Add success.
I0320 23:14:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:14:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:14:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:14:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:14:53.409779  543705 memory.go:184] no items to output this cycle
I0320 23:14:53.409783  543705 cpu.go:275] no items to output this cycle
E0320 23:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:15:03.409774  543705 memory.go:184] no items to output this cycle
I0320 23:15:03.409778  543705 cpu.go:275] no items to output this cycle
E0320 23:15:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:15:13.409829  543705 memory.go:191] Add success.
I0320 23:15:13.409844  543705 cpu.go:282] Add success.
W0320 23:15:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:15:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:15:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:15:13.420237  543705 net.go:648] Add success.
I0320 23:15:13.423317  543705 net.go:770] primary dev: ETH0
I0320 23:15:13.423331  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:15:13.423343  543705 net.go:698] Add success.
I0320 23:15:13.468095  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a682af4a-5a05-4ef7-9d91-fdedd2928dc7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:15:13.468130  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:15:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:15:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:15:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0320 23:15:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:15:14.456720  543705 disk_worker.go:494] system disk:vda1
I0320 23:15:14.456749  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:15:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:15:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:15:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:15:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:15:23.409811  543705 memory.go:184] no items to output this cycle
I0320 23:15:23.409819  543705 cpu.go:275] no items to output this cycle
I0320 23:15:26.601215  543705 disk_info.go:125] begin check local disk info of client
I0320 23:15:26.603835  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:15:26.603843  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032ecc0 0xc00032ed00]
E0320 23:15:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:15:33.409781  543705 memory.go:184] no items to output this cycle
I0320 23:15:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 23:15:38.655974  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:15:38.655980  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:15:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:15:43.410601  543705 memory.go:191] Add success.
I0320 23:15:43.409800  543705 cpu.go:282] Add success.
I0320 23:15:43.420303  543705 net.go:648] Add success.
I0320 23:15:43.422749  543705 net.go:770] primary dev: ETH0
I0320 23:15:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:15:43.422774  543705 net.go:698] Add success.
I0320 23:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:15:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:15:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:15:53.409771  543705 memory.go:184] no items to output this cycle
I0320 23:15:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 23:16:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:16:03.409778  543705 memory.go:184] no items to output this cycle
I0320 23:16:03.409781  543705 cpu.go:275] no items to output this cycle
E0320 23:16:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:16:13.409818  543705 memory.go:191] Add success.
I0320 23:16:13.409829  543705 cpu.go:282] Add success.
W0320 23:16:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:16:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:16:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:16:13.420373  543705 net.go:648] Add success.
I0320 23:16:13.423613  543705 net.go:770] primary dev: ETH0
I0320 23:16:13.423629  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:16:13.423643  543705 net.go:698] Add success.
I0320 23:16:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:16:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:16:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0320 23:16:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:16:14.456550  543705 disk_worker.go:494] system disk:vda1
I0320 23:16:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:16:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:16:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:16:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:16:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:16:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:16:23.410639  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:16:23.410655  543705 memory.go:184] no items to output this cycle
I0320 23:16:23.410663  543705 cpu.go:275] no items to output this cycle
I0320 23:16:26.604277  543705 disk_info.go:125] begin check local disk info of client
I0320 23:16:26.606776  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:16:26.606783  543705 disk_info.go:196] parse disk info done, disk is : [0xc000566440 0xc000566480]
E0320 23:16:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:16:33.409804  543705 memory.go:184] no items to output this cycle
I0320 23:16:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 23:16:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:16:43.409793  543705 memory.go:191] Add success.
I0320 23:16:43.409814  543705 cpu.go:282] Add success.
I0320 23:16:43.419968  543705 net.go:648] Add success.
I0320 23:16:43.422859  543705 net.go:770] primary dev: ETH0
I0320 23:16:43.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:16:43.422884  543705 net.go:698] Add success.
I0320 23:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:16:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:16:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:16:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:16:53.409777  543705 memory.go:184] no items to output this cycle
I0320 23:16:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 23:17:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:17:03.409786  543705 memory.go:184] no items to output this cycle
I0320 23:17:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 23:17:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:17:13.409819  543705 memory.go:191] Add success.
I0320 23:17:13.409822  543705 cpu.go:282] Add success.
W0320 23:17:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:17:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:17:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:17:13.420147  543705 net.go:648] Add success.
I0320 23:17:13.423160  543705 net.go:770] primary dev: ETH0
I0320 23:17:13.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:17:13.423184  543705 net.go:698] Add success.
I0320 23:17:13.453752  543705 event_worker.go:152] Polling the log file for events...
W0320 23:17:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:17:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 23:17:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:17:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:17:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:17:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:17:14.456638  543705 disk_worker.go:494] system disk:vda1
I0320 23:17:14.456682  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:17:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:17:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:17:16.458037  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:17:16.458047  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:17:16.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:17:16.458114  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:17:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:17:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:17:23.409795  543705 memory.go:184] no items to output this cycle
I0320 23:17:23.409809  543705 cpu.go:275] no items to output this cycle
I0320 23:17:26.606857  543705 disk_info.go:125] begin check local disk info of client
I0320 23:17:26.609637  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:17:26.609655  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7400 0xc0004a7440]
E0320 23:17:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:17:33.409775  543705 memory.go:184] no items to output this cycle
I0320 23:17:33.409790  543705 cpu.go:275] no items to output this cycle
E0320 23:17:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:17:43.409791  543705 memory.go:191] Add success.
I0320 23:17:43.409821  543705 cpu.go:282] Add success.
I0320 23:17:43.419907  543705 net.go:648] Add success.
I0320 23:17:43.422586  543705 net.go:770] primary dev: ETH0
I0320 23:17:43.422600  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:17:43.422614  543705 net.go:698] Add success.
I0320 23:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:17:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:17:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:17:53.409770  543705 memory.go:184] no items to output this cycle
I0320 23:17:53.409782  543705 cpu.go:275] no items to output this cycle
E0320 23:18:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:18:03.409798  543705 memory.go:184] no items to output this cycle
I0320 23:18:03.409811  543705 cpu.go:275] no items to output this cycle
E0320 23:18:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:18:13.409801  543705 memory.go:191] Add success.
W0320 23:18:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:18:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:18:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:18:13.409888  543705 cpu.go:282] Add success.
I0320 23:18:13.420526  543705 net.go:648] Add success.
I0320 23:18:13.423109  543705 net.go:770] primary dev: ETH0
I0320 23:18:13.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:18:13.423148  543705 net.go:698] Add success.
I0320 23:18:13.468437  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"823884a6-95a6-4de4-b954-5655f712c0d7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:18:13.468473  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:18:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:18:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:18:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 23:18:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:18:14.456583  543705 disk_worker.go:494] system disk:vda1
I0320 23:18:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:18:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:18:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:18:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:18:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:18:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:18:23.409799  543705 memory.go:184] no items to output this cycle
I0320 23:18:23.409810  543705 cpu.go:275] no items to output this cycle
I0320 23:18:26.610731  543705 disk_info.go:125] begin check local disk info of client
I0320 23:18:26.613199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:18:26.613205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fee40 0xc0003fee80]
E0320 23:18:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:18:33.409781  543705 memory.go:184] no items to output this cycle
I0320 23:18:33.409786  543705 cpu.go:275] no items to output this cycle
I0320 23:18:38.656982  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:18:38.656990  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:18:43.410664  543705 memory.go:191] Add success.
I0320 23:18:43.409814  543705 cpu.go:282] Add success.
I0320 23:18:43.420484  543705 net.go:648] Add success.
I0320 23:18:43.422994  543705 net.go:770] primary dev: ETH0
I0320 23:18:43.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:18:43.423024  543705 net.go:698] Add success.
I0320 23:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:18:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:18:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:18:53.409787  543705 memory.go:184] no items to output this cycle
I0320 23:18:53.409789  543705 cpu.go:275] no items to output this cycle
E0320 23:19:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:19:03.409797  543705 memory.go:184] no items to output this cycle
I0320 23:19:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:19:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:19:13.409786  543705 memory.go:191] Add success.
I0320 23:19:13.409808  543705 cpu.go:282] Add success.
W0320 23:19:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:19:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:19:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:19:13.420064  543705 net.go:648] Add success.
I0320 23:19:13.423372  543705 net.go:770] primary dev: ETH0
I0320 23:19:13.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:19:13.423395  543705 net.go:698] Add success.
I0320 23:19:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:19:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:19:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0320 23:19:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:19:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 23:19:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:19:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:19:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:19:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:19:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:19:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:19:23.410362  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:19:23.410378  543705 memory.go:184] no items to output this cycle
I0320 23:19:23.410394  543705 cpu.go:275] no items to output this cycle
I0320 23:19:26.613300  543705 disk_info.go:125] begin check local disk info of client
I0320 23:19:26.615818  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:19:26.615825  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be480 0xc0003be4c0]
E0320 23:19:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:19:33.409910  543705 memory.go:184] no items to output this cycle
I0320 23:19:33.409910  543705 cpu.go:275] no items to output this cycle
E0320 23:19:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:19:43.409824  543705 memory.go:191] Add success.
I0320 23:19:43.409833  543705 cpu.go:282] Add success.
I0320 23:19:43.420010  543705 net.go:648] Add success.
I0320 23:19:43.422597  543705 net.go:770] primary dev: ETH0
I0320 23:19:43.422610  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:19:43.422623  543705 net.go:698] Add success.
I0320 23:19:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:19:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:19:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:19:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:19:53.409773  543705 memory.go:184] no items to output this cycle
I0320 23:19:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 23:20:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:20:03.409784  543705 memory.go:184] no items to output this cycle
I0320 23:20:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 23:20:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:20:13.409791  543705 memory.go:191] Add success.
I0320 23:20:13.409795  543705 cpu.go:282] Add success.
W0320 23:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:20:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:20:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:20:13.420080  543705 net.go:648] Add success.
I0320 23:20:13.422714  543705 net.go:770] primary dev: ETH0
I0320 23:20:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:20:13.422739  543705 net.go:698] Add success.
I0320 23:20:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:20:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:20:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 23:20:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:20:14.456568  543705 disk_worker.go:494] system disk:vda1
I0320 23:20:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:20:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:20:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:20:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:20:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:20:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:20:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:20:23.409782  543705 memory.go:184] no items to output this cycle
I0320 23:20:23.409784  543705 cpu.go:275] no items to output this cycle
I0320 23:20:26.616256  543705 disk_info.go:125] begin check local disk info of client
I0320 23:20:26.618765  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:20:26.618772  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b9c0 0xc00047ba00]
E0320 23:20:33.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:20:33.410032  543705 memory.go:184] no items to output this cycle
I0320 23:20:33.409892  543705 cpu.go:275] no items to output this cycle
E0320 23:20:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:20:43.409798  543705 memory.go:191] Add success.
I0320 23:20:43.409809  543705 cpu.go:282] Add success.
I0320 23:20:43.419975  543705 net.go:648] Add success.
I0320 23:20:43.422468  543705 net.go:770] primary dev: ETH0
I0320 23:20:43.422483  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:20:43.422498  543705 net.go:698] Add success.
I0320 23:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:20:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:20:53.410372  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:20:53.410388  543705 memory.go:184] no items to output this cycle
I0320 23:20:53.410428  543705 cpu.go:275] no items to output this cycle
E0320 23:21:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:21:03.409782  543705 memory.go:184] no items to output this cycle
I0320 23:21:03.409786  543705 cpu.go:275] no items to output this cycle
E0320 23:21:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:21:13.409827  543705 memory.go:191] Add success.
I0320 23:21:13.409839  543705 cpu.go:282] Add success.
W0320 23:21:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:21:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:21:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:21:13.420164  543705 net.go:648] Add success.
I0320 23:21:13.422916  543705 net.go:770] primary dev: ETH0
I0320 23:21:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:21:13.422941  543705 net.go:698] Add success.
I0320 23:21:13.629296  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49bbb84d-637f-40de-9e8c-71cb36bc1c57","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:21:13.629330  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:21:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:21:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:21:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0320 23:21:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:21:14.456604  543705 disk_worker.go:494] system disk:vda1
I0320 23:21:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:21:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:21:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:21:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:21:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:21:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:21:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:21:23.409779  543705 memory.go:184] no items to output this cycle
I0320 23:21:23.409796  543705 cpu.go:275] no items to output this cycle
I0320 23:21:26.619368  543705 disk_info.go:125] begin check local disk info of client
I0320 23:21:26.621877  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:21:26.621883  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa580 0xc0002aa5c0]
E0320 23:21:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:21:33.409893  543705 memory.go:184] no items to output this cycle
I0320 23:21:33.409896  543705 cpu.go:275] no items to output this cycle
I0320 23:21:38.657734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:21:38.657742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:21:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:21:43.410577  543705 memory.go:191] Add success.
I0320 23:21:43.409797  543705 cpu.go:282] Add success.
I0320 23:21:43.420299  543705 net.go:648] Add success.
I0320 23:21:43.422921  543705 net.go:770] primary dev: ETH0
I0320 23:21:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:21:43.422947  543705 net.go:698] Add success.
I0320 23:21:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:21:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:21:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:21:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:21:53.409773  543705 memory.go:184] no items to output this cycle
I0320 23:21:53.409790  543705 cpu.go:275] no items to output this cycle
E0320 23:22:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:22:03.409797  543705 memory.go:184] no items to output this cycle
I0320 23:22:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 23:22:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:22:13.409794  543705 memory.go:191] Add success.
W0320 23:22:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0320 23:22:13.409824  543705 cpu.go:282] Add success.
W0320 23:22:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:22:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:22:13.420193  543705 net.go:648] Add success.
I0320 23:22:13.422825  543705 net.go:770] primary dev: ETH0
I0320 23:22:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:22:13.422855  543705 net.go:698] Add success.
W0320 23:22:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:22:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0320 23:22:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:22:14.456881  543705 disk_worker.go:494] system disk:vda1
I0320 23:22:14.456923  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:22:14.457345  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:22:14.457354  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:22:14.457359  543705 custom_config.go:64] query custom config with name: gpu
E0320 23:22:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:22:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:22:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:22:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:22:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:22:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:22:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:22:23.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:22:23.410389  543705 memory.go:184] no items to output this cycle
I0320 23:22:23.410400  543705 cpu.go:275] no items to output this cycle
I0320 23:22:26.622728  543705 disk_info.go:125] begin check local disk info of client
I0320 23:22:26.625160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:22:26.625167  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e280 0xc00049e2c0]
E0320 23:22:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:22:33.409803  543705 memory.go:184] no items to output this cycle
I0320 23:22:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 23:22:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:22:43.409822  543705 memory.go:191] Add success.
I0320 23:22:43.409825  543705 cpu.go:282] Add success.
I0320 23:22:43.419964  543705 net.go:648] Add success.
I0320 23:22:43.422540  543705 net.go:770] primary dev: ETH0
I0320 23:22:43.422554  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:22:43.422568  543705 net.go:698] Add success.
I0320 23:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:22:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:22:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:22:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:22:53.409778  543705 cpu.go:275] no items to output this cycle
I0320 23:22:53.409779  543705 memory.go:184] no items to output this cycle
E0320 23:23:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:23:03.409794  543705 memory.go:184] no items to output this cycle
I0320 23:23:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:23:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:23:13.409826  543705 memory.go:191] Add success.
I0320 23:23:13.409830  543705 cpu.go:282] Add success.
W0320 23:23:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:23:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:23:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:23:13.420145  543705 net.go:648] Add success.
I0320 23:23:13.422818  543705 net.go:770] primary dev: ETH0
I0320 23:23:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:23:13.422847  543705 net.go:698] Add success.
I0320 23:23:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:23:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:23:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0320 23:23:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:23:14.456515  543705 disk_worker.go:494] system disk:vda1
I0320 23:23:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:23:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:23:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:23:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:23:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:23:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:23:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:23:23.409768  543705 memory.go:184] no items to output this cycle
I0320 23:23:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 23:23:26.625356  543705 disk_info.go:125] begin check local disk info of client
I0320 23:23:26.627860  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:23:26.627867  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037c340 0xc00037c380]
E0320 23:23:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:23:33.409800  543705 memory.go:184] no items to output this cycle
I0320 23:23:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 23:23:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:23:43.409790  543705 memory.go:191] Add success.
I0320 23:23:43.409814  543705 cpu.go:282] Add success.
I0320 23:23:43.420110  543705 net.go:648] Add success.
I0320 23:23:43.422973  543705 net.go:770] primary dev: ETH0
I0320 23:23:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:23:43.422997  543705 net.go:698] Add success.
I0320 23:23:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:23:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:23:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:23:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:23:53.409777  543705 memory.go:184] no items to output this cycle
I0320 23:23:53.409781  543705 cpu.go:275] no items to output this cycle
E0320 23:24:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:24:03.409782  543705 cpu.go:275] no items to output this cycle
I0320 23:24:03.409792  543705 memory.go:184] no items to output this cycle
E0320 23:24:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:24:13.409789  543705 memory.go:191] Add success.
I0320 23:24:13.409813  543705 cpu.go:282] Add success.
W0320 23:24:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:24:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:24:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:24:13.420103  543705 net.go:648] Add success.
I0320 23:24:13.422788  543705 net.go:770] primary dev: ETH0
I0320 23:24:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:24:13.422814  543705 net.go:698] Add success.
I0320 23:24:13.469336  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c76ca5e4-6f0e-404b-870e-518c81e73d9b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:24:13.469369  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:24:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:24:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:24:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0320 23:24:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:24:14.456597  543705 disk_worker.go:494] system disk:vda1
I0320 23:24:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:24:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:24:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:24:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:24:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:24:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:24:23.409776  543705 cpu.go:275] no items to output this cycle
I0320 23:24:23.409776  543705 memory.go:184] no items to output this cycle
I0320 23:24:26.628286  543705 disk_info.go:125] begin check local disk info of client
I0320 23:24:26.630772  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:24:26.630778  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab8c0 0xc0001ab900]
E0320 23:24:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:24:33.409780  543705 memory.go:184] no items to output this cycle
I0320 23:24:33.409783  543705 cpu.go:275] no items to output this cycle
I0320 23:24:38.659003  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:24:38.659010  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:24:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:24:43.410540  543705 memory.go:191] Add success.
I0320 23:24:43.409806  543705 cpu.go:282] Add success.
I0320 23:24:43.420291  543705 net.go:648] Add success.
I0320 23:24:43.422808  543705 net.go:770] primary dev: ETH0
I0320 23:24:43.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:24:43.422836  543705 net.go:698] Add success.
I0320 23:24:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:24:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:24:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:24:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:24:53.409793  543705 memory.go:184] no items to output this cycle
I0320 23:24:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 23:25:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:25:03.409779  543705 memory.go:184] no items to output this cycle
I0320 23:25:03.409780  543705 cpu.go:275] no items to output this cycle
W0320 23:25:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:25:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:25:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0320 23:25:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:25:13.409827  543705 memory.go:191] Add success.
I0320 23:25:13.409826  543705 cpu.go:282] Add success.
I0320 23:25:13.420073  543705 net.go:648] Add success.
I0320 23:25:13.423271  543705 net.go:770] primary dev: ETH0
I0320 23:25:13.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:25:13.423297  543705 net.go:698] Add success.
I0320 23:25:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:25:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:25:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0320 23:25:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:25:14.456485  543705 disk_worker.go:494] system disk:vda1
I0320 23:25:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:25:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:25:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:25:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:25:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:25:23.410624  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:25:23.410639  543705 memory.go:184] no items to output this cycle
I0320 23:25:23.410642  543705 cpu.go:275] no items to output this cycle
I0320 23:25:26.631382  543705 disk_info.go:125] begin check local disk info of client
I0320 23:25:26.633903  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:25:26.633909  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bae00 0xc0002bae40]
E0320 23:25:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:25:33.409895  543705 memory.go:184] no items to output this cycle
I0320 23:25:33.409931  543705 cpu.go:275] no items to output this cycle
E0320 23:25:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:25:43.409792  543705 memory.go:191] Add success.
I0320 23:25:43.409792  543705 cpu.go:282] Add success.
I0320 23:25:43.419926  543705 net.go:648] Add success.
I0320 23:25:43.422730  543705 net.go:770] primary dev: ETH0
I0320 23:25:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:25:43.422759  543705 net.go:698] Add success.
I0320 23:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:25:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:25:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:25:53.409782  543705 memory.go:184] no items to output this cycle
I0320 23:25:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 23:26:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:26:03.409788  543705 memory.go:184] no items to output this cycle
I0320 23:26:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 23:26:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:26:13.409835  543705 memory.go:191] Add success.
I0320 23:26:13.409836  543705 cpu.go:282] Add success.
W0320 23:26:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:26:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:26:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:26:13.420174  543705 net.go:648] Add success.
I0320 23:26:13.422876  543705 net.go:770] primary dev: ETH0
I0320 23:26:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:26:13.422906  543705 net.go:698] Add success.
I0320 23:26:14.453932  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:26:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:26:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0320 23:26:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:26:14.456577  543705 disk_worker.go:494] system disk:vda1
I0320 23:26:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:26:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:26:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:26:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:26:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:26:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:26:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:26:23.409797  543705 memory.go:184] no items to output this cycle
I0320 23:26:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 23:26:26.634730  543705 disk_info.go:125] begin check local disk info of client
I0320 23:26:26.637200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:26:26.637213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521f40 0xc000540000]
E0320 23:26:33.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:26:33.409918  543705 memory.go:184] no items to output this cycle
I0320 23:26:33.410111  543705 cpu.go:275] no items to output this cycle
E0320 23:26:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:26:43.409801  543705 memory.go:191] Add success.
I0320 23:26:43.409827  543705 cpu.go:282] Add success.
I0320 23:26:43.420025  543705 net.go:648] Add success.
I0320 23:26:43.422558  543705 net.go:770] primary dev: ETH0
I0320 23:26:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:26:43.422584  543705 net.go:698] Add success.
I0320 23:26:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:26:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:26:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:26:53.409779  543705 memory.go:184] no items to output this cycle
I0320 23:26:53.409814  543705 cpu.go:275] no items to output this cycle
E0320 23:27:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:27:03.409774  543705 memory.go:184] no items to output this cycle
I0320 23:27:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:27:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:27:13.409793  543705 memory.go:191] Add success.
I0320 23:27:13.409821  543705 cpu.go:282] Add success.
W0320 23:27:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:27:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:27:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:27:13.420113  543705 net.go:648] Add success.
I0320 23:27:13.428927  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 23:27:13.429006  543705 net.go:770] primary dev: ETH0
I0320 23:27:13.429020  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:27:13.429035  543705 net.go:698] Add success.
I0320 23:27:13.453625  543705 event_worker.go:152] Polling the log file for events...
I0320 23:27:13.469040  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c729e02-6a4f-45e8-8126-172fd0fcfd2d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:27:13.469071  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 23:27:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:27:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0320 23:27:14.455258  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:27:14.455860  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:27:14.455869  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:27:14.455874  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:27:14.456777  543705 disk_worker.go:494] system disk:vda1
I0320 23:27:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:27:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:27:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:27:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:27:16.457999  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:27:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:27:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:27:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:27:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:27:23.409784  543705 memory.go:184] no items to output this cycle
I0320 23:27:23.409785  543705 cpu.go:275] no items to output this cycle
I0320 23:27:26.637293  543705 disk_info.go:125] begin check local disk info of client
I0320 23:27:26.639804  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:27:26.639811  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004927c0 0xc000492800]
E0320 23:27:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:27:33.409802  543705 memory.go:184] no items to output this cycle
I0320 23:27:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 23:27:38.659152  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:27:38.659159  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:27:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:27:43.410667  543705 memory.go:191] Add success.
I0320 23:27:43.409818  543705 cpu.go:282] Add success.
I0320 23:27:43.420461  543705 net.go:648] Add success.
I0320 23:27:43.423485  543705 net.go:770] primary dev: ETH0
I0320 23:27:43.423500  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:27:43.423515  543705 net.go:698] Add success.
I0320 23:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:27:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:27:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:27:53.409797  543705 memory.go:184] no items to output this cycle
I0320 23:27:53.409807  543705 cpu.go:275] no items to output this cycle
E0320 23:28:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:28:03.409781  543705 memory.go:184] no items to output this cycle
I0320 23:28:03.409789  543705 cpu.go:275] no items to output this cycle
E0320 23:28:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:28:13.409795  543705 memory.go:191] Add success.
I0320 23:28:13.409798  543705 cpu.go:282] Add success.
W0320 23:28:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:28:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:28:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:28:13.420215  543705 net.go:648] Add success.
I0320 23:28:13.423375  543705 net.go:770] primary dev: ETH0
I0320 23:28:13.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:28:13.423402  543705 net.go:698] Add success.
I0320 23:28:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:28:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:28:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0320 23:28:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:28:14.456551  543705 disk_worker.go:494] system disk:vda1
I0320 23:28:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:28:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:28:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:28:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:28:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:28:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:28:23.409780  543705 memory.go:184] no items to output this cycle
I0320 23:28:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 23:28:26.640351  543705 disk_info.go:125] begin check local disk info of client
I0320 23:28:26.642815  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:28:26.642822  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056c000 0xc00056c040]
E0320 23:28:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:28:33.409824  543705 memory.go:184] no items to output this cycle
I0320 23:28:33.409847  543705 cpu.go:275] no items to output this cycle
E0320 23:28:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:28:43.409791  543705 memory.go:191] Add success.
I0320 23:28:43.409818  543705 cpu.go:282] Add success.
I0320 23:28:43.419899  543705 net.go:648] Add success.
I0320 23:28:43.422447  543705 net.go:770] primary dev: ETH0
I0320 23:28:43.422461  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:28:43.422473  543705 net.go:698] Add success.
I0320 23:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:28:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:28:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:28:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:28:53.409784  543705 memory.go:184] no items to output this cycle
I0320 23:28:53.409788  543705 cpu.go:275] no items to output this cycle
E0320 23:29:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:29:03.409774  543705 memory.go:184] no items to output this cycle
I0320 23:29:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 23:29:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:29:13.409800  543705 memory.go:191] Add success.
I0320 23:29:13.409805  543705 cpu.go:282] Add success.
W0320 23:29:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:29:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:29:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:29:13.420069  543705 net.go:648] Add success.
I0320 23:29:13.422861  543705 net.go:770] primary dev: ETH0
I0320 23:29:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:29:13.422892  543705 net.go:698] Add success.
I0320 23:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:29:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:29:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0320 23:29:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:29:14.456584  543705 disk_worker.go:494] system disk:vda1
I0320 23:29:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:29:16.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:29:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:29:16.458120  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:29:16.472505  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:29:23.410298  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:29:23.410316  543705 memory.go:184] no items to output this cycle
I0320 23:29:23.410360  543705 cpu.go:275] no items to output this cycle
I0320 23:29:26.643432  543705 disk_info.go:125] begin check local disk info of client
I0320 23:29:26.645961  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:29:26.645969  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0320 23:29:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:29:33.409786  543705 memory.go:184] no items to output this cycle
I0320 23:29:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 23:29:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:29:43.409787  543705 memory.go:191] Add success.
I0320 23:29:43.409789  543705 cpu.go:282] Add success.
I0320 23:29:43.419968  543705 net.go:648] Add success.
I0320 23:29:43.422694  543705 net.go:770] primary dev: ETH0
I0320 23:29:43.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:29:43.422720  543705 net.go:698] Add success.
I0320 23:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:29:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:29:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:29:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:29:53.409803  543705 memory.go:184] no items to output this cycle
I0320 23:29:53.409815  543705 cpu.go:275] no items to output this cycle
E0320 23:30:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:30:03.409800  543705 memory.go:184] no items to output this cycle
I0320 23:30:03.409818  543705 cpu.go:275] no items to output this cycle
E0320 23:30:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:30:13.409790  543705 memory.go:191] Add success.
I0320 23:30:13.409810  543705 cpu.go:282] Add success.
W0320 23:30:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:30:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:30:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:30:13.420212  543705 net.go:648] Add success.
I0320 23:30:13.422673  543705 net.go:770] primary dev: ETH0
I0320 23:30:13.422686  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:30:13.422698  543705 net.go:698] Add success.
I0320 23:30:13.595763  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ad5b3b1b-c348-48dc-a8c5-cc0355a25c5a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:30:13.595798  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:30:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:30:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:30:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0320 23:30:14.455242  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:30:14.456804  543705 disk_worker.go:494] system disk:vda1
I0320 23:30:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:30:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:30:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:30:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:30:23.409912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:30:23.409933  543705 memory.go:184] no items to output this cycle
I0320 23:30:23.409964  543705 cpu.go:275] no items to output this cycle
I0320 23:30:26.646732  543705 disk_info.go:125] begin check local disk info of client
I0320 23:30:26.649176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:30:26.649192  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273980 0xc0002739c0]
E0320 23:30:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:30:33.409805  543705 memory.go:184] no items to output this cycle
I0320 23:30:33.409826  543705 cpu.go:275] no items to output this cycle
I0320 23:30:38.660002  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:30:38.660009  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:30:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:30:43.410663  543705 memory.go:191] Add success.
I0320 23:30:43.409838  543705 cpu.go:282] Add success.
I0320 23:30:43.420386  543705 net.go:648] Add success.
I0320 23:30:43.423166  543705 net.go:770] primary dev: ETH0
I0320 23:30:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:30:43.423193  543705 net.go:698] Add success.
I0320 23:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:30:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:30:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:30:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:30:53.409765  543705 memory.go:184] no items to output this cycle
I0320 23:30:53.409794  543705 cpu.go:275] no items to output this cycle
E0320 23:31:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:31:03.409800  543705 memory.go:184] no items to output this cycle
I0320 23:31:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:31:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:31:13.409782  543705 memory.go:191] Add success.
I0320 23:31:13.409806  543705 cpu.go:282] Add success.
W0320 23:31:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:31:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:31:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:31:13.420178  543705 net.go:648] Add success.
I0320 23:31:13.422883  543705 net.go:770] primary dev: ETH0
I0320 23:31:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:31:13.422907  543705 net.go:698] Add success.
I0320 23:31:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:31:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:31:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 23:31:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:31:14.456611  543705 disk_worker.go:494] system disk:vda1
I0320 23:31:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:31:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:31:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:31:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:31:23.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:31:23.409878  543705 memory.go:184] no items to output this cycle
I0320 23:31:23.409978  543705 cpu.go:275] no items to output this cycle
I0320 23:31:26.649492  543705 disk_info.go:125] begin check local disk info of client
I0320 23:31:26.651968  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:31:26.651975  543705 disk_info.go:196] parse disk info done, disk is : [0xc000286000 0xc000286040]
E0320 23:31:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:31:33.409797  543705 memory.go:184] no items to output this cycle
I0320 23:31:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 23:31:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:31:43.409792  543705 memory.go:191] Add success.
I0320 23:31:43.409807  543705 cpu.go:282] Add success.
I0320 23:31:43.420077  543705 net.go:648] Add success.
I0320 23:31:43.422491  543705 net.go:770] primary dev: ETH0
I0320 23:31:43.422504  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:31:43.422517  543705 net.go:698] Add success.
I0320 23:31:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:31:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:31:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:31:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:31:53.409764  543705 memory.go:184] no items to output this cycle
I0320 23:31:53.409785  543705 cpu.go:275] no items to output this cycle
E0320 23:32:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:32:03.409775  543705 memory.go:184] no items to output this cycle
I0320 23:32:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 23:32:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:32:13.409796  543705 memory.go:191] Add success.
I0320 23:32:13.409797  543705 cpu.go:282] Add success.
W0320 23:32:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:32:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:32:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:32:13.420167  543705 net.go:648] Add success.
I0320 23:32:13.423190  543705 net.go:770] primary dev: ETH0
I0320 23:32:13.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:32:13.423217  543705 net.go:698] Add success.
W0320 23:32:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:32:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0320 23:32:14.455220  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:32:14.457153  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0320 23:32:14.457163  543705 disk_worker.go:494] system disk:vda1
E0320 23:32:14.457164  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:32:14.457171  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:32:14.457197  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:32:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:32:15.456855  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:32:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:32:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:32:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:32:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:32:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:32:23.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:32:23.409894  543705 cpu.go:275] no items to output this cycle
I0320 23:32:23.409896  543705 memory.go:184] no items to output this cycle
I0320 23:32:26.652465  543705 disk_info.go:125] begin check local disk info of client
I0320 23:32:26.654964  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:32:26.654979  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394340 0xc000394380]
E0320 23:32:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:32:33.409796  543705 memory.go:184] no items to output this cycle
I0320 23:32:33.409814  543705 cpu.go:275] no items to output this cycle
E0320 23:32:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:32:43.409782  543705 memory.go:191] Add success.
I0320 23:32:43.409803  543705 cpu.go:282] Add success.
I0320 23:32:43.419884  543705 net.go:648] Add success.
I0320 23:32:43.422458  543705 net.go:770] primary dev: ETH0
I0320 23:32:43.422473  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:32:43.422488  543705 net.go:698] Add success.
I0320 23:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:32:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:32:53.409766  543705 memory.go:184] no items to output this cycle
I0320 23:32:53.409800  543705 cpu.go:275] no items to output this cycle
E0320 23:33:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:33:03.409772  543705 memory.go:184] no items to output this cycle
I0320 23:33:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 23:33:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:33:13.409786  543705 memory.go:191] Add success.
I0320 23:33:13.409812  543705 cpu.go:282] Add success.
W0320 23:33:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:33:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:33:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:33:13.420146  543705 net.go:648] Add success.
I0320 23:33:13.422955  543705 net.go:770] primary dev: ETH0
I0320 23:33:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:33:13.422993  543705 net.go:698] Add success.
I0320 23:33:13.463883  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"874b215e-630e-4832-ab5e-f5c89cc64cdd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:33:13.463918  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:33:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:33:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:33:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 23:33:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:33:14.456508  543705 disk_worker.go:494] system disk:vda1
I0320 23:33:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:33:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:33:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:33:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:33:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:33:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:33:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:33:23.409810  543705 memory.go:184] no items to output this cycle
I0320 23:33:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 23:33:26.655528  543705 disk_info.go:125] begin check local disk info of client
I0320 23:33:26.658080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:33:26.658087  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e000 0xc00028e040]
E0320 23:33:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:33:33.409797  543705 memory.go:184] no items to output this cycle
I0320 23:33:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 23:33:38.660148  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:33:38.660155  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:33:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:33:43.410641  543705 memory.go:191] Add success.
I0320 23:33:43.409824  543705 cpu.go:282] Add success.
I0320 23:33:43.420409  543705 net.go:648] Add success.
I0320 23:33:43.422873  543705 net.go:770] primary dev: ETH0
I0320 23:33:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:33:43.422898  543705 net.go:698] Add success.
I0320 23:33:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:33:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:33:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:33:53.409802  543705 memory.go:184] no items to output this cycle
I0320 23:33:53.409816  543705 cpu.go:275] no items to output this cycle
E0320 23:34:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:34:03.409773  543705 memory.go:184] no items to output this cycle
I0320 23:34:03.409777  543705 cpu.go:275] no items to output this cycle
E0320 23:34:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:34:13.409797  543705 memory.go:191] Add success.
I0320 23:34:13.409803  543705 cpu.go:282] Add success.
W0320 23:34:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:34:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:34:13.420047  543705 net.go:648] Add success.
I0320 23:34:13.422904  543705 net.go:770] primary dev: ETH0
I0320 23:34:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:34:13.422934  543705 net.go:698] Add success.
I0320 23:34:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:34:14.455265  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:34:14.455282  543705 disk_worker.go:708] disk space is not compliant
W0320 23:34:14.455286  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:34:14.457075  543705 disk_worker.go:494] system disk:vda1
I0320 23:34:14.457108  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:34:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:34:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:34:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:34:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:34:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:34:23.409785  543705 memory.go:184] no items to output this cycle
I0320 23:34:23.409801  543705 cpu.go:275] no items to output this cycle
I0320 23:34:26.658736  543705 disk_info.go:125] begin check local disk info of client
I0320 23:34:26.661247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:34:26.661256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003905c0 0xc000390600]
E0320 23:34:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:34:33.409799  543705 memory.go:184] no items to output this cycle
I0320 23:34:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 23:34:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:34:43.409785  543705 memory.go:191] Add success.
I0320 23:34:43.409787  543705 cpu.go:282] Add success.
I0320 23:34:43.419900  543705 net.go:648] Add success.
I0320 23:34:43.422339  543705 net.go:770] primary dev: ETH0
I0320 23:34:43.422354  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:34:43.422369  543705 net.go:698] Add success.
I0320 23:34:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:34:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:34:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:34:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:34:53.409795  543705 memory.go:184] no items to output this cycle
I0320 23:34:53.409803  543705 cpu.go:275] no items to output this cycle
E0320 23:35:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:35:03.409792  543705 memory.go:184] no items to output this cycle
I0320 23:35:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 23:35:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:35:13.409821  543705 memory.go:191] Add success.
I0320 23:35:13.409828  543705 cpu.go:282] Add success.
W0320 23:35:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:35:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:35:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:35:13.420150  543705 net.go:648] Add success.
I0320 23:35:13.422963  543705 net.go:770] primary dev: ETH0
I0320 23:35:13.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:35:13.422996  543705 net.go:698] Add success.
I0320 23:35:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:35:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:35:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0320 23:35:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:35:14.456495  543705 disk_worker.go:494] system disk:vda1
I0320 23:35:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:35:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:35:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:35:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:35:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:35:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:35:23.409809  543705 memory.go:184] no items to output this cycle
I0320 23:35:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 23:35:26.661342  543705 disk_info.go:125] begin check local disk info of client
I0320 23:35:26.663992  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:35:26.663999  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ffc00 0xc0003ffc40]
E0320 23:35:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:35:33.409800  543705 memory.go:184] no items to output this cycle
I0320 23:35:33.409819  543705 cpu.go:275] no items to output this cycle
E0320 23:35:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:35:43.409792  543705 memory.go:191] Add success.
I0320 23:35:43.409792  543705 cpu.go:282] Add success.
I0320 23:35:43.419974  543705 net.go:648] Add success.
I0320 23:35:43.422717  543705 net.go:770] primary dev: ETH0
I0320 23:35:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:35:43.422742  543705 net.go:698] Add success.
I0320 23:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:35:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:35:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:35:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:35:53.409782  543705 cpu.go:275] no items to output this cycle
I0320 23:35:53.409791  543705 memory.go:184] no items to output this cycle
E0320 23:36:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:36:03.409782  543705 memory.go:184] no items to output this cycle
I0320 23:36:03.409783  543705 cpu.go:275] no items to output this cycle
E0320 23:36:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:36:13.409784  543705 memory.go:191] Add success.
I0320 23:36:13.409803  543705 cpu.go:282] Add success.
W0320 23:36:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:36:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:36:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:36:13.420604  543705 net.go:648] Add success.
I0320 23:36:13.423364  543705 net.go:770] primary dev: ETH0
I0320 23:36:13.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:36:13.423390  543705 net.go:698] Add success.
I0320 23:36:13.469294  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f3bf6ab8-b14d-4571-bdbf-d5dcdab31167","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:36:13.469338  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:36:14.453944  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:36:14.455287  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:36:14.455302  543705 disk_worker.go:708] disk space is not compliant
W0320 23:36:14.455306  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:36:14.456915  543705 disk_worker.go:494] system disk:vda1
I0320 23:36:14.456945  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:36:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:36:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:36:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:36:16.458154  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:36:16.472114  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:36:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:36:23.409791  543705 memory.go:184] no items to output this cycle
I0320 23:36:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 23:36:26.664571  543705 disk_info.go:125] begin check local disk info of client
I0320 23:36:26.667143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:36:26.667150  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b500 0xc00007b540]
E0320 23:36:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:36:33.409766  543705 memory.go:184] no items to output this cycle
I0320 23:36:33.409793  543705 cpu.go:275] no items to output this cycle
I0320 23:36:38.660998  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:36:38.661005  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:36:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:36:43.410627  543705 memory.go:191] Add success.
I0320 23:36:43.409813  543705 cpu.go:282] Add success.
I0320 23:36:43.420326  543705 net.go:648] Add success.
I0320 23:36:43.422938  543705 net.go:770] primary dev: ETH0
I0320 23:36:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:36:43.422972  543705 net.go:698] Add success.
I0320 23:36:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:36:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:36:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:36:53.409776  543705 memory.go:184] no items to output this cycle
I0320 23:36:53.409797  543705 cpu.go:275] no items to output this cycle
E0320 23:37:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:37:03.409808  543705 memory.go:184] no items to output this cycle
I0320 23:37:03.409820  543705 cpu.go:275] no items to output this cycle
E0320 23:37:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:37:13.409802  543705 memory.go:191] Add success.
I0320 23:37:13.409802  543705 cpu.go:282] Add success.
W0320 23:37:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:37:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:37:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:37:13.420162  543705 net.go:648] Add success.
I0320 23:37:13.423043  543705 net.go:770] primary dev: ETH0
I0320 23:37:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:37:13.423074  543705 net.go:698] Add success.
I0320 23:37:13.453613  543705 event_worker.go:152] Polling the log file for events...
W0320 23:37:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:37:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0320 23:37:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:37:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:37:14.456951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:37:14.456957  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:37:14.457002  543705 disk_worker.go:494] system disk:vda1
I0320 23:37:14.457055  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:37:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:37:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:37:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:37:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:37:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:37:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:37:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:37:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:37:23.409793  543705 cpu.go:275] no items to output this cycle
I0320 23:37:23.409802  543705 memory.go:184] no items to output this cycle
I0320 23:37:26.667560  543705 disk_info.go:125] begin check local disk info of client
I0320 23:37:26.670066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:37:26.670073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa480 0xc0001fa4c0]
E0320 23:37:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:37:33.409777  543705 memory.go:184] no items to output this cycle
I0320 23:37:33.409794  543705 cpu.go:275] no items to output this cycle
E0320 23:37:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:37:43.409791  543705 memory.go:191] Add success.
I0320 23:37:43.409814  543705 cpu.go:282] Add success.
I0320 23:37:43.419850  543705 net.go:648] Add success.
I0320 23:37:43.422570  543705 net.go:770] primary dev: ETH0
I0320 23:37:43.422583  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:37:43.422595  543705 net.go:698] Add success.
I0320 23:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:37:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:37:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:37:53.409775  543705 memory.go:184] no items to output this cycle
I0320 23:37:53.409774  543705 cpu.go:275] no items to output this cycle
E0320 23:38:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:38:03.409797  543705 memory.go:184] no items to output this cycle
I0320 23:38:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:38:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:38:13.409835  543705 memory.go:191] Add success.
I0320 23:38:13.409844  543705 cpu.go:282] Add success.
W0320 23:38:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:38:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:38:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:38:13.420244  543705 net.go:648] Add success.
I0320 23:38:13.423067  543705 net.go:770] primary dev: ETH0
I0320 23:38:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:38:13.423098  543705 net.go:698] Add success.
I0320 23:38:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:38:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:38:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0320 23:38:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:38:14.456866  543705 disk_worker.go:494] system disk:vda1
I0320 23:38:14.456912  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:38:15.456029  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:38:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:38:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:38:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:38:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:38:23.409782  543705 memory.go:184] no items to output this cycle
I0320 23:38:23.409803  543705 cpu.go:275] no items to output this cycle
I0320 23:38:26.670728  543705 disk_info.go:125] begin check local disk info of client
I0320 23:38:26.673542  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:38:26.673549  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f340 0xc00035f380]
E0320 23:38:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:38:33.409761  543705 memory.go:184] no items to output this cycle
I0320 23:38:33.409799  543705 cpu.go:275] no items to output this cycle
E0320 23:38:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:38:43.409794  543705 memory.go:191] Add success.
I0320 23:38:43.409813  543705 cpu.go:282] Add success.
I0320 23:38:43.419986  543705 net.go:648] Add success.
I0320 23:38:43.422615  543705 net.go:770] primary dev: ETH0
I0320 23:38:43.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:38:43.422651  543705 net.go:698] Add success.
I0320 23:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:38:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:38:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:38:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:38:53.409792  543705 memory.go:184] no items to output this cycle
I0320 23:38:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 23:39:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:39:03.409778  543705 memory.go:184] no items to output this cycle
I0320 23:39:03.409806  543705 cpu.go:275] no items to output this cycle
E0320 23:39:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:39:13.409818  543705 cpu.go:282] Add success.
I0320 23:39:13.409820  543705 memory.go:191] Add success.
W0320 23:39:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:39:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:39:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:39:13.420126  543705 net.go:648] Add success.
I0320 23:39:13.422815  543705 net.go:770] primary dev: ETH0
I0320 23:39:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:39:13.422840  543705 net.go:698] Add success.
I0320 23:39:13.463791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"995feba5-bcb8-43e3-aa87-54e13322ca3f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:39:13.463826  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:39:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:39:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:39:14.455303  543705 disk_worker.go:708] disk space is not compliant
W0320 23:39:14.455306  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:39:14.457381  543705 disk_worker.go:494] system disk:vda1
I0320 23:39:14.457408  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:39:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:39:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:39:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:39:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:39:23.409802  543705 memory.go:184] no items to output this cycle
I0320 23:39:23.409812  543705 cpu.go:275] no items to output this cycle
I0320 23:39:26.673676  543705 disk_info.go:125] begin check local disk info of client
I0320 23:39:26.676245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:39:26.676252  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7cc0 0xc0003e7d00]
E0320 23:39:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:39:33.409812  543705 memory.go:184] no items to output this cycle
I0320 23:39:33.409828  543705 cpu.go:275] no items to output this cycle
I0320 23:39:38.661738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:39:38.661745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:39:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:39:43.410609  543705 memory.go:191] Add success.
I0320 23:39:43.409846  543705 cpu.go:282] Add success.
I0320 23:39:43.420292  543705 net.go:648] Add success.
I0320 23:39:43.422967  543705 net.go:770] primary dev: ETH0
I0320 23:39:43.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:39:43.422994  543705 net.go:698] Add success.
I0320 23:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:39:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:39:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:39:53.409779  543705 memory.go:184] no items to output this cycle
I0320 23:39:53.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:40:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:40:03.409765  543705 memory.go:184] no items to output this cycle
I0320 23:40:03.409804  543705 cpu.go:275] no items to output this cycle
E0320 23:40:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:40:13.409826  543705 memory.go:191] Add success.
I0320 23:40:13.409829  543705 cpu.go:282] Add success.
W0320 23:40:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:40:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:40:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:40:13.420149  543705 net.go:648] Add success.
I0320 23:40:13.423095  543705 net.go:770] primary dev: ETH0
I0320 23:40:13.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:40:13.423121  543705 net.go:698] Add success.
W0320 23:40:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:40:14.455271  543705 disk_worker.go:708] disk space is not compliant
W0320 23:40:14.455275  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:40:14.455641  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:40:14.457548  543705 disk_worker.go:494] system disk:vda1
I0320 23:40:14.457589  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:40:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:40:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:40:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:40:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:40:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:40:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:40:23.409778  543705 memory.go:184] no items to output this cycle
I0320 23:40:23.409795  543705 cpu.go:275] no items to output this cycle
I0320 23:40:26.676334  543705 disk_info.go:125] begin check local disk info of client
I0320 23:40:26.678899  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:40:26.678914  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029e100 0xc00029e140]
E0320 23:40:33.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:40:33.409850  543705 memory.go:184] no items to output this cycle
I0320 23:40:33.409855  543705 cpu.go:275] no items to output this cycle
E0320 23:40:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:40:43.409783  543705 memory.go:191] Add success.
I0320 23:40:43.409787  543705 cpu.go:282] Add success.
I0320 23:40:43.419900  543705 net.go:648] Add success.
I0320 23:40:43.422641  543705 net.go:770] primary dev: ETH0
I0320 23:40:43.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:40:43.422672  543705 net.go:698] Add success.
I0320 23:40:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:40:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:40:53.409792  543705 memory.go:184] no items to output this cycle
I0320 23:40:53.409802  543705 cpu.go:275] no items to output this cycle
E0320 23:41:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:41:03.409764  543705 memory.go:184] no items to output this cycle
I0320 23:41:03.409801  543705 cpu.go:275] no items to output this cycle
E0320 23:41:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:41:13.409827  543705 memory.go:191] Add success.
I0320 23:41:13.409831  543705 cpu.go:282] Add success.
W0320 23:41:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:41:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:41:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:41:13.420248  543705 net.go:648] Add success.
I0320 23:41:13.422973  543705 net.go:770] primary dev: ETH0
I0320 23:41:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:41:13.422998  543705 net.go:698] Add success.
I0320 23:41:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:41:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:41:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0320 23:41:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:41:14.456570  543705 disk_worker.go:494] system disk:vda1
I0320 23:41:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:41:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:41:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:41:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:41:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:41:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:41:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:41:23.409783  543705 memory.go:184] no items to output this cycle
I0320 23:41:23.409787  543705 cpu.go:275] no items to output this cycle
I0320 23:41:26.679590  543705 disk_info.go:125] begin check local disk info of client
I0320 23:41:26.682442  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:41:26.682449  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7480 0xc0003e74c0]
E0320 23:41:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:41:33.409776  543705 memory.go:184] no items to output this cycle
I0320 23:41:33.409781  543705 cpu.go:275] no items to output this cycle
E0320 23:41:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:41:43.409806  543705 memory.go:191] Add success.
I0320 23:41:43.409816  543705 cpu.go:282] Add success.
I0320 23:41:43.419840  543705 net.go:648] Add success.
I0320 23:41:43.422607  543705 net.go:770] primary dev: ETH0
I0320 23:41:43.422620  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:41:43.422633  543705 net.go:698] Add success.
I0320 23:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:41:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:41:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:41:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:41:53.409791  543705 memory.go:184] no items to output this cycle
I0320 23:41:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 23:42:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:42:03.409771  543705 memory.go:184] no items to output this cycle
I0320 23:42:03.409796  543705 cpu.go:275] no items to output this cycle
E0320 23:42:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:42:13.409816  543705 memory.go:191] Add success.
I0320 23:42:13.409826  543705 cpu.go:282] Add success.
W0320 23:42:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:42:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:42:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:42:13.420166  543705 net.go:648] Add success.
I0320 23:42:13.422961  543705 net.go:770] primary dev: ETH0
I0320 23:42:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:42:13.422991  543705 net.go:698] Add success.
I0320 23:42:13.469163  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f1cac995-f20b-490a-beb8-b3e052200a8d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:42:13.469194  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 23:42:14.455225  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:42:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0320 23:42:14.455240  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:42:14.456259  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:42:14.456269  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:42:14.456274  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:42:14.457205  543705 disk_worker.go:494] system disk:vda1
I0320 23:42:14.457230  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:42:15.457026  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:42:15.457048  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:42:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:42:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:42:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:42:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:42:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:42:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:42:23.409802  543705 memory.go:184] no items to output this cycle
I0320 23:42:23.409813  543705 cpu.go:275] no items to output this cycle
I0320 23:42:26.683560  543705 disk_info.go:125] begin check local disk info of client
I0320 23:42:26.686071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:42:26.686077  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492440 0xc000492480]
E0320 23:42:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:42:33.409805  543705 memory.go:184] no items to output this cycle
I0320 23:42:33.409824  543705 cpu.go:275] no items to output this cycle
I0320 23:42:38.663008  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:42:38.663015  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:42:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:42:43.410812  543705 memory.go:191] Add success.
I0320 23:42:43.409817  543705 cpu.go:282] Add success.
I0320 23:42:43.420495  543705 net.go:648] Add success.
I0320 23:42:43.423447  543705 net.go:770] primary dev: ETH0
I0320 23:42:43.423460  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:42:43.423473  543705 net.go:698] Add success.
I0320 23:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:42:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:42:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:42:53.409797  543705 memory.go:184] no items to output this cycle
I0320 23:42:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 23:43:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:43:03.409772  543705 memory.go:184] no items to output this cycle
I0320 23:43:03.409780  543705 cpu.go:275] no items to output this cycle
E0320 23:43:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:43:13.409793  543705 memory.go:191] Add success.
I0320 23:43:13.409793  543705 cpu.go:282] Add success.
W0320 23:43:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:43:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:43:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:43:13.420170  543705 net.go:648] Add success.
I0320 23:43:13.422941  543705 net.go:770] primary dev: ETH0
I0320 23:43:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:43:13.422972  543705 net.go:698] Add success.
I0320 23:43:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:43:14.455394  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:43:14.455413  543705 disk_worker.go:708] disk space is not compliant
W0320 23:43:14.455418  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:43:14.457017  543705 disk_worker.go:494] system disk:vda1
I0320 23:43:14.457049  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:43:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:43:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:43:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:43:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:43:23.409807  543705 memory.go:184] no items to output this cycle
I0320 23:43:23.409817  543705 cpu.go:275] no items to output this cycle
I0320 23:43:26.686734  543705 disk_info.go:125] begin check local disk info of client
I0320 23:43:26.689249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:43:26.689257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a740 0xc00048a780]
E0320 23:43:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:43:33.409776  543705 memory.go:184] no items to output this cycle
I0320 23:43:33.409788  543705 cpu.go:275] no items to output this cycle
E0320 23:43:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:43:43.409795  543705 memory.go:191] Add success.
I0320 23:43:43.409815  543705 cpu.go:282] Add success.
I0320 23:43:43.419955  543705 net.go:648] Add success.
I0320 23:43:43.423226  543705 net.go:770] primary dev: ETH0
I0320 23:43:43.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:43:43.423251  543705 net.go:698] Add success.
I0320 23:43:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:43:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:43:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:43:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:43:53.409776  543705 memory.go:184] no items to output this cycle
I0320 23:43:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 23:44:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:44:03.409800  543705 memory.go:184] no items to output this cycle
I0320 23:44:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 23:44:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:44:13.409792  543705 memory.go:191] Add success.
I0320 23:44:13.409808  543705 cpu.go:282] Add success.
W0320 23:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:44:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:44:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:44:13.420137  543705 net.go:648] Add success.
I0320 23:44:13.423180  543705 net.go:770] primary dev: ETH0
I0320 23:44:13.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:44:13.423209  543705 net.go:698] Add success.
I0320 23:44:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:44:14.455331  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:44:14.455345  543705 disk_worker.go:708] disk space is not compliant
W0320 23:44:14.455349  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:44:14.457064  543705 disk_worker.go:494] system disk:vda1
I0320 23:44:14.457093  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:44:15.456018  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:44:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:44:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:44:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:44:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:44:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:44:23.409777  543705 memory.go:184] no items to output this cycle
I0320 23:44:23.409798  543705 cpu.go:275] no items to output this cycle
I0320 23:44:26.689652  543705 disk_info.go:125] begin check local disk info of client
I0320 23:44:26.692224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:44:26.692237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e69c0 0xc0003e6a00]
E0320 23:44:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:44:33.409775  543705 memory.go:184] no items to output this cycle
I0320 23:44:33.409784  543705 cpu.go:275] no items to output this cycle
E0320 23:44:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:44:43.409807  543705 memory.go:191] Add success.
I0320 23:44:43.409815  543705 cpu.go:282] Add success.
I0320 23:44:43.419882  543705 net.go:648] Add success.
I0320 23:44:43.422515  543705 net.go:770] primary dev: ETH0
I0320 23:44:43.422531  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:44:43.422544  543705 net.go:698] Add success.
I0320 23:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:44:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:44:53.409777  543705 memory.go:184] no items to output this cycle
I0320 23:44:53.409779  543705 cpu.go:275] no items to output this cycle
E0320 23:45:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:45:03.409768  543705 memory.go:184] no items to output this cycle
I0320 23:45:03.409791  543705 cpu.go:275] no items to output this cycle
E0320 23:45:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:45:13.409813  543705 memory.go:191] Add success.
I0320 23:45:13.409824  543705 cpu.go:282] Add success.
W0320 23:45:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:45:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:45:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:45:13.420143  543705 net.go:648] Add success.
I0320 23:45:13.422764  543705 net.go:770] primary dev: ETH0
I0320 23:45:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:45:13.422790  543705 net.go:698] Add success.
I0320 23:45:13.470307  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2e5183aa-ba7d-4a3f-8b83-82f282333681","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:45:13.470343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:45:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:45:14.455287  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:45:14.455375  543705 disk_worker.go:708] disk space is not compliant
W0320 23:45:14.455389  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:45:14.457212  543705 disk_worker.go:494] system disk:vda1
I0320 23:45:14.457243  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:45:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:45:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:45:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:45:16.472465  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:45:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:45:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 23:45:23.409795  543705 memory.go:184] no items to output this cycle
I0320 23:45:26.692724  543705 disk_info.go:125] begin check local disk info of client
I0320 23:45:26.695490  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:45:26.695499  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003318c0 0xc000331900]
E0320 23:45:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:45:33.409770  543705 memory.go:184] no items to output this cycle
I0320 23:45:33.409801  543705 cpu.go:275] no items to output this cycle
I0320 23:45:38.664016  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:45:38.664023  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:45:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:45:43.410731  543705 memory.go:191] Add success.
I0320 23:45:43.409823  543705 cpu.go:282] Add success.
I0320 23:45:43.420459  543705 net.go:648] Add success.
I0320 23:45:43.423274  543705 net.go:770] primary dev: ETH0
I0320 23:45:43.423288  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:45:43.423300  543705 net.go:698] Add success.
I0320 23:45:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:45:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:45:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:45:53.409793  543705 memory.go:184] no items to output this cycle
I0320 23:45:53.409805  543705 cpu.go:275] no items to output this cycle
E0320 23:46:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:46:03.409780  543705 memory.go:184] no items to output this cycle
I0320 23:46:03.409781  543705 cpu.go:275] no items to output this cycle
W0320 23:46:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:46:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:46:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:46:13.409832  543705 cpu.go:282] Add success.
E0320 23:46:13.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:46:13.409856  543705 memory.go:191] Add success.
I0320 23:46:13.420116  543705 net.go:648] Add success.
I0320 23:46:13.422685  543705 net.go:770] primary dev: ETH0
I0320 23:46:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:46:13.422710  543705 net.go:698] Add success.
I0320 23:46:14.453958  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:46:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:46:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0320 23:46:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:46:14.456618  543705 disk_worker.go:494] system disk:vda1
I0320 23:46:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:46:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:46:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:46:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:46:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:46:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:46:23.410348  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:46:23.410364  543705 memory.go:184] no items to output this cycle
I0320 23:46:23.410378  543705 cpu.go:275] no items to output this cycle
I0320 23:46:26.696706  543705 disk_info.go:125] begin check local disk info of client
I0320 23:46:26.699266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:46:26.699273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ab200 0xc0002ab240]
E0320 23:46:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:46:33.409798  543705 memory.go:184] no items to output this cycle
I0320 23:46:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:46:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:46:43.409812  543705 memory.go:191] Add success.
I0320 23:46:43.409821  543705 cpu.go:282] Add success.
I0320 23:46:43.419826  543705 net.go:648] Add success.
I0320 23:46:43.422586  543705 net.go:770] primary dev: ETH0
I0320 23:46:43.422599  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:46:43.422612  543705 net.go:698] Add success.
I0320 23:46:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:46:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:46:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:46:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:46:53.409772  543705 memory.go:184] no items to output this cycle
I0320 23:46:53.409787  543705 cpu.go:275] no items to output this cycle
E0320 23:47:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:47:03.409787  543705 cpu.go:275] no items to output this cycle
I0320 23:47:03.409791  543705 memory.go:184] no items to output this cycle
E0320 23:47:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:47:13.409820  543705 memory.go:191] Add success.
I0320 23:47:13.409826  543705 cpu.go:282] Add success.
W0320 23:47:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:47:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:47:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:47:13.420267  543705 net.go:648] Add success.
I0320 23:47:13.423134  543705 net.go:770] primary dev: ETH0
I0320 23:47:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:47:13.423163  543705 net.go:698] Add success.
I0320 23:47:13.453664  543705 event_worker.go:152] Polling the log file for events...
W0320 23:47:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:47:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0320 23:47:14.455241  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:47:14.456024  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:47:14.456034  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:47:14.456039  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:47:14.456856  543705 disk_worker.go:494] system disk:vda1
I0320 23:47:14.456884  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:47:15.456785  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:47:15.456794  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:47:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:47:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:47:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:47:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:47:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:47:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:47:23.409799  543705 memory.go:184] no items to output this cycle
I0320 23:47:23.409811  543705 cpu.go:275] no items to output this cycle
I0320 23:47:26.699740  543705 disk_info.go:125] begin check local disk info of client
I0320 23:47:26.702241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:47:26.702247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb200 0xc0001fb240]
E0320 23:47:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:47:33.409800  543705 memory.go:184] no items to output this cycle
I0320 23:47:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:47:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:47:43.409816  543705 memory.go:191] Add success.
I0320 23:47:43.409836  543705 cpu.go:282] Add success.
I0320 23:47:43.419930  543705 net.go:648] Add success.
I0320 23:47:43.422636  543705 net.go:770] primary dev: ETH0
I0320 23:47:43.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:47:43.422665  543705 net.go:698] Add success.
I0320 23:47:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:47:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:47:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:47:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:47:53.409768  543705 memory.go:184] no items to output this cycle
I0320 23:47:53.409804  543705 cpu.go:275] no items to output this cycle
E0320 23:48:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:48:03.409764  543705 memory.go:184] no items to output this cycle
I0320 23:48:03.409794  543705 cpu.go:275] no items to output this cycle
E0320 23:48:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:48:13.409820  543705 memory.go:191] Add success.
I0320 23:48:13.409821  543705 cpu.go:282] Add success.
W0320 23:48:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:48:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:48:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:48:13.420128  543705 net.go:648] Add success.
I0320 23:48:13.422801  543705 net.go:770] primary dev: ETH0
I0320 23:48:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:48:13.422828  543705 net.go:698] Add success.
I0320 23:48:13.468732  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"372f63ab-a9b5-45f4-9201-fc626e2129ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:48:13.468767  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:48:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:48:14.455285  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:48:14.455350  543705 disk_worker.go:708] disk space is not compliant
W0320 23:48:14.455355  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:48:14.456906  543705 disk_worker.go:494] system disk:vda1
I0320 23:48:14.456949  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:48:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:48:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:48:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:48:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:48:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:48:23.409817  543705 memory.go:184] no items to output this cycle
I0320 23:48:23.409828  543705 cpu.go:275] no items to output this cycle
I0320 23:48:26.702741  543705 disk_info.go:125] begin check local disk info of client
I0320 23:48:26.705278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:48:26.705284  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e67c0 0xc0003e6800]
E0320 23:48:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:48:33.409797  543705 memory.go:184] no items to output this cycle
I0320 23:48:33.409811  543705 cpu.go:275] no items to output this cycle
I0320 23:48:38.665023  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:48:38.665029  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:48:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:48:43.410621  543705 memory.go:191] Add success.
I0320 23:48:43.409793  543705 cpu.go:282] Add success.
I0320 23:48:43.420341  543705 net.go:648] Add success.
I0320 23:48:43.422849  543705 net.go:770] primary dev: ETH0
I0320 23:48:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:48:43.422879  543705 net.go:698] Add success.
I0320 23:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:48:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:48:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:48:53.410390  543705 memory.go:184] no items to output this cycle
I0320 23:48:53.410400  543705 cpu.go:275] no items to output this cycle
E0320 23:49:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:49:03.409790  543705 memory.go:184] no items to output this cycle
I0320 23:49:03.409814  543705 cpu.go:275] no items to output this cycle
E0320 23:49:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:49:13.409822  543705 memory.go:191] Add success.
I0320 23:49:13.409822  543705 cpu.go:282] Add success.
W0320 23:49:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:49:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:49:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:49:13.420195  543705 net.go:648] Add success.
I0320 23:49:13.423089  543705 net.go:770] primary dev: ETH0
I0320 23:49:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:49:13.423116  543705 net.go:698] Add success.
I0320 23:49:14.454944  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:49:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:49:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0320 23:49:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:49:14.456466  543705 disk_worker.go:494] system disk:vda1
I0320 23:49:14.456508  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:49:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:49:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:49:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:49:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:49:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:49:23.409812  543705 memory.go:184] no items to output this cycle
I0320 23:49:23.409821  543705 cpu.go:275] no items to output this cycle
I0320 23:49:26.705671  543705 disk_info.go:125] begin check local disk info of client
I0320 23:49:26.708161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:49:26.708167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fd440 0xc0001fd480]
E0320 23:49:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:49:33.409800  543705 memory.go:184] no items to output this cycle
I0320 23:49:33.409813  543705 cpu.go:275] no items to output this cycle
E0320 23:49:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:49:43.409782  543705 memory.go:191] Add success.
I0320 23:49:43.409823  543705 cpu.go:282] Add success.
I0320 23:49:43.420239  543705 net.go:648] Add success.
I0320 23:49:43.423015  543705 net.go:770] primary dev: ETH0
I0320 23:49:43.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:49:43.423042  543705 net.go:698] Add success.
I0320 23:49:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:49:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:49:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:49:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:49:53.409766  543705 memory.go:184] no items to output this cycle
I0320 23:49:53.409795  543705 cpu.go:275] no items to output this cycle
E0320 23:50:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:50:03.409793  543705 memory.go:184] no items to output this cycle
I0320 23:50:03.409805  543705 cpu.go:275] no items to output this cycle
E0320 23:50:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:50:13.409796  543705 memory.go:191] Add success.
I0320 23:50:13.409816  543705 cpu.go:282] Add success.
W0320 23:50:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:50:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:50:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:50:13.420174  543705 net.go:648] Add success.
I0320 23:50:13.422823  543705 net.go:770] primary dev: ETH0
I0320 23:50:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:50:13.422848  543705 net.go:698] Add success.
I0320 23:50:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:50:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:50:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0320 23:50:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:50:14.456608  543705 disk_worker.go:494] system disk:vda1
I0320 23:50:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:50:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:50:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:50:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:50:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:50:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:50:23.409769  543705 memory.go:184] no items to output this cycle
I0320 23:50:23.409800  543705 cpu.go:275] no items to output this cycle
I0320 23:50:26.708760  543705 disk_info.go:125] begin check local disk info of client
I0320 23:50:26.711301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:50:26.711317  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6140 0xc0003e6180]
E0320 23:50:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:50:33.409788  543705 memory.go:184] no items to output this cycle
I0320 23:50:33.409804  543705 cpu.go:275] no items to output this cycle
E0320 23:50:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:50:43.409812  543705 memory.go:191] Add success.
I0320 23:50:43.409819  543705 cpu.go:282] Add success.
I0320 23:50:43.420043  543705 net.go:648] Add success.
I0320 23:50:43.422719  543705 net.go:770] primary dev: ETH0
I0320 23:50:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:50:43.422745  543705 net.go:698] Add success.
I0320 23:50:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:50:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:50:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:50:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:50:53.409796  543705 memory.go:184] no items to output this cycle
I0320 23:50:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 23:51:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:51:03.409773  543705 memory.go:184] no items to output this cycle
I0320 23:51:03.409775  543705 cpu.go:275] no items to output this cycle
E0320 23:51:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:51:13.409817  543705 memory.go:191] Add success.
I0320 23:51:13.409828  543705 cpu.go:282] Add success.
W0320 23:51:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:51:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:51:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:51:13.420103  543705 net.go:648] Add success.
I0320 23:51:13.423099  543705 net.go:770] primary dev: ETH0
I0320 23:51:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:51:13.423126  543705 net.go:698] Add success.
I0320 23:51:13.470701  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4d1e14ba-7a0f-4f1c-aa35-2c2139565c2f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:51:13.470744  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:51:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:51:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:51:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0320 23:51:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:51:14.456693  543705 disk_worker.go:494] system disk:vda1
I0320 23:51:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:51:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:51:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:51:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:51:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:51:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:51:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:51:23.409779  543705 memory.go:184] no items to output this cycle
I0320 23:51:23.409808  543705 cpu.go:275] no items to output this cycle
I0320 23:51:26.711806  543705 disk_info.go:125] begin check local disk info of client
I0320 23:51:26.714414  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:51:26.714421  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003853c0 0xc000385400]
E0320 23:51:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:51:33.409768  543705 memory.go:184] no items to output this cycle
I0320 23:51:33.409792  543705 cpu.go:275] no items to output this cycle
I0320 23:51:38.665735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:51:38.665742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:51:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:51:43.410589  543705 memory.go:191] Add success.
I0320 23:51:43.409812  543705 cpu.go:282] Add success.
I0320 23:51:43.420279  543705 net.go:648] Add success.
I0320 23:51:43.423139  543705 net.go:770] primary dev: ETH0
I0320 23:51:43.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:51:43.423165  543705 net.go:698] Add success.
I0320 23:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:51:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:51:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:51:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:51:53.409773  543705 memory.go:184] no items to output this cycle
I0320 23:51:53.409791  543705 cpu.go:275] no items to output this cycle
E0320 23:52:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:52:03.409768  543705 memory.go:184] no items to output this cycle
I0320 23:52:03.409793  543705 cpu.go:275] no items to output this cycle
E0320 23:52:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:52:13.409786  543705 memory.go:191] Add success.
I0320 23:52:13.409803  543705 cpu.go:282] Add success.
W0320 23:52:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:52:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:52:13.420091  543705 net.go:648] Add success.
I0320 23:52:13.422791  543705 net.go:770] primary dev: ETH0
I0320 23:52:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:52:13.422820  543705 net.go:698] Add success.
W0320 23:52:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:52:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0320 23:52:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:52:14.456615  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:52:14.456624  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:52:14.456630  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:52:14.457061  543705 disk_worker.go:494] system disk:vda1
I0320 23:52:14.457090  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:52:15.456979  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:52:15.456994  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:52:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:52:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:52:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:52:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:52:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:52:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:52:23.409805  543705 memory.go:184] no items to output this cycle
I0320 23:52:23.409816  543705 cpu.go:275] no items to output this cycle
I0320 23:52:26.714744  543705 disk_info.go:125] begin check local disk info of client
I0320 23:52:26.717253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:52:26.717259  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464540 0xc000464580]
E0320 23:52:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:52:33.409804  543705 memory.go:184] no items to output this cycle
I0320 23:52:33.409818  543705 cpu.go:275] no items to output this cycle
E0320 23:52:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:52:43.409804  543705 memory.go:191] Add success.
I0320 23:52:43.409811  543705 cpu.go:282] Add success.
I0320 23:52:43.419996  543705 net.go:648] Add success.
I0320 23:52:43.422770  543705 net.go:770] primary dev: ETH0
I0320 23:52:43.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:52:43.422797  543705 net.go:698] Add success.
I0320 23:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:52:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:52:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:52:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:52:53.409773  543705 memory.go:184] no items to output this cycle
I0320 23:52:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 23:53:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:53:03.409800  543705 memory.go:184] no items to output this cycle
I0320 23:53:03.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:53:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:53:13.409827  543705 memory.go:191] Add success.
I0320 23:53:13.409829  543705 cpu.go:282] Add success.
W0320 23:53:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:53:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:53:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:53:13.420170  543705 net.go:648] Add success.
I0320 23:53:13.422952  543705 net.go:770] primary dev: ETH0
I0320 23:53:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:53:13.422978  543705 net.go:698] Add success.
I0320 23:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:53:14.455370  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:53:14.455387  543705 disk_worker.go:708] disk space is not compliant
W0320 23:53:14.455390  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:53:14.456993  543705 disk_worker.go:494] system disk:vda1
I0320 23:53:14.457022  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:53:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:53:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:53:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:53:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:53:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:53:23.409788  543705 memory.go:184] no items to output this cycle
I0320 23:53:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 23:53:26.717679  543705 disk_info.go:125] begin check local disk info of client
I0320 23:53:26.720192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:53:26.720198  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be00 0xc00007be40]
E0320 23:53:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:53:33.409803  543705 memory.go:184] no items to output this cycle
I0320 23:53:33.409815  543705 cpu.go:275] no items to output this cycle
E0320 23:53:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:53:43.409820  543705 memory.go:191] Add success.
I0320 23:53:43.409829  543705 cpu.go:282] Add success.
I0320 23:53:43.419971  543705 net.go:648] Add success.
I0320 23:53:43.422658  543705 net.go:770] primary dev: ETH0
I0320 23:53:43.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:53:43.422683  543705 net.go:698] Add success.
I0320 23:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:53:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:53:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:53:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:53:53.409781  543705 memory.go:184] no items to output this cycle
I0320 23:53:53.409799  543705 cpu.go:275] no items to output this cycle
E0320 23:54:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:54:03.409783  543705 memory.go:184] no items to output this cycle
I0320 23:54:03.409817  543705 cpu.go:275] no items to output this cycle
E0320 23:54:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:54:13.409837  543705 memory.go:191] Add success.
I0320 23:54:13.409853  543705 cpu.go:282] Add success.
W0320 23:54:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:54:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:54:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:54:13.420208  543705 net.go:648] Add success.
I0320 23:54:13.422868  543705 net.go:770] primary dev: ETH0
I0320 23:54:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:54:13.422902  543705 net.go:698] Add success.
I0320 23:54:13.468828  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6f706eb-63a8-4abc-9d5f-28f0d5c8f296","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:54:13.468860  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0320 23:54:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:54:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:54:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0320 23:54:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:54:14.456624  543705 disk_worker.go:494] system disk:vda1
I0320 23:54:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:54:15.455988  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:54:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:54:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:54:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:54:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:54:23.410419  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:54:23.410435  543705 memory.go:184] no items to output this cycle
I0320 23:54:23.410453  543705 cpu.go:275] no items to output this cycle
I0320 23:54:26.720758  543705 disk_info.go:125] begin check local disk info of client
I0320 23:54:26.723306  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:54:26.723312  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004716c0 0xc000471700]
E0320 23:54:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:54:33.409774  543705 memory.go:184] no items to output this cycle
I0320 23:54:33.409799  543705 cpu.go:275] no items to output this cycle
I0320 23:54:38.667031  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:54:38.667037  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:54:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:54:43.410659  543705 memory.go:191] Add success.
I0320 23:54:43.409833  543705 cpu.go:282] Add success.
I0320 23:54:43.420419  543705 net.go:648] Add success.
I0320 23:54:43.422954  543705 net.go:770] primary dev: ETH0
I0320 23:54:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:54:43.422979  543705 net.go:698] Add success.
I0320 23:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:54:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:54:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:54:53.409775  543705 memory.go:184] no items to output this cycle
I0320 23:54:53.409778  543705 cpu.go:275] no items to output this cycle
E0320 23:55:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:55:03.409783  543705 memory.go:184] no items to output this cycle
I0320 23:55:03.409785  543705 cpu.go:275] no items to output this cycle
E0320 23:55:13.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:55:13.409916  543705 memory.go:191] Add success.
I0320 23:55:13.409922  543705 cpu.go:282] Add success.
W0320 23:55:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:55:13.409974  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:55:13.409979  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:55:13.419729  543705 net.go:648] Add success.
I0320 23:55:13.422533  543705 net.go:770] primary dev: ETH0
I0320 23:55:13.422547  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:55:13.422561  543705 net.go:698] Add success.
I0320 23:55:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:55:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:55:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0320 23:55:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:55:14.456503  543705 disk_worker.go:494] system disk:vda1
I0320 23:55:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:55:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:55:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:55:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:55:23.409784  543705 memory.go:184] no items to output this cycle
I0320 23:55:23.409802  543705 cpu.go:275] no items to output this cycle
I0320 23:55:26.723770  543705 disk_info.go:125] begin check local disk info of client
I0320 23:55:26.726304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:55:26.726310  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fac00 0xc0001fac40]
E0320 23:55:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:55:33.409792  543705 memory.go:184] no items to output this cycle
I0320 23:55:33.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:55:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:55:43.409774  543705 memory.go:191] Add success.
I0320 23:55:43.409806  543705 cpu.go:282] Add success.
I0320 23:55:43.419881  543705 net.go:648] Add success.
I0320 23:55:43.422941  543705 net.go:770] primary dev: ETH0
I0320 23:55:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:55:43.422971  543705 net.go:698] Add success.
I0320 23:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:55:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:55:53.409775  543705 memory.go:184] no items to output this cycle
I0320 23:55:53.409793  543705 cpu.go:275] no items to output this cycle
E0320 23:56:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:56:03.409800  543705 memory.go:184] no items to output this cycle
I0320 23:56:03.409812  543705 cpu.go:275] no items to output this cycle
E0320 23:56:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:56:13.409804  543705 memory.go:191] Add success.
I0320 23:56:13.409824  543705 cpu.go:282] Add success.
W0320 23:56:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:56:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:56:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:56:13.420191  543705 net.go:648] Add success.
I0320 23:56:13.423125  543705 net.go:770] primary dev: ETH0
I0320 23:56:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:56:13.423151  543705 net.go:698] Add success.
I0320 23:56:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:56:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:56:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0320 23:56:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:56:14.456518  543705 disk_worker.go:494] system disk:vda1
I0320 23:56:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:56:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:56:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:56:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:56:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:56:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:56:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:56:23.409777  543705 memory.go:184] no items to output this cycle
I0320 23:56:23.409799  543705 cpu.go:275] no items to output this cycle
I0320 23:56:26.726729  543705 disk_info.go:125] begin check local disk info of client
I0320 23:56:26.729285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:56:26.729292  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bcc0 0xc00007bd00]
E0320 23:56:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:56:33.409801  543705 memory.go:184] no items to output this cycle
I0320 23:56:33.409810  543705 cpu.go:275] no items to output this cycle
E0320 23:56:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:56:43.409783  543705 memory.go:191] Add success.
I0320 23:56:43.409802  543705 cpu.go:282] Add success.
I0320 23:56:43.420067  543705 net.go:648] Add success.
I0320 23:56:43.422726  543705 net.go:770] primary dev: ETH0
I0320 23:56:43.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:56:43.422756  543705 net.go:698] Add success.
I0320 23:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:56:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:56:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:56:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:56:53.409783  543705 cpu.go:275] no items to output this cycle
I0320 23:56:53.409787  543705 memory.go:184] no items to output this cycle
E0320 23:57:03.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:57:03.409876  543705 memory.go:184] no items to output this cycle
I0320 23:57:03.409941  543705 cpu.go:275] no items to output this cycle
E0320 23:57:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:57:13.409791  543705 memory.go:191] Add success.
I0320 23:57:13.409816  543705 cpu.go:282] Add success.
W0320 23:57:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:57:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:57:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:57:13.420145  543705 net.go:648] Add success.
I0320 23:57:13.422772  543705 net.go:770] primary dev: ETH0
I0320 23:57:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:57:13.422797  543705 net.go:698] Add success.
I0320 23:57:13.429373  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0320 23:57:13.453557  543705 event_worker.go:152] Polling the log file for events...
I0320 23:57:13.469760  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0cf02da0-1cc0-4a37-b8cc-78ed6255f275","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0320 23:57:13.469793  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0320 23:57:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:57:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0320 23:57:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0320 23:57:14.455877  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0320 23:57:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0320 23:57:14.455891  543705 custom_config.go:64] query custom config with name: gpu
I0320 23:57:14.456536  543705 disk_worker.go:494] system disk:vda1
I0320 23:57:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
E0320 23:57:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0320 23:57:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:57:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0320 23:57:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0320 23:57:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:57:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:57:16.472293  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:57:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:57:23.409789  543705 memory.go:184] no items to output this cycle
I0320 23:57:23.409790  543705 cpu.go:275] no items to output this cycle
I0320 23:57:26.729674  543705 disk_info.go:125] begin check local disk info of client
I0320 23:57:26.732182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:57:26.732188  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304d80 0xc000304dc0]
E0320 23:57:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:57:33.409803  543705 memory.go:184] no items to output this cycle
I0320 23:57:33.409819  543705 cpu.go:275] no items to output this cycle
I0320 23:57:38.667182  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0320 23:57:38.667188  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0320 23:57:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:57:43.410690  543705 memory.go:191] Add success.
I0320 23:57:43.409816  543705 cpu.go:282] Add success.
I0320 23:57:43.420377  543705 net.go:648] Add success.
I0320 23:57:43.422950  543705 net.go:770] primary dev: ETH0
I0320 23:57:43.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:57:43.422978  543705 net.go:698] Add success.
I0320 23:57:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:57:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:57:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:57:53.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:57:53.409895  543705 memory.go:184] no items to output this cycle
I0320 23:57:53.409964  543705 cpu.go:275] no items to output this cycle
E0320 23:58:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:58:03.409767  543705 memory.go:184] no items to output this cycle
I0320 23:58:03.409808  543705 cpu.go:275] no items to output this cycle
E0320 23:58:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:58:13.409791  543705 memory.go:191] Add success.
W0320 23:58:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:58:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:58:13.409830  543705 cpu.go:282] Add success.
I0320 23:58:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:58:13.420039  543705 net.go:648] Add success.
I0320 23:58:13.422740  543705 net.go:770] primary dev: ETH0
I0320 23:58:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:58:13.422766  543705 net.go:698] Add success.
I0320 23:58:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:58:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:58:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0320 23:58:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:58:14.456617  543705 disk_worker.go:494] system disk:vda1
I0320 23:58:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:58:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:58:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:58:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:58:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:58:16.472541  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:58:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:58:23.409781  543705 memory.go:184] no items to output this cycle
I0320 23:58:23.409781  543705 cpu.go:275] no items to output this cycle
I0320 23:58:26.732891  543705 disk_info.go:125] begin check local disk info of client
I0320 23:58:26.735498  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:58:26.735505  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304f00 0xc000304f40]
E0320 23:58:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:58:33.409793  543705 memory.go:184] no items to output this cycle
I0320 23:58:33.409812  543705 cpu.go:275] no items to output this cycle
E0320 23:58:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:58:43.409822  543705 memory.go:191] Add success.
I0320 23:58:43.409827  543705 cpu.go:282] Add success.
I0320 23:58:43.419965  543705 net.go:648] Add success.
I0320 23:58:43.423128  543705 net.go:770] primary dev: ETH0
I0320 23:58:43.423141  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:58:43.423154  543705 net.go:698] Add success.
I0320 23:58:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:58:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:58:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:58:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:58:53.409796  543705 memory.go:184] no items to output this cycle
I0320 23:58:53.409806  543705 cpu.go:275] no items to output this cycle
E0320 23:59:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:59:03.409790  543705 memory.go:184] no items to output this cycle
I0320 23:59:03.409797  543705 cpu.go:275] no items to output this cycle
W0320 23:59:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0320 23:59:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0320 23:59:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0320 23:59:13.409801  543705 cpu.go:282] Add success.
E0320 23:59:13.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:59:13.409853  543705 memory.go:191] Add success.
I0320 23:59:13.420106  543705 net.go:648] Add success.
I0320 23:59:13.423083  543705 net.go:770] primary dev: ETH0
I0320 23:59:13.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:59:13.423113  543705 net.go:698] Add success.
I0320 23:59:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0320 23:59:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0320 23:59:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0320 23:59:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0320 23:59:14.456611  543705 disk_worker.go:494] system disk:vda1
I0320 23:59:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0320 23:59:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0320 23:59:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:59:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0320 23:59:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0320 23:59:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:59:23.409787  543705 memory.go:184] no items to output this cycle
I0320 23:59:23.409788  543705 cpu.go:275] no items to output this cycle
I0320 23:59:26.736910  543705 disk_info.go:125] begin check local disk info of client
I0320 23:59:26.739411  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0320 23:59:26.739418  543705 disk_info.go:196] parse disk info done, disk is : [0xc000540f40 0xc000540f80]
E0320 23:59:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:59:33.409790  543705 memory.go:184] no items to output this cycle
I0320 23:59:33.409802  543705 cpu.go:275] no items to output this cycle
E0320 23:59:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:59:43.409778  543705 memory.go:191] Add success.
I0320 23:59:43.409806  543705 cpu.go:282] Add success.
I0320 23:59:43.419892  543705 net.go:648] Add success.
I0320 23:59:43.422383  543705 net.go:770] primary dev: ETH0
I0320 23:59:43.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0320 23:59:43.422425  543705 net.go:698] Add success.
I0320 23:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0320 23:59:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0320 23:59:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0320 23:59:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0320 23:59:53.409879  543705 cpu.go:275] no items to output this cycle
I0320 23:59:53.409892  543705 memory.go:184] no items to output this cycle
E0321 00:00:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:00:03.409785  543705 memory.go:184] no items to output this cycle
I0321 00:00:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 00:00:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:00:13.409787  543705 memory.go:191] Add success.
I0321 00:00:13.409810  543705 cpu.go:282] Add success.
W0321 00:00:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:00:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:00:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:00:13.420282  543705 net.go:648] Add success.
I0321 00:00:13.423449  543705 net.go:770] primary dev: ETH0
I0321 00:00:13.423473  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:00:13.423485  543705 net.go:698] Add success.
I0321 00:00:13.464284  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fda9fe65-c9dc-4c83-924c-a6bd0ac37f99","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:00:13.464317  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:00:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:00:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:00:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 00:00:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:00:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 00:00:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:00:15.455639  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:00:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:00:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:00:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:00:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:00:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:00:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:00:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 00:00:26.739899  543705 disk_info.go:125] begin check local disk info of client
I0321 00:00:26.742447  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:00:26.742454  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331d40 0xc000331d80]
E0321 00:00:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:00:33.409802  543705 memory.go:184] no items to output this cycle
I0321 00:00:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 00:00:38.667330  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:00:38.667337  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:00:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:00:43.410803  543705 memory.go:191] Add success.
I0321 00:00:43.409795  543705 cpu.go:282] Add success.
I0321 00:00:43.420503  543705 net.go:648] Add success.
I0321 00:00:43.423619  543705 net.go:770] primary dev: ETH0
I0321 00:00:43.423631  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:00:43.423645  543705 net.go:698] Add success.
I0321 00:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:00:53.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:00:53.409882  543705 memory.go:184] no items to output this cycle
I0321 00:00:53.409959  543705 cpu.go:275] no items to output this cycle
E0321 00:01:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:01:03.409793  543705 memory.go:184] no items to output this cycle
I0321 00:01:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 00:01:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:01:13.409799  543705 memory.go:191] Add success.
I0321 00:01:13.409801  543705 cpu.go:282] Add success.
W0321 00:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:01:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:01:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:01:13.420173  543705 net.go:648] Add success.
I0321 00:01:13.422800  543705 net.go:770] primary dev: ETH0
I0321 00:01:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:01:13.422828  543705 net.go:698] Add success.
I0321 00:01:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:01:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:01:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 00:01:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:01:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 00:01:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:01:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:01:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:01:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:01:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:01:23.409782  543705 memory.go:184] no items to output this cycle
I0321 00:01:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 00:01:26.742731  543705 disk_info.go:125] begin check local disk info of client
I0321 00:01:26.745195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:01:26.745201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486740 0xc000486780]
E0321 00:01:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:01:33.409797  543705 memory.go:184] no items to output this cycle
I0321 00:01:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 00:01:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:01:43.409794  543705 memory.go:191] Add success.
I0321 00:01:43.409826  543705 cpu.go:282] Add success.
I0321 00:01:43.419885  543705 net.go:648] Add success.
I0321 00:01:43.422513  543705 net.go:770] primary dev: ETH0
I0321 00:01:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:01:43.422540  543705 net.go:698] Add success.
I0321 00:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:01:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:01:53.409802  543705 memory.go:184] no items to output this cycle
I0321 00:01:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 00:02:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:02:03.409794  543705 memory.go:184] no items to output this cycle
I0321 00:02:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 00:02:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:02:13.409812  543705 memory.go:191] Add success.
I0321 00:02:13.409813  543705 cpu.go:282] Add success.
W0321 00:02:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:02:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:02:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:02:13.420139  543705 net.go:648] Add success.
I0321 00:02:13.422936  543705 net.go:770] primary dev: ETH0
I0321 00:02:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:02:13.422964  543705 net.go:698] Add success.
W0321 00:02:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:02:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 00:02:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:02:14.455877  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:02:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:02:14.455891  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:02:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 00:02:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:02:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:02:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:02:16.458095  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:02:16.458166  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0321 00:02:16.458167  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:02:16.458187  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:02:16.472570  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:02:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:02:23.409784  543705 memory.go:184] no items to output this cycle
I0321 00:02:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 00:02:26.745674  543705 disk_info.go:125] begin check local disk info of client
I0321 00:02:26.748190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:02:26.748196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb980 0xc0001fb9c0]
E0321 00:02:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:02:33.409806  543705 memory.go:184] no items to output this cycle
I0321 00:02:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 00:02:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:02:43.409809  543705 memory.go:191] Add success.
I0321 00:02:43.409824  543705 cpu.go:282] Add success.
I0321 00:02:43.420061  543705 net.go:648] Add success.
I0321 00:02:43.422817  543705 net.go:770] primary dev: ETH0
I0321 00:02:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:02:43.422842  543705 net.go:698] Add success.
I0321 00:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:02:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:02:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:02:53.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:02:53.410382  543705 cpu.go:275] no items to output this cycle
I0321 00:02:53.410396  543705 memory.go:184] no items to output this cycle
E0321 00:03:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:03:03.409777  543705 memory.go:184] no items to output this cycle
I0321 00:03:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 00:03:13.410519  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:03:13.410549  543705 memory.go:191] Add success.
I0321 00:03:13.410563  543705 cpu.go:282] Add success.
W0321 00:03:13.410576  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:03:13.410588  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:03:13.410591  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:03:13.419874  543705 net.go:648] Add success.
I0321 00:03:13.422768  543705 net.go:770] primary dev: ETH0
I0321 00:03:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:03:13.422794  543705 net.go:698] Add success.
I0321 00:03:13.571631  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"201d852c-da35-40e2-ba71-1adc763a7ae2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:03:13.571662  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:03:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:03:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:03:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 00:03:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:03:14.456682  543705 disk_worker.go:494] system disk:vda1
I0321 00:03:14.456711  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:03:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:03:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:03:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:03:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:03:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:03:23.409785  543705 memory.go:184] no items to output this cycle
I0321 00:03:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 00:03:26.748929  543705 disk_info.go:125] begin check local disk info of client
I0321 00:03:26.751717  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:03:26.751722  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004614c0 0xc000461500]
E0321 00:03:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:03:33.409770  543705 memory.go:184] no items to output this cycle
I0321 00:03:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 00:03:38.667475  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:03:38.667481  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:03:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:03:43.410727  543705 memory.go:191] Add success.
I0321 00:03:43.409816  543705 cpu.go:282] Add success.
I0321 00:03:43.420447  543705 net.go:648] Add success.
I0321 00:03:43.423737  543705 net.go:770] primary dev: ETH0
I0321 00:03:43.423751  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:03:43.423766  543705 net.go:698] Add success.
I0321 00:03:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:03:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:03:53.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:03:53.409935  543705 cpu.go:275] no items to output this cycle
I0321 00:03:53.409942  543705 memory.go:184] no items to output this cycle
E0321 00:04:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:04:03.409786  543705 cpu.go:275] no items to output this cycle
I0321 00:04:03.409789  543705 memory.go:184] no items to output this cycle
E0321 00:04:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:04:13.409805  543705 memory.go:191] Add success.
I0321 00:04:13.409810  543705 cpu.go:282] Add success.
W0321 00:04:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:04:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:04:13.420197  543705 net.go:648] Add success.
I0321 00:04:13.423220  543705 net.go:770] primary dev: ETH0
I0321 00:04:13.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:04:13.423250  543705 net.go:698] Add success.
I0321 00:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:04:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:04:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 00:04:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:04:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 00:04:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:04:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:04:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:04:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:04:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:04:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:04:23.409771  543705 memory.go:184] no items to output this cycle
I0321 00:04:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 00:04:26.752944  543705 disk_info.go:125] begin check local disk info of client
I0321 00:04:26.755519  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:04:26.755526  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007adc0 0xc00007ae00]
E0321 00:04:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:04:33.409770  543705 memory.go:184] no items to output this cycle
I0321 00:04:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 00:04:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:04:43.409815  543705 memory.go:191] Add success.
I0321 00:04:43.409822  543705 cpu.go:282] Add success.
I0321 00:04:43.419943  543705 net.go:648] Add success.
I0321 00:04:43.423045  543705 net.go:770] primary dev: ETH0
I0321 00:04:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:04:43.423070  543705 net.go:698] Add success.
I0321 00:04:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:04:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:04:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:04:53.409807  543705 memory.go:184] no items to output this cycle
I0321 00:04:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 00:05:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:05:03.409898  543705 memory.go:184] no items to output this cycle
I0321 00:05:03.409932  543705 cpu.go:275] no items to output this cycle
E0321 00:05:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:05:13.409793  543705 memory.go:191] Add success.
W0321 00:05:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 00:05:13.409825  543705 cpu.go:282] Add success.
W0321 00:05:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:05:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:05:13.420183  543705 net.go:648] Add success.
I0321 00:05:13.423013  543705 net.go:770] primary dev: ETH0
I0321 00:05:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:05:13.423055  543705 net.go:698] Add success.
I0321 00:05:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:05:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:05:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 00:05:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:05:14.456511  543705 disk_worker.go:494] system disk:vda1
I0321 00:05:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:05:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:05:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:05:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:05:23.409774  543705 memory.go:184] no items to output this cycle
I0321 00:05:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 00:05:26.757039  543705 disk_info.go:125] begin check local disk info of client
I0321 00:05:26.759559  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:05:26.759574  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0321 00:05:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:05:33.409774  543705 memory.go:184] no items to output this cycle
I0321 00:05:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 00:05:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:05:43.409788  543705 memory.go:191] Add success.
I0321 00:05:43.409798  543705 cpu.go:282] Add success.
I0321 00:05:43.419845  543705 net.go:648] Add success.
I0321 00:05:43.422580  543705 net.go:770] primary dev: ETH0
I0321 00:05:43.422593  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:05:43.422606  543705 net.go:698] Add success.
I0321 00:05:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:05:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:05:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:05:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:05:53.409797  543705 memory.go:184] no items to output this cycle
I0321 00:05:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 00:06:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:06:03.409778  543705 memory.go:184] no items to output this cycle
I0321 00:06:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 00:06:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:06:13.409927  543705 cpu.go:282] Add success.
I0321 00:06:13.409961  543705 memory.go:191] Add success.
W0321 00:06:13.409996  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:06:13.410015  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:06:13.410020  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:06:13.419714  543705 net.go:648] Add success.
I0321 00:06:13.423047  543705 net.go:770] primary dev: ETH0
I0321 00:06:13.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:06:13.423072  543705 net.go:698] Add success.
I0321 00:06:13.469145  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c2e7f32b-036c-4478-a72f-220c59080492","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:06:13.469186  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:06:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:06:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:06:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 00:06:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:06:14.456780  543705 disk_worker.go:494] system disk:vda1
I0321 00:06:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:06:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:06:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:06:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:06:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:06:23.409773  543705 memory.go:184] no items to output this cycle
I0321 00:06:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 00:06:26.759633  543705 disk_info.go:125] begin check local disk info of client
I0321 00:06:26.762235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:06:26.762242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e79c0 0xc0003e7a00]
E0321 00:06:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:06:33.409777  543705 memory.go:184] no items to output this cycle
I0321 00:06:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 00:06:38.667618  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:06:38.667637  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:06:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:06:43.410707  543705 memory.go:191] Add success.
I0321 00:06:43.409816  543705 cpu.go:282] Add success.
I0321 00:06:43.420428  543705 net.go:648] Add success.
I0321 00:06:43.423148  543705 net.go:770] primary dev: ETH0
I0321 00:06:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:06:43.423177  543705 net.go:698] Add success.
I0321 00:06:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:06:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:06:53.409807  543705 memory.go:184] no items to output this cycle
I0321 00:06:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 00:07:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:07:03.409799  543705 memory.go:184] no items to output this cycle
I0321 00:07:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 00:07:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:07:13.409789  543705 memory.go:191] Add success.
W0321 00:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 00:07:13.409822  543705 cpu.go:282] Add success.
W0321 00:07:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:07:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:07:13.420231  543705 net.go:648] Add success.
I0321 00:07:13.423025  543705 net.go:770] primary dev: ETH0
I0321 00:07:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:07:13.423050  543705 net.go:698] Add success.
I0321 00:07:13.453725  543705 event_worker.go:152] Polling the log file for events...
W0321 00:07:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:07:14.455352  543705 disk_worker.go:708] disk space is not compliant
W0321 00:07:14.455359  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:07:14.457036  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:07:14.457045  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:07:14.457051  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:07:14.457301  543705 disk_worker.go:494] system disk:vda1
I0321 00:07:14.457341  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:07:15.456855  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:07:15.456863  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:07:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:07:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:07:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:07:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:07:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:07:23.409794  543705 memory.go:184] no items to output this cycle
I0321 00:07:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 00:07:26.762739  543705 disk_info.go:125] begin check local disk info of client
I0321 00:07:26.765268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:07:26.765275  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515940 0xc000515980]
E0321 00:07:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:07:33.409782  543705 memory.go:184] no items to output this cycle
I0321 00:07:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 00:07:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:07:43.409783  543705 memory.go:191] Add success.
I0321 00:07:43.409820  543705 cpu.go:282] Add success.
I0321 00:07:43.419855  543705 net.go:648] Add success.
I0321 00:07:43.422686  543705 net.go:770] primary dev: ETH0
I0321 00:07:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:07:43.422711  543705 net.go:698] Add success.
I0321 00:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:07:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:07:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:07:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:07:53.409789  543705 memory.go:184] no items to output this cycle
I0321 00:07:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 00:08:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:08:03.409767  543705 memory.go:184] no items to output this cycle
I0321 00:08:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 00:08:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:08:13.409793  543705 memory.go:191] Add success.
I0321 00:08:13.409821  543705 cpu.go:282] Add success.
W0321 00:08:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:08:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:08:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:08:13.420289  543705 net.go:648] Add success.
I0321 00:08:13.422973  543705 net.go:770] primary dev: ETH0
I0321 00:08:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:08:13.422999  543705 net.go:698] Add success.
I0321 00:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:08:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:08:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 00:08:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:08:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 00:08:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:08:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:08:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:08:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:08:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:08:16.472527  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:08:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:08:23.409769  543705 memory.go:184] no items to output this cycle
I0321 00:08:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 00:08:26.765670  543705 disk_info.go:125] begin check local disk info of client
I0321 00:08:26.768492  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:08:26.768497  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa680 0xc0001aa6c0]
E0321 00:08:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:08:33.409780  543705 memory.go:184] no items to output this cycle
I0321 00:08:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:08:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:08:43.409783  543705 memory.go:191] Add success.
I0321 00:08:43.409808  543705 cpu.go:282] Add success.
I0321 00:08:43.419835  543705 net.go:648] Add success.
I0321 00:08:43.422443  543705 net.go:770] primary dev: ETH0
I0321 00:08:43.422456  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:08:43.422468  543705 net.go:698] Add success.
I0321 00:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:08:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:08:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:08:53.409806  543705 memory.go:184] no items to output this cycle
I0321 00:08:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 00:09:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:09:03.409776  543705 memory.go:184] no items to output this cycle
I0321 00:09:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:09:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:09:13.409794  543705 memory.go:191] Add success.
I0321 00:09:13.409802  543705 cpu.go:282] Add success.
W0321 00:09:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:09:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:09:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:09:13.420163  543705 net.go:648] Add success.
I0321 00:09:13.423095  543705 net.go:770] primary dev: ETH0
I0321 00:09:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:09:13.423124  543705 net.go:698] Add success.
I0321 00:09:13.476523  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2fb59ccd-33de-4327-b163-035e9804f567","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:09:13.476565  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:09:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:09:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 00:09:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:09:14.456738  543705 disk_worker.go:494] system disk:vda1
I0321 00:09:14.456768  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:09:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:09:16.472578  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:09:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:09:23.409793  543705 memory.go:184] no items to output this cycle
I0321 00:09:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 00:09:26.769095  543705 disk_info.go:125] begin check local disk info of client
I0321 00:09:26.771597  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:09:26.771603  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056dac0 0xc00056db00]
E0321 00:09:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:09:33.409765  543705 memory.go:184] no items to output this cycle
I0321 00:09:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 00:09:38.667773  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:09:38.667780  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:09:43.410673  543705 memory.go:191] Add success.
I0321 00:09:43.409797  543705 cpu.go:282] Add success.
I0321 00:09:43.420385  543705 net.go:648] Add success.
I0321 00:09:43.422927  543705 net.go:770] primary dev: ETH0
I0321 00:09:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:09:43.422953  543705 net.go:698] Add success.
I0321 00:09:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:09:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:09:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:09:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:09:53.409777  543705 memory.go:184] no items to output this cycle
I0321 00:09:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 00:10:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:10:03.409782  543705 memory.go:184] no items to output this cycle
I0321 00:10:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 00:10:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:10:13.409819  543705 memory.go:191] Add success.
I0321 00:10:13.409824  543705 cpu.go:282] Add success.
W0321 00:10:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:10:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:10:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:10:13.420073  543705 net.go:648] Add success.
I0321 00:10:13.422640  543705 net.go:770] primary dev: ETH0
I0321 00:10:13.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:10:13.422670  543705 net.go:698] Add success.
I0321 00:10:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:10:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:10:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 00:10:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:10:14.456609  543705 disk_worker.go:494] system disk:vda1
I0321 00:10:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:10:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:10:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:10:16.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:10:16.458111  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:10:16.472534  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:10:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:10:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:10:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 00:10:26.773114  543705 disk_info.go:125] begin check local disk info of client
I0321 00:10:26.775725  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:10:26.775732  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab140 0xc0001ab180]
E0321 00:10:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:10:33.409771  543705 memory.go:184] no items to output this cycle
I0321 00:10:33.409777  543705 cpu.go:275] no items to output this cycle
E0321 00:10:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:10:43.409818  543705 memory.go:191] Add success.
I0321 00:10:43.409830  543705 cpu.go:282] Add success.
I0321 00:10:43.419957  543705 net.go:648] Add success.
I0321 00:10:43.422723  543705 net.go:770] primary dev: ETH0
I0321 00:10:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:10:43.422750  543705 net.go:698] Add success.
I0321 00:10:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:10:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:10:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:10:53.409782  543705 memory.go:184] no items to output this cycle
I0321 00:10:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 00:11:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:11:03.409794  543705 memory.go:184] no items to output this cycle
I0321 00:11:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 00:11:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:11:13.409824  543705 memory.go:191] Add success.
I0321 00:11:13.409831  543705 cpu.go:282] Add success.
W0321 00:11:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:11:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:11:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:11:13.420076  543705 net.go:648] Add success.
I0321 00:11:13.422995  543705 net.go:770] primary dev: ETH0
I0321 00:11:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:11:13.423027  543705 net.go:698] Add success.
I0321 00:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:11:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:11:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 00:11:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:11:14.456533  543705 disk_worker.go:494] system disk:vda1
I0321 00:11:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:11:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:11:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:11:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:11:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:11:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:11:23.409787  543705 memory.go:184] no items to output this cycle
I0321 00:11:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 00:11:26.777043  543705 disk_info.go:125] begin check local disk info of client
I0321 00:11:26.779595  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:11:26.779601  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f8380 0xc0004f83c0]
E0321 00:11:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:11:33.409781  543705 memory.go:184] no items to output this cycle
I0321 00:11:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 00:11:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:11:43.409833  543705 memory.go:191] Add success.
I0321 00:11:43.409851  543705 cpu.go:282] Add success.
I0321 00:11:43.420187  543705 net.go:648] Add success.
I0321 00:11:43.422969  543705 net.go:770] primary dev: ETH0
I0321 00:11:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:11:43.422999  543705 net.go:698] Add success.
I0321 00:11:46.458027  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:11:46.458091  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:11:46.458125  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:11:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:11:53.409780  543705 memory.go:184] no items to output this cycle
I0321 00:11:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 00:12:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:12:03.409775  543705 memory.go:184] no items to output this cycle
I0321 00:12:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:12:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:12:13.409794  543705 memory.go:191] Add success.
I0321 00:12:13.409798  543705 cpu.go:282] Add success.
W0321 00:12:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:12:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:12:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:12:13.420051  543705 net.go:648] Add success.
I0321 00:12:13.423103  543705 net.go:770] primary dev: ETH0
I0321 00:12:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:12:13.423129  543705 net.go:698] Add success.
I0321 00:12:13.464367  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd2a5138-c45e-4866-9010-44e488debad9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:12:13.464406  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 00:12:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:12:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0321 00:12:14.455250  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:12:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:12:14.455932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:12:14.455938  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:12:14.456847  543705 disk_worker.go:494] system disk:vda1
I0321 00:12:14.456878  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:12:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:12:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:12:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:12:16.458158  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:12:16.458181  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:12:16.458187  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:12:16.472570  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:12:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:12:23.409769  543705 memory.go:184] no items to output this cycle
I0321 00:12:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 00:12:26.781186  543705 disk_info.go:125] begin check local disk info of client
I0321 00:12:26.783733  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:12:26.783739  543705 disk_info.go:196] parse disk info done, disk is : [0xc000274d40 0xc000274d80]
E0321 00:12:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:12:33.409801  543705 memory.go:184] no items to output this cycle
I0321 00:12:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 00:12:38.667916  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:12:38.667923  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:12:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:12:43.410617  543705 memory.go:191] Add success.
I0321 00:12:43.409799  543705 cpu.go:282] Add success.
I0321 00:12:43.420326  543705 net.go:648] Add success.
I0321 00:12:43.423029  543705 net.go:770] primary dev: ETH0
I0321 00:12:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:12:43.423059  543705 net.go:698] Add success.
I0321 00:12:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:12:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:12:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:12:53.409777  543705 memory.go:184] no items to output this cycle
I0321 00:12:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 00:13:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:13:03.409763  543705 memory.go:184] no items to output this cycle
I0321 00:13:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 00:13:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:13:13.409836  543705 memory.go:191] Add success.
I0321 00:13:13.409840  543705 cpu.go:282] Add success.
W0321 00:13:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:13:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:13:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:13:13.420701  543705 net.go:648] Add success.
I0321 00:13:13.423581  543705 net.go:770] primary dev: ETH0
I0321 00:13:13.423594  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:13:13.423605  543705 net.go:698] Add success.
I0321 00:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:13:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:13:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 00:13:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:13:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 00:13:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:13:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:13:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:13:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:13:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:13:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:13:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:13:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:13:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 00:13:26.785076  543705 disk_info.go:125] begin check local disk info of client
I0321 00:13:26.787830  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:13:26.787836  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab9c0 0xc0003aba00]
E0321 00:13:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:13:33.409764  543705 memory.go:184] no items to output this cycle
I0321 00:13:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 00:13:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:13:43.409800  543705 memory.go:191] Add success.
I0321 00:13:43.409809  543705 cpu.go:282] Add success.
I0321 00:13:43.419861  543705 net.go:648] Add success.
I0321 00:13:43.423406  543705 net.go:770] primary dev: ETH0
I0321 00:13:43.423419  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:13:43.423431  543705 net.go:698] Add success.
I0321 00:13:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:13:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:13:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:13:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:13:53.409777  543705 memory.go:184] no items to output this cycle
I0321 00:13:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 00:14:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:14:03.409798  543705 memory.go:184] no items to output this cycle
I0321 00:14:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 00:14:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:14:13.409789  543705 memory.go:191] Add success.
W0321 00:14:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 00:14:13.409818  543705 cpu.go:282] Add success.
W0321 00:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:14:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:14:13.420166  543705 net.go:648] Add success.
I0321 00:14:13.422883  543705 net.go:770] primary dev: ETH0
I0321 00:14:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:14:13.422907  543705 net.go:698] Add success.
I0321 00:14:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:14:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:14:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 00:14:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:14:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 00:14:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:14:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:14:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:14:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:14:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:14:16.472681  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:14:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:14:23.409778  543705 memory.go:184] no items to output this cycle
I0321 00:14:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 00:14:26.789172  543705 disk_info.go:125] begin check local disk info of client
I0321 00:14:26.791710  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:14:26.791716  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474800 0xc000474840]
I0321 00:14:33.409884  543705 cpu.go:275] no items to output this cycle
E0321 00:14:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:14:33.409905  543705 memory.go:184] no items to output this cycle
E0321 00:14:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:14:43.409777  543705 memory.go:191] Add success.
I0321 00:14:43.410140  543705 cpu.go:282] Add success.
I0321 00:14:43.420044  543705 net.go:648] Add success.
I0321 00:14:43.421062  543705 net.go:770] primary dev: ETH0
I0321 00:14:43.421074  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:14:43.421086  543705 net.go:698] Add success.
I0321 00:14:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:14:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:14:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:14:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:14:53.409766  543705 memory.go:184] no items to output this cycle
I0321 00:14:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:15:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:15:03.409763  543705 memory.go:184] no items to output this cycle
I0321 00:15:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 00:15:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:15:13.409816  543705 memory.go:191] Add success.
I0321 00:15:13.409824  543705 cpu.go:282] Add success.
W0321 00:15:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:15:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:15:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:15:13.420062  543705 net.go:648] Add success.
I0321 00:15:13.422528  543705 net.go:770] primary dev: ETH0
I0321 00:15:13.422540  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:15:13.422554  543705 net.go:698] Add success.
I0321 00:15:13.468922  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"178d2553-ea46-4d8e-9c34-c6abd82897f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:15:13.468957  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:15:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:15:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 00:15:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:15:14.456684  543705 disk_worker.go:494] system disk:vda1
I0321 00:15:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:15:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:15:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:15:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:15:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:15:23.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:15:23.409874  543705 memory.go:184] no items to output this cycle
I0321 00:15:23.409971  543705 cpu.go:275] no items to output this cycle
I0321 00:15:26.793155  543705 disk_info.go:125] begin check local disk info of client
I0321 00:15:26.795736  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:15:26.795742  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f180 0xc00035f1c0]
E0321 00:15:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:15:33.409783  543705 memory.go:184] no items to output this cycle
I0321 00:15:33.409790  543705 cpu.go:275] no items to output this cycle
I0321 00:15:38.669034  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:15:38.669040  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:15:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:15:43.410544  543705 memory.go:191] Add success.
I0321 00:15:43.409826  543705 cpu.go:282] Add success.
I0321 00:15:43.420256  543705 net.go:648] Add success.
I0321 00:15:43.422889  543705 net.go:770] primary dev: ETH0
I0321 00:15:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:15:43.422919  543705 net.go:698] Add success.
I0321 00:15:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:15:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:15:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:15:53.410263  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:15:53.410276  543705 cpu.go:275] no items to output this cycle
I0321 00:15:53.410278  543705 memory.go:184] no items to output this cycle
E0321 00:16:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:16:03.409798  543705 memory.go:184] no items to output this cycle
I0321 00:16:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 00:16:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:16:13.409799  543705 memory.go:191] Add success.
I0321 00:16:13.409801  543705 cpu.go:282] Add success.
W0321 00:16:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:16:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:16:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:16:13.420101  543705 net.go:648] Add success.
I0321 00:16:13.422571  543705 net.go:770] primary dev: ETH0
I0321 00:16:13.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:16:13.422596  543705 net.go:698] Add success.
I0321 00:16:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:16:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:16:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 00:16:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:16:14.456520  543705 disk_worker.go:494] system disk:vda1
I0321 00:16:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:16:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:16:16.458015  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:16:16.458079  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:16:16.458109  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:16:16.472578  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:16:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:16:23.409894  543705 memory.go:184] no items to output this cycle
I0321 00:16:23.409904  543705 cpu.go:275] no items to output this cycle
I0321 00:16:26.797226  543705 disk_info.go:125] begin check local disk info of client
I0321 00:16:26.800042  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:16:26.800048  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487c40 0xc000487c80]
E0321 00:16:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:16:33.409773  543705 memory.go:184] no items to output this cycle
I0321 00:16:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 00:16:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:16:43.409792  543705 cpu.go:282] Add success.
I0321 00:16:43.409793  543705 memory.go:191] Add success.
I0321 00:16:43.419887  543705 net.go:648] Add success.
I0321 00:16:43.422728  543705 net.go:770] primary dev: ETH0
I0321 00:16:43.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:16:43.422755  543705 net.go:698] Add success.
I0321 00:16:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:16:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:16:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:16:53.409771  543705 memory.go:184] no items to output this cycle
I0321 00:16:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 00:17:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:17:03.409790  543705 memory.go:184] no items to output this cycle
I0321 00:17:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 00:17:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:17:13.409783  543705 memory.go:191] Add success.
I0321 00:17:13.409806  543705 cpu.go:282] Add success.
W0321 00:17:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:17:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:17:13.420158  543705 net.go:648] Add success.
I0321 00:17:13.422989  543705 net.go:770] primary dev: ETH0
I0321 00:17:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:17:13.423018  543705 net.go:698] Add success.
I0321 00:17:13.453590  543705 event_worker.go:152] Polling the log file for events...
W0321 00:17:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:17:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 00:17:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:17:14.455913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:17:14.455922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:17:14.455928  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:17:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 00:17:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:17:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:17:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:17:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:17:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:17:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:17:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:17:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:17:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:17:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:17:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 00:17:26.801200  543705 disk_info.go:125] begin check local disk info of client
I0321 00:17:26.803717  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:17:26.803724  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493dc0 0xc000493e00]
E0321 00:17:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:17:33.409762  543705 memory.go:184] no items to output this cycle
I0321 00:17:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 00:17:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:17:43.409792  543705 memory.go:191] Add success.
I0321 00:17:43.409794  543705 cpu.go:282] Add success.
I0321 00:17:43.419905  543705 net.go:648] Add success.
I0321 00:17:43.422732  543705 net.go:770] primary dev: ETH0
I0321 00:17:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:17:43.422761  543705 net.go:698] Add success.
I0321 00:17:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:17:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:17:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:17:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:17:53.409780  543705 memory.go:184] no items to output this cycle
I0321 00:17:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 00:18:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:18:03.409782  543705 memory.go:184] no items to output this cycle
I0321 00:18:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 00:18:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:18:13.409791  543705 memory.go:191] Add success.
I0321 00:18:13.409813  543705 cpu.go:282] Add success.
W0321 00:18:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:18:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:18:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:18:13.420068  543705 net.go:648] Add success.
I0321 00:18:13.422788  543705 net.go:770] primary dev: ETH0
I0321 00:18:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:18:13.422813  543705 net.go:698] Add success.
I0321 00:18:13.464467  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e7f9eb7-c0a6-4e25-b4aa-18ef7de64fdf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:18:13.464502  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:18:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:18:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:18:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 00:18:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:18:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 00:18:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:18:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:18:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:18:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:18:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:18:16.472521  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:18:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:18:23.409804  543705 memory.go:184] no items to output this cycle
I0321 00:18:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 00:18:26.805281  543705 disk_info.go:125] begin check local disk info of client
I0321 00:18:26.807896  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:18:26.807903  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003704c0 0xc000370500]
E0321 00:18:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:18:33.409797  543705 memory.go:184] no items to output this cycle
I0321 00:18:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 00:18:38.669732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:18:38.669739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:18:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:18:43.410605  543705 memory.go:191] Add success.
I0321 00:18:43.409810  543705 cpu.go:282] Add success.
I0321 00:18:43.420286  543705 net.go:648] Add success.
I0321 00:18:43.422905  543705 net.go:770] primary dev: ETH0
I0321 00:18:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:18:43.422931  543705 net.go:698] Add success.
I0321 00:18:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:18:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:18:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:18:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:18:53.409806  543705 memory.go:184] no items to output this cycle
I0321 00:18:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 00:19:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:19:03.409775  543705 memory.go:184] no items to output this cycle
I0321 00:19:03.409777  543705 cpu.go:275] no items to output this cycle
E0321 00:19:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:19:13.409792  543705 memory.go:191] Add success.
I0321 00:19:13.409809  543705 cpu.go:282] Add success.
W0321 00:19:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:19:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:19:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:19:13.420258  543705 net.go:648] Add success.
I0321 00:19:13.422957  543705 net.go:770] primary dev: ETH0
I0321 00:19:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:19:13.422982  543705 net.go:698] Add success.
I0321 00:19:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:19:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:19:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 00:19:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:19:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 00:19:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:19:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:19:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:19:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:19:23.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:19:23.410405  543705 memory.go:184] no items to output this cycle
I0321 00:19:23.410440  543705 cpu.go:275] no items to output this cycle
I0321 00:19:26.809234  543705 disk_info.go:125] begin check local disk info of client
I0321 00:19:26.811740  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:19:26.811746  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270140 0xc000270180]
E0321 00:19:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:19:33.409788  543705 memory.go:184] no items to output this cycle
I0321 00:19:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 00:19:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:19:43.409810  543705 memory.go:191] Add success.
I0321 00:19:43.409818  543705 cpu.go:282] Add success.
I0321 00:19:43.419878  543705 net.go:648] Add success.
I0321 00:19:43.423117  543705 net.go:770] primary dev: ETH0
I0321 00:19:43.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:19:43.423143  543705 net.go:698] Add success.
I0321 00:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:19:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:19:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:19:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:19:53.409771  543705 memory.go:184] no items to output this cycle
I0321 00:19:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 00:20:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:20:03.409775  543705 memory.go:184] no items to output this cycle
I0321 00:20:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 00:20:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:20:13.409793  543705 memory.go:191] Add success.
I0321 00:20:13.409814  543705 cpu.go:282] Add success.
W0321 00:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:20:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:20:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:20:13.420154  543705 net.go:648] Add success.
I0321 00:20:13.422733  543705 net.go:770] primary dev: ETH0
I0321 00:20:13.422746  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:20:13.422758  543705 net.go:698] Add success.
I0321 00:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:20:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:20:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 00:20:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:20:14.456515  543705 disk_worker.go:494] system disk:vda1
I0321 00:20:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:20:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:20:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:20:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:20:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:20:16.472499  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:20:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:20:23.409819  543705 memory.go:184] no items to output this cycle
I0321 00:20:23.409831  543705 cpu.go:275] no items to output this cycle
I0321 00:20:26.813285  543705 disk_info.go:125] begin check local disk info of client
I0321 00:20:26.815855  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:20:26.815861  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5480 0xc0000c54c0]
E0321 00:20:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:20:33.409776  543705 memory.go:184] no items to output this cycle
I0321 00:20:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 00:20:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:20:43.409827  543705 memory.go:191] Add success.
I0321 00:20:43.409838  543705 cpu.go:282] Add success.
I0321 00:20:43.420001  543705 net.go:648] Add success.
I0321 00:20:43.423062  543705 net.go:770] primary dev: ETH0
I0321 00:20:43.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:20:43.423093  543705 net.go:698] Add success.
I0321 00:20:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:20:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:20:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:20:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:20:53.409783  543705 memory.go:184] no items to output this cycle
I0321 00:20:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 00:21:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:21:03.409791  543705 cpu.go:275] no items to output this cycle
I0321 00:21:03.409793  543705 memory.go:184] no items to output this cycle
E0321 00:21:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:21:13.409804  543705 memory.go:191] Add success.
W0321 00:21:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 00:21:13.409840  543705 cpu.go:282] Add success.
W0321 00:21:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:21:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:21:13.420213  543705 net.go:648] Add success.
I0321 00:21:13.422884  543705 net.go:770] primary dev: ETH0
I0321 00:21:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:21:13.422909  543705 net.go:698] Add success.
I0321 00:21:13.469133  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e70a0bda-339d-4b9b-b795-1412c34ba111","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:21:13.469168  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:21:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:21:14.455372  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:21:14.455463  543705 disk_worker.go:708] disk space is not compliant
W0321 00:21:14.455468  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:21:14.457122  543705 disk_worker.go:494] system disk:vda1
I0321 00:21:14.457152  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:21:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:21:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:21:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:21:23.410407  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:21:23.410426  543705 memory.go:184] no items to output this cycle
I0321 00:21:23.410444  543705 cpu.go:275] no items to output this cycle
I0321 00:21:26.817283  543705 disk_info.go:125] begin check local disk info of client
I0321 00:21:26.819816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:21:26.819822  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5d40 0xc0002a5d80]
E0321 00:21:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:21:33.409791  543705 memory.go:184] no items to output this cycle
I0321 00:21:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 00:21:38.671050  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:21:38.671058  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:21:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:21:43.409794  543705 memory.go:191] Add success.
I0321 00:21:43.409805  543705 cpu.go:282] Add success.
I0321 00:21:43.419995  543705 net.go:648] Add success.
I0321 00:21:43.420868  543705 net.go:770] primary dev: ETH0
I0321 00:21:43.420881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:21:43.420894  543705 net.go:698] Add success.
I0321 00:21:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:21:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:21:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:21:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:21:53.409810  543705 memory.go:184] no items to output this cycle
I0321 00:21:53.409824  543705 cpu.go:275] no items to output this cycle
E0321 00:22:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:22:03.409768  543705 memory.go:184] no items to output this cycle
I0321 00:22:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 00:22:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:22:13.409820  543705 memory.go:191] Add success.
I0321 00:22:13.409826  543705 cpu.go:282] Add success.
W0321 00:22:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:22:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:22:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:22:13.420156  543705 net.go:648] Add success.
I0321 00:22:13.423269  543705 net.go:770] primary dev: ETH0
I0321 00:22:13.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:22:13.423460  543705 net.go:698] Add success.
W0321 00:22:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:22:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 00:22:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:22:14.456805  543705 disk_worker.go:494] system disk:vda1
I0321 00:22:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:22:14.457156  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:22:14.457164  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:22:14.457168  543705 custom_config.go:64] query custom config with name: gpu
E0321 00:22:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:22:15.456791  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:22:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:22:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:22:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:22:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:22:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:22:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:22:23.409807  543705 memory.go:184] no items to output this cycle
I0321 00:22:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 00:22:26.821251  543705 disk_info.go:125] begin check local disk info of client
I0321 00:22:26.823813  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:22:26.823820  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b84c0 0xc0002b8500]
E0321 00:22:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:22:33.409804  543705 memory.go:184] no items to output this cycle
I0321 00:22:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 00:22:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:22:43.409783  543705 memory.go:191] Add success.
I0321 00:22:43.409805  543705 cpu.go:282] Add success.
I0321 00:22:43.419889  543705 net.go:648] Add success.
I0321 00:22:43.422995  543705 net.go:770] primary dev: ETH0
I0321 00:22:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:22:43.423020  543705 net.go:698] Add success.
I0321 00:22:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:22:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:22:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:22:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:22:53.409778  543705 memory.go:184] no items to output this cycle
I0321 00:22:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 00:23:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:23:03.409783  543705 memory.go:184] no items to output this cycle
I0321 00:23:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 00:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:23:13.409805  543705 memory.go:191] Add success.
I0321 00:23:13.409808  543705 cpu.go:282] Add success.
W0321 00:23:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:23:13.409962  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:23:13.409967  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:23:13.419718  543705 net.go:648] Add success.
I0321 00:23:13.422259  543705 net.go:770] primary dev: ETH0
I0321 00:23:13.422275  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:23:13.422289  543705 net.go:698] Add success.
I0321 00:23:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:23:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:23:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 00:23:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:23:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 00:23:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:23:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:23:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:23:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:23:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:23:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:23:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:23:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:23:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 00:23:26.825341  543705 disk_info.go:125] begin check local disk info of client
I0321 00:23:26.827847  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:23:26.827853  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5bc0 0xc0000c5c00]
E0321 00:23:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:23:33.409796  543705 memory.go:184] no items to output this cycle
I0321 00:23:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 00:23:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:23:43.409787  543705 memory.go:191] Add success.
I0321 00:23:43.409810  543705 cpu.go:282] Add success.
I0321 00:23:43.419855  543705 net.go:648] Add success.
I0321 00:23:43.422646  543705 net.go:770] primary dev: ETH0
I0321 00:23:43.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:23:43.422677  543705 net.go:698] Add success.
I0321 00:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:23:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:23:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:23:53.409772  543705 memory.go:184] no items to output this cycle
I0321 00:23:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 00:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:24:03.409867  543705 memory.go:184] no items to output this cycle
I0321 00:24:03.409910  543705 cpu.go:275] no items to output this cycle
E0321 00:24:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:24:13.409806  543705 memory.go:191] Add success.
I0321 00:24:13.409808  543705 cpu.go:282] Add success.
W0321 00:24:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:24:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:24:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:24:13.420181  543705 net.go:648] Add success.
I0321 00:24:13.422890  543705 net.go:770] primary dev: ETH0
I0321 00:24:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:24:13.422919  543705 net.go:698] Add success.
I0321 00:24:13.469682  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71361961-5e6c-4da8-863d-b45b640d9a95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:24:13.469715  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:24:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:24:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:24:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 00:24:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:24:14.456622  543705 disk_worker.go:494] system disk:vda1
I0321 00:24:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:24:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:24:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:24:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:24:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:24:23.409785  543705 memory.go:184] no items to output this cycle
I0321 00:24:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 00:24:26.829388  543705 disk_info.go:125] begin check local disk info of client
I0321 00:24:26.831996  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:24:26.832003  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056d300 0xc00056d340]
E0321 00:24:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:24:33.409776  543705 memory.go:184] no items to output this cycle
I0321 00:24:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 00:24:38.671211  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:24:38.671218  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:24:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:24:43.410616  543705 memory.go:191] Add success.
I0321 00:24:43.409822  543705 cpu.go:282] Add success.
I0321 00:24:43.420328  543705 net.go:648] Add success.
I0321 00:24:43.423006  543705 net.go:770] primary dev: ETH0
I0321 00:24:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:24:43.423041  543705 net.go:698] Add success.
I0321 00:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:24:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:24:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:24:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:24:53.409765  543705 memory.go:184] no items to output this cycle
I0321 00:24:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 00:25:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:25:03.409793  543705 memory.go:184] no items to output this cycle
I0321 00:25:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 00:25:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:25:13.409790  543705 memory.go:191] Add success.
W0321 00:25:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 00:25:13.409824  543705 cpu.go:282] Add success.
W0321 00:25:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:25:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:25:13.420144  543705 net.go:648] Add success.
I0321 00:25:13.422749  543705 net.go:770] primary dev: ETH0
I0321 00:25:13.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:25:13.422774  543705 net.go:698] Add success.
I0321 00:25:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:25:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:25:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 00:25:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:25:14.456621  543705 disk_worker.go:494] system disk:vda1
I0321 00:25:14.456658  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:25:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:25:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:25:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:25:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:25:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:25:23.409783  543705 memory.go:184] no items to output this cycle
I0321 00:25:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 00:25:26.833414  543705 disk_info.go:125] begin check local disk info of client
I0321 00:25:26.836184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:25:26.836199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9e00 0xc0001f9e40]
E0321 00:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:25:33.409782  543705 memory.go:184] no items to output this cycle
I0321 00:25:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:25:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:25:43.409789  543705 memory.go:191] Add success.
I0321 00:25:43.409803  543705 cpu.go:282] Add success.
I0321 00:25:43.419872  543705 net.go:648] Add success.
I0321 00:25:43.422525  543705 net.go:770] primary dev: ETH0
I0321 00:25:43.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:25:43.422552  543705 net.go:698] Add success.
I0321 00:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:25:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:25:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:25:53.409799  543705 memory.go:184] no items to output this cycle
I0321 00:25:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 00:26:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:26:03.409762  543705 memory.go:184] no items to output this cycle
I0321 00:26:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 00:26:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:26:13.409798  543705 memory.go:191] Add success.
I0321 00:26:13.409799  543705 cpu.go:282] Add success.
W0321 00:26:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:26:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:26:13.420178  543705 net.go:648] Add success.
I0321 00:26:13.423233  543705 net.go:770] primary dev: ETH0
I0321 00:26:13.423246  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:26:13.423271  543705 net.go:698] Add success.
I0321 00:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:26:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:26:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 00:26:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:26:14.456620  543705 disk_worker.go:494] system disk:vda1
I0321 00:26:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:26:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:26:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:26:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:26:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:26:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:26:23.409783  543705 memory.go:184] no items to output this cycle
I0321 00:26:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 00:26:26.837429  543705 disk_info.go:125] begin check local disk info of client
I0321 00:26:26.840018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:26:26.840025  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340380 0xc0003403c0]
E0321 00:26:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:26:33.409797  543705 memory.go:184] no items to output this cycle
I0321 00:26:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 00:26:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:26:43.409773  543705 memory.go:191] Add success.
I0321 00:26:43.409804  543705 cpu.go:282] Add success.
I0321 00:26:43.420103  543705 net.go:648] Add success.
I0321 00:26:43.422714  543705 net.go:770] primary dev: ETH0
I0321 00:26:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:26:43.422738  543705 net.go:698] Add success.
I0321 00:26:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:26:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:26:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:26:53.410346  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:26:53.410365  543705 memory.go:184] no items to output this cycle
I0321 00:26:53.410382  543705 cpu.go:275] no items to output this cycle
E0321 00:27:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:27:03.409768  543705 memory.go:184] no items to output this cycle
I0321 00:27:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 00:27:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:27:13.409798  543705 memory.go:191] Add success.
I0321 00:27:13.409800  543705 cpu.go:282] Add success.
W0321 00:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:27:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:27:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:27:13.420211  543705 net.go:648] Add success.
I0321 00:27:13.423134  543705 net.go:770] primary dev: ETH0
I0321 00:27:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:27:13.423167  543705 net.go:698] Add success.
I0321 00:27:13.429897  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 00:27:13.453073  543705 event_worker.go:152] Polling the log file for events...
I0321 00:27:13.469178  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"116f1dd6-1698-477d-b481-b04c529b67f3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:27:13.469210  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 00:27:14.455234  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:27:14.455249  543705 disk_worker.go:708] disk space is not compliant
W0321 00:27:14.455254  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:27:14.455868  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:27:14.455876  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:27:14.455881  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:27:14.456820  543705 disk_worker.go:494] system disk:vda1
I0321 00:27:14.456865  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:27:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:27:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:27:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:27:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:27:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:27:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:27:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:27:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:27:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 00:27:23.409789  543705 memory.go:184] no items to output this cycle
I0321 00:27:26.841393  543705 disk_info.go:125] begin check local disk info of client
I0321 00:27:26.843920  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:27:26.843926  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056dcc0 0xc00056dd00]
E0321 00:27:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:27:33.409802  543705 memory.go:184] no items to output this cycle
I0321 00:27:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 00:27:38.671361  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:27:38.671375  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:27:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:27:43.410591  543705 memory.go:191] Add success.
I0321 00:27:43.409816  543705 cpu.go:282] Add success.
I0321 00:27:43.420297  543705 net.go:648] Add success.
I0321 00:27:43.423027  543705 net.go:770] primary dev: ETH0
I0321 00:27:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:27:43.423061  543705 net.go:698] Add success.
I0321 00:27:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:27:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:27:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:27:53.409797  543705 memory.go:184] no items to output this cycle
I0321 00:27:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 00:28:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:28:03.409765  543705 memory.go:184] no items to output this cycle
I0321 00:28:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 00:28:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:28:13.409823  543705 memory.go:191] Add success.
I0321 00:28:13.409834  543705 cpu.go:282] Add success.
W0321 00:28:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:28:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:28:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:28:13.420252  543705 net.go:648] Add success.
I0321 00:28:13.423337  543705 net.go:770] primary dev: ETH0
I0321 00:28:13.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:28:13.423368  543705 net.go:698] Add success.
I0321 00:28:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:28:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:28:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 00:28:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:28:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 00:28:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:28:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:28:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:28:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:28:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:28:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:28:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:28:23.409784  543705 memory.go:184] no items to output this cycle
I0321 00:28:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 00:28:26.845424  543705 disk_info.go:125] begin check local disk info of client
I0321 00:28:26.847995  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:28:26.848002  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344700 0xc000344740]
E0321 00:28:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:28:33.409777  543705 memory.go:184] no items to output this cycle
I0321 00:28:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 00:28:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:28:43.409820  543705 memory.go:191] Add success.
I0321 00:28:43.409824  543705 cpu.go:282] Add success.
I0321 00:28:43.420121  543705 net.go:648] Add success.
I0321 00:28:43.422853  543705 net.go:770] primary dev: ETH0
I0321 00:28:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:28:43.422878  543705 net.go:698] Add success.
I0321 00:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:28:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:28:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:28:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:28:53.409801  543705 memory.go:184] no items to output this cycle
I0321 00:28:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 00:29:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:29:03.409795  543705 cpu.go:275] no items to output this cycle
I0321 00:29:03.409807  543705 memory.go:184] no items to output this cycle
E0321 00:29:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:29:13.409803  543705 memory.go:191] Add success.
I0321 00:29:13.409809  543705 cpu.go:282] Add success.
W0321 00:29:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:29:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:29:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:29:13.420059  543705 net.go:648] Add success.
I0321 00:29:13.422641  543705 net.go:770] primary dev: ETH0
I0321 00:29:13.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:29:13.422671  543705 net.go:698] Add success.
I0321 00:29:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:29:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:29:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 00:29:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:29:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 00:29:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:29:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:29:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:29:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:29:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:29:23.409775  543705 memory.go:184] no items to output this cycle
I0321 00:29:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 00:29:26.848084  543705 disk_info.go:125] begin check local disk info of client
I0321 00:29:26.850854  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:29:26.850860  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039cc00 0xc00039cc40]
E0321 00:29:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:29:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 00:29:33.409789  543705 memory.go:184] no items to output this cycle
E0321 00:29:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:29:43.409788  543705 memory.go:191] Add success.
I0321 00:29:43.409808  543705 cpu.go:282] Add success.
I0321 00:29:43.420002  543705 net.go:648] Add success.
I0321 00:29:43.423146  543705 net.go:770] primary dev: ETH0
I0321 00:29:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:29:43.423171  543705 net.go:698] Add success.
I0321 00:29:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:29:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:29:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:29:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:29:53.409774  543705 memory.go:184] no items to output this cycle
I0321 00:29:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 00:30:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:30:03.409791  543705 memory.go:184] no items to output this cycle
I0321 00:30:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 00:30:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:30:13.409804  543705 memory.go:191] Add success.
I0321 00:30:13.409805  543705 cpu.go:282] Add success.
W0321 00:30:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:30:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:30:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:30:13.420126  543705 net.go:648] Add success.
I0321 00:30:13.422957  543705 net.go:770] primary dev: ETH0
I0321 00:30:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:30:13.422986  543705 net.go:698] Add success.
I0321 00:30:13.468757  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf539d49-c227-4f4b-bce9-8c91aa10ff50","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:30:13.468797  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:30:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:30:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:30:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 00:30:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:30:14.456679  543705 disk_worker.go:494] system disk:vda1
I0321 00:30:14.456724  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:30:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:30:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:30:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:30:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:30:23.409788  543705 memory.go:184] no items to output this cycle
I0321 00:30:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 00:30:26.852416  543705 disk_info.go:125] begin check local disk info of client
I0321 00:30:26.855028  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:30:26.855034  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004994c0 0xc000499500]
E0321 00:30:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:30:33.409778  543705 memory.go:184] no items to output this cycle
I0321 00:30:33.409790  543705 cpu.go:275] no items to output this cycle
I0321 00:30:38.671520  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:30:38.671527  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:30:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:30:43.410633  543705 memory.go:191] Add success.
I0321 00:30:43.409800  543705 cpu.go:282] Add success.
I0321 00:30:43.419748  543705 net.go:648] Add success.
I0321 00:30:43.422440  543705 net.go:770] primary dev: ETH0
I0321 00:30:43.422455  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:30:43.422469  543705 net.go:698] Add success.
I0321 00:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:30:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:30:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:30:53.409774  543705 memory.go:184] no items to output this cycle
I0321 00:30:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 00:31:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:31:03.409781  543705 memory.go:184] no items to output this cycle
I0321 00:31:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 00:31:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:31:13.409843  543705 memory.go:191] Add success.
I0321 00:31:13.409853  543705 cpu.go:282] Add success.
W0321 00:31:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:31:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:31:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:31:13.420232  543705 net.go:648] Add success.
I0321 00:31:13.422935  543705 net.go:770] primary dev: ETH0
I0321 00:31:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:31:13.422964  543705 net.go:698] Add success.
I0321 00:31:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:31:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:31:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 00:31:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:31:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 00:31:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:31:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:31:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:31:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:31:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:31:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:31:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:31:23.409809  543705 memory.go:184] no items to output this cycle
I0321 00:31:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 00:31:26.856487  543705 disk_info.go:125] begin check local disk info of client
I0321 00:31:26.859089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:31:26.859096  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357100 0xc000357140]
E0321 00:31:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:31:33.409770  543705 memory.go:184] no items to output this cycle
I0321 00:31:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 00:31:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:31:43.409805  543705 memory.go:191] Add success.
I0321 00:31:43.409807  543705 cpu.go:282] Add success.
I0321 00:31:43.420338  543705 net.go:648] Add success.
I0321 00:31:43.422900  543705 net.go:770] primary dev: ETH0
I0321 00:31:43.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:31:43.422924  543705 net.go:698] Add success.
I0321 00:31:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:31:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:31:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:31:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:31:53.409812  543705 memory.go:184] no items to output this cycle
I0321 00:31:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 00:32:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:32:03.409798  543705 memory.go:184] no items to output this cycle
I0321 00:32:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 00:32:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:32:13.409831  543705 memory.go:191] Add success.
I0321 00:32:13.409835  543705 cpu.go:282] Add success.
W0321 00:32:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:32:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:32:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:32:13.420126  543705 net.go:770] primary dev: ETH0
I0321 00:32:13.420140  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:32:13.420152  543705 net.go:698] Add success.
I0321 00:32:13.420382  543705 net.go:648] Add success.
W0321 00:32:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:32:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 00:32:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:32:14.455895  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:32:14.455903  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:32:14.455909  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:32:14.456563  543705 disk_worker.go:494] system disk:vda1
I0321 00:32:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:32:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:32:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:32:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:32:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:32:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:32:16.457986  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:32:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:32:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:32:23.409790  543705 memory.go:184] no items to output this cycle
I0321 00:32:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 00:32:26.859184  543705 disk_info.go:125] begin check local disk info of client
I0321 00:32:26.861612  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:32:26.861619  543705 disk_info.go:196] parse disk info done, disk is : [0xc000345580 0xc0003455c0]
E0321 00:32:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:32:33.409810  543705 memory.go:184] no items to output this cycle
I0321 00:32:33.409828  543705 cpu.go:275] no items to output this cycle
E0321 00:32:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:32:43.409789  543705 memory.go:191] Add success.
I0321 00:32:43.409818  543705 cpu.go:282] Add success.
I0321 00:32:43.420201  543705 net.go:648] Add success.
I0321 00:32:43.422881  543705 net.go:770] primary dev: ETH0
I0321 00:32:43.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:32:43.422905  543705 net.go:698] Add success.
I0321 00:32:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:32:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:32:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:32:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:32:53.409761  543705 memory.go:184] no items to output this cycle
I0321 00:32:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 00:33:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:33:03.409796  543705 memory.go:184] no items to output this cycle
I0321 00:33:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 00:33:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:33:13.409802  543705 memory.go:191] Add success.
I0321 00:33:13.409807  543705 cpu.go:282] Add success.
W0321 00:33:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:33:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:33:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:33:13.420130  543705 net.go:648] Add success.
I0321 00:33:13.422895  543705 net.go:770] primary dev: ETH0
I0321 00:33:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:33:13.422923  543705 net.go:698] Add success.
I0321 00:33:13.470288  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c77ae8b7-b689-44ed-b91f-46d17e23358e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:33:13.470323  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:33:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:33:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:33:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 00:33:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:33:14.456505  543705 disk_worker.go:494] system disk:vda1
I0321 00:33:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:33:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:33:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:33:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:33:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:33:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:33:23.409811  543705 memory.go:184] no items to output this cycle
I0321 00:33:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 00:33:26.861676  543705 disk_info.go:125] begin check local disk info of client
I0321 00:33:26.864125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:33:26.864132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000345000 0xc000345040]
E0321 00:33:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:33:33.409796  543705 memory.go:184] no items to output this cycle
I0321 00:33:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 00:33:38.672056  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:33:38.672062  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:33:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:33:43.410558  543705 memory.go:191] Add success.
I0321 00:33:43.409810  543705 cpu.go:282] Add success.
I0321 00:33:43.420281  543705 net.go:648] Add success.
I0321 00:33:43.422853  543705 net.go:770] primary dev: ETH0
I0321 00:33:43.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:33:43.422888  543705 net.go:698] Add success.
I0321 00:33:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:33:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:33:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:33:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:33:53.409786  543705 memory.go:184] no items to output this cycle
I0321 00:33:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 00:34:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:34:03.409801  543705 memory.go:184] no items to output this cycle
I0321 00:34:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 00:34:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:34:13.409799  543705 memory.go:191] Add success.
I0321 00:34:13.409801  543705 cpu.go:282] Add success.
W0321 00:34:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:34:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:34:13.420114  543705 net.go:648] Add success.
I0321 00:34:13.423219  543705 net.go:770] primary dev: ETH0
I0321 00:34:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:34:13.423248  543705 net.go:698] Add success.
I0321 00:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:34:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:34:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0321 00:34:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:34:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 00:34:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:34:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:34:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:34:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:34:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:34:23.409815  543705 memory.go:184] no items to output this cycle
I0321 00:34:23.409825  543705 cpu.go:275] no items to output this cycle
I0321 00:34:26.865503  543705 disk_info.go:125] begin check local disk info of client
I0321 00:34:26.868021  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:34:26.868028  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473e00 0xc000473e40]
E0321 00:34:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:34:33.409781  543705 memory.go:184] no items to output this cycle
I0321 00:34:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 00:34:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:34:43.409792  543705 memory.go:191] Add success.
I0321 00:34:43.409803  543705 cpu.go:282] Add success.
I0321 00:34:43.419896  543705 net.go:648] Add success.
I0321 00:34:43.422528  543705 net.go:770] primary dev: ETH0
I0321 00:34:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:34:43.422554  543705 net.go:698] Add success.
I0321 00:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:34:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:34:53.410505  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:34:53.410531  543705 memory.go:184] no items to output this cycle
I0321 00:34:53.410578  543705 cpu.go:275] no items to output this cycle
E0321 00:35:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:35:03.409782  543705 memory.go:184] no items to output this cycle
I0321 00:35:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 00:35:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:35:13.409799  543705 memory.go:191] Add success.
I0321 00:35:13.409803  543705 cpu.go:282] Add success.
W0321 00:35:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:35:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:35:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:35:13.420064  543705 net.go:648] Add success.
I0321 00:35:13.422730  543705 net.go:770] primary dev: ETH0
I0321 00:35:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:35:13.422759  543705 net.go:698] Add success.
I0321 00:35:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:35:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:35:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 00:35:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:35:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 00:35:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:35:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:35:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:35:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:35:23.410379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:35:23.410396  543705 memory.go:184] no items to output this cycle
I0321 00:35:23.410405  543705 cpu.go:275] no items to output this cycle
I0321 00:35:26.869425  543705 disk_info.go:125] begin check local disk info of client
I0321 00:35:26.871927  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:35:26.871933  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b5980 0xc0002b59c0]
E0321 00:35:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:35:33.409798  543705 memory.go:184] no items to output this cycle
I0321 00:35:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 00:35:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:35:43.409815  543705 memory.go:191] Add success.
I0321 00:35:43.409822  543705 cpu.go:282] Add success.
I0321 00:35:43.419892  543705 net.go:648] Add success.
I0321 00:35:43.422635  543705 net.go:770] primary dev: ETH0
I0321 00:35:43.422649  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:35:43.422661  543705 net.go:698] Add success.
I0321 00:35:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:35:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:35:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:35:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:35:53.409854  543705 memory.go:184] no items to output this cycle
I0321 00:35:53.409927  543705 cpu.go:275] no items to output this cycle
E0321 00:36:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:36:03.409796  543705 memory.go:184] no items to output this cycle
I0321 00:36:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 00:36:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:36:13.409820  543705 memory.go:191] Add success.
I0321 00:36:13.409826  543705 cpu.go:282] Add success.
W0321 00:36:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:36:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:36:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:36:13.420204  543705 net.go:648] Add success.
I0321 00:36:13.422835  543705 net.go:770] primary dev: ETH0
I0321 00:36:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:36:13.422861  543705 net.go:698] Add success.
I0321 00:36:13.468277  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a07847d2-bdbc-4abd-b231-731487ab37fe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:36:13.468311  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:36:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:36:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:36:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 00:36:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:36:14.456639  543705 disk_worker.go:494] system disk:vda1
I0321 00:36:14.456670  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:36:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:36:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:36:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:36:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:36:23.409775  543705 memory.go:184] no items to output this cycle
I0321 00:36:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 00:36:26.873612  543705 disk_info.go:125] begin check local disk info of client
I0321 00:36:26.876246  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:36:26.876253  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342bc0 0xc000342c00]
E0321 00:36:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:36:33.409796  543705 memory.go:184] no items to output this cycle
I0321 00:36:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 00:36:38.672198  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:36:38.672205  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:36:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:36:43.410478  543705 memory.go:191] Add success.
I0321 00:36:43.409800  543705 cpu.go:282] Add success.
I0321 00:36:43.420157  543705 net.go:648] Add success.
I0321 00:36:43.422662  543705 net.go:770] primary dev: ETH0
I0321 00:36:43.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:36:43.422688  543705 net.go:698] Add success.
I0321 00:36:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:36:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:36:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:36:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:36:53.409872  543705 cpu.go:275] no items to output this cycle
I0321 00:36:53.409882  543705 memory.go:184] no items to output this cycle
E0321 00:37:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:37:03.409791  543705 memory.go:184] no items to output this cycle
I0321 00:37:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 00:37:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:37:13.409800  543705 memory.go:191] Add success.
I0321 00:37:13.409801  543705 cpu.go:282] Add success.
W0321 00:37:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:37:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:37:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:37:13.420138  543705 net.go:648] Add success.
I0321 00:37:13.423175  543705 net.go:770] primary dev: ETH0
I0321 00:37:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:37:13.423201  543705 net.go:698] Add success.
I0321 00:37:13.452787  543705 event_worker.go:152] Polling the log file for events...
W0321 00:37:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:37:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 00:37:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:37:14.456883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:37:14.456893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:37:14.456899  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:37:14.456973  543705 disk_worker.go:494] system disk:vda1
I0321 00:37:14.457016  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:37:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:37:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:37:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:37:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:37:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:37:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:37:16.472313  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:37:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:37:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:37:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 00:37:26.877583  543705 disk_info.go:125] begin check local disk info of client
I0321 00:37:26.880120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:37:26.880126  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343140 0xc000343180]
E0321 00:37:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:37:33.409771  543705 memory.go:184] no items to output this cycle
I0321 00:37:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 00:37:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:37:43.409806  543705 memory.go:191] Add success.
I0321 00:37:43.409815  543705 cpu.go:282] Add success.
I0321 00:37:43.420007  543705 net.go:648] Add success.
I0321 00:37:43.422550  543705 net.go:770] primary dev: ETH0
I0321 00:37:43.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:37:43.422576  543705 net.go:698] Add success.
I0321 00:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:37:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:37:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:37:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:37:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 00:37:53.409777  543705 memory.go:184] no items to output this cycle
E0321 00:38:03.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:38:03.409903  543705 memory.go:184] no items to output this cycle
I0321 00:38:03.409958  543705 cpu.go:275] no items to output this cycle
E0321 00:38:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:38:13.409793  543705 memory.go:191] Add success.
I0321 00:38:13.409817  543705 cpu.go:282] Add success.
W0321 00:38:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:38:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:38:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:38:13.420169  543705 net.go:648] Add success.
I0321 00:38:13.423159  543705 net.go:770] primary dev: ETH0
I0321 00:38:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:38:13.423188  543705 net.go:698] Add success.
I0321 00:38:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:38:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:38:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 00:38:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:38:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 00:38:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:38:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:38:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:38:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:38:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:38:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:38:23.409773  543705 memory.go:184] no items to output this cycle
I0321 00:38:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 00:38:26.881656  543705 disk_info.go:125] begin check local disk info of client
I0321 00:38:26.884447  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:38:26.884454  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bec0 0xc00007bf00]
E0321 00:38:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:38:33.409764  543705 memory.go:184] no items to output this cycle
I0321 00:38:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 00:38:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:38:43.409780  543705 memory.go:191] Add success.
I0321 00:38:43.409792  543705 cpu.go:282] Add success.
I0321 00:38:43.419855  543705 net.go:648] Add success.
I0321 00:38:43.421004  543705 net.go:770] primary dev: ETH0
I0321 00:38:43.421017  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:38:43.421029  543705 net.go:698] Add success.
I0321 00:38:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:38:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:38:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:38:53.409799  543705 memory.go:184] no items to output this cycle
I0321 00:38:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 00:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:39:03.409779  543705 memory.go:184] no items to output this cycle
I0321 00:39:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 00:39:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:39:13.409804  543705 memory.go:191] Add success.
I0321 00:39:13.409806  543705 cpu.go:282] Add success.
W0321 00:39:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:39:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:39:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:39:13.420185  543705 net.go:648] Add success.
I0321 00:39:13.423250  543705 net.go:770] primary dev: ETH0
I0321 00:39:13.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:39:13.423280  543705 net.go:698] Add success.
I0321 00:39:13.556113  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf3fcf07-f1fb-4582-8f14-1cd6c8c7754f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:39:13.556146  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:39:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:39:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:39:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 00:39:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:39:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 00:39:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:39:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:39:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:39:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:39:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:39:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:39:23.409780  543705 memory.go:184] no items to output this cycle
I0321 00:39:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 00:39:26.885626  543705 disk_info.go:125] begin check local disk info of client
I0321 00:39:26.888190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:39:26.888196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4640 0xc0000c4680]
E0321 00:39:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:39:33.409792  543705 memory.go:184] no items to output this cycle
I0321 00:39:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 00:39:38.673064  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:39:38.673071  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:39:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:39:43.410632  543705 memory.go:191] Add success.
I0321 00:39:43.409819  543705 cpu.go:282] Add success.
I0321 00:39:43.420311  543705 net.go:648] Add success.
I0321 00:39:43.423157  543705 net.go:770] primary dev: ETH0
I0321 00:39:43.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:39:43.423183  543705 net.go:698] Add success.
I0321 00:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:39:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:39:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:39:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:39:53.410270  543705 memory.go:184] no items to output this cycle
I0321 00:39:53.410282  543705 cpu.go:275] no items to output this cycle
E0321 00:40:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:40:03.409783  543705 memory.go:184] no items to output this cycle
I0321 00:40:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 00:40:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:40:13.409799  543705 memory.go:191] Add success.
I0321 00:40:13.409815  543705 cpu.go:282] Add success.
W0321 00:40:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:40:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:40:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:40:13.420193  543705 net.go:648] Add success.
I0321 00:40:13.422940  543705 net.go:770] primary dev: ETH0
I0321 00:40:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:40:13.422966  543705 net.go:698] Add success.
I0321 00:40:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:40:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:40:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0321 00:40:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:40:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 00:40:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:40:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:40:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:40:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:40:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:40:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:40:23.409790  543705 memory.go:184] no items to output this cycle
I0321 00:40:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 00:40:26.889674  543705 disk_info.go:125] begin check local disk info of client
I0321 00:40:26.892255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:40:26.892262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5040 0xc0000c5080]
E0321 00:40:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:40:33.409794  543705 memory.go:184] no items to output this cycle
I0321 00:40:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 00:40:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:40:43.409811  543705 memory.go:191] Add success.
I0321 00:40:43.409820  543705 cpu.go:282] Add success.
I0321 00:40:43.420041  543705 net.go:648] Add success.
I0321 00:40:43.422699  543705 net.go:770] primary dev: ETH0
I0321 00:40:43.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:40:43.422725  543705 net.go:698] Add success.
I0321 00:40:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:40:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:40:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:40:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:40:53.409772  543705 memory.go:184] no items to output this cycle
I0321 00:40:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 00:41:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:41:03.409780  543705 memory.go:184] no items to output this cycle
I0321 00:41:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 00:41:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:41:13.409821  543705 memory.go:191] Add success.
I0321 00:41:13.409827  543705 cpu.go:282] Add success.
W0321 00:41:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:41:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:41:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:41:13.419712  543705 net.go:648] Add success.
I0321 00:41:13.422346  543705 net.go:770] primary dev: ETH0
I0321 00:41:13.422359  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:41:13.422371  543705 net.go:698] Add success.
I0321 00:41:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:41:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:41:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 00:41:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:41:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 00:41:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:41:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:41:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:41:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:41:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:41:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:41:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:41:23.409785  543705 memory.go:184] no items to output this cycle
I0321 00:41:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 00:41:26.893674  543705 disk_info.go:125] begin check local disk info of client
I0321 00:41:26.896193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:41:26.896199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c41c0 0xc0000c4200]
E0321 00:41:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:41:33.409764  543705 memory.go:184] no items to output this cycle
I0321 00:41:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 00:41:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:41:43.409786  543705 memory.go:191] Add success.
I0321 00:41:43.409791  543705 cpu.go:282] Add success.
I0321 00:41:43.419829  543705 net.go:770] primary dev: ETH0
I0321 00:41:43.419842  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:41:43.419855  543705 net.go:698] Add success.
I0321 00:41:43.420184  543705 net.go:648] Add success.
I0321 00:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:41:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:41:53.409779  543705 memory.go:184] no items to output this cycle
I0321 00:41:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 00:42:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:42:03.409762  543705 memory.go:184] no items to output this cycle
I0321 00:42:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 00:42:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:42:13.409800  543705 memory.go:191] Add success.
I0321 00:42:13.409806  543705 cpu.go:282] Add success.
W0321 00:42:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:42:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:42:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:42:13.420340  543705 net.go:648] Add success.
I0321 00:42:13.423105  543705 net.go:770] primary dev: ETH0
I0321 00:42:13.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:42:13.423133  543705 net.go:698] Add success.
I0321 00:42:13.469922  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5f547862-e453-4584-929e-eca29fb80bda","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:42:13.469952  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 00:42:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:42:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 00:42:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:42:14.455914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:42:14.455922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:42:14.455927  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:42:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 00:42:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:42:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:42:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:42:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:42:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:42:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:42:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:42:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:42:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:42:23.409807  543705 memory.go:184] no items to output this cycle
I0321 00:42:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 00:42:26.897673  543705 disk_info.go:125] begin check local disk info of client
I0321 00:42:26.900197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:42:26.900204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6ec0 0xc0003e6f00]
E0321 00:42:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:42:33.409766  543705 memory.go:184] no items to output this cycle
I0321 00:42:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 00:42:38.673744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:42:38.673750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:42:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:42:43.410772  543705 memory.go:191] Add success.
I0321 00:42:43.409793  543705 cpu.go:282] Add success.
I0321 00:42:43.420473  543705 net.go:648] Add success.
I0321 00:42:43.423757  543705 net.go:770] primary dev: ETH0
I0321 00:42:43.423771  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:42:43.423785  543705 net.go:698] Add success.
I0321 00:42:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:42:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:42:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:42:53.409782  543705 memory.go:184] no items to output this cycle
I0321 00:42:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:43:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:43:03.409801  543705 memory.go:184] no items to output this cycle
I0321 00:43:03.409813  543705 cpu.go:275] no items to output this cycle
W0321 00:43:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:43:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:43:13.409728  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 00:43:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:43:13.409819  543705 memory.go:191] Add success.
I0321 00:43:13.409829  543705 cpu.go:282] Add success.
I0321 00:43:13.419713  543705 net.go:648] Add success.
I0321 00:43:13.422364  543705 net.go:770] primary dev: ETH0
I0321 00:43:13.422378  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:43:13.422392  543705 net.go:698] Add success.
I0321 00:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:43:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:43:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 00:43:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:43:14.456609  543705 disk_worker.go:494] system disk:vda1
I0321 00:43:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:43:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:43:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:43:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:43:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:43:23.409802  543705 memory.go:184] no items to output this cycle
I0321 00:43:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 00:43:26.901672  543705 disk_info.go:125] begin check local disk info of client
I0321 00:43:26.904442  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:43:26.904449  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377b00 0xc000377b40]
E0321 00:43:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:43:33.409771  543705 memory.go:184] no items to output this cycle
I0321 00:43:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 00:43:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:43:43.409806  543705 memory.go:191] Add success.
I0321 00:43:43.409816  543705 cpu.go:282] Add success.
I0321 00:43:43.419851  543705 net.go:648] Add success.
I0321 00:43:43.422531  543705 net.go:770] primary dev: ETH0
I0321 00:43:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:43:43.422557  543705 net.go:698] Add success.
I0321 00:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:43:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:43:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:43:53.410361  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:43:53.410382  543705 memory.go:184] no items to output this cycle
I0321 00:43:53.410391  543705 cpu.go:275] no items to output this cycle
E0321 00:44:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:44:03.409782  543705 memory.go:184] no items to output this cycle
I0321 00:44:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:44:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:44:13.409795  543705 memory.go:191] Add success.
I0321 00:44:13.409796  543705 cpu.go:282] Add success.
W0321 00:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:44:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:44:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:44:13.420320  543705 net.go:648] Add success.
I0321 00:44:13.422918  543705 net.go:770] primary dev: ETH0
I0321 00:44:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:44:13.422943  543705 net.go:698] Add success.
I0321 00:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:44:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:44:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 00:44:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:44:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 00:44:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:44:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:44:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:44:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:44:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:44:23.409781  543705 memory.go:184] no items to output this cycle
I0321 00:44:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 00:44:26.905673  543705 disk_info.go:125] begin check local disk info of client
I0321 00:44:26.908207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:44:26.908213  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056c480 0xc00056c4c0]
E0321 00:44:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:44:33.409797  543705 memory.go:184] no items to output this cycle
I0321 00:44:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 00:44:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:44:43.409779  543705 memory.go:191] Add success.
I0321 00:44:43.409801  543705 cpu.go:282] Add success.
I0321 00:44:43.419869  543705 net.go:648] Add success.
I0321 00:44:43.422311  543705 net.go:770] primary dev: ETH0
I0321 00:44:43.422330  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:44:43.422346  543705 net.go:698] Add success.
I0321 00:44:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:44:53.410334  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:44:53.410343  543705 cpu.go:275] no items to output this cycle
I0321 00:44:53.410347  543705 memory.go:184] no items to output this cycle
E0321 00:45:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:45:03.409772  543705 memory.go:184] no items to output this cycle
I0321 00:45:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 00:45:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:45:13.409795  543705 memory.go:191] Add success.
I0321 00:45:13.409810  543705 cpu.go:282] Add success.
W0321 00:45:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:45:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:45:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:45:13.420056  543705 net.go:648] Add success.
I0321 00:45:13.422519  543705 net.go:770] primary dev: ETH0
I0321 00:45:13.422533  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:45:13.422544  543705 net.go:698] Add success.
I0321 00:45:13.463628  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1d7f611a-d127-418b-b150-32546d22fc61","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:45:13.463665  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:45:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:45:14.455364  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:45:14.455382  543705 disk_worker.go:708] disk space is not compliant
W0321 00:45:14.455386  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:45:14.457040  543705 disk_worker.go:494] system disk:vda1
I0321 00:45:14.457070  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:45:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:45:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:45:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:45:23.410281  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:45:23.410300  543705 memory.go:184] no items to output this cycle
I0321 00:45:23.410303  543705 cpu.go:275] no items to output this cycle
I0321 00:45:26.909678  543705 disk_info.go:125] begin check local disk info of client
I0321 00:45:26.912404  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:45:26.912411  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005492c0 0xc000549300]
E0321 00:45:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:45:33.409761  543705 memory.go:184] no items to output this cycle
I0321 00:45:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 00:45:38.675064  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:45:38.675071  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:45:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:45:43.410619  543705 memory.go:191] Add success.
I0321 00:45:43.409814  543705 cpu.go:282] Add success.
I0321 00:45:43.420328  543705 net.go:648] Add success.
I0321 00:45:43.422938  543705 net.go:770] primary dev: ETH0
I0321 00:45:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:45:43.422966  543705 net.go:698] Add success.
I0321 00:45:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:45:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:45:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:45:53.409790  543705 memory.go:184] no items to output this cycle
I0321 00:45:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 00:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:46:03.409781  543705 memory.go:184] no items to output this cycle
I0321 00:46:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:46:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:46:13.409821  543705 memory.go:191] Add success.
I0321 00:46:13.409831  543705 cpu.go:282] Add success.
W0321 00:46:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:46:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:46:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:46:13.420268  543705 net.go:648] Add success.
I0321 00:46:13.422769  543705 net.go:770] primary dev: ETH0
I0321 00:46:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:46:13.422799  543705 net.go:698] Add success.
I0321 00:46:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:46:14.455435  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:46:14.455450  543705 disk_worker.go:708] disk space is not compliant
W0321 00:46:14.455459  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:46:14.457078  543705 disk_worker.go:494] system disk:vda1
I0321 00:46:14.457106  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:46:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:46:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:46:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:46:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:46:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:46:23.409780  543705 memory.go:184] no items to output this cycle
I0321 00:46:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 00:46:26.913677  543705 disk_info.go:125] begin check local disk info of client
I0321 00:46:26.916202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:46:26.916208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb00 0xc00007bb40]
E0321 00:46:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:46:33.409790  543705 memory.go:184] no items to output this cycle
I0321 00:46:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 00:46:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:46:43.409774  543705 memory.go:191] Add success.
I0321 00:46:43.409799  543705 cpu.go:282] Add success.
I0321 00:46:43.419854  543705 net.go:648] Add success.
I0321 00:46:43.422527  543705 net.go:770] primary dev: ETH0
I0321 00:46:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:46:43.422564  543705 net.go:698] Add success.
I0321 00:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:46:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:46:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:46:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:46:53.409765  543705 memory.go:184] no items to output this cycle
I0321 00:46:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 00:47:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:47:03.409780  543705 memory.go:184] no items to output this cycle
I0321 00:47:03.409779  543705 cpu.go:275] no items to output this cycle
E0321 00:47:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:47:13.409821  543705 memory.go:191] Add success.
I0321 00:47:13.409828  543705 cpu.go:282] Add success.
W0321 00:47:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:47:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:47:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:47:13.420208  543705 net.go:648] Add success.
I0321 00:47:13.423260  543705 net.go:770] primary dev: ETH0
I0321 00:47:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:47:13.423290  543705 net.go:698] Add success.
I0321 00:47:13.452804  543705 event_worker.go:152] Polling the log file for events...
W0321 00:47:14.455307  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:47:14.455451  543705 disk_worker.go:708] disk space is not compliant
W0321 00:47:14.455456  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:47:14.457493  543705 disk_worker.go:494] system disk:vda1
E0321 00:47:14.457567  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:47:14.457577  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:47:14.457584  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:47:14.457624  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:47:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:47:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:47:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:47:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:47:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:47:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:47:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:47:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:47:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 00:47:23.409790  543705 memory.go:184] no items to output this cycle
I0321 00:47:26.917674  543705 disk_info.go:125] begin check local disk info of client
I0321 00:47:26.920168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:47:26.920174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e69c0 0xc0003e6a00]
E0321 00:47:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:47:33.409765  543705 memory.go:184] no items to output this cycle
I0321 00:47:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 00:47:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:47:43.409790  543705 memory.go:191] Add success.
I0321 00:47:43.409792  543705 cpu.go:282] Add success.
I0321 00:47:43.419902  543705 net.go:648] Add success.
I0321 00:47:43.422616  543705 net.go:770] primary dev: ETH0
I0321 00:47:43.422640  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:47:43.422652  543705 net.go:698] Add success.
I0321 00:47:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:47:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:47:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:47:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:47:53.409792  543705 memory.go:184] no items to output this cycle
I0321 00:47:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 00:48:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:48:03.409786  543705 memory.go:184] no items to output this cycle
I0321 00:48:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 00:48:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:48:13.409833  543705 memory.go:191] Add success.
I0321 00:48:13.409838  543705 cpu.go:282] Add success.
W0321 00:48:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:48:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:48:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:48:13.420126  543705 net.go:648] Add success.
I0321 00:48:13.423267  543705 net.go:770] primary dev: ETH0
I0321 00:48:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:48:13.423292  543705 net.go:698] Add success.
I0321 00:48:13.474485  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f5fb8e5b-a436-4723-af1c-93941a311952","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:48:13.474520  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:48:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:48:14.455392  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:48:14.455406  543705 disk_worker.go:708] disk space is not compliant
W0321 00:48:14.455554  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:48:14.457035  543705 disk_worker.go:494] system disk:vda1
I0321 00:48:14.457063  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:48:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:48:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:48:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:48:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:48:23.409777  543705 memory.go:184] no items to output this cycle
I0321 00:48:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 00:48:26.921672  543705 disk_info.go:125] begin check local disk info of client
I0321 00:48:26.924234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:48:26.924242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab7c0 0xc0001ab800]
E0321 00:48:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:48:33.409791  543705 memory.go:184] no items to output this cycle
I0321 00:48:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 00:48:38.675211  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:48:38.675217  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:48:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:48:43.410668  543705 memory.go:191] Add success.
I0321 00:48:43.409799  543705 cpu.go:282] Add success.
I0321 00:48:43.420383  543705 net.go:648] Add success.
I0321 00:48:43.423339  543705 net.go:770] primary dev: ETH0
I0321 00:48:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:48:43.423364  543705 net.go:698] Add success.
I0321 00:48:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:48:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:48:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:48:53.409767  543705 memory.go:184] no items to output this cycle
I0321 00:48:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 00:49:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:49:03.409765  543705 memory.go:184] no items to output this cycle
I0321 00:49:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 00:49:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:49:13.409791  543705 memory.go:191] Add success.
I0321 00:49:13.409797  543705 cpu.go:282] Add success.
W0321 00:49:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:49:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:49:13.420045  543705 net.go:648] Add success.
I0321 00:49:13.422855  543705 net.go:770] primary dev: ETH0
I0321 00:49:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:49:13.422881  543705 net.go:698] Add success.
I0321 00:49:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:49:14.455370  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:49:14.455493  543705 disk_worker.go:708] disk space is not compliant
W0321 00:49:14.455498  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:49:14.457178  543705 disk_worker.go:494] system disk:vda1
I0321 00:49:14.457207  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:49:15.455947  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:49:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:49:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:49:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:49:23.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:49:23.410374  543705 cpu.go:275] no items to output this cycle
I0321 00:49:23.410382  543705 memory.go:184] no items to output this cycle
I0321 00:49:26.925671  543705 disk_info.go:125] begin check local disk info of client
I0321 00:49:26.928136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:49:26.928142  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1880 0xc0003b18c0]
E0321 00:49:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:49:33.409773  543705 memory.go:184] no items to output this cycle
I0321 00:49:33.409776  543705 cpu.go:275] no items to output this cycle
E0321 00:49:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:49:43.409790  543705 memory.go:191] Add success.
I0321 00:49:43.409792  543705 cpu.go:282] Add success.
I0321 00:49:43.419826  543705 net.go:648] Add success.
I0321 00:49:43.422865  543705 net.go:770] primary dev: ETH0
I0321 00:49:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:49:43.422891  543705 net.go:698] Add success.
I0321 00:49:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:49:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:49:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:49:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:49:53.409768  543705 memory.go:184] no items to output this cycle
I0321 00:49:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 00:50:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:50:03.409761  543705 memory.go:184] no items to output this cycle
I0321 00:50:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 00:50:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:50:13.409799  543705 memory.go:191] Add success.
I0321 00:50:13.409803  543705 cpu.go:282] Add success.
W0321 00:50:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:50:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:50:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:50:13.420049  543705 net.go:648] Add success.
I0321 00:50:13.422806  543705 net.go:770] primary dev: ETH0
I0321 00:50:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:50:13.422841  543705 net.go:698] Add success.
I0321 00:50:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:50:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:50:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 00:50:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:50:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 00:50:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:50:15.456015  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:50:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:50:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:50:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:50:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:50:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:50:23.409813  543705 memory.go:184] no items to output this cycle
I0321 00:50:23.409825  543705 cpu.go:275] no items to output this cycle
I0321 00:50:26.929678  543705 disk_info.go:125] begin check local disk info of client
I0321 00:50:26.932252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:50:26.932259  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4180 0xc0000c41c0]
E0321 00:50:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:50:33.409794  543705 memory.go:184] no items to output this cycle
I0321 00:50:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 00:50:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:50:43.409789  543705 memory.go:191] Add success.
I0321 00:50:43.409789  543705 cpu.go:282] Add success.
I0321 00:50:43.420034  543705 net.go:648] Add success.
I0321 00:50:43.422797  543705 net.go:770] primary dev: ETH0
I0321 00:50:43.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:50:43.422827  543705 net.go:698] Add success.
I0321 00:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:50:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:50:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:50:53.410342  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:50:53.410357  543705 memory.go:184] no items to output this cycle
I0321 00:50:53.410383  543705 cpu.go:275] no items to output this cycle
E0321 00:51:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:51:03.409775  543705 memory.go:184] no items to output this cycle
I0321 00:51:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 00:51:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:51:13.409819  543705 memory.go:191] Add success.
I0321 00:51:13.409823  543705 cpu.go:282] Add success.
W0321 00:51:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:51:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:51:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:51:13.420170  543705 net.go:648] Add success.
I0321 00:51:13.422836  543705 net.go:770] primary dev: ETH0
I0321 00:51:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:51:13.422863  543705 net.go:698] Add success.
I0321 00:51:13.468956  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8db016b3-607d-4761-8021-c9796ff5c5bf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:51:13.468988  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:51:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:51:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 00:51:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:51:14.456963  543705 disk_worker.go:494] system disk:vda1
I0321 00:51:14.456994  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:51:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:51:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:51:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:51:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:51:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:51:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:51:23.409783  543705 memory.go:184] no items to output this cycle
I0321 00:51:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 00:51:26.933672  543705 disk_info.go:125] begin check local disk info of client
I0321 00:51:26.936124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:51:26.936130  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484940 0xc000484980]
E0321 00:51:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:51:33.409765  543705 memory.go:184] no items to output this cycle
I0321 00:51:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 00:51:38.676084  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:51:38.676090  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:51:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:51:43.410756  543705 memory.go:191] Add success.
I0321 00:51:43.409847  543705 cpu.go:282] Add success.
I0321 00:51:43.420543  543705 net.go:648] Add success.
I0321 00:51:43.423280  543705 net.go:770] primary dev: ETH0
I0321 00:51:43.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:51:43.423315  543705 net.go:698] Add success.
I0321 00:51:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:51:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:51:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:51:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:51:53.409764  543705 memory.go:184] no items to output this cycle
I0321 00:51:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 00:52:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:52:03.409793  543705 memory.go:184] no items to output this cycle
I0321 00:52:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 00:52:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:52:13.409787  543705 memory.go:191] Add success.
I0321 00:52:13.409808  543705 cpu.go:282] Add success.
W0321 00:52:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:52:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:52:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:52:13.420135  543705 net.go:648] Add success.
I0321 00:52:13.422736  543705 net.go:770] primary dev: ETH0
I0321 00:52:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:52:13.422763  543705 net.go:698] Add success.
W0321 00:52:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:52:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 00:52:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:52:14.456811  543705 disk_worker.go:494] system disk:vda1
I0321 00:52:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:52:14.457663  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:52:14.457671  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:52:14.457676  543705 custom_config.go:64] query custom config with name: gpu
E0321 00:52:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:52:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:52:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:52:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:52:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:52:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:52:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:52:23.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:52:23.410396  543705 memory.go:184] no items to output this cycle
I0321 00:52:23.410400  543705 cpu.go:275] no items to output this cycle
I0321 00:52:26.937685  543705 disk_info.go:125] begin check local disk info of client
I0321 00:52:26.940210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:52:26.940216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002afb80 0xc0002afbc0]
E0321 00:52:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:52:33.409802  543705 memory.go:184] no items to output this cycle
I0321 00:52:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 00:52:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:52:43.409801  543705 memory.go:191] Add success.
I0321 00:52:43.409802  543705 cpu.go:282] Add success.
I0321 00:52:43.419949  543705 net.go:648] Add success.
I0321 00:52:43.422850  543705 net.go:770] primary dev: ETH0
I0321 00:52:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:52:43.422879  543705 net.go:698] Add success.
I0321 00:52:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:52:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:52:53.409777  543705 memory.go:184] no items to output this cycle
I0321 00:52:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 00:53:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:53:03.409803  543705 memory.go:184] no items to output this cycle
I0321 00:53:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 00:53:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:53:13.409806  543705 memory.go:191] Add success.
I0321 00:53:13.409807  543705 cpu.go:282] Add success.
W0321 00:53:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:53:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:53:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:53:13.420135  543705 net.go:648] Add success.
I0321 00:53:13.422764  543705 net.go:770] primary dev: ETH0
I0321 00:53:13.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:53:13.422791  543705 net.go:698] Add success.
I0321 00:53:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:53:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:53:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 00:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:53:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 00:53:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:53:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:53:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:53:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:53:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:53:23.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:53:23.409878  543705 memory.go:184] no items to output this cycle
I0321 00:53:23.409948  543705 cpu.go:275] no items to output this cycle
I0321 00:53:26.941671  543705 disk_info.go:125] begin check local disk info of client
I0321 00:53:26.944161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:53:26.944167  543705 disk_info.go:196] parse disk info done, disk is : [0xc000321c00 0xc000321c40]
E0321 00:53:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:53:33.409774  543705 memory.go:184] no items to output this cycle
I0321 00:53:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 00:53:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:53:43.409795  543705 memory.go:191] Add success.
I0321 00:53:43.409816  543705 cpu.go:282] Add success.
I0321 00:53:43.420034  543705 net.go:648] Add success.
I0321 00:53:43.422576  543705 net.go:770] primary dev: ETH0
I0321 00:53:43.422590  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:53:43.422603  543705 net.go:698] Add success.
I0321 00:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:53:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:53:53.409796  543705 memory.go:184] no items to output this cycle
I0321 00:53:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 00:54:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:54:03.409777  543705 memory.go:184] no items to output this cycle
I0321 00:54:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 00:54:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:54:13.409798  543705 memory.go:191] Add success.
I0321 00:54:13.409824  543705 cpu.go:282] Add success.
W0321 00:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:54:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:54:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:54:13.420263  543705 net.go:648] Add success.
I0321 00:54:13.423121  543705 net.go:770] primary dev: ETH0
I0321 00:54:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:54:13.423150  543705 net.go:698] Add success.
I0321 00:54:13.463050  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ed66baa5-03b1-48b8-82ec-219510f6e62c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:54:13.463082  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 00:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:54:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:54:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 00:54:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:54:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 00:54:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:54:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:54:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:54:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:54:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:54:23.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:54:23.409911  543705 cpu.go:275] no items to output this cycle
I0321 00:54:23.409912  543705 memory.go:184] no items to output this cycle
I0321 00:54:26.945668  543705 disk_info.go:125] begin check local disk info of client
I0321 00:54:26.948245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:54:26.948252  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 00:54:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:54:33.409802  543705 memory.go:184] no items to output this cycle
I0321 00:54:33.409818  543705 cpu.go:275] no items to output this cycle
I0321 00:54:38.676231  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:54:38.676237  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:54:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:54:43.410525  543705 memory.go:191] Add success.
I0321 00:54:43.409810  543705 cpu.go:282] Add success.
I0321 00:54:43.420279  543705 net.go:648] Add success.
I0321 00:54:43.422931  543705 net.go:770] primary dev: ETH0
I0321 00:54:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:54:43.422960  543705 net.go:698] Add success.
I0321 00:54:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:54:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:54:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:54:53.410264  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:54:53.410282  543705 memory.go:184] no items to output this cycle
I0321 00:54:53.410283  543705 cpu.go:275] no items to output this cycle
E0321 00:55:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:55:03.409771  543705 memory.go:184] no items to output this cycle
I0321 00:55:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 00:55:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:55:13.409796  543705 memory.go:191] Add success.
I0321 00:55:13.409810  543705 cpu.go:282] Add success.
W0321 00:55:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:55:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:55:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:55:13.420060  543705 net.go:648] Add success.
I0321 00:55:13.422732  543705 net.go:770] primary dev: ETH0
I0321 00:55:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:55:13.422758  543705 net.go:698] Add success.
I0321 00:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:55:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:55:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 00:55:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:55:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 00:55:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:55:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:55:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:55:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:55:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:55:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:55:23.409802  543705 memory.go:184] no items to output this cycle
I0321 00:55:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 00:55:26.949673  543705 disk_info.go:125] begin check local disk info of client
I0321 00:55:26.952225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:55:26.952231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c8c00 0xc0004c8c40]
E0321 00:55:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:55:33.409770  543705 memory.go:184] no items to output this cycle
I0321 00:55:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 00:55:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:55:43.409774  543705 memory.go:191] Add success.
I0321 00:55:43.409787  543705 cpu.go:282] Add success.
I0321 00:55:43.420025  543705 net.go:648] Add success.
I0321 00:55:43.420998  543705 net.go:770] primary dev: ETH0
I0321 00:55:43.421012  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:55:43.421023  543705 net.go:698] Add success.
I0321 00:55:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:55:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:55:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:55:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:55:53.409769  543705 memory.go:184] no items to output this cycle
I0321 00:55:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 00:56:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:56:03.409768  543705 memory.go:184] no items to output this cycle
I0321 00:56:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 00:56:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:56:13.409818  543705 memory.go:191] Add success.
I0321 00:56:13.409824  543705 cpu.go:282] Add success.
W0321 00:56:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:56:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:56:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:56:13.420130  543705 net.go:648] Add success.
I0321 00:56:13.422793  543705 net.go:770] primary dev: ETH0
I0321 00:56:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:56:13.422823  543705 net.go:698] Add success.
I0321 00:56:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:56:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:56:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0321 00:56:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:56:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 00:56:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:56:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:56:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:56:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:56:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:56:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:56:23.410394  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:56:23.410414  543705 memory.go:184] no items to output this cycle
I0321 00:56:23.410418  543705 cpu.go:275] no items to output this cycle
I0321 00:56:26.953666  543705 disk_info.go:125] begin check local disk info of client
I0321 00:56:26.956198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:56:26.956205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a48c0 0xc0004a4900]
E0321 00:56:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:56:33.409766  543705 memory.go:184] no items to output this cycle
I0321 00:56:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 00:56:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:56:43.409816  543705 memory.go:191] Add success.
I0321 00:56:43.409824  543705 cpu.go:282] Add success.
I0321 00:56:43.420073  543705 net.go:648] Add success.
I0321 00:56:43.422778  543705 net.go:770] primary dev: ETH0
I0321 00:56:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:56:43.422801  543705 net.go:698] Add success.
I0321 00:56:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:56:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:56:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:56:53.409794  543705 memory.go:184] no items to output this cycle
I0321 00:56:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 00:57:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:57:03.409777  543705 memory.go:184] no items to output this cycle
I0321 00:57:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 00:57:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:57:13.409821  543705 memory.go:191] Add success.
I0321 00:57:13.409827  543705 cpu.go:282] Add success.
W0321 00:57:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:57:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:57:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:57:13.420211  543705 net.go:648] Add success.
I0321 00:57:13.422923  543705 net.go:770] primary dev: ETH0
I0321 00:57:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:57:13.422953  543705 net.go:698] Add success.
I0321 00:57:13.429219  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 00:57:13.453393  543705 event_worker.go:152] Polling the log file for events...
I0321 00:57:13.463713  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e69da210-6d83-49f0-a382-6e9dd2cb686f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 00:57:13.463748  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 00:57:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:57:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 00:57:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0321 00:57:14.456954  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 00:57:14.456963  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 00:57:14.456981  543705 custom_config.go:64] query custom config with name: gpu
I0321 00:57:14.457030  543705 disk_worker.go:494] system disk:vda1
I0321 00:57:14.457083  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 00:57:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 00:57:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:57:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 00:57:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 00:57:16.458011  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:57:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:57:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:57:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:57:23.409806  543705 memory.go:184] no items to output this cycle
I0321 00:57:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 00:57:26.957673  543705 disk_info.go:125] begin check local disk info of client
I0321 00:57:26.960177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:57:26.960183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fed80 0xc0003fedc0]
E0321 00:57:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:57:33.409766  543705 memory.go:184] no items to output this cycle
I0321 00:57:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 00:57:38.677089  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 00:57:38.677095  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 00:57:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:57:43.410538  543705 memory.go:191] Add success.
I0321 00:57:43.409818  543705 cpu.go:282] Add success.
I0321 00:57:43.420299  543705 net.go:648] Add success.
I0321 00:57:43.422967  543705 net.go:770] primary dev: ETH0
I0321 00:57:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:57:43.422991  543705 net.go:698] Add success.
I0321 00:57:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:57:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:57:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:57:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:57:53.409795  543705 memory.go:184] no items to output this cycle
I0321 00:57:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 00:58:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:58:03.409770  543705 memory.go:184] no items to output this cycle
I0321 00:58:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 00:58:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:58:13.409793  543705 memory.go:191] Add success.
I0321 00:58:13.409796  543705 cpu.go:282] Add success.
W0321 00:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:58:13.412446  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:58:13.412450  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:58:13.420162  543705 net.go:648] Add success.
I0321 00:58:13.421784  543705 net.go:770] primary dev: ETH0
I0321 00:58:13.421797  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:58:13.421810  543705 net.go:698] Add success.
I0321 00:58:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:58:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:58:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 00:58:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:58:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 00:58:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:58:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:58:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:58:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:58:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:58:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:58:23.409785  543705 memory.go:184] no items to output this cycle
I0321 00:58:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 00:58:26.961675  543705 disk_info.go:125] begin check local disk info of client
I0321 00:58:26.964218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:58:26.964224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4440 0xc0003e4480]
E0321 00:58:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:58:33.409801  543705 memory.go:184] no items to output this cycle
I0321 00:58:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 00:58:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:58:43.409789  543705 memory.go:191] Add success.
I0321 00:58:43.409794  543705 cpu.go:282] Add success.
I0321 00:58:43.419706  543705 net.go:648] Add success.
I0321 00:58:43.422440  543705 net.go:770] primary dev: ETH0
I0321 00:58:43.422453  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:58:43.422464  543705 net.go:698] Add success.
I0321 00:58:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:58:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:58:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:58:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:58:53.409802  543705 memory.go:184] no items to output this cycle
I0321 00:58:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 00:59:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:59:03.409822  543705 memory.go:184] no items to output this cycle
I0321 00:59:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 00:59:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:59:13.409794  543705 memory.go:191] Add success.
I0321 00:59:13.409798  543705 cpu.go:282] Add success.
W0321 00:59:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 00:59:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 00:59:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 00:59:13.420053  543705 net.go:648] Add success.
I0321 00:59:13.422846  543705 net.go:770] primary dev: ETH0
I0321 00:59:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:59:13.422870  543705 net.go:698] Add success.
I0321 00:59:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 00:59:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 00:59:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 00:59:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 00:59:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 00:59:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 00:59:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 00:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:59:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:59:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 00:59:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 00:59:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:59:23.409807  543705 memory.go:184] no items to output this cycle
I0321 00:59:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 00:59:26.965672  543705 disk_info.go:125] begin check local disk info of client
I0321 00:59:26.968186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 00:59:26.968192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec500 0xc0000ec540]
E0321 00:59:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:59:33.409772  543705 memory.go:184] no items to output this cycle
I0321 00:59:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 00:59:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:59:43.409789  543705 memory.go:191] Add success.
I0321 00:59:43.409790  543705 cpu.go:282] Add success.
I0321 00:59:43.419845  543705 net.go:648] Add success.
I0321 00:59:43.422619  543705 net.go:770] primary dev: ETH0
I0321 00:59:43.422631  543705 net.go:802] Send network stats successfully!,count is 6
I0321 00:59:43.422667  543705 net.go:698] Add success.
I0321 00:59:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 00:59:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 00:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 00:59:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 00:59:53.409765  543705 memory.go:184] no items to output this cycle
I0321 00:59:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 01:00:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:00:03.409808  543705 memory.go:184] no items to output this cycle
I0321 01:00:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 01:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:00:13.409791  543705 memory.go:191] Add success.
W0321 01:00:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 01:00:13.409822  543705 cpu.go:282] Add success.
W0321 01:00:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:00:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:00:13.420540  543705 net.go:648] Add success.
I0321 01:00:13.423162  543705 net.go:770] primary dev: ETH0
I0321 01:00:13.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:00:13.423199  543705 net.go:698] Add success.
I0321 01:00:13.472407  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12e76eea-e04b-4b39-8174-3c9ae88a3693","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:00:13.472440  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:00:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:00:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 01:00:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:00:14.456765  543705 disk_worker.go:494] system disk:vda1
I0321 01:00:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:00:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:00:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:00:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:00:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:00:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:00:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:00:23.409810  543705 memory.go:184] no items to output this cycle
I0321 01:00:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 01:00:26.969674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:00:26.972272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:00:26.972278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2640 0xc0003e2680]
E0321 01:00:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:00:33.409767  543705 memory.go:184] no items to output this cycle
I0321 01:00:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 01:00:38.677732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:00:38.677739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:00:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:00:43.410717  543705 memory.go:191] Add success.
I0321 01:00:43.409826  543705 cpu.go:282] Add success.
I0321 01:00:43.420447  543705 net.go:648] Add success.
I0321 01:00:43.423138  543705 net.go:770] primary dev: ETH0
I0321 01:00:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:00:43.423164  543705 net.go:698] Add success.
I0321 01:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:00:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:00:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:00:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:00:53.409889  543705 memory.go:184] no items to output this cycle
I0321 01:00:53.409892  543705 cpu.go:275] no items to output this cycle
E0321 01:01:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:01:03.409768  543705 memory.go:184] no items to output this cycle
I0321 01:01:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 01:01:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:01:13.409827  543705 memory.go:191] Add success.
I0321 01:01:13.409827  543705 cpu.go:282] Add success.
W0321 01:01:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:01:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:01:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:01:13.420394  543705 net.go:648] Add success.
I0321 01:01:13.423146  543705 net.go:770] primary dev: ETH0
I0321 01:01:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:01:13.423172  543705 net.go:698] Add success.
I0321 01:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:01:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:01:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 01:01:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:01:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 01:01:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:01:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:01:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:01:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:01:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:01:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:01:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:01:23.409804  543705 memory.go:184] no items to output this cycle
I0321 01:01:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 01:01:26.973676  543705 disk_info.go:125] begin check local disk info of client
I0321 01:01:26.976280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:01:26.976288  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fba00 0xc0001fba40]
E0321 01:01:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:01:33.409799  543705 memory.go:184] no items to output this cycle
I0321 01:01:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 01:01:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:01:43.409781  543705 memory.go:191] Add success.
I0321 01:01:43.409806  543705 cpu.go:282] Add success.
I0321 01:01:43.419912  543705 net.go:648] Add success.
I0321 01:01:43.422935  543705 net.go:770] primary dev: ETH0
I0321 01:01:43.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:01:43.422960  543705 net.go:698] Add success.
I0321 01:01:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:01:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:01:53.409790  543705 memory.go:184] no items to output this cycle
I0321 01:01:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 01:02:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:02:03.409779  543705 cpu.go:275] no items to output this cycle
I0321 01:02:03.409781  543705 memory.go:184] no items to output this cycle
E0321 01:02:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:02:13.409825  543705 memory.go:191] Add success.
I0321 01:02:13.409832  543705 cpu.go:282] Add success.
W0321 01:02:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:02:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:02:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:02:13.420255  543705 net.go:648] Add success.
I0321 01:02:13.423225  543705 net.go:770] primary dev: ETH0
I0321 01:02:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:02:13.423250  543705 net.go:698] Add success.
W0321 01:02:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:02:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 01:02:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:02:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 01:02:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:02:14.456952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:02:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:02:14.456967  543705 custom_config.go:64] query custom config with name: gpu
E0321 01:02:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:02:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:02:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:02:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:02:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:02:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:02:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:02:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:02:23.409781  543705 memory.go:184] no items to output this cycle
I0321 01:02:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 01:02:26.977675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:02:26.980201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:02:26.980207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000eca80 0xc0000ecac0]
E0321 01:02:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:02:33.409789  543705 memory.go:184] no items to output this cycle
I0321 01:02:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 01:02:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:02:43.409801  543705 memory.go:191] Add success.
I0321 01:02:43.409809  543705 cpu.go:282] Add success.
I0321 01:02:43.419890  543705 net.go:648] Add success.
I0321 01:02:43.422616  543705 net.go:770] primary dev: ETH0
I0321 01:02:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:02:43.422643  543705 net.go:698] Add success.
I0321 01:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:02:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:02:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:02:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:02:53.409794  543705 memory.go:184] no items to output this cycle
I0321 01:02:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 01:03:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:03:03.409782  543705 memory.go:184] no items to output this cycle
I0321 01:03:03.409785  543705 cpu.go:275] no items to output this cycle
I0321 01:03:13.409965  543705 cpu.go:282] Add success.
E0321 01:03:13.410058  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:03:13.410078  543705 memory.go:191] Add success.
W0321 01:03:13.410108  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:03:13.410120  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:03:13.410123  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:03:13.419710  543705 net.go:648] Add success.
I0321 01:03:13.422548  543705 net.go:770] primary dev: ETH0
I0321 01:03:13.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:03:13.422572  543705 net.go:698] Add success.
I0321 01:03:13.464103  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"470dd185-8a6f-4133-b1f0-716b5b9eb4c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:03:13.464134  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:03:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:03:14.455083  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:03:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0321 01:03:14.455148  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:03:14.456473  543705 disk_worker.go:494] system disk:vda1
I0321 01:03:14.456517  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:03:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:03:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:03:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:03:23.409786  543705 memory.go:184] no items to output this cycle
I0321 01:03:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 01:03:26.981673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:03:26.984144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:03:26.984150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec580 0xc0000ec5c0]
E0321 01:03:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:03:33.409797  543705 memory.go:184] no items to output this cycle
I0321 01:03:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 01:03:38.679090  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:03:38.679096  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:03:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:03:43.410620  543705 memory.go:191] Add success.
I0321 01:03:43.409810  543705 cpu.go:282] Add success.
I0321 01:03:43.420313  543705 net.go:648] Add success.
I0321 01:03:43.423253  543705 net.go:770] primary dev: ETH0
I0321 01:03:43.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:03:43.423279  543705 net.go:698] Add success.
I0321 01:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:03:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:03:53.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:03:53.410376  543705 memory.go:184] no items to output this cycle
I0321 01:03:53.410414  543705 cpu.go:275] no items to output this cycle
E0321 01:04:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:04:03.409775  543705 memory.go:184] no items to output this cycle
I0321 01:04:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 01:04:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:04:13.409796  543705 memory.go:191] Add success.
I0321 01:04:13.409823  543705 cpu.go:282] Add success.
W0321 01:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:04:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:04:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:04:13.420169  543705 net.go:648] Add success.
I0321 01:04:13.422719  543705 net.go:770] primary dev: ETH0
I0321 01:04:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:04:13.422748  543705 net.go:698] Add success.
I0321 01:04:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:04:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:04:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 01:04:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:04:14.456596  543705 disk_worker.go:494] system disk:vda1
I0321 01:04:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:04:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:04:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:04:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:04:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:04:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:04:23.409779  543705 memory.go:184] no items to output this cycle
I0321 01:04:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 01:04:26.985676  543705 disk_info.go:125] begin check local disk info of client
I0321 01:04:26.988249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:04:26.988255  543705 disk_info.go:196] parse disk info done, disk is : [0xc000261300 0xc000261340]
E0321 01:04:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:04:33.409776  543705 memory.go:184] no items to output this cycle
I0321 01:04:33.409777  543705 cpu.go:275] no items to output this cycle
E0321 01:04:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:04:43.409785  543705 memory.go:191] Add success.
I0321 01:04:43.409809  543705 cpu.go:282] Add success.
I0321 01:04:43.419957  543705 net.go:648] Add success.
I0321 01:04:43.423111  543705 net.go:770] primary dev: ETH0
I0321 01:04:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:04:43.423144  543705 net.go:698] Add success.
I0321 01:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:04:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:04:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:04:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:04:53.409759  543705 memory.go:184] no items to output this cycle
I0321 01:04:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 01:05:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:05:03.409779  543705 memory.go:184] no items to output this cycle
I0321 01:05:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 01:05:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:05:13.409818  543705 memory.go:191] Add success.
I0321 01:05:13.409825  543705 cpu.go:282] Add success.
W0321 01:05:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:05:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:05:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:05:13.420286  543705 net.go:648] Add success.
I0321 01:05:13.423093  543705 net.go:770] primary dev: ETH0
I0321 01:05:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:05:13.423118  543705 net.go:698] Add success.
I0321 01:05:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:05:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:05:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 01:05:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:05:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 01:05:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:05:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:05:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:05:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:05:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:05:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:05:23.409792  543705 memory.go:184] no items to output this cycle
I0321 01:05:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 01:05:26.989673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:05:26.992200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:05:26.992206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faa80 0xc0001faac0]
E0321 01:05:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:05:33.409803  543705 memory.go:184] no items to output this cycle
I0321 01:05:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 01:05:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:05:43.409790  543705 memory.go:191] Add success.
I0321 01:05:43.409791  543705 cpu.go:282] Add success.
I0321 01:05:43.419835  543705 net.go:648] Add success.
I0321 01:05:43.422254  543705 net.go:770] primary dev: ETH0
I0321 01:05:43.422269  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:05:43.422283  543705 net.go:698] Add success.
I0321 01:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:05:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:05:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:05:53.409776  543705 memory.go:184] no items to output this cycle
I0321 01:05:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 01:06:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:06:03.409800  543705 memory.go:184] no items to output this cycle
I0321 01:06:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 01:06:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:06:13.409823  543705 memory.go:191] Add success.
I0321 01:06:13.409828  543705 cpu.go:282] Add success.
W0321 01:06:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:06:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:06:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:06:13.420342  543705 net.go:648] Add success.
I0321 01:06:13.423471  543705 net.go:770] primary dev: ETH0
I0321 01:06:13.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:06:13.423495  543705 net.go:698] Add success.
I0321 01:06:13.477635  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b76ad627-0e18-4fb4-bdf0-4d291502543f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:06:13.477675  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:06:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:06:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:06:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 01:06:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:06:14.456520  543705 disk_worker.go:494] system disk:vda1
I0321 01:06:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:06:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:06:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:06:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:06:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:06:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:06:23.409780  543705 memory.go:184] no items to output this cycle
I0321 01:06:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 01:06:26.993675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:06:26.996223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:06:26.996229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357b00 0xc000357b40]
E0321 01:06:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:06:33.409775  543705 memory.go:184] no items to output this cycle
I0321 01:06:33.409775  543705 cpu.go:275] no items to output this cycle
I0321 01:06:38.679240  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:06:38.679246  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:06:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:06:43.410717  543705 memory.go:191] Add success.
I0321 01:06:43.409789  543705 cpu.go:282] Add success.
I0321 01:06:43.420500  543705 net.go:648] Add success.
I0321 01:06:43.423044  543705 net.go:770] primary dev: ETH0
I0321 01:06:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:06:43.423073  543705 net.go:698] Add success.
I0321 01:06:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:06:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:06:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:06:53.409793  543705 memory.go:184] no items to output this cycle
I0321 01:06:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 01:07:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:07:03.409777  543705 memory.go:184] no items to output this cycle
I0321 01:07:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 01:07:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:07:13.409821  543705 memory.go:191] Add success.
I0321 01:07:13.409822  543705 cpu.go:282] Add success.
W0321 01:07:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:07:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:07:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:07:13.420540  543705 net.go:648] Add success.
I0321 01:07:13.423306  543705 net.go:770] primary dev: ETH0
I0321 01:07:13.423318  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:07:13.423330  543705 net.go:698] Add success.
I0321 01:07:13.452774  543705 event_worker.go:152] Polling the log file for events...
W0321 01:07:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:07:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 01:07:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:07:14.456787  543705 disk_worker.go:494] system disk:vda1
I0321 01:07:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:07:14.457169  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:07:14.457177  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:07:14.457181  543705 custom_config.go:64] query custom config with name: gpu
E0321 01:07:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:07:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:07:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:07:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:07:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:07:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:07:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:07:23.409797  543705 memory.go:184] no items to output this cycle
I0321 01:07:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 01:07:26.997673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:07:27.000402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:07:27.000407  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056de00 0xc00056de40]
E0321 01:07:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:07:33.409784  543705 memory.go:184] no items to output this cycle
I0321 01:07:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:07:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:07:43.409784  543705 memory.go:191] Add success.
I0321 01:07:43.409804  543705 cpu.go:282] Add success.
I0321 01:07:43.419888  543705 net.go:648] Add success.
I0321 01:07:43.422540  543705 net.go:770] primary dev: ETH0
I0321 01:07:43.422555  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:07:43.422570  543705 net.go:698] Add success.
I0321 01:07:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:07:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:07:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:07:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:07:53.409790  543705 memory.go:184] no items to output this cycle
I0321 01:07:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 01:08:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:08:03.409810  543705 memory.go:184] no items to output this cycle
I0321 01:08:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 01:08:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:08:13.409831  543705 memory.go:191] Add success.
I0321 01:08:13.409832  543705 cpu.go:282] Add success.
W0321 01:08:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:08:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:08:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:08:13.420408  543705 net.go:648] Add success.
I0321 01:08:13.423648  543705 net.go:770] primary dev: ETH0
I0321 01:08:13.423660  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:08:13.423672  543705 net.go:698] Add success.
I0321 01:08:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:08:14.455082  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:08:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 01:08:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:08:14.456549  543705 disk_worker.go:494] system disk:vda1
I0321 01:08:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:08:15.455613  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:08:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:08:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:08:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:08:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:08:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:08:23.409818  543705 memory.go:184] no items to output this cycle
I0321 01:08:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 01:08:27.001680  543705 disk_info.go:125] begin check local disk info of client
I0321 01:08:27.004244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:08:27.004251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003041c0 0xc000304200]
E0321 01:08:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:08:33.409780  543705 memory.go:184] no items to output this cycle
I0321 01:08:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 01:08:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:08:43.409798  543705 memory.go:191] Add success.
I0321 01:08:43.409797  543705 cpu.go:282] Add success.
I0321 01:08:43.419970  543705 net.go:648] Add success.
I0321 01:08:43.422642  543705 net.go:770] primary dev: ETH0
I0321 01:08:43.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:08:43.422667  543705 net.go:698] Add success.
I0321 01:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:08:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:08:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:08:53.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:08:53.410410  543705 memory.go:184] no items to output this cycle
I0321 01:08:53.410410  543705 cpu.go:275] no items to output this cycle
E0321 01:09:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:09:03.409781  543705 memory.go:184] no items to output this cycle
I0321 01:09:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 01:09:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:09:13.409802  543705 memory.go:191] Add success.
I0321 01:09:13.409819  543705 cpu.go:282] Add success.
W0321 01:09:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:09:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:09:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:09:13.420452  543705 net.go:648] Add success.
I0321 01:09:13.423240  543705 net.go:770] primary dev: ETH0
I0321 01:09:13.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:09:13.423267  543705 net.go:698] Add success.
I0321 01:09:13.463301  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e0dfe152-c528-4ba8-a264-fcec8e9845e4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:09:13.463334  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:09:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:09:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:09:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 01:09:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:09:14.456683  543705 disk_worker.go:494] system disk:vda1
I0321 01:09:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:09:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:09:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:09:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:09:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:09:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:09:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:09:23.409808  543705 memory.go:184] no items to output this cycle
I0321 01:09:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 01:09:27.005671  543705 disk_info.go:125] begin check local disk info of client
I0321 01:09:27.008156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:09:27.008162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056cbc0 0xc00056cc00]
E0321 01:09:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:09:33.409790  543705 cpu.go:275] no items to output this cycle
I0321 01:09:33.409797  543705 memory.go:184] no items to output this cycle
I0321 01:09:38.680095  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:09:38.680102  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:09:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:09:43.410688  543705 memory.go:191] Add success.
I0321 01:09:43.409789  543705 cpu.go:282] Add success.
I0321 01:09:43.420384  543705 net.go:648] Add success.
I0321 01:09:43.423433  543705 net.go:770] primary dev: ETH0
I0321 01:09:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:09:43.423459  543705 net.go:698] Add success.
I0321 01:09:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:09:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:09:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:09:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:09:53.409771  543705 memory.go:184] no items to output this cycle
I0321 01:09:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 01:10:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:10:03.409772  543705 memory.go:184] no items to output this cycle
I0321 01:10:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:10:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:10:13.409804  543705 memory.go:191] Add success.
I0321 01:10:13.409805  543705 cpu.go:282] Add success.
W0321 01:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:10:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:10:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:10:13.419725  543705 net.go:648] Add success.
I0321 01:10:13.422482  543705 net.go:770] primary dev: ETH0
I0321 01:10:13.422501  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:10:13.422515  543705 net.go:698] Add success.
I0321 01:10:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:10:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:10:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 01:10:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:10:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 01:10:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:10:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:10:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:10:23.409806  543705 memory.go:184] no items to output this cycle
I0321 01:10:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 01:10:27.009673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:10:27.012245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:10:27.012251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb900 0xc0001fb940]
E0321 01:10:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:10:33.409757  543705 memory.go:184] no items to output this cycle
I0321 01:10:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 01:10:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:10:43.409824  543705 memory.go:191] Add success.
I0321 01:10:43.409832  543705 cpu.go:282] Add success.
I0321 01:10:43.420151  543705 net.go:648] Add success.
I0321 01:10:43.422785  543705 net.go:770] primary dev: ETH0
I0321 01:10:43.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:10:43.422814  543705 net.go:698] Add success.
I0321 01:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:10:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:10:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:10:53.409780  543705 memory.go:184] no items to output this cycle
I0321 01:10:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 01:11:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:11:03.409761  543705 memory.go:184] no items to output this cycle
I0321 01:11:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 01:11:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:11:13.409823  543705 memory.go:191] Add success.
I0321 01:11:13.409823  543705 cpu.go:282] Add success.
W0321 01:11:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:11:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:11:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:11:13.420578  543705 net.go:648] Add success.
I0321 01:11:13.423844  543705 net.go:770] primary dev: ETH0
I0321 01:11:13.423863  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:11:13.423878  543705 net.go:698] Add success.
I0321 01:11:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:11:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:11:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 01:11:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:11:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 01:11:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:11:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:11:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:11:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:11:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:11:23.409772  543705 memory.go:184] no items to output this cycle
I0321 01:11:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 01:11:27.013679  543705 disk_info.go:125] begin check local disk info of client
I0321 01:11:27.016193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:11:27.016200  543705 disk_info.go:196] parse disk info done, disk is : [0xc000567b00 0xc000567b40]
E0321 01:11:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:11:33.409795  543705 memory.go:184] no items to output this cycle
I0321 01:11:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 01:11:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:11:43.409778  543705 memory.go:191] Add success.
I0321 01:11:43.409799  543705 cpu.go:282] Add success.
I0321 01:11:43.419873  543705 net.go:648] Add success.
I0321 01:11:43.422737  543705 net.go:770] primary dev: ETH0
I0321 01:11:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:11:43.422763  543705 net.go:698] Add success.
I0321 01:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:11:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:11:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:11:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:11:53.409777  543705 memory.go:184] no items to output this cycle
I0321 01:11:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 01:12:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:12:03.409780  543705 memory.go:184] no items to output this cycle
I0321 01:12:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 01:12:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:12:13.409798  543705 memory.go:191] Add success.
I0321 01:12:13.409800  543705 cpu.go:282] Add success.
W0321 01:12:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:12:13.409980  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:12:13.409983  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:12:13.419734  543705 net.go:648] Add success.
I0321 01:12:13.422596  543705 net.go:770] primary dev: ETH0
I0321 01:12:13.422610  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:12:13.422622  543705 net.go:698] Add success.
I0321 01:12:13.468805  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e71f31ae-792a-4c9d-b8d1-0afdbb0bec1d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:12:13.468837  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 01:12:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:12:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 01:12:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:12:14.456839  543705 disk_worker.go:494] system disk:vda1
I0321 01:12:14.456877  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:12:14.457165  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:12:14.457173  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:12:14.457178  543705 custom_config.go:64] query custom config with name: gpu
E0321 01:12:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:12:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:12:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:12:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:12:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:12:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:12:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:12:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:12:23.409774  543705 memory.go:184] no items to output this cycle
I0321 01:12:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 01:12:27.017674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:12:27.020286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:12:27.020292  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5380 0xc0000c53c0]
E0321 01:12:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:12:33.409770  543705 memory.go:184] no items to output this cycle
I0321 01:12:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 01:12:38.680249  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:12:38.680255  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:12:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:12:43.409775  543705 memory.go:191] Add success.
I0321 01:12:43.409785  543705 cpu.go:282] Add success.
I0321 01:12:43.419848  543705 net.go:648] Add success.
I0321 01:12:43.420805  543705 net.go:770] primary dev: ETH0
I0321 01:12:43.420818  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:12:43.420831  543705 net.go:698] Add success.
I0321 01:12:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:12:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:12:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:12:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:12:53.409781  543705 memory.go:184] no items to output this cycle
I0321 01:12:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 01:13:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:13:03.409793  543705 memory.go:184] no items to output this cycle
I0321 01:13:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:13:13.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:13:13.409897  543705 memory.go:191] Add success.
W0321 01:13:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 01:13:13.409944  543705 cpu.go:282] Add success.
W0321 01:13:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:13:13.409964  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:13:13.419714  543705 net.go:648] Add success.
I0321 01:13:13.422671  543705 net.go:770] primary dev: ETH0
I0321 01:13:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:13:13.422696  543705 net.go:698] Add success.
I0321 01:13:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:13:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:13:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 01:13:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:13:14.456505  543705 disk_worker.go:494] system disk:vda1
I0321 01:13:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:13:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:13:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:13:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:13:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:13:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:13:23.409799  543705 memory.go:184] no items to output this cycle
I0321 01:13:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 01:13:27.021672  543705 disk_info.go:125] begin check local disk info of client
I0321 01:13:27.024128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:13:27.024134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa100 0xc0001fa140]
E0321 01:13:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:13:33.409776  543705 memory.go:184] no items to output this cycle
I0321 01:13:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 01:13:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:13:43.409790  543705 memory.go:191] Add success.
I0321 01:13:43.409791  543705 cpu.go:282] Add success.
I0321 01:13:43.419895  543705 net.go:648] Add success.
I0321 01:13:43.422651  543705 net.go:770] primary dev: ETH0
I0321 01:13:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:13:43.422677  543705 net.go:698] Add success.
I0321 01:13:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:13:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:13:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:13:53.409781  543705 memory.go:184] no items to output this cycle
I0321 01:13:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 01:14:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:14:03.409773  543705 memory.go:184] no items to output this cycle
I0321 01:14:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 01:14:13.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:14:13.409936  543705 memory.go:191] Add success.
I0321 01:14:13.409945  543705 cpu.go:282] Add success.
W0321 01:14:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:14:13.409981  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:14:13.409989  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:14:13.419716  543705 net.go:648] Add success.
I0321 01:14:13.422305  543705 net.go:770] primary dev: ETH0
I0321 01:14:13.422328  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:14:13.422340  543705 net.go:698] Add success.
I0321 01:14:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:14:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:14:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 01:14:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:14:14.456619  543705 disk_worker.go:494] system disk:vda1
I0321 01:14:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:14:15.454998  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:14:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:14:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:14:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:14:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:14:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:14:23.409784  543705 memory.go:184] no items to output this cycle
I0321 01:14:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 01:14:27.025675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:14:27.028226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:14:27.028234  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b080 0xc00007b0c0]
E0321 01:14:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:14:33.409773  543705 memory.go:184] no items to output this cycle
I0321 01:14:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 01:14:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:14:43.409812  543705 memory.go:191] Add success.
I0321 01:14:43.409819  543705 cpu.go:282] Add success.
I0321 01:14:43.419875  543705 net.go:648] Add success.
I0321 01:14:43.422729  543705 net.go:770] primary dev: ETH0
I0321 01:14:43.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:14:43.422754  543705 net.go:698] Add success.
I0321 01:14:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:14:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:14:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:14:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:14:53.409773  543705 memory.go:184] no items to output this cycle
I0321 01:14:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:15:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:15:03.409796  543705 memory.go:184] no items to output this cycle
I0321 01:15:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 01:15:13.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:15:13.409907  543705 memory.go:191] Add success.
I0321 01:15:13.409911  543705 cpu.go:282] Add success.
W0321 01:15:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:15:13.409976  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:15:13.409980  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:15:13.419739  543705 net.go:648] Add success.
I0321 01:15:13.422471  543705 net.go:770] primary dev: ETH0
I0321 01:15:13.422483  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:15:13.422496  543705 net.go:698] Add success.
I0321 01:15:13.463665  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"57377629-c537-47a3-acff-133f1d2efb0f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:15:13.463694  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:15:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:15:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 01:15:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:15:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 01:15:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:15:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:15:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:15:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:15:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:15:23.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:15:23.410257  543705 memory.go:184] no items to output this cycle
I0321 01:15:23.410273  543705 cpu.go:275] no items to output this cycle
I0321 01:15:27.029673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:15:27.032179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:15:27.032186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000546e80 0xc000546ec0]
E0321 01:15:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:15:33.409785  543705 memory.go:184] no items to output this cycle
I0321 01:15:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 01:15:38.680393  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:15:38.680400  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:15:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:15:43.410751  543705 memory.go:191] Add success.
I0321 01:15:43.409789  543705 cpu.go:282] Add success.
I0321 01:15:43.420431  543705 net.go:648] Add success.
I0321 01:15:43.423697  543705 net.go:770] primary dev: ETH0
I0321 01:15:43.423710  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:15:43.423730  543705 net.go:698] Add success.
I0321 01:15:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:15:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:15:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:15:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:15:53.409782  543705 memory.go:184] no items to output this cycle
I0321 01:15:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:16:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:16:03.409904  543705 memory.go:184] no items to output this cycle
I0321 01:16:03.409919  543705 cpu.go:275] no items to output this cycle
E0321 01:16:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:16:13.409787  543705 memory.go:191] Add success.
W0321 01:16:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 01:16:13.409815  543705 cpu.go:282] Add success.
W0321 01:16:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:16:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:16:13.420141  543705 net.go:648] Add success.
I0321 01:16:13.422774  543705 net.go:770] primary dev: ETH0
I0321 01:16:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:16:13.422799  543705 net.go:698] Add success.
I0321 01:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:16:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:16:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 01:16:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:16:14.456613  543705 disk_worker.go:494] system disk:vda1
I0321 01:16:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:16:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:16:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:16:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:16:23.409809  543705 memory.go:184] no items to output this cycle
I0321 01:16:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 01:16:27.033674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:16:27.036208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:16:27.036214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed280 0xc0000ed2c0]
E0321 01:16:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:16:33.409767  543705 memory.go:184] no items to output this cycle
I0321 01:16:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 01:16:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:16:43.409809  543705 memory.go:191] Add success.
I0321 01:16:43.409817  543705 cpu.go:282] Add success.
I0321 01:16:43.420054  543705 net.go:648] Add success.
I0321 01:16:43.422891  543705 net.go:770] primary dev: ETH0
I0321 01:16:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:16:43.422914  543705 net.go:698] Add success.
I0321 01:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:16:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:16:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:16:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:16:53.409797  543705 memory.go:184] no items to output this cycle
I0321 01:16:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 01:17:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:17:03.409776  543705 memory.go:184] no items to output this cycle
I0321 01:17:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 01:17:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:17:13.409804  543705 memory.go:191] Add success.
I0321 01:17:13.409805  543705 cpu.go:282] Add success.
W0321 01:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:17:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:17:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:17:13.420218  543705 net.go:648] Add success.
I0321 01:17:13.423037  543705 net.go:770] primary dev: ETH0
I0321 01:17:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:17:13.423063  543705 net.go:698] Add success.
I0321 01:17:13.453721  543705 event_worker.go:152] Polling the log file for events...
W0321 01:17:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:17:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 01:17:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0321 01:17:14.455937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:17:14.455946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:17:14.455951  543705 custom_config.go:64] query custom config with name: gpu
I0321 01:17:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 01:17:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:17:15.456862  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:17:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:17:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:17:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:17:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:17:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:17:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:17:23.410559  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:17:23.410579  543705 memory.go:184] no items to output this cycle
I0321 01:17:23.410589  543705 cpu.go:275] no items to output this cycle
I0321 01:17:27.037672  543705 disk_info.go:125] begin check local disk info of client
I0321 01:17:27.040248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:17:27.040254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fabc0 0xc0001fac00]
E0321 01:17:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:17:33.409792  543705 memory.go:184] no items to output this cycle
I0321 01:17:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 01:17:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:17:43.409795  543705 memory.go:191] Add success.
I0321 01:17:43.409810  543705 cpu.go:282] Add success.
I0321 01:17:43.420016  543705 net.go:648] Add success.
I0321 01:17:43.422742  543705 net.go:770] primary dev: ETH0
I0321 01:17:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:17:43.422770  543705 net.go:698] Add success.
I0321 01:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:17:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:17:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:17:53.409768  543705 memory.go:184] no items to output this cycle
I0321 01:17:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 01:18:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:18:03.409792  543705 memory.go:184] no items to output this cycle
I0321 01:18:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:18:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:18:13.409798  543705 cpu.go:282] Add success.
I0321 01:18:13.409799  543705 memory.go:191] Add success.
W0321 01:18:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:18:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:18:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:18:13.419718  543705 net.go:648] Add success.
I0321 01:18:13.423216  543705 net.go:770] primary dev: ETH0
I0321 01:18:13.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:18:13.423244  543705 net.go:698] Add success.
I0321 01:18:13.475093  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2da4d194-d2a4-46b0-b9b5-b2893001600a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:18:13.475123  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:18:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:18:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:18:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 01:18:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:18:14.456515  543705 disk_worker.go:494] system disk:vda1
I0321 01:18:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:18:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:18:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:18:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:18:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:18:23.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:18:23.410265  543705 memory.go:184] no items to output this cycle
I0321 01:18:23.410287  543705 cpu.go:275] no items to output this cycle
I0321 01:18:27.041683  543705 disk_info.go:125] begin check local disk info of client
I0321 01:18:27.044259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:18:27.044265  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003565c0 0xc000356600]
E0321 01:18:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:18:33.409787  543705 memory.go:184] no items to output this cycle
I0321 01:18:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 01:18:38.681095  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:18:38.681101  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:18:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:18:43.410595  543705 memory.go:191] Add success.
I0321 01:18:43.409807  543705 cpu.go:282] Add success.
I0321 01:18:43.420317  543705 net.go:648] Add success.
I0321 01:18:43.422782  543705 net.go:770] primary dev: ETH0
I0321 01:18:43.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:18:43.422810  543705 net.go:698] Add success.
I0321 01:18:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:18:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:18:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:18:53.409783  543705 memory.go:184] no items to output this cycle
I0321 01:18:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 01:19:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:19:03.409776  543705 memory.go:184] no items to output this cycle
I0321 01:19:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 01:19:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:19:13.409825  543705 memory.go:191] Add success.
I0321 01:19:13.409829  543705 cpu.go:282] Add success.
W0321 01:19:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:19:13.410039  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:19:13.410046  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:19:13.419744  543705 net.go:648] Add success.
I0321 01:19:13.422317  543705 net.go:770] primary dev: ETH0
I0321 01:19:13.422331  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:19:13.422342  543705 net.go:698] Add success.
I0321 01:19:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:19:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:19:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 01:19:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:19:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 01:19:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:19:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:19:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:19:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:19:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:19:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:19:23.409798  543705 memory.go:184] no items to output this cycle
I0321 01:19:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 01:19:27.045672  543705 disk_info.go:125] begin check local disk info of client
I0321 01:19:27.048168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:19:27.048174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056de40 0xc00056de80]
E0321 01:19:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:19:33.409778  543705 memory.go:184] no items to output this cycle
I0321 01:19:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 01:19:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:19:43.409789  543705 memory.go:191] Add success.
I0321 01:19:43.409804  543705 cpu.go:282] Add success.
I0321 01:19:43.419861  543705 net.go:648] Add success.
I0321 01:19:43.422483  543705 net.go:770] primary dev: ETH0
I0321 01:19:43.422497  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:19:43.422509  543705 net.go:698] Add success.
I0321 01:19:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:19:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:19:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:19:53.409804  543705 memory.go:184] no items to output this cycle
I0321 01:19:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 01:20:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:20:03.409803  543705 memory.go:184] no items to output this cycle
I0321 01:20:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 01:20:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:20:13.409791  543705 memory.go:191] Add success.
I0321 01:20:13.409806  543705 cpu.go:282] Add success.
W0321 01:20:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:20:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:20:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:20:13.420133  543705 net.go:648] Add success.
I0321 01:20:13.423312  543705 net.go:770] primary dev: ETH0
I0321 01:20:13.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:20:13.423404  543705 net.go:698] Add success.
I0321 01:20:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:20:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:20:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0321 01:20:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:20:14.456473  543705 disk_worker.go:494] system disk:vda1
I0321 01:20:14.456517  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:20:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:20:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:20:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:20:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:20:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:20:23.409788  543705 memory.go:184] no items to output this cycle
I0321 01:20:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 01:20:27.049679  543705 disk_info.go:125] begin check local disk info of client
I0321 01:20:27.052250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:20:27.052257  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475600 0xc000475640]
E0321 01:20:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:20:33.409799  543705 memory.go:184] no items to output this cycle
I0321 01:20:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 01:20:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:20:43.409784  543705 memory.go:191] Add success.
I0321 01:20:43.409810  543705 cpu.go:282] Add success.
I0321 01:20:43.419903  543705 net.go:648] Add success.
I0321 01:20:43.422822  543705 net.go:770] primary dev: ETH0
I0321 01:20:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:20:43.422851  543705 net.go:698] Add success.
I0321 01:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:20:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:20:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:20:53.410339  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:20:53.410364  543705 memory.go:184] no items to output this cycle
I0321 01:20:53.410379  543705 cpu.go:275] no items to output this cycle
E0321 01:21:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:21:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 01:21:03.409784  543705 memory.go:184] no items to output this cycle
E0321 01:21:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:21:13.409839  543705 memory.go:191] Add success.
I0321 01:21:13.409841  543705 cpu.go:282] Add success.
W0321 01:21:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:21:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:21:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:21:13.420215  543705 net.go:648] Add success.
I0321 01:21:13.422801  543705 net.go:770] primary dev: ETH0
I0321 01:21:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:21:13.422826  543705 net.go:698] Add success.
I0321 01:21:13.463575  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1264c456-60c3-4e8c-b67b-9fc7c50ba7d6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:21:13.463607  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:21:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:21:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:21:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 01:21:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:21:14.459223  543705 disk_worker.go:494] system disk:vda1
I0321 01:21:14.459253  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:21:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:21:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:21:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:21:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:21:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:21:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:21:23.409799  543705 memory.go:184] no items to output this cycle
I0321 01:21:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 01:21:27.053674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:21:27.056440  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:21:27.056447  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056d3c0 0xc00056d400]
E0321 01:21:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:21:33.409776  543705 cpu.go:275] no items to output this cycle
I0321 01:21:33.409781  543705 memory.go:184] no items to output this cycle
I0321 01:21:38.681746  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:21:38.681753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:21:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:21:43.410666  543705 memory.go:191] Add success.
I0321 01:21:43.409809  543705 cpu.go:282] Add success.
I0321 01:21:43.420385  543705 net.go:648] Add success.
I0321 01:21:43.423189  543705 net.go:770] primary dev: ETH0
I0321 01:21:43.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:21:43.423216  543705 net.go:698] Add success.
I0321 01:21:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:21:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:21:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:21:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:21:53.409778  543705 memory.go:184] no items to output this cycle
I0321 01:21:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 01:22:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:22:03.409776  543705 memory.go:184] no items to output this cycle
I0321 01:22:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 01:22:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:22:13.409823  543705 memory.go:191] Add success.
I0321 01:22:13.409827  543705 cpu.go:282] Add success.
W0321 01:22:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:22:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:22:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:22:13.420201  543705 net.go:648] Add success.
I0321 01:22:13.422917  543705 net.go:770] primary dev: ETH0
I0321 01:22:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:22:13.422947  543705 net.go:698] Add success.
W0321 01:22:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:22:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 01:22:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:22:14.456791  543705 disk_worker.go:494] system disk:vda1
I0321 01:22:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:22:14.457014  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:22:14.457023  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:22:14.457029  543705 custom_config.go:64] query custom config with name: gpu
E0321 01:22:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:22:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:22:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:22:16.457979  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:22:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:22:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:22:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:22:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:22:23.409784  543705 memory.go:184] no items to output this cycle
I0321 01:22:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 01:22:27.057680  543705 disk_info.go:125] begin check local disk info of client
I0321 01:22:27.060238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:22:27.060245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466580 0xc0004665c0]
E0321 01:22:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:22:33.409776  543705 memory.go:184] no items to output this cycle
I0321 01:22:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 01:22:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:22:43.409796  543705 memory.go:191] Add success.
I0321 01:22:43.409799  543705 cpu.go:282] Add success.
I0321 01:22:43.419999  543705 net.go:648] Add success.
I0321 01:22:43.422723  543705 net.go:770] primary dev: ETH0
I0321 01:22:43.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:22:43.422749  543705 net.go:698] Add success.
I0321 01:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:22:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:22:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:22:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:22:53.409791  543705 memory.go:184] no items to output this cycle
I0321 01:22:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 01:23:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:23:03.409777  543705 cpu.go:275] no items to output this cycle
I0321 01:23:03.409782  543705 memory.go:184] no items to output this cycle
E0321 01:23:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:23:13.409822  543705 memory.go:191] Add success.
I0321 01:23:13.409830  543705 cpu.go:282] Add success.
W0321 01:23:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:23:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:23:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:23:13.420182  543705 net.go:648] Add success.
I0321 01:23:13.422936  543705 net.go:770] primary dev: ETH0
I0321 01:23:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:23:13.422962  543705 net.go:698] Add success.
I0321 01:23:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:23:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:23:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 01:23:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:23:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 01:23:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:23:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:23:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:23:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:23:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:23:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:23:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:23:23.409771  543705 memory.go:184] no items to output this cycle
I0321 01:23:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 01:23:27.061674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:23:27.064167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:23:27.064173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1680 0xc0004a16c0]
E0321 01:23:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:23:33.409777  543705 cpu.go:275] no items to output this cycle
I0321 01:23:33.409785  543705 memory.go:184] no items to output this cycle
E0321 01:23:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:23:43.409782  543705 memory.go:191] Add success.
I0321 01:23:43.409804  543705 cpu.go:282] Add success.
I0321 01:23:43.419887  543705 net.go:648] Add success.
I0321 01:23:43.422478  543705 net.go:770] primary dev: ETH0
I0321 01:23:43.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:23:43.422504  543705 net.go:698] Add success.
I0321 01:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:23:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:23:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:23:53.409802  543705 memory.go:184] no items to output this cycle
I0321 01:23:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 01:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:24:03.409777  543705 memory.go:184] no items to output this cycle
I0321 01:24:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 01:24:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:24:13.409826  543705 memory.go:191] Add success.
I0321 01:24:13.409830  543705 cpu.go:282] Add success.
W0321 01:24:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:24:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:24:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:24:13.420165  543705 net.go:648] Add success.
I0321 01:24:13.422859  543705 net.go:770] primary dev: ETH0
I0321 01:24:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:24:13.422888  543705 net.go:698] Add success.
I0321 01:24:13.463872  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"609da7ed-b181-4781-af1a-cfa8e2581639","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:24:13.463910  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:24:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:24:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:24:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 01:24:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:24:14.456734  543705 disk_worker.go:494] system disk:vda1
I0321 01:24:14.456767  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:24:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:24:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:24:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:24:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:24:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:24:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:24:23.409784  543705 memory.go:184] no items to output this cycle
I0321 01:24:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 01:24:27.065677  543705 disk_info.go:125] begin check local disk info of client
I0321 01:24:27.068256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:24:27.068262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e3900 0xc0001e3940]
E0321 01:24:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:24:33.409811  543705 memory.go:184] no items to output this cycle
I0321 01:24:33.409819  543705 cpu.go:275] no items to output this cycle
I0321 01:24:38.683112  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:24:38.683119  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:24:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:24:43.410554  543705 memory.go:191] Add success.
I0321 01:24:43.409791  543705 cpu.go:282] Add success.
I0321 01:24:43.420356  543705 net.go:648] Add success.
I0321 01:24:43.423062  543705 net.go:770] primary dev: ETH0
I0321 01:24:43.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:24:43.423093  543705 net.go:698] Add success.
I0321 01:24:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:24:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:24:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:24:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:24:53.409772  543705 memory.go:184] no items to output this cycle
I0321 01:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 01:25:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:25:03.409766  543705 memory.go:184] no items to output this cycle
I0321 01:25:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 01:25:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:25:13.409825  543705 memory.go:191] Add success.
I0321 01:25:13.409831  543705 cpu.go:282] Add success.
W0321 01:25:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:25:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:25:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:25:13.420507  543705 net.go:648] Add success.
I0321 01:25:13.423234  543705 net.go:770] primary dev: ETH0
I0321 01:25:13.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:25:13.423264  543705 net.go:698] Add success.
I0321 01:25:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:25:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:25:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 01:25:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:25:14.456611  543705 disk_worker.go:494] system disk:vda1
I0321 01:25:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:25:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:25:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:25:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:25:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:25:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:25:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:25:23.409804  543705 memory.go:184] no items to output this cycle
I0321 01:25:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 01:25:27.069674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:25:27.072177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:25:27.072183  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492b00 0xc000492b40]
E0321 01:25:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:25:33.409761  543705 memory.go:184] no items to output this cycle
I0321 01:25:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 01:25:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:25:43.409813  543705 memory.go:191] Add success.
I0321 01:25:43.409825  543705 cpu.go:282] Add success.
I0321 01:25:43.420112  543705 net.go:648] Add success.
I0321 01:25:43.422762  543705 net.go:770] primary dev: ETH0
I0321 01:25:43.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:25:43.422791  543705 net.go:698] Add success.
I0321 01:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:25:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:25:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:25:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:25:53.409779  543705 cpu.go:275] no items to output this cycle
I0321 01:25:53.409787  543705 memory.go:184] no items to output this cycle
E0321 01:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:26:03.409776  543705 memory.go:184] no items to output this cycle
I0321 01:26:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 01:26:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:26:13.409806  543705 memory.go:191] Add success.
I0321 01:26:13.409808  543705 cpu.go:282] Add success.
W0321 01:26:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:26:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:26:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:26:13.420233  543705 net.go:648] Add success.
I0321 01:26:13.423082  543705 net.go:770] primary dev: ETH0
I0321 01:26:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:26:13.423110  543705 net.go:698] Add success.
I0321 01:26:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:26:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:26:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 01:26:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:26:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 01:26:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:26:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:26:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:26:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:26:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:26:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:26:23.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:26:23.410251  543705 memory.go:184] no items to output this cycle
I0321 01:26:23.410257  543705 cpu.go:275] no items to output this cycle
I0321 01:26:27.073666  543705 disk_info.go:125] begin check local disk info of client
I0321 01:26:27.076229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:26:27.076235  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a340 0xc00046a380]
E0321 01:26:33.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:26:33.409908  543705 memory.go:184] no items to output this cycle
I0321 01:26:33.410057  543705 cpu.go:275] no items to output this cycle
E0321 01:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:26:43.409781  543705 memory.go:191] Add success.
I0321 01:26:43.409808  543705 cpu.go:282] Add success.
I0321 01:26:43.419991  543705 net.go:648] Add success.
I0321 01:26:43.422960  543705 net.go:770] primary dev: ETH0
I0321 01:26:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:26:43.422986  543705 net.go:698] Add success.
I0321 01:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:26:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:26:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:26:53.409801  543705 memory.go:184] no items to output this cycle
I0321 01:26:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 01:27:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:27:03.409799  543705 memory.go:184] no items to output this cycle
I0321 01:27:03.409811  543705 cpu.go:275] no items to output this cycle
W0321 01:27:13.409701  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0321 01:27:13.409712  543705 conf_downlod.go:89] use old conf
E0321 01:27:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:27:13.409828  543705 memory.go:191] Add success.
I0321 01:27:13.409844  543705 cpu.go:282] Add success.
W0321 01:27:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:27:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:27:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:27:13.420187  543705 net.go:648] Add success.
I0321 01:27:13.423282  543705 net.go:770] primary dev: ETH0
I0321 01:27:13.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:27:13.423308  543705 net.go:698] Add success.
I0321 01:27:13.429405  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 01:27:13.453580  543705 event_worker.go:152] Polling the log file for events...
I0321 01:27:13.469762  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a3e742cc-0bfc-451f-a323-f068bc8c99bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:27:13.469797  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 01:27:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:27:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 01:27:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 01:27:14.455887  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:27:14.455897  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:27:14.455902  543705 custom_config.go:64] query custom config with name: gpu
I0321 01:27:14.456550  543705 disk_worker.go:494] system disk:vda1
I0321 01:27:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:27:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:27:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:27:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:27:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:27:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:27:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:27:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:27:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:27:23.409807  543705 memory.go:184] no items to output this cycle
I0321 01:27:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 01:27:27.077675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:27:27.080280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:27:27.080287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 01:27:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:27:33.409792  543705 memory.go:184] no items to output this cycle
I0321 01:27:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 01:27:38.684126  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:27:38.684134  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:27:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:27:43.410695  543705 memory.go:191] Add success.
I0321 01:27:43.409787  543705 cpu.go:282] Add success.
I0321 01:27:43.420394  543705 net.go:648] Add success.
I0321 01:27:43.423004  543705 net.go:770] primary dev: ETH0
I0321 01:27:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:27:43.423029  543705 net.go:698] Add success.
I0321 01:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:27:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:27:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:27:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:27:53.409772  543705 memory.go:184] no items to output this cycle
I0321 01:27:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 01:28:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:28:03.409795  543705 memory.go:184] no items to output this cycle
I0321 01:28:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:28:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:28:13.409804  543705 memory.go:191] Add success.
I0321 01:28:13.409806  543705 cpu.go:282] Add success.
W0321 01:28:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:28:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:28:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:28:13.420083  543705 net.go:648] Add success.
I0321 01:28:13.422576  543705 net.go:770] primary dev: ETH0
I0321 01:28:13.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:28:13.422606  543705 net.go:698] Add success.
I0321 01:28:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:28:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:28:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 01:28:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:28:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 01:28:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:28:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:28:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:28:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:28:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:28:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:28:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 01:28:23.409796  543705 memory.go:184] no items to output this cycle
I0321 01:28:27.081680  543705 disk_info.go:125] begin check local disk info of client
I0321 01:28:27.084230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:28:27.084236  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bdc80 0xc0004bdcc0]
E0321 01:28:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:28:33.409781  543705 memory.go:184] no items to output this cycle
I0321 01:28:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 01:28:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:28:43.409812  543705 memory.go:191] Add success.
I0321 01:28:43.409823  543705 cpu.go:282] Add success.
I0321 01:28:43.419996  543705 net.go:648] Add success.
I0321 01:28:43.422873  543705 net.go:770] primary dev: ETH0
I0321 01:28:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:28:43.422898  543705 net.go:698] Add success.
I0321 01:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:28:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:28:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:28:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:28:53.409785  543705 memory.go:184] no items to output this cycle
I0321 01:28:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 01:29:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:29:03.409777  543705 memory.go:184] no items to output this cycle
I0321 01:29:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 01:29:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:29:13.409793  543705 memory.go:191] Add success.
I0321 01:29:13.409811  543705 cpu.go:282] Add success.
W0321 01:29:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:29:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:29:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:29:13.420150  543705 net.go:648] Add success.
I0321 01:29:13.422966  543705 net.go:770] primary dev: ETH0
I0321 01:29:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:29:13.422991  543705 net.go:698] Add success.
I0321 01:29:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:29:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:29:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 01:29:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:29:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 01:29:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:29:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:29:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:29:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:29:23.410436  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:29:23.410458  543705 memory.go:184] no items to output this cycle
I0321 01:29:23.410466  543705 cpu.go:275] no items to output this cycle
I0321 01:29:27.085664  543705 disk_info.go:125] begin check local disk info of client
I0321 01:29:27.088132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:29:27.088138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9640 0xc0003e9680]
E0321 01:29:33.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:29:33.409891  543705 memory.go:184] no items to output this cycle
I0321 01:29:33.409968  543705 cpu.go:275] no items to output this cycle
E0321 01:29:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:29:43.409813  543705 memory.go:191] Add success.
I0321 01:29:43.409821  543705 cpu.go:282] Add success.
I0321 01:29:43.419876  543705 net.go:648] Add success.
I0321 01:29:43.422545  543705 net.go:770] primary dev: ETH0
I0321 01:29:43.422558  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:29:43.422570  543705 net.go:698] Add success.
I0321 01:29:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:29:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:29:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:29:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:29:53.409776  543705 memory.go:184] no items to output this cycle
I0321 01:29:53.409775  543705 cpu.go:275] no items to output this cycle
E0321 01:30:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:30:03.409776  543705 memory.go:184] no items to output this cycle
I0321 01:30:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 01:30:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:30:13.409831  543705 memory.go:191] Add success.
I0321 01:30:13.409833  543705 cpu.go:282] Add success.
W0321 01:30:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:30:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:30:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:30:13.420210  543705 net.go:648] Add success.
I0321 01:30:13.423081  543705 net.go:770] primary dev: ETH0
I0321 01:30:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:30:13.423110  543705 net.go:698] Add success.
I0321 01:30:13.470050  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8bacd184-cccb-4dc5-beb2-630dfd7b9b04","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:30:13.470083  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:30:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:30:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:30:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 01:30:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:30:14.456628  543705 disk_worker.go:494] system disk:vda1
I0321 01:30:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:30:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:30:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:30:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:30:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:30:23.410273  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:30:23.410294  543705 memory.go:184] no items to output this cycle
I0321 01:30:23.410314  543705 cpu.go:275] no items to output this cycle
I0321 01:30:27.089675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:30:27.092195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:30:27.092202  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e800 0xc00035e840]
E0321 01:30:33.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:30:33.409916  543705 memory.go:184] no items to output this cycle
I0321 01:30:33.409936  543705 cpu.go:275] no items to output this cycle
I0321 01:30:38.685123  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:30:38.685129  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:30:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:30:43.410680  543705 memory.go:191] Add success.
I0321 01:30:43.409793  543705 cpu.go:282] Add success.
I0321 01:30:43.419829  543705 net.go:648] Add success.
I0321 01:30:43.422346  543705 net.go:770] primary dev: ETH0
I0321 01:30:43.422360  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:30:43.422372  543705 net.go:698] Add success.
I0321 01:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:30:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:30:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:30:53.409794  543705 memory.go:184] no items to output this cycle
I0321 01:30:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 01:31:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:31:03.409783  543705 memory.go:184] no items to output this cycle
I0321 01:31:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 01:31:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:31:13.409807  543705 memory.go:191] Add success.
I0321 01:31:13.409809  543705 cpu.go:282] Add success.
W0321 01:31:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:31:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:31:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:31:13.420282  543705 net.go:648] Add success.
I0321 01:31:13.423162  543705 net.go:770] primary dev: ETH0
I0321 01:31:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:31:13.423188  543705 net.go:698] Add success.
I0321 01:31:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:31:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:31:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 01:31:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:31:14.456550  543705 disk_worker.go:494] system disk:vda1
I0321 01:31:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:31:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:31:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:31:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:31:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:31:23.409780  543705 memory.go:184] no items to output this cycle
I0321 01:31:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 01:31:27.093677  543705 disk_info.go:125] begin check local disk info of client
I0321 01:31:27.096154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:31:27.096160  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357d40 0xc000357d80]
E0321 01:31:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:31:33.409781  543705 memory.go:184] no items to output this cycle
I0321 01:31:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 01:31:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:31:43.409911  543705 memory.go:191] Add success.
I0321 01:31:43.409942  543705 cpu.go:282] Add success.
I0321 01:31:43.419708  543705 net.go:648] Add success.
I0321 01:31:43.422359  543705 net.go:770] primary dev: ETH0
I0321 01:31:43.422372  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:31:43.422384  543705 net.go:698] Add success.
I0321 01:31:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:31:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:31:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:31:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:31:53.409764  543705 memory.go:184] no items to output this cycle
I0321 01:31:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 01:32:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:32:03.409767  543705 memory.go:184] no items to output this cycle
I0321 01:32:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 01:32:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:32:13.409825  543705 memory.go:191] Add success.
I0321 01:32:13.409826  543705 cpu.go:282] Add success.
W0321 01:32:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:32:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:32:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:32:13.420321  543705 net.go:648] Add success.
I0321 01:32:13.423130  543705 net.go:770] primary dev: ETH0
I0321 01:32:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:32:13.423157  543705 net.go:698] Add success.
W0321 01:32:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:32:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 01:32:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:32:14.456791  543705 disk_worker.go:494] system disk:vda1
I0321 01:32:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:32:14.457144  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:32:14.457152  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:32:14.457157  543705 custom_config.go:64] query custom config with name: gpu
E0321 01:32:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:32:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:32:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:32:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:32:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:32:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:32:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:32:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:32:23.409808  543705 memory.go:184] no items to output this cycle
I0321 01:32:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 01:32:27.097684  543705 disk_info.go:125] begin check local disk info of client
I0321 01:32:27.100304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:32:27.100310  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0640 0xc0003b0680]
E0321 01:32:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:32:33.409783  543705 memory.go:184] no items to output this cycle
I0321 01:32:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 01:32:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:32:43.409777  543705 memory.go:191] Add success.
I0321 01:32:43.409806  543705 cpu.go:282] Add success.
I0321 01:32:43.420175  543705 net.go:648] Add success.
I0321 01:32:43.422796  543705 net.go:770] primary dev: ETH0
I0321 01:32:43.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:32:43.422821  543705 net.go:698] Add success.
I0321 01:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:32:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:32:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:32:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:32:53.409772  543705 memory.go:184] no items to output this cycle
I0321 01:32:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 01:33:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:33:03.409779  543705 cpu.go:275] no items to output this cycle
I0321 01:33:03.409780  543705 memory.go:184] no items to output this cycle
E0321 01:33:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:33:13.409826  543705 memory.go:191] Add success.
I0321 01:33:13.409831  543705 cpu.go:282] Add success.
W0321 01:33:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:33:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:33:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:33:13.420215  543705 net.go:648] Add success.
I0321 01:33:13.423274  543705 net.go:770] primary dev: ETH0
I0321 01:33:13.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:33:13.423303  543705 net.go:698] Add success.
I0321 01:33:13.463510  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e110d34-8736-4482-aece-ed37b2245470","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:33:13.463547  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:33:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:33:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:33:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 01:33:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:33:14.456723  543705 disk_worker.go:494] system disk:vda1
I0321 01:33:14.456753  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:33:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:33:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:33:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:33:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:33:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 01:33:23.409795  543705 memory.go:184] no items to output this cycle
I0321 01:33:27.101675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:33:27.104203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:33:27.104213  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024a740 0xc00024a780]
E0321 01:33:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:33:33.409801  543705 memory.go:184] no items to output this cycle
I0321 01:33:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 01:33:38.685731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:33:38.685738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:33:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:33:43.410686  543705 memory.go:191] Add success.
I0321 01:33:43.409792  543705 cpu.go:282] Add success.
I0321 01:33:43.420663  543705 net.go:648] Add success.
I0321 01:33:43.423453  543705 net.go:770] primary dev: ETH0
I0321 01:33:43.423466  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:33:43.423478  543705 net.go:698] Add success.
I0321 01:33:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:33:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:33:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:33:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:33:53.409785  543705 cpu.go:275] no items to output this cycle
I0321 01:33:53.409798  543705 memory.go:184] no items to output this cycle
E0321 01:34:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:34:03.409782  543705 memory.go:184] no items to output this cycle
I0321 01:34:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 01:34:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:34:13.409792  543705 memory.go:191] Add success.
I0321 01:34:13.409818  543705 cpu.go:282] Add success.
W0321 01:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:34:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:34:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:34:13.420114  543705 net.go:648] Add success.
I0321 01:34:13.422672  543705 net.go:770] primary dev: ETH0
I0321 01:34:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:34:13.422695  543705 net.go:698] Add success.
I0321 01:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:34:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:34:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 01:34:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:34:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 01:34:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:34:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:34:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:34:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:34:23.409782  543705 memory.go:184] no items to output this cycle
I0321 01:34:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 01:34:27.105673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:34:27.108212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:34:27.108218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aec00 0xc0003aec40]
E0321 01:34:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:34:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 01:34:33.409788  543705 memory.go:184] no items to output this cycle
E0321 01:34:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:34:43.409809  543705 memory.go:191] Add success.
I0321 01:34:43.409817  543705 cpu.go:282] Add success.
I0321 01:34:43.419951  543705 net.go:648] Add success.
I0321 01:34:43.423003  543705 net.go:770] primary dev: ETH0
I0321 01:34:43.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:34:43.423036  543705 net.go:698] Add success.
I0321 01:34:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:34:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:34:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:34:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:34:53.409780  543705 memory.go:184] no items to output this cycle
I0321 01:34:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 01:35:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:35:03.409792  543705 memory.go:184] no items to output this cycle
I0321 01:35:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:35:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:35:13.409793  543705 memory.go:191] Add success.
W0321 01:35:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 01:35:13.409819  543705 cpu.go:282] Add success.
W0321 01:35:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:35:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:35:13.420156  543705 net.go:648] Add success.
I0321 01:35:13.423135  543705 net.go:770] primary dev: ETH0
I0321 01:35:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:35:13.423161  543705 net.go:698] Add success.
I0321 01:35:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:35:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:35:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 01:35:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:35:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 01:35:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:35:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:35:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:35:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:35:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:35:23.409789  543705 memory.go:184] no items to output this cycle
I0321 01:35:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 01:35:27.109682  543705 disk_info.go:125] begin check local disk info of client
I0321 01:35:27.112141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:35:27.112148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad580 0xc0003ad5c0]
E0321 01:35:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:35:33.409802  543705 memory.go:184] no items to output this cycle
I0321 01:35:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 01:35:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:35:43.409783  543705 memory.go:191] Add success.
I0321 01:35:43.409810  543705 cpu.go:282] Add success.
I0321 01:35:43.419881  543705 net.go:648] Add success.
I0321 01:35:43.422480  543705 net.go:770] primary dev: ETH0
I0321 01:35:43.422496  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:35:43.422511  543705 net.go:698] Add success.
I0321 01:35:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:35:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:35:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:35:53.409919  543705 cpu.go:275] no items to output this cycle
E0321 01:35:53.409938  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:35:53.409953  543705 memory.go:184] no items to output this cycle
E0321 01:36:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:36:03.409803  543705 memory.go:184] no items to output this cycle
I0321 01:36:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 01:36:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:36:13.409840  543705 memory.go:191] Add success.
I0321 01:36:13.409854  543705 cpu.go:282] Add success.
W0321 01:36:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:36:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:36:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:36:13.420187  543705 net.go:648] Add success.
I0321 01:36:13.422722  543705 net.go:770] primary dev: ETH0
I0321 01:36:13.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:36:13.422752  543705 net.go:698] Add success.
I0321 01:36:13.469808  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"413d75d0-050a-4273-aad4-147515791c2b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:36:13.469842  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:36:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:36:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:36:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 01:36:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:36:14.456645  543705 disk_worker.go:494] system disk:vda1
I0321 01:36:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:36:15.455607  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:36:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:36:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:36:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:36:23.409789  543705 memory.go:184] no items to output this cycle
I0321 01:36:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 01:36:27.113677  543705 disk_info.go:125] begin check local disk info of client
I0321 01:36:27.116184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:36:27.116190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003af880 0xc0003af8c0]
E0321 01:36:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:36:33.409806  543705 memory.go:184] no items to output this cycle
I0321 01:36:33.409820  543705 cpu.go:275] no items to output this cycle
I0321 01:36:38.687139  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:36:38.687146  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:36:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:36:43.410685  543705 memory.go:191] Add success.
I0321 01:36:43.409813  543705 cpu.go:282] Add success.
I0321 01:36:43.420440  543705 net.go:648] Add success.
I0321 01:36:43.422990  543705 net.go:770] primary dev: ETH0
I0321 01:36:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:36:43.423017  543705 net.go:698] Add success.
I0321 01:36:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:36:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:36:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:36:53.409786  543705 memory.go:184] no items to output this cycle
I0321 01:36:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 01:37:03.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:37:03.409908  543705 memory.go:184] no items to output this cycle
I0321 01:37:03.409933  543705 cpu.go:275] no items to output this cycle
E0321 01:37:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:37:13.409842  543705 memory.go:191] Add success.
I0321 01:37:13.409853  543705 cpu.go:282] Add success.
W0321 01:37:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:37:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:37:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:37:13.420370  543705 net.go:648] Add success.
I0321 01:37:13.423024  543705 net.go:770] primary dev: ETH0
I0321 01:37:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:37:13.423050  543705 net.go:698] Add success.
I0321 01:37:13.453572  543705 event_worker.go:152] Polling the log file for events...
W0321 01:37:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:37:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 01:37:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0321 01:37:14.456982  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:37:14.456992  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:37:14.456999  543705 custom_config.go:64] query custom config with name: gpu
I0321 01:37:14.457031  543705 disk_worker.go:494] system disk:vda1
I0321 01:37:14.457060  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:37:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:37:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:37:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:37:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:37:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:37:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:37:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:37:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:37:23.409816  543705 memory.go:184] no items to output this cycle
I0321 01:37:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 01:37:27.117674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:37:27.120392  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:37:27.120398  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ac440 0xc0003ac480]
E0321 01:37:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:37:33.409781  543705 memory.go:184] no items to output this cycle
I0321 01:37:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:37:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:37:43.409817  543705 memory.go:191] Add success.
I0321 01:37:43.409827  543705 cpu.go:282] Add success.
I0321 01:37:43.419898  543705 net.go:648] Add success.
I0321 01:37:43.422586  543705 net.go:770] primary dev: ETH0
I0321 01:37:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:37:43.422613  543705 net.go:698] Add success.
I0321 01:37:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:37:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:37:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:37:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:37:53.409783  543705 memory.go:184] no items to output this cycle
I0321 01:37:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 01:38:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:38:03.409760  543705 memory.go:184] no items to output this cycle
I0321 01:38:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 01:38:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:38:13.409863  543705 memory.go:191] Add success.
I0321 01:38:13.409869  543705 cpu.go:282] Add success.
W0321 01:38:13.409898  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:38:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:38:13.409918  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:38:13.419724  543705 net.go:648] Add success.
I0321 01:38:13.422919  543705 net.go:770] primary dev: ETH0
I0321 01:38:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:38:13.422943  543705 net.go:698] Add success.
I0321 01:38:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:38:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:38:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 01:38:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:38:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 01:38:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:38:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:38:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:38:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:38:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0321 01:38:23.409803  543705 cpu.go:275] no items to output this cycle
E0321 01:38:23.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:38:23.409825  543705 memory.go:184] no items to output this cycle
I0321 01:38:27.121678  543705 disk_info.go:125] begin check local disk info of client
I0321 01:38:27.124190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:38:27.124197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb500 0xc0001fb540]
E0321 01:38:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:38:33.409774  543705 memory.go:184] no items to output this cycle
I0321 01:38:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:38:43.409783  543705 memory.go:191] Add success.
I0321 01:38:43.409796  543705 cpu.go:282] Add success.
I0321 01:38:43.420026  543705 net.go:648] Add success.
I0321 01:38:43.422786  543705 net.go:770] primary dev: ETH0
I0321 01:38:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:38:43.422811  543705 net.go:698] Add success.
I0321 01:38:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:38:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:38:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:38:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:38:53.409772  543705 memory.go:184] no items to output this cycle
I0321 01:38:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:39:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:39:03.409772  543705 memory.go:184] no items to output this cycle
I0321 01:39:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:39:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:39:13.409831  543705 memory.go:191] Add success.
W0321 01:39:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:39:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:39:13.409876  543705 cpu.go:282] Add success.
I0321 01:39:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:39:13.420512  543705 net.go:648] Add success.
I0321 01:39:13.423396  543705 net.go:770] primary dev: ETH0
I0321 01:39:13.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:39:13.423427  543705 net.go:698] Add success.
I0321 01:39:13.470002  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"576ef737-9e83-4d63-9091-ad9678915e90","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:39:13.470045  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:39:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:39:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:39:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0321 01:39:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:39:14.456835  543705 disk_worker.go:494] system disk:vda1
I0321 01:39:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:39:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:39:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:39:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:39:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 01:39:23.409792  543705 memory.go:184] no items to output this cycle
I0321 01:39:27.125672  543705 disk_info.go:125] begin check local disk info of client
I0321 01:39:27.128468  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:39:27.128474  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315640 0xc000315680]
E0321 01:39:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:39:33.409778  543705 memory.go:184] no items to output this cycle
I0321 01:39:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 01:39:38.688136  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:39:38.688142  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:39:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:39:43.410661  543705 memory.go:191] Add success.
I0321 01:39:43.409806  543705 cpu.go:282] Add success.
I0321 01:39:43.420337  543705 net.go:648] Add success.
I0321 01:39:43.422917  543705 net.go:770] primary dev: ETH0
I0321 01:39:43.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:39:43.422955  543705 net.go:698] Add success.
I0321 01:39:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:39:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:39:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:39:53.409769  543705 memory.go:184] no items to output this cycle
I0321 01:39:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 01:40:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:40:03.409772  543705 memory.go:184] no items to output this cycle
I0321 01:40:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 01:40:13.409921  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:40:13.410059  543705 memory.go:191] Add success.
W0321 01:40:13.410098  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 01:40:13.410113  543705 cpu.go:282] Add success.
W0321 01:40:13.410118  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:40:13.410128  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:40:13.419702  543705 net.go:648] Add success.
I0321 01:40:13.422416  543705 net.go:770] primary dev: ETH0
I0321 01:40:13.422435  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:40:13.422454  543705 net.go:698] Add success.
I0321 01:40:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:40:14.455234  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:40:14.455247  543705 disk_worker.go:708] disk space is not compliant
W0321 01:40:14.455250  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:40:14.456636  543705 disk_worker.go:494] system disk:vda1
I0321 01:40:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:40:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:40:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:40:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:40:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:40:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 01:40:23.409786  543705 memory.go:184] no items to output this cycle
I0321 01:40:27.129676  543705 disk_info.go:125] begin check local disk info of client
I0321 01:40:27.132199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:40:27.132207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
E0321 01:40:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:40:33.409791  543705 memory.go:184] no items to output this cycle
I0321 01:40:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 01:40:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:40:43.409779  543705 memory.go:191] Add success.
I0321 01:40:43.409797  543705 cpu.go:282] Add success.
I0321 01:40:43.419886  543705 net.go:648] Add success.
I0321 01:40:43.422485  543705 net.go:770] primary dev: ETH0
I0321 01:40:43.422501  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:40:43.422515  543705 net.go:698] Add success.
I0321 01:40:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:40:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:40:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:40:53.409774  543705 cpu.go:275] no items to output this cycle
I0321 01:40:53.409776  543705 memory.go:184] no items to output this cycle
E0321 01:41:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:41:03.409779  543705 memory.go:184] no items to output this cycle
I0321 01:41:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:41:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:41:13.409835  543705 memory.go:191] Add success.
I0321 01:41:13.409841  543705 cpu.go:282] Add success.
W0321 01:41:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:41:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:41:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:41:13.420385  543705 net.go:648] Add success.
I0321 01:41:13.423328  543705 net.go:770] primary dev: ETH0
I0321 01:41:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:41:13.423357  543705 net.go:698] Add success.
I0321 01:41:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:41:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:41:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 01:41:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:41:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 01:41:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:41:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:41:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:41:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:41:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:41:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:41:23.409776  543705 memory.go:184] no items to output this cycle
I0321 01:41:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 01:41:27.133676  543705 disk_info.go:125] begin check local disk info of client
I0321 01:41:27.136186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:41:27.136192  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315200 0xc000315240]
E0321 01:41:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:41:33.409793  543705 memory.go:184] no items to output this cycle
I0321 01:41:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 01:41:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:41:43.409778  543705 memory.go:191] Add success.
I0321 01:41:43.409801  543705 cpu.go:282] Add success.
I0321 01:41:43.419856  543705 net.go:648] Add success.
I0321 01:41:43.422766  543705 net.go:770] primary dev: ETH0
I0321 01:41:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:41:43.422791  543705 net.go:698] Add success.
I0321 01:41:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:41:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:41:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:41:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:41:53.409797  543705 memory.go:184] no items to output this cycle
I0321 01:41:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 01:42:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:42:03.409766  543705 memory.go:184] no items to output this cycle
I0321 01:42:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 01:42:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:42:13.409819  543705 memory.go:191] Add success.
I0321 01:42:13.409837  543705 cpu.go:282] Add success.
W0321 01:42:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:42:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:42:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:42:13.420338  543705 net.go:648] Add success.
I0321 01:42:13.423023  543705 net.go:770] primary dev: ETH0
I0321 01:42:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:42:13.423050  543705 net.go:698] Add success.
I0321 01:42:13.469536  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"30b18671-4a68-4e47-9f6f-4d80307b43cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:42:13.469582  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 01:42:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:42:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0321 01:42:14.455225  543705 disk_worker.go:728] disk inode is not compliant
E0321 01:42:14.456081  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:42:14.456091  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:42:14.456098  543705 custom_config.go:64] query custom config with name: gpu
I0321 01:42:14.456644  543705 disk_worker.go:494] system disk:vda1
I0321 01:42:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:42:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:42:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 01:42:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:42:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:42:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:42:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:42:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:42:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:42:23.409777  543705 memory.go:184] no items to output this cycle
I0321 01:42:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 01:42:27.137673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:42:27.140199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:42:27.140206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2140 0xc0002b2180]
E0321 01:42:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:42:33.409764  543705 memory.go:184] no items to output this cycle
I0321 01:42:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 01:42:38.689143  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:42:38.689150  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:42:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:42:43.410608  543705 memory.go:191] Add success.
I0321 01:42:43.409786  543705 cpu.go:282] Add success.
I0321 01:42:43.420313  543705 net.go:648] Add success.
I0321 01:42:43.423305  543705 net.go:770] primary dev: ETH0
I0321 01:42:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:42:43.423334  543705 net.go:698] Add success.
I0321 01:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:42:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:42:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:42:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:42:53.409800  543705 memory.go:184] no items to output this cycle
I0321 01:42:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 01:43:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:43:03.409777  543705 memory.go:184] no items to output this cycle
I0321 01:43:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 01:43:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:43:13.409777  543705 memory.go:191] Add success.
W0321 01:43:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:43:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:43:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:43:13.409838  543705 cpu.go:282] Add success.
I0321 01:43:13.420565  543705 net.go:648] Add success.
I0321 01:43:13.421533  543705 net.go:770] primary dev: ETH0
I0321 01:43:13.421551  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:43:13.421569  543705 net.go:698] Add success.
I0321 01:43:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:43:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:43:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 01:43:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:43:14.456658  543705 disk_worker.go:494] system disk:vda1
I0321 01:43:14.456692  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:43:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:43:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:43:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:43:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:43:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:43:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:43:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 01:43:23.409794  543705 memory.go:184] no items to output this cycle
I0321 01:43:27.141675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:43:27.144147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:43:27.144154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056d580 0xc00056d5c0]
E0321 01:43:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:43:33.409795  543705 memory.go:184] no items to output this cycle
I0321 01:43:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 01:43:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:43:43.409789  543705 cpu.go:282] Add success.
I0321 01:43:43.409802  543705 memory.go:191] Add success.
I0321 01:43:43.420094  543705 net.go:648] Add success.
I0321 01:43:43.422786  543705 net.go:770] primary dev: ETH0
I0321 01:43:43.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:43:43.422811  543705 net.go:698] Add success.
I0321 01:43:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:43:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:43:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:43:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:43:53.409780  543705 memory.go:184] no items to output this cycle
I0321 01:43:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 01:44:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:44:03.409770  543705 memory.go:184] no items to output this cycle
I0321 01:44:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 01:44:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:44:13.409788  543705 memory.go:191] Add success.
W0321 01:44:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:44:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:44:13.409843  543705 cpu.go:282] Add success.
I0321 01:44:13.420333  543705 net.go:648] Add success.
I0321 01:44:13.421259  543705 net.go:770] primary dev: ETH0
I0321 01:44:13.421278  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:44:13.421298  543705 net.go:698] Add success.
I0321 01:44:14.454993  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:44:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:44:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 01:44:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:44:14.456550  543705 disk_worker.go:494] system disk:vda1
I0321 01:44:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:44:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:44:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:44:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:44:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:44:23.409774  543705 memory.go:184] no items to output this cycle
I0321 01:44:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 01:44:27.145678  543705 disk_info.go:125] begin check local disk info of client
I0321 01:44:27.148224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:44:27.148231  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b600 0xc00007b640]
E0321 01:44:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:44:33.409783  543705 memory.go:184] no items to output this cycle
I0321 01:44:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 01:44:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:44:43.409784  543705 memory.go:191] Add success.
I0321 01:44:43.409787  543705 cpu.go:282] Add success.
I0321 01:44:43.420100  543705 net.go:648] Add success.
I0321 01:44:43.423278  543705 net.go:770] primary dev: ETH0
I0321 01:44:43.423291  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:44:43.423303  543705 net.go:698] Add success.
I0321 01:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:44:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:44:53.409773  543705 memory.go:184] no items to output this cycle
I0321 01:44:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:45:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:45:03.409794  543705 memory.go:184] no items to output this cycle
I0321 01:45:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 01:45:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:45:13.409789  543705 memory.go:191] Add success.
I0321 01:45:13.409789  543705 cpu.go:282] Add success.
W0321 01:45:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:45:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:45:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:45:13.420070  543705 net.go:648] Add success.
I0321 01:45:13.422905  543705 net.go:770] primary dev: ETH0
I0321 01:45:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:45:13.422936  543705 net.go:698] Add success.
I0321 01:45:13.463944  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f8ebef6d-ba6b-47e9-a7b0-b84d4c013369","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:45:13.463977  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:45:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:45:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:45:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0321 01:45:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:45:14.456636  543705 disk_worker.go:494] system disk:vda1
I0321 01:45:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:45:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:45:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:45:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:45:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:45:23.409800  543705 memory.go:184] no items to output this cycle
I0321 01:45:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 01:45:27.149674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:45:27.152204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:45:27.152211  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ba40 0xc00039ba80]
E0321 01:45:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:45:33.409802  543705 memory.go:184] no items to output this cycle
I0321 01:45:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 01:45:38.689733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:45:38.689740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:45:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:45:43.410619  543705 memory.go:191] Add success.
I0321 01:45:43.409792  543705 cpu.go:282] Add success.
I0321 01:45:43.420308  543705 net.go:648] Add success.
I0321 01:45:43.422790  543705 net.go:770] primary dev: ETH0
I0321 01:45:43.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:45:43.422815  543705 net.go:698] Add success.
I0321 01:45:46.457767  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:45:46.457826  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:45:46.457850  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:45:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:45:53.409776  543705 memory.go:184] no items to output this cycle
I0321 01:45:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 01:46:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:46:03.409798  543705 memory.go:184] no items to output this cycle
I0321 01:46:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 01:46:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:46:13.409811  543705 memory.go:191] Add success.
I0321 01:46:13.409821  543705 cpu.go:282] Add success.
W0321 01:46:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:46:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:46:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:46:13.420190  543705 net.go:648] Add success.
I0321 01:46:13.422862  543705 net.go:770] primary dev: ETH0
I0321 01:46:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:46:13.422886  543705 net.go:698] Add success.
I0321 01:46:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:46:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:46:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0321 01:46:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:46:14.456654  543705 disk_worker.go:494] system disk:vda1
I0321 01:46:14.456690  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:46:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:46:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:46:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:46:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:46:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:46:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:46:23.409788  543705 memory.go:184] no items to output this cycle
I0321 01:46:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 01:46:27.153673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:46:27.156203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:46:27.156211  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056c200 0xc00056c240]
E0321 01:46:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:46:33.409760  543705 memory.go:184] no items to output this cycle
I0321 01:46:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 01:46:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:46:43.409808  543705 memory.go:191] Add success.
I0321 01:46:43.409813  543705 cpu.go:282] Add success.
I0321 01:46:43.419956  543705 net.go:648] Add success.
I0321 01:46:43.422675  543705 net.go:770] primary dev: ETH0
I0321 01:46:43.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:46:43.422861  543705 net.go:698] Add success.
I0321 01:46:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:46:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:46:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:46:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:46:53.409771  543705 memory.go:184] no items to output this cycle
I0321 01:46:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:47:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:47:03.409768  543705 memory.go:184] no items to output this cycle
I0321 01:47:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 01:47:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:47:13.409781  543705 memory.go:191] Add success.
I0321 01:47:13.409789  543705 cpu.go:282] Add success.
W0321 01:47:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:47:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:47:13.420504  543705 net.go:648] Add success.
I0321 01:47:13.421525  543705 net.go:770] primary dev: ETH0
I0321 01:47:13.421539  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:47:13.421553  543705 net.go:698] Add success.
I0321 01:47:13.453112  543705 event_worker.go:152] Polling the log file for events...
W0321 01:47:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:47:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 01:47:14.455227  543705 disk_worker.go:728] disk inode is not compliant
E0321 01:47:14.456066  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:47:14.456075  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:47:14.456082  543705 custom_config.go:64] query custom config with name: gpu
I0321 01:47:14.456633  543705 disk_worker.go:494] system disk:vda1
I0321 01:47:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:47:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:47:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:47:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:47:16.457990  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:47:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:47:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:47:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:47:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:47:23.409811  543705 memory.go:184] no items to output this cycle
I0321 01:47:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 01:47:27.157683  543705 disk_info.go:125] begin check local disk info of client
I0321 01:47:27.160192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:47:27.160198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000323800 0xc000323840]
E0321 01:47:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:47:33.409798  543705 memory.go:184] no items to output this cycle
I0321 01:47:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 01:47:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:47:43.409788  543705 memory.go:191] Add success.
I0321 01:47:43.409805  543705 cpu.go:282] Add success.
I0321 01:47:43.419857  543705 net.go:648] Add success.
I0321 01:47:43.423220  543705 net.go:770] primary dev: ETH0
I0321 01:47:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:47:43.423246  543705 net.go:698] Add success.
I0321 01:47:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:47:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:47:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:47:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:47:53.409781  543705 memory.go:184] no items to output this cycle
I0321 01:47:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 01:48:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:48:03.409788  543705 memory.go:184] no items to output this cycle
I0321 01:48:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 01:48:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:48:13.409783  543705 memory.go:191] Add success.
I0321 01:48:13.409803  543705 cpu.go:282] Add success.
W0321 01:48:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:48:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:48:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:48:13.420076  543705 net.go:648] Add success.
I0321 01:48:13.422739  543705 net.go:770] primary dev: ETH0
I0321 01:48:13.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:48:13.422768  543705 net.go:698] Add success.
I0321 01:48:13.743551  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cdd90b74-292f-4e5a-8125-c278efb2e7d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:48:13.743596  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:48:14.454720  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:48:14.454851  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:48:14.454916  543705 disk_worker.go:708] disk space is not compliant
W0321 01:48:14.454919  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:48:14.456272  543705 disk_worker.go:494] system disk:vda1
I0321 01:48:14.456332  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:48:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:48:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:48:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:48:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:48:23.409786  543705 memory.go:184] no items to output this cycle
I0321 01:48:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 01:48:27.161687  543705 disk_info.go:125] begin check local disk info of client
I0321 01:48:27.164264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:48:27.164271  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad800 0xc0003ad840]
E0321 01:48:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:48:33.409773  543705 memory.go:184] no items to output this cycle
I0321 01:48:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 01:48:38.691157  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:48:38.691164  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:48:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:48:43.410694  543705 memory.go:191] Add success.
I0321 01:48:43.409809  543705 cpu.go:282] Add success.
I0321 01:48:43.420200  543705 net.go:770] primary dev: ETH0
I0321 01:48:43.420212  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:48:43.420224  543705 net.go:698] Add success.
I0321 01:48:43.420590  543705 net.go:648] Add success.
I0321 01:48:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:48:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:48:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:48:53.409803  543705 memory.go:184] no items to output this cycle
I0321 01:48:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 01:49:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:49:03.409769  543705 memory.go:184] no items to output this cycle
I0321 01:49:03.409912  543705 cpu.go:275] no items to output this cycle
E0321 01:49:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:49:13.409811  543705 memory.go:191] Add success.
I0321 01:49:13.409825  543705 cpu.go:282] Add success.
W0321 01:49:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:49:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:49:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:49:13.420313  543705 net.go:648] Add success.
I0321 01:49:13.422928  543705 net.go:770] primary dev: ETH0
I0321 01:49:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:49:13.422953  543705 net.go:698] Add success.
I0321 01:49:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:49:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:49:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0321 01:49:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:49:14.456650  543705 disk_worker.go:494] system disk:vda1
I0321 01:49:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:49:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:49:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:49:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:49:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:49:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:49:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:49:23.409773  543705 memory.go:184] no items to output this cycle
I0321 01:49:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 01:49:27.165675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:49:27.168157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:49:27.168163  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad3c0 0xc0003ad400]
E0321 01:49:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:49:33.409789  543705 memory.go:184] no items to output this cycle
I0321 01:49:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 01:49:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:49:43.409787  543705 memory.go:191] Add success.
I0321 01:49:43.409789  543705 cpu.go:282] Add success.
I0321 01:49:43.419860  543705 net.go:648] Add success.
I0321 01:49:43.422693  543705 net.go:770] primary dev: ETH0
I0321 01:49:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:49:43.422718  543705 net.go:698] Add success.
I0321 01:49:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:49:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:49:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:49:53.409791  543705 memory.go:184] no items to output this cycle
I0321 01:49:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 01:50:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:50:03.409907  543705 memory.go:184] no items to output this cycle
I0321 01:50:03.409909  543705 cpu.go:275] no items to output this cycle
E0321 01:50:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:50:13.409786  543705 memory.go:191] Add success.
W0321 01:50:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 01:50:13.409815  543705 cpu.go:282] Add success.
W0321 01:50:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:50:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:50:13.420202  543705 net.go:648] Add success.
I0321 01:50:13.422707  543705 net.go:770] primary dev: ETH0
I0321 01:50:13.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:50:13.422732  543705 net.go:698] Add success.
I0321 01:50:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:50:14.455225  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:50:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0321 01:50:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:50:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 01:50:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:50:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:50:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:50:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:50:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:50:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:50:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:50:23.409808  543705 memory.go:184] no items to output this cycle
I0321 01:50:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 01:50:27.169675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:50:27.172279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:50:27.172286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ac900 0xc0003ac940]
E0321 01:50:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:50:33.409800  543705 memory.go:184] no items to output this cycle
I0321 01:50:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 01:50:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:50:43.409780  543705 memory.go:191] Add success.
I0321 01:50:43.409799  543705 cpu.go:282] Add success.
I0321 01:50:43.419882  543705 net.go:648] Add success.
I0321 01:50:43.422370  543705 net.go:770] primary dev: ETH0
I0321 01:50:43.422384  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:50:43.422399  543705 net.go:698] Add success.
I0321 01:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:50:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:50:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:50:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:50:53.409784  543705 cpu.go:275] no items to output this cycle
I0321 01:50:53.409786  543705 memory.go:184] no items to output this cycle
E0321 01:51:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:51:03.409776  543705 memory.go:184] no items to output this cycle
I0321 01:51:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 01:51:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:51:13.409826  543705 memory.go:191] Add success.
I0321 01:51:13.409833  543705 cpu.go:282] Add success.
W0321 01:51:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:51:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:51:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:51:13.420130  543705 net.go:648] Add success.
I0321 01:51:13.422765  543705 net.go:770] primary dev: ETH0
I0321 01:51:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:51:13.422794  543705 net.go:698] Add success.
I0321 01:51:13.468391  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ff9a5cb0-0e0b-4a03-bd8d-edf6f3240b4a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:51:13.468423  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:51:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:51:14.455232  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:51:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0321 01:51:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:51:14.456807  543705 disk_worker.go:494] system disk:vda1
I0321 01:51:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:51:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:51:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:51:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:51:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:51:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:51:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:51:23.409778  543705 memory.go:184] no items to output this cycle
I0321 01:51:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 01:51:27.173685  543705 disk_info.go:125] begin check local disk info of client
I0321 01:51:27.176175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:51:27.176182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ace40 0xc0003ace80]
E0321 01:51:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:51:33.409818  543705 memory.go:184] no items to output this cycle
I0321 01:51:33.409828  543705 cpu.go:275] no items to output this cycle
I0321 01:51:38.691303  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:51:38.691310  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:51:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:51:43.410901  543705 memory.go:191] Add success.
I0321 01:51:43.409830  543705 cpu.go:282] Add success.
I0321 01:51:43.420594  543705 net.go:648] Add success.
I0321 01:51:43.423142  543705 net.go:770] primary dev: ETH0
I0321 01:51:43.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:51:43.423168  543705 net.go:698] Add success.
I0321 01:51:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:51:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:51:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:51:53.410353  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:51:53.410371  543705 memory.go:184] no items to output this cycle
I0321 01:51:53.410403  543705 cpu.go:275] no items to output this cycle
E0321 01:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:52:03.409783  543705 memory.go:184] no items to output this cycle
I0321 01:52:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 01:52:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:52:13.409833  543705 memory.go:191] Add success.
I0321 01:52:13.409840  543705 cpu.go:282] Add success.
W0321 01:52:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:52:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:52:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:52:13.420289  543705 net.go:648] Add success.
I0321 01:52:13.423540  543705 net.go:770] primary dev: ETH0
I0321 01:52:13.423553  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:52:13.423565  543705 net.go:698] Add success.
W0321 01:52:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:52:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 01:52:14.455217  543705 disk_worker.go:728] disk inode is not compliant
E0321 01:52:14.456074  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:52:14.456084  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:52:14.456090  543705 custom_config.go:64] query custom config with name: gpu
I0321 01:52:14.456664  543705 disk_worker.go:494] system disk:vda1
I0321 01:52:14.456701  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:52:15.456865  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:52:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:52:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:52:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:52:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:52:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:52:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:52:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:52:23.409812  543705 memory.go:184] no items to output this cycle
I0321 01:52:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 01:52:27.177677  543705 disk_info.go:125] begin check local disk info of client
I0321 01:52:27.180221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:52:27.180227  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056d280 0xc00056d2c0]
E0321 01:52:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:52:33.409778  543705 memory.go:184] no items to output this cycle
I0321 01:52:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 01:52:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:52:43.409821  543705 memory.go:191] Add success.
I0321 01:52:43.409825  543705 cpu.go:282] Add success.
I0321 01:52:43.419906  543705 net.go:648] Add success.
I0321 01:52:43.422899  543705 net.go:770] primary dev: ETH0
I0321 01:52:43.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:52:43.422928  543705 net.go:698] Add success.
I0321 01:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:52:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:52:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:52:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:52:53.409780  543705 memory.go:184] no items to output this cycle
I0321 01:52:53.409797  543705 cpu.go:275] no items to output this cycle
I0321 01:53:03.409884  543705 cpu.go:275] no items to output this cycle
E0321 01:53:03.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:53:03.409903  543705 memory.go:184] no items to output this cycle
E0321 01:53:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:53:13.409811  543705 memory.go:191] Add success.
I0321 01:53:13.409814  543705 cpu.go:282] Add success.
W0321 01:53:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:53:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:53:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:53:13.420147  543705 net.go:648] Add success.
I0321 01:53:13.422850  543705 net.go:770] primary dev: ETH0
I0321 01:53:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:53:13.422875  543705 net.go:698] Add success.
I0321 01:53:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:53:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:53:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0321 01:53:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:53:14.456644  543705 disk_worker.go:494] system disk:vda1
I0321 01:53:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:53:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:53:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:53:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:53:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:53:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:53:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:53:23.409804  543705 memory.go:184] no items to output this cycle
I0321 01:53:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 01:53:27.181673  543705 disk_info.go:125] begin check local disk info of client
I0321 01:53:27.184173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:53:27.184180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faf80 0xc0001fafc0]
E0321 01:53:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:53:33.409765  543705 memory.go:184] no items to output this cycle
I0321 01:53:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 01:53:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:53:43.409822  543705 memory.go:191] Add success.
I0321 01:53:43.409830  543705 cpu.go:282] Add success.
I0321 01:53:43.419989  543705 net.go:648] Add success.
I0321 01:53:43.422910  543705 net.go:770] primary dev: ETH0
I0321 01:53:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:53:43.422936  543705 net.go:698] Add success.
I0321 01:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:53:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:53:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:53:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:53:53.409780  543705 memory.go:184] no items to output this cycle
I0321 01:53:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 01:54:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:54:03.409763  543705 memory.go:184] no items to output this cycle
I0321 01:54:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 01:54:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:54:13.409791  543705 memory.go:191] Add success.
I0321 01:54:13.409809  543705 cpu.go:282] Add success.
W0321 01:54:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:54:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:54:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:54:13.420149  543705 net.go:648] Add success.
I0321 01:54:13.423286  543705 net.go:770] primary dev: ETH0
I0321 01:54:13.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:54:13.423315  543705 net.go:698] Add success.
I0321 01:54:13.468461  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fa4b2332-f39e-4e1f-9481-ce157e43393d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:54:13.468494  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 01:54:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:54:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:54:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 01:54:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:54:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 01:54:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:54:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:54:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:54:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:54:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:54:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:54:23.409788  543705 memory.go:184] no items to output this cycle
I0321 01:54:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 01:54:27.185674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:54:27.188451  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:54:27.188458  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8d80 0xc0003b8dc0]
E0321 01:54:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:54:33.409766  543705 memory.go:184] no items to output this cycle
I0321 01:54:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 01:54:38.691445  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:54:38.691452  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:54:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:54:43.410670  543705 memory.go:191] Add success.
I0321 01:54:43.409796  543705 cpu.go:282] Add success.
I0321 01:54:43.420347  543705 net.go:648] Add success.
I0321 01:54:43.423010  543705 net.go:770] primary dev: ETH0
I0321 01:54:43.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:54:43.423037  543705 net.go:698] Add success.
I0321 01:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:54:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:54:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:54:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:54:53.409759  543705 memory.go:184] no items to output this cycle
I0321 01:54:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 01:55:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:55:03.409778  543705 memory.go:184] no items to output this cycle
I0321 01:55:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 01:55:13.409987  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:55:13.410011  543705 memory.go:191] Add success.
W0321 01:55:13.410039  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:55:13.410054  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:55:13.410057  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:55:13.409987  543705 cpu.go:282] Add success.
I0321 01:55:13.419704  543705 net.go:648] Add success.
I0321 01:55:13.422709  543705 net.go:770] primary dev: ETH0
I0321 01:55:13.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:55:13.422733  543705 net.go:698] Add success.
I0321 01:55:14.455004  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:55:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:55:14.455242  543705 disk_worker.go:708] disk space is not compliant
W0321 01:55:14.455245  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:55:14.456661  543705 disk_worker.go:494] system disk:vda1
I0321 01:55:14.456694  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:55:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:55:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:55:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:55:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:55:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:55:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:55:23.409791  543705 memory.go:184] no items to output this cycle
I0321 01:55:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 01:55:27.189684  543705 disk_info.go:125] begin check local disk info of client
I0321 01:55:27.192250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:55:27.192256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faac0 0xc0001fab00]
E0321 01:55:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:55:33.409773  543705 memory.go:184] no items to output this cycle
I0321 01:55:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 01:55:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:55:43.409817  543705 memory.go:191] Add success.
I0321 01:55:43.409818  543705 cpu.go:282] Add success.
I0321 01:55:43.419987  543705 net.go:648] Add success.
I0321 01:55:43.422665  543705 net.go:770] primary dev: ETH0
I0321 01:55:43.422682  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:55:43.422697  543705 net.go:698] Add success.
I0321 01:55:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:55:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:55:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:55:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:55:53.409767  543705 memory.go:184] no items to output this cycle
I0321 01:55:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 01:56:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:56:03.409768  543705 memory.go:184] no items to output this cycle
I0321 01:56:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 01:56:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:56:13.409793  543705 memory.go:191] Add success.
I0321 01:56:13.409798  543705 cpu.go:282] Add success.
W0321 01:56:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:56:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:56:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:56:13.419720  543705 net.go:648] Add success.
I0321 01:56:13.422294  543705 net.go:770] primary dev: ETH0
I0321 01:56:13.422305  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:56:13.422317  543705 net.go:698] Add success.
I0321 01:56:14.455013  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:56:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:56:14.455315  543705 disk_worker.go:708] disk space is not compliant
W0321 01:56:14.455320  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:56:14.457160  543705 disk_worker.go:494] system disk:vda1
I0321 01:56:14.457192  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:56:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:56:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:56:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:56:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:56:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:56:23.409789  543705 memory.go:184] no items to output this cycle
I0321 01:56:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 01:56:27.193674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:56:27.196234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:56:27.196241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab040 0xc0001ab080]
E0321 01:56:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:56:33.409773  543705 memory.go:184] no items to output this cycle
I0321 01:56:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 01:56:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:56:43.409784  543705 memory.go:191] Add success.
I0321 01:56:43.409784  543705 cpu.go:282] Add success.
I0321 01:56:43.419876  543705 net.go:648] Add success.
I0321 01:56:43.423056  543705 net.go:770] primary dev: ETH0
I0321 01:56:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:56:43.423085  543705 net.go:698] Add success.
I0321 01:56:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:56:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:56:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:56:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:56:53.409799  543705 memory.go:184] no items to output this cycle
I0321 01:56:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 01:57:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:57:03.409800  543705 memory.go:184] no items to output this cycle
I0321 01:57:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 01:57:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:57:13.409874  543705 memory.go:191] Add success.
W0321 01:57:13.409903  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:57:13.409917  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:57:13.409920  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:57:13.409942  543705 cpu.go:282] Add success.
I0321 01:57:13.419721  543705 net.go:648] Add success.
I0321 01:57:13.422814  543705 net.go:770] primary dev: ETH0
I0321 01:57:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:57:13.422838  543705 net.go:698] Add success.
I0321 01:57:13.429444  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 01:57:13.453624  543705 event_worker.go:152] Polling the log file for events...
I0321 01:57:13.469475  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a62898ce-8410-43d8-9c59-41ee74b04dc2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 01:57:13.469507  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 01:57:14.455335  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:57:14.455353  543705 disk_worker.go:708] disk space is not compliant
W0321 01:57:14.455358  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:57:14.458199  543705 disk_worker.go:494] system disk:vda1
I0321 01:57:14.458245  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 01:57:14.458530  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 01:57:14.458539  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 01:57:14.458545  543705 custom_config.go:64] query custom config with name: gpu
E0321 01:57:15.457005  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 01:57:15.457019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:57:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 01:57:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 01:57:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:57:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:57:16.472535  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:57:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:57:23.409770  543705 memory.go:184] no items to output this cycle
I0321 01:57:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 01:57:27.197676  543705 disk_info.go:125] begin check local disk info of client
I0321 01:57:27.200449  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:57:27.200455  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0321 01:57:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:57:33.409785  543705 memory.go:184] no items to output this cycle
I0321 01:57:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 01:57:38.691593  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 01:57:38.691599  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 01:57:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:57:43.410727  543705 memory.go:191] Add success.
I0321 01:57:43.409798  543705 cpu.go:282] Add success.
I0321 01:57:43.420466  543705 net.go:648] Add success.
I0321 01:57:43.423714  543705 net.go:770] primary dev: ETH0
I0321 01:57:43.423727  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:57:43.423740  543705 net.go:698] Add success.
I0321 01:57:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:57:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:57:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:57:53.409813  543705 memory.go:184] no items to output this cycle
I0321 01:57:53.409824  543705 cpu.go:275] no items to output this cycle
E0321 01:58:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:58:03.409879  543705 cpu.go:275] no items to output this cycle
I0321 01:58:03.409891  543705 memory.go:184] no items to output this cycle
E0321 01:58:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:58:13.409798  543705 memory.go:191] Add success.
I0321 01:58:13.409815  543705 cpu.go:282] Add success.
W0321 01:58:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:58:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:58:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:58:13.420122  543705 net.go:648] Add success.
I0321 01:58:13.423244  543705 net.go:770] primary dev: ETH0
I0321 01:58:13.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:58:13.423269  543705 net.go:698] Add success.
I0321 01:58:14.455118  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:58:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:58:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 01:58:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:58:14.456524  543705 disk_worker.go:494] system disk:vda1
I0321 01:58:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:58:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:58:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:58:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:58:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:58:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:58:23.409798  543705 memory.go:184] no items to output this cycle
I0321 01:58:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 01:58:27.201674  543705 disk_info.go:125] begin check local disk info of client
I0321 01:58:27.204201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:58:27.204208  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d6080 0xc0004d60c0]
E0321 01:58:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:58:33.409802  543705 memory.go:184] no items to output this cycle
I0321 01:58:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 01:58:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:58:43.409797  543705 memory.go:191] Add success.
I0321 01:58:43.409802  543705 cpu.go:282] Add success.
I0321 01:58:43.419875  543705 net.go:648] Add success.
I0321 01:58:43.422538  543705 net.go:770] primary dev: ETH0
I0321 01:58:43.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:58:43.422561  543705 net.go:698] Add success.
I0321 01:58:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:58:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:58:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:58:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:58:53.409799  543705 memory.go:184] no items to output this cycle
I0321 01:58:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 01:59:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:59:03.409807  543705 memory.go:184] no items to output this cycle
I0321 01:59:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 01:59:13.409955  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:59:13.409963  543705 cpu.go:282] Add success.
I0321 01:59:13.409983  543705 memory.go:191] Add success.
W0321 01:59:13.410015  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 01:59:13.410038  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 01:59:13.410043  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 01:59:13.419734  543705 net.go:648] Add success.
I0321 01:59:13.422628  543705 net.go:770] primary dev: ETH0
I0321 01:59:13.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:59:13.422661  543705 net.go:698] Add success.
I0321 01:59:14.453939  543705 custom_config.go:64] query custom config with name: gpu
W0321 01:59:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 01:59:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 01:59:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 01:59:14.458027  543705 disk_worker.go:494] system disk:vda1
I0321 01:59:14.458059  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 01:59:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 01:59:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:59:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:59:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 01:59:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0321 01:59:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:59:23.409797  543705 memory.go:184] no items to output this cycle
I0321 01:59:23.409835  543705 cpu.go:275] no items to output this cycle
I0321 01:59:27.205675  543705 disk_info.go:125] begin check local disk info of client
I0321 01:59:27.208165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 01:59:27.208172  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056d640 0xc00056d680]
E0321 01:59:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:59:33.409780  543705 memory.go:184] no items to output this cycle
I0321 01:59:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 01:59:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:59:43.409785  543705 memory.go:191] Add success.
I0321 01:59:43.409819  543705 cpu.go:282] Add success.
I0321 01:59:43.419878  543705 net.go:648] Add success.
I0321 01:59:43.422657  543705 net.go:770] primary dev: ETH0
I0321 01:59:43.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0321 01:59:43.422686  543705 net.go:698] Add success.
I0321 01:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 01:59:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 01:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 01:59:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 01:59:53.409790  543705 memory.go:184] no items to output this cycle
I0321 01:59:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 02:00:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:00:03.409801  543705 memory.go:184] no items to output this cycle
I0321 02:00:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 02:00:13.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:00:13.409892  543705 memory.go:191] Add success.
W0321 02:00:13.409920  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:00:13.409936  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:00:13.409939  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:00:13.410032  543705 cpu.go:282] Add success.
I0321 02:00:13.419750  543705 net.go:648] Add success.
I0321 02:00:13.422452  543705 net.go:770] primary dev: ETH0
I0321 02:00:13.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:00:13.422479  543705 net.go:698] Add success.
I0321 02:00:13.556311  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"981959d5-9476-4e94-a32d-e9f90b5f7a4b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:00:13.556341  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:00:14.455109  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:00:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:00:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 02:00:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:00:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 02:00:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:00:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:00:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:00:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:00:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:00:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:00:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:00:23.409774  543705 memory.go:184] no items to output this cycle
I0321 02:00:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 02:00:27.209672  543705 disk_info.go:125] begin check local disk info of client
I0321 02:00:27.212227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:00:27.212233  543705 disk_info.go:196] parse disk info done, disk is : [0xc00056d940 0xc00056d980]
E0321 02:00:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:00:33.409797  543705 memory.go:184] no items to output this cycle
I0321 02:00:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 02:00:38.692155  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:00:38.692162  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:00:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:00:43.410694  543705 memory.go:191] Add success.
I0321 02:00:43.409811  543705 cpu.go:282] Add success.
I0321 02:00:43.420387  543705 net.go:648] Add success.
I0321 02:00:43.423032  543705 net.go:770] primary dev: ETH0
I0321 02:00:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:00:43.423059  543705 net.go:698] Add success.
I0321 02:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:00:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:00:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:00:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:00:53.409771  543705 memory.go:184] no items to output this cycle
I0321 02:00:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:01:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:01:03.409774  543705 memory.go:184] no items to output this cycle
I0321 02:01:03.409795  543705 cpu.go:275] no items to output this cycle
I0321 02:01:13.409962  543705 cpu.go:282] Add success.
E0321 02:01:13.409997  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:01:13.410018  543705 memory.go:191] Add success.
W0321 02:01:13.410050  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:01:13.410062  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:01:13.410066  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:01:13.419722  543705 net.go:648] Add success.
I0321 02:01:13.422506  543705 net.go:770] primary dev: ETH0
I0321 02:01:13.422520  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:01:13.422533  543705 net.go:698] Add success.
I0321 02:01:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:01:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:01:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 02:01:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:01:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 02:01:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:01:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:01:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:01:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:01:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:01:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:01:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:01:23.409800  543705 memory.go:184] no items to output this cycle
I0321 02:01:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 02:01:27.213679  543705 disk_info.go:125] begin check local disk info of client
I0321 02:01:27.216185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:01:27.216192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad300 0xc0003ad340]
E0321 02:01:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:01:33.409780  543705 memory.go:184] no items to output this cycle
I0321 02:01:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 02:01:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:01:43.409781  543705 memory.go:191] Add success.
I0321 02:01:43.409810  543705 cpu.go:282] Add success.
I0321 02:01:43.420035  543705 net.go:648] Add success.
I0321 02:01:43.422592  543705 net.go:770] primary dev: ETH0
I0321 02:01:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:01:43.422617  543705 net.go:698] Add success.
I0321 02:01:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:01:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:01:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:01:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:01:53.409767  543705 memory.go:184] no items to output this cycle
I0321 02:01:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 02:02:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:02:03.409782  543705 memory.go:184] no items to output this cycle
I0321 02:02:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 02:02:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:02:13.409794  543705 memory.go:191] Add success.
I0321 02:02:13.409796  543705 cpu.go:282] Add success.
W0321 02:02:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:02:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:02:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:02:13.420123  543705 net.go:648] Add success.
I0321 02:02:13.422824  543705 net.go:770] primary dev: ETH0
I0321 02:02:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:02:13.422848  543705 net.go:698] Add success.
W0321 02:02:14.455344  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:02:14.455358  543705 disk_worker.go:708] disk space is not compliant
W0321 02:02:14.455363  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:02:14.457492  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:02:14.457499  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:02:14.457503  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:02:14.457523  543705 disk_worker.go:494] system disk:vda1
I0321 02:02:14.457566  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:02:15.457028  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:02:15.457043  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:02:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:02:16.457987  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:02:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:02:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:02:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:02:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:02:23.409784  543705 memory.go:184] no items to output this cycle
I0321 02:02:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 02:02:27.217675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:02:27.220340  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:02:27.220349  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b600 0xc00007b640]
E0321 02:02:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:02:33.409774  543705 memory.go:184] no items to output this cycle
I0321 02:02:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 02:02:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:02:43.409792  543705 memory.go:191] Add success.
I0321 02:02:43.409792  543705 cpu.go:282] Add success.
I0321 02:02:43.419861  543705 net.go:648] Add success.
I0321 02:02:43.422569  543705 net.go:770] primary dev: ETH0
I0321 02:02:43.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:02:43.422597  543705 net.go:698] Add success.
I0321 02:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:02:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:02:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:02:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:02:53.409763  543705 memory.go:184] no items to output this cycle
I0321 02:02:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 02:03:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:03:03.409781  543705 memory.go:184] no items to output this cycle
I0321 02:03:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 02:03:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:03:13.409783  543705 memory.go:191] Add success.
I0321 02:03:13.409811  543705 cpu.go:282] Add success.
W0321 02:03:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:03:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:03:13.420094  543705 net.go:648] Add success.
I0321 02:03:13.422735  543705 net.go:770] primary dev: ETH0
I0321 02:03:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:03:13.422764  543705 net.go:698] Add success.
I0321 02:03:13.469545  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"489421b6-9d36-47fb-9924-d67b29148d09","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:03:13.469580  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:03:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:03:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:03:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 02:03:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:03:14.456731  543705 disk_worker.go:494] system disk:vda1
I0321 02:03:14.456762  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:03:15.455638  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:03:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:03:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:03:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:03:16.472481  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:03:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:03:23.409781  543705 memory.go:184] no items to output this cycle
I0321 02:03:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 02:03:27.221676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:03:27.224458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:03:27.224464  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc100 0xc0002bc140]
E0321 02:03:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:03:33.409776  543705 memory.go:184] no items to output this cycle
I0321 02:03:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 02:03:38.692299  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:03:38.692305  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:03:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:03:43.410722  543705 memory.go:191] Add success.
I0321 02:03:43.409794  543705 cpu.go:282] Add success.
I0321 02:03:43.420294  543705 net.go:770] primary dev: ETH0
I0321 02:03:43.420309  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:03:43.420324  543705 net.go:698] Add success.
I0321 02:03:43.420679  543705 net.go:648] Add success.
I0321 02:03:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:03:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:03:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:03:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:03:53.409802  543705 memory.go:184] no items to output this cycle
I0321 02:03:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:04:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:04:03.409807  543705 memory.go:184] no items to output this cycle
I0321 02:04:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 02:04:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:04:13.409901  543705 memory.go:191] Add success.
W0321 02:04:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:04:13.409944  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:04:13.409949  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:04:13.409990  543705 cpu.go:282] Add success.
I0321 02:04:13.419754  543705 net.go:648] Add success.
I0321 02:04:13.422528  543705 net.go:770] primary dev: ETH0
I0321 02:04:13.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:04:13.422556  543705 net.go:698] Add success.
I0321 02:04:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:04:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:04:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 02:04:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:04:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 02:04:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:04:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:04:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:04:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:04:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:04:16.472488  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:04:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:04:23.409809  543705 memory.go:184] no items to output this cycle
I0321 02:04:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 02:04:27.225682  543705 disk_info.go:125] begin check local disk info of client
I0321 02:04:27.228251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:04:27.228258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aabc0 0xc0001aac00]
E0321 02:04:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:04:33.409804  543705 memory.go:184] no items to output this cycle
I0321 02:04:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 02:04:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:04:43.409778  543705 memory.go:191] Add success.
I0321 02:04:43.409804  543705 cpu.go:282] Add success.
I0321 02:04:43.420013  543705 net.go:648] Add success.
I0321 02:04:43.423009  543705 net.go:770] primary dev: ETH0
I0321 02:04:43.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:04:43.423036  543705 net.go:698] Add success.
I0321 02:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:04:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:04:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:04:53.409764  543705 memory.go:184] no items to output this cycle
I0321 02:04:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 02:05:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:05:03.409778  543705 memory.go:184] no items to output this cycle
I0321 02:05:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 02:05:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:05:13.409894  543705 memory.go:191] Add success.
W0321 02:05:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 02:05:13.409939  543705 cpu.go:282] Add success.
W0321 02:05:13.409949  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:05:13.409989  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:05:13.419716  543705 net.go:648] Add success.
I0321 02:05:13.422346  543705 net.go:770] primary dev: ETH0
I0321 02:05:13.422359  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:05:13.422371  543705 net.go:698] Add success.
I0321 02:05:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:05:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:05:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0321 02:05:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:05:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 02:05:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:05:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:05:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:05:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:05:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:05:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:05:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:05:23.409772  543705 memory.go:184] no items to output this cycle
I0321 02:05:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 02:05:27.229676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:05:27.232175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:05:27.232181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4580 0xc0003e45c0]
E0321 02:05:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:05:33.409779  543705 memory.go:184] no items to output this cycle
I0321 02:05:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 02:05:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:05:43.409813  543705 memory.go:191] Add success.
I0321 02:05:43.409819  543705 cpu.go:282] Add success.
I0321 02:05:43.419719  543705 net.go:770] primary dev: ETH0
I0321 02:05:43.419734  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:05:43.419747  543705 net.go:698] Add success.
I0321 02:05:43.420090  543705 net.go:648] Add success.
I0321 02:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:05:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:05:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:05:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:05:53.409772  543705 memory.go:184] no items to output this cycle
I0321 02:05:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 02:06:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:06:03.409809  543705 memory.go:184] no items to output this cycle
I0321 02:06:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 02:06:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:06:13.409868  543705 memory.go:191] Add success.
W0321 02:06:13.409900  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:06:13.409913  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:06:13.409916  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:06:13.409925  543705 cpu.go:282] Add success.
I0321 02:06:13.419711  543705 net.go:648] Add success.
I0321 02:06:13.422702  543705 net.go:770] primary dev: ETH0
I0321 02:06:13.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:06:13.422726  543705 net.go:698] Add success.
I0321 02:06:13.463974  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"34967508-a74f-4d8c-afad-3a5950389716","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:06:13.464008  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:06:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:06:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:06:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 02:06:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:06:14.456687  543705 disk_worker.go:494] system disk:vda1
I0321 02:06:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:06:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:06:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:06:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:06:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:06:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:06:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:06:23.409808  543705 memory.go:184] no items to output this cycle
I0321 02:06:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 02:06:27.233674  543705 disk_info.go:125] begin check local disk info of client
I0321 02:06:27.236471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:06:27.236478  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd640 0xc0002bd680]
E0321 02:06:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:06:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 02:06:33.409784  543705 memory.go:184] no items to output this cycle
I0321 02:06:38.693153  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:06:38.693162  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:06:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:06:43.410633  543705 memory.go:191] Add success.
I0321 02:06:43.409792  543705 cpu.go:282] Add success.
I0321 02:06:43.420405  543705 net.go:648] Add success.
I0321 02:06:43.422989  543705 net.go:770] primary dev: ETH0
I0321 02:06:43.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:06:43.423015  543705 net.go:698] Add success.
I0321 02:06:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:06:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:06:53.409790  543705 memory.go:184] no items to output this cycle
I0321 02:06:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 02:07:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:07:03.409783  543705 memory.go:184] no items to output this cycle
I0321 02:07:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:07:13.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:07:13.409903  543705 memory.go:191] Add success.
W0321 02:07:13.409932  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:07:13.409949  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:07:13.409948  543705 cpu.go:282] Add success.
I0321 02:07:13.409953  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:07:13.419708  543705 net.go:648] Add success.
I0321 02:07:13.422320  543705 net.go:770] primary dev: ETH0
I0321 02:07:13.422333  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:07:13.422344  543705 net.go:698] Add success.
I0321 02:07:13.452769  543705 event_worker.go:152] Polling the log file for events...
W0321 02:07:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:07:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 02:07:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:07:14.456782  543705 disk_worker.go:494] system disk:vda1
I0321 02:07:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:07:14.457083  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:07:14.457091  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:07:14.457095  543705 custom_config.go:64] query custom config with name: gpu
E0321 02:07:15.456986  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:07:15.457000  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:07:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:07:16.457986  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:07:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:07:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:07:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:07:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:07:23.409795  543705 memory.go:184] no items to output this cycle
I0321 02:07:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 02:07:27.237674  543705 disk_info.go:125] begin check local disk info of client
I0321 02:07:27.240166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:07:27.240173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd6c0 0xc0002bd700]
E0321 02:07:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:07:33.409767  543705 memory.go:184] no items to output this cycle
I0321 02:07:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 02:07:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:07:43.409790  543705 memory.go:191] Add success.
I0321 02:07:43.409806  543705 cpu.go:282] Add success.
I0321 02:07:43.419964  543705 net.go:648] Add success.
I0321 02:07:43.422744  543705 net.go:770] primary dev: ETH0
I0321 02:07:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:07:43.422773  543705 net.go:698] Add success.
I0321 02:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:07:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:07:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:07:53.409794  543705 memory.go:184] no items to output this cycle
I0321 02:07:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 02:08:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:08:03.409772  543705 memory.go:184] no items to output this cycle
I0321 02:08:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:08:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:08:13.409903  543705 memory.go:191] Add success.
I0321 02:08:13.409931  543705 cpu.go:282] Add success.
W0321 02:08:13.409940  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:08:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:08:13.409956  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:08:13.419722  543705 net.go:648] Add success.
I0321 02:08:13.422331  543705 net.go:770] primary dev: ETH0
I0321 02:08:13.422343  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:08:13.422354  543705 net.go:698] Add success.
I0321 02:08:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:08:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:08:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 02:08:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:08:14.456821  543705 disk_worker.go:494] system disk:vda1
I0321 02:08:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:08:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:08:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:08:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:08:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:08:16.472500  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:08:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:08:23.409789  543705 memory.go:184] no items to output this cycle
I0321 02:08:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 02:08:27.241677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:08:27.244168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:08:27.244174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053ae80 0xc00053aec0]
E0321 02:08:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:08:33.409792  543705 memory.go:184] no items to output this cycle
I0321 02:08:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 02:08:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:08:43.409816  543705 memory.go:191] Add success.
I0321 02:08:43.409816  543705 cpu.go:282] Add success.
I0321 02:08:43.420461  543705 net.go:648] Add success.
I0321 02:08:43.423074  543705 net.go:770] primary dev: ETH0
I0321 02:08:43.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:08:43.423099  543705 net.go:698] Add success.
I0321 02:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:08:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:08:53.409799  543705 memory.go:184] no items to output this cycle
I0321 02:08:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 02:09:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:09:03.409774  543705 memory.go:184] no items to output this cycle
I0321 02:09:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 02:09:13.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:09:13.409888  543705 memory.go:191] Add success.
W0321 02:09:13.409919  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 02:09:13.409928  543705 cpu.go:282] Add success.
W0321 02:09:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:09:13.409938  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:09:13.419709  543705 net.go:648] Add success.
I0321 02:09:13.422222  543705 net.go:770] primary dev: ETH0
I0321 02:09:13.422236  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:09:13.422247  543705 net.go:698] Add success.
I0321 02:09:13.468295  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"78e139b5-c408-4bda-977c-0f696e63791a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:09:13.468340  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:09:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:09:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0321 02:09:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:09:14.456765  543705 disk_worker.go:494] system disk:vda1
I0321 02:09:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:09:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:09:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:09:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:09:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:09:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:09:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:09:23.409787  543705 memory.go:184] no items to output this cycle
I0321 02:09:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 02:09:27.245673  543705 disk_info.go:125] begin check local disk info of client
I0321 02:09:27.248177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:09:27.248183  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053b640 0xc00053b680]
E0321 02:09:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:09:33.409767  543705 memory.go:184] no items to output this cycle
I0321 02:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 02:09:38.693740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:09:38.693747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:09:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:09:43.410568  543705 memory.go:191] Add success.
I0321 02:09:43.409813  543705 cpu.go:282] Add success.
I0321 02:09:43.420301  543705 net.go:648] Add success.
I0321 02:09:43.422838  543705 net.go:770] primary dev: ETH0
I0321 02:09:43.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:09:43.422865  543705 net.go:698] Add success.
I0321 02:09:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:09:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:09:53.409813  543705 memory.go:184] no items to output this cycle
I0321 02:09:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 02:10:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:10:03.409798  543705 memory.go:184] no items to output this cycle
I0321 02:10:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 02:10:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:10:13.409862  543705 memory.go:191] Add success.
W0321 02:10:13.409892  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:10:13.409904  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:10:13.409907  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:10:13.409926  543705 cpu.go:282] Add success.
I0321 02:10:13.419709  543705 net.go:648] Add success.
I0321 02:10:13.422293  543705 net.go:770] primary dev: ETH0
I0321 02:10:13.422308  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:10:13.422322  543705 net.go:698] Add success.
I0321 02:10:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:10:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:10:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 02:10:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:10:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 02:10:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:10:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:10:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:10:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:10:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:10:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:10:23.409783  543705 memory.go:184] no items to output this cycle
I0321 02:10:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 02:10:27.249677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:10:27.252219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:10:27.252225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5840 0xc0000c5880]
E0321 02:10:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:10:33.409778  543705 memory.go:184] no items to output this cycle
I0321 02:10:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 02:10:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:10:43.409787  543705 cpu.go:282] Add success.
I0321 02:10:43.409797  543705 memory.go:191] Add success.
I0321 02:10:43.420053  543705 net.go:648] Add success.
I0321 02:10:43.422786  543705 net.go:770] primary dev: ETH0
I0321 02:10:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:10:43.422812  543705 net.go:698] Add success.
I0321 02:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:10:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:10:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:10:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:10:53.409769  543705 memory.go:184] no items to output this cycle
I0321 02:10:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 02:11:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:11:03.409783  543705 memory.go:184] no items to output this cycle
I0321 02:11:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 02:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:11:13.409788  543705 memory.go:191] Add success.
I0321 02:11:13.409802  543705 cpu.go:282] Add success.
W0321 02:11:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:11:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:11:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:11:13.419723  543705 net.go:648] Add success.
I0321 02:11:13.422699  543705 net.go:770] primary dev: ETH0
I0321 02:11:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:11:13.422724  543705 net.go:698] Add success.
I0321 02:11:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:11:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:11:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 02:11:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:11:14.456612  543705 disk_worker.go:494] system disk:vda1
I0321 02:11:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:11:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:11:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:11:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:11:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:11:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:11:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:11:23.409798  543705 memory.go:184] no items to output this cycle
I0321 02:11:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 02:11:27.253672  543705 disk_info.go:125] begin check local disk info of client
I0321 02:11:27.256158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:11:27.256165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e80 0xc0000c5ec0]
E0321 02:11:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:11:33.409773  543705 memory.go:184] no items to output this cycle
I0321 02:11:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 02:11:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:11:43.409787  543705 cpu.go:282] Add success.
I0321 02:11:43.409792  543705 memory.go:191] Add success.
I0321 02:11:43.419858  543705 net.go:648] Add success.
I0321 02:11:43.422559  543705 net.go:770] primary dev: ETH0
I0321 02:11:43.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:11:43.422584  543705 net.go:698] Add success.
I0321 02:11:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:11:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:11:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:11:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:11:53.409783  543705 memory.go:184] no items to output this cycle
I0321 02:11:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 02:12:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:12:03.409795  543705 memory.go:184] no items to output this cycle
I0321 02:12:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 02:12:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:12:13.409784  543705 memory.go:191] Add success.
I0321 02:12:13.409785  543705 cpu.go:282] Add success.
W0321 02:12:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:12:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:12:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:12:13.419728  543705 net.go:648] Add success.
I0321 02:12:13.422378  543705 net.go:770] primary dev: ETH0
I0321 02:12:13.422392  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:12:13.422402  543705 net.go:698] Add success.
I0321 02:12:13.469327  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2e2fd30-22a0-48ef-9a10-10c76d443c54","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:12:13.469360  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 02:12:14.455252  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:12:14.455270  543705 disk_worker.go:708] disk space is not compliant
W0321 02:12:14.455275  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:12:14.456230  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:12:14.456242  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:12:14.456248  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:12:14.456903  543705 disk_worker.go:494] system disk:vda1
I0321 02:12:14.456954  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:12:15.457143  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:12:15.457161  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 02:12:16.458036  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:12:16.458036  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:12:16.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:12:16.458114  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:12:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:12:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:12:23.409791  543705 memory.go:184] no items to output this cycle
I0321 02:12:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 02:12:27.257726  543705 disk_info.go:125] begin check local disk info of client
I0321 02:12:27.260269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:12:27.260275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb5c0 0xc0001fb600]
E0321 02:12:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:12:33.409795  543705 memory.go:184] no items to output this cycle
I0321 02:12:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 02:12:38.693891  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:12:38.693898  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:12:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:12:43.410636  543705 memory.go:191] Add success.
I0321 02:12:43.409822  543705 cpu.go:282] Add success.
I0321 02:12:43.420323  543705 net.go:648] Add success.
I0321 02:12:43.422836  543705 net.go:770] primary dev: ETH0
I0321 02:12:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:12:43.422862  543705 net.go:698] Add success.
I0321 02:12:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:12:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:12:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:12:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:12:53.409801  543705 memory.go:184] no items to output this cycle
I0321 02:12:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 02:13:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:13:03.409798  543705 memory.go:184] no items to output this cycle
I0321 02:13:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 02:13:13.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:13:13.409948  543705 memory.go:191] Add success.
I0321 02:13:13.409973  543705 cpu.go:282] Add success.
W0321 02:13:13.409977  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:13:13.409989  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:13:13.409992  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:13:13.419734  543705 net.go:648] Add success.
I0321 02:13:13.422446  543705 net.go:770] primary dev: ETH0
I0321 02:13:13.422460  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:13:13.422474  543705 net.go:698] Add success.
I0321 02:13:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:13:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:13:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 02:13:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:13:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 02:13:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:13:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:13:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:13:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:13:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:13:16.472497  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:13:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:13:23.409797  543705 memory.go:184] no items to output this cycle
I0321 02:13:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 02:13:27.261677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:13:27.264205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:13:27.264212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bce40 0xc0002bce80]
E0321 02:13:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:13:33.409797  543705 memory.go:184] no items to output this cycle
I0321 02:13:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 02:13:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:13:43.409831  543705 memory.go:191] Add success.
I0321 02:13:43.409832  543705 cpu.go:282] Add success.
I0321 02:13:43.419890  543705 net.go:648] Add success.
I0321 02:13:43.422368  543705 net.go:770] primary dev: ETH0
I0321 02:13:43.422384  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:13:43.422399  543705 net.go:698] Add success.
I0321 02:13:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:13:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:13:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:13:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:13:53.409812  543705 memory.go:184] no items to output this cycle
I0321 02:13:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 02:14:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:14:03.409764  543705 memory.go:184] no items to output this cycle
I0321 02:14:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 02:14:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:14:13.409785  543705 memory.go:191] Add success.
I0321 02:14:13.409788  543705 cpu.go:282] Add success.
W0321 02:14:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:14:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:14:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:14:13.420389  543705 net.go:648] Add success.
I0321 02:14:13.423294  543705 net.go:770] primary dev: ETH0
I0321 02:14:13.423308  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:14:13.423319  543705 net.go:698] Add success.
I0321 02:14:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:14:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:14:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 02:14:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:14:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 02:14:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:14:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:14:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:14:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:14:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:14:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:14:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:14:23.409781  543705 memory.go:184] no items to output this cycle
I0321 02:14:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 02:14:27.265678  543705 disk_info.go:125] begin check local disk info of client
I0321 02:14:27.268235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:14:27.268243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bcb80 0xc0002bcbc0]
E0321 02:14:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:14:33.409766  543705 memory.go:184] no items to output this cycle
I0321 02:14:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 02:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:14:43.409808  543705 memory.go:191] Add success.
I0321 02:14:43.409819  543705 cpu.go:282] Add success.
I0321 02:14:43.419704  543705 net.go:770] primary dev: ETH0
I0321 02:14:43.419719  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:14:43.419734  543705 net.go:698] Add success.
I0321 02:14:43.420095  543705 net.go:648] Add success.
I0321 02:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:14:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:14:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:14:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:14:53.409782  543705 memory.go:184] no items to output this cycle
I0321 02:14:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 02:15:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:15:03.409775  543705 memory.go:184] no items to output this cycle
I0321 02:15:03.409779  543705 cpu.go:275] no items to output this cycle
E0321 02:15:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:15:13.409900  543705 memory.go:191] Add success.
I0321 02:15:13.409926  543705 cpu.go:282] Add success.
W0321 02:15:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:15:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:15:13.409950  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:15:13.419736  543705 net.go:648] Add success.
I0321 02:15:13.422904  543705 net.go:770] primary dev: ETH0
I0321 02:15:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:15:13.422929  543705 net.go:698] Add success.
I0321 02:15:13.477090  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1ee8f3d0-7d11-4e3b-a8ea-552a6fa246a9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:15:13.477121  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:15:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:15:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:15:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 02:15:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:15:14.456792  543705 disk_worker.go:494] system disk:vda1
I0321 02:15:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:15:15.456001  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:15:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:15:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:15:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:15:16.472498  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:15:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:15:23.409778  543705 memory.go:184] no items to output this cycle
I0321 02:15:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 02:15:27.269675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:15:27.272189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:15:27.272195  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034b740 0xc00034b780]
E0321 02:15:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:15:33.409765  543705 memory.go:184] no items to output this cycle
I0321 02:15:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 02:15:38.695172  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:15:38.695179  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:15:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:15:43.410744  543705 memory.go:191] Add success.
I0321 02:15:43.409812  543705 cpu.go:282] Add success.
I0321 02:15:43.420441  543705 net.go:648] Add success.
I0321 02:15:43.423070  543705 net.go:770] primary dev: ETH0
I0321 02:15:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:15:43.423109  543705 net.go:698] Add success.
I0321 02:15:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:15:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:15:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:15:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:15:53.409788  543705 memory.go:184] no items to output this cycle
I0321 02:15:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 02:16:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:16:03.409764  543705 memory.go:184] no items to output this cycle
I0321 02:16:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 02:16:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:16:13.409791  543705 memory.go:191] Add success.
I0321 02:16:13.409793  543705 cpu.go:282] Add success.
W0321 02:16:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:16:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:16:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:16:13.420047  543705 net.go:648] Add success.
I0321 02:16:13.423075  543705 net.go:770] primary dev: ETH0
I0321 02:16:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:16:13.423111  543705 net.go:698] Add success.
I0321 02:16:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:16:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:16:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 02:16:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:16:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 02:16:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:16:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:16:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:16:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:16:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:16:16.472497  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:16:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:16:23.409794  543705 memory.go:184] no items to output this cycle
I0321 02:16:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 02:16:27.273674  543705 disk_info.go:125] begin check local disk info of client
I0321 02:16:27.276228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:16:27.276233  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a880 0xc00039a8c0]
E0321 02:16:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:16:33.409773  543705 memory.go:184] no items to output this cycle
I0321 02:16:33.409776  543705 cpu.go:275] no items to output this cycle
E0321 02:16:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:16:43.409787  543705 memory.go:191] Add success.
I0321 02:16:43.409791  543705 cpu.go:282] Add success.
I0321 02:16:43.420068  543705 net.go:648] Add success.
I0321 02:16:43.422954  543705 net.go:770] primary dev: ETH0
I0321 02:16:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:16:43.422985  543705 net.go:698] Add success.
I0321 02:16:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:16:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:16:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:16:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:16:53.409802  543705 memory.go:184] no items to output this cycle
I0321 02:16:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 02:17:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:17:03.409786  543705 memory.go:184] no items to output this cycle
I0321 02:17:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 02:17:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:17:13.409776  543705 memory.go:191] Add success.
W0321 02:17:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 02:17:13.409801  543705 cpu.go:282] Add success.
W0321 02:17:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:17:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:17:13.420134  543705 net.go:648] Add success.
I0321 02:17:13.422705  543705 net.go:770] primary dev: ETH0
I0321 02:17:13.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:17:13.422734  543705 net.go:698] Add success.
I0321 02:17:13.453287  543705 event_worker.go:152] Polling the log file for events...
W0321 02:17:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:17:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 02:17:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:17:14.456937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:17:14.456946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:17:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:17:14.456999  543705 disk_worker.go:494] system disk:vda1
I0321 02:17:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:17:15.456920  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:17:15.456931  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:17:16.457623  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:17:16.457623  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:17:16.457699  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:17:16.457723  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:17:16.472039  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:17:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:17:23.409782  543705 memory.go:184] no items to output this cycle
I0321 02:17:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 02:17:27.277677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:17:27.280446  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:17:27.280454  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001faf80 0xc0001fafc0]
E0321 02:17:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:17:33.409767  543705 memory.go:184] no items to output this cycle
I0321 02:17:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 02:17:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:17:43.409813  543705 memory.go:191] Add success.
I0321 02:17:43.409818  543705 cpu.go:282] Add success.
I0321 02:17:43.419920  543705 net.go:648] Add success.
I0321 02:17:43.422493  543705 net.go:770] primary dev: ETH0
I0321 02:17:43.422507  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:17:43.422518  543705 net.go:698] Add success.
I0321 02:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:17:53.409876  543705 cpu.go:275] no items to output this cycle
E0321 02:17:53.409949  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:17:53.409959  543705 memory.go:184] no items to output this cycle
E0321 02:18:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:18:03.409804  543705 memory.go:184] no items to output this cycle
I0321 02:18:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 02:18:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:18:13.409777  543705 memory.go:191] Add success.
I0321 02:18:13.409799  543705 cpu.go:282] Add success.
W0321 02:18:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:18:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:18:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:18:13.420071  543705 net.go:770] primary dev: ETH0
I0321 02:18:13.420084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:18:13.420096  543705 net.go:698] Add success.
I0321 02:18:13.420467  543705 net.go:648] Add success.
I0321 02:18:13.471548  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"28daf914-6b02-4b83-9956-107e454fa4ea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:18:13.471582  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:18:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:18:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 02:18:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:18:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 02:18:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:18:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:18:16.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:18:16.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:18:16.458106  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:18:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:18:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:18:23.409788  543705 memory.go:184] no items to output this cycle
I0321 02:18:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 02:18:27.281673  543705 disk_info.go:125] begin check local disk info of client
I0321 02:18:27.284322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:18:27.284328  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8b00 0xc0003e8b40]
E0321 02:18:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:18:33.409794  543705 memory.go:184] no items to output this cycle
I0321 02:18:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 02:18:38.695314  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:18:38.695320  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:18:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:18:43.410757  543705 memory.go:191] Add success.
I0321 02:18:43.409814  543705 cpu.go:282] Add success.
I0321 02:18:43.420546  543705 net.go:648] Add success.
I0321 02:18:43.423186  543705 net.go:770] primary dev: ETH0
I0321 02:18:43.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:18:43.423211  543705 net.go:698] Add success.
I0321 02:18:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:18:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:18:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:18:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 02:18:53.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:18:53.409832  543705 memory.go:184] no items to output this cycle
E0321 02:19:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:19:03.409785  543705 memory.go:184] no items to output this cycle
I0321 02:19:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 02:19:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:19:13.409793  543705 memory.go:191] Add success.
I0321 02:19:13.409797  543705 cpu.go:282] Add success.
W0321 02:19:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:19:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:19:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:19:13.420124  543705 net.go:648] Add success.
I0321 02:19:13.422583  543705 net.go:770] primary dev: ETH0
I0321 02:19:13.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:19:13.422609  543705 net.go:698] Add success.
I0321 02:19:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:19:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:19:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 02:19:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:19:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 02:19:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:19:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:19:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:19:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:19:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:19:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:19:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:19:23.409776  543705 memory.go:184] no items to output this cycle
I0321 02:19:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 02:19:27.285675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:19:27.288491  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:19:27.288497  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e540 0xc00037e580]
E0321 02:19:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:19:33.409782  543705 memory.go:184] no items to output this cycle
I0321 02:19:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 02:19:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:19:43.409789  543705 memory.go:191] Add success.
I0321 02:19:43.409797  543705 cpu.go:282] Add success.
I0321 02:19:43.419991  543705 net.go:648] Add success.
I0321 02:19:43.422831  543705 net.go:770] primary dev: ETH0
I0321 02:19:43.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:19:43.422861  543705 net.go:698] Add success.
I0321 02:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:19:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:19:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:19:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:19:53.409889  543705 cpu.go:275] no items to output this cycle
I0321 02:19:53.409892  543705 memory.go:184] no items to output this cycle
E0321 02:20:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:20:03.409779  543705 memory.go:184] no items to output this cycle
I0321 02:20:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 02:20:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:20:13.409814  543705 memory.go:191] Add success.
I0321 02:20:13.409818  543705 cpu.go:282] Add success.
W0321 02:20:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:20:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:20:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:20:13.420518  543705 net.go:648] Add success.
I0321 02:20:13.423322  543705 net.go:770] primary dev: ETH0
I0321 02:20:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:20:13.423351  543705 net.go:698] Add success.
I0321 02:20:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:20:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:20:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 02:20:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:20:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 02:20:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:20:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:20:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:20:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:20:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:20:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:20:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:20:23.409811  543705 memory.go:184] no items to output this cycle
I0321 02:20:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 02:20:27.289674  543705 disk_info.go:125] begin check local disk info of client
I0321 02:20:27.292462  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:20:27.292470  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374000 0xc000374040]
E0321 02:20:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:20:33.409794  543705 memory.go:184] no items to output this cycle
I0321 02:20:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 02:20:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:20:43.409790  543705 memory.go:191] Add success.
I0321 02:20:43.409809  543705 cpu.go:282] Add success.
I0321 02:20:43.419867  543705 net.go:648] Add success.
I0321 02:20:43.422326  543705 net.go:770] primary dev: ETH0
I0321 02:20:43.422342  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:20:43.422357  543705 net.go:698] Add success.
I0321 02:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:20:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:20:53.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:20:53.409887  543705 cpu.go:275] no items to output this cycle
I0321 02:20:53.409920  543705 memory.go:184] no items to output this cycle
E0321 02:21:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:21:03.409797  543705 memory.go:184] no items to output this cycle
I0321 02:21:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 02:21:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:21:13.409793  543705 memory.go:191] Add success.
I0321 02:21:13.409796  543705 cpu.go:282] Add success.
W0321 02:21:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:21:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:21:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:21:13.420062  543705 net.go:648] Add success.
I0321 02:21:13.422712  543705 net.go:770] primary dev: ETH0
I0321 02:21:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:21:13.422738  543705 net.go:698] Add success.
I0321 02:21:13.468649  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7188f136-c721-4fcd-ace9-e763dea26ef7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:21:13.468681  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:21:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:21:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:21:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 02:21:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:21:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 02:21:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:21:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:21:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:21:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:21:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:21:23.410197  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:21:23.410217  543705 memory.go:184] no items to output this cycle
I0321 02:21:23.410232  543705 cpu.go:275] no items to output this cycle
I0321 02:21:27.293680  543705 disk_info.go:125] begin check local disk info of client
I0321 02:21:27.296198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:21:27.296205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbe40 0xc0001fbe80]
E0321 02:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:21:33.409795  543705 memory.go:184] no items to output this cycle
I0321 02:21:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 02:21:38.695460  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:21:38.695467  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:21:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:21:43.410682  543705 memory.go:191] Add success.
I0321 02:21:43.409802  543705 cpu.go:282] Add success.
I0321 02:21:43.420458  543705 net.go:648] Add success.
I0321 02:21:43.423251  543705 net.go:770] primary dev: ETH0
I0321 02:21:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:21:43.423280  543705 net.go:698] Add success.
I0321 02:21:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:21:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:21:53.409782  543705 memory.go:184] no items to output this cycle
I0321 02:21:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 02:22:03.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:22:03.409870  543705 memory.go:184] no items to output this cycle
I0321 02:22:03.409940  543705 cpu.go:275] no items to output this cycle
E0321 02:22:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:22:13.409785  543705 memory.go:191] Add success.
W0321 02:22:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 02:22:13.409819  543705 cpu.go:282] Add success.
W0321 02:22:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:22:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:22:13.420102  543705 net.go:648] Add success.
I0321 02:22:13.422786  543705 net.go:770] primary dev: ETH0
I0321 02:22:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:22:13.422814  543705 net.go:698] Add success.
W0321 02:22:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:22:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 02:22:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:22:14.456910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:22:14.456920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:22:14.456926  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:22:14.456999  543705 disk_worker.go:494] system disk:vda1
I0321 02:22:14.457041  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:22:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:22:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:22:16.458100  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:22:16.458130  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:22:16.458161  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:22:16.458181  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:22:16.472553  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:22:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:22:23.409788  543705 memory.go:184] no items to output this cycle
I0321 02:22:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 02:22:27.297684  543705 disk_info.go:125] begin check local disk info of client
I0321 02:22:27.300209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:22:27.300217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb840 0xc0001fb880]
E0321 02:22:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:22:33.409799  543705 memory.go:184] no items to output this cycle
I0321 02:22:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:22:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:22:43.409825  543705 memory.go:191] Add success.
I0321 02:22:43.409829  543705 cpu.go:282] Add success.
I0321 02:22:43.419982  543705 net.go:648] Add success.
I0321 02:22:43.422537  543705 net.go:770] primary dev: ETH0
I0321 02:22:43.422551  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:22:43.422562  543705 net.go:698] Add success.
I0321 02:22:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:22:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:22:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:22:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:22:53.409787  543705 memory.go:184] no items to output this cycle
I0321 02:22:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 02:23:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:23:03.409811  543705 memory.go:184] no items to output this cycle
I0321 02:23:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 02:23:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:23:13.409788  543705 memory.go:191] Add success.
I0321 02:23:13.409807  543705 cpu.go:282] Add success.
W0321 02:23:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:23:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:23:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:23:13.420102  543705 net.go:648] Add success.
I0321 02:23:13.422880  543705 net.go:770] primary dev: ETH0
I0321 02:23:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:23:13.422904  543705 net.go:698] Add success.
I0321 02:23:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:23:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:23:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 02:23:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:23:14.456515  543705 disk_worker.go:494] system disk:vda1
I0321 02:23:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:23:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:23:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:23:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:23:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:23:16.472522  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:23:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:23:23.409791  543705 memory.go:184] no items to output this cycle
I0321 02:23:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 02:23:27.301676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:23:27.304166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:23:27.304172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa380 0xc0001fa3c0]
E0321 02:23:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:23:33.409790  543705 memory.go:184] no items to output this cycle
I0321 02:23:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 02:23:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:23:43.409795  543705 cpu.go:282] Add success.
I0321 02:23:43.409796  543705 memory.go:191] Add success.
I0321 02:23:43.419963  543705 net.go:648] Add success.
I0321 02:23:43.422793  543705 net.go:770] primary dev: ETH0
I0321 02:23:43.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:23:43.422832  543705 net.go:698] Add success.
I0321 02:23:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:23:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:23:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:23:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 02:23:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:23:53.409815  543705 memory.go:184] no items to output this cycle
E0321 02:24:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:24:03.409787  543705 memory.go:184] no items to output this cycle
I0321 02:24:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 02:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:24:13.409805  543705 memory.go:191] Add success.
I0321 02:24:13.409808  543705 cpu.go:282] Add success.
W0321 02:24:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:24:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:24:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:24:13.420048  543705 net.go:648] Add success.
I0321 02:24:13.422494  543705 net.go:770] primary dev: ETH0
I0321 02:24:13.422507  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:24:13.422519  543705 net.go:698] Add success.
I0321 02:24:13.468919  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"450b326d-07bc-49c6-8505-77846e886d91","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:24:13.468951  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:24:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:24:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:24:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 02:24:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:24:14.456692  543705 disk_worker.go:494] system disk:vda1
I0321 02:24:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:24:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:24:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:24:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:24:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:24:16.472508  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:24:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:24:23.409803  543705 memory.go:184] no items to output this cycle
I0321 02:24:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 02:24:27.305676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:24:27.308260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:24:27.308266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa140 0xc0001fa180]
E0321 02:24:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:24:33.409785  543705 memory.go:184] no items to output this cycle
I0321 02:24:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 02:24:38.695607  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:24:38.695613  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:24:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:24:43.410727  543705 memory.go:191] Add success.
I0321 02:24:43.409835  543705 cpu.go:282] Add success.
I0321 02:24:43.420416  543705 net.go:648] Add success.
I0321 02:24:43.423280  543705 net.go:770] primary dev: ETH0
I0321 02:24:43.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:24:43.423305  543705 net.go:698] Add success.
I0321 02:24:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:24:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:24:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:24:53.409807  543705 memory.go:184] no items to output this cycle
I0321 02:24:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 02:25:03.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:25:03.409883  543705 cpu.go:275] no items to output this cycle
I0321 02:25:03.409887  543705 memory.go:184] no items to output this cycle
E0321 02:25:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:25:13.409769  543705 memory.go:191] Add success.
W0321 02:25:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:25:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:25:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:25:13.409833  543705 cpu.go:282] Add success.
I0321 02:25:13.420041  543705 net.go:648] Add success.
I0321 02:25:13.423013  543705 net.go:770] primary dev: ETH0
I0321 02:25:13.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:25:13.423037  543705 net.go:698] Add success.
I0321 02:25:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:25:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:25:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 02:25:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:25:14.456770  543705 disk_worker.go:494] system disk:vda1
I0321 02:25:14.456811  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:25:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:25:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:25:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:25:16.472472  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:25:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:25:23.409780  543705 memory.go:184] no items to output this cycle
I0321 02:25:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 02:25:27.309676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:25:27.312180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:25:27.312186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fad00 0xc0001fad40]
E0321 02:25:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:25:33.409777  543705 memory.go:184] no items to output this cycle
I0321 02:25:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 02:25:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:25:43.409790  543705 memory.go:191] Add success.
I0321 02:25:43.409791  543705 cpu.go:282] Add success.
I0321 02:25:43.419950  543705 net.go:648] Add success.
I0321 02:25:43.422736  543705 net.go:770] primary dev: ETH0
I0321 02:25:43.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:25:43.422760  543705 net.go:698] Add success.
I0321 02:25:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:25:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:25:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:25:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:25:53.409774  543705 memory.go:184] no items to output this cycle
I0321 02:25:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 02:26:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:26:03.409772  543705 memory.go:184] no items to output this cycle
I0321 02:26:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 02:26:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:26:13.409812  543705 memory.go:191] Add success.
I0321 02:26:13.409820  543705 cpu.go:282] Add success.
W0321 02:26:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:26:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:26:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:26:13.420156  543705 net.go:648] Add success.
I0321 02:26:13.422880  543705 net.go:770] primary dev: ETH0
I0321 02:26:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:26:13.422915  543705 net.go:698] Add success.
I0321 02:26:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:26:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:26:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 02:26:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:26:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 02:26:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:26:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:26:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:26:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:26:16.472484  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:26:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:26:23.409779  543705 memory.go:184] no items to output this cycle
I0321 02:26:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 02:26:27.313678  543705 disk_info.go:125] begin check local disk info of client
I0321 02:26:27.316227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:26:27.316233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fbb80 0xc0001fbbc0]
E0321 02:26:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:26:33.409772  543705 memory.go:184] no items to output this cycle
I0321 02:26:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:26:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:26:43.409816  543705 memory.go:191] Add success.
I0321 02:26:43.409820  543705 cpu.go:282] Add success.
I0321 02:26:43.419969  543705 net.go:648] Add success.
I0321 02:26:43.422447  543705 net.go:770] primary dev: ETH0
I0321 02:26:43.422460  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:26:43.422474  543705 net.go:698] Add success.
I0321 02:26:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:26:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:26:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:26:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:26:53.409772  543705 memory.go:184] no items to output this cycle
I0321 02:26:53.409776  543705 cpu.go:275] no items to output this cycle
E0321 02:27:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:27:03.409770  543705 memory.go:184] no items to output this cycle
I0321 02:27:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:27:13.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:27:13.409877  543705 memory.go:191] Add success.
W0321 02:27:13.409916  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:27:13.409929  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:27:13.409937  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:27:13.409942  543705 cpu.go:282] Add success.
I0321 02:27:13.425366  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 02:27:13.425609  543705 net.go:648] Add success.
I0321 02:27:13.428295  543705 net.go:770] primary dev: ETH0
I0321 02:27:13.428307  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:27:13.428318  543705 net.go:698] Add success.
I0321 02:27:13.452785  543705 event_worker.go:152] Polling the log file for events...
I0321 02:27:13.463425  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44aa6401-8cf7-4f5e-a92b-c307a49fe7e4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:27:13.463456  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 02:27:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:27:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 02:27:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:27:14.456757  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:27:14.456766  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:27:14.456771  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:27:14.456814  543705 disk_worker.go:494] system disk:vda1
I0321 02:27:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:27:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:27:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:27:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:27:16.458129  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:27:16.458164  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:27:16.458185  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:27:16.472557  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:27:23.409797  543705 memory.go:184] no items to output this cycle
I0321 02:27:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 02:27:27.317679  543705 disk_info.go:125] begin check local disk info of client
I0321 02:27:27.320190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:27:27.320196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb040 0xc0001fb080]
E0321 02:27:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:27:33.409793  543705 memory.go:184] no items to output this cycle
I0321 02:27:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 02:27:38.695752  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:27:38.695759  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:27:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:27:43.410567  543705 memory.go:191] Add success.
I0321 02:27:43.409789  543705 cpu.go:282] Add success.
I0321 02:27:43.420359  543705 net.go:648] Add success.
I0321 02:27:43.422989  543705 net.go:770] primary dev: ETH0
I0321 02:27:43.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:27:43.423014  543705 net.go:698] Add success.
I0321 02:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:27:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:27:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:27:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:27:53.409771  543705 memory.go:184] no items to output this cycle
I0321 02:27:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 02:28:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:28:03.409777  543705 memory.go:184] no items to output this cycle
I0321 02:28:03.409880  543705 cpu.go:275] no items to output this cycle
E0321 02:28:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:28:13.409798  543705 memory.go:191] Add success.
I0321 02:28:13.409806  543705 cpu.go:282] Add success.
W0321 02:28:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:28:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:28:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:28:13.420154  543705 net.go:648] Add success.
I0321 02:28:13.422930  543705 net.go:770] primary dev: ETH0
I0321 02:28:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:28:13.422969  543705 net.go:698] Add success.
I0321 02:28:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:28:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:28:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 02:28:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:28:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 02:28:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:28:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:28:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:28:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:28:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:28:16.472512  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:28:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:28:23.409784  543705 memory.go:184] no items to output this cycle
I0321 02:28:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 02:28:27.321680  543705 disk_info.go:125] begin check local disk info of client
I0321 02:28:27.324233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:28:27.324246  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b1c0 0xc00007b200]
E0321 02:28:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:28:33.409793  543705 memory.go:184] no items to output this cycle
I0321 02:28:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 02:28:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:28:43.409786  543705 memory.go:191] Add success.
I0321 02:28:43.409800  543705 cpu.go:282] Add success.
I0321 02:28:43.419878  543705 net.go:648] Add success.
I0321 02:28:43.422523  543705 net.go:770] primary dev: ETH0
I0321 02:28:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:28:43.422548  543705 net.go:698] Add success.
I0321 02:28:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:28:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:28:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:28:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:28:53.409794  543705 memory.go:184] no items to output this cycle
I0321 02:28:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 02:29:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:29:03.409780  543705 memory.go:184] no items to output this cycle
I0321 02:29:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 02:29:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:29:13.409818  543705 memory.go:191] Add success.
I0321 02:29:13.409827  543705 cpu.go:282] Add success.
W0321 02:29:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:29:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:29:13.420189  543705 net.go:648] Add success.
I0321 02:29:13.422882  543705 net.go:770] primary dev: ETH0
I0321 02:29:13.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:29:13.422910  543705 net.go:698] Add success.
I0321 02:29:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:29:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:29:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 02:29:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:29:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 02:29:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:29:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:29:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:29:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:29:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:29:16.472576  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:29:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:29:23.409782  543705 memory.go:184] no items to output this cycle
I0321 02:29:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 02:29:27.325679  543705 disk_info.go:125] begin check local disk info of client
I0321 02:29:27.328180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:29:27.328186  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053bc40 0xc00053bc80]
E0321 02:29:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:29:33.409790  543705 memory.go:184] no items to output this cycle
I0321 02:29:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 02:29:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:29:43.409779  543705 memory.go:191] Add success.
I0321 02:29:43.409793  543705 cpu.go:282] Add success.
I0321 02:29:43.419971  543705 net.go:648] Add success.
I0321 02:29:43.420868  543705 net.go:770] primary dev: ETH0
I0321 02:29:43.420882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:29:43.420896  543705 net.go:698] Add success.
I0321 02:29:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:29:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:29:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:29:53.409772  543705 memory.go:184] no items to output this cycle
I0321 02:29:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 02:30:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:30:03.409768  543705 memory.go:184] no items to output this cycle
I0321 02:30:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:30:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:30:13.409897  543705 cpu.go:282] Add success.
I0321 02:30:13.409939  543705 memory.go:191] Add success.
W0321 02:30:13.409972  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:30:13.409985  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:30:13.409988  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:30:13.419705  543705 net.go:648] Add success.
I0321 02:30:13.422493  543705 net.go:770] primary dev: ETH0
I0321 02:30:13.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:30:13.422517  543705 net.go:698] Add success.
I0321 02:30:13.468805  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c2d434d-ca99-4cd0-8bab-9717e3660acf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:30:13.468839  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:30:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:30:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 02:30:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:30:14.456692  543705 disk_worker.go:494] system disk:vda1
I0321 02:30:14.456728  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:30:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:30:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:30:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:30:16.472547  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:30:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:30:23.409814  543705 memory.go:184] no items to output this cycle
I0321 02:30:23.409825  543705 cpu.go:275] no items to output this cycle
I0321 02:30:27.329674  543705 disk_info.go:125] begin check local disk info of client
I0321 02:30:27.332300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:30:27.332306  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053ac00 0xc00053ac40]
E0321 02:30:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:30:33.409776  543705 memory.go:184] no items to output this cycle
I0321 02:30:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 02:30:38.697189  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:30:38.697196  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:30:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:30:43.410735  543705 memory.go:191] Add success.
I0321 02:30:43.409811  543705 cpu.go:282] Add success.
I0321 02:30:43.420430  543705 net.go:648] Add success.
I0321 02:30:43.423002  543705 net.go:770] primary dev: ETH0
I0321 02:30:43.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:30:43.423028  543705 net.go:698] Add success.
I0321 02:30:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:30:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:30:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:30:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:30:53.409789  543705 memory.go:184] no items to output this cycle
I0321 02:30:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 02:31:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:31:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 02:31:03.409782  543705 memory.go:184] no items to output this cycle
E0321 02:31:13.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:31:13.409913  543705 cpu.go:282] Add success.
I0321 02:31:13.409924  543705 memory.go:191] Add success.
W0321 02:31:13.409957  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:31:13.409970  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:31:13.409976  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:31:13.419737  543705 net.go:648] Add success.
I0321 02:31:13.422930  543705 net.go:770] primary dev: ETH0
I0321 02:31:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:31:13.422957  543705 net.go:698] Add success.
I0321 02:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:31:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:31:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 02:31:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:31:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 02:31:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:31:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:31:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:31:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:31:16.472542  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:31:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:31:23.409789  543705 memory.go:184] no items to output this cycle
I0321 02:31:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 02:31:27.333676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:31:27.336161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:31:27.336167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd200 0xc0002bd240]
E0321 02:31:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:31:33.409781  543705 memory.go:184] no items to output this cycle
I0321 02:31:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 02:31:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:31:43.409796  543705 memory.go:191] Add success.
I0321 02:31:43.409797  543705 cpu.go:282] Add success.
I0321 02:31:43.419835  543705 net.go:648] Add success.
I0321 02:31:43.422318  543705 net.go:770] primary dev: ETH0
I0321 02:31:43.422330  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:31:43.422343  543705 net.go:698] Add success.
I0321 02:31:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:31:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:31:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:31:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:31:53.409786  543705 memory.go:184] no items to output this cycle
I0321 02:31:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 02:32:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:32:03.409801  543705 memory.go:184] no items to output this cycle
I0321 02:32:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:32:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:32:13.409898  543705 memory.go:191] Add success.
W0321 02:32:13.409929  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:32:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:32:13.409950  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:32:13.409977  543705 cpu.go:282] Add success.
I0321 02:32:13.419706  543705 net.go:648] Add success.
I0321 02:32:13.422399  543705 net.go:770] primary dev: ETH0
I0321 02:32:13.422417  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:32:13.422435  543705 net.go:698] Add success.
W0321 02:32:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:32:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 02:32:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:32:14.455913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:32:14.455922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:32:14.455928  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:32:14.456552  543705 disk_worker.go:494] system disk:vda1
I0321 02:32:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:32:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:32:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:32:16.458027  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:32:16.458027  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:32:16.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:32:16.458125  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:32:16.472535  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:32:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:32:23.409815  543705 memory.go:184] no items to output this cycle
I0321 02:32:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 02:32:27.337676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:32:27.340223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:32:27.340232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039bc00 0xc00039bc40]
E0321 02:32:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:32:33.409770  543705 memory.go:184] no items to output this cycle
I0321 02:32:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:32:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:32:43.409809  543705 memory.go:191] Add success.
I0321 02:32:43.409818  543705 cpu.go:282] Add success.
I0321 02:32:43.419954  543705 net.go:648] Add success.
I0321 02:32:43.422747  543705 net.go:770] primary dev: ETH0
I0321 02:32:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:32:43.422775  543705 net.go:698] Add success.
I0321 02:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:32:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:32:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:32:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:32:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 02:32:53.409785  543705 memory.go:184] no items to output this cycle
E0321 02:33:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:33:03.409911  543705 memory.go:184] no items to output this cycle
I0321 02:33:03.409913  543705 cpu.go:275] no items to output this cycle
E0321 02:33:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:33:13.409785  543705 memory.go:191] Add success.
I0321 02:33:13.409807  543705 cpu.go:282] Add success.
W0321 02:33:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:33:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:33:13.420115  543705 net.go:648] Add success.
I0321 02:33:13.423129  543705 net.go:770] primary dev: ETH0
I0321 02:33:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:33:13.423153  543705 net.go:698] Add success.
I0321 02:33:13.543974  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"11ab8216-d221-4c75-92ba-9252d23041cd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:33:13.544006  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:33:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:33:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:33:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 02:33:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:33:14.456537  543705 disk_worker.go:494] system disk:vda1
I0321 02:33:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:33:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:33:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:33:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:33:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:33:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:33:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:33:23.409779  543705 memory.go:184] no items to output this cycle
I0321 02:33:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 02:33:27.341676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:33:27.344198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:33:27.344205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf00 0xc0001aaf40]
E0321 02:33:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:33:33.409780  543705 memory.go:184] no items to output this cycle
I0321 02:33:33.409800  543705 cpu.go:275] no items to output this cycle
I0321 02:33:38.697732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:33:38.697738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:33:43.410773  543705 memory.go:191] Add success.
I0321 02:33:43.409792  543705 cpu.go:282] Add success.
I0321 02:33:43.420500  543705 net.go:648] Add success.
I0321 02:33:43.423679  543705 net.go:770] primary dev: ETH0
I0321 02:33:43.423694  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:33:43.423709  543705 net.go:698] Add success.
I0321 02:33:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:33:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:33:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:33:53.409894  543705 cpu.go:275] no items to output this cycle
E0321 02:33:53.409975  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:33:53.409988  543705 memory.go:184] no items to output this cycle
E0321 02:34:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:34:03.409783  543705 memory.go:184] no items to output this cycle
I0321 02:34:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 02:34:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:34:13.409804  543705 memory.go:191] Add success.
I0321 02:34:13.409811  543705 cpu.go:282] Add success.
W0321 02:34:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:34:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:34:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:34:13.420168  543705 net.go:648] Add success.
I0321 02:34:13.423313  543705 net.go:770] primary dev: ETH0
I0321 02:34:13.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:34:13.423340  543705 net.go:698] Add success.
I0321 02:34:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:34:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:34:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 02:34:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:34:14.456530  543705 disk_worker.go:494] system disk:vda1
I0321 02:34:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:34:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:34:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:34:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:34:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:34:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:34:23.409819  543705 memory.go:184] no items to output this cycle
I0321 02:34:23.409835  543705 cpu.go:275] no items to output this cycle
I0321 02:34:27.345677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:34:27.348245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:34:27.348251  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513540 0xc000513580]
E0321 02:34:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:34:33.409802  543705 memory.go:184] no items to output this cycle
I0321 02:34:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 02:34:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:34:43.409819  543705 memory.go:191] Add success.
I0321 02:34:43.409825  543705 cpu.go:282] Add success.
I0321 02:34:43.419904  543705 net.go:648] Add success.
I0321 02:34:43.423096  543705 net.go:770] primary dev: ETH0
I0321 02:34:43.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:34:43.423122  543705 net.go:698] Add success.
I0321 02:34:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:34:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:34:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:34:53.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:34:53.409890  543705 memory.go:184] no items to output this cycle
I0321 02:34:53.409967  543705 cpu.go:275] no items to output this cycle
E0321 02:35:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:35:03.409777  543705 memory.go:184] no items to output this cycle
I0321 02:35:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 02:35:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:35:13.409776  543705 memory.go:191] Add success.
W0321 02:35:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:35:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:35:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:35:13.409814  543705 cpu.go:282] Add success.
I0321 02:35:13.420221  543705 net.go:648] Add success.
I0321 02:35:13.422856  543705 net.go:770] primary dev: ETH0
I0321 02:35:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:35:13.422884  543705 net.go:698] Add success.
I0321 02:35:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:35:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:35:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 02:35:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:35:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 02:35:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:35:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:35:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:35:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:35:16.472528  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:35:23.409782  543705 memory.go:184] no items to output this cycle
I0321 02:35:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 02:35:27.349676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:35:27.352172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:35:27.352178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513540 0xc000513580]
E0321 02:35:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:35:33.409815  543705 memory.go:184] no items to output this cycle
I0321 02:35:33.409830  543705 cpu.go:275] no items to output this cycle
E0321 02:35:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:35:43.409782  543705 memory.go:191] Add success.
I0321 02:35:43.409806  543705 cpu.go:282] Add success.
I0321 02:35:43.419861  543705 net.go:648] Add success.
I0321 02:35:43.422336  543705 net.go:770] primary dev: ETH0
I0321 02:35:43.422351  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:35:43.422365  543705 net.go:698] Add success.
I0321 02:35:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:35:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:35:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:35:53.410687  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:35:53.410706  543705 memory.go:184] no items to output this cycle
I0321 02:35:53.410717  543705 cpu.go:275] no items to output this cycle
E0321 02:36:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:36:03.409787  543705 memory.go:184] no items to output this cycle
I0321 02:36:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 02:36:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:36:13.409795  543705 memory.go:191] Add success.
I0321 02:36:13.409802  543705 cpu.go:282] Add success.
W0321 02:36:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:36:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:36:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:36:13.420073  543705 net.go:648] Add success.
I0321 02:36:13.423090  543705 net.go:770] primary dev: ETH0
I0321 02:36:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:36:13.423115  543705 net.go:698] Add success.
I0321 02:36:13.470108  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0afc73eb-76c5-4dec-9bec-109d3d5cc455","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:36:13.470140  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:36:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:36:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:36:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 02:36:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:36:14.456521  543705 disk_worker.go:494] system disk:vda1
I0321 02:36:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:36:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:36:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:36:16.472387  543705 disk_local_worker.go:436] Get disk info: []
I0321 02:36:23.409814  543705 cpu.go:275] no items to output this cycle
E0321 02:36:23.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:36:23.409839  543705 memory.go:184] no items to output this cycle
I0321 02:36:27.353680  543705 disk_info.go:125] begin check local disk info of client
I0321 02:36:27.356209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:36:27.356217  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba80 0xc00007bac0]
E0321 02:36:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:36:33.409817  543705 memory.go:184] no items to output this cycle
I0321 02:36:33.409832  543705 cpu.go:275] no items to output this cycle
I0321 02:36:38.699210  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:36:38.699217  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:36:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:36:43.410620  543705 memory.go:191] Add success.
I0321 02:36:43.409814  543705 cpu.go:282] Add success.
I0321 02:36:43.420387  543705 net.go:648] Add success.
I0321 02:36:43.423071  543705 net.go:770] primary dev: ETH0
I0321 02:36:43.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:36:43.423097  543705 net.go:698] Add success.
I0321 02:36:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:36:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:36:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:36:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:36:53.409771  543705 memory.go:184] no items to output this cycle
I0321 02:36:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 02:37:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:37:03.409780  543705 memory.go:184] no items to output this cycle
I0321 02:37:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 02:37:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:37:13.409792  543705 memory.go:191] Add success.
I0321 02:37:13.409799  543705 cpu.go:282] Add success.
W0321 02:37:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:37:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:37:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:37:13.420451  543705 net.go:648] Add success.
I0321 02:37:13.423003  543705 net.go:770] primary dev: ETH0
I0321 02:37:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:37:13.423028  543705 net.go:698] Add success.
I0321 02:37:13.453596  543705 event_worker.go:152] Polling the log file for events...
W0321 02:37:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:37:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 02:37:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:37:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:37:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:37:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:37:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 02:37:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:37:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:37:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:37:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:37:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:37:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:37:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:37:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:37:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:37:23.409792  543705 memory.go:184] no items to output this cycle
I0321 02:37:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 02:37:27.357677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:37:27.360156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:37:27.360163  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cc300 0xc0004cc340]
E0321 02:37:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:37:33.409789  543705 memory.go:184] no items to output this cycle
I0321 02:37:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 02:37:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:37:43.409796  543705 memory.go:191] Add success.
I0321 02:37:43.409814  543705 cpu.go:282] Add success.
I0321 02:37:43.419987  543705 net.go:648] Add success.
I0321 02:37:43.422571  543705 net.go:770] primary dev: ETH0
I0321 02:37:43.422586  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:37:43.422600  543705 net.go:698] Add success.
I0321 02:37:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:37:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:37:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:37:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:37:53.409788  543705 cpu.go:275] no items to output this cycle
I0321 02:37:53.409791  543705 memory.go:184] no items to output this cycle
E0321 02:38:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:38:03.409775  543705 memory.go:184] no items to output this cycle
I0321 02:38:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:38:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:38:13.409792  543705 memory.go:191] Add success.
I0321 02:38:13.409805  543705 cpu.go:282] Add success.
W0321 02:38:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:38:13.412348  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:38:13.412352  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:38:13.420024  543705 net.go:648] Add success.
I0321 02:38:13.421729  543705 net.go:770] primary dev: ETH0
I0321 02:38:13.421753  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:38:13.421766  543705 net.go:698] Add success.
I0321 02:38:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:38:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:38:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 02:38:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:38:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 02:38:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:38:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:38:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:38:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:38:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:38:23.409782  543705 memory.go:184] no items to output this cycle
I0321 02:38:23.409845  543705 cpu.go:275] no items to output this cycle
I0321 02:38:27.361675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:38:27.364159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:38:27.364168  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053b940 0xc00053b980]
E0321 02:38:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:38:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 02:38:33.409797  543705 memory.go:184] no items to output this cycle
E0321 02:38:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:38:43.409790  543705 memory.go:191] Add success.
I0321 02:38:43.409813  543705 cpu.go:282] Add success.
I0321 02:38:43.419858  543705 net.go:648] Add success.
I0321 02:38:43.422870  543705 net.go:770] primary dev: ETH0
I0321 02:38:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:38:43.422895  543705 net.go:698] Add success.
I0321 02:38:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:38:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:38:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:38:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:38:53.409788  543705 memory.go:184] no items to output this cycle
I0321 02:38:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 02:39:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:39:03.409788  543705 memory.go:184] no items to output this cycle
I0321 02:39:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 02:39:13.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:39:13.409915  543705 memory.go:191] Add success.
I0321 02:39:13.409916  543705 cpu.go:282] Add success.
W0321 02:39:13.409959  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:39:13.409976  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:39:13.409981  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:39:13.419713  543705 net.go:648] Add success.
I0321 02:39:13.422446  543705 net.go:770] primary dev: ETH0
I0321 02:39:13.422461  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:39:13.422475  543705 net.go:698] Add success.
I0321 02:39:13.463830  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0a4c26d1-48d2-4796-9c2d-eb58fd185420","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:39:13.463862  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:39:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:39:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:39:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 02:39:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:39:14.456666  543705 disk_worker.go:494] system disk:vda1
I0321 02:39:14.456694  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:39:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:39:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:39:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:39:23.409814  543705 memory.go:184] no items to output this cycle
I0321 02:39:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 02:39:27.365679  543705 disk_info.go:125] begin check local disk info of client
I0321 02:39:27.368165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:39:27.368172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f00 0xc0000c5f40]
E0321 02:39:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:39:33.409774  543705 memory.go:184] no items to output this cycle
I0321 02:39:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 02:39:38.699359  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:39:38.699366  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:39:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:39:43.410607  543705 memory.go:191] Add success.
I0321 02:39:43.409803  543705 cpu.go:282] Add success.
I0321 02:39:43.420284  543705 net.go:648] Add success.
I0321 02:39:43.422902  543705 net.go:770] primary dev: ETH0
I0321 02:39:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:39:43.422929  543705 net.go:698] Add success.
I0321 02:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:39:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:39:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:39:53.409771  543705 memory.go:184] no items to output this cycle
I0321 02:39:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 02:40:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:40:03.409768  543705 memory.go:184] no items to output this cycle
I0321 02:40:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 02:40:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:40:13.409818  543705 memory.go:191] Add success.
I0321 02:40:13.409826  543705 cpu.go:282] Add success.
W0321 02:40:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:40:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:40:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:40:13.419545  543705 net.go:770] primary dev: ETH0
I0321 02:40:13.419560  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:40:13.419573  543705 net.go:698] Add success.
I0321 02:40:13.419922  543705 net.go:648] Add success.
I0321 02:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:40:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:40:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 02:40:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:40:14.456482  543705 disk_worker.go:494] system disk:vda1
I0321 02:40:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:40:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:40:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:40:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:40:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:40:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:40:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:40:23.409784  543705 memory.go:184] no items to output this cycle
I0321 02:40:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 02:40:27.369677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:40:27.372232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:40:27.372239  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375380 0xc0003753c0]
E0321 02:40:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:40:33.409778  543705 memory.go:184] no items to output this cycle
I0321 02:40:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 02:40:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:40:43.409788  543705 memory.go:191] Add success.
I0321 02:40:43.409808  543705 cpu.go:282] Add success.
I0321 02:40:43.419929  543705 net.go:648] Add success.
I0321 02:40:43.422667  543705 net.go:770] primary dev: ETH0
I0321 02:40:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:40:43.422692  543705 net.go:698] Add success.
I0321 02:40:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:40:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:40:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:40:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:40:53.409782  543705 memory.go:184] no items to output this cycle
I0321 02:40:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 02:41:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:41:03.409765  543705 memory.go:184] no items to output this cycle
I0321 02:41:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 02:41:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:41:13.409775  543705 memory.go:191] Add success.
W0321 02:41:13.409997  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 02:41:13.410010  543705 cpu.go:282] Add success.
W0321 02:41:13.410019  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:41:13.410023  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:41:13.419654  543705 net.go:648] Add success.
I0321 02:41:13.422595  543705 net.go:770] primary dev: ETH0
I0321 02:41:13.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:41:13.422619  543705 net.go:698] Add success.
I0321 02:41:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:41:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:41:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 02:41:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:41:14.456629  543705 disk_worker.go:494] system disk:vda1
I0321 02:41:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:41:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:41:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:41:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:41:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:41:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:41:23.409795  543705 memory.go:184] no items to output this cycle
I0321 02:41:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 02:41:27.373675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:41:27.376183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:41:27.376190  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b3c0 0xc00007b400]
E0321 02:41:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:41:33.409798  543705 memory.go:184] no items to output this cycle
I0321 02:41:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 02:41:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:41:43.409808  543705 memory.go:191] Add success.
I0321 02:41:43.409813  543705 cpu.go:282] Add success.
I0321 02:41:43.419954  543705 net.go:648] Add success.
I0321 02:41:43.422492  543705 net.go:770] primary dev: ETH0
I0321 02:41:43.422507  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:41:43.422522  543705 net.go:698] Add success.
I0321 02:41:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:41:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:41:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:41:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:41:53.409798  543705 memory.go:184] no items to output this cycle
I0321 02:41:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 02:42:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:42:03.409776  543705 memory.go:184] no items to output this cycle
I0321 02:42:03.409778  543705 cpu.go:275] no items to output this cycle
E0321 02:42:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:42:13.409802  543705 memory.go:191] Add success.
I0321 02:42:13.409807  543705 cpu.go:282] Add success.
W0321 02:42:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:42:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:42:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:42:13.420403  543705 net.go:648] Add success.
I0321 02:42:13.423161  543705 net.go:770] primary dev: ETH0
I0321 02:42:13.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:42:13.423184  543705 net.go:698] Add success.
I0321 02:42:13.485848  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"68572062-e5d4-443d-83cd-8fac66229632","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:42:13.485879  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 02:42:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:42:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 02:42:14.455211  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:42:14.456816  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:42:14.456826  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:42:14.456832  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:42:14.456968  543705 disk_worker.go:494] system disk:vda1
I0321 02:42:14.457004  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:42:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:42:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 02:42:16.457885  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:42:16.457886  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:42:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:42:16.457972  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:42:16.472283  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:42:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:42:23.409806  543705 memory.go:184] no items to output this cycle
I0321 02:42:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 02:42:27.377675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:42:27.380229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:42:27.380237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae8c0 0xc0004ae900]
E0321 02:42:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:42:33.409766  543705 memory.go:184] no items to output this cycle
I0321 02:42:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 02:42:38.700197  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:42:38.700204  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:42:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:42:43.410656  543705 memory.go:191] Add success.
I0321 02:42:43.409788  543705 cpu.go:282] Add success.
I0321 02:42:43.420480  543705 net.go:648] Add success.
I0321 02:42:43.423042  543705 net.go:770] primary dev: ETH0
I0321 02:42:43.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:42:43.423068  543705 net.go:698] Add success.
I0321 02:42:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:42:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:42:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:42:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:42:53.409793  543705 memory.go:184] no items to output this cycle
I0321 02:42:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 02:43:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:43:03.409776  543705 memory.go:184] no items to output this cycle
I0321 02:43:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 02:43:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:43:13.409786  543705 memory.go:191] Add success.
I0321 02:43:13.409787  543705 cpu.go:282] Add success.
W0321 02:43:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:43:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:43:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:43:13.420318  543705 net.go:648] Add success.
I0321 02:43:13.423053  543705 net.go:770] primary dev: ETH0
I0321 02:43:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:43:13.423077  543705 net.go:698] Add success.
I0321 02:43:14.454944  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:43:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:43:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 02:43:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:43:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 02:43:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:43:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:43:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:43:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:43:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:43:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:43:23.409777  543705 memory.go:184] no items to output this cycle
I0321 02:43:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 02:43:27.381676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:43:27.384174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:43:27.384180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5180 0xc0003d51c0]
E0321 02:43:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:43:33.409797  543705 memory.go:184] no items to output this cycle
I0321 02:43:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 02:43:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:43:43.409781  543705 memory.go:191] Add success.
I0321 02:43:43.409800  543705 cpu.go:282] Add success.
I0321 02:43:43.419868  543705 net.go:648] Add success.
I0321 02:43:43.422821  543705 net.go:770] primary dev: ETH0
I0321 02:43:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:43:43.422846  543705 net.go:698] Add success.
I0321 02:43:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:43:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:43:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:43:53.409773  543705 memory.go:184] no items to output this cycle
I0321 02:43:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 02:44:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:44:03.409772  543705 memory.go:184] no items to output this cycle
I0321 02:44:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 02:44:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:44:13.409784  543705 memory.go:191] Add success.
I0321 02:44:13.409788  543705 cpu.go:282] Add success.
W0321 02:44:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:44:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:44:13.420440  543705 net.go:648] Add success.
I0321 02:44:13.423556  543705 net.go:770] primary dev: ETH0
I0321 02:44:13.423569  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:44:13.423580  543705 net.go:698] Add success.
I0321 02:44:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:44:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:44:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 02:44:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:44:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 02:44:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:44:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:44:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:44:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:44:23.409778  543705 memory.go:184] no items to output this cycle
I0321 02:44:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 02:44:27.385677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:44:27.388254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:44:27.388260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aab00 0xc0001aab40]
E0321 02:44:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:44:33.409804  543705 memory.go:184] no items to output this cycle
I0321 02:44:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 02:44:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:44:43.409788  543705 memory.go:191] Add success.
I0321 02:44:43.409810  543705 cpu.go:282] Add success.
I0321 02:44:43.419955  543705 net.go:648] Add success.
I0321 02:44:43.422789  543705 net.go:770] primary dev: ETH0
I0321 02:44:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:44:43.422817  543705 net.go:698] Add success.
I0321 02:44:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:44:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:44:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:44:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:44:53.409776  543705 memory.go:184] no items to output this cycle
I0321 02:44:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 02:45:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:45:03.409799  543705 memory.go:184] no items to output this cycle
I0321 02:45:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 02:45:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:45:13.409774  543705 memory.go:191] Add success.
W0321 02:45:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:45:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:45:13.409810  543705 cpu.go:282] Add success.
I0321 02:45:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:45:13.420287  543705 net.go:648] Add success.
I0321 02:45:13.423203  543705 net.go:770] primary dev: ETH0
I0321 02:45:13.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:45:13.423232  543705 net.go:698] Add success.
I0321 02:45:13.468262  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d38c62c7-24b8-4a44-89d9-106596712bfd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:45:13.468294  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:45:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:45:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:45:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 02:45:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:45:14.456469  543705 disk_worker.go:494] system disk:vda1
I0321 02:45:14.456513  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:45:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:45:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:45:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:45:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:45:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:45:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:45:23.409776  543705 memory.go:184] no items to output this cycle
I0321 02:45:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 02:45:27.392066  543705 disk_info.go:125] begin check local disk info of client
I0321 02:45:27.394711  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:45:27.394718  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004863c0 0xc000486400]
E0321 02:45:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:45:33.409780  543705 memory.go:184] no items to output this cycle
I0321 02:45:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 02:45:38.700343  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:45:38.700349  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:45:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:45:43.410723  543705 memory.go:191] Add success.
I0321 02:45:43.409824  543705 cpu.go:282] Add success.
I0321 02:45:43.420426  543705 net.go:648] Add success.
I0321 02:45:43.423403  543705 net.go:770] primary dev: ETH0
I0321 02:45:43.423416  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:45:43.423428  543705 net.go:698] Add success.
I0321 02:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:45:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:45:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:45:53.409778  543705 memory.go:184] no items to output this cycle
I0321 02:45:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 02:46:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:46:03.409799  543705 memory.go:184] no items to output this cycle
I0321 02:46:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 02:46:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:46:13.409878  543705 memory.go:191] Add success.
W0321 02:46:13.409909  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:46:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:46:13.409925  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:46:13.409937  543705 cpu.go:282] Add success.
I0321 02:46:13.419714  543705 net.go:648] Add success.
I0321 02:46:13.422269  543705 net.go:770] primary dev: ETH0
I0321 02:46:13.422282  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:46:13.422293  543705 net.go:698] Add success.
I0321 02:46:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:46:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:46:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 02:46:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:46:14.456486  543705 disk_worker.go:494] system disk:vda1
I0321 02:46:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:46:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:46:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:46:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:46:16.472476  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:46:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:46:23.409779  543705 memory.go:184] no items to output this cycle
I0321 02:46:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 02:46:27.397675  543705 disk_info.go:125] begin check local disk info of client
I0321 02:46:27.400290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:46:27.400296  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509200 0xc000509240]
E0321 02:46:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:46:33.409784  543705 memory.go:184] no items to output this cycle
I0321 02:46:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 02:46:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:46:43.409773  543705 memory.go:191] Add success.
I0321 02:46:43.409793  543705 cpu.go:282] Add success.
I0321 02:46:43.419895  543705 net.go:648] Add success.
I0321 02:46:43.422506  543705 net.go:770] primary dev: ETH0
I0321 02:46:43.422519  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:46:43.422532  543705 net.go:698] Add success.
I0321 02:46:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:46:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:46:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:46:53.409765  543705 memory.go:184] no items to output this cycle
I0321 02:46:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 02:47:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:47:03.409783  543705 memory.go:184] no items to output this cycle
I0321 02:47:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 02:47:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:47:13.409809  543705 memory.go:191] Add success.
I0321 02:47:13.409815  543705 cpu.go:282] Add success.
W0321 02:47:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:47:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:47:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:47:13.420369  543705 net.go:648] Add success.
I0321 02:47:13.423586  543705 net.go:770] primary dev: ETH0
I0321 02:47:13.423599  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:47:13.423610  543705 net.go:698] Add success.
I0321 02:47:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0321 02:47:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:47:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 02:47:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:47:14.456782  543705 disk_worker.go:494] system disk:vda1
I0321 02:47:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:47:14.457057  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:47:14.457064  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:47:14.457068  543705 custom_config.go:64] query custom config with name: gpu
E0321 02:47:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:47:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:47:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:47:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:47:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:47:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:47:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:47:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:47:23.409784  543705 memory.go:184] no items to output this cycle
I0321 02:47:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 02:47:27.401674  543705 disk_info.go:125] begin check local disk info of client
I0321 02:47:27.404164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:47:27.404171  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462040 0xc000462080]
E0321 02:47:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:47:33.409800  543705 memory.go:184] no items to output this cycle
I0321 02:47:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:47:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:47:43.409781  543705 memory.go:191] Add success.
I0321 02:47:43.409789  543705 cpu.go:282] Add success.
I0321 02:47:43.419864  543705 net.go:648] Add success.
I0321 02:47:43.422551  543705 net.go:770] primary dev: ETH0
I0321 02:47:43.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:47:43.422577  543705 net.go:698] Add success.
I0321 02:47:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:47:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:47:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:47:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:47:53.409773  543705 memory.go:184] no items to output this cycle
I0321 02:47:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 02:48:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:48:03.409798  543705 memory.go:184] no items to output this cycle
I0321 02:48:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 02:48:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:48:13.409861  543705 memory.go:191] Add success.
W0321 02:48:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:48:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:48:13.409905  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:48:13.409923  543705 cpu.go:282] Add success.
I0321 02:48:13.419754  543705 net.go:648] Add success.
I0321 02:48:13.422311  543705 net.go:770] primary dev: ETH0
I0321 02:48:13.422324  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:48:13.422335  543705 net.go:698] Add success.
I0321 02:48:13.468097  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e775378-3de2-454a-82c8-a44d0d028471","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:48:13.468137  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:48:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:48:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:48:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 02:48:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:48:14.456523  543705 disk_worker.go:494] system disk:vda1
I0321 02:48:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:48:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:48:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:48:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:48:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:48:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:48:23.409790  543705 memory.go:184] no items to output this cycle
I0321 02:48:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 02:48:27.405676  543705 disk_info.go:125] begin check local disk info of client
I0321 02:48:27.408504  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:48:27.408511  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f080 0xc00046f0c0]
E0321 02:48:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:48:33.409781  543705 memory.go:184] no items to output this cycle
I0321 02:48:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 02:48:38.701203  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:48:38.701209  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:48:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:48:43.410593  543705 memory.go:191] Add success.
I0321 02:48:43.409794  543705 cpu.go:282] Add success.
I0321 02:48:43.420366  543705 net.go:648] Add success.
I0321 02:48:43.422853  543705 net.go:770] primary dev: ETH0
I0321 02:48:43.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:48:43.422885  543705 net.go:698] Add success.
I0321 02:48:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:48:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:48:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:48:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:48:53.409768  543705 memory.go:184] no items to output this cycle
I0321 02:48:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 02:49:03.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:49:03.409865  543705 memory.go:184] no items to output this cycle
I0321 02:49:03.409922  543705 cpu.go:275] no items to output this cycle
E0321 02:49:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:49:13.409792  543705 memory.go:191] Add success.
I0321 02:49:13.409798  543705 cpu.go:282] Add success.
W0321 02:49:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:49:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:49:13.419988  543705 net.go:770] primary dev: ETH0
I0321 02:49:13.420000  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:49:13.420011  543705 net.go:698] Add success.
I0321 02:49:13.420359  543705 net.go:648] Add success.
I0321 02:49:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:49:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:49:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 02:49:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:49:14.456509  543705 disk_worker.go:494] system disk:vda1
I0321 02:49:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:49:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:49:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:49:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:49:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:49:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:49:23.409783  543705 memory.go:184] no items to output this cycle
I0321 02:49:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 02:49:27.409677  543705 disk_info.go:125] begin check local disk info of client
I0321 02:49:27.412166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:49:27.412173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd600 0xc0002bd640]
E0321 02:49:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:49:33.409778  543705 memory.go:184] no items to output this cycle
I0321 02:49:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 02:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:49:43.409809  543705 memory.go:191] Add success.
I0321 02:49:43.409817  543705 cpu.go:282] Add success.
I0321 02:49:43.419937  543705 net.go:648] Add success.
I0321 02:49:43.423051  543705 net.go:770] primary dev: ETH0
I0321 02:49:43.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:49:43.423077  543705 net.go:698] Add success.
I0321 02:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:49:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:49:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:49:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:49:53.409791  543705 memory.go:184] no items to output this cycle
I0321 02:49:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 02:50:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:50:03.409806  543705 memory.go:184] no items to output this cycle
I0321 02:50:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 02:50:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:50:13.409783  543705 memory.go:191] Add success.
I0321 02:50:13.409798  543705 cpu.go:282] Add success.
W0321 02:50:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:50:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:50:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:50:13.420178  543705 net.go:648] Add success.
I0321 02:50:13.422855  543705 net.go:770] primary dev: ETH0
I0321 02:50:13.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:50:13.422885  543705 net.go:698] Add success.
I0321 02:50:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:50:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:50:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 02:50:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:50:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 02:50:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:50:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:50:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:50:16.472533  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:50:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:50:23.409814  543705 memory.go:184] no items to output this cycle
I0321 02:50:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 02:50:27.412791  543705 disk_info.go:125] begin check local disk info of client
I0321 02:50:27.415363  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:50:27.415371  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004eb880 0xc0004eb8c0]
E0321 02:50:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:50:33.409804  543705 memory.go:184] no items to output this cycle
I0321 02:50:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 02:50:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:50:43.409786  543705 memory.go:191] Add success.
I0321 02:50:43.409786  543705 cpu.go:282] Add success.
I0321 02:50:43.419855  543705 net.go:648] Add success.
I0321 02:50:43.422769  543705 net.go:770] primary dev: ETH0
I0321 02:50:43.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:50:43.422795  543705 net.go:698] Add success.
I0321 02:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:50:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:50:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:50:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:50:53.409893  543705 memory.go:184] no items to output this cycle
I0321 02:50:53.409909  543705 cpu.go:275] no items to output this cycle
E0321 02:51:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:51:03.409814  543705 memory.go:184] no items to output this cycle
I0321 02:51:03.409829  543705 cpu.go:275] no items to output this cycle
E0321 02:51:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:51:13.409773  543705 memory.go:191] Add success.
W0321 02:51:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 02:51:13.409803  543705 cpu.go:282] Add success.
W0321 02:51:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:51:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:51:13.420095  543705 net.go:648] Add success.
I0321 02:51:13.422998  543705 net.go:770] primary dev: ETH0
I0321 02:51:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:51:13.423022  543705 net.go:698] Add success.
I0321 02:51:13.482823  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"599ad000-0757-4675-ac78-7adad4f7003e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:51:13.482857  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:51:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:51:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:51:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 02:51:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:51:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 02:51:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:51:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:51:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:51:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:51:23.409810  543705 memory.go:184] no items to output this cycle
I0321 02:51:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 02:51:27.415802  543705 disk_info.go:125] begin check local disk info of client
I0321 02:51:27.418308  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:51:27.418314  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462600 0xc000462640]
E0321 02:51:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:51:33.409799  543705 memory.go:184] no items to output this cycle
I0321 02:51:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 02:51:38.701737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:51:38.701744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:51:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:51:43.410692  543705 memory.go:191] Add success.
I0321 02:51:43.409819  543705 cpu.go:282] Add success.
I0321 02:51:43.420500  543705 net.go:648] Add success.
I0321 02:51:43.423054  543705 net.go:770] primary dev: ETH0
I0321 02:51:43.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:51:43.423083  543705 net.go:698] Add success.
I0321 02:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:51:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:51:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:51:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:51:53.409783  543705 memory.go:184] no items to output this cycle
I0321 02:51:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 02:52:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:52:03.409760  543705 memory.go:184] no items to output this cycle
I0321 02:52:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 02:52:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:52:13.409796  543705 memory.go:191] Add success.
I0321 02:52:13.409801  543705 cpu.go:282] Add success.
W0321 02:52:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:52:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:52:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:52:13.420382  543705 net.go:648] Add success.
I0321 02:52:13.423360  543705 net.go:770] primary dev: ETH0
I0321 02:52:13.423373  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:52:13.423386  543705 net.go:698] Add success.
W0321 02:52:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:52:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 02:52:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:52:14.456948  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:52:14.456957  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:52:14.456963  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:52:14.457007  543705 disk_worker.go:494] system disk:vda1
I0321 02:52:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:52:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:52:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:52:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:52:16.457992  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:52:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:52:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:52:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:52:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:52:23.409814  543705 memory.go:184] no items to output this cycle
I0321 02:52:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 02:52:27.418794  543705 disk_info.go:125] begin check local disk info of client
I0321 02:52:27.421350  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:52:27.421357  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b9c0 0xc00032ba00]
E0321 02:52:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:52:33.409801  543705 memory.go:184] no items to output this cycle
I0321 02:52:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 02:52:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:52:43.409932  543705 memory.go:191] Add success.
I0321 02:52:43.409939  543705 cpu.go:282] Add success.
I0321 02:52:43.419713  543705 net.go:648] Add success.
I0321 02:52:43.422512  543705 net.go:770] primary dev: ETH0
I0321 02:52:43.422525  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:52:43.422536  543705 net.go:698] Add success.
I0321 02:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:52:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:52:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:52:53.409761  543705 memory.go:184] no items to output this cycle
I0321 02:52:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 02:53:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:53:03.409787  543705 memory.go:184] no items to output this cycle
I0321 02:53:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 02:53:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:53:13.409808  543705 memory.go:191] Add success.
I0321 02:53:13.409820  543705 cpu.go:282] Add success.
W0321 02:53:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:53:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:53:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:53:13.420137  543705 net.go:648] Add success.
I0321 02:53:13.423096  543705 net.go:770] primary dev: ETH0
I0321 02:53:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:53:13.423125  543705 net.go:698] Add success.
I0321 02:53:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:53:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:53:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 02:53:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:53:14.456520  543705 disk_worker.go:494] system disk:vda1
I0321 02:53:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:53:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:53:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:53:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:53:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:53:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:53:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:53:23.409811  543705 memory.go:184] no items to output this cycle
I0321 02:53:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 02:53:27.421806  543705 disk_info.go:125] begin check local disk info of client
I0321 02:53:27.424365  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:53:27.424372  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a880 0xc00034a8c0]
E0321 02:53:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:53:33.409803  543705 memory.go:184] no items to output this cycle
I0321 02:53:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 02:53:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:53:43.409780  543705 memory.go:191] Add success.
I0321 02:53:43.409812  543705 cpu.go:282] Add success.
I0321 02:53:43.419868  543705 net.go:648] Add success.
I0321 02:53:43.422684  543705 net.go:770] primary dev: ETH0
I0321 02:53:43.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:53:43.422801  543705 net.go:698] Add success.
I0321 02:53:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:53:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:53:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:53:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:53:53.409783  543705 memory.go:184] no items to output this cycle
I0321 02:53:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 02:54:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:54:03.409798  543705 memory.go:184] no items to output this cycle
I0321 02:54:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 02:54:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:54:13.409797  543705 memory.go:191] Add success.
I0321 02:54:13.409798  543705 cpu.go:282] Add success.
W0321 02:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:54:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:54:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:54:13.420066  543705 net.go:648] Add success.
I0321 02:54:13.423065  543705 net.go:770] primary dev: ETH0
I0321 02:54:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:54:13.423091  543705 net.go:698] Add success.
I0321 02:54:13.465812  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"640fe46a-f6ee-4cc5-97fc-a35083636044","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:54:13.465847  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 02:54:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:54:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:54:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 02:54:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:54:14.456511  543705 disk_worker.go:494] system disk:vda1
I0321 02:54:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:54:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:54:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:54:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:54:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:54:23.409797  543705 memory.go:184] no items to output this cycle
I0321 02:54:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 02:54:27.424836  543705 disk_info.go:125] begin check local disk info of client
I0321 02:54:27.427377  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:54:27.427384  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dfd00 0xc0004dfd40]
E0321 02:54:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:54:33.409796  543705 memory.go:184] no items to output this cycle
I0321 02:54:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 02:54:38.703212  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:54:38.703218  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:54:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:54:43.410595  543705 memory.go:191] Add success.
I0321 02:54:43.409830  543705 cpu.go:282] Add success.
I0321 02:54:43.420278  543705 net.go:648] Add success.
I0321 02:54:43.422901  543705 net.go:770] primary dev: ETH0
I0321 02:54:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:54:43.422930  543705 net.go:698] Add success.
I0321 02:54:46.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:54:46.458191  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:54:46.458218  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:54:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:54:53.409805  543705 memory.go:184] no items to output this cycle
I0321 02:54:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 02:55:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:55:03.409791  543705 memory.go:184] no items to output this cycle
I0321 02:55:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 02:55:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:55:13.409802  543705 memory.go:191] Add success.
I0321 02:55:13.409808  543705 cpu.go:282] Add success.
W0321 02:55:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:55:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:55:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:55:13.420089  543705 net.go:648] Add success.
I0321 02:55:13.423158  543705 net.go:770] primary dev: ETH0
I0321 02:55:13.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:55:13.423183  543705 net.go:698] Add success.
I0321 02:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:55:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:55:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 02:55:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:55:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 02:55:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:55:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:55:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:55:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:55:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:55:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:55:23.409810  543705 memory.go:184] no items to output this cycle
I0321 02:55:23.409828  543705 cpu.go:275] no items to output this cycle
I0321 02:55:27.427846  543705 disk_info.go:125] begin check local disk info of client
I0321 02:55:27.430279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:55:27.430285  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc040 0xc0003dc080]
E0321 02:55:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:55:33.409799  543705 memory.go:184] no items to output this cycle
I0321 02:55:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 02:55:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:55:43.409781  543705 memory.go:191] Add success.
I0321 02:55:43.409804  543705 cpu.go:282] Add success.
I0321 02:55:43.419868  543705 net.go:648] Add success.
I0321 02:55:43.422927  543705 net.go:770] primary dev: ETH0
I0321 02:55:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:55:43.422955  543705 net.go:698] Add success.
I0321 02:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:55:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:55:53.409768  543705 memory.go:184] no items to output this cycle
I0321 02:55:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 02:56:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:56:03.409809  543705 memory.go:184] no items to output this cycle
I0321 02:56:03.409822  543705 cpu.go:275] no items to output this cycle
E0321 02:56:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:56:13.409787  543705 memory.go:191] Add success.
I0321 02:56:13.409803  543705 cpu.go:282] Add success.
W0321 02:56:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:56:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:56:13.420068  543705 net.go:648] Add success.
I0321 02:56:13.422694  543705 net.go:770] primary dev: ETH0
I0321 02:56:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:56:13.422724  543705 net.go:698] Add success.
I0321 02:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:56:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:56:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 02:56:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:56:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 02:56:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:56:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:56:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:56:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:56:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:56:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:56:23.409777  543705 memory.go:184] no items to output this cycle
I0321 02:56:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 02:56:27.430848  543705 disk_info.go:125] begin check local disk info of client
I0321 02:56:27.433382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:56:27.433389  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278900 0xc000278940]
E0321 02:56:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:56:33.409798  543705 memory.go:184] no items to output this cycle
I0321 02:56:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 02:56:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:56:43.409791  543705 memory.go:191] Add success.
I0321 02:56:43.409795  543705 cpu.go:282] Add success.
I0321 02:56:43.419965  543705 net.go:648] Add success.
I0321 02:56:43.422507  543705 net.go:770] primary dev: ETH0
I0321 02:56:43.422520  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:56:43.422533  543705 net.go:698] Add success.
I0321 02:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:56:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:56:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:56:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:56:53.409770  543705 memory.go:184] no items to output this cycle
I0321 02:56:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 02:57:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:57:03.409780  543705 cpu.go:275] no items to output this cycle
I0321 02:57:03.409785  543705 memory.go:184] no items to output this cycle
E0321 02:57:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:57:13.409808  543705 cpu.go:282] Add success.
I0321 02:57:13.409814  543705 memory.go:191] Add success.
W0321 02:57:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:57:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:57:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:57:13.420161  543705 net.go:648] Add success.
I0321 02:57:13.423001  543705 net.go:770] primary dev: ETH0
I0321 02:57:13.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:57:13.423026  543705 net.go:698] Add success.
I0321 02:57:13.429076  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 02:57:13.453249  543705 event_worker.go:152] Polling the log file for events...
I0321 02:57:13.504210  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b31e7dae-75a1-4dc3-bc1c-dddafd36c766","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 02:57:13.504251  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 02:57:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:57:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 02:57:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0321 02:57:14.455906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 02:57:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 02:57:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0321 02:57:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 02:57:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 02:57:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 02:57:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:57:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 02:57:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 02:57:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:57:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:57:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:57:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:57:23.409782  543705 memory.go:184] no items to output this cycle
I0321 02:57:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 02:57:27.433861  543705 disk_info.go:125] begin check local disk info of client
I0321 02:57:27.436434  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:57:27.436441  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053a840 0xc00053a880]
E0321 02:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:57:33.409775  543705 memory.go:184] no items to output this cycle
I0321 02:57:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 02:57:38.704215  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 02:57:38.704222  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 02:57:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:57:43.410619  543705 memory.go:191] Add success.
I0321 02:57:43.409824  543705 cpu.go:282] Add success.
I0321 02:57:43.420334  543705 net.go:648] Add success.
I0321 02:57:43.423031  543705 net.go:770] primary dev: ETH0
I0321 02:57:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:57:43.423058  543705 net.go:698] Add success.
I0321 02:57:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:57:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:57:53.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:57:53.409893  543705 memory.go:184] no items to output this cycle
I0321 02:57:53.409951  543705 cpu.go:275] no items to output this cycle
E0321 02:58:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:58:03.409779  543705 memory.go:184] no items to output this cycle
I0321 02:58:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 02:58:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:58:13.409826  543705 memory.go:191] Add success.
I0321 02:58:13.409831  543705 cpu.go:282] Add success.
W0321 02:58:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:58:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:58:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:58:13.420170  543705 net.go:648] Add success.
I0321 02:58:13.423233  543705 net.go:770] primary dev: ETH0
I0321 02:58:13.423246  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:58:13.423258  543705 net.go:698] Add success.
I0321 02:58:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:58:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:58:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 02:58:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:58:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 02:58:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:58:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:58:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:58:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:58:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:58:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:58:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:58:23.409786  543705 memory.go:184] no items to output this cycle
I0321 02:58:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 02:58:27.436895  543705 disk_info.go:125] begin check local disk info of client
I0321 02:58:27.439445  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:58:27.439451  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fba00 0xc0001fba40]
E0321 02:58:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:58:33.409808  543705 memory.go:184] no items to output this cycle
I0321 02:58:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 02:58:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:58:43.409786  543705 memory.go:191] Add success.
I0321 02:58:43.409811  543705 cpu.go:282] Add success.
I0321 02:58:43.419864  543705 net.go:648] Add success.
I0321 02:58:43.422924  543705 net.go:770] primary dev: ETH0
I0321 02:58:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:58:43.422953  543705 net.go:698] Add success.
I0321 02:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:58:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:58:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:58:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:58:53.409762  543705 memory.go:184] no items to output this cycle
I0321 02:58:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 02:59:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:59:03.409791  543705 memory.go:184] no items to output this cycle
I0321 02:59:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 02:59:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:59:13.409794  543705 memory.go:191] Add success.
I0321 02:59:13.409795  543705 cpu.go:282] Add success.
W0321 02:59:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 02:59:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 02:59:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 02:59:13.420151  543705 net.go:648] Add success.
I0321 02:59:13.423026  543705 net.go:770] primary dev: ETH0
I0321 02:59:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:59:13.423050  543705 net.go:698] Add success.
I0321 02:59:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 02:59:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 02:59:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 02:59:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 02:59:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 02:59:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 02:59:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 02:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:59:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:59:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 02:59:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 02:59:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:59:23.409784  543705 memory.go:184] no items to output this cycle
I0321 02:59:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 02:59:27.439913  543705 disk_info.go:125] begin check local disk info of client
I0321 02:59:27.442386  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 02:59:27.442392  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa840 0xc0001aa880]
E0321 02:59:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:59:33.409804  543705 memory.go:184] no items to output this cycle
I0321 02:59:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 02:59:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:59:43.409795  543705 memory.go:191] Add success.
I0321 02:59:43.409799  543705 cpu.go:282] Add success.
I0321 02:59:43.419864  543705 net.go:648] Add success.
I0321 02:59:43.422648  543705 net.go:770] primary dev: ETH0
I0321 02:59:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0321 02:59:43.422675  543705 net.go:698] Add success.
I0321 02:59:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 02:59:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 02:59:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 02:59:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 02:59:53.409785  543705 memory.go:184] no items to output this cycle
I0321 02:59:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 03:00:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:00:03.409795  543705 memory.go:184] no items to output this cycle
I0321 03:00:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 03:00:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:00:13.409822  543705 memory.go:191] Add success.
I0321 03:00:13.409827  543705 cpu.go:282] Add success.
W0321 03:00:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:00:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:00:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:00:13.420147  543705 net.go:648] Add success.
I0321 03:00:13.422837  543705 net.go:770] primary dev: ETH0
I0321 03:00:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:00:13.422862  543705 net.go:698] Add success.
I0321 03:00:13.464024  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71cddfd8-c40a-481e-8b8d-11807deba22f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:00:13.464057  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:00:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:00:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:00:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 03:00:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:00:14.456550  543705 disk_worker.go:494] system disk:vda1
I0321 03:00:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:00:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:00:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:00:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:00:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:00:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:00:23.409795  543705 memory.go:184] no items to output this cycle
I0321 03:00:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 03:00:27.442922  543705 disk_info.go:125] begin check local disk info of client
I0321 03:00:27.445471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:00:27.445477  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465040 0xc000465080]
E0321 03:00:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:00:33.409803  543705 memory.go:184] no items to output this cycle
I0321 03:00:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 03:00:38.704358  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:00:38.704364  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:00:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:00:43.410600  543705 memory.go:191] Add success.
I0321 03:00:43.409806  543705 cpu.go:282] Add success.
I0321 03:00:43.420283  543705 net.go:648] Add success.
I0321 03:00:43.422854  543705 net.go:770] primary dev: ETH0
I0321 03:00:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:00:43.422882  543705 net.go:698] Add success.
I0321 03:00:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:00:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:00:53.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:00:53.409877  543705 cpu.go:275] no items to output this cycle
I0321 03:00:53.409879  543705 memory.go:184] no items to output this cycle
E0321 03:01:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:01:03.409779  543705 memory.go:184] no items to output this cycle
I0321 03:01:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 03:01:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:01:13.409785  543705 memory.go:191] Add success.
W0321 03:01:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:01:13.409812  543705 cpu.go:282] Add success.
W0321 03:01:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:01:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:01:13.420170  543705 net.go:648] Add success.
I0321 03:01:13.423168  543705 net.go:770] primary dev: ETH0
I0321 03:01:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:01:13.423196  543705 net.go:698] Add success.
I0321 03:01:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:01:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:01:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 03:01:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:01:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 03:01:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:01:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:01:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:01:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:01:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:01:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:01:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:01:23.409813  543705 memory.go:184] no items to output this cycle
I0321 03:01:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 03:01:27.445930  543705 disk_info.go:125] begin check local disk info of client
I0321 03:01:27.448416  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:01:27.448425  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037df40 0xc00053a040]
E0321 03:01:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:01:33.409777  543705 memory.go:184] no items to output this cycle
I0321 03:01:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 03:01:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:01:43.409812  543705 memory.go:191] Add success.
I0321 03:01:43.409818  543705 cpu.go:282] Add success.
I0321 03:01:43.419975  543705 net.go:648] Add success.
I0321 03:01:43.423220  543705 net.go:770] primary dev: ETH0
I0321 03:01:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:01:43.423247  543705 net.go:698] Add success.
I0321 03:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:01:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:01:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:01:53.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:01:53.410389  543705 memory.go:184] no items to output this cycle
I0321 03:01:53.410434  543705 cpu.go:275] no items to output this cycle
E0321 03:02:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:02:03.409770  543705 memory.go:184] no items to output this cycle
I0321 03:02:03.409842  543705 cpu.go:275] no items to output this cycle
E0321 03:02:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:02:13.409788  543705 memory.go:191] Add success.
W0321 03:02:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:02:13.409821  543705 cpu.go:282] Add success.
W0321 03:02:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:02:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:02:13.419952  543705 net.go:770] primary dev: ETH0
I0321 03:02:13.419964  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:02:13.419976  543705 net.go:698] Add success.
I0321 03:02:13.420362  543705 net.go:648] Add success.
W0321 03:02:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:02:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 03:02:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:02:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:02:14.456928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:02:14.456934  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:02:14.457004  543705 disk_worker.go:494] system disk:vda1
I0321 03:02:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:02:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:02:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:02:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:02:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:02:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:02:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:02:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:02:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:02:23.409813  543705 memory.go:184] no items to output this cycle
I0321 03:02:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 03:02:27.448939  543705 disk_info.go:125] begin check local disk info of client
I0321 03:02:27.451506  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:02:27.451513  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d800 0xc00037d840]
E0321 03:02:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:02:33.409798  543705 memory.go:184] no items to output this cycle
I0321 03:02:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 03:02:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:02:43.409783  543705 memory.go:191] Add success.
I0321 03:02:43.409799  543705 cpu.go:282] Add success.
I0321 03:02:43.419857  543705 net.go:648] Add success.
I0321 03:02:43.422870  543705 net.go:770] primary dev: ETH0
I0321 03:02:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:02:43.422900  543705 net.go:698] Add success.
I0321 03:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:02:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:02:53.409784  543705 memory.go:184] no items to output this cycle
I0321 03:02:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 03:03:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:03:03.409776  543705 memory.go:184] no items to output this cycle
I0321 03:03:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 03:03:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:03:13.409813  543705 memory.go:191] Add success.
I0321 03:03:13.409814  543705 cpu.go:282] Add success.
W0321 03:03:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:03:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:03:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:03:13.420173  543705 net.go:648] Add success.
I0321 03:03:13.423147  543705 net.go:770] primary dev: ETH0
I0321 03:03:13.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:03:13.423172  543705 net.go:698] Add success.
I0321 03:03:13.499180  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b728789f-812c-4671-af0c-095269d41783","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:03:13.499213  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:03:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:03:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:03:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 03:03:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:03:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 03:03:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:03:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:03:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:03:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:03:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:03:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:03:23.409790  543705 memory.go:184] no items to output this cycle
I0321 03:03:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 03:03:27.451959  543705 disk_info.go:125] begin check local disk info of client
I0321 03:03:27.454517  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:03:27.454525  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a40 0xc0000c5a80]
E0321 03:03:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:03:33.409803  543705 memory.go:184] no items to output this cycle
I0321 03:03:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 03:03:38.705228  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:03:38.705235  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:03:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:03:43.410810  543705 memory.go:191] Add success.
I0321 03:03:43.409838  543705 cpu.go:282] Add success.
I0321 03:03:43.420575  543705 net.go:648] Add success.
I0321 03:03:43.423461  543705 net.go:770] primary dev: ETH0
I0321 03:03:43.423477  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:03:43.423490  543705 net.go:698] Add success.
I0321 03:03:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:03:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:03:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:03:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:03:53.409781  543705 memory.go:184] no items to output this cycle
I0321 03:03:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 03:04:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:04:03.409798  543705 memory.go:184] no items to output this cycle
I0321 03:04:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 03:04:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:04:13.409801  543705 cpu.go:282] Add success.
I0321 03:04:13.409807  543705 memory.go:191] Add success.
W0321 03:04:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:04:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:04:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:04:13.420271  543705 net.go:648] Add success.
I0321 03:04:13.422876  543705 net.go:770] primary dev: ETH0
I0321 03:04:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:04:13.422901  543705 net.go:698] Add success.
I0321 03:04:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:04:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:04:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 03:04:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:04:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 03:04:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:04:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:04:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:04:23.409787  543705 memory.go:184] no items to output this cycle
I0321 03:04:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 03:04:27.454971  543705 disk_info.go:125] begin check local disk info of client
I0321 03:04:27.457582  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:04:27.457588  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f80 0xc0000c4fc0]
E0321 03:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:04:33.409786  543705 memory.go:184] no items to output this cycle
I0321 03:04:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 03:04:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:04:43.409784  543705 memory.go:191] Add success.
I0321 03:04:43.409788  543705 cpu.go:282] Add success.
I0321 03:04:43.419851  543705 net.go:648] Add success.
I0321 03:04:43.423034  543705 net.go:770] primary dev: ETH0
I0321 03:04:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:04:43.423058  543705 net.go:698] Add success.
I0321 03:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:04:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:04:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:04:53.409870  543705 cpu.go:275] no items to output this cycle
E0321 03:04:53.409928  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:04:53.409951  543705 memory.go:184] no items to output this cycle
E0321 03:05:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:05:03.409788  543705 memory.go:184] no items to output this cycle
I0321 03:05:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 03:05:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:05:13.409784  543705 memory.go:191] Add success.
I0321 03:05:13.409798  543705 cpu.go:282] Add success.
W0321 03:05:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:05:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:05:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:05:13.420184  543705 net.go:648] Add success.
I0321 03:05:13.422853  543705 net.go:770] primary dev: ETH0
I0321 03:05:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:05:13.422878  543705 net.go:698] Add success.
I0321 03:05:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:05:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:05:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 03:05:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:05:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 03:05:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:05:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:05:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:05:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:05:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:05:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:05:23.409773  543705 memory.go:184] no items to output this cycle
I0321 03:05:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 03:05:27.457989  543705 disk_info.go:125] begin check local disk info of client
I0321 03:05:27.460542  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:05:27.460549  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039b140 0xc00039b180]
E0321 03:05:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:05:33.409766  543705 memory.go:184] no items to output this cycle
I0321 03:05:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 03:05:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:05:43.409796  543705 memory.go:191] Add success.
I0321 03:05:43.409817  543705 cpu.go:282] Add success.
I0321 03:05:43.419876  543705 net.go:648] Add success.
I0321 03:05:43.422591  543705 net.go:770] primary dev: ETH0
I0321 03:05:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:05:43.422617  543705 net.go:698] Add success.
I0321 03:05:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:05:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:05:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:05:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:05:53.409772  543705 memory.go:184] no items to output this cycle
I0321 03:05:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 03:06:03.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:06:03.409895  543705 memory.go:184] no items to output this cycle
I0321 03:06:03.410147  543705 cpu.go:275] no items to output this cycle
E0321 03:06:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:06:13.409812  543705 memory.go:191] Add success.
I0321 03:06:13.409823  543705 cpu.go:282] Add success.
W0321 03:06:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:06:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:06:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:06:13.420120  543705 net.go:648] Add success.
I0321 03:06:13.422952  543705 net.go:770] primary dev: ETH0
I0321 03:06:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:06:13.422980  543705 net.go:698] Add success.
I0321 03:06:13.469621  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3af64b5c-d3b8-4fec-b994-3ae96698d156","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:06:13.469675  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:06:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:06:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:06:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 03:06:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:06:14.456650  543705 disk_worker.go:494] system disk:vda1
I0321 03:06:14.456682  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:06:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:06:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:06:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:06:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:06:23.409784  543705 memory.go:184] no items to output this cycle
I0321 03:06:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 03:06:27.461002  543705 disk_info.go:125] begin check local disk info of client
I0321 03:06:27.463620  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:06:27.463627  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053ac40 0xc00053ac80]
E0321 03:06:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:06:33.409776  543705 memory.go:184] no items to output this cycle
I0321 03:06:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 03:06:38.705734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:06:38.705740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:06:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:06:43.410674  543705 memory.go:191] Add success.
I0321 03:06:43.409820  543705 cpu.go:282] Add success.
I0321 03:06:43.420380  543705 net.go:648] Add success.
I0321 03:06:43.423099  543705 net.go:770] primary dev: ETH0
I0321 03:06:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:06:43.423126  543705 net.go:698] Add success.
I0321 03:06:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:06:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:06:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:06:53.409782  543705 cpu.go:275] no items to output this cycle
I0321 03:06:53.409785  543705 memory.go:184] no items to output this cycle
E0321 03:07:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:07:03.409781  543705 memory.go:184] no items to output this cycle
I0321 03:07:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 03:07:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:07:13.409811  543705 memory.go:191] Add success.
I0321 03:07:13.409821  543705 cpu.go:282] Add success.
W0321 03:07:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:07:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:07:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:07:13.420231  543705 net.go:648] Add success.
I0321 03:07:13.423195  543705 net.go:770] primary dev: ETH0
I0321 03:07:13.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:07:13.423220  543705 net.go:698] Add success.
I0321 03:07:13.452777  543705 event_worker.go:152] Polling the log file for events...
W0321 03:07:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:07:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 03:07:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:07:14.456913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:07:14.456922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:07:14.456928  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:07:14.456973  543705 disk_worker.go:494] system disk:vda1
I0321 03:07:14.457012  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:07:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:07:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:07:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:07:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:07:16.458011  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:07:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:07:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:07:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:07:23.409805  543705 memory.go:184] no items to output this cycle
I0321 03:07:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 03:07:27.464017  543705 disk_info.go:125] begin check local disk info of client
I0321 03:07:27.466488  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:07:27.466496  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa440 0xc0001fa480]
E0321 03:07:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:07:33.409793  543705 memory.go:184] no items to output this cycle
I0321 03:07:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 03:07:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:07:43.409785  543705 cpu.go:282] Add success.
I0321 03:07:43.409791  543705 memory.go:191] Add success.
I0321 03:07:43.419822  543705 net.go:648] Add success.
I0321 03:07:43.422750  543705 net.go:770] primary dev: ETH0
I0321 03:07:43.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:07:43.422775  543705 net.go:698] Add success.
I0321 03:07:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:07:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:07:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:07:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:07:53.409766  543705 memory.go:184] no items to output this cycle
I0321 03:07:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 03:08:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:08:03.409802  543705 memory.go:184] no items to output this cycle
I0321 03:08:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 03:08:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:08:13.409781  543705 memory.go:191] Add success.
W0321 03:08:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:08:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:08:13.409818  543705 cpu.go:282] Add success.
I0321 03:08:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:08:13.419741  543705 net.go:648] Add success.
I0321 03:08:13.422624  543705 net.go:770] primary dev: ETH0
I0321 03:08:13.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:08:13.422649  543705 net.go:698] Add success.
I0321 03:08:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:08:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:08:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 03:08:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:08:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 03:08:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:08:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:08:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:08:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:08:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:08:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:08:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:08:23.409776  543705 memory.go:184] no items to output this cycle
I0321 03:08:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 03:08:27.467026  543705 disk_info.go:125] begin check local disk info of client
I0321 03:08:27.469561  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:08:27.469568  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5480 0xc0000c54c0]
E0321 03:08:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:08:33.409795  543705 memory.go:184] no items to output this cycle
I0321 03:08:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 03:08:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:08:43.409787  543705 memory.go:191] Add success.
I0321 03:08:43.409790  543705 cpu.go:282] Add success.
I0321 03:08:43.419853  543705 net.go:648] Add success.
I0321 03:08:43.422352  543705 net.go:770] primary dev: ETH0
I0321 03:08:43.422364  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:08:43.422376  543705 net.go:698] Add success.
I0321 03:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:08:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:08:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:08:53.409774  543705 memory.go:184] no items to output this cycle
I0321 03:08:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 03:09:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:09:03.409785  543705 memory.go:184] no items to output this cycle
I0321 03:09:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 03:09:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:09:13.409791  543705 cpu.go:282] Add success.
I0321 03:09:13.409796  543705 memory.go:191] Add success.
W0321 03:09:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:09:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:09:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:09:13.420219  543705 net.go:648] Add success.
I0321 03:09:13.422962  543705 net.go:770] primary dev: ETH0
I0321 03:09:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:09:13.422987  543705 net.go:698] Add success.
I0321 03:09:13.472902  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"996699f6-a69c-4a68-983d-0f636afac8e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:09:13.472934  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:09:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:09:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:09:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 03:09:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:09:14.456646  543705 disk_worker.go:494] system disk:vda1
I0321 03:09:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:09:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:09:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:09:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:09:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:09:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:09:23.409781  543705 memory.go:184] no items to output this cycle
I0321 03:09:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 03:09:27.470065  543705 disk_info.go:125] begin check local disk info of client
I0321 03:09:27.472532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:09:27.472540  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2280 0xc0002b22c0]
E0321 03:09:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:09:33.409799  543705 memory.go:184] no items to output this cycle
I0321 03:09:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 03:09:38.707230  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:09:38.707237  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:09:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:09:43.410811  543705 memory.go:191] Add success.
I0321 03:09:43.409811  543705 cpu.go:282] Add success.
I0321 03:09:43.420543  543705 net.go:648] Add success.
I0321 03:09:43.423163  543705 net.go:770] primary dev: ETH0
I0321 03:09:43.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:09:43.423189  543705 net.go:698] Add success.
I0321 03:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:09:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:09:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:09:53.409792  543705 memory.go:184] no items to output this cycle
I0321 03:09:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 03:10:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:10:03.409784  543705 memory.go:184] no items to output this cycle
I0321 03:10:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 03:10:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:10:13.409787  543705 memory.go:191] Add success.
I0321 03:10:13.409789  543705 cpu.go:282] Add success.
W0321 03:10:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:10:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:10:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:10:13.420552  543705 net.go:648] Add success.
I0321 03:10:13.423295  543705 net.go:770] primary dev: ETH0
I0321 03:10:13.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:10:13.423324  543705 net.go:698] Add success.
I0321 03:10:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:10:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:10:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 03:10:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:10:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 03:10:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:10:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:10:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:10:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:10:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:10:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:10:23.409812  543705 memory.go:184] no items to output this cycle
I0321 03:10:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 03:10:27.473051  543705 disk_info.go:125] begin check local disk info of client
I0321 03:10:27.475587  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:10:27.475602  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b080 0xc00047b0c0]
E0321 03:10:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:10:33.409799  543705 memory.go:184] no items to output this cycle
I0321 03:10:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 03:10:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:10:43.409770  543705 memory.go:191] Add success.
I0321 03:10:43.409804  543705 cpu.go:282] Add success.
I0321 03:10:43.419879  543705 net.go:648] Add success.
I0321 03:10:43.422401  543705 net.go:770] primary dev: ETH0
I0321 03:10:43.422413  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:10:43.422425  543705 net.go:698] Add success.
I0321 03:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:10:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:10:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:10:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:10:53.409800  543705 memory.go:184] no items to output this cycle
I0321 03:10:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 03:11:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:11:03.409808  543705 memory.go:184] no items to output this cycle
I0321 03:11:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 03:11:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:11:13.409792  543705 memory.go:191] Add success.
I0321 03:11:13.409808  543705 cpu.go:282] Add success.
W0321 03:11:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:11:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:11:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:11:13.420042  543705 net.go:648] Add success.
I0321 03:11:13.422735  543705 net.go:770] primary dev: ETH0
I0321 03:11:13.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:11:13.422758  543705 net.go:698] Add success.
I0321 03:11:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:11:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:11:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 03:11:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:11:14.456827  543705 disk_worker.go:494] system disk:vda1
I0321 03:11:14.456856  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:11:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:11:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:11:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:11:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:11:23.409790  543705 memory.go:184] no items to output this cycle
I0321 03:11:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 03:11:27.476092  543705 disk_info.go:125] begin check local disk info of client
I0321 03:11:27.478633  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:11:27.478640  543705 disk_info.go:196] parse disk info done, disk is : [0xc000365c40 0xc000365c80]
E0321 03:11:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:11:33.409785  543705 memory.go:184] no items to output this cycle
I0321 03:11:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 03:11:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:11:43.409815  543705 memory.go:191] Add success.
I0321 03:11:43.409822  543705 cpu.go:282] Add success.
I0321 03:11:43.419863  543705 net.go:648] Add success.
I0321 03:11:43.422560  543705 net.go:770] primary dev: ETH0
I0321 03:11:43.422574  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:11:43.422587  543705 net.go:698] Add success.
I0321 03:11:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:11:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:11:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:11:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:11:53.409779  543705 memory.go:184] no items to output this cycle
I0321 03:11:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 03:12:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:12:03.409809  543705 memory.go:184] no items to output this cycle
I0321 03:12:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 03:12:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:12:13.409813  543705 memory.go:191] Add success.
I0321 03:12:13.409813  543705 cpu.go:282] Add success.
W0321 03:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:12:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:12:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:12:13.420138  543705 net.go:648] Add success.
I0321 03:12:13.422643  543705 net.go:770] primary dev: ETH0
I0321 03:12:13.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:12:13.422667  543705 net.go:698] Add success.
I0321 03:12:13.614730  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d1cbfc7f-4727-45d4-b8e6-55a821f37561","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:12:13.614763  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 03:12:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:12:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 03:12:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:12:14.456242  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:12:14.456252  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:12:14.456258  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:12:14.456905  543705 disk_worker.go:494] system disk:vda1
I0321 03:12:14.456943  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:12:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:12:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:12:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:12:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:12:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:12:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:12:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:12:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:12:23.409815  543705 memory.go:184] no items to output this cycle
I0321 03:12:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 03:12:27.479106  543705 disk_info.go:125] begin check local disk info of client
I0321 03:12:27.481626  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:12:27.481634  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468f80 0xc000468fc0]
E0321 03:12:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:12:33.409803  543705 memory.go:184] no items to output this cycle
I0321 03:12:33.409823  543705 cpu.go:275] no items to output this cycle
I0321 03:12:38.708227  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:12:38.708234  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:12:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:12:43.410581  543705 memory.go:191] Add success.
I0321 03:12:43.409813  543705 cpu.go:282] Add success.
I0321 03:12:43.420282  543705 net.go:648] Add success.
I0321 03:12:43.422806  543705 net.go:770] primary dev: ETH0
I0321 03:12:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:12:43.422837  543705 net.go:698] Add success.
I0321 03:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:12:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:12:53.409786  543705 cpu.go:275] no items to output this cycle
I0321 03:12:53.409797  543705 memory.go:184] no items to output this cycle
E0321 03:13:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:13:03.409783  543705 memory.go:184] no items to output this cycle
I0321 03:13:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 03:13:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:13:13.409781  543705 memory.go:191] Add success.
W0321 03:13:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:13:13.409810  543705 cpu.go:282] Add success.
W0321 03:13:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:13:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:13:13.420038  543705 net.go:648] Add success.
I0321 03:13:13.422810  543705 net.go:770] primary dev: ETH0
I0321 03:13:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:13:13.422835  543705 net.go:698] Add success.
I0321 03:13:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:13:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:13:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 03:13:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:13:14.456542  543705 disk_worker.go:494] system disk:vda1
I0321 03:13:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:13:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:13:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:13:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:13:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:13:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:13:23.409785  543705 memory.go:184] no items to output this cycle
I0321 03:13:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 03:13:27.482121  543705 disk_info.go:125] begin check local disk info of client
I0321 03:13:27.484594  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:13:27.484600  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025e240 0xc00025e280]
E0321 03:13:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:13:33.409781  543705 memory.go:184] no items to output this cycle
I0321 03:13:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 03:13:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:13:43.409781  543705 memory.go:191] Add success.
I0321 03:13:43.409809  543705 cpu.go:282] Add success.
I0321 03:13:43.420005  543705 net.go:648] Add success.
I0321 03:13:43.422603  543705 net.go:770] primary dev: ETH0
I0321 03:13:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:13:43.422633  543705 net.go:698] Add success.
I0321 03:13:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:13:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:13:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:13:53.409769  543705 memory.go:184] no items to output this cycle
I0321 03:13:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 03:14:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:14:03.409805  543705 memory.go:184] no items to output this cycle
I0321 03:14:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 03:14:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:14:13.409783  543705 memory.go:191] Add success.
W0321 03:14:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:14:13.409814  543705 cpu.go:282] Add success.
W0321 03:14:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:14:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:14:13.420083  543705 net.go:648] Add success.
I0321 03:14:13.422957  543705 net.go:770] primary dev: ETH0
I0321 03:14:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:14:13.422982  543705 net.go:698] Add success.
I0321 03:14:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:14:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:14:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 03:14:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:14:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 03:14:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:14:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:14:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:14:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:14:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:14:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:14:23.409782  543705 memory.go:184] no items to output this cycle
I0321 03:14:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 03:14:27.485135  543705 disk_info.go:125] begin check local disk info of client
I0321 03:14:27.487661  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:14:27.487670  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e19c0 0xc0003e1a00]
E0321 03:14:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:14:33.409797  543705 memory.go:184] no items to output this cycle
I0321 03:14:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 03:14:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:14:43.409789  543705 memory.go:191] Add success.
I0321 03:14:43.409808  543705 cpu.go:282] Add success.
I0321 03:14:43.419875  543705 net.go:648] Add success.
I0321 03:14:43.422694  543705 net.go:770] primary dev: ETH0
I0321 03:14:43.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:14:43.422719  543705 net.go:698] Add success.
I0321 03:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:14:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:14:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:14:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:14:53.409780  543705 memory.go:184] no items to output this cycle
I0321 03:14:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 03:15:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:15:03.409771  543705 memory.go:184] no items to output this cycle
I0321 03:15:03.409774  543705 cpu.go:275] no items to output this cycle
E0321 03:15:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:15:13.409787  543705 memory.go:191] Add success.
I0321 03:15:13.409810  543705 cpu.go:282] Add success.
W0321 03:15:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:15:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:15:13.420055  543705 net.go:648] Add success.
I0321 03:15:13.422916  543705 net.go:770] primary dev: ETH0
I0321 03:15:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:15:13.422942  543705 net.go:698] Add success.
I0321 03:15:13.469247  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"694737c2-48b2-45a7-b202-347454f6fd85","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:15:13.469280  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:15:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:15:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 03:15:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:15:14.456653  543705 disk_worker.go:494] system disk:vda1
I0321 03:15:14.456684  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:15:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:15:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:15:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:15:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:15:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:15:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:15:23.409816  543705 memory.go:184] no items to output this cycle
I0321 03:15:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 03:15:27.488080  543705 disk_info.go:125] begin check local disk info of client
I0321 03:15:27.490515  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:15:27.490522  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1300 0xc0003e1340]
E0321 03:15:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:15:33.409796  543705 memory.go:184] no items to output this cycle
I0321 03:15:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 03:15:38.708372  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:15:38.708379  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:15:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:15:43.410576  543705 memory.go:191] Add success.
I0321 03:15:43.409821  543705 cpu.go:282] Add success.
I0321 03:15:43.420256  543705 net.go:648] Add success.
I0321 03:15:43.422794  543705 net.go:770] primary dev: ETH0
I0321 03:15:43.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:15:43.422820  543705 net.go:698] Add success.
I0321 03:15:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:15:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:15:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:15:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:15:53.409776  543705 memory.go:184] no items to output this cycle
I0321 03:15:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 03:16:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:16:03.409786  543705 memory.go:184] no items to output this cycle
I0321 03:16:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 03:16:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:16:13.409814  543705 memory.go:191] Add success.
I0321 03:16:13.409814  543705 cpu.go:282] Add success.
W0321 03:16:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:16:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:16:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:16:13.420133  543705 net.go:648] Add success.
I0321 03:16:13.422919  543705 net.go:770] primary dev: ETH0
I0321 03:16:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:16:13.422955  543705 net.go:698] Add success.
I0321 03:16:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:16:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:16:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 03:16:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:16:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 03:16:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:16:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:16:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:16:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:16:23.409785  543705 memory.go:184] no items to output this cycle
I0321 03:16:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 03:16:27.491147  543705 disk_info.go:125] begin check local disk info of client
I0321 03:16:27.493724  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:16:27.493731  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bc0c0 0xc0004bc100]
E0321 03:16:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:16:33.409774  543705 memory.go:184] no items to output this cycle
I0321 03:16:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 03:16:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:16:43.409796  543705 memory.go:191] Add success.
I0321 03:16:43.409817  543705 cpu.go:282] Add success.
I0321 03:16:43.419874  543705 net.go:648] Add success.
I0321 03:16:43.422428  543705 net.go:770] primary dev: ETH0
I0321 03:16:43.422444  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:16:43.422459  543705 net.go:698] Add success.
I0321 03:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:16:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:16:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:16:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:16:53.409772  543705 memory.go:184] no items to output this cycle
I0321 03:16:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 03:17:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:17:03.409774  543705 memory.go:184] no items to output this cycle
I0321 03:17:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 03:17:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:17:13.409774  543705 memory.go:191] Add success.
W0321 03:17:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:17:13.409803  543705 cpu.go:282] Add success.
W0321 03:17:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:17:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:17:13.420210  543705 net.go:648] Add success.
I0321 03:17:13.422878  543705 net.go:770] primary dev: ETH0
I0321 03:17:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:17:13.422903  543705 net.go:698] Add success.
I0321 03:17:13.453413  543705 event_worker.go:152] Polling the log file for events...
W0321 03:17:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:17:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 03:17:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:17:14.456889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:17:14.456898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:17:14.456904  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:17:14.456976  543705 disk_worker.go:494] system disk:vda1
I0321 03:17:14.457017  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:17:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:17:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:17:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:17:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:17:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:17:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:17:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:17:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:17:23.409773  543705 memory.go:184] no items to output this cycle
I0321 03:17:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 03:17:27.494115  543705 disk_info.go:125] begin check local disk info of client
I0321 03:17:27.496614  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:17:27.496621  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f0480 0xc0000f04c0]
E0321 03:17:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:17:33.409799  543705 memory.go:184] no items to output this cycle
I0321 03:17:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 03:17:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:17:43.409813  543705 memory.go:191] Add success.
I0321 03:17:43.409819  543705 cpu.go:282] Add success.
I0321 03:17:43.419891  543705 net.go:648] Add success.
I0321 03:17:43.423084  543705 net.go:770] primary dev: ETH0
I0321 03:17:43.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:17:43.423108  543705 net.go:698] Add success.
I0321 03:17:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:17:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:17:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:17:53.409813  543705 memory.go:184] no items to output this cycle
I0321 03:17:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 03:18:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:18:03.409772  543705 memory.go:184] no items to output this cycle
I0321 03:18:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 03:18:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:18:13.409790  543705 memory.go:191] Add success.
I0321 03:18:13.409815  543705 cpu.go:282] Add success.
W0321 03:18:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:18:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:18:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:18:13.420160  543705 net.go:648] Add success.
I0321 03:18:13.422976  543705 net.go:770] primary dev: ETH0
I0321 03:18:13.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:18:13.423002  543705 net.go:698] Add success.
I0321 03:18:13.469666  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e9347a4-1f56-4e26-8a0f-13eaa7065f56","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:18:13.469704  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:18:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:18:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:18:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 03:18:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:18:14.456676  543705 disk_worker.go:494] system disk:vda1
I0321 03:18:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:18:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:18:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:18:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:18:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:18:23.409780  543705 memory.go:184] no items to output this cycle
I0321 03:18:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 03:18:27.497182  543705 disk_info.go:125] begin check local disk info of client
I0321 03:18:27.499708  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:18:27.499715  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029bdc0 0xc00029be00]
E0321 03:18:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:18:33.409794  543705 memory.go:184] no items to output this cycle
I0321 03:18:33.409797  543705 cpu.go:275] no items to output this cycle
I0321 03:18:38.709230  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:18:38.709237  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:18:43.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:18:43.410826  543705 memory.go:191] Add success.
I0321 03:18:43.409996  543705 cpu.go:282] Add success.
I0321 03:18:43.419712  543705 net.go:648] Add success.
I0321 03:18:43.422336  543705 net.go:770] primary dev: ETH0
I0321 03:18:43.422351  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:18:43.422366  543705 net.go:698] Add success.
I0321 03:18:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:18:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:18:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:18:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:18:53.409813  543705 memory.go:184] no items to output this cycle
I0321 03:18:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 03:19:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:19:03.409781  543705 memory.go:184] no items to output this cycle
I0321 03:19:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 03:19:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:19:13.409783  543705 cpu.go:282] Add success.
I0321 03:19:13.409790  543705 memory.go:191] Add success.
W0321 03:19:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:19:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:19:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:19:13.420136  543705 net.go:648] Add success.
I0321 03:19:13.422648  543705 net.go:770] primary dev: ETH0
I0321 03:19:13.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:19:13.422674  543705 net.go:698] Add success.
I0321 03:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:19:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:19:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 03:19:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:19:14.456566  543705 disk_worker.go:494] system disk:vda1
I0321 03:19:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:19:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:19:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:19:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:19:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:19:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:19:23.409787  543705 memory.go:184] no items to output this cycle
I0321 03:19:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 03:19:27.500137  543705 disk_info.go:125] begin check local disk info of client
I0321 03:19:27.502630  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:19:27.502636  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a240 0xc00029a280]
E0321 03:19:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:19:33.409763  543705 memory.go:184] no items to output this cycle
I0321 03:19:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 03:19:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:19:43.409780  543705 memory.go:191] Add success.
I0321 03:19:43.409798  543705 cpu.go:282] Add success.
I0321 03:19:43.420018  543705 net.go:648] Add success.
I0321 03:19:43.422867  543705 net.go:770] primary dev: ETH0
I0321 03:19:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:19:43.422892  543705 net.go:698] Add success.
I0321 03:19:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:19:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:19:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:19:53.409775  543705 memory.go:184] no items to output this cycle
I0321 03:19:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 03:20:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:20:03.409798  543705 memory.go:184] no items to output this cycle
I0321 03:20:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 03:20:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:20:13.409781  543705 memory.go:191] Add success.
I0321 03:20:13.409799  543705 cpu.go:282] Add success.
W0321 03:20:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:20:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:20:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:20:13.420132  543705 net.go:648] Add success.
I0321 03:20:13.422734  543705 net.go:770] primary dev: ETH0
I0321 03:20:13.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:20:13.422759  543705 net.go:698] Add success.
I0321 03:20:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:20:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:20:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 03:20:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:20:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 03:20:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:20:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:20:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:20:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:20:23.409809  543705 memory.go:184] no items to output this cycle
I0321 03:20:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 03:20:27.503206  543705 disk_info.go:125] begin check local disk info of client
I0321 03:20:27.505722  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:20:27.505730  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe900 0xc0003fe940]
E0321 03:20:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:20:33.409795  543705 memory.go:184] no items to output this cycle
I0321 03:20:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 03:20:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:20:43.409801  543705 memory.go:191] Add success.
I0321 03:20:43.409802  543705 cpu.go:282] Add success.
I0321 03:20:43.420145  543705 net.go:648] Add success.
I0321 03:20:43.422975  543705 net.go:770] primary dev: ETH0
I0321 03:20:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:20:43.422999  543705 net.go:698] Add success.
I0321 03:20:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:20:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:20:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:20:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:20:53.409799  543705 memory.go:184] no items to output this cycle
I0321 03:20:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 03:21:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:21:03.409783  543705 memory.go:184] no items to output this cycle
I0321 03:21:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 03:21:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:21:13.409774  543705 memory.go:191] Add success.
I0321 03:21:13.409797  543705 cpu.go:282] Add success.
W0321 03:21:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:21:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:21:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:21:13.420071  543705 net.go:648] Add success.
I0321 03:21:13.422860  543705 net.go:770] primary dev: ETH0
I0321 03:21:13.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:21:13.422884  543705 net.go:698] Add success.
I0321 03:21:13.462812  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4407e47-3a42-4841-9aaf-d906f4843399","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:21:13.462846  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:21:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:21:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:21:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 03:21:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:21:14.456466  543705 disk_worker.go:494] system disk:vda1
I0321 03:21:14.456511  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:21:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:21:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:21:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:21:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:21:23.409783  543705 memory.go:184] no items to output this cycle
I0321 03:21:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 03:21:27.506184  543705 disk_info.go:125] begin check local disk info of client
I0321 03:21:27.508532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:21:27.508539  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5440 0xc0000c5480]
E0321 03:21:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:21:33.409777  543705 memory.go:184] no items to output this cycle
I0321 03:21:33.409800  543705 cpu.go:275] no items to output this cycle
I0321 03:21:38.709735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:21:38.709742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:21:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:21:43.410592  543705 memory.go:191] Add success.
I0321 03:21:43.409810  543705 cpu.go:282] Add success.
I0321 03:21:43.419708  543705 net.go:648] Add success.
I0321 03:21:43.422104  543705 net.go:770] primary dev: ETH0
I0321 03:21:43.422118  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:21:43.422130  543705 net.go:698] Add success.
I0321 03:21:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:21:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:21:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:21:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:21:53.409769  543705 memory.go:184] no items to output this cycle
I0321 03:21:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 03:22:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:22:03.409798  543705 memory.go:184] no items to output this cycle
I0321 03:22:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 03:22:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:22:13.409784  543705 memory.go:191] Add success.
W0321 03:22:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:22:13.409819  543705 cpu.go:282] Add success.
W0321 03:22:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:22:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:22:13.420211  543705 net.go:648] Add success.
I0321 03:22:13.422999  543705 net.go:770] primary dev: ETH0
I0321 03:22:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:22:13.423028  543705 net.go:698] Add success.
W0321 03:22:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:22:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 03:22:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:22:14.456796  543705 disk_worker.go:494] system disk:vda1
I0321 03:22:14.456837  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:22:14.457142  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:22:14.457149  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:22:14.457154  543705 custom_config.go:64] query custom config with name: gpu
E0321 03:22:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:22:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:22:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:22:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:22:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:22:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:22:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:22:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:22:23.409792  543705 memory.go:184] no items to output this cycle
I0321 03:22:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 03:22:27.509245  543705 disk_info.go:125] begin check local disk info of client
I0321 03:22:27.511875  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:22:27.511881  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029aa40 0xc00029aa80]
E0321 03:22:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:22:33.409777  543705 memory.go:184] no items to output this cycle
I0321 03:22:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 03:22:43.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:22:43.409903  543705 cpu.go:282] Add success.
I0321 03:22:43.409938  543705 memory.go:191] Add success.
I0321 03:22:43.419715  543705 net.go:648] Add success.
I0321 03:22:43.422430  543705 net.go:770] primary dev: ETH0
I0321 03:22:43.422443  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:22:43.422454  543705 net.go:698] Add success.
I0321 03:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:22:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:22:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:22:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:22:53.409778  543705 memory.go:184] no items to output this cycle
I0321 03:22:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 03:23:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:23:03.409778  543705 memory.go:184] no items to output this cycle
I0321 03:23:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 03:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:23:13.409809  543705 memory.go:191] Add success.
I0321 03:23:13.409815  543705 cpu.go:282] Add success.
W0321 03:23:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:23:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:23:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:23:13.420132  543705 net.go:648] Add success.
I0321 03:23:13.422941  543705 net.go:770] primary dev: ETH0
I0321 03:23:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:23:13.422971  543705 net.go:698] Add success.
I0321 03:23:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:23:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:23:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 03:23:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:23:14.456485  543705 disk_worker.go:494] system disk:vda1
I0321 03:23:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:23:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:23:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:23:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:23:23.409781  543705 memory.go:184] no items to output this cycle
I0321 03:23:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 03:23:27.512201  543705 disk_info.go:125] begin check local disk info of client
I0321 03:23:27.514753  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:23:27.514759  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252780 0xc0002527c0]
E0321 03:23:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:23:33.409807  543705 memory.go:184] no items to output this cycle
I0321 03:23:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 03:23:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:23:43.409830  543705 memory.go:191] Add success.
I0321 03:23:43.409837  543705 cpu.go:282] Add success.
I0321 03:23:43.420016  543705 net.go:648] Add success.
I0321 03:23:43.422471  543705 net.go:770] primary dev: ETH0
I0321 03:23:43.422484  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:23:43.422497  543705 net.go:698] Add success.
I0321 03:23:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:23:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:23:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:23:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:23:53.409778  543705 memory.go:184] no items to output this cycle
I0321 03:23:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 03:24:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:24:03.409810  543705 memory.go:184] no items to output this cycle
I0321 03:24:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 03:24:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:24:13.409774  543705 memory.go:191] Add success.
W0321 03:24:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:24:13.409805  543705 cpu.go:282] Add success.
W0321 03:24:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:24:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:24:13.420041  543705 net.go:648] Add success.
I0321 03:24:13.422671  543705 net.go:770] primary dev: ETH0
I0321 03:24:13.422687  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:24:13.422701  543705 net.go:698] Add success.
I0321 03:24:13.468591  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae710903-eedb-4f70-ba55-e2d1c399a47d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:24:13.468626  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:24:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:24:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:24:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 03:24:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:24:14.456512  543705 disk_worker.go:494] system disk:vda1
I0321 03:24:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:24:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:24:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:24:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:24:23.409819  543705 memory.go:184] no items to output this cycle
I0321 03:24:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 03:24:27.515220  543705 disk_info.go:125] begin check local disk info of client
I0321 03:24:27.517746  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:24:27.517754  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326540 0xc000326580]
E0321 03:24:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:24:33.409795  543705 memory.go:184] no items to output this cycle
I0321 03:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 03:24:38.711255  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:24:38.711262  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0321 03:24:43.409899  543705 cpu.go:282] Add success.
E0321 03:24:43.409941  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:24:43.410868  543705 memory.go:191] Add success.
I0321 03:24:43.419710  543705 net.go:648] Add success.
I0321 03:24:43.422412  543705 net.go:770] primary dev: ETH0
I0321 03:24:43.422425  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:24:43.422436  543705 net.go:698] Add success.
I0321 03:24:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:24:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:24:53.410706  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:24:53.410720  543705 memory.go:184] no items to output this cycle
I0321 03:24:53.410746  543705 cpu.go:275] no items to output this cycle
E0321 03:25:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:25:03.409776  543705 memory.go:184] no items to output this cycle
I0321 03:25:03.409775  543705 cpu.go:275] no items to output this cycle
E0321 03:25:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:25:13.409790  543705 memory.go:191] Add success.
I0321 03:25:13.409791  543705 cpu.go:282] Add success.
W0321 03:25:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:25:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:25:13.420103  543705 net.go:648] Add success.
I0321 03:25:13.422656  543705 net.go:770] primary dev: ETH0
I0321 03:25:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:25:13.422685  543705 net.go:698] Add success.
I0321 03:25:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:25:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:25:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0321 03:25:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:25:14.456606  543705 disk_worker.go:494] system disk:vda1
I0321 03:25:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:25:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:25:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:25:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:25:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:25:23.409789  543705 memory.go:184] no items to output this cycle
I0321 03:25:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 03:25:27.518233  543705 disk_info.go:125] begin check local disk info of client
I0321 03:25:27.520820  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:25:27.520826  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275dc0 0xc000275e00]
E0321 03:25:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:25:33.409789  543705 memory.go:184] no items to output this cycle
I0321 03:25:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 03:25:43.409942  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:25:43.409952  543705 cpu.go:282] Add success.
I0321 03:25:43.409984  543705 memory.go:191] Add success.
I0321 03:25:43.419708  543705 net.go:648] Add success.
I0321 03:25:43.422497  543705 net.go:770] primary dev: ETH0
I0321 03:25:43.422510  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:25:43.422522  543705 net.go:698] Add success.
I0321 03:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:25:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:25:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:25:53.409794  543705 memory.go:184] no items to output this cycle
I0321 03:25:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 03:26:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:26:03.409775  543705 memory.go:184] no items to output this cycle
I0321 03:26:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 03:26:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:26:13.409810  543705 memory.go:191] Add success.
I0321 03:26:13.409811  543705 cpu.go:282] Add success.
W0321 03:26:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:26:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:26:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:26:13.420136  543705 net.go:648] Add success.
I0321 03:26:13.422721  543705 net.go:770] primary dev: ETH0
I0321 03:26:13.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:26:13.422747  543705 net.go:698] Add success.
I0321 03:26:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:26:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:26:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 03:26:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:26:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 03:26:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:26:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:26:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:26:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:26:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:26:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:26:23.409787  543705 memory.go:184] no items to output this cycle
I0321 03:26:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 03:26:27.521249  543705 disk_info.go:125] begin check local disk info of client
I0321 03:26:27.524135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:26:27.524142  543705 disk_info.go:196] parse disk info done, disk is : [0xc000217dc0 0xc000217e00]
E0321 03:26:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:26:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 03:26:33.409818  543705 memory.go:184] no items to output this cycle
E0321 03:26:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:26:43.409795  543705 memory.go:191] Add success.
I0321 03:26:43.409798  543705 cpu.go:282] Add success.
I0321 03:26:43.419744  543705 net.go:648] Add success.
I0321 03:26:43.422362  543705 net.go:770] primary dev: ETH0
I0321 03:26:43.422375  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:26:43.422386  543705 net.go:698] Add success.
I0321 03:26:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:26:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:26:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:26:53.409770  543705 memory.go:184] no items to output this cycle
I0321 03:26:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 03:27:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:27:03.409799  543705 memory.go:184] no items to output this cycle
I0321 03:27:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 03:27:13.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:27:13.409770  543705 memory.go:191] Add success.
W0321 03:27:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:27:13.409799  543705 cpu.go:282] Add success.
W0321 03:27:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:27:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:27:13.420122  543705 net.go:648] Add success.
I0321 03:27:13.422815  543705 net.go:770] primary dev: ETH0
I0321 03:27:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:27:13.422840  543705 net.go:698] Add success.
I0321 03:27:13.429176  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 03:27:13.453352  543705 event_worker.go:152] Polling the log file for events...
I0321 03:27:13.503802  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"290c338d-f314-4d34-8b4f-c1c53bb3809f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:27:13.503835  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 03:27:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:27:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 03:27:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:27:14.455881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:27:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:27:14.455895  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:27:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 03:27:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:27:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:27:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:27:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:27:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:27:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:27:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:27:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:27:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:27:23.409784  543705 memory.go:184] no items to output this cycle
I0321 03:27:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 03:27:27.524254  543705 disk_info.go:125] begin check local disk info of client
I0321 03:27:27.526842  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:27:27.526848  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb3c0 0xc0002bb400]
E0321 03:27:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:27:33.409801  543705 memory.go:184] no items to output this cycle
I0321 03:27:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 03:27:38.712266  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:27:38.712272  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:27:43.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:27:43.410774  543705 memory.go:191] Add success.
I0321 03:27:43.409947  543705 cpu.go:282] Add success.
I0321 03:27:43.419728  543705 net.go:648] Add success.
I0321 03:27:43.422125  543705 net.go:770] primary dev: ETH0
I0321 03:27:43.422156  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:27:43.422169  543705 net.go:698] Add success.
I0321 03:27:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:27:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:27:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:27:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:27:53.409773  543705 memory.go:184] no items to output this cycle
I0321 03:27:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 03:28:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:28:03.409799  543705 memory.go:184] no items to output this cycle
I0321 03:28:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 03:28:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:28:13.409771  543705 memory.go:191] Add success.
W0321 03:28:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:28:13.409805  543705 cpu.go:282] Add success.
W0321 03:28:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:28:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:28:13.420127  543705 net.go:648] Add success.
I0321 03:28:13.422828  543705 net.go:770] primary dev: ETH0
I0321 03:28:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:28:13.422857  543705 net.go:698] Add success.
I0321 03:28:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:28:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:28:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 03:28:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:28:14.456514  543705 disk_worker.go:494] system disk:vda1
I0321 03:28:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:28:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:28:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:28:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:28:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:28:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:28:23.409788  543705 memory.go:184] no items to output this cycle
I0321 03:28:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 03:28:27.527289  543705 disk_info.go:125] begin check local disk info of client
I0321 03:28:27.529855  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:28:27.529862  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305380 0xc0003053c0]
E0321 03:28:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:28:33.409798  543705 memory.go:184] no items to output this cycle
I0321 03:28:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 03:28:43.409898  543705 cpu.go:282] Add success.
E0321 03:28:43.409921  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:28:43.409947  543705 memory.go:191] Add success.
I0321 03:28:43.419720  543705 net.go:648] Add success.
I0321 03:28:43.422266  543705 net.go:770] primary dev: ETH0
I0321 03:28:43.422280  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:28:43.422292  543705 net.go:698] Add success.
I0321 03:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:28:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:28:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:28:53.409801  543705 memory.go:184] no items to output this cycle
I0321 03:28:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 03:29:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:29:03.409786  543705 memory.go:184] no items to output this cycle
I0321 03:29:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 03:29:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:29:13.409780  543705 memory.go:191] Add success.
I0321 03:29:13.409788  543705 cpu.go:282] Add success.
W0321 03:29:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:29:13.412405  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:29:13.412410  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:29:13.420032  543705 net.go:648] Add success.
I0321 03:29:13.421776  543705 net.go:770] primary dev: ETH0
I0321 03:29:13.421790  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:29:13.421802  543705 net.go:698] Add success.
I0321 03:29:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:29:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:29:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0321 03:29:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:29:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 03:29:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:29:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:29:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:29:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:29:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:29:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:29:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:29:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 03:29:23.409802  543705 memory.go:184] no items to output this cycle
I0321 03:29:27.530283  543705 disk_info.go:125] begin check local disk info of client
I0321 03:29:27.532964  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:29:27.532971  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266380 0xc0002663c0]
E0321 03:29:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:29:33.409774  543705 memory.go:184] no items to output this cycle
I0321 03:29:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 03:29:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:29:43.409813  543705 memory.go:191] Add success.
I0321 03:29:43.409822  543705 cpu.go:282] Add success.
I0321 03:29:43.420394  543705 net.go:648] Add success.
I0321 03:29:43.423150  543705 net.go:770] primary dev: ETH0
I0321 03:29:43.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:29:43.423184  543705 net.go:698] Add success.
I0321 03:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:29:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:29:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:29:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:29:53.410401  543705 cpu.go:275] no items to output this cycle
I0321 03:29:53.410403  543705 memory.go:184] no items to output this cycle
E0321 03:30:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:30:03.409768  543705 memory.go:184] no items to output this cycle
I0321 03:30:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 03:30:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:30:13.409812  543705 memory.go:191] Add success.
I0321 03:30:13.409816  543705 cpu.go:282] Add success.
W0321 03:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:30:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:30:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:30:13.420138  543705 net.go:648] Add success.
I0321 03:30:13.423260  543705 net.go:770] primary dev: ETH0
I0321 03:30:13.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:30:13.423284  543705 net.go:698] Add success.
I0321 03:30:13.653537  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"760d5416-8b36-4755-bcbb-b12a54baeac6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:30:13.653576  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:30:14.453964  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:30:14.455229  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:30:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0321 03:30:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:30:14.456751  543705 disk_worker.go:494] system disk:vda1
I0321 03:30:14.456808  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:30:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:30:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:30:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:30:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:30:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:30:23.409811  543705 memory.go:184] no items to output this cycle
I0321 03:30:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 03:30:27.533316  543705 disk_info.go:125] begin check local disk info of client
I0321 03:30:27.535978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:30:27.535986  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003acac0 0xc0003acb00]
E0321 03:30:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:30:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 03:30:33.409785  543705 memory.go:184] no items to output this cycle
I0321 03:30:38.712414  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:30:38.712421  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:30:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:30:43.410675  543705 memory.go:191] Add success.
I0321 03:30:43.409823  543705 cpu.go:282] Add success.
I0321 03:30:43.420422  543705 net.go:648] Add success.
I0321 03:30:43.423224  543705 net.go:770] primary dev: ETH0
I0321 03:30:43.423239  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:30:43.423253  543705 net.go:698] Add success.
I0321 03:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:30:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:30:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:30:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:30:53.409797  543705 memory.go:184] no items to output this cycle
I0321 03:30:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 03:31:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:31:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 03:31:03.409786  543705 memory.go:184] no items to output this cycle
E0321 03:31:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:31:13.409787  543705 memory.go:191] Add success.
I0321 03:31:13.409790  543705 cpu.go:282] Add success.
W0321 03:31:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:31:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:31:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:31:13.420085  543705 net.go:648] Add success.
I0321 03:31:13.422933  543705 net.go:770] primary dev: ETH0
I0321 03:31:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:31:13.422961  543705 net.go:698] Add success.
I0321 03:31:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:31:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:31:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0321 03:31:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:31:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 03:31:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:31:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:31:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:31:23.409807  543705 memory.go:184] no items to output this cycle
I0321 03:31:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 03:31:27.536319  543705 disk_info.go:125] begin check local disk info of client
I0321 03:31:27.538881  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:31:27.538887  543705 disk_info.go:196] parse disk info done, disk is : [0xc000496e80 0xc000496ec0]
E0321 03:31:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:31:33.409800  543705 memory.go:184] no items to output this cycle
I0321 03:31:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 03:31:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:31:43.409798  543705 cpu.go:282] Add success.
I0321 03:31:43.409800  543705 memory.go:191] Add success.
I0321 03:31:43.420206  543705 net.go:648] Add success.
I0321 03:31:43.423326  543705 net.go:770] primary dev: ETH0
I0321 03:31:43.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:31:43.423352  543705 net.go:698] Add success.
I0321 03:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:31:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:31:53.410615  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:31:53.410750  543705 memory.go:184] no items to output this cycle
I0321 03:31:53.410758  543705 cpu.go:275] no items to output this cycle
E0321 03:32:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:32:03.409780  543705 memory.go:184] no items to output this cycle
I0321 03:32:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 03:32:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:32:13.409783  543705 cpu.go:282] Add success.
I0321 03:32:13.409794  543705 memory.go:191] Add success.
W0321 03:32:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:32:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:32:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:32:13.420110  543705 net.go:648] Add success.
I0321 03:32:13.423014  543705 net.go:770] primary dev: ETH0
I0321 03:32:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:32:13.423043  543705 net.go:698] Add success.
W0321 03:32:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:32:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 03:32:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:32:14.455894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:32:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:32:14.455908  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:32:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 03:32:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:32:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:32:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:32:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:32:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:32:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:32:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:32:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:32:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:32:23.409786  543705 memory.go:184] no items to output this cycle
I0321 03:32:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 03:32:27.539351  543705 disk_info.go:125] begin check local disk info of client
I0321 03:32:27.541936  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:32:27.541942  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb200 0xc0002bb240]
E0321 03:32:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:32:33.409802  543705 memory.go:184] no items to output this cycle
I0321 03:32:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 03:32:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:32:43.409789  543705 memory.go:191] Add success.
I0321 03:32:43.409814  543705 cpu.go:282] Add success.
I0321 03:32:43.419971  543705 net.go:648] Add success.
I0321 03:32:43.422731  543705 net.go:770] primary dev: ETH0
I0321 03:32:43.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:32:43.422756  543705 net.go:698] Add success.
I0321 03:32:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:32:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:32:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:32:53.409777  543705 memory.go:184] no items to output this cycle
I0321 03:32:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 03:33:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:33:03.409794  543705 memory.go:184] no items to output this cycle
I0321 03:33:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 03:33:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:33:13.409780  543705 memory.go:191] Add success.
W0321 03:33:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:33:13.409810  543705 cpu.go:282] Add success.
W0321 03:33:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:33:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:33:13.420134  543705 net.go:648] Add success.
I0321 03:33:13.423338  543705 net.go:770] primary dev: ETH0
I0321 03:33:13.423350  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:33:13.423362  543705 net.go:698] Add success.
I0321 03:33:13.519313  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71c5817f-a75e-42ff-b212-4dd5cf807904","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:33:13.519346  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:33:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:33:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:33:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 03:33:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:33:14.456664  543705 disk_worker.go:494] system disk:vda1
I0321 03:33:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:33:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:33:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:33:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:33:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:33:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:33:23.409775  543705 memory.go:184] no items to output this cycle
I0321 03:33:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 03:33:27.542355  543705 disk_info.go:125] begin check local disk info of client
I0321 03:33:27.544934  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:33:27.544940  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0321 03:33:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:33:33.409786  543705 memory.go:184] no items to output this cycle
I0321 03:33:33.409843  543705 cpu.go:275] no items to output this cycle
I0321 03:33:38.713252  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:33:38.713259  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:33:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:33:43.410668  543705 memory.go:191] Add success.
I0321 03:33:43.409811  543705 cpu.go:282] Add success.
I0321 03:33:43.420371  543705 net.go:648] Add success.
I0321 03:33:43.423025  543705 net.go:770] primary dev: ETH0
I0321 03:33:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:33:43.423054  543705 net.go:698] Add success.
I0321 03:33:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:33:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:33:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:33:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:33:53.409785  543705 memory.go:184] no items to output this cycle
I0321 03:33:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 03:34:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:34:03.409810  543705 memory.go:184] no items to output this cycle
I0321 03:34:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 03:34:13.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:34:13.409909  543705 memory.go:191] Add success.
W0321 03:34:13.409937  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:34:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:34:13.410013  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:34:13.410119  543705 cpu.go:282] Add success.
I0321 03:34:13.419710  543705 net.go:648] Add success.
I0321 03:34:13.422201  543705 net.go:770] primary dev: ETH0
I0321 03:34:13.422214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:34:13.422225  543705 net.go:698] Add success.
I0321 03:34:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:34:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:34:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 03:34:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:34:14.456546  543705 disk_worker.go:494] system disk:vda1
I0321 03:34:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:34:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:34:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:34:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:34:23.409790  543705 memory.go:184] no items to output this cycle
I0321 03:34:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 03:34:27.545371  543705 disk_info.go:125] begin check local disk info of client
I0321 03:34:27.547905  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:34:27.547912  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bbc0 0xc00007bc00]
E0321 03:34:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:34:33.409780  543705 memory.go:184] no items to output this cycle
I0321 03:34:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 03:34:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:34:43.409810  543705 memory.go:191] Add success.
I0321 03:34:43.409817  543705 cpu.go:282] Add success.
I0321 03:34:43.419994  543705 net.go:648] Add success.
I0321 03:34:43.422500  543705 net.go:770] primary dev: ETH0
I0321 03:34:43.422516  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:34:43.422528  543705 net.go:698] Add success.
I0321 03:34:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:34:53.410370  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:34:53.410384  543705 memory.go:184] no items to output this cycle
I0321 03:34:53.410386  543705 cpu.go:275] no items to output this cycle
E0321 03:35:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:35:03.409773  543705 memory.go:184] no items to output this cycle
I0321 03:35:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 03:35:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:35:13.409794  543705 memory.go:191] Add success.
I0321 03:35:13.409794  543705 cpu.go:282] Add success.
W0321 03:35:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:35:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:35:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:35:13.420067  543705 net.go:648] Add success.
I0321 03:35:13.422525  543705 net.go:770] primary dev: ETH0
I0321 03:35:13.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:35:13.422553  543705 net.go:698] Add success.
I0321 03:35:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:35:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:35:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 03:35:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:35:14.456555  543705 disk_worker.go:494] system disk:vda1
I0321 03:35:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:35:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:35:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:35:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:35:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:35:23.409807  543705 memory.go:184] no items to output this cycle
I0321 03:35:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 03:35:27.548389  543705 disk_info.go:125] begin check local disk info of client
I0321 03:35:27.550980  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:35:27.550987  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5680 0xc0000c56c0]
E0321 03:35:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:35:33.409804  543705 memory.go:184] no items to output this cycle
I0321 03:35:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 03:35:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:35:43.409816  543705 memory.go:191] Add success.
I0321 03:35:43.409824  543705 cpu.go:282] Add success.
I0321 03:35:43.420027  543705 net.go:648] Add success.
I0321 03:35:43.421013  543705 net.go:770] primary dev: ETH0
I0321 03:35:43.421027  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:35:43.421042  543705 net.go:698] Add success.
I0321 03:35:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:35:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:35:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:35:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:35:53.409795  543705 memory.go:184] no items to output this cycle
I0321 03:35:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 03:36:03.409844  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:36:03.409869  543705 memory.go:184] no items to output this cycle
I0321 03:36:03.409951  543705 cpu.go:275] no items to output this cycle
E0321 03:36:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:36:13.409809  543705 memory.go:191] Add success.
I0321 03:36:13.409822  543705 cpu.go:282] Add success.
W0321 03:36:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:36:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:36:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:36:13.420137  543705 net.go:648] Add success.
I0321 03:36:13.422712  543705 net.go:770] primary dev: ETH0
I0321 03:36:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:36:13.422741  543705 net.go:698] Add success.
I0321 03:36:13.463603  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1b4f24e2-1c58-4c13-8c64-a4f7cac9e17d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:36:13.463636  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:36:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:36:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:36:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 03:36:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:36:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 03:36:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:36:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:36:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:36:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:36:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:36:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:36:23.409806  543705 memory.go:184] no items to output this cycle
I0321 03:36:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 03:36:27.551407  543705 disk_info.go:125] begin check local disk info of client
I0321 03:36:27.553997  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:36:27.554003  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048adc0 0xc00048ae00]
E0321 03:36:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:36:33.409799  543705 memory.go:184] no items to output this cycle
I0321 03:36:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 03:36:38.713728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:36:38.713735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:36:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:36:43.410618  543705 memory.go:191] Add success.
I0321 03:36:43.409803  543705 cpu.go:282] Add success.
I0321 03:36:43.420325  543705 net.go:648] Add success.
I0321 03:36:43.422843  543705 net.go:770] primary dev: ETH0
I0321 03:36:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:36:43.422869  543705 net.go:698] Add success.
I0321 03:36:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:36:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:36:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:36:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:36:53.409774  543705 memory.go:184] no items to output this cycle
I0321 03:36:53.409776  543705 cpu.go:275] no items to output this cycle
E0321 03:37:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:37:03.409775  543705 memory.go:184] no items to output this cycle
I0321 03:37:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 03:37:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:37:13.409781  543705 memory.go:191] Add success.
I0321 03:37:13.409800  543705 cpu.go:282] Add success.
W0321 03:37:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:37:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:37:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:37:13.420076  543705 net.go:648] Add success.
I0321 03:37:13.422642  543705 net.go:770] primary dev: ETH0
I0321 03:37:13.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:37:13.422666  543705 net.go:698] Add success.
I0321 03:37:13.453205  543705 event_worker.go:152] Polling the log file for events...
W0321 03:37:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:37:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 03:37:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:37:14.456902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:37:14.456911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:37:14.456918  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:37:14.456987  543705 disk_worker.go:494] system disk:vda1
I0321 03:37:14.457018  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:37:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:37:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:37:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:37:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:37:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:37:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:37:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:37:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:37:23.409807  543705 memory.go:184] no items to output this cycle
I0321 03:37:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 03:37:27.554411  543705 disk_info.go:125] begin check local disk info of client
I0321 03:37:27.557043  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:37:27.557049  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4340 0xc0000c4380]
E0321 03:37:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:37:33.409779  543705 memory.go:184] no items to output this cycle
I0321 03:37:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 03:37:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:37:43.409779  543705 memory.go:191] Add success.
I0321 03:37:43.409786  543705 cpu.go:282] Add success.
I0321 03:37:43.419970  543705 net.go:648] Add success.
I0321 03:37:43.420886  543705 net.go:770] primary dev: ETH0
I0321 03:37:43.420899  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:37:43.420913  543705 net.go:698] Add success.
I0321 03:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:37:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:37:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:37:53.409794  543705 memory.go:184] no items to output this cycle
I0321 03:37:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 03:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:38:03.409783  543705 memory.go:184] no items to output this cycle
I0321 03:38:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 03:38:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:38:13.409782  543705 memory.go:191] Add success.
I0321 03:38:13.409786  543705 cpu.go:282] Add success.
W0321 03:38:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:38:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:38:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:38:13.420058  543705 net.go:648] Add success.
I0321 03:38:13.422633  543705 net.go:770] primary dev: ETH0
I0321 03:38:13.422646  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:38:13.422658  543705 net.go:698] Add success.
I0321 03:38:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:38:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:38:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 03:38:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:38:14.456503  543705 disk_worker.go:494] system disk:vda1
I0321 03:38:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:38:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:38:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:38:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:38:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:38:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:38:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:38:23.409776  543705 memory.go:184] no items to output this cycle
I0321 03:38:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 03:38:27.557447  543705 disk_info.go:125] begin check local disk info of client
I0321 03:38:27.560030  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:38:27.560036  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae00 0xc0001aae40]
E0321 03:38:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:38:33.409805  543705 memory.go:184] no items to output this cycle
I0321 03:38:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 03:38:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:38:43.409775  543705 memory.go:191] Add success.
I0321 03:38:43.409803  543705 cpu.go:282] Add success.
I0321 03:38:43.420015  543705 net.go:648] Add success.
I0321 03:38:43.422664  543705 net.go:770] primary dev: ETH0
I0321 03:38:43.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:38:43.422690  543705 net.go:698] Add success.
I0321 03:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:38:53.410662  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:38:53.410683  543705 memory.go:184] no items to output this cycle
I0321 03:38:53.410765  543705 cpu.go:275] no items to output this cycle
E0321 03:39:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:39:03.409780  543705 memory.go:184] no items to output this cycle
I0321 03:39:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 03:39:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:39:13.409777  543705 memory.go:191] Add success.
W0321 03:39:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:39:13.409805  543705 cpu.go:282] Add success.
W0321 03:39:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:39:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:39:13.420138  543705 net.go:648] Add success.
I0321 03:39:13.422628  543705 net.go:770] primary dev: ETH0
I0321 03:39:13.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:39:13.422654  543705 net.go:698] Add success.
I0321 03:39:13.469156  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bab9748b-2fb7-4c19-987f-bf6c0be2ddf4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:39:13.469199  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:39:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:39:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 03:39:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:39:14.456676  543705 disk_worker.go:494] system disk:vda1
I0321 03:39:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:39:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:39:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:39:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:39:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:39:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:39:23.409784  543705 memory.go:184] no items to output this cycle
I0321 03:39:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 03:39:27.560435  543705 disk_info.go:125] begin check local disk info of client
I0321 03:39:27.562992  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:39:27.562999  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0b80 0xc0003c0bc0]
E0321 03:39:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:39:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 03:39:33.409784  543705 memory.go:184] no items to output this cycle
I0321 03:39:38.715263  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:39:38.715269  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:39:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:39:43.410689  543705 memory.go:191] Add success.
I0321 03:39:43.409815  543705 cpu.go:282] Add success.
I0321 03:39:43.420367  543705 net.go:648] Add success.
I0321 03:39:43.423238  543705 net.go:770] primary dev: ETH0
I0321 03:39:43.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:39:43.423272  543705 net.go:698] Add success.
I0321 03:39:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:39:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:39:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:39:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:39:53.409799  543705 memory.go:184] no items to output this cycle
I0321 03:39:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 03:40:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:40:03.409774  543705 cpu.go:275] no items to output this cycle
I0321 03:40:03.409784  543705 memory.go:184] no items to output this cycle
E0321 03:40:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:40:13.409804  543705 memory.go:191] Add success.
I0321 03:40:13.409812  543705 cpu.go:282] Add success.
W0321 03:40:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:40:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:40:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:40:13.420248  543705 net.go:648] Add success.
I0321 03:40:13.422715  543705 net.go:770] primary dev: ETH0
I0321 03:40:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:40:13.422745  543705 net.go:698] Add success.
I0321 03:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:40:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:40:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 03:40:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:40:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 03:40:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:40:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:40:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:40:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:40:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:40:23.410508  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:40:23.410528  543705 memory.go:184] no items to output this cycle
I0321 03:40:23.410538  543705 cpu.go:275] no items to output this cycle
I0321 03:40:27.563462  543705 disk_info.go:125] begin check local disk info of client
I0321 03:40:27.566025  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:40:27.566033  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0cc0 0xc0003c0d00]
E0321 03:40:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:40:33.409796  543705 memory.go:184] no items to output this cycle
I0321 03:40:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 03:40:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:40:43.409772  543705 memory.go:191] Add success.
I0321 03:40:43.409800  543705 cpu.go:282] Add success.
I0321 03:40:43.419867  543705 net.go:648] Add success.
I0321 03:40:43.422452  543705 net.go:770] primary dev: ETH0
I0321 03:40:43.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:40:43.422477  543705 net.go:698] Add success.
I0321 03:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:40:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:40:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:40:53.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:40:53.409839  543705 memory.go:184] no items to output this cycle
I0321 03:40:53.409917  543705 cpu.go:275] no items to output this cycle
E0321 03:41:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:41:03.409776  543705 memory.go:184] no items to output this cycle
I0321 03:41:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 03:41:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:41:13.409777  543705 memory.go:191] Add success.
I0321 03:41:13.409798  543705 cpu.go:282] Add success.
W0321 03:41:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:41:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:41:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:41:13.420127  543705 net.go:648] Add success.
I0321 03:41:13.422652  543705 net.go:770] primary dev: ETH0
I0321 03:41:13.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:41:13.422676  543705 net.go:698] Add success.
I0321 03:41:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:41:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:41:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 03:41:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:41:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 03:41:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:41:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:41:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:41:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:41:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:41:23.409806  543705 memory.go:184] no items to output this cycle
I0321 03:41:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 03:41:27.566466  543705 disk_info.go:125] begin check local disk info of client
I0321 03:41:27.569035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:41:27.569043  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5400 0xc0000c5440]
E0321 03:41:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:41:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 03:41:33.409793  543705 memory.go:184] no items to output this cycle
E0321 03:41:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:41:43.409788  543705 memory.go:191] Add success.
I0321 03:41:43.409792  543705 cpu.go:282] Add success.
I0321 03:41:43.419980  543705 net.go:648] Add success.
I0321 03:41:43.422977  543705 net.go:770] primary dev: ETH0
I0321 03:41:43.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:41:43.423013  543705 net.go:698] Add success.
I0321 03:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:41:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:41:53.410339  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:41:53.410354  543705 memory.go:184] no items to output this cycle
I0321 03:41:53.410481  543705 cpu.go:275] no items to output this cycle
E0321 03:42:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:42:03.409778  543705 memory.go:184] no items to output this cycle
I0321 03:42:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 03:42:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:42:13.409833  543705 memory.go:191] Add success.
I0321 03:42:13.409844  543705 cpu.go:282] Add success.
W0321 03:42:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:42:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:42:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:42:13.420425  543705 net.go:648] Add success.
I0321 03:42:13.423341  543705 net.go:770] primary dev: ETH0
I0321 03:42:13.423356  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:42:13.423370  543705 net.go:698] Add success.
I0321 03:42:14.128870  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"57c40913-b4f2-419c-a4d6-ef7188e720ae","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:42:14.128905  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 03:42:14.454737  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:42:14.454804  543705 disk_worker.go:708] disk space is not compliant
W0321 03:42:14.454808  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:42:14.455535  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:42:14.455545  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:42:14.455550  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:42:14.456430  543705 disk_worker.go:494] system disk:vda1
I0321 03:42:14.456458  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:42:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:42:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 03:42:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:42:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:42:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:42:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:42:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:42:23.409785  543705 memory.go:184] no items to output this cycle
I0321 03:42:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 03:42:27.569503  543705 disk_info.go:125] begin check local disk info of client
I0321 03:42:27.572062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:42:27.572068  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bba00 0xc0002bba40]
E0321 03:42:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:42:33.409794  543705 memory.go:184] no items to output this cycle
I0321 03:42:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 03:42:38.716282  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:42:38.716289  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:42:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:42:43.410678  543705 memory.go:191] Add success.
I0321 03:42:43.409795  543705 cpu.go:282] Add success.
I0321 03:42:43.420373  543705 net.go:648] Add success.
I0321 03:42:43.423070  543705 net.go:770] primary dev: ETH0
I0321 03:42:43.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:42:43.423101  543705 net.go:698] Add success.
I0321 03:42:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:42:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:42:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:42:53.410480  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:42:53.410577  543705 memory.go:184] no items to output this cycle
I0321 03:42:53.410580  543705 cpu.go:275] no items to output this cycle
E0321 03:43:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:43:03.409780  543705 memory.go:184] no items to output this cycle
I0321 03:43:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 03:43:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:43:13.409779  543705 memory.go:191] Add success.
W0321 03:43:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:43:13.409808  543705 cpu.go:282] Add success.
W0321 03:43:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:43:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:43:13.420110  543705 net.go:648] Add success.
I0321 03:43:13.422778  543705 net.go:770] primary dev: ETH0
I0321 03:43:13.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:43:13.422802  543705 net.go:698] Add success.
I0321 03:43:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:43:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:43:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0321 03:43:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:43:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 03:43:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:43:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:43:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:43:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:43:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:43:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:43:23.409791  543705 memory.go:184] no items to output this cycle
I0321 03:43:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 03:43:27.572504  543705 disk_info.go:125] begin check local disk info of client
I0321 03:43:27.575043  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:43:27.575049  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb300 0xc0002bb340]
E0321 03:43:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:43:33.409798  543705 memory.go:184] no items to output this cycle
I0321 03:43:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 03:43:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:43:43.409788  543705 memory.go:191] Add success.
I0321 03:43:43.409804  543705 cpu.go:282] Add success.
I0321 03:43:43.419898  543705 net.go:648] Add success.
I0321 03:43:43.422527  543705 net.go:770] primary dev: ETH0
I0321 03:43:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:43:43.422555  543705 net.go:698] Add success.
I0321 03:43:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:43:53.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:43:53.409891  543705 memory.go:184] no items to output this cycle
I0321 03:43:53.410083  543705 cpu.go:275] no items to output this cycle
E0321 03:44:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:44:03.409801  543705 memory.go:184] no items to output this cycle
I0321 03:44:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 03:44:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:44:13.409809  543705 memory.go:191] Add success.
I0321 03:44:13.409824  543705 cpu.go:282] Add success.
W0321 03:44:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:44:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:44:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:44:13.420145  543705 net.go:648] Add success.
I0321 03:44:13.422879  543705 net.go:770] primary dev: ETH0
I0321 03:44:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:44:13.422904  543705 net.go:698] Add success.
I0321 03:44:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:44:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:44:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 03:44:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:44:14.456610  543705 disk_worker.go:494] system disk:vda1
I0321 03:44:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:44:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:44:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:44:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:44:23.409787  543705 memory.go:184] no items to output this cycle
I0321 03:44:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 03:44:27.575536  543705 disk_info.go:125] begin check local disk info of client
I0321 03:44:27.578152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:44:27.578159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004faac0 0xc0004fab00]
E0321 03:44:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:44:33.409780  543705 memory.go:184] no items to output this cycle
I0321 03:44:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 03:44:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:44:43.409784  543705 memory.go:191] Add success.
I0321 03:44:43.409805  543705 cpu.go:282] Add success.
I0321 03:44:43.419969  543705 net.go:648] Add success.
I0321 03:44:43.422616  543705 net.go:770] primary dev: ETH0
I0321 03:44:43.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:44:43.422641  543705 net.go:698] Add success.
I0321 03:44:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:44:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:44:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:44:53.409769  543705 memory.go:184] no items to output this cycle
I0321 03:44:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 03:45:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:45:03.409890  543705 memory.go:184] no items to output this cycle
I0321 03:45:03.409923  543705 cpu.go:275] no items to output this cycle
E0321 03:45:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:45:13.409805  543705 memory.go:191] Add success.
I0321 03:45:13.409817  543705 cpu.go:282] Add success.
W0321 03:45:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:45:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:45:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:45:13.420276  543705 net.go:648] Add success.
I0321 03:45:13.423068  543705 net.go:770] primary dev: ETH0
I0321 03:45:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:45:13.423099  543705 net.go:698] Add success.
I0321 03:45:13.468864  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e4f801d6-390e-47b0-a1f6-7cd25e9454d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:45:13.468905  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:45:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:45:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:45:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 03:45:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:45:14.456615  543705 disk_worker.go:494] system disk:vda1
I0321 03:45:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:45:15.455607  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:45:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:45:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:45:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:45:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:45:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:45:23.409787  543705 memory.go:184] no items to output this cycle
I0321 03:45:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 03:45:27.578538  543705 disk_info.go:125] begin check local disk info of client
I0321 03:45:27.581049  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:45:27.581056  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5080 0xc0000c50c0]
E0321 03:45:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:45:33.409800  543705 memory.go:184] no items to output this cycle
I0321 03:45:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 03:45:38.717286  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:45:38.717293  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:45:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:45:43.409781  543705 memory.go:191] Add success.
I0321 03:45:43.409784  543705 cpu.go:282] Add success.
I0321 03:45:43.419928  543705 net.go:648] Add success.
I0321 03:45:43.420834  543705 net.go:770] primary dev: ETH0
I0321 03:45:43.420848  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:45:43.420860  543705 net.go:698] Add success.
I0321 03:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:45:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:45:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:45:53.409806  543705 memory.go:184] no items to output this cycle
I0321 03:45:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 03:46:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:46:03.409773  543705 memory.go:184] no items to output this cycle
I0321 03:46:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 03:46:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:46:13.409792  543705 memory.go:191] Add success.
I0321 03:46:13.409811  543705 cpu.go:282] Add success.
W0321 03:46:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:46:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:46:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:46:13.420116  543705 net.go:648] Add success.
I0321 03:46:13.422768  543705 net.go:770] primary dev: ETH0
I0321 03:46:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:46:13.422791  543705 net.go:698] Add success.
I0321 03:46:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:46:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:46:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 03:46:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:46:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 03:46:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:46:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:46:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:46:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:46:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:46:23.409809  543705 memory.go:184] no items to output this cycle
I0321 03:46:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 03:46:27.581559  543705 disk_info.go:125] begin check local disk info of client
I0321 03:46:27.584124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:46:27.584132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463980 0xc0004639c0]
E0321 03:46:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:46:33.409798  543705 memory.go:184] no items to output this cycle
I0321 03:46:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 03:46:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:46:43.409812  543705 memory.go:191] Add success.
I0321 03:46:43.409816  543705 cpu.go:282] Add success.
I0321 03:46:43.419981  543705 net.go:648] Add success.
I0321 03:46:43.422942  543705 net.go:770] primary dev: ETH0
I0321 03:46:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:46:43.422972  543705 net.go:698] Add success.
I0321 03:46:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:46:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:46:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:46:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:46:53.409791  543705 memory.go:184] no items to output this cycle
I0321 03:46:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 03:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:47:03.409776  543705 cpu.go:275] no items to output this cycle
I0321 03:47:03.409782  543705 memory.go:184] no items to output this cycle
E0321 03:47:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:47:13.409777  543705 memory.go:191] Add success.
I0321 03:47:13.409782  543705 cpu.go:282] Add success.
W0321 03:47:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:47:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:47:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:47:13.419708  543705 net.go:648] Add success.
I0321 03:47:13.422289  543705 net.go:770] primary dev: ETH0
I0321 03:47:13.422302  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:47:13.422313  543705 net.go:698] Add success.
I0321 03:47:13.452793  543705 event_worker.go:152] Polling the log file for events...
W0321 03:47:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:47:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 03:47:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:47:14.455881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:47:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:47:14.455894  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:47:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 03:47:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:47:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:47:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:47:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:47:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:47:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:47:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:47:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:47:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:47:23.409808  543705 memory.go:184] no items to output this cycle
I0321 03:47:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 03:47:27.584560  543705 disk_info.go:125] begin check local disk info of client
I0321 03:47:27.587164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:47:27.587170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353e80 0xc000353ec0]
E0321 03:47:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:47:33.409771  543705 memory.go:184] no items to output this cycle
I0321 03:47:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 03:47:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:47:43.409810  543705 memory.go:191] Add success.
I0321 03:47:43.409815  543705 cpu.go:282] Add success.
I0321 03:47:43.419862  543705 net.go:648] Add success.
I0321 03:47:43.422448  543705 net.go:770] primary dev: ETH0
I0321 03:47:43.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:47:43.422483  543705 net.go:698] Add success.
I0321 03:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:47:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:47:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:47:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:47:53.409771  543705 cpu.go:275] no items to output this cycle
I0321 03:47:53.409779  543705 memory.go:184] no items to output this cycle
E0321 03:48:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:48:03.409797  543705 memory.go:184] no items to output this cycle
I0321 03:48:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 03:48:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:48:13.409788  543705 memory.go:191] Add success.
I0321 03:48:13.409791  543705 cpu.go:282] Add success.
W0321 03:48:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:48:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:48:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:48:13.420323  543705 net.go:648] Add success.
I0321 03:48:13.422813  543705 net.go:770] primary dev: ETH0
I0321 03:48:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:48:13.422837  543705 net.go:698] Add success.
I0321 03:48:13.469592  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d2a84b86-2761-47f4-8405-9cc63dc7cbe2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:48:13.469623  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:48:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:48:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:48:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 03:48:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:48:14.456682  543705 disk_worker.go:494] system disk:vda1
I0321 03:48:14.456730  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:48:15.455611  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:48:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:48:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:48:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:48:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:48:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:48:23.409819  543705 memory.go:184] no items to output this cycle
I0321 03:48:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 03:48:27.587592  543705 disk_info.go:125] begin check local disk info of client
I0321 03:48:27.590243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:48:27.590251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1040 0xc0003e1080]
E0321 03:48:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:48:33.409789  543705 memory.go:184] no items to output this cycle
I0321 03:48:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 03:48:38.717733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:48:38.717740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:48:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:48:43.409829  543705 cpu.go:282] Add success.
I0321 03:48:43.410805  543705 memory.go:191] Add success.
I0321 03:48:43.420500  543705 net.go:648] Add success.
I0321 03:48:43.423123  543705 net.go:770] primary dev: ETH0
I0321 03:48:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:48:43.423148  543705 net.go:698] Add success.
I0321 03:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:48:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:48:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:48:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:48:53.409800  543705 memory.go:184] no items to output this cycle
I0321 03:48:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 03:49:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:49:03.409814  543705 memory.go:184] no items to output this cycle
I0321 03:49:03.409830  543705 cpu.go:275] no items to output this cycle
E0321 03:49:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:49:13.409820  543705 memory.go:191] Add success.
I0321 03:49:13.409826  543705 cpu.go:282] Add success.
W0321 03:49:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:49:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:49:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:49:13.420302  543705 net.go:648] Add success.
I0321 03:49:13.423249  543705 net.go:770] primary dev: ETH0
I0321 03:49:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:49:13.423273  543705 net.go:698] Add success.
I0321 03:49:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:49:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:49:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 03:49:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:49:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 03:49:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:49:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:49:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:49:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:49:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:49:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:49:23.409790  543705 memory.go:184] no items to output this cycle
I0321 03:49:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 03:49:27.590584  543705 disk_info.go:125] begin check local disk info of client
I0321 03:49:27.593137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:49:27.593144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa440 0xc0001fa480]
E0321 03:49:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:49:33.409801  543705 memory.go:184] no items to output this cycle
I0321 03:49:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 03:49:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:49:43.409810  543705 memory.go:191] Add success.
I0321 03:49:43.409810  543705 cpu.go:282] Add success.
I0321 03:49:43.419902  543705 net.go:648] Add success.
I0321 03:49:43.422807  543705 net.go:770] primary dev: ETH0
I0321 03:49:43.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:49:43.422836  543705 net.go:698] Add success.
I0321 03:49:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:49:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:49:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:49:53.409762  543705 memory.go:184] no items to output this cycle
I0321 03:49:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 03:50:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:50:03.409770  543705 memory.go:184] no items to output this cycle
I0321 03:50:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 03:50:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:50:13.409793  543705 memory.go:191] Add success.
I0321 03:50:13.409793  543705 cpu.go:282] Add success.
W0321 03:50:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:50:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:50:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:50:13.420104  543705 net.go:648] Add success.
I0321 03:50:13.422716  543705 net.go:770] primary dev: ETH0
I0321 03:50:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:50:13.422746  543705 net.go:698] Add success.
I0321 03:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:50:14.455310  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:50:14.455327  543705 disk_worker.go:708] disk space is not compliant
W0321 03:50:14.455331  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:50:14.456994  543705 disk_worker.go:494] system disk:vda1
I0321 03:50:14.457024  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:50:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:50:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:50:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:50:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:50:23.409790  543705 memory.go:184] no items to output this cycle
I0321 03:50:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 03:50:27.593614  543705 disk_info.go:125] begin check local disk info of client
I0321 03:50:27.596278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:50:27.596286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa000 0xc0001fa040]
E0321 03:50:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:50:33.409820  543705 memory.go:184] no items to output this cycle
I0321 03:50:33.409836  543705 cpu.go:275] no items to output this cycle
E0321 03:50:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:50:43.409819  543705 memory.go:191] Add success.
I0321 03:50:43.409826  543705 cpu.go:282] Add success.
I0321 03:50:43.419990  543705 net.go:648] Add success.
I0321 03:50:43.422644  543705 net.go:770] primary dev: ETH0
I0321 03:50:43.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:50:43.422673  543705 net.go:698] Add success.
I0321 03:50:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:50:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:50:53.409767  543705 memory.go:184] no items to output this cycle
I0321 03:50:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 03:51:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:51:03.409772  543705 memory.go:184] no items to output this cycle
I0321 03:51:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 03:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:51:13.409811  543705 memory.go:191] Add success.
I0321 03:51:13.409834  543705 cpu.go:282] Add success.
W0321 03:51:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:51:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:51:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:51:13.420056  543705 net.go:648] Add success.
I0321 03:51:13.422746  543705 net.go:770] primary dev: ETH0
I0321 03:51:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:51:13.422771  543705 net.go:698] Add success.
I0321 03:51:13.463194  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"987dde1e-3415-46c1-8106-b82ac050916d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:51:13.463229  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:51:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:51:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:51:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 03:51:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:51:14.456969  543705 disk_worker.go:494] system disk:vda1
I0321 03:51:14.456997  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:51:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:51:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:51:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:51:16.472496  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:51:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:51:23.409782  543705 memory.go:184] no items to output this cycle
I0321 03:51:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 03:51:27.596626  543705 disk_info.go:125] begin check local disk info of client
I0321 03:51:27.599192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:51:27.599198  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a200 0xc00048a240]
E0321 03:51:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:51:33.409799  543705 memory.go:184] no items to output this cycle
I0321 03:51:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 03:51:38.719285  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:51:38.719292  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:51:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:51:43.410621  543705 memory.go:191] Add success.
I0321 03:51:43.409812  543705 cpu.go:282] Add success.
I0321 03:51:43.420317  543705 net.go:648] Add success.
I0321 03:51:43.422688  543705 net.go:770] primary dev: ETH0
I0321 03:51:43.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:51:43.422714  543705 net.go:698] Add success.
I0321 03:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:51:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:51:53.409779  543705 memory.go:184] no items to output this cycle
I0321 03:51:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 03:52:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:52:03.409773  543705 memory.go:184] no items to output this cycle
I0321 03:52:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 03:52:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:52:13.409778  543705 memory.go:191] Add success.
W0321 03:52:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:52:13.409811  543705 cpu.go:282] Add success.
W0321 03:52:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:52:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:52:13.420064  543705 net.go:648] Add success.
I0321 03:52:13.422696  543705 net.go:770] primary dev: ETH0
I0321 03:52:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:52:13.422721  543705 net.go:698] Add success.
W0321 03:52:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:52:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 03:52:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0321 03:52:14.456092  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:52:14.456101  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:52:14.456108  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:52:14.456418  543705 disk_worker.go:494] system disk:vda1
I0321 03:52:14.456446  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:52:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:52:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:52:16.458036  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:52:16.458047  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:52:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:52:16.458187  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:52:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:52:23.409781  543705 memory.go:184] no items to output this cycle
I0321 03:52:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 03:52:27.599687  543705 disk_info.go:125] begin check local disk info of client
I0321 03:52:27.602344  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:52:27.602352  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a080 0xc00034a0c0]
E0321 03:52:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:52:33.409779  543705 memory.go:184] no items to output this cycle
I0321 03:52:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 03:52:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:52:43.409810  543705 memory.go:191] Add success.
I0321 03:52:43.409816  543705 cpu.go:282] Add success.
I0321 03:52:43.420026  543705 net.go:648] Add success.
I0321 03:52:43.423126  543705 net.go:770] primary dev: ETH0
I0321 03:52:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:52:43.423151  543705 net.go:698] Add success.
I0321 03:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:52:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:52:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:52:53.409765  543705 memory.go:184] no items to output this cycle
I0321 03:52:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 03:53:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:53:03.409803  543705 memory.go:184] no items to output this cycle
I0321 03:53:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 03:53:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:53:13.409786  543705 memory.go:191] Add success.
W0321 03:53:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:53:13.409817  543705 cpu.go:282] Add success.
W0321 03:53:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:53:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:53:13.420126  543705 net.go:648] Add success.
I0321 03:53:13.422626  543705 net.go:770] primary dev: ETH0
I0321 03:53:13.422638  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:53:13.422651  543705 net.go:698] Add success.
I0321 03:53:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:53:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:53:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 03:53:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:53:14.456477  543705 disk_worker.go:494] system disk:vda1
I0321 03:53:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:53:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:53:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:53:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:53:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:53:23.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:53:23.409917  543705 memory.go:184] no items to output this cycle
I0321 03:53:23.409932  543705 cpu.go:275] no items to output this cycle
I0321 03:53:27.602654  543705 disk_info.go:125] begin check local disk info of client
I0321 03:53:27.605272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:53:27.605280  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 03:53:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:53:33.409809  543705 memory.go:184] no items to output this cycle
I0321 03:53:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 03:53:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:53:43.409788  543705 memory.go:191] Add success.
I0321 03:53:43.409805  543705 cpu.go:282] Add success.
I0321 03:53:43.419869  543705 net.go:648] Add success.
I0321 03:53:43.422636  543705 net.go:770] primary dev: ETH0
I0321 03:53:43.422649  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:53:43.422662  543705 net.go:698] Add success.
I0321 03:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:53:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:53:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:53:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:53:53.409769  543705 memory.go:184] no items to output this cycle
I0321 03:53:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 03:54:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:54:03.409803  543705 memory.go:184] no items to output this cycle
I0321 03:54:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 03:54:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:54:13.409809  543705 memory.go:191] Add success.
I0321 03:54:13.409823  543705 cpu.go:282] Add success.
W0321 03:54:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:54:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:54:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:54:13.419944  543705 net.go:770] primary dev: ETH0
I0321 03:54:13.419958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:54:13.419971  543705 net.go:698] Add success.
I0321 03:54:13.420202  543705 net.go:648] Add success.
I0321 03:54:13.469620  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2b245415-e982-44f3-969d-b86e5863941f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:54:13.469668  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 03:54:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:54:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:54:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 03:54:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:54:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 03:54:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:54:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:54:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:54:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:54:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:54:23.409809  543705 memory.go:184] no items to output this cycle
I0321 03:54:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 03:54:27.605685  543705 disk_info.go:125] begin check local disk info of client
I0321 03:54:27.608318  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:54:27.608327  543705 disk_info.go:196] parse disk info done, disk is : [0xc000593a00 0xc000593a40]
E0321 03:54:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:54:33.409799  543705 memory.go:184] no items to output this cycle
I0321 03:54:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 03:54:38.720300  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:54:38.720307  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:54:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:54:43.410688  543705 memory.go:191] Add success.
I0321 03:54:43.409826  543705 cpu.go:282] Add success.
I0321 03:54:43.420415  543705 net.go:648] Add success.
I0321 03:54:43.423125  543705 net.go:770] primary dev: ETH0
I0321 03:54:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:54:43.423151  543705 net.go:698] Add success.
I0321 03:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:54:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:54:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:54:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:54:53.409798  543705 memory.go:184] no items to output this cycle
I0321 03:54:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 03:55:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:55:03.409774  543705 memory.go:184] no items to output this cycle
I0321 03:55:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 03:55:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:55:13.409791  543705 cpu.go:282] Add success.
I0321 03:55:13.409800  543705 memory.go:191] Add success.
W0321 03:55:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:55:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:55:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:55:13.420101  543705 net.go:648] Add success.
I0321 03:55:13.422992  543705 net.go:770] primary dev: ETH0
I0321 03:55:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:55:13.423018  543705 net.go:698] Add success.
I0321 03:55:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:55:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:55:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 03:55:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:55:14.456498  543705 disk_worker.go:494] system disk:vda1
I0321 03:55:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:55:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:55:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:55:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:55:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:55:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:55:23.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:55:23.409929  543705 memory.go:184] no items to output this cycle
I0321 03:55:23.409963  543705 cpu.go:275] no items to output this cycle
I0321 03:55:27.608684  543705 disk_info.go:125] begin check local disk info of client
I0321 03:55:27.611289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:55:27.611295  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 03:55:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:55:33.409805  543705 memory.go:184] no items to output this cycle
I0321 03:55:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 03:55:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:55:43.409780  543705 memory.go:191] Add success.
I0321 03:55:43.409801  543705 cpu.go:282] Add success.
I0321 03:55:43.419959  543705 net.go:648] Add success.
I0321 03:55:43.422390  543705 net.go:770] primary dev: ETH0
I0321 03:55:43.422403  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:55:43.422415  543705 net.go:698] Add success.
I0321 03:55:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:55:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:55:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:55:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:55:53.409779  543705 memory.go:184] no items to output this cycle
I0321 03:55:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 03:56:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:56:03.409784  543705 memory.go:184] no items to output this cycle
I0321 03:56:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 03:56:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:56:13.409777  543705 memory.go:191] Add success.
W0321 03:56:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:56:13.409802  543705 cpu.go:282] Add success.
W0321 03:56:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:56:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:56:13.420223  543705 net.go:648] Add success.
I0321 03:56:13.422855  543705 net.go:770] primary dev: ETH0
I0321 03:56:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:56:13.422880  543705 net.go:698] Add success.
I0321 03:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:56:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:56:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 03:56:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:56:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 03:56:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:56:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:56:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:56:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:56:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:56:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:56:23.409807  543705 memory.go:184] no items to output this cycle
I0321 03:56:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 03:56:27.611728  543705 disk_info.go:125] begin check local disk info of client
I0321 03:56:27.614379  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:56:27.614388  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521800 0xc000521840]
E0321 03:56:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:56:33.409802  543705 memory.go:184] no items to output this cycle
I0321 03:56:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 03:56:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:56:43.409810  543705 memory.go:191] Add success.
I0321 03:56:43.409814  543705 cpu.go:282] Add success.
I0321 03:56:43.419895  543705 net.go:648] Add success.
I0321 03:56:43.422256  543705 net.go:770] primary dev: ETH0
I0321 03:56:43.422270  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:56:43.422285  543705 net.go:698] Add success.
I0321 03:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:56:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:56:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:56:53.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:56:53.410426  543705 memory.go:184] no items to output this cycle
I0321 03:56:53.410440  543705 cpu.go:275] no items to output this cycle
E0321 03:57:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:57:03.409799  543705 memory.go:184] no items to output this cycle
I0321 03:57:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 03:57:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:57:13.409778  543705 memory.go:191] Add success.
W0321 03:57:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 03:57:13.409811  543705 cpu.go:282] Add success.
W0321 03:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:57:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:57:13.426208  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 03:57:13.426287  543705 net.go:770] primary dev: ETH0
I0321 03:57:13.426301  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:57:13.426313  543705 net.go:698] Add success.
I0321 03:57:13.426538  543705 net.go:648] Add success.
I0321 03:57:13.453068  543705 event_worker.go:152] Polling the log file for events...
I0321 03:57:13.468801  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6f5a1f4d-24ac-4ec6-ac89-ed41dea2e1f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 03:57:13.468839  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 03:57:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:57:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 03:57:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:57:14.456814  543705 disk_worker.go:494] system disk:vda1
E0321 03:57:14.456830  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 03:57:14.456837  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 03:57:14.456842  543705 custom_config.go:64] query custom config with name: gpu
I0321 03:57:14.456868  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 03:57:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 03:57:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:57:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 03:57:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 03:57:16.458018  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:57:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:57:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:57:23.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:57:23.409897  543705 memory.go:184] no items to output this cycle
I0321 03:57:23.409976  543705 cpu.go:275] no items to output this cycle
I0321 03:57:27.614721  543705 disk_info.go:125] begin check local disk info of client
I0321 03:57:27.617267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:57:27.617274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0321 03:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:57:33.409800  543705 memory.go:184] no items to output this cycle
I0321 03:57:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 03:57:38.721312  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 03:57:38.721318  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 03:57:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:57:43.410646  543705 memory.go:191] Add success.
I0321 03:57:43.409791  543705 cpu.go:282] Add success.
I0321 03:57:43.420439  543705 net.go:648] Add success.
I0321 03:57:43.423202  543705 net.go:770] primary dev: ETH0
I0321 03:57:43.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:57:43.423227  543705 net.go:698] Add success.
I0321 03:57:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:57:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:57:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:57:53.409779  543705 memory.go:184] no items to output this cycle
I0321 03:57:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 03:58:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:58:03.409768  543705 memory.go:184] no items to output this cycle
I0321 03:58:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 03:58:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:58:13.409779  543705 memory.go:191] Add success.
I0321 03:58:13.409804  543705 cpu.go:282] Add success.
W0321 03:58:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:58:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:58:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:58:13.420059  543705 net.go:648] Add success.
I0321 03:58:13.422799  543705 net.go:770] primary dev: ETH0
I0321 03:58:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:58:13.422822  543705 net.go:698] Add success.
I0321 03:58:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:58:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:58:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 03:58:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:58:14.456572  543705 disk_worker.go:494] system disk:vda1
I0321 03:58:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:58:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:58:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:58:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:58:16.472109  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:58:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:58:23.409792  543705 memory.go:184] no items to output this cycle
I0321 03:58:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 03:58:27.617687  543705 disk_info.go:125] begin check local disk info of client
I0321 03:58:27.620239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:58:27.620261  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 03:58:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:58:33.409775  543705 memory.go:184] no items to output this cycle
I0321 03:58:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 03:58:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:58:43.409807  543705 memory.go:191] Add success.
I0321 03:58:43.409816  543705 cpu.go:282] Add success.
I0321 03:58:43.419877  543705 net.go:648] Add success.
I0321 03:58:43.422483  543705 net.go:770] primary dev: ETH0
I0321 03:58:43.422495  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:58:43.422508  543705 net.go:698] Add success.
I0321 03:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:58:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:58:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:58:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:58:53.409762  543705 memory.go:184] no items to output this cycle
I0321 03:58:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 03:59:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:59:03.409781  543705 memory.go:184] no items to output this cycle
I0321 03:59:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 03:59:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:59:13.409813  543705 memory.go:191] Add success.
I0321 03:59:13.409820  543705 cpu.go:282] Add success.
W0321 03:59:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 03:59:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 03:59:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 03:59:13.420147  543705 net.go:648] Add success.
I0321 03:59:13.422602  543705 net.go:770] primary dev: ETH0
I0321 03:59:13.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:59:13.422631  543705 net.go:698] Add success.
I0321 03:59:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 03:59:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 03:59:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 03:59:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 03:59:14.456511  543705 disk_worker.go:494] system disk:vda1
I0321 03:59:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 03:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 03:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:59:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:59:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 03:59:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0321 03:59:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:59:23.409791  543705 memory.go:184] no items to output this cycle
I0321 03:59:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 03:59:27.620747  543705 disk_info.go:125] begin check local disk info of client
I0321 03:59:27.623459  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 03:59:27.623466  543705 disk_info.go:196] parse disk info done, disk is : [0xc000297a80 0xc000297ac0]
E0321 03:59:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:59:33.409774  543705 memory.go:184] no items to output this cycle
I0321 03:59:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 03:59:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:59:43.409786  543705 memory.go:191] Add success.
I0321 03:59:43.409790  543705 cpu.go:282] Add success.
I0321 03:59:43.420013  543705 net.go:648] Add success.
I0321 03:59:43.423220  543705 net.go:770] primary dev: ETH0
I0321 03:59:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0321 03:59:43.423245  543705 net.go:698] Add success.
I0321 03:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 03:59:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 03:59:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 03:59:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 03:59:53.409770  543705 memory.go:184] no items to output this cycle
I0321 03:59:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 04:00:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:00:03.409774  543705 memory.go:184] no items to output this cycle
I0321 04:00:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 04:00:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:00:13.409813  543705 memory.go:191] Add success.
I0321 04:00:13.409818  543705 cpu.go:282] Add success.
W0321 04:00:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:00:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:00:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:00:13.420138  543705 net.go:648] Add success.
I0321 04:00:13.422887  543705 net.go:770] primary dev: ETH0
I0321 04:00:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:00:13.422915  543705 net.go:698] Add success.
I0321 04:00:13.906982  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"22c8d952-7c6d-411e-a82e-00f976a0ee6b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:00:13.907017  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:00:14.454728  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:00:14.454877  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:00:14.454958  543705 disk_worker.go:708] disk space is not compliant
W0321 04:00:14.454962  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:00:14.456449  543705 disk_worker.go:494] system disk:vda1
I0321 04:00:14.456479  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:00:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:00:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:00:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:00:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:00:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:00:23.409796  543705 memory.go:184] no items to output this cycle
I0321 04:00:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 04:00:27.623795  543705 disk_info.go:125] begin check local disk info of client
I0321 04:00:27.626430  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:00:27.626437  543705 disk_info.go:196] parse disk info done, disk is : [0xc000323200 0xc000323240]
E0321 04:00:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:00:33.409772  543705 memory.go:184] no items to output this cycle
I0321 04:00:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 04:00:38.721740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:00:38.721747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:00:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:00:43.410598  543705 memory.go:191] Add success.
I0321 04:00:43.409791  543705 cpu.go:282] Add success.
I0321 04:00:43.420317  543705 net.go:648] Add success.
I0321 04:00:43.423007  543705 net.go:770] primary dev: ETH0
I0321 04:00:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:00:43.423033  543705 net.go:698] Add success.
I0321 04:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:00:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:00:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:00:53.409780  543705 memory.go:184] no items to output this cycle
I0321 04:00:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 04:01:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:01:03.409784  543705 memory.go:184] no items to output this cycle
I0321 04:01:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 04:01:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:01:13.409810  543705 memory.go:191] Add success.
I0321 04:01:13.409821  543705 cpu.go:282] Add success.
W0321 04:01:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:01:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:01:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:01:13.420118  543705 net.go:648] Add success.
I0321 04:01:13.422655  543705 net.go:770] primary dev: ETH0
I0321 04:01:13.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:01:13.422680  543705 net.go:698] Add success.
I0321 04:01:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:01:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:01:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 04:01:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:01:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 04:01:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:01:15.455918  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:01:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:01:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:01:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:01:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:01:23.409808  543705 memory.go:184] no items to output this cycle
I0321 04:01:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 04:01:27.626768  543705 disk_info.go:125] begin check local disk info of client
I0321 04:01:27.629417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:01:27.629424  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba280 0xc0002ba2c0]
E0321 04:01:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:01:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 04:01:33.409795  543705 memory.go:184] no items to output this cycle
E0321 04:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:01:43.409789  543705 memory.go:191] Add success.
I0321 04:01:43.409790  543705 cpu.go:282] Add success.
I0321 04:01:43.419898  543705 net.go:648] Add success.
I0321 04:01:43.422529  543705 net.go:770] primary dev: ETH0
I0321 04:01:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:01:43.422553  543705 net.go:698] Add success.
I0321 04:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:01:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:01:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:01:53.409801  543705 memory.go:184] no items to output this cycle
I0321 04:01:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 04:02:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:02:03.409805  543705 memory.go:184] no items to output this cycle
I0321 04:02:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 04:02:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:02:13.409789  543705 memory.go:191] Add success.
I0321 04:02:13.409790  543705 cpu.go:282] Add success.
W0321 04:02:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:02:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:02:13.420067  543705 net.go:648] Add success.
I0321 04:02:13.422785  543705 net.go:770] primary dev: ETH0
I0321 04:02:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:02:13.422810  543705 net.go:698] Add success.
W0321 04:02:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:02:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 04:02:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:02:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:02:14.455900  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:02:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:02:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 04:02:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:02:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:02:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:02:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:02:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:02:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:02:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:02:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:02:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:02:23.409781  543705 memory.go:184] no items to output this cycle
I0321 04:02:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 04:02:27.629678  543705 disk_info.go:125] begin check local disk info of client
I0321 04:02:27.632098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:02:27.632118  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499ac0 0xc000499b00]
E0321 04:02:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:02:33.409778  543705 memory.go:184] no items to output this cycle
I0321 04:02:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 04:02:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:02:43.409815  543705 memory.go:191] Add success.
I0321 04:02:43.409827  543705 cpu.go:282] Add success.
I0321 04:02:43.419994  543705 net.go:648] Add success.
I0321 04:02:43.422794  543705 net.go:770] primary dev: ETH0
I0321 04:02:43.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:02:43.422820  543705 net.go:698] Add success.
I0321 04:02:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:02:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:02:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:02:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:02:53.409774  543705 memory.go:184] no items to output this cycle
I0321 04:02:53.409774  543705 cpu.go:275] no items to output this cycle
E0321 04:03:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:03:03.409780  543705 memory.go:184] no items to output this cycle
I0321 04:03:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 04:03:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:03:13.409814  543705 memory.go:191] Add success.
I0321 04:03:13.409819  543705 cpu.go:282] Add success.
W0321 04:03:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:03:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:03:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:03:13.420186  543705 net.go:648] Add success.
I0321 04:03:13.422931  543705 net.go:770] primary dev: ETH0
I0321 04:03:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:03:13.422958  543705 net.go:698] Add success.
I0321 04:03:13.463885  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"47c6114a-ca82-4989-8d4f-637d588b051a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:03:13.463927  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:03:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:03:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:03:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 04:03:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:03:14.456693  543705 disk_worker.go:494] system disk:vda1
I0321 04:03:14.456725  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:03:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:03:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:03:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:03:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:03:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:03:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:03:23.409787  543705 memory.go:184] no items to output this cycle
I0321 04:03:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 04:03:27.632802  543705 disk_info.go:125] begin check local disk info of client
I0321 04:03:27.635363  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:03:27.635369  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498fc0 0xc000499000]
E0321 04:03:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:03:33.409797  543705 memory.go:184] no items to output this cycle
I0321 04:03:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 04:03:38.723317  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:03:38.723323  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:03:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:03:43.410646  543705 memory.go:191] Add success.
I0321 04:03:43.409784  543705 cpu.go:282] Add success.
I0321 04:03:43.420195  543705 net.go:770] primary dev: ETH0
I0321 04:03:43.420208  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:03:43.420221  543705 net.go:698] Add success.
I0321 04:03:43.420456  543705 net.go:648] Add success.
I0321 04:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:03:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:03:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:03:53.409771  543705 memory.go:184] no items to output this cycle
I0321 04:03:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 04:04:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:04:03.409779  543705 memory.go:184] no items to output this cycle
I0321 04:04:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 04:04:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:04:13.409803  543705 cpu.go:282] Add success.
I0321 04:04:13.409811  543705 memory.go:191] Add success.
W0321 04:04:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:04:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:04:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:04:13.420545  543705 net.go:648] Add success.
I0321 04:04:13.423161  543705 net.go:770] primary dev: ETH0
I0321 04:04:13.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:04:13.423186  543705 net.go:698] Add success.
I0321 04:04:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:04:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:04:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 04:04:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:04:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 04:04:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:04:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:04:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:04:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:04:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:04:23.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:04:23.409824  543705 memory.go:184] no items to output this cycle
I0321 04:04:23.409831  543705 cpu.go:275] no items to output this cycle
I0321 04:04:27.635836  543705 disk_info.go:125] begin check local disk info of client
I0321 04:04:27.638244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:04:27.638251  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d100 0xc00035d140]
E0321 04:04:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:04:33.409801  543705 memory.go:184] no items to output this cycle
I0321 04:04:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 04:04:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:04:43.409823  543705 memory.go:191] Add success.
I0321 04:04:43.409827  543705 cpu.go:282] Add success.
I0321 04:04:43.419972  543705 net.go:648] Add success.
I0321 04:04:43.422665  543705 net.go:770] primary dev: ETH0
I0321 04:04:43.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:04:43.422691  543705 net.go:698] Add success.
I0321 04:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:04:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:04:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:04:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:04:53.409773  543705 memory.go:184] no items to output this cycle
I0321 04:04:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 04:05:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:05:03.409781  543705 memory.go:184] no items to output this cycle
I0321 04:05:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 04:05:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:05:13.409784  543705 memory.go:191] Add success.
W0321 04:05:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 04:05:13.409812  543705 cpu.go:282] Add success.
W0321 04:05:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:05:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:05:13.420034  543705 net.go:648] Add success.
I0321 04:05:13.423122  543705 net.go:770] primary dev: ETH0
I0321 04:05:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:05:13.423146  543705 net.go:698] Add success.
I0321 04:05:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:05:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:05:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 04:05:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:05:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 04:05:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:05:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:05:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:05:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:05:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:05:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:05:23.409791  543705 memory.go:184] no items to output this cycle
I0321 04:05:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 04:05:27.638833  543705 disk_info.go:125] begin check local disk info of client
I0321 04:05:27.641414  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:05:27.641420  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391380 0xc0003913c0]
E0321 04:05:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:05:33.409788  543705 memory.go:184] no items to output this cycle
I0321 04:05:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 04:05:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:05:43.409817  543705 memory.go:191] Add success.
I0321 04:05:43.409826  543705 cpu.go:282] Add success.
I0321 04:05:43.419899  543705 net.go:648] Add success.
I0321 04:05:43.422652  543705 net.go:770] primary dev: ETH0
I0321 04:05:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:05:43.422677  543705 net.go:698] Add success.
I0321 04:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:05:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:05:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:05:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:05:53.409783  543705 memory.go:184] no items to output this cycle
I0321 04:05:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 04:06:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:06:03.409783  543705 memory.go:184] no items to output this cycle
I0321 04:06:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 04:06:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:06:13.409818  543705 memory.go:191] Add success.
I0321 04:06:13.409830  543705 cpu.go:282] Add success.
W0321 04:06:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:06:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:06:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:06:13.420388  543705 net.go:648] Add success.
I0321 04:06:13.422843  543705 net.go:770] primary dev: ETH0
I0321 04:06:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:06:13.422869  543705 net.go:698] Add success.
I0321 04:06:13.468630  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"47586ca1-6b15-4ad9-8736-a30742312a6d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:06:13.468663  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:06:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:06:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:06:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 04:06:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:06:14.456682  543705 disk_worker.go:494] system disk:vda1
I0321 04:06:14.456734  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:06:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:06:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:06:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:06:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:06:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:06:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:06:23.409822  543705 memory.go:184] no items to output this cycle
I0321 04:06:23.409833  543705 cpu.go:275] no items to output this cycle
I0321 04:06:27.641673  543705 disk_info.go:125] begin check local disk info of client
I0321 04:06:27.644232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:06:27.644238  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b940 0xc00007b980]
E0321 04:06:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:06:33.409817  543705 memory.go:184] no items to output this cycle
I0321 04:06:33.409833  543705 cpu.go:275] no items to output this cycle
I0321 04:06:38.723466  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:06:38.723473  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:06:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:06:43.410514  543705 memory.go:191] Add success.
I0321 04:06:43.409827  543705 cpu.go:282] Add success.
I0321 04:06:43.420206  543705 net.go:648] Add success.
I0321 04:06:43.422800  543705 net.go:770] primary dev: ETH0
I0321 04:06:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:06:43.422827  543705 net.go:698] Add success.
I0321 04:06:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:06:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:06:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:06:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:06:53.409804  543705 memory.go:184] no items to output this cycle
I0321 04:06:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 04:07:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:07:03.409812  543705 memory.go:184] no items to output this cycle
I0321 04:07:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 04:07:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:07:13.409783  543705 memory.go:191] Add success.
W0321 04:07:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 04:07:13.409821  543705 cpu.go:282] Add success.
W0321 04:07:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:07:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:07:13.420066  543705 net.go:648] Add success.
I0321 04:07:13.422759  543705 net.go:770] primary dev: ETH0
I0321 04:07:13.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:07:13.422788  543705 net.go:698] Add success.
I0321 04:07:13.453389  543705 event_worker.go:152] Polling the log file for events...
W0321 04:07:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:07:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 04:07:14.455157  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:07:14.456126  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:07:14.456135  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:07:14.456142  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:07:14.456487  543705 disk_worker.go:494] system disk:vda1
I0321 04:07:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:07:15.456892  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:07:15.456900  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:07:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:07:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:07:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:07:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:07:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:07:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:07:23.409827  543705 memory.go:184] no items to output this cycle
I0321 04:07:23.409834  543705 cpu.go:275] no items to output this cycle
I0321 04:07:27.644869  543705 disk_info.go:125] begin check local disk info of client
I0321 04:07:27.647513  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:07:27.647519  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa080 0xc0001aa0c0]
E0321 04:07:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:07:33.409782  543705 memory.go:184] no items to output this cycle
I0321 04:07:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 04:07:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:07:43.409827  543705 memory.go:191] Add success.
I0321 04:07:43.409831  543705 cpu.go:282] Add success.
I0321 04:07:43.419999  543705 net.go:648] Add success.
I0321 04:07:43.422319  543705 net.go:770] primary dev: ETH0
I0321 04:07:43.422332  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:07:43.422344  543705 net.go:698] Add success.
I0321 04:07:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:07:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:07:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:07:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:07:53.409780  543705 memory.go:184] no items to output this cycle
I0321 04:07:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 04:08:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:08:03.409779  543705 memory.go:184] no items to output this cycle
I0321 04:08:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 04:08:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:08:13.409812  543705 memory.go:191] Add success.
I0321 04:08:13.409820  543705 cpu.go:282] Add success.
W0321 04:08:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:08:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:08:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:08:13.420075  543705 net.go:648] Add success.
I0321 04:08:13.423251  543705 net.go:770] primary dev: ETH0
I0321 04:08:13.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:08:13.423276  543705 net.go:698] Add success.
I0321 04:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:08:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:08:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 04:08:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:08:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 04:08:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:08:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:08:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:08:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:08:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:08:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:08:23.409914  543705 cpu.go:275] no items to output this cycle
I0321 04:08:23.409920  543705 memory.go:184] no items to output this cycle
I0321 04:08:27.647880  543705 disk_info.go:125] begin check local disk info of client
I0321 04:08:27.650504  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:08:27.650510  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 04:08:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:08:33.409785  543705 memory.go:184] no items to output this cycle
I0321 04:08:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 04:08:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:08:43.409815  543705 memory.go:191] Add success.
I0321 04:08:43.409823  543705 cpu.go:282] Add success.
I0321 04:08:43.419980  543705 net.go:648] Add success.
I0321 04:08:43.422643  543705 net.go:770] primary dev: ETH0
I0321 04:08:43.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:08:43.422668  543705 net.go:698] Add success.
I0321 04:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:08:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:08:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:08:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:08:53.409798  543705 memory.go:184] no items to output this cycle
I0321 04:08:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 04:09:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:09:03.409785  543705 memory.go:184] no items to output this cycle
I0321 04:09:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 04:09:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:09:13.409793  543705 memory.go:191] Add success.
I0321 04:09:13.409794  543705 cpu.go:282] Add success.
W0321 04:09:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:09:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:09:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:09:13.420084  543705 net.go:648] Add success.
I0321 04:09:13.422600  543705 net.go:770] primary dev: ETH0
I0321 04:09:13.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:09:13.422630  543705 net.go:698] Add success.
I0321 04:09:13.468938  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8f2367b3-0e9d-4eee-bb0e-1f52859be378","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:09:13.468972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:09:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:09:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:09:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0321 04:09:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:09:14.456735  543705 disk_worker.go:494] system disk:vda1
I0321 04:09:14.456771  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:09:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:09:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:09:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:09:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:09:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:09:23.409912  543705 memory.go:184] no items to output this cycle
I0321 04:09:23.409913  543705 cpu.go:275] no items to output this cycle
I0321 04:09:27.650894  543705 disk_info.go:125] begin check local disk info of client
I0321 04:09:27.653452  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:09:27.653459  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c6c0 0xc00039c700]
E0321 04:09:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:09:33.409813  543705 memory.go:184] no items to output this cycle
I0321 04:09:33.409825  543705 cpu.go:275] no items to output this cycle
I0321 04:09:38.724324  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:09:38.724330  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:09:43.410935  543705 memory.go:191] Add success.
I0321 04:09:43.409797  543705 cpu.go:282] Add success.
I0321 04:09:43.420651  543705 net.go:648] Add success.
I0321 04:09:43.423583  543705 net.go:770] primary dev: ETH0
I0321 04:09:43.423601  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:09:43.423616  543705 net.go:698] Add success.
I0321 04:09:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:09:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:09:53.409812  543705 memory.go:184] no items to output this cycle
I0321 04:09:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 04:10:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:10:03.409784  543705 memory.go:184] no items to output this cycle
I0321 04:10:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 04:10:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:10:13.409816  543705 memory.go:191] Add success.
I0321 04:10:13.409826  543705 cpu.go:282] Add success.
W0321 04:10:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:10:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:10:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:10:13.420121  543705 net.go:648] Add success.
I0321 04:10:13.422753  543705 net.go:770] primary dev: ETH0
I0321 04:10:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:10:13.422780  543705 net.go:698] Add success.
I0321 04:10:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:10:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:10:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 04:10:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:10:14.456579  543705 disk_worker.go:494] system disk:vda1
I0321 04:10:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:10:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:10:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:10:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:10:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:10:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:10:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:10:23.409793  543705 memory.go:184] no items to output this cycle
I0321 04:10:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 04:10:27.653669  543705 disk_info.go:125] begin check local disk info of client
I0321 04:10:27.656230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:10:27.656236  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048bb40 0xc00048bb80]
E0321 04:10:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:10:33.409815  543705 memory.go:184] no items to output this cycle
I0321 04:10:33.409824  543705 cpu.go:275] no items to output this cycle
E0321 04:10:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:10:43.410002  543705 memory.go:191] Add success.
I0321 04:10:43.410092  543705 cpu.go:282] Add success.
I0321 04:10:43.419722  543705 net.go:648] Add success.
I0321 04:10:43.422550  543705 net.go:770] primary dev: ETH0
I0321 04:10:43.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:10:43.422580  543705 net.go:698] Add success.
I0321 04:10:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:10:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:10:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:10:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:10:53.409777  543705 memory.go:184] no items to output this cycle
I0321 04:10:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:11:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:11:03.409795  543705 memory.go:184] no items to output this cycle
I0321 04:11:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 04:11:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:11:13.409786  543705 memory.go:191] Add success.
I0321 04:11:13.409793  543705 cpu.go:282] Add success.
W0321 04:11:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:11:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:11:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:11:13.420251  543705 net.go:648] Add success.
I0321 04:11:13.423247  543705 net.go:770] primary dev: ETH0
I0321 04:11:13.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:11:13.423270  543705 net.go:698] Add success.
I0321 04:11:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:11:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:11:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 04:11:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:11:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 04:11:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:11:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:11:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:11:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:11:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:11:23.410294  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:11:23.410314  543705 memory.go:184] no items to output this cycle
I0321 04:11:23.410326  543705 cpu.go:275] no items to output this cycle
I0321 04:11:27.656936  543705 disk_info.go:125] begin check local disk info of client
I0321 04:11:27.659613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:11:27.659619  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bd200 0xc0004bd240]
E0321 04:11:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:11:33.409808  543705 memory.go:184] no items to output this cycle
I0321 04:11:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 04:11:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:11:43.409833  543705 memory.go:191] Add success.
I0321 04:11:43.409842  543705 cpu.go:282] Add success.
I0321 04:11:43.420216  543705 net.go:648] Add success.
I0321 04:11:43.422620  543705 net.go:770] primary dev: ETH0
I0321 04:11:43.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:11:43.422644  543705 net.go:698] Add success.
I0321 04:11:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:11:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:11:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:11:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:11:53.410280  543705 memory.go:184] no items to output this cycle
I0321 04:11:53.410286  543705 cpu.go:275] no items to output this cycle
E0321 04:12:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:12:03.409812  543705 memory.go:184] no items to output this cycle
I0321 04:12:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 04:12:13.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:12:13.410293  543705 memory.go:191] Add success.
I0321 04:12:13.410296  543705 cpu.go:282] Add success.
W0321 04:12:13.410321  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:12:13.410333  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:12:13.410336  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:12:13.420711  543705 net.go:648] Add success.
I0321 04:12:13.423597  543705 net.go:770] primary dev: ETH0
I0321 04:12:13.423611  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:12:13.423624  543705 net.go:698] Add success.
I0321 04:12:13.463728  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"270798a4-1a24-429a-92d6-ea9eaeb6ec25","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:12:13.463761  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 04:12:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:12:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 04:12:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:12:14.456199  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:12:14.456208  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:12:14.456214  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:12:14.456467  543705 disk_worker.go:494] system disk:vda1
I0321 04:12:14.456497  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:12:15.456777  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:12:15.456785  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:12:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:12:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:12:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:12:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:12:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:12:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:12:23.409777  543705 memory.go:184] no items to output this cycle
I0321 04:12:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 04:12:27.659940  543705 disk_info.go:125] begin check local disk info of client
I0321 04:12:27.662593  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:12:27.662599  543705 disk_info.go:196] parse disk info done, disk is : [0xc000547380 0xc0005473c0]
E0321 04:12:33.410265  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:12:33.410283  543705 memory.go:184] no items to output this cycle
I0321 04:12:33.410299  543705 cpu.go:275] no items to output this cycle
I0321 04:12:38.725317  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:12:38.725324  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:12:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:12:43.410622  543705 memory.go:191] Add success.
I0321 04:12:43.409791  543705 cpu.go:282] Add success.
I0321 04:12:43.419763  543705 net.go:648] Add success.
I0321 04:12:43.422579  543705 net.go:770] primary dev: ETH0
I0321 04:12:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:12:43.422603  543705 net.go:698] Add success.
I0321 04:12:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:12:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:12:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:12:53.409781  543705 memory.go:184] no items to output this cycle
I0321 04:12:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 04:13:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:13:03.409774  543705 memory.go:184] no items to output this cycle
I0321 04:13:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 04:13:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:13:13.409785  543705 memory.go:191] Add success.
I0321 04:13:13.409801  543705 cpu.go:282] Add success.
W0321 04:13:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:13:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:13:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:13:13.420069  543705 net.go:648] Add success.
I0321 04:13:13.422968  543705 net.go:770] primary dev: ETH0
I0321 04:13:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:13:13.423004  543705 net.go:698] Add success.
I0321 04:13:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:13:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:13:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 04:13:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:13:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 04:13:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:13:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:13:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:13:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:13:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:13:23.410221  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:13:23.410239  543705 memory.go:184] no items to output this cycle
I0321 04:13:23.410256  543705 cpu.go:275] no items to output this cycle
I0321 04:13:27.662953  543705 disk_info.go:125] begin check local disk info of client
I0321 04:13:27.665543  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:13:27.665550  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e6f80 0xc0000e6fc0]
E0321 04:13:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:13:33.409775  543705 memory.go:184] no items to output this cycle
I0321 04:13:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 04:13:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:13:43.409812  543705 memory.go:191] Add success.
I0321 04:13:43.409819  543705 cpu.go:282] Add success.
I0321 04:13:43.419716  543705 net.go:648] Add success.
I0321 04:13:43.422461  543705 net.go:770] primary dev: ETH0
I0321 04:13:43.422473  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:13:43.422485  543705 net.go:698] Add success.
I0321 04:13:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:13:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:13:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:13:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:13:53.409770  543705 memory.go:184] no items to output this cycle
I0321 04:13:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 04:14:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:14:03.409810  543705 memory.go:184] no items to output this cycle
I0321 04:14:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 04:14:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:14:13.409782  543705 memory.go:191] Add success.
I0321 04:14:13.409804  543705 cpu.go:282] Add success.
W0321 04:14:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:14:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:14:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:14:13.420066  543705 net.go:648] Add success.
I0321 04:14:13.422736  543705 net.go:770] primary dev: ETH0
I0321 04:14:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:14:13.422764  543705 net.go:698] Add success.
I0321 04:14:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:14:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:14:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 04:14:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:14:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 04:14:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:14:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:14:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:14:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:14:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:14:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:14:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:14:23.409783  543705 memory.go:184] no items to output this cycle
I0321 04:14:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 04:14:27.665676  543705 disk_info.go:125] begin check local disk info of client
I0321 04:14:27.668249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:14:27.668254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002924c0 0xc000292500]
E0321 04:14:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:14:33.409773  543705 memory.go:184] no items to output this cycle
I0321 04:14:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 04:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:14:43.409806  543705 memory.go:191] Add success.
I0321 04:14:43.409815  543705 cpu.go:282] Add success.
I0321 04:14:43.419729  543705 net.go:648] Add success.
I0321 04:14:43.422258  543705 net.go:770] primary dev: ETH0
I0321 04:14:43.422273  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:14:43.422285  543705 net.go:698] Add success.
I0321 04:14:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:14:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:14:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:14:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:14:53.409780  543705 memory.go:184] no items to output this cycle
I0321 04:14:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 04:15:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:15:03.409801  543705 memory.go:184] no items to output this cycle
I0321 04:15:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 04:15:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:15:13.409777  543705 memory.go:191] Add success.
W0321 04:15:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 04:15:13.409808  543705 cpu.go:282] Add success.
W0321 04:15:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:15:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:15:13.420338  543705 net.go:770] primary dev: ETH0
I0321 04:15:13.420353  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:15:13.420366  543705 net.go:698] Add success.
I0321 04:15:13.420728  543705 net.go:648] Add success.
I0321 04:15:13.469305  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"69e79c18-fb8c-4b77-9e78-33ee0321443c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:15:13.469338  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:15:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:15:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:15:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 04:15:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:15:14.456515  543705 disk_worker.go:494] system disk:vda1
I0321 04:15:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:15:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:15:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:15:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:15:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:15:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:15:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:15:23.409778  543705 memory.go:184] no items to output this cycle
I0321 04:15:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 04:15:27.668990  543705 disk_info.go:125] begin check local disk info of client
I0321 04:15:27.671647  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:15:27.671654  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8c00 0xc0003b8c40]
E0321 04:15:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:15:33.409782  543705 memory.go:184] no items to output this cycle
I0321 04:15:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 04:15:38.725742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:15:38.725748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:15:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:15:43.410666  543705 memory.go:191] Add success.
I0321 04:15:43.409820  543705 cpu.go:282] Add success.
I0321 04:15:43.420578  543705 net.go:648] Add success.
I0321 04:15:43.423280  543705 net.go:770] primary dev: ETH0
I0321 04:15:43.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:15:43.423305  543705 net.go:698] Add success.
I0321 04:15:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:15:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:15:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:15:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:15:53.409789  543705 memory.go:184] no items to output this cycle
I0321 04:15:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 04:16:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:16:03.409781  543705 memory.go:184] no items to output this cycle
I0321 04:16:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 04:16:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:16:13.409812  543705 memory.go:191] Add success.
I0321 04:16:13.409820  543705 cpu.go:282] Add success.
W0321 04:16:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:16:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:16:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:16:13.420227  543705 net.go:648] Add success.
I0321 04:16:13.422880  543705 net.go:770] primary dev: ETH0
I0321 04:16:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:16:13.422905  543705 net.go:698] Add success.
I0321 04:16:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:16:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:16:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 04:16:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:16:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 04:16:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:16:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:16:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:16:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:16:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:16:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:16:23.409791  543705 memory.go:184] no items to output this cycle
I0321 04:16:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 04:16:27.671992  543705 disk_info.go:125] begin check local disk info of client
I0321 04:16:27.674563  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:16:27.674569  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a40 0xc0000c5a80]
E0321 04:16:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:16:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 04:16:33.409798  543705 memory.go:184] no items to output this cycle
E0321 04:16:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:16:43.409786  543705 memory.go:191] Add success.
I0321 04:16:43.409802  543705 cpu.go:282] Add success.
I0321 04:16:43.420179  543705 net.go:648] Add success.
I0321 04:16:43.422712  543705 net.go:770] primary dev: ETH0
I0321 04:16:43.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:16:43.422738  543705 net.go:698] Add success.
I0321 04:16:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:16:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:16:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:16:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:16:53.409789  543705 memory.go:184] no items to output this cycle
I0321 04:16:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 04:17:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:17:03.409763  543705 memory.go:184] no items to output this cycle
I0321 04:17:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 04:17:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:17:13.409794  543705 memory.go:191] Add success.
W0321 04:17:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:17:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:17:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:17:13.409848  543705 cpu.go:282] Add success.
I0321 04:17:13.420157  543705 net.go:648] Add success.
I0321 04:17:13.422822  543705 net.go:770] primary dev: ETH0
I0321 04:17:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:17:13.422847  543705 net.go:698] Add success.
I0321 04:17:13.453399  543705 event_worker.go:152] Polling the log file for events...
W0321 04:17:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:17:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 04:17:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:17:14.456814  543705 disk_worker.go:494] system disk:vda1
I0321 04:17:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:17:14.457145  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:17:14.457153  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:17:14.457158  543705 custom_config.go:64] query custom config with name: gpu
E0321 04:17:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:17:15.456872  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:17:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:17:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:17:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:17:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:17:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:17:23.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:17:23.410249  543705 memory.go:184] no items to output this cycle
I0321 04:17:23.410248  543705 cpu.go:275] no items to output this cycle
I0321 04:17:27.675025  543705 disk_info.go:125] begin check local disk info of client
I0321 04:17:27.677598  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:17:27.677604  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393d00 0xc000393d40]
E0321 04:17:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:17:33.409801  543705 memory.go:184] no items to output this cycle
I0321 04:17:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 04:17:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:17:43.409910  543705 memory.go:191] Add success.
I0321 04:17:43.409924  543705 cpu.go:282] Add success.
I0321 04:17:43.419757  543705 net.go:648] Add success.
I0321 04:17:43.422441  543705 net.go:770] primary dev: ETH0
I0321 04:17:43.422455  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:17:43.422469  543705 net.go:698] Add success.
I0321 04:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:17:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:17:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:17:53.409804  543705 memory.go:184] no items to output this cycle
I0321 04:17:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 04:18:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:18:03.409765  543705 memory.go:184] no items to output this cycle
I0321 04:18:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 04:18:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:18:13.409807  543705 memory.go:191] Add success.
I0321 04:18:13.409816  543705 cpu.go:282] Add success.
W0321 04:18:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:18:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:18:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:18:13.420198  543705 net.go:648] Add success.
I0321 04:18:13.422855  543705 net.go:770] primary dev: ETH0
I0321 04:18:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:18:13.422883  543705 net.go:698] Add success.
I0321 04:18:13.463303  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"25b23a1b-822d-43e1-aafb-6d39519c9496","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:18:13.463337  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:18:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:18:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:18:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 04:18:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:18:14.456486  543705 disk_worker.go:494] system disk:vda1
I0321 04:18:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:18:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:18:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:18:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:18:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:18:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:18:23.409801  543705 memory.go:184] no items to output this cycle
I0321 04:18:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 04:18:27.677675  543705 disk_info.go:125] begin check local disk info of client
I0321 04:18:27.680264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:18:27.680273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bbd00 0xc0002bbd40]
E0321 04:18:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:18:33.409817  543705 memory.go:184] no items to output this cycle
I0321 04:18:33.409826  543705 cpu.go:275] no items to output this cycle
I0321 04:18:38.727332  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:18:38.727339  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:18:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:18:43.410797  543705 memory.go:191] Add success.
I0321 04:18:43.409901  543705 cpu.go:282] Add success.
I0321 04:18:43.419727  543705 net.go:648] Add success.
I0321 04:18:43.422193  543705 net.go:770] primary dev: ETH0
I0321 04:18:43.422206  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:18:43.422218  543705 net.go:698] Add success.
I0321 04:18:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:18:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:18:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:18:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:18:53.409794  543705 memory.go:184] no items to output this cycle
I0321 04:18:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 04:19:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:19:03.409797  543705 memory.go:184] no items to output this cycle
I0321 04:19:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 04:19:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:19:13.409803  543705 memory.go:191] Add success.
I0321 04:19:13.409805  543705 cpu.go:282] Add success.
W0321 04:19:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:19:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:19:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:19:13.420231  543705 net.go:648] Add success.
I0321 04:19:13.422896  543705 net.go:770] primary dev: ETH0
I0321 04:19:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:19:13.422919  543705 net.go:698] Add success.
I0321 04:19:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:19:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:19:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 04:19:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:19:14.456566  543705 disk_worker.go:494] system disk:vda1
I0321 04:19:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:19:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:19:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:19:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:19:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:19:23.409799  543705 memory.go:184] no items to output this cycle
I0321 04:19:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 04:19:27.681042  543705 disk_info.go:125] begin check local disk info of client
I0321 04:19:27.683718  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:19:27.683724  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bfdc0 0xc0003bfe00]
E0321 04:19:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:19:33.409780  543705 memory.go:184] no items to output this cycle
I0321 04:19:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 04:19:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:19:43.409830  543705 memory.go:191] Add success.
I0321 04:19:43.409838  543705 cpu.go:282] Add success.
I0321 04:19:43.420353  543705 net.go:648] Add success.
I0321 04:19:43.423127  543705 net.go:770] primary dev: ETH0
I0321 04:19:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:19:43.423151  543705 net.go:698] Add success.
I0321 04:19:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:19:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:19:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:19:53.410234  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:19:53.410257  543705 memory.go:184] no items to output this cycle
I0321 04:19:53.410270  543705 cpu.go:275] no items to output this cycle
E0321 04:20:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:20:03.409780  543705 memory.go:184] no items to output this cycle
I0321 04:20:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 04:20:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:20:13.409847  543705 memory.go:191] Add success.
I0321 04:20:13.409848  543705 cpu.go:282] Add success.
W0321 04:20:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:20:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:20:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:20:13.420266  543705 net.go:648] Add success.
I0321 04:20:13.422769  543705 net.go:770] primary dev: ETH0
I0321 04:20:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:20:13.422793  543705 net.go:698] Add success.
I0321 04:20:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:20:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:20:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 04:20:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:20:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 04:20:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:20:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:20:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:20:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:20:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:20:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:20:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:20:23.409800  543705 memory.go:184] no items to output this cycle
I0321 04:20:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 04:20:27.685062  543705 disk_info.go:125] begin check local disk info of client
I0321 04:20:27.687581  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:20:27.687587  543705 disk_info.go:196] parse disk info done, disk is : [0xc000392a40 0xc000392a80]
E0321 04:20:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:20:33.409800  543705 memory.go:184] no items to output this cycle
I0321 04:20:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 04:20:43.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:20:43.409929  543705 memory.go:191] Add success.
I0321 04:20:43.409930  543705 cpu.go:282] Add success.
I0321 04:20:43.419727  543705 net.go:648] Add success.
I0321 04:20:43.422707  543705 net.go:770] primary dev: ETH0
I0321 04:20:43.422720  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:20:43.422732  543705 net.go:698] Add success.
I0321 04:20:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:20:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:20:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:20:53.409798  543705 memory.go:184] no items to output this cycle
I0321 04:20:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 04:21:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:21:03.409778  543705 memory.go:184] no items to output this cycle
I0321 04:21:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 04:21:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:21:13.409797  543705 memory.go:191] Add success.
I0321 04:21:13.409818  543705 cpu.go:282] Add success.
W0321 04:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:21:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:21:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:21:13.420102  543705 net.go:648] Add success.
I0321 04:21:13.422731  543705 net.go:770] primary dev: ETH0
I0321 04:21:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:21:13.422756  543705 net.go:698] Add success.
I0321 04:21:13.469417  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d08daa44-817f-426f-8526-43f0394f36f0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:21:13.469450  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:21:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:21:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:21:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 04:21:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:21:14.456453  543705 disk_worker.go:494] system disk:vda1
I0321 04:21:14.456498  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:21:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:21:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:21:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:21:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:21:23.409812  543705 memory.go:184] no items to output this cycle
I0321 04:21:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 04:21:27.688070  543705 disk_info.go:125] begin check local disk info of client
I0321 04:21:27.690755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:21:27.690761  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dfdc0 0xc0003dfe00]
I0321 04:21:33.409881  543705 cpu.go:275] no items to output this cycle
E0321 04:21:33.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:21:33.409901  543705 memory.go:184] no items to output this cycle
I0321 04:21:38.728338  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:21:38.728344  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:21:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:21:43.410848  543705 memory.go:191] Add success.
I0321 04:21:43.409842  543705 cpu.go:282] Add success.
I0321 04:21:43.420518  543705 net.go:648] Add success.
I0321 04:21:43.423460  543705 net.go:770] primary dev: ETH0
I0321 04:21:43.423477  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:21:43.423489  543705 net.go:698] Add success.
I0321 04:21:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:21:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:21:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:21:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:21:53.409802  543705 memory.go:184] no items to output this cycle
I0321 04:21:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 04:22:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:22:03.409809  543705 memory.go:184] no items to output this cycle
I0321 04:22:03.409822  543705 cpu.go:275] no items to output this cycle
E0321 04:22:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:22:13.409800  543705 cpu.go:282] Add success.
I0321 04:22:13.409808  543705 memory.go:191] Add success.
W0321 04:22:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:22:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:22:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:22:13.420072  543705 net.go:648] Add success.
I0321 04:22:13.422849  543705 net.go:770] primary dev: ETH0
I0321 04:22:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:22:13.422874  543705 net.go:698] Add success.
W0321 04:22:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:22:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 04:22:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:22:14.455881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:22:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:22:14.455896  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:22:14.456541  543705 disk_worker.go:494] system disk:vda1
I0321 04:22:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:22:15.456846  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:22:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:22:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:22:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:22:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:22:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:22:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:22:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:22:23.409799  543705 memory.go:184] no items to output this cycle
I0321 04:22:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 04:22:27.692101  543705 disk_info.go:125] begin check local disk info of client
I0321 04:22:27.694663  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:22:27.694669  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0321 04:22:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:22:33.409823  543705 memory.go:184] no items to output this cycle
I0321 04:22:33.409831  543705 cpu.go:275] no items to output this cycle
E0321 04:22:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:22:43.409794  543705 memory.go:191] Add success.
I0321 04:22:43.409806  543705 cpu.go:282] Add success.
I0321 04:22:43.419982  543705 net.go:648] Add success.
I0321 04:22:43.422707  543705 net.go:770] primary dev: ETH0
I0321 04:22:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:22:43.422732  543705 net.go:698] Add success.
I0321 04:22:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:22:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:22:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:22:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:22:53.409796  543705 memory.go:184] no items to output this cycle
I0321 04:22:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:23:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:23:03.409778  543705 memory.go:184] no items to output this cycle
I0321 04:23:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 04:23:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:23:13.409792  543705 memory.go:191] Add success.
I0321 04:23:13.409812  543705 cpu.go:282] Add success.
W0321 04:23:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:23:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:23:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:23:13.420071  543705 net.go:648] Add success.
I0321 04:23:13.422650  543705 net.go:770] primary dev: ETH0
I0321 04:23:13.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:23:13.422679  543705 net.go:698] Add success.
I0321 04:23:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:23:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:23:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 04:23:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:23:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 04:23:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:23:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:23:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:23:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:23:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:23:23.409799  543705 memory.go:184] no items to output this cycle
I0321 04:23:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 04:23:27.696104  543705 disk_info.go:125] begin check local disk info of client
I0321 04:23:27.698658  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:23:27.698665  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bebc0 0xc0002bec00]
E0321 04:23:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:23:33.409908  543705 memory.go:184] no items to output this cycle
I0321 04:23:33.409952  543705 cpu.go:275] no items to output this cycle
E0321 04:23:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:23:43.409802  543705 memory.go:191] Add success.
I0321 04:23:43.409808  543705 cpu.go:282] Add success.
I0321 04:23:43.419892  543705 net.go:648] Add success.
I0321 04:23:43.422486  543705 net.go:770] primary dev: ETH0
I0321 04:23:43.422499  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:23:43.422512  543705 net.go:698] Add success.
I0321 04:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:23:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:23:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:23:53.409762  543705 memory.go:184] no items to output this cycle
I0321 04:23:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 04:24:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:24:03.409816  543705 memory.go:184] no items to output this cycle
I0321 04:24:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 04:24:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:24:13.409784  543705 memory.go:191] Add success.
W0321 04:24:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 04:24:13.409821  543705 cpu.go:282] Add success.
W0321 04:24:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:24:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:24:13.420200  543705 net.go:648] Add success.
I0321 04:24:13.423047  543705 net.go:770] primary dev: ETH0
I0321 04:24:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:24:13.423076  543705 net.go:698] Add success.
I0321 04:24:13.464179  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a5c5821c-3ac9-4e3a-bfbb-f524080de16a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:24:13.464213  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:24:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:24:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:24:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 04:24:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:24:14.456613  543705 disk_worker.go:494] system disk:vda1
I0321 04:24:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:24:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:24:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:24:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:24:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:24:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:24:23.409804  543705 memory.go:184] no items to output this cycle
I0321 04:24:23.409830  543705 cpu.go:275] no items to output this cycle
I0321 04:24:27.700139  543705 disk_info.go:125] begin check local disk info of client
I0321 04:24:27.702730  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:24:27.702737  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e2000 0xc0001e2040]
E0321 04:24:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:24:33.409793  543705 memory.go:184] no items to output this cycle
I0321 04:24:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 04:24:38.729339  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:24:38.729345  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:24:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:24:43.410585  543705 memory.go:191] Add success.
I0321 04:24:43.409788  543705 cpu.go:282] Add success.
I0321 04:24:43.420289  543705 net.go:648] Add success.
I0321 04:24:43.422969  543705 net.go:770] primary dev: ETH0
I0321 04:24:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:24:43.422995  543705 net.go:698] Add success.
I0321 04:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:24:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:24:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:24:53.409780  543705 memory.go:184] no items to output this cycle
I0321 04:24:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 04:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:25:03.409800  543705 memory.go:184] no items to output this cycle
I0321 04:25:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 04:25:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:25:13.409781  543705 memory.go:191] Add success.
I0321 04:25:13.409800  543705 cpu.go:282] Add success.
W0321 04:25:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:25:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:25:13.420136  543705 net.go:648] Add success.
I0321 04:25:13.422758  543705 net.go:770] primary dev: ETH0
I0321 04:25:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:25:13.422786  543705 net.go:698] Add success.
I0321 04:25:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:25:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:25:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 04:25:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:25:14.456544  543705 disk_worker.go:494] system disk:vda1
I0321 04:25:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:25:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:25:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:25:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:25:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:25:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:25:23.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:25:23.409826  543705 memory.go:184] no items to output this cycle
I0321 04:25:23.409833  543705 cpu.go:275] no items to output this cycle
I0321 04:25:27.704148  543705 disk_info.go:125] begin check local disk info of client
I0321 04:25:27.706700  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:25:27.706706  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327340 0xc000327380]
E0321 04:25:33.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:25:33.409915  543705 memory.go:184] no items to output this cycle
I0321 04:25:33.410043  543705 cpu.go:275] no items to output this cycle
E0321 04:25:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:25:43.409795  543705 memory.go:191] Add success.
I0321 04:25:43.409809  543705 cpu.go:282] Add success.
I0321 04:25:43.419908  543705 net.go:648] Add success.
I0321 04:25:43.422777  543705 net.go:770] primary dev: ETH0
I0321 04:25:43.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:25:43.422806  543705 net.go:698] Add success.
I0321 04:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:25:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:25:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:25:53.409784  543705 memory.go:184] no items to output this cycle
I0321 04:25:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 04:26:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:26:03.409774  543705 memory.go:184] no items to output this cycle
I0321 04:26:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 04:26:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:26:13.409811  543705 memory.go:191] Add success.
I0321 04:26:13.409821  543705 cpu.go:282] Add success.
W0321 04:26:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:26:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:26:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:26:13.420188  543705 net.go:648] Add success.
I0321 04:26:13.423590  543705 net.go:770] primary dev: ETH0
I0321 04:26:13.423603  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:26:13.423616  543705 net.go:698] Add success.
I0321 04:26:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:26:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:26:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 04:26:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:26:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 04:26:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:26:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:26:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:26:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:26:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:26:23.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:26:23.410272  543705 memory.go:184] no items to output this cycle
I0321 04:26:23.410338  543705 cpu.go:275] no items to output this cycle
I0321 04:26:27.708175  543705 disk_info.go:125] begin check local disk info of client
I0321 04:26:27.710828  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:26:27.710836  543705 disk_info.go:196] parse disk info done, disk is : [0xc000258000 0xc000258040]
E0321 04:26:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:26:33.409870  543705 memory.go:184] no items to output this cycle
I0321 04:26:33.409968  543705 cpu.go:275] no items to output this cycle
E0321 04:26:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:26:43.409807  543705 memory.go:191] Add success.
I0321 04:26:43.409810  543705 cpu.go:282] Add success.
I0321 04:26:43.420060  543705 net.go:648] Add success.
I0321 04:26:43.422601  543705 net.go:770] primary dev: ETH0
I0321 04:26:43.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:26:43.422625  543705 net.go:698] Add success.
I0321 04:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:26:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:26:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:26:53.410383  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:26:53.410398  543705 memory.go:184] no items to output this cycle
I0321 04:26:53.410422  543705 cpu.go:275] no items to output this cycle
E0321 04:27:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:27:03.409776  543705 memory.go:184] no items to output this cycle
I0321 04:27:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 04:27:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:27:13.409788  543705 memory.go:191] Add success.
I0321 04:27:13.409791  543705 cpu.go:282] Add success.
W0321 04:27:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:27:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:27:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:27:13.420203  543705 net.go:648] Add success.
I0321 04:27:13.422787  543705 net.go:770] primary dev: ETH0
I0321 04:27:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:27:13.422813  543705 net.go:698] Add success.
I0321 04:27:13.429071  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 04:27:13.453246  543705 event_worker.go:152] Polling the log file for events...
I0321 04:27:13.468695  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"939ed479-12f2-4447-9068-e3ff64528ad7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:27:13.468729  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 04:27:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:27:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 04:27:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:27:14.455859  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:27:14.455868  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:27:14.455873  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:27:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 04:27:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:27:15.456963  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:27:15.456971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:27:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:27:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:27:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:27:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:27:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:27:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:27:23.409779  543705 memory.go:184] no items to output this cycle
I0321 04:27:23.409977  543705 cpu.go:275] no items to output this cycle
I0321 04:27:27.712198  543705 disk_info.go:125] begin check local disk info of client
I0321 04:27:27.714761  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:27:27.714769  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d4100 0xc0004d4140]
E0321 04:27:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:27:33.409775  543705 memory.go:184] no items to output this cycle
I0321 04:27:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 04:27:38.729739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:27:38.729746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:27:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:27:43.410636  543705 memory.go:191] Add success.
I0321 04:27:43.409824  543705 cpu.go:282] Add success.
I0321 04:27:43.420163  543705 net.go:770] primary dev: ETH0
I0321 04:27:43.420176  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:27:43.420188  543705 net.go:698] Add success.
I0321 04:27:43.420529  543705 net.go:648] Add success.
I0321 04:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:27:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:27:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:27:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:27:53.409775  543705 memory.go:184] no items to output this cycle
I0321 04:27:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 04:28:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:28:03.409784  543705 memory.go:184] no items to output this cycle
I0321 04:28:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 04:28:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:28:13.409808  543705 memory.go:191] Add success.
I0321 04:28:13.409820  543705 cpu.go:282] Add success.
W0321 04:28:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:28:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:28:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:28:13.420314  543705 net.go:648] Add success.
I0321 04:28:13.423465  543705 net.go:770] primary dev: ETH0
I0321 04:28:13.423483  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:28:13.423498  543705 net.go:698] Add success.
I0321 04:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:28:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:28:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 04:28:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:28:14.456504  543705 disk_worker.go:494] system disk:vda1
I0321 04:28:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:28:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:28:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:28:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:28:23.409815  543705 memory.go:184] no items to output this cycle
I0321 04:28:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 04:28:27.716216  543705 disk_info.go:125] begin check local disk info of client
I0321 04:28:27.718751  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:28:27.718758  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba00 0xc00007ba40]
I0321 04:28:33.409923  543705 cpu.go:275] no items to output this cycle
E0321 04:28:33.409930  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:28:33.409992  543705 memory.go:184] no items to output this cycle
E0321 04:28:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:28:43.409799  543705 memory.go:191] Add success.
I0321 04:28:43.409798  543705 cpu.go:282] Add success.
I0321 04:28:43.419993  543705 net.go:648] Add success.
I0321 04:28:43.422831  543705 net.go:770] primary dev: ETH0
I0321 04:28:43.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:28:43.422861  543705 net.go:698] Add success.
I0321 04:28:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:28:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:28:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:28:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:28:53.409799  543705 memory.go:184] no items to output this cycle
I0321 04:28:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 04:29:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:29:03.409776  543705 memory.go:184] no items to output this cycle
I0321 04:29:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:29:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:29:13.409782  543705 memory.go:191] Add success.
I0321 04:29:13.409804  543705 cpu.go:282] Add success.
W0321 04:29:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:29:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:29:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:29:13.420142  543705 net.go:648] Add success.
I0321 04:29:13.422792  543705 net.go:770] primary dev: ETH0
I0321 04:29:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:29:13.422820  543705 net.go:698] Add success.
I0321 04:29:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:29:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:29:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 04:29:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:29:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 04:29:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:29:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:29:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:29:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:29:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:29:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:29:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:29:23.409804  543705 memory.go:184] no items to output this cycle
I0321 04:29:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 04:29:27.720239  543705 disk_info.go:125] begin check local disk info of client
I0321 04:29:27.722815  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:29:27.722822  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493640 0xc000493680]
E0321 04:29:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:29:33.409800  543705 memory.go:184] no items to output this cycle
I0321 04:29:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 04:29:43.409953  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:29:43.409986  543705 cpu.go:282] Add success.
I0321 04:29:43.410069  543705 memory.go:191] Add success.
I0321 04:29:43.419709  543705 net.go:648] Add success.
I0321 04:29:43.422398  543705 net.go:770] primary dev: ETH0
I0321 04:29:43.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:29:43.422423  543705 net.go:698] Add success.
I0321 04:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:29:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:29:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:29:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:29:53.409784  543705 memory.go:184] no items to output this cycle
I0321 04:29:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 04:30:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:30:03.409772  543705 memory.go:184] no items to output this cycle
I0321 04:30:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 04:30:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:30:13.409781  543705 memory.go:191] Add success.
W0321 04:30:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:30:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:30:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:30:13.409827  543705 cpu.go:282] Add success.
I0321 04:30:13.420083  543705 net.go:648] Add success.
I0321 04:30:13.422949  543705 net.go:770] primary dev: ETH0
I0321 04:30:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:30:13.422974  543705 net.go:698] Add success.
I0321 04:30:13.468638  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c857555b-7330-42c3-9944-e02e882f1b08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:30:13.468672  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:30:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:30:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:30:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 04:30:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:30:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 04:30:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:30:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:30:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:30:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:30:16.472401  543705 disk_local_worker.go:436] Get disk info: []
I0321 04:30:23.409794  543705 cpu.go:275] no items to output this cycle
E0321 04:30:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:30:23.409816  543705 memory.go:184] no items to output this cycle
I0321 04:30:27.724258  543705 disk_info.go:125] begin check local disk info of client
I0321 04:30:27.726847  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:30:27.726853  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493100 0xc000493140]
E0321 04:30:33.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:30:33.409886  543705 memory.go:184] no items to output this cycle
I0321 04:30:33.409961  543705 cpu.go:275] no items to output this cycle
I0321 04:30:38.731355  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:30:38.731361  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:30:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:30:43.410569  543705 memory.go:191] Add success.
I0321 04:30:43.409830  543705 cpu.go:282] Add success.
I0321 04:30:43.420349  543705 net.go:648] Add success.
I0321 04:30:43.422866  543705 net.go:770] primary dev: ETH0
I0321 04:30:43.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:30:43.422892  543705 net.go:698] Add success.
I0321 04:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:30:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:30:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:30:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:30:53.409777  543705 memory.go:184] no items to output this cycle
I0321 04:30:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 04:31:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:31:03.409774  543705 memory.go:184] no items to output this cycle
I0321 04:31:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 04:31:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:31:13.409805  543705 memory.go:191] Add success.
I0321 04:31:13.409823  543705 cpu.go:282] Add success.
W0321 04:31:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:31:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:31:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:31:13.420117  543705 net.go:770] primary dev: ETH0
I0321 04:31:13.420130  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:31:13.420142  543705 net.go:698] Add success.
I0321 04:31:13.420488  543705 net.go:648] Add success.
I0321 04:31:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:31:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:31:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 04:31:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:31:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 04:31:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:31:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:31:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:31:23.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:31:23.409885  543705 memory.go:184] no items to output this cycle
I0321 04:31:23.409985  543705 cpu.go:275] no items to output this cycle
I0321 04:31:27.728283  543705 disk_info.go:125] begin check local disk info of client
I0321 04:31:27.730901  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:31:27.730909  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053a900 0xc00053a940]
E0321 04:31:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:31:33.409785  543705 memory.go:184] no items to output this cycle
I0321 04:31:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 04:31:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:31:43.409797  543705 cpu.go:282] Add success.
I0321 04:31:43.409806  543705 memory.go:191] Add success.
I0321 04:31:43.420010  543705 net.go:648] Add success.
I0321 04:31:43.422572  543705 net.go:770] primary dev: ETH0
I0321 04:31:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:31:43.422602  543705 net.go:698] Add success.
I0321 04:31:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:31:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:31:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:31:53.409781  543705 memory.go:184] no items to output this cycle
I0321 04:31:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 04:32:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:32:03.409802  543705 memory.go:184] no items to output this cycle
I0321 04:32:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 04:32:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:32:13.409792  543705 memory.go:191] Add success.
I0321 04:32:13.409799  543705 cpu.go:282] Add success.
W0321 04:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:32:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:32:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:32:13.420141  543705 net.go:648] Add success.
I0321 04:32:13.423344  543705 net.go:770] primary dev: ETH0
I0321 04:32:13.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:32:13.423370  543705 net.go:698] Add success.
W0321 04:32:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:32:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 04:32:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:32:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:32:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:32:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:32:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 04:32:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:32:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:32:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:32:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:32:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:32:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:32:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:32:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:32:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:32:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 04:32:23.409788  543705 memory.go:184] no items to output this cycle
I0321 04:32:27.732307  543705 disk_info.go:125] begin check local disk info of client
I0321 04:32:27.734872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:32:27.734879  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2480 0xc0002b24c0]
E0321 04:32:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:32:33.409780  543705 memory.go:184] no items to output this cycle
I0321 04:32:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 04:32:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:32:43.409794  543705 memory.go:191] Add success.
I0321 04:32:43.409798  543705 cpu.go:282] Add success.
I0321 04:32:43.419877  543705 net.go:648] Add success.
I0321 04:32:43.422409  543705 net.go:770] primary dev: ETH0
I0321 04:32:43.422424  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:32:43.422439  543705 net.go:698] Add success.
I0321 04:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:32:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:32:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:32:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:32:53.409792  543705 memory.go:184] no items to output this cycle
I0321 04:32:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 04:33:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:33:03.409801  543705 memory.go:184] no items to output this cycle
I0321 04:33:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 04:33:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:33:13.409782  543705 memory.go:191] Add success.
I0321 04:33:13.409805  543705 cpu.go:282] Add success.
W0321 04:33:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:33:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:33:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:33:13.420152  543705 net.go:648] Add success.
I0321 04:33:13.423098  543705 net.go:770] primary dev: ETH0
I0321 04:33:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:33:13.423134  543705 net.go:698] Add success.
I0321 04:33:13.463765  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f1325af1-aa65-4015-9138-af3e88a4de73","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:33:13.463796  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:33:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:33:14.455374  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:33:14.455389  543705 disk_worker.go:708] disk space is not compliant
W0321 04:33:14.455392  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:33:14.457556  543705 disk_worker.go:494] system disk:vda1
I0321 04:33:14.457584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:33:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:33:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:33:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:33:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:33:23.409782  543705 memory.go:184] no items to output this cycle
I0321 04:33:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 04:33:27.736323  543705 disk_info.go:125] begin check local disk info of client
I0321 04:33:27.738949  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:33:27.738956  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306540 0xc000306580]
E0321 04:33:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:33:33.409780  543705 memory.go:184] no items to output this cycle
I0321 04:33:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 04:33:38.732372  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:33:38.732379  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:33:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:33:43.410570  543705 memory.go:191] Add success.
I0321 04:33:43.409848  543705 cpu.go:282] Add success.
I0321 04:33:43.420348  543705 net.go:648] Add success.
I0321 04:33:43.422871  543705 net.go:770] primary dev: ETH0
I0321 04:33:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:33:43.422901  543705 net.go:698] Add success.
I0321 04:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:33:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:33:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:33:53.410225  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:33:53.410247  543705 memory.go:184] no items to output this cycle
I0321 04:33:53.410259  543705 cpu.go:275] no items to output this cycle
E0321 04:34:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:34:03.409777  543705 memory.go:184] no items to output this cycle
I0321 04:34:03.409800  543705 cpu.go:275] no items to output this cycle
W0321 04:34:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:34:13.409728  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:34:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 04:34:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:34:13.409827  543705 cpu.go:282] Add success.
I0321 04:34:13.409847  543705 memory.go:191] Add success.
I0321 04:34:13.420005  543705 net.go:648] Add success.
I0321 04:34:13.422966  543705 net.go:770] primary dev: ETH0
I0321 04:34:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:34:13.422990  543705 net.go:698] Add success.
I0321 04:34:14.453952  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:34:14.455235  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:34:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0321 04:34:14.455248  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:34:14.456619  543705 disk_worker.go:494] system disk:vda1
I0321 04:34:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:34:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:34:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:34:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:34:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:34:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:34:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:34:23.409802  543705 memory.go:184] no items to output this cycle
I0321 04:34:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 04:34:27.740350  543705 disk_info.go:125] begin check local disk info of client
I0321 04:34:27.742972  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:34:27.742979  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465080 0xc0004650c0]
E0321 04:34:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:34:33.409786  543705 memory.go:184] no items to output this cycle
I0321 04:34:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 04:34:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:34:43.409816  543705 memory.go:191] Add success.
I0321 04:34:43.409822  543705 cpu.go:282] Add success.
I0321 04:34:43.419957  543705 net.go:648] Add success.
I0321 04:34:43.422738  543705 net.go:770] primary dev: ETH0
I0321 04:34:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:34:43.422768  543705 net.go:698] Add success.
I0321 04:34:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:34:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:34:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:34:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:34:53.409802  543705 memory.go:184] no items to output this cycle
I0321 04:34:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 04:35:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:35:03.409776  543705 memory.go:184] no items to output this cycle
I0321 04:35:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:35:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:35:13.409794  543705 memory.go:191] Add success.
I0321 04:35:13.409797  543705 cpu.go:282] Add success.
W0321 04:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:35:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:35:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:35:13.420202  543705 net.go:648] Add success.
I0321 04:35:13.422788  543705 net.go:770] primary dev: ETH0
I0321 04:35:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:35:13.422812  543705 net.go:698] Add success.
I0321 04:35:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:35:14.455084  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:35:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 04:35:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:35:14.456541  543705 disk_worker.go:494] system disk:vda1
I0321 04:35:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:35:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:35:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:35:23.409801  543705 memory.go:184] no items to output this cycle
I0321 04:35:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 04:35:27.744361  543705 disk_info.go:125] begin check local disk info of client
I0321 04:35:27.746913  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:35:27.746919  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465bc0 0xc000465c00]
E0321 04:35:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:35:33.409802  543705 memory.go:184] no items to output this cycle
I0321 04:35:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 04:35:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:35:43.409828  543705 memory.go:191] Add success.
I0321 04:35:43.409841  543705 cpu.go:282] Add success.
I0321 04:35:43.420382  543705 net.go:648] Add success.
I0321 04:35:43.423181  543705 net.go:770] primary dev: ETH0
I0321 04:35:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:35:43.423220  543705 net.go:698] Add success.
I0321 04:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:35:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:35:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:35:53.410544  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:35:53.410558  543705 memory.go:184] no items to output this cycle
I0321 04:35:53.410579  543705 cpu.go:275] no items to output this cycle
E0321 04:36:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:36:03.409801  543705 memory.go:184] no items to output this cycle
I0321 04:36:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 04:36:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:36:13.409881  543705 memory.go:191] Add success.
W0321 04:36:13.409920  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:36:13.409933  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:36:13.409936  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:36:13.409944  543705 cpu.go:282] Add success.
I0321 04:36:13.419730  543705 net.go:648] Add success.
I0321 04:36:13.422191  543705 net.go:770] primary dev: ETH0
I0321 04:36:13.422207  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:36:13.422220  543705 net.go:698] Add success.
I0321 04:36:13.468452  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"885c4c71-793c-4f16-981b-071407c0a655","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:36:13.468483  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:36:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:36:14.455085  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:36:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0321 04:36:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:36:14.456464  543705 disk_worker.go:494] system disk:vda1
I0321 04:36:14.456506  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:36:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:36:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:36:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:36:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:36:23.409764  543705 memory.go:184] no items to output this cycle
I0321 04:36:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 04:36:27.748384  543705 disk_info.go:125] begin check local disk info of client
I0321 04:36:27.750965  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:36:27.750972  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004659c0 0xc000465a00]
E0321 04:36:33.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:36:33.409834  543705 memory.go:184] no items to output this cycle
I0321 04:36:33.409847  543705 cpu.go:275] no items to output this cycle
I0321 04:36:38.733360  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:36:38.733366  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:36:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:36:43.410585  543705 memory.go:191] Add success.
I0321 04:36:43.409799  543705 cpu.go:282] Add success.
I0321 04:36:43.420287  543705 net.go:648] Add success.
I0321 04:36:43.422994  543705 net.go:770] primary dev: ETH0
I0321 04:36:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:36:43.423020  543705 net.go:698] Add success.
I0321 04:36:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:36:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:36:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:36:53.410363  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:36:53.410379  543705 memory.go:184] no items to output this cycle
I0321 04:36:53.410409  543705 cpu.go:275] no items to output this cycle
E0321 04:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:37:03.409781  543705 memory.go:184] no items to output this cycle
I0321 04:37:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 04:37:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:37:13.409771  543705 memory.go:191] Add success.
I0321 04:37:13.409790  543705 cpu.go:282] Add success.
W0321 04:37:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:37:13.412502  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:37:13.412507  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:37:13.419723  543705 net.go:648] Add success.
I0321 04:37:13.421472  543705 net.go:770] primary dev: ETH0
I0321 04:37:13.421486  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:37:13.421500  543705 net.go:698] Add success.
I0321 04:37:13.453069  543705 event_worker.go:152] Polling the log file for events...
W0321 04:37:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:37:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 04:37:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:37:14.456741  543705 disk_worker.go:494] system disk:vda1
I0321 04:37:14.456779  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:37:14.457110  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:37:14.457118  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:37:14.457122  543705 custom_config.go:64] query custom config with name: gpu
E0321 04:37:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:37:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:37:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:37:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:37:16.457955  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:37:16.457974  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:37:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:37:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:37:23.409798  543705 memory.go:184] no items to output this cycle
I0321 04:37:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 04:37:27.752399  543705 disk_info.go:125] begin check local disk info of client
I0321 04:37:27.755011  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:37:27.755018  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002756c0 0xc000275700]
E0321 04:37:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:37:33.409826  543705 memory.go:184] no items to output this cycle
I0321 04:37:33.409840  543705 cpu.go:275] no items to output this cycle
E0321 04:37:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:37:43.409779  543705 memory.go:191] Add success.
I0321 04:37:43.409804  543705 cpu.go:282] Add success.
I0321 04:37:43.419896  543705 net.go:648] Add success.
I0321 04:37:43.422634  543705 net.go:770] primary dev: ETH0
I0321 04:37:43.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:37:43.422660  543705 net.go:698] Add success.
I0321 04:37:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:37:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:37:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:37:53.410211  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:37:53.410226  543705 memory.go:184] no items to output this cycle
I0321 04:37:53.410231  543705 cpu.go:275] no items to output this cycle
E0321 04:38:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:38:03.409798  543705 memory.go:184] no items to output this cycle
I0321 04:38:03.409813  543705 cpu.go:275] no items to output this cycle
W0321 04:38:13.409703  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:38:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:38:13.409724  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 04:38:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:38:13.409818  543705 memory.go:191] Add success.
I0321 04:38:13.409823  543705 cpu.go:282] Add success.
I0321 04:38:13.420288  543705 net.go:648] Add success.
I0321 04:38:13.423148  543705 net.go:770] primary dev: ETH0
I0321 04:38:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:38:13.423173  543705 net.go:698] Add success.
I0321 04:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:38:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:38:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 04:38:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:38:14.456764  543705 disk_worker.go:494] system disk:vda1
I0321 04:38:14.456792  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:38:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:38:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:38:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:38:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:38:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:38:23.409779  543705 memory.go:184] no items to output this cycle
I0321 04:38:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 04:38:27.755104  543705 disk_info.go:125] begin check local disk info of client
I0321 04:38:27.757687  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:38:27.757693  543705 disk_info.go:196] parse disk info done, disk is : [0xc000321580 0xc0003215c0]
E0321 04:38:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:38:33.409815  543705 memory.go:184] no items to output this cycle
I0321 04:38:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 04:38:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:38:43.409787  543705 memory.go:191] Add success.
I0321 04:38:43.409788  543705 cpu.go:282] Add success.
I0321 04:38:43.419994  543705 net.go:648] Add success.
I0321 04:38:43.422556  543705 net.go:770] primary dev: ETH0
I0321 04:38:43.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:38:43.422581  543705 net.go:698] Add success.
I0321 04:38:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:38:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:38:53.409767  543705 memory.go:184] no items to output this cycle
I0321 04:38:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 04:39:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:39:03.409788  543705 memory.go:184] no items to output this cycle
I0321 04:39:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 04:39:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:39:13.409789  543705 memory.go:191] Add success.
I0321 04:39:13.409797  543705 cpu.go:282] Add success.
W0321 04:39:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:39:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:39:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:39:13.420223  543705 net.go:648] Add success.
I0321 04:39:13.422693  543705 net.go:770] primary dev: ETH0
I0321 04:39:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:39:13.422717  543705 net.go:698] Add success.
I0321 04:39:13.545240  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6652d39a-3949-4989-a6e2-40954354c274","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:39:13.545280  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:39:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:39:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 04:39:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:39:14.456702  543705 disk_worker.go:494] system disk:vda1
I0321 04:39:14.456732  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:39:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:39:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:39:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:39:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:39:23.409772  543705 memory.go:184] no items to output this cycle
I0321 04:39:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 04:39:27.758741  543705 disk_info.go:125] begin check local disk info of client
I0321 04:39:27.761378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:39:27.761385  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369640 0xc000369680]
E0321 04:39:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:39:33.409774  543705 memory.go:184] no items to output this cycle
I0321 04:39:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 04:39:38.733735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:39:38.733742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:39:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:39:43.410742  543705 memory.go:191] Add success.
I0321 04:39:43.409796  543705 cpu.go:282] Add success.
I0321 04:39:43.420439  543705 net.go:648] Add success.
I0321 04:39:43.423227  543705 net.go:770] primary dev: ETH0
I0321 04:39:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:39:43.423253  543705 net.go:698] Add success.
I0321 04:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:39:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:39:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:39:53.409780  543705 memory.go:184] no items to output this cycle
I0321 04:39:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 04:40:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:40:03.409781  543705 memory.go:184] no items to output this cycle
I0321 04:40:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 04:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:40:13.409810  543705 memory.go:191] Add success.
I0321 04:40:13.409822  543705 cpu.go:282] Add success.
W0321 04:40:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:40:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:40:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:40:13.420273  543705 net.go:648] Add success.
I0321 04:40:13.422633  543705 net.go:770] primary dev: ETH0
I0321 04:40:13.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:40:13.422662  543705 net.go:698] Add success.
I0321 04:40:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:40:14.455082  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:40:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0321 04:40:14.455148  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:40:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 04:40:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:40:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:40:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:40:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:40:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:40:23.409805  543705 memory.go:184] no items to output this cycle
I0321 04:40:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 04:40:27.761677  543705 disk_info.go:125] begin check local disk info of client
I0321 04:40:27.764254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:40:27.764261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051c080 0xc00051c0c0]
E0321 04:40:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:40:33.409827  543705 memory.go:184] no items to output this cycle
I0321 04:40:33.409839  543705 cpu.go:275] no items to output this cycle
E0321 04:40:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:40:43.409776  543705 memory.go:191] Add success.
I0321 04:40:43.409806  543705 cpu.go:282] Add success.
I0321 04:40:43.419856  543705 net.go:648] Add success.
I0321 04:40:43.422601  543705 net.go:770] primary dev: ETH0
I0321 04:40:43.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:40:43.422631  543705 net.go:698] Add success.
I0321 04:40:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:40:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:40:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:40:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:40:53.409768  543705 memory.go:184] no items to output this cycle
I0321 04:40:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 04:41:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:41:03.409790  543705 memory.go:184] no items to output this cycle
I0321 04:41:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 04:41:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:41:13.409821  543705 memory.go:191] Add success.
I0321 04:41:13.409821  543705 cpu.go:282] Add success.
W0321 04:41:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:41:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:41:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:41:13.420310  543705 net.go:648] Add success.
I0321 04:41:13.422727  543705 net.go:770] primary dev: ETH0
I0321 04:41:13.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:41:13.422751  543705 net.go:698] Add success.
I0321 04:41:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:41:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:41:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 04:41:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:41:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 04:41:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:41:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:41:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:41:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:41:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:41:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:41:23.409803  543705 memory.go:184] no items to output this cycle
I0321 04:41:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 04:41:27.765468  543705 disk_info.go:125] begin check local disk info of client
I0321 04:41:27.768115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:41:27.768122  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0321 04:41:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:41:33.409807  543705 memory.go:184] no items to output this cycle
I0321 04:41:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 04:41:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:41:43.409784  543705 memory.go:191] Add success.
I0321 04:41:43.409812  543705 cpu.go:282] Add success.
I0321 04:41:43.419862  543705 net.go:648] Add success.
I0321 04:41:43.422646  543705 net.go:770] primary dev: ETH0
I0321 04:41:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:41:43.422671  543705 net.go:698] Add success.
I0321 04:41:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:41:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:41:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:41:53.410257  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:41:53.410274  543705 memory.go:184] no items to output this cycle
I0321 04:41:53.410275  543705 cpu.go:275] no items to output this cycle
E0321 04:42:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:42:03.409771  543705 memory.go:184] no items to output this cycle
I0321 04:42:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 04:42:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:42:13.409815  543705 memory.go:191] Add success.
I0321 04:42:13.409823  543705 cpu.go:282] Add success.
W0321 04:42:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:42:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:42:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:42:13.419705  543705 net.go:648] Add success.
I0321 04:42:13.422610  543705 net.go:770] primary dev: ETH0
I0321 04:42:13.422622  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:42:13.422634  543705 net.go:698] Add success.
I0321 04:42:13.479995  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ca454520-d711-48bd-8899-b99307db3aa5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:42:13.480026  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 04:42:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:42:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 04:42:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:42:14.456842  543705 disk_worker.go:494] system disk:vda1
E0321 04:42:14.456844  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:42:14.456853  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:42:14.456859  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:42:14.456895  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:42:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:42:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:42:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:42:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:42:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:42:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:42:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:42:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:42:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 04:42:23.409781  543705 memory.go:184] no items to output this cycle
I0321 04:42:27.769492  543705 disk_info.go:125] begin check local disk info of client
I0321 04:42:27.771936  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:42:27.771945  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003da740 0xc0003da780]
E0321 04:42:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:42:33.409808  543705 memory.go:184] no items to output this cycle
I0321 04:42:33.409821  543705 cpu.go:275] no items to output this cycle
I0321 04:42:38.735373  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:42:38.735380  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:42:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:42:43.410745  543705 memory.go:191] Add success.
I0321 04:42:43.409792  543705 cpu.go:282] Add success.
I0321 04:42:43.420415  543705 net.go:648] Add success.
I0321 04:42:43.422974  543705 net.go:770] primary dev: ETH0
I0321 04:42:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:42:43.423013  543705 net.go:698] Add success.
I0321 04:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:42:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:42:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:42:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:42:53.409791  543705 memory.go:184] no items to output this cycle
I0321 04:42:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 04:43:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:43:03.409790  543705 cpu.go:275] no items to output this cycle
I0321 04:43:03.409797  543705 memory.go:184] no items to output this cycle
E0321 04:43:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:43:13.409822  543705 memory.go:191] Add success.
I0321 04:43:13.409830  543705 cpu.go:282] Add success.
W0321 04:43:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:43:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:43:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:43:13.420116  543705 net.go:648] Add success.
I0321 04:43:13.422976  543705 net.go:770] primary dev: ETH0
I0321 04:43:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:43:13.423011  543705 net.go:698] Add success.
I0321 04:43:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:43:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:43:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 04:43:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:43:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 04:43:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:43:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:43:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:43:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:43:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:43:23.409797  543705 memory.go:184] no items to output this cycle
I0321 04:43:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 04:43:27.773501  543705 disk_info.go:125] begin check local disk info of client
I0321 04:43:27.776076  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:43:27.776083  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036bcc0 0xc00036bd00]
E0321 04:43:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:43:33.409776  543705 memory.go:184] no items to output this cycle
I0321 04:43:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 04:43:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:43:43.409787  543705 memory.go:191] Add success.
I0321 04:43:43.409794  543705 cpu.go:282] Add success.
I0321 04:43:43.419835  543705 net.go:648] Add success.
I0321 04:43:43.422747  543705 net.go:770] primary dev: ETH0
I0321 04:43:43.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:43:43.422773  543705 net.go:698] Add success.
I0321 04:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:43:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:43:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:43:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:43:53.409800  543705 memory.go:184] no items to output this cycle
I0321 04:43:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 04:44:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:44:03.409778  543705 memory.go:184] no items to output this cycle
I0321 04:44:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:44:13.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:44:13.409906  543705 memory.go:191] Add success.
W0321 04:44:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:44:13.409962  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:44:13.409972  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:44:13.410024  543705 cpu.go:282] Add success.
I0321 04:44:13.419705  543705 net.go:648] Add success.
I0321 04:44:13.422123  543705 net.go:770] primary dev: ETH0
I0321 04:44:13.422137  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:44:13.422148  543705 net.go:698] Add success.
I0321 04:44:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:44:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:44:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 04:44:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:44:14.456474  543705 disk_worker.go:494] system disk:vda1
I0321 04:44:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:44:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:44:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:44:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:44:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:44:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:44:23.409772  543705 memory.go:184] no items to output this cycle
I0321 04:44:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 04:44:27.777525  543705 disk_info.go:125] begin check local disk info of client
I0321 04:44:27.780022  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:44:27.780029  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab5c0 0xc0001ab600]
E0321 04:44:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:44:33.409783  543705 memory.go:184] no items to output this cycle
I0321 04:44:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 04:44:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:44:43.409811  543705 memory.go:191] Add success.
I0321 04:44:43.409818  543705 cpu.go:282] Add success.
I0321 04:44:43.419961  543705 net.go:648] Add success.
I0321 04:44:43.422743  543705 net.go:770] primary dev: ETH0
I0321 04:44:43.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:44:43.422768  543705 net.go:698] Add success.
I0321 04:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:44:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:44:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:44:53.409764  543705 memory.go:184] no items to output this cycle
I0321 04:44:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 04:45:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:45:03.409801  543705 memory.go:184] no items to output this cycle
I0321 04:45:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 04:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:45:13.409797  543705 cpu.go:282] Add success.
I0321 04:45:13.409804  543705 memory.go:191] Add success.
W0321 04:45:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:45:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:45:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:45:13.420611  543705 net.go:648] Add success.
I0321 04:45:13.423761  543705 net.go:770] primary dev: ETH0
I0321 04:45:13.423777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:45:13.423789  543705 net.go:698] Add success.
I0321 04:45:13.469671  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"40b48d14-ee6a-4018-b6e3-d16f8525f6ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:45:13.469707  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:45:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:45:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:45:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 04:45:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:45:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 04:45:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:45:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:45:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:45:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:45:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:45:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:45:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:45:23.409798  543705 memory.go:184] no items to output this cycle
I0321 04:45:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 04:45:27.781545  543705 disk_info.go:125] begin check local disk info of client
I0321 04:45:27.784117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:45:27.784123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0321 04:45:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:45:33.409806  543705 memory.go:184] no items to output this cycle
I0321 04:45:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 04:45:38.735524  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:45:38.735531  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:45:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:45:43.410754  543705 memory.go:191] Add success.
I0321 04:45:43.409837  543705 cpu.go:282] Add success.
I0321 04:45:43.420501  543705 net.go:648] Add success.
I0321 04:45:43.423500  543705 net.go:770] primary dev: ETH0
I0321 04:45:43.423515  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:45:43.423529  543705 net.go:698] Add success.
I0321 04:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:45:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:45:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:45:53.409788  543705 memory.go:184] no items to output this cycle
I0321 04:45:53.409839  543705 cpu.go:275] no items to output this cycle
E0321 04:46:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:46:03.409782  543705 memory.go:184] no items to output this cycle
I0321 04:46:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 04:46:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:46:13.409826  543705 memory.go:191] Add success.
I0321 04:46:13.409828  543705 cpu.go:282] Add success.
W0321 04:46:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:46:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:46:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:46:13.420220  543705 net.go:648] Add success.
I0321 04:46:13.423462  543705 net.go:770] primary dev: ETH0
I0321 04:46:13.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:46:13.423486  543705 net.go:698] Add success.
I0321 04:46:14.454662  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:46:14.454806  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:46:14.454882  543705 disk_worker.go:708] disk space is not compliant
W0321 04:46:14.454885  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:46:14.456257  543705 disk_worker.go:494] system disk:vda1
I0321 04:46:14.456286  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:46:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:46:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:46:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:46:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:46:23.409776  543705 memory.go:184] no items to output this cycle
I0321 04:46:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 04:46:27.785563  543705 disk_info.go:125] begin check local disk info of client
I0321 04:46:27.788120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:46:27.788127  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369f00 0xc000369f40]
E0321 04:46:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:46:33.409813  543705 memory.go:184] no items to output this cycle
I0321 04:46:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 04:46:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:46:43.409815  543705 memory.go:191] Add success.
I0321 04:46:43.409820  543705 cpu.go:282] Add success.
I0321 04:46:43.419972  543705 net.go:648] Add success.
I0321 04:46:43.423170  543705 net.go:770] primary dev: ETH0
I0321 04:46:43.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:46:43.423195  543705 net.go:698] Add success.
I0321 04:46:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:46:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:46:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:46:53.410381  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:46:53.410396  543705 memory.go:184] no items to output this cycle
I0321 04:46:53.410413  543705 cpu.go:275] no items to output this cycle
E0321 04:47:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:47:03.409894  543705 memory.go:184] no items to output this cycle
I0321 04:47:03.409937  543705 cpu.go:275] no items to output this cycle
E0321 04:47:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:47:13.409781  543705 memory.go:191] Add success.
W0321 04:47:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:47:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:47:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:47:13.409829  543705 cpu.go:282] Add success.
I0321 04:47:13.420148  543705 net.go:648] Add success.
I0321 04:47:13.423067  543705 net.go:770] primary dev: ETH0
I0321 04:47:13.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:47:13.423092  543705 net.go:698] Add success.
I0321 04:47:13.453659  543705 event_worker.go:152] Polling the log file for events...
W0321 04:47:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:47:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 04:47:14.455203  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:47:14.455912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:47:14.455921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:47:14.455927  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:47:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 04:47:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:47:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:47:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:47:16.457905  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:47:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:47:16.457958  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:47:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:47:16.472299  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:47:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:47:23.409804  543705 memory.go:184] no items to output this cycle
I0321 04:47:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 04:47:27.789585  543705 disk_info.go:125] begin check local disk info of client
I0321 04:47:27.792258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:47:27.792264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0321 04:47:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:47:33.409791  543705 memory.go:184] no items to output this cycle
I0321 04:47:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 04:47:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:47:43.409794  543705 memory.go:191] Add success.
I0321 04:47:43.409794  543705 cpu.go:282] Add success.
I0321 04:47:43.419861  543705 net.go:648] Add success.
I0321 04:47:43.422739  543705 net.go:770] primary dev: ETH0
I0321 04:47:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:47:43.422766  543705 net.go:698] Add success.
I0321 04:47:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:47:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:47:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:47:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:47:53.409804  543705 memory.go:184] no items to output this cycle
I0321 04:47:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 04:48:03.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:48:03.409888  543705 memory.go:184] no items to output this cycle
I0321 04:48:03.410036  543705 cpu.go:275] no items to output this cycle
E0321 04:48:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:48:13.409805  543705 memory.go:191] Add success.
I0321 04:48:13.409805  543705 cpu.go:282] Add success.
W0321 04:48:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:48:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:48:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:48:13.420308  543705 net.go:648] Add success.
I0321 04:48:13.422987  543705 net.go:770] primary dev: ETH0
I0321 04:48:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:48:13.423010  543705 net.go:698] Add success.
I0321 04:48:13.482067  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c96d2855-fb3f-4b57-8079-07483cd862b7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:48:13.482100  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:48:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:48:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:48:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 04:48:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:48:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 04:48:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:48:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:48:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:48:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:48:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:48:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:48:23.410420  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:48:23.410438  543705 memory.go:184] no items to output this cycle
I0321 04:48:23.410473  543705 cpu.go:275] no items to output this cycle
I0321 04:48:27.793611  543705 disk_info.go:125] begin check local disk info of client
I0321 04:48:27.796240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:48:27.796246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa40 0xc0001aaa80]
E0321 04:48:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:48:33.409779  543705 memory.go:184] no items to output this cycle
I0321 04:48:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 04:48:38.735676  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:48:38.735682  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:48:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:48:43.410736  543705 memory.go:191] Add success.
I0321 04:48:43.409805  543705 cpu.go:282] Add success.
I0321 04:48:43.420446  543705 net.go:648] Add success.
I0321 04:48:43.423008  543705 net.go:770] primary dev: ETH0
I0321 04:48:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:48:43.423034  543705 net.go:698] Add success.
I0321 04:48:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:48:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:48:53.409774  543705 memory.go:184] no items to output this cycle
I0321 04:48:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 04:49:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:49:03.409812  543705 memory.go:184] no items to output this cycle
I0321 04:49:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 04:49:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:49:13.409828  543705 memory.go:191] Add success.
I0321 04:49:13.409833  543705 cpu.go:282] Add success.
W0321 04:49:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:49:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:49:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:49:13.420148  543705 net.go:648] Add success.
I0321 04:49:13.422892  543705 net.go:770] primary dev: ETH0
I0321 04:49:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:49:13.422921  543705 net.go:698] Add success.
I0321 04:49:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:49:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:49:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 04:49:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:49:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 04:49:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:49:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:49:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:49:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:49:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:49:23.409805  543705 memory.go:184] no items to output this cycle
I0321 04:49:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 04:49:27.797626  543705 disk_info.go:125] begin check local disk info of client
I0321 04:49:27.800221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:49:27.800228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ba480 0xc0004ba4c0]
E0321 04:49:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:49:33.409782  543705 memory.go:184] no items to output this cycle
I0321 04:49:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 04:49:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:49:43.409792  543705 memory.go:191] Add success.
I0321 04:49:43.409795  543705 cpu.go:282] Add success.
I0321 04:49:43.419873  543705 net.go:648] Add success.
I0321 04:49:43.422403  543705 net.go:770] primary dev: ETH0
I0321 04:49:43.422416  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:49:43.422429  543705 net.go:698] Add success.
I0321 04:49:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:49:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:49:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:49:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:49:53.409778  543705 memory.go:184] no items to output this cycle
I0321 04:49:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 04:50:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:50:03.409778  543705 memory.go:184] no items to output this cycle
I0321 04:50:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 04:50:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:50:13.409820  543705 memory.go:191] Add success.
I0321 04:50:13.409827  543705 cpu.go:282] Add success.
W0321 04:50:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:50:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:50:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:50:13.420136  543705 net.go:648] Add success.
I0321 04:50:13.422697  543705 net.go:770] primary dev: ETH0
I0321 04:50:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:50:13.422721  543705 net.go:698] Add success.
I0321 04:50:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:50:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:50:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 04:50:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:50:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 04:50:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:50:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:50:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:50:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:50:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:50:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:50:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:50:23.409788  543705 memory.go:184] no items to output this cycle
I0321 04:50:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 04:50:27.801655  543705 disk_info.go:125] begin check local disk info of client
I0321 04:50:27.804219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:50:27.804226  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4fc0 0xc0000c5000]
E0321 04:50:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:50:33.409810  543705 memory.go:184] no items to output this cycle
I0321 04:50:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 04:50:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:50:43.409789  543705 memory.go:191] Add success.
I0321 04:50:43.409804  543705 cpu.go:282] Add success.
I0321 04:50:43.420021  543705 net.go:648] Add success.
I0321 04:50:43.422995  543705 net.go:770] primary dev: ETH0
I0321 04:50:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:50:43.423021  543705 net.go:698] Add success.
I0321 04:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:50:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:50:53.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:50:53.410272  543705 memory.go:184] no items to output this cycle
I0321 04:50:53.410279  543705 cpu.go:275] no items to output this cycle
E0321 04:51:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:51:03.409801  543705 memory.go:184] no items to output this cycle
I0321 04:51:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 04:51:13.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:51:13.409906  543705 memory.go:191] Add success.
I0321 04:51:13.409920  543705 cpu.go:282] Add success.
W0321 04:51:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:51:13.409966  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:51:13.409973  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:51:13.419714  543705 net.go:648] Add success.
I0321 04:51:13.422582  543705 net.go:770] primary dev: ETH0
I0321 04:51:13.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:51:13.422620  543705 net.go:698] Add success.
I0321 04:51:13.468536  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"88a3e526-52ae-4581-b654-6a826bba63c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:51:13.468567  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:51:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:51:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:51:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0321 04:51:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:51:14.456759  543705 disk_worker.go:494] system disk:vda1
I0321 04:51:14.456789  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:51:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:51:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:51:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:51:23.409801  543705 memory.go:184] no items to output this cycle
I0321 04:51:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 04:51:27.805673  543705 disk_info.go:125] begin check local disk info of client
I0321 04:51:27.808207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:51:27.808213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dbcc0 0xc0003dbd00]
E0321 04:51:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:51:33.409804  543705 memory.go:184] no items to output this cycle
I0321 04:51:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 04:51:38.737383  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:51:38.737390  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:51:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:51:43.410631  543705 memory.go:191] Add success.
I0321 04:51:43.409819  543705 cpu.go:282] Add success.
I0321 04:51:43.420327  543705 net.go:648] Add success.
I0321 04:51:43.423015  543705 net.go:770] primary dev: ETH0
I0321 04:51:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:51:43.423041  543705 net.go:698] Add success.
I0321 04:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:51:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:51:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:51:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:51:53.409819  543705 memory.go:184] no items to output this cycle
I0321 04:51:53.409831  543705 cpu.go:275] no items to output this cycle
E0321 04:52:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:52:03.409782  543705 memory.go:184] no items to output this cycle
I0321 04:52:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 04:52:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:52:13.409902  543705 memory.go:191] Add success.
I0321 04:52:13.409904  543705 cpu.go:282] Add success.
W0321 04:52:13.409935  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:52:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:52:13.409950  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:52:13.419724  543705 net.go:648] Add success.
I0321 04:52:13.422764  543705 net.go:770] primary dev: ETH0
I0321 04:52:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:52:13.422793  543705 net.go:698] Add success.
W0321 04:52:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:52:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 04:52:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 04:52:14.455873  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:52:14.455881  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:52:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:52:14.456536  543705 disk_worker.go:494] system disk:vda1
I0321 04:52:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:52:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:52:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:52:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:52:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:52:16.457958  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:52:16.457976  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:52:16.472300  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:52:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:52:23.409791  543705 memory.go:184] no items to output this cycle
I0321 04:52:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 04:52:27.809674  543705 disk_info.go:125] begin check local disk info of client
I0321 04:52:27.812227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:52:27.812234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa180 0xc0001fa1c0]
E0321 04:52:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:52:33.409795  543705 memory.go:184] no items to output this cycle
I0321 04:52:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 04:52:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:52:43.409810  543705 cpu.go:282] Add success.
I0321 04:52:43.409812  543705 memory.go:191] Add success.
I0321 04:52:43.420279  543705 net.go:648] Add success.
I0321 04:52:43.422930  543705 net.go:770] primary dev: ETH0
I0321 04:52:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:52:43.422956  543705 net.go:698] Add success.
I0321 04:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:52:53.410203  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:52:53.410219  543705 memory.go:184] no items to output this cycle
I0321 04:52:53.410253  543705 cpu.go:275] no items to output this cycle
E0321 04:53:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:53:03.409790  543705 memory.go:184] no items to output this cycle
I0321 04:53:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 04:53:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:53:13.409868  543705 memory.go:191] Add success.
W0321 04:53:13.409896  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:53:13.409913  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:53:13.409916  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:53:13.410015  543705 cpu.go:282] Add success.
I0321 04:53:13.419726  543705 net.go:648] Add success.
I0321 04:53:13.422645  543705 net.go:770] primary dev: ETH0
I0321 04:53:13.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:53:13.422670  543705 net.go:698] Add success.
I0321 04:53:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:53:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:53:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 04:53:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:53:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 04:53:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:53:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:53:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:53:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:53:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:53:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:53:23.409795  543705 memory.go:184] no items to output this cycle
I0321 04:53:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 04:53:27.813676  543705 disk_info.go:125] begin check local disk info of client
I0321 04:53:27.816233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:53:27.816241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab440 0xc0001ab480]
E0321 04:53:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:53:33.409813  543705 memory.go:184] no items to output this cycle
I0321 04:53:33.409823  543705 cpu.go:275] no items to output this cycle
E0321 04:53:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:53:43.409831  543705 memory.go:191] Add success.
I0321 04:53:43.409836  543705 cpu.go:282] Add success.
I0321 04:53:43.420053  543705 net.go:648] Add success.
I0321 04:53:43.423034  543705 net.go:770] primary dev: ETH0
I0321 04:53:43.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:53:43.423065  543705 net.go:698] Add success.
I0321 04:53:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:53:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:53:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:53:53.409805  543705 memory.go:184] no items to output this cycle
I0321 04:53:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 04:54:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:54:03.409786  543705 memory.go:184] no items to output this cycle
I0321 04:54:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 04:54:13.409901  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:54:13.409922  543705 cpu.go:282] Add success.
I0321 04:54:13.409971  543705 memory.go:191] Add success.
W0321 04:54:13.410010  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:54:13.410028  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:54:13.410032  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:54:13.419708  543705 net.go:648] Add success.
I0321 04:54:13.422266  543705 net.go:770] primary dev: ETH0
I0321 04:54:13.422278  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:54:13.422290  543705 net.go:698] Add success.
I0321 04:54:13.468832  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"83aab1b8-11ee-4400-9802-220874650a4a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:54:13.468863  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 04:54:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:54:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:54:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 04:54:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:54:14.456659  543705 disk_worker.go:494] system disk:vda1
I0321 04:54:14.456694  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:54:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:54:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:54:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:54:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:54:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:54:23.409812  543705 memory.go:184] no items to output this cycle
I0321 04:54:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 04:54:27.817678  543705 disk_info.go:125] begin check local disk info of client
I0321 04:54:27.820322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:54:27.820329  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f5c40 0xc0001f5c80]
E0321 04:54:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:54:33.409788  543705 memory.go:184] no items to output this cycle
I0321 04:54:33.409823  543705 cpu.go:275] no items to output this cycle
I0321 04:54:38.737735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:54:38.737741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:54:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:54:43.410731  543705 memory.go:191] Add success.
I0321 04:54:43.409812  543705 cpu.go:282] Add success.
I0321 04:54:43.420448  543705 net.go:648] Add success.
I0321 04:54:43.423147  543705 net.go:770] primary dev: ETH0
I0321 04:54:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:54:43.423178  543705 net.go:698] Add success.
I0321 04:54:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:54:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:54:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:54:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:54:53.409783  543705 memory.go:184] no items to output this cycle
I0321 04:54:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 04:55:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:55:03.409779  543705 memory.go:184] no items to output this cycle
I0321 04:55:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 04:55:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:55:13.409795  543705 memory.go:191] Add success.
I0321 04:55:13.409813  543705 cpu.go:282] Add success.
W0321 04:55:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:55:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:55:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:55:13.420119  543705 net.go:648] Add success.
I0321 04:55:13.422875  543705 net.go:770] primary dev: ETH0
I0321 04:55:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:55:13.422899  543705 net.go:698] Add success.
I0321 04:55:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:55:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:55:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 04:55:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:55:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 04:55:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:55:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:55:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:55:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:55:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:55:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:55:23.409803  543705 memory.go:184] no items to output this cycle
I0321 04:55:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 04:55:27.821676  543705 disk_info.go:125] begin check local disk info of client
I0321 04:55:27.824337  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:55:27.824346  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 04:55:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:55:33.409783  543705 memory.go:184] no items to output this cycle
I0321 04:55:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 04:55:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:55:43.409793  543705 memory.go:191] Add success.
I0321 04:55:43.409795  543705 cpu.go:282] Add success.
I0321 04:55:43.419960  543705 net.go:648] Add success.
I0321 04:55:43.422755  543705 net.go:770] primary dev: ETH0
I0321 04:55:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:55:43.422782  543705 net.go:698] Add success.
I0321 04:55:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:55:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:55:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:55:53.409784  543705 memory.go:184] no items to output this cycle
I0321 04:55:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 04:56:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:56:03.409777  543705 memory.go:184] no items to output this cycle
I0321 04:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 04:56:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:56:13.409795  543705 memory.go:191] Add success.
I0321 04:56:13.409798  543705 cpu.go:282] Add success.
W0321 04:56:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:56:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:56:13.419706  543705 net.go:648] Add success.
I0321 04:56:13.422489  543705 net.go:770] primary dev: ETH0
I0321 04:56:13.422502  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:56:13.422513  543705 net.go:698] Add success.
I0321 04:56:14.453972  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:56:14.454203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:56:14.454214  543705 disk_worker.go:708] disk space is not compliant
W0321 04:56:14.454217  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:56:14.455609  543705 disk_worker.go:494] system disk:vda1
I0321 04:56:14.455642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:56:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:56:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:56:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:56:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:56:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:56:23.409765  543705 memory.go:184] no items to output this cycle
I0321 04:56:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 04:56:27.825680  543705 disk_info.go:125] begin check local disk info of client
I0321 04:56:27.828259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:56:27.828266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa2c0 0xc0001fa300]
E0321 04:56:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:56:33.409820  543705 memory.go:184] no items to output this cycle
I0321 04:56:33.409831  543705 cpu.go:275] no items to output this cycle
E0321 04:56:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:56:43.409784  543705 memory.go:191] Add success.
I0321 04:56:43.409793  543705 cpu.go:282] Add success.
I0321 04:56:43.419888  543705 net.go:648] Add success.
I0321 04:56:43.422747  543705 net.go:770] primary dev: ETH0
I0321 04:56:43.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:56:43.422773  543705 net.go:698] Add success.
I0321 04:56:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:56:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:56:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:56:53.409799  543705 memory.go:184] no items to output this cycle
I0321 04:56:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 04:57:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:57:03.409779  543705 memory.go:184] no items to output this cycle
I0321 04:57:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:57:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:57:13.409806  543705 cpu.go:282] Add success.
I0321 04:57:13.409834  543705 memory.go:191] Add success.
W0321 04:57:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 04:57:13.409904  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:57:13.409908  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:57:13.420416  543705 net.go:648] Add success.
I0321 04:57:13.427943  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 04:57:13.428024  543705 net.go:770] primary dev: ETH0
I0321 04:57:13.428037  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:57:13.428048  543705 net.go:698] Add success.
I0321 04:57:13.452779  543705 event_worker.go:152] Polling the log file for events...
I0321 04:57:13.468416  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c51157e9-4ab8-46c5-bd7c-f45804603f2e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 04:57:13.468452  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 04:57:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:57:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 04:57:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:57:14.456836  543705 disk_worker.go:494] system disk:vda1
E0321 04:57:14.456836  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 04:57:14.456845  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 04:57:14.456862  543705 custom_config.go:64] query custom config with name: gpu
I0321 04:57:14.456880  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 04:57:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 04:57:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:57:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 04:57:16.457985  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 04:57:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:57:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:57:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:57:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:57:23.409774  543705 memory.go:184] no items to output this cycle
I0321 04:57:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 04:57:27.829675  543705 disk_info.go:125] begin check local disk info of client
I0321 04:57:27.832229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:57:27.832235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b3740 0xc0004b3780]
E0321 04:57:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:57:33.409812  543705 memory.go:184] no items to output this cycle
I0321 04:57:33.409826  543705 cpu.go:275] no items to output this cycle
I0321 04:57:38.737875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 04:57:38.737882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 04:57:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:57:43.410624  543705 memory.go:191] Add success.
I0321 04:57:43.409809  543705 cpu.go:282] Add success.
I0321 04:57:43.420319  543705 net.go:648] Add success.
I0321 04:57:43.422890  543705 net.go:770] primary dev: ETH0
I0321 04:57:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:57:43.422928  543705 net.go:698] Add success.
I0321 04:57:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:57:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:57:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:57:53.409782  543705 cpu.go:275] no items to output this cycle
I0321 04:57:53.409784  543705 memory.go:184] no items to output this cycle
E0321 04:58:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:58:03.409808  543705 memory.go:184] no items to output this cycle
I0321 04:58:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 04:58:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:58:13.409786  543705 memory.go:191] Add success.
W0321 04:58:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 04:58:13.409815  543705 cpu.go:282] Add success.
W0321 04:58:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:58:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:58:13.420190  543705 net.go:648] Add success.
I0321 04:58:13.423013  543705 net.go:770] primary dev: ETH0
I0321 04:58:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:58:13.423042  543705 net.go:698] Add success.
I0321 04:58:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:58:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:58:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 04:58:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:58:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 04:58:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:58:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:58:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:58:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:58:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:58:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:58:23.409803  543705 memory.go:184] no items to output this cycle
I0321 04:58:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 04:58:27.833693  543705 disk_info.go:125] begin check local disk info of client
I0321 04:58:27.836343  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:58:27.836350  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b980 0xc00036b9c0]
E0321 04:58:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:58:33.409792  543705 memory.go:184] no items to output this cycle
I0321 04:58:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 04:58:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:58:43.409814  543705 memory.go:191] Add success.
I0321 04:58:43.409822  543705 cpu.go:282] Add success.
I0321 04:58:43.419859  543705 net.go:648] Add success.
I0321 04:58:43.422814  543705 net.go:770] primary dev: ETH0
I0321 04:58:43.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:58:43.422841  543705 net.go:698] Add success.
I0321 04:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:58:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:58:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:58:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:58:53.409797  543705 memory.go:184] no items to output this cycle
I0321 04:58:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 04:59:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:59:03.409815  543705 memory.go:184] no items to output this cycle
I0321 04:59:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 04:59:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:59:13.409784  543705 memory.go:191] Add success.
W0321 04:59:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 04:59:13.409812  543705 cpu.go:282] Add success.
W0321 04:59:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 04:59:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 04:59:13.420131  543705 net.go:648] Add success.
I0321 04:59:13.422843  543705 net.go:770] primary dev: ETH0
I0321 04:59:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:59:13.422867  543705 net.go:698] Add success.
I0321 04:59:14.454925  543705 custom_config.go:64] query custom config with name: gpu
W0321 04:59:14.455082  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 04:59:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0321 04:59:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0321 04:59:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 04:59:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 04:59:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 04:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:59:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:59:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 04:59:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 04:59:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:59:23.409797  543705 memory.go:184] no items to output this cycle
I0321 04:59:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 04:59:27.837678  543705 disk_info.go:125] begin check local disk info of client
I0321 04:59:27.840255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 04:59:27.840262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002891c0 0xc000289200]
E0321 04:59:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:59:33.409806  543705 memory.go:184] no items to output this cycle
I0321 04:59:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 04:59:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:59:43.409783  543705 memory.go:191] Add success.
I0321 04:59:43.409815  543705 cpu.go:282] Add success.
I0321 04:59:43.419872  543705 net.go:648] Add success.
I0321 04:59:43.423195  543705 net.go:770] primary dev: ETH0
I0321 04:59:43.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0321 04:59:43.423222  543705 net.go:698] Add success.
I0321 04:59:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 04:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 04:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 04:59:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 04:59:53.409771  543705 memory.go:184] no items to output this cycle
I0321 04:59:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 05:00:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:00:03.409793  543705 memory.go:184] no items to output this cycle
I0321 05:00:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 05:00:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:00:13.409781  543705 memory.go:191] Add success.
W0321 05:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 05:00:13.409811  543705 cpu.go:282] Add success.
W0321 05:00:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:00:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:00:13.420150  543705 net.go:648] Add success.
I0321 05:00:13.422836  543705 net.go:770] primary dev: ETH0
I0321 05:00:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:00:13.422861  543705 net.go:698] Add success.
I0321 05:00:13.469684  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6957e718-5778-441f-9494-d05be48164b0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:00:13.469726  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:00:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:00:14.455327  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:00:14.455341  543705 disk_worker.go:708] disk space is not compliant
W0321 05:00:14.455345  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:00:14.456784  543705 disk_worker.go:494] system disk:vda1
I0321 05:00:14.456827  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:00:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:00:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:00:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:00:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:00:23.409776  543705 memory.go:184] no items to output this cycle
I0321 05:00:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 05:00:27.841677  543705 disk_info.go:125] begin check local disk info of client
I0321 05:00:27.844252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:00:27.844259  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3480 0xc0002b34c0]
E0321 05:00:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:00:33.409809  543705 memory.go:184] no items to output this cycle
I0321 05:00:33.409820  543705 cpu.go:275] no items to output this cycle
I0321 05:00:38.739385  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:00:38.739392  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:00:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:00:43.410692  543705 memory.go:191] Add success.
I0321 05:00:43.409824  543705 cpu.go:282] Add success.
I0321 05:00:43.420389  543705 net.go:648] Add success.
I0321 05:00:43.423056  543705 net.go:770] primary dev: ETH0
I0321 05:00:43.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:00:43.423082  543705 net.go:698] Add success.
I0321 05:00:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:00:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:00:53.410394  543705 memory.go:184] no items to output this cycle
I0321 05:00:53.410422  543705 cpu.go:275] no items to output this cycle
E0321 05:01:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:01:03.409774  543705 memory.go:184] no items to output this cycle
I0321 05:01:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 05:01:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:01:13.409795  543705 memory.go:191] Add success.
I0321 05:01:13.409796  543705 cpu.go:282] Add success.
W0321 05:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:01:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:01:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:01:13.420146  543705 net.go:648] Add success.
I0321 05:01:13.422954  543705 net.go:770] primary dev: ETH0
I0321 05:01:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:01:13.422980  543705 net.go:698] Add success.
I0321 05:01:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:01:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:01:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 05:01:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:01:14.459138  543705 disk_worker.go:494] system disk:vda1
I0321 05:01:14.459166  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:01:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:01:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:01:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:01:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:01:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:01:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:01:23.409772  543705 memory.go:184] no items to output this cycle
I0321 05:01:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 05:01:27.845682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:01:27.848254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:01:27.848261  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0321 05:01:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:01:33.409789  543705 memory.go:184] no items to output this cycle
I0321 05:01:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 05:01:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:01:43.409787  543705 memory.go:191] Add success.
I0321 05:01:43.409791  543705 cpu.go:282] Add success.
I0321 05:01:43.419869  543705 net.go:648] Add success.
I0321 05:01:43.422725  543705 net.go:770] primary dev: ETH0
I0321 05:01:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:01:43.422750  543705 net.go:698] Add success.
I0321 05:01:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:01:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:01:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:01:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:01:53.409792  543705 memory.go:184] no items to output this cycle
I0321 05:01:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 05:02:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:02:03.409783  543705 memory.go:184] no items to output this cycle
I0321 05:02:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 05:02:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:02:13.409815  543705 memory.go:191] Add success.
I0321 05:02:13.409817  543705 cpu.go:282] Add success.
W0321 05:02:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:02:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:02:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:02:13.420343  543705 net.go:648] Add success.
I0321 05:02:13.423357  543705 net.go:770] primary dev: ETH0
I0321 05:02:13.423370  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:02:13.423382  543705 net.go:698] Add success.
W0321 05:02:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:02:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 05:02:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:02:14.456445  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:02:14.456456  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:02:14.456462  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:02:14.456843  543705 disk_worker.go:494] system disk:vda1
I0321 05:02:14.456875  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:02:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:02:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:02:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:02:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:02:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:02:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:02:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:02:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:02:23.409772  543705 memory.go:184] no items to output this cycle
I0321 05:02:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 05:02:27.849682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:02:27.852295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:02:27.852303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4d00 0xc0003e4d40]
E0321 05:02:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:02:33.409788  543705 memory.go:184] no items to output this cycle
I0321 05:02:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 05:02:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:02:43.409803  543705 memory.go:191] Add success.
I0321 05:02:43.409824  543705 cpu.go:282] Add success.
I0321 05:02:43.419872  543705 net.go:648] Add success.
I0321 05:02:43.422530  543705 net.go:770] primary dev: ETH0
I0321 05:02:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:02:43.422559  543705 net.go:698] Add success.
I0321 05:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:02:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:02:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:02:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:02:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 05:02:53.409779  543705 memory.go:184] no items to output this cycle
E0321 05:03:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:03:03.409776  543705 memory.go:184] no items to output this cycle
I0321 05:03:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 05:03:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:03:13.409792  543705 memory.go:191] Add success.
I0321 05:03:13.409811  543705 cpu.go:282] Add success.
W0321 05:03:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:03:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:03:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:03:13.420045  543705 net.go:648] Add success.
I0321 05:03:13.422551  543705 net.go:770] primary dev: ETH0
I0321 05:03:13.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:03:13.422577  543705 net.go:698] Add success.
I0321 05:03:13.471353  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1e224870-5bb7-4bc0-82d9-b97a9c1993aa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:03:13.471387  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:03:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:03:14.455242  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:03:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0321 05:03:14.455256  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:03:14.456908  543705 disk_worker.go:494] system disk:vda1
I0321 05:03:14.456937  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:03:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:03:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:03:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:03:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:03:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:03:23.409801  543705 memory.go:184] no items to output this cycle
I0321 05:03:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 05:03:27.853681  543705 disk_info.go:125] begin check local disk info of client
I0321 05:03:27.856299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:03:27.856305  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0321 05:03:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:03:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 05:03:33.409796  543705 memory.go:184] no items to output this cycle
I0321 05:03:38.740402  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:03:38.740409  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:03:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:03:43.410846  543705 memory.go:191] Add success.
I0321 05:03:43.409826  543705 cpu.go:282] Add success.
I0321 05:03:43.420534  543705 net.go:648] Add success.
I0321 05:03:43.423125  543705 net.go:770] primary dev: ETH0
I0321 05:03:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:03:43.423151  543705 net.go:698] Add success.
I0321 05:03:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:03:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:03:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:03:53.409771  543705 memory.go:184] no items to output this cycle
I0321 05:03:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 05:04:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:04:03.409786  543705 cpu.go:275] no items to output this cycle
I0321 05:04:03.409788  543705 memory.go:184] no items to output this cycle
E0321 05:04:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:04:13.409799  543705 memory.go:191] Add success.
I0321 05:04:13.409799  543705 cpu.go:282] Add success.
W0321 05:04:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:04:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:04:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:04:13.420116  543705 net.go:648] Add success.
I0321 05:04:13.423016  543705 net.go:770] primary dev: ETH0
I0321 05:04:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:04:13.423043  543705 net.go:698] Add success.
I0321 05:04:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:04:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:04:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 05:04:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:04:14.456579  543705 disk_worker.go:494] system disk:vda1
I0321 05:04:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:04:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:04:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:04:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:04:16.472502  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:04:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:04:23.409771  543705 memory.go:184] no items to output this cycle
I0321 05:04:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 05:04:27.857682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:04:27.860250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:04:27.860257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd380 0xc0002bd3c0]
E0321 05:04:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:04:33.409806  543705 memory.go:184] no items to output this cycle
I0321 05:04:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 05:04:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:04:43.409781  543705 memory.go:191] Add success.
I0321 05:04:43.409800  543705 cpu.go:282] Add success.
I0321 05:04:43.419863  543705 net.go:648] Add success.
I0321 05:04:43.422420  543705 net.go:770] primary dev: ETH0
I0321 05:04:43.422433  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:04:43.422446  543705 net.go:698] Add success.
I0321 05:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:04:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:04:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:04:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:04:53.409778  543705 memory.go:184] no items to output this cycle
I0321 05:04:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 05:05:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:05:03.409773  543705 memory.go:184] no items to output this cycle
I0321 05:05:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 05:05:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:05:13.409801  543705 memory.go:191] Add success.
I0321 05:05:13.409804  543705 cpu.go:282] Add success.
W0321 05:05:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:05:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:05:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:05:13.420071  543705 net.go:648] Add success.
I0321 05:05:13.422846  543705 net.go:770] primary dev: ETH0
I0321 05:05:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:05:13.422871  543705 net.go:698] Add success.
I0321 05:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:05:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:05:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 05:05:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:05:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 05:05:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:05:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:05:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:05:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:05:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:05:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:05:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 05:05:23.409788  543705 memory.go:184] no items to output this cycle
I0321 05:05:27.861684  543705 disk_info.go:125] begin check local disk info of client
I0321 05:05:27.864348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:05:27.864354  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc000 0xc0002bc040]
E0321 05:05:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:05:33.409784  543705 memory.go:184] no items to output this cycle
I0321 05:05:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 05:05:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:05:43.409793  543705 memory.go:191] Add success.
I0321 05:05:43.409796  543705 cpu.go:282] Add success.
I0321 05:05:43.419931  543705 net.go:648] Add success.
I0321 05:05:43.422942  543705 net.go:770] primary dev: ETH0
I0321 05:05:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:05:43.422968  543705 net.go:698] Add success.
I0321 05:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:05:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:05:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:05:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:05:53.409783  543705 cpu.go:275] no items to output this cycle
I0321 05:05:53.409786  543705 memory.go:184] no items to output this cycle
E0321 05:06:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:06:03.409779  543705 cpu.go:275] no items to output this cycle
I0321 05:06:03.409786  543705 memory.go:184] no items to output this cycle
E0321 05:06:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:06:13.409799  543705 memory.go:191] Add success.
I0321 05:06:13.409802  543705 cpu.go:282] Add success.
W0321 05:06:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:06:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:06:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:06:13.420067  543705 net.go:648] Add success.
I0321 05:06:13.422902  543705 net.go:770] primary dev: ETH0
I0321 05:06:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:06:13.422931  543705 net.go:698] Add success.
I0321 05:06:13.463376  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef96bdeb-cd5f-4698-81a2-f3802d032934","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:06:13.463409  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:06:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:06:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:06:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 05:06:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:06:14.456690  543705 disk_worker.go:494] system disk:vda1
I0321 05:06:14.456721  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:06:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:06:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:06:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:06:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:06:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:06:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:06:23.409798  543705 memory.go:184] no items to output this cycle
I0321 05:06:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 05:06:27.865681  543705 disk_info.go:125] begin check local disk info of client
I0321 05:06:27.868255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:06:27.868262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f6040 0xc0001f6080]
E0321 05:06:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:06:33.409808  543705 memory.go:184] no items to output this cycle
I0321 05:06:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 05:06:38.741426  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:06:38.741432  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:06:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:06:43.410670  543705 memory.go:191] Add success.
I0321 05:06:43.409791  543705 cpu.go:282] Add success.
I0321 05:06:43.420383  543705 net.go:648] Add success.
I0321 05:06:43.423111  543705 net.go:770] primary dev: ETH0
I0321 05:06:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:06:43.423136  543705 net.go:698] Add success.
I0321 05:06:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:06:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:06:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:06:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:06:53.409780  543705 memory.go:184] no items to output this cycle
I0321 05:06:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 05:07:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:07:03.409803  543705 memory.go:184] no items to output this cycle
I0321 05:07:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 05:07:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:07:13.409786  543705 memory.go:191] Add success.
I0321 05:07:13.409803  543705 cpu.go:282] Add success.
W0321 05:07:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:07:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:07:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:07:13.420146  543705 net.go:648] Add success.
I0321 05:07:13.423215  543705 net.go:770] primary dev: ETH0
I0321 05:07:13.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:07:13.423253  543705 net.go:698] Add success.
I0321 05:07:13.452812  543705 event_worker.go:152] Polling the log file for events...
W0321 05:07:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 05:07:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:07:14.456799  543705 disk_worker.go:494] system disk:vda1
I0321 05:07:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:07:14.457114  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:07:14.457122  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:07:14.457126  543705 custom_config.go:64] query custom config with name: gpu
E0321 05:07:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:07:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:07:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:07:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:07:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:07:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:07:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:07:23.410402  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:07:23.410422  543705 memory.go:184] no items to output this cycle
I0321 05:07:23.410432  543705 cpu.go:275] no items to output this cycle
I0321 05:07:27.869676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:07:27.872232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:07:27.872239  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003684c0 0xc000368500]
E0321 05:07:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:07:33.409788  543705 memory.go:184] no items to output this cycle
I0321 05:07:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 05:07:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:07:43.409790  543705 memory.go:191] Add success.
I0321 05:07:43.409799  543705 cpu.go:282] Add success.
I0321 05:07:43.419891  543705 net.go:648] Add success.
I0321 05:07:43.422776  543705 net.go:770] primary dev: ETH0
I0321 05:07:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:07:43.422801  543705 net.go:698] Add success.
I0321 05:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:07:53.410275  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:07:53.410293  543705 memory.go:184] no items to output this cycle
I0321 05:07:53.410307  543705 cpu.go:275] no items to output this cycle
E0321 05:08:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:08:03.409778  543705 memory.go:184] no items to output this cycle
I0321 05:08:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 05:08:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:08:13.409810  543705 memory.go:191] Add success.
I0321 05:08:13.409819  543705 cpu.go:282] Add success.
W0321 05:08:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:08:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:08:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:08:13.420131  543705 net.go:648] Add success.
I0321 05:08:13.422773  543705 net.go:770] primary dev: ETH0
I0321 05:08:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:08:13.422808  543705 net.go:698] Add success.
I0321 05:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:08:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:08:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 05:08:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:08:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 05:08:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:08:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:08:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:08:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:08:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:08:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:08:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:08:23.409803  543705 memory.go:184] no items to output this cycle
I0321 05:08:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 05:08:27.873680  543705 disk_info.go:125] begin check local disk info of client
I0321 05:08:27.876234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:08:27.876241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368000 0xc000368040]
E0321 05:08:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:08:33.409812  543705 memory.go:184] no items to output this cycle
I0321 05:08:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 05:08:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:08:43.409785  543705 memory.go:191] Add success.
I0321 05:08:43.409820  543705 cpu.go:282] Add success.
I0321 05:08:43.420408  543705 net.go:648] Add success.
I0321 05:08:43.423271  543705 net.go:770] primary dev: ETH0
I0321 05:08:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:08:43.423297  543705 net.go:698] Add success.
I0321 05:08:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:08:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:08:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:08:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 05:08:53.409783  543705 memory.go:184] no items to output this cycle
E0321 05:09:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:09:03.409803  543705 memory.go:184] no items to output this cycle
I0321 05:09:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:09:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:09:13.409815  543705 memory.go:191] Add success.
I0321 05:09:13.409822  543705 cpu.go:282] Add success.
W0321 05:09:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:09:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:09:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:09:13.420158  543705 net.go:648] Add success.
I0321 05:09:13.422708  543705 net.go:770] primary dev: ETH0
I0321 05:09:13.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:09:13.422738  543705 net.go:698] Add success.
I0321 05:09:13.463810  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dfbebf24-5e6e-4cb6-a08f-d0e443eea7bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:09:13.463854  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:09:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:09:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:09:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 05:09:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:09:14.456687  543705 disk_worker.go:494] system disk:vda1
I0321 05:09:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:09:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:09:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:09:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:09:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:09:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:09:23.409776  543705 memory.go:184] no items to output this cycle
I0321 05:09:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 05:09:27.877669  543705 disk_info.go:125] begin check local disk info of client
I0321 05:09:27.880230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:09:27.880237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b04c0 0xc0003b0500]
E0321 05:09:33.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:09:33.409925  543705 memory.go:184] no items to output this cycle
I0321 05:09:33.409997  543705 cpu.go:275] no items to output this cycle
I0321 05:09:38.741741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:09:38.741747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:09:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:09:43.410658  543705 memory.go:191] Add success.
I0321 05:09:43.409834  543705 cpu.go:282] Add success.
I0321 05:09:43.420373  543705 net.go:648] Add success.
I0321 05:09:43.423076  543705 net.go:770] primary dev: ETH0
I0321 05:09:43.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:09:43.423103  543705 net.go:698] Add success.
I0321 05:09:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:09:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:09:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:09:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:09:53.409768  543705 memory.go:184] no items to output this cycle
I0321 05:09:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 05:10:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:10:03.409791  543705 memory.go:184] no items to output this cycle
I0321 05:10:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 05:10:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:10:13.409798  543705 memory.go:191] Add success.
I0321 05:10:13.409799  543705 cpu.go:282] Add success.
W0321 05:10:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:10:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:10:13.420303  543705 net.go:648] Add success.
I0321 05:10:13.423481  543705 net.go:770] primary dev: ETH0
I0321 05:10:13.423501  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:10:13.423515  543705 net.go:698] Add success.
I0321 05:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:10:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:10:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 05:10:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:10:14.456505  543705 disk_worker.go:494] system disk:vda1
I0321 05:10:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:10:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:10:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:10:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:10:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:10:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:10:23.409781  543705 memory.go:184] no items to output this cycle
I0321 05:10:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 05:10:27.881676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:10:27.884308  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:10:27.884315  543705 disk_info.go:196] parse disk info done, disk is : [0xc000341900 0xc000341940]
E0321 05:10:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:10:33.409782  543705 memory.go:184] no items to output this cycle
I0321 05:10:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 05:10:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:10:43.409898  543705 memory.go:191] Add success.
I0321 05:10:43.409929  543705 cpu.go:282] Add success.
I0321 05:10:43.419717  543705 net.go:648] Add success.
I0321 05:10:43.422398  543705 net.go:770] primary dev: ETH0
I0321 05:10:43.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:10:43.422422  543705 net.go:698] Add success.
I0321 05:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:10:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:10:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:10:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:10:53.409795  543705 memory.go:184] no items to output this cycle
I0321 05:10:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 05:11:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:11:03.409813  543705 memory.go:184] no items to output this cycle
I0321 05:11:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 05:11:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:11:13.409797  543705 cpu.go:282] Add success.
I0321 05:11:13.409805  543705 memory.go:191] Add success.
W0321 05:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:11:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:11:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:11:13.420058  543705 net.go:648] Add success.
I0321 05:11:13.422851  543705 net.go:770] primary dev: ETH0
I0321 05:11:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:11:13.422875  543705 net.go:698] Add success.
I0321 05:11:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:11:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:11:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 05:11:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:11:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 05:11:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:11:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:11:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:11:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:11:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:11:23.409800  543705 memory.go:184] no items to output this cycle
I0321 05:11:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 05:11:27.885678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:11:27.888278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:11:27.888285  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c56c0 0xc0000c5700]
E0321 05:11:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:11:33.409784  543705 memory.go:184] no items to output this cycle
I0321 05:11:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 05:11:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:11:43.409815  543705 memory.go:191] Add success.
I0321 05:11:43.409823  543705 cpu.go:282] Add success.
I0321 05:11:43.420111  543705 net.go:648] Add success.
I0321 05:11:43.422808  543705 net.go:770] primary dev: ETH0
I0321 05:11:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:11:43.422833  543705 net.go:698] Add success.
I0321 05:11:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:11:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:11:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:11:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:11:53.409776  543705 cpu.go:275] no items to output this cycle
I0321 05:11:53.409787  543705 memory.go:184] no items to output this cycle
E0321 05:12:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:12:03.409801  543705 memory.go:184] no items to output this cycle
I0321 05:12:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 05:12:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:12:13.409820  543705 memory.go:191] Add success.
I0321 05:12:13.409824  543705 cpu.go:282] Add success.
W0321 05:12:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:12:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:12:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:12:13.420208  543705 net.go:648] Add success.
I0321 05:12:13.422890  543705 net.go:770] primary dev: ETH0
I0321 05:12:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:12:13.422915  543705 net.go:698] Add success.
I0321 05:12:13.469195  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b9c8ac72-35c3-4da6-abd6-a014cc80110a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:12:13.469228  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 05:12:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:12:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 05:12:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:12:14.456824  543705 disk_worker.go:494] system disk:vda1
E0321 05:12:14.456840  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:12:14.456847  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:12:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:12:14.456864  543705 custom_config.go:64] query custom config with name: gpu
E0321 05:12:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:12:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:12:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:12:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:12:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:12:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:12:16.472663  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:12:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:12:23.409769  543705 memory.go:184] no items to output this cycle
I0321 05:12:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 05:12:27.889675  543705 disk_info.go:125] begin check local disk info of client
I0321 05:12:27.892239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:12:27.892246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c53c0 0xc0000c5400]
E0321 05:12:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:12:33.409809  543705 memory.go:184] no items to output this cycle
I0321 05:12:33.409823  543705 cpu.go:275] no items to output this cycle
I0321 05:12:38.741889  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:12:38.741896  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:12:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:12:43.410628  543705 memory.go:191] Add success.
I0321 05:12:43.409803  543705 cpu.go:282] Add success.
I0321 05:12:43.420326  543705 net.go:648] Add success.
I0321 05:12:43.423092  543705 net.go:770] primary dev: ETH0
I0321 05:12:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:12:43.423118  543705 net.go:698] Add success.
I0321 05:12:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:12:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:12:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:12:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:12:53.409795  543705 memory.go:184] no items to output this cycle
I0321 05:12:53.409900  543705 cpu.go:275] no items to output this cycle
E0321 05:13:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:13:03.409809  543705 memory.go:184] no items to output this cycle
I0321 05:13:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 05:13:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:13:13.409822  543705 memory.go:191] Add success.
I0321 05:13:13.409823  543705 cpu.go:282] Add success.
W0321 05:13:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:13:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:13:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:13:13.420226  543705 net.go:648] Add success.
I0321 05:13:13.422734  543705 net.go:770] primary dev: ETH0
I0321 05:13:13.422746  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:13:13.422758  543705 net.go:698] Add success.
I0321 05:13:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:13:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:13:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 05:13:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:13:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 05:13:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:13:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:13:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:13:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:13:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:13:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:13:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:13:23.409794  543705 memory.go:184] no items to output this cycle
I0321 05:13:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 05:13:27.893676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:13:27.896314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:13:27.896321  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353400 0xc000353440]
E0321 05:13:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:13:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 05:13:33.409787  543705 memory.go:184] no items to output this cycle
E0321 05:13:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:13:43.409788  543705 memory.go:191] Add success.
I0321 05:13:43.409809  543705 cpu.go:282] Add success.
I0321 05:13:43.419995  543705 net.go:648] Add success.
I0321 05:13:43.422647  543705 net.go:770] primary dev: ETH0
I0321 05:13:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:13:43.422673  543705 net.go:698] Add success.
I0321 05:13:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:13:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:13:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:13:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:13:53.409773  543705 memory.go:184] no items to output this cycle
I0321 05:13:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 05:14:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:14:03.409814  543705 memory.go:184] no items to output this cycle
I0321 05:14:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 05:14:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:14:13.409795  543705 memory.go:191] Add success.
I0321 05:14:13.409809  543705 cpu.go:282] Add success.
W0321 05:14:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:14:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:14:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:14:13.420151  543705 net.go:648] Add success.
I0321 05:14:13.422962  543705 net.go:770] primary dev: ETH0
I0321 05:14:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:14:13.422987  543705 net.go:698] Add success.
I0321 05:14:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:14:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:14:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 05:14:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:14:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 05:14:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:14:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:14:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:14:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:14:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:14:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:14:23.409797  543705 memory.go:184] no items to output this cycle
I0321 05:14:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 05:14:27.897679  543705 disk_info.go:125] begin check local disk info of client
I0321 05:14:27.900252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:14:27.900259  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493140 0xc000493180]
E0321 05:14:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:14:33.409813  543705 memory.go:184] no items to output this cycle
I0321 05:14:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 05:14:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:14:43.409788  543705 memory.go:191] Add success.
I0321 05:14:43.409813  543705 cpu.go:282] Add success.
I0321 05:14:43.420050  543705 net.go:648] Add success.
I0321 05:14:43.422719  543705 net.go:770] primary dev: ETH0
I0321 05:14:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:14:43.422744  543705 net.go:698] Add success.
I0321 05:14:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:14:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:14:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:14:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:14:53.409771  543705 memory.go:184] no items to output this cycle
I0321 05:14:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 05:15:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:15:03.409772  543705 memory.go:184] no items to output this cycle
I0321 05:15:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 05:15:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:15:13.409794  543705 memory.go:191] Add success.
I0321 05:15:13.409795  543705 cpu.go:282] Add success.
W0321 05:15:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:15:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:15:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:15:13.420185  543705 net.go:648] Add success.
I0321 05:15:13.422782  543705 net.go:770] primary dev: ETH0
I0321 05:15:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:15:13.422806  543705 net.go:698] Add success.
I0321 05:15:13.469213  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bc5157d5-b3ee-44fb-89a4-efc4d6a7f208","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:15:13.469245  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:15:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:15:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:15:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 05:15:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:15:14.456726  543705 disk_worker.go:494] system disk:vda1
I0321 05:15:14.456757  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:15:15.455610  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:15:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:15:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:15:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:15:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:15:23.409794  543705 memory.go:184] no items to output this cycle
I0321 05:15:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 05:15:27.901678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:15:27.904319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:15:27.904326  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4600 0xc0000c4640]
E0321 05:15:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:15:33.409803  543705 memory.go:184] no items to output this cycle
I0321 05:15:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 05:15:38.742047  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:15:38.742053  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:15:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:15:43.410562  543705 memory.go:191] Add success.
I0321 05:15:43.409816  543705 cpu.go:282] Add success.
I0321 05:15:43.420337  543705 net.go:648] Add success.
I0321 05:15:43.422800  543705 net.go:770] primary dev: ETH0
I0321 05:15:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:15:43.422827  543705 net.go:698] Add success.
I0321 05:15:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:15:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:15:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:15:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:15:53.409786  543705 memory.go:184] no items to output this cycle
I0321 05:15:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 05:16:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:16:03.409788  543705 memory.go:184] no items to output this cycle
I0321 05:16:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 05:16:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:16:13.409814  543705 memory.go:191] Add success.
I0321 05:16:13.409821  543705 cpu.go:282] Add success.
W0321 05:16:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:16:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:16:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:16:13.420225  543705 net.go:648] Add success.
I0321 05:16:13.422651  543705 net.go:770] primary dev: ETH0
I0321 05:16:13.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:16:13.422679  543705 net.go:698] Add success.
I0321 05:16:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:16:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:16:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 05:16:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:16:14.456603  543705 disk_worker.go:494] system disk:vda1
I0321 05:16:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:16:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:16:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:16:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:16:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:16:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:16:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:16:23.409776  543705 memory.go:184] no items to output this cycle
I0321 05:16:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 05:16:27.905678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:16:27.908232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:16:27.908239  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492c40 0xc000492c80]
E0321 05:16:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:16:33.409784  543705 memory.go:184] no items to output this cycle
I0321 05:16:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 05:16:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:16:43.409810  543705 memory.go:191] Add success.
I0321 05:16:43.409823  543705 cpu.go:282] Add success.
I0321 05:16:43.419855  543705 net.go:648] Add success.
I0321 05:16:43.422690  543705 net.go:770] primary dev: ETH0
I0321 05:16:43.422704  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:16:43.422717  543705 net.go:698] Add success.
I0321 05:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:16:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:16:53.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:16:53.409888  543705 memory.go:184] no items to output this cycle
I0321 05:16:53.409905  543705 cpu.go:275] no items to output this cycle
E0321 05:17:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:17:03.409794  543705 cpu.go:275] no items to output this cycle
I0321 05:17:03.409797  543705 memory.go:184] no items to output this cycle
E0321 05:17:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:17:13.409788  543705 memory.go:191] Add success.
I0321 05:17:13.409789  543705 cpu.go:282] Add success.
W0321 05:17:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:17:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:17:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:17:13.420179  543705 net.go:648] Add success.
I0321 05:17:13.422888  543705 net.go:770] primary dev: ETH0
I0321 05:17:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:17:13.422912  543705 net.go:698] Add success.
I0321 05:17:13.453439  543705 event_worker.go:152] Polling the log file for events...
W0321 05:17:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:17:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 05:17:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:17:14.455906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:17:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:17:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:17:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 05:17:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:17:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:17:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:17:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:17:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:17:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:17:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:17:16.472318  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:17:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:17:23.409776  543705 memory.go:184] no items to output this cycle
I0321 05:17:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 05:17:27.909677  543705 disk_info.go:125] begin check local disk info of client
I0321 05:17:27.912285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:17:27.912292  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c58c0 0xc0000c5900]
E0321 05:17:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:17:33.409782  543705 memory.go:184] no items to output this cycle
I0321 05:17:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 05:17:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:17:43.409819  543705 memory.go:191] Add success.
I0321 05:17:43.409829  543705 cpu.go:282] Add success.
I0321 05:17:43.420332  543705 net.go:648] Add success.
I0321 05:17:43.423144  543705 net.go:770] primary dev: ETH0
I0321 05:17:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:17:43.423168  543705 net.go:698] Add success.
I0321 05:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:17:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:17:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:17:53.409779  543705 cpu.go:275] no items to output this cycle
I0321 05:17:53.409782  543705 memory.go:184] no items to output this cycle
E0321 05:18:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:18:03.409783  543705 memory.go:184] no items to output this cycle
I0321 05:18:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 05:18:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:18:13.409819  543705 memory.go:191] Add success.
I0321 05:18:13.409845  543705 cpu.go:282] Add success.
W0321 05:18:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:18:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:18:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:18:13.420644  543705 net.go:648] Add success.
I0321 05:18:13.423519  543705 net.go:770] primary dev: ETH0
I0321 05:18:13.423538  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:18:13.423552  543705 net.go:698] Add success.
I0321 05:18:13.469411  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7034886d-1ec8-49ed-aeb4-4c574d08533c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:18:13.469443  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:18:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:18:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:18:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 05:18:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:18:14.456737  543705 disk_worker.go:494] system disk:vda1
I0321 05:18:14.456767  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:18:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:18:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:18:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:18:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:18:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:18:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:18:23.409801  543705 memory.go:184] no items to output this cycle
I0321 05:18:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 05:18:27.913676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:18:27.916239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:18:27.916246  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029eac0 0xc00029eb00]
E0321 05:18:33.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:18:33.409896  543705 memory.go:184] no items to output this cycle
I0321 05:18:33.409935  543705 cpu.go:275] no items to output this cycle
I0321 05:18:38.743418  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:18:38.743424  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:18:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:18:43.410682  543705 memory.go:191] Add success.
I0321 05:18:43.409807  543705 cpu.go:282] Add success.
I0321 05:18:43.420397  543705 net.go:648] Add success.
I0321 05:18:43.423376  543705 net.go:770] primary dev: ETH0
I0321 05:18:43.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:18:43.423406  543705 net.go:698] Add success.
I0321 05:18:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:18:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:18:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:18:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:18:53.409776  543705 memory.go:184] no items to output this cycle
I0321 05:18:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 05:19:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:19:03.409806  543705 memory.go:184] no items to output this cycle
I0321 05:19:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:19:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:19:13.409823  543705 memory.go:191] Add success.
I0321 05:19:13.409833  543705 cpu.go:282] Add success.
W0321 05:19:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:19:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:19:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:19:13.420248  543705 net.go:648] Add success.
I0321 05:19:13.423133  543705 net.go:770] primary dev: ETH0
I0321 05:19:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:19:13.423160  543705 net.go:698] Add success.
I0321 05:19:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:19:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:19:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 05:19:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:19:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 05:19:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:19:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:19:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:19:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:19:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:19:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:19:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:19:23.409859  543705 memory.go:184] no items to output this cycle
I0321 05:19:23.409924  543705 cpu.go:275] no items to output this cycle
I0321 05:19:27.917681  543705 disk_info.go:125] begin check local disk info of client
I0321 05:19:27.920221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:19:27.920229  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033e000 0xc00033e040]
E0321 05:19:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:19:33.409801  543705 memory.go:184] no items to output this cycle
I0321 05:19:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:19:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:19:43.409778  543705 memory.go:191] Add success.
I0321 05:19:43.409785  543705 cpu.go:282] Add success.
I0321 05:19:43.419996  543705 net.go:648] Add success.
I0321 05:19:43.421018  543705 net.go:770] primary dev: ETH0
I0321 05:19:43.421036  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:19:43.421051  543705 net.go:698] Add success.
I0321 05:19:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:19:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:19:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:19:53.409787  543705 memory.go:184] no items to output this cycle
I0321 05:19:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 05:20:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:20:03.409787  543705 memory.go:184] no items to output this cycle
I0321 05:20:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 05:20:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:20:13.409799  543705 memory.go:191] Add success.
I0321 05:20:13.409811  543705 cpu.go:282] Add success.
W0321 05:20:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:20:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:20:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:20:13.420324  543705 net.go:648] Add success.
I0321 05:20:13.422930  543705 net.go:770] primary dev: ETH0
I0321 05:20:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:20:13.422956  543705 net.go:698] Add success.
I0321 05:20:14.454432  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:20:14.454586  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:20:14.454650  543705 disk_worker.go:708] disk space is not compliant
W0321 05:20:14.454652  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:20:14.455960  543705 disk_worker.go:494] system disk:vda1
I0321 05:20:14.456004  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:20:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:20:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:20:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:20:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:20:23.409777  543705 memory.go:184] no items to output this cycle
I0321 05:20:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 05:20:27.921669  543705 disk_info.go:125] begin check local disk info of client
I0321 05:20:27.924223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:20:27.924229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265b80 0xc000265bc0]
I0321 05:20:33.409929  543705 cpu.go:275] no items to output this cycle
E0321 05:20:33.410008  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:20:33.410030  543705 memory.go:184] no items to output this cycle
E0321 05:20:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:20:43.409784  543705 memory.go:191] Add success.
I0321 05:20:43.409813  543705 cpu.go:282] Add success.
I0321 05:20:43.419956  543705 net.go:648] Add success.
I0321 05:20:43.422514  543705 net.go:770] primary dev: ETH0
I0321 05:20:43.422528  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:20:43.422540  543705 net.go:698] Add success.
I0321 05:20:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:20:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:20:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:20:53.410409  543705 cpu.go:275] no items to output this cycle
I0321 05:20:53.410410  543705 memory.go:184] no items to output this cycle
E0321 05:21:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:21:03.409781  543705 memory.go:184] no items to output this cycle
I0321 05:21:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 05:21:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:21:13.409818  543705 memory.go:191] Add success.
I0321 05:21:13.409830  543705 cpu.go:282] Add success.
W0321 05:21:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:21:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:21:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:21:13.420124  543705 net.go:648] Add success.
I0321 05:21:13.422814  543705 net.go:770] primary dev: ETH0
I0321 05:21:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:21:13.422840  543705 net.go:698] Add success.
I0321 05:21:13.463799  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf05617f-b844-43b4-b9c0-77d214c3863b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:21:13.463834  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:21:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:21:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:21:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 05:21:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:21:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 05:21:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:21:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:21:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:21:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:21:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:21:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:21:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:21:23.409779  543705 memory.go:184] no items to output this cycle
I0321 05:21:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 05:21:27.925687  543705 disk_info.go:125] begin check local disk info of client
I0321 05:21:27.928254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:21:27.928261  543705 disk_info.go:196] parse disk info done, disk is : [0xc000251440 0xc000251480]
E0321 05:21:33.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:21:33.409883  543705 memory.go:184] no items to output this cycle
I0321 05:21:33.409974  543705 cpu.go:275] no items to output this cycle
I0321 05:21:38.744431  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:21:38.744438  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:21:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:21:43.410659  543705 memory.go:191] Add success.
I0321 05:21:43.409808  543705 cpu.go:282] Add success.
I0321 05:21:43.420535  543705 net.go:648] Add success.
I0321 05:21:43.423220  543705 net.go:770] primary dev: ETH0
I0321 05:21:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:21:43.423249  543705 net.go:698] Add success.
I0321 05:21:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:21:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:21:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:21:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:21:53.409805  543705 memory.go:184] no items to output this cycle
I0321 05:21:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:22:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:22:03.409776  543705 memory.go:184] no items to output this cycle
I0321 05:22:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 05:22:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:22:13.409810  543705 memory.go:191] Add success.
I0321 05:22:13.409815  543705 cpu.go:282] Add success.
W0321 05:22:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:22:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:22:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:22:13.420111  543705 net.go:648] Add success.
I0321 05:22:13.422727  543705 net.go:770] primary dev: ETH0
I0321 05:22:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:22:13.422751  543705 net.go:698] Add success.
W0321 05:22:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:22:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 05:22:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:22:14.456768  543705 disk_worker.go:494] system disk:vda1
I0321 05:22:14.456806  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:22:14.457112  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:22:14.457119  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:22:14.457124  543705 custom_config.go:64] query custom config with name: gpu
E0321 05:22:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:22:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:22:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:22:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:22:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:22:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:22:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:22:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:22:23.409782  543705 memory.go:184] no items to output this cycle
I0321 05:22:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 05:22:27.929680  543705 disk_info.go:125] begin check local disk info of client
I0321 05:22:27.932313  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:22:27.932332  543705 disk_info.go:196] parse disk info done, disk is : [0xc000341480 0xc0003414c0]
E0321 05:22:33.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:22:33.409921  543705 cpu.go:275] no items to output this cycle
I0321 05:22:33.409935  543705 memory.go:184] no items to output this cycle
E0321 05:22:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:22:43.409788  543705 memory.go:191] Add success.
I0321 05:22:43.409788  543705 cpu.go:282] Add success.
I0321 05:22:43.419980  543705 net.go:648] Add success.
I0321 05:22:43.422754  543705 net.go:770] primary dev: ETH0
I0321 05:22:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:22:43.422783  543705 net.go:698] Add success.
I0321 05:22:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:22:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:22:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:22:53.409779  543705 memory.go:184] no items to output this cycle
I0321 05:22:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 05:23:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:23:03.409778  543705 memory.go:184] no items to output this cycle
I0321 05:23:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 05:23:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:23:13.409808  543705 memory.go:191] Add success.
I0321 05:23:13.409826  543705 cpu.go:282] Add success.
W0321 05:23:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:23:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:23:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:23:13.420047  543705 net.go:648] Add success.
I0321 05:23:13.422425  543705 net.go:770] primary dev: ETH0
I0321 05:23:13.422438  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:23:13.422449  543705 net.go:698] Add success.
I0321 05:23:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:23:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:23:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 05:23:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:23:14.456608  543705 disk_worker.go:494] system disk:vda1
I0321 05:23:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:23:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:23:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:23:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:23:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:23:23.409798  543705 memory.go:184] no items to output this cycle
I0321 05:23:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 05:23:27.933677  543705 disk_info.go:125] begin check local disk info of client
I0321 05:23:27.936222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:23:27.936229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000247300 0xc000247340]
E0321 05:23:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:23:33.409787  543705 memory.go:184] no items to output this cycle
I0321 05:23:33.409850  543705 cpu.go:275] no items to output this cycle
E0321 05:23:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:23:43.409812  543705 memory.go:191] Add success.
I0321 05:23:43.409821  543705 cpu.go:282] Add success.
I0321 05:23:43.419997  543705 net.go:648] Add success.
I0321 05:23:43.422793  543705 net.go:770] primary dev: ETH0
I0321 05:23:43.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:23:43.422818  543705 net.go:698] Add success.
I0321 05:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:23:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:23:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:23:53.409781  543705 memory.go:184] no items to output this cycle
I0321 05:23:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 05:24:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:24:03.409782  543705 memory.go:184] no items to output this cycle
I0321 05:24:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 05:24:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:24:13.409806  543705 memory.go:191] Add success.
I0321 05:24:13.409811  543705 cpu.go:282] Add success.
W0321 05:24:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:24:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:24:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:24:13.420052  543705 net.go:648] Add success.
I0321 05:24:13.422566  543705 net.go:770] primary dev: ETH0
I0321 05:24:13.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:24:13.422592  543705 net.go:698] Add success.
I0321 05:24:13.468287  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"951fe4e3-f93a-48b0-8954-5710e81d0924","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:24:13.468331  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:24:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:24:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:24:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 05:24:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:24:14.456720  543705 disk_worker.go:494] system disk:vda1
I0321 05:24:14.456751  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:24:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:24:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:24:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:24:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:24:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:24:23.409791  543705 memory.go:184] no items to output this cycle
I0321 05:24:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 05:24:27.937678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:24:27.940245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:24:27.940253  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024b1c0 0xc00024b200]
E0321 05:24:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:24:33.409891  543705 memory.go:184] no items to output this cycle
I0321 05:24:33.409938  543705 cpu.go:275] no items to output this cycle
I0321 05:24:38.745430  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:24:38.745436  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:24:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:24:43.410599  543705 memory.go:191] Add success.
I0321 05:24:43.409807  543705 cpu.go:282] Add success.
I0321 05:24:43.420307  543705 net.go:648] Add success.
I0321 05:24:43.422782  543705 net.go:770] primary dev: ETH0
I0321 05:24:43.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:24:43.422808  543705 net.go:698] Add success.
I0321 05:24:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:24:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:24:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:24:53.409781  543705 memory.go:184] no items to output this cycle
I0321 05:24:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 05:25:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:25:03.409775  543705 memory.go:184] no items to output this cycle
I0321 05:25:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 05:25:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:25:13.409807  543705 memory.go:191] Add success.
I0321 05:25:13.409811  543705 cpu.go:282] Add success.
W0321 05:25:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:25:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:25:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:25:13.420214  543705 net.go:648] Add success.
I0321 05:25:13.422783  543705 net.go:770] primary dev: ETH0
I0321 05:25:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:25:13.422814  543705 net.go:698] Add success.
I0321 05:25:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:25:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:25:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 05:25:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:25:14.456546  543705 disk_worker.go:494] system disk:vda1
I0321 05:25:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:25:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:25:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:25:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:25:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:25:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:25:23.409794  543705 memory.go:184] no items to output this cycle
I0321 05:25:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 05:25:27.941678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:25:27.944305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:25:27.944313  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266f80 0xc000266fc0]
E0321 05:25:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:25:33.409780  543705 memory.go:184] no items to output this cycle
I0321 05:25:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 05:25:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:25:43.409789  543705 memory.go:191] Add success.
I0321 05:25:43.409822  543705 cpu.go:282] Add success.
I0321 05:25:43.419897  543705 net.go:648] Add success.
I0321 05:25:43.422558  543705 net.go:770] primary dev: ETH0
I0321 05:25:43.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:25:43.422583  543705 net.go:698] Add success.
I0321 05:25:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:25:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:25:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:25:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:25:53.409796  543705 memory.go:184] no items to output this cycle
I0321 05:25:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 05:26:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:26:03.409771  543705 memory.go:184] no items to output this cycle
I0321 05:26:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 05:26:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:26:13.409787  543705 memory.go:191] Add success.
I0321 05:26:13.409791  543705 cpu.go:282] Add success.
W0321 05:26:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:26:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:26:13.420057  543705 net.go:648] Add success.
I0321 05:26:13.422593  543705 net.go:770] primary dev: ETH0
I0321 05:26:13.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:26:13.422624  543705 net.go:698] Add success.
I0321 05:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:26:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:26:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 05:26:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:26:14.456496  543705 disk_worker.go:494] system disk:vda1
I0321 05:26:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:26:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:26:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:26:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:26:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:26:23.410258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:26:23.410270  543705 cpu.go:275] no items to output this cycle
I0321 05:26:23.410272  543705 memory.go:184] no items to output this cycle
I0321 05:26:27.945679  543705 disk_info.go:125] begin check local disk info of client
I0321 05:26:27.948237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:26:27.948243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d32c0 0xc0003d3300]
E0321 05:26:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:26:33.409790  543705 memory.go:184] no items to output this cycle
I0321 05:26:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 05:26:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:26:43.409879  543705 memory.go:191] Add success.
I0321 05:26:43.409927  543705 cpu.go:282] Add success.
I0321 05:26:43.419719  543705 net.go:648] Add success.
I0321 05:26:43.422425  543705 net.go:770] primary dev: ETH0
I0321 05:26:43.422440  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:26:43.422454  543705 net.go:698] Add success.
I0321 05:26:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:26:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:26:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:26:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:26:53.409780  543705 memory.go:184] no items to output this cycle
I0321 05:26:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 05:27:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:27:03.409781  543705 memory.go:184] no items to output this cycle
I0321 05:27:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 05:27:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:27:13.409793  543705 memory.go:191] Add success.
I0321 05:27:13.409792  543705 cpu.go:282] Add success.
W0321 05:27:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:27:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:27:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:27:13.420543  543705 net.go:648] Add success.
I0321 05:27:13.428990  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 05:27:13.429075  543705 net.go:770] primary dev: ETH0
I0321 05:27:13.429087  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:27:13.429099  543705 net.go:698] Add success.
I0321 05:27:13.453652  543705 event_worker.go:152] Polling the log file for events...
I0321 05:27:13.470738  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4d9f9952-bf5a-42a6-89c0-38ab8acdd22c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:27:13.470770  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 05:27:14.455238  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:27:14.455252  543705 disk_worker.go:708] disk space is not compliant
W0321 05:27:14.455256  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:27:14.456083  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:27:14.456093  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:27:14.456099  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:27:14.456930  543705 disk_worker.go:494] system disk:vda1
I0321 05:27:14.456962  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:27:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:27:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:27:16.457899  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:27:16.457899  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:27:16.457955  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:27:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:27:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:27:23.409797  543705 memory.go:184] no items to output this cycle
I0321 05:27:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 05:27:27.949677  543705 disk_info.go:125] begin check local disk info of client
I0321 05:27:27.952325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:27:27.952333  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492e40 0xc000492e80]
E0321 05:27:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:27:33.409800  543705 memory.go:184] no items to output this cycle
I0321 05:27:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 05:27:38.745733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:27:38.745739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:27:43.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:27:43.410682  543705 memory.go:191] Add success.
I0321 05:27:43.409946  543705 cpu.go:282] Add success.
I0321 05:27:43.419739  543705 net.go:648] Add success.
I0321 05:27:43.422394  543705 net.go:770] primary dev: ETH0
I0321 05:27:43.422408  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:27:43.422420  543705 net.go:698] Add success.
I0321 05:27:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:27:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:27:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:27:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:27:53.409805  543705 memory.go:184] no items to output this cycle
I0321 05:27:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 05:28:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:28:03.409796  543705 memory.go:184] no items to output this cycle
I0321 05:28:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 05:28:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:28:13.409803  543705 memory.go:191] Add success.
I0321 05:28:13.409826  543705 cpu.go:282] Add success.
W0321 05:28:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:28:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:28:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:28:13.420294  543705 net.go:648] Add success.
I0321 05:28:13.422875  543705 net.go:770] primary dev: ETH0
I0321 05:28:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:28:13.422906  543705 net.go:698] Add success.
I0321 05:28:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:28:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:28:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 05:28:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:28:14.456558  543705 disk_worker.go:494] system disk:vda1
I0321 05:28:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:28:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:28:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:28:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:28:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:28:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:28:23.409777  543705 memory.go:184] no items to output this cycle
I0321 05:28:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 05:28:27.953679  543705 disk_info.go:125] begin check local disk info of client
I0321 05:28:27.956319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:28:27.956325  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb740 0xc0001fb780]
E0321 05:28:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:28:33.409781  543705 memory.go:184] no items to output this cycle
I0321 05:28:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 05:28:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:28:43.409798  543705 memory.go:191] Add success.
I0321 05:28:43.409815  543705 cpu.go:282] Add success.
I0321 05:28:43.420151  543705 net.go:648] Add success.
I0321 05:28:43.423145  543705 net.go:770] primary dev: ETH0
I0321 05:28:43.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:28:43.423171  543705 net.go:698] Add success.
I0321 05:28:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:28:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:28:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:28:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:28:53.409783  543705 memory.go:184] no items to output this cycle
I0321 05:28:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 05:29:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:29:03.409802  543705 memory.go:184] no items to output this cycle
I0321 05:29:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 05:29:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:29:13.409781  543705 memory.go:191] Add success.
I0321 05:29:13.409803  543705 cpu.go:282] Add success.
W0321 05:29:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:29:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:29:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:29:13.420239  543705 net.go:648] Add success.
I0321 05:29:13.422891  543705 net.go:770] primary dev: ETH0
I0321 05:29:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:29:13.422915  543705 net.go:698] Add success.
I0321 05:29:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:29:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:29:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 05:29:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:29:14.456475  543705 disk_worker.go:494] system disk:vda1
I0321 05:29:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:29:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:29:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:29:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:29:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:29:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:29:23.409769  543705 memory.go:184] no items to output this cycle
I0321 05:29:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 05:29:27.957675  543705 disk_info.go:125] begin check local disk info of client
I0321 05:29:27.960228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:29:27.960235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3640 0xc0003d3680]
E0321 05:29:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:29:33.409804  543705 memory.go:184] no items to output this cycle
I0321 05:29:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 05:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:29:43.409788  543705 memory.go:191] Add success.
I0321 05:29:43.409803  543705 cpu.go:282] Add success.
I0321 05:29:43.419905  543705 net.go:648] Add success.
I0321 05:29:43.422873  543705 net.go:770] primary dev: ETH0
I0321 05:29:43.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:29:43.422903  543705 net.go:698] Add success.
I0321 05:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:29:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:29:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:29:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:29:53.409778  543705 memory.go:184] no items to output this cycle
I0321 05:29:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 05:30:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:30:03.409762  543705 memory.go:184] no items to output this cycle
I0321 05:30:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 05:30:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:30:13.409813  543705 memory.go:191] Add success.
I0321 05:30:13.409815  543705 cpu.go:282] Add success.
W0321 05:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:30:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:30:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:30:13.420176  543705 net.go:648] Add success.
I0321 05:30:13.422983  543705 net.go:770] primary dev: ETH0
I0321 05:30:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:30:13.423008  543705 net.go:698] Add success.
I0321 05:30:13.468452  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bbfe29a3-7db5-42de-bc6e-610225676f45","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:30:13.468484  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:30:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:30:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:30:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 05:30:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:30:14.456487  543705 disk_worker.go:494] system disk:vda1
I0321 05:30:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:30:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:30:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:30:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:30:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:30:23.409795  543705 memory.go:184] no items to output this cycle
I0321 05:30:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 05:30:27.961682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:30:27.964278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:30:27.964286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2640 0xc0002b2680]
E0321 05:30:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:30:33.409807  543705 memory.go:184] no items to output this cycle
I0321 05:30:33.409821  543705 cpu.go:275] no items to output this cycle
I0321 05:30:38.745876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:30:38.745882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:30:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:30:43.410759  543705 memory.go:191] Add success.
I0321 05:30:43.409792  543705 cpu.go:282] Add success.
I0321 05:30:43.420472  543705 net.go:648] Add success.
I0321 05:30:43.423178  543705 net.go:770] primary dev: ETH0
I0321 05:30:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:30:43.423210  543705 net.go:698] Add success.
I0321 05:30:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:30:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:30:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:30:53.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:30:53.409884  543705 memory.go:184] no items to output this cycle
I0321 05:30:53.409995  543705 cpu.go:275] no items to output this cycle
E0321 05:31:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:31:03.409774  543705 memory.go:184] no items to output this cycle
I0321 05:31:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 05:31:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:31:13.409781  543705 memory.go:191] Add success.
I0321 05:31:13.409786  543705 cpu.go:282] Add success.
W0321 05:31:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:31:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:31:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:31:13.420214  543705 net.go:648] Add success.
I0321 05:31:13.423018  543705 net.go:770] primary dev: ETH0
I0321 05:31:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:31:13.423042  543705 net.go:698] Add success.
I0321 05:31:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:31:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:31:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 05:31:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:31:14.456603  543705 disk_worker.go:494] system disk:vda1
I0321 05:31:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:31:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:31:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:31:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:31:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:31:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:31:23.409780  543705 memory.go:184] no items to output this cycle
I0321 05:31:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 05:31:27.965678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:31:27.968224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:31:27.968231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369a40 0xc000369a80]
E0321 05:31:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:31:33.409799  543705 memory.go:184] no items to output this cycle
I0321 05:31:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 05:31:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:31:43.409789  543705 memory.go:191] Add success.
I0321 05:31:43.409816  543705 cpu.go:282] Add success.
I0321 05:31:43.419975  543705 net.go:648] Add success.
I0321 05:31:43.422526  543705 net.go:770] primary dev: ETH0
I0321 05:31:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:31:43.422556  543705 net.go:698] Add success.
I0321 05:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:31:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:31:53.410265  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:31:53.410280  543705 memory.go:184] no items to output this cycle
I0321 05:31:53.410283  543705 cpu.go:275] no items to output this cycle
E0321 05:32:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:32:03.409860  543705 memory.go:184] no items to output this cycle
I0321 05:32:03.409959  543705 cpu.go:275] no items to output this cycle
E0321 05:32:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:32:13.409811  543705 memory.go:191] Add success.
I0321 05:32:13.409821  543705 cpu.go:282] Add success.
W0321 05:32:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:32:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:32:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:32:13.420125  543705 net.go:648] Add success.
I0321 05:32:13.422885  543705 net.go:770] primary dev: ETH0
I0321 05:32:13.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:32:13.422915  543705 net.go:698] Add success.
W0321 05:32:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:32:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 05:32:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:32:14.456912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:32:14.456921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:32:14.456927  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:32:14.456995  543705 disk_worker.go:494] system disk:vda1
I0321 05:32:14.457023  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:32:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:32:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:32:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:32:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:32:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:32:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:32:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:32:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:32:23.409760  543705 memory.go:184] no items to output this cycle
I0321 05:32:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 05:32:27.969683  543705 disk_info.go:125] begin check local disk info of client
I0321 05:32:27.972247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:32:27.972264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e40 0xc0000c5e80]
E0321 05:32:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:32:33.409787  543705 memory.go:184] no items to output this cycle
I0321 05:32:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 05:32:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:32:43.409818  543705 memory.go:191] Add success.
I0321 05:32:43.409821  543705 cpu.go:282] Add success.
I0321 05:32:43.419882  543705 net.go:648] Add success.
I0321 05:32:43.423148  543705 net.go:770] primary dev: ETH0
I0321 05:32:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:32:43.423176  543705 net.go:698] Add success.
I0321 05:32:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:32:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:32:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:32:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:32:53.409767  543705 memory.go:184] no items to output this cycle
I0321 05:32:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 05:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:33:03.409778  543705 memory.go:184] no items to output this cycle
I0321 05:33:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 05:33:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:33:13.409791  543705 memory.go:191] Add success.
I0321 05:33:13.409808  543705 cpu.go:282] Add success.
W0321 05:33:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:33:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:33:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:33:13.420114  543705 net.go:648] Add success.
I0321 05:33:13.422636  543705 net.go:770] primary dev: ETH0
I0321 05:33:13.422649  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:33:13.422660  543705 net.go:698] Add success.
I0321 05:33:13.463671  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aed041fb-0409-4bc7-855a-7f517d8d811b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:33:13.463705  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:33:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:33:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:33:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0321 05:33:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:33:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 05:33:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:33:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:33:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:33:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:33:23.409775  543705 memory.go:184] no items to output this cycle
I0321 05:33:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 05:33:27.973682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:33:27.976277  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:33:27.976285  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369180 0xc0003691c0]
E0321 05:33:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:33:33.409779  543705 memory.go:184] no items to output this cycle
I0321 05:33:33.409788  543705 cpu.go:275] no items to output this cycle
I0321 05:33:38.746066  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:33:38.746073  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:33:43.410567  543705 memory.go:191] Add success.
I0321 05:33:43.409803  543705 cpu.go:282] Add success.
I0321 05:33:43.420699  543705 net.go:648] Add success.
I0321 05:33:43.423394  543705 net.go:770] primary dev: ETH0
I0321 05:33:43.423408  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:33:43.423423  543705 net.go:698] Add success.
I0321 05:33:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:33:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:33:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:33:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:33:53.409775  543705 memory.go:184] no items to output this cycle
I0321 05:33:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 05:34:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:34:03.409810  543705 memory.go:184] no items to output this cycle
I0321 05:34:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 05:34:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:34:13.409800  543705 memory.go:191] Add success.
I0321 05:34:13.409801  543705 cpu.go:282] Add success.
W0321 05:34:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:34:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:34:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:34:13.420264  543705 net.go:648] Add success.
I0321 05:34:13.423005  543705 net.go:770] primary dev: ETH0
I0321 05:34:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:34:13.423029  543705 net.go:698] Add success.
I0321 05:34:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:34:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:34:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0321 05:34:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:34:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 05:34:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:34:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:34:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:34:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:34:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:34:23.409790  543705 memory.go:184] no items to output this cycle
I0321 05:34:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 05:34:27.977683  543705 disk_info.go:125] begin check local disk info of client
I0321 05:34:27.980190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:34:27.980198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb40 0xc0001abb80]
E0321 05:34:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:34:33.409787  543705 memory.go:184] no items to output this cycle
I0321 05:34:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 05:34:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:34:43.409792  543705 memory.go:191] Add success.
I0321 05:34:43.409810  543705 cpu.go:282] Add success.
I0321 05:34:43.420085  543705 net.go:648] Add success.
I0321 05:34:43.422730  543705 net.go:770] primary dev: ETH0
I0321 05:34:43.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:34:43.422760  543705 net.go:698] Add success.
I0321 05:34:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:34:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:34:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:34:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:34:53.409792  543705 memory.go:184] no items to output this cycle
I0321 05:34:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 05:35:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:35:03.409774  543705 memory.go:184] no items to output this cycle
I0321 05:35:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 05:35:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:35:13.409815  543705 memory.go:191] Add success.
I0321 05:35:13.409823  543705 cpu.go:282] Add success.
W0321 05:35:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:35:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:35:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:35:13.420217  543705 net.go:648] Add success.
I0321 05:35:13.423124  543705 net.go:770] primary dev: ETH0
I0321 05:35:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:35:13.423153  543705 net.go:698] Add success.
I0321 05:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:35:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:35:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0321 05:35:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:35:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 05:35:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:35:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:35:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:35:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:35:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:35:23.409778  543705 memory.go:184] no items to output this cycle
I0321 05:35:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 05:35:27.981683  543705 disk_info.go:125] begin check local disk info of client
I0321 05:35:27.984241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:35:27.984248  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493000 0xc000493040]
E0321 05:35:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:35:33.409777  543705 memory.go:184] no items to output this cycle
I0321 05:35:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 05:35:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:35:43.409823  543705 memory.go:191] Add success.
I0321 05:35:43.409829  543705 cpu.go:282] Add success.
I0321 05:35:43.420004  543705 net.go:648] Add success.
I0321 05:35:43.422549  543705 net.go:770] primary dev: ETH0
I0321 05:35:43.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:35:43.422575  543705 net.go:698] Add success.
I0321 05:35:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:35:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:35:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:35:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:35:53.409774  543705 memory.go:184] no items to output this cycle
I0321 05:35:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 05:36:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:36:03.409775  543705 memory.go:184] no items to output this cycle
I0321 05:36:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 05:36:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:36:13.409779  543705 memory.go:191] Add success.
W0321 05:36:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 05:36:13.409808  543705 cpu.go:282] Add success.
W0321 05:36:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:36:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:36:13.420224  543705 net.go:648] Add success.
I0321 05:36:13.422727  543705 net.go:770] primary dev: ETH0
I0321 05:36:13.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:36:13.422751  543705 net.go:698] Add success.
I0321 05:36:13.538678  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c740dca-6dd7-4a7b-a2a8-218cab03f635","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:36:13.538711  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:36:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:36:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 05:36:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:36:14.456541  543705 disk_worker.go:494] system disk:vda1
I0321 05:36:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:36:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:36:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:36:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:36:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:36:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:36:23.409771  543705 memory.go:184] no items to output this cycle
I0321 05:36:23.409775  543705 cpu.go:275] no items to output this cycle
I0321 05:36:27.985684  543705 disk_info.go:125] begin check local disk info of client
I0321 05:36:27.988219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:36:27.988227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395340 0xc000395380]
E0321 05:36:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:36:33.409816  543705 memory.go:184] no items to output this cycle
I0321 05:36:33.409827  543705 cpu.go:275] no items to output this cycle
I0321 05:36:38.747428  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:36:38.747435  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:36:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:36:43.410617  543705 memory.go:191] Add success.
I0321 05:36:43.409794  543705 cpu.go:282] Add success.
I0321 05:36:43.420371  543705 net.go:648] Add success.
I0321 05:36:43.423193  543705 net.go:770] primary dev: ETH0
I0321 05:36:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:36:43.423218  543705 net.go:698] Add success.
I0321 05:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:36:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:36:53.410278  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:36:53.410298  543705 memory.go:184] no items to output this cycle
I0321 05:36:53.410310  543705 cpu.go:275] no items to output this cycle
E0321 05:37:03.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:37:03.409925  543705 cpu.go:275] no items to output this cycle
I0321 05:37:03.409996  543705 memory.go:184] no items to output this cycle
E0321 05:37:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:37:13.409814  543705 memory.go:191] Add success.
I0321 05:37:13.409816  543705 cpu.go:282] Add success.
W0321 05:37:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:37:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:37:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:37:13.420127  543705 net.go:648] Add success.
I0321 05:37:13.423184  543705 net.go:770] primary dev: ETH0
I0321 05:37:13.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:37:13.423212  543705 net.go:698] Add success.
I0321 05:37:13.453760  543705 event_worker.go:152] Polling the log file for events...
W0321 05:37:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:37:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 05:37:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:37:14.456764  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:37:14.456773  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:37:14.456778  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:37:14.456822  543705 disk_worker.go:494] system disk:vda1
I0321 05:37:14.456861  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:37:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:37:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:37:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:37:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:37:16.457957  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:37:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:37:16.472314  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:37:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:37:23.409792  543705 memory.go:184] no items to output this cycle
I0321 05:37:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 05:37:27.989684  543705 disk_info.go:125] begin check local disk info of client
I0321 05:37:27.992266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:37:27.992273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa500 0xc0001aa580]
E0321 05:37:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:37:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 05:37:33.409790  543705 memory.go:184] no items to output this cycle
E0321 05:37:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:37:43.409797  543705 memory.go:191] Add success.
I0321 05:37:43.409807  543705 cpu.go:282] Add success.
I0321 05:37:43.419899  543705 net.go:648] Add success.
I0321 05:37:43.422756  543705 net.go:770] primary dev: ETH0
I0321 05:37:43.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:37:43.422785  543705 net.go:698] Add success.
I0321 05:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:37:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:37:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:37:53.409775  543705 memory.go:184] no items to output this cycle
I0321 05:37:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 05:38:03.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:38:03.409877  543705 memory.go:184] no items to output this cycle
I0321 05:38:03.409946  543705 cpu.go:275] no items to output this cycle
E0321 05:38:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:38:13.409806  543705 memory.go:191] Add success.
I0321 05:38:13.409817  543705 cpu.go:282] Add success.
W0321 05:38:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:38:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:38:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:38:13.420055  543705 net.go:648] Add success.
I0321 05:38:13.423098  543705 net.go:770] primary dev: ETH0
I0321 05:38:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:38:13.423127  543705 net.go:698] Add success.
I0321 05:38:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:38:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:38:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 05:38:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:38:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 05:38:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:38:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:38:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:38:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:38:23.409780  543705 memory.go:184] no items to output this cycle
I0321 05:38:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 05:38:27.993683  543705 disk_info.go:125] begin check local disk info of client
I0321 05:38:27.996497  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:38:27.996505  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368a00 0xc000368a40]
E0321 05:38:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:38:33.409794  543705 memory.go:184] no items to output this cycle
I0321 05:38:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 05:38:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:38:43.409783  543705 memory.go:191] Add success.
I0321 05:38:43.409808  543705 cpu.go:282] Add success.
I0321 05:38:43.419871  543705 net.go:648] Add success.
I0321 05:38:43.422618  543705 net.go:770] primary dev: ETH0
I0321 05:38:43.422633  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:38:43.422652  543705 net.go:698] Add success.
I0321 05:38:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:38:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:38:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:38:53.409766  543705 memory.go:184] no items to output this cycle
I0321 05:38:53.409798  543705 cpu.go:275] no items to output this cycle
I0321 05:39:03.409993  543705 cpu.go:275] no items to output this cycle
E0321 05:39:03.410066  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:39:03.410079  543705 memory.go:184] no items to output this cycle
E0321 05:39:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:39:13.409788  543705 memory.go:191] Add success.
I0321 05:39:13.409797  543705 cpu.go:282] Add success.
W0321 05:39:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:39:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:39:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:39:13.420349  543705 net.go:648] Add success.
I0321 05:39:13.422989  543705 net.go:770] primary dev: ETH0
I0321 05:39:13.423001  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:39:13.423013  543705 net.go:698] Add success.
I0321 05:39:13.470726  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4dc80e62-1759-429c-8672-bdddaa60c2b7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:39:13.470759  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:39:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:39:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:39:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0321 05:39:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:39:14.456469  543705 disk_worker.go:494] system disk:vda1
I0321 05:39:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:39:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:39:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:39:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:39:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:39:23.409798  543705 memory.go:184] no items to output this cycle
I0321 05:39:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 05:39:27.997682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:39:28.000322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:39:28.000330  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
E0321 05:39:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:39:33.409778  543705 memory.go:184] no items to output this cycle
I0321 05:39:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 05:39:38.748444  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:39:38.748452  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:39:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:39:43.410572  543705 memory.go:191] Add success.
I0321 05:39:43.409801  543705 cpu.go:282] Add success.
I0321 05:39:43.420342  543705 net.go:648] Add success.
I0321 05:39:43.422858  543705 net.go:770] primary dev: ETH0
I0321 05:39:43.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:39:43.422882  543705 net.go:698] Add success.
I0321 05:39:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:39:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:39:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:39:53.410361  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:39:53.410380  543705 memory.go:184] no items to output this cycle
I0321 05:39:53.410402  543705 cpu.go:275] no items to output this cycle
E0321 05:40:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:40:03.409803  543705 memory.go:184] no items to output this cycle
I0321 05:40:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:40:13.409809  543705 memory.go:191] Add success.
I0321 05:40:13.409818  543705 cpu.go:282] Add success.
W0321 05:40:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:40:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:40:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:40:13.420213  543705 net.go:648] Add success.
I0321 05:40:13.422842  543705 net.go:770] primary dev: ETH0
I0321 05:40:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:40:13.422866  543705 net.go:698] Add success.
I0321 05:40:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:40:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:40:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 05:40:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:40:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 05:40:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:40:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:40:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:40:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:40:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:40:23.409775  543705 memory.go:184] no items to output this cycle
I0321 05:40:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 05:40:28.001687  543705 disk_info.go:125] begin check local disk info of client
I0321 05:40:28.004211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:40:28.004219  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353080 0xc0003530c0]
E0321 05:40:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:40:33.409789  543705 memory.go:184] no items to output this cycle
I0321 05:40:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 05:40:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:40:43.409815  543705 memory.go:191] Add success.
I0321 05:40:43.409822  543705 cpu.go:282] Add success.
I0321 05:40:43.419889  543705 net.go:648] Add success.
I0321 05:40:43.423156  543705 net.go:770] primary dev: ETH0
I0321 05:40:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:40:43.423182  543705 net.go:698] Add success.
I0321 05:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:40:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:40:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:40:53.409879  543705 memory.go:184] no items to output this cycle
I0321 05:40:53.409978  543705 cpu.go:275] no items to output this cycle
E0321 05:41:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:41:03.409803  543705 memory.go:184] no items to output this cycle
I0321 05:41:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 05:41:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:41:13.409813  543705 memory.go:191] Add success.
I0321 05:41:13.409824  543705 cpu.go:282] Add success.
W0321 05:41:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:41:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:41:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:41:13.420131  543705 net.go:648] Add success.
I0321 05:41:13.422668  543705 net.go:770] primary dev: ETH0
I0321 05:41:13.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:41:13.422694  543705 net.go:698] Add success.
I0321 05:41:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:41:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:41:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 05:41:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:41:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 05:41:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:41:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:41:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:41:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:41:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:41:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:41:23.409772  543705 memory.go:184] no items to output this cycle
I0321 05:41:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 05:41:28.005682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:41:28.008300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:41:28.008307  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f780 0xc00032f7c0]
E0321 05:41:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:41:33.409773  543705 memory.go:184] no items to output this cycle
I0321 05:41:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 05:41:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:41:43.409816  543705 memory.go:191] Add success.
I0321 05:41:43.409826  543705 cpu.go:282] Add success.
I0321 05:41:43.419984  543705 net.go:648] Add success.
I0321 05:41:43.422833  543705 net.go:770] primary dev: ETH0
I0321 05:41:43.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:41:43.422860  543705 net.go:698] Add success.
I0321 05:41:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:41:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:41:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:41:53.409868  543705 cpu.go:275] no items to output this cycle
E0321 05:41:53.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:41:53.409893  543705 memory.go:184] no items to output this cycle
E0321 05:42:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:42:03.409760  543705 memory.go:184] no items to output this cycle
I0321 05:42:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 05:42:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:42:13.409810  543705 memory.go:191] Add success.
I0321 05:42:13.409819  543705 cpu.go:282] Add success.
W0321 05:42:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:42:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:42:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:42:13.420135  543705 net.go:648] Add success.
I0321 05:42:13.422784  543705 net.go:770] primary dev: ETH0
I0321 05:42:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:42:13.422809  543705 net.go:698] Add success.
I0321 05:42:13.684466  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b0f28d4b-8ba2-4aa8-9460-85f9c0c609f7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:42:13.684500  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 05:42:14.454289  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:42:14.454301  543705 disk_worker.go:708] disk space is not compliant
W0321 05:42:14.454305  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:42:14.454973  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:42:14.454982  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:42:14.454987  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:42:14.455938  543705 disk_worker.go:494] system disk:vda1
I0321 05:42:14.455981  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:42:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:42:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:42:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:42:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:42:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:42:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:42:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:42:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:42:23.409764  543705 memory.go:184] no items to output this cycle
I0321 05:42:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 05:42:28.010112  543705 disk_info.go:125] begin check local disk info of client
I0321 05:42:28.012594  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:42:28.012602  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331d80 0xc000331dc0]
E0321 05:42:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:42:33.409820  543705 memory.go:184] no items to output this cycle
I0321 05:42:33.409830  543705 cpu.go:275] no items to output this cycle
I0321 05:42:38.749446  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:42:38.749452  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:42:43.410604  543705 memory.go:191] Add success.
I0321 05:42:43.409788  543705 cpu.go:282] Add success.
I0321 05:42:43.420553  543705 net.go:648] Add success.
I0321 05:42:43.422891  543705 net.go:770] primary dev: ETH0
I0321 05:42:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:42:43.422916  543705 net.go:698] Add success.
I0321 05:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:42:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:42:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:42:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:42:53.409776  543705 memory.go:184] no items to output this cycle
I0321 05:42:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 05:43:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:43:03.409779  543705 memory.go:184] no items to output this cycle
I0321 05:43:03.409779  543705 cpu.go:275] no items to output this cycle
E0321 05:43:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:43:13.409787  543705 memory.go:191] Add success.
I0321 05:43:13.409786  543705 cpu.go:282] Add success.
W0321 05:43:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:43:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:43:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:43:13.420226  543705 net.go:648] Add success.
I0321 05:43:13.422818  543705 net.go:770] primary dev: ETH0
I0321 05:43:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:43:13.422848  543705 net.go:698] Add success.
I0321 05:43:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:43:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:43:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 05:43:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:43:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 05:43:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:43:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:43:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:43:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:43:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:43:23.409772  543705 memory.go:184] no items to output this cycle
I0321 05:43:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 05:43:28.013699  543705 disk_info.go:125] begin check local disk info of client
I0321 05:43:28.016199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:43:28.016206  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330d00 0xc000330d40]
E0321 05:43:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:43:33.409780  543705 memory.go:184] no items to output this cycle
I0321 05:43:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 05:43:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:43:43.409787  543705 memory.go:191] Add success.
I0321 05:43:43.409790  543705 cpu.go:282] Add success.
I0321 05:43:43.420189  543705 net.go:648] Add success.
I0321 05:43:43.422717  543705 net.go:770] primary dev: ETH0
I0321 05:43:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:43:43.422742  543705 net.go:698] Add success.
I0321 05:43:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:43:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:43:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:43:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:43:53.409800  543705 memory.go:184] no items to output this cycle
I0321 05:43:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 05:44:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:44:03.409768  543705 memory.go:184] no items to output this cycle
I0321 05:44:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 05:44:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:44:13.409815  543705 memory.go:191] Add success.
I0321 05:44:13.409822  543705 cpu.go:282] Add success.
W0321 05:44:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:44:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:44:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:44:13.420168  543705 net.go:648] Add success.
I0321 05:44:13.423180  543705 net.go:770] primary dev: ETH0
I0321 05:44:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:44:13.423206  543705 net.go:698] Add success.
I0321 05:44:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:44:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:44:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0321 05:44:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:44:14.456596  543705 disk_worker.go:494] system disk:vda1
I0321 05:44:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:44:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:44:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:44:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:44:23.409767  543705 memory.go:184] no items to output this cycle
I0321 05:44:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 05:44:28.017697  543705 disk_info.go:125] begin check local disk info of client
I0321 05:44:28.020130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:44:28.020139  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5bc0 0xc0000c5c00]
E0321 05:44:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:44:33.409793  543705 memory.go:184] no items to output this cycle
I0321 05:44:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 05:44:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:44:43.409812  543705 memory.go:191] Add success.
I0321 05:44:43.409818  543705 cpu.go:282] Add success.
I0321 05:44:43.420079  543705 net.go:648] Add success.
I0321 05:44:43.423030  543705 net.go:770] primary dev: ETH0
I0321 05:44:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:44:43.423059  543705 net.go:698] Add success.
I0321 05:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:44:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:44:53.409773  543705 memory.go:184] no items to output this cycle
I0321 05:44:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 05:45:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:45:03.409779  543705 memory.go:184] no items to output this cycle
I0321 05:45:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 05:45:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:45:13.409794  543705 cpu.go:282] Add success.
I0321 05:45:13.409804  543705 memory.go:191] Add success.
W0321 05:45:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:45:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:45:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:45:13.420063  543705 net.go:648] Add success.
I0321 05:45:13.422799  543705 net.go:770] primary dev: ETH0
I0321 05:45:13.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:45:13.422824  543705 net.go:698] Add success.
I0321 05:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:45:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:45:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 05:45:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:45:14.456537  543705 disk_worker.go:494] system disk:vda1
I0321 05:45:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:45:14.925802  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"871e6c80-a6ae-4da7-9548-448604a214fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:45:14.925841  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:45:15.455682  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:45:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:45:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:45:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:45:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:45:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:45:23.409801  543705 memory.go:184] no items to output this cycle
I0321 05:45:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 05:45:28.021713  543705 disk_info.go:125] begin check local disk info of client
I0321 05:45:28.024370  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:45:28.024378  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304b40 0xc000304b80]
E0321 05:45:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:45:33.409799  543705 memory.go:184] no items to output this cycle
I0321 05:45:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 05:45:38.749740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:45:38.749747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:45:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:45:43.410655  543705 memory.go:191] Add success.
I0321 05:45:43.409807  543705 cpu.go:282] Add success.
I0321 05:45:43.420638  543705 net.go:648] Add success.
I0321 05:45:43.423386  543705 net.go:770] primary dev: ETH0
I0321 05:45:43.423399  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:45:43.423411  543705 net.go:698] Add success.
I0321 05:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:45:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:45:53.409795  543705 cpu.go:275] no items to output this cycle
I0321 05:45:53.409800  543705 memory.go:184] no items to output this cycle
E0321 05:46:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:46:03.409819  543705 memory.go:184] no items to output this cycle
I0321 05:46:03.409834  543705 cpu.go:275] no items to output this cycle
E0321 05:46:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:46:13.409786  543705 memory.go:191] Add success.
W0321 05:46:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 05:46:13.409818  543705 cpu.go:282] Add success.
W0321 05:46:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:46:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:46:13.420155  543705 net.go:648] Add success.
I0321 05:46:13.422590  543705 net.go:770] primary dev: ETH0
I0321 05:46:13.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:46:13.422615  543705 net.go:698] Add success.
I0321 05:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:46:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:46:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 05:46:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:46:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 05:46:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:46:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:46:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:46:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:46:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:46:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:46:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:46:23.409786  543705 memory.go:184] no items to output this cycle
I0321 05:46:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 05:46:28.025683  543705 disk_info.go:125] begin check local disk info of client
I0321 05:46:28.028147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:46:28.028155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003688c0 0xc000368900]
E0321 05:46:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:46:33.409791  543705 memory.go:184] no items to output this cycle
I0321 05:46:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 05:46:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:46:43.409814  543705 memory.go:191] Add success.
I0321 05:46:43.409826  543705 cpu.go:282] Add success.
I0321 05:46:43.419856  543705 net.go:648] Add success.
I0321 05:46:43.422224  543705 net.go:770] primary dev: ETH0
I0321 05:46:43.422237  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:46:43.422251  543705 net.go:698] Add success.
I0321 05:46:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:46:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:46:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:46:53.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:46:53.409922  543705 memory.go:184] no items to output this cycle
I0321 05:46:53.409931  543705 cpu.go:275] no items to output this cycle
E0321 05:47:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:47:03.409790  543705 memory.go:184] no items to output this cycle
I0321 05:47:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 05:47:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:47:13.409796  543705 cpu.go:282] Add success.
I0321 05:47:13.409809  543705 memory.go:191] Add success.
W0321 05:47:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:47:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:47:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:47:13.420154  543705 net.go:648] Add success.
I0321 05:47:13.423130  543705 net.go:770] primary dev: ETH0
I0321 05:47:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:47:13.423159  543705 net.go:698] Add success.
I0321 05:47:13.453662  543705 event_worker.go:152] Polling the log file for events...
W0321 05:47:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:47:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 05:47:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:47:14.455906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:47:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:47:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:47:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 05:47:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:47:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:47:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:47:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:47:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:47:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:47:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:47:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:47:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:47:23.409801  543705 memory.go:184] no items to output this cycle
I0321 05:47:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 05:47:28.029682  543705 disk_info.go:125] begin check local disk info of client
I0321 05:47:28.032102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:47:28.032110  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab040]
E0321 05:47:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:47:33.409771  543705 memory.go:184] no items to output this cycle
I0321 05:47:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 05:47:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:47:43.409793  543705 memory.go:191] Add success.
I0321 05:47:43.409822  543705 cpu.go:282] Add success.
I0321 05:47:43.419857  543705 net.go:648] Add success.
I0321 05:47:43.422588  543705 net.go:770] primary dev: ETH0
I0321 05:47:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:47:43.422614  543705 net.go:698] Add success.
I0321 05:47:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:47:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:47:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:47:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:47:53.409771  543705 memory.go:184] no items to output this cycle
I0321 05:47:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 05:48:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:48:03.409804  543705 memory.go:184] no items to output this cycle
I0321 05:48:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:48:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:48:13.409817  543705 memory.go:191] Add success.
I0321 05:48:13.409828  543705 cpu.go:282] Add success.
W0321 05:48:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:48:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:48:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:48:13.420457  543705 net.go:648] Add success.
I0321 05:48:13.423359  543705 net.go:770] primary dev: ETH0
I0321 05:48:13.423372  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:48:13.423384  543705 net.go:698] Add success.
I0321 05:48:13.657898  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"092eaa68-6beb-4034-a41f-084c25a5d749","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:48:13.657930  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:48:14.454413  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:48:14.454567  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:48:14.454632  543705 disk_worker.go:708] disk space is not compliant
W0321 05:48:14.454635  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:48:14.455962  543705 disk_worker.go:494] system disk:vda1
I0321 05:48:14.456006  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:48:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:48:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:48:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:48:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:48:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:48:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:48:23.409781  543705 memory.go:184] no items to output this cycle
I0321 05:48:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 05:48:28.033683  543705 disk_info.go:125] begin check local disk info of client
I0321 05:48:28.036049  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:48:28.036059  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aadc0 0xc0001aae00]
E0321 05:48:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:48:33.409786  543705 memory.go:184] no items to output this cycle
I0321 05:48:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 05:48:38.749888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:48:38.749895  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:48:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:48:43.410590  543705 memory.go:191] Add success.
I0321 05:48:43.409801  543705 cpu.go:282] Add success.
I0321 05:48:43.420286  543705 net.go:648] Add success.
I0321 05:48:43.422784  543705 net.go:770] primary dev: ETH0
I0321 05:48:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:48:43.422817  543705 net.go:698] Add success.
I0321 05:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:48:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:48:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:48:53.409765  543705 memory.go:184] no items to output this cycle
I0321 05:48:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 05:49:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:49:03.409791  543705 memory.go:184] no items to output this cycle
I0321 05:49:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 05:49:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:49:13.409818  543705 memory.go:191] Add success.
I0321 05:49:13.409819  543705 cpu.go:282] Add success.
W0321 05:49:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:49:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:49:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:49:13.420196  543705 net.go:648] Add success.
I0321 05:49:13.423090  543705 net.go:770] primary dev: ETH0
I0321 05:49:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:49:13.423114  543705 net.go:698] Add success.
I0321 05:49:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:49:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:49:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 05:49:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:49:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 05:49:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:49:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:49:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:49:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:49:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:49:23.409777  543705 memory.go:184] no items to output this cycle
I0321 05:49:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 05:49:28.037678  543705 disk_info.go:125] begin check local disk info of client
I0321 05:49:28.040164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:49:28.040170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486380 0xc0004863c0]
E0321 05:49:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:49:33.409790  543705 memory.go:184] no items to output this cycle
I0321 05:49:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 05:49:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:49:43.409792  543705 memory.go:191] Add success.
I0321 05:49:43.409819  543705 cpu.go:282] Add success.
I0321 05:49:43.419891  543705 net.go:648] Add success.
I0321 05:49:43.422548  543705 net.go:770] primary dev: ETH0
I0321 05:49:43.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:49:43.422576  543705 net.go:698] Add success.
I0321 05:49:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:49:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:49:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:49:53.410511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:49:53.410527  543705 memory.go:184] no items to output this cycle
I0321 05:49:53.410557  543705 cpu.go:275] no items to output this cycle
E0321 05:50:03.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:50:03.409893  543705 memory.go:184] no items to output this cycle
I0321 05:50:03.409968  543705 cpu.go:275] no items to output this cycle
E0321 05:50:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:50:13.409818  543705 memory.go:191] Add success.
I0321 05:50:13.409829  543705 cpu.go:282] Add success.
W0321 05:50:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:50:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:50:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:50:13.420143  543705 net.go:648] Add success.
I0321 05:50:13.422473  543705 net.go:770] primary dev: ETH0
I0321 05:50:13.422487  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:50:13.422498  543705 net.go:698] Add success.
I0321 05:50:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:50:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:50:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 05:50:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:50:14.456487  543705 disk_worker.go:494] system disk:vda1
I0321 05:50:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:50:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:50:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:50:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:50:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:50:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:50:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:50:23.409799  543705 memory.go:184] no items to output this cycle
I0321 05:50:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 05:50:28.041676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:50:28.044261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:50:28.044267  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331480 0xc0003314c0]
E0321 05:50:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:50:33.409821  543705 memory.go:184] no items to output this cycle
I0321 05:50:33.409830  543705 cpu.go:275] no items to output this cycle
E0321 05:50:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:50:43.409799  543705 memory.go:191] Add success.
I0321 05:50:43.409809  543705 cpu.go:282] Add success.
I0321 05:50:43.419971  543705 net.go:648] Add success.
I0321 05:50:43.422864  543705 net.go:770] primary dev: ETH0
I0321 05:50:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:50:43.422891  543705 net.go:698] Add success.
I0321 05:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:50:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:50:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:50:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:50:53.409804  543705 memory.go:184] no items to output this cycle
I0321 05:50:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:51:03.409775  543705 memory.go:184] no items to output this cycle
I0321 05:51:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 05:51:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:51:13.409827  543705 memory.go:191] Add success.
I0321 05:51:13.409834  543705 cpu.go:282] Add success.
W0321 05:51:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:51:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:51:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:51:13.420110  543705 net.go:648] Add success.
I0321 05:51:13.422984  543705 net.go:770] primary dev: ETH0
I0321 05:51:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:51:13.423007  543705 net.go:698] Add success.
I0321 05:51:13.470251  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b3b834e3-0df3-45ac-b9c4-71bab3b97e43","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:51:13.470284  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:51:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:51:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:51:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 05:51:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:51:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 05:51:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:51:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:51:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:51:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:51:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:51:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:51:23.409792  543705 memory.go:184] no items to output this cycle
I0321 05:51:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 05:51:28.045675  543705 disk_info.go:125] begin check local disk info of client
I0321 05:51:28.048228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:51:28.048234  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499a40 0xc000499a80]
E0321 05:51:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:51:33.409788  543705 memory.go:184] no items to output this cycle
I0321 05:51:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 05:51:38.751454  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:51:38.751462  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:51:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:51:43.410597  543705 memory.go:191] Add success.
I0321 05:51:43.409813  543705 cpu.go:282] Add success.
I0321 05:51:43.420277  543705 net.go:648] Add success.
I0321 05:51:43.422702  543705 net.go:770] primary dev: ETH0
I0321 05:51:43.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:51:43.422728  543705 net.go:698] Add success.
I0321 05:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:51:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:51:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:51:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:51:53.409796  543705 memory.go:184] no items to output this cycle
I0321 05:51:53.409800  543705 cpu.go:275] no items to output this cycle
I0321 05:52:03.409898  543705 cpu.go:275] no items to output this cycle
E0321 05:52:03.410058  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:52:03.410069  543705 memory.go:184] no items to output this cycle
E0321 05:52:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:52:13.409826  543705 memory.go:191] Add success.
I0321 05:52:13.409834  543705 cpu.go:282] Add success.
W0321 05:52:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:52:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:52:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:52:13.420149  543705 net.go:648] Add success.
I0321 05:52:13.422835  543705 net.go:770] primary dev: ETH0
I0321 05:52:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:52:13.422859  543705 net.go:698] Add success.
W0321 05:52:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:52:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 05:52:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:52:14.455888  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:52:14.455897  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:52:14.455902  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:52:14.456541  543705 disk_worker.go:494] system disk:vda1
I0321 05:52:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 05:52:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:52:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:52:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:52:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:52:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:52:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:52:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:52:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:52:23.409801  543705 memory.go:184] no items to output this cycle
I0321 05:52:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 05:52:28.049676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:52:28.052241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:52:28.052246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4bc0 0xc0000c4c00]
E0321 05:52:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:52:33.409788  543705 memory.go:184] no items to output this cycle
I0321 05:52:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 05:52:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:52:43.409797  543705 memory.go:191] Add success.
I0321 05:52:43.409797  543705 cpu.go:282] Add success.
I0321 05:52:43.419829  543705 net.go:648] Add success.
I0321 05:52:43.422453  543705 net.go:770] primary dev: ETH0
I0321 05:52:43.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:52:43.422481  543705 net.go:698] Add success.
I0321 05:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:52:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:52:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:52:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:52:53.409767  543705 memory.go:184] no items to output this cycle
I0321 05:52:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 05:53:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:53:03.409782  543705 memory.go:184] no items to output this cycle
I0321 05:53:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 05:53:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:53:13.409807  543705 memory.go:191] Add success.
I0321 05:53:13.409816  543705 cpu.go:282] Add success.
W0321 05:53:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:53:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:53:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:53:13.420124  543705 net.go:648] Add success.
I0321 05:53:13.423169  543705 net.go:770] primary dev: ETH0
I0321 05:53:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:53:13.423194  543705 net.go:698] Add success.
I0321 05:53:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:53:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:53:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 05:53:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:53:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 05:53:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:53:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:53:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:53:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:53:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:53:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:53:23.409798  543705 memory.go:184] no items to output this cycle
I0321 05:53:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 05:53:28.053676  543705 disk_info.go:125] begin check local disk info of client
I0321 05:53:28.056191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:53:28.056197  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac40 0xc00007ac80]
E0321 05:53:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:53:33.409805  543705 memory.go:184] no items to output this cycle
I0321 05:53:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:53:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:53:43.409790  543705 memory.go:191] Add success.
I0321 05:53:43.409807  543705 cpu.go:282] Add success.
I0321 05:53:43.419864  543705 net.go:648] Add success.
I0321 05:53:43.422596  543705 net.go:770] primary dev: ETH0
I0321 05:53:43.422612  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:53:43.422626  543705 net.go:698] Add success.
I0321 05:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:53:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:53:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:53:53.409788  543705 memory.go:184] no items to output this cycle
I0321 05:53:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 05:54:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:54:03.409790  543705 memory.go:184] no items to output this cycle
I0321 05:54:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 05:54:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:54:13.409786  543705 memory.go:191] Add success.
I0321 05:54:13.409791  543705 cpu.go:282] Add success.
W0321 05:54:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:54:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:54:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:54:13.420142  543705 net.go:648] Add success.
I0321 05:54:13.423071  543705 net.go:770] primary dev: ETH0
I0321 05:54:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:54:13.423105  543705 net.go:698] Add success.
I0321 05:54:13.468936  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"23855dcc-10ff-4c93-a416-cbb788e169d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:54:13.468968  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 05:54:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:54:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:54:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 05:54:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:54:14.456746  543705 disk_worker.go:494] system disk:vda1
I0321 05:54:14.456777  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:54:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:54:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:54:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:54:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:54:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:54:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:54:23.409784  543705 memory.go:184] no items to output this cycle
I0321 05:54:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 05:54:28.057671  543705 disk_info.go:125] begin check local disk info of client
I0321 05:54:28.060183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:54:28.060189  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4dc0 0xc0000c4e00]
E0321 05:54:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:54:33.409781  543705 memory.go:184] no items to output this cycle
I0321 05:54:33.409889  543705 cpu.go:275] no items to output this cycle
I0321 05:54:38.751605  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:54:38.751613  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:54:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:54:43.410570  543705 memory.go:191] Add success.
I0321 05:54:43.409832  543705 cpu.go:282] Add success.
I0321 05:54:43.420265  543705 net.go:648] Add success.
I0321 05:54:43.422962  543705 net.go:770] primary dev: ETH0
I0321 05:54:43.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:54:43.422998  543705 net.go:698] Add success.
I0321 05:54:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:54:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:54:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:54:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:54:53.409802  543705 memory.go:184] no items to output this cycle
I0321 05:54:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 05:55:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:55:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 05:55:03.409787  543705 memory.go:184] no items to output this cycle
E0321 05:55:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:55:13.409772  543705 memory.go:191] Add success.
W0321 05:55:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 05:55:13.409807  543705 cpu.go:282] Add success.
W0321 05:55:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:55:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:55:13.420102  543705 net.go:648] Add success.
I0321 05:55:13.422693  543705 net.go:770] primary dev: ETH0
I0321 05:55:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:55:13.422718  543705 net.go:698] Add success.
I0321 05:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:55:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:55:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 05:55:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:55:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 05:55:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:55:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:55:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:55:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:55:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:55:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:55:23.409809  543705 memory.go:184] no items to output this cycle
I0321 05:55:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 05:55:28.061672  543705 disk_info.go:125] begin check local disk info of client
I0321 05:55:28.064283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:55:28.064288  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463580 0xc0004635c0]
E0321 05:55:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:55:33.409783  543705 memory.go:184] no items to output this cycle
I0321 05:55:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 05:55:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:55:43.409820  543705 memory.go:191] Add success.
I0321 05:55:43.409824  543705 cpu.go:282] Add success.
I0321 05:55:43.419881  543705 net.go:648] Add success.
I0321 05:55:43.422660  543705 net.go:770] primary dev: ETH0
I0321 05:55:43.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:55:43.422685  543705 net.go:698] Add success.
I0321 05:55:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:55:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:55:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:55:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:55:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 05:55:53.409777  543705 memory.go:184] no items to output this cycle
E0321 05:56:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:56:03.409771  543705 memory.go:184] no items to output this cycle
I0321 05:56:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 05:56:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:56:13.409811  543705 memory.go:191] Add success.
I0321 05:56:13.409818  543705 cpu.go:282] Add success.
W0321 05:56:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:56:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:56:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:56:13.420062  543705 net.go:648] Add success.
I0321 05:56:13.422921  543705 net.go:770] primary dev: ETH0
I0321 05:56:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:56:13.422949  543705 net.go:698] Add success.
I0321 05:56:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:56:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:56:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 05:56:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:56:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 05:56:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:56:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:56:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:56:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:56:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:56:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:56:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:56:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 05:56:23.409789  543705 memory.go:184] no items to output this cycle
I0321 05:56:28.065675  543705 disk_info.go:125] begin check local disk info of client
I0321 05:56:28.068171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:56:28.068178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472500 0xc000472540]
E0321 05:56:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:56:33.409804  543705 memory.go:184] no items to output this cycle
I0321 05:56:33.409843  543705 cpu.go:275] no items to output this cycle
E0321 05:56:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:56:43.409828  543705 memory.go:191] Add success.
I0321 05:56:43.409828  543705 cpu.go:282] Add success.
I0321 05:56:43.420212  543705 net.go:648] Add success.
I0321 05:56:43.423422  543705 net.go:770] primary dev: ETH0
I0321 05:56:43.423434  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:56:43.423445  543705 net.go:698] Add success.
I0321 05:56:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:56:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:56:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:56:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:56:53.409795  543705 memory.go:184] no items to output this cycle
I0321 05:56:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 05:57:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:57:03.409781  543705 memory.go:184] no items to output this cycle
I0321 05:57:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 05:57:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:57:13.409817  543705 memory.go:191] Add success.
I0321 05:57:13.409820  543705 cpu.go:282] Add success.
W0321 05:57:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:57:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:57:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:57:13.420054  543705 net.go:648] Add success.
I0321 05:57:13.422737  543705 net.go:770] primary dev: ETH0
I0321 05:57:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:57:13.422763  543705 net.go:698] Add success.
I0321 05:57:13.429175  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 05:57:13.453414  543705 event_worker.go:152] Polling the log file for events...
W0321 05:57:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:57:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0321 05:57:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0321 05:57:14.456871  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 05:57:14.456880  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 05:57:14.456885  543705 custom_config.go:64] query custom config with name: gpu
I0321 05:57:14.456955  543705 disk_worker.go:494] system disk:vda1
I0321 05:57:14.456997  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:57:14.864964  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5b536506-5872-4f8f-afc1-8dbd0a13871c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 05:57:14.864999  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0321 05:57:15.455828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 05:57:15.455836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:57:16.458076  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 05:57:16.458082  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 05:57:16.458131  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:57:16.458152  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:57:16.472516  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:57:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:57:23.409815  543705 memory.go:184] no items to output this cycle
I0321 05:57:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 05:57:28.069684  543705 disk_info.go:125] begin check local disk info of client
I0321 05:57:28.072199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:57:28.072205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000496980 0xc0004969c0]
E0321 05:57:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:57:33.409814  543705 memory.go:184] no items to output this cycle
I0321 05:57:33.409826  543705 cpu.go:275] no items to output this cycle
I0321 05:57:38.752455  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 05:57:38.752461  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 05:57:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:57:43.410701  543705 memory.go:191] Add success.
I0321 05:57:43.409990  543705 cpu.go:282] Add success.
I0321 05:57:43.419728  543705 net.go:648] Add success.
I0321 05:57:43.422218  543705 net.go:770] primary dev: ETH0
I0321 05:57:43.422230  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:57:43.422244  543705 net.go:698] Add success.
I0321 05:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:57:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:57:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:57:53.409776  543705 memory.go:184] no items to output this cycle
I0321 05:57:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 05:58:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:58:03.409790  543705 cpu.go:275] no items to output this cycle
I0321 05:58:03.409802  543705 memory.go:184] no items to output this cycle
E0321 05:58:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:58:13.409803  543705 memory.go:191] Add success.
I0321 05:58:13.409806  543705 cpu.go:282] Add success.
W0321 05:58:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:58:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:58:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:58:13.420113  543705 net.go:648] Add success.
I0321 05:58:13.422933  543705 net.go:770] primary dev: ETH0
I0321 05:58:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:58:13.422963  543705 net.go:698] Add success.
I0321 05:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:58:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:58:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 05:58:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:58:14.456486  543705 disk_worker.go:494] system disk:vda1
I0321 05:58:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:58:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:58:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:58:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:58:23.409786  543705 memory.go:184] no items to output this cycle
I0321 05:58:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 05:58:28.073675  543705 disk_info.go:125] begin check local disk info of client
I0321 05:58:28.076184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:58:28.076191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359800 0xc000359840]
E0321 05:58:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:58:33.409986  543705 memory.go:184] no items to output this cycle
I0321 05:58:33.410001  543705 cpu.go:275] no items to output this cycle
E0321 05:58:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:58:43.409803  543705 memory.go:191] Add success.
I0321 05:58:43.409824  543705 cpu.go:282] Add success.
I0321 05:58:43.420047  543705 net.go:648] Add success.
I0321 05:58:43.422702  543705 net.go:770] primary dev: ETH0
I0321 05:58:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:58:43.422730  543705 net.go:698] Add success.
I0321 05:58:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:58:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:58:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:58:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:58:53.409811  543705 memory.go:184] no items to output this cycle
I0321 05:58:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 05:59:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:59:03.409794  543705 memory.go:184] no items to output this cycle
I0321 05:59:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 05:59:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:59:13.409828  543705 memory.go:191] Add success.
I0321 05:59:13.409833  543705 cpu.go:282] Add success.
W0321 05:59:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 05:59:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 05:59:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 05:59:13.420049  543705 net.go:648] Add success.
I0321 05:59:13.422636  543705 net.go:770] primary dev: ETH0
I0321 05:59:13.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:59:13.422664  543705 net.go:698] Add success.
I0321 05:59:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0321 05:59:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 05:59:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 05:59:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 05:59:14.456607  543705 disk_worker.go:494] system disk:vda1
I0321 05:59:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 05:59:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 05:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:59:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:59:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 05:59:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 05:59:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:59:23.409776  543705 memory.go:184] no items to output this cycle
I0321 05:59:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 05:59:28.077671  543705 disk_info.go:125] begin check local disk info of client
I0321 05:59:28.080137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 05:59:28.080143  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382c40 0xc000382c80]
E0321 05:59:33.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:59:33.409924  543705 memory.go:184] no items to output this cycle
I0321 05:59:33.410041  543705 cpu.go:275] no items to output this cycle
E0321 05:59:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:59:43.409783  543705 memory.go:191] Add success.
I0321 05:59:43.409810  543705 cpu.go:282] Add success.
I0321 05:59:43.420131  543705 net.go:648] Add success.
I0321 05:59:43.423568  543705 net.go:770] primary dev: ETH0
I0321 05:59:43.423581  543705 net.go:802] Send network stats successfully!,count is 6
I0321 05:59:43.423595  543705 net.go:698] Add success.
I0321 05:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 05:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 05:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 05:59:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 05:59:53.409793  543705 memory.go:184] no items to output this cycle
I0321 05:59:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 06:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:00:03.409776  543705 cpu.go:275] no items to output this cycle
I0321 06:00:03.409782  543705 memory.go:184] no items to output this cycle
E0321 06:00:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:00:13.409815  543705 memory.go:191] Add success.
I0321 06:00:13.409822  543705 cpu.go:282] Add success.
W0321 06:00:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:00:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:00:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:00:13.420141  543705 net.go:648] Add success.
I0321 06:00:13.422878  543705 net.go:770] primary dev: ETH0
I0321 06:00:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:00:13.422909  543705 net.go:698] Add success.
I0321 06:00:13.464273  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"59d2a200-5f1e-47c2-be57-fc82ec5bfc44","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:00:13.464311  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:00:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:00:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:00:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 06:00:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:00:14.456523  543705 disk_worker.go:494] system disk:vda1
I0321 06:00:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:00:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:00:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:00:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:00:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:00:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:00:23.409795  543705 memory.go:184] no items to output this cycle
I0321 06:00:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 06:00:28.081674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:00:28.084156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:00:28.084163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340400 0xc000340440]
E0321 06:00:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:00:33.409781  543705 memory.go:184] no items to output this cycle
I0321 06:00:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 06:00:38.753473  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:00:38.753481  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:00:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:00:43.410738  543705 memory.go:191] Add success.
I0321 06:00:43.409828  543705 cpu.go:282] Add success.
I0321 06:00:43.420431  543705 net.go:648] Add success.
I0321 06:00:43.423003  543705 net.go:770] primary dev: ETH0
I0321 06:00:43.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:00:43.423029  543705 net.go:698] Add success.
I0321 06:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:00:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:00:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:00:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:00:53.409799  543705 memory.go:184] no items to output this cycle
I0321 06:00:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 06:01:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:01:03.409769  543705 memory.go:184] no items to output this cycle
I0321 06:01:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 06:01:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:01:13.409814  543705 memory.go:191] Add success.
I0321 06:01:13.409826  543705 cpu.go:282] Add success.
W0321 06:01:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:01:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:01:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:01:13.420146  543705 net.go:648] Add success.
I0321 06:01:13.423061  543705 net.go:770] primary dev: ETH0
I0321 06:01:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:01:13.423091  543705 net.go:698] Add success.
I0321 06:01:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:01:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:01:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 06:01:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:01:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 06:01:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:01:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:01:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:01:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:01:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:01:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:01:23.409775  543705 memory.go:184] no items to output this cycle
I0321 06:01:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 06:01:28.085675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:01:28.088263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:01:28.088269  543705 disk_info.go:196] parse disk info done, disk is : [0xc000254040 0xc000254080]
E0321 06:01:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:01:33.409792  543705 memory.go:184] no items to output this cycle
I0321 06:01:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 06:01:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:01:43.409797  543705 memory.go:191] Add success.
I0321 06:01:43.409797  543705 cpu.go:282] Add success.
I0321 06:01:43.420079  543705 net.go:648] Add success.
I0321 06:01:43.422713  543705 net.go:770] primary dev: ETH0
I0321 06:01:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:01:43.422741  543705 net.go:698] Add success.
I0321 06:01:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:01:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:01:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:01:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:01:53.409775  543705 memory.go:184] no items to output this cycle
I0321 06:01:53.409775  543705 cpu.go:275] no items to output this cycle
E0321 06:02:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:02:03.409780  543705 memory.go:184] no items to output this cycle
I0321 06:02:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 06:02:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:02:13.409791  543705 memory.go:191] Add success.
I0321 06:02:13.409790  543705 cpu.go:282] Add success.
W0321 06:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:02:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:02:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:02:13.420141  543705 net.go:648] Add success.
I0321 06:02:13.423168  543705 net.go:770] primary dev: ETH0
I0321 06:02:13.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:02:13.423198  543705 net.go:698] Add success.
W0321 06:02:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:02:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 06:02:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:02:14.456920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:02:14.456930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:02:14.456936  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:02:14.456996  543705 disk_worker.go:494] system disk:vda1
I0321 06:02:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:02:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:02:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:02:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:02:16.457893  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:02:16.457946  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:02:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:02:16.472287  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:02:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:02:23.409793  543705 memory.go:184] no items to output this cycle
I0321 06:02:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 06:02:28.089682  543705 disk_info.go:125] begin check local disk info of client
I0321 06:02:28.092143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:02:28.092151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 06:02:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:02:33.409809  543705 memory.go:184] no items to output this cycle
I0321 06:02:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 06:02:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:02:43.409797  543705 memory.go:191] Add success.
I0321 06:02:43.409801  543705 cpu.go:282] Add success.
I0321 06:02:43.419875  543705 net.go:648] Add success.
I0321 06:02:43.422362  543705 net.go:770] primary dev: ETH0
I0321 06:02:43.422374  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:02:43.422388  543705 net.go:698] Add success.
I0321 06:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:02:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:02:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:02:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:02:53.409768  543705 memory.go:184] no items to output this cycle
I0321 06:02:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 06:03:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:03:03.409773  543705 memory.go:184] no items to output this cycle
I0321 06:03:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 06:03:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:03:13.409794  543705 memory.go:191] Add success.
I0321 06:03:13.409797  543705 cpu.go:282] Add success.
W0321 06:03:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:03:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:03:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:03:13.420069  543705 net.go:648] Add success.
I0321 06:03:13.422778  543705 net.go:770] primary dev: ETH0
I0321 06:03:13.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:03:13.422803  543705 net.go:698] Add success.
I0321 06:03:13.469075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aa1466ec-d4b5-4334-8c29-5cdc931643cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:03:13.469107  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:03:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:03:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:03:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 06:03:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:03:14.457411  543705 disk_worker.go:494] system disk:vda1
I0321 06:03:14.457448  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:03:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:03:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:03:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:03:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:03:23.409766  543705 memory.go:184] no items to output this cycle
I0321 06:03:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 06:03:28.093675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:03:28.096131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:03:28.096138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e00 0xc0000c5e40]
E0321 06:03:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:03:33.409812  543705 memory.go:184] no items to output this cycle
I0321 06:03:33.409819  543705 cpu.go:275] no items to output this cycle
I0321 06:03:38.753734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:03:38.753740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:03:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:03:43.410562  543705 memory.go:191] Add success.
I0321 06:03:43.409804  543705 cpu.go:282] Add success.
I0321 06:03:43.420312  543705 net.go:648] Add success.
I0321 06:03:43.422747  543705 net.go:770] primary dev: ETH0
I0321 06:03:43.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:03:43.422775  543705 net.go:698] Add success.
I0321 06:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:03:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:03:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:03:53.409769  543705 memory.go:184] no items to output this cycle
I0321 06:03:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 06:04:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:04:03.409795  543705 memory.go:184] no items to output this cycle
I0321 06:04:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 06:04:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:04:13.409783  543705 memory.go:191] Add success.
I0321 06:04:13.409805  543705 cpu.go:282] Add success.
W0321 06:04:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:04:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:04:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:04:13.420152  543705 net.go:648] Add success.
I0321 06:04:13.423007  543705 net.go:770] primary dev: ETH0
I0321 06:04:13.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:04:13.423210  543705 net.go:698] Add success.
I0321 06:04:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:04:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:04:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 06:04:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:04:14.456647  543705 disk_worker.go:494] system disk:vda1
I0321 06:04:14.456676  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:04:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:04:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:04:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:04:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:04:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:04:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:04:23.409785  543705 memory.go:184] no items to output this cycle
I0321 06:04:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 06:04:28.097675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:04:28.100208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:04:28.100215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003523c0 0xc000352400]
E0321 06:04:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:04:33.409806  543705 memory.go:184] no items to output this cycle
I0321 06:04:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 06:04:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:04:43.409789  543705 memory.go:191] Add success.
I0321 06:04:43.409799  543705 cpu.go:282] Add success.
I0321 06:04:43.419912  543705 net.go:648] Add success.
I0321 06:04:43.422569  543705 net.go:770] primary dev: ETH0
I0321 06:04:43.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:04:43.422599  543705 net.go:698] Add success.
I0321 06:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:04:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:04:53.409772  543705 memory.go:184] no items to output this cycle
I0321 06:04:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 06:05:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:05:03.409798  543705 memory.go:184] no items to output this cycle
I0321 06:05:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 06:05:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:05:13.409815  543705 memory.go:191] Add success.
I0321 06:05:13.409821  543705 cpu.go:282] Add success.
W0321 06:05:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:05:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:05:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:05:13.420361  543705 net.go:648] Add success.
I0321 06:05:13.423083  543705 net.go:770] primary dev: ETH0
I0321 06:05:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:05:13.423107  543705 net.go:698] Add success.
I0321 06:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:05:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:05:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 06:05:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:05:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 06:05:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:05:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:05:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:05:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:05:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:05:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:05:23.409774  543705 memory.go:184] no items to output this cycle
I0321 06:05:23.409774  543705 cpu.go:275] no items to output this cycle
I0321 06:05:28.101672  543705 disk_info.go:125] begin check local disk info of client
I0321 06:05:28.104121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:05:28.104127  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039b8c0 0xc00039b900]
E0321 06:05:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:05:33.409780  543705 memory.go:184] no items to output this cycle
I0321 06:05:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 06:05:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:05:43.409793  543705 memory.go:191] Add success.
I0321 06:05:43.409796  543705 cpu.go:282] Add success.
I0321 06:05:43.419881  543705 net.go:648] Add success.
I0321 06:05:43.422604  543705 net.go:770] primary dev: ETH0
I0321 06:05:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:05:43.422631  543705 net.go:698] Add success.
I0321 06:05:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:05:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:05:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:05:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:05:53.409765  543705 memory.go:184] no items to output this cycle
I0321 06:05:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 06:06:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:06:03.409768  543705 memory.go:184] no items to output this cycle
I0321 06:06:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 06:06:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:06:13.409813  543705 memory.go:191] Add success.
I0321 06:06:13.409823  543705 cpu.go:282] Add success.
W0321 06:06:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:06:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:06:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:06:13.419735  543705 net.go:648] Add success.
I0321 06:06:13.422297  543705 net.go:770] primary dev: ETH0
I0321 06:06:13.422311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:06:13.422322  543705 net.go:698] Add success.
I0321 06:06:13.468921  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"913fc689-eabc-4fae-8226-fbf66fecb588","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:06:13.468951  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:06:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:06:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:06:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 06:06:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:06:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 06:06:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:06:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:06:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:06:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:06:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:06:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:06:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:06:23.409771  543705 memory.go:184] no items to output this cycle
I0321 06:06:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 06:06:28.105676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:06:28.108175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:06:28.108182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa980 0xc0001fa9c0]
E0321 06:06:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:06:33.409774  543705 memory.go:184] no items to output this cycle
I0321 06:06:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 06:06:38.755484  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:06:38.755492  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:06:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:06:43.410644  543705 memory.go:191] Add success.
I0321 06:06:43.409806  543705 cpu.go:282] Add success.
I0321 06:06:43.420357  543705 net.go:648] Add success.
I0321 06:06:43.422812  543705 net.go:770] primary dev: ETH0
I0321 06:06:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:06:43.422839  543705 net.go:698] Add success.
I0321 06:06:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:06:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:06:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:06:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:06:53.409770  543705 memory.go:184] no items to output this cycle
I0321 06:06:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 06:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:07:03.409776  543705 cpu.go:275] no items to output this cycle
I0321 06:07:03.409780  543705 memory.go:184] no items to output this cycle
E0321 06:07:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:07:13.409802  543705 cpu.go:282] Add success.
I0321 06:07:13.409807  543705 memory.go:191] Add success.
W0321 06:07:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:07:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:07:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:07:13.420132  543705 net.go:648] Add success.
I0321 06:07:13.422877  543705 net.go:770] primary dev: ETH0
I0321 06:07:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:07:13.422901  543705 net.go:698] Add success.
I0321 06:07:13.453421  543705 event_worker.go:152] Polling the log file for events...
W0321 06:07:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:07:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 06:07:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:07:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:07:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:07:14.455898  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:07:14.456621  543705 disk_worker.go:494] system disk:vda1
I0321 06:07:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:07:15.456846  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:07:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:07:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:07:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:07:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:07:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:07:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:07:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:07:23.409768  543705 memory.go:184] no items to output this cycle
I0321 06:07:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 06:07:28.109676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:07:28.112129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:07:28.112135  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315840 0xc000315880]
E0321 06:07:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:07:33.409811  543705 memory.go:184] no items to output this cycle
I0321 06:07:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 06:07:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:07:43.409795  543705 cpu.go:282] Add success.
I0321 06:07:43.409799  543705 memory.go:191] Add success.
I0321 06:07:43.419980  543705 net.go:648] Add success.
I0321 06:07:43.422608  543705 net.go:770] primary dev: ETH0
I0321 06:07:43.422621  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:07:43.422635  543705 net.go:698] Add success.
I0321 06:07:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:07:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:07:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:07:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:07:53.409778  543705 memory.go:184] no items to output this cycle
I0321 06:07:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 06:08:03.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:08:03.409912  543705 memory.go:184] no items to output this cycle
I0321 06:08:03.409910  543705 cpu.go:275] no items to output this cycle
E0321 06:08:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:08:13.409778  543705 memory.go:191] Add success.
W0321 06:08:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:08:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:08:13.409814  543705 cpu.go:282] Add success.
I0321 06:08:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:08:13.420143  543705 net.go:648] Add success.
I0321 06:08:13.422887  543705 net.go:770] primary dev: ETH0
I0321 06:08:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:08:13.422915  543705 net.go:698] Add success.
I0321 06:08:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:08:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:08:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 06:08:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:08:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 06:08:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:08:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:08:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:08:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:08:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:08:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:08:23.409772  543705 memory.go:184] no items to output this cycle
I0321 06:08:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 06:08:28.113670  543705 disk_info.go:125] begin check local disk info of client
I0321 06:08:28.116233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:08:28.116239  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0040 0xc0003c0080]
E0321 06:08:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:08:33.409786  543705 memory.go:184] no items to output this cycle
I0321 06:08:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 06:08:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:08:43.409821  543705 memory.go:191] Add success.
I0321 06:08:43.409824  543705 cpu.go:282] Add success.
I0321 06:08:43.419906  543705 net.go:648] Add success.
I0321 06:08:43.422553  543705 net.go:770] primary dev: ETH0
I0321 06:08:43.422567  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:08:43.422580  543705 net.go:698] Add success.
I0321 06:08:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:08:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:08:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:08:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:08:53.409768  543705 memory.go:184] no items to output this cycle
I0321 06:08:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 06:09:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:09:03.409810  543705 memory.go:184] no items to output this cycle
I0321 06:09:03.409822  543705 cpu.go:275] no items to output this cycle
E0321 06:09:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:09:13.409778  543705 memory.go:191] Add success.
W0321 06:09:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 06:09:13.409809  543705 cpu.go:282] Add success.
W0321 06:09:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:09:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:09:13.420094  543705 net.go:648] Add success.
I0321 06:09:13.422754  543705 net.go:770] primary dev: ETH0
I0321 06:09:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:09:13.422777  543705 net.go:698] Add success.
I0321 06:09:13.469688  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ad3f746b-ff53-434c-b44f-8395d4adbd9f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:09:13.469721  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:09:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:09:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 06:09:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:09:14.456603  543705 disk_worker.go:494] system disk:vda1
I0321 06:09:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:09:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:09:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:09:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:09:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:09:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:09:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:09:23.409798  543705 memory.go:184] no items to output this cycle
I0321 06:09:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 06:09:28.117673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:09:28.120228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:09:28.120234  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352c40 0xc000352c80]
E0321 06:09:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:09:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 06:09:33.409796  543705 memory.go:184] no items to output this cycle
I0321 06:09:38.756490  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:09:38.756497  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:09:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:09:43.410614  543705 memory.go:191] Add success.
I0321 06:09:43.409805  543705 cpu.go:282] Add success.
I0321 06:09:43.420306  543705 net.go:648] Add success.
I0321 06:09:43.422907  543705 net.go:770] primary dev: ETH0
I0321 06:09:43.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:09:43.422934  543705 net.go:698] Add success.
I0321 06:09:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:09:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:09:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:09:53.409776  543705 memory.go:184] no items to output this cycle
I0321 06:09:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 06:10:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:10:03.409792  543705 memory.go:184] no items to output this cycle
I0321 06:10:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 06:10:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:10:13.409805  543705 cpu.go:282] Add success.
I0321 06:10:13.409811  543705 memory.go:191] Add success.
W0321 06:10:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:10:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:10:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:10:13.420095  543705 net.go:648] Add success.
I0321 06:10:13.423015  543705 net.go:770] primary dev: ETH0
I0321 06:10:13.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:10:13.423040  543705 net.go:698] Add success.
I0321 06:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:10:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:10:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 06:10:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:10:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 06:10:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:10:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:10:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:10:23.409768  543705 memory.go:184] no items to output this cycle
I0321 06:10:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 06:10:28.121672  543705 disk_info.go:125] begin check local disk info of client
I0321 06:10:28.124134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:10:28.124140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a88c0 0xc0002a8900]
E0321 06:10:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:10:33.409783  543705 memory.go:184] no items to output this cycle
I0321 06:10:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 06:10:43.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:10:43.409852  543705 memory.go:191] Add success.
I0321 06:10:43.409853  543705 cpu.go:282] Add success.
I0321 06:10:43.420453  543705 net.go:648] Add success.
I0321 06:10:43.423030  543705 net.go:770] primary dev: ETH0
I0321 06:10:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:10:43.423053  543705 net.go:698] Add success.
I0321 06:10:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:10:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:10:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:10:53.409776  543705 memory.go:184] no items to output this cycle
I0321 06:10:53.409776  543705 cpu.go:275] no items to output this cycle
E0321 06:11:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:11:03.409781  543705 memory.go:184] no items to output this cycle
I0321 06:11:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 06:11:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:11:13.409792  543705 memory.go:191] Add success.
I0321 06:11:13.409796  543705 cpu.go:282] Add success.
W0321 06:11:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:11:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:11:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:11:13.420041  543705 net.go:648] Add success.
I0321 06:11:13.423069  543705 net.go:770] primary dev: ETH0
I0321 06:11:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:11:13.423093  543705 net.go:698] Add success.
I0321 06:11:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:11:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:11:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 06:11:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:11:14.456527  543705 disk_worker.go:494] system disk:vda1
I0321 06:11:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:11:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:11:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:11:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:11:23.409770  543705 memory.go:184] no items to output this cycle
I0321 06:11:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 06:11:28.125670  543705 disk_info.go:125] begin check local disk info of client
I0321 06:11:28.128093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:11:28.128099  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b400 0xc00027b440]
E0321 06:11:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:11:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 06:11:33.409797  543705 memory.go:184] no items to output this cycle
E0321 06:11:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:11:43.409916  543705 memory.go:191] Add success.
I0321 06:11:43.409929  543705 cpu.go:282] Add success.
I0321 06:11:43.419711  543705 net.go:648] Add success.
I0321 06:11:43.422313  543705 net.go:770] primary dev: ETH0
I0321 06:11:43.422325  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:11:43.422337  543705 net.go:698] Add success.
I0321 06:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:11:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:11:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:11:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:11:53.409791  543705 memory.go:184] no items to output this cycle
I0321 06:11:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 06:12:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:12:03.409802  543705 memory.go:184] no items to output this cycle
I0321 06:12:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 06:12:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:12:13.409784  543705 memory.go:191] Add success.
I0321 06:12:13.409788  543705 cpu.go:282] Add success.
W0321 06:12:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:12:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:12:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:12:13.420404  543705 net.go:648] Add success.
I0321 06:12:13.422823  543705 net.go:770] primary dev: ETH0
I0321 06:12:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:12:13.422850  543705 net.go:698] Add success.
I0321 06:12:13.469264  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d789324d-0711-450e-b45b-7ac9445515d1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:12:13.469301  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 06:12:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:12:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0321 06:12:14.455149  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:12:14.456939  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:12:14.456948  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:12:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:12:14.456993  543705 disk_worker.go:494] system disk:vda1
I0321 06:12:14.457022  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:12:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:12:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 06:12:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:12:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:12:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:12:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:12:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:12:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:12:23.409771  543705 memory.go:184] no items to output this cycle
I0321 06:12:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 06:12:28.129670  543705 disk_info.go:125] begin check local disk info of client
I0321 06:12:28.132111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:12:28.132118  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b440 0xc00027b4c0]
E0321 06:12:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:12:33.409809  543705 memory.go:184] no items to output this cycle
I0321 06:12:33.409821  543705 cpu.go:275] no items to output this cycle
I0321 06:12:38.756635  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:12:38.756643  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:12:43.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:12:43.410793  543705 memory.go:191] Add success.
I0321 06:12:43.410000  543705 cpu.go:282] Add success.
I0321 06:12:43.419735  543705 net.go:648] Add success.
I0321 06:12:43.422408  543705 net.go:770] primary dev: ETH0
I0321 06:12:43.422433  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:12:43.422446  543705 net.go:698] Add success.
I0321 06:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:12:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:12:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:12:53.409774  543705 memory.go:184] no items to output this cycle
I0321 06:12:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 06:13:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:13:03.409774  543705 memory.go:184] no items to output this cycle
I0321 06:13:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 06:13:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:13:13.409806  543705 memory.go:191] Add success.
I0321 06:13:13.409814  543705 cpu.go:282] Add success.
W0321 06:13:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:13:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:13:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:13:13.420133  543705 net.go:648] Add success.
I0321 06:13:13.423089  543705 net.go:770] primary dev: ETH0
I0321 06:13:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:13:13.423117  543705 net.go:698] Add success.
I0321 06:13:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:13:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:13:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 06:13:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:13:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 06:13:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:13:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:13:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:13:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:13:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:13:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:13:23.409761  543705 memory.go:184] no items to output this cycle
I0321 06:13:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 06:13:28.133673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:13:28.136383  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:13:28.136390  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003687c0 0xc000368800]
E0321 06:13:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:13:33.409785  543705 memory.go:184] no items to output this cycle
I0321 06:13:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 06:13:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:13:43.409825  543705 memory.go:191] Add success.
I0321 06:13:43.409825  543705 cpu.go:282] Add success.
I0321 06:13:43.419743  543705 net.go:648] Add success.
I0321 06:13:43.422599  543705 net.go:770] primary dev: ETH0
I0321 06:13:43.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:13:43.422627  543705 net.go:698] Add success.
I0321 06:13:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:13:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:13:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:13:53.409775  543705 memory.go:184] no items to output this cycle
I0321 06:13:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 06:14:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:14:03.409777  543705 memory.go:184] no items to output this cycle
I0321 06:14:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 06:14:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:14:13.409809  543705 memory.go:191] Add success.
I0321 06:14:13.409812  543705 cpu.go:282] Add success.
W0321 06:14:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:14:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:14:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:14:13.420125  543705 net.go:648] Add success.
I0321 06:14:13.423272  543705 net.go:770] primary dev: ETH0
I0321 06:14:13.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:14:13.423296  543705 net.go:698] Add success.
I0321 06:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:14:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:14:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 06:14:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:14:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 06:14:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:14:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:14:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:14:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:14:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:14:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:14:23.409794  543705 memory.go:184] no items to output this cycle
I0321 06:14:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 06:14:28.137678  543705 disk_info.go:125] begin check local disk info of client
I0321 06:14:28.140153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:14:28.140159  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024fdc0 0xc00024fe00]
E0321 06:14:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:14:33.409805  543705 memory.go:184] no items to output this cycle
I0321 06:14:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 06:14:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:14:43.409785  543705 memory.go:191] Add success.
I0321 06:14:43.409817  543705 cpu.go:282] Add success.
I0321 06:14:43.420031  543705 net.go:648] Add success.
I0321 06:14:43.422969  543705 net.go:770] primary dev: ETH0
I0321 06:14:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:14:43.422999  543705 net.go:698] Add success.
I0321 06:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:14:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:14:53.409762  543705 memory.go:184] no items to output this cycle
I0321 06:14:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 06:15:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:15:03.409781  543705 memory.go:184] no items to output this cycle
I0321 06:15:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 06:15:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:15:13.409786  543705 memory.go:191] Add success.
I0321 06:15:13.409800  543705 cpu.go:282] Add success.
W0321 06:15:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:15:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:15:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:15:13.420108  543705 net.go:648] Add success.
I0321 06:15:13.423280  543705 net.go:770] primary dev: ETH0
I0321 06:15:13.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:15:13.423309  543705 net.go:698] Add success.
I0321 06:15:13.467960  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eda16b78-f473-4852-9006-7dad1e754833","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:15:13.467995  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:15:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:15:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:15:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 06:15:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:15:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 06:15:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:15:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:15:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:15:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:15:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:15:23.409774  543705 cpu.go:275] no items to output this cycle
I0321 06:15:23.409777  543705 memory.go:184] no items to output this cycle
I0321 06:15:28.141679  543705 disk_info.go:125] begin check local disk info of client
I0321 06:15:28.144134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:15:28.144141  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051cf00 0xc00051cf40]
E0321 06:15:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:15:33.409814  543705 memory.go:184] no items to output this cycle
I0321 06:15:33.409826  543705 cpu.go:275] no items to output this cycle
I0321 06:15:38.757478  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:15:38.757485  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:15:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:15:43.410555  543705 memory.go:191] Add success.
I0321 06:15:43.409909  543705 cpu.go:282] Add success.
I0321 06:15:43.419731  543705 net.go:648] Add success.
I0321 06:15:43.422221  543705 net.go:770] primary dev: ETH0
I0321 06:15:43.422234  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:15:43.422246  543705 net.go:698] Add success.
I0321 06:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:15:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:15:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:15:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:15:53.409768  543705 memory.go:184] no items to output this cycle
I0321 06:15:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 06:16:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:16:03.409804  543705 memory.go:184] no items to output this cycle
I0321 06:16:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 06:16:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:16:13.409785  543705 memory.go:191] Add success.
I0321 06:16:13.409807  543705 cpu.go:282] Add success.
W0321 06:16:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:16:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:16:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:16:13.420060  543705 net.go:648] Add success.
I0321 06:16:13.422579  543705 net.go:770] primary dev: ETH0
I0321 06:16:13.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:16:13.422604  543705 net.go:698] Add success.
I0321 06:16:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:16:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:16:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 06:16:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:16:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 06:16:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:16:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:16:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:16:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:16:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:16:23.409781  543705 memory.go:184] no items to output this cycle
I0321 06:16:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 06:16:28.145684  543705 disk_info.go:125] begin check local disk info of client
I0321 06:16:28.148182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:16:28.148189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f500 0xc00037f540]
E0321 06:16:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:16:33.409819  543705 memory.go:184] no items to output this cycle
I0321 06:16:33.409835  543705 cpu.go:275] no items to output this cycle
E0321 06:16:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:16:43.409800  543705 memory.go:191] Add success.
I0321 06:16:43.409801  543705 cpu.go:282] Add success.
I0321 06:16:43.419729  543705 net.go:648] Add success.
I0321 06:16:43.422207  543705 net.go:770] primary dev: ETH0
I0321 06:16:43.422221  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:16:43.422232  543705 net.go:698] Add success.
I0321 06:16:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:16:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:16:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:16:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:16:53.409779  543705 memory.go:184] no items to output this cycle
I0321 06:16:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 06:17:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:17:03.409788  543705 memory.go:184] no items to output this cycle
I0321 06:17:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 06:17:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:17:13.409815  543705 memory.go:191] Add success.
I0321 06:17:13.409818  543705 cpu.go:282] Add success.
W0321 06:17:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:17:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:17:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:17:13.420103  543705 net.go:648] Add success.
I0321 06:17:13.422773  543705 net.go:770] primary dev: ETH0
I0321 06:17:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:17:13.422797  543705 net.go:698] Add success.
I0321 06:17:13.453346  543705 event_worker.go:152] Polling the log file for events...
W0321 06:17:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:17:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 06:17:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:17:14.456920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:17:14.456930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:17:14.456936  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:17:14.456982  543705 disk_worker.go:494] system disk:vda1
I0321 06:17:14.457026  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:17:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:17:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:17:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:17:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:17:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:17:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:17:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:17:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:17:23.409802  543705 memory.go:184] no items to output this cycle
I0321 06:17:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 06:17:28.149672  543705 disk_info.go:125] begin check local disk info of client
I0321 06:17:28.152221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:17:28.152227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bed80 0xc0002bedc0]
E0321 06:17:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:17:33.409796  543705 memory.go:184] no items to output this cycle
I0321 06:17:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 06:17:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:17:43.409800  543705 memory.go:191] Add success.
I0321 06:17:43.409801  543705 cpu.go:282] Add success.
I0321 06:17:43.419710  543705 net.go:648] Add success.
I0321 06:17:43.422189  543705 net.go:770] primary dev: ETH0
I0321 06:17:43.422202  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:17:43.422214  543705 net.go:698] Add success.
I0321 06:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:17:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:17:53.410615  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:17:53.410630  543705 memory.go:184] no items to output this cycle
I0321 06:17:53.410634  543705 cpu.go:275] no items to output this cycle
E0321 06:18:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:18:03.409778  543705 memory.go:184] no items to output this cycle
I0321 06:18:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 06:18:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:18:13.409817  543705 memory.go:191] Add success.
I0321 06:18:13.409828  543705 cpu.go:282] Add success.
W0321 06:18:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:18:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:18:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:18:13.420108  543705 net.go:648] Add success.
I0321 06:18:13.422716  543705 net.go:770] primary dev: ETH0
I0321 06:18:13.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:18:13.422745  543705 net.go:698] Add success.
I0321 06:18:13.509909  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7f6ae779-77c3-4867-a4c3-14c6a3d6ecaa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:18:13.509943  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:18:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:18:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 06:18:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:18:14.456663  543705 disk_worker.go:494] system disk:vda1
I0321 06:18:14.456693  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:18:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:18:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:18:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:18:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:18:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:18:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:18:23.409771  543705 memory.go:184] no items to output this cycle
I0321 06:18:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 06:18:28.153676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:18:28.156156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:18:28.156162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9200 0xc0004a9240]
E0321 06:18:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:18:33.409806  543705 memory.go:184] no items to output this cycle
I0321 06:18:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 06:18:38.757741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:18:38.757748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:18:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:18:43.410643  543705 memory.go:191] Add success.
I0321 06:18:43.409829  543705 cpu.go:282] Add success.
I0321 06:18:43.420578  543705 net.go:648] Add success.
I0321 06:18:43.423303  543705 net.go:770] primary dev: ETH0
I0321 06:18:43.423316  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:18:43.423328  543705 net.go:698] Add success.
I0321 06:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:18:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:18:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:18:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:18:53.409793  543705 memory.go:184] no items to output this cycle
I0321 06:18:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 06:19:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:19:03.409808  543705 memory.go:184] no items to output this cycle
I0321 06:19:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 06:19:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:19:13.409775  543705 memory.go:191] Add success.
I0321 06:19:13.409796  543705 cpu.go:282] Add success.
W0321 06:19:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:19:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:19:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:19:13.420107  543705 net.go:648] Add success.
I0321 06:19:13.423078  543705 net.go:770] primary dev: ETH0
I0321 06:19:13.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:19:13.423103  543705 net.go:698] Add success.
I0321 06:19:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:19:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:19:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 06:19:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:19:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 06:19:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:19:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:19:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:19:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:19:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:19:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:19:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:19:23.409793  543705 memory.go:184] no items to output this cycle
I0321 06:19:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 06:19:28.157673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:19:28.160213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:19:28.160220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb900 0xc0001fb940]
E0321 06:19:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:19:33.409789  543705 memory.go:184] no items to output this cycle
I0321 06:19:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 06:19:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:19:43.409792  543705 memory.go:191] Add success.
I0321 06:19:43.409793  543705 cpu.go:282] Add success.
I0321 06:19:43.420193  543705 net.go:648] Add success.
I0321 06:19:43.422858  543705 net.go:770] primary dev: ETH0
I0321 06:19:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:19:43.422887  543705 net.go:698] Add success.
I0321 06:19:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:19:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:19:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:19:53.409792  543705 memory.go:184] no items to output this cycle
I0321 06:19:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 06:20:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:20:03.409784  543705 memory.go:184] no items to output this cycle
I0321 06:20:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 06:20:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:20:13.409789  543705 cpu.go:282] Add success.
I0321 06:20:13.409797  543705 memory.go:191] Add success.
W0321 06:20:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:20:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:20:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:20:13.420060  543705 net.go:648] Add success.
I0321 06:20:13.422661  543705 net.go:770] primary dev: ETH0
I0321 06:20:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:20:13.422688  543705 net.go:698] Add success.
I0321 06:20:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:20:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:20:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 06:20:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:20:14.456506  543705 disk_worker.go:494] system disk:vda1
I0321 06:20:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:20:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:20:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:20:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:20:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:20:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:20:23.409790  543705 memory.go:184] no items to output this cycle
I0321 06:20:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 06:20:28.161675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:20:28.164181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:20:28.164187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fadc0 0xc0001fae40]
E0321 06:20:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:20:33.409806  543705 memory.go:184] no items to output this cycle
I0321 06:20:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 06:20:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:20:43.409784  543705 memory.go:191] Add success.
I0321 06:20:43.409811  543705 cpu.go:282] Add success.
I0321 06:20:43.420113  543705 net.go:648] Add success.
I0321 06:20:43.422797  543705 net.go:770] primary dev: ETH0
I0321 06:20:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:20:43.422825  543705 net.go:698] Add success.
I0321 06:20:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:20:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 06:20:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:20:53.409811  543705 memory.go:184] no items to output this cycle
E0321 06:21:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:21:03.409784  543705 memory.go:184] no items to output this cycle
I0321 06:21:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 06:21:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:21:13.409806  543705 memory.go:191] Add success.
I0321 06:21:13.409817  543705 cpu.go:282] Add success.
W0321 06:21:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:21:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:21:13.420196  543705 net.go:648] Add success.
I0321 06:21:13.423023  543705 net.go:770] primary dev: ETH0
I0321 06:21:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:21:13.423049  543705 net.go:698] Add success.
I0321 06:21:13.463658  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dc237d83-118a-4663-a948-eae60f23add4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:21:13.463698  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:21:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:21:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:21:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 06:21:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:21:14.456478  543705 disk_worker.go:494] system disk:vda1
I0321 06:21:14.456506  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:21:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:21:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:21:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:21:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:21:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:21:23.409773  543705 memory.go:184] no items to output this cycle
I0321 06:21:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 06:21:28.165672  543705 disk_info.go:125] begin check local disk info of client
I0321 06:21:28.168205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:21:28.168210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab180 0xc0001ab1c0]
E0321 06:21:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:21:33.409779  543705 memory.go:184] no items to output this cycle
I0321 06:21:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 06:21:38.759500  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:21:38.759508  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:21:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:21:43.409815  543705 cpu.go:282] Add success.
I0321 06:21:43.410656  543705 memory.go:191] Add success.
I0321 06:21:43.419734  543705 net.go:648] Add success.
I0321 06:21:43.422048  543705 net.go:770] primary dev: ETH0
I0321 06:21:43.422062  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:21:43.422075  543705 net.go:698] Add success.
I0321 06:21:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:21:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:21:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:21:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:21:53.409784  543705 memory.go:184] no items to output this cycle
I0321 06:21:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 06:22:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:22:03.409774  543705 memory.go:184] no items to output this cycle
I0321 06:22:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 06:22:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:22:13.409807  543705 memory.go:191] Add success.
I0321 06:22:13.409811  543705 cpu.go:282] Add success.
W0321 06:22:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:22:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:22:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:22:13.420054  543705 net.go:648] Add success.
I0321 06:22:13.422785  543705 net.go:770] primary dev: ETH0
I0321 06:22:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:22:13.422809  543705 net.go:698] Add success.
W0321 06:22:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:22:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 06:22:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:22:14.456038  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:22:14.456045  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:22:14.456050  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:22:14.456759  543705 disk_worker.go:494] system disk:vda1
I0321 06:22:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:22:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:22:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:22:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:22:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:22:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:22:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:22:16.472312  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:22:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:22:23.409794  543705 memory.go:184] no items to output this cycle
I0321 06:22:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 06:22:28.169673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:22:28.172196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:22:28.172202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6ec0 0xc0002b6f00]
E0321 06:22:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:22:33.409804  543705 memory.go:184] no items to output this cycle
I0321 06:22:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 06:22:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:22:43.409799  543705 memory.go:191] Add success.
I0321 06:22:43.409815  543705 cpu.go:282] Add success.
I0321 06:22:43.420017  543705 net.go:648] Add success.
I0321 06:22:43.422663  543705 net.go:770] primary dev: ETH0
I0321 06:22:43.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:22:43.422692  543705 net.go:698] Add success.
I0321 06:22:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:22:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:22:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:22:53.409772  543705 memory.go:184] no items to output this cycle
I0321 06:22:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 06:23:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:23:03.409774  543705 memory.go:184] no items to output this cycle
I0321 06:23:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 06:23:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:23:13.409807  543705 memory.go:191] Add success.
I0321 06:23:13.409813  543705 cpu.go:282] Add success.
W0321 06:23:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:23:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:23:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:23:13.420051  543705 net.go:648] Add success.
I0321 06:23:13.422663  543705 net.go:770] primary dev: ETH0
I0321 06:23:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:23:13.422688  543705 net.go:698] Add success.
I0321 06:23:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:23:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:23:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 06:23:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:23:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 06:23:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:23:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:23:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:23:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:23:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:23:23.409762  543705 memory.go:184] no items to output this cycle
I0321 06:23:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 06:23:28.173673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:23:28.176112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:23:28.176119  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003531c0 0xc000353200]
E0321 06:23:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:23:33.409901  543705 memory.go:184] no items to output this cycle
I0321 06:23:33.409953  543705 cpu.go:275] no items to output this cycle
E0321 06:23:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:23:43.409789  543705 memory.go:191] Add success.
I0321 06:23:43.409798  543705 cpu.go:282] Add success.
I0321 06:23:43.419859  543705 net.go:648] Add success.
I0321 06:23:43.423079  543705 net.go:770] primary dev: ETH0
I0321 06:23:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:23:43.423109  543705 net.go:698] Add success.
I0321 06:23:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:23:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:23:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:23:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:23:53.409773  543705 memory.go:184] no items to output this cycle
I0321 06:23:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 06:24:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:24:03.409803  543705 memory.go:184] no items to output this cycle
I0321 06:24:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 06:24:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:24:13.409781  543705 memory.go:191] Add success.
I0321 06:24:13.409800  543705 cpu.go:282] Add success.
W0321 06:24:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:24:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:24:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:24:13.420112  543705 net.go:648] Add success.
I0321 06:24:13.422924  543705 net.go:770] primary dev: ETH0
I0321 06:24:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:24:13.422953  543705 net.go:698] Add success.
I0321 06:24:13.464598  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"df4e3224-9255-4468-bf2d-4ede4574f019","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:24:13.464631  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:24:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:24:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:24:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 06:24:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:24:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 06:24:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:24:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:24:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:24:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:24:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:24:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:24:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:24:23.409765  543705 memory.go:184] no items to output this cycle
I0321 06:24:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 06:24:28.177672  543705 disk_info.go:125] begin check local disk info of client
I0321 06:24:28.180114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:24:28.180120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be680 0xc0003be6c0]
E0321 06:24:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:24:33.409907  543705 memory.go:184] no items to output this cycle
I0321 06:24:33.409969  543705 cpu.go:275] no items to output this cycle
I0321 06:24:38.759695  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:24:38.759702  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:24:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:24:43.410689  543705 memory.go:191] Add success.
I0321 06:24:43.409799  543705 cpu.go:282] Add success.
I0321 06:24:43.420419  543705 net.go:648] Add success.
I0321 06:24:43.423214  543705 net.go:770] primary dev: ETH0
I0321 06:24:43.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:24:43.423243  543705 net.go:698] Add success.
I0321 06:24:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:24:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:24:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:24:53.409790  543705 memory.go:184] no items to output this cycle
I0321 06:24:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 06:25:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:25:03.409793  543705 memory.go:184] no items to output this cycle
I0321 06:25:03.409807  543705 cpu.go:275] no items to output this cycle
W0321 06:25:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:25:13.409723  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:25:13.409728  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:25:13.409792  543705 cpu.go:282] Add success.
E0321 06:25:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:25:13.409836  543705 memory.go:191] Add success.
I0321 06:25:13.420076  543705 net.go:648] Add success.
I0321 06:25:13.422820  543705 net.go:770] primary dev: ETH0
I0321 06:25:13.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:25:13.422848  543705 net.go:698] Add success.
I0321 06:25:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:25:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:25:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 06:25:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:25:14.456468  543705 disk_worker.go:494] system disk:vda1
I0321 06:25:14.456512  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:25:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:25:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:25:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:25:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:25:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:25:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:25:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 06:25:23.409783  543705 memory.go:184] no items to output this cycle
I0321 06:25:28.181680  543705 disk_info.go:125] begin check local disk info of client
I0321 06:25:28.184184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:25:28.184190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375840 0xc000375880]
E0321 06:25:33.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:25:33.409892  543705 memory.go:184] no items to output this cycle
I0321 06:25:33.409895  543705 cpu.go:275] no items to output this cycle
E0321 06:25:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:25:43.409783  543705 memory.go:191] Add success.
I0321 06:25:43.409810  543705 cpu.go:282] Add success.
I0321 06:25:43.419828  543705 net.go:648] Add success.
I0321 06:25:43.422848  543705 net.go:770] primary dev: ETH0
I0321 06:25:43.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:25:43.422874  543705 net.go:698] Add success.
I0321 06:25:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:25:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:25:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:25:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:25:53.409796  543705 memory.go:184] no items to output this cycle
I0321 06:25:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 06:26:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:26:03.409800  543705 memory.go:184] no items to output this cycle
I0321 06:26:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 06:26:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:26:13.409784  543705 memory.go:191] Add success.
I0321 06:26:13.409802  543705 cpu.go:282] Add success.
W0321 06:26:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:26:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:26:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:26:13.420136  543705 net.go:648] Add success.
I0321 06:26:13.423121  543705 net.go:770] primary dev: ETH0
I0321 06:26:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:26:13.423150  543705 net.go:698] Add success.
I0321 06:26:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:26:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:26:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 06:26:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:26:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 06:26:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:26:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:26:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:26:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:26:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:26:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:26:23.409778  543705 memory.go:184] no items to output this cycle
I0321 06:26:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 06:26:28.185675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:26:28.188130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:26:28.188137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf840 0xc0002bf880]
E0321 06:26:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:26:33.409782  543705 memory.go:184] no items to output this cycle
I0321 06:26:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 06:26:43.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:26:43.409892  543705 memory.go:191] Add success.
I0321 06:26:43.410044  543705 cpu.go:282] Add success.
I0321 06:26:43.419748  543705 net.go:648] Add success.
I0321 06:26:43.422416  543705 net.go:770] primary dev: ETH0
I0321 06:26:43.422429  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:26:43.422440  543705 net.go:698] Add success.
I0321 06:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:26:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:26:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:26:53.409785  543705 memory.go:184] no items to output this cycle
I0321 06:26:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 06:27:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:27:03.409776  543705 memory.go:184] no items to output this cycle
I0321 06:27:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 06:27:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:27:13.409784  543705 memory.go:191] Add success.
I0321 06:27:13.409787  543705 cpu.go:282] Add success.
W0321 06:27:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:27:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:27:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:27:13.420070  543705 net.go:648] Add success.
I0321 06:27:13.422903  543705 net.go:770] primary dev: ETH0
I0321 06:27:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:27:13.422930  543705 net.go:698] Add success.
I0321 06:27:13.429610  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 06:27:13.452791  543705 event_worker.go:152] Polling the log file for events...
I0321 06:27:13.468724  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"098bb49e-6a6b-45c5-96b0-316e00868f0b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:27:13.468758  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 06:27:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:27:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 06:27:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:27:14.455920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:27:14.455929  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:27:14.455935  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:27:14.456785  543705 disk_worker.go:494] system disk:vda1
I0321 06:27:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:27:15.456765  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:27:15.456773  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:27:16.457891  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:27:16.457891  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:27:16.457945  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:27:16.457964  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:27:16.472299  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:27:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:27:23.409789  543705 memory.go:184] no items to output this cycle
I0321 06:27:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 06:27:28.189673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:27:28.192179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:27:28.192184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ca480 0xc0004ca4c0]
E0321 06:27:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:27:33.409909  543705 memory.go:184] no items to output this cycle
I0321 06:27:33.409909  543705 cpu.go:275] no items to output this cycle
I0321 06:27:38.761508  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:27:38.761514  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:27:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:27:43.410705  543705 memory.go:191] Add success.
I0321 06:27:43.409807  543705 cpu.go:282] Add success.
I0321 06:27:43.420405  543705 net.go:648] Add success.
I0321 06:27:43.423793  543705 net.go:770] primary dev: ETH0
I0321 06:27:43.423805  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:27:43.423817  543705 net.go:698] Add success.
I0321 06:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:27:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:27:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:27:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:27:53.409776  543705 memory.go:184] no items to output this cycle
I0321 06:27:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 06:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:28:03.409781  543705 memory.go:184] no items to output this cycle
I0321 06:28:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 06:28:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:28:13.409790  543705 memory.go:191] Add success.
I0321 06:28:13.409793  543705 cpu.go:282] Add success.
W0321 06:28:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:28:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:28:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:28:13.420041  543705 net.go:648] Add success.
I0321 06:28:13.422522  543705 net.go:770] primary dev: ETH0
I0321 06:28:13.422535  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:28:13.422548  543705 net.go:698] Add success.
I0321 06:28:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:28:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:28:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 06:28:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:28:14.456522  543705 disk_worker.go:494] system disk:vda1
I0321 06:28:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:28:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:28:16.458040  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:28:16.458102  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:28:16.458123  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:28:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:28:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:28:23.409769  543705 memory.go:184] no items to output this cycle
I0321 06:28:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 06:28:28.193674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:28:28.196235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:28:28.196243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dae40 0xc0003dae80]
E0321 06:28:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:28:33.409807  543705 memory.go:184] no items to output this cycle
I0321 06:28:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 06:28:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:28:43.409791  543705 memory.go:191] Add success.
I0321 06:28:43.409808  543705 cpu.go:282] Add success.
I0321 06:28:43.420263  543705 net.go:648] Add success.
I0321 06:28:43.422989  543705 net.go:770] primary dev: ETH0
I0321 06:28:43.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:28:43.423014  543705 net.go:698] Add success.
I0321 06:28:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:28:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:28:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:28:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:28:53.409778  543705 memory.go:184] no items to output this cycle
I0321 06:28:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 06:29:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:29:03.409787  543705 memory.go:184] no items to output this cycle
I0321 06:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 06:29:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:29:13.409780  543705 memory.go:191] Add success.
I0321 06:29:13.409802  543705 cpu.go:282] Add success.
W0321 06:29:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:29:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:29:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:29:13.420058  543705 net.go:648] Add success.
I0321 06:29:13.422728  543705 net.go:770] primary dev: ETH0
I0321 06:29:13.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:29:13.422753  543705 net.go:698] Add success.
I0321 06:29:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:29:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:29:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 06:29:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:29:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 06:29:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:29:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:29:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:29:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:29:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:29:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:29:23.409851  543705 memory.go:184] no items to output this cycle
I0321 06:29:23.409897  543705 cpu.go:275] no items to output this cycle
I0321 06:29:28.197675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:29:28.200208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:29:28.200215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac000 0xc0002ac040]
E0321 06:29:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:29:33.409775  543705 memory.go:184] no items to output this cycle
I0321 06:29:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 06:29:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:29:43.409784  543705 memory.go:191] Add success.
I0321 06:29:43.409809  543705 cpu.go:282] Add success.
I0321 06:29:43.419874  543705 net.go:648] Add success.
I0321 06:29:43.422733  543705 net.go:770] primary dev: ETH0
I0321 06:29:43.422746  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:29:43.422758  543705 net.go:698] Add success.
I0321 06:29:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:29:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:29:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:29:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:29:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 06:29:53.409779  543705 memory.go:184] no items to output this cycle
E0321 06:30:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:30:03.409772  543705 memory.go:184] no items to output this cycle
I0321 06:30:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 06:30:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:30:13.409780  543705 memory.go:191] Add success.
I0321 06:30:13.409798  543705 cpu.go:282] Add success.
W0321 06:30:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:30:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:30:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:30:13.420271  543705 net.go:648] Add success.
I0321 06:30:13.423073  543705 net.go:770] primary dev: ETH0
I0321 06:30:13.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:30:13.423107  543705 net.go:698] Add success.
I0321 06:30:13.576769  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"952b99a9-ade6-4352-82d2-72595bb25393","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:30:13.576803  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:30:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:30:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:30:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 06:30:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:30:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 06:30:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:30:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:30:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:30:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:30:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:30:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:30:23.409779  543705 memory.go:184] no items to output this cycle
I0321 06:30:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 06:30:28.201675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:30:28.204240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:30:28.204247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 06:30:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:30:33.409785  543705 memory.go:184] no items to output this cycle
I0321 06:30:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 06:30:38.761743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:30:38.761751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:30:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:30:43.410720  543705 memory.go:191] Add success.
I0321 06:30:43.409808  543705 cpu.go:282] Add success.
I0321 06:30:43.420414  543705 net.go:648] Add success.
I0321 06:30:43.423016  543705 net.go:770] primary dev: ETH0
I0321 06:30:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:30:43.423044  543705 net.go:698] Add success.
I0321 06:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:30:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:30:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:30:53.409798  543705 memory.go:184] no items to output this cycle
I0321 06:30:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 06:31:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:31:03.409774  543705 memory.go:184] no items to output this cycle
I0321 06:31:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 06:31:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:31:13.409785  543705 cpu.go:282] Add success.
I0321 06:31:13.409791  543705 memory.go:191] Add success.
W0321 06:31:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:31:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:31:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:31:13.420215  543705 net.go:648] Add success.
I0321 06:31:13.423433  543705 net.go:770] primary dev: ETH0
I0321 06:31:13.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:31:13.423460  543705 net.go:698] Add success.
I0321 06:31:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:31:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:31:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0321 06:31:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:31:14.456654  543705 disk_worker.go:494] system disk:vda1
I0321 06:31:14.456684  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:31:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:31:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:31:23.409797  543705 memory.go:184] no items to output this cycle
I0321 06:31:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 06:31:28.205687  543705 disk_info.go:125] begin check local disk info of client
I0321 06:31:28.208178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:31:28.208186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370500 0xc000370540]
E0321 06:31:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:31:33.409794  543705 memory.go:184] no items to output this cycle
I0321 06:31:33.409826  543705 cpu.go:275] no items to output this cycle
E0321 06:31:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:31:43.409808  543705 memory.go:191] Add success.
I0321 06:31:43.409819  543705 cpu.go:282] Add success.
I0321 06:31:43.420021  543705 net.go:648] Add success.
I0321 06:31:43.422827  543705 net.go:770] primary dev: ETH0
I0321 06:31:43.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:31:43.422857  543705 net.go:698] Add success.
I0321 06:31:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:31:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:31:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:31:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:31:53.409781  543705 memory.go:184] no items to output this cycle
I0321 06:31:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 06:32:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:32:03.409783  543705 memory.go:184] no items to output this cycle
I0321 06:32:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 06:32:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:32:13.409817  543705 memory.go:191] Add success.
I0321 06:32:13.409825  543705 cpu.go:282] Add success.
W0321 06:32:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:32:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:32:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:32:13.420299  543705 net.go:648] Add success.
I0321 06:32:13.423008  543705 net.go:770] primary dev: ETH0
I0321 06:32:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:32:13.423036  543705 net.go:698] Add success.
W0321 06:32:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:32:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 06:32:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:32:14.456797  543705 disk_worker.go:494] system disk:vda1
I0321 06:32:14.456836  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:32:14.457129  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:32:14.457136  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:32:14.457142  543705 custom_config.go:64] query custom config with name: gpu
E0321 06:32:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:32:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:32:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:32:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:32:16.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:32:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:32:16.472314  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:32:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:32:23.409803  543705 memory.go:184] no items to output this cycle
I0321 06:32:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 06:32:28.209675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:32:28.212225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:32:28.212231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000232240 0xc000232280]
E0321 06:32:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:32:33.409795  543705 memory.go:184] no items to output this cycle
I0321 06:32:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 06:32:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:32:43.409833  543705 memory.go:191] Add success.
I0321 06:32:43.409837  543705 cpu.go:282] Add success.
I0321 06:32:43.419999  543705 net.go:648] Add success.
I0321 06:32:43.422752  543705 net.go:770] primary dev: ETH0
I0321 06:32:43.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:32:43.422778  543705 net.go:698] Add success.
I0321 06:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:32:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:32:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:32:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:32:53.409779  543705 memory.go:184] no items to output this cycle
I0321 06:32:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 06:33:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:33:03.409793  543705 cpu.go:275] no items to output this cycle
I0321 06:33:03.409795  543705 memory.go:184] no items to output this cycle
E0321 06:33:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:33:13.409790  543705 memory.go:191] Add success.
I0321 06:33:13.409811  543705 cpu.go:282] Add success.
W0321 06:33:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:33:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:33:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:33:13.420247  543705 net.go:648] Add success.
I0321 06:33:13.423254  543705 net.go:770] primary dev: ETH0
I0321 06:33:13.423268  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:33:13.423283  543705 net.go:698] Add success.
I0321 06:33:13.469827  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c5d7c51-d3d2-4ef4-9329-786e9f076370","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:33:13.469860  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:33:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:33:14.455889  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:33:14.455970  543705 disk_worker.go:708] disk space is not compliant
W0321 06:33:14.455975  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:33:14.457842  543705 disk_worker.go:494] system disk:vda1
I0321 06:33:14.457882  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:33:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:33:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:33:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:33:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:33:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:33:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:33:23.409811  543705 memory.go:184] no items to output this cycle
I0321 06:33:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 06:33:28.213678  543705 disk_info.go:125] begin check local disk info of client
I0321 06:33:28.216243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:33:28.216250  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a640 0xc00039a680]
E0321 06:33:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:33:33.409770  543705 memory.go:184] no items to output this cycle
I0321 06:33:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 06:33:38.763510  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:33:38.763518  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:33:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:33:43.410763  543705 memory.go:191] Add success.
I0321 06:33:43.409863  543705 cpu.go:282] Add success.
I0321 06:33:43.420492  543705 net.go:648] Add success.
I0321 06:33:43.423251  543705 net.go:770] primary dev: ETH0
I0321 06:33:43.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:33:43.423278  543705 net.go:698] Add success.
I0321 06:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:33:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:33:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:33:53.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:33:53.410259  543705 memory.go:184] no items to output this cycle
I0321 06:33:53.410270  543705 cpu.go:275] no items to output this cycle
E0321 06:34:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:34:03.409789  543705 memory.go:184] no items to output this cycle
I0321 06:34:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 06:34:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:34:13.409787  543705 memory.go:191] Add success.
I0321 06:34:13.409790  543705 cpu.go:282] Add success.
W0321 06:34:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:34:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:34:13.420297  543705 net.go:648] Add success.
I0321 06:34:13.423489  543705 net.go:770] primary dev: ETH0
I0321 06:34:13.423504  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:34:13.423759  543705 net.go:698] Add success.
I0321 06:34:14.453952  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:34:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:34:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0321 06:34:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:34:14.456611  543705 disk_worker.go:494] system disk:vda1
I0321 06:34:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:34:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:34:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:34:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:34:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:34:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:34:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:34:23.409794  543705 memory.go:184] no items to output this cycle
I0321 06:34:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 06:34:28.217673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:34:28.220132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:34:28.220139  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f3c0 0xc00035f400]
E0321 06:34:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:34:33.409783  543705 memory.go:184] no items to output this cycle
I0321 06:34:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 06:34:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:34:43.409820  543705 memory.go:191] Add success.
I0321 06:34:43.409825  543705 cpu.go:282] Add success.
I0321 06:34:43.419889  543705 net.go:648] Add success.
I0321 06:34:43.422692  543705 net.go:770] primary dev: ETH0
I0321 06:34:43.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:34:43.422719  543705 net.go:698] Add success.
I0321 06:34:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:34:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:34:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 06:34:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:34:53.409809  543705 memory.go:184] no items to output this cycle
E0321 06:35:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:35:03.409788  543705 memory.go:184] no items to output this cycle
I0321 06:35:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 06:35:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:35:13.409895  543705 cpu.go:282] Add success.
I0321 06:35:13.409909  543705 memory.go:191] Add success.
W0321 06:35:13.409940  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:35:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:35:13.409961  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:35:13.419703  543705 net.go:648] Add success.
I0321 06:35:13.422572  543705 net.go:770] primary dev: ETH0
I0321 06:35:13.422585  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:35:13.422596  543705 net.go:698] Add success.
I0321 06:35:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:35:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:35:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 06:35:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:35:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 06:35:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:35:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:35:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:35:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:35:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:35:23.409780  543705 memory.go:184] no items to output this cycle
I0321 06:35:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 06:35:28.221676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:35:28.224164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:35:28.224171  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba00 0xc00007ba40]
E0321 06:35:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:35:33.409803  543705 memory.go:184] no items to output this cycle
I0321 06:35:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 06:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:35:43.409797  543705 cpu.go:282] Add success.
I0321 06:35:43.409801  543705 memory.go:191] Add success.
I0321 06:35:43.419851  543705 net.go:648] Add success.
I0321 06:35:43.422863  543705 net.go:770] primary dev: ETH0
I0321 06:35:43.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:35:43.422893  543705 net.go:698] Add success.
I0321 06:35:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:35:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:35:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:35:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:35:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 06:35:53.409787  543705 memory.go:184] no items to output this cycle
E0321 06:36:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:36:03.409903  543705 memory.go:184] no items to output this cycle
I0321 06:36:03.409948  543705 cpu.go:275] no items to output this cycle
E0321 06:36:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:36:13.409812  543705 cpu.go:282] Add success.
I0321 06:36:13.409819  543705 memory.go:191] Add success.
W0321 06:36:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:36:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:36:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:36:13.420144  543705 net.go:648] Add success.
I0321 06:36:13.422624  543705 net.go:770] primary dev: ETH0
I0321 06:36:13.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:36:13.422648  543705 net.go:698] Add success.
I0321 06:36:13.463678  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0cc539bb-d926-4b10-aac7-019773e3b12e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:36:13.463715  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:36:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:36:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:36:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 06:36:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:36:14.456676  543705 disk_worker.go:494] system disk:vda1
I0321 06:36:14.456705  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:36:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:36:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:36:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:36:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:36:23.409778  543705 memory.go:184] no items to output this cycle
I0321 06:36:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 06:36:28.225676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:36:28.228145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:36:28.228151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa1c0 0xc0001aa240]
E0321 06:36:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:36:33.409821  543705 memory.go:184] no items to output this cycle
I0321 06:36:33.409835  543705 cpu.go:275] no items to output this cycle
I0321 06:36:38.763661  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:36:38.763669  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:36:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:36:43.410719  543705 memory.go:191] Add success.
I0321 06:36:43.409833  543705 cpu.go:282] Add success.
I0321 06:36:43.420392  543705 net.go:648] Add success.
I0321 06:36:43.422892  543705 net.go:770] primary dev: ETH0
I0321 06:36:43.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:36:43.422918  543705 net.go:698] Add success.
I0321 06:36:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:36:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:36:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:36:53.409779  543705 cpu.go:275] no items to output this cycle
I0321 06:36:53.409789  543705 memory.go:184] no items to output this cycle
E0321 06:37:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:37:03.409798  543705 memory.go:184] no items to output this cycle
I0321 06:37:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 06:37:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:37:13.409816  543705 memory.go:191] Add success.
I0321 06:37:13.409828  543705 cpu.go:282] Add success.
W0321 06:37:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:37:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:37:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:37:13.420284  543705 net.go:648] Add success.
I0321 06:37:13.423436  543705 net.go:770] primary dev: ETH0
I0321 06:37:13.423450  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:37:13.423464  543705 net.go:698] Add success.
I0321 06:37:13.453009  543705 event_worker.go:152] Polling the log file for events...
W0321 06:37:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:37:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 06:37:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:37:14.457140  543705 disk_worker.go:494] system disk:vda1
E0321 06:37:14.457175  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:37:14.457183  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:37:14.457187  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:37:14.457187  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:37:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:37:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:37:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:37:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:37:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:37:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:37:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:37:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:37:23.409798  543705 memory.go:184] no items to output this cycle
I0321 06:37:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 06:37:28.229676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:37:28.232150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:37:28.232156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad40 0xc00007ad80]
E0321 06:37:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:37:33.409806  543705 memory.go:184] no items to output this cycle
I0321 06:37:33.409824  543705 cpu.go:275] no items to output this cycle
E0321 06:37:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:37:43.409791  543705 memory.go:191] Add success.
I0321 06:37:43.409794  543705 cpu.go:282] Add success.
I0321 06:37:43.420032  543705 net.go:648] Add success.
I0321 06:37:43.422813  543705 net.go:770] primary dev: ETH0
I0321 06:37:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:37:43.422839  543705 net.go:698] Add success.
I0321 06:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:37:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:37:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:37:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:37:53.409798  543705 memory.go:184] no items to output this cycle
I0321 06:37:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 06:38:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:38:03.409781  543705 memory.go:184] no items to output this cycle
I0321 06:38:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 06:38:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:38:13.409809  543705 memory.go:191] Add success.
I0321 06:38:13.409814  543705 cpu.go:282] Add success.
W0321 06:38:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:38:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:38:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:38:13.420147  543705 net.go:648] Add success.
I0321 06:38:13.423227  543705 net.go:770] primary dev: ETH0
I0321 06:38:13.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:38:13.423252  543705 net.go:698] Add success.
I0321 06:38:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:38:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:38:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 06:38:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:38:14.456471  543705 disk_worker.go:494] system disk:vda1
I0321 06:38:14.456500  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:38:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:38:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:38:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:38:23.409769  543705 memory.go:184] no items to output this cycle
I0321 06:38:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 06:38:28.233676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:38:28.236147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:38:28.236154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005e6600 0xc0005e6640]
E0321 06:38:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:38:33.409812  543705 memory.go:184] no items to output this cycle
I0321 06:38:33.409824  543705 cpu.go:275] no items to output this cycle
E0321 06:38:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:38:43.409833  543705 memory.go:191] Add success.
I0321 06:38:43.409835  543705 cpu.go:282] Add success.
I0321 06:38:43.419991  543705 net.go:648] Add success.
I0321 06:38:43.422720  543705 net.go:770] primary dev: ETH0
I0321 06:38:43.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:38:43.422746  543705 net.go:698] Add success.
I0321 06:38:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:38:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:38:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:38:53.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:38:53.409878  543705 cpu.go:275] no items to output this cycle
I0321 06:38:53.409910  543705 memory.go:184] no items to output this cycle
E0321 06:39:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:39:03.409786  543705 memory.go:184] no items to output this cycle
I0321 06:39:03.409803  543705 cpu.go:275] no items to output this cycle
W0321 06:39:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:39:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:39:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 06:39:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:39:13.409806  543705 cpu.go:282] Add success.
I0321 06:39:13.409818  543705 memory.go:191] Add success.
I0321 06:39:13.419868  543705 net.go:770] primary dev: ETH0
I0321 06:39:13.419883  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:39:13.419896  543705 net.go:698] Add success.
I0321 06:39:13.420275  543705 net.go:648] Add success.
I0321 06:39:13.469323  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c1d3feed-33f5-4ed6-8cb4-06371d71647c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:39:13.469356  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:39:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:39:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:39:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0321 06:39:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:39:14.456728  543705 disk_worker.go:494] system disk:vda1
I0321 06:39:14.456765  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:39:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:39:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:39:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:39:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:39:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:39:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:39:23.409771  543705 memory.go:184] no items to output this cycle
I0321 06:39:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 06:39:28.237678  543705 disk_info.go:125] begin check local disk info of client
I0321 06:39:28.240169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:39:28.240187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa640 0xc0001aa680]
E0321 06:39:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:39:33.409786  543705 memory.go:184] no items to output this cycle
I0321 06:39:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 06:39:38.765514  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:39:38.765522  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:39:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:39:43.410922  543705 memory.go:191] Add success.
I0321 06:39:43.409831  543705 cpu.go:282] Add success.
I0321 06:39:43.420808  543705 net.go:648] Add success.
I0321 06:39:43.423914  543705 net.go:770] primary dev: ETH0
I0321 06:39:43.423927  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:39:43.423939  543705 net.go:698] Add success.
I0321 06:39:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:39:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:39:53.410403  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:39:53.410418  543705 memory.go:184] no items to output this cycle
I0321 06:39:53.410417  543705 cpu.go:275] no items to output this cycle
E0321 06:40:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:40:03.409773  543705 memory.go:184] no items to output this cycle
I0321 06:40:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 06:40:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:40:13.409774  543705 memory.go:191] Add success.
W0321 06:40:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 06:40:13.409807  543705 cpu.go:282] Add success.
W0321 06:40:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:40:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:40:13.420046  543705 net.go:648] Add success.
I0321 06:40:13.422819  543705 net.go:770] primary dev: ETH0
I0321 06:40:13.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:40:13.422847  543705 net.go:698] Add success.
I0321 06:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:40:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:40:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 06:40:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:40:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 06:40:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:40:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:40:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:40:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:40:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:40:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:40:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:40:23.409791  543705 memory.go:184] no items to output this cycle
I0321 06:40:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 06:40:28.241675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:40:28.244155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:40:28.244161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb00 0xc0001abb40]
E0321 06:40:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:40:33.409807  543705 memory.go:184] no items to output this cycle
I0321 06:40:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 06:40:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:40:43.409886  543705 memory.go:191] Add success.
I0321 06:40:43.409933  543705 cpu.go:282] Add success.
I0321 06:40:43.419712  543705 net.go:648] Add success.
I0321 06:40:43.422459  543705 net.go:770] primary dev: ETH0
I0321 06:40:43.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:40:43.422483  543705 net.go:698] Add success.
I0321 06:40:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:40:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:40:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:40:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:40:53.409784  543705 memory.go:184] no items to output this cycle
I0321 06:40:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 06:41:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:41:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 06:41:03.409792  543705 memory.go:184] no items to output this cycle
E0321 06:41:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:41:13.409800  543705 memory.go:191] Add success.
I0321 06:41:13.409801  543705 cpu.go:282] Add success.
W0321 06:41:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:41:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:41:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:41:13.420256  543705 net.go:648] Add success.
I0321 06:41:13.423357  543705 net.go:770] primary dev: ETH0
I0321 06:41:13.423372  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:41:13.423385  543705 net.go:698] Add success.
I0321 06:41:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:41:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:41:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 06:41:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:41:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 06:41:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:41:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:41:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:41:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:41:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:41:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:41:23.409771  543705 memory.go:184] no items to output this cycle
I0321 06:41:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 06:41:28.245674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:41:28.248157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:41:28.248164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390f00 0xc000390f40]
E0321 06:41:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:41:33.409805  543705 memory.go:184] no items to output this cycle
I0321 06:41:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 06:41:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:41:43.409801  543705 memory.go:191] Add success.
I0321 06:41:43.409802  543705 cpu.go:282] Add success.
I0321 06:41:43.420049  543705 net.go:648] Add success.
I0321 06:41:43.423100  543705 net.go:770] primary dev: ETH0
I0321 06:41:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:41:43.423126  543705 net.go:698] Add success.
I0321 06:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:41:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:41:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:41:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:41:53.409807  543705 memory.go:184] no items to output this cycle
I0321 06:41:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 06:42:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:42:03.409776  543705 memory.go:184] no items to output this cycle
I0321 06:42:03.409777  543705 cpu.go:275] no items to output this cycle
E0321 06:42:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:42:13.409785  543705 memory.go:191] Add success.
I0321 06:42:13.409792  543705 cpu.go:282] Add success.
W0321 06:42:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:42:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:42:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:42:13.420058  543705 net.go:648] Add success.
I0321 06:42:13.422776  543705 net.go:770] primary dev: ETH0
I0321 06:42:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:42:13.422801  543705 net.go:698] Add success.
I0321 06:42:13.468705  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c33edf2-c22c-4b98-af37-d720b417b34c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:42:13.468748  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 06:42:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:42:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 06:42:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:42:14.456976  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:42:14.456986  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:42:14.456992  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:42:14.457025  543705 disk_worker.go:494] system disk:vda1
I0321 06:42:14.457053  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:42:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:42:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 06:42:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:42:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:42:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:42:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:42:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:42:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:42:23.409764  543705 memory.go:184] no items to output this cycle
I0321 06:42:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 06:42:28.249672  543705 disk_info.go:125] begin check local disk info of client
I0321 06:42:28.252201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:42:28.252207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003307c0 0xc000330800]
E0321 06:42:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:42:33.409796  543705 memory.go:184] no items to output this cycle
I0321 06:42:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 06:42:38.765747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:42:38.765754  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:42:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:42:43.410746  543705 memory.go:191] Add success.
I0321 06:42:43.409834  543705 cpu.go:282] Add success.
I0321 06:42:43.420471  543705 net.go:648] Add success.
I0321 06:42:43.423660  543705 net.go:770] primary dev: ETH0
I0321 06:42:43.423675  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:42:43.423690  543705 net.go:698] Add success.
I0321 06:42:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:42:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:42:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:42:53.409798  543705 memory.go:184] no items to output this cycle
I0321 06:42:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 06:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:43:03.409785  543705 cpu.go:275] no items to output this cycle
I0321 06:43:03.409787  543705 memory.go:184] no items to output this cycle
E0321 06:43:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:43:13.409794  543705 memory.go:191] Add success.
I0321 06:43:13.409795  543705 cpu.go:282] Add success.
W0321 06:43:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:43:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:43:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:43:13.420058  543705 net.go:648] Add success.
I0321 06:43:13.422767  543705 net.go:770] primary dev: ETH0
I0321 06:43:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:43:13.422790  543705 net.go:698] Add success.
I0321 06:43:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:43:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:43:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 06:43:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:43:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 06:43:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:43:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:43:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:43:23.409783  543705 memory.go:184] no items to output this cycle
I0321 06:43:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 06:43:28.253673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:43:28.256208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:43:28.256214  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474bc0 0xc000474c00]
E0321 06:43:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:43:33.409771  543705 memory.go:184] no items to output this cycle
I0321 06:43:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 06:43:43.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:43:43.409882  543705 memory.go:191] Add success.
I0321 06:43:43.410012  543705 cpu.go:282] Add success.
I0321 06:43:43.419717  543705 net.go:648] Add success.
I0321 06:43:43.422423  543705 net.go:770] primary dev: ETH0
I0321 06:43:43.422436  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:43:43.422448  543705 net.go:698] Add success.
I0321 06:43:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:43:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:43:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:43:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:43:53.409783  543705 memory.go:184] no items to output this cycle
I0321 06:43:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 06:44:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:44:03.409811  543705 memory.go:184] no items to output this cycle
I0321 06:44:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 06:44:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:44:13.409782  543705 memory.go:191] Add success.
I0321 06:44:13.409802  543705 cpu.go:282] Add success.
W0321 06:44:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:44:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:44:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:44:13.420192  543705 net.go:648] Add success.
I0321 06:44:13.423207  543705 net.go:770] primary dev: ETH0
I0321 06:44:13.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:44:13.423231  543705 net.go:698] Add success.
I0321 06:44:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:44:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:44:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 06:44:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:44:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 06:44:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:44:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:44:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:44:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:44:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:44:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:44:23.409805  543705 memory.go:184] no items to output this cycle
I0321 06:44:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 06:44:28.257674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:44:28.260244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:44:28.260250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f1040 0xc0000f1080]
E0321 06:44:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:44:33.409816  543705 memory.go:184] no items to output this cycle
I0321 06:44:33.409833  543705 cpu.go:275] no items to output this cycle
E0321 06:44:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:44:43.409785  543705 memory.go:191] Add success.
I0321 06:44:43.409816  543705 cpu.go:282] Add success.
I0321 06:44:43.419952  543705 net.go:648] Add success.
I0321 06:44:43.422403  543705 net.go:770] primary dev: ETH0
I0321 06:44:43.422417  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:44:43.422428  543705 net.go:698] Add success.
I0321 06:44:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:44:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:44:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:44:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:44:53.409796  543705 memory.go:184] no items to output this cycle
I0321 06:44:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 06:45:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:45:03.409783  543705 cpu.go:275] no items to output this cycle
I0321 06:45:03.409791  543705 memory.go:184] no items to output this cycle
E0321 06:45:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:45:13.409804  543705 memory.go:191] Add success.
I0321 06:45:13.409814  543705 cpu.go:282] Add success.
W0321 06:45:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:45:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:45:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:45:13.420073  543705 net.go:648] Add success.
I0321 06:45:13.423021  543705 net.go:770] primary dev: ETH0
I0321 06:45:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:45:13.423045  543705 net.go:698] Add success.
I0321 06:45:13.465282  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2884f6e2-c666-4cda-8e01-4f9ff9b83a71","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:45:13.465315  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:45:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:45:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:45:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 06:45:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:45:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 06:45:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:45:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:45:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:45:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:45:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:45:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:45:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:45:23.409777  543705 memory.go:184] no items to output this cycle
I0321 06:45:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 06:45:28.261676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:45:28.264156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:45:28.264163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368f40 0xc000368f80]
E0321 06:45:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:45:33.409791  543705 memory.go:184] no items to output this cycle
I0321 06:45:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 06:45:38.767531  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:45:38.767538  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:45:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:45:43.410684  543705 memory.go:191] Add success.
I0321 06:45:43.409787  543705 cpu.go:282] Add success.
I0321 06:45:43.420385  543705 net.go:648] Add success.
I0321 06:45:43.423324  543705 net.go:770] primary dev: ETH0
I0321 06:45:43.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:45:43.423351  543705 net.go:698] Add success.
I0321 06:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:45:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:45:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:45:53.409778  543705 memory.go:184] no items to output this cycle
I0321 06:45:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 06:46:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:46:03.409786  543705 memory.go:184] no items to output this cycle
I0321 06:46:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 06:46:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:46:13.409808  543705 memory.go:191] Add success.
I0321 06:46:13.409819  543705 cpu.go:282] Add success.
W0321 06:46:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:46:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:46:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:46:13.420244  543705 net.go:648] Add success.
I0321 06:46:13.422835  543705 net.go:770] primary dev: ETH0
I0321 06:46:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:46:13.422864  543705 net.go:698] Add success.
I0321 06:46:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:46:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:46:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 06:46:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:46:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 06:46:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:46:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:46:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:46:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:46:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:46:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:46:23.409762  543705 memory.go:184] no items to output this cycle
I0321 06:46:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 06:46:28.265673  543705 disk_info.go:125] begin check local disk info of client
I0321 06:46:28.268217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:46:28.268223  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331c40 0xc000331c80]
E0321 06:46:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:46:33.409788  543705 memory.go:184] no items to output this cycle
I0321 06:46:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 06:46:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:46:43.409828  543705 memory.go:191] Add success.
I0321 06:46:43.409835  543705 cpu.go:282] Add success.
I0321 06:46:43.419967  543705 net.go:648] Add success.
I0321 06:46:43.422686  543705 net.go:770] primary dev: ETH0
I0321 06:46:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:46:43.422712  543705 net.go:698] Add success.
I0321 06:46:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:46:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:46:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:46:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:46:53.409801  543705 memory.go:184] no items to output this cycle
I0321 06:46:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 06:47:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:47:03.409775  543705 cpu.go:275] no items to output this cycle
I0321 06:47:03.409790  543705 memory.go:184] no items to output this cycle
E0321 06:47:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:47:13.409805  543705 memory.go:191] Add success.
I0321 06:47:13.409815  543705 cpu.go:282] Add success.
W0321 06:47:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:47:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:47:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:47:13.420075  543705 net.go:648] Add success.
I0321 06:47:13.423394  543705 net.go:770] primary dev: ETH0
I0321 06:47:13.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:47:13.423419  543705 net.go:698] Add success.
I0321 06:47:13.453117  543705 event_worker.go:152] Polling the log file for events...
W0321 06:47:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 06:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:47:14.456931  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:47:14.456940  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:47:14.456946  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:47:14.456993  543705 disk_worker.go:494] system disk:vda1
I0321 06:47:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:47:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:47:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:47:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:47:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:47:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:47:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:47:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:47:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:47:23.409792  543705 memory.go:184] no items to output this cycle
I0321 06:47:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 06:47:28.269676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:47:28.272101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:47:28.272108  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051d700 0xc00051d740]
E0321 06:47:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:47:33.409807  543705 memory.go:184] no items to output this cycle
I0321 06:47:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 06:47:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:47:43.409789  543705 memory.go:191] Add success.
I0321 06:47:43.409806  543705 cpu.go:282] Add success.
I0321 06:47:43.419914  543705 net.go:648] Add success.
I0321 06:47:43.422775  543705 net.go:770] primary dev: ETH0
I0321 06:47:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:47:43.422804  543705 net.go:698] Add success.
I0321 06:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:47:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:47:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:47:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 06:47:53.409781  543705 memory.go:184] no items to output this cycle
E0321 06:48:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:48:03.409797  543705 memory.go:184] no items to output this cycle
I0321 06:48:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 06:48:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:48:13.409814  543705 memory.go:191] Add success.
I0321 06:48:13.409818  543705 cpu.go:282] Add success.
W0321 06:48:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:48:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:48:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:48:13.420086  543705 net.go:648] Add success.
I0321 06:48:13.422765  543705 net.go:770] primary dev: ETH0
I0321 06:48:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:48:13.422789  543705 net.go:698] Add success.
I0321 06:48:13.682014  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d5062ddc-85ae-4c38-affb-073d3f59e8f9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:48:13.682052  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:48:14.454724  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:48:14.454948  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:48:14.454958  543705 disk_worker.go:708] disk space is not compliant
W0321 06:48:14.454961  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:48:14.456357  543705 disk_worker.go:494] system disk:vda1
I0321 06:48:14.456385  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:48:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:48:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:48:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:48:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:48:23.409774  543705 memory.go:184] no items to output this cycle
I0321 06:48:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 06:48:28.273675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:48:28.276231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:48:28.276239  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003680c0 0xc000368100]
E0321 06:48:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:48:33.409784  543705 memory.go:184] no items to output this cycle
I0321 06:48:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 06:48:38.768541  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:48:38.768549  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:48:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:48:43.410640  543705 memory.go:191] Add success.
I0321 06:48:43.409800  543705 cpu.go:282] Add success.
I0321 06:48:43.420377  543705 net.go:648] Add success.
I0321 06:48:43.422783  543705 net.go:770] primary dev: ETH0
I0321 06:48:43.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:48:43.422812  543705 net.go:698] Add success.
I0321 06:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:48:53.409825  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:48:53.409842  543705 memory.go:184] no items to output this cycle
I0321 06:48:53.409915  543705 cpu.go:275] no items to output this cycle
E0321 06:49:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:49:03.409786  543705 memory.go:184] no items to output this cycle
I0321 06:49:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 06:49:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:49:13.409794  543705 memory.go:191] Add success.
I0321 06:49:13.409794  543705 cpu.go:282] Add success.
W0321 06:49:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:49:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:49:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:49:13.420459  543705 net.go:648] Add success.
I0321 06:49:13.423024  543705 net.go:770] primary dev: ETH0
I0321 06:49:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:49:13.423048  543705 net.go:698] Add success.
I0321 06:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:49:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:49:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 06:49:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:49:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 06:49:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:49:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:49:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:49:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:49:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:49:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:49:23.410330  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:49:23.410346  543705 memory.go:184] no items to output this cycle
I0321 06:49:23.410378  543705 cpu.go:275] no items to output this cycle
I0321 06:49:28.277674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:49:28.280148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:49:28.280156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368a80 0xc000368ac0]
E0321 06:49:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:49:33.409777  543705 memory.go:184] no items to output this cycle
I0321 06:49:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 06:49:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:49:43.409788  543705 memory.go:191] Add success.
I0321 06:49:43.409810  543705 cpu.go:282] Add success.
I0321 06:49:43.419971  543705 net.go:648] Add success.
I0321 06:49:43.422563  543705 net.go:770] primary dev: ETH0
I0321 06:49:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:49:43.422588  543705 net.go:698] Add success.
I0321 06:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:49:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:49:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:49:53.409873  543705 memory.go:184] no items to output this cycle
I0321 06:49:53.409917  543705 cpu.go:275] no items to output this cycle
E0321 06:50:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:50:03.409774  543705 memory.go:184] no items to output this cycle
I0321 06:50:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 06:50:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:50:13.409804  543705 memory.go:191] Add success.
I0321 06:50:13.409812  543705 cpu.go:282] Add success.
W0321 06:50:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:50:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:50:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:50:13.420134  543705 net.go:648] Add success.
I0321 06:50:13.422856  543705 net.go:770] primary dev: ETH0
I0321 06:50:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:50:13.422881  543705 net.go:698] Add success.
I0321 06:50:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:50:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:50:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 06:50:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:50:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 06:50:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:50:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:50:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:50:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:50:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:50:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 06:50:23.409781  543705 memory.go:184] no items to output this cycle
I0321 06:50:28.281679  543705 disk_info.go:125] begin check local disk info of client
I0321 06:50:28.284138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:50:28.284145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000323a80 0xc000323ac0]
E0321 06:50:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:50:33.409807  543705 memory.go:184] no items to output this cycle
I0321 06:50:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 06:50:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:50:43.409801  543705 memory.go:191] Add success.
I0321 06:50:43.409800  543705 cpu.go:282] Add success.
I0321 06:50:43.419946  543705 net.go:648] Add success.
I0321 06:50:43.423078  543705 net.go:770] primary dev: ETH0
I0321 06:50:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:50:43.423104  543705 net.go:698] Add success.
I0321 06:50:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:50:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:50:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:50:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:50:53.409866  543705 cpu.go:275] no items to output this cycle
I0321 06:50:53.409888  543705 memory.go:184] no items to output this cycle
E0321 06:51:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:51:03.409778  543705 memory.go:184] no items to output this cycle
I0321 06:51:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 06:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:51:13.409813  543705 memory.go:191] Add success.
I0321 06:51:13.409817  543705 cpu.go:282] Add success.
W0321 06:51:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:51:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:51:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:51:13.420056  543705 net.go:648] Add success.
I0321 06:51:13.422602  543705 net.go:770] primary dev: ETH0
I0321 06:51:13.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:51:13.422627  543705 net.go:698] Add success.
I0321 06:51:13.470537  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6822d29c-9507-4e63-bf8c-4d1ab9ddee2f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:51:13.470570  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:51:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:51:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 06:51:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:51:14.456519  543705 disk_worker.go:494] system disk:vda1
I0321 06:51:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:51:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:51:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:51:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:51:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:51:23.409797  543705 memory.go:184] no items to output this cycle
I0321 06:51:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 06:51:28.285675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:51:28.288188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:51:28.288195  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036bc00 0xc00036bc40]
E0321 06:51:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:51:33.409781  543705 memory.go:184] no items to output this cycle
I0321 06:51:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 06:51:38.769546  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:51:38.769553  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:51:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:51:43.410809  543705 memory.go:191] Add success.
I0321 06:51:43.409816  543705 cpu.go:282] Add success.
I0321 06:51:43.420555  543705 net.go:648] Add success.
I0321 06:51:43.423313  543705 net.go:770] primary dev: ETH0
I0321 06:51:43.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:51:43.423342  543705 net.go:698] Add success.
I0321 06:51:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:51:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:51:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:51:53.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:51:53.409895  543705 cpu.go:275] no items to output this cycle
I0321 06:51:53.409903  543705 memory.go:184] no items to output this cycle
E0321 06:52:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:52:03.409796  543705 memory.go:184] no items to output this cycle
I0321 06:52:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 06:52:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:52:13.409787  543705 memory.go:191] Add success.
I0321 06:52:13.409802  543705 cpu.go:282] Add success.
W0321 06:52:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:52:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:52:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:52:13.420054  543705 net.go:648] Add success.
I0321 06:52:13.422652  543705 net.go:770] primary dev: ETH0
I0321 06:52:13.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:52:13.422682  543705 net.go:698] Add success.
W0321 06:52:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:52:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 06:52:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:52:14.456795  543705 disk_worker.go:494] system disk:vda1
I0321 06:52:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:52:14.457113  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:52:14.457120  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:52:14.457125  543705 custom_config.go:64] query custom config with name: gpu
E0321 06:52:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:52:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:52:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:52:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:52:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:52:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:52:16.472317  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:52:23.409917  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:52:23.409935  543705 memory.go:184] no items to output this cycle
I0321 06:52:23.409946  543705 cpu.go:275] no items to output this cycle
I0321 06:52:28.289674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:52:28.292198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:52:28.292207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9840 0xc0003b9880]
E0321 06:52:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:52:33.409776  543705 memory.go:184] no items to output this cycle
I0321 06:52:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 06:52:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:52:43.409783  543705 memory.go:191] Add success.
I0321 06:52:43.409812  543705 cpu.go:282] Add success.
I0321 06:52:43.420403  543705 net.go:648] Add success.
I0321 06:52:43.423687  543705 net.go:770] primary dev: ETH0
I0321 06:52:43.423700  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:52:43.423712  543705 net.go:698] Add success.
I0321 06:52:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:52:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:52:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:52:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:52:53.409796  543705 memory.go:184] no items to output this cycle
I0321 06:52:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 06:53:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:53:03.409802  543705 memory.go:184] no items to output this cycle
I0321 06:53:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 06:53:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:53:13.409814  543705 memory.go:191] Add success.
I0321 06:53:13.409820  543705 cpu.go:282] Add success.
W0321 06:53:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:53:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:53:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:53:13.420056  543705 net.go:648] Add success.
I0321 06:53:13.422677  543705 net.go:770] primary dev: ETH0
I0321 06:53:13.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:53:13.422705  543705 net.go:698] Add success.
I0321 06:53:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:53:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:53:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 06:53:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:53:14.456607  543705 disk_worker.go:494] system disk:vda1
I0321 06:53:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:53:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:53:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:53:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:53:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:53:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:53:23.409789  543705 memory.go:184] no items to output this cycle
I0321 06:53:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 06:53:28.293674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:53:28.296152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:53:28.296159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003954c0 0xc000395500]
E0321 06:53:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:53:33.409807  543705 memory.go:184] no items to output this cycle
I0321 06:53:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 06:53:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:53:43.409823  543705 memory.go:191] Add success.
I0321 06:53:43.409828  543705 cpu.go:282] Add success.
I0321 06:53:43.419999  543705 net.go:648] Add success.
I0321 06:53:43.423041  543705 net.go:770] primary dev: ETH0
I0321 06:53:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:53:43.423067  543705 net.go:698] Add success.
I0321 06:53:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:53:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:53:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:53:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:53:53.409794  543705 memory.go:184] no items to output this cycle
I0321 06:53:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 06:54:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:54:03.409765  543705 memory.go:184] no items to output this cycle
I0321 06:54:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 06:54:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:54:13.409782  543705 memory.go:191] Add success.
W0321 06:54:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:54:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:54:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:54:13.409820  543705 cpu.go:282] Add success.
I0321 06:54:13.420026  543705 net.go:648] Add success.
I0321 06:54:13.423117  543705 net.go:770] primary dev: ETH0
I0321 06:54:13.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:54:13.423142  543705 net.go:698] Add success.
I0321 06:54:13.469003  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d0b7974e-d366-499c-9283-39a963ea7415","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:54:13.469038  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 06:54:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:54:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:54:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 06:54:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:54:14.456618  543705 disk_worker.go:494] system disk:vda1
I0321 06:54:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:54:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:54:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:54:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:54:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:54:23.409802  543705 memory.go:184] no items to output this cycle
I0321 06:54:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 06:54:28.297676  543705 disk_info.go:125] begin check local disk info of client
I0321 06:54:28.300217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:54:28.300223  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509d40 0xc000509d80]
E0321 06:54:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:54:33.409775  543705 memory.go:184] no items to output this cycle
I0321 06:54:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 06:54:38.769744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:54:38.769751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:54:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:54:43.410651  543705 memory.go:191] Add success.
I0321 06:54:43.409816  543705 cpu.go:282] Add success.
I0321 06:54:43.420432  543705 net.go:648] Add success.
I0321 06:54:43.423189  543705 net.go:770] primary dev: ETH0
I0321 06:54:43.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:54:43.423219  543705 net.go:698] Add success.
I0321 06:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:54:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:54:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:54:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:54:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 06:54:53.409785  543705 memory.go:184] no items to output this cycle
E0321 06:55:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:55:03.409804  543705 memory.go:184] no items to output this cycle
I0321 06:55:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 06:55:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:55:13.409786  543705 memory.go:191] Add success.
I0321 06:55:13.409802  543705 cpu.go:282] Add success.
W0321 06:55:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:55:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:55:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:55:13.420076  543705 net.go:648] Add success.
I0321 06:55:13.422608  543705 net.go:770] primary dev: ETH0
I0321 06:55:13.422621  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:55:13.422634  543705 net.go:698] Add success.
I0321 06:55:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:55:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:55:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 06:55:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:55:14.456549  543705 disk_worker.go:494] system disk:vda1
I0321 06:55:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:55:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:55:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:55:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:55:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:55:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:55:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:55:23.409810  543705 memory.go:184] no items to output this cycle
I0321 06:55:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 06:55:28.301681  543705 disk_info.go:125] begin check local disk info of client
I0321 06:55:28.304251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:55:28.304258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005080c0 0xc000508140]
E0321 06:55:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:55:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 06:55:33.409810  543705 memory.go:184] no items to output this cycle
E0321 06:55:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:55:43.409821  543705 memory.go:191] Add success.
I0321 06:55:43.409831  543705 cpu.go:282] Add success.
I0321 06:55:43.419877  543705 net.go:648] Add success.
I0321 06:55:43.422999  543705 net.go:770] primary dev: ETH0
I0321 06:55:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:55:43.423024  543705 net.go:698] Add success.
I0321 06:55:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:55:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:55:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:55:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:55:53.409777  543705 memory.go:184] no items to output this cycle
I0321 06:55:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 06:56:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:56:03.409775  543705 memory.go:184] no items to output this cycle
I0321 06:56:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 06:56:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:56:13.409821  543705 memory.go:191] Add success.
I0321 06:56:13.409853  543705 cpu.go:282] Add success.
W0321 06:56:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:56:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:56:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:56:13.420380  543705 net.go:648] Add success.
I0321 06:56:13.422981  543705 net.go:770] primary dev: ETH0
I0321 06:56:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:56:13.423010  543705 net.go:698] Add success.
I0321 06:56:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:56:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:56:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 06:56:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:56:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 06:56:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:56:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:56:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:56:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:56:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:56:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:56:23.409799  543705 memory.go:184] no items to output this cycle
I0321 06:56:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 06:56:28.305675  543705 disk_info.go:125] begin check local disk info of client
I0321 06:56:28.308196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:56:28.308203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5440 0xc0000c5480]
E0321 06:56:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:56:33.409803  543705 memory.go:184] no items to output this cycle
I0321 06:56:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 06:56:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:56:43.409813  543705 memory.go:191] Add success.
I0321 06:56:43.409818  543705 cpu.go:282] Add success.
I0321 06:56:43.419984  543705 net.go:648] Add success.
I0321 06:56:43.422922  543705 net.go:770] primary dev: ETH0
I0321 06:56:43.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:56:43.422952  543705 net.go:698] Add success.
I0321 06:56:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:56:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:56:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:56:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:56:53.409765  543705 memory.go:184] no items to output this cycle
I0321 06:56:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 06:57:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:57:03.409772  543705 memory.go:184] no items to output this cycle
I0321 06:57:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 06:57:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:57:13.409820  543705 memory.go:191] Add success.
I0321 06:57:13.409836  543705 cpu.go:282] Add success.
W0321 06:57:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:57:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:57:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:57:13.420115  543705 net.go:648] Add success.
I0321 06:57:13.429431  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 06:57:13.429509  543705 net.go:770] primary dev: ETH0
I0321 06:57:13.429524  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:57:13.429537  543705 net.go:698] Add success.
I0321 06:57:13.453150  543705 event_worker.go:152] Polling the log file for events...
I0321 06:57:13.468739  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"770c8e3a-8dc1-4fda-9a8b-2de90db54b45","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 06:57:13.468772  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 06:57:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:57:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0321 06:57:14.455231  543705 disk_worker.go:728] disk inode is not compliant
E0321 06:57:14.456155  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 06:57:14.456164  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 06:57:14.456171  543705 custom_config.go:64] query custom config with name: gpu
I0321 06:57:14.456683  543705 disk_worker.go:494] system disk:vda1
I0321 06:57:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 06:57:15.457056  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 06:57:15.457070  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:57:16.458016  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 06:57:16.458016  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 06:57:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:57:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:57:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:57:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:57:23.409768  543705 memory.go:184] no items to output this cycle
I0321 06:57:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 06:57:28.309679  543705 disk_info.go:125] begin check local disk info of client
I0321 06:57:28.312263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:57:28.312269  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ecc0 0xc00047ed00]
E0321 06:57:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:57:33.409787  543705 memory.go:184] no items to output this cycle
I0321 06:57:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 06:57:38.771553  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 06:57:38.771561  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 06:57:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:57:43.410675  543705 memory.go:191] Add success.
I0321 06:57:43.409814  543705 cpu.go:282] Add success.
I0321 06:57:43.420399  543705 net.go:648] Add success.
I0321 06:57:43.423083  543705 net.go:770] primary dev: ETH0
I0321 06:57:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:57:43.423113  543705 net.go:698] Add success.
I0321 06:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:57:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:57:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:57:53.409788  543705 memory.go:184] no items to output this cycle
I0321 06:57:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 06:58:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:58:03.409778  543705 memory.go:184] no items to output this cycle
I0321 06:58:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 06:58:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:58:13.409807  543705 memory.go:191] Add success.
I0321 06:58:13.409821  543705 cpu.go:282] Add success.
W0321 06:58:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:58:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:58:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:58:13.420203  543705 net.go:648] Add success.
I0321 06:58:13.422858  543705 net.go:770] primary dev: ETH0
I0321 06:58:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:58:13.422892  543705 net.go:698] Add success.
I0321 06:58:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:58:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:58:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 06:58:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:58:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 06:58:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:58:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:58:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:58:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:58:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:58:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:58:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:58:23.409784  543705 memory.go:184] no items to output this cycle
I0321 06:58:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 06:58:28.313674  543705 disk_info.go:125] begin check local disk info of client
I0321 06:58:28.316164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:58:28.316170  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4200 0xc0000c4240]
E0321 06:58:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:58:33.409811  543705 memory.go:184] no items to output this cycle
I0321 06:58:33.409823  543705 cpu.go:275] no items to output this cycle
E0321 06:58:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:58:43.409789  543705 memory.go:191] Add success.
I0321 06:58:43.409809  543705 cpu.go:282] Add success.
I0321 06:58:43.419903  543705 net.go:648] Add success.
I0321 06:58:43.422938  543705 net.go:770] primary dev: ETH0
I0321 06:58:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:58:43.422964  543705 net.go:698] Add success.
I0321 06:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:58:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:58:53.410433  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:58:53.410449  543705 memory.go:184] no items to output this cycle
I0321 06:58:53.410481  543705 cpu.go:275] no items to output this cycle
E0321 06:59:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:59:03.409870  543705 memory.go:184] no items to output this cycle
I0321 06:59:03.409936  543705 cpu.go:275] no items to output this cycle
E0321 06:59:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:59:13.409789  543705 memory.go:191] Add success.
I0321 06:59:13.409793  543705 cpu.go:282] Add success.
W0321 06:59:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 06:59:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 06:59:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 06:59:13.420166  543705 net.go:648] Add success.
I0321 06:59:13.422807  543705 net.go:770] primary dev: ETH0
I0321 06:59:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:59:13.422831  543705 net.go:698] Add success.
I0321 06:59:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 06:59:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 06:59:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 06:59:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 06:59:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 06:59:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 06:59:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 06:59:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:59:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:59:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 06:59:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 06:59:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:59:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 06:59:23.409783  543705 memory.go:184] no items to output this cycle
I0321 06:59:28.317677  543705 disk_info.go:125] begin check local disk info of client
I0321 06:59:28.320209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 06:59:28.320215  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463300 0xc000463380]
E0321 06:59:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:59:33.409798  543705 memory.go:184] no items to output this cycle
I0321 06:59:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 06:59:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:59:43.409795  543705 memory.go:191] Add success.
I0321 06:59:43.409801  543705 cpu.go:282] Add success.
I0321 06:59:43.419962  543705 net.go:648] Add success.
I0321 06:59:43.423022  543705 net.go:770] primary dev: ETH0
I0321 06:59:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0321 06:59:43.423052  543705 net.go:698] Add success.
I0321 06:59:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 06:59:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 06:59:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 06:59:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 06:59:53.409780  543705 memory.go:184] no items to output this cycle
I0321 06:59:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 07:00:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:00:03.409763  543705 memory.go:184] no items to output this cycle
I0321 07:00:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 07:00:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:00:13.409794  543705 memory.go:191] Add success.
I0321 07:00:13.409797  543705 cpu.go:282] Add success.
W0321 07:00:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:00:13.409955  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:00:13.409962  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:00:13.419708  543705 net.go:648] Add success.
I0321 07:00:13.422790  543705 net.go:770] primary dev: ETH0
I0321 07:00:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:00:13.422816  543705 net.go:698] Add success.
I0321 07:00:13.470071  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"70eeb649-1cb2-4666-8852-3f3a97299dc1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:00:13.470102  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:00:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:00:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 07:00:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:00:14.456482  543705 disk_worker.go:494] system disk:vda1
I0321 07:00:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:00:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:00:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:00:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:00:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:00:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:00:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:00:23.409773  543705 memory.go:184] no items to output this cycle
I0321 07:00:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 07:00:28.321677  543705 disk_info.go:125] begin check local disk info of client
I0321 07:00:28.324208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:00:28.324214  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353300 0xc000353340]
E0321 07:00:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:00:33.409794  543705 memory.go:184] no items to output this cycle
I0321 07:00:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 07:00:38.771708  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:00:38.771716  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:00:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:00:43.410698  543705 memory.go:191] Add success.
I0321 07:00:43.409805  543705 cpu.go:282] Add success.
I0321 07:00:43.420407  543705 net.go:648] Add success.
I0321 07:00:43.423006  543705 net.go:770] primary dev: ETH0
I0321 07:00:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:00:43.423032  543705 net.go:698] Add success.
I0321 07:00:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:00:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:00:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:00:53.409773  543705 memory.go:184] no items to output this cycle
I0321 07:00:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 07:01:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:01:03.409799  543705 memory.go:184] no items to output this cycle
I0321 07:01:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 07:01:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:01:13.409862  543705 memory.go:191] Add success.
W0321 07:01:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:01:13.409906  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:01:13.409950  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:01:13.409997  543705 cpu.go:282] Add success.
I0321 07:01:13.419736  543705 net.go:648] Add success.
I0321 07:01:13.422671  543705 net.go:770] primary dev: ETH0
I0321 07:01:13.422686  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:01:13.422700  543705 net.go:698] Add success.
I0321 07:01:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:01:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:01:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 07:01:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:01:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 07:01:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:01:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:01:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:01:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:01:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:01:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:01:23.409786  543705 memory.go:184] no items to output this cycle
I0321 07:01:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 07:01:28.327013  543705 disk_info.go:125] begin check local disk info of client
I0321 07:01:28.329570  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:01:28.329576  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369880 0xc0003698c0]
E0321 07:01:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:01:33.409785  543705 memory.go:184] no items to output this cycle
I0321 07:01:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:01:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:01:43.409815  543705 memory.go:191] Add success.
I0321 07:01:43.409838  543705 cpu.go:282] Add success.
I0321 07:01:43.419996  543705 net.go:648] Add success.
I0321 07:01:43.422932  543705 net.go:770] primary dev: ETH0
I0321 07:01:43.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:01:43.422962  543705 net.go:698] Add success.
I0321 07:01:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:01:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:01:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:01:53.409795  543705 memory.go:184] no items to output this cycle
I0321 07:01:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 07:02:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:02:03.409777  543705 memory.go:184] no items to output this cycle
I0321 07:02:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 07:02:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:02:13.409811  543705 memory.go:191] Add success.
I0321 07:02:13.409819  543705 cpu.go:282] Add success.
W0321 07:02:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:02:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:02:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:02:13.420583  543705 net.go:648] Add success.
I0321 07:02:13.423349  543705 net.go:770] primary dev: ETH0
I0321 07:02:13.423362  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:02:13.423373  543705 net.go:698] Add success.
W0321 07:02:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:02:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 07:02:14.455203  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:02:14.455956  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:02:14.455965  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:02:14.455971  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:02:14.456604  543705 disk_worker.go:494] system disk:vda1
I0321 07:02:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:02:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:02:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:02:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:02:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:02:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:02:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:02:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:02:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:02:23.409790  543705 memory.go:184] no items to output this cycle
I0321 07:02:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 07:02:28.329678  543705 disk_info.go:125] begin check local disk info of client
I0321 07:02:28.332128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:02:28.332134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
E0321 07:02:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:02:33.409795  543705 memory.go:184] no items to output this cycle
I0321 07:02:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 07:02:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:02:43.409795  543705 memory.go:191] Add success.
I0321 07:02:43.409798  543705 cpu.go:282] Add success.
I0321 07:02:43.419851  543705 net.go:648] Add success.
I0321 07:02:43.422534  543705 net.go:770] primary dev: ETH0
I0321 07:02:43.422548  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:02:43.422560  543705 net.go:698] Add success.
I0321 07:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:02:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:02:53.410431  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:02:53.410458  543705 memory.go:184] no items to output this cycle
I0321 07:02:53.410481  543705 cpu.go:275] no items to output this cycle
E0321 07:03:03.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:03:03.409897  543705 memory.go:184] no items to output this cycle
I0321 07:03:03.409935  543705 cpu.go:275] no items to output this cycle
E0321 07:03:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:03:13.409792  543705 memory.go:191] Add success.
I0321 07:03:13.409797  543705 cpu.go:282] Add success.
W0321 07:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:03:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:03:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:03:13.419928  543705 net.go:770] primary dev: ETH0
I0321 07:03:13.419942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:03:13.419954  543705 net.go:698] Add success.
I0321 07:03:13.420299  543705 net.go:648] Add success.
I0321 07:03:13.464369  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"03c07ffe-1700-4c73-ae8f-63436dd80131","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:03:13.464401  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:03:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:03:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:03:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 07:03:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:03:14.456636  543705 disk_worker.go:494] system disk:vda1
I0321 07:03:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:03:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:03:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:03:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:03:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:03:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:03:23.409771  543705 memory.go:184] no items to output this cycle
I0321 07:03:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 07:03:28.333675  543705 disk_info.go:125] begin check local disk info of client
I0321 07:03:28.336169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:03:28.336176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262a40 0xc000262a80]
E0321 07:03:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:03:33.409822  543705 memory.go:184] no items to output this cycle
I0321 07:03:33.409832  543705 cpu.go:275] no items to output this cycle
I0321 07:03:38.773571  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:03:38.773579  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:03:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:03:43.410742  543705 memory.go:191] Add success.
I0321 07:03:43.409814  543705 cpu.go:282] Add success.
I0321 07:03:43.420497  543705 net.go:648] Add success.
I0321 07:03:43.423188  543705 net.go:770] primary dev: ETH0
I0321 07:03:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:03:43.423213  543705 net.go:698] Add success.
I0321 07:03:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:03:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:03:53.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:03:53.409917  543705 memory.go:184] no items to output this cycle
I0321 07:03:53.409921  543705 cpu.go:275] no items to output this cycle
E0321 07:04:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:04:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 07:04:03.409787  543705 memory.go:184] no items to output this cycle
E0321 07:04:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:04:13.409798  543705 memory.go:191] Add success.
I0321 07:04:13.409805  543705 cpu.go:282] Add success.
W0321 07:04:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:04:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:04:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:04:13.420529  543705 net.go:648] Add success.
I0321 07:04:13.423038  543705 net.go:770] primary dev: ETH0
I0321 07:04:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:04:13.423062  543705 net.go:698] Add success.
I0321 07:04:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:04:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:04:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 07:04:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:04:14.456617  543705 disk_worker.go:494] system disk:vda1
I0321 07:04:14.456661  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:04:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:04:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:04:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:04:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:04:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:04:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:04:23.409776  543705 memory.go:184] no items to output this cycle
I0321 07:04:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 07:04:28.337672  543705 disk_info.go:125] begin check local disk info of client
I0321 07:04:28.340171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:04:28.340178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369d40 0xc000369d80]
E0321 07:04:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:04:33.409810  543705 memory.go:184] no items to output this cycle
I0321 07:04:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 07:04:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:04:43.409826  543705 memory.go:191] Add success.
I0321 07:04:43.409833  543705 cpu.go:282] Add success.
I0321 07:04:43.419985  543705 net.go:648] Add success.
I0321 07:04:43.422732  543705 net.go:770] primary dev: ETH0
I0321 07:04:43.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:04:43.422758  543705 net.go:698] Add success.
I0321 07:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:04:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:04:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:04:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:04:53.409917  543705 cpu.go:275] no items to output this cycle
I0321 07:04:53.409920  543705 memory.go:184] no items to output this cycle
E0321 07:05:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:05:03.409763  543705 memory.go:184] no items to output this cycle
I0321 07:05:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 07:05:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:05:13.409782  543705 memory.go:191] Add success.
I0321 07:05:13.409802  543705 cpu.go:282] Add success.
W0321 07:05:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:05:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:05:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:05:13.420127  543705 net.go:648] Add success.
I0321 07:05:13.422771  543705 net.go:770] primary dev: ETH0
I0321 07:05:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:05:13.422797  543705 net.go:698] Add success.
I0321 07:05:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:05:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:05:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 07:05:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:05:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 07:05:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:05:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:05:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:05:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:05:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:05:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:05:23.409771  543705 memory.go:184] no items to output this cycle
I0321 07:05:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 07:05:28.341674  543705 disk_info.go:125] begin check local disk info of client
I0321 07:05:28.344143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:05:28.344149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8540 0xc0003d8580]
E0321 07:05:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:05:33.409822  543705 memory.go:184] no items to output this cycle
I0321 07:05:33.409833  543705 cpu.go:275] no items to output this cycle
E0321 07:05:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:05:43.409793  543705 memory.go:191] Add success.
I0321 07:05:43.409813  543705 cpu.go:282] Add success.
I0321 07:05:43.419896  543705 net.go:648] Add success.
I0321 07:05:43.422822  543705 net.go:770] primary dev: ETH0
I0321 07:05:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:05:43.422851  543705 net.go:698] Add success.
I0321 07:05:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:05:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:05:53.409801  543705 memory.go:184] no items to output this cycle
I0321 07:05:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 07:06:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:06:03.409791  543705 memory.go:184] no items to output this cycle
I0321 07:06:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 07:06:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:06:13.409799  543705 cpu.go:282] Add success.
I0321 07:06:13.409806  543705 memory.go:191] Add success.
W0321 07:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:06:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:06:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:06:13.420105  543705 net.go:648] Add success.
I0321 07:06:13.422824  543705 net.go:770] primary dev: ETH0
I0321 07:06:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:06:13.422849  543705 net.go:698] Add success.
I0321 07:06:13.836367  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6c46cb63-9f43-449f-9b3a-0f7d9efcc0be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:06:13.836401  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:06:14.454687  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:06:14.454867  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:06:14.454877  543705 disk_worker.go:708] disk space is not compliant
W0321 07:06:14.454879  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:06:14.456242  543705 disk_worker.go:494] system disk:vda1
I0321 07:06:14.456295  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:06:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:06:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:06:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:06:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:06:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:06:23.409780  543705 memory.go:184] no items to output this cycle
I0321 07:06:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 07:06:28.345675  543705 disk_info.go:125] begin check local disk info of client
I0321 07:06:28.348173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:06:28.348181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b88c0 0xc0002b8900]
E0321 07:06:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:06:33.409825  543705 memory.go:184] no items to output this cycle
I0321 07:06:33.409839  543705 cpu.go:275] no items to output this cycle
I0321 07:06:38.773742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:06:38.773749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:06:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:06:43.410591  543705 memory.go:191] Add success.
I0321 07:06:43.409805  543705 cpu.go:282] Add success.
I0321 07:06:43.420307  543705 net.go:648] Add success.
I0321 07:06:43.423322  543705 net.go:770] primary dev: ETH0
I0321 07:06:43.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:06:43.423350  543705 net.go:698] Add success.
I0321 07:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:06:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:06:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:06:53.409805  543705 memory.go:184] no items to output this cycle
I0321 07:06:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 07:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:07:03.409786  543705 memory.go:184] no items to output this cycle
I0321 07:07:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:07:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:07:13.409794  543705 memory.go:191] Add success.
I0321 07:07:13.409819  543705 cpu.go:282] Add success.
W0321 07:07:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:07:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:07:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:07:13.420201  543705 net.go:648] Add success.
I0321 07:07:13.423303  543705 net.go:770] primary dev: ETH0
I0321 07:07:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:07:13.423331  543705 net.go:698] Add success.
I0321 07:07:13.452851  543705 event_worker.go:152] Polling the log file for events...
W0321 07:07:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:07:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 07:07:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:07:14.455911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:07:14.455920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:07:14.455926  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:07:14.456534  543705 disk_worker.go:494] system disk:vda1
I0321 07:07:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:07:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:07:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:07:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:07:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:07:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:07:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:07:16.472313  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:07:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:07:23.409800  543705 memory.go:184] no items to output this cycle
I0321 07:07:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 07:07:28.349674  543705 disk_info.go:125] begin check local disk info of client
I0321 07:07:28.352115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:07:28.352121  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003683c0 0xc000368400]
E0321 07:07:33.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:07:33.409832  543705 memory.go:184] no items to output this cycle
I0321 07:07:33.409844  543705 cpu.go:275] no items to output this cycle
E0321 07:07:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:07:43.409785  543705 memory.go:191] Add success.
I0321 07:07:43.409819  543705 cpu.go:282] Add success.
I0321 07:07:43.420047  543705 net.go:648] Add success.
I0321 07:07:43.423061  543705 net.go:770] primary dev: ETH0
I0321 07:07:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:07:43.423093  543705 net.go:698] Add success.
I0321 07:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:07:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:07:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:07:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:07:53.409884  543705 cpu.go:275] no items to output this cycle
I0321 07:07:53.409899  543705 memory.go:184] no items to output this cycle
E0321 07:08:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:08:03.409782  543705 memory.go:184] no items to output this cycle
I0321 07:08:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 07:08:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:08:13.409778  543705 memory.go:191] Add success.
W0321 07:08:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 07:08:13.409805  543705 cpu.go:282] Add success.
W0321 07:08:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:08:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:08:13.420083  543705 net.go:648] Add success.
I0321 07:08:13.422832  543705 net.go:770] primary dev: ETH0
I0321 07:08:13.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:08:13.422866  543705 net.go:698] Add success.
I0321 07:08:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:08:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:08:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 07:08:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:08:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 07:08:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:08:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:08:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:08:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:08:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:08:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:08:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 07:08:23.409783  543705 memory.go:184] no items to output this cycle
I0321 07:08:28.354685  543705 disk_info.go:125] begin check local disk info of client
I0321 07:08:28.357245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:08:28.357252  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368740 0xc000368780]
E0321 07:08:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:08:33.409779  543705 memory.go:184] no items to output this cycle
I0321 07:08:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 07:08:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:08:43.409792  543705 memory.go:191] Add success.
I0321 07:08:43.409794  543705 cpu.go:282] Add success.
I0321 07:08:43.419988  543705 net.go:648] Add success.
I0321 07:08:43.422538  543705 net.go:770] primary dev: ETH0
I0321 07:08:43.422552  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:08:43.422565  543705 net.go:698] Add success.
I0321 07:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:08:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:08:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:08:53.409773  543705 cpu.go:275] no items to output this cycle
I0321 07:08:53.409786  543705 memory.go:184] no items to output this cycle
E0321 07:09:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:09:03.409812  543705 memory.go:184] no items to output this cycle
I0321 07:09:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 07:09:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:09:13.409787  543705 memory.go:191] Add success.
I0321 07:09:13.409804  543705 cpu.go:282] Add success.
W0321 07:09:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:09:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:09:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:09:13.420291  543705 net.go:648] Add success.
I0321 07:09:13.423051  543705 net.go:770] primary dev: ETH0
I0321 07:09:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:09:13.423078  543705 net.go:698] Add success.
I0321 07:09:13.469292  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7f7f6f48-2c3f-40d3-9dd1-d068ae169821","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:09:13.469325  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:09:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:09:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:09:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 07:09:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:09:14.456604  543705 disk_worker.go:494] system disk:vda1
I0321 07:09:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:09:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:09:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:09:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:09:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:09:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:09:23.409763  543705 memory.go:184] no items to output this cycle
I0321 07:09:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 07:09:28.357677  543705 disk_info.go:125] begin check local disk info of client
I0321 07:09:28.360148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:09:28.360155  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368ec0 0xc000368f00]
E0321 07:09:33.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:09:33.409842  543705 memory.go:184] no items to output this cycle
I0321 07:09:33.409857  543705 cpu.go:275] no items to output this cycle
I0321 07:09:38.773981  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:09:38.773989  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:09:43.410739  543705 memory.go:191] Add success.
I0321 07:09:43.409793  543705 cpu.go:282] Add success.
I0321 07:09:43.420436  543705 net.go:648] Add success.
I0321 07:09:43.423120  543705 net.go:770] primary dev: ETH0
I0321 07:09:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:09:43.423146  543705 net.go:698] Add success.
I0321 07:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:09:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:09:53.409904  543705 memory.go:184] no items to output this cycle
I0321 07:09:53.409881  543705 cpu.go:275] no items to output this cycle
E0321 07:10:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:10:03.409766  543705 memory.go:184] no items to output this cycle
I0321 07:10:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 07:10:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:10:13.409819  543705 memory.go:191] Add success.
I0321 07:10:13.409824  543705 cpu.go:282] Add success.
W0321 07:10:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:10:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:10:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:10:13.420156  543705 net.go:648] Add success.
I0321 07:10:13.423209  543705 net.go:770] primary dev: ETH0
I0321 07:10:13.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:10:13.423235  543705 net.go:698] Add success.
I0321 07:10:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:10:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:10:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 07:10:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:10:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 07:10:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:10:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:10:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:10:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:10:23.409772  543705 memory.go:184] no items to output this cycle
I0321 07:10:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 07:10:28.361671  543705 disk_info.go:125] begin check local disk info of client
I0321 07:10:28.364224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:10:28.364230  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492400 0xc000492440]
E0321 07:10:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:10:33.409798  543705 memory.go:184] no items to output this cycle
I0321 07:10:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:10:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:10:43.409819  543705 memory.go:191] Add success.
I0321 07:10:43.409826  543705 cpu.go:282] Add success.
I0321 07:10:43.419957  543705 net.go:648] Add success.
I0321 07:10:43.422623  543705 net.go:770] primary dev: ETH0
I0321 07:10:43.422638  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:10:43.422652  543705 net.go:698] Add success.
I0321 07:10:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:10:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:10:53.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:10:53.409879  543705 cpu.go:275] no items to output this cycle
I0321 07:10:53.409890  543705 memory.go:184] no items to output this cycle
E0321 07:11:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:11:03.409775  543705 memory.go:184] no items to output this cycle
I0321 07:11:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 07:11:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:11:13.409822  543705 memory.go:191] Add success.
I0321 07:11:13.409826  543705 cpu.go:282] Add success.
W0321 07:11:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:11:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:11:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:11:13.420162  543705 net.go:648] Add success.
I0321 07:11:13.422922  543705 net.go:770] primary dev: ETH0
I0321 07:11:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:11:13.422946  543705 net.go:698] Add success.
I0321 07:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:11:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:11:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 07:11:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:11:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 07:11:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:11:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:11:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:11:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:11:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:11:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:11:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 07:11:23.409783  543705 memory.go:184] no items to output this cycle
I0321 07:11:28.365675  543705 disk_info.go:125] begin check local disk info of client
I0321 07:11:28.368160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:11:28.368166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004825c0 0xc000482600]
E0321 07:11:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:11:33.409814  543705 memory.go:184] no items to output this cycle
I0321 07:11:33.409966  543705 cpu.go:275] no items to output this cycle
E0321 07:11:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:11:43.409794  543705 memory.go:191] Add success.
I0321 07:11:43.409800  543705 cpu.go:282] Add success.
I0321 07:11:43.420072  543705 net.go:648] Add success.
I0321 07:11:43.422684  543705 net.go:770] primary dev: ETH0
I0321 07:11:43.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:11:43.422709  543705 net.go:698] Add success.
I0321 07:11:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:11:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:11:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:11:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:11:53.409769  543705 memory.go:184] no items to output this cycle
I0321 07:11:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 07:12:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:12:03.409777  543705 memory.go:184] no items to output this cycle
I0321 07:12:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 07:12:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:12:13.409789  543705 cpu.go:282] Add success.
I0321 07:12:13.409803  543705 memory.go:191] Add success.
W0321 07:12:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:12:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:12:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:12:13.420065  543705 net.go:648] Add success.
I0321 07:12:13.422830  543705 net.go:770] primary dev: ETH0
I0321 07:12:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:12:13.422860  543705 net.go:698] Add success.
I0321 07:12:13.463847  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c69dabc-7d2f-41aa-95ce-0d6e73e95aa2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:12:13.463883  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 07:12:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:12:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 07:12:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:12:14.456109  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:12:14.456117  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:12:14.456123  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:12:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 07:12:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:12:15.456882  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:12:15.456891  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:12:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:12:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:12:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:12:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:12:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:12:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:12:23.409760  543705 memory.go:184] no items to output this cycle
I0321 07:12:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 07:12:28.369674  543705 disk_info.go:125] begin check local disk info of client
I0321 07:12:28.372146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:12:28.372162  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369f40 0xc0004dc000]
I0321 07:12:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 07:12:33.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:12:33.409831  543705 memory.go:184] no items to output this cycle
I0321 07:12:38.774126  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:12:38.774134  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:12:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:12:43.410894  543705 memory.go:191] Add success.
I0321 07:12:43.409825  543705 cpu.go:282] Add success.
I0321 07:12:43.420967  543705 net.go:648] Add success.
I0321 07:12:43.423841  543705 net.go:770] primary dev: ETH0
I0321 07:12:43.423859  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:12:43.423871  543705 net.go:698] Add success.
I0321 07:12:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:12:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:12:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:12:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:12:53.409794  543705 memory.go:184] no items to output this cycle
I0321 07:12:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 07:13:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:13:03.409774  543705 memory.go:184] no items to output this cycle
I0321 07:13:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 07:13:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:13:13.409819  543705 memory.go:191] Add success.
I0321 07:13:13.409829  543705 cpu.go:282] Add success.
W0321 07:13:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:13:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:13:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:13:13.420124  543705 net.go:648] Add success.
I0321 07:13:13.422579  543705 net.go:770] primary dev: ETH0
I0321 07:13:13.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:13:13.422603  543705 net.go:698] Add success.
I0321 07:13:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:13:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:13:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 07:13:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:13:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 07:13:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:13:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:13:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:13:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:13:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:13:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:13:23.409773  543705 memory.go:184] no items to output this cycle
I0321 07:13:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 07:13:28.373674  543705 disk_info.go:125] begin check local disk info of client
I0321 07:13:28.376155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:13:28.376162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dcf00 0xc000230000]
I0321 07:13:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 07:13:33.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:13:33.409842  543705 memory.go:184] no items to output this cycle
E0321 07:13:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:13:43.409793  543705 memory.go:191] Add success.
I0321 07:13:43.409818  543705 cpu.go:282] Add success.
I0321 07:13:43.420018  543705 net.go:648] Add success.
I0321 07:13:43.422819  543705 net.go:770] primary dev: ETH0
I0321 07:13:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:13:43.422846  543705 net.go:698] Add success.
I0321 07:13:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:13:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:13:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:13:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:13:53.409792  543705 memory.go:184] no items to output this cycle
I0321 07:13:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 07:14:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:14:03.409783  543705 memory.go:184] no items to output this cycle
I0321 07:14:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 07:14:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:14:13.409778  543705 memory.go:191] Add success.
W0321 07:14:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:14:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:14:13.409818  543705 cpu.go:282] Add success.
I0321 07:14:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:14:13.420160  543705 net.go:648] Add success.
I0321 07:14:13.421100  543705 net.go:770] primary dev: ETH0
I0321 07:14:13.421115  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:14:13.421129  543705 net.go:698] Add success.
I0321 07:14:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:14:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:14:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 07:14:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:14:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 07:14:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:14:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:14:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:14:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:14:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:14:23.409767  543705 memory.go:184] no items to output this cycle
I0321 07:14:23.409876  543705 cpu.go:275] no items to output this cycle
I0321 07:14:28.377679  543705 disk_info.go:125] begin check local disk info of client
I0321 07:14:28.380139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:14:28.380145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353340 0xc000353380]
E0321 07:14:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:14:33.409809  543705 memory.go:184] no items to output this cycle
I0321 07:14:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 07:14:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:14:43.409795  543705 cpu.go:282] Add success.
I0321 07:14:43.409805  543705 memory.go:191] Add success.
I0321 07:14:43.419982  543705 net.go:648] Add success.
I0321 07:14:43.422713  543705 net.go:770] primary dev: ETH0
I0321 07:14:43.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:14:43.422742  543705 net.go:698] Add success.
I0321 07:14:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:14:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:14:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:14:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:14:53.409781  543705 memory.go:184] no items to output this cycle
I0321 07:14:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 07:15:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:15:03.409786  543705 memory.go:184] no items to output this cycle
I0321 07:15:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 07:15:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:15:13.409786  543705 cpu.go:282] Add success.
I0321 07:15:13.409794  543705 memory.go:191] Add success.
W0321 07:15:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:15:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:15:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:15:13.420150  543705 net.go:648] Add success.
I0321 07:15:13.423426  543705 net.go:770] primary dev: ETH0
I0321 07:15:13.423439  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:15:13.423449  543705 net.go:698] Add success.
I0321 07:15:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:15:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:15:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 07:15:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:15:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 07:15:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:15:16.125419  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"22eb4fbf-0b47-4e8c-ae1f-36968f688f69","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:15:16.125550  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:15:16.458031  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:15:16.458095  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:15:16.458129  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:15:16.472465  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:15:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:15:23.409764  543705 memory.go:184] no items to output this cycle
I0321 07:15:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 07:15:28.381678  543705 disk_info.go:125] begin check local disk info of client
I0321 07:15:28.384149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:15:28.384156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353580 0xc0003535c0]
I0321 07:15:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 07:15:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:15:33.409825  543705 memory.go:184] no items to output this cycle
I0321 07:15:38.775608  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:15:38.775619  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:15:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:15:43.410905  543705 memory.go:191] Add success.
I0321 07:15:43.409835  543705 cpu.go:282] Add success.
I0321 07:15:43.420685  543705 net.go:648] Add success.
I0321 07:15:43.424152  543705 net.go:770] primary dev: ETH0
I0321 07:15:43.424166  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:15:43.424180  543705 net.go:698] Add success.
I0321 07:15:46.458049  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:15:46.458117  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:15:46.458144  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:15:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:15:53.409777  543705 memory.go:184] no items to output this cycle
I0321 07:15:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 07:16:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:16:03.409790  543705 cpu.go:275] no items to output this cycle
I0321 07:16:03.409793  543705 memory.go:184] no items to output this cycle
E0321 07:16:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:16:13.409797  543705 memory.go:191] Add success.
I0321 07:16:13.409816  543705 cpu.go:282] Add success.
W0321 07:16:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:16:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:16:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:16:13.420199  543705 net.go:648] Add success.
I0321 07:16:13.423135  543705 net.go:770] primary dev: ETH0
I0321 07:16:13.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:16:13.423163  543705 net.go:698] Add success.
I0321 07:16:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:16:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:16:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 07:16:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:16:14.456531  543705 disk_worker.go:494] system disk:vda1
I0321 07:16:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:16:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:16:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:16:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:16:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:16:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:16:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:16:23.409786  543705 memory.go:184] no items to output this cycle
I0321 07:16:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 07:16:28.385676  543705 disk_info.go:125] begin check local disk info of client
I0321 07:16:28.388160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:16:28.388167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaec0 0xc0001aaf00]
E0321 07:16:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:16:33.409809  543705 memory.go:184] no items to output this cycle
I0321 07:16:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 07:16:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:16:43.409807  543705 memory.go:191] Add success.
I0321 07:16:43.409810  543705 cpu.go:282] Add success.
I0321 07:16:43.419966  543705 net.go:648] Add success.
I0321 07:16:43.422885  543705 net.go:770] primary dev: ETH0
I0321 07:16:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:16:43.422920  543705 net.go:698] Add success.
I0321 07:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:16:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:16:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:16:53.409785  543705 memory.go:184] no items to output this cycle
I0321 07:16:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 07:17:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:17:03.409787  543705 memory.go:184] no items to output this cycle
I0321 07:17:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 07:17:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:17:13.409794  543705 memory.go:191] Add success.
I0321 07:17:13.409810  543705 cpu.go:282] Add success.
W0321 07:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:17:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:17:13.420067  543705 net.go:648] Add success.
I0321 07:17:13.422922  543705 net.go:770] primary dev: ETH0
I0321 07:17:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:17:13.422955  543705 net.go:698] Add success.
I0321 07:17:13.453489  543705 event_worker.go:152] Polling the log file for events...
W0321 07:17:14.455353  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:17:14.455372  543705 disk_worker.go:708] disk space is not compliant
W0321 07:17:14.455376  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:17:14.456885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:17:14.456894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:17:14.456900  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:17:14.457438  543705 disk_worker.go:494] system disk:vda1
I0321 07:17:14.457467  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:17:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:17:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:17:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:17:16.458014  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:17:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:17:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:17:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:17:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:17:23.409771  543705 cpu.go:275] no items to output this cycle
I0321 07:17:23.409793  543705 memory.go:184] no items to output this cycle
I0321 07:17:28.392066  543705 disk_info.go:125] begin check local disk info of client
I0321 07:17:28.394629  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:17:28.394635  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a540 0xc00039a580]
I0321 07:17:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 07:17:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:17:33.409822  543705 memory.go:184] no items to output this cycle
E0321 07:17:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:17:43.409825  543705 memory.go:191] Add success.
I0321 07:17:43.409835  543705 cpu.go:282] Add success.
I0321 07:17:43.420082  543705 net.go:648] Add success.
I0321 07:17:43.423193  543705 net.go:770] primary dev: ETH0
I0321 07:17:43.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:17:43.423218  543705 net.go:698] Add success.
I0321 07:17:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:17:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:17:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:17:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:17:53.409772  543705 memory.go:184] no items to output this cycle
I0321 07:17:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 07:18:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:18:03.409788  543705 memory.go:184] no items to output this cycle
I0321 07:18:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 07:18:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:18:13.409817  543705 memory.go:191] Add success.
I0321 07:18:13.409823  543705 cpu.go:282] Add success.
W0321 07:18:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:18:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:18:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:18:13.420600  543705 net.go:648] Add success.
I0321 07:18:13.423592  543705 net.go:770] primary dev: ETH0
I0321 07:18:13.423607  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:18:13.423620  543705 net.go:698] Add success.
I0321 07:18:13.669807  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b32835f2-d25e-45d5-8c38-f6f59a61025b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:18:13.669840  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:18:14.454730  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:18:14.454969  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:18:14.454979  543705 disk_worker.go:708] disk space is not compliant
W0321 07:18:14.454981  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:18:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 07:18:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:18:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:18:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:18:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:18:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:18:23.409761  543705 memory.go:184] no items to output this cycle
I0321 07:18:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 07:18:28.397671  543705 disk_info.go:125] begin check local disk info of client
I0321 07:18:28.400238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:18:28.400244  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046e0c0 0xc00046e100]
E0321 07:18:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:18:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 07:18:33.409800  543705 memory.go:184] no items to output this cycle
I0321 07:18:38.775760  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:18:38.775767  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:18:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:18:43.409784  543705 memory.go:191] Add success.
I0321 07:18:43.409797  543705 cpu.go:282] Add success.
I0321 07:18:43.419905  543705 net.go:648] Add success.
I0321 07:18:43.420855  543705 net.go:770] primary dev: ETH0
I0321 07:18:43.420870  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:18:43.420883  543705 net.go:698] Add success.
I0321 07:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:18:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:18:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:18:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:18:53.409795  543705 memory.go:184] no items to output this cycle
I0321 07:18:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 07:19:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:19:03.409805  543705 memory.go:184] no items to output this cycle
I0321 07:19:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 07:19:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:19:13.409780  543705 memory.go:191] Add success.
I0321 07:19:13.409803  543705 cpu.go:282] Add success.
W0321 07:19:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:19:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:19:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:19:13.420512  543705 net.go:648] Add success.
I0321 07:19:13.423772  543705 net.go:770] primary dev: ETH0
I0321 07:19:13.423787  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:19:13.423801  543705 net.go:698] Add success.
I0321 07:19:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:19:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:19:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 07:19:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:19:14.456482  543705 disk_worker.go:494] system disk:vda1
I0321 07:19:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:19:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:19:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:19:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:19:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:19:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:19:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:19:23.409794  543705 memory.go:184] no items to output this cycle
I0321 07:19:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 07:19:28.402117  543705 disk_info.go:125] begin check local disk info of client
I0321 07:19:28.404578  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:19:28.404584  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515c40 0xc000515c80]
E0321 07:19:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:19:33.409797  543705 memory.go:184] no items to output this cycle
I0321 07:19:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:19:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:19:43.409798  543705 memory.go:191] Add success.
I0321 07:19:43.409815  543705 cpu.go:282] Add success.
I0321 07:19:43.420044  543705 net.go:648] Add success.
I0321 07:19:43.422861  543705 net.go:770] primary dev: ETH0
I0321 07:19:43.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:19:43.422890  543705 net.go:698] Add success.
I0321 07:19:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:19:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:19:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:19:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:19:53.409764  543705 memory.go:184] no items to output this cycle
I0321 07:19:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 07:20:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:20:03.409771  543705 memory.go:184] no items to output this cycle
I0321 07:20:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 07:20:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:20:13.409809  543705 memory.go:191] Add success.
I0321 07:20:13.409816  543705 cpu.go:282] Add success.
W0321 07:20:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:20:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:20:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:20:13.420052  543705 net.go:648] Add success.
I0321 07:20:13.422614  543705 net.go:770] primary dev: ETH0
I0321 07:20:13.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:20:13.422644  543705 net.go:698] Add success.
I0321 07:20:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:20:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:20:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0321 07:20:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:20:14.456486  543705 disk_worker.go:494] system disk:vda1
I0321 07:20:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:20:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:20:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:20:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:20:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:20:23.409784  543705 memory.go:184] no items to output this cycle
I0321 07:20:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 07:20:28.405674  543705 disk_info.go:125] begin check local disk info of client
I0321 07:20:28.408291  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:20:28.408297  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d40 0xc0000c4d80]
E0321 07:20:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:20:33.409760  543705 memory.go:184] no items to output this cycle
I0321 07:20:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 07:20:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:20:43.409817  543705 memory.go:191] Add success.
I0321 07:20:43.409823  543705 cpu.go:282] Add success.
I0321 07:20:43.419895  543705 net.go:648] Add success.
I0321 07:20:43.422844  543705 net.go:770] primary dev: ETH0
I0321 07:20:43.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:20:43.422869  543705 net.go:698] Add success.
I0321 07:20:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:20:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:20:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:20:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:20:53.409795  543705 memory.go:184] no items to output this cycle
I0321 07:20:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 07:21:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:21:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 07:21:03.409796  543705 memory.go:184] no items to output this cycle
E0321 07:21:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:21:13.409796  543705 memory.go:191] Add success.
I0321 07:21:13.409795  543705 cpu.go:282] Add success.
W0321 07:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:21:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:21:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:21:13.420408  543705 net.go:648] Add success.
I0321 07:21:13.423651  543705 net.go:770] primary dev: ETH0
I0321 07:21:13.423663  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:21:13.423675  543705 net.go:698] Add success.
I0321 07:21:13.469687  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7382d190-4749-4674-ba58-225bdc01bb51","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:21:13.469719  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:21:14.455275  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:21:14.455289  543705 disk_worker.go:708] disk space is not compliant
W0321 07:21:14.455292  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:21:14.457141  543705 disk_worker.go:494] system disk:vda1
I0321 07:21:14.457180  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:21:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:21:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:21:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:21:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:21:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:21:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:21:23.409770  543705 memory.go:184] no items to output this cycle
I0321 07:21:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 07:21:28.409682  543705 disk_info.go:125] begin check local disk info of client
I0321 07:21:28.412161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:21:28.412177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e40 0xc0000c4e80]
E0321 07:21:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:21:33.409806  543705 memory.go:184] no items to output this cycle
I0321 07:21:33.409823  543705 cpu.go:275] no items to output this cycle
I0321 07:21:38.775915  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:21:38.775923  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:21:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:21:43.410784  543705 memory.go:191] Add success.
I0321 07:21:43.409797  543705 cpu.go:282] Add success.
I0321 07:21:43.420523  543705 net.go:648] Add success.
I0321 07:21:43.423254  543705 net.go:770] primary dev: ETH0
I0321 07:21:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:21:43.423278  543705 net.go:698] Add success.
I0321 07:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:21:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:21:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:21:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:21:53.409763  543705 memory.go:184] no items to output this cycle
I0321 07:21:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 07:22:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:22:03.409809  543705 memory.go:184] no items to output this cycle
I0321 07:22:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 07:22:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:22:13.409780  543705 memory.go:191] Add success.
I0321 07:22:13.409804  543705 cpu.go:282] Add success.
W0321 07:22:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:22:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:22:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:22:13.420109  543705 net.go:648] Add success.
I0321 07:22:13.422762  543705 net.go:770] primary dev: ETH0
I0321 07:22:13.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:22:13.422790  543705 net.go:698] Add success.
W0321 07:22:14.455602  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:22:14.455616  543705 disk_worker.go:708] disk space is not compliant
W0321 07:22:14.455620  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:22:14.456877  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:22:14.456887  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:22:14.456893  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:22:14.457608  543705 disk_worker.go:494] system disk:vda1
I0321 07:22:14.457636  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:22:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:22:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:22:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:22:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:22:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:22:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:22:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:22:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:22:23.409796  543705 memory.go:184] no items to output this cycle
I0321 07:22:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 07:22:28.412788  543705 disk_info.go:125] begin check local disk info of client
I0321 07:22:28.415307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:22:28.415313  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0240 0xc0004a0280]
E0321 07:22:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:22:33.409777  543705 memory.go:184] no items to output this cycle
I0321 07:22:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 07:22:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:22:43.409791  543705 memory.go:191] Add success.
I0321 07:22:43.409791  543705 cpu.go:282] Add success.
I0321 07:22:43.419946  543705 net.go:648] Add success.
I0321 07:22:43.422535  543705 net.go:770] primary dev: ETH0
I0321 07:22:43.422548  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:22:43.422560  543705 net.go:698] Add success.
I0321 07:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:22:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:22:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:22:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:22:53.409778  543705 memory.go:184] no items to output this cycle
I0321 07:22:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 07:23:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:23:03.409777  543705 memory.go:184] no items to output this cycle
I0321 07:23:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 07:23:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:23:13.409806  543705 memory.go:191] Add success.
I0321 07:23:13.409811  543705 cpu.go:282] Add success.
W0321 07:23:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:23:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:23:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:23:13.420166  543705 net.go:648] Add success.
I0321 07:23:13.422871  543705 net.go:770] primary dev: ETH0
I0321 07:23:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:23:13.422900  543705 net.go:698] Add success.
I0321 07:23:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:23:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:23:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 07:23:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:23:14.458913  543705 disk_worker.go:494] system disk:vda1
I0321 07:23:14.458941  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:23:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:23:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:23:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:23:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:23:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:23:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:23:23.409801  543705 memory.go:184] no items to output this cycle
I0321 07:23:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 07:23:28.415794  543705 disk_info.go:125] begin check local disk info of client
I0321 07:23:28.418274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:23:28.418281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab540 0xc0001ab580]
E0321 07:23:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:23:33.409797  543705 memory.go:184] no items to output this cycle
I0321 07:23:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 07:23:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:23:43.409793  543705 memory.go:191] Add success.
I0321 07:23:43.409816  543705 cpu.go:282] Add success.
I0321 07:23:43.419899  543705 net.go:648] Add success.
I0321 07:23:43.422786  543705 net.go:770] primary dev: ETH0
I0321 07:23:43.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:23:43.422814  543705 net.go:698] Add success.
I0321 07:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:23:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:23:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:23:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:23:53.409772  543705 memory.go:184] no items to output this cycle
I0321 07:23:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 07:24:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:24:03.409799  543705 memory.go:184] no items to output this cycle
I0321 07:24:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 07:24:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:24:13.409795  543705 memory.go:191] Add success.
I0321 07:24:13.409795  543705 cpu.go:282] Add success.
W0321 07:24:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:24:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:24:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:24:13.420190  543705 net.go:648] Add success.
I0321 07:24:13.422817  543705 net.go:770] primary dev: ETH0
I0321 07:24:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:24:13.422842  543705 net.go:698] Add success.
I0321 07:24:13.528729  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"838b8c4f-2257-4d91-b5f0-3357fd50a3e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:24:13.528763  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:24:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:24:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:24:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 07:24:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:24:14.456914  543705 disk_worker.go:494] system disk:vda1
I0321 07:24:14.456958  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:24:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:24:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:24:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:24:23.409799  543705 memory.go:184] no items to output this cycle
I0321 07:24:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 07:24:28.418795  543705 disk_info.go:125] begin check local disk info of client
I0321 07:24:28.421299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:24:28.421306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1640 0xc0003f1680]
E0321 07:24:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:24:33.409759  543705 memory.go:184] no items to output this cycle
I0321 07:24:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 07:24:38.777591  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:24:38.777599  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:24:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:24:43.410674  543705 memory.go:191] Add success.
I0321 07:24:43.409807  543705 cpu.go:282] Add success.
I0321 07:24:43.420426  543705 net.go:648] Add success.
I0321 07:24:43.423269  543705 net.go:770] primary dev: ETH0
I0321 07:24:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:24:43.423296  543705 net.go:698] Add success.
I0321 07:24:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:24:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:24:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:24:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:24:53.409798  543705 memory.go:184] no items to output this cycle
I0321 07:24:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 07:25:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:25:03.409797  543705 memory.go:184] no items to output this cycle
I0321 07:25:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 07:25:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:25:13.409790  543705 memory.go:191] Add success.
I0321 07:25:13.409791  543705 cpu.go:282] Add success.
W0321 07:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:25:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:25:13.420119  543705 net.go:648] Add success.
I0321 07:25:13.423067  543705 net.go:770] primary dev: ETH0
I0321 07:25:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:25:13.423095  543705 net.go:698] Add success.
I0321 07:25:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:25:14.455388  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:25:14.455407  543705 disk_worker.go:708] disk space is not compliant
W0321 07:25:14.455411  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:25:14.457013  543705 disk_worker.go:494] system disk:vda1
I0321 07:25:14.457042  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:25:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:25:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:25:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:25:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:25:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:25:23.409796  543705 memory.go:184] no items to output this cycle
I0321 07:25:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 07:25:28.421794  543705 disk_info.go:125] begin check local disk info of client
I0321 07:25:28.424336  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:25:28.424343  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003523c0 0xc000352400]
E0321 07:25:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:25:33.409765  543705 memory.go:184] no items to output this cycle
I0321 07:25:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 07:25:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:25:43.409818  543705 memory.go:191] Add success.
I0321 07:25:43.409826  543705 cpu.go:282] Add success.
I0321 07:25:43.419979  543705 net.go:648] Add success.
I0321 07:25:43.422947  543705 net.go:770] primary dev: ETH0
I0321 07:25:43.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:25:43.422973  543705 net.go:698] Add success.
I0321 07:25:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:25:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:25:53.410513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:25:53.410531  543705 memory.go:184] no items to output this cycle
I0321 07:25:53.410535  543705 cpu.go:275] no items to output this cycle
E0321 07:26:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:26:03.409782  543705 memory.go:184] no items to output this cycle
I0321 07:26:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 07:26:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:26:13.409793  543705 memory.go:191] Add success.
I0321 07:26:13.409797  543705 cpu.go:282] Add success.
W0321 07:26:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:26:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:26:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:26:13.420058  543705 net.go:648] Add success.
I0321 07:26:13.423002  543705 net.go:770] primary dev: ETH0
I0321 07:26:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:26:13.423026  543705 net.go:698] Add success.
I0321 07:26:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:26:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:26:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 07:26:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:26:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 07:26:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:26:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:26:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:26:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:26:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:26:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:26:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:26:23.409771  543705 memory.go:184] no items to output this cycle
I0321 07:26:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 07:26:28.424814  543705 disk_info.go:125] begin check local disk info of client
I0321 07:26:28.427324  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:26:28.427330  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003521c0 0xc000352200]
E0321 07:26:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:26:33.409776  543705 memory.go:184] no items to output this cycle
I0321 07:26:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 07:26:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:26:43.409829  543705 memory.go:191] Add success.
I0321 07:26:43.409841  543705 cpu.go:282] Add success.
I0321 07:26:43.419891  543705 net.go:648] Add success.
I0321 07:26:43.422972  543705 net.go:770] primary dev: ETH0
I0321 07:26:43.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:26:43.422999  543705 net.go:698] Add success.
I0321 07:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:26:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:26:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:26:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:26:53.409776  543705 memory.go:184] no items to output this cycle
I0321 07:26:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 07:27:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:27:03.409764  543705 memory.go:184] no items to output this cycle
I0321 07:27:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 07:27:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:27:13.409776  543705 memory.go:191] Add success.
I0321 07:27:13.409790  543705 cpu.go:282] Add success.
W0321 07:27:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:27:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:27:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:27:13.420035  543705 net.go:648] Add success.
I0321 07:27:13.429235  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 07:27:13.429311  543705 net.go:770] primary dev: ETH0
I0321 07:27:13.429325  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:27:13.429340  543705 net.go:698] Add success.
I0321 07:27:13.452880  543705 event_worker.go:152] Polling the log file for events...
I0321 07:27:13.468826  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"51053155-69ad-445f-b7a4-5ac030038c78","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:27:13.468860  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 07:27:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:27:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 07:27:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:27:14.456105  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:27:14.456114  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:27:14.456120  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:27:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 07:27:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:27:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:27:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:27:16.457905  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:27:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:27:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:27:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:27:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:27:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:27:23.409794  543705 memory.go:184] no items to output this cycle
I0321 07:27:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 07:27:28.427827  543705 disk_info.go:125] begin check local disk info of client
I0321 07:27:28.430282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:27:28.430288  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c50c0 0xc0000c5100]
E0321 07:27:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:27:33.409800  543705 memory.go:184] no items to output this cycle
I0321 07:27:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 07:27:38.777741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:27:38.777748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:27:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:27:43.410529  543705 memory.go:191] Add success.
I0321 07:27:43.409816  543705 cpu.go:282] Add success.
I0321 07:27:43.420220  543705 net.go:648] Add success.
I0321 07:27:43.422557  543705 net.go:770] primary dev: ETH0
I0321 07:27:43.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:27:43.422584  543705 net.go:698] Add success.
I0321 07:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:27:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:27:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:27:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:27:53.409778  543705 memory.go:184] no items to output this cycle
I0321 07:27:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 07:28:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:28:03.409795  543705 memory.go:184] no items to output this cycle
I0321 07:28:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 07:28:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:28:13.409797  543705 memory.go:191] Add success.
I0321 07:28:13.409818  543705 cpu.go:282] Add success.
W0321 07:28:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:28:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:28:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:28:13.420166  543705 net.go:648] Add success.
I0321 07:28:13.422723  543705 net.go:770] primary dev: ETH0
I0321 07:28:13.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:28:13.422750  543705 net.go:698] Add success.
I0321 07:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:28:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:28:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 07:28:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:28:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 07:28:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:28:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:28:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:28:23.409790  543705 memory.go:184] no items to output this cycle
I0321 07:28:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 07:28:28.430852  543705 disk_info.go:125] begin check local disk info of client
I0321 07:28:28.433304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:28:28.433310  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004633c0 0xc000463400]
E0321 07:28:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:28:33.409800  543705 memory.go:184] no items to output this cycle
I0321 07:28:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 07:28:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:28:43.409799  543705 memory.go:191] Add success.
I0321 07:28:43.409815  543705 cpu.go:282] Add success.
I0321 07:28:43.419897  543705 net.go:648] Add success.
I0321 07:28:43.422667  543705 net.go:770] primary dev: ETH0
I0321 07:28:43.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:28:43.422706  543705 net.go:698] Add success.
I0321 07:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:28:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:28:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:28:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:28:53.409788  543705 memory.go:184] no items to output this cycle
I0321 07:28:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 07:29:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:29:03.409794  543705 memory.go:184] no items to output this cycle
I0321 07:29:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 07:29:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:29:13.409801  543705 memory.go:191] Add success.
I0321 07:29:13.409802  543705 cpu.go:282] Add success.
W0321 07:29:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:29:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:29:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:29:13.420151  543705 net.go:648] Add success.
I0321 07:29:13.422954  543705 net.go:770] primary dev: ETH0
I0321 07:29:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:29:13.422979  543705 net.go:698] Add success.
I0321 07:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:29:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:29:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 07:29:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:29:14.457374  543705 disk_worker.go:494] system disk:vda1
I0321 07:29:14.457477  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:29:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:29:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:29:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:29:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:29:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:29:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:29:23.409784  543705 memory.go:184] no items to output this cycle
I0321 07:29:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 07:29:28.433860  543705 disk_info.go:125] begin check local disk info of client
I0321 07:29:28.436288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:29:28.436294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c50c0 0xc0000c5100]
E0321 07:29:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:29:33.409797  543705 memory.go:184] no items to output this cycle
I0321 07:29:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 07:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:29:43.409788  543705 memory.go:191] Add success.
I0321 07:29:43.409810  543705 cpu.go:282] Add success.
I0321 07:29:43.419995  543705 net.go:648] Add success.
I0321 07:29:43.423108  543705 net.go:770] primary dev: ETH0
I0321 07:29:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:29:43.423135  543705 net.go:698] Add success.
I0321 07:29:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:29:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:29:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:29:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:29:53.409815  543705 memory.go:184] no items to output this cycle
I0321 07:29:53.409828  543705 cpu.go:275] no items to output this cycle
E0321 07:30:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:30:03.409789  543705 memory.go:184] no items to output this cycle
I0321 07:30:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 07:30:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:30:13.409781  543705 memory.go:191] Add success.
I0321 07:30:13.409786  543705 cpu.go:282] Add success.
W0321 07:30:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:30:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:30:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:30:13.420101  543705 net.go:648] Add success.
I0321 07:30:13.422514  543705 net.go:770] primary dev: ETH0
I0321 07:30:13.422528  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:30:13.422542  543705 net.go:698] Add success.
I0321 07:30:13.464885  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bb26f959-020f-4676-9a9c-f396be9b0af2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:30:13.464917  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:30:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:30:14.455303  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:30:14.455369  543705 disk_worker.go:708] disk space is not compliant
W0321 07:30:14.455505  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:30:14.457081  543705 disk_worker.go:494] system disk:vda1
I0321 07:30:14.457125  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:30:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:30:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:30:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:30:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:30:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:30:23.409777  543705 memory.go:184] no items to output this cycle
I0321 07:30:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 07:30:28.436880  543705 disk_info.go:125] begin check local disk info of client
I0321 07:30:28.439424  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:30:28.439431  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4600 0xc0000c4640]
E0321 07:30:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:30:33.409775  543705 memory.go:184] no items to output this cycle
I0321 07:30:33.409783  543705 cpu.go:275] no items to output this cycle
I0321 07:30:38.777910  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:30:38.777918  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:30:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:30:43.410886  543705 memory.go:191] Add success.
I0321 07:30:43.409818  543705 cpu.go:282] Add success.
I0321 07:30:43.420627  543705 net.go:648] Add success.
I0321 07:30:43.423227  543705 net.go:770] primary dev: ETH0
I0321 07:30:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:30:43.423255  543705 net.go:698] Add success.
I0321 07:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:30:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:30:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:30:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:30:53.409783  543705 memory.go:184] no items to output this cycle
I0321 07:30:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 07:31:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:31:03.409782  543705 memory.go:184] no items to output this cycle
I0321 07:31:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 07:31:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:31:13.409814  543705 memory.go:191] Add success.
I0321 07:31:13.409827  543705 cpu.go:282] Add success.
W0321 07:31:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:31:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:31:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:31:13.420151  543705 net.go:648] Add success.
I0321 07:31:13.422609  543705 net.go:770] primary dev: ETH0
I0321 07:31:13.422623  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:31:13.422636  543705 net.go:698] Add success.
I0321 07:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:31:14.455343  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:31:14.455426  543705 disk_worker.go:708] disk space is not compliant
W0321 07:31:14.455430  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:31:14.457044  543705 disk_worker.go:494] system disk:vda1
I0321 07:31:14.457072  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:31:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:31:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:31:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:31:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:31:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:31:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:31:23.409797  543705 memory.go:184] no items to output this cycle
I0321 07:31:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 07:31:28.439900  543705 disk_info.go:125] begin check local disk info of client
I0321 07:31:28.442439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:31:28.442445  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003691c0 0xc000369200]
E0321 07:31:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:31:33.409775  543705 memory.go:184] no items to output this cycle
I0321 07:31:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 07:31:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:31:43.409820  543705 memory.go:191] Add success.
I0321 07:31:43.409829  543705 cpu.go:282] Add success.
I0321 07:31:43.419984  543705 net.go:648] Add success.
I0321 07:31:43.422627  543705 net.go:770] primary dev: ETH0
I0321 07:31:43.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:31:43.422657  543705 net.go:698] Add success.
I0321 07:31:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:31:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:31:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:31:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:31:53.409820  543705 memory.go:184] no items to output this cycle
I0321 07:31:53.409826  543705 cpu.go:275] no items to output this cycle
E0321 07:32:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:32:03.409795  543705 memory.go:184] no items to output this cycle
I0321 07:32:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 07:32:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:32:13.409789  543705 memory.go:191] Add success.
I0321 07:32:13.409811  543705 cpu.go:282] Add success.
W0321 07:32:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:32:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:32:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:32:13.420554  543705 net.go:648] Add success.
I0321 07:32:13.423314  543705 net.go:770] primary dev: ETH0
I0321 07:32:13.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:32:13.423338  543705 net.go:698] Add success.
W0321 07:32:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:32:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 07:32:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:32:14.456426  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:32:14.456436  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:32:14.456459  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:32:14.456873  543705 disk_worker.go:494] system disk:vda1
I0321 07:32:14.456916  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:32:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:32:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:32:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:32:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:32:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:32:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:32:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:32:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:32:23.409781  543705 memory.go:184] no items to output this cycle
I0321 07:32:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 07:32:28.442918  543705 disk_info.go:125] begin check local disk info of client
I0321 07:32:28.445401  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:32:28.445409  543705 disk_info.go:196] parse disk info done, disk is : [0xc000255580 0xc0002555c0]
E0321 07:32:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:32:33.409775  543705 memory.go:184] no items to output this cycle
I0321 07:32:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 07:32:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:32:43.409827  543705 memory.go:191] Add success.
I0321 07:32:43.409830  543705 cpu.go:282] Add success.
I0321 07:32:43.419977  543705 net.go:648] Add success.
I0321 07:32:43.423577  543705 net.go:770] primary dev: ETH0
I0321 07:32:43.423593  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:32:43.423607  543705 net.go:698] Add success.
I0321 07:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:32:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:32:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:32:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:32:53.409766  543705 memory.go:184] no items to output this cycle
I0321 07:32:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 07:33:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:33:03.409775  543705 memory.go:184] no items to output this cycle
I0321 07:33:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 07:33:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:33:13.409773  543705 memory.go:191] Add success.
W0321 07:33:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 07:33:13.409808  543705 cpu.go:282] Add success.
W0321 07:33:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:33:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:33:13.420072  543705 net.go:648] Add success.
I0321 07:33:13.423180  543705 net.go:770] primary dev: ETH0
I0321 07:33:13.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:33:13.423209  543705 net.go:698] Add success.
I0321 07:33:13.468747  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4867703e-f45e-48da-b781-39a45d74c933","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:33:13.468779  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:33:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:33:14.455383  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:33:14.455470  543705 disk_worker.go:708] disk space is not compliant
W0321 07:33:14.455474  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:33:14.457105  543705 disk_worker.go:494] system disk:vda1
I0321 07:33:14.457140  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:33:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:33:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:33:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:33:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:33:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:33:23.409786  543705 memory.go:184] no items to output this cycle
I0321 07:33:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 07:33:28.445911  543705 disk_info.go:125] begin check local disk info of client
I0321 07:33:28.448465  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:33:28.448471  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465800 0xc000465840]
E0321 07:33:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:33:33.409791  543705 memory.go:184] no items to output this cycle
I0321 07:33:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 07:33:38.778059  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:33:38.778067  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:33:43.410759  543705 memory.go:191] Add success.
I0321 07:33:43.409806  543705 cpu.go:282] Add success.
I0321 07:33:43.420517  543705 net.go:648] Add success.
I0321 07:33:43.424049  543705 net.go:770] primary dev: ETH0
I0321 07:33:43.424063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:33:43.424077  543705 net.go:698] Add success.
I0321 07:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:33:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:33:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:33:53.409782  543705 memory.go:184] no items to output this cycle
I0321 07:33:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 07:34:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:34:03.409810  543705 memory.go:184] no items to output this cycle
I0321 07:34:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 07:34:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:34:13.409778  543705 memory.go:191] Add success.
I0321 07:34:13.409802  543705 cpu.go:282] Add success.
W0321 07:34:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:34:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:34:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:34:13.420412  543705 net.go:648] Add success.
I0321 07:34:13.422978  543705 net.go:770] primary dev: ETH0
I0321 07:34:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:34:13.423004  543705 net.go:698] Add success.
I0321 07:34:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:34:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:34:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 07:34:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:34:14.458902  543705 disk_worker.go:494] system disk:vda1
I0321 07:34:14.458933  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:34:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:34:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:34:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:34:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:34:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:34:23.409800  543705 memory.go:184] no items to output this cycle
I0321 07:34:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 07:34:28.448925  543705 disk_info.go:125] begin check local disk info of client
I0321 07:34:28.451390  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:34:28.451396  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369d80 0xc000369dc0]
E0321 07:34:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:34:33.409791  543705 memory.go:184] no items to output this cycle
I0321 07:34:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 07:34:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:34:43.409798  543705 memory.go:191] Add success.
I0321 07:34:43.409817  543705 cpu.go:282] Add success.
I0321 07:34:43.419988  543705 net.go:648] Add success.
I0321 07:34:43.422800  543705 net.go:770] primary dev: ETH0
I0321 07:34:43.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:34:43.422831  543705 net.go:698] Add success.
I0321 07:34:46.458591  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:34:46.458658  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:34:46.458683  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:34:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:34:53.409776  543705 memory.go:184] no items to output this cycle
I0321 07:34:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 07:35:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:35:03.409763  543705 memory.go:184] no items to output this cycle
I0321 07:35:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 07:35:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:35:13.409796  543705 memory.go:191] Add success.
I0321 07:35:13.409796  543705 cpu.go:282] Add success.
W0321 07:35:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:35:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:35:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:35:13.420144  543705 net.go:648] Add success.
I0321 07:35:13.423325  543705 net.go:770] primary dev: ETH0
I0321 07:35:13.423342  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:35:13.423357  543705 net.go:698] Add success.
I0321 07:35:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:35:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:35:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 07:35:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:35:14.456975  543705 disk_worker.go:494] system disk:vda1
I0321 07:35:14.457004  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:35:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:35:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:35:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:35:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:35:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:35:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:35:23.409801  543705 memory.go:184] no items to output this cycle
I0321 07:35:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 07:35:28.451953  543705 disk_info.go:125] begin check local disk info of client
I0321 07:35:28.454455  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:35:28.454463  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369f40 0xc000352000]
E0321 07:35:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:35:33.409791  543705 memory.go:184] no items to output this cycle
I0321 07:35:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 07:35:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:35:43.409787  543705 memory.go:191] Add success.
I0321 07:35:43.409801  543705 cpu.go:282] Add success.
I0321 07:35:43.419983  543705 net.go:648] Add success.
I0321 07:35:43.422715  543705 net.go:770] primary dev: ETH0
I0321 07:35:43.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:35:43.422740  543705 net.go:698] Add success.
I0321 07:35:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:35:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:35:53.409793  543705 memory.go:184] no items to output this cycle
I0321 07:35:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 07:36:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:36:03.409775  543705 memory.go:184] no items to output this cycle
I0321 07:36:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 07:36:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:36:13.409785  543705 memory.go:191] Add success.
I0321 07:36:13.409801  543705 cpu.go:282] Add success.
W0321 07:36:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:36:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:36:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:36:13.420105  543705 net.go:648] Add success.
I0321 07:36:13.423415  543705 net.go:770] primary dev: ETH0
I0321 07:36:13.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:36:13.423440  543705 net.go:698] Add success.
I0321 07:36:13.463645  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"04973c32-66ce-4956-8d61-8c283170320f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:36:13.463675  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:36:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:36:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:36:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 07:36:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:36:14.456669  543705 disk_worker.go:494] system disk:vda1
I0321 07:36:14.456698  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:36:15.455608  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:36:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:36:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:36:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:36:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:36:23.409775  543705 memory.go:184] no items to output this cycle
I0321 07:36:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 07:36:28.454957  543705 disk_info.go:125] begin check local disk info of client
I0321 07:36:28.457421  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:36:28.457427  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3180 0xc0002a31c0]
E0321 07:36:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:36:33.409773  543705 memory.go:184] no items to output this cycle
I0321 07:36:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 07:36:38.778214  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:36:38.778222  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:36:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:36:43.410587  543705 memory.go:191] Add success.
I0321 07:36:43.409801  543705 cpu.go:282] Add success.
I0321 07:36:43.420300  543705 net.go:648] Add success.
I0321 07:36:43.422934  543705 net.go:770] primary dev: ETH0
I0321 07:36:43.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:36:43.422962  543705 net.go:698] Add success.
I0321 07:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:36:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:36:53.409802  543705 memory.go:184] no items to output this cycle
I0321 07:36:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 07:37:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:37:03.409792  543705 memory.go:184] no items to output this cycle
I0321 07:37:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 07:37:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:37:13.409805  543705 memory.go:191] Add success.
I0321 07:37:13.409817  543705 cpu.go:282] Add success.
W0321 07:37:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:37:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:37:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:37:13.420056  543705 net.go:648] Add success.
I0321 07:37:13.422822  543705 net.go:770] primary dev: ETH0
I0321 07:37:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:37:13.422851  543705 net.go:698] Add success.
I0321 07:37:13.452895  543705 event_worker.go:152] Polling the log file for events...
W0321 07:37:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:37:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 07:37:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:37:14.456945  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:37:14.456955  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:37:14.456961  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:37:14.457011  543705 disk_worker.go:494] system disk:vda1
I0321 07:37:14.457049  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:37:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:37:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:37:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:37:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:37:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:37:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:37:16.472297  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:37:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:37:23.409775  543705 memory.go:184] no items to output this cycle
I0321 07:37:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 07:37:28.457982  543705 disk_info.go:125] begin check local disk info of client
I0321 07:37:28.460457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:37:28.460464  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368e80 0xc000368ec0]
E0321 07:37:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:37:33.409797  543705 memory.go:184] no items to output this cycle
I0321 07:37:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:37:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:37:43.409823  543705 memory.go:191] Add success.
I0321 07:37:43.409836  543705 cpu.go:282] Add success.
I0321 07:37:43.420014  543705 net.go:648] Add success.
I0321 07:37:43.422966  543705 net.go:770] primary dev: ETH0
I0321 07:37:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:37:43.422991  543705 net.go:698] Add success.
I0321 07:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:37:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:37:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:37:53.409767  543705 memory.go:184] no items to output this cycle
I0321 07:37:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 07:38:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:38:03.409798  543705 memory.go:184] no items to output this cycle
I0321 07:38:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 07:38:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:38:13.409780  543705 memory.go:191] Add success.
I0321 07:38:13.409802  543705 cpu.go:282] Add success.
W0321 07:38:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:38:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:38:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:38:13.420099  543705 net.go:648] Add success.
I0321 07:38:13.422972  543705 net.go:770] primary dev: ETH0
I0321 07:38:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:38:13.423000  543705 net.go:698] Add success.
I0321 07:38:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:38:14.455436  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:38:14.455449  543705 disk_worker.go:708] disk space is not compliant
W0321 07:38:14.455453  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:38:14.457041  543705 disk_worker.go:494] system disk:vda1
I0321 07:38:14.457072  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:38:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:38:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:38:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:38:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:38:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:38:23.409773  543705 memory.go:184] no items to output this cycle
I0321 07:38:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 07:38:28.460995  543705 disk_info.go:125] begin check local disk info of client
I0321 07:38:28.463485  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:38:28.463491  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac80 0xc00007acc0]
E0321 07:38:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:38:33.409796  543705 memory.go:184] no items to output this cycle
I0321 07:38:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:38:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:38:43.409788  543705 memory.go:191] Add success.
I0321 07:38:43.409812  543705 cpu.go:282] Add success.
I0321 07:38:43.419894  543705 net.go:648] Add success.
I0321 07:38:43.422678  543705 net.go:770] primary dev: ETH0
I0321 07:38:43.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:38:43.422705  543705 net.go:698] Add success.
I0321 07:38:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:38:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:38:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:38:53.409768  543705 memory.go:184] no items to output this cycle
I0321 07:38:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 07:39:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:39:03.409788  543705 memory.go:184] no items to output this cycle
I0321 07:39:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 07:39:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:39:13.409791  543705 memory.go:191] Add success.
I0321 07:39:13.409794  543705 cpu.go:282] Add success.
W0321 07:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:39:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:39:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:39:13.420237  543705 net.go:648] Add success.
I0321 07:39:13.423414  543705 net.go:770] primary dev: ETH0
I0321 07:39:13.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:39:13.423438  543705 net.go:698] Add success.
I0321 07:39:13.468704  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4237c875-b0a0-4b6f-8cf3-555e1df2ed08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:39:13.468740  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:39:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:39:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 07:39:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:39:14.457585  543705 disk_worker.go:494] system disk:vda1
I0321 07:39:14.457627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:39:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:39:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:39:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:39:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:39:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:39:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:39:23.409773  543705 memory.go:184] no items to output this cycle
I0321 07:39:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 07:39:28.464010  543705 disk_info.go:125] begin check local disk info of client
I0321 07:39:28.466488  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:39:28.466506  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003693c0 0xc000369400]
E0321 07:39:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:39:33.409789  543705 memory.go:184] no items to output this cycle
I0321 07:39:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 07:39:38.779597  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:39:38.779604  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:39:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:39:43.410639  543705 memory.go:191] Add success.
I0321 07:39:43.409828  543705 cpu.go:282] Add success.
I0321 07:39:43.420404  543705 net.go:648] Add success.
I0321 07:39:43.422891  543705 net.go:770] primary dev: ETH0
I0321 07:39:43.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:39:43.422918  543705 net.go:698] Add success.
I0321 07:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:39:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:39:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:39:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:39:53.409794  543705 memory.go:184] no items to output this cycle
I0321 07:39:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 07:40:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:40:03.409805  543705 memory.go:184] no items to output this cycle
I0321 07:40:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 07:40:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:40:13.409778  543705 memory.go:191] Add success.
I0321 07:40:13.409798  543705 cpu.go:282] Add success.
W0321 07:40:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:40:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:40:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:40:13.420071  543705 net.go:648] Add success.
I0321 07:40:13.422790  543705 net.go:770] primary dev: ETH0
I0321 07:40:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:40:13.422820  543705 net.go:698] Add success.
I0321 07:40:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:40:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:40:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 07:40:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:40:14.456862  543705 disk_worker.go:494] system disk:vda1
I0321 07:40:14.456893  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:40:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:40:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:40:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:40:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:40:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:40:23.409763  543705 memory.go:184] no items to output this cycle
I0321 07:40:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 07:40:28.467038  543705 disk_info.go:125] begin check local disk info of client
I0321 07:40:28.469504  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:40:28.469510  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5240 0xc0000c5280]
E0321 07:40:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:40:33.409790  543705 memory.go:184] no items to output this cycle
I0321 07:40:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 07:40:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:40:43.409814  543705 memory.go:191] Add success.
I0321 07:40:43.409827  543705 cpu.go:282] Add success.
I0321 07:40:43.419892  543705 net.go:648] Add success.
I0321 07:40:43.422978  543705 net.go:770] primary dev: ETH0
I0321 07:40:43.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:40:43.423003  543705 net.go:698] Add success.
I0321 07:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:40:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:40:53.409777  543705 memory.go:184] no items to output this cycle
I0321 07:40:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 07:41:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:41:03.409764  543705 memory.go:184] no items to output this cycle
I0321 07:41:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 07:41:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:41:13.409813  543705 memory.go:191] Add success.
I0321 07:41:13.409819  543705 cpu.go:282] Add success.
W0321 07:41:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:41:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:41:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:41:13.420439  543705 net.go:648] Add success.
I0321 07:41:13.423754  543705 net.go:770] primary dev: ETH0
I0321 07:41:13.423766  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:41:13.423778  543705 net.go:698] Add success.
I0321 07:41:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:41:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:41:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 07:41:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:41:14.456552  543705 disk_worker.go:494] system disk:vda1
I0321 07:41:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:41:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:41:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:41:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:41:23.409797  543705 memory.go:184] no items to output this cycle
I0321 07:41:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 07:41:28.470038  543705 disk_info.go:125] begin check local disk info of client
I0321 07:41:28.472504  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:41:28.472509  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352bc0 0xc000352c00]
E0321 07:41:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:41:33.409797  543705 memory.go:184] no items to output this cycle
I0321 07:41:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 07:41:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:41:43.409800  543705 memory.go:191] Add success.
I0321 07:41:43.409801  543705 cpu.go:282] Add success.
I0321 07:41:43.419877  543705 net.go:648] Add success.
I0321 07:41:43.422778  543705 net.go:770] primary dev: ETH0
I0321 07:41:43.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:41:43.422808  543705 net.go:698] Add success.
I0321 07:41:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:41:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:41:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:41:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:41:53.409786  543705 cpu.go:275] no items to output this cycle
I0321 07:41:53.409788  543705 memory.go:184] no items to output this cycle
E0321 07:42:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:42:03.409802  543705 memory.go:184] no items to output this cycle
I0321 07:42:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 07:42:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:42:13.409794  543705 memory.go:191] Add success.
I0321 07:42:13.409810  543705 cpu.go:282] Add success.
W0321 07:42:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:42:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:42:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:42:13.420254  543705 net.go:648] Add success.
I0321 07:42:13.423136  543705 net.go:770] primary dev: ETH0
I0321 07:42:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:42:13.423162  543705 net.go:698] Add success.
I0321 07:42:13.467701  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"833efe95-54d1-4a1e-b237-70843efc8c09","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:42:13.467734  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 07:42:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:42:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 07:42:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:42:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:42:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:42:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:42:14.456642  543705 disk_worker.go:494] system disk:vda1
I0321 07:42:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:42:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:42:15.456872  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:42:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:42:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:42:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:42:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:42:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:42:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:42:23.409783  543705 memory.go:184] no items to output this cycle
I0321 07:42:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 07:42:28.473052  543705 disk_info.go:125] begin check local disk info of client
I0321 07:42:28.475631  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:42:28.475637  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003524c0 0xc000352500]
E0321 07:42:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:42:33.409779  543705 memory.go:184] no items to output this cycle
I0321 07:42:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 07:42:38.779763  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:42:38.779770  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:42:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:42:43.410736  543705 memory.go:191] Add success.
I0321 07:42:43.409804  543705 cpu.go:282] Add success.
I0321 07:42:43.420463  543705 net.go:648] Add success.
I0321 07:42:43.423053  543705 net.go:770] primary dev: ETH0
I0321 07:42:43.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:42:43.423080  543705 net.go:698] Add success.
I0321 07:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:42:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:42:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:42:53.409786  543705 memory.go:184] no items to output this cycle
I0321 07:42:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 07:43:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:43:03.409770  543705 memory.go:184] no items to output this cycle
I0321 07:43:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 07:43:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:43:13.409825  543705 memory.go:191] Add success.
I0321 07:43:13.409846  543705 cpu.go:282] Add success.
W0321 07:43:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:43:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:43:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:43:13.420167  543705 net.go:648] Add success.
I0321 07:43:13.423508  543705 net.go:770] primary dev: ETH0
I0321 07:43:13.423521  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:43:13.423533  543705 net.go:698] Add success.
I0321 07:43:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:43:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:43:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 07:43:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:43:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 07:43:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:43:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:43:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:43:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:43:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:43:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:43:23.409780  543705 memory.go:184] no items to output this cycle
I0321 07:43:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 07:43:28.476062  543705 disk_info.go:125] begin check local disk info of client
I0321 07:43:28.478590  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:43:28.478596  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0321 07:43:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:43:33.409781  543705 memory.go:184] no items to output this cycle
I0321 07:43:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 07:43:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:43:43.409809  543705 memory.go:191] Add success.
I0321 07:43:43.409810  543705 cpu.go:282] Add success.
I0321 07:43:43.419918  543705 net.go:648] Add success.
I0321 07:43:43.422720  543705 net.go:770] primary dev: ETH0
I0321 07:43:43.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:43:43.422750  543705 net.go:698] Add success.
I0321 07:43:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:43:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:43:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:43:53.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:43:53.410388  543705 memory.go:184] no items to output this cycle
I0321 07:43:53.410392  543705 cpu.go:275] no items to output this cycle
E0321 07:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:44:03.409781  543705 memory.go:184] no items to output this cycle
I0321 07:44:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 07:44:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:44:13.409812  543705 memory.go:191] Add success.
I0321 07:44:13.409828  543705 cpu.go:282] Add success.
W0321 07:44:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:44:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:44:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:44:13.420107  543705 net.go:648] Add success.
I0321 07:44:13.422587  543705 net.go:770] primary dev: ETH0
I0321 07:44:13.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:44:13.422617  543705 net.go:698] Add success.
I0321 07:44:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:44:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:44:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 07:44:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:44:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 07:44:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:44:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:44:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:44:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:44:23.409792  543705 memory.go:184] no items to output this cycle
I0321 07:44:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 07:44:28.479078  543705 disk_info.go:125] begin check local disk info of client
I0321 07:44:28.481527  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:44:28.481533  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e01c0 0xc0003e0200]
E0321 07:44:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:44:33.409933  543705 memory.go:184] no items to output this cycle
I0321 07:44:33.410061  543705 cpu.go:275] no items to output this cycle
E0321 07:44:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:44:43.409793  543705 memory.go:191] Add success.
I0321 07:44:43.409824  543705 cpu.go:282] Add success.
I0321 07:44:43.419878  543705 net.go:648] Add success.
I0321 07:44:43.422552  543705 net.go:770] primary dev: ETH0
I0321 07:44:43.422566  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:44:43.422578  543705 net.go:698] Add success.
I0321 07:44:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:44:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:44:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:44:53.409801  543705 memory.go:184] no items to output this cycle
I0321 07:44:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 07:45:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:45:03.409794  543705 memory.go:184] no items to output this cycle
I0321 07:45:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 07:45:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:45:13.409816  543705 memory.go:191] Add success.
I0321 07:45:13.409825  543705 cpu.go:282] Add success.
W0321 07:45:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:45:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:45:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:45:13.420108  543705 net.go:648] Add success.
I0321 07:45:13.422978  543705 net.go:770] primary dev: ETH0
I0321 07:45:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:45:13.423005  543705 net.go:698] Add success.
I0321 07:45:13.469253  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a863909f-bbe0-426d-9486-9b9e38b6c371","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:45:13.469287  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:45:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:45:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:45:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 07:45:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:45:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 07:45:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:45:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:45:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:45:23.410403  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:45:23.410419  543705 memory.go:184] no items to output this cycle
I0321 07:45:23.410434  543705 cpu.go:275] no items to output this cycle
I0321 07:45:28.482106  543705 disk_info.go:125] begin check local disk info of client
I0321 07:45:28.484546  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:45:28.484552  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492cc0 0xc000492d00]
E0321 07:45:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:45:33.409920  543705 memory.go:184] no items to output this cycle
I0321 07:45:33.409947  543705 cpu.go:275] no items to output this cycle
I0321 07:45:38.779930  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:45:38.779937  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:45:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:45:43.410586  543705 memory.go:191] Add success.
I0321 07:45:43.409809  543705 cpu.go:282] Add success.
I0321 07:45:43.420289  543705 net.go:648] Add success.
I0321 07:45:43.422839  543705 net.go:770] primary dev: ETH0
I0321 07:45:43.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:45:43.422864  543705 net.go:698] Add success.
I0321 07:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:45:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:45:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:45:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:45:53.409801  543705 memory.go:184] no items to output this cycle
I0321 07:45:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 07:46:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:46:03.409789  543705 memory.go:184] no items to output this cycle
I0321 07:46:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 07:46:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:46:13.409791  543705 memory.go:191] Add success.
I0321 07:46:13.409812  543705 cpu.go:282] Add success.
W0321 07:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:46:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:46:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:46:13.420146  543705 net.go:648] Add success.
I0321 07:46:13.422958  543705 net.go:770] primary dev: ETH0
I0321 07:46:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:46:13.422983  543705 net.go:698] Add success.
I0321 07:46:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:46:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:46:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 07:46:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:46:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 07:46:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:46:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:46:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:46:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:46:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:46:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:46:23.409776  543705 memory.go:184] no items to output this cycle
I0321 07:46:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 07:46:28.485114  543705 disk_info.go:125] begin check local disk info of client
I0321 07:46:28.487634  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:46:28.487640  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a70c0 0xc0004a7100]
E0321 07:46:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:46:33.409862  543705 memory.go:184] no items to output this cycle
I0321 07:46:33.409935  543705 cpu.go:275] no items to output this cycle
E0321 07:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:46:43.409794  543705 memory.go:191] Add success.
I0321 07:46:43.409805  543705 cpu.go:282] Add success.
I0321 07:46:43.420027  543705 net.go:648] Add success.
I0321 07:46:43.422876  543705 net.go:770] primary dev: ETH0
I0321 07:46:43.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:46:43.422902  543705 net.go:698] Add success.
I0321 07:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:46:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:46:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:46:53.409778  543705 memory.go:184] no items to output this cycle
I0321 07:46:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 07:47:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:47:03.409780  543705 memory.go:184] no items to output this cycle
I0321 07:47:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 07:47:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:47:13.409792  543705 memory.go:191] Add success.
I0321 07:47:13.409792  543705 cpu.go:282] Add success.
W0321 07:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:47:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:47:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:47:13.420062  543705 net.go:648] Add success.
I0321 07:47:13.422947  543705 net.go:770] primary dev: ETH0
I0321 07:47:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:47:13.422970  543705 net.go:698] Add success.
I0321 07:47:13.453537  543705 event_worker.go:152] Polling the log file for events...
W0321 07:47:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:47:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 07:47:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:47:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:47:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:47:14.455919  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:47:14.456527  543705 disk_worker.go:494] system disk:vda1
I0321 07:47:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:47:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:47:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:47:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:47:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:47:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:47:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:47:16.472344  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:47:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:47:23.409797  543705 memory.go:184] no items to output this cycle
I0321 07:47:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 07:47:28.488131  543705 disk_info.go:125] begin check local disk info of client
I0321 07:47:28.490557  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:47:28.490564  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053abc0 0xc00053ac00]
E0321 07:47:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:47:33.409799  543705 memory.go:184] no items to output this cycle
I0321 07:47:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 07:47:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:47:43.409996  543705 memory.go:191] Add success.
I0321 07:47:43.410118  543705 cpu.go:282] Add success.
I0321 07:47:43.419712  543705 net.go:648] Add success.
I0321 07:47:43.422797  543705 net.go:770] primary dev: ETH0
I0321 07:47:43.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:47:43.422822  543705 net.go:698] Add success.
I0321 07:47:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:47:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:47:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:47:53.409781  543705 memory.go:184] no items to output this cycle
I0321 07:47:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 07:48:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:48:03.409779  543705 memory.go:184] no items to output this cycle
I0321 07:48:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 07:48:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:48:13.409824  543705 memory.go:191] Add success.
I0321 07:48:13.409834  543705 cpu.go:282] Add success.
W0321 07:48:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:48:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:48:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:48:13.420149  543705 net.go:648] Add success.
I0321 07:48:13.423074  543705 net.go:770] primary dev: ETH0
I0321 07:48:13.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:48:13.423111  543705 net.go:698] Add success.
I0321 07:48:13.471956  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b7c6802e-428b-49f4-8024-bcd9d6d1e9f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:48:13.471989  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:48:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:48:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:48:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 07:48:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:48:14.456610  543705 disk_worker.go:494] system disk:vda1
I0321 07:48:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:48:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:48:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:48:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:48:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:48:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:48:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:48:23.409780  543705 memory.go:184] no items to output this cycle
I0321 07:48:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 07:48:28.491182  543705 disk_info.go:125] begin check local disk info of client
I0321 07:48:28.493665  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:48:28.493671  543705 disk_info.go:196] parse disk info done, disk is : [0xc000392840 0xc000392880]
E0321 07:48:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:48:33.409914  543705 memory.go:184] no items to output this cycle
I0321 07:48:33.409936  543705 cpu.go:275] no items to output this cycle
I0321 07:48:38.781615  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:48:38.781623  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:48:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:48:43.410702  543705 memory.go:191] Add success.
I0321 07:48:43.409808  543705 cpu.go:282] Add success.
I0321 07:48:43.420398  543705 net.go:648] Add success.
I0321 07:48:43.423210  543705 net.go:770] primary dev: ETH0
I0321 07:48:43.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:48:43.423237  543705 net.go:698] Add success.
I0321 07:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:48:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:48:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:48:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:48:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 07:48:53.409784  543705 memory.go:184] no items to output this cycle
E0321 07:49:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:49:03.409783  543705 memory.go:184] no items to output this cycle
I0321 07:49:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 07:49:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:49:13.409783  543705 memory.go:191] Add success.
I0321 07:49:13.409787  543705 cpu.go:282] Add success.
W0321 07:49:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:49:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:49:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:49:13.420081  543705 net.go:648] Add success.
I0321 07:49:13.423002  543705 net.go:770] primary dev: ETH0
I0321 07:49:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:49:13.423026  543705 net.go:698] Add success.
I0321 07:49:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:49:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:49:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 07:49:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:49:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 07:49:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:49:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:49:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:49:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:49:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:49:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:49:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:49:23.409777  543705 memory.go:184] no items to output this cycle
I0321 07:49:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 07:49:28.494120  543705 disk_info.go:125] begin check local disk info of client
I0321 07:49:28.496638  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:49:28.496644  543705 disk_info.go:196] parse disk info done, disk is : [0xc000496100 0xc000496140]
E0321 07:49:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:49:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 07:49:33.409800  543705 memory.go:184] no items to output this cycle
E0321 07:49:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:49:43.409839  543705 memory.go:191] Add success.
I0321 07:49:43.409849  543705 cpu.go:282] Add success.
I0321 07:49:43.420041  543705 net.go:648] Add success.
I0321 07:49:43.423182  543705 net.go:770] primary dev: ETH0
I0321 07:49:43.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:49:43.423217  543705 net.go:698] Add success.
I0321 07:49:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:49:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:49:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:49:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:49:53.409775  543705 memory.go:184] no items to output this cycle
I0321 07:49:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 07:50:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:50:03.409770  543705 memory.go:184] no items to output this cycle
I0321 07:50:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 07:50:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:50:13.409787  543705 memory.go:191] Add success.
W0321 07:50:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 07:50:13.409819  543705 cpu.go:282] Add success.
W0321 07:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:50:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:50:13.420063  543705 net.go:648] Add success.
I0321 07:50:13.423244  543705 net.go:770] primary dev: ETH0
I0321 07:50:13.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:50:13.423270  543705 net.go:698] Add success.
I0321 07:50:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:50:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:50:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 07:50:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:50:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 07:50:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:50:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:50:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:50:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:50:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:50:23.409813  543705 memory.go:184] no items to output this cycle
I0321 07:50:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 07:50:28.497186  543705 disk_info.go:125] begin check local disk info of client
I0321 07:50:28.499664  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:50:28.499671  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039b600 0xc00039b640]
E0321 07:50:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:50:33.409819  543705 memory.go:184] no items to output this cycle
I0321 07:50:33.409830  543705 cpu.go:275] no items to output this cycle
E0321 07:50:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:50:43.409811  543705 memory.go:191] Add success.
I0321 07:50:43.409813  543705 cpu.go:282] Add success.
I0321 07:50:43.420001  543705 net.go:648] Add success.
I0321 07:50:43.422603  543705 net.go:770] primary dev: ETH0
I0321 07:50:43.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:50:43.422629  543705 net.go:698] Add success.
I0321 07:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:50:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:50:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:50:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:50:53.409781  543705 memory.go:184] no items to output this cycle
I0321 07:50:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 07:51:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:51:03.409796  543705 memory.go:184] no items to output this cycle
I0321 07:51:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 07:51:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:51:13.409785  543705 cpu.go:282] Add success.
I0321 07:51:13.409792  543705 memory.go:191] Add success.
W0321 07:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:51:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:51:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:51:13.420040  543705 net.go:648] Add success.
I0321 07:51:13.423244  543705 net.go:770] primary dev: ETH0
I0321 07:51:13.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:51:13.423269  543705 net.go:698] Add success.
I0321 07:51:13.467649  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"57da2cae-5983-480a-bcb4-5a085076842c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:51:13.467682  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:51:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:51:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 07:51:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:51:14.456687  543705 disk_worker.go:494] system disk:vda1
I0321 07:51:14.456716  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:51:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:51:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:51:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:51:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:51:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:51:23.409801  543705 memory.go:184] no items to output this cycle
I0321 07:51:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 07:51:28.500145  543705 disk_info.go:125] begin check local disk info of client
I0321 07:51:28.502621  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:51:28.502628  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ad640 0xc0004ad680]
E0321 07:51:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:51:33.409896  543705 memory.go:184] no items to output this cycle
I0321 07:51:33.409913  543705 cpu.go:275] no items to output this cycle
I0321 07:51:38.781749  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:51:38.781756  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:51:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:51:43.410541  543705 memory.go:191] Add success.
I0321 07:51:43.409834  543705 cpu.go:282] Add success.
I0321 07:51:43.420285  543705 net.go:648] Add success.
I0321 07:51:43.422973  543705 net.go:770] primary dev: ETH0
I0321 07:51:43.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:51:43.422999  543705 net.go:698] Add success.
I0321 07:51:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:51:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:51:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:51:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:51:53.409782  543705 memory.go:184] no items to output this cycle
I0321 07:51:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 07:52:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:52:03.409774  543705 memory.go:184] no items to output this cycle
I0321 07:52:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 07:52:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:52:13.409790  543705 memory.go:191] Add success.
I0321 07:52:13.409790  543705 cpu.go:282] Add success.
W0321 07:52:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:52:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:52:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:52:13.420130  543705 net.go:648] Add success.
I0321 07:52:13.422904  543705 net.go:770] primary dev: ETH0
I0321 07:52:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:52:13.422933  543705 net.go:698] Add success.
W0321 07:52:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:52:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 07:52:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:52:14.456914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:52:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:52:14.456930  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:52:14.457007  543705 disk_worker.go:494] system disk:vda1
I0321 07:52:14.457041  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:52:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:52:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:52:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:52:16.457980  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:52:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:52:16.458040  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:52:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:52:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:52:23.409818  543705 memory.go:184] no items to output this cycle
I0321 07:52:23.409964  543705 cpu.go:275] no items to output this cycle
I0321 07:52:28.503213  543705 disk_info.go:125] begin check local disk info of client
I0321 07:52:28.505805  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:52:28.505811  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385b40 0xc000385b80]
E0321 07:52:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:52:33.409780  543705 memory.go:184] no items to output this cycle
I0321 07:52:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 07:52:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:52:43.409790  543705 memory.go:191] Add success.
I0321 07:52:43.409802  543705 cpu.go:282] Add success.
I0321 07:52:43.420070  543705 net.go:648] Add success.
I0321 07:52:43.422772  543705 net.go:770] primary dev: ETH0
I0321 07:52:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:52:43.422802  543705 net.go:698] Add success.
I0321 07:52:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:52:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:52:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:52:53.410193  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:52:53.410210  543705 memory.go:184] no items to output this cycle
I0321 07:52:53.410239  543705 cpu.go:275] no items to output this cycle
E0321 07:53:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:53:03.409799  543705 memory.go:184] no items to output this cycle
I0321 07:53:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 07:53:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:53:13.409810  543705 memory.go:191] Add success.
I0321 07:53:13.409815  543705 cpu.go:282] Add success.
W0321 07:53:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:53:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:53:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:53:13.420062  543705 net.go:648] Add success.
I0321 07:53:13.422670  543705 net.go:770] primary dev: ETH0
I0321 07:53:13.422682  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:53:13.422693  543705 net.go:698] Add success.
I0321 07:53:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:53:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:53:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 07:53:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:53:14.456604  543705 disk_worker.go:494] system disk:vda1
I0321 07:53:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:53:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:53:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:53:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:53:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:53:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:53:23.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:53:23.409853  543705 memory.go:184] no items to output this cycle
I0321 07:53:23.409956  543705 cpu.go:275] no items to output this cycle
I0321 07:53:28.506173  543705 disk_info.go:125] begin check local disk info of client
I0321 07:53:28.508702  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:53:28.508709  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3f00 0xc0003b3f40]
E0321 07:53:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:53:33.409768  543705 memory.go:184] no items to output this cycle
I0321 07:53:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 07:53:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:53:43.409790  543705 memory.go:191] Add success.
I0321 07:53:43.409817  543705 cpu.go:282] Add success.
I0321 07:53:43.419975  543705 net.go:648] Add success.
I0321 07:53:43.422965  543705 net.go:770] primary dev: ETH0
I0321 07:53:43.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:53:43.422991  543705 net.go:698] Add success.
I0321 07:53:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:53:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:53:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:53:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:53:53.409767  543705 memory.go:184] no items to output this cycle
I0321 07:53:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 07:54:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:54:03.409770  543705 memory.go:184] no items to output this cycle
I0321 07:54:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 07:54:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:54:13.409805  543705 memory.go:191] Add success.
I0321 07:54:13.409812  543705 cpu.go:282] Add success.
W0321 07:54:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:54:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:54:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:54:13.420151  543705 net.go:648] Add success.
I0321 07:54:13.423105  543705 net.go:770] primary dev: ETH0
I0321 07:54:13.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:54:13.423131  543705 net.go:698] Add success.
I0321 07:54:13.469738  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e28ce99b-efef-4dde-9424-a1bc9d5438ea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:54:13.469772  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 07:54:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:54:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:54:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 07:54:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:54:14.456698  543705 disk_worker.go:494] system disk:vda1
I0321 07:54:14.456735  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:54:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:54:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:54:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:54:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:54:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:54:23.409774  543705 memory.go:184] no items to output this cycle
I0321 07:54:23.409775  543705 cpu.go:275] no items to output this cycle
I0321 07:54:28.509187  543705 disk_info.go:125] begin check local disk info of client
I0321 07:54:28.511677  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:54:28.511683  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395d40 0xc000395d80]
E0321 07:54:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:54:33.409804  543705 memory.go:184] no items to output this cycle
I0321 07:54:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 07:54:38.781907  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:54:38.781915  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:54:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:54:43.410653  543705 memory.go:191] Add success.
I0321 07:54:43.409810  543705 cpu.go:282] Add success.
I0321 07:54:43.420367  543705 net.go:648] Add success.
I0321 07:54:43.422897  543705 net.go:770] primary dev: ETH0
I0321 07:54:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:54:43.422927  543705 net.go:698] Add success.
I0321 07:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:54:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:54:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:54:53.409802  543705 memory.go:184] no items to output this cycle
I0321 07:54:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 07:55:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:55:03.409769  543705 memory.go:184] no items to output this cycle
I0321 07:55:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 07:55:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:55:13.409818  543705 memory.go:191] Add success.
I0321 07:55:13.409821  543705 cpu.go:282] Add success.
W0321 07:55:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:55:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:55:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:55:13.420043  543705 net.go:648] Add success.
I0321 07:55:13.422795  543705 net.go:770] primary dev: ETH0
I0321 07:55:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:55:13.422820  543705 net.go:698] Add success.
I0321 07:55:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:55:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:55:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 07:55:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:55:14.456503  543705 disk_worker.go:494] system disk:vda1
I0321 07:55:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:55:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:55:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:55:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:55:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:55:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:55:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:55:23.409790  543705 memory.go:184] no items to output this cycle
I0321 07:55:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 07:55:28.512201  543705 disk_info.go:125] begin check local disk info of client
I0321 07:55:28.514801  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:55:28.514807  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf200 0xc0002bf240]
E0321 07:55:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:55:33.409774  543705 memory.go:184] no items to output this cycle
I0321 07:55:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 07:55:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:55:43.409797  543705 memory.go:191] Add success.
I0321 07:55:43.409802  543705 cpu.go:282] Add success.
I0321 07:55:43.419983  543705 net.go:648] Add success.
I0321 07:55:43.422614  543705 net.go:770] primary dev: ETH0
I0321 07:55:43.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:55:43.422640  543705 net.go:698] Add success.
I0321 07:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:55:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:55:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:55:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:55:53.409787  543705 memory.go:184] no items to output this cycle
I0321 07:55:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 07:56:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:56:03.409781  543705 memory.go:184] no items to output this cycle
I0321 07:56:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 07:56:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:56:13.409796  543705 memory.go:191] Add success.
I0321 07:56:13.409799  543705 cpu.go:282] Add success.
W0321 07:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:56:13.420057  543705 net.go:648] Add success.
I0321 07:56:13.423028  543705 net.go:770] primary dev: ETH0
I0321 07:56:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:56:13.423058  543705 net.go:698] Add success.
I0321 07:56:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:56:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:56:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 07:56:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:56:14.456834  543705 disk_worker.go:494] system disk:vda1
I0321 07:56:14.456863  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:56:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:56:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:56:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:56:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:56:23.409790  543705 memory.go:184] no items to output this cycle
I0321 07:56:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 07:56:28.514888  543705 disk_info.go:125] begin check local disk info of client
I0321 07:56:28.517356  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:56:28.517362  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
E0321 07:56:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:56:33.409795  543705 memory.go:184] no items to output this cycle
I0321 07:56:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 07:56:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:56:43.409793  543705 memory.go:191] Add success.
I0321 07:56:43.409823  543705 cpu.go:282] Add success.
I0321 07:56:43.420032  543705 net.go:648] Add success.
I0321 07:56:43.422894  543705 net.go:770] primary dev: ETH0
I0321 07:56:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:56:43.422920  543705 net.go:698] Add success.
I0321 07:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:56:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:56:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:56:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:56:53.409772  543705 memory.go:184] no items to output this cycle
I0321 07:56:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 07:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:57:03.409777  543705 memory.go:184] no items to output this cycle
I0321 07:57:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 07:57:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:57:13.409792  543705 memory.go:191] Add success.
I0321 07:57:13.409798  543705 cpu.go:282] Add success.
W0321 07:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:57:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:57:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:57:13.420223  543705 net.go:648] Add success.
I0321 07:57:13.422903  543705 net.go:770] primary dev: ETH0
I0321 07:57:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:57:13.422929  543705 net.go:698] Add success.
I0321 07:57:13.429398  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 07:57:13.453568  543705 event_worker.go:152] Polling the log file for events...
I0321 07:57:13.469636  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"da46cc6a-8961-49d7-ab85-460efb475211","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 07:57:13.469688  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 07:57:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:57:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 07:57:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0321 07:57:14.455886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 07:57:14.455895  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 07:57:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0321 07:57:14.456696  543705 disk_worker.go:494] system disk:vda1
I0321 07:57:14.456724  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 07:57:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 07:57:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:57:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 07:57:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 07:57:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:57:16.458037  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:57:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:57:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:57:23.409786  543705 memory.go:184] no items to output this cycle
I0321 07:57:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 07:57:28.518282  543705 disk_info.go:125] begin check local disk info of client
I0321 07:57:28.520846  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:57:28.520854  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003691c0 0xc000369200]
E0321 07:57:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:57:33.409778  543705 memory.go:184] no items to output this cycle
I0321 07:57:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 07:57:38.782058  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 07:57:38.782065  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 07:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:57:43.410651  543705 memory.go:191] Add success.
I0321 07:57:43.409816  543705 cpu.go:282] Add success.
I0321 07:57:43.420475  543705 net.go:648] Add success.
I0321 07:57:43.423071  543705 net.go:770] primary dev: ETH0
I0321 07:57:43.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:57:43.423100  543705 net.go:698] Add success.
I0321 07:57:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:57:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:57:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:57:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:57:53.409778  543705 memory.go:184] no items to output this cycle
I0321 07:57:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 07:58:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:58:03.409788  543705 cpu.go:275] no items to output this cycle
I0321 07:58:03.409798  543705 memory.go:184] no items to output this cycle
E0321 07:58:13.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:58:13.409896  543705 memory.go:191] Add success.
W0321 07:58:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:58:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:58:13.409963  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:58:13.409973  543705 cpu.go:282] Add success.
I0321 07:58:13.419721  543705 net.go:648] Add success.
I0321 07:58:13.422752  543705 net.go:770] primary dev: ETH0
I0321 07:58:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:58:13.422775  543705 net.go:698] Add success.
I0321 07:58:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:58:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:58:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 07:58:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:58:14.456489  543705 disk_worker.go:494] system disk:vda1
I0321 07:58:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:58:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:58:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:58:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:58:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:58:23.409804  543705 memory.go:184] no items to output this cycle
I0321 07:58:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 07:58:28.521240  543705 disk_info.go:125] begin check local disk info of client
I0321 07:58:28.523796  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:58:28.523802  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486340 0xc000486380]
E0321 07:58:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:58:33.409796  543705 memory.go:184] no items to output this cycle
I0321 07:58:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 07:58:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:58:43.409783  543705 memory.go:191] Add success.
I0321 07:58:43.409816  543705 cpu.go:282] Add success.
I0321 07:58:43.420052  543705 net.go:648] Add success.
I0321 07:58:43.422754  543705 net.go:770] primary dev: ETH0
I0321 07:58:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:58:43.422781  543705 net.go:698] Add success.
I0321 07:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:58:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:58:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:58:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:58:53.409764  543705 memory.go:184] no items to output this cycle
I0321 07:58:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 07:59:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:59:03.409785  543705 memory.go:184] no items to output this cycle
I0321 07:59:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 07:59:13.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:59:13.409926  543705 memory.go:191] Add success.
I0321 07:59:13.409930  543705 cpu.go:282] Add success.
W0321 07:59:13.409970  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 07:59:13.409993  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 07:59:13.409998  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 07:59:13.419717  543705 net.go:648] Add success.
I0321 07:59:13.422184  543705 net.go:770] primary dev: ETH0
I0321 07:59:13.422198  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:59:13.422209  543705 net.go:698] Add success.
I0321 07:59:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 07:59:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 07:59:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 07:59:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 07:59:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 07:59:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 07:59:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 07:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:59:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 07:59:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 07:59:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:59:23.409769  543705 memory.go:184] no items to output this cycle
I0321 07:59:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 07:59:28.524271  543705 disk_info.go:125] begin check local disk info of client
I0321 07:59:28.526767  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 07:59:28.526773  543705 disk_info.go:196] parse disk info done, disk is : [0xc000248540 0xc000248580]
E0321 07:59:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:59:33.409793  543705 memory.go:184] no items to output this cycle
I0321 07:59:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 07:59:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:59:43.409818  543705 memory.go:191] Add success.
I0321 07:59:43.409824  543705 cpu.go:282] Add success.
I0321 07:59:43.419978  543705 net.go:648] Add success.
I0321 07:59:43.422573  543705 net.go:770] primary dev: ETH0
I0321 07:59:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0321 07:59:43.422604  543705 net.go:698] Add success.
I0321 07:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 07:59:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 07:59:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 07:59:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 07:59:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 07:59:53.409784  543705 memory.go:184] no items to output this cycle
E0321 08:00:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:00:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 08:00:03.409782  543705 memory.go:184] no items to output this cycle
E0321 08:00:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:00:13.409793  543705 memory.go:191] Add success.
I0321 08:00:13.409795  543705 cpu.go:282] Add success.
W0321 08:00:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:00:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:00:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:00:13.420202  543705 net.go:648] Add success.
I0321 08:00:13.422797  543705 net.go:770] primary dev: ETH0
I0321 08:00:13.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:00:13.422827  543705 net.go:698] Add success.
I0321 08:00:13.784610  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"41347bde-1b70-4f77-b86b-779c8d4bbddb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:00:13.784646  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:00:14.453980  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:00:14.454157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:00:14.454245  543705 disk_worker.go:708] disk space is not compliant
W0321 08:00:14.454248  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:00:14.455756  543705 disk_worker.go:494] system disk:vda1
I0321 08:00:14.455785  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:00:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:00:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:00:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:00:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:00:23.409776  543705 memory.go:184] no items to output this cycle
I0321 08:00:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 08:00:28.527274  543705 disk_info.go:125] begin check local disk info of client
I0321 08:00:28.529801  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:00:28.529808  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370580 0xc0003705c0]
E0321 08:00:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:00:33.409801  543705 memory.go:184] no items to output this cycle
I0321 08:00:33.409816  543705 cpu.go:275] no items to output this cycle
I0321 08:00:38.782219  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:00:38.782227  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:00:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:00:43.410639  543705 memory.go:191] Add success.
I0321 08:00:43.409814  543705 cpu.go:282] Add success.
I0321 08:00:43.420333  543705 net.go:648] Add success.
I0321 08:00:43.422954  543705 net.go:770] primary dev: ETH0
I0321 08:00:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:00:43.422979  543705 net.go:698] Add success.
I0321 08:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:00:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:00:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:00:53.409804  543705 memory.go:184] no items to output this cycle
I0321 08:00:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 08:01:03.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:01:03.409915  543705 cpu.go:275] no items to output this cycle
I0321 08:01:03.409922  543705 memory.go:184] no items to output this cycle
W0321 08:01:13.409725  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:01:13.409744  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:01:13.409750  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:01:13.409827  543705 cpu.go:282] Add success.
E0321 08:01:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:01:13.409855  543705 memory.go:191] Add success.
I0321 08:01:13.420170  543705 net.go:648] Add success.
I0321 08:01:13.422920  543705 net.go:770] primary dev: ETH0
I0321 08:01:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:01:13.422947  543705 net.go:698] Add success.
I0321 08:01:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:01:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:01:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 08:01:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:01:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 08:01:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:01:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:01:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:01:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:01:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:01:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:01:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:01:23.409783  543705 memory.go:184] no items to output this cycle
I0321 08:01:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 08:01:28.530289  543705 disk_info.go:125] begin check local disk info of client
I0321 08:01:28.532873  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:01:28.532879  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be9c0 0xc0002bea00]
E0321 08:01:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:01:33.409784  543705 memory.go:184] no items to output this cycle
I0321 08:01:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 08:01:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:01:43.409829  543705 memory.go:191] Add success.
I0321 08:01:43.409835  543705 cpu.go:282] Add success.
I0321 08:01:43.419960  543705 net.go:648] Add success.
I0321 08:01:43.422917  543705 net.go:770] primary dev: ETH0
I0321 08:01:43.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:01:43.422943  543705 net.go:698] Add success.
I0321 08:01:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:01:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:01:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:01:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:01:53.409779  543705 memory.go:184] no items to output this cycle
I0321 08:01:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 08:02:03.409905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:02:03.409929  543705 memory.go:184] no items to output this cycle
I0321 08:02:03.409929  543705 cpu.go:275] no items to output this cycle
E0321 08:02:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:02:13.409787  543705 memory.go:191] Add success.
I0321 08:02:13.409796  543705 cpu.go:282] Add success.
W0321 08:02:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:02:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:02:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:02:13.420124  543705 net.go:648] Add success.
I0321 08:02:13.423139  543705 net.go:770] primary dev: ETH0
I0321 08:02:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:02:13.423162  543705 net.go:698] Add success.
W0321 08:02:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:02:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 08:02:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:02:14.455895  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:02:14.455903  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:02:14.455909  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:02:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 08:02:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:02:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:02:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:02:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:02:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:02:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:02:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:02:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:02:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:02:23.409773  543705 memory.go:184] no items to output this cycle
I0321 08:02:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 08:02:28.533298  543705 disk_info.go:125] begin check local disk info of client
I0321 08:02:28.535858  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:02:28.535864  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001b8300 0xc0001b8340]
E0321 08:02:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:02:33.409764  543705 memory.go:184] no items to output this cycle
I0321 08:02:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:02:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:02:43.409806  543705 memory.go:191] Add success.
I0321 08:02:43.409806  543705 cpu.go:282] Add success.
I0321 08:02:43.419848  543705 net.go:648] Add success.
I0321 08:02:43.422754  543705 net.go:770] primary dev: ETH0
I0321 08:02:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:02:43.422785  543705 net.go:698] Add success.
I0321 08:02:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:02:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:02:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:02:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:02:53.409779  543705 memory.go:184] no items to output this cycle
I0321 08:02:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 08:03:03.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:03:03.409891  543705 cpu.go:275] no items to output this cycle
I0321 08:03:03.410006  543705 memory.go:184] no items to output this cycle
E0321 08:03:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:03:13.409784  543705 memory.go:191] Add success.
W0321 08:03:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 08:03:13.409820  543705 cpu.go:282] Add success.
W0321 08:03:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:03:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:03:13.420132  543705 net.go:648] Add success.
I0321 08:03:13.422804  543705 net.go:770] primary dev: ETH0
I0321 08:03:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:03:13.422832  543705 net.go:698] Add success.
I0321 08:03:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:03:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:03:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 08:03:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:03:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 08:03:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:03:15.134601  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ce35c973-fb05-4db6-9690-71be91dd5a29","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:03:15.134639  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:03:15.454976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:03:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:03:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:03:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:03:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:03:23.409797  543705 memory.go:184] no items to output this cycle
I0321 08:03:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 08:03:28.536321  543705 disk_info.go:125] begin check local disk info of client
I0321 08:03:28.538836  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:03:28.538842  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aa740 0xc0003aa780]
E0321 08:03:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:03:33.409790  543705 memory.go:184] no items to output this cycle
I0321 08:03:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 08:03:38.783617  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:03:38.783625  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:03:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:03:43.410546  543705 memory.go:191] Add success.
I0321 08:03:43.409809  543705 cpu.go:282] Add success.
I0321 08:03:43.420261  543705 net.go:648] Add success.
I0321 08:03:43.422823  543705 net.go:770] primary dev: ETH0
I0321 08:03:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:03:43.422853  543705 net.go:698] Add success.
I0321 08:03:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:03:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:03:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:03:53.409772  543705 memory.go:184] no items to output this cycle
I0321 08:03:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 08:04:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:04:03.409779  543705 memory.go:184] no items to output this cycle
I0321 08:04:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 08:04:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:04:13.409793  543705 memory.go:191] Add success.
I0321 08:04:13.409798  543705 cpu.go:282] Add success.
W0321 08:04:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:04:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:04:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:04:13.420248  543705 net.go:648] Add success.
I0321 08:04:13.422946  543705 net.go:770] primary dev: ETH0
I0321 08:04:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:04:13.422971  543705 net.go:698] Add success.
I0321 08:04:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:04:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:04:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 08:04:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:04:14.456610  543705 disk_worker.go:494] system disk:vda1
I0321 08:04:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:04:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:04:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:04:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:04:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:04:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:04:23.409802  543705 memory.go:184] no items to output this cycle
I0321 08:04:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 08:04:28.539340  543705 disk_info.go:125] begin check local disk info of client
I0321 08:04:28.541892  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:04:28.541898  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002745c0 0xc000274600]
E0321 08:04:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:04:33.409796  543705 memory.go:184] no items to output this cycle
I0321 08:04:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 08:04:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:04:43.409796  543705 memory.go:191] Add success.
I0321 08:04:43.409811  543705 cpu.go:282] Add success.
I0321 08:04:43.420081  543705 net.go:648] Add success.
I0321 08:04:43.422753  543705 net.go:770] primary dev: ETH0
I0321 08:04:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:04:43.422782  543705 net.go:698] Add success.
I0321 08:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:04:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:04:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:04:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:04:53.409796  543705 memory.go:184] no items to output this cycle
I0321 08:04:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 08:05:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:05:03.409766  543705 memory.go:184] no items to output this cycle
I0321 08:05:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:05:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:05:13.409779  543705 memory.go:191] Add success.
I0321 08:05:13.409800  543705 cpu.go:282] Add success.
W0321 08:05:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:05:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:05:13.420160  543705 net.go:648] Add success.
I0321 08:05:13.423037  543705 net.go:770] primary dev: ETH0
I0321 08:05:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:05:13.423070  543705 net.go:698] Add success.
I0321 08:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:05:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:05:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0321 08:05:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:05:14.456624  543705 disk_worker.go:494] system disk:vda1
I0321 08:05:14.456658  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:05:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:05:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:05:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:05:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:05:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 08:05:23.409788  543705 memory.go:184] no items to output this cycle
I0321 08:05:28.542386  543705 disk_info.go:125] begin check local disk info of client
I0321 08:05:28.544871  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:05:28.544877  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f300 0xc00039f340]
E0321 08:05:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:05:33.409789  543705 memory.go:184] no items to output this cycle
I0321 08:05:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:05:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:05:43.409789  543705 memory.go:191] Add success.
I0321 08:05:43.409810  543705 cpu.go:282] Add success.
I0321 08:05:43.419889  543705 net.go:648] Add success.
I0321 08:05:43.422684  543705 net.go:770] primary dev: ETH0
I0321 08:05:43.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:05:43.422711  543705 net.go:698] Add success.
I0321 08:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:05:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:05:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:05:53.409771  543705 memory.go:184] no items to output this cycle
I0321 08:05:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:06:03.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:06:03.409879  543705 memory.go:184] no items to output this cycle
I0321 08:06:03.409921  543705 cpu.go:275] no items to output this cycle
E0321 08:06:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:06:13.409777  543705 memory.go:191] Add success.
W0321 08:06:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:06:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:06:13.409814  543705 cpu.go:282] Add success.
I0321 08:06:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:06:13.420127  543705 net.go:648] Add success.
I0321 08:06:13.422937  543705 net.go:770] primary dev: ETH0
I0321 08:06:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:06:13.422963  543705 net.go:698] Add success.
I0321 08:06:13.465921  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a788c95-6dba-49c5-92a7-5bc7923b0157","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:06:13.465954  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:06:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:06:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:06:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 08:06:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:06:14.456730  543705 disk_worker.go:494] system disk:vda1
I0321 08:06:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:06:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:06:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:06:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:06:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:06:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:06:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:06:23.409769  543705 memory.go:184] no items to output this cycle
I0321 08:06:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 08:06:28.545368  543705 disk_info.go:125] begin check local disk info of client
I0321 08:06:28.547843  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:06:28.547850  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4180 0xc0000c41c0]
E0321 08:06:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:06:33.409791  543705 memory.go:184] no items to output this cycle
I0321 08:06:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 08:06:38.784629  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:06:38.784636  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:06:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:06:43.410673  543705 memory.go:191] Add success.
I0321 08:06:43.409812  543705 cpu.go:282] Add success.
I0321 08:06:43.420448  543705 net.go:648] Add success.
I0321 08:06:43.423311  543705 net.go:770] primary dev: ETH0
I0321 08:06:43.423326  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:06:43.423342  543705 net.go:698] Add success.
I0321 08:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:06:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:06:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:06:53.409798  543705 memory.go:184] no items to output this cycle
I0321 08:06:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:07:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:07:03.409787  543705 memory.go:184] no items to output this cycle
I0321 08:07:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 08:07:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:07:13.409788  543705 memory.go:191] Add success.
I0321 08:07:13.409793  543705 cpu.go:282] Add success.
W0321 08:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:07:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:07:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:07:13.420252  543705 net.go:648] Add success.
I0321 08:07:13.423205  543705 net.go:770] primary dev: ETH0
I0321 08:07:13.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:07:13.423231  543705 net.go:698] Add success.
I0321 08:07:13.452857  543705 event_worker.go:152] Polling the log file for events...
W0321 08:07:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:07:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 08:07:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:07:14.456934  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:07:14.456943  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:07:14.456950  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:07:14.456990  543705 disk_worker.go:494] system disk:vda1
I0321 08:07:14.457021  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:07:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:07:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:07:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:07:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:07:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:07:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:07:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:07:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:07:23.409795  543705 memory.go:184] no items to output this cycle
I0321 08:07:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 08:07:28.548379  543705 disk_info.go:125] begin check local disk info of client
I0321 08:07:28.550902  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:07:28.550908  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e180 0xc00047e1c0]
E0321 08:07:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:07:33.409770  543705 memory.go:184] no items to output this cycle
I0321 08:07:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 08:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:07:43.409794  543705 memory.go:191] Add success.
I0321 08:07:43.409795  543705 cpu.go:282] Add success.
I0321 08:07:43.419869  543705 net.go:648] Add success.
I0321 08:07:43.422749  543705 net.go:770] primary dev: ETH0
I0321 08:07:43.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:07:43.422779  543705 net.go:698] Add success.
I0321 08:07:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:07:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:07:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:07:53.410223  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:07:53.410238  543705 memory.go:184] no items to output this cycle
I0321 08:07:53.410267  543705 cpu.go:275] no items to output this cycle
E0321 08:08:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:08:03.409779  543705 memory.go:184] no items to output this cycle
I0321 08:08:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 08:08:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:08:13.409815  543705 memory.go:191] Add success.
I0321 08:08:13.409821  543705 cpu.go:282] Add success.
W0321 08:08:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:08:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:08:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:08:13.420486  543705 net.go:648] Add success.
I0321 08:08:13.423233  543705 net.go:770] primary dev: ETH0
I0321 08:08:13.423246  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:08:13.423258  543705 net.go:698] Add success.
I0321 08:08:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:08:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:08:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 08:08:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:08:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 08:08:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:08:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:08:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:08:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:08:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:08:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:08:23.409781  543705 memory.go:184] no items to output this cycle
I0321 08:08:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 08:08:28.551394  543705 disk_info.go:125] begin check local disk info of client
I0321 08:08:28.553849  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:08:28.553855  543705 disk_info.go:196] parse disk info done, disk is : [0xc000251000 0xc000251040]
E0321 08:08:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:08:33.409776  543705 memory.go:184] no items to output this cycle
I0321 08:08:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:08:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:08:43.409788  543705 memory.go:191] Add success.
I0321 08:08:43.409809  543705 cpu.go:282] Add success.
I0321 08:08:43.420046  543705 net.go:648] Add success.
I0321 08:08:43.422596  543705 net.go:770] primary dev: ETH0
I0321 08:08:43.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:08:43.422622  543705 net.go:698] Add success.
I0321 08:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:08:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:08:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:08:53.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:08:53.410381  543705 memory.go:184] no items to output this cycle
I0321 08:08:53.410362  543705 cpu.go:275] no items to output this cycle
E0321 08:09:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:09:03.409787  543705 memory.go:184] no items to output this cycle
I0321 08:09:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 08:09:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:09:13.409824  543705 memory.go:191] Add success.
I0321 08:09:13.409830  543705 cpu.go:282] Add success.
W0321 08:09:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:09:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:09:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:09:13.420303  543705 net.go:648] Add success.
I0321 08:09:13.423064  543705 net.go:770] primary dev: ETH0
I0321 08:09:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:09:13.423090  543705 net.go:698] Add success.
I0321 08:09:13.493057  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3c2713c7-0587-4b0e-a8e4-e8c0d9943190","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:09:13.493097  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:09:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:09:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:09:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 08:09:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:09:14.456514  543705 disk_worker.go:494] system disk:vda1
I0321 08:09:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:09:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:09:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:09:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:09:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:09:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:09:23.409766  543705 memory.go:184] no items to output this cycle
I0321 08:09:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 08:09:28.554409  543705 disk_info.go:125] begin check local disk info of client
I0321 08:09:28.556865  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:09:28.556871  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8480 0xc0003b84c0]
E0321 08:09:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:09:33.409789  543705 memory.go:184] no items to output this cycle
I0321 08:09:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 08:09:38.785634  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:09:38.785642  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:09:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:09:43.410540  543705 memory.go:191] Add success.
I0321 08:09:43.409812  543705 cpu.go:282] Add success.
I0321 08:09:43.420241  543705 net.go:648] Add success.
I0321 08:09:43.423253  543705 net.go:770] primary dev: ETH0
I0321 08:09:43.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:09:43.423278  543705 net.go:698] Add success.
I0321 08:09:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:09:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:09:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:09:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:09:53.409783  543705 memory.go:184] no items to output this cycle
I0321 08:09:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 08:10:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:10:03.409800  543705 memory.go:184] no items to output this cycle
I0321 08:10:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 08:10:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:10:13.409789  543705 memory.go:191] Add success.
I0321 08:10:13.409813  543705 cpu.go:282] Add success.
W0321 08:10:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:10:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:10:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:10:13.420113  543705 net.go:648] Add success.
I0321 08:10:13.423195  543705 net.go:770] primary dev: ETH0
I0321 08:10:13.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:10:13.423221  543705 net.go:698] Add success.
I0321 08:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:10:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:10:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 08:10:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:10:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 08:10:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:10:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:10:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:10:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:10:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:10:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:10:23.409767  543705 memory.go:184] no items to output this cycle
I0321 08:10:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 08:10:28.557433  543705 disk_info.go:125] begin check local disk info of client
I0321 08:10:28.559897  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:10:28.559904  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8740 0xc0002b8780]
E0321 08:10:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:10:33.409783  543705 memory.go:184] no items to output this cycle
I0321 08:10:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 08:10:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:10:43.409795  543705 memory.go:191] Add success.
I0321 08:10:43.409815  543705 cpu.go:282] Add success.
I0321 08:10:43.420147  543705 net.go:648] Add success.
I0321 08:10:43.423117  543705 net.go:770] primary dev: ETH0
I0321 08:10:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:10:43.423143  543705 net.go:698] Add success.
I0321 08:10:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:10:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:10:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:10:53.409786  543705 memory.go:184] no items to output this cycle
I0321 08:10:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 08:11:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:11:03.409803  543705 memory.go:184] no items to output this cycle
I0321 08:11:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 08:11:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:11:13.409791  543705 memory.go:191] Add success.
W0321 08:11:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 08:11:13.409822  543705 cpu.go:282] Add success.
W0321 08:11:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:11:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:11:13.420066  543705 net.go:648] Add success.
I0321 08:11:13.422802  543705 net.go:770] primary dev: ETH0
I0321 08:11:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:11:13.422828  543705 net.go:698] Add success.
I0321 08:11:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:11:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:11:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 08:11:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:11:14.456537  543705 disk_worker.go:494] system disk:vda1
I0321 08:11:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:11:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:11:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:11:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:11:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:11:23.409801  543705 memory.go:184] no items to output this cycle
I0321 08:11:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 08:11:28.560448  543705 disk_info.go:125] begin check local disk info of client
I0321 08:11:28.562925  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:11:28.562931  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9c00 0xc0003e9c40]
E0321 08:11:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:11:33.409771  543705 memory.go:184] no items to output this cycle
I0321 08:11:33.409776  543705 cpu.go:275] no items to output this cycle
E0321 08:11:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:11:43.409784  543705 memory.go:191] Add success.
I0321 08:11:43.409810  543705 cpu.go:282] Add success.
I0321 08:11:43.419989  543705 net.go:648] Add success.
I0321 08:11:43.422775  543705 net.go:770] primary dev: ETH0
I0321 08:11:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:11:43.422801  543705 net.go:698] Add success.
I0321 08:11:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:11:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:11:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:11:53.409799  543705 memory.go:184] no items to output this cycle
I0321 08:11:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 08:12:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:12:03.409771  543705 memory.go:184] no items to output this cycle
I0321 08:12:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 08:12:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:12:13.409825  543705 memory.go:191] Add success.
I0321 08:12:13.409828  543705 cpu.go:282] Add success.
W0321 08:12:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:12:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:12:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:12:13.420151  543705 net.go:648] Add success.
I0321 08:12:13.422959  543705 net.go:770] primary dev: ETH0
I0321 08:12:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:12:13.422983  543705 net.go:698] Add success.
I0321 08:12:13.906109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6bd13cdf-529f-4c3f-abb4-c46abad5a332","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:12:13.906141  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 08:12:14.454929  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:12:14.454938  543705 disk_worker.go:708] disk space is not compliant
W0321 08:12:14.454941  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:12:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:12:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:12:14.455897  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:12:14.456245  543705 disk_worker.go:494] system disk:vda1
I0321 08:12:14.456273  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:12:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:12:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:12:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:12:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:12:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:12:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:12:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:12:23.410290  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:12:23.410306  543705 memory.go:184] no items to output this cycle
I0321 08:12:23.410319  543705 cpu.go:275] no items to output this cycle
I0321 08:12:28.563453  543705 disk_info.go:125] begin check local disk info of client
I0321 08:12:28.565940  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:12:28.565947  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ee80 0xc00047eec0]
E0321 08:12:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:12:33.409805  543705 memory.go:184] no items to output this cycle
I0321 08:12:33.409816  543705 cpu.go:275] no items to output this cycle
I0321 08:12:38.786794  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:12:38.786802  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:12:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:12:43.410617  543705 memory.go:191] Add success.
I0321 08:12:43.409828  543705 cpu.go:282] Add success.
I0321 08:12:43.420534  543705 net.go:648] Add success.
I0321 08:12:43.423294  543705 net.go:770] primary dev: ETH0
I0321 08:12:43.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:12:43.423318  543705 net.go:698] Add success.
I0321 08:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:12:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:12:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:12:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:12:53.409782  543705 memory.go:184] no items to output this cycle
I0321 08:12:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:13:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:13:03.409787  543705 memory.go:184] no items to output this cycle
I0321 08:13:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 08:13:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:13:13.409829  543705 memory.go:191] Add success.
I0321 08:13:13.409844  543705 cpu.go:282] Add success.
W0321 08:13:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:13:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:13:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:13:13.420064  543705 net.go:648] Add success.
I0321 08:13:13.422931  543705 net.go:770] primary dev: ETH0
I0321 08:13:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:13:13.422956  543705 net.go:698] Add success.
I0321 08:13:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:13:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:13:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 08:13:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:13:14.456559  543705 disk_worker.go:494] system disk:vda1
I0321 08:13:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:13:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:13:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:13:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:13:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:13:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:13:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:13:23.409802  543705 memory.go:184] no items to output this cycle
I0321 08:13:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 08:13:28.566473  543705 disk_info.go:125] begin check local disk info of client
I0321 08:13:28.568988  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:13:28.568994  543705 disk_info.go:196] parse disk info done, disk is : [0xc00023ee80 0xc00023eec0]
E0321 08:13:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:13:33.409788  543705 memory.go:184] no items to output this cycle
I0321 08:13:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 08:13:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:13:43.409809  543705 memory.go:191] Add success.
I0321 08:13:43.409811  543705 cpu.go:282] Add success.
I0321 08:13:43.420067  543705 net.go:648] Add success.
I0321 08:13:43.423171  543705 net.go:770] primary dev: ETH0
I0321 08:13:43.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:13:43.423195  543705 net.go:698] Add success.
I0321 08:13:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:13:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:13:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:13:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:13:53.409776  543705 memory.go:184] no items to output this cycle
I0321 08:13:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 08:14:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:14:03.409774  543705 memory.go:184] no items to output this cycle
I0321 08:14:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 08:14:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:14:13.409786  543705 memory.go:191] Add success.
I0321 08:14:13.409822  543705 cpu.go:282] Add success.
W0321 08:14:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:14:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:14:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:14:13.420142  543705 net.go:648] Add success.
I0321 08:14:13.422788  543705 net.go:770] primary dev: ETH0
I0321 08:14:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:14:13.422824  543705 net.go:698] Add success.
I0321 08:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:14:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:14:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 08:14:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:14:14.456492  543705 disk_worker.go:494] system disk:vda1
I0321 08:14:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:14:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:14:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:14:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:14:23.409775  543705 memory.go:184] no items to output this cycle
I0321 08:14:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 08:14:28.569488  543705 disk_info.go:125] begin check local disk info of client
I0321 08:14:28.571992  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:14:28.571999  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484f00 0xc000484f40]
E0321 08:14:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:14:33.409791  543705 memory.go:184] no items to output this cycle
I0321 08:14:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 08:14:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:14:43.409788  543705 memory.go:191] Add success.
I0321 08:14:43.409813  543705 cpu.go:282] Add success.
I0321 08:14:43.419713  543705 net.go:648] Add success.
I0321 08:14:43.422247  543705 net.go:770] primary dev: ETH0
I0321 08:14:43.422259  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:14:43.422271  543705 net.go:698] Add success.
I0321 08:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:14:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:14:53.409774  543705 memory.go:184] no items to output this cycle
I0321 08:14:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 08:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:15:03.409777  543705 memory.go:184] no items to output this cycle
I0321 08:15:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:15:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:15:13.409824  543705 memory.go:191] Add success.
I0321 08:15:13.409829  543705 cpu.go:282] Add success.
W0321 08:15:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:15:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:15:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:15:13.420165  543705 net.go:648] Add success.
I0321 08:15:13.422903  543705 net.go:770] primary dev: ETH0
I0321 08:15:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:15:13.422927  543705 net.go:698] Add success.
I0321 08:15:13.469932  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b1d50ee4-dfc0-4580-a03a-4646457f7f61","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:15:13.469967  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:15:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:15:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 08:15:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:15:14.456666  543705 disk_worker.go:494] system disk:vda1
I0321 08:15:14.456695  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:15:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:15:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:15:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:15:23.409775  543705 memory.go:184] no items to output this cycle
I0321 08:15:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 08:15:28.572494  543705 disk_info.go:125] begin check local disk info of client
I0321 08:15:28.575013  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:15:28.575020  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004970c0 0xc000497100]
E0321 08:15:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:15:33.409786  543705 memory.go:184] no items to output this cycle
I0321 08:15:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 08:15:38.788646  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:15:38.788653  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:15:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:15:43.410667  543705 memory.go:191] Add success.
I0321 08:15:43.409934  543705 cpu.go:282] Add success.
I0321 08:15:43.419716  543705 net.go:648] Add success.
I0321 08:15:43.422444  543705 net.go:770] primary dev: ETH0
I0321 08:15:43.422457  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:15:43.422469  543705 net.go:698] Add success.
I0321 08:15:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:15:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:15:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:15:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:15:53.409775  543705 memory.go:184] no items to output this cycle
I0321 08:15:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 08:16:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:16:03.409778  543705 memory.go:184] no items to output this cycle
I0321 08:16:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:16:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:16:13.409817  543705 memory.go:191] Add success.
I0321 08:16:13.409830  543705 cpu.go:282] Add success.
W0321 08:16:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:16:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:16:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:16:13.420142  543705 net.go:648] Add success.
I0321 08:16:13.423008  543705 net.go:770] primary dev: ETH0
I0321 08:16:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:16:13.423048  543705 net.go:698] Add success.
I0321 08:16:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:16:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:16:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 08:16:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:16:14.456603  543705 disk_worker.go:494] system disk:vda1
I0321 08:16:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:16:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:16:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:16:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:16:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:16:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:16:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:16:23.409772  543705 memory.go:184] no items to output this cycle
I0321 08:16:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 08:16:28.575508  543705 disk_info.go:125] begin check local disk info of client
I0321 08:16:28.578021  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:16:28.578027  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9c80 0xc0003b9cc0]
E0321 08:16:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:16:33.409760  543705 memory.go:184] no items to output this cycle
I0321 08:16:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 08:16:43.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:16:43.409929  543705 memory.go:191] Add success.
I0321 08:16:43.409956  543705 cpu.go:282] Add success.
I0321 08:16:43.419733  543705 net.go:648] Add success.
I0321 08:16:43.422393  543705 net.go:770] primary dev: ETH0
I0321 08:16:43.422408  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:16:43.422422  543705 net.go:698] Add success.
I0321 08:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:16:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:16:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:16:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:16:53.409771  543705 memory.go:184] no items to output this cycle
I0321 08:16:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 08:17:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:17:03.409784  543705 memory.go:184] no items to output this cycle
I0321 08:17:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 08:17:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:17:13.409786  543705 memory.go:191] Add success.
W0321 08:17:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 08:17:13.409812  543705 cpu.go:282] Add success.
W0321 08:17:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:17:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:17:13.420251  543705 net.go:648] Add success.
I0321 08:17:13.423325  543705 net.go:770] primary dev: ETH0
I0321 08:17:13.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:17:13.423350  543705 net.go:698] Add success.
I0321 08:17:13.453239  543705 event_worker.go:152] Polling the log file for events...
W0321 08:17:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:17:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 08:17:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:17:14.456828  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:17:14.456838  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:17:14.456844  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:17:14.456888  543705 disk_worker.go:494] system disk:vda1
I0321 08:17:14.456929  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:17:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:17:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:17:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:17:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:17:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:17:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:17:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:17:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:17:23.409773  543705 memory.go:184] no items to output this cycle
I0321 08:17:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 08:17:28.578531  543705 disk_info.go:125] begin check local disk info of client
I0321 08:17:28.580983  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:17:28.580989  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466700 0xc000466740]
E0321 08:17:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:17:33.409799  543705 memory.go:184] no items to output this cycle
I0321 08:17:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 08:17:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:17:43.409828  543705 memory.go:191] Add success.
I0321 08:17:43.409834  543705 cpu.go:282] Add success.
I0321 08:17:43.420061  543705 net.go:648] Add success.
I0321 08:17:43.422881  543705 net.go:770] primary dev: ETH0
I0321 08:17:43.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:17:43.422908  543705 net.go:698] Add success.
I0321 08:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:17:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:17:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:17:53.409791  543705 memory.go:184] no items to output this cycle
I0321 08:17:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 08:18:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:18:03.409771  543705 memory.go:184] no items to output this cycle
I0321 08:18:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:18:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:18:13.409778  543705 memory.go:191] Add success.
I0321 08:18:13.409790  543705 cpu.go:282] Add success.
W0321 08:18:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:18:13.412482  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:18:13.412487  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:18:13.420052  543705 net.go:648] Add success.
I0321 08:18:13.421733  543705 net.go:770] primary dev: ETH0
I0321 08:18:13.421746  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:18:13.421758  543705 net.go:698] Add success.
I0321 08:18:13.465102  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"485e4d0d-c0b3-443e-956e-f7a287d3f3da","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:18:13.465135  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:18:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:18:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:18:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 08:18:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:18:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 08:18:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:18:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:18:16.458216  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:18:16.458273  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:18:16.458292  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:18:16.472607  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:18:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:18:23.409757  543705 memory.go:184] no items to output this cycle
I0321 08:18:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 08:18:28.581548  543705 disk_info.go:125] begin check local disk info of client
I0321 08:18:28.583959  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:18:28.583964  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2640 0xc0003b2680]
E0321 08:18:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:18:33.409793  543705 memory.go:184] no items to output this cycle
I0321 08:18:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 08:18:38.789662  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:18:38.789670  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:18:43.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:18:43.410742  543705 memory.go:191] Add success.
I0321 08:18:43.409998  543705 cpu.go:282] Add success.
I0321 08:18:43.419712  543705 net.go:648] Add success.
I0321 08:18:43.422264  543705 net.go:770] primary dev: ETH0
I0321 08:18:43.422280  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:18:43.422294  543705 net.go:698] Add success.
I0321 08:18:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:18:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:18:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:18:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:18:53.409773  543705 memory.go:184] no items to output this cycle
I0321 08:18:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 08:19:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:19:03.409807  543705 memory.go:184] no items to output this cycle
I0321 08:19:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 08:19:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:19:13.409821  543705 memory.go:191] Add success.
I0321 08:19:13.409824  543705 cpu.go:282] Add success.
W0321 08:19:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:19:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:19:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:19:13.420468  543705 net.go:648] Add success.
I0321 08:19:13.424035  543705 net.go:770] primary dev: ETH0
I0321 08:19:13.424048  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:19:13.424061  543705 net.go:698] Add success.
I0321 08:19:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:19:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:19:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 08:19:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:19:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 08:19:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:19:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:19:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:19:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:19:23.409797  543705 memory.go:184] no items to output this cycle
I0321 08:19:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 08:19:28.584552  543705 disk_info.go:125] begin check local disk info of client
I0321 08:19:28.587023  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:19:28.587029  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353400 0xc000353440]
E0321 08:19:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:19:33.409793  543705 memory.go:184] no items to output this cycle
I0321 08:19:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 08:19:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:19:43.409796  543705 memory.go:191] Add success.
I0321 08:19:43.409814  543705 cpu.go:282] Add success.
I0321 08:19:43.420062  543705 net.go:648] Add success.
I0321 08:19:43.423278  543705 net.go:770] primary dev: ETH0
I0321 08:19:43.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:19:43.423317  543705 net.go:698] Add success.
I0321 08:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:19:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:19:53.409773  543705 memory.go:184] no items to output this cycle
I0321 08:19:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:20:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:20:03.409801  543705 memory.go:184] no items to output this cycle
I0321 08:20:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 08:20:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:20:13.409790  543705 cpu.go:282] Add success.
I0321 08:20:13.409795  543705 memory.go:191] Add success.
W0321 08:20:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:20:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:20:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:20:13.420035  543705 net.go:648] Add success.
I0321 08:20:13.422776  543705 net.go:770] primary dev: ETH0
I0321 08:20:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:20:13.422802  543705 net.go:698] Add success.
I0321 08:20:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:20:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:20:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 08:20:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:20:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 08:20:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:20:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:20:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:20:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:20:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:20:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:20:23.409799  543705 memory.go:184] no items to output this cycle
I0321 08:20:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 08:20:28.587576  543705 disk_info.go:125] begin check local disk info of client
I0321 08:20:28.590092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:20:28.590098  543705 disk_info.go:196] parse disk info done, disk is : [0xc000251500 0xc000251540]
E0321 08:20:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:20:33.409777  543705 memory.go:184] no items to output this cycle
I0321 08:20:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 08:20:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:20:43.409825  543705 memory.go:191] Add success.
I0321 08:20:43.409826  543705 cpu.go:282] Add success.
I0321 08:20:43.419986  543705 net.go:648] Add success.
I0321 08:20:43.422945  543705 net.go:770] primary dev: ETH0
I0321 08:20:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:20:43.422971  543705 net.go:698] Add success.
I0321 08:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:20:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:20:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:20:53.409794  543705 memory.go:184] no items to output this cycle
I0321 08:20:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 08:21:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:21:03.409789  543705 memory.go:184] no items to output this cycle
I0321 08:21:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 08:21:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:21:13.409807  543705 memory.go:191] Add success.
I0321 08:21:13.409815  543705 cpu.go:282] Add success.
W0321 08:21:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:21:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:21:13.420534  543705 net.go:648] Add success.
I0321 08:21:13.423080  543705 net.go:770] primary dev: ETH0
I0321 08:21:13.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:21:13.423104  543705 net.go:698] Add success.
I0321 08:21:13.470869  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fb1fb321-ac95-49f4-95af-7317141c9f87","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:21:13.470905  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:21:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:21:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:21:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 08:21:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:21:14.456531  543705 disk_worker.go:494] system disk:vda1
I0321 08:21:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:21:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:21:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:21:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:21:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:21:23.409773  543705 cpu.go:275] no items to output this cycle
I0321 08:21:23.409782  543705 memory.go:184] no items to output this cycle
I0321 08:21:28.590604  543705 disk_info.go:125] begin check local disk info of client
I0321 08:21:28.593030  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:21:28.593035  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465940 0xc000465980]
E0321 08:21:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:21:33.409794  543705 memory.go:184] no items to output this cycle
I0321 08:21:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 08:21:38.790795  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:21:38.790803  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:21:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:21:43.410523  543705 memory.go:191] Add success.
I0321 08:21:43.409830  543705 cpu.go:282] Add success.
I0321 08:21:43.420217  543705 net.go:648] Add success.
I0321 08:21:43.422647  543705 net.go:770] primary dev: ETH0
I0321 08:21:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:21:43.422676  543705 net.go:698] Add success.
I0321 08:21:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:21:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:21:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:21:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:21:53.409914  543705 memory.go:184] no items to output this cycle
I0321 08:21:53.409966  543705 cpu.go:275] no items to output this cycle
E0321 08:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:22:03.409784  543705 memory.go:184] no items to output this cycle
I0321 08:22:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 08:22:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:22:13.409810  543705 memory.go:191] Add success.
I0321 08:22:13.409815  543705 cpu.go:282] Add success.
W0321 08:22:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:22:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:22:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:22:13.420103  543705 net.go:648] Add success.
I0321 08:22:13.422747  543705 net.go:770] primary dev: ETH0
I0321 08:22:13.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:22:13.422776  543705 net.go:698] Add success.
W0321 08:22:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:22:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 08:22:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:22:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:22:14.455886  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:22:14.455892  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:22:14.456544  543705 disk_worker.go:494] system disk:vda1
I0321 08:22:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:22:15.456773  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:22:15.456781  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:22:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:22:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:22:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:22:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:22:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:22:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:22:23.409795  543705 memory.go:184] no items to output this cycle
I0321 08:22:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 08:22:28.593605  543705 disk_info.go:125] begin check local disk info of client
I0321 08:22:28.596119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:22:28.596125  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b700 0xc00032b740]
E0321 08:22:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:22:33.409766  543705 memory.go:184] no items to output this cycle
I0321 08:22:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 08:22:43.410424  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:22:43.410453  543705 memory.go:191] Add success.
I0321 08:22:43.410470  543705 cpu.go:282] Add success.
I0321 08:22:43.420697  543705 net.go:648] Add success.
I0321 08:22:43.423545  543705 net.go:770] primary dev: ETH0
I0321 08:22:43.423558  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:22:43.423570  543705 net.go:698] Add success.
I0321 08:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:22:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:22:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:22:53.409765  543705 memory.go:184] no items to output this cycle
I0321 08:22:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:23:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:23:03.409779  543705 memory.go:184] no items to output this cycle
I0321 08:23:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 08:23:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:23:13.409797  543705 memory.go:191] Add success.
I0321 08:23:13.409811  543705 cpu.go:282] Add success.
W0321 08:23:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:23:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:23:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:23:13.420211  543705 net.go:648] Add success.
I0321 08:23:13.423003  543705 net.go:770] primary dev: ETH0
I0321 08:23:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:23:13.423032  543705 net.go:698] Add success.
I0321 08:23:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:23:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:23:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 08:23:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:23:14.456572  543705 disk_worker.go:494] system disk:vda1
I0321 08:23:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:23:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:23:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:23:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:23:23.409770  543705 memory.go:184] no items to output this cycle
I0321 08:23:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 08:23:28.596622  543705 disk_info.go:125] begin check local disk info of client
I0321 08:23:28.599080  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:23:28.599086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bcbc0 0xc0002bcc00]
E0321 08:23:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:23:33.409791  543705 memory.go:184] no items to output this cycle
I0321 08:23:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 08:23:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:23:43.409802  543705 memory.go:191] Add success.
I0321 08:23:43.409817  543705 cpu.go:282] Add success.
I0321 08:23:43.420022  543705 net.go:648] Add success.
I0321 08:23:43.422852  543705 net.go:770] primary dev: ETH0
I0321 08:23:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:23:43.422879  543705 net.go:698] Add success.
I0321 08:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:23:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:23:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:23:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:23:53.409779  543705 memory.go:184] no items to output this cycle
I0321 08:23:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 08:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:24:03.409782  543705 memory.go:184] no items to output this cycle
I0321 08:24:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:24:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:24:13.409782  543705 memory.go:191] Add success.
I0321 08:24:13.409806  543705 cpu.go:282] Add success.
W0321 08:24:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:24:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:24:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:24:13.420109  543705 net.go:648] Add success.
I0321 08:24:13.422606  543705 net.go:770] primary dev: ETH0
I0321 08:24:13.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:24:13.422630  543705 net.go:698] Add success.
I0321 08:24:13.471057  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f412c1e3-bda0-4011-a727-328bd129fb53","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:24:13.471091  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:24:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:24:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:24:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 08:24:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:24:14.456692  543705 disk_worker.go:494] system disk:vda1
I0321 08:24:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:24:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:24:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:24:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:24:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:24:23.409798  543705 memory.go:184] no items to output this cycle
I0321 08:24:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 08:24:28.599645  543705 disk_info.go:125] begin check local disk info of client
I0321 08:24:28.602129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:24:28.602135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003abc00 0xc0003abc40]
E0321 08:24:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:24:33.409797  543705 memory.go:184] no items to output this cycle
I0321 08:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 08:24:38.792688  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:24:38.792695  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:24:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:24:43.410841  543705 memory.go:191] Add success.
I0321 08:24:43.409834  543705 cpu.go:282] Add success.
I0321 08:24:43.420883  543705 net.go:648] Add success.
I0321 08:24:43.423417  543705 net.go:770] primary dev: ETH0
I0321 08:24:43.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:24:43.423443  543705 net.go:698] Add success.
I0321 08:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:24:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:24:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:24:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:24:53.409795  543705 cpu.go:275] no items to output this cycle
I0321 08:24:53.409800  543705 memory.go:184] no items to output this cycle
E0321 08:25:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:25:03.409785  543705 memory.go:184] no items to output this cycle
I0321 08:25:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 08:25:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:25:13.409809  543705 memory.go:191] Add success.
I0321 08:25:13.409812  543705 cpu.go:282] Add success.
W0321 08:25:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:25:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:25:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:25:13.420064  543705 net.go:648] Add success.
I0321 08:25:13.422667  543705 net.go:770] primary dev: ETH0
I0321 08:25:13.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:25:13.422692  543705 net.go:698] Add success.
I0321 08:25:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:25:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:25:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 08:25:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:25:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 08:25:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:25:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:25:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:25:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:25:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:25:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:25:23.409801  543705 memory.go:184] no items to output this cycle
I0321 08:25:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 08:25:28.602650  543705 disk_info.go:125] begin check local disk info of client
I0321 08:25:28.605139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:25:28.605145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba380 0xc0002ba3c0]
E0321 08:25:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:25:33.409806  543705 memory.go:184] no items to output this cycle
I0321 08:25:33.409830  543705 cpu.go:275] no items to output this cycle
E0321 08:25:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:25:43.409808  543705 memory.go:191] Add success.
I0321 08:25:43.409839  543705 cpu.go:282] Add success.
I0321 08:25:43.420135  543705 net.go:648] Add success.
I0321 08:25:43.422954  543705 net.go:770] primary dev: ETH0
I0321 08:25:43.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:25:43.422980  543705 net.go:698] Add success.
I0321 08:25:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:25:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:25:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:25:53.409807  543705 memory.go:184] no items to output this cycle
I0321 08:25:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 08:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:26:03.409776  543705 memory.go:184] no items to output this cycle
I0321 08:26:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 08:26:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:26:13.409808  543705 memory.go:191] Add success.
I0321 08:26:13.409824  543705 cpu.go:282] Add success.
W0321 08:26:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:26:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:26:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:26:13.420098  543705 net.go:648] Add success.
I0321 08:26:13.422893  543705 net.go:770] primary dev: ETH0
I0321 08:26:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:26:13.422919  543705 net.go:698] Add success.
I0321 08:26:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:26:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:26:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 08:26:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:26:14.456559  543705 disk_worker.go:494] system disk:vda1
I0321 08:26:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:26:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:26:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:26:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:26:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:26:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:26:23.409778  543705 memory.go:184] no items to output this cycle
I0321 08:26:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 08:26:28.605670  543705 disk_info.go:125] begin check local disk info of client
I0321 08:26:28.608163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:26:28.608169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486940 0xc000486980]
E0321 08:26:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:26:33.409770  543705 memory.go:184] no items to output this cycle
I0321 08:26:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 08:26:43.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:26:43.409928  543705 memory.go:191] Add success.
I0321 08:26:43.409968  543705 cpu.go:282] Add success.
I0321 08:26:43.419723  543705 net.go:648] Add success.
I0321 08:26:43.423126  543705 net.go:770] primary dev: ETH0
I0321 08:26:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:26:43.423151  543705 net.go:698] Add success.
I0321 08:26:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:26:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:26:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:26:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:26:53.409784  543705 memory.go:184] no items to output this cycle
I0321 08:26:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 08:27:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:27:03.409777  543705 memory.go:184] no items to output this cycle
I0321 08:27:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 08:27:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:27:13.409786  543705 memory.go:191] Add success.
I0321 08:27:13.409789  543705 cpu.go:282] Add success.
W0321 08:27:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:27:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:27:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:27:13.420091  543705 net.go:648] Add success.
I0321 08:27:13.422894  543705 net.go:770] primary dev: ETH0
I0321 08:27:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:27:13.422937  543705 net.go:698] Add success.
I0321 08:27:13.429384  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 08:27:13.453561  543705 event_worker.go:152] Polling the log file for events...
I0321 08:27:13.469417  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75567df8-bb9a-432e-a29b-a3019fc102cd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:27:13.469459  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 08:27:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:27:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 08:27:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:27:14.456937  543705 disk_worker.go:494] system disk:vda1
E0321 08:27:14.456948  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:27:14.456956  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:27:14.456960  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:27:14.456980  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:27:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:27:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:27:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:27:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:27:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:27:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:27:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:27:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:27:23.409766  543705 memory.go:184] no items to output this cycle
I0321 08:27:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 08:27:28.608681  543705 disk_info.go:125] begin check local disk info of client
I0321 08:27:28.611151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:27:28.611157  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033b580 0xc00033b5c0]
E0321 08:27:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:27:33.409762  543705 memory.go:184] no items to output this cycle
I0321 08:27:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 08:27:38.793732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:27:38.793739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:27:43.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:27:43.410907  543705 memory.go:191] Add success.
I0321 08:27:43.410035  543705 cpu.go:282] Add success.
I0321 08:27:43.419721  543705 net.go:648] Add success.
I0321 08:27:43.422456  543705 net.go:770] primary dev: ETH0
I0321 08:27:43.422471  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:27:43.422486  543705 net.go:698] Add success.
I0321 08:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:27:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:27:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:27:53.409804  543705 memory.go:184] no items to output this cycle
I0321 08:27:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 08:28:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:28:03.409777  543705 memory.go:184] no items to output this cycle
I0321 08:28:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 08:28:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:28:13.409788  543705 memory.go:191] Add success.
I0321 08:28:13.409791  543705 cpu.go:282] Add success.
W0321 08:28:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:28:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:28:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:28:13.420075  543705 net.go:648] Add success.
I0321 08:28:13.423075  543705 net.go:770] primary dev: ETH0
I0321 08:28:13.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:28:13.423100  543705 net.go:698] Add success.
I0321 08:28:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:28:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:28:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 08:28:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:28:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 08:28:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:28:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:28:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:28:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:28:23.409798  543705 memory.go:184] no items to output this cycle
I0321 08:28:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 08:28:28.611709  543705 disk_info.go:125] begin check local disk info of client
I0321 08:28:28.614199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:28:28.614205  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048da00 0xc00048da40]
E0321 08:28:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:28:33.409790  543705 memory.go:184] no items to output this cycle
I0321 08:28:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:28:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:28:43.409792  543705 memory.go:191] Add success.
I0321 08:28:43.409813  543705 cpu.go:282] Add success.
I0321 08:28:43.419726  543705 net.go:648] Add success.
I0321 08:28:43.422767  543705 net.go:770] primary dev: ETH0
I0321 08:28:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:28:43.422792  543705 net.go:698] Add success.
I0321 08:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:28:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:28:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:28:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:28:53.409774  543705 memory.go:184] no items to output this cycle
I0321 08:28:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 08:29:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:29:03.409780  543705 memory.go:184] no items to output this cycle
I0321 08:29:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:29:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:29:13.409772  543705 memory.go:191] Add success.
W0321 08:29:13.409796  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 08:29:13.409804  543705 cpu.go:282] Add success.
W0321 08:29:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:29:13.409810  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:29:13.420111  543705 net.go:648] Add success.
I0321 08:29:13.422738  543705 net.go:770] primary dev: ETH0
I0321 08:29:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:29:13.422763  543705 net.go:698] Add success.
I0321 08:29:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:29:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:29:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 08:29:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:29:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 08:29:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:29:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:29:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:29:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:29:23.409989  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:29:23.410004  543705 memory.go:184] no items to output this cycle
I0321 08:29:23.410016  543705 cpu.go:275] no items to output this cycle
I0321 08:29:28.614287  543705 disk_info.go:125] begin check local disk info of client
I0321 08:29:28.616788  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:29:28.616794  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002722c0 0xc000272300]
E0321 08:29:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:29:33.409774  543705 memory.go:184] no items to output this cycle
I0321 08:29:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 08:29:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:29:43.409795  543705 memory.go:191] Add success.
I0321 08:29:43.409795  543705 cpu.go:282] Add success.
I0321 08:29:43.420109  543705 net.go:648] Add success.
I0321 08:29:43.422966  543705 net.go:770] primary dev: ETH0
I0321 08:29:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:29:43.422996  543705 net.go:698] Add success.
I0321 08:29:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:29:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:29:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:29:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:29:53.409808  543705 memory.go:184] no items to output this cycle
I0321 08:29:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 08:30:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:30:03.409786  543705 memory.go:184] no items to output this cycle
I0321 08:30:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:30:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:30:13.409807  543705 memory.go:191] Add success.
I0321 08:30:13.409816  543705 cpu.go:282] Add success.
W0321 08:30:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:30:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:30:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:30:13.420122  543705 net.go:648] Add success.
I0321 08:30:13.422746  543705 net.go:770] primary dev: ETH0
I0321 08:30:13.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:30:13.422772  543705 net.go:698] Add success.
I0321 08:30:13.521346  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c99048d0-f1bc-4794-9326-89ef180d9ba0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:30:13.521380  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:30:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:30:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:30:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 08:30:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:30:14.456687  543705 disk_worker.go:494] system disk:vda1
I0321 08:30:14.456725  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:30:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:30:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:30:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:30:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:30:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:30:23.409769  543705 memory.go:184] no items to output this cycle
I0321 08:30:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 08:30:28.617671  543705 disk_info.go:125] begin check local disk info of client
I0321 08:30:28.620188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:30:28.620194  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048cd40 0xc00048cd80]
E0321 08:30:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:30:33.409775  543705 memory.go:184] no items to output this cycle
I0321 08:30:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 08:30:38.795679  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:30:38.795686  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:30:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:30:43.410593  543705 memory.go:191] Add success.
I0321 08:30:43.409828  543705 cpu.go:282] Add success.
I0321 08:30:43.420333  543705 net.go:648] Add success.
I0321 08:30:43.423231  543705 net.go:770] primary dev: ETH0
I0321 08:30:43.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:30:43.423258  543705 net.go:698] Add success.
I0321 08:30:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:30:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:30:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:30:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:30:53.409800  543705 memory.go:184] no items to output this cycle
I0321 08:30:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 08:31:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:31:03.409764  543705 memory.go:184] no items to output this cycle
I0321 08:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 08:31:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:31:13.409789  543705 memory.go:191] Add success.
I0321 08:31:13.409793  543705 cpu.go:282] Add success.
W0321 08:31:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:31:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:31:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:31:13.420115  543705 net.go:648] Add success.
I0321 08:31:13.422652  543705 net.go:770] primary dev: ETH0
I0321 08:31:13.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:31:13.422676  543705 net.go:698] Add success.
I0321 08:31:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:31:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:31:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 08:31:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:31:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 08:31:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:31:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:31:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:31:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:31:23.409770  543705 memory.go:184] no items to output this cycle
I0321 08:31:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 08:31:28.620735  543705 disk_info.go:125] begin check local disk info of client
I0321 08:31:28.623272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:31:28.623278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd4c0 0xc0002bd500]
E0321 08:31:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:31:33.409771  543705 memory.go:184] no items to output this cycle
I0321 08:31:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 08:31:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:31:43.409801  543705 memory.go:191] Add success.
I0321 08:31:43.409806  543705 cpu.go:282] Add success.
I0321 08:31:43.419957  543705 net.go:648] Add success.
I0321 08:31:43.422826  543705 net.go:770] primary dev: ETH0
I0321 08:31:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:31:43.422852  543705 net.go:698] Add success.
I0321 08:31:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:31:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:31:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:31:53.409781  543705 memory.go:184] no items to output this cycle
I0321 08:31:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 08:32:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:32:03.409779  543705 memory.go:184] no items to output this cycle
I0321 08:32:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 08:32:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:32:13.409782  543705 memory.go:191] Add success.
W0321 08:32:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 08:32:13.409811  543705 cpu.go:282] Add success.
W0321 08:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:32:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:32:13.420173  543705 net.go:648] Add success.
I0321 08:32:13.423076  543705 net.go:770] primary dev: ETH0
I0321 08:32:13.423090  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:32:13.423102  543705 net.go:698] Add success.
W0321 08:32:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:32:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 08:32:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:32:14.455887  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:32:14.455895  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:32:14.455901  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:32:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 08:32:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:32:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:32:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:32:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:32:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:32:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:32:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:32:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:32:23.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:32:23.410257  543705 memory.go:184] no items to output this cycle
I0321 08:32:23.410271  543705 cpu.go:275] no items to output this cycle
I0321 08:32:28.623747  543705 disk_info.go:125] begin check local disk info of client
I0321 08:32:28.626196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:32:28.626202  543705 disk_info.go:196] parse disk info done, disk is : [0xc000365340 0xc000365380]
E0321 08:32:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:32:33.409799  543705 memory.go:184] no items to output this cycle
I0321 08:32:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 08:32:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:32:43.409833  543705 memory.go:191] Add success.
I0321 08:32:43.409835  543705 cpu.go:282] Add success.
I0321 08:32:43.420031  543705 net.go:648] Add success.
I0321 08:32:43.422943  543705 net.go:770] primary dev: ETH0
I0321 08:32:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:32:43.422974  543705 net.go:698] Add success.
I0321 08:32:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:32:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:32:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:32:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:32:53.409792  543705 memory.go:184] no items to output this cycle
I0321 08:32:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 08:33:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:33:03.409765  543705 memory.go:184] no items to output this cycle
I0321 08:33:03.409887  543705 cpu.go:275] no items to output this cycle
E0321 08:33:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:33:13.409798  543705 cpu.go:282] Add success.
I0321 08:33:13.409800  543705 memory.go:191] Add success.
W0321 08:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:33:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:33:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:33:13.420073  543705 net.go:648] Add success.
I0321 08:33:13.423095  543705 net.go:770] primary dev: ETH0
I0321 08:33:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:33:13.423120  543705 net.go:698] Add success.
I0321 08:33:13.470962  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a9ecaba-120f-4b9a-b4f7-7b8c5e98ae44","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:33:13.470995  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:33:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:33:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:33:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 08:33:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:33:14.456765  543705 disk_worker.go:494] system disk:vda1
I0321 08:33:14.456792  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:33:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:33:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:33:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:33:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:33:23.409799  543705 memory.go:184] no items to output this cycle
I0321 08:33:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 08:33:28.626770  543705 disk_info.go:125] begin check local disk info of client
I0321 08:33:28.629225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:33:28.629231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4fc0 0xc0003e5000]
E0321 08:33:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:33:33.409787  543705 memory.go:184] no items to output this cycle
I0321 08:33:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 08:33:38.797707  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:33:38.797715  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:33:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:33:43.410687  543705 memory.go:191] Add success.
I0321 08:33:43.409834  543705 cpu.go:282] Add success.
I0321 08:33:43.420383  543705 net.go:648] Add success.
I0321 08:33:43.423244  543705 net.go:770] primary dev: ETH0
I0321 08:33:43.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:33:43.423269  543705 net.go:698] Add success.
I0321 08:33:46.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:33:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:33:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:33:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:33:53.409786  543705 memory.go:184] no items to output this cycle
I0321 08:33:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 08:34:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:34:03.409783  543705 memory.go:184] no items to output this cycle
I0321 08:34:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 08:34:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:34:13.409794  543705 cpu.go:282] Add success.
I0321 08:34:13.409801  543705 memory.go:191] Add success.
W0321 08:34:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:34:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:34:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:34:13.420236  543705 net.go:648] Add success.
I0321 08:34:13.422926  543705 net.go:770] primary dev: ETH0
I0321 08:34:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:34:13.422969  543705 net.go:698] Add success.
I0321 08:34:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:34:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:34:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 08:34:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:34:14.456547  543705 disk_worker.go:494] system disk:vda1
I0321 08:34:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:34:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:34:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:34:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:34:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:34:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:34:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:34:23.409770  543705 memory.go:184] no items to output this cycle
I0321 08:34:23.409773  543705 cpu.go:275] no items to output this cycle
I0321 08:34:28.629671  543705 disk_info.go:125] begin check local disk info of client
I0321 08:34:28.632111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:34:28.632116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a40 0xc0000c5a80]
E0321 08:34:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:34:33.409794  543705 memory.go:184] no items to output this cycle
I0321 08:34:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 08:34:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:34:43.409785  543705 memory.go:191] Add success.
I0321 08:34:43.409807  543705 cpu.go:282] Add success.
I0321 08:34:43.419875  543705 net.go:648] Add success.
I0321 08:34:43.422392  543705 net.go:770] primary dev: ETH0
I0321 08:34:43.422405  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:34:43.422418  543705 net.go:698] Add success.
I0321 08:34:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:34:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:34:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:34:53.409773  543705 memory.go:184] no items to output this cycle
I0321 08:34:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:35:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:35:03.409779  543705 cpu.go:275] no items to output this cycle
I0321 08:35:03.409786  543705 memory.go:184] no items to output this cycle
E0321 08:35:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:35:13.409777  543705 memory.go:191] Add success.
I0321 08:35:13.409792  543705 cpu.go:282] Add success.
W0321 08:35:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:35:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:35:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:35:13.419710  543705 net.go:648] Add success.
I0321 08:35:13.422646  543705 net.go:770] primary dev: ETH0
I0321 08:35:13.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:35:13.422671  543705 net.go:698] Add success.
I0321 08:35:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:35:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:35:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 08:35:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:35:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 08:35:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:35:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:35:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:35:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:35:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:35:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:35:23.409760  543705 memory.go:184] no items to output this cycle
I0321 08:35:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 08:35:28.632803  543705 disk_info.go:125] begin check local disk info of client
I0321 08:35:28.635256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:35:28.635263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003538c0 0xc000353900]
E0321 08:35:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:35:33.409791  543705 memory.go:184] no items to output this cycle
I0321 08:35:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 08:35:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:35:43.409805  543705 cpu.go:282] Add success.
I0321 08:35:43.409807  543705 memory.go:191] Add success.
I0321 08:35:43.419959  543705 net.go:648] Add success.
I0321 08:35:43.422852  543705 net.go:770] primary dev: ETH0
I0321 08:35:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:35:43.422877  543705 net.go:698] Add success.
I0321 08:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:35:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:35:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:35:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:35:53.409774  543705 memory.go:184] no items to output this cycle
I0321 08:35:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:36:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:36:03.409779  543705 memory.go:184] no items to output this cycle
I0321 08:36:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 08:36:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:36:13.409807  543705 memory.go:191] Add success.
I0321 08:36:13.409815  543705 cpu.go:282] Add success.
W0321 08:36:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:36:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:36:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:36:13.420110  543705 net.go:648] Add success.
I0321 08:36:13.423182  543705 net.go:770] primary dev: ETH0
I0321 08:36:13.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:36:13.423207  543705 net.go:698] Add success.
I0321 08:36:13.826182  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b4e7a69-1007-421c-bb3f-908f8b00e69a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:36:13.826222  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:36:14.454372  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:36:14.454665  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:36:14.454730  543705 disk_worker.go:708] disk space is not compliant
W0321 08:36:14.454733  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:36:14.456203  543705 disk_worker.go:494] system disk:vda1
I0321 08:36:14.456244  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:36:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:36:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:36:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:36:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:36:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:36:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:36:23.409763  543705 memory.go:184] no items to output this cycle
I0321 08:36:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 08:36:28.635825  543705 disk_info.go:125] begin check local disk info of client
I0321 08:36:28.638335  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:36:28.638341  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003af080 0xc0003af0c0]
E0321 08:36:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:36:33.409793  543705 memory.go:184] no items to output this cycle
I0321 08:36:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 08:36:38.797870  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:36:38.797878  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:36:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:36:43.410601  543705 memory.go:191] Add success.
I0321 08:36:43.409827  543705 cpu.go:282] Add success.
I0321 08:36:43.420383  543705 net.go:648] Add success.
I0321 08:36:43.422947  543705 net.go:770] primary dev: ETH0
I0321 08:36:43.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:36:43.422978  543705 net.go:698] Add success.
I0321 08:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:36:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:36:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:36:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:36:53.409769  543705 memory.go:184] no items to output this cycle
I0321 08:36:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 08:37:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:37:03.409768  543705 memory.go:184] no items to output this cycle
I0321 08:37:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:37:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:37:13.409797  543705 memory.go:191] Add success.
I0321 08:37:13.409797  543705 cpu.go:282] Add success.
W0321 08:37:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:37:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:37:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:37:13.420114  543705 net.go:648] Add success.
I0321 08:37:13.422870  543705 net.go:770] primary dev: ETH0
I0321 08:37:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:37:13.422896  543705 net.go:698] Add success.
I0321 08:37:13.453412  543705 event_worker.go:152] Polling the log file for events...
W0321 08:37:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:37:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 08:37:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:37:14.455923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:37:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:37:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:37:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 08:37:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:37:15.456867  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:37:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:37:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:37:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:37:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:37:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:37:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:37:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:37:23.409795  543705 memory.go:184] no items to output this cycle
I0321 08:37:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 08:37:28.638824  543705 disk_info.go:125] begin check local disk info of client
I0321 08:37:28.641325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:37:28.641331  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509040 0xc000509080]
E0321 08:37:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:37:33.409762  543705 memory.go:184] no items to output this cycle
I0321 08:37:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:37:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:37:43.409800  543705 memory.go:191] Add success.
I0321 08:37:43.409804  543705 cpu.go:282] Add success.
I0321 08:37:43.419885  543705 net.go:648] Add success.
I0321 08:37:43.422476  543705 net.go:770] primary dev: ETH0
I0321 08:37:43.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:37:43.422501  543705 net.go:698] Add success.
I0321 08:37:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:37:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:37:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:37:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:37:53.409779  543705 memory.go:184] no items to output this cycle
I0321 08:37:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 08:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:38:03.409783  543705 memory.go:184] no items to output this cycle
I0321 08:38:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 08:38:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:38:13.409820  543705 memory.go:191] Add success.
I0321 08:38:13.409826  543705 cpu.go:282] Add success.
W0321 08:38:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:38:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:38:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:38:13.420239  543705 net.go:648] Add success.
I0321 08:38:13.422753  543705 net.go:770] primary dev: ETH0
I0321 08:38:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:38:13.422778  543705 net.go:698] Add success.
I0321 08:38:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:38:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:38:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 08:38:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:38:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 08:38:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:38:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:38:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:38:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:38:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:38:23.409764  543705 memory.go:184] no items to output this cycle
I0321 08:38:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 08:38:28.641674  543705 disk_info.go:125] begin check local disk info of client
I0321 08:38:28.644150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:38:28.644156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b880 0xc00007b8c0]
E0321 08:38:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:38:33.409787  543705 memory.go:184] no items to output this cycle
I0321 08:38:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 08:38:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:38:43.409797  543705 memory.go:191] Add success.
I0321 08:38:43.409808  543705 cpu.go:282] Add success.
I0321 08:38:43.419992  543705 net.go:648] Add success.
I0321 08:38:43.422608  543705 net.go:770] primary dev: ETH0
I0321 08:38:43.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:38:43.422639  543705 net.go:698] Add success.
I0321 08:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:38:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:38:53.410227  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:38:53.410243  543705 memory.go:184] no items to output this cycle
I0321 08:38:53.410268  543705 cpu.go:275] no items to output this cycle
E0321 08:39:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:39:03.409807  543705 memory.go:184] no items to output this cycle
I0321 08:39:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 08:39:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:39:13.409784  543705 memory.go:191] Add success.
I0321 08:39:13.409806  543705 cpu.go:282] Add success.
W0321 08:39:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:39:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:39:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:39:13.420124  543705 net.go:648] Add success.
I0321 08:39:13.422914  543705 net.go:770] primary dev: ETH0
I0321 08:39:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:39:13.422942  543705 net.go:698] Add success.
I0321 08:39:13.512699  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b6474abf-3aaa-4fe3-8ab2-88222c03faa4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:39:13.512733  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:39:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:39:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:39:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 08:39:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:39:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 08:39:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:39:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:39:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:39:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:39:23.409807  543705 memory.go:184] no items to output this cycle
I0321 08:39:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 08:39:28.644876  543705 disk_info.go:125] begin check local disk info of client
I0321 08:39:28.647366  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:39:28.647372  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004910c0 0xc000491100]
E0321 08:39:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:39:33.409759  543705 memory.go:184] no items to output this cycle
I0321 08:39:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 08:39:38.799713  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:39:38.799721  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:39:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:39:43.410726  543705 memory.go:191] Add success.
I0321 08:39:43.409807  543705 cpu.go:282] Add success.
I0321 08:39:43.420508  543705 net.go:648] Add success.
I0321 08:39:43.423307  543705 net.go:770] primary dev: ETH0
I0321 08:39:43.423321  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:39:43.423336  543705 net.go:698] Add success.
I0321 08:39:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:39:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:39:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:39:53.409787  543705 memory.go:184] no items to output this cycle
I0321 08:39:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:40:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:40:03.409772  543705 memory.go:184] no items to output this cycle
I0321 08:40:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:40:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:40:13.409796  543705 memory.go:191] Add success.
I0321 08:40:13.409795  543705 cpu.go:282] Add success.
W0321 08:40:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:40:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:40:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:40:13.420252  543705 net.go:648] Add success.
I0321 08:40:13.422951  543705 net.go:770] primary dev: ETH0
I0321 08:40:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:40:13.422979  543705 net.go:698] Add success.
I0321 08:40:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:40:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:40:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0321 08:40:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:40:14.456607  543705 disk_worker.go:494] system disk:vda1
I0321 08:40:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:40:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:40:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:40:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:40:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:40:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:40:23.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:40:23.409885  543705 cpu.go:275] no items to output this cycle
I0321 08:40:23.409894  543705 memory.go:184] no items to output this cycle
I0321 08:40:28.647881  543705 disk_info.go:125] begin check local disk info of client
I0321 08:40:28.650369  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:40:28.650375  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab40 0xc00007ab80]
E0321 08:40:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:40:33.409790  543705 memory.go:184] no items to output this cycle
I0321 08:40:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 08:40:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:40:43.409807  543705 memory.go:191] Add success.
I0321 08:40:43.409822  543705 cpu.go:282] Add success.
I0321 08:40:43.420020  543705 net.go:648] Add success.
I0321 08:40:43.422802  543705 net.go:770] primary dev: ETH0
I0321 08:40:43.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:40:43.422832  543705 net.go:698] Add success.
I0321 08:40:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:40:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:40:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:40:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:40:53.409783  543705 memory.go:184] no items to output this cycle
I0321 08:40:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 08:41:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:41:03.409802  543705 memory.go:184] no items to output this cycle
I0321 08:41:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 08:41:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:41:13.409787  543705 memory.go:191] Add success.
I0321 08:41:13.409809  543705 cpu.go:282] Add success.
W0321 08:41:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:41:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:41:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:41:13.420193  543705 net.go:648] Add success.
I0321 08:41:13.423441  543705 net.go:770] primary dev: ETH0
I0321 08:41:13.423454  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:41:13.423466  543705 net.go:698] Add success.
I0321 08:41:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:41:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:41:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 08:41:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:41:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 08:41:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:41:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:41:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:41:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:41:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:41:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:41:23.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:41:23.409882  543705 memory.go:184] no items to output this cycle
I0321 08:41:23.409977  543705 cpu.go:275] no items to output this cycle
I0321 08:41:28.650891  543705 disk_info.go:125] begin check local disk info of client
I0321 08:41:28.653346  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:41:28.653352  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470e40 0xc000470e80]
E0321 08:41:33.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:41:33.409756  543705 memory.go:184] no items to output this cycle
I0321 08:41:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 08:41:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:41:43.409792  543705 memory.go:191] Add success.
I0321 08:41:43.409799  543705 cpu.go:282] Add success.
I0321 08:41:43.420026  543705 net.go:648] Add success.
I0321 08:41:43.422681  543705 net.go:770] primary dev: ETH0
I0321 08:41:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:41:43.422710  543705 net.go:698] Add success.
I0321 08:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:41:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:41:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:41:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:41:53.409773  543705 memory.go:184] no items to output this cycle
I0321 08:41:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 08:42:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:42:03.409794  543705 memory.go:184] no items to output this cycle
I0321 08:42:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:42:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:42:13.409791  543705 memory.go:191] Add success.
W0321 08:42:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 08:42:13.409817  543705 cpu.go:282] Add success.
W0321 08:42:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:42:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:42:13.420094  543705 net.go:648] Add success.
I0321 08:42:13.422555  543705 net.go:770] primary dev: ETH0
I0321 08:42:13.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:42:13.422584  543705 net.go:698] Add success.
I0321 08:42:13.464648  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1e9af866-aeed-4af7-9085-877ef90fa3be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:42:13.464692  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 08:42:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:42:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 08:42:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:42:14.456994  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:42:14.457003  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:42:14.457009  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:42:14.457058  543705 disk_worker.go:494] system disk:vda1
I0321 08:42:14.457087  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:42:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:42:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:42:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:42:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:42:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:42:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:42:23.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:42:23.409871  543705 memory.go:184] no items to output this cycle
I0321 08:42:23.409953  543705 cpu.go:275] no items to output this cycle
I0321 08:42:28.653681  543705 disk_info.go:125] begin check local disk info of client
I0321 08:42:28.656117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:42:28.656123  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331580 0xc0003315c0]
E0321 08:42:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:42:33.409799  543705 memory.go:184] no items to output this cycle
I0321 08:42:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 08:42:38.799874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:42:38.799882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:42:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:42:43.410715  543705 memory.go:191] Add success.
I0321 08:42:43.409817  543705 cpu.go:282] Add success.
I0321 08:42:43.420407  543705 net.go:648] Add success.
I0321 08:42:43.423670  543705 net.go:770] primary dev: ETH0
I0321 08:42:43.423692  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:42:43.423706  543705 net.go:698] Add success.
I0321 08:42:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:42:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:42:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:42:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:42:53.409777  543705 memory.go:184] no items to output this cycle
I0321 08:42:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 08:43:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:43:03.409781  543705 memory.go:184] no items to output this cycle
I0321 08:43:03.409802  543705 cpu.go:275] no items to output this cycle
W0321 08:43:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:43:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:43:13.409746  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:43:13.409807  543705 cpu.go:282] Add success.
E0321 08:43:13.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:43:13.409869  543705 memory.go:191] Add success.
I0321 08:43:13.420061  543705 net.go:648] Add success.
I0321 08:43:13.423167  543705 net.go:770] primary dev: ETH0
I0321 08:43:13.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:43:13.423197  543705 net.go:698] Add success.
I0321 08:43:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:43:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:43:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 08:43:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:43:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 08:43:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:43:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:43:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:43:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:43:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:43:23.409801  543705 memory.go:184] no items to output this cycle
I0321 08:43:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 08:43:28.656926  543705 disk_info.go:125] begin check local disk info of client
I0321 08:43:28.659472  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:43:28.659480  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472680 0xc0004726c0]
E0321 08:43:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:43:33.409778  543705 memory.go:184] no items to output this cycle
I0321 08:43:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 08:43:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:43:43.409817  543705 memory.go:191] Add success.
I0321 08:43:43.409825  543705 cpu.go:282] Add success.
I0321 08:43:43.419980  543705 net.go:648] Add success.
I0321 08:43:43.422692  543705 net.go:770] primary dev: ETH0
I0321 08:43:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:43:43.422718  543705 net.go:698] Add success.
I0321 08:43:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:43:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:43:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:43:53.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:43:53.410273  543705 memory.go:184] no items to output this cycle
I0321 08:43:53.410290  543705 cpu.go:275] no items to output this cycle
E0321 08:44:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:44:03.409773  543705 memory.go:184] no items to output this cycle
I0321 08:44:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 08:44:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:44:13.409785  543705 memory.go:191] Add success.
I0321 08:44:13.409808  543705 cpu.go:282] Add success.
W0321 08:44:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:44:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:44:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:44:13.420118  543705 net.go:648] Add success.
I0321 08:44:13.422975  543705 net.go:770] primary dev: ETH0
I0321 08:44:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:44:13.423000  543705 net.go:698] Add success.
I0321 08:44:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:44:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:44:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 08:44:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:44:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 08:44:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:44:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:44:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:44:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:44:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:44:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:44:23.409771  543705 memory.go:184] no items to output this cycle
I0321 08:44:23.409775  543705 cpu.go:275] no items to output this cycle
I0321 08:44:28.659933  543705 disk_info.go:125] begin check local disk info of client
I0321 08:44:28.662484  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:44:28.662490  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051e500 0xc00051e540]
E0321 08:44:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:44:33.409763  543705 memory.go:184] no items to output this cycle
I0321 08:44:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 08:44:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:44:43.409828  543705 memory.go:191] Add success.
I0321 08:44:43.409837  543705 cpu.go:282] Add success.
I0321 08:44:43.420090  543705 net.go:648] Add success.
I0321 08:44:43.423081  543705 net.go:770] primary dev: ETH0
I0321 08:44:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:44:43.423108  543705 net.go:698] Add success.
I0321 08:44:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:44:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:44:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:44:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:44:53.409796  543705 memory.go:184] no items to output this cycle
I0321 08:44:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 08:45:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:45:03.409767  543705 memory.go:184] no items to output this cycle
I0321 08:45:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:45:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:45:13.409806  543705 memory.go:191] Add success.
I0321 08:45:13.409817  543705 cpu.go:282] Add success.
W0321 08:45:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:45:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:45:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:45:13.420074  543705 net.go:648] Add success.
I0321 08:45:13.422634  543705 net.go:770] primary dev: ETH0
I0321 08:45:13.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:45:13.422659  543705 net.go:698] Add success.
I0321 08:45:14.380384  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1b6b299f-08f2-464d-87ef-c2ea9282cd5a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:45:14.380425  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:45:14.454722  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:45:14.454962  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:45:14.454975  543705 disk_worker.go:708] disk space is not compliant
W0321 08:45:14.454978  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:45:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 08:45:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:45:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:45:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:45:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:45:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:45:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:45:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:45:23.409796  543705 memory.go:184] no items to output this cycle
I0321 08:45:23.409837  543705 cpu.go:275] no items to output this cycle
I0321 08:45:28.663270  543705 disk_info.go:125] begin check local disk info of client
I0321 08:45:28.665787  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:45:28.665794  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2ac0 0xc0002a2b00]
E0321 08:45:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:45:33.409786  543705 memory.go:184] no items to output this cycle
I0321 08:45:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 08:45:38.800043  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:45:38.800051  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:45:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:45:43.409800  543705 memory.go:191] Add success.
I0321 08:45:43.409848  543705 cpu.go:282] Add success.
I0321 08:45:43.420066  543705 net.go:648] Add success.
I0321 08:45:43.421043  543705 net.go:770] primary dev: ETH0
I0321 08:45:43.421055  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:45:43.421067  543705 net.go:698] Add success.
I0321 08:45:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:45:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:45:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:45:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:45:53.409764  543705 memory.go:184] no items to output this cycle
I0321 08:45:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:46:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:46:03.409812  543705 memory.go:184] no items to output this cycle
I0321 08:46:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 08:46:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:46:13.409787  543705 memory.go:191] Add success.
I0321 08:46:13.409807  543705 cpu.go:282] Add success.
W0321 08:46:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:46:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:46:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:46:13.420150  543705 net.go:648] Add success.
I0321 08:46:13.422945  543705 net.go:770] primary dev: ETH0
I0321 08:46:13.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:46:13.422975  543705 net.go:698] Add success.
I0321 08:46:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:46:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:46:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 08:46:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:46:14.456609  543705 disk_worker.go:494] system disk:vda1
I0321 08:46:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:46:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:46:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:46:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:46:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:46:23.409776  543705 memory.go:184] no items to output this cycle
I0321 08:46:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 08:46:28.666968  543705 disk_info.go:125] begin check local disk info of client
I0321 08:46:28.669477  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:46:28.669484  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab0c0 0xc0001ab100]
E0321 08:46:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:46:33.409757  543705 memory.go:184] no items to output this cycle
I0321 08:46:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 08:46:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:46:43.409819  543705 memory.go:191] Add success.
I0321 08:46:43.409825  543705 cpu.go:282] Add success.
I0321 08:46:43.420006  543705 net.go:648] Add success.
I0321 08:46:43.423130  543705 net.go:770] primary dev: ETH0
I0321 08:46:43.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:46:43.423160  543705 net.go:698] Add success.
I0321 08:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:46:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:46:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:46:53.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:46:53.409761  543705 memory.go:184] no items to output this cycle
I0321 08:46:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 08:47:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:47:03.409800  543705 memory.go:184] no items to output this cycle
I0321 08:47:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 08:47:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:47:13.409773  543705 memory.go:191] Add success.
W0321 08:47:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:47:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:47:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:47:13.409827  543705 cpu.go:282] Add success.
I0321 08:47:13.420068  543705 net.go:648] Add success.
I0321 08:47:13.422855  543705 net.go:770] primary dev: ETH0
I0321 08:47:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:47:13.422880  543705 net.go:698] Add success.
I0321 08:47:13.453438  543705 event_worker.go:152] Polling the log file for events...
W0321 08:47:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:47:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 08:47:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:47:14.455879  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:47:14.455887  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:47:14.455893  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:47:14.456559  543705 disk_worker.go:494] system disk:vda1
I0321 08:47:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:47:15.456919  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:47:15.456927  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:47:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:47:16.458016  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:47:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:47:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:47:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:47:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:47:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 08:47:23.409777  543705 memory.go:184] no items to output this cycle
I0321 08:47:28.669677  543705 disk_info.go:125] begin check local disk info of client
I0321 08:47:28.672130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:47:28.672136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5540 0xc0000c5580]
E0321 08:47:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:47:33.409799  543705 memory.go:184] no items to output this cycle
I0321 08:47:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 08:47:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:47:43.409787  543705 memory.go:191] Add success.
I0321 08:47:43.409857  543705 cpu.go:282] Add success.
I0321 08:47:43.420063  543705 net.go:648] Add success.
I0321 08:47:43.421119  543705 net.go:770] primary dev: ETH0
I0321 08:47:43.421146  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:47:43.421159  543705 net.go:698] Add success.
I0321 08:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:47:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:47:53.409785  543705 memory.go:184] no items to output this cycle
I0321 08:47:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 08:48:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:48:03.409775  543705 cpu.go:275] no items to output this cycle
I0321 08:48:03.409783  543705 memory.go:184] no items to output this cycle
E0321 08:48:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:48:13.409794  543705 memory.go:191] Add success.
I0321 08:48:13.409798  543705 cpu.go:282] Add success.
W0321 08:48:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:48:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:48:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:48:13.420171  543705 net.go:648] Add success.
I0321 08:48:13.423213  543705 net.go:770] primary dev: ETH0
I0321 08:48:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:48:13.423239  543705 net.go:698] Add success.
I0321 08:48:13.467759  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"888f7a91-c2a4-45d4-8a12-170f0dca2ad8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:48:13.467796  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:48:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:48:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:48:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 08:48:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:48:14.456653  543705 disk_worker.go:494] system disk:vda1
I0321 08:48:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:48:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:48:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:48:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:48:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:48:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:48:23.409803  543705 memory.go:184] no items to output this cycle
I0321 08:48:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 08:48:28.673009  543705 disk_info.go:125] begin check local disk info of client
I0321 08:48:28.675567  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:48:28.675573  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad40 0xc0001aad80]
E0321 08:48:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:48:33.409779  543705 memory.go:184] no items to output this cycle
I0321 08:48:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 08:48:38.800184  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:48:38.800190  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:48:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:48:43.410868  543705 memory.go:191] Add success.
I0321 08:48:43.409827  543705 cpu.go:282] Add success.
I0321 08:48:43.420641  543705 net.go:648] Add success.
I0321 08:48:43.423175  543705 net.go:770] primary dev: ETH0
I0321 08:48:43.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:48:43.423205  543705 net.go:698] Add success.
I0321 08:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:48:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:48:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:48:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:48:53.409783  543705 memory.go:184] no items to output this cycle
I0321 08:48:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 08:49:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:49:03.409780  543705 memory.go:184] no items to output this cycle
I0321 08:49:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 08:49:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:49:13.409788  543705 cpu.go:282] Add success.
I0321 08:49:13.409797  543705 memory.go:191] Add success.
W0321 08:49:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:49:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:49:13.420229  543705 net.go:648] Add success.
I0321 08:49:13.423140  543705 net.go:770] primary dev: ETH0
I0321 08:49:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:49:13.423166  543705 net.go:698] Add success.
I0321 08:49:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:49:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:49:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 08:49:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:49:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 08:49:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:49:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:49:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:49:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:49:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:49:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:49:23.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:49:23.409894  543705 memory.go:184] no items to output this cycle
I0321 08:49:23.409974  543705 cpu.go:275] no items to output this cycle
I0321 08:49:28.676008  543705 disk_info.go:125] begin check local disk info of client
I0321 08:49:28.678551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:49:28.678557  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025ff00 0xc00025ff40]
E0321 08:49:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:49:33.409787  543705 memory.go:184] no items to output this cycle
I0321 08:49:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 08:49:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:49:43.409825  543705 memory.go:191] Add success.
I0321 08:49:43.409838  543705 cpu.go:282] Add success.
I0321 08:49:43.419995  543705 net.go:648] Add success.
I0321 08:49:43.423051  543705 net.go:770] primary dev: ETH0
I0321 08:49:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:49:43.423077  543705 net.go:698] Add success.
I0321 08:49:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:49:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:49:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:49:53.409782  543705 memory.go:184] no items to output this cycle
I0321 08:49:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 08:50:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:50:03.409777  543705 cpu.go:275] no items to output this cycle
I0321 08:50:03.409787  543705 memory.go:184] no items to output this cycle
E0321 08:50:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:50:13.409808  543705 memory.go:191] Add success.
I0321 08:50:13.409817  543705 cpu.go:282] Add success.
W0321 08:50:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:50:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:50:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:50:13.420058  543705 net.go:648] Add success.
I0321 08:50:13.422962  543705 net.go:770] primary dev: ETH0
I0321 08:50:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:50:13.422991  543705 net.go:698] Add success.
I0321 08:50:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:50:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:50:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 08:50:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:50:14.456505  543705 disk_worker.go:494] system disk:vda1
I0321 08:50:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:50:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:50:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:50:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:50:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:50:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:50:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:50:23.409799  543705 memory.go:184] no items to output this cycle
I0321 08:50:23.409917  543705 cpu.go:275] no items to output this cycle
I0321 08:50:28.679027  543705 disk_info.go:125] begin check local disk info of client
I0321 08:50:28.681492  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:50:28.681497  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003296c0 0xc000329700]
E0321 08:50:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:50:33.409781  543705 memory.go:184] no items to output this cycle
I0321 08:50:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:50:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:50:43.409790  543705 memory.go:191] Add success.
I0321 08:50:43.409818  543705 cpu.go:282] Add success.
I0321 08:50:43.419980  543705 net.go:648] Add success.
I0321 08:50:43.422745  543705 net.go:770] primary dev: ETH0
I0321 08:50:43.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:50:43.422775  543705 net.go:698] Add success.
I0321 08:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:50:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:50:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:50:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:50:53.409767  543705 memory.go:184] no items to output this cycle
I0321 08:50:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 08:51:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:51:03.409773  543705 memory.go:184] no items to output this cycle
I0321 08:51:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 08:51:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:51:13.409810  543705 memory.go:191] Add success.
I0321 08:51:13.409816  543705 cpu.go:282] Add success.
W0321 08:51:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:51:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:51:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:51:13.420071  543705 net.go:648] Add success.
I0321 08:51:13.422874  543705 net.go:770] primary dev: ETH0
I0321 08:51:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:51:13.422902  543705 net.go:698] Add success.
I0321 08:51:13.464258  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dacd108d-8ecb-4b61-9181-3841534468a6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:51:13.464292  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:51:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:51:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:51:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 08:51:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:51:14.456705  543705 disk_worker.go:494] system disk:vda1
I0321 08:51:14.456737  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:51:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:51:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:51:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:51:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:51:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:51:23.409792  543705 memory.go:184] no items to output this cycle
I0321 08:51:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 08:51:28.681675  543705 disk_info.go:125] begin check local disk info of client
I0321 08:51:28.684185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:51:28.684192  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a000 0xc00027a040]
E0321 08:51:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:51:33.409792  543705 memory.go:184] no items to output this cycle
I0321 08:51:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 08:51:38.801714  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:51:38.801721  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:51:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:51:43.410693  543705 memory.go:191] Add success.
I0321 08:51:43.409808  543705 cpu.go:282] Add success.
I0321 08:51:43.420407  543705 net.go:648] Add success.
I0321 08:51:43.423076  543705 net.go:770] primary dev: ETH0
I0321 08:51:43.423090  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:51:43.423103  543705 net.go:698] Add success.
I0321 08:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:51:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:51:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:51:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:51:53.409768  543705 memory.go:184] no items to output this cycle
I0321 08:51:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 08:52:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:52:03.409784  543705 memory.go:184] no items to output this cycle
I0321 08:52:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 08:52:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:52:13.409791  543705 memory.go:191] Add success.
I0321 08:52:13.409807  543705 cpu.go:282] Add success.
W0321 08:52:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:52:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:52:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:52:13.420108  543705 net.go:648] Add success.
I0321 08:52:13.422663  543705 net.go:770] primary dev: ETH0
I0321 08:52:13.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:52:13.422687  543705 net.go:698] Add success.
W0321 08:52:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:52:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 08:52:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:52:14.455886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:52:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:52:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:52:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 08:52:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:52:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:52:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:52:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:52:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:52:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:52:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:52:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:52:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:52:23.409765  543705 memory.go:184] no items to output this cycle
I0321 08:52:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 08:52:28.685079  543705 disk_info.go:125] begin check local disk info of client
I0321 08:52:28.687511  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:52:28.687519  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480140 0xc000480180]
E0321 08:52:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:52:33.409801  543705 memory.go:184] no items to output this cycle
I0321 08:52:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 08:52:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:52:43.409821  543705 memory.go:191] Add success.
I0321 08:52:43.409823  543705 cpu.go:282] Add success.
I0321 08:52:43.420008  543705 net.go:648] Add success.
I0321 08:52:43.422530  543705 net.go:770] primary dev: ETH0
I0321 08:52:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:52:43.422560  543705 net.go:698] Add success.
I0321 08:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:52:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:52:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:52:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:52:53.409769  543705 memory.go:184] no items to output this cycle
I0321 08:52:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 08:53:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:53:03.409781  543705 cpu.go:275] no items to output this cycle
I0321 08:53:03.409784  543705 memory.go:184] no items to output this cycle
E0321 08:53:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:53:13.409817  543705 memory.go:191] Add success.
I0321 08:53:13.409818  543705 cpu.go:282] Add success.
W0321 08:53:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:53:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:53:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:53:13.420126  543705 net.go:648] Add success.
I0321 08:53:13.422801  543705 net.go:770] primary dev: ETH0
I0321 08:53:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:53:13.422829  543705 net.go:698] Add success.
I0321 08:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:53:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:53:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 08:53:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:53:14.456819  543705 disk_worker.go:494] system disk:vda1
I0321 08:53:14.456848  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:53:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:53:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:53:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:53:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:53:23.410509  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:53:23.410528  543705 memory.go:184] no items to output this cycle
I0321 08:53:23.410534  543705 cpu.go:275] no items to output this cycle
I0321 08:53:28.688079  543705 disk_info.go:125] begin check local disk info of client
I0321 08:53:28.690604  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:53:28.690611  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462380 0xc0004623c0]
E0321 08:53:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:53:33.409780  543705 memory.go:184] no items to output this cycle
I0321 08:53:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 08:53:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:53:43.409802  543705 memory.go:191] Add success.
I0321 08:53:43.409830  543705 cpu.go:282] Add success.
I0321 08:53:43.419982  543705 net.go:648] Add success.
I0321 08:53:43.423158  543705 net.go:770] primary dev: ETH0
I0321 08:53:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:53:43.423183  543705 net.go:698] Add success.
I0321 08:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:53:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:53:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:53:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:53:53.409765  543705 memory.go:184] no items to output this cycle
I0321 08:53:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 08:54:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:54:03.409771  543705 memory.go:184] no items to output this cycle
I0321 08:54:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 08:54:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:54:13.409818  543705 memory.go:191] Add success.
I0321 08:54:13.409829  543705 cpu.go:282] Add success.
W0321 08:54:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:54:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:54:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:54:13.420100  543705 net.go:648] Add success.
I0321 08:54:13.422732  543705 net.go:770] primary dev: ETH0
I0321 08:54:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:54:13.422756  543705 net.go:698] Add success.
I0321 08:54:13.471870  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"19e74745-dd3d-486b-b05b-2ce4c2714a8a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:54:13.471904  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 08:54:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:54:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:54:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 08:54:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:54:14.456611  543705 disk_worker.go:494] system disk:vda1
I0321 08:54:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:54:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:54:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:54:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:54:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:54:23.409801  543705 memory.go:184] no items to output this cycle
I0321 08:54:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 08:54:28.691109  543705 disk_info.go:125] begin check local disk info of client
I0321 08:54:28.693663  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:54:28.693765  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bf880 0xc0004bf8c0]
E0321 08:54:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:54:33.409788  543705 memory.go:184] no items to output this cycle
I0321 08:54:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 08:54:38.803733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:54:38.803739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:54:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:54:43.410496  543705 memory.go:191] Add success.
I0321 08:54:43.409852  543705 cpu.go:282] Add success.
I0321 08:54:43.420226  543705 net.go:648] Add success.
I0321 08:54:43.422934  543705 net.go:770] primary dev: ETH0
I0321 08:54:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:54:43.422960  543705 net.go:698] Add success.
I0321 08:54:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:54:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:54:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:54:53.409781  543705 memory.go:184] no items to output this cycle
I0321 08:54:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 08:55:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:55:03.409803  543705 memory.go:184] no items to output this cycle
I0321 08:55:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 08:55:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:55:13.409832  543705 memory.go:191] Add success.
I0321 08:55:13.409839  543705 cpu.go:282] Add success.
W0321 08:55:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:55:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:55:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:55:13.420097  543705 net.go:648] Add success.
I0321 08:55:13.422988  543705 net.go:770] primary dev: ETH0
I0321 08:55:13.423001  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:55:13.423014  543705 net.go:698] Add success.
I0321 08:55:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:55:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:55:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 08:55:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:55:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 08:55:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:55:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:55:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:55:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:55:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:55:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:55:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:55:23.409786  543705 memory.go:184] no items to output this cycle
I0321 08:55:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 08:55:28.695118  543705 disk_info.go:125] begin check local disk info of client
I0321 08:55:28.697610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:55:28.697618  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472fc0 0xc000473000]
E0321 08:55:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:55:33.409811  543705 memory.go:184] no items to output this cycle
I0321 08:55:33.409824  543705 cpu.go:275] no items to output this cycle
E0321 08:55:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:55:43.409805  543705 memory.go:191] Add success.
I0321 08:55:43.409825  543705 cpu.go:282] Add success.
I0321 08:55:43.420029  543705 net.go:648] Add success.
I0321 08:55:43.422582  543705 net.go:770] primary dev: ETH0
I0321 08:55:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:55:43.422611  543705 net.go:698] Add success.
I0321 08:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:55:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:55:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:55:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:55:53.409777  543705 memory.go:184] no items to output this cycle
I0321 08:55:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 08:56:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:56:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 08:56:03.409785  543705 memory.go:184] no items to output this cycle
E0321 08:56:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:56:13.409790  543705 memory.go:191] Add success.
I0321 08:56:13.409793  543705 cpu.go:282] Add success.
W0321 08:56:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:56:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:56:13.420154  543705 net.go:648] Add success.
I0321 08:56:13.422771  543705 net.go:770] primary dev: ETH0
I0321 08:56:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:56:13.422796  543705 net.go:698] Add success.
I0321 08:56:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:56:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:56:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 08:56:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:56:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 08:56:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:56:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:56:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:56:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:56:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:56:23.409787  543705 memory.go:184] no items to output this cycle
I0321 08:56:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 08:56:28.697673  543705 disk_info.go:125] begin check local disk info of client
I0321 08:56:28.700166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:56:28.700173  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d8c0 0xc00037d900]
E0321 08:56:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:56:33.409901  543705 memory.go:184] no items to output this cycle
I0321 08:56:33.409933  543705 cpu.go:275] no items to output this cycle
E0321 08:56:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:56:43.409824  543705 memory.go:191] Add success.
I0321 08:56:43.409836  543705 cpu.go:282] Add success.
I0321 08:56:43.420028  543705 net.go:648] Add success.
I0321 08:56:43.422703  543705 net.go:770] primary dev: ETH0
I0321 08:56:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:56:43.422732  543705 net.go:698] Add success.
I0321 08:56:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:56:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:56:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:56:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:56:53.409784  543705 memory.go:184] no items to output this cycle
I0321 08:56:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 08:57:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:57:03.409798  543705 memory.go:184] no items to output this cycle
I0321 08:57:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 08:57:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:57:13.409776  543705 memory.go:191] Add success.
I0321 08:57:13.409795  543705 cpu.go:282] Add success.
W0321 08:57:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:57:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:57:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:57:13.420107  543705 net.go:648] Add success.
I0321 08:57:13.429375  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 08:57:13.429450  543705 net.go:770] primary dev: ETH0
I0321 08:57:13.429463  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:57:13.429474  543705 net.go:698] Add success.
I0321 08:57:13.453013  543705 event_worker.go:152] Polling the log file for events...
I0321 08:57:13.464293  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab87205b-8470-43c5-a57c-f4f3f779c1a1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 08:57:13.464328  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 08:57:14.455243  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:57:14.455260  543705 disk_worker.go:708] disk space is not compliant
W0321 08:57:14.455265  543705 disk_worker.go:728] disk inode is not compliant
E0321 08:57:14.455864  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 08:57:14.455874  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 08:57:14.455879  543705 custom_config.go:64] query custom config with name: gpu
I0321 08:57:14.456788  543705 disk_worker.go:494] system disk:vda1
I0321 08:57:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 08:57:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 08:57:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:57:16.457905  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 08:57:16.457904  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 08:57:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:57:16.457986  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:57:16.472303  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:57:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:57:23.409767  543705 memory.go:184] no items to output this cycle
I0321 08:57:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 08:57:28.701152  543705 disk_info.go:125] begin check local disk info of client
I0321 08:57:28.703665  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:57:28.703672  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bc540 0xc0004bc580]
E0321 08:57:33.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:57:33.409886  543705 memory.go:184] no items to output this cycle
I0321 08:57:33.410000  543705 cpu.go:275] no items to output this cycle
I0321 08:57:38.803880  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 08:57:38.803886  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 08:57:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:57:43.410637  543705 memory.go:191] Add success.
I0321 08:57:43.409816  543705 cpu.go:282] Add success.
I0321 08:57:43.420424  543705 net.go:648] Add success.
I0321 08:57:43.423037  543705 net.go:770] primary dev: ETH0
I0321 08:57:43.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:57:43.423064  543705 net.go:698] Add success.
I0321 08:57:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:57:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:57:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:57:53.409779  543705 memory.go:184] no items to output this cycle
I0321 08:57:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 08:58:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:58:03.409775  543705 memory.go:184] no items to output this cycle
I0321 08:58:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 08:58:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:58:13.409794  543705 memory.go:191] Add success.
I0321 08:58:13.409794  543705 cpu.go:282] Add success.
W0321 08:58:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:58:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:58:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:58:13.420047  543705 net.go:648] Add success.
I0321 08:58:13.422823  543705 net.go:770] primary dev: ETH0
I0321 08:58:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:58:13.422848  543705 net.go:698] Add success.
I0321 08:58:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:58:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:58:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 08:58:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:58:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 08:58:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:58:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:58:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:58:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:58:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:58:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:58:23.409786  543705 memory.go:184] no items to output this cycle
I0321 08:58:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 08:58:28.705162  543705 disk_info.go:125] begin check local disk info of client
I0321 08:58:28.707719  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:58:28.707730  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270000 0xc000270040]
E0321 08:58:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:58:33.409905  543705 memory.go:184] no items to output this cycle
I0321 08:58:33.409942  543705 cpu.go:275] no items to output this cycle
E0321 08:58:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:58:43.409800  543705 memory.go:191] Add success.
I0321 08:58:43.409833  543705 cpu.go:282] Add success.
I0321 08:58:43.419991  543705 net.go:648] Add success.
I0321 08:58:43.422451  543705 net.go:770] primary dev: ETH0
I0321 08:58:43.422463  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:58:43.422476  543705 net.go:698] Add success.
I0321 08:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:58:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:58:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:58:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:58:53.409797  543705 memory.go:184] no items to output this cycle
I0321 08:58:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 08:59:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:59:03.409811  543705 memory.go:184] no items to output this cycle
I0321 08:59:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 08:59:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:59:13.409775  543705 memory.go:191] Add success.
I0321 08:59:13.409800  543705 cpu.go:282] Add success.
W0321 08:59:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 08:59:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 08:59:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 08:59:13.420680  543705 net.go:648] Add success.
I0321 08:59:13.423371  543705 net.go:770] primary dev: ETH0
I0321 08:59:13.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:59:13.423395  543705 net.go:698] Add success.
I0321 08:59:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 08:59:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 08:59:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0321 08:59:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 08:59:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 08:59:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 08:59:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 08:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 08:59:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 08:59:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:59:23.409781  543705 memory.go:184] no items to output this cycle
I0321 08:59:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 08:59:28.709229  543705 disk_info.go:125] begin check local disk info of client
I0321 08:59:28.711787  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 08:59:28.711794  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004accc0 0xc0004acd00]
E0321 08:59:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:59:33.409867  543705 memory.go:184] no items to output this cycle
I0321 08:59:33.409930  543705 cpu.go:275] no items to output this cycle
E0321 08:59:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:59:43.409831  543705 memory.go:191] Add success.
I0321 08:59:43.409839  543705 cpu.go:282] Add success.
I0321 08:59:43.420006  543705 net.go:648] Add success.
I0321 08:59:43.422687  543705 net.go:770] primary dev: ETH0
I0321 08:59:43.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0321 08:59:43.422713  543705 net.go:698] Add success.
I0321 08:59:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 08:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 08:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 08:59:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 08:59:53.409805  543705 memory.go:184] no items to output this cycle
I0321 08:59:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 09:00:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:00:03.409781  543705 memory.go:184] no items to output this cycle
I0321 09:00:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 09:00:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:00:13.409793  543705 memory.go:191] Add success.
I0321 09:00:13.409794  543705 cpu.go:282] Add success.
W0321 09:00:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:00:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:00:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:00:13.420034  543705 net.go:648] Add success.
I0321 09:00:13.422658  543705 net.go:770] primary dev: ETH0
I0321 09:00:13.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:00:13.422682  543705 net.go:698] Add success.
I0321 09:00:13.536072  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"623ed74d-43b8-4d03-8d2e-21567e9728f9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:00:13.536107  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:00:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:00:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:00:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 09:00:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:00:14.456623  543705 disk_worker.go:494] system disk:vda1
I0321 09:00:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:00:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:00:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:00:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:00:23.409787  543705 memory.go:184] no items to output this cycle
I0321 09:00:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 09:00:28.713213  543705 disk_info.go:125] begin check local disk info of client
I0321 09:00:28.715732  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:00:28.715739  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492940 0xc000492980]
E0321 09:00:33.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:00:33.409890  543705 memory.go:184] no items to output this cycle
I0321 09:00:33.409959  543705 cpu.go:275] no items to output this cycle
I0321 09:00:38.804035  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:00:38.804042  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:00:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:00:43.410697  543705 memory.go:191] Add success.
I0321 09:00:43.409822  543705 cpu.go:282] Add success.
I0321 09:00:43.420456  543705 net.go:648] Add success.
I0321 09:00:43.423185  543705 net.go:770] primary dev: ETH0
I0321 09:00:43.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:00:43.423211  543705 net.go:698] Add success.
I0321 09:00:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:00:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:00:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:00:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:00:53.409776  543705 memory.go:184] no items to output this cycle
I0321 09:00:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 09:01:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:01:03.409786  543705 memory.go:184] no items to output this cycle
I0321 09:01:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 09:01:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:01:13.409794  543705 memory.go:191] Add success.
I0321 09:01:13.409796  543705 cpu.go:282] Add success.
W0321 09:01:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:01:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:01:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:01:13.420221  543705 net.go:648] Add success.
I0321 09:01:13.423204  543705 net.go:770] primary dev: ETH0
I0321 09:01:13.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:01:13.423231  543705 net.go:698] Add success.
I0321 09:01:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:01:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:01:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 09:01:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:01:14.456521  543705 disk_worker.go:494] system disk:vda1
I0321 09:01:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:01:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:01:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:01:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:01:23.409774  543705 memory.go:184] no items to output this cycle
I0321 09:01:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 09:01:28.717227  543705 disk_info.go:125] begin check local disk info of client
I0321 09:01:28.719732  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:01:28.719739  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052a3c0 0xc00052a400]
E0321 09:01:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:01:33.409911  543705 memory.go:184] no items to output this cycle
I0321 09:01:33.409922  543705 cpu.go:275] no items to output this cycle
E0321 09:01:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:01:43.409801  543705 memory.go:191] Add success.
I0321 09:01:43.409824  543705 cpu.go:282] Add success.
I0321 09:01:43.420024  543705 net.go:648] Add success.
I0321 09:01:43.422724  543705 net.go:770] primary dev: ETH0
I0321 09:01:43.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:01:43.422753  543705 net.go:698] Add success.
I0321 09:01:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:01:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:01:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:01:53.409794  543705 memory.go:184] no items to output this cycle
I0321 09:01:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 09:02:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:02:03.409771  543705 memory.go:184] no items to output this cycle
I0321 09:02:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 09:02:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:02:13.409790  543705 memory.go:191] Add success.
I0321 09:02:13.409793  543705 cpu.go:282] Add success.
W0321 09:02:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:02:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:02:13.420040  543705 net.go:648] Add success.
I0321 09:02:13.422933  543705 net.go:770] primary dev: ETH0
I0321 09:02:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:02:13.422962  543705 net.go:698] Add success.
W0321 09:02:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:02:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 09:02:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:02:14.456892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:02:14.456901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:02:14.456908  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:02:14.456982  543705 disk_worker.go:494] system disk:vda1
I0321 09:02:14.457010  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:02:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:02:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:02:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:02:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:02:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:02:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:02:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:02:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:02:23.409775  543705 memory.go:184] no items to output this cycle
I0321 09:02:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 09:02:28.721243  543705 disk_info.go:125] begin check local disk info of client
I0321 09:02:28.723747  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:02:28.723754  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8d00 0xc0003c8d40]
E0321 09:02:33.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:02:33.409915  543705 memory.go:184] no items to output this cycle
I0321 09:02:33.410065  543705 cpu.go:275] no items to output this cycle
E0321 09:02:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:02:43.409799  543705 memory.go:191] Add success.
I0321 09:02:43.409820  543705 cpu.go:282] Add success.
I0321 09:02:43.420027  543705 net.go:648] Add success.
I0321 09:02:43.423062  543705 net.go:770] primary dev: ETH0
I0321 09:02:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:02:43.423091  543705 net.go:698] Add success.
I0321 09:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:02:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:02:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:02:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:02:53.409763  543705 memory.go:184] no items to output this cycle
I0321 09:02:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 09:03:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:03:03.409796  543705 memory.go:184] no items to output this cycle
I0321 09:03:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 09:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:03:13.409780  543705 memory.go:191] Add success.
W0321 09:03:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 09:03:13.409808  543705 cpu.go:282] Add success.
W0321 09:03:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:03:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:03:13.420052  543705 net.go:648] Add success.
I0321 09:03:13.423325  543705 net.go:770] primary dev: ETH0
I0321 09:03:13.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:03:13.423351  543705 net.go:698] Add success.
I0321 09:03:13.472133  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"be7f9c9e-23a1-4c7e-bc50-b0673a6e7e0a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:03:13.472165  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:03:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:03:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 09:03:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:03:14.456546  543705 disk_worker.go:494] system disk:vda1
I0321 09:03:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:03:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:03:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:03:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:03:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:03:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:03:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:03:23.409801  543705 memory.go:184] no items to output this cycle
I0321 09:03:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 09:03:28.725266  543705 disk_info.go:125] begin check local disk info of client
I0321 09:03:28.727792  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:03:28.727798  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2a40 0xc0003b2a80]
E0321 09:03:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:03:33.409777  543705 memory.go:184] no items to output this cycle
I0321 09:03:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 09:03:38.805729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:03:38.805735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:03:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:03:43.410737  543705 memory.go:191] Add success.
I0321 09:03:43.409832  543705 cpu.go:282] Add success.
I0321 09:03:43.420446  543705 net.go:648] Add success.
I0321 09:03:43.423505  543705 net.go:770] primary dev: ETH0
I0321 09:03:43.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:03:43.423534  543705 net.go:698] Add success.
I0321 09:03:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:03:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:03:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:03:53.409811  543705 memory.go:184] no items to output this cycle
I0321 09:03:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 09:04:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:04:03.409804  543705 memory.go:184] no items to output this cycle
I0321 09:04:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 09:04:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:04:13.409816  543705 memory.go:191] Add success.
I0321 09:04:13.409824  543705 cpu.go:282] Add success.
W0321 09:04:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:04:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:04:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:04:13.420056  543705 net.go:648] Add success.
I0321 09:04:13.422946  543705 net.go:770] primary dev: ETH0
I0321 09:04:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:04:13.422975  543705 net.go:698] Add success.
I0321 09:04:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:04:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:04:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 09:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:04:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 09:04:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:04:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:04:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:04:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:04:23.409783  543705 memory.go:184] no items to output this cycle
I0321 09:04:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 09:04:28.729285  543705 disk_info.go:125] begin check local disk info of client
I0321 09:04:28.731813  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:04:28.731819  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376e00 0xc000376e40]
E0321 09:04:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:04:33.409779  543705 memory.go:184] no items to output this cycle
I0321 09:04:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 09:04:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:04:43.409811  543705 memory.go:191] Add success.
I0321 09:04:43.409813  543705 cpu.go:282] Add success.
I0321 09:04:43.420098  543705 net.go:648] Add success.
I0321 09:04:43.422911  543705 net.go:770] primary dev: ETH0
I0321 09:04:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:04:43.422936  543705 net.go:698] Add success.
I0321 09:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:04:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:04:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:04:53.409762  543705 memory.go:184] no items to output this cycle
I0321 09:04:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 09:05:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:05:03.409784  543705 memory.go:184] no items to output this cycle
I0321 09:05:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 09:05:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:05:13.409771  543705 memory.go:191] Add success.
W0321 09:05:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 09:05:13.409803  543705 cpu.go:282] Add success.
W0321 09:05:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:05:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:05:13.420056  543705 net.go:648] Add success.
I0321 09:05:13.423345  543705 net.go:770] primary dev: ETH0
I0321 09:05:13.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:05:13.423370  543705 net.go:698] Add success.
I0321 09:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:05:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:05:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 09:05:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:05:14.456484  543705 disk_worker.go:494] system disk:vda1
I0321 09:05:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:05:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:05:16.472491  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:05:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:05:23.409799  543705 memory.go:184] no items to output this cycle
I0321 09:05:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 09:05:28.733302  543705 disk_info.go:125] begin check local disk info of client
I0321 09:05:28.735840  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:05:28.735847  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003753c0 0xc000375400]
E0321 09:05:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:05:33.409780  543705 memory.go:184] no items to output this cycle
I0321 09:05:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 09:05:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:05:43.409790  543705 memory.go:191] Add success.
I0321 09:05:43.409808  543705 cpu.go:282] Add success.
I0321 09:05:43.419983  543705 net.go:648] Add success.
I0321 09:05:43.423673  543705 net.go:770] primary dev: ETH0
I0321 09:05:43.423690  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:05:43.423706  543705 net.go:698] Add success.
I0321 09:05:46.457807  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:05:46.457870  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:05:46.457894  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:05:53.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:05:53.409883  543705 memory.go:184] no items to output this cycle
I0321 09:05:53.409943  543705 cpu.go:275] no items to output this cycle
E0321 09:06:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:06:03.409779  543705 memory.go:184] no items to output this cycle
I0321 09:06:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 09:06:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:06:13.409807  543705 memory.go:191] Add success.
I0321 09:06:13.409819  543705 cpu.go:282] Add success.
W0321 09:06:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:06:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:06:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:06:13.420155  543705 net.go:648] Add success.
I0321 09:06:13.422956  543705 net.go:770] primary dev: ETH0
I0321 09:06:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:06:13.422980  543705 net.go:698] Add success.
I0321 09:06:13.700547  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a4e2a732-a6a4-4257-92f1-9baca6c4e4f3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:06:13.700585  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:06:14.454682  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:06:14.454928  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:06:14.454939  543705 disk_worker.go:708] disk space is not compliant
W0321 09:06:14.454942  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:06:14.456474  543705 disk_worker.go:494] system disk:vda1
I0321 09:06:14.456509  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:06:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:06:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:06:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:06:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:06:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:06:23.409808  543705 memory.go:184] no items to output this cycle
I0321 09:06:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 09:06:28.737325  543705 disk_info.go:125] begin check local disk info of client
I0321 09:06:28.739863  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:06:28.739869  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8240 0xc0003d8280]
E0321 09:06:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:06:33.409808  543705 memory.go:184] no items to output this cycle
I0321 09:06:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 09:06:38.807726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:06:38.807733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:06:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:06:43.410611  543705 memory.go:191] Add success.
I0321 09:06:43.409820  543705 cpu.go:282] Add success.
I0321 09:06:43.420571  543705 net.go:648] Add success.
I0321 09:06:43.422981  543705 net.go:770] primary dev: ETH0
I0321 09:06:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:06:43.423006  543705 net.go:698] Add success.
I0321 09:06:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:06:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:06:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:06:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:06:53.409774  543705 memory.go:184] no items to output this cycle
I0321 09:06:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 09:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:07:03.409774  543705 memory.go:184] no items to output this cycle
I0321 09:07:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 09:07:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:07:13.409789  543705 memory.go:191] Add success.
I0321 09:07:13.409812  543705 cpu.go:282] Add success.
W0321 09:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:07:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:07:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:07:13.420054  543705 net.go:648] Add success.
I0321 09:07:13.423282  543705 net.go:770] primary dev: ETH0
I0321 09:07:13.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:07:13.423324  543705 net.go:698] Add success.
I0321 09:07:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0321 09:07:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:07:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 09:07:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:07:14.456981  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:07:14.456992  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:07:14.456999  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:07:14.457049  543705 disk_worker.go:494] system disk:vda1
I0321 09:07:14.457093  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:07:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:07:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:07:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:07:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:07:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:07:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:07:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:07:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:07:23.409792  543705 memory.go:184] no items to output this cycle
I0321 09:07:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 09:07:28.741346  543705 disk_info.go:125] begin check local disk info of client
I0321 09:07:28.743810  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:07:28.743817  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358080 0xc0003580c0]
E0321 09:07:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:07:33.409792  543705 memory.go:184] no items to output this cycle
I0321 09:07:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 09:07:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:07:43.409793  543705 memory.go:191] Add success.
I0321 09:07:43.409810  543705 cpu.go:282] Add success.
I0321 09:07:43.419879  543705 net.go:648] Add success.
I0321 09:07:43.422837  543705 net.go:770] primary dev: ETH0
I0321 09:07:43.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:07:43.422864  543705 net.go:698] Add success.
I0321 09:07:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:07:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:07:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:07:53.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:07:53.409944  543705 cpu.go:275] no items to output this cycle
I0321 09:07:53.409982  543705 memory.go:184] no items to output this cycle
E0321 09:08:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:08:03.409783  543705 memory.go:184] no items to output this cycle
I0321 09:08:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 09:08:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:08:13.409788  543705 memory.go:191] Add success.
W0321 09:08:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:08:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:08:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:08:13.409875  543705 cpu.go:282] Add success.
I0321 09:08:13.420281  543705 net.go:648] Add success.
I0321 09:08:13.421214  543705 net.go:770] primary dev: ETH0
I0321 09:08:13.421227  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:08:13.421239  543705 net.go:698] Add success.
I0321 09:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:08:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:08:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 09:08:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:08:14.456639  543705 disk_worker.go:494] system disk:vda1
I0321 09:08:14.456670  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:08:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:08:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:08:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:08:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:08:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:08:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:08:23.409776  543705 memory.go:184] no items to output this cycle
I0321 09:08:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 09:08:28.745363  543705 disk_info.go:125] begin check local disk info of client
I0321 09:08:28.747879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:08:28.747886  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357740 0xc000357780]
E0321 09:08:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:08:33.409783  543705 memory.go:184] no items to output this cycle
I0321 09:08:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 09:08:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:08:43.409805  543705 memory.go:191] Add success.
I0321 09:08:43.409818  543705 cpu.go:282] Add success.
I0321 09:08:43.420063  543705 net.go:648] Add success.
I0321 09:08:43.422731  543705 net.go:770] primary dev: ETH0
I0321 09:08:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:08:43.422757  543705 net.go:698] Add success.
I0321 09:08:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:08:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:08:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:08:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:08:53.409800  543705 memory.go:184] no items to output this cycle
I0321 09:08:53.409908  543705 cpu.go:275] no items to output this cycle
E0321 09:09:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:09:03.409784  543705 memory.go:184] no items to output this cycle
I0321 09:09:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 09:09:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:09:13.409799  543705 cpu.go:282] Add success.
I0321 09:09:13.409805  543705 memory.go:191] Add success.
W0321 09:09:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:09:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:09:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:09:13.420138  543705 net.go:648] Add success.
I0321 09:09:13.422898  543705 net.go:770] primary dev: ETH0
I0321 09:09:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:09:13.422924  543705 net.go:698] Add success.
I0321 09:09:13.464409  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8d01d69d-b013-4c93-a005-86e9e9b42aeb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:09:13.464443  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:09:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:09:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:09:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 09:09:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:09:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 09:09:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:09:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:09:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:09:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:09:23.409794  543705 memory.go:184] no items to output this cycle
I0321 09:09:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 09:09:28.749382  543705 disk_info.go:125] begin check local disk info of client
I0321 09:09:28.751851  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:09:28.751858  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba480 0xc0003ba4c0]
E0321 09:09:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:09:33.409776  543705 memory.go:184] no items to output this cycle
I0321 09:09:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 09:09:38.809738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:09:38.809745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:09:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:09:43.410607  543705 memory.go:191] Add success.
I0321 09:09:43.409814  543705 cpu.go:282] Add success.
I0321 09:09:43.420302  543705 net.go:648] Add success.
I0321 09:09:43.422927  543705 net.go:770] primary dev: ETH0
I0321 09:09:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:09:43.422952  543705 net.go:698] Add success.
I0321 09:09:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:09:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:09:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:09:53.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:09:53.409910  543705 cpu.go:275] no items to output this cycle
I0321 09:09:53.409920  543705 memory.go:184] no items to output this cycle
E0321 09:10:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:10:03.409780  543705 memory.go:184] no items to output this cycle
I0321 09:10:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 09:10:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:10:13.409797  543705 cpu.go:282] Add success.
I0321 09:10:13.409807  543705 memory.go:191] Add success.
W0321 09:10:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:10:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:10:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:10:13.420160  543705 net.go:648] Add success.
I0321 09:10:13.422990  543705 net.go:770] primary dev: ETH0
I0321 09:10:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:10:13.423020  543705 net.go:698] Add success.
I0321 09:10:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:10:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:10:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 09:10:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:10:14.456558  543705 disk_worker.go:494] system disk:vda1
I0321 09:10:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:10:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:10:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:10:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:10:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:10:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:10:23.409785  543705 memory.go:184] no items to output this cycle
I0321 09:10:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 09:10:28.753393  543705 disk_info.go:125] begin check local disk info of client
I0321 09:10:28.755877  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:10:28.755883  543705 disk_info.go:196] parse disk info done, disk is : [0xc000251700 0xc000251740]
E0321 09:10:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:10:33.409804  543705 memory.go:184] no items to output this cycle
I0321 09:10:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 09:10:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:10:43.409805  543705 memory.go:191] Add success.
I0321 09:10:43.409806  543705 cpu.go:282] Add success.
I0321 09:10:43.419930  543705 net.go:648] Add success.
I0321 09:10:43.422540  543705 net.go:770] primary dev: ETH0
I0321 09:10:43.422556  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:10:43.422571  543705 net.go:698] Add success.
I0321 09:10:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:10:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:10:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:10:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:10:53.409797  543705 memory.go:184] no items to output this cycle
I0321 09:10:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 09:11:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:11:03.409788  543705 memory.go:184] no items to output this cycle
I0321 09:11:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 09:11:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:11:13.409795  543705 memory.go:191] Add success.
I0321 09:11:13.409798  543705 cpu.go:282] Add success.
W0321 09:11:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:11:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:11:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:11:13.420277  543705 net.go:648] Add success.
I0321 09:11:13.423092  543705 net.go:770] primary dev: ETH0
I0321 09:11:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:11:13.423122  543705 net.go:698] Add success.
I0321 09:11:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:11:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:11:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 09:11:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:11:14.456514  543705 disk_worker.go:494] system disk:vda1
I0321 09:11:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:11:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:11:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:11:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:11:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:11:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:11:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:11:23.409799  543705 memory.go:184] no items to output this cycle
I0321 09:11:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 09:11:28.757420  543705 disk_info.go:125] begin check local disk info of client
I0321 09:11:28.759927  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:11:28.759933  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaac0 0xc0001aab00]
E0321 09:11:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:11:33.409801  543705 memory.go:184] no items to output this cycle
I0321 09:11:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 09:11:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:11:43.409833  543705 memory.go:191] Add success.
I0321 09:11:43.409840  543705 cpu.go:282] Add success.
I0321 09:11:43.419955  543705 net.go:648] Add success.
I0321 09:11:43.422684  543705 net.go:770] primary dev: ETH0
I0321 09:11:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:11:43.422722  543705 net.go:698] Add success.
I0321 09:11:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:11:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:11:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:11:53.409807  543705 memory.go:184] no items to output this cycle
I0321 09:11:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 09:12:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:12:03.409794  543705 cpu.go:275] no items to output this cycle
I0321 09:12:03.409796  543705 memory.go:184] no items to output this cycle
I0321 09:12:13.409914  543705 cpu.go:282] Add success.
E0321 09:12:13.410080  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:12:13.410099  543705 memory.go:191] Add success.
W0321 09:12:13.410126  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:12:13.410138  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:12:13.410141  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:12:13.419708  543705 net.go:648] Add success.
I0321 09:12:13.422813  543705 net.go:770] primary dev: ETH0
I0321 09:12:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:12:13.422837  543705 net.go:698] Add success.
I0321 09:12:13.555106  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"20364ebc-328c-48a0-b485-da7d7f0315b3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:12:13.555137  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 09:12:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:12:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 09:12:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:12:14.456801  543705 disk_worker.go:494] system disk:vda1
I0321 09:12:14.456841  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:12:14.457137  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:12:14.457145  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:12:14.457149  543705 custom_config.go:64] query custom config with name: gpu
E0321 09:12:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:12:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:12:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:12:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:12:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:12:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:12:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:12:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:12:23.409773  543705 memory.go:184] no items to output this cycle
I0321 09:12:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 09:12:28.761444  543705 disk_info.go:125] begin check local disk info of client
I0321 09:12:28.763957  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:12:28.763964  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003529c0 0xc000352a00]
E0321 09:12:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:12:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 09:12:33.409803  543705 memory.go:184] no items to output this cycle
I0321 09:12:38.811749  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:12:38.811757  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:12:43.409922  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:12:43.411080  543705 memory.go:191] Add success.
I0321 09:12:43.409950  543705 cpu.go:282] Add success.
I0321 09:12:43.419828  543705 net.go:648] Add success.
I0321 09:12:43.422623  543705 net.go:770] primary dev: ETH0
I0321 09:12:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:12:43.422648  543705 net.go:698] Add success.
I0321 09:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:12:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:12:53.410200  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:12:53.410217  543705 memory.go:184] no items to output this cycle
I0321 09:12:53.410221  543705 cpu.go:275] no items to output this cycle
E0321 09:13:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:13:03.409799  543705 memory.go:184] no items to output this cycle
I0321 09:13:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 09:13:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:13:13.409831  543705 memory.go:191] Add success.
I0321 09:13:13.409839  543705 cpu.go:282] Add success.
W0321 09:13:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:13:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:13:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:13:13.420274  543705 net.go:648] Add success.
I0321 09:13:13.423049  543705 net.go:770] primary dev: ETH0
I0321 09:13:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:13:13.423079  543705 net.go:698] Add success.
I0321 09:13:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:13:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:13:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 09:13:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:13:14.456793  543705 disk_worker.go:494] system disk:vda1
I0321 09:13:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:13:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:13:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:13:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:13:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:13:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:13:23.409761  543705 memory.go:184] no items to output this cycle
I0321 09:13:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 09:13:28.765482  543705 disk_info.go:125] begin check local disk info of client
I0321 09:13:28.767975  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:13:28.767981  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353740 0xc000353780]
E0321 09:13:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:13:33.409766  543705 memory.go:184] no items to output this cycle
I0321 09:13:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 09:13:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:13:43.409822  543705 memory.go:191] Add success.
I0321 09:13:43.409826  543705 cpu.go:282] Add success.
I0321 09:13:43.420005  543705 net.go:648] Add success.
I0321 09:13:43.422508  543705 net.go:770] primary dev: ETH0
I0321 09:13:43.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:13:43.422533  543705 net.go:698] Add success.
I0321 09:13:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:13:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:13:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:13:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:13:53.409783  543705 memory.go:184] no items to output this cycle
I0321 09:13:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 09:14:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:14:03.409909  543705 memory.go:184] no items to output this cycle
I0321 09:14:03.409944  543705 cpu.go:275] no items to output this cycle
E0321 09:14:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:14:13.409800  543705 memory.go:191] Add success.
I0321 09:14:13.409802  543705 cpu.go:282] Add success.
W0321 09:14:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:14:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:14:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:14:13.420170  543705 net.go:648] Add success.
I0321 09:14:13.423424  543705 net.go:770] primary dev: ETH0
I0321 09:14:13.423437  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:14:13.423448  543705 net.go:698] Add success.
I0321 09:14:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:14:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:14:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 09:14:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:14:14.456498  543705 disk_worker.go:494] system disk:vda1
I0321 09:14:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:14:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:14:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:14:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:14:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:14:16.472487  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:14:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:14:23.409763  543705 memory.go:184] no items to output this cycle
I0321 09:14:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 09:14:28.769480  543705 disk_info.go:125] begin check local disk info of client
I0321 09:14:28.771978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:14:28.771984  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4380 0xc0000c43c0]
E0321 09:14:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:14:33.409764  543705 memory.go:184] no items to output this cycle
I0321 09:14:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 09:14:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:14:43.409844  543705 memory.go:191] Add success.
I0321 09:14:43.409812  543705 cpu.go:282] Add success.
I0321 09:14:43.420217  543705 net.go:648] Add success.
I0321 09:14:43.421276  543705 net.go:770] primary dev: ETH0
I0321 09:14:43.421289  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:14:43.421303  543705 net.go:698] Add success.
I0321 09:14:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:14:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:14:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:14:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:14:53.409770  543705 memory.go:184] no items to output this cycle
I0321 09:14:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 09:15:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:15:03.409769  543705 memory.go:184] no items to output this cycle
I0321 09:15:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 09:15:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:15:13.409804  543705 memory.go:191] Add success.
I0321 09:15:13.409805  543705 cpu.go:282] Add success.
W0321 09:15:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:15:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:15:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:15:13.420158  543705 net.go:648] Add success.
I0321 09:15:13.423050  543705 net.go:770] primary dev: ETH0
I0321 09:15:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:15:13.423076  543705 net.go:698] Add success.
I0321 09:15:13.464458  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5512e2ad-e96d-4ccc-a330-1f93047e524c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:15:13.464493  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:15:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:15:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 09:15:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:15:14.456596  543705 disk_worker.go:494] system disk:vda1
I0321 09:15:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:15:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:15:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:15:23.409776  543705 memory.go:184] no items to output this cycle
I0321 09:15:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 09:15:28.773506  543705 disk_info.go:125] begin check local disk info of client
I0321 09:15:28.775990  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:15:28.775995  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab440 0xc0001ab480]
E0321 09:15:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:15:33.409796  543705 memory.go:184] no items to output this cycle
I0321 09:15:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 09:15:38.813732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:15:38.813739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:15:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:15:43.410624  543705 memory.go:191] Add success.
I0321 09:15:43.409830  543705 cpu.go:282] Add success.
I0321 09:15:43.420334  543705 net.go:648] Add success.
I0321 09:15:43.423503  543705 net.go:770] primary dev: ETH0
I0321 09:15:43.423516  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:15:43.423529  543705 net.go:698] Add success.
I0321 09:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:15:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:15:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:15:53.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:15:53.410424  543705 memory.go:184] no items to output this cycle
I0321 09:15:53.410438  543705 cpu.go:275] no items to output this cycle
E0321 09:16:03.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:16:03.409852  543705 memory.go:184] no items to output this cycle
I0321 09:16:03.409923  543705 cpu.go:275] no items to output this cycle
E0321 09:16:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:16:13.409802  543705 memory.go:191] Add success.
I0321 09:16:13.409804  543705 cpu.go:282] Add success.
W0321 09:16:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:16:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:16:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:16:13.420247  543705 net.go:648] Add success.
I0321 09:16:13.423234  543705 net.go:770] primary dev: ETH0
I0321 09:16:13.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:16:13.423261  543705 net.go:698] Add success.
I0321 09:16:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:16:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:16:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 09:16:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:16:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 09:16:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:16:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:16:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:16:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:16:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:16:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:16:23.409768  543705 memory.go:184] no items to output this cycle
I0321 09:16:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 09:16:28.777540  543705 disk_info.go:125] begin check local disk info of client
I0321 09:16:28.780000  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:16:28.780006  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab9c0 0xc0001aba00]
E0321 09:16:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:16:33.409794  543705 memory.go:184] no items to output this cycle
I0321 09:16:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 09:16:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:16:43.409826  543705 memory.go:191] Add success.
I0321 09:16:43.409833  543705 cpu.go:282] Add success.
I0321 09:16:43.420248  543705 net.go:648] Add success.
I0321 09:16:43.423393  543705 net.go:770] primary dev: ETH0
I0321 09:16:43.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:16:43.423431  543705 net.go:698] Add success.
I0321 09:16:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:16:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:16:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:16:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:16:53.409793  543705 memory.go:184] no items to output this cycle
I0321 09:16:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 09:17:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:17:03.409860  543705 memory.go:184] no items to output this cycle
I0321 09:17:03.409942  543705 cpu.go:275] no items to output this cycle
E0321 09:17:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:17:13.409802  543705 memory.go:191] Add success.
I0321 09:17:13.409802  543705 cpu.go:282] Add success.
W0321 09:17:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:17:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:17:13.420198  543705 net.go:648] Add success.
I0321 09:17:13.422900  543705 net.go:770] primary dev: ETH0
I0321 09:17:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:17:13.422934  543705 net.go:698] Add success.
I0321 09:17:13.453503  543705 event_worker.go:152] Polling the log file for events...
W0321 09:17:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:17:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 09:17:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:17:14.456950  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:17:14.456960  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:17:14.456966  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:17:14.457001  543705 disk_worker.go:494] system disk:vda1
I0321 09:17:14.457027  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:17:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:17:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:17:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:17:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:17:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:17:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:17:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:17:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:17:23.409792  543705 memory.go:184] no items to output this cycle
I0321 09:17:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 09:17:28.781537  543705 disk_info.go:125] begin check local disk info of client
I0321 09:17:28.783977  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:17:28.783983  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003147c0 0xc000314800]
E0321 09:17:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:17:33.409795  543705 memory.go:184] no items to output this cycle
I0321 09:17:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 09:17:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:17:43.409792  543705 memory.go:191] Add success.
I0321 09:17:43.409826  543705 cpu.go:282] Add success.
I0321 09:17:43.419955  543705 net.go:648] Add success.
I0321 09:17:43.422712  543705 net.go:770] primary dev: ETH0
I0321 09:17:43.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:17:43.422738  543705 net.go:698] Add success.
I0321 09:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:17:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:17:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:17:53.409781  543705 memory.go:184] no items to output this cycle
I0321 09:17:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 09:18:03.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:18:03.409909  543705 memory.go:184] no items to output this cycle
I0321 09:18:03.409924  543705 cpu.go:275] no items to output this cycle
E0321 09:18:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:18:13.409794  543705 memory.go:191] Add success.
W0321 09:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:18:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:18:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:18:13.409840  543705 cpu.go:282] Add success.
I0321 09:18:13.420078  543705 net.go:648] Add success.
I0321 09:18:13.423240  543705 net.go:770] primary dev: ETH0
I0321 09:18:13.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:18:13.423265  543705 net.go:698] Add success.
I0321 09:18:13.468831  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ecf341b3-a187-49be-9004-93ef51132b9f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:18:13.468866  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:18:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:18:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:18:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 09:18:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:18:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 09:18:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:18:15.455608  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:18:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:18:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:18:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:18:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:18:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:18:23.409759  543705 memory.go:184] no items to output this cycle
I0321 09:18:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 09:18:28.785574  543705 disk_info.go:125] begin check local disk info of client
I0321 09:18:28.788066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:18:28.788073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003142c0 0xc000314300]
E0321 09:18:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:18:33.409795  543705 memory.go:184] no items to output this cycle
I0321 09:18:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 09:18:38.813876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:18:38.813883  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:18:43.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:18:43.410714  543705 memory.go:191] Add success.
I0321 09:18:43.409847  543705 cpu.go:282] Add success.
I0321 09:18:43.420464  543705 net.go:648] Add success.
I0321 09:18:43.422922  543705 net.go:770] primary dev: ETH0
I0321 09:18:43.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:18:43.422950  543705 net.go:698] Add success.
I0321 09:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:18:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:18:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:18:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:18:53.409807  543705 memory.go:184] no items to output this cycle
I0321 09:18:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 09:19:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:19:03.409788  543705 memory.go:184] no items to output this cycle
I0321 09:19:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 09:19:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:19:13.409796  543705 memory.go:191] Add success.
I0321 09:19:13.409802  543705 cpu.go:282] Add success.
W0321 09:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:19:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:19:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:19:13.420213  543705 net.go:648] Add success.
I0321 09:19:13.422731  543705 net.go:770] primary dev: ETH0
I0321 09:19:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:19:13.422757  543705 net.go:698] Add success.
I0321 09:19:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:19:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:19:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 09:19:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:19:14.456542  543705 disk_worker.go:494] system disk:vda1
I0321 09:19:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:19:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:19:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:19:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:19:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:19:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:19:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:19:23.409758  543705 memory.go:184] no items to output this cycle
I0321 09:19:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 09:19:28.789577  543705 disk_info.go:125] begin check local disk info of client
I0321 09:19:28.792052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:19:28.792058  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c5c40 0xc0001c5c80]
E0321 09:19:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:19:33.409766  543705 memory.go:184] no items to output this cycle
I0321 09:19:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 09:19:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:19:43.409795  543705 memory.go:191] Add success.
I0321 09:19:43.409799  543705 cpu.go:282] Add success.
I0321 09:19:43.420005  543705 net.go:648] Add success.
I0321 09:19:43.423265  543705 net.go:770] primary dev: ETH0
I0321 09:19:43.423278  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:19:43.423291  543705 net.go:698] Add success.
I0321 09:19:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:19:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:19:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:19:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:19:53.409767  543705 memory.go:184] no items to output this cycle
I0321 09:19:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 09:20:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:20:03.409818  543705 memory.go:184] no items to output this cycle
I0321 09:20:03.409831  543705 cpu.go:275] no items to output this cycle
E0321 09:20:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:20:13.409808  543705 memory.go:191] Add success.
I0321 09:20:13.409831  543705 cpu.go:282] Add success.
W0321 09:20:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:20:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:20:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:20:13.420391  543705 net.go:648] Add success.
I0321 09:20:13.423162  543705 net.go:770] primary dev: ETH0
I0321 09:20:13.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:20:13.423191  543705 net.go:698] Add success.
I0321 09:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:20:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:20:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 09:20:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:20:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 09:20:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:20:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:20:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:20:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:20:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:20:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:20:23.409794  543705 memory.go:184] no items to output this cycle
I0321 09:20:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 09:20:28.793602  543705 disk_info.go:125] begin check local disk info of client
I0321 09:20:28.796099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:20:28.796105  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393900 0xc000393940]
E0321 09:20:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:20:33.409795  543705 memory.go:184] no items to output this cycle
I0321 09:20:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 09:20:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:20:43.409826  543705 memory.go:191] Add success.
I0321 09:20:43.409830  543705 cpu.go:282] Add success.
I0321 09:20:43.419983  543705 net.go:648] Add success.
I0321 09:20:43.422611  543705 net.go:770] primary dev: ETH0
I0321 09:20:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:20:43.422638  543705 net.go:698] Add success.
I0321 09:20:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:20:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:20:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:20:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:20:53.409782  543705 memory.go:184] no items to output this cycle
I0321 09:20:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 09:21:03.409912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:21:03.409935  543705 memory.go:184] no items to output this cycle
I0321 09:21:03.409977  543705 cpu.go:275] no items to output this cycle
E0321 09:21:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:21:13.409804  543705 memory.go:191] Add success.
I0321 09:21:13.409808  543705 cpu.go:282] Add success.
W0321 09:21:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:21:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:21:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:21:13.420071  543705 net.go:648] Add success.
I0321 09:21:13.422861  543705 net.go:770] primary dev: ETH0
I0321 09:21:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:21:13.422888  543705 net.go:698] Add success.
I0321 09:21:13.469840  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9a9ce27-b83f-4416-9211-f0cee40f24cb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:21:13.469871  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:21:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:21:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:21:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0321 09:21:14.455242  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:21:14.456806  543705 disk_worker.go:494] system disk:vda1
I0321 09:21:14.456841  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:21:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:21:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:21:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:21:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:21:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:21:23.409770  543705 memory.go:184] no items to output this cycle
I0321 09:21:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 09:21:28.797614  543705 disk_info.go:125] begin check local disk info of client
I0321 09:21:28.800069  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:21:28.800076  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b000 0xc00007b040]
E0321 09:21:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:21:33.409775  543705 memory.go:184] no items to output this cycle
I0321 09:21:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 09:21:38.815774  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:21:38.815781  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:21:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:21:43.410730  543705 memory.go:191] Add success.
I0321 09:21:43.409837  543705 cpu.go:282] Add success.
I0321 09:21:43.420413  543705 net.go:648] Add success.
I0321 09:21:43.422877  543705 net.go:770] primary dev: ETH0
I0321 09:21:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:21:43.422905  543705 net.go:698] Add success.
I0321 09:21:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:21:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:21:53.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:21:53.409829  543705 memory.go:184] no items to output this cycle
I0321 09:21:53.409830  543705 cpu.go:275] no items to output this cycle
E0321 09:22:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:22:03.409801  543705 memory.go:184] no items to output this cycle
I0321 09:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 09:22:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:22:13.409790  543705 memory.go:191] Add success.
W0321 09:22:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:22:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:22:13.409831  543705 cpu.go:282] Add success.
I0321 09:22:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:22:13.420303  543705 net.go:648] Add success.
I0321 09:22:13.422942  543705 net.go:770] primary dev: ETH0
I0321 09:22:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:22:13.422971  543705 net.go:698] Add success.
W0321 09:22:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:22:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 09:22:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:22:14.456809  543705 disk_worker.go:494] system disk:vda1
I0321 09:22:14.456847  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:22:14.457104  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:22:14.457111  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:22:14.457116  543705 custom_config.go:64] query custom config with name: gpu
E0321 09:22:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:22:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:22:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:22:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:22:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:22:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:22:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:22:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:22:23.409788  543705 memory.go:184] no items to output this cycle
I0321 09:22:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 09:22:28.801638  543705 disk_info.go:125] begin check local disk info of client
I0321 09:22:28.804101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:22:28.804107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3c80 0xc0003b3cc0]
E0321 09:22:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:22:33.409802  543705 memory.go:184] no items to output this cycle
I0321 09:22:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 09:22:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:22:43.409794  543705 memory.go:191] Add success.
I0321 09:22:43.409827  543705 cpu.go:282] Add success.
I0321 09:22:43.419971  543705 net.go:648] Add success.
I0321 09:22:43.422819  543705 net.go:770] primary dev: ETH0
I0321 09:22:43.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:22:43.422845  543705 net.go:698] Add success.
I0321 09:22:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:22:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:22:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:22:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:22:53.409772  543705 memory.go:184] no items to output this cycle
I0321 09:22:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 09:23:03.409938  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:23:03.410088  543705 memory.go:184] no items to output this cycle
I0321 09:23:03.409955  543705 cpu.go:275] no items to output this cycle
E0321 09:23:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:23:13.409828  543705 memory.go:191] Add success.
I0321 09:23:13.409837  543705 cpu.go:282] Add success.
W0321 09:23:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:23:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:23:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:23:13.420306  543705 net.go:648] Add success.
I0321 09:23:13.422899  543705 net.go:770] primary dev: ETH0
I0321 09:23:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:23:13.422927  543705 net.go:698] Add success.
I0321 09:23:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:23:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:23:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 09:23:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:23:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 09:23:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:23:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:23:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:23:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:23:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:23:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:23:23.409764  543705 memory.go:184] no items to output this cycle
I0321 09:23:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 09:23:28.805666  543705 disk_info.go:125] begin check local disk info of client
I0321 09:23:28.808135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:23:28.808141  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353780 0xc0003537c0]
E0321 09:23:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:23:33.409766  543705 memory.go:184] no items to output this cycle
I0321 09:23:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 09:23:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:23:43.409824  543705 memory.go:191] Add success.
I0321 09:23:43.409833  543705 cpu.go:282] Add success.
I0321 09:23:43.419986  543705 net.go:648] Add success.
I0321 09:23:43.423126  543705 net.go:770] primary dev: ETH0
I0321 09:23:43.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:23:43.423153  543705 net.go:698] Add success.
I0321 09:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:23:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:23:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:23:53.409783  543705 memory.go:184] no items to output this cycle
I0321 09:23:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 09:24:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:24:03.409821  543705 memory.go:184] no items to output this cycle
I0321 09:24:03.409832  543705 cpu.go:275] no items to output this cycle
E0321 09:24:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:24:13.409793  543705 memory.go:191] Add success.
I0321 09:24:13.409801  543705 cpu.go:282] Add success.
W0321 09:24:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:24:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:24:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:24:13.420176  543705 net.go:648] Add success.
I0321 09:24:13.422590  543705 net.go:770] primary dev: ETH0
I0321 09:24:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:24:13.422615  543705 net.go:698] Add success.
I0321 09:24:13.463387  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7833abcd-6ce6-44b4-b132-0ced8725bf65","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:24:13.463420  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:24:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:24:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:24:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 09:24:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:24:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 09:24:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:24:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:24:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:24:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:24:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:24:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:24:23.409803  543705 memory.go:184] no items to output this cycle
I0321 09:24:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 09:24:28.809676  543705 disk_info.go:125] begin check local disk info of client
I0321 09:24:28.812140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:24:28.812145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b74c0 0xc0002b7500]
E0321 09:24:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:24:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 09:24:33.409781  543705 memory.go:184] no items to output this cycle
I0321 09:24:38.817734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:24:38.817740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:24:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:24:43.410876  543705 memory.go:191] Add success.
I0321 09:24:43.409871  543705 cpu.go:282] Add success.
I0321 09:24:43.420715  543705 net.go:648] Add success.
I0321 09:24:43.423444  543705 net.go:770] primary dev: ETH0
I0321 09:24:43.423460  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:24:43.423474  543705 net.go:698] Add success.
I0321 09:24:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:24:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:24:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:24:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:24:53.409802  543705 memory.go:184] no items to output this cycle
I0321 09:24:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 09:25:03.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:25:03.409878  543705 memory.go:184] no items to output this cycle
I0321 09:25:03.409946  543705 cpu.go:275] no items to output this cycle
E0321 09:25:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:25:13.409794  543705 memory.go:191] Add success.
I0321 09:25:13.409798  543705 cpu.go:282] Add success.
W0321 09:25:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:25:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:25:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:25:13.420179  543705 net.go:648] Add success.
I0321 09:25:13.423031  543705 net.go:770] primary dev: ETH0
I0321 09:25:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:25:13.423059  543705 net.go:698] Add success.
I0321 09:25:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:25:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:25:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 09:25:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:25:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 09:25:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:25:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:25:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:25:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:25:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:25:23.409772  543705 memory.go:184] no items to output this cycle
I0321 09:25:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 09:25:28.813672  543705 disk_info.go:125] begin check local disk info of client
I0321 09:25:28.816178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:25:28.816184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba6c0 0xc0002ba700]
E0321 09:25:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:25:33.409780  543705 memory.go:184] no items to output this cycle
I0321 09:25:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 09:25:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:25:43.409817  543705 memory.go:191] Add success.
I0321 09:25:43.409828  543705 cpu.go:282] Add success.
I0321 09:25:43.419984  543705 net.go:648] Add success.
I0321 09:25:43.422737  543705 net.go:770] primary dev: ETH0
I0321 09:25:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:25:43.422766  543705 net.go:698] Add success.
I0321 09:25:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:25:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:25:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:25:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:25:53.409810  543705 memory.go:184] no items to output this cycle
I0321 09:25:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 09:26:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:26:03.409813  543705 memory.go:184] no items to output this cycle
I0321 09:26:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 09:26:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:26:13.409831  543705 memory.go:191] Add success.
I0321 09:26:13.409846  543705 cpu.go:282] Add success.
W0321 09:26:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:26:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:26:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:26:13.420191  543705 net.go:648] Add success.
I0321 09:26:13.423106  543705 net.go:770] primary dev: ETH0
I0321 09:26:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:26:13.423141  543705 net.go:698] Add success.
I0321 09:26:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:26:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:26:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 09:26:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:26:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 09:26:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:26:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:26:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:26:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:26:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:26:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:26:23.409796  543705 memory.go:184] no items to output this cycle
I0321 09:26:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 09:26:28.817672  543705 disk_info.go:125] begin check local disk info of client
I0321 09:26:28.820145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:26:28.820152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384bc0 0xc000384c00]
E0321 09:26:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:26:33.409767  543705 memory.go:184] no items to output this cycle
I0321 09:26:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 09:26:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:26:43.409799  543705 memory.go:191] Add success.
I0321 09:26:43.409800  543705 cpu.go:282] Add success.
I0321 09:26:43.420072  543705 net.go:648] Add success.
I0321 09:26:43.422897  543705 net.go:770] primary dev: ETH0
I0321 09:26:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:26:43.422922  543705 net.go:698] Add success.
I0321 09:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:26:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:26:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:26:53.409785  543705 memory.go:184] no items to output this cycle
I0321 09:26:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 09:27:03.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:27:03.409895  543705 cpu.go:275] no items to output this cycle
I0321 09:27:03.409901  543705 memory.go:184] no items to output this cycle
E0321 09:27:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:27:13.409782  543705 memory.go:191] Add success.
W0321 09:27:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:27:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:27:13.409818  543705 cpu.go:282] Add success.
I0321 09:27:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:27:13.420290  543705 net.go:648] Add success.
I0321 09:27:13.429365  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 09:27:13.429447  543705 net.go:770] primary dev: ETH0
I0321 09:27:13.429460  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:27:13.429472  543705 net.go:698] Add success.
I0321 09:27:13.453007  543705 event_worker.go:152] Polling the log file for events...
I0321 09:27:13.468524  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"81c0f7a6-4370-4704-9426-06940d5af157","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:27:13.468558  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 09:27:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:27:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 09:27:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:27:14.455966  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:27:14.455974  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:27:14.455980  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:27:14.456436  543705 disk_worker.go:494] system disk:vda1
I0321 09:27:14.456465  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:27:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:27:15.456792  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:27:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:27:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:27:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:27:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:27:16.472341  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:27:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:27:23.409770  543705 memory.go:184] no items to output this cycle
I0321 09:27:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 09:27:28.821675  543705 disk_info.go:125] begin check local disk info of client
I0321 09:27:28.824146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:27:28.824153  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029e7c0 0xc00029e800]
E0321 09:27:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:27:33.409798  543705 memory.go:184] no items to output this cycle
I0321 09:27:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 09:27:38.817883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:27:38.817889  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:27:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:27:43.410585  543705 memory.go:191] Add success.
I0321 09:27:43.409801  543705 cpu.go:282] Add success.
I0321 09:27:43.420328  543705 net.go:648] Add success.
I0321 09:27:43.422806  543705 net.go:770] primary dev: ETH0
I0321 09:27:43.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:27:43.422839  543705 net.go:698] Add success.
I0321 09:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:27:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:27:53.409798  543705 memory.go:184] no items to output this cycle
I0321 09:27:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 09:28:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:28:03.409807  543705 memory.go:184] no items to output this cycle
I0321 09:28:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 09:28:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:28:13.409791  543705 memory.go:191] Add success.
I0321 09:28:13.409796  543705 cpu.go:282] Add success.
W0321 09:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:28:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:28:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:28:13.420250  543705 net.go:648] Add success.
I0321 09:28:13.423178  543705 net.go:770] primary dev: ETH0
I0321 09:28:13.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:28:13.423202  543705 net.go:698] Add success.
I0321 09:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:28:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:28:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 09:28:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:28:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 09:28:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:28:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:28:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:28:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:28:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:28:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:28:23.409766  543705 memory.go:184] no items to output this cycle
I0321 09:28:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 09:28:28.825674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:28:28.828129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:28:28.828135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8440 0xc0002b8480]
E0321 09:28:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:28:33.409795  543705 memory.go:184] no items to output this cycle
I0321 09:28:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 09:28:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:28:43.409794  543705 memory.go:191] Add success.
I0321 09:28:43.409796  543705 cpu.go:282] Add success.
I0321 09:28:43.419977  543705 net.go:648] Add success.
I0321 09:28:43.422750  543705 net.go:770] primary dev: ETH0
I0321 09:28:43.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:28:43.422775  543705 net.go:698] Add success.
I0321 09:28:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:28:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:28:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:28:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:28:53.409806  543705 memory.go:184] no items to output this cycle
I0321 09:28:53.409814  543705 cpu.go:275] no items to output this cycle
I0321 09:29:03.409918  543705 cpu.go:275] no items to output this cycle
E0321 09:29:03.410007  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:29:03.410029  543705 memory.go:184] no items to output this cycle
E0321 09:29:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:29:13.409790  543705 memory.go:191] Add success.
I0321 09:29:13.409799  543705 cpu.go:282] Add success.
W0321 09:29:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:29:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:29:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:29:13.420186  543705 net.go:648] Add success.
I0321 09:29:13.423307  543705 net.go:770] primary dev: ETH0
I0321 09:29:13.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:29:13.423341  543705 net.go:698] Add success.
I0321 09:29:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:29:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:29:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 09:29:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:29:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 09:29:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:29:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:29:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:29:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:29:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:29:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:29:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:29:23.409794  543705 memory.go:184] no items to output this cycle
I0321 09:29:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 09:29:28.829672  543705 disk_info.go:125] begin check local disk info of client
I0321 09:29:28.832144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:29:28.832151  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028ad40 0xc00028ad80]
E0321 09:29:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:29:33.409759  543705 memory.go:184] no items to output this cycle
I0321 09:29:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 09:29:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:29:43.409796  543705 memory.go:191] Add success.
I0321 09:29:43.409797  543705 cpu.go:282] Add success.
I0321 09:29:43.419882  543705 net.go:648] Add success.
I0321 09:29:43.422563  543705 net.go:770] primary dev: ETH0
I0321 09:29:43.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:29:43.422594  543705 net.go:698] Add success.
I0321 09:29:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:29:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:29:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:29:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:29:53.409778  543705 memory.go:184] no items to output this cycle
I0321 09:29:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 09:30:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:30:03.409787  543705 memory.go:184] no items to output this cycle
I0321 09:30:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 09:30:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:30:13.409783  543705 memory.go:191] Add success.
I0321 09:30:13.409807  543705 cpu.go:282] Add success.
W0321 09:30:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:30:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:30:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:30:13.420126  543705 net.go:648] Add success.
I0321 09:30:13.422717  543705 net.go:770] primary dev: ETH0
I0321 09:30:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:30:13.422742  543705 net.go:698] Add success.
I0321 09:30:13.464376  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b1ef3694-5e8e-4f94-8c27-e04189e178d6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:30:13.464422  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:30:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:30:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:30:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 09:30:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:30:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 09:30:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:30:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:30:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:30:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:30:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:30:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:30:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:30:23.409769  543705 memory.go:184] no items to output this cycle
I0321 09:30:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 09:30:28.833671  543705 disk_info.go:125] begin check local disk info of client
I0321 09:30:28.836149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:30:28.836155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e7540 0xc0001e7580]
E0321 09:30:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:30:33.409776  543705 memory.go:184] no items to output this cycle
I0321 09:30:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 09:30:38.818024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:30:38.818030  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:30:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:30:43.410588  543705 memory.go:191] Add success.
I0321 09:30:43.409801  543705 cpu.go:282] Add success.
I0321 09:30:43.420356  543705 net.go:648] Add success.
I0321 09:30:43.423122  543705 net.go:770] primary dev: ETH0
I0321 09:30:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:30:43.423154  543705 net.go:698] Add success.
I0321 09:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:30:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:30:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:30:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:30:53.409778  543705 memory.go:184] no items to output this cycle
I0321 09:30:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 09:31:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:31:03.409806  543705 memory.go:184] no items to output this cycle
I0321 09:31:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 09:31:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:31:13.409797  543705 memory.go:191] Add success.
I0321 09:31:13.409802  543705 cpu.go:282] Add success.
W0321 09:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:31:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:31:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:31:13.420245  543705 net.go:648] Add success.
I0321 09:31:13.422923  543705 net.go:770] primary dev: ETH0
I0321 09:31:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:31:13.422949  543705 net.go:698] Add success.
I0321 09:31:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:31:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:31:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 09:31:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:31:14.456537  543705 disk_worker.go:494] system disk:vda1
I0321 09:31:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:31:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:31:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:31:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:31:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:31:23.409797  543705 memory.go:184] no items to output this cycle
I0321 09:31:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 09:31:28.837673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:31:28.840156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:31:28.840162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa840 0xc0001aa880]
E0321 09:31:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:31:33.409762  543705 memory.go:184] no items to output this cycle
I0321 09:31:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 09:31:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:31:43.409817  543705 memory.go:191] Add success.
I0321 09:31:43.409829  543705 cpu.go:282] Add success.
I0321 09:31:43.420007  543705 net.go:648] Add success.
I0321 09:31:43.422495  543705 net.go:770] primary dev: ETH0
I0321 09:31:43.422509  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:31:43.422531  543705 net.go:698] Add success.
I0321 09:31:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:31:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:31:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:31:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:31:53.409783  543705 memory.go:184] no items to output this cycle
I0321 09:31:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 09:32:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:32:03.409771  543705 memory.go:184] no items to output this cycle
I0321 09:32:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 09:32:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:32:13.409796  543705 memory.go:191] Add success.
I0321 09:32:13.409984  543705 cpu.go:282] Add success.
W0321 09:32:13.410063  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:32:13.410094  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:32:13.410099  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:32:13.419719  543705 net.go:648] Add success.
I0321 09:32:13.422387  543705 net.go:770] primary dev: ETH0
I0321 09:32:13.422399  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:32:13.422410  543705 net.go:698] Add success.
W0321 09:32:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:32:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 09:32:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:32:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:32:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:32:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:32:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 09:32:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:32:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:32:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:32:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:32:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:32:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:32:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:32:16.472313  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:32:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:32:23.409765  543705 memory.go:184] no items to output this cycle
I0321 09:32:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 09:32:28.841674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:32:28.844114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:32:28.844120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb40 0xc0001abb80]
E0321 09:32:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:32:33.409793  543705 memory.go:184] no items to output this cycle
I0321 09:32:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 09:32:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:32:43.409821  543705 memory.go:191] Add success.
I0321 09:32:43.409827  543705 cpu.go:282] Add success.
I0321 09:32:43.419997  543705 net.go:648] Add success.
I0321 09:32:43.422832  543705 net.go:770] primary dev: ETH0
I0321 09:32:43.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:32:43.422858  543705 net.go:698] Add success.
I0321 09:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:32:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:32:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:32:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:32:53.409806  543705 memory.go:184] no items to output this cycle
I0321 09:32:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 09:33:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:33:03.409775  543705 memory.go:184] no items to output this cycle
I0321 09:33:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 09:33:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:33:13.409814  543705 memory.go:191] Add success.
I0321 09:33:13.409821  543705 cpu.go:282] Add success.
W0321 09:33:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:33:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:33:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:33:13.419721  543705 net.go:648] Add success.
I0321 09:33:13.422526  543705 net.go:770] primary dev: ETH0
I0321 09:33:13.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:33:13.422549  543705 net.go:698] Add success.
I0321 09:33:13.464162  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bd64cc80-09cb-40ab-8fd6-54a5d7350629","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:33:13.464193  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:33:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:33:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:33:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 09:33:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:33:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 09:33:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:33:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:33:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:33:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:33:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:33:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:33:23.409786  543705 memory.go:184] no items to output this cycle
I0321 09:33:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 09:33:28.845683  543705 disk_info.go:125] begin check local disk info of client
I0321 09:33:28.848178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:33:28.848184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa240 0xc0001aa280]
E0321 09:33:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:33:33.409800  543705 memory.go:184] no items to output this cycle
I0321 09:33:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 09:33:38.819797  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:33:38.819803  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:33:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:33:43.410701  543705 memory.go:191] Add success.
I0321 09:33:43.409819  543705 cpu.go:282] Add success.
I0321 09:33:43.420468  543705 net.go:648] Add success.
I0321 09:33:43.423219  543705 net.go:770] primary dev: ETH0
I0321 09:33:43.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:33:43.423245  543705 net.go:698] Add success.
I0321 09:33:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:33:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:33:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:33:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:33:53.409793  543705 memory.go:184] no items to output this cycle
I0321 09:33:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 09:34:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:34:03.409792  543705 memory.go:184] no items to output this cycle
I0321 09:34:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 09:34:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:34:13.409804  543705 memory.go:191] Add success.
I0321 09:34:13.409811  543705 cpu.go:282] Add success.
W0321 09:34:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:34:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:34:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:34:13.419713  543705 net.go:648] Add success.
I0321 09:34:13.422419  543705 net.go:770] primary dev: ETH0
I0321 09:34:13.422432  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:34:13.422444  543705 net.go:698] Add success.
I0321 09:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:34:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:34:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 09:34:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:34:14.456482  543705 disk_worker.go:494] system disk:vda1
I0321 09:34:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:34:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:34:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:34:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:34:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:34:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:34:23.409772  543705 memory.go:184] no items to output this cycle
I0321 09:34:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 09:34:28.849673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:34:28.852173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:34:28.852179  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa040 0xc0001aa080]
E0321 09:34:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:34:33.409805  543705 memory.go:184] no items to output this cycle
I0321 09:34:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 09:34:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:34:43.409835  543705 memory.go:191] Add success.
I0321 09:34:43.409839  543705 cpu.go:282] Add success.
I0321 09:34:43.419969  543705 net.go:648] Add success.
I0321 09:34:43.422525  543705 net.go:770] primary dev: ETH0
I0321 09:34:43.422538  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:34:43.422551  543705 net.go:698] Add success.
I0321 09:34:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:34:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:34:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:34:53.409793  543705 memory.go:184] no items to output this cycle
I0321 09:34:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 09:35:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:35:03.409809  543705 memory.go:184] no items to output this cycle
I0321 09:35:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 09:35:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:35:13.409824  543705 memory.go:191] Add success.
I0321 09:35:13.409846  543705 cpu.go:282] Add success.
W0321 09:35:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:35:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:35:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:35:13.419733  543705 net.go:648] Add success.
I0321 09:35:13.422319  543705 net.go:770] primary dev: ETH0
I0321 09:35:13.422333  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:35:13.422346  543705 net.go:698] Add success.
I0321 09:35:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:35:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:35:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 09:35:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:35:14.456555  543705 disk_worker.go:494] system disk:vda1
I0321 09:35:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:35:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:35:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:35:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:35:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:35:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:35:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:35:23.409799  543705 memory.go:184] no items to output this cycle
I0321 09:35:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 09:35:28.853675  543705 disk_info.go:125] begin check local disk info of client
I0321 09:35:28.856154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:35:28.856160  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3600 0xc0002a3640]
E0321 09:35:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:35:33.409791  543705 memory.go:184] no items to output this cycle
I0321 09:35:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 09:35:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:35:43.409824  543705 memory.go:191] Add success.
I0321 09:35:43.409828  543705 cpu.go:282] Add success.
I0321 09:35:43.419991  543705 net.go:648] Add success.
I0321 09:35:43.423042  543705 net.go:770] primary dev: ETH0
I0321 09:35:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:35:43.423067  543705 net.go:698] Add success.
I0321 09:35:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:35:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:35:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:35:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:35:53.409776  543705 memory.go:184] no items to output this cycle
I0321 09:35:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 09:36:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:36:03.409775  543705 memory.go:184] no items to output this cycle
I0321 09:36:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 09:36:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:36:13.409796  543705 memory.go:191] Add success.
I0321 09:36:13.409800  543705 cpu.go:282] Add success.
W0321 09:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:36:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:36:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:36:13.420329  543705 net.go:648] Add success.
I0321 09:36:13.422974  543705 net.go:770] primary dev: ETH0
I0321 09:36:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:36:13.422998  543705 net.go:698] Add success.
I0321 09:36:13.546062  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b926b2a-d933-4677-a251-010ab05fc4e5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:36:13.546093  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:36:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:36:14.455249  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:36:14.455262  543705 disk_worker.go:708] disk space is not compliant
W0321 09:36:14.455266  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:36:14.457107  543705 disk_worker.go:494] system disk:vda1
I0321 09:36:14.457139  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:36:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:36:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:36:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:36:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:36:16.472496  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:36:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:36:23.409763  543705 memory.go:184] no items to output this cycle
I0321 09:36:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 09:36:28.857671  543705 disk_info.go:125] begin check local disk info of client
I0321 09:36:28.860118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:36:28.860124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bc80 0xc00007bcc0]
E0321 09:36:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:36:33.409768  543705 memory.go:184] no items to output this cycle
I0321 09:36:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 09:36:38.821731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:36:38.821737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:36:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:36:43.410578  543705 memory.go:191] Add success.
I0321 09:36:43.409816  543705 cpu.go:282] Add success.
I0321 09:36:43.420308  543705 net.go:648] Add success.
I0321 09:36:43.422771  543705 net.go:770] primary dev: ETH0
I0321 09:36:43.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:36:43.422799  543705 net.go:698] Add success.
I0321 09:36:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:36:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:36:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:36:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:36:53.409774  543705 memory.go:184] no items to output this cycle
I0321 09:36:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 09:37:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:37:03.409792  543705 memory.go:184] no items to output this cycle
I0321 09:37:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 09:37:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:37:13.409804  543705 memory.go:191] Add success.
I0321 09:37:13.409812  543705 cpu.go:282] Add success.
W0321 09:37:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:37:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:37:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:37:13.420148  543705 net.go:648] Add success.
I0321 09:37:13.422930  543705 net.go:770] primary dev: ETH0
I0321 09:37:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:37:13.422958  543705 net.go:698] Add success.
I0321 09:37:13.453628  543705 event_worker.go:152] Polling the log file for events...
W0321 09:37:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:37:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 09:37:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:37:14.456770  543705 disk_worker.go:494] system disk:vda1
I0321 09:37:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:37:14.457087  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:37:14.457095  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:37:14.457099  543705 custom_config.go:64] query custom config with name: gpu
E0321 09:37:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:37:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:37:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:37:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:37:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:37:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:37:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:37:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:37:23.409794  543705 memory.go:184] no items to output this cycle
I0321 09:37:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 09:37:28.861672  543705 disk_info.go:125] begin check local disk info of client
I0321 09:37:28.864116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:37:28.864122  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342840 0xc000342880]
E0321 09:37:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:37:33.409757  543705 memory.go:184] no items to output this cycle
I0321 09:37:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 09:37:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:37:43.409794  543705 memory.go:191] Add success.
I0321 09:37:43.409799  543705 cpu.go:282] Add success.
I0321 09:37:43.419906  543705 net.go:648] Add success.
I0321 09:37:43.422928  543705 net.go:770] primary dev: ETH0
I0321 09:37:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:37:43.422954  543705 net.go:698] Add success.
I0321 09:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:37:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:37:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:37:53.409781  543705 memory.go:184] no items to output this cycle
I0321 09:37:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 09:38:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:38:03.409797  543705 memory.go:184] no items to output this cycle
I0321 09:38:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 09:38:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:38:13.409809  543705 memory.go:191] Add success.
I0321 09:38:13.409820  543705 cpu.go:282] Add success.
W0321 09:38:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:38:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:38:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:38:13.420060  543705 net.go:648] Add success.
I0321 09:38:13.423065  543705 net.go:770] primary dev: ETH0
I0321 09:38:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:38:13.423094  543705 net.go:698] Add success.
I0321 09:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:38:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:38:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 09:38:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:38:14.456864  543705 disk_worker.go:494] system disk:vda1
I0321 09:38:14.456906  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:38:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:38:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:38:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:38:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:38:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:38:23.409802  543705 memory.go:184] no items to output this cycle
I0321 09:38:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 09:38:28.865673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:38:28.868150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:38:28.868156  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2440 0xc0002a2480]
E0321 09:38:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:38:33.409760  543705 memory.go:184] no items to output this cycle
I0321 09:38:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 09:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:38:43.409787  543705 memory.go:191] Add success.
I0321 09:38:43.409828  543705 cpu.go:282] Add success.
I0321 09:38:43.420020  543705 net.go:648] Add success.
I0321 09:38:43.422804  543705 net.go:770] primary dev: ETH0
I0321 09:38:43.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:38:43.422835  543705 net.go:698] Add success.
I0321 09:38:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:38:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:38:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:38:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:38:53.409783  543705 memory.go:184] no items to output this cycle
I0321 09:38:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 09:39:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:39:03.409805  543705 cpu.go:275] no items to output this cycle
I0321 09:39:03.409810  543705 memory.go:184] no items to output this cycle
E0321 09:39:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:39:13.409781  543705 memory.go:191] Add success.
W0321 09:39:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 09:39:13.409811  543705 cpu.go:282] Add success.
W0321 09:39:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:39:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:39:13.420098  543705 net.go:648] Add success.
I0321 09:39:13.423218  543705 net.go:770] primary dev: ETH0
I0321 09:39:13.423246  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:39:13.423257  543705 net.go:698] Add success.
I0321 09:39:13.467636  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"619cf06b-a917-4ce7-a692-cf013c2aa919","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:39:13.467668  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:39:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:39:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:39:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 09:39:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:39:14.457364  543705 disk_worker.go:494] system disk:vda1
I0321 09:39:14.457427  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:39:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:39:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:39:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:39:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:39:23.409799  543705 memory.go:184] no items to output this cycle
I0321 09:39:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 09:39:28.869673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:39:28.872191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:39:28.872197  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264e00 0xc000264e40]
E0321 09:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:39:33.409775  543705 memory.go:184] no items to output this cycle
I0321 09:39:33.409790  543705 cpu.go:275] no items to output this cycle
I0321 09:39:38.821875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:39:38.821882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:39:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:39:43.410639  543705 memory.go:191] Add success.
I0321 09:39:43.409816  543705 cpu.go:282] Add success.
I0321 09:39:43.420345  543705 net.go:648] Add success.
I0321 09:39:43.423119  543705 net.go:770] primary dev: ETH0
I0321 09:39:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:39:43.423168  543705 net.go:698] Add success.
I0321 09:39:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:39:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:39:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:39:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:39:53.409799  543705 memory.go:184] no items to output this cycle
I0321 09:39:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 09:40:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:40:03.409793  543705 memory.go:184] no items to output this cycle
I0321 09:40:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 09:40:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:40:13.409810  543705 memory.go:191] Add success.
I0321 09:40:13.409817  543705 cpu.go:282] Add success.
W0321 09:40:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:40:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:40:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:40:13.420060  543705 net.go:648] Add success.
I0321 09:40:13.423170  543705 net.go:770] primary dev: ETH0
I0321 09:40:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:40:13.423217  543705 net.go:698] Add success.
I0321 09:40:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:40:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:40:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 09:40:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:40:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 09:40:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:40:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:40:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:40:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:40:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:40:23.409767  543705 memory.go:184] no items to output this cycle
I0321 09:40:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 09:40:28.873674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:40:28.876192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:40:28.876198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0321 09:40:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:40:33.409774  543705 memory.go:184] no items to output this cycle
I0321 09:40:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 09:40:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:40:43.409789  543705 memory.go:191] Add success.
I0321 09:40:43.409810  543705 cpu.go:282] Add success.
I0321 09:40:43.419996  543705 net.go:648] Add success.
I0321 09:40:43.422903  543705 net.go:770] primary dev: ETH0
I0321 09:40:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:40:43.422928  543705 net.go:698] Add success.
I0321 09:40:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:40:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:40:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:40:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:40:53.409773  543705 memory.go:184] no items to output this cycle
I0321 09:40:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 09:41:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:41:03.409796  543705 memory.go:184] no items to output this cycle
I0321 09:41:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 09:41:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:41:13.409821  543705 memory.go:191] Add success.
I0321 09:41:13.409830  543705 cpu.go:282] Add success.
W0321 09:41:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:41:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:41:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:41:13.420276  543705 net.go:648] Add success.
I0321 09:41:13.423108  543705 net.go:770] primary dev: ETH0
I0321 09:41:13.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:41:13.423137  543705 net.go:698] Add success.
I0321 09:41:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:41:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:41:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 09:41:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:41:14.456470  543705 disk_worker.go:494] system disk:vda1
I0321 09:41:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:41:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:41:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:41:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:41:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:41:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:41:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:41:23.409779  543705 memory.go:184] no items to output this cycle
I0321 09:41:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 09:41:28.877679  543705 disk_info.go:125] begin check local disk info of client
I0321 09:41:28.880182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:41:28.880189  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272000 0xc000272040]
E0321 09:41:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:41:33.409796  543705 memory.go:184] no items to output this cycle
I0321 09:41:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 09:41:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:41:43.409822  543705 memory.go:191] Add success.
I0321 09:41:43.409829  543705 cpu.go:282] Add success.
I0321 09:41:43.419982  543705 net.go:648] Add success.
I0321 09:41:43.422613  543705 net.go:770] primary dev: ETH0
I0321 09:41:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:41:43.422651  543705 net.go:698] Add success.
I0321 09:41:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:41:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:41:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:41:53.409774  543705 memory.go:184] no items to output this cycle
I0321 09:41:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 09:42:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:42:03.409787  543705 memory.go:184] no items to output this cycle
I0321 09:42:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 09:42:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:42:13.409802  543705 memory.go:191] Add success.
I0321 09:42:13.409803  543705 cpu.go:282] Add success.
W0321 09:42:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:42:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:42:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:42:13.420068  543705 net.go:648] Add success.
I0321 09:42:13.422865  543705 net.go:770] primary dev: ETH0
I0321 09:42:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:42:13.422890  543705 net.go:698] Add success.
I0321 09:42:13.469370  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"855cb5f3-d9e1-4052-8583-e1b1016c07c0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:42:13.469402  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 09:42:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:42:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 09:42:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:42:14.456798  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:42:14.456807  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:42:14.456813  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:42:14.456856  543705 disk_worker.go:494] system disk:vda1
I0321 09:42:14.456896  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:42:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:42:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:42:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:42:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:42:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:42:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:42:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:42:23.409805  543705 memory.go:184] no items to output this cycle
I0321 09:42:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 09:42:28.881668  543705 disk_info.go:125] begin check local disk info of client
I0321 09:42:28.884130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:42:28.884136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3dc0 0xc0003e3e00]
E0321 09:42:33.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:42:33.409897  543705 memory.go:184] no items to output this cycle
I0321 09:42:33.409913  543705 cpu.go:275] no items to output this cycle
I0321 09:42:38.822024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:42:38.822031  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:42:43.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:42:43.410534  543705 memory.go:191] Add success.
I0321 09:42:43.409846  543705 cpu.go:282] Add success.
I0321 09:42:43.420260  543705 net.go:648] Add success.
I0321 09:42:43.422953  543705 net.go:770] primary dev: ETH0
I0321 09:42:43.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:42:43.422982  543705 net.go:698] Add success.
I0321 09:42:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:42:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:42:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:42:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:42:53.409819  543705 memory.go:184] no items to output this cycle
I0321 09:42:53.409831  543705 cpu.go:275] no items to output this cycle
E0321 09:43:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:43:03.409790  543705 memory.go:184] no items to output this cycle
I0321 09:43:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 09:43:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:43:13.409836  543705 memory.go:191] Add success.
I0321 09:43:13.409846  543705 cpu.go:282] Add success.
W0321 09:43:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:43:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:43:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:43:13.420114  543705 net.go:648] Add success.
I0321 09:43:13.422838  543705 net.go:770] primary dev: ETH0
I0321 09:43:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:43:13.422866  543705 net.go:698] Add success.
I0321 09:43:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:43:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:43:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 09:43:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:43:14.456504  543705 disk_worker.go:494] system disk:vda1
I0321 09:43:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:43:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:43:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:43:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:43:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:43:23.409786  543705 memory.go:184] no items to output this cycle
I0321 09:43:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 09:43:28.885687  543705 disk_info.go:125] begin check local disk info of client
I0321 09:43:28.888162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:43:28.888169  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d540 0xc00039d580]
E0321 09:43:33.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:43:33.409912  543705 memory.go:184] no items to output this cycle
I0321 09:43:33.409953  543705 cpu.go:275] no items to output this cycle
E0321 09:43:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:43:43.409816  543705 memory.go:191] Add success.
I0321 09:43:43.409817  543705 cpu.go:282] Add success.
I0321 09:43:43.419988  543705 net.go:648] Add success.
I0321 09:43:43.422602  543705 net.go:770] primary dev: ETH0
I0321 09:43:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:43:43.422627  543705 net.go:698] Add success.
I0321 09:43:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:43:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:43:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:43:53.409797  543705 memory.go:184] no items to output this cycle
I0321 09:43:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 09:44:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:44:03.409805  543705 memory.go:184] no items to output this cycle
I0321 09:44:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 09:44:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:44:13.409783  543705 memory.go:191] Add success.
I0321 09:44:13.409804  543705 cpu.go:282] Add success.
W0321 09:44:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:44:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:44:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:44:13.420066  543705 net.go:648] Add success.
I0321 09:44:13.422900  543705 net.go:770] primary dev: ETH0
I0321 09:44:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:44:13.422929  543705 net.go:698] Add success.
I0321 09:44:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:44:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:44:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 09:44:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:44:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 09:44:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:44:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:44:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:44:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:44:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:44:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:44:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:44:23.409768  543705 memory.go:184] no items to output this cycle
I0321 09:44:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 09:44:28.889673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:44:28.892211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:44:28.892217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e36c0 0xc0003e3700]
E0321 09:44:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:44:33.409777  543705 memory.go:184] no items to output this cycle
I0321 09:44:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 09:44:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:44:43.409810  543705 memory.go:191] Add success.
I0321 09:44:43.409825  543705 cpu.go:282] Add success.
I0321 09:44:43.420026  543705 net.go:648] Add success.
I0321 09:44:43.422701  543705 net.go:770] primary dev: ETH0
I0321 09:44:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:44:43.422731  543705 net.go:698] Add success.
I0321 09:44:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:44:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:44:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:44:53.410216  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:44:53.410230  543705 cpu.go:275] no items to output this cycle
I0321 09:44:53.410232  543705 memory.go:184] no items to output this cycle
E0321 09:45:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:45:03.409764  543705 memory.go:184] no items to output this cycle
I0321 09:45:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 09:45:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:45:13.409804  543705 memory.go:191] Add success.
I0321 09:45:13.409814  543705 cpu.go:282] Add success.
W0321 09:45:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:45:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:45:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:45:13.420197  543705 net.go:648] Add success.
I0321 09:45:13.422885  543705 net.go:770] primary dev: ETH0
I0321 09:45:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:45:13.422910  543705 net.go:698] Add success.
I0321 09:45:13.470976  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e19417ed-f92b-4487-9fbe-5697433a2dd1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:45:13.471009  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:45:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:45:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:45:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 09:45:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:45:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 09:45:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:45:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:45:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:45:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:45:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:45:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:45:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:45:23.409765  543705 memory.go:184] no items to output this cycle
I0321 09:45:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 09:45:28.893673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:45:28.896236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:45:28.896241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492500 0xc000492540]
I0321 09:45:33.409882  543705 cpu.go:275] no items to output this cycle
E0321 09:45:33.409960  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:45:33.409971  543705 memory.go:184] no items to output this cycle
I0321 09:45:38.822169  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:45:38.822176  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:45:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:45:43.410605  543705 memory.go:191] Add success.
I0321 09:45:43.409830  543705 cpu.go:282] Add success.
I0321 09:45:43.420334  543705 net.go:648] Add success.
I0321 09:45:43.422864  543705 net.go:770] primary dev: ETH0
I0321 09:45:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:45:43.422890  543705 net.go:698] Add success.
I0321 09:45:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:45:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:45:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:45:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:45:53.409777  543705 memory.go:184] no items to output this cycle
I0321 09:45:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 09:46:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:46:03.409781  543705 memory.go:184] no items to output this cycle
I0321 09:46:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 09:46:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:46:13.409820  543705 memory.go:191] Add success.
I0321 09:46:13.409828  543705 cpu.go:282] Add success.
W0321 09:46:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:46:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:46:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:46:13.420112  543705 net.go:648] Add success.
I0321 09:46:13.423031  543705 net.go:770] primary dev: ETH0
I0321 09:46:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:46:13.423057  543705 net.go:698] Add success.
I0321 09:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:46:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:46:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 09:46:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:46:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 09:46:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:46:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:46:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:46:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:46:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:46:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:46:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:46:23.409796  543705 memory.go:184] no items to output this cycle
I0321 09:46:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 09:46:28.897674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:46:28.900138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:46:28.900144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb980 0xc0002bb9c0]
E0321 09:46:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:46:33.409775  543705 cpu.go:275] no items to output this cycle
I0321 09:46:33.409777  543705 memory.go:184] no items to output this cycle
E0321 09:46:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:46:43.409793  543705 memory.go:191] Add success.
I0321 09:46:43.409927  543705 cpu.go:282] Add success.
I0321 09:46:43.419716  543705 net.go:648] Add success.
I0321 09:46:43.422513  543705 net.go:770] primary dev: ETH0
I0321 09:46:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:46:43.422537  543705 net.go:698] Add success.
I0321 09:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:46:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:46:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:46:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:46:53.409768  543705 memory.go:184] no items to output this cycle
I0321 09:46:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 09:47:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:47:03.409780  543705 cpu.go:275] no items to output this cycle
I0321 09:47:03.409782  543705 memory.go:184] no items to output this cycle
E0321 09:47:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:47:13.409812  543705 memory.go:191] Add success.
I0321 09:47:13.409816  543705 cpu.go:282] Add success.
W0321 09:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:47:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:47:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:47:13.420075  543705 net.go:648] Add success.
I0321 09:47:13.422800  543705 net.go:770] primary dev: ETH0
I0321 09:47:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:47:13.422825  543705 net.go:698] Add success.
I0321 09:47:13.453386  543705 event_worker.go:152] Polling the log file for events...
W0321 09:47:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:47:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 09:47:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:47:14.455848  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:47:14.455857  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:47:14.455863  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:47:14.456804  543705 disk_worker.go:494] system disk:vda1
I0321 09:47:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:47:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:47:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:47:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:47:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:47:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:47:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:47:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:47:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:47:23.409791  543705 memory.go:184] no items to output this cycle
I0321 09:47:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 09:47:28.901773  543705 disk_info.go:125] begin check local disk info of client
I0321 09:47:28.904237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:47:28.904243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a5200 0xc0004a5240]
E0321 09:47:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:47:33.409789  543705 memory.go:184] no items to output this cycle
I0321 09:47:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 09:47:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:47:43.409831  543705 memory.go:191] Add success.
I0321 09:47:43.409835  543705 cpu.go:282] Add success.
I0321 09:47:43.420025  543705 net.go:648] Add success.
I0321 09:47:43.422739  543705 net.go:770] primary dev: ETH0
I0321 09:47:43.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:47:43.422768  543705 net.go:698] Add success.
I0321 09:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:47:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:47:53.409776  543705 memory.go:184] no items to output this cycle
I0321 09:47:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 09:48:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:48:03.409807  543705 memory.go:184] no items to output this cycle
I0321 09:48:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 09:48:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:48:13.409798  543705 memory.go:191] Add success.
I0321 09:48:13.409804  543705 cpu.go:282] Add success.
W0321 09:48:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:48:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:48:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:48:13.420297  543705 net.go:648] Add success.
I0321 09:48:13.422834  543705 net.go:770] primary dev: ETH0
I0321 09:48:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:48:13.422863  543705 net.go:698] Add success.
I0321 09:48:13.469681  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"27ddd2fe-b94a-4a63-965d-6a900a4c8bb2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:48:13.469719  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:48:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:48:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:48:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 09:48:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:48:14.456693  543705 disk_worker.go:494] system disk:vda1
I0321 09:48:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:48:15.455620  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:48:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:48:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:48:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:48:23.409793  543705 memory.go:184] no items to output this cycle
I0321 09:48:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 09:48:28.905672  543705 disk_info.go:125] begin check local disk info of client
I0321 09:48:28.908151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:48:28.908157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468ac0 0xc000468b00]
E0321 09:48:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:48:33.409770  543705 memory.go:184] no items to output this cycle
I0321 09:48:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 09:48:38.822319  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:48:38.822326  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:48:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:48:43.410738  543705 memory.go:191] Add success.
I0321 09:48:43.409811  543705 cpu.go:282] Add success.
I0321 09:48:43.420458  543705 net.go:648] Add success.
I0321 09:48:43.423432  543705 net.go:770] primary dev: ETH0
I0321 09:48:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:48:43.423465  543705 net.go:698] Add success.
I0321 09:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:48:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:48:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:48:53.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:48:53.409904  543705 memory.go:184] no items to output this cycle
I0321 09:48:53.409977  543705 cpu.go:275] no items to output this cycle
E0321 09:49:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:49:03.409800  543705 memory.go:184] no items to output this cycle
I0321 09:49:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 09:49:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:49:13.409791  543705 memory.go:191] Add success.
I0321 09:49:13.409800  543705 cpu.go:282] Add success.
W0321 09:49:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:49:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:49:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:49:13.420088  543705 net.go:648] Add success.
I0321 09:49:13.423104  543705 net.go:770] primary dev: ETH0
I0321 09:49:13.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:49:13.423129  543705 net.go:698] Add success.
I0321 09:49:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:49:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:49:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 09:49:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:49:14.456492  543705 disk_worker.go:494] system disk:vda1
I0321 09:49:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:49:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:49:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:49:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:49:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:49:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:49:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:49:23.409766  543705 memory.go:184] no items to output this cycle
I0321 09:49:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 09:49:28.909674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:49:28.912223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:49:28.912228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d09c0 0xc0004d0a00]
E0321 09:49:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:49:33.409769  543705 memory.go:184] no items to output this cycle
I0321 09:49:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 09:49:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:49:43.409823  543705 memory.go:191] Add success.
I0321 09:49:43.409831  543705 cpu.go:282] Add success.
I0321 09:49:43.420067  543705 net.go:648] Add success.
I0321 09:49:43.423041  543705 net.go:770] primary dev: ETH0
I0321 09:49:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:49:43.423068  543705 net.go:698] Add success.
I0321 09:49:46.458020  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:49:46.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:49:46.458131  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:49:53.410441  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:49:53.410455  543705 memory.go:184] no items to output this cycle
I0321 09:49:53.410458  543705 cpu.go:275] no items to output this cycle
E0321 09:50:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:50:03.409773  543705 memory.go:184] no items to output this cycle
I0321 09:50:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 09:50:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:50:13.409811  543705 memory.go:191] Add success.
I0321 09:50:13.409815  543705 cpu.go:282] Add success.
W0321 09:50:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:50:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:50:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:50:13.420134  543705 net.go:648] Add success.
I0321 09:50:13.422990  543705 net.go:770] primary dev: ETH0
I0321 09:50:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:50:13.423027  543705 net.go:698] Add success.
I0321 09:50:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:50:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:50:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 09:50:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:50:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 09:50:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:50:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:50:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:50:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:50:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:50:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:50:23.409771  543705 memory.go:184] no items to output this cycle
I0321 09:50:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 09:50:28.913673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:50:28.916182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:50:28.916189  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368500 0xc000368540]
E0321 09:50:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:50:33.409788  543705 memory.go:184] no items to output this cycle
I0321 09:50:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 09:50:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:50:43.409800  543705 memory.go:191] Add success.
I0321 09:50:43.409802  543705 cpu.go:282] Add success.
I0321 09:50:43.419918  543705 net.go:648] Add success.
I0321 09:50:43.422838  543705 net.go:770] primary dev: ETH0
I0321 09:50:43.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:50:43.422871  543705 net.go:698] Add success.
I0321 09:50:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:50:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:50:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:50:53.409785  543705 memory.go:184] no items to output this cycle
I0321 09:50:53.409786  543705 cpu.go:275] no items to output this cycle
I0321 09:51:03.409895  543705 cpu.go:275] no items to output this cycle
E0321 09:51:03.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:51:03.409915  543705 memory.go:184] no items to output this cycle
E0321 09:51:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:51:13.409812  543705 memory.go:191] Add success.
I0321 09:51:13.409822  543705 cpu.go:282] Add success.
W0321 09:51:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:51:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:51:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:51:13.420109  543705 net.go:648] Add success.
I0321 09:51:13.422860  543705 net.go:770] primary dev: ETH0
I0321 09:51:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:51:13.422885  543705 net.go:698] Add success.
I0321 09:51:13.464311  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d783687c-4d29-4726-bc31-f2e2fac1413f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:51:13.464343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:51:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:51:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:51:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 09:51:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:51:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 09:51:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:51:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:51:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:51:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:51:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:51:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:51:23.409765  543705 memory.go:184] no items to output this cycle
I0321 09:51:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 09:51:28.917677  543705 disk_info.go:125] begin check local disk info of client
I0321 09:51:28.920167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:51:28.920173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2240 0xc0002a2280]
E0321 09:51:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:51:33.409776  543705 memory.go:184] no items to output this cycle
I0321 09:51:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 09:51:38.822465  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:51:38.822472  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:51:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:51:43.410778  543705 memory.go:191] Add success.
I0321 09:51:43.409812  543705 cpu.go:282] Add success.
I0321 09:51:43.420500  543705 net.go:648] Add success.
I0321 09:51:43.423325  543705 net.go:770] primary dev: ETH0
I0321 09:51:43.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:51:43.423354  543705 net.go:698] Add success.
I0321 09:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:51:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:51:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:51:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:51:53.409799  543705 memory.go:184] no items to output this cycle
I0321 09:51:53.409812  543705 cpu.go:275] no items to output this cycle
I0321 09:52:03.409906  543705 cpu.go:275] no items to output this cycle
E0321 09:52:03.409975  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:52:03.409994  543705 memory.go:184] no items to output this cycle
E0321 09:52:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:52:13.409818  543705 memory.go:191] Add success.
I0321 09:52:13.409824  543705 cpu.go:282] Add success.
W0321 09:52:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:52:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:52:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:52:13.420178  543705 net.go:648] Add success.
I0321 09:52:13.423033  543705 net.go:770] primary dev: ETH0
I0321 09:52:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:52:13.423059  543705 net.go:698] Add success.
W0321 09:52:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:52:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 09:52:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:52:14.455890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:52:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:52:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:52:14.456642  543705 disk_worker.go:494] system disk:vda1
I0321 09:52:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:52:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:52:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:52:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:52:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:52:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:52:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:52:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:52:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:52:23.409776  543705 memory.go:184] no items to output this cycle
I0321 09:52:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 09:52:28.921673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:52:28.924104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:52:28.924110  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029c980 0xc00029c9c0]
E0321 09:52:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:52:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 09:52:33.409790  543705 memory.go:184] no items to output this cycle
E0321 09:52:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:52:43.409824  543705 memory.go:191] Add success.
I0321 09:52:43.409830  543705 cpu.go:282] Add success.
I0321 09:52:43.420003  543705 net.go:648] Add success.
I0321 09:52:43.422724  543705 net.go:770] primary dev: ETH0
I0321 09:52:43.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:52:43.422749  543705 net.go:698] Add success.
I0321 09:52:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:52:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:52:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:52:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:52:53.409773  543705 memory.go:184] no items to output this cycle
I0321 09:52:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 09:53:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:53:03.409801  543705 memory.go:184] no items to output this cycle
I0321 09:53:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 09:53:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:53:13.409809  543705 memory.go:191] Add success.
I0321 09:53:13.409817  543705 cpu.go:282] Add success.
W0321 09:53:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:53:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:53:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:53:13.420103  543705 net.go:648] Add success.
I0321 09:53:13.423066  543705 net.go:770] primary dev: ETH0
I0321 09:53:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:53:13.423091  543705 net.go:698] Add success.
I0321 09:53:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:53:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:53:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 09:53:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:53:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 09:53:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:53:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:53:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:53:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:53:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:53:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:53:23.409795  543705 memory.go:184] no items to output this cycle
I0321 09:53:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 09:53:28.925676  543705 disk_info.go:125] begin check local disk info of client
I0321 09:53:28.928128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:53:28.928133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc7c0 0xc0003dc800]
E0321 09:53:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:53:33.409809  543705 memory.go:184] no items to output this cycle
I0321 09:53:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 09:53:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:53:43.409802  543705 memory.go:191] Add success.
I0321 09:53:43.409829  543705 cpu.go:282] Add success.
I0321 09:53:43.419924  543705 net.go:648] Add success.
I0321 09:53:43.423006  543705 net.go:770] primary dev: ETH0
I0321 09:53:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:53:43.423032  543705 net.go:698] Add success.
I0321 09:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:53:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:53:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:53:53.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:53:53.410409  543705 memory.go:184] no items to output this cycle
I0321 09:53:53.410419  543705 cpu.go:275] no items to output this cycle
E0321 09:54:03.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:54:03.409927  543705 memory.go:184] no items to output this cycle
I0321 09:54:03.409954  543705 cpu.go:275] no items to output this cycle
E0321 09:54:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:54:13.409825  543705 memory.go:191] Add success.
I0321 09:54:13.409834  543705 cpu.go:282] Add success.
W0321 09:54:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:54:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:54:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:54:13.420135  543705 net.go:648] Add success.
I0321 09:54:13.423056  543705 net.go:770] primary dev: ETH0
I0321 09:54:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:54:13.423085  543705 net.go:698] Add success.
I0321 09:54:13.469047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b85a3117-b276-4c7d-9934-b0e7ccc46522","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:54:13.469080  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 09:54:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:54:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:54:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 09:54:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:54:14.456696  543705 disk_worker.go:494] system disk:vda1
I0321 09:54:14.456740  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:54:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:54:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:54:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:54:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:54:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 09:54:23.409785  543705 memory.go:184] no items to output this cycle
I0321 09:54:28.929674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:54:28.932150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:54:28.932156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024aac0 0xc00024ab00]
E0321 09:54:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:54:33.409807  543705 memory.go:184] no items to output this cycle
I0321 09:54:33.409823  543705 cpu.go:275] no items to output this cycle
I0321 09:54:38.823814  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:54:38.823820  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:54:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:54:43.410715  543705 memory.go:191] Add success.
I0321 09:54:43.409819  543705 cpu.go:282] Add success.
I0321 09:54:43.420470  543705 net.go:648] Add success.
I0321 09:54:43.423084  543705 net.go:770] primary dev: ETH0
I0321 09:54:43.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:54:43.423108  543705 net.go:698] Add success.
I0321 09:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:54:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:54:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:54:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:54:53.409810  543705 memory.go:184] no items to output this cycle
I0321 09:54:53.409825  543705 cpu.go:275] no items to output this cycle
E0321 09:55:03.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:55:03.409884  543705 cpu.go:275] no items to output this cycle
I0321 09:55:03.409894  543705 memory.go:184] no items to output this cycle
E0321 09:55:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:55:13.409823  543705 memory.go:191] Add success.
I0321 09:55:13.409828  543705 cpu.go:282] Add success.
W0321 09:55:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:55:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:55:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:55:13.420126  543705 net.go:648] Add success.
I0321 09:55:13.422870  543705 net.go:770] primary dev: ETH0
I0321 09:55:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:55:13.422895  543705 net.go:698] Add success.
I0321 09:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:55:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:55:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 09:55:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:55:14.456774  543705 disk_worker.go:494] system disk:vda1
I0321 09:55:14.456814  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:55:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:55:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:55:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:55:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:55:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:55:23.409771  543705 memory.go:184] no items to output this cycle
I0321 09:55:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 09:55:28.933678  543705 disk_info.go:125] begin check local disk info of client
I0321 09:55:28.936141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:55:28.936148  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273280 0xc0002732c0]
E0321 09:55:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:55:33.409796  543705 memory.go:184] no items to output this cycle
I0321 09:55:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 09:55:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:55:43.409789  543705 memory.go:191] Add success.
I0321 09:55:43.409813  543705 cpu.go:282] Add success.
I0321 09:55:43.420003  543705 net.go:648] Add success.
I0321 09:55:43.422823  543705 net.go:770] primary dev: ETH0
I0321 09:55:43.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:55:43.422859  543705 net.go:698] Add success.
I0321 09:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:55:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:55:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:55:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:55:53.409775  543705 memory.go:184] no items to output this cycle
I0321 09:55:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 09:56:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:56:03.409773  543705 memory.go:184] no items to output this cycle
I0321 09:56:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 09:56:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:56:13.409795  543705 memory.go:191] Add success.
I0321 09:56:13.409795  543705 cpu.go:282] Add success.
W0321 09:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:56:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:56:13.420221  543705 net.go:648] Add success.
I0321 09:56:13.422983  543705 net.go:770] primary dev: ETH0
I0321 09:56:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:56:13.423009  543705 net.go:698] Add success.
I0321 09:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:56:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:56:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 09:56:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:56:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 09:56:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:56:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:56:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:56:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:56:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:56:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:56:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:56:23.409769  543705 memory.go:184] no items to output this cycle
I0321 09:56:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 09:56:28.937679  543705 disk_info.go:125] begin check local disk info of client
I0321 09:56:28.940137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:56:28.940143  543705 disk_info.go:196] parse disk info done, disk is : [0xc000228d40 0xc000228d80]
E0321 09:56:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:56:33.409795  543705 memory.go:184] no items to output this cycle
I0321 09:56:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 09:56:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:56:43.409789  543705 memory.go:191] Add success.
I0321 09:56:43.409820  543705 cpu.go:282] Add success.
I0321 09:56:43.419981  543705 net.go:648] Add success.
I0321 09:56:43.423032  543705 net.go:770] primary dev: ETH0
I0321 09:56:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:56:43.423058  543705 net.go:698] Add success.
I0321 09:56:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:56:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:56:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:56:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:56:53.409775  543705 memory.go:184] no items to output this cycle
I0321 09:56:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 09:57:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:57:03.409799  543705 memory.go:184] no items to output this cycle
I0321 09:57:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 09:57:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:57:13.409790  543705 memory.go:191] Add success.
I0321 09:57:13.409792  543705 cpu.go:282] Add success.
W0321 09:57:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:57:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:57:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:57:13.420143  543705 net.go:648] Add success.
I0321 09:57:13.429461  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 09:57:13.429548  543705 net.go:770] primary dev: ETH0
I0321 09:57:13.429560  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:57:13.429571  543705 net.go:698] Add success.
I0321 09:57:13.453584  543705 event_worker.go:152] Polling the log file for events...
I0321 09:57:13.469496  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3cb555e6-f678-4aa2-abee-c2813f8d77e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 09:57:13.469528  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 09:57:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:57:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 09:57:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 09:57:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 09:57:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 09:57:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0321 09:57:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 09:57:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 09:57:15.456779  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 09:57:15.456788  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:57:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 09:57:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 09:57:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:57:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:57:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:57:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:57:23.409777  543705 memory.go:184] no items to output this cycle
I0321 09:57:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 09:57:28.941673  543705 disk_info.go:125] begin check local disk info of client
I0321 09:57:28.944136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:57:28.944143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a29c0 0xc0002a2a00]
E0321 09:57:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:57:33.409776  543705 memory.go:184] no items to output this cycle
I0321 09:57:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 09:57:38.825727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 09:57:38.825734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 09:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:57:43.410729  543705 memory.go:191] Add success.
I0321 09:57:43.409823  543705 cpu.go:282] Add success.
I0321 09:57:43.420450  543705 net.go:648] Add success.
I0321 09:57:43.422970  543705 net.go:770] primary dev: ETH0
I0321 09:57:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:57:43.422999  543705 net.go:698] Add success.
I0321 09:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:57:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:57:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:57:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:57:53.409780  543705 memory.go:184] no items to output this cycle
I0321 09:57:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 09:58:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:58:03.409784  543705 memory.go:184] no items to output this cycle
I0321 09:58:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 09:58:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:58:13.409796  543705 memory.go:191] Add success.
I0321 09:58:13.409814  543705 cpu.go:282] Add success.
W0321 09:58:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:58:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:58:13.420155  543705 net.go:648] Add success.
I0321 09:58:13.423745  543705 net.go:770] primary dev: ETH0
I0321 09:58:13.423759  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:58:13.423770  543705 net.go:698] Add success.
I0321 09:58:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:58:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:58:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 09:58:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:58:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 09:58:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:58:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:58:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:58:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:58:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:58:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:58:23.409773  543705 cpu.go:275] no items to output this cycle
I0321 09:58:23.409777  543705 memory.go:184] no items to output this cycle
I0321 09:58:28.945674  543705 disk_info.go:125] begin check local disk info of client
I0321 09:58:28.948414  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:58:28.948420  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352a00 0xc000352a40]
E0321 09:58:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:58:33.409772  543705 memory.go:184] no items to output this cycle
I0321 09:58:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 09:58:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:58:43.409829  543705 memory.go:191] Add success.
I0321 09:58:43.409844  543705 cpu.go:282] Add success.
I0321 09:58:43.420192  543705 net.go:648] Add success.
I0321 09:58:43.422959  543705 net.go:770] primary dev: ETH0
I0321 09:58:43.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:58:43.422986  543705 net.go:698] Add success.
I0321 09:58:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:58:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:58:46.458109  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:58:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:58:53.409801  543705 memory.go:184] no items to output this cycle
I0321 09:58:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 09:59:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:59:03.409785  543705 memory.go:184] no items to output this cycle
I0321 09:59:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 09:59:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:59:13.409821  543705 memory.go:191] Add success.
I0321 09:59:13.409829  543705 cpu.go:282] Add success.
W0321 09:59:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 09:59:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 09:59:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 09:59:13.420119  543705 net.go:648] Add success.
I0321 09:59:13.423107  543705 net.go:770] primary dev: ETH0
I0321 09:59:13.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:59:13.423145  543705 net.go:698] Add success.
I0321 09:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 09:59:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 09:59:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 09:59:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 09:59:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 09:59:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 09:59:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 09:59:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:59:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:59:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 09:59:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 09:59:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:59:23.409762  543705 memory.go:184] no items to output this cycle
I0321 09:59:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 09:59:28.949677  543705 disk_info.go:125] begin check local disk info of client
I0321 09:59:28.952143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 09:59:28.952157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352500 0xc000352540]
E0321 09:59:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:59:33.409797  543705 memory.go:184] no items to output this cycle
I0321 09:59:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 09:59:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:59:43.409841  543705 memory.go:191] Add success.
I0321 09:59:43.409845  543705 cpu.go:282] Add success.
I0321 09:59:43.420236  543705 net.go:648] Add success.
I0321 09:59:43.423138  543705 net.go:770] primary dev: ETH0
I0321 09:59:43.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0321 09:59:43.423185  543705 net.go:698] Add success.
I0321 09:59:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 09:59:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 09:59:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 09:59:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 09:59:53.409785  543705 memory.go:184] no items to output this cycle
I0321 09:59:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 10:00:03.409955  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:00:03.409971  543705 memory.go:184] no items to output this cycle
I0321 10:00:03.409972  543705 cpu.go:275] no items to output this cycle
E0321 10:00:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:00:13.409818  543705 memory.go:191] Add success.
I0321 10:00:13.409833  543705 cpu.go:282] Add success.
W0321 10:00:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:00:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:00:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:00:13.420124  543705 net.go:648] Add success.
I0321 10:00:13.423154  543705 net.go:770] primary dev: ETH0
I0321 10:00:13.423167  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:00:13.423179  543705 net.go:698] Add success.
I0321 10:00:13.704610  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"851300a3-a069-4288-b9c9-32e2c3999f82","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:00:13.704643  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:00:14.454491  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:00:14.454712  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:00:14.454723  543705 disk_worker.go:708] disk space is not compliant
W0321 10:00:14.454726  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:00:14.456159  543705 disk_worker.go:494] system disk:vda1
I0321 10:00:14.456188  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:00:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:00:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:00:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:00:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:00:23.409769  543705 memory.go:184] no items to output this cycle
I0321 10:00:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 10:00:28.953678  543705 disk_info.go:125] begin check local disk info of client
I0321 10:00:28.956161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:00:28.956168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e27c0 0xc0001e2800]
E0321 10:00:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:00:33.409812  543705 memory.go:184] no items to output this cycle
I0321 10:00:33.409823  543705 cpu.go:275] no items to output this cycle
I0321 10:00:38.827832  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:00:38.827839  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:00:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:00:43.411062  543705 memory.go:191] Add success.
I0321 10:00:43.409852  543705 cpu.go:282] Add success.
I0321 10:00:43.419867  543705 net.go:648] Add success.
I0321 10:00:43.422381  543705 net.go:770] primary dev: ETH0
I0321 10:00:43.422394  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:00:43.422407  543705 net.go:698] Add success.
I0321 10:00:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:00:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:00:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:00:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:00:53.409770  543705 memory.go:184] no items to output this cycle
I0321 10:00:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 10:01:03.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:01:03.409889  543705 memory.go:184] no items to output this cycle
I0321 10:01:03.409955  543705 cpu.go:275] no items to output this cycle
E0321 10:01:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:01:13.409812  543705 memory.go:191] Add success.
I0321 10:01:13.409825  543705 cpu.go:282] Add success.
W0321 10:01:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:01:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:01:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:01:13.420219  543705 net.go:648] Add success.
I0321 10:01:13.422871  543705 net.go:770] primary dev: ETH0
I0321 10:01:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:01:13.422900  543705 net.go:698] Add success.
I0321 10:01:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:01:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:01:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 10:01:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:01:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 10:01:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:01:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:01:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:01:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:01:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:01:23.409773  543705 memory.go:184] no items to output this cycle
I0321 10:01:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 10:01:28.957677  543705 disk_info.go:125] begin check local disk info of client
I0321 10:01:28.960146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:01:28.960152  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003823c0 0xc000382400]
E0321 10:01:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:01:33.409793  543705 memory.go:184] no items to output this cycle
I0321 10:01:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 10:01:43.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:01:43.409837  543705 cpu.go:282] Add success.
I0321 10:01:43.409846  543705 memory.go:191] Add success.
I0321 10:01:43.420135  543705 net.go:648] Add success.
I0321 10:01:43.423249  543705 net.go:770] primary dev: ETH0
I0321 10:01:43.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:01:43.423279  543705 net.go:698] Add success.
I0321 10:01:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:01:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:01:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:01:53.409898  543705 memory.go:184] no items to output this cycle
I0321 10:01:53.409915  543705 cpu.go:275] no items to output this cycle
E0321 10:02:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:02:03.409784  543705 memory.go:184] no items to output this cycle
I0321 10:02:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 10:02:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:02:13.409810  543705 memory.go:191] Add success.
I0321 10:02:13.409817  543705 cpu.go:282] Add success.
W0321 10:02:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:02:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:02:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:02:13.420124  543705 net.go:648] Add success.
I0321 10:02:13.422929  543705 net.go:770] primary dev: ETH0
I0321 10:02:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:02:13.422955  543705 net.go:698] Add success.
W0321 10:02:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:02:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 10:02:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:02:14.456772  543705 disk_worker.go:494] system disk:vda1
I0321 10:02:14.456815  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:02:14.457090  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:02:14.457098  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:02:14.457102  543705 custom_config.go:64] query custom config with name: gpu
E0321 10:02:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:02:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:02:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:02:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:02:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:02:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:02:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:02:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:02:23.409805  543705 memory.go:184] no items to output this cycle
I0321 10:02:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 10:02:28.961674  543705 disk_info.go:125] begin check local disk info of client
I0321 10:02:28.964239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:02:28.964246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b48c0 0xc0002b4900]
E0321 10:02:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:02:33.409771  543705 memory.go:184] no items to output this cycle
I0321 10:02:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 10:02:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:02:43.409827  543705 memory.go:191] Add success.
I0321 10:02:43.409828  543705 cpu.go:282] Add success.
I0321 10:02:43.420094  543705 net.go:648] Add success.
I0321 10:02:43.422883  543705 net.go:770] primary dev: ETH0
I0321 10:02:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:02:43.422908  543705 net.go:698] Add success.
I0321 10:02:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:02:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:02:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:02:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:02:53.409769  543705 memory.go:184] no items to output this cycle
I0321 10:02:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 10:03:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:03:03.409802  543705 memory.go:184] no items to output this cycle
I0321 10:03:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 10:03:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:03:13.409776  543705 memory.go:191] Add success.
W0321 10:03:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:03:13.409808  543705 cpu.go:282] Add success.
W0321 10:03:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:03:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:03:13.420292  543705 net.go:648] Add success.
I0321 10:03:13.423241  543705 net.go:770] primary dev: ETH0
I0321 10:03:13.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:03:13.423270  543705 net.go:698] Add success.
I0321 10:03:14.343109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"054b172e-8a41-43c0-8888-90ccbea6b127","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:03:14.343146  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:03:14.454734  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:03:14.454870  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:03:14.454954  543705 disk_worker.go:708] disk space is not compliant
W0321 10:03:14.454957  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:03:14.456532  543705 disk_worker.go:494] system disk:vda1
I0321 10:03:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:03:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:03:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:03:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:03:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:03:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:03:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:03:23.409805  543705 memory.go:184] no items to output this cycle
I0321 10:03:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 10:03:28.965674  543705 disk_info.go:125] begin check local disk info of client
I0321 10:03:28.968187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:03:28.968193  543705 disk_info.go:196] parse disk info done, disk is : [0xc000321640 0xc000321680]
E0321 10:03:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:03:33.409761  543705 memory.go:184] no items to output this cycle
I0321 10:03:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 10:03:38.829735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:03:38.829742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:03:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:03:43.410639  543705 memory.go:191] Add success.
I0321 10:03:43.409812  543705 cpu.go:282] Add success.
I0321 10:03:43.420601  543705 net.go:648] Add success.
I0321 10:03:43.423296  543705 net.go:770] primary dev: ETH0
I0321 10:03:43.423314  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:03:43.423337  543705 net.go:698] Add success.
I0321 10:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:03:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:03:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:03:53.409873  543705 memory.go:184] no items to output this cycle
I0321 10:03:53.409914  543705 cpu.go:275] no items to output this cycle
E0321 10:04:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:04:03.409808  543705 memory.go:184] no items to output this cycle
I0321 10:04:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 10:04:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:04:13.409788  543705 memory.go:191] Add success.
I0321 10:04:13.409789  543705 cpu.go:282] Add success.
W0321 10:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:04:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:04:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:04:13.420114  543705 net.go:648] Add success.
I0321 10:04:13.422816  543705 net.go:770] primary dev: ETH0
I0321 10:04:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:04:13.422843  543705 net.go:698] Add success.
I0321 10:04:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:04:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:04:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 10:04:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:04:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 10:04:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:04:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:04:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:04:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:04:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:04:23.409804  543705 memory.go:184] no items to output this cycle
I0321 10:04:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 10:04:28.969673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:04:28.972205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:04:28.972213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493100 0xc000493140]
E0321 10:04:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:04:33.409763  543705 memory.go:184] no items to output this cycle
I0321 10:04:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 10:04:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:04:43.409816  543705 memory.go:191] Add success.
I0321 10:04:43.409820  543705 cpu.go:282] Add success.
I0321 10:04:43.419941  543705 net.go:648] Add success.
I0321 10:04:43.423552  543705 net.go:770] primary dev: ETH0
I0321 10:04:43.423567  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:04:43.423590  543705 net.go:698] Add success.
I0321 10:04:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:04:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:04:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:04:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:04:53.409785  543705 memory.go:184] no items to output this cycle
I0321 10:04:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 10:05:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:05:03.409783  543705 memory.go:184] no items to output this cycle
I0321 10:05:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 10:05:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:05:13.409770  543705 memory.go:191] Add success.
W0321 10:05:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:05:13.409806  543705 cpu.go:282] Add success.
W0321 10:05:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:05:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:05:13.420096  543705 net.go:648] Add success.
I0321 10:05:13.423272  543705 net.go:770] primary dev: ETH0
I0321 10:05:13.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:05:13.423297  543705 net.go:698] Add success.
I0321 10:05:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:05:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:05:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 10:05:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:05:14.456473  543705 disk_worker.go:494] system disk:vda1
I0321 10:05:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:05:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:05:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:05:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:05:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:05:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:05:23.409782  543705 memory.go:184] no items to output this cycle
I0321 10:05:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 10:05:28.973675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:05:28.976173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:05:28.976179  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1180 0xc0002b11c0]
E0321 10:05:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:05:33.409795  543705 memory.go:184] no items to output this cycle
I0321 10:05:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 10:05:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:05:43.409788  543705 memory.go:191] Add success.
I0321 10:05:43.409812  543705 cpu.go:282] Add success.
I0321 10:05:43.419883  543705 net.go:648] Add success.
I0321 10:05:43.422733  543705 net.go:770] primary dev: ETH0
I0321 10:05:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:05:43.422758  543705 net.go:698] Add success.
I0321 10:05:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:05:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:05:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:05:53.410479  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:05:53.410503  543705 memory.go:184] no items to output this cycle
I0321 10:05:53.410520  543705 cpu.go:275] no items to output this cycle
E0321 10:06:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:06:03.409791  543705 memory.go:184] no items to output this cycle
I0321 10:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 10:06:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:06:13.409778  543705 memory.go:191] Add success.
W0321 10:06:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:06:13.409813  543705 cpu.go:282] Add success.
W0321 10:06:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:06:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:06:13.420131  543705 net.go:648] Add success.
I0321 10:06:13.423158  543705 net.go:770] primary dev: ETH0
I0321 10:06:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:06:13.423184  543705 net.go:698] Add success.
I0321 10:06:13.469094  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4de06e27-0f42-4145-8a62-22d1e186dff5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:06:13.469127  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:06:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:06:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:06:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0321 10:06:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:06:14.456800  543705 disk_worker.go:494] system disk:vda1
I0321 10:06:14.456837  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:06:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:06:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:06:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:06:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:06:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:06:23.409781  543705 memory.go:184] no items to output this cycle
I0321 10:06:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 10:06:28.977675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:06:28.980182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:06:28.980188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0bc0 0xc0002b0c00]
E0321 10:06:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:06:33.409814  543705 memory.go:184] no items to output this cycle
I0321 10:06:33.409831  543705 cpu.go:275] no items to output this cycle
I0321 10:06:38.829882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:06:38.829890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:06:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:06:43.410685  543705 memory.go:191] Add success.
I0321 10:06:43.409808  543705 cpu.go:282] Add success.
I0321 10:06:43.420390  543705 net.go:648] Add success.
I0321 10:06:43.423330  543705 net.go:770] primary dev: ETH0
I0321 10:06:43.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:06:43.423370  543705 net.go:698] Add success.
I0321 10:06:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:06:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:06:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:06:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:06:53.409783  543705 memory.go:184] no items to output this cycle
I0321 10:06:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 10:07:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:07:03.409760  543705 memory.go:184] no items to output this cycle
I0321 10:07:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 10:07:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:07:13.409825  543705 memory.go:191] Add success.
I0321 10:07:13.409833  543705 cpu.go:282] Add success.
W0321 10:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:07:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:07:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:07:13.420154  543705 net.go:648] Add success.
I0321 10:07:13.422767  543705 net.go:770] primary dev: ETH0
I0321 10:07:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:07:13.422806  543705 net.go:698] Add success.
I0321 10:07:13.453347  543705 event_worker.go:152] Polling the log file for events...
W0321 10:07:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:07:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 10:07:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:07:14.455944  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:07:14.455952  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:07:14.455958  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:07:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 10:07:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:07:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:07:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:07:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:07:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:07:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:07:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:07:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:07:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:07:23.409784  543705 memory.go:184] no items to output this cycle
I0321 10:07:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 10:07:28.981672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:07:28.984229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:07:28.984235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000346e40 0xc000346e80]
E0321 10:07:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:07:33.409759  543705 memory.go:184] no items to output this cycle
I0321 10:07:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 10:07:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:07:43.409807  543705 memory.go:191] Add success.
I0321 10:07:43.409829  543705 cpu.go:282] Add success.
I0321 10:07:43.420050  543705 net.go:648] Add success.
I0321 10:07:43.423073  543705 net.go:770] primary dev: ETH0
I0321 10:07:43.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:07:43.423096  543705 net.go:698] Add success.
I0321 10:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:07:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:07:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:07:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:07:53.409809  543705 memory.go:184] no items to output this cycle
I0321 10:07:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 10:08:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:08:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 10:08:03.409790  543705 memory.go:184] no items to output this cycle
E0321 10:08:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:08:13.409814  543705 memory.go:191] Add success.
I0321 10:08:13.409826  543705 cpu.go:282] Add success.
W0321 10:08:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:08:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:08:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:08:13.420159  543705 net.go:648] Add success.
I0321 10:08:13.422904  543705 net.go:770] primary dev: ETH0
I0321 10:08:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:08:13.422929  543705 net.go:698] Add success.
I0321 10:08:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:08:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:08:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 10:08:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:08:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 10:08:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:08:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:08:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:08:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:08:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:08:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:08:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:08:23.409790  543705 memory.go:184] no items to output this cycle
I0321 10:08:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 10:08:28.985673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:08:28.988257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:08:28.988263  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035fb00 0xc00035fb40]
E0321 10:08:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:08:33.409794  543705 memory.go:184] no items to output this cycle
I0321 10:08:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 10:08:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:08:43.409830  543705 memory.go:191] Add success.
I0321 10:08:43.409833  543705 cpu.go:282] Add success.
I0321 10:08:43.419868  543705 net.go:648] Add success.
I0321 10:08:43.422819  543705 net.go:770] primary dev: ETH0
I0321 10:08:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:08:43.422854  543705 net.go:698] Add success.
I0321 10:08:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:08:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:08:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:08:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:08:53.409781  543705 memory.go:184] no items to output this cycle
I0321 10:08:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 10:09:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:09:03.409803  543705 memory.go:184] no items to output this cycle
I0321 10:09:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 10:09:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:09:13.409790  543705 memory.go:191] Add success.
W0321 10:09:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:09:13.409826  543705 cpu.go:282] Add success.
W0321 10:09:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:09:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:09:13.420252  543705 net.go:648] Add success.
I0321 10:09:13.423500  543705 net.go:770] primary dev: ETH0
I0321 10:09:13.423514  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:09:13.423525  543705 net.go:698] Add success.
I0321 10:09:13.469469  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"485c5834-e737-4cfa-ab85-f9a29a91c446","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:09:13.469504  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:09:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:09:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:09:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 10:09:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:09:14.456715  543705 disk_worker.go:494] system disk:vda1
I0321 10:09:14.456744  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:09:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:09:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:09:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:09:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:09:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:09:23.409777  543705 memory.go:184] no items to output this cycle
I0321 10:09:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 10:09:28.989676  543705 disk_info.go:125] begin check local disk info of client
I0321 10:09:28.992174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:09:28.992180  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024e380 0xc00024e3c0]
E0321 10:09:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:09:33.409799  543705 memory.go:184] no items to output this cycle
I0321 10:09:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 10:09:38.830030  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:09:38.830036  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:09:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:09:43.410547  543705 memory.go:191] Add success.
I0321 10:09:43.409818  543705 cpu.go:282] Add success.
I0321 10:09:43.420311  543705 net.go:648] Add success.
I0321 10:09:43.422852  543705 net.go:770] primary dev: ETH0
I0321 10:09:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:09:43.422877  543705 net.go:698] Add success.
I0321 10:09:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:09:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:09:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:09:53.410258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:09:53.410268  543705 cpu.go:275] no items to output this cycle
I0321 10:09:53.410275  543705 memory.go:184] no items to output this cycle
E0321 10:10:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:10:03.409776  543705 memory.go:184] no items to output this cycle
I0321 10:10:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 10:10:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:10:13.409800  543705 memory.go:191] Add success.
I0321 10:10:13.409803  543705 cpu.go:282] Add success.
W0321 10:10:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:10:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:10:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:10:13.420047  543705 net.go:648] Add success.
I0321 10:10:13.422577  543705 net.go:770] primary dev: ETH0
I0321 10:10:13.422590  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:10:13.422602  543705 net.go:698] Add success.
I0321 10:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:10:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:10:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 10:10:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:10:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 10:10:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:10:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:10:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:10:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:10:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:10:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:10:23.409768  543705 memory.go:184] no items to output this cycle
I0321 10:10:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 10:10:28.993675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:10:28.996178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:10:28.996184  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ebc0 0xc00035ec00]
E0321 10:10:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:10:33.409792  543705 memory.go:184] no items to output this cycle
I0321 10:10:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 10:10:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:10:43.409783  543705 memory.go:191] Add success.
I0321 10:10:43.409807  543705 cpu.go:282] Add success.
I0321 10:10:43.420048  543705 net.go:648] Add success.
I0321 10:10:43.422958  543705 net.go:770] primary dev: ETH0
I0321 10:10:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:10:43.422982  543705 net.go:698] Add success.
I0321 10:10:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:10:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:10:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:10:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:10:53.409784  543705 memory.go:184] no items to output this cycle
I0321 10:10:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 10:11:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:11:03.409771  543705 memory.go:184] no items to output this cycle
I0321 10:11:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 10:11:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:11:13.409825  543705 memory.go:191] Add success.
I0321 10:11:13.409830  543705 cpu.go:282] Add success.
W0321 10:11:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:11:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:11:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:11:13.420177  543705 net.go:648] Add success.
I0321 10:11:13.423214  543705 net.go:770] primary dev: ETH0
I0321 10:11:13.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:11:13.423239  543705 net.go:698] Add success.
I0321 10:11:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:11:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:11:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 10:11:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:11:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 10:11:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:11:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:11:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:11:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:11:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:11:23.409797  543705 memory.go:184] no items to output this cycle
I0321 10:11:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 10:11:28.997675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:11:29.000146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:11:29.000152  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e100 0xc00035e140]
E0321 10:11:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:11:33.409784  543705 memory.go:184] no items to output this cycle
I0321 10:11:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 10:11:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:11:43.409815  543705 memory.go:191] Add success.
I0321 10:11:43.409826  543705 cpu.go:282] Add success.
I0321 10:11:43.419947  543705 net.go:648] Add success.
I0321 10:11:43.422743  543705 net.go:770] primary dev: ETH0
I0321 10:11:43.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:11:43.422774  543705 net.go:698] Add success.
I0321 10:11:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:11:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:11:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:11:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:11:53.409801  543705 memory.go:184] no items to output this cycle
I0321 10:11:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 10:12:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:12:03.409804  543705 memory.go:184] no items to output this cycle
I0321 10:12:03.409822  543705 cpu.go:275] no items to output this cycle
E0321 10:12:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:12:13.409800  543705 memory.go:191] Add success.
I0321 10:12:13.409818  543705 cpu.go:282] Add success.
W0321 10:12:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:12:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:12:13.420104  543705 net.go:648] Add success.
I0321 10:12:13.422830  543705 net.go:770] primary dev: ETH0
I0321 10:12:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:12:13.422855  543705 net.go:698] Add success.
I0321 10:12:13.464694  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2d9c9f45-0478-4164-ba7c-9e712e77ce0c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:12:13.464740  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 10:12:14.454725  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:12:14.454735  543705 disk_worker.go:708] disk space is not compliant
W0321 10:12:14.454737  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:12:14.456419  543705 disk_worker.go:494] system disk:vda1
E0321 10:12:14.456419  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:12:14.456427  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:12:14.456432  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:12:14.456464  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:12:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:12:15.456884  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:12:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:12:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:12:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:12:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:12:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:12:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:12:23.409778  543705 memory.go:184] no items to output this cycle
I0321 10:12:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 10:12:29.001676  543705 disk_info.go:125] begin check local disk info of client
I0321 10:12:29.004150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:12:29.004155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5040 0xc0000c5080]
E0321 10:12:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:12:33.409808  543705 memory.go:184] no items to output this cycle
I0321 10:12:33.409825  543705 cpu.go:275] no items to output this cycle
I0321 10:12:38.831850  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:12:38.831857  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:12:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:12:43.410817  543705 memory.go:191] Add success.
I0321 10:12:43.409806  543705 cpu.go:282] Add success.
I0321 10:12:43.420602  543705 net.go:648] Add success.
I0321 10:12:43.423169  543705 net.go:770] primary dev: ETH0
I0321 10:12:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:12:43.423194  543705 net.go:698] Add success.
I0321 10:12:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:12:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:12:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:12:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:12:53.409779  543705 memory.go:184] no items to output this cycle
I0321 10:12:53.409944  543705 cpu.go:275] no items to output this cycle
E0321 10:13:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:13:03.409767  543705 memory.go:184] no items to output this cycle
I0321 10:13:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 10:13:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:13:13.409806  543705 memory.go:191] Add success.
I0321 10:13:13.409805  543705 cpu.go:282] Add success.
W0321 10:13:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:13:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:13:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:13:13.420062  543705 net.go:648] Add success.
I0321 10:13:13.422847  543705 net.go:770] primary dev: ETH0
I0321 10:13:13.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:13:13.422876  543705 net.go:698] Add success.
I0321 10:13:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:13:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:13:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 10:13:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:13:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 10:13:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:13:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:13:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:13:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:13:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:13:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:13:23.409775  543705 memory.go:184] no items to output this cycle
I0321 10:13:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 10:13:29.005674  543705 disk_info.go:125] begin check local disk info of client
I0321 10:13:29.008118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:13:29.008124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0321 10:13:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:13:33.409800  543705 memory.go:184] no items to output this cycle
I0321 10:13:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 10:13:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:13:43.409785  543705 memory.go:191] Add success.
I0321 10:13:43.409788  543705 cpu.go:282] Add success.
I0321 10:13:43.419864  543705 net.go:648] Add success.
I0321 10:13:43.422560  543705 net.go:770] primary dev: ETH0
I0321 10:13:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:13:43.422584  543705 net.go:698] Add success.
I0321 10:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:13:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:13:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:13:53.409785  543705 memory.go:184] no items to output this cycle
I0321 10:13:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 10:14:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:14:03.409782  543705 memory.go:184] no items to output this cycle
I0321 10:14:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 10:14:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:14:13.409804  543705 memory.go:191] Add success.
I0321 10:14:13.409806  543705 cpu.go:282] Add success.
W0321 10:14:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:14:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:14:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:14:13.420200  543705 net.go:648] Add success.
I0321 10:14:13.422741  543705 net.go:770] primary dev: ETH0
I0321 10:14:13.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:14:13.422770  543705 net.go:698] Add success.
I0321 10:14:14.454046  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:14:14.454190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:14:14.454268  543705 disk_worker.go:708] disk space is not compliant
W0321 10:14:14.454271  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:14:14.455637  543705 disk_worker.go:494] system disk:vda1
I0321 10:14:14.455668  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:14:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:14:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:14:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:14:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:14:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 10:14:23.409788  543705 memory.go:184] no items to output this cycle
I0321 10:14:29.009672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:14:29.012214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:14:29.012222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
E0321 10:14:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:14:33.409766  543705 memory.go:184] no items to output this cycle
I0321 10:14:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 10:14:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:14:43.409792  543705 memory.go:191] Add success.
I0321 10:14:43.409792  543705 cpu.go:282] Add success.
I0321 10:14:43.419988  543705 net.go:648] Add success.
I0321 10:14:43.422999  543705 net.go:770] primary dev: ETH0
I0321 10:14:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:14:43.423049  543705 net.go:698] Add success.
I0321 10:14:46.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:14:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:14:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:14:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:14:53.409786  543705 memory.go:184] no items to output this cycle
I0321 10:14:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 10:15:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:15:03.409804  543705 memory.go:184] no items to output this cycle
I0321 10:15:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 10:15:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:15:13.409787  543705 memory.go:191] Add success.
I0321 10:15:13.409805  543705 cpu.go:282] Add success.
W0321 10:15:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:15:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:15:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:15:13.420155  543705 net.go:648] Add success.
I0321 10:15:13.423000  543705 net.go:770] primary dev: ETH0
I0321 10:15:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:15:13.423024  543705 net.go:698] Add success.
I0321 10:15:13.470280  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e60a369-362b-43c7-af92-b62a120b22ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:15:13.470312  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:15:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:15:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:15:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0321 10:15:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:15:14.456767  543705 disk_worker.go:494] system disk:vda1
I0321 10:15:14.456796  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:15:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:15:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:15:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:15:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:15:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:15:23.409765  543705 memory.go:184] no items to output this cycle
I0321 10:15:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 10:15:29.013674  543705 disk_info.go:125] begin check local disk info of client
I0321 10:15:29.016183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:15:29.016189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007adc0 0xc00007ae00]
E0321 10:15:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:15:33.409773  543705 memory.go:184] no items to output this cycle
I0321 10:15:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 10:15:38.833734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:15:38.833740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:15:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:15:43.410734  543705 memory.go:191] Add success.
I0321 10:15:43.409804  543705 cpu.go:282] Add success.
I0321 10:15:43.420431  543705 net.go:648] Add success.
I0321 10:15:43.423692  543705 net.go:770] primary dev: ETH0
I0321 10:15:43.423707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:15:43.423720  543705 net.go:698] Add success.
I0321 10:15:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:15:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:15:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:15:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:15:53.409774  543705 memory.go:184] no items to output this cycle
I0321 10:15:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 10:16:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:16:03.409795  543705 memory.go:184] no items to output this cycle
I0321 10:16:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 10:16:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:16:13.409781  543705 memory.go:191] Add success.
W0321 10:16:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:16:13.409814  543705 cpu.go:282] Add success.
W0321 10:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:16:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:16:13.420587  543705 net.go:648] Add success.
I0321 10:16:13.423389  543705 net.go:770] primary dev: ETH0
I0321 10:16:13.423405  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:16:13.423418  543705 net.go:698] Add success.
I0321 10:16:14.453950  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:16:14.455234  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:16:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0321 10:16:14.455248  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:16:14.456622  543705 disk_worker.go:494] system disk:vda1
I0321 10:16:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:16:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:16:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:16:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:16:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:16:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:16:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:16:23.409772  543705 memory.go:184] no items to output this cycle
I0321 10:16:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 10:16:29.017675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:16:29.020158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:16:29.020164  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353380 0xc0003533c0]
E0321 10:16:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:16:33.409794  543705 memory.go:184] no items to output this cycle
I0321 10:16:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 10:16:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:16:43.409817  543705 memory.go:191] Add success.
I0321 10:16:43.409820  543705 cpu.go:282] Add success.
I0321 10:16:43.419947  543705 net.go:648] Add success.
I0321 10:16:43.423006  543705 net.go:770] primary dev: ETH0
I0321 10:16:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:16:43.423031  543705 net.go:698] Add success.
I0321 10:16:46.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:16:46.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:16:46.458137  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:16:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:16:53.410378  543705 memory.go:184] no items to output this cycle
I0321 10:16:53.410392  543705 cpu.go:275] no items to output this cycle
E0321 10:17:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:17:03.409767  543705 memory.go:184] no items to output this cycle
I0321 10:17:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 10:17:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:17:13.409797  543705 memory.go:191] Add success.
I0321 10:17:13.409800  543705 cpu.go:282] Add success.
W0321 10:17:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:17:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:17:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:17:13.420072  543705 net.go:648] Add success.
I0321 10:17:13.423026  543705 net.go:770] primary dev: ETH0
I0321 10:17:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:17:13.423054  543705 net.go:698] Add success.
I0321 10:17:13.452773  543705 event_worker.go:152] Polling the log file for events...
W0321 10:17:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:17:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 10:17:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:17:14.455888  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:17:14.455897  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:17:14.455903  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:17:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 10:17:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:17:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:17:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:17:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:17:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:17:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:17:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:17:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:17:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:17:23.409773  543705 memory.go:184] no items to output this cycle
I0321 10:17:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 10:17:29.021675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:17:29.024098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:17:29.024106  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d8c0 0xc00037d900]
E0321 10:17:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:17:33.409798  543705 memory.go:184] no items to output this cycle
I0321 10:17:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 10:17:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:17:43.409791  543705 memory.go:191] Add success.
I0321 10:17:43.409795  543705 cpu.go:282] Add success.
I0321 10:17:43.419860  543705 net.go:648] Add success.
I0321 10:17:43.422480  543705 net.go:770] primary dev: ETH0
I0321 10:17:43.422494  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:17:43.422507  543705 net.go:698] Add success.
I0321 10:17:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:17:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:17:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:17:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:17:53.409777  543705 memory.go:184] no items to output this cycle
I0321 10:17:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 10:18:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:18:03.409812  543705 memory.go:184] no items to output this cycle
I0321 10:18:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 10:18:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:18:13.409786  543705 memory.go:191] Add success.
W0321 10:18:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:18:13.409812  543705 cpu.go:282] Add success.
W0321 10:18:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:18:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:18:13.420095  543705 net.go:648] Add success.
I0321 10:18:13.422937  543705 net.go:770] primary dev: ETH0
I0321 10:18:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:18:13.422962  543705 net.go:698] Add success.
I0321 10:18:13.464291  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fa678f7-d7b4-4564-9adb-f6c1ac0e2fa9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:18:13.464321  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:18:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:18:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 10:18:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:18:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 10:18:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:18:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:18:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:18:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:18:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:18:16.472595  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:18:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:18:23.409803  543705 memory.go:184] no items to output this cycle
I0321 10:18:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 10:18:29.025681  543705 disk_info.go:125] begin check local disk info of client
I0321 10:18:29.028145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:18:29.028151  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353600 0xc000353640]
E0321 10:18:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:18:33.409804  543705 memory.go:184] no items to output this cycle
I0321 10:18:33.409818  543705 cpu.go:275] no items to output this cycle
I0321 10:18:38.833877  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:18:38.833883  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:18:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:18:43.410855  543705 memory.go:191] Add success.
I0321 10:18:43.409823  543705 cpu.go:282] Add success.
I0321 10:18:43.420575  543705 net.go:648] Add success.
I0321 10:18:43.423343  543705 net.go:770] primary dev: ETH0
I0321 10:18:43.423357  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:18:43.423370  543705 net.go:698] Add success.
I0321 10:18:46.457680  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:18:46.457742  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:18:46.457767  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:18:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:18:53.409788  543705 memory.go:184] no items to output this cycle
I0321 10:18:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 10:19:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:19:03.409796  543705 memory.go:184] no items to output this cycle
I0321 10:19:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 10:19:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:19:13.409802  543705 memory.go:191] Add success.
I0321 10:19:13.409818  543705 cpu.go:282] Add success.
W0321 10:19:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:19:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:19:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:19:13.420143  543705 net.go:648] Add success.
I0321 10:19:13.423349  543705 net.go:770] primary dev: ETH0
I0321 10:19:13.423363  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:19:13.423374  543705 net.go:698] Add success.
I0321 10:19:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:19:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:19:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 10:19:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:19:14.456472  543705 disk_worker.go:494] system disk:vda1
I0321 10:19:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:19:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:19:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:19:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:19:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:19:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:19:23.409809  543705 memory.go:184] no items to output this cycle
I0321 10:19:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 10:19:29.029672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:19:29.032152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:19:29.032159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
E0321 10:19:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:19:33.409803  543705 memory.go:184] no items to output this cycle
I0321 10:19:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 10:19:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:19:43.409784  543705 memory.go:191] Add success.
I0321 10:19:43.409821  543705 cpu.go:282] Add success.
I0321 10:19:43.419855  543705 net.go:648] Add success.
I0321 10:19:43.422617  543705 net.go:770] primary dev: ETH0
I0321 10:19:43.422629  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:19:43.422642  543705 net.go:698] Add success.
I0321 10:19:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:19:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:19:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:19:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:19:53.409813  543705 memory.go:184] no items to output this cycle
I0321 10:19:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 10:20:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:20:03.409803  543705 memory.go:184] no items to output this cycle
I0321 10:20:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 10:20:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:20:13.409830  543705 memory.go:191] Add success.
I0321 10:20:13.409830  543705 cpu.go:282] Add success.
W0321 10:20:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:20:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:20:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:20:13.420163  543705 net.go:648] Add success.
I0321 10:20:13.422917  543705 net.go:770] primary dev: ETH0
I0321 10:20:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:20:13.422944  543705 net.go:698] Add success.
I0321 10:20:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:20:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:20:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 10:20:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:20:14.456483  543705 disk_worker.go:494] system disk:vda1
I0321 10:20:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:20:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:20:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:20:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:20:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:20:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:20:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:20:23.409917  543705 memory.go:184] no items to output this cycle
I0321 10:20:23.409921  543705 cpu.go:275] no items to output this cycle
I0321 10:20:29.033676  543705 disk_info.go:125] begin check local disk info of client
I0321 10:20:29.036149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:20:29.036155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0800 0xc0003b0840]
E0321 10:20:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:20:33.409789  543705 memory.go:184] no items to output this cycle
I0321 10:20:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 10:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:20:43.409789  543705 memory.go:191] Add success.
I0321 10:20:43.409789  543705 cpu.go:282] Add success.
I0321 10:20:43.420077  543705 net.go:648] Add success.
I0321 10:20:43.423195  543705 net.go:770] primary dev: ETH0
I0321 10:20:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:20:43.423224  543705 net.go:698] Add success.
I0321 10:20:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:20:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:20:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:20:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:20:53.409779  543705 memory.go:184] no items to output this cycle
I0321 10:20:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 10:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:21:03.409799  543705 memory.go:184] no items to output this cycle
I0321 10:21:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 10:21:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:21:13.409786  543705 memory.go:191] Add success.
W0321 10:21:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:21:13.409812  543705 cpu.go:282] Add success.
W0321 10:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:21:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:21:13.420139  543705 net.go:648] Add success.
I0321 10:21:13.424013  543705 net.go:770] primary dev: ETH0
I0321 10:21:13.424028  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:21:13.424041  543705 net.go:698] Add success.
I0321 10:21:13.464452  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5cfacd1e-8d7a-4c6d-97c4-8e8bf185a2dc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:21:13.464494  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:21:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:21:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:21:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 10:21:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:21:14.456537  543705 disk_worker.go:494] system disk:vda1
I0321 10:21:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:21:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:21:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:21:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:21:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:21:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:21:23.409822  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:21:23.409840  543705 memory.go:184] no items to output this cycle
I0321 10:21:23.409925  543705 cpu.go:275] no items to output this cycle
I0321 10:21:29.037675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:21:29.040177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:21:29.040184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bbac0 0xc0002bbb00]
E0321 10:21:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:21:33.409789  543705 memory.go:184] no items to output this cycle
I0321 10:21:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 10:21:38.834028  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:21:38.834034  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:21:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:21:43.410661  543705 memory.go:191] Add success.
I0321 10:21:43.409797  543705 cpu.go:282] Add success.
I0321 10:21:43.420408  543705 net.go:648] Add success.
I0321 10:21:43.423109  543705 net.go:770] primary dev: ETH0
I0321 10:21:43.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:21:43.423134  543705 net.go:698] Add success.
I0321 10:21:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:21:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:21:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:21:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:21:53.409801  543705 memory.go:184] no items to output this cycle
I0321 10:21:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 10:22:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:22:03.409788  543705 memory.go:184] no items to output this cycle
I0321 10:22:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 10:22:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:22:13.409795  543705 memory.go:191] Add success.
I0321 10:22:13.409800  543705 cpu.go:282] Add success.
W0321 10:22:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:22:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:22:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:22:13.420051  543705 net.go:648] Add success.
I0321 10:22:13.422825  543705 net.go:770] primary dev: ETH0
I0321 10:22:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:22:13.422849  543705 net.go:698] Add success.
W0321 10:22:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:22:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 10:22:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:22:14.456793  543705 disk_worker.go:494] system disk:vda1
I0321 10:22:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:22:14.457105  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:22:14.457113  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:22:14.457118  543705 custom_config.go:64] query custom config with name: gpu
E0321 10:22:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:22:15.456792  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:22:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:22:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:22:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:22:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:22:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:22:23.410419  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:22:23.410434  543705 memory.go:184] no items to output this cycle
I0321 10:22:23.410465  543705 cpu.go:275] no items to output this cycle
I0321 10:22:29.041675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:22:29.044224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:22:29.044231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6000 0xc0004a6040]
E0321 10:22:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:22:33.409776  543705 memory.go:184] no items to output this cycle
I0321 10:22:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 10:22:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:22:43.409803  543705 memory.go:191] Add success.
I0321 10:22:43.409812  543705 cpu.go:282] Add success.
I0321 10:22:43.419983  543705 net.go:648] Add success.
I0321 10:22:43.422977  543705 net.go:770] primary dev: ETH0
I0321 10:22:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:22:43.423002  543705 net.go:698] Add success.
I0321 10:22:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:22:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:22:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:22:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:22:53.409784  543705 memory.go:184] no items to output this cycle
I0321 10:22:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 10:23:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:23:03.409767  543705 memory.go:184] no items to output this cycle
I0321 10:23:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 10:23:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:23:13.409777  543705 memory.go:191] Add success.
W0321 10:23:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:23:13.409802  543705 cpu.go:282] Add success.
W0321 10:23:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:23:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:23:13.420183  543705 net.go:648] Add success.
I0321 10:23:13.422766  543705 net.go:770] primary dev: ETH0
I0321 10:23:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:23:13.422790  543705 net.go:698] Add success.
I0321 10:23:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:23:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:23:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 10:23:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:23:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 10:23:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:23:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:23:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:23:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:23:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:23:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:23:23.409775  543705 memory.go:184] no items to output this cycle
I0321 10:23:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 10:23:29.045668  543705 disk_info.go:125] begin check local disk info of client
I0321 10:23:29.048137  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:23:29.048143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bb6c0 0xc0004bb700]
E0321 10:23:33.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:23:33.409865  543705 memory.go:184] no items to output this cycle
I0321 10:23:33.409936  543705 cpu.go:275] no items to output this cycle
E0321 10:23:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:23:43.409778  543705 memory.go:191] Add success.
I0321 10:23:43.409814  543705 cpu.go:282] Add success.
I0321 10:23:43.419864  543705 net.go:648] Add success.
I0321 10:23:43.422620  543705 net.go:770] primary dev: ETH0
I0321 10:23:43.422633  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:23:43.422645  543705 net.go:698] Add success.
I0321 10:23:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:23:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:23:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:23:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:23:53.409768  543705 memory.go:184] no items to output this cycle
I0321 10:23:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 10:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:24:03.409776  543705 memory.go:184] no items to output this cycle
I0321 10:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 10:24:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:24:13.409780  543705 memory.go:191] Add success.
I0321 10:24:13.409798  543705 cpu.go:282] Add success.
W0321 10:24:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:24:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:24:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:24:13.420103  543705 net.go:648] Add success.
I0321 10:24:13.422762  543705 net.go:770] primary dev: ETH0
I0321 10:24:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:24:13.422790  543705 net.go:698] Add success.
I0321 10:24:13.488566  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a210c289-a872-4c89-950f-141559d6f745","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:24:13.488603  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:24:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:24:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:24:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 10:24:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:24:14.456610  543705 disk_worker.go:494] system disk:vda1
I0321 10:24:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:24:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:24:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:24:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:24:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:24:16.472508  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:24:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:24:23.409794  543705 memory.go:184] no items to output this cycle
I0321 10:24:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 10:24:29.049678  543705 disk_info.go:125] begin check local disk info of client
I0321 10:24:29.052194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:24:29.052203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e3f40 0xc0004ea000]
E0321 10:24:33.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:24:33.409909  543705 memory.go:184] no items to output this cycle
I0321 10:24:33.409999  543705 cpu.go:275] no items to output this cycle
I0321 10:24:38.835870  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:24:38.835876  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:24:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:24:43.410620  543705 memory.go:191] Add success.
I0321 10:24:43.409792  543705 cpu.go:282] Add success.
I0321 10:24:43.420409  543705 net.go:648] Add success.
I0321 10:24:43.422939  543705 net.go:770] primary dev: ETH0
I0321 10:24:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:24:43.422964  543705 net.go:698] Add success.
I0321 10:24:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:24:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:24:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:24:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:24:53.409810  543705 memory.go:184] no items to output this cycle
I0321 10:24:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 10:25:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:25:03.409786  543705 memory.go:184] no items to output this cycle
I0321 10:25:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 10:25:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:25:13.409805  543705 memory.go:191] Add success.
I0321 10:25:13.409815  543705 cpu.go:282] Add success.
W0321 10:25:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:25:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:25:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:25:13.420144  543705 net.go:648] Add success.
I0321 10:25:13.423207  543705 net.go:770] primary dev: ETH0
I0321 10:25:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:25:13.423253  543705 net.go:698] Add success.
I0321 10:25:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:25:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:25:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 10:25:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:25:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 10:25:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:25:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:25:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:25:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:25:23.409774  543705 memory.go:184] no items to output this cycle
I0321 10:25:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 10:25:29.053672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:25:29.056229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:25:29.056235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515700 0xc000515740]
E0321 10:25:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:25:33.409777  543705 memory.go:184] no items to output this cycle
I0321 10:25:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 10:25:43.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:25:43.409939  543705 memory.go:191] Add success.
I0321 10:25:43.410161  543705 cpu.go:282] Add success.
I0321 10:25:43.419709  543705 net.go:648] Add success.
I0321 10:25:43.422567  543705 net.go:770] primary dev: ETH0
I0321 10:25:43.422580  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:25:43.422591  543705 net.go:698] Add success.
I0321 10:25:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:25:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:25:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:25:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:25:53.409783  543705 memory.go:184] no items to output this cycle
I0321 10:25:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 10:26:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:26:03.409786  543705 memory.go:184] no items to output this cycle
I0321 10:26:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 10:26:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:26:13.409789  543705 memory.go:191] Add success.
I0321 10:26:13.409790  543705 cpu.go:282] Add success.
W0321 10:26:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:26:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:26:13.420149  543705 net.go:648] Add success.
I0321 10:26:13.422897  543705 net.go:770] primary dev: ETH0
I0321 10:26:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:26:13.422926  543705 net.go:698] Add success.
I0321 10:26:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:26:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:26:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 10:26:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:26:14.456543  543705 disk_worker.go:494] system disk:vda1
I0321 10:26:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:26:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:26:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:26:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:26:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:26:23.409768  543705 memory.go:184] no items to output this cycle
I0321 10:26:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 10:26:29.057675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:26:29.060123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:26:29.060131  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289940 0xc000289980]
E0321 10:26:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:26:33.409776  543705 memory.go:184] no items to output this cycle
I0321 10:26:33.409777  543705 cpu.go:275] no items to output this cycle
E0321 10:26:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:26:43.409784  543705 memory.go:191] Add success.
I0321 10:26:43.409785  543705 cpu.go:282] Add success.
I0321 10:26:43.420002  543705 net.go:648] Add success.
I0321 10:26:43.422814  543705 net.go:770] primary dev: ETH0
I0321 10:26:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:26:43.422839  543705 net.go:698] Add success.
I0321 10:26:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:26:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:26:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:26:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:26:53.409780  543705 memory.go:184] no items to output this cycle
I0321 10:26:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 10:27:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:27:03.409773  543705 memory.go:184] no items to output this cycle
I0321 10:27:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 10:27:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:27:13.409773  543705 memory.go:191] Add success.
W0321 10:27:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:27:13.409799  543705 cpu.go:282] Add success.
W0321 10:27:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:27:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:27:13.420089  543705 net.go:648] Add success.
I0321 10:27:13.429054  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 10:27:13.429128  543705 net.go:770] primary dev: ETH0
I0321 10:27:13.429142  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:27:13.429157  543705 net.go:698] Add success.
I0321 10:27:13.453679  543705 event_worker.go:152] Polling the log file for events...
I0321 10:27:13.469336  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f61335ca-1121-409b-aeef-5f23e8628916","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:27:13.469379  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 10:27:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:27:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 10:27:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:27:14.456866  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:27:14.456875  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:27:14.456881  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:27:14.456905  543705 disk_worker.go:494] system disk:vda1
I0321 10:27:14.456933  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:27:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:27:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:27:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:27:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:27:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:27:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:27:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:27:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:27:23.409775  543705 memory.go:184] no items to output this cycle
I0321 10:27:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 10:27:29.061674  543705 disk_info.go:125] begin check local disk info of client
I0321 10:27:29.064098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:27:29.064104  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515340 0xc000515380]
E0321 10:27:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:27:33.409796  543705 memory.go:184] no items to output this cycle
I0321 10:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 10:27:38.837745  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:27:38.837751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:27:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:27:43.410530  543705 memory.go:191] Add success.
I0321 10:27:43.409822  543705 cpu.go:282] Add success.
I0321 10:27:43.420236  543705 net.go:648] Add success.
I0321 10:27:43.422900  543705 net.go:770] primary dev: ETH0
I0321 10:27:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:27:43.422924  543705 net.go:698] Add success.
I0321 10:27:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:27:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:27:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:27:53.409782  543705 memory.go:184] no items to output this cycle
I0321 10:27:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 10:28:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:28:03.409818  543705 memory.go:184] no items to output this cycle
I0321 10:28:03.409830  543705 cpu.go:275] no items to output this cycle
E0321 10:28:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:28:13.409784  543705 memory.go:191] Add success.
W0321 10:28:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:28:13.409818  543705 cpu.go:282] Add success.
W0321 10:28:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:28:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:28:13.420157  543705 net.go:648] Add success.
I0321 10:28:13.422959  543705 net.go:770] primary dev: ETH0
I0321 10:28:13.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:28:13.422988  543705 net.go:698] Add success.
I0321 10:28:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:28:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:28:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 10:28:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:28:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 10:28:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:28:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:28:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:28:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:28:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:28:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:28:23.409776  543705 memory.go:184] no items to output this cycle
I0321 10:28:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 10:28:29.065675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:28:29.068178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:28:29.068185  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bbec0 0xc0003bbf00]
E0321 10:28:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:28:33.409773  543705 memory.go:184] no items to output this cycle
I0321 10:28:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 10:28:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:28:43.409793  543705 memory.go:191] Add success.
I0321 10:28:43.409827  543705 cpu.go:282] Add success.
I0321 10:28:43.419938  543705 net.go:648] Add success.
I0321 10:28:43.422627  543705 net.go:770] primary dev: ETH0
I0321 10:28:43.422640  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:28:43.422651  543705 net.go:698] Add success.
I0321 10:28:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:28:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:28:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:28:53.410357  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:28:53.410376  543705 memory.go:184] no items to output this cycle
I0321 10:28:53.410411  543705 cpu.go:275] no items to output this cycle
E0321 10:29:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:29:03.409791  543705 memory.go:184] no items to output this cycle
I0321 10:29:03.409890  543705 cpu.go:275] no items to output this cycle
E0321 10:29:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:29:13.409817  543705 memory.go:191] Add success.
I0321 10:29:13.409826  543705 cpu.go:282] Add success.
W0321 10:29:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:29:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:29:13.420149  543705 net.go:648] Add success.
I0321 10:29:13.423048  543705 net.go:770] primary dev: ETH0
I0321 10:29:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:29:13.423074  543705 net.go:698] Add success.
I0321 10:29:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:29:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:29:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0321 10:29:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:29:14.456668  543705 disk_worker.go:494] system disk:vda1
I0321 10:29:14.456699  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:29:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:29:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:29:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:29:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:29:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:29:23.409802  543705 memory.go:184] no items to output this cycle
I0321 10:29:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 10:29:29.069677  543705 disk_info.go:125] begin check local disk info of client
I0321 10:29:29.072132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:29:29.072139  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037c000 0xc00037c040]
E0321 10:29:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:29:33.409787  543705 memory.go:184] no items to output this cycle
I0321 10:29:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 10:29:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:29:43.409773  543705 memory.go:191] Add success.
I0321 10:29:43.409824  543705 cpu.go:282] Add success.
I0321 10:29:43.419891  543705 net.go:648] Add success.
I0321 10:29:43.422882  543705 net.go:770] primary dev: ETH0
I0321 10:29:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:29:43.422911  543705 net.go:698] Add success.
I0321 10:29:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:29:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:29:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:29:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:29:53.409780  543705 memory.go:184] no items to output this cycle
I0321 10:29:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 10:30:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:30:03.409797  543705 memory.go:184] no items to output this cycle
I0321 10:30:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 10:30:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:30:13.409782  543705 memory.go:191] Add success.
W0321 10:30:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:30:13.409812  543705 cpu.go:282] Add success.
W0321 10:30:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:30:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:30:13.420054  543705 net.go:648] Add success.
I0321 10:30:13.422683  543705 net.go:770] primary dev: ETH0
I0321 10:30:13.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:30:13.422706  543705 net.go:698] Add success.
I0321 10:30:13.469209  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ca864991-6ac7-4023-ab08-5a2aebe972f9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:30:13.469246  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:30:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:30:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:30:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 10:30:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:30:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 10:30:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:30:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:30:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:30:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:30:16.472092  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:30:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:30:23.409778  543705 memory.go:184] no items to output this cycle
I0321 10:30:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 10:30:29.073677  543705 disk_info.go:125] begin check local disk info of client
I0321 10:30:29.076184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:30:29.076191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352f00 0xc000352f40]
E0321 10:30:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:30:33.409786  543705 memory.go:184] no items to output this cycle
I0321 10:30:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 10:30:38.837899  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:30:38.837906  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:30:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:30:43.410742  543705 memory.go:191] Add success.
I0321 10:30:43.409824  543705 cpu.go:282] Add success.
I0321 10:30:43.420468  543705 net.go:648] Add success.
I0321 10:30:43.423185  543705 net.go:770] primary dev: ETH0
I0321 10:30:43.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:30:43.423210  543705 net.go:698] Add success.
I0321 10:30:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:30:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:30:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:30:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:30:53.409808  543705 memory.go:184] no items to output this cycle
I0321 10:30:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 10:31:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:31:03.409779  543705 memory.go:184] no items to output this cycle
I0321 10:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 10:31:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:31:13.409775  543705 memory.go:191] Add success.
W0321 10:31:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:31:13.409808  543705 cpu.go:282] Add success.
W0321 10:31:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:31:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:31:13.420071  543705 net.go:770] primary dev: ETH0
I0321 10:31:13.420084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:31:13.420095  543705 net.go:698] Add success.
I0321 10:31:13.420325  543705 net.go:648] Add success.
I0321 10:31:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:31:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:31:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 10:31:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:31:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 10:31:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:31:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:31:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:31:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:31:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:31:23.409763  543705 memory.go:184] no items to output this cycle
I0321 10:31:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 10:31:29.077671  543705 disk_info.go:125] begin check local disk info of client
I0321 10:31:29.080135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:31:29.080142  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469ec0 0xc000469f00]
E0321 10:31:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:31:33.409795  543705 memory.go:184] no items to output this cycle
I0321 10:31:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 10:31:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:31:43.409777  543705 memory.go:191] Add success.
I0321 10:31:43.409809  543705 cpu.go:282] Add success.
I0321 10:31:43.419894  543705 net.go:648] Add success.
I0321 10:31:43.422851  543705 net.go:770] primary dev: ETH0
I0321 10:31:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:31:43.422879  543705 net.go:698] Add success.
I0321 10:31:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:31:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:31:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:31:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:31:53.409792  543705 memory.go:184] no items to output this cycle
I0321 10:31:53.409832  543705 cpu.go:275] no items to output this cycle
E0321 10:32:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:32:03.409784  543705 memory.go:184] no items to output this cycle
I0321 10:32:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 10:32:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:32:13.409790  543705 cpu.go:282] Add success.
I0321 10:32:13.409792  543705 memory.go:191] Add success.
W0321 10:32:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:32:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:32:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:32:13.420050  543705 net.go:648] Add success.
I0321 10:32:13.422687  543705 net.go:770] primary dev: ETH0
I0321 10:32:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:32:13.422712  543705 net.go:698] Add success.
W0321 10:32:14.455457  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:32:14.455472  543705 disk_worker.go:708] disk space is not compliant
W0321 10:32:14.455476  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:32:14.456509  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:32:14.456517  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:32:14.456523  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:32:14.457444  543705 disk_worker.go:494] system disk:vda1
I0321 10:32:14.457482  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:32:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:32:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:32:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:32:16.457985  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:32:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:32:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:32:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:32:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:32:23.409773  543705 memory.go:184] no items to output this cycle
I0321 10:32:23.409774  543705 cpu.go:275] no items to output this cycle
I0321 10:32:29.081673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:32:29.084115  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:32:29.084121  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa700 0xc0001aa740]
E0321 10:32:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:32:33.409773  543705 memory.go:184] no items to output this cycle
I0321 10:32:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 10:32:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:32:43.409808  543705 memory.go:191] Add success.
I0321 10:32:43.409817  543705 cpu.go:282] Add success.
I0321 10:32:43.419941  543705 net.go:648] Add success.
I0321 10:32:43.422972  543705 net.go:770] primary dev: ETH0
I0321 10:32:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:32:43.422998  543705 net.go:698] Add success.
I0321 10:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:32:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:32:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:32:53.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:32:53.410387  543705 cpu.go:275] no items to output this cycle
I0321 10:32:53.410393  543705 memory.go:184] no items to output this cycle
E0321 10:33:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:33:03.409779  543705 memory.go:184] no items to output this cycle
I0321 10:33:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 10:33:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:33:13.409816  543705 memory.go:191] Add success.
I0321 10:33:13.409821  543705 cpu.go:282] Add success.
W0321 10:33:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:33:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:33:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:33:13.420059  543705 net.go:648] Add success.
I0321 10:33:13.422880  543705 net.go:770] primary dev: ETH0
I0321 10:33:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:33:13.422908  543705 net.go:698] Add success.
I0321 10:33:13.463027  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9bec600a-fc62-48d3-9f8a-637b83083fc2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:33:13.463057  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:33:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:33:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:33:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 10:33:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:33:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 10:33:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:33:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:33:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:33:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:33:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:33:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:33:23.409769  543705 memory.go:184] no items to output this cycle
I0321 10:33:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 10:33:29.085676  543705 disk_info.go:125] begin check local disk info of client
I0321 10:33:29.088163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:33:29.088169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353480 0xc0003534c0]
E0321 10:33:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:33:33.409789  543705 memory.go:184] no items to output this cycle
I0321 10:33:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 10:33:38.839894  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:33:38.839901  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:33:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:33:43.410603  543705 memory.go:191] Add success.
I0321 10:33:43.409801  543705 cpu.go:282] Add success.
I0321 10:33:43.420401  543705 net.go:648] Add success.
I0321 10:33:43.422860  543705 net.go:770] primary dev: ETH0
I0321 10:33:43.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:33:43.422888  543705 net.go:698] Add success.
I0321 10:33:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:33:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:33:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:33:53.410283  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:33:53.410306  543705 memory.go:184] no items to output this cycle
I0321 10:33:53.410311  543705 cpu.go:275] no items to output this cycle
E0321 10:34:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:34:03.409795  543705 memory.go:184] no items to output this cycle
I0321 10:34:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 10:34:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:34:13.409790  543705 memory.go:191] Add success.
I0321 10:34:13.409793  543705 cpu.go:282] Add success.
W0321 10:34:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:34:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:34:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:34:13.420350  543705 net.go:648] Add success.
I0321 10:34:13.423237  543705 net.go:770] primary dev: ETH0
I0321 10:34:13.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:34:13.423263  543705 net.go:698] Add success.
I0321 10:34:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:34:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:34:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 10:34:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:34:14.456472  543705 disk_worker.go:494] system disk:vda1
I0321 10:34:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:34:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:34:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:34:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:34:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:34:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:34:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:34:23.409774  543705 memory.go:184] no items to output this cycle
I0321 10:34:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 10:34:29.089676  543705 disk_info.go:125] begin check local disk info of client
I0321 10:34:29.092134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:34:29.092140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4580 0xc0000c45c0]
E0321 10:34:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:34:33.409799  543705 memory.go:184] no items to output this cycle
I0321 10:34:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 10:34:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:34:43.409783  543705 memory.go:191] Add success.
I0321 10:34:43.409813  543705 cpu.go:282] Add success.
I0321 10:34:43.419885  543705 net.go:648] Add success.
I0321 10:34:43.422702  543705 net.go:770] primary dev: ETH0
I0321 10:34:43.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:34:43.422727  543705 net.go:698] Add success.
I0321 10:34:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:34:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:34:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:34:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:34:53.409772  543705 memory.go:184] no items to output this cycle
I0321 10:34:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 10:35:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:35:03.409769  543705 memory.go:184] no items to output this cycle
I0321 10:35:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 10:35:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:35:13.409793  543705 memory.go:191] Add success.
I0321 10:35:13.409793  543705 cpu.go:282] Add success.
W0321 10:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:35:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:35:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:35:13.419738  543705 net.go:648] Add success.
I0321 10:35:13.422483  543705 net.go:770] primary dev: ETH0
I0321 10:35:13.422496  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:35:13.422508  543705 net.go:698] Add success.
I0321 10:35:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:35:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:35:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 10:35:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:35:14.456504  543705 disk_worker.go:494] system disk:vda1
I0321 10:35:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:35:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:35:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:35:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:35:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:35:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:35:23.409765  543705 memory.go:184] no items to output this cycle
I0321 10:35:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 10:35:29.093673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:35:29.096135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:35:29.096141  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007acc0 0xc00007ad00]
E0321 10:35:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:35:33.409766  543705 memory.go:184] no items to output this cycle
I0321 10:35:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 10:35:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:35:43.409792  543705 memory.go:191] Add success.
I0321 10:35:43.409797  543705 cpu.go:282] Add success.
I0321 10:35:43.419869  543705 net.go:648] Add success.
I0321 10:35:43.423033  543705 net.go:770] primary dev: ETH0
I0321 10:35:43.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:35:43.423067  543705 net.go:698] Add success.
I0321 10:35:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:35:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:35:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:35:53.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:35:53.410379  543705 memory.go:184] no items to output this cycle
I0321 10:35:53.410388  543705 cpu.go:275] no items to output this cycle
E0321 10:36:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:36:03.409778  543705 memory.go:184] no items to output this cycle
I0321 10:36:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 10:36:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:36:13.409778  543705 memory.go:191] Add success.
W0321 10:36:13.409969  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:36:13.409985  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:36:13.409988  543705 cpu.go:282] Add success.
I0321 10:36:13.409988  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:36:13.419704  543705 net.go:648] Add success.
I0321 10:36:13.422401  543705 net.go:770] primary dev: ETH0
I0321 10:36:13.422414  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:36:13.422424  543705 net.go:698] Add success.
I0321 10:36:13.544295  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"64b72181-015f-4139-a3fa-d4c8f4130425","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:36:13.544339  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:36:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:36:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:36:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 10:36:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:36:14.456531  543705 disk_worker.go:494] system disk:vda1
I0321 10:36:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:36:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:36:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:36:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:36:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:36:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:36:23.409770  543705 memory.go:184] no items to output this cycle
I0321 10:36:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 10:36:29.097675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:36:29.100121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:36:29.100128  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007afc0 0xc00007b000]
E0321 10:36:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:36:33.409763  543705 memory.go:184] no items to output this cycle
I0321 10:36:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 10:36:38.841734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:36:38.841742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:36:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:36:43.410618  543705 memory.go:191] Add success.
I0321 10:36:43.409796  543705 cpu.go:282] Add success.
I0321 10:36:43.420339  543705 net.go:648] Add success.
I0321 10:36:43.422922  543705 net.go:770] primary dev: ETH0
I0321 10:36:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:36:43.422951  543705 net.go:698] Add success.
I0321 10:36:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:36:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:36:46.458111  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:36:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:36:53.409811  543705 memory.go:184] no items to output this cycle
I0321 10:36:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 10:37:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:37:03.409765  543705 memory.go:184] no items to output this cycle
I0321 10:37:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 10:37:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:37:13.409781  543705 memory.go:191] Add success.
W0321 10:37:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:37:13.409814  543705 cpu.go:282] Add success.
W0321 10:37:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:37:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:37:13.420254  543705 net.go:648] Add success.
I0321 10:37:13.422895  543705 net.go:770] primary dev: ETH0
I0321 10:37:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:37:13.422919  543705 net.go:698] Add success.
I0321 10:37:13.453472  543705 event_worker.go:152] Polling the log file for events...
W0321 10:37:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:37:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 10:37:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:37:14.456787  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:37:14.456796  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:37:14.456802  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:37:14.456846  543705 disk_worker.go:494] system disk:vda1
I0321 10:37:14.456887  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:37:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:37:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 10:37:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:37:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:37:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:37:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:37:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:37:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:37:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 10:37:23.409781  543705 memory.go:184] no items to output this cycle
I0321 10:37:29.101675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:37:29.104195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:37:29.104201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa080 0xc0001fa0c0]
E0321 10:37:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:37:33.409764  543705 memory.go:184] no items to output this cycle
I0321 10:37:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 10:37:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:37:43.409788  543705 memory.go:191] Add success.
I0321 10:37:43.409787  543705 cpu.go:282] Add success.
I0321 10:37:43.419847  543705 net.go:648] Add success.
I0321 10:37:43.422555  543705 net.go:770] primary dev: ETH0
I0321 10:37:43.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:37:43.422580  543705 net.go:698] Add success.
I0321 10:37:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:37:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:37:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:37:53.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:37:53.409902  543705 memory.go:184] no items to output this cycle
I0321 10:37:53.409959  543705 cpu.go:275] no items to output this cycle
E0321 10:38:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:38:03.409794  543705 memory.go:184] no items to output this cycle
I0321 10:38:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 10:38:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:38:13.409815  543705 memory.go:191] Add success.
I0321 10:38:13.409816  543705 cpu.go:282] Add success.
W0321 10:38:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:38:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:38:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:38:13.420197  543705 net.go:648] Add success.
I0321 10:38:13.423050  543705 net.go:770] primary dev: ETH0
I0321 10:38:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:38:13.423075  543705 net.go:698] Add success.
I0321 10:38:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:38:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:38:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 10:38:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:38:14.456516  543705 disk_worker.go:494] system disk:vda1
I0321 10:38:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:38:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:38:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:38:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:38:23.409799  543705 memory.go:184] no items to output this cycle
I0321 10:38:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 10:38:29.105672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:38:29.108183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:38:29.108189  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003af200 0xc0003af240]
E0321 10:38:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:38:33.409773  543705 memory.go:184] no items to output this cycle
I0321 10:38:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 10:38:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:38:43.409775  543705 memory.go:191] Add success.
I0321 10:38:43.409808  543705 cpu.go:282] Add success.
I0321 10:38:43.419831  543705 net.go:648] Add success.
I0321 10:38:43.422421  543705 net.go:770] primary dev: ETH0
I0321 10:38:43.422434  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:38:43.422448  543705 net.go:698] Add success.
I0321 10:38:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:38:46.458172  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:38:46.458198  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:38:53.410349  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:38:53.410371  543705 memory.go:184] no items to output this cycle
I0321 10:38:53.410380  543705 cpu.go:275] no items to output this cycle
E0321 10:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:39:03.409779  543705 memory.go:184] no items to output this cycle
I0321 10:39:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 10:39:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:39:13.409808  543705 memory.go:191] Add success.
I0321 10:39:13.409817  543705 cpu.go:282] Add success.
W0321 10:39:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:39:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:39:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:39:13.420196  543705 net.go:648] Add success.
I0321 10:39:13.423410  543705 net.go:770] primary dev: ETH0
I0321 10:39:13.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:39:13.423448  543705 net.go:698] Add success.
I0321 10:39:13.463503  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"151383fd-f27b-4783-ad32-096012ae5d87","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:39:13.463537  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:39:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:39:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:39:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 10:39:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:39:14.456509  543705 disk_worker.go:494] system disk:vda1
I0321 10:39:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:39:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:39:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:39:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:39:23.409799  543705 memory.go:184] no items to output this cycle
I0321 10:39:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 10:39:29.109682  543705 disk_info.go:125] begin check local disk info of client
I0321 10:39:29.112160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:39:29.112166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f8580 0xc0001f85c0]
E0321 10:39:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:39:33.409793  543705 memory.go:184] no items to output this cycle
I0321 10:39:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 10:39:38.841881  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:39:38.841888  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:39:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:39:43.410643  543705 memory.go:191] Add success.
I0321 10:39:43.409814  543705 cpu.go:282] Add success.
I0321 10:39:43.419741  543705 net.go:648] Add success.
I0321 10:39:43.422716  543705 net.go:770] primary dev: ETH0
I0321 10:39:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:39:43.422741  543705 net.go:698] Add success.
I0321 10:39:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:39:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:39:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:39:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:39:53.409778  543705 memory.go:184] no items to output this cycle
I0321 10:39:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 10:40:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:40:03.409792  543705 memory.go:184] no items to output this cycle
I0321 10:40:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 10:40:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:40:13.409788  543705 memory.go:191] Add success.
I0321 10:40:13.409810  543705 cpu.go:282] Add success.
W0321 10:40:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:40:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:40:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:40:13.420527  543705 net.go:648] Add success.
I0321 10:40:13.423217  543705 net.go:770] primary dev: ETH0
I0321 10:40:13.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:40:13.423242  543705 net.go:698] Add success.
I0321 10:40:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:40:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:40:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 10:40:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:40:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 10:40:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:40:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:40:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:40:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:40:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:40:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:40:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:40:23.409778  543705 memory.go:184] no items to output this cycle
I0321 10:40:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 10:40:29.113676  543705 disk_info.go:125] begin check local disk info of client
I0321 10:40:29.116156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:40:29.116162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e440 0xc00047e480]
E0321 10:40:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:40:33.409818  543705 memory.go:184] no items to output this cycle
I0321 10:40:33.409966  543705 cpu.go:275] no items to output this cycle
E0321 10:40:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:40:43.409796  543705 cpu.go:282] Add success.
I0321 10:40:43.409803  543705 memory.go:191] Add success.
I0321 10:40:43.419883  543705 net.go:648] Add success.
I0321 10:40:43.422657  543705 net.go:770] primary dev: ETH0
I0321 10:40:43.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:40:43.422685  543705 net.go:698] Add success.
I0321 10:40:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:40:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:40:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:40:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:40:53.409775  543705 memory.go:184] no items to output this cycle
I0321 10:40:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 10:41:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:41:03.409786  543705 memory.go:184] no items to output this cycle
I0321 10:41:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 10:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:41:13.409794  543705 memory.go:191] Add success.
I0321 10:41:13.409795  543705 cpu.go:282] Add success.
W0321 10:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:41:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:41:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:41:13.420088  543705 net.go:648] Add success.
I0321 10:41:13.422895  543705 net.go:770] primary dev: ETH0
I0321 10:41:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:41:13.422918  543705 net.go:698] Add success.
I0321 10:41:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:41:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:41:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 10:41:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:41:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 10:41:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:41:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:41:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:41:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:41:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:41:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:41:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:41:23.409808  543705 memory.go:184] no items to output this cycle
I0321 10:41:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 10:41:29.117688  543705 disk_info.go:125] begin check local disk info of client
I0321 10:41:29.120145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:41:29.120151  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a5c0 0xc00047a600]
E0321 10:41:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:41:33.409781  543705 memory.go:184] no items to output this cycle
I0321 10:41:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 10:41:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:41:43.409796  543705 memory.go:191] Add success.
I0321 10:41:43.409797  543705 cpu.go:282] Add success.
I0321 10:41:43.419857  543705 net.go:648] Add success.
I0321 10:41:43.422485  543705 net.go:770] primary dev: ETH0
I0321 10:41:43.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:41:43.422510  543705 net.go:698] Add success.
I0321 10:41:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:41:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:41:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:41:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:41:53.409782  543705 memory.go:184] no items to output this cycle
I0321 10:41:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 10:42:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:42:03.409812  543705 memory.go:184] no items to output this cycle
I0321 10:42:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 10:42:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:42:13.409797  543705 memory.go:191] Add success.
I0321 10:42:13.409798  543705 cpu.go:282] Add success.
W0321 10:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:42:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:42:13.420173  543705 net.go:648] Add success.
I0321 10:42:13.422750  543705 net.go:770] primary dev: ETH0
I0321 10:42:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:42:13.422781  543705 net.go:698] Add success.
I0321 10:42:13.468498  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7b50517-b0ad-4621-8906-873f841ebf5d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:42:13.468533  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 10:42:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:42:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 10:42:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:42:14.455968  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:42:14.455977  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:42:14.455982  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:42:14.456425  543705 disk_worker.go:494] system disk:vda1
I0321 10:42:14.456455  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:42:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:42:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:42:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:42:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:42:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:42:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:42:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:42:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:42:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 10:42:23.409800  543705 memory.go:184] no items to output this cycle
I0321 10:42:29.121668  543705 disk_info.go:125] begin check local disk info of client
I0321 10:42:29.124128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:42:29.124135  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521b40 0xc000521b80]
E0321 10:42:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:42:33.409905  543705 memory.go:184] no items to output this cycle
I0321 10:42:33.409933  543705 cpu.go:275] no items to output this cycle
I0321 10:42:38.843919  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:42:38.843925  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:42:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:42:43.410689  543705 memory.go:191] Add success.
I0321 10:42:43.409826  543705 cpu.go:282] Add success.
I0321 10:42:43.420383  543705 net.go:648] Add success.
I0321 10:42:43.423045  543705 net.go:770] primary dev: ETH0
I0321 10:42:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:42:43.423072  543705 net.go:698] Add success.
I0321 10:42:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:42:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:42:46.458107  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:42:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:42:53.409821  543705 memory.go:184] no items to output this cycle
I0321 10:42:53.409837  543705 cpu.go:275] no items to output this cycle
E0321 10:43:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:43:03.409781  543705 memory.go:184] no items to output this cycle
I0321 10:43:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 10:43:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:43:13.409779  543705 memory.go:191] Add success.
W0321 10:43:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:43:13.409831  543705 cpu.go:282] Add success.
I0321 10:43:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:43:13.420268  543705 net.go:648] Add success.
I0321 10:43:13.422728  543705 net.go:770] primary dev: ETH0
I0321 10:43:13.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:43:13.422754  543705 net.go:698] Add success.
I0321 10:43:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:43:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:43:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 10:43:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:43:14.456511  543705 disk_worker.go:494] system disk:vda1
I0321 10:43:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:43:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:43:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:43:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:43:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:43:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:43:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:43:23.409770  543705 memory.go:184] no items to output this cycle
I0321 10:43:23.409775  543705 cpu.go:275] no items to output this cycle
I0321 10:43:29.125673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:43:29.128134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:43:29.128140  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c140 0xc00034c180]
E0321 10:43:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:43:33.409796  543705 memory.go:184] no items to output this cycle
I0321 10:43:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 10:43:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:43:43.409789  543705 memory.go:191] Add success.
I0321 10:43:43.409821  543705 cpu.go:282] Add success.
I0321 10:43:43.419878  543705 net.go:648] Add success.
I0321 10:43:43.422471  543705 net.go:770] primary dev: ETH0
I0321 10:43:43.422487  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:43:43.422502  543705 net.go:698] Add success.
I0321 10:43:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:43:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:43:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:43:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:43:53.409790  543705 memory.go:184] no items to output this cycle
I0321 10:43:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 10:44:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:44:03.409781  543705 memory.go:184] no items to output this cycle
I0321 10:44:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 10:44:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:44:13.409808  543705 memory.go:191] Add success.
I0321 10:44:13.409813  543705 cpu.go:282] Add success.
W0321 10:44:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:44:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:44:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:44:13.420132  543705 net.go:648] Add success.
I0321 10:44:13.422794  543705 net.go:770] primary dev: ETH0
I0321 10:44:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:44:13.422822  543705 net.go:698] Add success.
I0321 10:44:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:44:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:44:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 10:44:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:44:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 10:44:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:44:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:44:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:44:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:44:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:44:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:44:23.409776  543705 memory.go:184] no items to output this cycle
I0321 10:44:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 10:44:29.129675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:44:29.132148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:44:29.132155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4680 0xc0004a46c0]
E0321 10:44:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:44:33.409793  543705 memory.go:184] no items to output this cycle
I0321 10:44:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 10:44:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:44:43.409796  543705 memory.go:191] Add success.
I0321 10:44:43.409796  543705 cpu.go:282] Add success.
I0321 10:44:43.419974  543705 net.go:648] Add success.
I0321 10:44:43.422836  543705 net.go:770] primary dev: ETH0
I0321 10:44:43.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:44:43.422898  543705 net.go:698] Add success.
I0321 10:44:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:44:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:44:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:44:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:44:53.409789  543705 memory.go:184] no items to output this cycle
I0321 10:44:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 10:45:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:45:03.409800  543705 memory.go:184] no items to output this cycle
I0321 10:45:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 10:45:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:45:13.409775  543705 memory.go:191] Add success.
I0321 10:45:13.409797  543705 cpu.go:282] Add success.
W0321 10:45:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:45:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:45:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:45:13.420120  543705 net.go:648] Add success.
I0321 10:45:13.422712  543705 net.go:770] primary dev: ETH0
I0321 10:45:13.422725  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:45:13.422737  543705 net.go:698] Add success.
I0321 10:45:13.472914  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5fbc0a47-b1a1-45e5-9877-dded829b5c62","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:45:13.472948  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:45:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:45:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:45:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 10:45:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:45:14.456521  543705 disk_worker.go:494] system disk:vda1
I0321 10:45:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:45:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:45:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:45:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:45:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:45:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:45:23.409791  543705 memory.go:184] no items to output this cycle
I0321 10:45:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 10:45:29.133674  543705 disk_info.go:125] begin check local disk info of client
I0321 10:45:29.136204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:45:29.136210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b14c0 0xc0002b1500]
E0321 10:45:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:45:33.409771  543705 memory.go:184] no items to output this cycle
I0321 10:45:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 10:45:38.845729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:45:38.845736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:45:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:45:43.410656  543705 memory.go:191] Add success.
I0321 10:45:43.409825  543705 cpu.go:282] Add success.
I0321 10:45:43.420370  543705 net.go:648] Add success.
I0321 10:45:43.423124  543705 net.go:770] primary dev: ETH0
I0321 10:45:43.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:45:43.423149  543705 net.go:698] Add success.
I0321 10:45:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:45:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:45:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:45:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:45:53.409810  543705 memory.go:184] no items to output this cycle
I0321 10:45:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 10:46:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:46:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 10:46:03.409789  543705 memory.go:184] no items to output this cycle
E0321 10:46:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:46:13.409815  543705 memory.go:191] Add success.
I0321 10:46:13.409821  543705 cpu.go:282] Add success.
W0321 10:46:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:46:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:46:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:46:13.420084  543705 net.go:648] Add success.
I0321 10:46:13.422686  543705 net.go:770] primary dev: ETH0
I0321 10:46:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:46:13.422712  543705 net.go:698] Add success.
I0321 10:46:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:46:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:46:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 10:46:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:46:14.456550  543705 disk_worker.go:494] system disk:vda1
I0321 10:46:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:46:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:46:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:46:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:46:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:46:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:46:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:46:23.409793  543705 memory.go:184] no items to output this cycle
I0321 10:46:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 10:46:29.137673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:46:29.140177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:46:29.140184  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d0c0 0xc00035d100]
E0321 10:46:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:46:33.409772  543705 memory.go:184] no items to output this cycle
I0321 10:46:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 10:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:46:43.409785  543705 memory.go:191] Add success.
I0321 10:46:43.409806  543705 cpu.go:282] Add success.
I0321 10:46:43.419944  543705 net.go:648] Add success.
I0321 10:46:43.422651  543705 net.go:770] primary dev: ETH0
I0321 10:46:43.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:46:43.422675  543705 net.go:698] Add success.
I0321 10:46:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:46:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:46:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:46:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:46:53.409787  543705 memory.go:184] no items to output this cycle
I0321 10:46:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 10:47:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:47:03.409771  543705 memory.go:184] no items to output this cycle
I0321 10:47:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 10:47:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:47:13.409785  543705 memory.go:191] Add success.
W0321 10:47:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:47:13.409810  543705 cpu.go:282] Add success.
W0321 10:47:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:47:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:47:13.420120  543705 net.go:648] Add success.
I0321 10:47:13.422840  543705 net.go:770] primary dev: ETH0
I0321 10:47:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:47:13.422864  543705 net.go:698] Add success.
I0321 10:47:13.453436  543705 event_worker.go:152] Polling the log file for events...
W0321 10:47:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:47:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 10:47:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:47:14.456790  543705 disk_worker.go:494] system disk:vda1
I0321 10:47:14.456828  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:47:14.457124  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:47:14.457131  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:47:14.457146  543705 custom_config.go:64] query custom config with name: gpu
E0321 10:47:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:47:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:47:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:47:16.457978  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:47:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:47:16.458037  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:47:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:47:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:47:23.409794  543705 memory.go:184] no items to output this cycle
I0321 10:47:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 10:47:29.141701  543705 disk_info.go:125] begin check local disk info of client
I0321 10:47:29.144224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:47:29.144231  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b300 0xc00007b340]
E0321 10:47:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:47:33.409770  543705 memory.go:184] no items to output this cycle
I0321 10:47:33.409777  543705 cpu.go:275] no items to output this cycle
E0321 10:47:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:47:43.409819  543705 memory.go:191] Add success.
I0321 10:47:43.409826  543705 cpu.go:282] Add success.
I0321 10:47:43.419864  543705 net.go:648] Add success.
I0321 10:47:43.422386  543705 net.go:770] primary dev: ETH0
I0321 10:47:43.422400  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:47:43.422415  543705 net.go:698] Add success.
I0321 10:47:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:47:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:47:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:47:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:47:53.409773  543705 memory.go:184] no items to output this cycle
I0321 10:47:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 10:48:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:48:03.409800  543705 memory.go:184] no items to output this cycle
I0321 10:48:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 10:48:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:48:13.409818  543705 memory.go:191] Add success.
I0321 10:48:13.409827  543705 cpu.go:282] Add success.
W0321 10:48:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:48:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:48:13.409967  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:48:13.419781  543705 net.go:648] Add success.
I0321 10:48:13.422743  543705 net.go:770] primary dev: ETH0
I0321 10:48:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:48:13.422771  543705 net.go:698] Add success.
I0321 10:48:13.569940  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"924c9c2e-9fae-4514-b88c-186410e4a0c2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:48:13.569972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:48:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:48:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:48:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 10:48:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:48:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 10:48:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:48:15.455612  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:48:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:48:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:48:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:48:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:48:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:48:23.409771  543705 memory.go:184] no items to output this cycle
I0321 10:48:23.409774  543705 cpu.go:275] no items to output this cycle
I0321 10:48:29.145673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:48:29.148153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:48:29.148160  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd40 0xc0001abd80]
E0321 10:48:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:48:33.409778  543705 memory.go:184] no items to output this cycle
I0321 10:48:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 10:48:38.847932  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:48:38.847939  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:48:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:48:43.410664  543705 memory.go:191] Add success.
I0321 10:48:43.409821  543705 cpu.go:282] Add success.
I0321 10:48:43.420369  543705 net.go:648] Add success.
I0321 10:48:43.423024  543705 net.go:770] primary dev: ETH0
I0321 10:48:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:48:43.423054  543705 net.go:698] Add success.
I0321 10:48:46.458014  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:48:46.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:48:46.458123  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:48:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:48:53.409777  543705 memory.go:184] no items to output this cycle
I0321 10:48:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 10:49:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:49:03.409797  543705 memory.go:184] no items to output this cycle
I0321 10:49:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 10:49:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:49:13.409793  543705 memory.go:191] Add success.
I0321 10:49:13.409793  543705 cpu.go:282] Add success.
W0321 10:49:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:49:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:49:13.419731  543705 net.go:648] Add success.
I0321 10:49:13.422317  543705 net.go:770] primary dev: ETH0
I0321 10:49:13.422331  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:49:13.422342  543705 net.go:698] Add success.
I0321 10:49:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:49:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:49:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 10:49:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:49:14.456559  543705 disk_worker.go:494] system disk:vda1
I0321 10:49:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:49:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:49:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:49:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:49:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:49:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:49:23.409776  543705 memory.go:184] no items to output this cycle
I0321 10:49:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 10:49:29.149672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:49:29.152187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:49:29.152192  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352700 0xc000352740]
E0321 10:49:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:49:33.409773  543705 memory.go:184] no items to output this cycle
I0321 10:49:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 10:49:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:49:43.409820  543705 memory.go:191] Add success.
I0321 10:49:43.409826  543705 cpu.go:282] Add success.
I0321 10:49:43.419974  543705 net.go:648] Add success.
I0321 10:49:43.422423  543705 net.go:770] primary dev: ETH0
I0321 10:49:43.422436  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:49:43.422449  543705 net.go:698] Add success.
I0321 10:49:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:49:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:49:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:49:53.410408  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:49:53.410425  543705 memory.go:184] no items to output this cycle
I0321 10:49:53.410431  543705 cpu.go:275] no items to output this cycle
E0321 10:50:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:50:03.409765  543705 memory.go:184] no items to output this cycle
I0321 10:50:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 10:50:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:50:13.409806  543705 memory.go:191] Add success.
I0321 10:50:13.409813  543705 cpu.go:282] Add success.
W0321 10:50:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:50:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:50:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:50:13.419708  543705 net.go:648] Add success.
I0321 10:50:13.422328  543705 net.go:770] primary dev: ETH0
I0321 10:50:13.422342  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:50:13.422354  543705 net.go:698] Add success.
I0321 10:50:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:50:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:50:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 10:50:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:50:14.456496  543705 disk_worker.go:494] system disk:vda1
I0321 10:50:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:50:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:50:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:50:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:50:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:50:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:50:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:50:23.409766  543705 memory.go:184] no items to output this cycle
I0321 10:50:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 10:50:29.153675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:50:29.156196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:50:29.156202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c57c0 0xc0000c5800]
E0321 10:50:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:50:33.409780  543705 memory.go:184] no items to output this cycle
I0321 10:50:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 10:50:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:50:43.409799  543705 memory.go:191] Add success.
I0321 10:50:43.409799  543705 cpu.go:282] Add success.
I0321 10:50:43.419875  543705 net.go:648] Add success.
I0321 10:50:43.422850  543705 net.go:770] primary dev: ETH0
I0321 10:50:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:50:43.422874  543705 net.go:698] Add success.
I0321 10:50:46.458033  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:50:46.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:50:46.458144  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:50:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:50:53.409788  543705 memory.go:184] no items to output this cycle
I0321 10:50:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 10:51:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:51:03.409764  543705 memory.go:184] no items to output this cycle
I0321 10:51:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 10:51:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:51:13.409816  543705 memory.go:191] Add success.
I0321 10:51:13.409824  543705 cpu.go:282] Add success.
W0321 10:51:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:51:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:51:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:51:13.420266  543705 net.go:648] Add success.
I0321 10:51:13.423462  543705 net.go:770] primary dev: ETH0
I0321 10:51:13.423475  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:51:13.423487  543705 net.go:698] Add success.
I0321 10:51:13.464539  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b903f479-225e-4474-ae4f-5721bb2991a2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:51:13.464571  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:51:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:51:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:51:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 10:51:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:51:14.456496  543705 disk_worker.go:494] system disk:vda1
I0321 10:51:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:51:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:51:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:51:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:51:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:51:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:51:23.409787  543705 memory.go:184] no items to output this cycle
I0321 10:51:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 10:51:29.157673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:51:29.160173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:51:29.160180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0321 10:51:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:51:33.409760  543705 memory.go:184] no items to output this cycle
I0321 10:51:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 10:51:38.849735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:51:38.849742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:51:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:51:43.410648  543705 memory.go:191] Add success.
I0321 10:51:43.409838  543705 cpu.go:282] Add success.
I0321 10:51:43.420350  543705 net.go:648] Add success.
I0321 10:51:43.423168  543705 net.go:770] primary dev: ETH0
I0321 10:51:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:51:43.423196  543705 net.go:698] Add success.
I0321 10:51:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:51:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:51:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:51:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:51:53.409777  543705 memory.go:184] no items to output this cycle
I0321 10:51:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 10:52:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:52:03.409812  543705 memory.go:184] no items to output this cycle
I0321 10:52:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 10:52:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:52:13.409785  543705 memory.go:191] Add success.
W0321 10:52:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 10:52:13.409822  543705 cpu.go:282] Add success.
W0321 10:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:52:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:52:13.420092  543705 net.go:648] Add success.
I0321 10:52:13.422743  543705 net.go:770] primary dev: ETH0
I0321 10:52:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:52:13.422772  543705 net.go:698] Add success.
W0321 10:52:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:52:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 10:52:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:52:14.456785  543705 disk_worker.go:494] system disk:vda1
I0321 10:52:14.456822  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:52:14.457138  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:52:14.457145  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:52:14.457150  543705 custom_config.go:64] query custom config with name: gpu
E0321 10:52:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:52:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:52:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:52:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:52:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:52:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:52:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:52:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:52:23.409775  543705 memory.go:184] no items to output this cycle
I0321 10:52:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 10:52:29.161675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:52:29.164124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:52:29.164130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af80 0xc00007afc0]
E0321 10:52:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:52:33.409790  543705 memory.go:184] no items to output this cycle
I0321 10:52:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 10:52:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:52:43.409793  543705 memory.go:191] Add success.
I0321 10:52:43.409793  543705 cpu.go:282] Add success.
I0321 10:52:43.419874  543705 net.go:648] Add success.
I0321 10:52:43.422562  543705 net.go:770] primary dev: ETH0
I0321 10:52:43.422575  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:52:43.422588  543705 net.go:698] Add success.
I0321 10:52:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:52:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:52:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:52:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:52:53.409809  543705 memory.go:184] no items to output this cycle
I0321 10:52:53.409937  543705 cpu.go:275] no items to output this cycle
E0321 10:53:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:53:03.409777  543705 memory.go:184] no items to output this cycle
I0321 10:53:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 10:53:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:53:13.409794  543705 memory.go:191] Add success.
I0321 10:53:13.409798  543705 cpu.go:282] Add success.
W0321 10:53:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:53:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:53:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:53:13.420349  543705 net.go:648] Add success.
I0321 10:53:13.422913  543705 net.go:770] primary dev: ETH0
I0321 10:53:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:53:13.422939  543705 net.go:698] Add success.
I0321 10:53:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:53:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:53:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 10:53:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:53:14.456496  543705 disk_worker.go:494] system disk:vda1
I0321 10:53:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:53:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:53:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:53:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:53:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:53:23.409795  543705 memory.go:184] no items to output this cycle
I0321 10:53:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 10:53:29.165672  543705 disk_info.go:125] begin check local disk info of client
I0321 10:53:29.168191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:53:29.168197  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029ef80 0xc00029efc0]
E0321 10:53:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:53:33.409781  543705 memory.go:184] no items to output this cycle
I0321 10:53:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 10:53:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:53:43.409804  543705 memory.go:191] Add success.
I0321 10:53:43.409808  543705 cpu.go:282] Add success.
I0321 10:53:43.419891  543705 net.go:648] Add success.
I0321 10:53:43.422944  543705 net.go:770] primary dev: ETH0
I0321 10:53:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:53:43.422973  543705 net.go:698] Add success.
I0321 10:53:46.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:53:46.458107  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:53:46.458145  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:53:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:53:53.409813  543705 memory.go:184] no items to output this cycle
I0321 10:53:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 10:54:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:54:03.409774  543705 memory.go:184] no items to output this cycle
I0321 10:54:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 10:54:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:54:13.409819  543705 memory.go:191] Add success.
I0321 10:54:13.409821  543705 cpu.go:282] Add success.
W0321 10:54:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:54:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:54:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:54:13.420241  543705 net.go:648] Add success.
I0321 10:54:13.422954  543705 net.go:770] primary dev: ETH0
I0321 10:54:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:54:13.422983  543705 net.go:698] Add success.
I0321 10:54:13.502923  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"533c11bb-302e-4611-90ae-493f272ccae4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:54:13.502957  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 10:54:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:54:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:54:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 10:54:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:54:14.456515  543705 disk_worker.go:494] system disk:vda1
I0321 10:54:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:54:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:54:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:54:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:54:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:54:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:54:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:54:23.409772  543705 memory.go:184] no items to output this cycle
I0321 10:54:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 10:54:29.169680  543705 disk_info.go:125] begin check local disk info of client
I0321 10:54:29.172134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:54:29.172140  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380b80 0xc000380bc0]
E0321 10:54:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:54:33.409776  543705 memory.go:184] no items to output this cycle
I0321 10:54:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 10:54:38.851959  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:54:38.851966  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:54:43.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:54:43.410740  543705 memory.go:191] Add success.
I0321 10:54:43.409966  543705 cpu.go:282] Add success.
I0321 10:54:43.419768  543705 net.go:648] Add success.
I0321 10:54:43.422383  543705 net.go:770] primary dev: ETH0
I0321 10:54:43.422397  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:54:43.422411  543705 net.go:698] Add success.
I0321 10:54:46.457667  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:54:46.457736  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:54:46.457760  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:54:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:54:53.409800  543705 cpu.go:275] no items to output this cycle
I0321 10:54:53.409802  543705 memory.go:184] no items to output this cycle
E0321 10:55:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:55:03.409772  543705 memory.go:184] no items to output this cycle
I0321 10:55:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 10:55:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:55:13.409823  543705 memory.go:191] Add success.
I0321 10:55:13.409837  543705 cpu.go:282] Add success.
W0321 10:55:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:55:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:55:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:55:13.420377  543705 net.go:648] Add success.
I0321 10:55:13.423100  543705 net.go:770] primary dev: ETH0
I0321 10:55:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:55:13.423126  543705 net.go:698] Add success.
I0321 10:55:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:55:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:55:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 10:55:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:55:14.456563  543705 disk_worker.go:494] system disk:vda1
I0321 10:55:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:55:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:55:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:55:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:55:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:55:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:55:23.409815  543705 memory.go:184] no items to output this cycle
I0321 10:55:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 10:55:29.173673  543705 disk_info.go:125] begin check local disk info of client
I0321 10:55:29.176150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:55:29.176156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e5c0 0xc00049e600]
E0321 10:55:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:55:33.409811  543705 memory.go:184] no items to output this cycle
I0321 10:55:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 10:55:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:55:43.409794  543705 memory.go:191] Add success.
I0321 10:55:43.409814  543705 cpu.go:282] Add success.
I0321 10:55:43.420058  543705 net.go:648] Add success.
I0321 10:55:43.423086  543705 net.go:770] primary dev: ETH0
I0321 10:55:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:55:43.423112  543705 net.go:698] Add success.
I0321 10:55:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:55:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:55:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:55:53.409825  543705 memory.go:184] no items to output this cycle
I0321 10:55:53.409834  543705 cpu.go:275] no items to output this cycle
E0321 10:56:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:56:03.409771  543705 memory.go:184] no items to output this cycle
I0321 10:56:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 10:56:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:56:13.409817  543705 memory.go:191] Add success.
I0321 10:56:13.409825  543705 cpu.go:282] Add success.
W0321 10:56:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:56:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:56:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:56:13.420144  543705 net.go:648] Add success.
I0321 10:56:13.422750  543705 net.go:770] primary dev: ETH0
I0321 10:56:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:56:13.422780  543705 net.go:698] Add success.
I0321 10:56:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:56:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:56:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 10:56:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:56:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 10:56:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:56:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:56:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:56:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:56:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:56:23.409802  543705 memory.go:184] no items to output this cycle
I0321 10:56:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 10:56:29.177675  543705 disk_info.go:125] begin check local disk info of client
I0321 10:56:29.180130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:56:29.180136  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490300 0xc000490340]
E0321 10:56:33.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:56:33.409863  543705 memory.go:184] no items to output this cycle
I0321 10:56:33.409935  543705 cpu.go:275] no items to output this cycle
E0321 10:56:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:56:43.409820  543705 memory.go:191] Add success.
I0321 10:56:43.409826  543705 cpu.go:282] Add success.
I0321 10:56:43.419975  543705 net.go:648] Add success.
I0321 10:56:43.422715  543705 net.go:770] primary dev: ETH0
I0321 10:56:43.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:56:43.422740  543705 net.go:698] Add success.
I0321 10:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:56:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:56:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:56:53.409788  543705 memory.go:184] no items to output this cycle
I0321 10:56:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 10:57:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:57:03.409776  543705 memory.go:184] no items to output this cycle
I0321 10:57:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 10:57:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:57:13.409805  543705 memory.go:191] Add success.
I0321 10:57:13.409816  543705 cpu.go:282] Add success.
W0321 10:57:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:57:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:57:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:57:13.420518  543705 net.go:648] Add success.
I0321 10:57:13.423371  543705 net.go:770] primary dev: ETH0
I0321 10:57:13.423385  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:57:13.423400  543705 net.go:698] Add success.
I0321 10:57:13.429735  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 10:57:13.452914  543705 event_worker.go:152] Polling the log file for events...
I0321 10:57:13.463912  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c48d2f92-96a3-4fcd-8915-4f0b2458fa9b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 10:57:13.463946  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 10:57:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:57:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 10:57:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0321 10:57:14.456204  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 10:57:14.456213  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 10:57:14.456219  543705 custom_config.go:64] query custom config with name: gpu
I0321 10:57:14.457152  543705 disk_worker.go:494] system disk:vda1
I0321 10:57:14.457181  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 10:57:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 10:57:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:57:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 10:57:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 10:57:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:57:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:57:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:57:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:57:23.409790  543705 memory.go:184] no items to output this cycle
I0321 10:57:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 10:57:29.181681  543705 disk_info.go:125] begin check local disk info of client
I0321 10:57:29.184119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:57:29.184125  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032af80 0xc00032afc0]
E0321 10:57:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:57:33.409783  543705 memory.go:184] no items to output this cycle
I0321 10:57:33.409788  543705 cpu.go:275] no items to output this cycle
I0321 10:57:38.853735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 10:57:38.853741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 10:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:57:43.410811  543705 memory.go:191] Add success.
I0321 10:57:43.409780  543705 cpu.go:282] Add success.
I0321 10:57:43.420545  543705 net.go:648] Add success.
I0321 10:57:43.423630  543705 net.go:770] primary dev: ETH0
I0321 10:57:43.423644  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:57:43.423656  543705 net.go:698] Add success.
I0321 10:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:57:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:57:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:57:53.409783  543705 memory.go:184] no items to output this cycle
I0321 10:57:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 10:58:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:58:03.409781  543705 memory.go:184] no items to output this cycle
I0321 10:58:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 10:58:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:58:13.409818  543705 memory.go:191] Add success.
I0321 10:58:13.409828  543705 cpu.go:282] Add success.
W0321 10:58:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:58:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:58:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:58:13.420173  543705 net.go:648] Add success.
I0321 10:58:13.422854  543705 net.go:770] primary dev: ETH0
I0321 10:58:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:58:13.422880  543705 net.go:698] Add success.
I0321 10:58:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:58:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:58:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 10:58:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:58:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 10:58:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:58:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:58:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:58:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:58:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:58:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:58:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:58:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 10:58:23.409779  543705 memory.go:184] no items to output this cycle
I0321 10:58:29.185669  543705 disk_info.go:125] begin check local disk info of client
I0321 10:58:29.188130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:58:29.188136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4b00 0xc0003e4b40]
E0321 10:58:33.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:58:33.409872  543705 memory.go:184] no items to output this cycle
I0321 10:58:33.409966  543705 cpu.go:275] no items to output this cycle
E0321 10:58:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:58:43.409787  543705 memory.go:191] Add success.
I0321 10:58:43.409801  543705 cpu.go:282] Add success.
I0321 10:58:43.419999  543705 net.go:648] Add success.
I0321 10:58:43.423124  543705 net.go:770] primary dev: ETH0
I0321 10:58:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:58:43.423152  543705 net.go:698] Add success.
I0321 10:58:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:58:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:58:53.409815  543705 memory.go:184] no items to output this cycle
I0321 10:58:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 10:59:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:59:03.409775  543705 memory.go:184] no items to output this cycle
I0321 10:59:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 10:59:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:59:13.409823  543705 memory.go:191] Add success.
I0321 10:59:13.409829  543705 cpu.go:282] Add success.
W0321 10:59:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 10:59:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 10:59:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 10:59:13.420254  543705 net.go:648] Add success.
I0321 10:59:13.422927  543705 net.go:770] primary dev: ETH0
I0321 10:59:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:59:13.422955  543705 net.go:698] Add success.
I0321 10:59:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 10:59:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 10:59:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 10:59:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 10:59:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 10:59:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 10:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 10:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:59:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:59:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 10:59:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0321 10:59:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:59:23.409763  543705 memory.go:184] no items to output this cycle
I0321 10:59:23.409890  543705 cpu.go:275] no items to output this cycle
I0321 10:59:29.189681  543705 disk_info.go:125] begin check local disk info of client
I0321 10:59:29.192255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 10:59:29.192263  543705 disk_info.go:196] parse disk info done, disk is : [0xc000216000 0xc000216040]
E0321 10:59:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:59:33.409766  543705 memory.go:184] no items to output this cycle
I0321 10:59:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 10:59:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:59:43.409790  543705 memory.go:191] Add success.
I0321 10:59:43.409794  543705 cpu.go:282] Add success.
I0321 10:59:43.419877  543705 net.go:648] Add success.
I0321 10:59:43.422584  543705 net.go:770] primary dev: ETH0
I0321 10:59:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0321 10:59:43.422609  543705 net.go:698] Add success.
I0321 10:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 10:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 10:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 10:59:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 10:59:53.409788  543705 memory.go:184] no items to output this cycle
I0321 10:59:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 11:00:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:00:03.409792  543705 memory.go:184] no items to output this cycle
I0321 11:00:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:00:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:00:13.409805  543705 memory.go:191] Add success.
I0321 11:00:13.409806  543705 cpu.go:282] Add success.
W0321 11:00:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:00:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:00:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:00:13.420065  543705 net.go:648] Add success.
I0321 11:00:13.422927  543705 net.go:770] primary dev: ETH0
I0321 11:00:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:00:13.422952  543705 net.go:698] Add success.
I0321 11:00:13.468932  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f415953-3f60-408e-bed2-e5cb723c8e75","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:00:13.468966  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:00:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:00:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:00:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0321 11:00:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:00:14.456607  543705 disk_worker.go:494] system disk:vda1
I0321 11:00:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:00:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:00:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:00:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:00:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:00:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:00:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 11:00:23.409785  543705 memory.go:184] no items to output this cycle
I0321 11:00:29.193680  543705 disk_info.go:125] begin check local disk info of client
I0321 11:00:29.196174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:00:29.196180  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bd00 0xc00007bd80]
E0321 11:00:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:00:33.409802  543705 memory.go:184] no items to output this cycle
I0321 11:00:33.409822  543705 cpu.go:275] no items to output this cycle
I0321 11:00:38.855973  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:00:38.855980  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:00:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:00:43.410595  543705 memory.go:191] Add success.
I0321 11:00:43.409796  543705 cpu.go:282] Add success.
I0321 11:00:43.420419  543705 net.go:648] Add success.
I0321 11:00:43.422960  543705 net.go:770] primary dev: ETH0
I0321 11:00:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:00:43.422986  543705 net.go:698] Add success.
I0321 11:00:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:00:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:00:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:00:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:00:53.409787  543705 memory.go:184] no items to output this cycle
I0321 11:00:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 11:01:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:01:03.409778  543705 memory.go:184] no items to output this cycle
I0321 11:01:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 11:01:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:01:13.409789  543705 memory.go:191] Add success.
I0321 11:01:13.409793  543705 cpu.go:282] Add success.
W0321 11:01:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:01:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:01:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:01:13.420121  543705 net.go:648] Add success.
I0321 11:01:13.422778  543705 net.go:770] primary dev: ETH0
I0321 11:01:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:01:13.422807  543705 net.go:698] Add success.
I0321 11:01:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:01:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:01:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 11:01:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:01:14.458946  543705 disk_worker.go:494] system disk:vda1
I0321 11:01:14.458975  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:01:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:01:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:01:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:01:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:01:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:01:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:01:23.409775  543705 memory.go:184] no items to output this cycle
I0321 11:01:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 11:01:29.197680  543705 disk_info.go:125] begin check local disk info of client
I0321 11:01:29.200158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:01:29.200166  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353f40 0xc00047e000]
E0321 11:01:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:01:33.409797  543705 memory.go:184] no items to output this cycle
I0321 11:01:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 11:01:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:01:43.409775  543705 memory.go:191] Add success.
I0321 11:01:43.409803  543705 cpu.go:282] Add success.
I0321 11:01:43.419954  543705 net.go:648] Add success.
I0321 11:01:43.423019  543705 net.go:770] primary dev: ETH0
I0321 11:01:43.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:01:43.423048  543705 net.go:698] Add success.
I0321 11:01:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:01:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:01:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:01:53.409812  543705 memory.go:184] no items to output this cycle
I0321 11:01:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 11:02:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:02:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:02:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 11:02:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:02:13.409783  543705 memory.go:191] Add success.
W0321 11:02:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 11:02:13.409809  543705 cpu.go:282] Add success.
W0321 11:02:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:02:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:02:13.420219  543705 net.go:648] Add success.
I0321 11:02:13.422989  543705 net.go:770] primary dev: ETH0
I0321 11:02:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:02:13.423016  543705 net.go:698] Add success.
W0321 11:02:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:02:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 11:02:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:02:14.456677  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:02:14.456687  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:02:14.456694  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:02:14.456951  543705 disk_worker.go:494] system disk:vda1
I0321 11:02:14.456986  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:02:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:02:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:02:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:02:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:02:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:02:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:02:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:02:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:02:23.409798  543705 memory.go:184] no items to output this cycle
I0321 11:02:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 11:02:29.201679  543705 disk_info.go:125] begin check local disk info of client
I0321 11:02:29.204173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:02:29.204179  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273880 0xc0002738c0]
E0321 11:02:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:02:33.409797  543705 memory.go:184] no items to output this cycle
I0321 11:02:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 11:02:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:02:43.409815  543705 memory.go:191] Add success.
I0321 11:02:43.409818  543705 cpu.go:282] Add success.
I0321 11:02:43.419889  543705 net.go:648] Add success.
I0321 11:02:43.422713  543705 net.go:770] primary dev: ETH0
I0321 11:02:43.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:02:43.422751  543705 net.go:698] Add success.
I0321 11:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:02:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:02:53.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:02:53.410393  543705 memory.go:184] no items to output this cycle
I0321 11:02:53.410408  543705 cpu.go:275] no items to output this cycle
E0321 11:03:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:03:03.409771  543705 memory.go:184] no items to output this cycle
I0321 11:03:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 11:03:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:03:13.409774  543705 memory.go:191] Add success.
W0321 11:03:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 11:03:13.409804  543705 cpu.go:282] Add success.
W0321 11:03:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:03:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:03:13.420103  543705 net.go:648] Add success.
I0321 11:03:13.423053  543705 net.go:770] primary dev: ETH0
I0321 11:03:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:03:13.423078  543705 net.go:698] Add success.
I0321 11:03:13.468193  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a933208-dd2c-41bf-a784-8f5d060f1020","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:03:13.468225  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:03:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:03:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:03:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 11:03:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:03:14.456488  543705 disk_worker.go:494] system disk:vda1
I0321 11:03:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:03:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:03:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:03:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:03:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:03:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:03:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:03:23.409805  543705 memory.go:184] no items to output this cycle
I0321 11:03:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 11:03:29.205674  543705 disk_info.go:125] begin check local disk info of client
I0321 11:03:29.208270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:03:29.208276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004852c0 0xc000485300]
E0321 11:03:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:03:33.409769  543705 memory.go:184] no items to output this cycle
I0321 11:03:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 11:03:38.857734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:03:38.857740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:03:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:03:43.410603  543705 memory.go:191] Add success.
I0321 11:03:43.409786  543705 cpu.go:282] Add success.
I0321 11:03:43.420326  543705 net.go:648] Add success.
I0321 11:03:43.422926  543705 net.go:770] primary dev: ETH0
I0321 11:03:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:03:43.422951  543705 net.go:698] Add success.
I0321 11:03:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:03:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:03:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:03:53.410330  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:03:53.410350  543705 memory.go:184] no items to output this cycle
I0321 11:03:53.410369  543705 cpu.go:275] no items to output this cycle
E0321 11:04:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:04:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:04:03.409780  543705 cpu.go:275] no items to output this cycle
W0321 11:04:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:04:13.409744  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:04:13.409751  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 11:04:13.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:04:13.409847  543705 cpu.go:282] Add success.
I0321 11:04:13.409864  543705 memory.go:191] Add success.
I0321 11:04:13.420174  543705 net.go:648] Add success.
I0321 11:04:13.422926  543705 net.go:770] primary dev: ETH0
I0321 11:04:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:04:13.422952  543705 net.go:698] Add success.
I0321 11:04:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:04:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:04:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 11:04:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:04:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 11:04:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:04:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:04:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:04:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:04:23.409803  543705 memory.go:184] no items to output this cycle
I0321 11:04:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 11:04:29.209671  543705 disk_info.go:125] begin check local disk info of client
I0321 11:04:29.212140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:04:29.212147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0cc0 0xc0002b0d00]
E0321 11:04:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:04:33.409797  543705 memory.go:184] no items to output this cycle
I0321 11:04:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 11:04:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:04:43.409791  543705 memory.go:191] Add success.
I0321 11:04:43.409801  543705 cpu.go:282] Add success.
I0321 11:04:43.419995  543705 net.go:648] Add success.
I0321 11:04:43.422705  543705 net.go:770] primary dev: ETH0
I0321 11:04:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:04:43.422734  543705 net.go:698] Add success.
I0321 11:04:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:04:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:04:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:04:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:04:53.409788  543705 memory.go:184] no items to output this cycle
I0321 11:04:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 11:05:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:05:03.409767  543705 memory.go:184] no items to output this cycle
I0321 11:05:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 11:05:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:05:13.409795  543705 memory.go:191] Add success.
I0321 11:05:13.409797  543705 cpu.go:282] Add success.
W0321 11:05:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:05:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:05:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:05:13.420125  543705 net.go:648] Add success.
I0321 11:05:13.422920  543705 net.go:770] primary dev: ETH0
I0321 11:05:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:05:13.422945  543705 net.go:698] Add success.
I0321 11:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:05:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:05:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 11:05:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:05:14.456486  543705 disk_worker.go:494] system disk:vda1
I0321 11:05:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:05:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:05:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:05:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:05:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:05:23.409768  543705 memory.go:184] no items to output this cycle
I0321 11:05:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 11:05:29.213673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:05:29.216244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:05:29.216250  543705 disk_info.go:196] parse disk info done, disk is : [0xc000575ac0 0xc000575b00]
E0321 11:05:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:05:33.409764  543705 memory.go:184] no items to output this cycle
I0321 11:05:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 11:05:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:05:43.409795  543705 memory.go:191] Add success.
I0321 11:05:43.409795  543705 cpu.go:282] Add success.
I0321 11:05:43.419655  543705 net.go:770] primary dev: ETH0
I0321 11:05:43.419670  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:05:43.419689  543705 net.go:698] Add success.
I0321 11:05:43.420039  543705 net.go:648] Add success.
I0321 11:05:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:05:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:05:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:05:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:05:53.409780  543705 memory.go:184] no items to output this cycle
I0321 11:05:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 11:06:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:06:03.409801  543705 memory.go:184] no items to output this cycle
I0321 11:06:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 11:06:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:06:13.409786  543705 memory.go:191] Add success.
W0321 11:06:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 11:06:13.409820  543705 cpu.go:282] Add success.
W0321 11:06:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:06:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:06:13.419987  543705 net.go:770] primary dev: ETH0
I0321 11:06:13.420001  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:06:13.420016  543705 net.go:698] Add success.
I0321 11:06:13.420372  543705 net.go:648] Add success.
I0321 11:06:13.469719  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"97ef9cf5-e110-4dad-a34b-13ec65e3ec52","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:06:13.469751  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:06:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:06:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:06:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0321 11:06:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:06:14.456814  543705 disk_worker.go:494] system disk:vda1
I0321 11:06:14.456844  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:06:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:06:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:06:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:06:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:06:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:06:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:06:23.409770  543705 memory.go:184] no items to output this cycle
I0321 11:06:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 11:06:29.217673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:06:29.220236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:06:29.220241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353480 0xc0003534c0]
E0321 11:06:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:06:33.409780  543705 memory.go:184] no items to output this cycle
I0321 11:06:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 11:06:38.857876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:06:38.857883  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:06:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:06:43.410603  543705 memory.go:191] Add success.
I0321 11:06:43.409814  543705 cpu.go:282] Add success.
I0321 11:06:43.420122  543705 net.go:770] primary dev: ETH0
I0321 11:06:43.420135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:06:43.420149  543705 net.go:698] Add success.
I0321 11:06:43.420501  543705 net.go:648] Add success.
I0321 11:06:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:06:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:06:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:06:53.409783  543705 memory.go:184] no items to output this cycle
I0321 11:06:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 11:07:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:07:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:07:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 11:07:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:07:13.409786  543705 memory.go:191] Add success.
W0321 11:07:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:07:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:07:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:07:13.409832  543705 cpu.go:282] Add success.
I0321 11:07:13.420047  543705 net.go:648] Add success.
I0321 11:07:13.422789  543705 net.go:770] primary dev: ETH0
I0321 11:07:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:07:13.422815  543705 net.go:698] Add success.
I0321 11:07:13.453372  543705 event_worker.go:152] Polling the log file for events...
W0321 11:07:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:07:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 11:07:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:07:14.455898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:07:14.455907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:07:14.455913  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:07:14.456623  543705 disk_worker.go:494] system disk:vda1
I0321 11:07:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:07:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:07:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:07:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:07:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:07:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:07:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:07:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:07:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:07:23.409788  543705 memory.go:184] no items to output this cycle
I0321 11:07:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 11:07:29.221671  543705 disk_info.go:125] begin check local disk info of client
I0321 11:07:29.224158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:07:29.224166  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e000 0xc00039e040]
E0321 11:07:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:07:33.409771  543705 memory.go:184] no items to output this cycle
I0321 11:07:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 11:07:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:07:43.409820  543705 memory.go:191] Add success.
I0321 11:07:43.409831  543705 cpu.go:282] Add success.
I0321 11:07:43.419725  543705 net.go:648] Add success.
I0321 11:07:43.422565  543705 net.go:770] primary dev: ETH0
I0321 11:07:43.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:07:43.422589  543705 net.go:698] Add success.
I0321 11:07:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:07:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:07:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:07:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:07:53.409784  543705 memory.go:184] no items to output this cycle
I0321 11:07:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 11:08:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:08:03.409807  543705 memory.go:184] no items to output this cycle
I0321 11:08:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 11:08:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:08:13.409818  543705 memory.go:191] Add success.
I0321 11:08:13.409829  543705 cpu.go:282] Add success.
W0321 11:08:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:08:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:08:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:08:13.420052  543705 net.go:648] Add success.
I0321 11:08:13.423114  543705 net.go:770] primary dev: ETH0
I0321 11:08:13.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:08:13.423139  543705 net.go:698] Add success.
I0321 11:08:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:08:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:08:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 11:08:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:08:14.456476  543705 disk_worker.go:494] system disk:vda1
I0321 11:08:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:08:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:08:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:08:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:08:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:08:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:08:23.409817  543705 memory.go:184] no items to output this cycle
I0321 11:08:23.409833  543705 cpu.go:275] no items to output this cycle
I0321 11:08:29.225674  543705 disk_info.go:125] begin check local disk info of client
I0321 11:08:29.228191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:08:29.228197  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033a580 0xc00033a5c0]
E0321 11:08:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:08:33.409785  543705 memory.go:184] no items to output this cycle
I0321 11:08:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 11:08:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:08:43.409872  543705 memory.go:191] Add success.
I0321 11:08:43.409939  543705 cpu.go:282] Add success.
I0321 11:08:43.419712  543705 net.go:648] Add success.
I0321 11:08:43.422510  543705 net.go:770] primary dev: ETH0
I0321 11:08:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:08:43.422534  543705 net.go:698] Add success.
I0321 11:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:08:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:08:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:08:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:08:53.409775  543705 memory.go:184] no items to output this cycle
I0321 11:08:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 11:09:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:09:03.409777  543705 memory.go:184] no items to output this cycle
I0321 11:09:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 11:09:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:09:13.409796  543705 memory.go:191] Add success.
I0321 11:09:13.409799  543705 cpu.go:282] Add success.
W0321 11:09:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:09:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:09:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:09:13.420054  543705 net.go:648] Add success.
I0321 11:09:13.423363  543705 net.go:770] primary dev: ETH0
I0321 11:09:13.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:09:13.423388  543705 net.go:698] Add success.
I0321 11:09:13.463467  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a1bd7043-ad92-42f8-b3c4-d4b1d9e7b760","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:09:13.463501  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:09:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:09:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:09:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 11:09:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:09:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 11:09:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:09:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:09:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:09:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:09:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:09:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:09:23.409786  543705 memory.go:184] no items to output this cycle
I0321 11:09:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 11:09:29.229682  543705 disk_info.go:125] begin check local disk info of client
I0321 11:09:29.232220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:09:29.232227  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049d180 0xc00049d1c0]
E0321 11:09:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:09:33.409796  543705 memory.go:184] no items to output this cycle
I0321 11:09:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 11:09:38.859988  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:09:38.859994  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:09:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:09:43.410657  543705 memory.go:191] Add success.
I0321 11:09:43.409817  543705 cpu.go:282] Add success.
I0321 11:09:43.420369  543705 net.go:648] Add success.
I0321 11:09:43.423633  543705 net.go:770] primary dev: ETH0
I0321 11:09:43.423647  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:09:43.423660  543705 net.go:698] Add success.
I0321 11:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:09:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:09:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:09:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:09:53.409787  543705 memory.go:184] no items to output this cycle
I0321 11:09:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 11:10:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:10:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:10:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 11:10:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:10:13.409818  543705 memory.go:191] Add success.
I0321 11:10:13.409822  543705 cpu.go:282] Add success.
W0321 11:10:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:10:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:10:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:10:13.420070  543705 net.go:648] Add success.
I0321 11:10:13.422621  543705 net.go:770] primary dev: ETH0
I0321 11:10:13.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:10:13.422651  543705 net.go:698] Add success.
I0321 11:10:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:10:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:10:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 11:10:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:10:14.456563  543705 disk_worker.go:494] system disk:vda1
I0321 11:10:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:10:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:10:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:10:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:10:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:10:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:10:23.409806  543705 memory.go:184] no items to output this cycle
I0321 11:10:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 11:10:29.233675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:10:29.236190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:10:29.236197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a06c0 0xc0004a0700]
E0321 11:10:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:10:33.409792  543705 memory.go:184] no items to output this cycle
I0321 11:10:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:10:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:10:43.409790  543705 memory.go:191] Add success.
I0321 11:10:43.409790  543705 cpu.go:282] Add success.
I0321 11:10:43.419866  543705 net.go:648] Add success.
I0321 11:10:43.422546  543705 net.go:770] primary dev: ETH0
I0321 11:10:43.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:10:43.422575  543705 net.go:698] Add success.
I0321 11:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:10:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:10:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:10:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:10:53.409771  543705 memory.go:184] no items to output this cycle
I0321 11:10:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 11:11:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:11:03.409783  543705 cpu.go:275] no items to output this cycle
I0321 11:11:03.409785  543705 memory.go:184] no items to output this cycle
E0321 11:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:11:13.409792  543705 memory.go:191] Add success.
I0321 11:11:13.409794  543705 cpu.go:282] Add success.
W0321 11:11:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:11:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:11:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:11:13.420071  543705 net.go:648] Add success.
I0321 11:11:13.422743  543705 net.go:770] primary dev: ETH0
I0321 11:11:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:11:13.422772  543705 net.go:698] Add success.
I0321 11:11:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:11:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:11:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 11:11:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:11:14.456522  543705 disk_worker.go:494] system disk:vda1
I0321 11:11:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:11:16.458013  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:11:16.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:11:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:11:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:11:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:11:23.409780  543705 memory.go:184] no items to output this cycle
I0321 11:11:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 11:11:29.237673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:11:29.240267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:11:29.240273  543705 disk_info.go:196] parse disk info done, disk is : [0xc000575b80 0xc000575bc0]
E0321 11:11:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:11:33.409774  543705 memory.go:184] no items to output this cycle
I0321 11:11:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:11:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:11:43.409793  543705 memory.go:191] Add success.
I0321 11:11:43.409797  543705 cpu.go:282] Add success.
I0321 11:11:43.419877  543705 net.go:648] Add success.
I0321 11:11:43.422704  543705 net.go:770] primary dev: ETH0
I0321 11:11:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:11:43.422734  543705 net.go:698] Add success.
I0321 11:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:11:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:11:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:11:53.409810  543705 memory.go:184] no items to output this cycle
I0321 11:11:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 11:12:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:12:03.409775  543705 memory.go:184] no items to output this cycle
I0321 11:12:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:12:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:12:13.409789  543705 memory.go:191] Add success.
I0321 11:12:13.409789  543705 cpu.go:282] Add success.
W0321 11:12:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:12:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:12:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:12:13.420144  543705 net.go:648] Add success.
I0321 11:12:13.422760  543705 net.go:770] primary dev: ETH0
I0321 11:12:13.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:12:13.422787  543705 net.go:698] Add success.
I0321 11:12:13.468798  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c05488f6-fadb-4200-b238-fafc0cf4d351","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:12:13.468829  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 11:12:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:12:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 11:12:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:12:14.456856  543705 disk_worker.go:494] system disk:vda1
E0321 11:12:14.456856  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:12:14.456865  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:12:14.456872  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:12:14.456903  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:12:15.456917  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:12:15.456930  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:12:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:12:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:12:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:12:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:12:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:12:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:12:23.409796  543705 memory.go:184] no items to output this cycle
I0321 11:12:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 11:12:29.241675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:12:29.244177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:12:29.244184  543705 disk_info.go:196] parse disk info done, disk is : [0xc000575140 0xc000575180]
E0321 11:12:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:12:33.409795  543705 memory.go:184] no items to output this cycle
I0321 11:12:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 11:12:38.861733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:12:38.861740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:12:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:12:43.410564  543705 memory.go:191] Add success.
I0321 11:12:43.409800  543705 cpu.go:282] Add success.
I0321 11:12:43.420244  543705 net.go:648] Add success.
I0321 11:12:43.422852  543705 net.go:770] primary dev: ETH0
I0321 11:12:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:12:43.422878  543705 net.go:698] Add success.
I0321 11:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:12:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:12:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:12:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:12:53.409814  543705 memory.go:184] no items to output this cycle
I0321 11:12:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 11:13:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:13:03.409778  543705 memory.go:184] no items to output this cycle
I0321 11:13:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:13:13.409803  543705 memory.go:191] Add success.
I0321 11:13:13.409809  543705 cpu.go:282] Add success.
W0321 11:13:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:13:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:13:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:13:13.420300  543705 net.go:648] Add success.
I0321 11:13:13.423267  543705 net.go:770] primary dev: ETH0
I0321 11:13:13.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:13:13.423293  543705 net.go:698] Add success.
I0321 11:13:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:13:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:13:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 11:13:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:13:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 11:13:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:13:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:13:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:13:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:13:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:13:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:13:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:13:23.409767  543705 memory.go:184] no items to output this cycle
I0321 11:13:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 11:13:29.245673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:13:29.248165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:13:29.248171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b74c0 0xc0002b7500]
E0321 11:13:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:13:33.409759  543705 memory.go:184] no items to output this cycle
I0321 11:13:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 11:13:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:13:43.409809  543705 memory.go:191] Add success.
I0321 11:13:43.409816  543705 cpu.go:282] Add success.
I0321 11:13:43.419875  543705 net.go:648] Add success.
I0321 11:13:43.422836  543705 net.go:770] primary dev: ETH0
I0321 11:13:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:13:43.422862  543705 net.go:698] Add success.
I0321 11:13:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:13:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:13:53.409805  543705 memory.go:184] no items to output this cycle
I0321 11:13:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 11:14:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:14:03.409772  543705 memory.go:184] no items to output this cycle
I0321 11:14:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 11:14:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:14:13.409797  543705 cpu.go:282] Add success.
I0321 11:14:13.409798  543705 memory.go:191] Add success.
W0321 11:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:14:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:14:13.420207  543705 net.go:648] Add success.
I0321 11:14:13.422922  543705 net.go:770] primary dev: ETH0
I0321 11:14:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:14:13.422947  543705 net.go:698] Add success.
I0321 11:14:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:14:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:14:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 11:14:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:14:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 11:14:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:14:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:14:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:14:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:14:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:14:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:14:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:14:23.409780  543705 memory.go:184] no items to output this cycle
I0321 11:14:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 11:14:29.249676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:14:29.252143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:14:29.252149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b7c0 0xc00007b800]
E0321 11:14:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:14:33.409793  543705 memory.go:184] no items to output this cycle
I0321 11:14:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 11:14:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:14:43.409815  543705 memory.go:191] Add success.
I0321 11:14:43.409818  543705 cpu.go:282] Add success.
I0321 11:14:43.420022  543705 net.go:648] Add success.
I0321 11:14:43.422920  543705 net.go:770] primary dev: ETH0
I0321 11:14:43.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:14:43.422946  543705 net.go:698] Add success.
I0321 11:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:14:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:14:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:14:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:14:53.409786  543705 cpu.go:275] no items to output this cycle
I0321 11:14:53.409790  543705 memory.go:184] no items to output this cycle
E0321 11:15:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:15:03.409804  543705 memory.go:184] no items to output this cycle
I0321 11:15:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 11:15:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:15:13.409921  543705 memory.go:191] Add success.
I0321 11:15:13.409951  543705 cpu.go:282] Add success.
W0321 11:15:13.409957  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:15:13.409970  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:15:13.409973  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:15:13.419723  543705 net.go:648] Add success.
I0321 11:15:13.422785  543705 net.go:770] primary dev: ETH0
I0321 11:15:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:15:13.422809  543705 net.go:698] Add success.
I0321 11:15:13.469154  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"221922e7-9bdc-42c6-a943-0bf49e75215b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:15:13.469185  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:15:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:15:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:15:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 11:15:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:15:14.456526  543705 disk_worker.go:494] system disk:vda1
I0321 11:15:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:15:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:15:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:15:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:15:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:15:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:15:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:15:23.409801  543705 memory.go:184] no items to output this cycle
I0321 11:15:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 11:15:29.253674  543705 disk_info.go:125] begin check local disk info of client
I0321 11:15:29.256147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:15:29.256154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab700 0xc0001ab740]
E0321 11:15:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:15:33.409782  543705 memory.go:184] no items to output this cycle
I0321 11:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 11:15:38.864004  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:15:38.864011  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:15:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:15:43.410615  543705 memory.go:191] Add success.
I0321 11:15:43.409825  543705 cpu.go:282] Add success.
I0321 11:15:43.420324  543705 net.go:648] Add success.
I0321 11:15:43.423090  543705 net.go:770] primary dev: ETH0
I0321 11:15:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:15:43.423117  543705 net.go:698] Add success.
I0321 11:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:15:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:15:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:15:53.409803  543705 memory.go:184] no items to output this cycle
I0321 11:15:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 11:16:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:16:03.409788  543705 memory.go:184] no items to output this cycle
I0321 11:16:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 11:16:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:16:13.409808  543705 memory.go:191] Add success.
I0321 11:16:13.409809  543705 cpu.go:282] Add success.
W0321 11:16:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:16:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:16:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:16:13.420222  543705 net.go:648] Add success.
I0321 11:16:13.423034  543705 net.go:770] primary dev: ETH0
I0321 11:16:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:16:13.423061  543705 net.go:698] Add success.
I0321 11:16:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:16:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:16:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 11:16:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:16:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 11:16:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:16:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:16:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:16:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:16:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:16:23.409783  543705 memory.go:184] no items to output this cycle
I0321 11:16:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 11:16:29.257674  543705 disk_info.go:125] begin check local disk info of client
I0321 11:16:29.260202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:16:29.260210  543705 disk_info.go:196] parse disk info done, disk is : [0xc000574000 0xc000574040]
E0321 11:16:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:16:33.409759  543705 memory.go:184] no items to output this cycle
I0321 11:16:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 11:16:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:16:43.409790  543705 memory.go:191] Add success.
I0321 11:16:43.409790  543705 cpu.go:282] Add success.
I0321 11:16:43.419880  543705 net.go:648] Add success.
I0321 11:16:43.422754  543705 net.go:770] primary dev: ETH0
I0321 11:16:43.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:16:43.422786  543705 net.go:698] Add success.
I0321 11:16:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:16:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:16:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:16:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:16:53.409814  543705 memory.go:184] no items to output this cycle
I0321 11:16:53.409824  543705 cpu.go:275] no items to output this cycle
E0321 11:17:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:17:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:17:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 11:17:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:17:13.409824  543705 memory.go:191] Add success.
I0321 11:17:13.409842  543705 cpu.go:282] Add success.
W0321 11:17:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:17:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:17:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:17:13.420181  543705 net.go:648] Add success.
I0321 11:17:13.422843  543705 net.go:770] primary dev: ETH0
I0321 11:17:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:17:13.422868  543705 net.go:698] Add success.
I0321 11:17:13.453397  543705 event_worker.go:152] Polling the log file for events...
W0321 11:17:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:17:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 11:17:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:17:14.455921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:17:14.455930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:17:14.455936  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:17:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 11:17:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:17:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:17:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:17:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:17:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:17:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:17:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:17:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:17:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:17:23.409792  543705 memory.go:184] no items to output this cycle
I0321 11:17:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 11:17:29.261672  543705 disk_info.go:125] begin check local disk info of client
I0321 11:17:29.264118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:17:29.264123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005744c0 0xc000574500]
E0321 11:17:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:17:33.409773  543705 memory.go:184] no items to output this cycle
I0321 11:17:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:17:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:17:43.409787  543705 memory.go:191] Add success.
I0321 11:17:43.409788  543705 cpu.go:282] Add success.
I0321 11:17:43.419887  543705 net.go:648] Add success.
I0321 11:17:43.422340  543705 net.go:770] primary dev: ETH0
I0321 11:17:43.422352  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:17:43.422364  543705 net.go:698] Add success.
I0321 11:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:17:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:17:53.409806  543705 memory.go:184] no items to output this cycle
I0321 11:17:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 11:18:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:18:03.409781  543705 memory.go:184] no items to output this cycle
I0321 11:18:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 11:18:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:18:13.409799  543705 memory.go:191] Add success.
I0321 11:18:13.409800  543705 cpu.go:282] Add success.
W0321 11:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:18:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:18:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:18:13.420232  543705 net.go:648] Add success.
I0321 11:18:13.422842  543705 net.go:770] primary dev: ETH0
I0321 11:18:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:18:13.422866  543705 net.go:698] Add success.
I0321 11:18:13.534466  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"82ce742e-2c9d-4a0f-a1c6-19fcebf9900b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:18:13.534498  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:18:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:18:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 11:18:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:18:14.456658  543705 disk_worker.go:494] system disk:vda1
I0321 11:18:14.456687  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:18:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:18:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:18:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:18:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:18:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:18:23.409773  543705 memory.go:184] no items to output this cycle
I0321 11:18:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 11:18:29.265676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:18:29.268156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:18:29.268162  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353940 0xc000353980]
E0321 11:18:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:18:33.409794  543705 memory.go:184] no items to output this cycle
I0321 11:18:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 11:18:38.865732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:18:38.865738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:18:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:18:43.410603  543705 memory.go:191] Add success.
I0321 11:18:43.409794  543705 cpu.go:282] Add success.
I0321 11:18:43.420284  543705 net.go:648] Add success.
I0321 11:18:43.422881  543705 net.go:770] primary dev: ETH0
I0321 11:18:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:18:43.422905  543705 net.go:698] Add success.
I0321 11:18:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:18:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:18:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:18:53.409777  543705 memory.go:184] no items to output this cycle
I0321 11:18:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:19:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:19:03.409778  543705 memory.go:184] no items to output this cycle
I0321 11:19:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 11:19:13.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:19:13.409923  543705 memory.go:191] Add success.
W0321 11:19:13.410031  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 11:19:13.410035  543705 cpu.go:282] Add success.
W0321 11:19:13.410044  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:19:13.410047  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:19:13.419735  543705 net.go:648] Add success.
I0321 11:19:13.422844  543705 net.go:770] primary dev: ETH0
I0321 11:19:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:19:13.422869  543705 net.go:698] Add success.
I0321 11:19:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:19:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:19:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 11:19:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:19:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 11:19:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:19:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:19:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:19:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:19:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:19:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:19:23.409779  543705 memory.go:184] no items to output this cycle
I0321 11:19:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 11:19:29.269676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:19:29.272156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:19:29.272162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b0240 0xc0004b0280]
E0321 11:19:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:19:33.409766  543705 memory.go:184] no items to output this cycle
I0321 11:19:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 11:19:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:19:43.409784  543705 memory.go:191] Add success.
I0321 11:19:43.409805  543705 cpu.go:282] Add success.
I0321 11:19:43.419823  543705 net.go:648] Add success.
I0321 11:19:43.422596  543705 net.go:770] primary dev: ETH0
I0321 11:19:43.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:19:43.422622  543705 net.go:698] Add success.
I0321 11:19:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:19:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:19:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:19:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:19:53.409774  543705 memory.go:184] no items to output this cycle
I0321 11:19:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 11:20:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:20:03.409798  543705 memory.go:184] no items to output this cycle
I0321 11:20:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 11:20:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:20:13.409796  543705 memory.go:191] Add success.
I0321 11:20:13.409797  543705 cpu.go:282] Add success.
W0321 11:20:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:20:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:20:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:20:13.419742  543705 net.go:648] Add success.
I0321 11:20:13.422876  543705 net.go:770] primary dev: ETH0
I0321 11:20:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:20:13.422904  543705 net.go:698] Add success.
I0321 11:20:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:20:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:20:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 11:20:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:20:14.456608  543705 disk_worker.go:494] system disk:vda1
I0321 11:20:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:20:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:20:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:20:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:20:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:20:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:20:23.409762  543705 memory.go:184] no items to output this cycle
I0321 11:20:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 11:20:29.273675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:20:29.276180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:20:29.276186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000575000 0xc000575040]
E0321 11:20:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:20:33.409790  543705 memory.go:184] no items to output this cycle
I0321 11:20:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 11:20:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:20:43.409819  543705 memory.go:191] Add success.
I0321 11:20:43.409838  543705 cpu.go:282] Add success.
I0321 11:20:43.419862  543705 net.go:648] Add success.
I0321 11:20:43.422859  543705 net.go:770] primary dev: ETH0
I0321 11:20:43.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:20:43.422884  543705 net.go:698] Add success.
I0321 11:20:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:20:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:20:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:20:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:20:53.409808  543705 memory.go:184] no items to output this cycle
I0321 11:20:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 11:21:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:21:03.409798  543705 memory.go:184] no items to output this cycle
I0321 11:21:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 11:21:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:21:13.409820  543705 memory.go:191] Add success.
I0321 11:21:13.409835  543705 cpu.go:282] Add success.
W0321 11:21:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:21:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:21:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:21:13.420456  543705 net.go:648] Add success.
I0321 11:21:13.423907  543705 net.go:770] primary dev: ETH0
I0321 11:21:13.423920  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:21:13.423932  543705 net.go:698] Add success.
I0321 11:21:13.506447  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bcc96bfd-21ca-44e7-9b68-c3e88123d24b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:21:13.506479  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:21:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:21:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:21:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 11:21:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:21:14.456726  543705 disk_worker.go:494] system disk:vda1
I0321 11:21:14.456756  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:21:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:21:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:21:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:21:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:21:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:21:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:21:23.409795  543705 memory.go:184] no items to output this cycle
I0321 11:21:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 11:21:29.277670  543705 disk_info.go:125] begin check local disk info of client
I0321 11:21:29.280128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:21:29.280134  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032bac0 0xc00032bb00]
E0321 11:21:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:21:33.409764  543705 memory.go:184] no items to output this cycle
I0321 11:21:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 11:21:38.868038  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:21:38.868045  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:21:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:21:43.410529  543705 memory.go:191] Add success.
I0321 11:21:43.409789  543705 cpu.go:282] Add success.
I0321 11:21:43.420228  543705 net.go:648] Add success.
I0321 11:21:43.422813  543705 net.go:770] primary dev: ETH0
I0321 11:21:43.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:21:43.422838  543705 net.go:698] Add success.
I0321 11:21:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:21:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:21:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:21:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:21:53.409807  543705 memory.go:184] no items to output this cycle
I0321 11:21:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 11:22:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:22:03.409766  543705 memory.go:184] no items to output this cycle
I0321 11:22:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 11:22:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:22:13.409800  543705 memory.go:191] Add success.
I0321 11:22:13.409806  543705 cpu.go:282] Add success.
W0321 11:22:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:22:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:22:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:22:13.420245  543705 net.go:648] Add success.
I0321 11:22:13.423591  543705 net.go:770] primary dev: ETH0
I0321 11:22:13.423606  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:22:13.423618  543705 net.go:698] Add success.
W0321 11:22:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:22:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 11:22:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:22:14.455894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:22:14.455903  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:22:14.455909  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:22:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 11:22:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:22:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:22:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:22:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:22:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:22:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:22:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:22:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:22:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:22:23.409777  543705 memory.go:184] no items to output this cycle
I0321 11:22:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 11:22:29.281675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:22:29.284175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:22:29.284181  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471d00 0xc000471d40]
E0321 11:22:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:22:33.409774  543705 memory.go:184] no items to output this cycle
I0321 11:22:33.409777  543705 cpu.go:275] no items to output this cycle
E0321 11:22:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:22:43.409819  543705 memory.go:191] Add success.
I0321 11:22:43.409827  543705 cpu.go:282] Add success.
I0321 11:22:43.419862  543705 net.go:648] Add success.
I0321 11:22:43.422372  543705 net.go:770] primary dev: ETH0
I0321 11:22:43.422387  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:22:43.422401  543705 net.go:698] Add success.
I0321 11:22:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:22:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:22:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:22:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:22:53.409813  543705 memory.go:184] no items to output this cycle
I0321 11:22:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 11:23:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:23:03.409765  543705 memory.go:184] no items to output this cycle
I0321 11:23:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 11:23:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:23:13.409796  543705 memory.go:191] Add success.
I0321 11:23:13.409800  543705 cpu.go:282] Add success.
W0321 11:23:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:23:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:23:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:23:13.420079  543705 net.go:648] Add success.
I0321 11:23:13.423429  543705 net.go:770] primary dev: ETH0
I0321 11:23:13.423442  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:23:13.423458  543705 net.go:698] Add success.
I0321 11:23:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:23:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:23:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 11:23:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:23:14.456514  543705 disk_worker.go:494] system disk:vda1
I0321 11:23:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:23:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:23:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:23:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:23:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:23:23.410286  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:23:23.410300  543705 memory.go:184] no items to output this cycle
I0321 11:23:23.410302  543705 cpu.go:275] no items to output this cycle
I0321 11:23:29.285675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:23:29.288120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:23:29.288126  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abd00 0xc0002abd40]
E0321 11:23:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:23:33.409763  543705 memory.go:184] no items to output this cycle
I0321 11:23:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 11:23:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:23:43.409817  543705 memory.go:191] Add success.
I0321 11:23:43.409826  543705 cpu.go:282] Add success.
I0321 11:23:43.419853  543705 net.go:648] Add success.
I0321 11:23:43.422974  543705 net.go:770] primary dev: ETH0
I0321 11:23:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:23:43.423000  543705 net.go:698] Add success.
I0321 11:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:23:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:23:53.410371  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:23:53.410392  543705 memory.go:184] no items to output this cycle
I0321 11:23:53.410401  543705 cpu.go:275] no items to output this cycle
E0321 11:24:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:24:03.409767  543705 memory.go:184] no items to output this cycle
I0321 11:24:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:24:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:24:13.409795  543705 memory.go:191] Add success.
I0321 11:24:13.409800  543705 cpu.go:282] Add success.
W0321 11:24:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:24:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:24:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:24:13.420132  543705 net.go:648] Add success.
I0321 11:24:13.422872  543705 net.go:770] primary dev: ETH0
I0321 11:24:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:24:13.422902  543705 net.go:698] Add success.
I0321 11:24:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:24:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:24:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 11:24:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:24:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 11:24:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:24:14.486008  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"55986900-e0b7-4735-9f71-3743eb350347","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:24:14.486158  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:24:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:24:16.458318  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:24:16.458379  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:24:16.458403  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:24:16.472784  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:24:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:24:23.409779  543705 memory.go:184] no items to output this cycle
I0321 11:24:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 11:24:29.289678  543705 disk_info.go:125] begin check local disk info of client
I0321 11:24:29.292142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:24:29.292148  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032fac0 0xc00032fb00]
E0321 11:24:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:24:33.409790  543705 memory.go:184] no items to output this cycle
I0321 11:24:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 11:24:38.869748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:24:38.869755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:24:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:24:43.410538  543705 memory.go:191] Add success.
I0321 11:24:43.409806  543705 cpu.go:282] Add success.
I0321 11:24:43.420226  543705 net.go:648] Add success.
I0321 11:24:43.423091  543705 net.go:770] primary dev: ETH0
I0321 11:24:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:24:43.423121  543705 net.go:698] Add success.
I0321 11:24:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:24:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:24:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:24:53.410342  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:24:53.410359  543705 memory.go:184] no items to output this cycle
I0321 11:24:53.410358  543705 cpu.go:275] no items to output this cycle
E0321 11:25:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:25:03.409777  543705 memory.go:184] no items to output this cycle
I0321 11:25:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 11:25:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:25:13.409799  543705 memory.go:191] Add success.
I0321 11:25:13.409805  543705 cpu.go:282] Add success.
W0321 11:25:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:25:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:25:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:25:13.420110  543705 net.go:648] Add success.
I0321 11:25:13.422794  543705 net.go:770] primary dev: ETH0
I0321 11:25:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:25:13.422820  543705 net.go:698] Add success.
I0321 11:25:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:25:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:25:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 11:25:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:25:14.456490  543705 disk_worker.go:494] system disk:vda1
I0321 11:25:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:25:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:25:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:25:16.458149  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:25:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:25:23.409776  543705 memory.go:184] no items to output this cycle
I0321 11:25:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 11:25:29.293671  543705 disk_info.go:125] begin check local disk info of client
I0321 11:25:29.296157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:25:29.296163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278300 0xc000278340]
E0321 11:25:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:25:33.409777  543705 memory.go:184] no items to output this cycle
I0321 11:25:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 11:25:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:25:43.409788  543705 memory.go:191] Add success.
I0321 11:25:43.409819  543705 cpu.go:282] Add success.
I0321 11:25:43.419840  543705 net.go:648] Add success.
I0321 11:25:43.422742  543705 net.go:770] primary dev: ETH0
I0321 11:25:43.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:25:43.422776  543705 net.go:698] Add success.
I0321 11:25:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:25:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:25:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:25:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:25:53.409797  543705 memory.go:184] no items to output this cycle
I0321 11:25:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 11:26:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:26:03.409771  543705 memory.go:184] no items to output this cycle
I0321 11:26:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 11:26:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:26:13.409819  543705 memory.go:191] Add success.
I0321 11:26:13.409826  543705 cpu.go:282] Add success.
W0321 11:26:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:26:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:26:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:26:13.420082  543705 net.go:648] Add success.
I0321 11:26:13.423023  543705 net.go:770] primary dev: ETH0
I0321 11:26:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:26:13.423052  543705 net.go:698] Add success.
I0321 11:26:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:26:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:26:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 11:26:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:26:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 11:26:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:26:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:26:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:26:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:26:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:26:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:26:23.409778  543705 memory.go:184] no items to output this cycle
I0321 11:26:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 11:26:29.297675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:26:29.300155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:26:29.300162  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390000 0xc000390040]
E0321 11:26:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:26:33.409795  543705 memory.go:184] no items to output this cycle
I0321 11:26:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 11:26:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:26:43.409778  543705 memory.go:191] Add success.
I0321 11:26:43.409795  543705 cpu.go:282] Add success.
I0321 11:26:43.420064  543705 net.go:648] Add success.
I0321 11:26:43.423149  543705 net.go:770] primary dev: ETH0
I0321 11:26:43.423168  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:26:43.423183  543705 net.go:698] Add success.
I0321 11:26:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:26:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:26:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:26:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:26:53.409808  543705 memory.go:184] no items to output this cycle
I0321 11:26:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 11:27:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:27:03.409781  543705 memory.go:184] no items to output this cycle
I0321 11:27:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 11:27:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:27:13.409779  543705 memory.go:191] Add success.
I0321 11:27:13.409796  543705 cpu.go:282] Add success.
W0321 11:27:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:27:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:27:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:27:13.420132  543705 net.go:648] Add success.
I0321 11:27:13.422655  543705 net.go:770] primary dev: ETH0
I0321 11:27:13.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:27:13.422681  543705 net.go:698] Add success.
I0321 11:27:13.429307  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 11:27:13.453543  543705 event_worker.go:152] Polling the log file for events...
I0321 11:27:13.469392  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e32c47e3-2dc7-4924-afdc-0c00d3b239b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:27:13.469427  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 11:27:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:27:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 11:27:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:27:14.455912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:27:14.455921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:27:14.455926  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:27:14.456543  543705 disk_worker.go:494] system disk:vda1
I0321 11:27:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:27:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:27:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:27:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:27:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:27:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:27:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:27:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:27:23.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:27:23.409877  543705 memory.go:184] no items to output this cycle
I0321 11:27:23.410000  543705 cpu.go:275] no items to output this cycle
I0321 11:27:29.301687  543705 disk_info.go:125] begin check local disk info of client
I0321 11:27:29.304182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:27:29.304190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376d80 0xc000376dc0]
E0321 11:27:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:27:33.409792  543705 memory.go:184] no items to output this cycle
I0321 11:27:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 11:27:38.872065  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:27:38.872072  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:27:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:27:43.410679  543705 memory.go:191] Add success.
I0321 11:27:43.409805  543705 cpu.go:282] Add success.
I0321 11:27:43.420371  543705 net.go:648] Add success.
I0321 11:27:43.423066  543705 net.go:770] primary dev: ETH0
I0321 11:27:43.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:27:43.423092  543705 net.go:698] Add success.
I0321 11:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:27:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:27:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:27:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 11:27:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:27:53.409796  543705 memory.go:184] no items to output this cycle
E0321 11:28:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:28:03.409792  543705 cpu.go:275] no items to output this cycle
I0321 11:28:03.409797  543705 memory.go:184] no items to output this cycle
E0321 11:28:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:28:13.409814  543705 memory.go:191] Add success.
I0321 11:28:13.409821  543705 cpu.go:282] Add success.
W0321 11:28:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:28:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:28:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:28:13.420244  543705 net.go:648] Add success.
I0321 11:28:13.422843  543705 net.go:770] primary dev: ETH0
I0321 11:28:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:28:13.422870  543705 net.go:698] Add success.
I0321 11:28:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:28:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:28:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 11:28:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:28:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 11:28:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:28:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:28:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:28:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:28:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:28:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:28:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:28:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 11:28:23.409782  543705 memory.go:184] no items to output this cycle
I0321 11:28:29.305690  543705 disk_info.go:125] begin check local disk info of client
I0321 11:28:29.308185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:28:29.308191  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ea80 0xc00047eac0]
E0321 11:28:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:28:33.409794  543705 memory.go:184] no items to output this cycle
I0321 11:28:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 11:28:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:28:43.409779  543705 memory.go:191] Add success.
I0321 11:28:43.409799  543705 cpu.go:282] Add success.
I0321 11:28:43.419888  543705 net.go:648] Add success.
I0321 11:28:43.422866  543705 net.go:770] primary dev: ETH0
I0321 11:28:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:28:43.422894  543705 net.go:698] Add success.
I0321 11:28:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:28:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:28:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:28:53.409787  543705 cpu.go:275] no items to output this cycle
I0321 11:28:53.409795  543705 memory.go:184] no items to output this cycle
E0321 11:29:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:29:03.409802  543705 memory.go:184] no items to output this cycle
I0321 11:29:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 11:29:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:29:13.409790  543705 memory.go:191] Add success.
I0321 11:29:13.409807  543705 cpu.go:282] Add success.
W0321 11:29:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:29:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:29:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:29:13.420128  543705 net.go:648] Add success.
I0321 11:29:13.422938  543705 net.go:770] primary dev: ETH0
I0321 11:29:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:29:13.422967  543705 net.go:698] Add success.
I0321 11:29:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:29:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:29:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 11:29:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:29:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 11:29:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:29:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:29:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:29:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:29:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:29:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:29:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:29:23.409791  543705 memory.go:184] no items to output this cycle
I0321 11:29:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 11:29:29.309675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:29:29.312193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:29:29.312200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fc300 0xc0004fc340]
E0321 11:29:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:29:33.409767  543705 memory.go:184] no items to output this cycle
I0321 11:29:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 11:29:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:29:43.409775  543705 memory.go:191] Add success.
I0321 11:29:43.409798  543705 cpu.go:282] Add success.
I0321 11:29:43.419860  543705 net.go:648] Add success.
I0321 11:29:43.422717  543705 net.go:770] primary dev: ETH0
I0321 11:29:43.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:29:43.422746  543705 net.go:698] Add success.
I0321 11:29:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:29:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:29:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:29:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:29:53.409774  543705 memory.go:184] no items to output this cycle
I0321 11:29:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 11:30:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:30:03.409771  543705 memory.go:184] no items to output this cycle
I0321 11:30:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 11:30:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:30:13.409811  543705 memory.go:191] Add success.
I0321 11:30:13.409816  543705 cpu.go:282] Add success.
W0321 11:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:30:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:30:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:30:13.420186  543705 net.go:648] Add success.
I0321 11:30:13.422895  543705 net.go:770] primary dev: ETH0
I0321 11:30:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:30:13.422920  543705 net.go:698] Add success.
I0321 11:30:13.469187  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d67a93d3-4f9c-4f4d-be85-e34993a2ab20","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:30:13.469222  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:30:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:30:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:30:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0321 11:30:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:30:14.456515  543705 disk_worker.go:494] system disk:vda1
I0321 11:30:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:30:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:30:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:30:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:30:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:30:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:30:23.409761  543705 memory.go:184] no items to output this cycle
I0321 11:30:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 11:30:29.313676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:30:29.316154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:30:29.316160  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2000 0xc0002b2040]
E0321 11:30:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:30:33.409793  543705 memory.go:184] no items to output this cycle
I0321 11:30:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 11:30:38.873734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:30:38.873740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:30:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:30:43.409809  543705 cpu.go:282] Add success.
I0321 11:30:43.410634  543705 memory.go:191] Add success.
I0321 11:30:43.420571  543705 net.go:648] Add success.
I0321 11:30:43.423193  543705 net.go:770] primary dev: ETH0
I0321 11:30:43.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:30:43.423217  543705 net.go:698] Add success.
I0321 11:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:30:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:30:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:30:53.409793  543705 cpu.go:275] no items to output this cycle
I0321 11:30:53.409807  543705 memory.go:184] no items to output this cycle
E0321 11:31:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:31:03.409804  543705 memory.go:184] no items to output this cycle
I0321 11:31:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 11:31:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:31:13.409778  543705 memory.go:191] Add success.
W0321 11:31:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 11:31:13.409807  543705 cpu.go:282] Add success.
W0321 11:31:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:31:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:31:13.420058  543705 net.go:648] Add success.
I0321 11:31:13.422891  543705 net.go:770] primary dev: ETH0
I0321 11:31:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:31:13.422920  543705 net.go:698] Add success.
I0321 11:31:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:31:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:31:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 11:31:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:31:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 11:31:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:31:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:31:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:31:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:31:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:31:23.409771  543705 memory.go:184] no items to output this cycle
I0321 11:31:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 11:31:29.317667  543705 disk_info.go:125] begin check local disk info of client
I0321 11:31:29.320181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:31:29.320187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3040 0xc0003d3080]
E0321 11:31:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:31:33.409883  543705 cpu.go:275] no items to output this cycle
I0321 11:31:33.409896  543705 memory.go:184] no items to output this cycle
E0321 11:31:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:31:43.409785  543705 memory.go:191] Add success.
I0321 11:31:43.409787  543705 cpu.go:282] Add success.
I0321 11:31:43.419874  543705 net.go:648] Add success.
I0321 11:31:43.422848  543705 net.go:770] primary dev: ETH0
I0321 11:31:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:31:43.422890  543705 net.go:698] Add success.
I0321 11:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:31:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:31:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:31:53.409808  543705 memory.go:184] no items to output this cycle
I0321 11:31:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 11:32:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:32:03.409780  543705 memory.go:184] no items to output this cycle
I0321 11:32:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 11:32:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:32:13.409813  543705 memory.go:191] Add success.
I0321 11:32:13.409815  543705 cpu.go:282] Add success.
W0321 11:32:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:32:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:32:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:32:13.420135  543705 net.go:648] Add success.
I0321 11:32:13.423212  543705 net.go:770] primary dev: ETH0
I0321 11:32:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:32:13.423240  543705 net.go:698] Add success.
W0321 11:32:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:32:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0321 11:32:14.455159  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:32:14.456876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:32:14.456884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:32:14.456891  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:32:14.456964  543705 disk_worker.go:494] system disk:vda1
I0321 11:32:14.457008  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:32:15.456491  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:32:15.456500  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:32:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:32:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:32:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:32:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:32:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:32:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:32:23.409801  543705 memory.go:184] no items to output this cycle
I0321 11:32:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 11:32:29.321687  543705 disk_info.go:125] begin check local disk info of client
I0321 11:32:29.324150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:32:29.324156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049b4c0 0xc00049b500]
E0321 11:32:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:32:33.409787  543705 memory.go:184] no items to output this cycle
I0321 11:32:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 11:32:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:32:43.409822  543705 memory.go:191] Add success.
I0321 11:32:43.409832  543705 cpu.go:282] Add success.
I0321 11:32:43.420481  543705 net.go:648] Add success.
I0321 11:32:43.423043  543705 net.go:770] primary dev: ETH0
I0321 11:32:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:32:43.423069  543705 net.go:698] Add success.
I0321 11:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:32:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:32:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:32:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:32:53.409822  543705 memory.go:184] no items to output this cycle
I0321 11:32:53.409830  543705 cpu.go:275] no items to output this cycle
E0321 11:33:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:33:03.409807  543705 memory.go:184] no items to output this cycle
I0321 11:33:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 11:33:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:33:13.409787  543705 memory.go:191] Add success.
I0321 11:33:13.409812  543705 cpu.go:282] Add success.
W0321 11:33:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:33:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:33:13.420361  543705 net.go:648] Add success.
I0321 11:33:13.423047  543705 net.go:770] primary dev: ETH0
I0321 11:33:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:33:13.423079  543705 net.go:698] Add success.
I0321 11:33:13.463620  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75d3d24d-b8b6-45f4-a374-35a2d8ba8260","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:33:13.463654  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:33:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:33:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:33:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 11:33:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:33:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 11:33:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:33:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:33:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:33:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:33:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:33:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:33:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:33:23.409775  543705 memory.go:184] no items to output this cycle
I0321 11:33:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 11:33:29.325676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:33:29.328138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:33:29.328144  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359bc0 0xc000359c00]
E0321 11:33:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:33:33.409766  543705 memory.go:184] no items to output this cycle
I0321 11:33:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 11:33:38.876066  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:33:38.876073  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:33:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:33:43.410639  543705 memory.go:191] Add success.
I0321 11:33:43.409828  543705 cpu.go:282] Add success.
I0321 11:33:43.420362  543705 net.go:648] Add success.
I0321 11:33:43.423014  543705 net.go:770] primary dev: ETH0
I0321 11:33:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:33:43.423043  543705 net.go:698] Add success.
I0321 11:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:33:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:33:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:33:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:33:53.409812  543705 memory.go:184] no items to output this cycle
I0321 11:33:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 11:34:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:34:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:34:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 11:34:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:34:13.409822  543705 memory.go:191] Add success.
I0321 11:34:13.409842  543705 cpu.go:282] Add success.
W0321 11:34:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:34:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:34:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:34:13.420066  543705 net.go:648] Add success.
I0321 11:34:13.422742  543705 net.go:770] primary dev: ETH0
I0321 11:34:13.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:34:13.422768  543705 net.go:698] Add success.
I0321 11:34:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:34:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:34:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 11:34:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:34:14.456522  543705 disk_worker.go:494] system disk:vda1
I0321 11:34:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:34:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:34:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:34:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:34:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:34:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:34:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:34:23.409795  543705 memory.go:184] no items to output this cycle
I0321 11:34:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 11:34:29.329678  543705 disk_info.go:125] begin check local disk info of client
I0321 11:34:29.332128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:34:29.332134  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051edc0 0xc00051ee00]
E0321 11:34:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:34:33.409789  543705 memory.go:184] no items to output this cycle
I0321 11:34:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 11:34:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:34:43.409820  543705 memory.go:191] Add success.
I0321 11:34:43.409828  543705 cpu.go:282] Add success.
I0321 11:34:43.420418  543705 net.go:648] Add success.
I0321 11:34:43.422971  543705 net.go:770] primary dev: ETH0
I0321 11:34:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:34:43.423000  543705 net.go:698] Add success.
I0321 11:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:34:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:34:53.409767  543705 memory.go:184] no items to output this cycle
I0321 11:34:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 11:35:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:35:03.409778  543705 memory.go:184] no items to output this cycle
I0321 11:35:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:35:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:35:13.409810  543705 memory.go:191] Add success.
I0321 11:35:13.409816  543705 cpu.go:282] Add success.
W0321 11:35:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:35:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:35:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:35:13.420149  543705 net.go:648] Add success.
I0321 11:35:13.422899  543705 net.go:770] primary dev: ETH0
I0321 11:35:13.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:35:13.422936  543705 net.go:698] Add success.
I0321 11:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:35:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:35:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 11:35:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:35:14.456563  543705 disk_worker.go:494] system disk:vda1
I0321 11:35:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:35:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:35:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:35:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:35:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:35:23.409795  543705 memory.go:184] no items to output this cycle
I0321 11:35:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 11:35:29.333672  543705 disk_info.go:125] begin check local disk info of client
I0321 11:35:29.336206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:35:29.336211  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a240 0xc00027a280]
E0321 11:35:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:35:33.409772  543705 memory.go:184] no items to output this cycle
I0321 11:35:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 11:35:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:35:43.409779  543705 memory.go:191] Add success.
I0321 11:35:43.409813  543705 cpu.go:282] Add success.
I0321 11:35:43.419873  543705 net.go:648] Add success.
I0321 11:35:43.422643  543705 net.go:770] primary dev: ETH0
I0321 11:35:43.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:35:43.422666  543705 net.go:698] Add success.
I0321 11:35:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:35:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:35:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:35:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:35:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 11:35:53.409791  543705 memory.go:184] no items to output this cycle
E0321 11:36:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:36:03.409806  543705 memory.go:184] no items to output this cycle
I0321 11:36:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 11:36:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:36:13.409797  543705 memory.go:191] Add success.
I0321 11:36:13.409814  543705 cpu.go:282] Add success.
W0321 11:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:36:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:36:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:36:13.420271  543705 net.go:648] Add success.
I0321 11:36:13.422864  543705 net.go:770] primary dev: ETH0
I0321 11:36:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:36:13.422889  543705 net.go:698] Add success.
I0321 11:36:13.469745  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6dae770d-eeff-4a2d-b0be-6a5f9b573969","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:36:13.469779  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:36:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:36:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:36:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 11:36:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:36:14.456710  543705 disk_worker.go:494] system disk:vda1
I0321 11:36:14.456741  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:36:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:36:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:36:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:36:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:36:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:36:23.409776  543705 memory.go:184] no items to output this cycle
I0321 11:36:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 11:36:29.337675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:36:29.340240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:36:29.340247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff540 0xc0003ff580]
E0321 11:36:33.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:36:33.409865  543705 memory.go:184] no items to output this cycle
I0321 11:36:33.409935  543705 cpu.go:275] no items to output this cycle
I0321 11:36:38.877739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:36:38.877745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:36:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:36:43.410701  543705 memory.go:191] Add success.
I0321 11:36:43.409795  543705 cpu.go:282] Add success.
I0321 11:36:43.420414  543705 net.go:648] Add success.
I0321 11:36:43.423266  543705 net.go:770] primary dev: ETH0
I0321 11:36:43.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:36:43.423292  543705 net.go:698] Add success.
I0321 11:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:36:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:36:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:36:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:36:53.409776  543705 memory.go:184] no items to output this cycle
I0321 11:36:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 11:37:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:37:03.409767  543705 memory.go:184] no items to output this cycle
I0321 11:37:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 11:37:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:37:13.409817  543705 memory.go:191] Add success.
I0321 11:37:13.409830  543705 cpu.go:282] Add success.
W0321 11:37:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:37:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:37:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:37:13.420140  543705 net.go:648] Add success.
I0321 11:37:13.422819  543705 net.go:770] primary dev: ETH0
I0321 11:37:13.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:37:13.422847  543705 net.go:698] Add success.
I0321 11:37:13.453417  543705 event_worker.go:152] Polling the log file for events...
W0321 11:37:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:37:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 11:37:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:37:14.456825  543705 disk_worker.go:494] system disk:vda1
I0321 11:37:14.456864  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:37:14.457521  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:37:14.457528  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:37:14.457541  543705 custom_config.go:64] query custom config with name: gpu
E0321 11:37:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:37:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:37:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:37:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:37:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:37:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:37:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:37:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:37:23.409766  543705 memory.go:184] no items to output this cycle
I0321 11:37:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 11:37:29.341673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:37:29.344122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:37:29.344128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ff880 0xc0004ff8c0]
E0321 11:37:33.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:37:33.409909  543705 cpu.go:275] no items to output this cycle
I0321 11:37:33.409922  543705 memory.go:184] no items to output this cycle
E0321 11:37:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:37:43.409782  543705 memory.go:191] Add success.
I0321 11:37:43.409788  543705 cpu.go:282] Add success.
I0321 11:37:43.419837  543705 net.go:648] Add success.
I0321 11:37:43.422735  543705 net.go:770] primary dev: ETH0
I0321 11:37:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:37:43.422764  543705 net.go:698] Add success.
I0321 11:37:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:37:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:37:53.410208  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:37:53.410229  543705 memory.go:184] no items to output this cycle
I0321 11:37:53.410242  543705 cpu.go:275] no items to output this cycle
E0321 11:38:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:38:03.409810  543705 memory.go:184] no items to output this cycle
I0321 11:38:03.409822  543705 cpu.go:275] no items to output this cycle
E0321 11:38:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:38:13.409777  543705 memory.go:191] Add success.
I0321 11:38:13.409800  543705 cpu.go:282] Add success.
W0321 11:38:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:38:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:38:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:38:13.420058  543705 net.go:648] Add success.
I0321 11:38:13.422598  543705 net.go:770] primary dev: ETH0
I0321 11:38:13.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:38:13.422622  543705 net.go:698] Add success.
I0321 11:38:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:38:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:38:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 11:38:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:38:14.456527  543705 disk_worker.go:494] system disk:vda1
I0321 11:38:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:38:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:38:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:38:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:38:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:38:23.409773  543705 memory.go:184] no items to output this cycle
I0321 11:38:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 11:38:29.345675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:38:29.348259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:38:29.348267  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508000 0xc000508f40]
E0321 11:38:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:38:33.409797  543705 memory.go:184] no items to output this cycle
I0321 11:38:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 11:38:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:38:43.409776  543705 memory.go:191] Add success.
I0321 11:38:43.409808  543705 cpu.go:282] Add success.
I0321 11:38:43.419842  543705 net.go:648] Add success.
I0321 11:38:43.422438  543705 net.go:770] primary dev: ETH0
I0321 11:38:43.422450  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:38:43.422462  543705 net.go:698] Add success.
I0321 11:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:38:53.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:38:53.409837  543705 memory.go:184] no items to output this cycle
I0321 11:38:53.409849  543705 cpu.go:275] no items to output this cycle
E0321 11:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:39:03.409779  543705 memory.go:184] no items to output this cycle
I0321 11:39:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 11:39:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:39:13.409817  543705 memory.go:191] Add success.
I0321 11:39:13.409821  543705 cpu.go:282] Add success.
W0321 11:39:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:39:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:39:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:39:13.420178  543705 net.go:648] Add success.
I0321 11:39:13.423218  543705 net.go:770] primary dev: ETH0
I0321 11:39:13.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:39:13.423247  543705 net.go:698] Add success.
I0321 11:39:13.467962  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4d2d4dc-3492-454d-93b3-e5284b24e157","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:39:13.467997  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:39:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:39:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:39:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 11:39:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:39:14.456738  543705 disk_worker.go:494] system disk:vda1
I0321 11:39:14.456773  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:39:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:39:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:39:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:39:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:39:23.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:39:23.409870  543705 cpu.go:275] no items to output this cycle
I0321 11:39:23.409872  543705 memory.go:184] no items to output this cycle
I0321 11:39:29.349672  543705 disk_info.go:125] begin check local disk info of client
I0321 11:39:29.352167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:39:29.352173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e6280 0xc0001e62c0]
E0321 11:39:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:39:33.409791  543705 memory.go:184] no items to output this cycle
I0321 11:39:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 11:39:38.877888  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:39:38.877895  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:39:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:39:43.410679  543705 memory.go:191] Add success.
I0321 11:39:43.409796  543705 cpu.go:282] Add success.
I0321 11:39:43.420405  543705 net.go:648] Add success.
I0321 11:39:43.422927  543705 net.go:770] primary dev: ETH0
I0321 11:39:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:39:43.422956  543705 net.go:698] Add success.
I0321 11:39:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:39:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:39:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:39:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:39:53.409812  543705 memory.go:184] no items to output this cycle
I0321 11:39:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 11:40:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:40:03.409802  543705 memory.go:184] no items to output this cycle
I0321 11:40:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 11:40:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:40:13.409822  543705 memory.go:191] Add success.
I0321 11:40:13.409826  543705 cpu.go:282] Add success.
W0321 11:40:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:40:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:40:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:40:13.419919  543705 net.go:770] primary dev: ETH0
I0321 11:40:13.419932  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:40:13.419945  543705 net.go:698] Add success.
I0321 11:40:13.420181  543705 net.go:648] Add success.
I0321 11:40:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:40:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:40:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 11:40:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:40:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 11:40:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:40:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:40:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:40:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:40:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:40:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:40:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:40:23.409769  543705 memory.go:184] no items to output this cycle
I0321 11:40:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 11:40:29.353677  543705 disk_info.go:125] begin check local disk info of client
I0321 11:40:29.356188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:40:29.356195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469d00 0xc000469d40]
E0321 11:40:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:40:33.409779  543705 memory.go:184] no items to output this cycle
I0321 11:40:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:40:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:40:43.409811  543705 memory.go:191] Add success.
I0321 11:40:43.409824  543705 cpu.go:282] Add success.
I0321 11:40:43.419876  543705 net.go:648] Add success.
I0321 11:40:43.422822  543705 net.go:770] primary dev: ETH0
I0321 11:40:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:40:43.422846  543705 net.go:698] Add success.
I0321 11:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:40:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:40:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:40:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:40:53.409785  543705 memory.go:184] no items to output this cycle
I0321 11:40:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 11:41:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:41:03.409769  543705 memory.go:184] no items to output this cycle
I0321 11:41:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 11:41:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:41:13.409788  543705 memory.go:191] Add success.
I0321 11:41:13.409805  543705 cpu.go:282] Add success.
W0321 11:41:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:41:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:41:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:41:13.420063  543705 net.go:648] Add success.
I0321 11:41:13.422816  543705 net.go:770] primary dev: ETH0
I0321 11:41:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:41:13.422840  543705 net.go:698] Add success.
I0321 11:41:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:41:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:41:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 11:41:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:41:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 11:41:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:41:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:41:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:41:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:41:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:41:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:41:23.409790  543705 memory.go:184] no items to output this cycle
I0321 11:41:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 11:41:29.357692  543705 disk_info.go:125] begin check local disk info of client
I0321 11:41:29.360185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:41:29.360191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306000 0xc000306040]
E0321 11:41:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:41:33.409762  543705 memory.go:184] no items to output this cycle
I0321 11:41:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 11:41:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:41:43.409781  543705 memory.go:191] Add success.
I0321 11:41:43.409805  543705 cpu.go:282] Add success.
I0321 11:41:43.419886  543705 net.go:648] Add success.
I0321 11:41:43.423047  543705 net.go:770] primary dev: ETH0
I0321 11:41:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:41:43.423072  543705 net.go:698] Add success.
I0321 11:41:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:41:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:41:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:41:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:41:53.409785  543705 cpu.go:275] no items to output this cycle
I0321 11:41:53.409790  543705 memory.go:184] no items to output this cycle
E0321 11:42:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:42:03.409784  543705 memory.go:184] no items to output this cycle
I0321 11:42:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 11:42:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:42:13.409792  543705 memory.go:191] Add success.
I0321 11:42:13.409792  543705 cpu.go:282] Add success.
W0321 11:42:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:42:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:42:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:42:13.420151  543705 net.go:648] Add success.
I0321 11:42:13.423100  543705 net.go:770] primary dev: ETH0
I0321 11:42:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:42:13.423125  543705 net.go:698] Add success.
I0321 11:42:13.849097  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"46dcb2a0-60e4-49b0-b279-18243dae999c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:42:13.849134  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 11:42:14.454848  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:42:14.454908  543705 disk_worker.go:708] disk space is not compliant
W0321 11:42:14.454912  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:42:14.455838  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:42:14.455847  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:42:14.455861  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:42:14.456213  543705 disk_worker.go:494] system disk:vda1
I0321 11:42:14.456255  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:42:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:42:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:42:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:42:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:42:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:42:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:42:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:42:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:42:23.409792  543705 memory.go:184] no items to output this cycle
I0321 11:42:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 11:42:29.361667  543705 disk_info.go:125] begin check local disk info of client
I0321 11:42:29.364187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:42:29.364193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004adb00 0xc0004adb40]
E0321 11:42:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:42:33.409765  543705 memory.go:184] no items to output this cycle
I0321 11:42:33.409897  543705 cpu.go:275] no items to output this cycle
I0321 11:42:38.880100  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:42:38.880106  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:42:43.410604  543705 memory.go:191] Add success.
I0321 11:42:43.409791  543705 cpu.go:282] Add success.
I0321 11:42:43.420294  543705 net.go:648] Add success.
I0321 11:42:43.422776  543705 net.go:770] primary dev: ETH0
I0321 11:42:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:42:43.422801  543705 net.go:698] Add success.
I0321 11:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:42:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:42:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:42:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:42:53.409777  543705 memory.go:184] no items to output this cycle
I0321 11:42:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 11:43:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:43:03.409792  543705 memory.go:184] no items to output this cycle
I0321 11:43:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 11:43:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:43:13.409793  543705 memory.go:191] Add success.
I0321 11:43:13.409809  543705 cpu.go:282] Add success.
W0321 11:43:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:43:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:43:13.420057  543705 net.go:648] Add success.
I0321 11:43:13.422657  543705 net.go:770] primary dev: ETH0
I0321 11:43:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:43:13.422682  543705 net.go:698] Add success.
I0321 11:43:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:43:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:43:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 11:43:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:43:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 11:43:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:43:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:43:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:43:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:43:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:43:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:43:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:43:23.409777  543705 memory.go:184] no items to output this cycle
I0321 11:43:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 11:43:29.365674  543705 disk_info.go:125] begin check local disk info of client
I0321 11:43:29.368190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:43:29.368196  543705 disk_info.go:196] parse disk info done, disk is : [0xc000516540 0xc000516580]
E0321 11:43:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:43:33.409801  543705 memory.go:184] no items to output this cycle
I0321 11:43:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:43:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:43:43.409800  543705 memory.go:191] Add success.
I0321 11:43:43.409801  543705 cpu.go:282] Add success.
I0321 11:43:43.419875  543705 net.go:648] Add success.
I0321 11:43:43.422766  543705 net.go:770] primary dev: ETH0
I0321 11:43:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:43:43.422791  543705 net.go:698] Add success.
I0321 11:43:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:43:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:43:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:43:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:43:53.409775  543705 memory.go:184] no items to output this cycle
I0321 11:43:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 11:44:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:44:03.409800  543705 memory.go:184] no items to output this cycle
I0321 11:44:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 11:44:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:44:13.409797  543705 memory.go:191] Add success.
I0321 11:44:13.409799  543705 cpu.go:282] Add success.
W0321 11:44:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:44:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:44:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:44:13.420166  543705 net.go:648] Add success.
I0321 11:44:13.423313  543705 net.go:770] primary dev: ETH0
I0321 11:44:13.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:44:13.423342  543705 net.go:698] Add success.
I0321 11:44:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:44:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:44:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 11:44:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:44:14.456579  543705 disk_worker.go:494] system disk:vda1
I0321 11:44:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:44:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:44:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:44:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:44:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:44:23.409766  543705 memory.go:184] no items to output this cycle
I0321 11:44:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 11:44:29.369672  543705 disk_info.go:125] begin check local disk info of client
I0321 11:44:29.372114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:44:29.372120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aec80 0xc0003aecc0]
E0321 11:44:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:44:33.409794  543705 memory.go:184] no items to output this cycle
I0321 11:44:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:44:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:44:43.409789  543705 memory.go:191] Add success.
I0321 11:44:43.409806  543705 cpu.go:282] Add success.
I0321 11:44:43.419884  543705 net.go:648] Add success.
I0321 11:44:43.422741  543705 net.go:770] primary dev: ETH0
I0321 11:44:43.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:44:43.422765  543705 net.go:698] Add success.
I0321 11:44:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:44:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:44:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:44:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:44:53.409786  543705 memory.go:184] no items to output this cycle
I0321 11:44:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 11:45:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:45:03.409771  543705 memory.go:184] no items to output this cycle
I0321 11:45:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 11:45:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:45:13.409814  543705 memory.go:191] Add success.
I0321 11:45:13.409819  543705 cpu.go:282] Add success.
W0321 11:45:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:45:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:45:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:45:13.420482  543705 net.go:648] Add success.
I0321 11:45:13.423072  543705 net.go:770] primary dev: ETH0
I0321 11:45:13.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:45:13.423096  543705 net.go:698] Add success.
I0321 11:45:13.464728  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab1e982f-a57c-47a5-8a30-151cf197a56b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:45:13.464761  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:45:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:45:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 11:45:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:45:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 11:45:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:45:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:45:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:45:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:45:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:45:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:45:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:45:23.409770  543705 memory.go:184] no items to output this cycle
I0321 11:45:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 11:45:29.373675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:45:29.376147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:45:29.376153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004848c0 0xc000484900]
E0321 11:45:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:45:33.409794  543705 memory.go:184] no items to output this cycle
I0321 11:45:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 11:45:38.881740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:45:38.881746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:45:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:45:43.410663  543705 memory.go:191] Add success.
I0321 11:45:43.409820  543705 cpu.go:282] Add success.
I0321 11:45:43.420356  543705 net.go:648] Add success.
I0321 11:45:43.423345  543705 net.go:770] primary dev: ETH0
I0321 11:45:43.423357  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:45:43.423382  543705 net.go:698] Add success.
I0321 11:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:45:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:45:53.410205  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:45:53.410223  543705 memory.go:184] no items to output this cycle
I0321 11:45:53.410249  543705 cpu.go:275] no items to output this cycle
E0321 11:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:46:03.409779  543705 memory.go:184] no items to output this cycle
I0321 11:46:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 11:46:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:46:13.409789  543705 memory.go:191] Add success.
W0321 11:46:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:46:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:46:13.409826  543705 cpu.go:282] Add success.
I0321 11:46:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:46:13.420106  543705 net.go:648] Add success.
I0321 11:46:13.422742  543705 net.go:770] primary dev: ETH0
I0321 11:46:13.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:46:13.422768  543705 net.go:698] Add success.
I0321 11:46:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:46:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:46:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 11:46:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:46:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 11:46:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:46:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:46:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:46:23.409797  543705 memory.go:184] no items to output this cycle
I0321 11:46:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 11:46:29.377673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:46:29.380160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:46:29.380167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004749c0 0xc000474a00]
E0321 11:46:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:46:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 11:46:33.409783  543705 memory.go:184] no items to output this cycle
E0321 11:46:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:46:43.409796  543705 memory.go:191] Add success.
I0321 11:46:43.409812  543705 cpu.go:282] Add success.
I0321 11:46:43.419886  543705 net.go:648] Add success.
I0321 11:46:43.422319  543705 net.go:770] primary dev: ETH0
I0321 11:46:43.422332  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:46:43.422344  543705 net.go:698] Add success.
I0321 11:46:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:46:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:46:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:46:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:46:53.409806  543705 memory.go:184] no items to output this cycle
I0321 11:46:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 11:47:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:47:03.409770  543705 memory.go:184] no items to output this cycle
I0321 11:47:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 11:47:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:47:13.409809  543705 memory.go:191] Add success.
I0321 11:47:13.409820  543705 cpu.go:282] Add success.
W0321 11:47:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:47:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:47:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:47:13.420210  543705 net.go:648] Add success.
I0321 11:47:13.422785  543705 net.go:770] primary dev: ETH0
I0321 11:47:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:47:13.422811  543705 net.go:698] Add success.
I0321 11:47:13.453354  543705 event_worker.go:152] Polling the log file for events...
W0321 11:47:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:47:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 11:47:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:47:14.456918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:47:14.456927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:47:14.456933  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:47:14.456996  543705 disk_worker.go:494] system disk:vda1
I0321 11:47:14.457037  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:47:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:47:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:47:16.457884  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:47:16.457884  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:47:16.457939  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:47:16.457959  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:47:16.472281  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:47:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:47:23.409794  543705 memory.go:184] no items to output this cycle
I0321 11:47:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 11:47:29.381673  543705 disk_info.go:125] begin check local disk info of client
I0321 11:47:29.384138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:47:29.384145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f01c0 0xc0003f0200]
E0321 11:47:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:47:33.409788  543705 memory.go:184] no items to output this cycle
I0321 11:47:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 11:47:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:47:43.409827  543705 memory.go:191] Add success.
I0321 11:47:43.409836  543705 cpu.go:282] Add success.
I0321 11:47:43.419962  543705 net.go:648] Add success.
I0321 11:47:43.422698  543705 net.go:770] primary dev: ETH0
I0321 11:47:43.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:47:43.422724  543705 net.go:698] Add success.
I0321 11:47:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:47:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:47:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:47:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:47:53.409789  543705 cpu.go:275] no items to output this cycle
I0321 11:47:53.409797  543705 memory.go:184] no items to output this cycle
E0321 11:48:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:48:03.409795  543705 memory.go:184] no items to output this cycle
I0321 11:48:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 11:48:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:48:13.409779  543705 memory.go:191] Add success.
I0321 11:48:13.409811  543705 cpu.go:282] Add success.
W0321 11:48:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:48:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:48:13.420150  543705 net.go:648] Add success.
I0321 11:48:13.423056  543705 net.go:770] primary dev: ETH0
I0321 11:48:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:48:13.423090  543705 net.go:698] Add success.
I0321 11:48:13.469207  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e40c367-8a5c-4521-bb26-891680095144","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:48:13.469238  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:48:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:48:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 11:48:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:48:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 11:48:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:48:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:48:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:48:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:48:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:48:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:48:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:48:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 11:48:23.409790  543705 memory.go:184] no items to output this cycle
I0321 11:48:29.385676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:48:29.388161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:48:29.388167  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487200 0xc000487240]
E0321 11:48:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:48:33.409796  543705 memory.go:184] no items to output this cycle
I0321 11:48:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 11:48:38.884106  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:48:38.884113  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:48:43.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:48:43.410675  543705 memory.go:191] Add success.
I0321 11:48:43.409966  543705 cpu.go:282] Add success.
I0321 11:48:43.419719  543705 net.go:648] Add success.
I0321 11:48:43.422159  543705 net.go:770] primary dev: ETH0
I0321 11:48:43.422174  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:48:43.422188  543705 net.go:698] Add success.
I0321 11:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:48:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:48:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:48:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:48:53.409778  543705 memory.go:184] no items to output this cycle
I0321 11:48:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 11:49:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:49:03.409779  543705 cpu.go:275] no items to output this cycle
I0321 11:49:03.409786  543705 memory.go:184] no items to output this cycle
E0321 11:49:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:49:13.409807  543705 memory.go:191] Add success.
I0321 11:49:13.409816  543705 cpu.go:282] Add success.
W0321 11:49:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:49:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:49:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:49:13.420193  543705 net.go:648] Add success.
I0321 11:49:13.423365  543705 net.go:770] primary dev: ETH0
I0321 11:49:13.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:49:13.423395  543705 net.go:698] Add success.
I0321 11:49:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:49:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:49:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 11:49:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:49:14.456529  543705 disk_worker.go:494] system disk:vda1
I0321 11:49:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:49:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:49:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:49:23.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:49:23.410260  543705 memory.go:184] no items to output this cycle
I0321 11:49:23.410282  543705 cpu.go:275] no items to output this cycle
I0321 11:49:29.391996  543705 disk_info.go:125] begin check local disk info of client
I0321 11:49:29.394575  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:49:29.394581  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047fd00 0xc00047fd40]
E0321 11:49:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:49:33.409765  543705 memory.go:184] no items to output this cycle
I0321 11:49:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 11:49:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:49:43.409790  543705 memory.go:191] Add success.
I0321 11:49:43.409794  543705 cpu.go:282] Add success.
I0321 11:49:43.420149  543705 net.go:648] Add success.
I0321 11:49:43.422849  543705 net.go:770] primary dev: ETH0
I0321 11:49:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:49:43.422877  543705 net.go:698] Add success.
I0321 11:49:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:49:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:49:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:49:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:49:53.409808  543705 memory.go:184] no items to output this cycle
I0321 11:49:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 11:50:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:50:03.409786  543705 cpu.go:275] no items to output this cycle
I0321 11:50:03.409796  543705 memory.go:184] no items to output this cycle
E0321 11:50:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:50:13.409788  543705 memory.go:191] Add success.
I0321 11:50:13.409792  543705 cpu.go:282] Add success.
W0321 11:50:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:50:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:50:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:50:13.420210  543705 net.go:648] Add success.
I0321 11:50:13.422983  543705 net.go:770] primary dev: ETH0
I0321 11:50:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:50:13.423008  543705 net.go:698] Add success.
I0321 11:50:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:50:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:50:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 11:50:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:50:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 11:50:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:50:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:50:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:50:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:50:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:50:23.409792  543705 memory.go:184] no items to output this cycle
I0321 11:50:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 11:50:29.397675  543705 disk_info.go:125] begin check local disk info of client
I0321 11:50:29.400232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:50:29.400239  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa640 0xc0001aa680]
E0321 11:50:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:50:33.409770  543705 memory.go:184] no items to output this cycle
I0321 11:50:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 11:50:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:50:43.409818  543705 memory.go:191] Add success.
I0321 11:50:43.409827  543705 cpu.go:282] Add success.
I0321 11:50:43.420125  543705 net.go:648] Add success.
I0321 11:50:43.423099  543705 net.go:770] primary dev: ETH0
I0321 11:50:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:50:43.423124  543705 net.go:698] Add success.
I0321 11:50:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:50:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:50:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:50:53.409809  543705 memory.go:184] no items to output this cycle
I0321 11:50:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 11:51:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:51:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 11:51:03.409792  543705 memory.go:184] no items to output this cycle
E0321 11:51:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:51:13.409771  543705 memory.go:191] Add success.
I0321 11:51:13.409789  543705 cpu.go:282] Add success.
W0321 11:51:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:51:13.412428  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:51:13.412432  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:51:13.420044  543705 net.go:648] Add success.
I0321 11:51:13.421779  543705 net.go:770] primary dev: ETH0
I0321 11:51:13.421792  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:51:13.421804  543705 net.go:698] Add success.
I0321 11:51:13.465124  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c26aad8c-2c56-434c-b04d-b9ac4cccc0d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:51:13.465159  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:51:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:51:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:51:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 11:51:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:51:14.456669  543705 disk_worker.go:494] system disk:vda1
I0321 11:51:14.456697  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:51:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:51:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:51:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:51:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:51:23.409770  543705 memory.go:184] no items to output this cycle
I0321 11:51:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 11:51:29.402121  543705 disk_info.go:125] begin check local disk info of client
I0321 11:51:29.404578  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:51:29.404584  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a04c0 0xc0004a0500]
E0321 11:51:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:51:33.409788  543705 memory.go:184] no items to output this cycle
I0321 11:51:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 11:51:38.885731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:51:38.885738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:51:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:51:43.410543  543705 memory.go:191] Add success.
I0321 11:51:43.409798  543705 cpu.go:282] Add success.
I0321 11:51:43.419714  543705 net.go:648] Add success.
I0321 11:51:43.422504  543705 net.go:770] primary dev: ETH0
I0321 11:51:43.422517  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:51:43.422529  543705 net.go:698] Add success.
I0321 11:51:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:51:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:51:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:51:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:51:53.409773  543705 memory.go:184] no items to output this cycle
I0321 11:51:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 11:52:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:52:03.409793  543705 memory.go:184] no items to output this cycle
I0321 11:52:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 11:52:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:52:13.409782  543705 memory.go:191] Add success.
I0321 11:52:13.409784  543705 cpu.go:282] Add success.
W0321 11:52:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:52:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:52:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:52:13.420063  543705 net.go:648] Add success.
I0321 11:52:13.422824  543705 net.go:770] primary dev: ETH0
I0321 11:52:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:52:13.422853  543705 net.go:698] Add success.
W0321 11:52:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:52:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 11:52:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:52:14.456791  543705 disk_worker.go:494] system disk:vda1
I0321 11:52:14.456830  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:52:14.457098  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:52:14.457107  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:52:14.457111  543705 custom_config.go:64] query custom config with name: gpu
E0321 11:52:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:52:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:52:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:52:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:52:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:52:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:52:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:52:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:52:23.409792  543705 memory.go:184] no items to output this cycle
I0321 11:52:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 11:52:29.405676  543705 disk_info.go:125] begin check local disk info of client
I0321 11:52:29.408142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:52:29.408148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d2040 0xc0003d2080]
E0321 11:52:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:52:33.409796  543705 memory.go:184] no items to output this cycle
I0321 11:52:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 11:52:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:52:43.409775  543705 memory.go:191] Add success.
I0321 11:52:43.409811  543705 cpu.go:282] Add success.
I0321 11:52:43.419741  543705 net.go:648] Add success.
I0321 11:52:43.422611  543705 net.go:770] primary dev: ETH0
I0321 11:52:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:52:43.422639  543705 net.go:698] Add success.
I0321 11:52:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:52:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:52:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:52:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:52:53.409790  543705 memory.go:184] no items to output this cycle
I0321 11:52:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 11:53:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:53:03.409765  543705 memory.go:184] no items to output this cycle
I0321 11:53:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 11:53:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:53:13.409805  543705 memory.go:191] Add success.
I0321 11:53:13.409811  543705 cpu.go:282] Add success.
W0321 11:53:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:53:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:53:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:53:13.420043  543705 net.go:648] Add success.
I0321 11:53:13.422567  543705 net.go:770] primary dev: ETH0
I0321 11:53:13.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:53:13.422595  543705 net.go:698] Add success.
I0321 11:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:53:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:53:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 11:53:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:53:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 11:53:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:53:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:53:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:53:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:53:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:53:23.409768  543705 memory.go:184] no items to output this cycle
I0321 11:53:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 11:53:29.409677  543705 disk_info.go:125] begin check local disk info of client
I0321 11:53:29.412144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:53:29.412150  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d480 0xc00039d4c0]
E0321 11:53:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:53:33.409763  543705 memory.go:184] no items to output this cycle
I0321 11:53:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 11:53:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:53:43.409812  543705 memory.go:191] Add success.
I0321 11:53:43.409819  543705 cpu.go:282] Add success.
I0321 11:53:43.420502  543705 net.go:648] Add success.
I0321 11:53:43.423386  543705 net.go:770] primary dev: ETH0
I0321 11:53:43.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:53:43.423414  543705 net.go:698] Add success.
I0321 11:53:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:53:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:53:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:53:53.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:53:53.409899  543705 memory.go:184] no items to output this cycle
I0321 11:53:53.409953  543705 cpu.go:275] no items to output this cycle
E0321 11:54:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:54:03.409774  543705 memory.go:184] no items to output this cycle
I0321 11:54:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 11:54:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:54:13.409787  543705 memory.go:191] Add success.
I0321 11:54:13.409803  543705 cpu.go:282] Add success.
W0321 11:54:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:54:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:54:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:54:13.420271  543705 net.go:648] Add success.
I0321 11:54:13.423142  543705 net.go:770] primary dev: ETH0
I0321 11:54:13.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:54:13.423166  543705 net.go:698] Add success.
I0321 11:54:13.513293  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e07ea8fb-48cb-45cd-88eb-ffb777cf684d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:54:13.513328  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 11:54:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:54:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:54:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 11:54:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:54:14.456725  543705 disk_worker.go:494] system disk:vda1
I0321 11:54:14.456755  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:54:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:54:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:54:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:54:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:54:23.409777  543705 memory.go:184] no items to output this cycle
I0321 11:54:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 11:54:29.412794  543705 disk_info.go:125] begin check local disk info of client
I0321 11:54:29.415275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:54:29.415281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4840 0xc0000c4880]
E0321 11:54:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:54:33.409791  543705 memory.go:184] no items to output this cycle
I0321 11:54:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 11:54:38.885878  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:54:38.885886  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:54:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:54:43.410633  543705 memory.go:191] Add success.
I0321 11:54:43.409831  543705 cpu.go:282] Add success.
I0321 11:54:43.420449  543705 net.go:648] Add success.
I0321 11:54:43.423064  543705 net.go:770] primary dev: ETH0
I0321 11:54:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:54:43.423089  543705 net.go:698] Add success.
I0321 11:54:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:54:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:54:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:54:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:54:53.409818  543705 memory.go:184] no items to output this cycle
I0321 11:54:53.409830  543705 cpu.go:275] no items to output this cycle
E0321 11:55:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:55:03.409778  543705 memory.go:184] no items to output this cycle
I0321 11:55:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 11:55:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:55:13.409829  543705 memory.go:191] Add success.
I0321 11:55:13.409839  543705 cpu.go:282] Add success.
W0321 11:55:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:55:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:55:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:55:13.420107  543705 net.go:648] Add success.
I0321 11:55:13.423329  543705 net.go:770] primary dev: ETH0
I0321 11:55:13.423344  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:55:13.423358  543705 net.go:698] Add success.
I0321 11:55:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:55:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:55:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 11:55:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:55:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 11:55:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:55:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:55:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:55:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:55:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:55:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:55:23.409774  543705 memory.go:184] no items to output this cycle
I0321 11:55:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 11:55:29.415797  543705 disk_info.go:125] begin check local disk info of client
I0321 11:55:29.418277  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:55:29.418283  543705 disk_info.go:196] parse disk info done, disk is : [0xc000261f00 0xc000261f40]
I0321 11:55:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 11:55:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:55:33.409823  543705 memory.go:184] no items to output this cycle
E0321 11:55:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:55:43.409821  543705 memory.go:191] Add success.
I0321 11:55:43.409825  543705 cpu.go:282] Add success.
I0321 11:55:43.420403  543705 net.go:648] Add success.
I0321 11:55:43.423051  543705 net.go:770] primary dev: ETH0
I0321 11:55:43.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:55:43.423078  543705 net.go:698] Add success.
I0321 11:55:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:55:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:55:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:55:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:55:53.409795  543705 memory.go:184] no items to output this cycle
I0321 11:55:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 11:56:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:56:03.409776  543705 memory.go:184] no items to output this cycle
I0321 11:56:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 11:56:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:56:13.409828  543705 memory.go:191] Add success.
I0321 11:56:13.409836  543705 cpu.go:282] Add success.
W0321 11:56:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:56:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:56:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:56:13.420180  543705 net.go:648] Add success.
I0321 11:56:13.423543  543705 net.go:770] primary dev: ETH0
I0321 11:56:13.423557  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:56:13.423572  543705 net.go:698] Add success.
I0321 11:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:56:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:56:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 11:56:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:56:14.456473  543705 disk_worker.go:494] system disk:vda1
I0321 11:56:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:56:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:56:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:56:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:56:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:56:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:56:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:56:23.409770  543705 cpu.go:275] no items to output this cycle
I0321 11:56:23.409774  543705 memory.go:184] no items to output this cycle
I0321 11:56:29.418806  543705 disk_info.go:125] begin check local disk info of client
I0321 11:56:29.421284  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:56:29.421290  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d91c0 0xc0004d9200]
E0321 11:56:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:56:33.409781  543705 memory.go:184] no items to output this cycle
I0321 11:56:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 11:56:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:56:43.409803  543705 memory.go:191] Add success.
I0321 11:56:43.409802  543705 cpu.go:282] Add success.
I0321 11:56:43.419740  543705 net.go:648] Add success.
I0321 11:56:43.423254  543705 net.go:770] primary dev: ETH0
I0321 11:56:43.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:56:43.423279  543705 net.go:698] Add success.
I0321 11:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:56:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:56:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:56:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:56:53.409772  543705 memory.go:184] no items to output this cycle
I0321 11:56:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 11:57:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:57:03.409777  543705 memory.go:184] no items to output this cycle
I0321 11:57:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 11:57:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:57:13.409793  543705 memory.go:191] Add success.
I0321 11:57:13.409796  543705 cpu.go:282] Add success.
W0321 11:57:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:57:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:57:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:57:13.420096  543705 net.go:648] Add success.
I0321 11:57:13.429150  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 11:57:13.429224  543705 net.go:770] primary dev: ETH0
I0321 11:57:13.429236  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:57:13.429246  543705 net.go:698] Add success.
I0321 11:57:13.452770  543705 event_worker.go:152] Polling the log file for events...
I0321 11:57:13.463831  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6149d0e-a078-467a-b430-3ca48e3f213a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 11:57:13.463876  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 11:57:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:57:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 11:57:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0321 11:57:14.455905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 11:57:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 11:57:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0321 11:57:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 11:57:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 11:57:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 11:57:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:57:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 11:57:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 11:57:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:57:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:57:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:57:23.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:57:23.410245  543705 memory.go:184] no items to output this cycle
I0321 11:57:23.410261  543705 cpu.go:275] no items to output this cycle
I0321 11:57:29.421805  543705 disk_info.go:125] begin check local disk info of client
I0321 11:57:29.424253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:57:29.424259  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
E0321 11:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:57:33.409802  543705 memory.go:184] no items to output this cycle
I0321 11:57:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 11:57:38.888135  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 11:57:38.888142  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 11:57:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:57:43.410714  543705 memory.go:191] Add success.
I0321 11:57:43.409816  543705 cpu.go:282] Add success.
I0321 11:57:43.420438  543705 net.go:648] Add success.
I0321 11:57:43.423143  543705 net.go:770] primary dev: ETH0
I0321 11:57:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:57:43.423169  543705 net.go:698] Add success.
I0321 11:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:57:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:57:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:57:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:57:53.409777  543705 memory.go:184] no items to output this cycle
I0321 11:57:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:58:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:58:03.409768  543705 memory.go:184] no items to output this cycle
I0321 11:58:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 11:58:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:58:13.409816  543705 memory.go:191] Add success.
I0321 11:58:13.409824  543705 cpu.go:282] Add success.
W0321 11:58:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:58:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:58:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:58:13.420142  543705 net.go:648] Add success.
I0321 11:58:13.423032  543705 net.go:770] primary dev: ETH0
I0321 11:58:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:58:13.423063  543705 net.go:698] Add success.
I0321 11:58:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:58:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:58:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 11:58:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:58:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 11:58:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:58:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:58:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:58:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:58:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:58:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 11:58:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:58:23.409795  543705 memory.go:184] no items to output this cycle
I0321 11:58:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 11:58:29.424818  543705 disk_info.go:125] begin check local disk info of client
I0321 11:58:29.427298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:58:29.427305  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396000 0xc000396040]
E0321 11:58:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:58:33.409808  543705 memory.go:184] no items to output this cycle
I0321 11:58:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 11:58:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:58:43.409813  543705 memory.go:191] Add success.
I0321 11:58:43.409824  543705 cpu.go:282] Add success.
I0321 11:58:43.420060  543705 net.go:648] Add success.
I0321 11:58:43.422746  543705 net.go:770] primary dev: ETH0
I0321 11:58:43.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:58:43.422777  543705 net.go:698] Add success.
I0321 11:58:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:58:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:58:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:58:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:58:53.409785  543705 memory.go:184] no items to output this cycle
I0321 11:58:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 11:59:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:59:03.409771  543705 memory.go:184] no items to output this cycle
I0321 11:59:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 11:59:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:59:13.409823  543705 memory.go:191] Add success.
I0321 11:59:13.409832  543705 cpu.go:282] Add success.
W0321 11:59:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 11:59:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 11:59:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 11:59:13.420181  543705 net.go:648] Add success.
I0321 11:59:13.423160  543705 net.go:770] primary dev: ETH0
I0321 11:59:13.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:59:13.423187  543705 net.go:698] Add success.
I0321 11:59:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0321 11:59:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 11:59:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 11:59:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 11:59:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 11:59:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 11:59:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 11:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:59:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:59:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 11:59:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0321 11:59:23.409908  543705 cpu.go:275] no items to output this cycle
E0321 11:59:23.409995  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:59:23.410012  543705 memory.go:184] no items to output this cycle
I0321 11:59:29.427834  543705 disk_info.go:125] begin check local disk info of client
I0321 11:59:29.430320  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 11:59:29.430327  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0000 0xc0004a0040]
E0321 11:59:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:59:33.409795  543705 memory.go:184] no items to output this cycle
I0321 11:59:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 11:59:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:59:43.409821  543705 memory.go:191] Add success.
I0321 11:59:43.409825  543705 cpu.go:282] Add success.
I0321 11:59:43.419915  543705 net.go:648] Add success.
I0321 11:59:43.422806  543705 net.go:770] primary dev: ETH0
I0321 11:59:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0321 11:59:43.422834  543705 net.go:698] Add success.
I0321 11:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 11:59:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 11:59:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 11:59:53.410408  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 11:59:53.410420  543705 cpu.go:275] no items to output this cycle
I0321 11:59:53.410424  543705 memory.go:184] no items to output this cycle
E0321 12:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:00:03.409782  543705 memory.go:184] no items to output this cycle
I0321 12:00:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 12:00:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:00:13.409786  543705 memory.go:191] Add success.
I0321 12:00:13.409808  543705 cpu.go:282] Add success.
W0321 12:00:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:00:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:00:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:00:13.420051  543705 net.go:648] Add success.
I0321 12:00:13.422775  543705 net.go:770] primary dev: ETH0
I0321 12:00:13.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:00:13.422800  543705 net.go:698] Add success.
I0321 12:00:13.469094  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"67f87252-dcca-435c-83df-6274d547ecb9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:00:13.469135  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:00:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:00:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:00:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 12:00:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:00:14.456506  543705 disk_worker.go:494] system disk:vda1
I0321 12:00:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:00:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:00:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:00:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:00:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:00:23.409763  543705 memory.go:184] no items to output this cycle
I0321 12:00:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 12:00:29.430854  543705 disk_info.go:125] begin check local disk info of client
I0321 12:00:29.433394  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:00:29.433401  543705 disk_info.go:196] parse disk info done, disk is : [0xc000485a40 0xc000485a80]
E0321 12:00:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:00:33.409776  543705 memory.go:184] no items to output this cycle
I0321 12:00:33.409783  543705 cpu.go:275] no items to output this cycle
I0321 12:00:38.889733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:00:38.889740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:00:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:00:43.410655  543705 memory.go:191] Add success.
I0321 12:00:43.409789  543705 cpu.go:282] Add success.
I0321 12:00:43.420347  543705 net.go:648] Add success.
I0321 12:00:43.422944  543705 net.go:770] primary dev: ETH0
I0321 12:00:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:00:43.422970  543705 net.go:698] Add success.
I0321 12:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:00:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:00:53.409776  543705 memory.go:184] no items to output this cycle
I0321 12:00:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 12:01:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:01:03.409802  543705 memory.go:184] no items to output this cycle
I0321 12:01:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 12:01:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:01:13.409781  543705 memory.go:191] Add success.
I0321 12:01:13.409805  543705 cpu.go:282] Add success.
W0321 12:01:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:01:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:01:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:01:13.420231  543705 net.go:648] Add success.
I0321 12:01:13.422803  543705 net.go:770] primary dev: ETH0
I0321 12:01:13.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:01:13.422829  543705 net.go:698] Add success.
I0321 12:01:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:01:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:01:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 12:01:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:01:14.456841  543705 disk_worker.go:494] system disk:vda1
I0321 12:01:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:01:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:01:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:01:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:01:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:01:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:01:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:01:23.409782  543705 memory.go:184] no items to output this cycle
I0321 12:01:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 12:01:29.433864  543705 disk_info.go:125] begin check local disk info of client
I0321 12:01:29.436362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:01:29.436369  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0f80 0xc0004a0fc0]
E0321 12:01:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:01:33.409778  543705 memory.go:184] no items to output this cycle
I0321 12:01:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 12:01:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:01:43.409786  543705 memory.go:191] Add success.
I0321 12:01:43.409792  543705 cpu.go:282] Add success.
I0321 12:01:43.419871  543705 net.go:648] Add success.
I0321 12:01:43.422932  543705 net.go:770] primary dev: ETH0
I0321 12:01:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:01:43.422958  543705 net.go:698] Add success.
I0321 12:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:01:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:01:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:01:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:01:53.409818  543705 memory.go:184] no items to output this cycle
I0321 12:01:53.409824  543705 cpu.go:275] no items to output this cycle
E0321 12:02:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:02:03.409804  543705 memory.go:184] no items to output this cycle
I0321 12:02:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 12:02:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:02:13.409780  543705 memory.go:191] Add success.
I0321 12:02:13.409796  543705 cpu.go:282] Add success.
W0321 12:02:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:02:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:02:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:02:13.420073  543705 net.go:648] Add success.
I0321 12:02:13.423131  543705 net.go:770] primary dev: ETH0
I0321 12:02:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:02:13.423156  543705 net.go:698] Add success.
W0321 12:02:14.455992  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:02:14.456002  543705 disk_worker.go:708] disk space is not compliant
W0321 12:02:14.456004  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:02:14.457777  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:02:14.457784  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:02:14.457788  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:02:14.458855  543705 disk_worker.go:494] system disk:vda1
I0321 12:02:14.458884  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:02:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:02:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:02:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:02:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:02:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:02:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:02:16.472309  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:02:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:02:23.409775  543705 memory.go:184] no items to output this cycle
I0321 12:02:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 12:02:29.436879  543705 disk_info.go:125] begin check local disk info of client
I0321 12:02:29.439357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:02:29.439364  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a480 0xc00035a4c0]
E0321 12:02:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:02:33.409800  543705 memory.go:184] no items to output this cycle
I0321 12:02:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 12:02:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:02:43.409787  543705 memory.go:191] Add success.
I0321 12:02:43.409803  543705 cpu.go:282] Add success.
I0321 12:02:43.419853  543705 net.go:648] Add success.
I0321 12:02:43.422704  543705 net.go:770] primary dev: ETH0
I0321 12:02:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:02:43.422729  543705 net.go:698] Add success.
I0321 12:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:02:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:02:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:02:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:02:53.409814  543705 memory.go:184] no items to output this cycle
I0321 12:02:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 12:03:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:03:03.409798  543705 memory.go:184] no items to output this cycle
I0321 12:03:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 12:03:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:03:13.409783  543705 memory.go:191] Add success.
I0321 12:03:13.409800  543705 cpu.go:282] Add success.
W0321 12:03:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:03:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:03:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:03:13.420131  543705 net.go:648] Add success.
I0321 12:03:13.423282  543705 net.go:770] primary dev: ETH0
I0321 12:03:13.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:03:13.423311  543705 net.go:698] Add success.
I0321 12:03:13.469261  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c66d1a7d-c796-4508-b6c4-5cb01e7c993c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:03:13.469295  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:03:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:03:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 12:03:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:03:14.459243  543705 disk_worker.go:494] system disk:vda1
I0321 12:03:14.459273  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:03:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:03:16.458593  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:03:16.458658  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:03:16.458681  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:03:16.473011  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:03:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:03:23.409762  543705 memory.go:184] no items to output this cycle
I0321 12:03:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 12:03:29.439893  543705 disk_info.go:125] begin check local disk info of client
I0321 12:03:29.442392  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:03:29.442399  543705 disk_info.go:196] parse disk info done, disk is : [0xc000247040 0xc000247080]
E0321 12:03:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:03:33.409799  543705 memory.go:184] no items to output this cycle
I0321 12:03:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 12:03:38.889884  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:03:38.889891  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:03:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:03:43.410720  543705 memory.go:191] Add success.
I0321 12:03:43.409816  543705 cpu.go:282] Add success.
I0321 12:03:43.420455  543705 net.go:648] Add success.
I0321 12:03:43.423023  543705 net.go:770] primary dev: ETH0
I0321 12:03:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:03:43.423049  543705 net.go:698] Add success.
I0321 12:03:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:03:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:03:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:03:53.410264  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:03:53.410290  543705 memory.go:184] no items to output this cycle
I0321 12:03:53.410306  543705 cpu.go:275] no items to output this cycle
E0321 12:04:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:04:03.409774  543705 memory.go:184] no items to output this cycle
I0321 12:04:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 12:04:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:04:13.409810  543705 memory.go:191] Add success.
I0321 12:04:13.409817  543705 cpu.go:282] Add success.
W0321 12:04:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:04:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:04:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:04:13.420113  543705 net.go:648] Add success.
I0321 12:04:13.423360  543705 net.go:770] primary dev: ETH0
I0321 12:04:13.423377  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:04:13.423390  543705 net.go:698] Add success.
I0321 12:04:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:04:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:04:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 12:04:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:04:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 12:04:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:04:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:04:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:04:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:04:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:04:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:04:23.409803  543705 memory.go:184] no items to output this cycle
I0321 12:04:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 12:04:29.442911  543705 disk_info.go:125] begin check local disk info of client
I0321 12:04:29.445419  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:04:29.445426  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475b00 0xc000475b40]
E0321 12:04:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:04:33.409793  543705 memory.go:184] no items to output this cycle
I0321 12:04:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 12:04:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:04:43.409812  543705 memory.go:191] Add success.
I0321 12:04:43.409822  543705 cpu.go:282] Add success.
I0321 12:04:43.419859  543705 net.go:648] Add success.
I0321 12:04:43.422926  543705 net.go:770] primary dev: ETH0
I0321 12:04:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:04:43.422956  543705 net.go:698] Add success.
I0321 12:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:04:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:04:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:04:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:04:53.409773  543705 memory.go:184] no items to output this cycle
I0321 12:04:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 12:05:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:05:03.409789  543705 memory.go:184] no items to output this cycle
I0321 12:05:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 12:05:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:05:13.409775  543705 memory.go:191] Add success.
I0321 12:05:13.409797  543705 cpu.go:282] Add success.
W0321 12:05:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:05:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:05:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:05:13.419687  543705 net.go:648] Add success.
I0321 12:05:13.422787  543705 net.go:770] primary dev: ETH0
I0321 12:05:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:05:13.422813  543705 net.go:698] Add success.
I0321 12:05:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:05:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:05:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 12:05:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:05:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 12:05:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:05:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:05:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:05:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:05:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:05:23.409777  543705 memory.go:184] no items to output this cycle
I0321 12:05:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 12:05:29.445925  543705 disk_info.go:125] begin check local disk info of client
I0321 12:05:29.448475  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:05:29.448482  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049f5c0 0xc00049f600]
E0321 12:05:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:05:33.409792  543705 memory.go:184] no items to output this cycle
I0321 12:05:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 12:05:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:05:43.409777  543705 memory.go:191] Add success.
I0321 12:05:43.409806  543705 cpu.go:282] Add success.
I0321 12:05:43.419979  543705 net.go:648] Add success.
I0321 12:05:43.422763  543705 net.go:770] primary dev: ETH0
I0321 12:05:43.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:05:43.422789  543705 net.go:698] Add success.
I0321 12:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:05:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:05:53.410355  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:05:53.410372  543705 memory.go:184] no items to output this cycle
I0321 12:05:53.410377  543705 cpu.go:275] no items to output this cycle
E0321 12:06:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:06:03.409794  543705 memory.go:184] no items to output this cycle
I0321 12:06:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 12:06:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:06:13.409775  543705 memory.go:191] Add success.
I0321 12:06:13.409796  543705 cpu.go:282] Add success.
W0321 12:06:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:06:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:06:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:06:13.420082  543705 net.go:648] Add success.
I0321 12:06:13.423028  543705 net.go:770] primary dev: ETH0
I0321 12:06:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:06:13.423051  543705 net.go:698] Add success.
I0321 12:06:13.464638  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d835aea7-e837-4efe-85b7-8f78dee94c8a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:06:13.464670  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:06:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:06:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:06:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 12:06:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:06:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 12:06:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:06:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:06:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:06:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:06:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:06:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:06:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 12:06:23.409793  543705 memory.go:184] no items to output this cycle
I0321 12:06:29.448939  543705 disk_info.go:125] begin check local disk info of client
I0321 12:06:29.451457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:06:29.451464  543705 disk_info.go:196] parse disk info done, disk is : [0xc000347280 0xc0003472c0]
E0321 12:06:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:06:33.409789  543705 memory.go:184] no items to output this cycle
I0321 12:06:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 12:06:38.892155  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:06:38.892162  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:06:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:06:43.410624  543705 memory.go:191] Add success.
I0321 12:06:43.409786  543705 cpu.go:282] Add success.
I0321 12:06:43.420308  543705 net.go:648] Add success.
I0321 12:06:43.423599  543705 net.go:770] primary dev: ETH0
I0321 12:06:43.423613  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:06:43.423626  543705 net.go:698] Add success.
I0321 12:06:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:06:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:06:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:06:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:06:53.409812  543705 memory.go:184] no items to output this cycle
I0321 12:06:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 12:07:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:07:03.409780  543705 memory.go:184] no items to output this cycle
I0321 12:07:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 12:07:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:07:13.409790  543705 memory.go:191] Add success.
I0321 12:07:13.409793  543705 cpu.go:282] Add success.
W0321 12:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:07:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:07:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:07:13.420763  543705 net.go:648] Add success.
I0321 12:07:13.423742  543705 net.go:770] primary dev: ETH0
I0321 12:07:13.423756  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:07:13.423766  543705 net.go:698] Add success.
I0321 12:07:13.452773  543705 event_worker.go:152] Polling the log file for events...
W0321 12:07:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:07:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 12:07:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:07:14.456783  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:07:14.456792  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:07:14.456797  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:07:14.456841  543705 disk_worker.go:494] system disk:vda1
I0321 12:07:14.456882  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:07:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:07:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:07:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:07:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:07:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:07:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:07:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:07:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:07:23.409803  543705 memory.go:184] no items to output this cycle
I0321 12:07:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 12:07:29.451951  543705 disk_info.go:125] begin check local disk info of client
I0321 12:07:29.454417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:07:29.454423  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b35c0 0xc0002b3600]
E0321 12:07:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:07:33.409789  543705 memory.go:184] no items to output this cycle
I0321 12:07:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 12:07:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:07:43.409779  543705 memory.go:191] Add success.
I0321 12:07:43.409795  543705 cpu.go:282] Add success.
I0321 12:07:43.419839  543705 net.go:648] Add success.
I0321 12:07:43.422527  543705 net.go:770] primary dev: ETH0
I0321 12:07:43.422540  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:07:43.422552  543705 net.go:698] Add success.
I0321 12:07:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:07:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:07:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:07:53.409783  543705 memory.go:184] no items to output this cycle
I0321 12:07:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 12:08:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:08:03.409786  543705 memory.go:184] no items to output this cycle
I0321 12:08:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 12:08:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:08:13.409772  543705 memory.go:191] Add success.
I0321 12:08:13.409785  543705 cpu.go:282] Add success.
W0321 12:08:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:08:13.412858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:08:13.412863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:08:13.420645  543705 net.go:648] Add success.
I0321 12:08:13.422862  543705 net.go:770] primary dev: ETH0
I0321 12:08:13.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:08:13.422886  543705 net.go:698] Add success.
I0321 12:08:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:08:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:08:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 12:08:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:08:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 12:08:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:08:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:08:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:08:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:08:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:08:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:08:23.409810  543705 memory.go:184] no items to output this cycle
I0321 12:08:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 12:08:29.454971  543705 disk_info.go:125] begin check local disk info of client
I0321 12:08:29.457468  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:08:29.457475  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0240 0xc0004a0280]
E0321 12:08:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:08:33.409794  543705 memory.go:184] no items to output this cycle
I0321 12:08:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 12:08:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:08:43.409777  543705 memory.go:191] Add success.
I0321 12:08:43.409794  543705 cpu.go:282] Add success.
I0321 12:08:43.419837  543705 net.go:648] Add success.
I0321 12:08:43.422498  543705 net.go:770] primary dev: ETH0
I0321 12:08:43.422513  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:08:43.422528  543705 net.go:698] Add success.
I0321 12:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:08:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:08:53.409788  543705 memory.go:184] no items to output this cycle
I0321 12:08:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 12:09:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:09:03.409774  543705 memory.go:184] no items to output this cycle
I0321 12:09:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 12:09:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:09:13.409784  543705 memory.go:191] Add success.
I0321 12:09:13.409807  543705 cpu.go:282] Add success.
W0321 12:09:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:09:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:09:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:09:13.420599  543705 net.go:648] Add success.
I0321 12:09:13.423532  543705 net.go:770] primary dev: ETH0
I0321 12:09:13.423545  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:09:13.423556  543705 net.go:698] Add success.
I0321 12:09:13.468788  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d5c760fe-5ad2-4b58-afcb-3a38562ee4de","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:09:13.468818  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:09:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:09:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:09:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0321 12:09:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:09:14.456483  543705 disk_worker.go:494] system disk:vda1
I0321 12:09:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:09:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:09:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:09:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:09:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:09:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:09:23.409809  543705 memory.go:184] no items to output this cycle
I0321 12:09:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 12:09:29.457980  543705 disk_info.go:125] begin check local disk info of client
I0321 12:09:29.460459  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:09:29.460465  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
E0321 12:09:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:09:33.409789  543705 memory.go:184] no items to output this cycle
I0321 12:09:33.409800  543705 cpu.go:275] no items to output this cycle
I0321 12:09:38.893727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:09:38.893734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:09:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:09:43.410854  543705 memory.go:191] Add success.
I0321 12:09:43.409816  543705 cpu.go:282] Add success.
I0321 12:09:43.420534  543705 net.go:648] Add success.
I0321 12:09:43.423351  543705 net.go:770] primary dev: ETH0
I0321 12:09:43.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:09:43.423377  543705 net.go:698] Add success.
I0321 12:09:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:09:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:09:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:09:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:09:53.409786  543705 memory.go:184] no items to output this cycle
I0321 12:09:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 12:10:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:10:03.409784  543705 memory.go:184] no items to output this cycle
I0321 12:10:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 12:10:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:10:13.409810  543705 memory.go:191] Add success.
I0321 12:10:13.409811  543705 cpu.go:282] Add success.
W0321 12:10:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:10:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:10:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:10:13.420052  543705 net.go:648] Add success.
I0321 12:10:13.422592  543705 net.go:770] primary dev: ETH0
I0321 12:10:13.422604  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:10:13.422621  543705 net.go:698] Add success.
I0321 12:10:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:10:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:10:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 12:10:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:10:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 12:10:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:10:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:10:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:10:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:10:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:10:23.409800  543705 memory.go:184] no items to output this cycle
I0321 12:10:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 12:10:29.460994  543705 disk_info.go:125] begin check local disk info of client
I0321 12:10:29.463550  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:10:29.463556  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa380 0xc0001aa3c0]
E0321 12:10:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:10:33.409784  543705 memory.go:184] no items to output this cycle
I0321 12:10:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 12:10:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:10:43.409796  543705 memory.go:191] Add success.
I0321 12:10:43.409806  543705 cpu.go:282] Add success.
I0321 12:10:43.420035  543705 net.go:648] Add success.
I0321 12:10:43.422862  543705 net.go:770] primary dev: ETH0
I0321 12:10:43.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:10:43.422888  543705 net.go:698] Add success.
I0321 12:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:10:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:10:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:10:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:10:53.409795  543705 memory.go:184] no items to output this cycle
I0321 12:10:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 12:11:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:11:03.409786  543705 memory.go:184] no items to output this cycle
I0321 12:11:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 12:11:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:11:13.409822  543705 memory.go:191] Add success.
I0321 12:11:13.409830  543705 cpu.go:282] Add success.
W0321 12:11:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:11:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:11:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:11:13.420072  543705 net.go:648] Add success.
I0321 12:11:13.422693  543705 net.go:770] primary dev: ETH0
I0321 12:11:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:11:13.422717  543705 net.go:698] Add success.
I0321 12:11:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:11:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:11:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 12:11:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:11:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 12:11:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:11:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:11:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:11:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:11:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:11:23.409789  543705 memory.go:184] no items to output this cycle
I0321 12:11:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 12:11:29.464005  543705 disk_info.go:125] begin check local disk info of client
I0321 12:11:29.466510  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:11:29.466517  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0321 12:11:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:11:33.409803  543705 memory.go:184] no items to output this cycle
I0321 12:11:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 12:11:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:11:43.409780  543705 memory.go:191] Add success.
I0321 12:11:43.409820  543705 cpu.go:282] Add success.
I0321 12:11:43.419910  543705 net.go:648] Add success.
I0321 12:11:43.422739  543705 net.go:770] primary dev: ETH0
I0321 12:11:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:11:43.422764  543705 net.go:698] Add success.
I0321 12:11:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:11:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:11:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:11:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:11:53.409816  543705 memory.go:184] no items to output this cycle
I0321 12:11:53.409826  543705 cpu.go:275] no items to output this cycle
E0321 12:12:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:12:03.409763  543705 memory.go:184] no items to output this cycle
I0321 12:12:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 12:12:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:12:13.409805  543705 memory.go:191] Add success.
I0321 12:12:13.409819  543705 cpu.go:282] Add success.
W0321 12:12:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:12:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:12:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:12:13.420127  543705 net.go:648] Add success.
I0321 12:12:13.423130  543705 net.go:770] primary dev: ETH0
I0321 12:12:13.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:12:13.423156  543705 net.go:698] Add success.
I0321 12:12:13.471170  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6e621ced-57ac-4f4b-b30f-a98474e93943","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:12:13.471202  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 12:12:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:12:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 12:12:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:12:14.455988  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:12:14.455997  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:12:14.456002  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:12:14.456443  543705 disk_worker.go:494] system disk:vda1
I0321 12:12:14.456473  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:12:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:12:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:12:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:12:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:12:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:12:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:12:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:12:23.410512  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:12:23.410529  543705 memory.go:184] no items to output this cycle
I0321 12:12:23.410548  543705 cpu.go:275] no items to output this cycle
I0321 12:12:29.467020  543705 disk_info.go:125] begin check local disk info of client
I0321 12:12:29.469511  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:12:29.469517  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c52c0 0xc0000c5300]
E0321 12:12:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:12:33.409790  543705 memory.go:184] no items to output this cycle
I0321 12:12:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 12:12:38.893890  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:12:38.893897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:12:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:12:43.410733  543705 memory.go:191] Add success.
I0321 12:12:43.409794  543705 cpu.go:282] Add success.
I0321 12:12:43.420482  543705 net.go:648] Add success.
I0321 12:12:43.423207  543705 net.go:770] primary dev: ETH0
I0321 12:12:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:12:43.423233  543705 net.go:698] Add success.
I0321 12:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:12:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:12:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:12:53.410336  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:12:53.410354  543705 memory.go:184] no items to output this cycle
I0321 12:12:53.410371  543705 cpu.go:275] no items to output this cycle
E0321 12:13:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:13:03.409794  543705 memory.go:184] no items to output this cycle
I0321 12:13:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 12:13:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:13:13.409787  543705 memory.go:191] Add success.
I0321 12:13:13.409788  543705 cpu.go:282] Add success.
W0321 12:13:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:13:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:13:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:13:13.420137  543705 net.go:648] Add success.
I0321 12:13:13.423121  543705 net.go:770] primary dev: ETH0
I0321 12:13:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:13:13.423146  543705 net.go:698] Add success.
I0321 12:13:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:13:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:13:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 12:13:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:13:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 12:13:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:13:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:13:16.458030  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:13:16.458104  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:13:16.458137  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:13:16.472638  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:13:23.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:13:23.409911  543705 memory.go:184] no items to output this cycle
I0321 12:13:23.410003  543705 cpu.go:275] no items to output this cycle
I0321 12:13:29.470043  543705 disk_info.go:125] begin check local disk info of client
I0321 12:13:29.472572  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:13:29.472578  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492000 0xc000492040]
E0321 12:13:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:13:33.409777  543705 memory.go:184] no items to output this cycle
I0321 12:13:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 12:13:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:13:43.409803  543705 memory.go:191] Add success.
I0321 12:13:43.409813  543705 cpu.go:282] Add success.
I0321 12:13:43.419981  543705 net.go:648] Add success.
I0321 12:13:43.423017  543705 net.go:770] primary dev: ETH0
I0321 12:13:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:13:43.423043  543705 net.go:698] Add success.
I0321 12:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:13:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:13:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:13:53.410213  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:13:53.410230  543705 memory.go:184] no items to output this cycle
I0321 12:13:53.410230  543705 cpu.go:275] no items to output this cycle
E0321 12:14:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:14:03.409796  543705 memory.go:184] no items to output this cycle
I0321 12:14:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 12:14:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:14:13.409794  543705 memory.go:191] Add success.
I0321 12:14:13.409796  543705 cpu.go:282] Add success.
W0321 12:14:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:14:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:14:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:14:13.420021  543705 net.go:648] Add success.
I0321 12:14:13.423144  543705 net.go:770] primary dev: ETH0
I0321 12:14:13.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:14:13.423170  543705 net.go:698] Add success.
I0321 12:14:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:14:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:14:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 12:14:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:14:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 12:14:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:14:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:14:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:14:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:14:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:14:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:14:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:14:23.409773  543705 memory.go:184] no items to output this cycle
I0321 12:14:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 12:14:29.473058  543705 disk_info.go:125] begin check local disk info of client
I0321 12:14:29.475641  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:14:29.475648  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a20c0 0xc0002a2100]
E0321 12:14:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:14:33.409779  543705 memory.go:184] no items to output this cycle
I0321 12:14:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 12:14:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:14:43.409810  543705 memory.go:191] Add success.
I0321 12:14:43.409815  543705 cpu.go:282] Add success.
I0321 12:14:43.420019  543705 net.go:648] Add success.
I0321 12:14:43.422766  543705 net.go:770] primary dev: ETH0
I0321 12:14:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:14:43.422793  543705 net.go:698] Add success.
I0321 12:14:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:14:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:14:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:14:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:14:53.409794  543705 memory.go:184] no items to output this cycle
I0321 12:14:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 12:15:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:15:03.409783  543705 cpu.go:275] no items to output this cycle
I0321 12:15:03.409784  543705 memory.go:184] no items to output this cycle
E0321 12:15:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:15:13.409808  543705 memory.go:191] Add success.
I0321 12:15:13.409815  543705 cpu.go:282] Add success.
W0321 12:15:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:15:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:15:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:15:13.420237  543705 net.go:648] Add success.
I0321 12:15:13.422696  543705 net.go:770] primary dev: ETH0
I0321 12:15:13.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:15:13.422726  543705 net.go:698] Add success.
I0321 12:15:13.467653  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"09967328-5ee1-4c80-bf45-cefda15e8346","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:15:13.467687  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:15:14.455091  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:15:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:15:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 12:15:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:15:14.457834  543705 disk_worker.go:494] system disk:vda1
I0321 12:15:14.457867  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:15:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:15:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:15:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:15:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:15:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:15:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:15:23.409792  543705 memory.go:184] no items to output this cycle
I0321 12:15:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 12:15:29.476064  543705 disk_info.go:125] begin check local disk info of client
I0321 12:15:29.478606  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:15:29.478612  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e100 0xc00035e140]
E0321 12:15:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:15:33.409799  543705 memory.go:184] no items to output this cycle
I0321 12:15:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 12:15:38.896169  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:15:38.896175  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:15:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:15:43.410710  543705 memory.go:191] Add success.
I0321 12:15:43.409820  543705 cpu.go:282] Add success.
I0321 12:15:43.420404  543705 net.go:648] Add success.
I0321 12:15:43.423636  543705 net.go:770] primary dev: ETH0
I0321 12:15:43.423649  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:15:43.423662  543705 net.go:698] Add success.
I0321 12:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:15:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:15:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:15:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:15:53.409798  543705 memory.go:184] no items to output this cycle
I0321 12:15:53.409833  543705 cpu.go:275] no items to output this cycle
E0321 12:16:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:16:03.409777  543705 memory.go:184] no items to output this cycle
I0321 12:16:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 12:16:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:16:13.409783  543705 memory.go:191] Add success.
I0321 12:16:13.409803  543705 cpu.go:282] Add success.
W0321 12:16:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:16:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:16:13.420236  543705 net.go:648] Add success.
I0321 12:16:13.423080  543705 net.go:770] primary dev: ETH0
I0321 12:16:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:16:13.423106  543705 net.go:698] Add success.
I0321 12:16:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:16:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:16:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 12:16:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:16:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 12:16:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:16:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:16:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:16:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:16:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:16:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0321 12:16:23.409785  543705 cpu.go:275] no items to output this cycle
E0321 12:16:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:16:23.409799  543705 memory.go:184] no items to output this cycle
I0321 12:16:29.479098  543705 disk_info.go:125] begin check local disk info of client
I0321 12:16:29.481554  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:16:29.481561  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f9d00 0xc0001f9d40]
E0321 12:16:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:16:33.409816  543705 memory.go:184] no items to output this cycle
I0321 12:16:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 12:16:43.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:16:43.409979  543705 memory.go:191] Add success.
I0321 12:16:43.410042  543705 cpu.go:282] Add success.
I0321 12:16:43.419722  543705 net.go:648] Add success.
I0321 12:16:43.422166  543705 net.go:770] primary dev: ETH0
I0321 12:16:43.422178  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:16:43.422189  543705 net.go:698] Add success.
I0321 12:16:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:16:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:16:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:16:53.410795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:16:53.410817  543705 memory.go:184] no items to output this cycle
I0321 12:16:53.410828  543705 cpu.go:275] no items to output this cycle
E0321 12:17:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:17:03.409774  543705 memory.go:184] no items to output this cycle
I0321 12:17:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 12:17:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:17:13.409809  543705 memory.go:191] Add success.
I0321 12:17:13.409819  543705 cpu.go:282] Add success.
W0321 12:17:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:17:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:17:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:17:13.420147  543705 net.go:648] Add success.
I0321 12:17:13.423366  543705 net.go:770] primary dev: ETH0
I0321 12:17:13.423380  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:17:13.423391  543705 net.go:698] Add success.
I0321 12:17:13.452939  543705 event_worker.go:152] Polling the log file for events...
W0321 12:17:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:17:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 12:17:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:17:14.455918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:17:14.455927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:17:14.455934  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:17:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 12:17:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:17:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:17:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:17:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:17:16.457992  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:17:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:17:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:17:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:17:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:17:23.409787  543705 memory.go:184] no items to output this cycle
I0321 12:17:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 12:17:29.482100  543705 disk_info.go:125] begin check local disk info of client
I0321 12:17:29.484532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:17:29.484538  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b4040 0xc0002b4080]
E0321 12:17:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:17:33.409806  543705 memory.go:184] no items to output this cycle
I0321 12:17:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 12:17:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:17:43.409805  543705 cpu.go:282] Add success.
I0321 12:17:43.409806  543705 memory.go:191] Add success.
I0321 12:17:43.420011  543705 net.go:648] Add success.
I0321 12:17:43.422680  543705 net.go:770] primary dev: ETH0
I0321 12:17:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:17:43.422705  543705 net.go:698] Add success.
I0321 12:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:17:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:17:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:17:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:17:53.409782  543705 memory.go:184] no items to output this cycle
I0321 12:17:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 12:18:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:18:03.409768  543705 memory.go:184] no items to output this cycle
I0321 12:18:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 12:18:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:18:13.409812  543705 memory.go:191] Add success.
I0321 12:18:13.409821  543705 cpu.go:282] Add success.
W0321 12:18:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:18:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:18:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:18:13.420078  543705 net.go:648] Add success.
I0321 12:18:13.422750  543705 net.go:770] primary dev: ETH0
I0321 12:18:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:18:13.422776  543705 net.go:698] Add success.
I0321 12:18:13.469174  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3d107562-4e89-4990-ae29-73b302e2693f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:18:13.469218  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:18:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:18:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:18:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 12:18:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:18:14.456521  543705 disk_worker.go:494] system disk:vda1
I0321 12:18:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:18:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:18:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:18:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:18:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:18:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 12:18:23.409788  543705 memory.go:184] no items to output this cycle
I0321 12:18:29.485105  543705 disk_info.go:125] begin check local disk info of client
I0321 12:18:29.487560  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:18:29.487567  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029d080 0xc00029d0c0]
E0321 12:18:33.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:18:33.409887  543705 memory.go:184] no items to output this cycle
I0321 12:18:33.409920  543705 cpu.go:275] no items to output this cycle
I0321 12:18:38.897737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:18:38.897744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:18:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:18:43.410721  543705 memory.go:191] Add success.
I0321 12:18:43.409826  543705 cpu.go:282] Add success.
I0321 12:18:43.420428  543705 net.go:648] Add success.
I0321 12:18:43.423195  543705 net.go:770] primary dev: ETH0
I0321 12:18:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:18:43.423225  543705 net.go:698] Add success.
I0321 12:18:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:18:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:18:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:18:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:18:53.409789  543705 memory.go:184] no items to output this cycle
I0321 12:18:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 12:19:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:19:03.409799  543705 memory.go:184] no items to output this cycle
I0321 12:19:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 12:19:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:19:13.409785  543705 memory.go:191] Add success.
I0321 12:19:13.409808  543705 cpu.go:282] Add success.
W0321 12:19:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:19:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:19:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:19:13.420320  543705 net.go:648] Add success.
I0321 12:19:13.423121  543705 net.go:770] primary dev: ETH0
I0321 12:19:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:19:13.423160  543705 net.go:698] Add success.
I0321 12:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:19:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:19:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 12:19:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:19:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 12:19:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:19:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:19:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:19:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:19:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:19:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:19:23.409779  543705 memory.go:184] no items to output this cycle
I0321 12:19:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 12:19:29.488131  543705 disk_info.go:125] begin check local disk info of client
I0321 12:19:29.490620  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:19:29.490626  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029c8c0 0xc00029c900]
E0321 12:19:33.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:19:33.409902  543705 memory.go:184] no items to output this cycle
I0321 12:19:33.409923  543705 cpu.go:275] no items to output this cycle
E0321 12:19:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:19:43.409776  543705 memory.go:191] Add success.
I0321 12:19:43.409817  543705 cpu.go:282] Add success.
I0321 12:19:43.419862  543705 net.go:648] Add success.
I0321 12:19:43.422869  543705 net.go:770] primary dev: ETH0
I0321 12:19:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:19:43.422898  543705 net.go:698] Add success.
I0321 12:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:19:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:19:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:19:53.410202  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:19:53.410222  543705 memory.go:184] no items to output this cycle
I0321 12:19:53.410248  543705 cpu.go:275] no items to output this cycle
E0321 12:20:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:20:03.409780  543705 cpu.go:275] no items to output this cycle
I0321 12:20:03.409793  543705 memory.go:184] no items to output this cycle
E0321 12:20:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:20:13.409789  543705 memory.go:191] Add success.
W0321 12:20:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 12:20:13.409820  543705 cpu.go:282] Add success.
W0321 12:20:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:20:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:20:13.420198  543705 net.go:648] Add success.
I0321 12:20:13.423319  543705 net.go:770] primary dev: ETH0
I0321 12:20:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:20:13.423346  543705 net.go:698] Add success.
I0321 12:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:20:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:20:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 12:20:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:20:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 12:20:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:20:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:20:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:20:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:20:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:20:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 12:20:23.409791  543705 memory.go:184] no items to output this cycle
I0321 12:20:29.491137  543705 disk_info.go:125] begin check local disk info of client
I0321 12:20:29.493611  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:20:29.493617  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000bbac0 0xc0000bbc00]
E0321 12:20:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:20:33.409871  543705 memory.go:184] no items to output this cycle
I0321 12:20:33.409926  543705 cpu.go:275] no items to output this cycle
E0321 12:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:20:43.409783  543705 memory.go:191] Add success.
I0321 12:20:43.409810  543705 cpu.go:282] Add success.
I0321 12:20:43.419986  543705 net.go:648] Add success.
I0321 12:20:43.422831  543705 net.go:770] primary dev: ETH0
I0321 12:20:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:20:43.422856  543705 net.go:698] Add success.
I0321 12:20:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:20:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:20:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:20:53.410354  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:20:53.410372  543705 memory.go:184] no items to output this cycle
I0321 12:20:53.410379  543705 cpu.go:275] no items to output this cycle
E0321 12:21:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:21:03.409766  543705 memory.go:184] no items to output this cycle
I0321 12:21:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:21:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:21:13.409792  543705 memory.go:191] Add success.
I0321 12:21:13.409811  543705 cpu.go:282] Add success.
W0321 12:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:21:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:21:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:21:13.420172  543705 net.go:648] Add success.
I0321 12:21:13.423065  543705 net.go:770] primary dev: ETH0
I0321 12:21:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:21:13.423090  543705 net.go:698] Add success.
I0321 12:21:13.710746  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"483563f9-2069-4e42-af24-81c3d1d516c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:21:13.710780  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:21:14.454512  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:21:14.454680  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:21:14.454753  543705 disk_worker.go:708] disk space is not compliant
W0321 12:21:14.454757  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:21:14.456192  543705 disk_worker.go:494] system disk:vda1
I0321 12:21:14.456224  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:21:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:21:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:21:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:21:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:21:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:21:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:21:23.409797  543705 memory.go:184] no items to output this cycle
I0321 12:21:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 12:21:29.494159  543705 disk_info.go:125] begin check local disk info of client
I0321 12:21:29.496634  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:21:29.496640  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465480 0xc0004654c0]
E0321 12:21:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:21:33.409792  543705 memory.go:184] no items to output this cycle
I0321 12:21:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 12:21:38.897891  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:21:38.897898  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:21:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:21:43.410804  543705 memory.go:191] Add success.
I0321 12:21:43.409802  543705 cpu.go:282] Add success.
I0321 12:21:43.420518  543705 net.go:648] Add success.
I0321 12:21:43.424051  543705 net.go:770] primary dev: ETH0
I0321 12:21:43.424065  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:21:43.424077  543705 net.go:698] Add success.
I0321 12:21:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:21:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:21:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:21:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:21:53.409793  543705 memory.go:184] no items to output this cycle
I0321 12:21:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 12:22:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:22:03.409783  543705 memory.go:184] no items to output this cycle
I0321 12:22:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:22:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:22:13.409793  543705 memory.go:191] Add success.
I0321 12:22:13.409794  543705 cpu.go:282] Add success.
W0321 12:22:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:22:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:22:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:22:13.420150  543705 net.go:648] Add success.
I0321 12:22:13.423676  543705 net.go:770] primary dev: ETH0
I0321 12:22:13.423689  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:22:13.423700  543705 net.go:698] Add success.
W0321 12:22:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:22:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 12:22:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:22:14.455905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:22:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:22:14.455919  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:22:14.456544  543705 disk_worker.go:494] system disk:vda1
I0321 12:22:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:22:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:22:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:22:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:22:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:22:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:22:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:22:16.472318  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:22:23.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:22:23.410262  543705 cpu.go:275] no items to output this cycle
I0321 12:22:23.410267  543705 memory.go:184] no items to output this cycle
I0321 12:22:29.497182  543705 disk_info.go:125] begin check local disk info of client
I0321 12:22:29.499704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:22:29.499710  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270640 0xc000270680]
E0321 12:22:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:22:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 12:22:33.409811  543705 memory.go:184] no items to output this cycle
E0321 12:22:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:22:43.409784  543705 memory.go:191] Add success.
I0321 12:22:43.409804  543705 cpu.go:282] Add success.
I0321 12:22:43.419894  543705 net.go:648] Add success.
I0321 12:22:43.422802  543705 net.go:770] primary dev: ETH0
I0321 12:22:43.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:22:43.422829  543705 net.go:698] Add success.
I0321 12:22:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:22:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:22:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:22:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:22:53.409793  543705 memory.go:184] no items to output this cycle
I0321 12:22:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 12:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:23:03.409768  543705 memory.go:184] no items to output this cycle
I0321 12:23:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 12:23:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:23:13.409782  543705 memory.go:191] Add success.
W0321 12:23:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 12:23:13.409814  543705 cpu.go:282] Add success.
W0321 12:23:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:23:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:23:13.420059  543705 net.go:648] Add success.
I0321 12:23:13.422692  543705 net.go:770] primary dev: ETH0
I0321 12:23:13.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:23:13.422726  543705 net.go:698] Add success.
I0321 12:23:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:23:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:23:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 12:23:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:23:14.456505  543705 disk_worker.go:494] system disk:vda1
I0321 12:23:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:23:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:23:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:23:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:23:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:23:23.409794  543705 memory.go:184] no items to output this cycle
I0321 12:23:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 12:23:29.500140  543705 disk_info.go:125] begin check local disk info of client
I0321 12:23:29.502656  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:23:29.502664  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039cdc0 0xc00039ce00]
E0321 12:23:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:23:33.409766  543705 memory.go:184] no items to output this cycle
I0321 12:23:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 12:23:43.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:23:43.409914  543705 cpu.go:282] Add success.
I0321 12:23:43.409924  543705 memory.go:191] Add success.
I0321 12:23:43.419739  543705 net.go:648] Add success.
I0321 12:23:43.422773  543705 net.go:770] primary dev: ETH0
I0321 12:23:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:23:43.422797  543705 net.go:698] Add success.
I0321 12:23:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:23:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:23:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:23:53.410732  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:23:53.410754  543705 memory.go:184] no items to output this cycle
I0321 12:23:53.410764  543705 cpu.go:275] no items to output this cycle
E0321 12:24:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:24:03.409766  543705 memory.go:184] no items to output this cycle
I0321 12:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 12:24:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:24:13.409824  543705 memory.go:191] Add success.
I0321 12:24:13.409827  543705 cpu.go:282] Add success.
W0321 12:24:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:24:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:24:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:24:13.420154  543705 net.go:648] Add success.
I0321 12:24:13.422648  543705 net.go:770] primary dev: ETH0
I0321 12:24:13.422660  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:24:13.422672  543705 net.go:698] Add success.
I0321 12:24:13.469431  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9dccdb65-2364-4038-af0f-68cbba2ca44c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:24:13.469466  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:24:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:24:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:24:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 12:24:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:24:14.456521  543705 disk_worker.go:494] system disk:vda1
I0321 12:24:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:24:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:24:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:24:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:24:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:24:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:24:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:24:23.409778  543705 memory.go:184] no items to output this cycle
I0321 12:24:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 12:24:29.503208  543705 disk_info.go:125] begin check local disk info of client
I0321 12:24:29.505694  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:24:29.505699  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5280 0xc0000c52c0]
E0321 12:24:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:24:33.409798  543705 memory.go:184] no items to output this cycle
I0321 12:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 12:24:38.900195  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:24:38.900201  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:24:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:24:43.410718  543705 memory.go:191] Add success.
I0321 12:24:43.409798  543705 cpu.go:282] Add success.
I0321 12:24:43.420437  543705 net.go:648] Add success.
I0321 12:24:43.423101  543705 net.go:770] primary dev: ETH0
I0321 12:24:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:24:43.423130  543705 net.go:698] Add success.
I0321 12:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:24:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:24:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:24:53.409784  543705 memory.go:184] no items to output this cycle
I0321 12:24:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 12:25:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:25:03.409787  543705 memory.go:184] no items to output this cycle
I0321 12:25:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:25:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:25:13.409791  543705 memory.go:191] Add success.
I0321 12:25:13.409796  543705 cpu.go:282] Add success.
W0321 12:25:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:25:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:25:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:25:13.420043  543705 net.go:648] Add success.
I0321 12:25:13.422991  543705 net.go:770] primary dev: ETH0
I0321 12:25:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:25:13.423016  543705 net.go:698] Add success.
I0321 12:25:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:25:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:25:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 12:25:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:25:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 12:25:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:25:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:25:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:25:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:25:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:25:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:25:23.409776  543705 memory.go:184] no items to output this cycle
I0321 12:25:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 12:25:29.506164  543705 disk_info.go:125] begin check local disk info of client
I0321 12:25:29.508634  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:25:29.508639  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e0e80 0xc0004e0ec0]
E0321 12:25:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:25:33.409794  543705 memory.go:184] no items to output this cycle
I0321 12:25:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 12:25:43.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:25:43.409983  543705 memory.go:191] Add success.
I0321 12:25:43.410008  543705 cpu.go:282] Add success.
I0321 12:25:43.419718  543705 net.go:648] Add success.
I0321 12:25:43.422407  543705 net.go:770] primary dev: ETH0
I0321 12:25:43.422419  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:25:43.422431  543705 net.go:698] Add success.
I0321 12:25:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:25:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:25:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:25:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:25:53.409822  543705 memory.go:184] no items to output this cycle
I0321 12:25:53.409828  543705 cpu.go:275] no items to output this cycle
E0321 12:26:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:26:03.409780  543705 memory.go:184] no items to output this cycle
I0321 12:26:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 12:26:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:26:13.409838  543705 memory.go:191] Add success.
I0321 12:26:13.409839  543705 cpu.go:282] Add success.
W0321 12:26:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:26:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:26:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:26:13.420232  543705 net.go:648] Add success.
I0321 12:26:13.422901  543705 net.go:770] primary dev: ETH0
I0321 12:26:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:26:13.422926  543705 net.go:698] Add success.
I0321 12:26:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:26:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:26:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0321 12:26:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:26:14.456485  543705 disk_worker.go:494] system disk:vda1
I0321 12:26:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:26:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:26:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:26:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:26:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:26:23.409805  543705 memory.go:184] no items to output this cycle
I0321 12:26:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 12:26:29.509240  543705 disk_info.go:125] begin check local disk info of client
I0321 12:26:29.511732  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:26:29.511738  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0480 0xc0002b04c0]
E0321 12:26:33.410566  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:26:33.410582  543705 memory.go:184] no items to output this cycle
I0321 12:26:33.410635  543705 cpu.go:275] no items to output this cycle
E0321 12:26:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:26:43.409790  543705 memory.go:191] Add success.
I0321 12:26:43.409808  543705 cpu.go:282] Add success.
I0321 12:26:43.420069  543705 net.go:648] Add success.
I0321 12:26:43.422756  543705 net.go:770] primary dev: ETH0
I0321 12:26:43.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:26:43.422779  543705 net.go:698] Add success.
I0321 12:26:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:26:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:26:53.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:26:53.410273  543705 cpu.go:275] no items to output this cycle
I0321 12:26:53.410272  543705 memory.go:184] no items to output this cycle
E0321 12:27:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:27:03.409816  543705 memory.go:184] no items to output this cycle
I0321 12:27:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 12:27:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:27:13.409797  543705 memory.go:191] Add success.
I0321 12:27:13.409818  543705 cpu.go:282] Add success.
W0321 12:27:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:27:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:27:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:27:13.420244  543705 net.go:648] Add success.
I0321 12:27:13.429616  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 12:27:13.429710  543705 net.go:770] primary dev: ETH0
I0321 12:27:13.429723  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:27:13.429737  543705 net.go:698] Add success.
I0321 12:27:13.453294  543705 event_worker.go:152] Polling the log file for events...
I0321 12:27:13.469444  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9697986c-2ae4-47a4-9bf0-63385dd8cb15","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:27:13.469477  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 12:27:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:27:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 12:27:14.455209  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:27:14.456077  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:27:14.456086  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:27:14.456101  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:27:14.456520  543705 disk_worker.go:494] system disk:vda1
I0321 12:27:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:27:15.456781  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:27:15.456789  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:27:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:27:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:27:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:27:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:27:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:27:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:27:23.409768  543705 memory.go:184] no items to output this cycle
I0321 12:27:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 12:27:29.512198  543705 disk_info.go:125] begin check local disk info of client
I0321 12:27:29.514672  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:27:29.514678  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e0240 0xc0004e0280]
E0321 12:27:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:27:33.409805  543705 memory.go:184] no items to output this cycle
I0321 12:27:33.409818  543705 cpu.go:275] no items to output this cycle
I0321 12:27:38.901737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:27:38.901743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:27:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:27:43.410673  543705 memory.go:191] Add success.
I0321 12:27:43.409810  543705 cpu.go:282] Add success.
I0321 12:27:43.420362  543705 net.go:648] Add success.
I0321 12:27:43.423169  543705 net.go:770] primary dev: ETH0
I0321 12:27:43.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:27:43.423197  543705 net.go:698] Add success.
I0321 12:27:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:27:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:27:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:27:53.410197  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:27:53.410215  543705 memory.go:184] no items to output this cycle
I0321 12:27:53.410227  543705 cpu.go:275] no items to output this cycle
E0321 12:28:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:28:03.409775  543705 memory.go:184] no items to output this cycle
I0321 12:28:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 12:28:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:28:13.409813  543705 memory.go:191] Add success.
I0321 12:28:13.409822  543705 cpu.go:282] Add success.
W0321 12:28:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:28:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:28:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:28:13.420173  543705 net.go:648] Add success.
I0321 12:28:13.422913  543705 net.go:770] primary dev: ETH0
I0321 12:28:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:28:13.422939  543705 net.go:698] Add success.
I0321 12:28:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:28:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:28:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 12:28:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:28:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 12:28:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:28:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:28:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:28:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:28:23.409795  543705 memory.go:184] no items to output this cycle
I0321 12:28:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 12:28:29.515215  543705 disk_info.go:125] begin check local disk info of client
I0321 12:28:29.517732  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:28:29.517738  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582900 0xc000582940]
E0321 12:28:33.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:28:33.409896  543705 memory.go:184] no items to output this cycle
I0321 12:28:33.409934  543705 cpu.go:275] no items to output this cycle
E0321 12:28:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:28:43.409785  543705 memory.go:191] Add success.
I0321 12:28:43.409815  543705 cpu.go:282] Add success.
I0321 12:28:43.419872  543705 net.go:648] Add success.
I0321 12:28:43.423099  543705 net.go:770] primary dev: ETH0
I0321 12:28:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:28:43.423124  543705 net.go:698] Add success.
I0321 12:28:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:28:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:28:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:28:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:28:53.409790  543705 memory.go:184] no items to output this cycle
I0321 12:28:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 12:29:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:29:03.409787  543705 memory.go:184] no items to output this cycle
I0321 12:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 12:29:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:29:13.409790  543705 memory.go:191] Add success.
I0321 12:29:13.409793  543705 cpu.go:282] Add success.
W0321 12:29:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:29:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:29:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:29:13.420296  543705 net.go:648] Add success.
I0321 12:29:13.423154  543705 net.go:770] primary dev: ETH0
I0321 12:29:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:29:13.423184  543705 net.go:698] Add success.
I0321 12:29:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:29:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:29:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 12:29:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:29:14.456828  543705 disk_worker.go:494] system disk:vda1
I0321 12:29:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:29:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:29:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:29:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:29:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:29:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:29:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:29:23.409799  543705 memory.go:184] no items to output this cycle
I0321 12:29:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 12:29:29.518234  543705 disk_info.go:125] begin check local disk info of client
I0321 12:29:29.520753  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:29:29.520759  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee480 0xc0003ee4c0]
E0321 12:29:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:29:33.409769  543705 memory.go:184] no items to output this cycle
I0321 12:29:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 12:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:29:43.409785  543705 memory.go:191] Add success.
I0321 12:29:43.409808  543705 cpu.go:282] Add success.
I0321 12:29:43.419843  543705 net.go:648] Add success.
I0321 12:29:43.423046  543705 net.go:770] primary dev: ETH0
I0321 12:29:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:29:43.423071  543705 net.go:698] Add success.
I0321 12:29:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:29:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:29:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:29:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:29:53.409796  543705 memory.go:184] no items to output this cycle
I0321 12:29:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 12:30:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:30:03.409781  543705 memory.go:184] no items to output this cycle
I0321 12:30:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 12:30:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:30:13.409811  543705 memory.go:191] Add success.
I0321 12:30:13.409819  543705 cpu.go:282] Add success.
W0321 12:30:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:30:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:30:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:30:13.420190  543705 net.go:648] Add success.
I0321 12:30:13.422848  543705 net.go:770] primary dev: ETH0
I0321 12:30:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:30:13.422877  543705 net.go:698] Add success.
I0321 12:30:13.468733  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f9446453-ecb2-4a15-8e71-8535221a402d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:30:13.468777  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:30:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:30:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:30:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 12:30:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:30:14.456519  543705 disk_worker.go:494] system disk:vda1
I0321 12:30:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:30:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:30:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:30:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:30:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:30:23.409805  543705 memory.go:184] no items to output this cycle
I0321 12:30:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 12:30:29.521257  543705 disk_info.go:125] begin check local disk info of client
I0321 12:30:29.523834  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:30:29.523841  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 12:30:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:30:33.409775  543705 cpu.go:275] no items to output this cycle
I0321 12:30:33.409784  543705 memory.go:184] no items to output this cycle
I0321 12:30:38.904210  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:30:38.904217  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:30:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:30:43.410650  543705 memory.go:191] Add success.
I0321 12:30:43.409804  543705 cpu.go:282] Add success.
I0321 12:30:43.420334  543705 net.go:648] Add success.
I0321 12:30:43.423469  543705 net.go:770] primary dev: ETH0
I0321 12:30:43.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:30:43.423498  543705 net.go:698] Add success.
I0321 12:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:30:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:30:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:30:53.410214  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:30:53.410232  543705 memory.go:184] no items to output this cycle
I0321 12:30:53.410261  543705 cpu.go:275] no items to output this cycle
E0321 12:31:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:31:03.409776  543705 memory.go:184] no items to output this cycle
I0321 12:31:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 12:31:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:31:13.409811  543705 memory.go:191] Add success.
I0321 12:31:13.409821  543705 cpu.go:282] Add success.
W0321 12:31:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:31:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:31:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:31:13.420133  543705 net.go:648] Add success.
I0321 12:31:13.423238  543705 net.go:770] primary dev: ETH0
I0321 12:31:13.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:31:13.423263  543705 net.go:698] Add success.
I0321 12:31:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:31:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:31:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 12:31:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:31:14.456831  543705 disk_worker.go:494] system disk:vda1
I0321 12:31:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:31:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:31:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:31:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:31:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:31:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:31:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:31:23.409765  543705 memory.go:184] no items to output this cycle
I0321 12:31:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 12:31:29.524261  543705 disk_info.go:125] begin check local disk info of client
I0321 12:31:29.526758  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:31:29.526765  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003951c0 0xc000395200]
E0321 12:31:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:31:33.409790  543705 memory.go:184] no items to output this cycle
I0321 12:31:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 12:31:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:31:43.409786  543705 memory.go:191] Add success.
I0321 12:31:43.409787  543705 cpu.go:282] Add success.
I0321 12:31:43.420027  543705 net.go:648] Add success.
I0321 12:31:43.422832  543705 net.go:770] primary dev: ETH0
I0321 12:31:43.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:31:43.422861  543705 net.go:698] Add success.
I0321 12:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:31:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:31:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:31:53.409790  543705 memory.go:184] no items to output this cycle
I0321 12:31:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 12:32:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:32:03.409781  543705 memory.go:184] no items to output this cycle
I0321 12:32:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 12:32:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:32:13.409817  543705 memory.go:191] Add success.
I0321 12:32:13.409827  543705 cpu.go:282] Add success.
W0321 12:32:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:32:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:32:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:32:13.420172  543705 net.go:648] Add success.
I0321 12:32:13.423418  543705 net.go:770] primary dev: ETH0
I0321 12:32:13.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:32:13.423441  543705 net.go:698] Add success.
W0321 12:32:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:32:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 12:32:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:32:14.456904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:32:14.456913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:32:14.456919  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:32:14.456975  543705 disk_worker.go:494] system disk:vda1
I0321 12:32:14.457004  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:32:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:32:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:32:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:32:16.458000  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:32:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:32:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:32:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:32:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:32:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 12:32:23.409783  543705 memory.go:184] no items to output this cycle
I0321 12:32:29.527268  543705 disk_info.go:125] begin check local disk info of client
I0321 12:32:29.529728  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:32:29.529735  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464a00 0xc000464a40]
E0321 12:32:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:32:33.409787  543705 memory.go:184] no items to output this cycle
I0321 12:32:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 12:32:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:32:43.409779  543705 memory.go:191] Add success.
I0321 12:32:43.409803  543705 cpu.go:282] Add success.
I0321 12:32:43.419885  543705 net.go:648] Add success.
I0321 12:32:43.422694  543705 net.go:770] primary dev: ETH0
I0321 12:32:43.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:32:43.422719  543705 net.go:698] Add success.
I0321 12:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:32:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:32:53.409785  543705 memory.go:184] no items to output this cycle
I0321 12:32:53.409963  543705 cpu.go:275] no items to output this cycle
E0321 12:33:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:33:03.409786  543705 memory.go:184] no items to output this cycle
I0321 12:33:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 12:33:13.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:33:13.409911  543705 memory.go:191] Add success.
I0321 12:33:13.409917  543705 cpu.go:282] Add success.
W0321 12:33:13.409950  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:33:13.409971  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:33:13.409987  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:33:13.419759  543705 net.go:648] Add success.
I0321 12:33:13.422599  543705 net.go:770] primary dev: ETH0
I0321 12:33:13.422612  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:33:13.422623  543705 net.go:698] Add success.
I0321 12:33:13.468749  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fd745f9e-1c82-442f-a136-a34bc865bed2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:33:13.468781  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:33:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:33:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:33:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 12:33:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:33:14.456514  543705 disk_worker.go:494] system disk:vda1
I0321 12:33:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:33:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:33:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:33:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:33:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:33:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:33:23.409766  543705 memory.go:184] no items to output this cycle
I0321 12:33:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 12:33:29.530291  543705 disk_info.go:125] begin check local disk info of client
I0321 12:33:29.532755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:33:29.532761  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1480 0xc0004a14c0]
E0321 12:33:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:33:33.409797  543705 memory.go:184] no items to output this cycle
I0321 12:33:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 12:33:38.905738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:33:38.905744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:33:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:33:43.410817  543705 memory.go:191] Add success.
I0321 12:33:43.409805  543705 cpu.go:282] Add success.
I0321 12:33:43.420541  543705 net.go:648] Add success.
I0321 12:33:43.423124  543705 net.go:770] primary dev: ETH0
I0321 12:33:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:33:43.423149  543705 net.go:698] Add success.
I0321 12:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:33:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:33:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:33:53.410224  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:33:53.410243  543705 memory.go:184] no items to output this cycle
I0321 12:33:53.410248  543705 cpu.go:275] no items to output this cycle
I0321 12:34:03.409937  543705 cpu.go:275] no items to output this cycle
E0321 12:34:03.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:34:03.410021  543705 memory.go:184] no items to output this cycle
E0321 12:34:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:34:13.409795  543705 memory.go:191] Add success.
W0321 12:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 12:34:13.409825  543705 cpu.go:282] Add success.
W0321 12:34:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:34:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:34:13.420278  543705 net.go:648] Add success.
I0321 12:34:13.423054  543705 net.go:770] primary dev: ETH0
I0321 12:34:13.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:34:13.423079  543705 net.go:698] Add success.
I0321 12:34:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:34:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:34:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 12:34:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:34:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 12:34:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:34:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:34:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:34:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:34:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:34:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:34:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:34:23.409794  543705 memory.go:184] no items to output this cycle
I0321 12:34:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 12:34:29.533309  543705 disk_info.go:125] begin check local disk info of client
I0321 12:34:29.535832  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:34:29.535838  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5680 0xc0000c56c0]
E0321 12:34:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:34:33.409760  543705 memory.go:184] no items to output this cycle
I0321 12:34:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 12:34:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:34:43.409825  543705 memory.go:191] Add success.
I0321 12:34:43.409835  543705 cpu.go:282] Add success.
I0321 12:34:43.419882  543705 net.go:648] Add success.
I0321 12:34:43.422696  543705 net.go:770] primary dev: ETH0
I0321 12:34:43.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:34:43.422733  543705 net.go:698] Add success.
I0321 12:34:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:34:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 12:34:53.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:34:53.409834  543705 memory.go:184] no items to output this cycle
E0321 12:35:03.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:35:03.409878  543705 memory.go:184] no items to output this cycle
I0321 12:35:03.409961  543705 cpu.go:275] no items to output this cycle
E0321 12:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:35:13.409787  543705 memory.go:191] Add success.
I0321 12:35:13.409806  543705 cpu.go:282] Add success.
W0321 12:35:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:35:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:35:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:35:13.420110  543705 net.go:648] Add success.
I0321 12:35:13.423032  543705 net.go:770] primary dev: ETH0
I0321 12:35:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:35:13.423057  543705 net.go:698] Add success.
I0321 12:35:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:35:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:35:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 12:35:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:35:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 12:35:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:35:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:35:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:35:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:35:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:35:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:35:23.409800  543705 memory.go:184] no items to output this cycle
I0321 12:35:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 12:35:29.536321  543705 disk_info.go:125] begin check local disk info of client
I0321 12:35:29.538855  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:35:29.538861  543705 disk_info.go:196] parse disk info done, disk is : [0xc000261400 0xc000261440]
E0321 12:35:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:35:33.409773  543705 memory.go:184] no items to output this cycle
I0321 12:35:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 12:35:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:35:43.409789  543705 memory.go:191] Add success.
I0321 12:35:43.409810  543705 cpu.go:282] Add success.
I0321 12:35:43.419914  543705 net.go:648] Add success.
I0321 12:35:43.422897  543705 net.go:770] primary dev: ETH0
I0321 12:35:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:35:43.422923  543705 net.go:698] Add success.
I0321 12:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:35:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:35:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:35:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:35:53.409793  543705 memory.go:184] no items to output this cycle
I0321 12:35:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 12:36:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:36:03.409796  543705 memory.go:184] no items to output this cycle
I0321 12:36:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 12:36:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:36:13.409792  543705 memory.go:191] Add success.
W0321 12:36:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:36:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:36:13.409827  543705 cpu.go:282] Add success.
I0321 12:36:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:36:13.420118  543705 net.go:648] Add success.
I0321 12:36:13.423027  543705 net.go:770] primary dev: ETH0
I0321 12:36:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:36:13.423051  543705 net.go:698] Add success.
I0321 12:36:13.619152  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c1a433d1-5837-42c6-96ae-dfaecf9341ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:36:13.619183  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:36:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:36:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:36:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 12:36:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:36:14.456533  543705 disk_worker.go:494] system disk:vda1
I0321 12:36:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:36:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:36:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:36:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:36:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:36:23.410231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:36:23.410246  543705 memory.go:184] no items to output this cycle
I0321 12:36:23.410275  543705 cpu.go:275] no items to output this cycle
I0321 12:36:29.539340  543705 disk_info.go:125] begin check local disk info of client
I0321 12:36:29.541849  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:36:29.541856  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352100 0xc000352180]
E0321 12:36:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:36:33.409797  543705 memory.go:184] no items to output this cycle
I0321 12:36:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 12:36:38.905884  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:36:38.905891  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:36:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:36:43.410715  543705 memory.go:191] Add success.
I0321 12:36:43.409814  543705 cpu.go:282] Add success.
I0321 12:36:43.420411  543705 net.go:648] Add success.
I0321 12:36:43.423119  543705 net.go:770] primary dev: ETH0
I0321 12:36:43.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:36:43.423145  543705 net.go:698] Add success.
I0321 12:36:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:36:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:36:53.410259  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:36:53.410282  543705 memory.go:184] no items to output this cycle
I0321 12:36:53.410262  543705 cpu.go:275] no items to output this cycle
E0321 12:37:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:37:03.409780  543705 memory.go:184] no items to output this cycle
I0321 12:37:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:37:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:37:13.409794  543705 memory.go:191] Add success.
I0321 12:37:13.409795  543705 cpu.go:282] Add success.
W0321 12:37:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:37:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:37:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:37:13.420052  543705 net.go:648] Add success.
I0321 12:37:13.422666  543705 net.go:770] primary dev: ETH0
I0321 12:37:13.422679  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:37:13.422691  543705 net.go:698] Add success.
I0321 12:37:13.453252  543705 event_worker.go:152] Polling the log file for events...
W0321 12:37:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:37:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0321 12:37:14.455240  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:37:14.455926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:37:14.455935  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:37:14.455941  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:37:14.456843  543705 disk_worker.go:494] system disk:vda1
I0321 12:37:14.456873  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:37:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:37:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:37:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:37:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:37:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:37:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:37:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:37:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:37:23.409761  543705 memory.go:184] no items to output this cycle
I0321 12:37:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 12:37:29.542351  543705 disk_info.go:125] begin check local disk info of client
I0321 12:37:29.544808  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:37:29.544814  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e0d40 0xc0004e0d80]
E0321 12:37:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:37:33.409788  543705 memory.go:184] no items to output this cycle
I0321 12:37:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 12:37:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:37:43.409782  543705 memory.go:191] Add success.
I0321 12:37:43.409802  543705 cpu.go:282] Add success.
I0321 12:37:43.419894  543705 net.go:648] Add success.
I0321 12:37:43.422734  543705 net.go:770] primary dev: ETH0
I0321 12:37:43.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:37:43.422761  543705 net.go:698] Add success.
I0321 12:37:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:37:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:37:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:37:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:37:53.410249  543705 memory.go:184] no items to output this cycle
I0321 12:37:53.410249  543705 cpu.go:275] no items to output this cycle
E0321 12:38:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:38:03.409773  543705 memory.go:184] no items to output this cycle
I0321 12:38:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 12:38:13.410080  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:38:13.410107  543705 memory.go:191] Add success.
I0321 12:38:13.410131  543705 cpu.go:282] Add success.
W0321 12:38:13.410137  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:38:13.410150  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:38:13.410154  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:38:13.419708  543705 net.go:648] Add success.
I0321 12:38:13.422351  543705 net.go:770] primary dev: ETH0
I0321 12:38:13.422363  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:38:13.422375  543705 net.go:698] Add success.
I0321 12:38:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:38:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:38:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 12:38:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:38:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 12:38:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:38:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:38:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:38:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:38:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:38:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:38:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:38:23.409767  543705 memory.go:184] no items to output this cycle
I0321 12:38:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 12:38:29.545363  543705 disk_info.go:125] begin check local disk info of client
I0321 12:38:29.547878  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:38:29.547884  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033e380 0xc00033e3c0]
E0321 12:38:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:38:33.409794  543705 memory.go:184] no items to output this cycle
I0321 12:38:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 12:38:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:38:43.409776  543705 memory.go:191] Add success.
I0321 12:38:43.409805  543705 cpu.go:282] Add success.
I0321 12:38:43.419993  543705 net.go:648] Add success.
I0321 12:38:43.422920  543705 net.go:770] primary dev: ETH0
I0321 12:38:43.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:38:43.422958  543705 net.go:698] Add success.
I0321 12:38:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:38:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:38:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:38:53.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:38:53.410267  543705 memory.go:184] no items to output this cycle
I0321 12:38:53.410281  543705 cpu.go:275] no items to output this cycle
E0321 12:39:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:39:03.409807  543705 memory.go:184] no items to output this cycle
I0321 12:39:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 12:39:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:39:13.409817  543705 memory.go:191] Add success.
I0321 12:39:13.409822  543705 cpu.go:282] Add success.
W0321 12:39:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:39:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:39:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:39:13.419722  543705 net.go:648] Add success.
I0321 12:39:13.422864  543705 net.go:770] primary dev: ETH0
I0321 12:39:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:39:13.422892  543705 net.go:698] Add success.
I0321 12:39:13.468755  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7875c832-87c4-4858-833c-86105729fb65","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:39:13.468785  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:39:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:39:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:39:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 12:39:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:39:14.456613  543705 disk_worker.go:494] system disk:vda1
I0321 12:39:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:39:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:39:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:39:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:39:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:39:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:39:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:39:23.409796  543705 memory.go:184] no items to output this cycle
I0321 12:39:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 12:39:29.548375  543705 disk_info.go:125] begin check local disk info of client
I0321 12:39:29.550845  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:39:29.550851  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260680 0xc0002606c0]
E0321 12:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:39:33.409774  543705 memory.go:184] no items to output this cycle
I0321 12:39:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 12:39:38.908241  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:39:38.908249  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:39:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:39:43.410854  543705 memory.go:191] Add success.
I0321 12:39:43.409796  543705 cpu.go:282] Add success.
I0321 12:39:43.420552  543705 net.go:648] Add success.
I0321 12:39:43.423388  543705 net.go:770] primary dev: ETH0
I0321 12:39:43.423401  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:39:43.423413  543705 net.go:698] Add success.
I0321 12:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:39:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:39:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:39:53.410343  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:39:53.410360  543705 memory.go:184] no items to output this cycle
I0321 12:39:53.410362  543705 cpu.go:275] no items to output this cycle
E0321 12:40:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:40:03.409772  543705 memory.go:184] no items to output this cycle
I0321 12:40:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 12:40:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:40:13.409814  543705 memory.go:191] Add success.
I0321 12:40:13.409825  543705 cpu.go:282] Add success.
W0321 12:40:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:40:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:40:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:40:13.420178  543705 net.go:648] Add success.
I0321 12:40:13.423098  543705 net.go:770] primary dev: ETH0
I0321 12:40:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:40:13.423129  543705 net.go:698] Add success.
I0321 12:40:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:40:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:40:14.455376  543705 disk_worker.go:708] disk space is not compliant
W0321 12:40:14.455380  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:40:14.457028  543705 disk_worker.go:494] system disk:vda1
I0321 12:40:14.457055  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:40:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:40:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:40:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:40:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:40:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:40:23.409793  543705 memory.go:184] no items to output this cycle
I0321 12:40:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 12:40:29.551390  543705 disk_info.go:125] begin check local disk info of client
I0321 12:40:29.553883  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:40:29.553889  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049dac0 0xc00049db00]
E0321 12:40:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:40:33.409777  543705 memory.go:184] no items to output this cycle
I0321 12:40:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 12:40:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:40:43.409782  543705 memory.go:191] Add success.
I0321 12:40:43.409801  543705 cpu.go:282] Add success.
I0321 12:40:43.419903  543705 net.go:648] Add success.
I0321 12:40:43.423295  543705 net.go:770] primary dev: ETH0
I0321 12:40:43.423310  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:40:43.423324  543705 net.go:698] Add success.
I0321 12:40:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:40:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:40:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:40:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:40:53.409803  543705 memory.go:184] no items to output this cycle
I0321 12:40:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 12:41:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:41:03.409776  543705 memory.go:184] no items to output this cycle
I0321 12:41:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 12:41:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:41:13.409817  543705 memory.go:191] Add success.
I0321 12:41:13.409825  543705 cpu.go:282] Add success.
W0321 12:41:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:41:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:41:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:41:13.420310  543705 net.go:648] Add success.
I0321 12:41:13.423224  543705 net.go:770] primary dev: ETH0
I0321 12:41:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:41:13.423250  543705 net.go:698] Add success.
I0321 12:41:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:41:14.455365  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:41:14.455376  543705 disk_worker.go:708] disk space is not compliant
W0321 12:41:14.455383  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:41:14.457549  543705 disk_worker.go:494] system disk:vda1
I0321 12:41:14.457591  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:41:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:41:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:41:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:41:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:41:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:41:23.409762  543705 memory.go:184] no items to output this cycle
I0321 12:41:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 12:41:29.554405  543705 disk_info.go:125] begin check local disk info of client
I0321 12:41:29.556875  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:41:29.556881  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b12c0 0xc0004b1300]
E0321 12:41:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:41:33.409789  543705 memory.go:184] no items to output this cycle
I0321 12:41:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 12:41:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:41:43.409782  543705 memory.go:191] Add success.
I0321 12:41:43.409805  543705 cpu.go:282] Add success.
I0321 12:41:43.419849  543705 net.go:648] Add success.
I0321 12:41:43.423046  543705 net.go:770] primary dev: ETH0
I0321 12:41:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:41:43.423077  543705 net.go:698] Add success.
I0321 12:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:41:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:41:53.410322  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:41:53.410342  543705 memory.go:184] no items to output this cycle
I0321 12:41:53.410378  543705 cpu.go:275] no items to output this cycle
E0321 12:42:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:42:03.409798  543705 memory.go:184] no items to output this cycle
I0321 12:42:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 12:42:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:42:13.409820  543705 memory.go:191] Add success.
I0321 12:42:13.409826  543705 cpu.go:282] Add success.
W0321 12:42:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:42:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:42:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:42:13.420132  543705 net.go:648] Add success.
I0321 12:42:13.422944  543705 net.go:770] primary dev: ETH0
I0321 12:42:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:42:13.422972  543705 net.go:698] Add success.
I0321 12:42:13.464029  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae95b731-4bc6-486b-ae24-3c67896c61cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:42:13.464062  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 12:42:14.455411  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:42:14.455629  543705 disk_worker.go:708] disk space is not compliant
W0321 12:42:14.455635  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:42:14.456327  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:42:14.456336  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:42:14.456341  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:42:14.457996  543705 disk_worker.go:494] system disk:vda1
I0321 12:42:14.458038  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:42:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:42:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:42:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:42:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:42:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:42:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:42:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:42:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:42:23.409778  543705 memory.go:184] no items to output this cycle
I0321 12:42:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 12:42:29.557423  543705 disk_info.go:125] begin check local disk info of client
I0321 12:42:29.559950  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:42:29.559959  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326780 0xc0003267c0]
E0321 12:42:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:42:33.409796  543705 memory.go:184] no items to output this cycle
I0321 12:42:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 12:42:38.909729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:42:38.909735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:42:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:42:43.410735  543705 memory.go:191] Add success.
I0321 12:42:43.409803  543705 cpu.go:282] Add success.
I0321 12:42:43.420393  543705 net.go:648] Add success.
I0321 12:42:43.423318  543705 net.go:770] primary dev: ETH0
I0321 12:42:43.423331  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:42:43.423344  543705 net.go:698] Add success.
I0321 12:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:42:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:42:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:42:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:42:53.409817  543705 memory.go:184] no items to output this cycle
I0321 12:42:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 12:43:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:43:03.409781  543705 memory.go:184] no items to output this cycle
I0321 12:43:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 12:43:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:43:13.409781  543705 memory.go:191] Add success.
I0321 12:43:13.409795  543705 cpu.go:282] Add success.
W0321 12:43:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:43:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:43:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:43:13.420157  543705 net.go:648] Add success.
I0321 12:43:13.422993  543705 net.go:770] primary dev: ETH0
I0321 12:43:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:43:13.423020  543705 net.go:698] Add success.
I0321 12:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:43:14.455254  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:43:14.455322  543705 disk_worker.go:708] disk space is not compliant
W0321 12:43:14.455325  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:43:14.457560  543705 disk_worker.go:494] system disk:vda1
I0321 12:43:14.457601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:43:15.455000  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:43:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:43:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:43:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:43:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:43:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:43:23.409762  543705 memory.go:184] no items to output this cycle
I0321 12:43:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 12:43:29.560442  543705 disk_info.go:125] begin check local disk info of client
I0321 12:43:29.562964  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:43:29.562970  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae4c0 0xc0003ae500]
E0321 12:43:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:43:33.409796  543705 memory.go:184] no items to output this cycle
I0321 12:43:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 12:43:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:43:43.409809  543705 memory.go:191] Add success.
I0321 12:43:43.409817  543705 cpu.go:282] Add success.
I0321 12:43:43.419847  543705 net.go:648] Add success.
I0321 12:43:43.422477  543705 net.go:770] primary dev: ETH0
I0321 12:43:43.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:43:43.422502  543705 net.go:698] Add success.
I0321 12:43:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:43:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:43:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:43:53.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:43:53.409868  543705 memory.go:184] no items to output this cycle
I0321 12:43:53.409870  543705 cpu.go:275] no items to output this cycle
E0321 12:44:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:44:03.409800  543705 memory.go:184] no items to output this cycle
I0321 12:44:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 12:44:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:44:13.409790  543705 memory.go:191] Add success.
I0321 12:44:13.409809  543705 cpu.go:282] Add success.
W0321 12:44:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:44:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:44:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:44:13.420136  543705 net.go:648] Add success.
I0321 12:44:13.422821  543705 net.go:770] primary dev: ETH0
I0321 12:44:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:44:13.422847  543705 net.go:698] Add success.
I0321 12:44:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:44:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:44:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 12:44:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:44:14.457001  543705 disk_worker.go:494] system disk:vda1
I0321 12:44:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:44:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:44:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:44:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:44:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:44:23.409778  543705 memory.go:184] no items to output this cycle
I0321 12:44:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 12:44:29.563470  543705 disk_info.go:125] begin check local disk info of client
I0321 12:44:29.565971  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:44:29.565977  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4180 0xc0000c41c0]
E0321 12:44:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:44:33.409799  543705 memory.go:184] no items to output this cycle
I0321 12:44:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 12:44:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:44:43.409820  543705 memory.go:191] Add success.
I0321 12:44:43.409826  543705 cpu.go:282] Add success.
I0321 12:44:43.419912  543705 net.go:648] Add success.
I0321 12:44:43.422684  543705 net.go:770] primary dev: ETH0
I0321 12:44:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:44:43.422716  543705 net.go:698] Add success.
I0321 12:44:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:44:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:44:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:44:53.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:44:53.409859  543705 memory.go:184] no items to output this cycle
I0321 12:44:53.409886  543705 cpu.go:275] no items to output this cycle
E0321 12:45:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:45:03.409782  543705 memory.go:184] no items to output this cycle
I0321 12:45:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 12:45:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:45:13.409813  543705 memory.go:191] Add success.
I0321 12:45:13.409819  543705 cpu.go:282] Add success.
W0321 12:45:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:45:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:45:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:45:13.420100  543705 net.go:648] Add success.
I0321 12:45:13.423125  543705 net.go:770] primary dev: ETH0
I0321 12:45:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:45:13.423149  543705 net.go:698] Add success.
I0321 12:45:13.464169  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e2dea8b0-9876-4eed-9054-4eaf944360b9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:45:13.464200  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:45:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:45:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:45:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 12:45:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:45:14.456731  543705 disk_worker.go:494] system disk:vda1
I0321 12:45:14.456759  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:45:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:45:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:45:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:45:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:45:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:45:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:45:23.409771  543705 memory.go:184] no items to output this cycle
I0321 12:45:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 12:45:29.566463  543705 disk_info.go:125] begin check local disk info of client
I0321 12:45:29.568980  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:45:29.568986  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac40 0xc00007ac80]
E0321 12:45:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:45:33.409793  543705 memory.go:184] no items to output this cycle
I0321 12:45:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 12:45:38.912245  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:45:38.912251  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:45:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:45:43.410722  543705 memory.go:191] Add success.
I0321 12:45:43.409804  543705 cpu.go:282] Add success.
I0321 12:45:43.420408  543705 net.go:648] Add success.
I0321 12:45:43.423125  543705 net.go:770] primary dev: ETH0
I0321 12:45:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:45:43.423153  543705 net.go:698] Add success.
I0321 12:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:45:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:45:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:45:53.409801  543705 cpu.go:275] no items to output this cycle
I0321 12:45:53.409816  543705 memory.go:184] no items to output this cycle
E0321 12:46:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:46:03.409790  543705 memory.go:184] no items to output this cycle
I0321 12:46:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 12:46:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:46:13.409792  543705 memory.go:191] Add success.
I0321 12:46:13.409795  543705 cpu.go:282] Add success.
W0321 12:46:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:46:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:46:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:46:13.420160  543705 net.go:648] Add success.
I0321 12:46:13.423160  543705 net.go:770] primary dev: ETH0
I0321 12:46:13.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:46:13.423340  543705 net.go:698] Add success.
I0321 12:46:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:46:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:46:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 12:46:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:46:14.456559  543705 disk_worker.go:494] system disk:vda1
I0321 12:46:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:46:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:46:16.458036  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:46:16.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:46:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:46:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:46:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:46:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 12:46:23.409784  543705 memory.go:184] no items to output this cycle
I0321 12:46:29.569485  543705 disk_info.go:125] begin check local disk info of client
I0321 12:46:29.572062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:46:29.572068  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cb8c0 0xc0004cb900]
E0321 12:46:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:46:33.409778  543705 memory.go:184] no items to output this cycle
I0321 12:46:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 12:46:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:46:43.409784  543705 memory.go:191] Add success.
I0321 12:46:43.409811  543705 cpu.go:282] Add success.
I0321 12:46:43.419941  543705 net.go:648] Add success.
I0321 12:46:43.422681  543705 net.go:770] primary dev: ETH0
I0321 12:46:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:46:43.422707  543705 net.go:698] Add success.
I0321 12:46:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:46:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:46:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:46:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:46:53.409800  543705 memory.go:184] no items to output this cycle
I0321 12:46:53.409866  543705 cpu.go:275] no items to output this cycle
E0321 12:47:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:47:03.409819  543705 memory.go:184] no items to output this cycle
I0321 12:47:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 12:47:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:47:13.409812  543705 memory.go:191] Add success.
I0321 12:47:13.409826  543705 cpu.go:282] Add success.
W0321 12:47:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:47:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:47:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:47:13.420544  543705 net.go:648] Add success.
I0321 12:47:13.423291  543705 net.go:770] primary dev: ETH0
I0321 12:47:13.423304  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:47:13.423480  543705 net.go:698] Add success.
I0321 12:47:13.452801  543705 event_worker.go:152] Polling the log file for events...
W0321 12:47:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 12:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:47:14.456855  543705 disk_worker.go:494] system disk:vda1
I0321 12:47:14.456895  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:47:14.457171  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:47:14.457179  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:47:14.457183  543705 custom_config.go:64] query custom config with name: gpu
E0321 12:47:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:47:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:47:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:47:16.457991  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:47:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:47:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:47:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:47:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:47:23.409778  543705 memory.go:184] no items to output this cycle
I0321 12:47:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 12:47:29.572501  543705 disk_info.go:125] begin check local disk info of client
I0321 12:47:29.575027  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:47:29.575034  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462100 0xc000462140]
E0321 12:47:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:47:33.409788  543705 memory.go:184] no items to output this cycle
I0321 12:47:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 12:47:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:47:43.409795  543705 memory.go:191] Add success.
I0321 12:47:43.409841  543705 cpu.go:282] Add success.
I0321 12:47:43.420178  543705 net.go:648] Add success.
I0321 12:47:43.423250  543705 net.go:770] primary dev: ETH0
I0321 12:47:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:47:43.423280  543705 net.go:698] Add success.
I0321 12:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:47:53.410408  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:47:53.410427  543705 memory.go:184] no items to output this cycle
I0321 12:47:53.410473  543705 cpu.go:275] no items to output this cycle
E0321 12:48:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:48:03.409784  543705 memory.go:184] no items to output this cycle
I0321 12:48:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:48:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:48:13.409820  543705 memory.go:191] Add success.
I0321 12:48:13.409829  543705 cpu.go:282] Add success.
W0321 12:48:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:48:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:48:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:48:13.420065  543705 net.go:648] Add success.
I0321 12:48:13.423120  543705 net.go:770] primary dev: ETH0
I0321 12:48:13.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:48:13.423237  543705 net.go:698] Add success.
I0321 12:48:13.469377  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"45b2ffbb-6164-4044-ba59-eae16832c210","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:48:13.469410  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:48:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:48:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 12:48:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:48:14.456699  543705 disk_worker.go:494] system disk:vda1
I0321 12:48:14.456734  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:48:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:48:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:48:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:48:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:48:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:48:23.409775  543705 memory.go:184] no items to output this cycle
I0321 12:48:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 12:48:29.575511  543705 disk_info.go:125] begin check local disk info of client
I0321 12:48:29.578075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:48:29.578081  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e40 0xc0000c4e80]
E0321 12:48:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:48:33.409783  543705 cpu.go:275] no items to output this cycle
I0321 12:48:33.409783  543705 memory.go:184] no items to output this cycle
I0321 12:48:38.913736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:48:38.913743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:48:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:48:43.410601  543705 memory.go:191] Add success.
I0321 12:48:43.409804  543705 cpu.go:282] Add success.
I0321 12:48:43.420403  543705 net.go:648] Add success.
I0321 12:48:43.423023  543705 net.go:770] primary dev: ETH0
I0321 12:48:43.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:48:43.423052  543705 net.go:698] Add success.
I0321 12:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:48:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:48:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:48:53.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:48:53.409829  543705 cpu.go:275] no items to output this cycle
I0321 12:48:53.409835  543705 memory.go:184] no items to output this cycle
E0321 12:49:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:49:03.409790  543705 memory.go:184] no items to output this cycle
I0321 12:49:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 12:49:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:49:13.409816  543705 memory.go:191] Add success.
I0321 12:49:13.409827  543705 cpu.go:282] Add success.
W0321 12:49:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:49:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:49:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:49:13.420402  543705 net.go:648] Add success.
I0321 12:49:13.423408  543705 net.go:770] primary dev: ETH0
I0321 12:49:13.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:49:13.423438  543705 net.go:698] Add success.
I0321 12:49:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:49:14.455079  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:49:14.455140  543705 disk_worker.go:708] disk space is not compliant
W0321 12:49:14.455142  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:49:14.456474  543705 disk_worker.go:494] system disk:vda1
I0321 12:49:14.456517  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:49:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:49:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:49:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:49:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:49:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:49:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 12:49:23.409781  543705 memory.go:184] no items to output this cycle
I0321 12:49:29.578535  543705 disk_info.go:125] begin check local disk info of client
I0321 12:49:29.581029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:49:29.581035  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330640 0xc000330680]
E0321 12:49:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:49:33.409797  543705 memory.go:184] no items to output this cycle
I0321 12:49:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 12:49:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:49:43.409811  543705 memory.go:191] Add success.
I0321 12:49:43.409819  543705 cpu.go:282] Add success.
I0321 12:49:43.419969  543705 net.go:648] Add success.
I0321 12:49:43.423072  543705 net.go:770] primary dev: ETH0
I0321 12:49:43.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:49:43.423096  543705 net.go:698] Add success.
I0321 12:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:49:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:49:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:49:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:49:53.409780  543705 memory.go:184] no items to output this cycle
I0321 12:49:53.409843  543705 cpu.go:275] no items to output this cycle
E0321 12:50:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:50:03.409786  543705 memory.go:184] no items to output this cycle
I0321 12:50:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 12:50:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:50:13.409797  543705 memory.go:191] Add success.
I0321 12:50:13.409798  543705 cpu.go:282] Add success.
W0321 12:50:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:50:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:50:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:50:13.420482  543705 net.go:648] Add success.
I0321 12:50:13.423918  543705 net.go:770] primary dev: ETH0
I0321 12:50:13.423931  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:50:13.423943  543705 net.go:698] Add success.
I0321 12:50:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:50:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:50:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0321 12:50:14.455148  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:50:14.456464  543705 disk_worker.go:494] system disk:vda1
I0321 12:50:14.456506  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:50:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:50:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:50:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:50:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:50:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:50:23.409770  543705 memory.go:184] no items to output this cycle
I0321 12:50:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 12:50:29.581546  543705 disk_info.go:125] begin check local disk info of client
I0321 12:50:29.584106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:50:29.584112  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0480 0xc0002a04c0]
E0321 12:50:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:50:33.409779  543705 memory.go:184] no items to output this cycle
I0321 12:50:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 12:50:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:50:43.409781  543705 memory.go:191] Add success.
I0321 12:50:43.409815  543705 cpu.go:282] Add success.
I0321 12:50:43.419886  543705 net.go:648] Add success.
I0321 12:50:43.422599  543705 net.go:770] primary dev: ETH0
I0321 12:50:43.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:50:43.422626  543705 net.go:698] Add success.
I0321 12:50:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:50:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:50:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:50:53.410248  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:50:53.410266  543705 memory.go:184] no items to output this cycle
I0321 12:50:53.410269  543705 cpu.go:275] no items to output this cycle
E0321 12:51:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:51:03.409772  543705 memory.go:184] no items to output this cycle
I0321 12:51:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 12:51:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:51:13.409798  543705 memory.go:191] Add success.
I0321 12:51:13.409799  543705 cpu.go:282] Add success.
W0321 12:51:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:51:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:51:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:51:13.420250  543705 net.go:648] Add success.
I0321 12:51:13.423571  543705 net.go:770] primary dev: ETH0
I0321 12:51:13.423585  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:51:13.423597  543705 net.go:698] Add success.
I0321 12:51:13.469132  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c83b4ab5-0781-4a71-931d-b1560fcf156d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:51:13.469164  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:51:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:51:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:51:14.455139  543705 disk_worker.go:708] disk space is not compliant
W0321 12:51:14.455141  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:51:14.456516  543705 disk_worker.go:494] system disk:vda1
I0321 12:51:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:51:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:51:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:51:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:51:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:51:23.409778  543705 memory.go:184] no items to output this cycle
I0321 12:51:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 12:51:29.584564  543705 disk_info.go:125] begin check local disk info of client
I0321 12:51:29.587073  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:51:29.587080  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002489c0 0xc000248a00]
E0321 12:51:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:51:33.409795  543705 memory.go:184] no items to output this cycle
I0321 12:51:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 12:51:38.913882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:51:38.913890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:51:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:51:43.410686  543705 memory.go:191] Add success.
I0321 12:51:43.409801  543705 cpu.go:282] Add success.
I0321 12:51:43.420389  543705 net.go:648] Add success.
I0321 12:51:43.423047  543705 net.go:770] primary dev: ETH0
I0321 12:51:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:51:43.423078  543705 net.go:698] Add success.
I0321 12:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:51:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:51:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 12:51:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:51:53.409811  543705 memory.go:184] no items to output this cycle
E0321 12:52:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:52:03.409781  543705 memory.go:184] no items to output this cycle
I0321 12:52:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 12:52:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:52:13.409806  543705 memory.go:191] Add success.
I0321 12:52:13.409812  543705 cpu.go:282] Add success.
W0321 12:52:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:52:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:52:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:52:13.420241  543705 net.go:648] Add success.
I0321 12:52:13.423031  543705 net.go:770] primary dev: ETH0
I0321 12:52:13.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:52:13.423055  543705 net.go:698] Add success.
W0321 12:52:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:52:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 12:52:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:52:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:52:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:52:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:52:14.456542  543705 disk_worker.go:494] system disk:vda1
I0321 12:52:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:52:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:52:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:52:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:52:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:52:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:52:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:52:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:52:23.409777  543705 memory.go:184] no items to output this cycle
I0321 12:52:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 12:52:29.587577  543705 disk_info.go:125] begin check local disk info of client
I0321 12:52:29.590068  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:52:29.590073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae740 0xc0004ae780]
E0321 12:52:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:52:33.409787  543705 memory.go:184] no items to output this cycle
I0321 12:52:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 12:52:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:52:43.409786  543705 memory.go:191] Add success.
I0321 12:52:43.409804  543705 cpu.go:282] Add success.
I0321 12:52:43.419886  543705 net.go:648] Add success.
I0321 12:52:43.422360  543705 net.go:770] primary dev: ETH0
I0321 12:52:43.422375  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:52:43.422388  543705 net.go:698] Add success.
I0321 12:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:52:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:52:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:52:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:52:53.409783  543705 memory.go:184] no items to output this cycle
I0321 12:52:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 12:53:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:53:03.409775  543705 memory.go:184] no items to output this cycle
I0321 12:53:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 12:53:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:53:13.409819  543705 memory.go:191] Add success.
I0321 12:53:13.409819  543705 cpu.go:282] Add success.
W0321 12:53:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:53:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:53:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:53:13.420282  543705 net.go:648] Add success.
I0321 12:53:13.423121  543705 net.go:770] primary dev: ETH0
I0321 12:53:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:53:13.423145  543705 net.go:698] Add success.
I0321 12:53:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:53:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:53:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0321 12:53:14.455146  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:53:14.456471  543705 disk_worker.go:494] system disk:vda1
I0321 12:53:14.456513  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:53:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:53:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:53:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:53:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:53:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:53:23.409784  543705 memory.go:184] no items to output this cycle
I0321 12:53:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 12:53:29.590601  543705 disk_info.go:125] begin check local disk info of client
I0321 12:53:29.593061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:53:29.593068  543705 disk_info.go:196] parse disk info done, disk is : [0xc000550700 0xc000550740]
E0321 12:53:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:53:33.409782  543705 memory.go:184] no items to output this cycle
I0321 12:53:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 12:53:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:53:43.409800  543705 memory.go:191] Add success.
I0321 12:53:43.409804  543705 cpu.go:282] Add success.
I0321 12:53:43.419867  543705 net.go:648] Add success.
I0321 12:53:43.422629  543705 net.go:770] primary dev: ETH0
I0321 12:53:43.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:53:43.422654  543705 net.go:698] Add success.
I0321 12:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:53:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:53:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:53:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:53:53.409804  543705 memory.go:184] no items to output this cycle
I0321 12:53:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 12:54:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:54:03.409811  543705 memory.go:184] no items to output this cycle
I0321 12:54:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 12:54:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:54:13.409796  543705 memory.go:191] Add success.
I0321 12:54:13.409815  543705 cpu.go:282] Add success.
W0321 12:54:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:54:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:54:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:54:13.420058  543705 net.go:648] Add success.
I0321 12:54:13.422800  543705 net.go:770] primary dev: ETH0
I0321 12:54:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:54:13.422829  543705 net.go:698] Add success.
I0321 12:54:13.472097  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db6bb65e-5687-47ed-a88d-578ad47889f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:54:13.472127  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 12:54:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:54:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:54:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 12:54:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:54:14.456683  543705 disk_worker.go:494] system disk:vda1
I0321 12:54:14.456731  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:54:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:54:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:54:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:54:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:54:23.409805  543705 memory.go:184] no items to output this cycle
I0321 12:54:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 12:54:29.593601  543705 disk_info.go:125] begin check local disk info of client
I0321 12:54:29.596164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:54:29.596171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2dc0 0xc0002b2e00]
E0321 12:54:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:54:33.409773  543705 memory.go:184] no items to output this cycle
I0321 12:54:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 12:54:38.914039  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:54:38.914046  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:54:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:54:43.410641  543705 memory.go:191] Add success.
I0321 12:54:43.409818  543705 cpu.go:282] Add success.
I0321 12:54:43.420378  543705 net.go:648] Add success.
I0321 12:54:43.423039  543705 net.go:770] primary dev: ETH0
I0321 12:54:43.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:54:43.423064  543705 net.go:698] Add success.
I0321 12:54:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:54:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:54:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:54:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:54:53.409785  543705 memory.go:184] no items to output this cycle
I0321 12:54:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 12:55:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:55:03.409774  543705 memory.go:184] no items to output this cycle
I0321 12:55:03.409810  543705 cpu.go:275] no items to output this cycle
W0321 12:55:13.409712  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:55:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:55:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:55:13.409804  543705 cpu.go:282] Add success.
E0321 12:55:13.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:55:13.409829  543705 memory.go:191] Add success.
I0321 12:55:13.420063  543705 net.go:648] Add success.
I0321 12:55:13.422894  543705 net.go:770] primary dev: ETH0
I0321 12:55:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:55:13.422918  543705 net.go:698] Add success.
I0321 12:55:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:55:14.455413  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:55:14.455427  543705 disk_worker.go:708] disk space is not compliant
W0321 12:55:14.455431  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:55:14.457020  543705 disk_worker.go:494] system disk:vda1
I0321 12:55:14.457049  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:55:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:55:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:55:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:55:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:55:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:55:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:55:23.409794  543705 memory.go:184] no items to output this cycle
I0321 12:55:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 12:55:29.596626  543705 disk_info.go:125] begin check local disk info of client
I0321 12:55:29.599181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:55:29.599188  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260000 0xc000260040]
E0321 12:55:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:55:33.409764  543705 memory.go:184] no items to output this cycle
I0321 12:55:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 12:55:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:55:43.409818  543705 memory.go:191] Add success.
I0321 12:55:43.409826  543705 cpu.go:282] Add success.
I0321 12:55:43.419986  543705 net.go:648] Add success.
I0321 12:55:43.422859  543705 net.go:770] primary dev: ETH0
I0321 12:55:43.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:55:43.422886  543705 net.go:698] Add success.
I0321 12:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:55:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:55:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:55:53.410382  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:55:53.410397  543705 memory.go:184] no items to output this cycle
I0321 12:55:53.410416  543705 cpu.go:275] no items to output this cycle
E0321 12:56:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:56:03.409787  543705 memory.go:184] no items to output this cycle
I0321 12:56:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:56:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:56:13.409792  543705 memory.go:191] Add success.
I0321 12:56:13.409794  543705 cpu.go:282] Add success.
W0321 12:56:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:56:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:56:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:56:13.420069  543705 net.go:648] Add success.
I0321 12:56:13.422690  543705 net.go:770] primary dev: ETH0
I0321 12:56:13.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:56:13.422719  543705 net.go:698] Add success.
I0321 12:56:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:56:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:56:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 12:56:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:56:14.456810  543705 disk_worker.go:494] system disk:vda1
I0321 12:56:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:56:15.454989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:56:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:56:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:56:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:56:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:56:23.409765  543705 memory.go:184] no items to output this cycle
I0321 12:56:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 12:56:29.599640  543705 disk_info.go:125] begin check local disk info of client
I0321 12:56:29.602166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:56:29.602172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000566d40 0xc000566d80]
E0321 12:56:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:56:33.409793  543705 memory.go:184] no items to output this cycle
I0321 12:56:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 12:56:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:56:43.409783  543705 memory.go:191] Add success.
I0321 12:56:43.409815  543705 cpu.go:282] Add success.
I0321 12:56:43.419963  543705 net.go:648] Add success.
I0321 12:56:43.423017  543705 net.go:770] primary dev: ETH0
I0321 12:56:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:56:43.423043  543705 net.go:698] Add success.
I0321 12:56:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:56:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:56:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:56:53.409796  543705 memory.go:184] no items to output this cycle
I0321 12:56:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 12:57:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:57:03.409784  543705 memory.go:184] no items to output this cycle
I0321 12:57:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 12:57:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:57:13.409788  543705 memory.go:191] Add success.
I0321 12:57:13.409788  543705 cpu.go:282] Add success.
W0321 12:57:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:57:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:57:13.420235  543705 net.go:648] Add success.
I0321 12:57:13.423521  543705 net.go:770] primary dev: ETH0
I0321 12:57:13.423538  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:57:13.423551  543705 net.go:698] Add success.
I0321 12:57:13.429866  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 12:57:13.453053  543705 event_worker.go:152] Polling the log file for events...
I0321 12:57:13.472236  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4d6a2edb-2e0d-4f9c-94da-0c4daf8bba0c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 12:57:13.472285  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 12:57:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:57:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 12:57:14.455204  543705 disk_worker.go:728] disk inode is not compliant
E0321 12:57:14.456801  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 12:57:14.456811  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 12:57:14.456816  543705 custom_config.go:64] query custom config with name: gpu
I0321 12:57:14.456918  543705 disk_worker.go:494] system disk:vda1
I0321 12:57:14.456948  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 12:57:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 12:57:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:57:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 12:57:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 12:57:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:57:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:57:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:57:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:57:23.409795  543705 memory.go:184] no items to output this cycle
I0321 12:57:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 12:57:29.602653  543705 disk_info.go:125] begin check local disk info of client
I0321 12:57:29.605164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:57:29.605170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359100 0xc000359140]
E0321 12:57:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:57:33.409759  543705 memory.go:184] no items to output this cycle
I0321 12:57:33.409797  543705 cpu.go:275] no items to output this cycle
I0321 12:57:38.916276  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 12:57:38.916283  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 12:57:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:57:43.410889  543705 memory.go:191] Add success.
I0321 12:57:43.409792  543705 cpu.go:282] Add success.
I0321 12:57:43.420584  543705 net.go:648] Add success.
I0321 12:57:43.423357  543705 net.go:770] primary dev: ETH0
I0321 12:57:43.423369  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:57:43.423383  543705 net.go:698] Add success.
I0321 12:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:57:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:57:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:57:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:57:53.409772  543705 memory.go:184] no items to output this cycle
I0321 12:57:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 12:58:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:58:03.409804  543705 memory.go:184] no items to output this cycle
I0321 12:58:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 12:58:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:58:13.409789  543705 memory.go:191] Add success.
I0321 12:58:13.409813  543705 cpu.go:282] Add success.
W0321 12:58:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:58:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:58:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:58:13.420163  543705 net.go:648] Add success.
I0321 12:58:13.422894  543705 net.go:770] primary dev: ETH0
I0321 12:58:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:58:13.422921  543705 net.go:698] Add success.
I0321 12:58:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:58:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:58:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 12:58:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:58:14.457118  543705 disk_worker.go:494] system disk:vda1
I0321 12:58:14.457148  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:58:15.454993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:58:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:58:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:58:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:58:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:58:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:58:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 12:58:23.409789  543705 memory.go:184] no items to output this cycle
I0321 12:58:29.605675  543705 disk_info.go:125] begin check local disk info of client
I0321 12:58:29.608122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:58:29.608129  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253140 0xc000253180]
E0321 12:58:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:58:33.409788  543705 memory.go:184] no items to output this cycle
I0321 12:58:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 12:58:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:58:43.409787  543705 memory.go:191] Add success.
I0321 12:58:43.409787  543705 cpu.go:282] Add success.
I0321 12:58:43.419872  543705 net.go:648] Add success.
I0321 12:58:43.422539  543705 net.go:770] primary dev: ETH0
I0321 12:58:43.422552  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:58:43.422566  543705 net.go:698] Add success.
I0321 12:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:58:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:58:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:58:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:58:53.409768  543705 memory.go:184] no items to output this cycle
I0321 12:58:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 12:59:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:59:03.409774  543705 memory.go:184] no items to output this cycle
I0321 12:59:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 12:59:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:59:13.409838  543705 memory.go:191] Add success.
I0321 12:59:13.409841  543705 cpu.go:282] Add success.
W0321 12:59:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 12:59:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 12:59:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 12:59:13.420189  543705 net.go:648] Add success.
I0321 12:59:13.423163  543705 net.go:770] primary dev: ETH0
I0321 12:59:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:59:13.423189  543705 net.go:698] Add success.
I0321 12:59:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 12:59:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 12:59:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 12:59:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 12:59:14.458897  543705 disk_worker.go:494] system disk:vda1
I0321 12:59:14.458926  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 12:59:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 12:59:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:59:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:59:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0321 12:59:16.472484  543705 disk_local_worker.go:436] Get disk info: []
E0321 12:59:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:59:23.409768  543705 memory.go:184] no items to output this cycle
I0321 12:59:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 12:59:29.608675  543705 disk_info.go:125] begin check local disk info of client
I0321 12:59:29.611160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 12:59:29.611167  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a8c0 0xc00036a900]
E0321 12:59:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:59:33.409803  543705 memory.go:184] no items to output this cycle
I0321 12:59:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 12:59:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:59:43.409779  543705 memory.go:191] Add success.
I0321 12:59:43.409797  543705 cpu.go:282] Add success.
I0321 12:59:43.419842  543705 net.go:648] Add success.
I0321 12:59:43.423015  543705 net.go:770] primary dev: ETH0
I0321 12:59:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0321 12:59:43.423044  543705 net.go:698] Add success.
I0321 12:59:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 12:59:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 12:59:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 12:59:53.410225  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 12:59:53.410236  543705 cpu.go:275] no items to output this cycle
I0321 12:59:53.410240  543705 memory.go:184] no items to output this cycle
E0321 13:00:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:00:03.409801  543705 memory.go:184] no items to output this cycle
I0321 13:00:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 13:00:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:00:13.409778  543705 memory.go:191] Add success.
I0321 13:00:13.409800  543705 cpu.go:282] Add success.
W0321 13:00:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:00:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:00:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:00:13.420046  543705 net.go:648] Add success.
I0321 13:00:13.422805  543705 net.go:770] primary dev: ETH0
I0321 13:00:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:00:13.422835  543705 net.go:698] Add success.
I0321 13:00:13.468799  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab25858d-41e5-4f86-9e56-5be57634c1c0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:00:13.468833  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:00:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:00:14.455378  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:00:14.455391  543705 disk_worker.go:708] disk space is not compliant
W0321 13:00:14.455395  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:00:14.457019  543705 disk_worker.go:494] system disk:vda1
I0321 13:00:14.457048  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:00:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:00:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:00:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:00:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:00:23.409796  543705 memory.go:184] no items to output this cycle
I0321 13:00:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 13:00:29.611688  543705 disk_info.go:125] begin check local disk info of client
I0321 13:00:29.614177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:00:29.614183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b3500 0xc0004b3540]
E0321 13:00:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:00:33.409800  543705 memory.go:184] no items to output this cycle
I0321 13:00:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 13:00:38.917737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:00:38.917744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:00:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:00:43.410611  543705 memory.go:191] Add success.
I0321 13:00:43.409791  543705 cpu.go:282] Add success.
I0321 13:00:43.420360  543705 net.go:648] Add success.
I0321 13:00:43.423276  543705 net.go:770] primary dev: ETH0
I0321 13:00:43.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:00:43.423303  543705 net.go:698] Add success.
I0321 13:00:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:00:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:00:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:00:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:00:53.409797  543705 memory.go:184] no items to output this cycle
I0321 13:00:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 13:01:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:01:03.409780  543705 memory.go:184] no items to output this cycle
I0321 13:01:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 13:01:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:01:13.409784  543705 memory.go:191] Add success.
I0321 13:01:13.409804  543705 cpu.go:282] Add success.
W0321 13:01:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:01:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:01:13.420050  543705 net.go:648] Add success.
I0321 13:01:13.422730  543705 net.go:770] primary dev: ETH0
I0321 13:01:13.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:01:13.422755  543705 net.go:698] Add success.
I0321 13:01:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:01:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:01:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 13:01:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:01:14.456830  543705 disk_worker.go:494] system disk:vda1
I0321 13:01:14.456858  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:01:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:01:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:01:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:01:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:01:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:01:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:01:23.409799  543705 memory.go:184] no items to output this cycle
I0321 13:01:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 13:01:29.614711  543705 disk_info.go:125] begin check local disk info of client
I0321 13:01:29.617238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:01:29.617247  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374000 0xc000374040]
E0321 13:01:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:01:33.409786  543705 memory.go:184] no items to output this cycle
I0321 13:01:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 13:01:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:01:43.409780  543705 memory.go:191] Add success.
I0321 13:01:43.409809  543705 cpu.go:282] Add success.
I0321 13:01:43.419870  543705 net.go:648] Add success.
I0321 13:01:43.422739  543705 net.go:770] primary dev: ETH0
I0321 13:01:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:01:43.422765  543705 net.go:698] Add success.
I0321 13:01:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:01:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:01:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:01:53.410215  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:01:53.410230  543705 memory.go:184] no items to output this cycle
I0321 13:01:53.410257  543705 cpu.go:275] no items to output this cycle
E0321 13:02:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:02:03.409807  543705 memory.go:184] no items to output this cycle
I0321 13:02:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 13:02:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:02:13.409778  543705 memory.go:191] Add success.
W0321 13:02:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:02:13.409809  543705 cpu.go:282] Add success.
W0321 13:02:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:02:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:02:13.420139  543705 net.go:648] Add success.
I0321 13:02:13.422962  543705 net.go:770] primary dev: ETH0
I0321 13:02:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:02:13.422993  543705 net.go:698] Add success.
W0321 13:02:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:02:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 13:02:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:02:14.456448  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:02:14.456458  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:02:14.456465  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:02:14.456921  543705 disk_worker.go:494] system disk:vda1
I0321 13:02:14.456963  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:02:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:02:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:02:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:02:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:02:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:02:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:02:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:02:23.410349  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:02:23.410373  543705 memory.go:184] no items to output this cycle
I0321 13:02:23.410412  543705 cpu.go:275] no items to output this cycle
I0321 13:02:29.617676  543705 disk_info.go:125] begin check local disk info of client
I0321 13:02:29.620090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:02:29.620097  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae40 0xc00007ae80]
E0321 13:02:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:02:33.409805  543705 memory.go:184] no items to output this cycle
I0321 13:02:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 13:02:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:02:43.409787  543705 memory.go:191] Add success.
I0321 13:02:43.409807  543705 cpu.go:282] Add success.
I0321 13:02:43.419875  543705 net.go:648] Add success.
I0321 13:02:43.422770  543705 net.go:770] primary dev: ETH0
I0321 13:02:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:02:43.422800  543705 net.go:698] Add success.
I0321 13:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:02:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:02:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:02:53.409762  543705 memory.go:184] no items to output this cycle
I0321 13:02:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 13:03:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:03:03.409815  543705 memory.go:184] no items to output this cycle
I0321 13:03:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 13:03:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:03:13.409784  543705 memory.go:191] Add success.
W0321 13:03:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:03:13.409813  543705 cpu.go:282] Add success.
W0321 13:03:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:03:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:03:13.420210  543705 net.go:648] Add success.
I0321 13:03:13.423104  543705 net.go:770] primary dev: ETH0
I0321 13:03:13.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:03:13.423130  543705 net.go:698] Add success.
I0321 13:03:14.209564  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"acf4c13e-32c2-4a9a-a73f-f12e19b83275","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:03:14.209598  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:03:14.454688  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:03:14.454829  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:03:14.454888  543705 disk_worker.go:708] disk space is not compliant
W0321 13:03:14.454891  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:03:14.456246  543705 disk_worker.go:494] system disk:vda1
I0321 13:03:14.456305  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:03:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:03:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:03:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:03:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:03:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:03:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:03:23.409766  543705 memory.go:184] no items to output this cycle
I0321 13:03:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 13:03:29.620183  543705 disk_info.go:125] begin check local disk info of client
I0321 13:03:29.622617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:03:29.622624  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002985c0 0xc000298600]
E0321 13:03:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:03:33.409791  543705 memory.go:184] no items to output this cycle
I0321 13:03:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 13:03:38.920303  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:03:38.920309  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:03:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:03:43.410642  543705 memory.go:191] Add success.
I0321 13:03:43.409794  543705 cpu.go:282] Add success.
I0321 13:03:43.420401  543705 net.go:648] Add success.
I0321 13:03:43.422866  543705 net.go:770] primary dev: ETH0
I0321 13:03:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:03:43.422896  543705 net.go:698] Add success.
I0321 13:03:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:03:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:03:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:03:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:03:53.409762  543705 memory.go:184] no items to output this cycle
I0321 13:03:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 13:04:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:04:03.409789  543705 memory.go:184] no items to output this cycle
I0321 13:04:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 13:04:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:04:13.409819  543705 memory.go:191] Add success.
I0321 13:04:13.409832  543705 cpu.go:282] Add success.
W0321 13:04:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:04:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:04:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:04:13.420142  543705 net.go:648] Add success.
I0321 13:04:13.422866  543705 net.go:770] primary dev: ETH0
I0321 13:04:13.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:04:13.422896  543705 net.go:698] Add success.
I0321 13:04:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:04:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:04:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 13:04:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:04:14.456604  543705 disk_worker.go:494] system disk:vda1
I0321 13:04:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:04:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:04:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:04:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:04:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:04:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:04:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 13:04:23.409780  543705 memory.go:184] no items to output this cycle
I0321 13:04:29.622751  543705 disk_info.go:125] begin check local disk info of client
I0321 13:04:29.625200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:04:29.625207  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 13:04:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:04:33.409793  543705 memory.go:184] no items to output this cycle
I0321 13:04:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 13:04:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:04:43.409814  543705 memory.go:191] Add success.
I0321 13:04:43.409815  543705 cpu.go:282] Add success.
I0321 13:04:43.419875  543705 net.go:648] Add success.
I0321 13:04:43.422351  543705 net.go:770] primary dev: ETH0
I0321 13:04:43.422366  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:04:43.422380  543705 net.go:698] Add success.
I0321 13:04:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:04:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:04:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:04:53.409765  543705 memory.go:184] no items to output this cycle
I0321 13:04:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 13:05:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:05:03.409783  543705 memory.go:184] no items to output this cycle
I0321 13:05:03.409785  543705 cpu.go:275] no items to output this cycle
W0321 13:05:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:05:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:05:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:05:13.409795  543705 cpu.go:282] Add success.
E0321 13:05:13.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:05:13.409836  543705 memory.go:191] Add success.
I0321 13:05:13.420211  543705 net.go:648] Add success.
I0321 13:05:13.423168  543705 net.go:770] primary dev: ETH0
I0321 13:05:13.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:05:13.423192  543705 net.go:698] Add success.
I0321 13:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:05:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:05:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 13:05:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:05:14.456506  543705 disk_worker.go:494] system disk:vda1
I0321 13:05:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:05:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:05:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:05:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:05:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:05:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:05:23.409774  543705 memory.go:184] no items to output this cycle
I0321 13:05:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 13:05:29.625676  543705 disk_info.go:125] begin check local disk info of client
I0321 13:05:29.628211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:05:29.628218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003073c0 0xc000307400]
E0321 13:05:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:05:33.409802  543705 memory.go:184] no items to output this cycle
I0321 13:05:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 13:05:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:05:43.409819  543705 memory.go:191] Add success.
I0321 13:05:43.409828  543705 cpu.go:282] Add success.
I0321 13:05:43.419868  543705 net.go:648] Add success.
I0321 13:05:43.422291  543705 net.go:770] primary dev: ETH0
I0321 13:05:43.422304  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:05:43.422318  543705 net.go:698] Add success.
I0321 13:05:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:05:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:05:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:05:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:05:53.409768  543705 memory.go:184] no items to output this cycle
I0321 13:05:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 13:06:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:06:03.409776  543705 memory.go:184] no items to output this cycle
I0321 13:06:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 13:06:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:06:13.409790  543705 memory.go:191] Add success.
I0321 13:06:13.409814  543705 cpu.go:282] Add success.
W0321 13:06:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:06:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:06:13.420475  543705 net.go:648] Add success.
I0321 13:06:13.423387  543705 net.go:770] primary dev: ETH0
I0321 13:06:13.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:06:13.423442  543705 net.go:698] Add success.
I0321 13:06:13.464088  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7f3bab3-aa64-40df-b9e3-bbbed7029050","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:06:13.464123  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:06:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:06:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:06:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 13:06:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:06:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 13:06:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:06:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:06:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:06:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:06:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:06:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:06:23.409800  543705 memory.go:184] no items to output this cycle
I0321 13:06:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 13:06:29.628776  543705 disk_info.go:125] begin check local disk info of client
I0321 13:06:29.631259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:06:29.631266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f53c0 0xc0003f5400]
E0321 13:06:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:06:33.409794  543705 memory.go:184] no items to output this cycle
I0321 13:06:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 13:06:38.921735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:06:38.921743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:06:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:06:43.410634  543705 memory.go:191] Add success.
I0321 13:06:43.409797  543705 cpu.go:282] Add success.
I0321 13:06:43.420351  543705 net.go:648] Add success.
I0321 13:06:43.422915  543705 net.go:770] primary dev: ETH0
I0321 13:06:43.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:06:43.422941  543705 net.go:698] Add success.
I0321 13:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:06:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:06:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:06:53.409760  543705 memory.go:184] no items to output this cycle
I0321 13:06:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 13:07:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:07:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:07:03.409788  543705 memory.go:184] no items to output this cycle
E0321 13:07:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:07:13.409809  543705 memory.go:191] Add success.
I0321 13:07:13.409820  543705 cpu.go:282] Add success.
W0321 13:07:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:07:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:07:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:07:13.420168  543705 net.go:648] Add success.
I0321 13:07:13.423266  543705 net.go:770] primary dev: ETH0
I0321 13:07:13.423278  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:07:13.423290  543705 net.go:698] Add success.
I0321 13:07:13.452779  543705 event_worker.go:152] Polling the log file for events...
W0321 13:07:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:07:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0321 13:07:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:07:14.456931  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:07:14.456940  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:07:14.456946  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:07:14.456998  543705 disk_worker.go:494] system disk:vda1
I0321 13:07:14.457026  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:07:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:07:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:07:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:07:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:07:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:07:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:07:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:07:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:07:23.409789  543705 memory.go:184] no items to output this cycle
I0321 13:07:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 13:07:29.631790  543705 disk_info.go:125] begin check local disk info of client
I0321 13:07:29.634361  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:07:29.634370  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c86c0 0xc0003c8700]
E0321 13:07:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:07:33.409772  543705 memory.go:184] no items to output this cycle
I0321 13:07:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 13:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:07:43.409788  543705 memory.go:191] Add success.
I0321 13:07:43.409791  543705 cpu.go:282] Add success.
I0321 13:07:43.419847  543705 net.go:648] Add success.
I0321 13:07:43.423018  543705 net.go:770] primary dev: ETH0
I0321 13:07:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:07:43.423044  543705 net.go:698] Add success.
I0321 13:07:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:07:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:07:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:07:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:07:53.409785  543705 memory.go:184] no items to output this cycle
I0321 13:07:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 13:08:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:08:03.409808  543705 memory.go:184] no items to output this cycle
I0321 13:08:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 13:08:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:08:13.409787  543705 memory.go:191] Add success.
I0321 13:08:13.409790  543705 cpu.go:282] Add success.
W0321 13:08:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:08:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:08:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:08:13.420047  543705 net.go:648] Add success.
I0321 13:08:13.422848  543705 net.go:770] primary dev: ETH0
I0321 13:08:13.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:08:13.422874  543705 net.go:698] Add success.
I0321 13:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:08:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:08:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 13:08:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:08:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 13:08:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:08:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:08:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:08:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:08:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:08:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:08:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:08:23.409769  543705 memory.go:184] no items to output this cycle
I0321 13:08:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 13:08:29.634807  543705 disk_info.go:125] begin check local disk info of client
I0321 13:08:29.637309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:08:29.637316  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bfb40 0xc0003bfb80]
E0321 13:08:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:08:33.409790  543705 memory.go:184] no items to output this cycle
I0321 13:08:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 13:08:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:08:43.409777  543705 memory.go:191] Add success.
I0321 13:08:43.409797  543705 cpu.go:282] Add success.
I0321 13:08:43.419908  543705 net.go:648] Add success.
I0321 13:08:43.422596  543705 net.go:770] primary dev: ETH0
I0321 13:08:43.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:08:43.422621  543705 net.go:698] Add success.
I0321 13:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:08:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:08:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:08:53.410219  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:08:53.410239  543705 memory.go:184] no items to output this cycle
I0321 13:08:53.410259  543705 cpu.go:275] no items to output this cycle
E0321 13:09:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:09:03.409788  543705 cpu.go:275] no items to output this cycle
I0321 13:09:03.409800  543705 memory.go:184] no items to output this cycle
E0321 13:09:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:09:13.409827  543705 memory.go:191] Add success.
I0321 13:09:13.409836  543705 cpu.go:282] Add success.
W0321 13:09:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:09:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:09:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:09:13.420095  543705 net.go:648] Add success.
I0321 13:09:13.422845  543705 net.go:770] primary dev: ETH0
I0321 13:09:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:09:13.422870  543705 net.go:698] Add success.
I0321 13:09:13.481184  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0366a072-3036-4669-bdb8-580fa303e6d2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:09:13.481220  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:09:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:09:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 13:09:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:09:14.456530  543705 disk_worker.go:494] system disk:vda1
I0321 13:09:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:09:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:09:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:09:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:09:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:09:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:09:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:09:23.409819  543705 memory.go:184] no items to output this cycle
I0321 13:09:23.409829  543705 cpu.go:275] no items to output this cycle
I0321 13:09:29.637679  543705 disk_info.go:125] begin check local disk info of client
I0321 13:09:29.640251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:09:29.640257  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374000 0xc000374040]
E0321 13:09:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:09:33.409781  543705 memory.go:184] no items to output this cycle
I0321 13:09:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 13:09:38.924323  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:09:38.924329  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:09:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:09:43.410686  543705 memory.go:191] Add success.
I0321 13:09:43.409819  543705 cpu.go:282] Add success.
I0321 13:09:43.420386  543705 net.go:648] Add success.
I0321 13:09:43.423614  543705 net.go:770] primary dev: ETH0
I0321 13:09:43.423632  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:09:43.423647  543705 net.go:698] Add success.
I0321 13:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:09:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:09:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:09:53.410234  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:09:53.410250  543705 memory.go:184] no items to output this cycle
I0321 13:09:53.410268  543705 cpu.go:275] no items to output this cycle
E0321 13:10:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:10:03.409822  543705 memory.go:184] no items to output this cycle
I0321 13:10:03.409834  543705 cpu.go:275] no items to output this cycle
E0321 13:10:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:10:13.409795  543705 memory.go:191] Add success.
I0321 13:10:13.409817  543705 cpu.go:282] Add success.
W0321 13:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:10:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:10:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:10:13.420151  543705 net.go:648] Add success.
I0321 13:10:13.422695  543705 net.go:770] primary dev: ETH0
I0321 13:10:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:10:13.422721  543705 net.go:698] Add success.
I0321 13:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:10:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:10:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 13:10:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:10:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 13:10:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:10:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:10:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:10:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:10:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:10:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:10:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:10:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 13:10:23.409793  543705 memory.go:184] no items to output this cycle
I0321 13:10:29.640839  543705 disk_info.go:125] begin check local disk info of client
I0321 13:10:29.643347  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:10:29.643354  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326440 0xc000326480]
E0321 13:10:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:10:33.409784  543705 memory.go:184] no items to output this cycle
I0321 13:10:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 13:10:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:10:43.409796  543705 memory.go:191] Add success.
I0321 13:10:43.409802  543705 cpu.go:282] Add success.
I0321 13:10:43.420016  543705 net.go:648] Add success.
I0321 13:10:43.422933  543705 net.go:770] primary dev: ETH0
I0321 13:10:43.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:10:43.422962  543705 net.go:698] Add success.
I0321 13:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:10:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:10:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:10:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:10:53.409769  543705 memory.go:184] no items to output this cycle
I0321 13:10:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 13:11:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:11:03.409803  543705 memory.go:184] no items to output this cycle
I0321 13:11:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 13:11:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:11:13.409782  543705 memory.go:191] Add success.
I0321 13:11:13.409801  543705 cpu.go:282] Add success.
W0321 13:11:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:11:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:11:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:11:13.420094  543705 net.go:648] Add success.
I0321 13:11:13.422865  543705 net.go:770] primary dev: ETH0
I0321 13:11:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:11:13.422890  543705 net.go:698] Add success.
I0321 13:11:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:11:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:11:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 13:11:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:11:14.456489  543705 disk_worker.go:494] system disk:vda1
I0321 13:11:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:11:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:11:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:11:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:11:23.409810  543705 memory.go:184] no items to output this cycle
I0321 13:11:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 13:11:29.643871  543705 disk_info.go:125] begin check local disk info of client
I0321 13:11:29.646468  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:11:29.646474  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374000 0xc000374040]
E0321 13:11:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:11:33.409772  543705 memory.go:184] no items to output this cycle
I0321 13:11:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 13:11:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:11:43.409811  543705 memory.go:191] Add success.
I0321 13:11:43.409818  543705 cpu.go:282] Add success.
I0321 13:11:43.419990  543705 net.go:648] Add success.
I0321 13:11:43.422596  543705 net.go:770] primary dev: ETH0
I0321 13:11:43.422610  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:11:43.422621  543705 net.go:698] Add success.
I0321 13:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:11:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:11:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:11:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:11:53.409796  543705 memory.go:184] no items to output this cycle
I0321 13:11:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 13:12:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:12:03.409775  543705 memory.go:184] no items to output this cycle
I0321 13:12:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 13:12:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:12:13.409808  543705 memory.go:191] Add success.
I0321 13:12:13.409812  543705 cpu.go:282] Add success.
W0321 13:12:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:12:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:12:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:12:13.420068  543705 net.go:648] Add success.
I0321 13:12:13.422973  543705 net.go:770] primary dev: ETH0
I0321 13:12:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:12:13.423003  543705 net.go:698] Add success.
I0321 13:12:13.470015  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e13abba-3144-4eee-b40f-75324d44744b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:12:13.470050  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 13:12:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:12:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 13:12:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:12:14.456806  543705 disk_worker.go:494] system disk:vda1
I0321 13:12:14.456848  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:12:14.457093  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:12:14.457101  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:12:14.457104  543705 custom_config.go:64] query custom config with name: gpu
E0321 13:12:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:12:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:12:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:12:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:12:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:12:16.458040  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:12:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:12:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:12:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 13:12:23.409793  543705 memory.go:184] no items to output this cycle
I0321 13:12:29.646881  543705 disk_info.go:125] begin check local disk info of client
I0321 13:12:29.649425  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:12:29.649431  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0321 13:12:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:12:33.409790  543705 memory.go:184] no items to output this cycle
I0321 13:12:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 13:12:38.925739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:12:38.925746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:12:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:12:43.410607  543705 memory.go:191] Add success.
I0321 13:12:43.409812  543705 cpu.go:282] Add success.
I0321 13:12:43.420308  543705 net.go:648] Add success.
I0321 13:12:43.422931  543705 net.go:770] primary dev: ETH0
I0321 13:12:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:12:43.422971  543705 net.go:698] Add success.
I0321 13:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:12:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:12:53.409764  543705 memory.go:184] no items to output this cycle
I0321 13:12:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 13:13:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:13:03.409805  543705 memory.go:184] no items to output this cycle
I0321 13:13:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 13:13:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:13:13.409789  543705 memory.go:191] Add success.
W0321 13:13:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:13:13.409822  543705 cpu.go:282] Add success.
W0321 13:13:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:13:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:13:13.420117  543705 net.go:648] Add success.
I0321 13:13:13.422692  543705 net.go:770] primary dev: ETH0
I0321 13:13:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:13:13.422722  543705 net.go:698] Add success.
I0321 13:13:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:13:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:13:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 13:13:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:13:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 13:13:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:13:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:13:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:13:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:13:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:13:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:13:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:13:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 13:13:23.409797  543705 memory.go:184] no items to output this cycle
I0321 13:13:29.649674  543705 disk_info.go:125] begin check local disk info of client
I0321 13:13:29.652221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:13:29.652228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b66c0 0xc0003b6700]
E0321 13:13:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:13:33.409770  543705 memory.go:184] no items to output this cycle
I0321 13:13:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 13:13:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:13:43.409791  543705 memory.go:191] Add success.
I0321 13:13:43.409791  543705 cpu.go:282] Add success.
I0321 13:13:43.419856  543705 net.go:648] Add success.
I0321 13:13:43.422509  543705 net.go:770] primary dev: ETH0
I0321 13:13:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:13:43.422535  543705 net.go:698] Add success.
I0321 13:13:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:13:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:13:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:13:53.409790  543705 memory.go:184] no items to output this cycle
I0321 13:13:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 13:14:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:14:03.409786  543705 cpu.go:275] no items to output this cycle
I0321 13:14:03.409790  543705 memory.go:184] no items to output this cycle
E0321 13:14:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:14:13.409814  543705 memory.go:191] Add success.
I0321 13:14:13.409819  543705 cpu.go:282] Add success.
W0321 13:14:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:14:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:14:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:14:13.420152  543705 net.go:648] Add success.
I0321 13:14:13.423447  543705 net.go:770] primary dev: ETH0
I0321 13:14:13.423461  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:14:13.423474  543705 net.go:698] Add success.
I0321 13:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:14:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:14:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 13:14:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:14:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 13:14:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:14:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:14:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:14:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:14:23.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:14:23.409880  543705 memory.go:184] no items to output this cycle
I0321 13:14:23.409917  543705 cpu.go:275] no items to output this cycle
I0321 13:14:29.652901  543705 disk_info.go:125] begin check local disk info of client
I0321 13:14:29.655504  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:14:29.655511  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492340 0xc000492380]
E0321 13:14:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:14:33.409773  543705 cpu.go:275] no items to output this cycle
I0321 13:14:33.409779  543705 memory.go:184] no items to output this cycle
E0321 13:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:14:43.409808  543705 memory.go:191] Add success.
I0321 13:14:43.409814  543705 cpu.go:282] Add success.
I0321 13:14:43.419945  543705 net.go:648] Add success.
I0321 13:14:43.422705  543705 net.go:770] primary dev: ETH0
I0321 13:14:43.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:14:43.422730  543705 net.go:698] Add success.
I0321 13:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:14:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:14:53.409801  543705 memory.go:184] no items to output this cycle
I0321 13:14:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 13:15:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:15:03.409782  543705 memory.go:184] no items to output this cycle
I0321 13:15:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 13:15:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:15:13.409813  543705 memory.go:191] Add success.
I0321 13:15:13.409821  543705 cpu.go:282] Add success.
W0321 13:15:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:15:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:15:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:15:13.420066  543705 net.go:648] Add success.
I0321 13:15:13.422725  543705 net.go:770] primary dev: ETH0
I0321 13:15:13.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:15:13.422750  543705 net.go:698] Add success.
I0321 13:15:13.571861  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fb1b547-32b9-4182-86e9-b6678221f05b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:15:13.571894  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:15:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:15:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 13:15:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:15:14.456603  543705 disk_worker.go:494] system disk:vda1
I0321 13:15:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:15:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:15:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:15:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:15:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:15:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:15:23.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:15:23.409891  543705 cpu.go:275] no items to output this cycle
I0321 13:15:23.409901  543705 memory.go:184] no items to output this cycle
I0321 13:15:29.655909  543705 disk_info.go:125] begin check local disk info of client
I0321 13:15:29.658506  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:15:29.658512  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bad40 0xc0003bad80]
E0321 13:15:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:15:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 13:15:33.409781  543705 memory.go:184] no items to output this cycle
I0321 13:15:38.928328  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:15:38.928335  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:15:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:15:43.410616  543705 memory.go:191] Add success.
I0321 13:15:43.409809  543705 cpu.go:282] Add success.
I0321 13:15:43.420345  543705 net.go:648] Add success.
I0321 13:15:43.423091  543705 net.go:770] primary dev: ETH0
I0321 13:15:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:15:43.423119  543705 net.go:698] Add success.
I0321 13:15:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:15:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:15:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:15:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:15:53.409774  543705 memory.go:184] no items to output this cycle
I0321 13:15:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 13:16:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:16:03.409773  543705 memory.go:184] no items to output this cycle
I0321 13:16:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 13:16:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:16:13.409790  543705 memory.go:191] Add success.
W0321 13:16:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:16:13.409819  543705 cpu.go:282] Add success.
W0321 13:16:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:16:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:16:13.420050  543705 net.go:648] Add success.
I0321 13:16:13.422920  543705 net.go:770] primary dev: ETH0
I0321 13:16:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:16:13.422945  543705 net.go:698] Add success.
I0321 13:16:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:16:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:16:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 13:16:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:16:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 13:16:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:16:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:16:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:16:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:16:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:16:16.472510  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:16:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:16:23.409801  543705 memory.go:184] no items to output this cycle
I0321 13:16:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 13:16:29.658936  543705 disk_info.go:125] begin check local disk info of client
I0321 13:16:29.661478  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:16:29.661485  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033e000 0xc00033e040]
E0321 13:16:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:16:33.409807  543705 memory.go:184] no items to output this cycle
I0321 13:16:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 13:16:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:16:43.409780  543705 memory.go:191] Add success.
I0321 13:16:43.409805  543705 cpu.go:282] Add success.
I0321 13:16:43.419859  543705 net.go:648] Add success.
I0321 13:16:43.422557  543705 net.go:770] primary dev: ETH0
I0321 13:16:43.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:16:43.422581  543705 net.go:698] Add success.
I0321 13:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:16:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:16:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:16:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:16:53.409771  543705 memory.go:184] no items to output this cycle
I0321 13:16:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 13:17:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:17:03.409788  543705 cpu.go:275] no items to output this cycle
I0321 13:17:03.409793  543705 memory.go:184] no items to output this cycle
E0321 13:17:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:17:13.409806  543705 memory.go:191] Add success.
I0321 13:17:13.409816  543705 cpu.go:282] Add success.
W0321 13:17:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:17:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:17:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:17:13.420325  543705 net.go:648] Add success.
I0321 13:17:13.423398  543705 net.go:770] primary dev: ETH0
I0321 13:17:13.423413  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:17:13.423427  543705 net.go:698] Add success.
I0321 13:17:13.452950  543705 event_worker.go:152] Polling the log file for events...
W0321 13:17:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:17:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0321 13:17:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:17:14.456932  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:17:14.456942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:17:14.456948  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:17:14.457001  543705 disk_worker.go:494] system disk:vda1
I0321 13:17:14.457031  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:17:15.456876  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:17:15.456889  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:17:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:17:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:17:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:17:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:17:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:17:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:17:23.409794  543705 memory.go:184] no items to output this cycle
I0321 13:17:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 13:17:29.661664  543705 disk_info.go:125] begin check local disk info of client
I0321 13:17:29.664169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:17:29.664175  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029c480 0xc00029c4c0]
E0321 13:17:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:17:33.409789  543705 memory.go:184] no items to output this cycle
I0321 13:17:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 13:17:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:17:43.409784  543705 memory.go:191] Add success.
I0321 13:17:43.409802  543705 cpu.go:282] Add success.
I0321 13:17:43.420023  543705 net.go:648] Add success.
I0321 13:17:43.422842  543705 net.go:770] primary dev: ETH0
I0321 13:17:43.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:17:43.422867  543705 net.go:698] Add success.
I0321 13:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:17:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:17:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:17:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:17:53.409759  543705 memory.go:184] no items to output this cycle
I0321 13:17:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 13:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:18:03.409778  543705 memory.go:184] no items to output this cycle
I0321 13:18:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 13:18:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:18:13.409791  543705 memory.go:191] Add success.
I0321 13:18:13.409792  543705 cpu.go:282] Add success.
W0321 13:18:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:18:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:18:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:18:13.420253  543705 net.go:648] Add success.
I0321 13:18:13.423585  543705 net.go:770] primary dev: ETH0
I0321 13:18:13.423600  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:18:13.423614  543705 net.go:698] Add success.
I0321 13:18:13.476888  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a4447a3b-67a8-4f4b-bdaa-0f8247b99c7d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:18:13.476922  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:18:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:18:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:18:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 13:18:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:18:14.456774  543705 disk_worker.go:494] system disk:vda1
I0321 13:18:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:18:15.455622  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:18:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:18:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:18:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:18:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:18:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:18:23.409767  543705 memory.go:184] no items to output this cycle
I0321 13:18:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 13:18:29.664956  543705 disk_info.go:125] begin check local disk info of client
I0321 13:18:29.667433  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:18:29.667439  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002615c0 0xc000261600]
E0321 13:18:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:18:33.409794  543705 memory.go:184] no items to output this cycle
I0321 13:18:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 13:18:38.929740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:18:38.929747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:18:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:18:43.410658  543705 memory.go:191] Add success.
I0321 13:18:43.409839  543705 cpu.go:282] Add success.
I0321 13:18:43.420338  543705 net.go:648] Add success.
I0321 13:18:43.422823  543705 net.go:770] primary dev: ETH0
I0321 13:18:43.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:18:43.422848  543705 net.go:698] Add success.
I0321 13:18:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:18:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:18:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:18:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:18:53.409769  543705 memory.go:184] no items to output this cycle
I0321 13:18:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 13:19:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:19:03.409786  543705 memory.go:184] no items to output this cycle
I0321 13:19:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 13:19:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:19:13.409793  543705 memory.go:191] Add success.
I0321 13:19:13.409794  543705 cpu.go:282] Add success.
W0321 13:19:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:19:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:19:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:19:13.420159  543705 net.go:648] Add success.
I0321 13:19:13.422834  543705 net.go:770] primary dev: ETH0
I0321 13:19:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:19:13.422861  543705 net.go:698] Add success.
I0321 13:19:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:19:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:19:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 13:19:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:19:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 13:19:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:19:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:19:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:19:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:19:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:19:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:19:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 13:19:23.409782  543705 memory.go:184] no items to output this cycle
I0321 13:19:29.667970  543705 disk_info.go:125] begin check local disk info of client
I0321 13:19:29.670507  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:19:29.670514  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032e500 0xc00032e540]
E0321 13:19:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:19:33.409768  543705 memory.go:184] no items to output this cycle
I0321 13:19:33.409776  543705 cpu.go:275] no items to output this cycle
E0321 13:19:43.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:19:43.409867  543705 memory.go:191] Add success.
I0321 13:19:43.410036  543705 cpu.go:282] Add success.
I0321 13:19:43.419709  543705 net.go:648] Add success.
I0321 13:19:43.422348  543705 net.go:770] primary dev: ETH0
I0321 13:19:43.422360  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:19:43.422372  543705 net.go:698] Add success.
I0321 13:19:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:19:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:19:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:19:53.409768  543705 memory.go:184] no items to output this cycle
I0321 13:19:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 13:20:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:20:03.409802  543705 memory.go:184] no items to output this cycle
I0321 13:20:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 13:20:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:20:13.409822  543705 memory.go:191] Add success.
I0321 13:20:13.409824  543705 cpu.go:282] Add success.
W0321 13:20:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:20:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:20:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:20:13.420223  543705 net.go:648] Add success.
I0321 13:20:13.423278  543705 net.go:770] primary dev: ETH0
I0321 13:20:13.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:20:13.423300  543705 net.go:698] Add success.
I0321 13:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:20:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:20:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 13:20:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:20:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 13:20:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:20:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:20:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:20:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:20:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:20:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:20:23.409775  543705 memory.go:184] no items to output this cycle
I0321 13:20:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 13:20:29.670983  543705 disk_info.go:125] begin check local disk info of client
I0321 13:20:29.673762  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:20:29.673768  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000be9c0 0xc0000bea00]
E0321 13:20:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:20:33.409763  543705 memory.go:184] no items to output this cycle
I0321 13:20:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 13:20:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:20:43.409902  543705 memory.go:191] Add success.
I0321 13:20:43.409934  543705 cpu.go:282] Add success.
I0321 13:20:43.419715  543705 net.go:648] Add success.
I0321 13:20:43.422414  543705 net.go:770] primary dev: ETH0
I0321 13:20:43.422427  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:20:43.422438  543705 net.go:698] Add success.
I0321 13:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:20:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:20:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:20:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:20:53.409772  543705 memory.go:184] no items to output this cycle
I0321 13:20:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 13:21:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:21:03.409776  543705 memory.go:184] no items to output this cycle
I0321 13:21:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 13:21:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:21:13.409789  543705 memory.go:191] Add success.
I0321 13:21:13.409809  543705 cpu.go:282] Add success.
W0321 13:21:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:21:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:21:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:21:13.420006  543705 net.go:770] primary dev: ETH0
I0321 13:21:13.420020  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:21:13.420032  543705 net.go:698] Add success.
I0321 13:21:13.420389  543705 net.go:648] Add success.
I0321 13:21:13.463314  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ce8003dc-ad68-48a0-ac96-1dca5f3ecaee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:21:13.463347  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:21:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:21:14.455240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:21:14.455252  543705 disk_worker.go:708] disk space is not compliant
W0321 13:21:14.455255  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:21:14.456695  543705 disk_worker.go:494] system disk:vda1
I0321 13:21:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:21:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:21:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:21:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:21:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:21:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:21:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:21:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 13:21:23.409782  543705 memory.go:184] no items to output this cycle
I0321 13:21:29.675013  543705 disk_info.go:125] begin check local disk info of client
I0321 13:21:29.677526  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:21:29.677532  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029d440 0xc00029d480]
E0321 13:21:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:21:33.409799  543705 memory.go:184] no items to output this cycle
I0321 13:21:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 13:21:38.932347  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:21:38.932354  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:21:43.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:21:43.410866  543705 memory.go:191] Add success.
I0321 13:21:43.409965  543705 cpu.go:282] Add success.
I0321 13:21:43.419708  543705 net.go:648] Add success.
I0321 13:21:43.422345  543705 net.go:770] primary dev: ETH0
I0321 13:21:43.422361  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:21:43.422375  543705 net.go:698] Add success.
I0321 13:21:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:21:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:21:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:21:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:21:53.409787  543705 memory.go:184] no items to output this cycle
I0321 13:21:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 13:22:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:22:03.409802  543705 memory.go:184] no items to output this cycle
I0321 13:22:03.409812  543705 cpu.go:275] no items to output this cycle
I0321 13:22:13.409797  543705 cpu.go:282] Add success.
E0321 13:22:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:22:13.409828  543705 memory.go:191] Add success.
W0321 13:22:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:22:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:22:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:22:13.420362  543705 net.go:648] Add success.
I0321 13:22:13.421315  543705 net.go:770] primary dev: ETH0
I0321 13:22:13.421328  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:22:13.421341  543705 net.go:698] Add success.
W0321 13:22:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:22:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 13:22:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:22:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:22:14.456952  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:22:14.456958  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:22:14.457011  543705 disk_worker.go:494] system disk:vda1
I0321 13:22:14.457043  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:22:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:22:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:22:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:22:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:22:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:22:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:22:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:22:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:22:23.409763  543705 memory.go:184] no items to output this cycle
I0321 13:22:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:22:29.677671  543705 disk_info.go:125] begin check local disk info of client
I0321 13:22:29.680164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:22:29.680170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353580 0xc0003535c0]
E0321 13:22:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:22:33.409780  543705 memory.go:184] no items to output this cycle
I0321 13:22:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 13:22:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:22:43.409812  543705 memory.go:191] Add success.
I0321 13:22:43.409821  543705 cpu.go:282] Add success.
I0321 13:22:43.420202  543705 net.go:648] Add success.
I0321 13:22:43.422883  543705 net.go:770] primary dev: ETH0
I0321 13:22:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:22:43.422909  543705 net.go:698] Add success.
I0321 13:22:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:22:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:22:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:22:53.410488  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:22:53.410504  543705 memory.go:184] no items to output this cycle
I0321 13:22:53.410524  543705 cpu.go:275] no items to output this cycle
E0321 13:23:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:23:03.409783  543705 memory.go:184] no items to output this cycle
I0321 13:23:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 13:23:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:23:13.409784  543705 memory.go:191] Add success.
I0321 13:23:13.409807  543705 cpu.go:282] Add success.
W0321 13:23:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:23:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:23:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:23:13.420140  543705 net.go:648] Add success.
I0321 13:23:13.423352  543705 net.go:770] primary dev: ETH0
I0321 13:23:13.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:23:13.423380  543705 net.go:698] Add success.
I0321 13:23:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:23:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:23:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 13:23:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:23:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 13:23:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:23:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:23:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:23:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:23:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:23:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:23:23.409778  543705 memory.go:184] no items to output this cycle
I0321 13:23:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:23:29.681038  543705 disk_info.go:125] begin check local disk info of client
I0321 13:23:29.683537  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:23:29.683543  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa40 0xc0001aaa80]
E0321 13:23:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:23:33.409759  543705 memory.go:184] no items to output this cycle
I0321 13:23:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 13:23:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:23:43.409807  543705 memory.go:191] Add success.
I0321 13:23:43.409820  543705 cpu.go:282] Add success.
I0321 13:23:43.419910  543705 net.go:648] Add success.
I0321 13:23:43.422952  543705 net.go:770] primary dev: ETH0
I0321 13:23:43.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:23:43.423167  543705 net.go:698] Add success.
I0321 13:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:23:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:23:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:23:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 13:23:53.409783  543705 memory.go:184] no items to output this cycle
E0321 13:24:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:24:03.409783  543705 memory.go:184] no items to output this cycle
I0321 13:24:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 13:24:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:24:13.409797  543705 memory.go:191] Add success.
I0321 13:24:13.409801  543705 cpu.go:282] Add success.
W0321 13:24:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:24:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:24:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:24:13.420152  543705 net.go:648] Add success.
I0321 13:24:13.422947  543705 net.go:770] primary dev: ETH0
I0321 13:24:13.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:24:13.422973  543705 net.go:698] Add success.
I0321 13:24:13.470317  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e4a6c8e-8c55-4a8f-aadf-39c09f23d70b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:24:13.470351  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:24:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:24:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:24:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 13:24:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:24:14.456728  543705 disk_worker.go:494] system disk:vda1
I0321 13:24:14.456757  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:24:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:24:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:24:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:24:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:24:23.409774  543705 cpu.go:275] no items to output this cycle
I0321 13:24:23.409785  543705 memory.go:184] no items to output this cycle
I0321 13:24:29.684058  543705 disk_info.go:125] begin check local disk info of client
I0321 13:24:29.686596  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:24:29.686602  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b80 0xc0000c5bc0]
E0321 13:24:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:24:33.409770  543705 memory.go:184] no items to output this cycle
I0321 13:24:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 13:24:38.933730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:24:38.933737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:24:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:24:43.410825  543705 memory.go:191] Add success.
I0321 13:24:43.409788  543705 cpu.go:282] Add success.
I0321 13:24:43.420326  543705 net.go:770] primary dev: ETH0
I0321 13:24:43.420341  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:24:43.420355  543705 net.go:698] Add success.
I0321 13:24:43.420717  543705 net.go:648] Add success.
I0321 13:24:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:24:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:24:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:24:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:24:53.409782  543705 cpu.go:275] no items to output this cycle
I0321 13:24:53.409784  543705 memory.go:184] no items to output this cycle
E0321 13:25:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:25:03.409784  543705 memory.go:184] no items to output this cycle
I0321 13:25:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 13:25:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:25:13.409802  543705 memory.go:191] Add success.
I0321 13:25:13.409803  543705 cpu.go:282] Add success.
W0321 13:25:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:25:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:25:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:25:13.420264  543705 net.go:648] Add success.
I0321 13:25:13.422845  543705 net.go:770] primary dev: ETH0
I0321 13:25:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:25:13.422869  543705 net.go:698] Add success.
I0321 13:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:25:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:25:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 13:25:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:25:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 13:25:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:25:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:25:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:25:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:25:23.409772  543705 memory.go:184] no items to output this cycle
I0321 13:25:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 13:25:29.687065  543705 disk_info.go:125] begin check local disk info of client
I0321 13:25:29.689599  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:25:29.689607  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3f00 0xc0002b3f40]
E0321 13:25:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:25:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 13:25:33.409787  543705 memory.go:184] no items to output this cycle
E0321 13:25:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:25:43.409786  543705 memory.go:191] Add success.
I0321 13:25:43.409816  543705 cpu.go:282] Add success.
I0321 13:25:43.419839  543705 net.go:648] Add success.
I0321 13:25:43.422461  543705 net.go:770] primary dev: ETH0
I0321 13:25:43.422480  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:25:43.422495  543705 net.go:698] Add success.
I0321 13:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:25:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:25:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:25:53.409885  543705 cpu.go:275] no items to output this cycle
I0321 13:25:53.409892  543705 memory.go:184] no items to output this cycle
E0321 13:26:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:26:03.409772  543705 memory.go:184] no items to output this cycle
I0321 13:26:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 13:26:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:26:13.409815  543705 memory.go:191] Add success.
I0321 13:26:13.409821  543705 cpu.go:282] Add success.
W0321 13:26:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:26:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:26:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:26:13.420647  543705 net.go:648] Add success.
I0321 13:26:13.424065  543705 net.go:770] primary dev: ETH0
I0321 13:26:13.424079  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:26:13.424091  543705 net.go:698] Add success.
I0321 13:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:26:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:26:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 13:26:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:26:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 13:26:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:26:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:26:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:26:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:26:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:26:23.409770  543705 memory.go:184] no items to output this cycle
I0321 13:26:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:26:29.689672  543705 disk_info.go:125] begin check local disk info of client
I0321 13:26:29.692154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:26:29.692160  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003041c0 0xc000304200]
E0321 13:26:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:26:33.409792  543705 memory.go:184] no items to output this cycle
I0321 13:26:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 13:26:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:26:43.409776  543705 memory.go:191] Add success.
I0321 13:26:43.409797  543705 cpu.go:282] Add success.
I0321 13:26:43.419847  543705 net.go:648] Add success.
I0321 13:26:43.423073  543705 net.go:770] primary dev: ETH0
I0321 13:26:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:26:43.423099  543705 net.go:698] Add success.
I0321 13:26:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:26:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:26:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:26:53.409814  543705 memory.go:184] no items to output this cycle
I0321 13:26:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 13:27:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:27:03.409812  543705 memory.go:184] no items to output this cycle
I0321 13:27:03.409822  543705 cpu.go:275] no items to output this cycle
W0321 13:27:13.409752  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0321 13:27:13.409766  543705 conf_downlod.go:89] use old conf
E0321 13:27:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:27:13.409800  543705 memory.go:191] Add success.
W0321 13:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:27:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:27:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:27:13.409846  543705 cpu.go:282] Add success.
I0321 13:27:13.420183  543705 net.go:648] Add success.
I0321 13:27:13.423013  543705 net.go:770] primary dev: ETH0
I0321 13:27:13.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:27:13.423038  543705 net.go:698] Add success.
I0321 13:27:13.429779  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 13:27:13.453010  543705 event_worker.go:152] Polling the log file for events...
I0321 13:27:13.463545  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"02b62b80-2b16-412f-b4e4-185293bb11ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:27:13.463577  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 13:27:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:27:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 13:27:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:27:14.456139  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:27:14.456148  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:27:14.456154  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:27:14.456448  543705 disk_worker.go:494] system disk:vda1
I0321 13:27:14.456476  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:27:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:27:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:27:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:27:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:27:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:27:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:27:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:27:23.409777  543705 memory.go:184] no items to output this cycle
I0321 13:27:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 13:27:29.693106  543705 disk_info.go:125] begin check local disk info of client
I0321 13:27:29.695558  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:27:29.695564  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005474c0 0xc000547500]
E0321 13:27:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:27:33.409801  543705 memory.go:184] no items to output this cycle
I0321 13:27:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 13:27:38.933874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:27:38.933881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:27:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:27:43.410888  543705 memory.go:191] Add success.
I0321 13:27:43.409826  543705 cpu.go:282] Add success.
I0321 13:27:43.420567  543705 net.go:648] Add success.
I0321 13:27:43.423348  543705 net.go:770] primary dev: ETH0
I0321 13:27:43.423362  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:27:43.423374  543705 net.go:698] Add success.
I0321 13:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:27:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:27:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:27:53.409780  543705 memory.go:184] no items to output this cycle
I0321 13:27:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 13:28:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:28:03.409797  543705 memory.go:184] no items to output this cycle
I0321 13:28:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 13:28:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:28:13.409843  543705 memory.go:191] Add success.
I0321 13:28:13.409851  543705 cpu.go:282] Add success.
W0321 13:28:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:28:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:28:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:28:13.420175  543705 net.go:648] Add success.
I0321 13:28:13.422969  543705 net.go:770] primary dev: ETH0
I0321 13:28:13.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:28:13.422998  543705 net.go:698] Add success.
I0321 13:28:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:28:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:28:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 13:28:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:28:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 13:28:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:28:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:28:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:28:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:28:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:28:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:28:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:28:23.409772  543705 memory.go:184] no items to output this cycle
I0321 13:28:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 13:28:29.696111  543705 disk_info.go:125] begin check local disk info of client
I0321 13:28:29.698605  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:28:29.698611  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d00 0xc0000c5d40]
E0321 13:28:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:28:33.409781  543705 memory.go:184] no items to output this cycle
I0321 13:28:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 13:28:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:28:43.409785  543705 memory.go:191] Add success.
I0321 13:28:43.409785  543705 cpu.go:282] Add success.
I0321 13:28:43.419883  543705 net.go:648] Add success.
I0321 13:28:43.423085  543705 net.go:770] primary dev: ETH0
I0321 13:28:43.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:28:43.423110  543705 net.go:698] Add success.
I0321 13:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:28:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:28:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:28:53.409769  543705 memory.go:184] no items to output this cycle
I0321 13:28:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 13:29:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:29:03.409778  543705 memory.go:184] no items to output this cycle
I0321 13:29:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 13:29:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:29:13.409916  543705 memory.go:191] Add success.
I0321 13:29:13.409927  543705 cpu.go:282] Add success.
W0321 13:29:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:29:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:29:13.409972  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:29:13.419732  543705 net.go:648] Add success.
I0321 13:29:13.423129  543705 net.go:770] primary dev: ETH0
I0321 13:29:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:29:13.423157  543705 net.go:698] Add success.
I0321 13:29:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:29:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:29:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0321 13:29:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:29:14.456490  543705 disk_worker.go:494] system disk:vda1
I0321 13:29:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:29:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:29:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:29:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:29:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:29:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:29:23.409769  543705 memory.go:184] no items to output this cycle
I0321 13:29:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 13:29:29.699131  543705 disk_info.go:125] begin check local disk info of client
I0321 13:29:29.701661  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:29:29.701667  543705 disk_info.go:196] parse disk info done, disk is : [0xc000573c40 0xc000573c80]
E0321 13:29:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:29:33.409774  543705 cpu.go:275] no items to output this cycle
I0321 13:29:33.409781  543705 memory.go:184] no items to output this cycle
E0321 13:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:29:43.409806  543705 memory.go:191] Add success.
I0321 13:29:43.409816  543705 cpu.go:282] Add success.
I0321 13:29:43.419871  543705 net.go:648] Add success.
I0321 13:29:43.422699  543705 net.go:770] primary dev: ETH0
I0321 13:29:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:29:43.422726  543705 net.go:698] Add success.
I0321 13:29:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:29:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:29:53.410445  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:29:53.410460  543705 memory.go:184] no items to output this cycle
I0321 13:29:53.410493  543705 cpu.go:275] no items to output this cycle
E0321 13:30:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:30:03.409778  543705 memory.go:184] no items to output this cycle
I0321 13:30:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 13:30:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:30:13.409777  543705 memory.go:191] Add success.
W0321 13:30:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:30:13.409805  543705 cpu.go:282] Add success.
W0321 13:30:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:30:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:30:13.420149  543705 net.go:648] Add success.
I0321 13:30:13.423159  543705 net.go:770] primary dev: ETH0
I0321 13:30:13.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:30:13.423198  543705 net.go:698] Add success.
I0321 13:30:13.518918  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c122a52b-0088-44f5-baf1-d8381d4bc945","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:30:13.518959  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:30:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:30:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:30:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 13:30:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:30:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 13:30:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:30:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:30:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:30:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:30:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:30:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:30:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:30:23.409769  543705 memory.go:184] no items to output this cycle
I0321 13:30:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 13:30:29.702728  543705 disk_info.go:125] begin check local disk info of client
I0321 13:30:29.705242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:30:29.705248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf40 0xc0001aaf80]
E0321 13:30:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:30:33.409769  543705 memory.go:184] no items to output this cycle
I0321 13:30:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 13:30:38.936370  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:30:38.936376  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:30:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:30:43.410684  543705 memory.go:191] Add success.
I0321 13:30:43.409817  543705 cpu.go:282] Add success.
I0321 13:30:43.420459  543705 net.go:648] Add success.
I0321 13:30:43.423114  543705 net.go:770] primary dev: ETH0
I0321 13:30:43.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:30:43.423146  543705 net.go:698] Add success.
I0321 13:30:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:30:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:30:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:30:53.409791  543705 memory.go:184] no items to output this cycle
I0321 13:30:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 13:31:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:31:03.409776  543705 memory.go:184] no items to output this cycle
I0321 13:31:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 13:31:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:31:13.409815  543705 memory.go:191] Add success.
I0321 13:31:13.409820  543705 cpu.go:282] Add success.
W0321 13:31:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:31:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:31:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:31:13.420070  543705 net.go:648] Add success.
I0321 13:31:13.422819  543705 net.go:770] primary dev: ETH0
I0321 13:31:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:31:13.422849  543705 net.go:698] Add success.
I0321 13:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:31:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:31:14.455360  543705 disk_worker.go:708] disk space is not compliant
W0321 13:31:14.455365  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:31:14.457151  543705 disk_worker.go:494] system disk:vda1
I0321 13:31:14.457179  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:31:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:31:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:31:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:31:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:31:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:31:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:31:23.409775  543705 memory.go:184] no items to output this cycle
I0321 13:31:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 13:31:29.705673  543705 disk_info.go:125] begin check local disk info of client
I0321 13:31:29.708136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:31:29.708142  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266300 0xc000266340]
E0321 13:31:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:31:33.409758  543705 memory.go:184] no items to output this cycle
I0321 13:31:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 13:31:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:31:43.409806  543705 memory.go:191] Add success.
I0321 13:31:43.409816  543705 cpu.go:282] Add success.
I0321 13:31:43.420415  543705 net.go:648] Add success.
I0321 13:31:43.423368  543705 net.go:770] primary dev: ETH0
I0321 13:31:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:31:43.423393  543705 net.go:698] Add success.
I0321 13:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:31:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:31:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:31:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:31:53.409769  543705 memory.go:184] no items to output this cycle
I0321 13:31:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 13:32:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:32:03.409811  543705 memory.go:184] no items to output this cycle
I0321 13:32:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 13:32:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:32:13.409785  543705 memory.go:191] Add success.
I0321 13:32:13.409791  543705 cpu.go:282] Add success.
W0321 13:32:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:32:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:32:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:32:13.420071  543705 net.go:648] Add success.
I0321 13:32:13.422920  543705 net.go:770] primary dev: ETH0
I0321 13:32:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:32:13.422949  543705 net.go:698] Add success.
W0321 13:32:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:32:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 13:32:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:32:14.456160  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:32:14.456168  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:32:14.456177  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:32:14.457440  543705 disk_worker.go:494] system disk:vda1
I0321 13:32:14.457470  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:32:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:32:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:32:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:32:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:32:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:32:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:32:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:32:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:32:23.409779  543705 memory.go:184] no items to output this cycle
I0321 13:32:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:32:29.709182  543705 disk_info.go:125] begin check local disk info of client
I0321 13:32:29.711689  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:32:29.711695  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004935c0 0xc000493600]
E0321 13:32:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:32:33.409758  543705 memory.go:184] no items to output this cycle
I0321 13:32:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 13:32:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:32:43.409797  543705 cpu.go:282] Add success.
I0321 13:32:43.409808  543705 memory.go:191] Add success.
I0321 13:32:43.419845  543705 net.go:648] Add success.
I0321 13:32:43.422851  543705 net.go:770] primary dev: ETH0
I0321 13:32:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:32:43.422882  543705 net.go:698] Add success.
I0321 13:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:32:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:32:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:32:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:32:53.409807  543705 memory.go:184] no items to output this cycle
I0321 13:32:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 13:33:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:33:03.409788  543705 memory.go:184] no items to output this cycle
I0321 13:33:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 13:33:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:33:13.409815  543705 memory.go:191] Add success.
I0321 13:33:13.409821  543705 cpu.go:282] Add success.
W0321 13:33:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:33:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:33:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:33:13.420137  543705 net.go:648] Add success.
I0321 13:33:13.422777  543705 net.go:770] primary dev: ETH0
I0321 13:33:13.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:33:13.422802  543705 net.go:698] Add success.
I0321 13:33:13.468393  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ba349ba-308b-46f4-b822-934119980d2f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:33:13.468426  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:33:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:33:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:33:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 13:33:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:33:14.456986  543705 disk_worker.go:494] system disk:vda1
I0321 13:33:14.457019  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:33:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:33:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:33:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:33:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:33:23.409798  543705 memory.go:184] no items to output this cycle
I0321 13:33:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 13:33:29.713205  543705 disk_info.go:125] begin check local disk info of client
I0321 13:33:29.715699  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:33:29.715705  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf240 0xc0002bf280]
E0321 13:33:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:33:33.409779  543705 memory.go:184] no items to output this cycle
I0321 13:33:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 13:33:38.937731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:33:38.937737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:33:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:33:43.410671  543705 memory.go:191] Add success.
I0321 13:33:43.409819  543705 cpu.go:282] Add success.
I0321 13:33:43.420393  543705 net.go:648] Add success.
I0321 13:33:43.423278  543705 net.go:770] primary dev: ETH0
I0321 13:33:43.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:33:43.423306  543705 net.go:698] Add success.
I0321 13:33:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:33:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:33:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:33:53.409761  543705 memory.go:184] no items to output this cycle
I0321 13:33:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 13:34:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:34:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 13:34:03.409791  543705 memory.go:184] no items to output this cycle
E0321 13:34:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:34:13.409789  543705 memory.go:191] Add success.
I0321 13:34:13.409789  543705 cpu.go:282] Add success.
W0321 13:34:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:34:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:34:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:34:13.420065  543705 net.go:648] Add success.
I0321 13:34:13.422992  543705 net.go:770] primary dev: ETH0
I0321 13:34:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:34:13.423022  543705 net.go:698] Add success.
I0321 13:34:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:34:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:34:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 13:34:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:34:14.456876  543705 disk_worker.go:494] system disk:vda1
I0321 13:34:14.456914  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:34:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:34:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:34:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:34:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:34:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:34:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:34:23.409797  543705 memory.go:184] no items to output this cycle
I0321 13:34:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 13:34:29.717226  543705 disk_info.go:125] begin check local disk info of client
I0321 13:34:29.719741  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:34:29.719747  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8540 0xc0003e8580]
E0321 13:34:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:34:33.409777  543705 memory.go:184] no items to output this cycle
I0321 13:34:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 13:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:34:43.409784  543705 cpu.go:282] Add success.
I0321 13:34:43.409794  543705 memory.go:191] Add success.
I0321 13:34:43.419822  543705 net.go:648] Add success.
I0321 13:34:43.422605  543705 net.go:770] primary dev: ETH0
I0321 13:34:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:34:43.422630  543705 net.go:698] Add success.
I0321 13:34:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:34:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:34:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:34:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:34:53.409773  543705 cpu.go:275] no items to output this cycle
I0321 13:34:53.409783  543705 memory.go:184] no items to output this cycle
E0321 13:35:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:35:03.409802  543705 memory.go:184] no items to output this cycle
I0321 13:35:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 13:35:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:35:13.409782  543705 memory.go:191] Add success.
I0321 13:35:13.409806  543705 cpu.go:282] Add success.
W0321 13:35:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:35:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:35:13.420303  543705 net.go:648] Add success.
I0321 13:35:13.423185  543705 net.go:770] primary dev: ETH0
I0321 13:35:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:35:13.423228  543705 net.go:698] Add success.
I0321 13:35:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:35:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:35:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 13:35:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:35:14.456490  543705 disk_worker.go:494] system disk:vda1
I0321 13:35:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:35:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:35:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:35:23.410383  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:35:23.410391  543705 cpu.go:275] no items to output this cycle
I0321 13:35:23.410399  543705 memory.go:184] no items to output this cycle
I0321 13:35:29.721248  543705 disk_info.go:125] begin check local disk info of client
I0321 13:35:29.723755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:35:29.723760  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004684c0 0xc000468500]
E0321 13:35:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:35:33.409789  543705 memory.go:184] no items to output this cycle
I0321 13:35:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 13:35:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:35:43.409789  543705 memory.go:191] Add success.
I0321 13:35:43.409790  543705 cpu.go:282] Add success.
I0321 13:35:43.419956  543705 net.go:648] Add success.
I0321 13:35:43.422911  543705 net.go:770] primary dev: ETH0
I0321 13:35:43.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:35:43.422940  543705 net.go:698] Add success.
I0321 13:35:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:35:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:35:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:35:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:35:53.409797  543705 memory.go:184] no items to output this cycle
I0321 13:35:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 13:36:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:36:03.409774  543705 memory.go:184] no items to output this cycle
I0321 13:36:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 13:36:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:36:13.409807  543705 memory.go:191] Add success.
I0321 13:36:13.409821  543705 cpu.go:282] Add success.
W0321 13:36:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:36:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:36:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:36:13.420125  543705 net.go:648] Add success.
I0321 13:36:13.423025  543705 net.go:770] primary dev: ETH0
I0321 13:36:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:36:13.423051  543705 net.go:698] Add success.
I0321 13:36:13.464877  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ed3d1da0-0f78-49b3-afb2-37d6947e7188","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:36:13.464912  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:36:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:36:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 13:36:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:36:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 13:36:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:36:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:36:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:36:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:36:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:36:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:36:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:36:23.409867  543705 memory.go:184] no items to output this cycle
I0321 13:36:23.409924  543705 cpu.go:275] no items to output this cycle
I0321 13:36:29.725268  543705 disk_info.go:125] begin check local disk info of client
I0321 13:36:29.727800  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:36:29.727806  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4000 0xc0004b4040]
E0321 13:36:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:36:33.409764  543705 memory.go:184] no items to output this cycle
I0321 13:36:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 13:36:38.937874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:36:38.937880  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:36:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:36:43.410645  543705 memory.go:191] Add success.
I0321 13:36:43.409805  543705 cpu.go:282] Add success.
I0321 13:36:43.420358  543705 net.go:648] Add success.
I0321 13:36:43.423125  543705 net.go:770] primary dev: ETH0
I0321 13:36:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:36:43.423150  543705 net.go:698] Add success.
I0321 13:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:36:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:36:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:36:53.409805  543705 memory.go:184] no items to output this cycle
I0321 13:36:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 13:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:37:03.409787  543705 memory.go:184] no items to output this cycle
I0321 13:37:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 13:37:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:37:13.409803  543705 memory.go:191] Add success.
I0321 13:37:13.409811  543705 cpu.go:282] Add success.
W0321 13:37:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:37:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:37:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:37:13.420100  543705 net.go:648] Add success.
I0321 13:37:13.423060  543705 net.go:770] primary dev: ETH0
I0321 13:37:13.423074  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:37:13.423087  543705 net.go:698] Add success.
I0321 13:37:13.453623  543705 event_worker.go:152] Polling the log file for events...
W0321 13:37:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:37:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 13:37:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:37:14.456811  543705 disk_worker.go:494] system disk:vda1
I0321 13:37:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:37:14.457108  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:37:14.457116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:37:14.457121  543705 custom_config.go:64] query custom config with name: gpu
E0321 13:37:15.456871  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:37:15.456879  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:37:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:37:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:37:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:37:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:37:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:37:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:37:23.409771  543705 memory.go:184] no items to output this cycle
I0321 13:37:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 13:37:29.729278  543705 disk_info.go:125] begin check local disk info of client
I0321 13:37:29.731836  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:37:29.731842  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278180 0xc0002781c0]
E0321 13:37:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:37:33.409781  543705 memory.go:184] no items to output this cycle
I0321 13:37:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 13:37:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:37:43.409807  543705 memory.go:191] Add success.
I0321 13:37:43.409812  543705 cpu.go:282] Add success.
I0321 13:37:43.419869  543705 net.go:648] Add success.
I0321 13:37:43.422961  543705 net.go:770] primary dev: ETH0
I0321 13:37:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:37:43.422987  543705 net.go:698] Add success.
I0321 13:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:37:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:37:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:37:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:37:53.409769  543705 memory.go:184] no items to output this cycle
I0321 13:37:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 13:38:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:38:03.409807  543705 memory.go:184] no items to output this cycle
I0321 13:38:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 13:38:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:38:13.409810  543705 memory.go:191] Add success.
I0321 13:38:13.409817  543705 cpu.go:282] Add success.
W0321 13:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:38:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:38:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:38:13.420058  543705 net.go:648] Add success.
I0321 13:38:13.422814  543705 net.go:770] primary dev: ETH0
I0321 13:38:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:38:13.422839  543705 net.go:698] Add success.
I0321 13:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:38:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:38:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 13:38:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:38:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 13:38:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:38:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:38:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:38:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:38:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:38:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:38:23.409800  543705 memory.go:184] no items to output this cycle
I0321 13:38:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 13:38:29.733296  543705 disk_info.go:125] begin check local disk info of client
I0321 13:38:29.735842  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:38:29.735849  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ae340 0xc0002ae380]
E0321 13:38:33.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:38:33.409900  543705 memory.go:184] no items to output this cycle
I0321 13:38:33.409916  543705 cpu.go:275] no items to output this cycle
E0321 13:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:38:43.409787  543705 memory.go:191] Add success.
I0321 13:38:43.409820  543705 cpu.go:282] Add success.
I0321 13:38:43.419865  543705 net.go:648] Add success.
I0321 13:38:43.422443  543705 net.go:770] primary dev: ETH0
I0321 13:38:43.422456  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:38:43.422468  543705 net.go:698] Add success.
I0321 13:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:38:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:38:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:38:53.409795  543705 memory.go:184] no items to output this cycle
I0321 13:38:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 13:39:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:39:03.409784  543705 memory.go:184] no items to output this cycle
I0321 13:39:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 13:39:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:39:13.409787  543705 memory.go:191] Add success.
I0321 13:39:13.409806  543705 cpu.go:282] Add success.
W0321 13:39:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:39:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:39:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:39:13.420203  543705 net.go:648] Add success.
I0321 13:39:13.422748  543705 net.go:770] primary dev: ETH0
I0321 13:39:13.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:39:13.422778  543705 net.go:698] Add success.
I0321 13:39:13.463804  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f2505eff-ac70-405d-bd38-871526fcf915","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:39:13.463837  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:39:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:39:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:39:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 13:39:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:39:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 13:39:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:39:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:39:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:39:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:39:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:39:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:39:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:39:23.409797  543705 memory.go:184] no items to output this cycle
I0321 13:39:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 13:39:29.737320  543705 disk_info.go:125] begin check local disk info of client
I0321 13:39:29.739889  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:39:29.739895  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352fc0 0xc000353000]
E0321 13:39:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:39:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 13:39:33.409790  543705 memory.go:184] no items to output this cycle
I0321 13:39:38.940394  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:39:38.940401  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:39:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:39:43.410651  543705 memory.go:191] Add success.
I0321 13:39:43.409822  543705 cpu.go:282] Add success.
I0321 13:39:43.420372  543705 net.go:648] Add success.
I0321 13:39:43.423136  543705 net.go:770] primary dev: ETH0
I0321 13:39:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:39:43.423165  543705 net.go:698] Add success.
I0321 13:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:39:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:39:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:39:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:39:53.409777  543705 memory.go:184] no items to output this cycle
I0321 13:39:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 13:40:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:40:03.409812  543705 memory.go:184] no items to output this cycle
I0321 13:40:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 13:40:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:40:13.409775  543705 memory.go:191] Add success.
W0321 13:40:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:40:13.409812  543705 cpu.go:282] Add success.
W0321 13:40:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:40:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:40:13.420239  543705 net.go:648] Add success.
I0321 13:40:13.423060  543705 net.go:770] primary dev: ETH0
I0321 13:40:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:40:13.423086  543705 net.go:698] Add success.
I0321 13:40:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:40:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:40:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 13:40:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:40:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 13:40:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:40:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:40:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:40:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:40:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:40:23.409779  543705 memory.go:184] no items to output this cycle
I0321 13:40:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:40:29.741349  543705 disk_info.go:125] begin check local disk info of client
I0321 13:40:29.743841  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:40:29.743847  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374600 0xc000374640]
E0321 13:40:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:40:33.409802  543705 memory.go:184] no items to output this cycle
I0321 13:40:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 13:40:43.409881  543705 cpu.go:282] Add success.
E0321 13:40:43.409962  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:40:43.409986  543705 memory.go:191] Add success.
I0321 13:40:43.419765  543705 net.go:648] Add success.
I0321 13:40:43.422558  543705 net.go:770] primary dev: ETH0
I0321 13:40:43.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:40:43.422585  543705 net.go:698] Add success.
I0321 13:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:40:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:40:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:40:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:40:53.409796  543705 memory.go:184] no items to output this cycle
I0321 13:40:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 13:41:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:41:03.409789  543705 memory.go:184] no items to output this cycle
I0321 13:41:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 13:41:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:41:13.409794  543705 memory.go:191] Add success.
I0321 13:41:13.409817  543705 cpu.go:282] Add success.
W0321 13:41:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:41:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:41:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:41:13.420273  543705 net.go:648] Add success.
I0321 13:41:13.422940  543705 net.go:770] primary dev: ETH0
I0321 13:41:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:41:13.422968  543705 net.go:698] Add success.
I0321 13:41:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:41:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:41:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 13:41:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:41:14.456512  543705 disk_worker.go:494] system disk:vda1
I0321 13:41:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:41:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:41:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:41:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:41:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:41:23.409805  543705 memory.go:184] no items to output this cycle
I0321 13:41:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 13:41:29.745361  543705 disk_info.go:125] begin check local disk info of client
I0321 13:41:29.747951  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:41:29.747958  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370a40 0xc000370a80]
E0321 13:41:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:41:33.409783  543705 memory.go:184] no items to output this cycle
I0321 13:41:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 13:41:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:41:43.409789  543705 memory.go:191] Add success.
I0321 13:41:43.409821  543705 cpu.go:282] Add success.
I0321 13:41:43.419992  543705 net.go:648] Add success.
I0321 13:41:43.423070  543705 net.go:770] primary dev: ETH0
I0321 13:41:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:41:43.423099  543705 net.go:698] Add success.
I0321 13:41:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:41:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:41:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:41:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:41:53.409818  543705 memory.go:184] no items to output this cycle
I0321 13:41:53.409828  543705 cpu.go:275] no items to output this cycle
E0321 13:42:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:42:03.409801  543705 cpu.go:275] no items to output this cycle
I0321 13:42:03.409808  543705 memory.go:184] no items to output this cycle
E0321 13:42:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:42:13.409788  543705 memory.go:191] Add success.
I0321 13:42:13.409789  543705 cpu.go:282] Add success.
W0321 13:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:42:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:42:13.420123  543705 net.go:648] Add success.
I0321 13:42:13.422759  543705 net.go:770] primary dev: ETH0
I0321 13:42:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:42:13.422788  543705 net.go:698] Add success.
I0321 13:42:13.527923  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dec03dbc-b2a7-4229-b7df-fb486a6c5f52","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:42:13.527958  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 13:42:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:42:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 13:42:14.455203  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:42:14.455942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:42:14.455951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:42:14.455956  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:42:14.456822  543705 disk_worker.go:494] system disk:vda1
I0321 13:42:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:42:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:42:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:42:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:42:16.457965  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:42:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:42:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:42:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:42:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:42:23.409799  543705 memory.go:184] no items to output this cycle
I0321 13:42:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 13:42:29.749380  543705 disk_info.go:125] begin check local disk info of client
I0321 13:42:29.751895  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:42:29.751902  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b840 0xc00007b880]
E0321 13:42:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:42:33.409762  543705 memory.go:184] no items to output this cycle
I0321 13:42:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 13:42:38.941731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:42:38.941737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:42:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:42:43.410591  543705 memory.go:191] Add success.
I0321 13:42:43.409819  543705 cpu.go:282] Add success.
I0321 13:42:43.420427  543705 net.go:648] Add success.
I0321 13:42:43.423026  543705 net.go:770] primary dev: ETH0
I0321 13:42:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:42:43.423052  543705 net.go:698] Add success.
I0321 13:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:42:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:42:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:42:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:42:53.409779  543705 memory.go:184] no items to output this cycle
I0321 13:42:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 13:43:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:43:03.409773  543705 memory.go:184] no items to output this cycle
I0321 13:43:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 13:43:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:43:13.409796  543705 memory.go:191] Add success.
I0321 13:43:13.409797  543705 cpu.go:282] Add success.
W0321 13:43:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:43:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:43:13.420143  543705 net.go:648] Add success.
I0321 13:43:13.422813  543705 net.go:770] primary dev: ETH0
I0321 13:43:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:43:13.422846  543705 net.go:698] Add success.
I0321 13:43:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:43:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:43:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 13:43:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:43:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 13:43:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:43:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:43:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:43:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:43:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:43:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:43:23.409796  543705 memory.go:184] no items to output this cycle
I0321 13:43:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 13:43:29.753399  543705 disk_info.go:125] begin check local disk info of client
I0321 13:43:29.755877  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:43:29.755883  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307940 0xc000307980]
E0321 13:43:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:43:33.409790  543705 memory.go:184] no items to output this cycle
I0321 13:43:33.409895  543705 cpu.go:275] no items to output this cycle
E0321 13:43:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:43:43.409795  543705 memory.go:191] Add success.
I0321 13:43:43.409795  543705 cpu.go:282] Add success.
I0321 13:43:43.419889  543705 net.go:648] Add success.
I0321 13:43:43.422395  543705 net.go:770] primary dev: ETH0
I0321 13:43:43.422409  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:43:43.422421  543705 net.go:698] Add success.
I0321 13:43:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:43:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:43:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:43:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:43:53.409806  543705 memory.go:184] no items to output this cycle
I0321 13:43:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 13:44:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:44:03.409787  543705 memory.go:184] no items to output this cycle
I0321 13:44:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 13:44:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:44:13.409782  543705 memory.go:191] Add success.
I0321 13:44:13.409805  543705 cpu.go:282] Add success.
W0321 13:44:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:44:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:44:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:44:13.420120  543705 net.go:648] Add success.
I0321 13:44:13.422629  543705 net.go:770] primary dev: ETH0
I0321 13:44:13.422644  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:44:13.422658  543705 net.go:698] Add success.
I0321 13:44:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:44:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:44:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 13:44:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:44:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 13:44:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:44:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:44:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:44:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:44:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:44:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:44:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 13:44:23.409785  543705 memory.go:184] no items to output this cycle
I0321 13:44:29.757424  543705 disk_info.go:125] begin check local disk info of client
I0321 13:44:29.759912  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:44:29.759919  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4540 0xc0004b4580]
E0321 13:44:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:44:33.409792  543705 memory.go:184] no items to output this cycle
I0321 13:44:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 13:44:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:44:43.409790  543705 memory.go:191] Add success.
I0321 13:44:43.409792  543705 cpu.go:282] Add success.
I0321 13:44:43.420007  543705 net.go:648] Add success.
I0321 13:44:43.423046  543705 net.go:770] primary dev: ETH0
I0321 13:44:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:44:43.423071  543705 net.go:698] Add success.
I0321 13:44:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:44:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:44:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:44:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:44:53.409810  543705 memory.go:184] no items to output this cycle
I0321 13:44:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 13:45:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:45:03.409811  543705 memory.go:184] no items to output this cycle
I0321 13:45:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 13:45:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:45:13.409794  543705 memory.go:191] Add success.
I0321 13:45:13.409793  543705 cpu.go:282] Add success.
W0321 13:45:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:45:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:45:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:45:13.420028  543705 net.go:770] primary dev: ETH0
I0321 13:45:13.420043  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:45:13.420057  543705 net.go:698] Add success.
I0321 13:45:13.420422  543705 net.go:648] Add success.
I0321 13:45:13.468890  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fbc75567-f4f8-4ed4-b51b-cdd0c672146b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:45:13.468923  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:45:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:45:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:45:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 13:45:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:45:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 13:45:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:45:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:45:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:45:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:45:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:45:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:45:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:45:23.409768  543705 memory.go:184] no items to output this cycle
I0321 13:45:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 13:45:29.761437  543705 disk_info.go:125] begin check local disk info of client
I0321 13:45:29.763955  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:45:29.763962  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003877c0 0xc000387800]
E0321 13:45:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:45:33.409793  543705 memory.go:184] no items to output this cycle
I0321 13:45:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 13:45:38.944408  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:45:38.944415  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:45:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:45:43.410789  543705 memory.go:191] Add success.
I0321 13:45:43.409790  543705 cpu.go:282] Add success.
I0321 13:45:43.420496  543705 net.go:648] Add success.
I0321 13:45:43.423728  543705 net.go:770] primary dev: ETH0
I0321 13:45:43.423742  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:45:43.423754  543705 net.go:698] Add success.
I0321 13:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:45:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:45:53.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:45:53.410277  543705 memory.go:184] no items to output this cycle
I0321 13:45:53.410283  543705 cpu.go:275] no items to output this cycle
E0321 13:46:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:46:03.409793  543705 cpu.go:275] no items to output this cycle
I0321 13:46:03.409795  543705 memory.go:184] no items to output this cycle
E0321 13:46:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:46:13.409790  543705 memory.go:191] Add success.
I0321 13:46:13.409790  543705 cpu.go:282] Add success.
W0321 13:46:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:46:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:46:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:46:13.420169  543705 net.go:648] Add success.
I0321 13:46:13.422811  543705 net.go:770] primary dev: ETH0
I0321 13:46:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:46:13.422835  543705 net.go:698] Add success.
I0321 13:46:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:46:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:46:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 13:46:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:46:14.456884  543705 disk_worker.go:494] system disk:vda1
I0321 13:46:14.456921  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:46:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:46:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:46:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:46:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:46:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:46:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:46:23.409764  543705 memory.go:184] no items to output this cycle
I0321 13:46:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 13:46:29.765463  543705 disk_info.go:125] begin check local disk info of client
I0321 13:46:29.767964  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:46:29.767971  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab5c0 0xc0001ab600]
E0321 13:46:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:46:33.409760  543705 memory.go:184] no items to output this cycle
I0321 13:46:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 13:46:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:46:43.409808  543705 memory.go:191] Add success.
I0321 13:46:43.409813  543705 cpu.go:282] Add success.
I0321 13:46:43.419883  543705 net.go:648] Add success.
I0321 13:46:43.422430  543705 net.go:770] primary dev: ETH0
I0321 13:46:43.422445  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:46:43.422457  543705 net.go:698] Add success.
I0321 13:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:46:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:46:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:46:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:46:53.409780  543705 memory.go:184] no items to output this cycle
I0321 13:46:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 13:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:47:03.409786  543705 memory.go:184] no items to output this cycle
I0321 13:47:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 13:47:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:47:13.409815  543705 memory.go:191] Add success.
I0321 13:47:13.409815  543705 cpu.go:282] Add success.
W0321 13:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:47:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:47:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:47:13.420508  543705 net.go:648] Add success.
I0321 13:47:13.423542  543705 net.go:770] primary dev: ETH0
I0321 13:47:13.423555  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:47:13.423567  543705 net.go:698] Add success.
I0321 13:47:13.452770  543705 event_worker.go:152] Polling the log file for events...
W0321 13:47:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:47:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 13:47:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:47:14.456832  543705 disk_worker.go:494] system disk:vda1
I0321 13:47:14.456873  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:47:14.457053  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:47:14.457062  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:47:14.457068  543705 custom_config.go:64] query custom config with name: gpu
E0321 13:47:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:47:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:47:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:47:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:47:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:47:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:47:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:47:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:47:23.409776  543705 memory.go:184] no items to output this cycle
I0321 13:47:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 13:47:29.769484  543705 disk_info.go:125] begin check local disk info of client
I0321 13:47:29.771947  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:47:29.771953  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ebc0 0xc00035ec00]
E0321 13:47:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:47:33.409778  543705 memory.go:184] no items to output this cycle
I0321 13:47:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 13:47:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:47:43.409783  543705 memory.go:191] Add success.
I0321 13:47:43.409799  543705 cpu.go:282] Add success.
I0321 13:47:43.420070  543705 net.go:648] Add success.
I0321 13:47:43.422921  543705 net.go:770] primary dev: ETH0
I0321 13:47:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:47:43.422959  543705 net.go:698] Add success.
I0321 13:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:47:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:47:53.409781  543705 memory.go:184] no items to output this cycle
I0321 13:47:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 13:48:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:48:03.409807  543705 memory.go:184] no items to output this cycle
I0321 13:48:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 13:48:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:48:13.409810  543705 cpu.go:282] Add success.
I0321 13:48:13.409818  543705 memory.go:191] Add success.
W0321 13:48:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:48:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:48:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:48:13.420105  543705 net.go:648] Add success.
I0321 13:48:13.422919  543705 net.go:770] primary dev: ETH0
I0321 13:48:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:48:13.422944  543705 net.go:698] Add success.
I0321 13:48:13.469550  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"169d01b4-e2b5-4054-a25a-617bfae847fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:48:13.469583  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:48:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:48:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:48:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 13:48:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:48:14.456718  543705 disk_worker.go:494] system disk:vda1
I0321 13:48:14.456748  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:48:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:48:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:48:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:48:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:48:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:48:23.409769  543705 memory.go:184] no items to output this cycle
I0321 13:48:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 13:48:29.773498  543705 disk_info.go:125] begin check local disk info of client
I0321 13:48:29.776010  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:48:29.776016  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0321 13:48:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:48:33.409794  543705 memory.go:184] no items to output this cycle
I0321 13:48:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 13:48:38.945731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:48:38.945737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:48:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:48:43.410656  543705 memory.go:191] Add success.
I0321 13:48:43.409792  543705 cpu.go:282] Add success.
I0321 13:48:43.420431  543705 net.go:648] Add success.
I0321 13:48:43.423157  543705 net.go:770] primary dev: ETH0
I0321 13:48:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:48:43.423183  543705 net.go:698] Add success.
I0321 13:48:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:48:46.458130  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:48:46.458160  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:48:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:48:53.409786  543705 memory.go:184] no items to output this cycle
I0321 13:48:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 13:49:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:49:03.409809  543705 memory.go:184] no items to output this cycle
I0321 13:49:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 13:49:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:49:13.409801  543705 memory.go:191] Add success.
I0321 13:49:13.409802  543705 cpu.go:282] Add success.
W0321 13:49:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:49:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:49:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:49:13.420207  543705 net.go:648] Add success.
I0321 13:49:13.423153  543705 net.go:770] primary dev: ETH0
I0321 13:49:13.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:49:13.423177  543705 net.go:698] Add success.
I0321 13:49:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:49:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:49:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 13:49:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:49:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 13:49:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:49:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:49:16.458039  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:49:16.458102  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:49:16.458124  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:49:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:49:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:49:23.409776  543705 memory.go:184] no items to output this cycle
I0321 13:49:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 13:49:29.777512  543705 disk_info.go:125] begin check local disk info of client
I0321 13:49:29.779987  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:49:29.779994  543705 disk_info.go:196] parse disk info done, disk is : [0xc000346a00 0xc000346a40]
E0321 13:49:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:49:33.409797  543705 memory.go:184] no items to output this cycle
I0321 13:49:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 13:49:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:49:43.409819  543705 memory.go:191] Add success.
I0321 13:49:43.409829  543705 cpu.go:282] Add success.
I0321 13:49:43.420130  543705 net.go:648] Add success.
I0321 13:49:43.422926  543705 net.go:770] primary dev: ETH0
I0321 13:49:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:49:43.422951  543705 net.go:698] Add success.
I0321 13:49:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:49:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:49:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:49:53.410409  543705 memory.go:184] no items to output this cycle
I0321 13:49:53.410421  543705 cpu.go:275] no items to output this cycle
E0321 13:50:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:50:03.409792  543705 memory.go:184] no items to output this cycle
I0321 13:50:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 13:50:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:50:13.409831  543705 memory.go:191] Add success.
I0321 13:50:13.409841  543705 cpu.go:282] Add success.
W0321 13:50:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:50:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:50:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:50:13.420347  543705 net.go:648] Add success.
I0321 13:50:13.423033  543705 net.go:770] primary dev: ETH0
I0321 13:50:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:50:13.423059  543705 net.go:698] Add success.
I0321 13:50:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:50:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:50:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 13:50:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:50:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 13:50:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:50:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:50:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:50:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:50:23.409774  543705 memory.go:184] no items to output this cycle
I0321 13:50:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 13:50:29.781539  543705 disk_info.go:125] begin check local disk info of client
I0321 13:50:29.784010  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:50:29.784016  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353500 0xc000353540]
E0321 13:50:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:50:33.409795  543705 memory.go:184] no items to output this cycle
I0321 13:50:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 13:50:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:50:43.409835  543705 memory.go:191] Add success.
I0321 13:50:43.409837  543705 cpu.go:282] Add success.
I0321 13:50:43.420076  543705 net.go:648] Add success.
I0321 13:50:43.422810  543705 net.go:770] primary dev: ETH0
I0321 13:50:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:50:43.422840  543705 net.go:698] Add success.
I0321 13:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:50:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:50:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:50:53.409784  543705 memory.go:184] no items to output this cycle
I0321 13:50:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 13:51:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:51:03.409809  543705 memory.go:184] no items to output this cycle
I0321 13:51:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 13:51:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:51:13.409775  543705 memory.go:191] Add success.
W0321 13:51:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 13:51:13.409804  543705 cpu.go:282] Add success.
W0321 13:51:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:51:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:51:13.420067  543705 net.go:648] Add success.
I0321 13:51:13.422674  543705 net.go:770] primary dev: ETH0
I0321 13:51:13.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:51:13.422703  543705 net.go:698] Add success.
I0321 13:51:13.513984  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe9df3e1-ac3e-4f19-916b-477b55600cd1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:51:13.514019  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:51:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:51:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:51:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 13:51:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:51:14.456676  543705 disk_worker.go:494] system disk:vda1
I0321 13:51:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:51:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:51:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:51:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:51:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:51:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:51:23.409771  543705 memory.go:184] no items to output this cycle
I0321 13:51:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 13:51:29.785554  543705 disk_info.go:125] begin check local disk info of client
I0321 13:51:29.788013  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:51:29.788019  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7480 0xc0003e74c0]
E0321 13:51:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:51:33.409802  543705 memory.go:184] no items to output this cycle
I0321 13:51:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 13:51:38.945874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:51:38.945881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:51:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:51:43.410668  543705 memory.go:191] Add success.
I0321 13:51:43.409826  543705 cpu.go:282] Add success.
I0321 13:51:43.420397  543705 net.go:648] Add success.
I0321 13:51:43.422878  543705 net.go:770] primary dev: ETH0
I0321 13:51:43.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:51:43.423098  543705 net.go:698] Add success.
I0321 13:51:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:51:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:51:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:51:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:51:53.409804  543705 memory.go:184] no items to output this cycle
I0321 13:51:53.409817  543705 cpu.go:275] no items to output this cycle
E0321 13:52:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:52:03.409789  543705 memory.go:184] no items to output this cycle
I0321 13:52:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 13:52:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:52:13.409801  543705 memory.go:191] Add success.
I0321 13:52:13.409802  543705 cpu.go:282] Add success.
W0321 13:52:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:52:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:52:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:52:13.420131  543705 net.go:648] Add success.
I0321 13:52:13.422554  543705 net.go:770] primary dev: ETH0
I0321 13:52:13.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:52:13.422580  543705 net.go:698] Add success.
W0321 13:52:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:52:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 13:52:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0321 13:52:14.456941  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:52:14.456950  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:52:14.456956  543705 custom_config.go:64] query custom config with name: gpu
I0321 13:52:14.457011  543705 disk_worker.go:494] system disk:vda1
I0321 13:52:14.457043  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:52:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:52:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:52:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:52:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:52:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:52:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:52:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:52:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:52:23.409813  543705 memory.go:184] no items to output this cycle
I0321 13:52:23.409829  543705 cpu.go:275] no items to output this cycle
I0321 13:52:29.789579  543705 disk_info.go:125] begin check local disk info of client
I0321 13:52:29.792048  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:52:29.792055  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
E0321 13:52:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:52:33.409790  543705 memory.go:184] no items to output this cycle
I0321 13:52:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 13:52:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:52:43.409771  543705 memory.go:191] Add success.
I0321 13:52:43.409801  543705 cpu.go:282] Add success.
I0321 13:52:43.419882  543705 net.go:648] Add success.
I0321 13:52:43.422962  543705 net.go:770] primary dev: ETH0
I0321 13:52:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:52:43.422988  543705 net.go:698] Add success.
I0321 13:52:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:52:53.410379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:52:53.410396  543705 memory.go:184] no items to output this cycle
I0321 13:52:53.410564  543705 cpu.go:275] no items to output this cycle
E0321 13:53:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:53:03.409788  543705 memory.go:184] no items to output this cycle
I0321 13:53:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 13:53:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:53:13.409780  543705 memory.go:191] Add success.
W0321 13:53:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:53:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:53:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:53:13.409830  543705 cpu.go:282] Add success.
I0321 13:53:13.420081  543705 net.go:648] Add success.
I0321 13:53:13.423114  543705 net.go:770] primary dev: ETH0
I0321 13:53:13.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:53:13.423140  543705 net.go:698] Add success.
I0321 13:53:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:53:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:53:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 13:53:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:53:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 13:53:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:53:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:53:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:53:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:53:23.409773  543705 memory.go:184] no items to output this cycle
I0321 13:53:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 13:53:29.793607  543705 disk_info.go:125] begin check local disk info of client
I0321 13:53:29.796100  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:53:29.796106  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358d00 0xc000358d40]
E0321 13:53:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:53:33.409795  543705 memory.go:184] no items to output this cycle
I0321 13:53:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 13:53:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:53:43.409808  543705 memory.go:191] Add success.
I0321 13:53:43.409809  543705 cpu.go:282] Add success.
I0321 13:53:43.420020  543705 net.go:648] Add success.
I0321 13:53:43.422776  543705 net.go:770] primary dev: ETH0
I0321 13:53:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:53:43.422804  543705 net.go:698] Add success.
I0321 13:53:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:53:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:53:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:53:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:53:53.409813  543705 memory.go:184] no items to output this cycle
I0321 13:53:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 13:54:03.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:54:03.409826  543705 memory.go:184] no items to output this cycle
I0321 13:54:03.409839  543705 cpu.go:275] no items to output this cycle
E0321 13:54:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:54:13.409807  543705 cpu.go:282] Add success.
I0321 13:54:13.409808  543705 memory.go:191] Add success.
W0321 13:54:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:54:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:54:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:54:13.420057  543705 net.go:648] Add success.
I0321 13:54:13.422738  543705 net.go:770] primary dev: ETH0
I0321 13:54:13.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:54:13.422767  543705 net.go:698] Add success.
I0321 13:54:13.633003  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5bbe607a-6432-4898-b5e0-978826f094f0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:54:13.633036  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 13:54:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:54:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:54:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 13:54:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:54:14.456612  543705 disk_worker.go:494] system disk:vda1
I0321 13:54:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:54:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:54:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:54:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:54:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:54:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:54:23.409807  543705 memory.go:184] no items to output this cycle
I0321 13:54:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 13:54:29.797624  543705 disk_info.go:125] begin check local disk info of client
I0321 13:54:29.800113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:54:29.800121  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3100 0xc0003e3140]
E0321 13:54:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:54:33.409804  543705 memory.go:184] no items to output this cycle
I0321 13:54:33.409820  543705 cpu.go:275] no items to output this cycle
I0321 13:54:38.948426  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:54:38.948433  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:54:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:54:43.410820  543705 memory.go:191] Add success.
I0321 13:54:43.409829  543705 cpu.go:282] Add success.
I0321 13:54:43.420872  543705 net.go:648] Add success.
I0321 13:54:43.424411  543705 net.go:770] primary dev: ETH0
I0321 13:54:43.424424  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:54:43.424436  543705 net.go:698] Add success.
I0321 13:54:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:54:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:54:53.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:54:53.410390  543705 memory.go:184] no items to output this cycle
I0321 13:54:53.410411  543705 cpu.go:275] no items to output this cycle
E0321 13:55:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:55:03.409800  543705 cpu.go:275] no items to output this cycle
I0321 13:55:03.409810  543705 memory.go:184] no items to output this cycle
E0321 13:55:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:55:13.409819  543705 memory.go:191] Add success.
I0321 13:55:13.409822  543705 cpu.go:282] Add success.
W0321 13:55:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:55:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:55:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:55:13.420156  543705 net.go:648] Add success.
I0321 13:55:13.423095  543705 net.go:770] primary dev: ETH0
I0321 13:55:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:55:13.423125  543705 net.go:698] Add success.
I0321 13:55:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:55:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:55:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 13:55:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:55:14.456528  543705 disk_worker.go:494] system disk:vda1
I0321 13:55:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:55:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:55:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:55:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:55:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:55:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:55:23.409794  543705 memory.go:184] no items to output this cycle
I0321 13:55:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 13:55:29.801651  543705 disk_info.go:125] begin check local disk info of client
I0321 13:55:29.804189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:55:29.804194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa040 0xc0001aa080]
E0321 13:55:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:55:33.409768  543705 memory.go:184] no items to output this cycle
I0321 13:55:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 13:55:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:55:43.409787  543705 memory.go:191] Add success.
I0321 13:55:43.409955  543705 cpu.go:282] Add success.
I0321 13:55:43.419731  543705 net.go:648] Add success.
I0321 13:55:43.422361  543705 net.go:770] primary dev: ETH0
I0321 13:55:43.422377  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:55:43.422390  543705 net.go:698] Add success.
I0321 13:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:55:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:55:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 13:55:53.409792  543705 memory.go:184] no items to output this cycle
E0321 13:56:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:56:03.409785  543705 memory.go:184] no items to output this cycle
I0321 13:56:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 13:56:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:56:13.409793  543705 cpu.go:282] Add success.
I0321 13:56:13.409794  543705 memory.go:191] Add success.
W0321 13:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:56:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:56:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:56:13.420161  543705 net.go:648] Add success.
I0321 13:56:13.422915  543705 net.go:770] primary dev: ETH0
I0321 13:56:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:56:13.422940  543705 net.go:698] Add success.
I0321 13:56:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:56:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:56:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 13:56:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:56:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 13:56:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:56:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:56:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:56:23.409795  543705 memory.go:184] no items to output this cycle
I0321 13:56:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 13:56:29.805669  543705 disk_info.go:125] begin check local disk info of client
I0321 13:56:29.808172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:56:29.808178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000232dc0 0xc000232e00]
E0321 13:56:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:56:33.409772  543705 cpu.go:275] no items to output this cycle
I0321 13:56:33.409780  543705 memory.go:184] no items to output this cycle
E0321 13:56:43.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:56:43.409898  543705 memory.go:191] Add success.
I0321 13:56:43.409904  543705 cpu.go:282] Add success.
I0321 13:56:43.419720  543705 net.go:648] Add success.
I0321 13:56:43.422269  543705 net.go:770] primary dev: ETH0
I0321 13:56:43.422281  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:56:43.422293  543705 net.go:698] Add success.
I0321 13:56:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:56:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:56:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:56:53.409794  543705 memory.go:184] no items to output this cycle
I0321 13:56:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 13:57:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:57:03.409787  543705 memory.go:184] no items to output this cycle
I0321 13:57:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 13:57:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:57:13.409783  543705 memory.go:191] Add success.
I0321 13:57:13.409806  543705 cpu.go:282] Add success.
W0321 13:57:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:57:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:57:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:57:13.420244  543705 net.go:648] Add success.
I0321 13:57:13.429290  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 13:57:13.429365  543705 net.go:770] primary dev: ETH0
I0321 13:57:13.429377  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:57:13.429387  543705 net.go:698] Add success.
I0321 13:57:13.452927  543705 event_worker.go:152] Polling the log file for events...
I0321 13:57:13.468671  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d317f8ee-3709-4c49-bf36-ba9b9da485ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 13:57:13.468710  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 13:57:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:57:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 13:57:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:57:14.456792  543705 disk_worker.go:494] system disk:vda1
I0321 13:57:14.456832  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 13:57:14.457077  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 13:57:14.457084  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 13:57:14.457088  543705 custom_config.go:64] query custom config with name: gpu
E0321 13:57:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 13:57:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:57:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 13:57:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 13:57:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:57:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:57:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:57:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:57:23.409765  543705 memory.go:184] no items to output this cycle
I0321 13:57:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 13:57:29.809676  543705 disk_info.go:125] begin check local disk info of client
I0321 13:57:29.812251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:57:29.812258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353a80 0xc000353c80]
E0321 13:57:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:57:33.409775  543705 memory.go:184] no items to output this cycle
I0321 13:57:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 13:57:38.949734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 13:57:38.949741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 13:57:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:57:43.410633  543705 memory.go:191] Add success.
I0321 13:57:43.409804  543705 cpu.go:282] Add success.
I0321 13:57:43.420337  543705 net.go:648] Add success.
I0321 13:57:43.422880  543705 net.go:770] primary dev: ETH0
I0321 13:57:43.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:57:43.422907  543705 net.go:698] Add success.
I0321 13:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:57:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:57:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:57:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:57:53.409803  543705 memory.go:184] no items to output this cycle
I0321 13:57:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 13:58:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:58:03.409811  543705 memory.go:184] no items to output this cycle
I0321 13:58:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 13:58:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:58:13.409811  543705 memory.go:191] Add success.
I0321 13:58:13.409817  543705 cpu.go:282] Add success.
W0321 13:58:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:58:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:58:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:58:13.420106  543705 net.go:648] Add success.
I0321 13:58:13.422662  543705 net.go:770] primary dev: ETH0
I0321 13:58:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:58:13.422688  543705 net.go:698] Add success.
I0321 13:58:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:58:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:58:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 13:58:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:58:14.456504  543705 disk_worker.go:494] system disk:vda1
I0321 13:58:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:58:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:58:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:58:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:58:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:58:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:58:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:58:23.409777  543705 memory.go:184] no items to output this cycle
I0321 13:58:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 13:58:29.813684  543705 disk_info.go:125] begin check local disk info of client
I0321 13:58:29.816251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:58:29.816258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c05c0 0xc0003c0600]
E0321 13:58:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:58:33.409769  543705 memory.go:184] no items to output this cycle
I0321 13:58:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 13:58:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:58:43.409817  543705 memory.go:191] Add success.
I0321 13:58:43.409823  543705 cpu.go:282] Add success.
I0321 13:58:43.420004  543705 net.go:648] Add success.
I0321 13:58:43.422627  543705 net.go:770] primary dev: ETH0
I0321 13:58:43.422640  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:58:43.422653  543705 net.go:698] Add success.
I0321 13:58:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:58:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:58:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:58:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:58:53.409767  543705 memory.go:184] no items to output this cycle
I0321 13:58:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 13:59:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:59:03.409777  543705 memory.go:184] no items to output this cycle
I0321 13:59:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 13:59:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:59:13.409820  543705 memory.go:191] Add success.
I0321 13:59:13.409825  543705 cpu.go:282] Add success.
W0321 13:59:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 13:59:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 13:59:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 13:59:13.420202  543705 net.go:648] Add success.
I0321 13:59:13.422731  543705 net.go:770] primary dev: ETH0
I0321 13:59:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:59:13.422755  543705 net.go:698] Add success.
I0321 13:59:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 13:59:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 13:59:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 13:59:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 13:59:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 13:59:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 13:59:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 13:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:59:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:59:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 13:59:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 13:59:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:59:23.409794  543705 memory.go:184] no items to output this cycle
I0321 13:59:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 13:59:29.817673  543705 disk_info.go:125] begin check local disk info of client
I0321 13:59:29.820143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 13:59:29.820149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a140 0xc00032a180]
E0321 13:59:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:59:33.409795  543705 memory.go:184] no items to output this cycle
I0321 13:59:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 13:59:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:59:43.409795  543705 memory.go:191] Add success.
I0321 13:59:43.409797  543705 cpu.go:282] Add success.
I0321 13:59:43.420032  543705 net.go:648] Add success.
I0321 13:59:43.422825  543705 net.go:770] primary dev: ETH0
I0321 13:59:43.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0321 13:59:43.422851  543705 net.go:698] Add success.
I0321 13:59:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 13:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 13:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 13:59:53.410491  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 13:59:53.410505  543705 memory.go:184] no items to output this cycle
I0321 13:59:53.410504  543705 cpu.go:275] no items to output this cycle
E0321 14:00:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:00:03.409802  543705 memory.go:184] no items to output this cycle
I0321 14:00:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 14:00:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:00:13.409786  543705 memory.go:191] Add success.
I0321 14:00:13.409789  543705 cpu.go:282] Add success.
W0321 14:00:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:00:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:00:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:00:13.420089  543705 net.go:648] Add success.
I0321 14:00:13.422960  543705 net.go:770] primary dev: ETH0
I0321 14:00:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:00:13.422985  543705 net.go:698] Add success.
I0321 14:00:13.469375  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b283eca-b741-4c19-a1e4-226697c94fed","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:00:13.469409  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:00:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:00:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:00:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 14:00:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:00:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 14:00:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:00:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:00:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:00:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:00:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:00:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:00:23.409794  543705 memory.go:184] no items to output this cycle
I0321 14:00:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 14:00:29.821668  543705 disk_info.go:125] begin check local disk info of client
I0321 14:00:29.824196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:00:29.824203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f000 0xc00032f040]
E0321 14:00:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:00:33.409853  543705 memory.go:184] no items to output this cycle
I0321 14:00:33.409979  543705 cpu.go:275] no items to output this cycle
I0321 14:00:38.949886  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:00:38.949893  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:00:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:00:43.410792  543705 memory.go:191] Add success.
I0321 14:00:43.409825  543705 cpu.go:282] Add success.
I0321 14:00:43.420596  543705 net.go:648] Add success.
I0321 14:00:43.423358  543705 net.go:770] primary dev: ETH0
I0321 14:00:43.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:00:43.423384  543705 net.go:698] Add success.
I0321 14:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:00:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:00:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:00:53.410412  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:00:53.410429  543705 memory.go:184] no items to output this cycle
I0321 14:00:53.410441  543705 cpu.go:275] no items to output this cycle
E0321 14:01:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:01:03.409785  543705 memory.go:184] no items to output this cycle
I0321 14:01:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 14:01:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:01:13.409786  543705 memory.go:191] Add success.
I0321 14:01:13.409803  543705 cpu.go:282] Add success.
W0321 14:01:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:01:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:01:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:01:13.420252  543705 net.go:648] Add success.
I0321 14:01:13.423194  543705 net.go:770] primary dev: ETH0
I0321 14:01:13.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:01:13.423221  543705 net.go:698] Add success.
I0321 14:01:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:01:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:01:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 14:01:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:01:14.456575  543705 disk_worker.go:494] system disk:vda1
I0321 14:01:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:01:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:01:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:01:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:01:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:01:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:01:23.409795  543705 memory.go:184] no items to output this cycle
I0321 14:01:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 14:01:29.825674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:01:29.828209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:01:29.828217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ca780 0xc0004ca7c0]
E0321 14:01:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:01:33.409781  543705 memory.go:184] no items to output this cycle
I0321 14:01:33.409898  543705 cpu.go:275] no items to output this cycle
E0321 14:01:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:01:43.409781  543705 memory.go:191] Add success.
I0321 14:01:43.409821  543705 cpu.go:282] Add success.
I0321 14:01:43.419996  543705 net.go:648] Add success.
I0321 14:01:43.422668  543705 net.go:770] primary dev: ETH0
I0321 14:01:43.422682  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:01:43.422697  543705 net.go:698] Add success.
I0321 14:01:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:01:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:01:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:01:53.409798  543705 memory.go:184] no items to output this cycle
I0321 14:01:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 14:02:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:02:03.409791  543705 memory.go:184] no items to output this cycle
I0321 14:02:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 14:02:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:02:13.409787  543705 memory.go:191] Add success.
I0321 14:02:13.409788  543705 cpu.go:282] Add success.
W0321 14:02:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:02:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:02:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:02:13.420042  543705 net.go:648] Add success.
I0321 14:02:13.423204  543705 net.go:770] primary dev: ETH0
I0321 14:02:13.423217  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:02:13.423229  543705 net.go:698] Add success.
W0321 14:02:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:02:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 14:02:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:02:14.455871  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:02:14.455880  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:02:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:02:14.456621  543705 disk_worker.go:494] system disk:vda1
I0321 14:02:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:02:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:02:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:02:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:02:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:02:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:02:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:02:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:02:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:02:23.409763  543705 memory.go:184] no items to output this cycle
I0321 14:02:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 14:02:29.829672  543705 disk_info.go:125] begin check local disk info of client
I0321 14:02:29.832176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:02:29.832182  543705 disk_info.go:196] parse disk info done, disk is : [0xc000299bc0 0xc000299c00]
E0321 14:02:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:02:33.409782  543705 memory.go:184] no items to output this cycle
I0321 14:02:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 14:02:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:02:43.409974  543705 memory.go:191] Add success.
I0321 14:02:43.410034  543705 cpu.go:282] Add success.
I0321 14:02:43.419746  543705 net.go:648] Add success.
I0321 14:02:43.422404  543705 net.go:770] primary dev: ETH0
I0321 14:02:43.422420  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:02:43.422434  543705 net.go:698] Add success.
I0321 14:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:02:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:02:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:02:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:02:53.409767  543705 memory.go:184] no items to output this cycle
I0321 14:02:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 14:03:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:03:03.409778  543705 memory.go:184] no items to output this cycle
I0321 14:03:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 14:03:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:03:13.409808  543705 memory.go:191] Add success.
I0321 14:03:13.409819  543705 cpu.go:282] Add success.
W0321 14:03:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:03:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:03:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:03:13.420152  543705 net.go:648] Add success.
I0321 14:03:13.422955  543705 net.go:770] primary dev: ETH0
I0321 14:03:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:03:13.422981  543705 net.go:698] Add success.
I0321 14:03:13.468764  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"18b1adec-9493-46fb-b328-af8a7ec09dea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:03:13.468806  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:03:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:03:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:03:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 14:03:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:03:14.456498  543705 disk_worker.go:494] system disk:vda1
I0321 14:03:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:03:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:03:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:03:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:03:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:03:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:03:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 14:03:23.409788  543705 memory.go:184] no items to output this cycle
I0321 14:03:29.833672  543705 disk_info.go:125] begin check local disk info of client
I0321 14:03:29.836118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:03:29.836124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f7780 0xc0001f77c0]
E0321 14:03:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:03:33.409801  543705 memory.go:184] no items to output this cycle
I0321 14:03:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 14:03:38.952457  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:03:38.952464  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:03:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:03:43.410810  543705 memory.go:191] Add success.
I0321 14:03:43.409791  543705 cpu.go:282] Add success.
I0321 14:03:43.420518  543705 net.go:648] Add success.
I0321 14:03:43.423439  543705 net.go:770] primary dev: ETH0
I0321 14:03:43.423452  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:03:43.423464  543705 net.go:698] Add success.
I0321 14:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:03:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:03:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:03:53.409794  543705 memory.go:184] no items to output this cycle
I0321 14:03:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 14:04:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:04:03.409784  543705 memory.go:184] no items to output this cycle
I0321 14:04:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 14:04:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:04:13.409792  543705 memory.go:191] Add success.
I0321 14:04:13.409807  543705 cpu.go:282] Add success.
W0321 14:04:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:04:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:04:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:04:13.420067  543705 net.go:648] Add success.
I0321 14:04:13.422950  543705 net.go:770] primary dev: ETH0
I0321 14:04:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:04:13.422982  543705 net.go:698] Add success.
I0321 14:04:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:04:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:04:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 14:04:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:04:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 14:04:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:04:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:04:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:04:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:04:23.409798  543705 memory.go:184] no items to output this cycle
I0321 14:04:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 14:04:29.837673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:04:29.840170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:04:29.840176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265440 0xc000265480]
E0321 14:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:04:33.409783  543705 memory.go:184] no items to output this cycle
I0321 14:04:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 14:04:43.409964  543705 cpu.go:282] Add success.
E0321 14:04:43.410035  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:04:43.410059  543705 memory.go:191] Add success.
I0321 14:04:43.419740  543705 net.go:648] Add success.
I0321 14:04:43.422401  543705 net.go:770] primary dev: ETH0
I0321 14:04:43.422414  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:04:43.422425  543705 net.go:698] Add success.
I0321 14:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:04:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:04:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:04:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:04:53.409809  543705 memory.go:184] no items to output this cycle
I0321 14:04:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 14:05:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:05:03.409818  543705 memory.go:184] no items to output this cycle
I0321 14:05:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 14:05:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:05:13.409789  543705 memory.go:191] Add success.
W0321 14:05:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:05:13.409817  543705 cpu.go:282] Add success.
W0321 14:05:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:05:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:05:13.420299  543705 net.go:648] Add success.
I0321 14:05:13.423145  543705 net.go:770] primary dev: ETH0
I0321 14:05:13.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:05:13.423169  543705 net.go:698] Add success.
I0321 14:05:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:05:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:05:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 14:05:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:05:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 14:05:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:05:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:05:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:05:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:05:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:05:23.409785  543705 memory.go:184] no items to output this cycle
I0321 14:05:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 14:05:29.841676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:05:29.844132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:05:29.844140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cf580 0xc0003cf5c0]
E0321 14:05:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:05:33.409775  543705 memory.go:184] no items to output this cycle
I0321 14:05:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 14:05:43.410016  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:05:43.410052  543705 memory.go:191] Add success.
I0321 14:05:43.410059  543705 cpu.go:282] Add success.
I0321 14:05:43.419728  543705 net.go:648] Add success.
I0321 14:05:43.422412  543705 net.go:770] primary dev: ETH0
I0321 14:05:43.422425  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:05:43.422437  543705 net.go:698] Add success.
I0321 14:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:05:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:05:53.409811  543705 memory.go:184] no items to output this cycle
I0321 14:05:53.409823  543705 cpu.go:275] no items to output this cycle
E0321 14:06:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:06:03.409788  543705 memory.go:184] no items to output this cycle
I0321 14:06:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 14:06:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:06:13.409827  543705 memory.go:191] Add success.
I0321 14:06:13.409832  543705 cpu.go:282] Add success.
W0321 14:06:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:06:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:06:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:06:13.420060  543705 net.go:648] Add success.
I0321 14:06:13.422503  543705 net.go:770] primary dev: ETH0
I0321 14:06:13.422515  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:06:13.422528  543705 net.go:698] Add success.
I0321 14:06:13.469424  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c7fc21f-5131-4d97-9f40-50df8305e343","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:06:13.469457  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:06:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:06:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:06:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0321 14:06:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:06:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 14:06:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:06:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:06:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:06:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:06:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:06:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 14:06:23.409785  543705 memory.go:184] no items to output this cycle
I0321 14:06:29.845676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:06:29.848131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:06:29.848138  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b840 0xc00007b880]
E0321 14:06:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:06:33.409807  543705 memory.go:184] no items to output this cycle
I0321 14:06:33.409826  543705 cpu.go:275] no items to output this cycle
I0321 14:06:38.953743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:06:38.953761  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:06:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:06:43.410674  543705 memory.go:191] Add success.
I0321 14:06:43.409837  543705 cpu.go:282] Add success.
I0321 14:06:43.420395  543705 net.go:648] Add success.
I0321 14:06:43.423168  543705 net.go:770] primary dev: ETH0
I0321 14:06:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:06:43.423192  543705 net.go:698] Add success.
I0321 14:06:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:06:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:06:53.410592  543705 cpu.go:275] no items to output this cycle
E0321 14:06:53.410591  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:06:53.410607  543705 memory.go:184] no items to output this cycle
E0321 14:07:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:07:03.409793  543705 memory.go:184] no items to output this cycle
I0321 14:07:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 14:07:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:07:13.409796  543705 memory.go:191] Add success.
I0321 14:07:13.409825  543705 cpu.go:282] Add success.
W0321 14:07:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:07:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:07:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:07:13.420216  543705 net.go:648] Add success.
I0321 14:07:13.423115  543705 net.go:770] primary dev: ETH0
I0321 14:07:13.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:07:13.423139  543705 net.go:698] Add success.
I0321 14:07:13.453669  543705 event_worker.go:152] Polling the log file for events...
W0321 14:07:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:07:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 14:07:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:07:14.456944  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:07:14.456953  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:07:14.456959  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:07:14.457016  543705 disk_worker.go:494] system disk:vda1
I0321 14:07:14.457047  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:07:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:07:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:07:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:07:16.457984  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:07:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:07:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:07:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:07:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:07:23.409771  543705 memory.go:184] no items to output this cycle
I0321 14:07:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 14:07:29.849676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:07:29.852152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:07:29.852159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ed0c0 0xc0004ed100]
E0321 14:07:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:07:33.409805  543705 memory.go:184] no items to output this cycle
I0321 14:07:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 14:07:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:07:43.409795  543705 memory.go:191] Add success.
I0321 14:07:43.409796  543705 cpu.go:282] Add success.
I0321 14:07:43.420003  543705 net.go:648] Add success.
I0321 14:07:43.422722  543705 net.go:770] primary dev: ETH0
I0321 14:07:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:07:43.422752  543705 net.go:698] Add success.
I0321 14:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:07:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:07:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:07:53.409790  543705 memory.go:184] no items to output this cycle
I0321 14:07:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 14:08:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:08:03.409812  543705 memory.go:184] no items to output this cycle
I0321 14:08:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 14:08:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:08:13.409781  543705 memory.go:191] Add success.
I0321 14:08:13.409798  543705 cpu.go:282] Add success.
W0321 14:08:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:08:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:08:13.420252  543705 net.go:648] Add success.
I0321 14:08:13.422797  543705 net.go:770] primary dev: ETH0
I0321 14:08:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:08:13.422822  543705 net.go:698] Add success.
I0321 14:08:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:08:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:08:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 14:08:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:08:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 14:08:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:08:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:08:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:08:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:08:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:08:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:08:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:08:23.409764  543705 memory.go:184] no items to output this cycle
I0321 14:08:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 14:08:29.853676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:08:29.856201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:08:29.856208  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec740 0xc0000ec780]
E0321 14:08:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:08:33.409801  543705 memory.go:184] no items to output this cycle
I0321 14:08:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 14:08:43.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:08:43.409900  543705 memory.go:191] Add success.
I0321 14:08:43.409902  543705 cpu.go:282] Add success.
I0321 14:08:43.419710  543705 net.go:648] Add success.
I0321 14:08:43.422667  543705 net.go:770] primary dev: ETH0
I0321 14:08:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:08:43.422691  543705 net.go:698] Add success.
I0321 14:08:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:08:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:08:53.409783  543705 memory.go:184] no items to output this cycle
I0321 14:08:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 14:09:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:09:03.409778  543705 memory.go:184] no items to output this cycle
I0321 14:09:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 14:09:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:09:13.409786  543705 memory.go:191] Add success.
I0321 14:09:13.409801  543705 cpu.go:282] Add success.
W0321 14:09:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:09:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:09:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:09:13.420086  543705 net.go:648] Add success.
I0321 14:09:13.423060  543705 net.go:770] primary dev: ETH0
I0321 14:09:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:09:13.423090  543705 net.go:698] Add success.
I0321 14:09:13.464511  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ba0b592f-e821-4763-9037-fa9040093cf8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:09:13.464544  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:09:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:09:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:09:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 14:09:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:09:14.456519  543705 disk_worker.go:494] system disk:vda1
I0321 14:09:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:09:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:09:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:09:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:09:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:09:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:09:23.409783  543705 memory.go:184] no items to output this cycle
I0321 14:09:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 14:09:29.857676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:09:29.860181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:09:29.860187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a140 0xc00035a180]
E0321 14:09:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:09:33.409770  543705 memory.go:184] no items to output this cycle
I0321 14:09:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 14:09:38.956472  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:09:38.956479  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:09:43.410659  543705 memory.go:191] Add success.
I0321 14:09:43.409815  543705 cpu.go:282] Add success.
I0321 14:09:43.420352  543705 net.go:648] Add success.
I0321 14:09:43.423215  543705 net.go:770] primary dev: ETH0
I0321 14:09:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:09:43.423240  543705 net.go:698] Add success.
I0321 14:09:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:09:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:09:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:09:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:09:53.409768  543705 memory.go:184] no items to output this cycle
I0321 14:09:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 14:10:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:10:03.409786  543705 cpu.go:275] no items to output this cycle
I0321 14:10:03.409789  543705 memory.go:184] no items to output this cycle
E0321 14:10:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:10:13.409811  543705 memory.go:191] Add success.
I0321 14:10:13.409823  543705 cpu.go:282] Add success.
W0321 14:10:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:10:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:10:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:10:13.420119  543705 net.go:648] Add success.
I0321 14:10:13.422691  543705 net.go:770] primary dev: ETH0
I0321 14:10:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:10:13.422721  543705 net.go:698] Add success.
I0321 14:10:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:10:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:10:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 14:10:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:10:14.456579  543705 disk_worker.go:494] system disk:vda1
I0321 14:10:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:10:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:10:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:10:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:10:16.472549  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:10:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:10:23.409774  543705 memory.go:184] no items to output this cycle
I0321 14:10:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 14:10:29.861681  543705 disk_info.go:125] begin check local disk info of client
I0321 14:10:29.864224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:10:29.864230  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267600 0xc000267640]
E0321 14:10:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:10:33.409797  543705 memory.go:184] no items to output this cycle
I0321 14:10:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 14:10:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:10:43.409885  543705 memory.go:191] Add success.
I0321 14:10:43.409926  543705 cpu.go:282] Add success.
I0321 14:10:43.419735  543705 net.go:648] Add success.
I0321 14:10:43.422508  543705 net.go:770] primary dev: ETH0
I0321 14:10:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:10:43.422536  543705 net.go:698] Add success.
I0321 14:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:10:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:10:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:10:53.410262  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:10:53.410280  543705 memory.go:184] no items to output this cycle
I0321 14:10:53.410292  543705 cpu.go:275] no items to output this cycle
E0321 14:11:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:11:03.409815  543705 memory.go:184] no items to output this cycle
I0321 14:11:03.409829  543705 cpu.go:275] no items to output this cycle
E0321 14:11:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:11:13.409787  543705 memory.go:191] Add success.
I0321 14:11:13.409792  543705 cpu.go:282] Add success.
W0321 14:11:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:11:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:11:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:11:13.420131  543705 net.go:648] Add success.
I0321 14:11:13.422800  543705 net.go:770] primary dev: ETH0
I0321 14:11:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:11:13.422829  543705 net.go:698] Add success.
I0321 14:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:11:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:11:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 14:11:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:11:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 14:11:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:11:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:11:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:11:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:11:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:11:23.410376  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:11:23.410394  543705 memory.go:184] no items to output this cycle
I0321 14:11:23.410429  543705 cpu.go:275] no items to output this cycle
I0321 14:11:29.865673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:11:29.868239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:11:29.868245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267540 0xc000267580]
E0321 14:11:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:11:33.409769  543705 memory.go:184] no items to output this cycle
I0321 14:11:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 14:11:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:11:43.409810  543705 memory.go:191] Add success.
I0321 14:11:43.409815  543705 cpu.go:282] Add success.
I0321 14:11:43.419864  543705 net.go:648] Add success.
I0321 14:11:43.422462  543705 net.go:770] primary dev: ETH0
I0321 14:11:43.422476  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:11:43.422488  543705 net.go:698] Add success.
I0321 14:11:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:11:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:11:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:11:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:11:53.409780  543705 memory.go:184] no items to output this cycle
I0321 14:11:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 14:12:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:12:03.409785  543705 memory.go:184] no items to output this cycle
I0321 14:12:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 14:12:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:12:13.409809  543705 memory.go:191] Add success.
I0321 14:12:13.409815  543705 cpu.go:282] Add success.
W0321 14:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:12:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:12:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:12:13.420272  543705 net.go:648] Add success.
I0321 14:12:13.423150  543705 net.go:770] primary dev: ETH0
I0321 14:12:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:12:13.423174  543705 net.go:698] Add success.
I0321 14:12:13.468167  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1afeea66-de65-465e-becf-d684002ecdfd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:12:13.468201  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 14:12:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:12:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 14:12:14.455199  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:12:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:12:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:12:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:12:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 14:12:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:12:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:12:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 14:12:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:12:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:12:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:12:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:12:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:12:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:12:23.409783  543705 memory.go:184] no items to output this cycle
I0321 14:12:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 14:12:29.869675  543705 disk_info.go:125] begin check local disk info of client
I0321 14:12:29.872139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:12:29.872145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2800 0xc0002a2840]
E0321 14:12:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:12:33.409778  543705 memory.go:184] no items to output this cycle
I0321 14:12:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 14:12:38.957732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:12:38.957738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:12:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:12:43.410713  543705 memory.go:191] Add success.
I0321 14:12:43.409810  543705 cpu.go:282] Add success.
I0321 14:12:43.420618  543705 net.go:648] Add success.
I0321 14:12:43.423032  543705 net.go:770] primary dev: ETH0
I0321 14:12:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:12:43.423057  543705 net.go:698] Add success.
I0321 14:12:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:12:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:12:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:12:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:12:53.409765  543705 memory.go:184] no items to output this cycle
I0321 14:12:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 14:13:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:13:03.409785  543705 memory.go:184] no items to output this cycle
I0321 14:13:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 14:13:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:13:13.409809  543705 memory.go:191] Add success.
I0321 14:13:13.409815  543705 cpu.go:282] Add success.
W0321 14:13:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:13:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:13:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:13:13.420099  543705 net.go:648] Add success.
I0321 14:13:13.422873  543705 net.go:770] primary dev: ETH0
I0321 14:13:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:13:13.422899  543705 net.go:698] Add success.
I0321 14:13:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:13:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:13:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 14:13:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:13:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 14:13:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:13:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:13:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:13:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:13:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:13:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:13:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:13:23.409779  543705 memory.go:184] no items to output this cycle
I0321 14:13:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 14:13:29.873672  543705 disk_info.go:125] begin check local disk info of client
I0321 14:13:29.876163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:13:29.876172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253180 0xc0004fc000]
E0321 14:13:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:13:33.409760  543705 memory.go:184] no items to output this cycle
I0321 14:13:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 14:13:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:13:43.409785  543705 memory.go:191] Add success.
I0321 14:13:43.409788  543705 cpu.go:282] Add success.
I0321 14:13:43.420125  543705 net.go:648] Add success.
I0321 14:13:43.423190  543705 net.go:770] primary dev: ETH0
I0321 14:13:43.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:13:43.423215  543705 net.go:698] Add success.
I0321 14:13:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:13:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:13:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:13:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:13:53.409795  543705 memory.go:184] no items to output this cycle
I0321 14:13:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 14:14:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:14:03.409811  543705 memory.go:184] no items to output this cycle
I0321 14:14:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 14:14:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:14:13.409787  543705 memory.go:191] Add success.
I0321 14:14:13.409804  543705 cpu.go:282] Add success.
W0321 14:14:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:14:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:14:13.420151  543705 net.go:648] Add success.
I0321 14:14:13.422771  543705 net.go:770] primary dev: ETH0
I0321 14:14:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:14:13.422797  543705 net.go:698] Add success.
I0321 14:14:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:14:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:14:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 14:14:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:14:14.456485  543705 disk_worker.go:494] system disk:vda1
I0321 14:14:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:14:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:14:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:14:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:14:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:14:23.409777  543705 memory.go:184] no items to output this cycle
I0321 14:14:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 14:14:29.877674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:14:29.880161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:14:29.880168  543705 disk_info.go:196] parse disk info done, disk is : [0xc000217a00 0xc000217a40]
E0321 14:14:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:14:33.409803  543705 memory.go:184] no items to output this cycle
I0321 14:14:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 14:14:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:14:43.409779  543705 memory.go:191] Add success.
I0321 14:14:43.409798  543705 cpu.go:282] Add success.
I0321 14:14:43.419876  543705 net.go:648] Add success.
I0321 14:14:43.422767  543705 net.go:770] primary dev: ETH0
I0321 14:14:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:14:43.422795  543705 net.go:698] Add success.
I0321 14:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:14:53.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:14:53.410423  543705 memory.go:184] no items to output this cycle
I0321 14:14:53.410486  543705 cpu.go:275] no items to output this cycle
E0321 14:15:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:15:03.409781  543705 memory.go:184] no items to output this cycle
I0321 14:15:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 14:15:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:15:13.409817  543705 memory.go:191] Add success.
I0321 14:15:13.409818  543705 cpu.go:282] Add success.
W0321 14:15:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:15:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:15:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:15:13.420151  543705 net.go:648] Add success.
I0321 14:15:13.422872  543705 net.go:770] primary dev: ETH0
I0321 14:15:13.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:15:13.422897  543705 net.go:698] Add success.
I0321 14:15:13.505744  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4eb85a9f-dc62-43bd-b130-fa166a08def0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:15:13.505776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:15:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:15:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 14:15:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:15:14.456723  543705 disk_worker.go:494] system disk:vda1
I0321 14:15:14.456754  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:15:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:15:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:15:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:15:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:15:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:15:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 14:15:23.409792  543705 memory.go:184] no items to output this cycle
I0321 14:15:29.881676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:15:29.884161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:15:29.884167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa100 0xc0001aa140]
E0321 14:15:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:15:33.409804  543705 memory.go:184] no items to output this cycle
I0321 14:15:33.409816  543705 cpu.go:275] no items to output this cycle
I0321 14:15:38.960494  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:15:38.960500  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:15:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:15:43.410766  543705 memory.go:191] Add success.
I0321 14:15:43.409790  543705 cpu.go:282] Add success.
I0321 14:15:43.420524  543705 net.go:648] Add success.
I0321 14:15:43.423698  543705 net.go:770] primary dev: ETH0
I0321 14:15:43.423711  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:15:43.423723  543705 net.go:698] Add success.
I0321 14:15:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:15:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:15:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:15:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:15:53.409804  543705 memory.go:184] no items to output this cycle
I0321 14:15:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 14:16:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:16:03.409817  543705 memory.go:184] no items to output this cycle
I0321 14:16:03.409829  543705 cpu.go:275] no items to output this cycle
E0321 14:16:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:16:13.409781  543705 memory.go:191] Add success.
I0321 14:16:13.409800  543705 cpu.go:282] Add success.
W0321 14:16:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:16:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:16:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:16:13.420250  543705 net.go:648] Add success.
I0321 14:16:13.422836  543705 net.go:770] primary dev: ETH0
I0321 14:16:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:16:13.422860  543705 net.go:698] Add success.
I0321 14:16:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:16:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:16:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 14:16:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:16:14.456504  543705 disk_worker.go:494] system disk:vda1
I0321 14:16:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:16:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:16:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:16:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:16:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:16:23.409797  543705 memory.go:184] no items to output this cycle
I0321 14:16:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 14:16:29.885675  543705 disk_info.go:125] begin check local disk info of client
I0321 14:16:29.888166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:16:29.888174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2440 0xc0002a2480]
E0321 14:16:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:16:33.409800  543705 memory.go:184] no items to output this cycle
I0321 14:16:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 14:16:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:16:43.409815  543705 memory.go:191] Add success.
I0321 14:16:43.409827  543705 cpu.go:282] Add success.
I0321 14:16:43.419681  543705 net.go:770] primary dev: ETH0
I0321 14:16:43.419698  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:16:43.419712  543705 net.go:698] Add success.
I0321 14:16:43.420252  543705 net.go:648] Add success.
I0321 14:16:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:16:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:16:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:16:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:16:53.409815  543705 memory.go:184] no items to output this cycle
I0321 14:16:53.409825  543705 cpu.go:275] no items to output this cycle
E0321 14:17:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:17:03.409817  543705 memory.go:184] no items to output this cycle
I0321 14:17:03.409837  543705 cpu.go:275] no items to output this cycle
E0321 14:17:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:17:13.409793  543705 memory.go:191] Add success.
W0321 14:17:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:17:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:17:13.409830  543705 cpu.go:282] Add success.
I0321 14:17:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:17:13.420143  543705 net.go:648] Add success.
I0321 14:17:13.422979  543705 net.go:770] primary dev: ETH0
I0321 14:17:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:17:13.423003  543705 net.go:698] Add success.
I0321 14:17:13.453600  543705 event_worker.go:152] Polling the log file for events...
W0321 14:17:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:17:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 14:17:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:17:14.456918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:17:14.456927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:17:14.456933  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:17:14.457004  543705 disk_worker.go:494] system disk:vda1
I0321 14:17:14.457047  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:17:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:17:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:17:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:17:16.457996  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:17:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:17:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:17:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:17:23.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:17:23.410267  543705 memory.go:184] no items to output this cycle
I0321 14:17:23.410279  543705 cpu.go:275] no items to output this cycle
I0321 14:17:29.889897  543705 disk_info.go:125] begin check local disk info of client
I0321 14:17:29.892454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:17:29.892461  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366bc0 0xc000366c00]
E0321 14:17:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:17:33.409782  543705 memory.go:184] no items to output this cycle
I0321 14:17:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 14:17:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:17:43.409803  543705 memory.go:191] Add success.
I0321 14:17:43.409808  543705 cpu.go:282] Add success.
I0321 14:17:43.419844  543705 net.go:648] Add success.
I0321 14:17:43.422838  543705 net.go:770] primary dev: ETH0
I0321 14:17:43.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:17:43.422863  543705 net.go:698] Add success.
I0321 14:17:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:17:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:17:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:17:53.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:17:53.409880  543705 memory.go:184] no items to output this cycle
I0321 14:17:53.410006  543705 cpu.go:275] no items to output this cycle
E0321 14:18:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:18:03.409790  543705 memory.go:184] no items to output this cycle
I0321 14:18:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 14:18:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:18:13.409788  543705 cpu.go:282] Add success.
I0321 14:18:13.409793  543705 memory.go:191] Add success.
W0321 14:18:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:18:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:18:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:18:13.420084  543705 net.go:648] Add success.
I0321 14:18:13.422784  543705 net.go:770] primary dev: ETH0
I0321 14:18:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:18:13.422811  543705 net.go:698] Add success.
I0321 14:18:13.463127  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"96a3f8ec-d6f4-4c58-aaf4-70e513f4d238","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:18:13.463161  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:18:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:18:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:18:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 14:18:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:18:14.456514  543705 disk_worker.go:494] system disk:vda1
I0321 14:18:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:18:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:18:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:18:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:18:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:18:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:18:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:18:23.409777  543705 memory.go:184] no items to output this cycle
I0321 14:18:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 14:18:29.894113  543705 disk_info.go:125] begin check local disk info of client
I0321 14:18:29.896627  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:18:29.896633  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2fc0 0xc0002a3000]
E0321 14:18:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:18:33.409788  543705 memory.go:184] no items to output this cycle
I0321 14:18:33.409795  543705 cpu.go:275] no items to output this cycle
I0321 14:18:38.961741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:18:38.961747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:18:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:18:43.410733  543705 memory.go:191] Add success.
I0321 14:18:43.409819  543705 cpu.go:282] Add success.
I0321 14:18:43.420458  543705 net.go:648] Add success.
I0321 14:18:43.423354  543705 net.go:770] primary dev: ETH0
I0321 14:18:43.423369  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:18:43.423384  543705 net.go:698] Add success.
I0321 14:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:18:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:18:53.410455  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:18:53.410488  543705 memory.go:184] no items to output this cycle
I0321 14:18:53.410518  543705 cpu.go:275] no items to output this cycle
E0321 14:19:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:19:03.409819  543705 memory.go:184] no items to output this cycle
I0321 14:19:03.409833  543705 cpu.go:275] no items to output this cycle
E0321 14:19:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:19:13.409783  543705 memory.go:191] Add success.
I0321 14:19:13.409806  543705 cpu.go:282] Add success.
W0321 14:19:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:19:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:19:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:19:13.420159  543705 net.go:648] Add success.
I0321 14:19:13.422931  543705 net.go:770] primary dev: ETH0
I0321 14:19:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:19:13.422971  543705 net.go:698] Add success.
I0321 14:19:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:19:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:19:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 14:19:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:19:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 14:19:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:19:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:19:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:19:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:19:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:19:16.472502  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:19:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:19:23.409809  543705 memory.go:184] no items to output this cycle
I0321 14:19:23.409825  543705 cpu.go:275] no items to output this cycle
I0321 14:19:29.897675  543705 disk_info.go:125] begin check local disk info of client
I0321 14:19:29.900188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:19:29.900194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4940 0xc0000c4980]
E0321 14:19:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:19:33.409804  543705 memory.go:184] no items to output this cycle
I0321 14:19:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 14:19:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:19:43.409781  543705 memory.go:191] Add success.
I0321 14:19:43.409800  543705 cpu.go:282] Add success.
I0321 14:19:43.419876  543705 net.go:648] Add success.
I0321 14:19:43.422720  543705 net.go:770] primary dev: ETH0
I0321 14:19:43.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:19:43.422746  543705 net.go:698] Add success.
I0321 14:19:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:19:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:19:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:19:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:19:53.409777  543705 memory.go:184] no items to output this cycle
I0321 14:19:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 14:20:03.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:20:03.409924  543705 memory.go:184] no items to output this cycle
I0321 14:20:03.409985  543705 cpu.go:275] no items to output this cycle
E0321 14:20:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:20:13.409787  543705 memory.go:191] Add success.
W0321 14:20:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:20:13.409821  543705 cpu.go:282] Add success.
W0321 14:20:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:20:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:20:13.420171  543705 net.go:648] Add success.
I0321 14:20:13.423096  543705 net.go:770] primary dev: ETH0
I0321 14:20:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:20:13.423122  543705 net.go:698] Add success.
I0321 14:20:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:20:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:20:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 14:20:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:20:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 14:20:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:20:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:20:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:20:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:20:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:20:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:20:23.409781  543705 memory.go:184] no items to output this cycle
I0321 14:20:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 14:20:29.901673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:20:29.904229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:20:29.904236  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e5c0 0xc00028e600]
E0321 14:20:33.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:20:33.409758  543705 memory.go:184] no items to output this cycle
I0321 14:20:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 14:20:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:20:43.409793  543705 memory.go:191] Add success.
I0321 14:20:43.409807  543705 cpu.go:282] Add success.
I0321 14:20:43.419881  543705 net.go:648] Add success.
I0321 14:20:43.422934  543705 net.go:770] primary dev: ETH0
I0321 14:20:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:20:43.422960  543705 net.go:698] Add success.
I0321 14:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:20:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:20:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:20:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:20:53.409775  543705 memory.go:184] no items to output this cycle
I0321 14:20:53.409893  543705 cpu.go:275] no items to output this cycle
E0321 14:21:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:21:03.409790  543705 memory.go:184] no items to output this cycle
I0321 14:21:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 14:21:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:21:13.409810  543705 memory.go:191] Add success.
I0321 14:21:13.409820  543705 cpu.go:282] Add success.
W0321 14:21:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:21:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:21:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:21:13.420287  543705 net.go:648] Add success.
I0321 14:21:13.423080  543705 net.go:770] primary dev: ETH0
I0321 14:21:13.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:21:13.423121  543705 net.go:698] Add success.
I0321 14:21:13.463733  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c70ddf20-4766-4df7-ae81-3dd30ccadeb4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:21:13.463768  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:21:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:21:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:21:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0321 14:21:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:21:14.457055  543705 disk_worker.go:494] system disk:vda1
I0321 14:21:14.457101  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:21:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:21:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:21:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:21:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:21:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:21:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:21:23.409778  543705 memory.go:184] no items to output this cycle
I0321 14:21:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 14:21:29.905676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:21:29.908159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:21:29.908166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003122c0 0xc000312300]
E0321 14:21:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:21:33.409769  543705 memory.go:184] no items to output this cycle
I0321 14:21:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 14:21:38.964506  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:21:38.964513  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:21:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:21:43.410731  543705 memory.go:191] Add success.
I0321 14:21:43.409808  543705 cpu.go:282] Add success.
I0321 14:21:43.420542  543705 net.go:648] Add success.
I0321 14:21:43.423401  543705 net.go:770] primary dev: ETH0
I0321 14:21:43.423414  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:21:43.423426  543705 net.go:698] Add success.
I0321 14:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:21:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:21:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:21:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:21:53.409799  543705 memory.go:184] no items to output this cycle
I0321 14:21:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 14:22:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:22:03.409785  543705 memory.go:184] no items to output this cycle
I0321 14:22:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 14:22:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:22:13.409790  543705 cpu.go:282] Add success.
I0321 14:22:13.409799  543705 memory.go:191] Add success.
W0321 14:22:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:22:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:22:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:22:13.420110  543705 net.go:648] Add success.
I0321 14:22:13.422731  543705 net.go:770] primary dev: ETH0
I0321 14:22:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:22:13.422757  543705 net.go:698] Add success.
W0321 14:22:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:22:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0321 14:22:14.455152  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:22:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:22:14.456952  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:22:14.456958  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:22:14.457013  543705 disk_worker.go:494] system disk:vda1
I0321 14:22:14.457044  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:22:15.456846  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:22:15.456855  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:22:16.458043  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:22:16.458051  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:22:16.458098  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:22:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:22:16.472487  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:22:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:22:23.409765  543705 memory.go:184] no items to output this cycle
I0321 14:22:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 14:22:29.909676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:22:29.912168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:22:29.912174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b380 0xc00007b3c0]
E0321 14:22:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:22:33.409787  543705 memory.go:184] no items to output this cycle
I0321 14:22:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 14:22:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:22:43.409773  543705 memory.go:191] Add success.
I0321 14:22:43.409815  543705 cpu.go:282] Add success.
I0321 14:22:43.419753  543705 net.go:648] Add success.
I0321 14:22:43.422851  543705 net.go:770] primary dev: ETH0
I0321 14:22:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:22:43.422879  543705 net.go:698] Add success.
I0321 14:22:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:22:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:22:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:22:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:22:53.409784  543705 memory.go:184] no items to output this cycle
I0321 14:22:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 14:23:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:23:03.409789  543705 memory.go:184] no items to output this cycle
I0321 14:23:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 14:23:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:23:13.409780  543705 memory.go:191] Add success.
W0321 14:23:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:23:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:23:13.409818  543705 cpu.go:282] Add success.
I0321 14:23:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:23:13.420187  543705 net.go:648] Add success.
I0321 14:23:13.422627  543705 net.go:770] primary dev: ETH0
I0321 14:23:13.422640  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:23:13.422651  543705 net.go:698] Add success.
I0321 14:23:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:23:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:23:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 14:23:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:23:14.456505  543705 disk_worker.go:494] system disk:vda1
I0321 14:23:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:23:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:23:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:23:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:23:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:23:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:23:23.409796  543705 memory.go:184] no items to output this cycle
I0321 14:23:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 14:23:29.913689  543705 disk_info.go:125] begin check local disk info of client
I0321 14:23:29.916145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:23:29.916151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8d40 0xc0002b8d80]
E0321 14:23:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:23:33.409787  543705 memory.go:184] no items to output this cycle
I0321 14:23:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 14:23:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:23:43.409895  543705 cpu.go:282] Add success.
I0321 14:23:43.409908  543705 memory.go:191] Add success.
I0321 14:23:43.419764  543705 net.go:648] Add success.
I0321 14:23:43.422540  543705 net.go:770] primary dev: ETH0
I0321 14:23:43.422554  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:23:43.422567  543705 net.go:698] Add success.
I0321 14:23:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:23:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:23:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:23:53.410231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:23:53.410247  543705 memory.go:184] no items to output this cycle
I0321 14:23:53.410280  543705 cpu.go:275] no items to output this cycle
E0321 14:24:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:24:03.409786  543705 memory.go:184] no items to output this cycle
I0321 14:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 14:24:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:24:13.409842  543705 memory.go:191] Add success.
I0321 14:24:13.409853  543705 cpu.go:282] Add success.
W0321 14:24:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:24:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:24:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:24:13.420257  543705 net.go:648] Add success.
I0321 14:24:13.423472  543705 net.go:770] primary dev: ETH0
I0321 14:24:13.423487  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:24:13.423501  543705 net.go:698] Add success.
I0321 14:24:13.469879  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"37ec70e6-0688-43f5-a6e3-89bbe0dfc9f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:24:13.469914  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:24:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:24:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:24:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 14:24:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:24:14.456528  543705 disk_worker.go:494] system disk:vda1
I0321 14:24:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:24:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:24:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:24:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:24:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:24:16.472485  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:24:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:24:23.409764  543705 memory.go:184] no items to output this cycle
I0321 14:24:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 14:24:29.917674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:24:29.920214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:24:29.920220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000497b40 0xc000497b80]
E0321 14:24:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:24:33.409790  543705 memory.go:184] no items to output this cycle
I0321 14:24:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 14:24:38.965735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:24:38.965741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:24:43.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:24:43.410836  543705 memory.go:191] Add success.
I0321 14:24:43.409994  543705 cpu.go:282] Add success.
I0321 14:24:43.419709  543705 net.go:648] Add success.
I0321 14:24:43.422765  543705 net.go:770] primary dev: ETH0
I0321 14:24:43.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:24:43.422790  543705 net.go:698] Add success.
I0321 14:24:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:24:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:24:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:24:53.410742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:24:53.410756  543705 memory.go:184] no items to output this cycle
I0321 14:24:53.410775  543705 cpu.go:275] no items to output this cycle
E0321 14:25:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:25:03.409784  543705 memory.go:184] no items to output this cycle
I0321 14:25:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 14:25:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:25:13.409795  543705 memory.go:191] Add success.
W0321 14:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:25:13.409828  543705 cpu.go:282] Add success.
W0321 14:25:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:25:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:25:13.420222  543705 net.go:648] Add success.
I0321 14:25:13.423356  543705 net.go:770] primary dev: ETH0
I0321 14:25:13.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:25:13.423391  543705 net.go:698] Add success.
I0321 14:25:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:25:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:25:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 14:25:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:25:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 14:25:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:25:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:25:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:25:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:25:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:25:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:25:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:25:23.409784  543705 memory.go:184] no items to output this cycle
I0321 14:25:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 14:25:29.921672  543705 disk_info.go:125] begin check local disk info of client
I0321 14:25:29.924189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:25:29.924195  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac80 0xc00007acc0]
E0321 14:25:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:25:33.409804  543705 memory.go:184] no items to output this cycle
I0321 14:25:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 14:25:43.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:25:43.409906  543705 memory.go:191] Add success.
I0321 14:25:43.409951  543705 cpu.go:282] Add success.
I0321 14:25:43.419730  543705 net.go:648] Add success.
I0321 14:25:43.422850  543705 net.go:770] primary dev: ETH0
I0321 14:25:43.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:25:43.422890  543705 net.go:698] Add success.
I0321 14:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:25:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:25:53.410261  543705 memory.go:184] no items to output this cycle
I0321 14:25:53.410267  543705 cpu.go:275] no items to output this cycle
E0321 14:26:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:26:03.409806  543705 memory.go:184] no items to output this cycle
I0321 14:26:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 14:26:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:26:13.409789  543705 memory.go:191] Add success.
I0321 14:26:13.409814  543705 cpu.go:282] Add success.
W0321 14:26:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:26:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:26:13.420269  543705 net.go:648] Add success.
I0321 14:26:13.423003  543705 net.go:770] primary dev: ETH0
I0321 14:26:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:26:13.423028  543705 net.go:698] Add success.
I0321 14:26:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:26:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:26:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 14:26:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:26:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 14:26:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:26:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:26:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:26:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:26:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:26:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:26:23.409779  543705 memory.go:184] no items to output this cycle
I0321 14:26:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 14:26:29.925674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:26:29.928157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:26:29.928163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329200 0xc000329240]
E0321 14:26:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:26:33.409787  543705 memory.go:184] no items to output this cycle
I0321 14:26:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 14:26:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:26:43.409805  543705 memory.go:191] Add success.
I0321 14:26:43.409813  543705 cpu.go:282] Add success.
I0321 14:26:43.419737  543705 net.go:648] Add success.
I0321 14:26:43.422604  543705 net.go:770] primary dev: ETH0
I0321 14:26:43.422619  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:26:43.422633  543705 net.go:698] Add success.
I0321 14:26:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:26:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:26:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:26:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:26:53.409800  543705 memory.go:184] no items to output this cycle
I0321 14:26:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 14:27:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:27:03.409778  543705 memory.go:184] no items to output this cycle
I0321 14:27:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 14:27:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:27:13.409787  543705 memory.go:191] Add success.
I0321 14:27:13.409805  543705 cpu.go:282] Add success.
W0321 14:27:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:27:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:27:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:27:13.420190  543705 net.go:648] Add success.
I0321 14:27:13.423123  543705 net.go:770] primary dev: ETH0
I0321 14:27:13.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:27:13.423149  543705 net.go:698] Add success.
I0321 14:27:13.429806  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 14:27:13.452991  543705 event_worker.go:152] Polling the log file for events...
I0321 14:27:13.470089  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"97da5b91-ccb7-4bdc-b267-766fe0158b47","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:27:13.470122  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 14:27:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:27:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 14:27:14.455208  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:27:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:27:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:27:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:27:14.456779  543705 disk_worker.go:494] system disk:vda1
I0321 14:27:14.456815  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:27:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:27:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:27:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:27:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:27:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:27:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:27:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:27:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:27:23.409792  543705 memory.go:184] no items to output this cycle
I0321 14:27:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 14:27:29.929673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:27:29.932136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:27:29.932142  543705 disk_info.go:196] parse disk info done, disk is : [0xc000497a80 0xc000497ac0]
E0321 14:27:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:27:33.409786  543705 memory.go:184] no items to output this cycle
I0321 14:27:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 14:27:38.965884  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:27:38.965890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:27:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:27:43.410802  543705 memory.go:191] Add success.
I0321 14:27:43.409811  543705 cpu.go:282] Add success.
I0321 14:27:43.420508  543705 net.go:648] Add success.
I0321 14:27:43.423521  543705 net.go:770] primary dev: ETH0
I0321 14:27:43.423542  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:27:43.423558  543705 net.go:698] Add success.
I0321 14:27:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:27:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:27:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:27:53.409779  543705 memory.go:184] no items to output this cycle
I0321 14:27:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 14:28:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:28:03.409792  543705 memory.go:184] no items to output this cycle
I0321 14:28:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 14:28:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:28:13.409795  543705 memory.go:191] Add success.
I0321 14:28:13.409796  543705 cpu.go:282] Add success.
W0321 14:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:28:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:28:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:28:13.420089  543705 net.go:648] Add success.
I0321 14:28:13.423043  543705 net.go:770] primary dev: ETH0
I0321 14:28:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:28:13.423071  543705 net.go:698] Add success.
I0321 14:28:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:28:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:28:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 14:28:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:28:14.456519  543705 disk_worker.go:494] system disk:vda1
I0321 14:28:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:28:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:28:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:28:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:28:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:28:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:28:23.409809  543705 memory.go:184] no items to output this cycle
I0321 14:28:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 14:28:29.933673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:28:29.936153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:28:29.936159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002672c0 0xc000267300]
E0321 14:28:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:28:33.409795  543705 memory.go:184] no items to output this cycle
I0321 14:28:33.409904  543705 cpu.go:275] no items to output this cycle
E0321 14:28:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:28:43.409793  543705 memory.go:191] Add success.
I0321 14:28:43.409800  543705 cpu.go:282] Add success.
I0321 14:28:43.419860  543705 net.go:648] Add success.
I0321 14:28:43.423677  543705 net.go:770] primary dev: ETH0
I0321 14:28:43.423693  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:28:43.423707  543705 net.go:698] Add success.
I0321 14:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:28:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:28:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:28:53.409781  543705 memory.go:184] no items to output this cycle
I0321 14:28:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 14:29:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:29:03.409786  543705 memory.go:184] no items to output this cycle
I0321 14:29:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 14:29:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:29:13.409808  543705 memory.go:191] Add success.
I0321 14:29:13.409809  543705 cpu.go:282] Add success.
W0321 14:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:29:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:29:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:29:13.420101  543705 net.go:648] Add success.
I0321 14:29:13.423215  543705 net.go:770] primary dev: ETH0
I0321 14:29:13.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:29:13.423240  543705 net.go:698] Add success.
I0321 14:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:29:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:29:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 14:29:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:29:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 14:29:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:29:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:29:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:29:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:29:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:29:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 14:29:23.409798  543705 memory.go:184] no items to output this cycle
I0321 14:29:29.937676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:29:29.940147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:29:29.940153  543705 disk_info.go:196] parse disk info done, disk is : [0xc000565700 0xc000565740]
E0321 14:29:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:29:33.409811  543705 memory.go:184] no items to output this cycle
I0321 14:29:33.409827  543705 cpu.go:275] no items to output this cycle
E0321 14:29:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:29:43.409788  543705 memory.go:191] Add success.
I0321 14:29:43.409813  543705 cpu.go:282] Add success.
I0321 14:29:43.420001  543705 net.go:648] Add success.
I0321 14:29:43.422871  543705 net.go:770] primary dev: ETH0
I0321 14:29:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:29:43.422897  543705 net.go:698] Add success.
I0321 14:29:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:29:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:29:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:29:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:29:53.409783  543705 cpu.go:275] no items to output this cycle
I0321 14:29:53.409794  543705 memory.go:184] no items to output this cycle
E0321 14:30:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:30:03.409816  543705 memory.go:184] no items to output this cycle
I0321 14:30:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 14:30:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:30:13.409809  543705 memory.go:191] Add success.
I0321 14:30:13.409809  543705 cpu.go:282] Add success.
W0321 14:30:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:30:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:30:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:30:13.420231  543705 net.go:648] Add success.
I0321 14:30:13.423238  543705 net.go:770] primary dev: ETH0
I0321 14:30:13.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:30:13.423267  543705 net.go:698] Add success.
I0321 14:30:13.468384  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf99f19c-b5e1-4c82-b1d3-9383630f10c5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:30:13.468418  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:30:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:30:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:30:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 14:30:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:30:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 14:30:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:30:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:30:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:30:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:30:16.472485  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:30:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:30:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 14:30:23.409787  543705 memory.go:184] no items to output this cycle
I0321 14:30:29.941673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:30:29.944223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:30:29.944229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353a40 0xc000353a80]
E0321 14:30:33.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:30:33.409875  543705 cpu.go:275] no items to output this cycle
I0321 14:30:33.409883  543705 memory.go:184] no items to output this cycle
I0321 14:30:38.968542  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:30:38.968549  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:30:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:30:43.410752  543705 memory.go:191] Add success.
I0321 14:30:43.409789  543705 cpu.go:282] Add success.
I0321 14:30:43.420534  543705 net.go:648] Add success.
I0321 14:30:43.423704  543705 net.go:770] primary dev: ETH0
I0321 14:30:43.423718  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:30:43.423731  543705 net.go:698] Add success.
I0321 14:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:30:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:30:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:30:53.409770  543705 memory.go:184] no items to output this cycle
I0321 14:30:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 14:31:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:31:03.409793  543705 memory.go:184] no items to output this cycle
I0321 14:31:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 14:31:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:31:13.409794  543705 memory.go:191] Add success.
I0321 14:31:13.409796  543705 cpu.go:282] Add success.
W0321 14:31:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:31:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:31:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:31:13.420081  543705 net.go:648] Add success.
I0321 14:31:13.423085  543705 net.go:770] primary dev: ETH0
I0321 14:31:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:31:13.423113  543705 net.go:698] Add success.
I0321 14:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:31:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:31:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 14:31:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:31:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 14:31:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:31:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:31:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:31:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:31:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:31:23.409782  543705 memory.go:184] no items to output this cycle
I0321 14:31:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 14:31:29.945674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:31:29.948166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:31:29.948172  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a240 0xc00029a280]
E0321 14:31:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:31:33.409918  543705 memory.go:184] no items to output this cycle
I0321 14:31:33.409920  543705 cpu.go:275] no items to output this cycle
E0321 14:31:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:31:43.409773  543705 memory.go:191] Add success.
I0321 14:31:43.409802  543705 cpu.go:282] Add success.
I0321 14:31:43.419883  543705 net.go:648] Add success.
I0321 14:31:43.422756  543705 net.go:770] primary dev: ETH0
I0321 14:31:43.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:31:43.422780  543705 net.go:698] Add success.
I0321 14:31:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:31:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:31:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:31:53.409771  543705 memory.go:184] no items to output this cycle
I0321 14:31:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 14:32:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:32:03.409775  543705 memory.go:184] no items to output this cycle
I0321 14:32:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 14:32:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:32:13.409790  543705 memory.go:191] Add success.
W0321 14:32:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:32:13.409815  543705 cpu.go:282] Add success.
W0321 14:32:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:32:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:32:13.420154  543705 net.go:648] Add success.
I0321 14:32:13.422745  543705 net.go:770] primary dev: ETH0
I0321 14:32:13.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:32:13.422770  543705 net.go:698] Add success.
W0321 14:32:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:32:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 14:32:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:32:14.456757  543705 disk_worker.go:494] system disk:vda1
I0321 14:32:14.456800  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:32:14.457167  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:32:14.457175  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:32:14.457180  543705 custom_config.go:64] query custom config with name: gpu
E0321 14:32:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:32:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:32:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:32:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:32:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:32:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:32:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:32:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:32:23.409787  543705 memory.go:184] no items to output this cycle
I0321 14:32:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 14:32:29.949673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:32:29.952204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:32:29.952210  543705 disk_info.go:196] parse disk info done, disk is : [0xc000564e40 0xc000564e80]
E0321 14:32:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:32:33.409772  543705 memory.go:184] no items to output this cycle
I0321 14:32:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 14:32:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:32:43.409801  543705 memory.go:191] Add success.
I0321 14:32:43.409804  543705 cpu.go:282] Add success.
I0321 14:32:43.419899  543705 net.go:648] Add success.
I0321 14:32:43.422667  543705 net.go:770] primary dev: ETH0
I0321 14:32:43.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:32:43.422690  543705 net.go:698] Add success.
I0321 14:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:32:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:32:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:32:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:32:53.409794  543705 memory.go:184] no items to output this cycle
I0321 14:32:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 14:33:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:33:03.409815  543705 memory.go:184] no items to output this cycle
I0321 14:33:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 14:33:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:33:13.409780  543705 memory.go:191] Add success.
I0321 14:33:13.409803  543705 cpu.go:282] Add success.
W0321 14:33:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:33:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:33:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:33:13.420151  543705 net.go:648] Add success.
I0321 14:33:13.422769  543705 net.go:770] primary dev: ETH0
I0321 14:33:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:33:13.422799  543705 net.go:698] Add success.
I0321 14:33:13.507011  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d683c26-c08d-4637-a440-b7998edb8ec6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:33:13.507061  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:33:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:33:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:33:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 14:33:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:33:14.456687  543705 disk_worker.go:494] system disk:vda1
I0321 14:33:14.456716  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:33:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:33:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:33:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:33:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:33:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:33:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:33:23.409772  543705 memory.go:184] no items to output this cycle
I0321 14:33:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 14:33:29.953679  543705 disk_info.go:125] begin check local disk info of client
I0321 14:33:29.956267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:33:29.956274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ea6c0 0xc0000ea700]
E0321 14:33:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:33:33.409772  543705 memory.go:184] no items to output this cycle
I0321 14:33:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 14:33:38.969730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:33:38.969736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:33:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:33:43.410995  543705 memory.go:191] Add success.
I0321 14:33:43.409806  543705 cpu.go:282] Add success.
I0321 14:33:43.419678  543705 net.go:648] Add success.
I0321 14:33:43.422380  543705 net.go:770] primary dev: ETH0
I0321 14:33:43.422393  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:33:43.422405  543705 net.go:698] Add success.
I0321 14:33:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:33:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:33:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:33:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:33:53.409760  543705 memory.go:184] no items to output this cycle
I0321 14:33:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 14:34:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:34:03.409803  543705 memory.go:184] no items to output this cycle
I0321 14:34:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 14:34:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:34:13.409815  543705 memory.go:191] Add success.
I0321 14:34:13.409822  543705 cpu.go:282] Add success.
W0321 14:34:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:34:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:34:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:34:13.420249  543705 net.go:648] Add success.
I0321 14:34:13.422885  543705 net.go:770] primary dev: ETH0
I0321 14:34:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:34:13.422913  543705 net.go:698] Add success.
I0321 14:34:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:34:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:34:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 14:34:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:34:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 14:34:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:34:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:34:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:34:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:34:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:34:16.472487  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:34:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:34:23.409796  543705 memory.go:184] no items to output this cycle
I0321 14:34:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 14:34:29.957673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:34:29.960268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:34:29.960276  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b600 0xc00007b640]
E0321 14:34:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:34:33.409772  543705 cpu.go:275] no items to output this cycle
I0321 14:34:33.409779  543705 memory.go:184] no items to output this cycle
E0321 14:34:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:34:43.409816  543705 memory.go:191] Add success.
I0321 14:34:43.409826  543705 cpu.go:282] Add success.
I0321 14:34:43.420001  543705 net.go:648] Add success.
I0321 14:34:43.422817  543705 net.go:770] primary dev: ETH0
I0321 14:34:43.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:34:43.423015  543705 net.go:698] Add success.
I0321 14:34:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:34:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:34:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:34:53.409790  543705 memory.go:184] no items to output this cycle
I0321 14:34:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 14:35:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:35:03.409802  543705 memory.go:184] no items to output this cycle
I0321 14:35:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 14:35:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:35:13.409783  543705 memory.go:191] Add success.
I0321 14:35:13.409800  543705 cpu.go:282] Add success.
W0321 14:35:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:35:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:35:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:35:13.420141  543705 net.go:648] Add success.
I0321 14:35:13.422763  543705 net.go:770] primary dev: ETH0
I0321 14:35:13.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:35:13.422793  543705 net.go:698] Add success.
I0321 14:35:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:35:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:35:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 14:35:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:35:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 14:35:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:35:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:35:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:35:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:35:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:35:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:35:23.409766  543705 memory.go:184] no items to output this cycle
I0321 14:35:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 14:35:29.961671  543705 disk_info.go:125] begin check local disk info of client
I0321 14:35:29.964128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:35:29.964134  543705 disk_info.go:196] parse disk info done, disk is : [0xc000497480 0xc0004974c0]
E0321 14:35:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:35:33.409766  543705 memory.go:184] no items to output this cycle
I0321 14:35:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 14:35:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:35:43.409812  543705 memory.go:191] Add success.
I0321 14:35:43.409816  543705 cpu.go:282] Add success.
I0321 14:35:43.419895  543705 net.go:648] Add success.
I0321 14:35:43.422416  543705 net.go:770] primary dev: ETH0
I0321 14:35:43.422430  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:35:43.422441  543705 net.go:698] Add success.
I0321 14:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:35:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:35:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:35:53.409766  543705 memory.go:184] no items to output this cycle
I0321 14:35:53.409894  543705 cpu.go:275] no items to output this cycle
E0321 14:36:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:36:03.409771  543705 memory.go:184] no items to output this cycle
I0321 14:36:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 14:36:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:36:13.409788  543705 memory.go:191] Add success.
I0321 14:36:13.409806  543705 cpu.go:282] Add success.
W0321 14:36:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:36:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:36:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:36:13.420143  543705 net.go:648] Add success.
I0321 14:36:13.423363  543705 net.go:770] primary dev: ETH0
I0321 14:36:13.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:36:13.423389  543705 net.go:698] Add success.
I0321 14:36:13.463232  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fa406de-33cc-4136-979f-bbf826d0d5d2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:36:13.463264  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:36:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:36:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:36:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 14:36:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:36:14.456618  543705 disk_worker.go:494] system disk:vda1
I0321 14:36:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:36:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:36:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:36:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:36:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:36:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:36:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:36:23.409796  543705 memory.go:184] no items to output this cycle
I0321 14:36:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 14:36:29.965672  543705 disk_info.go:125] begin check local disk info of client
I0321 14:36:29.968214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:36:29.968220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000158700 0xc000158740]
E0321 14:36:33.410234  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:36:33.410250  543705 memory.go:184] no items to output this cycle
I0321 14:36:33.410301  543705 cpu.go:275] no items to output this cycle
I0321 14:36:38.971714  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:36:38.971721  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:36:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:36:43.410644  543705 memory.go:191] Add success.
I0321 14:36:43.409808  543705 cpu.go:282] Add success.
I0321 14:36:43.420378  543705 net.go:648] Add success.
I0321 14:36:43.423082  543705 net.go:770] primary dev: ETH0
I0321 14:36:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:36:43.423107  543705 net.go:698] Add success.
I0321 14:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:36:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:36:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:36:53.409765  543705 memory.go:184] no items to output this cycle
I0321 14:36:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 14:37:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:37:03.409785  543705 memory.go:184] no items to output this cycle
I0321 14:37:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 14:37:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:37:13.409792  543705 memory.go:191] Add success.
I0321 14:37:13.409802  543705 cpu.go:282] Add success.
W0321 14:37:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:37:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:37:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:37:13.420112  543705 net.go:648] Add success.
I0321 14:37:13.422602  543705 net.go:770] primary dev: ETH0
I0321 14:37:13.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:37:13.422627  543705 net.go:698] Add success.
I0321 14:37:13.453178  543705 event_worker.go:152] Polling the log file for events...
W0321 14:37:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:37:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 14:37:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:37:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:37:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:37:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:37:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 14:37:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:37:15.456874  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:37:15.456883  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:37:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:37:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:37:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:37:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:37:16.472290  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:37:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:37:23.409776  543705 memory.go:184] no items to output this cycle
I0321 14:37:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 14:37:29.969674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:37:29.972223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:37:29.972229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000158380 0xc0001583c0]
E0321 14:37:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:37:33.409796  543705 memory.go:184] no items to output this cycle
I0321 14:37:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 14:37:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:37:43.409819  543705 memory.go:191] Add success.
I0321 14:37:43.409823  543705 cpu.go:282] Add success.
I0321 14:37:43.419844  543705 net.go:648] Add success.
I0321 14:37:43.422515  543705 net.go:770] primary dev: ETH0
I0321 14:37:43.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:37:43.422540  543705 net.go:698] Add success.
I0321 14:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:37:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:37:53.409801  543705 memory.go:184] no items to output this cycle
I0321 14:37:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 14:38:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:38:03.409817  543705 memory.go:184] no items to output this cycle
I0321 14:38:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 14:38:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:38:13.409824  543705 memory.go:191] Add success.
I0321 14:38:13.409832  543705 cpu.go:282] Add success.
W0321 14:38:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:38:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:38:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:38:13.420102  543705 net.go:648] Add success.
I0321 14:38:13.423072  543705 net.go:770] primary dev: ETH0
I0321 14:38:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:38:13.423097  543705 net.go:698] Add success.
I0321 14:38:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:38:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:38:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 14:38:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:38:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 14:38:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:38:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:38:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:38:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:38:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:38:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:38:23.409768  543705 memory.go:184] no items to output this cycle
I0321 14:38:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 14:38:29.973676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:38:29.976173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:38:29.976179  543705 disk_info.go:196] parse disk info done, disk is : [0xc000496340 0xc000496380]
E0321 14:38:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:38:33.409808  543705 memory.go:184] no items to output this cycle
I0321 14:38:33.409833  543705 cpu.go:275] no items to output this cycle
E0321 14:38:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:38:43.409818  543705 memory.go:191] Add success.
I0321 14:38:43.409827  543705 cpu.go:282] Add success.
I0321 14:38:43.420033  543705 net.go:648] Add success.
I0321 14:38:43.422859  543705 net.go:770] primary dev: ETH0
I0321 14:38:43.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:38:43.422884  543705 net.go:698] Add success.
I0321 14:38:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:38:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:38:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:38:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:38:53.409770  543705 memory.go:184] no items to output this cycle
I0321 14:38:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 14:39:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:39:03.409791  543705 memory.go:184] no items to output this cycle
I0321 14:39:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 14:39:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:39:13.409808  543705 memory.go:191] Add success.
I0321 14:39:13.409811  543705 cpu.go:282] Add success.
W0321 14:39:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:39:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:39:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:39:13.420426  543705 net.go:648] Add success.
I0321 14:39:13.423157  543705 net.go:770] primary dev: ETH0
I0321 14:39:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:39:13.423186  543705 net.go:698] Add success.
I0321 14:39:13.663508  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"510aa503-86e7-4d38-9433-a7263d6ddb11","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:39:13.663540  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:39:14.453977  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:39:14.454212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:39:14.454225  543705 disk_worker.go:708] disk space is not compliant
W0321 14:39:14.454229  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:39:14.455810  543705 disk_worker.go:494] system disk:vda1
I0321 14:39:14.455839  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:39:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:39:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:39:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:39:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:39:23.409773  543705 memory.go:184] no items to output this cycle
I0321 14:39:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 14:39:29.977671  543705 disk_info.go:125] begin check local disk info of client
I0321 14:39:29.980206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:39:29.980212  543705 disk_info.go:196] parse disk info done, disk is : [0xc000547480 0xc0005474c0]
E0321 14:39:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:39:33.409776  543705 memory.go:184] no items to output this cycle
I0321 14:39:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 14:39:38.973747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:39:38.973753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:39:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:39:43.410748  543705 memory.go:191] Add success.
I0321 14:39:43.409838  543705 cpu.go:282] Add success.
I0321 14:39:43.420489  543705 net.go:648] Add success.
I0321 14:39:43.423208  543705 net.go:770] primary dev: ETH0
I0321 14:39:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:39:43.423235  543705 net.go:698] Add success.
I0321 14:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:39:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:39:53.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:39:53.410394  543705 memory.go:184] no items to output this cycle
I0321 14:39:53.410407  543705 cpu.go:275] no items to output this cycle
E0321 14:40:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:40:03.409794  543705 memory.go:184] no items to output this cycle
I0321 14:40:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 14:40:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:40:13.409812  543705 memory.go:191] Add success.
I0321 14:40:13.409820  543705 cpu.go:282] Add success.
W0321 14:40:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:40:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:40:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:40:13.420249  543705 net.go:648] Add success.
I0321 14:40:13.423065  543705 net.go:770] primary dev: ETH0
I0321 14:40:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:40:13.423090  543705 net.go:698] Add success.
I0321 14:40:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:40:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:40:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 14:40:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:40:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 14:40:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:40:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:40:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:40:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:40:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:40:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:40:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:40:23.409783  543705 memory.go:184] no items to output this cycle
I0321 14:40:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 14:40:29.981679  543705 disk_info.go:125] begin check local disk info of client
I0321 14:40:29.984225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:40:29.984232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046fc80 0xc00046fcc0]
E0321 14:40:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:40:33.409792  543705 memory.go:184] no items to output this cycle
I0321 14:40:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 14:40:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:40:43.409795  543705 memory.go:191] Add success.
I0321 14:40:43.409823  543705 cpu.go:282] Add success.
I0321 14:40:43.420020  543705 net.go:648] Add success.
I0321 14:40:43.423068  543705 net.go:770] primary dev: ETH0
I0321 14:40:43.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:40:43.423096  543705 net.go:698] Add success.
I0321 14:40:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:40:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:40:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:40:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:40:53.409774  543705 memory.go:184] no items to output this cycle
I0321 14:40:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 14:41:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:41:03.409783  543705 memory.go:184] no items to output this cycle
I0321 14:41:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 14:41:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:41:13.409816  543705 memory.go:191] Add success.
I0321 14:41:13.409821  543705 cpu.go:282] Add success.
W0321 14:41:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:41:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:41:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:41:13.420127  543705 net.go:648] Add success.
I0321 14:41:13.423235  543705 net.go:770] primary dev: ETH0
I0321 14:41:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:41:13.423296  543705 net.go:698] Add success.
I0321 14:41:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:41:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:41:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 14:41:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:41:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 14:41:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:41:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:41:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:41:16.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:41:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:41:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:41:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:41:23.409776  543705 memory.go:184] no items to output this cycle
I0321 14:41:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 14:41:29.985678  543705 disk_info.go:125] begin check local disk info of client
I0321 14:41:29.988176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:41:29.988184  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
E0321 14:41:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:41:33.409793  543705 memory.go:184] no items to output this cycle
I0321 14:41:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 14:41:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:41:43.409785  543705 memory.go:191] Add success.
I0321 14:41:43.409795  543705 cpu.go:282] Add success.
I0321 14:41:43.419876  543705 net.go:648] Add success.
I0321 14:41:43.423044  543705 net.go:770] primary dev: ETH0
I0321 14:41:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:41:43.423070  543705 net.go:698] Add success.
I0321 14:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:41:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:41:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:41:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:41:53.409796  543705 memory.go:184] no items to output this cycle
I0321 14:41:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 14:42:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:42:03.409786  543705 memory.go:184] no items to output this cycle
I0321 14:42:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 14:42:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:42:13.409775  543705 memory.go:191] Add success.
W0321 14:42:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:42:13.409805  543705 cpu.go:282] Add success.
W0321 14:42:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:42:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:42:13.420161  543705 net.go:648] Add success.
I0321 14:42:13.422747  543705 net.go:770] primary dev: ETH0
I0321 14:42:13.422763  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:42:13.422778  543705 net.go:698] Add success.
I0321 14:42:13.463626  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a82c0506-0271-4f1c-b60d-697035b2862d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:42:13.463657  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 14:42:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:42:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 14:42:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:42:14.455903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:42:14.455912  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:42:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:42:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 14:42:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:42:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:42:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:42:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:42:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:42:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:42:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:42:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:42:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:42:23.409771  543705 memory.go:184] no items to output this cycle
I0321 14:42:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 14:42:29.989674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:42:29.992175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:42:29.992181  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536140 0xc000536180]
E0321 14:42:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:42:33.409796  543705 memory.go:184] no items to output this cycle
I0321 14:42:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 14:42:38.976571  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:42:38.976577  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:42:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:42:43.410617  543705 memory.go:191] Add success.
I0321 14:42:43.409835  543705 cpu.go:282] Add success.
I0321 14:42:43.420328  543705 net.go:648] Add success.
I0321 14:42:43.423020  543705 net.go:770] primary dev: ETH0
I0321 14:42:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:42:43.423048  543705 net.go:698] Add success.
I0321 14:42:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:42:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:42:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:42:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:42:53.409771  543705 memory.go:184] no items to output this cycle
I0321 14:42:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 14:43:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:43:03.409814  543705 memory.go:184] no items to output this cycle
I0321 14:43:03.409831  543705 cpu.go:275] no items to output this cycle
E0321 14:43:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:43:13.409810  543705 memory.go:191] Add success.
I0321 14:43:13.409820  543705 cpu.go:282] Add success.
W0321 14:43:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:43:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:43:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:43:13.420070  543705 net.go:648] Add success.
I0321 14:43:13.422865  543705 net.go:770] primary dev: ETH0
I0321 14:43:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:43:13.422891  543705 net.go:698] Add success.
I0321 14:43:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:43:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:43:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 14:43:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:43:14.456993  543705 disk_worker.go:494] system disk:vda1
I0321 14:43:14.457022  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:43:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:43:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:43:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:43:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:43:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:43:23.409773  543705 memory.go:184] no items to output this cycle
I0321 14:43:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 14:43:29.993674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:43:29.996151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:43:29.996158  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1e00 0xc0002b1e40]
E0321 14:43:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:43:33.409794  543705 memory.go:184] no items to output this cycle
I0321 14:43:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 14:43:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:43:43.409779  543705 memory.go:191] Add success.
I0321 14:43:43.409804  543705 cpu.go:282] Add success.
I0321 14:43:43.419879  543705 net.go:648] Add success.
I0321 14:43:43.422534  543705 net.go:770] primary dev: ETH0
I0321 14:43:43.422547  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:43:43.422559  543705 net.go:698] Add success.
I0321 14:43:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:43:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:43:53.410302  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:43:53.410318  543705 memory.go:184] no items to output this cycle
I0321 14:43:53.410354  543705 cpu.go:275] no items to output this cycle
E0321 14:44:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:44:03.409811  543705 memory.go:184] no items to output this cycle
I0321 14:44:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 14:44:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:44:13.409783  543705 memory.go:191] Add success.
W0321 14:44:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:44:13.409819  543705 cpu.go:282] Add success.
W0321 14:44:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:44:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:44:13.420073  543705 net.go:648] Add success.
I0321 14:44:13.423048  543705 net.go:770] primary dev: ETH0
I0321 14:44:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:44:13.423077  543705 net.go:698] Add success.
I0321 14:44:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:44:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:44:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 14:44:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:44:14.456839  543705 disk_worker.go:494] system disk:vda1
I0321 14:44:14.456869  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:44:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:44:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:44:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:44:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:44:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:44:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:44:23.409779  543705 memory.go:184] no items to output this cycle
I0321 14:44:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 14:44:29.997675  543705 disk_info.go:125] begin check local disk info of client
I0321 14:44:30.000160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:44:30.000167  543705 disk_info.go:196] parse disk info done, disk is : [0xc000545240 0xc000545280]
E0321 14:44:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:44:33.409791  543705 memory.go:184] no items to output this cycle
I0321 14:44:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 14:44:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:44:43.409779  543705 memory.go:191] Add success.
I0321 14:44:43.409810  543705 cpu.go:282] Add success.
I0321 14:44:43.419895  543705 net.go:648] Add success.
I0321 14:44:43.422846  543705 net.go:770] primary dev: ETH0
I0321 14:44:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:44:43.422876  543705 net.go:698] Add success.
I0321 14:44:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:44:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:44:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:44:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:44:53.409784  543705 memory.go:184] no items to output this cycle
I0321 14:44:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 14:45:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:45:03.409782  543705 memory.go:184] no items to output this cycle
I0321 14:45:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 14:45:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:45:13.409792  543705 memory.go:191] Add success.
I0321 14:45:13.409793  543705 cpu.go:282] Add success.
W0321 14:45:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:45:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:45:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:45:13.420180  543705 net.go:648] Add success.
I0321 14:45:13.422874  543705 net.go:770] primary dev: ETH0
I0321 14:45:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:45:13.422903  543705 net.go:698] Add success.
I0321 14:45:13.468511  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1a2b09d5-f657-4859-bd61-c91db3a1f7c2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:45:13.468543  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:45:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:45:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:45:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 14:45:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:45:14.456729  543705 disk_worker.go:494] system disk:vda1
I0321 14:45:14.456762  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:45:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:45:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:45:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:45:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:45:23.409778  543705 memory.go:184] no items to output this cycle
I0321 14:45:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 14:45:30.001676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:45:30.004121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:45:30.004127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac200 0xc0002ac280]
E0321 14:45:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:45:33.409795  543705 memory.go:184] no items to output this cycle
I0321 14:45:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 14:45:38.977736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:45:38.977743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:45:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:45:43.410860  543705 memory.go:191] Add success.
I0321 14:45:43.409804  543705 cpu.go:282] Add success.
I0321 14:45:43.420551  543705 net.go:648] Add success.
I0321 14:45:43.423216  543705 net.go:770] primary dev: ETH0
I0321 14:45:43.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:45:43.423247  543705 net.go:698] Add success.
I0321 14:45:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:45:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:45:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:45:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:45:53.409784  543705 memory.go:184] no items to output this cycle
I0321 14:45:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 14:46:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:46:03.409785  543705 memory.go:184] no items to output this cycle
I0321 14:46:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 14:46:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:46:13.409779  543705 memory.go:191] Add success.
W0321 14:46:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:46:13.409810  543705 cpu.go:282] Add success.
W0321 14:46:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:46:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:46:13.420175  543705 net.go:648] Add success.
I0321 14:46:13.422932  543705 net.go:770] primary dev: ETH0
I0321 14:46:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:46:13.422961  543705 net.go:698] Add success.
I0321 14:46:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:46:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:46:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 14:46:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:46:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 14:46:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:46:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:46:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:46:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:46:16.472374  543705 disk_local_worker.go:436] Get disk info: []
I0321 14:46:23.409913  543705 cpu.go:275] no items to output this cycle
E0321 14:46:23.409980  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:46:23.409996  543705 memory.go:184] no items to output this cycle
I0321 14:46:30.005675  543705 disk_info.go:125] begin check local disk info of client
I0321 14:46:30.008282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:46:30.008288  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
E0321 14:46:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:46:33.409773  543705 memory.go:184] no items to output this cycle
I0321 14:46:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 14:46:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:46:43.409784  543705 memory.go:191] Add success.
I0321 14:46:43.409785  543705 cpu.go:282] Add success.
I0321 14:46:43.420038  543705 net.go:648] Add success.
I0321 14:46:43.422868  543705 net.go:770] primary dev: ETH0
I0321 14:46:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:46:43.422893  543705 net.go:698] Add success.
I0321 14:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:46:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:46:53.410264  543705 memory.go:184] no items to output this cycle
I0321 14:46:53.410284  543705 cpu.go:275] no items to output this cycle
E0321 14:47:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:47:03.409776  543705 memory.go:184] no items to output this cycle
I0321 14:47:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 14:47:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:47:13.409781  543705 memory.go:191] Add success.
I0321 14:47:13.409800  543705 cpu.go:282] Add success.
W0321 14:47:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:47:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:47:13.420224  543705 net.go:648] Add success.
I0321 14:47:13.422955  543705 net.go:770] primary dev: ETH0
I0321 14:47:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:47:13.422979  543705 net.go:698] Add success.
I0321 14:47:13.453559  543705 event_worker.go:152] Polling the log file for events...
W0321 14:47:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:47:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 14:47:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:47:14.455898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:47:14.455907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:47:14.455912  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:47:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 14:47:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:47:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:47:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:47:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:47:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:47:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:47:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:47:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:47:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:47:23.409904  543705 cpu.go:275] no items to output this cycle
I0321 14:47:23.409908  543705 memory.go:184] no items to output this cycle
I0321 14:47:30.009681  543705 disk_info.go:125] begin check local disk info of client
I0321 14:47:30.012149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:47:30.012156  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa0c0 0xc0002aa100]
E0321 14:47:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:47:33.409768  543705 memory.go:184] no items to output this cycle
I0321 14:47:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 14:47:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:47:43.409791  543705 memory.go:191] Add success.
I0321 14:47:43.409790  543705 cpu.go:282] Add success.
I0321 14:47:43.419893  543705 net.go:648] Add success.
I0321 14:47:43.422788  543705 net.go:770] primary dev: ETH0
I0321 14:47:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:47:43.422821  543705 net.go:698] Add success.
I0321 14:47:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:47:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:47:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:47:53.410264  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:47:53.410281  543705 memory.go:184] no items to output this cycle
I0321 14:47:53.410292  543705 cpu.go:275] no items to output this cycle
E0321 14:48:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:48:03.409808  543705 memory.go:184] no items to output this cycle
I0321 14:48:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 14:48:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:48:13.409813  543705 memory.go:191] Add success.
I0321 14:48:13.409819  543705 cpu.go:282] Add success.
W0321 14:48:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:48:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:48:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:48:13.420041  543705 net.go:648] Add success.
I0321 14:48:13.422813  543705 net.go:770] primary dev: ETH0
I0321 14:48:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:48:13.422839  543705 net.go:698] Add success.
I0321 14:48:13.468049  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"278820a1-4720-4c20-bf4d-3b3f589fdc8f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:48:13.468081  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:48:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:48:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 14:48:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:48:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 14:48:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:48:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:48:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:48:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:48:23.409804  543705 memory.go:184] no items to output this cycle
I0321 14:48:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 14:48:30.013678  543705 disk_info.go:125] begin check local disk info of client
I0321 14:48:30.016156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:48:30.016178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000564480 0xc0005644c0]
E0321 14:48:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:48:33.409783  543705 memory.go:184] no items to output this cycle
I0321 14:48:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 14:48:38.980585  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:48:38.980592  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:48:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:48:43.410692  543705 memory.go:191] Add success.
I0321 14:48:43.409804  543705 cpu.go:282] Add success.
I0321 14:48:43.420435  543705 net.go:648] Add success.
I0321 14:48:43.423155  543705 net.go:770] primary dev: ETH0
I0321 14:48:43.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:48:43.423201  543705 net.go:698] Add success.
I0321 14:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:48:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:48:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:48:53.409780  543705 memory.go:184] no items to output this cycle
I0321 14:48:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 14:49:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:49:03.409788  543705 memory.go:184] no items to output this cycle
I0321 14:49:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 14:49:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:49:13.409795  543705 cpu.go:282] Add success.
I0321 14:49:13.409803  543705 memory.go:191] Add success.
W0321 14:49:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:49:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:49:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:49:13.420214  543705 net.go:648] Add success.
I0321 14:49:13.423082  543705 net.go:770] primary dev: ETH0
I0321 14:49:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:49:13.423108  543705 net.go:698] Add success.
I0321 14:49:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:49:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:49:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 14:49:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:49:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 14:49:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:49:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:49:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:49:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:49:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:49:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:49:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:49:23.409874  543705 cpu.go:275] no items to output this cycle
I0321 14:49:23.409883  543705 memory.go:184] no items to output this cycle
I0321 14:49:30.017674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:49:30.020217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:49:30.020223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2000 0xc0002a2040]
E0321 14:49:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:49:33.409775  543705 memory.go:184] no items to output this cycle
I0321 14:49:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 14:49:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:49:43.409805  543705 memory.go:191] Add success.
I0321 14:49:43.409813  543705 cpu.go:282] Add success.
I0321 14:49:43.420049  543705 net.go:648] Add success.
I0321 14:49:43.422686  543705 net.go:770] primary dev: ETH0
I0321 14:49:43.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:49:43.422711  543705 net.go:698] Add success.
I0321 14:49:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:49:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:49:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:49:53.409773  543705 memory.go:184] no items to output this cycle
I0321 14:49:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 14:50:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:50:03.409804  543705 memory.go:184] no items to output this cycle
I0321 14:50:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 14:50:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:50:13.409785  543705 memory.go:191] Add success.
W0321 14:50:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:50:13.409814  543705 cpu.go:282] Add success.
W0321 14:50:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:50:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:50:13.420045  543705 net.go:648] Add success.
I0321 14:50:13.422622  543705 net.go:770] primary dev: ETH0
I0321 14:50:13.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:50:13.422648  543705 net.go:698] Add success.
I0321 14:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:50:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:50:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 14:50:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:50:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 14:50:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:50:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:50:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:50:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:50:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:50:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:50:23.409879  543705 memory.go:184] no items to output this cycle
I0321 14:50:23.409937  543705 cpu.go:275] no items to output this cycle
I0321 14:50:30.021680  543705 disk_info.go:125] begin check local disk info of client
I0321 14:50:30.024257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:50:30.024264  543705 disk_info.go:196] parse disk info done, disk is : [0xc000158000 0xc000158040]
E0321 14:50:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:50:33.409798  543705 memory.go:184] no items to output this cycle
I0321 14:50:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 14:50:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:50:43.409776  543705 memory.go:191] Add success.
I0321 14:50:43.409799  543705 cpu.go:282] Add success.
I0321 14:50:43.419858  543705 net.go:648] Add success.
I0321 14:50:43.422601  543705 net.go:770] primary dev: ETH0
I0321 14:50:43.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:50:43.422626  543705 net.go:698] Add success.
I0321 14:50:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:50:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:50:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:50:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:50:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 14:50:53.409785  543705 memory.go:184] no items to output this cycle
E0321 14:51:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:51:03.409808  543705 memory.go:184] no items to output this cycle
I0321 14:51:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 14:51:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:51:13.409779  543705 memory.go:191] Add success.
I0321 14:51:13.409800  543705 cpu.go:282] Add success.
W0321 14:51:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:51:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:51:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:51:13.420164  543705 net.go:648] Add success.
I0321 14:51:13.423885  543705 net.go:770] primary dev: ETH0
I0321 14:51:13.423899  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:51:13.423911  543705 net.go:698] Add success.
I0321 14:51:13.463517  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a6f0f53-75fc-48ad-8ebb-b266eaa3194f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:51:13.463551  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:51:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:51:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:51:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 14:51:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:51:14.456665  543705 disk_worker.go:494] system disk:vda1
I0321 14:51:14.456696  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:51:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:51:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:51:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:51:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:51:23.409775  543705 memory.go:184] no items to output this cycle
I0321 14:51:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 14:51:30.025677  543705 disk_info.go:125] begin check local disk info of client
I0321 14:51:30.028149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:51:30.028155  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034b2c0 0xc00034b300]
E0321 14:51:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:51:33.409792  543705 memory.go:184] no items to output this cycle
I0321 14:51:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 14:51:38.981733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:51:38.981740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:51:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:51:43.410751  543705 memory.go:191] Add success.
I0321 14:51:43.409809  543705 cpu.go:282] Add success.
I0321 14:51:43.420403  543705 net.go:648] Add success.
I0321 14:51:43.423047  543705 net.go:770] primary dev: ETH0
I0321 14:51:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:51:43.423073  543705 net.go:698] Add success.
I0321 14:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:51:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:51:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:51:53.409777  543705 memory.go:184] no items to output this cycle
I0321 14:51:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 14:52:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:52:03.409776  543705 memory.go:184] no items to output this cycle
I0321 14:52:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 14:52:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:52:13.409814  543705 memory.go:191] Add success.
I0321 14:52:13.409817  543705 cpu.go:282] Add success.
W0321 14:52:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:52:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:52:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:52:13.420109  543705 net.go:648] Add success.
I0321 14:52:13.422930  543705 net.go:770] primary dev: ETH0
I0321 14:52:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:52:13.422956  543705 net.go:698] Add success.
W0321 14:52:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:52:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 14:52:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:52:14.456932  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:52:14.456941  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:52:14.456947  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:52:14.457019  543705 disk_worker.go:494] system disk:vda1
I0321 14:52:14.457062  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:52:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:52:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:52:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:52:16.457998  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:52:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:52:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:52:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:52:23.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:52:23.409885  543705 memory.go:184] no items to output this cycle
I0321 14:52:23.409917  543705 cpu.go:275] no items to output this cycle
I0321 14:52:30.029676  543705 disk_info.go:125] begin check local disk info of client
I0321 14:52:30.032147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:52:30.032153  543705 disk_info.go:196] parse disk info done, disk is : [0xc000394000 0xc000394040]
E0321 14:52:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:52:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 14:52:33.409783  543705 memory.go:184] no items to output this cycle
E0321 14:52:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:52:43.409804  543705 memory.go:191] Add success.
I0321 14:52:43.409816  543705 cpu.go:282] Add success.
I0321 14:52:43.419902  543705 net.go:648] Add success.
I0321 14:52:43.422551  543705 net.go:770] primary dev: ETH0
I0321 14:52:43.422566  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:52:43.422580  543705 net.go:698] Add success.
I0321 14:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:52:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:52:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:52:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:52:53.409777  543705 memory.go:184] no items to output this cycle
I0321 14:52:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 14:53:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:53:03.409784  543705 memory.go:184] no items to output this cycle
I0321 14:53:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 14:53:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:53:13.409810  543705 memory.go:191] Add success.
I0321 14:53:13.409816  543705 cpu.go:282] Add success.
W0321 14:53:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:53:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:53:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:53:13.420119  543705 net.go:648] Add success.
I0321 14:53:13.422722  543705 net.go:770] primary dev: ETH0
I0321 14:53:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:53:13.422746  543705 net.go:698] Add success.
I0321 14:53:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:53:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:53:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 14:53:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:53:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 14:53:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:53:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:53:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:53:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:53:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:53:23.409781  543705 memory.go:184] no items to output this cycle
I0321 14:53:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 14:53:30.033674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:53:30.036160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:53:30.036167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6000 0xc0003e6040]
E0321 14:53:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:53:33.409767  543705 memory.go:184] no items to output this cycle
I0321 14:53:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 14:53:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:53:43.409814  543705 memory.go:191] Add success.
I0321 14:53:43.409827  543705 cpu.go:282] Add success.
I0321 14:53:43.420000  543705 net.go:648] Add success.
I0321 14:53:43.422666  543705 net.go:770] primary dev: ETH0
I0321 14:53:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:53:43.422692  543705 net.go:698] Add success.
I0321 14:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:53:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:53:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:53:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:53:53.409781  543705 memory.go:184] no items to output this cycle
I0321 14:53:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 14:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:54:03.409776  543705 memory.go:184] no items to output this cycle
I0321 14:54:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 14:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:54:13.409789  543705 memory.go:191] Add success.
I0321 14:54:13.409792  543705 cpu.go:282] Add success.
W0321 14:54:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:54:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:54:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:54:13.420441  543705 net.go:648] Add success.
I0321 14:54:13.423023  543705 net.go:770] primary dev: ETH0
I0321 14:54:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:54:13.423048  543705 net.go:698] Add success.
I0321 14:54:13.469406  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b3d131cc-8918-4df3-9177-70df0bbd6fc9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:54:13.469438  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 14:54:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:54:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:54:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 14:54:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:54:14.456697  543705 disk_worker.go:494] system disk:vda1
I0321 14:54:14.456732  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:54:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:54:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:54:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:54:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:54:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:54:23.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:54:23.409872  543705 memory.go:184] no items to output this cycle
I0321 14:54:23.409956  543705 cpu.go:275] no items to output this cycle
I0321 14:54:30.037673  543705 disk_info.go:125] begin check local disk info of client
I0321 14:54:30.040189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:54:30.040196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6180 0xc0003b61c0]
E0321 14:54:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:54:33.409777  543705 cpu.go:275] no items to output this cycle
I0321 14:54:33.409785  543705 memory.go:184] no items to output this cycle
I0321 14:54:38.981876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:54:38.981881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:54:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:54:43.410714  543705 memory.go:191] Add success.
I0321 14:54:43.409825  543705 cpu.go:282] Add success.
I0321 14:54:43.420444  543705 net.go:648] Add success.
I0321 14:54:43.423060  543705 net.go:770] primary dev: ETH0
I0321 14:54:43.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:54:43.423094  543705 net.go:698] Add success.
I0321 14:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:54:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:54:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:54:53.409797  543705 memory.go:184] no items to output this cycle
I0321 14:54:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 14:55:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:55:03.409786  543705 memory.go:184] no items to output this cycle
I0321 14:55:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 14:55:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:55:13.409817  543705 memory.go:191] Add success.
I0321 14:55:13.409825  543705 cpu.go:282] Add success.
W0321 14:55:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:55:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:55:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:55:13.420157  543705 net.go:648] Add success.
I0321 14:55:13.422946  543705 net.go:770] primary dev: ETH0
I0321 14:55:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:55:13.422970  543705 net.go:698] Add success.
I0321 14:55:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:55:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:55:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 14:55:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:55:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 14:55:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:55:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:55:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:55:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:55:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:55:23.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:55:23.409893  543705 memory.go:184] no items to output this cycle
I0321 14:55:23.409941  543705 cpu.go:275] no items to output this cycle
I0321 14:55:30.041674  543705 disk_info.go:125] begin check local disk info of client
I0321 14:55:30.044258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:55:30.044264  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
E0321 14:55:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:55:33.409776  543705 memory.go:184] no items to output this cycle
I0321 14:55:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 14:55:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:55:43.409783  543705 memory.go:191] Add success.
I0321 14:55:43.409810  543705 cpu.go:282] Add success.
I0321 14:55:43.419874  543705 net.go:648] Add success.
I0321 14:55:43.422722  543705 net.go:770] primary dev: ETH0
I0321 14:55:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:55:43.422751  543705 net.go:698] Add success.
I0321 14:55:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:55:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:55:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:55:53.409795  543705 memory.go:184] no items to output this cycle
I0321 14:55:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 14:56:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:56:03.409793  543705 memory.go:184] no items to output this cycle
I0321 14:56:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 14:56:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:56:13.409781  543705 memory.go:191] Add success.
W0321 14:56:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:56:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:56:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:56:13.409821  543705 cpu.go:282] Add success.
I0321 14:56:13.420458  543705 net.go:648] Add success.
I0321 14:56:13.423413  543705 net.go:770] primary dev: ETH0
I0321 14:56:13.423425  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:56:13.423437  543705 net.go:698] Add success.
I0321 14:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:56:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:56:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 14:56:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:56:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 14:56:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:56:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:56:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:56:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:56:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:56:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:56:23.409773  543705 memory.go:184] no items to output this cycle
I0321 14:56:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 14:56:30.045677  543705 disk_info.go:125] begin check local disk info of client
I0321 14:56:30.048245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:56:30.048251  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035fcc0 0xc00035fd00]
E0321 14:56:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:56:33.409777  543705 memory.go:184] no items to output this cycle
I0321 14:56:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 14:56:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:56:43.409783  543705 memory.go:191] Add success.
I0321 14:56:43.409825  543705 cpu.go:282] Add success.
I0321 14:56:43.419998  543705 net.go:648] Add success.
I0321 14:56:43.423071  543705 net.go:770] primary dev: ETH0
I0321 14:56:43.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:56:43.423095  543705 net.go:698] Add success.
I0321 14:56:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:56:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:56:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:56:53.410344  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:56:53.410360  543705 memory.go:184] no items to output this cycle
I0321 14:56:53.410379  543705 cpu.go:275] no items to output this cycle
E0321 14:57:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:57:03.409776  543705 memory.go:184] no items to output this cycle
I0321 14:57:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 14:57:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:57:13.409784  543705 memory.go:191] Add success.
W0321 14:57:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 14:57:13.409818  543705 cpu.go:282] Add success.
W0321 14:57:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:57:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:57:13.420276  543705 net.go:648] Add success.
I0321 14:57:13.423080  543705 net.go:770] primary dev: ETH0
I0321 14:57:13.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:57:13.423105  543705 net.go:698] Add success.
I0321 14:57:13.429093  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 14:57:13.453266  543705 event_worker.go:152] Polling the log file for events...
I0321 14:57:13.469311  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"160b0680-1989-4ddf-982e-3e72cf251831","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 14:57:13.469343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 14:57:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:57:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 14:57:14.455212  543705 disk_worker.go:728] disk inode is not compliant
E0321 14:57:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 14:57:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 14:57:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0321 14:57:14.456803  543705 disk_worker.go:494] system disk:vda1
I0321 14:57:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 14:57:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 14:57:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:57:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 14:57:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 14:57:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:57:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:57:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:57:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:57:23.409802  543705 memory.go:184] no items to output this cycle
I0321 14:57:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 14:57:30.049675  543705 disk_info.go:125] begin check local disk info of client
I0321 14:57:30.052217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:57:30.052223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bfac0 0xc0004bfb00]
E0321 14:57:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:57:33.409780  543705 memory.go:184] no items to output this cycle
I0321 14:57:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 14:57:38.984616  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 14:57:38.984623  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 14:57:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:57:43.410762  543705 memory.go:191] Add success.
I0321 14:57:43.409822  543705 cpu.go:282] Add success.
I0321 14:57:43.420477  543705 net.go:648] Add success.
I0321 14:57:43.423196  543705 net.go:770] primary dev: ETH0
I0321 14:57:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:57:43.423222  543705 net.go:698] Add success.
I0321 14:57:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:57:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:57:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:57:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:57:53.409774  543705 memory.go:184] no items to output this cycle
I0321 14:57:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 14:58:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:58:03.409783  543705 memory.go:184] no items to output this cycle
I0321 14:58:03.409841  543705 cpu.go:275] no items to output this cycle
E0321 14:58:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:58:13.409818  543705 memory.go:191] Add success.
I0321 14:58:13.409823  543705 cpu.go:282] Add success.
W0321 14:58:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:58:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:58:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:58:13.420066  543705 net.go:648] Add success.
I0321 14:58:13.423105  543705 net.go:770] primary dev: ETH0
I0321 14:58:13.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:58:13.423134  543705 net.go:698] Add success.
I0321 14:58:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:58:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:58:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 14:58:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:58:14.456520  543705 disk_worker.go:494] system disk:vda1
I0321 14:58:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:58:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:58:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:58:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:58:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:58:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:58:23.409808  543705 memory.go:184] no items to output this cycle
I0321 14:58:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 14:58:30.053677  543705 disk_info.go:125] begin check local disk info of client
I0321 14:58:30.056223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:58:30.056230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6340 0xc0003e6380]
E0321 14:58:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:58:33.409783  543705 memory.go:184] no items to output this cycle
I0321 14:58:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 14:58:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:58:43.409796  543705 memory.go:191] Add success.
I0321 14:58:43.409800  543705 cpu.go:282] Add success.
I0321 14:58:43.420036  543705 net.go:648] Add success.
I0321 14:58:43.423267  543705 net.go:770] primary dev: ETH0
I0321 14:58:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:58:43.423297  543705 net.go:698] Add success.
I0321 14:58:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:58:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:58:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:58:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:58:53.409786  543705 memory.go:184] no items to output this cycle
I0321 14:58:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 14:59:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:59:03.409779  543705 memory.go:184] no items to output this cycle
I0321 14:59:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 14:59:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:59:13.409818  543705 memory.go:191] Add success.
I0321 14:59:13.409827  543705 cpu.go:282] Add success.
W0321 14:59:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 14:59:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 14:59:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 14:59:13.420208  543705 net.go:648] Add success.
I0321 14:59:13.422876  543705 net.go:770] primary dev: ETH0
I0321 14:59:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:59:13.422902  543705 net.go:698] Add success.
I0321 14:59:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 14:59:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 14:59:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 14:59:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 14:59:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 14:59:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 14:59:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 14:59:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:59:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:59:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 14:59:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 14:59:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:59:23.409777  543705 memory.go:184] no items to output this cycle
I0321 14:59:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 14:59:30.057668  543705 disk_info.go:125] begin check local disk info of client
I0321 14:59:30.060138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 14:59:30.060145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003538c0 0xc000353900]
E0321 14:59:33.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:59:33.409898  543705 memory.go:184] no items to output this cycle
I0321 14:59:33.409955  543705 cpu.go:275] no items to output this cycle
E0321 14:59:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:59:43.409788  543705 memory.go:191] Add success.
I0321 14:59:43.409814  543705 cpu.go:282] Add success.
I0321 14:59:43.420228  543705 net.go:648] Add success.
I0321 14:59:43.422703  543705 net.go:770] primary dev: ETH0
I0321 14:59:43.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0321 14:59:43.422732  543705 net.go:698] Add success.
I0321 14:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 14:59:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 14:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 14:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 14:59:53.409780  543705 memory.go:184] no items to output this cycle
I0321 14:59:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 15:00:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:00:03.409801  543705 memory.go:184] no items to output this cycle
I0321 15:00:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 15:00:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:00:13.409822  543705 memory.go:191] Add success.
I0321 15:00:13.409822  543705 cpu.go:282] Add success.
W0321 15:00:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:00:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:00:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:00:13.420187  543705 net.go:648] Add success.
I0321 15:00:13.422860  543705 net.go:770] primary dev: ETH0
I0321 15:00:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:00:13.422885  543705 net.go:698] Add success.
I0321 15:00:13.468849  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e663516-32c2-4aef-b961-531734509a3c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:00:13.468881  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:00:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:00:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:00:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 15:00:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:00:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 15:00:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:00:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:00:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:00:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:00:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:00:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:00:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:00:23.409772  543705 memory.go:184] no items to output this cycle
I0321 15:00:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 15:00:30.061677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:00:30.064190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:00:30.064197  543705 disk_info.go:196] parse disk info done, disk is : [0xc000293a00 0xc000293a40]
E0321 15:00:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:00:33.409901  543705 memory.go:184] no items to output this cycle
I0321 15:00:33.410016  543705 cpu.go:275] no items to output this cycle
I0321 15:00:38.985750  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:00:38.985757  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:00:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:00:43.410852  543705 memory.go:191] Add success.
I0321 15:00:43.409816  543705 cpu.go:282] Add success.
I0321 15:00:43.420585  543705 net.go:648] Add success.
I0321 15:00:43.423407  543705 net.go:770] primary dev: ETH0
I0321 15:00:43.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:00:43.423434  543705 net.go:698] Add success.
I0321 15:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:00:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:00:53.409774  543705 memory.go:184] no items to output this cycle
I0321 15:00:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 15:01:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:01:03.409795  543705 memory.go:184] no items to output this cycle
I0321 15:01:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 15:01:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:01:13.409790  543705 memory.go:191] Add success.
I0321 15:01:13.409814  543705 cpu.go:282] Add success.
W0321 15:01:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:01:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:01:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:01:13.420117  543705 net.go:648] Add success.
I0321 15:01:13.422984  543705 net.go:770] primary dev: ETH0
I0321 15:01:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:01:13.423013  543705 net.go:698] Add success.
I0321 15:01:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:01:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:01:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 15:01:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:01:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 15:01:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:01:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:01:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:01:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:01:23.409782  543705 memory.go:184] no items to output this cycle
I0321 15:01:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 15:01:30.065672  543705 disk_info.go:125] begin check local disk info of client
I0321 15:01:30.068136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:01:30.068143  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382b80 0xc000382bc0]
E0321 15:01:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:01:33.409915  543705 cpu.go:275] no items to output this cycle
I0321 15:01:33.409961  543705 memory.go:184] no items to output this cycle
E0321 15:01:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:01:43.409829  543705 memory.go:191] Add success.
I0321 15:01:43.409842  543705 cpu.go:282] Add success.
I0321 15:01:43.420433  543705 net.go:648] Add success.
I0321 15:01:43.423474  543705 net.go:770] primary dev: ETH0
I0321 15:01:43.423487  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:01:43.423500  543705 net.go:698] Add success.
I0321 15:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:01:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:01:53.409775  543705 memory.go:184] no items to output this cycle
I0321 15:01:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 15:02:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:02:03.409783  543705 memory.go:184] no items to output this cycle
I0321 15:02:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 15:02:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:02:13.409789  543705 memory.go:191] Add success.
I0321 15:02:13.409790  543705 cpu.go:282] Add success.
W0321 15:02:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:02:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:02:13.420176  543705 net.go:648] Add success.
I0321 15:02:13.423372  543705 net.go:770] primary dev: ETH0
I0321 15:02:13.423387  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:02:13.423400  543705 net.go:698] Add success.
W0321 15:02:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:02:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 15:02:14.455157  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:02:14.456925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:02:14.456935  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:02:14.456941  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:02:14.457014  543705 disk_worker.go:494] system disk:vda1
I0321 15:02:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:02:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:02:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:02:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:02:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:02:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:02:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:02:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:02:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:02:23.409790  543705 memory.go:184] no items to output this cycle
I0321 15:02:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 15:02:30.069675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:02:30.072157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:02:30.072163  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380640 0xc000380680]
E0321 15:02:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:02:33.409762  543705 memory.go:184] no items to output this cycle
I0321 15:02:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 15:02:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:02:43.409796  543705 memory.go:191] Add success.
I0321 15:02:43.409814  543705 cpu.go:282] Add success.
I0321 15:02:43.419976  543705 net.go:648] Add success.
I0321 15:02:43.422991  543705 net.go:770] primary dev: ETH0
I0321 15:02:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:02:43.423022  543705 net.go:698] Add success.
I0321 15:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:02:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:02:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:02:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:02:53.409763  543705 memory.go:184] no items to output this cycle
I0321 15:02:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 15:03:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:03:03.409787  543705 memory.go:184] no items to output this cycle
I0321 15:03:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 15:03:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:03:13.409797  543705 memory.go:191] Add success.
I0321 15:03:13.409797  543705 cpu.go:282] Add success.
W0321 15:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:03:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:03:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:03:13.420074  543705 net.go:648] Add success.
I0321 15:03:13.422845  543705 net.go:770] primary dev: ETH0
I0321 15:03:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:03:13.422871  543705 net.go:698] Add success.
I0321 15:03:13.488885  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"821bfae8-ccdb-48ee-904e-afc6fd309e45","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:03:13.488918  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:03:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:03:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:03:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 15:03:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:03:14.456503  543705 disk_worker.go:494] system disk:vda1
I0321 15:03:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:03:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:03:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:03:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:03:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:03:23.409778  543705 memory.go:184] no items to output this cycle
I0321 15:03:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 15:03:30.073674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:03:30.076202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:03:30.076209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002704c0 0xc000270500]
E0321 15:03:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:03:33.409760  543705 memory.go:184] no items to output this cycle
I0321 15:03:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 15:03:38.988640  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:03:38.988646  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:03:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:03:43.410593  543705 memory.go:191] Add success.
I0321 15:03:43.409838  543705 cpu.go:282] Add success.
I0321 15:03:43.420313  543705 net.go:648] Add success.
I0321 15:03:43.423198  543705 net.go:770] primary dev: ETH0
I0321 15:03:43.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:03:43.423228  543705 net.go:698] Add success.
I0321 15:03:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:03:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:03:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:03:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:03:53.409793  543705 memory.go:184] no items to output this cycle
I0321 15:03:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 15:04:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:04:03.409783  543705 memory.go:184] no items to output this cycle
I0321 15:04:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 15:04:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:04:13.409812  543705 memory.go:191] Add success.
I0321 15:04:13.409817  543705 cpu.go:282] Add success.
W0321 15:04:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:04:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:04:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:04:13.420052  543705 net.go:648] Add success.
I0321 15:04:13.422576  543705 net.go:770] primary dev: ETH0
I0321 15:04:13.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:04:13.422601  543705 net.go:698] Add success.
I0321 15:04:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:04:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:04:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 15:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:04:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 15:04:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:04:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:04:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:04:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:04:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:04:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 15:04:23.409783  543705 memory.go:184] no items to output this cycle
I0321 15:04:30.077673  543705 disk_info.go:125] begin check local disk info of client
I0321 15:04:30.080169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:04:30.080175  543705 disk_info.go:196] parse disk info done, disk is : [0xc000159e00 0xc000159e40]
E0321 15:04:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:04:33.409791  543705 memory.go:184] no items to output this cycle
I0321 15:04:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 15:04:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:04:43.409780  543705 memory.go:191] Add success.
I0321 15:04:43.409804  543705 cpu.go:282] Add success.
I0321 15:04:43.420187  543705 net.go:648] Add success.
I0321 15:04:43.423264  543705 net.go:770] primary dev: ETH0
I0321 15:04:43.423278  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:04:43.423292  543705 net.go:698] Add success.
I0321 15:04:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:04:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:04:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:04:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:04:53.409763  543705 memory.go:184] no items to output this cycle
I0321 15:04:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 15:05:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:05:03.409818  543705 memory.go:184] no items to output this cycle
I0321 15:05:03.409831  543705 cpu.go:275] no items to output this cycle
E0321 15:05:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:05:13.409789  543705 memory.go:191] Add success.
I0321 15:05:13.409796  543705 cpu.go:282] Add success.
W0321 15:05:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:05:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:05:13.420119  543705 net.go:648] Add success.
I0321 15:05:13.422893  543705 net.go:770] primary dev: ETH0
I0321 15:05:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:05:13.422918  543705 net.go:698] Add success.
I0321 15:05:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:05:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:05:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 15:05:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:05:14.456522  543705 disk_worker.go:494] system disk:vda1
I0321 15:05:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:05:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:05:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:05:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:05:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:05:23.409775  543705 memory.go:184] no items to output this cycle
I0321 15:05:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 15:05:30.081671  543705 disk_info.go:125] begin check local disk info of client
I0321 15:05:30.084167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:05:30.084174  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470d80 0xc000470dc0]
E0321 15:05:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:05:33.409796  543705 memory.go:184] no items to output this cycle
I0321 15:05:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 15:05:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:05:43.409783  543705 memory.go:191] Add success.
I0321 15:05:43.409801  543705 cpu.go:282] Add success.
I0321 15:05:43.420019  543705 net.go:648] Add success.
I0321 15:05:43.422812  543705 net.go:770] primary dev: ETH0
I0321 15:05:43.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:05:43.422836  543705 net.go:698] Add success.
I0321 15:05:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:05:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:05:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:05:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:05:53.409797  543705 memory.go:184] no items to output this cycle
I0321 15:05:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 15:06:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:06:03.409783  543705 memory.go:184] no items to output this cycle
I0321 15:06:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 15:06:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:06:13.409810  543705 memory.go:191] Add success.
I0321 15:06:13.409821  543705 cpu.go:282] Add success.
W0321 15:06:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:06:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:06:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:06:13.420061  543705 net.go:648] Add success.
I0321 15:06:13.422883  543705 net.go:770] primary dev: ETH0
I0321 15:06:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:06:13.422907  543705 net.go:698] Add success.
I0321 15:06:13.468632  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bde72723-3d91-416f-bb21-85b0d78cd0ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:06:13.468667  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:06:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:06:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:06:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 15:06:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:06:14.456543  543705 disk_worker.go:494] system disk:vda1
I0321 15:06:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:06:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:06:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:06:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:06:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:06:23.409802  543705 memory.go:184] no items to output this cycle
I0321 15:06:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 15:06:30.085677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:06:30.088159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:06:30.088165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004724c0 0xc000472500]
E0321 15:06:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:06:33.409807  543705 memory.go:184] no items to output this cycle
I0321 15:06:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 15:06:38.989742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:06:38.989748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:06:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:06:43.410712  543705 memory.go:191] Add success.
I0321 15:06:43.409797  543705 cpu.go:282] Add success.
I0321 15:06:43.420444  543705 net.go:648] Add success.
I0321 15:06:43.423317  543705 net.go:770] primary dev: ETH0
I0321 15:06:43.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:06:43.423547  543705 net.go:698] Add success.
I0321 15:06:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:06:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:06:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:06:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:06:53.409782  543705 memory.go:184] no items to output this cycle
I0321 15:06:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 15:07:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:07:03.409780  543705 memory.go:184] no items to output this cycle
I0321 15:07:03.409803  543705 cpu.go:275] no items to output this cycle
W0321 15:07:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:07:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:07:13.409729  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:07:13.409813  543705 cpu.go:282] Add success.
E0321 15:07:13.409822  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:07:13.409844  543705 memory.go:191] Add success.
I0321 15:07:13.420048  543705 net.go:648] Add success.
I0321 15:07:13.422832  543705 net.go:770] primary dev: ETH0
I0321 15:07:13.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:07:13.422857  543705 net.go:698] Add success.
I0321 15:07:13.453423  543705 event_worker.go:152] Polling the log file for events...
W0321 15:07:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:07:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 15:07:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:07:14.455930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:07:14.455939  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:07:14.455945  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:07:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 15:07:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:07:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:07:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:07:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:07:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:07:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:07:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:07:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:07:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:07:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 15:07:23.409784  543705 memory.go:184] no items to output this cycle
I0321 15:07:30.089682  543705 disk_info.go:125] begin check local disk info of client
I0321 15:07:30.092117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:07:30.092123  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508580 0xc0005085c0]
E0321 15:07:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:07:33.409797  543705 memory.go:184] no items to output this cycle
I0321 15:07:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 15:07:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:07:43.409789  543705 memory.go:191] Add success.
I0321 15:07:43.409793  543705 cpu.go:282] Add success.
I0321 15:07:43.419899  543705 net.go:648] Add success.
I0321 15:07:43.423097  543705 net.go:770] primary dev: ETH0
I0321 15:07:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:07:43.423127  543705 net.go:698] Add success.
I0321 15:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:07:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:07:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:07:53.410253  543705 memory.go:184] no items to output this cycle
I0321 15:07:53.410253  543705 cpu.go:275] no items to output this cycle
E0321 15:08:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:08:03.409780  543705 memory.go:184] no items to output this cycle
I0321 15:08:03.409805  543705 cpu.go:275] no items to output this cycle
W0321 15:08:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:08:13.409735  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:08:13.409743  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:08:13.409823  543705 cpu.go:282] Add success.
E0321 15:08:13.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:08:13.409851  543705 memory.go:191] Add success.
I0321 15:08:13.420154  543705 net.go:648] Add success.
I0321 15:08:13.422995  543705 net.go:770] primary dev: ETH0
I0321 15:08:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:08:13.423024  543705 net.go:698] Add success.
I0321 15:08:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:08:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:08:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 15:08:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:08:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 15:08:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:08:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:08:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:08:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:08:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:08:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:08:23.409769  543705 memory.go:184] no items to output this cycle
I0321 15:08:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 15:08:30.093676  543705 disk_info.go:125] begin check local disk info of client
I0321 15:08:30.096191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:08:30.096202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e5580 0xc0003e55c0]
E0321 15:08:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:08:33.409767  543705 memory.go:184] no items to output this cycle
I0321 15:08:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 15:08:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:08:43.409785  543705 memory.go:191] Add success.
I0321 15:08:43.409808  543705 cpu.go:282] Add success.
I0321 15:08:43.419840  543705 net.go:648] Add success.
I0321 15:08:43.423082  543705 net.go:770] primary dev: ETH0
I0321 15:08:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:08:43.423107  543705 net.go:698] Add success.
I0321 15:08:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:08:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:08:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:08:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:08:53.409762  543705 memory.go:184] no items to output this cycle
I0321 15:08:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 15:09:03.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:09:03.409870  543705 memory.go:184] no items to output this cycle
I0321 15:09:03.409980  543705 cpu.go:275] no items to output this cycle
E0321 15:09:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:09:13.409786  543705 memory.go:191] Add success.
I0321 15:09:13.409803  543705 cpu.go:282] Add success.
W0321 15:09:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:09:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:09:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:09:13.420049  543705 net.go:648] Add success.
I0321 15:09:13.422722  543705 net.go:770] primary dev: ETH0
I0321 15:09:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:09:13.422783  543705 net.go:698] Add success.
I0321 15:09:13.468270  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5d174c75-c000-4e51-9b0e-5df4a67c7cfe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:09:13.468304  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:09:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:09:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:09:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 15:09:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:09:14.456498  543705 disk_worker.go:494] system disk:vda1
I0321 15:09:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:09:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:09:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:09:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:09:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:09:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:09:23.409762  543705 memory.go:184] no items to output this cycle
I0321 15:09:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 15:09:30.097675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:09:30.100148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:09:30.100154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047afc0 0xc00047b000]
E0321 15:09:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:09:33.409797  543705 memory.go:184] no items to output this cycle
I0321 15:09:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 15:09:38.992650  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:09:38.992656  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:09:43.410757  543705 memory.go:191] Add success.
I0321 15:09:43.409797  543705 cpu.go:282] Add success.
I0321 15:09:43.420458  543705 net.go:648] Add success.
I0321 15:09:43.423345  543705 net.go:770] primary dev: ETH0
I0321 15:09:43.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:09:43.423374  543705 net.go:698] Add success.
I0321 15:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:09:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:09:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:09:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:09:53.409797  543705 memory.go:184] no items to output this cycle
I0321 15:09:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 15:10:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:10:03.409795  543705 memory.go:184] no items to output this cycle
I0321 15:10:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:10:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:10:13.409794  543705 memory.go:191] Add success.
W0321 15:10:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 15:10:13.409827  543705 cpu.go:282] Add success.
W0321 15:10:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:10:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:10:13.420208  543705 net.go:648] Add success.
I0321 15:10:13.422874  543705 net.go:770] primary dev: ETH0
I0321 15:10:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:10:13.422898  543705 net.go:698] Add success.
I0321 15:10:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:10:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:10:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 15:10:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:10:14.456499  543705 disk_worker.go:494] system disk:vda1
I0321 15:10:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:10:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:10:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:10:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:10:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:10:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:10:23.409791  543705 memory.go:184] no items to output this cycle
I0321 15:10:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 15:10:30.101678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:10:30.104153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:10:30.104160  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353800 0xc000353840]
E0321 15:10:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:10:33.409795  543705 memory.go:184] no items to output this cycle
I0321 15:10:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 15:10:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:10:43.409783  543705 memory.go:191] Add success.
I0321 15:10:43.409816  543705 cpu.go:282] Add success.
I0321 15:10:43.419870  543705 net.go:648] Add success.
I0321 15:10:43.422838  543705 net.go:770] primary dev: ETH0
I0321 15:10:43.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:10:43.422868  543705 net.go:698] Add success.
I0321 15:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:10:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:10:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:10:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:10:53.409791  543705 memory.go:184] no items to output this cycle
I0321 15:10:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:11:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:11:03.409781  543705 memory.go:184] no items to output this cycle
I0321 15:11:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 15:11:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:11:13.409809  543705 memory.go:191] Add success.
I0321 15:11:13.409816  543705 cpu.go:282] Add success.
W0321 15:11:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:11:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:11:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:11:13.419753  543705 net.go:648] Add success.
I0321 15:11:13.422605  543705 net.go:770] primary dev: ETH0
I0321 15:11:13.422620  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:11:13.422633  543705 net.go:698] Add success.
I0321 15:11:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:11:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:11:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 15:11:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:11:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 15:11:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:11:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:11:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:11:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:11:23.409793  543705 memory.go:184] no items to output this cycle
I0321 15:11:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 15:11:30.105677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:11:30.108164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:11:30.108170  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b500 0xc00007b540]
E0321 15:11:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:11:33.409792  543705 memory.go:184] no items to output this cycle
I0321 15:11:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 15:11:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:11:43.409806  543705 memory.go:191] Add success.
I0321 15:11:43.409808  543705 cpu.go:282] Add success.
I0321 15:11:43.419992  543705 net.go:648] Add success.
I0321 15:11:43.422694  543705 net.go:770] primary dev: ETH0
I0321 15:11:43.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:11:43.422724  543705 net.go:698] Add success.
I0321 15:11:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:11:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:11:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:11:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:11:53.409796  543705 memory.go:184] no items to output this cycle
I0321 15:11:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 15:12:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:12:03.409779  543705 memory.go:184] no items to output this cycle
I0321 15:12:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 15:12:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:12:13.409811  543705 memory.go:191] Add success.
I0321 15:12:13.409822  543705 cpu.go:282] Add success.
W0321 15:12:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:12:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:12:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:12:13.420189  543705 net.go:648] Add success.
I0321 15:12:13.422773  543705 net.go:770] primary dev: ETH0
I0321 15:12:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:12:13.422797  543705 net.go:698] Add success.
I0321 15:12:13.463370  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"58c4048b-749c-4af2-8ca4-d0277aa5b60f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:12:13.463402  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 15:12:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:12:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 15:12:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:12:14.455906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:12:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:12:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:12:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 15:12:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:12:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:12:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:12:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:12:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:12:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:12:16.457986  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:12:16.472303  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:12:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:12:23.409769  543705 memory.go:184] no items to output this cycle
I0321 15:12:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 15:12:30.109679  543705 disk_info.go:125] begin check local disk info of client
I0321 15:12:30.112212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:12:30.112218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
E0321 15:12:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:12:33.409797  543705 memory.go:184] no items to output this cycle
I0321 15:12:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 15:12:38.993734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:12:38.993740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:12:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:12:43.410634  543705 memory.go:191] Add success.
I0321 15:12:43.409798  543705 cpu.go:282] Add success.
I0321 15:12:43.420333  543705 net.go:648] Add success.
I0321 15:12:43.423101  543705 net.go:770] primary dev: ETH0
I0321 15:12:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:12:43.423126  543705 net.go:698] Add success.
I0321 15:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:12:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:12:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:12:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:12:53.409788  543705 memory.go:184] no items to output this cycle
I0321 15:12:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 15:13:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:13:03.409778  543705 memory.go:184] no items to output this cycle
I0321 15:13:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:13:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:13:13.409783  543705 memory.go:191] Add success.
I0321 15:13:13.409805  543705 cpu.go:282] Add success.
W0321 15:13:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:13:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:13:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:13:13.420155  543705 net.go:648] Add success.
I0321 15:13:13.422739  543705 net.go:770] primary dev: ETH0
I0321 15:13:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:13:13.422766  543705 net.go:698] Add success.
I0321 15:13:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:13:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:13:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 15:13:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:13:14.456635  543705 disk_worker.go:494] system disk:vda1
I0321 15:13:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:13:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:13:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:13:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:13:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:13:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:13:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:13:23.409774  543705 memory.go:184] no items to output this cycle
I0321 15:13:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 15:13:30.113674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:13:30.116234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:13:30.116240  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048bec0 0xc00048bf00]
E0321 15:13:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:13:33.409772  543705 memory.go:184] no items to output this cycle
I0321 15:13:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 15:13:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:13:43.409807  543705 memory.go:191] Add success.
I0321 15:13:43.409817  543705 cpu.go:282] Add success.
I0321 15:13:43.419892  543705 net.go:648] Add success.
I0321 15:13:43.422381  543705 net.go:770] primary dev: ETH0
I0321 15:13:43.422400  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:13:43.422415  543705 net.go:698] Add success.
I0321 15:13:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:13:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:13:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:13:53.409785  543705 memory.go:184] no items to output this cycle
I0321 15:13:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 15:14:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:14:03.409782  543705 memory.go:184] no items to output this cycle
I0321 15:14:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 15:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:14:13.409793  543705 memory.go:191] Add success.
I0321 15:14:13.409794  543705 cpu.go:282] Add success.
W0321 15:14:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:14:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:14:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:14:13.420132  543705 net.go:648] Add success.
I0321 15:14:13.422871  543705 net.go:770] primary dev: ETH0
I0321 15:14:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:14:13.422896  543705 net.go:698] Add success.
I0321 15:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:14:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:14:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 15:14:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:14:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 15:14:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:14:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:14:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:14:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:14:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:14:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:14:23.409785  543705 memory.go:184] no items to output this cycle
I0321 15:14:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 15:14:30.117674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:14:30.120251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:14:30.120257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048af40 0xc00048af80]
E0321 15:14:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:14:33.409770  543705 memory.go:184] no items to output this cycle
I0321 15:14:33.409776  543705 cpu.go:275] no items to output this cycle
E0321 15:14:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:14:43.409808  543705 memory.go:191] Add success.
I0321 15:14:43.409819  543705 cpu.go:282] Add success.
I0321 15:14:43.420032  543705 net.go:648] Add success.
I0321 15:14:43.422869  543705 net.go:770] primary dev: ETH0
I0321 15:14:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:14:43.422894  543705 net.go:698] Add success.
I0321 15:14:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:14:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:14:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:14:53.410379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:14:53.410395  543705 memory.go:184] no items to output this cycle
I0321 15:14:53.410404  543705 cpu.go:275] no items to output this cycle
E0321 15:15:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:15:03.409792  543705 memory.go:184] no items to output this cycle
I0321 15:15:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:15:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:15:13.409790  543705 memory.go:191] Add success.
I0321 15:15:13.409791  543705 cpu.go:282] Add success.
W0321 15:15:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:15:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:15:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:15:13.420540  543705 net.go:648] Add success.
I0321 15:15:13.423233  543705 net.go:770] primary dev: ETH0
I0321 15:15:13.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:15:13.423259  543705 net.go:698] Add success.
I0321 15:15:13.469621  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"945bddc5-839c-48b2-92b5-d1ba78f8c6a9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:15:13.469670  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:15:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:15:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:15:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 15:15:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:15:14.456748  543705 disk_worker.go:494] system disk:vda1
I0321 15:15:14.456783  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:15:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:15:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:15:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:15:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:15:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:15:23.409788  543705 memory.go:184] no items to output this cycle
I0321 15:15:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 15:15:30.121678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:15:30.124162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:15:30.124168  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046cd40 0xc00046cd80]
E0321 15:15:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:15:33.409797  543705 memory.go:184] no items to output this cycle
I0321 15:15:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 15:15:38.996675  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:15:38.996682  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:15:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:15:43.410673  543705 memory.go:191] Add success.
I0321 15:15:43.409800  543705 cpu.go:282] Add success.
I0321 15:15:43.420389  543705 net.go:648] Add success.
I0321 15:15:43.423437  543705 net.go:770] primary dev: ETH0
I0321 15:15:43.423451  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:15:43.423463  543705 net.go:698] Add success.
I0321 15:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:15:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:15:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:15:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:15:53.409797  543705 memory.go:184] no items to output this cycle
I0321 15:15:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 15:16:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:16:03.409810  543705 memory.go:184] no items to output this cycle
I0321 15:16:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 15:16:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:16:13.409794  543705 memory.go:191] Add success.
I0321 15:16:13.409797  543705 cpu.go:282] Add success.
W0321 15:16:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:16:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:16:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:16:13.420057  543705 net.go:648] Add success.
I0321 15:16:13.422963  543705 net.go:770] primary dev: ETH0
I0321 15:16:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:16:13.422991  543705 net.go:698] Add success.
I0321 15:16:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:16:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:16:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 15:16:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:16:14.456542  543705 disk_worker.go:494] system disk:vda1
I0321 15:16:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:16:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:16:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:16:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:16:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:16:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:16:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:16:23.409775  543705 memory.go:184] no items to output this cycle
I0321 15:16:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 15:16:30.125678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:16:30.128173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:16:30.128180  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034b3c0 0xc00034b400]
E0321 15:16:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:16:33.409789  543705 memory.go:184] no items to output this cycle
I0321 15:16:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:16:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:16:43.409780  543705 memory.go:191] Add success.
I0321 15:16:43.409800  543705 cpu.go:282] Add success.
I0321 15:16:43.419985  543705 net.go:648] Add success.
I0321 15:16:43.422872  543705 net.go:770] primary dev: ETH0
I0321 15:16:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:16:43.422899  543705 net.go:698] Add success.
I0321 15:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:16:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:16:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:16:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:16:53.409790  543705 memory.go:184] no items to output this cycle
I0321 15:16:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:17:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:17:03.409791  543705 memory.go:184] no items to output this cycle
I0321 15:17:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 15:17:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:17:13.409789  543705 cpu.go:282] Add success.
I0321 15:17:13.409797  543705 memory.go:191] Add success.
W0321 15:17:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:17:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:17:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:17:13.420567  543705 net.go:648] Add success.
I0321 15:17:13.423168  543705 net.go:770] primary dev: ETH0
I0321 15:17:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:17:13.423196  543705 net.go:698] Add success.
I0321 15:17:13.452790  543705 event_worker.go:152] Polling the log file for events...
W0321 15:17:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:17:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 15:17:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:17:14.456113  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:17:14.456123  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:17:14.456129  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:17:14.456458  543705 disk_worker.go:494] system disk:vda1
I0321 15:17:14.456488  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:17:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:17:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:17:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:17:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:17:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:17:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:17:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:17:23.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:17:23.409866  543705 memory.go:184] no items to output this cycle
I0321 15:17:23.409923  543705 cpu.go:275] no items to output this cycle
I0321 15:17:30.129677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:17:30.132182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:17:30.132188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8040 0xc0004a8080]
E0321 15:17:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:17:33.409773  543705 cpu.go:275] no items to output this cycle
I0321 15:17:33.409780  543705 memory.go:184] no items to output this cycle
E0321 15:17:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:17:43.409819  543705 memory.go:191] Add success.
I0321 15:17:43.409820  543705 cpu.go:282] Add success.
I0321 15:17:43.419968  543705 net.go:648] Add success.
I0321 15:17:43.422404  543705 net.go:770] primary dev: ETH0
I0321 15:17:43.422416  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:17:43.422429  543705 net.go:698] Add success.
I0321 15:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:17:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:17:53.409768  543705 memory.go:184] no items to output this cycle
I0321 15:17:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 15:18:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:18:03.409810  543705 memory.go:184] no items to output this cycle
I0321 15:18:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 15:18:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:18:13.409777  543705 memory.go:191] Add success.
W0321 15:18:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 15:18:13.409804  543705 cpu.go:282] Add success.
W0321 15:18:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:18:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:18:13.420062  543705 net.go:648] Add success.
I0321 15:18:13.422956  543705 net.go:770] primary dev: ETH0
I0321 15:18:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:18:13.422986  543705 net.go:698] Add success.
I0321 15:18:13.500102  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"72f1f245-5757-4fda-91ec-eeea5646cb23","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:18:13.500135  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:18:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:18:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:18:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 15:18:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:18:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 15:18:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:18:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:18:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:18:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:18:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:18:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:18:23.409781  543705 memory.go:184] no items to output this cycle
I0321 15:18:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 15:18:30.133675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:18:30.136189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:18:30.136197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003773c0 0xc000377400]
E0321 15:18:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:18:33.409802  543705 memory.go:184] no items to output this cycle
I0321 15:18:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 15:18:38.997732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:18:38.997738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:18:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:18:43.410773  543705 memory.go:191] Add success.
I0321 15:18:43.409819  543705 cpu.go:282] Add success.
I0321 15:18:43.420465  543705 net.go:648] Add success.
I0321 15:18:43.423503  543705 net.go:770] primary dev: ETH0
I0321 15:18:43.423516  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:18:43.423528  543705 net.go:698] Add success.
I0321 15:18:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:18:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:18:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:18:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:18:53.409795  543705 memory.go:184] no items to output this cycle
I0321 15:18:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 15:19:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:19:03.409814  543705 memory.go:184] no items to output this cycle
I0321 15:19:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 15:19:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:19:13.409790  543705 memory.go:191] Add success.
I0321 15:19:13.409806  543705 cpu.go:282] Add success.
W0321 15:19:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:19:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:19:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:19:13.420045  543705 net.go:648] Add success.
I0321 15:19:13.422964  543705 net.go:770] primary dev: ETH0
I0321 15:19:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:19:13.422988  543705 net.go:698] Add success.
I0321 15:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:19:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:19:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 15:19:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:19:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 15:19:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:19:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:19:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:19:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:19:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:19:23.410195  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:19:23.410212  543705 memory.go:184] no items to output this cycle
I0321 15:19:23.410226  543705 cpu.go:275] no items to output this cycle
I0321 15:19:30.137669  543705 disk_info.go:125] begin check local disk info of client
I0321 15:19:30.140179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:19:30.140186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370c40 0xc000370c80]
E0321 15:19:33.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:19:33.409902  543705 memory.go:184] no items to output this cycle
I0321 15:19:33.409993  543705 cpu.go:275] no items to output this cycle
E0321 15:19:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:19:43.409791  543705 memory.go:191] Add success.
I0321 15:19:43.409804  543705 cpu.go:282] Add success.
I0321 15:19:43.419897  543705 net.go:648] Add success.
I0321 15:19:43.422553  543705 net.go:770] primary dev: ETH0
I0321 15:19:43.422567  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:19:43.422578  543705 net.go:698] Add success.
I0321 15:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:19:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:19:53.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:19:53.410253  543705 cpu.go:275] no items to output this cycle
I0321 15:19:53.410255  543705 memory.go:184] no items to output this cycle
E0321 15:20:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:20:03.409815  543705 memory.go:184] no items to output this cycle
I0321 15:20:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 15:20:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:20:13.409780  543705 memory.go:191] Add success.
I0321 15:20:13.409799  543705 cpu.go:282] Add success.
W0321 15:20:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:20:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:20:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:20:13.420120  543705 net.go:648] Add success.
I0321 15:20:13.423046  543705 net.go:770] primary dev: ETH0
I0321 15:20:13.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:20:13.423073  543705 net.go:698] Add success.
I0321 15:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:20:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:20:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 15:20:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:20:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 15:20:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:20:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:20:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:20:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:20:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:20:23.409805  543705 memory.go:184] no items to output this cycle
I0321 15:20:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 15:20:30.141678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:20:30.144207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:20:30.144214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e77c0 0xc0001e7800]
E0321 15:20:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:20:33.409798  543705 memory.go:184] no items to output this cycle
I0321 15:20:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 15:20:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:20:43.409787  543705 memory.go:191] Add success.
I0321 15:20:43.409804  543705 cpu.go:282] Add success.
I0321 15:20:43.419871  543705 net.go:648] Add success.
I0321 15:20:43.422845  543705 net.go:770] primary dev: ETH0
I0321 15:20:43.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:20:43.422869  543705 net.go:698] Add success.
I0321 15:20:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:20:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:20:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:20:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:20:53.409792  543705 memory.go:184] no items to output this cycle
I0321 15:20:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 15:21:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:21:03.409785  543705 memory.go:184] no items to output this cycle
I0321 15:21:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 15:21:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:21:13.409813  543705 memory.go:191] Add success.
I0321 15:21:13.409820  543705 cpu.go:282] Add success.
W0321 15:21:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:21:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:21:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:21:13.420190  543705 net.go:648] Add success.
I0321 15:21:13.422769  543705 net.go:770] primary dev: ETH0
I0321 15:21:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:21:13.422793  543705 net.go:698] Add success.
I0321 15:21:13.468071  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aca72cb3-d4ec-47cb-9e7b-a5d6e3dd6036","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:21:13.468114  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:21:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:21:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:21:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 15:21:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:21:14.456697  543705 disk_worker.go:494] system disk:vda1
I0321 15:21:14.456741  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:21:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:21:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:21:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:21:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:21:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:21:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:21:23.409811  543705 memory.go:184] no items to output this cycle
I0321 15:21:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 15:21:30.145680  543705 disk_info.go:125] begin check local disk info of client
I0321 15:21:30.148212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:21:30.148219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e7240 0xc0001e7280]
E0321 15:21:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:21:33.409785  543705 memory.go:184] no items to output this cycle
I0321 15:21:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 15:21:38.997883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:21:38.997890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:21:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:21:43.410749  543705 memory.go:191] Add success.
I0321 15:21:43.409809  543705 cpu.go:282] Add success.
I0321 15:21:43.420447  543705 net.go:648] Add success.
I0321 15:21:43.423496  543705 net.go:770] primary dev: ETH0
I0321 15:21:43.423509  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:21:43.423522  543705 net.go:698] Add success.
I0321 15:21:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:21:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:21:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:21:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:21:53.409809  543705 memory.go:184] no items to output this cycle
I0321 15:21:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 15:22:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:22:03.409790  543705 memory.go:184] no items to output this cycle
I0321 15:22:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 15:22:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:22:13.409819  543705 memory.go:191] Add success.
I0321 15:22:13.409828  543705 cpu.go:282] Add success.
W0321 15:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:22:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:22:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:22:13.420266  543705 net.go:648] Add success.
I0321 15:22:13.423364  543705 net.go:770] primary dev: ETH0
I0321 15:22:13.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:22:13.423390  543705 net.go:698] Add success.
W0321 15:22:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:22:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 15:22:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:22:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:22:14.456951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:22:14.456957  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:22:14.457027  543705 disk_worker.go:494] system disk:vda1
I0321 15:22:14.457058  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:22:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:22:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:22:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:22:16.458035  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:22:16.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:22:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:22:16.472561  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:22:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:22:23.409809  543705 memory.go:184] no items to output this cycle
I0321 15:22:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 15:22:30.149676  543705 disk_info.go:125] begin check local disk info of client
I0321 15:22:30.152216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:22:30.152223  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469280 0xc0004692c0]
E0321 15:22:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:22:33.409906  543705 memory.go:184] no items to output this cycle
I0321 15:22:33.409889  543705 cpu.go:275] no items to output this cycle
E0321 15:22:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:22:43.409800  543705 memory.go:191] Add success.
I0321 15:22:43.409799  543705 cpu.go:282] Add success.
I0321 15:22:43.420108  543705 net.go:648] Add success.
I0321 15:22:43.422895  543705 net.go:770] primary dev: ETH0
I0321 15:22:43.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:22:43.422919  543705 net.go:698] Add success.
I0321 15:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:22:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:22:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:22:53.410376  543705 memory.go:184] no items to output this cycle
I0321 15:22:53.410404  543705 cpu.go:275] no items to output this cycle
E0321 15:23:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:23:03.409794  543705 memory.go:184] no items to output this cycle
I0321 15:23:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 15:23:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:23:13.409796  543705 memory.go:191] Add success.
W0321 15:23:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:23:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:23:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:23:13.409857  543705 cpu.go:282] Add success.
I0321 15:23:13.420341  543705 net.go:648] Add success.
I0321 15:23:13.422933  543705 net.go:770] primary dev: ETH0
I0321 15:23:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:23:13.422961  543705 net.go:698] Add success.
I0321 15:23:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:23:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:23:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 15:23:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:23:14.456618  543705 disk_worker.go:494] system disk:vda1
I0321 15:23:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:23:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:23:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:23:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:23:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:23:16.472533  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:23:23.410553  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:23:23.410571  543705 memory.go:184] no items to output this cycle
I0321 15:23:23.410589  543705 cpu.go:275] no items to output this cycle
I0321 15:23:30.153676  543705 disk_info.go:125] begin check local disk info of client
I0321 15:23:30.156254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:23:30.156261  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492d80 0xc000492dc0]
E0321 15:23:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:23:33.409777  543705 memory.go:184] no items to output this cycle
I0321 15:23:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 15:23:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:23:43.409798  543705 memory.go:191] Add success.
I0321 15:23:43.409810  543705 cpu.go:282] Add success.
I0321 15:23:43.420003  543705 net.go:648] Add success.
I0321 15:23:43.422969  543705 net.go:770] primary dev: ETH0
I0321 15:23:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:23:43.422999  543705 net.go:698] Add success.
I0321 15:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:23:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:23:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:23:53.409795  543705 memory.go:184] no items to output this cycle
I0321 15:23:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 15:24:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:24:03.409827  543705 cpu.go:275] no items to output this cycle
I0321 15:24:03.409831  543705 memory.go:184] no items to output this cycle
E0321 15:24:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:24:13.409789  543705 memory.go:191] Add success.
W0321 15:24:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 15:24:13.409824  543705 cpu.go:282] Add success.
W0321 15:24:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:24:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:24:13.420079  543705 net.go:648] Add success.
I0321 15:24:13.423388  543705 net.go:770] primary dev: ETH0
I0321 15:24:13.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:24:13.423417  543705 net.go:698] Add success.
I0321 15:24:13.467747  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ac007f6-ab9f-496d-8f3c-d04e70042f3c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:24:13.467782  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:24:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:24:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:24:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 15:24:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:24:14.456698  543705 disk_worker.go:494] system disk:vda1
I0321 15:24:14.456738  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:24:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:24:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:24:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:24:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:24:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:24:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:24:23.409766  543705 memory.go:184] no items to output this cycle
I0321 15:24:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 15:24:30.157677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:24:30.160241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:24:30.160248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0300 0xc0002a0340]
E0321 15:24:33.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:24:33.409866  543705 memory.go:184] no items to output this cycle
I0321 15:24:33.409871  543705 cpu.go:275] no items to output this cycle
I0321 15:24:39.000695  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:24:39.000702  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:24:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:24:43.410706  543705 memory.go:191] Add success.
I0321 15:24:43.409788  543705 cpu.go:282] Add success.
I0321 15:24:43.420459  543705 net.go:648] Add success.
I0321 15:24:43.423049  543705 net.go:770] primary dev: ETH0
I0321 15:24:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:24:43.423077  543705 net.go:698] Add success.
I0321 15:24:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:24:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:24:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:24:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:24:53.409761  543705 memory.go:184] no items to output this cycle
I0321 15:24:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 15:25:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:25:03.409802  543705 memory.go:184] no items to output this cycle
I0321 15:25:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 15:25:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:25:13.409828  543705 memory.go:191] Add success.
I0321 15:25:13.409833  543705 cpu.go:282] Add success.
W0321 15:25:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:25:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:25:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:25:13.420155  543705 net.go:770] primary dev: ETH0
I0321 15:25:13.420171  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:25:13.420185  543705 net.go:698] Add success.
I0321 15:25:13.420556  543705 net.go:648] Add success.
I0321 15:25:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:25:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:25:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 15:25:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:25:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 15:25:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:25:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:25:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:25:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:25:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:25:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:25:23.409798  543705 memory.go:184] no items to output this cycle
I0321 15:25:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 15:25:30.161667  543705 disk_info.go:125] begin check local disk info of client
I0321 15:25:30.164156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:25:30.164162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e5e00 0xc0003e5e40]
E0321 15:25:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:25:33.409876  543705 cpu.go:275] no items to output this cycle
I0321 15:25:33.409890  543705 memory.go:184] no items to output this cycle
E0321 15:25:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:25:43.409786  543705 memory.go:191] Add success.
I0321 15:25:43.409790  543705 cpu.go:282] Add success.
I0321 15:25:43.419908  543705 net.go:648] Add success.
I0321 15:25:43.422489  543705 net.go:770] primary dev: ETH0
I0321 15:25:43.422502  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:25:43.422513  543705 net.go:698] Add success.
I0321 15:25:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:25:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:25:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:25:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:25:53.409791  543705 memory.go:184] no items to output this cycle
I0321 15:25:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:26:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:26:03.409791  543705 memory.go:184] no items to output this cycle
I0321 15:26:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:26:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:26:13.409787  543705 memory.go:191] Add success.
I0321 15:26:13.409791  543705 cpu.go:282] Add success.
W0321 15:26:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:26:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:26:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:26:13.420074  543705 net.go:648] Add success.
I0321 15:26:13.422685  543705 net.go:770] primary dev: ETH0
I0321 15:26:13.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:26:13.422714  543705 net.go:698] Add success.
I0321 15:26:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:26:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:26:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 15:26:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:26:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 15:26:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:26:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:26:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:26:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:26:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:26:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:26:23.409789  543705 memory.go:184] no items to output this cycle
I0321 15:26:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 15:26:30.165675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:26:30.168129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:26:30.168135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5dc0 0xc0002a5e00]
E0321 15:26:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:26:33.409787  543705 memory.go:184] no items to output this cycle
I0321 15:26:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 15:26:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:26:43.409790  543705 memory.go:191] Add success.
I0321 15:26:43.409795  543705 cpu.go:282] Add success.
I0321 15:26:43.419986  543705 net.go:648] Add success.
I0321 15:26:43.422571  543705 net.go:770] primary dev: ETH0
I0321 15:26:43.422583  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:26:43.422594  543705 net.go:698] Add success.
I0321 15:26:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:26:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:26:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:26:53.409767  543705 memory.go:184] no items to output this cycle
I0321 15:26:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 15:27:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:27:03.409788  543705 memory.go:184] no items to output this cycle
I0321 15:27:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 15:27:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:27:13.409815  543705 memory.go:191] Add success.
I0321 15:27:13.409823  543705 cpu.go:282] Add success.
W0321 15:27:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:27:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:27:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:27:13.420137  543705 net.go:648] Add success.
I0321 15:27:13.428604  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 15:27:13.428681  543705 net.go:770] primary dev: ETH0
I0321 15:27:13.428694  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:27:13.428705  543705 net.go:698] Add success.
I0321 15:27:13.453277  543705 event_worker.go:152] Polling the log file for events...
I0321 15:27:13.554994  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"163f94bb-da67-45dc-91a1-3079073449b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:27:13.555030  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 15:27:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:27:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 15:27:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:27:14.456156  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:27:14.456165  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:27:14.456170  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:27:14.456473  543705 disk_worker.go:494] system disk:vda1
I0321 15:27:14.456517  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:27:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:27:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:27:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:27:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:27:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:27:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:27:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:27:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:27:23.409764  543705 memory.go:184] no items to output this cycle
I0321 15:27:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 15:27:30.169673  543705 disk_info.go:125] begin check local disk info of client
I0321 15:27:30.172124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:27:30.172133  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482000 0xc000482040]
E0321 15:27:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:27:33.409759  543705 memory.go:184] no items to output this cycle
I0321 15:27:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 15:27:39.001741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:27:39.001747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:27:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:27:43.410637  543705 memory.go:191] Add success.
I0321 15:27:43.409790  543705 cpu.go:282] Add success.
I0321 15:27:43.420344  543705 net.go:648] Add success.
I0321 15:27:43.423056  543705 net.go:770] primary dev: ETH0
I0321 15:27:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:27:43.423085  543705 net.go:698] Add success.
I0321 15:27:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:27:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:27:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:27:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:27:53.409763  543705 memory.go:184] no items to output this cycle
I0321 15:27:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 15:28:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:28:03.409817  543705 memory.go:184] no items to output this cycle
I0321 15:28:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 15:28:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:28:13.409782  543705 memory.go:191] Add success.
W0321 15:28:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:28:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:28:13.409821  543705 cpu.go:282] Add success.
I0321 15:28:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:28:13.420564  543705 net.go:648] Add success.
I0321 15:28:13.423470  543705 net.go:770] primary dev: ETH0
I0321 15:28:13.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:28:13.423496  543705 net.go:698] Add success.
I0321 15:28:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:28:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:28:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 15:28:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:28:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 15:28:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:28:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:28:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:28:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:28:23.409785  543705 memory.go:184] no items to output this cycle
I0321 15:28:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 15:28:30.173673  543705 disk_info.go:125] begin check local disk info of client
I0321 15:28:30.176158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:28:30.176165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5840 0xc0000c5880]
E0321 15:28:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:28:33.409786  543705 memory.go:184] no items to output this cycle
I0321 15:28:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 15:28:43.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:28:43.409894  543705 memory.go:191] Add success.
I0321 15:28:43.409968  543705 cpu.go:282] Add success.
I0321 15:28:43.419707  543705 net.go:648] Add success.
I0321 15:28:43.422439  543705 net.go:770] primary dev: ETH0
I0321 15:28:43.422452  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:28:43.422463  543705 net.go:698] Add success.
I0321 15:28:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:28:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:28:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:28:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:28:53.409792  543705 memory.go:184] no items to output this cycle
I0321 15:28:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 15:29:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:29:03.409800  543705 cpu.go:275] no items to output this cycle
I0321 15:29:03.409806  543705 memory.go:184] no items to output this cycle
E0321 15:29:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:29:13.409796  543705 memory.go:191] Add success.
I0321 15:29:13.409800  543705 cpu.go:282] Add success.
W0321 15:29:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:29:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:29:13.420072  543705 net.go:648] Add success.
I0321 15:29:13.422775  543705 net.go:770] primary dev: ETH0
I0321 15:29:13.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:29:13.422805  543705 net.go:698] Add success.
I0321 15:29:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:29:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:29:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 15:29:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:29:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 15:29:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:29:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:29:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:29:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:29:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:29:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:29:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 15:29:23.409782  543705 memory.go:184] no items to output this cycle
I0321 15:29:30.177678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:29:30.180169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:29:30.180175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002616c0 0xc000261700]
E0321 15:29:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:29:33.409791  543705 memory.go:184] no items to output this cycle
I0321 15:29:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 15:29:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:29:43.409883  543705 memory.go:191] Add success.
I0321 15:29:43.409905  543705 cpu.go:282] Add success.
I0321 15:29:43.419748  543705 net.go:648] Add success.
I0321 15:29:43.422293  543705 net.go:770] primary dev: ETH0
I0321 15:29:43.422306  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:29:43.422318  543705 net.go:698] Add success.
I0321 15:29:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:29:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:29:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:29:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:29:53.409797  543705 memory.go:184] no items to output this cycle
I0321 15:29:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 15:30:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:30:03.409813  543705 memory.go:184] no items to output this cycle
I0321 15:30:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 15:30:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:30:13.409810  543705 memory.go:191] Add success.
I0321 15:30:13.409815  543705 cpu.go:282] Add success.
W0321 15:30:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:30:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:30:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:30:13.420166  543705 net.go:648] Add success.
I0321 15:30:13.422842  543705 net.go:770] primary dev: ETH0
I0321 15:30:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:30:13.422871  543705 net.go:698] Add success.
I0321 15:30:13.470110  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e0b487ba-70eb-4fcb-bd0f-cb16ebfdd204","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:30:13.470143  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:30:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:30:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:30:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 15:30:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:30:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 15:30:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:30:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:30:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:30:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:30:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:30:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:30:23.409797  543705 memory.go:184] no items to output this cycle
I0321 15:30:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 15:30:30.181674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:30:30.184215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:30:30.184220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4200 0xc0002a4240]
E0321 15:30:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:30:33.409778  543705 memory.go:184] no items to output this cycle
I0321 15:30:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 15:30:39.001893  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:30:39.001900  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:30:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:30:43.410620  543705 memory.go:191] Add success.
I0321 15:30:43.409788  543705 cpu.go:282] Add success.
I0321 15:30:43.419724  543705 net.go:648] Add success.
I0321 15:30:43.422202  543705 net.go:770] primary dev: ETH0
I0321 15:30:43.422215  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:30:43.422227  543705 net.go:698] Add success.
I0321 15:30:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:30:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:30:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:30:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:30:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 15:30:53.409785  543705 memory.go:184] no items to output this cycle
E0321 15:31:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:31:03.409813  543705 memory.go:184] no items to output this cycle
I0321 15:31:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 15:31:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:31:13.409794  543705 memory.go:191] Add success.
I0321 15:31:13.409803  543705 cpu.go:282] Add success.
W0321 15:31:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:31:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:31:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:31:13.420187  543705 net.go:648] Add success.
I0321 15:31:13.422839  543705 net.go:770] primary dev: ETH0
I0321 15:31:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:31:13.422864  543705 net.go:698] Add success.
I0321 15:31:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:31:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:31:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 15:31:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:31:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 15:31:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:31:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:31:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:31:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:31:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:31:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:31:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:31:23.409790  543705 memory.go:184] no items to output this cycle
I0321 15:31:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 15:31:30.185674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:31:30.188125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:31:30.188133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4bc0 0xc0002a4c00]
E0321 15:31:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:31:33.409795  543705 memory.go:184] no items to output this cycle
I0321 15:31:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 15:31:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:31:43.409813  543705 memory.go:191] Add success.
I0321 15:31:43.409821  543705 cpu.go:282] Add success.
I0321 15:31:43.419893  543705 net.go:648] Add success.
I0321 15:31:43.423026  543705 net.go:770] primary dev: ETH0
I0321 15:31:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:31:43.423053  543705 net.go:698] Add success.
I0321 15:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:31:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:31:46.458153  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:31:53.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:31:53.410270  543705 memory.go:184] no items to output this cycle
I0321 15:31:53.410270  543705 cpu.go:275] no items to output this cycle
E0321 15:32:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:32:03.409786  543705 memory.go:184] no items to output this cycle
I0321 15:32:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 15:32:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:32:13.409802  543705 memory.go:191] Add success.
I0321 15:32:13.409804  543705 cpu.go:282] Add success.
W0321 15:32:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:32:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:32:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:32:13.420059  543705 net.go:648] Add success.
I0321 15:32:13.422755  543705 net.go:770] primary dev: ETH0
I0321 15:32:13.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:32:13.422780  543705 net.go:698] Add success.
W0321 15:32:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:32:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0321 15:32:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:32:14.456879  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:32:14.456889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:32:14.456895  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:32:14.456966  543705 disk_worker.go:494] system disk:vda1
I0321 15:32:14.457009  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:32:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:32:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:32:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:32:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:32:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:32:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:32:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:32:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:32:23.409775  543705 memory.go:184] no items to output this cycle
I0321 15:32:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 15:32:30.189675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:32:30.192136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:32:30.192142  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260bc0 0xc000260c00]
E0321 15:32:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:32:33.409794  543705 memory.go:184] no items to output this cycle
I0321 15:32:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:32:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:32:43.409781  543705 memory.go:191] Add success.
I0321 15:32:43.409800  543705 cpu.go:282] Add success.
I0321 15:32:43.419874  543705 net.go:648] Add success.
I0321 15:32:43.422589  543705 net.go:770] primary dev: ETH0
I0321 15:32:43.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:32:43.422615  543705 net.go:698] Add success.
I0321 15:32:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:32:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:32:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:32:53.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:32:53.409899  543705 memory.go:184] no items to output this cycle
I0321 15:32:53.410061  543705 cpu.go:275] no items to output this cycle
I0321 15:33:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 15:33:03.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:33:03.409845  543705 memory.go:184] no items to output this cycle
E0321 15:33:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:33:13.409807  543705 memory.go:191] Add success.
I0321 15:33:13.409808  543705 cpu.go:282] Add success.
W0321 15:33:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:33:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:33:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:33:13.420118  543705 net.go:648] Add success.
I0321 15:33:13.422626  543705 net.go:770] primary dev: ETH0
I0321 15:33:13.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:33:13.422651  543705 net.go:698] Add success.
I0321 15:33:13.463462  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f54054b7-f08d-465d-932c-b71a1830e25f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:33:13.463508  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:33:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:33:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 15:33:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:33:14.456734  543705 disk_worker.go:494] system disk:vda1
I0321 15:33:14.456763  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:33:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:33:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:33:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:33:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:33:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:33:23.409778  543705 memory.go:184] no items to output this cycle
I0321 15:33:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 15:33:30.193677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:33:30.196146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:33:30.196153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf00 0xc0001abf40]
E0321 15:33:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:33:33.409788  543705 cpu.go:275] no items to output this cycle
I0321 15:33:33.409793  543705 memory.go:184] no items to output this cycle
I0321 15:33:39.004715  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:33:39.004722  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:33:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:33:43.410661  543705 memory.go:191] Add success.
I0321 15:33:43.409832  543705 cpu.go:282] Add success.
I0321 15:33:43.420609  543705 net.go:648] Add success.
I0321 15:33:43.423617  543705 net.go:770] primary dev: ETH0
I0321 15:33:43.423633  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:33:43.423648  543705 net.go:698] Add success.
I0321 15:33:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:33:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:33:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:33:53.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:33:53.409900  543705 cpu.go:275] no items to output this cycle
I0321 15:33:53.409909  543705 memory.go:184] no items to output this cycle
E0321 15:34:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:34:03.409833  543705 memory.go:184] no items to output this cycle
I0321 15:34:03.409877  543705 cpu.go:275] no items to output this cycle
E0321 15:34:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:34:13.409834  543705 memory.go:191] Add success.
I0321 15:34:13.409837  543705 cpu.go:282] Add success.
W0321 15:34:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:34:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:34:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:34:13.420234  543705 net.go:648] Add success.
I0321 15:34:13.422900  543705 net.go:770] primary dev: ETH0
I0321 15:34:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:34:13.422925  543705 net.go:698] Add success.
I0321 15:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:34:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:34:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 15:34:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:34:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 15:34:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:34:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:34:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:34:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:34:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:34:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:34:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:34:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 15:34:23.409800  543705 memory.go:184] no items to output this cycle
I0321 15:34:30.197675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:34:30.200216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:34:30.200222  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352d80 0xc000352dc0]
E0321 15:34:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:34:33.409773  543705 memory.go:184] no items to output this cycle
I0321 15:34:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 15:34:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:34:43.409821  543705 memory.go:191] Add success.
I0321 15:34:43.409822  543705 cpu.go:282] Add success.
I0321 15:34:43.420007  543705 net.go:648] Add success.
I0321 15:34:43.422915  543705 net.go:770] primary dev: ETH0
I0321 15:34:43.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:34:43.422939  543705 net.go:698] Add success.
I0321 15:34:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:34:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:34:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:34:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:34:53.409798  543705 memory.go:184] no items to output this cycle
I0321 15:34:53.409822  543705 cpu.go:275] no items to output this cycle
E0321 15:35:03.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:35:03.409867  543705 memory.go:184] no items to output this cycle
I0321 15:35:03.410045  543705 cpu.go:275] no items to output this cycle
E0321 15:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:35:13.409793  543705 memory.go:191] Add success.
I0321 15:35:13.409808  543705 cpu.go:282] Add success.
W0321 15:35:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:35:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:35:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:35:13.420149  543705 net.go:648] Add success.
I0321 15:35:13.422685  543705 net.go:770] primary dev: ETH0
I0321 15:35:13.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:35:13.422714  543705 net.go:698] Add success.
I0321 15:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:35:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:35:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 15:35:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:35:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 15:35:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:35:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:35:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:35:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:35:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 15:35:23.409784  543705 memory.go:184] no items to output this cycle
I0321 15:35:30.201678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:35:30.204201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:35:30.204207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5bc0 0xc0000c5c00]
E0321 15:35:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:35:33.409798  543705 memory.go:184] no items to output this cycle
I0321 15:35:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 15:35:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:35:43.409784  543705 memory.go:191] Add success.
I0321 15:35:43.409793  543705 cpu.go:282] Add success.
I0321 15:35:43.419898  543705 net.go:648] Add success.
I0321 15:35:43.422305  543705 net.go:770] primary dev: ETH0
I0321 15:35:43.422318  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:35:43.422330  543705 net.go:698] Add success.
I0321 15:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:35:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:35:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:35:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:35:53.409803  543705 memory.go:184] no items to output this cycle
I0321 15:35:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 15:36:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:36:03.409802  543705 memory.go:184] no items to output this cycle
I0321 15:36:03.409852  543705 cpu.go:275] no items to output this cycle
E0321 15:36:13.409934  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:36:13.409943  543705 cpu.go:282] Add success.
I0321 15:36:13.409968  543705 memory.go:191] Add success.
W0321 15:36:13.410001  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:36:13.410023  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:36:13.410027  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:36:13.419712  543705 net.go:648] Add success.
I0321 15:36:13.422797  543705 net.go:770] primary dev: ETH0
I0321 15:36:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:36:13.422819  543705 net.go:698] Add success.
I0321 15:36:13.469064  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d2ca3ce3-e2ce-4bc0-ac7d-56a3546b884e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:36:13.469095  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:36:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:36:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:36:14.455139  543705 disk_worker.go:708] disk space is not compliant
W0321 15:36:14.455141  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:36:14.456474  543705 disk_worker.go:494] system disk:vda1
I0321 15:36:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:36:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:36:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:36:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:36:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:36:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 15:36:23.409786  543705 memory.go:184] no items to output this cycle
I0321 15:36:30.205672  543705 disk_info.go:125] begin check local disk info of client
I0321 15:36:30.208143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:36:30.208149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dcb80 0xc0003dcbc0]
E0321 15:36:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:36:33.409762  543705 memory.go:184] no items to output this cycle
I0321 15:36:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 15:36:39.005734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:36:39.005740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:36:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:36:43.410635  543705 memory.go:191] Add success.
I0321 15:36:43.409783  543705 cpu.go:282] Add success.
I0321 15:36:43.420328  543705 net.go:648] Add success.
I0321 15:36:43.423087  543705 net.go:770] primary dev: ETH0
I0321 15:36:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:36:43.423113  543705 net.go:698] Add success.
I0321 15:36:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:36:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:36:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:36:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:36:53.409801  543705 memory.go:184] no items to output this cycle
I0321 15:36:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 15:37:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:37:03.409816  543705 memory.go:184] no items to output this cycle
I0321 15:37:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 15:37:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:37:13.409795  543705 memory.go:191] Add success.
I0321 15:37:13.409809  543705 cpu.go:282] Add success.
W0321 15:37:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:37:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:37:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:37:13.420199  543705 net.go:648] Add success.
I0321 15:37:13.423045  543705 net.go:770] primary dev: ETH0
I0321 15:37:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:37:13.423069  543705 net.go:698] Add success.
I0321 15:37:13.453664  543705 event_worker.go:152] Polling the log file for events...
W0321 15:37:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:37:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 15:37:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:37:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:37:14.455884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:37:14.455890  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:37:14.456543  543705 disk_worker.go:494] system disk:vda1
I0321 15:37:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:37:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:37:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:37:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:37:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:37:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:37:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:37:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:37:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:37:23.409793  543705 memory.go:184] no items to output this cycle
I0321 15:37:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 15:37:30.209677  543705 disk_info.go:125] begin check local disk info of client
I0321 15:37:30.212147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:37:30.212154  543705 disk_info.go:196] parse disk info done, disk is : [0xc000585100 0xc000585140]
E0321 15:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:37:33.409794  543705 memory.go:184] no items to output this cycle
I0321 15:37:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 15:37:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:37:43.409788  543705 memory.go:191] Add success.
I0321 15:37:43.409789  543705 cpu.go:282] Add success.
I0321 15:37:43.419999  543705 net.go:648] Add success.
I0321 15:37:43.422652  543705 net.go:770] primary dev: ETH0
I0321 15:37:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:37:43.422677  543705 net.go:698] Add success.
I0321 15:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:37:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:37:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:37:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:37:53.409767  543705 memory.go:184] no items to output this cycle
I0321 15:37:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 15:38:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:38:03.409808  543705 memory.go:184] no items to output this cycle
I0321 15:38:03.409844  543705 cpu.go:275] no items to output this cycle
E0321 15:38:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:38:13.409799  543705 cpu.go:282] Add success.
I0321 15:38:13.409803  543705 memory.go:191] Add success.
W0321 15:38:13.409991  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:38:13.410006  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:38:13.410010  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:38:13.419711  543705 net.go:648] Add success.
I0321 15:38:13.422852  543705 net.go:770] primary dev: ETH0
I0321 15:38:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:38:13.422876  543705 net.go:698] Add success.
I0321 15:38:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:38:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:38:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 15:38:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:38:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 15:38:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:38:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:38:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:38:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:38:23.409792  543705 memory.go:184] no items to output this cycle
I0321 15:38:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 15:38:30.213688  543705 disk_info.go:125] begin check local disk info of client
I0321 15:38:30.216147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:38:30.216153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa00 0xc0001aaa40]
E0321 15:38:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:38:33.409792  543705 memory.go:184] no items to output this cycle
I0321 15:38:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 15:38:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:38:43.409810  543705 memory.go:191] Add success.
I0321 15:38:43.409817  543705 cpu.go:282] Add success.
I0321 15:38:43.419852  543705 net.go:648] Add success.
I0321 15:38:43.422612  543705 net.go:770] primary dev: ETH0
I0321 15:38:43.422625  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:38:43.422637  543705 net.go:698] Add success.
I0321 15:38:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:38:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:38:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:38:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:38:53.409776  543705 memory.go:184] no items to output this cycle
I0321 15:38:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 15:39:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:39:03.409775  543705 memory.go:184] no items to output this cycle
I0321 15:39:03.409849  543705 cpu.go:275] no items to output this cycle
E0321 15:39:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:39:13.409798  543705 memory.go:191] Add success.
I0321 15:39:13.409798  543705 cpu.go:282] Add success.
W0321 15:39:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:39:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:39:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:39:13.420507  543705 net.go:648] Add success.
I0321 15:39:13.423331  543705 net.go:770] primary dev: ETH0
I0321 15:39:13.423344  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:39:13.423356  543705 net.go:698] Add success.
I0321 15:39:13.462724  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"03603555-9713-4578-b0ff-b198f31bbf7c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:39:13.462757  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:39:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:39:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:39:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 15:39:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:39:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 15:39:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:39:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:39:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:39:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:39:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:39:23.409792  543705 memory.go:184] no items to output this cycle
I0321 15:39:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 15:39:30.217674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:39:30.220182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:39:30.220188  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d400 0xc00039d440]
E0321 15:39:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:39:33.409761  543705 memory.go:184] no items to output this cycle
I0321 15:39:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 15:39:39.008734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:39:39.008740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:39:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:39:43.410619  543705 memory.go:191] Add success.
I0321 15:39:43.409786  543705 cpu.go:282] Add success.
I0321 15:39:43.420416  543705 net.go:648] Add success.
I0321 15:39:43.422944  543705 net.go:770] primary dev: ETH0
I0321 15:39:43.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:39:43.422973  543705 net.go:698] Add success.
I0321 15:39:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:39:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:39:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:39:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:39:53.409778  543705 memory.go:184] no items to output this cycle
I0321 15:39:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 15:40:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:40:03.409812  543705 cpu.go:275] no items to output this cycle
I0321 15:40:03.409812  543705 memory.go:184] no items to output this cycle
E0321 15:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:40:13.409799  543705 memory.go:191] Add success.
I0321 15:40:13.409803  543705 cpu.go:282] Add success.
W0321 15:40:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:40:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:40:13.420195  543705 net.go:648] Add success.
I0321 15:40:13.422731  543705 net.go:770] primary dev: ETH0
I0321 15:40:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:40:13.422755  543705 net.go:698] Add success.
I0321 15:40:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:40:14.455073  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:40:14.455135  543705 disk_worker.go:708] disk space is not compliant
W0321 15:40:14.455138  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:40:14.456475  543705 disk_worker.go:494] system disk:vda1
I0321 15:40:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:40:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:40:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:40:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:40:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:40:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:40:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:40:23.409793  543705 memory.go:184] no items to output this cycle
I0321 15:40:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 15:40:30.221674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:40:30.224200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:40:30.224206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2780 0xc0004b27c0]
E0321 15:40:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:40:33.409763  543705 memory.go:184] no items to output this cycle
I0321 15:40:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 15:40:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:40:43.409784  543705 memory.go:191] Add success.
I0321 15:40:43.409807  543705 cpu.go:282] Add success.
I0321 15:40:43.419841  543705 net.go:648] Add success.
I0321 15:40:43.423043  543705 net.go:770] primary dev: ETH0
I0321 15:40:43.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:40:43.423068  543705 net.go:698] Add success.
I0321 15:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:40:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:40:53.409763  543705 memory.go:184] no items to output this cycle
I0321 15:40:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 15:41:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:41:03.409792  543705 memory.go:184] no items to output this cycle
I0321 15:41:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 15:41:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:41:13.409798  543705 memory.go:191] Add success.
I0321 15:41:13.409800  543705 cpu.go:282] Add success.
W0321 15:41:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:41:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:41:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:41:13.420636  543705 net.go:648] Add success.
I0321 15:41:13.422960  543705 net.go:770] primary dev: ETH0
I0321 15:41:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:41:13.422985  543705 net.go:698] Add success.
I0321 15:41:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:41:14.455082  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:41:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0321 15:41:14.455146  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:41:14.456471  543705 disk_worker.go:494] system disk:vda1
I0321 15:41:14.456512  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:41:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:41:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:41:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:41:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:41:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:41:23.409767  543705 memory.go:184] no items to output this cycle
I0321 15:41:23.409770  543705 cpu.go:275] no items to output this cycle
I0321 15:41:30.225676  543705 disk_info.go:125] begin check local disk info of client
I0321 15:41:30.228162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:41:30.228168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f49c0 0xc0003f4a00]
E0321 15:41:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:41:33.409797  543705 memory.go:184] no items to output this cycle
I0321 15:41:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 15:41:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:41:43.409810  543705 memory.go:191] Add success.
I0321 15:41:43.409819  543705 cpu.go:282] Add success.
I0321 15:41:43.420126  543705 net.go:648] Add success.
I0321 15:41:43.423039  543705 net.go:770] primary dev: ETH0
I0321 15:41:43.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:41:43.423067  543705 net.go:698] Add success.
I0321 15:41:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:41:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:41:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:41:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:41:53.409764  543705 memory.go:184] no items to output this cycle
I0321 15:41:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 15:42:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:42:03.409785  543705 memory.go:184] no items to output this cycle
I0321 15:42:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 15:42:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:42:13.409785  543705 memory.go:191] Add success.
W0321 15:42:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 15:42:13.409814  543705 cpu.go:282] Add success.
W0321 15:42:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:42:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:42:13.420130  543705 net.go:648] Add success.
I0321 15:42:13.422873  543705 net.go:770] primary dev: ETH0
I0321 15:42:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:42:13.422903  543705 net.go:698] Add success.
I0321 15:42:13.529835  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"32c99884-17c7-4d6e-aca3-5c28c7b614b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:42:13.529877  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 15:42:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:42:14.455435  543705 disk_worker.go:708] disk space is not compliant
W0321 15:42:14.455440  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:42:14.456177  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:42:14.456185  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:42:14.456191  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:42:14.457714  543705 disk_worker.go:494] system disk:vda1
I0321 15:42:14.457744  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:42:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:42:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:42:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:42:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:42:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:42:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:42:16.472304  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:42:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:42:23.409767  543705 memory.go:184] no items to output this cycle
I0321 15:42:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 15:42:30.229675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:42:30.232127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:42:30.232133  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052a380 0xc00052a3c0]
E0321 15:42:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:42:33.409791  543705 memory.go:184] no items to output this cycle
I0321 15:42:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 15:42:39.009728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:42:39.009735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:42:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:42:43.410656  543705 memory.go:191] Add success.
I0321 15:42:43.409800  543705 cpu.go:282] Add success.
I0321 15:42:43.420359  543705 net.go:648] Add success.
I0321 15:42:43.422860  543705 net.go:770] primary dev: ETH0
I0321 15:42:43.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:42:43.422887  543705 net.go:698] Add success.
I0321 15:42:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:42:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:42:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:42:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:42:53.409767  543705 memory.go:184] no items to output this cycle
I0321 15:42:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 15:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:43:03.409788  543705 memory.go:184] no items to output this cycle
I0321 15:43:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 15:43:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:43:13.409800  543705 memory.go:191] Add success.
I0321 15:43:13.409801  543705 cpu.go:282] Add success.
W0321 15:43:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:43:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:43:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:43:13.420265  543705 net.go:648] Add success.
I0321 15:43:13.423141  543705 net.go:770] primary dev: ETH0
I0321 15:43:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:43:13.423167  543705 net.go:698] Add success.
I0321 15:43:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:43:14.455280  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:43:14.455295  543705 disk_worker.go:708] disk space is not compliant
W0321 15:43:14.455299  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:43:14.457186  543705 disk_worker.go:494] system disk:vda1
I0321 15:43:14.457226  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:43:15.456012  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:43:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:43:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:43:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:43:23.409794  543705 memory.go:184] no items to output this cycle
I0321 15:43:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 15:43:30.233674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:43:30.236152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:43:30.236159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab2c0 0xc0001ab300]
E0321 15:43:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:43:33.409789  543705 memory.go:184] no items to output this cycle
I0321 15:43:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 15:43:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:43:43.409815  543705 memory.go:191] Add success.
I0321 15:43:43.409820  543705 cpu.go:282] Add success.
I0321 15:43:43.419891  543705 net.go:648] Add success.
I0321 15:43:43.423179  543705 net.go:770] primary dev: ETH0
I0321 15:43:43.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:43:43.423204  543705 net.go:698] Add success.
I0321 15:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:43:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:43:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:43:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:43:53.409769  543705 memory.go:184] no items to output this cycle
I0321 15:43:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 15:44:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:44:03.409767  543705 memory.go:184] no items to output this cycle
I0321 15:44:03.409844  543705 cpu.go:275] no items to output this cycle
E0321 15:44:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:44:13.409799  543705 memory.go:191] Add success.
I0321 15:44:13.409799  543705 cpu.go:282] Add success.
W0321 15:44:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:44:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:44:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:44:13.420049  543705 net.go:648] Add success.
I0321 15:44:13.422744  543705 net.go:770] primary dev: ETH0
I0321 15:44:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:44:13.422769  543705 net.go:698] Add success.
I0321 15:44:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:44:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:44:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 15:44:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:44:14.456781  543705 disk_worker.go:494] system disk:vda1
I0321 15:44:14.456810  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:44:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:44:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:44:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:44:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:44:23.409771  543705 memory.go:184] no items to output this cycle
I0321 15:44:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 15:44:30.237672  543705 disk_info.go:125] begin check local disk info of client
I0321 15:44:30.240215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:44:30.240221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000535000 0xc000535040]
E0321 15:44:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:44:33.409799  543705 memory.go:184] no items to output this cycle
I0321 15:44:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 15:44:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:44:43.409779  543705 memory.go:191] Add success.
I0321 15:44:43.409826  543705 cpu.go:282] Add success.
I0321 15:44:43.419846  543705 net.go:648] Add success.
I0321 15:44:43.422455  543705 net.go:770] primary dev: ETH0
I0321 15:44:43.422468  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:44:43.422480  543705 net.go:698] Add success.
I0321 15:44:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:44:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:44:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:44:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:44:53.409796  543705 cpu.go:275] no items to output this cycle
I0321 15:44:53.409803  543705 memory.go:184] no items to output this cycle
E0321 15:45:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:45:03.409792  543705 memory.go:184] no items to output this cycle
I0321 15:45:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:45:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:45:13.409821  543705 memory.go:191] Add success.
I0321 15:45:13.409830  543705 cpu.go:282] Add success.
W0321 15:45:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:45:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:45:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:45:13.420064  543705 net.go:648] Add success.
I0321 15:45:13.422691  543705 net.go:770] primary dev: ETH0
I0321 15:45:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:45:13.422715  543705 net.go:698] Add success.
I0321 15:45:13.468126  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ac4db95a-b425-4f86-976e-da354d91e64a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:45:13.468161  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:45:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:45:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:45:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 15:45:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:45:14.456858  543705 disk_worker.go:494] system disk:vda1
I0321 15:45:14.456887  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:45:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:45:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:45:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:45:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:45:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:45:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:45:23.409773  543705 memory.go:184] no items to output this cycle
I0321 15:45:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 15:45:30.241675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:45:30.244193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:45:30.244200  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a880 0xc00035a8c0]
E0321 15:45:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:45:33.409777  543705 memory.go:184] no items to output this cycle
I0321 15:45:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 15:45:39.012748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:45:39.012755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:45:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:45:43.410623  543705 memory.go:191] Add success.
I0321 15:45:43.409805  543705 cpu.go:282] Add success.
I0321 15:45:43.420333  543705 net.go:648] Add success.
I0321 15:45:43.423178  543705 net.go:770] primary dev: ETH0
I0321 15:45:43.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:45:43.423204  543705 net.go:698] Add success.
I0321 15:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:45:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:45:53.409802  543705 memory.go:184] no items to output this cycle
I0321 15:45:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 15:46:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:46:03.409779  543705 memory.go:184] no items to output this cycle
I0321 15:46:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 15:46:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:46:13.409786  543705 memory.go:191] Add success.
W0321 15:46:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 15:46:13.409820  543705 cpu.go:282] Add success.
W0321 15:46:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:46:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:46:13.420155  543705 net.go:648] Add success.
I0321 15:46:13.422988  543705 net.go:770] primary dev: ETH0
I0321 15:46:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:46:13.423016  543705 net.go:698] Add success.
I0321 15:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:46:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:46:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 15:46:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:46:14.457046  543705 disk_worker.go:494] system disk:vda1
I0321 15:46:14.457077  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:46:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:46:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:46:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:46:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:46:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:46:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:46:23.409804  543705 memory.go:184] no items to output this cycle
I0321 15:46:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 15:46:30.245674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:46:30.248165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:46:30.248171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b53c0 0xc0002b5400]
E0321 15:46:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:46:33.409797  543705 memory.go:184] no items to output this cycle
I0321 15:46:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 15:46:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:46:43.409790  543705 memory.go:191] Add success.
I0321 15:46:43.409791  543705 cpu.go:282] Add success.
I0321 15:46:43.419986  543705 net.go:648] Add success.
I0321 15:46:43.422529  543705 net.go:770] primary dev: ETH0
I0321 15:46:43.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:46:43.422555  543705 net.go:698] Add success.
I0321 15:46:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:46:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:46:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:46:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:46:53.409801  543705 memory.go:184] no items to output this cycle
I0321 15:46:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 15:47:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:47:03.409790  543705 memory.go:184] no items to output this cycle
I0321 15:47:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 15:47:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:47:13.409798  543705 memory.go:191] Add success.
I0321 15:47:13.409801  543705 cpu.go:282] Add success.
W0321 15:47:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:47:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:47:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:47:13.420051  543705 net.go:648] Add success.
I0321 15:47:13.422818  543705 net.go:770] primary dev: ETH0
I0321 15:47:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:47:13.422842  543705 net.go:698] Add success.
I0321 15:47:13.453382  543705 event_worker.go:152] Polling the log file for events...
W0321 15:47:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:47:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 15:47:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:47:14.456889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:47:14.456898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:47:14.456904  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:47:14.456954  543705 disk_worker.go:494] system disk:vda1
I0321 15:47:14.456996  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:47:15.456881  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:47:15.456890  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:47:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:47:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:47:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:47:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:47:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:47:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:47:23.409776  543705 memory.go:184] no items to output this cycle
I0321 15:47:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 15:47:30.249674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:47:30.252145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:47:30.252151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b55c0 0xc0002b5600]
E0321 15:47:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:47:33.409767  543705 memory.go:184] no items to output this cycle
I0321 15:47:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 15:47:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:47:43.409825  543705 memory.go:191] Add success.
I0321 15:47:43.409831  543705 cpu.go:282] Add success.
I0321 15:47:43.419951  543705 net.go:648] Add success.
I0321 15:47:43.423262  543705 net.go:770] primary dev: ETH0
I0321 15:47:43.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:47:43.423287  543705 net.go:698] Add success.
I0321 15:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:47:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:47:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:47:53.409782  543705 memory.go:184] no items to output this cycle
I0321 15:47:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:48:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:48:03.409794  543705 memory.go:184] no items to output this cycle
I0321 15:48:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 15:48:13.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:48:13.410407  543705 memory.go:191] Add success.
I0321 15:48:13.410425  543705 cpu.go:282] Add success.
W0321 15:48:13.410434  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:48:13.410446  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:48:13.410449  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:48:13.420691  543705 net.go:648] Add success.
I0321 15:48:13.423454  543705 net.go:770] primary dev: ETH0
I0321 15:48:13.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:48:13.423482  543705 net.go:698] Add success.
I0321 15:48:13.463365  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1530f912-c318-4bad-a2b8-6a90b205bf7e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:48:13.463398  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:48:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:48:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:48:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 15:48:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:48:14.456620  543705 disk_worker.go:494] system disk:vda1
I0321 15:48:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:48:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:48:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:48:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:48:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:48:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:48:23.409800  543705 memory.go:184] no items to output this cycle
I0321 15:48:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 15:48:30.253678  543705 disk_info.go:125] begin check local disk info of client
I0321 15:48:30.256125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:48:30.256132  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0321 15:48:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:48:33.409765  543705 memory.go:184] no items to output this cycle
I0321 15:48:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 15:48:39.013730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:48:39.013736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:48:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:48:43.410704  543705 memory.go:191] Add success.
I0321 15:48:43.409793  543705 cpu.go:282] Add success.
I0321 15:48:43.420505  543705 net.go:648] Add success.
I0321 15:48:43.423250  543705 net.go:770] primary dev: ETH0
I0321 15:48:43.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:48:43.423280  543705 net.go:698] Add success.
I0321 15:48:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:48:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:48:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:48:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:48:53.409784  543705 memory.go:184] no items to output this cycle
I0321 15:48:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 15:49:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:49:03.409781  543705 memory.go:184] no items to output this cycle
I0321 15:49:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 15:49:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:49:13.409794  543705 memory.go:191] Add success.
I0321 15:49:13.409795  543705 cpu.go:282] Add success.
W0321 15:49:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:49:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:49:13.420045  543705 net.go:648] Add success.
I0321 15:49:13.423040  543705 net.go:770] primary dev: ETH0
I0321 15:49:13.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:49:13.423065  543705 net.go:698] Add success.
I0321 15:49:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:49:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:49:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 15:49:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:49:14.456528  543705 disk_worker.go:494] system disk:vda1
I0321 15:49:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:49:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:49:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:49:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:49:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:49:23.409764  543705 memory.go:184] no items to output this cycle
I0321 15:49:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 15:49:30.257675  543705 disk_info.go:125] begin check local disk info of client
I0321 15:49:30.260138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:49:30.260145  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025e000 0xc00025e040]
E0321 15:49:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:49:33.409761  543705 memory.go:184] no items to output this cycle
I0321 15:49:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 15:49:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:49:43.409810  543705 memory.go:191] Add success.
I0321 15:49:43.409830  543705 cpu.go:282] Add success.
I0321 15:49:43.419914  543705 net.go:648] Add success.
I0321 15:49:43.422573  543705 net.go:770] primary dev: ETH0
I0321 15:49:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:49:43.422606  543705 net.go:698] Add success.
I0321 15:49:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:49:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:49:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:49:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:49:53.409775  543705 memory.go:184] no items to output this cycle
I0321 15:49:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 15:50:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:50:03.409800  543705 memory.go:184] no items to output this cycle
I0321 15:50:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 15:50:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:50:13.409802  543705 memory.go:191] Add success.
I0321 15:50:13.409802  543705 cpu.go:282] Add success.
W0321 15:50:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:50:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:50:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:50:13.420220  543705 net.go:648] Add success.
I0321 15:50:13.423322  543705 net.go:770] primary dev: ETH0
I0321 15:50:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:50:13.423345  543705 net.go:698] Add success.
I0321 15:50:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:50:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:50:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 15:50:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:50:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 15:50:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:50:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:50:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:50:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:50:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:50:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:50:23.409765  543705 memory.go:184] no items to output this cycle
I0321 15:50:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 15:50:30.261682  543705 disk_info.go:125] begin check local disk info of client
I0321 15:50:30.264141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:50:30.264148  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5840 0xc0002a5880]
E0321 15:50:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:50:33.409846  543705 memory.go:184] no items to output this cycle
I0321 15:50:33.409922  543705 cpu.go:275] no items to output this cycle
E0321 15:50:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:50:43.409793  543705 memory.go:191] Add success.
I0321 15:50:43.409810  543705 cpu.go:282] Add success.
I0321 15:50:43.419701  543705 net.go:770] primary dev: ETH0
I0321 15:50:43.419713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:50:43.419725  543705 net.go:698] Add success.
I0321 15:50:43.419951  543705 net.go:648] Add success.
I0321 15:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:50:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:50:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:50:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:50:53.409778  543705 memory.go:184] no items to output this cycle
I0321 15:50:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 15:51:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:51:03.409788  543705 memory.go:184] no items to output this cycle
I0321 15:51:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 15:51:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:51:13.409798  543705 memory.go:191] Add success.
I0321 15:51:13.409805  543705 cpu.go:282] Add success.
W0321 15:51:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:51:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:51:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:51:13.420159  543705 net.go:648] Add success.
I0321 15:51:13.422612  543705 net.go:770] primary dev: ETH0
I0321 15:51:13.422628  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:51:13.422652  543705 net.go:698] Add success.
I0321 15:51:13.463389  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"08f8a353-a2e5-4369-a201-9ab97bc0b5b7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:51:13.463422  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:51:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:51:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:51:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 15:51:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:51:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 15:51:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:51:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:51:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:51:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:51:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:51:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:51:23.409844  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:51:23.409869  543705 memory.go:184] no items to output this cycle
I0321 15:51:23.409880  543705 cpu.go:275] no items to output this cycle
I0321 15:51:30.265680  543705 disk_info.go:125] begin check local disk info of client
I0321 15:51:30.268159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:51:30.268165  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb40 0xc00007bb80]
E0321 15:51:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:51:33.409775  543705 memory.go:184] no items to output this cycle
I0321 15:51:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 15:51:39.013875  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:51:39.013881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:51:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:51:43.410577  543705 memory.go:191] Add success.
I0321 15:51:43.409798  543705 cpu.go:282] Add success.
I0321 15:51:43.420265  543705 net.go:648] Add success.
I0321 15:51:43.422899  543705 net.go:770] primary dev: ETH0
I0321 15:51:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:51:43.422925  543705 net.go:698] Add success.
I0321 15:51:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:51:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:51:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:51:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:51:53.409787  543705 memory.go:184] no items to output this cycle
I0321 15:51:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 15:52:03.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:52:03.409821  543705 memory.go:184] no items to output this cycle
I0321 15:52:03.409835  543705 cpu.go:275] no items to output this cycle
E0321 15:52:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:52:13.409821  543705 memory.go:191] Add success.
I0321 15:52:13.409825  543705 cpu.go:282] Add success.
W0321 15:52:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:52:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:52:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:52:13.420080  543705 net.go:648] Add success.
I0321 15:52:13.422839  543705 net.go:770] primary dev: ETH0
I0321 15:52:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:52:13.422866  543705 net.go:698] Add success.
W0321 15:52:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:52:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 15:52:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0321 15:52:14.455931  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:52:14.455939  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:52:14.455945  543705 custom_config.go:64] query custom config with name: gpu
I0321 15:52:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 15:52:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:52:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:52:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:52:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:52:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:52:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:52:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:52:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:52:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:52:23.409802  543705 memory.go:184] no items to output this cycle
I0321 15:52:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 15:52:30.269674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:52:30.272136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:52:30.272143  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ccc0 0xc00039cd00]
E0321 15:52:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:52:33.409777  543705 memory.go:184] no items to output this cycle
I0321 15:52:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 15:52:43.410689  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:52:43.410711  543705 memory.go:191] Add success.
I0321 15:52:43.410715  543705 cpu.go:282] Add success.
I0321 15:52:43.420259  543705 net.go:648] Add success.
I0321 15:52:43.422886  543705 net.go:770] primary dev: ETH0
I0321 15:52:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:52:43.422914  543705 net.go:698] Add success.
I0321 15:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:52:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:52:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:52:53.409789  543705 memory.go:184] no items to output this cycle
I0321 15:52:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 15:53:03.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:53:03.409827  543705 memory.go:184] no items to output this cycle
I0321 15:53:03.409835  543705 cpu.go:275] no items to output this cycle
E0321 15:53:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:53:13.409812  543705 memory.go:191] Add success.
I0321 15:53:13.409815  543705 cpu.go:282] Add success.
W0321 15:53:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:53:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:53:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:53:13.420279  543705 net.go:648] Add success.
I0321 15:53:13.422819  543705 net.go:770] primary dev: ETH0
I0321 15:53:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:53:13.422849  543705 net.go:698] Add success.
I0321 15:53:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:53:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:53:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 15:53:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:53:14.456628  543705 disk_worker.go:494] system disk:vda1
I0321 15:53:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:53:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:53:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:53:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:53:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:53:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:53:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:53:23.409778  543705 memory.go:184] no items to output this cycle
I0321 15:53:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 15:53:30.273676  543705 disk_info.go:125] begin check local disk info of client
I0321 15:53:30.276175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:53:30.276181  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ca40 0xc00039ca80]
E0321 15:53:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:53:33.409773  543705 memory.go:184] no items to output this cycle
I0321 15:53:33.409774  543705 cpu.go:275] no items to output this cycle
E0321 15:53:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:53:43.409784  543705 memory.go:191] Add success.
I0321 15:53:43.409816  543705 cpu.go:282] Add success.
I0321 15:53:43.420040  543705 net.go:648] Add success.
I0321 15:53:43.422655  543705 net.go:770] primary dev: ETH0
I0321 15:53:43.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:53:43.422679  543705 net.go:698] Add success.
I0321 15:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:53:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:53:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:53:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:53:53.409797  543705 memory.go:184] no items to output this cycle
I0321 15:53:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 15:54:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:54:03.409770  543705 memory.go:184] no items to output this cycle
I0321 15:54:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:54:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:54:13.409786  543705 memory.go:191] Add success.
W0321 15:54:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 15:54:13.409814  543705 cpu.go:282] Add success.
W0321 15:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:54:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:54:13.420222  543705 net.go:648] Add success.
I0321 15:54:13.423034  543705 net.go:770] primary dev: ETH0
I0321 15:54:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:54:13.423060  543705 net.go:698] Add success.
I0321 15:54:13.469150  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1af8c298-05e3-40bc-8d85-e5ba8e02e200","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:54:13.469183  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 15:54:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:54:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:54:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 15:54:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:54:14.456723  543705 disk_worker.go:494] system disk:vda1
I0321 15:54:14.456755  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:54:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:54:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:54:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:54:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:54:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:54:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:54:23.409798  543705 memory.go:184] no items to output this cycle
I0321 15:54:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 15:54:30.277676  543705 disk_info.go:125] begin check local disk info of client
I0321 15:54:30.280143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:54:30.280149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039cac0 0xc00039cb00]
E0321 15:54:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:54:33.409761  543705 memory.go:184] no items to output this cycle
I0321 15:54:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 15:54:39.016769  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:54:39.016776  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:54:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:54:43.410907  543705 memory.go:191] Add success.
I0321 15:54:43.409821  543705 cpu.go:282] Add success.
I0321 15:54:43.420653  543705 net.go:648] Add success.
I0321 15:54:43.423322  543705 net.go:770] primary dev: ETH0
I0321 15:54:43.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:54:43.423349  543705 net.go:698] Add success.
I0321 15:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:54:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:54:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:54:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:54:53.409773  543705 memory.go:184] no items to output this cycle
I0321 15:54:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 15:55:03.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:55:03.409832  543705 memory.go:184] no items to output this cycle
I0321 15:55:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 15:55:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:55:13.409801  543705 memory.go:191] Add success.
I0321 15:55:13.409801  543705 cpu.go:282] Add success.
W0321 15:55:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:55:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:55:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:55:13.420157  543705 net.go:648] Add success.
I0321 15:55:13.422814  543705 net.go:770] primary dev: ETH0
I0321 15:55:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:55:13.422840  543705 net.go:698] Add success.
I0321 15:55:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:55:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:55:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 15:55:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:55:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 15:55:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:55:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:55:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:55:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:55:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:55:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:55:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:55:23.409767  543705 memory.go:184] no items to output this cycle
I0321 15:55:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 15:55:30.281671  543705 disk_info.go:125] begin check local disk info of client
I0321 15:55:30.284148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:55:30.284154  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382c80 0xc000382cc0]
E0321 15:55:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:55:33.409760  543705 memory.go:184] no items to output this cycle
I0321 15:55:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 15:55:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:55:43.409788  543705 memory.go:191] Add success.
I0321 15:55:43.409789  543705 cpu.go:282] Add success.
I0321 15:55:43.419845  543705 net.go:648] Add success.
I0321 15:55:43.422543  543705 net.go:770] primary dev: ETH0
I0321 15:55:43.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:55:43.422570  543705 net.go:698] Add success.
I0321 15:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:55:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:55:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:55:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:55:53.409777  543705 memory.go:184] no items to output this cycle
I0321 15:55:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 15:56:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:56:03.409792  543705 memory.go:184] no items to output this cycle
I0321 15:56:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 15:56:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:56:13.409795  543705 memory.go:191] Add success.
I0321 15:56:13.409796  543705 cpu.go:282] Add success.
W0321 15:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:56:13.420253  543705 net.go:648] Add success.
I0321 15:56:13.422910  543705 net.go:770] primary dev: ETH0
I0321 15:56:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:56:13.422944  543705 net.go:698] Add success.
I0321 15:56:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:56:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:56:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 15:56:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:56:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 15:56:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:56:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:56:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:56:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:56:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:56:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:56:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:56:23.409794  543705 memory.go:184] no items to output this cycle
I0321 15:56:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 15:56:30.285673  543705 disk_info.go:125] begin check local disk info of client
I0321 15:56:30.288169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:56:30.288175  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051d6c0 0xc00051d700]
E0321 15:56:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:56:33.409759  543705 memory.go:184] no items to output this cycle
I0321 15:56:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 15:56:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:56:43.409779  543705 memory.go:191] Add success.
I0321 15:56:43.409803  543705 cpu.go:282] Add success.
I0321 15:56:43.419989  543705 net.go:648] Add success.
I0321 15:56:43.422736  543705 net.go:770] primary dev: ETH0
I0321 15:56:43.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:56:43.422761  543705 net.go:698] Add success.
I0321 15:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:56:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:56:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:56:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:56:53.409780  543705 memory.go:184] no items to output this cycle
I0321 15:56:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 15:57:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 15:57:03.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:57:03.409820  543705 memory.go:184] no items to output this cycle
E0321 15:57:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:57:13.409825  543705 memory.go:191] Add success.
I0321 15:57:13.409829  543705 cpu.go:282] Add success.
W0321 15:57:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:57:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:57:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:57:13.420155  543705 net.go:648] Add success.
I0321 15:57:13.428898  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 15:57:13.428975  543705 net.go:770] primary dev: ETH0
I0321 15:57:13.428988  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:57:13.429000  543705 net.go:698] Add success.
I0321 15:57:13.453561  543705 event_worker.go:152] Polling the log file for events...
I0321 15:57:13.468852  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fce1f6fa-6f0c-4d08-adec-8cb383d566ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 15:57:13.468886  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 15:57:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:57:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 15:57:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:57:14.456800  543705 disk_worker.go:494] system disk:vda1
I0321 15:57:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 15:57:14.457094  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 15:57:14.457103  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 15:57:14.457107  543705 custom_config.go:64] query custom config with name: gpu
E0321 15:57:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 15:57:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:57:16.457909  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 15:57:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 15:57:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:57:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:57:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:57:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:57:23.409798  543705 memory.go:184] no items to output this cycle
I0321 15:57:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 15:57:30.289674  543705 disk_info.go:125] begin check local disk info of client
I0321 15:57:30.292119  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:57:30.292125  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc40 0xc0001abc80]
E0321 15:57:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:57:33.409774  543705 cpu.go:275] no items to output this cycle
I0321 15:57:33.409779  543705 memory.go:184] no items to output this cycle
I0321 15:57:39.017730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 15:57:39.017737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 15:57:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:57:43.410803  543705 memory.go:191] Add success.
I0321 15:57:43.409820  543705 cpu.go:282] Add success.
I0321 15:57:43.420487  543705 net.go:648] Add success.
I0321 15:57:43.423441  543705 net.go:770] primary dev: ETH0
I0321 15:57:43.423456  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:57:43.423471  543705 net.go:698] Add success.
I0321 15:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:57:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:57:53.410225  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:57:53.410241  543705 memory.go:184] no items to output this cycle
I0321 15:57:53.410269  543705 cpu.go:275] no items to output this cycle
E0321 15:58:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:58:03.409776  543705 memory.go:184] no items to output this cycle
I0321 15:58:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 15:58:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:58:13.409814  543705 memory.go:191] Add success.
I0321 15:58:13.409819  543705 cpu.go:282] Add success.
W0321 15:58:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:58:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:58:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:58:13.420269  543705 net.go:648] Add success.
I0321 15:58:13.422962  543705 net.go:770] primary dev: ETH0
I0321 15:58:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:58:13.422995  543705 net.go:698] Add success.
I0321 15:58:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:58:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:58:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 15:58:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:58:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 15:58:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:58:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:58:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:58:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:58:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:58:23.409822  543705 memory.go:184] no items to output this cycle
I0321 15:58:23.409829  543705 cpu.go:275] no items to output this cycle
I0321 15:58:30.293679  543705 disk_info.go:125] begin check local disk info of client
I0321 15:58:30.296194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:58:30.296201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c540 0xc00039c580]
E0321 15:58:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:58:33.409775  543705 memory.go:184] no items to output this cycle
I0321 15:58:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 15:58:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:58:43.409795  543705 memory.go:191] Add success.
I0321 15:58:43.409798  543705 cpu.go:282] Add success.
I0321 15:58:43.419878  543705 net.go:648] Add success.
I0321 15:58:43.422525  543705 net.go:770] primary dev: ETH0
I0321 15:58:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:58:43.422553  543705 net.go:698] Add success.
I0321 15:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:58:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:58:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:58:53.409774  543705 memory.go:184] no items to output this cycle
I0321 15:58:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 15:59:03.409939  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:59:03.409991  543705 memory.go:184] no items to output this cycle
I0321 15:59:03.410002  543705 cpu.go:275] no items to output this cycle
E0321 15:59:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:59:13.409807  543705 memory.go:191] Add success.
I0321 15:59:13.409810  543705 cpu.go:282] Add success.
W0321 15:59:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 15:59:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 15:59:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 15:59:13.419966  543705 net.go:770] primary dev: ETH0
I0321 15:59:13.419980  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:59:13.419992  543705 net.go:698] Add success.
I0321 15:59:13.420230  543705 net.go:648] Add success.
I0321 15:59:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 15:59:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 15:59:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 15:59:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 15:59:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 15:59:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 15:59:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 15:59:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 15:59:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 15:59:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:59:23.409776  543705 memory.go:184] no items to output this cycle
I0321 15:59:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 15:59:30.297672  543705 disk_info.go:125] begin check local disk info of client
I0321 15:59:30.300232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 15:59:30.300238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5440 0xc0000c5480]
E0321 15:59:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:59:33.409781  543705 memory.go:184] no items to output this cycle
I0321 15:59:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 15:59:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:59:43.409794  543705 memory.go:191] Add success.
I0321 15:59:43.409799  543705 cpu.go:282] Add success.
I0321 15:59:43.419863  543705 net.go:648] Add success.
I0321 15:59:43.422805  543705 net.go:770] primary dev: ETH0
I0321 15:59:43.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0321 15:59:43.422831  543705 net.go:698] Add success.
I0321 15:59:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 15:59:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 15:59:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 15:59:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 15:59:53.409764  543705 memory.go:184] no items to output this cycle
I0321 15:59:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 16:00:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:00:03.409776  543705 memory.go:184] no items to output this cycle
I0321 16:00:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 16:00:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:00:13.409803  543705 cpu.go:282] Add success.
I0321 16:00:13.409811  543705 memory.go:191] Add success.
W0321 16:00:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:00:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:00:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:00:13.420078  543705 net.go:648] Add success.
I0321 16:00:13.422793  543705 net.go:770] primary dev: ETH0
I0321 16:00:13.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:00:13.422820  543705 net.go:698] Add success.
I0321 16:00:13.468582  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"86215beb-9298-488c-aa0e-9a22ff9227d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:00:13.468619  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:00:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:00:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:00:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 16:00:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:00:14.456531  543705 disk_worker.go:494] system disk:vda1
I0321 16:00:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:00:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:00:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:00:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:00:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:00:23.409770  543705 memory.go:184] no items to output this cycle
I0321 16:00:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 16:00:30.301673  543705 disk_info.go:125] begin check local disk info of client
I0321 16:00:30.304251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:00:30.304258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481c00 0xc000481c40]
E0321 16:00:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:00:33.409767  543705 memory.go:184] no items to output this cycle
I0321 16:00:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 16:00:39.017882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:00:39.017891  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:00:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:00:43.410730  543705 memory.go:191] Add success.
I0321 16:00:43.409821  543705 cpu.go:282] Add success.
I0321 16:00:43.420407  543705 net.go:648] Add success.
I0321 16:00:43.423259  543705 net.go:770] primary dev: ETH0
I0321 16:00:43.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:00:43.423441  543705 net.go:698] Add success.
I0321 16:00:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:00:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:00:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:00:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:00:53.409797  543705 memory.go:184] no items to output this cycle
I0321 16:00:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:01:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:01:03.409797  543705 memory.go:184] no items to output this cycle
I0321 16:01:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 16:01:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:01:13.409823  543705 memory.go:191] Add success.
I0321 16:01:13.409829  543705 cpu.go:282] Add success.
W0321 16:01:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:01:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:01:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:01:13.420188  543705 net.go:648] Add success.
I0321 16:01:13.423124  543705 net.go:770] primary dev: ETH0
I0321 16:01:13.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:01:13.423153  543705 net.go:698] Add success.
I0321 16:01:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:01:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:01:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 16:01:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:01:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 16:01:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:01:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:01:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:01:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:01:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:01:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:01:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:01:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 16:01:23.409783  543705 memory.go:184] no items to output this cycle
I0321 16:01:30.305674  543705 disk_info.go:125] begin check local disk info of client
I0321 16:01:30.308186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:01:30.308193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab840 0xc0001ab880]
E0321 16:01:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:01:33.409773  543705 memory.go:184] no items to output this cycle
I0321 16:01:33.409777  543705 cpu.go:275] no items to output this cycle
E0321 16:01:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:01:43.409820  543705 memory.go:191] Add success.
I0321 16:01:43.409827  543705 cpu.go:282] Add success.
I0321 16:01:43.419982  543705 net.go:648] Add success.
I0321 16:01:43.423080  543705 net.go:770] primary dev: ETH0
I0321 16:01:43.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:01:43.423304  543705 net.go:698] Add success.
I0321 16:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:01:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:01:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:01:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:01:53.410259  543705 memory.go:184] no items to output this cycle
I0321 16:01:53.410263  543705 cpu.go:275] no items to output this cycle
E0321 16:02:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:02:03.409780  543705 memory.go:184] no items to output this cycle
I0321 16:02:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 16:02:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:02:13.409795  543705 memory.go:191] Add success.
I0321 16:02:13.409804  543705 cpu.go:282] Add success.
W0321 16:02:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:02:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:02:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:02:13.420094  543705 net.go:648] Add success.
I0321 16:02:13.422800  543705 net.go:770] primary dev: ETH0
I0321 16:02:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:02:13.422827  543705 net.go:698] Add success.
W0321 16:02:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:02:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 16:02:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:02:14.456957  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:02:14.456966  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:02:14.456973  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:02:14.457020  543705 disk_worker.go:494] system disk:vda1
I0321 16:02:14.457062  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:02:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:02:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:02:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:02:16.457994  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:02:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:02:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:02:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:02:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:02:23.409770  543705 memory.go:184] no items to output this cycle
I0321 16:02:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 16:02:30.309672  543705 disk_info.go:125] begin check local disk info of client
I0321 16:02:30.312168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:02:30.312174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003906c0 0xc000390700]
E0321 16:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:02:33.409777  543705 cpu.go:275] no items to output this cycle
I0321 16:02:33.409786  543705 memory.go:184] no items to output this cycle
E0321 16:02:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:02:43.409792  543705 memory.go:191] Add success.
I0321 16:02:43.409800  543705 cpu.go:282] Add success.
I0321 16:02:43.419862  543705 net.go:648] Add success.
I0321 16:02:43.422624  543705 net.go:770] primary dev: ETH0
I0321 16:02:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:02:43.422648  543705 net.go:698] Add success.
I0321 16:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:02:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:02:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:02:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:02:53.409793  543705 memory.go:184] no items to output this cycle
I0321 16:02:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:03:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:03:03.409771  543705 memory.go:184] no items to output this cycle
I0321 16:03:03.409825  543705 cpu.go:275] no items to output this cycle
E0321 16:03:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:03:13.409794  543705 memory.go:191] Add success.
I0321 16:03:13.409794  543705 cpu.go:282] Add success.
W0321 16:03:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:03:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:03:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:03:13.420126  543705 net.go:648] Add success.
I0321 16:03:13.422900  543705 net.go:770] primary dev: ETH0
I0321 16:03:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:03:13.422928  543705 net.go:698] Add success.
I0321 16:03:13.468811  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d98e2a5d-4076-43b7-8887-2c484bc92fd3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:03:13.468854  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:03:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:03:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:03:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 16:03:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:03:14.456530  543705 disk_worker.go:494] system disk:vda1
I0321 16:03:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:03:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:03:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:03:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:03:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:03:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:03:23.409776  543705 memory.go:184] no items to output this cycle
I0321 16:03:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 16:03:30.313672  543705 disk_info.go:125] begin check local disk info of client
I0321 16:03:30.316192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:03:30.316199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c00 0xc0000c5c40]
E0321 16:03:33.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:03:33.409756  543705 memory.go:184] no items to output this cycle
I0321 16:03:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 16:03:39.018031  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:03:39.018039  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:03:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:03:43.409801  543705 cpu.go:282] Add success.
I0321 16:03:43.410723  543705 memory.go:191] Add success.
I0321 16:03:43.420882  543705 net.go:648] Add success.
I0321 16:03:43.423359  543705 net.go:770] primary dev: ETH0
I0321 16:03:43.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:03:43.423386  543705 net.go:698] Add success.
I0321 16:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:03:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:03:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:03:53.409795  543705 memory.go:184] no items to output this cycle
I0321 16:03:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 16:04:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:04:03.409795  543705 memory.go:184] no items to output this cycle
I0321 16:04:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 16:04:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:04:13.409794  543705 memory.go:191] Add success.
I0321 16:04:13.409800  543705 cpu.go:282] Add success.
W0321 16:04:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:04:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:04:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:04:13.420309  543705 net.go:648] Add success.
I0321 16:04:13.422983  543705 net.go:770] primary dev: ETH0
I0321 16:04:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:04:13.423009  543705 net.go:698] Add success.
I0321 16:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:04:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:04:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 16:04:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:04:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 16:04:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:04:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:04:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:04:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:04:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:04:23.409777  543705 memory.go:184] no items to output this cycle
I0321 16:04:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 16:04:30.317676  543705 disk_info.go:125] begin check local disk info of client
I0321 16:04:30.320230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:04:30.320238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b01c0 0xc0003b0200]
E0321 16:04:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:04:33.409766  543705 memory.go:184] no items to output this cycle
I0321 16:04:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 16:04:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:04:43.409825  543705 memory.go:191] Add success.
I0321 16:04:43.409831  543705 cpu.go:282] Add success.
I0321 16:04:43.419727  543705 net.go:648] Add success.
I0321 16:04:43.422648  543705 net.go:770] primary dev: ETH0
I0321 16:04:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:04:43.422672  543705 net.go:698] Add success.
I0321 16:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:04:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:04:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:04:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:04:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 16:04:53.409782  543705 memory.go:184] no items to output this cycle
E0321 16:05:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:05:03.409795  543705 memory.go:184] no items to output this cycle
I0321 16:05:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 16:05:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:05:13.409796  543705 memory.go:191] Add success.
I0321 16:05:13.409810  543705 cpu.go:282] Add success.
W0321 16:05:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:05:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:05:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:05:13.420123  543705 net.go:648] Add success.
I0321 16:05:13.423031  543705 net.go:770] primary dev: ETH0
I0321 16:05:13.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:05:13.423056  543705 net.go:698] Add success.
I0321 16:05:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:05:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:05:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 16:05:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:05:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 16:05:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:05:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:05:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:05:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:05:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:05:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:05:23.409798  543705 memory.go:184] no items to output this cycle
I0321 16:05:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 16:05:30.321673  543705 disk_info.go:125] begin check local disk info of client
I0321 16:05:30.324223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:05:30.324229  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9280 0xc0004d92c0]
E0321 16:05:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:05:33.409773  543705 cpu.go:275] no items to output this cycle
I0321 16:05:33.409776  543705 memory.go:184] no items to output this cycle
E0321 16:05:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:05:43.409792  543705 memory.go:191] Add success.
I0321 16:05:43.409794  543705 cpu.go:282] Add success.
I0321 16:05:43.420048  543705 net.go:648] Add success.
I0321 16:05:43.422829  543705 net.go:770] primary dev: ETH0
I0321 16:05:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:05:43.422853  543705 net.go:698] Add success.
I0321 16:05:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:05:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:05:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:05:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:05:53.409796  543705 memory.go:184] no items to output this cycle
I0321 16:05:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 16:06:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:06:03.409773  543705 memory.go:184] no items to output this cycle
I0321 16:06:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 16:06:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:06:13.409794  543705 memory.go:191] Add success.
I0321 16:06:13.409798  543705 cpu.go:282] Add success.
W0321 16:06:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:06:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:06:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:06:13.420164  543705 net.go:648] Add success.
I0321 16:06:13.422727  543705 net.go:770] primary dev: ETH0
I0321 16:06:13.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:06:13.422752  543705 net.go:698] Add success.
I0321 16:06:13.468704  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3920f4a4-8c9e-40db-9215-e3a39eae8e87","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:06:13.468737  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:06:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:06:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:06:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 16:06:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:06:14.456624  543705 disk_worker.go:494] system disk:vda1
I0321 16:06:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:06:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:06:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:06:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:06:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:06:23.409798  543705 memory.go:184] no items to output this cycle
I0321 16:06:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 16:06:30.325677  543705 disk_info.go:125] begin check local disk info of client
I0321 16:06:30.328247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:06:30.328253  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab100 0xc0001ab140]
E0321 16:06:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:06:33.409792  543705 memory.go:184] no items to output this cycle
I0321 16:06:33.409838  543705 cpu.go:275] no items to output this cycle
I0321 16:06:39.020794  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:06:39.020801  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:06:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:06:43.409814  543705 cpu.go:282] Add success.
I0321 16:06:43.410786  543705 memory.go:191] Add success.
I0321 16:06:43.419733  543705 net.go:648] Add success.
I0321 16:06:43.422437  543705 net.go:770] primary dev: ETH0
I0321 16:06:43.422452  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:06:43.422466  543705 net.go:698] Add success.
I0321 16:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:06:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:06:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:06:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:06:53.409771  543705 memory.go:184] no items to output this cycle
I0321 16:06:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 16:07:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:07:03.409766  543705 memory.go:184] no items to output this cycle
I0321 16:07:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 16:07:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:07:13.409802  543705 memory.go:191] Add success.
I0321 16:07:13.409814  543705 cpu.go:282] Add success.
W0321 16:07:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:07:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:07:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:07:13.420263  543705 net.go:648] Add success.
I0321 16:07:13.423314  543705 net.go:770] primary dev: ETH0
I0321 16:07:13.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:07:13.423342  543705 net.go:698] Add success.
I0321 16:07:13.452858  543705 event_worker.go:152] Polling the log file for events...
W0321 16:07:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 16:07:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:07:14.456782  543705 disk_worker.go:494] system disk:vda1
I0321 16:07:14.456822  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:07:14.457137  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:07:14.457145  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:07:14.457149  543705 custom_config.go:64] query custom config with name: gpu
E0321 16:07:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:07:15.456868  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:07:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:07:16.458003  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:07:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:07:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:07:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:07:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:07:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 16:07:23.409798  543705 memory.go:184] no items to output this cycle
I0321 16:07:30.329687  543705 disk_info.go:125] begin check local disk info of client
I0321 16:07:30.332166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:07:30.332173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3300 0xc0002a3340]
E0321 16:07:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:07:33.409826  543705 memory.go:184] no items to output this cycle
I0321 16:07:33.409834  543705 cpu.go:275] no items to output this cycle
E0321 16:07:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:07:43.409928  543705 cpu.go:282] Add success.
I0321 16:07:43.409931  543705 memory.go:191] Add success.
I0321 16:07:43.419737  543705 net.go:648] Add success.
I0321 16:07:43.422680  543705 net.go:770] primary dev: ETH0
I0321 16:07:43.422704  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:07:43.422716  543705 net.go:698] Add success.
I0321 16:07:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:07:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:07:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:07:53.409806  543705 memory.go:184] no items to output this cycle
I0321 16:07:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 16:08:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:08:03.409790  543705 memory.go:184] no items to output this cycle
I0321 16:08:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 16:08:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:08:13.409811  543705 memory.go:191] Add success.
I0321 16:08:13.409816  543705 cpu.go:282] Add success.
W0321 16:08:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:08:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:08:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:08:13.420055  543705 net.go:648] Add success.
I0321 16:08:13.422928  543705 net.go:770] primary dev: ETH0
I0321 16:08:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:08:13.422957  543705 net.go:698] Add success.
I0321 16:08:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:08:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:08:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 16:08:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:08:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 16:08:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:08:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:08:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:08:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:08:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:08:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:08:23.409783  543705 memory.go:184] no items to output this cycle
I0321 16:08:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 16:08:30.333675  543705 disk_info.go:125] begin check local disk info of client
I0321 16:08:30.336124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:08:30.336130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039de00 0xc00039de40]
E0321 16:08:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:08:33.409810  543705 memory.go:184] no items to output this cycle
I0321 16:08:33.409826  543705 cpu.go:275] no items to output this cycle
E0321 16:08:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:08:43.409802  543705 memory.go:191] Add success.
I0321 16:08:43.409802  543705 cpu.go:282] Add success.
I0321 16:08:43.420158  543705 net.go:648] Add success.
I0321 16:08:43.423435  543705 net.go:770] primary dev: ETH0
I0321 16:08:43.423447  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:08:43.423459  543705 net.go:698] Add success.
I0321 16:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:08:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:08:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:08:53.409779  543705 memory.go:184] no items to output this cycle
I0321 16:08:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 16:09:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:09:03.409796  543705 memory.go:184] no items to output this cycle
I0321 16:09:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:09:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:09:13.409789  543705 memory.go:191] Add success.
I0321 16:09:13.409804  543705 cpu.go:282] Add success.
W0321 16:09:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:09:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:09:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:09:13.420055  543705 net.go:648] Add success.
I0321 16:09:13.422770  543705 net.go:770] primary dev: ETH0
I0321 16:09:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:09:13.422799  543705 net.go:698] Add success.
I0321 16:09:13.470433  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8bd3c112-9e57-4afa-8870-a1ecdb5370ed","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:09:13.470467  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:09:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:09:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:09:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 16:09:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:09:14.456608  543705 disk_worker.go:494] system disk:vda1
I0321 16:09:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:09:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:09:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:09:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:09:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:09:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:09:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:09:23.409776  543705 memory.go:184] no items to output this cycle
I0321 16:09:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 16:09:30.337673  543705 disk_info.go:125] begin check local disk info of client
I0321 16:09:30.340131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:09:30.340137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002794c0 0xc000279500]
E0321 16:09:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:09:33.409773  543705 memory.go:184] no items to output this cycle
I0321 16:09:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 16:09:39.021734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:09:39.021740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:09:43.409814  543705 cpu.go:282] Add success.
I0321 16:09:43.410819  543705 memory.go:191] Add success.
I0321 16:09:43.419722  543705 net.go:648] Add success.
I0321 16:09:43.422480  543705 net.go:770] primary dev: ETH0
I0321 16:09:43.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:09:43.422505  543705 net.go:698] Add success.
I0321 16:09:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:09:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:09:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:09:53.409762  543705 memory.go:184] no items to output this cycle
I0321 16:09:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 16:10:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:10:03.409772  543705 memory.go:184] no items to output this cycle
I0321 16:10:03.409778  543705 cpu.go:275] no items to output this cycle
E0321 16:10:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:10:13.409788  543705 memory.go:191] Add success.
I0321 16:10:13.409805  543705 cpu.go:282] Add success.
W0321 16:10:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:10:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:10:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:10:13.420133  543705 net.go:648] Add success.
I0321 16:10:13.422769  543705 net.go:770] primary dev: ETH0
I0321 16:10:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:10:13.422794  543705 net.go:698] Add success.
I0321 16:10:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:10:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:10:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 16:10:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:10:14.456477  543705 disk_worker.go:494] system disk:vda1
I0321 16:10:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:10:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:10:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:10:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:10:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:10:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:10:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:10:23.409782  543705 memory.go:184] no items to output this cycle
I0321 16:10:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 16:10:30.341680  543705 disk_info.go:125] begin check local disk info of client
I0321 16:10:30.344135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:10:30.344145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330000 0xc000330040]
E0321 16:10:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:10:33.409802  543705 memory.go:184] no items to output this cycle
I0321 16:10:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 16:10:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:10:43.409789  543705 memory.go:191] Add success.
I0321 16:10:43.409790  543705 cpu.go:282] Add success.
I0321 16:10:43.419710  543705 net.go:648] Add success.
I0321 16:10:43.422662  543705 net.go:770] primary dev: ETH0
I0321 16:10:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:10:43.422687  543705 net.go:698] Add success.
I0321 16:10:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:10:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:10:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:10:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:10:53.409771  543705 memory.go:184] no items to output this cycle
I0321 16:10:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 16:11:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:11:03.409783  543705 memory.go:184] no items to output this cycle
I0321 16:11:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 16:11:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:11:13.409796  543705 memory.go:191] Add success.
I0321 16:11:13.409812  543705 cpu.go:282] Add success.
W0321 16:11:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:11:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:11:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:11:13.420158  543705 net.go:648] Add success.
I0321 16:11:13.422966  543705 net.go:770] primary dev: ETH0
I0321 16:11:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:11:13.422996  543705 net.go:698] Add success.
I0321 16:11:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:11:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:11:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 16:11:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:11:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 16:11:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:11:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:11:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:11:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:11:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:11:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:11:23.409774  543705 memory.go:184] no items to output this cycle
I0321 16:11:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 16:11:30.345675  543705 disk_info.go:125] begin check local disk info of client
I0321 16:11:30.348192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:11:30.348200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f07c0 0xc0001f0800]
E0321 16:11:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:11:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 16:11:33.409782  543705 memory.go:184] no items to output this cycle
E0321 16:11:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:11:43.409889  543705 cpu.go:282] Add success.
I0321 16:11:43.409906  543705 memory.go:191] Add success.
I0321 16:11:43.419735  543705 net.go:648] Add success.
I0321 16:11:43.422598  543705 net.go:770] primary dev: ETH0
I0321 16:11:43.422610  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:11:43.422622  543705 net.go:698] Add success.
I0321 16:11:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:11:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:11:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:11:53.410710  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:11:53.410725  543705 memory.go:184] no items to output this cycle
I0321 16:11:53.410739  543705 cpu.go:275] no items to output this cycle
E0321 16:12:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:12:03.409774  543705 memory.go:184] no items to output this cycle
I0321 16:12:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 16:12:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:12:13.409822  543705 memory.go:191] Add success.
I0321 16:12:13.409824  543705 cpu.go:282] Add success.
W0321 16:12:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:12:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:12:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:12:13.420197  543705 net.go:648] Add success.
I0321 16:12:13.423083  543705 net.go:770] primary dev: ETH0
I0321 16:12:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:12:13.423112  543705 net.go:698] Add success.
I0321 16:12:13.498307  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a21440bf-3b10-482d-9a3e-c05e219cb8af","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:12:13.498342  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 16:12:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:12:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 16:12:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:12:14.456194  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:12:14.456203  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:12:14.456208  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:12:14.456472  543705 disk_worker.go:494] system disk:vda1
I0321 16:12:14.456500  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:12:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:12:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:12:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:12:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:12:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:12:16.457986  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:12:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:12:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:12:23.409806  543705 memory.go:184] no items to output this cycle
I0321 16:12:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 16:12:30.349675  543705 disk_info.go:125] begin check local disk info of client
I0321 16:12:30.352205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:12:30.352212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0321 16:12:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:12:33.409804  543705 memory.go:184] no items to output this cycle
I0321 16:12:33.409819  543705 cpu.go:275] no items to output this cycle
I0321 16:12:39.024815  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:12:39.024821  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:12:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:12:43.410701  543705 memory.go:191] Add success.
I0321 16:12:43.409822  543705 cpu.go:282] Add success.
I0321 16:12:43.420402  543705 net.go:648] Add success.
I0321 16:12:43.423038  543705 net.go:770] primary dev: ETH0
I0321 16:12:43.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:12:43.423062  543705 net.go:698] Add success.
I0321 16:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:12:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:12:53.409768  543705 memory.go:184] no items to output this cycle
I0321 16:12:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:13:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:13:03.409785  543705 memory.go:184] no items to output this cycle
I0321 16:13:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 16:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:13:13.409797  543705 memory.go:191] Add success.
I0321 16:13:13.409798  543705 cpu.go:282] Add success.
W0321 16:13:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:13:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:13:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:13:13.420165  543705 net.go:648] Add success.
I0321 16:13:13.422922  543705 net.go:770] primary dev: ETH0
I0321 16:13:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:13:13.422951  543705 net.go:698] Add success.
I0321 16:13:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:13:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:13:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 16:13:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:13:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 16:13:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:13:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:13:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:13:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:13:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:13:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:13:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:13:23.409792  543705 memory.go:184] no items to output this cycle
I0321 16:13:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 16:13:30.353674  543705 disk_info.go:125] begin check local disk info of client
I0321 16:13:30.356218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:13:30.356224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa040 0xc0001aa080]
E0321 16:13:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:13:33.409769  543705 memory.go:184] no items to output this cycle
I0321 16:13:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 16:13:43.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:13:43.409895  543705 memory.go:191] Add success.
I0321 16:13:43.409977  543705 cpu.go:282] Add success.
I0321 16:13:43.419710  543705 net.go:648] Add success.
I0321 16:13:43.422286  543705 net.go:770] primary dev: ETH0
I0321 16:13:43.422299  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:13:43.422311  543705 net.go:698] Add success.
I0321 16:13:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:13:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:13:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:13:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:13:53.409769  543705 memory.go:184] no items to output this cycle
I0321 16:13:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 16:14:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:14:03.409778  543705 memory.go:184] no items to output this cycle
I0321 16:14:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 16:14:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:14:13.409786  543705 memory.go:191] Add success.
W0321 16:14:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 16:14:13.409814  543705 cpu.go:282] Add success.
W0321 16:14:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:14:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:14:13.420144  543705 net.go:648] Add success.
I0321 16:14:13.422798  543705 net.go:770] primary dev: ETH0
I0321 16:14:13.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:14:13.422825  543705 net.go:698] Add success.
I0321 16:14:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:14:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:14:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 16:14:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:14:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 16:14:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:14:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:14:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:14:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:14:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:14:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:14:23.409765  543705 memory.go:184] no items to output this cycle
I0321 16:14:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 16:14:30.357673  543705 disk_info.go:125] begin check local disk info of client
I0321 16:14:30.360155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:14:30.360162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028aa80 0xc00028aac0]
E0321 16:14:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:14:33.409791  543705 memory.go:184] no items to output this cycle
I0321 16:14:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 16:14:43.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:14:43.409887  543705 memory.go:191] Add success.
I0321 16:14:43.409963  543705 cpu.go:282] Add success.
I0321 16:14:43.419714  543705 net.go:648] Add success.
I0321 16:14:43.423085  543705 net.go:770] primary dev: ETH0
I0321 16:14:43.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:14:43.423110  543705 net.go:698] Add success.
I0321 16:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:14:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:14:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:14:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:14:53.409799  543705 memory.go:184] no items to output this cycle
I0321 16:14:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:15:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:15:03.409768  543705 memory.go:184] no items to output this cycle
I0321 16:15:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 16:15:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:15:13.409782  543705 memory.go:191] Add success.
W0321 16:15:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 16:15:13.409813  543705 cpu.go:282] Add success.
W0321 16:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:15:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:15:13.420208  543705 net.go:648] Add success.
I0321 16:15:13.423023  543705 net.go:770] primary dev: ETH0
I0321 16:15:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:15:13.423051  543705 net.go:698] Add success.
I0321 16:15:13.464216  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f52dd062-2129-44c2-bd47-039998c14a8b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:15:13.464251  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:15:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:15:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:15:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 16:15:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:15:14.456682  543705 disk_worker.go:494] system disk:vda1
I0321 16:15:14.456720  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:15:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:15:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:15:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:15:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:15:23.409763  543705 memory.go:184] no items to output this cycle
I0321 16:15:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 16:15:30.361677  543705 disk_info.go:125] begin check local disk info of client
I0321 16:15:30.364198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:15:30.364205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376b40 0xc000376b80]
E0321 16:15:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:15:33.409802  543705 memory.go:184] no items to output this cycle
I0321 16:15:33.409816  543705 cpu.go:275] no items to output this cycle
I0321 16:15:39.025727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:15:39.025734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:15:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:15:43.410663  543705 memory.go:191] Add success.
I0321 16:15:43.409801  543705 cpu.go:282] Add success.
I0321 16:15:43.420742  543705 net.go:648] Add success.
I0321 16:15:43.423395  543705 net.go:770] primary dev: ETH0
I0321 16:15:43.423408  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:15:43.423420  543705 net.go:698] Add success.
I0321 16:15:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:15:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:15:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:15:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:15:53.409780  543705 memory.go:184] no items to output this cycle
I0321 16:15:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 16:16:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:16:03.409800  543705 memory.go:184] no items to output this cycle
I0321 16:16:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 16:16:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:16:13.409823  543705 memory.go:191] Add success.
I0321 16:16:13.409827  543705 cpu.go:282] Add success.
W0321 16:16:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:16:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:16:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:16:13.420274  543705 net.go:648] Add success.
I0321 16:16:13.423486  543705 net.go:770] primary dev: ETH0
I0321 16:16:13.423499  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:16:13.423511  543705 net.go:698] Add success.
I0321 16:16:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:16:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:16:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 16:16:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:16:14.456529  543705 disk_worker.go:494] system disk:vda1
I0321 16:16:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:16:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:16:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:16:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:16:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:16:23.409775  543705 memory.go:184] no items to output this cycle
I0321 16:16:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 16:16:30.365675  543705 disk_info.go:125] begin check local disk info of client
I0321 16:16:30.368255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:16:30.368262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b3a80 0xc0004b3ac0]
E0321 16:16:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:16:33.409769  543705 memory.go:184] no items to output this cycle
I0321 16:16:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 16:16:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:16:43.409812  543705 memory.go:191] Add success.
I0321 16:16:43.409818  543705 cpu.go:282] Add success.
I0321 16:16:43.419740  543705 net.go:648] Add success.
I0321 16:16:43.422303  543705 net.go:770] primary dev: ETH0
I0321 16:16:43.422316  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:16:43.422327  543705 net.go:698] Add success.
I0321 16:16:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:16:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:16:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:16:53.409767  543705 memory.go:184] no items to output this cycle
I0321 16:16:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 16:17:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:17:03.409765  543705 memory.go:184] no items to output this cycle
I0321 16:17:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 16:17:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:17:13.409792  543705 memory.go:191] Add success.
I0321 16:17:13.409806  543705 cpu.go:282] Add success.
W0321 16:17:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:17:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:17:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:17:13.420139  543705 net.go:648] Add success.
I0321 16:17:13.422762  543705 net.go:770] primary dev: ETH0
I0321 16:17:13.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:17:13.422786  543705 net.go:698] Add success.
I0321 16:17:13.453337  543705 event_worker.go:152] Polling the log file for events...
W0321 16:17:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:17:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0321 16:17:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:17:14.456914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:17:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:17:14.456930  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:17:14.457005  543705 disk_worker.go:494] system disk:vda1
I0321 16:17:14.457047  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:17:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:17:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:17:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:17:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:17:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:17:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:17:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:17:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:17:23.409804  543705 memory.go:184] no items to output this cycle
I0321 16:17:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 16:17:30.369672  543705 disk_info.go:125] begin check local disk info of client
I0321 16:17:30.372195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:17:30.372201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048cd00 0xc00048cd40]
E0321 16:17:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:17:33.409763  543705 memory.go:184] no items to output this cycle
I0321 16:17:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 16:17:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:17:43.409816  543705 memory.go:191] Add success.
I0321 16:17:43.409817  543705 cpu.go:282] Add success.
I0321 16:17:43.420148  543705 net.go:648] Add success.
I0321 16:17:43.423216  543705 net.go:770] primary dev: ETH0
I0321 16:17:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:17:43.423241  543705 net.go:698] Add success.
I0321 16:17:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:17:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:17:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 16:17:53.409786  543705 memory.go:184] no items to output this cycle
E0321 16:18:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:18:03.409779  543705 memory.go:184] no items to output this cycle
I0321 16:18:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 16:18:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:18:13.409801  543705 memory.go:191] Add success.
I0321 16:18:13.409803  543705 cpu.go:282] Add success.
W0321 16:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:18:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:18:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:18:13.420293  543705 net.go:648] Add success.
I0321 16:18:13.423051  543705 net.go:770] primary dev: ETH0
I0321 16:18:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:18:13.423079  543705 net.go:698] Add success.
I0321 16:18:13.469010  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"48984e99-73b4-4bc0-8462-30c081d24981","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:18:13.469043  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:18:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:18:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:18:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 16:18:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:18:14.456622  543705 disk_worker.go:494] system disk:vda1
I0321 16:18:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:18:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:18:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:18:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:18:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:18:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:18:23.409767  543705 memory.go:184] no items to output this cycle
I0321 16:18:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 16:18:30.373675  543705 disk_info.go:125] begin check local disk info of client
I0321 16:18:30.376228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:18:30.376235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0000 0xc0003b0040]
E0321 16:18:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:18:33.409771  543705 memory.go:184] no items to output this cycle
I0321 16:18:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 16:18:39.028830  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:18:39.028837  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:18:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:18:43.409796  543705 cpu.go:282] Add success.
I0321 16:18:43.410744  543705 memory.go:191] Add success.
I0321 16:18:43.419684  543705 net.go:648] Add success.
I0321 16:18:43.422623  543705 net.go:770] primary dev: ETH0
I0321 16:18:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:18:43.422648  543705 net.go:698] Add success.
I0321 16:18:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:18:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:18:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:18:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:18:53.409803  543705 memory.go:184] no items to output this cycle
I0321 16:18:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:19:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:19:03.409780  543705 cpu.go:275] no items to output this cycle
I0321 16:19:03.409786  543705 memory.go:184] no items to output this cycle
E0321 16:19:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:19:13.409798  543705 memory.go:191] Add success.
I0321 16:19:13.409806  543705 cpu.go:282] Add success.
W0321 16:19:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:19:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:19:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:19:13.420114  543705 net.go:648] Add success.
I0321 16:19:13.422778  543705 net.go:770] primary dev: ETH0
I0321 16:19:13.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:19:13.422804  543705 net.go:698] Add success.
I0321 16:19:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:19:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:19:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 16:19:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:19:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 16:19:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:19:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:19:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:19:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:19:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:19:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:19:23.409816  543705 memory.go:184] no items to output this cycle
I0321 16:19:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 16:19:30.377671  543705 disk_info.go:125] begin check local disk info of client
I0321 16:19:30.380419  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:19:30.380425  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481940 0xc000481980]
E0321 16:19:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:19:33.409794  543705 memory.go:184] no items to output this cycle
I0321 16:19:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 16:19:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:19:43.409774  543705 memory.go:191] Add success.
I0321 16:19:43.409805  543705 cpu.go:282] Add success.
I0321 16:19:43.420119  543705 net.go:648] Add success.
I0321 16:19:43.423362  543705 net.go:770] primary dev: ETH0
I0321 16:19:43.423377  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:19:43.423391  543705 net.go:698] Add success.
I0321 16:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:19:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:19:53.409795  543705 memory.go:184] no items to output this cycle
I0321 16:19:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:20:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:20:03.409793  543705 memory.go:184] no items to output this cycle
I0321 16:20:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 16:20:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:20:13.409790  543705 memory.go:191] Add success.
I0321 16:20:13.409802  543705 cpu.go:282] Add success.
W0321 16:20:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:20:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:20:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:20:13.420068  543705 net.go:648] Add success.
I0321 16:20:13.422998  543705 net.go:770] primary dev: ETH0
I0321 16:20:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:20:13.423025  543705 net.go:698] Add success.
I0321 16:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:20:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:20:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 16:20:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:20:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 16:20:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:20:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:20:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:20:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:20:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:20:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:20:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:20:23.409769  543705 memory.go:184] no items to output this cycle
I0321 16:20:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 16:20:30.381677  543705 disk_info.go:125] begin check local disk info of client
I0321 16:20:30.384179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:20:30.384186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384280 0xc0003842c0]
E0321 16:20:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:20:33.409790  543705 memory.go:184] no items to output this cycle
I0321 16:20:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 16:20:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:20:43.409788  543705 memory.go:191] Add success.
I0321 16:20:43.409790  543705 cpu.go:282] Add success.
I0321 16:20:43.419824  543705 net.go:648] Add success.
I0321 16:20:43.422683  543705 net.go:770] primary dev: ETH0
I0321 16:20:43.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:20:43.422707  543705 net.go:698] Add success.
I0321 16:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:20:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:20:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:20:53.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:20:53.409893  543705 memory.go:184] no items to output this cycle
I0321 16:20:53.409990  543705 cpu.go:275] no items to output this cycle
E0321 16:21:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:21:03.409794  543705 memory.go:184] no items to output this cycle
I0321 16:21:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 16:21:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:21:13.409794  543705 memory.go:191] Add success.
I0321 16:21:13.409811  543705 cpu.go:282] Add success.
W0321 16:21:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:21:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:21:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:21:13.420228  543705 net.go:648] Add success.
I0321 16:21:13.422693  543705 net.go:770] primary dev: ETH0
I0321 16:21:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:21:13.422722  543705 net.go:698] Add success.
I0321 16:21:13.468614  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"50a79e94-5ea5-4e3a-847e-fd605b4f80c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:21:13.468647  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:21:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:21:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:21:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 16:21:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:21:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 16:21:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:21:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:21:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:21:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:21:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:21:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:21:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:21:23.409812  543705 memory.go:184] no items to output this cycle
I0321 16:21:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 16:21:30.385675  543705 disk_info.go:125] begin check local disk info of client
I0321 16:21:30.388243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:21:30.388250  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480600 0xc000480640]
E0321 16:21:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:21:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 16:21:33.409788  543705 memory.go:184] no items to output this cycle
I0321 16:21:39.029733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:21:39.029740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:21:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:21:43.410610  543705 memory.go:191] Add success.
I0321 16:21:43.409789  543705 cpu.go:282] Add success.
I0321 16:21:43.420334  543705 net.go:648] Add success.
I0321 16:21:43.422955  543705 net.go:770] primary dev: ETH0
I0321 16:21:43.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:21:43.423074  543705 net.go:698] Add success.
I0321 16:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:21:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:21:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:21:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:21:53.409772  543705 memory.go:184] no items to output this cycle
I0321 16:21:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 16:22:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:22:03.409806  543705 memory.go:184] no items to output this cycle
I0321 16:22:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 16:22:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:22:13.409825  543705 memory.go:191] Add success.
I0321 16:22:13.409831  543705 cpu.go:282] Add success.
W0321 16:22:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:22:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:22:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:22:13.420140  543705 net.go:648] Add success.
I0321 16:22:13.422893  543705 net.go:770] primary dev: ETH0
I0321 16:22:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:22:13.422918  543705 net.go:698] Add success.
W0321 16:22:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:22:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 16:22:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:22:14.455909  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:22:14.455918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:22:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:22:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 16:22:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:22:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:22:15.456794  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:22:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:22:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:22:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:22:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:22:16.472341  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:22:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:22:23.409789  543705 memory.go:184] no items to output this cycle
I0321 16:22:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 16:22:30.392067  543705 disk_info.go:125] begin check local disk info of client
I0321 16:22:30.394619  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:22:30.394625  543705 disk_info.go:196] parse disk info done, disk is : [0xc000346800 0xc000346840]
E0321 16:22:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:22:33.409805  543705 memory.go:184] no items to output this cycle
I0321 16:22:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 16:22:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:22:43.409791  543705 memory.go:191] Add success.
I0321 16:22:43.409796  543705 cpu.go:282] Add success.
I0321 16:22:43.419981  543705 net.go:648] Add success.
I0321 16:22:43.422659  543705 net.go:770] primary dev: ETH0
I0321 16:22:43.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:22:43.422686  543705 net.go:698] Add success.
I0321 16:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:22:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:22:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:22:53.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:22:53.409868  543705 memory.go:184] no items to output this cycle
I0321 16:22:53.409977  543705 cpu.go:275] no items to output this cycle
E0321 16:23:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:23:03.409774  543705 memory.go:184] no items to output this cycle
I0321 16:23:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 16:23:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:23:13.409812  543705 memory.go:191] Add success.
I0321 16:23:13.409821  543705 cpu.go:282] Add success.
W0321 16:23:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:23:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:23:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:23:13.420149  543705 net.go:648] Add success.
I0321 16:23:13.422916  543705 net.go:770] primary dev: ETH0
I0321 16:23:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:23:13.422942  543705 net.go:698] Add success.
I0321 16:23:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:23:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:23:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 16:23:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:23:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 16:23:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:23:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:23:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:23:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:23:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:23:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:23:23.409814  543705 memory.go:184] no items to output this cycle
I0321 16:23:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 16:23:30.397674  543705 disk_info.go:125] begin check local disk info of client
I0321 16:23:30.400180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:23:30.400186  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c980 0xc00039c9c0]
E0321 16:23:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:23:33.409801  543705 memory.go:184] no items to output this cycle
I0321 16:23:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 16:23:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:23:43.409771  543705 memory.go:191] Add success.
I0321 16:23:43.409820  543705 cpu.go:282] Add success.
I0321 16:23:43.419999  543705 net.go:648] Add success.
I0321 16:23:43.422880  543705 net.go:770] primary dev: ETH0
I0321 16:23:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:23:43.422907  543705 net.go:698] Add success.
I0321 16:23:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:23:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:23:53.409762  543705 memory.go:184] no items to output this cycle
I0321 16:23:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 16:24:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:24:03.409807  543705 memory.go:184] no items to output this cycle
I0321 16:24:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 16:24:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:24:13.409802  543705 memory.go:191] Add success.
I0321 16:24:13.409815  543705 cpu.go:282] Add success.
W0321 16:24:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:24:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:24:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:24:13.420266  543705 net.go:648] Add success.
I0321 16:24:13.423014  543705 net.go:770] primary dev: ETH0
I0321 16:24:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:24:13.423043  543705 net.go:698] Add success.
I0321 16:24:13.651897  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"74b28425-2fef-4d40-92eb-411abbed0b22","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:24:13.651932  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:24:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:24:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:24:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 16:24:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:24:14.456711  543705 disk_worker.go:494] system disk:vda1
I0321 16:24:14.456739  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:24:15.455619  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:24:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:24:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:24:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:24:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:24:23.409801  543705 memory.go:184] no items to output this cycle
I0321 16:24:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 16:24:30.401673  543705 disk_info.go:125] begin check local disk info of client
I0321 16:24:30.404221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:24:30.404228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003852c0 0xc000385300]
E0321 16:24:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:24:33.409791  543705 memory.go:184] no items to output this cycle
I0321 16:24:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 16:24:39.032856  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:24:39.032862  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:24:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:24:43.410675  543705 memory.go:191] Add success.
I0321 16:24:43.409801  543705 cpu.go:282] Add success.
I0321 16:24:43.420368  543705 net.go:648] Add success.
I0321 16:24:43.423051  543705 net.go:770] primary dev: ETH0
I0321 16:24:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:24:43.423075  543705 net.go:698] Add success.
I0321 16:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:24:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:24:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:24:53.409765  543705 memory.go:184] no items to output this cycle
I0321 16:24:53.409892  543705 cpu.go:275] no items to output this cycle
E0321 16:25:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:25:03.409773  543705 memory.go:184] no items to output this cycle
I0321 16:25:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 16:25:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:25:13.409787  543705 memory.go:191] Add success.
I0321 16:25:13.409806  543705 cpu.go:282] Add success.
W0321 16:25:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:25:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:25:13.420137  543705 net.go:648] Add success.
I0321 16:25:13.422796  543705 net.go:770] primary dev: ETH0
I0321 16:25:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:25:13.422819  543705 net.go:698] Add success.
I0321 16:25:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:25:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:25:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0321 16:25:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:25:14.456617  543705 disk_worker.go:494] system disk:vda1
I0321 16:25:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:25:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:25:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:25:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:25:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:25:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:25:23.409804  543705 memory.go:184] no items to output this cycle
I0321 16:25:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 16:25:30.405674  543705 disk_info.go:125] begin check local disk info of client
I0321 16:25:30.408190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:25:30.408197  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384ac0 0xc000384b00]
E0321 16:25:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:25:33.409793  543705 memory.go:184] no items to output this cycle
I0321 16:25:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 16:25:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:25:43.409779  543705 memory.go:191] Add success.
I0321 16:25:43.409807  543705 cpu.go:282] Add success.
I0321 16:25:43.419936  543705 net.go:648] Add success.
I0321 16:25:43.423006  543705 net.go:770] primary dev: ETH0
I0321 16:25:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:25:43.423033  543705 net.go:698] Add success.
I0321 16:25:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:25:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:25:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:25:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:25:53.409780  543705 cpu.go:275] no items to output this cycle
I0321 16:25:53.409790  543705 memory.go:184] no items to output this cycle
E0321 16:26:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:26:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 16:26:03.409788  543705 memory.go:184] no items to output this cycle
E0321 16:26:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:26:13.409789  543705 memory.go:191] Add success.
W0321 16:26:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:26:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:26:13.409825  543705 cpu.go:282] Add success.
I0321 16:26:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:26:13.420175  543705 net.go:648] Add success.
I0321 16:26:13.423127  543705 net.go:770] primary dev: ETH0
I0321 16:26:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:26:13.423152  543705 net.go:698] Add success.
I0321 16:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:26:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:26:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 16:26:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:26:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 16:26:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:26:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:26:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:26:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:26:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:26:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:26:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:26:23.409769  543705 memory.go:184] no items to output this cycle
I0321 16:26:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 16:26:30.409676  543705 disk_info.go:125] begin check local disk info of client
I0321 16:26:30.412451  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:26:30.412458  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513280 0xc0005132c0]
E0321 16:26:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:26:33.409772  543705 memory.go:184] no items to output this cycle
I0321 16:26:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 16:26:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:26:43.409810  543705 memory.go:191] Add success.
I0321 16:26:43.409815  543705 cpu.go:282] Add success.
I0321 16:26:43.419970  543705 net.go:648] Add success.
I0321 16:26:43.423259  543705 net.go:770] primary dev: ETH0
I0321 16:26:43.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:26:43.423287  543705 net.go:698] Add success.
I0321 16:26:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:26:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:26:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:26:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:26:53.409774  543705 memory.go:184] no items to output this cycle
I0321 16:26:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 16:27:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:27:03.409805  543705 memory.go:184] no items to output this cycle
I0321 16:27:03.409817  543705 cpu.go:275] no items to output this cycle
W0321 16:27:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:27:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:27:13.409743  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 16:27:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:27:13.409849  543705 cpu.go:282] Add success.
I0321 16:27:13.409857  543705 memory.go:191] Add success.
I0321 16:27:13.420121  543705 net.go:648] Add success.
I0321 16:27:13.429251  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 16:27:13.429335  543705 net.go:770] primary dev: ETH0
I0321 16:27:13.429349  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:27:13.429362  543705 net.go:698] Add success.
I0321 16:27:13.452879  543705 event_worker.go:152] Polling the log file for events...
I0321 16:27:13.469155  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b2640f89-7416-462b-aa3b-067e62eb611e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:27:13.469187  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 16:27:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:27:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 16:27:14.455213  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:27:14.457060  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:27:14.457071  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:27:14.457077  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:27:14.457104  543705 disk_worker.go:494] system disk:vda1
I0321 16:27:14.457153  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:27:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:27:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:27:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:27:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:27:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:27:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:27:16.472303  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:27:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:27:23.409779  543705 memory.go:184] no items to output this cycle
I0321 16:27:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 16:27:30.412795  543705 disk_info.go:125] begin check local disk info of client
I0321 16:27:30.415260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:27:30.415266  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348740 0xc000348780]
E0321 16:27:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:27:33.409794  543705 memory.go:184] no items to output this cycle
I0321 16:27:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 16:27:39.033745  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:27:39.033753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:27:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:27:43.410842  543705 memory.go:191] Add success.
I0321 16:27:43.409817  543705 cpu.go:282] Add success.
I0321 16:27:43.420529  543705 net.go:648] Add success.
I0321 16:27:43.423056  543705 net.go:770] primary dev: ETH0
I0321 16:27:43.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:27:43.423088  543705 net.go:698] Add success.
I0321 16:27:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:27:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:27:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:27:53.409780  543705 memory.go:184] no items to output this cycle
I0321 16:27:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 16:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:28:03.409800  543705 memory.go:184] no items to output this cycle
I0321 16:28:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 16:28:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:28:13.409788  543705 memory.go:191] Add success.
I0321 16:28:13.409806  543705 cpu.go:282] Add success.
W0321 16:28:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:28:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:28:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:28:13.420159  543705 net.go:648] Add success.
I0321 16:28:13.423465  543705 net.go:770] primary dev: ETH0
I0321 16:28:13.423478  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:28:13.423490  543705 net.go:698] Add success.
I0321 16:28:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:28:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:28:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 16:28:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:28:14.456522  543705 disk_worker.go:494] system disk:vda1
I0321 16:28:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:28:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:28:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:28:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:28:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:28:23.409798  543705 memory.go:184] no items to output this cycle
I0321 16:28:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 16:28:30.415792  543705 disk_info.go:125] begin check local disk info of client
I0321 16:28:30.418302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:28:30.418308  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480280 0xc0004802c0]
E0321 16:28:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:28:33.409769  543705 memory.go:184] no items to output this cycle
I0321 16:28:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 16:28:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:28:43.409810  543705 memory.go:191] Add success.
I0321 16:28:43.409816  543705 cpu.go:282] Add success.
I0321 16:28:43.419880  543705 net.go:648] Add success.
I0321 16:28:43.422578  543705 net.go:770] primary dev: ETH0
I0321 16:28:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:28:43.422603  543705 net.go:698] Add success.
I0321 16:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:28:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:28:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:28:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:28:53.409798  543705 memory.go:184] no items to output this cycle
I0321 16:28:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 16:29:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:29:03.409780  543705 memory.go:184] no items to output this cycle
I0321 16:29:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 16:29:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:29:13.409778  543705 memory.go:191] Add success.
W0321 16:29:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 16:29:13.409807  543705 cpu.go:282] Add success.
W0321 16:29:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:29:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:29:13.420270  543705 net.go:648] Add success.
I0321 16:29:13.422823  543705 net.go:770] primary dev: ETH0
I0321 16:29:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:29:13.422849  543705 net.go:698] Add success.
I0321 16:29:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:29:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:29:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 16:29:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:29:14.456511  543705 disk_worker.go:494] system disk:vda1
I0321 16:29:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:29:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:29:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:29:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:29:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:29:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:29:23.409765  543705 memory.go:184] no items to output this cycle
I0321 16:29:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 16:29:30.418785  543705 disk_info.go:125] begin check local disk info of client
I0321 16:29:30.421304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:29:30.421311  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328180 0xc0003281c0]
E0321 16:29:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:29:33.409766  543705 memory.go:184] no items to output this cycle
I0321 16:29:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 16:29:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:29:43.409814  543705 memory.go:191] Add success.
I0321 16:29:43.409820  543705 cpu.go:282] Add success.
I0321 16:29:43.419876  543705 net.go:648] Add success.
I0321 16:29:43.422524  543705 net.go:770] primary dev: ETH0
I0321 16:29:43.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:29:43.422553  543705 net.go:698] Add success.
I0321 16:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:29:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:29:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:29:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:29:53.409765  543705 memory.go:184] no items to output this cycle
I0321 16:29:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 16:30:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:30:03.409777  543705 memory.go:184] no items to output this cycle
I0321 16:30:03.409778  543705 cpu.go:275] no items to output this cycle
E0321 16:30:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:30:13.409816  543705 memory.go:191] Add success.
I0321 16:30:13.409826  543705 cpu.go:282] Add success.
W0321 16:30:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:30:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:30:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:30:13.420532  543705 net.go:648] Add success.
I0321 16:30:13.423315  543705 net.go:770] primary dev: ETH0
I0321 16:30:13.423330  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:30:13.423344  543705 net.go:698] Add success.
I0321 16:30:13.468307  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f3181ad1-46c4-4932-b646-1d1e47ed2218","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:30:13.468342  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:30:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:30:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:30:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 16:30:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:30:14.456627  543705 disk_worker.go:494] system disk:vda1
I0321 16:30:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:30:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:30:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:30:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:30:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:30:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:30:23.409766  543705 memory.go:184] no items to output this cycle
I0321 16:30:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 16:30:30.421796  543705 disk_info.go:125] begin check local disk info of client
I0321 16:30:30.424289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:30:30.424295  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371a00 0xc000371a40]
E0321 16:30:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:30:33.409789  543705 memory.go:184] no items to output this cycle
I0321 16:30:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 16:30:39.036879  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:30:39.036886  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:30:43.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:30:43.410630  543705 memory.go:191] Add success.
I0321 16:30:43.409955  543705 cpu.go:282] Add success.
I0321 16:30:43.419707  543705 net.go:648] Add success.
I0321 16:30:43.422510  543705 net.go:770] primary dev: ETH0
I0321 16:30:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:30:43.422535  543705 net.go:698] Add success.
I0321 16:30:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:30:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:30:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:30:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:30:53.409770  543705 memory.go:184] no items to output this cycle
I0321 16:30:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 16:31:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:31:03.409779  543705 memory.go:184] no items to output this cycle
I0321 16:31:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 16:31:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:31:13.409795  543705 memory.go:191] Add success.
I0321 16:31:13.409797  543705 cpu.go:282] Add success.
W0321 16:31:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:31:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:31:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:31:13.420076  543705 net.go:648] Add success.
I0321 16:31:13.423062  543705 net.go:770] primary dev: ETH0
I0321 16:31:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:31:13.423092  543705 net.go:698] Add success.
I0321 16:31:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:31:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:31:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 16:31:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:31:14.456516  543705 disk_worker.go:494] system disk:vda1
I0321 16:31:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:31:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:31:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:31:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:31:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:31:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:31:23.409768  543705 memory.go:184] no items to output this cycle
I0321 16:31:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 16:31:30.424813  543705 disk_info.go:125] begin check local disk info of client
I0321 16:31:30.427395  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:31:30.427401  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b600 0xc00047b640]
E0321 16:31:33.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:31:33.409872  543705 memory.go:184] no items to output this cycle
I0321 16:31:33.409947  543705 cpu.go:275] no items to output this cycle
E0321 16:31:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:31:43.409805  543705 memory.go:191] Add success.
I0321 16:31:43.409818  543705 cpu.go:282] Add success.
I0321 16:31:43.419856  543705 net.go:648] Add success.
I0321 16:31:43.422759  543705 net.go:770] primary dev: ETH0
I0321 16:31:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:31:43.422785  543705 net.go:698] Add success.
I0321 16:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:31:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:31:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:31:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:31:53.409769  543705 memory.go:184] no items to output this cycle
I0321 16:31:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 16:32:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:32:03.409800  543705 memory.go:184] no items to output this cycle
I0321 16:32:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 16:32:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:32:13.409783  543705 memory.go:191] Add success.
I0321 16:32:13.409802  543705 cpu.go:282] Add success.
W0321 16:32:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:32:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:32:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:32:13.420218  543705 net.go:648] Add success.
I0321 16:32:13.423187  543705 net.go:770] primary dev: ETH0
I0321 16:32:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:32:13.423213  543705 net.go:698] Add success.
W0321 16:32:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:32:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 16:32:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:32:14.456877  543705 disk_worker.go:494] system disk:vda1
I0321 16:32:14.456929  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:32:14.457120  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:32:14.457129  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:32:14.457135  543705 custom_config.go:64] query custom config with name: gpu
E0321 16:32:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:32:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:32:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:32:16.457979  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:32:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:32:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:32:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:32:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:32:23.409772  543705 memory.go:184] no items to output this cycle
I0321 16:32:23.409777  543705 cpu.go:275] no items to output this cycle
I0321 16:32:30.427830  543705 disk_info.go:125] begin check local disk info of client
I0321 16:32:30.430322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:32:30.430329  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3840 0xc0002a3880]
E0321 16:32:33.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:32:33.409880  543705 memory.go:184] no items to output this cycle
I0321 16:32:33.410041  543705 cpu.go:275] no items to output this cycle
E0321 16:32:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:32:43.409789  543705 memory.go:191] Add success.
I0321 16:32:43.409805  543705 cpu.go:282] Add success.
I0321 16:32:43.419860  543705 net.go:648] Add success.
I0321 16:32:43.422407  543705 net.go:770] primary dev: ETH0
I0321 16:32:43.422421  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:32:43.422433  543705 net.go:698] Add success.
I0321 16:32:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:32:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:32:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:32:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:32:53.409772  543705 memory.go:184] no items to output this cycle
I0321 16:32:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:33:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:33:03.409796  543705 memory.go:184] no items to output this cycle
I0321 16:33:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 16:33:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:33:13.409790  543705 memory.go:191] Add success.
I0321 16:33:13.409809  543705 cpu.go:282] Add success.
W0321 16:33:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:33:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:33:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:33:13.420146  543705 net.go:648] Add success.
I0321 16:33:13.422957  543705 net.go:770] primary dev: ETH0
I0321 16:33:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:33:13.422982  543705 net.go:698] Add success.
I0321 16:33:13.468973  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d354cc8d-9429-4324-b9f8-4bdff3bda34e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:33:13.469006  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:33:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:33:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:33:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 16:33:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:33:14.456688  543705 disk_worker.go:494] system disk:vda1
I0321 16:33:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:33:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:33:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:33:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:33:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:33:23.409901  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:33:23.409918  543705 memory.go:184] no items to output this cycle
I0321 16:33:23.410020  543705 cpu.go:275] no items to output this cycle
I0321 16:33:30.430849  543705 disk_info.go:125] begin check local disk info of client
I0321 16:33:30.433412  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:33:30.433417  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4280 0xc0000c42c0]
E0321 16:33:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:33:33.409774  543705 memory.go:184] no items to output this cycle
I0321 16:33:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 16:33:39.037731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:33:39.037736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:33:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:33:43.410642  543705 memory.go:191] Add success.
I0321 16:33:43.409798  543705 cpu.go:282] Add success.
I0321 16:33:43.420504  543705 net.go:648] Add success.
I0321 16:33:43.423275  543705 net.go:770] primary dev: ETH0
I0321 16:33:43.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:33:43.423304  543705 net.go:698] Add success.
I0321 16:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:33:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:33:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:33:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:33:53.409787  543705 memory.go:184] no items to output this cycle
I0321 16:33:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 16:34:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:34:03.409770  543705 memory.go:184] no items to output this cycle
I0321 16:34:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 16:34:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:34:13.409792  543705 memory.go:191] Add success.
I0321 16:34:13.409811  543705 cpu.go:282] Add success.
W0321 16:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:34:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:34:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:34:13.420043  543705 net.go:648] Add success.
I0321 16:34:13.423004  543705 net.go:770] primary dev: ETH0
I0321 16:34:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:34:13.423030  543705 net.go:698] Add success.
I0321 16:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:34:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:34:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 16:34:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:34:14.456479  543705 disk_worker.go:494] system disk:vda1
I0321 16:34:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:34:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:34:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:34:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:34:23.409874  543705 cpu.go:275] no items to output this cycle
I0321 16:34:23.409904  543705 memory.go:184] no items to output this cycle
I0321 16:34:30.433862  543705 disk_info.go:125] begin check local disk info of client
I0321 16:34:30.436386  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:34:30.436394  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046c2c0 0xc00046c300]
E0321 16:34:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:34:33.409796  543705 memory.go:184] no items to output this cycle
I0321 16:34:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 16:34:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:34:43.409776  543705 memory.go:191] Add success.
I0321 16:34:43.409796  543705 cpu.go:282] Add success.
I0321 16:34:43.419855  543705 net.go:648] Add success.
I0321 16:34:43.422937  543705 net.go:770] primary dev: ETH0
I0321 16:34:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:34:43.422965  543705 net.go:698] Add success.
I0321 16:34:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:34:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:34:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:34:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:34:53.409783  543705 memory.go:184] no items to output this cycle
I0321 16:34:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:35:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:35:03.409769  543705 memory.go:184] no items to output this cycle
I0321 16:35:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 16:35:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:35:13.409787  543705 memory.go:191] Add success.
I0321 16:35:13.409811  543705 cpu.go:282] Add success.
W0321 16:35:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:35:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:35:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:35:13.420277  543705 net.go:648] Add success.
I0321 16:35:13.423032  543705 net.go:770] primary dev: ETH0
I0321 16:35:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:35:13.423059  543705 net.go:698] Add success.
I0321 16:35:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:35:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:35:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 16:35:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:35:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 16:35:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:35:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:35:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:35:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:35:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:35:23.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:35:23.409874  543705 memory.go:184] no items to output this cycle
I0321 16:35:23.409898  543705 cpu.go:275] no items to output this cycle
I0321 16:35:30.436881  543705 disk_info.go:125] begin check local disk info of client
I0321 16:35:30.439350  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:35:30.439356  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328e00 0xc000328e40]
E0321 16:35:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:35:33.409795  543705 memory.go:184] no items to output this cycle
I0321 16:35:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:35:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:35:43.409813  543705 memory.go:191] Add success.
I0321 16:35:43.409817  543705 cpu.go:282] Add success.
I0321 16:35:43.419982  543705 net.go:648] Add success.
I0321 16:35:43.422777  543705 net.go:770] primary dev: ETH0
I0321 16:35:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:35:43.422801  543705 net.go:698] Add success.
I0321 16:35:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:35:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:35:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:35:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:35:53.409798  543705 memory.go:184] no items to output this cycle
I0321 16:35:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 16:36:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:36:03.409811  543705 memory.go:184] no items to output this cycle
I0321 16:36:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 16:36:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:36:13.409800  543705 cpu.go:282] Add success.
I0321 16:36:13.409804  543705 memory.go:191] Add success.
W0321 16:36:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:36:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:36:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:36:13.420261  543705 net.go:648] Add success.
I0321 16:36:13.423248  543705 net.go:770] primary dev: ETH0
I0321 16:36:13.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:36:13.423278  543705 net.go:698] Add success.
I0321 16:36:13.462922  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"11832300-43ba-418e-b250-f5913f5d3ebf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:36:13.462957  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:36:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:36:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 16:36:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:36:14.456503  543705 disk_worker.go:494] system disk:vda1
I0321 16:36:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:36:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:36:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:36:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:36:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:36:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:36:23.409776  543705 memory.go:184] no items to output this cycle
I0321 16:36:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 16:36:30.439897  543705 disk_info.go:125] begin check local disk info of client
I0321 16:36:30.442390  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:36:30.442398  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384000 0xc000384040]
E0321 16:36:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:36:33.409800  543705 memory.go:184] no items to output this cycle
I0321 16:36:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 16:36:39.037890  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:36:39.037897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:36:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:36:43.410737  543705 memory.go:191] Add success.
I0321 16:36:43.409825  543705 cpu.go:282] Add success.
I0321 16:36:43.420466  543705 net.go:648] Add success.
I0321 16:36:43.423457  543705 net.go:770] primary dev: ETH0
I0321 16:36:43.423471  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:36:43.423484  543705 net.go:698] Add success.
I0321 16:36:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:36:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:36:53.409789  543705 cpu.go:275] no items to output this cycle
I0321 16:36:53.409791  543705 memory.go:184] no items to output this cycle
E0321 16:37:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:37:03.409790  543705 memory.go:184] no items to output this cycle
I0321 16:37:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 16:37:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:37:13.409809  543705 memory.go:191] Add success.
I0321 16:37:13.409810  543705 cpu.go:282] Add success.
W0321 16:37:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:37:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:37:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:37:13.420146  543705 net.go:648] Add success.
I0321 16:37:13.422879  543705 net.go:770] primary dev: ETH0
I0321 16:37:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:37:13.422907  543705 net.go:698] Add success.
I0321 16:37:13.453470  543705 event_worker.go:152] Polling the log file for events...
W0321 16:37:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:37:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 16:37:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:37:14.456953  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:37:14.456962  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:37:14.456968  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:37:14.457030  543705 disk_worker.go:494] system disk:vda1
I0321 16:37:14.457063  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:37:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:37:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:37:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:37:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:37:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:37:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:37:16.472322  543705 disk_local_worker.go:436] Get disk info: []
I0321 16:37:23.409794  543705 cpu.go:275] no items to output this cycle
E0321 16:37:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:37:23.409815  543705 memory.go:184] no items to output this cycle
I0321 16:37:30.442902  543705 disk_info.go:125] begin check local disk info of client
I0321 16:37:30.445385  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:37:30.445391  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a400 0xc00047a440]
E0321 16:37:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:37:33.409793  543705 memory.go:184] no items to output this cycle
I0321 16:37:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 16:37:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:37:43.409790  543705 memory.go:191] Add success.
I0321 16:37:43.409821  543705 cpu.go:282] Add success.
I0321 16:37:43.419922  543705 net.go:648] Add success.
I0321 16:37:43.422857  543705 net.go:770] primary dev: ETH0
I0321 16:37:43.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:37:43.422890  543705 net.go:698] Add success.
I0321 16:37:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:37:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:37:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:37:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:37:53.409771  543705 memory.go:184] no items to output this cycle
I0321 16:37:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 16:38:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:38:03.409773  543705 memory.go:184] no items to output this cycle
I0321 16:38:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 16:38:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:38:13.409826  543705 memory.go:191] Add success.
I0321 16:38:13.409830  543705 cpu.go:282] Add success.
W0321 16:38:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:38:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:38:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:38:13.420061  543705 net.go:648] Add success.
I0321 16:38:13.422768  543705 net.go:770] primary dev: ETH0
I0321 16:38:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:38:13.422793  543705 net.go:698] Add success.
I0321 16:38:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:38:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:38:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 16:38:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:38:14.456596  543705 disk_worker.go:494] system disk:vda1
I0321 16:38:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:38:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:38:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:38:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:38:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:38:23.409799  543705 memory.go:184] no items to output this cycle
I0321 16:38:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 16:38:30.445933  543705 disk_info.go:125] begin check local disk info of client
I0321 16:38:30.448388  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:38:30.448395  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369740 0xc000369780]
E0321 16:38:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:38:33.409790  543705 memory.go:184] no items to output this cycle
I0321 16:38:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 16:38:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:38:43.409813  543705 memory.go:191] Add success.
I0321 16:38:43.409830  543705 cpu.go:282] Add success.
I0321 16:38:43.419955  543705 net.go:648] Add success.
I0321 16:38:43.422501  543705 net.go:770] primary dev: ETH0
I0321 16:38:43.422514  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:38:43.422526  543705 net.go:698] Add success.
I0321 16:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:38:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:38:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:38:53.409769  543705 memory.go:184] no items to output this cycle
I0321 16:38:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 16:39:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:39:03.409808  543705 memory.go:184] no items to output this cycle
I0321 16:39:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 16:39:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:39:13.409789  543705 memory.go:191] Add success.
I0321 16:39:13.409808  543705 cpu.go:282] Add success.
W0321 16:39:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:39:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:39:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:39:13.420162  543705 net.go:648] Add success.
I0321 16:39:13.423291  543705 net.go:770] primary dev: ETH0
I0321 16:39:13.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:39:13.423321  543705 net.go:698] Add success.
I0321 16:39:13.468107  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"39ff5eb2-e391-446b-8daf-0407b5b18318","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:39:13.468140  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:39:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:39:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:39:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 16:39:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:39:14.456530  543705 disk_worker.go:494] system disk:vda1
I0321 16:39:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:39:15.455860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:39:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:39:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:39:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:39:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:39:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:39:23.409801  543705 memory.go:184] no items to output this cycle
I0321 16:39:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 16:39:30.448939  543705 disk_info.go:125] begin check local disk info of client
I0321 16:39:30.451419  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:39:30.451425  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cd880 0xc0004cd8c0]
E0321 16:39:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:39:33.409797  543705 memory.go:184] no items to output this cycle
I0321 16:39:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 16:39:39.040893  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:39:39.040899  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:39:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:39:43.410651  543705 memory.go:191] Add success.
I0321 16:39:43.409808  543705 cpu.go:282] Add success.
I0321 16:39:43.420365  543705 net.go:648] Add success.
I0321 16:39:43.423139  543705 net.go:770] primary dev: ETH0
I0321 16:39:43.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:39:43.423164  543705 net.go:698] Add success.
I0321 16:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:39:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:39:53.409794  543705 memory.go:184] no items to output this cycle
I0321 16:39:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:40:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:40:03.409772  543705 memory.go:184] no items to output this cycle
I0321 16:40:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 16:40:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:40:13.409796  543705 memory.go:191] Add success.
I0321 16:40:13.409797  543705 cpu.go:282] Add success.
W0321 16:40:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:40:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:40:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:40:13.420165  543705 net.go:648] Add success.
I0321 16:40:13.422819  543705 net.go:770] primary dev: ETH0
I0321 16:40:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:40:13.422850  543705 net.go:698] Add success.
I0321 16:40:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:40:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:40:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 16:40:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:40:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 16:40:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:40:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:40:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:40:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:40:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:40:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:40:23.409798  543705 memory.go:184] no items to output this cycle
I0321 16:40:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 16:40:30.451953  543705 disk_info.go:125] begin check local disk info of client
I0321 16:40:30.454458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:40:30.454465  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329bc0 0xc000329c00]
E0321 16:40:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:40:33.409795  543705 memory.go:184] no items to output this cycle
I0321 16:40:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 16:40:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:40:43.409788  543705 memory.go:191] Add success.
I0321 16:40:43.409800  543705 cpu.go:282] Add success.
I0321 16:40:43.419894  543705 net.go:648] Add success.
I0321 16:40:43.423407  543705 net.go:770] primary dev: ETH0
I0321 16:40:43.423422  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:40:43.423435  543705 net.go:698] Add success.
I0321 16:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:40:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:40:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:40:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:40:53.409785  543705 memory.go:184] no items to output this cycle
I0321 16:40:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 16:41:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:41:03.409775  543705 memory.go:184] no items to output this cycle
I0321 16:41:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 16:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:41:13.409800  543705 memory.go:191] Add success.
I0321 16:41:13.409814  543705 cpu.go:282] Add success.
W0321 16:41:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:41:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:41:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:41:13.420183  543705 net.go:648] Add success.
I0321 16:41:13.422841  543705 net.go:770] primary dev: ETH0
I0321 16:41:13.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:41:13.422866  543705 net.go:698] Add success.
I0321 16:41:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:41:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:41:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 16:41:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:41:14.456606  543705 disk_worker.go:494] system disk:vda1
I0321 16:41:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:41:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:41:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:41:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:41:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:41:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:41:23.409777  543705 memory.go:184] no items to output this cycle
I0321 16:41:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 16:41:30.454953  543705 disk_info.go:125] begin check local disk info of client
I0321 16:41:30.457471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:41:30.457478  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051d700 0xc00051d740]
E0321 16:41:33.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:41:33.409906  543705 memory.go:184] no items to output this cycle
I0321 16:41:33.409925  543705 cpu.go:275] no items to output this cycle
E0321 16:41:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:41:43.409789  543705 memory.go:191] Add success.
I0321 16:41:43.409793  543705 cpu.go:282] Add success.
I0321 16:41:43.419968  543705 net.go:648] Add success.
I0321 16:41:43.422739  543705 net.go:770] primary dev: ETH0
I0321 16:41:43.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:41:43.422763  543705 net.go:698] Add success.
I0321 16:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:41:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:41:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:41:53.409780  543705 memory.go:184] no items to output this cycle
I0321 16:41:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 16:42:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:42:03.409809  543705 memory.go:184] no items to output this cycle
I0321 16:42:03.409822  543705 cpu.go:275] no items to output this cycle
E0321 16:42:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:42:13.409789  543705 memory.go:191] Add success.
I0321 16:42:13.409808  543705 cpu.go:282] Add success.
W0321 16:42:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:42:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:42:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:42:13.420112  543705 net.go:648] Add success.
I0321 16:42:13.423308  543705 net.go:770] primary dev: ETH0
I0321 16:42:13.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:42:13.423337  543705 net.go:698] Add success.
I0321 16:42:13.469390  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2013acb8-b138-4b28-bf27-6c37377231aa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:42:13.469425  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 16:42:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:42:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 16:42:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:42:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:42:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:42:14.456938  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:42:14.456982  543705 disk_worker.go:494] system disk:vda1
I0321 16:42:14.457013  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:42:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:42:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:42:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:42:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:42:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:42:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:42:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:42:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:42:23.409778  543705 memory.go:184] no items to output this cycle
I0321 16:42:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 16:42:30.457974  543705 disk_info.go:125] begin check local disk info of client
I0321 16:42:30.460543  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:42:30.460551  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025c000 0xc00025c040]
E0321 16:42:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:42:33.409760  543705 memory.go:184] no items to output this cycle
I0321 16:42:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 16:42:39.041742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:42:39.041748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:42:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:42:43.410636  543705 memory.go:191] Add success.
I0321 16:42:43.409847  543705 cpu.go:282] Add success.
I0321 16:42:43.420404  543705 net.go:648] Add success.
I0321 16:42:43.423092  543705 net.go:770] primary dev: ETH0
I0321 16:42:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:42:43.423117  543705 net.go:698] Add success.
I0321 16:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:42:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:42:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:42:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:42:53.409778  543705 memory.go:184] no items to output this cycle
I0321 16:42:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 16:43:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:43:03.409806  543705 memory.go:184] no items to output this cycle
I0321 16:43:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 16:43:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:43:13.409781  543705 memory.go:191] Add success.
W0321 16:43:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 16:43:13.409811  543705 cpu.go:282] Add success.
W0321 16:43:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:43:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:43:13.420399  543705 net.go:648] Add success.
I0321 16:43:13.423060  543705 net.go:770] primary dev: ETH0
I0321 16:43:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:43:13.423090  543705 net.go:698] Add success.
I0321 16:43:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:43:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:43:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 16:43:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:43:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 16:43:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:43:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:43:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:43:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:43:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:43:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:43:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:43:23.409780  543705 memory.go:184] no items to output this cycle
I0321 16:43:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 16:43:30.460986  543705 disk_info.go:125] begin check local disk info of client
I0321 16:43:30.463483  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:43:30.463489  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8900 0xc0003c8940]
E0321 16:43:33.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:43:33.409903  543705 memory.go:184] no items to output this cycle
I0321 16:43:33.409955  543705 cpu.go:275] no items to output this cycle
E0321 16:43:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:43:43.409811  543705 memory.go:191] Add success.
I0321 16:43:43.409824  543705 cpu.go:282] Add success.
I0321 16:43:43.420035  543705 net.go:648] Add success.
I0321 16:43:43.422849  543705 net.go:770] primary dev: ETH0
I0321 16:43:43.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:43:43.422875  543705 net.go:698] Add success.
I0321 16:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:43:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:43:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:43:53.409795  543705 memory.go:184] no items to output this cycle
I0321 16:43:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 16:44:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:44:03.409776  543705 memory.go:184] no items to output this cycle
I0321 16:44:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 16:44:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:44:13.409786  543705 memory.go:191] Add success.
I0321 16:44:13.409803  543705 cpu.go:282] Add success.
W0321 16:44:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:44:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:44:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:44:13.420148  543705 net.go:648] Add success.
I0321 16:44:13.422828  543705 net.go:770] primary dev: ETH0
I0321 16:44:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:44:13.422852  543705 net.go:698] Add success.
I0321 16:44:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:44:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:44:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 16:44:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:44:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 16:44:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:44:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:44:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:44:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:44:23.409770  543705 memory.go:184] no items to output this cycle
I0321 16:44:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 16:44:30.464017  543705 disk_info.go:125] begin check local disk info of client
I0321 16:44:30.466550  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:44:30.466557  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bb100 0xc0004bb140]
E0321 16:44:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:44:33.409794  543705 memory.go:184] no items to output this cycle
I0321 16:44:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:44:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:44:43.409794  543705 memory.go:191] Add success.
I0321 16:44:43.409795  543705 cpu.go:282] Add success.
I0321 16:44:43.419886  543705 net.go:648] Add success.
I0321 16:44:43.423085  543705 net.go:770] primary dev: ETH0
I0321 16:44:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:44:43.423112  543705 net.go:698] Add success.
I0321 16:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:44:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:44:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:44:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:44:53.409774  543705 memory.go:184] no items to output this cycle
I0321 16:44:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 16:45:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:45:03.409771  543705 memory.go:184] no items to output this cycle
I0321 16:45:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 16:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:45:13.409807  543705 memory.go:191] Add success.
I0321 16:45:13.409807  543705 cpu.go:282] Add success.
W0321 16:45:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:45:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:45:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:45:13.420616  543705 net.go:648] Add success.
I0321 16:45:13.423361  543705 net.go:770] primary dev: ETH0
I0321 16:45:13.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:45:13.423389  543705 net.go:698] Add success.
I0321 16:45:13.469030  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8f65b50f-42af-481e-b7af-5d08816b61c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:45:13.469063  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:45:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:45:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:45:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 16:45:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:45:14.456710  543705 disk_worker.go:494] system disk:vda1
I0321 16:45:14.456766  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:45:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:45:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:45:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:45:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:45:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:45:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:45:23.409806  543705 memory.go:184] no items to output this cycle
I0321 16:45:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 16:45:30.467032  543705 disk_info.go:125] begin check local disk info of client
I0321 16:45:30.469493  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:45:30.469499  543705 disk_info.go:196] parse disk info done, disk is : [0xc000514ec0 0xc000514f00]
E0321 16:45:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:45:33.409806  543705 memory.go:184] no items to output this cycle
I0321 16:45:33.409818  543705 cpu.go:275] no items to output this cycle
I0321 16:45:39.044913  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:45:39.044918  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:45:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:45:43.410656  543705 memory.go:191] Add success.
I0321 16:45:43.409804  543705 cpu.go:282] Add success.
I0321 16:45:43.420355  543705 net.go:648] Add success.
I0321 16:45:43.422930  543705 net.go:770] primary dev: ETH0
I0321 16:45:43.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:45:43.422956  543705 net.go:698] Add success.
I0321 16:45:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:45:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:45:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:45:53.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:45:53.409769  543705 memory.go:184] no items to output this cycle
I0321 16:45:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 16:46:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:46:03.409784  543705 memory.go:184] no items to output this cycle
I0321 16:46:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:46:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:46:13.409829  543705 memory.go:191] Add success.
I0321 16:46:13.409837  543705 cpu.go:282] Add success.
W0321 16:46:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:46:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:46:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:46:13.420124  543705 net.go:648] Add success.
I0321 16:46:13.422719  543705 net.go:770] primary dev: ETH0
I0321 16:46:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:46:13.422746  543705 net.go:698] Add success.
I0321 16:46:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:46:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:46:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 16:46:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:46:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 16:46:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:46:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:46:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:46:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:46:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:46:23.409823  543705 memory.go:184] no items to output this cycle
I0321 16:46:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 16:46:30.470044  543705 disk_info.go:125] begin check local disk info of client
I0321 16:46:30.472554  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:46:30.472561  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035b700 0xc00035b740]
E0321 16:46:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:46:33.409788  543705 memory.go:184] no items to output this cycle
I0321 16:46:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 16:46:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:46:43.409783  543705 memory.go:191] Add success.
I0321 16:46:43.409801  543705 cpu.go:282] Add success.
I0321 16:46:43.419871  543705 net.go:648] Add success.
I0321 16:46:43.422431  543705 net.go:770] primary dev: ETH0
I0321 16:46:43.422444  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:46:43.422456  543705 net.go:698] Add success.
I0321 16:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:46:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:46:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:46:53.409763  543705 memory.go:184] no items to output this cycle
I0321 16:46:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:47:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:47:03.409804  543705 memory.go:184] no items to output this cycle
I0321 16:47:03.409818  543705 cpu.go:275] no items to output this cycle
W0321 16:47:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:47:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:47:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:47:13.409796  543705 cpu.go:282] Add success.
E0321 16:47:13.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:47:13.409859  543705 memory.go:191] Add success.
I0321 16:47:13.420076  543705 net.go:648] Add success.
I0321 16:47:13.422934  543705 net.go:770] primary dev: ETH0
I0321 16:47:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:47:13.422958  543705 net.go:698] Add success.
I0321 16:47:13.453488  543705 event_worker.go:152] Polling the log file for events...
W0321 16:47:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:47:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 16:47:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:47:14.456811  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:47:14.456820  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:47:14.456827  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:47:14.456885  543705 disk_worker.go:494] system disk:vda1
I0321 16:47:14.456927  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:47:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:47:15.456881  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:47:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:47:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:47:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:47:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:47:16.472341  543705 disk_local_worker.go:436] Get disk info: []
I0321 16:47:23.409887  543705 cpu.go:275] no items to output this cycle
E0321 16:47:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:47:23.409906  543705 memory.go:184] no items to output this cycle
I0321 16:47:30.473048  543705 disk_info.go:125] begin check local disk info of client
I0321 16:47:30.475544  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:47:30.475550  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2200 0xc0002a2240]
E0321 16:47:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:47:33.409796  543705 memory.go:184] no items to output this cycle
I0321 16:47:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 16:47:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:47:43.409779  543705 memory.go:191] Add success.
I0321 16:47:43.409803  543705 cpu.go:282] Add success.
I0321 16:47:43.419837  543705 net.go:648] Add success.
I0321 16:47:43.422471  543705 net.go:770] primary dev: ETH0
I0321 16:47:43.422484  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:47:43.422496  543705 net.go:698] Add success.
I0321 16:47:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:47:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:47:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:47:53.410434  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:47:53.410449  543705 memory.go:184] no items to output this cycle
I0321 16:47:53.410451  543705 cpu.go:275] no items to output this cycle
E0321 16:48:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:48:03.409772  543705 memory.go:184] no items to output this cycle
I0321 16:48:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:48:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:48:13.409818  543705 memory.go:191] Add success.
I0321 16:48:13.409819  543705 cpu.go:282] Add success.
W0321 16:48:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:48:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:48:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:48:13.420287  543705 net.go:648] Add success.
I0321 16:48:13.423339  543705 net.go:770] primary dev: ETH0
I0321 16:48:13.423354  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:48:13.423369  543705 net.go:698] Add success.
I0321 16:48:13.467715  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d701ec67-9ffe-4fc7-a319-4c1f3f007e3e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:48:13.467749  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:48:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:48:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:48:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 16:48:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:48:14.456721  543705 disk_worker.go:494] system disk:vda1
I0321 16:48:14.456751  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:48:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:48:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:48:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:48:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:48:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:48:23.409778  543705 memory.go:184] no items to output this cycle
I0321 16:48:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 16:48:30.476064  543705 disk_info.go:125] begin check local disk info of client
I0321 16:48:30.478568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:48:30.478575  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480cc0 0xc000480d00]
E0321 16:48:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:48:33.409792  543705 memory.go:184] no items to output this cycle
I0321 16:48:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 16:48:39.045727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:48:39.045734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:48:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:48:43.410760  543705 memory.go:191] Add success.
I0321 16:48:43.409797  543705 cpu.go:282] Add success.
I0321 16:48:43.420496  543705 net.go:648] Add success.
I0321 16:48:43.423078  543705 net.go:770] primary dev: ETH0
I0321 16:48:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:48:43.423103  543705 net.go:698] Add success.
I0321 16:48:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:48:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:48:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:48:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:48:53.409805  543705 memory.go:184] no items to output this cycle
I0321 16:48:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 16:49:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:49:03.409768  543705 memory.go:184] no items to output this cycle
I0321 16:49:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 16:49:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:49:13.409816  543705 memory.go:191] Add success.
I0321 16:49:13.409821  543705 cpu.go:282] Add success.
W0321 16:49:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:49:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:49:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:49:13.420156  543705 net.go:648] Add success.
I0321 16:49:13.423034  543705 net.go:770] primary dev: ETH0
I0321 16:49:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:49:13.423065  543705 net.go:698] Add success.
I0321 16:49:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:49:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:49:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 16:49:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:49:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 16:49:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:49:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:49:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:49:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:49:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:49:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:49:23.409794  543705 memory.go:184] no items to output this cycle
I0321 16:49:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 16:49:30.479332  543705 disk_info.go:125] begin check local disk info of client
I0321 16:49:30.481915  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:49:30.481922  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481840 0xc000481880]
E0321 16:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:49:33.409781  543705 memory.go:184] no items to output this cycle
I0321 16:49:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 16:49:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:49:43.409809  543705 memory.go:191] Add success.
I0321 16:49:43.409817  543705 cpu.go:282] Add success.
I0321 16:49:43.419926  543705 net.go:648] Add success.
I0321 16:49:43.422587  543705 net.go:770] primary dev: ETH0
I0321 16:49:43.422599  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:49:43.422611  543705 net.go:698] Add success.
I0321 16:49:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:49:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:49:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:49:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:49:53.409779  543705 memory.go:184] no items to output this cycle
I0321 16:49:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 16:50:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:50:03.409800  543705 memory.go:184] no items to output this cycle
I0321 16:50:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:50:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:50:13.409789  543705 memory.go:191] Add success.
I0321 16:50:13.409806  543705 cpu.go:282] Add success.
W0321 16:50:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:50:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:50:13.420146  543705 net.go:648] Add success.
I0321 16:50:13.423128  543705 net.go:770] primary dev: ETH0
I0321 16:50:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:50:13.423157  543705 net.go:698] Add success.
I0321 16:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:50:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:50:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 16:50:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:50:14.456853  543705 disk_worker.go:494] system disk:vda1
I0321 16:50:14.456882  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:50:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:50:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:50:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:50:23.409763  543705 memory.go:184] no items to output this cycle
I0321 16:50:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 16:50:30.482054  543705 disk_info.go:125] begin check local disk info of client
I0321 16:50:30.484557  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:50:30.484564  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b2c0 0xc00007b300]
E0321 16:50:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:50:33.409791  543705 memory.go:184] no items to output this cycle
I0321 16:50:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 16:50:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:50:43.409810  543705 memory.go:191] Add success.
I0321 16:50:43.409815  543705 cpu.go:282] Add success.
I0321 16:50:43.419856  543705 net.go:648] Add success.
I0321 16:50:43.422951  543705 net.go:770] primary dev: ETH0
I0321 16:50:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:50:43.422976  543705 net.go:698] Add success.
I0321 16:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:50:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:50:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:50:53.409773  543705 memory.go:184] no items to output this cycle
I0321 16:50:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 16:51:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:51:03.409772  543705 memory.go:184] no items to output this cycle
I0321 16:51:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 16:51:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:51:13.409791  543705 memory.go:191] Add success.
I0321 16:51:13.409808  543705 cpu.go:282] Add success.
W0321 16:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:51:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:51:13.420296  543705 net.go:648] Add success.
I0321 16:51:13.423123  543705 net.go:770] primary dev: ETH0
I0321 16:51:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:51:13.423150  543705 net.go:698] Add success.
I0321 16:51:13.470900  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a640a197-d82b-4e71-bc29-227628319b33","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:51:13.470936  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:51:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:51:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:51:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 16:51:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:51:14.457100  543705 disk_worker.go:494] system disk:vda1
I0321 16:51:14.457129  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:51:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:51:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:51:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:51:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:51:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:51:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:51:23.409797  543705 memory.go:184] no items to output this cycle
I0321 16:51:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 16:51:30.485109  543705 disk_info.go:125] begin check local disk info of client
I0321 16:51:30.487611  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:51:30.487617  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385ac0 0xc000385b00]
E0321 16:51:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:51:33.409796  543705 memory.go:184] no items to output this cycle
I0321 16:51:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 16:51:39.048931  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:51:39.048938  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:51:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:51:43.410657  543705 memory.go:191] Add success.
I0321 16:51:43.409813  543705 cpu.go:282] Add success.
I0321 16:51:43.420341  543705 net.go:648] Add success.
I0321 16:51:43.422702  543705 net.go:770] primary dev: ETH0
I0321 16:51:43.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:51:43.422728  543705 net.go:698] Add success.
I0321 16:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:51:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:51:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:51:53.409778  543705 memory.go:184] no items to output this cycle
I0321 16:51:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 16:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:52:03.409781  543705 memory.go:184] no items to output this cycle
I0321 16:52:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 16:52:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:52:13.409819  543705 memory.go:191] Add success.
I0321 16:52:13.409832  543705 cpu.go:282] Add success.
W0321 16:52:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:52:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:52:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:52:13.419930  543705 net.go:770] primary dev: ETH0
I0321 16:52:13.419945  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:52:13.419960  543705 net.go:698] Add success.
I0321 16:52:13.420320  543705 net.go:648] Add success.
W0321 16:52:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:52:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 16:52:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:52:14.456793  543705 disk_worker.go:494] system disk:vda1
I0321 16:52:14.456830  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:52:14.457084  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:52:14.457092  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:52:14.457096  543705 custom_config.go:64] query custom config with name: gpu
E0321 16:52:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:52:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:52:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:52:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:52:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:52:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:52:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:52:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:52:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 16:52:23.409786  543705 memory.go:184] no items to output this cycle
I0321 16:52:30.488156  543705 disk_info.go:125] begin check local disk info of client
I0321 16:52:30.490626  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:52:30.490632  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8f80 0xc0004d8fc0]
E0321 16:52:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:52:33.409810  543705 memory.go:184] no items to output this cycle
I0321 16:52:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 16:52:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:52:43.409792  543705 memory.go:191] Add success.
I0321 16:52:43.409810  543705 cpu.go:282] Add success.
I0321 16:52:43.419888  543705 net.go:648] Add success.
I0321 16:52:43.422912  543705 net.go:770] primary dev: ETH0
I0321 16:52:43.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:52:43.422938  543705 net.go:698] Add success.
I0321 16:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:52:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:52:53.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:52:53.410270  543705 memory.go:184] no items to output this cycle
I0321 16:52:53.410284  543705 cpu.go:275] no items to output this cycle
E0321 16:53:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:53:03.409786  543705 memory.go:184] no items to output this cycle
I0321 16:53:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 16:53:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:53:13.409833  543705 memory.go:191] Add success.
I0321 16:53:13.409839  543705 cpu.go:282] Add success.
W0321 16:53:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:53:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:53:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:53:13.420079  543705 net.go:648] Add success.
I0321 16:53:13.422825  543705 net.go:770] primary dev: ETH0
I0321 16:53:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:53:13.422854  543705 net.go:698] Add success.
I0321 16:53:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:53:14.455346  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:53:14.455450  543705 disk_worker.go:708] disk space is not compliant
W0321 16:53:14.455454  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:53:14.457532  543705 disk_worker.go:494] system disk:vda1
I0321 16:53:14.457573  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:53:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:53:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:53:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:53:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:53:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:53:23.409793  543705 memory.go:184] no items to output this cycle
I0321 16:53:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 16:53:30.491150  543705 disk_info.go:125] begin check local disk info of client
I0321 16:53:30.493617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:53:30.493623  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4380 0xc0000c43c0]
E0321 16:53:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:53:33.409788  543705 memory.go:184] no items to output this cycle
I0321 16:53:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:53:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:53:43.409822  543705 memory.go:191] Add success.
I0321 16:53:43.409830  543705 cpu.go:282] Add success.
I0321 16:53:43.419873  543705 net.go:648] Add success.
I0321 16:53:43.422599  543705 net.go:770] primary dev: ETH0
I0321 16:53:43.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:53:43.422628  543705 net.go:698] Add success.
I0321 16:53:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:53:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:53:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:53:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:53:53.409804  543705 memory.go:184] no items to output this cycle
I0321 16:53:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:54:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:54:03.409775  543705 memory.go:184] no items to output this cycle
I0321 16:54:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 16:54:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:54:13.409836  543705 memory.go:191] Add success.
I0321 16:54:13.409840  543705 cpu.go:282] Add success.
W0321 16:54:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:54:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:54:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:54:13.420186  543705 net.go:648] Add success.
I0321 16:54:13.423203  543705 net.go:770] primary dev: ETH0
I0321 16:54:13.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:54:13.423233  543705 net.go:698] Add success.
I0321 16:54:13.463729  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b12c02ef-6743-41f7-80c5-3a81c55e0db9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:54:13.463760  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 16:54:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:54:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:54:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 16:54:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:54:14.456686  543705 disk_worker.go:494] system disk:vda1
I0321 16:54:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:54:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:54:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:54:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:54:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:54:23.409794  543705 memory.go:184] no items to output this cycle
I0321 16:54:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 16:54:30.494162  543705 disk_info.go:125] begin check local disk info of client
I0321 16:54:30.496723  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:54:30.496728  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4180 0xc0000c41c0]
E0321 16:54:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:54:33.409770  543705 memory.go:184] no items to output this cycle
I0321 16:54:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 16:54:39.049731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:54:39.049737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:54:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:54:43.410616  543705 memory.go:191] Add success.
I0321 16:54:43.409823  543705 cpu.go:282] Add success.
I0321 16:54:43.420389  543705 net.go:648] Add success.
I0321 16:54:43.423222  543705 net.go:770] primary dev: ETH0
I0321 16:54:43.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:54:43.423250  543705 net.go:698] Add success.
I0321 16:54:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:54:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:54:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:54:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:54:53.409778  543705 memory.go:184] no items to output this cycle
I0321 16:54:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 16:55:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:55:03.409761  543705 memory.go:184] no items to output this cycle
I0321 16:55:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 16:55:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:55:13.409816  543705 memory.go:191] Add success.
I0321 16:55:13.409824  543705 cpu.go:282] Add success.
W0321 16:55:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:55:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:55:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:55:13.420330  543705 net.go:648] Add success.
I0321 16:55:13.423079  543705 net.go:770] primary dev: ETH0
I0321 16:55:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:55:13.423103  543705 net.go:698] Add success.
I0321 16:55:14.454945  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:55:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:55:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 16:55:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:55:14.456552  543705 disk_worker.go:494] system disk:vda1
I0321 16:55:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:55:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:55:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:55:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:55:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:55:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:55:23.409771  543705 memory.go:184] no items to output this cycle
I0321 16:55:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 16:55:30.497128  543705 disk_info.go:125] begin check local disk info of client
I0321 16:55:30.499709  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:55:30.499716  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b3c0 0xc00007b400]
E0321 16:55:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:55:33.409795  543705 memory.go:184] no items to output this cycle
I0321 16:55:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:55:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:55:43.409779  543705 memory.go:191] Add success.
I0321 16:55:43.409800  543705 cpu.go:282] Add success.
I0321 16:55:43.419837  543705 net.go:648] Add success.
I0321 16:55:43.422774  543705 net.go:770] primary dev: ETH0
I0321 16:55:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:55:43.422801  543705 net.go:698] Add success.
I0321 16:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:55:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:55:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:55:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:55:53.409769  543705 memory.go:184] no items to output this cycle
I0321 16:55:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 16:56:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:56:03.409799  543705 memory.go:184] no items to output this cycle
I0321 16:56:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 16:56:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:56:13.409918  543705 memory.go:191] Add success.
I0321 16:56:13.409947  543705 cpu.go:282] Add success.
W0321 16:56:13.409955  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:56:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:56:13.409983  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:56:13.419721  543705 net.go:648] Add success.
I0321 16:56:13.422241  543705 net.go:770] primary dev: ETH0
I0321 16:56:13.422254  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:56:13.422265  543705 net.go:698] Add success.
I0321 16:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:56:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:56:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 16:56:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:56:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 16:56:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:56:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:56:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:56:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:56:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:56:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:56:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:56:23.409793  543705 memory.go:184] no items to output this cycle
I0321 16:56:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 16:56:30.500140  543705 disk_info.go:125] begin check local disk info of client
I0321 16:56:30.502697  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:56:30.502703  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387300 0xc000387340]
E0321 16:56:33.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:56:33.409758  543705 memory.go:184] no items to output this cycle
I0321 16:56:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 16:56:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:56:43.409809  543705 memory.go:191] Add success.
I0321 16:56:43.409815  543705 cpu.go:282] Add success.
I0321 16:56:43.419914  543705 net.go:648] Add success.
I0321 16:56:43.422640  543705 net.go:770] primary dev: ETH0
I0321 16:56:43.422654  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:56:43.422667  543705 net.go:698] Add success.
I0321 16:56:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:56:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:56:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:56:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:56:53.409780  543705 cpu.go:275] no items to output this cycle
I0321 16:56:53.409787  543705 memory.go:184] no items to output this cycle
E0321 16:57:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:57:03.409776  543705 cpu.go:275] no items to output this cycle
I0321 16:57:03.409779  543705 memory.go:184] no items to output this cycle
E0321 16:57:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:57:13.409790  543705 memory.go:191] Add success.
I0321 16:57:13.409807  543705 cpu.go:282] Add success.
W0321 16:57:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:57:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:57:13.420413  543705 net.go:648] Add success.
I0321 16:57:13.429284  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 16:57:13.429359  543705 net.go:770] primary dev: ETH0
I0321 16:57:13.429371  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:57:13.429381  543705 net.go:698] Add success.
I0321 16:57:13.452943  543705 event_worker.go:152] Polling the log file for events...
I0321 16:57:13.555297  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"70686686-f855-4da5-bd4a-271f5d5fcc8c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 16:57:13.555328  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 16:57:14.454203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:57:14.454214  543705 disk_worker.go:708] disk space is not compliant
W0321 16:57:14.454217  543705 disk_worker.go:728] disk inode is not compliant
E0321 16:57:14.456070  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 16:57:14.456078  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 16:57:14.456082  543705 custom_config.go:64] query custom config with name: gpu
I0321 16:57:14.456082  543705 disk_worker.go:494] system disk:vda1
I0321 16:57:14.456133  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 16:57:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 16:57:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:57:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 16:57:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 16:57:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:57:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:57:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:57:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:57:23.409794  543705 memory.go:184] no items to output this cycle
I0321 16:57:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 16:57:30.503157  543705 disk_info.go:125] begin check local disk info of client
I0321 16:57:30.505677  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:57:30.505683  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d80c0 0xc0004d8100]
E0321 16:57:33.409739  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:57:33.409753  543705 memory.go:184] no items to output this cycle
I0321 16:57:33.409790  543705 cpu.go:275] no items to output this cycle
I0321 16:57:39.052954  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 16:57:39.052959  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 16:57:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:57:43.410575  543705 memory.go:191] Add success.
I0321 16:57:43.409793  543705 cpu.go:282] Add success.
I0321 16:57:43.420266  543705 net.go:648] Add success.
I0321 16:57:43.422830  543705 net.go:770] primary dev: ETH0
I0321 16:57:43.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:57:43.422856  543705 net.go:698] Add success.
I0321 16:57:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:57:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:57:53.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:57:53.410265  543705 memory.go:184] no items to output this cycle
I0321 16:57:53.410270  543705 cpu.go:275] no items to output this cycle
E0321 16:58:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:58:03.409801  543705 memory.go:184] no items to output this cycle
I0321 16:58:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 16:58:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:58:13.409804  543705 memory.go:191] Add success.
I0321 16:58:13.409807  543705 cpu.go:282] Add success.
W0321 16:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:58:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:58:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:58:13.420625  543705 net.go:648] Add success.
I0321 16:58:13.423179  543705 net.go:770] primary dev: ETH0
I0321 16:58:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:58:13.423205  543705 net.go:698] Add success.
I0321 16:58:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:58:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:58:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 16:58:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:58:14.456512  543705 disk_worker.go:494] system disk:vda1
I0321 16:58:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:58:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:58:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:58:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:58:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:58:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:58:23.409780  543705 memory.go:184] no items to output this cycle
I0321 16:58:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 16:58:30.506184  543705 disk_info.go:125] begin check local disk info of client
I0321 16:58:30.508692  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:58:30.508698  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e00 0xc0000c5e40]
E0321 16:58:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:58:33.409800  543705 memory.go:184] no items to output this cycle
I0321 16:58:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 16:58:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:58:43.409825  543705 memory.go:191] Add success.
I0321 16:58:43.409827  543705 cpu.go:282] Add success.
I0321 16:58:43.420022  543705 net.go:648] Add success.
I0321 16:58:43.422801  543705 net.go:770] primary dev: ETH0
I0321 16:58:43.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:58:43.422831  543705 net.go:698] Add success.
I0321 16:58:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:58:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:58:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:58:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:58:53.409773  543705 memory.go:184] no items to output this cycle
I0321 16:58:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 16:59:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:59:03.409771  543705 memory.go:184] no items to output this cycle
I0321 16:59:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 16:59:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:59:13.409829  543705 memory.go:191] Add success.
I0321 16:59:13.409833  543705 cpu.go:282] Add success.
W0321 16:59:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 16:59:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 16:59:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 16:59:13.420191  543705 net.go:648] Add success.
I0321 16:59:13.423238  543705 net.go:770] primary dev: ETH0
I0321 16:59:13.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:59:13.423263  543705 net.go:698] Add success.
I0321 16:59:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 16:59:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 16:59:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 16:59:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 16:59:14.456512  543705 disk_worker.go:494] system disk:vda1
I0321 16:59:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 16:59:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 16:59:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:59:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:59:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 16:59:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 16:59:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:59:23.409801  543705 memory.go:184] no items to output this cycle
I0321 16:59:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 16:59:30.509188  543705 disk_info.go:125] begin check local disk info of client
I0321 16:59:30.511719  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 16:59:30.511725  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0880 0xc0002b08c0]
E0321 16:59:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:59:33.409772  543705 memory.go:184] no items to output this cycle
I0321 16:59:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 16:59:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:59:43.409816  543705 memory.go:191] Add success.
I0321 16:59:43.409827  543705 cpu.go:282] Add success.
I0321 16:59:43.419956  543705 net.go:648] Add success.
I0321 16:59:43.422582  543705 net.go:770] primary dev: ETH0
I0321 16:59:43.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0321 16:59:43.422608  543705 net.go:698] Add success.
I0321 16:59:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 16:59:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 16:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 16:59:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 16:59:53.409772  543705 memory.go:184] no items to output this cycle
I0321 16:59:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 17:00:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:00:03.409772  543705 memory.go:184] no items to output this cycle
I0321 17:00:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 17:00:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:00:13.409824  543705 memory.go:191] Add success.
I0321 17:00:13.409826  543705 cpu.go:282] Add success.
W0321 17:00:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:00:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:00:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:00:13.420216  543705 net.go:648] Add success.
I0321 17:00:13.423005  543705 net.go:770] primary dev: ETH0
I0321 17:00:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:00:13.423030  543705 net.go:698] Add success.
I0321 17:00:13.468689  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0a737c99-cf73-45cf-8d4b-3d70adf64b68","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:00:13.468724  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:00:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:00:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 17:00:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:00:14.456683  543705 disk_worker.go:494] system disk:vda1
I0321 17:00:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:00:15.455621  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:00:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:00:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:00:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:00:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:00:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:00:23.409774  543705 memory.go:184] no items to output this cycle
I0321 17:00:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 17:00:30.512211  543705 disk_info.go:125] begin check local disk info of client
I0321 17:00:30.514737  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:00:30.514744  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352640 0xc000352680]
E0321 17:00:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:00:33.409790  543705 memory.go:184] no items to output this cycle
I0321 17:00:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 17:00:39.053735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:00:39.053742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:00:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:00:43.410800  543705 memory.go:191] Add success.
I0321 17:00:43.409829  543705 cpu.go:282] Add success.
I0321 17:00:43.420492  543705 net.go:648] Add success.
I0321 17:00:43.423104  543705 net.go:770] primary dev: ETH0
I0321 17:00:43.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:00:43.423132  543705 net.go:698] Add success.
I0321 17:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:00:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:00:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:00:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:00:53.409806  543705 memory.go:184] no items to output this cycle
I0321 17:00:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 17:01:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:01:03.409771  543705 memory.go:184] no items to output this cycle
I0321 17:01:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 17:01:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:01:13.409784  543705 memory.go:191] Add success.
W0321 17:01:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:01:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:01:13.409822  543705 cpu.go:282] Add success.
I0321 17:01:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:01:13.420272  543705 net.go:648] Add success.
I0321 17:01:13.423009  543705 net.go:770] primary dev: ETH0
I0321 17:01:13.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:01:13.423033  543705 net.go:698] Add success.
I0321 17:01:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:01:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:01:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 17:01:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:01:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 17:01:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:01:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:01:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:01:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:01:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:01:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:01:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:01:23.409794  543705 memory.go:184] no items to output this cycle
I0321 17:01:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 17:01:30.515203  543705 disk_info.go:125] begin check local disk info of client
I0321 17:01:30.517669  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:01:30.517675  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2d80 0xc0003b2dc0]
E0321 17:01:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:01:33.409781  543705 memory.go:184] no items to output this cycle
I0321 17:01:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 17:01:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:01:43.409784  543705 memory.go:191] Add success.
I0321 17:01:43.409811  543705 cpu.go:282] Add success.
I0321 17:01:43.420244  543705 net.go:648] Add success.
I0321 17:01:43.422896  543705 net.go:770] primary dev: ETH0
I0321 17:01:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:01:43.422922  543705 net.go:698] Add success.
I0321 17:01:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:01:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:01:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:01:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:01:53.409763  543705 memory.go:184] no items to output this cycle
I0321 17:01:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 17:02:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:02:03.409772  543705 memory.go:184] no items to output this cycle
I0321 17:02:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 17:02:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:02:13.409787  543705 memory.go:191] Add success.
I0321 17:02:13.409804  543705 cpu.go:282] Add success.
W0321 17:02:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:02:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:02:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:02:13.420062  543705 net.go:648] Add success.
I0321 17:02:13.422766  543705 net.go:770] primary dev: ETH0
I0321 17:02:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:02:13.422793  543705 net.go:698] Add success.
W0321 17:02:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:02:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 17:02:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:02:14.456803  543705 disk_worker.go:494] system disk:vda1
I0321 17:02:14.456841  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:02:14.457117  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:02:14.457125  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:02:14.457130  543705 custom_config.go:64] query custom config with name: gpu
E0321 17:02:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:02:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:02:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:02:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:02:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:02:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:02:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:02:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:02:23.409798  543705 memory.go:184] no items to output this cycle
I0321 17:02:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 17:02:30.518235  543705 disk_info.go:125] begin check local disk info of client
I0321 17:02:30.520761  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:02:30.520767  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035b880 0xc00035b8c0]
E0321 17:02:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:02:33.409789  543705 memory.go:184] no items to output this cycle
I0321 17:02:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 17:02:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:02:43.409790  543705 memory.go:191] Add success.
I0321 17:02:43.409806  543705 cpu.go:282] Add success.
I0321 17:02:43.419896  543705 net.go:648] Add success.
I0321 17:02:43.423146  543705 net.go:770] primary dev: ETH0
I0321 17:02:43.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:02:43.423173  543705 net.go:698] Add success.
I0321 17:02:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:02:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:02:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:02:53.409790  543705 memory.go:184] no items to output this cycle
I0321 17:02:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 17:03:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:03:03.409772  543705 memory.go:184] no items to output this cycle
I0321 17:03:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 17:03:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:03:13.409825  543705 memory.go:191] Add success.
I0321 17:03:13.409854  543705 cpu.go:282] Add success.
W0321 17:03:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:03:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:03:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:03:13.420226  543705 net.go:648] Add success.
I0321 17:03:13.423107  543705 net.go:770] primary dev: ETH0
I0321 17:03:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:03:13.423134  543705 net.go:698] Add success.
I0321 17:03:13.469196  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"58546378-d037-48ec-849d-5c24f41b97d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:03:13.469230  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:03:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:03:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:03:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 17:03:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:03:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 17:03:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:03:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:03:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:03:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:03:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:03:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:03:23.409794  543705 memory.go:184] no items to output this cycle
I0321 17:03:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 17:03:30.521247  543705 disk_info.go:125] begin check local disk info of client
I0321 17:03:30.523735  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:03:30.523742  543705 disk_info.go:196] parse disk info done, disk is : [0xc000294240 0xc000294280]
E0321 17:03:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:03:33.409794  543705 memory.go:184] no items to output this cycle
I0321 17:03:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 17:03:39.056970  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:03:39.056977  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:03:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:03:43.410628  543705 memory.go:191] Add success.
I0321 17:03:43.409814  543705 cpu.go:282] Add success.
I0321 17:03:43.420419  543705 net.go:648] Add success.
I0321 17:03:43.423208  543705 net.go:770] primary dev: ETH0
I0321 17:03:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:03:43.423234  543705 net.go:698] Add success.
I0321 17:03:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:03:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:03:53.409929  543705 cpu.go:275] no items to output this cycle
E0321 17:03:53.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:03:53.410010  543705 memory.go:184] no items to output this cycle
E0321 17:04:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:04:03.409763  543705 memory.go:184] no items to output this cycle
I0321 17:04:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 17:04:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:04:13.409793  543705 memory.go:191] Add success.
I0321 17:04:13.409794  543705 cpu.go:282] Add success.
W0321 17:04:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:04:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:04:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:04:13.420120  543705 net.go:648] Add success.
I0321 17:04:13.422696  543705 net.go:770] primary dev: ETH0
I0321 17:04:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:04:13.422723  543705 net.go:698] Add success.
I0321 17:04:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:04:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:04:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 17:04:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:04:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 17:04:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:04:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:04:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:04:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:04:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:04:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:04:23.409804  543705 memory.go:184] no items to output this cycle
I0321 17:04:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 17:04:30.524270  543705 disk_info.go:125] begin check local disk info of client
I0321 17:04:30.526788  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:04:30.526794  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e940 0xc00028e980]
E0321 17:04:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:04:33.409795  543705 memory.go:184] no items to output this cycle
I0321 17:04:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 17:04:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:04:43.409821  543705 memory.go:191] Add success.
I0321 17:04:43.409834  543705 cpu.go:282] Add success.
I0321 17:04:43.419888  543705 net.go:648] Add success.
I0321 17:04:43.422491  543705 net.go:770] primary dev: ETH0
I0321 17:04:43.422504  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:04:43.422515  543705 net.go:698] Add success.
I0321 17:04:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:04:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:04:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:04:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:04:53.409779  543705 memory.go:184] no items to output this cycle
I0321 17:04:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 17:05:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:05:03.409794  543705 memory.go:184] no items to output this cycle
I0321 17:05:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 17:05:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:05:13.409839  543705 memory.go:191] Add success.
I0321 17:05:13.409842  543705 cpu.go:282] Add success.
W0321 17:05:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:05:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:05:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:05:13.420158  543705 net.go:648] Add success.
I0321 17:05:13.423160  543705 net.go:770] primary dev: ETH0
I0321 17:05:13.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:05:13.423185  543705 net.go:698] Add success.
I0321 17:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:05:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:05:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 17:05:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:05:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 17:05:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:05:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:05:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:05:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:05:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:05:23.409787  543705 memory.go:184] no items to output this cycle
I0321 17:05:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 17:05:30.527274  543705 disk_info.go:125] begin check local disk info of client
I0321 17:05:30.529794  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:05:30.529800  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ae80 0xc00035aec0]
E0321 17:05:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:05:33.409776  543705 memory.go:184] no items to output this cycle
I0321 17:05:33.409796  543705 cpu.go:275] no items to output this cycle
E0321 17:05:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:05:43.409819  543705 memory.go:191] Add success.
I0321 17:05:43.409828  543705 cpu.go:282] Add success.
I0321 17:05:43.419718  543705 net.go:770] primary dev: ETH0
I0321 17:05:43.419731  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:05:43.419744  543705 net.go:698] Add success.
I0321 17:05:43.419988  543705 net.go:648] Add success.
I0321 17:05:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:05:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:05:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:05:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:05:53.409812  543705 memory.go:184] no items to output this cycle
I0321 17:05:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 17:06:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:06:03.409766  543705 memory.go:184] no items to output this cycle
I0321 17:06:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 17:06:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:06:13.409819  543705 memory.go:191] Add success.
I0321 17:06:13.409820  543705 cpu.go:282] Add success.
W0321 17:06:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:06:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:06:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:06:13.420265  543705 net.go:648] Add success.
I0321 17:06:13.423201  543705 net.go:770] primary dev: ETH0
I0321 17:06:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:06:13.423226  543705 net.go:698] Add success.
I0321 17:06:13.467986  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d1893b2e-361d-4a38-9a54-d57a901ff26b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:06:13.468030  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:06:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:06:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:06:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 17:06:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:06:14.456686  543705 disk_worker.go:494] system disk:vda1
I0321 17:06:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:06:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:06:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:06:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:06:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:06:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:06:23.409758  543705 memory.go:184] no items to output this cycle
I0321 17:06:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 17:06:30.530293  543705 disk_info.go:125] begin check local disk info of client
I0321 17:06:30.532772  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:06:30.532778  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc80 0xc0001abcc0]
E0321 17:06:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:06:33.409791  543705 memory.go:184] no items to output this cycle
I0321 17:06:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 17:06:39.057731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:06:39.057738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:06:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:06:43.411005  543705 memory.go:191] Add success.
I0321 17:06:43.409824  543705 cpu.go:282] Add success.
I0321 17:06:43.419713  543705 net.go:648] Add success.
I0321 17:06:43.422700  543705 net.go:770] primary dev: ETH0
I0321 17:06:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:06:43.422727  543705 net.go:698] Add success.
I0321 17:06:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:06:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:06:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:06:53.409793  543705 memory.go:184] no items to output this cycle
I0321 17:06:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 17:07:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:07:03.409783  543705 memory.go:184] no items to output this cycle
I0321 17:07:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 17:07:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:07:13.409803  543705 memory.go:191] Add success.
I0321 17:07:13.409807  543705 cpu.go:282] Add success.
W0321 17:07:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:07:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:07:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:07:13.420159  543705 net.go:648] Add success.
I0321 17:07:13.422810  543705 net.go:770] primary dev: ETH0
I0321 17:07:13.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:07:13.422835  543705 net.go:698] Add success.
I0321 17:07:13.453407  543705 event_worker.go:152] Polling the log file for events...
W0321 17:07:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:07:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 17:07:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:07:14.456947  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:07:14.456957  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:07:14.456963  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:07:14.457020  543705 disk_worker.go:494] system disk:vda1
I0321 17:07:14.457063  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:07:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:07:15.456800  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:07:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:07:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:07:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:07:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:07:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:07:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:07:23.409771  543705 memory.go:184] no items to output this cycle
I0321 17:07:23.409775  543705 cpu.go:275] no items to output this cycle
I0321 17:07:30.533301  543705 disk_info.go:125] begin check local disk info of client
I0321 17:07:30.535839  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:07:30.535845  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d180 0xc00035d1c0]
E0321 17:07:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:07:33.409772  543705 memory.go:184] no items to output this cycle
I0321 17:07:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 17:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:07:43.409784  543705 memory.go:191] Add success.
I0321 17:07:43.409810  543705 cpu.go:282] Add success.
I0321 17:07:43.419851  543705 net.go:648] Add success.
I0321 17:07:43.422483  543705 net.go:770] primary dev: ETH0
I0321 17:07:43.422496  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:07:43.422508  543705 net.go:698] Add success.
I0321 17:07:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:07:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:07:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:07:53.410323  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:07:53.410339  543705 memory.go:184] no items to output this cycle
I0321 17:07:53.410361  543705 cpu.go:275] no items to output this cycle
E0321 17:08:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:08:03.409807  543705 memory.go:184] no items to output this cycle
I0321 17:08:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 17:08:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:08:13.409822  543705 memory.go:191] Add success.
I0321 17:08:13.409832  543705 cpu.go:282] Add success.
W0321 17:08:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:08:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:08:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:08:13.420167  543705 net.go:648] Add success.
I0321 17:08:13.423201  543705 net.go:770] primary dev: ETH0
I0321 17:08:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:08:13.423227  543705 net.go:698] Add success.
I0321 17:08:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:08:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:08:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 17:08:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:08:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 17:08:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:08:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:08:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:08:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:08:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:08:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:08:23.409771  543705 memory.go:184] no items to output this cycle
I0321 17:08:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 17:08:30.536321  543705 disk_info.go:125] begin check local disk info of client
I0321 17:08:30.538807  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:08:30.538812  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d94c0 0xc0004d9500]
E0321 17:08:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:08:33.409781  543705 memory.go:184] no items to output this cycle
I0321 17:08:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 17:08:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:08:43.409793  543705 memory.go:191] Add success.
I0321 17:08:43.409821  543705 cpu.go:282] Add success.
I0321 17:08:43.419835  543705 net.go:648] Add success.
I0321 17:08:43.422452  543705 net.go:770] primary dev: ETH0
I0321 17:08:43.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:08:43.422478  543705 net.go:698] Add success.
I0321 17:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:08:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:08:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:08:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:08:53.409775  543705 memory.go:184] no items to output this cycle
I0321 17:08:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 17:09:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:09:03.409788  543705 memory.go:184] no items to output this cycle
I0321 17:09:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 17:09:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:09:13.409805  543705 memory.go:191] Add success.
I0321 17:09:13.409808  543705 cpu.go:282] Add success.
W0321 17:09:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:09:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:09:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:09:13.419976  543705 net.go:770] primary dev: ETH0
I0321 17:09:13.419992  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:09:13.420006  543705 net.go:698] Add success.
I0321 17:09:13.420391  543705 net.go:648] Add success.
I0321 17:09:13.463305  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c6be8f87-510f-4829-9f26-465580b2be6d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:09:13.463340  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:09:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:09:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:09:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 17:09:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:09:14.456621  543705 disk_worker.go:494] system disk:vda1
I0321 17:09:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:09:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:09:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:09:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:09:23.409785  543705 memory.go:184] no items to output this cycle
I0321 17:09:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 17:09:30.539343  543705 disk_info.go:125] begin check local disk info of client
I0321 17:09:30.541812  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:09:30.541819  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e840 0xc00037e880]
E0321 17:09:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:09:33.409785  543705 memory.go:184] no items to output this cycle
I0321 17:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 17:09:39.057880  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:09:39.057887  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:09:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:09:43.410752  543705 memory.go:191] Add success.
I0321 17:09:43.409798  543705 cpu.go:282] Add success.
I0321 17:09:43.420474  543705 net.go:648] Add success.
I0321 17:09:43.423311  543705 net.go:770] primary dev: ETH0
I0321 17:09:43.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:09:43.423337  543705 net.go:698] Add success.
I0321 17:09:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:09:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:09:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:09:53.409780  543705 memory.go:184] no items to output this cycle
I0321 17:09:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 17:10:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:10:03.409773  543705 memory.go:184] no items to output this cycle
I0321 17:10:03.409795  543705 cpu.go:275] no items to output this cycle
W0321 17:10:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:10:13.409747  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:10:13.409754  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:10:13.409849  543705 cpu.go:282] Add success.
E0321 17:10:13.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:10:13.409868  543705 memory.go:191] Add success.
I0321 17:10:13.420244  543705 net.go:648] Add success.
I0321 17:10:13.422928  543705 net.go:770] primary dev: ETH0
I0321 17:10:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:10:13.422956  543705 net.go:698] Add success.
I0321 17:10:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:10:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:10:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 17:10:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:10:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 17:10:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:10:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:10:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:10:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:10:23.409771  543705 memory.go:184] no items to output this cycle
I0321 17:10:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 17:10:30.542351  543705 disk_info.go:125] begin check local disk info of client
I0321 17:10:30.544858  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:10:30.544864  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5280 0xc0000c52c0]
E0321 17:10:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:10:33.409806  543705 memory.go:184] no items to output this cycle
I0321 17:10:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 17:10:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:10:43.409790  543705 memory.go:191] Add success.
I0321 17:10:43.409807  543705 cpu.go:282] Add success.
I0321 17:10:43.419875  543705 net.go:648] Add success.
I0321 17:10:43.422791  543705 net.go:770] primary dev: ETH0
I0321 17:10:43.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:10:43.422817  543705 net.go:698] Add success.
I0321 17:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:10:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:10:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:10:53.409804  543705 memory.go:184] no items to output this cycle
I0321 17:10:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 17:11:03.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:11:03.409900  543705 cpu.go:275] no items to output this cycle
I0321 17:11:03.409913  543705 memory.go:184] no items to output this cycle
E0321 17:11:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:11:13.409799  543705 memory.go:191] Add success.
I0321 17:11:13.409817  543705 cpu.go:282] Add success.
W0321 17:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:11:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:11:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:11:13.420172  543705 net.go:648] Add success.
I0321 17:11:13.422722  543705 net.go:770] primary dev: ETH0
I0321 17:11:13.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:11:13.422747  543705 net.go:698] Add success.
I0321 17:11:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:11:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:11:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 17:11:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:11:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 17:11:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:11:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:11:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:11:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:11:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:11:23.409784  543705 memory.go:184] no items to output this cycle
I0321 17:11:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 17:11:30.545357  543705 disk_info.go:125] begin check local disk info of client
I0321 17:11:30.547836  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:11:30.547843  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f5c0 0xc00037f600]
E0321 17:11:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:11:33.409801  543705 memory.go:184] no items to output this cycle
I0321 17:11:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 17:11:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:11:43.409818  543705 memory.go:191] Add success.
I0321 17:11:43.409827  543705 cpu.go:282] Add success.
I0321 17:11:43.419966  543705 net.go:648] Add success.
I0321 17:11:43.422662  543705 net.go:770] primary dev: ETH0
I0321 17:11:43.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:11:43.422696  543705 net.go:698] Add success.
I0321 17:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:11:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:11:53.410440  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:11:53.410458  543705 memory.go:184] no items to output this cycle
I0321 17:11:53.410503  543705 cpu.go:275] no items to output this cycle
E0321 17:12:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:12:03.409801  543705 memory.go:184] no items to output this cycle
I0321 17:12:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 17:12:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:12:13.409793  543705 memory.go:191] Add success.
I0321 17:12:13.409814  543705 cpu.go:282] Add success.
W0321 17:12:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:12:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:12:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:12:13.420145  543705 net.go:648] Add success.
I0321 17:12:13.422991  543705 net.go:770] primary dev: ETH0
I0321 17:12:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:12:13.423015  543705 net.go:698] Add success.
I0321 17:12:13.464109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"526f250a-9e94-4090-b402-caac1b2eabe0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:12:13.464143  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 17:12:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:12:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 17:12:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:12:14.456019  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:12:14.456027  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:12:14.456032  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:12:14.456468  543705 disk_worker.go:494] system disk:vda1
I0321 17:12:14.456498  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:12:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:12:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:12:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:12:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:12:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:12:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:12:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:12:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:12:23.409798  543705 memory.go:184] no items to output this cycle
I0321 17:12:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 17:12:30.548389  543705 disk_info.go:125] begin check local disk info of client
I0321 17:12:30.550834  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:12:30.550842  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486ec0 0xc000486f00]
E0321 17:12:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:12:33.409799  543705 memory.go:184] no items to output this cycle
I0321 17:12:33.409816  543705 cpu.go:275] no items to output this cycle
I0321 17:12:39.061012  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:12:39.061018  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:12:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:12:43.410778  543705 memory.go:191] Add success.
I0321 17:12:43.409823  543705 cpu.go:282] Add success.
I0321 17:12:43.420704  543705 net.go:648] Add success.
I0321 17:12:43.423380  543705 net.go:770] primary dev: ETH0
I0321 17:12:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:12:43.423405  543705 net.go:698] Add success.
I0321 17:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:12:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:12:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:12:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:12:53.409802  543705 memory.go:184] no items to output this cycle
I0321 17:12:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 17:13:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:13:03.409800  543705 memory.go:184] no items to output this cycle
I0321 17:13:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 17:13:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:13:13.409827  543705 memory.go:191] Add success.
I0321 17:13:13.409832  543705 cpu.go:282] Add success.
W0321 17:13:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:13:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:13:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:13:13.420184  543705 net.go:648] Add success.
I0321 17:13:13.423329  543705 net.go:770] primary dev: ETH0
I0321 17:13:13.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:13:13.423358  543705 net.go:698] Add success.
I0321 17:13:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:13:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:13:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 17:13:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:13:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 17:13:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:13:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:13:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:13:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:13:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:13:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:13:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:13:23.409766  543705 memory.go:184] no items to output this cycle
I0321 17:13:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 17:13:30.551398  543705 disk_info.go:125] begin check local disk info of client
I0321 17:13:30.553844  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:13:30.553851  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536d80 0xc000536dc0]
E0321 17:13:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:13:33.409798  543705 memory.go:184] no items to output this cycle
I0321 17:13:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 17:13:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:13:43.409812  543705 memory.go:191] Add success.
I0321 17:13:43.409814  543705 cpu.go:282] Add success.
I0321 17:13:43.420182  543705 net.go:648] Add success.
I0321 17:13:43.422927  543705 net.go:770] primary dev: ETH0
I0321 17:13:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:13:43.422952  543705 net.go:698] Add success.
I0321 17:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:13:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:13:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:13:53.409770  543705 memory.go:184] no items to output this cycle
I0321 17:13:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 17:14:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:14:03.409772  543705 memory.go:184] no items to output this cycle
I0321 17:14:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 17:14:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:14:13.409803  543705 memory.go:191] Add success.
I0321 17:14:13.409803  543705 cpu.go:282] Add success.
W0321 17:14:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:14:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:14:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:14:13.420161  543705 net.go:648] Add success.
I0321 17:14:13.423156  543705 net.go:770] primary dev: ETH0
I0321 17:14:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:14:13.423186  543705 net.go:698] Add success.
I0321 17:14:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:14:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:14:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0321 17:14:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:14:14.456596  543705 disk_worker.go:494] system disk:vda1
I0321 17:14:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:14:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:14:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:14:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:14:23.410398  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:14:23.410416  543705 memory.go:184] no items to output this cycle
I0321 17:14:23.410443  543705 cpu.go:275] no items to output this cycle
I0321 17:14:30.554415  543705 disk_info.go:125] begin check local disk info of client
I0321 17:14:30.556877  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:14:30.556884  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9740 0xc0003c9780]
E0321 17:14:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:14:33.409797  543705 memory.go:184] no items to output this cycle
I0321 17:14:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 17:14:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:14:43.409775  543705 memory.go:191] Add success.
I0321 17:14:43.409806  543705 cpu.go:282] Add success.
I0321 17:14:43.419745  543705 net.go:648] Add success.
I0321 17:14:43.422811  543705 net.go:770] primary dev: ETH0
I0321 17:14:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:14:43.422839  543705 net.go:698] Add success.
I0321 17:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:14:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:14:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:14:53.409778  543705 memory.go:184] no items to output this cycle
I0321 17:14:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 17:15:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:15:03.409784  543705 memory.go:184] no items to output this cycle
I0321 17:15:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 17:15:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:15:13.409794  543705 memory.go:191] Add success.
I0321 17:15:13.409810  543705 cpu.go:282] Add success.
W0321 17:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:15:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:15:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:15:13.420124  543705 net.go:648] Add success.
I0321 17:15:13.422931  543705 net.go:770] primary dev: ETH0
I0321 17:15:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:15:13.422957  543705 net.go:698] Add success.
I0321 17:15:13.468371  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3d97f7c7-7a46-4763-9060-563d0f087e49","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:15:13.468404  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:15:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:15:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:15:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 17:15:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:15:14.456524  543705 disk_worker.go:494] system disk:vda1
I0321 17:15:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:15:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:15:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:15:23.410368  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:15:23.410383  543705 cpu.go:275] no items to output this cycle
I0321 17:15:23.410384  543705 memory.go:184] no items to output this cycle
I0321 17:15:30.557429  543705 disk_info.go:125] begin check local disk info of client
I0321 17:15:30.559927  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:15:30.559933  543705 disk_info.go:196] parse disk info done, disk is : [0xc000313780 0xc0003137c0]
E0321 17:15:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:15:33.409790  543705 memory.go:184] no items to output this cycle
I0321 17:15:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 17:15:39.061728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:15:39.061733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:15:43.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:15:43.410763  543705 memory.go:191] Add success.
I0321 17:15:43.409973  543705 cpu.go:282] Add success.
I0321 17:15:43.419726  543705 net.go:648] Add success.
I0321 17:15:43.422355  543705 net.go:770] primary dev: ETH0
I0321 17:15:43.422369  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:15:43.422382  543705 net.go:698] Add success.
I0321 17:15:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:15:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:15:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:15:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:15:53.409781  543705 memory.go:184] no items to output this cycle
I0321 17:15:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 17:16:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:16:03.409800  543705 memory.go:184] no items to output this cycle
I0321 17:16:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 17:16:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:16:13.409789  543705 memory.go:191] Add success.
W0321 17:16:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 17:16:13.409820  543705 cpu.go:282] Add success.
W0321 17:16:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:16:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:16:13.420163  543705 net.go:648] Add success.
I0321 17:16:13.423041  543705 net.go:770] primary dev: ETH0
I0321 17:16:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:16:13.423065  543705 net.go:698] Add success.
I0321 17:16:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:16:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:16:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 17:16:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:16:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 17:16:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:16:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:16:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:16:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:16:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:16:23.409799  543705 memory.go:184] no items to output this cycle
I0321 17:16:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 17:16:30.560446  543705 disk_info.go:125] begin check local disk info of client
I0321 17:16:30.563025  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:16:30.563032  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2900 0xc0003e2940]
E0321 17:16:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:16:33.409801  543705 memory.go:184] no items to output this cycle
I0321 17:16:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 17:16:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:16:43.409799  543705 memory.go:191] Add success.
I0321 17:16:43.409820  543705 cpu.go:282] Add success.
I0321 17:16:43.420209  543705 net.go:648] Add success.
I0321 17:16:43.422883  543705 net.go:770] primary dev: ETH0
I0321 17:16:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:16:43.422916  543705 net.go:698] Add success.
I0321 17:16:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:16:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:16:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:16:53.409785  543705 memory.go:184] no items to output this cycle
I0321 17:16:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 17:17:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:17:03.409766  543705 memory.go:184] no items to output this cycle
I0321 17:17:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 17:17:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:17:13.409803  543705 memory.go:191] Add success.
I0321 17:17:13.409804  543705 cpu.go:282] Add success.
W0321 17:17:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:17:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:17:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:17:13.420483  543705 net.go:648] Add success.
I0321 17:17:13.423079  543705 net.go:770] primary dev: ETH0
I0321 17:17:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:17:13.423106  543705 net.go:698] Add success.
I0321 17:17:13.453675  543705 event_worker.go:152] Polling the log file for events...
W0321 17:17:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:17:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 17:17:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:17:14.455918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:17:14.455927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:17:14.455933  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:17:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 17:17:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:17:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:17:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 17:17:16.457405  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:17:16.458458  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:17:16.458510  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:17:16.458527  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:17:16.472865  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:17:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:17:23.409796  543705 memory.go:184] no items to output this cycle
I0321 17:17:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 17:17:30.563449  543705 disk_info.go:125] begin check local disk info of client
I0321 17:17:30.565954  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:17:30.565960  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380a00 0xc000380a40]
E0321 17:17:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:17:33.409787  543705 memory.go:184] no items to output this cycle
I0321 17:17:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 17:17:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:17:43.409798  543705 memory.go:191] Add success.
I0321 17:17:43.409798  543705 cpu.go:282] Add success.
I0321 17:17:43.419876  543705 net.go:648] Add success.
I0321 17:17:43.422510  543705 net.go:770] primary dev: ETH0
I0321 17:17:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:17:43.422534  543705 net.go:698] Add success.
I0321 17:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:17:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:17:53.409804  543705 memory.go:184] no items to output this cycle
I0321 17:17:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 17:18:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:18:03.409775  543705 memory.go:184] no items to output this cycle
I0321 17:18:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 17:18:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:18:13.409831  543705 memory.go:191] Add success.
I0321 17:18:13.409840  543705 cpu.go:282] Add success.
W0321 17:18:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:18:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:18:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:18:13.420168  543705 net.go:648] Add success.
I0321 17:18:13.422663  543705 net.go:770] primary dev: ETH0
I0321 17:18:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:18:13.422698  543705 net.go:698] Add success.
I0321 17:18:13.467850  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7636c8d-50f6-4c8f-a159-43e2ae76cd3d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:18:13.467884  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:18:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:18:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:18:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 17:18:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:18:14.456541  543705 disk_worker.go:494] system disk:vda1
I0321 17:18:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:18:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:18:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:18:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:18:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:18:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:18:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:18:23.409777  543705 memory.go:184] no items to output this cycle
I0321 17:18:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 17:18:30.566471  543705 disk_info.go:125] begin check local disk info of client
I0321 17:18:30.568986  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:18:30.568992  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d200 0xc00035d240]
E0321 17:18:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:18:33.409803  543705 memory.go:184] no items to output this cycle
I0321 17:18:33.409819  543705 cpu.go:275] no items to output this cycle
I0321 17:18:39.065014  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:18:39.065021  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:18:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:18:43.410706  543705 memory.go:191] Add success.
I0321 17:18:43.409804  543705 cpu.go:282] Add success.
I0321 17:18:43.420427  543705 net.go:648] Add success.
I0321 17:18:43.423166  543705 net.go:770] primary dev: ETH0
I0321 17:18:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:18:43.423193  543705 net.go:698] Add success.
I0321 17:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:18:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:18:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:18:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:18:53.409778  543705 memory.go:184] no items to output this cycle
I0321 17:18:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 17:19:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:19:03.409782  543705 memory.go:184] no items to output this cycle
I0321 17:19:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 17:19:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:19:13.409785  543705 memory.go:191] Add success.
I0321 17:19:13.409807  543705 cpu.go:282] Add success.
W0321 17:19:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:19:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:19:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:19:13.420162  543705 net.go:648] Add success.
I0321 17:19:13.423666  543705 net.go:770] primary dev: ETH0
I0321 17:19:13.423680  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:19:13.423695  543705 net.go:698] Add success.
I0321 17:19:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:19:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:19:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 17:19:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:19:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 17:19:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:19:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:19:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:19:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:19:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:19:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:19:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 17:19:23.409792  543705 memory.go:184] no items to output this cycle
I0321 17:19:30.569481  543705 disk_info.go:125] begin check local disk info of client
I0321 17:19:30.572033  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:19:30.572038  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae500 0xc0004ae540]
E0321 17:19:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:19:33.409768  543705 memory.go:184] no items to output this cycle
I0321 17:19:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 17:19:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:19:43.409783  543705 memory.go:191] Add success.
I0321 17:19:43.409801  543705 cpu.go:282] Add success.
I0321 17:19:43.419949  543705 net.go:648] Add success.
I0321 17:19:43.422632  543705 net.go:770] primary dev: ETH0
I0321 17:19:43.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:19:43.422658  543705 net.go:698] Add success.
I0321 17:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:19:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:19:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:19:53.409796  543705 memory.go:184] no items to output this cycle
I0321 17:19:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 17:20:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:20:03.409774  543705 memory.go:184] no items to output this cycle
I0321 17:20:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 17:20:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:20:13.409819  543705 memory.go:191] Add success.
I0321 17:20:13.409823  543705 cpu.go:282] Add success.
W0321 17:20:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:20:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:20:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:20:13.420118  543705 net.go:648] Add success.
I0321 17:20:13.423227  543705 net.go:770] primary dev: ETH0
I0321 17:20:13.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:20:13.423467  543705 net.go:698] Add success.
I0321 17:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:20:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:20:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0321 17:20:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:20:14.456509  543705 disk_worker.go:494] system disk:vda1
I0321 17:20:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:20:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:20:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:20:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:20:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:20:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:20:23.409776  543705 memory.go:184] no items to output this cycle
I0321 17:20:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 17:20:30.572493  543705 disk_info.go:125] begin check local disk info of client
I0321 17:20:30.575005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:20:30.575011  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a80 0xc0000c4ac0]
E0321 17:20:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:20:33.409792  543705 memory.go:184] no items to output this cycle
I0321 17:20:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 17:20:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:20:43.409811  543705 memory.go:191] Add success.
I0321 17:20:43.409823  543705 cpu.go:282] Add success.
I0321 17:20:43.419876  543705 net.go:648] Add success.
I0321 17:20:43.422690  543705 net.go:770] primary dev: ETH0
I0321 17:20:43.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:20:43.422716  543705 net.go:698] Add success.
I0321 17:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:20:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:20:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:20:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:20:53.409770  543705 memory.go:184] no items to output this cycle
I0321 17:20:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 17:21:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:21:03.409783  543705 memory.go:184] no items to output this cycle
I0321 17:21:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 17:21:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:21:13.409820  543705 memory.go:191] Add success.
I0321 17:21:13.409827  543705 cpu.go:282] Add success.
W0321 17:21:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:21:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:21:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:21:13.419761  543705 net.go:648] Add success.
I0321 17:21:13.422892  543705 net.go:770] primary dev: ETH0
I0321 17:21:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:21:13.422920  543705 net.go:698] Add success.
I0321 17:21:13.468104  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"66aa36dd-94f7-442d-b250-5a31f872cc5d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:21:13.468137  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:21:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:21:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:21:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 17:21:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:21:14.456615  543705 disk_worker.go:494] system disk:vda1
I0321 17:21:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:21:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:21:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:21:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:21:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:21:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:21:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 17:21:23.409784  543705 memory.go:184] no items to output this cycle
I0321 17:21:30.575517  543705 disk_info.go:125] begin check local disk info of client
I0321 17:21:30.578081  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:21:30.578088  543705 disk_info.go:196] parse disk info done, disk is : [0xc000537580 0xc0005375c0]
E0321 17:21:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:21:33.409764  543705 memory.go:184] no items to output this cycle
I0321 17:21:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 17:21:39.065735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:21:39.065742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:21:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:21:43.410829  543705 memory.go:191] Add success.
I0321 17:21:43.409816  543705 cpu.go:282] Add success.
I0321 17:21:43.420512  543705 net.go:648] Add success.
I0321 17:21:43.423627  543705 net.go:770] primary dev: ETH0
I0321 17:21:43.423641  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:21:43.423656  543705 net.go:698] Add success.
I0321 17:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:21:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:21:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:21:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:21:53.409777  543705 memory.go:184] no items to output this cycle
I0321 17:21:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 17:22:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:22:03.409806  543705 memory.go:184] no items to output this cycle
I0321 17:22:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 17:22:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:22:13.409788  543705 memory.go:191] Add success.
I0321 17:22:13.409804  543705 cpu.go:282] Add success.
W0321 17:22:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:22:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:22:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:22:13.420164  543705 net.go:648] Add success.
I0321 17:22:13.423782  543705 net.go:770] primary dev: ETH0
I0321 17:22:13.423797  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:22:13.423811  543705 net.go:698] Add success.
W0321 17:22:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:22:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 17:22:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:22:14.455959  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:22:14.455968  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:22:14.455974  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:22:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 17:22:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:22:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:22:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:22:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:22:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:22:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:22:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:22:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:22:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:22:23.409821  543705 memory.go:184] no items to output this cycle
I0321 17:22:23.409825  543705 cpu.go:275] no items to output this cycle
I0321 17:22:30.578536  543705 disk_info.go:125] begin check local disk info of client
I0321 17:22:30.581062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:22:30.581069  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536c40 0xc000536c80]
E0321 17:22:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:22:33.409774  543705 memory.go:184] no items to output this cycle
I0321 17:22:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 17:22:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:22:43.409798  543705 cpu.go:282] Add success.
I0321 17:22:43.409802  543705 memory.go:191] Add success.
I0321 17:22:43.419889  543705 net.go:648] Add success.
I0321 17:22:43.423264  543705 net.go:770] primary dev: ETH0
I0321 17:22:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:22:43.423289  543705 net.go:698] Add success.
I0321 17:22:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:22:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:22:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:22:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:22:53.409771  543705 memory.go:184] no items to output this cycle
I0321 17:22:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 17:23:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:23:03.409783  543705 memory.go:184] no items to output this cycle
I0321 17:23:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 17:23:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:23:13.409827  543705 memory.go:191] Add success.
I0321 17:23:13.409833  543705 cpu.go:282] Add success.
W0321 17:23:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:23:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:23:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:23:13.420097  543705 net.go:648] Add success.
I0321 17:23:13.423050  543705 net.go:770] primary dev: ETH0
I0321 17:23:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:23:13.423080  543705 net.go:698] Add success.
I0321 17:23:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:23:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:23:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 17:23:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:23:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 17:23:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:23:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:23:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:23:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:23:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:23:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:23:23.409786  543705 memory.go:184] no items to output this cycle
I0321 17:23:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 17:23:30.581539  543705 disk_info.go:125] begin check local disk info of client
I0321 17:23:30.584102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:23:30.584109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa840 0xc0002aa880]
E0321 17:23:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:23:33.409785  543705 memory.go:184] no items to output this cycle
I0321 17:23:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 17:23:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:23:43.409820  543705 memory.go:191] Add success.
I0321 17:23:43.409828  543705 cpu.go:282] Add success.
I0321 17:23:43.419959  543705 net.go:648] Add success.
I0321 17:23:43.423173  543705 net.go:770] primary dev: ETH0
I0321 17:23:43.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:23:43.423198  543705 net.go:698] Add success.
I0321 17:23:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:23:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:23:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:23:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:23:53.409786  543705 cpu.go:275] no items to output this cycle
I0321 17:23:53.409795  543705 memory.go:184] no items to output this cycle
E0321 17:24:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:24:03.409820  543705 memory.go:184] no items to output this cycle
I0321 17:24:03.409833  543705 cpu.go:275] no items to output this cycle
E0321 17:24:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:24:13.409830  543705 memory.go:191] Add success.
I0321 17:24:13.409834  543705 cpu.go:282] Add success.
W0321 17:24:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:24:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:24:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:24:13.420198  543705 net.go:648] Add success.
I0321 17:24:13.422775  543705 net.go:770] primary dev: ETH0
I0321 17:24:13.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:24:13.422810  543705 net.go:698] Add success.
I0321 17:24:13.463872  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f316f0c-278b-4204-8ec9-05f86218dbbd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:24:13.463906  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:24:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:24:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:24:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 17:24:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:24:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 17:24:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:24:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:24:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:24:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:24:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:24:23.409811  543705 memory.go:184] no items to output this cycle
I0321 17:24:23.409833  543705 cpu.go:275] no items to output this cycle
I0321 17:24:30.584568  543705 disk_info.go:125] begin check local disk info of client
I0321 17:24:30.587152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:24:30.587160  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abec0 0xc0001abf00]
E0321 17:24:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:24:33.409765  543705 memory.go:184] no items to output this cycle
I0321 17:24:33.409816  543705 cpu.go:275] no items to output this cycle
I0321 17:24:39.069029  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:24:39.069036  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:24:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:24:43.410648  543705 memory.go:191] Add success.
I0321 17:24:43.409787  543705 cpu.go:282] Add success.
I0321 17:24:43.420345  543705 net.go:648] Add success.
I0321 17:24:43.423091  543705 net.go:770] primary dev: ETH0
I0321 17:24:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:24:43.423116  543705 net.go:698] Add success.
I0321 17:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:24:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:24:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:24:53.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:24:53.409894  543705 memory.go:184] no items to output this cycle
I0321 17:24:53.409926  543705 cpu.go:275] no items to output this cycle
E0321 17:25:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:25:03.409797  543705 memory.go:184] no items to output this cycle
I0321 17:25:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 17:25:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:25:13.409793  543705 memory.go:191] Add success.
I0321 17:25:13.409810  543705 cpu.go:282] Add success.
W0321 17:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:25:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:25:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:25:13.420315  543705 net.go:648] Add success.
I0321 17:25:13.422917  543705 net.go:770] primary dev: ETH0
I0321 17:25:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:25:13.422943  543705 net.go:698] Add success.
I0321 17:25:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:25:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:25:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 17:25:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:25:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 17:25:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:25:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:25:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:25:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:25:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:25:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:25:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:25:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 17:25:23.409791  543705 memory.go:184] no items to output this cycle
I0321 17:25:30.587580  543705 disk_info.go:125] begin check local disk info of client
I0321 17:25:30.590143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:25:30.590149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
E0321 17:25:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:25:33.409795  543705 memory.go:184] no items to output this cycle
I0321 17:25:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 17:25:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:25:43.409785  543705 memory.go:191] Add success.
I0321 17:25:43.409786  543705 cpu.go:282] Add success.
I0321 17:25:43.419993  543705 net.go:648] Add success.
I0321 17:25:43.422721  543705 net.go:770] primary dev: ETH0
I0321 17:25:43.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:25:43.422750  543705 net.go:698] Add success.
I0321 17:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:25:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:25:53.409770  543705 memory.go:184] no items to output this cycle
I0321 17:25:53.409902  543705 cpu.go:275] no items to output this cycle
E0321 17:26:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:26:03.409810  543705 memory.go:184] no items to output this cycle
I0321 17:26:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 17:26:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:26:13.409800  543705 memory.go:191] Add success.
I0321 17:26:13.409805  543705 cpu.go:282] Add success.
W0321 17:26:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:26:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:26:13.420105  543705 net.go:648] Add success.
I0321 17:26:13.422782  543705 net.go:770] primary dev: ETH0
I0321 17:26:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:26:13.422807  543705 net.go:698] Add success.
I0321 17:26:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:26:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:26:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 17:26:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:26:14.456608  543705 disk_worker.go:494] system disk:vda1
I0321 17:26:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:26:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:26:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:26:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:26:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:26:23.409804  543705 memory.go:184] no items to output this cycle
I0321 17:26:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 17:26:30.590594  543705 disk_info.go:125] begin check local disk info of client
I0321 17:26:30.593120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:26:30.593127  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046d940 0xc00046d980]
E0321 17:26:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:26:33.409790  543705 memory.go:184] no items to output this cycle
I0321 17:26:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 17:26:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:26:43.409814  543705 memory.go:191] Add success.
I0321 17:26:43.409818  543705 cpu.go:282] Add success.
I0321 17:26:43.419893  543705 net.go:648] Add success.
I0321 17:26:43.422592  543705 net.go:770] primary dev: ETH0
I0321 17:26:43.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:26:43.422618  543705 net.go:698] Add success.
I0321 17:26:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:26:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:26:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:26:53.410623  543705 cpu.go:275] no items to output this cycle
E0321 17:26:53.410592  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:26:53.410707  543705 memory.go:184] no items to output this cycle
E0321 17:27:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:27:03.409799  543705 memory.go:184] no items to output this cycle
I0321 17:27:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 17:27:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:27:13.409794  543705 memory.go:191] Add success.
I0321 17:27:13.409811  543705 cpu.go:282] Add success.
W0321 17:27:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:27:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:27:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:27:13.420149  543705 net.go:648] Add success.
I0321 17:27:13.422811  543705 net.go:770] primary dev: ETH0
I0321 17:27:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:27:13.422837  543705 net.go:698] Add success.
I0321 17:27:13.429128  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 17:27:13.453299  543705 event_worker.go:152] Polling the log file for events...
I0321 17:27:13.463845  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"48da834e-6646-443e-a966-f1f8551bcc90","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:27:13.463881  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 17:27:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:27:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0321 17:27:14.455220  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:27:14.455928  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:27:14.455937  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:27:14.455943  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:27:14.456809  543705 disk_worker.go:494] system disk:vda1
I0321 17:27:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:27:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:27:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:27:16.458040  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:27:16.458040  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:27:16.458109  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:27:16.458133  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:27:16.472523  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:27:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:27:23.409795  543705 memory.go:184] no items to output this cycle
I0321 17:27:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 17:27:30.593609  543705 disk_info.go:125] begin check local disk info of client
I0321 17:27:30.596165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:27:30.596172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000392d40 0xc000392d80]
E0321 17:27:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:27:33.409785  543705 memory.go:184] no items to output this cycle
I0321 17:27:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 17:27:39.069738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:27:39.069744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:27:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:27:43.410641  543705 memory.go:191] Add success.
I0321 17:27:43.409817  543705 cpu.go:282] Add success.
I0321 17:27:43.420436  543705 net.go:648] Add success.
I0321 17:27:43.423247  543705 net.go:770] primary dev: ETH0
I0321 17:27:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:27:43.423284  543705 net.go:698] Add success.
I0321 17:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:27:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:27:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:27:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 17:27:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:27:53.409810  543705 memory.go:184] no items to output this cycle
E0321 17:28:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:28:03.409769  543705 memory.go:184] no items to output this cycle
I0321 17:28:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 17:28:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:28:13.409800  543705 memory.go:191] Add success.
I0321 17:28:13.409824  543705 cpu.go:282] Add success.
W0321 17:28:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:28:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:28:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:28:13.420364  543705 net.go:648] Add success.
I0321 17:28:13.422860  543705 net.go:770] primary dev: ETH0
I0321 17:28:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:28:13.422891  543705 net.go:698] Add success.
I0321 17:28:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:28:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:28:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 17:28:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:28:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 17:28:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:28:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:28:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:28:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:28:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:28:23.409777  543705 memory.go:184] no items to output this cycle
I0321 17:28:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 17:28:30.596617  543705 disk_info.go:125] begin check local disk info of client
I0321 17:28:30.599226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:28:30.599232  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b7c80 0xc0002b7cc0]
E0321 17:28:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:28:33.409770  543705 memory.go:184] no items to output this cycle
I0321 17:28:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 17:28:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:28:43.409784  543705 memory.go:191] Add success.
I0321 17:28:43.409803  543705 cpu.go:282] Add success.
I0321 17:28:43.419858  543705 net.go:648] Add success.
I0321 17:28:43.423044  543705 net.go:770] primary dev: ETH0
I0321 17:28:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:28:43.423074  543705 net.go:698] Add success.
I0321 17:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:28:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:28:53.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:28:53.409886  543705 cpu.go:275] no items to output this cycle
I0321 17:28:53.409897  543705 memory.go:184] no items to output this cycle
E0321 17:29:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:29:03.409766  543705 memory.go:184] no items to output this cycle
I0321 17:29:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 17:29:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:29:13.409820  543705 memory.go:191] Add success.
I0321 17:29:13.409825  543705 cpu.go:282] Add success.
W0321 17:29:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:29:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:29:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:29:13.420113  543705 net.go:648] Add success.
I0321 17:29:13.422859  543705 net.go:770] primary dev: ETH0
I0321 17:29:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:29:13.422885  543705 net.go:698] Add success.
I0321 17:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:29:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:29:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 17:29:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:29:14.456626  543705 disk_worker.go:494] system disk:vda1
I0321 17:29:14.456658  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:29:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:29:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:29:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:29:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:29:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:29:23.409781  543705 memory.go:184] no items to output this cycle
I0321 17:29:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 17:29:30.599630  543705 disk_info.go:125] begin check local disk info of client
I0321 17:29:30.602117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:29:30.602124  543705 disk_info.go:196] parse disk info done, disk is : [0xc000537980 0xc0005379c0]
E0321 17:29:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:29:33.409791  543705 memory.go:184] no items to output this cycle
I0321 17:29:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 17:29:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:29:43.409810  543705 memory.go:191] Add success.
I0321 17:29:43.409818  543705 cpu.go:282] Add success.
I0321 17:29:43.419853  543705 net.go:648] Add success.
I0321 17:29:43.422568  543705 net.go:770] primary dev: ETH0
I0321 17:29:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:29:43.422595  543705 net.go:698] Add success.
I0321 17:29:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:29:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:29:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:29:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:29:53.409793  543705 memory.go:184] no items to output this cycle
I0321 17:29:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 17:30:03.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:30:03.409906  543705 cpu.go:275] no items to output this cycle
I0321 17:30:03.409913  543705 memory.go:184] no items to output this cycle
E0321 17:30:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:30:13.409825  543705 memory.go:191] Add success.
I0321 17:30:13.409827  543705 cpu.go:282] Add success.
W0321 17:30:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:30:13.412994  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:30:13.413000  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:30:13.420693  543705 net.go:648] Add success.
I0321 17:30:13.422790  543705 net.go:770] primary dev: ETH0
I0321 17:30:13.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:30:13.422820  543705 net.go:698] Add success.
I0321 17:30:13.464112  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6ed9a6ef-38c5-46c9-b19e-0eea47a3acb4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:30:13.464146  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:30:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:30:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:30:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 17:30:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:30:14.456493  543705 disk_worker.go:494] system disk:vda1
I0321 17:30:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:30:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:30:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:30:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:30:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:30:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:30:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 17:30:23.409785  543705 memory.go:184] no items to output this cycle
I0321 17:30:30.602648  543705 disk_info.go:125] begin check local disk info of client
I0321 17:30:30.605150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:30:30.605156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046d540 0xc00046d580]
E0321 17:30:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:30:33.409794  543705 memory.go:184] no items to output this cycle
I0321 17:30:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 17:30:39.069883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:30:39.069890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:30:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:30:43.410619  543705 memory.go:191] Add success.
I0321 17:30:43.409825  543705 cpu.go:282] Add success.
I0321 17:30:43.420288  543705 net.go:648] Add success.
I0321 17:30:43.422958  543705 net.go:770] primary dev: ETH0
I0321 17:30:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:30:43.422984  543705 net.go:698] Add success.
I0321 17:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:30:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:30:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:30:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:30:53.409797  543705 memory.go:184] no items to output this cycle
I0321 17:30:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 17:31:03.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:31:03.409872  543705 memory.go:184] no items to output this cycle
I0321 17:31:03.409974  543705 cpu.go:275] no items to output this cycle
E0321 17:31:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:31:13.409830  543705 memory.go:191] Add success.
I0321 17:31:13.409837  543705 cpu.go:282] Add success.
W0321 17:31:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:31:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:31:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:31:13.420137  543705 net.go:648] Add success.
I0321 17:31:13.422884  543705 net.go:770] primary dev: ETH0
I0321 17:31:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:31:13.422912  543705 net.go:698] Add success.
I0321 17:31:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:31:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:31:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 17:31:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:31:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 17:31:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:31:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:31:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:31:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:31:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:31:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:31:23.409771  543705 memory.go:184] no items to output this cycle
I0321 17:31:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 17:31:30.605673  543705 disk_info.go:125] begin check local disk info of client
I0321 17:31:30.608168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:31:30.608174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c380 0xc00057c3c0]
E0321 17:31:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:31:33.409773  543705 memory.go:184] no items to output this cycle
I0321 17:31:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 17:31:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:31:43.409793  543705 memory.go:191] Add success.
I0321 17:31:43.409794  543705 cpu.go:282] Add success.
I0321 17:31:43.419945  543705 net.go:648] Add success.
I0321 17:31:43.423022  543705 net.go:770] primary dev: ETH0
I0321 17:31:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:31:43.423048  543705 net.go:698] Add success.
I0321 17:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:31:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:31:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:31:53.409775  543705 memory.go:184] no items to output this cycle
I0321 17:31:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 17:32:03.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:32:03.409885  543705 cpu.go:275] no items to output this cycle
I0321 17:32:03.409898  543705 memory.go:184] no items to output this cycle
E0321 17:32:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:32:13.409787  543705 memory.go:191] Add success.
W0321 17:32:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 17:32:13.409815  543705 cpu.go:282] Add success.
W0321 17:32:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:32:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:32:13.420265  543705 net.go:648] Add success.
I0321 17:32:13.422952  543705 net.go:770] primary dev: ETH0
I0321 17:32:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:32:13.422977  543705 net.go:698] Add success.
W0321 17:32:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:32:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 17:32:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:32:14.456925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:32:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:32:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:32:14.457015  543705 disk_worker.go:494] system disk:vda1
I0321 17:32:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:32:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:32:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:32:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:32:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:32:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:32:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:32:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:32:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:32:23.409768  543705 memory.go:184] no items to output this cycle
I0321 17:32:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 17:32:30.608678  543705 disk_info.go:125] begin check local disk info of client
I0321 17:32:30.611174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:32:30.611181  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296940 0xc000296980]
E0321 17:32:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:32:33.409797  543705 memory.go:184] no items to output this cycle
I0321 17:32:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 17:32:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:32:43.409786  543705 memory.go:191] Add success.
I0321 17:32:43.409811  543705 cpu.go:282] Add success.
I0321 17:32:43.419881  543705 net.go:648] Add success.
I0321 17:32:43.422663  543705 net.go:770] primary dev: ETH0
I0321 17:32:43.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:32:43.422686  543705 net.go:698] Add success.
I0321 17:32:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:32:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:32:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:32:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 17:32:53.409780  543705 memory.go:184] no items to output this cycle
E0321 17:33:03.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:33:03.409861  543705 memory.go:184] no items to output this cycle
I0321 17:33:03.409935  543705 cpu.go:275] no items to output this cycle
E0321 17:33:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:33:13.409800  543705 memory.go:191] Add success.
I0321 17:33:13.409804  543705 cpu.go:282] Add success.
W0321 17:33:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:33:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:33:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:33:13.420137  543705 net.go:648] Add success.
I0321 17:33:13.423068  543705 net.go:770] primary dev: ETH0
I0321 17:33:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:33:13.423094  543705 net.go:698] Add success.
I0321 17:33:13.469371  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a2f8694-004d-4cfe-aaca-57d982ecc7e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:33:13.469410  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:33:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:33:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:33:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 17:33:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:33:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 17:33:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:33:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:33:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:33:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:33:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:33:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:33:23.409763  543705 memory.go:184] no items to output this cycle
I0321 17:33:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 17:33:30.611694  543705 disk_info.go:125] begin check local disk info of client
I0321 17:33:30.614231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:33:30.614237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aadc0 0xc0002aae00]
E0321 17:33:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:33:33.409790  543705 memory.go:184] no items to output this cycle
I0321 17:33:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 17:33:39.073052  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:33:39.073068  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:33:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:33:43.410636  543705 memory.go:191] Add success.
I0321 17:33:43.409808  543705 cpu.go:282] Add success.
I0321 17:33:43.420332  543705 net.go:648] Add success.
I0321 17:33:43.423236  543705 net.go:770] primary dev: ETH0
I0321 17:33:43.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:33:43.423265  543705 net.go:698] Add success.
I0321 17:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:33:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:33:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:33:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:33:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 17:33:53.409794  543705 memory.go:184] no items to output this cycle
E0321 17:34:03.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:34:03.409905  543705 memory.go:184] no items to output this cycle
I0321 17:34:03.409963  543705 cpu.go:275] no items to output this cycle
E0321 17:34:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:34:13.409831  543705 memory.go:191] Add success.
I0321 17:34:13.409836  543705 cpu.go:282] Add success.
W0321 17:34:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:34:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:34:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:34:13.420324  543705 net.go:648] Add success.
I0321 17:34:13.422964  543705 net.go:770] primary dev: ETH0
I0321 17:34:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:34:13.422989  543705 net.go:698] Add success.
I0321 17:34:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:34:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:34:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 17:34:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:34:14.456509  543705 disk_worker.go:494] system disk:vda1
I0321 17:34:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:34:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:34:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:34:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:34:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:34:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:34:23.409798  543705 memory.go:184] no items to output this cycle
I0321 17:34:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 17:34:30.614712  543705 disk_info.go:125] begin check local disk info of client
I0321 17:34:30.617209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:34:30.617217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003963c0 0xc000396400]
E0321 17:34:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:34:33.409796  543705 memory.go:184] no items to output this cycle
I0321 17:34:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 17:34:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:34:43.409811  543705 memory.go:191] Add success.
I0321 17:34:43.409818  543705 cpu.go:282] Add success.
I0321 17:34:43.419877  543705 net.go:648] Add success.
I0321 17:34:43.422782  543705 net.go:770] primary dev: ETH0
I0321 17:34:43.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:34:43.422822  543705 net.go:698] Add success.
I0321 17:34:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:34:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:34:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:34:53.409793  543705 memory.go:184] no items to output this cycle
I0321 17:34:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 17:35:03.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:35:03.409907  543705 memory.go:184] no items to output this cycle
I0321 17:35:03.409907  543705 cpu.go:275] no items to output this cycle
E0321 17:35:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:35:13.409804  543705 memory.go:191] Add success.
I0321 17:35:13.409804  543705 cpu.go:282] Add success.
W0321 17:35:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:35:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:35:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:35:13.420234  543705 net.go:648] Add success.
I0321 17:35:13.423002  543705 net.go:770] primary dev: ETH0
I0321 17:35:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:35:13.423036  543705 net.go:698] Add success.
I0321 17:35:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:35:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:35:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 17:35:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:35:14.456630  543705 disk_worker.go:494] system disk:vda1
I0321 17:35:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:35:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:35:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:35:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:35:23.409799  543705 memory.go:184] no items to output this cycle
I0321 17:35:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 17:35:30.617673  543705 disk_info.go:125] begin check local disk info of client
I0321 17:35:30.620188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:35:30.620194  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354640 0xc000354680]
E0321 17:35:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:35:33.409775  543705 memory.go:184] no items to output this cycle
I0321 17:35:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 17:35:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:35:43.409786  543705 memory.go:191] Add success.
I0321 17:35:43.409806  543705 cpu.go:282] Add success.
I0321 17:35:43.419891  543705 net.go:648] Add success.
I0321 17:35:43.422402  543705 net.go:770] primary dev: ETH0
I0321 17:35:43.422415  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:35:43.422427  543705 net.go:698] Add success.
I0321 17:35:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:35:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:35:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:35:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:35:53.410274  543705 cpu.go:275] no items to output this cycle
I0321 17:35:53.410275  543705 memory.go:184] no items to output this cycle
E0321 17:36:03.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:36:03.409887  543705 memory.go:184] no items to output this cycle
I0321 17:36:03.409889  543705 cpu.go:275] no items to output this cycle
E0321 17:36:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:36:13.409827  543705 memory.go:191] Add success.
I0321 17:36:13.409836  543705 cpu.go:282] Add success.
W0321 17:36:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:36:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:36:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:36:13.420472  543705 net.go:648] Add success.
I0321 17:36:13.423330  543705 net.go:770] primary dev: ETH0
I0321 17:36:13.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:36:13.423360  543705 net.go:698] Add success.
I0321 17:36:13.511044  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bcaa6e26-6412-4f36-a0e1-e3d5ddb2fa19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:36:13.511079  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:36:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:36:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:36:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 17:36:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:36:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 17:36:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:36:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:36:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:36:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:36:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:36:23.409806  543705 memory.go:184] no items to output this cycle
I0321 17:36:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 17:36:30.620745  543705 disk_info.go:125] begin check local disk info of client
I0321 17:36:30.623261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:36:30.623269  543705 disk_info.go:196] parse disk info done, disk is : [0xc000271ec0 0xc000271f00]
E0321 17:36:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:36:33.409771  543705 memory.go:184] no items to output this cycle
I0321 17:36:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 17:36:39.073734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:36:39.073740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:36:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:36:43.410642  543705 memory.go:191] Add success.
I0321 17:36:43.409791  543705 cpu.go:282] Add success.
I0321 17:36:43.420360  543705 net.go:648] Add success.
I0321 17:36:43.423607  543705 net.go:770] primary dev: ETH0
I0321 17:36:43.423622  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:36:43.423636  543705 net.go:698] Add success.
I0321 17:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:36:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:36:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:36:53.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:36:53.409878  543705 cpu.go:275] no items to output this cycle
I0321 17:36:53.409879  543705 memory.go:184] no items to output this cycle
E0321 17:37:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:37:03.409789  543705 memory.go:184] no items to output this cycle
I0321 17:37:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 17:37:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:37:13.409789  543705 memory.go:191] Add success.
I0321 17:37:13.409790  543705 cpu.go:282] Add success.
W0321 17:37:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:37:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:37:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:37:13.420151  543705 net.go:648] Add success.
I0321 17:37:13.422782  543705 net.go:770] primary dev: ETH0
I0321 17:37:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:37:13.422811  543705 net.go:698] Add success.
I0321 17:37:13.453362  543705 event_worker.go:152] Polling the log file for events...
W0321 17:37:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:37:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0321 17:37:14.455152  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:37:14.456992  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:37:14.457001  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:37:14.457008  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:37:14.457027  543705 disk_worker.go:494] system disk:vda1
I0321 17:37:14.457067  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:37:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:37:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:37:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:37:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:37:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:37:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:37:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:37:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:37:23.409793  543705 memory.go:184] no items to output this cycle
I0321 17:37:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 17:37:30.623755  543705 disk_info.go:125] begin check local disk info of client
I0321 17:37:30.626270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:37:30.626276  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036af00 0xc00036af40]
E0321 17:37:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:37:33.409775  543705 memory.go:184] no items to output this cycle
I0321 17:37:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 17:37:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:37:43.409806  543705 memory.go:191] Add success.
I0321 17:37:43.409817  543705 cpu.go:282] Add success.
I0321 17:37:43.420054  543705 net.go:648] Add success.
I0321 17:37:43.422913  543705 net.go:770] primary dev: ETH0
I0321 17:37:43.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:37:43.422939  543705 net.go:698] Add success.
I0321 17:37:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:37:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:37:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:37:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:37:53.409777  543705 memory.go:184] no items to output this cycle
I0321 17:37:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 17:38:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:38:03.409776  543705 memory.go:184] no items to output this cycle
I0321 17:38:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 17:38:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:38:13.409819  543705 memory.go:191] Add success.
I0321 17:38:13.409828  543705 cpu.go:282] Add success.
W0321 17:38:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:38:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:38:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:38:13.420340  543705 net.go:648] Add success.
I0321 17:38:13.423038  543705 net.go:770] primary dev: ETH0
I0321 17:38:13.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:38:13.423065  543705 net.go:698] Add success.
I0321 17:38:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:38:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:38:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 17:38:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:38:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 17:38:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:38:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:38:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:38:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:38:23.409761  543705 memory.go:184] no items to output this cycle
I0321 17:38:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 17:38:30.626776  543705 disk_info.go:125] begin check local disk info of client
I0321 17:38:30.629236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:38:30.629242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307600 0xc000307640]
E0321 17:38:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:38:33.409789  543705 memory.go:184] no items to output this cycle
I0321 17:38:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 17:38:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:38:43.409781  543705 memory.go:191] Add success.
I0321 17:38:43.409800  543705 cpu.go:282] Add success.
I0321 17:38:43.420059  543705 net.go:648] Add success.
I0321 17:38:43.422721  543705 net.go:770] primary dev: ETH0
I0321 17:38:43.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:38:43.422749  543705 net.go:698] Add success.
I0321 17:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:38:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:38:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:38:53.409792  543705 memory.go:184] no items to output this cycle
I0321 17:38:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 17:39:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:39:03.409772  543705 memory.go:184] no items to output this cycle
I0321 17:39:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 17:39:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:39:13.409794  543705 memory.go:191] Add success.
I0321 17:39:13.409796  543705 cpu.go:282] Add success.
W0321 17:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:39:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:39:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:39:13.420117  543705 net.go:648] Add success.
I0321 17:39:13.422631  543705 net.go:770] primary dev: ETH0
I0321 17:39:13.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:39:13.422657  543705 net.go:698] Add success.
I0321 17:39:13.468109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a0f42bc-5775-430b-b96f-49cd52a6528c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:39:13.468141  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:39:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:39:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:39:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0321 17:39:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:39:14.456760  543705 disk_worker.go:494] system disk:vda1
I0321 17:39:14.456796  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:39:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:39:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:39:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:39:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:39:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:39:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:39:23.409795  543705 memory.go:184] no items to output this cycle
I0321 17:39:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 17:39:30.629679  543705 disk_info.go:125] begin check local disk info of client
I0321 17:39:30.632163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:39:30.632173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb440 0xc0002bb480]
E0321 17:39:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:39:33.409792  543705 memory.go:184] no items to output this cycle
I0321 17:39:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 17:39:39.077075  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:39:39.077081  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:39:43.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:39:43.410691  543705 memory.go:191] Add success.
I0321 17:39:43.409946  543705 cpu.go:282] Add success.
I0321 17:39:43.419768  543705 net.go:648] Add success.
I0321 17:39:43.422702  543705 net.go:770] primary dev: ETH0
I0321 17:39:43.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:39:43.422726  543705 net.go:698] Add success.
I0321 17:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:39:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:39:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:39:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:39:53.409774  543705 memory.go:184] no items to output this cycle
I0321 17:39:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 17:40:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:40:03.409769  543705 memory.go:184] no items to output this cycle
I0321 17:40:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 17:40:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:40:13.409825  543705 memory.go:191] Add success.
I0321 17:40:13.409830  543705 cpu.go:282] Add success.
W0321 17:40:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:40:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:40:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:40:13.420154  543705 net.go:648] Add success.
I0321 17:40:13.422896  543705 net.go:770] primary dev: ETH0
I0321 17:40:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:40:13.422923  543705 net.go:698] Add success.
I0321 17:40:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:40:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:40:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 17:40:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:40:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 17:40:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:40:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:40:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:40:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:40:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:40:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:40:23.409796  543705 memory.go:184] no items to output this cycle
I0321 17:40:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 17:40:30.632805  543705 disk_info.go:125] begin check local disk info of client
I0321 17:40:30.635307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:40:30.635313  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1340 0xc0003f1380]
E0321 17:40:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:40:33.409803  543705 memory.go:184] no items to output this cycle
I0321 17:40:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 17:40:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:40:43.409827  543705 memory.go:191] Add success.
I0321 17:40:43.409838  543705 cpu.go:282] Add success.
I0321 17:40:43.419961  543705 net.go:648] Add success.
I0321 17:40:43.422711  543705 net.go:770] primary dev: ETH0
I0321 17:40:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:40:43.422739  543705 net.go:698] Add success.
I0321 17:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:40:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:40:53.409785  543705 memory.go:184] no items to output this cycle
I0321 17:40:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 17:41:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:41:03.409796  543705 memory.go:184] no items to output this cycle
I0321 17:41:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 17:41:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:41:13.409805  543705 memory.go:191] Add success.
I0321 17:41:13.409807  543705 cpu.go:282] Add success.
W0321 17:41:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:41:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:41:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:41:13.420183  543705 net.go:648] Add success.
I0321 17:41:13.423162  543705 net.go:770] primary dev: ETH0
I0321 17:41:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:41:13.423199  543705 net.go:698] Add success.
I0321 17:41:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:41:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:41:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0321 17:41:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:41:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 17:41:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:41:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:41:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:41:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:41:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:41:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:41:23.409784  543705 memory.go:184] no items to output this cycle
I0321 17:41:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 17:41:30.635832  543705 disk_info.go:125] begin check local disk info of client
I0321 17:41:30.638333  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:41:30.638341  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a800 0xc00027a840]
E0321 17:41:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:41:33.409768  543705 memory.go:184] no items to output this cycle
I0321 17:41:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 17:41:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:41:43.409817  543705 memory.go:191] Add success.
I0321 17:41:43.409827  543705 cpu.go:282] Add success.
I0321 17:41:43.419915  543705 net.go:648] Add success.
I0321 17:41:43.422562  543705 net.go:770] primary dev: ETH0
I0321 17:41:43.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:41:43.422590  543705 net.go:698] Add success.
I0321 17:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:41:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:41:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:41:53.409765  543705 memory.go:184] no items to output this cycle
I0321 17:41:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 17:42:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:42:03.409786  543705 memory.go:184] no items to output this cycle
I0321 17:42:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 17:42:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:42:13.409793  543705 cpu.go:282] Add success.
I0321 17:42:13.409801  543705 memory.go:191] Add success.
W0321 17:42:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:42:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:42:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:42:13.420180  543705 net.go:648] Add success.
I0321 17:42:13.422827  543705 net.go:770] primary dev: ETH0
I0321 17:42:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:42:13.422865  543705 net.go:698] Add success.
I0321 17:42:13.469676  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5f265541-a00a-4144-a52f-b8182a744247","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:42:13.469710  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 17:42:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:42:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 17:42:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:42:14.456822  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0321 17:42:14.456831  543705 disk_worker.go:494] system disk:vda1
E0321 17:42:14.456832  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:42:14.456838  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:42:14.456879  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:42:15.456878  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:42:15.456887  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:42:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:42:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:42:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:42:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:42:16.472323  543705 disk_local_worker.go:436] Get disk info: []
I0321 17:42:23.409867  543705 cpu.go:275] no items to output this cycle
E0321 17:42:23.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:42:23.409904  543705 memory.go:184] no items to output this cycle
I0321 17:42:30.638834  543705 disk_info.go:125] begin check local disk info of client
I0321 17:42:30.641370  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:42:30.641377  543705 disk_info.go:196] parse disk info done, disk is : [0xc000313900 0xc000313940]
E0321 17:42:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:42:33.409786  543705 memory.go:184] no items to output this cycle
I0321 17:42:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 17:42:39.077731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:42:39.077738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:42:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:42:43.410603  543705 memory.go:191] Add success.
I0321 17:42:43.409812  543705 cpu.go:282] Add success.
I0321 17:42:43.420286  543705 net.go:648] Add success.
I0321 17:42:43.422791  543705 net.go:770] primary dev: ETH0
I0321 17:42:43.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:42:43.422817  543705 net.go:698] Add success.
I0321 17:42:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:42:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:42:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:42:53.409769  543705 memory.go:184] no items to output this cycle
I0321 17:42:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 17:43:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:43:03.409783  543705 memory.go:184] no items to output this cycle
I0321 17:43:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 17:43:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:43:13.409790  543705 memory.go:191] Add success.
I0321 17:43:13.409816  543705 cpu.go:282] Add success.
W0321 17:43:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:43:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:43:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:43:13.420154  543705 net.go:648] Add success.
I0321 17:43:13.422721  543705 net.go:770] primary dev: ETH0
I0321 17:43:13.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:43:13.422753  543705 net.go:698] Add success.
I0321 17:43:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:43:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:43:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 17:43:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:43:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 17:43:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:43:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:43:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:43:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:43:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:43:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:43:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:43:23.409786  543705 memory.go:184] no items to output this cycle
I0321 17:43:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 17:43:30.641674  543705 disk_info.go:125] begin check local disk info of client
I0321 17:43:30.644228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:43:30.644235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8980 0xc0004d89c0]
E0321 17:43:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:43:33.409766  543705 memory.go:184] no items to output this cycle
I0321 17:43:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 17:43:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:43:43.409813  543705 memory.go:191] Add success.
I0321 17:43:43.409821  543705 cpu.go:282] Add success.
I0321 17:43:43.419892  543705 net.go:648] Add success.
I0321 17:43:43.422910  543705 net.go:770] primary dev: ETH0
I0321 17:43:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:43:43.422938  543705 net.go:698] Add success.
I0321 17:43:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:43:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:43:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:43:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:43:53.409765  543705 memory.go:184] no items to output this cycle
I0321 17:43:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 17:44:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:44:03.409807  543705 memory.go:184] no items to output this cycle
I0321 17:44:03.409820  543705 cpu.go:275] no items to output this cycle
E0321 17:44:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:44:13.409798  543705 memory.go:191] Add success.
I0321 17:44:13.409815  543705 cpu.go:282] Add success.
W0321 17:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:44:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:44:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:44:13.420293  543705 net.go:648] Add success.
I0321 17:44:13.422738  543705 net.go:770] primary dev: ETH0
I0321 17:44:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:44:13.422763  543705 net.go:698] Add success.
I0321 17:44:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:44:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:44:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0321 17:44:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:44:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 17:44:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:44:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:44:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:44:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:44:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:44:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:44:23.409778  543705 memory.go:184] no items to output this cycle
I0321 17:44:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 17:44:30.644854  543705 disk_info.go:125] begin check local disk info of client
I0321 17:44:30.647458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:44:30.647465  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f2c0 0xc00037f300]
E0321 17:44:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:44:33.409765  543705 memory.go:184] no items to output this cycle
I0321 17:44:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 17:44:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:44:43.409813  543705 memory.go:191] Add success.
I0321 17:44:43.409818  543705 cpu.go:282] Add success.
I0321 17:44:43.419872  543705 net.go:648] Add success.
I0321 17:44:43.422728  543705 net.go:770] primary dev: ETH0
I0321 17:44:43.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:44:43.422756  543705 net.go:698] Add success.
I0321 17:44:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:44:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:44:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:44:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:44:53.409776  543705 memory.go:184] no items to output this cycle
I0321 17:44:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 17:45:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:45:03.409805  543705 memory.go:184] no items to output this cycle
I0321 17:45:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 17:45:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:45:13.409788  543705 memory.go:191] Add success.
I0321 17:45:13.409812  543705 cpu.go:282] Add success.
W0321 17:45:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:45:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:45:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:45:13.420173  543705 net.go:648] Add success.
I0321 17:45:13.422876  543705 net.go:770] primary dev: ETH0
I0321 17:45:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:45:13.422903  543705 net.go:698] Add success.
I0321 17:45:13.953188  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"85563018-15e1-43db-92e9-ebd142e889ec","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:45:13.953224  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:45:14.454723  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:45:14.454947  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:45:14.454958  543705 disk_worker.go:708] disk space is not compliant
W0321 17:45:14.454960  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:45:14.456451  543705 disk_worker.go:494] system disk:vda1
I0321 17:45:14.456482  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:45:15.455076  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:45:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:45:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:45:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:45:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:45:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:45:23.409773  543705 memory.go:184] no items to output this cycle
I0321 17:45:23.409774  543705 cpu.go:275] no items to output this cycle
I0321 17:45:30.647880  543705 disk_info.go:125] begin check local disk info of client
I0321 17:45:30.650449  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:45:30.650456  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ef40 0xc00037ef80]
E0321 17:45:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:45:33.409772  543705 memory.go:184] no items to output this cycle
I0321 17:45:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 17:45:39.081093  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:45:39.081099  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:45:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:45:43.410657  543705 memory.go:191] Add success.
I0321 17:45:43.409825  543705 cpu.go:282] Add success.
I0321 17:45:43.420368  543705 net.go:648] Add success.
I0321 17:45:43.422846  543705 net.go:770] primary dev: ETH0
I0321 17:45:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:45:43.422871  543705 net.go:698] Add success.
I0321 17:45:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:45:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:45:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:45:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:45:53.409801  543705 memory.go:184] no items to output this cycle
I0321 17:45:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 17:46:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:46:03.409782  543705 memory.go:184] no items to output this cycle
I0321 17:46:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 17:46:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:46:13.409788  543705 memory.go:191] Add success.
I0321 17:46:13.409807  543705 cpu.go:282] Add success.
W0321 17:46:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:46:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:46:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:46:13.420307  543705 net.go:648] Add success.
I0321 17:46:13.423285  543705 net.go:770] primary dev: ETH0
I0321 17:46:13.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:46:13.423317  543705 net.go:698] Add success.
I0321 17:46:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:46:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:46:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 17:46:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:46:14.456834  543705 disk_worker.go:494] system disk:vda1
I0321 17:46:14.456863  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:46:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:46:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:46:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:46:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:46:23.409783  543705 memory.go:184] no items to output this cycle
I0321 17:46:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 17:46:30.650898  543705 disk_info.go:125] begin check local disk info of client
I0321 17:46:30.653391  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:46:30.653397  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2880 0xc0004a28c0]
E0321 17:46:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:46:33.409790  543705 memory.go:184] no items to output this cycle
I0321 17:46:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 17:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:46:43.409784  543705 memory.go:191] Add success.
I0321 17:46:43.409809  543705 cpu.go:282] Add success.
I0321 17:46:43.419996  543705 net.go:648] Add success.
I0321 17:46:43.422692  543705 net.go:770] primary dev: ETH0
I0321 17:46:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:46:43.422717  543705 net.go:698] Add success.
I0321 17:46:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:46:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:46:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:46:53.410200  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:46:53.410216  543705 memory.go:184] no items to output this cycle
I0321 17:46:53.410245  543705 cpu.go:275] no items to output this cycle
E0321 17:47:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:47:03.409783  543705 memory.go:184] no items to output this cycle
I0321 17:47:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 17:47:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:47:13.409819  543705 memory.go:191] Add success.
I0321 17:47:13.409827  543705 cpu.go:282] Add success.
W0321 17:47:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:47:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:47:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:47:13.420215  543705 net.go:648] Add success.
I0321 17:47:13.422890  543705 net.go:770] primary dev: ETH0
I0321 17:47:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:47:13.422920  543705 net.go:698] Add success.
I0321 17:47:13.453477  543705 event_worker.go:152] Polling the log file for events...
W0321 17:47:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:47:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 17:47:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:47:14.456700  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:47:14.456710  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:47:14.456716  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:47:14.457154  543705 disk_worker.go:494] system disk:vda1
I0321 17:47:14.457196  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:47:15.456880  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:47:15.456889  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:47:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:47:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:47:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:47:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:47:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:47:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:47:23.409763  543705 memory.go:184] no items to output this cycle
I0321 17:47:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 17:47:30.653674  543705 disk_info.go:125] begin check local disk info of client
I0321 17:47:30.656158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:47:30.656165  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af80 0xc00007afc0]
E0321 17:47:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:47:33.409788  543705 memory.go:184] no items to output this cycle
I0321 17:47:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 17:47:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:47:43.409810  543705 memory.go:191] Add success.
I0321 17:47:43.409815  543705 cpu.go:282] Add success.
I0321 17:47:43.419984  543705 net.go:648] Add success.
I0321 17:47:43.422545  543705 net.go:770] primary dev: ETH0
I0321 17:47:43.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:47:43.422570  543705 net.go:698] Add success.
I0321 17:47:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:47:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:47:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:47:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:47:53.409810  543705 memory.go:184] no items to output this cycle
I0321 17:47:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 17:48:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:48:03.409787  543705 memory.go:184] no items to output this cycle
I0321 17:48:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 17:48:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:48:13.409798  543705 cpu.go:282] Add success.
I0321 17:48:13.409824  543705 memory.go:191] Add success.
W0321 17:48:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:48:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:48:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:48:13.420308  543705 net.go:648] Add success.
I0321 17:48:13.421321  543705 net.go:770] primary dev: ETH0
I0321 17:48:13.421336  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:48:13.421347  543705 net.go:698] Add success.
I0321 17:48:13.652361  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a12e06d-ed37-426f-835c-ab1afbec73e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:48:13.652395  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:48:14.453981  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:48:14.454658  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:48:14.454694  543705 disk_worker.go:708] disk space is not compliant
W0321 17:48:14.454698  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:48:14.456381  543705 disk_worker.go:494] system disk:vda1
I0321 17:48:14.456410  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:48:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:48:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:48:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:48:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:48:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:48:23.409769  543705 memory.go:184] no items to output this cycle
I0321 17:48:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 17:48:30.656920  543705 disk_info.go:125] begin check local disk info of client
I0321 17:48:30.659447  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:48:30.659454  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9dc0 0xc0004d9e00]
E0321 17:48:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:48:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 17:48:33.409793  543705 memory.go:184] no items to output this cycle
I0321 17:48:39.081741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:48:39.081748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:48:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:48:43.410625  543705 memory.go:191] Add success.
I0321 17:48:43.409820  543705 cpu.go:282] Add success.
I0321 17:48:43.420356  543705 net.go:648] Add success.
I0321 17:48:43.423067  543705 net.go:770] primary dev: ETH0
I0321 17:48:43.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:48:43.423094  543705 net.go:698] Add success.
I0321 17:48:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:48:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:48:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:48:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:48:53.409786  543705 memory.go:184] no items to output this cycle
I0321 17:48:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 17:49:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:49:03.409812  543705 memory.go:184] no items to output this cycle
I0321 17:49:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 17:49:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:49:13.409805  543705 cpu.go:282] Add success.
I0321 17:49:13.409811  543705 memory.go:191] Add success.
W0321 17:49:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:49:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:49:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:49:13.420132  543705 net.go:648] Add success.
I0321 17:49:13.422830  543705 net.go:770] primary dev: ETH0
I0321 17:49:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:49:13.422855  543705 net.go:698] Add success.
I0321 17:49:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:49:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:49:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 17:49:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:49:14.456503  543705 disk_worker.go:494] system disk:vda1
I0321 17:49:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:49:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:49:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:49:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:49:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:49:23.409802  543705 memory.go:184] no items to output this cycle
I0321 17:49:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 17:49:30.659936  543705 disk_info.go:125] begin check local disk info of client
I0321 17:49:30.662404  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:49:30.662410  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8480 0xc0004d84c0]
E0321 17:49:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:49:33.409801  543705 memory.go:184] no items to output this cycle
I0321 17:49:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 17:49:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:49:43.409780  543705 memory.go:191] Add success.
I0321 17:49:43.409798  543705 cpu.go:282] Add success.
I0321 17:49:43.420093  543705 net.go:648] Add success.
I0321 17:49:43.423078  543705 net.go:770] primary dev: ETH0
I0321 17:49:43.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:49:43.423108  543705 net.go:698] Add success.
I0321 17:49:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:49:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:49:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:49:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:49:53.409777  543705 memory.go:184] no items to output this cycle
I0321 17:49:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 17:50:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:50:03.409773  543705 memory.go:184] no items to output this cycle
I0321 17:50:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 17:50:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:50:13.409796  543705 memory.go:191] Add success.
I0321 17:50:13.409798  543705 cpu.go:282] Add success.
W0321 17:50:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:50:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:50:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:50:13.420137  543705 net.go:648] Add success.
I0321 17:50:13.422504  543705 net.go:770] primary dev: ETH0
I0321 17:50:13.422518  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:50:13.422532  543705 net.go:698] Add success.
I0321 17:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:50:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:50:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 17:50:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:50:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 17:50:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:50:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:50:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:50:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:50:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:50:23.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:50:23.409882  543705 memory.go:184] no items to output this cycle
I0321 17:50:23.409951  543705 cpu.go:275] no items to output this cycle
I0321 17:50:30.662952  543705 disk_info.go:125] begin check local disk info of client
I0321 17:50:30.665441  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:50:30.665448  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366600 0xc000366640]
E0321 17:50:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:50:33.409783  543705 memory.go:184] no items to output this cycle
I0321 17:50:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 17:50:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:50:43.409779  543705 memory.go:191] Add success.
I0321 17:50:43.409804  543705 cpu.go:282] Add success.
I0321 17:50:43.419973  543705 net.go:648] Add success.
I0321 17:50:43.422606  543705 net.go:770] primary dev: ETH0
I0321 17:50:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:50:43.422630  543705 net.go:698] Add success.
I0321 17:50:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:50:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:50:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:50:53.409779  543705 memory.go:184] no items to output this cycle
I0321 17:50:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 17:51:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:51:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 17:51:03.409790  543705 memory.go:184] no items to output this cycle
E0321 17:51:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:51:13.409788  543705 memory.go:191] Add success.
W0321 17:51:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 17:51:13.409820  543705 cpu.go:282] Add success.
W0321 17:51:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:51:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:51:13.420142  543705 net.go:648] Add success.
I0321 17:51:13.422932  543705 net.go:770] primary dev: ETH0
I0321 17:51:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:51:13.422970  543705 net.go:698] Add success.
I0321 17:51:13.464887  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"019abd00-a47c-4132-ac87-1bb1e092dc07","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:51:13.464923  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:51:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:51:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:51:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 17:51:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:51:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 17:51:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:51:15.455605  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:51:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:51:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:51:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:51:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:51:23.409885  543705 memory.go:184] no items to output this cycle
I0321 17:51:23.409926  543705 cpu.go:275] no items to output this cycle
I0321 17:51:30.665674  543705 disk_info.go:125] begin check local disk info of client
I0321 17:51:30.668219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:51:30.668225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf740 0xc0002bf780]
E0321 17:51:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:51:33.409771  543705 memory.go:184] no items to output this cycle
I0321 17:51:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 17:51:39.085125  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:51:39.085132  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:51:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:51:43.410668  543705 memory.go:191] Add success.
I0321 17:51:43.409823  543705 cpu.go:282] Add success.
I0321 17:51:43.420402  543705 net.go:648] Add success.
I0321 17:51:43.423077  543705 net.go:770] primary dev: ETH0
I0321 17:51:43.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:51:43.423106  543705 net.go:698] Add success.
I0321 17:51:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:51:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:51:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:51:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:51:53.409771  543705 memory.go:184] no items to output this cycle
I0321 17:51:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 17:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:52:03.409782  543705 memory.go:184] no items to output this cycle
I0321 17:52:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 17:52:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:52:13.409798  543705 memory.go:191] Add success.
I0321 17:52:13.409799  543705 cpu.go:282] Add success.
W0321 17:52:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:52:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:52:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:52:13.420121  543705 net.go:648] Add success.
I0321 17:52:13.422758  543705 net.go:770] primary dev: ETH0
I0321 17:52:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:52:13.422784  543705 net.go:698] Add success.
W0321 17:52:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:52:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 17:52:14.455159  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:52:14.456926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:52:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:52:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:52:14.457014  543705 disk_worker.go:494] system disk:vda1
I0321 17:52:14.457067  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:52:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:52:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:52:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:52:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:52:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:52:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:52:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:52:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:52:23.409769  543705 memory.go:184] no items to output this cycle
I0321 17:52:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 17:52:30.668978  543705 disk_info.go:125] begin check local disk info of client
I0321 17:52:30.671473  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:52:30.671480  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f7c0 0xc00032f840]
E0321 17:52:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:52:33.409909  543705 memory.go:184] no items to output this cycle
I0321 17:52:33.410101  543705 cpu.go:275] no items to output this cycle
E0321 17:52:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:52:43.409791  543705 memory.go:191] Add success.
I0321 17:52:43.409804  543705 cpu.go:282] Add success.
I0321 17:52:43.419863  543705 net.go:648] Add success.
I0321 17:52:43.422298  543705 net.go:770] primary dev: ETH0
I0321 17:52:43.422311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:52:43.422323  543705 net.go:698] Add success.
I0321 17:52:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:52:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:52:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:52:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:52:53.409797  543705 memory.go:184] no items to output this cycle
I0321 17:52:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 17:53:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:53:03.409779  543705 memory.go:184] no items to output this cycle
I0321 17:53:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 17:53:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:53:13.409800  543705 cpu.go:282] Add success.
I0321 17:53:13.409802  543705 memory.go:191] Add success.
W0321 17:53:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:53:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:53:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:53:13.420087  543705 net.go:648] Add success.
I0321 17:53:13.422729  543705 net.go:770] primary dev: ETH0
I0321 17:53:13.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:53:13.422758  543705 net.go:698] Add success.
I0321 17:53:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:53:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:53:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 17:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:53:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 17:53:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:53:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:53:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:53:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:53:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:53:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:53:23.409764  543705 memory.go:184] no items to output this cycle
I0321 17:53:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 17:53:30.671996  543705 disk_info.go:125] begin check local disk info of client
I0321 17:53:30.674503  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:53:30.674509  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b680 0xc00036b6c0]
E0321 17:53:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:53:33.409879  543705 memory.go:184] no items to output this cycle
I0321 17:53:33.409915  543705 cpu.go:275] no items to output this cycle
E0321 17:53:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:53:43.409796  543705 memory.go:191] Add success.
I0321 17:53:43.409798  543705 cpu.go:282] Add success.
I0321 17:53:43.420128  543705 net.go:648] Add success.
I0321 17:53:43.422754  543705 net.go:770] primary dev: ETH0
I0321 17:53:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:53:43.422779  543705 net.go:698] Add success.
I0321 17:53:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:53:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:53:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:53:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:53:53.409798  543705 memory.go:184] no items to output this cycle
I0321 17:53:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 17:54:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:54:03.409771  543705 memory.go:184] no items to output this cycle
I0321 17:54:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 17:54:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:54:13.409820  543705 memory.go:191] Add success.
I0321 17:54:13.409825  543705 cpu.go:282] Add success.
W0321 17:54:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:54:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:54:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:54:13.420211  543705 net.go:648] Add success.
I0321 17:54:13.422691  543705 net.go:770] primary dev: ETH0
I0321 17:54:13.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:54:13.422717  543705 net.go:698] Add success.
I0321 17:54:13.468396  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9070172-56af-45f1-9c42-fecc34a5d8e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:54:13.468430  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 17:54:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:54:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:54:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 17:54:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:54:14.456735  543705 disk_worker.go:494] system disk:vda1
I0321 17:54:14.456765  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:54:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:54:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:54:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:54:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:54:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:54:23.409783  543705 memory.go:184] no items to output this cycle
I0321 17:54:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 17:54:30.675017  543705 disk_info.go:125] begin check local disk info of client
I0321 17:54:30.677551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:54:30.677558  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487b40 0xc000487b80]
E0321 17:54:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:54:33.409762  543705 memory.go:184] no items to output this cycle
I0321 17:54:33.409800  543705 cpu.go:275] no items to output this cycle
I0321 17:54:39.085735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:54:39.085742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:54:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:54:43.410669  543705 memory.go:191] Add success.
I0321 17:54:43.409820  543705 cpu.go:282] Add success.
I0321 17:54:43.420376  543705 net.go:648] Add success.
I0321 17:54:43.423192  543705 net.go:770] primary dev: ETH0
I0321 17:54:43.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:54:43.423221  543705 net.go:698] Add success.
I0321 17:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:54:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:54:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:54:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:54:53.409773  543705 memory.go:184] no items to output this cycle
I0321 17:54:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 17:55:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:55:03.409789  543705 cpu.go:275] no items to output this cycle
I0321 17:55:03.409791  543705 memory.go:184] no items to output this cycle
E0321 17:55:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:55:13.409819  543705 memory.go:191] Add success.
I0321 17:55:13.409824  543705 cpu.go:282] Add success.
W0321 17:55:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:55:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:55:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:55:13.420141  543705 net.go:648] Add success.
I0321 17:55:13.423021  543705 net.go:770] primary dev: ETH0
I0321 17:55:13.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:55:13.423049  543705 net.go:698] Add success.
I0321 17:55:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:55:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:55:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 17:55:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:55:14.456516  543705 disk_worker.go:494] system disk:vda1
I0321 17:55:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:55:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:55:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:55:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:55:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:55:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:55:23.409798  543705 memory.go:184] no items to output this cycle
I0321 17:55:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 17:55:30.677687  543705 disk_info.go:125] begin check local disk info of client
I0321 17:55:30.680186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:55:30.680194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000de000 0xc0000de040]
E0321 17:55:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:55:33.409791  543705 memory.go:184] no items to output this cycle
I0321 17:55:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 17:55:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:55:43.409780  543705 memory.go:191] Add success.
I0321 17:55:43.409799  543705 cpu.go:282] Add success.
I0321 17:55:43.419859  543705 net.go:648] Add success.
I0321 17:55:43.423581  543705 net.go:770] primary dev: ETH0
I0321 17:55:43.423595  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:55:43.423610  543705 net.go:698] Add success.
I0321 17:55:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:55:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:55:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:55:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:55:53.409771  543705 memory.go:184] no items to output this cycle
I0321 17:55:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 17:56:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:56:03.409783  543705 memory.go:184] no items to output this cycle
I0321 17:56:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 17:56:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:56:13.409826  543705 memory.go:191] Add success.
I0321 17:56:13.409827  543705 cpu.go:282] Add success.
W0321 17:56:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:56:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:56:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:56:13.420277  543705 net.go:648] Add success.
I0321 17:56:13.422983  543705 net.go:770] primary dev: ETH0
I0321 17:56:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:56:13.423009  543705 net.go:698] Add success.
I0321 17:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:56:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:56:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0321 17:56:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:56:14.456606  543705 disk_worker.go:494] system disk:vda1
I0321 17:56:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:56:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:56:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:56:16.472092  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:56:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:56:23.409778  543705 memory.go:184] no items to output this cycle
I0321 17:56:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 17:56:30.681045  543705 disk_info.go:125] begin check local disk info of client
I0321 17:56:30.683578  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:56:30.683585  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367080 0xc0003670c0]
E0321 17:56:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:56:33.409774  543705 memory.go:184] no items to output this cycle
I0321 17:56:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 17:56:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:56:43.409785  543705 cpu.go:282] Add success.
I0321 17:56:43.409792  543705 memory.go:191] Add success.
I0321 17:56:43.419857  543705 net.go:648] Add success.
I0321 17:56:43.422666  543705 net.go:770] primary dev: ETH0
I0321 17:56:43.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:56:43.422691  543705 net.go:698] Add success.
I0321 17:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:56:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:56:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:56:53.409805  543705 memory.go:184] no items to output this cycle
I0321 17:56:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 17:57:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:57:03.409793  543705 memory.go:184] no items to output this cycle
I0321 17:57:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 17:57:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:57:13.409790  543705 memory.go:191] Add success.
I0321 17:57:13.409812  543705 cpu.go:282] Add success.
W0321 17:57:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:57:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:57:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:57:13.420158  543705 net.go:648] Add success.
I0321 17:57:13.428931  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 17:57:13.429008  543705 net.go:770] primary dev: ETH0
I0321 17:57:13.429021  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:57:13.429034  543705 net.go:698] Add success.
I0321 17:57:13.453588  543705 event_worker.go:152] Polling the log file for events...
I0321 17:57:13.469790  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"673cb8bf-a245-4b34-87ce-f186d63ab941","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 17:57:13.469825  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 17:57:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:57:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 17:57:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0321 17:57:14.456122  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 17:57:14.456132  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 17:57:14.456138  543705 custom_config.go:64] query custom config with name: gpu
I0321 17:57:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 17:57:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 17:57:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 17:57:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:57:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 17:57:16.457979  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 17:57:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:57:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:57:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:57:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:57:23.409768  543705 memory.go:184] no items to output this cycle
I0321 17:57:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 17:57:30.684058  543705 disk_info.go:125] begin check local disk info of client
I0321 17:57:30.686582  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:57:30.686588  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039af00 0xc00039af40]
E0321 17:57:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:57:33.409763  543705 memory.go:184] no items to output this cycle
I0321 17:57:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 17:57:39.085881  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 17:57:39.085888  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 17:57:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:57:43.410741  543705 memory.go:191] Add success.
I0321 17:57:43.409815  543705 cpu.go:282] Add success.
I0321 17:57:43.420468  543705 net.go:648] Add success.
I0321 17:57:43.423082  543705 net.go:770] primary dev: ETH0
I0321 17:57:43.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:57:43.423111  543705 net.go:698] Add success.
I0321 17:57:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:57:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:57:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:57:53.409774  543705 memory.go:184] no items to output this cycle
I0321 17:57:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 17:58:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:58:03.409780  543705 memory.go:184] no items to output this cycle
I0321 17:58:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 17:58:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:58:13.409810  543705 memory.go:191] Add success.
I0321 17:58:13.409814  543705 cpu.go:282] Add success.
W0321 17:58:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:58:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:58:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:58:13.420124  543705 net.go:648] Add success.
I0321 17:58:13.422764  543705 net.go:770] primary dev: ETH0
I0321 17:58:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:58:13.422790  543705 net.go:698] Add success.
I0321 17:58:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:58:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:58:14.455337  543705 disk_worker.go:708] disk space is not compliant
W0321 17:58:14.455344  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:58:14.456989  543705 disk_worker.go:494] system disk:vda1
I0321 17:58:14.457020  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:58:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:58:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:58:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:58:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:58:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:58:23.409800  543705 memory.go:184] no items to output this cycle
I0321 17:58:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 17:58:30.687079  543705 disk_info.go:125] begin check local disk info of client
I0321 17:58:30.689667  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:58:30.689673  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003688c0 0xc000368900]
E0321 17:58:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:58:33.409774  543705 memory.go:184] no items to output this cycle
I0321 17:58:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 17:58:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:58:43.409794  543705 memory.go:191] Add success.
I0321 17:58:43.409796  543705 cpu.go:282] Add success.
I0321 17:58:43.420053  543705 net.go:648] Add success.
I0321 17:58:43.422792  543705 net.go:770] primary dev: ETH0
I0321 17:58:43.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:58:43.422818  543705 net.go:698] Add success.
I0321 17:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:58:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:58:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:58:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:58:53.409781  543705 memory.go:184] no items to output this cycle
I0321 17:58:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 17:59:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:59:03.409798  543705 memory.go:184] no items to output this cycle
I0321 17:59:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 17:59:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:59:13.409788  543705 memory.go:191] Add success.
I0321 17:59:13.409806  543705 cpu.go:282] Add success.
W0321 17:59:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 17:59:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 17:59:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 17:59:13.420259  543705 net.go:648] Add success.
I0321 17:59:13.423148  543705 net.go:770] primary dev: ETH0
I0321 17:59:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:59:13.423173  543705 net.go:698] Add success.
I0321 17:59:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0321 17:59:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 17:59:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 17:59:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 17:59:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 17:59:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 17:59:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 17:59:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:59:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 17:59:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 17:59:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:59:23.409777  543705 memory.go:184] no items to output this cycle
I0321 17:59:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 17:59:30.691104  543705 disk_info.go:125] begin check local disk info of client
I0321 17:59:30.693589  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 17:59:30.693595  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd580 0xc0002bd5c0]
E0321 17:59:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:59:33.409787  543705 memory.go:184] no items to output this cycle
I0321 17:59:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 17:59:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:59:43.409797  543705 memory.go:191] Add success.
I0321 17:59:43.409811  543705 cpu.go:282] Add success.
I0321 17:59:43.420049  543705 net.go:648] Add success.
I0321 17:59:43.423625  543705 net.go:770] primary dev: ETH0
I0321 17:59:43.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0321 17:59:43.423651  543705 net.go:698] Add success.
I0321 17:59:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 17:59:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 17:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 17:59:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 17:59:53.409789  543705 memory.go:184] no items to output this cycle
I0321 17:59:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 18:00:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:00:03.409783  543705 memory.go:184] no items to output this cycle
I0321 18:00:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 18:00:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:00:13.409917  543705 cpu.go:282] Add success.
I0321 18:00:13.409929  543705 memory.go:191] Add success.
W0321 18:00:13.409959  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:00:13.409972  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:00:13.409976  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:00:13.419716  543705 net.go:648] Add success.
I0321 18:00:13.422681  543705 net.go:770] primary dev: ETH0
I0321 18:00:13.422695  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:00:13.422707  543705 net.go:698] Add success.
I0321 18:00:13.623124  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1b0ff6dd-51f7-4c0b-8217-2a5b9a7a5a78","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:00:13.623157  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:00:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:00:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:00:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 18:00:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:00:14.456606  543705 disk_worker.go:494] system disk:vda1
I0321 18:00:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:00:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:00:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:00:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:00:23.409786  543705 memory.go:184] no items to output this cycle
I0321 18:00:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 18:00:30.693673  543705 disk_info.go:125] begin check local disk info of client
I0321 18:00:30.696232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:00:30.696238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c56c0 0xc0000c5700]
E0321 18:00:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:00:33.409778  543705 memory.go:184] no items to output this cycle
I0321 18:00:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 18:00:39.086030  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:00:39.086037  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:00:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:00:43.410671  543705 memory.go:191] Add success.
I0321 18:00:43.409824  543705 cpu.go:282] Add success.
I0321 18:00:43.420377  543705 net.go:648] Add success.
I0321 18:00:43.423437  543705 net.go:770] primary dev: ETH0
I0321 18:00:43.423449  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:00:43.423462  543705 net.go:698] Add success.
I0321 18:00:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:00:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:00:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:00:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:00:53.409775  543705 memory.go:184] no items to output this cycle
I0321 18:00:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:01:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:01:03.409788  543705 memory.go:184] no items to output this cycle
I0321 18:01:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 18:01:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:01:13.409839  543705 memory.go:191] Add success.
I0321 18:01:13.409840  543705 cpu.go:282] Add success.
W0321 18:01:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:01:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:01:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:01:13.420336  543705 net.go:648] Add success.
I0321 18:01:13.423284  543705 net.go:770] primary dev: ETH0
I0321 18:01:13.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:01:13.423308  543705 net.go:698] Add success.
I0321 18:01:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:01:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:01:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 18:01:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:01:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 18:01:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:01:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:01:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:01:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:01:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:01:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:01:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:01:23.409781  543705 memory.go:184] no items to output this cycle
I0321 18:01:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 18:01:30.697124  543705 disk_info.go:125] begin check local disk info of client
I0321 18:01:30.699594  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:01:30.699600  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4fc0 0xc0000c5000]
E0321 18:01:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:01:33.409802  543705 memory.go:184] no items to output this cycle
I0321 18:01:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 18:01:43.409944  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:01:43.409976  543705 memory.go:191] Add success.
I0321 18:01:43.409988  543705 cpu.go:282] Add success.
I0321 18:01:43.420133  543705 net.go:648] Add success.
I0321 18:01:43.422945  543705 net.go:770] primary dev: ETH0
I0321 18:01:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:01:43.422971  543705 net.go:698] Add success.
I0321 18:01:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:01:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:01:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:01:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:01:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 18:02:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:02:03.409804  543705 memory.go:184] no items to output this cycle
I0321 18:02:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 18:02:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:02:13.409796  543705 memory.go:191] Add success.
I0321 18:02:13.409798  543705 cpu.go:282] Add success.
W0321 18:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:02:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:02:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:02:13.420355  543705 net.go:648] Add success.
I0321 18:02:13.422965  543705 net.go:770] primary dev: ETH0
I0321 18:02:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:02:13.422994  543705 net.go:698] Add success.
W0321 18:02:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:02:14.455734  543705 disk_worker.go:708] disk space is not compliant
W0321 18:02:14.455738  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:02:14.457900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:02:14.457920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:02:14.457924  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:02:14.458957  543705 disk_worker.go:494] system disk:vda1
I0321 18:02:14.458988  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:02:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:02:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:02:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:02:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:02:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:02:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:02:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:02:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:02:23.409795  543705 memory.go:184] no items to output this cycle
I0321 18:02:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 18:02:30.700136  543705 disk_info.go:125] begin check local disk info of client
I0321 18:02:30.702669  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:02:30.702675  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5400 0xc0000c5440]
E0321 18:02:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:02:33.409767  543705 memory.go:184] no items to output this cycle
I0321 18:02:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 18:02:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:02:43.409820  543705 memory.go:191] Add success.
I0321 18:02:43.409829  543705 cpu.go:282] Add success.
I0321 18:02:43.420000  543705 net.go:648] Add success.
I0321 18:02:43.422510  543705 net.go:770] primary dev: ETH0
I0321 18:02:43.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:02:43.422536  543705 net.go:698] Add success.
I0321 18:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:02:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:02:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:02:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:02:53.409777  543705 memory.go:184] no items to output this cycle
I0321 18:02:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 18:03:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:03:03.409760  543705 memory.go:184] no items to output this cycle
I0321 18:03:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 18:03:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:03:13.409806  543705 memory.go:191] Add success.
I0321 18:03:13.409822  543705 cpu.go:282] Add success.
W0321 18:03:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:03:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:03:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:03:13.420215  543705 net.go:648] Add success.
I0321 18:03:13.423033  543705 net.go:770] primary dev: ETH0
I0321 18:03:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:03:13.423063  543705 net.go:698] Add success.
I0321 18:03:13.514959  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bfe784ef-c17e-4630-bc0c-1d6789fb1d8c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:03:13.515001  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:03:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:03:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:03:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0321 18:03:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:03:14.457137  543705 disk_worker.go:494] system disk:vda1
I0321 18:03:14.457168  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:03:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:03:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:03:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:03:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:03:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:03:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:03:23.409804  543705 memory.go:184] no items to output this cycle
I0321 18:03:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 18:03:30.704160  543705 disk_info.go:125] begin check local disk info of client
I0321 18:03:30.706697  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:03:30.706703  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b80 0xc0000c4bc0]
E0321 18:03:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:03:33.409776  543705 cpu.go:275] no items to output this cycle
I0321 18:03:33.409786  543705 memory.go:184] no items to output this cycle
I0321 18:03:39.089128  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:03:39.089134  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:03:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:03:43.410637  543705 memory.go:191] Add success.
I0321 18:03:43.409819  543705 cpu.go:282] Add success.
I0321 18:03:43.420357  543705 net.go:648] Add success.
I0321 18:03:43.422858  543705 net.go:770] primary dev: ETH0
I0321 18:03:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:03:43.422888  543705 net.go:698] Add success.
I0321 18:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:03:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:03:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:03:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:03:53.409802  543705 memory.go:184] no items to output this cycle
I0321 18:03:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 18:04:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:04:03.409776  543705 memory.go:184] no items to output this cycle
I0321 18:04:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 18:04:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:04:13.409800  543705 memory.go:191] Add success.
I0321 18:04:13.409819  543705 cpu.go:282] Add success.
W0321 18:04:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:04:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:04:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:04:13.420144  543705 net.go:648] Add success.
I0321 18:04:13.423200  543705 net.go:770] primary dev: ETH0
I0321 18:04:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:04:13.423226  543705 net.go:698] Add success.
I0321 18:04:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:04:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:04:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 18:04:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:04:14.456522  543705 disk_worker.go:494] system disk:vda1
I0321 18:04:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:04:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:04:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:04:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:04:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:04:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:04:23.409779  543705 memory.go:184] no items to output this cycle
I0321 18:04:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 18:04:30.708169  543705 disk_info.go:125] begin check local disk info of client
I0321 18:04:30.710670  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:04:30.710686  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5780 0xc0000c57c0]
E0321 18:04:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:04:33.409793  543705 cpu.go:275] no items to output this cycle
I0321 18:04:33.409800  543705 memory.go:184] no items to output this cycle
E0321 18:04:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:04:43.409809  543705 cpu.go:282] Add success.
I0321 18:04:43.409811  543705 memory.go:191] Add success.
I0321 18:04:43.419877  543705 net.go:648] Add success.
I0321 18:04:43.422834  543705 net.go:770] primary dev: ETH0
I0321 18:04:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:04:43.422864  543705 net.go:698] Add success.
I0321 18:04:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:04:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:04:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:04:53.409803  543705 memory.go:184] no items to output this cycle
I0321 18:04:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 18:05:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:05:03.409783  543705 memory.go:184] no items to output this cycle
I0321 18:05:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 18:05:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:05:13.409834  543705 memory.go:191] Add success.
I0321 18:05:13.409838  543705 cpu.go:282] Add success.
W0321 18:05:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:05:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:05:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:05:13.420215  543705 net.go:648] Add success.
I0321 18:05:13.423310  543705 net.go:770] primary dev: ETH0
I0321 18:05:13.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:05:13.423336  543705 net.go:698] Add success.
I0321 18:05:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:05:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:05:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 18:05:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:05:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 18:05:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:05:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:05:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:05:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:05:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:05:23.409799  543705 memory.go:184] no items to output this cycle
I0321 18:05:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 18:05:30.710780  543705 disk_info.go:125] begin check local disk info of client
I0321 18:05:30.713281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:05:30.713287  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b800 0xc00007b840]
E0321 18:05:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:05:33.409809  543705 memory.go:184] no items to output this cycle
I0321 18:05:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 18:05:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:05:43.409802  543705 memory.go:191] Add success.
I0321 18:05:43.409803  543705 cpu.go:282] Add success.
I0321 18:05:43.419970  543705 net.go:648] Add success.
I0321 18:05:43.422583  543705 net.go:770] primary dev: ETH0
I0321 18:05:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:05:43.422610  543705 net.go:698] Add success.
I0321 18:05:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:05:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:05:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:05:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:05:53.409789  543705 memory.go:184] no items to output this cycle
I0321 18:05:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 18:06:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:06:03.409781  543705 cpu.go:275] no items to output this cycle
I0321 18:06:03.409791  543705 memory.go:184] no items to output this cycle
E0321 18:06:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:06:13.409831  543705 memory.go:191] Add success.
I0321 18:06:13.409839  543705 cpu.go:282] Add success.
W0321 18:06:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:06:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:06:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:06:13.420425  543705 net.go:648] Add success.
I0321 18:06:13.423053  543705 net.go:770] primary dev: ETH0
I0321 18:06:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:06:13.423078  543705 net.go:698] Add success.
I0321 18:06:13.478558  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"04889c23-e160-4100-a4cd-40f8e8bd477d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:06:13.478592  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:06:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:06:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:06:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 18:06:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:06:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 18:06:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:06:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:06:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:06:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:06:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:06:23.409775  543705 memory.go:184] no items to output this cycle
I0321 18:06:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 18:06:30.713677  543705 disk_info.go:125] begin check local disk info of client
I0321 18:06:30.716169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:06:30.716175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9cc0 0xc0004a9d00]
E0321 18:06:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:06:33.409791  543705 memory.go:184] no items to output this cycle
I0321 18:06:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 18:06:39.089732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:06:39.089739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:06:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:06:43.410725  543705 memory.go:191] Add success.
I0321 18:06:43.409796  543705 cpu.go:282] Add success.
I0321 18:06:43.420430  543705 net.go:648] Add success.
I0321 18:06:43.423062  543705 net.go:770] primary dev: ETH0
I0321 18:06:43.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:06:43.423088  543705 net.go:698] Add success.
I0321 18:06:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:06:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:06:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:06:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 18:07:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:07:03.409798  543705 memory.go:184] no items to output this cycle
I0321 18:07:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 18:07:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:07:13.409787  543705 memory.go:191] Add success.
W0321 18:07:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:07:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:07:13.409824  543705 cpu.go:282] Add success.
I0321 18:07:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:07:13.420133  543705 net.go:648] Add success.
I0321 18:07:13.423237  543705 net.go:770] primary dev: ETH0
I0321 18:07:13.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:07:13.423262  543705 net.go:698] Add success.
I0321 18:07:13.452793  543705 event_worker.go:152] Polling the log file for events...
W0321 18:07:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:07:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 18:07:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:07:14.456796  543705 disk_worker.go:494] system disk:vda1
I0321 18:07:14.456836  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:07:14.457162  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:07:14.457170  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:07:14.457175  543705 custom_config.go:64] query custom config with name: gpu
E0321 18:07:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:07:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:07:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:07:16.457989  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:07:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:07:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:07:16.472407  543705 disk_local_worker.go:436] Get disk info: []
I0321 18:07:23.409905  543705 cpu.go:275] no items to output this cycle
E0321 18:07:23.409969  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:07:23.409989  543705 memory.go:184] no items to output this cycle
I0321 18:07:30.717226  543705 disk_info.go:125] begin check local disk info of client
I0321 18:07:30.719729  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:07:30.719735  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9a00 0xc0003c9a40]
E0321 18:07:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:07:33.409797  543705 memory.go:184] no items to output this cycle
I0321 18:07:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:07:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:07:43.409822  543705 memory.go:191] Add success.
I0321 18:07:43.409825  543705 cpu.go:282] Add success.
I0321 18:07:43.420072  543705 net.go:648] Add success.
I0321 18:07:43.423447  543705 net.go:770] primary dev: ETH0
I0321 18:07:43.423462  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:07:43.423476  543705 net.go:698] Add success.
I0321 18:07:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:07:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:07:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:07:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:07:53.409794  543705 memory.go:184] no items to output this cycle
I0321 18:07:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 18:08:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:08:03.409797  543705 memory.go:184] no items to output this cycle
I0321 18:08:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 18:08:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:08:13.409799  543705 memory.go:191] Add success.
I0321 18:08:13.409814  543705 cpu.go:282] Add success.
W0321 18:08:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:08:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:08:13.420104  543705 net.go:648] Add success.
I0321 18:08:13.422624  543705 net.go:770] primary dev: ETH0
I0321 18:08:13.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:08:13.422650  543705 net.go:698] Add success.
I0321 18:08:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:08:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:08:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 18:08:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:08:14.456544  543705 disk_worker.go:494] system disk:vda1
I0321 18:08:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:08:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:08:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:08:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:08:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:08:16.472476  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:08:23.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:08:23.409882  543705 cpu.go:275] no items to output this cycle
I0321 18:08:23.409891  543705 memory.go:184] no items to output this cycle
I0321 18:08:30.721240  543705 disk_info.go:125] begin check local disk info of client
I0321 18:08:30.723811  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:08:30.723818  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0321 18:08:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:08:33.409798  543705 memory.go:184] no items to output this cycle
I0321 18:08:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 18:08:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:08:43.409818  543705 memory.go:191] Add success.
I0321 18:08:43.409824  543705 cpu.go:282] Add success.
I0321 18:08:43.419955  543705 net.go:648] Add success.
I0321 18:08:43.423184  543705 net.go:770] primary dev: ETH0
I0321 18:08:43.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:08:43.423214  543705 net.go:698] Add success.
I0321 18:08:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:08:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:08:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:08:53.410401  543705 cpu.go:275] no items to output this cycle
I0321 18:08:53.410403  543705 memory.go:184] no items to output this cycle
E0321 18:09:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:09:03.409763  543705 memory.go:184] no items to output this cycle
I0321 18:09:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 18:09:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:09:13.409803  543705 memory.go:191] Add success.
I0321 18:09:13.409821  543705 cpu.go:282] Add success.
W0321 18:09:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:09:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:09:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:09:13.420197  543705 net.go:648] Add success.
I0321 18:09:13.422691  543705 net.go:770] primary dev: ETH0
I0321 18:09:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:09:13.422723  543705 net.go:698] Add success.
I0321 18:09:13.467965  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b8026b9-d34e-4b9e-80af-ed0021153813","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:09:13.468001  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:09:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:09:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:09:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 18:09:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:09:14.456728  543705 disk_worker.go:494] system disk:vda1
I0321 18:09:14.456759  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:09:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:09:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:09:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:09:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:09:23.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:09:23.409888  543705 memory.go:184] no items to output this cycle
I0321 18:09:23.409927  543705 cpu.go:275] no items to output this cycle
I0321 18:09:30.725259  543705 disk_info.go:125] begin check local disk info of client
I0321 18:09:30.727769  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:09:30.727775  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e240 0xc00037e280]
E0321 18:09:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:09:33.409793  543705 memory.go:184] no items to output this cycle
I0321 18:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 18:09:39.093169  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:09:39.093177  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:09:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:09:43.410843  543705 memory.go:191] Add success.
I0321 18:09:43.409799  543705 cpu.go:282] Add success.
I0321 18:09:43.420550  543705 net.go:648] Add success.
I0321 18:09:43.423748  543705 net.go:770] primary dev: ETH0
I0321 18:09:43.423761  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:09:43.423774  543705 net.go:698] Add success.
I0321 18:09:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:09:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:09:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:09:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:09:53.409781  543705 memory.go:184] no items to output this cycle
I0321 18:09:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 18:10:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:10:03.409782  543705 memory.go:184] no items to output this cycle
I0321 18:10:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 18:10:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:10:13.409828  543705 memory.go:191] Add success.
I0321 18:10:13.409840  543705 cpu.go:282] Add success.
W0321 18:10:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:10:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:10:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:10:13.420208  543705 net.go:648] Add success.
I0321 18:10:13.423259  543705 net.go:770] primary dev: ETH0
I0321 18:10:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:10:13.423284  543705 net.go:698] Add success.
I0321 18:10:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:10:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:10:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 18:10:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:10:14.456609  543705 disk_worker.go:494] system disk:vda1
I0321 18:10:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:10:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:10:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:10:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:10:23.409776  543705 memory.go:184] no items to output this cycle
I0321 18:10:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 18:10:30.729293  543705 disk_info.go:125] begin check local disk info of client
I0321 18:10:30.731904  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:10:30.731912  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ae1c0 0xc0004ae200]
E0321 18:10:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:10:33.409762  543705 memory.go:184] no items to output this cycle
I0321 18:10:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 18:10:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:10:43.409784  543705 memory.go:191] Add success.
I0321 18:10:43.409809  543705 cpu.go:282] Add success.
I0321 18:10:43.419873  543705 net.go:648] Add success.
I0321 18:10:43.422612  543705 net.go:770] primary dev: ETH0
I0321 18:10:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:10:43.422638  543705 net.go:698] Add success.
I0321 18:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:10:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:10:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:10:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:10:53.409770  543705 memory.go:184] no items to output this cycle
I0321 18:10:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 18:11:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:11:03.409770  543705 memory.go:184] no items to output this cycle
I0321 18:11:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 18:11:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:11:13.409806  543705 memory.go:191] Add success.
I0321 18:11:13.409831  543705 cpu.go:282] Add success.
W0321 18:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:11:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:11:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:11:13.420147  543705 net.go:648] Add success.
I0321 18:11:13.422944  543705 net.go:770] primary dev: ETH0
I0321 18:11:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:11:13.422970  543705 net.go:698] Add success.
I0321 18:11:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:11:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:11:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 18:11:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:11:14.456630  543705 disk_worker.go:494] system disk:vda1
I0321 18:11:14.456661  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:11:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:11:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:11:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:11:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:11:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:11:23.409771  543705 memory.go:184] no items to output this cycle
I0321 18:11:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 18:11:30.733320  543705 disk_info.go:125] begin check local disk info of client
I0321 18:11:30.735831  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:11:30.735838  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340000 0xc000340040]
E0321 18:11:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:11:33.409802  543705 memory.go:184] no items to output this cycle
I0321 18:11:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 18:11:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:11:43.409791  543705 memory.go:191] Add success.
I0321 18:11:43.409814  543705 cpu.go:282] Add success.
I0321 18:11:43.419972  543705 net.go:648] Add success.
I0321 18:11:43.422545  543705 net.go:770] primary dev: ETH0
I0321 18:11:43.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:11:43.422573  543705 net.go:698] Add success.
I0321 18:11:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:11:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:11:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:11:53.409782  543705 memory.go:184] no items to output this cycle
I0321 18:11:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 18:12:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:12:03.409795  543705 memory.go:184] no items to output this cycle
I0321 18:12:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 18:12:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:12:13.409793  543705 memory.go:191] Add success.
I0321 18:12:13.409815  543705 cpu.go:282] Add success.
W0321 18:12:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:12:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:12:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:12:13.420132  543705 net.go:648] Add success.
I0321 18:12:13.423464  543705 net.go:770] primary dev: ETH0
I0321 18:12:13.423480  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:12:13.423494  543705 net.go:698] Add success.
I0321 18:12:13.468842  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9df842ff-6f6d-4dd5-8798-3093b0b6f71e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:12:13.468877  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 18:12:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:12:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 18:12:14.455199  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:12:14.455944  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:12:14.455954  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:12:14.455959  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:12:14.456592  543705 disk_worker.go:494] system disk:vda1
I0321 18:12:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:12:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:12:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:12:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:12:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:12:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:12:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:12:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:12:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:12:23.409795  543705 memory.go:184] no items to output this cycle
I0321 18:12:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 18:12:30.737327  543705 disk_info.go:125] begin check local disk info of client
I0321 18:12:30.739824  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:12:30.739830  543705 disk_info.go:196] parse disk info done, disk is : [0xc000341500 0xc000341540]
E0321 18:12:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:12:33.409775  543705 memory.go:184] no items to output this cycle
I0321 18:12:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 18:12:39.093734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:12:39.093741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:12:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:12:43.410554  543705 memory.go:191] Add success.
I0321 18:12:43.409789  543705 cpu.go:282] Add success.
I0321 18:12:43.420318  543705 net.go:648] Add success.
I0321 18:12:43.422792  543705 net.go:770] primary dev: ETH0
I0321 18:12:43.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:12:43.422818  543705 net.go:698] Add success.
I0321 18:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:12:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:12:53.409784  543705 memory.go:184] no items to output this cycle
I0321 18:12:53.409824  543705 cpu.go:275] no items to output this cycle
E0321 18:13:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:13:03.409784  543705 memory.go:184] no items to output this cycle
I0321 18:13:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 18:13:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:13:13.409808  543705 cpu.go:282] Add success.
I0321 18:13:13.409811  543705 memory.go:191] Add success.
W0321 18:13:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:13:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:13:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:13:13.420132  543705 net.go:648] Add success.
I0321 18:13:13.422764  543705 net.go:770] primary dev: ETH0
I0321 18:13:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:13:13.422790  543705 net.go:698] Add success.
I0321 18:13:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:13:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:13:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 18:13:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:13:14.456612  543705 disk_worker.go:494] system disk:vda1
I0321 18:13:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:13:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:13:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:13:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:13:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:13:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:13:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:13:23.409781  543705 memory.go:184] no items to output this cycle
I0321 18:13:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 18:13:30.741347  543705 disk_info.go:125] begin check local disk info of client
I0321 18:13:30.743962  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:13:30.743969  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab200 0xc0001ab240]
E0321 18:13:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:13:33.409781  543705 memory.go:184] no items to output this cycle
I0321 18:13:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 18:13:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:13:43.409777  543705 memory.go:191] Add success.
I0321 18:13:43.409809  543705 cpu.go:282] Add success.
I0321 18:13:43.419888  543705 net.go:648] Add success.
I0321 18:13:43.422471  543705 net.go:770] primary dev: ETH0
I0321 18:13:43.422484  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:13:43.422506  543705 net.go:698] Add success.
I0321 18:13:46.457890  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:13:46.457954  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:13:46.457977  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:13:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:13:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:13:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 18:14:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:14:03.409780  543705 memory.go:184] no items to output this cycle
I0321 18:14:03.409780  543705 cpu.go:275] no items to output this cycle
W0321 18:14:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:14:13.409744  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:14:13.409748  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 18:14:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:14:13.409837  543705 cpu.go:282] Add success.
I0321 18:14:13.409844  543705 memory.go:191] Add success.
I0321 18:14:13.420149  543705 net.go:648] Add success.
I0321 18:14:13.422936  543705 net.go:770] primary dev: ETH0
I0321 18:14:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:14:13.422964  543705 net.go:698] Add success.
I0321 18:14:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:14:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:14:14.455323  543705 disk_worker.go:708] disk space is not compliant
W0321 18:14:14.455329  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:14:14.457041  543705 disk_worker.go:494] system disk:vda1
I0321 18:14:14.457070  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:14:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:14:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:14:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:14:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:14:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:14:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:14:23.409815  543705 memory.go:184] no items to output this cycle
I0321 18:14:23.409829  543705 cpu.go:275] no items to output this cycle
I0321 18:14:30.744054  543705 disk_info.go:125] begin check local disk info of client
I0321 18:14:30.746573  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:14:30.746579  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377740 0xc000377780]
E0321 18:14:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:14:33.409803  543705 memory.go:184] no items to output this cycle
I0321 18:14:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 18:14:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:14:43.409786  543705 memory.go:191] Add success.
I0321 18:14:43.409788  543705 cpu.go:282] Add success.
I0321 18:14:43.419855  543705 net.go:648] Add success.
I0321 18:14:43.422702  543705 net.go:770] primary dev: ETH0
I0321 18:14:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:14:43.422729  543705 net.go:698] Add success.
I0321 18:14:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:14:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:14:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:14:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:14:53.409769  543705 memory.go:184] no items to output this cycle
I0321 18:14:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 18:15:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:15:03.409786  543705 memory.go:184] no items to output this cycle
I0321 18:15:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 18:15:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:15:13.409827  543705 memory.go:191] Add success.
I0321 18:15:13.409830  543705 cpu.go:282] Add success.
W0321 18:15:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:15:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:15:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:15:13.420176  543705 net.go:648] Add success.
I0321 18:15:13.422652  543705 net.go:770] primary dev: ETH0
I0321 18:15:13.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:15:13.422679  543705 net.go:698] Add success.
I0321 18:15:13.467907  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c37b9ae-b68e-4389-ac15-9ba22b3ca50a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:15:13.467939  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:15:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:15:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 18:15:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:15:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 18:15:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:15:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:15:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:15:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:15:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:15:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:15:23.409773  543705 cpu.go:275] no items to output this cycle
I0321 18:15:23.409780  543705 memory.go:184] no items to output this cycle
I0321 18:15:30.747382  543705 disk_info.go:125] begin check local disk info of client
I0321 18:15:30.749853  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:15:30.749860  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be300 0xc0002be340]
E0321 18:15:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:15:33.409797  543705 memory.go:184] no items to output this cycle
I0321 18:15:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 18:15:39.097180  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:15:39.097186  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:15:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:15:43.410604  543705 memory.go:191] Add success.
I0321 18:15:43.409809  543705 cpu.go:282] Add success.
I0321 18:15:43.420407  543705 net.go:648] Add success.
I0321 18:15:43.423055  543705 net.go:770] primary dev: ETH0
I0321 18:15:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:15:43.423086  543705 net.go:698] Add success.
I0321 18:15:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:15:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:15:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:15:53.409791  543705 memory.go:184] no items to output this cycle
I0321 18:15:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 18:16:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:16:03.409779  543705 memory.go:184] no items to output this cycle
I0321 18:16:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 18:16:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:16:13.409932  543705 memory.go:191] Add success.
I0321 18:16:13.409959  543705 cpu.go:282] Add success.
W0321 18:16:13.409970  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:16:13.409983  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:16:13.409986  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:16:13.419732  543705 net.go:648] Add success.
I0321 18:16:13.422703  543705 net.go:770] primary dev: ETH0
I0321 18:16:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:16:13.422746  543705 net.go:698] Add success.
I0321 18:16:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:16:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:16:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 18:16:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:16:14.456621  543705 disk_worker.go:494] system disk:vda1
I0321 18:16:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:16:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:16:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:16:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:16:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:16:23.409812  543705 memory.go:184] no items to output this cycle
I0321 18:16:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 18:16:30.751387  543705 disk_info.go:125] begin check local disk info of client
I0321 18:16:30.753920  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:16:30.753928  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464280 0xc0004642c0]
E0321 18:16:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:16:33.409783  543705 memory.go:184] no items to output this cycle
I0321 18:16:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 18:16:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:16:43.409797  543705 memory.go:191] Add success.
I0321 18:16:43.409801  543705 cpu.go:282] Add success.
I0321 18:16:43.419873  543705 net.go:648] Add success.
I0321 18:16:43.422623  543705 net.go:770] primary dev: ETH0
I0321 18:16:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:16:43.422648  543705 net.go:698] Add success.
I0321 18:16:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:16:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:16:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:16:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:16:53.409789  543705 memory.go:184] no items to output this cycle
I0321 18:16:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 18:17:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:17:03.409764  543705 memory.go:184] no items to output this cycle
I0321 18:17:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 18:17:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:17:13.409820  543705 memory.go:191] Add success.
I0321 18:17:13.409829  543705 cpu.go:282] Add success.
W0321 18:17:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:17:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:17:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:17:13.420318  543705 net.go:648] Add success.
I0321 18:17:13.422951  543705 net.go:770] primary dev: ETH0
I0321 18:17:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:17:13.422980  543705 net.go:698] Add success.
I0321 18:17:13.452772  543705 event_worker.go:152] Polling the log file for events...
W0321 18:17:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:17:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 18:17:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:17:14.455929  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:17:14.455938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:17:14.455945  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:17:14.456530  543705 disk_worker.go:494] system disk:vda1
I0321 18:17:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:17:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:17:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:17:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:17:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:17:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:17:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:17:16.472338  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:17:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:17:23.409801  543705 memory.go:184] no items to output this cycle
I0321 18:17:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 18:17:30.755414  543705 disk_info.go:125] begin check local disk info of client
I0321 18:17:30.757912  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:17:30.757919  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5000 0xc0000c5040]
E0321 18:17:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:17:33.409772  543705 memory.go:184] no items to output this cycle
I0321 18:17:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 18:17:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:17:43.409808  543705 memory.go:191] Add success.
I0321 18:17:43.409820  543705 cpu.go:282] Add success.
I0321 18:17:43.419900  543705 net.go:648] Add success.
I0321 18:17:43.422506  543705 net.go:770] primary dev: ETH0
I0321 18:17:43.422518  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:17:43.422532  543705 net.go:698] Add success.
I0321 18:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:17:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:17:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:17:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 18:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:18:03.409775  543705 memory.go:184] no items to output this cycle
I0321 18:18:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 18:18:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:18:13.409821  543705 memory.go:191] Add success.
W0321 18:18:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:18:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:18:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:18:13.410062  543705 cpu.go:282] Add success.
I0321 18:18:13.419723  543705 net.go:648] Add success.
I0321 18:18:13.420565  543705 net.go:770] primary dev: ETH0
I0321 18:18:13.420577  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:18:13.420589  543705 net.go:698] Add success.
I0321 18:18:13.468307  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6698afb7-86d7-4f74-81a4-55b5d1a913d1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:18:13.468338  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:18:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:18:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:18:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 18:18:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:18:14.456769  543705 disk_worker.go:494] system disk:vda1
I0321 18:18:14.456814  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:18:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:18:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:18:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:18:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:18:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:18:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:18:23.409775  543705 memory.go:184] no items to output this cycle
I0321 18:18:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 18:18:30.759439  543705 disk_info.go:125] begin check local disk info of client
I0321 18:18:30.762027  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:18:30.762033  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa100 0xc0001aa140]
E0321 18:18:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:18:33.409768  543705 memory.go:184] no items to output this cycle
I0321 18:18:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 18:18:39.097730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:18:39.097737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:18:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:18:43.410534  543705 memory.go:191] Add success.
I0321 18:18:43.409790  543705 cpu.go:282] Add success.
I0321 18:18:43.420225  543705 net.go:648] Add success.
I0321 18:18:43.422818  543705 net.go:770] primary dev: ETH0
I0321 18:18:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:18:43.422842  543705 net.go:698] Add success.
I0321 18:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:18:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:18:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:18:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:18:53.409793  543705 memory.go:184] no items to output this cycle
I0321 18:18:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 18:19:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:19:03.409778  543705 memory.go:184] no items to output this cycle
I0321 18:19:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 18:19:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:19:13.409836  543705 memory.go:191] Add success.
I0321 18:19:13.409844  543705 cpu.go:282] Add success.
W0321 18:19:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:19:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:19:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:19:13.420776  543705 net.go:648] Add success.
I0321 18:19:13.423291  543705 net.go:770] primary dev: ETH0
I0321 18:19:13.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:19:13.423316  543705 net.go:698] Add success.
I0321 18:19:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:19:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:19:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 18:19:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:19:14.456639  543705 disk_worker.go:494] system disk:vda1
I0321 18:19:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:19:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:19:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:19:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:19:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:19:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:19:23.409780  543705 memory.go:184] no items to output this cycle
I0321 18:19:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 18:19:30.763459  543705 disk_info.go:125] begin check local disk info of client
I0321 18:19:30.765961  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:19:30.765968  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa4c0 0xc0001aa500]
E0321 18:19:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:19:33.409796  543705 memory.go:184] no items to output this cycle
I0321 18:19:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 18:19:43.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:19:43.409771  543705 memory.go:191] Add success.
I0321 18:19:43.409804  543705 cpu.go:282] Add success.
I0321 18:19:43.419993  543705 net.go:648] Add success.
I0321 18:19:43.422695  543705 net.go:770] primary dev: ETH0
I0321 18:19:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:19:43.422723  543705 net.go:698] Add success.
I0321 18:19:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:19:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:19:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:19:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:19:53.409778  543705 memory.go:184] no items to output this cycle
I0321 18:19:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 18:20:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:20:03.409763  543705 memory.go:184] no items to output this cycle
I0321 18:20:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 18:20:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:20:13.409832  543705 memory.go:191] Add success.
I0321 18:20:13.409837  543705 cpu.go:282] Add success.
W0321 18:20:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:20:13.412622  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:20:13.412628  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:20:13.420468  543705 net.go:648] Add success.
I0321 18:20:13.422120  543705 net.go:770] primary dev: ETH0
I0321 18:20:13.422135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:20:13.422148  543705 net.go:698] Add success.
I0321 18:20:14.453959  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:20:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:20:14.455242  543705 disk_worker.go:708] disk space is not compliant
W0321 18:20:14.455245  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:20:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 18:20:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:20:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:20:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:20:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:20:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:20:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:20:23.409782  543705 memory.go:184] no items to output this cycle
I0321 18:20:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 18:20:30.767475  543705 disk_info.go:125] begin check local disk info of client
I0321 18:20:30.769984  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:20:30.769990  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b740 0xc00007b780]
E0321 18:20:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:20:33.409792  543705 memory.go:184] no items to output this cycle
I0321 18:20:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:20:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:20:43.409812  543705 memory.go:191] Add success.
I0321 18:20:43.409815  543705 cpu.go:282] Add success.
I0321 18:20:43.419965  543705 net.go:648] Add success.
I0321 18:20:43.422989  543705 net.go:770] primary dev: ETH0
I0321 18:20:43.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:20:43.423014  543705 net.go:698] Add success.
I0321 18:20:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:20:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:20:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:20:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:20:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 18:21:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:21:03.409767  543705 memory.go:184] no items to output this cycle
I0321 18:21:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 18:21:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:21:13.409817  543705 cpu.go:282] Add success.
I0321 18:21:13.409830  543705 memory.go:191] Add success.
W0321 18:21:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:21:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:21:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:21:13.420570  543705 net.go:648] Add success.
I0321 18:21:13.423623  543705 net.go:770] primary dev: ETH0
I0321 18:21:13.423640  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:21:13.423652  543705 net.go:698] Add success.
I0321 18:21:13.463076  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2eb2946a-874e-4baf-9807-18a82195c37c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:21:13.463110  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:21:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:21:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:21:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 18:21:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:21:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 18:21:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:21:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:21:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:21:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:21:23.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:21:23.410257  543705 memory.go:184] no items to output this cycle
I0321 18:21:23.410266  543705 cpu.go:275] no items to output this cycle
I0321 18:21:30.771497  543705 disk_info.go:125] begin check local disk info of client
I0321 18:21:30.774053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:21:30.774060  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a7c0 0xc00007a940]
E0321 18:21:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:21:33.409775  543705 memory.go:184] no items to output this cycle
I0321 18:21:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 18:21:39.101193  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:21:39.101200  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:21:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:21:43.410648  543705 memory.go:191] Add success.
I0321 18:21:43.409821  543705 cpu.go:282] Add success.
I0321 18:21:43.420489  543705 net.go:648] Add success.
I0321 18:21:43.423309  543705 net.go:770] primary dev: ETH0
I0321 18:21:43.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:21:43.423337  543705 net.go:698] Add success.
I0321 18:21:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:21:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:21:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:21:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:21:53.409804  543705 memory.go:184] no items to output this cycle
I0321 18:21:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 18:22:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:22:03.409769  543705 memory.go:184] no items to output this cycle
I0321 18:22:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 18:22:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:22:13.409827  543705 memory.go:191] Add success.
I0321 18:22:13.409832  543705 cpu.go:282] Add success.
W0321 18:22:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:22:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:22:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:22:13.419874  543705 net.go:648] Add success.
I0321 18:22:13.422583  543705 net.go:770] primary dev: ETH0
I0321 18:22:13.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:22:13.422620  543705 net.go:698] Add success.
W0321 18:22:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:22:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 18:22:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:22:14.456835  543705 disk_worker.go:494] system disk:vda1
I0321 18:22:14.456879  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:22:14.457293  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:22:14.457302  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:22:14.457307  543705 custom_config.go:64] query custom config with name: gpu
E0321 18:22:15.456878  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:22:15.456887  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 18:22:16.457987  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:22:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:22:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:22:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:22:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:22:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:22:23.409767  543705 memory.go:184] no items to output this cycle
I0321 18:22:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 18:22:30.775511  543705 disk_info.go:125] begin check local disk info of client
I0321 18:22:30.778050  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:22:30.778057  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0280 0xc0003b02c0]
E0321 18:22:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:22:33.409797  543705 memory.go:184] no items to output this cycle
I0321 18:22:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 18:22:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:22:43.409794  543705 memory.go:191] Add success.
I0321 18:22:43.409794  543705 cpu.go:282] Add success.
I0321 18:22:43.419879  543705 net.go:648] Add success.
I0321 18:22:43.422547  543705 net.go:770] primary dev: ETH0
I0321 18:22:43.422562  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:22:43.422575  543705 net.go:698] Add success.
I0321 18:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:22:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:22:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:22:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:22:53.409815  543705 memory.go:184] no items to output this cycle
I0321 18:22:53.409827  543705 cpu.go:275] no items to output this cycle
E0321 18:23:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:23:03.409777  543705 memory.go:184] no items to output this cycle
I0321 18:23:03.409779  543705 cpu.go:275] no items to output this cycle
E0321 18:23:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:23:13.409790  543705 memory.go:191] Add success.
W0321 18:23:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:23:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:23:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:23:13.409855  543705 cpu.go:282] Add success.
I0321 18:23:13.420494  543705 net.go:648] Add success.
I0321 18:23:13.421465  543705 net.go:770] primary dev: ETH0
I0321 18:23:13.421478  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:23:13.421490  543705 net.go:698] Add success.
I0321 18:23:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:23:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:23:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 18:23:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:23:14.456651  543705 disk_worker.go:494] system disk:vda1
I0321 18:23:14.456683  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:23:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:23:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:23:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:23:23.409784  543705 memory.go:184] no items to output this cycle
I0321 18:23:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 18:23:30.779525  543705 disk_info.go:125] begin check local disk info of client
I0321 18:23:30.782005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:23:30.782011  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046d680 0xc00046d6c0]
E0321 18:23:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:23:33.409794  543705 memory.go:184] no items to output this cycle
I0321 18:23:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 18:23:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:23:43.409810  543705 memory.go:191] Add success.
I0321 18:23:43.409820  543705 cpu.go:282] Add success.
I0321 18:23:43.419889  543705 net.go:648] Add success.
I0321 18:23:43.422817  543705 net.go:770] primary dev: ETH0
I0321 18:23:43.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:23:43.422844  543705 net.go:698] Add success.
I0321 18:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:23:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:23:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:23:53.409776  543705 memory.go:184] no items to output this cycle
I0321 18:23:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 18:24:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:24:03.409778  543705 memory.go:184] no items to output this cycle
I0321 18:24:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 18:24:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:24:13.409812  543705 cpu.go:282] Add success.
I0321 18:24:13.409828  543705 memory.go:191] Add success.
W0321 18:24:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:24:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:24:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:24:13.420293  543705 net.go:648] Add success.
I0321 18:24:13.423347  543705 net.go:770] primary dev: ETH0
I0321 18:24:13.423361  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:24:13.423374  543705 net.go:698] Add success.
I0321 18:24:13.467731  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"017e0a86-16a5-461c-abe2-36e23a499627","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:24:13.467766  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:24:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:24:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:24:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 18:24:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:24:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 18:24:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:24:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:24:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:24:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:24:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:24:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:24:23.409809  543705 memory.go:184] no items to output this cycle
I0321 18:24:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 18:24:30.783553  543705 disk_info.go:125] begin check local disk info of client
I0321 18:24:30.786087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:24:30.786093  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d85c0 0xc0004d8600]
E0321 18:24:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:24:33.409799  543705 memory.go:184] no items to output this cycle
I0321 18:24:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 18:24:39.101727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:24:39.101733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:24:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:24:43.410702  543705 memory.go:191] Add success.
I0321 18:24:43.409818  543705 cpu.go:282] Add success.
I0321 18:24:43.420382  543705 net.go:648] Add success.
I0321 18:24:43.423326  543705 net.go:770] primary dev: ETH0
I0321 18:24:43.423341  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:24:43.423355  543705 net.go:698] Add success.
I0321 18:24:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:24:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:24:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:24:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:24:53.409779  543705 memory.go:184] no items to output this cycle
I0321 18:24:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 18:25:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:25:03.409783  543705 memory.go:184] no items to output this cycle
I0321 18:25:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 18:25:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:25:13.409809  543705 cpu.go:282] Add success.
I0321 18:25:13.409835  543705 memory.go:191] Add success.
W0321 18:25:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:25:13.409892  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:25:13.409897  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:25:13.420175  543705 net.go:648] Add success.
I0321 18:25:13.423294  543705 net.go:770] primary dev: ETH0
I0321 18:25:13.423308  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:25:13.423321  543705 net.go:698] Add success.
I0321 18:25:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:25:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:25:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 18:25:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:25:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 18:25:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:25:15.456004  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:25:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:25:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:25:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:25:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:25:23.409780  543705 memory.go:184] no items to output this cycle
I0321 18:25:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 18:25:30.787574  543705 disk_info.go:125] begin check local disk info of client
I0321 18:25:30.790136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:25:30.790142  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0c40 0xc0003b0c80]
E0321 18:25:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:25:33.409775  543705 memory.go:184] no items to output this cycle
I0321 18:25:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 18:25:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:25:43.409790  543705 memory.go:191] Add success.
I0321 18:25:43.409790  543705 cpu.go:282] Add success.
I0321 18:25:43.419948  543705 net.go:648] Add success.
I0321 18:25:43.422577  543705 net.go:770] primary dev: ETH0
I0321 18:25:43.422590  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:25:43.422602  543705 net.go:698] Add success.
I0321 18:25:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:25:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:25:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:25:53.409796  543705 memory.go:184] no items to output this cycle
I0321 18:25:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 18:26:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:26:03.409761  543705 memory.go:184] no items to output this cycle
I0321 18:26:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 18:26:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:26:13.409794  543705 memory.go:191] Add success.
I0321 18:26:13.409794  543705 cpu.go:282] Add success.
W0321 18:26:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:26:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:26:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:26:13.420141  543705 net.go:648] Add success.
I0321 18:26:13.423081  543705 net.go:770] primary dev: ETH0
I0321 18:26:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:26:13.423107  543705 net.go:698] Add success.
I0321 18:26:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:26:14.455224  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:26:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0321 18:26:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:26:14.456904  543705 disk_worker.go:494] system disk:vda1
I0321 18:26:14.456938  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:26:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:26:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:26:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:26:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:26:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:26:23.409769  543705 memory.go:184] no items to output this cycle
I0321 18:26:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 18:26:30.791593  543705 disk_info.go:125] begin check local disk info of client
I0321 18:26:30.794117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:26:30.794123  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c80 0xc0000c5cc0]
E0321 18:26:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:26:33.409789  543705 memory.go:184] no items to output this cycle
I0321 18:26:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 18:26:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:26:43.409816  543705 memory.go:191] Add success.
I0321 18:26:43.409818  543705 cpu.go:282] Add success.
I0321 18:26:43.419859  543705 net.go:648] Add success.
I0321 18:26:43.422410  543705 net.go:770] primary dev: ETH0
I0321 18:26:43.422424  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:26:43.422437  543705 net.go:698] Add success.
I0321 18:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:26:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:26:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:26:53.409794  543705 memory.go:184] no items to output this cycle
I0321 18:26:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:27:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:27:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 18:27:03.409783  543705 memory.go:184] no items to output this cycle
E0321 18:27:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:27:13.409801  543705 memory.go:191] Add success.
I0321 18:27:13.409804  543705 cpu.go:282] Add success.
W0321 18:27:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:27:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:27:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:27:13.420153  543705 net.go:648] Add success.
I0321 18:27:13.422693  543705 net.go:770] primary dev: ETH0
I0321 18:27:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:27:13.422722  543705 net.go:698] Add success.
I0321 18:27:13.429264  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 18:27:13.453440  543705 event_worker.go:152] Polling the log file for events...
I0321 18:27:13.463002  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"de46d294-10db-4c41-9281-4ccb97972c28","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:27:13.463034  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 18:27:14.455360  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:27:14.455488  543705 disk_worker.go:708] disk space is not compliant
W0321 18:27:14.455492  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:27:14.456464  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:27:14.456473  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:27:14.456478  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:27:14.457861  543705 disk_worker.go:494] system disk:vda1
I0321 18:27:14.457910  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:27:15.456892  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:27:15.456900  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:27:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:27:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:27:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:27:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:27:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:27:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:27:23.409777  543705 memory.go:184] no items to output this cycle
I0321 18:27:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 18:27:30.795621  543705 disk_info.go:125] begin check local disk info of client
I0321 18:27:30.798222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:27:30.798229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521200 0xc000521240]
E0321 18:27:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:27:33.409763  543705 memory.go:184] no items to output this cycle
I0321 18:27:33.409794  543705 cpu.go:275] no items to output this cycle
I0321 18:27:39.101866  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:27:39.101873  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:27:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:27:43.410657  543705 memory.go:191] Add success.
I0321 18:27:43.409808  543705 cpu.go:282] Add success.
I0321 18:27:43.420354  543705 net.go:648] Add success.
I0321 18:27:43.423009  543705 net.go:770] primary dev: ETH0
I0321 18:27:43.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:27:43.423035  543705 net.go:698] Add success.
I0321 18:27:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:27:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:27:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:27:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:27:53.409774  543705 memory.go:184] no items to output this cycle
I0321 18:27:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 18:28:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:28:03.409771  543705 memory.go:184] no items to output this cycle
I0321 18:28:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 18:28:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:28:13.409821  543705 memory.go:191] Add success.
I0321 18:28:13.409831  543705 cpu.go:282] Add success.
W0321 18:28:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:28:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:28:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:28:13.420195  543705 net.go:648] Add success.
I0321 18:28:13.423323  543705 net.go:770] primary dev: ETH0
I0321 18:28:13.423335  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:28:13.423348  543705 net.go:698] Add success.
I0321 18:28:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:28:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:28:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 18:28:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:28:14.456626  543705 disk_worker.go:494] system disk:vda1
I0321 18:28:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:28:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:28:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:28:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:28:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:28:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:28:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:28:23.409783  543705 memory.go:184] no items to output this cycle
I0321 18:28:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 18:28:30.799639  543705 disk_info.go:125] begin check local disk info of client
I0321 18:28:30.802214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:28:30.802221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521780 0xc0005217c0]
E0321 18:28:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:28:33.409806  543705 memory.go:184] no items to output this cycle
I0321 18:28:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 18:28:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:28:43.409821  543705 memory.go:191] Add success.
I0321 18:28:43.409832  543705 cpu.go:282] Add success.
I0321 18:28:43.419851  543705 net.go:648] Add success.
I0321 18:28:43.422790  543705 net.go:770] primary dev: ETH0
I0321 18:28:43.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:28:43.422815  543705 net.go:698] Add success.
I0321 18:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:28:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:28:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:28:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:28:53.409790  543705 memory.go:184] no items to output this cycle
I0321 18:28:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 18:29:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:29:03.409790  543705 memory.go:184] no items to output this cycle
I0321 18:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 18:29:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:29:13.409841  543705 memory.go:191] Add success.
I0321 18:29:13.409847  543705 cpu.go:282] Add success.
W0321 18:29:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:29:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:29:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:29:13.420150  543705 net.go:648] Add success.
I0321 18:29:13.422950  543705 net.go:770] primary dev: ETH0
I0321 18:29:13.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:29:13.422976  543705 net.go:698] Add success.
I0321 18:29:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:29:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:29:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 18:29:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:29:14.457109  543705 disk_worker.go:494] system disk:vda1
I0321 18:29:14.457143  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:29:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:29:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:29:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:29:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:29:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:29:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:29:23.409780  543705 memory.go:184] no items to output this cycle
I0321 18:29:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 18:29:30.803660  543705 disk_info.go:125] begin check local disk info of client
I0321 18:29:30.806218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:29:30.806224  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521e80 0xc000521ec0]
E0321 18:29:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:29:33.409802  543705 memory.go:184] no items to output this cycle
I0321 18:29:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 18:29:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:29:43.409821  543705 memory.go:191] Add success.
I0321 18:29:43.409829  543705 cpu.go:282] Add success.
I0321 18:29:43.419974  543705 net.go:648] Add success.
I0321 18:29:43.423043  543705 net.go:770] primary dev: ETH0
I0321 18:29:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:29:43.423073  543705 net.go:698] Add success.
I0321 18:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:29:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:29:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:29:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:29:53.409777  543705 memory.go:184] no items to output this cycle
I0321 18:29:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 18:30:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:30:03.409773  543705 memory.go:184] no items to output this cycle
I0321 18:30:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 18:30:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:30:13.409800  543705 memory.go:191] Add success.
I0321 18:30:13.409801  543705 cpu.go:282] Add success.
W0321 18:30:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:30:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:30:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:30:13.420151  543705 net.go:648] Add success.
I0321 18:30:13.422939  543705 net.go:770] primary dev: ETH0
I0321 18:30:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:30:13.422969  543705 net.go:698] Add success.
I0321 18:30:13.464232  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7cc5efb9-4b0e-434f-a94f-9c5421e6e893","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:30:13.464265  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:30:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:30:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:30:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 18:30:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:30:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 18:30:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:30:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:30:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:30:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:30:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:30:16.472474  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:30:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:30:23.409800  543705 memory.go:184] no items to output this cycle
I0321 18:30:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 18:30:30.807673  543705 disk_info.go:125] begin check local disk info of client
I0321 18:30:30.810445  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:30:30.810452  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9540 0xc0004d9580]
E0321 18:30:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:30:33.409766  543705 memory.go:184] no items to output this cycle
I0321 18:30:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 18:30:39.102012  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:30:39.102018  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:30:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:30:43.410617  543705 memory.go:191] Add success.
I0321 18:30:43.409800  543705 cpu.go:282] Add success.
I0321 18:30:43.420316  543705 net.go:648] Add success.
I0321 18:30:43.422970  543705 net.go:770] primary dev: ETH0
I0321 18:30:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:30:43.422995  543705 net.go:698] Add success.
I0321 18:30:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:30:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:30:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:30:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:30:53.409795  543705 memory.go:184] no items to output this cycle
I0321 18:30:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 18:31:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:31:03.409782  543705 memory.go:184] no items to output this cycle
I0321 18:31:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 18:31:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:31:13.409796  543705 memory.go:191] Add success.
I0321 18:31:13.409797  543705 cpu.go:282] Add success.
W0321 18:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:31:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:31:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:31:13.420235  543705 net.go:648] Add success.
I0321 18:31:13.422981  543705 net.go:770] primary dev: ETH0
I0321 18:31:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:31:13.423007  543705 net.go:698] Add success.
I0321 18:31:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:31:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:31:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0321 18:31:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:31:14.456879  543705 disk_worker.go:494] system disk:vda1
I0321 18:31:14.456921  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:31:15.455009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:31:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:31:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:31:16.472523  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:31:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:31:23.409773  543705 memory.go:184] no items to output this cycle
I0321 18:31:23.409775  543705 cpu.go:275] no items to output this cycle
I0321 18:31:30.811686  543705 disk_info.go:125] begin check local disk info of client
I0321 18:31:30.814274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:31:30.814281  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469a40 0xc000469a80]
E0321 18:31:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:31:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 18:31:33.409783  543705 memory.go:184] no items to output this cycle
E0321 18:31:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:31:43.409781  543705 memory.go:191] Add success.
I0321 18:31:43.409803  543705 cpu.go:282] Add success.
I0321 18:31:43.419850  543705 net.go:648] Add success.
I0321 18:31:43.422566  543705 net.go:770] primary dev: ETH0
I0321 18:31:43.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:31:43.422591  543705 net.go:698] Add success.
I0321 18:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:31:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:31:53.410329  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:31:53.410344  543705 memory.go:184] no items to output this cycle
I0321 18:31:53.410363  543705 cpu.go:275] no items to output this cycle
E0321 18:32:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:32:03.409796  543705 memory.go:184] no items to output this cycle
I0321 18:32:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 18:32:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:32:13.409791  543705 cpu.go:282] Add success.
I0321 18:32:13.409799  543705 memory.go:191] Add success.
W0321 18:32:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:32:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:32:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:32:13.420071  543705 net.go:648] Add success.
I0321 18:32:13.422893  543705 net.go:770] primary dev: ETH0
I0321 18:32:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:32:13.422917  543705 net.go:698] Add success.
W0321 18:32:14.455418  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:32:14.455438  543705 disk_worker.go:708] disk space is not compliant
W0321 18:32:14.455442  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:32:14.456378  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:32:14.456387  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:32:14.456393  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:32:14.457483  543705 disk_worker.go:494] system disk:vda1
I0321 18:32:14.457519  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:32:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:32:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:32:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:32:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:32:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:32:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:32:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:32:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:32:23.409794  543705 memory.go:184] no items to output this cycle
I0321 18:32:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 18:32:30.815704  543705 disk_info.go:125] begin check local disk info of client
I0321 18:32:30.818241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:32:30.818247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5180 0xc0000c51c0]
E0321 18:32:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:32:33.409756  543705 memory.go:184] no items to output this cycle
I0321 18:32:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 18:32:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:32:43.409781  543705 memory.go:191] Add success.
I0321 18:32:43.409798  543705 cpu.go:282] Add success.
I0321 18:32:43.419984  543705 net.go:648] Add success.
I0321 18:32:43.422566  543705 net.go:770] primary dev: ETH0
I0321 18:32:43.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:32:43.422590  543705 net.go:698] Add success.
I0321 18:32:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:32:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:32:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:32:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:32:53.409788  543705 memory.go:184] no items to output this cycle
I0321 18:32:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 18:33:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:33:03.409811  543705 memory.go:184] no items to output this cycle
I0321 18:33:03.409830  543705 cpu.go:275] no items to output this cycle
E0321 18:33:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:33:13.409804  543705 memory.go:191] Add success.
I0321 18:33:13.409803  543705 cpu.go:282] Add success.
W0321 18:33:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:33:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:33:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:33:13.420138  543705 net.go:648] Add success.
I0321 18:33:13.422845  543705 net.go:770] primary dev: ETH0
I0321 18:33:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:33:13.422871  543705 net.go:698] Add success.
I0321 18:33:13.514226  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d2b247e5-da08-4adc-9c80-755fdfa1d658","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:33:13.514286  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:33:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:33:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 18:33:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:33:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 18:33:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:33:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:33:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:33:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:33:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:33:16.472501  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:33:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:33:23.409820  543705 memory.go:184] no items to output this cycle
I0321 18:33:23.409830  543705 cpu.go:275] no items to output this cycle
I0321 18:33:30.819747  543705 disk_info.go:125] begin check local disk info of client
I0321 18:33:30.822360  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:33:30.822367  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0321 18:33:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:33:33.409807  543705 memory.go:184] no items to output this cycle
I0321 18:33:33.409820  543705 cpu.go:275] no items to output this cycle
I0321 18:33:39.102180  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:33:39.102188  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:33:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:33:43.410567  543705 memory.go:191] Add success.
I0321 18:33:43.409790  543705 cpu.go:282] Add success.
I0321 18:33:43.420281  543705 net.go:648] Add success.
I0321 18:33:43.422827  543705 net.go:770] primary dev: ETH0
I0321 18:33:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:33:43.422854  543705 net.go:698] Add success.
I0321 18:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:33:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:33:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:33:53.410264  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:33:53.410280  543705 memory.go:184] no items to output this cycle
I0321 18:33:53.410282  543705 cpu.go:275] no items to output this cycle
E0321 18:34:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:34:03.409799  543705 memory.go:184] no items to output this cycle
I0321 18:34:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 18:34:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:34:13.409794  543705 memory.go:191] Add success.
I0321 18:34:13.409817  543705 cpu.go:282] Add success.
W0321 18:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:34:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:34:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:34:13.420148  543705 net.go:648] Add success.
I0321 18:34:13.422719  543705 net.go:770] primary dev: ETH0
I0321 18:34:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:34:13.422746  543705 net.go:698] Add success.
I0321 18:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:34:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:34:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0321 18:34:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:34:14.456890  543705 disk_worker.go:494] system disk:vda1
I0321 18:34:14.456922  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:34:15.454998  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:34:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:34:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:34:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:34:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:34:23.409782  543705 memory.go:184] no items to output this cycle
I0321 18:34:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 18:34:30.823757  543705 disk_info.go:125] begin check local disk info of client
I0321 18:34:30.826313  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:34:30.826319  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487c00 0xc000487c40]
E0321 18:34:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:34:33.409769  543705 memory.go:184] no items to output this cycle
I0321 18:34:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 18:34:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:34:43.409804  543705 memory.go:191] Add success.
I0321 18:34:43.409813  543705 cpu.go:282] Add success.
I0321 18:34:43.419934  543705 net.go:648] Add success.
I0321 18:34:43.422551  543705 net.go:770] primary dev: ETH0
I0321 18:34:43.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:34:43.422584  543705 net.go:698] Add success.
I0321 18:34:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:34:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:34:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:34:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:34:53.409786  543705 memory.go:184] no items to output this cycle
I0321 18:34:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 18:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:35:03.409782  543705 memory.go:184] no items to output this cycle
I0321 18:35:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 18:35:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:35:13.409821  543705 memory.go:191] Add success.
I0321 18:35:13.409833  543705 cpu.go:282] Add success.
W0321 18:35:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:35:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:35:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:35:13.420135  543705 net.go:648] Add success.
I0321 18:35:13.422641  543705 net.go:770] primary dev: ETH0
I0321 18:35:13.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:35:13.422669  543705 net.go:698] Add success.
I0321 18:35:14.454087  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:35:14.454250  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:35:14.454325  543705 disk_worker.go:708] disk space is not compliant
W0321 18:35:14.454328  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:35:14.455744  543705 disk_worker.go:494] system disk:vda1
I0321 18:35:14.455784  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:35:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:35:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:35:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:35:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:35:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:35:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:35:23.409765  543705 memory.go:184] no items to output this cycle
I0321 18:35:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 18:35:30.827773  543705 disk_info.go:125] begin check local disk info of client
I0321 18:35:30.830282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:35:30.830288  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b0c0 0xc00007b100]
E0321 18:35:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:35:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 18:35:33.409785  543705 memory.go:184] no items to output this cycle
E0321 18:35:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:35:43.409806  543705 memory.go:191] Add success.
I0321 18:35:43.409814  543705 cpu.go:282] Add success.
I0321 18:35:43.419839  543705 net.go:648] Add success.
I0321 18:35:43.422606  543705 net.go:770] primary dev: ETH0
I0321 18:35:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:35:43.422632  543705 net.go:698] Add success.
I0321 18:35:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:35:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:35:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:35:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:35:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 18:35:53.409784  543705 memory.go:184] no items to output this cycle
E0321 18:36:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:36:03.409796  543705 memory.go:184] no items to output this cycle
I0321 18:36:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 18:36:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:36:13.409783  543705 memory.go:191] Add success.
W0321 18:36:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 18:36:13.409816  543705 cpu.go:282] Add success.
W0321 18:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:36:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:36:13.420155  543705 net.go:648] Add success.
I0321 18:36:13.422688  543705 net.go:770] primary dev: ETH0
I0321 18:36:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:36:13.422719  543705 net.go:698] Add success.
I0321 18:36:13.468103  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f733a17e-0349-403e-b9fa-8472a51bf3ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:36:13.468133  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:36:14.453939  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:36:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:36:14.455249  543705 disk_worker.go:708] disk space is not compliant
W0321 18:36:14.455252  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:36:14.456625  543705 disk_worker.go:494] system disk:vda1
I0321 18:36:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:36:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:36:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:36:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:36:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:36:23.409776  543705 memory.go:184] no items to output this cycle
I0321 18:36:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 18:36:30.831796  543705 disk_info.go:125] begin check local disk info of client
I0321 18:36:30.834365  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:36:30.834372  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4bc0 0xc0000c4c00]
E0321 18:36:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:36:33.409774  543705 memory.go:184] no items to output this cycle
I0321 18:36:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 18:36:39.105207  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:36:39.105213  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:36:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:36:43.410597  543705 memory.go:191] Add success.
I0321 18:36:43.409792  543705 cpu.go:282] Add success.
I0321 18:36:43.420309  543705 net.go:648] Add success.
I0321 18:36:43.422823  543705 net.go:770] primary dev: ETH0
I0321 18:36:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:36:43.422850  543705 net.go:698] Add success.
I0321 18:36:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:36:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:36:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:36:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:36:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:36:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 18:37:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:37:03.409773  543705 memory.go:184] no items to output this cycle
I0321 18:37:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 18:37:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:37:13.409803  543705 cpu.go:282] Add success.
I0321 18:37:13.409805  543705 memory.go:191] Add success.
W0321 18:37:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:37:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:37:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:37:13.420252  543705 net.go:648] Add success.
I0321 18:37:13.422994  543705 net.go:770] primary dev: ETH0
I0321 18:37:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:37:13.423020  543705 net.go:698] Add success.
I0321 18:37:13.453608  543705 event_worker.go:152] Polling the log file for events...
W0321 18:37:14.454214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:37:14.454227  543705 disk_worker.go:708] disk space is not compliant
W0321 18:37:14.454230  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:37:14.455608  543705 disk_worker.go:494] system disk:vda1
I0321 18:37:14.455641  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:37:14.456353  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:37:14.456361  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:37:14.456365  543705 custom_config.go:64] query custom config with name: gpu
E0321 18:37:15.456994  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:37:15.457008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:37:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:37:16.457993  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:37:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:37:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:37:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:37:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:37:23.409780  543705 memory.go:184] no items to output this cycle
I0321 18:37:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 18:37:30.835816  543705 disk_info.go:125] begin check local disk info of client
I0321 18:37:30.838395  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:37:30.838401  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af40 0xc00007af80]
E0321 18:37:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:37:33.409774  543705 memory.go:184] no items to output this cycle
I0321 18:37:33.409824  543705 cpu.go:275] no items to output this cycle
E0321 18:37:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:37:43.409785  543705 memory.go:191] Add success.
I0321 18:37:43.409804  543705 cpu.go:282] Add success.
I0321 18:37:43.419861  543705 net.go:648] Add success.
I0321 18:37:43.422722  543705 net.go:770] primary dev: ETH0
I0321 18:37:43.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:37:43.422746  543705 net.go:698] Add success.
I0321 18:37:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:37:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:37:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:37:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:37:53.409763  543705 memory.go:184] no items to output this cycle
I0321 18:37:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 18:38:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:38:03.409781  543705 memory.go:184] no items to output this cycle
I0321 18:38:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 18:38:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:38:13.409780  543705 memory.go:191] Add success.
I0321 18:38:13.409791  543705 cpu.go:282] Add success.
W0321 18:38:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:38:13.412629  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:38:13.412634  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:38:13.420253  543705 net.go:648] Add success.
I0321 18:38:13.422206  543705 net.go:770] primary dev: ETH0
I0321 18:38:13.422219  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:38:13.422230  543705 net.go:698] Add success.
I0321 18:38:14.455145  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:38:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:38:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 18:38:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:38:14.458027  543705 disk_worker.go:494] system disk:vda1
I0321 18:38:14.458058  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:38:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:38:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:38:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:38:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:38:16.472529  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:38:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:38:23.409799  543705 memory.go:184] no items to output this cycle
I0321 18:38:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 18:38:30.839830  543705 disk_info.go:125] begin check local disk info of client
I0321 18:38:30.842359  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:38:30.842366  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521f00 0xc000521f40]
E0321 18:38:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:38:33.409775  543705 cpu.go:275] no items to output this cycle
I0321 18:38:33.409784  543705 memory.go:184] no items to output this cycle
E0321 18:38:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:38:43.409807  543705 memory.go:191] Add success.
I0321 18:38:43.409817  543705 cpu.go:282] Add success.
I0321 18:38:43.419946  543705 net.go:648] Add success.
I0321 18:38:43.422531  543705 net.go:770] primary dev: ETH0
I0321 18:38:43.422544  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:38:43.422556  543705 net.go:698] Add success.
I0321 18:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:38:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:38:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:38:53.409762  543705 memory.go:184] no items to output this cycle
I0321 18:38:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 18:39:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:39:03.409783  543705 memory.go:184] no items to output this cycle
I0321 18:39:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 18:39:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:39:13.409795  543705 memory.go:191] Add success.
I0321 18:39:13.409814  543705 cpu.go:282] Add success.
W0321 18:39:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:39:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:39:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:39:13.420179  543705 net.go:648] Add success.
I0321 18:39:13.423246  543705 net.go:770] primary dev: ETH0
I0321 18:39:13.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:39:13.423275  543705 net.go:698] Add success.
I0321 18:39:13.468499  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c3f1311-25e3-4ec5-94cf-5e790dced138","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:39:13.468538  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 18:39:14.455309  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:39:14.455327  543705 disk_worker.go:708] disk space is not compliant
W0321 18:39:14.455332  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:39:14.455705  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:39:14.457462  543705 disk_worker.go:494] system disk:vda1
I0321 18:39:14.457512  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:39:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:39:16.457578  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:39:16.457671  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:39:16.457701  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:39:16.473085  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:39:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:39:23.409804  543705 memory.go:184] no items to output this cycle
I0321 18:39:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 18:39:30.843853  543705 disk_info.go:125] begin check local disk info of client
I0321 18:39:30.846458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:39:30.846464  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0780 0xc0003b07c0]
E0321 18:39:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:39:33.409774  543705 memory.go:184] no items to output this cycle
I0321 18:39:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 18:39:39.105732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:39:39.105739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:39:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:39:43.410622  543705 memory.go:191] Add success.
I0321 18:39:43.409786  543705 cpu.go:282] Add success.
I0321 18:39:43.420320  543705 net.go:648] Add success.
I0321 18:39:43.422980  543705 net.go:770] primary dev: ETH0
I0321 18:39:43.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:39:43.423007  543705 net.go:698] Add success.
I0321 18:39:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:39:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:39:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:39:53.410213  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:39:53.410234  543705 memory.go:184] no items to output this cycle
I0321 18:39:53.410243  543705 cpu.go:275] no items to output this cycle
E0321 18:40:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:40:03.409767  543705 memory.go:184] no items to output this cycle
I0321 18:40:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 18:40:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:40:13.409816  543705 memory.go:191] Add success.
I0321 18:40:13.409824  543705 cpu.go:282] Add success.
W0321 18:40:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:40:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:40:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:40:13.420123  543705 net.go:648] Add success.
I0321 18:40:13.423024  543705 net.go:770] primary dev: ETH0
I0321 18:40:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:40:13.423052  543705 net.go:698] Add success.
I0321 18:40:14.455075  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:40:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:40:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 18:40:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:40:14.456723  543705 disk_worker.go:494] system disk:vda1
I0321 18:40:14.456756  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:40:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:40:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:40:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:40:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:40:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:40:23.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:40:23.409903  543705 cpu.go:275] no items to output this cycle
I0321 18:40:23.409910  543705 memory.go:184] no items to output this cycle
I0321 18:40:30.847874  543705 disk_info.go:125] begin check local disk info of client
I0321 18:40:30.850459  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:40:30.850465  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053b700 0xc00053b740]
E0321 18:40:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:40:33.409771  543705 memory.go:184] no items to output this cycle
I0321 18:40:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 18:40:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:40:43.409792  543705 cpu.go:282] Add success.
I0321 18:40:43.409796  543705 memory.go:191] Add success.
I0321 18:40:43.420027  543705 net.go:648] Add success.
I0321 18:40:43.422825  543705 net.go:770] primary dev: ETH0
I0321 18:40:43.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:40:43.422854  543705 net.go:698] Add success.
I0321 18:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:40:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:40:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:40:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:40:53.409779  543705 memory.go:184] no items to output this cycle
I0321 18:40:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 18:41:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:41:03.409768  543705 memory.go:184] no items to output this cycle
I0321 18:41:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 18:41:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:41:13.409788  543705 memory.go:191] Add success.
I0321 18:41:13.409810  543705 cpu.go:282] Add success.
W0321 18:41:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:41:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:41:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:41:13.420227  543705 net.go:648] Add success.
I0321 18:41:13.423303  543705 net.go:770] primary dev: ETH0
I0321 18:41:13.423316  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:41:13.423329  543705 net.go:698] Add success.
I0321 18:41:14.453938  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:41:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:41:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 18:41:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:41:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 18:41:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:41:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:41:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:41:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:41:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:41:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:41:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:41:23.409799  543705 memory.go:184] no items to output this cycle
I0321 18:41:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 18:41:30.851896  543705 disk_info.go:125] begin check local disk info of client
I0321 18:41:30.854502  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:41:30.854509  543705 disk_info.go:196] parse disk info done, disk is : [0xc000280000 0xc000280040]
E0321 18:41:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:41:33.409795  543705 memory.go:184] no items to output this cycle
I0321 18:41:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 18:41:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:41:43.409803  543705 memory.go:191] Add success.
I0321 18:41:43.409811  543705 cpu.go:282] Add success.
I0321 18:41:43.419863  543705 net.go:648] Add success.
I0321 18:41:43.422442  543705 net.go:770] primary dev: ETH0
I0321 18:41:43.422457  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:41:43.422471  543705 net.go:698] Add success.
I0321 18:41:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:41:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:41:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:41:53.409772  543705 memory.go:184] no items to output this cycle
I0321 18:41:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 18:42:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:42:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 18:42:03.409786  543705 memory.go:184] no items to output this cycle
E0321 18:42:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:42:13.409812  543705 memory.go:191] Add success.
I0321 18:42:13.409822  543705 cpu.go:282] Add success.
W0321 18:42:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:42:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:42:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:42:13.420187  543705 net.go:648] Add success.
I0321 18:42:13.423155  543705 net.go:770] primary dev: ETH0
I0321 18:42:13.423168  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:42:13.423181  543705 net.go:698] Add success.
I0321 18:42:13.530011  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fec671bc-9406-448b-b8a6-d6d38c479aed","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:42:13.530049  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 18:42:14.454505  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:42:14.454519  543705 disk_worker.go:708] disk space is not compliant
W0321 18:42:14.454524  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:42:14.454906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:42:14.454915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:42:14.454921  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:42:14.457112  543705 disk_worker.go:494] system disk:vda1
I0321 18:42:14.457159  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:42:15.457035  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:42:15.457050  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:42:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:42:16.457998  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:42:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:42:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:42:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:42:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:42:23.409764  543705 memory.go:184] no items to output this cycle
I0321 18:42:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 18:42:30.855909  543705 disk_info.go:125] begin check local disk info of client
I0321 18:42:30.858697  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:42:30.858704  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b13c0 0xc0003b1400]
E0321 18:42:33.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:42:33.409951  543705 memory.go:184] no items to output this cycle
I0321 18:42:33.409967  543705 cpu.go:275] no items to output this cycle
I0321 18:42:39.105880  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:42:39.105886  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:42:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:42:43.410771  543705 memory.go:191] Add success.
I0321 18:42:43.409788  543705 cpu.go:282] Add success.
I0321 18:42:43.420507  543705 net.go:648] Add success.
I0321 18:42:43.423917  543705 net.go:770] primary dev: ETH0
I0321 18:42:43.423930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:42:43.423942  543705 net.go:698] Add success.
I0321 18:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:42:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:42:53.409763  543705 memory.go:184] no items to output this cycle
I0321 18:42:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 18:43:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:43:03.409770  543705 memory.go:184] no items to output this cycle
I0321 18:43:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 18:43:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:43:13.409821  543705 memory.go:191] Add success.
I0321 18:43:13.409826  543705 cpu.go:282] Add success.
W0321 18:43:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:43:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:43:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:43:13.420340  543705 net.go:648] Add success.
I0321 18:43:13.422922  543705 net.go:770] primary dev: ETH0
I0321 18:43:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:43:13.422947  543705 net.go:698] Add success.
I0321 18:43:14.455084  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:43:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:43:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 18:43:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:43:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 18:43:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:43:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:43:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:43:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:43:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:43:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:43:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:43:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 18:43:23.409792  543705 memory.go:184] no items to output this cycle
I0321 18:43:30.860944  543705 disk_info.go:125] begin check local disk info of client
I0321 18:43:30.863476  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:43:30.863482  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049ae40 0xc00049ae80]
E0321 18:43:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:43:33.409792  543705 memory.go:184] no items to output this cycle
I0321 18:43:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:43:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:43:43.409793  543705 memory.go:191] Add success.
I0321 18:43:43.409796  543705 cpu.go:282] Add success.
I0321 18:43:43.419878  543705 net.go:648] Add success.
I0321 18:43:43.422753  543705 net.go:770] primary dev: ETH0
I0321 18:43:43.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:43:43.422783  543705 net.go:698] Add success.
I0321 18:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:43:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:43:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:43:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:43:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 18:43:53.409781  543705 memory.go:184] no items to output this cycle
E0321 18:44:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:44:03.409776  543705 memory.go:184] no items to output this cycle
I0321 18:44:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 18:44:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:44:13.409795  543705 memory.go:191] Add success.
I0321 18:44:13.409812  543705 cpu.go:282] Add success.
W0321 18:44:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:44:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:44:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:44:13.420216  543705 net.go:648] Add success.
I0321 18:44:13.422876  543705 net.go:770] primary dev: ETH0
I0321 18:44:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:44:13.422906  543705 net.go:698] Add success.
I0321 18:44:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:44:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:44:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 18:44:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:44:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 18:44:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:44:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:44:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:44:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:44:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:44:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:44:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:44:23.409782  543705 memory.go:184] no items to output this cycle
I0321 18:44:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 18:44:30.864963  543705 disk_info.go:125] begin check local disk info of client
I0321 18:44:30.867523  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:44:30.867530  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381980 0xc0003819c0]
E0321 18:44:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:44:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 18:44:33.409789  543705 memory.go:184] no items to output this cycle
E0321 18:44:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:44:43.409793  543705 memory.go:191] Add success.
I0321 18:44:43.409812  543705 cpu.go:282] Add success.
I0321 18:44:43.420273  543705 net.go:648] Add success.
I0321 18:44:43.423390  543705 net.go:770] primary dev: ETH0
I0321 18:44:43.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:44:43.423430  543705 net.go:698] Add success.
I0321 18:44:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:44:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:44:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:44:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:44:53.409781  543705 memory.go:184] no items to output this cycle
I0321 18:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:45:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:45:03.409796  543705 cpu.go:275] no items to output this cycle
I0321 18:45:03.409803  543705 memory.go:184] no items to output this cycle
E0321 18:45:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:45:13.409811  543705 memory.go:191] Add success.
I0321 18:45:13.409827  543705 cpu.go:282] Add success.
W0321 18:45:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:45:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:45:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:45:13.420294  543705 net.go:648] Add success.
I0321 18:45:13.423179  543705 net.go:770] primary dev: ETH0
I0321 18:45:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:45:13.423208  543705 net.go:698] Add success.
I0321 18:45:13.462595  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a1e30838-9984-4152-97e6-c99c90913356","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:45:13.462627  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:45:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:45:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:45:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 18:45:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:45:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 18:45:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:45:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:45:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:45:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:45:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:45:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:45:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:45:23.409782  543705 memory.go:184] no items to output this cycle
I0321 18:45:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 18:45:30.868982  543705 disk_info.go:125] begin check local disk info of client
I0321 18:45:30.871610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:45:30.871617  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be5c0 0xc0002be600]
E0321 18:45:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:45:33.409779  543705 memory.go:184] no items to output this cycle
I0321 18:45:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 18:45:39.108928  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:45:39.108935  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:45:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:45:43.410815  543705 memory.go:191] Add success.
I0321 18:45:43.409802  543705 cpu.go:282] Add success.
I0321 18:45:43.420956  543705 net.go:648] Add success.
I0321 18:45:43.424278  543705 net.go:770] primary dev: ETH0
I0321 18:45:43.424292  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:45:43.424304  543705 net.go:698] Add success.
I0321 18:45:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:45:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:45:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:45:53.409776  543705 cpu.go:275] no items to output this cycle
I0321 18:45:53.409778  543705 memory.go:184] no items to output this cycle
E0321 18:46:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:46:03.409766  543705 memory.go:184] no items to output this cycle
I0321 18:46:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 18:46:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:46:13.409801  543705 memory.go:191] Add success.
I0321 18:46:13.409802  543705 cpu.go:282] Add success.
W0321 18:46:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:46:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:46:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:46:13.420132  543705 net.go:648] Add success.
I0321 18:46:13.422869  543705 net.go:770] primary dev: ETH0
I0321 18:46:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:46:13.422895  543705 net.go:698] Add success.
I0321 18:46:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:46:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:46:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 18:46:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:46:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 18:46:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:46:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:46:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:46:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:46:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:46:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:46:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:46:23.409779  543705 memory.go:184] no items to output this cycle
I0321 18:46:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 18:46:30.873001  543705 disk_info.go:125] begin check local disk info of client
I0321 18:46:30.875562  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:46:30.875569  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298840 0xc000298880]
E0321 18:46:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:46:33.409758  543705 memory.go:184] no items to output this cycle
I0321 18:46:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 18:46:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:46:43.409779  543705 memory.go:191] Add success.
I0321 18:46:43.409802  543705 cpu.go:282] Add success.
I0321 18:46:43.419852  543705 net.go:648] Add success.
I0321 18:46:43.422481  543705 net.go:770] primary dev: ETH0
I0321 18:46:43.422494  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:46:43.422506  543705 net.go:698] Add success.
I0321 18:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:46:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:46:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:46:53.409803  543705 memory.go:184] no items to output this cycle
I0321 18:46:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 18:47:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:47:03.409820  543705 memory.go:184] no items to output this cycle
I0321 18:47:03.409837  543705 cpu.go:275] no items to output this cycle
E0321 18:47:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:47:13.409797  543705 memory.go:191] Add success.
I0321 18:47:13.409806  543705 cpu.go:282] Add success.
W0321 18:47:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:47:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:47:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:47:13.420185  543705 net.go:648] Add success.
I0321 18:47:13.422836  543705 net.go:770] primary dev: ETH0
I0321 18:47:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:47:13.422865  543705 net.go:698] Add success.
I0321 18:47:13.453428  543705 event_worker.go:152] Polling the log file for events...
W0321 18:47:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:47:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 18:47:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:47:14.455894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:47:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:47:14.455918  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:47:14.456549  543705 disk_worker.go:494] system disk:vda1
I0321 18:47:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:47:15.457024  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:47:15.457038  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:47:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:47:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:47:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:47:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:47:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:47:23.410248  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:47:23.410263  543705 memory.go:184] no items to output this cycle
I0321 18:47:23.410269  543705 cpu.go:275] no items to output this cycle
I0321 18:47:30.877018  543705 disk_info.go:125] begin check local disk info of client
I0321 18:47:30.879613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:47:30.879620  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f680 0xc00046f6c0]
E0321 18:47:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:47:33.409775  543705 cpu.go:275] no items to output this cycle
I0321 18:47:33.409778  543705 memory.go:184] no items to output this cycle
E0321 18:47:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:47:43.409781  543705 memory.go:191] Add success.
I0321 18:47:43.409799  543705 cpu.go:282] Add success.
I0321 18:47:43.419853  543705 net.go:648] Add success.
I0321 18:47:43.422641  543705 net.go:770] primary dev: ETH0
I0321 18:47:43.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:47:43.422668  543705 net.go:698] Add success.
I0321 18:47:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:47:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:47:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:47:53.409778  543705 memory.go:184] no items to output this cycle
I0321 18:47:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 18:48:03.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:48:03.409888  543705 memory.go:184] no items to output this cycle
I0321 18:48:03.409959  543705 cpu.go:275] no items to output this cycle
E0321 18:48:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:48:13.409794  543705 memory.go:191] Add success.
I0321 18:48:13.409800  543705 cpu.go:282] Add success.
W0321 18:48:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:48:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:48:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:48:13.420081  543705 net.go:648] Add success.
I0321 18:48:13.422975  543705 net.go:770] primary dev: ETH0
I0321 18:48:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:48:13.423001  543705 net.go:698] Add success.
I0321 18:48:13.468953  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71431e4e-5d3e-4007-9e71-e0b36e7a4166","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:48:13.468991  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:48:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:48:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:48:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 18:48:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:48:14.456634  543705 disk_worker.go:494] system disk:vda1
I0321 18:48:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:48:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:48:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:48:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:48:16.472492  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:48:23.410266  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:48:23.410294  543705 memory.go:184] no items to output this cycle
I0321 18:48:23.410296  543705 cpu.go:275] no items to output this cycle
I0321 18:48:30.881034  543705 disk_info.go:125] begin check local disk info of client
I0321 18:48:30.883585  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:48:30.883592  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005219c0 0xc000521a00]
E0321 18:48:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:48:33.409766  543705 memory.go:184] no items to output this cycle
I0321 18:48:33.409798  543705 cpu.go:275] no items to output this cycle
I0321 18:48:39.109728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:48:39.109734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:48:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:48:43.410606  543705 memory.go:191] Add success.
I0321 18:48:43.409811  543705 cpu.go:282] Add success.
I0321 18:48:43.420274  543705 net.go:648] Add success.
I0321 18:48:43.423102  543705 net.go:770] primary dev: ETH0
I0321 18:48:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:48:43.423126  543705 net.go:698] Add success.
I0321 18:48:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:48:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:48:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:48:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:48:53.409769  543705 memory.go:184] no items to output this cycle
I0321 18:48:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 18:49:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:49:03.409800  543705 memory.go:184] no items to output this cycle
I0321 18:49:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 18:49:13.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:49:13.409969  543705 memory.go:191] Add success.
W0321 18:49:13.410005  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 18:49:13.410016  543705 cpu.go:282] Add success.
W0321 18:49:13.410028  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:49:13.410032  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:49:13.419758  543705 net.go:648] Add success.
I0321 18:49:13.422697  543705 net.go:770] primary dev: ETH0
I0321 18:49:13.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:49:13.422727  543705 net.go:698] Add success.
I0321 18:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:49:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:49:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0321 18:49:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:49:14.456614  543705 disk_worker.go:494] system disk:vda1
I0321 18:49:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:49:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:49:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:49:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:49:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:49:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:49:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:49:23.409783  543705 memory.go:184] no items to output this cycle
I0321 18:49:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 18:49:30.885069  543705 disk_info.go:125] begin check local disk info of client
I0321 18:49:30.887654  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:49:30.887668  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005200c0 0xc000520100]
E0321 18:49:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:49:33.409780  543705 memory.go:184] no items to output this cycle
I0321 18:49:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 18:49:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:49:43.409821  543705 memory.go:191] Add success.
I0321 18:49:43.409831  543705 cpu.go:282] Add success.
I0321 18:49:43.419874  543705 net.go:648] Add success.
I0321 18:49:43.422620  543705 net.go:770] primary dev: ETH0
I0321 18:49:43.422635  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:49:43.422648  543705 net.go:698] Add success.
I0321 18:49:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:49:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:49:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:49:53.409775  543705 memory.go:184] no items to output this cycle
I0321 18:49:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 18:50:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:50:03.409796  543705 memory.go:184] no items to output this cycle
I0321 18:50:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:50:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:50:13.409810  543705 memory.go:191] Add success.
I0321 18:50:13.409811  543705 cpu.go:282] Add success.
W0321 18:50:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:50:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:50:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:50:13.419759  543705 net.go:648] Add success.
I0321 18:50:13.422508  543705 net.go:770] primary dev: ETH0
I0321 18:50:13.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:50:13.422532  543705 net.go:698] Add success.
I0321 18:50:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:50:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:50:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0321 18:50:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:50:14.456480  543705 disk_worker.go:494] system disk:vda1
I0321 18:50:14.456522  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:50:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:50:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:50:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:50:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:50:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:50:23.409789  543705 memory.go:184] no items to output this cycle
I0321 18:50:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 18:50:30.889680  543705 disk_info.go:125] begin check local disk info of client
I0321 18:50:30.892229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:50:30.892236  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9740 0xc0004d9780]
E0321 18:50:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:50:33.409806  543705 memory.go:184] no items to output this cycle
I0321 18:50:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 18:50:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:50:43.409803  543705 memory.go:191] Add success.
I0321 18:50:43.409804  543705 cpu.go:282] Add success.
I0321 18:50:43.419861  543705 net.go:648] Add success.
I0321 18:50:43.422574  543705 net.go:770] primary dev: ETH0
I0321 18:50:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:50:43.422599  543705 net.go:698] Add success.
I0321 18:50:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:50:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:50:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:50:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:50:53.409769  543705 memory.go:184] no items to output this cycle
I0321 18:50:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:51:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:51:03.409815  543705 memory.go:184] no items to output this cycle
I0321 18:51:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 18:51:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:51:13.409882  543705 memory.go:191] Add success.
W0321 18:51:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:51:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:51:13.409930  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:51:13.409987  543705 cpu.go:282] Add success.
I0321 18:51:13.419731  543705 net.go:648] Add success.
I0321 18:51:13.422403  543705 net.go:770] primary dev: ETH0
I0321 18:51:13.422418  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:51:13.422431  543705 net.go:698] Add success.
I0321 18:51:13.469394  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9e2471c2-3a66-439e-845e-d8a6592d8ef0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:51:13.469425  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:51:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:51:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:51:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 18:51:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:51:14.456733  543705 disk_worker.go:494] system disk:vda1
I0321 18:51:14.456764  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:51:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:51:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:51:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:51:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:51:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:51:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:51:23.409781  543705 memory.go:184] no items to output this cycle
I0321 18:51:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 18:51:30.893680  543705 disk_info.go:125] begin check local disk info of client
I0321 18:51:30.896251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:51:30.896257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b8c0 0xc00007b900]
E0321 18:51:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:51:33.409793  543705 memory.go:184] no items to output this cycle
I0321 18:51:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 18:51:39.113252  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:51:39.113259  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:51:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:51:43.410595  543705 memory.go:191] Add success.
I0321 18:51:43.409817  543705 cpu.go:282] Add success.
I0321 18:51:43.420271  543705 net.go:648] Add success.
I0321 18:51:43.422826  543705 net.go:770] primary dev: ETH0
I0321 18:51:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:51:43.422851  543705 net.go:698] Add success.
I0321 18:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:51:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:51:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:51:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:51:53.409768  543705 memory.go:184] no items to output this cycle
I0321 18:51:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 18:52:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:52:03.409793  543705 memory.go:184] no items to output this cycle
I0321 18:52:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 18:52:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:52:13.409827  543705 memory.go:191] Add success.
I0321 18:52:13.409836  543705 cpu.go:282] Add success.
W0321 18:52:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:52:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:52:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:52:13.420160  543705 net.go:648] Add success.
I0321 18:52:13.422669  543705 net.go:770] primary dev: ETH0
I0321 18:52:13.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:52:13.422697  543705 net.go:698] Add success.
W0321 18:52:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:52:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 18:52:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:52:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:52:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:52:14.456939  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:52:14.457007  543705 disk_worker.go:494] system disk:vda1
I0321 18:52:14.457036  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:52:15.457025  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:52:15.457039  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:52:16.458014  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:52:16.458017  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:52:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:52:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:52:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:52:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:52:23.409785  543705 memory.go:184] no items to output this cycle
I0321 18:52:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 18:52:30.897676  543705 disk_info.go:125] begin check local disk info of client
I0321 18:52:30.900213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:52:30.900219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b0700 0xc0003b0740]
E0321 18:52:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:52:33.409784  543705 memory.go:184] no items to output this cycle
I0321 18:52:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 18:52:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:52:43.409787  543705 memory.go:191] Add success.
I0321 18:52:43.409810  543705 cpu.go:282] Add success.
I0321 18:52:43.419910  543705 net.go:648] Add success.
I0321 18:52:43.422547  543705 net.go:770] primary dev: ETH0
I0321 18:52:43.422563  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:52:43.422578  543705 net.go:698] Add success.
I0321 18:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:52:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:52:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:52:53.410403  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:52:53.410420  543705 memory.go:184] no items to output this cycle
I0321 18:52:53.410435  543705 cpu.go:275] no items to output this cycle
E0321 18:53:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:53:03.409771  543705 memory.go:184] no items to output this cycle
I0321 18:53:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 18:53:13.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:53:13.409929  543705 memory.go:191] Add success.
I0321 18:53:13.409904  543705 cpu.go:282] Add success.
W0321 18:53:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:53:13.409983  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:53:13.409998  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:53:13.419738  543705 net.go:648] Add success.
I0321 18:53:13.422758  543705 net.go:770] primary dev: ETH0
I0321 18:53:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:53:13.422783  543705 net.go:698] Add success.
I0321 18:53:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:53:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:53:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 18:53:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:53:14.456507  543705 disk_worker.go:494] system disk:vda1
I0321 18:53:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:53:15.455988  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:53:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:53:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:53:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:53:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:53:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:53:23.409782  543705 memory.go:184] no items to output this cycle
I0321 18:53:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 18:53:30.901675  543705 disk_info.go:125] begin check local disk info of client
I0321 18:53:30.904250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:53:30.904257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0321 18:53:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:53:33.409789  543705 memory.go:184] no items to output this cycle
I0321 18:53:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 18:53:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:53:43.409786  543705 memory.go:191] Add success.
I0321 18:53:43.409786  543705 cpu.go:282] Add success.
I0321 18:53:43.419877  543705 net.go:648] Add success.
I0321 18:53:43.423464  543705 net.go:770] primary dev: ETH0
I0321 18:53:43.423478  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:53:43.423490  543705 net.go:698] Add success.
I0321 18:53:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:53:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:53:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:53:53.409791  543705 memory.go:184] no items to output this cycle
I0321 18:53:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 18:54:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:54:03.409774  543705 memory.go:184] no items to output this cycle
I0321 18:54:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 18:54:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:54:13.409807  543705 memory.go:191] Add success.
W0321 18:54:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:54:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:54:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:54:13.409864  543705 cpu.go:282] Add success.
I0321 18:54:13.420404  543705 net.go:648] Add success.
I0321 18:54:13.423356  543705 net.go:770] primary dev: ETH0
I0321 18:54:13.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:54:13.423388  543705 net.go:698] Add success.
I0321 18:54:13.467898  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"10f08325-c88f-4704-9377-33a466a93bc1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:54:13.467933  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 18:54:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:54:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:54:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 18:54:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:54:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 18:54:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:54:15.455045  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:54:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:54:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:54:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:54:16.472560  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:54:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:54:23.409782  543705 memory.go:184] no items to output this cycle
I0321 18:54:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 18:54:30.905671  543705 disk_info.go:125] begin check local disk info of client
I0321 18:54:30.908265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:54:30.908271  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5540 0xc0000c5580]
E0321 18:54:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:54:33.409773  543705 memory.go:184] no items to output this cycle
I0321 18:54:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 18:54:39.113732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:54:39.113738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:54:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:54:43.410671  543705 memory.go:191] Add success.
I0321 18:54:43.409789  543705 cpu.go:282] Add success.
I0321 18:54:43.420193  543705 net.go:770] primary dev: ETH0
I0321 18:54:43.420207  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:54:43.420219  543705 net.go:698] Add success.
I0321 18:54:43.420568  543705 net.go:648] Add success.
I0321 18:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:54:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:54:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:54:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:54:53.409789  543705 memory.go:184] no items to output this cycle
I0321 18:54:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 18:55:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:55:03.409768  543705 memory.go:184] no items to output this cycle
I0321 18:55:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 18:55:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:55:13.409802  543705 memory.go:191] Add success.
I0321 18:55:13.409824  543705 cpu.go:282] Add success.
W0321 18:55:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:55:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:55:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:55:13.420207  543705 net.go:648] Add success.
I0321 18:55:13.423185  543705 net.go:770] primary dev: ETH0
I0321 18:55:13.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:55:13.423211  543705 net.go:698] Add success.
I0321 18:55:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:55:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:55:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 18:55:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:55:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 18:55:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:55:15.456008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:55:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:55:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:55:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:55:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:55:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:55:23.409770  543705 memory.go:184] no items to output this cycle
I0321 18:55:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 18:55:30.909673  543705 disk_info.go:125] begin check local disk info of client
I0321 18:55:30.912253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:55:30.912260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0321 18:55:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:55:33.409777  543705 cpu.go:275] no items to output this cycle
I0321 18:55:33.409781  543705 memory.go:184] no items to output this cycle
E0321 18:55:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:55:43.409796  543705 memory.go:191] Add success.
I0321 18:55:43.409799  543705 cpu.go:282] Add success.
I0321 18:55:43.419871  543705 net.go:648] Add success.
I0321 18:55:43.422560  543705 net.go:770] primary dev: ETH0
I0321 18:55:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:55:43.422591  543705 net.go:698] Add success.
I0321 18:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:55:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:55:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:55:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:55:53.409804  543705 memory.go:184] no items to output this cycle
I0321 18:55:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 18:56:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:56:03.409879  543705 cpu.go:275] no items to output this cycle
I0321 18:56:03.409890  543705 memory.go:184] no items to output this cycle
E0321 18:56:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:56:13.409800  543705 memory.go:191] Add success.
I0321 18:56:13.409817  543705 cpu.go:282] Add success.
W0321 18:56:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:56:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:56:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:56:13.420171  543705 net.go:648] Add success.
I0321 18:56:13.423207  543705 net.go:770] primary dev: ETH0
I0321 18:56:13.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:56:13.423232  543705 net.go:698] Add success.
I0321 18:56:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:56:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:56:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 18:56:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:56:14.456527  543705 disk_worker.go:494] system disk:vda1
I0321 18:56:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:56:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:56:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:56:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:56:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:56:16.472498  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:56:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:56:23.409794  543705 memory.go:184] no items to output this cycle
I0321 18:56:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 18:56:30.913674  543705 disk_info.go:125] begin check local disk info of client
I0321 18:56:30.916250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:56:30.916256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0cc0 0xc0003c0d00]
E0321 18:56:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:56:33.409785  543705 memory.go:184] no items to output this cycle
I0321 18:56:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 18:56:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:56:43.409785  543705 memory.go:191] Add success.
I0321 18:56:43.409790  543705 cpu.go:282] Add success.
I0321 18:56:43.419898  543705 net.go:648] Add success.
I0321 18:56:43.422873  543705 net.go:770] primary dev: ETH0
I0321 18:56:43.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:56:43.422902  543705 net.go:698] Add success.
I0321 18:56:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:56:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:56:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:56:53.409800  543705 cpu.go:275] no items to output this cycle
I0321 18:56:53.409803  543705 memory.go:184] no items to output this cycle
E0321 18:57:03.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:57:03.409896  543705 memory.go:184] no items to output this cycle
I0321 18:57:03.409901  543705 cpu.go:275] no items to output this cycle
E0321 18:57:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:57:13.409812  543705 cpu.go:282] Add success.
I0321 18:57:13.409813  543705 memory.go:191] Add success.
W0321 18:57:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:57:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:57:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:57:13.420054  543705 net.go:648] Add success.
I0321 18:57:13.428822  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 18:57:13.428901  543705 net.go:770] primary dev: ETH0
I0321 18:57:13.428915  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:57:13.428928  543705 net.go:698] Add success.
I0321 18:57:13.453466  543705 event_worker.go:152] Polling the log file for events...
I0321 18:57:13.468983  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe292cb2-16de-49fd-a5eb-b5ad45580576","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 18:57:13.469017  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 18:57:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:57:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 18:57:14.455214  543705 disk_worker.go:728] disk inode is not compliant
E0321 18:57:14.455930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 18:57:14.455938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 18:57:14.455943  543705 custom_config.go:64] query custom config with name: gpu
I0321 18:57:14.456611  543705 disk_worker.go:494] system disk:vda1
I0321 18:57:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 18:57:15.456982  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 18:57:15.456997  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:57:16.458109  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 18:57:16.458143  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 18:57:16.458189  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:57:16.458210  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:57:16.472602  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:57:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:57:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 18:57:23.409786  543705 memory.go:184] no items to output this cycle
I0321 18:57:30.917676  543705 disk_info.go:125] begin check local disk info of client
I0321 18:57:30.920203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:57:30.920209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9480 0xc0004d94c0]
E0321 18:57:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:57:33.409789  543705 memory.go:184] no items to output this cycle
I0321 18:57:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 18:57:39.117289  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 18:57:39.117295  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 18:57:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:57:43.410665  543705 memory.go:191] Add success.
I0321 18:57:43.409812  543705 cpu.go:282] Add success.
I0321 18:57:43.420389  543705 net.go:648] Add success.
I0321 18:57:43.423084  543705 net.go:770] primary dev: ETH0
I0321 18:57:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:57:43.423110  543705 net.go:698] Add success.
I0321 18:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:57:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:57:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:57:53.409874  543705 memory.go:184] no items to output this cycle
I0321 18:57:53.409914  543705 cpu.go:275] no items to output this cycle
E0321 18:58:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:58:03.409768  543705 memory.go:184] no items to output this cycle
I0321 18:58:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 18:58:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:58:13.409823  543705 memory.go:191] Add success.
I0321 18:58:13.409824  543705 cpu.go:282] Add success.
W0321 18:58:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 18:58:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:58:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:58:13.420192  543705 net.go:648] Add success.
I0321 18:58:13.422837  543705 net.go:770] primary dev: ETH0
I0321 18:58:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:58:13.422863  543705 net.go:698] Add success.
I0321 18:58:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:58:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:58:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 18:58:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:58:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 18:58:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:58:15.456004  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:58:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:58:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:58:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:58:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:58:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:58:23.409771  543705 memory.go:184] no items to output this cycle
I0321 18:58:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 18:58:30.921675  543705 disk_info.go:125] begin check local disk info of client
I0321 18:58:30.924211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:58:30.924218  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049b5c0 0xc00049b600]
E0321 18:58:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:58:33.409760  543705 memory.go:184] no items to output this cycle
I0321 18:58:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 18:58:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:58:43.409777  543705 memory.go:191] Add success.
I0321 18:58:43.409800  543705 cpu.go:282] Add success.
I0321 18:58:43.419848  543705 net.go:648] Add success.
I0321 18:58:43.422636  543705 net.go:770] primary dev: ETH0
I0321 18:58:43.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:58:43.422659  543705 net.go:698] Add success.
I0321 18:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:58:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:58:53.409781  543705 memory.go:184] no items to output this cycle
I0321 18:58:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 18:59:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:59:03.409767  543705 memory.go:184] no items to output this cycle
I0321 18:59:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 18:59:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:59:13.409797  543705 memory.go:191] Add success.
W0321 18:59:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 18:59:13.409823  543705 cpu.go:282] Add success.
W0321 18:59:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 18:59:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 18:59:13.420187  543705 net.go:648] Add success.
I0321 18:59:13.423308  543705 net.go:770] primary dev: ETH0
I0321 18:59:13.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:59:13.423334  543705 net.go:698] Add success.
I0321 18:59:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 18:59:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 18:59:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 18:59:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 18:59:14.456613  543705 disk_worker.go:494] system disk:vda1
I0321 18:59:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 18:59:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 18:59:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:59:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:59:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0321 18:59:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0321 18:59:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:59:23.409770  543705 memory.go:184] no items to output this cycle
I0321 18:59:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 18:59:30.925674  543705 disk_info.go:125] begin check local disk info of client
I0321 18:59:30.928217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 18:59:30.928223  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048cc80 0xc00048ccc0]
E0321 18:59:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:59:33.409758  543705 memory.go:184] no items to output this cycle
I0321 18:59:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 18:59:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:59:43.409778  543705 memory.go:191] Add success.
I0321 18:59:43.409801  543705 cpu.go:282] Add success.
I0321 18:59:43.419887  543705 net.go:648] Add success.
I0321 18:59:43.423025  543705 net.go:770] primary dev: ETH0
I0321 18:59:43.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0321 18:59:43.423051  543705 net.go:698] Add success.
I0321 18:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 18:59:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 18:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 18:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 18:59:53.409781  543705 memory.go:184] no items to output this cycle
I0321 18:59:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 19:00:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:00:03.409864  543705 memory.go:184] no items to output this cycle
I0321 19:00:03.409927  543705 cpu.go:275] no items to output this cycle
E0321 19:00:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:00:13.409803  543705 cpu.go:282] Add success.
I0321 19:00:13.409808  543705 memory.go:191] Add success.
W0321 19:00:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:00:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:00:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:00:13.420138  543705 net.go:648] Add success.
I0321 19:00:13.422659  543705 net.go:770] primary dev: ETH0
I0321 19:00:13.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:00:13.422684  543705 net.go:698] Add success.
I0321 19:00:13.469039  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84adbd5c-73c0-44ec-a44c-72d026d1691f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:00:13.469071  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:00:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:00:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:00:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 19:00:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:00:14.456600  543705 disk_worker.go:494] system disk:vda1
I0321 19:00:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:00:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:00:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:00:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:00:16.472514  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:00:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:00:23.409779  543705 memory.go:184] no items to output this cycle
I0321 19:00:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 19:00:30.929676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:00:30.932200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:00:30.932206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3b00 0xc0002a3b40]
E0321 19:00:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:00:33.409793  543705 memory.go:184] no items to output this cycle
I0321 19:00:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 19:00:39.117739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:00:39.117745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:00:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:00:43.410860  543705 memory.go:191] Add success.
I0321 19:00:43.409808  543705 cpu.go:282] Add success.
I0321 19:00:43.420546  543705 net.go:648] Add success.
I0321 19:00:43.423395  543705 net.go:770] primary dev: ETH0
I0321 19:00:43.423409  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:00:43.423422  543705 net.go:698] Add success.
I0321 19:00:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:00:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:00:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:00:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:00:53.409808  543705 memory.go:184] no items to output this cycle
I0321 19:00:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 19:01:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:01:03.409780  543705 memory.go:184] no items to output this cycle
I0321 19:01:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 19:01:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:01:13.409842  543705 memory.go:191] Add success.
I0321 19:01:13.409852  543705 cpu.go:282] Add success.
W0321 19:01:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:01:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:01:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:01:13.420311  543705 net.go:648] Add success.
I0321 19:01:13.423230  543705 net.go:770] primary dev: ETH0
I0321 19:01:13.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:01:13.423255  543705 net.go:698] Add success.
I0321 19:01:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:01:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:01:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 19:01:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:01:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 19:01:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:01:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:01:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:01:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:01:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:01:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:01:23.409802  543705 memory.go:184] no items to output this cycle
I0321 19:01:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 19:01:30.933677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:01:30.936212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:01:30.936218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dd400 0xc0003dd440]
E0321 19:01:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:01:33.409785  543705 memory.go:184] no items to output this cycle
I0321 19:01:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 19:01:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:01:43.409805  543705 memory.go:191] Add success.
I0321 19:01:43.409813  543705 cpu.go:282] Add success.
I0321 19:01:43.419841  543705 net.go:648] Add success.
I0321 19:01:43.422783  543705 net.go:770] primary dev: ETH0
I0321 19:01:43.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:01:43.422817  543705 net.go:698] Add success.
I0321 19:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:01:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:01:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:01:53.409781  543705 memory.go:184] no items to output this cycle
I0321 19:01:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 19:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:02:03.409774  543705 memory.go:184] no items to output this cycle
I0321 19:02:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 19:02:13.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:02:13.409894  543705 memory.go:191] Add success.
W0321 19:02:13.409926  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:02:13.409957  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:02:13.409961  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:02:13.410053  543705 cpu.go:282] Add success.
I0321 19:02:13.419718  543705 net.go:648] Add success.
I0321 19:02:13.422493  543705 net.go:770] primary dev: ETH0
I0321 19:02:13.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:02:13.422518  543705 net.go:698] Add success.
W0321 19:02:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:02:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 19:02:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:02:14.455837  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:02:14.455844  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:02:14.455849  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:02:14.456798  543705 disk_worker.go:494] system disk:vda1
I0321 19:02:14.456828  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:02:15.456856  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:02:15.456865  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:02:16.458106  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:02:16.458171  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:02:16.458178  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:02:16.458196  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:02:16.472659  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:02:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:02:23.409771  543705 memory.go:184] no items to output this cycle
I0321 19:02:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 19:02:30.937679  543705 disk_info.go:125] begin check local disk info of client
I0321 19:02:30.940252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:02:30.940259  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abdc0 0xc0001abe00]
E0321 19:02:33.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:02:33.409755  543705 memory.go:184] no items to output this cycle
I0321 19:02:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 19:02:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:02:43.409809  543705 memory.go:191] Add success.
I0321 19:02:43.409820  543705 cpu.go:282] Add success.
I0321 19:02:43.419894  543705 net.go:648] Add success.
I0321 19:02:43.422752  543705 net.go:770] primary dev: ETH0
I0321 19:02:43.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:02:43.422780  543705 net.go:698] Add success.
I0321 19:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:02:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:02:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:02:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:02:53.409787  543705 memory.go:184] no items to output this cycle
I0321 19:02:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 19:03:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:03:03.409786  543705 memory.go:184] no items to output this cycle
I0321 19:03:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 19:03:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:03:13.409808  543705 memory.go:191] Add success.
I0321 19:03:13.409812  543705 cpu.go:282] Add success.
W0321 19:03:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:03:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:03:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:03:13.420403  543705 net.go:648] Add success.
I0321 19:03:13.423334  543705 net.go:770] primary dev: ETH0
I0321 19:03:13.423346  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:03:13.423358  543705 net.go:698] Add success.
I0321 19:03:13.467676  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"059f7844-fa52-4a23-9bc2-c59ab59b3bb9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:03:13.467716  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:03:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:03:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:03:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 19:03:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:03:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 19:03:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:03:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:03:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:03:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:03:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:03:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:03:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:03:23.409786  543705 memory.go:184] no items to output this cycle
I0321 19:03:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 19:03:30.941675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:03:30.944238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:03:30.944245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483f40 0xc0003b0000]
E0321 19:03:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:03:33.409783  543705 memory.go:184] no items to output this cycle
I0321 19:03:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 19:03:39.121283  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:03:39.121289  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:03:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:03:43.410591  543705 memory.go:191] Add success.
I0321 19:03:43.409797  543705 cpu.go:282] Add success.
I0321 19:03:43.420300  543705 net.go:648] Add success.
I0321 19:03:43.422863  543705 net.go:770] primary dev: ETH0
I0321 19:03:43.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:03:43.422890  543705 net.go:698] Add success.
I0321 19:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:03:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:03:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:03:53.409776  543705 memory.go:184] no items to output this cycle
I0321 19:03:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 19:04:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:04:03.409787  543705 memory.go:184] no items to output this cycle
I0321 19:04:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 19:04:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:04:13.409825  543705 memory.go:191] Add success.
I0321 19:04:13.409833  543705 cpu.go:282] Add success.
W0321 19:04:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:04:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:04:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:04:13.420538  543705 net.go:648] Add success.
I0321 19:04:13.423496  543705 net.go:770] primary dev: ETH0
I0321 19:04:13.423509  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:04:13.423521  543705 net.go:698] Add success.
I0321 19:04:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:04:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:04:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 19:04:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:04:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 19:04:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:04:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:04:16.458011  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:04:16.458085  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:04:16.458117  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:04:16.472573  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:04:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:04:23.409806  543705 memory.go:184] no items to output this cycle
I0321 19:04:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 19:04:30.945677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:04:30.948253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:04:30.948260  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483180 0xc0004831c0]
E0321 19:04:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:04:33.409777  543705 memory.go:184] no items to output this cycle
I0321 19:04:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 19:04:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:04:43.409829  543705 memory.go:191] Add success.
I0321 19:04:43.409836  543705 cpu.go:282] Add success.
I0321 19:04:43.419898  543705 net.go:648] Add success.
I0321 19:04:43.422701  543705 net.go:770] primary dev: ETH0
I0321 19:04:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:04:43.422728  543705 net.go:698] Add success.
I0321 19:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:04:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:04:53.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:04:53.410383  543705 memory.go:184] no items to output this cycle
I0321 19:04:53.410402  543705 cpu.go:275] no items to output this cycle
E0321 19:05:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:05:03.409775  543705 memory.go:184] no items to output this cycle
I0321 19:05:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 19:05:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:05:13.409828  543705 memory.go:191] Add success.
I0321 19:05:13.409831  543705 cpu.go:282] Add success.
W0321 19:05:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:05:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:05:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:05:13.420131  543705 net.go:648] Add success.
I0321 19:05:13.422890  543705 net.go:770] primary dev: ETH0
I0321 19:05:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:05:13.422915  543705 net.go:698] Add success.
I0321 19:05:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:05:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:05:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 19:05:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:05:14.456484  543705 disk_worker.go:494] system disk:vda1
I0321 19:05:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:05:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:05:16.458020  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:05:16.458096  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:05:16.458127  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:05:16.472515  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:05:23.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:05:23.409818  543705 memory.go:184] no items to output this cycle
I0321 19:05:23.409830  543705 cpu.go:275] no items to output this cycle
I0321 19:05:30.949677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:05:30.952263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:05:30.952270  543705 disk_info.go:196] parse disk info done, disk is : [0xc000562080 0xc0005620c0]
E0321 19:05:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:05:33.409770  543705 memory.go:184] no items to output this cycle
I0321 19:05:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:05:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:05:43.409816  543705 memory.go:191] Add success.
I0321 19:05:43.409819  543705 cpu.go:282] Add success.
I0321 19:05:43.419980  543705 net.go:648] Add success.
I0321 19:05:43.422647  543705 net.go:770] primary dev: ETH0
I0321 19:05:43.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:05:43.422674  543705 net.go:698] Add success.
I0321 19:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:05:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:05:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 19:05:53.409786  543705 memory.go:184] no items to output this cycle
E0321 19:06:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:06:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 19:06:03.409792  543705 memory.go:184] no items to output this cycle
E0321 19:06:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:06:13.409789  543705 memory.go:191] Add success.
I0321 19:06:13.409809  543705 cpu.go:282] Add success.
W0321 19:06:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:06:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:06:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:06:13.420232  543705 net.go:648] Add success.
I0321 19:06:13.422738  543705 net.go:770] primary dev: ETH0
I0321 19:06:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:06:13.422764  543705 net.go:698] Add success.
I0321 19:06:13.463712  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75bd0ec6-a234-45a5-874d-76e845a57a40","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:06:13.463746  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:06:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:06:14.455355  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:06:14.455439  543705 disk_worker.go:708] disk space is not compliant
W0321 19:06:14.455444  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:06:14.457123  543705 disk_worker.go:494] system disk:vda1
I0321 19:06:14.457160  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:06:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:06:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:06:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:06:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:06:16.472555  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:06:23.409781  543705 memory.go:184] no items to output this cycle
I0321 19:06:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 19:06:30.953674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:06:30.956239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:06:30.956245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1bc0 0xc0003b1c00]
E0321 19:06:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:06:33.409790  543705 memory.go:184] no items to output this cycle
I0321 19:06:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 19:06:39.121731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:06:39.121738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:06:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:06:43.410630  543705 memory.go:191] Add success.
I0321 19:06:43.409806  543705 cpu.go:282] Add success.
I0321 19:06:43.420390  543705 net.go:648] Add success.
I0321 19:06:43.423020  543705 net.go:770] primary dev: ETH0
I0321 19:06:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:06:43.423049  543705 net.go:698] Add success.
I0321 19:06:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:06:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:06:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:06:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:06:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 19:06:53.409786  543705 memory.go:184] no items to output this cycle
E0321 19:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:07:03.409771  543705 memory.go:184] no items to output this cycle
I0321 19:07:03.409778  543705 cpu.go:275] no items to output this cycle
E0321 19:07:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:07:13.409805  543705 memory.go:191] Add success.
I0321 19:07:13.409805  543705 cpu.go:282] Add success.
W0321 19:07:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:07:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:07:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:07:13.420061  543705 net.go:648] Add success.
I0321 19:07:13.422880  543705 net.go:770] primary dev: ETH0
I0321 19:07:13.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:07:13.422911  543705 net.go:698] Add success.
I0321 19:07:13.453507  543705 event_worker.go:152] Polling the log file for events...
W0321 19:07:14.455586  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:07:14.455600  543705 disk_worker.go:708] disk space is not compliant
W0321 19:07:14.455605  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:07:14.457805  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:07:14.457827  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:07:14.457908  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:07:14.457924  543705 disk_worker.go:494] system disk:vda1
I0321 19:07:14.457958  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:07:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:07:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:07:16.458109  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:07:16.458126  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:07:16.458169  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:07:16.458192  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:07:16.472593  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:07:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:07:23.409781  543705 memory.go:184] no items to output this cycle
I0321 19:07:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 19:07:30.957678  543705 disk_info.go:125] begin check local disk info of client
I0321 19:07:30.960193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:07:30.960200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
E0321 19:07:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:07:33.409794  543705 memory.go:184] no items to output this cycle
I0321 19:07:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 19:07:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:07:43.409775  543705 memory.go:191] Add success.
I0321 19:07:43.409806  543705 cpu.go:282] Add success.
I0321 19:07:43.419830  543705 net.go:648] Add success.
I0321 19:07:43.422577  543705 net.go:770] primary dev: ETH0
I0321 19:07:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:07:43.422603  543705 net.go:698] Add success.
I0321 19:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:07:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:07:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:07:53.409764  543705 memory.go:184] no items to output this cycle
I0321 19:07:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 19:08:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:08:03.409781  543705 cpu.go:275] no items to output this cycle
I0321 19:08:03.409783  543705 memory.go:184] no items to output this cycle
E0321 19:08:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:08:13.409824  543705 memory.go:191] Add success.
I0321 19:08:13.409832  543705 cpu.go:282] Add success.
W0321 19:08:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:08:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:08:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:08:13.420178  543705 net.go:648] Add success.
I0321 19:08:13.423194  543705 net.go:770] primary dev: ETH0
I0321 19:08:13.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:08:13.423224  543705 net.go:698] Add success.
I0321 19:08:14.453956  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:08:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:08:14.455256  543705 disk_worker.go:708] disk space is not compliant
W0321 19:08:14.455259  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:08:14.456635  543705 disk_worker.go:494] system disk:vda1
I0321 19:08:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:08:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:08:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:08:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:08:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:08:16.472514  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:08:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:08:23.409766  543705 memory.go:184] no items to output this cycle
I0321 19:08:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 19:08:30.961676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:08:30.964207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:08:30.964213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1f00 0xc0003b1f40]
E0321 19:08:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:08:33.409788  543705 memory.go:184] no items to output this cycle
I0321 19:08:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:08:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:08:43.409785  543705 memory.go:191] Add success.
I0321 19:08:43.409808  543705 cpu.go:282] Add success.
I0321 19:08:43.419891  543705 net.go:648] Add success.
I0321 19:08:43.422580  543705 net.go:770] primary dev: ETH0
I0321 19:08:43.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:08:43.422608  543705 net.go:698] Add success.
I0321 19:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:08:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:08:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:08:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:08:53.409763  543705 memory.go:184] no items to output this cycle
I0321 19:08:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 19:09:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:09:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 19:09:03.409786  543705 memory.go:184] no items to output this cycle
E0321 19:09:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:09:13.409795  543705 memory.go:191] Add success.
I0321 19:09:13.409814  543705 cpu.go:282] Add success.
W0321 19:09:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:09:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:09:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:09:13.420162  543705 net.go:648] Add success.
I0321 19:09:13.423256  543705 net.go:770] primary dev: ETH0
I0321 19:09:13.423269  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:09:13.423282  543705 net.go:698] Add success.
I0321 19:09:13.468091  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0157ab8c-7148-4aa0-8339-67e9118acb9f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:09:13.468123  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:09:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:09:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:09:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 19:09:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:09:14.456819  543705 disk_worker.go:494] system disk:vda1
I0321 19:09:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:09:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:09:16.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:09:16.458085  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:09:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:09:16.472514  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:09:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:09:23.409800  543705 memory.go:184] no items to output this cycle
I0321 19:09:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 19:09:30.965674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:09:30.968213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:09:30.968219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0321 19:09:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:09:33.409774  543705 cpu.go:275] no items to output this cycle
I0321 19:09:33.409781  543705 memory.go:184] no items to output this cycle
I0321 19:09:39.125350  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:09:39.125357  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:09:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:09:43.410881  543705 memory.go:191] Add success.
I0321 19:09:43.409803  543705 cpu.go:282] Add success.
I0321 19:09:43.420577  543705 net.go:648] Add success.
I0321 19:09:43.423763  543705 net.go:770] primary dev: ETH0
I0321 19:09:43.423776  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:09:43.423790  543705 net.go:698] Add success.
I0321 19:09:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:09:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:09:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:09:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:09:53.409774  543705 cpu.go:275] no items to output this cycle
I0321 19:09:53.409776  543705 memory.go:184] no items to output this cycle
E0321 19:10:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:10:03.409762  543705 memory.go:184] no items to output this cycle
I0321 19:10:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 19:10:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:10:13.409811  543705 memory.go:191] Add success.
I0321 19:10:13.409818  543705 cpu.go:282] Add success.
W0321 19:10:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:10:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:10:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:10:13.420235  543705 net.go:648] Add success.
I0321 19:10:13.423200  543705 net.go:770] primary dev: ETH0
I0321 19:10:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:10:13.423229  543705 net.go:698] Add success.
I0321 19:10:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:10:14.455327  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:10:14.455438  543705 disk_worker.go:708] disk space is not compliant
W0321 19:10:14.455442  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:10:14.457482  543705 disk_worker.go:494] system disk:vda1
I0321 19:10:14.457512  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:10:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:10:16.458028  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:10:16.458109  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:10:16.458145  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:10:16.472601  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:10:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:10:23.409774  543705 memory.go:184] no items to output this cycle
I0321 19:10:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 19:10:30.969677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:10:30.972170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:10:30.972177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b15c0 0xc0003b1600]
E0321 19:10:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:10:33.409791  543705 memory.go:184] no items to output this cycle
I0321 19:10:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 19:10:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:10:43.409782  543705 memory.go:191] Add success.
I0321 19:10:43.409804  543705 cpu.go:282] Add success.
I0321 19:10:43.419994  543705 net.go:648] Add success.
I0321 19:10:43.423590  543705 net.go:770] primary dev: ETH0
I0321 19:10:43.423602  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:10:43.423615  543705 net.go:698] Add success.
I0321 19:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:10:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:10:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:10:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:10:53.409767  543705 memory.go:184] no items to output this cycle
I0321 19:10:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 19:11:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:11:03.409788  543705 cpu.go:275] no items to output this cycle
I0321 19:11:03.409796  543705 memory.go:184] no items to output this cycle
E0321 19:11:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:11:13.409804  543705 memory.go:191] Add success.
I0321 19:11:13.409806  543705 cpu.go:282] Add success.
W0321 19:11:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:11:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:11:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:11:13.420198  543705 net.go:648] Add success.
I0321 19:11:13.423037  543705 net.go:770] primary dev: ETH0
I0321 19:11:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:11:13.423077  543705 net.go:698] Add success.
I0321 19:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:11:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:11:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 19:11:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:11:14.456541  543705 disk_worker.go:494] system disk:vda1
I0321 19:11:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:11:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:11:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:11:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:11:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:11:16.472547  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:11:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:11:23.409793  543705 memory.go:184] no items to output this cycle
I0321 19:11:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 19:11:30.973674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:11:30.976308  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:11:30.976316  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9d40 0xc0004d9d80]
E0321 19:11:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:11:33.409773  543705 memory.go:184] no items to output this cycle
I0321 19:11:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 19:11:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:11:43.409788  543705 memory.go:191] Add success.
I0321 19:11:43.409788  543705 cpu.go:282] Add success.
I0321 19:11:43.419875  543705 net.go:648] Add success.
I0321 19:11:43.422994  543705 net.go:770] primary dev: ETH0
I0321 19:11:43.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:11:43.423020  543705 net.go:698] Add success.
I0321 19:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:11:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:11:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:11:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:11:53.410248  543705 memory.go:184] no items to output this cycle
I0321 19:11:53.410262  543705 cpu.go:275] no items to output this cycle
E0321 19:12:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:12:03.409776  543705 memory.go:184] no items to output this cycle
I0321 19:12:03.409776  543705 cpu.go:275] no items to output this cycle
E0321 19:12:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:12:13.409816  543705 memory.go:191] Add success.
I0321 19:12:13.409821  543705 cpu.go:282] Add success.
W0321 19:12:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:12:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:12:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:12:13.420163  543705 net.go:648] Add success.
I0321 19:12:13.423071  543705 net.go:770] primary dev: ETH0
I0321 19:12:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:12:13.423099  543705 net.go:698] Add success.
I0321 19:12:13.687260  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eb70a894-cd0c-444b-9bb6-77570b55405d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:12:13.687306  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 19:12:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:12:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 19:12:14.455207  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:12:14.455989  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:12:14.455998  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:12:14.456004  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:12:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 19:12:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:12:15.456794  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:12:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:12:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:12:16.458000  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:12:16.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:12:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:12:16.472550  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:12:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:12:23.409774  543705 memory.go:184] no items to output this cycle
I0321 19:12:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 19:12:30.977674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:12:30.980235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:12:30.980242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9a80 0xc0004d9ac0]
E0321 19:12:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:12:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 19:12:33.409786  543705 memory.go:184] no items to output this cycle
I0321 19:12:39.125735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:12:39.125742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:12:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:12:43.410774  543705 memory.go:191] Add success.
I0321 19:12:43.409824  543705 cpu.go:282] Add success.
I0321 19:12:43.420473  543705 net.go:648] Add success.
I0321 19:12:43.423494  543705 net.go:770] primary dev: ETH0
I0321 19:12:43.423508  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:12:43.423520  543705 net.go:698] Add success.
I0321 19:12:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:12:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:12:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:12:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:12:53.409796  543705 memory.go:184] no items to output this cycle
I0321 19:12:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 19:13:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:13:03.409789  543705 memory.go:184] no items to output this cycle
I0321 19:13:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 19:13:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:13:13.409803  543705 memory.go:191] Add success.
I0321 19:13:13.409805  543705 cpu.go:282] Add success.
W0321 19:13:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:13:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:13:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:13:13.420156  543705 net.go:648] Add success.
I0321 19:13:13.423014  543705 net.go:770] primary dev: ETH0
I0321 19:13:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:13:13.423048  543705 net.go:698] Add success.
I0321 19:13:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:13:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:13:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 19:13:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:13:14.456615  543705 disk_worker.go:494] system disk:vda1
I0321 19:13:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:13:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:13:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:13:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:13:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:13:16.472539  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:13:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:13:23.409808  543705 memory.go:184] no items to output this cycle
I0321 19:13:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 19:13:30.981674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:13:30.984262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:13:30.984269  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9e00 0xc0004d9e40]
E0321 19:13:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:13:33.409767  543705 memory.go:184] no items to output this cycle
I0321 19:13:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 19:13:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:13:43.409812  543705 memory.go:191] Add success.
I0321 19:13:43.409820  543705 cpu.go:282] Add success.
I0321 19:13:43.419856  543705 net.go:648] Add success.
I0321 19:13:43.422981  543705 net.go:770] primary dev: ETH0
I0321 19:13:43.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:13:43.423006  543705 net.go:698] Add success.
I0321 19:13:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:13:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:13:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:13:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:13:53.409772  543705 memory.go:184] no items to output this cycle
I0321 19:13:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 19:14:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:14:03.409769  543705 memory.go:184] no items to output this cycle
I0321 19:14:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 19:14:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:14:13.409805  543705 memory.go:191] Add success.
I0321 19:14:13.409817  543705 cpu.go:282] Add success.
W0321 19:14:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:14:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:14:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:14:13.420151  543705 net.go:648] Add success.
I0321 19:14:13.422958  543705 net.go:770] primary dev: ETH0
I0321 19:14:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:14:13.422983  543705 net.go:698] Add success.
I0321 19:14:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:14:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:14:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 19:14:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:14:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 19:14:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:14:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:14:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:14:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:14:16.472533  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:14:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:14:23.409808  543705 memory.go:184] no items to output this cycle
I0321 19:14:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 19:14:30.985669  543705 disk_info.go:125] begin check local disk info of client
I0321 19:14:30.988206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:14:30.988213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fea00 0xc0003fea40]
E0321 19:14:33.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:14:33.409889  543705 memory.go:184] no items to output this cycle
I0321 19:14:33.410004  543705 cpu.go:275] no items to output this cycle
E0321 19:14:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:14:43.409789  543705 memory.go:191] Add success.
I0321 19:14:43.409805  543705 cpu.go:282] Add success.
I0321 19:14:43.420085  543705 net.go:648] Add success.
I0321 19:14:43.422768  543705 net.go:770] primary dev: ETH0
I0321 19:14:43.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:14:43.422797  543705 net.go:698] Add success.
I0321 19:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:14:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:14:53.409769  543705 memory.go:184] no items to output this cycle
I0321 19:14:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 19:15:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:15:03.409793  543705 memory.go:184] no items to output this cycle
I0321 19:15:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 19:15:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:15:13.409820  543705 memory.go:191] Add success.
I0321 19:15:13.409828  543705 cpu.go:282] Add success.
W0321 19:15:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:15:13.413111  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:15:13.413117  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:15:13.419942  543705 net.go:648] Add success.
I0321 19:15:13.421925  543705 net.go:770] primary dev: ETH0
I0321 19:15:13.421942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:15:13.421956  543705 net.go:698] Add success.
I0321 19:15:13.527469  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"08bac7e6-a6dc-4a3d-aca7-16f51f814db7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:15:13.527518  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:15:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:15:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 19:15:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:15:14.456617  543705 disk_worker.go:494] system disk:vda1
I0321 19:15:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:15:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:15:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:15:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:15:16.458109  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:15:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:15:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:15:23.409789  543705 memory.go:184] no items to output this cycle
I0321 19:15:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 19:15:30.989677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:15:30.992249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:15:30.992256  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039cb80 0xc00039cbc0]
E0321 19:15:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:15:33.409783  543705 memory.go:184] no items to output this cycle
I0321 19:15:33.409791  543705 cpu.go:275] no items to output this cycle
I0321 19:15:39.129337  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:15:39.129343  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:15:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:15:43.410577  543705 memory.go:191] Add success.
I0321 19:15:43.409817  543705 cpu.go:282] Add success.
I0321 19:15:43.420277  543705 net.go:648] Add success.
I0321 19:15:43.422788  543705 net.go:770] primary dev: ETH0
I0321 19:15:43.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:15:43.422814  543705 net.go:698] Add success.
I0321 19:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:15:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:15:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:15:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:15:53.409781  543705 memory.go:184] no items to output this cycle
I0321 19:15:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 19:16:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:16:03.409775  543705 memory.go:184] no items to output this cycle
I0321 19:16:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 19:16:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:16:13.409814  543705 memory.go:191] Add success.
I0321 19:16:13.409821  543705 cpu.go:282] Add success.
W0321 19:16:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:16:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:16:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:16:13.420163  543705 net.go:648] Add success.
I0321 19:16:13.422960  543705 net.go:770] primary dev: ETH0
I0321 19:16:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:16:13.422985  543705 net.go:698] Add success.
I0321 19:16:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:16:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:16:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 19:16:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:16:14.456509  543705 disk_worker.go:494] system disk:vda1
I0321 19:16:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:16:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:16:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:16:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:16:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:16:16.472487  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:16:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:16:23.409789  543705 memory.go:184] no items to output this cycle
I0321 19:16:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 19:16:30.993675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:16:30.996288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:16:30.996295  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395c00 0xc000395c40]
E0321 19:16:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:16:33.409775  543705 memory.go:184] no items to output this cycle
I0321 19:16:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 19:16:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:16:43.409899  543705 memory.go:191] Add success.
I0321 19:16:43.409933  543705 cpu.go:282] Add success.
I0321 19:16:43.419726  543705 net.go:648] Add success.
I0321 19:16:43.422488  543705 net.go:770] primary dev: ETH0
I0321 19:16:43.422500  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:16:43.422511  543705 net.go:698] Add success.
I0321 19:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:16:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:16:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:16:53.410261  543705 memory.go:184] no items to output this cycle
I0321 19:16:53.410266  543705 cpu.go:275] no items to output this cycle
E0321 19:17:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:17:03.409798  543705 memory.go:184] no items to output this cycle
I0321 19:17:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 19:17:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:17:13.409815  543705 memory.go:191] Add success.
W0321 19:17:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:17:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:17:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:17:13.409859  543705 cpu.go:282] Add success.
I0321 19:17:13.420483  543705 net.go:648] Add success.
I0321 19:17:13.423519  543705 net.go:770] primary dev: ETH0
I0321 19:17:13.423532  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:17:13.423543  543705 net.go:698] Add success.
I0321 19:17:13.453176  543705 event_worker.go:152] Polling the log file for events...
W0321 19:17:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:17:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 19:17:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:17:14.457101  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0321 19:17:14.457109  543705 disk_worker.go:494] system disk:vda1
E0321 19:17:14.457112  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:17:14.457119  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:17:14.457142  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:17:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:17:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:17:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:17:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:17:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:17:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:17:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:17:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:17:23.409794  543705 memory.go:184] no items to output this cycle
I0321 19:17:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 19:17:30.997744  543705 disk_info.go:125] begin check local disk info of client
I0321 19:17:31.000157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:17:31.000169  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ff40 0xc00046c000]
E0321 19:17:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:17:33.409792  543705 memory.go:184] no items to output this cycle
I0321 19:17:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 19:17:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:17:43.409788  543705 memory.go:191] Add success.
I0321 19:17:43.409807  543705 cpu.go:282] Add success.
I0321 19:17:43.419875  543705 net.go:648] Add success.
I0321 19:17:43.423098  543705 net.go:770] primary dev: ETH0
I0321 19:17:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:17:43.423125  543705 net.go:698] Add success.
I0321 19:17:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:17:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:17:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:17:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:17:53.409765  543705 memory.go:184] no items to output this cycle
I0321 19:17:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:18:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:18:03.409797  543705 memory.go:184] no items to output this cycle
I0321 19:18:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 19:18:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:18:13.409779  543705 memory.go:191] Add success.
W0321 19:18:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 19:18:13.409803  543705 cpu.go:282] Add success.
W0321 19:18:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:18:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:18:13.420060  543705 net.go:648] Add success.
I0321 19:18:13.422954  543705 net.go:770] primary dev: ETH0
I0321 19:18:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:18:13.422981  543705 net.go:698] Add success.
I0321 19:18:13.583145  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8cf46fbb-a08d-4435-9729-a8d5253c23cd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:18:13.583179  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:18:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:18:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:18:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 19:18:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:18:14.456560  543705 disk_worker.go:494] system disk:vda1
I0321 19:18:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:18:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:18:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:18:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:18:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:18:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:18:23.409807  543705 memory.go:184] no items to output this cycle
I0321 19:18:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 19:18:31.001676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:18:31.004145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:18:31.004154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6980 0xc0003b69c0]
E0321 19:18:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:18:33.409795  543705 memory.go:184] no items to output this cycle
I0321 19:18:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 19:18:39.129733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:18:39.129740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:18:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:18:43.410792  543705 memory.go:191] Add success.
I0321 19:18:43.409983  543705 cpu.go:282] Add success.
I0321 19:18:43.419713  543705 net.go:648] Add success.
I0321 19:18:43.422280  543705 net.go:770] primary dev: ETH0
I0321 19:18:43.422295  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:18:43.422309  543705 net.go:698] Add success.
I0321 19:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:18:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:18:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:18:53.409795  543705 memory.go:184] no items to output this cycle
I0321 19:18:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 19:19:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:19:03.409773  543705 memory.go:184] no items to output this cycle
I0321 19:19:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 19:19:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:19:13.409816  543705 cpu.go:282] Add success.
I0321 19:19:13.409833  543705 memory.go:191] Add success.
W0321 19:19:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:19:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:19:13.409894  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:19:13.420238  543705 net.go:648] Add success.
I0321 19:19:13.422903  543705 net.go:770] primary dev: ETH0
I0321 19:19:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:19:13.422930  543705 net.go:698] Add success.
I0321 19:19:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:19:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:19:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 19:19:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:19:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 19:19:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:19:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:19:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:19:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:19:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:19:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:19:23.409777  543705 memory.go:184] no items to output this cycle
I0321 19:19:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 19:19:31.005674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:19:31.008242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:19:31.008249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003adc80 0xc0003adcc0]
E0321 19:19:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:19:33.409783  543705 memory.go:184] no items to output this cycle
I0321 19:19:33.409786  543705 cpu.go:275] no items to output this cycle
E0321 19:19:43.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:19:43.409911  543705 cpu.go:282] Add success.
I0321 19:19:43.409919  543705 memory.go:191] Add success.
I0321 19:19:43.419711  543705 net.go:648] Add success.
I0321 19:19:43.422388  543705 net.go:770] primary dev: ETH0
I0321 19:19:43.422404  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:19:43.422427  543705 net.go:698] Add success.
I0321 19:19:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:19:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:19:53.410735  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:19:53.410754  543705 memory.go:184] no items to output this cycle
I0321 19:19:53.410764  543705 cpu.go:275] no items to output this cycle
E0321 19:20:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:20:03.409778  543705 memory.go:184] no items to output this cycle
I0321 19:20:03.409778  543705 cpu.go:275] no items to output this cycle
E0321 19:20:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:20:13.409783  543705 memory.go:191] Add success.
I0321 19:20:13.409785  543705 cpu.go:282] Add success.
W0321 19:20:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:20:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:20:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:20:13.420194  543705 net.go:648] Add success.
I0321 19:20:13.422658  543705 net.go:770] primary dev: ETH0
I0321 19:20:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:20:13.422683  543705 net.go:698] Add success.
I0321 19:20:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:20:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:20:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0321 19:20:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:20:14.456634  543705 disk_worker.go:494] system disk:vda1
I0321 19:20:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:20:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:20:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:20:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:20:16.472563  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:20:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:20:23.409788  543705 memory.go:184] no items to output this cycle
I0321 19:20:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 19:20:31.009679  543705 disk_info.go:125] begin check local disk info of client
I0321 19:20:31.012169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:20:31.012175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4100 0xc0003d4140]
E0321 19:20:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:20:33.409794  543705 memory.go:184] no items to output this cycle
I0321 19:20:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 19:20:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:20:43.409868  543705 memory.go:191] Add success.
I0321 19:20:43.409913  543705 cpu.go:282] Add success.
I0321 19:20:43.419721  543705 net.go:648] Add success.
I0321 19:20:43.422317  543705 net.go:770] primary dev: ETH0
I0321 19:20:43.422333  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:20:43.422346  543705 net.go:698] Add success.
I0321 19:20:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:20:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:20:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:20:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:20:53.409776  543705 memory.go:184] no items to output this cycle
I0321 19:20:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 19:21:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:21:03.409803  543705 memory.go:184] no items to output this cycle
I0321 19:21:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 19:21:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:21:13.409794  543705 memory.go:191] Add success.
I0321 19:21:13.409806  543705 cpu.go:282] Add success.
W0321 19:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:21:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:21:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:21:13.420127  543705 net.go:648] Add success.
I0321 19:21:13.422982  543705 net.go:770] primary dev: ETH0
I0321 19:21:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:21:13.423007  543705 net.go:698] Add success.
I0321 19:21:13.463788  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ba62653-624f-4069-bfe8-2b401e2edc81","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:21:13.463824  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:21:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:21:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:21:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0321 19:21:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:21:14.456890  543705 disk_worker.go:494] system disk:vda1
I0321 19:21:14.456927  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:21:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:21:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:21:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:21:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:21:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:21:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:21:23.409784  543705 memory.go:184] no items to output this cycle
I0321 19:21:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 19:21:31.013675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:21:31.016176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:21:31.016182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b07c0 0xc0002b0800]
E0321 19:21:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:21:33.409783  543705 memory.go:184] no items to output this cycle
I0321 19:21:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 19:21:39.133349  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:21:39.133356  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:21:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:21:43.410604  543705 memory.go:191] Add success.
I0321 19:21:43.409796  543705 cpu.go:282] Add success.
I0321 19:21:43.420309  543705 net.go:648] Add success.
I0321 19:21:43.422951  543705 net.go:770] primary dev: ETH0
I0321 19:21:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:21:43.422976  543705 net.go:698] Add success.
I0321 19:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:21:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:21:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:21:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:21:53.410386  543705 cpu.go:275] no items to output this cycle
I0321 19:21:53.410391  543705 memory.go:184] no items to output this cycle
E0321 19:22:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:22:03.409779  543705 cpu.go:275] no items to output this cycle
I0321 19:22:03.409787  543705 memory.go:184] no items to output this cycle
E0321 19:22:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:22:13.409807  543705 memory.go:191] Add success.
I0321 19:22:13.409817  543705 cpu.go:282] Add success.
W0321 19:22:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:22:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:22:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:22:13.420060  543705 net.go:648] Add success.
I0321 19:22:13.422863  543705 net.go:770] primary dev: ETH0
I0321 19:22:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:22:13.422887  543705 net.go:698] Add success.
W0321 19:22:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:22:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 19:22:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:22:14.456836  543705 disk_worker.go:494] system disk:vda1
I0321 19:22:14.456880  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:22:14.457205  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:22:14.457213  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:22:14.457219  543705 custom_config.go:64] query custom config with name: gpu
E0321 19:22:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:22:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:22:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:22:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:22:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:22:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:22:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:22:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:22:23.409784  543705 memory.go:184] no items to output this cycle
I0321 19:22:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 19:22:31.017686  543705 disk_info.go:125] begin check local disk info of client
I0321 19:22:31.020164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:22:31.020171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bf300 0xc0004bf340]
E0321 19:22:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:22:33.409803  543705 memory.go:184] no items to output this cycle
I0321 19:22:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 19:22:43.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:22:43.409938  543705 memory.go:191] Add success.
I0321 19:22:43.410092  543705 cpu.go:282] Add success.
I0321 19:22:43.419752  543705 net.go:648] Add success.
I0321 19:22:43.422288  543705 net.go:770] primary dev: ETH0
I0321 19:22:43.422303  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:22:43.422316  543705 net.go:698] Add success.
I0321 19:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:22:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:22:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:22:53.409785  543705 cpu.go:275] no items to output this cycle
I0321 19:22:53.409794  543705 memory.go:184] no items to output this cycle
E0321 19:23:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:23:03.409780  543705 memory.go:184] no items to output this cycle
I0321 19:23:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 19:23:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:23:13.409808  543705 memory.go:191] Add success.
I0321 19:23:13.409809  543705 cpu.go:282] Add success.
W0321 19:23:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:23:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:23:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:23:13.420244  543705 net.go:648] Add success.
I0321 19:23:13.423111  543705 net.go:770] primary dev: ETH0
I0321 19:23:13.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:23:13.423139  543705 net.go:698] Add success.
I0321 19:23:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:23:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:23:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 19:23:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:23:14.456532  543705 disk_worker.go:494] system disk:vda1
I0321 19:23:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:23:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:23:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:23:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:23:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:23:23.409798  543705 memory.go:184] no items to output this cycle
I0321 19:23:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 19:23:31.021674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:23:31.024194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:23:31.024200  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492d00 0xc000492d40]
E0321 19:23:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:23:33.409775  543705 memory.go:184] no items to output this cycle
I0321 19:23:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 19:23:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:23:43.409915  543705 memory.go:191] Add success.
I0321 19:23:43.409971  543705 cpu.go:282] Add success.
I0321 19:23:43.419738  543705 net.go:648] Add success.
I0321 19:23:43.422531  543705 net.go:770] primary dev: ETH0
I0321 19:23:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:23:43.422557  543705 net.go:698] Add success.
I0321 19:23:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:23:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:23:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:23:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:23:53.409783  543705 memory.go:184] no items to output this cycle
I0321 19:23:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:24:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:24:03.409810  543705 memory.go:184] no items to output this cycle
I0321 19:24:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 19:24:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:24:13.409796  543705 memory.go:191] Add success.
I0321 19:24:13.409805  543705 cpu.go:282] Add success.
W0321 19:24:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:24:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:24:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:24:13.420171  543705 net.go:648] Add success.
I0321 19:24:13.423004  543705 net.go:770] primary dev: ETH0
I0321 19:24:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:24:13.423031  543705 net.go:698] Add success.
I0321 19:24:13.469568  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6fa5e234-c2d4-4eb7-aaaf-5c238cb85f46","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:24:13.469600  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:24:14.453939  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:24:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:24:14.455256  543705 disk_worker.go:708] disk space is not compliant
W0321 19:24:14.455260  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:24:14.456701  543705 disk_worker.go:494] system disk:vda1
I0321 19:24:14.456734  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:24:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:24:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:24:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:24:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:24:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:24:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:24:23.409796  543705 memory.go:184] no items to output this cycle
I0321 19:24:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 19:24:31.025675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:24:31.028261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:24:31.028267  543705 disk_info.go:196] parse disk info done, disk is : [0xc000246540 0xc000246580]
E0321 19:24:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:24:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 19:24:33.409794  543705 memory.go:184] no items to output this cycle
I0321 19:24:39.133747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:24:39.133754  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:24:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:24:43.410550  543705 memory.go:191] Add success.
I0321 19:24:43.409823  543705 cpu.go:282] Add success.
I0321 19:24:43.420304  543705 net.go:648] Add success.
I0321 19:24:43.422920  543705 net.go:770] primary dev: ETH0
I0321 19:24:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:24:43.422950  543705 net.go:698] Add success.
I0321 19:24:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:24:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:24:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:24:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:24:53.409804  543705 memory.go:184] no items to output this cycle
I0321 19:24:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 19:25:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:25:03.409785  543705 memory.go:184] no items to output this cycle
I0321 19:25:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 19:25:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:25:13.409797  543705 memory.go:191] Add success.
I0321 19:25:13.409799  543705 cpu.go:282] Add success.
W0321 19:25:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:25:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:25:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:25:13.420174  543705 net.go:648] Add success.
I0321 19:25:13.423056  543705 net.go:770] primary dev: ETH0
I0321 19:25:13.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:25:13.423080  543705 net.go:698] Add success.
I0321 19:25:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:25:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:25:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 19:25:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:25:14.456558  543705 disk_worker.go:494] system disk:vda1
I0321 19:25:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:25:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:25:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:25:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:25:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:25:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:25:23.409791  543705 memory.go:184] no items to output this cycle
I0321 19:25:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 19:25:31.029688  543705 disk_info.go:125] begin check local disk info of client
I0321 19:25:31.032230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:25:31.032237  543705 disk_info.go:196] parse disk info done, disk is : [0xc000535540 0xc000535580]
E0321 19:25:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:25:33.409806  543705 memory.go:184] no items to output this cycle
I0321 19:25:33.409823  543705 cpu.go:275] no items to output this cycle
E0321 19:25:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:25:43.409779  543705 memory.go:191] Add success.
I0321 19:25:43.409796  543705 cpu.go:282] Add success.
I0321 19:25:43.420028  543705 net.go:648] Add success.
I0321 19:25:43.423761  543705 net.go:770] primary dev: ETH0
I0321 19:25:43.423774  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:25:43.423786  543705 net.go:698] Add success.
I0321 19:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:25:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:25:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:25:53.409761  543705 memory.go:184] no items to output this cycle
I0321 19:25:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 19:26:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:26:03.409786  543705 memory.go:184] no items to output this cycle
I0321 19:26:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 19:26:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:26:13.409809  543705 memory.go:191] Add success.
I0321 19:26:13.409819  543705 cpu.go:282] Add success.
W0321 19:26:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:26:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:26:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:26:13.420133  543705 net.go:648] Add success.
I0321 19:26:13.422690  543705 net.go:770] primary dev: ETH0
I0321 19:26:13.422707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:26:13.422721  543705 net.go:698] Add success.
I0321 19:26:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:26:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:26:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 19:26:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:26:14.456892  543705 disk_worker.go:494] system disk:vda1
I0321 19:26:14.456924  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:26:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:26:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:26:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:26:23.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:26:23.409854  543705 memory.go:184] no items to output this cycle
I0321 19:26:23.409929  543705 cpu.go:275] no items to output this cycle
I0321 19:26:31.033690  543705 disk_info.go:125] begin check local disk info of client
I0321 19:26:31.036265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:26:31.036273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0321 19:26:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:26:33.409793  543705 memory.go:184] no items to output this cycle
I0321 19:26:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 19:26:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:26:43.409809  543705 memory.go:191] Add success.
I0321 19:26:43.409814  543705 cpu.go:282] Add success.
I0321 19:26:43.420042  543705 net.go:648] Add success.
I0321 19:26:43.423224  543705 net.go:770] primary dev: ETH0
I0321 19:26:43.423239  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:26:43.423253  543705 net.go:698] Add success.
I0321 19:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:26:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:26:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:26:53.409759  543705 memory.go:184] no items to output this cycle
I0321 19:26:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 19:27:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:27:03.409796  543705 memory.go:184] no items to output this cycle
I0321 19:27:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 19:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:27:13.409811  543705 memory.go:191] Add success.
I0321 19:27:13.409819  543705 cpu.go:282] Add success.
W0321 19:27:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:27:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:27:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:27:13.420061  543705 net.go:648] Add success.
I0321 19:27:13.422660  543705 net.go:770] primary dev: ETH0
I0321 19:27:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:27:13.422690  543705 net.go:698] Add success.
I0321 19:27:13.428865  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 19:27:13.453049  543705 event_worker.go:152] Polling the log file for events...
I0321 19:27:13.468968  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"04360026-809b-430a-8cd9-2a899c72a2b8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:27:13.469011  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 19:27:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:27:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0321 19:27:14.455229  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:27:14.456043  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:27:14.456054  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:27:14.456061  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:27:14.456640  543705 disk_worker.go:494] system disk:vda1
I0321 19:27:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:27:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:27:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:27:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:27:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:27:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:27:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:27:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:27:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 19:27:23.409795  543705 memory.go:184] no items to output this cycle
I0321 19:27:31.037675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:27:31.040196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:27:31.040203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8700 0xc0004d8780]
E0321 19:27:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:27:33.409786  543705 memory.go:184] no items to output this cycle
I0321 19:27:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 19:27:39.133901  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:27:39.133908  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:27:43.410676  543705 memory.go:191] Add success.
I0321 19:27:43.409791  543705 cpu.go:282] Add success.
I0321 19:27:43.420381  543705 net.go:648] Add success.
I0321 19:27:43.423353  543705 net.go:770] primary dev: ETH0
I0321 19:27:43.423366  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:27:43.423379  543705 net.go:698] Add success.
I0321 19:27:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:27:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:27:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:27:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:27:53.409793  543705 memory.go:184] no items to output this cycle
I0321 19:27:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 19:28:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:28:03.409774  543705 memory.go:184] no items to output this cycle
I0321 19:28:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 19:28:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:28:13.409790  543705 memory.go:191] Add success.
I0321 19:28:13.409795  543705 cpu.go:282] Add success.
W0321 19:28:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:28:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:28:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:28:13.420089  543705 net.go:648] Add success.
I0321 19:28:13.423262  543705 net.go:770] primary dev: ETH0
I0321 19:28:13.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:28:13.423288  543705 net.go:698] Add success.
I0321 19:28:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:28:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:28:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 19:28:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:28:14.456865  543705 disk_worker.go:494] system disk:vda1
I0321 19:28:14.456897  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:28:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:28:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:28:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:28:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:28:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:28:23.409807  543705 memory.go:184] no items to output this cycle
I0321 19:28:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 19:28:31.041676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:28:31.044237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:28:31.044244  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9000 0xc0004d9040]
E0321 19:28:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:28:33.409784  543705 memory.go:184] no items to output this cycle
I0321 19:28:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 19:28:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:28:43.409796  543705 cpu.go:282] Add success.
I0321 19:28:43.409803  543705 memory.go:191] Add success.
I0321 19:28:43.419961  543705 net.go:648] Add success.
I0321 19:28:43.422617  543705 net.go:770] primary dev: ETH0
I0321 19:28:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:28:43.422644  543705 net.go:698] Add success.
I0321 19:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:28:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:28:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:28:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:28:53.409776  543705 memory.go:184] no items to output this cycle
I0321 19:28:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 19:29:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:29:03.409769  543705 memory.go:184] no items to output this cycle
I0321 19:29:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 19:29:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:29:13.409795  543705 cpu.go:282] Add success.
I0321 19:29:13.409805  543705 memory.go:191] Add success.
W0321 19:29:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:29:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:29:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:29:13.420046  543705 net.go:648] Add success.
I0321 19:29:13.422970  543705 net.go:770] primary dev: ETH0
I0321 19:29:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:29:13.422995  543705 net.go:698] Add success.
I0321 19:29:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:29:14.455409  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:29:14.455421  543705 disk_worker.go:708] disk space is not compliant
W0321 19:29:14.455424  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:29:14.457128  543705 disk_worker.go:494] system disk:vda1
I0321 19:29:14.457176  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:29:15.456008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:29:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:29:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:29:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:29:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:29:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:29:23.409787  543705 memory.go:184] no items to output this cycle
I0321 19:29:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 19:29:31.045678  543705 disk_info.go:125] begin check local disk info of client
I0321 19:29:31.048194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:29:31.048200  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac80 0xc00007acc0]
E0321 19:29:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:29:33.409795  543705 memory.go:184] no items to output this cycle
I0321 19:29:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 19:29:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:29:43.409823  543705 memory.go:191] Add success.
I0321 19:29:43.409824  543705 cpu.go:282] Add success.
I0321 19:29:43.419987  543705 net.go:648] Add success.
I0321 19:29:43.422918  543705 net.go:770] primary dev: ETH0
I0321 19:29:43.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:29:43.422943  543705 net.go:698] Add success.
I0321 19:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:29:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:29:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:29:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:29:53.409776  543705 cpu.go:275] no items to output this cycle
I0321 19:29:53.409786  543705 memory.go:184] no items to output this cycle
E0321 19:30:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:30:03.409764  543705 memory.go:184] no items to output this cycle
I0321 19:30:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 19:30:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:30:13.409810  543705 memory.go:191] Add success.
I0321 19:30:13.409815  543705 cpu.go:282] Add success.
W0321 19:30:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:30:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:30:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:30:13.420143  543705 net.go:648] Add success.
I0321 19:30:13.422555  543705 net.go:770] primary dev: ETH0
I0321 19:30:13.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:30:13.422579  543705 net.go:698] Add success.
I0321 19:30:13.468905  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c45a3bf-e664-49ea-9282-74d972a2b173","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:30:13.468940  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:30:14.455416  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:30:14.455484  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:30:14.455634  543705 disk_worker.go:708] disk space is not compliant
W0321 19:30:14.455639  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:30:14.457489  543705 disk_worker.go:494] system disk:vda1
I0321 19:30:14.457520  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:30:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:30:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:30:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:30:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:30:23.409804  543705 memory.go:184] no items to output this cycle
I0321 19:30:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 19:30:31.049680  543705 disk_info.go:125] begin check local disk info of client
I0321 19:30:31.052245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:30:31.052251  543705 disk_info.go:196] parse disk info done, disk is : [0xc000572e80 0xc000572ec0]
E0321 19:30:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:30:33.409790  543705 memory.go:184] no items to output this cycle
I0321 19:30:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 19:30:39.134050  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:30:39.134057  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:30:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:30:43.410813  543705 memory.go:191] Add success.
I0321 19:30:43.409816  543705 cpu.go:282] Add success.
I0321 19:30:43.420532  543705 net.go:648] Add success.
I0321 19:30:43.423032  543705 net.go:770] primary dev: ETH0
I0321 19:30:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:30:43.423058  543705 net.go:698] Add success.
I0321 19:30:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:30:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:30:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:30:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:30:53.409763  543705 memory.go:184] no items to output this cycle
I0321 19:30:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 19:31:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:31:03.409798  543705 memory.go:184] no items to output this cycle
I0321 19:31:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 19:31:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:31:13.409820  543705 memory.go:191] Add success.
I0321 19:31:13.409823  543705 cpu.go:282] Add success.
W0321 19:31:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:31:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:31:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:31:13.419887  543705 net.go:770] primary dev: ETH0
I0321 19:31:13.419900  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:31:13.419912  543705 net.go:698] Add success.
I0321 19:31:13.420465  543705 net.go:648] Add success.
I0321 19:31:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:31:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:31:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0321 19:31:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:31:14.456680  543705 disk_worker.go:494] system disk:vda1
I0321 19:31:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:31:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:31:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:31:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:31:23.409809  543705 memory.go:184] no items to output this cycle
I0321 19:31:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 19:31:31.053676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:31:31.056269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:31:31.056275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4880 0xc0000c48c0]
E0321 19:31:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:31:33.409774  543705 cpu.go:275] no items to output this cycle
I0321 19:31:33.409784  543705 memory.go:184] no items to output this cycle
E0321 19:31:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:31:43.409795  543705 memory.go:191] Add success.
I0321 19:31:43.409796  543705 cpu.go:282] Add success.
I0321 19:31:43.419941  543705 net.go:648] Add success.
I0321 19:31:43.422578  543705 net.go:770] primary dev: ETH0
I0321 19:31:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:31:43.422603  543705 net.go:698] Add success.
I0321 19:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:31:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:31:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:31:53.409777  543705 memory.go:184] no items to output this cycle
I0321 19:31:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 19:32:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:32:03.409768  543705 memory.go:184] no items to output this cycle
I0321 19:32:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 19:32:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:32:13.409807  543705 memory.go:191] Add success.
I0321 19:32:13.409817  543705 cpu.go:282] Add success.
W0321 19:32:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:32:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:32:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:32:13.420352  543705 net.go:648] Add success.
I0321 19:32:13.423446  543705 net.go:770] primary dev: ETH0
I0321 19:32:13.423458  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:32:13.423470  543705 net.go:698] Add success.
W0321 19:32:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:32:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0321 19:32:14.455242  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:32:14.457141  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:32:14.457151  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:32:14.457157  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:32:14.458112  543705 disk_worker.go:494] system disk:vda1
I0321 19:32:14.458157  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:32:15.456885  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:32:15.456896  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:32:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:32:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:32:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:32:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:32:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:32:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:32:23.409781  543705 memory.go:184] no items to output this cycle
I0321 19:32:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 19:32:31.057677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:32:31.060242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:32:31.060248  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af00 0xc00007af40]
E0321 19:32:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:32:33.409795  543705 memory.go:184] no items to output this cycle
I0321 19:32:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 19:32:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:32:43.409775  543705 memory.go:191] Add success.
I0321 19:32:43.409805  543705 cpu.go:282] Add success.
I0321 19:32:43.419860  543705 net.go:648] Add success.
I0321 19:32:43.422298  543705 net.go:770] primary dev: ETH0
I0321 19:32:43.422311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:32:43.422327  543705 net.go:698] Add success.
I0321 19:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:32:53.410315  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:32:53.410331  543705 memory.go:184] no items to output this cycle
I0321 19:32:53.410357  543705 cpu.go:275] no items to output this cycle
E0321 19:33:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:33:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 19:33:03.409787  543705 memory.go:184] no items to output this cycle
E0321 19:33:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:33:13.409809  543705 memory.go:191] Add success.
I0321 19:33:13.409812  543705 cpu.go:282] Add success.
W0321 19:33:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:33:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:33:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:33:13.420047  543705 net.go:648] Add success.
I0321 19:33:13.423092  543705 net.go:770] primary dev: ETH0
I0321 19:33:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:33:13.423116  543705 net.go:698] Add success.
I0321 19:33:13.464483  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d302e841-4ee6-4428-910b-e5199b2fa3b0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:33:13.464525  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:33:14.453984  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:33:14.454183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:33:14.454272  543705 disk_worker.go:708] disk space is not compliant
W0321 19:33:14.454276  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:33:14.456185  543705 disk_worker.go:494] system disk:vda1
I0321 19:33:14.456217  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:33:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:33:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:33:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:33:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:33:16.472475  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:33:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:33:23.409781  543705 memory.go:184] no items to output this cycle
I0321 19:33:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 19:33:31.061675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:33:31.064303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:33:31.064309  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9b80 0xc0004d9bc0]
E0321 19:33:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:33:33.409775  543705 cpu.go:275] no items to output this cycle
I0321 19:33:33.409781  543705 memory.go:184] no items to output this cycle
I0321 19:33:39.134196  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:33:39.134203  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:33:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:33:43.410563  543705 memory.go:191] Add success.
I0321 19:33:43.409814  543705 cpu.go:282] Add success.
I0321 19:33:43.420262  543705 net.go:648] Add success.
I0321 19:33:43.422670  543705 net.go:770] primary dev: ETH0
I0321 19:33:43.422683  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:33:43.422696  543705 net.go:698] Add success.
I0321 19:33:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:33:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:33:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:33:53.409771  543705 memory.go:184] no items to output this cycle
I0321 19:33:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 19:34:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:34:03.409803  543705 memory.go:184] no items to output this cycle
I0321 19:34:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 19:34:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:34:13.409808  543705 memory.go:191] Add success.
I0321 19:34:13.409820  543705 cpu.go:282] Add success.
W0321 19:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:34:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:34:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:34:13.420145  543705 net.go:648] Add success.
I0321 19:34:13.423043  543705 net.go:770] primary dev: ETH0
I0321 19:34:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:34:13.423070  543705 net.go:698] Add success.
I0321 19:34:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:34:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:34:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 19:34:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:34:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 19:34:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:34:15.456037  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:34:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:34:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:34:16.472484  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:34:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:34:23.409783  543705 memory.go:184] no items to output this cycle
I0321 19:34:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 19:34:31.065688  543705 disk_info.go:125] begin check local disk info of client
I0321 19:34:31.068190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:34:31.068198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9f40 0xc00007a000]
E0321 19:34:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:34:33.409803  543705 memory.go:184] no items to output this cycle
I0321 19:34:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 19:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:34:43.409787  543705 memory.go:191] Add success.
I0321 19:34:43.409807  543705 cpu.go:282] Add success.
I0321 19:34:43.419999  543705 net.go:648] Add success.
I0321 19:34:43.422551  543705 net.go:770] primary dev: ETH0
I0321 19:34:43.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:34:43.422588  543705 net.go:698] Add success.
I0321 19:34:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:34:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:34:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:34:53.409778  543705 memory.go:184] no items to output this cycle
I0321 19:34:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 19:35:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:35:03.409803  543705 memory.go:184] no items to output this cycle
I0321 19:35:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 19:35:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:35:13.409817  543705 memory.go:191] Add success.
I0321 19:35:13.409832  543705 cpu.go:282] Add success.
W0321 19:35:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:35:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:35:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:35:13.420106  543705 net.go:648] Add success.
I0321 19:35:13.422935  543705 net.go:770] primary dev: ETH0
I0321 19:35:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:35:13.422959  543705 net.go:698] Add success.
I0321 19:35:14.455090  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:35:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:35:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0321 19:35:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:35:14.456671  543705 disk_worker.go:494] system disk:vda1
I0321 19:35:14.456708  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:35:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:35:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:35:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:35:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:35:23.409786  543705 memory.go:184] no items to output this cycle
I0321 19:35:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 19:35:31.069675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:35:31.072296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:35:31.072303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9e80 0xc0004d9ec0]
E0321 19:35:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:35:33.409802  543705 memory.go:184] no items to output this cycle
I0321 19:35:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 19:35:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:35:43.409784  543705 memory.go:191] Add success.
I0321 19:35:43.409814  543705 cpu.go:282] Add success.
I0321 19:35:43.419885  543705 net.go:648] Add success.
I0321 19:35:43.422345  543705 net.go:770] primary dev: ETH0
I0321 19:35:43.422360  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:35:43.422374  543705 net.go:698] Add success.
I0321 19:35:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:35:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:35:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:35:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:35:53.409809  543705 memory.go:184] no items to output this cycle
I0321 19:35:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 19:36:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:36:03.409779  543705 memory.go:184] no items to output this cycle
I0321 19:36:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 19:36:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:36:13.409821  543705 memory.go:191] Add success.
I0321 19:36:13.409828  543705 cpu.go:282] Add success.
W0321 19:36:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:36:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:36:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:36:13.420319  543705 net.go:648] Add success.
I0321 19:36:13.423342  543705 net.go:770] primary dev: ETH0
I0321 19:36:13.423357  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:36:13.423371  543705 net.go:698] Add success.
I0321 19:36:14.011178  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9d8768d2-b176-4953-9c50-cfaa2e2061d2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:36:14.011215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:36:14.454677  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:36:14.454839  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:36:14.454927  543705 disk_worker.go:708] disk space is not compliant
W0321 19:36:14.454930  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:36:14.456466  543705 disk_worker.go:494] system disk:vda1
I0321 19:36:14.456495  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:36:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:36:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:36:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:36:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:36:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:36:23.409821  543705 memory.go:184] no items to output this cycle
I0321 19:36:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 19:36:31.073675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:36:31.076263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:36:31.076269  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba800 0xc0002ba840]
E0321 19:36:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:36:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 19:36:33.409794  543705 memory.go:184] no items to output this cycle
I0321 19:36:39.134346  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:36:39.134354  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:36:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:36:43.410680  543705 memory.go:191] Add success.
I0321 19:36:43.409802  543705 cpu.go:282] Add success.
I0321 19:36:43.420372  543705 net.go:648] Add success.
I0321 19:36:43.422887  543705 net.go:770] primary dev: ETH0
I0321 19:36:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:36:43.422912  543705 net.go:698] Add success.
I0321 19:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:36:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:36:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:36:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:36:53.410376  543705 memory.go:184] no items to output this cycle
I0321 19:36:53.410404  543705 cpu.go:275] no items to output this cycle
E0321 19:37:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:37:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 19:37:03.409794  543705 memory.go:184] no items to output this cycle
E0321 19:37:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:37:13.409824  543705 memory.go:191] Add success.
I0321 19:37:13.409832  543705 cpu.go:282] Add success.
W0321 19:37:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:37:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:37:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:37:13.420145  543705 net.go:648] Add success.
I0321 19:37:13.422864  543705 net.go:770] primary dev: ETH0
I0321 19:37:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:37:13.422891  543705 net.go:698] Add success.
I0321 19:37:13.453426  543705 event_worker.go:152] Polling the log file for events...
W0321 19:37:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:37:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0321 19:37:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:37:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 19:37:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:37:14.457485  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:37:14.457495  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:37:14.457502  543705 custom_config.go:64] query custom config with name: gpu
E0321 19:37:15.457046  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:37:15.457061  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:37:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:37:16.458009  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:37:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:37:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:37:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:37:23.410400  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:37:23.410418  543705 memory.go:184] no items to output this cycle
I0321 19:37:23.410420  543705 cpu.go:275] no items to output this cycle
I0321 19:37:31.077667  543705 disk_info.go:125] begin check local disk info of client
I0321 19:37:31.080241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:37:31.080248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1b40 0xc0003e1b80]
E0321 19:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:37:33.409808  543705 memory.go:184] no items to output this cycle
I0321 19:37:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 19:37:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:37:43.409801  543705 memory.go:191] Add success.
I0321 19:37:43.409802  543705 cpu.go:282] Add success.
I0321 19:37:43.420093  543705 net.go:648] Add success.
I0321 19:37:43.422605  543705 net.go:770] primary dev: ETH0
I0321 19:37:43.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:37:43.422639  543705 net.go:698] Add success.
I0321 19:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:37:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:37:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:37:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:37:53.409768  543705 memory.go:184] no items to output this cycle
I0321 19:37:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:38:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:38:03.409798  543705 memory.go:184] no items to output this cycle
I0321 19:38:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 19:38:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:38:13.409780  543705 memory.go:191] Add success.
I0321 19:38:13.409800  543705 cpu.go:282] Add success.
W0321 19:38:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:38:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:38:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:38:13.420093  543705 net.go:648] Add success.
I0321 19:38:13.422958  543705 net.go:770] primary dev: ETH0
I0321 19:38:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:38:13.422985  543705 net.go:698] Add success.
I0321 19:38:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:38:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:38:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 19:38:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:38:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 19:38:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:38:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:38:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:38:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:38:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:38:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:38:23.409786  543705 memory.go:184] no items to output this cycle
I0321 19:38:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 19:38:31.081674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:38:31.084241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:38:31.084248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf980 0xc0002bf9c0]
E0321 19:38:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:38:33.409791  543705 memory.go:184] no items to output this cycle
I0321 19:38:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 19:38:43.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:38:43.409907  543705 memory.go:191] Add success.
I0321 19:38:43.410103  543705 cpu.go:282] Add success.
I0321 19:38:43.419716  543705 net.go:648] Add success.
I0321 19:38:43.422760  543705 net.go:770] primary dev: ETH0
I0321 19:38:43.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:38:43.422784  543705 net.go:698] Add success.
I0321 19:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:38:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:38:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:38:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:38:53.409795  543705 memory.go:184] no items to output this cycle
I0321 19:38:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 19:39:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:39:03.409781  543705 cpu.go:275] no items to output this cycle
I0321 19:39:03.409790  543705 memory.go:184] no items to output this cycle
E0321 19:39:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:39:13.409820  543705 memory.go:191] Add success.
I0321 19:39:13.409823  543705 cpu.go:282] Add success.
W0321 19:39:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:39:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:39:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:39:13.420612  543705 net.go:648] Add success.
I0321 19:39:13.423315  543705 net.go:770] primary dev: ETH0
I0321 19:39:13.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:39:13.423340  543705 net.go:698] Add success.
I0321 19:39:13.467850  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9be9de10-126d-40e0-9182-401d6db78656","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:39:13.467882  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:39:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:39:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:39:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 19:39:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:39:14.456798  543705 disk_worker.go:494] system disk:vda1
I0321 19:39:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:39:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:39:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:39:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:39:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:39:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:39:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:39:23.409806  543705 memory.go:184] no items to output this cycle
I0321 19:39:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 19:39:31.085677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:39:31.088261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:39:31.088267  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033b0c0 0xc00033b100]
E0321 19:39:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:39:33.409799  543705 memory.go:184] no items to output this cycle
I0321 19:39:33.409811  543705 cpu.go:275] no items to output this cycle
I0321 19:39:39.134499  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:39:39.134506  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:39:43.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:39:43.410858  543705 memory.go:191] Add success.
I0321 19:39:43.409963  543705 cpu.go:282] Add success.
I0321 19:39:43.419762  543705 net.go:648] Add success.
I0321 19:39:43.422627  543705 net.go:770] primary dev: ETH0
I0321 19:39:43.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:39:43.422656  543705 net.go:698] Add success.
I0321 19:39:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:39:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:39:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:39:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:39:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 19:39:53.409790  543705 memory.go:184] no items to output this cycle
E0321 19:40:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:40:03.409791  543705 memory.go:184] no items to output this cycle
I0321 19:40:03.409804  543705 cpu.go:275] no items to output this cycle
W0321 19:40:13.409705  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:40:13.409721  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:40:13.409726  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0321 19:40:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:40:13.409820  543705 memory.go:191] Add success.
I0321 19:40:13.409828  543705 cpu.go:282] Add success.
I0321 19:40:13.420065  543705 net.go:648] Add success.
I0321 19:40:13.422771  543705 net.go:770] primary dev: ETH0
I0321 19:40:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:40:13.422801  543705 net.go:698] Add success.
I0321 19:40:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:40:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:40:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 19:40:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:40:14.456568  543705 disk_worker.go:494] system disk:vda1
I0321 19:40:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:40:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:40:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:40:16.472513  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:40:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:40:23.409811  543705 memory.go:184] no items to output this cycle
I0321 19:40:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 19:40:31.089675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:40:31.092260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:40:31.092267  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3780 0xc0003e37c0]
E0321 19:40:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:40:33.409800  543705 memory.go:184] no items to output this cycle
I0321 19:40:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 19:40:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:40:43.409813  543705 memory.go:191] Add success.
I0321 19:40:43.409821  543705 cpu.go:282] Add success.
I0321 19:40:43.420232  543705 net.go:648] Add success.
I0321 19:40:43.423214  543705 net.go:770] primary dev: ETH0
I0321 19:40:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:40:43.423239  543705 net.go:698] Add success.
I0321 19:40:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:40:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:40:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:40:53.409793  543705 memory.go:184] no items to output this cycle
I0321 19:40:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 19:41:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:41:03.409779  543705 memory.go:184] no items to output this cycle
I0321 19:41:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 19:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:41:13.409791  543705 memory.go:191] Add success.
I0321 19:41:13.409808  543705 cpu.go:282] Add success.
W0321 19:41:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:41:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:41:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:41:13.420162  543705 net.go:648] Add success.
I0321 19:41:13.422775  543705 net.go:770] primary dev: ETH0
I0321 19:41:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:41:13.422800  543705 net.go:698] Add success.
I0321 19:41:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:41:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:41:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 19:41:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:41:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 19:41:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:41:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:41:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:41:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:41:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:41:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:41:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 19:41:23.409796  543705 memory.go:184] no items to output this cycle
I0321 19:41:31.093673  543705 disk_info.go:125] begin check local disk info of client
I0321 19:41:31.096198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:41:31.096204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002913c0 0xc000291400]
E0321 19:41:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:41:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 19:41:33.409791  543705 memory.go:184] no items to output this cycle
E0321 19:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:41:43.409812  543705 memory.go:191] Add success.
I0321 19:41:43.409823  543705 cpu.go:282] Add success.
I0321 19:41:43.420011  543705 net.go:648] Add success.
I0321 19:41:43.422701  543705 net.go:770] primary dev: ETH0
I0321 19:41:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:41:43.422730  543705 net.go:698] Add success.
I0321 19:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:41:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:41:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:41:53.409781  543705 memory.go:184] no items to output this cycle
I0321 19:41:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 19:42:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:42:03.409784  543705 memory.go:184] no items to output this cycle
I0321 19:42:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 19:42:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:42:13.409802  543705 memory.go:191] Add success.
I0321 19:42:13.409803  543705 cpu.go:282] Add success.
W0321 19:42:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:42:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:42:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:42:13.420127  543705 net.go:648] Add success.
I0321 19:42:13.423036  543705 net.go:770] primary dev: ETH0
I0321 19:42:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:42:13.423065  543705 net.go:698] Add success.
I0321 19:42:13.468467  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fffffc0-7c25-4738-88c7-7199bf070c36","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:42:13.468500  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 19:42:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:42:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0321 19:42:14.455239  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:42:14.455923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:42:14.455932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:42:14.455938  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:42:14.456853  543705 disk_worker.go:494] system disk:vda1
I0321 19:42:14.456898  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:42:15.456940  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:42:15.456953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:42:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:42:16.457979  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:42:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:42:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:42:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:42:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:42:23.409820  543705 memory.go:184] no items to output this cycle
I0321 19:42:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 19:42:31.097673  543705 disk_info.go:125] begin check local disk info of client
I0321 19:42:31.100238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:42:31.100244  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368a40 0xc000368a80]
E0321 19:42:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:42:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 19:42:33.409790  543705 memory.go:184] no items to output this cycle
I0321 19:42:39.134643  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:42:39.134650  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:42:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:42:43.410595  543705 memory.go:191] Add success.
I0321 19:42:43.409937  543705 cpu.go:282] Add success.
I0321 19:42:43.419717  543705 net.go:648] Add success.
I0321 19:42:43.422652  543705 net.go:770] primary dev: ETH0
I0321 19:42:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:42:43.422677  543705 net.go:698] Add success.
I0321 19:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:42:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:42:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:42:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:42:53.409805  543705 memory.go:184] no items to output this cycle
I0321 19:42:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 19:43:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:43:03.409795  543705 cpu.go:275] no items to output this cycle
I0321 19:43:03.409800  543705 memory.go:184] no items to output this cycle
E0321 19:43:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:43:13.409821  543705 memory.go:191] Add success.
I0321 19:43:13.409831  543705 cpu.go:282] Add success.
W0321 19:43:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:43:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:43:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:43:13.420181  543705 net.go:648] Add success.
I0321 19:43:13.422752  543705 net.go:770] primary dev: ETH0
I0321 19:43:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:43:13.422789  543705 net.go:698] Add success.
I0321 19:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:43:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:43:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 19:43:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:43:14.456599  543705 disk_worker.go:494] system disk:vda1
I0321 19:43:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:43:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:43:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:43:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:43:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:43:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:43:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:43:23.409813  543705 memory.go:184] no items to output this cycle
I0321 19:43:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 19:43:31.101676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:43:31.104268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:43:31.104275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003beb40 0xc0003beb80]
E0321 19:43:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:43:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 19:43:33.409784  543705 memory.go:184] no items to output this cycle
E0321 19:43:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:43:43.409776  543705 memory.go:191] Add success.
I0321 19:43:43.409816  543705 cpu.go:282] Add success.
I0321 19:43:43.419740  543705 net.go:648] Add success.
I0321 19:43:43.422657  543705 net.go:770] primary dev: ETH0
I0321 19:43:43.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:43:43.422685  543705 net.go:698] Add success.
I0321 19:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:43:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:43:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:43:53.410322  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:43:53.410338  543705 memory.go:184] no items to output this cycle
I0321 19:43:53.410356  543705 cpu.go:275] no items to output this cycle
E0321 19:44:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:44:03.409774  543705 memory.go:184] no items to output this cycle
I0321 19:44:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 19:44:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:44:13.409780  543705 memory.go:191] Add success.
W0321 19:44:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 19:44:13.409812  543705 cpu.go:282] Add success.
W0321 19:44:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:44:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:44:13.420268  543705 net.go:648] Add success.
I0321 19:44:13.422877  543705 net.go:770] primary dev: ETH0
I0321 19:44:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:44:13.422902  543705 net.go:698] Add success.
I0321 19:44:14.453937  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:44:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:44:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 19:44:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:44:14.458644  543705 disk_worker.go:494] system disk:vda1
I0321 19:44:14.458690  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:44:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:44:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:44:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:44:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:44:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:44:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:44:23.409775  543705 memory.go:184] no items to output this cycle
I0321 19:44:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 19:44:31.105675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:44:31.108229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:44:31.108237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003eec00 0xc0003eec40]
E0321 19:44:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:44:33.409790  543705 memory.go:184] no items to output this cycle
I0321 19:44:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 19:44:43.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:44:43.409860  543705 memory.go:191] Add success.
I0321 19:44:43.409979  543705 cpu.go:282] Add success.
I0321 19:44:43.419714  543705 net.go:648] Add success.
I0321 19:44:43.422951  543705 net.go:770] primary dev: ETH0
I0321 19:44:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:44:43.422976  543705 net.go:698] Add success.
I0321 19:44:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:44:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:44:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:44:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:44:53.409794  543705 memory.go:184] no items to output this cycle
I0321 19:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 19:45:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:45:03.409773  543705 memory.go:184] no items to output this cycle
I0321 19:45:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 19:45:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:45:13.409792  543705 memory.go:191] Add success.
I0321 19:45:13.409799  543705 cpu.go:282] Add success.
W0321 19:45:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:45:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:45:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:45:13.420151  543705 net.go:648] Add success.
I0321 19:45:13.422835  543705 net.go:770] primary dev: ETH0
I0321 19:45:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:45:13.422864  543705 net.go:698] Add success.
I0321 19:45:13.470672  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"74c1a4b1-878b-4401-8e0a-52443f0a762f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:45:13.470709  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:45:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:45:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:45:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 19:45:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:45:14.456722  543705 disk_worker.go:494] system disk:vda1
I0321 19:45:14.456754  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:45:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:45:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:45:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:45:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:45:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:45:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:45:23.409788  543705 memory.go:184] no items to output this cycle
I0321 19:45:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 19:45:31.109672  543705 disk_info.go:125] begin check local disk info of client
I0321 19:45:31.112208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:45:31.112215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003801c0 0xc000380200]
E0321 19:45:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:45:33.409782  543705 memory.go:184] no items to output this cycle
I0321 19:45:33.409783  543705 cpu.go:275] no items to output this cycle
I0321 19:45:39.137372  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:45:39.137379  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:45:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:45:43.410669  543705 memory.go:191] Add success.
I0321 19:45:43.409808  543705 cpu.go:282] Add success.
I0321 19:45:43.419729  543705 net.go:648] Add success.
I0321 19:45:43.422561  543705 net.go:770] primary dev: ETH0
I0321 19:45:43.422573  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:45:43.422585  543705 net.go:698] Add success.
I0321 19:45:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:45:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:45:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:45:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:45:53.409765  543705 memory.go:184] no items to output this cycle
I0321 19:45:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 19:46:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:46:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 19:46:03.409789  543705 memory.go:184] no items to output this cycle
E0321 19:46:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:46:13.409789  543705 memory.go:191] Add success.
I0321 19:46:13.409792  543705 cpu.go:282] Add success.
W0321 19:46:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:46:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:46:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:46:13.420051  543705 net.go:648] Add success.
I0321 19:46:13.423197  543705 net.go:770] primary dev: ETH0
I0321 19:46:13.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:46:13.423222  543705 net.go:698] Add success.
I0321 19:46:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:46:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:46:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 19:46:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:46:14.456587  543705 disk_worker.go:494] system disk:vda1
I0321 19:46:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:46:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:46:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:46:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:46:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:46:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:46:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:46:23.409812  543705 memory.go:184] no items to output this cycle
I0321 19:46:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 19:46:31.113676  543705 disk_info.go:125] begin check local disk info of client
I0321 19:46:31.116174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:46:31.116180  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486500 0xc000486540]
E0321 19:46:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:46:33.409778  543705 cpu.go:275] no items to output this cycle
I0321 19:46:33.409790  543705 memory.go:184] no items to output this cycle
E0321 19:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:46:43.409782  543705 memory.go:191] Add success.
I0321 19:46:43.409804  543705 cpu.go:282] Add success.
I0321 19:46:43.419698  543705 net.go:648] Add success.
I0321 19:46:43.422552  543705 net.go:770] primary dev: ETH0
I0321 19:46:43.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:46:43.422581  543705 net.go:698] Add success.
I0321 19:46:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:46:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:46:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:46:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:46:53.409796  543705 memory.go:184] no items to output this cycle
I0321 19:46:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 19:47:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:47:03.409781  543705 memory.go:184] no items to output this cycle
I0321 19:47:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 19:47:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:47:13.409788  543705 memory.go:191] Add success.
I0321 19:47:13.409789  543705 cpu.go:282] Add success.
W0321 19:47:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:47:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:47:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:47:13.420059  543705 net.go:648] Add success.
I0321 19:47:13.422676  543705 net.go:770] primary dev: ETH0
I0321 19:47:13.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:47:13.422705  543705 net.go:698] Add success.
I0321 19:47:13.453256  543705 event_worker.go:152] Polling the log file for events...
W0321 19:47:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:47:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 19:47:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:47:14.456949  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:47:14.456958  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:47:14.456965  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:47:14.456995  543705 disk_worker.go:494] system disk:vda1
I0321 19:47:14.457037  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:47:15.456971  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:47:15.456985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:47:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:47:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:47:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:47:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:47:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:47:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:47:23.409808  543705 memory.go:184] no items to output this cycle
I0321 19:47:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 19:47:31.117675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:47:31.120297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:47:31.120304  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035fd80 0xc00035fdc0]
E0321 19:47:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:47:33.409776  543705 cpu.go:275] no items to output this cycle
I0321 19:47:33.409782  543705 memory.go:184] no items to output this cycle
E0321 19:47:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:47:43.409788  543705 memory.go:191] Add success.
I0321 19:47:43.409807  543705 cpu.go:282] Add success.
I0321 19:47:43.420142  543705 net.go:648] Add success.
I0321 19:47:43.423102  543705 net.go:770] primary dev: ETH0
I0321 19:47:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:47:43.423137  543705 net.go:698] Add success.
I0321 19:47:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:47:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:47:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:47:53.409773  543705 memory.go:184] no items to output this cycle
I0321 19:47:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 19:48:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:48:03.409772  543705 memory.go:184] no items to output this cycle
I0321 19:48:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 19:48:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:48:13.409793  543705 memory.go:191] Add success.
I0321 19:48:13.409796  543705 cpu.go:282] Add success.
W0321 19:48:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:48:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:48:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:48:13.420126  543705 net.go:648] Add success.
I0321 19:48:13.423183  543705 net.go:770] primary dev: ETH0
I0321 19:48:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:48:13.423208  543705 net.go:698] Add success.
I0321 19:48:13.463088  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44cae160-5e71-426b-abb6-fa1f7a3853af","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:48:13.463124  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:48:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:48:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:48:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 19:48:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:48:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 19:48:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:48:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:48:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:48:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:48:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:48:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:48:23.409781  543705 memory.go:184] no items to output this cycle
I0321 19:48:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 19:48:31.121672  543705 disk_info.go:125] begin check local disk info of client
I0321 19:48:31.124205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:48:31.124211  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483300 0xc000483340]
E0321 19:48:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:48:33.409774  543705 cpu.go:275] no items to output this cycle
I0321 19:48:33.409783  543705 memory.go:184] no items to output this cycle
I0321 19:48:39.137734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:48:39.137741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:48:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:48:43.410714  543705 memory.go:191] Add success.
I0321 19:48:43.409791  543705 cpu.go:282] Add success.
I0321 19:48:43.420432  543705 net.go:648] Add success.
I0321 19:48:43.423167  543705 net.go:770] primary dev: ETH0
I0321 19:48:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:48:43.423191  543705 net.go:698] Add success.
I0321 19:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:48:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:48:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:48:53.409797  543705 memory.go:184] no items to output this cycle
I0321 19:48:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 19:49:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:49:03.409787  543705 cpu.go:275] no items to output this cycle
I0321 19:49:03.409799  543705 memory.go:184] no items to output this cycle
E0321 19:49:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:49:13.409792  543705 memory.go:191] Add success.
I0321 19:49:13.409794  543705 cpu.go:282] Add success.
W0321 19:49:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:49:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:49:13.419997  543705 net.go:648] Add success.
I0321 19:49:13.423342  543705 net.go:770] primary dev: ETH0
I0321 19:49:13.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:49:13.423367  543705 net.go:698] Add success.
I0321 19:49:14.454998  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:49:14.455240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:49:14.455263  543705 disk_worker.go:708] disk space is not compliant
W0321 19:49:14.455268  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:49:14.457230  543705 disk_worker.go:494] system disk:vda1
I0321 19:49:14.457291  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:49:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:49:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:49:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:49:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:49:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:49:23.409812  543705 memory.go:184] no items to output this cycle
I0321 19:49:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 19:49:31.125678  543705 disk_info.go:125] begin check local disk info of client
I0321 19:49:31.128196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:49:31.128203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1340 0xc0003c1380]
E0321 19:49:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:49:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 19:49:33.409791  543705 memory.go:184] no items to output this cycle
E0321 19:49:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:49:43.409785  543705 memory.go:191] Add success.
I0321 19:49:43.409786  543705 cpu.go:282] Add success.
I0321 19:49:43.419923  543705 net.go:648] Add success.
I0321 19:49:43.422675  543705 net.go:770] primary dev: ETH0
I0321 19:49:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:49:43.422701  543705 net.go:698] Add success.
I0321 19:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:49:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:49:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:49:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:49:53.409779  543705 memory.go:184] no items to output this cycle
I0321 19:49:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 19:50:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:50:03.409815  543705 memory.go:184] no items to output this cycle
I0321 19:50:03.409826  543705 cpu.go:275] no items to output this cycle
E0321 19:50:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:50:13.409775  543705 memory.go:191] Add success.
W0321 19:50:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 19:50:13.409802  543705 cpu.go:282] Add success.
W0321 19:50:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:50:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:50:13.420146  543705 net.go:648] Add success.
I0321 19:50:13.423637  543705 net.go:770] primary dev: ETH0
I0321 19:50:13.423655  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:50:13.423668  543705 net.go:698] Add success.
I0321 19:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:50:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:50:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 19:50:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:50:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 19:50:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:50:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:50:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:50:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:50:16.472428  543705 disk_local_worker.go:436] Get disk info: []
I0321 19:50:23.409791  543705 cpu.go:275] no items to output this cycle
E0321 19:50:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:50:23.409812  543705 memory.go:184] no items to output this cycle
I0321 19:50:31.129678  543705 disk_info.go:125] begin check local disk info of client
I0321 19:50:31.132236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:50:31.132241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368780 0xc0003687c0]
E0321 19:50:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:50:33.409785  543705 memory.go:184] no items to output this cycle
I0321 19:50:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 19:50:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:50:43.409792  543705 memory.go:191] Add success.
I0321 19:50:43.409807  543705 cpu.go:282] Add success.
I0321 19:50:43.420078  543705 net.go:648] Add success.
I0321 19:50:43.422898  543705 net.go:770] primary dev: ETH0
I0321 19:50:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:50:43.422924  543705 net.go:698] Add success.
I0321 19:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:50:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:50:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:50:53.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:50:53.409973  543705 memory.go:184] no items to output this cycle
I0321 19:50:53.410028  543705 cpu.go:275] no items to output this cycle
E0321 19:51:03.409990  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:51:03.410008  543705 memory.go:184] no items to output this cycle
I0321 19:51:03.410042  543705 cpu.go:275] no items to output this cycle
E0321 19:51:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:51:13.409818  543705 memory.go:191] Add success.
I0321 19:51:13.409822  543705 cpu.go:282] Add success.
W0321 19:51:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:51:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:51:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:51:13.420224  543705 net.go:648] Add success.
I0321 19:51:13.423222  543705 net.go:770] primary dev: ETH0
I0321 19:51:13.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:51:13.423246  543705 net.go:698] Add success.
I0321 19:51:13.468049  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"55c3e4ea-7070-4c5a-8fd6-cc744d125783","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:51:13.468083  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:51:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:51:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 19:51:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:51:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 19:51:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:51:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:51:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:51:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:51:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:51:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:51:23.409807  543705 memory.go:184] no items to output this cycle
I0321 19:51:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 19:51:31.133673  543705 disk_info.go:125] begin check local disk info of client
I0321 19:51:31.136249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:51:31.136255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0440 0xc0004a0480]
E0321 19:51:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:51:33.409772  543705 memory.go:184] no items to output this cycle
I0321 19:51:33.409786  543705 cpu.go:275] no items to output this cycle
I0321 19:51:39.141398  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:51:39.141404  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:51:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:51:43.410957  543705 memory.go:191] Add success.
I0321 19:51:43.409811  543705 cpu.go:282] Add success.
I0321 19:51:43.420657  543705 net.go:648] Add success.
I0321 19:51:43.424897  543705 net.go:770] primary dev: ETH0
I0321 19:51:43.424909  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:51:43.425070  543705 net.go:698] Add success.
I0321 19:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:51:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:51:53.409800  543705 memory.go:184] no items to output this cycle
I0321 19:51:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 19:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:52:03.409781  543705 memory.go:184] no items to output this cycle
I0321 19:52:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 19:52:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:52:13.409781  543705 memory.go:191] Add success.
W0321 19:52:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 19:52:13.409809  543705 cpu.go:282] Add success.
W0321 19:52:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:52:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:52:13.420149  543705 net.go:648] Add success.
I0321 19:52:13.423183  543705 net.go:770] primary dev: ETH0
I0321 19:52:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:52:13.423208  543705 net.go:698] Add success.
W0321 19:52:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:52:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 19:52:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:52:14.456890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:52:14.456899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:52:14.456905  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:52:14.456961  543705 disk_worker.go:494] system disk:vda1
I0321 19:52:14.456990  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:52:15.456917  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:52:15.456930  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:52:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:52:16.457996  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:52:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:52:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:52:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:52:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:52:23.409784  543705 memory.go:184] no items to output this cycle
I0321 19:52:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 19:52:31.137675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:52:31.140329  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:52:31.140336  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003153c0 0xc000315400]
E0321 19:52:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:52:33.409772  543705 memory.go:184] no items to output this cycle
I0321 19:52:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 19:52:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:52:43.409789  543705 memory.go:191] Add success.
I0321 19:52:43.409790  543705 cpu.go:282] Add success.
I0321 19:52:43.419968  543705 net.go:648] Add success.
I0321 19:52:43.423055  543705 net.go:770] primary dev: ETH0
I0321 19:52:43.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:52:43.423079  543705 net.go:698] Add success.
I0321 19:52:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:52:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:52:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:52:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:52:53.409781  543705 memory.go:184] no items to output this cycle
I0321 19:52:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 19:53:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:53:03.409780  543705 memory.go:184] no items to output this cycle
I0321 19:53:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 19:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:53:13.409784  543705 memory.go:191] Add success.
I0321 19:53:13.409801  543705 cpu.go:282] Add success.
W0321 19:53:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:53:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:53:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:53:13.420079  543705 net.go:648] Add success.
I0321 19:53:13.422879  543705 net.go:770] primary dev: ETH0
I0321 19:53:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:53:13.422908  543705 net.go:698] Add success.
I0321 19:53:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:53:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:53:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0321 19:53:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:53:14.456550  543705 disk_worker.go:494] system disk:vda1
I0321 19:53:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:53:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:53:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:53:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:53:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:53:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:53:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:53:23.409791  543705 memory.go:184] no items to output this cycle
I0321 19:53:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 19:53:31.141675  543705 disk_info.go:125] begin check local disk info of client
I0321 19:53:31.144271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:53:31.144277  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e6fc0 0xc0004e7000]
E0321 19:53:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:53:33.409773  543705 memory.go:184] no items to output this cycle
I0321 19:53:33.409783  543705 cpu.go:275] no items to output this cycle
E0321 19:53:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:53:43.409783  543705 memory.go:191] Add success.
I0321 19:53:43.409802  543705 cpu.go:282] Add success.
I0321 19:53:43.419969  543705 net.go:648] Add success.
I0321 19:53:43.423124  543705 net.go:770] primary dev: ETH0
I0321 19:53:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:53:43.423149  543705 net.go:698] Add success.
I0321 19:53:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:53:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:53:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:53:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:53:53.409767  543705 memory.go:184] no items to output this cycle
I0321 19:53:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:54:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:54:03.409808  543705 memory.go:184] no items to output this cycle
I0321 19:54:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 19:54:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:54:13.409798  543705 memory.go:191] Add success.
I0321 19:54:13.409816  543705 cpu.go:282] Add success.
W0321 19:54:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:54:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:54:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:54:13.420062  543705 net.go:648] Add success.
I0321 19:54:13.422635  543705 net.go:770] primary dev: ETH0
I0321 19:54:13.422649  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:54:13.422675  543705 net.go:698] Add success.
I0321 19:54:13.463279  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b70463dc-1d47-4775-b124-77d6a3db880a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:54:13.463315  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 19:54:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:54:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:54:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 19:54:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:54:14.456500  543705 disk_worker.go:494] system disk:vda1
I0321 19:54:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:54:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:54:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:54:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:54:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:54:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:54:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:54:23.409805  543705 memory.go:184] no items to output this cycle
I0321 19:54:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 19:54:31.145677  543705 disk_info.go:125] begin check local disk info of client
I0321 19:54:31.148285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:54:31.148292  543705 disk_info.go:196] parse disk info done, disk is : [0xc000313e00 0xc000313e40]
E0321 19:54:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:54:33.409773  543705 memory.go:184] no items to output this cycle
I0321 19:54:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 19:54:39.141743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:54:39.141749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:54:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:54:43.410867  543705 memory.go:191] Add success.
I0321 19:54:43.409816  543705 cpu.go:282] Add success.
I0321 19:54:43.420687  543705 net.go:648] Add success.
I0321 19:54:43.423867  543705 net.go:770] primary dev: ETH0
I0321 19:54:43.423882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:54:43.423896  543705 net.go:698] Add success.
I0321 19:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:54:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:54:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:54:53.409780  543705 memory.go:184] no items to output this cycle
I0321 19:54:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 19:55:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:55:03.409813  543705 memory.go:184] no items to output this cycle
I0321 19:55:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 19:55:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:55:13.409794  543705 cpu.go:282] Add success.
I0321 19:55:13.409806  543705 memory.go:191] Add success.
W0321 19:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:55:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:55:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:55:13.420077  543705 net.go:648] Add success.
I0321 19:55:13.422911  543705 net.go:770] primary dev: ETH0
I0321 19:55:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:55:13.422940  543705 net.go:698] Add success.
I0321 19:55:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:55:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:55:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 19:55:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:55:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 19:55:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:55:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:55:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:55:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:55:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:55:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:55:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:55:23.409793  543705 memory.go:184] no items to output this cycle
I0321 19:55:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 19:55:31.149674  543705 disk_info.go:125] begin check local disk info of client
I0321 19:55:31.152301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:55:31.152307  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331180 0xc0003311c0]
E0321 19:55:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:55:33.409767  543705 memory.go:184] no items to output this cycle
I0321 19:55:33.409784  543705 cpu.go:275] no items to output this cycle
E0321 19:55:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:55:43.409790  543705 memory.go:191] Add success.
I0321 19:55:43.409802  543705 cpu.go:282] Add success.
I0321 19:55:43.419748  543705 net.go:648] Add success.
I0321 19:55:43.422755  543705 net.go:770] primary dev: ETH0
I0321 19:55:43.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:55:43.422783  543705 net.go:698] Add success.
I0321 19:55:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:55:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:55:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:55:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:55:53.409759  543705 memory.go:184] no items to output this cycle
I0321 19:55:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 19:56:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:56:03.409802  543705 memory.go:184] no items to output this cycle
I0321 19:56:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 19:56:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:56:13.409814  543705 memory.go:191] Add success.
I0321 19:56:13.409815  543705 cpu.go:282] Add success.
W0321 19:56:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:56:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:56:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:56:13.420194  543705 net.go:648] Add success.
I0321 19:56:13.422972  543705 net.go:770] primary dev: ETH0
I0321 19:56:13.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:56:13.422998  543705 net.go:698] Add success.
I0321 19:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:56:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:56:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 19:56:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:56:14.456622  543705 disk_worker.go:494] system disk:vda1
I0321 19:56:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:56:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:56:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:56:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:56:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:56:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:56:23.409773  543705 memory.go:184] no items to output this cycle
I0321 19:56:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 19:56:31.153673  543705 disk_info.go:125] begin check local disk info of client
I0321 19:56:31.156251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:56:31.156258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487a00 0xc000487a40]
E0321 19:56:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:56:33.409774  543705 memory.go:184] no items to output this cycle
I0321 19:56:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 19:56:43.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:56:43.409904  543705 memory.go:191] Add success.
I0321 19:56:43.409955  543705 cpu.go:282] Add success.
I0321 19:56:43.419723  543705 net.go:648] Add success.
I0321 19:56:43.422911  543705 net.go:770] primary dev: ETH0
I0321 19:56:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:56:43.422936  543705 net.go:698] Add success.
I0321 19:56:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:56:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:56:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:56:53.409783  543705 memory.go:184] no items to output this cycle
I0321 19:56:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 19:57:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:57:03.409772  543705 memory.go:184] no items to output this cycle
I0321 19:57:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 19:57:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:57:13.409794  543705 cpu.go:282] Add success.
I0321 19:57:13.409799  543705 memory.go:191] Add success.
W0321 19:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:57:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:57:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:57:13.420054  543705 net.go:648] Add success.
I0321 19:57:13.422785  543705 net.go:770] primary dev: ETH0
I0321 19:57:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:57:13.422812  543705 net.go:698] Add success.
I0321 19:57:13.429772  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 19:57:13.453007  543705 event_worker.go:152] Polling the log file for events...
I0321 19:57:13.468385  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4ecb859-c5f8-4cfb-84ff-79dd49b6914e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 19:57:13.468419  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 19:57:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:57:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 19:57:14.455221  543705 disk_worker.go:728] disk inode is not compliant
E0321 19:57:14.456807  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 19:57:14.456828  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 19:57:14.456834  543705 custom_config.go:64] query custom config with name: gpu
I0321 19:57:14.456959  543705 disk_worker.go:494] system disk:vda1
I0321 19:57:14.456990  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 19:57:15.456870  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 19:57:15.456885  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:57:16.458092  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 19:57:16.458162  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 19:57:16.458166  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:57:16.458192  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:57:16.472579  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:57:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:57:23.409807  543705 memory.go:184] no items to output this cycle
I0321 19:57:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 19:57:31.157680  543705 disk_info.go:125] begin check local disk info of client
I0321 19:57:31.160218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:57:31.160224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de040 0xc0003de080]
E0321 19:57:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:57:33.409792  543705 memory.go:184] no items to output this cycle
I0321 19:57:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 19:57:39.141896  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 19:57:39.141903  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 19:57:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:57:43.410701  543705 memory.go:191] Add success.
I0321 19:57:43.409789  543705 cpu.go:282] Add success.
I0321 19:57:43.420764  543705 net.go:648] Add success.
I0321 19:57:43.423363  543705 net.go:770] primary dev: ETH0
I0321 19:57:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:57:43.423386  543705 net.go:698] Add success.
I0321 19:57:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:57:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:57:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:57:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:57:53.409773  543705 memory.go:184] no items to output this cycle
I0321 19:57:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 19:58:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:58:03.409776  543705 memory.go:184] no items to output this cycle
I0321 19:58:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 19:58:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:58:13.409786  543705 memory.go:191] Add success.
I0321 19:58:13.409811  543705 cpu.go:282] Add success.
W0321 19:58:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:58:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:58:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:58:13.420063  543705 net.go:648] Add success.
I0321 19:58:13.422879  543705 net.go:770] primary dev: ETH0
I0321 19:58:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:58:13.422904  543705 net.go:698] Add success.
I0321 19:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:58:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:58:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 19:58:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:58:14.456512  543705 disk_worker.go:494] system disk:vda1
I0321 19:58:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:58:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:58:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:58:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:58:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:58:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:58:23.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:58:23.409836  543705 memory.go:184] no items to output this cycle
I0321 19:58:23.409845  543705 cpu.go:275] no items to output this cycle
I0321 19:58:31.161682  543705 disk_info.go:125] begin check local disk info of client
I0321 19:58:31.164192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:58:31.164199  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298fc0 0xc000299000]
E0321 19:58:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:58:33.409781  543705 memory.go:184] no items to output this cycle
I0321 19:58:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 19:58:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:58:43.409817  543705 memory.go:191] Add success.
I0321 19:58:43.409826  543705 cpu.go:282] Add success.
I0321 19:58:43.420102  543705 net.go:648] Add success.
I0321 19:58:43.423170  543705 net.go:770] primary dev: ETH0
I0321 19:58:43.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:58:43.423200  543705 net.go:698] Add success.
I0321 19:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:58:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:58:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 19:58:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:58:53.409822  543705 memory.go:184] no items to output this cycle
E0321 19:59:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:59:03.409776  543705 memory.go:184] no items to output this cycle
I0321 19:59:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 19:59:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:59:13.409792  543705 memory.go:191] Add success.
I0321 19:59:13.409811  543705 cpu.go:282] Add success.
W0321 19:59:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 19:59:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 19:59:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 19:59:13.420062  543705 net.go:648] Add success.
I0321 19:59:13.422692  543705 net.go:770] primary dev: ETH0
I0321 19:59:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:59:13.422720  543705 net.go:698] Add success.
I0321 19:59:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 19:59:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 19:59:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 19:59:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 19:59:14.456572  543705 disk_worker.go:494] system disk:vda1
I0321 19:59:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 19:59:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 19:59:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:59:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:59:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0321 19:59:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0321 19:59:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:59:23.409796  543705 memory.go:184] no items to output this cycle
I0321 19:59:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 19:59:31.165679  543705 disk_info.go:125] begin check local disk info of client
I0321 19:59:31.168257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 19:59:31.168265  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492d00 0xc000492d40]
E0321 19:59:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:59:33.409799  543705 memory.go:184] no items to output this cycle
I0321 19:59:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 19:59:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:59:43.409816  543705 memory.go:191] Add success.
I0321 19:59:43.409821  543705 cpu.go:282] Add success.
I0321 19:59:43.419994  543705 net.go:648] Add success.
I0321 19:59:43.423105  543705 net.go:770] primary dev: ETH0
I0321 19:59:43.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0321 19:59:43.423133  543705 net.go:698] Add success.
I0321 19:59:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 19:59:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 19:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 19:59:53.410393  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 19:59:53.410412  543705 memory.go:184] no items to output this cycle
I0321 19:59:53.410424  543705 cpu.go:275] no items to output this cycle
E0321 20:00:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:00:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 20:00:03.409791  543705 memory.go:184] no items to output this cycle
E0321 20:00:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:00:13.409789  543705 memory.go:191] Add success.
I0321 20:00:13.409798  543705 cpu.go:282] Add success.
W0321 20:00:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:00:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:00:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:00:13.420045  543705 net.go:648] Add success.
I0321 20:00:13.422941  543705 net.go:770] primary dev: ETH0
I0321 20:00:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:00:13.422967  543705 net.go:698] Add success.
I0321 20:00:13.467174  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"febe2c8b-bf59-4f29-b74f-a641b2676646","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:00:13.467208  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:00:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:00:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 20:00:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:00:14.456509  543705 disk_worker.go:494] system disk:vda1
I0321 20:00:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:00:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:00:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:00:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:00:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:00:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:00:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:00:23.409778  543705 memory.go:184] no items to output this cycle
I0321 20:00:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 20:00:31.169681  543705 disk_info.go:125] begin check local disk info of client
I0321 20:00:31.172255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:00:31.172261  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003542c0 0xc000354300]
E0321 20:00:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:00:33.409798  543705 memory.go:184] no items to output this cycle
I0321 20:00:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 20:00:39.142046  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:00:39.142052  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:00:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:00:43.410667  543705 memory.go:191] Add success.
I0321 20:00:43.409804  543705 cpu.go:282] Add success.
I0321 20:00:43.420650  543705 net.go:648] Add success.
I0321 20:00:43.423634  543705 net.go:770] primary dev: ETH0
I0321 20:00:43.423648  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:00:43.423660  543705 net.go:698] Add success.
I0321 20:00:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:00:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:00:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:00:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:00:53.409786  543705 cpu.go:275] no items to output this cycle
I0321 20:00:53.409798  543705 memory.go:184] no items to output this cycle
E0321 20:01:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:01:03.409781  543705 cpu.go:275] no items to output this cycle
I0321 20:01:03.409787  543705 memory.go:184] no items to output this cycle
E0321 20:01:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:01:13.409811  543705 memory.go:191] Add success.
I0321 20:01:13.409813  543705 cpu.go:282] Add success.
W0321 20:01:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:01:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:01:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:01:13.420173  543705 net.go:648] Add success.
I0321 20:01:13.422704  543705 net.go:770] primary dev: ETH0
I0321 20:01:13.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:01:13.422729  543705 net.go:698] Add success.
I0321 20:01:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:01:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:01:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 20:01:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:01:14.456615  543705 disk_worker.go:494] system disk:vda1
I0321 20:01:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:01:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:01:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:01:16.458080  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:01:16.458110  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:01:16.472546  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:01:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:01:23.409810  543705 memory.go:184] no items to output this cycle
I0321 20:01:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 20:01:31.173675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:01:31.176293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:01:31.176299  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d1200 0xc0003d1240]
E0321 20:01:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:01:33.409762  543705 memory.go:184] no items to output this cycle
I0321 20:01:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:01:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:01:43.409813  543705 memory.go:191] Add success.
I0321 20:01:43.409817  543705 cpu.go:282] Add success.
I0321 20:01:43.419839  543705 net.go:648] Add success.
I0321 20:01:43.422890  543705 net.go:770] primary dev: ETH0
I0321 20:01:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:01:43.422916  543705 net.go:698] Add success.
I0321 20:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:01:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:01:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:01:53.409814  543705 memory.go:184] no items to output this cycle
I0321 20:01:53.409827  543705 cpu.go:275] no items to output this cycle
E0321 20:02:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:02:03.409803  543705 memory.go:184] no items to output this cycle
I0321 20:02:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 20:02:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:02:13.409790  543705 memory.go:191] Add success.
W0321 20:02:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 20:02:13.409820  543705 cpu.go:282] Add success.
W0321 20:02:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:02:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:02:13.420447  543705 net.go:648] Add success.
I0321 20:02:13.423303  543705 net.go:770] primary dev: ETH0
I0321 20:02:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:02:13.423329  543705 net.go:698] Add success.
W0321 20:02:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:02:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 20:02:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:02:14.456925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:02:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:02:14.456941  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:02:14.457026  543705 disk_worker.go:494] system disk:vda1
I0321 20:02:14.457058  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:02:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:02:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:02:16.458088  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:02:16.458095  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:02:16.458146  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:02:16.458164  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:02:16.472525  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:02:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:02:23.409776  543705 memory.go:184] no items to output this cycle
I0321 20:02:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 20:02:31.177674  543705 disk_info.go:125] begin check local disk info of client
I0321 20:02:31.180214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:02:31.180220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000355380 0xc0003553c0]
E0321 20:02:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:02:33.409786  543705 memory.go:184] no items to output this cycle
I0321 20:02:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 20:02:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:02:43.409780  543705 memory.go:191] Add success.
I0321 20:02:43.409799  543705 cpu.go:282] Add success.
I0321 20:02:43.419901  543705 net.go:648] Add success.
I0321 20:02:43.422381  543705 net.go:770] primary dev: ETH0
I0321 20:02:43.422469  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:02:43.422484  543705 net.go:698] Add success.
I0321 20:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:02:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:02:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:02:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:02:53.409783  543705 memory.go:184] no items to output this cycle
I0321 20:02:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:03:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:03:03.409775  543705 memory.go:184] no items to output this cycle
I0321 20:03:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 20:03:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:03:13.409807  543705 memory.go:191] Add success.
I0321 20:03:13.409812  543705 cpu.go:282] Add success.
W0321 20:03:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:03:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:03:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:03:13.420065  543705 net.go:648] Add success.
I0321 20:03:13.422895  543705 net.go:770] primary dev: ETH0
I0321 20:03:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:03:13.422921  543705 net.go:698] Add success.
I0321 20:03:13.465061  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6c5f6657-9a61-433a-8629-d616d04ee9fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:03:13.465101  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:03:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:03:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:03:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 20:03:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:03:14.456606  543705 disk_worker.go:494] system disk:vda1
I0321 20:03:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:03:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:03:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:03:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:03:16.472487  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:03:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:03:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 20:03:23.409797  543705 memory.go:184] no items to output this cycle
I0321 20:03:31.181677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:03:31.184229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:03:31.184237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6000 0xc0003b6040]
E0321 20:03:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:03:33.409798  543705 memory.go:184] no items to output this cycle
I0321 20:03:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 20:03:39.142204  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:03:39.142211  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:03:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:03:43.410698  543705 memory.go:191] Add success.
I0321 20:03:43.409806  543705 cpu.go:282] Add success.
I0321 20:03:43.420465  543705 net.go:648] Add success.
I0321 20:03:43.423222  543705 net.go:770] primary dev: ETH0
I0321 20:03:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:03:43.423246  543705 net.go:698] Add success.
I0321 20:03:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:03:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:03:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:03:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:03:53.409809  543705 memory.go:184] no items to output this cycle
I0321 20:03:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 20:04:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:04:03.409782  543705 memory.go:184] no items to output this cycle
I0321 20:04:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 20:04:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:04:13.409797  543705 memory.go:191] Add success.
I0321 20:04:13.409805  543705 cpu.go:282] Add success.
W0321 20:04:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:04:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:04:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:04:13.420096  543705 net.go:648] Add success.
I0321 20:04:13.422802  543705 net.go:770] primary dev: ETH0
I0321 20:04:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:04:13.422834  543705 net.go:698] Add success.
I0321 20:04:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:04:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:04:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 20:04:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:04:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 20:04:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:04:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:04:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:04:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:04:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:04:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:04:23.409810  543705 memory.go:184] no items to output this cycle
I0321 20:04:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 20:04:31.185677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:04:31.188225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:04:31.188232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af80 0xc00007afc0]
E0321 20:04:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:04:33.409793  543705 memory.go:184] no items to output this cycle
I0321 20:04:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 20:04:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:04:43.409816  543705 memory.go:191] Add success.
I0321 20:04:43.409826  543705 cpu.go:282] Add success.
I0321 20:04:43.419885  543705 net.go:648] Add success.
I0321 20:04:43.423238  543705 net.go:770] primary dev: ETH0
I0321 20:04:43.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:04:43.423264  543705 net.go:698] Add success.
I0321 20:04:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:04:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:04:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:04:53.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:04:53.409904  543705 memory.go:184] no items to output this cycle
I0321 20:04:53.410010  543705 cpu.go:275] no items to output this cycle
E0321 20:05:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:05:03.409803  543705 memory.go:184] no items to output this cycle
I0321 20:05:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 20:05:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:05:13.409780  543705 memory.go:191] Add success.
I0321 20:05:13.409799  543705 cpu.go:282] Add success.
W0321 20:05:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:05:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:05:13.420039  543705 net.go:648] Add success.
I0321 20:05:13.422875  543705 net.go:770] primary dev: ETH0
I0321 20:05:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:05:13.422899  543705 net.go:698] Add success.
I0321 20:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:05:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:05:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 20:05:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:05:14.456562  543705 disk_worker.go:494] system disk:vda1
I0321 20:05:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:05:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:05:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:05:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:05:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:05:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:05:23.409773  543705 memory.go:184] no items to output this cycle
I0321 20:05:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 20:05:31.189677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:05:31.192228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:05:31.192234  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381440 0xc000381480]
E0321 20:05:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:05:33.409790  543705 memory.go:184] no items to output this cycle
I0321 20:05:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 20:05:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:05:43.409783  543705 memory.go:191] Add success.
I0321 20:05:43.409802  543705 cpu.go:282] Add success.
I0321 20:05:43.419945  543705 net.go:648] Add success.
I0321 20:05:43.422747  543705 net.go:770] primary dev: ETH0
I0321 20:05:43.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:05:43.422773  543705 net.go:698] Add success.
I0321 20:05:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:05:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:05:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:05:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:05:53.409775  543705 memory.go:184] no items to output this cycle
I0321 20:05:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 20:06:03.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:06:03.409878  543705 memory.go:184] no items to output this cycle
I0321 20:06:03.409948  543705 cpu.go:275] no items to output this cycle
E0321 20:06:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:06:13.409819  543705 memory.go:191] Add success.
I0321 20:06:13.409826  543705 cpu.go:282] Add success.
W0321 20:06:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:06:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:06:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:06:13.420086  543705 net.go:648] Add success.
I0321 20:06:13.422878  543705 net.go:770] primary dev: ETH0
I0321 20:06:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:06:13.422903  543705 net.go:698] Add success.
I0321 20:06:13.468387  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ac834ad-9612-4990-ae4d-1fd1daee6d54","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:06:13.468420  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:06:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:06:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:06:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 20:06:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:06:14.456525  543705 disk_worker.go:494] system disk:vda1
I0321 20:06:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:06:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:06:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:06:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:06:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:06:23.409784  543705 memory.go:184] no items to output this cycle
I0321 20:06:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 20:06:31.193675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:06:31.196209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:06:31.196216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa900 0xc0001aa940]
E0321 20:06:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:06:33.409793  543705 memory.go:184] no items to output this cycle
I0321 20:06:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 20:06:39.142350  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:06:39.142356  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:06:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:06:43.410840  543705 memory.go:191] Add success.
I0321 20:06:43.409822  543705 cpu.go:282] Add success.
I0321 20:06:43.420541  543705 net.go:648] Add success.
I0321 20:06:43.423000  543705 net.go:770] primary dev: ETH0
I0321 20:06:43.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:06:43.423026  543705 net.go:698] Add success.
I0321 20:06:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:06:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:06:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:06:53.409781  543705 memory.go:184] no items to output this cycle
I0321 20:06:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 20:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:07:03.409777  543705 memory.go:184] no items to output this cycle
I0321 20:07:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 20:07:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:07:13.409817  543705 memory.go:191] Add success.
I0321 20:07:13.409820  543705 cpu.go:282] Add success.
W0321 20:07:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:07:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:07:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:07:13.420074  543705 net.go:648] Add success.
I0321 20:07:13.422765  543705 net.go:770] primary dev: ETH0
I0321 20:07:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:07:13.422806  543705 net.go:698] Add success.
I0321 20:07:13.453363  543705 event_worker.go:152] Polling the log file for events...
W0321 20:07:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:07:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 20:07:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:07:14.456775  543705 disk_worker.go:494] system disk:vda1
I0321 20:07:14.456815  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:07:14.457056  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:07:14.457064  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:07:14.457068  543705 custom_config.go:64] query custom config with name: gpu
E0321 20:07:15.456882  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:07:15.456891  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:07:16.458087  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:07:16.458156  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0321 20:07:16.458157  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:07:16.458174  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:07:16.472587  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:07:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:07:23.409783  543705 memory.go:184] no items to output this cycle
I0321 20:07:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 20:07:31.197674  543705 disk_info.go:125] begin check local disk info of client
I0321 20:07:31.200248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:07:31.200253  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329ac0 0xc000329b00]
E0321 20:07:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:07:33.409772  543705 memory.go:184] no items to output this cycle
I0321 20:07:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 20:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:07:43.409786  543705 memory.go:191] Add success.
I0321 20:07:43.409814  543705 cpu.go:282] Add success.
I0321 20:07:43.419875  543705 net.go:648] Add success.
I0321 20:07:43.422703  543705 net.go:770] primary dev: ETH0
I0321 20:07:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:07:43.422728  543705 net.go:698] Add success.
I0321 20:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:07:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:07:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:07:53.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:07:53.409885  543705 memory.go:184] no items to output this cycle
I0321 20:07:53.410039  543705 cpu.go:275] no items to output this cycle
E0321 20:08:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:08:03.409780  543705 memory.go:184] no items to output this cycle
I0321 20:08:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:08:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:08:13.409784  543705 memory.go:191] Add success.
I0321 20:08:13.409785  543705 cpu.go:282] Add success.
W0321 20:08:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:08:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:08:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:08:13.420138  543705 net.go:648] Add success.
I0321 20:08:13.423675  543705 net.go:770] primary dev: ETH0
I0321 20:08:13.423688  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:08:13.423700  543705 net.go:698] Add success.
I0321 20:08:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:08:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:08:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 20:08:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:08:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 20:08:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:08:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:08:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:08:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:08:23.409804  543705 memory.go:184] no items to output this cycle
I0321 20:08:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 20:08:31.201674  543705 disk_info.go:125] begin check local disk info of client
I0321 20:08:31.204228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:08:31.204235  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029ab80 0xc00029abc0]
E0321 20:08:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:08:33.409791  543705 memory.go:184] no items to output this cycle
I0321 20:08:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 20:08:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:08:43.409786  543705 memory.go:191] Add success.
I0321 20:08:43.409812  543705 cpu.go:282] Add success.
I0321 20:08:43.419912  543705 net.go:648] Add success.
I0321 20:08:43.422616  543705 net.go:770] primary dev: ETH0
I0321 20:08:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:08:43.422643  543705 net.go:698] Add success.
I0321 20:08:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:08:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:08:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:08:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:08:53.409793  543705 memory.go:184] no items to output this cycle
I0321 20:08:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 20:09:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:09:03.409779  543705 memory.go:184] no items to output this cycle
I0321 20:09:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:09:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:09:13.409775  543705 memory.go:191] Add success.
W0321 20:09:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 20:09:13.409808  543705 cpu.go:282] Add success.
W0321 20:09:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:09:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:09:13.420065  543705 net.go:648] Add success.
I0321 20:09:13.422760  543705 net.go:770] primary dev: ETH0
I0321 20:09:13.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:09:13.422789  543705 net.go:698] Add success.
I0321 20:09:13.468904  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b68d2066-5b4b-4cfb-8de8-2a48e23e46f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:09:13.468941  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:09:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:09:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:09:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 20:09:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:09:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 20:09:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:09:15.455629  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:09:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:09:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:09:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:09:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:09:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:09:23.409812  543705 memory.go:184] no items to output this cycle
I0321 20:09:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 20:09:31.205678  543705 disk_info.go:125] begin check local disk info of client
I0321 20:09:31.208212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:09:31.208218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001da180 0xc0001da1c0]
E0321 20:09:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:09:33.409756  543705 memory.go:184] no items to output this cycle
I0321 20:09:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 20:09:39.145414  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:09:39.145421  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:09:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:09:43.410803  543705 memory.go:191] Add success.
I0321 20:09:43.409824  543705 cpu.go:282] Add success.
I0321 20:09:43.420598  543705 net.go:648] Add success.
I0321 20:09:43.423161  543705 net.go:770] primary dev: ETH0
I0321 20:09:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:09:43.423186  543705 net.go:698] Add success.
I0321 20:09:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:09:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:09:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:09:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:09:53.409809  543705 memory.go:184] no items to output this cycle
I0321 20:09:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 20:10:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:10:03.409788  543705 memory.go:184] no items to output this cycle
I0321 20:10:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 20:10:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:10:13.409815  543705 memory.go:191] Add success.
I0321 20:10:13.409825  543705 cpu.go:282] Add success.
W0321 20:10:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:10:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:10:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:10:13.420147  543705 net.go:648] Add success.
I0321 20:10:13.422536  543705 net.go:770] primary dev: ETH0
I0321 20:10:13.422549  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:10:13.422562  543705 net.go:698] Add success.
I0321 20:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:10:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:10:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 20:10:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:10:14.456612  543705 disk_worker.go:494] system disk:vda1
I0321 20:10:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:10:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:10:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:10:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:10:16.472497  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:10:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:10:23.409789  543705 memory.go:184] no items to output this cycle
I0321 20:10:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 20:10:31.209675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:10:31.212168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:10:31.212175  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046b480 0xc00046b4c0]
E0321 20:10:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:10:33.409800  543705 memory.go:184] no items to output this cycle
I0321 20:10:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 20:10:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:10:43.409815  543705 memory.go:191] Add success.
I0321 20:10:43.409821  543705 cpu.go:282] Add success.
I0321 20:10:43.419961  543705 net.go:648] Add success.
I0321 20:10:43.422631  543705 net.go:770] primary dev: ETH0
I0321 20:10:43.422644  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:10:43.422656  543705 net.go:698] Add success.
I0321 20:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:10:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:10:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:10:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:10:53.409801  543705 memory.go:184] no items to output this cycle
I0321 20:10:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 20:11:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:11:03.409789  543705 memory.go:184] no items to output this cycle
I0321 20:11:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 20:11:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:11:13.409815  543705 memory.go:191] Add success.
I0321 20:11:13.409824  543705 cpu.go:282] Add success.
W0321 20:11:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:11:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:11:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:11:13.420142  543705 net.go:648] Add success.
I0321 20:11:13.423205  543705 net.go:770] primary dev: ETH0
I0321 20:11:13.423217  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:11:13.423228  543705 net.go:698] Add success.
I0321 20:11:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:11:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:11:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 20:11:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:11:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 20:11:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:11:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:11:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:11:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:11:16.472502  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:11:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:11:23.409818  543705 memory.go:184] no items to output this cycle
I0321 20:11:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 20:11:31.213685  543705 disk_info.go:125] begin check local disk info of client
I0321 20:11:31.216298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:11:31.216304  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fe200 0xc0004fe240]
E0321 20:11:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:11:33.409779  543705 memory.go:184] no items to output this cycle
I0321 20:11:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 20:11:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:11:43.409815  543705 memory.go:191] Add success.
I0321 20:11:43.409818  543705 cpu.go:282] Add success.
I0321 20:11:43.419988  543705 net.go:648] Add success.
I0321 20:11:43.422817  543705 net.go:770] primary dev: ETH0
I0321 20:11:43.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:11:43.422843  543705 net.go:698] Add success.
I0321 20:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:11:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:11:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:11:53.409767  543705 memory.go:184] no items to output this cycle
I0321 20:11:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 20:12:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:12:03.409777  543705 memory.go:184] no items to output this cycle
I0321 20:12:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 20:12:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:12:13.409805  543705 memory.go:191] Add success.
I0321 20:12:13.409811  543705 cpu.go:282] Add success.
W0321 20:12:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:12:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:12:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:12:13.420051  543705 net.go:648] Add success.
I0321 20:12:13.423033  543705 net.go:770] primary dev: ETH0
I0321 20:12:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:12:13.423058  543705 net.go:698] Add success.
I0321 20:12:13.470203  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"425a9ac2-fe78-463c-aa56-0bd396abe773","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:12:13.470237  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 20:12:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:12:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 20:12:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:12:14.455872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:12:14.455881  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:12:14.455886  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:12:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 20:12:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:12:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:12:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:12:16.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:12:16.458008  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:12:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:12:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:12:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:12:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:12:23.409810  543705 memory.go:184] no items to output this cycle
I0321 20:12:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 20:12:31.217675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:12:31.220235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:12:31.220242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fef80 0xc0004fefc0]
E0321 20:12:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:12:33.409790  543705 memory.go:184] no items to output this cycle
I0321 20:12:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 20:12:39.145741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:12:39.145749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:12:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:12:43.410727  543705 memory.go:191] Add success.
I0321 20:12:43.409810  543705 cpu.go:282] Add success.
I0321 20:12:43.420434  543705 net.go:648] Add success.
I0321 20:12:43.423043  543705 net.go:770] primary dev: ETH0
I0321 20:12:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:12:43.423072  543705 net.go:698] Add success.
I0321 20:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:12:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:12:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:12:53.410375  543705 memory.go:184] no items to output this cycle
I0321 20:12:53.410378  543705 cpu.go:275] no items to output this cycle
E0321 20:13:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:13:03.409779  543705 memory.go:184] no items to output this cycle
I0321 20:13:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 20:13:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:13:13.409805  543705 memory.go:191] Add success.
I0321 20:13:13.409818  543705 cpu.go:282] Add success.
W0321 20:13:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:13:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:13:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:13:13.420192  543705 net.go:648] Add success.
I0321 20:13:13.422889  543705 net.go:770] primary dev: ETH0
I0321 20:13:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:13:13.422923  543705 net.go:698] Add success.
I0321 20:13:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:13:14.455280  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:13:14.455291  543705 disk_worker.go:708] disk space is not compliant
W0321 20:13:14.455294  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:13:14.456875  543705 disk_worker.go:494] system disk:vda1
I0321 20:13:14.456927  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:13:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:13:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:13:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:13:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:13:16.472521  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:13:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:13:23.409777  543705 memory.go:184] no items to output this cycle
I0321 20:13:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 20:13:31.221686  543705 disk_info.go:125] begin check local disk info of client
I0321 20:13:31.224233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:13:31.224240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f40 0xc0000c4f80]
E0321 20:13:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:13:33.409789  543705 memory.go:184] no items to output this cycle
I0321 20:13:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:13:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:13:43.409794  543705 cpu.go:282] Add success.
I0321 20:13:43.409802  543705 memory.go:191] Add success.
I0321 20:13:43.420074  543705 net.go:648] Add success.
I0321 20:13:43.422781  543705 net.go:770] primary dev: ETH0
I0321 20:13:43.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:13:43.422811  543705 net.go:698] Add success.
I0321 20:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:13:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:13:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:13:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:13:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 20:13:53.409787  543705 memory.go:184] no items to output this cycle
E0321 20:14:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:14:03.409792  543705 memory.go:184] no items to output this cycle
I0321 20:14:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 20:14:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:14:13.409787  543705 cpu.go:282] Add success.
I0321 20:14:13.409790  543705 memory.go:191] Add success.
W0321 20:14:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:14:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:14:13.419877  543705 net.go:770] primary dev: ETH0
I0321 20:14:13.419890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:14:13.419902  543705 net.go:698] Add success.
I0321 20:14:13.420409  543705 net.go:648] Add success.
I0321 20:14:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:14:14.455087  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:14:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0321 20:14:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:14:14.456471  543705 disk_worker.go:494] system disk:vda1
I0321 20:14:14.456512  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:14:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:14:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:14:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:14:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:14:16.472549  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:14:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:14:23.409778  543705 memory.go:184] no items to output this cycle
I0321 20:14:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 20:14:31.225854  543705 disk_info.go:125] begin check local disk info of client
I0321 20:14:31.228423  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:14:31.228430  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8fc0 0xc0004d9000]
E0321 20:14:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:14:33.409791  543705 memory.go:184] no items to output this cycle
I0321 20:14:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 20:14:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:14:43.409794  543705 memory.go:191] Add success.
I0321 20:14:43.409796  543705 cpu.go:282] Add success.
I0321 20:14:43.420031  543705 net.go:648] Add success.
I0321 20:14:43.423041  543705 net.go:770] primary dev: ETH0
I0321 20:14:43.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:14:43.423079  543705 net.go:698] Add success.
I0321 20:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:14:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:14:53.409780  543705 memory.go:184] no items to output this cycle
I0321 20:14:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 20:15:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:15:03.409764  543705 memory.go:184] no items to output this cycle
I0321 20:15:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 20:15:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:15:13.409808  543705 memory.go:191] Add success.
I0321 20:15:13.409813  543705 cpu.go:282] Add success.
W0321 20:15:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:15:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:15:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:15:13.420053  543705 net.go:648] Add success.
I0321 20:15:13.422847  543705 net.go:770] primary dev: ETH0
I0321 20:15:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:15:13.422871  543705 net.go:698] Add success.
I0321 20:15:13.464241  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8de0b39b-634c-45c2-875f-958e01bd9825","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:15:13.464276  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:15:14.455321  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:15:14.455494  543705 disk_worker.go:708] disk space is not compliant
W0321 20:15:14.455502  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:15:14.457992  543705 disk_worker.go:494] system disk:vda1
I0321 20:15:14.458022  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:15:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:15:16.458021  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:15:16.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:15:16.458131  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:15:16.472582  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:15:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:15:23.409792  543705 memory.go:184] no items to output this cycle
I0321 20:15:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 20:15:31.229677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:15:31.232225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:15:31.232231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bdec0 0xc0002bdf00]
E0321 20:15:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:15:33.409789  543705 memory.go:184] no items to output this cycle
I0321 20:15:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 20:15:39.149443  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:15:39.149451  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:15:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:15:43.410707  543705 memory.go:191] Add success.
I0321 20:15:43.409806  543705 cpu.go:282] Add success.
I0321 20:15:43.420411  543705 net.go:648] Add success.
I0321 20:15:43.423541  543705 net.go:770] primary dev: ETH0
I0321 20:15:43.423554  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:15:43.423567  543705 net.go:698] Add success.
I0321 20:15:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:15:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:15:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:15:53.409800  543705 memory.go:184] no items to output this cycle
I0321 20:15:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 20:16:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:16:03.409780  543705 memory.go:184] no items to output this cycle
I0321 20:16:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 20:16:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:16:13.409778  543705 memory.go:191] Add success.
W0321 20:16:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 20:16:13.409809  543705 cpu.go:282] Add success.
W0321 20:16:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:16:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:16:13.420081  543705 net.go:648] Add success.
I0321 20:16:13.423744  543705 net.go:770] primary dev: ETH0
I0321 20:16:13.423759  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:16:13.423773  543705 net.go:698] Add success.
I0321 20:16:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:16:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:16:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0321 20:16:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:16:14.458897  543705 disk_worker.go:494] system disk:vda1
I0321 20:16:14.458927  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:16:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:16:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:16:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:16:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:16:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:16:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:16:23.409774  543705 memory.go:184] no items to output this cycle
I0321 20:16:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 20:16:31.233676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:16:31.236267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:16:31.236274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9e00 0xc0004d9e40]
E0321 20:16:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:16:33.409778  543705 memory.go:184] no items to output this cycle
I0321 20:16:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 20:16:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:16:43.409821  543705 memory.go:191] Add success.
I0321 20:16:43.409832  543705 cpu.go:282] Add success.
I0321 20:16:43.419897  543705 net.go:648] Add success.
I0321 20:16:43.422720  543705 net.go:770] primary dev: ETH0
I0321 20:16:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:16:43.422745  543705 net.go:698] Add success.
I0321 20:16:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:16:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:16:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:16:53.410357  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:16:53.410373  543705 memory.go:184] no items to output this cycle
I0321 20:16:53.410393  543705 cpu.go:275] no items to output this cycle
E0321 20:17:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:17:03.409783  543705 memory.go:184] no items to output this cycle
I0321 20:17:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 20:17:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:17:13.409795  543705 memory.go:191] Add success.
I0321 20:17:13.409798  543705 cpu.go:282] Add success.
W0321 20:17:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:17:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:17:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:17:13.420246  543705 net.go:648] Add success.
I0321 20:17:13.422929  543705 net.go:770] primary dev: ETH0
I0321 20:17:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:17:13.422953  543705 net.go:698] Add success.
I0321 20:17:13.453603  543705 event_worker.go:152] Polling the log file for events...
W0321 20:17:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:17:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 20:17:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:17:14.455878  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:17:14.455886  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:17:14.455892  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:17:14.456549  543705 disk_worker.go:494] system disk:vda1
I0321 20:17:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:17:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:17:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:17:16.458155  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:17:16.458203  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:17:16.458230  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:17:16.458257  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:17:16.472734  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:17:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:17:23.409792  543705 memory.go:184] no items to output this cycle
I0321 20:17:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 20:17:31.237676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:17:31.240260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:17:31.240266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d90c0 0xc0004d9100]
I0321 20:17:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:17:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:17:33.409804  543705 memory.go:184] no items to output this cycle
E0321 20:17:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:17:43.409789  543705 memory.go:191] Add success.
I0321 20:17:43.409789  543705 cpu.go:282] Add success.
I0321 20:17:43.419849  543705 net.go:648] Add success.
I0321 20:17:43.422329  543705 net.go:770] primary dev: ETH0
I0321 20:17:43.422344  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:17:43.422359  543705 net.go:698] Add success.
I0321 20:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:17:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:17:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:17:53.409777  543705 memory.go:184] no items to output this cycle
I0321 20:17:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 20:18:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:18:03.409781  543705 cpu.go:275] no items to output this cycle
I0321 20:18:03.409785  543705 memory.go:184] no items to output this cycle
E0321 20:18:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:18:13.409789  543705 memory.go:191] Add success.
I0321 20:18:13.409789  543705 cpu.go:282] Add success.
W0321 20:18:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:18:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:18:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:18:13.420075  543705 net.go:648] Add success.
I0321 20:18:13.423118  543705 net.go:770] primary dev: ETH0
I0321 20:18:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:18:13.423146  543705 net.go:698] Add success.
I0321 20:18:13.472229  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eb595e66-4339-4886-8d1f-a89d100aad7d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:18:13.472263  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:18:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:18:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:18:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 20:18:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:18:14.456702  543705 disk_worker.go:494] system disk:vda1
I0321 20:18:14.456799  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:18:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:18:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:18:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:18:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:18:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:18:23.409782  543705 memory.go:184] no items to output this cycle
I0321 20:18:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 20:18:31.241676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:18:31.244217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:18:31.244223  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049dc80 0xc00049dcc0]
E0321 20:18:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:18:33.409812  543705 memory.go:184] no items to output this cycle
I0321 20:18:33.409825  543705 cpu.go:275] no items to output this cycle
I0321 20:18:39.149742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:18:39.149749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:18:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:18:43.410729  543705 memory.go:191] Add success.
I0321 20:18:43.409818  543705 cpu.go:282] Add success.
I0321 20:18:43.420416  543705 net.go:648] Add success.
I0321 20:18:43.423823  543705 net.go:770] primary dev: ETH0
I0321 20:18:43.423839  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:18:43.423854  543705 net.go:698] Add success.
I0321 20:18:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:18:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:18:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:18:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:18:53.409796  543705 memory.go:184] no items to output this cycle
I0321 20:18:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 20:19:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:19:03.409778  543705 memory.go:184] no items to output this cycle
I0321 20:19:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 20:19:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:19:13.409825  543705 memory.go:191] Add success.
I0321 20:19:13.409833  543705 cpu.go:282] Add success.
W0321 20:19:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:19:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:19:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:19:13.420143  543705 net.go:648] Add success.
I0321 20:19:13.423027  543705 net.go:770] primary dev: ETH0
I0321 20:19:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:19:13.423053  543705 net.go:698] Add success.
I0321 20:19:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:19:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:19:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 20:19:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:19:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 20:19:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:19:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:19:16.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:19:16.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:19:16.458139  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:19:16.472594  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:19:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:19:23.409777  543705 memory.go:184] no items to output this cycle
I0321 20:19:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 20:19:31.245675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:19:31.248397  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:19:31.248404  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b3c0 0xc00007b400]
E0321 20:19:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:19:33.409806  543705 memory.go:184] no items to output this cycle
I0321 20:19:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 20:19:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:19:43.409811  543705 memory.go:191] Add success.
I0321 20:19:43.409819  543705 cpu.go:282] Add success.
I0321 20:19:43.419910  543705 net.go:648] Add success.
I0321 20:19:43.422505  543705 net.go:770] primary dev: ETH0
I0321 20:19:43.422518  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:19:43.422540  543705 net.go:698] Add success.
I0321 20:19:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:19:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:19:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:19:53.409764  543705 memory.go:184] no items to output this cycle
I0321 20:19:53.409799  543705 cpu.go:275] no items to output this cycle
E0321 20:20:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:20:03.409763  543705 memory.go:184] no items to output this cycle
I0321 20:20:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 20:20:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:20:13.409783  543705 memory.go:191] Add success.
I0321 20:20:13.409801  543705 cpu.go:282] Add success.
W0321 20:20:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:20:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:20:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:20:13.420162  543705 net.go:648] Add success.
I0321 20:20:13.422878  543705 net.go:770] primary dev: ETH0
I0321 20:20:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:20:13.422911  543705 net.go:698] Add success.
I0321 20:20:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:20:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:20:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 20:20:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:20:14.456477  543705 disk_worker.go:494] system disk:vda1
I0321 20:20:14.456522  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:20:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:20:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:20:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:20:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:20:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:20:23.409789  543705 memory.go:184] no items to output this cycle
I0321 20:20:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 20:20:31.249681  543705 disk_info.go:125] begin check local disk info of client
I0321 20:20:31.252121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:20:31.252128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abdc0 0xc0002abe00]
E0321 20:20:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:20:33.409802  543705 memory.go:184] no items to output this cycle
I0321 20:20:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 20:20:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:20:43.409779  543705 memory.go:191] Add success.
I0321 20:20:43.409809  543705 cpu.go:282] Add success.
I0321 20:20:43.419861  543705 net.go:648] Add success.
I0321 20:20:43.423045  543705 net.go:770] primary dev: ETH0
I0321 20:20:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:20:43.423070  543705 net.go:698] Add success.
I0321 20:20:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:20:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:20:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:20:53.410236  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:20:53.410254  543705 memory.go:184] no items to output this cycle
I0321 20:20:53.410283  543705 cpu.go:275] no items to output this cycle
E0321 20:21:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:21:03.409778  543705 memory.go:184] no items to output this cycle
I0321 20:21:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:21:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:21:13.409802  543705 memory.go:191] Add success.
I0321 20:21:13.409803  543705 cpu.go:282] Add success.
W0321 20:21:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:21:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:21:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:21:13.420047  543705 net.go:648] Add success.
I0321 20:21:13.422891  543705 net.go:770] primary dev: ETH0
I0321 20:21:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:21:13.422917  543705 net.go:698] Add success.
I0321 20:21:13.472531  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5440afb8-d3cf-4aa2-ae9f-b43db9040479","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:21:13.472564  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:21:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:21:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:21:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 20:21:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:21:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 20:21:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:21:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:21:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:21:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:21:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:21:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:21:23.409794  543705 memory.go:184] no items to output this cycle
I0321 20:21:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 20:21:31.253676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:21:31.256138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:21:31.256145  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0b00 0xc0003c0b40]
E0321 20:21:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:21:33.409775  543705 memory.go:184] no items to output this cycle
I0321 20:21:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 20:21:39.153454  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:21:39.153460  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:21:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:21:43.410632  543705 memory.go:191] Add success.
I0321 20:21:43.409846  543705 cpu.go:282] Add success.
I0321 20:21:43.420338  543705 net.go:648] Add success.
I0321 20:21:43.423459  543705 net.go:770] primary dev: ETH0
I0321 20:21:43.423473  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:21:43.423486  543705 net.go:698] Add success.
I0321 20:21:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:21:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:21:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:21:53.409787  543705 memory.go:184] no items to output this cycle
I0321 20:21:53.409830  543705 cpu.go:275] no items to output this cycle
E0321 20:22:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:22:03.409791  543705 memory.go:184] no items to output this cycle
I0321 20:22:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 20:22:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:22:13.409819  543705 memory.go:191] Add success.
I0321 20:22:13.409833  543705 cpu.go:282] Add success.
W0321 20:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:22:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:22:13.420130  543705 net.go:648] Add success.
I0321 20:22:13.422854  543705 net.go:770] primary dev: ETH0
I0321 20:22:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:22:13.422880  543705 net.go:698] Add success.
W0321 20:22:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:22:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 20:22:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:22:14.455878  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:22:14.455887  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:22:14.455893  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:22:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 20:22:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:22:15.456782  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:22:15.456790  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:22:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:22:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:22:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:22:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:22:16.472299  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:22:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:22:23.409797  543705 memory.go:184] no items to output this cycle
I0321 20:22:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 20:22:31.257680  543705 disk_info.go:125] begin check local disk info of client
I0321 20:22:31.260191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:22:31.260197  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475a80 0xc000475ac0]
E0321 20:22:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:22:33.409807  543705 memory.go:184] no items to output this cycle
I0321 20:22:33.409824  543705 cpu.go:275] no items to output this cycle
E0321 20:22:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:22:43.409776  543705 memory.go:191] Add success.
I0321 20:22:43.409821  543705 cpu.go:282] Add success.
I0321 20:22:43.420015  543705 net.go:648] Add success.
I0321 20:22:43.422807  543705 net.go:770] primary dev: ETH0
I0321 20:22:43.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:22:43.422839  543705 net.go:698] Add success.
I0321 20:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:22:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:22:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:22:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:22:53.409780  543705 memory.go:184] no items to output this cycle
I0321 20:22:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 20:23:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:23:03.409779  543705 memory.go:184] no items to output this cycle
I0321 20:23:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 20:23:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:23:13.409786  543705 memory.go:191] Add success.
I0321 20:23:13.409807  543705 cpu.go:282] Add success.
W0321 20:23:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:23:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:23:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:23:13.420076  543705 net.go:648] Add success.
I0321 20:23:13.422727  543705 net.go:770] primary dev: ETH0
I0321 20:23:13.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:23:13.422752  543705 net.go:698] Add success.
I0321 20:23:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:23:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:23:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 20:23:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:23:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 20:23:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:23:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:23:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:23:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:23:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:23:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:23:23.409813  543705 memory.go:184] no items to output this cycle
I0321 20:23:23.409825  543705 cpu.go:275] no items to output this cycle
I0321 20:23:31.261675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:23:31.264171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:23:31.264177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003488c0 0xc000348900]
E0321 20:23:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:23:33.409792  543705 memory.go:184] no items to output this cycle
I0321 20:23:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:23:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:23:43.409814  543705 memory.go:191] Add success.
I0321 20:23:43.409836  543705 cpu.go:282] Add success.
I0321 20:23:43.419966  543705 net.go:648] Add success.
I0321 20:23:43.422854  543705 net.go:770] primary dev: ETH0
I0321 20:23:43.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:23:43.422894  543705 net.go:698] Add success.
I0321 20:23:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:23:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:23:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:23:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:23:53.409771  543705 memory.go:184] no items to output this cycle
I0321 20:23:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 20:24:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:24:03.409768  543705 memory.go:184] no items to output this cycle
I0321 20:24:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 20:24:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:24:13.409818  543705 memory.go:191] Add success.
I0321 20:24:13.409840  543705 cpu.go:282] Add success.
W0321 20:24:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:24:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:24:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:24:13.420132  543705 net.go:648] Add success.
I0321 20:24:13.422703  543705 net.go:770] primary dev: ETH0
I0321 20:24:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:24:13.422729  543705 net.go:698] Add success.
I0321 20:24:13.463164  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6a459e1c-1e42-4543-b68a-af9d759d3cba","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:24:13.463202  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:24:14.454947  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:24:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:24:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 20:24:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:24:14.456682  543705 disk_worker.go:494] system disk:vda1
I0321 20:24:14.456721  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:24:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:24:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:24:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:24:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:24:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:24:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:24:23.409790  543705 memory.go:184] no items to output this cycle
I0321 20:24:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 20:24:31.265676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:24:31.268272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:24:31.268279  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf440 0xc0002bf480]
E0321 20:24:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:24:33.409776  543705 memory.go:184] no items to output this cycle
I0321 20:24:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 20:24:39.153736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:24:39.153742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:24:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:24:43.410584  543705 memory.go:191] Add success.
I0321 20:24:43.409787  543705 cpu.go:282] Add success.
I0321 20:24:43.420317  543705 net.go:648] Add success.
I0321 20:24:43.422930  543705 net.go:770] primary dev: ETH0
I0321 20:24:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:24:43.422955  543705 net.go:698] Add success.
I0321 20:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:24:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:24:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:24:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:24:53.409775  543705 memory.go:184] no items to output this cycle
I0321 20:24:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 20:25:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:25:03.409764  543705 memory.go:184] no items to output this cycle
I0321 20:25:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 20:25:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:25:13.409804  543705 memory.go:191] Add success.
I0321 20:25:13.409815  543705 cpu.go:282] Add success.
W0321 20:25:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:25:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:25:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:25:13.420116  543705 net.go:648] Add success.
I0321 20:25:13.423106  543705 net.go:770] primary dev: ETH0
I0321 20:25:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:25:13.423137  543705 net.go:698] Add success.
I0321 20:25:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:25:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:25:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 20:25:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:25:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 20:25:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:25:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:25:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:25:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:25:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:25:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:25:23.409777  543705 memory.go:184] no items to output this cycle
I0321 20:25:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 20:25:31.269677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:25:31.272237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:25:31.272243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348700 0xc000348740]
E0321 20:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:25:33.409782  543705 memory.go:184] no items to output this cycle
I0321 20:25:33.409790  543705 cpu.go:275] no items to output this cycle
E0321 20:25:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:25:43.409785  543705 memory.go:191] Add success.
I0321 20:25:43.409787  543705 cpu.go:282] Add success.
I0321 20:25:43.419933  543705 net.go:648] Add success.
I0321 20:25:43.422680  543705 net.go:770] primary dev: ETH0
I0321 20:25:43.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:25:43.422705  543705 net.go:698] Add success.
I0321 20:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:25:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:25:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:25:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:25:53.409798  543705 memory.go:184] no items to output this cycle
I0321 20:25:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 20:26:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:26:03.409795  543705 memory.go:184] no items to output this cycle
I0321 20:26:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 20:26:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:26:13.409809  543705 memory.go:191] Add success.
I0321 20:26:13.409814  543705 cpu.go:282] Add success.
W0321 20:26:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:26:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:26:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:26:13.420050  543705 net.go:648] Add success.
I0321 20:26:13.422942  543705 net.go:770] primary dev: ETH0
I0321 20:26:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:26:13.422974  543705 net.go:698] Add success.
I0321 20:26:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:26:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:26:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 20:26:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:26:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 20:26:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:26:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:26:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:26:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:26:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:26:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:26:23.409784  543705 memory.go:184] no items to output this cycle
I0321 20:26:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 20:26:31.275939  543705 disk_info.go:125] begin check local disk info of client
I0321 20:26:31.278484  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:26:31.278490  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b880 0xc00007b8c0]
E0321 20:26:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:26:33.409788  543705 memory.go:184] no items to output this cycle
I0321 20:26:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 20:26:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:26:43.409792  543705 memory.go:191] Add success.
I0321 20:26:43.409794  543705 cpu.go:282] Add success.
I0321 20:26:43.419937  543705 net.go:648] Add success.
I0321 20:26:43.422613  543705 net.go:770] primary dev: ETH0
I0321 20:26:43.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:26:43.422639  543705 net.go:698] Add success.
I0321 20:26:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:26:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:26:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:26:53.410472  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:26:53.410487  543705 memory.go:184] no items to output this cycle
I0321 20:26:53.410497  543705 cpu.go:275] no items to output this cycle
E0321 20:27:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:27:03.409780  543705 memory.go:184] no items to output this cycle
I0321 20:27:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:27:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:27:13.409807  543705 memory.go:191] Add success.
I0321 20:27:13.409810  543705 cpu.go:282] Add success.
W0321 20:27:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:27:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:27:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:27:13.420153  543705 net.go:648] Add success.
I0321 20:27:13.422899  543705 net.go:770] primary dev: ETH0
I0321 20:27:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:27:13.422924  543705 net.go:698] Add success.
I0321 20:27:13.429137  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 20:27:13.453304  543705 event_worker.go:152] Polling the log file for events...
I0321 20:27:13.468443  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4859ae31-d082-427a-8898-ae107811fa9e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:27:13.468486  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 20:27:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:27:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 20:27:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:27:14.456913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:27:14.456922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:27:14.456927  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:27:14.456982  543705 disk_worker.go:494] system disk:vda1
I0321 20:27:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:27:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:27:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:27:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:27:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:27:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:27:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:27:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:27:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:27:23.409786  543705 memory.go:184] no items to output this cycle
I0321 20:27:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 20:27:31.281677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:27:31.284270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:27:31.284277  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462fc0 0xc000463000]
E0321 20:27:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:27:33.409778  543705 memory.go:184] no items to output this cycle
I0321 20:27:33.409785  543705 cpu.go:275] no items to output this cycle
I0321 20:27:39.157473  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:27:39.157480  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:27:43.410792  543705 memory.go:191] Add success.
I0321 20:27:43.409812  543705 cpu.go:282] Add success.
I0321 20:27:43.420493  543705 net.go:648] Add success.
I0321 20:27:43.423309  543705 net.go:770] primary dev: ETH0
I0321 20:27:43.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:27:43.423336  543705 net.go:698] Add success.
I0321 20:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:27:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:27:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:27:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:27:53.409769  543705 memory.go:184] no items to output this cycle
I0321 20:27:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 20:28:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:28:03.409793  543705 memory.go:184] no items to output this cycle
I0321 20:28:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:28:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:28:13.409791  543705 memory.go:191] Add success.
I0321 20:28:13.409793  543705 cpu.go:282] Add success.
W0321 20:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:28:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:28:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:28:13.420052  543705 net.go:648] Add success.
I0321 20:28:13.423172  543705 net.go:770] primary dev: ETH0
I0321 20:28:13.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:28:13.423212  543705 net.go:698] Add success.
I0321 20:28:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:28:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:28:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0321 20:28:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:28:14.456610  543705 disk_worker.go:494] system disk:vda1
I0321 20:28:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:28:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:28:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:28:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:28:23.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:28:23.409928  543705 memory.go:184] no items to output this cycle
I0321 20:28:23.409976  543705 cpu.go:275] no items to output this cycle
I0321 20:28:31.285680  543705 disk_info.go:125] begin check local disk info of client
I0321 20:28:31.288224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:28:31.288231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 20:28:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:28:33.409792  543705 memory.go:184] no items to output this cycle
I0321 20:28:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:28:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:28:43.409798  543705 memory.go:191] Add success.
I0321 20:28:43.409798  543705 cpu.go:282] Add success.
I0321 20:28:43.419981  543705 net.go:648] Add success.
I0321 20:28:43.422613  543705 net.go:770] primary dev: ETH0
I0321 20:28:43.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:28:43.422639  543705 net.go:698] Add success.
I0321 20:28:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:28:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:28:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:28:53.409780  543705 memory.go:184] no items to output this cycle
I0321 20:28:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 20:29:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:29:03.409779  543705 memory.go:184] no items to output this cycle
I0321 20:29:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 20:29:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:29:13.409814  543705 memory.go:191] Add success.
I0321 20:29:13.409818  543705 cpu.go:282] Add success.
W0321 20:29:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:29:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:29:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:29:13.420124  543705 net.go:648] Add success.
I0321 20:29:13.423213  543705 net.go:770] primary dev: ETH0
I0321 20:29:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:29:13.423237  543705 net.go:698] Add success.
I0321 20:29:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:29:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:29:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 20:29:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:29:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 20:29:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:29:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:29:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:29:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:29:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:29:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:29:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:29:23.409781  543705 memory.go:184] no items to output this cycle
I0321 20:29:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 20:29:31.289682  543705 disk_info.go:125] begin check local disk info of client
I0321 20:29:31.292210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:29:31.292217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cc580 0xc0004cc5c0]
E0321 20:29:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:29:33.409777  543705 memory.go:184] no items to output this cycle
I0321 20:29:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 20:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:29:43.409808  543705 memory.go:191] Add success.
I0321 20:29:43.409816  543705 cpu.go:282] Add success.
I0321 20:29:43.419929  543705 net.go:648] Add success.
I0321 20:29:43.422475  543705 net.go:770] primary dev: ETH0
I0321 20:29:43.422490  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:29:43.422504  543705 net.go:698] Add success.
I0321 20:29:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:29:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:29:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:29:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:29:53.409764  543705 memory.go:184] no items to output this cycle
I0321 20:29:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 20:30:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:30:03.409795  543705 memory.go:184] no items to output this cycle
I0321 20:30:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 20:30:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:30:13.409818  543705 memory.go:191] Add success.
I0321 20:30:13.409827  543705 cpu.go:282] Add success.
W0321 20:30:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:30:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:30:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:30:13.420046  543705 net.go:648] Add success.
I0321 20:30:13.422713  543705 net.go:770] primary dev: ETH0
I0321 20:30:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:30:13.422739  543705 net.go:698] Add success.
I0321 20:30:13.468539  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8d7a3479-0ae5-4091-b6d9-66404b016ab3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:30:13.468591  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:30:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:30:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 20:30:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:30:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 20:30:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:30:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:30:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:30:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:30:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:30:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:30:23.409777  543705 memory.go:184] no items to output this cycle
I0321 20:30:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 20:30:31.293667  543705 disk_info.go:125] begin check local disk info of client
I0321 20:30:31.296360  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:30:31.296369  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2fc0 0xc0004a3000]
E0321 20:30:33.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:30:33.409898  543705 memory.go:184] no items to output this cycle
I0321 20:30:33.409955  543705 cpu.go:275] no items to output this cycle
I0321 20:30:39.157743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:30:39.157750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:30:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:30:43.410744  543705 memory.go:191] Add success.
I0321 20:30:43.409812  543705 cpu.go:282] Add success.
I0321 20:30:43.420434  543705 net.go:648] Add success.
I0321 20:30:43.423313  543705 net.go:770] primary dev: ETH0
I0321 20:30:43.423326  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:30:43.423353  543705 net.go:698] Add success.
I0321 20:30:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:30:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:30:53.410402  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:30:53.410419  543705 memory.go:184] no items to output this cycle
I0321 20:30:53.410422  543705 cpu.go:275] no items to output this cycle
E0321 20:31:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:31:03.409785  543705 memory.go:184] no items to output this cycle
I0321 20:31:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 20:31:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:31:13.409808  543705 memory.go:191] Add success.
I0321 20:31:13.409817  543705 cpu.go:282] Add success.
W0321 20:31:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:31:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:31:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:31:13.420108  543705 net.go:648] Add success.
I0321 20:31:13.422970  543705 net.go:770] primary dev: ETH0
I0321 20:31:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:31:13.422994  543705 net.go:698] Add success.
I0321 20:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:31:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:31:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 20:31:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:31:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 20:31:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:31:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:31:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:31:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:31:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 20:31:23.409798  543705 memory.go:184] no items to output this cycle
I0321 20:31:31.297679  543705 disk_info.go:125] begin check local disk info of client
I0321 20:31:31.300228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:31:31.300234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c08c0 0xc0004c0900]
E0321 20:31:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:31:33.409784  543705 memory.go:184] no items to output this cycle
I0321 20:31:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 20:31:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:31:43.409790  543705 memory.go:191] Add success.
I0321 20:31:43.409809  543705 cpu.go:282] Add success.
I0321 20:31:43.419876  543705 net.go:648] Add success.
I0321 20:31:43.422475  543705 net.go:770] primary dev: ETH0
I0321 20:31:43.422491  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:31:43.422504  543705 net.go:698] Add success.
I0321 20:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:31:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:31:53.410372  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:31:53.410391  543705 memory.go:184] no items to output this cycle
I0321 20:31:53.410403  543705 cpu.go:275] no items to output this cycle
E0321 20:32:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:32:03.409796  543705 memory.go:184] no items to output this cycle
I0321 20:32:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 20:32:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:32:13.409778  543705 memory.go:191] Add success.
I0321 20:32:13.409799  543705 cpu.go:282] Add success.
W0321 20:32:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:32:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:32:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:32:13.420201  543705 net.go:648] Add success.
I0321 20:32:13.422891  543705 net.go:770] primary dev: ETH0
I0321 20:32:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:32:13.422916  543705 net.go:698] Add success.
W0321 20:32:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:32:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 20:32:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:32:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:32:14.455918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:32:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:32:14.456556  543705 disk_worker.go:494] system disk:vda1
I0321 20:32:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:32:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:32:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:32:16.457904  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:32:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:32:16.457959  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:32:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:32:16.472304  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:32:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:32:23.409791  543705 memory.go:184] no items to output this cycle
I0321 20:32:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 20:32:31.301675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:32:31.304303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:32:31.304308  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1100 0xc0003c1140]
E0321 20:32:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:32:33.409775  543705 memory.go:184] no items to output this cycle
I0321 20:32:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 20:32:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:32:43.409816  543705 memory.go:191] Add success.
I0321 20:32:43.409825  543705 cpu.go:282] Add success.
I0321 20:32:43.420092  543705 net.go:648] Add success.
I0321 20:32:43.422583  543705 net.go:770] primary dev: ETH0
I0321 20:32:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:32:43.422608  543705 net.go:698] Add success.
I0321 20:32:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:32:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:32:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:32:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:32:53.409808  543705 memory.go:184] no items to output this cycle
I0321 20:32:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 20:33:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:33:03.409788  543705 memory.go:184] no items to output this cycle
I0321 20:33:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 20:33:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:33:13.409799  543705 memory.go:191] Add success.
I0321 20:33:13.409814  543705 cpu.go:282] Add success.
W0321 20:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:33:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:33:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:33:13.420059  543705 net.go:648] Add success.
I0321 20:33:13.422800  543705 net.go:770] primary dev: ETH0
I0321 20:33:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:33:13.422834  543705 net.go:698] Add success.
I0321 20:33:13.965178  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ec5db0d5-e948-45e4-93be-d6dfcc0aa973","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:33:13.965216  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:33:14.454677  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:33:14.454836  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:33:14.454898  543705 disk_worker.go:708] disk space is not compliant
W0321 20:33:14.454901  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:33:14.456239  543705 disk_worker.go:494] system disk:vda1
I0321 20:33:14.456284  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:33:15.455605  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:33:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:33:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:33:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:33:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:33:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:33:23.409783  543705 memory.go:184] no items to output this cycle
I0321 20:33:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 20:33:31.305673  543705 disk_info.go:125] begin check local disk info of client
I0321 20:33:31.308228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:33:31.308234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c1000 0xc0004c1040]
E0321 20:33:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:33:33.409772  543705 memory.go:184] no items to output this cycle
I0321 20:33:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 20:33:39.161488  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:33:39.161496  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:33:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:33:43.410710  543705 memory.go:191] Add success.
I0321 20:33:43.409812  543705 cpu.go:282] Add success.
I0321 20:33:43.420488  543705 net.go:648] Add success.
I0321 20:33:43.422910  543705 net.go:770] primary dev: ETH0
I0321 20:33:43.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:33:43.422940  543705 net.go:698] Add success.
I0321 20:33:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:33:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:33:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:33:53.409781  543705 memory.go:184] no items to output this cycle
I0321 20:33:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 20:34:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:34:03.409806  543705 memory.go:184] no items to output this cycle
I0321 20:34:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 20:34:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:34:13.409817  543705 memory.go:191] Add success.
I0321 20:34:13.409828  543705 cpu.go:282] Add success.
W0321 20:34:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:34:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:34:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:34:13.420297  543705 net.go:648] Add success.
I0321 20:34:13.423419  543705 net.go:770] primary dev: ETH0
I0321 20:34:13.423432  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:34:13.423443  543705 net.go:698] Add success.
I0321 20:34:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:34:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:34:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0321 20:34:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:34:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 20:34:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:34:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:34:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:34:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:34:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:34:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:34:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:34:23.409813  543705 memory.go:184] no items to output this cycle
I0321 20:34:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 20:34:31.309679  543705 disk_info.go:125] begin check local disk info of client
I0321 20:34:31.312133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:34:31.312140  543705 disk_info.go:196] parse disk info done, disk is : [0xc000392180 0xc0003921c0]
E0321 20:34:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:34:33.409777  543705 cpu.go:275] no items to output this cycle
I0321 20:34:33.409782  543705 memory.go:184] no items to output this cycle
E0321 20:34:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:34:43.409812  543705 memory.go:191] Add success.
I0321 20:34:43.409822  543705 cpu.go:282] Add success.
I0321 20:34:43.419889  543705 net.go:648] Add success.
I0321 20:34:43.422796  543705 net.go:770] primary dev: ETH0
I0321 20:34:43.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:34:43.422820  543705 net.go:698] Add success.
I0321 20:34:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:34:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:34:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:34:53.409944  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:34:53.410015  543705 memory.go:184] no items to output this cycle
I0321 20:34:53.410040  543705 cpu.go:275] no items to output this cycle
E0321 20:35:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:35:03.409779  543705 memory.go:184] no items to output this cycle
I0321 20:35:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 20:35:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:35:13.409794  543705 memory.go:191] Add success.
I0321 20:35:13.409794  543705 cpu.go:282] Add success.
W0321 20:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:35:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:35:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:35:13.420108  543705 net.go:648] Add success.
I0321 20:35:13.423382  543705 net.go:770] primary dev: ETH0
I0321 20:35:13.423395  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:35:13.423406  543705 net.go:698] Add success.
I0321 20:35:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:35:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:35:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 20:35:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:35:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 20:35:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:35:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:35:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:35:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:35:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:35:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 20:35:23.409788  543705 memory.go:184] no items to output this cycle
I0321 20:35:31.313677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:35:31.316438  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:35:31.316444  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5640 0xc0000c5680]
E0321 20:35:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:35:33.409794  543705 memory.go:184] no items to output this cycle
I0321 20:35:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 20:35:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:35:43.409814  543705 memory.go:191] Add success.
I0321 20:35:43.409821  543705 cpu.go:282] Add success.
I0321 20:35:43.419885  543705 net.go:648] Add success.
I0321 20:35:43.422497  543705 net.go:770] primary dev: ETH0
I0321 20:35:43.422513  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:35:43.422682  543705 net.go:698] Add success.
I0321 20:35:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:35:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:35:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:35:53.409776  543705 memory.go:184] no items to output this cycle
I0321 20:35:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 20:36:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:36:03.409778  543705 cpu.go:275] no items to output this cycle
I0321 20:36:03.409787  543705 memory.go:184] no items to output this cycle
E0321 20:36:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:36:13.409819  543705 memory.go:191] Add success.
I0321 20:36:13.409826  543705 cpu.go:282] Add success.
W0321 20:36:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:36:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:36:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:36:13.420075  543705 net.go:648] Add success.
I0321 20:36:13.423277  543705 net.go:770] primary dev: ETH0
I0321 20:36:13.423291  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:36:13.423302  543705 net.go:698] Add success.
I0321 20:36:13.469170  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"baffdd08-9f25-44e8-a07b-b7f136bcddce","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:36:13.469207  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:36:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:36:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:36:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0321 20:36:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:36:14.456625  543705 disk_worker.go:494] system disk:vda1
I0321 20:36:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:36:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:36:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:36:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:36:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:36:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:36:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 20:36:23.409794  543705 memory.go:184] no items to output this cycle
I0321 20:36:31.317678  543705 disk_info.go:125] begin check local disk info of client
I0321 20:36:31.320250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:36:31.320256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463980 0xc0004639c0]
E0321 20:36:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:36:33.409787  543705 memory.go:184] no items to output this cycle
I0321 20:36:33.409805  543705 cpu.go:275] no items to output this cycle
I0321 20:36:39.161731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:36:39.161738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:36:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:36:43.410561  543705 memory.go:191] Add success.
I0321 20:36:43.409815  543705 cpu.go:282] Add success.
I0321 20:36:43.420264  543705 net.go:648] Add success.
I0321 20:36:43.422937  543705 net.go:770] primary dev: ETH0
I0321 20:36:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:36:43.422967  543705 net.go:698] Add success.
I0321 20:36:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:36:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:36:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:36:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:36:53.409798  543705 memory.go:184] no items to output this cycle
I0321 20:36:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 20:37:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:37:03.409775  543705 memory.go:184] no items to output this cycle
I0321 20:37:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 20:37:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:37:13.409791  543705 memory.go:191] Add success.
I0321 20:37:13.409799  543705 cpu.go:282] Add success.
W0321 20:37:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:37:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:37:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:37:13.420064  543705 net.go:648] Add success.
I0321 20:37:13.422774  543705 net.go:770] primary dev: ETH0
I0321 20:37:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:37:13.422798  543705 net.go:698] Add success.
I0321 20:37:13.453422  543705 event_worker.go:152] Polling the log file for events...
W0321 20:37:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:37:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 20:37:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:37:14.455890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:37:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:37:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:37:14.456552  543705 disk_worker.go:494] system disk:vda1
I0321 20:37:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:37:15.456953  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:37:15.456967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:37:16.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:37:16.458019  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:37:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:37:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:37:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:37:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:37:23.409810  543705 memory.go:184] no items to output this cycle
I0321 20:37:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 20:37:31.321676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:37:31.324293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:37:31.324300  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049a900 0xc00049a940]
E0321 20:37:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:37:33.409771  543705 memory.go:184] no items to output this cycle
I0321 20:37:33.409776  543705 cpu.go:275] no items to output this cycle
E0321 20:37:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:37:43.409777  543705 memory.go:191] Add success.
I0321 20:37:43.409806  543705 cpu.go:282] Add success.
I0321 20:37:43.419919  543705 net.go:648] Add success.
I0321 20:37:43.422656  543705 net.go:770] primary dev: ETH0
I0321 20:37:43.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:37:43.422682  543705 net.go:698] Add success.
I0321 20:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:37:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:37:53.409802  543705 memory.go:184] no items to output this cycle
I0321 20:37:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 20:38:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:38:03.409780  543705 cpu.go:275] no items to output this cycle
I0321 20:38:03.409783  543705 memory.go:184] no items to output this cycle
E0321 20:38:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:38:13.409802  543705 memory.go:191] Add success.
I0321 20:38:13.409814  543705 cpu.go:282] Add success.
W0321 20:38:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:38:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:38:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:38:13.420119  543705 net.go:648] Add success.
I0321 20:38:13.423041  543705 net.go:770] primary dev: ETH0
I0321 20:38:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:38:13.423066  543705 net.go:698] Add success.
I0321 20:38:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:38:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:38:14.455243  543705 disk_worker.go:708] disk space is not compliant
W0321 20:38:14.455246  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:38:14.456651  543705 disk_worker.go:494] system disk:vda1
I0321 20:38:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:38:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:38:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:38:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:38:23.409791  543705 memory.go:184] no items to output this cycle
I0321 20:38:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 20:38:31.325677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:38:31.328206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:38:31.328212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1400 0xc0002a1440]
E0321 20:38:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:38:33.409795  543705 memory.go:184] no items to output this cycle
I0321 20:38:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 20:38:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:38:43.409780  543705 memory.go:191] Add success.
I0321 20:38:43.409798  543705 cpu.go:282] Add success.
I0321 20:38:43.419857  543705 net.go:648] Add success.
I0321 20:38:43.422810  543705 net.go:770] primary dev: ETH0
I0321 20:38:43.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:38:43.422879  543705 net.go:698] Add success.
I0321 20:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:38:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:38:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:38:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:38:53.409781  543705 cpu.go:275] no items to output this cycle
I0321 20:38:53.409793  543705 memory.go:184] no items to output this cycle
E0321 20:39:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:39:03.409773  543705 memory.go:184] no items to output this cycle
I0321 20:39:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 20:39:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:39:13.409819  543705 memory.go:191] Add success.
I0321 20:39:13.409824  543705 cpu.go:282] Add success.
W0321 20:39:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:39:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:39:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:39:13.420156  543705 net.go:648] Add success.
I0321 20:39:13.422945  543705 net.go:770] primary dev: ETH0
I0321 20:39:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:39:13.422971  543705 net.go:698] Add success.
I0321 20:39:13.491573  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"95c6aaa1-ad22-4d6c-9cdd-5fed1328bec0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:39:13.491607  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:39:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:39:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:39:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 20:39:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:39:14.456524  543705 disk_worker.go:494] system disk:vda1
I0321 20:39:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:39:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:39:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:39:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:39:16.472499  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:39:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:39:23.409810  543705 memory.go:184] no items to output this cycle
I0321 20:39:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 20:39:31.329681  543705 disk_info.go:125] begin check local disk info of client
I0321 20:39:31.332208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:39:31.332215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4840 0xc0000c4880]
E0321 20:39:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:39:33.409793  543705 memory.go:184] no items to output this cycle
I0321 20:39:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 20:39:39.161880  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:39:39.161887  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:39:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:39:43.410897  543705 memory.go:191] Add success.
I0321 20:39:43.409799  543705 cpu.go:282] Add success.
I0321 20:39:43.420836  543705 net.go:648] Add success.
I0321 20:39:43.423636  543705 net.go:770] primary dev: ETH0
I0321 20:39:43.423649  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:39:43.423661  543705 net.go:698] Add success.
I0321 20:39:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:39:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:39:53.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:39:53.410380  543705 memory.go:184] no items to output this cycle
I0321 20:39:53.410383  543705 cpu.go:275] no items to output this cycle
E0321 20:40:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:40:03.409778  543705 memory.go:184] no items to output this cycle
I0321 20:40:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 20:40:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:40:13.409796  543705 memory.go:191] Add success.
I0321 20:40:13.409797  543705 cpu.go:282] Add success.
W0321 20:40:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:40:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:40:13.420090  543705 net.go:648] Add success.
I0321 20:40:13.422690  543705 net.go:770] primary dev: ETH0
I0321 20:40:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:40:13.422716  543705 net.go:698] Add success.
I0321 20:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:40:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:40:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 20:40:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:40:14.456543  543705 disk_worker.go:494] system disk:vda1
I0321 20:40:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:40:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:40:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:40:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:40:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:40:23.409816  543705 memory.go:184] no items to output this cycle
I0321 20:40:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 20:40:31.333677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:40:31.336249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:40:31.336257  543705 disk_info.go:196] parse disk info done, disk is : [0xc000287f40 0xc00028c000]
E0321 20:40:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:40:33.409761  543705 memory.go:184] no items to output this cycle
I0321 20:40:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 20:40:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:40:43.409786  543705 memory.go:191] Add success.
I0321 20:40:43.409815  543705 cpu.go:282] Add success.
I0321 20:40:43.419871  543705 net.go:648] Add success.
I0321 20:40:43.422728  543705 net.go:770] primary dev: ETH0
I0321 20:40:43.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:40:43.422752  543705 net.go:698] Add success.
I0321 20:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:40:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:40:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:40:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:40:53.409797  543705 memory.go:184] no items to output this cycle
I0321 20:40:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 20:41:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:41:03.409796  543705 memory.go:184] no items to output this cycle
I0321 20:41:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 20:41:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:41:13.409798  543705 memory.go:191] Add success.
I0321 20:41:13.409799  543705 cpu.go:282] Add success.
W0321 20:41:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:41:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:41:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:41:13.420194  543705 net.go:648] Add success.
I0321 20:41:13.422799  543705 net.go:770] primary dev: ETH0
I0321 20:41:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:41:13.422830  543705 net.go:698] Add success.
I0321 20:41:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:41:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:41:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 20:41:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:41:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 20:41:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:41:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:41:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:41:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:41:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:41:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:41:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:41:23.409777  543705 memory.go:184] no items to output this cycle
I0321 20:41:23.409800  543705 cpu.go:275] no items to output this cycle
I0321 20:41:31.337674  543705 disk_info.go:125] begin check local disk info of client
I0321 20:41:31.340237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:41:31.340243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380e80 0xc000380ec0]
E0321 20:41:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:41:33.409759  543705 memory.go:184] no items to output this cycle
I0321 20:41:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 20:41:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:41:43.409809  543705 memory.go:191] Add success.
I0321 20:41:43.409817  543705 cpu.go:282] Add success.
I0321 20:41:43.420038  543705 net.go:648] Add success.
I0321 20:41:43.422833  543705 net.go:770] primary dev: ETH0
I0321 20:41:43.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:41:43.422857  543705 net.go:698] Add success.
I0321 20:41:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:41:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:41:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:41:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:41:53.409781  543705 memory.go:184] no items to output this cycle
I0321 20:41:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 20:42:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:42:03.409810  543705 memory.go:184] no items to output this cycle
I0321 20:42:03.409839  543705 cpu.go:275] no items to output this cycle
E0321 20:42:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:42:13.409788  543705 memory.go:191] Add success.
W0321 20:42:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:42:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:42:13.409828  543705 cpu.go:282] Add success.
I0321 20:42:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:42:13.420224  543705 net.go:648] Add success.
I0321 20:42:13.423081  543705 net.go:770] primary dev: ETH0
I0321 20:42:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:42:13.423107  543705 net.go:698] Add success.
I0321 20:42:13.511243  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"09da04ec-5f05-4349-9d26-95328e0e850f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:42:13.511274  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 20:42:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:42:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 20:42:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:42:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:42:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:42:14.455894  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:42:14.456800  543705 disk_worker.go:494] system disk:vda1
I0321 20:42:14.456836  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:42:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:42:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:42:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:42:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:42:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:42:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:42:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:42:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:42:23.409782  543705 memory.go:184] no items to output this cycle
I0321 20:42:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 20:42:31.341684  543705 disk_info.go:125] begin check local disk info of client
I0321 20:42:31.344225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:42:31.344231  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380ec0 0xc000380f00]
E0321 20:42:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:42:33.409805  543705 memory.go:184] no items to output this cycle
I0321 20:42:33.409818  543705 cpu.go:275] no items to output this cycle
I0321 20:42:39.165523  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:42:39.165530  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:42:43.410778  543705 memory.go:191] Add success.
I0321 20:42:43.409802  543705 cpu.go:282] Add success.
I0321 20:42:43.420481  543705 net.go:648] Add success.
I0321 20:42:43.423462  543705 net.go:770] primary dev: ETH0
I0321 20:42:43.423475  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:42:43.423487  543705 net.go:698] Add success.
I0321 20:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:42:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:42:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:42:53.410258  543705 memory.go:184] no items to output this cycle
I0321 20:42:53.410277  543705 cpu.go:275] no items to output this cycle
E0321 20:43:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:43:03.409778  543705 memory.go:184] no items to output this cycle
I0321 20:43:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 20:43:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:43:13.409786  543705 memory.go:191] Add success.
I0321 20:43:13.409805  543705 cpu.go:282] Add success.
W0321 20:43:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:43:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:43:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:43:13.420104  543705 net.go:648] Add success.
I0321 20:43:13.422909  543705 net.go:770] primary dev: ETH0
I0321 20:43:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:43:13.422935  543705 net.go:698] Add success.
I0321 20:43:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:43:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:43:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 20:43:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:43:14.456549  543705 disk_worker.go:494] system disk:vda1
I0321 20:43:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:43:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:43:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:43:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:43:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:43:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:43:23.409812  543705 memory.go:184] no items to output this cycle
I0321 20:43:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 20:43:31.345672  543705 disk_info.go:125] begin check local disk info of client
I0321 20:43:31.348207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:43:31.348213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc3c0 0xc0003dc400]
E0321 20:43:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:43:33.409763  543705 memory.go:184] no items to output this cycle
I0321 20:43:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 20:43:43.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:43:43.409897  543705 memory.go:191] Add success.
I0321 20:43:43.409976  543705 cpu.go:282] Add success.
I0321 20:43:43.419712  543705 net.go:648] Add success.
I0321 20:43:43.422659  543705 net.go:770] primary dev: ETH0
I0321 20:43:43.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:43:43.422683  543705 net.go:698] Add success.
I0321 20:43:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:43:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:43:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:43:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:43:53.409791  543705 memory.go:184] no items to output this cycle
I0321 20:43:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 20:44:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:44:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 20:44:03.409788  543705 memory.go:184] no items to output this cycle
E0321 20:44:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:44:13.409801  543705 memory.go:191] Add success.
I0321 20:44:13.409801  543705 cpu.go:282] Add success.
W0321 20:44:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:44:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:44:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:44:13.420095  543705 net.go:648] Add success.
I0321 20:44:13.422875  543705 net.go:770] primary dev: ETH0
I0321 20:44:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:44:13.422903  543705 net.go:698] Add success.
I0321 20:44:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:44:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:44:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 20:44:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:44:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 20:44:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:44:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:44:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:44:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:44:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:44:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:44:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 20:44:23.409793  543705 memory.go:184] no items to output this cycle
I0321 20:44:31.349673  543705 disk_info.go:125] begin check local disk info of client
I0321 20:44:31.352283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:44:31.352291  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2580 0xc0002a25c0]
E0321 20:44:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:44:33.409759  543705 memory.go:184] no items to output this cycle
I0321 20:44:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 20:44:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:44:43.409810  543705 memory.go:191] Add success.
I0321 20:44:43.409811  543705 cpu.go:282] Add success.
I0321 20:44:43.420252  543705 net.go:648] Add success.
I0321 20:44:43.423199  543705 net.go:770] primary dev: ETH0
I0321 20:44:43.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:44:43.423228  543705 net.go:698] Add success.
I0321 20:44:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:44:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:44:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:44:53.409781  543705 memory.go:184] no items to output this cycle
I0321 20:44:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 20:45:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:45:03.409777  543705 memory.go:184] no items to output this cycle
I0321 20:45:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 20:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:45:13.409812  543705 memory.go:191] Add success.
I0321 20:45:13.409817  543705 cpu.go:282] Add success.
W0321 20:45:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:45:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:45:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:45:13.420110  543705 net.go:648] Add success.
I0321 20:45:13.422700  543705 net.go:770] primary dev: ETH0
I0321 20:45:13.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:45:13.422725  543705 net.go:698] Add success.
I0321 20:45:13.470220  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1b9b1edf-6b65-4623-8e68-c7f43ce28e27","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:45:13.470255  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:45:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:45:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:45:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 20:45:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:45:14.456542  543705 disk_worker.go:494] system disk:vda1
I0321 20:45:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:45:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:45:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:45:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:45:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:45:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:45:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:45:23.409774  543705 memory.go:184] no items to output this cycle
I0321 20:45:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 20:45:31.353674  543705 disk_info.go:125] begin check local disk info of client
I0321 20:45:31.356243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:45:31.356249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c43c0 0xc0000c4400]
E0321 20:45:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:45:33.409769  543705 memory.go:184] no items to output this cycle
I0321 20:45:33.409796  543705 cpu.go:275] no items to output this cycle
I0321 20:45:39.165760  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:45:39.165767  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:45:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:45:43.410697  543705 memory.go:191] Add success.
I0321 20:45:43.409812  543705 cpu.go:282] Add success.
I0321 20:45:43.420553  543705 net.go:648] Add success.
I0321 20:45:43.423263  543705 net.go:770] primary dev: ETH0
I0321 20:45:43.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:45:43.423288  543705 net.go:698] Add success.
I0321 20:45:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:45:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:45:53.410357  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:45:53.410377  543705 memory.go:184] no items to output this cycle
I0321 20:45:53.410383  543705 cpu.go:275] no items to output this cycle
E0321 20:46:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:46:03.409764  543705 memory.go:184] no items to output this cycle
I0321 20:46:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:46:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:46:13.409831  543705 memory.go:191] Add success.
I0321 20:46:13.409831  543705 cpu.go:282] Add success.
W0321 20:46:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:46:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:46:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:46:13.420307  543705 net.go:648] Add success.
I0321 20:46:13.423254  543705 net.go:770] primary dev: ETH0
I0321 20:46:13.423268  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:46:13.423282  543705 net.go:698] Add success.
I0321 20:46:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:46:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:46:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0321 20:46:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:46:14.456601  543705 disk_worker.go:494] system disk:vda1
I0321 20:46:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:46:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:46:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:46:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:46:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:46:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:46:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 20:46:23.409790  543705 memory.go:184] no items to output this cycle
I0321 20:46:31.357682  543705 disk_info.go:125] begin check local disk info of client
I0321 20:46:31.360127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:46:31.360134  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028c880 0xc00028c8c0]
E0321 20:46:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:46:33.409796  543705 memory.go:184] no items to output this cycle
I0321 20:46:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 20:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:46:43.409784  543705 memory.go:191] Add success.
I0321 20:46:43.409788  543705 cpu.go:282] Add success.
I0321 20:46:43.420145  543705 net.go:648] Add success.
I0321 20:46:43.423361  543705 net.go:770] primary dev: ETH0
I0321 20:46:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:46:43.423386  543705 net.go:698] Add success.
I0321 20:46:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:46:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:46:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:46:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:46:53.409770  543705 cpu.go:275] no items to output this cycle
I0321 20:46:53.409780  543705 memory.go:184] no items to output this cycle
E0321 20:47:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:47:03.409777  543705 memory.go:184] no items to output this cycle
I0321 20:47:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 20:47:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:47:13.409790  543705 memory.go:191] Add success.
I0321 20:47:13.409791  543705 cpu.go:282] Add success.
W0321 20:47:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:47:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:47:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:47:13.420069  543705 net.go:648] Add success.
I0321 20:47:13.422927  543705 net.go:770] primary dev: ETH0
I0321 20:47:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:47:13.422952  543705 net.go:698] Add success.
I0321 20:47:13.453492  543705 event_worker.go:152] Polling the log file for events...
W0321 20:47:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:47:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 20:47:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:47:14.456774  543705 disk_worker.go:494] system disk:vda1
I0321 20:47:14.456812  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:47:14.457156  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:47:14.457164  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:47:14.457169  543705 custom_config.go:64] query custom config with name: gpu
E0321 20:47:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:47:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:47:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:47:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:47:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:47:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:47:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:47:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:47:23.409794  543705 memory.go:184] no items to output this cycle
I0321 20:47:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 20:47:31.361674  543705 disk_info.go:125] begin check local disk info of client
I0321 20:47:31.364336  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:47:31.364342  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c48c0 0xc0000c4900]
E0321 20:47:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:47:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 20:47:33.409787  543705 memory.go:184] no items to output this cycle
E0321 20:47:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:47:43.409873  543705 memory.go:191] Add success.
I0321 20:47:43.409921  543705 cpu.go:282] Add success.
I0321 20:47:43.419729  543705 net.go:648] Add success.
I0321 20:47:43.420694  543705 net.go:770] primary dev: ETH0
I0321 20:47:43.420707  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:47:43.420719  543705 net.go:698] Add success.
I0321 20:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:47:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:47:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:47:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 20:47:53.409784  543705 memory.go:184] no items to output this cycle
E0321 20:48:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:48:03.409765  543705 memory.go:184] no items to output this cycle
I0321 20:48:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 20:48:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:48:13.409819  543705 memory.go:191] Add success.
I0321 20:48:13.409831  543705 cpu.go:282] Add success.
W0321 20:48:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:48:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:48:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:48:13.420199  543705 net.go:648] Add success.
I0321 20:48:13.423119  543705 net.go:770] primary dev: ETH0
I0321 20:48:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:48:13.423145  543705 net.go:698] Add success.
I0321 20:48:13.469472  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4cca580a-120d-4bdd-9ada-e89a9917fbda","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:48:13.469514  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:48:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:48:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:48:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 20:48:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:48:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 20:48:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:48:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:48:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:48:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:48:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:48:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:48:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:48:23.409787  543705 memory.go:184] no items to output this cycle
I0321 20:48:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 20:48:31.365675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:48:31.368221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:48:31.368228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fefc0 0xc0003ff000]
E0321 20:48:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:48:33.409788  543705 memory.go:184] no items to output this cycle
I0321 20:48:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 20:48:39.165908  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:48:39.165915  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:48:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:48:43.410861  543705 memory.go:191] Add success.
I0321 20:48:43.410030  543705 cpu.go:282] Add success.
I0321 20:48:43.419712  543705 net.go:648] Add success.
I0321 20:48:43.422468  543705 net.go:770] primary dev: ETH0
I0321 20:48:43.422481  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:48:43.422493  543705 net.go:698] Add success.
I0321 20:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:48:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:48:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:48:53.409765  543705 memory.go:184] no items to output this cycle
I0321 20:48:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 20:49:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:49:03.409787  543705 memory.go:184] no items to output this cycle
I0321 20:49:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 20:49:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:49:13.409792  543705 memory.go:191] Add success.
I0321 20:49:13.409793  543705 cpu.go:282] Add success.
W0321 20:49:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:49:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:49:13.420039  543705 net.go:648] Add success.
I0321 20:49:13.423396  543705 net.go:770] primary dev: ETH0
I0321 20:49:13.423409  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:49:13.423421  543705 net.go:698] Add success.
I0321 20:49:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:49:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:49:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 20:49:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:49:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 20:49:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:49:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:49:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:49:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:49:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:49:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:49:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:49:23.409787  543705 memory.go:184] no items to output this cycle
I0321 20:49:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 20:49:31.369678  543705 disk_info.go:125] begin check local disk info of client
I0321 20:49:31.372240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:49:31.372246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003feec0 0xc0003fef00]
E0321 20:49:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:49:33.409797  543705 memory.go:184] no items to output this cycle
I0321 20:49:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 20:49:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:49:43.409777  543705 memory.go:191] Add success.
I0321 20:49:43.409810  543705 cpu.go:282] Add success.
I0321 20:49:43.420014  543705 net.go:648] Add success.
I0321 20:49:43.423163  543705 net.go:770] primary dev: ETH0
I0321 20:49:43.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:49:43.423192  543705 net.go:698] Add success.
I0321 20:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:49:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:49:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:49:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:49:53.409762  543705 memory.go:184] no items to output this cycle
I0321 20:49:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 20:50:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:50:03.409770  543705 memory.go:184] no items to output this cycle
I0321 20:50:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 20:50:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:50:13.409810  543705 memory.go:191] Add success.
I0321 20:50:13.409818  543705 cpu.go:282] Add success.
W0321 20:50:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:50:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:50:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:50:13.420374  543705 net.go:648] Add success.
I0321 20:50:13.423308  543705 net.go:770] primary dev: ETH0
I0321 20:50:13.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:50:13.423337  543705 net.go:698] Add success.
I0321 20:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:50:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:50:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 20:50:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:50:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 20:50:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:50:15.455129  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:50:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:50:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:50:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:50:23.409818  543705 memory.go:184] no items to output this cycle
I0321 20:50:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 20:50:31.373677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:50:31.376242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:50:31.376248  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034ec80 0xc00034ecc0]
E0321 20:50:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:50:33.409769  543705 memory.go:184] no items to output this cycle
I0321 20:50:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 20:50:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:50:43.409905  543705 memory.go:191] Add success.
I0321 20:50:43.409935  543705 cpu.go:282] Add success.
I0321 20:50:43.419741  543705 net.go:648] Add success.
I0321 20:50:43.422459  543705 net.go:770] primary dev: ETH0
I0321 20:50:43.422474  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:50:43.422486  543705 net.go:698] Add success.
I0321 20:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:50:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:50:53.409772  543705 memory.go:184] no items to output this cycle
I0321 20:50:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 20:51:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:51:03.409805  543705 memory.go:184] no items to output this cycle
I0321 20:51:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 20:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:51:13.409802  543705 memory.go:191] Add success.
I0321 20:51:13.409824  543705 cpu.go:282] Add success.
W0321 20:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:51:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:51:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:51:13.420076  543705 net.go:648] Add success.
I0321 20:51:13.423065  543705 net.go:770] primary dev: ETH0
I0321 20:51:13.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:51:13.423094  543705 net.go:698] Add success.
I0321 20:51:13.468681  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71a183dd-aa24-4fed-be2c-9f46ff1f194b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:51:13.468715  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:51:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:51:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:51:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 20:51:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:51:14.456616  543705 disk_worker.go:494] system disk:vda1
I0321 20:51:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:51:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:51:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:51:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:51:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:51:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:51:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:51:23.409794  543705 memory.go:184] no items to output this cycle
I0321 20:51:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 20:51:31.377678  543705 disk_info.go:125] begin check local disk info of client
I0321 20:51:31.380248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:51:31.380254  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a200 0xc00047a240]
E0321 20:51:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:51:33.409798  543705 memory.go:184] no items to output this cycle
I0321 20:51:33.409814  543705 cpu.go:275] no items to output this cycle
I0321 20:51:39.166063  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:51:39.166070  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:51:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:51:43.410746  543705 memory.go:191] Add success.
I0321 20:51:43.409827  543705 cpu.go:282] Add success.
I0321 20:51:43.420423  543705 net.go:648] Add success.
I0321 20:51:43.422966  543705 net.go:770] primary dev: ETH0
I0321 20:51:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:51:43.422991  543705 net.go:698] Add success.
I0321 20:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:51:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:51:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:51:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:51:53.409788  543705 memory.go:184] no items to output this cycle
I0321 20:51:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 20:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:52:03.409781  543705 memory.go:184] no items to output this cycle
I0321 20:52:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 20:52:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:52:13.409796  543705 memory.go:191] Add success.
I0321 20:52:13.409817  543705 cpu.go:282] Add success.
W0321 20:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:52:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:52:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:52:13.420258  543705 net.go:648] Add success.
I0321 20:52:13.423800  543705 net.go:770] primary dev: ETH0
I0321 20:52:13.423814  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:52:13.423825  543705 net.go:698] Add success.
W0321 20:52:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:52:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 20:52:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0321 20:52:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:52:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:52:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0321 20:52:14.456527  543705 disk_worker.go:494] system disk:vda1
I0321 20:52:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:52:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:52:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:52:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:52:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:52:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:52:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:52:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:52:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:52:23.409807  543705 memory.go:184] no items to output this cycle
I0321 20:52:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 20:52:31.381676  543705 disk_info.go:125] begin check local disk info of client
I0321 20:52:31.384228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:52:31.384234  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463940 0xc000463980]
E0321 20:52:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:52:33.409797  543705 memory.go:184] no items to output this cycle
I0321 20:52:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 20:52:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:52:43.409799  543705 memory.go:191] Add success.
I0321 20:52:43.409800  543705 cpu.go:282] Add success.
I0321 20:52:43.419965  543705 net.go:648] Add success.
I0321 20:52:43.422737  543705 net.go:770] primary dev: ETH0
I0321 20:52:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:52:43.422762  543705 net.go:698] Add success.
I0321 20:52:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:52:53.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:52:53.410390  543705 memory.go:184] no items to output this cycle
I0321 20:52:53.410414  543705 cpu.go:275] no items to output this cycle
E0321 20:53:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:53:03.409788  543705 cpu.go:275] no items to output this cycle
I0321 20:53:03.409793  543705 memory.go:184] no items to output this cycle
E0321 20:53:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:53:13.409793  543705 memory.go:191] Add success.
I0321 20:53:13.409797  543705 cpu.go:282] Add success.
W0321 20:53:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:53:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:53:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:53:13.420059  543705 net.go:648] Add success.
I0321 20:53:13.423105  543705 net.go:770] primary dev: ETH0
I0321 20:53:13.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:53:13.423133  543705 net.go:698] Add success.
I0321 20:53:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:53:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:53:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 20:53:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:53:14.456520  543705 disk_worker.go:494] system disk:vda1
I0321 20:53:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:53:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:53:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:53:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:53:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:53:23.409799  543705 memory.go:184] no items to output this cycle
I0321 20:53:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 20:53:31.385677  543705 disk_info.go:125] begin check local disk info of client
I0321 20:53:31.388244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:53:31.388251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2980 0xc0003b29c0]
E0321 20:53:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:53:33.409787  543705 memory.go:184] no items to output this cycle
I0321 20:53:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 20:53:43.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:53:43.409867  543705 memory.go:191] Add success.
I0321 20:53:43.409945  543705 cpu.go:282] Add success.
I0321 20:53:43.419707  543705 net.go:648] Add success.
I0321 20:53:43.422647  543705 net.go:770] primary dev: ETH0
I0321 20:53:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:53:43.422671  543705 net.go:698] Add success.
I0321 20:53:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:53:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:53:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:53:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:53:53.409778  543705 memory.go:184] no items to output this cycle
I0321 20:53:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 20:54:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:54:03.409806  543705 memory.go:184] no items to output this cycle
I0321 20:54:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 20:54:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:54:13.409776  543705 memory.go:191] Add success.
W0321 20:54:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 20:54:13.409810  543705 cpu.go:282] Add success.
W0321 20:54:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:54:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:54:13.420065  543705 net.go:648] Add success.
I0321 20:54:13.422896  543705 net.go:770] primary dev: ETH0
I0321 20:54:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:54:13.422936  543705 net.go:698] Add success.
I0321 20:54:13.463928  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8855efdd-c5dd-4951-ac1d-1a8a2d8217a6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:54:13.463972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 20:54:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:54:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:54:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 20:54:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:54:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 20:54:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:54:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:54:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:54:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:54:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:54:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:54:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:54:23.409808  543705 memory.go:184] no items to output this cycle
I0321 20:54:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 20:54:31.391817  543705 disk_info.go:125] begin check local disk info of client
I0321 20:54:31.394424  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:54:31.394431  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8240 0xc0003c8280]
E0321 20:54:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:54:33.409787  543705 memory.go:184] no items to output this cycle
I0321 20:54:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 20:54:39.169522  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:54:39.169529  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:54:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:54:43.410857  543705 memory.go:191] Add success.
I0321 20:54:43.409915  543705 cpu.go:282] Add success.
I0321 20:54:43.419706  543705 net.go:648] Add success.
I0321 20:54:43.422844  543705 net.go:770] primary dev: ETH0
I0321 20:54:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:54:43.422868  543705 net.go:698] Add success.
I0321 20:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:54:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:54:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:54:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:54:53.409768  543705 memory.go:184] no items to output this cycle
I0321 20:54:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 20:55:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:55:03.409804  543705 memory.go:184] no items to output this cycle
I0321 20:55:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 20:55:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:55:13.409786  543705 memory.go:191] Add success.
I0321 20:55:13.409789  543705 cpu.go:282] Add success.
W0321 20:55:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:55:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:55:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:55:13.420017  543705 net.go:648] Add success.
I0321 20:55:13.422913  543705 net.go:770] primary dev: ETH0
I0321 20:55:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:55:13.422938  543705 net.go:698] Add success.
I0321 20:55:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:55:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:55:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 20:55:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:55:14.456605  543705 disk_worker.go:494] system disk:vda1
I0321 20:55:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:55:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:55:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:55:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:55:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:55:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:55:23.409784  543705 memory.go:184] no items to output this cycle
I0321 20:55:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 20:55:31.397675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:55:31.400294  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:55:31.400300  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003eec80 0xc0003eecc0]
E0321 20:55:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:55:33.409777  543705 memory.go:184] no items to output this cycle
I0321 20:55:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 20:55:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:55:43.409787  543705 memory.go:191] Add success.
I0321 20:55:43.409787  543705 cpu.go:282] Add success.
I0321 20:55:43.419732  543705 net.go:648] Add success.
I0321 20:55:43.422952  543705 net.go:770] primary dev: ETH0
I0321 20:55:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:55:43.422978  543705 net.go:698] Add success.
I0321 20:55:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:55:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:55:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:55:53.409770  543705 memory.go:184] no items to output this cycle
I0321 20:55:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 20:56:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:56:03.409770  543705 memory.go:184] no items to output this cycle
I0321 20:56:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 20:56:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:56:13.409779  543705 memory.go:191] Add success.
W0321 20:56:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 20:56:13.409806  543705 cpu.go:282] Add success.
W0321 20:56:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:56:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:56:13.420085  543705 net.go:648] Add success.
I0321 20:56:13.422665  543705 net.go:770] primary dev: ETH0
I0321 20:56:13.422679  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:56:13.422693  543705 net.go:698] Add success.
I0321 20:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:56:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:56:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 20:56:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:56:14.456582  543705 disk_worker.go:494] system disk:vda1
I0321 20:56:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:56:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:56:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:56:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:56:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:56:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:56:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:56:23.409820  543705 memory.go:184] no items to output this cycle
I0321 20:56:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 20:56:31.401675  543705 disk_info.go:125] begin check local disk info of client
I0321 20:56:31.404239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:56:31.404246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be100 0xc0003be140]
E0321 20:56:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:56:33.409791  543705 memory.go:184] no items to output this cycle
I0321 20:56:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 20:56:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:56:43.409781  543705 memory.go:191] Add success.
I0321 20:56:43.409805  543705 cpu.go:282] Add success.
I0321 20:56:43.419749  543705 net.go:648] Add success.
I0321 20:56:43.422706  543705 net.go:770] primary dev: ETH0
I0321 20:56:43.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:56:43.422734  543705 net.go:698] Add success.
I0321 20:56:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:56:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:56:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:56:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:56:53.409800  543705 memory.go:184] no items to output this cycle
I0321 20:56:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 20:57:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:57:03.409789  543705 cpu.go:275] no items to output this cycle
I0321 20:57:03.409792  543705 memory.go:184] no items to output this cycle
E0321 20:57:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:57:13.409811  543705 memory.go:191] Add success.
I0321 20:57:13.409819  543705 cpu.go:282] Add success.
W0321 20:57:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:57:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:57:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:57:13.420102  543705 net.go:648] Add success.
I0321 20:57:13.429219  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 20:57:13.429305  543705 net.go:770] primary dev: ETH0
I0321 20:57:13.429317  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:57:13.429328  543705 net.go:698] Add success.
I0321 20:57:13.452891  543705 event_worker.go:152] Polling the log file for events...
I0321 20:57:13.463409  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"47fc990d-26e7-4847-993d-afd2208a0ed6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 20:57:13.463442  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 20:57:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:57:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 20:57:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:57:14.456818  543705 disk_worker.go:494] system disk:vda1
I0321 20:57:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 20:57:14.456859  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 20:57:14.456868  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 20:57:14.456873  543705 custom_config.go:64] query custom config with name: gpu
E0321 20:57:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 20:57:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:57:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 20:57:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 20:57:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:57:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:57:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:57:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:57:23.409810  543705 memory.go:184] no items to output this cycle
I0321 20:57:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 20:57:31.405672  543705 disk_info.go:125] begin check local disk info of client
I0321 20:57:31.408247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:57:31.408254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3600 0xc0003e3640]
E0321 20:57:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:57:33.409787  543705 memory.go:184] no items to output this cycle
I0321 20:57:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 20:57:39.169744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 20:57:39.169750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 20:57:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:57:43.410697  543705 memory.go:191] Add success.
I0321 20:57:43.409802  543705 cpu.go:282] Add success.
I0321 20:57:43.420427  543705 net.go:648] Add success.
I0321 20:57:43.423278  543705 net.go:770] primary dev: ETH0
I0321 20:57:43.423298  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:57:43.423327  543705 net.go:698] Add success.
I0321 20:57:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:57:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:57:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:57:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:57:53.409773  543705 memory.go:184] no items to output this cycle
I0321 20:57:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 20:58:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:58:03.409804  543705 memory.go:184] no items to output this cycle
I0321 20:58:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 20:58:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:58:13.409792  543705 cpu.go:282] Add success.
I0321 20:58:13.409802  543705 memory.go:191] Add success.
W0321 20:58:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:58:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:58:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:58:13.420043  543705 net.go:648] Add success.
I0321 20:58:13.423002  543705 net.go:770] primary dev: ETH0
I0321 20:58:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:58:13.423028  543705 net.go:698] Add success.
I0321 20:58:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:58:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:58:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 20:58:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:58:14.456496  543705 disk_worker.go:494] system disk:vda1
I0321 20:58:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:58:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:58:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:58:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:58:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:58:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 20:58:23.409796  543705 memory.go:184] no items to output this cycle
I0321 20:58:31.409686  543705 disk_info.go:125] begin check local disk info of client
I0321 20:58:31.412070  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:58:31.412077  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003129c0 0xc000312a00]
E0321 20:58:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:58:33.409792  543705 memory.go:184] no items to output this cycle
I0321 20:58:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 20:58:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:58:43.409804  543705 cpu.go:282] Add success.
I0321 20:58:43.409805  543705 memory.go:191] Add success.
I0321 20:58:43.419990  543705 net.go:648] Add success.
I0321 20:58:43.422734  543705 net.go:770] primary dev: ETH0
I0321 20:58:43.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:58:43.422763  543705 net.go:698] Add success.
I0321 20:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:58:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:58:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:58:53.409763  543705 memory.go:184] no items to output this cycle
I0321 20:58:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 20:59:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:59:03.409766  543705 memory.go:184] no items to output this cycle
I0321 20:59:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 20:59:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:59:13.409813  543705 memory.go:191] Add success.
I0321 20:59:13.409821  543705 cpu.go:282] Add success.
W0321 20:59:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 20:59:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 20:59:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 20:59:13.420111  543705 net.go:648] Add success.
I0321 20:59:13.423133  543705 net.go:770] primary dev: ETH0
I0321 20:59:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:59:13.423158  543705 net.go:698] Add success.
I0321 20:59:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 20:59:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 20:59:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 20:59:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 20:59:14.456517  543705 disk_worker.go:494] system disk:vda1
I0321 20:59:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 20:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 20:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:59:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:59:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 20:59:16.472478  543705 disk_local_worker.go:436] Get disk info: []
E0321 20:59:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:59:23.409794  543705 memory.go:184] no items to output this cycle
I0321 20:59:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 20:59:31.412797  543705 disk_info.go:125] begin check local disk info of client
I0321 20:59:31.415427  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 20:59:31.415433  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342e80 0xc000342ec0]
E0321 20:59:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:59:33.409766  543705 memory.go:184] no items to output this cycle
I0321 20:59:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 20:59:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:59:43.409822  543705 memory.go:191] Add success.
I0321 20:59:43.409831  543705 cpu.go:282] Add success.
I0321 20:59:43.419879  543705 net.go:648] Add success.
I0321 20:59:43.422755  543705 net.go:770] primary dev: ETH0
I0321 20:59:43.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0321 20:59:43.422783  543705 net.go:698] Add success.
I0321 20:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 20:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 20:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 20:59:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 20:59:53.409809  543705 memory.go:184] no items to output this cycle
I0321 20:59:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 21:00:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:00:03.409781  543705 memory.go:184] no items to output this cycle
I0321 21:00:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 21:00:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:00:13.409810  543705 memory.go:191] Add success.
I0321 21:00:13.409828  543705 cpu.go:282] Add success.
W0321 21:00:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:00:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:00:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:00:13.420085  543705 net.go:648] Add success.
I0321 21:00:13.423121  543705 net.go:770] primary dev: ETH0
I0321 21:00:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:00:13.423150  543705 net.go:698] Add success.
I0321 21:00:13.577249  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8868d44d-42a4-4404-a005-899b1ef2ef4a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:00:13.577282  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:00:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:00:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0321 21:00:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:00:14.456593  543705 disk_worker.go:494] system disk:vda1
I0321 21:00:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:00:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:00:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:00:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:00:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:00:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:00:23.409811  543705 memory.go:184] no items to output this cycle
I0321 21:00:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 21:00:31.415795  543705 disk_info.go:125] begin check local disk info of client
I0321 21:00:31.418448  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:00:31.418454  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307180 0xc0003071c0]
E0321 21:00:33.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:00:33.409882  543705 memory.go:184] no items to output this cycle
I0321 21:00:33.409983  543705 cpu.go:275] no items to output this cycle
I0321 21:00:39.169891  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:00:39.169897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:00:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:00:43.410769  543705 memory.go:191] Add success.
I0321 21:00:43.409797  543705 cpu.go:282] Add success.
I0321 21:00:43.420565  543705 net.go:648] Add success.
I0321 21:00:43.423421  543705 net.go:770] primary dev: ETH0
I0321 21:00:43.423435  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:00:43.423450  543705 net.go:698] Add success.
I0321 21:00:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:00:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:00:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:00:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:00:53.409786  543705 memory.go:184] no items to output this cycle
I0321 21:00:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 21:01:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:01:03.409773  543705 memory.go:184] no items to output this cycle
I0321 21:01:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 21:01:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:01:13.409787  543705 memory.go:191] Add success.
I0321 21:01:13.409792  543705 cpu.go:282] Add success.
W0321 21:01:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:01:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:01:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:01:13.420042  543705 net.go:648] Add success.
I0321 21:01:13.423184  543705 net.go:770] primary dev: ETH0
I0321 21:01:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:01:13.423208  543705 net.go:698] Add success.
I0321 21:01:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:01:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:01:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 21:01:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:01:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 21:01:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:01:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:01:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:01:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:01:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:01:23.409814  543705 memory.go:184] no items to output this cycle
I0321 21:01:23.409827  543705 cpu.go:275] no items to output this cycle
I0321 21:01:31.418798  543705 disk_info.go:125] begin check local disk info of client
I0321 21:01:31.421368  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:01:31.421375  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c100 0xc00048c140]
E0321 21:01:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:01:33.409916  543705 memory.go:184] no items to output this cycle
I0321 21:01:33.409926  543705 cpu.go:275] no items to output this cycle
E0321 21:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:01:43.409790  543705 memory.go:191] Add success.
I0321 21:01:43.409794  543705 cpu.go:282] Add success.
I0321 21:01:43.419983  543705 net.go:648] Add success.
I0321 21:01:43.423141  543705 net.go:770] primary dev: ETH0
I0321 21:01:43.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:01:43.423165  543705 net.go:698] Add success.
I0321 21:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:01:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:01:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:01:53.409778  543705 memory.go:184] no items to output this cycle
I0321 21:01:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 21:02:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:02:03.409781  543705 memory.go:184] no items to output this cycle
I0321 21:02:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:02:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:02:13.409786  543705 cpu.go:282] Add success.
I0321 21:02:13.409789  543705 memory.go:191] Add success.
W0321 21:02:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:02:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:02:13.420262  543705 net.go:648] Add success.
I0321 21:02:13.423452  543705 net.go:770] primary dev: ETH0
I0321 21:02:13.423464  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:02:13.423477  543705 net.go:698] Add success.
W0321 21:02:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:02:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 21:02:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:02:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:02:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:02:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:02:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 21:02:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:02:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:02:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:02:16.457904  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:02:16.457903  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:02:16.457955  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:02:16.457974  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:02:16.472302  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:02:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:02:23.409814  543705 memory.go:184] no items to output this cycle
I0321 21:02:23.409823  543705 cpu.go:275] no items to output this cycle
I0321 21:02:31.421792  543705 disk_info.go:125] begin check local disk info of client
I0321 21:02:31.424310  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:02:31.424316  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ae80 0xc00035aec0]
E0321 21:02:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:02:33.409792  543705 memory.go:184] no items to output this cycle
I0321 21:02:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 21:02:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:02:43.409801  543705 memory.go:191] Add success.
I0321 21:02:43.409801  543705 cpu.go:282] Add success.
I0321 21:02:43.419954  543705 net.go:648] Add success.
I0321 21:02:43.422907  543705 net.go:770] primary dev: ETH0
I0321 21:02:43.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:02:43.422931  543705 net.go:698] Add success.
I0321 21:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:02:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:02:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:02:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:02:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 21:02:53.409794  543705 memory.go:184] no items to output this cycle
E0321 21:03:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:03:03.409776  543705 memory.go:184] no items to output this cycle
I0321 21:03:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 21:03:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:03:13.409782  543705 memory.go:191] Add success.
I0321 21:03:13.409801  543705 cpu.go:282] Add success.
W0321 21:03:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:03:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:03:13.420056  543705 net.go:648] Add success.
I0321 21:03:13.423077  543705 net.go:770] primary dev: ETH0
I0321 21:03:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:03:13.423101  543705 net.go:698] Add success.
I0321 21:03:13.469232  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"99547216-9779-436b-8266-54034ff0d491","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:03:13.469264  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:03:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:03:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:03:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 21:03:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:03:14.456781  543705 disk_worker.go:494] system disk:vda1
I0321 21:03:14.456811  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:03:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:03:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:03:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:03:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:03:23.409791  543705 memory.go:184] no items to output this cycle
I0321 21:03:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 21:03:31.424819  543705 disk_info.go:125] begin check local disk info of client
I0321 21:03:31.427411  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:03:31.427418  543705 disk_info.go:196] parse disk info done, disk is : [0xc000495740 0xc000495780]
E0321 21:03:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:03:33.409783  543705 cpu.go:275] no items to output this cycle
I0321 21:03:33.409787  543705 memory.go:184] no items to output this cycle
I0321 21:03:39.173566  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:03:39.173572  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:03:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:03:43.410784  543705 memory.go:191] Add success.
I0321 21:03:43.409800  543705 cpu.go:282] Add success.
I0321 21:03:43.420519  543705 net.go:648] Add success.
I0321 21:03:43.423417  543705 net.go:770] primary dev: ETH0
I0321 21:03:43.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:03:43.423442  543705 net.go:698] Add success.
I0321 21:03:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:03:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:03:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:03:53.409783  543705 memory.go:184] no items to output this cycle
I0321 21:03:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 21:04:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:04:03.409774  543705 memory.go:184] no items to output this cycle
I0321 21:04:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 21:04:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:04:13.409809  543705 memory.go:191] Add success.
I0321 21:04:13.409814  543705 cpu.go:282] Add success.
W0321 21:04:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:04:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:04:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:04:13.420046  543705 net.go:648] Add success.
I0321 21:04:13.423095  543705 net.go:770] primary dev: ETH0
I0321 21:04:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:04:13.423121  543705 net.go:698] Add success.
I0321 21:04:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:04:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:04:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 21:04:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:04:14.456489  543705 disk_worker.go:494] system disk:vda1
I0321 21:04:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:04:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:04:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:04:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:04:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:04:23.409784  543705 memory.go:184] no items to output this cycle
I0321 21:04:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 21:04:31.427822  543705 disk_info.go:125] begin check local disk info of client
I0321 21:04:31.430408  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:04:31.430415  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c59c0 0xc0000c5a00]
E0321 21:04:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:04:33.409788  543705 memory.go:184] no items to output this cycle
I0321 21:04:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 21:04:43.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:04:43.409873  543705 memory.go:191] Add success.
I0321 21:04:43.409975  543705 cpu.go:282] Add success.
I0321 21:04:43.419753  543705 net.go:648] Add success.
I0321 21:04:43.422577  543705 net.go:770] primary dev: ETH0
I0321 21:04:43.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:04:43.422605  543705 net.go:698] Add success.
I0321 21:04:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:04:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:04:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:04:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:04:53.409787  543705 cpu.go:275] no items to output this cycle
I0321 21:04:53.409799  543705 memory.go:184] no items to output this cycle
E0321 21:05:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:05:03.409800  543705 memory.go:184] no items to output this cycle
I0321 21:05:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 21:05:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:05:13.409793  543705 cpu.go:282] Add success.
I0321 21:05:13.409795  543705 memory.go:191] Add success.
W0321 21:05:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:05:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:05:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:05:13.420065  543705 net.go:648] Add success.
I0321 21:05:13.422893  543705 net.go:770] primary dev: ETH0
I0321 21:05:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:05:13.422925  543705 net.go:698] Add success.
I0321 21:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:05:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:05:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 21:05:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:05:14.456590  543705 disk_worker.go:494] system disk:vda1
I0321 21:05:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:05:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:05:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:05:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:05:23.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:05:23.409881  543705 memory.go:184] no items to output this cycle
I0321 21:05:23.409899  543705 cpu.go:275] no items to output this cycle
I0321 21:05:31.430839  543705 disk_info.go:125] begin check local disk info of client
I0321 21:05:31.433416  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:05:31.433422  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f40 0xc0000c4f80]
E0321 21:05:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:05:33.409801  543705 memory.go:184] no items to output this cycle
I0321 21:05:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 21:05:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:05:43.409798  543705 memory.go:191] Add success.
I0321 21:05:43.409801  543705 cpu.go:282] Add success.
I0321 21:05:43.420058  543705 net.go:648] Add success.
I0321 21:05:43.422967  543705 net.go:770] primary dev: ETH0
I0321 21:05:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:05:43.422992  543705 net.go:698] Add success.
I0321 21:05:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:05:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:05:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:05:53.409799  543705 memory.go:184] no items to output this cycle
I0321 21:05:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 21:06:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:06:03.409810  543705 memory.go:184] no items to output this cycle
I0321 21:06:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 21:06:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:06:13.409811  543705 memory.go:191] Add success.
I0321 21:06:13.409811  543705 cpu.go:282] Add success.
W0321 21:06:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:06:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:06:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:06:13.420125  543705 net.go:648] Add success.
I0321 21:06:13.423057  543705 net.go:770] primary dev: ETH0
I0321 21:06:13.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:06:13.423082  543705 net.go:698] Add success.
I0321 21:06:13.469351  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"21a3ac35-2019-4023-b61f-5b4319601ffb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:06:13.469385  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:06:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:06:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0321 21:06:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:06:14.456499  543705 disk_worker.go:494] system disk:vda1
I0321 21:06:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:06:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:06:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:06:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:06:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:06:23.409783  543705 memory.go:184] no items to output this cycle
I0321 21:06:23.409853  543705 cpu.go:275] no items to output this cycle
I0321 21:06:31.433856  543705 disk_info.go:125] begin check local disk info of client
I0321 21:06:31.436415  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:06:31.436422  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abcc0 0xc0001abd00]
E0321 21:06:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:06:33.409795  543705 memory.go:184] no items to output this cycle
I0321 21:06:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 21:06:39.173732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:06:39.173739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:06:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:06:43.410652  543705 memory.go:191] Add success.
I0321 21:06:43.409789  543705 cpu.go:282] Add success.
I0321 21:06:43.420443  543705 net.go:648] Add success.
I0321 21:06:43.423149  543705 net.go:770] primary dev: ETH0
I0321 21:06:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:06:43.423174  543705 net.go:698] Add success.
I0321 21:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:06:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:06:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:06:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:06:53.409769  543705 memory.go:184] no items to output this cycle
I0321 21:06:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 21:07:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:07:03.409776  543705 memory.go:184] no items to output this cycle
I0321 21:07:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:07:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:07:13.409793  543705 memory.go:191] Add success.
I0321 21:07:13.409794  543705 cpu.go:282] Add success.
W0321 21:07:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:07:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:07:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:07:13.420136  543705 net.go:648] Add success.
I0321 21:07:13.422957  543705 net.go:770] primary dev: ETH0
I0321 21:07:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:07:13.422989  543705 net.go:698] Add success.
I0321 21:07:13.453590  543705 event_worker.go:152] Polling the log file for events...
W0321 21:07:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:07:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 21:07:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:07:14.456810  543705 disk_worker.go:494] system disk:vda1
I0321 21:07:14.456849  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:07:14.457043  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:07:14.457051  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:07:14.457057  543705 custom_config.go:64] query custom config with name: gpu
E0321 21:07:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:07:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:07:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:07:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:07:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:07:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:07:16.472338  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:07:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:07:23.409826  543705 memory.go:184] no items to output this cycle
I0321 21:07:23.409832  543705 cpu.go:275] no items to output this cycle
I0321 21:07:31.436886  543705 disk_info.go:125] begin check local disk info of client
I0321 21:07:31.439474  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:07:31.439481  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa180 0xc0001aa1c0]
E0321 21:07:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:07:33.409791  543705 memory.go:184] no items to output this cycle
I0321 21:07:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 21:07:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:07:43.409820  543705 memory.go:191] Add success.
I0321 21:07:43.409827  543705 cpu.go:282] Add success.
I0321 21:07:43.419999  543705 net.go:648] Add success.
I0321 21:07:43.422630  543705 net.go:770] primary dev: ETH0
I0321 21:07:43.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:07:43.422659  543705 net.go:698] Add success.
I0321 21:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:07:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:07:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:07:53.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:07:53.409877  543705 memory.go:184] no items to output this cycle
I0321 21:07:53.410098  543705 cpu.go:275] no items to output this cycle
E0321 21:08:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:08:03.409781  543705 memory.go:184] no items to output this cycle
I0321 21:08:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 21:08:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:08:13.409785  543705 memory.go:191] Add success.
I0321 21:08:13.409811  543705 cpu.go:282] Add success.
W0321 21:08:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:08:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:08:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:08:13.420054  543705 net.go:648] Add success.
I0321 21:08:13.422694  543705 net.go:770] primary dev: ETH0
I0321 21:08:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:08:13.422724  543705 net.go:698] Add success.
I0321 21:08:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:08:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:08:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 21:08:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:08:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 21:08:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:08:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:08:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:08:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:08:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:08:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:08:23.409784  543705 memory.go:184] no items to output this cycle
I0321 21:08:23.409846  543705 cpu.go:275] no items to output this cycle
I0321 21:08:31.439886  543705 disk_info.go:125] begin check local disk info of client
I0321 21:08:31.442439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:08:31.442446  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348ac0 0xc000348b00]
E0321 21:08:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:08:33.409795  543705 memory.go:184] no items to output this cycle
I0321 21:08:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 21:08:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:08:43.409782  543705 memory.go:191] Add success.
I0321 21:08:43.409803  543705 cpu.go:282] Add success.
I0321 21:08:43.419944  543705 net.go:648] Add success.
I0321 21:08:43.422677  543705 net.go:770] primary dev: ETH0
I0321 21:08:43.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:08:43.422722  543705 net.go:698] Add success.
I0321 21:08:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:08:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:08:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:08:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:08:53.409783  543705 memory.go:184] no items to output this cycle
I0321 21:08:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 21:09:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:09:03.409779  543705 memory.go:184] no items to output this cycle
I0321 21:09:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:09:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:09:13.409790  543705 cpu.go:282] Add success.
I0321 21:09:13.409799  543705 memory.go:191] Add success.
W0321 21:09:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:09:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:09:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:09:13.420034  543705 net.go:648] Add success.
I0321 21:09:13.422642  543705 net.go:770] primary dev: ETH0
I0321 21:09:13.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:09:13.422671  543705 net.go:698] Add success.
I0321 21:09:13.469360  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"23424237-54e8-4520-abd1-4a5309ff47f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:09:13.469392  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:09:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:09:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:09:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 21:09:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:09:14.456527  543705 disk_worker.go:494] system disk:vda1
I0321 21:09:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:09:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:09:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:09:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:09:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:09:16.472470  543705 disk_local_worker.go:436] Get disk info: []
I0321 21:09:23.409794  543705 cpu.go:275] no items to output this cycle
E0321 21:09:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:09:23.409822  543705 memory.go:184] no items to output this cycle
I0321 21:09:31.442909  543705 disk_info.go:125] begin check local disk info of client
I0321 21:09:31.445450  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:09:31.445457  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1000 0xc0003e1040]
E0321 21:09:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:09:33.409791  543705 memory.go:184] no items to output this cycle
I0321 21:09:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 21:09:39.177574  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:09:39.177582  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:09:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:09:43.410572  543705 memory.go:191] Add success.
I0321 21:09:43.409822  543705 cpu.go:282] Add success.
I0321 21:09:43.420443  543705 net.go:648] Add success.
I0321 21:09:43.423280  543705 net.go:770] primary dev: ETH0
I0321 21:09:43.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:09:43.423305  543705 net.go:698] Add success.
I0321 21:09:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:09:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:09:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:09:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:09:53.409793  543705 memory.go:184] no items to output this cycle
I0321 21:09:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 21:10:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:10:03.409791  543705 memory.go:184] no items to output this cycle
I0321 21:10:03.409804  543705 cpu.go:275] no items to output this cycle
E0321 21:10:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:10:13.409790  543705 memory.go:191] Add success.
I0321 21:10:13.409792  543705 cpu.go:282] Add success.
W0321 21:10:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:10:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:10:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:10:13.420053  543705 net.go:648] Add success.
I0321 21:10:13.422720  543705 net.go:770] primary dev: ETH0
I0321 21:10:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:10:13.422747  543705 net.go:698] Add success.
I0321 21:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:10:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:10:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0321 21:10:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:10:14.456477  543705 disk_worker.go:494] system disk:vda1
I0321 21:10:14.456522  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:10:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:10:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:10:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:10:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:10:16.472400  543705 disk_local_worker.go:436] Get disk info: []
I0321 21:10:23.409806  543705 cpu.go:275] no items to output this cycle
E0321 21:10:23.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:10:23.409827  543705 memory.go:184] no items to output this cycle
I0321 21:10:31.445925  543705 disk_info.go:125] begin check local disk info of client
I0321 21:10:31.448536  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:10:31.448545  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472d00 0xc000472d40]
E0321 21:10:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:10:33.409779  543705 memory.go:184] no items to output this cycle
I0321 21:10:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 21:10:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:10:43.409820  543705 memory.go:191] Add success.
I0321 21:10:43.409821  543705 cpu.go:282] Add success.
I0321 21:10:43.419714  543705 net.go:648] Add success.
I0321 21:10:43.422561  543705 net.go:770] primary dev: ETH0
I0321 21:10:43.422573  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:10:43.422586  543705 net.go:698] Add success.
I0321 21:10:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:10:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:10:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:10:53.409800  543705 memory.go:184] no items to output this cycle
I0321 21:10:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 21:11:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:11:03.409778  543705 memory.go:184] no items to output this cycle
I0321 21:11:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:11:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:11:13.409786  543705 cpu.go:282] Add success.
I0321 21:11:13.409793  543705 memory.go:191] Add success.
W0321 21:11:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:11:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:11:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:11:13.420046  543705 net.go:648] Add success.
I0321 21:11:13.422786  543705 net.go:770] primary dev: ETH0
I0321 21:11:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:11:13.422811  543705 net.go:698] Add success.
I0321 21:11:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:11:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:11:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 21:11:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:11:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 21:11:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:11:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:11:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:11:16.472378  543705 disk_local_worker.go:436] Get disk info: []
I0321 21:11:23.409802  543705 cpu.go:275] no items to output this cycle
E0321 21:11:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:11:23.409828  543705 memory.go:184] no items to output this cycle
I0321 21:11:31.448939  543705 disk_info.go:125] begin check local disk info of client
I0321 21:11:31.451511  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:11:31.451517  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ae740 0xc0002ae780]
E0321 21:11:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:11:33.409796  543705 memory.go:184] no items to output this cycle
I0321 21:11:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 21:11:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:11:43.409816  543705 memory.go:191] Add success.
I0321 21:11:43.409819  543705 cpu.go:282] Add success.
I0321 21:11:43.419714  543705 net.go:648] Add success.
I0321 21:11:43.422239  543705 net.go:770] primary dev: ETH0
I0321 21:11:43.422253  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:11:43.422264  543705 net.go:698] Add success.
I0321 21:11:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:11:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:11:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:11:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:11:53.409780  543705 memory.go:184] no items to output this cycle
I0321 21:11:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 21:12:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:12:03.409763  543705 memory.go:184] no items to output this cycle
I0321 21:12:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 21:12:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:12:13.409785  543705 memory.go:191] Add success.
I0321 21:12:13.409802  543705 cpu.go:282] Add success.
W0321 21:12:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:12:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:12:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:12:13.420111  543705 net.go:648] Add success.
I0321 21:12:13.422846  543705 net.go:770] primary dev: ETH0
I0321 21:12:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:12:13.422872  543705 net.go:698] Add success.
I0321 21:12:13.470792  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a44f5eac-589f-4d1b-9269-d2a250b83ec4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:12:13.470825  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 21:12:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:12:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 21:12:14.455230  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:12:14.456115  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:12:14.456124  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:12:14.456130  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:12:14.456944  543705 disk_worker.go:494] system disk:vda1
I0321 21:12:14.456975  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:12:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:12:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:12:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:12:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:12:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:12:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:12:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:12:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:12:23.409772  543705 memory.go:184] no items to output this cycle
I0321 21:12:23.409843  543705 cpu.go:275] no items to output this cycle
I0321 21:12:31.451954  543705 disk_info.go:125] begin check local disk info of client
I0321 21:12:31.454564  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:12:31.454571  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3540 0xc0003f3580]
E0321 21:12:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:12:33.409766  543705 memory.go:184] no items to output this cycle
I0321 21:12:33.409797  543705 cpu.go:275] no items to output this cycle
I0321 21:12:39.177733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:12:39.177740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:12:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:12:43.410718  543705 memory.go:191] Add success.
I0321 21:12:43.409817  543705 cpu.go:282] Add success.
I0321 21:12:43.419757  543705 net.go:648] Add success.
I0321 21:12:43.422544  543705 net.go:770] primary dev: ETH0
I0321 21:12:43.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:12:43.422574  543705 net.go:698] Add success.
I0321 21:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:12:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:12:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:12:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:12:53.409782  543705 cpu.go:275] no items to output this cycle
I0321 21:12:53.409787  543705 memory.go:184] no items to output this cycle
E0321 21:13:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:13:03.409800  543705 memory.go:184] no items to output this cycle
I0321 21:13:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 21:13:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:13:13.409789  543705 memory.go:191] Add success.
I0321 21:13:13.409808  543705 cpu.go:282] Add success.
W0321 21:13:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:13:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:13:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:13:13.420058  543705 net.go:648] Add success.
I0321 21:13:13.422892  543705 net.go:770] primary dev: ETH0
I0321 21:13:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:13:13.422916  543705 net.go:698] Add success.
I0321 21:13:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:13:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:13:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 21:13:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:13:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 21:13:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:13:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:13:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:13:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:13:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:13:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:13:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:13:23.409789  543705 memory.go:184] no items to output this cycle
I0321 21:13:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 21:13:31.454960  543705 disk_info.go:125] begin check local disk info of client
I0321 21:13:31.457519  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:13:31.457526  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee980 0xc0003ee9c0]
E0321 21:13:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:13:33.409781  543705 memory.go:184] no items to output this cycle
I0321 21:13:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 21:13:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:13:43.409917  543705 memory.go:191] Add success.
I0321 21:13:43.409942  543705 cpu.go:282] Add success.
I0321 21:13:43.419737  543705 net.go:648] Add success.
I0321 21:13:43.422455  543705 net.go:770] primary dev: ETH0
I0321 21:13:43.422468  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:13:43.422479  543705 net.go:698] Add success.
I0321 21:13:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:13:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:13:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:13:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:13:53.409779  543705 memory.go:184] no items to output this cycle
I0321 21:13:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 21:14:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:14:03.409794  543705 memory.go:184] no items to output this cycle
I0321 21:14:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 21:14:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:14:13.409786  543705 memory.go:191] Add success.
I0321 21:14:13.409810  543705 cpu.go:282] Add success.
W0321 21:14:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:14:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:14:13.420128  543705 net.go:648] Add success.
I0321 21:14:13.422835  543705 net.go:770] primary dev: ETH0
I0321 21:14:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:14:13.422863  543705 net.go:698] Add success.
I0321 21:14:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:14:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:14:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 21:14:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:14:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 21:14:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:14:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:14:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:14:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:14:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:14:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:14:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:14:23.409807  543705 memory.go:184] no items to output this cycle
I0321 21:14:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 21:14:31.457985  543705 disk_info.go:125] begin check local disk info of client
I0321 21:14:31.460584  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:14:31.460591  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0321 21:14:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:14:33.409802  543705 memory.go:184] no items to output this cycle
I0321 21:14:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 21:14:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:14:43.409905  543705 memory.go:191] Add success.
I0321 21:14:43.409948  543705 cpu.go:282] Add success.
I0321 21:14:43.419731  543705 net.go:648] Add success.
I0321 21:14:43.422611  543705 net.go:770] primary dev: ETH0
I0321 21:14:43.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:14:43.422635  543705 net.go:698] Add success.
I0321 21:14:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:14:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:14:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:14:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:14:53.409763  543705 memory.go:184] no items to output this cycle
I0321 21:14:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 21:15:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:15:03.409775  543705 memory.go:184] no items to output this cycle
I0321 21:15:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 21:15:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:15:13.409778  543705 memory.go:191] Add success.
W0321 21:15:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 21:15:13.409808  543705 cpu.go:282] Add success.
W0321 21:15:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:15:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:15:13.420105  543705 net.go:648] Add success.
I0321 21:15:13.422802  543705 net.go:770] primary dev: ETH0
I0321 21:15:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:15:13.422826  543705 net.go:698] Add success.
I0321 21:15:13.469166  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3c410d18-ec59-4da0-93b3-c417df9bd390","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:15:13.469197  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:15:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:15:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 21:15:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:15:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 21:15:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:15:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:15:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:15:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:15:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:15:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:15:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 21:15:23.409802  543705 memory.go:184] no items to output this cycle
I0321 21:15:31.461010  543705 disk_info.go:125] begin check local disk info of client
I0321 21:15:31.463568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:15:31.463574  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5200 0xc0000c5240]
E0321 21:15:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:15:33.409790  543705 memory.go:184] no items to output this cycle
I0321 21:15:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 21:15:39.181595  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:15:39.181603  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:15:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:15:43.410573  543705 memory.go:191] Add success.
I0321 21:15:43.409802  543705 cpu.go:282] Add success.
I0321 21:15:43.420385  543705 net.go:648] Add success.
I0321 21:15:43.422956  543705 net.go:770] primary dev: ETH0
I0321 21:15:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:15:43.422981  543705 net.go:698] Add success.
I0321 21:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:15:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:15:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:15:53.409777  543705 memory.go:184] no items to output this cycle
I0321 21:15:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 21:16:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:16:03.409784  543705 memory.go:184] no items to output this cycle
I0321 21:16:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 21:16:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:16:13.409812  543705 memory.go:191] Add success.
I0321 21:16:13.409820  543705 cpu.go:282] Add success.
W0321 21:16:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:16:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:16:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:16:13.420067  543705 net.go:648] Add success.
I0321 21:16:13.423020  543705 net.go:770] primary dev: ETH0
I0321 21:16:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:16:13.423045  543705 net.go:698] Add success.
I0321 21:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:16:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:16:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0321 21:16:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:16:14.456607  543705 disk_worker.go:494] system disk:vda1
I0321 21:16:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:16:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:16:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:16:23.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:16:23.410264  543705 memory.go:184] no items to output this cycle
I0321 21:16:23.410286  543705 cpu.go:275] no items to output this cycle
I0321 21:16:31.464017  543705 disk_info.go:125] begin check local disk info of client
I0321 21:16:31.466588  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:16:31.466594  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fe600 0xc0004fe640]
E0321 21:16:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:16:33.409793  543705 memory.go:184] no items to output this cycle
I0321 21:16:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 21:16:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:16:43.409790  543705 memory.go:191] Add success.
I0321 21:16:43.409816  543705 cpu.go:282] Add success.
I0321 21:16:43.420051  543705 net.go:648] Add success.
I0321 21:16:43.422992  543705 net.go:770] primary dev: ETH0
I0321 21:16:43.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:16:43.423019  543705 net.go:698] Add success.
I0321 21:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:16:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:16:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:16:53.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:16:53.409978  543705 cpu.go:275] no items to output this cycle
I0321 21:16:53.409981  543705 memory.go:184] no items to output this cycle
E0321 21:17:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:17:03.409793  543705 memory.go:184] no items to output this cycle
I0321 21:17:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 21:17:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:17:13.409779  543705 memory.go:191] Add success.
I0321 21:17:13.409802  543705 cpu.go:282] Add success.
W0321 21:17:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:17:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:17:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:17:13.420052  543705 net.go:648] Add success.
I0321 21:17:13.423094  543705 net.go:770] primary dev: ETH0
I0321 21:17:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:17:13.423118  543705 net.go:698] Add success.
I0321 21:17:13.453667  543705 event_worker.go:152] Polling the log file for events...
W0321 21:17:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:17:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 21:17:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:17:14.456915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:17:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:17:14.456931  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:17:14.456996  543705 disk_worker.go:494] system disk:vda1
I0321 21:17:14.457023  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:17:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:17:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:17:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:17:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:17:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:17:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:17:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:17:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:17:23.409809  543705 memory.go:184] no items to output this cycle
I0321 21:17:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 21:17:31.467022  543705 disk_info.go:125] begin check local disk info of client
I0321 21:17:31.469622  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:17:31.469628  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab200 0xc0001ab240]
E0321 21:17:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:17:33.409787  543705 memory.go:184] no items to output this cycle
I0321 21:17:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 21:17:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:17:43.409813  543705 memory.go:191] Add success.
I0321 21:17:43.409819  543705 cpu.go:282] Add success.
I0321 21:17:43.419883  543705 net.go:648] Add success.
I0321 21:17:43.422951  543705 net.go:770] primary dev: ETH0
I0321 21:17:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:17:43.422976  543705 net.go:698] Add success.
I0321 21:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:17:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:17:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:17:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:17:53.409796  543705 memory.go:184] no items to output this cycle
I0321 21:17:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 21:18:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:18:03.409784  543705 cpu.go:275] no items to output this cycle
I0321 21:18:03.409792  543705 memory.go:184] no items to output this cycle
E0321 21:18:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:18:13.409803  543705 memory.go:191] Add success.
I0321 21:18:13.409815  543705 cpu.go:282] Add success.
W0321 21:18:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:18:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:18:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:18:13.420069  543705 net.go:648] Add success.
I0321 21:18:13.422917  543705 net.go:770] primary dev: ETH0
I0321 21:18:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:18:13.422942  543705 net.go:698] Add success.
I0321 21:18:13.476597  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c6f89055-27aa-4967-b292-5ab40da1190c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:18:13.476630  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:18:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:18:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:18:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0321 21:18:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:18:14.456613  543705 disk_worker.go:494] system disk:vda1
I0321 21:18:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:18:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:18:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:18:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:18:23.409781  543705 memory.go:184] no items to output this cycle
I0321 21:18:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 21:18:31.470045  543705 disk_info.go:125] begin check local disk info of client
I0321 21:18:31.472638  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:18:31.472645  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354c00 0xc000354c40]
E0321 21:18:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:18:33.409764  543705 memory.go:184] no items to output this cycle
I0321 21:18:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 21:18:39.181737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:18:39.181745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:18:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:18:43.410626  543705 memory.go:191] Add success.
I0321 21:18:43.409802  543705 cpu.go:282] Add success.
I0321 21:18:43.420320  543705 net.go:648] Add success.
I0321 21:18:43.422840  543705 net.go:770] primary dev: ETH0
I0321 21:18:43.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:18:43.422959  543705 net.go:698] Add success.
I0321 21:18:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:18:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:18:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:18:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:18:53.409770  543705 memory.go:184] no items to output this cycle
I0321 21:18:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 21:19:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:19:03.409781  543705 memory.go:184] no items to output this cycle
I0321 21:19:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 21:19:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:19:13.409784  543705 cpu.go:282] Add success.
I0321 21:19:13.409795  543705 memory.go:191] Add success.
W0321 21:19:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:19:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:19:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:19:13.420398  543705 net.go:648] Add success.
I0321 21:19:13.423352  543705 net.go:770] primary dev: ETH0
I0321 21:19:13.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:19:13.423376  543705 net.go:698] Add success.
I0321 21:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:19:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:19:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 21:19:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:19:14.456584  543705 disk_worker.go:494] system disk:vda1
I0321 21:19:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:19:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:19:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:19:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:19:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:19:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:19:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:19:23.409783  543705 memory.go:184] no items to output this cycle
I0321 21:19:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 21:19:31.473060  543705 disk_info.go:125] begin check local disk info of client
I0321 21:19:31.475626  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:19:31.475632  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
E0321 21:19:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:19:33.409794  543705 memory.go:184] no items to output this cycle
I0321 21:19:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 21:19:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:19:43.409794  543705 cpu.go:282] Add success.
I0321 21:19:43.409802  543705 memory.go:191] Add success.
I0321 21:19:43.419998  543705 net.go:648] Add success.
I0321 21:19:43.423178  543705 net.go:770] primary dev: ETH0
I0321 21:19:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:19:43.423203  543705 net.go:698] Add success.
I0321 21:19:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:19:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:19:53.409777  543705 memory.go:184] no items to output this cycle
I0321 21:19:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:20:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:20:03.409777  543705 cpu.go:275] no items to output this cycle
I0321 21:20:03.409783  543705 memory.go:184] no items to output this cycle
E0321 21:20:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:20:13.409790  543705 memory.go:191] Add success.
I0321 21:20:13.409798  543705 cpu.go:282] Add success.
W0321 21:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:20:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:20:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:20:13.420009  543705 net.go:648] Add success.
I0321 21:20:13.422815  543705 net.go:770] primary dev: ETH0
I0321 21:20:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:20:13.422839  543705 net.go:698] Add success.
I0321 21:20:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:20:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:20:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 21:20:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:20:14.456588  543705 disk_worker.go:494] system disk:vda1
I0321 21:20:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:20:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:20:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:20:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:20:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:20:23.409784  543705 memory.go:184] no items to output this cycle
I0321 21:20:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 21:20:31.476074  543705 disk_info.go:125] begin check local disk info of client
I0321 21:20:31.478650  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:20:31.478657  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0321 21:20:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:20:33.409796  543705 memory.go:184] no items to output this cycle
I0321 21:20:33.409811  543705 cpu.go:275] no items to output this cycle
E0321 21:20:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:20:43.409779  543705 memory.go:191] Add success.
I0321 21:20:43.409799  543705 cpu.go:282] Add success.
I0321 21:20:43.419872  543705 net.go:648] Add success.
I0321 21:20:43.422619  543705 net.go:770] primary dev: ETH0
I0321 21:20:43.422633  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:20:43.422646  543705 net.go:698] Add success.
I0321 21:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:20:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:20:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:20:53.409791  543705 memory.go:184] no items to output this cycle
I0321 21:20:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 21:21:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:21:03.409778  543705 memory.go:184] no items to output this cycle
I0321 21:21:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 21:21:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:21:13.409781  543705 memory.go:191] Add success.
I0321 21:21:13.409800  543705 cpu.go:282] Add success.
W0321 21:21:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:21:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:21:13.420080  543705 net.go:648] Add success.
I0321 21:21:13.422959  543705 net.go:770] primary dev: ETH0
I0321 21:21:13.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:21:13.422992  543705 net.go:698] Add success.
I0321 21:21:13.471282  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"632cfa79-9658-48c7-8623-d21217e764c4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:21:13.471317  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:21:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:21:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:21:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 21:21:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:21:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 21:21:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:21:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:21:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:21:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:21:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:21:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:21:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:21:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 21:21:23.409792  543705 memory.go:184] no items to output this cycle
I0321 21:21:31.479091  543705 disk_info.go:125] begin check local disk info of client
I0321 21:21:31.481788  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:21:31.481796  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349600 0xc000349640]
E0321 21:21:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:21:33.409782  543705 memory.go:184] no items to output this cycle
I0321 21:21:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 21:21:39.181884  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:21:39.181890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:21:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:21:43.410562  543705 memory.go:191] Add success.
I0321 21:21:43.409793  543705 cpu.go:282] Add success.
I0321 21:21:43.420326  543705 net.go:648] Add success.
I0321 21:21:43.423087  543705 net.go:770] primary dev: ETH0
I0321 21:21:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:21:43.423112  543705 net.go:698] Add success.
I0321 21:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:21:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:21:53.410648  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:21:53.410662  543705 memory.go:184] no items to output this cycle
I0321 21:21:53.410668  543705 cpu.go:275] no items to output this cycle
E0321 21:22:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:22:03.409787  543705 cpu.go:275] no items to output this cycle
I0321 21:22:03.409790  543705 memory.go:184] no items to output this cycle
E0321 21:22:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:22:13.409785  543705 memory.go:191] Add success.
I0321 21:22:13.409800  543705 cpu.go:282] Add success.
W0321 21:22:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:22:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:22:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:22:13.420116  543705 net.go:648] Add success.
I0321 21:22:13.423393  543705 net.go:770] primary dev: ETH0
I0321 21:22:13.423406  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:22:13.423418  543705 net.go:698] Add success.
W0321 21:22:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:22:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 21:22:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:22:14.456793  543705 disk_worker.go:494] system disk:vda1
I0321 21:22:14.456832  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:22:14.457155  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:22:14.457163  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:22:14.457168  543705 custom_config.go:64] query custom config with name: gpu
E0321 21:22:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:22:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:22:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:22:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:22:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:22:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:22:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:22:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:22:23.409780  543705 memory.go:184] no items to output this cycle
I0321 21:22:23.409792  543705 cpu.go:275] no items to output this cycle
I0321 21:22:31.482061  543705 disk_info.go:125] begin check local disk info of client
I0321 21:22:31.484512  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:22:31.484519  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003496c0 0xc000349700]
E0321 21:22:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:22:33.409805  543705 memory.go:184] no items to output this cycle
I0321 21:22:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 21:22:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:22:43.409811  543705 memory.go:191] Add success.
I0321 21:22:43.409812  543705 cpu.go:282] Add success.
I0321 21:22:43.420266  543705 net.go:648] Add success.
I0321 21:22:43.422942  543705 net.go:770] primary dev: ETH0
I0321 21:22:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:22:43.422967  543705 net.go:698] Add success.
I0321 21:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:22:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:22:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:22:53.409786  543705 memory.go:184] no items to output this cycle
I0321 21:22:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 21:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:23:03.409771  543705 memory.go:184] no items to output this cycle
I0321 21:23:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 21:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:23:13.409809  543705 memory.go:191] Add success.
I0321 21:23:13.409817  543705 cpu.go:282] Add success.
W0321 21:23:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:23:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:23:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:23:13.420214  543705 net.go:648] Add success.
I0321 21:23:13.422821  543705 net.go:770] primary dev: ETH0
I0321 21:23:13.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:23:13.422845  543705 net.go:698] Add success.
I0321 21:23:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:23:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:23:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 21:23:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:23:14.456516  543705 disk_worker.go:494] system disk:vda1
I0321 21:23:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:23:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:23:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:23:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:23:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:23:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:23:23.409810  543705 memory.go:184] no items to output this cycle
I0321 21:23:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 21:23:31.485121  543705 disk_info.go:125] begin check local disk info of client
I0321 21:23:31.487628  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:23:31.487635  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9640 0xc0004d9680]
E0321 21:23:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:23:33.409792  543705 memory.go:184] no items to output this cycle
I0321 21:23:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 21:23:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:23:43.409779  543705 memory.go:191] Add success.
I0321 21:23:43.409802  543705 cpu.go:282] Add success.
I0321 21:23:43.420144  543705 net.go:648] Add success.
I0321 21:23:43.422927  543705 net.go:770] primary dev: ETH0
I0321 21:23:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:23:43.422951  543705 net.go:698] Add success.
I0321 21:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:23:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:23:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:23:53.409775  543705 cpu.go:275] no items to output this cycle
I0321 21:23:53.409784  543705 memory.go:184] no items to output this cycle
E0321 21:24:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:24:03.409766  543705 memory.go:184] no items to output this cycle
I0321 21:24:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 21:24:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:24:13.409784  543705 memory.go:191] Add success.
I0321 21:24:13.409804  543705 cpu.go:282] Add success.
W0321 21:24:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:24:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:24:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:24:13.420043  543705 net.go:648] Add success.
I0321 21:24:13.423051  543705 net.go:770] primary dev: ETH0
I0321 21:24:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:24:13.423076  543705 net.go:698] Add success.
I0321 21:24:13.966307  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ff7b11c8-24c0-4739-b17d-d59f22cd3071","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:24:13.966344  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:24:14.454685  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:24:14.454907  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:24:14.454920  543705 disk_worker.go:708] disk space is not compliant
W0321 21:24:14.454924  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:24:14.456431  543705 disk_worker.go:494] system disk:vda1
I0321 21:24:14.456468  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:24:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:24:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:24:16.458104  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:24:16.472600  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:24:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:24:23.409780  543705 memory.go:184] no items to output this cycle
I0321 21:24:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 21:24:31.487773  543705 disk_info.go:125] begin check local disk info of client
I0321 21:24:31.490349  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:24:31.490357  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8180 0xc0004d81c0]
E0321 21:24:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:24:33.409808  543705 memory.go:184] no items to output this cycle
I0321 21:24:33.409819  543705 cpu.go:275] no items to output this cycle
I0321 21:24:39.185620  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:24:39.185627  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:24:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:24:43.410692  543705 memory.go:191] Add success.
I0321 21:24:43.409816  543705 cpu.go:282] Add success.
I0321 21:24:43.420461  543705 net.go:648] Add success.
I0321 21:24:43.422983  543705 net.go:770] primary dev: ETH0
I0321 21:24:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:24:43.423013  543705 net.go:698] Add success.
I0321 21:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:24:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:24:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:24:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:24:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 21:24:53.409786  543705 memory.go:184] no items to output this cycle
E0321 21:25:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:25:03.409777  543705 memory.go:184] no items to output this cycle
I0321 21:25:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:25:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:25:13.409793  543705 memory.go:191] Add success.
I0321 21:25:13.409795  543705 cpu.go:282] Add success.
W0321 21:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:25:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:25:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:25:13.420022  543705 net.go:648] Add success.
I0321 21:25:13.422742  543705 net.go:770] primary dev: ETH0
I0321 21:25:13.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:25:13.422767  543705 net.go:698] Add success.
I0321 21:25:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:25:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:25:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 21:25:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:25:14.456551  543705 disk_worker.go:494] system disk:vda1
I0321 21:25:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:25:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:25:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:25:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:25:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:25:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:25:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:25:23.409807  543705 memory.go:184] no items to output this cycle
I0321 21:25:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 21:25:31.491150  543705 disk_info.go:125] begin check local disk info of client
I0321 21:25:31.493765  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:25:31.493772  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad1c0 0xc0003ad200]
E0321 21:25:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:25:33.409761  543705 memory.go:184] no items to output this cycle
I0321 21:25:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 21:25:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:25:43.409820  543705 memory.go:191] Add success.
I0321 21:25:43.409828  543705 cpu.go:282] Add success.
I0321 21:25:43.419875  543705 net.go:648] Add success.
I0321 21:25:43.422758  543705 net.go:770] primary dev: ETH0
I0321 21:25:43.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:25:43.422811  543705 net.go:698] Add success.
I0321 21:25:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:25:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:25:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:25:53.410379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:25:53.410397  543705 memory.go:184] no items to output this cycle
I0321 21:25:53.410428  543705 cpu.go:275] no items to output this cycle
E0321 21:26:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:26:03.409791  543705 memory.go:184] no items to output this cycle
I0321 21:26:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 21:26:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:26:13.409818  543705 memory.go:191] Add success.
I0321 21:26:13.409826  543705 cpu.go:282] Add success.
W0321 21:26:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:26:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:26:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:26:13.420137  543705 net.go:648] Add success.
I0321 21:26:13.423226  543705 net.go:770] primary dev: ETH0
I0321 21:26:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:26:13.423251  543705 net.go:698] Add success.
I0321 21:26:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:26:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:26:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 21:26:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:26:14.456658  543705 disk_worker.go:494] system disk:vda1
I0321 21:26:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:26:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:26:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:26:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:26:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:26:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:26:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:26:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 21:26:23.409806  543705 memory.go:184] no items to output this cycle
I0321 21:26:31.494110  543705 disk_info.go:125] begin check local disk info of client
I0321 21:26:31.496605  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:26:31.496611  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462ac0 0xc000462b00]
E0321 21:26:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:26:33.409802  543705 memory.go:184] no items to output this cycle
I0321 21:26:33.409827  543705 cpu.go:275] no items to output this cycle
E0321 21:26:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:26:43.409805  543705 memory.go:191] Add success.
I0321 21:26:43.409815  543705 cpu.go:282] Add success.
I0321 21:26:43.419884  543705 net.go:648] Add success.
I0321 21:26:43.422393  543705 net.go:770] primary dev: ETH0
I0321 21:26:43.422405  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:26:43.422418  543705 net.go:698] Add success.
I0321 21:26:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:26:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:26:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:26:53.409792  543705 memory.go:184] no items to output this cycle
I0321 21:26:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 21:27:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:27:03.409791  543705 memory.go:184] no items to output this cycle
I0321 21:27:03.409814  543705 cpu.go:275] no items to output this cycle
E0321 21:27:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:27:13.409808  543705 memory.go:191] Add success.
I0321 21:27:13.409821  543705 cpu.go:282] Add success.
W0321 21:27:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:27:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:27:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:27:13.420198  543705 net.go:648] Add success.
I0321 21:27:13.428787  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 21:27:13.428871  543705 net.go:770] primary dev: ETH0
I0321 21:27:13.428883  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:27:13.428895  543705 net.go:698] Add success.
I0321 21:27:13.452772  543705 event_worker.go:152] Polling the log file for events...
I0321 21:27:13.463962  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c0bb5626-ce89-4c8e-b481-4aa6789225a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:27:13.463994  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 21:27:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:27:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 21:27:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:27:14.455918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:27:14.455927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:27:14.455932  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:27:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 21:27:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:27:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:27:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:27:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:27:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:27:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:27:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:27:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:27:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:27:23.409807  543705 memory.go:184] no items to output this cycle
I0321 21:27:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 21:27:31.497188  543705 disk_info.go:125] begin check local disk info of client
I0321 21:27:31.499729  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:27:31.499736  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8d00 0xc0004d8d40]
E0321 21:27:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:27:33.409791  543705 memory.go:184] no items to output this cycle
I0321 21:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 21:27:39.185732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:27:39.185739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:27:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:27:43.410551  543705 memory.go:191] Add success.
I0321 21:27:43.409798  543705 cpu.go:282] Add success.
I0321 21:27:43.420248  543705 net.go:648] Add success.
I0321 21:27:43.422803  543705 net.go:770] primary dev: ETH0
I0321 21:27:43.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:27:43.422833  543705 net.go:698] Add success.
I0321 21:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:27:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:27:53.409795  543705 memory.go:184] no items to output this cycle
I0321 21:27:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 21:28:03.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:28:03.409887  543705 memory.go:184] no items to output this cycle
I0321 21:28:03.409996  543705 cpu.go:275] no items to output this cycle
E0321 21:28:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:28:13.409789  543705 memory.go:191] Add success.
I0321 21:28:13.409809  543705 cpu.go:282] Add success.
W0321 21:28:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:28:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:28:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:28:13.420069  543705 net.go:648] Add success.
I0321 21:28:13.423101  543705 net.go:770] primary dev: ETH0
I0321 21:28:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:28:13.423126  543705 net.go:698] Add success.
I0321 21:28:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:28:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:28:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 21:28:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:28:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 21:28:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:28:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:28:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:28:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:28:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:28:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:28:23.409773  543705 memory.go:184] no items to output this cycle
I0321 21:28:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 21:28:31.500148  543705 disk_info.go:125] begin check local disk info of client
I0321 21:28:31.502691  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:28:31.502698  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8100 0xc0004d8140]
E0321 21:28:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:28:33.409792  543705 memory.go:184] no items to output this cycle
I0321 21:28:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 21:28:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:28:43.409782  543705 memory.go:191] Add success.
I0321 21:28:43.409801  543705 cpu.go:282] Add success.
I0321 21:28:43.419889  543705 net.go:648] Add success.
I0321 21:28:43.422759  543705 net.go:770] primary dev: ETH0
I0321 21:28:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:28:43.422786  543705 net.go:698] Add success.
I0321 21:28:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:28:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:28:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:28:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:28:53.409768  543705 memory.go:184] no items to output this cycle
I0321 21:28:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:29:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:29:03.409794  543705 memory.go:184] no items to output this cycle
I0321 21:29:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 21:29:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:29:13.409818  543705 memory.go:191] Add success.
I0321 21:29:13.409823  543705 cpu.go:282] Add success.
W0321 21:29:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:29:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:29:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:29:13.420079  543705 net.go:648] Add success.
I0321 21:29:13.422701  543705 net.go:770] primary dev: ETH0
I0321 21:29:13.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:29:13.422729  543705 net.go:698] Add success.
I0321 21:29:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:29:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:29:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 21:29:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:29:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 21:29:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:29:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:29:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:29:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:29:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:29:16.472513  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:29:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:29:23.409787  543705 memory.go:184] no items to output this cycle
I0321 21:29:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 21:29:31.503160  543705 disk_info.go:125] begin check local disk info of client
I0321 21:29:31.505749  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:29:31.505755  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f27c0 0xc0003f2800]
E0321 21:29:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:29:33.409788  543705 memory.go:184] no items to output this cycle
I0321 21:29:33.409802  543705 cpu.go:275] no items to output this cycle
E0321 21:29:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:29:43.409812  543705 memory.go:191] Add success.
I0321 21:29:43.409820  543705 cpu.go:282] Add success.
I0321 21:29:43.419871  543705 net.go:648] Add success.
I0321 21:29:43.422463  543705 net.go:770] primary dev: ETH0
I0321 21:29:43.422476  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:29:43.422488  543705 net.go:698] Add success.
I0321 21:29:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:29:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:29:53.410364  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:29:53.410379  543705 memory.go:184] no items to output this cycle
I0321 21:29:53.410395  543705 cpu.go:275] no items to output this cycle
E0321 21:30:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:30:03.409767  543705 memory.go:184] no items to output this cycle
I0321 21:30:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 21:30:13.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:30:13.409899  543705 memory.go:191] Add success.
W0321 21:30:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:30:13.409945  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:30:13.409952  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:30:13.410002  543705 cpu.go:282] Add success.
I0321 21:30:13.419727  543705 net.go:648] Add success.
I0321 21:30:13.422517  543705 net.go:770] primary dev: ETH0
I0321 21:30:13.422529  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:30:13.422541  543705 net.go:698] Add success.
I0321 21:30:13.469809  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"683a160c-c7f0-4a9f-8d7e-6a650d982a48","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:30:13.469840  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:30:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:30:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:30:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0321 21:30:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:30:14.456730  543705 disk_worker.go:494] system disk:vda1
I0321 21:30:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:30:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:30:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:30:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:30:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:30:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:30:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:30:23.409786  543705 memory.go:184] no items to output this cycle
I0321 21:30:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 21:30:31.506177  543705 disk_info.go:125] begin check local disk info of client
I0321 21:30:31.508755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:30:31.508763  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
E0321 21:30:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:30:33.409780  543705 memory.go:184] no items to output this cycle
I0321 21:30:33.409820  543705 cpu.go:275] no items to output this cycle
I0321 21:30:39.189636  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:30:39.189656  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:30:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:30:43.410604  543705 memory.go:191] Add success.
I0321 21:30:43.409808  543705 cpu.go:282] Add success.
I0321 21:30:43.420279  543705 net.go:648] Add success.
I0321 21:30:43.423153  543705 net.go:770] primary dev: ETH0
I0321 21:30:43.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:30:43.423180  543705 net.go:698] Add success.
I0321 21:30:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:30:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:30:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:30:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:30:53.409781  543705 memory.go:184] no items to output this cycle
I0321 21:30:53.409786  543705 cpu.go:275] no items to output this cycle
E0321 21:31:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:31:03.409787  543705 cpu.go:275] no items to output this cycle
I0321 21:31:03.409798  543705 memory.go:184] no items to output this cycle
E0321 21:31:13.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:31:13.409912  543705 memory.go:191] Add success.
W0321 21:31:13.409964  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 21:31:13.409979  543705 cpu.go:282] Add success.
W0321 21:31:13.409983  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:31:13.409988  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:31:13.419705  543705 net.go:648] Add success.
I0321 21:31:13.422506  543705 net.go:770] primary dev: ETH0
I0321 21:31:13.422519  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:31:13.422531  543705 net.go:698] Add success.
I0321 21:31:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:31:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:31:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 21:31:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:31:14.456549  543705 disk_worker.go:494] system disk:vda1
I0321 21:31:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:31:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:31:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:31:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:31:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:31:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:31:23.409811  543705 memory.go:184] no items to output this cycle
I0321 21:31:23.409878  543705 cpu.go:275] no items to output this cycle
I0321 21:31:31.509188  543705 disk_info.go:125] begin check local disk info of client
I0321 21:31:31.511717  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:31:31.511724  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4a80 0xc0003d4ac0]
E0321 21:31:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:31:33.409791  543705 memory.go:184] no items to output this cycle
I0321 21:31:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 21:31:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:31:43.409778  543705 memory.go:191] Add success.
I0321 21:31:43.409799  543705 cpu.go:282] Add success.
I0321 21:31:43.420071  543705 net.go:648] Add success.
I0321 21:31:43.422734  543705 net.go:770] primary dev: ETH0
I0321 21:31:43.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:31:43.422760  543705 net.go:698] Add success.
I0321 21:31:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:31:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:31:53.410670  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:31:53.410684  543705 memory.go:184] no items to output this cycle
I0321 21:31:53.410684  543705 cpu.go:275] no items to output this cycle
E0321 21:32:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:32:03.409772  543705 memory.go:184] no items to output this cycle
I0321 21:32:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 21:32:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:32:13.409807  543705 memory.go:191] Add success.
I0321 21:32:13.409813  543705 cpu.go:282] Add success.
W0321 21:32:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:32:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:32:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:32:13.419756  543705 net.go:648] Add success.
I0321 21:32:13.422512  543705 net.go:770] primary dev: ETH0
I0321 21:32:13.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:32:13.422539  543705 net.go:698] Add success.
W0321 21:32:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:32:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 21:32:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:32:14.456763  543705 disk_worker.go:494] system disk:vda1
I0321 21:32:14.456803  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:32:14.457162  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:32:14.457170  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:32:14.457175  543705 custom_config.go:64] query custom config with name: gpu
E0321 21:32:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:32:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:32:16.457904  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:32:16.457904  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:32:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:32:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:32:16.472293  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:32:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 21:32:23.409792  543705 memory.go:184] no items to output this cycle
I0321 21:32:31.512208  543705 disk_info.go:125] begin check local disk info of client
I0321 21:32:31.514801  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:32:31.514807  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348180 0xc0003481c0]
E0321 21:32:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:32:33.409768  543705 memory.go:184] no items to output this cycle
I0321 21:32:33.409779  543705 cpu.go:275] no items to output this cycle
E0321 21:32:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:32:43.409816  543705 memory.go:191] Add success.
I0321 21:32:43.409819  543705 cpu.go:282] Add success.
I0321 21:32:43.419894  543705 net.go:648] Add success.
I0321 21:32:43.423275  543705 net.go:770] primary dev: ETH0
I0321 21:32:43.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:32:43.423301  543705 net.go:698] Add success.
I0321 21:32:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:32:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:32:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:32:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:32:53.409783  543705 memory.go:184] no items to output this cycle
I0321 21:32:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:33:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:33:03.409770  543705 memory.go:184] no items to output this cycle
I0321 21:33:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 21:33:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:33:13.409788  543705 memory.go:191] Add success.
W0321 21:33:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:33:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:33:13.409833  543705 cpu.go:282] Add success.
I0321 21:33:13.420151  543705 net.go:648] Add success.
I0321 21:33:13.423067  543705 net.go:770] primary dev: ETH0
I0321 21:33:13.423083  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:33:13.423096  543705 net.go:698] Add success.
I0321 21:33:13.468582  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9bb46809-d3e1-43da-97f5-27a798949914","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:33:13.468618  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:33:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:33:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:33:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 21:33:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:33:14.456621  543705 disk_worker.go:494] system disk:vda1
I0321 21:33:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:33:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:33:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:33:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:33:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:33:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:33:23.409782  543705 memory.go:184] no items to output this cycle
I0321 21:33:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 21:33:31.515216  543705 disk_info.go:125] begin check local disk info of client
I0321 21:33:31.517793  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:33:31.517800  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab200 0xc0001ab240]
E0321 21:33:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:33:33.409794  543705 memory.go:184] no items to output this cycle
I0321 21:33:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 21:33:39.190800  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:33:39.190807  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:33:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:33:43.410725  543705 memory.go:191] Add success.
I0321 21:33:43.409806  543705 cpu.go:282] Add success.
I0321 21:33:43.420460  543705 net.go:648] Add success.
I0321 21:33:43.423214  543705 net.go:770] primary dev: ETH0
I0321 21:33:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:33:43.423239  543705 net.go:698] Add success.
I0321 21:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:33:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:33:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:33:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:33:53.409797  543705 memory.go:184] no items to output this cycle
I0321 21:33:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 21:34:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:34:03.409784  543705 memory.go:184] no items to output this cycle
I0321 21:34:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 21:34:13.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:34:13.409910  543705 memory.go:191] Add success.
I0321 21:34:13.409982  543705 cpu.go:282] Add success.
W0321 21:34:13.409985  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:34:13.410007  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:34:13.410012  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:34:13.419709  543705 net.go:648] Add success.
I0321 21:34:13.422481  543705 net.go:770] primary dev: ETH0
I0321 21:34:13.422493  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:34:13.422504  543705 net.go:698] Add success.
I0321 21:34:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:34:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:34:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 21:34:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:34:14.456529  543705 disk_worker.go:494] system disk:vda1
I0321 21:34:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:34:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:34:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:34:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:34:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:34:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:34:23.409783  543705 memory.go:184] no items to output this cycle
I0321 21:34:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 21:34:31.518237  543705 disk_info.go:125] begin check local disk info of client
I0321 21:34:31.520795  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:34:31.520801  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349c00 0xc000349c40]
E0321 21:34:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:34:33.409812  543705 memory.go:184] no items to output this cycle
I0321 21:34:33.409825  543705 cpu.go:275] no items to output this cycle
E0321 21:34:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:34:43.409775  543705 memory.go:191] Add success.
I0321 21:34:43.409804  543705 cpu.go:282] Add success.
I0321 21:34:43.419870  543705 net.go:648] Add success.
I0321 21:34:43.422595  543705 net.go:770] primary dev: ETH0
I0321 21:34:43.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:34:43.422620  543705 net.go:698] Add success.
I0321 21:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:34:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:34:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:34:53.409780  543705 memory.go:184] no items to output this cycle
I0321 21:34:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 21:35:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:35:03.409781  543705 memory.go:184] no items to output this cycle
I0321 21:35:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 21:35:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:35:13.409779  543705 memory.go:191] Add success.
W0321 21:35:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 21:35:13.409808  543705 cpu.go:282] Add success.
W0321 21:35:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:35:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:35:13.419729  543705 net.go:648] Add success.
I0321 21:35:13.422422  543705 net.go:770] primary dev: ETH0
I0321 21:35:13.422435  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:35:13.422447  543705 net.go:698] Add success.
I0321 21:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:35:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:35:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 21:35:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:35:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 21:35:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:35:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:35:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:35:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:35:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:35:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:35:23.409777  543705 memory.go:184] no items to output this cycle
I0321 21:35:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 21:35:31.521261  543705 disk_info.go:125] begin check local disk info of client
I0321 21:35:31.523898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:35:31.523904  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033ba40 0xc00033ba80]
E0321 21:35:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:35:33.409769  543705 memory.go:184] no items to output this cycle
I0321 21:35:33.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:35:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:35:43.409795  543705 memory.go:191] Add success.
I0321 21:35:43.409797  543705 cpu.go:282] Add success.
I0321 21:35:43.419899  543705 net.go:648] Add success.
I0321 21:35:43.422334  543705 net.go:770] primary dev: ETH0
I0321 21:35:43.422349  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:35:43.422364  543705 net.go:698] Add success.
I0321 21:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:35:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:35:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:35:53.409766  543705 memory.go:184] no items to output this cycle
I0321 21:35:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 21:36:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:36:03.409796  543705 memory.go:184] no items to output this cycle
I0321 21:36:03.409807  543705 cpu.go:275] no items to output this cycle
E0321 21:36:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:36:13.409799  543705 memory.go:191] Add success.
I0321 21:36:13.409801  543705 cpu.go:282] Add success.
W0321 21:36:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:36:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:36:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:36:13.420285  543705 net.go:648] Add success.
I0321 21:36:13.422921  543705 net.go:770] primary dev: ETH0
I0321 21:36:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:36:13.422949  543705 net.go:698] Add success.
I0321 21:36:13.468852  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf416c45-9215-4ee4-98d1-adbe89ed19eb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:36:13.468885  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:36:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:36:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:36:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 21:36:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:36:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 21:36:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:36:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:36:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:36:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:36:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:36:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:36:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:36:23.409810  543705 memory.go:184] no items to output this cycle
I0321 21:36:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 21:36:31.524264  543705 disk_info.go:125] begin check local disk info of client
I0321 21:36:31.526900  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:36:31.526907  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaac0 0xc0001aab00]
E0321 21:36:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:36:33.409767  543705 memory.go:184] no items to output this cycle
I0321 21:36:33.409797  543705 cpu.go:275] no items to output this cycle
I0321 21:36:39.193733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:36:39.193740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:36:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:36:43.410584  543705 memory.go:191] Add success.
I0321 21:36:43.409802  543705 cpu.go:282] Add success.
I0321 21:36:43.420263  543705 net.go:648] Add success.
I0321 21:36:43.422888  543705 net.go:770] primary dev: ETH0
I0321 21:36:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:36:43.422917  543705 net.go:698] Add success.
I0321 21:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:36:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:36:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:36:53.409780  543705 memory.go:184] no items to output this cycle
I0321 21:36:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 21:37:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:37:03.409773  543705 memory.go:184] no items to output this cycle
I0321 21:37:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 21:37:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:37:13.409796  543705 memory.go:191] Add success.
I0321 21:37:13.409797  543705 cpu.go:282] Add success.
W0321 21:37:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:37:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:37:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:37:13.420146  543705 net.go:648] Add success.
I0321 21:37:13.423297  543705 net.go:770] primary dev: ETH0
I0321 21:37:13.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:37:13.423323  543705 net.go:698] Add success.
I0321 21:37:13.453031  543705 event_worker.go:152] Polling the log file for events...
W0321 21:37:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:37:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 21:37:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:37:14.455920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:37:14.455928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:37:14.455934  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:37:14.456576  543705 disk_worker.go:494] system disk:vda1
I0321 21:37:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:37:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:37:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:37:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:37:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:37:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:37:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:37:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:37:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:37:23.409773  543705 memory.go:184] no items to output this cycle
I0321 21:37:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 21:37:31.527278  543705 disk_info.go:125] begin check local disk info of client
I0321 21:37:31.529878  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:37:31.529884  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0780 0xc0003c07c0]
E0321 21:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:37:33.409795  543705 memory.go:184] no items to output this cycle
I0321 21:37:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 21:37:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:37:43.409779  543705 memory.go:191] Add success.
I0321 21:37:43.409808  543705 cpu.go:282] Add success.
I0321 21:37:43.419888  543705 net.go:648] Add success.
I0321 21:37:43.422868  543705 net.go:770] primary dev: ETH0
I0321 21:37:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:37:43.422896  543705 net.go:698] Add success.
I0321 21:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:37:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:37:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:37:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:37:53.409804  543705 memory.go:184] no items to output this cycle
I0321 21:37:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 21:38:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:38:03.409780  543705 memory.go:184] no items to output this cycle
I0321 21:38:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 21:38:13.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:38:13.409910  543705 memory.go:191] Add success.
W0321 21:38:13.409937  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:38:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:38:13.409953  543705 cpu.go:282] Add success.
I0321 21:38:13.409956  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:38:13.419735  543705 net.go:648] Add success.
I0321 21:38:13.422293  543705 net.go:770] primary dev: ETH0
I0321 21:38:13.422308  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:38:13.422326  543705 net.go:698] Add success.
I0321 21:38:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:38:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:38:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 21:38:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:38:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 21:38:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:38:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:38:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:38:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:38:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:38:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:38:23.409779  543705 memory.go:184] no items to output this cycle
I0321 21:38:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 21:38:31.530298  543705 disk_info.go:125] begin check local disk info of client
I0321 21:38:31.532861  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:38:31.532867  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b840 0xc00007b880]
E0321 21:38:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:38:33.409801  543705 memory.go:184] no items to output this cycle
I0321 21:38:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 21:38:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:38:43.409788  543705 memory.go:191] Add success.
I0321 21:38:43.409794  543705 cpu.go:282] Add success.
I0321 21:38:43.419880  543705 net.go:648] Add success.
I0321 21:38:43.422581  543705 net.go:770] primary dev: ETH0
I0321 21:38:43.422595  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:38:43.422606  543705 net.go:698] Add success.
I0321 21:38:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:38:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:38:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:38:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:38:53.409784  543705 memory.go:184] no items to output this cycle
I0321 21:38:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:39:03.409781  543705 memory.go:184] no items to output this cycle
I0321 21:39:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 21:39:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:39:13.409790  543705 memory.go:191] Add success.
I0321 21:39:13.409810  543705 cpu.go:282] Add success.
W0321 21:39:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:39:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:39:13.420113  543705 net.go:648] Add success.
I0321 21:39:13.422844  543705 net.go:770] primary dev: ETH0
I0321 21:39:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:39:13.422873  543705 net.go:698] Add success.
I0321 21:39:13.467954  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8323ee9f-1b98-4e72-8186-f599519f769f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:39:13.467993  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:39:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:39:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:39:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 21:39:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:39:14.456720  543705 disk_worker.go:494] system disk:vda1
I0321 21:39:14.456749  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:39:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:39:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:39:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:39:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:39:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:39:23.409790  543705 memory.go:184] no items to output this cycle
I0321 21:39:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 21:39:31.533318  543705 disk_info.go:125] begin check local disk info of client
I0321 21:39:31.535917  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:39:31.535925  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393880 0xc0003938c0]
E0321 21:39:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:39:33.409799  543705 memory.go:184] no items to output this cycle
I0321 21:39:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 21:39:39.197689  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:39:39.197695  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:39:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:39:43.410623  543705 memory.go:191] Add success.
I0321 21:39:43.409803  543705 cpu.go:282] Add success.
I0321 21:39:43.420358  543705 net.go:648] Add success.
I0321 21:39:43.423273  543705 net.go:770] primary dev: ETH0
I0321 21:39:43.423288  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:39:43.423302  543705 net.go:698] Add success.
I0321 21:39:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:39:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:39:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:39:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:39:53.409794  543705 cpu.go:275] no items to output this cycle
I0321 21:39:53.409796  543705 memory.go:184] no items to output this cycle
E0321 21:40:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:40:03.409832  543705 memory.go:184] no items to output this cycle
I0321 21:40:03.409945  543705 cpu.go:275] no items to output this cycle
E0321 21:40:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:40:13.409821  543705 memory.go:191] Add success.
I0321 21:40:13.409824  543705 cpu.go:282] Add success.
W0321 21:40:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:40:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:40:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:40:13.420064  543705 net.go:648] Add success.
I0321 21:40:13.422862  543705 net.go:770] primary dev: ETH0
I0321 21:40:13.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:40:13.422886  543705 net.go:698] Add success.
I0321 21:40:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:40:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:40:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0321 21:40:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:40:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 21:40:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:40:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:40:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:40:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:40:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:40:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:40:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:40:23.409785  543705 memory.go:184] no items to output this cycle
I0321 21:40:23.409819  543705 cpu.go:275] no items to output this cycle
I0321 21:40:31.536320  543705 disk_info.go:125] begin check local disk info of client
I0321 21:40:31.538898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:40:31.538904  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033b3c0 0xc00033b400]
E0321 21:40:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:40:33.409806  543705 memory.go:184] no items to output this cycle
I0321 21:40:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 21:40:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:40:43.409799  543705 memory.go:191] Add success.
I0321 21:40:43.409799  543705 cpu.go:282] Add success.
I0321 21:40:43.419974  543705 net.go:648] Add success.
I0321 21:40:43.423058  543705 net.go:770] primary dev: ETH0
I0321 21:40:43.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:40:43.423084  543705 net.go:698] Add success.
I0321 21:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:40:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:40:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:40:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:40:53.409798  543705 memory.go:184] no items to output this cycle
I0321 21:40:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 21:41:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:41:03.409778  543705 memory.go:184] no items to output this cycle
I0321 21:41:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:41:13.409976  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:41:13.410017  543705 memory.go:191] Add success.
I0321 21:41:13.410038  543705 cpu.go:282] Add success.
W0321 21:41:13.410057  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:41:13.410084  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:41:13.410122  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:41:13.419743  543705 net.go:648] Add success.
I0321 21:41:13.422487  543705 net.go:770] primary dev: ETH0
I0321 21:41:13.422499  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:41:13.422511  543705 net.go:698] Add success.
I0321 21:41:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:41:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:41:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0321 21:41:14.455243  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:41:14.456640  543705 disk_worker.go:494] system disk:vda1
I0321 21:41:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:41:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:41:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:41:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:41:23.409794  543705 memory.go:184] no items to output this cycle
I0321 21:41:23.409795  543705 cpu.go:275] no items to output this cycle
I0321 21:41:31.539345  543705 disk_info.go:125] begin check local disk info of client
I0321 21:41:31.541919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:41:31.541926  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482f40 0xc000482f80]
E0321 21:41:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:41:33.409799  543705 memory.go:184] no items to output this cycle
I0321 21:41:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 21:41:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:41:43.409797  543705 memory.go:191] Add success.
I0321 21:41:43.409807  543705 cpu.go:282] Add success.
I0321 21:41:43.419889  543705 net.go:648] Add success.
I0321 21:41:43.422556  543705 net.go:770] primary dev: ETH0
I0321 21:41:43.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:41:43.422585  543705 net.go:698] Add success.
I0321 21:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:41:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:41:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:41:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:41:53.409782  543705 memory.go:184] no items to output this cycle
I0321 21:41:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 21:42:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:42:03.409777  543705 memory.go:184] no items to output this cycle
I0321 21:42:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 21:42:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:42:13.409840  543705 memory.go:191] Add success.
I0321 21:42:13.409847  543705 cpu.go:282] Add success.
W0321 21:42:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:42:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:42:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:42:13.420246  543705 net.go:648] Add success.
I0321 21:42:13.423061  543705 net.go:770] primary dev: ETH0
I0321 21:42:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:42:13.423090  543705 net.go:698] Add success.
I0321 21:42:13.469010  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a4946fc5-0a97-4354-a154-9e04270477e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:42:13.469044  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 21:42:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:42:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 21:42:14.455205  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:42:14.456785  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:42:14.456793  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:42:14.456799  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:42:14.456838  543705 disk_worker.go:494] system disk:vda1
I0321 21:42:14.456867  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:42:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:42:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:42:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:42:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:42:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:42:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:42:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:42:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:42:23.409806  543705 memory.go:184] no items to output this cycle
I0321 21:42:23.409818  543705 cpu.go:275] no items to output this cycle
I0321 21:42:31.542356  543705 disk_info.go:125] begin check local disk info of client
I0321 21:42:31.544918  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:42:31.544924  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c89c0 0xc0003c8a00]
E0321 21:42:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:42:33.409795  543705 memory.go:184] no items to output this cycle
I0321 21:42:33.409804  543705 cpu.go:275] no items to output this cycle
I0321 21:42:39.201700  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:42:39.201707  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:42:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:42:43.410752  543705 memory.go:191] Add success.
I0321 21:42:43.409792  543705 cpu.go:282] Add success.
I0321 21:42:43.420429  543705 net.go:648] Add success.
I0321 21:42:43.422995  543705 net.go:770] primary dev: ETH0
I0321 21:42:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:42:43.423030  543705 net.go:698] Add success.
I0321 21:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:42:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:42:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:42:53.410201  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:42:53.410217  543705 memory.go:184] no items to output this cycle
I0321 21:42:53.410247  543705 cpu.go:275] no items to output this cycle
E0321 21:43:03.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:43:03.409857  543705 memory.go:184] no items to output this cycle
I0321 21:43:03.409920  543705 cpu.go:275] no items to output this cycle
E0321 21:43:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:43:13.409820  543705 memory.go:191] Add success.
I0321 21:43:13.409830  543705 cpu.go:282] Add success.
W0321 21:43:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:43:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:43:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:43:13.420760  543705 net.go:648] Add success.
I0321 21:43:13.423446  543705 net.go:770] primary dev: ETH0
I0321 21:43:13.423458  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:43:13.423470  543705 net.go:698] Add success.
I0321 21:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:43:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:43:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 21:43:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:43:14.456553  543705 disk_worker.go:494] system disk:vda1
I0321 21:43:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:43:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:43:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:43:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:43:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:43:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:43:23.409811  543705 memory.go:184] no items to output this cycle
I0321 21:43:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 21:43:31.545373  543705 disk_info.go:125] begin check local disk info of client
I0321 21:43:31.548004  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:43:31.548010  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b4c0 0xc00036b500]
E0321 21:43:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:43:33.409772  543705 memory.go:184] no items to output this cycle
I0321 21:43:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:43:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:43:43.409790  543705 memory.go:191] Add success.
I0321 21:43:43.409792  543705 cpu.go:282] Add success.
I0321 21:43:43.419978  543705 net.go:648] Add success.
I0321 21:43:43.422583  543705 net.go:770] primary dev: ETH0
I0321 21:43:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:43:43.422612  543705 net.go:698] Add success.
I0321 21:43:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:43:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:43:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:43:53.409801  543705 memory.go:184] no items to output this cycle
I0321 21:43:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 21:44:03.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:44:03.409909  543705 cpu.go:275] no items to output this cycle
I0321 21:44:03.409982  543705 memory.go:184] no items to output this cycle
E0321 21:44:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:44:13.409797  543705 memory.go:191] Add success.
I0321 21:44:13.409798  543705 cpu.go:282] Add success.
W0321 21:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:44:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:44:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:44:13.420144  543705 net.go:648] Add success.
I0321 21:44:13.423065  543705 net.go:770] primary dev: ETH0
I0321 21:44:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:44:13.423090  543705 net.go:698] Add success.
I0321 21:44:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:44:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:44:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0321 21:44:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:44:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 21:44:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:44:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:44:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:44:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:44:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:44:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:44:23.409806  543705 memory.go:184] no items to output this cycle
I0321 21:44:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 21:44:31.548386  543705 disk_info.go:125] begin check local disk info of client
I0321 21:44:31.550970  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:44:31.550976  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9600 0xc0003c9640]
E0321 21:44:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:44:33.409799  543705 memory.go:184] no items to output this cycle
I0321 21:44:33.409810  543705 cpu.go:275] no items to output this cycle
E0321 21:44:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:44:43.409787  543705 memory.go:191] Add success.
I0321 21:44:43.409793  543705 cpu.go:282] Add success.
I0321 21:44:43.419867  543705 net.go:648] Add success.
I0321 21:44:43.422791  543705 net.go:770] primary dev: ETH0
I0321 21:44:43.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:44:43.422824  543705 net.go:698] Add success.
I0321 21:44:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:44:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:44:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:44:53.409799  543705 memory.go:184] no items to output this cycle
I0321 21:44:53.409810  543705 cpu.go:275] no items to output this cycle
I0321 21:45:03.409896  543705 cpu.go:275] no items to output this cycle
E0321 21:45:03.409918  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:45:03.409932  543705 memory.go:184] no items to output this cycle
E0321 21:45:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:45:13.409799  543705 memory.go:191] Add success.
I0321 21:45:13.409806  543705 cpu.go:282] Add success.
W0321 21:45:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:45:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:45:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:45:13.420449  543705 net.go:648] Add success.
I0321 21:45:13.423533  543705 net.go:770] primary dev: ETH0
I0321 21:45:13.423548  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:45:13.423561  543705 net.go:698] Add success.
I0321 21:45:13.552702  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ac0ac44f-d153-4c6b-a984-85373dbc630d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:45:13.552735  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:45:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:45:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:45:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 21:45:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:45:14.456611  543705 disk_worker.go:494] system disk:vda1
I0321 21:45:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:45:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:45:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:45:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:45:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:45:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:45:23.409772  543705 memory.go:184] no items to output this cycle
I0321 21:45:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 21:45:31.551397  543705 disk_info.go:125] begin check local disk info of client
I0321 21:45:31.554079  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:45:31.554086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e00 0xc0000c5e40]
E0321 21:45:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:45:33.409773  543705 memory.go:184] no items to output this cycle
I0321 21:45:33.409808  543705 cpu.go:275] no items to output this cycle
I0321 21:45:39.205723  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:45:39.205730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:45:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:45:43.410578  543705 memory.go:191] Add success.
I0321 21:45:43.409808  543705 cpu.go:282] Add success.
I0321 21:45:43.420276  543705 net.go:648] Add success.
I0321 21:45:43.422623  543705 net.go:770] primary dev: ETH0
I0321 21:45:43.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:45:43.422650  543705 net.go:698] Add success.
I0321 21:45:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:45:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:45:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:45:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:45:53.409796  543705 memory.go:184] no items to output this cycle
I0321 21:45:53.409806  543705 cpu.go:275] no items to output this cycle
I0321 21:46:03.409904  543705 cpu.go:275] no items to output this cycle
E0321 21:46:03.409949  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:46:03.409965  543705 memory.go:184] no items to output this cycle
E0321 21:46:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:46:13.409798  543705 memory.go:191] Add success.
I0321 21:46:13.409804  543705 cpu.go:282] Add success.
W0321 21:46:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:46:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:46:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:46:13.420256  543705 net.go:648] Add success.
I0321 21:46:13.423324  543705 net.go:770] primary dev: ETH0
I0321 21:46:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:46:13.423348  543705 net.go:698] Add success.
I0321 21:46:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:46:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:46:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 21:46:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:46:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 21:46:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:46:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:46:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:46:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:46:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:46:23.409803  543705 memory.go:184] no items to output this cycle
I0321 21:46:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 21:46:31.554406  543705 disk_info.go:125] begin check local disk info of client
I0321 21:46:31.557029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:46:31.557035  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa940 0xc0001aa980]
E0321 21:46:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:46:33.409783  543705 memory.go:184] no items to output this cycle
I0321 21:46:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 21:46:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:46:43.409791  543705 memory.go:191] Add success.
I0321 21:46:43.409795  543705 cpu.go:282] Add success.
I0321 21:46:43.419860  543705 net.go:648] Add success.
I0321 21:46:43.422693  543705 net.go:770] primary dev: ETH0
I0321 21:46:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:46:43.422716  543705 net.go:698] Add success.
I0321 21:46:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:46:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:46:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:46:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:46:53.409782  543705 memory.go:184] no items to output this cycle
I0321 21:46:53.409801  543705 cpu.go:275] no items to output this cycle
E0321 21:47:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:47:03.409759  543705 memory.go:184] no items to output this cycle
I0321 21:47:03.409891  543705 cpu.go:275] no items to output this cycle
E0321 21:47:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:47:13.409795  543705 memory.go:191] Add success.
I0321 21:47:13.409809  543705 cpu.go:282] Add success.
W0321 21:47:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:47:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:47:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:47:13.420155  543705 net.go:648] Add success.
I0321 21:47:13.423147  543705 net.go:770] primary dev: ETH0
I0321 21:47:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:47:13.423170  543705 net.go:698] Add success.
I0321 21:47:13.453725  543705 event_worker.go:152] Polling the log file for events...
W0321 21:47:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:47:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0321 21:47:14.455150  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:47:14.456934  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:47:14.456944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:47:14.456951  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:47:14.457025  543705 disk_worker.go:494] system disk:vda1
I0321 21:47:14.457067  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:47:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:47:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:47:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:47:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:47:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:47:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:47:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:47:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:47:23.409790  543705 memory.go:184] no items to output this cycle
I0321 21:47:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 21:47:31.557423  543705 disk_info.go:125] begin check local disk info of client
I0321 21:47:31.559971  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:47:31.559978  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003487c0 0xc000348800]
E0321 21:47:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:47:33.409790  543705 memory.go:184] no items to output this cycle
I0321 21:47:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 21:47:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:47:43.409778  543705 memory.go:191] Add success.
I0321 21:47:43.409809  543705 cpu.go:282] Add success.
I0321 21:47:43.419843  543705 net.go:648] Add success.
I0321 21:47:43.422961  543705 net.go:770] primary dev: ETH0
I0321 21:47:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:47:43.422986  543705 net.go:698] Add success.
I0321 21:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:47:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:47:53.409764  543705 memory.go:184] no items to output this cycle
I0321 21:47:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 21:48:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:48:03.409764  543705 memory.go:184] no items to output this cycle
I0321 21:48:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 21:48:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:48:13.409823  543705 memory.go:191] Add success.
I0321 21:48:13.409829  543705 cpu.go:282] Add success.
W0321 21:48:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:48:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:48:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:48:13.420150  543705 net.go:648] Add success.
I0321 21:48:13.422873  543705 net.go:770] primary dev: ETH0
I0321 21:48:13.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:48:13.422897  543705 net.go:698] Add success.
I0321 21:48:13.899334  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f6c09015-f888-49f4-afe6-f31d932f682a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:48:13.899368  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:48:14.454678  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:48:14.454829  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:48:14.454885  543705 disk_worker.go:708] disk space is not compliant
W0321 21:48:14.454888  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:48:14.456244  543705 disk_worker.go:494] system disk:vda1
I0321 21:48:14.456299  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:48:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:48:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:48:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:48:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:48:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:48:23.409781  543705 memory.go:184] no items to output this cycle
I0321 21:48:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 21:48:31.560449  543705 disk_info.go:125] begin check local disk info of client
I0321 21:48:31.563055  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:48:31.563062  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3000 0xc0003f3040]
E0321 21:48:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:48:33.409794  543705 memory.go:184] no items to output this cycle
I0321 21:48:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 21:48:39.209728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:48:39.209735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:48:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:48:43.410601  543705 memory.go:191] Add success.
I0321 21:48:43.409795  543705 cpu.go:282] Add success.
I0321 21:48:43.420306  543705 net.go:648] Add success.
I0321 21:48:43.423362  543705 net.go:770] primary dev: ETH0
I0321 21:48:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:48:43.423401  543705 net.go:698] Add success.
I0321 21:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:48:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:48:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:48:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:48:53.409761  543705 memory.go:184] no items to output this cycle
I0321 21:48:53.409793  543705 cpu.go:275] no items to output this cycle
I0321 21:49:03.409933  543705 cpu.go:275] no items to output this cycle
E0321 21:49:03.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:49:03.410022  543705 memory.go:184] no items to output this cycle
E0321 21:49:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:49:13.409801  543705 memory.go:191] Add success.
I0321 21:49:13.409810  543705 cpu.go:282] Add success.
W0321 21:49:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:49:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:49:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:49:13.420202  543705 net.go:648] Add success.
I0321 21:49:13.423460  543705 net.go:770] primary dev: ETH0
I0321 21:49:13.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:49:13.423486  543705 net.go:698] Add success.
I0321 21:49:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:49:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:49:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 21:49:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:49:14.456518  543705 disk_worker.go:494] system disk:vda1
I0321 21:49:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:49:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:49:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:49:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:49:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:49:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:49:23.409782  543705 memory.go:184] no items to output this cycle
I0321 21:49:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 21:49:31.563457  543705 disk_info.go:125] begin check local disk info of client
I0321 21:49:31.566027  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:49:31.566034  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8d40 0xc0004d8d80]
E0321 21:49:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:49:33.409789  543705 memory.go:184] no items to output this cycle
I0321 21:49:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 21:49:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:49:43.409791  543705 memory.go:191] Add success.
I0321 21:49:43.409792  543705 cpu.go:282] Add success.
I0321 21:49:43.420003  543705 net.go:648] Add success.
I0321 21:49:43.423033  543705 net.go:770] primary dev: ETH0
I0321 21:49:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:49:43.423059  543705 net.go:698] Add success.
I0321 21:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:49:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:49:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:49:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:49:53.409808  543705 memory.go:184] no items to output this cycle
I0321 21:49:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 21:50:03.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:50:03.409865  543705 memory.go:184] no items to output this cycle
I0321 21:50:03.409929  543705 cpu.go:275] no items to output this cycle
E0321 21:50:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:50:13.409795  543705 memory.go:191] Add success.
I0321 21:50:13.409810  543705 cpu.go:282] Add success.
W0321 21:50:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:50:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:50:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:50:13.420177  543705 net.go:648] Add success.
I0321 21:50:13.422863  543705 net.go:770] primary dev: ETH0
I0321 21:50:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:50:13.422887  543705 net.go:698] Add success.
I0321 21:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:50:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:50:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 21:50:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:50:14.456494  543705 disk_worker.go:494] system disk:vda1
I0321 21:50:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:50:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:50:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:50:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:50:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:50:16.472534  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:50:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:50:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 21:50:23.409795  543705 memory.go:184] no items to output this cycle
I0321 21:50:31.566478  543705 disk_info.go:125] begin check local disk info of client
I0321 21:50:31.569039  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:50:31.569046  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b540 0xc00007b580]
E0321 21:50:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:50:33.409776  543705 memory.go:184] no items to output this cycle
I0321 21:50:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 21:50:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:50:43.409784  543705 memory.go:191] Add success.
I0321 21:50:43.409814  543705 cpu.go:282] Add success.
I0321 21:50:43.419871  543705 net.go:648] Add success.
I0321 21:50:43.422472  543705 net.go:770] primary dev: ETH0
I0321 21:50:43.422485  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:50:43.422498  543705 net.go:698] Add success.
I0321 21:50:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:50:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:50:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:50:53.409794  543705 memory.go:184] no items to output this cycle
I0321 21:50:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 21:51:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:51:03.409802  543705 memory.go:184] no items to output this cycle
I0321 21:51:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 21:51:13.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:51:13.409861  543705 memory.go:191] Add success.
W0321 21:51:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:51:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:51:13.409910  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:51:13.410014  543705 cpu.go:282] Add success.
I0321 21:51:13.419725  543705 net.go:648] Add success.
I0321 21:51:13.422686  543705 net.go:770] primary dev: ETH0
I0321 21:51:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:51:13.422709  543705 net.go:698] Add success.
I0321 21:51:13.468293  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fa0dca38-f18f-4203-b137-22c90757e997","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:51:13.468324  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:51:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:51:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:51:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 21:51:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:51:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 21:51:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:51:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:51:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:51:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:51:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:51:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:51:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:51:23.409786  543705 memory.go:184] no items to output this cycle
I0321 21:51:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 21:51:31.569500  543705 disk_info.go:125] begin check local disk info of client
I0321 21:51:31.572054  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:51:31.572061  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348240 0xc000348280]
E0321 21:51:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:51:33.409805  543705 memory.go:184] no items to output this cycle
I0321 21:51:33.409821  543705 cpu.go:275] no items to output this cycle
I0321 21:51:39.213762  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:51:39.213768  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:51:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:51:43.410688  543705 memory.go:191] Add success.
I0321 21:51:43.409811  543705 cpu.go:282] Add success.
I0321 21:51:43.420467  543705 net.go:648] Add success.
I0321 21:51:43.423406  543705 net.go:770] primary dev: ETH0
I0321 21:51:43.423419  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:51:43.423431  543705 net.go:698] Add success.
I0321 21:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:51:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:51:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:51:53.409777  543705 memory.go:184] no items to output this cycle
I0321 21:51:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 21:52:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:52:03.409816  543705 memory.go:184] no items to output this cycle
I0321 21:52:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 21:52:13.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:52:13.409931  543705 memory.go:191] Add success.
W0321 21:52:13.409980  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:52:13.410000  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:52:13.410005  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:52:13.410031  543705 cpu.go:282] Add success.
I0321 21:52:13.419721  543705 net.go:648] Add success.
I0321 21:52:13.422662  543705 net.go:770] primary dev: ETH0
I0321 21:52:13.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:52:13.422694  543705 net.go:698] Add success.
W0321 21:52:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:52:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 21:52:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:52:14.456709  543705 disk_worker.go:494] system disk:vda1
I0321 21:52:14.456746  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:52:14.457015  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:52:14.457022  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:52:14.457027  543705 custom_config.go:64] query custom config with name: gpu
E0321 21:52:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:52:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:52:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:52:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:52:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:52:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:52:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:52:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:52:23.409815  543705 memory.go:184] no items to output this cycle
I0321 21:52:23.409828  543705 cpu.go:275] no items to output this cycle
I0321 21:52:31.572506  543705 disk_info.go:125] begin check local disk info of client
I0321 21:52:31.575149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:52:31.575155  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386640 0xc000386680]
E0321 21:52:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:52:33.409767  543705 memory.go:184] no items to output this cycle
I0321 21:52:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 21:52:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:52:43.409780  543705 memory.go:191] Add success.
I0321 21:52:43.409790  543705 cpu.go:282] Add success.
I0321 21:52:43.419862  543705 net.go:648] Add success.
I0321 21:52:43.422484  543705 net.go:770] primary dev: ETH0
I0321 21:52:43.422499  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:52:43.422513  543705 net.go:698] Add success.
I0321 21:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:52:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:52:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:52:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:52:53.409766  543705 memory.go:184] no items to output this cycle
I0321 21:52:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 21:53:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:53:03.409809  543705 memory.go:184] no items to output this cycle
I0321 21:53:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 21:53:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:53:13.409918  543705 memory.go:191] Add success.
W0321 21:53:13.409950  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:53:13.409964  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:53:13.409967  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:53:13.409979  543705 cpu.go:282] Add success.
I0321 21:53:13.419728  543705 net.go:648] Add success.
I0321 21:53:13.422417  543705 net.go:770] primary dev: ETH0
I0321 21:53:13.422432  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:53:13.422445  543705 net.go:698] Add success.
I0321 21:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:53:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:53:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0321 21:53:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:53:14.456585  543705 disk_worker.go:494] system disk:vda1
I0321 21:53:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:53:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:53:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:53:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:53:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:53:23.409785  543705 memory.go:184] no items to output this cycle
I0321 21:53:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 21:53:31.575524  543705 disk_info.go:125] begin check local disk info of client
I0321 21:53:31.578110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:53:31.578116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513b40 0xc000513b80]
E0321 21:53:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:53:33.409800  543705 memory.go:184] no items to output this cycle
I0321 21:53:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 21:53:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:53:43.409791  543705 cpu.go:282] Add success.
I0321 21:53:43.409799  543705 memory.go:191] Add success.
I0321 21:53:43.419980  543705 net.go:648] Add success.
I0321 21:53:43.422656  543705 net.go:770] primary dev: ETH0
I0321 21:53:43.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:53:43.422685  543705 net.go:698] Add success.
I0321 21:53:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:53:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:53:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:53:53.409769  543705 memory.go:184] no items to output this cycle
I0321 21:53:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 21:54:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:54:03.409775  543705 memory.go:184] no items to output this cycle
I0321 21:54:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 21:54:13.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:54:13.409907  543705 memory.go:191] Add success.
W0321 21:54:13.409944  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 21:54:13.409951  543705 cpu.go:282] Add success.
W0321 21:54:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:54:13.409959  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:54:13.419740  543705 net.go:648] Add success.
I0321 21:54:13.422761  543705 net.go:770] primary dev: ETH0
I0321 21:54:13.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:54:13.422785  543705 net.go:698] Add success.
I0321 21:54:13.468434  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"62c0b1bb-d5ca-4919-a123-22edb35e0790","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:54:13.468465  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 21:54:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:54:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:54:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 21:54:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:54:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 21:54:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:54:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:54:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:54:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:54:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:54:23.409804  543705 memory.go:184] no items to output this cycle
I0321 21:54:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 21:54:31.578534  543705 disk_info.go:125] begin check local disk info of client
I0321 21:54:31.581142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:54:31.581149  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ff00 0xc00037ff40]
E0321 21:54:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:54:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 21:54:33.409786  543705 memory.go:184] no items to output this cycle
I0321 21:54:39.217733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:54:39.217740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:54:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:54:43.410682  543705 memory.go:191] Add success.
I0321 21:54:43.409818  543705 cpu.go:282] Add success.
I0321 21:54:43.420375  543705 net.go:648] Add success.
I0321 21:54:43.423081  543705 net.go:770] primary dev: ETH0
I0321 21:54:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:54:43.423109  543705 net.go:698] Add success.
I0321 21:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:54:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:54:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:54:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:54:53.409771  543705 memory.go:184] no items to output this cycle
I0321 21:54:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 21:55:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:55:03.409784  543705 memory.go:184] no items to output this cycle
I0321 21:55:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 21:55:13.409844  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:55:13.409872  543705 memory.go:191] Add success.
W0321 21:55:13.409901  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:55:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:55:13.409921  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:55:13.409933  543705 cpu.go:282] Add success.
I0321 21:55:13.419709  543705 net.go:648] Add success.
I0321 21:55:13.422943  543705 net.go:770] primary dev: ETH0
I0321 21:55:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:55:13.422968  543705 net.go:698] Add success.
I0321 21:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:55:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:55:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 21:55:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:55:14.456591  543705 disk_worker.go:494] system disk:vda1
I0321 21:55:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:55:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:55:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:55:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:55:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:55:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:55:23.409798  543705 cpu.go:275] no items to output this cycle
I0321 21:55:23.409807  543705 memory.go:184] no items to output this cycle
I0321 21:55:31.581548  543705 disk_info.go:125] begin check local disk info of client
I0321 21:55:31.584097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:55:31.584104  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7740 0xc0004a7780]
E0321 21:55:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:55:33.409802  543705 memory.go:184] no items to output this cycle
I0321 21:55:33.409816  543705 cpu.go:275] no items to output this cycle
E0321 21:55:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:55:43.409786  543705 memory.go:191] Add success.
I0321 21:55:43.409808  543705 cpu.go:282] Add success.
I0321 21:55:43.419977  543705 net.go:648] Add success.
I0321 21:55:43.423048  543705 net.go:770] primary dev: ETH0
I0321 21:55:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:55:43.423078  543705 net.go:698] Add success.
I0321 21:55:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:55:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:55:53.409769  543705 memory.go:184] no items to output this cycle
I0321 21:55:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 21:56:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:56:03.409804  543705 memory.go:184] no items to output this cycle
I0321 21:56:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 21:56:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:56:13.409795  543705 memory.go:191] Add success.
I0321 21:56:13.409805  543705 cpu.go:282] Add success.
W0321 21:56:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:56:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:56:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:56:13.419734  543705 net.go:648] Add success.
I0321 21:56:13.422165  543705 net.go:770] primary dev: ETH0
I0321 21:56:13.422178  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:56:13.422189  543705 net.go:698] Add success.
I0321 21:56:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:56:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:56:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0321 21:56:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:56:14.456583  543705 disk_worker.go:494] system disk:vda1
I0321 21:56:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:56:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:56:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:56:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:56:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:56:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:56:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:56:23.409801  543705 memory.go:184] no items to output this cycle
I0321 21:56:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 21:56:31.584556  543705 disk_info.go:125] begin check local disk info of client
I0321 21:56:31.587114  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:56:31.587120  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002793c0 0xc000279400]
E0321 21:56:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:56:33.409786  543705 memory.go:184] no items to output this cycle
I0321 21:56:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 21:56:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:56:43.409810  543705 memory.go:191] Add success.
I0321 21:56:43.409816  543705 cpu.go:282] Add success.
I0321 21:56:43.419855  543705 net.go:648] Add success.
I0321 21:56:43.422942  543705 net.go:770] primary dev: ETH0
I0321 21:56:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:56:43.422968  543705 net.go:698] Add success.
I0321 21:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:56:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:56:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:56:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:56:53.409772  543705 memory.go:184] no items to output this cycle
I0321 21:56:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 21:57:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:57:03.409777  543705 memory.go:184] no items to output this cycle
I0321 21:57:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 21:57:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:57:13.409802  543705 memory.go:191] Add success.
I0321 21:57:13.409813  543705 cpu.go:282] Add success.
W0321 21:57:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:57:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:57:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:57:13.420216  543705 net.go:648] Add success.
I0321 21:57:13.423414  543705 net.go:770] primary dev: ETH0
I0321 21:57:13.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:57:13.423438  543705 net.go:698] Add success.
I0321 21:57:13.429633  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 21:57:13.452780  543705 event_worker.go:152] Polling the log file for events...
I0321 21:57:13.469477  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d45bb71-6395-4a27-ab46-5334c6362585","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 21:57:13.469508  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 21:57:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:57:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 21:57:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 21:57:14.456853  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 21:57:14.456862  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 21:57:14.456868  543705 custom_config.go:64] query custom config with name: gpu
I0321 21:57:14.456871  543705 disk_worker.go:494] system disk:vda1
I0321 21:57:14.456912  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 21:57:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 21:57:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:57:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 21:57:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 21:57:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:57:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:57:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:57:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:57:23.409788  543705 memory.go:184] no items to output this cycle
I0321 21:57:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 21:57:31.587585  543705 disk_info.go:125] begin check local disk info of client
I0321 21:57:31.590144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:57:31.590150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9500 0xc0004d9540]
E0321 21:57:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:57:33.409799  543705 memory.go:184] no items to output this cycle
I0321 21:57:33.409819  543705 cpu.go:275] no items to output this cycle
I0321 21:57:39.221732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 21:57:39.221738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 21:57:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:57:43.410591  543705 memory.go:191] Add success.
I0321 21:57:43.409802  543705 cpu.go:282] Add success.
I0321 21:57:43.420527  543705 net.go:648] Add success.
I0321 21:57:43.423345  543705 net.go:770] primary dev: ETH0
I0321 21:57:43.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:57:43.423387  543705 net.go:698] Add success.
I0321 21:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:57:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:57:53.410263  543705 cpu.go:275] no items to output this cycle
I0321 21:57:53.410267  543705 memory.go:184] no items to output this cycle
E0321 21:58:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:58:03.409785  543705 memory.go:184] no items to output this cycle
I0321 21:58:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 21:58:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:58:13.409821  543705 memory.go:191] Add success.
I0321 21:58:13.409826  543705 cpu.go:282] Add success.
W0321 21:58:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:58:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:58:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:58:13.420258  543705 net.go:648] Add success.
I0321 21:58:13.423069  543705 net.go:770] primary dev: ETH0
I0321 21:58:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:58:13.423093  543705 net.go:698] Add success.
I0321 21:58:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:58:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:58:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 21:58:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:58:14.456564  543705 disk_worker.go:494] system disk:vda1
I0321 21:58:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:58:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:58:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:58:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:58:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:58:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:58:23.409798  543705 memory.go:184] no items to output this cycle
I0321 21:58:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 21:58:31.590599  543705 disk_info.go:125] begin check local disk info of client
I0321 21:58:31.593194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:58:31.593200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d2200 0xc0003d2240]
E0321 21:58:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:58:33.409776  543705 cpu.go:275] no items to output this cycle
I0321 21:58:33.409782  543705 memory.go:184] no items to output this cycle
E0321 21:58:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:58:43.409815  543705 memory.go:191] Add success.
I0321 21:58:43.409824  543705 cpu.go:282] Add success.
I0321 21:58:43.419946  543705 net.go:648] Add success.
I0321 21:58:43.422853  543705 net.go:770] primary dev: ETH0
I0321 21:58:43.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:58:43.422877  543705 net.go:698] Add success.
I0321 21:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:58:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:58:53.409764  543705 memory.go:184] no items to output this cycle
I0321 21:58:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 21:59:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:59:03.409779  543705 memory.go:184] no items to output this cycle
I0321 21:59:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 21:59:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:59:13.409815  543705 memory.go:191] Add success.
I0321 21:59:13.409818  543705 cpu.go:282] Add success.
W0321 21:59:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 21:59:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 21:59:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 21:59:13.420123  543705 net.go:648] Add success.
I0321 21:59:13.422661  543705 net.go:770] primary dev: ETH0
I0321 21:59:13.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:59:13.422688  543705 net.go:698] Add success.
I0321 21:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 21:59:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 21:59:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 21:59:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0321 21:59:14.456889  543705 disk_worker.go:494] system disk:vda1
I0321 21:59:14.456932  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 21:59:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 21:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:59:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:59:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 21:59:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 21:59:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:59:23.409806  543705 memory.go:184] no items to output this cycle
I0321 21:59:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 21:59:31.593616  543705 disk_info.go:125] begin check local disk info of client
I0321 21:59:31.596212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 21:59:31.596219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ba080 0xc0004ba0c0]
E0321 21:59:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:59:33.409776  543705 cpu.go:275] no items to output this cycle
I0321 21:59:33.409780  543705 memory.go:184] no items to output this cycle
E0321 21:59:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:59:43.409791  543705 memory.go:191] Add success.
I0321 21:59:43.409792  543705 cpu.go:282] Add success.
I0321 21:59:43.419842  543705 net.go:648] Add success.
I0321 21:59:43.422877  543705 net.go:770] primary dev: ETH0
I0321 21:59:43.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0321 21:59:43.422902  543705 net.go:698] Add success.
I0321 21:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 21:59:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 21:59:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 21:59:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 21:59:53.409779  543705 memory.go:184] no items to output this cycle
I0321 21:59:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 22:00:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:00:03.409803  543705 memory.go:184] no items to output this cycle
I0321 22:00:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 22:00:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:00:13.409782  543705 memory.go:191] Add success.
I0321 22:00:13.409801  543705 cpu.go:282] Add success.
W0321 22:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:00:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:00:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:00:13.420335  543705 net.go:648] Add success.
I0321 22:00:13.423210  543705 net.go:770] primary dev: ETH0
I0321 22:00:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:00:13.423238  543705 net.go:698] Add success.
I0321 22:00:13.469950  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1159adc7-29b2-4e6b-9274-7220a565b6d0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:00:13.469983  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:00:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:00:14.455243  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:00:14.455356  543705 disk_worker.go:708] disk space is not compliant
W0321 22:00:14.455361  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:00:14.457511  543705 disk_worker.go:494] system disk:vda1
I0321 22:00:14.457540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:00:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:00:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:00:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:00:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:00:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:00:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:00:23.409809  543705 memory.go:184] no items to output this cycle
I0321 22:00:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 22:00:31.596633  543705 disk_info.go:125] begin check local disk info of client
I0321 22:00:31.599285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:00:31.599291  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2580 0xc0003f25c0]
E0321 22:00:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:00:33.409780  543705 memory.go:184] no items to output this cycle
I0321 22:00:33.409788  543705 cpu.go:275] no items to output this cycle
I0321 22:00:39.225748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:00:39.225755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:00:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:00:43.410815  543705 memory.go:191] Add success.
I0321 22:00:43.409804  543705 cpu.go:282] Add success.
I0321 22:00:43.420578  543705 net.go:648] Add success.
I0321 22:00:43.424052  543705 net.go:770] primary dev: ETH0
I0321 22:00:43.424066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:00:43.424081  543705 net.go:698] Add success.
I0321 22:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:00:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:00:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:00:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:00:53.409793  543705 memory.go:184] no items to output this cycle
I0321 22:00:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 22:01:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:01:03.409793  543705 memory.go:184] no items to output this cycle
I0321 22:01:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 22:01:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:01:13.409812  543705 memory.go:191] Add success.
I0321 22:01:13.409813  543705 cpu.go:282] Add success.
W0321 22:01:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:01:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:01:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:01:13.420125  543705 net.go:648] Add success.
I0321 22:01:13.423201  543705 net.go:770] primary dev: ETH0
I0321 22:01:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:01:13.423226  543705 net.go:698] Add success.
I0321 22:01:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:01:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:01:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 22:01:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:01:14.456703  543705 disk_worker.go:494] system disk:vda1
I0321 22:01:14.456754  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:01:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:01:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:01:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:01:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:01:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:01:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:01:23.409800  543705 memory.go:184] no items to output this cycle
I0321 22:01:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 22:01:31.599637  543705 disk_info.go:125] begin check local disk info of client
I0321 22:01:31.602223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:01:31.602230  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e000 0xc00047e040]
E0321 22:01:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:01:33.409799  543705 memory.go:184] no items to output this cycle
I0321 22:01:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 22:01:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:01:43.409794  543705 memory.go:191] Add success.
I0321 22:01:43.409798  543705 cpu.go:282] Add success.
I0321 22:01:43.419897  543705 net.go:648] Add success.
I0321 22:01:43.422623  543705 net.go:770] primary dev: ETH0
I0321 22:01:43.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:01:43.422650  543705 net.go:698] Add success.
I0321 22:01:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:01:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:01:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:01:53.410351  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:01:53.410366  543705 memory.go:184] no items to output this cycle
I0321 22:01:53.410400  543705 cpu.go:275] no items to output this cycle
E0321 22:02:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:02:03.409807  543705 memory.go:184] no items to output this cycle
I0321 22:02:03.409828  543705 cpu.go:275] no items to output this cycle
E0321 22:02:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:02:13.409784  543705 memory.go:191] Add success.
I0321 22:02:13.409798  543705 cpu.go:282] Add success.
W0321 22:02:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:02:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:02:13.420062  543705 net.go:648] Add success.
I0321 22:02:13.422714  543705 net.go:770] primary dev: ETH0
I0321 22:02:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:02:13.422745  543705 net.go:698] Add success.
W0321 22:02:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:02:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 22:02:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:02:14.455873  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:02:14.455883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:02:14.455888  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:02:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 22:02:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:02:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:02:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:02:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:02:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:02:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:02:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:02:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:02:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:02:23.409782  543705 memory.go:184] no items to output this cycle
I0321 22:02:23.409785  543705 cpu.go:275] no items to output this cycle
I0321 22:02:31.602665  543705 disk_info.go:125] begin check local disk info of client
I0321 22:02:31.605214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:02:31.605221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab540 0xc0001ab580]
E0321 22:02:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:02:33.409792  543705 memory.go:184] no items to output this cycle
I0321 22:02:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 22:02:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:02:43.409780  543705 memory.go:191] Add success.
I0321 22:02:43.409796  543705 cpu.go:282] Add success.
I0321 22:02:43.419831  543705 net.go:648] Add success.
I0321 22:02:43.422686  543705 net.go:770] primary dev: ETH0
I0321 22:02:43.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:02:43.422712  543705 net.go:698] Add success.
I0321 22:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:02:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:02:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:02:53.409775  543705 memory.go:184] no items to output this cycle
I0321 22:02:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 22:03:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:03:03.409769  543705 memory.go:184] no items to output this cycle
I0321 22:03:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 22:03:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:03:13.409812  543705 memory.go:191] Add success.
I0321 22:03:13.409820  543705 cpu.go:282] Add success.
W0321 22:03:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:03:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:03:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:03:13.420187  543705 net.go:648] Add success.
I0321 22:03:13.423084  543705 net.go:770] primary dev: ETH0
I0321 22:03:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:03:13.423108  543705 net.go:698] Add success.
I0321 22:03:13.469123  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e45b2726-1832-4ee5-bd05-dfaf9e49112d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:03:13.469157  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:03:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:03:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:03:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 22:03:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:03:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 22:03:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:03:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:03:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:03:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:03:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:03:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:03:23.410399  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:03:23.410420  543705 memory.go:184] no items to output this cycle
I0321 22:03:23.410423  543705 cpu.go:275] no items to output this cycle
I0321 22:03:31.605676  543705 disk_info.go:125] begin check local disk info of client
I0321 22:03:31.608246  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:03:31.608253  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4000 0xc0004b4040]
E0321 22:03:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:03:33.409775  543705 memory.go:184] no items to output this cycle
I0321 22:03:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 22:03:39.229736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:03:39.229743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:03:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:03:43.410616  543705 memory.go:191] Add success.
I0321 22:03:43.409813  543705 cpu.go:282] Add success.
I0321 22:03:43.420370  543705 net.go:648] Add success.
I0321 22:03:43.423270  543705 net.go:770] primary dev: ETH0
I0321 22:03:43.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:03:43.423296  543705 net.go:698] Add success.
I0321 22:03:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:03:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:03:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:03:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:03:53.409786  543705 memory.go:184] no items to output this cycle
I0321 22:03:53.409811  543705 cpu.go:275] no items to output this cycle
E0321 22:04:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:04:03.409782  543705 memory.go:184] no items to output this cycle
I0321 22:04:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 22:04:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:04:13.409814  543705 memory.go:191] Add success.
I0321 22:04:13.409821  543705 cpu.go:282] Add success.
W0321 22:04:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:04:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:04:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:04:13.420131  543705 net.go:648] Add success.
I0321 22:04:13.422840  543705 net.go:770] primary dev: ETH0
I0321 22:04:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:04:13.422863  543705 net.go:698] Add success.
I0321 22:04:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:04:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:04:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0321 22:04:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:04:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 22:04:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:04:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:04:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:04:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:04:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:04:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:04:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:04:23.409770  543705 memory.go:184] no items to output this cycle
I0321 22:04:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 22:04:31.608679  543705 disk_info.go:125] begin check local disk info of client
I0321 22:04:31.611545  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:04:31.611553  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b87c0 0xc0002b8800]
E0321 22:04:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:04:33.409759  543705 memory.go:184] no items to output this cycle
I0321 22:04:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 22:04:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:04:43.409810  543705 memory.go:191] Add success.
I0321 22:04:43.409817  543705 cpu.go:282] Add success.
I0321 22:04:43.419873  543705 net.go:648] Add success.
I0321 22:04:43.422789  543705 net.go:770] primary dev: ETH0
I0321 22:04:43.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:04:43.422818  543705 net.go:698] Add success.
I0321 22:04:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:04:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:04:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:04:53.409802  543705 memory.go:184] no items to output this cycle
I0321 22:04:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 22:05:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:05:03.409780  543705 memory.go:184] no items to output this cycle
I0321 22:05:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 22:05:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:05:13.409803  543705 memory.go:191] Add success.
I0321 22:05:13.409811  543705 cpu.go:282] Add success.
W0321 22:05:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:05:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:05:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:05:13.420061  543705 net.go:648] Add success.
I0321 22:05:13.422851  543705 net.go:770] primary dev: ETH0
I0321 22:05:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:05:13.422875  543705 net.go:698] Add success.
I0321 22:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:05:14.455088  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:05:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0321 22:05:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:05:14.456475  543705 disk_worker.go:494] system disk:vda1
I0321 22:05:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:05:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:05:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:05:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:05:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:05:23.409798  543705 memory.go:184] no items to output this cycle
I0321 22:05:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 22:05:31.611693  543705 disk_info.go:125] begin check local disk info of client
I0321 22:05:31.614236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:05:31.614242  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a080 0xc00039a0c0]
E0321 22:05:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:05:33.409789  543705 memory.go:184] no items to output this cycle
I0321 22:05:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:05:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:05:43.409798  543705 memory.go:191] Add success.
I0321 22:05:43.409799  543705 cpu.go:282] Add success.
I0321 22:05:43.419965  543705 net.go:648] Add success.
I0321 22:05:43.422694  543705 net.go:770] primary dev: ETH0
I0321 22:05:43.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:05:43.422718  543705 net.go:698] Add success.
I0321 22:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:05:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:05:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:05:53.409808  543705 memory.go:184] no items to output this cycle
I0321 22:05:53.409820  543705 cpu.go:275] no items to output this cycle
E0321 22:06:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:06:03.409769  543705 memory.go:184] no items to output this cycle
I0321 22:06:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 22:06:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:06:13.409811  543705 memory.go:191] Add success.
I0321 22:06:13.409820  543705 cpu.go:282] Add success.
W0321 22:06:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:06:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:06:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:06:13.420053  543705 net.go:648] Add success.
I0321 22:06:13.422627  543705 net.go:770] primary dev: ETH0
I0321 22:06:13.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:06:13.422654  543705 net.go:698] Add success.
I0321 22:06:13.469084  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a1f76348-04a6-4838-8044-a6200501f5eb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:06:13.469117  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:06:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:06:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:06:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 22:06:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:06:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 22:06:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:06:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:06:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:06:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:06:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:06:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:06:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:06:23.409782  543705 memory.go:184] no items to output this cycle
I0321 22:06:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 22:06:31.614721  543705 disk_info.go:125] begin check local disk info of client
I0321 22:06:31.617278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:06:31.617285  543705 disk_info.go:196] parse disk info done, disk is : [0xc000280700 0xc000280740]
E0321 22:06:33.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:06:33.409882  543705 memory.go:184] no items to output this cycle
I0321 22:06:33.409949  543705 cpu.go:275] no items to output this cycle
I0321 22:06:39.233732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:06:39.233738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:06:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:06:43.410808  543705 memory.go:191] Add success.
I0321 22:06:43.409802  543705 cpu.go:282] Add success.
I0321 22:06:43.420496  543705 net.go:648] Add success.
I0321 22:06:43.423526  543705 net.go:770] primary dev: ETH0
I0321 22:06:43.423538  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:06:43.423563  543705 net.go:698] Add success.
I0321 22:06:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:06:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:06:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:06:53.409811  543705 memory.go:184] no items to output this cycle
I0321 22:06:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 22:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:07:03.409775  543705 memory.go:184] no items to output this cycle
I0321 22:07:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 22:07:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:07:13.409787  543705 memory.go:191] Add success.
I0321 22:07:13.409808  543705 cpu.go:282] Add success.
W0321 22:07:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:07:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:07:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:07:13.420121  543705 net.go:648] Add success.
I0321 22:07:13.422909  543705 net.go:770] primary dev: ETH0
I0321 22:07:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:07:13.422933  543705 net.go:698] Add success.
I0321 22:07:13.453480  543705 event_worker.go:152] Polling the log file for events...
W0321 22:07:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:07:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0321 22:07:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:07:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:07:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:07:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:07:14.456531  543705 disk_worker.go:494] system disk:vda1
I0321 22:07:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:07:15.456893  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:07:15.456901  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:07:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:07:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:07:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:07:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:07:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:07:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:07:23.409798  543705 memory.go:184] no items to output this cycle
I0321 22:07:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 22:07:31.617676  543705 disk_info.go:125] begin check local disk info of client
I0321 22:07:31.620223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:07:31.620230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3440 0xc0003f3480]
E0321 22:07:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:07:33.409798  543705 memory.go:184] no items to output this cycle
I0321 22:07:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 22:07:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:07:43.409810  543705 memory.go:191] Add success.
I0321 22:07:43.409809  543705 cpu.go:282] Add success.
I0321 22:07:43.419946  543705 net.go:648] Add success.
I0321 22:07:43.422641  543705 net.go:770] primary dev: ETH0
I0321 22:07:43.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:07:43.422677  543705 net.go:698] Add success.
I0321 22:07:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:07:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:07:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:07:53.409814  543705 memory.go:184] no items to output this cycle
I0321 22:07:53.409824  543705 cpu.go:275] no items to output this cycle
E0321 22:08:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:08:03.409809  543705 memory.go:184] no items to output this cycle
I0321 22:08:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 22:08:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:08:13.409826  543705 memory.go:191] Add success.
I0321 22:08:13.409840  543705 cpu.go:282] Add success.
W0321 22:08:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:08:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:08:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:08:13.420109  543705 net.go:648] Add success.
I0321 22:08:13.423147  543705 net.go:770] primary dev: ETH0
I0321 22:08:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:08:13.423170  543705 net.go:698] Add success.
I0321 22:08:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:08:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:08:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 22:08:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:08:14.456535  543705 disk_worker.go:494] system disk:vda1
I0321 22:08:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:08:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:08:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:08:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:08:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:08:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:08:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:08:23.409808  543705 memory.go:184] no items to output this cycle
I0321 22:08:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 22:08:31.620755  543705 disk_info.go:125] begin check local disk info of client
I0321 22:08:31.623359  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:08:31.623366  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9140 0xc0003c9180]
E0321 22:08:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:08:33.409780  543705 memory.go:184] no items to output this cycle
I0321 22:08:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 22:08:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:08:43.409906  543705 memory.go:191] Add success.
I0321 22:08:43.410038  543705 cpu.go:282] Add success.
I0321 22:08:43.419743  543705 net.go:648] Add success.
I0321 22:08:43.422439  543705 net.go:770] primary dev: ETH0
I0321 22:08:43.422453  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:08:43.422466  543705 net.go:698] Add success.
I0321 22:08:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:08:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:08:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:08:53.409795  543705 memory.go:184] no items to output this cycle
I0321 22:08:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 22:09:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:09:03.409777  543705 memory.go:184] no items to output this cycle
I0321 22:09:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:09:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:09:13.409822  543705 memory.go:191] Add success.
I0321 22:09:13.409833  543705 cpu.go:282] Add success.
W0321 22:09:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:09:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:09:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:09:13.420069  543705 net.go:648] Add success.
I0321 22:09:13.422559  543705 net.go:770] primary dev: ETH0
I0321 22:09:13.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:09:13.422583  543705 net.go:698] Add success.
I0321 22:09:13.463915  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f1b3d7ae-c110-47ca-aa27-853e171df814","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:09:13.463947  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:09:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:09:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 22:09:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:09:14.456491  543705 disk_worker.go:494] system disk:vda1
I0321 22:09:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:09:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:09:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:09:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:09:23.409812  543705 memory.go:184] no items to output this cycle
I0321 22:09:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 22:09:31.623755  543705 disk_info.go:125] begin check local disk info of client
I0321 22:09:31.626375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:09:31.626382  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc200 0xc0002bc240]
E0321 22:09:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:09:33.409761  543705 memory.go:184] no items to output this cycle
I0321 22:09:33.409809  543705 cpu.go:275] no items to output this cycle
I0321 22:09:39.237744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:09:39.237750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:09:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:09:43.410662  543705 memory.go:191] Add success.
I0321 22:09:43.409800  543705 cpu.go:282] Add success.
I0321 22:09:43.420353  543705 net.go:648] Add success.
I0321 22:09:43.422945  543705 net.go:770] primary dev: ETH0
I0321 22:09:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:09:43.422970  543705 net.go:698] Add success.
I0321 22:09:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:09:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:09:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:09:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:09:53.409779  543705 cpu.go:275] no items to output this cycle
I0321 22:09:53.409785  543705 memory.go:184] no items to output this cycle
E0321 22:10:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:10:03.409800  543705 memory.go:184] no items to output this cycle
I0321 22:10:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 22:10:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:10:13.409783  543705 memory.go:191] Add success.
W0321 22:10:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 22:10:13.409808  543705 cpu.go:282] Add success.
W0321 22:10:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:10:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:10:13.420150  543705 net.go:648] Add success.
I0321 22:10:13.422898  543705 net.go:770] primary dev: ETH0
I0321 22:10:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:10:13.422933  543705 net.go:698] Add success.
I0321 22:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:10:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:10:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 22:10:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:10:14.456577  543705 disk_worker.go:494] system disk:vda1
I0321 22:10:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:10:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:10:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:10:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:10:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:10:23.409796  543705 memory.go:184] no items to output this cycle
I0321 22:10:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 22:10:31.626766  543705 disk_info.go:125] begin check local disk info of client
I0321 22:10:31.629393  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:10:31.629400  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f040 0xc00029f080]
E0321 22:10:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:10:33.409768  543705 memory.go:184] no items to output this cycle
I0321 22:10:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 22:10:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:10:43.409901  543705 memory.go:191] Add success.
I0321 22:10:43.409939  543705 cpu.go:282] Add success.
I0321 22:10:43.419715  543705 net.go:648] Add success.
I0321 22:10:43.422815  543705 net.go:770] primary dev: ETH0
I0321 22:10:43.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:10:43.422840  543705 net.go:698] Add success.
I0321 22:10:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:10:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:10:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:10:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:10:53.409768  543705 memory.go:184] no items to output this cycle
I0321 22:10:53.409800  543705 cpu.go:275] no items to output this cycle
E0321 22:11:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:11:03.409805  543705 memory.go:184] no items to output this cycle
I0321 22:11:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 22:11:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:11:13.409782  543705 memory.go:191] Add success.
I0321 22:11:13.409804  543705 cpu.go:282] Add success.
W0321 22:11:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:11:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:11:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:11:13.420158  543705 net.go:648] Add success.
I0321 22:11:13.423109  543705 net.go:770] primary dev: ETH0
I0321 22:11:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:11:13.423138  543705 net.go:698] Add success.
I0321 22:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:11:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:11:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 22:11:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:11:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 22:11:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:11:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:11:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:11:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:11:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:11:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:11:23.409781  543705 memory.go:184] no items to output this cycle
I0321 22:11:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 22:11:31.629676  543705 disk_info.go:125] begin check local disk info of client
I0321 22:11:31.632375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:11:31.632381  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aa240 0xc0003aa280]
E0321 22:11:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:11:33.409767  543705 memory.go:184] no items to output this cycle
I0321 22:11:33.409781  543705 cpu.go:275] no items to output this cycle
E0321 22:11:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:11:43.409817  543705 memory.go:191] Add success.
I0321 22:11:43.409829  543705 cpu.go:282] Add success.
I0321 22:11:43.420084  543705 net.go:648] Add success.
I0321 22:11:43.422837  543705 net.go:770] primary dev: ETH0
I0321 22:11:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:11:43.422863  543705 net.go:698] Add success.
I0321 22:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:11:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:11:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:11:53.409778  543705 cpu.go:275] no items to output this cycle
I0321 22:11:53.409781  543705 memory.go:184] no items to output this cycle
E0321 22:12:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:12:03.409782  543705 memory.go:184] no items to output this cycle
I0321 22:12:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 22:12:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:12:13.409790  543705 memory.go:191] Add success.
I0321 22:12:13.409794  543705 cpu.go:282] Add success.
W0321 22:12:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:12:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:12:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:12:13.420068  543705 net.go:648] Add success.
I0321 22:12:13.422757  543705 net.go:770] primary dev: ETH0
I0321 22:12:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:12:13.422786  543705 net.go:698] Add success.
I0321 22:12:14.330285  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c3ed76cb-8562-424d-91db-07f85dc1a8e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:12:14.330321  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 22:12:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:12:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 22:12:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:12:14.457011  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:12:14.457020  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:12:14.457025  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:12:14.457040  543705 disk_worker.go:494] system disk:vda1
I0321 22:12:14.457079  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:12:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:12:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:12:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:12:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:12:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:12:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:12:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:12:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:12:23.409776  543705 memory.go:184] no items to output this cycle
I0321 22:12:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 22:12:31.632807  543705 disk_info.go:125] begin check local disk info of client
I0321 22:12:31.635379  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:12:31.635386  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fb1c0 0xc0001fb200]
E0321 22:12:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:12:33.409796  543705 memory.go:184] no items to output this cycle
I0321 22:12:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 22:12:39.241734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:12:39.241741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:12:43.409951  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:12:43.410687  543705 memory.go:191] Add success.
I0321 22:12:43.410015  543705 cpu.go:282] Add success.
I0321 22:12:43.419735  543705 net.go:648] Add success.
I0321 22:12:43.422534  543705 net.go:770] primary dev: ETH0
I0321 22:12:43.422549  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:12:43.422564  543705 net.go:698] Add success.
I0321 22:12:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:12:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:12:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:12:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:12:53.409780  543705 memory.go:184] no items to output this cycle
I0321 22:12:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 22:13:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:13:03.409798  543705 memory.go:184] no items to output this cycle
I0321 22:13:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 22:13:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:13:13.409814  543705 memory.go:191] Add success.
I0321 22:13:13.409827  543705 cpu.go:282] Add success.
W0321 22:13:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:13:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:13:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:13:13.420105  543705 net.go:648] Add success.
I0321 22:13:13.422669  543705 net.go:770] primary dev: ETH0
I0321 22:13:13.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:13:13.422699  543705 net.go:698] Add success.
I0321 22:13:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:13:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:13:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 22:13:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:13:14.456613  543705 disk_worker.go:494] system disk:vda1
I0321 22:13:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:13:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:13:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:13:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:13:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:13:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:13:23.409791  543705 memory.go:184] no items to output this cycle
I0321 22:13:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 22:13:31.635813  543705 disk_info.go:125] begin check local disk info of client
I0321 22:13:31.638452  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:13:31.638460  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa6c0 0xc0001fa700]
E0321 22:13:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:13:33.409779  543705 memory.go:184] no items to output this cycle
I0321 22:13:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 22:13:43.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:13:43.409926  543705 memory.go:191] Add success.
I0321 22:13:43.409931  543705 cpu.go:282] Add success.
I0321 22:13:43.419721  543705 net.go:648] Add success.
I0321 22:13:43.422370  543705 net.go:770] primary dev: ETH0
I0321 22:13:43.422384  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:13:43.422396  543705 net.go:698] Add success.
I0321 22:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:13:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:13:53.409803  543705 memory.go:184] no items to output this cycle
I0321 22:13:53.409813  543705 cpu.go:275] no items to output this cycle
E0321 22:14:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:14:03.409780  543705 memory.go:184] no items to output this cycle
I0321 22:14:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:14:13.409795  543705 cpu.go:282] Add success.
I0321 22:14:13.409801  543705 memory.go:191] Add success.
W0321 22:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:14:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:14:13.420267  543705 net.go:648] Add success.
I0321 22:14:13.422966  543705 net.go:770] primary dev: ETH0
I0321 22:14:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:14:13.422990  543705 net.go:698] Add success.
I0321 22:14:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:14:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:14:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0321 22:14:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:14:14.456572  543705 disk_worker.go:494] system disk:vda1
I0321 22:14:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:14:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:14:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:14:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:14:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:14:23.409805  543705 memory.go:184] no items to output this cycle
I0321 22:14:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 22:14:31.638831  543705 disk_info.go:125] begin check local disk info of client
I0321 22:14:31.641402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:14:31.641408  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa480 0xc0001fa4c0]
E0321 22:14:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:14:33.409789  543705 memory.go:184] no items to output this cycle
I0321 22:14:33.409804  543705 cpu.go:275] no items to output this cycle
E0321 22:14:43.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:14:43.409880  543705 cpu.go:282] Add success.
I0321 22:14:43.409885  543705 memory.go:191] Add success.
I0321 22:14:43.419738  543705 net.go:648] Add success.
I0321 22:14:43.422579  543705 net.go:770] primary dev: ETH0
I0321 22:14:43.422594  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:14:43.422607  543705 net.go:698] Add success.
I0321 22:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:14:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:14:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:14:53.409805  543705 memory.go:184] no items to output this cycle
I0321 22:14:53.409816  543705 cpu.go:275] no items to output this cycle
E0321 22:15:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:15:03.409776  543705 cpu.go:275] no items to output this cycle
I0321 22:15:03.409785  543705 memory.go:184] no items to output this cycle
E0321 22:15:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:15:13.409802  543705 memory.go:191] Add success.
I0321 22:15:13.409812  543705 cpu.go:282] Add success.
W0321 22:15:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:15:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:15:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:15:13.420117  543705 net.go:648] Add success.
I0321 22:15:13.423166  543705 net.go:770] primary dev: ETH0
I0321 22:15:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:15:13.423195  543705 net.go:698] Add success.
I0321 22:15:13.468421  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6362d02e-4ad4-4367-9e40-5a4bd7c86080","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:15:13.468455  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:15:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:15:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 22:15:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:15:14.456702  543705 disk_worker.go:494] system disk:vda1
I0321 22:15:14.456737  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:15:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:15:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:15:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:15:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:15:23.409788  543705 memory.go:184] no items to output this cycle
I0321 22:15:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 22:15:31.641674  543705 disk_info.go:125] begin check local disk info of client
I0321 22:15:31.644315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:15:31.644321  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fff00 0xc0003fff40]
E0321 22:15:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:15:33.409775  543705 memory.go:184] no items to output this cycle
I0321 22:15:33.409782  543705 cpu.go:275] no items to output this cycle
I0321 22:15:39.245736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:15:39.245743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:15:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:15:43.410650  543705 memory.go:191] Add success.
I0321 22:15:43.409808  543705 cpu.go:282] Add success.
I0321 22:15:43.420796  543705 net.go:648] Add success.
I0321 22:15:43.423659  543705 net.go:770] primary dev: ETH0
I0321 22:15:43.423672  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:15:43.423684  543705 net.go:698] Add success.
I0321 22:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:15:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:15:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:15:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:15:53.409783  543705 cpu.go:275] no items to output this cycle
I0321 22:15:53.409786  543705 memory.go:184] no items to output this cycle
E0321 22:16:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:16:03.409773  543705 memory.go:184] no items to output this cycle
I0321 22:16:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 22:16:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:16:13.409810  543705 memory.go:191] Add success.
I0321 22:16:13.409821  543705 cpu.go:282] Add success.
W0321 22:16:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:16:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:16:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:16:13.420045  543705 net.go:648] Add success.
I0321 22:16:13.422543  543705 net.go:770] primary dev: ETH0
I0321 22:16:13.422555  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:16:13.422567  543705 net.go:698] Add success.
I0321 22:16:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:16:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:16:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0321 22:16:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:16:14.456519  543705 disk_worker.go:494] system disk:vda1
I0321 22:16:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:16:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:16:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:16:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:16:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:16:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:16:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 22:16:23.409788  543705 memory.go:184] no items to output this cycle
I0321 22:16:31.644863  543705 disk_info.go:125] begin check local disk info of client
I0321 22:16:31.647472  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:16:31.647478  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d400 0xc00035d440]
E0321 22:16:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:16:33.409779  543705 memory.go:184] no items to output this cycle
I0321 22:16:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:16:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:16:43.409829  543705 memory.go:191] Add success.
I0321 22:16:43.409838  543705 cpu.go:282] Add success.
I0321 22:16:43.420072  543705 net.go:648] Add success.
I0321 22:16:43.423143  543705 net.go:770] primary dev: ETH0
I0321 22:16:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:16:43.423172  543705 net.go:698] Add success.
I0321 22:16:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:16:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:16:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:16:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:16:53.409775  543705 memory.go:184] no items to output this cycle
I0321 22:16:53.409809  543705 cpu.go:275] no items to output this cycle
E0321 22:17:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:17:03.409796  543705 memory.go:184] no items to output this cycle
I0321 22:17:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 22:17:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:17:13.409783  543705 memory.go:191] Add success.
I0321 22:17:13.409808  543705 cpu.go:282] Add success.
W0321 22:17:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:17:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:17:13.420054  543705 net.go:648] Add success.
I0321 22:17:13.422553  543705 net.go:770] primary dev: ETH0
I0321 22:17:13.422567  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:17:13.422580  543705 net.go:698] Add success.
I0321 22:17:13.453118  543705 event_worker.go:152] Polling the log file for events...
W0321 22:17:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:17:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 22:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:17:14.455953  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:17:14.455962  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:17:14.455969  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:17:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 22:17:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:17:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:17:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:17:16.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:17:16.458009  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:17:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:17:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:17:16.472498  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:17:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:17:23.409797  543705 memory.go:184] no items to output this cycle
I0321 22:17:23.409828  543705 cpu.go:275] no items to output this cycle
I0321 22:17:31.647873  543705 disk_info.go:125] begin check local disk info of client
I0321 22:17:31.650437  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:17:31.650444  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1e40 0xc0003c1e80]
E0321 22:17:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:17:33.409793  543705 memory.go:184] no items to output this cycle
I0321 22:17:33.409795  543705 cpu.go:275] no items to output this cycle
E0321 22:17:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:17:43.409833  543705 memory.go:191] Add success.
I0321 22:17:43.409842  543705 cpu.go:282] Add success.
I0321 22:17:43.420015  543705 net.go:648] Add success.
I0321 22:17:43.423186  543705 net.go:770] primary dev: ETH0
I0321 22:17:43.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:17:43.423214  543705 net.go:698] Add success.
I0321 22:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:17:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:17:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:17:53.409804  543705 memory.go:184] no items to output this cycle
I0321 22:17:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 22:18:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:18:03.409777  543705 memory.go:184] no items to output this cycle
I0321 22:18:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 22:18:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:18:13.409789  543705 cpu.go:282] Add success.
I0321 22:18:13.409795  543705 memory.go:191] Add success.
W0321 22:18:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:18:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:18:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:18:13.420044  543705 net.go:648] Add success.
I0321 22:18:13.422638  543705 net.go:770] primary dev: ETH0
I0321 22:18:13.422651  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:18:13.422664  543705 net.go:698] Add success.
I0321 22:18:13.853830  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2fa1bfe-bd3b-454b-8537-692c986c74f0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:18:13.853864  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:18:14.454722  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:18:14.454851  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:18:14.454914  543705 disk_worker.go:708] disk space is not compliant
W0321 22:18:14.454917  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:18:14.456236  543705 disk_worker.go:494] system disk:vda1
I0321 22:18:14.456283  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:18:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:18:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:18:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:18:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:18:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:18:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:18:23.409770  543705 memory.go:184] no items to output this cycle
I0321 22:18:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 22:18:31.650899  543705 disk_info.go:125] begin check local disk info of client
I0321 22:18:31.653458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:18:31.653466  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396c40 0xc000396c80]
E0321 22:18:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:18:33.409903  543705 memory.go:184] no items to output this cycle
I0321 22:18:33.409925  543705 cpu.go:275] no items to output this cycle
I0321 22:18:39.249741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:18:39.249748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:18:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:18:43.410633  543705 memory.go:191] Add success.
I0321 22:18:43.409806  543705 cpu.go:282] Add success.
I0321 22:18:43.420352  543705 net.go:648] Add success.
I0321 22:18:43.423305  543705 net.go:770] primary dev: ETH0
I0321 22:18:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:18:43.423332  543705 net.go:698] Add success.
I0321 22:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:18:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:18:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:18:53.409781  543705 memory.go:184] no items to output this cycle
I0321 22:18:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 22:19:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:19:03.409779  543705 memory.go:184] no items to output this cycle
I0321 22:19:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 22:19:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:19:13.409804  543705 memory.go:191] Add success.
I0321 22:19:13.409804  543705 cpu.go:282] Add success.
W0321 22:19:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:19:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:19:13.420118  543705 net.go:648] Add success.
I0321 22:19:13.422770  543705 net.go:770] primary dev: ETH0
I0321 22:19:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:19:13.422795  543705 net.go:698] Add success.
I0321 22:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:19:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:19:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0321 22:19:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:19:14.456623  543705 disk_worker.go:494] system disk:vda1
I0321 22:19:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:19:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:19:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:19:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:19:23.409799  543705 memory.go:184] no items to output this cycle
I0321 22:19:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 22:19:31.653679  543705 disk_info.go:125] begin check local disk info of client
I0321 22:19:31.656333  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:19:31.656341  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ca000 0xc0000ca040]
E0321 22:19:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:19:33.409785  543705 memory.go:184] no items to output this cycle
I0321 22:19:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 22:19:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:19:43.409813  543705 cpu.go:282] Add success.
I0321 22:19:43.409821  543705 memory.go:191] Add success.
I0321 22:19:43.420066  543705 net.go:648] Add success.
I0321 22:19:43.423597  543705 net.go:770] primary dev: ETH0
I0321 22:19:43.423611  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:19:43.423623  543705 net.go:698] Add success.
I0321 22:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:19:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:19:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:19:53.409771  543705 memory.go:184] no items to output this cycle
I0321 22:19:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 22:20:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:20:03.409775  543705 memory.go:184] no items to output this cycle
I0321 22:20:03.409783  543705 cpu.go:275] no items to output this cycle
E0321 22:20:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:20:13.409801  543705 memory.go:191] Add success.
I0321 22:20:13.409810  543705 cpu.go:282] Add success.
W0321 22:20:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:20:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:20:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:20:13.420118  543705 net.go:648] Add success.
I0321 22:20:13.422911  543705 net.go:770] primary dev: ETH0
I0321 22:20:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:20:13.422935  543705 net.go:698] Add success.
I0321 22:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:20:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:20:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 22:20:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:20:14.456558  543705 disk_worker.go:494] system disk:vda1
I0321 22:20:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:20:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:20:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:20:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:20:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:20:23.409776  543705 memory.go:184] no items to output this cycle
I0321 22:20:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 22:20:31.656912  543705 disk_info.go:125] begin check local disk info of client
I0321 22:20:31.659472  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:20:31.659479  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf180 0xc0002bf1c0]
E0321 22:20:33.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:20:33.409902  543705 memory.go:184] no items to output this cycle
I0321 22:20:33.409958  543705 cpu.go:275] no items to output this cycle
E0321 22:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:20:43.409786  543705 memory.go:191] Add success.
I0321 22:20:43.409811  543705 cpu.go:282] Add success.
I0321 22:20:43.419965  543705 net.go:648] Add success.
I0321 22:20:43.423222  543705 net.go:770] primary dev: ETH0
I0321 22:20:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:20:43.423247  543705 net.go:698] Add success.
I0321 22:20:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:20:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:20:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:20:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:20:53.409798  543705 memory.go:184] no items to output this cycle
I0321 22:20:53.409803  543705 cpu.go:275] no items to output this cycle
E0321 22:21:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:21:03.409795  543705 memory.go:184] no items to output this cycle
I0321 22:21:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 22:21:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:21:13.409782  543705 memory.go:191] Add success.
I0321 22:21:13.409802  543705 cpu.go:282] Add success.
W0321 22:21:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:21:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:21:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:21:13.420067  543705 net.go:648] Add success.
I0321 22:21:13.423119  543705 net.go:770] primary dev: ETH0
I0321 22:21:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:21:13.423151  543705 net.go:698] Add success.
I0321 22:21:13.542214  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fac49ca-5cd6-462f-a042-dd427b28c12a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:21:13.542246  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:21:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:21:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:21:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 22:21:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:21:14.456684  543705 disk_worker.go:494] system disk:vda1
I0321 22:21:14.456724  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:21:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:21:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:21:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:21:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:21:23.410531  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:21:23.410556  543705 memory.go:184] no items to output this cycle
I0321 22:21:23.410671  543705 cpu.go:275] no items to output this cycle
I0321 22:21:31.659937  543705 disk_info.go:125] begin check local disk info of client
I0321 22:21:31.662511  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:21:31.662518  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e000 0xc00028e040]
E0321 22:21:33.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:21:33.409757  543705 memory.go:184] no items to output this cycle
I0321 22:21:33.409797  543705 cpu.go:275] no items to output this cycle
I0321 22:21:39.253736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:21:39.253744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:21:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:21:43.410689  543705 memory.go:191] Add success.
I0321 22:21:43.409789  543705 cpu.go:282] Add success.
I0321 22:21:43.420429  543705 net.go:648] Add success.
I0321 22:21:43.423050  543705 net.go:770] primary dev: ETH0
I0321 22:21:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:21:43.423076  543705 net.go:698] Add success.
I0321 22:21:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:21:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:21:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:21:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:21:53.409768  543705 memory.go:184] no items to output this cycle
I0321 22:21:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 22:22:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:22:03.409762  543705 memory.go:184] no items to output this cycle
I0321 22:22:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 22:22:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:22:13.409806  543705 memory.go:191] Add success.
I0321 22:22:13.409819  543705 cpu.go:282] Add success.
W0321 22:22:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:22:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:22:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:22:13.420205  543705 net.go:648] Add success.
I0321 22:22:13.423429  543705 net.go:770] primary dev: ETH0
I0321 22:22:13.423444  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:22:13.423456  543705 net.go:698] Add success.
W0321 22:22:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:22:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 22:22:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:22:14.456904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:22:14.456914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:22:14.456920  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:22:14.456967  543705 disk_worker.go:494] system disk:vda1
I0321 22:22:14.457009  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:22:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:22:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:22:16.458082  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:22:16.458102  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:22:16.458142  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:22:16.458160  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:22:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:22:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:22:23.409811  543705 memory.go:184] no items to output this cycle
I0321 22:22:23.409821  543705 cpu.go:275] no items to output this cycle
I0321 22:22:31.662970  543705 disk_info.go:125] begin check local disk info of client
I0321 22:22:31.665539  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:22:31.665546  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe000 0xc0003fe040]
E0321 22:22:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:22:33.409782  543705 memory.go:184] no items to output this cycle
I0321 22:22:33.409803  543705 cpu.go:275] no items to output this cycle
E0321 22:22:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:22:43.409793  543705 cpu.go:282] Add success.
I0321 22:22:43.409797  543705 memory.go:191] Add success.
I0321 22:22:43.420036  543705 net.go:648] Add success.
I0321 22:22:43.422869  543705 net.go:770] primary dev: ETH0
I0321 22:22:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:22:43.422895  543705 net.go:698] Add success.
I0321 22:22:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:22:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:22:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:22:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:22:53.409793  543705 memory.go:184] no items to output this cycle
I0321 22:22:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:23:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:23:03.409776  543705 memory.go:184] no items to output this cycle
I0321 22:23:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 22:23:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:23:13.409773  543705 memory.go:191] Add success.
W0321 22:23:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 22:23:13.409802  543705 cpu.go:282] Add success.
W0321 22:23:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:23:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:23:13.420046  543705 net.go:648] Add success.
I0321 22:23:13.422991  543705 net.go:770] primary dev: ETH0
I0321 22:23:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:23:13.423016  543705 net.go:698] Add success.
I0321 22:23:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:23:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:23:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 22:23:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:23:14.456529  543705 disk_worker.go:494] system disk:vda1
I0321 22:23:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:23:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:23:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:23:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:23:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:23:23.409821  543705 memory.go:184] no items to output this cycle
I0321 22:23:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 22:23:31.665670  543705 disk_info.go:125] begin check local disk info of client
I0321 22:23:31.668280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:23:31.668287  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330b00 0xc000330b40]
E0321 22:23:33.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:23:33.409978  543705 memory.go:184] no items to output this cycle
I0321 22:23:33.410099  543705 cpu.go:275] no items to output this cycle
E0321 22:23:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:23:43.409781  543705 memory.go:191] Add success.
I0321 22:23:43.409811  543705 cpu.go:282] Add success.
I0321 22:23:43.419857  543705 net.go:648] Add success.
I0321 22:23:43.422614  543705 net.go:770] primary dev: ETH0
I0321 22:23:43.422628  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:23:43.422643  543705 net.go:698] Add success.
I0321 22:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:23:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:23:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:23:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:23:53.409765  543705 memory.go:184] no items to output this cycle
I0321 22:23:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 22:24:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:24:03.409771  543705 memory.go:184] no items to output this cycle
I0321 22:24:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 22:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:24:13.409810  543705 memory.go:191] Add success.
I0321 22:24:13.409815  543705 cpu.go:282] Add success.
W0321 22:24:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:24:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:24:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:24:13.420061  543705 net.go:648] Add success.
I0321 22:24:13.422744  543705 net.go:770] primary dev: ETH0
I0321 22:24:13.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:24:13.422772  543705 net.go:698] Add success.
I0321 22:24:13.594256  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"23e95ecc-4f38-4d9f-af30-15146c740aa7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:24:13.594310  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:24:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:24:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:24:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0321 22:24:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:24:14.456771  543705 disk_worker.go:494] system disk:vda1
I0321 22:24:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:24:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:24:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:24:23.409806  543705 memory.go:184] no items to output this cycle
I0321 22:24:23.409814  543705 cpu.go:275] no items to output this cycle
I0321 22:24:31.668372  543705 disk_info.go:125] begin check local disk info of client
I0321 22:24:31.670991  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:24:31.670999  543705 disk_info.go:196] parse disk info done, disk is : [0xc000245900 0xc000245940]
E0321 22:24:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:24:33.409775  543705 memory.go:184] no items to output this cycle
I0321 22:24:33.409885  543705 cpu.go:275] no items to output this cycle
I0321 22:24:39.257742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:24:39.257749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:24:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:24:43.410555  543705 memory.go:191] Add success.
I0321 22:24:43.409800  543705 cpu.go:282] Add success.
I0321 22:24:43.420324  543705 net.go:648] Add success.
I0321 22:24:43.422870  543705 net.go:770] primary dev: ETH0
I0321 22:24:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:24:43.422899  543705 net.go:698] Add success.
I0321 22:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:24:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:24:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:24:53.409767  543705 memory.go:184] no items to output this cycle
I0321 22:24:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 22:25:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:25:03.409775  543705 memory.go:184] no items to output this cycle
I0321 22:25:03.409776  543705 cpu.go:275] no items to output this cycle
E0321 22:25:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:25:13.409793  543705 memory.go:191] Add success.
I0321 22:25:13.409793  543705 cpu.go:282] Add success.
W0321 22:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:25:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:25:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:25:13.420103  543705 net.go:648] Add success.
I0321 22:25:13.422754  543705 net.go:770] primary dev: ETH0
I0321 22:25:13.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:25:13.422783  543705 net.go:698] Add success.
I0321 22:25:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:25:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:25:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 22:25:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:25:14.456570  543705 disk_worker.go:494] system disk:vda1
I0321 22:25:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:25:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:25:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:25:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:25:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:25:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:25:23.409799  543705 memory.go:184] no items to output this cycle
I0321 22:25:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 22:25:31.671998  543705 disk_info.go:125] begin check local disk info of client
I0321 22:25:31.674512  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:25:31.674520  543705 disk_info.go:196] parse disk info done, disk is : [0xc000274b00 0xc000274b40]
E0321 22:25:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:25:33.409762  543705 memory.go:184] no items to output this cycle
I0321 22:25:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 22:25:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:25:43.409801  543705 memory.go:191] Add success.
I0321 22:25:43.409803  543705 cpu.go:282] Add success.
I0321 22:25:43.420045  543705 net.go:648] Add success.
I0321 22:25:43.423147  543705 net.go:770] primary dev: ETH0
I0321 22:25:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:25:43.423176  543705 net.go:698] Add success.
I0321 22:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:25:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:25:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:25:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:25:53.409803  543705 memory.go:184] no items to output this cycle
I0321 22:25:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 22:26:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:26:03.409768  543705 memory.go:184] no items to output this cycle
I0321 22:26:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 22:26:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:26:13.409824  543705 memory.go:191] Add success.
I0321 22:26:13.409839  543705 cpu.go:282] Add success.
W0321 22:26:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:26:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:26:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:26:13.420201  543705 net.go:648] Add success.
I0321 22:26:13.423212  543705 net.go:770] primary dev: ETH0
I0321 22:26:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:26:13.423237  543705 net.go:698] Add success.
I0321 22:26:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:26:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:26:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 22:26:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:26:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 22:26:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:26:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:26:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:26:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:26:16.472537  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:26:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:26:23.409779  543705 memory.go:184] no items to output this cycle
I0321 22:26:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 22:26:31.675016  543705 disk_info.go:125] begin check local disk info of client
I0321 22:26:31.677486  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:26:31.677492  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abe00 0xc0001abe40]
E0321 22:26:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:26:33.409760  543705 memory.go:184] no items to output this cycle
I0321 22:26:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:26:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:26:43.409990  543705 cpu.go:282] Add success.
I0321 22:26:43.410035  543705 memory.go:191] Add success.
I0321 22:26:43.419723  543705 net.go:648] Add success.
I0321 22:26:43.422229  543705 net.go:770] primary dev: ETH0
I0321 22:26:43.422242  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:26:43.422254  543705 net.go:698] Add success.
I0321 22:26:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:26:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:26:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:26:53.409800  543705 memory.go:184] no items to output this cycle
I0321 22:26:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 22:27:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:27:03.409806  543705 memory.go:184] no items to output this cycle
I0321 22:27:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 22:27:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:27:13.409792  543705 memory.go:191] Add success.
I0321 22:27:13.409797  543705 cpu.go:282] Add success.
W0321 22:27:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:27:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:27:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:27:13.420045  543705 net.go:648] Add success.
I0321 22:27:13.422736  543705 net.go:770] primary dev: ETH0
I0321 22:27:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:27:13.422766  543705 net.go:698] Add success.
I0321 22:27:13.428859  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 22:27:13.453036  543705 event_worker.go:152] Polling the log file for events...
I0321 22:27:13.463347  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af21d928-5bc4-4b05-ac1c-3b94fc654097","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:27:13.463390  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 22:27:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:27:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0321 22:27:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:27:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:27:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:27:14.456938  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:27:14.456971  543705 disk_worker.go:494] system disk:vda1
I0321 22:27:14.456998  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:27:15.456781  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:27:15.456789  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:27:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:27:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:27:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:27:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:27:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:27:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:27:23.409823  543705 memory.go:184] no items to output this cycle
I0321 22:27:23.409837  543705 cpu.go:275] no items to output this cycle
I0321 22:27:31.677687  543705 disk_info.go:125] begin check local disk info of client
I0321 22:27:31.680201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:27:31.680207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f4440 0xc0001f4480]
E0321 22:27:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:27:33.409803  543705 memory.go:184] no items to output this cycle
I0321 22:27:33.409818  543705 cpu.go:275] no items to output this cycle
I0321 22:27:39.261744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:27:39.261750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:27:43.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:27:43.410810  543705 memory.go:191] Add success.
I0321 22:27:43.409965  543705 cpu.go:282] Add success.
I0321 22:27:43.419729  543705 net.go:648] Add success.
I0321 22:27:43.422219  543705 net.go:770] primary dev: ETH0
I0321 22:27:43.422232  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:27:43.422244  543705 net.go:698] Add success.
I0321 22:27:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:27:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:27:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:27:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:27:53.409808  543705 memory.go:184] no items to output this cycle
I0321 22:27:53.409815  543705 cpu.go:275] no items to output this cycle
E0321 22:28:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:28:03.409792  543705 cpu.go:275] no items to output this cycle
I0321 22:28:03.409796  543705 memory.go:184] no items to output this cycle
E0321 22:28:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:28:13.409794  543705 memory.go:191] Add success.
I0321 22:28:13.409798  543705 cpu.go:282] Add success.
W0321 22:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:28:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:28:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:28:13.420040  543705 net.go:648] Add success.
I0321 22:28:13.423791  543705 net.go:770] primary dev: ETH0
I0321 22:28:13.423805  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:28:13.423817  543705 net.go:698] Add success.
I0321 22:28:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:28:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:28:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 22:28:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:28:14.456566  543705 disk_worker.go:494] system disk:vda1
I0321 22:28:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:28:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:28:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:28:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:28:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:28:23.409781  543705 memory.go:184] no items to output this cycle
I0321 22:28:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 22:28:31.681050  543705 disk_info.go:125] begin check local disk info of client
I0321 22:28:31.683610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:28:31.683617  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa480 0xc0001aa4c0]
E0321 22:28:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:28:33.409795  543705 memory.go:184] no items to output this cycle
I0321 22:28:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 22:28:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:28:43.409787  543705 memory.go:191] Add success.
I0321 22:28:43.409789  543705 cpu.go:282] Add success.
I0321 22:28:43.419894  543705 net.go:648] Add success.
I0321 22:28:43.422706  543705 net.go:770] primary dev: ETH0
I0321 22:28:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:28:43.422731  543705 net.go:698] Add success.
I0321 22:28:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:28:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:28:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:28:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:28:53.409773  543705 memory.go:184] no items to output this cycle
I0321 22:28:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 22:29:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:29:03.409780  543705 memory.go:184] no items to output this cycle
I0321 22:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 22:29:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:29:13.409783  543705 memory.go:191] Add success.
I0321 22:29:13.409806  543705 cpu.go:282] Add success.
W0321 22:29:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:29:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:29:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:29:13.420178  543705 net.go:648] Add success.
I0321 22:29:13.423053  543705 net.go:770] primary dev: ETH0
I0321 22:29:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:29:13.423081  543705 net.go:698] Add success.
I0321 22:29:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:29:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:29:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 22:29:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:29:14.456558  543705 disk_worker.go:494] system disk:vda1
I0321 22:29:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:29:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:29:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:29:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:29:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:29:23.409794  543705 memory.go:184] no items to output this cycle
I0321 22:29:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 22:29:31.684062  543705 disk_info.go:125] begin check local disk info of client
I0321 22:29:31.686659  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:29:31.686665  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa280 0xc0001aa2c0]
E0321 22:29:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:29:33.409774  543705 memory.go:184] no items to output this cycle
I0321 22:29:33.409789  543705 cpu.go:275] no items to output this cycle
E0321 22:29:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:29:43.409792  543705 memory.go:191] Add success.
I0321 22:29:43.409794  543705 cpu.go:282] Add success.
I0321 22:29:43.420057  543705 net.go:648] Add success.
I0321 22:29:43.422885  543705 net.go:770] primary dev: ETH0
I0321 22:29:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:29:43.422922  543705 net.go:698] Add success.
I0321 22:29:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:29:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:29:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:29:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:29:53.409772  543705 memory.go:184] no items to output this cycle
I0321 22:29:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:30:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:30:03.409777  543705 memory.go:184] no items to output this cycle
I0321 22:30:03.409778  543705 cpu.go:275] no items to output this cycle
E0321 22:30:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:30:13.409787  543705 memory.go:191] Add success.
I0321 22:30:13.409788  543705 cpu.go:282] Add success.
W0321 22:30:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:30:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:30:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:30:13.420117  543705 net.go:648] Add success.
I0321 22:30:13.423331  543705 net.go:770] primary dev: ETH0
I0321 22:30:13.423344  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:30:13.423356  543705 net.go:698] Add success.
I0321 22:30:14.149956  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c0561e1f-95b2-4b8c-a8b1-b82f402463bd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:30:14.150004  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:30:14.454544  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:30:14.454780  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:30:14.454790  543705 disk_worker.go:708] disk space is not compliant
W0321 22:30:14.454792  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:30:14.456344  543705 disk_worker.go:494] system disk:vda1
I0321 22:30:14.456381  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:30:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:30:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:30:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:30:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:30:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:30:23.409778  543705 memory.go:184] no items to output this cycle
I0321 22:30:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 22:30:31.686751  543705 disk_info.go:125] begin check local disk info of client
I0321 22:30:31.689358  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:30:31.689366  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521500 0xc000521540]
E0321 22:30:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:30:33.409781  543705 memory.go:184] no items to output this cycle
I0321 22:30:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 22:30:39.265732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:30:39.265739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:30:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:30:43.410879  543705 memory.go:191] Add success.
I0321 22:30:43.409813  543705 cpu.go:282] Add success.
I0321 22:30:43.420793  543705 net.go:648] Add success.
I0321 22:30:43.423392  543705 net.go:770] primary dev: ETH0
I0321 22:30:43.423405  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:30:43.423416  543705 net.go:698] Add success.
I0321 22:30:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:30:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:30:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:30:53.410399  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:30:53.410418  543705 memory.go:184] no items to output this cycle
I0321 22:30:53.410430  543705 cpu.go:275] no items to output this cycle
E0321 22:31:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:31:03.409799  543705 memory.go:184] no items to output this cycle
I0321 22:31:03.409813  543705 cpu.go:275] no items to output this cycle
E0321 22:31:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:31:13.409786  543705 memory.go:191] Add success.
I0321 22:31:13.409809  543705 cpu.go:282] Add success.
W0321 22:31:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:31:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:31:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:31:13.420196  543705 net.go:648] Add success.
I0321 22:31:13.423215  543705 net.go:770] primary dev: ETH0
I0321 22:31:13.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:31:13.423246  543705 net.go:698] Add success.
I0321 22:31:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:31:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:31:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0321 22:31:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:31:14.456557  543705 disk_worker.go:494] system disk:vda1
I0321 22:31:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:31:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:31:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:31:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:31:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:31:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:31:23.409804  543705 memory.go:184] no items to output this cycle
I0321 22:31:23.409817  543705 cpu.go:275] no items to output this cycle
I0321 22:31:31.689677  543705 disk_info.go:125] begin check local disk info of client
I0321 22:31:31.692260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:31:31.692267  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003220c0 0xc000322100]
E0321 22:31:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:31:33.409788  543705 memory.go:184] no items to output this cycle
I0321 22:31:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:31:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:31:43.409807  543705 memory.go:191] Add success.
I0321 22:31:43.409815  543705 cpu.go:282] Add success.
I0321 22:31:43.419950  543705 net.go:648] Add success.
I0321 22:31:43.422610  543705 net.go:770] primary dev: ETH0
I0321 22:31:43.422623  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:31:43.422636  543705 net.go:698] Add success.
I0321 22:31:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:31:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:31:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:31:53.409772  543705 cpu.go:275] no items to output this cycle
E0321 22:31:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:31:53.409790  543705 memory.go:184] no items to output this cycle
E0321 22:32:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:32:03.409775  543705 memory.go:184] no items to output this cycle
I0321 22:32:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 22:32:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:32:13.409794  543705 memory.go:191] Add success.
I0321 22:32:13.409796  543705 cpu.go:282] Add success.
W0321 22:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:32:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:32:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:32:13.420215  543705 net.go:648] Add success.
I0321 22:32:13.423651  543705 net.go:770] primary dev: ETH0
I0321 22:32:13.423664  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:32:13.423675  543705 net.go:698] Add success.
W0321 22:32:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:32:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 22:32:14.455208  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:32:14.455925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:32:14.455934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:32:14.455940  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:32:14.456578  543705 disk_worker.go:494] system disk:vda1
I0321 22:32:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:32:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:32:15.456791  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:32:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:32:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:32:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:32:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:32:16.472344  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:32:23.409795  543705 memory.go:184] no items to output this cycle
I0321 22:32:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 22:32:31.693098  543705 disk_info.go:125] begin check local disk info of client
I0321 22:32:31.695718  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:32:31.695725  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331900 0xc000331940]
E0321 22:32:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:32:33.409787  543705 cpu.go:275] no items to output this cycle
I0321 22:32:33.409794  543705 memory.go:184] no items to output this cycle
E0321 22:32:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:32:43.409806  543705 memory.go:191] Add success.
I0321 22:32:43.409822  543705 cpu.go:282] Add success.
I0321 22:32:43.420084  543705 net.go:648] Add success.
I0321 22:32:43.422877  543705 net.go:770] primary dev: ETH0
I0321 22:32:43.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:32:43.422906  543705 net.go:698] Add success.
I0321 22:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:32:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:32:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:32:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:32:53.409772  543705 memory.go:184] no items to output this cycle
I0321 22:32:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 22:33:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:33:03.409772  543705 memory.go:184] no items to output this cycle
I0321 22:33:03.409793  543705 cpu.go:275] no items to output this cycle
E0321 22:33:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:33:13.409807  543705 memory.go:191] Add success.
I0321 22:33:13.409817  543705 cpu.go:282] Add success.
W0321 22:33:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:33:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:33:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:33:13.420142  543705 net.go:648] Add success.
I0321 22:33:13.422763  543705 net.go:770] primary dev: ETH0
I0321 22:33:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:33:13.422789  543705 net.go:698] Add success.
I0321 22:33:13.464076  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f1fd2c93-3337-4d39-a437-66c7806540e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:33:13.464109  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:33:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:33:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:33:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0321 22:33:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:33:14.456735  543705 disk_worker.go:494] system disk:vda1
I0321 22:33:14.456768  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:33:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:33:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:33:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:33:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:33:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:33:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:33:23.409819  543705 memory.go:184] no items to output this cycle
I0321 22:33:23.409828  543705 cpu.go:275] no items to output this cycle
I0321 22:33:31.697119  543705 disk_info.go:125] begin check local disk info of client
I0321 22:33:31.699701  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:33:31.699708  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab7c0 0xc0001ab800]
E0321 22:33:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:33:33.409776  543705 memory.go:184] no items to output this cycle
I0321 22:33:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 22:33:39.269729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:33:39.269735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:33:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:33:43.410611  543705 memory.go:191] Add success.
I0321 22:33:43.409797  543705 cpu.go:282] Add success.
I0321 22:33:43.420504  543705 net.go:648] Add success.
I0321 22:33:43.422981  543705 net.go:770] primary dev: ETH0
I0321 22:33:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:33:43.423006  543705 net.go:698] Add success.
I0321 22:33:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:33:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:33:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:33:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:33:53.409776  543705 memory.go:184] no items to output this cycle
I0321 22:33:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 22:34:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:34:03.409771  543705 memory.go:184] no items to output this cycle
I0321 22:34:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 22:34:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:34:13.409784  543705 memory.go:191] Add success.
I0321 22:34:13.409805  543705 cpu.go:282] Add success.
W0321 22:34:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:34:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:34:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:34:13.420460  543705 net.go:648] Add success.
I0321 22:34:13.423353  543705 net.go:770] primary dev: ETH0
I0321 22:34:13.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:34:13.423377  543705 net.go:698] Add success.
I0321 22:34:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:34:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:34:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 22:34:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:34:14.456540  543705 disk_worker.go:494] system disk:vda1
I0321 22:34:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:34:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:34:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:34:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:34:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:34:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:34:23.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:34:23.409914  543705 memory.go:184] no items to output this cycle
I0321 22:34:23.409924  543705 cpu.go:275] no items to output this cycle
I0321 22:34:31.701149  543705 disk_info.go:125] begin check local disk info of client
I0321 22:34:31.703775  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:34:31.703783  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348940 0xc000348980]
E0321 22:34:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:34:33.409766  543705 memory.go:184] no items to output this cycle
I0321 22:34:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 22:34:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:34:43.409786  543705 memory.go:191] Add success.
I0321 22:34:43.409812  543705 cpu.go:282] Add success.
I0321 22:34:43.420010  543705 net.go:648] Add success.
I0321 22:34:43.422653  543705 net.go:770] primary dev: ETH0
I0321 22:34:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:34:43.422678  543705 net.go:698] Add success.
I0321 22:34:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:34:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:34:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:34:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:34:53.409783  543705 memory.go:184] no items to output this cycle
I0321 22:34:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 22:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:35:03.409783  543705 memory.go:184] no items to output this cycle
I0321 22:35:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 22:35:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:35:13.409783  543705 memory.go:191] Add success.
I0321 22:35:13.409802  543705 cpu.go:282] Add success.
W0321 22:35:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:35:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:35:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:35:13.420099  543705 net.go:648] Add success.
I0321 22:35:13.422679  543705 net.go:770] primary dev: ETH0
I0321 22:35:13.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:35:13.422704  543705 net.go:698] Add success.
I0321 22:35:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:35:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:35:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 22:35:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:35:14.456566  543705 disk_worker.go:494] system disk:vda1
I0321 22:35:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:35:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:35:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:35:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:35:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:35:23.409781  543705 memory.go:184] no items to output this cycle
I0321 22:35:23.409850  543705 cpu.go:275] no items to output this cycle
I0321 22:35:31.705166  543705 disk_info.go:125] begin check local disk info of client
I0321 22:35:31.707765  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:35:31.707772  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaac0 0xc0001aab00]
E0321 22:35:33.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:35:33.409757  543705 memory.go:184] no items to output this cycle
I0321 22:35:33.409794  543705 cpu.go:275] no items to output this cycle
E0321 22:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:35:43.409810  543705 memory.go:191] Add success.
I0321 22:35:43.409818  543705 cpu.go:282] Add success.
I0321 22:35:43.420250  543705 net.go:648] Add success.
I0321 22:35:43.422916  543705 net.go:770] primary dev: ETH0
I0321 22:35:43.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:35:43.422944  543705 net.go:698] Add success.
I0321 22:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:35:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:35:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:35:53.409779  543705 memory.go:184] no items to output this cycle
I0321 22:35:53.409784  543705 cpu.go:275] no items to output this cycle
E0321 22:36:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:36:03.409798  543705 memory.go:184] no items to output this cycle
I0321 22:36:03.409817  543705 cpu.go:275] no items to output this cycle
E0321 22:36:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:36:13.409791  543705 memory.go:191] Add success.
I0321 22:36:13.409795  543705 cpu.go:282] Add success.
W0321 22:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:36:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:36:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:36:13.420078  543705 net.go:648] Add success.
I0321 22:36:13.423071  543705 net.go:770] primary dev: ETH0
I0321 22:36:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:36:13.423096  543705 net.go:698] Add success.
I0321 22:36:13.468663  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4d22c018-c796-419a-912e-6313e0732517","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:36:13.468695  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:36:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:36:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:36:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 22:36:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:36:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 22:36:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:36:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:36:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:36:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:36:23.409803  543705 memory.go:184] no items to output this cycle
I0321 22:36:23.409919  543705 cpu.go:275] no items to output this cycle
I0321 22:36:31.709190  543705 disk_info.go:125] begin check local disk info of client
I0321 22:36:31.711773  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:36:31.711779  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033e540 0xc00033e580]
E0321 22:36:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:36:33.409768  543705 memory.go:184] no items to output this cycle
I0321 22:36:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 22:36:39.273742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:36:39.273749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:36:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:36:43.410762  543705 memory.go:191] Add success.
I0321 22:36:43.409819  543705 cpu.go:282] Add success.
I0321 22:36:43.419758  543705 net.go:648] Add success.
I0321 22:36:43.422631  543705 net.go:770] primary dev: ETH0
I0321 22:36:43.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:36:43.422666  543705 net.go:698] Add success.
I0321 22:36:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:36:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:36:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:36:53.410216  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:36:53.410223  543705 cpu.go:275] no items to output this cycle
I0321 22:36:53.410233  543705 memory.go:184] no items to output this cycle
E0321 22:37:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:37:03.409778  543705 memory.go:184] no items to output this cycle
I0321 22:37:03.409800  543705 cpu.go:275] no items to output this cycle
W0321 22:37:13.409704  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:37:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:37:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:37:13.409833  543705 cpu.go:282] Add success.
E0321 22:37:13.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:37:13.409854  543705 memory.go:191] Add success.
I0321 22:37:13.420067  543705 net.go:648] Add success.
I0321 22:37:13.422737  543705 net.go:770] primary dev: ETH0
I0321 22:37:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:37:13.422766  543705 net.go:698] Add success.
I0321 22:37:13.453319  543705 event_worker.go:152] Polling the log file for events...
W0321 22:37:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:37:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 22:37:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:37:14.456786  543705 disk_worker.go:494] system disk:vda1
I0321 22:37:14.456828  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:37:14.456993  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:37:14.457002  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:37:14.457008  543705 custom_config.go:64] query custom config with name: gpu
E0321 22:37:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:37:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:37:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:37:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:37:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:37:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:37:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:37:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:37:23.409809  543705 memory.go:184] no items to output this cycle
I0321 22:37:23.409855  543705 cpu.go:275] no items to output this cycle
I0321 22:37:31.713204  543705 disk_info.go:125] begin check local disk info of client
I0321 22:37:31.715775  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:37:31.715781  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c50c0 0xc0000c5100]
E0321 22:37:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:37:33.409780  543705 memory.go:184] no items to output this cycle
I0321 22:37:33.409788  543705 cpu.go:275] no items to output this cycle
E0321 22:37:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:37:43.409806  543705 memory.go:191] Add success.
I0321 22:37:43.409806  543705 cpu.go:282] Add success.
I0321 22:37:43.420195  543705 net.go:648] Add success.
I0321 22:37:43.422763  543705 net.go:770] primary dev: ETH0
I0321 22:37:43.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:37:43.422796  543705 net.go:698] Add success.
I0321 22:37:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:37:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:37:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:37:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:37:53.409771  543705 memory.go:184] no items to output this cycle
I0321 22:37:53.409781  543705 cpu.go:275] no items to output this cycle
E0321 22:38:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:38:03.409795  543705 memory.go:184] no items to output this cycle
I0321 22:38:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 22:38:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:38:13.409774  543705 memory.go:191] Add success.
I0321 22:38:13.409802  543705 cpu.go:282] Add success.
W0321 22:38:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:38:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:38:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:38:13.420140  543705 net.go:648] Add success.
I0321 22:38:13.422905  543705 net.go:770] primary dev: ETH0
I0321 22:38:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:38:13.422930  543705 net.go:698] Add success.
I0321 22:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:38:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:38:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 22:38:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:38:14.456780  543705 disk_worker.go:494] system disk:vda1
I0321 22:38:14.456810  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:38:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:38:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:38:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:38:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:38:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:38:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:38:23.409792  543705 memory.go:184] no items to output this cycle
I0321 22:38:23.409849  543705 cpu.go:275] no items to output this cycle
I0321 22:38:31.717234  543705 disk_info.go:125] begin check local disk info of client
I0321 22:38:31.719812  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:38:31.719819  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498b40 0xc000498b80]
E0321 22:38:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:38:33.409793  543705 memory.go:184] no items to output this cycle
I0321 22:38:33.409810  543705 cpu.go:275] no items to output this cycle
I0321 22:38:43.409888  543705 cpu.go:282] Add success.
E0321 22:38:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:38:43.409922  543705 memory.go:191] Add success.
I0321 22:38:43.419741  543705 net.go:648] Add success.
I0321 22:38:43.422357  543705 net.go:770] primary dev: ETH0
I0321 22:38:43.422372  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:38:43.422386  543705 net.go:698] Add success.
I0321 22:38:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:38:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:38:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:38:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:38:53.409796  543705 memory.go:184] no items to output this cycle
I0321 22:38:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:39:03.409778  543705 memory.go:184] no items to output this cycle
I0321 22:39:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 22:39:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:39:13.409782  543705 memory.go:191] Add success.
W0321 22:39:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 22:39:13.409815  543705 cpu.go:282] Add success.
W0321 22:39:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:39:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:39:13.420121  543705 net.go:648] Add success.
I0321 22:39:13.423083  543705 net.go:770] primary dev: ETH0
I0321 22:39:13.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:39:13.423113  543705 net.go:698] Add success.
I0321 22:39:13.469111  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c1807bd6-319a-4b71-a1f5-7cee031cc474","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:39:13.469144  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:39:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:39:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:39:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0321 22:39:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:39:14.456737  543705 disk_worker.go:494] system disk:vda1
I0321 22:39:14.456774  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:39:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:39:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:39:16.472395  543705 disk_local_worker.go:436] Get disk info: []
I0321 22:39:23.409799  543705 cpu.go:275] no items to output this cycle
E0321 22:39:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:39:23.409819  543705 memory.go:184] no items to output this cycle
I0321 22:39:31.721248  543705 disk_info.go:125] begin check local disk info of client
I0321 22:39:31.723823  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:39:31.723830  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290e00 0xc000290e40]
E0321 22:39:33.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:39:33.409888  543705 memory.go:184] no items to output this cycle
I0321 22:39:33.409942  543705 cpu.go:275] no items to output this cycle
I0321 22:39:39.277736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:39:39.277742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:39:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:39:43.410711  543705 memory.go:191] Add success.
I0321 22:39:43.409800  543705 cpu.go:282] Add success.
I0321 22:39:43.420417  543705 net.go:648] Add success.
I0321 22:39:43.423214  543705 net.go:770] primary dev: ETH0
I0321 22:39:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:39:43.423237  543705 net.go:698] Add success.
I0321 22:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:39:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:39:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:39:53.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:39:53.410267  543705 memory.go:184] no items to output this cycle
I0321 22:39:53.410280  543705 cpu.go:275] no items to output this cycle
E0321 22:40:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:40:03.409778  543705 memory.go:184] no items to output this cycle
I0321 22:40:03.409780  543705 cpu.go:275] no items to output this cycle
E0321 22:40:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:40:13.409790  543705 memory.go:191] Add success.
I0321 22:40:13.409791  543705 cpu.go:282] Add success.
W0321 22:40:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:40:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:40:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:40:13.420169  543705 net.go:648] Add success.
I0321 22:40:13.423072  543705 net.go:770] primary dev: ETH0
I0321 22:40:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:40:13.423096  543705 net.go:698] Add success.
I0321 22:40:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:40:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:40:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0321 22:40:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:40:14.456485  543705 disk_worker.go:494] system disk:vda1
I0321 22:40:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:40:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:40:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:40:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:40:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:40:23.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:40:23.409895  543705 memory.go:184] no items to output this cycle
I0321 22:40:23.409949  543705 cpu.go:275] no items to output this cycle
I0321 22:40:31.725261  543705 disk_info.go:125] begin check local disk info of client
I0321 22:40:31.727803  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:40:31.727811  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a440 0xc00029a480]
E0321 22:40:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:40:33.409770  543705 memory.go:184] no items to output this cycle
I0321 22:40:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 22:40:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:40:43.409775  543705 memory.go:191] Add success.
I0321 22:40:43.409804  543705 cpu.go:282] Add success.
I0321 22:40:43.419854  543705 net.go:648] Add success.
I0321 22:40:43.422938  543705 net.go:770] primary dev: ETH0
I0321 22:40:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:40:43.422964  543705 net.go:698] Add success.
I0321 22:40:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:40:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:40:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:40:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:40:53.409799  543705 memory.go:184] no items to output this cycle
I0321 22:40:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 22:41:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:41:03.409773  543705 memory.go:184] no items to output this cycle
I0321 22:41:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 22:41:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:41:13.409809  543705 memory.go:191] Add success.
I0321 22:41:13.409814  543705 cpu.go:282] Add success.
W0321 22:41:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:41:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:41:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:41:13.420056  543705 net.go:648] Add success.
I0321 22:41:13.422877  543705 net.go:770] primary dev: ETH0
I0321 22:41:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:41:13.422905  543705 net.go:698] Add success.
I0321 22:41:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:41:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:41:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 22:41:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:41:14.456554  543705 disk_worker.go:494] system disk:vda1
I0321 22:41:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:41:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:41:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:41:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:41:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:41:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:41:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:41:23.409780  543705 memory.go:184] no items to output this cycle
I0321 22:41:23.409923  543705 cpu.go:275] no items to output this cycle
I0321 22:41:31.729291  543705 disk_info.go:125] begin check local disk info of client
I0321 22:41:31.731873  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:41:31.731880  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4140 0xc0003e4180]
E0321 22:41:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:41:33.409799  543705 memory.go:184] no items to output this cycle
I0321 22:41:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 22:41:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:41:43.409810  543705 memory.go:191] Add success.
I0321 22:41:43.409842  543705 cpu.go:282] Add success.
I0321 22:41:43.420020  543705 net.go:648] Add success.
I0321 22:41:43.422622  543705 net.go:770] primary dev: ETH0
I0321 22:41:43.422636  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:41:43.422648  543705 net.go:698] Add success.
I0321 22:41:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:41:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:41:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:41:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:41:53.409777  543705 cpu.go:275] no items to output this cycle
I0321 22:41:53.409781  543705 memory.go:184] no items to output this cycle
E0321 22:42:03.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:42:03.409757  543705 memory.go:184] no items to output this cycle
I0321 22:42:03.409796  543705 cpu.go:275] no items to output this cycle
E0321 22:42:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:42:13.409785  543705 memory.go:191] Add success.
W0321 22:42:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 22:42:13.409814  543705 cpu.go:282] Add success.
W0321 22:42:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:42:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:42:13.420223  543705 net.go:648] Add success.
I0321 22:42:13.423187  543705 net.go:770] primary dev: ETH0
I0321 22:42:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:42:13.423211  543705 net.go:698] Add success.
I0321 22:42:13.469026  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9bbda184-42ba-4d0e-a895-63dc6407d66c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:42:13.469057  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 22:42:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:42:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0321 22:42:14.455157  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:42:14.457023  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:42:14.457030  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:42:14.457034  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:42:14.457043  543705 disk_worker.go:494] system disk:vda1
I0321 22:42:14.457084  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:42:15.456922  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:42:15.456935  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:42:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:42:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:42:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:42:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:42:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:42:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:42:23.409798  543705 memory.go:184] no items to output this cycle
I0321 22:42:23.409810  543705 cpu.go:275] no items to output this cycle
I0321 22:42:31.733321  543705 disk_info.go:125] begin check local disk info of client
I0321 22:42:31.735887  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:42:31.735894  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028cfc0 0xc00028d000]
E0321 22:42:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:42:33.409793  543705 memory.go:184] no items to output this cycle
I0321 22:42:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 22:42:39.281731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:42:39.281738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:42:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:42:43.410629  543705 memory.go:191] Add success.
I0321 22:42:43.409809  543705 cpu.go:282] Add success.
I0321 22:42:43.420394  543705 net.go:648] Add success.
I0321 22:42:43.422831  543705 net.go:770] primary dev: ETH0
I0321 22:42:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:42:43.422856  543705 net.go:698] Add success.
I0321 22:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:42:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:42:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:42:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:42:53.409813  543705 memory.go:184] no items to output this cycle
I0321 22:42:53.409825  543705 cpu.go:275] no items to output this cycle
E0321 22:43:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:43:03.409778  543705 memory.go:184] no items to output this cycle
I0321 22:43:03.409800  543705 cpu.go:275] no items to output this cycle
E0321 22:43:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:43:13.409783  543705 memory.go:191] Add success.
I0321 22:43:13.409792  543705 cpu.go:282] Add success.
W0321 22:43:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:43:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:43:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:43:13.419943  543705 net.go:770] primary dev: ETH0
I0321 22:43:13.419957  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:43:13.419969  543705 net.go:698] Add success.
I0321 22:43:13.420307  543705 net.go:648] Add success.
I0321 22:43:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:43:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:43:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0321 22:43:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:43:14.457543  543705 disk_worker.go:494] system disk:vda1
I0321 22:43:14.457583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:43:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:43:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:43:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:43:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:43:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:43:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:43:23.409784  543705 memory.go:184] no items to output this cycle
I0321 22:43:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 22:43:31.737327  543705 disk_info.go:125] begin check local disk info of client
I0321 22:43:31.739880  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:43:31.739887  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048af80 0xc00048afc0]
E0321 22:43:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:43:33.409799  543705 memory.go:184] no items to output this cycle
I0321 22:43:33.409814  543705 cpu.go:275] no items to output this cycle
E0321 22:43:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:43:43.409779  543705 memory.go:191] Add success.
I0321 22:43:43.409807  543705 cpu.go:282] Add success.
I0321 22:43:43.420042  543705 net.go:648] Add success.
I0321 22:43:43.422662  543705 net.go:770] primary dev: ETH0
I0321 22:43:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:43:43.422688  543705 net.go:698] Add success.
I0321 22:43:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:43:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:43:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:43:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:43:53.409774  543705 memory.go:184] no items to output this cycle
I0321 22:43:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 22:44:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:44:03.409768  543705 memory.go:184] no items to output this cycle
I0321 22:44:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 22:44:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:44:13.409811  543705 memory.go:191] Add success.
I0321 22:44:13.409816  543705 cpu.go:282] Add success.
W0321 22:44:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:44:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:44:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:44:13.420529  543705 net.go:648] Add success.
I0321 22:44:13.423261  543705 net.go:770] primary dev: ETH0
I0321 22:44:13.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:44:13.423286  543705 net.go:698] Add success.
I0321 22:44:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:44:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:44:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 22:44:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:44:14.456563  543705 disk_worker.go:494] system disk:vda1
I0321 22:44:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:44:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:44:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:44:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:44:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:44:23.409809  543705 memory.go:184] no items to output this cycle
I0321 22:44:23.409820  543705 cpu.go:275] no items to output this cycle
I0321 22:44:31.741344  543705 disk_info.go:125] begin check local disk info of client
I0321 22:44:31.743958  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:44:31.743965  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004740c0 0xc000474100]
E0321 22:44:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:44:33.409765  543705 memory.go:184] no items to output this cycle
I0321 22:44:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 22:44:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:44:43.409784  543705 memory.go:191] Add success.
I0321 22:44:43.409798  543705 cpu.go:282] Add success.
I0321 22:44:43.419961  543705 net.go:648] Add success.
I0321 22:44:43.422630  543705 net.go:770] primary dev: ETH0
I0321 22:44:43.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:44:43.422656  543705 net.go:698] Add success.
I0321 22:44:46.457663  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:44:46.457731  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:44:46.457755  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:44:53.410334  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:44:53.410350  543705 memory.go:184] no items to output this cycle
I0321 22:44:53.410370  543705 cpu.go:275] no items to output this cycle
E0321 22:45:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:45:03.409793  543705 memory.go:184] no items to output this cycle
I0321 22:45:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 22:45:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:45:13.409784  543705 memory.go:191] Add success.
I0321 22:45:13.409802  543705 cpu.go:282] Add success.
W0321 22:45:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:45:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:45:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:45:13.420143  543705 net.go:648] Add success.
I0321 22:45:13.422477  543705 net.go:770] primary dev: ETH0
I0321 22:45:13.422492  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:45:13.422513  543705 net.go:698] Add success.
I0321 22:45:13.462962  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e2f03d03-34d2-48d5-942b-9c986933afe2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:45:13.462996  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:45:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:45:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0321 22:45:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:45:14.456531  543705 disk_worker.go:494] system disk:vda1
I0321 22:45:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:45:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:45:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:45:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:45:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:45:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:45:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:45:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 22:45:23.409779  543705 memory.go:184] no items to output this cycle
I0321 22:45:31.745371  543705 disk_info.go:125] begin check local disk info of client
I0321 22:45:31.747952  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:45:31.747958  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f3c0 0xc00032f400]
E0321 22:45:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:45:33.409786  543705 memory.go:184] no items to output this cycle
I0321 22:45:33.409789  543705 cpu.go:275] no items to output this cycle
I0321 22:45:39.285743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:45:39.285751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:45:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:45:43.410677  543705 memory.go:191] Add success.
I0321 22:45:43.409835  543705 cpu.go:282] Add success.
I0321 22:45:43.420422  543705 net.go:648] Add success.
I0321 22:45:43.423146  543705 net.go:770] primary dev: ETH0
I0321 22:45:43.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:45:43.423176  543705 net.go:698] Add success.
I0321 22:45:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:45:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:45:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:45:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:45:53.409784  543705 memory.go:184] no items to output this cycle
I0321 22:45:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:46:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:46:03.409796  543705 memory.go:184] no items to output this cycle
I0321 22:46:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 22:46:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:46:13.409827  543705 memory.go:191] Add success.
I0321 22:46:13.409844  543705 cpu.go:282] Add success.
W0321 22:46:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:46:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:46:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:46:13.420178  543705 net.go:648] Add success.
I0321 22:46:13.422902  543705 net.go:770] primary dev: ETH0
I0321 22:46:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:46:13.422928  543705 net.go:698] Add success.
I0321 22:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:46:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:46:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 22:46:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:46:14.456510  543705 disk_worker.go:494] system disk:vda1
I0321 22:46:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:46:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:46:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:46:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:46:23.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:46:23.409863  543705 memory.go:184] no items to output this cycle
I0321 22:46:23.410021  543705 cpu.go:275] no items to output this cycle
I0321 22:46:31.748048  543705 disk_info.go:125] begin check local disk info of client
I0321 22:46:31.750619  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:46:31.750626  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396000 0xc000396040]
E0321 22:46:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:46:33.409815  543705 memory.go:184] no items to output this cycle
I0321 22:46:33.409838  543705 cpu.go:275] no items to output this cycle
E0321 22:46:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:46:43.409826  543705 memory.go:191] Add success.
I0321 22:46:43.409833  543705 cpu.go:282] Add success.
I0321 22:46:43.420026  543705 net.go:648] Add success.
I0321 22:46:43.422605  543705 net.go:770] primary dev: ETH0
I0321 22:46:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:46:43.422630  543705 net.go:698] Add success.
I0321 22:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:46:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:46:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:46:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:46:53.409808  543705 memory.go:184] no items to output this cycle
I0321 22:46:53.409818  543705 cpu.go:275] no items to output this cycle
E0321 22:47:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:47:03.409770  543705 memory.go:184] no items to output this cycle
I0321 22:47:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 22:47:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:47:13.409820  543705 memory.go:191] Add success.
I0321 22:47:13.409822  543705 cpu.go:282] Add success.
W0321 22:47:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:47:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:47:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:47:13.420122  543705 net.go:648] Add success.
I0321 22:47:13.422721  543705 net.go:770] primary dev: ETH0
I0321 22:47:13.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:47:13.422752  543705 net.go:698] Add success.
I0321 22:47:13.453294  543705 event_worker.go:152] Polling the log file for events...
W0321 22:47:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:47:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 22:47:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:47:14.455864  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:47:14.455873  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:47:14.455879  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:47:14.456912  543705 disk_worker.go:494] system disk:vda1
I0321 22:47:14.456959  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:47:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:47:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:47:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:47:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:47:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:47:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:47:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:47:23.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:47:23.409902  543705 memory.go:184] no items to output this cycle
I0321 22:47:23.409964  543705 cpu.go:275] no items to output this cycle
I0321 22:47:31.750716  543705 disk_info.go:125] begin check local disk info of client
I0321 22:47:31.753360  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:47:31.753368  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b00c0 0xc0003b0100]
E0321 22:47:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:47:33.409805  543705 memory.go:184] no items to output this cycle
I0321 22:47:33.409818  543705 cpu.go:275] no items to output this cycle
E0321 22:47:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:47:43.409810  543705 memory.go:191] Add success.
I0321 22:47:43.409820  543705 cpu.go:282] Add success.
I0321 22:47:43.419893  543705 net.go:648] Add success.
I0321 22:47:43.422765  543705 net.go:770] primary dev: ETH0
I0321 22:47:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:47:43.422800  543705 net.go:698] Add success.
I0321 22:47:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:47:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:47:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:47:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:47:53.409760  543705 memory.go:184] no items to output this cycle
I0321 22:47:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 22:48:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:48:03.409781  543705 memory.go:184] no items to output this cycle
I0321 22:48:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:48:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:48:13.409786  543705 memory.go:191] Add success.
I0321 22:48:13.409803  543705 cpu.go:282] Add success.
W0321 22:48:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:48:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:48:13.420047  543705 net.go:648] Add success.
I0321 22:48:13.422899  543705 net.go:770] primary dev: ETH0
I0321 22:48:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:48:13.422924  543705 net.go:698] Add success.
I0321 22:48:13.648479  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7a2f6203-4607-431a-95ae-028ec979b086","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:48:13.648512  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:48:14.453975  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:48:14.454183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:48:14.454193  543705 disk_worker.go:708] disk space is not compliant
W0321 22:48:14.454196  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:48:14.455528  543705 disk_worker.go:494] system disk:vda1
I0321 22:48:14.455576  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:48:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:48:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:48:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:48:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:48:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:48:23.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:48:23.409889  543705 memory.go:184] no items to output this cycle
I0321 22:48:23.409945  543705 cpu.go:275] no items to output this cycle
I0321 22:48:31.753681  543705 disk_info.go:125] begin check local disk info of client
I0321 22:48:31.756240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:48:31.756247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6300 0xc0001c6340]
E0321 22:48:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:48:33.409789  543705 memory.go:184] no items to output this cycle
I0321 22:48:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 22:48:39.289733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:48:39.289740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:48:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:48:43.410630  543705 memory.go:191] Add success.
I0321 22:48:43.409812  543705 cpu.go:282] Add success.
I0321 22:48:43.420335  543705 net.go:648] Add success.
I0321 22:48:43.423024  543705 net.go:770] primary dev: ETH0
I0321 22:48:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:48:43.423049  543705 net.go:698] Add success.
I0321 22:48:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:48:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:48:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:48:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:48:53.409798  543705 memory.go:184] no items to output this cycle
I0321 22:48:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:49:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:49:03.409773  543705 memory.go:184] no items to output this cycle
I0321 22:49:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 22:49:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:49:13.409817  543705 memory.go:191] Add success.
I0321 22:49:13.409829  543705 cpu.go:282] Add success.
W0321 22:49:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:49:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:49:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:49:13.419921  543705 net.go:770] primary dev: ETH0
I0321 22:49:13.419935  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:49:13.419948  543705 net.go:698] Add success.
I0321 22:49:13.420177  543705 net.go:648] Add success.
I0321 22:49:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:49:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:49:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 22:49:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:49:14.456597  543705 disk_worker.go:494] system disk:vda1
I0321 22:49:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:49:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:49:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:49:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:49:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:49:23.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:49:23.409909  543705 memory.go:184] no items to output this cycle
I0321 22:49:23.409927  543705 cpu.go:275] no items to output this cycle
I0321 22:49:31.757421  543705 disk_info.go:125] begin check local disk info of client
I0321 22:49:31.760062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:49:31.760070  543705 disk_info.go:196] parse disk info done, disk is : [0xc00015e000 0xc00015e040]
E0321 22:49:33.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:49:33.409757  543705 memory.go:184] no items to output this cycle
I0321 22:49:33.409799  543705 cpu.go:275] no items to output this cycle
E0321 22:49:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:49:43.409797  543705 memory.go:191] Add success.
I0321 22:49:43.409809  543705 cpu.go:282] Add success.
I0321 22:49:43.419952  543705 net.go:648] Add success.
I0321 22:49:43.422373  543705 net.go:770] primary dev: ETH0
I0321 22:49:43.422385  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:49:43.422397  543705 net.go:698] Add success.
I0321 22:49:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:49:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:49:53.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:49:53.409758  543705 memory.go:184] no items to output this cycle
I0321 22:49:53.409802  543705 cpu.go:275] no items to output this cycle
E0321 22:50:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:50:03.409786  543705 memory.go:184] no items to output this cycle
I0321 22:50:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:50:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:50:13.409783  543705 memory.go:191] Add success.
W0321 22:50:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 22:50:13.409810  543705 cpu.go:282] Add success.
W0321 22:50:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:50:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:50:13.420082  543705 net.go:648] Add success.
I0321 22:50:13.422917  543705 net.go:770] primary dev: ETH0
I0321 22:50:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:50:13.422943  543705 net.go:698] Add success.
I0321 22:50:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:50:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:50:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 22:50:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:50:14.456567  543705 disk_worker.go:494] system disk:vda1
I0321 22:50:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:50:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:50:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:50:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:50:23.409774  543705 memory.go:184] no items to output this cycle
I0321 22:50:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 22:50:31.761449  543705 disk_info.go:125] begin check local disk info of client
I0321 22:50:31.764030  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:50:31.764037  543705 disk_info.go:196] parse disk info done, disk is : [0xc000274000 0xc000274040]
E0321 22:50:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:50:33.409794  543705 memory.go:184] no items to output this cycle
I0321 22:50:33.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:50:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:50:43.409775  543705 memory.go:191] Add success.
I0321 22:50:43.409798  543705 cpu.go:282] Add success.
I0321 22:50:43.419950  543705 net.go:648] Add success.
I0321 22:50:43.422390  543705 net.go:770] primary dev: ETH0
I0321 22:50:43.422404  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:50:43.422418  543705 net.go:698] Add success.
I0321 22:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:50:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:50:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:50:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:50:53.409797  543705 memory.go:184] no items to output this cycle
I0321 22:50:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 22:51:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:51:03.409778  543705 memory.go:184] no items to output this cycle
I0321 22:51:03.409777  543705 cpu.go:275] no items to output this cycle
E0321 22:51:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:51:13.409795  543705 cpu.go:282] Add success.
I0321 22:51:13.409799  543705 memory.go:191] Add success.
W0321 22:51:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:51:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:51:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:51:13.420062  543705 net.go:648] Add success.
I0321 22:51:13.422602  543705 net.go:770] primary dev: ETH0
I0321 22:51:13.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:51:13.422628  543705 net.go:698] Add success.
I0321 22:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:51:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:51:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0321 22:51:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:51:14.456573  543705 disk_worker.go:494] system disk:vda1
I0321 22:51:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:51:14.762596  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"17584d4b-d839-4129-8391-ea186901ca66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:51:14.762642  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:51:15.456023  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:51:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:51:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:51:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:51:23.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:51:23.409906  543705 memory.go:184] no items to output this cycle
I0321 22:51:23.409926  543705 cpu.go:275] no items to output this cycle
I0321 22:51:31.765473  543705 disk_info.go:125] begin check local disk info of client
I0321 22:51:31.768012  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:51:31.768019  543705 disk_info.go:196] parse disk info done, disk is : [0xc00023e000 0xc00023e040]
E0321 22:51:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:51:33.409799  543705 memory.go:184] no items to output this cycle
I0321 22:51:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 22:51:39.293741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:51:39.293749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:51:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:51:43.410669  543705 memory.go:191] Add success.
I0321 22:51:43.409785  543705 cpu.go:282] Add success.
I0321 22:51:43.420445  543705 net.go:648] Add success.
I0321 22:51:43.422955  543705 net.go:770] primary dev: ETH0
I0321 22:51:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:51:43.422982  543705 net.go:698] Add success.
I0321 22:51:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:51:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:51:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:51:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:51:53.409765  543705 memory.go:184] no items to output this cycle
I0321 22:51:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 22:52:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:52:03.409769  543705 memory.go:184] no items to output this cycle
I0321 22:52:03.409789  543705 cpu.go:275] no items to output this cycle
E0321 22:52:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:52:13.409816  543705 memory.go:191] Add success.
I0321 22:52:13.409822  543705 cpu.go:282] Add success.
W0321 22:52:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:52:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:52:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:52:13.420132  543705 net.go:648] Add success.
I0321 22:52:13.422803  543705 net.go:770] primary dev: ETH0
I0321 22:52:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:52:13.422828  543705 net.go:698] Add success.
W0321 22:52:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:52:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0321 22:52:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:52:14.456887  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:52:14.456896  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:52:14.456902  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:52:14.456975  543705 disk_worker.go:494] system disk:vda1
I0321 22:52:14.457017  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:52:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:52:15.456868  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:52:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:52:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:52:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:52:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:52:16.472341  543705 disk_local_worker.go:436] Get disk info: []
I0321 22:52:23.409911  543705 cpu.go:275] no items to output this cycle
E0321 22:52:23.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:52:23.409929  543705 memory.go:184] no items to output this cycle
I0321 22:52:31.769496  543705 disk_info.go:125] begin check local disk info of client
I0321 22:52:31.772159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:52:31.772168  543705 disk_info.go:196] parse disk info done, disk is : [0xc000512380 0xc0005123c0]
E0321 22:52:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:52:33.409776  543705 memory.go:184] no items to output this cycle
I0321 22:52:33.409791  543705 cpu.go:275] no items to output this cycle
E0321 22:52:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:52:43.409782  543705 memory.go:191] Add success.
I0321 22:52:43.409799  543705 cpu.go:282] Add success.
I0321 22:52:43.420064  543705 net.go:648] Add success.
I0321 22:52:43.422934  543705 net.go:770] primary dev: ETH0
I0321 22:52:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:52:43.422959  543705 net.go:698] Add success.
I0321 22:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:52:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:52:53.410383  543705 memory.go:184] no items to output this cycle
I0321 22:52:53.410392  543705 cpu.go:275] no items to output this cycle
E0321 22:53:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:53:03.409779  543705 memory.go:184] no items to output this cycle
I0321 22:53:03.409802  543705 cpu.go:275] no items to output this cycle
E0321 22:53:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:53:13.409796  543705 memory.go:191] Add success.
I0321 22:53:13.409803  543705 cpu.go:282] Add success.
W0321 22:53:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:53:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:53:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:53:13.420073  543705 net.go:648] Add success.
I0321 22:53:13.422742  543705 net.go:770] primary dev: ETH0
I0321 22:53:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:53:13.422771  543705 net.go:698] Add success.
I0321 22:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:53:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:53:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 22:53:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:53:14.456492  543705 disk_worker.go:494] system disk:vda1
I0321 22:53:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:53:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:53:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:53:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:53:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:53:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:53:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:53:23.409776  543705 memory.go:184] no items to output this cycle
I0321 22:53:23.409807  543705 cpu.go:275] no items to output this cycle
I0321 22:53:31.773517  543705 disk_info.go:125] begin check local disk info of client
I0321 22:53:31.776084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:53:31.776091  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0321 22:53:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:53:33.409795  543705 memory.go:184] no items to output this cycle
I0321 22:53:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 22:53:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:53:43.409776  543705 memory.go:191] Add success.
I0321 22:53:43.409808  543705 cpu.go:282] Add success.
I0321 22:53:43.419861  543705 net.go:648] Add success.
I0321 22:53:43.422479  543705 net.go:770] primary dev: ETH0
I0321 22:53:43.422491  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:53:43.422504  543705 net.go:698] Add success.
I0321 22:53:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:53:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:53:53.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:53:53.410266  543705 memory.go:184] no items to output this cycle
I0321 22:53:53.410266  543705 cpu.go:275] no items to output this cycle
E0321 22:54:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:54:03.409785  543705 memory.go:184] no items to output this cycle
I0321 22:54:03.409808  543705 cpu.go:275] no items to output this cycle
E0321 22:54:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:54:13.409792  543705 memory.go:191] Add success.
I0321 22:54:13.409796  543705 cpu.go:282] Add success.
W0321 22:54:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:54:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:54:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:54:13.420071  543705 net.go:648] Add success.
I0321 22:54:13.422569  543705 net.go:770] primary dev: ETH0
I0321 22:54:13.422583  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:54:13.422594  543705 net.go:698] Add success.
I0321 22:54:13.463746  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f7f60514-9759-4ed3-a898-afd6629ceb7d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:54:13.463780  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 22:54:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:54:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:54:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 22:54:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:54:14.456625  543705 disk_worker.go:494] system disk:vda1
I0321 22:54:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:54:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:54:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:54:16.458150  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:54:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:54:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:54:23.409787  543705 memory.go:184] no items to output this cycle
I0321 22:54:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 22:54:31.777533  543705 disk_info.go:125] begin check local disk info of client
I0321 22:54:31.780061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:54:31.780069  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0321 22:54:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:54:33.409812  543705 memory.go:184] no items to output this cycle
I0321 22:54:33.409817  543705 cpu.go:275] no items to output this cycle
I0321 22:54:39.297730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:54:39.297737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:54:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:54:43.410658  543705 memory.go:191] Add success.
I0321 22:54:43.409829  543705 cpu.go:282] Add success.
I0321 22:54:43.420406  543705 net.go:648] Add success.
I0321 22:54:43.423030  543705 net.go:770] primary dev: ETH0
I0321 22:54:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:54:43.423056  543705 net.go:698] Add success.
I0321 22:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:54:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:54:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:54:53.409778  543705 memory.go:184] no items to output this cycle
I0321 22:54:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 22:55:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:55:03.409789  543705 memory.go:184] no items to output this cycle
I0321 22:55:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 22:55:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:55:13.409813  543705 memory.go:191] Add success.
I0321 22:55:13.409819  543705 cpu.go:282] Add success.
W0321 22:55:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:55:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:55:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:55:13.420048  543705 net.go:648] Add success.
I0321 22:55:13.422600  543705 net.go:770] primary dev: ETH0
I0321 22:55:13.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:55:13.422626  543705 net.go:698] Add success.
I0321 22:55:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:55:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:55:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 22:55:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:55:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 22:55:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:55:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:55:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:55:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:55:16.472119  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:55:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:55:23.409810  543705 memory.go:184] no items to output this cycle
I0321 22:55:23.409815  543705 cpu.go:275] no items to output this cycle
I0321 22:55:31.781552  543705 disk_info.go:125] begin check local disk info of client
I0321 22:55:31.784190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:55:31.784197  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348000 0xc000348040]
E0321 22:55:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:55:33.409780  543705 memory.go:184] no items to output this cycle
I0321 22:55:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 22:55:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:55:43.409773  543705 memory.go:191] Add success.
I0321 22:55:43.409805  543705 cpu.go:282] Add success.
I0321 22:55:43.419923  543705 net.go:648] Add success.
I0321 22:55:43.422643  543705 net.go:770] primary dev: ETH0
I0321 22:55:43.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:55:43.422671  543705 net.go:698] Add success.
I0321 22:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:55:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:55:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:55:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:55:53.409785  543705 cpu.go:275] no items to output this cycle
I0321 22:55:53.409786  543705 memory.go:184] no items to output this cycle
E0321 22:56:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:56:03.409816  543705 memory.go:184] no items to output this cycle
I0321 22:56:03.409827  543705 cpu.go:275] no items to output this cycle
E0321 22:56:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:56:13.409788  543705 memory.go:191] Add success.
I0321 22:56:13.409805  543705 cpu.go:282] Add success.
W0321 22:56:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:56:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:56:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:56:13.420146  543705 net.go:648] Add success.
I0321 22:56:13.423005  543705 net.go:770] primary dev: ETH0
I0321 22:56:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:56:13.423031  543705 net.go:698] Add success.
I0321 22:56:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:56:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:56:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0321 22:56:14.455147  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:56:14.456464  543705 disk_worker.go:494] system disk:vda1
I0321 22:56:14.456508  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:56:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:56:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:56:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:56:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:56:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:56:23.410354  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:56:23.410371  543705 memory.go:184] no items to output this cycle
I0321 22:56:23.410396  543705 cpu.go:275] no items to output this cycle
I0321 22:56:31.785575  543705 disk_info.go:125] begin check local disk info of client
I0321 22:56:31.788143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:56:31.788151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0321 22:56:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:56:33.409797  543705 memory.go:184] no items to output this cycle
I0321 22:56:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 22:56:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:56:43.409782  543705 memory.go:191] Add success.
I0321 22:56:43.409816  543705 cpu.go:282] Add success.
I0321 22:56:43.419870  543705 net.go:648] Add success.
I0321 22:56:43.422484  543705 net.go:770] primary dev: ETH0
I0321 22:56:43.422497  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:56:43.422509  543705 net.go:698] Add success.
I0321 22:56:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:56:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:56:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:56:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:56:53.409783  543705 memory.go:184] no items to output this cycle
I0321 22:56:53.409797  543705 cpu.go:275] no items to output this cycle
E0321 22:57:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:57:03.409810  543705 memory.go:184] no items to output this cycle
I0321 22:57:03.409819  543705 cpu.go:275] no items to output this cycle
E0321 22:57:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:57:13.409780  543705 memory.go:191] Add success.
I0321 22:57:13.409812  543705 cpu.go:282] Add success.
W0321 22:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:57:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:57:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:57:13.420143  543705 net.go:648] Add success.
I0321 22:57:13.423251  543705 net.go:770] primary dev: ETH0
I0321 22:57:13.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:57:13.423280  543705 net.go:698] Add success.
I0321 22:57:13.429307  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 22:57:13.453554  543705 event_worker.go:152] Polling the log file for events...
I0321 22:57:13.463778  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ab6e614-fe43-4410-bcee-058d3eca532d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 22:57:13.463826  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 22:57:14.455082  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:57:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0321 22:57:14.455146  543705 disk_worker.go:728] disk inode is not compliant
E0321 22:57:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 22:57:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 22:57:14.456938  543705 custom_config.go:64] query custom config with name: gpu
I0321 22:57:14.456974  543705 disk_worker.go:494] system disk:vda1
I0321 22:57:14.457002  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 22:57:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 22:57:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:57:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 22:57:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 22:57:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:57:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:57:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:57:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:57:23.409767  543705 memory.go:184] no items to output this cycle
I0321 22:57:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 22:57:31.789594  543705 disk_info.go:125] begin check local disk info of client
I0321 22:57:31.792168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:57:31.792174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033c3c0 0xc00033c400]
E0321 22:57:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:57:33.409780  543705 cpu.go:275] no items to output this cycle
I0321 22:57:33.409783  543705 memory.go:184] no items to output this cycle
I0321 22:57:39.301735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 22:57:39.301742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 22:57:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:57:43.410846  543705 memory.go:191] Add success.
I0321 22:57:43.409819  543705 cpu.go:282] Add success.
I0321 22:57:43.420609  543705 net.go:648] Add success.
I0321 22:57:43.423296  543705 net.go:770] primary dev: ETH0
I0321 22:57:43.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:57:43.423326  543705 net.go:698] Add success.
I0321 22:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:57:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:57:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:57:53.409778  543705 memory.go:184] no items to output this cycle
I0321 22:57:53.409780  543705 cpu.go:275] no items to output this cycle
E0321 22:58:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:58:03.409807  543705 memory.go:184] no items to output this cycle
I0321 22:58:03.409821  543705 cpu.go:275] no items to output this cycle
E0321 22:58:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:58:13.409808  543705 memory.go:191] Add success.
I0321 22:58:13.409821  543705 cpu.go:282] Add success.
W0321 22:58:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:58:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:58:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:58:13.420125  543705 net.go:648] Add success.
I0321 22:58:13.422770  543705 net.go:770] primary dev: ETH0
I0321 22:58:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:58:13.422795  543705 net.go:698] Add success.
I0321 22:58:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:58:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:58:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0321 22:58:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:58:14.456501  543705 disk_worker.go:494] system disk:vda1
I0321 22:58:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:58:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:58:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:58:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:58:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:58:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:58:23.409771  543705 memory.go:184] no items to output this cycle
I0321 22:58:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 22:58:31.793617  543705 disk_info.go:125] begin check local disk info of client
I0321 22:58:31.796191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:58:31.796198  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c180 0xc00035c1c0]
E0321 22:58:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:58:33.409792  543705 memory.go:184] no items to output this cycle
I0321 22:58:33.409808  543705 cpu.go:275] no items to output this cycle
E0321 22:58:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:58:43.409786  543705 memory.go:191] Add success.
I0321 22:58:43.409811  543705 cpu.go:282] Add success.
I0321 22:58:43.419870  543705 net.go:648] Add success.
I0321 22:58:43.422841  543705 net.go:770] primary dev: ETH0
I0321 22:58:43.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:58:43.422867  543705 net.go:698] Add success.
I0321 22:58:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:58:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:58:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:58:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:58:53.409768  543705 memory.go:184] no items to output this cycle
I0321 22:58:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 22:59:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:59:03.409782  543705 memory.go:184] no items to output this cycle
I0321 22:59:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 22:59:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:59:13.409782  543705 memory.go:191] Add success.
I0321 22:59:13.409798  543705 cpu.go:282] Add success.
W0321 22:59:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 22:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 22:59:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 22:59:13.420217  543705 net.go:648] Add success.
I0321 22:59:13.422870  543705 net.go:770] primary dev: ETH0
I0321 22:59:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:59:13.422894  543705 net.go:698] Add success.
I0321 22:59:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 22:59:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 22:59:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 22:59:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 22:59:14.456598  543705 disk_worker.go:494] system disk:vda1
I0321 22:59:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 22:59:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 22:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:59:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:59:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0321 22:59:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 22:59:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:59:23.409782  543705 memory.go:184] no items to output this cycle
I0321 22:59:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 22:59:31.797629  543705 disk_info.go:125] begin check local disk info of client
I0321 22:59:31.800275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 22:59:31.800282  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a540 0xc00039a580]
E0321 22:59:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:59:33.409778  543705 memory.go:184] no items to output this cycle
I0321 22:59:33.409780  543705 cpu.go:275] no items to output this cycle
E0321 22:59:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:59:43.409788  543705 memory.go:191] Add success.
I0321 22:59:43.409790  543705 cpu.go:282] Add success.
I0321 22:59:43.419862  543705 net.go:648] Add success.
I0321 22:59:43.422903  543705 net.go:770] primary dev: ETH0
I0321 22:59:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0321 22:59:43.422928  543705 net.go:698] Add success.
I0321 22:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 22:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 22:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0321 22:59:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 22:59:53.409793  543705 memory.go:184] no items to output this cycle
I0321 22:59:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 23:00:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:00:03.409781  543705 memory.go:184] no items to output this cycle
I0321 23:00:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 23:00:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:00:13.409785  543705 cpu.go:282] Add success.
I0321 23:00:13.409789  543705 memory.go:191] Add success.
W0321 23:00:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:00:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:00:13.419990  543705 net.go:648] Add success.
I0321 23:00:13.422775  543705 net.go:770] primary dev: ETH0
I0321 23:00:13.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:00:13.422799  543705 net.go:698] Add success.
I0321 23:00:13.574029  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a846d7e7-01f7-462e-93fa-5c42d118668b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:00:13.574064  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:00:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:00:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:00:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 23:00:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:00:14.456705  543705 disk_worker.go:494] system disk:vda1
I0321 23:00:14.456739  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:00:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:00:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:00:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:00:23.409790  543705 memory.go:184] no items to output this cycle
I0321 23:00:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 23:00:31.801657  543705 disk_info.go:125] begin check local disk info of client
I0321 23:00:31.804197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:00:31.804203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046e8c0 0xc00046e900]
E0321 23:00:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:00:33.409767  543705 memory.go:184] no items to output this cycle
I0321 23:00:33.409801  543705 cpu.go:275] no items to output this cycle
I0321 23:00:39.305738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:00:39.305745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:00:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:00:43.410628  543705 memory.go:191] Add success.
I0321 23:00:43.409814  543705 cpu.go:282] Add success.
I0321 23:00:43.420325  543705 net.go:648] Add success.
I0321 23:00:43.422867  543705 net.go:770] primary dev: ETH0
I0321 23:00:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:00:43.422894  543705 net.go:698] Add success.
I0321 23:00:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:00:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:00:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:00:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:00:53.409776  543705 memory.go:184] no items to output this cycle
I0321 23:00:53.409792  543705 cpu.go:275] no items to output this cycle
E0321 23:01:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:01:03.409781  543705 memory.go:184] no items to output this cycle
I0321 23:01:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 23:01:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:01:13.409792  543705 memory.go:191] Add success.
I0321 23:01:13.409794  543705 cpu.go:282] Add success.
W0321 23:01:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:01:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:01:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:01:13.420054  543705 net.go:648] Add success.
I0321 23:01:13.422741  543705 net.go:770] primary dev: ETH0
I0321 23:01:13.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:01:13.422767  543705 net.go:698] Add success.
I0321 23:01:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:01:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:01:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 23:01:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:01:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 23:01:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:01:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:01:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:01:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:01:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:01:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:01:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:01:23.409778  543705 memory.go:184] no items to output this cycle
I0321 23:01:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 23:01:31.805673  543705 disk_info.go:125] begin check local disk info of client
I0321 23:01:31.808217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:01:31.808224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa1c0 0xc0001aa240]
E0321 23:01:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:01:33.409805  543705 memory.go:184] no items to output this cycle
I0321 23:01:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 23:01:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:01:43.409814  543705 memory.go:191] Add success.
I0321 23:01:43.409820  543705 cpu.go:282] Add success.
I0321 23:01:43.419920  543705 net.go:648] Add success.
I0321 23:01:43.422563  543705 net.go:770] primary dev: ETH0
I0321 23:01:43.422576  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:01:43.422588  543705 net.go:698] Add success.
I0321 23:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:01:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:01:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:01:53.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:01:53.410280  543705 memory.go:184] no items to output this cycle
I0321 23:01:53.410299  543705 cpu.go:275] no items to output this cycle
E0321 23:02:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:02:03.409784  543705 memory.go:184] no items to output this cycle
I0321 23:02:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 23:02:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:02:13.409810  543705 memory.go:191] Add success.
I0321 23:02:13.409814  543705 cpu.go:282] Add success.
W0321 23:02:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:02:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:02:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:02:13.420174  543705 net.go:648] Add success.
I0321 23:02:13.423134  543705 net.go:770] primary dev: ETH0
I0321 23:02:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:02:13.423158  543705 net.go:698] Add success.
W0321 23:02:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:02:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 23:02:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:02:14.456097  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:02:14.456107  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:02:14.456113  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:02:14.456451  543705 disk_worker.go:494] system disk:vda1
I0321 23:02:14.456481  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:02:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:02:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:02:16.457594  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:02:16.457679  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:02:16.457703  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:02:16.458276  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:02:16.472092  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:02:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:02:23.409780  543705 memory.go:184] no items to output this cycle
I0321 23:02:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 23:02:31.809676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:02:31.812210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:02:31.812217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490440 0xc000490480]
E0321 23:02:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:02:33.409790  543705 memory.go:184] no items to output this cycle
I0321 23:02:33.409807  543705 cpu.go:275] no items to output this cycle
E0321 23:02:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:02:43.409780  543705 memory.go:191] Add success.
I0321 23:02:43.409803  543705 cpu.go:282] Add success.
I0321 23:02:43.419868  543705 net.go:648] Add success.
I0321 23:02:43.422588  543705 net.go:770] primary dev: ETH0
I0321 23:02:43.422600  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:02:43.422612  543705 net.go:698] Add success.
I0321 23:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:02:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:02:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:02:53.409797  543705 memory.go:184] no items to output this cycle
I0321 23:02:53.409806  543705 cpu.go:275] no items to output this cycle
E0321 23:03:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:03:03.409788  543705 memory.go:184] no items to output this cycle
I0321 23:03:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 23:03:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:03:13.409815  543705 memory.go:191] Add success.
I0321 23:03:13.409816  543705 cpu.go:282] Add success.
W0321 23:03:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:03:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:03:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:03:13.420154  543705 net.go:648] Add success.
I0321 23:03:13.423126  543705 net.go:770] primary dev: ETH0
I0321 23:03:13.423141  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:03:13.423155  543705 net.go:698] Add success.
I0321 23:03:13.468992  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b94bd808-f1f9-4f73-a7d3-681e7e9fdecb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:03:13.469025  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:03:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:03:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:03:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 23:03:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:03:14.456622  543705 disk_worker.go:494] system disk:vda1
I0321 23:03:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:03:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:03:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:03:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:03:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:03:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:03:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:03:23.409786  543705 memory.go:184] no items to output this cycle
I0321 23:03:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 23:03:31.813681  543705 disk_info.go:125] begin check local disk info of client
I0321 23:03:31.816279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:03:31.816286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 23:03:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:03:33.409780  543705 memory.go:184] no items to output this cycle
I0321 23:03:33.409784  543705 cpu.go:275] no items to output this cycle
I0321 23:03:39.309734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:03:39.309740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:03:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:03:43.410729  543705 memory.go:191] Add success.
I0321 23:03:43.409814  543705 cpu.go:282] Add success.
I0321 23:03:43.420422  543705 net.go:648] Add success.
I0321 23:03:43.423094  543705 net.go:770] primary dev: ETH0
I0321 23:03:43.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:03:43.423119  543705 net.go:698] Add success.
I0321 23:03:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:03:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:03:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:03:53.409766  543705 memory.go:184] no items to output this cycle
I0321 23:03:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 23:04:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:04:03.409786  543705 memory.go:184] no items to output this cycle
I0321 23:04:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:04:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:04:13.409786  543705 memory.go:191] Add success.
I0321 23:04:13.409786  543705 cpu.go:282] Add success.
W0321 23:04:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:04:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:04:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:04:13.420130  543705 net.go:648] Add success.
I0321 23:04:13.422763  543705 net.go:770] primary dev: ETH0
I0321 23:04:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:04:13.422789  543705 net.go:698] Add success.
I0321 23:04:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:04:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:04:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 23:04:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:04:14.456497  543705 disk_worker.go:494] system disk:vda1
I0321 23:04:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:04:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:04:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:04:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:04:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 23:04:23.409786  543705 memory.go:184] no items to output this cycle
I0321 23:04:31.817673  543705 disk_info.go:125] begin check local disk info of client
I0321 23:04:31.820209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:04:31.820215  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375040 0xc000375080]
E0321 23:04:33.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:04:33.409882  543705 memory.go:184] no items to output this cycle
I0321 23:04:33.409994  543705 cpu.go:275] no items to output this cycle
E0321 23:04:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:04:43.409786  543705 memory.go:191] Add success.
I0321 23:04:43.409790  543705 cpu.go:282] Add success.
I0321 23:04:43.419865  543705 net.go:648] Add success.
I0321 23:04:43.422654  543705 net.go:770] primary dev: ETH0
I0321 23:04:43.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:04:43.422679  543705 net.go:698] Add success.
I0321 23:04:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:04:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:04:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:04:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:04:53.409770  543705 memory.go:184] no items to output this cycle
I0321 23:04:53.409845  543705 cpu.go:275] no items to output this cycle
E0321 23:05:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:05:03.409808  543705 memory.go:184] no items to output this cycle
I0321 23:05:03.409823  543705 cpu.go:275] no items to output this cycle
E0321 23:05:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:05:13.409787  543705 memory.go:191] Add success.
W0321 23:05:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 23:05:13.409813  543705 cpu.go:282] Add success.
W0321 23:05:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:05:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:05:13.420051  543705 net.go:648] Add success.
I0321 23:05:13.422998  543705 net.go:770] primary dev: ETH0
I0321 23:05:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:05:13.423026  543705 net.go:698] Add success.
I0321 23:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:05:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:05:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 23:05:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:05:14.456504  543705 disk_worker.go:494] system disk:vda1
I0321 23:05:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:05:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:05:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:05:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:05:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:05:23.409784  543705 memory.go:184] no items to output this cycle
I0321 23:05:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 23:05:31.821676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:05:31.824304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:05:31.824314  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002839c0 0xc000283a00]
E0321 23:05:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:05:33.409801  543705 memory.go:184] no items to output this cycle
I0321 23:05:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 23:05:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:05:43.409815  543705 memory.go:191] Add success.
I0321 23:05:43.409821  543705 cpu.go:282] Add success.
I0321 23:05:43.419948  543705 net.go:648] Add success.
I0321 23:05:43.422651  543705 net.go:770] primary dev: ETH0
I0321 23:05:43.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:05:43.422678  543705 net.go:698] Add success.
I0321 23:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:05:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:05:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:05:53.409786  543705 memory.go:184] no items to output this cycle
I0321 23:05:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:06:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:06:03.409781  543705 memory.go:184] no items to output this cycle
I0321 23:06:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 23:06:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:06:13.409803  543705 memory.go:191] Add success.
I0321 23:06:13.409815  543705 cpu.go:282] Add success.
W0321 23:06:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:06:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:06:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:06:13.420121  543705 net.go:648] Add success.
I0321 23:06:13.422892  543705 net.go:770] primary dev: ETH0
I0321 23:06:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:06:13.422917  543705 net.go:698] Add success.
I0321 23:06:13.463702  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"132652b2-4a2c-4747-b2e7-03119ccdb341","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:06:13.463735  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:06:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:06:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:06:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 23:06:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:06:14.456677  543705 disk_worker.go:494] system disk:vda1
I0321 23:06:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:06:15.455613  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:06:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:06:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:06:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:06:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:06:23.409768  543705 memory.go:184] no items to output this cycle
I0321 23:06:23.409789  543705 cpu.go:275] no items to output this cycle
I0321 23:06:31.825680  543705 disk_info.go:125] begin check local disk info of client
I0321 23:06:31.828232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:06:31.828239  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037c980 0xc00037c9c0]
E0321 23:06:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:06:33.409790  543705 memory.go:184] no items to output this cycle
I0321 23:06:33.409807  543705 cpu.go:275] no items to output this cycle
I0321 23:06:39.313742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:06:39.313749  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:06:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:06:43.410643  543705 memory.go:191] Add success.
I0321 23:06:43.409802  543705 cpu.go:282] Add success.
I0321 23:06:43.420331  543705 net.go:648] Add success.
I0321 23:06:43.422967  543705 net.go:770] primary dev: ETH0
I0321 23:06:43.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:06:43.422992  543705 net.go:698] Add success.
I0321 23:06:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:06:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:06:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:06:53.409805  543705 memory.go:184] no items to output this cycle
I0321 23:06:53.409812  543705 cpu.go:275] no items to output this cycle
E0321 23:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:07:03.409782  543705 memory.go:184] no items to output this cycle
I0321 23:07:03.409784  543705 cpu.go:275] no items to output this cycle
E0321 23:07:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:07:13.409795  543705 memory.go:191] Add success.
I0321 23:07:13.409795  543705 cpu.go:282] Add success.
W0321 23:07:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:07:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:07:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:07:13.420058  543705 net.go:648] Add success.
I0321 23:07:13.423053  543705 net.go:770] primary dev: ETH0
I0321 23:07:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:07:13.423078  543705 net.go:698] Add success.
I0321 23:07:13.453612  543705 event_worker.go:152] Polling the log file for events...
W0321 23:07:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:07:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0321 23:07:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:07:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:07:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:07:14.455919  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:07:14.456539  543705 disk_worker.go:494] system disk:vda1
I0321 23:07:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:07:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:07:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:07:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:07:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:07:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:07:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:07:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:07:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:07:23.409776  543705 memory.go:184] no items to output this cycle
I0321 23:07:23.409783  543705 cpu.go:275] no items to output this cycle
I0321 23:07:31.829679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:07:31.832217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:07:31.832223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002951c0 0xc000295200]
E0321 23:07:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:07:33.409795  543705 memory.go:184] no items to output this cycle
I0321 23:07:33.409817  543705 cpu.go:275] no items to output this cycle
E0321 23:07:43.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:07:43.409921  543705 cpu.go:282] Add success.
I0321 23:07:43.409927  543705 memory.go:191] Add success.
I0321 23:07:43.419723  543705 net.go:648] Add success.
I0321 23:07:43.422503  543705 net.go:770] primary dev: ETH0
I0321 23:07:43.422516  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:07:43.422528  543705 net.go:698] Add success.
I0321 23:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:07:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:07:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:07:53.410529  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:07:53.410550  543705 memory.go:184] no items to output this cycle
I0321 23:07:53.410559  543705 cpu.go:275] no items to output this cycle
E0321 23:08:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:08:03.409787  543705 memory.go:184] no items to output this cycle
I0321 23:08:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 23:08:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:08:13.409795  543705 cpu.go:282] Add success.
I0321 23:08:13.409798  543705 memory.go:191] Add success.
W0321 23:08:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:08:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:08:13.420050  543705 net.go:648] Add success.
I0321 23:08:13.422521  543705 net.go:770] primary dev: ETH0
I0321 23:08:13.422533  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:08:13.422545  543705 net.go:698] Add success.
I0321 23:08:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:08:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:08:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0321 23:08:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:08:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 23:08:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:08:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:08:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:08:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:08:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:08:23.409785  543705 memory.go:184] no items to output this cycle
I0321 23:08:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 23:08:31.833683  543705 disk_info.go:125] begin check local disk info of client
I0321 23:08:31.836289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:08:31.836296  543705 disk_info.go:196] parse disk info done, disk is : [0xc000294300 0xc000294380]
E0321 23:08:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:08:33.409801  543705 memory.go:184] no items to output this cycle
I0321 23:08:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 23:08:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:08:43.409799  543705 memory.go:191] Add success.
I0321 23:08:43.409813  543705 cpu.go:282] Add success.
I0321 23:08:43.420007  543705 net.go:648] Add success.
I0321 23:08:43.422780  543705 net.go:770] primary dev: ETH0
I0321 23:08:43.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:08:43.422808  543705 net.go:698] Add success.
I0321 23:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:08:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:08:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:08:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:08:53.409818  543705 memory.go:184] no items to output this cycle
I0321 23:08:53.409826  543705 cpu.go:275] no items to output this cycle
E0321 23:09:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:09:03.409777  543705 memory.go:184] no items to output this cycle
I0321 23:09:03.409791  543705 cpu.go:275] no items to output this cycle
W0321 23:09:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:09:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:09:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:09:13.409840  543705 cpu.go:282] Add success.
E0321 23:09:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:09:13.409859  543705 memory.go:191] Add success.
I0321 23:09:13.420201  543705 net.go:648] Add success.
I0321 23:09:13.423040  543705 net.go:770] primary dev: ETH0
I0321 23:09:13.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:09:13.423065  543705 net.go:698] Add success.
I0321 23:09:13.469376  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fb14f337-3d36-4740-a528-cba8d84098ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:09:13.469409  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:09:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:09:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:09:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0321 23:09:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:09:14.456586  543705 disk_worker.go:494] system disk:vda1
I0321 23:09:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:09:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:09:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:09:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:09:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:09:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:09:23.409766  543705 memory.go:184] no items to output this cycle
I0321 23:09:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 23:09:31.837676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:09:31.840289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:09:31.840296  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a680 0xc00027a6c0]
E0321 23:09:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:09:33.409781  543705 cpu.go:275] no items to output this cycle
I0321 23:09:33.409781  543705 memory.go:184] no items to output this cycle
I0321 23:09:39.317736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:09:39.317744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:09:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:09:43.410669  543705 memory.go:191] Add success.
I0321 23:09:43.409793  543705 cpu.go:282] Add success.
I0321 23:09:43.420523  543705 net.go:648] Add success.
I0321 23:09:43.423244  543705 net.go:770] primary dev: ETH0
I0321 23:09:43.423257  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:09:43.423268  543705 net.go:698] Add success.
I0321 23:09:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:09:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:09:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:09:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:09:53.409777  543705 memory.go:184] no items to output this cycle
I0321 23:09:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 23:10:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:10:03.409784  543705 memory.go:184] no items to output this cycle
I0321 23:10:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 23:10:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:10:13.409809  543705 memory.go:191] Add success.
I0321 23:10:13.409815  543705 cpu.go:282] Add success.
W0321 23:10:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:10:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:10:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:10:13.420151  543705 net.go:648] Add success.
I0321 23:10:13.422683  543705 net.go:770] primary dev: ETH0
I0321 23:10:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:10:13.422710  543705 net.go:698] Add success.
I0321 23:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:10:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:10:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0321 23:10:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:10:14.456571  543705 disk_worker.go:494] system disk:vda1
I0321 23:10:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:10:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:10:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:10:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:10:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:10:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:10:23.409765  543705 memory.go:184] no items to output this cycle
I0321 23:10:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 23:10:31.841677  543705 disk_info.go:125] begin check local disk info of client
I0321 23:10:31.844210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:10:31.844217  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b080 0xc00007b0c0]
E0321 23:10:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:10:33.409766  543705 memory.go:184] no items to output this cycle
I0321 23:10:33.409778  543705 cpu.go:275] no items to output this cycle
E0321 23:10:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:10:43.409786  543705 memory.go:191] Add success.
I0321 23:10:43.409807  543705 cpu.go:282] Add success.
I0321 23:10:43.420023  543705 net.go:648] Add success.
I0321 23:10:43.423244  543705 net.go:770] primary dev: ETH0
I0321 23:10:43.423257  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:10:43.423271  543705 net.go:698] Add success.
I0321 23:10:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:10:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:10:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:10:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:10:53.409875  543705 cpu.go:275] no items to output this cycle
I0321 23:10:53.409891  543705 memory.go:184] no items to output this cycle
E0321 23:11:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:11:03.409782  543705 cpu.go:275] no items to output this cycle
I0321 23:11:03.409783  543705 memory.go:184] no items to output this cycle
E0321 23:11:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:11:13.409810  543705 memory.go:191] Add success.
I0321 23:11:13.409819  543705 cpu.go:282] Add success.
W0321 23:11:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:11:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:11:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:11:13.420174  543705 net.go:648] Add success.
I0321 23:11:13.423297  543705 net.go:770] primary dev: ETH0
I0321 23:11:13.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:11:13.423322  543705 net.go:698] Add success.
I0321 23:11:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:11:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:11:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0321 23:11:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:11:14.456499  543705 disk_worker.go:494] system disk:vda1
I0321 23:11:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:11:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:11:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:11:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:11:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:11:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:11:23.409779  543705 memory.go:184] no items to output this cycle
I0321 23:11:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 23:11:31.845679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:11:31.848266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:11:31.848273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae80 0xc0001aaec0]
E0321 23:11:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:11:33.409772  543705 memory.go:184] no items to output this cycle
I0321 23:11:33.409772  543705 cpu.go:275] no items to output this cycle
E0321 23:11:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:11:43.409787  543705 memory.go:191] Add success.
I0321 23:11:43.409806  543705 cpu.go:282] Add success.
I0321 23:11:43.419852  543705 net.go:648] Add success.
I0321 23:11:43.422627  543705 net.go:770] primary dev: ETH0
I0321 23:11:43.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:11:43.422654  543705 net.go:698] Add success.
I0321 23:11:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:11:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:11:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:11:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:11:53.410269  543705 memory.go:184] no items to output this cycle
I0321 23:11:53.410292  543705 cpu.go:275] no items to output this cycle
E0321 23:12:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:12:03.409777  543705 memory.go:184] no items to output this cycle
I0321 23:12:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 23:12:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:12:13.409797  543705 memory.go:191] Add success.
I0321 23:12:13.409798  543705 cpu.go:282] Add success.
W0321 23:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:12:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:12:13.420174  543705 net.go:648] Add success.
I0321 23:12:13.423207  543705 net.go:770] primary dev: ETH0
I0321 23:12:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:12:13.423234  543705 net.go:698] Add success.
I0321 23:12:13.475798  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44e40f85-3b01-4ca3-944a-a894daf0fb92","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:12:13.475830  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 23:12:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:12:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0321 23:12:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:12:14.455998  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:12:14.456007  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:12:14.456012  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:12:14.456441  543705 disk_worker.go:494] system disk:vda1
I0321 23:12:14.456469  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:12:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:12:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 23:12:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:12:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:12:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:12:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:12:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:12:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:12:23.409798  543705 memory.go:184] no items to output this cycle
I0321 23:12:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 23:12:31.849678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:12:31.852232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:12:31.852239  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491bc0 0xc000491c00]
E0321 23:12:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:12:33.409771  543705 memory.go:184] no items to output this cycle
I0321 23:12:33.409779  543705 cpu.go:275] no items to output this cycle
I0321 23:12:39.321732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:12:39.321739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:12:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:12:43.410642  543705 memory.go:191] Add success.
I0321 23:12:43.409815  543705 cpu.go:282] Add success.
I0321 23:12:43.420342  543705 net.go:648] Add success.
I0321 23:12:43.422940  543705 net.go:770] primary dev: ETH0
I0321 23:12:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:12:43.422963  543705 net.go:698] Add success.
I0321 23:12:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:12:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:12:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:12:53.409777  543705 memory.go:184] no items to output this cycle
I0321 23:12:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 23:13:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:13:03.409765  543705 memory.go:184] no items to output this cycle
I0321 23:13:03.409803  543705 cpu.go:275] no items to output this cycle
E0321 23:13:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:13:13.409814  543705 memory.go:191] Add success.
I0321 23:13:13.409819  543705 cpu.go:282] Add success.
W0321 23:13:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:13:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:13:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:13:13.420286  543705 net.go:648] Add success.
I0321 23:13:13.422992  543705 net.go:770] primary dev: ETH0
I0321 23:13:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:13:13.423018  543705 net.go:698] Add success.
I0321 23:13:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:13:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:13:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 23:13:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:13:14.456502  543705 disk_worker.go:494] system disk:vda1
I0321 23:13:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:13:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:13:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:13:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:13:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:13:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:13:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:13:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 23:13:23.409782  543705 memory.go:184] no items to output this cycle
I0321 23:13:31.853680  543705 disk_info.go:125] begin check local disk info of client
I0321 23:13:31.856229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:13:31.856235  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b2c0 0xc00007b300]
E0321 23:13:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:13:33.409792  543705 memory.go:184] no items to output this cycle
I0321 23:13:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 23:13:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:13:43.409782  543705 memory.go:191] Add success.
I0321 23:13:43.409802  543705 cpu.go:282] Add success.
I0321 23:13:43.419913  543705 net.go:648] Add success.
I0321 23:13:43.422678  543705 net.go:770] primary dev: ETH0
I0321 23:13:43.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:13:43.422703  543705 net.go:698] Add success.
I0321 23:13:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:13:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:13:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:13:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:13:53.409797  543705 memory.go:184] no items to output this cycle
I0321 23:13:53.409810  543705 cpu.go:275] no items to output this cycle
E0321 23:14:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:14:03.409795  543705 memory.go:184] no items to output this cycle
I0321 23:14:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 23:14:13.409832  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:14:13.409859  543705 memory.go:191] Add success.
I0321 23:14:13.409945  543705 cpu.go:282] Add success.
W0321 23:14:13.409958  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:14:13.409980  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:14:13.409985  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:14:13.419717  543705 net.go:648] Add success.
I0321 23:14:13.422027  543705 net.go:770] primary dev: ETH0
I0321 23:14:13.422040  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:14:13.422051  543705 net.go:698] Add success.
I0321 23:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:14:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:14:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0321 23:14:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:14:14.456486  543705 disk_worker.go:494] system disk:vda1
I0321 23:14:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:14:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:14:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:14:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:14:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:14:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:14:23.409772  543705 memory.go:184] no items to output this cycle
I0321 23:14:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 23:14:31.857678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:14:31.860275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:14:31.860283  543705 disk_info.go:196] parse disk info done, disk is : [0xc000349240 0xc000349280]
E0321 23:14:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:14:33.409770  543705 memory.go:184] no items to output this cycle
I0321 23:14:33.409792  543705 cpu.go:275] no items to output this cycle
E0321 23:14:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:14:43.409808  543705 memory.go:191] Add success.
I0321 23:14:43.409817  543705 cpu.go:282] Add success.
I0321 23:14:43.419863  543705 net.go:648] Add success.
I0321 23:14:43.422748  543705 net.go:770] primary dev: ETH0
I0321 23:14:43.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:14:43.422774  543705 net.go:698] Add success.
I0321 23:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:14:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:14:53.410361  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:14:53.410380  543705 memory.go:184] no items to output this cycle
I0321 23:14:53.410391  543705 cpu.go:275] no items to output this cycle
E0321 23:15:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:15:03.409794  543705 memory.go:184] no items to output this cycle
I0321 23:15:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:15:13.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:15:13.409898  543705 memory.go:191] Add success.
W0321 23:15:13.409924  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:15:13.409936  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:15:13.409939  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:15:13.410026  543705 cpu.go:282] Add success.
I0321 23:15:13.419717  543705 net.go:648] Add success.
I0321 23:15:13.422403  543705 net.go:770] primary dev: ETH0
I0321 23:15:13.422416  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:15:13.422427  543705 net.go:698] Add success.
I0321 23:15:13.463407  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"693560e6-61b5-43a3-a7ae-c60bffbc021a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:15:13.463436  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:15:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:15:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:15:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0321 23:15:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:15:14.456469  543705 disk_worker.go:494] system disk:vda1
I0321 23:15:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:15:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:15:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:15:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:15:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:15:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:15:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:15:23.409778  543705 memory.go:184] no items to output this cycle
I0321 23:15:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 23:15:31.861676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:15:31.864211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:15:31.864218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa8c0 0xc0001aa900]
E0321 23:15:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:15:33.409794  543705 memory.go:184] no items to output this cycle
I0321 23:15:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 23:15:39.325730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:15:39.325737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:15:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:15:43.410765  543705 memory.go:191] Add success.
I0321 23:15:43.409793  543705 cpu.go:282] Add success.
I0321 23:15:43.420491  543705 net.go:648] Add success.
I0321 23:15:43.422995  543705 net.go:770] primary dev: ETH0
I0321 23:15:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:15:43.423020  543705 net.go:698] Add success.
I0321 23:15:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:15:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:15:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:15:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:15:53.409799  543705 memory.go:184] no items to output this cycle
I0321 23:15:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 23:16:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:16:03.409781  543705 memory.go:184] no items to output this cycle
I0321 23:16:03.409787  543705 cpu.go:275] no items to output this cycle
E0321 23:16:13.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:16:13.409902  543705 memory.go:191] Add success.
I0321 23:16:13.409935  543705 cpu.go:282] Add success.
W0321 23:16:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:16:13.409972  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:16:13.409975  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:16:13.419713  543705 net.go:648] Add success.
I0321 23:16:13.422291  543705 net.go:770] primary dev: ETH0
I0321 23:16:13.422304  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:16:13.422315  543705 net.go:698] Add success.
I0321 23:16:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:16:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:16:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0321 23:16:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:16:14.456473  543705 disk_worker.go:494] system disk:vda1
I0321 23:16:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:16:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:16:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:16:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:16:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:16:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:16:23.409771  543705 memory.go:184] no items to output this cycle
I0321 23:16:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 23:16:31.865679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:16:31.868199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:16:31.868206  543705 disk_info.go:196] parse disk info done, disk is : [0xc000538680 0xc0005386c0]
E0321 23:16:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:16:33.409793  543705 memory.go:184] no items to output this cycle
I0321 23:16:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 23:16:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:16:43.409793  543705 memory.go:191] Add success.
I0321 23:16:43.409815  543705 cpu.go:282] Add success.
I0321 23:16:43.420075  543705 net.go:648] Add success.
I0321 23:16:43.422711  543705 net.go:770] primary dev: ETH0
I0321 23:16:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:16:43.422748  543705 net.go:698] Add success.
I0321 23:16:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:16:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:16:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:16:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:16:53.409787  543705 memory.go:184] no items to output this cycle
I0321 23:16:53.409790  543705 cpu.go:275] no items to output this cycle
E0321 23:17:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:17:03.409786  543705 memory.go:184] no items to output this cycle
I0321 23:17:03.409792  543705 cpu.go:275] no items to output this cycle
E0321 23:17:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:17:13.409802  543705 memory.go:191] Add success.
I0321 23:17:13.409806  543705 cpu.go:282] Add success.
W0321 23:17:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:17:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:17:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:17:13.420123  543705 net.go:648] Add success.
I0321 23:17:13.422774  543705 net.go:770] primary dev: ETH0
I0321 23:17:13.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:17:13.422799  543705 net.go:698] Add success.
I0321 23:17:13.453350  543705 event_worker.go:152] Polling the log file for events...
W0321 23:17:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:17:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0321 23:17:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:17:14.456906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:17:14.456916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:17:14.456922  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:17:14.456968  543705 disk_worker.go:494] system disk:vda1
I0321 23:17:14.457006  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:17:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:17:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:17:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:17:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:17:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:17:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:17:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:17:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:17:23.409786  543705 memory.go:184] no items to output this cycle
I0321 23:17:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 23:17:31.869676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:17:31.872259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:17:31.872265  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b500 0xc00007b540]
E0321 23:17:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:17:33.409805  543705 memory.go:184] no items to output this cycle
I0321 23:17:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 23:17:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:17:43.409814  543705 memory.go:191] Add success.
I0321 23:17:43.409814  543705 cpu.go:282] Add success.
I0321 23:17:43.420328  543705 net.go:648] Add success.
I0321 23:17:43.423422  543705 net.go:770] primary dev: ETH0
I0321 23:17:43.423435  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:17:43.423447  543705 net.go:698] Add success.
I0321 23:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:17:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:17:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:17:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:17:53.409786  543705 memory.go:184] no items to output this cycle
I0321 23:17:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 23:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:18:03.409779  543705 memory.go:184] no items to output this cycle
I0321 23:18:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:18:13.409814  543705 memory.go:191] Add success.
I0321 23:18:13.409823  543705 cpu.go:282] Add success.
W0321 23:18:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:18:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:18:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:18:13.420164  543705 net.go:648] Add success.
I0321 23:18:13.423207  543705 net.go:770] primary dev: ETH0
I0321 23:18:13.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:18:13.423234  543705 net.go:698] Add success.
I0321 23:18:13.469355  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db8b7b12-74b0-4642-862a-8638cca95fcc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:18:13.469387  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:18:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:18:14.455086  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:18:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0321 23:18:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:18:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 23:18:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:18:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:18:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:18:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:18:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:18:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:18:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:18:23.409777  543705 memory.go:184] no items to output this cycle
I0321 23:18:23.409803  543705 cpu.go:275] no items to output this cycle
I0321 23:18:31.873679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:18:31.876265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:18:31.876272  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348300 0xc000348340]
E0321 23:18:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:18:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 23:18:33.409794  543705 memory.go:184] no items to output this cycle
I0321 23:18:39.329754  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:18:39.329762  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:18:43.410611  543705 memory.go:191] Add success.
I0321 23:18:43.409829  543705 cpu.go:282] Add success.
I0321 23:18:43.420389  543705 net.go:648] Add success.
I0321 23:18:43.422986  543705 net.go:770] primary dev: ETH0
I0321 23:18:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:18:43.423013  543705 net.go:698] Add success.
I0321 23:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:18:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:18:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:18:53.409809  543705 memory.go:184] no items to output this cycle
I0321 23:18:53.409821  543705 cpu.go:275] no items to output this cycle
E0321 23:19:03.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:19:03.409876  543705 memory.go:184] no items to output this cycle
I0321 23:19:03.410029  543705 cpu.go:275] no items to output this cycle
E0321 23:19:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:19:13.409776  543705 memory.go:191] Add success.
W0321 23:19:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:19:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:19:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:19:13.409831  543705 cpu.go:282] Add success.
I0321 23:19:13.420268  543705 net.go:648] Add success.
I0321 23:19:13.423200  543705 net.go:770] primary dev: ETH0
I0321 23:19:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:19:13.423240  543705 net.go:698] Add success.
I0321 23:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:19:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:19:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0321 23:19:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:19:14.456569  543705 disk_worker.go:494] system disk:vda1
I0321 23:19:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:19:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:19:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:19:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:19:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:19:23.409785  543705 memory.go:184] no items to output this cycle
I0321 23:19:23.409811  543705 cpu.go:275] no items to output this cycle
I0321 23:19:31.877680  543705 disk_info.go:125] begin check local disk info of client
I0321 23:19:31.880272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:19:31.880281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aadc0 0xc0001aae00]
E0321 23:19:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:19:33.409800  543705 memory.go:184] no items to output this cycle
I0321 23:19:33.409812  543705 cpu.go:275] no items to output this cycle
E0321 23:19:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:19:43.409795  543705 memory.go:191] Add success.
I0321 23:19:43.409796  543705 cpu.go:282] Add success.
I0321 23:19:43.420248  543705 net.go:648] Add success.
I0321 23:19:43.423239  543705 net.go:770] primary dev: ETH0
I0321 23:19:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:19:43.423265  543705 net.go:698] Add success.
I0321 23:19:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:19:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:19:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:19:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:19:53.409771  543705 memory.go:184] no items to output this cycle
I0321 23:19:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 23:20:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:20:03.409773  543705 memory.go:184] no items to output this cycle
I0321 23:20:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:20:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:20:13.409799  543705 memory.go:191] Add success.
I0321 23:20:13.409801  543705 cpu.go:282] Add success.
W0321 23:20:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:20:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:20:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:20:13.420055  543705 net.go:648] Add success.
I0321 23:20:13.422814  543705 net.go:770] primary dev: ETH0
I0321 23:20:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:20:13.422842  543705 net.go:698] Add success.
I0321 23:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:20:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:20:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 23:20:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:20:14.456572  543705 disk_worker.go:494] system disk:vda1
I0321 23:20:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:20:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:20:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:20:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:20:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:20:23.409776  543705 memory.go:184] no items to output this cycle
I0321 23:20:23.409797  543705 cpu.go:275] no items to output this cycle
I0321 23:20:31.881679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:20:31.884226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:20:31.884233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d81c0 0xc0004d8200]
E0321 23:20:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:20:33.409796  543705 memory.go:184] no items to output this cycle
I0321 23:20:33.409819  543705 cpu.go:275] no items to output this cycle
E0321 23:20:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:20:43.409789  543705 memory.go:191] Add success.
I0321 23:20:43.409808  543705 cpu.go:282] Add success.
I0321 23:20:43.419873  543705 net.go:648] Add success.
I0321 23:20:43.422713  543705 net.go:770] primary dev: ETH0
I0321 23:20:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:20:43.422738  543705 net.go:698] Add success.
I0321 23:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:20:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:20:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:20:53.409773  543705 memory.go:184] no items to output this cycle
I0321 23:20:53.409777  543705 cpu.go:275] no items to output this cycle
E0321 23:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:21:03.409798  543705 memory.go:184] no items to output this cycle
I0321 23:21:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 23:21:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:21:13.409779  543705 memory.go:191] Add success.
W0321 23:21:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 23:21:13.409811  543705 cpu.go:282] Add success.
W0321 23:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:21:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:21:13.420410  543705 net.go:648] Add success.
I0321 23:21:13.423340  543705 net.go:770] primary dev: ETH0
I0321 23:21:13.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:21:13.423368  543705 net.go:698] Add success.
I0321 23:21:13.467447  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"282c0784-fe33-4bee-86a2-7dffe1c270db","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:21:13.467477  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:21:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:21:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:21:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0321 23:21:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:21:14.456499  543705 disk_worker.go:494] system disk:vda1
I0321 23:21:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:21:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:21:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:21:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:21:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:21:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:21:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:21:23.409805  543705 memory.go:184] no items to output this cycle
I0321 23:21:23.409816  543705 cpu.go:275] no items to output this cycle
I0321 23:21:31.885681  543705 disk_info.go:125] begin check local disk info of client
I0321 23:21:31.888286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:21:31.888294  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366900 0xc000366940]
E0321 23:21:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:21:33.409778  543705 memory.go:184] no items to output this cycle
I0321 23:21:33.409792  543705 cpu.go:275] no items to output this cycle
I0321 23:21:39.333733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:21:39.333739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:21:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:21:43.410620  543705 memory.go:191] Add success.
I0321 23:21:43.409810  543705 cpu.go:282] Add success.
I0321 23:21:43.420337  543705 net.go:648] Add success.
I0321 23:21:43.422816  543705 net.go:770] primary dev: ETH0
I0321 23:21:43.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:21:43.422845  543705 net.go:698] Add success.
I0321 23:21:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:21:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:21:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:21:53.409781  543705 memory.go:184] no items to output this cycle
I0321 23:21:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 23:22:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:22:03.409763  543705 memory.go:184] no items to output this cycle
I0321 23:22:03.409801  543705 cpu.go:275] no items to output this cycle
E0321 23:22:13.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:22:13.409892  543705 memory.go:191] Add success.
W0321 23:22:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:22:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:22:13.409942  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:22:13.409952  543705 cpu.go:282] Add success.
I0321 23:22:13.419710  543705 net.go:648] Add success.
I0321 23:22:13.422098  543705 net.go:770] primary dev: ETH0
I0321 23:22:13.422110  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:22:13.422122  543705 net.go:698] Add success.
W0321 23:22:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:22:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0321 23:22:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:22:14.455939  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:22:14.455948  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:22:14.455954  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:22:14.456546  543705 disk_worker.go:494] system disk:vda1
I0321 23:22:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:22:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:22:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:22:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:22:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:22:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:22:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:22:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:22:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:22:23.409798  543705 memory.go:184] no items to output this cycle
I0321 23:22:23.409809  543705 cpu.go:275] no items to output this cycle
I0321 23:22:31.889682  543705 disk_info.go:125] begin check local disk info of client
I0321 23:22:31.892265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:22:31.892274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1300 0xc0003c1340]
E0321 23:22:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:22:33.409779  543705 memory.go:184] no items to output this cycle
I0321 23:22:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 23:22:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:22:43.409794  543705 memory.go:191] Add success.
I0321 23:22:43.409796  543705 cpu.go:282] Add success.
I0321 23:22:43.419864  543705 net.go:648] Add success.
I0321 23:22:43.422433  543705 net.go:770] primary dev: ETH0
I0321 23:22:43.422447  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:22:43.422459  543705 net.go:698] Add success.
I0321 23:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:22:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:22:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:22:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:22:53.410403  543705 memory.go:184] no items to output this cycle
I0321 23:22:53.410409  543705 cpu.go:275] no items to output this cycle
E0321 23:23:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:23:03.409785  543705 memory.go:184] no items to output this cycle
I0321 23:23:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 23:23:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:23:13.409899  543705 memory.go:191] Add success.
I0321 23:23:13.409936  543705 cpu.go:282] Add success.
W0321 23:23:13.409938  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:23:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:23:13.409955  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:23:13.419723  543705 net.go:648] Add success.
I0321 23:23:13.422470  543705 net.go:770] primary dev: ETH0
I0321 23:23:13.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:23:13.422500  543705 net.go:698] Add success.
I0321 23:23:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:23:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:23:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 23:23:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:23:14.456831  543705 disk_worker.go:494] system disk:vda1
I0321 23:23:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:23:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:23:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:23:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:23:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:23:23.409765  543705 memory.go:184] no items to output this cycle
I0321 23:23:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 23:23:31.893683  543705 disk_info.go:125] begin check local disk info of client
I0321 23:23:31.896314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:23:31.896322  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ba40 0xc00047ba80]
E0321 23:23:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:23:33.409773  543705 memory.go:184] no items to output this cycle
I0321 23:23:33.409787  543705 cpu.go:275] no items to output this cycle
E0321 23:23:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:23:43.409780  543705 memory.go:191] Add success.
I0321 23:23:43.409811  543705 cpu.go:282] Add success.
I0321 23:23:43.419864  543705 net.go:648] Add success.
I0321 23:23:43.422598  543705 net.go:770] primary dev: ETH0
I0321 23:23:43.422618  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:23:43.422638  543705 net.go:698] Add success.
I0321 23:23:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:23:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:23:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:23:53.410347  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:23:53.410364  543705 memory.go:184] no items to output this cycle
I0321 23:23:53.410378  543705 cpu.go:275] no items to output this cycle
E0321 23:24:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:24:03.409776  543705 memory.go:184] no items to output this cycle
I0321 23:24:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 23:24:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:24:13.409808  543705 memory.go:191] Add success.
W0321 23:24:13.409918  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:24:13.409935  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:24:13.409938  543705 cpu.go:282] Add success.
I0321 23:24:13.409938  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:24:13.419715  543705 net.go:648] Add success.
I0321 23:24:13.422583  543705 net.go:770] primary dev: ETH0
I0321 23:24:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:24:13.422607  543705 net.go:698] Add success.
I0321 23:24:13.463000  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d602ec92-b30e-4ece-b445-0945cb4468dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:24:13.463030  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:24:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:24:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:24:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0321 23:24:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:24:14.456707  543705 disk_worker.go:494] system disk:vda1
I0321 23:24:14.456736  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:24:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:24:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:24:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:24:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:24:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:24:23.409782  543705 memory.go:184] no items to output this cycle
I0321 23:24:23.409787  543705 cpu.go:275] no items to output this cycle
I0321 23:24:31.897683  543705 disk_info.go:125] begin check local disk info of client
I0321 23:24:31.900257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:24:31.900265  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b080 0xc00007b0c0]
E0321 23:24:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:24:33.409765  543705 memory.go:184] no items to output this cycle
I0321 23:24:33.409803  543705 cpu.go:275] no items to output this cycle
I0321 23:24:39.337737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:24:39.337744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:24:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:24:43.410545  543705 memory.go:191] Add success.
I0321 23:24:43.409809  543705 cpu.go:282] Add success.
I0321 23:24:43.420234  543705 net.go:648] Add success.
I0321 23:24:43.422826  543705 net.go:770] primary dev: ETH0
I0321 23:24:43.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:24:43.422855  543705 net.go:698] Add success.
I0321 23:24:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:24:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:24:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:24:53.409798  543705 memory.go:184] no items to output this cycle
I0321 23:24:53.409804  543705 cpu.go:275] no items to output this cycle
E0321 23:25:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:25:03.409782  543705 memory.go:184] no items to output this cycle
I0321 23:25:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:25:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:25:13.409790  543705 memory.go:191] Add success.
I0321 23:25:13.409814  543705 cpu.go:282] Add success.
W0321 23:25:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:25:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:25:13.420154  543705 net.go:648] Add success.
I0321 23:25:13.422698  543705 net.go:770] primary dev: ETH0
I0321 23:25:13.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:25:13.422727  543705 net.go:698] Add success.
I0321 23:25:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:25:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:25:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0321 23:25:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:25:14.456580  543705 disk_worker.go:494] system disk:vda1
I0321 23:25:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:25:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:25:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:25:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:25:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:25:23.410407  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:25:23.410422  543705 memory.go:184] no items to output this cycle
I0321 23:25:23.410448  543705 cpu.go:275] no items to output this cycle
I0321 23:25:31.901682  543705 disk_info.go:125] begin check local disk info of client
I0321 23:25:31.904323  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:25:31.904330  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046adc0 0xc00046ae00]
E0321 23:25:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:25:33.409775  543705 memory.go:184] no items to output this cycle
I0321 23:25:33.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:25:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:25:43.409819  543705 memory.go:191] Add success.
I0321 23:25:43.409833  543705 cpu.go:282] Add success.
I0321 23:25:43.419987  543705 net.go:648] Add success.
I0321 23:25:43.422459  543705 net.go:770] primary dev: ETH0
I0321 23:25:43.422474  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:25:43.422490  543705 net.go:698] Add success.
I0321 23:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:25:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:25:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:25:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:25:53.409779  543705 memory.go:184] no items to output this cycle
I0321 23:25:53.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:26:03.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:26:03.409895  543705 cpu.go:275] no items to output this cycle
I0321 23:26:03.409899  543705 memory.go:184] no items to output this cycle
E0321 23:26:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:26:13.409793  543705 memory.go:191] Add success.
I0321 23:26:13.409798  543705 cpu.go:282] Add success.
W0321 23:26:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:26:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:26:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:26:13.420079  543705 net.go:648] Add success.
I0321 23:26:13.422654  543705 net.go:770] primary dev: ETH0
I0321 23:26:13.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:26:13.422679  543705 net.go:698] Add success.
I0321 23:26:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:26:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:26:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0321 23:26:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:26:14.456513  543705 disk_worker.go:494] system disk:vda1
I0321 23:26:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:26:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:26:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:26:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:26:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:26:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:26:23.409801  543705 memory.go:184] no items to output this cycle
I0321 23:26:23.409812  543705 cpu.go:275] no items to output this cycle
I0321 23:26:31.905770  543705 disk_info.go:125] begin check local disk info of client
I0321 23:26:31.908396  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:26:31.908403  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b940 0xc00007b980]
E0321 23:26:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:26:33.409786  543705 memory.go:184] no items to output this cycle
I0321 23:26:33.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:26:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:26:43.409791  543705 memory.go:191] Add success.
I0321 23:26:43.409791  543705 cpu.go:282] Add success.
I0321 23:26:43.419908  543705 net.go:648] Add success.
I0321 23:26:43.422766  543705 net.go:770] primary dev: ETH0
I0321 23:26:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:26:43.422795  543705 net.go:698] Add success.
I0321 23:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:26:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:26:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:26:53.410266  543705 memory.go:184] no items to output this cycle
I0321 23:26:53.410305  543705 cpu.go:275] no items to output this cycle
E0321 23:27:03.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:27:03.409911  543705 memory.go:184] no items to output this cycle
I0321 23:27:03.409926  543705 cpu.go:275] no items to output this cycle
E0321 23:27:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:27:13.409781  543705 memory.go:191] Add success.
W0321 23:27:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 23:27:13.409816  543705 cpu.go:282] Add success.
W0321 23:27:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:27:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:27:13.420274  543705 net.go:648] Add success.
I0321 23:27:13.423034  543705 net.go:770] primary dev: ETH0
I0321 23:27:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:27:13.423058  543705 net.go:698] Add success.
I0321 23:27:13.429199  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 23:27:13.453379  543705 event_worker.go:152] Polling the log file for events...
I0321 23:27:13.463184  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9dd8f987-f675-4f9a-a94d-ca4f3cf8a6ed","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:27:13.463217  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 23:27:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:27:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 23:27:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:27:14.456828  543705 disk_worker.go:494] system disk:vda1
I0321 23:27:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:27:14.456872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:27:14.456879  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:27:14.456884  543705 custom_config.go:64] query custom config with name: gpu
E0321 23:27:15.456872  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:27:15.456881  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:27:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:27:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:27:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:27:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:27:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:27:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:27:23.409793  543705 memory.go:184] no items to output this cycle
I0321 23:27:23.409806  543705 cpu.go:275] no items to output this cycle
I0321 23:27:31.909697  543705 disk_info.go:125] begin check local disk info of client
I0321 23:27:31.912233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:27:31.912241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1540 0xc0003b1580]
E0321 23:27:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:27:33.409795  543705 memory.go:184] no items to output this cycle
I0321 23:27:33.409813  543705 cpu.go:275] no items to output this cycle
I0321 23:27:39.341734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:27:39.341741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:27:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:27:43.410533  543705 memory.go:191] Add success.
I0321 23:27:43.409795  543705 cpu.go:282] Add success.
I0321 23:27:43.420229  543705 net.go:648] Add success.
I0321 23:27:43.422586  543705 net.go:770] primary dev: ETH0
I0321 23:27:43.422599  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:27:43.422612  543705 net.go:698] Add success.
I0321 23:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:27:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:27:53.409778  543705 memory.go:184] no items to output this cycle
I0321 23:27:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 23:28:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:28:03.409774  543705 memory.go:184] no items to output this cycle
I0321 23:28:03.409798  543705 cpu.go:275] no items to output this cycle
E0321 23:28:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:28:13.409797  543705 memory.go:191] Add success.
I0321 23:28:13.409805  543705 cpu.go:282] Add success.
W0321 23:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:28:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:28:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:28:13.420169  543705 net.go:648] Add success.
I0321 23:28:13.422813  543705 net.go:770] primary dev: ETH0
I0321 23:28:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:28:13.422837  543705 net.go:698] Add success.
I0321 23:28:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:28:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:28:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 23:28:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:28:14.456481  543705 disk_worker.go:494] system disk:vda1
I0321 23:28:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:28:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:28:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:28:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:28:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:28:23.409785  543705 memory.go:184] no items to output this cycle
I0321 23:28:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 23:28:31.913693  543705 disk_info.go:125] begin check local disk info of client
I0321 23:28:31.916148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:28:31.916156  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8280 0xc0004d82c0]
E0321 23:28:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:28:33.409782  543705 memory.go:184] no items to output this cycle
I0321 23:28:33.409813  543705 cpu.go:275] no items to output this cycle
E0321 23:28:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:28:43.409803  543705 memory.go:191] Add success.
I0321 23:28:43.409833  543705 cpu.go:282] Add success.
I0321 23:28:43.419876  543705 net.go:648] Add success.
I0321 23:28:43.422775  543705 net.go:770] primary dev: ETH0
I0321 23:28:43.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:28:43.422812  543705 net.go:698] Add success.
I0321 23:28:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:28:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:28:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:28:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:28:53.409781  543705 memory.go:184] no items to output this cycle
I0321 23:28:53.409814  543705 cpu.go:275] no items to output this cycle
E0321 23:29:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:29:03.409814  543705 memory.go:184] no items to output this cycle
I0321 23:29:03.409824  543705 cpu.go:275] no items to output this cycle
E0321 23:29:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:29:13.409829  543705 memory.go:191] Add success.
I0321 23:29:13.409847  543705 cpu.go:282] Add success.
W0321 23:29:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:29:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:29:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:29:13.420269  543705 net.go:648] Add success.
I0321 23:29:13.423314  543705 net.go:770] primary dev: ETH0
I0321 23:29:13.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:29:13.423345  543705 net.go:698] Add success.
I0321 23:29:14.453953  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:29:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:29:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 23:29:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:29:14.456547  543705 disk_worker.go:494] system disk:vda1
I0321 23:29:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:29:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:29:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:29:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:29:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:29:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:29:23.409812  543705 memory.go:184] no items to output this cycle
I0321 23:29:23.409822  543705 cpu.go:275] no items to output this cycle
I0321 23:29:31.917686  543705 disk_info.go:125] begin check local disk info of client
I0321 23:29:31.920196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:29:31.920203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a280 0xc00047a2c0]
E0321 23:29:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:29:33.409816  543705 memory.go:184] no items to output this cycle
I0321 23:29:33.409826  543705 cpu.go:275] no items to output this cycle
E0321 23:29:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:29:43.409780  543705 memory.go:191] Add success.
I0321 23:29:43.409808  543705 cpu.go:282] Add success.
I0321 23:29:43.419956  543705 net.go:648] Add success.
I0321 23:29:43.422943  543705 net.go:770] primary dev: ETH0
I0321 23:29:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:29:43.422970  543705 net.go:698] Add success.
I0321 23:29:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:29:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:29:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:29:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:29:53.409780  543705 cpu.go:275] no items to output this cycle
I0321 23:29:53.409783  543705 memory.go:184] no items to output this cycle
E0321 23:30:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:30:03.409762  543705 memory.go:184] no items to output this cycle
I0321 23:30:03.409794  543705 cpu.go:275] no items to output this cycle
E0321 23:30:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:30:13.409775  543705 memory.go:191] Add success.
I0321 23:30:13.409798  543705 cpu.go:282] Add success.
W0321 23:30:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:30:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:30:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:30:13.420086  543705 net.go:648] Add success.
I0321 23:30:13.422573  543705 net.go:770] primary dev: ETH0
I0321 23:30:13.422586  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:30:13.422598  543705 net.go:698] Add success.
I0321 23:30:13.635085  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e1dbc125-86ef-484b-8710-d7a859416365","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:30:13.635117  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:30:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:30:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:30:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0321 23:30:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:30:14.456722  543705 disk_worker.go:494] system disk:vda1
I0321 23:30:14.456751  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:30:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:30:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:30:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:30:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:30:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:30:23.409766  543705 memory.go:184] no items to output this cycle
I0321 23:30:23.409804  543705 cpu.go:275] no items to output this cycle
I0321 23:30:31.921697  543705 disk_info.go:125] begin check local disk info of client
I0321 23:30:31.924116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:30:31.924124  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034fa80 0xc00034fac0]
E0321 23:30:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:30:33.409786  543705 memory.go:184] no items to output this cycle
I0321 23:30:33.409815  543705 cpu.go:275] no items to output this cycle
I0321 23:30:39.345737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:30:39.345744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:30:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:30:43.411054  543705 memory.go:191] Add success.
I0321 23:30:43.409829  543705 cpu.go:282] Add success.
I0321 23:30:43.419689  543705 net.go:648] Add success.
I0321 23:30:43.423250  543705 net.go:770] primary dev: ETH0
I0321 23:30:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:30:43.423277  543705 net.go:698] Add success.
I0321 23:30:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:30:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:30:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:30:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:30:53.410261  543705 memory.go:184] no items to output this cycle
I0321 23:30:53.410275  543705 cpu.go:275] no items to output this cycle
E0321 23:31:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:31:03.409781  543705 memory.go:184] no items to output this cycle
I0321 23:31:03.409810  543705 cpu.go:275] no items to output this cycle
E0321 23:31:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:31:13.409813  543705 memory.go:191] Add success.
I0321 23:31:13.409816  543705 cpu.go:282] Add success.
W0321 23:31:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:31:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:31:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:31:13.420139  543705 net.go:648] Add success.
I0321 23:31:13.422776  543705 net.go:770] primary dev: ETH0
I0321 23:31:13.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:31:13.422807  543705 net.go:698] Add success.
I0321 23:31:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:31:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:31:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0321 23:31:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:31:14.456566  543705 disk_worker.go:494] system disk:vda1
I0321 23:31:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:31:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:31:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:31:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:31:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:31:23.409775  543705 memory.go:184] no items to output this cycle
I0321 23:31:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 23:31:31.925683  543705 disk_info.go:125] begin check local disk info of client
I0321 23:31:31.928214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:31:31.928222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028f200 0xc00028f240]
E0321 23:31:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:31:33.409781  543705 memory.go:184] no items to output this cycle
I0321 23:31:33.409826  543705 cpu.go:275] no items to output this cycle
E0321 23:31:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:31:43.409816  543705 memory.go:191] Add success.
I0321 23:31:43.409818  543705 cpu.go:282] Add success.
I0321 23:31:43.419934  543705 net.go:648] Add success.
I0321 23:31:43.422433  543705 net.go:770] primary dev: ETH0
I0321 23:31:43.422446  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:31:43.422458  543705 net.go:698] Add success.
I0321 23:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:31:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:31:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:31:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:31:53.409774  543705 memory.go:184] no items to output this cycle
I0321 23:31:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 23:32:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:32:03.409784  543705 memory.go:184] no items to output this cycle
I0321 23:32:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 23:32:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:32:13.409785  543705 memory.go:191] Add success.
I0321 23:32:13.409802  543705 cpu.go:282] Add success.
W0321 23:32:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:32:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:32:13.420163  543705 net.go:648] Add success.
I0321 23:32:13.422682  543705 net.go:770] primary dev: ETH0
I0321 23:32:13.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:32:13.422711  543705 net.go:698] Add success.
W0321 23:32:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:32:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 23:32:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:32:14.456400  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:32:14.456409  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:32:14.456415  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:32:14.456459  543705 disk_worker.go:494] system disk:vda1
I0321 23:32:14.456488  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:32:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:32:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:32:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:32:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:32:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:32:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:32:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:32:23.409919  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:32:23.410013  543705 cpu.go:275] no items to output this cycle
I0321 23:32:23.410098  543705 memory.go:184] no items to output this cycle
I0321 23:32:31.929679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:32:31.932124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:32:31.932132  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e000 0xc00037e040]
E0321 23:32:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:32:33.409794  543705 memory.go:184] no items to output this cycle
I0321 23:32:33.409797  543705 cpu.go:275] no items to output this cycle
E0321 23:32:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:32:43.409779  543705 memory.go:191] Add success.
I0321 23:32:43.409810  543705 cpu.go:282] Add success.
I0321 23:32:43.419955  543705 net.go:648] Add success.
I0321 23:32:43.422455  543705 net.go:770] primary dev: ETH0
I0321 23:32:43.422469  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:32:43.422481  543705 net.go:698] Add success.
I0321 23:32:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:32:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:32:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:32:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:32:53.409767  543705 memory.go:184] no items to output this cycle
I0321 23:32:53.409776  543705 cpu.go:275] no items to output this cycle
E0321 23:33:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:33:03.409770  543705 memory.go:184] no items to output this cycle
I0321 23:33:03.409812  543705 cpu.go:275] no items to output this cycle
E0321 23:33:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:33:13.409812  543705 memory.go:191] Add success.
I0321 23:33:13.409817  543705 cpu.go:282] Add success.
W0321 23:33:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:33:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:33:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:33:13.420058  543705 net.go:648] Add success.
I0321 23:33:13.422690  543705 net.go:770] primary dev: ETH0
I0321 23:33:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:33:13.422715  543705 net.go:698] Add success.
I0321 23:33:13.468281  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b6f379b3-3066-4b4c-b2e1-e1417b68e60d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:33:13.468316  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:33:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:33:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:33:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0321 23:33:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:33:14.456595  543705 disk_worker.go:494] system disk:vda1
I0321 23:33:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:33:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:33:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:33:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:33:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:33:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:33:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:33:23.409775  543705 memory.go:184] no items to output this cycle
I0321 23:33:23.409778  543705 cpu.go:275] no items to output this cycle
I0321 23:33:31.933700  543705 disk_info.go:125] begin check local disk info of client
I0321 23:33:31.936129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:33:31.936138  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304000 0xc000304040]
E0321 23:33:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:33:33.409782  543705 memory.go:184] no items to output this cycle
I0321 23:33:33.409825  543705 cpu.go:275] no items to output this cycle
I0321 23:33:39.349742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:33:39.349748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:33:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:33:43.410698  543705 memory.go:191] Add success.
I0321 23:33:43.409810  543705 cpu.go:282] Add success.
I0321 23:33:43.420385  543705 net.go:648] Add success.
I0321 23:33:43.423097  543705 net.go:770] primary dev: ETH0
I0321 23:33:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:33:43.423127  543705 net.go:698] Add success.
I0321 23:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:33:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:33:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:33:53.409784  543705 memory.go:184] no items to output this cycle
I0321 23:33:53.409788  543705 cpu.go:275] no items to output this cycle
E0321 23:34:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:34:03.409769  543705 memory.go:184] no items to output this cycle
I0321 23:34:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 23:34:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:34:13.409810  543705 memory.go:191] Add success.
I0321 23:34:13.409813  543705 cpu.go:282] Add success.
W0321 23:34:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:34:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:34:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:34:13.420061  543705 net.go:648] Add success.
I0321 23:34:13.422866  543705 net.go:770] primary dev: ETH0
I0321 23:34:13.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:34:13.422894  543705 net.go:698] Add success.
I0321 23:34:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:34:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:34:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0321 23:34:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:34:14.456788  543705 disk_worker.go:494] system disk:vda1
I0321 23:34:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:34:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:34:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:34:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:34:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:34:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:34:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:34:23.409788  543705 cpu.go:275] no items to output this cycle
I0321 23:34:23.409796  543705 memory.go:184] no items to output this cycle
I0321 23:34:31.937687  543705 disk_info.go:125] begin check local disk info of client
I0321 23:34:31.940143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:34:31.940152  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052b800 0xc00052b840]
E0321 23:34:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:34:33.409782  543705 memory.go:184] no items to output this cycle
I0321 23:34:33.409801  543705 cpu.go:275] no items to output this cycle
E0321 23:34:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:34:43.409815  543705 memory.go:191] Add success.
I0321 23:34:43.409819  543705 cpu.go:282] Add success.
I0321 23:34:43.420086  543705 net.go:648] Add success.
I0321 23:34:43.422697  543705 net.go:770] primary dev: ETH0
I0321 23:34:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:34:43.422722  543705 net.go:698] Add success.
I0321 23:34:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:34:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:34:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:34:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:34:53.409784  543705 memory.go:184] no items to output this cycle
I0321 23:34:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 23:35:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:35:03.409777  543705 memory.go:184] no items to output this cycle
I0321 23:35:03.409781  543705 cpu.go:275] no items to output this cycle
E0321 23:35:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:35:13.409785  543705 cpu.go:282] Add success.
I0321 23:35:13.409791  543705 memory.go:191] Add success.
W0321 23:35:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:35:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:35:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:35:13.420459  543705 net.go:648] Add success.
I0321 23:35:13.423466  543705 net.go:770] primary dev: ETH0
I0321 23:35:13.423479  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:35:13.423494  543705 net.go:698] Add success.
I0321 23:35:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:35:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:35:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0321 23:35:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:35:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 23:35:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:35:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:35:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:35:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:35:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:35:23.409778  543705 memory.go:184] no items to output this cycle
I0321 23:35:23.409784  543705 cpu.go:275] no items to output this cycle
I0321 23:35:31.941686  543705 disk_info.go:125] begin check local disk info of client
I0321 23:35:31.944242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:35:31.944249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2580 0xc0004a25c0]
E0321 23:35:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:35:33.409792  543705 memory.go:184] no items to output this cycle
I0321 23:35:33.409793  543705 cpu.go:275] no items to output this cycle
E0321 23:35:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:35:43.409791  543705 memory.go:191] Add success.
I0321 23:35:43.409794  543705 cpu.go:282] Add success.
I0321 23:35:43.419864  543705 net.go:648] Add success.
I0321 23:35:43.422378  543705 net.go:770] primary dev: ETH0
I0321 23:35:43.422391  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:35:43.422405  543705 net.go:698] Add success.
I0321 23:35:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:35:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:35:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:35:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:35:53.409796  543705 memory.go:184] no items to output this cycle
I0321 23:35:53.409808  543705 cpu.go:275] no items to output this cycle
E0321 23:36:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:36:03.409797  543705 memory.go:184] no items to output this cycle
I0321 23:36:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 23:36:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:36:13.409774  543705 memory.go:191] Add success.
I0321 23:36:13.409794  543705 cpu.go:282] Add success.
W0321 23:36:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:36:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:36:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:36:13.420049  543705 net.go:648] Add success.
I0321 23:36:13.422689  543705 net.go:770] primary dev: ETH0
I0321 23:36:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:36:13.422718  543705 net.go:698] Add success.
I0321 23:36:13.600147  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5914cb39-6850-4ef7-af04-454c32692c3d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:36:13.600178  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:36:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:36:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:36:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 23:36:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:36:14.456508  543705 disk_worker.go:494] system disk:vda1
I0321 23:36:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:36:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:36:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:36:16.472390  543705 disk_local_worker.go:436] Get disk info: []
I0321 23:36:23.409796  543705 cpu.go:275] no items to output this cycle
E0321 23:36:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:36:23.409812  543705 memory.go:184] no items to output this cycle
I0321 23:36:31.945680  543705 disk_info.go:125] begin check local disk info of client
I0321 23:36:31.948254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:36:31.948261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a000 0xc00047a040]
E0321 23:36:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:36:33.409788  543705 memory.go:184] no items to output this cycle
I0321 23:36:33.409806  543705 cpu.go:275] no items to output this cycle
I0321 23:36:39.353740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:36:39.353747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:36:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:36:43.410623  543705 memory.go:191] Add success.
I0321 23:36:43.409811  543705 cpu.go:282] Add success.
I0321 23:36:43.420371  543705 net.go:648] Add success.
I0321 23:36:43.423074  543705 net.go:770] primary dev: ETH0
I0321 23:36:43.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:36:43.423098  543705 net.go:698] Add success.
I0321 23:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:36:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:36:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:36:53.409764  543705 memory.go:184] no items to output this cycle
I0321 23:36:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 23:37:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:37:03.409771  543705 memory.go:184] no items to output this cycle
I0321 23:37:03.409795  543705 cpu.go:275] no items to output this cycle
W0321 23:37:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:37:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:37:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:37:13.409797  543705 cpu.go:282] Add success.
E0321 23:37:13.409824  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:37:13.409847  543705 memory.go:191] Add success.
I0321 23:37:13.419823  543705 net.go:770] primary dev: ETH0
I0321 23:37:13.419835  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:37:13.419847  543705 net.go:698] Add success.
I0321 23:37:13.420073  543705 net.go:648] Add success.
I0321 23:37:13.453581  543705 event_worker.go:152] Polling the log file for events...
W0321 23:37:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:37:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 23:37:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:37:14.456930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:37:14.456938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:37:14.456944  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:37:14.456998  543705 disk_worker.go:494] system disk:vda1
I0321 23:37:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:37:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:37:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:37:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:37:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:37:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:37:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:37:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:37:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:37:23.409801  543705 cpu.go:275] no items to output this cycle
I0321 23:37:23.409806  543705 memory.go:184] no items to output this cycle
I0321 23:37:31.949670  543705 disk_info.go:125] begin check local disk info of client
I0321 23:37:31.952236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:37:31.952242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396ec0 0xc000396f00]
E0321 23:37:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:37:33.409919  543705 cpu.go:275] no items to output this cycle
I0321 23:37:33.409973  543705 memory.go:184] no items to output this cycle
E0321 23:37:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:37:43.409781  543705 memory.go:191] Add success.
I0321 23:37:43.409816  543705 cpu.go:282] Add success.
I0321 23:37:43.420022  543705 net.go:648] Add success.
I0321 23:37:43.423140  543705 net.go:770] primary dev: ETH0
I0321 23:37:43.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:37:43.423169  543705 net.go:698] Add success.
I0321 23:37:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:37:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:37:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:37:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:37:53.409770  543705 memory.go:184] no items to output this cycle
I0321 23:37:53.409782  543705 cpu.go:275] no items to output this cycle
E0321 23:38:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:38:03.409773  543705 memory.go:184] no items to output this cycle
I0321 23:38:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:38:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:38:13.409822  543705 memory.go:191] Add success.
I0321 23:38:13.409826  543705 cpu.go:282] Add success.
W0321 23:38:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:38:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:38:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:38:13.420087  543705 net.go:648] Add success.
I0321 23:38:13.422831  543705 net.go:770] primary dev: ETH0
I0321 23:38:13.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:38:13.422856  543705 net.go:698] Add success.
I0321 23:38:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:38:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:38:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0321 23:38:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:38:14.456618  543705 disk_worker.go:494] system disk:vda1
I0321 23:38:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:38:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:38:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:38:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:38:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:38:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:38:23.409812  543705 memory.go:184] no items to output this cycle
I0321 23:38:23.409826  543705 cpu.go:275] no items to output this cycle
I0321 23:38:31.953696  543705 disk_info.go:125] begin check local disk info of client
I0321 23:38:31.956205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:38:31.956212  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e100 0xc00047e140]
E0321 23:38:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:38:33.409788  543705 memory.go:184] no items to output this cycle
I0321 23:38:33.409809  543705 cpu.go:275] no items to output this cycle
E0321 23:38:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:38:43.409793  543705 memory.go:191] Add success.
I0321 23:38:43.409794  543705 cpu.go:282] Add success.
I0321 23:38:43.419958  543705 net.go:648] Add success.
I0321 23:38:43.422714  543705 net.go:770] primary dev: ETH0
I0321 23:38:43.422728  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:38:43.422739  543705 net.go:698] Add success.
I0321 23:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:38:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:38:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:38:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:38:53.409781  543705 memory.go:184] no items to output this cycle
I0321 23:38:53.409783  543705 cpu.go:275] no items to output this cycle
E0321 23:39:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:39:03.409773  543705 memory.go:184] no items to output this cycle
I0321 23:39:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 23:39:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:39:13.409776  543705 memory.go:191] Add success.
W0321 23:39:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 23:39:13.409810  543705 cpu.go:282] Add success.
W0321 23:39:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:39:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:39:13.420074  543705 net.go:648] Add success.
I0321 23:39:13.422548  543705 net.go:770] primary dev: ETH0
I0321 23:39:13.422563  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:39:13.422577  543705 net.go:698] Add success.
I0321 23:39:13.478657  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31ce8a28-e7cb-41c0-bb10-cab8891cd126","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:39:13.478696  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:39:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:39:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:39:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0321 23:39:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:39:14.456538  543705 disk_worker.go:494] system disk:vda1
I0321 23:39:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:39:15.455608  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:39:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:39:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:39:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:39:23.409780  543705 cpu.go:275] no items to output this cycle
I0321 23:39:23.409916  543705 memory.go:184] no items to output this cycle
I0321 23:39:31.957686  543705 disk_info.go:125] begin check local disk info of client
I0321 23:39:31.960284  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:39:31.960291  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4000 0xc0002a4040]
E0321 23:39:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:39:33.409789  543705 memory.go:184] no items to output this cycle
I0321 23:39:33.409800  543705 cpu.go:275] no items to output this cycle
I0321 23:39:39.357741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:39:39.357748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:39:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:39:43.410611  543705 memory.go:191] Add success.
I0321 23:39:43.409807  543705 cpu.go:282] Add success.
I0321 23:39:43.420394  543705 net.go:648] Add success.
I0321 23:39:43.423353  543705 net.go:770] primary dev: ETH0
I0321 23:39:43.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:39:43.423386  543705 net.go:698] Add success.
I0321 23:39:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:39:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:39:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:39:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:39:53.409770  543705 memory.go:184] no items to output this cycle
I0321 23:39:53.409798  543705 cpu.go:275] no items to output this cycle
E0321 23:40:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:40:03.409766  543705 memory.go:184] no items to output this cycle
I0321 23:40:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 23:40:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:40:13.409807  543705 memory.go:191] Add success.
I0321 23:40:13.409813  543705 cpu.go:282] Add success.
W0321 23:40:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:40:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:40:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:40:13.420229  543705 net.go:648] Add success.
I0321 23:40:13.422898  543705 net.go:770] primary dev: ETH0
I0321 23:40:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:40:13.422924  543705 net.go:698] Add success.
I0321 23:40:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:40:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:40:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0321 23:40:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:40:14.456581  543705 disk_worker.go:494] system disk:vda1
I0321 23:40:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:40:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:40:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:40:23.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:40:23.409886  543705 cpu.go:275] no items to output this cycle
I0321 23:40:23.409895  543705 memory.go:184] no items to output this cycle
I0321 23:40:31.961681  543705 disk_info.go:125] begin check local disk info of client
I0321 23:40:31.964274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:40:31.964281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 23:40:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:40:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 23:40:33.409806  543705 memory.go:184] no items to output this cycle
E0321 23:40:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:40:43.409810  543705 memory.go:191] Add success.
I0321 23:40:43.409815  543705 cpu.go:282] Add success.
I0321 23:40:43.419961  543705 net.go:648] Add success.
I0321 23:40:43.422593  543705 net.go:770] primary dev: ETH0
I0321 23:40:43.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:40:43.422622  543705 net.go:698] Add success.
I0321 23:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:40:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:40:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:40:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:40:53.409780  543705 memory.go:184] no items to output this cycle
I0321 23:40:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:41:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:41:03.409795  543705 memory.go:184] no items to output this cycle
I0321 23:41:03.409806  543705 cpu.go:275] no items to output this cycle
E0321 23:41:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:41:13.409786  543705 memory.go:191] Add success.
I0321 23:41:13.409786  543705 cpu.go:282] Add success.
W0321 23:41:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:41:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:41:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:41:13.420297  543705 net.go:648] Add success.
I0321 23:41:13.423247  543705 net.go:770] primary dev: ETH0
I0321 23:41:13.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:41:13.423273  543705 net.go:698] Add success.
I0321 23:41:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:41:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:41:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0321 23:41:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:41:14.456823  543705 disk_worker.go:494] system disk:vda1
I0321 23:41:14.456854  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:41:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:41:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:41:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:41:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:41:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:41:23.409781  543705 memory.go:184] no items to output this cycle
I0321 23:41:23.409796  543705 cpu.go:275] no items to output this cycle
I0321 23:41:31.965680  543705 disk_info.go:125] begin check local disk info of client
I0321 23:41:31.968251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:41:31.968258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000520840 0xc000520880]
E0321 23:41:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:41:33.409800  543705 memory.go:184] no items to output this cycle
I0321 23:41:33.409800  543705 cpu.go:275] no items to output this cycle
E0321 23:41:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:41:43.409810  543705 memory.go:191] Add success.
I0321 23:41:43.409825  543705 cpu.go:282] Add success.
I0321 23:41:43.419912  543705 net.go:648] Add success.
I0321 23:41:43.422712  543705 net.go:770] primary dev: ETH0
I0321 23:41:43.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:41:43.422737  543705 net.go:698] Add success.
I0321 23:41:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:41:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:41:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:41:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:41:53.409778  543705 memory.go:184] no items to output this cycle
I0321 23:41:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 23:42:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:42:03.409768  543705 memory.go:184] no items to output this cycle
I0321 23:42:03.409809  543705 cpu.go:275] no items to output this cycle
E0321 23:42:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:42:13.409776  543705 memory.go:191] Add success.
W0321 23:42:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 23:42:13.409801  543705 cpu.go:282] Add success.
W0321 23:42:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:42:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:42:13.420069  543705 net.go:648] Add success.
I0321 23:42:13.422799  543705 net.go:770] primary dev: ETH0
I0321 23:42:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:42:13.422833  543705 net.go:698] Add success.
I0321 23:42:13.468823  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"02a2f519-6cd6-4ec4-8e42-600ac4e471dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:42:13.468859  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 23:42:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:42:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 23:42:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:42:14.455898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:42:14.455907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:42:14.455913  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:42:14.456594  543705 disk_worker.go:494] system disk:vda1
I0321 23:42:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:42:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:42:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:42:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:42:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:42:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:42:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:42:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:42:23.409832  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:42:23.409852  543705 memory.go:184] no items to output this cycle
I0321 23:42:23.409912  543705 cpu.go:275] no items to output this cycle
I0321 23:42:31.969678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:42:31.972271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:42:31.972278  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e000 0xc00028e040]
E0321 23:42:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:42:33.409797  543705 memory.go:184] no items to output this cycle
I0321 23:42:33.409799  543705 cpu.go:275] no items to output this cycle
I0321 23:42:39.361736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:42:39.361744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:42:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:42:43.410590  543705 memory.go:191] Add success.
I0321 23:42:43.409810  543705 cpu.go:282] Add success.
I0321 23:42:43.420261  543705 net.go:648] Add success.
I0321 23:42:43.422894  543705 net.go:770] primary dev: ETH0
I0321 23:42:43.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:42:43.422920  543705 net.go:698] Add success.
I0321 23:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:42:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:42:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:42:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:42:53.409775  543705 memory.go:184] no items to output this cycle
I0321 23:42:53.409776  543705 cpu.go:275] no items to output this cycle
E0321 23:43:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:43:03.409762  543705 memory.go:184] no items to output this cycle
I0321 23:43:03.409790  543705 cpu.go:275] no items to output this cycle
E0321 23:43:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:43:13.409795  543705 memory.go:191] Add success.
I0321 23:43:13.409798  543705 cpu.go:282] Add success.
W0321 23:43:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:43:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:43:13.420045  543705 net.go:648] Add success.
I0321 23:43:13.422664  543705 net.go:770] primary dev: ETH0
I0321 23:43:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:43:13.422688  543705 net.go:698] Add success.
I0321 23:43:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:43:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:43:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0321 23:43:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:43:14.456606  543705 disk_worker.go:494] system disk:vda1
I0321 23:43:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:43:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:43:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:43:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:43:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:43:23.409787  543705 memory.go:184] no items to output this cycle
I0321 23:43:23.409790  543705 cpu.go:275] no items to output this cycle
I0321 23:43:31.973678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:43:31.976262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:43:31.976269  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b4440 0xc0002b4480]
E0321 23:43:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:43:33.409795  543705 memory.go:184] no items to output this cycle
I0321 23:43:33.409798  543705 cpu.go:275] no items to output this cycle
E0321 23:43:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:43:43.409794  543705 memory.go:191] Add success.
I0321 23:43:43.409798  543705 cpu.go:282] Add success.
I0321 23:43:43.419859  543705 net.go:648] Add success.
I0321 23:43:43.422766  543705 net.go:770] primary dev: ETH0
I0321 23:43:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:43:43.422792  543705 net.go:698] Add success.
I0321 23:43:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:43:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:43:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:43:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:43:53.409805  543705 memory.go:184] no items to output this cycle
I0321 23:43:53.409819  543705 cpu.go:275] no items to output this cycle
E0321 23:44:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:44:03.409803  543705 memory.go:184] no items to output this cycle
I0321 23:44:03.409811  543705 cpu.go:275] no items to output this cycle
E0321 23:44:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:44:13.409782  543705 memory.go:191] Add success.
W0321 23:44:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0321 23:44:13.409812  543705 cpu.go:282] Add success.
W0321 23:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:44:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:44:13.420066  543705 net.go:648] Add success.
I0321 23:44:13.422599  543705 net.go:770] primary dev: ETH0
I0321 23:44:13.422612  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:44:13.422623  543705 net.go:698] Add success.
I0321 23:44:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:44:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:44:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0321 23:44:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:44:14.456574  543705 disk_worker.go:494] system disk:vda1
I0321 23:44:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:44:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:44:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:44:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:44:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:44:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:44:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:44:23.409780  543705 memory.go:184] no items to output this cycle
I0321 23:44:23.409799  543705 cpu.go:275] no items to output this cycle
I0321 23:44:31.977678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:44:31.980206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:44:31.980213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005523c0 0xc000552400]
E0321 23:44:33.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:44:33.409829  543705 memory.go:184] no items to output this cycle
I0321 23:44:33.409838  543705 cpu.go:275] no items to output this cycle
E0321 23:44:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:44:43.409792  543705 memory.go:191] Add success.
I0321 23:44:43.409815  543705 cpu.go:282] Add success.
I0321 23:44:43.419890  543705 net.go:648] Add success.
I0321 23:44:43.422725  543705 net.go:770] primary dev: ETH0
I0321 23:44:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:44:43.422750  543705 net.go:698] Add success.
I0321 23:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:44:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:44:53.409778  543705 memory.go:184] no items to output this cycle
I0321 23:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0321 23:45:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:45:03.409805  543705 memory.go:184] no items to output this cycle
I0321 23:45:03.409818  543705 cpu.go:275] no items to output this cycle
E0321 23:45:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:45:13.409793  543705 memory.go:191] Add success.
I0321 23:45:13.409810  543705 cpu.go:282] Add success.
W0321 23:45:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:45:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:45:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:45:13.420101  543705 net.go:648] Add success.
I0321 23:45:13.422630  543705 net.go:770] primary dev: ETH0
I0321 23:45:13.422643  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:45:13.422656  543705 net.go:698] Add success.
I0321 23:45:13.470691  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"52ac195b-f526-4f57-b501-d59fda410f44","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:45:13.470725  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:45:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:45:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:45:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0321 23:45:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:45:14.456589  543705 disk_worker.go:494] system disk:vda1
I0321 23:45:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:45:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:45:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:45:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:45:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:45:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:45:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:45:23.409813  543705 memory.go:184] no items to output this cycle
I0321 23:45:23.409824  543705 cpu.go:275] no items to output this cycle
I0321 23:45:31.981678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:45:31.984290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:45:31.984297  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033c580 0xc00033c5c0]
E0321 23:45:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:45:33.409801  543705 memory.go:184] no items to output this cycle
I0321 23:45:33.409802  543705 cpu.go:275] no items to output this cycle
I0321 23:45:39.365732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:45:39.365739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:45:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:45:43.410621  543705 memory.go:191] Add success.
I0321 23:45:43.409803  543705 cpu.go:282] Add success.
I0321 23:45:43.420326  543705 net.go:648] Add success.
I0321 23:45:43.422931  543705 net.go:770] primary dev: ETH0
I0321 23:45:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:45:43.422957  543705 net.go:698] Add success.
I0321 23:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:45:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:45:53.409776  543705 memory.go:184] no items to output this cycle
I0321 23:45:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:46:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:46:03.409792  543705 memory.go:184] no items to output this cycle
I0321 23:46:03.409805  543705 cpu.go:275] no items to output this cycle
E0321 23:46:13.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:46:13.409771  543705 memory.go:191] Add success.
W0321 23:46:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:46:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:46:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:46:13.409815  543705 cpu.go:282] Add success.
I0321 23:46:13.420073  543705 net.go:648] Add success.
I0321 23:46:13.422680  543705 net.go:770] primary dev: ETH0
I0321 23:46:13.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:46:13.422707  543705 net.go:698] Add success.
I0321 23:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:46:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:46:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0321 23:46:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:46:14.456495  543705 disk_worker.go:494] system disk:vda1
I0321 23:46:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:46:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:46:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:46:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:46:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:46:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:46:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:46:23.409798  543705 memory.go:184] no items to output this cycle
I0321 23:46:23.409808  543705 cpu.go:275] no items to output this cycle
I0321 23:46:31.985678  543705 disk_info.go:125] begin check local disk info of client
I0321 23:46:31.988238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:46:31.988245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000246080 0xc0002460c0]
E0321 23:46:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:46:33.409795  543705 memory.go:184] no items to output this cycle
I0321 23:46:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 23:46:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:46:43.409796  543705 memory.go:191] Add success.
I0321 23:46:43.409800  543705 cpu.go:282] Add success.
I0321 23:46:43.419870  543705 net.go:648] Add success.
I0321 23:46:43.422452  543705 net.go:770] primary dev: ETH0
I0321 23:46:43.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:46:43.422477  543705 net.go:698] Add success.
I0321 23:46:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:46:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:46:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:46:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:46:53.409763  543705 memory.go:184] no items to output this cycle
I0321 23:46:53.409787  543705 cpu.go:275] no items to output this cycle
E0321 23:47:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:47:03.409772  543705 memory.go:184] no items to output this cycle
I0321 23:47:03.409799  543705 cpu.go:275] no items to output this cycle
E0321 23:47:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:47:13.409796  543705 memory.go:191] Add success.
I0321 23:47:13.409797  543705 cpu.go:282] Add success.
W0321 23:47:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:47:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:47:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:47:13.420247  543705 net.go:648] Add success.
I0321 23:47:13.422928  543705 net.go:770] primary dev: ETH0
I0321 23:47:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:47:13.422954  543705 net.go:698] Add success.
I0321 23:47:13.453523  543705 event_worker.go:152] Polling the log file for events...
W0321 23:47:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:47:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0321 23:47:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:47:14.456790  543705 disk_worker.go:494] system disk:vda1
I0321 23:47:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:47:14.457168  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:47:14.457176  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:47:14.457181  543705 custom_config.go:64] query custom config with name: gpu
E0321 23:47:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:47:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
E0321 23:47:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:47:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:47:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:47:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:47:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:47:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:47:23.409773  543705 memory.go:184] no items to output this cycle
I0321 23:47:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 23:47:31.989679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:47:31.992228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:47:31.992235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004660c0 0xc000466100]
E0321 23:47:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:47:33.409825  543705 memory.go:184] no items to output this cycle
I0321 23:47:33.409836  543705 cpu.go:275] no items to output this cycle
E0321 23:47:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:47:43.409783  543705 memory.go:191] Add success.
I0321 23:47:43.409813  543705 cpu.go:282] Add success.
I0321 23:47:43.419978  543705 net.go:648] Add success.
I0321 23:47:43.422700  543705 net.go:770] primary dev: ETH0
I0321 23:47:43.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:47:43.422725  543705 net.go:698] Add success.
I0321 23:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:47:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:47:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:47:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:47:53.409776  543705 memory.go:184] no items to output this cycle
I0321 23:47:53.409779  543705 cpu.go:275] no items to output this cycle
E0321 23:48:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:48:03.409766  543705 memory.go:184] no items to output this cycle
I0321 23:48:03.409791  543705 cpu.go:275] no items to output this cycle
E0321 23:48:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:48:13.409823  543705 memory.go:191] Add success.
I0321 23:48:13.409826  543705 cpu.go:282] Add success.
W0321 23:48:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:48:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:48:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:48:13.420166  543705 net.go:648] Add success.
I0321 23:48:13.422992  543705 net.go:770] primary dev: ETH0
I0321 23:48:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:48:13.423033  543705 net.go:698] Add success.
I0321 23:48:13.463797  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c20e1828-e7c1-4c2d-aec4-ba67222ac30d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:48:13.463832  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:48:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:48:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:48:14.455332  543705 disk_worker.go:708] disk space is not compliant
W0321 23:48:14.455338  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:48:14.457500  543705 disk_worker.go:494] system disk:vda1
I0321 23:48:14.457529  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:48:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:48:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:48:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:48:23.409771  543705 memory.go:184] no items to output this cycle
I0321 23:48:23.409791  543705 cpu.go:275] no items to output this cycle
I0321 23:48:31.993686  543705 disk_info.go:125] begin check local disk info of client
I0321 23:48:31.996242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:48:31.996249  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047e200 0xc00047e240]
E0321 23:48:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:48:33.409795  543705 memory.go:184] no items to output this cycle
I0321 23:48:33.409812  543705 cpu.go:275] no items to output this cycle
I0321 23:48:39.369741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:48:39.369748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:48:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:48:43.410614  543705 memory.go:191] Add success.
I0321 23:48:43.409814  543705 cpu.go:282] Add success.
I0321 23:48:43.420278  543705 net.go:648] Add success.
I0321 23:48:43.423043  543705 net.go:770] primary dev: ETH0
I0321 23:48:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:48:43.423074  543705 net.go:698] Add success.
I0321 23:48:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:48:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:48:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:48:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:48:53.409785  543705 memory.go:184] no items to output this cycle
I0321 23:48:53.409793  543705 cpu.go:275] no items to output this cycle
E0321 23:49:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:49:03.409810  543705 memory.go:184] no items to output this cycle
I0321 23:49:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 23:49:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:49:13.409785  543705 memory.go:191] Add success.
I0321 23:49:13.409806  543705 cpu.go:282] Add success.
W0321 23:49:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:49:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:49:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:49:13.420113  543705 net.go:648] Add success.
I0321 23:49:13.422785  543705 net.go:770] primary dev: ETH0
I0321 23:49:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:49:13.422810  543705 net.go:698] Add success.
I0321 23:49:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:49:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:49:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0321 23:49:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:49:14.458914  543705 disk_worker.go:494] system disk:vda1
I0321 23:49:14.458943  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:49:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:49:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:49:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:49:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:49:23.409779  543705 memory.go:184] no items to output this cycle
I0321 23:49:23.409793  543705 cpu.go:275] no items to output this cycle
I0321 23:49:31.997681  543705 disk_info.go:125] begin check local disk info of client
I0321 23:49:32.000197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:49:32.000203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
E0321 23:49:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:49:33.409811  543705 memory.go:184] no items to output this cycle
I0321 23:49:33.409822  543705 cpu.go:275] no items to output this cycle
E0321 23:49:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:49:43.409778  543705 memory.go:191] Add success.
I0321 23:49:43.409800  543705 cpu.go:282] Add success.
I0321 23:49:43.419894  543705 net.go:648] Add success.
I0321 23:49:43.422737  543705 net.go:770] primary dev: ETH0
I0321 23:49:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:49:43.422763  543705 net.go:698] Add success.
I0321 23:49:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:49:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:49:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:49:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:49:53.409769  543705 memory.go:184] no items to output this cycle
I0321 23:49:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 23:50:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:50:03.409770  543705 memory.go:184] no items to output this cycle
I0321 23:50:03.409797  543705 cpu.go:275] no items to output this cycle
E0321 23:50:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:50:13.409791  543705 memory.go:191] Add success.
I0321 23:50:13.409811  543705 cpu.go:282] Add success.
W0321 23:50:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:50:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:50:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:50:13.420285  543705 net.go:648] Add success.
I0321 23:50:13.423019  543705 net.go:770] primary dev: ETH0
I0321 23:50:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:50:13.423044  543705 net.go:698] Add success.
I0321 23:50:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:50:14.455249  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:50:14.455386  543705 disk_worker.go:708] disk space is not compliant
W0321 23:50:14.455393  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:50:14.456906  543705 disk_worker.go:494] system disk:vda1
I0321 23:50:14.456949  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:50:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:50:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:50:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:50:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:50:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:50:23.409783  543705 memory.go:184] no items to output this cycle
I0321 23:50:23.409802  543705 cpu.go:275] no items to output this cycle
I0321 23:50:32.001674  543705 disk_info.go:125] begin check local disk info of client
I0321 23:50:32.004180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:50:32.004187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466d40 0xc000466d80]
E0321 23:50:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:50:33.409814  543705 memory.go:184] no items to output this cycle
I0321 23:50:33.409815  543705 cpu.go:275] no items to output this cycle
E0321 23:50:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:50:43.409787  543705 memory.go:191] Add success.
I0321 23:50:43.409813  543705 cpu.go:282] Add success.
I0321 23:50:43.420027  543705 net.go:648] Add success.
I0321 23:50:43.422804  543705 net.go:770] primary dev: ETH0
I0321 23:50:43.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:50:43.422830  543705 net.go:698] Add success.
I0321 23:50:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:50:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:50:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:50:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:50:53.409784  543705 memory.go:184] no items to output this cycle
I0321 23:50:53.409789  543705 cpu.go:275] no items to output this cycle
E0321 23:51:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:51:03.409793  543705 memory.go:184] no items to output this cycle
I0321 23:51:03.409815  543705 cpu.go:275] no items to output this cycle
E0321 23:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:51:13.409800  543705 memory.go:191] Add success.
I0321 23:51:13.409809  543705 cpu.go:282] Add success.
W0321 23:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:51:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:51:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:51:13.420178  543705 net.go:648] Add success.
I0321 23:51:13.422903  543705 net.go:770] primary dev: ETH0
I0321 23:51:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:51:13.422949  543705 net.go:698] Add success.
I0321 23:51:13.471312  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1ca10582-fdc6-47fe-8d9e-98f7cd40114b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:51:13.471352  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:51:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:51:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:51:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0321 23:51:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:51:14.456521  543705 disk_worker.go:494] system disk:vda1
I0321 23:51:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:51:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:51:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:51:23.409803  543705 memory.go:184] no items to output this cycle
I0321 23:51:23.409813  543705 cpu.go:275] no items to output this cycle
I0321 23:51:32.005675  543705 disk_info.go:125] begin check local disk info of client
I0321 23:51:32.008287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:51:32.008294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5540 0xc0000c5580]
E0321 23:51:33.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:51:33.409865  543705 memory.go:184] no items to output this cycle
I0321 23:51:33.409875  543705 cpu.go:275] no items to output this cycle
I0321 23:51:39.373734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:51:39.373741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:51:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:51:43.410570  543705 memory.go:191] Add success.
I0321 23:51:43.409787  543705 cpu.go:282] Add success.
I0321 23:51:43.420289  543705 net.go:648] Add success.
I0321 23:51:43.422829  543705 net.go:770] primary dev: ETH0
I0321 23:51:43.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:51:43.422857  543705 net.go:698] Add success.
I0321 23:51:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:51:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:51:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:51:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:51:53.409774  543705 memory.go:184] no items to output this cycle
I0321 23:51:53.409791  543705 cpu.go:275] no items to output this cycle
E0321 23:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:52:03.409780  543705 memory.go:184] no items to output this cycle
I0321 23:52:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 23:52:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:52:13.409806  543705 memory.go:191] Add success.
I0321 23:52:13.409813  543705 cpu.go:282] Add success.
W0321 23:52:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:52:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:52:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:52:13.419714  543705 net.go:648] Add success.
I0321 23:52:13.422930  543705 net.go:770] primary dev: ETH0
I0321 23:52:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:52:13.422977  543705 net.go:698] Add success.
W0321 23:52:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:52:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0321 23:52:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:52:14.457078  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:52:14.457086  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:52:14.457090  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:52:14.457108  543705 disk_worker.go:494] system disk:vda1
I0321 23:52:14.457150  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:52:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:52:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:52:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:52:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:52:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:52:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:52:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:52:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:52:23.409779  543705 memory.go:184] no items to output this cycle
I0321 23:52:23.409782  543705 cpu.go:275] no items to output this cycle
I0321 23:52:32.009677  543705 disk_info.go:125] begin check local disk info of client
I0321 23:52:32.012195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:52:32.012201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484180 0xc0004841c0]
E0321 23:52:33.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:52:33.409826  543705 memory.go:184] no items to output this cycle
I0321 23:52:33.409850  543705 cpu.go:275] no items to output this cycle
E0321 23:52:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:52:43.409808  543705 memory.go:191] Add success.
I0321 23:52:43.409815  543705 cpu.go:282] Add success.
I0321 23:52:43.419847  543705 net.go:648] Add success.
I0321 23:52:43.422568  543705 net.go:770] primary dev: ETH0
I0321 23:52:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:52:43.422594  543705 net.go:698] Add success.
I0321 23:52:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:52:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:52:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:52:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:52:53.409776  543705 memory.go:184] no items to output this cycle
I0321 23:52:53.409778  543705 cpu.go:275] no items to output this cycle
E0321 23:53:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:53:03.409782  543705 memory.go:184] no items to output this cycle
I0321 23:53:03.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:53:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:53:13.409810  543705 memory.go:191] Add success.
I0321 23:53:13.409818  543705 cpu.go:282] Add success.
W0321 23:53:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:53:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:53:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:53:13.419739  543705 net.go:648] Add success.
I0321 23:53:13.422296  543705 net.go:770] primary dev: ETH0
I0321 23:53:13.422311  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:53:13.422324  543705 net.go:698] Add success.
I0321 23:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:53:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:53:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0321 23:53:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:53:14.456602  543705 disk_worker.go:494] system disk:vda1
I0321 23:53:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:53:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:53:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:53:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:53:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:53:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:53:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:53:23.409766  543705 memory.go:184] no items to output this cycle
I0321 23:53:23.409786  543705 cpu.go:275] no items to output this cycle
I0321 23:53:32.013677  543705 disk_info.go:125] begin check local disk info of client
I0321 23:53:32.016223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:53:32.016229  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3840 0xc0003b3880]
E0321 23:53:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:53:33.409809  543705 memory.go:184] no items to output this cycle
I0321 23:53:33.409820  543705 cpu.go:275] no items to output this cycle
E0321 23:53:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:53:43.409817  543705 memory.go:191] Add success.
I0321 23:53:43.409825  543705 cpu.go:282] Add success.
I0321 23:53:43.419943  543705 net.go:648] Add success.
I0321 23:53:43.422323  543705 net.go:770] primary dev: ETH0
I0321 23:53:43.422335  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:53:43.422348  543705 net.go:698] Add success.
I0321 23:53:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:53:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:53:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:53:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:53:53.409761  543705 memory.go:184] no items to output this cycle
I0321 23:53:53.409796  543705 cpu.go:275] no items to output this cycle
E0321 23:54:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:54:03.409773  543705 memory.go:184] no items to output this cycle
I0321 23:54:03.409782  543705 cpu.go:275] no items to output this cycle
E0321 23:54:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:54:13.409800  543705 memory.go:191] Add success.
I0321 23:54:13.409800  543705 cpu.go:282] Add success.
W0321 23:54:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:54:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:54:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:54:13.419729  543705 net.go:648] Add success.
I0321 23:54:13.422205  543705 net.go:770] primary dev: ETH0
I0321 23:54:13.422220  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:54:13.422234  543705 net.go:698] Add success.
I0321 23:54:14.299055  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"da2d1cc1-2004-4393-87c3-984a2297c94e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:54:14.299091  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0321 23:54:14.454686  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:54:14.454919  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:54:14.454930  543705 disk_worker.go:708] disk space is not compliant
W0321 23:54:14.454932  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:54:14.456484  543705 disk_worker.go:494] system disk:vda1
I0321 23:54:14.456513  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:54:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:54:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:54:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:54:23.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:54:23.409831  543705 memory.go:184] no items to output this cycle
I0321 23:54:23.409849  543705 cpu.go:275] no items to output this cycle
I0321 23:54:32.017676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:54:32.020212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:54:32.020218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9200 0xc0004d9240]
E0321 23:54:33.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:54:33.409831  543705 cpu.go:275] no items to output this cycle
I0321 23:54:33.409842  543705 memory.go:184] no items to output this cycle
I0321 23:54:39.377738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:54:39.377745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:54:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:54:43.410629  543705 memory.go:191] Add success.
I0321 23:54:43.409809  543705 cpu.go:282] Add success.
I0321 23:54:43.420398  543705 net.go:648] Add success.
I0321 23:54:43.423116  543705 net.go:770] primary dev: ETH0
I0321 23:54:43.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:54:43.423144  543705 net.go:698] Add success.
I0321 23:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:54:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:54:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:54:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:54:53.409772  543705 memory.go:184] no items to output this cycle
I0321 23:54:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 23:55:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:55:03.409777  543705 memory.go:184] no items to output this cycle
I0321 23:55:03.409786  543705 cpu.go:275] no items to output this cycle
E0321 23:55:13.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:55:13.409907  543705 cpu.go:282] Add success.
I0321 23:55:13.409914  543705 memory.go:191] Add success.
W0321 23:55:13.409948  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:55:13.409969  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:55:13.409972  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:55:13.419707  543705 net.go:648] Add success.
I0321 23:55:13.422400  543705 net.go:770] primary dev: ETH0
I0321 23:55:13.422412  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:55:13.422424  543705 net.go:698] Add success.
I0321 23:55:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:55:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:55:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0321 23:55:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:55:14.456565  543705 disk_worker.go:494] system disk:vda1
I0321 23:55:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:55:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:55:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:55:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:55:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:55:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:55:23.409775  543705 memory.go:184] no items to output this cycle
I0321 23:55:23.409776  543705 cpu.go:275] no items to output this cycle
I0321 23:55:32.021679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:55:32.024232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:55:32.024238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4640 0xc0000c4680]
E0321 23:55:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:55:33.409804  543705 memory.go:184] no items to output this cycle
I0321 23:55:33.409821  543705 cpu.go:275] no items to output this cycle
E0321 23:55:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:55:43.409788  543705 memory.go:191] Add success.
I0321 23:55:43.409815  543705 cpu.go:282] Add success.
I0321 23:55:43.419879  543705 net.go:648] Add success.
I0321 23:55:43.422573  543705 net.go:770] primary dev: ETH0
I0321 23:55:43.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:55:43.422608  543705 net.go:698] Add success.
I0321 23:55:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:55:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:55:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:55:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:55:53.409778  543705 memory.go:184] no items to output this cycle
I0321 23:55:53.409785  543705 cpu.go:275] no items to output this cycle
E0321 23:56:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:56:03.409775  543705 memory.go:184] no items to output this cycle
I0321 23:56:03.409776  543705 cpu.go:275] no items to output this cycle
E0321 23:56:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:56:13.409817  543705 memory.go:191] Add success.
I0321 23:56:13.409822  543705 cpu.go:282] Add success.
W0321 23:56:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:56:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:56:13.409965  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:56:13.419737  543705 net.go:648] Add success.
I0321 23:56:13.422162  543705 net.go:770] primary dev: ETH0
I0321 23:56:13.422175  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:56:13.422187  543705 net.go:698] Add success.
I0321 23:56:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:56:14.455083  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:56:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0321 23:56:14.455146  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:56:14.456483  543705 disk_worker.go:494] system disk:vda1
I0321 23:56:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:56:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:56:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:56:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:56:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:56:23.409777  543705 memory.go:184] no items to output this cycle
I0321 23:56:23.409779  543705 cpu.go:275] no items to output this cycle
I0321 23:56:32.025676  543705 disk_info.go:125] begin check local disk info of client
I0321 23:56:32.028163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:56:32.028169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498280 0xc0004982c0]
E0321 23:56:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:56:33.409772  543705 memory.go:184] no items to output this cycle
I0321 23:56:33.409845  543705 cpu.go:275] no items to output this cycle
E0321 23:56:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:56:43.409792  543705 memory.go:191] Add success.
I0321 23:56:43.409808  543705 cpu.go:282] Add success.
I0321 23:56:43.420280  543705 net.go:648] Add success.
I0321 23:56:43.423552  543705 net.go:770] primary dev: ETH0
I0321 23:56:43.423566  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:56:43.423578  543705 net.go:698] Add success.
I0321 23:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:56:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:56:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:56:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:56:53.409765  543705 memory.go:184] no items to output this cycle
I0321 23:56:53.409795  543705 cpu.go:275] no items to output this cycle
E0321 23:57:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:57:03.409771  543705 memory.go:184] no items to output this cycle
I0321 23:57:03.409788  543705 cpu.go:275] no items to output this cycle
E0321 23:57:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:57:13.409823  543705 memory.go:191] Add success.
I0321 23:57:13.409825  543705 cpu.go:282] Add success.
W0321 23:57:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:57:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:57:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:57:13.419723  543705 net.go:648] Add success.
I0321 23:57:13.422381  543705 net.go:770] primary dev: ETH0
I0321 23:57:13.422394  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:57:13.422405  543705 net.go:698] Add success.
I0321 23:57:13.428725  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0321 23:57:13.452894  543705 event_worker.go:152] Polling the log file for events...
I0321 23:57:13.468898  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf03ac43-de80-4376-8ee7-f2d8c05401cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0321 23:57:13.468929  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0321 23:57:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:57:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0321 23:57:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0321 23:57:14.456763  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0321 23:57:14.456772  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0321 23:57:14.456777  543705 custom_config.go:64] query custom config with name: gpu
I0321 23:57:14.456809  543705 disk_worker.go:494] system disk:vda1
I0321 23:57:14.456836  543705 disk_worker.go:432] add disk info successfully, len:33
E0321 23:57:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0321 23:57:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:57:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0321 23:57:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0321 23:57:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:57:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:57:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:57:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:57:23.409774  543705 memory.go:184] no items to output this cycle
I0321 23:57:23.409781  543705 cpu.go:275] no items to output this cycle
I0321 23:57:32.029679  543705 disk_info.go:125] begin check local disk info of client
I0321 23:57:32.032236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:57:32.032243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ed680 0xc0000ed6c0]
E0321 23:57:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:57:33.409785  543705 memory.go:184] no items to output this cycle
I0321 23:57:33.409837  543705 cpu.go:275] no items to output this cycle
I0321 23:57:39.381739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0321 23:57:39.381747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0321 23:57:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:57:43.411073  543705 memory.go:191] Add success.
I0321 23:57:43.409790  543705 cpu.go:282] Add success.
I0321 23:57:43.419739  543705 net.go:648] Add success.
I0321 23:57:43.422826  543705 net.go:770] primary dev: ETH0
I0321 23:57:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:57:43.422858  543705 net.go:698] Add success.
I0321 23:57:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:57:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:57:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:57:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:57:53.409767  543705 memory.go:184] no items to output this cycle
I0321 23:57:53.409794  543705 cpu.go:275] no items to output this cycle
E0321 23:58:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:58:03.409789  543705 memory.go:184] no items to output this cycle
I0321 23:58:03.409795  543705 cpu.go:275] no items to output this cycle
E0321 23:58:13.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:58:13.409915  543705 memory.go:191] Add success.
W0321 23:58:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:58:13.409976  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:58:13.409979  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:58:13.410042  543705 cpu.go:282] Add success.
I0321 23:58:13.419717  543705 net.go:648] Add success.
I0321 23:58:13.422464  543705 net.go:770] primary dev: ETH0
I0321 23:58:13.422476  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:58:13.422487  543705 net.go:698] Add success.
I0321 23:58:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:58:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:58:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0321 23:58:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:58:14.456479  543705 disk_worker.go:494] system disk:vda1
I0321 23:58:14.456522  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:58:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:58:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:58:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:58:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:58:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:58:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:58:23.409777  543705 memory.go:184] no items to output this cycle
I0321 23:58:23.409794  543705 cpu.go:275] no items to output this cycle
I0321 23:58:32.033686  543705 disk_info.go:125] begin check local disk info of client
I0321 23:58:32.036183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:58:32.036191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0321 23:58:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:58:33.409783  543705 memory.go:184] no items to output this cycle
I0321 23:58:33.409836  543705 cpu.go:275] no items to output this cycle
E0321 23:58:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:58:43.409828  543705 memory.go:191] Add success.
I0321 23:58:43.409832  543705 cpu.go:282] Add success.
I0321 23:58:43.420434  543705 net.go:648] Add success.
I0321 23:58:43.423046  543705 net.go:770] primary dev: ETH0
I0321 23:58:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:58:43.423076  543705 net.go:698] Add success.
I0321 23:58:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:58:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:58:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:58:53.409810  543705 memory.go:184] no items to output this cycle
I0321 23:58:53.409825  543705 cpu.go:275] no items to output this cycle
E0321 23:59:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:59:03.409801  543705 memory.go:184] no items to output this cycle
I0321 23:59:03.409816  543705 cpu.go:275] no items to output this cycle
E0321 23:59:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:59:13.409826  543705 memory.go:191] Add success.
I0321 23:59:13.409831  543705 cpu.go:282] Add success.
W0321 23:59:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0321 23:59:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0321 23:59:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0321 23:59:13.419723  543705 net.go:648] Add success.
I0321 23:59:13.422518  543705 net.go:770] primary dev: ETH0
I0321 23:59:13.422534  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:59:13.422548  543705 net.go:698] Add success.
I0321 23:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0321 23:59:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0321 23:59:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0321 23:59:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0321 23:59:14.456561  543705 disk_worker.go:494] system disk:vda1
I0321 23:59:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0321 23:59:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0321 23:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:59:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:59:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0321 23:59:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0321 23:59:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:59:23.409777  543705 memory.go:184] no items to output this cycle
I0321 23:59:23.409805  543705 cpu.go:275] no items to output this cycle
I0321 23:59:32.037674  543705 disk_info.go:125] begin check local disk info of client
I0321 23:59:32.040279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0321 23:59:32.040286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4380 0xc0000c43c0]
E0321 23:59:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:59:33.409783  543705 memory.go:184] no items to output this cycle
I0321 23:59:33.409845  543705 cpu.go:275] no items to output this cycle
E0321 23:59:43.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:59:43.409887  543705 memory.go:191] Add success.
I0321 23:59:43.409933  543705 cpu.go:282] Add success.
I0321 23:59:43.420297  543705 net.go:648] Add success.
I0321 23:59:43.422872  543705 net.go:770] primary dev: ETH0
I0321 23:59:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0321 23:59:43.422900  543705 net.go:698] Add success.
I0321 23:59:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0321 23:59:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0321 23:59:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0321 23:59:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0321 23:59:53.409772  543705 cpu.go:275] no items to output this cycle
I0321 23:59:53.409775  543705 memory.go:184] no items to output this cycle
E0322 00:00:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:00:03.409812  543705 memory.go:184] no items to output this cycle
I0322 00:00:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 00:00:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:00:13.409814  543705 memory.go:191] Add success.
I0322 00:00:13.409816  543705 cpu.go:282] Add success.
W0322 00:00:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:00:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:00:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:00:13.420259  543705 net.go:648] Add success.
I0322 00:00:13.422853  543705 net.go:770] primary dev: ETH0
I0322 00:00:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:00:13.422888  543705 net.go:698] Add success.
I0322 00:00:13.469550  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29ee07c2-7bfc-44b4-b0b0-3d6105ce6824","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:00:13.469581  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:00:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:00:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:00:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 00:00:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:00:14.456714  543705 disk_worker.go:494] system disk:vda1
I0322 00:00:14.456749  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:00:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:00:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:00:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:00:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:00:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:00:23.409778  543705 memory.go:184] no items to output this cycle
I0322 00:00:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 00:00:32.041681  543705 disk_info.go:125] begin check local disk info of client
I0322 00:00:32.044190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:00:32.044197  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f100 0xc00037f140]
E0322 00:00:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:00:33.409794  543705 memory.go:184] no items to output this cycle
I0322 00:00:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 00:00:39.385730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:00:39.385738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:00:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:00:43.410710  543705 memory.go:191] Add success.
I0322 00:00:43.409804  543705 cpu.go:282] Add success.
I0322 00:00:43.420437  543705 net.go:648] Add success.
I0322 00:00:43.423122  543705 net.go:770] primary dev: ETH0
I0322 00:00:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:00:43.423147  543705 net.go:698] Add success.
I0322 00:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:00:53.410356  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:00:53.410372  543705 memory.go:184] no items to output this cycle
I0322 00:00:53.410404  543705 cpu.go:275] no items to output this cycle
E0322 00:01:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:01:03.409786  543705 memory.go:184] no items to output this cycle
I0322 00:01:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:01:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:01:13.409790  543705 cpu.go:282] Add success.
I0322 00:01:13.409791  543705 memory.go:191] Add success.
W0322 00:01:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:01:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:01:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:01:13.420239  543705 net.go:648] Add success.
I0322 00:01:13.423481  543705 net.go:770] primary dev: ETH0
I0322 00:01:13.423495  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:01:13.423506  543705 net.go:698] Add success.
I0322 00:01:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:01:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:01:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 00:01:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:01:14.456552  543705 disk_worker.go:494] system disk:vda1
I0322 00:01:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:01:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:01:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:01:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:01:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:01:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:01:23.409770  543705 memory.go:184] no items to output this cycle
I0322 00:01:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 00:01:32.045677  543705 disk_info.go:125] begin check local disk info of client
I0322 00:01:32.048251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:01:32.048257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1340 0xc0003c1380]
E0322 00:01:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:01:33.409775  543705 memory.go:184] no items to output this cycle
I0322 00:01:33.409783  543705 cpu.go:275] no items to output this cycle
E0322 00:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:01:43.409794  543705 memory.go:191] Add success.
I0322 00:01:43.409794  543705 cpu.go:282] Add success.
I0322 00:01:43.419889  543705 net.go:648] Add success.
I0322 00:01:43.423234  543705 net.go:770] primary dev: ETH0
I0322 00:01:43.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:01:43.423263  543705 net.go:698] Add success.
I0322 00:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:01:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:01:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:01:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:01:53.409773  543705 cpu.go:275] no items to output this cycle
I0322 00:01:53.409778  543705 memory.go:184] no items to output this cycle
E0322 00:02:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:02:03.409781  543705 memory.go:184] no items to output this cycle
I0322 00:02:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 00:02:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:02:13.409779  543705 memory.go:191] Add success.
W0322 00:02:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:02:13.409803  543705 cpu.go:282] Add success.
W0322 00:02:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:02:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:02:13.420066  543705 net.go:648] Add success.
I0322 00:02:13.422767  543705 net.go:770] primary dev: ETH0
I0322 00:02:13.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:02:13.422796  543705 net.go:698] Add success.
W0322 00:02:14.455576  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:02:14.455588  543705 disk_worker.go:708] disk space is not compliant
W0322 00:02:14.455593  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:02:14.456195  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:02:14.456205  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:02:14.456211  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:02:14.458003  543705 disk_worker.go:494] system disk:vda1
I0322 00:02:14.458032  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:02:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:02:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:02:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:02:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:02:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:02:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:02:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:02:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:02:23.409798  543705 memory.go:184] no items to output this cycle
I0322 00:02:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 00:02:32.049680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:02:32.052175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:02:32.052181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4240 0xc0000c4280]
E0322 00:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:02:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 00:02:33.409783  543705 memory.go:184] no items to output this cycle
E0322 00:02:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:02:43.409818  543705 memory.go:191] Add success.
I0322 00:02:43.409828  543705 cpu.go:282] Add success.
I0322 00:02:43.420041  543705 net.go:648] Add success.
I0322 00:02:43.422842  543705 net.go:770] primary dev: ETH0
I0322 00:02:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:02:43.422868  543705 net.go:698] Add success.
I0322 00:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:02:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:02:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:02:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:02:53.409798  543705 memory.go:184] no items to output this cycle
I0322 00:02:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 00:03:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:03:03.409800  543705 memory.go:184] no items to output this cycle
I0322 00:03:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 00:03:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:03:13.409782  543705 memory.go:191] Add success.
W0322 00:03:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:03:13.409809  543705 cpu.go:282] Add success.
W0322 00:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:03:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:03:13.420196  543705 net.go:648] Add success.
I0322 00:03:13.422780  543705 net.go:770] primary dev: ETH0
I0322 00:03:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:03:13.422806  543705 net.go:698] Add success.
I0322 00:03:13.580561  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c8283af1-6482-4087-b960-6eb94557e1ee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:03:13.580592  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:03:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:03:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:03:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 00:03:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:03:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 00:03:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:03:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:03:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:03:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:03:23.409799  543705 memory.go:184] no items to output this cycle
I0322 00:03:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 00:03:32.053679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:03:32.056288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:03:32.056294  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037da00 0xc00037da40]
E0322 00:03:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:03:33.409782  543705 memory.go:184] no items to output this cycle
I0322 00:03:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 00:03:39.389739  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:03:39.389747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:03:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:03:43.410698  543705 memory.go:191] Add success.
I0322 00:03:43.409825  543705 cpu.go:282] Add success.
I0322 00:03:43.420513  543705 net.go:648] Add success.
I0322 00:03:43.423337  543705 net.go:770] primary dev: ETH0
I0322 00:03:43.423350  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:03:43.423364  543705 net.go:698] Add success.
I0322 00:03:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:03:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:03:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:03:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:03:53.409769  543705 memory.go:184] no items to output this cycle
I0322 00:03:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 00:04:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:04:03.409800  543705 memory.go:184] no items to output this cycle
I0322 00:04:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 00:04:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:04:13.409782  543705 memory.go:191] Add success.
I0322 00:04:13.409798  543705 cpu.go:282] Add success.
W0322 00:04:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:04:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:04:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:04:13.420055  543705 net.go:648] Add success.
I0322 00:04:13.423387  543705 net.go:770] primary dev: ETH0
I0322 00:04:13.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:04:13.423412  543705 net.go:698] Add success.
I0322 00:04:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:04:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:04:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 00:04:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:04:14.457027  543705 disk_worker.go:494] system disk:vda1
I0322 00:04:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:04:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:04:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:04:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:04:23.409809  543705 memory.go:184] no items to output this cycle
I0322 00:04:23.409818  543705 cpu.go:275] no items to output this cycle
I0322 00:04:32.057680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:04:32.060198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:04:32.060204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1200 0xc0004a1240]
E0322 00:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:04:33.409785  543705 memory.go:184] no items to output this cycle
I0322 00:04:33.409801  543705 cpu.go:275] no items to output this cycle
E0322 00:04:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:04:43.409798  543705 memory.go:191] Add success.
I0322 00:04:43.409800  543705 cpu.go:282] Add success.
I0322 00:04:43.419886  543705 net.go:648] Add success.
I0322 00:04:43.422493  543705 net.go:770] primary dev: ETH0
I0322 00:04:43.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:04:43.422518  543705 net.go:698] Add success.
I0322 00:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:04:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:04:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:04:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:04:53.409800  543705 memory.go:184] no items to output this cycle
I0322 00:04:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 00:05:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:05:03.409787  543705 memory.go:184] no items to output this cycle
I0322 00:05:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 00:05:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:05:13.409802  543705 memory.go:191] Add success.
I0322 00:05:13.409815  543705 cpu.go:282] Add success.
W0322 00:05:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:05:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:05:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:05:13.420366  543705 net.go:648] Add success.
I0322 00:05:13.423236  543705 net.go:770] primary dev: ETH0
I0322 00:05:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:05:13.423270  543705 net.go:698] Add success.
I0322 00:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:05:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:05:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 00:05:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:05:14.457161  543705 disk_worker.go:494] system disk:vda1
I0322 00:05:14.457190  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:05:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:05:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:05:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:05:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:05:23.409774  543705 memory.go:184] no items to output this cycle
I0322 00:05:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 00:05:32.061680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:05:32.064211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:05:32.064217  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029dcc0 0xc00029dd00]
E0322 00:05:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:05:33.409795  543705 memory.go:184] no items to output this cycle
I0322 00:05:33.409810  543705 cpu.go:275] no items to output this cycle
E0322 00:05:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:05:43.409782  543705 memory.go:191] Add success.
I0322 00:05:43.409804  543705 cpu.go:282] Add success.
I0322 00:05:43.420021  543705 net.go:648] Add success.
I0322 00:05:43.422730  543705 net.go:770] primary dev: ETH0
I0322 00:05:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:05:43.422757  543705 net.go:698] Add success.
I0322 00:05:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:05:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:05:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:05:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:05:53.409778  543705 memory.go:184] no items to output this cycle
I0322 00:05:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 00:06:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:06:03.409776  543705 memory.go:184] no items to output this cycle
I0322 00:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 00:06:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:06:13.409810  543705 memory.go:191] Add success.
I0322 00:06:13.409817  543705 cpu.go:282] Add success.
W0322 00:06:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:06:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:06:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:06:13.420048  543705 net.go:648] Add success.
I0322 00:06:13.422780  543705 net.go:770] primary dev: ETH0
I0322 00:06:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:06:13.422805  543705 net.go:698] Add success.
I0322 00:06:13.468372  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a867e16a-c5ea-48b3-a320-fd60d5d6c066","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:06:13.468406  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:06:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:06:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:06:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 00:06:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:06:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 00:06:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:06:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:06:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:06:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:06:23.409769  543705 memory.go:184] no items to output this cycle
I0322 00:06:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 00:06:32.065680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:06:32.068156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:06:32.068161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7400 0xc0003e7440]
E0322 00:06:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:06:33.409791  543705 memory.go:184] no items to output this cycle
I0322 00:06:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 00:06:39.393745  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:06:39.393753  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:06:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:06:43.410801  543705 memory.go:191] Add success.
I0322 00:06:43.409809  543705 cpu.go:282] Add success.
I0322 00:06:43.420500  543705 net.go:648] Add success.
I0322 00:06:43.423578  543705 net.go:770] primary dev: ETH0
I0322 00:06:43.423592  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:06:43.423606  543705 net.go:698] Add success.
I0322 00:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:06:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:06:53.410342  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:06:53.410359  543705 memory.go:184] no items to output this cycle
I0322 00:06:53.410390  543705 cpu.go:275] no items to output this cycle
E0322 00:07:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:07:03.409801  543705 memory.go:184] no items to output this cycle
I0322 00:07:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 00:07:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:07:13.409772  543705 memory.go:191] Add success.
W0322 00:07:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:07:13.409803  543705 cpu.go:282] Add success.
W0322 00:07:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:07:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:07:13.420065  543705 net.go:648] Add success.
I0322 00:07:13.422665  543705 net.go:770] primary dev: ETH0
I0322 00:07:13.422679  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:07:13.422691  543705 net.go:698] Add success.
I0322 00:07:13.453243  543705 event_worker.go:152] Polling the log file for events...
W0322 00:07:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 00:07:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:07:14.455886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:07:14.455895  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:07:14.455901  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:07:14.456538  543705 disk_worker.go:494] system disk:vda1
I0322 00:07:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:07:15.456884  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:07:15.456893  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:07:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:07:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:07:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:07:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:07:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:07:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:07:23.409778  543705 memory.go:184] no items to output this cycle
I0322 00:07:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 00:07:32.069677  543705 disk_info.go:125] begin check local disk info of client
I0322 00:07:32.072211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:07:32.072219  543705 disk_info.go:196] parse disk info done, disk is : [0xc000520dc0 0xc000520e00]
E0322 00:07:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:07:33.409762  543705 memory.go:184] no items to output this cycle
I0322 00:07:33.409798  543705 cpu.go:275] no items to output this cycle
E0322 00:07:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:07:43.409810  543705 memory.go:191] Add success.
I0322 00:07:43.409812  543705 cpu.go:282] Add success.
I0322 00:07:43.420034  543705 net.go:648] Add success.
I0322 00:07:43.422742  543705 net.go:770] primary dev: ETH0
I0322 00:07:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:07:43.422772  543705 net.go:698] Add success.
I0322 00:07:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:07:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:07:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:07:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:07:53.409790  543705 memory.go:184] no items to output this cycle
I0322 00:07:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 00:08:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:08:03.409781  543705 memory.go:184] no items to output this cycle
I0322 00:08:03.409817  543705 cpu.go:275] no items to output this cycle
W0322 00:08:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:08:13.409738  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:08:13.409745  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:08:13.409834  543705 cpu.go:282] Add success.
E0322 00:08:13.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:08:13.409852  543705 memory.go:191] Add success.
I0322 00:08:13.419991  543705 net.go:648] Add success.
I0322 00:08:13.422770  543705 net.go:770] primary dev: ETH0
I0322 00:08:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:08:13.422800  543705 net.go:698] Add success.
I0322 00:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:08:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:08:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 00:08:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:08:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 00:08:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:08:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:08:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:08:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:08:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:08:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:08:23.409769  543705 memory.go:184] no items to output this cycle
I0322 00:08:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 00:08:32.073683  543705 disk_info.go:125] begin check local disk info of client
I0322 00:08:32.076171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:08:32.076177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 00:08:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:08:33.409798  543705 memory.go:184] no items to output this cycle
I0322 00:08:33.409811  543705 cpu.go:275] no items to output this cycle
E0322 00:08:43.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:08:43.409827  543705 cpu.go:282] Add success.
I0322 00:08:43.409851  543705 memory.go:191] Add success.
I0322 00:08:43.420195  543705 net.go:648] Add success.
I0322 00:08:43.421147  543705 net.go:770] primary dev: ETH0
I0322 00:08:43.421165  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:08:43.421184  543705 net.go:698] Add success.
I0322 00:08:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:08:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:08:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:08:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:08:53.409817  543705 memory.go:184] no items to output this cycle
I0322 00:08:53.409827  543705 cpu.go:275] no items to output this cycle
E0322 00:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:09:03.409782  543705 memory.go:184] no items to output this cycle
I0322 00:09:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 00:09:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:09:13.409785  543705 memory.go:191] Add success.
I0322 00:09:13.409787  543705 cpu.go:282] Add success.
W0322 00:09:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:09:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:09:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:09:13.420050  543705 net.go:648] Add success.
I0322 00:09:13.422819  543705 net.go:770] primary dev: ETH0
I0322 00:09:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:09:13.422844  543705 net.go:698] Add success.
I0322 00:09:13.467983  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"10436310-8c60-42a4-b6a1-e27d554cb741","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:09:13.468014  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:09:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:09:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 00:09:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:09:14.456574  543705 disk_worker.go:494] system disk:vda1
I0322 00:09:14.456744  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:09:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:09:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:09:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:09:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:09:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:09:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:09:23.409783  543705 memory.go:184] no items to output this cycle
I0322 00:09:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 00:09:32.077680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:09:32.080236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:09:32.080243  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028fcc0 0xc00028fd00]
E0322 00:09:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:09:33.409792  543705 memory.go:184] no items to output this cycle
I0322 00:09:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 00:09:39.397741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:09:39.397748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:09:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:09:43.410675  543705 memory.go:191] Add success.
I0322 00:09:43.409828  543705 cpu.go:282] Add success.
I0322 00:09:43.420448  543705 net.go:648] Add success.
I0322 00:09:43.423152  543705 net.go:770] primary dev: ETH0
I0322 00:09:43.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:09:43.423178  543705 net.go:698] Add success.
I0322 00:09:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:09:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:09:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:09:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:09:53.409768  543705 memory.go:184] no items to output this cycle
I0322 00:09:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:10:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:10:03.409775  543705 memory.go:184] no items to output this cycle
I0322 00:10:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 00:10:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:10:13.409784  543705 memory.go:191] Add success.
W0322 00:10:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:10:13.409812  543705 cpu.go:282] Add success.
W0322 00:10:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:10:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:10:13.420056  543705 net.go:648] Add success.
I0322 00:10:13.422493  543705 net.go:770] primary dev: ETH0
I0322 00:10:13.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:10:13.422518  543705 net.go:698] Add success.
I0322 00:10:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:10:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:10:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 00:10:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:10:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 00:10:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:10:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:10:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:10:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:10:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:10:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:10:23.409767  543705 memory.go:184] no items to output this cycle
I0322 00:10:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 00:10:32.081681  543705 disk_info.go:125] begin check local disk info of client
I0322 00:10:32.084167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:10:32.084173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2080 0xc0003b20c0]
E0322 00:10:33.409739  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:10:33.409753  543705 memory.go:184] no items to output this cycle
I0322 00:10:33.409797  543705 cpu.go:275] no items to output this cycle
E0322 00:10:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:10:43.409800  543705 memory.go:191] Add success.
I0322 00:10:43.409803  543705 cpu.go:282] Add success.
I0322 00:10:43.419887  543705 net.go:648] Add success.
I0322 00:10:43.422679  543705 net.go:770] primary dev: ETH0
I0322 00:10:43.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:10:43.422709  543705 net.go:698] Add success.
I0322 00:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:10:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:10:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:10:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:10:53.409779  543705 memory.go:184] no items to output this cycle
I0322 00:10:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 00:11:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:11:03.409792  543705 memory.go:184] no items to output this cycle
I0322 00:11:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 00:11:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:11:13.409812  543705 memory.go:191] Add success.
I0322 00:11:13.409817  543705 cpu.go:282] Add success.
W0322 00:11:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:11:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:11:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:11:13.420137  543705 net.go:648] Add success.
I0322 00:11:13.422829  543705 net.go:770] primary dev: ETH0
I0322 00:11:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:11:13.422855  543705 net.go:698] Add success.
I0322 00:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:11:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:11:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 00:11:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:11:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 00:11:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:11:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:11:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:11:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:11:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:11:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:11:23.409787  543705 memory.go:184] no items to output this cycle
I0322 00:11:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 00:11:32.085699  543705 disk_info.go:125] begin check local disk info of client
I0322 00:11:32.088266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:11:32.088273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1140 0xc0004a1180]
E0322 00:11:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:11:33.409794  543705 memory.go:184] no items to output this cycle
I0322 00:11:33.409807  543705 cpu.go:275] no items to output this cycle
E0322 00:11:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:11:43.409821  543705 memory.go:191] Add success.
I0322 00:11:43.409834  543705 cpu.go:282] Add success.
I0322 00:11:43.419956  543705 net.go:648] Add success.
I0322 00:11:43.422946  543705 net.go:770] primary dev: ETH0
I0322 00:11:43.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:11:43.422971  543705 net.go:698] Add success.
I0322 00:11:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:11:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:11:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:11:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:11:53.409809  543705 memory.go:184] no items to output this cycle
I0322 00:11:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 00:12:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:12:03.409780  543705 memory.go:184] no items to output this cycle
I0322 00:12:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 00:12:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:12:13.409826  543705 memory.go:191] Add success.
I0322 00:12:13.409840  543705 cpu.go:282] Add success.
W0322 00:12:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:12:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:12:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:12:13.420195  543705 net.go:648] Add success.
I0322 00:12:13.423382  543705 net.go:770] primary dev: ETH0
I0322 00:12:13.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:12:13.423419  543705 net.go:698] Add success.
I0322 00:12:13.588103  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"830e0004-6469-4ca0-b7c1-8805199531ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:12:13.588138  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 00:12:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:12:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 00:12:14.455212  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:12:14.455983  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:12:14.455993  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:12:14.455999  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:12:14.456615  543705 disk_worker.go:494] system disk:vda1
I0322 00:12:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:12:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:12:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
E0322 00:12:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:12:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:12:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:12:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:12:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:12:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:12:23.409792  543705 memory.go:184] no items to output this cycle
I0322 00:12:23.409795  543705 cpu.go:275] no items to output this cycle
I0322 00:12:32.089677  543705 disk_info.go:125] begin check local disk info of client
I0322 00:12:32.092207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:12:32.092213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b43c0 0xc0002b4400]
E0322 00:12:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:12:33.409761  543705 memory.go:184] no items to output this cycle
I0322 00:12:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 00:12:39.401750  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:12:39.401757  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:12:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:12:43.410719  543705 memory.go:191] Add success.
I0322 00:12:43.409812  543705 cpu.go:282] Add success.
I0322 00:12:43.420428  543705 net.go:648] Add success.
I0322 00:12:43.423192  543705 net.go:770] primary dev: ETH0
I0322 00:12:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:12:43.423220  543705 net.go:698] Add success.
I0322 00:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:12:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:12:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:12:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:12:53.409803  543705 memory.go:184] no items to output this cycle
I0322 00:12:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 00:13:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:13:03.409776  543705 memory.go:184] no items to output this cycle
I0322 00:13:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 00:13:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:13:13.409781  543705 memory.go:191] Add success.
I0322 00:13:13.409799  543705 cpu.go:282] Add success.
W0322 00:13:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:13:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:13:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:13:13.420037  543705 net.go:648] Add success.
I0322 00:13:13.422558  543705 net.go:770] primary dev: ETH0
I0322 00:13:13.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:13:13.422584  543705 net.go:698] Add success.
I0322 00:13:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:13:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:13:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 00:13:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:13:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 00:13:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:13:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:13:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:13:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:13:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:13:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:13:23.409773  543705 memory.go:184] no items to output this cycle
I0322 00:13:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 00:13:32.093682  543705 disk_info.go:125] begin check local disk info of client
I0322 00:13:32.096224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:13:32.096230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a23c0 0xc0004a2400]
E0322 00:13:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:13:33.409768  543705 memory.go:184] no items to output this cycle
I0322 00:13:33.409796  543705 cpu.go:275] no items to output this cycle
E0322 00:13:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:13:43.409825  543705 memory.go:191] Add success.
I0322 00:13:43.409827  543705 cpu.go:282] Add success.
I0322 00:13:43.419888  543705 net.go:648] Add success.
I0322 00:13:43.422532  543705 net.go:770] primary dev: ETH0
I0322 00:13:43.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:13:43.422558  543705 net.go:698] Add success.
I0322 00:13:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:13:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:13:53.409766  543705 memory.go:184] no items to output this cycle
I0322 00:13:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 00:14:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:14:03.409784  543705 memory.go:184] no items to output this cycle
I0322 00:14:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 00:14:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:14:13.409781  543705 memory.go:191] Add success.
I0322 00:14:13.409805  543705 cpu.go:282] Add success.
W0322 00:14:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:14:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:14:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:14:13.420612  543705 net.go:648] Add success.
I0322 00:14:13.423318  543705 net.go:770] primary dev: ETH0
I0322 00:14:13.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:14:13.423345  543705 net.go:698] Add success.
I0322 00:14:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:14:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:14:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 00:14:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:14:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 00:14:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:14:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:14:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:14:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:14:23.409772  543705 memory.go:184] no items to output this cycle
I0322 00:14:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 00:14:32.097668  543705 disk_info.go:125] begin check local disk info of client
I0322 00:14:32.100161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:14:32.100168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003585c0 0xc000358600]
E0322 00:14:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:14:33.409763  543705 memory.go:184] no items to output this cycle
I0322 00:14:33.409889  543705 cpu.go:275] no items to output this cycle
E0322 00:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:14:43.409803  543705 memory.go:191] Add success.
I0322 00:14:43.409809  543705 cpu.go:282] Add success.
I0322 00:14:43.419941  543705 net.go:648] Add success.
I0322 00:14:43.422776  543705 net.go:770] primary dev: ETH0
I0322 00:14:43.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:14:43.422802  543705 net.go:698] Add success.
I0322 00:14:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:14:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:14:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:14:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:14:53.409803  543705 memory.go:184] no items to output this cycle
I0322 00:14:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 00:15:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:15:03.409781  543705 memory.go:184] no items to output this cycle
I0322 00:15:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 00:15:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:15:13.409788  543705 cpu.go:282] Add success.
I0322 00:15:13.409789  543705 memory.go:191] Add success.
W0322 00:15:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:15:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:15:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:15:13.420135  543705 net.go:648] Add success.
I0322 00:15:13.422918  543705 net.go:770] primary dev: ETH0
I0322 00:15:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:15:13.422947  543705 net.go:698] Add success.
I0322 00:15:13.572280  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6ddeff94-3ff4-43c5-ba1a-379301c1f1a7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:15:13.572316  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:15:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:15:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:15:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 00:15:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:15:14.456748  543705 disk_worker.go:494] system disk:vda1
I0322 00:15:14.456784  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:15:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:15:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:15:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:15:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:15:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:15:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:15:23.409775  543705 memory.go:184] no items to output this cycle
I0322 00:15:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 00:15:32.101678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:15:32.104204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:15:32.104211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be0c0 0xc0003be100]
E0322 00:15:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:15:33.409795  543705 memory.go:184] no items to output this cycle
I0322 00:15:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 00:15:39.405742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:15:39.405750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:15:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:15:43.410538  543705 memory.go:191] Add success.
I0322 00:15:43.409812  543705 cpu.go:282] Add success.
I0322 00:15:43.420267  543705 net.go:648] Add success.
I0322 00:15:43.423276  543705 net.go:770] primary dev: ETH0
I0322 00:15:43.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:15:43.423304  543705 net.go:698] Add success.
I0322 00:15:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:15:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:15:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:15:53.409766  543705 memory.go:184] no items to output this cycle
I0322 00:15:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 00:16:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:16:03.409775  543705 memory.go:184] no items to output this cycle
I0322 00:16:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 00:16:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:16:13.409810  543705 memory.go:191] Add success.
I0322 00:16:13.409824  543705 cpu.go:282] Add success.
W0322 00:16:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:16:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:16:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:16:13.420159  543705 net.go:648] Add success.
I0322 00:16:13.422920  543705 net.go:770] primary dev: ETH0
I0322 00:16:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:16:13.422948  543705 net.go:698] Add success.
I0322 00:16:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:16:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:16:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 00:16:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:16:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 00:16:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:16:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:16:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:16:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:16:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:16:23.409766  543705 memory.go:184] no items to output this cycle
I0322 00:16:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 00:16:32.105674  543705 disk_info.go:125] begin check local disk info of client
I0322 00:16:32.108181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:16:32.108187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047fbc0 0xc00047fc00]
E0322 00:16:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:16:33.409782  543705 memory.go:184] no items to output this cycle
I0322 00:16:33.409798  543705 cpu.go:275] no items to output this cycle
E0322 00:16:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:16:43.409800  543705 memory.go:191] Add success.
I0322 00:16:43.409815  543705 cpu.go:282] Add success.
I0322 00:16:43.420096  543705 net.go:648] Add success.
I0322 00:16:43.423122  543705 net.go:770] primary dev: ETH0
I0322 00:16:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:16:43.423151  543705 net.go:698] Add success.
I0322 00:16:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:16:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:16:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:16:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:16:53.409785  543705 memory.go:184] no items to output this cycle
I0322 00:16:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 00:17:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:17:03.409801  543705 memory.go:184] no items to output this cycle
I0322 00:17:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 00:17:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:17:13.409801  543705 memory.go:191] Add success.
I0322 00:17:13.409805  543705 cpu.go:282] Add success.
W0322 00:17:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:17:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:17:13.420116  543705 net.go:648] Add success.
I0322 00:17:13.422924  543705 net.go:770] primary dev: ETH0
I0322 00:17:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:17:13.422950  543705 net.go:698] Add success.
I0322 00:17:13.453503  543705 event_worker.go:152] Polling the log file for events...
W0322 00:17:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:17:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0322 00:17:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:17:14.456897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:17:14.456906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:17:14.456913  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:17:14.456965  543705 disk_worker.go:494] system disk:vda1
I0322 00:17:14.456995  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:17:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:17:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:17:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:17:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:17:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:17:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:17:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:17:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:17:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 00:17:23.409788  543705 memory.go:184] no items to output this cycle
I0322 00:17:32.109681  543705 disk_info.go:125] begin check local disk info of client
I0322 00:17:32.112236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:17:32.112243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9ec0 0xc0004d9f00]
E0322 00:17:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:17:33.409772  543705 memory.go:184] no items to output this cycle
I0322 00:17:33.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:17:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:17:43.409831  543705 memory.go:191] Add success.
I0322 00:17:43.409834  543705 cpu.go:282] Add success.
I0322 00:17:43.419755  543705 net.go:648] Add success.
I0322 00:17:43.422372  543705 net.go:770] primary dev: ETH0
I0322 00:17:43.422384  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:17:43.422396  543705 net.go:698] Add success.
I0322 00:17:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:17:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:17:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:17:53.409787  543705 memory.go:184] no items to output this cycle
I0322 00:17:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 00:18:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:18:03.409767  543705 memory.go:184] no items to output this cycle
I0322 00:18:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:18:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:18:13.409780  543705 memory.go:191] Add success.
I0322 00:18:13.409801  543705 cpu.go:282] Add success.
W0322 00:18:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:18:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:18:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:18:13.420111  543705 net.go:648] Add success.
I0322 00:18:13.422771  543705 net.go:770] primary dev: ETH0
I0322 00:18:13.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:18:13.422795  543705 net.go:698] Add success.
I0322 00:18:13.464066  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8acec1aa-e6ea-4742-9f30-8cb0d36ab0c6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:18:13.464100  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:18:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:18:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:18:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 00:18:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:18:14.456613  543705 disk_worker.go:494] system disk:vda1
I0322 00:18:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:18:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:18:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:18:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:18:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:18:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:18:23.409792  543705 memory.go:184] no items to output this cycle
I0322 00:18:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 00:18:32.113680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:18:32.116198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:18:32.116205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005fcbc0 0xc0005fcc00]
E0322 00:18:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:18:33.409786  543705 memory.go:184] no items to output this cycle
I0322 00:18:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 00:18:39.409738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:18:39.409746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:18:43.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:18:43.410726  543705 memory.go:191] Add success.
I0322 00:18:43.409967  543705 cpu.go:282] Add success.
I0322 00:18:43.419719  543705 net.go:648] Add success.
I0322 00:18:43.422770  543705 net.go:770] primary dev: ETH0
I0322 00:18:43.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:18:43.422800  543705 net.go:698] Add success.
I0322 00:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:18:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:18:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:18:53.409780  543705 memory.go:184] no items to output this cycle
I0322 00:18:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 00:19:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:19:03.409796  543705 memory.go:184] no items to output this cycle
I0322 00:19:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 00:19:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:19:13.409815  543705 memory.go:191] Add success.
I0322 00:19:13.409825  543705 cpu.go:282] Add success.
W0322 00:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:19:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:19:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:19:13.420089  543705 net.go:648] Add success.
I0322 00:19:13.422550  543705 net.go:770] primary dev: ETH0
I0322 00:19:13.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:19:13.422578  543705 net.go:698] Add success.
I0322 00:19:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:19:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:19:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 00:19:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:19:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 00:19:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:19:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:19:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:19:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:19:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:19:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:19:23.409784  543705 memory.go:184] no items to output this cycle
I0322 00:19:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 00:19:32.117678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:19:32.120234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:19:32.120240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba80 0xc0001abac0]
E0322 00:19:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:19:33.409795  543705 memory.go:184] no items to output this cycle
I0322 00:19:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:19:43.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:19:43.409881  543705 memory.go:191] Add success.
I0322 00:19:43.409958  543705 cpu.go:282] Add success.
I0322 00:19:43.419751  543705 net.go:648] Add success.
I0322 00:19:43.422260  543705 net.go:770] primary dev: ETH0
I0322 00:19:43.422273  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:19:43.422285  543705 net.go:698] Add success.
I0322 00:19:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:19:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:19:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:19:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:19:53.409772  543705 memory.go:184] no items to output this cycle
I0322 00:19:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 00:20:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:20:03.409796  543705 memory.go:184] no items to output this cycle
I0322 00:20:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 00:20:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:20:13.409772  543705 memory.go:191] Add success.
W0322 00:20:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:20:13.409798  543705 cpu.go:282] Add success.
W0322 00:20:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:20:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:20:13.420106  543705 net.go:648] Add success.
I0322 00:20:13.422690  543705 net.go:770] primary dev: ETH0
I0322 00:20:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:20:13.422720  543705 net.go:698] Add success.
I0322 00:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:20:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:20:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 00:20:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:20:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 00:20:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:20:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:20:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:20:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:20:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:20:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:20:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:20:23.409768  543705 memory.go:184] no items to output this cycle
I0322 00:20:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 00:20:32.121678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:20:32.124168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:20:32.124174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae80 0xc00007aec0]
E0322 00:20:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:20:33.409791  543705 memory.go:184] no items to output this cycle
I0322 00:20:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:20:43.409789  543705 memory.go:191] Add success.
I0322 00:20:43.409813  543705 cpu.go:282] Add success.
I0322 00:20:43.419749  543705 net.go:648] Add success.
I0322 00:20:43.422515  543705 net.go:770] primary dev: ETH0
I0322 00:20:43.422529  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:20:43.422542  543705 net.go:698] Add success.
I0322 00:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:20:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:20:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:20:53.409776  543705 memory.go:184] no items to output this cycle
I0322 00:20:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 00:21:03.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:21:03.409757  543705 memory.go:184] no items to output this cycle
I0322 00:21:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 00:21:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:21:13.409783  543705 memory.go:191] Add success.
I0322 00:21:13.409802  543705 cpu.go:282] Add success.
W0322 00:21:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:21:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:21:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:21:13.420064  543705 net.go:648] Add success.
I0322 00:21:13.422734  543705 net.go:770] primary dev: ETH0
I0322 00:21:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:21:13.422763  543705 net.go:698] Add success.
I0322 00:21:13.468565  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a8af3f25-0545-4fd6-bbb8-c0e53ea92c7a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:21:13.468598  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:21:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:21:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:21:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 00:21:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:21:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 00:21:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:21:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:21:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:21:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:21:23.409806  543705 memory.go:184] no items to output this cycle
I0322 00:21:23.409820  543705 cpu.go:275] no items to output this cycle
I0322 00:21:32.125684  543705 disk_info.go:125] begin check local disk info of client
I0322 00:21:32.128283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:21:32.128291  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a440 0xc00047a480]
E0322 00:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:21:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 00:21:33.409788  543705 memory.go:184] no items to output this cycle
I0322 00:21:39.410816  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:21:39.410825  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:21:43.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:21:43.410602  543705 memory.go:191] Add success.
I0322 00:21:43.409994  543705 cpu.go:282] Add success.
I0322 00:21:43.419727  543705 net.go:648] Add success.
I0322 00:21:43.422012  543705 net.go:770] primary dev: ETH0
I0322 00:21:43.422026  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:21:43.422037  543705 net.go:698] Add success.
I0322 00:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:21:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:21:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:21:53.409787  543705 memory.go:184] no items to output this cycle
I0322 00:21:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 00:22:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:22:03.409788  543705 memory.go:184] no items to output this cycle
I0322 00:22:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 00:22:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:22:13.409794  543705 memory.go:191] Add success.
W0322 00:22:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:22:13.409821  543705 cpu.go:282] Add success.
W0322 00:22:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:22:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:22:13.420263  543705 net.go:648] Add success.
I0322 00:22:13.423330  543705 net.go:770] primary dev: ETH0
I0322 00:22:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:22:13.423354  543705 net.go:698] Add success.
W0322 00:22:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:22:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 00:22:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:22:14.456920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:22:14.456930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:22:14.456936  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:22:14.456992  543705 disk_worker.go:494] system disk:vda1
I0322 00:22:14.457023  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:22:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:22:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:22:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:22:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:22:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:22:16.458037  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:22:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:22:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:22:23.409786  543705 memory.go:184] no items to output this cycle
I0322 00:22:23.409814  543705 cpu.go:275] no items to output this cycle
I0322 00:22:32.129676  543705 disk_info.go:125] begin check local disk info of client
I0322 00:22:32.132194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:22:32.132200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab1c0 0xc0001ab200]
E0322 00:22:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:22:33.409784  543705 memory.go:184] no items to output this cycle
I0322 00:22:33.409816  543705 cpu.go:275] no items to output this cycle
E0322 00:22:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:22:43.409890  543705 memory.go:191] Add success.
I0322 00:22:43.409955  543705 cpu.go:282] Add success.
I0322 00:22:43.419722  543705 net.go:648] Add success.
I0322 00:22:43.422442  543705 net.go:770] primary dev: ETH0
I0322 00:22:43.422457  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:22:43.422472  543705 net.go:698] Add success.
I0322 00:22:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:22:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:22:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:22:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:22:53.409808  543705 memory.go:184] no items to output this cycle
I0322 00:22:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 00:23:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:23:03.409775  543705 memory.go:184] no items to output this cycle
I0322 00:23:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 00:23:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:23:13.409832  543705 memory.go:191] Add success.
I0322 00:23:13.409834  543705 cpu.go:282] Add success.
W0322 00:23:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:23:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:23:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:23:13.420127  543705 net.go:648] Add success.
I0322 00:23:13.423362  543705 net.go:770] primary dev: ETH0
I0322 00:23:13.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:23:13.423387  543705 net.go:698] Add success.
I0322 00:23:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:23:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:23:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 00:23:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:23:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 00:23:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:23:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:23:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:23:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:23:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:23:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:23:23.409798  543705 memory.go:184] no items to output this cycle
I0322 00:23:23.409822  543705 cpu.go:275] no items to output this cycle
I0322 00:23:32.133679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:23:32.136223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:23:32.136229  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5600 0xc0000c5640]
E0322 00:23:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:23:33.409793  543705 memory.go:184] no items to output this cycle
I0322 00:23:33.409816  543705 cpu.go:275] no items to output this cycle
E0322 00:23:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:23:43.409801  543705 memory.go:191] Add success.
I0322 00:23:43.409807  543705 cpu.go:282] Add success.
I0322 00:23:43.420357  543705 net.go:648] Add success.
I0322 00:23:43.423010  543705 net.go:770] primary dev: ETH0
I0322 00:23:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:23:43.423039  543705 net.go:698] Add success.
I0322 00:23:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:23:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:23:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:23:53.409802  543705 memory.go:184] no items to output this cycle
I0322 00:23:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 00:24:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:24:03.409788  543705 memory.go:184] no items to output this cycle
I0322 00:24:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 00:24:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:24:13.409810  543705 memory.go:191] Add success.
I0322 00:24:13.409819  543705 cpu.go:282] Add success.
W0322 00:24:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:24:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:24:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:24:13.420156  543705 net.go:648] Add success.
I0322 00:24:13.422991  543705 net.go:770] primary dev: ETH0
I0322 00:24:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:24:13.423021  543705 net.go:698] Add success.
I0322 00:24:13.468463  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"588176ae-30ec-45f8-868d-3d48a74ba4a6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:24:13.468496  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:24:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:24:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:24:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 00:24:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:24:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 00:24:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:24:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:24:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:24:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:24:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 00:24:23.409789  543705 memory.go:184] no items to output this cycle
I0322 00:24:32.137676  543705 disk_info.go:125] begin check local disk info of client
I0322 00:24:32.140215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:24:32.140222  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305bc0 0xc000305c00]
E0322 00:24:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:24:33.409759  543705 memory.go:184] no items to output this cycle
I0322 00:24:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 00:24:39.411856  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:24:39.411864  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:24:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:24:43.410642  543705 memory.go:191] Add success.
I0322 00:24:43.409801  543705 cpu.go:282] Add success.
I0322 00:24:43.420483  543705 net.go:648] Add success.
I0322 00:24:43.423336  543705 net.go:770] primary dev: ETH0
I0322 00:24:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:24:43.423366  543705 net.go:698] Add success.
I0322 00:24:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:24:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:24:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:24:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:24:53.409804  543705 memory.go:184] no items to output this cycle
I0322 00:24:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 00:25:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:25:03.409764  543705 memory.go:184] no items to output this cycle
I0322 00:25:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 00:25:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:25:13.409778  543705 memory.go:191] Add success.
W0322 00:25:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:25:13.409806  543705 cpu.go:282] Add success.
W0322 00:25:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:25:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:25:13.420057  543705 net.go:648] Add success.
I0322 00:25:13.422789  543705 net.go:770] primary dev: ETH0
I0322 00:25:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:25:13.422819  543705 net.go:698] Add success.
I0322 00:25:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:25:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:25:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 00:25:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:25:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 00:25:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:25:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:25:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:25:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:25:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:25:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:25:23.409777  543705 memory.go:184] no items to output this cycle
I0322 00:25:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 00:25:32.141684  543705 disk_info.go:125] begin check local disk info of client
I0322 00:25:32.144210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:25:32.144218  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bc40 0xc00007bc80]
E0322 00:25:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:25:33.409791  543705 memory.go:184] no items to output this cycle
I0322 00:25:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:25:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:25:43.409803  543705 memory.go:191] Add success.
I0322 00:25:43.409805  543705 cpu.go:282] Add success.
I0322 00:25:43.419839  543705 net.go:648] Add success.
I0322 00:25:43.422467  543705 net.go:770] primary dev: ETH0
I0322 00:25:43.422480  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:25:43.422492  543705 net.go:698] Add success.
I0322 00:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:25:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:25:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:25:53.409796  543705 memory.go:184] no items to output this cycle
I0322 00:25:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 00:26:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:26:03.409771  543705 memory.go:184] no items to output this cycle
I0322 00:26:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 00:26:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:26:13.409796  543705 memory.go:191] Add success.
I0322 00:26:13.409796  543705 cpu.go:282] Add success.
W0322 00:26:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:26:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:26:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:26:13.420043  543705 net.go:648] Add success.
I0322 00:26:13.422814  543705 net.go:770] primary dev: ETH0
I0322 00:26:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:26:13.422838  543705 net.go:698] Add success.
I0322 00:26:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:26:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:26:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 00:26:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:26:14.456618  543705 disk_worker.go:494] system disk:vda1
I0322 00:26:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:26:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:26:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:26:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:26:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:26:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:26:23.409771  543705 memory.go:184] no items to output this cycle
I0322 00:26:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 00:26:32.145678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:26:32.148169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:26:32.148176  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1c80 0xc0003b1cc0]
E0322 00:26:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:26:33.409796  543705 memory.go:184] no items to output this cycle
I0322 00:26:33.409817  543705 cpu.go:275] no items to output this cycle
E0322 00:26:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:26:43.409819  543705 memory.go:191] Add success.
I0322 00:26:43.409824  543705 cpu.go:282] Add success.
I0322 00:26:43.420054  543705 net.go:648] Add success.
I0322 00:26:43.423102  543705 net.go:770] primary dev: ETH0
I0322 00:26:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:26:43.423129  543705 net.go:698] Add success.
I0322 00:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:26:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:26:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:26:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:26:53.409798  543705 memory.go:184] no items to output this cycle
I0322 00:26:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 00:27:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:27:03.409780  543705 memory.go:184] no items to output this cycle
I0322 00:27:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 00:27:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:27:13.409778  543705 memory.go:191] Add success.
W0322 00:27:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:27:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:27:13.409814  543705 cpu.go:282] Add success.
I0322 00:27:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:27:13.420138  543705 net.go:648] Add success.
I0322 00:27:13.422579  543705 net.go:770] primary dev: ETH0
I0322 00:27:13.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:27:13.422604  543705 net.go:698] Add success.
I0322 00:27:13.428687  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 00:27:13.452931  543705 event_worker.go:152] Polling the log file for events...
I0322 00:27:13.761177  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"17789dc2-7bb6-4b60-9f4a-5604c4fa0ff6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:27:13.761210  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 00:27:14.454952  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:27:14.454965  543705 disk_worker.go:708] disk space is not compliant
W0322 00:27:14.454969  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:27:14.455605  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:27:14.455613  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:27:14.455619  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:27:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 00:27:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:27:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:27:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:27:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:27:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:27:16.457967  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:27:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:27:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:27:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:27:23.409801  543705 memory.go:184] no items to output this cycle
I0322 00:27:23.409808  543705 cpu.go:275] no items to output this cycle
I0322 00:27:32.149687  543705 disk_info.go:125] begin check local disk info of client
I0322 00:27:32.152259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:27:32.152266  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039bf00 0xc00039bf40]
E0322 00:27:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:27:33.409777  543705 memory.go:184] no items to output this cycle
I0322 00:27:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 00:27:39.412857  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:27:39.412864  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:27:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:27:43.410735  543705 memory.go:191] Add success.
I0322 00:27:43.409818  543705 cpu.go:282] Add success.
I0322 00:27:43.420453  543705 net.go:648] Add success.
I0322 00:27:43.423108  543705 net.go:770] primary dev: ETH0
I0322 00:27:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:27:43.423139  543705 net.go:698] Add success.
I0322 00:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:27:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:27:53.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:27:53.410382  543705 cpu.go:275] no items to output this cycle
I0322 00:27:53.410384  543705 memory.go:184] no items to output this cycle
E0322 00:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:28:03.409786  543705 memory.go:184] no items to output this cycle
I0322 00:28:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 00:28:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:28:13.409790  543705 memory.go:191] Add success.
I0322 00:28:13.409797  543705 cpu.go:282] Add success.
W0322 00:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:28:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:28:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:28:13.420174  543705 net.go:648] Add success.
I0322 00:28:13.422688  543705 net.go:770] primary dev: ETH0
I0322 00:28:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:28:13.422717  543705 net.go:698] Add success.
I0322 00:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:28:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:28:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 00:28:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:28:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 00:28:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:28:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:28:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:28:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:28:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:28:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:28:23.409762  543705 memory.go:184] no items to output this cycle
I0322 00:28:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 00:28:32.153673  543705 disk_info.go:125] begin check local disk info of client
I0322 00:28:32.156153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:28:32.156159  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b180 0xc00007b1c0]
E0322 00:28:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:28:33.409768  543705 memory.go:184] no items to output this cycle
I0322 00:28:33.409876  543705 cpu.go:275] no items to output this cycle
E0322 00:28:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:28:43.409819  543705 memory.go:191] Add success.
I0322 00:28:43.409824  543705 cpu.go:282] Add success.
I0322 00:28:43.419893  543705 net.go:648] Add success.
I0322 00:28:43.422344  543705 net.go:770] primary dev: ETH0
I0322 00:28:43.422357  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:28:43.422369  543705 net.go:698] Add success.
I0322 00:28:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:28:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:28:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:28:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:28:53.409763  543705 memory.go:184] no items to output this cycle
I0322 00:28:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 00:29:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:29:03.409782  543705 memory.go:184] no items to output this cycle
I0322 00:29:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 00:29:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:29:13.409812  543705 memory.go:191] Add success.
I0322 00:29:13.409822  543705 cpu.go:282] Add success.
W0322 00:29:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:29:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:29:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:29:13.420143  543705 net.go:648] Add success.
I0322 00:29:13.422801  543705 net.go:770] primary dev: ETH0
I0322 00:29:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:29:13.422830  543705 net.go:698] Add success.
I0322 00:29:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:29:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:29:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 00:29:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:29:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 00:29:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:29:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:29:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:29:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:29:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:29:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:29:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:29:23.409783  543705 memory.go:184] no items to output this cycle
I0322 00:29:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 00:29:32.157679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:29:32.160301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:29:32.160307  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9980 0xc0004d99c0]
E0322 00:29:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:29:33.409789  543705 memory.go:184] no items to output this cycle
I0322 00:29:33.409791  543705 cpu.go:275] no items to output this cycle
E0322 00:29:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:29:43.409787  543705 memory.go:191] Add success.
I0322 00:29:43.409812  543705 cpu.go:282] Add success.
I0322 00:29:43.419877  543705 net.go:648] Add success.
I0322 00:29:43.422783  543705 net.go:770] primary dev: ETH0
I0322 00:29:43.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:29:43.422808  543705 net.go:698] Add success.
I0322 00:29:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:29:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:29:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:29:53.410417  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:29:53.410433  543705 memory.go:184] no items to output this cycle
I0322 00:29:53.410451  543705 cpu.go:275] no items to output this cycle
E0322 00:30:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:30:03.409811  543705 memory.go:184] no items to output this cycle
I0322 00:30:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 00:30:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:30:13.409784  543705 memory.go:191] Add success.
W0322 00:30:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:30:13.409812  543705 cpu.go:282] Add success.
W0322 00:30:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:30:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:30:13.420166  543705 net.go:648] Add success.
I0322 00:30:13.422972  543705 net.go:770] primary dev: ETH0
I0322 00:30:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:30:13.422999  543705 net.go:698] Add success.
I0322 00:30:13.463881  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"66c3c463-b335-44cc-bd2e-6eee5e70b293","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:30:13.463914  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:30:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:30:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:30:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 00:30:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:30:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 00:30:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:30:15.455606  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:30:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:30:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:30:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:30:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:30:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:30:23.409801  543705 memory.go:184] no items to output this cycle
I0322 00:30:23.409812  543705 cpu.go:275] no items to output this cycle
I0322 00:30:32.161694  543705 disk_info.go:125] begin check local disk info of client
I0322 00:30:32.164152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:30:32.164159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000556ec0 0xc000556f00]
E0322 00:30:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:30:33.409800  543705 memory.go:184] no items to output this cycle
I0322 00:30:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 00:30:39.413850  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:30:39.413858  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:30:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:30:43.410728  543705 memory.go:191] Add success.
I0322 00:30:43.409805  543705 cpu.go:282] Add success.
I0322 00:30:43.420416  543705 net.go:648] Add success.
I0322 00:30:43.423049  543705 net.go:770] primary dev: ETH0
I0322 00:30:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:30:43.423074  543705 net.go:698] Add success.
I0322 00:30:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:30:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:30:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:30:53.409915  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:30:53.409933  543705 memory.go:184] no items to output this cycle
I0322 00:30:53.409980  543705 cpu.go:275] no items to output this cycle
E0322 00:31:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:31:03.409793  543705 cpu.go:275] no items to output this cycle
I0322 00:31:03.409798  543705 memory.go:184] no items to output this cycle
E0322 00:31:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:31:13.409814  543705 memory.go:191] Add success.
I0322 00:31:13.409824  543705 cpu.go:282] Add success.
W0322 00:31:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:31:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:31:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:31:13.420156  543705 net.go:648] Add success.
I0322 00:31:13.422957  543705 net.go:770] primary dev: ETH0
I0322 00:31:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:31:13.422983  543705 net.go:698] Add success.
I0322 00:31:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:31:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:31:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 00:31:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:31:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 00:31:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:31:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:31:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:31:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:31:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:31:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:31:23.409785  543705 memory.go:184] no items to output this cycle
I0322 00:31:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 00:31:32.165679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:31:32.168143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:31:32.168150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8100 0xc0004d8140]
E0322 00:31:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:31:33.409785  543705 memory.go:184] no items to output this cycle
I0322 00:31:33.409786  543705 cpu.go:275] no items to output this cycle
E0322 00:31:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:31:43.409797  543705 memory.go:191] Add success.
I0322 00:31:43.409817  543705 cpu.go:282] Add success.
I0322 00:31:43.420013  543705 net.go:648] Add success.
I0322 00:31:43.422472  543705 net.go:770] primary dev: ETH0
I0322 00:31:43.422488  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:31:43.422502  543705 net.go:698] Add success.
I0322 00:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:31:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:31:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:31:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:31:53.409809  543705 memory.go:184] no items to output this cycle
I0322 00:31:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 00:32:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:32:03.409784  543705 memory.go:184] no items to output this cycle
I0322 00:32:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 00:32:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:32:13.409815  543705 memory.go:191] Add success.
I0322 00:32:13.409816  543705 cpu.go:282] Add success.
W0322 00:32:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:32:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:32:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:32:13.420179  543705 net.go:648] Add success.
I0322 00:32:13.422882  543705 net.go:770] primary dev: ETH0
I0322 00:32:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:32:13.422908  543705 net.go:698] Add success.
W0322 00:32:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:32:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 00:32:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:32:14.455886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:32:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:32:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:32:14.456763  543705 disk_worker.go:494] system disk:vda1
I0322 00:32:14.456792  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:32:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:32:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:32:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:32:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:32:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:32:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:32:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:32:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:32:23.409766  543705 memory.go:184] no items to output this cycle
I0322 00:32:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 00:32:32.169678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:32:32.172190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:32:32.172196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4580 0xc0000c45c0]
E0322 00:32:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:32:33.409764  543705 memory.go:184] no items to output this cycle
I0322 00:32:33.409799  543705 cpu.go:275] no items to output this cycle
E0322 00:32:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:32:43.409792  543705 memory.go:191] Add success.
I0322 00:32:43.409813  543705 cpu.go:282] Add success.
I0322 00:32:43.419970  543705 net.go:648] Add success.
I0322 00:32:43.422786  543705 net.go:770] primary dev: ETH0
I0322 00:32:43.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:32:43.422817  543705 net.go:698] Add success.
I0322 00:32:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:32:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:32:53.410329  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:32:53.410346  543705 memory.go:184] no items to output this cycle
I0322 00:32:53.410361  543705 cpu.go:275] no items to output this cycle
E0322 00:33:03.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:33:03.409885  543705 memory.go:184] no items to output this cycle
I0322 00:33:03.409969  543705 cpu.go:275] no items to output this cycle
E0322 00:33:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:33:13.409789  543705 cpu.go:282] Add success.
I0322 00:33:13.409790  543705 memory.go:191] Add success.
W0322 00:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:33:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:33:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:33:13.420123  543705 net.go:648] Add success.
I0322 00:33:13.422913  543705 net.go:770] primary dev: ETH0
I0322 00:33:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:33:13.422941  543705 net.go:698] Add success.
I0322 00:33:13.468632  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"46504c70-6f49-461e-8014-0a27f307745d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:33:13.468664  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:33:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:33:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:33:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 00:33:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:33:14.456686  543705 disk_worker.go:494] system disk:vda1
I0322 00:33:14.456716  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:33:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:33:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:33:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:33:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:33:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:33:23.410194  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:33:23.410210  543705 memory.go:184] no items to output this cycle
I0322 00:33:23.410222  543705 cpu.go:275] no items to output this cycle
I0322 00:33:32.173677  543705 disk_info.go:125] begin check local disk info of client
I0322 00:33:32.176269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:33:32.176276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab240 0xc0001ab280]
E0322 00:33:33.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:33:33.409762  543705 memory.go:184] no items to output this cycle
I0322 00:33:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 00:33:39.414863  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:33:39.414871  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:33:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:33:43.410638  543705 memory.go:191] Add success.
I0322 00:33:43.409809  543705 cpu.go:282] Add success.
I0322 00:33:43.420335  543705 net.go:648] Add success.
I0322 00:33:43.423236  543705 net.go:770] primary dev: ETH0
I0322 00:33:43.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:33:43.423261  543705 net.go:698] Add success.
I0322 00:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:33:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:33:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:33:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:33:53.410403  543705 memory.go:184] no items to output this cycle
I0322 00:33:53.410431  543705 cpu.go:275] no items to output this cycle
I0322 00:34:03.409904  543705 cpu.go:275] no items to output this cycle
E0322 00:34:03.409908  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:34:03.409978  543705 memory.go:184] no items to output this cycle
E0322 00:34:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:34:13.409807  543705 memory.go:191] Add success.
I0322 00:34:13.409819  543705 cpu.go:282] Add success.
W0322 00:34:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:34:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:34:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:34:13.420100  543705 net.go:648] Add success.
I0322 00:34:13.423091  543705 net.go:770] primary dev: ETH0
I0322 00:34:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:34:13.423116  543705 net.go:698] Add success.
I0322 00:34:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:34:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:34:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 00:34:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:34:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 00:34:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:34:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:34:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:34:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:34:23.409804  543705 memory.go:184] no items to output this cycle
I0322 00:34:23.409818  543705 cpu.go:275] no items to output this cycle
I0322 00:34:32.177676  543705 disk_info.go:125] begin check local disk info of client
I0322 00:34:32.180163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:34:32.180169  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf40 0xc0001aaf80]
E0322 00:34:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:34:33.409797  543705 memory.go:184] no items to output this cycle
I0322 00:34:33.409809  543705 cpu.go:275] no items to output this cycle
E0322 00:34:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:34:43.409792  543705 memory.go:191] Add success.
I0322 00:34:43.409808  543705 cpu.go:282] Add success.
I0322 00:34:43.419993  543705 net.go:648] Add success.
I0322 00:34:43.422886  543705 net.go:770] primary dev: ETH0
I0322 00:34:43.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:34:43.422911  543705 net.go:698] Add success.
I0322 00:34:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:34:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:34:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:34:53.409806  543705 memory.go:184] no items to output this cycle
I0322 00:34:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 00:35:03.409905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:35:03.409919  543705 cpu.go:275] no items to output this cycle
I0322 00:35:03.409976  543705 memory.go:184] no items to output this cycle
E0322 00:35:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:35:13.409783  543705 memory.go:191] Add success.
W0322 00:35:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:35:13.409810  543705 cpu.go:282] Add success.
W0322 00:35:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:35:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:35:13.420058  543705 net.go:648] Add success.
I0322 00:35:13.422744  543705 net.go:770] primary dev: ETH0
I0322 00:35:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:35:13.422768  543705 net.go:698] Add success.
I0322 00:35:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:35:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:35:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 00:35:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:35:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 00:35:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:35:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:35:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:35:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:35:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:35:23.409797  543705 memory.go:184] no items to output this cycle
I0322 00:35:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 00:35:32.181675  543705 disk_info.go:125] begin check local disk info of client
I0322 00:35:32.184410  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:35:32.184417  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad80 0xc0001aadc0]
E0322 00:35:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:35:33.409762  543705 memory.go:184] no items to output this cycle
I0322 00:35:33.409784  543705 cpu.go:275] no items to output this cycle
E0322 00:35:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:35:43.409782  543705 memory.go:191] Add success.
I0322 00:35:43.409808  543705 cpu.go:282] Add success.
I0322 00:35:43.419862  543705 net.go:648] Add success.
I0322 00:35:43.422297  543705 net.go:770] primary dev: ETH0
I0322 00:35:43.422310  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:35:43.422323  543705 net.go:698] Add success.
I0322 00:35:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:35:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:35:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:35:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:35:53.409779  543705 memory.go:184] no items to output this cycle
I0322 00:35:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 00:36:03.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:36:03.409872  543705 memory.go:184] no items to output this cycle
I0322 00:36:03.410016  543705 cpu.go:275] no items to output this cycle
E0322 00:36:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:36:13.409782  543705 memory.go:191] Add success.
W0322 00:36:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:36:13.409813  543705 cpu.go:282] Add success.
W0322 00:36:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:36:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:36:13.420140  543705 net.go:648] Add success.
I0322 00:36:13.422710  543705 net.go:770] primary dev: ETH0
I0322 00:36:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:36:13.422739  543705 net.go:698] Add success.
I0322 00:36:13.645669  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"392104e1-c9f0-47e9-a991-42374edd52c2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:36:13.645702  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:36:14.454690  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:36:14.454875  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:36:14.454885  543705 disk_worker.go:708] disk space is not compliant
W0322 00:36:14.454887  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:36:14.456241  543705 disk_worker.go:494] system disk:vda1
I0322 00:36:14.456298  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:36:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:36:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:36:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:36:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:36:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:36:23.409796  543705 memory.go:184] no items to output this cycle
I0322 00:36:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 00:36:32.185677  543705 disk_info.go:125] begin check local disk info of client
I0322 00:36:32.188252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:36:32.188259  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e2c0 0xc00028e300]
E0322 00:36:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:36:33.409777  543705 memory.go:184] no items to output this cycle
I0322 00:36:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 00:36:39.415864  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:36:39.415871  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:36:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:36:43.410836  543705 memory.go:191] Add success.
I0322 00:36:43.409825  543705 cpu.go:282] Add success.
I0322 00:36:43.420554  543705 net.go:648] Add success.
I0322 00:36:43.423212  543705 net.go:770] primary dev: ETH0
I0322 00:36:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:36:43.423240  543705 net.go:698] Add success.
I0322 00:36:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:36:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:36:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:36:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:36:53.409775  543705 memory.go:184] no items to output this cycle
I0322 00:36:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 00:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:37:03.409784  543705 memory.go:184] no items to output this cycle
I0322 00:37:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 00:37:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:37:13.409805  543705 memory.go:191] Add success.
I0322 00:37:13.409810  543705 cpu.go:282] Add success.
W0322 00:37:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:37:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:37:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:37:13.420057  543705 net.go:648] Add success.
I0322 00:37:13.422722  543705 net.go:770] primary dev: ETH0
I0322 00:37:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:37:13.422746  543705 net.go:698] Add success.
I0322 00:37:13.453295  543705 event_worker.go:152] Polling the log file for events...
W0322 00:37:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:37:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 00:37:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:37:14.456135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:37:14.456144  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:37:14.456150  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:37:14.456450  543705 disk_worker.go:494] system disk:vda1
I0322 00:37:14.456479  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:37:15.456763  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:37:15.456771  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:37:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:37:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:37:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:37:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:37:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:37:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:37:23.409770  543705 memory.go:184] no items to output this cycle
I0322 00:37:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 00:37:32.189678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:37:32.192219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:37:32.192226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027af00 0xc00027af40]
E0322 00:37:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:37:33.409794  543705 memory.go:184] no items to output this cycle
I0322 00:37:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:37:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:37:43.409786  543705 memory.go:191] Add success.
I0322 00:37:43.409811  543705 cpu.go:282] Add success.
I0322 00:37:43.419878  543705 net.go:648] Add success.
I0322 00:37:43.423014  543705 net.go:770] primary dev: ETH0
I0322 00:37:43.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:37:43.423039  543705 net.go:698] Add success.
I0322 00:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:37:53.410428  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:37:53.410446  543705 memory.go:184] no items to output this cycle
I0322 00:37:53.410458  543705 cpu.go:275] no items to output this cycle
E0322 00:38:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:38:03.409771  543705 memory.go:184] no items to output this cycle
I0322 00:38:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 00:38:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:38:13.409810  543705 memory.go:191] Add success.
I0322 00:38:13.409823  543705 cpu.go:282] Add success.
W0322 00:38:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:38:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:38:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:38:13.420256  543705 net.go:648] Add success.
I0322 00:38:13.422653  543705 net.go:770] primary dev: ETH0
I0322 00:38:13.422665  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:38:13.422676  543705 net.go:698] Add success.
I0322 00:38:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:38:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:38:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 00:38:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:38:14.456527  543705 disk_worker.go:494] system disk:vda1
I0322 00:38:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:38:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:38:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:38:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:38:23.409774  543705 memory.go:184] no items to output this cycle
I0322 00:38:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 00:38:32.193678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:38:32.196183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:38:32.196189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b680 0xc00007b6c0]
E0322 00:38:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:38:33.409794  543705 memory.go:184] no items to output this cycle
I0322 00:38:33.409811  543705 cpu.go:275] no items to output this cycle
E0322 00:38:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:38:43.409796  543705 memory.go:191] Add success.
I0322 00:38:43.409799  543705 cpu.go:282] Add success.
I0322 00:38:43.419836  543705 net.go:648] Add success.
I0322 00:38:43.422665  543705 net.go:770] primary dev: ETH0
I0322 00:38:43.422678  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:38:43.422690  543705 net.go:698] Add success.
I0322 00:38:46.458104  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:38:46.458168  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:38:46.458195  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:38:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:38:53.409762  543705 memory.go:184] no items to output this cycle
I0322 00:38:53.409796  543705 cpu.go:275] no items to output this cycle
I0322 00:39:03.409926  543705 cpu.go:275] no items to output this cycle
E0322 00:39:03.409995  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:39:03.410012  543705 memory.go:184] no items to output this cycle
E0322 00:39:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:39:13.409792  543705 memory.go:191] Add success.
I0322 00:39:13.409808  543705 cpu.go:282] Add success.
W0322 00:39:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:39:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:39:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:39:13.420193  543705 net.go:648] Add success.
I0322 00:39:13.422986  543705 net.go:770] primary dev: ETH0
I0322 00:39:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:39:13.423019  543705 net.go:698] Add success.
I0322 00:39:13.468609  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c30ae449-df4b-4070-95fe-d78bc9ec52e9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:39:13.468641  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:39:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:39:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:39:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 00:39:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:39:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 00:39:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:39:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:39:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:39:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:39:23.409772  543705 memory.go:184] no items to output this cycle
I0322 00:39:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 00:39:32.197681  543705 disk_info.go:125] begin check local disk info of client
I0322 00:39:32.200258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:39:32.200264  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b5c0 0xc00007b600]
E0322 00:39:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:39:33.409783  543705 memory.go:184] no items to output this cycle
I0322 00:39:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 00:39:39.416851  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:39:39.416859  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:39:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:39:43.410842  543705 memory.go:191] Add success.
I0322 00:39:43.409800  543705 cpu.go:282] Add success.
I0322 00:39:43.420542  543705 net.go:648] Add success.
I0322 00:39:43.423120  543705 net.go:770] primary dev: ETH0
I0322 00:39:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:39:43.423147  543705 net.go:698] Add success.
I0322 00:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:39:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:39:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:39:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:39:53.409780  543705 memory.go:184] no items to output this cycle
I0322 00:39:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 00:40:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:40:03.409800  543705 memory.go:184] no items to output this cycle
I0322 00:40:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 00:40:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:40:13.409787  543705 memory.go:191] Add success.
I0322 00:40:13.409788  543705 cpu.go:282] Add success.
W0322 00:40:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:40:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:40:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:40:13.420136  543705 net.go:648] Add success.
I0322 00:40:13.423041  543705 net.go:770] primary dev: ETH0
I0322 00:40:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:40:13.423069  543705 net.go:698] Add success.
I0322 00:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:40:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:40:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 00:40:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:40:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 00:40:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:40:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:40:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:40:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:40:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:40:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:40:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:40:23.409779  543705 memory.go:184] no items to output this cycle
I0322 00:40:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 00:40:32.201679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:40:32.204198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:40:32.204205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000292680 0xc0002926c0]
E0322 00:40:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:40:33.409791  543705 memory.go:184] no items to output this cycle
I0322 00:40:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:40:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:40:43.409825  543705 memory.go:191] Add success.
I0322 00:40:43.409835  543705 cpu.go:282] Add success.
I0322 00:40:43.419971  543705 net.go:648] Add success.
I0322 00:40:43.422607  543705 net.go:770] primary dev: ETH0
I0322 00:40:43.422621  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:40:43.422633  543705 net.go:698] Add success.
I0322 00:40:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:40:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:40:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:40:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:40:53.409781  543705 memory.go:184] no items to output this cycle
I0322 00:40:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 00:41:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:41:03.409875  543705 cpu.go:275] no items to output this cycle
I0322 00:41:03.409893  543705 memory.go:184] no items to output this cycle
E0322 00:41:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:41:13.409784  543705 memory.go:191] Add success.
I0322 00:41:13.409802  543705 cpu.go:282] Add success.
W0322 00:41:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:41:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:41:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:41:13.420215  543705 net.go:648] Add success.
I0322 00:41:13.423149  543705 net.go:770] primary dev: ETH0
I0322 00:41:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:41:13.423178  543705 net.go:698] Add success.
I0322 00:41:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:41:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:41:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0322 00:41:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:41:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 00:41:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:41:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:41:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:41:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:41:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:41:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:41:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:41:23.409794  543705 memory.go:184] no items to output this cycle
I0322 00:41:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 00:41:32.205680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:41:32.208217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:41:32.208223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa100 0xc0001aa140]
E0322 00:41:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:41:33.409789  543705 memory.go:184] no items to output this cycle
I0322 00:41:33.409806  543705 cpu.go:275] no items to output this cycle
E0322 00:41:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:41:43.409794  543705 memory.go:191] Add success.
I0322 00:41:43.409810  543705 cpu.go:282] Add success.
I0322 00:41:43.419872  543705 net.go:648] Add success.
I0322 00:41:43.422634  543705 net.go:770] primary dev: ETH0
I0322 00:41:43.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:41:43.422659  543705 net.go:698] Add success.
I0322 00:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:41:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:41:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:41:53.410618  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:41:53.410633  543705 memory.go:184] no items to output this cycle
I0322 00:41:53.410646  543705 cpu.go:275] no items to output this cycle
E0322 00:42:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:42:03.409776  543705 memory.go:184] no items to output this cycle
I0322 00:42:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 00:42:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:42:13.409772  543705 memory.go:191] Add success.
W0322 00:42:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 00:42:13.409804  543705 cpu.go:282] Add success.
W0322 00:42:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:42:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:42:13.420503  543705 net.go:648] Add success.
I0322 00:42:13.423185  543705 net.go:770] primary dev: ETH0
I0322 00:42:13.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:42:13.423209  543705 net.go:698] Add success.
I0322 00:42:13.468400  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"35b4a4d7-1595-44ec-9f04-ebf60f0b2be2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:42:13.468429  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 00:42:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:42:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 00:42:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:42:14.456145  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:42:14.456154  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:42:14.456159  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:42:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 00:42:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:42:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:42:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:42:16.457881  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:42:16.457881  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:42:16.457937  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:42:16.457956  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:42:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:42:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:42:23.409806  543705 memory.go:184] no items to output this cycle
I0322 00:42:23.409814  543705 cpu.go:275] no items to output this cycle
I0322 00:42:32.209676  543705 disk_info.go:125] begin check local disk info of client
I0322 00:42:32.212198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:42:32.212205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000551e80 0xc000551ec0]
E0322 00:42:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:42:33.409781  543705 memory.go:184] no items to output this cycle
I0322 00:42:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 00:42:39.417861  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:42:39.417869  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:42:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:42:43.409811  543705 memory.go:191] Add success.
I0322 00:42:43.409818  543705 cpu.go:282] Add success.
I0322 00:42:43.420075  543705 net.go:648] Add success.
I0322 00:42:43.421237  543705 net.go:770] primary dev: ETH0
I0322 00:42:43.421252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:42:43.421268  543705 net.go:698] Add success.
I0322 00:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:42:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:42:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:42:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:42:53.409800  543705 memory.go:184] no items to output this cycle
I0322 00:42:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 00:43:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:43:03.409777  543705 memory.go:184] no items to output this cycle
I0322 00:43:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 00:43:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:43:13.409782  543705 memory.go:191] Add success.
I0322 00:43:13.409804  543705 cpu.go:282] Add success.
W0322 00:43:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:43:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:43:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:43:13.420156  543705 net.go:648] Add success.
I0322 00:43:13.423057  543705 net.go:770] primary dev: ETH0
I0322 00:43:13.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:43:13.423086  543705 net.go:698] Add success.
I0322 00:43:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:43:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:43:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 00:43:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:43:14.456489  543705 disk_worker.go:494] system disk:vda1
I0322 00:43:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:43:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:43:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:43:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:43:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:43:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:43:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:43:23.409769  543705 memory.go:184] no items to output this cycle
I0322 00:43:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 00:43:32.213678  543705 disk_info.go:125] begin check local disk info of client
I0322 00:43:32.216279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:43:32.216286  543705 disk_info.go:196] parse disk info done, disk is : [0xc000550a40 0xc000550a80]
E0322 00:43:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:43:33.409771  543705 memory.go:184] no items to output this cycle
I0322 00:43:33.409772  543705 cpu.go:275] no items to output this cycle
E0322 00:43:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:43:43.409815  543705 memory.go:191] Add success.
I0322 00:43:43.409830  543705 cpu.go:282] Add success.
I0322 00:43:43.419860  543705 net.go:648] Add success.
I0322 00:43:43.422517  543705 net.go:770] primary dev: ETH0
I0322 00:43:43.422532  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:43:43.422546  543705 net.go:698] Add success.
I0322 00:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:43:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:43:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:43:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:43:53.409762  543705 memory.go:184] no items to output this cycle
I0322 00:43:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 00:44:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:44:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 00:44:03.409780  543705 memory.go:184] no items to output this cycle
E0322 00:44:13.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:44:13.409886  543705 memory.go:191] Add success.
I0322 00:44:13.409900  543705 cpu.go:282] Add success.
W0322 00:44:13.409932  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:44:13.409949  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:44:13.409959  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:44:13.419714  543705 net.go:648] Add success.
I0322 00:44:13.422419  543705 net.go:770] primary dev: ETH0
I0322 00:44:13.422432  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:44:13.422443  543705 net.go:698] Add success.
I0322 00:44:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:44:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:44:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 00:44:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:44:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 00:44:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:44:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:44:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:44:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:44:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:44:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:44:23.409777  543705 memory.go:184] no items to output this cycle
I0322 00:44:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 00:44:32.217695  543705 disk_info.go:125] begin check local disk info of client
I0322 00:44:32.220189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:44:32.220196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c53c0 0xc0000c5400]
E0322 00:44:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:44:33.409777  543705 memory.go:184] no items to output this cycle
I0322 00:44:33.409777  543705 cpu.go:275] no items to output this cycle
E0322 00:44:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:44:43.409788  543705 memory.go:191] Add success.
I0322 00:44:43.409807  543705 cpu.go:282] Add success.
I0322 00:44:43.419877  543705 net.go:648] Add success.
I0322 00:44:43.422717  543705 net.go:770] primary dev: ETH0
I0322 00:44:43.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:44:43.422741  543705 net.go:698] Add success.
I0322 00:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:44:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:44:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:44:53.409765  543705 memory.go:184] no items to output this cycle
I0322 00:44:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 00:45:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:45:03.409788  543705 memory.go:184] no items to output this cycle
I0322 00:45:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 00:45:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:45:13.409806  543705 memory.go:191] Add success.
I0322 00:45:13.409815  543705 cpu.go:282] Add success.
W0322 00:45:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:45:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:45:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:45:13.420267  543705 net.go:648] Add success.
I0322 00:45:13.423164  543705 net.go:770] primary dev: ETH0
I0322 00:45:13.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:45:13.423192  543705 net.go:698] Add success.
I0322 00:45:13.463044  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a736baf-4b1e-4429-9efc-9b51ad3a97ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:45:13.463076  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:45:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:45:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:45:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 00:45:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:45:14.456748  543705 disk_worker.go:494] system disk:vda1
I0322 00:45:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:45:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:45:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:45:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:45:23.409773  543705 memory.go:184] no items to output this cycle
I0322 00:45:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 00:45:32.221684  543705 disk_info.go:125] begin check local disk info of client
I0322 00:45:32.224213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:45:32.224220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2400 0xc0003b2440]
E0322 00:45:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:45:33.409793  543705 memory.go:184] no items to output this cycle
I0322 00:45:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 00:45:39.418858  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:45:39.418866  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:45:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:45:43.410582  543705 memory.go:191] Add success.
I0322 00:45:43.409823  543705 cpu.go:282] Add success.
I0322 00:45:43.420296  543705 net.go:648] Add success.
I0322 00:45:43.422716  543705 net.go:770] primary dev: ETH0
I0322 00:45:43.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:45:43.422742  543705 net.go:698] Add success.
I0322 00:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:45:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:45:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:45:53.409771  543705 memory.go:184] no items to output this cycle
I0322 00:45:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 00:46:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:46:03.409785  543705 memory.go:184] no items to output this cycle
I0322 00:46:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 00:46:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:46:13.409801  543705 memory.go:191] Add success.
I0322 00:46:13.409806  543705 cpu.go:282] Add success.
W0322 00:46:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:46:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:46:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:46:13.420331  543705 net.go:648] Add success.
I0322 00:46:13.423087  543705 net.go:770] primary dev: ETH0
I0322 00:46:13.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:46:13.423109  543705 net.go:698] Add success.
I0322 00:46:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:46:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:46:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 00:46:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:46:14.456532  543705 disk_worker.go:494] system disk:vda1
I0322 00:46:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:46:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:46:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:46:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:46:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:46:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:46:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:46:23.409785  543705 memory.go:184] no items to output this cycle
I0322 00:46:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 00:46:32.225677  543705 disk_info.go:125] begin check local disk info of client
I0322 00:46:32.228225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:46:32.228232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f9c0 0xc00039fa00]
E0322 00:46:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:46:33.409776  543705 cpu.go:275] no items to output this cycle
I0322 00:46:33.409787  543705 memory.go:184] no items to output this cycle
E0322 00:46:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:46:43.409811  543705 memory.go:191] Add success.
I0322 00:46:43.409813  543705 cpu.go:282] Add success.
I0322 00:46:43.419987  543705 net.go:648] Add success.
I0322 00:46:43.422783  543705 net.go:770] primary dev: ETH0
I0322 00:46:43.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:46:43.422809  543705 net.go:698] Add success.
I0322 00:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:46:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:46:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:46:53.409775  543705 memory.go:184] no items to output this cycle
I0322 00:46:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 00:47:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:47:03.409788  543705 cpu.go:275] no items to output this cycle
I0322 00:47:03.409793  543705 memory.go:184] no items to output this cycle
E0322 00:47:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:47:13.409801  543705 memory.go:191] Add success.
I0322 00:47:13.409808  543705 cpu.go:282] Add success.
W0322 00:47:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:47:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:47:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:47:13.420038  543705 net.go:648] Add success.
I0322 00:47:13.422507  543705 net.go:770] primary dev: ETH0
I0322 00:47:13.422595  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:47:13.422609  543705 net.go:698] Add success.
I0322 00:47:13.452776  543705 event_worker.go:152] Polling the log file for events...
W0322 00:47:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:47:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 00:47:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:47:14.456788  543705 disk_worker.go:494] system disk:vda1
I0322 00:47:14.456825  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:47:14.457095  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:47:14.457103  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:47:14.457108  543705 custom_config.go:64] query custom config with name: gpu
E0322 00:47:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:47:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:47:16.458037  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:47:16.458037  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:47:16.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:47:16.458119  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:47:16.472510  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:47:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:47:23.409784  543705 memory.go:184] no items to output this cycle
I0322 00:47:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 00:47:32.229676  543705 disk_info.go:125] begin check local disk info of client
I0322 00:47:32.232230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:47:32.232237  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367180 0xc0003671c0]
E0322 00:47:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:47:33.409785  543705 memory.go:184] no items to output this cycle
I0322 00:47:33.409786  543705 cpu.go:275] no items to output this cycle
E0322 00:47:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:47:43.409797  543705 memory.go:191] Add success.
I0322 00:47:43.409826  543705 cpu.go:282] Add success.
I0322 00:47:43.419857  543705 net.go:648] Add success.
I0322 00:47:43.423065  543705 net.go:770] primary dev: ETH0
I0322 00:47:43.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:47:43.423095  543705 net.go:698] Add success.
I0322 00:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:47:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:47:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:47:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:47:53.409776  543705 memory.go:184] no items to output this cycle
I0322 00:47:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 00:48:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:48:03.409796  543705 memory.go:184] no items to output this cycle
I0322 00:48:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:48:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:48:13.409819  543705 memory.go:191] Add success.
I0322 00:48:13.409819  543705 cpu.go:282] Add success.
W0322 00:48:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:48:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:48:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:48:13.420134  543705 net.go:648] Add success.
I0322 00:48:13.423028  543705 net.go:770] primary dev: ETH0
I0322 00:48:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:48:13.423052  543705 net.go:698] Add success.
I0322 00:48:13.469546  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"51c324a0-39ea-4219-88cd-8ebe8f8bd142","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:48:13.469578  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:48:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:48:14.455353  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:48:14.455365  543705 disk_worker.go:708] disk space is not compliant
W0322 00:48:14.455368  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:48:14.457020  543705 disk_worker.go:494] system disk:vda1
I0322 00:48:14.457061  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:48:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:48:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:48:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:48:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:48:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:48:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 00:48:23.409786  543705 memory.go:184] no items to output this cycle
I0322 00:48:32.233673  543705 disk_info.go:125] begin check local disk info of client
I0322 00:48:32.236167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:48:32.236174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1580 0xc0003e15c0]
E0322 00:48:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:48:33.409775  543705 cpu.go:275] no items to output this cycle
I0322 00:48:33.409778  543705 memory.go:184] no items to output this cycle
I0322 00:48:39.419866  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:48:39.419874  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:48:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:48:43.410468  543705 memory.go:191] Add success.
I0322 00:48:43.409799  543705 cpu.go:282] Add success.
I0322 00:48:43.420180  543705 net.go:648] Add success.
I0322 00:48:43.422500  543705 net.go:770] primary dev: ETH0
I0322 00:48:43.422514  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:48:43.422527  543705 net.go:698] Add success.
I0322 00:48:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:48:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:48:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:48:53.409795  543705 memory.go:184] no items to output this cycle
I0322 00:48:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:49:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:49:03.409779  543705 memory.go:184] no items to output this cycle
I0322 00:49:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 00:49:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:49:13.409792  543705 memory.go:191] Add success.
I0322 00:49:13.409793  543705 cpu.go:282] Add success.
W0322 00:49:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:49:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:49:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:49:13.420051  543705 net.go:648] Add success.
I0322 00:49:13.423197  543705 net.go:770] primary dev: ETH0
I0322 00:49:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:49:13.423226  543705 net.go:698] Add success.
I0322 00:49:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:49:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:49:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 00:49:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:49:14.456605  543705 disk_worker.go:494] system disk:vda1
I0322 00:49:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:49:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:49:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:49:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:49:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:49:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:49:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:49:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 00:49:23.409801  543705 memory.go:184] no items to output this cycle
I0322 00:49:32.237676  543705 disk_info.go:125] begin check local disk info of client
I0322 00:49:32.240201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:49:32.240208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049aec0 0xc00049af00]
E0322 00:49:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:49:33.409775  543705 memory.go:184] no items to output this cycle
I0322 00:49:33.409783  543705 cpu.go:275] no items to output this cycle
E0322 00:49:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:49:43.409828  543705 memory.go:191] Add success.
I0322 00:49:43.409844  543705 cpu.go:282] Add success.
I0322 00:49:43.419981  543705 net.go:648] Add success.
I0322 00:49:43.423001  543705 net.go:770] primary dev: ETH0
I0322 00:49:43.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:49:43.423030  543705 net.go:698] Add success.
I0322 00:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:49:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:49:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:49:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:49:53.409791  543705 cpu.go:275] no items to output this cycle
I0322 00:49:53.409798  543705 memory.go:184] no items to output this cycle
E0322 00:50:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:50:03.409806  543705 memory.go:184] no items to output this cycle
I0322 00:50:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 00:50:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:50:13.409806  543705 memory.go:191] Add success.
I0322 00:50:13.409827  543705 cpu.go:282] Add success.
W0322 00:50:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:50:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:50:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:50:13.420220  543705 net.go:648] Add success.
I0322 00:50:13.423175  543705 net.go:770] primary dev: ETH0
I0322 00:50:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:50:13.423201  543705 net.go:698] Add success.
I0322 00:50:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:50:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:50:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 00:50:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:50:14.456506  543705 disk_worker.go:494] system disk:vda1
I0322 00:50:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:50:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:50:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:50:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:50:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:50:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:50:23.409780  543705 memory.go:184] no items to output this cycle
I0322 00:50:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 00:50:32.241679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:50:32.244203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:50:32.244209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0322 00:50:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:50:33.409804  543705 memory.go:184] no items to output this cycle
I0322 00:50:33.409820  543705 cpu.go:275] no items to output this cycle
E0322 00:50:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:50:43.409802  543705 memory.go:191] Add success.
I0322 00:50:43.409835  543705 cpu.go:282] Add success.
I0322 00:50:43.419899  543705 net.go:648] Add success.
I0322 00:50:43.422523  543705 net.go:770] primary dev: ETH0
I0322 00:50:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:50:43.422548  543705 net.go:698] Add success.
I0322 00:50:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:50:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:50:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:50:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:50:53.409807  543705 memory.go:184] no items to output this cycle
I0322 00:50:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 00:51:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:51:03.409778  543705 memory.go:184] no items to output this cycle
I0322 00:51:03.409777  543705 cpu.go:275] no items to output this cycle
E0322 00:51:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:51:13.409799  543705 memory.go:191] Add success.
I0322 00:51:13.409800  543705 cpu.go:282] Add success.
W0322 00:51:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:51:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:51:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:51:13.420143  543705 net.go:648] Add success.
I0322 00:51:13.422995  543705 net.go:770] primary dev: ETH0
I0322 00:51:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:51:13.423021  543705 net.go:698] Add success.
I0322 00:51:13.468621  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"991bdc61-7416-4c92-83d9-2da61c94ed88","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:51:13.468657  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:51:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:51:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:51:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 00:51:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:51:14.456539  543705 disk_worker.go:494] system disk:vda1
I0322 00:51:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:51:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:51:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:51:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:51:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 00:51:23.409793  543705 memory.go:184] no items to output this cycle
I0322 00:51:32.245679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:51:32.248236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:51:32.248242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367240 0xc000367280]
E0322 00:51:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:51:33.409791  543705 memory.go:184] no items to output this cycle
I0322 00:51:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 00:51:39.420866  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:51:39.420873  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:51:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:51:43.410607  543705 memory.go:191] Add success.
I0322 00:51:43.409803  543705 cpu.go:282] Add success.
I0322 00:51:43.420327  543705 net.go:648] Add success.
I0322 00:51:43.422937  543705 net.go:770] primary dev: ETH0
I0322 00:51:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:51:43.422966  543705 net.go:698] Add success.
I0322 00:51:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:51:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:51:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:51:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:51:53.409767  543705 memory.go:184] no items to output this cycle
I0322 00:51:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:52:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:52:03.409776  543705 memory.go:184] no items to output this cycle
I0322 00:52:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 00:52:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:52:13.409798  543705 memory.go:191] Add success.
I0322 00:52:13.409801  543705 cpu.go:282] Add success.
W0322 00:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:52:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:52:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:52:13.420075  543705 net.go:648] Add success.
I0322 00:52:13.422697  543705 net.go:770] primary dev: ETH0
I0322 00:52:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:52:13.422726  543705 net.go:698] Add success.
W0322 00:52:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:52:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 00:52:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:52:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 00:52:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:52:14.457773  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:52:14.457781  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:52:14.457786  543705 custom_config.go:64] query custom config with name: gpu
E0322 00:52:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:52:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:52:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:52:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:52:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:52:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:52:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:52:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:52:23.409769  543705 memory.go:184] no items to output this cycle
I0322 00:52:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 00:52:32.249685  543705 disk_info.go:125] begin check local disk info of client
I0322 00:52:32.252173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:52:32.252180  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366000 0xc000366040]
E0322 00:52:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:52:33.409793  543705 memory.go:184] no items to output this cycle
I0322 00:52:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:52:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:52:43.409793  543705 memory.go:191] Add success.
I0322 00:52:43.409813  543705 cpu.go:282] Add success.
I0322 00:52:43.419996  543705 net.go:648] Add success.
I0322 00:52:43.422654  543705 net.go:770] primary dev: ETH0
I0322 00:52:43.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:52:43.422687  543705 net.go:698] Add success.
I0322 00:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:52:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:52:53.409793  543705 memory.go:184] no items to output this cycle
I0322 00:52:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:53:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:53:03.409770  543705 memory.go:184] no items to output this cycle
I0322 00:53:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 00:53:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:53:13.409817  543705 memory.go:191] Add success.
I0322 00:53:13.409827  543705 cpu.go:282] Add success.
W0322 00:53:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:53:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:53:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:53:13.420168  543705 net.go:648] Add success.
I0322 00:53:13.423235  543705 net.go:770] primary dev: ETH0
I0322 00:53:13.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:53:13.423261  543705 net.go:698] Add success.
I0322 00:53:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:53:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:53:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 00:53:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:53:14.456506  543705 disk_worker.go:494] system disk:vda1
I0322 00:53:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:53:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:53:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:53:16.458164  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:53:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:53:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:53:23.409779  543705 memory.go:184] no items to output this cycle
I0322 00:53:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 00:53:32.253684  543705 disk_info.go:125] begin check local disk info of client
I0322 00:53:32.256230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:53:32.256237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2000 0xc0003b2040]
E0322 00:53:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:53:33.409788  543705 memory.go:184] no items to output this cycle
I0322 00:53:33.409803  543705 cpu.go:275] no items to output this cycle
E0322 00:53:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:53:43.409789  543705 memory.go:191] Add success.
I0322 00:53:43.409807  543705 cpu.go:282] Add success.
I0322 00:53:43.420033  543705 net.go:648] Add success.
I0322 00:53:43.422971  543705 net.go:770] primary dev: ETH0
I0322 00:53:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:53:43.423012  543705 net.go:698] Add success.
I0322 00:53:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:53:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:53:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:53:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:53:53.409797  543705 memory.go:184] no items to output this cycle
I0322 00:53:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 00:54:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:54:03.409769  543705 memory.go:184] no items to output this cycle
I0322 00:54:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 00:54:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:54:13.409813  543705 memory.go:191] Add success.
I0322 00:54:13.409820  543705 cpu.go:282] Add success.
W0322 00:54:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:54:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:54:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:54:13.420088  543705 net.go:648] Add success.
I0322 00:54:13.422781  543705 net.go:770] primary dev: ETH0
I0322 00:54:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:54:13.422806  543705 net.go:698] Add success.
I0322 00:54:13.468229  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"546ae1f0-dabe-406e-946e-5877b36b179b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:54:13.468263  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 00:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:54:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:54:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 00:54:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:54:14.456605  543705 disk_worker.go:494] system disk:vda1
I0322 00:54:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:54:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:54:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:54:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:54:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:54:23.409917  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:54:23.409998  543705 cpu.go:275] no items to output this cycle
I0322 00:54:23.410003  543705 memory.go:184] no items to output this cycle
I0322 00:54:32.257679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:54:32.260263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:54:32.260270  543705 disk_info.go:196] parse disk info done, disk is : [0xc000594d00 0xc000594d40]
E0322 00:54:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:54:33.409762  543705 memory.go:184] no items to output this cycle
I0322 00:54:33.409778  543705 cpu.go:275] no items to output this cycle
I0322 00:54:39.421872  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:54:39.421880  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:54:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:54:43.410502  543705 memory.go:191] Add success.
I0322 00:54:43.409827  543705 cpu.go:282] Add success.
I0322 00:54:43.420188  543705 net.go:648] Add success.
I0322 00:54:43.422589  543705 net.go:770] primary dev: ETH0
I0322 00:54:43.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:54:43.422615  543705 net.go:698] Add success.
I0322 00:54:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:54:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:54:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:54:53.409771  543705 memory.go:184] no items to output this cycle
I0322 00:54:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 00:55:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:55:03.409783  543705 cpu.go:275] no items to output this cycle
I0322 00:55:03.409786  543705 memory.go:184] no items to output this cycle
E0322 00:55:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:55:13.409818  543705 memory.go:191] Add success.
I0322 00:55:13.409824  543705 cpu.go:282] Add success.
W0322 00:55:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:55:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:55:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:55:13.420152  543705 net.go:648] Add success.
I0322 00:55:13.422812  543705 net.go:770] primary dev: ETH0
I0322 00:55:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:55:13.422838  543705 net.go:698] Add success.
I0322 00:55:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:55:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:55:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 00:55:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:55:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 00:55:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:55:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:55:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:55:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:55:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:55:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:55:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:55:23.409798  543705 memory.go:184] no items to output this cycle
I0322 00:55:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 00:55:32.261679  543705 disk_info.go:125] begin check local disk info of client
I0322 00:55:32.264252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:55:32.264260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003711c0 0xc000371200]
E0322 00:55:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:55:33.409794  543705 memory.go:184] no items to output this cycle
I0322 00:55:33.409809  543705 cpu.go:275] no items to output this cycle
E0322 00:55:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:55:43.409822  543705 memory.go:191] Add success.
I0322 00:55:43.409833  543705 cpu.go:282] Add success.
I0322 00:55:43.420006  543705 net.go:648] Add success.
I0322 00:55:43.422703  543705 net.go:770] primary dev: ETH0
I0322 00:55:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:55:43.422732  543705 net.go:698] Add success.
I0322 00:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:55:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:55:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:55:53.409791  543705 memory.go:184] no items to output this cycle
I0322 00:55:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 00:56:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:56:03.409772  543705 memory.go:184] no items to output this cycle
I0322 00:56:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 00:56:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:56:13.409811  543705 memory.go:191] Add success.
I0322 00:56:13.409819  543705 cpu.go:282] Add success.
W0322 00:56:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:56:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:56:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:56:13.420165  543705 net.go:648] Add success.
I0322 00:56:13.422917  543705 net.go:770] primary dev: ETH0
I0322 00:56:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:56:13.422947  543705 net.go:698] Add success.
I0322 00:56:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:56:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:56:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 00:56:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:56:14.456502  543705 disk_worker.go:494] system disk:vda1
I0322 00:56:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:56:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:56:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:56:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:56:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:56:23.409916  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:56:23.409932  543705 cpu.go:275] no items to output this cycle
I0322 00:56:23.409934  543705 memory.go:184] no items to output this cycle
I0322 00:56:32.265680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:56:32.268164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:56:32.268171  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470100 0xc000470140]
E0322 00:56:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:56:33.409772  543705 memory.go:184] no items to output this cycle
I0322 00:56:33.409776  543705 cpu.go:275] no items to output this cycle
E0322 00:56:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:56:43.409804  543705 memory.go:191] Add success.
I0322 00:56:43.409806  543705 cpu.go:282] Add success.
I0322 00:56:43.419991  543705 net.go:648] Add success.
I0322 00:56:43.422990  543705 net.go:770] primary dev: ETH0
I0322 00:56:43.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:56:43.423017  543705 net.go:698] Add success.
I0322 00:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:56:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:56:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:56:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:56:53.409796  543705 memory.go:184] no items to output this cycle
I0322 00:56:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 00:57:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:57:03.409778  543705 memory.go:184] no items to output this cycle
I0322 00:57:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 00:57:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:57:13.409787  543705 memory.go:191] Add success.
I0322 00:57:13.409789  543705 cpu.go:282] Add success.
W0322 00:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:57:13.420051  543705 net.go:648] Add success.
I0322 00:57:13.422575  543705 net.go:770] primary dev: ETH0
I0322 00:57:13.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:57:13.422611  543705 net.go:698] Add success.
I0322 00:57:13.428804  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 00:57:13.452977  543705 event_worker.go:152] Polling the log file for events...
I0322 00:57:13.592168  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2870870c-6d99-46d9-a0fa-f142f475931c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 00:57:13.592202  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 00:57:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:57:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0322 00:57:14.455238  543705 disk_worker.go:728] disk inode is not compliant
E0322 00:57:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 00:57:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 00:57:14.455898  543705 custom_config.go:64] query custom config with name: gpu
I0322 00:57:14.456818  543705 disk_worker.go:494] system disk:vda1
I0322 00:57:14.456865  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 00:57:15.456903  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 00:57:15.456912  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:57:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 00:57:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 00:57:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:57:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:57:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:57:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:57:23.409759  543705 memory.go:184] no items to output this cycle
I0322 00:57:23.409795  543705 cpu.go:275] no items to output this cycle
I0322 00:57:32.269681  543705 disk_info.go:125] begin check local disk info of client
I0322 00:57:32.272205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:57:32.272212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1440 0xc0003b1480]
E0322 00:57:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:57:33.409787  543705 memory.go:184] no items to output this cycle
I0322 00:57:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 00:57:39.422873  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 00:57:39.422881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 00:57:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:57:43.410631  543705 memory.go:191] Add success.
I0322 00:57:43.409812  543705 cpu.go:282] Add success.
I0322 00:57:43.420381  543705 net.go:648] Add success.
I0322 00:57:43.422960  543705 net.go:770] primary dev: ETH0
I0322 00:57:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:57:43.422987  543705 net.go:698] Add success.
I0322 00:57:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:57:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:57:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:57:53.409778  543705 memory.go:184] no items to output this cycle
I0322 00:57:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 00:58:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:58:03.409774  543705 memory.go:184] no items to output this cycle
I0322 00:58:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 00:58:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:58:13.409803  543705 memory.go:191] Add success.
I0322 00:58:13.409804  543705 cpu.go:282] Add success.
W0322 00:58:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:58:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:58:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:58:13.420179  543705 net.go:648] Add success.
I0322 00:58:13.423000  543705 net.go:770] primary dev: ETH0
I0322 00:58:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:58:13.423042  543705 net.go:698] Add success.
I0322 00:58:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:58:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:58:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 00:58:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:58:14.456858  543705 disk_worker.go:494] system disk:vda1
I0322 00:58:14.456888  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:58:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:58:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:58:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:58:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:58:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:58:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:58:23.409773  543705 memory.go:184] no items to output this cycle
I0322 00:58:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 00:58:32.275940  543705 disk_info.go:125] begin check local disk info of client
I0322 00:58:32.278409  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:58:32.278416  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6080 0xc0002b60c0]
E0322 00:58:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:58:33.409800  543705 memory.go:184] no items to output this cycle
I0322 00:58:33.409819  543705 cpu.go:275] no items to output this cycle
E0322 00:58:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:58:43.409812  543705 memory.go:191] Add success.
I0322 00:58:43.409845  543705 cpu.go:282] Add success.
I0322 00:58:43.420150  543705 net.go:648] Add success.
I0322 00:58:43.423120  543705 net.go:770] primary dev: ETH0
I0322 00:58:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:58:43.423146  543705 net.go:698] Add success.
I0322 00:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:58:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:58:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:58:53.409787  543705 memory.go:184] no items to output this cycle
I0322 00:58:53.409835  543705 cpu.go:275] no items to output this cycle
E0322 00:59:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:59:03.409777  543705 memory.go:184] no items to output this cycle
I0322 00:59:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 00:59:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:59:13.409811  543705 cpu.go:282] Add success.
I0322 00:59:13.409820  543705 memory.go:191] Add success.
W0322 00:59:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 00:59:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 00:59:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 00:59:13.420138  543705 net.go:648] Add success.
I0322 00:59:13.422912  543705 net.go:770] primary dev: ETH0
I0322 00:59:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:59:13.422937  543705 net.go:698] Add success.
I0322 00:59:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 00:59:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 00:59:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 00:59:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 00:59:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 00:59:14.456728  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 00:59:15.456008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 00:59:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:59:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 00:59:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 00:59:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:59:23.409825  543705 memory.go:184] no items to output this cycle
I0322 00:59:23.409832  543705 cpu.go:275] no items to output this cycle
I0322 00:59:32.281680  543705 disk_info.go:125] begin check local disk info of client
I0322 00:59:32.284218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 00:59:32.284225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b01c0 0xc0003b0200]
E0322 00:59:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:59:33.409776  543705 memory.go:184] no items to output this cycle
I0322 00:59:33.409805  543705 cpu.go:275] no items to output this cycle
E0322 00:59:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:59:43.409840  543705 memory.go:191] Add success.
I0322 00:59:43.409840  543705 cpu.go:282] Add success.
I0322 00:59:43.419967  543705 net.go:648] Add success.
I0322 00:59:43.422819  543705 net.go:770] primary dev: ETH0
I0322 00:59:43.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0322 00:59:43.422846  543705 net.go:698] Add success.
I0322 00:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 00:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 00:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 00:59:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 00:59:53.409802  543705 memory.go:184] no items to output this cycle
I0322 00:59:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 01:00:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:00:03.409819  543705 memory.go:184] no items to output this cycle
I0322 01:00:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 01:00:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:00:13.409775  543705 memory.go:191] Add success.
W0322 01:00:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:00:13.409808  543705 cpu.go:282] Add success.
W0322 01:00:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:00:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:00:13.420087  543705 net.go:648] Add success.
I0322 01:00:13.422733  543705 net.go:770] primary dev: ETH0
I0322 01:00:13.422746  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:00:13.422759  543705 net.go:698] Add success.
I0322 01:00:13.469855  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"62b51e8c-6733-4d64-8825-f0bbd131fcf9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:00:13.469889  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:00:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:00:14.455429  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:00:14.455442  543705 disk_worker.go:708] disk space is not compliant
W0322 01:00:14.455446  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:00:14.457057  543705 disk_worker.go:494] system disk:vda1
I0322 01:00:14.457087  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:00:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:00:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:00:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:00:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:00:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:00:23.409772  543705 memory.go:184] no items to output this cycle
I0322 01:00:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 01:00:32.285675  543705 disk_info.go:125] begin check local disk info of client
I0322 01:00:32.288116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:00:32.288122  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a300 0xc00048a340]
E0322 01:00:33.409738  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:00:33.409757  543705 memory.go:184] no items to output this cycle
I0322 01:00:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 01:00:39.423882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:00:39.423890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:00:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:00:43.410639  543705 memory.go:191] Add success.
I0322 01:00:43.409822  543705 cpu.go:282] Add success.
I0322 01:00:43.420324  543705 net.go:648] Add success.
I0322 01:00:43.423072  543705 net.go:770] primary dev: ETH0
I0322 01:00:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:00:43.423100  543705 net.go:698] Add success.
I0322 01:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:00:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:00:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:00:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:00:53.409800  543705 memory.go:184] no items to output this cycle
I0322 01:00:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 01:01:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:01:03.409800  543705 memory.go:184] no items to output this cycle
I0322 01:01:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 01:01:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:01:13.409808  543705 memory.go:191] Add success.
I0322 01:01:13.409817  543705 cpu.go:282] Add success.
W0322 01:01:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:01:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:01:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:01:13.420153  543705 net.go:648] Add success.
I0322 01:01:13.422789  543705 net.go:770] primary dev: ETH0
I0322 01:01:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:01:13.422812  543705 net.go:698] Add success.
I0322 01:01:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:01:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:01:14.455284  543705 disk_worker.go:708] disk space is not compliant
W0322 01:01:14.455289  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:01:14.456974  543705 disk_worker.go:494] system disk:vda1
I0322 01:01:14.457003  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:01:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:01:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:01:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:01:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:01:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:01:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:01:23.409777  543705 memory.go:184] no items to output this cycle
I0322 01:01:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 01:01:32.289679  543705 disk_info.go:125] begin check local disk info of client
I0322 01:01:32.292216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:01:32.292223  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046b080 0xc00046b280]
E0322 01:01:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:01:33.409759  543705 memory.go:184] no items to output this cycle
I0322 01:01:33.409792  543705 cpu.go:275] no items to output this cycle
E0322 01:01:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:01:43.409802  543705 memory.go:191] Add success.
I0322 01:01:43.409803  543705 cpu.go:282] Add success.
I0322 01:01:43.419873  543705 net.go:648] Add success.
I0322 01:01:43.422541  543705 net.go:770] primary dev: ETH0
I0322 01:01:43.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:01:43.422573  543705 net.go:698] Add success.
I0322 01:01:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:01:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:01:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:01:53.410350  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:01:53.410366  543705 memory.go:184] no items to output this cycle
I0322 01:01:53.410400  543705 cpu.go:275] no items to output this cycle
E0322 01:02:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:02:03.409782  543705 memory.go:184] no items to output this cycle
I0322 01:02:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:02:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:02:13.409811  543705 memory.go:191] Add success.
I0322 01:02:13.409814  543705 cpu.go:282] Add success.
W0322 01:02:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:02:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:02:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:02:13.420202  543705 net.go:648] Add success.
I0322 01:02:13.422790  543705 net.go:770] primary dev: ETH0
I0322 01:02:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:02:13.422819  543705 net.go:698] Add success.
W0322 01:02:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:02:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 01:02:14.455208  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:02:14.457408  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:02:14.457419  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:02:14.457426  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:02:14.457522  543705 disk_worker.go:494] system disk:vda1
I0322 01:02:14.457560  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:02:15.456254  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:02:15.456267  543705 custom_config.go:64] query custom config with name: huawei_npu
E0322 01:02:16.457504  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:02:16.458549  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:02:16.458604  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:02:16.458622  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:02:16.472985  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:02:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:02:23.409772  543705 memory.go:184] no items to output this cycle
I0322 01:02:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 01:02:32.293678  543705 disk_info.go:125] begin check local disk info of client
I0322 01:02:32.296194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:02:32.296201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049ab40 0xc00049ab80]
E0322 01:02:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:02:33.409771  543705 memory.go:184] no items to output this cycle
I0322 01:02:33.409777  543705 cpu.go:275] no items to output this cycle
E0322 01:02:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:02:43.409791  543705 memory.go:191] Add success.
I0322 01:02:43.409813  543705 cpu.go:282] Add success.
I0322 01:02:43.419894  543705 net.go:648] Add success.
I0322 01:02:43.422642  543705 net.go:770] primary dev: ETH0
I0322 01:02:43.422656  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:02:43.422684  543705 net.go:698] Add success.
I0322 01:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:02:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:02:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:02:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:02:53.409772  543705 memory.go:184] no items to output this cycle
I0322 01:02:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:03:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:03:03.409799  543705 memory.go:184] no items to output this cycle
I0322 01:03:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 01:03:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:03:13.409787  543705 memory.go:191] Add success.
I0322 01:03:13.409789  543705 cpu.go:282] Add success.
W0322 01:03:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:03:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:03:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:03:13.420043  543705 net.go:648] Add success.
I0322 01:03:13.422915  543705 net.go:770] primary dev: ETH0
I0322 01:03:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:03:13.422945  543705 net.go:698] Add success.
I0322 01:03:13.468798  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4daa1b30-2801-4f6c-9f1d-3b3c077495ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:03:13.468830  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:03:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:03:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 01:03:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:03:14.457545  543705 disk_worker.go:494] system disk:vda1
I0322 01:03:14.457575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:03:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:03:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:03:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:03:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:03:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:03:23.409765  543705 memory.go:184] no items to output this cycle
I0322 01:03:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 01:03:32.297686  543705 disk_info.go:125] begin check local disk info of client
I0322 01:03:32.300240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:03:32.300247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049a000 0xc00049a040]
E0322 01:03:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:03:33.409792  543705 memory.go:184] no items to output this cycle
I0322 01:03:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 01:03:39.424885  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:03:39.424893  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:03:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:03:43.410539  543705 memory.go:191] Add success.
I0322 01:03:43.409799  543705 cpu.go:282] Add success.
I0322 01:03:43.420252  543705 net.go:648] Add success.
I0322 01:03:43.422769  543705 net.go:770] primary dev: ETH0
I0322 01:03:43.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:03:43.422796  543705 net.go:698] Add success.
I0322 01:03:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:03:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:03:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:03:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:03:53.409793  543705 memory.go:184] no items to output this cycle
I0322 01:03:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 01:04:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:04:03.409775  543705 memory.go:184] no items to output this cycle
I0322 01:04:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 01:04:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:04:13.409807  543705 memory.go:191] Add success.
I0322 01:04:13.409817  543705 cpu.go:282] Add success.
W0322 01:04:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:04:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:04:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:04:13.420283  543705 net.go:648] Add success.
I0322 01:04:13.423067  543705 net.go:770] primary dev: ETH0
I0322 01:04:13.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:04:13.423092  543705 net.go:698] Add success.
I0322 01:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:04:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:04:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 01:04:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:04:14.456534  543705 disk_worker.go:494] system disk:vda1
I0322 01:04:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:04:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:04:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:04:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:04:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:04:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:04:23.409805  543705 memory.go:184] no items to output this cycle
I0322 01:04:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 01:04:32.301676  543705 disk_info.go:125] begin check local disk info of client
I0322 01:04:32.304171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:04:32.304178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003584c0 0xc000358500]
E0322 01:04:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:04:33.409766  543705 memory.go:184] no items to output this cycle
I0322 01:04:33.409774  543705 cpu.go:275] no items to output this cycle
E0322 01:04:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:04:43.409787  543705 memory.go:191] Add success.
I0322 01:04:43.409806  543705 cpu.go:282] Add success.
I0322 01:04:43.419969  543705 net.go:648] Add success.
I0322 01:04:43.422742  543705 net.go:770] primary dev: ETH0
I0322 01:04:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:04:43.422771  543705 net.go:698] Add success.
I0322 01:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:04:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:04:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:04:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:04:53.409771  543705 memory.go:184] no items to output this cycle
I0322 01:04:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 01:05:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:05:03.409811  543705 memory.go:184] no items to output this cycle
I0322 01:05:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 01:05:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:05:13.409776  543705 memory.go:191] Add success.
I0322 01:05:13.409796  543705 cpu.go:282] Add success.
W0322 01:05:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:05:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:05:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:05:13.420155  543705 net.go:648] Add success.
I0322 01:05:13.423038  543705 net.go:770] primary dev: ETH0
I0322 01:05:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:05:13.423062  543705 net.go:698] Add success.
I0322 01:05:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:05:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:05:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 01:05:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:05:14.456480  543705 disk_worker.go:494] system disk:vda1
I0322 01:05:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:05:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:05:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:05:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:05:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:05:23.409792  543705 memory.go:184] no items to output this cycle
I0322 01:05:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 01:05:32.305681  543705 disk_info.go:125] begin check local disk info of client
I0322 01:05:32.308300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:05:32.308307  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 01:05:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:05:33.409799  543705 memory.go:184] no items to output this cycle
I0322 01:05:33.409815  543705 cpu.go:275] no items to output this cycle
E0322 01:05:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:05:43.409794  543705 memory.go:191] Add success.
I0322 01:05:43.409810  543705 cpu.go:282] Add success.
I0322 01:05:43.420149  543705 net.go:648] Add success.
I0322 01:05:43.422869  543705 net.go:770] primary dev: ETH0
I0322 01:05:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:05:43.422898  543705 net.go:698] Add success.
I0322 01:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:05:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:05:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:05:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:05:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:05:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 01:06:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:06:03.409778  543705 memory.go:184] no items to output this cycle
I0322 01:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 01:06:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:06:13.409798  543705 memory.go:191] Add success.
I0322 01:06:13.409816  543705 cpu.go:282] Add success.
W0322 01:06:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:06:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:06:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:06:13.420316  543705 net.go:648] Add success.
I0322 01:06:13.423029  543705 net.go:770] primary dev: ETH0
I0322 01:06:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:06:13.423053  543705 net.go:698] Add success.
I0322 01:06:13.724059  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0ad4da40-726c-4640-85ba-e3a4c1243dc2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:06:13.724094  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:06:14.454716  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:06:14.454890  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:06:14.454966  543705 disk_worker.go:708] disk space is not compliant
W0322 01:06:14.454969  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:06:14.456480  543705 disk_worker.go:494] system disk:vda1
I0322 01:06:14.456507  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:06:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:06:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:06:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:06:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:06:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:06:23.409764  543705 memory.go:184] no items to output this cycle
I0322 01:06:23.409893  543705 cpu.go:275] no items to output this cycle
I0322 01:06:32.309683  543705 disk_info.go:125] begin check local disk info of client
I0322 01:06:32.312121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:06:32.312128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 01:06:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:06:33.409773  543705 memory.go:184] no items to output this cycle
I0322 01:06:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 01:06:39.425882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:06:39.425889  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:06:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:06:43.410492  543705 memory.go:191] Add success.
I0322 01:06:43.409815  543705 cpu.go:282] Add success.
I0322 01:06:43.420194  543705 net.go:648] Add success.
I0322 01:06:43.422770  543705 net.go:770] primary dev: ETH0
I0322 01:06:43.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:06:43.422795  543705 net.go:698] Add success.
I0322 01:06:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:06:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:06:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:06:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:06:53.409804  543705 memory.go:184] no items to output this cycle
I0322 01:06:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 01:07:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:07:03.409779  543705 memory.go:184] no items to output this cycle
I0322 01:07:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 01:07:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:07:13.409782  543705 memory.go:191] Add success.
W0322 01:07:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:07:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:07:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:07:13.409824  543705 cpu.go:282] Add success.
I0322 01:07:13.420243  543705 net.go:648] Add success.
I0322 01:07:13.423040  543705 net.go:770] primary dev: ETH0
I0322 01:07:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:07:13.423069  543705 net.go:698] Add success.
I0322 01:07:13.453623  543705 event_worker.go:152] Polling the log file for events...
W0322 01:07:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:07:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 01:07:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:07:14.456956  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:07:14.456965  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:07:14.456971  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:07:14.457020  543705 disk_worker.go:494] system disk:vda1
I0322 01:07:14.457061  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:07:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:07:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:07:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:07:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:07:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:07:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:07:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:07:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:07:23.409882  543705 memory.go:184] no items to output this cycle
I0322 01:07:23.409883  543705 cpu.go:275] no items to output this cycle
I0322 01:07:32.313681  543705 disk_info.go:125] begin check local disk info of client
I0322 01:07:32.316221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:07:32.316228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 01:07:33.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:07:33.409756  543705 memory.go:184] no items to output this cycle
I0322 01:07:33.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:07:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:07:43.409817  543705 memory.go:191] Add success.
I0322 01:07:43.409823  543705 cpu.go:282] Add success.
I0322 01:07:43.420057  543705 net.go:648] Add success.
I0322 01:07:43.422775  543705 net.go:770] primary dev: ETH0
I0322 01:07:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:07:43.422804  543705 net.go:698] Add success.
I0322 01:07:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:07:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:07:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:07:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:07:53.409769  543705 memory.go:184] no items to output this cycle
I0322 01:07:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 01:08:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:08:03.409782  543705 memory.go:184] no items to output this cycle
I0322 01:08:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 01:08:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:08:13.409788  543705 memory.go:191] Add success.
I0322 01:08:13.409790  543705 cpu.go:282] Add success.
W0322 01:08:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:08:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:08:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:08:13.420174  543705 net.go:648] Add success.
I0322 01:08:13.422938  543705 net.go:770] primary dev: ETH0
I0322 01:08:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:08:13.422966  543705 net.go:698] Add success.
I0322 01:08:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:08:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:08:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 01:08:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:08:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 01:08:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:08:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:08:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:08:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:08:23.409775  543705 memory.go:184] no items to output this cycle
I0322 01:08:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 01:08:32.317673  543705 disk_info.go:125] begin check local disk info of client
I0322 01:08:32.320110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:08:32.320116  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f380 0xc00035f3c0]
E0322 01:08:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:08:33.409899  543705 memory.go:184] no items to output this cycle
I0322 01:08:33.409901  543705 cpu.go:275] no items to output this cycle
E0322 01:08:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:08:43.409790  543705 memory.go:191] Add success.
I0322 01:08:43.409804  543705 cpu.go:282] Add success.
I0322 01:08:43.419858  543705 net.go:648] Add success.
I0322 01:08:43.422684  543705 net.go:770] primary dev: ETH0
I0322 01:08:43.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:08:43.422709  543705 net.go:698] Add success.
I0322 01:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:08:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:08:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:08:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:08:53.409766  543705 memory.go:184] no items to output this cycle
I0322 01:08:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 01:09:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:09:03.409780  543705 memory.go:184] no items to output this cycle
I0322 01:09:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 01:09:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:09:13.409807  543705 memory.go:191] Add success.
I0322 01:09:13.409817  543705 cpu.go:282] Add success.
W0322 01:09:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:09:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:09:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:09:13.420140  543705 net.go:648] Add success.
I0322 01:09:13.422793  543705 net.go:770] primary dev: ETH0
I0322 01:09:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:09:13.422822  543705 net.go:698] Add success.
I0322 01:09:13.514240  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"53a2e3d2-74c0-49ee-9025-5337b34ca7fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:09:13.514274  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:09:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:09:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:09:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 01:09:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:09:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 01:09:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:09:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:09:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:09:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:09:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:09:23.409808  543705 memory.go:184] no items to output this cycle
I0322 01:09:23.409822  543705 cpu.go:275] no items to output this cycle
I0322 01:09:32.321683  543705 disk_info.go:125] begin check local disk info of client
I0322 01:09:32.324213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:09:32.324220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3180 0xc0003b31c0]
E0322 01:09:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:09:33.409791  543705 memory.go:184] no items to output this cycle
I0322 01:09:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 01:09:39.426892  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:09:39.426900  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:09:43.410586  543705 memory.go:191] Add success.
I0322 01:09:43.409814  543705 cpu.go:282] Add success.
I0322 01:09:43.420378  543705 net.go:648] Add success.
I0322 01:09:43.422969  543705 net.go:770] primary dev: ETH0
I0322 01:09:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:09:43.422996  543705 net.go:698] Add success.
I0322 01:09:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:09:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:09:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:09:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:09:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:09:53.409777  543705 cpu.go:275] no items to output this cycle
E0322 01:10:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:10:03.409770  543705 memory.go:184] no items to output this cycle
I0322 01:10:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 01:10:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:10:13.409783  543705 memory.go:191] Add success.
I0322 01:10:13.409805  543705 cpu.go:282] Add success.
W0322 01:10:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:10:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:10:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:10:13.420059  543705 net.go:648] Add success.
I0322 01:10:13.423209  543705 net.go:770] primary dev: ETH0
I0322 01:10:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:10:13.423236  543705 net.go:698] Add success.
I0322 01:10:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:10:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:10:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 01:10:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:10:14.456491  543705 disk_worker.go:494] system disk:vda1
I0322 01:10:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:10:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:10:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:10:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:10:23.409802  543705 memory.go:184] no items to output this cycle
I0322 01:10:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 01:10:32.325676  543705 disk_info.go:125] begin check local disk info of client
I0322 01:10:32.328228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:10:32.328235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265300 0xc000265340]
E0322 01:10:33.409734  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:10:33.409749  543705 memory.go:184] no items to output this cycle
I0322 01:10:33.409795  543705 cpu.go:275] no items to output this cycle
E0322 01:10:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:10:43.409991  543705 memory.go:191] Add success.
I0322 01:10:43.410014  543705 cpu.go:282] Add success.
I0322 01:10:43.419742  543705 net.go:648] Add success.
I0322 01:10:43.422360  543705 net.go:770] primary dev: ETH0
I0322 01:10:43.422373  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:10:43.422384  543705 net.go:698] Add success.
I0322 01:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:10:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:10:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:10:53.409791  543705 memory.go:184] no items to output this cycle
I0322 01:10:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 01:11:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:11:03.409787  543705 memory.go:184] no items to output this cycle
I0322 01:11:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 01:11:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:11:13.409787  543705 memory.go:191] Add success.
I0322 01:11:13.409807  543705 cpu.go:282] Add success.
W0322 01:11:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:11:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:11:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:11:13.420097  543705 net.go:648] Add success.
I0322 01:11:13.422638  543705 net.go:770] primary dev: ETH0
I0322 01:11:13.422652  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:11:13.422663  543705 net.go:698] Add success.
I0322 01:11:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:11:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:11:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 01:11:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:11:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 01:11:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:11:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:11:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:11:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:11:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:11:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:11:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:11:23.409795  543705 memory.go:184] no items to output this cycle
I0322 01:11:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 01:11:32.329678  543705 disk_info.go:125] begin check local disk info of client
I0322 01:11:32.332262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:11:32.332270  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264f00 0xc000264f40]
E0322 01:11:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:11:33.409801  543705 memory.go:184] no items to output this cycle
I0322 01:11:33.409819  543705 cpu.go:275] no items to output this cycle
E0322 01:11:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:11:43.409791  543705 memory.go:191] Add success.
I0322 01:11:43.409813  543705 cpu.go:282] Add success.
I0322 01:11:43.419759  543705 net.go:648] Add success.
I0322 01:11:43.422781  543705 net.go:770] primary dev: ETH0
I0322 01:11:43.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:11:43.422837  543705 net.go:698] Add success.
I0322 01:11:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:11:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:11:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:11:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:11:53.409820  543705 memory.go:184] no items to output this cycle
I0322 01:11:53.409831  543705 cpu.go:275] no items to output this cycle
E0322 01:12:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:12:03.409785  543705 memory.go:184] no items to output this cycle
I0322 01:12:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 01:12:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:12:13.409789  543705 memory.go:191] Add success.
I0322 01:12:13.409812  543705 cpu.go:282] Add success.
W0322 01:12:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:12:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:12:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:12:13.420120  543705 net.go:648] Add success.
I0322 01:12:13.422713  543705 net.go:770] primary dev: ETH0
I0322 01:12:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:12:13.422740  543705 net.go:698] Add success.
I0322 01:12:13.467905  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef73d923-dfbc-472e-94c8-c60d844713c5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:12:13.467936  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 01:12:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:12:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 01:12:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:12:14.455952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:12:14.455960  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:12:14.455965  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:12:14.456564  543705 disk_worker.go:494] system disk:vda1
I0322 01:12:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:12:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:12:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:12:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:12:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:12:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:12:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:12:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:12:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:12:23.409760  543705 memory.go:184] no items to output this cycle
I0322 01:12:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 01:12:32.333679  543705 disk_info.go:125] begin check local disk info of client
I0322 01:12:32.336106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:12:32.336112  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aa340 0xc0003aa380]
E0322 01:12:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:12:33.409787  543705 memory.go:184] no items to output this cycle
I0322 01:12:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 01:12:39.427902  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:12:39.427910  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:12:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:12:43.410538  543705 memory.go:191] Add success.
I0322 01:12:43.409833  543705 cpu.go:282] Add success.
I0322 01:12:43.420317  543705 net.go:648] Add success.
I0322 01:12:43.422718  543705 net.go:770] primary dev: ETH0
I0322 01:12:43.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:12:43.422743  543705 net.go:698] Add success.
I0322 01:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:12:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:12:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:12:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:12:53.409812  543705 memory.go:184] no items to output this cycle
I0322 01:12:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 01:13:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:13:03.409780  543705 memory.go:184] no items to output this cycle
I0322 01:13:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 01:13:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:13:13.409773  543705 memory.go:191] Add success.
W0322 01:13:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:13:13.409805  543705 cpu.go:282] Add success.
W0322 01:13:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:13:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:13:13.420452  543705 net.go:648] Add success.
I0322 01:13:13.423248  543705 net.go:770] primary dev: ETH0
I0322 01:13:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:13:13.423274  543705 net.go:698] Add success.
I0322 01:13:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:13:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:13:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 01:13:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:13:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 01:13:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:13:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:13:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:13:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:13:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:13:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:13:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:13:23.409767  543705 memory.go:184] no items to output this cycle
I0322 01:13:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 01:13:32.337678  543705 disk_info.go:125] begin check local disk info of client
I0322 01:13:32.340275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:13:32.340282  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8780 0xc0003c87c0]
E0322 01:13:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:13:33.409780  543705 memory.go:184] no items to output this cycle
I0322 01:13:33.409787  543705 cpu.go:275] no items to output this cycle
E0322 01:13:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:13:43.409934  543705 cpu.go:282] Add success.
I0322 01:13:43.409938  543705 memory.go:191] Add success.
I0322 01:13:43.419713  543705 net.go:648] Add success.
I0322 01:13:43.422876  543705 net.go:770] primary dev: ETH0
I0322 01:13:43.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:13:43.422902  543705 net.go:698] Add success.
I0322 01:13:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:13:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:13:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:13:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:13:53.409805  543705 memory.go:184] no items to output this cycle
I0322 01:13:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 01:14:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:14:03.409782  543705 memory.go:184] no items to output this cycle
I0322 01:14:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 01:14:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:14:13.409807  543705 memory.go:191] Add success.
I0322 01:14:13.409813  543705 cpu.go:282] Add success.
W0322 01:14:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:14:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:14:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:14:13.420085  543705 net.go:648] Add success.
I0322 01:14:13.423117  543705 net.go:770] primary dev: ETH0
I0322 01:14:13.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:14:13.423141  543705 net.go:698] Add success.
I0322 01:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:14:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:14:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 01:14:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:14:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 01:14:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:14:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:14:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:14:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:14:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:14:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 01:14:23.409793  543705 memory.go:184] no items to output this cycle
I0322 01:14:32.341679  543705 disk_info.go:125] begin check local disk info of client
I0322 01:14:32.344144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:14:32.344150  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471100 0xc000471140]
E0322 01:14:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:14:33.409790  543705 memory.go:184] no items to output this cycle
I0322 01:14:33.409806  543705 cpu.go:275] no items to output this cycle
E0322 01:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:14:43.409791  543705 memory.go:191] Add success.
I0322 01:14:43.409811  543705 cpu.go:282] Add success.
I0322 01:14:43.420004  543705 net.go:648] Add success.
I0322 01:14:43.422707  543705 net.go:770] primary dev: ETH0
I0322 01:14:43.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:14:43.422731  543705 net.go:698] Add success.
I0322 01:14:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:14:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:14:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:14:53.409768  543705 memory.go:184] no items to output this cycle
I0322 01:14:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 01:15:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:15:03.409774  543705 memory.go:184] no items to output this cycle
I0322 01:15:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 01:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:15:13.409776  543705 memory.go:191] Add success.
W0322 01:15:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:15:13.409802  543705 cpu.go:282] Add success.
W0322 01:15:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:15:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:15:13.420206  543705 net.go:648] Add success.
I0322 01:15:13.422822  543705 net.go:770] primary dev: ETH0
I0322 01:15:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:15:13.422851  543705 net.go:698] Add success.
I0322 01:15:13.469085  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd813608-34ee-42ec-8087-91b114336410","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:15:13.469117  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:15:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:15:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 01:15:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:15:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 01:15:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:15:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:15:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:15:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:15:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:15:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 01:15:23.409782  543705 memory.go:184] no items to output this cycle
I0322 01:15:32.345684  543705 disk_info.go:125] begin check local disk info of client
I0322 01:15:32.348223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:15:32.348230  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bec0 0xc00007bf00]
E0322 01:15:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:15:33.409789  543705 memory.go:184] no items to output this cycle
I0322 01:15:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 01:15:39.428893  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:15:39.428901  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:15:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:15:43.410714  543705 memory.go:191] Add success.
I0322 01:15:43.409804  543705 cpu.go:282] Add success.
I0322 01:15:43.420685  543705 net.go:648] Add success.
I0322 01:15:43.423336  543705 net.go:770] primary dev: ETH0
I0322 01:15:43.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:15:43.423361  543705 net.go:698] Add success.
I0322 01:15:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:15:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:15:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:15:53.410337  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:15:53.410355  543705 memory.go:184] no items to output this cycle
I0322 01:15:53.410382  543705 cpu.go:275] no items to output this cycle
I0322 01:16:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 01:16:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:16:03.409793  543705 memory.go:184] no items to output this cycle
E0322 01:16:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:16:13.409792  543705 memory.go:191] Add success.
I0322 01:16:13.409793  543705 cpu.go:282] Add success.
W0322 01:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:16:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:16:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:16:13.419878  543705 net.go:770] primary dev: ETH0
I0322 01:16:13.419891  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:16:13.419903  543705 net.go:698] Add success.
I0322 01:16:13.420131  543705 net.go:648] Add success.
I0322 01:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:16:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:16:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 01:16:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:16:14.456506  543705 disk_worker.go:494] system disk:vda1
I0322 01:16:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:16:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:16:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:16:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:16:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:16:23.409771  543705 memory.go:184] no items to output this cycle
I0322 01:16:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 01:16:32.349674  543705 disk_info.go:125] begin check local disk info of client
I0322 01:16:32.352146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:16:32.352154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9340 0xc0004d9380]
E0322 01:16:33.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:16:33.409757  543705 memory.go:184] no items to output this cycle
I0322 01:16:33.409791  543705 cpu.go:275] no items to output this cycle
E0322 01:16:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:16:43.409784  543705 memory.go:191] Add success.
I0322 01:16:43.409805  543705 cpu.go:282] Add success.
I0322 01:16:43.419885  543705 net.go:648] Add success.
I0322 01:16:43.422701  543705 net.go:770] primary dev: ETH0
I0322 01:16:43.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:16:43.422727  543705 net.go:698] Add success.
I0322 01:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:16:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:16:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:16:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:16:53.409772  543705 memory.go:184] no items to output this cycle
I0322 01:16:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 01:17:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:17:03.409796  543705 memory.go:184] no items to output this cycle
I0322 01:17:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 01:17:13.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:17:13.409771  543705 memory.go:191] Add success.
W0322 01:17:13.409795  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:17:13.409800  543705 cpu.go:282] Add success.
W0322 01:17:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:17:13.409811  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:17:13.420134  543705 net.go:648] Add success.
I0322 01:17:13.422817  543705 net.go:770] primary dev: ETH0
I0322 01:17:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:17:13.422846  543705 net.go:698] Add success.
I0322 01:17:13.453397  543705 event_worker.go:152] Polling the log file for events...
W0322 01:17:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:17:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 01:17:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:17:14.456897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:17:14.456906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:17:14.456912  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:17:14.456978  543705 disk_worker.go:494] system disk:vda1
I0322 01:17:14.457008  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:17:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:17:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:17:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:17:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:17:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:17:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:17:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:17:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:17:23.409781  543705 memory.go:184] no items to output this cycle
I0322 01:17:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 01:17:32.353685  543705 disk_info.go:125] begin check local disk info of client
I0322 01:17:32.356279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:17:32.356286  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046b6c0 0xc00046b700]
E0322 01:17:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:17:33.409792  543705 memory.go:184] no items to output this cycle
I0322 01:17:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 01:17:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:17:43.409801  543705 memory.go:191] Add success.
I0322 01:17:43.409802  543705 cpu.go:282] Add success.
I0322 01:17:43.419850  543705 net.go:648] Add success.
I0322 01:17:43.422508  543705 net.go:770] primary dev: ETH0
I0322 01:17:43.422522  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:17:43.422537  543705 net.go:698] Add success.
I0322 01:17:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:17:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:17:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:17:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:17:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 01:17:53.409788  543705 memory.go:184] no items to output this cycle
E0322 01:18:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:18:03.409799  543705 memory.go:184] no items to output this cycle
I0322 01:18:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 01:18:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:18:13.409787  543705 memory.go:191] Add success.
I0322 01:18:13.409794  543705 cpu.go:282] Add success.
W0322 01:18:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:18:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:18:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:18:13.420122  543705 net.go:648] Add success.
I0322 01:18:13.423039  543705 net.go:770] primary dev: ETH0
I0322 01:18:13.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:18:13.423065  543705 net.go:698] Add success.
I0322 01:18:13.468363  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"13c7b890-b849-40c8-bead-ded70f1fa76d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:18:13.468400  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:18:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:18:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:18:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 01:18:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:18:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 01:18:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:18:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:18:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:18:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:18:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:18:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:18:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:18:23.409803  543705 memory.go:184] no items to output this cycle
I0322 01:18:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 01:18:32.357678  543705 disk_info.go:125] begin check local disk info of client
I0322 01:18:32.360173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:18:32.360180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa80 0xc0001aaac0]
E0322 01:18:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:18:33.409774  543705 memory.go:184] no items to output this cycle
I0322 01:18:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 01:18:39.429904  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:18:39.429912  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:18:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:18:43.410749  543705 memory.go:191] Add success.
I0322 01:18:43.409804  543705 cpu.go:282] Add success.
I0322 01:18:43.420463  543705 net.go:648] Add success.
I0322 01:18:43.423456  543705 net.go:770] primary dev: ETH0
I0322 01:18:43.423469  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:18:43.423483  543705 net.go:698] Add success.
I0322 01:18:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:18:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:18:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:18:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:18:53.409787  543705 memory.go:184] no items to output this cycle
I0322 01:18:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 01:19:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:19:03.409779  543705 memory.go:184] no items to output this cycle
I0322 01:19:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 01:19:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:19:13.409788  543705 cpu.go:282] Add success.
I0322 01:19:13.409796  543705 memory.go:191] Add success.
W0322 01:19:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:19:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:19:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:19:13.420059  543705 net.go:648] Add success.
I0322 01:19:13.423179  543705 net.go:770] primary dev: ETH0
I0322 01:19:13.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:19:13.423207  543705 net.go:698] Add success.
I0322 01:19:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:19:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:19:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 01:19:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:19:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 01:19:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:19:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:19:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:19:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:19:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:19:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:19:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:19:23.409806  543705 memory.go:184] no items to output this cycle
I0322 01:19:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 01:19:32.361683  543705 disk_info.go:125] begin check local disk info of client
I0322 01:19:32.364225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:19:32.364232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b9c0 0xc00007ba00]
E0322 01:19:33.409739  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:19:33.409753  543705 memory.go:184] no items to output this cycle
I0322 01:19:33.409797  543705 cpu.go:275] no items to output this cycle
E0322 01:19:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:19:43.409790  543705 memory.go:191] Add success.
I0322 01:19:43.409793  543705 cpu.go:282] Add success.
I0322 01:19:43.419860  543705 net.go:648] Add success.
I0322 01:19:43.422815  543705 net.go:770] primary dev: ETH0
I0322 01:19:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:19:43.422840  543705 net.go:698] Add success.
I0322 01:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:19:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:19:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:19:53.409765  543705 memory.go:184] no items to output this cycle
I0322 01:19:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 01:20:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:20:03.409765  543705 memory.go:184] no items to output this cycle
I0322 01:20:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 01:20:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:20:13.409811  543705 memory.go:191] Add success.
I0322 01:20:13.409822  543705 cpu.go:282] Add success.
W0322 01:20:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:20:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:20:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:20:13.420041  543705 net.go:648] Add success.
I0322 01:20:13.422583  543705 net.go:770] primary dev: ETH0
I0322 01:20:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:20:13.422608  543705 net.go:698] Add success.
I0322 01:20:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:20:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:20:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 01:20:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:20:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 01:20:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:20:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:20:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:20:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:20:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:20:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:20:23.409767  543705 memory.go:184] no items to output this cycle
I0322 01:20:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 01:20:32.365675  543705 disk_info.go:125] begin check local disk info of client
I0322 01:20:32.368092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:20:32.368099  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027afc0 0xc00027b000]
E0322 01:20:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:20:33.409789  543705 memory.go:184] no items to output this cycle
I0322 01:20:33.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:20:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:20:43.409814  543705 memory.go:191] Add success.
I0322 01:20:43.409825  543705 cpu.go:282] Add success.
I0322 01:20:43.420070  543705 net.go:648] Add success.
I0322 01:20:43.423077  543705 net.go:770] primary dev: ETH0
I0322 01:20:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:20:43.423103  543705 net.go:698] Add success.
I0322 01:20:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:20:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:20:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:20:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:20:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:20:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 01:21:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:21:03.409774  543705 memory.go:184] no items to output this cycle
I0322 01:21:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 01:21:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:21:13.409780  543705 memory.go:191] Add success.
I0322 01:21:13.409800  543705 cpu.go:282] Add success.
W0322 01:21:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:21:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:21:13.420074  543705 net.go:648] Add success.
I0322 01:21:13.422524  543705 net.go:770] primary dev: ETH0
I0322 01:21:13.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:21:13.422557  543705 net.go:698] Add success.
I0322 01:21:13.555842  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ddbc0900-f109-46cf-a449-c683d23daa3e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:21:13.555878  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:21:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:21:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:21:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 01:21:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:21:14.456546  543705 disk_worker.go:494] system disk:vda1
I0322 01:21:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:21:15.455620  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:21:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:21:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:21:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:21:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:21:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 01:21:23.409784  543705 memory.go:184] no items to output this cycle
I0322 01:21:32.369682  543705 disk_info.go:125] begin check local disk info of client
I0322 01:21:32.372285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:21:32.372292  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0322 01:21:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:21:33.409761  543705 memory.go:184] no items to output this cycle
I0322 01:21:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 01:21:39.430926  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:21:39.430933  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:21:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:21:43.410557  543705 memory.go:191] Add success.
I0322 01:21:43.409789  543705 cpu.go:282] Add success.
I0322 01:21:43.420243  543705 net.go:648] Add success.
I0322 01:21:43.422832  543705 net.go:770] primary dev: ETH0
I0322 01:21:43.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:21:43.422860  543705 net.go:698] Add success.
I0322 01:21:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:21:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:21:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:21:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:21:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:21:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 01:22:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:22:03.409776  543705 memory.go:184] no items to output this cycle
I0322 01:22:03.409787  543705 cpu.go:275] no items to output this cycle
W0322 01:22:13.409705  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:22:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:22:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:22:13.409787  543705 cpu.go:282] Add success.
E0322 01:22:13.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:22:13.409856  543705 memory.go:191] Add success.
I0322 01:22:13.420130  543705 net.go:648] Add success.
I0322 01:22:13.422609  543705 net.go:770] primary dev: ETH0
I0322 01:22:13.422622  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:22:13.422634  543705 net.go:698] Add success.
W0322 01:22:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:22:14.455139  543705 disk_worker.go:708] disk space is not compliant
W0322 01:22:14.455142  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:22:14.456926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:22:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:22:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:22:14.456993  543705 disk_worker.go:494] system disk:vda1
I0322 01:22:14.457036  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:22:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:22:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:22:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:22:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:22:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:22:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:22:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:22:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:22:23.409778  543705 memory.go:184] no items to output this cycle
I0322 01:22:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 01:22:32.373676  543705 disk_info.go:125] begin check local disk info of client
I0322 01:22:32.376126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:22:32.376132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470200 0xc000470240]
E0322 01:22:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:22:33.409785  543705 memory.go:184] no items to output this cycle
I0322 01:22:33.409799  543705 cpu.go:275] no items to output this cycle
E0322 01:22:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:22:43.409795  543705 memory.go:191] Add success.
I0322 01:22:43.409812  543705 cpu.go:282] Add success.
I0322 01:22:43.420074  543705 net.go:648] Add success.
I0322 01:22:43.423085  543705 net.go:770] primary dev: ETH0
I0322 01:22:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:22:43.423110  543705 net.go:698] Add success.
I0322 01:22:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:22:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:22:53.410338  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:22:53.410355  543705 memory.go:184] no items to output this cycle
I0322 01:22:53.410379  543705 cpu.go:275] no items to output this cycle
E0322 01:23:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:23:03.409764  543705 memory.go:184] no items to output this cycle
I0322 01:23:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:23:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:23:13.409787  543705 memory.go:191] Add success.
I0322 01:23:13.409805  543705 cpu.go:282] Add success.
W0322 01:23:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:23:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:23:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:23:13.420106  543705 net.go:648] Add success.
I0322 01:23:13.423172  543705 net.go:770] primary dev: ETH0
I0322 01:23:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:23:13.423198  543705 net.go:698] Add success.
I0322 01:23:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:23:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:23:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 01:23:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:23:14.456471  543705 disk_worker.go:494] system disk:vda1
I0322 01:23:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:23:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:23:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:23:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:23:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:23:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:23:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:23:23.409770  543705 memory.go:184] no items to output this cycle
I0322 01:23:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 01:23:32.377685  543705 disk_info.go:125] begin check local disk info of client
I0322 01:23:32.380241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:23:32.380247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003924c0 0xc000392500]
E0322 01:23:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:23:33.409762  543705 memory.go:184] no items to output this cycle
I0322 01:23:33.409793  543705 cpu.go:275] no items to output this cycle
E0322 01:23:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:23:43.409792  543705 memory.go:191] Add success.
I0322 01:23:43.409817  543705 cpu.go:282] Add success.
I0322 01:23:43.420073  543705 net.go:648] Add success.
I0322 01:23:43.422654  543705 net.go:770] primary dev: ETH0
I0322 01:23:43.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:23:43.422679  543705 net.go:698] Add success.
I0322 01:23:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:23:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:23:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:23:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:23:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:23:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 01:24:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:24:03.409796  543705 memory.go:184] no items to output this cycle
I0322 01:24:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 01:24:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:24:13.409800  543705 memory.go:191] Add success.
I0322 01:24:13.409800  543705 cpu.go:282] Add success.
W0322 01:24:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:24:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:24:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:24:13.420063  543705 net.go:648] Add success.
I0322 01:24:13.422907  543705 net.go:770] primary dev: ETH0
I0322 01:24:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:24:13.422932  543705 net.go:698] Add success.
I0322 01:24:13.468384  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"89c163f2-dd14-4924-9ae0-3fa631d5ec6d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:24:13.468417  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:24:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:24:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:24:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 01:24:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:24:14.456883  543705 disk_worker.go:494] system disk:vda1
I0322 01:24:14.456925  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:24:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:24:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:24:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:24:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:24:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:24:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:24:23.409817  543705 memory.go:184] no items to output this cycle
I0322 01:24:23.409825  543705 cpu.go:275] no items to output this cycle
I0322 01:24:32.381675  543705 disk_info.go:125] begin check local disk info of client
I0322 01:24:32.384200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:24:32.384207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2740 0xc0003f2780]
E0322 01:24:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:24:33.409779  543705 memory.go:184] no items to output this cycle
I0322 01:24:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 01:24:39.431924  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:24:39.431931  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:24:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:24:43.410581  543705 memory.go:191] Add success.
I0322 01:24:43.409813  543705 cpu.go:282] Add success.
I0322 01:24:43.420295  543705 net.go:648] Add success.
I0322 01:24:43.422974  543705 net.go:770] primary dev: ETH0
I0322 01:24:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:24:43.423004  543705 net.go:698] Add success.
I0322 01:24:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:24:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:24:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:24:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:24:53.409801  543705 memory.go:184] no items to output this cycle
I0322 01:24:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 01:25:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:25:03.409818  543705 memory.go:184] no items to output this cycle
I0322 01:25:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 01:25:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:25:13.409797  543705 memory.go:191] Add success.
I0322 01:25:13.409814  543705 cpu.go:282] Add success.
W0322 01:25:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:25:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:25:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:25:13.420131  543705 net.go:648] Add success.
I0322 01:25:13.422920  543705 net.go:770] primary dev: ETH0
I0322 01:25:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:25:13.422943  543705 net.go:698] Add success.
I0322 01:25:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:25:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:25:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 01:25:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:25:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 01:25:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:25:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:25:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:25:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:25:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:25:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:25:23.409810  543705 memory.go:184] no items to output this cycle
I0322 01:25:23.409822  543705 cpu.go:275] no items to output this cycle
I0322 01:25:32.385682  543705 disk_info.go:125] begin check local disk info of client
I0322 01:25:32.388259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:25:32.388266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8b00 0xc0004d8b40]
E0322 01:25:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:25:33.409810  543705 memory.go:184] no items to output this cycle
I0322 01:25:33.409827  543705 cpu.go:275] no items to output this cycle
E0322 01:25:43.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:25:43.409845  543705 memory.go:191] Add success.
I0322 01:25:43.409852  543705 cpu.go:282] Add success.
I0322 01:25:43.420001  543705 net.go:648] Add success.
I0322 01:25:43.423139  543705 net.go:770] primary dev: ETH0
I0322 01:25:43.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:25:43.423173  543705 net.go:698] Add success.
I0322 01:25:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:25:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:25:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:25:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:25:53.409785  543705 memory.go:184] no items to output this cycle
I0322 01:25:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 01:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:26:03.409768  543705 memory.go:184] no items to output this cycle
I0322 01:26:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 01:26:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:26:13.409802  543705 memory.go:191] Add success.
I0322 01:26:13.409822  543705 cpu.go:282] Add success.
W0322 01:26:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:26:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:26:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:26:13.420063  543705 net.go:648] Add success.
I0322 01:26:13.422633  543705 net.go:770] primary dev: ETH0
I0322 01:26:13.422646  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:26:13.422657  543705 net.go:698] Add success.
I0322 01:26:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:26:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:26:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 01:26:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:26:14.456488  543705 disk_worker.go:494] system disk:vda1
I0322 01:26:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:26:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:26:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:26:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:26:23.410458  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:26:23.410474  543705 memory.go:184] no items to output this cycle
I0322 01:26:23.410494  543705 cpu.go:275] no items to output this cycle
I0322 01:26:32.389678  543705 disk_info.go:125] begin check local disk info of client
I0322 01:26:32.392088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:26:32.392094  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4580 0xc0000c45c0]
E0322 01:26:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:26:33.409789  543705 memory.go:184] no items to output this cycle
I0322 01:26:33.409805  543705 cpu.go:275] no items to output this cycle
E0322 01:26:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:26:43.409801  543705 memory.go:191] Add success.
I0322 01:26:43.409802  543705 cpu.go:282] Add success.
I0322 01:26:43.419876  543705 net.go:648] Add success.
I0322 01:26:43.422362  543705 net.go:770] primary dev: ETH0
I0322 01:26:43.422377  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:26:43.422392  543705 net.go:698] Add success.
I0322 01:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:26:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:26:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:26:53.409763  543705 memory.go:184] no items to output this cycle
I0322 01:26:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 01:27:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:27:03.409777  543705 memory.go:184] no items to output this cycle
I0322 01:27:03.409778  543705 cpu.go:275] no items to output this cycle
W0322 01:27:13.409689  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0322 01:27:13.409698  543705 conf_downlod.go:89] use old conf
E0322 01:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:27:13.409809  543705 memory.go:191] Add success.
I0322 01:27:13.409823  543705 cpu.go:282] Add success.
W0322 01:27:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:27:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:27:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:27:13.419739  543705 net.go:648] Add success.
I0322 01:27:13.428722  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 01:27:13.428796  543705 net.go:770] primary dev: ETH0
I0322 01:27:13.428808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:27:13.428819  543705 net.go:698] Add success.
I0322 01:27:13.453343  543705 event_worker.go:152] Polling the log file for events...
I0322 01:27:13.468901  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c672de81-162b-4263-8681-e868d82d68b2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:27:13.468932  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 01:27:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:27:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 01:27:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:27:14.456785  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:27:14.456794  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:27:14.456800  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:27:14.456813  543705 disk_worker.go:494] system disk:vda1
I0322 01:27:14.456842  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:27:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:27:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:27:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:27:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:27:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:27:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:27:16.472310  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:27:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:27:23.409783  543705 memory.go:184] no items to output this cycle
I0322 01:27:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 01:27:32.393686  543705 disk_info.go:125] begin check local disk info of client
I0322 01:27:32.396238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:27:32.396245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468840 0xc000468880]
E0322 01:27:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:27:33.409793  543705 memory.go:184] no items to output this cycle
I0322 01:27:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 01:27:39.432941  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:27:39.432949  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:27:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:27:43.410628  543705 memory.go:191] Add success.
I0322 01:27:43.409794  543705 cpu.go:282] Add success.
I0322 01:27:43.420418  543705 net.go:648] Add success.
I0322 01:27:43.422968  543705 net.go:770] primary dev: ETH0
I0322 01:27:43.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:27:43.422993  543705 net.go:698] Add success.
I0322 01:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:27:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:27:53.410364  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:27:53.410378  543705 memory.go:184] no items to output this cycle
I0322 01:27:53.410390  543705 cpu.go:275] no items to output this cycle
E0322 01:28:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:28:03.409802  543705 memory.go:184] no items to output this cycle
I0322 01:28:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 01:28:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:28:13.409862  543705 memory.go:191] Add success.
W0322 01:28:13.409893  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:28:13.409905  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:28:13.409909  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:28:13.409941  543705 cpu.go:282] Add success.
I0322 01:28:13.419701  543705 net.go:648] Add success.
I0322 01:28:13.422220  543705 net.go:770] primary dev: ETH0
I0322 01:28:13.422233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:28:13.422243  543705 net.go:698] Add success.
I0322 01:28:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:28:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:28:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 01:28:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:28:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 01:28:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:28:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:28:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:28:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:28:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:28:23.409789  543705 memory.go:184] no items to output this cycle
I0322 01:28:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 01:28:32.397684  543705 disk_info.go:125] begin check local disk info of client
I0322 01:28:32.400102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:28:32.400108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d08c0 0xc0003d0900]
E0322 01:28:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:28:33.409797  543705 memory.go:184] no items to output this cycle
I0322 01:28:33.409810  543705 cpu.go:275] no items to output this cycle
E0322 01:28:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:28:43.409823  543705 memory.go:191] Add success.
I0322 01:28:43.409827  543705 cpu.go:282] Add success.
I0322 01:28:43.420047  543705 net.go:648] Add success.
I0322 01:28:43.422917  543705 net.go:770] primary dev: ETH0
I0322 01:28:43.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:28:43.422948  543705 net.go:698] Add success.
I0322 01:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:28:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:28:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:28:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:28:53.409787  543705 memory.go:184] no items to output this cycle
I0322 01:28:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 01:29:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:29:03.409782  543705 memory.go:184] no items to output this cycle
I0322 01:29:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 01:29:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:29:13.409805  543705 memory.go:191] Add success.
I0322 01:29:13.409813  543705 cpu.go:282] Add success.
W0322 01:29:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:29:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:29:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:29:13.419708  543705 net.go:648] Add success.
I0322 01:29:13.422310  543705 net.go:770] primary dev: ETH0
I0322 01:29:13.422322  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:29:13.422334  543705 net.go:698] Add success.
I0322 01:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:29:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:29:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 01:29:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:29:14.456754  543705 disk_worker.go:494] system disk:vda1
I0322 01:29:14.456781  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:29:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:29:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:29:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:29:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:29:23.409784  543705 memory.go:184] no items to output this cycle
I0322 01:29:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 01:29:32.401683  543705 disk_info.go:125] begin check local disk info of client
I0322 01:29:32.404274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:29:32.404280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c9e80 0xc0004c9ec0]
E0322 01:29:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:29:33.409769  543705 memory.go:184] no items to output this cycle
I0322 01:29:33.409781  543705 cpu.go:275] no items to output this cycle
E0322 01:29:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:29:43.409817  543705 memory.go:191] Add success.
I0322 01:29:43.409828  543705 cpu.go:282] Add success.
I0322 01:29:43.419939  543705 net.go:648] Add success.
I0322 01:29:43.422672  543705 net.go:770] primary dev: ETH0
I0322 01:29:43.422687  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:29:43.422699  543705 net.go:698] Add success.
I0322 01:29:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:29:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:29:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:29:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:29:53.409764  543705 memory.go:184] no items to output this cycle
I0322 01:29:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 01:30:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:30:03.409798  543705 memory.go:184] no items to output this cycle
I0322 01:30:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 01:30:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:30:13.409778  543705 memory.go:191] Add success.
I0322 01:30:13.409800  543705 cpu.go:282] Add success.
W0322 01:30:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:30:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:30:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:30:13.420128  543705 net.go:648] Add success.
I0322 01:30:13.422773  543705 net.go:770] primary dev: ETH0
I0322 01:30:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:30:13.422799  543705 net.go:698] Add success.
I0322 01:30:13.467445  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0d816f0f-4f83-4e42-9209-d587173d57b1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:30:13.467484  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:30:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:30:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 01:30:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:30:14.456775  543705 disk_worker.go:494] system disk:vda1
I0322 01:30:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:30:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:30:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:30:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:30:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:30:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:30:23.409794  543705 memory.go:184] no items to output this cycle
I0322 01:30:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 01:30:32.405681  543705 disk_info.go:125] begin check local disk info of client
I0322 01:30:32.408125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:30:32.408132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508500 0xc000508540]
E0322 01:30:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:30:33.409782  543705 memory.go:184] no items to output this cycle
I0322 01:30:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 01:30:39.433932  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:30:39.433939  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:30:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:30:43.410770  543705 memory.go:191] Add success.
I0322 01:30:43.409809  543705 cpu.go:282] Add success.
I0322 01:30:43.420485  543705 net.go:648] Add success.
I0322 01:30:43.423588  543705 net.go:770] primary dev: ETH0
I0322 01:30:43.423600  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:30:43.423613  543705 net.go:698] Add success.
I0322 01:30:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:30:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:30:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:30:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:30:53.409805  543705 memory.go:184] no items to output this cycle
I0322 01:30:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 01:31:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:31:03.409777  543705 memory.go:184] no items to output this cycle
I0322 01:31:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 01:31:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:31:13.409777  543705 memory.go:191] Add success.
W0322 01:31:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:31:13.409801  543705 cpu.go:282] Add success.
W0322 01:31:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:31:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:31:13.420157  543705 net.go:648] Add success.
I0322 01:31:13.423117  543705 net.go:770] primary dev: ETH0
I0322 01:31:13.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:31:13.423140  543705 net.go:698] Add success.
I0322 01:31:14.454943  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:31:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:31:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 01:31:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:31:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 01:31:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:31:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:31:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:31:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:31:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:31:23.409778  543705 memory.go:184] no items to output this cycle
I0322 01:31:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 01:31:32.409684  543705 disk_info.go:125] begin check local disk info of client
I0322 01:31:32.412196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:31:32.412203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8b80 0xc0003b8bc0]
E0322 01:31:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:31:33.409794  543705 memory.go:184] no items to output this cycle
I0322 01:31:33.409809  543705 cpu.go:275] no items to output this cycle
E0322 01:31:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:31:43.409787  543705 memory.go:191] Add success.
I0322 01:31:43.409803  543705 cpu.go:282] Add success.
I0322 01:31:43.419856  543705 net.go:648] Add success.
I0322 01:31:43.422593  543705 net.go:770] primary dev: ETH0
I0322 01:31:43.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:31:43.422621  543705 net.go:698] Add success.
I0322 01:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:31:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:31:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:31:53.409773  543705 memory.go:184] no items to output this cycle
I0322 01:31:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 01:32:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:32:03.409777  543705 memory.go:184] no items to output this cycle
I0322 01:32:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 01:32:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:32:13.409785  543705 memory.go:191] Add success.
I0322 01:32:13.409784  543705 cpu.go:282] Add success.
W0322 01:32:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:32:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:32:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:32:13.420029  543705 net.go:648] Add success.
I0322 01:32:13.423298  543705 net.go:770] primary dev: ETH0
I0322 01:32:13.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:32:13.423323  543705 net.go:698] Add success.
W0322 01:32:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:32:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 01:32:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:32:14.456913  543705 disk_worker.go:494] system disk:vda1
I0322 01:32:14.457009  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:32:14.457341  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:32:14.457348  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:32:14.457354  543705 custom_config.go:64] query custom config with name: gpu
E0322 01:32:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:32:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:32:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:32:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:32:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:32:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:32:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:32:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:32:23.409774  543705 cpu.go:275] no items to output this cycle
I0322 01:32:23.409780  543705 memory.go:184] no items to output this cycle
I0322 01:32:32.412804  543705 disk_info.go:125] begin check local disk info of client
I0322 01:32:32.415210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:32:32.415218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
E0322 01:32:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:32:33.409784  543705 memory.go:184] no items to output this cycle
I0322 01:32:33.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:32:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:32:43.409814  543705 memory.go:191] Add success.
I0322 01:32:43.409822  543705 cpu.go:282] Add success.
I0322 01:32:43.419886  543705 net.go:648] Add success.
I0322 01:32:43.422634  543705 net.go:770] primary dev: ETH0
I0322 01:32:43.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:32:43.422661  543705 net.go:698] Add success.
I0322 01:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:32:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:32:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:32:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:32:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:32:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 01:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:33:03.409779  543705 memory.go:184] no items to output this cycle
I0322 01:33:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 01:33:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:33:13.409811  543705 memory.go:191] Add success.
I0322 01:33:13.409816  543705 cpu.go:282] Add success.
W0322 01:33:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:33:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:33:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:33:13.420076  543705 net.go:648] Add success.
I0322 01:33:13.423015  543705 net.go:770] primary dev: ETH0
I0322 01:33:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:33:13.423044  543705 net.go:698] Add success.
I0322 01:33:13.468474  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"352087a5-7d44-4961-898e-e11ff5a5ebbe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:33:13.468507  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:33:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:33:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:33:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 01:33:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:33:14.456526  543705 disk_worker.go:494] system disk:vda1
I0322 01:33:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:33:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:33:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:33:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:33:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:33:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:33:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 01:33:23.409778  543705 memory.go:184] no items to output this cycle
I0322 01:33:32.415829  543705 disk_info.go:125] begin check local disk info of client
I0322 01:33:32.418431  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:33:32.418438  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ff180 0xc0004ff1c0]
E0322 01:33:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:33:33.409799  543705 memory.go:184] no items to output this cycle
I0322 01:33:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 01:33:39.434934  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:33:39.434943  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:33:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:33:43.410635  543705 memory.go:191] Add success.
I0322 01:33:43.409822  543705 cpu.go:282] Add success.
I0322 01:33:43.420328  543705 net.go:648] Add success.
I0322 01:33:43.422970  543705 net.go:770] primary dev: ETH0
I0322 01:33:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:33:43.422996  543705 net.go:698] Add success.
I0322 01:33:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:33:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:33:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:33:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:33:53.409774  543705 memory.go:184] no items to output this cycle
I0322 01:33:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 01:34:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:34:03.409771  543705 memory.go:184] no items to output this cycle
I0322 01:34:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 01:34:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:34:13.409804  543705 memory.go:191] Add success.
I0322 01:34:13.409816  543705 cpu.go:282] Add success.
W0322 01:34:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:34:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:34:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:34:13.420358  543705 net.go:648] Add success.
I0322 01:34:13.423250  543705 net.go:770] primary dev: ETH0
I0322 01:34:13.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:34:13.423275  543705 net.go:698] Add success.
I0322 01:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:34:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:34:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 01:34:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:34:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 01:34:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:34:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:34:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:34:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:34:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:34:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:34:23.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:34:23.409935  543705 memory.go:184] no items to output this cycle
I0322 01:34:23.410007  543705 cpu.go:275] no items to output this cycle
I0322 01:34:32.418803  543705 disk_info.go:125] begin check local disk info of client
I0322 01:34:32.421233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:34:32.421240  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ff00 0xc00047ff40]
E0322 01:34:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:34:33.409789  543705 memory.go:184] no items to output this cycle
I0322 01:34:33.409806  543705 cpu.go:275] no items to output this cycle
E0322 01:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:34:43.409786  543705 memory.go:191] Add success.
I0322 01:34:43.409806  543705 cpu.go:282] Add success.
I0322 01:34:43.419886  543705 net.go:648] Add success.
I0322 01:34:43.422825  543705 net.go:770] primary dev: ETH0
I0322 01:34:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:34:43.422851  543705 net.go:698] Add success.
I0322 01:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:34:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:34:53.409792  543705 memory.go:184] no items to output this cycle
I0322 01:34:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:35:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:35:03.409803  543705 memory.go:184] no items to output this cycle
I0322 01:35:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 01:35:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:35:13.409774  543705 memory.go:191] Add success.
W0322 01:35:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:35:13.409800  543705 cpu.go:282] Add success.
W0322 01:35:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:35:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:35:13.420230  543705 net.go:648] Add success.
I0322 01:35:13.422767  543705 net.go:770] primary dev: ETH0
I0322 01:35:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:35:13.422793  543705 net.go:698] Add success.
I0322 01:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:35:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:35:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 01:35:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:35:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 01:35:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:35:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:35:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:35:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:35:23.409791  543705 memory.go:184] no items to output this cycle
I0322 01:35:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 01:35:32.421803  543705 disk_info.go:125] begin check local disk info of client
I0322 01:35:32.424278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:35:32.424287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9000 0xc0003c9040]
E0322 01:35:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:35:33.409811  543705 memory.go:184] no items to output this cycle
I0322 01:35:33.409820  543705 cpu.go:275] no items to output this cycle
E0322 01:35:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:35:43.409799  543705 memory.go:191] Add success.
I0322 01:35:43.409805  543705 cpu.go:282] Add success.
I0322 01:35:43.420031  543705 net.go:648] Add success.
I0322 01:35:43.423199  543705 net.go:770] primary dev: ETH0
I0322 01:35:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:35:43.423224  543705 net.go:698] Add success.
I0322 01:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:35:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:35:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:35:53.410359  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:35:53.410376  543705 memory.go:184] no items to output this cycle
I0322 01:35:53.410397  543705 cpu.go:275] no items to output this cycle
E0322 01:36:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:36:03.409782  543705 memory.go:184] no items to output this cycle
I0322 01:36:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 01:36:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:36:13.409798  543705 memory.go:191] Add success.
I0322 01:36:13.409799  543705 cpu.go:282] Add success.
W0322 01:36:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:36:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:36:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:36:13.420120  543705 net.go:648] Add success.
I0322 01:36:13.422805  543705 net.go:770] primary dev: ETH0
I0322 01:36:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:36:13.422829  543705 net.go:698] Add success.
I0322 01:36:13.463528  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bb6beec7-dfea-48b8-bfdf-4feb53e046af","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:36:13.463563  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:36:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:36:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:36:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 01:36:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:36:14.456524  543705 disk_worker.go:494] system disk:vda1
I0322 01:36:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:36:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:36:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:36:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:36:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:36:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:36:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:36:23.409795  543705 memory.go:184] no items to output this cycle
I0322 01:36:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 01:36:32.424829  543705 disk_info.go:125] begin check local disk info of client
I0322 01:36:32.427320  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:36:32.427328  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c9500 0xc0004c9540]
E0322 01:36:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:36:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 01:36:33.409799  543705 memory.go:184] no items to output this cycle
I0322 01:36:39.435951  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:36:39.435958  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:36:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:36:43.410682  543705 memory.go:191] Add success.
I0322 01:36:43.409817  543705 cpu.go:282] Add success.
I0322 01:36:43.420420  543705 net.go:648] Add success.
I0322 01:36:43.423217  543705 net.go:770] primary dev: ETH0
I0322 01:36:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:36:43.423243  543705 net.go:698] Add success.
I0322 01:36:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:36:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:36:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:36:53.409787  543705 cpu.go:275] no items to output this cycle
I0322 01:36:53.409790  543705 memory.go:184] no items to output this cycle
E0322 01:37:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:37:03.409788  543705 memory.go:184] no items to output this cycle
I0322 01:37:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 01:37:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:37:13.409800  543705 memory.go:191] Add success.
I0322 01:37:13.409801  543705 cpu.go:282] Add success.
W0322 01:37:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:37:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:37:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:37:13.420075  543705 net.go:648] Add success.
I0322 01:37:13.422838  543705 net.go:770] primary dev: ETH0
I0322 01:37:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:37:13.422864  543705 net.go:698] Add success.
I0322 01:37:13.453400  543705 event_worker.go:152] Polling the log file for events...
W0322 01:37:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:37:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 01:37:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:37:14.456856  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:37:14.456866  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:37:14.456871  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:37:14.456942  543705 disk_worker.go:494] system disk:vda1
I0322 01:37:14.456985  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:37:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:37:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:37:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:37:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:37:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:37:16.458040  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:37:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:37:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:37:23.409804  543705 memory.go:184] no items to output this cycle
I0322 01:37:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 01:37:32.427886  543705 disk_info.go:125] begin check local disk info of client
I0322 01:37:32.430441  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:37:32.430449  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c11c0 0xc0004c1200]
E0322 01:37:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:37:33.409899  543705 memory.go:184] no items to output this cycle
I0322 01:37:33.409942  543705 cpu.go:275] no items to output this cycle
E0322 01:37:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:37:43.409796  543705 memory.go:191] Add success.
I0322 01:37:43.409837  543705 cpu.go:282] Add success.
I0322 01:37:43.419880  543705 net.go:648] Add success.
I0322 01:37:43.422401  543705 net.go:770] primary dev: ETH0
I0322 01:37:43.422415  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:37:43.422427  543705 net.go:698] Add success.
I0322 01:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:37:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:37:53.409786  543705 memory.go:184] no items to output this cycle
I0322 01:37:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 01:38:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:38:03.409770  543705 memory.go:184] no items to output this cycle
I0322 01:38:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 01:38:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:38:13.409817  543705 memory.go:191] Add success.
I0322 01:38:13.409823  543705 cpu.go:282] Add success.
W0322 01:38:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:38:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:38:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:38:13.420218  543705 net.go:648] Add success.
I0322 01:38:13.422790  543705 net.go:770] primary dev: ETH0
I0322 01:38:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:38:13.422827  543705 net.go:698] Add success.
I0322 01:38:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:38:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:38:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 01:38:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:38:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 01:38:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:38:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:38:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:38:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:38:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:38:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:38:23.409793  543705 memory.go:184] no items to output this cycle
I0322 01:38:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 01:38:32.430854  543705 disk_info.go:125] begin check local disk info of client
I0322 01:38:32.433268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:38:32.433276  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051ea80 0xc00051eac0]
E0322 01:38:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:38:33.409791  543705 memory.go:184] no items to output this cycle
I0322 01:38:33.409807  543705 cpu.go:275] no items to output this cycle
E0322 01:38:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:38:43.409808  543705 cpu.go:282] Add success.
I0322 01:38:43.409817  543705 memory.go:191] Add success.
I0322 01:38:43.419913  543705 net.go:648] Add success.
I0322 01:38:43.422460  543705 net.go:770] primary dev: ETH0
I0322 01:38:43.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:38:43.422485  543705 net.go:698] Add success.
I0322 01:38:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:38:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:38:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:38:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:38:53.409796  543705 memory.go:184] no items to output this cycle
I0322 01:38:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 01:39:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:39:03.409769  543705 memory.go:184] no items to output this cycle
I0322 01:39:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 01:39:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:39:13.409818  543705 memory.go:191] Add success.
I0322 01:39:13.409824  543705 cpu.go:282] Add success.
W0322 01:39:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:39:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:39:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:39:13.420059  543705 net.go:648] Add success.
I0322 01:39:13.422834  543705 net.go:770] primary dev: ETH0
I0322 01:39:13.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:39:13.422872  543705 net.go:698] Add success.
I0322 01:39:13.467976  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"21e8355f-d02c-448e-a41f-64d4ffe4c282","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:39:13.468009  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:39:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:39:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 01:39:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:39:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 01:39:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:39:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:39:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:39:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:39:23.409928  543705 memory.go:184] no items to output this cycle
I0322 01:39:23.409930  543705 cpu.go:275] no items to output this cycle
I0322 01:39:32.433929  543705 disk_info.go:125] begin check local disk info of client
I0322 01:39:32.436429  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:39:32.436438  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0040 0xc0003c0080]
E0322 01:39:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:39:33.409794  543705 memory.go:184] no items to output this cycle
I0322 01:39:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 01:39:39.436936  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:39:39.436942  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:39:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:39:43.410658  543705 memory.go:191] Add success.
I0322 01:39:43.409808  543705 cpu.go:282] Add success.
I0322 01:39:43.420455  543705 net.go:648] Add success.
I0322 01:39:43.423057  543705 net.go:770] primary dev: ETH0
I0322 01:39:43.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:39:43.423082  543705 net.go:698] Add success.
I0322 01:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:39:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:39:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:39:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:39:53.409796  543705 memory.go:184] no items to output this cycle
I0322 01:39:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 01:40:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:40:03.409768  543705 memory.go:184] no items to output this cycle
I0322 01:40:03.409776  543705 cpu.go:275] no items to output this cycle
E0322 01:40:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:40:13.409804  543705 memory.go:191] Add success.
I0322 01:40:13.409811  543705 cpu.go:282] Add success.
W0322 01:40:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:40:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:40:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:40:13.420041  543705 net.go:648] Add success.
I0322 01:40:13.422752  543705 net.go:770] primary dev: ETH0
I0322 01:40:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:40:13.422777  543705 net.go:698] Add success.
I0322 01:40:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:40:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:40:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 01:40:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:40:14.456479  543705 disk_worker.go:494] system disk:vda1
I0322 01:40:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:40:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:40:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:40:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:40:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:40:23.409889  543705 memory.go:184] no items to output this cycle
I0322 01:40:23.409979  543705 cpu.go:275] no items to output this cycle
I0322 01:40:32.436883  543705 disk_info.go:125] begin check local disk info of client
I0322 01:40:32.439370  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:40:32.439378  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470e40 0xc000470e80]
E0322 01:40:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:40:33.409761  543705 memory.go:184] no items to output this cycle
I0322 01:40:33.409794  543705 cpu.go:275] no items to output this cycle
E0322 01:40:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:40:43.409799  543705 memory.go:191] Add success.
I0322 01:40:43.409799  543705 cpu.go:282] Add success.
I0322 01:40:43.419956  543705 net.go:648] Add success.
I0322 01:40:43.422653  543705 net.go:770] primary dev: ETH0
I0322 01:40:43.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:40:43.422683  543705 net.go:698] Add success.
I0322 01:40:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:40:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:40:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:40:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:40:53.409767  543705 memory.go:184] no items to output this cycle
I0322 01:40:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:41:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:41:03.409808  543705 memory.go:184] no items to output this cycle
I0322 01:41:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 01:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:41:13.409791  543705 cpu.go:282] Add success.
I0322 01:41:13.409792  543705 memory.go:191] Add success.
W0322 01:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:41:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:41:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:41:13.420058  543705 net.go:648] Add success.
I0322 01:41:13.422801  543705 net.go:770] primary dev: ETH0
I0322 01:41:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:41:13.422831  543705 net.go:698] Add success.
I0322 01:41:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:41:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:41:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 01:41:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:41:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 01:41:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:41:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:41:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:41:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:41:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:41:23.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:41:23.409896  543705 memory.go:184] no items to output this cycle
I0322 01:41:23.409909  543705 cpu.go:275] no items to output this cycle
I0322 01:41:32.439939  543705 disk_info.go:125] begin check local disk info of client
I0322 01:41:32.442333  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:41:32.442340  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f000 0xc00035f040]
E0322 01:41:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:41:33.409794  543705 memory.go:184] no items to output this cycle
I0322 01:41:33.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:41:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:41:43.409790  543705 memory.go:191] Add success.
I0322 01:41:43.409830  543705 cpu.go:282] Add success.
I0322 01:41:43.419885  543705 net.go:648] Add success.
I0322 01:41:43.422602  543705 net.go:770] primary dev: ETH0
I0322 01:41:43.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:41:43.422628  543705 net.go:698] Add success.
I0322 01:41:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:41:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:41:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:41:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:41:53.409798  543705 memory.go:184] no items to output this cycle
I0322 01:41:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 01:42:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:42:03.409804  543705 memory.go:184] no items to output this cycle
I0322 01:42:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 01:42:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:42:13.409819  543705 memory.go:191] Add success.
I0322 01:42:13.409822  543705 cpu.go:282] Add success.
W0322 01:42:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:42:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:42:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:42:13.420038  543705 net.go:648] Add success.
I0322 01:42:13.422781  543705 net.go:770] primary dev: ETH0
I0322 01:42:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:42:13.422806  543705 net.go:698] Add success.
I0322 01:42:13.464175  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e5e98c1c-18a6-4867-8b0d-733f575c26fa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:42:13.464206  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 01:42:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:42:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 01:42:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:42:14.456204  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:42:14.456213  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:42:14.456219  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:42:14.456476  543705 disk_worker.go:494] system disk:vda1
I0322 01:42:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:42:15.457102  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:42:15.457111  543705 custom_config.go:64] query custom config with name: huawei_npu
E0322 01:42:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:42:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:42:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:42:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:42:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:42:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:42:23.409785  543705 memory.go:184] no items to output this cycle
I0322 01:42:23.409794  543705 cpu.go:275] no items to output this cycle
I0322 01:42:32.442910  543705 disk_info.go:125] begin check local disk info of client
I0322 01:42:32.445381  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:42:32.445388  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004702c0 0xc000470300]
E0322 01:42:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:42:33.409775  543705 memory.go:184] no items to output this cycle
I0322 01:42:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 01:42:39.437943  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:42:39.437949  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:42:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:42:43.410643  543705 memory.go:191] Add success.
I0322 01:42:43.409811  543705 cpu.go:282] Add success.
I0322 01:42:43.420361  543705 net.go:648] Add success.
I0322 01:42:43.422991  543705 net.go:770] primary dev: ETH0
I0322 01:42:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:42:43.423022  543705 net.go:698] Add success.
I0322 01:42:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:42:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:42:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:42:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:42:53.409805  543705 memory.go:184] no items to output this cycle
I0322 01:42:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 01:43:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:43:03.409806  543705 memory.go:184] no items to output this cycle
I0322 01:43:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 01:43:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:43:13.409777  543705 memory.go:191] Add success.
I0322 01:43:13.409797  543705 cpu.go:282] Add success.
W0322 01:43:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:43:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:43:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:43:13.420116  543705 net.go:648] Add success.
I0322 01:43:13.422814  543705 net.go:770] primary dev: ETH0
I0322 01:43:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:43:13.422843  543705 net.go:698] Add success.
I0322 01:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:43:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:43:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 01:43:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:43:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 01:43:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:43:15.456015  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:43:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:43:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:43:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:43:23.409780  543705 memory.go:184] no items to output this cycle
I0322 01:43:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 01:43:32.445912  543705 disk_info.go:125] begin check local disk info of client
I0322 01:43:32.448411  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:43:32.448417  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0322 01:43:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:43:33.409822  543705 memory.go:184] no items to output this cycle
I0322 01:43:33.409831  543705 cpu.go:275] no items to output this cycle
E0322 01:43:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:43:43.409822  543705 memory.go:191] Add success.
I0322 01:43:43.409827  543705 cpu.go:282] Add success.
I0322 01:43:43.419967  543705 net.go:648] Add success.
I0322 01:43:43.422645  543705 net.go:770] primary dev: ETH0
I0322 01:43:43.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:43:43.422671  543705 net.go:698] Add success.
I0322 01:43:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:43:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:43:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:43:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:43:53.409772  543705 memory.go:184] no items to output this cycle
I0322 01:43:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 01:44:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:44:03.409779  543705 memory.go:184] no items to output this cycle
I0322 01:44:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 01:44:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:44:13.409791  543705 memory.go:191] Add success.
I0322 01:44:13.409790  543705 cpu.go:282] Add success.
W0322 01:44:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:44:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:44:13.420057  543705 net.go:648] Add success.
I0322 01:44:13.422652  543705 net.go:770] primary dev: ETH0
I0322 01:44:13.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:44:13.422680  543705 net.go:698] Add success.
I0322 01:44:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:44:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:44:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 01:44:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:44:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 01:44:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:44:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:44:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:44:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:44:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:44:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:44:23.409808  543705 memory.go:184] no items to output this cycle
I0322 01:44:23.409818  543705 cpu.go:275] no items to output this cycle
I0322 01:44:32.448966  543705 disk_info.go:125] begin check local disk info of client
I0322 01:44:32.451362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:44:32.451369  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf00 0xc0001abf40]
E0322 01:44:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:44:33.409783  543705 memory.go:184] no items to output this cycle
I0322 01:44:33.409807  543705 cpu.go:275] no items to output this cycle
E0322 01:44:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:44:43.409788  543705 memory.go:191] Add success.
I0322 01:44:43.409817  543705 cpu.go:282] Add success.
I0322 01:44:43.419973  543705 net.go:648] Add success.
I0322 01:44:43.422524  543705 net.go:770] primary dev: ETH0
I0322 01:44:43.422538  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:44:43.422553  543705 net.go:698] Add success.
I0322 01:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:44:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:44:53.409771  543705 memory.go:184] no items to output this cycle
I0322 01:44:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 01:45:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:45:03.409773  543705 memory.go:184] no items to output this cycle
I0322 01:45:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 01:45:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:45:13.409812  543705 memory.go:191] Add success.
I0322 01:45:13.409824  543705 cpu.go:282] Add success.
W0322 01:45:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:45:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:45:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:45:13.420048  543705 net.go:648] Add success.
I0322 01:45:13.422596  543705 net.go:770] primary dev: ETH0
I0322 01:45:13.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:45:13.422621  543705 net.go:698] Add success.
I0322 01:45:13.529723  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"373f2d38-4150-45ae-9e00-b2f0158f6f9a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:45:13.529756  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:45:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:45:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:45:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 01:45:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:45:14.456854  543705 disk_worker.go:494] system disk:vda1
I0322 01:45:14.456882  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:45:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:45:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:45:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:45:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:45:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:45:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:45:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 01:45:23.409802  543705 memory.go:184] no items to output this cycle
I0322 01:45:32.451955  543705 disk_info.go:125] begin check local disk info of client
I0322 01:45:32.454478  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:45:32.454494  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0322 01:45:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:45:33.409825  543705 memory.go:184] no items to output this cycle
I0322 01:45:33.409833  543705 cpu.go:275] no items to output this cycle
I0322 01:45:39.438943  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:45:39.438949  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:45:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:45:43.410760  543705 memory.go:191] Add success.
I0322 01:45:43.409819  543705 cpu.go:282] Add success.
I0322 01:45:43.420526  543705 net.go:648] Add success.
I0322 01:45:43.423230  543705 net.go:770] primary dev: ETH0
I0322 01:45:43.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:45:43.423262  543705 net.go:698] Add success.
I0322 01:45:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:45:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:45:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:45:53.409775  543705 memory.go:184] no items to output this cycle
I0322 01:45:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 01:46:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:46:03.409790  543705 memory.go:184] no items to output this cycle
I0322 01:46:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 01:46:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:46:13.409812  543705 memory.go:191] Add success.
I0322 01:46:13.409825  543705 cpu.go:282] Add success.
W0322 01:46:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:46:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:46:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:46:13.420118  543705 net.go:648] Add success.
I0322 01:46:13.422756  543705 net.go:770] primary dev: ETH0
I0322 01:46:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:46:13.422785  543705 net.go:698] Add success.
I0322 01:46:14.453927  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:46:14.455384  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:46:14.455396  543705 disk_worker.go:708] disk space is not compliant
W0322 01:46:14.455400  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:46:14.457097  543705 disk_worker.go:494] system disk:vda1
I0322 01:46:14.457126  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:46:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:46:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:46:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:46:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:46:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:46:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:46:23.409782  543705 memory.go:184] no items to output this cycle
I0322 01:46:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 01:46:32.454974  543705 disk_info.go:125] begin check local disk info of client
I0322 01:46:32.457454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:46:32.457462  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352740 0xc000352780]
E0322 01:46:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:46:33.409786  543705 memory.go:184] no items to output this cycle
I0322 01:46:33.409802  543705 cpu.go:275] no items to output this cycle
E0322 01:46:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:46:43.409816  543705 memory.go:191] Add success.
I0322 01:46:43.409831  543705 cpu.go:282] Add success.
I0322 01:46:43.419865  543705 net.go:648] Add success.
I0322 01:46:43.422778  543705 net.go:770] primary dev: ETH0
I0322 01:46:43.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:46:43.422803  543705 net.go:698] Add success.
I0322 01:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:46:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:46:53.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:46:53.410373  543705 memory.go:184] no items to output this cycle
I0322 01:46:53.410386  543705 cpu.go:275] no items to output this cycle
E0322 01:47:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:47:03.409795  543705 memory.go:184] no items to output this cycle
I0322 01:47:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 01:47:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:47:13.409811  543705 memory.go:191] Add success.
I0322 01:47:13.409817  543705 cpu.go:282] Add success.
W0322 01:47:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:47:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:47:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:47:13.420050  543705 net.go:648] Add success.
I0322 01:47:13.422706  543705 net.go:770] primary dev: ETH0
I0322 01:47:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:47:13.422730  543705 net.go:698] Add success.
I0322 01:47:13.453294  543705 event_worker.go:152] Polling the log file for events...
W0322 01:47:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:47:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 01:47:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:47:14.456118  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:47:14.456128  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:47:14.456134  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:47:14.456428  543705 disk_worker.go:494] system disk:vda1
I0322 01:47:14.456461  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:47:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:47:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:47:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:47:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:47:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:47:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:47:16.472306  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:47:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:47:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 01:47:23.409801  543705 memory.go:184] no items to output this cycle
I0322 01:47:32.457986  543705 disk_info.go:125] begin check local disk info of client
I0322 01:47:32.460482  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:47:32.460488  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bb480 0xc0003bb4c0]
E0322 01:47:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:47:33.409803  543705 memory.go:184] no items to output this cycle
I0322 01:47:33.409805  543705 cpu.go:275] no items to output this cycle
E0322 01:47:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:47:43.409806  543705 memory.go:191] Add success.
I0322 01:47:43.409812  543705 cpu.go:282] Add success.
I0322 01:47:43.420052  543705 net.go:648] Add success.
I0322 01:47:43.422911  543705 net.go:770] primary dev: ETH0
I0322 01:47:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:47:43.422936  543705 net.go:698] Add success.
I0322 01:47:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:47:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:47:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:47:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:47:53.409778  543705 memory.go:184] no items to output this cycle
I0322 01:47:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 01:48:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:48:03.409802  543705 memory.go:184] no items to output this cycle
I0322 01:48:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 01:48:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:48:13.409807  543705 memory.go:191] Add success.
I0322 01:48:13.409807  543705 cpu.go:282] Add success.
W0322 01:48:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:48:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:48:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:48:13.420115  543705 net.go:648] Add success.
I0322 01:48:13.422945  543705 net.go:770] primary dev: ETH0
I0322 01:48:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:48:13.422970  543705 net.go:698] Add success.
I0322 01:48:13.468966  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"831b7e53-e4c3-496f-8c41-f1fa78f60e13","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:48:13.468998  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:48:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:48:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:48:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 01:48:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:48:14.456541  543705 disk_worker.go:494] system disk:vda1
I0322 01:48:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:48:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:48:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:48:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:48:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:48:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:48:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:48:23.409791  543705 memory.go:184] no items to output this cycle
I0322 01:48:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 01:48:32.460993  543705 disk_info.go:125] begin check local disk info of client
I0322 01:48:32.463359  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:48:32.463365  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc80 0xc0001abcc0]
E0322 01:48:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:48:33.409806  543705 memory.go:184] no items to output this cycle
I0322 01:48:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 01:48:39.439959  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:48:39.439966  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:48:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:48:43.410831  543705 memory.go:191] Add success.
I0322 01:48:43.409843  543705 cpu.go:282] Add success.
I0322 01:48:43.420539  543705 net.go:648] Add success.
I0322 01:48:43.422966  543705 net.go:770] primary dev: ETH0
I0322 01:48:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:48:43.422996  543705 net.go:698] Add success.
I0322 01:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:48:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:48:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:48:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:48:53.409780  543705 memory.go:184] no items to output this cycle
I0322 01:48:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 01:49:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:49:03.409791  543705 memory.go:184] no items to output this cycle
I0322 01:49:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 01:49:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:49:13.409822  543705 memory.go:191] Add success.
I0322 01:49:13.409835  543705 cpu.go:282] Add success.
W0322 01:49:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:49:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:49:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:49:13.420090  543705 net.go:648] Add success.
I0322 01:49:13.422863  543705 net.go:770] primary dev: ETH0
I0322 01:49:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:49:13.422888  543705 net.go:698] Add success.
I0322 01:49:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:49:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:49:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 01:49:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:49:14.456564  543705 disk_worker.go:494] system disk:vda1
I0322 01:49:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:49:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:49:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:49:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:49:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:49:16.472521  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:49:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:49:23.409790  543705 memory.go:184] no items to output this cycle
I0322 01:49:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 01:49:32.464020  543705 disk_info.go:125] begin check local disk info of client
I0322 01:49:32.466555  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:49:32.466561  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471ec0 0xc000471f00]
E0322 01:49:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:49:33.409813  543705 memory.go:184] no items to output this cycle
I0322 01:49:33.409815  543705 cpu.go:275] no items to output this cycle
E0322 01:49:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:49:43.409814  543705 memory.go:191] Add success.
I0322 01:49:43.409817  543705 cpu.go:282] Add success.
I0322 01:49:43.419954  543705 net.go:648] Add success.
I0322 01:49:43.422742  543705 net.go:770] primary dev: ETH0
I0322 01:49:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:49:43.422788  543705 net.go:698] Add success.
I0322 01:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:49:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:49:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:49:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:49:53.409811  543705 memory.go:184] no items to output this cycle
I0322 01:49:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 01:50:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:50:03.409808  543705 memory.go:184] no items to output this cycle
I0322 01:50:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 01:50:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:50:13.409795  543705 memory.go:191] Add success.
I0322 01:50:13.409796  543705 cpu.go:282] Add success.
W0322 01:50:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:50:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:50:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:50:13.420074  543705 net.go:648] Add success.
I0322 01:50:13.422860  543705 net.go:770] primary dev: ETH0
I0322 01:50:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:50:13.422888  543705 net.go:698] Add success.
I0322 01:50:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:50:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:50:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 01:50:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:50:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 01:50:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:50:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:50:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:50:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:50:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:50:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:50:23.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:50:23.409885  543705 memory.go:184] no items to output this cycle
I0322 01:50:23.410042  543705 cpu.go:275] no items to output this cycle
I0322 01:50:32.467025  543705 disk_info.go:125] begin check local disk info of client
I0322 01:50:32.469549  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:50:32.469556  543705 disk_info.go:196] parse disk info done, disk is : [0xc00054cc40 0xc00054cc80]
E0322 01:50:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:50:33.409798  543705 memory.go:184] no items to output this cycle
I0322 01:50:33.409815  543705 cpu.go:275] no items to output this cycle
E0322 01:50:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:50:43.409793  543705 memory.go:191] Add success.
I0322 01:50:43.409812  543705 cpu.go:282] Add success.
I0322 01:50:43.419986  543705 net.go:648] Add success.
I0322 01:50:43.422562  543705 net.go:770] primary dev: ETH0
I0322 01:50:43.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:50:43.422592  543705 net.go:698] Add success.
I0322 01:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:50:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:50:53.409793  543705 memory.go:184] no items to output this cycle
I0322 01:50:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:51:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:51:03.409805  543705 memory.go:184] no items to output this cycle
I0322 01:51:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 01:51:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:51:13.409790  543705 memory.go:191] Add success.
I0322 01:51:13.409808  543705 cpu.go:282] Add success.
W0322 01:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:51:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:51:13.420194  543705 net.go:648] Add success.
I0322 01:51:13.422897  543705 net.go:770] primary dev: ETH0
I0322 01:51:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:51:13.422921  543705 net.go:698] Add success.
I0322 01:51:13.807717  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"47ea1867-b997-469b-a2ed-fbc9501aaa39","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:51:13.807756  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:51:14.454697  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:51:14.454965  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:51:14.454977  543705 disk_worker.go:708] disk space is not compliant
W0322 01:51:14.454979  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:51:14.456513  543705 disk_worker.go:494] system disk:vda1
I0322 01:51:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:51:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:51:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:51:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:51:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:51:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:51:23.409772  543705 memory.go:184] no items to output this cycle
I0322 01:51:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 01:51:32.470046  543705 disk_info.go:125] begin check local disk info of client
I0322 01:51:32.472521  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:51:32.472527  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8440 0xc0004d8480]
E0322 01:51:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:51:33.409810  543705 memory.go:184] no items to output this cycle
I0322 01:51:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 01:51:39.440963  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:51:39.440969  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:51:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:51:43.410661  543705 memory.go:191] Add success.
I0322 01:51:43.409836  543705 cpu.go:282] Add success.
I0322 01:51:43.420376  543705 net.go:648] Add success.
I0322 01:51:43.423028  543705 net.go:770] primary dev: ETH0
I0322 01:51:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:51:43.423064  543705 net.go:698] Add success.
I0322 01:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:51:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:51:53.410231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:51:53.410247  543705 memory.go:184] no items to output this cycle
I0322 01:51:53.410275  543705 cpu.go:275] no items to output this cycle
E0322 01:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:52:03.409783  543705 memory.go:184] no items to output this cycle
I0322 01:52:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 01:52:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:52:13.409843  543705 memory.go:191] Add success.
W0322 01:52:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:52:13.409893  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:52:13.409897  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:52:13.410227  543705 cpu.go:282] Add success.
I0322 01:52:13.420445  543705 net.go:648] Add success.
I0322 01:52:13.421534  543705 net.go:770] primary dev: ETH0
I0322 01:52:13.421554  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:52:13.421572  543705 net.go:698] Add success.
W0322 01:52:14.455272  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:52:14.456134  543705 disk_worker.go:708] disk space is not compliant
W0322 01:52:14.456140  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:52:14.458509  543705 disk_worker.go:494] system disk:vda1
I0322 01:52:14.458558  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:52:14.458683  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:52:14.458691  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:52:14.458696  543705 custom_config.go:64] query custom config with name: gpu
E0322 01:52:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:52:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:52:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:52:16.458003  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:52:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:52:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:52:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:52:23.409779  543705 memory.go:184] no items to output this cycle
I0322 01:52:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 01:52:32.473055  543705 disk_info.go:125] begin check local disk info of client
I0322 01:52:32.475651  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:52:32.475659  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa2c0 0xc0001aa300]
E0322 01:52:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:52:33.409779  543705 memory.go:184] no items to output this cycle
I0322 01:52:33.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:52:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:52:43.409801  543705 memory.go:191] Add success.
I0322 01:52:43.409816  543705 cpu.go:282] Add success.
I0322 01:52:43.419881  543705 net.go:648] Add success.
I0322 01:52:43.422599  543705 net.go:770] primary dev: ETH0
I0322 01:52:43.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:52:43.422630  543705 net.go:698] Add success.
I0322 01:52:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:52:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:52:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:52:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:52:53.409796  543705 memory.go:184] no items to output this cycle
I0322 01:52:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 01:53:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:53:03.409793  543705 cpu.go:275] no items to output this cycle
I0322 01:53:03.409797  543705 memory.go:184] no items to output this cycle
E0322 01:53:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:53:13.409822  543705 memory.go:191] Add success.
I0322 01:53:13.409829  543705 cpu.go:282] Add success.
W0322 01:53:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:53:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:53:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:53:13.420222  543705 net.go:648] Add success.
I0322 01:53:13.423618  543705 net.go:770] primary dev: ETH0
I0322 01:53:13.423634  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:53:13.423648  543705 net.go:698] Add success.
I0322 01:53:14.453954  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:53:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:53:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0322 01:53:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:53:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 01:53:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:53:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:53:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:53:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:53:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:53:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:53:23.409782  543705 memory.go:184] no items to output this cycle
I0322 01:53:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 01:53:32.476013  543705 disk_info.go:125] begin check local disk info of client
I0322 01:53:32.478498  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:53:32.478504  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051d740 0xc00051d780]
E0322 01:53:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:53:33.409825  543705 memory.go:184] no items to output this cycle
I0322 01:53:33.409837  543705 cpu.go:275] no items to output this cycle
E0322 01:53:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:53:43.409790  543705 memory.go:191] Add success.
I0322 01:53:43.409812  543705 cpu.go:282] Add success.
I0322 01:53:43.419877  543705 net.go:648] Add success.
I0322 01:53:43.422450  543705 net.go:770] primary dev: ETH0
I0322 01:53:43.422463  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:53:43.422475  543705 net.go:698] Add success.
I0322 01:53:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:53:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:53:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:53:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:53:53.409792  543705 memory.go:184] no items to output this cycle
I0322 01:53:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 01:54:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:54:03.409795  543705 memory.go:184] no items to output this cycle
I0322 01:54:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 01:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:54:13.409794  543705 memory.go:191] Add success.
W0322 01:54:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:54:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:54:13.409835  543705 cpu.go:282] Add success.
I0322 01:54:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:54:13.419718  543705 net.go:648] Add success.
I0322 01:54:13.422324  543705 net.go:770] primary dev: ETH0
I0322 01:54:13.422337  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:54:13.422348  543705 net.go:698] Add success.
I0322 01:54:13.468011  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"01d856f4-e7e8-4fd9-96a5-f3f39e238af3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:54:13.468042  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 01:54:14.455130  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:54:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:54:14.455273  543705 disk_worker.go:708] disk space is not compliant
W0322 01:54:14.455277  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:54:14.456643  543705 disk_worker.go:494] system disk:vda1
I0322 01:54:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:54:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:54:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:54:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:54:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:54:23.409790  543705 memory.go:184] no items to output this cycle
I0322 01:54:23.409792  543705 cpu.go:275] no items to output this cycle
I0322 01:54:32.479087  543705 disk_info.go:125] begin check local disk info of client
I0322 01:54:32.481663  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:54:32.481671  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
E0322 01:54:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:54:33.409765  543705 memory.go:184] no items to output this cycle
I0322 01:54:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 01:54:39.441952  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:54:39.441959  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:54:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:54:43.410605  543705 memory.go:191] Add success.
I0322 01:54:43.409839  543705 cpu.go:282] Add success.
I0322 01:54:43.420425  543705 net.go:648] Add success.
I0322 01:54:43.422780  543705 net.go:770] primary dev: ETH0
I0322 01:54:43.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:54:43.422807  543705 net.go:698] Add success.
I0322 01:54:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:54:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:54:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:54:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:54:53.409801  543705 memory.go:184] no items to output this cycle
I0322 01:54:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 01:55:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:55:03.409767  543705 memory.go:184] no items to output this cycle
I0322 01:55:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 01:55:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:55:13.409812  543705 memory.go:191] Add success.
I0322 01:55:13.409817  543705 cpu.go:282] Add success.
W0322 01:55:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:55:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:55:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:55:13.420177  543705 net.go:648] Add success.
I0322 01:55:13.423375  543705 net.go:770] primary dev: ETH0
I0322 01:55:13.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:55:13.423401  543705 net.go:698] Add success.
I0322 01:55:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:55:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:55:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 01:55:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:55:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 01:55:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:55:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:55:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:55:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:55:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:55:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:55:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:55:23.409793  543705 memory.go:184] no items to output this cycle
I0322 01:55:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 01:55:32.482052  543705 disk_info.go:125] begin check local disk info of client
I0322 01:55:32.484513  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:55:32.484519  543705 disk_info.go:196] parse disk info done, disk is : [0xc000507d40 0xc000507d80]
E0322 01:55:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:55:33.409802  543705 memory.go:184] no items to output this cycle
I0322 01:55:33.409815  543705 cpu.go:275] no items to output this cycle
E0322 01:55:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:55:43.409795  543705 memory.go:191] Add success.
I0322 01:55:43.409796  543705 cpu.go:282] Add success.
I0322 01:55:43.419961  543705 net.go:648] Add success.
I0322 01:55:43.422744  543705 net.go:770] primary dev: ETH0
I0322 01:55:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:55:43.422770  543705 net.go:698] Add success.
I0322 01:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:55:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:55:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:55:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:55:53.409799  543705 memory.go:184] no items to output this cycle
I0322 01:55:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 01:56:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:56:03.409773  543705 memory.go:184] no items to output this cycle
I0322 01:56:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 01:56:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:56:13.409784  543705 memory.go:191] Add success.
W0322 01:56:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 01:56:13.409815  543705 cpu.go:282] Add success.
W0322 01:56:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:56:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:56:13.420182  543705 net.go:648] Add success.
I0322 01:56:13.422809  543705 net.go:770] primary dev: ETH0
I0322 01:56:13.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:56:13.422837  543705 net.go:698] Add success.
I0322 01:56:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:56:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:56:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 01:56:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:56:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 01:56:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:56:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:56:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:56:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:56:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:56:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:56:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:56:23.409769  543705 memory.go:184] no items to output this cycle
I0322 01:56:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 01:56:32.485120  543705 disk_info.go:125] begin check local disk info of client
I0322 01:56:32.487630  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:56:32.487636  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033d540 0xc00033d580]
E0322 01:56:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:56:33.409800  543705 memory.go:184] no items to output this cycle
I0322 01:56:33.409808  543705 cpu.go:275] no items to output this cycle
E0322 01:56:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:56:43.409796  543705 memory.go:191] Add success.
I0322 01:56:43.409798  543705 cpu.go:282] Add success.
I0322 01:56:43.419736  543705 net.go:648] Add success.
I0322 01:56:43.422568  543705 net.go:770] primary dev: ETH0
I0322 01:56:43.422584  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:56:43.422598  543705 net.go:698] Add success.
I0322 01:56:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:56:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:56:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:56:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:56:53.409793  543705 memory.go:184] no items to output this cycle
I0322 01:56:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:57:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:57:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 01:57:03.409788  543705 memory.go:184] no items to output this cycle
E0322 01:57:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:57:13.409807  543705 memory.go:191] Add success.
I0322 01:57:13.409817  543705 cpu.go:282] Add success.
W0322 01:57:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:57:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:57:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:57:13.420463  543705 net.go:648] Add success.
I0322 01:57:13.422911  543705 net.go:770] primary dev: ETH0
I0322 01:57:13.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:57:13.422942  543705 net.go:698] Add success.
I0322 01:57:13.429249  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 01:57:13.453421  543705 event_worker.go:152] Polling the log file for events...
I0322 01:57:13.463509  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe15de46-0210-497e-b2bd-79d03d37a3d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 01:57:13.463543  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 01:57:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:57:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 01:57:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0322 01:57:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 01:57:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 01:57:14.455897  543705 custom_config.go:64] query custom config with name: gpu
I0322 01:57:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 01:57:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 01:57:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 01:57:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:57:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 01:57:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 01:57:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:57:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:57:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:57:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:57:23.409766  543705 memory.go:184] no items to output this cycle
I0322 01:57:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 01:57:32.488152  543705 disk_info.go:125] begin check local disk info of client
I0322 01:57:32.490653  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:57:32.490659  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462040 0xc000462080]
E0322 01:57:33.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:57:33.409919  543705 memory.go:184] no items to output this cycle
I0322 01:57:33.409986  543705 cpu.go:275] no items to output this cycle
I0322 01:57:39.442975  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 01:57:39.442983  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 01:57:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:57:43.410840  543705 memory.go:191] Add success.
I0322 01:57:43.409833  543705 cpu.go:282] Add success.
I0322 01:57:43.420534  543705 net.go:648] Add success.
I0322 01:57:43.423175  543705 net.go:770] primary dev: ETH0
I0322 01:57:43.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:57:43.423201  543705 net.go:698] Add success.
I0322 01:57:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:57:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:57:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:57:53.410423  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:57:53.410441  543705 memory.go:184] no items to output this cycle
I0322 01:57:53.410477  543705 cpu.go:275] no items to output this cycle
E0322 01:58:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:58:03.409775  543705 memory.go:184] no items to output this cycle
I0322 01:58:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 01:58:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:58:13.409813  543705 memory.go:191] Add success.
I0322 01:58:13.409820  543705 cpu.go:282] Add success.
W0322 01:58:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:58:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:58:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:58:13.420235  543705 net.go:648] Add success.
I0322 01:58:13.423059  543705 net.go:770] primary dev: ETH0
I0322 01:58:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:58:13.423082  543705 net.go:698] Add success.
I0322 01:58:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:58:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:58:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 01:58:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:58:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 01:58:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:58:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:58:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:58:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:58:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:58:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:58:23.409790  543705 memory.go:184] no items to output this cycle
I0322 01:58:23.409800  543705 cpu.go:275] no items to output this cycle
I0322 01:58:32.491144  543705 disk_info.go:125] begin check local disk info of client
I0322 01:58:32.493689  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:58:32.493696  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a640 0xc00048a680]
E0322 01:58:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:58:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 01:58:33.409795  543705 memory.go:184] no items to output this cycle
E0322 01:58:43.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:58:43.409917  543705 memory.go:191] Add success.
I0322 01:58:43.409919  543705 cpu.go:282] Add success.
I0322 01:58:43.419718  543705 net.go:648] Add success.
I0322 01:58:43.422363  543705 net.go:770] primary dev: ETH0
I0322 01:58:43.422376  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:58:43.422388  543705 net.go:698] Add success.
I0322 01:58:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:58:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:58:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:58:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:58:53.409797  543705 memory.go:184] no items to output this cycle
I0322 01:58:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 01:59:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:59:03.409780  543705 memory.go:184] no items to output this cycle
I0322 01:59:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 01:59:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:59:13.409816  543705 memory.go:191] Add success.
I0322 01:59:13.409823  543705 cpu.go:282] Add success.
W0322 01:59:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 01:59:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 01:59:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 01:59:13.420193  543705 net.go:648] Add success.
I0322 01:59:13.423000  543705 net.go:770] primary dev: ETH0
I0322 01:59:13.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:59:13.423026  543705 net.go:698] Add success.
I0322 01:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 01:59:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 01:59:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 01:59:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 01:59:14.456509  543705 disk_worker.go:494] system disk:vda1
I0322 01:59:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 01:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 01:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:59:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:59:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 01:59:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 01:59:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:59:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 01:59:23.409786  543705 memory.go:184] no items to output this cycle
I0322 01:59:32.494114  543705 disk_info.go:125] begin check local disk info of client
I0322 01:59:32.496696  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 01:59:32.496702  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003421c0 0xc000342200]
E0322 01:59:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:59:33.409773  543705 memory.go:184] no items to output this cycle
I0322 01:59:33.409839  543705 cpu.go:275] no items to output this cycle
E0322 01:59:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:59:43.409831  543705 memory.go:191] Add success.
I0322 01:59:43.409831  543705 cpu.go:282] Add success.
I0322 01:59:43.420156  543705 net.go:648] Add success.
I0322 01:59:43.422758  543705 net.go:770] primary dev: ETH0
I0322 01:59:43.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0322 01:59:43.422783  543705 net.go:698] Add success.
I0322 01:59:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 01:59:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 01:59:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 01:59:53.410385  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 01:59:53.410401  543705 memory.go:184] no items to output this cycle
I0322 01:59:53.410407  543705 cpu.go:275] no items to output this cycle
E0322 02:00:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:00:03.409775  543705 memory.go:184] no items to output this cycle
I0322 02:00:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 02:00:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:00:13.409801  543705 memory.go:191] Add success.
I0322 02:00:13.409813  543705 cpu.go:282] Add success.
W0322 02:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:00:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:00:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:00:13.420161  543705 net.go:648] Add success.
I0322 02:00:13.422944  543705 net.go:770] primary dev: ETH0
I0322 02:00:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:00:13.422972  543705 net.go:698] Add success.
I0322 02:00:13.539094  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"abe5e5a6-97a5-4894-8527-ad2bc9d9fd66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:00:13.539135  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:00:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:00:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:00:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 02:00:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:00:14.456682  543705 disk_worker.go:494] system disk:vda1
I0322 02:00:14.456716  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:00:15.455164  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:00:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:00:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:00:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:00:23.409774  543705 memory.go:184] no items to output this cycle
I0322 02:00:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 02:00:32.497129  543705 disk_info.go:125] begin check local disk info of client
I0322 02:00:32.499609  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:00:32.499625  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048bf40 0xc000252000]
E0322 02:00:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:00:33.409796  543705 memory.go:184] no items to output this cycle
I0322 02:00:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 02:00:39.443969  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:00:39.443975  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:00:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:00:43.410805  543705 memory.go:191] Add success.
I0322 02:00:43.409805  543705 cpu.go:282] Add success.
I0322 02:00:43.420509  543705 net.go:648] Add success.
I0322 02:00:43.423743  543705 net.go:770] primary dev: ETH0
I0322 02:00:43.423757  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:00:43.423770  543705 net.go:698] Add success.
I0322 02:00:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:00:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:00:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:00:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:00:53.409785  543705 memory.go:184] no items to output this cycle
I0322 02:00:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 02:01:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:01:03.409807  543705 memory.go:184] no items to output this cycle
I0322 02:01:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 02:01:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:01:13.409796  543705 memory.go:191] Add success.
I0322 02:01:13.409801  543705 cpu.go:282] Add success.
W0322 02:01:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:01:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:01:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:01:13.420063  543705 net.go:648] Add success.
I0322 02:01:13.423025  543705 net.go:770] primary dev: ETH0
I0322 02:01:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:01:13.423055  543705 net.go:698] Add success.
I0322 02:01:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:01:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:01:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0322 02:01:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:01:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 02:01:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:01:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:01:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:01:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:01:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:01:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:01:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:01:23.409782  543705 memory.go:184] no items to output this cycle
I0322 02:01:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 02:01:32.500188  543705 disk_info.go:125] begin check local disk info of client
I0322 02:01:32.502664  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:01:32.502679  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a31c0 0xc0002a3200]
E0322 02:01:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:01:33.409792  543705 memory.go:184] no items to output this cycle
I0322 02:01:33.409805  543705 cpu.go:275] no items to output this cycle
E0322 02:01:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:01:43.409798  543705 memory.go:191] Add success.
I0322 02:01:43.409816  543705 cpu.go:282] Add success.
I0322 02:01:43.420003  543705 net.go:648] Add success.
I0322 02:01:43.422889  543705 net.go:770] primary dev: ETH0
I0322 02:01:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:01:43.422915  543705 net.go:698] Add success.
I0322 02:01:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:01:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:01:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:01:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:01:53.409775  543705 memory.go:184] no items to output this cycle
I0322 02:01:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 02:02:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:02:03.409808  543705 memory.go:184] no items to output this cycle
I0322 02:02:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 02:02:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:02:13.409828  543705 memory.go:191] Add success.
I0322 02:02:13.409837  543705 cpu.go:282] Add success.
W0322 02:02:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:02:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:02:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:02:13.420128  543705 net.go:648] Add success.
I0322 02:02:13.422674  543705 net.go:770] primary dev: ETH0
I0322 02:02:13.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:02:13.422703  543705 net.go:698] Add success.
W0322 02:02:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:02:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 02:02:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:02:14.456809  543705 disk_worker.go:494] system disk:vda1
I0322 02:02:14.456848  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:02:14.457078  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:02:14.457086  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:02:14.457090  543705 custom_config.go:64] query custom config with name: gpu
E0322 02:02:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:02:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:02:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:02:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:02:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:02:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:02:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:02:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:02:23.409786  543705 memory.go:184] no items to output this cycle
I0322 02:02:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 02:02:32.503166  543705 disk_info.go:125] begin check local disk info of client
I0322 02:02:32.505629  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:02:32.505635  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461c80 0xc000461cc0]
E0322 02:02:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:02:33.409812  543705 memory.go:184] no items to output this cycle
I0322 02:02:33.409929  543705 cpu.go:275] no items to output this cycle
E0322 02:02:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:02:43.409802  543705 memory.go:191] Add success.
I0322 02:02:43.409823  543705 cpu.go:282] Add success.
I0322 02:02:43.419888  543705 net.go:648] Add success.
I0322 02:02:43.423089  543705 net.go:770] primary dev: ETH0
I0322 02:02:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:02:43.423113  543705 net.go:698] Add success.
I0322 02:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:02:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:02:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:02:53.409801  543705 memory.go:184] no items to output this cycle
I0322 02:02:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 02:03:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:03:03.409909  543705 memory.go:184] no items to output this cycle
I0322 02:03:03.409931  543705 cpu.go:275] no items to output this cycle
E0322 02:03:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:03:13.409789  543705 memory.go:191] Add success.
W0322 02:03:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 02:03:13.409820  543705 cpu.go:282] Add success.
W0322 02:03:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:03:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:03:13.420137  543705 net.go:648] Add success.
I0322 02:03:13.423497  543705 net.go:770] primary dev: ETH0
I0322 02:03:13.423512  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:03:13.423526  543705 net.go:698] Add success.
I0322 02:03:13.470213  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"959bef51-f101-459b-bb0e-7190fbd1933d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:03:13.470246  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:03:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:03:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:03:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 02:03:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:03:14.456570  543705 disk_worker.go:494] system disk:vda1
I0322 02:03:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:03:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:03:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:03:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:03:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:03:23.409801  543705 memory.go:184] no items to output this cycle
I0322 02:03:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 02:03:32.506223  543705 disk_info.go:125] begin check local disk info of client
I0322 02:03:32.508663  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:03:32.508669  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375280 0xc0003752c0]
E0322 02:03:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:03:33.409798  543705 memory.go:184] no items to output this cycle
I0322 02:03:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 02:03:39.444974  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:03:39.444982  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:03:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:03:43.410663  543705 memory.go:191] Add success.
I0322 02:03:43.409838  543705 cpu.go:282] Add success.
I0322 02:03:43.420408  543705 net.go:648] Add success.
I0322 02:03:43.423150  543705 net.go:770] primary dev: ETH0
I0322 02:03:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:03:43.423175  543705 net.go:698] Add success.
I0322 02:03:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:03:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:03:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:03:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:03:53.409805  543705 memory.go:184] no items to output this cycle
I0322 02:03:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 02:04:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:04:03.409817  543705 memory.go:184] no items to output this cycle
I0322 02:04:03.409827  543705 cpu.go:275] no items to output this cycle
E0322 02:04:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:04:13.409881  543705 memory.go:191] Add success.
W0322 02:04:13.409912  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:04:13.409925  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:04:13.409928  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:04:13.409929  543705 cpu.go:282] Add success.
I0322 02:04:13.419711  543705 net.go:648] Add success.
I0322 02:04:13.422258  543705 net.go:770] primary dev: ETH0
I0322 02:04:13.422273  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:04:13.422286  543705 net.go:698] Add success.
I0322 02:04:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:04:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:04:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 02:04:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:04:14.456550  543705 disk_worker.go:494] system disk:vda1
I0322 02:04:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:04:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:04:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:04:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:04:23.409803  543705 memory.go:184] no items to output this cycle
I0322 02:04:23.409817  543705 cpu.go:275] no items to output this cycle
I0322 02:04:32.509186  543705 disk_info.go:125] begin check local disk info of client
I0322 02:04:32.511733  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:04:32.511740  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d80 0xc0000c4dc0]
E0322 02:04:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:04:33.409791  543705 memory.go:184] no items to output this cycle
I0322 02:04:33.409807  543705 cpu.go:275] no items to output this cycle
E0322 02:04:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:04:43.409799  543705 cpu.go:282] Add success.
I0322 02:04:43.409805  543705 memory.go:191] Add success.
I0322 02:04:43.419909  543705 net.go:648] Add success.
I0322 02:04:43.422523  543705 net.go:770] primary dev: ETH0
I0322 02:04:43.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:04:43.422553  543705 net.go:698] Add success.
I0322 02:04:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:04:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:04:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:04:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:04:53.409766  543705 memory.go:184] no items to output this cycle
I0322 02:04:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 02:05:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:05:03.409804  543705 memory.go:184] no items to output this cycle
I0322 02:05:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 02:05:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:05:13.409775  543705 memory.go:191] Add success.
I0322 02:05:13.409796  543705 cpu.go:282] Add success.
W0322 02:05:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:05:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:05:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:05:13.420316  543705 net.go:648] Add success.
I0322 02:05:13.423141  543705 net.go:770] primary dev: ETH0
I0322 02:05:13.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:05:13.423182  543705 net.go:698] Add success.
I0322 02:05:14.454945  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:05:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:05:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 02:05:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:05:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 02:05:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:05:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:05:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:05:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:05:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:05:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:05:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:05:23.409794  543705 memory.go:184] no items to output this cycle
I0322 02:05:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 02:05:32.512202  543705 disk_info.go:125] begin check local disk info of client
I0322 02:05:32.514660  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:05:32.514666  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264500 0xc000264540]
E0322 02:05:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:05:33.409783  543705 memory.go:184] no items to output this cycle
I0322 02:05:33.409798  543705 cpu.go:275] no items to output this cycle
E0322 02:05:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:05:43.409821  543705 memory.go:191] Add success.
I0322 02:05:43.409827  543705 cpu.go:282] Add success.
I0322 02:05:43.419980  543705 net.go:648] Add success.
I0322 02:05:43.422773  543705 net.go:770] primary dev: ETH0
I0322 02:05:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:05:43.422798  543705 net.go:698] Add success.
I0322 02:05:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:05:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:05:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:05:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:05:53.409771  543705 memory.go:184] no items to output this cycle
I0322 02:05:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 02:06:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:06:03.409781  543705 memory.go:184] no items to output this cycle
I0322 02:06:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 02:06:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:06:13.409787  543705 cpu.go:282] Add success.
I0322 02:06:13.409795  543705 memory.go:191] Add success.
W0322 02:06:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:06:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:06:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:06:13.420196  543705 net.go:648] Add success.
I0322 02:06:13.422949  543705 net.go:770] primary dev: ETH0
I0322 02:06:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:06:13.422974  543705 net.go:698] Add success.
I0322 02:06:13.467564  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d434e1ab-c243-4988-97d3-89d53a887e98","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:06:13.467595  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:06:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:06:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:06:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 02:06:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:06:14.456670  543705 disk_worker.go:494] system disk:vda1
I0322 02:06:14.456697  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:06:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:06:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:06:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:06:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:06:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:06:23.409795  543705 memory.go:184] no items to output this cycle
I0322 02:06:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 02:06:32.515223  543705 disk_info.go:125] begin check local disk info of client
I0322 02:06:32.517694  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:06:32.517700  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc480 0xc0004dc4c0]
E0322 02:06:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:06:33.409784  543705 memory.go:184] no items to output this cycle
I0322 02:06:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 02:06:39.445985  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:06:39.445992  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:06:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:06:43.410752  543705 memory.go:191] Add success.
I0322 02:06:43.409809  543705 cpu.go:282] Add success.
I0322 02:06:43.420504  543705 net.go:648] Add success.
I0322 02:06:43.423323  543705 net.go:770] primary dev: ETH0
I0322 02:06:43.423339  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:06:43.423353  543705 net.go:698] Add success.
I0322 02:06:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:06:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:06:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:06:53.409802  543705 memory.go:184] no items to output this cycle
I0322 02:06:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 02:07:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:07:03.409781  543705 memory.go:184] no items to output this cycle
I0322 02:07:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 02:07:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:07:13.409781  543705 memory.go:191] Add success.
I0322 02:07:13.409784  543705 cpu.go:282] Add success.
W0322 02:07:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:07:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:07:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:07:13.420057  543705 net.go:648] Add success.
I0322 02:07:13.422626  543705 net.go:770] primary dev: ETH0
I0322 02:07:13.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:07:13.422651  543705 net.go:698] Add success.
I0322 02:07:13.452787  543705 event_worker.go:152] Polling the log file for events...
W0322 02:07:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:07:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 02:07:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:07:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:07:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:07:14.455894  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:07:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 02:07:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:07:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:07:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:07:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:07:16.457982  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:07:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:07:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:07:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:07:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:07:23.409768  543705 memory.go:184] no items to output this cycle
I0322 02:07:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 02:07:32.518224  543705 disk_info.go:125] begin check local disk info of client
I0322 02:07:32.520764  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:07:32.520770  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc900 0xc0004dc940]
E0322 02:07:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:07:33.409760  543705 memory.go:184] no items to output this cycle
I0322 02:07:33.409772  543705 cpu.go:275] no items to output this cycle
E0322 02:07:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:07:43.409796  543705 memory.go:191] Add success.
I0322 02:07:43.409797  543705 cpu.go:282] Add success.
I0322 02:07:43.419889  543705 net.go:648] Add success.
I0322 02:07:43.422831  543705 net.go:770] primary dev: ETH0
I0322 02:07:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:07:43.422857  543705 net.go:698] Add success.
I0322 02:07:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:07:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:07:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:07:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:07:53.409774  543705 memory.go:184] no items to output this cycle
I0322 02:07:53.409775  543705 cpu.go:275] no items to output this cycle
E0322 02:08:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:08:03.409768  543705 memory.go:184] no items to output this cycle
I0322 02:08:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 02:08:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:08:13.409805  543705 memory.go:191] Add success.
I0322 02:08:13.409814  543705 cpu.go:282] Add success.
W0322 02:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:08:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:08:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:08:13.420148  543705 net.go:648] Add success.
I0322 02:08:13.422991  543705 net.go:770] primary dev: ETH0
I0322 02:08:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:08:13.423026  543705 net.go:698] Add success.
I0322 02:08:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:08:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:08:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 02:08:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:08:14.456477  543705 disk_worker.go:494] system disk:vda1
I0322 02:08:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:08:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:08:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:08:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:08:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:08:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:08:23.409799  543705 memory.go:184] no items to output this cycle
I0322 02:08:23.409808  543705 cpu.go:275] no items to output this cycle
I0322 02:08:32.521244  543705 disk_info.go:125] begin check local disk info of client
I0322 02:08:32.523806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:08:32.523812  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c52c0 0xc0000c5300]
E0322 02:08:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:08:33.409792  543705 memory.go:184] no items to output this cycle
I0322 02:08:33.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:08:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:08:43.409791  543705 memory.go:191] Add success.
I0322 02:08:43.409818  543705 cpu.go:282] Add success.
I0322 02:08:43.420387  543705 net.go:648] Add success.
I0322 02:08:43.423070  543705 net.go:770] primary dev: ETH0
I0322 02:08:43.423083  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:08:43.423096  543705 net.go:698] Add success.
I0322 02:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:08:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:08:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:08:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:08:53.409787  543705 memory.go:184] no items to output this cycle
I0322 02:08:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 02:09:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:09:03.409799  543705 memory.go:184] no items to output this cycle
I0322 02:09:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:09:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:09:13.409776  543705 memory.go:191] Add success.
W0322 02:09:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 02:09:13.409801  543705 cpu.go:282] Add success.
W0322 02:09:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:09:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:09:13.420040  543705 net.go:648] Add success.
I0322 02:09:13.422920  543705 net.go:770] primary dev: ETH0
I0322 02:09:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:09:13.422946  543705 net.go:698] Add success.
I0322 02:09:13.540493  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6b5d3013-4eb3-4e57-935e-4ef5d68ffb6b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:09:13.540528  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:09:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:09:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:09:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 02:09:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:09:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 02:09:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:09:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:09:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:09:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:09:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:09:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:09:23.409786  543705 memory.go:184] no items to output this cycle
I0322 02:09:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 02:09:32.524267  543705 disk_info.go:125] begin check local disk info of client
I0322 02:09:32.526792  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:09:32.526800  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 02:09:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:09:33.409785  543705 memory.go:184] no items to output this cycle
I0322 02:09:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 02:09:39.446979  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:09:39.446987  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:09:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:09:43.410691  543705 memory.go:191] Add success.
I0322 02:09:43.409798  543705 cpu.go:282] Add success.
I0322 02:09:43.420393  543705 net.go:648] Add success.
I0322 02:09:43.423202  543705 net.go:770] primary dev: ETH0
I0322 02:09:43.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:09:43.423229  543705 net.go:698] Add success.
I0322 02:09:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:09:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:09:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:09:53.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:09:53.410259  543705 memory.go:184] no items to output this cycle
I0322 02:09:53.410264  543705 cpu.go:275] no items to output this cycle
E0322 02:10:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:10:03.409778  543705 memory.go:184] no items to output this cycle
I0322 02:10:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 02:10:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:10:13.409792  543705 cpu.go:282] Add success.
I0322 02:10:13.409794  543705 memory.go:191] Add success.
W0322 02:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:10:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:10:13.420046  543705 net.go:648] Add success.
I0322 02:10:13.422799  543705 net.go:770] primary dev: ETH0
I0322 02:10:13.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:10:13.422825  543705 net.go:698] Add success.
I0322 02:10:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:10:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:10:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 02:10:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:10:14.456485  543705 disk_worker.go:494] system disk:vda1
I0322 02:10:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:10:15.456015  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:10:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:10:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:10:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:10:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:10:23.409793  543705 memory.go:184] no items to output this cycle
I0322 02:10:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 02:10:32.527277  543705 disk_info.go:125] begin check local disk info of client
I0322 02:10:32.529765  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:10:32.529772  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8f40 0xc0003e8f80]
E0322 02:10:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:10:33.409777  543705 memory.go:184] no items to output this cycle
I0322 02:10:33.409798  543705 cpu.go:275] no items to output this cycle
E0322 02:10:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:10:43.409794  543705 memory.go:191] Add success.
I0322 02:10:43.409812  543705 cpu.go:282] Add success.
I0322 02:10:43.419923  543705 net.go:648] Add success.
I0322 02:10:43.422505  543705 net.go:770] primary dev: ETH0
I0322 02:10:43.422518  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:10:43.422531  543705 net.go:698] Add success.
I0322 02:10:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:10:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:10:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:10:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:10:53.409765  543705 memory.go:184] no items to output this cycle
I0322 02:10:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 02:11:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:11:03.409780  543705 memory.go:184] no items to output this cycle
I0322 02:11:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 02:11:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:11:13.409786  543705 memory.go:191] Add success.
I0322 02:11:13.409796  543705 cpu.go:282] Add success.
W0322 02:11:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:11:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:11:13.420059  543705 net.go:648] Add success.
I0322 02:11:13.422693  543705 net.go:770] primary dev: ETH0
I0322 02:11:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:11:13.422720  543705 net.go:698] Add success.
I0322 02:11:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:11:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:11:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 02:11:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:11:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 02:11:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:11:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:11:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:11:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:11:23.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:11:23.409899  543705 memory.go:184] no items to output this cycle
I0322 02:11:23.409977  543705 cpu.go:275] no items to output this cycle
I0322 02:11:32.530291  543705 disk_info.go:125] begin check local disk info of client
I0322 02:11:32.532764  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:11:32.532771  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d97c0 0xc0004d9800]
E0322 02:11:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:11:33.409777  543705 memory.go:184] no items to output this cycle
I0322 02:11:33.409805  543705 cpu.go:275] no items to output this cycle
E0322 02:11:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:11:43.409822  543705 memory.go:191] Add success.
I0322 02:11:43.409835  543705 cpu.go:282] Add success.
I0322 02:11:43.419964  543705 net.go:648] Add success.
I0322 02:11:43.422552  543705 net.go:770] primary dev: ETH0
I0322 02:11:43.422567  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:11:43.422582  543705 net.go:698] Add success.
I0322 02:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:11:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:11:53.409776  543705 memory.go:184] no items to output this cycle
I0322 02:11:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 02:12:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:12:03.409806  543705 memory.go:184] no items to output this cycle
I0322 02:12:03.409835  543705 cpu.go:275] no items to output this cycle
E0322 02:12:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:12:13.409789  543705 memory.go:191] Add success.
I0322 02:12:13.409795  543705 cpu.go:282] Add success.
W0322 02:12:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:12:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:12:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:12:13.420274  543705 net.go:648] Add success.
I0322 02:12:13.423144  543705 net.go:770] primary dev: ETH0
I0322 02:12:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:12:13.423173  543705 net.go:698] Add success.
I0322 02:12:13.469695  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1be94b44-1423-467a-8bb3-5374ecf71b18","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:12:13.469729  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 02:12:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:12:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 02:12:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:12:14.455897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:12:14.455905  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:12:14.455911  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:12:14.456722  543705 disk_worker.go:494] system disk:vda1
I0322 02:12:14.456763  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:12:15.456776  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:12:15.456784  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:12:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:12:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:12:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:12:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:12:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:12:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:12:23.409863  543705 memory.go:184] no items to output this cycle
I0322 02:12:23.409930  543705 cpu.go:275] no items to output this cycle
I0322 02:12:32.533310  543705 disk_info.go:125] begin check local disk info of client
I0322 02:12:32.535809  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:12:32.535816  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0322 02:12:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:12:33.409787  543705 memory.go:184] no items to output this cycle
I0322 02:12:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 02:12:39.447992  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:12:39.448000  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:12:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:12:43.410556  543705 memory.go:191] Add success.
I0322 02:12:43.409807  543705 cpu.go:282] Add success.
I0322 02:12:43.420284  543705 net.go:648] Add success.
I0322 02:12:43.422975  543705 net.go:770] primary dev: ETH0
I0322 02:12:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:12:43.423004  543705 net.go:698] Add success.
I0322 02:12:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:12:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:12:53.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:12:53.410262  543705 memory.go:184] no items to output this cycle
I0322 02:12:53.410268  543705 cpu.go:275] no items to output this cycle
E0322 02:13:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:13:03.409782  543705 memory.go:184] no items to output this cycle
I0322 02:13:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 02:13:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:13:13.409808  543705 memory.go:191] Add success.
I0322 02:13:13.409814  543705 cpu.go:282] Add success.
W0322 02:13:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:13:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:13:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:13:13.420083  543705 net.go:648] Add success.
I0322 02:13:13.422916  543705 net.go:770] primary dev: ETH0
I0322 02:13:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:13:13.422941  543705 net.go:698] Add success.
I0322 02:13:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:13:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:13:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 02:13:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:13:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 02:13:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:13:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:13:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:13:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:13:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:13:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:13:23.409800  543705 memory.go:184] no items to output this cycle
I0322 02:13:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 02:13:32.535905  543705 disk_info.go:125] begin check local disk info of client
I0322 02:13:32.538405  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:13:32.538411  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6840 0xc0003e6880]
E0322 02:13:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:13:33.409795  543705 memory.go:184] no items to output this cycle
I0322 02:13:33.409812  543705 cpu.go:275] no items to output this cycle
E0322 02:13:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:13:43.409789  543705 memory.go:191] Add success.
I0322 02:13:43.409809  543705 cpu.go:282] Add success.
I0322 02:13:43.419968  543705 net.go:648] Add success.
I0322 02:13:43.423851  543705 net.go:770] primary dev: ETH0
I0322 02:13:43.423863  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:13:43.423875  543705 net.go:698] Add success.
I0322 02:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:13:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:13:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:13:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:13:53.409814  543705 memory.go:184] no items to output this cycle
I0322 02:13:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 02:14:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:14:03.409784  543705 memory.go:184] no items to output this cycle
I0322 02:14:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 02:14:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:14:13.409786  543705 memory.go:191] Add success.
I0322 02:14:13.409786  543705 cpu.go:282] Add success.
W0322 02:14:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:14:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:14:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:14:13.420157  543705 net.go:648] Add success.
I0322 02:14:13.423039  543705 net.go:770] primary dev: ETH0
I0322 02:14:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:14:13.423063  543705 net.go:698] Add success.
I0322 02:14:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:14:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:14:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 02:14:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:14:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 02:14:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:14:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:14:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:14:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:14:23.410407  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:14:23.410424  543705 memory.go:184] no items to output this cycle
I0322 02:14:23.410438  543705 cpu.go:275] no items to output this cycle
I0322 02:14:32.539381  543705 disk_info.go:125] begin check local disk info of client
I0322 02:14:32.541888  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:14:32.541895  543705 disk_info.go:196] parse disk info done, disk is : [0xc000558a00 0xc000558a40]
E0322 02:14:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:14:33.409905  543705 memory.go:184] no items to output this cycle
I0322 02:14:33.410051  543705 cpu.go:275] no items to output this cycle
E0322 02:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:14:43.409806  543705 memory.go:191] Add success.
I0322 02:14:43.409827  543705 cpu.go:282] Add success.
I0322 02:14:43.420221  543705 net.go:648] Add success.
I0322 02:14:43.422858  543705 net.go:770] primary dev: ETH0
I0322 02:14:43.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:14:43.422884  543705 net.go:698] Add success.
I0322 02:14:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:14:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:14:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:14:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:14:53.409783  543705 memory.go:184] no items to output this cycle
I0322 02:14:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 02:15:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:15:03.409806  543705 memory.go:184] no items to output this cycle
I0322 02:15:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 02:15:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:15:13.409774  543705 memory.go:191] Add success.
W0322 02:15:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 02:15:13.409805  543705 cpu.go:282] Add success.
W0322 02:15:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:15:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:15:13.420131  543705 net.go:648] Add success.
I0322 02:15:13.422949  543705 net.go:770] primary dev: ETH0
I0322 02:15:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:15:13.422974  543705 net.go:698] Add success.
I0322 02:15:13.464028  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"450211a7-dc42-4f04-9ca5-f8c7871e0e23","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:15:13.464061  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:15:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:15:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 02:15:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:15:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 02:15:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:15:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:15:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:15:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:15:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:15:23.409797  543705 memory.go:184] no items to output this cycle
I0322 02:15:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 02:15:32.542354  543705 disk_info.go:125] begin check local disk info of client
I0322 02:15:32.544857  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:15:32.544864  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047fd80 0xc00047fdc0]
E0322 02:15:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:15:33.409783  543705 memory.go:184] no items to output this cycle
I0322 02:15:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 02:15:39.448995  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:15:39.449002  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:15:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:15:43.410753  543705 memory.go:191] Add success.
I0322 02:15:43.409826  543705 cpu.go:282] Add success.
I0322 02:15:43.420458  543705 net.go:648] Add success.
I0322 02:15:43.423276  543705 net.go:770] primary dev: ETH0
I0322 02:15:43.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:15:43.423301  543705 net.go:698] Add success.
I0322 02:15:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:15:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:15:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:15:53.409803  543705 memory.go:184] no items to output this cycle
I0322 02:15:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 02:16:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:16:03.409796  543705 memory.go:184] no items to output this cycle
I0322 02:16:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:16:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:16:13.409787  543705 memory.go:191] Add success.
W0322 02:16:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:16:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:16:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:16:13.409828  543705 cpu.go:282] Add success.
I0322 02:16:13.420057  543705 net.go:648] Add success.
I0322 02:16:13.422785  543705 net.go:770] primary dev: ETH0
I0322 02:16:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:16:13.422809  543705 net.go:698] Add success.
I0322 02:16:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:16:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:16:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 02:16:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:16:14.456480  543705 disk_worker.go:494] system disk:vda1
I0322 02:16:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:16:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:16:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:16:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:16:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:16:23.409772  543705 memory.go:184] no items to output this cycle
I0322 02:16:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 02:16:32.545370  543705 disk_info.go:125] begin check local disk info of client
I0322 02:16:32.547888  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:16:32.547897  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004730c0 0xc000473100]
E0322 02:16:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:16:33.409791  543705 memory.go:184] no items to output this cycle
I0322 02:16:33.409806  543705 cpu.go:275] no items to output this cycle
E0322 02:16:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:16:43.409929  543705 memory.go:191] Add success.
I0322 02:16:43.409956  543705 cpu.go:282] Add success.
I0322 02:16:43.419744  543705 net.go:648] Add success.
I0322 02:16:43.422289  543705 net.go:770] primary dev: ETH0
I0322 02:16:43.422303  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:16:43.422314  543705 net.go:698] Add success.
I0322 02:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:16:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:16:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:16:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:16:53.409813  543705 memory.go:184] no items to output this cycle
I0322 02:16:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 02:17:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:17:03.409804  543705 memory.go:184] no items to output this cycle
I0322 02:17:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 02:17:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:17:13.409823  543705 memory.go:191] Add success.
I0322 02:17:13.409836  543705 cpu.go:282] Add success.
W0322 02:17:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:17:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:17:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:17:13.420042  543705 net.go:648] Add success.
I0322 02:17:13.422720  543705 net.go:770] primary dev: ETH0
I0322 02:17:13.422734  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:17:13.422745  543705 net.go:698] Add success.
I0322 02:17:13.453295  543705 event_worker.go:152] Polling the log file for events...
W0322 02:17:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:17:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 02:17:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:17:14.456802  543705 disk_worker.go:494] system disk:vda1
I0322 02:17:14.456841  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:17:14.457083  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:17:14.457091  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:17:14.457096  543705 custom_config.go:64] query custom config with name: gpu
E0322 02:17:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:17:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:17:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:17:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:17:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:17:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:17:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:17:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:17:23.409784  543705 memory.go:184] no items to output this cycle
I0322 02:17:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 02:17:32.548382  543705 disk_info.go:125] begin check local disk info of client
I0322 02:17:32.550879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:17:32.550886  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1980 0xc0002b19c0]
E0322 02:17:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:17:33.409789  543705 memory.go:184] no items to output this cycle
I0322 02:17:33.409809  543705 cpu.go:275] no items to output this cycle
E0322 02:17:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:17:43.409803  543705 memory.go:191] Add success.
I0322 02:17:43.409808  543705 cpu.go:282] Add success.
I0322 02:17:43.419898  543705 net.go:648] Add success.
I0322 02:17:43.422430  543705 net.go:770] primary dev: ETH0
I0322 02:17:43.422444  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:17:43.422459  543705 net.go:698] Add success.
I0322 02:17:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:17:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:17:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:17:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:17:53.409786  543705 memory.go:184] no items to output this cycle
I0322 02:17:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 02:18:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:18:03.409805  543705 memory.go:184] no items to output this cycle
I0322 02:18:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 02:18:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:18:13.409781  543705 memory.go:191] Add success.
I0322 02:18:13.409787  543705 cpu.go:282] Add success.
W0322 02:18:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:18:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:18:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:18:13.420076  543705 net.go:648] Add success.
I0322 02:18:13.422735  543705 net.go:770] primary dev: ETH0
I0322 02:18:13.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:18:13.422763  543705 net.go:698] Add success.
I0322 02:18:13.467979  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"512b48f8-d40b-47d9-902a-8f0b136aa661","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:18:13.468024  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:18:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:18:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:18:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 02:18:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:18:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 02:18:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:18:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:18:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:18:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:18:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:18:23.409772  543705 memory.go:184] no items to output this cycle
I0322 02:18:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 02:18:32.551402  543705 disk_info.go:125] begin check local disk info of client
I0322 02:18:32.553859  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:18:32.553865  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470700 0xc000470740]
E0322 02:18:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:18:33.409781  543705 memory.go:184] no items to output this cycle
I0322 02:18:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 02:18:39.449995  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:18:39.450002  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:18:43.410622  543705 memory.go:191] Add success.
I0322 02:18:43.409807  543705 cpu.go:282] Add success.
I0322 02:18:43.420314  543705 net.go:648] Add success.
I0322 02:18:43.423062  543705 net.go:770] primary dev: ETH0
I0322 02:18:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:18:43.423091  543705 net.go:698] Add success.
I0322 02:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:18:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:18:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:18:53.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:18:53.409891  543705 memory.go:184] no items to output this cycle
I0322 02:18:53.409963  543705 cpu.go:275] no items to output this cycle
E0322 02:19:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:19:03.409770  543705 memory.go:184] no items to output this cycle
I0322 02:19:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 02:19:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:19:13.409805  543705 memory.go:191] Add success.
I0322 02:19:13.409816  543705 cpu.go:282] Add success.
W0322 02:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:19:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:19:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:19:13.420132  543705 net.go:648] Add success.
I0322 02:19:13.422761  543705 net.go:770] primary dev: ETH0
I0322 02:19:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:19:13.422789  543705 net.go:698] Add success.
I0322 02:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:19:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:19:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 02:19:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:19:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 02:19:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:19:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:19:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:19:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:19:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:19:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:19:23.409815  543705 memory.go:184] no items to output this cycle
I0322 02:19:23.409831  543705 cpu.go:275] no items to output this cycle
I0322 02:19:32.554410  543705 disk_info.go:125] begin check local disk info of client
I0322 02:19:32.556896  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:19:32.556903  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e95c0 0xc0003e9600]
E0322 02:19:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:19:33.409774  543705 memory.go:184] no items to output this cycle
I0322 02:19:33.409785  543705 cpu.go:275] no items to output this cycle
E0322 02:19:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:19:43.409796  543705 memory.go:191] Add success.
I0322 02:19:43.409820  543705 cpu.go:282] Add success.
I0322 02:19:43.419782  543705 net.go:770] primary dev: ETH0
I0322 02:19:43.419796  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:19:43.419810  543705 net.go:698] Add success.
I0322 02:19:43.420039  543705 net.go:648] Add success.
I0322 02:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:19:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:19:53.410382  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:19:53.410403  543705 memory.go:184] no items to output this cycle
I0322 02:19:53.410413  543705 cpu.go:275] no items to output this cycle
E0322 02:20:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:20:03.409784  543705 memory.go:184] no items to output this cycle
I0322 02:20:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 02:20:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:20:13.409781  543705 memory.go:191] Add success.
I0322 02:20:13.409782  543705 cpu.go:282] Add success.
W0322 02:20:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:20:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:20:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:20:13.420168  543705 net.go:648] Add success.
I0322 02:20:13.422855  543705 net.go:770] primary dev: ETH0
I0322 02:20:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:20:13.422880  543705 net.go:698] Add success.
I0322 02:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:20:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:20:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0322 02:20:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:20:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 02:20:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:20:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:20:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:20:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:20:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:20:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:20:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:20:23.409786  543705 memory.go:184] no items to output this cycle
I0322 02:20:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 02:20:32.557430  543705 disk_info.go:125] begin check local disk info of client
I0322 02:20:32.559902  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:20:32.559908  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd1c0 0xc0002bd200]
E0322 02:20:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:20:33.409789  543705 memory.go:184] no items to output this cycle
I0322 02:20:33.409804  543705 cpu.go:275] no items to output this cycle
E0322 02:20:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:20:43.409787  543705 memory.go:191] Add success.
I0322 02:20:43.409814  543705 cpu.go:282] Add success.
I0322 02:20:43.419866  543705 net.go:648] Add success.
I0322 02:20:43.422380  543705 net.go:770] primary dev: ETH0
I0322 02:20:43.422393  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:20:43.422406  543705 net.go:698] Add success.
I0322 02:20:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:20:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:20:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:20:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:20:53.409795  543705 memory.go:184] no items to output this cycle
I0322 02:20:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 02:21:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:21:03.409790  543705 memory.go:184] no items to output this cycle
I0322 02:21:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 02:21:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:21:13.409796  543705 memory.go:191] Add success.
I0322 02:21:13.409797  543705 cpu.go:282] Add success.
W0322 02:21:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:21:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:21:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:21:13.420132  543705 net.go:648] Add success.
I0322 02:21:13.422873  543705 net.go:770] primary dev: ETH0
I0322 02:21:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:21:13.422899  543705 net.go:698] Add success.
I0322 02:21:13.521050  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eacbd838-18b6-4161-8a3c-989f5068907e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:21:13.521091  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:21:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:21:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:21:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 02:21:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:21:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 02:21:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:21:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:21:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:21:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:21:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:21:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:21:23.409771  543705 memory.go:184] no items to output this cycle
I0322 02:21:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 02:21:32.560462  543705 disk_info.go:125] begin check local disk info of client
I0322 02:21:32.562936  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:21:32.562942  543705 disk_info.go:196] parse disk info done, disk is : [0xc00023e740 0xc00023e780]
E0322 02:21:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:21:33.409801  543705 memory.go:184] no items to output this cycle
I0322 02:21:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 02:21:39.451010  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:21:39.451017  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:21:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:21:43.410724  543705 memory.go:191] Add success.
I0322 02:21:43.409825  543705 cpu.go:282] Add success.
I0322 02:21:43.420430  543705 net.go:648] Add success.
I0322 02:21:43.423112  543705 net.go:770] primary dev: ETH0
I0322 02:21:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:21:43.423139  543705 net.go:698] Add success.
I0322 02:21:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:21:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:21:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:21:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:21:53.409809  543705 memory.go:184] no items to output this cycle
I0322 02:21:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 02:22:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:22:03.409779  543705 memory.go:184] no items to output this cycle
I0322 02:22:03.409794  543705 cpu.go:275] no items to output this cycle
W0322 02:22:13.409700  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:22:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:22:13.409719  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 02:22:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:22:13.409806  543705 memory.go:191] Add success.
I0322 02:22:13.409826  543705 cpu.go:282] Add success.
I0322 02:22:13.420117  543705 net.go:648] Add success.
I0322 02:22:13.422678  543705 net.go:770] primary dev: ETH0
I0322 02:22:13.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:22:13.422703  543705 net.go:698] Add success.
W0322 02:22:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:22:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 02:22:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:22:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:22:14.455898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:22:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:22:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 02:22:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:22:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:22:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:22:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:22:16.457967  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:22:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:22:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:22:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:22:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:22:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 02:22:23.409787  543705 memory.go:184] no items to output this cycle
I0322 02:22:32.563458  543705 disk_info.go:125] begin check local disk info of client
I0322 02:22:32.565973  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:22:32.565980  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264a00 0xc000264a40]
E0322 02:22:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:22:33.409773  543705 memory.go:184] no items to output this cycle
I0322 02:22:33.409788  543705 cpu.go:275] no items to output this cycle
E0322 02:22:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:22:43.409802  543705 memory.go:191] Add success.
I0322 02:22:43.409804  543705 cpu.go:282] Add success.
I0322 02:22:43.419885  543705 net.go:648] Add success.
I0322 02:22:43.422408  543705 net.go:770] primary dev: ETH0
I0322 02:22:43.422421  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:22:43.422434  543705 net.go:698] Add success.
I0322 02:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:22:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:22:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:22:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:22:53.409780  543705 memory.go:184] no items to output this cycle
I0322 02:22:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 02:23:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:23:03.409783  543705 memory.go:184] no items to output this cycle
I0322 02:23:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 02:23:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:23:13.409785  543705 memory.go:191] Add success.
I0322 02:23:13.409786  543705 cpu.go:282] Add success.
W0322 02:23:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:23:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:23:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:23:13.420045  543705 net.go:648] Add success.
I0322 02:23:13.422694  543705 net.go:770] primary dev: ETH0
I0322 02:23:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:23:13.422723  543705 net.go:698] Add success.
I0322 02:23:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:23:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:23:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 02:23:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:23:14.456679  543705 disk_worker.go:494] system disk:vda1
I0322 02:23:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:23:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:23:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:23:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:23:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:23:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:23:23.409765  543705 memory.go:184] no items to output this cycle
I0322 02:23:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 02:23:32.566469  543705 disk_info.go:125] begin check local disk info of client
I0322 02:23:32.568941  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:23:32.568947  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265900 0xc000265940]
E0322 02:23:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:23:33.409785  543705 memory.go:184] no items to output this cycle
I0322 02:23:33.409801  543705 cpu.go:275] no items to output this cycle
E0322 02:23:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:23:43.409822  543705 memory.go:191] Add success.
I0322 02:23:43.409826  543705 cpu.go:282] Add success.
I0322 02:23:43.420002  543705 net.go:648] Add success.
I0322 02:23:43.422984  543705 net.go:770] primary dev: ETH0
I0322 02:23:43.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:23:43.423010  543705 net.go:698] Add success.
I0322 02:23:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:23:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:23:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:23:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:23:53.409793  543705 memory.go:184] no items to output this cycle
I0322 02:23:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 02:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:24:03.409780  543705 memory.go:184] no items to output this cycle
I0322 02:24:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 02:24:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:24:13.409777  543705 memory.go:191] Add success.
W0322 02:24:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:24:13.409915  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:24:13.409919  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:24:13.409948  543705 cpu.go:282] Add success.
I0322 02:24:13.419712  543705 net.go:648] Add success.
I0322 02:24:13.422583  543705 net.go:770] primary dev: ETH0
I0322 02:24:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:24:13.422607  543705 net.go:698] Add success.
I0322 02:24:13.468785  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ed69965a-c523-47c9-899d-170a67161a2c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:24:13.468816  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:24:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:24:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:24:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 02:24:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:24:14.456696  543705 disk_worker.go:494] system disk:vda1
I0322 02:24:14.456735  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:24:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:24:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:24:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:24:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:24:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:24:23.409777  543705 memory.go:184] no items to output this cycle
I0322 02:24:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 02:24:32.569490  543705 disk_info.go:125] begin check local disk info of client
I0322 02:24:32.571991  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:24:32.571997  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312080 0xc0003120c0]
E0322 02:24:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:24:33.409781  543705 memory.go:184] no items to output this cycle
I0322 02:24:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 02:24:39.452008  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:24:39.452015  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:24:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:24:43.410633  543705 memory.go:191] Add success.
I0322 02:24:43.409815  543705 cpu.go:282] Add success.
I0322 02:24:43.420412  543705 net.go:648] Add success.
I0322 02:24:43.423360  543705 net.go:770] primary dev: ETH0
I0322 02:24:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:24:43.423395  543705 net.go:698] Add success.
I0322 02:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:24:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:24:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:24:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:24:53.409787  543705 memory.go:184] no items to output this cycle
I0322 02:24:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 02:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:25:03.409800  543705 memory.go:184] no items to output this cycle
I0322 02:25:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 02:25:13.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:25:13.409875  543705 memory.go:191] Add success.
W0322 02:25:13.409905  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:25:13.409917  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:25:13.409924  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:25:13.409955  543705 cpu.go:282] Add success.
I0322 02:25:13.419729  543705 net.go:648] Add success.
I0322 02:25:13.422267  543705 net.go:770] primary dev: ETH0
I0322 02:25:13.422282  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:25:13.422295  543705 net.go:698] Add success.
I0322 02:25:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:25:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:25:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 02:25:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:25:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 02:25:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:25:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:25:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:25:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:25:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:25:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:25:23.409784  543705 memory.go:184] no items to output this cycle
I0322 02:25:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 02:25:32.572503  543705 disk_info.go:125] begin check local disk info of client
I0322 02:25:32.575015  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:25:32.575022  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ddb40 0xc0004ddb80]
E0322 02:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:25:33.409783  543705 memory.go:184] no items to output this cycle
I0322 02:25:33.409799  543705 cpu.go:275] no items to output this cycle
E0322 02:25:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:25:43.409798  543705 memory.go:191] Add success.
I0322 02:25:43.409823  543705 cpu.go:282] Add success.
I0322 02:25:43.419981  543705 net.go:648] Add success.
I0322 02:25:43.422767  543705 net.go:770] primary dev: ETH0
I0322 02:25:43.422782  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:25:43.422797  543705 net.go:698] Add success.
I0322 02:25:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:25:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:25:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:25:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:25:53.409791  543705 memory.go:184] no items to output this cycle
I0322 02:25:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 02:26:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:26:03.409780  543705 memory.go:184] no items to output this cycle
I0322 02:26:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 02:26:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:26:13.409811  543705 memory.go:191] Add success.
I0322 02:26:13.409818  543705 cpu.go:282] Add success.
W0322 02:26:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:26:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:26:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:26:13.420332  543705 net.go:648] Add success.
I0322 02:26:13.423166  543705 net.go:770] primary dev: ETH0
I0322 02:26:13.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:26:13.423189  543705 net.go:698] Add success.
I0322 02:26:14.454946  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:26:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:26:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 02:26:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:26:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 02:26:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:26:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:26:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:26:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:26:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:26:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:26:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:26:23.409798  543705 memory.go:184] no items to output this cycle
I0322 02:26:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 02:26:32.575519  543705 disk_info.go:125] begin check local disk info of client
I0322 02:26:32.578017  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:26:32.578023  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dd600 0xc0004dd640]
E0322 02:26:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:26:33.409785  543705 memory.go:184] no items to output this cycle
I0322 02:26:33.409801  543705 cpu.go:275] no items to output this cycle
E0322 02:26:43.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:26:43.409842  543705 memory.go:191] Add success.
I0322 02:26:43.409845  543705 cpu.go:282] Add success.
I0322 02:26:43.420209  543705 net.go:648] Add success.
I0322 02:26:43.422727  543705 net.go:770] primary dev: ETH0
I0322 02:26:43.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:26:43.422751  543705 net.go:698] Add success.
I0322 02:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:26:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:26:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:26:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:26:53.409792  543705 memory.go:184] no items to output this cycle
I0322 02:26:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 02:27:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:27:03.409777  543705 memory.go:184] no items to output this cycle
I0322 02:27:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 02:27:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:27:13.409809  543705 memory.go:191] Add success.
I0322 02:27:13.409820  543705 cpu.go:282] Add success.
W0322 02:27:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:27:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:27:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:27:13.420166  543705 net.go:648] Add success.
I0322 02:27:13.429618  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 02:27:13.429776  543705 net.go:770] primary dev: ETH0
I0322 02:27:13.429789  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:27:13.429802  543705 net.go:698] Add success.
I0322 02:27:13.453548  543705 event_worker.go:152] Polling the log file for events...
I0322 02:27:13.470857  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4295f0b6-b004-4716-81db-4c5d81f21085","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:27:13.470887  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 02:27:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:27:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 02:27:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:27:14.456132  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:27:14.456141  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:27:14.456147  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:27:14.456498  543705 disk_worker.go:494] system disk:vda1
I0322 02:27:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:27:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:27:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:27:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:27:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:27:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:27:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:27:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:27:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:27:23.409776  543705 memory.go:184] no items to output this cycle
I0322 02:27:23.409777  543705 cpu.go:275] no items to output this cycle
I0322 02:27:32.578522  543705 disk_info.go:125] begin check local disk info of client
I0322 02:27:32.581016  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:27:32.581023  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9000 0xc0004d9040]
E0322 02:27:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:27:33.409772  543705 cpu.go:275] no items to output this cycle
I0322 02:27:33.409778  543705 memory.go:184] no items to output this cycle
I0322 02:27:39.453024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:27:39.453032  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:27:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:27:43.410640  543705 memory.go:191] Add success.
I0322 02:27:43.409815  543705 cpu.go:282] Add success.
I0322 02:27:43.420333  543705 net.go:648] Add success.
I0322 02:27:43.422988  543705 net.go:770] primary dev: ETH0
I0322 02:27:43.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:27:43.423015  543705 net.go:698] Add success.
I0322 02:27:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:27:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:27:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:27:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:27:53.409774  543705 cpu.go:275] no items to output this cycle
I0322 02:27:53.409785  543705 memory.go:184] no items to output this cycle
E0322 02:28:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:28:03.409796  543705 memory.go:184] no items to output this cycle
I0322 02:28:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 02:28:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:28:13.409884  543705 cpu.go:282] Add success.
I0322 02:28:13.409895  543705 memory.go:191] Add success.
W0322 02:28:13.409926  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:28:13.409939  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:28:13.409944  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:28:13.419728  543705 net.go:648] Add success.
I0322 02:28:13.422602  543705 net.go:770] primary dev: ETH0
I0322 02:28:13.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:28:13.422627  543705 net.go:698] Add success.
I0322 02:28:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:28:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:28:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 02:28:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:28:14.456540  543705 disk_worker.go:494] system disk:vda1
I0322 02:28:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:28:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:28:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:28:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:28:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:28:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:28:23.409795  543705 memory.go:184] no items to output this cycle
I0322 02:28:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 02:28:32.581548  543705 disk_info.go:125] begin check local disk info of client
I0322 02:28:32.583995  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:28:32.584001  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471800 0xc000471840]
E0322 02:28:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:28:33.409774  543705 memory.go:184] no items to output this cycle
I0322 02:28:33.409793  543705 cpu.go:275] no items to output this cycle
E0322 02:28:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:28:43.409803  543705 cpu.go:282] Add success.
I0322 02:28:43.409811  543705 memory.go:191] Add success.
I0322 02:28:43.419940  543705 net.go:648] Add success.
I0322 02:28:43.422590  543705 net.go:770] primary dev: ETH0
I0322 02:28:43.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:28:43.422615  543705 net.go:698] Add success.
I0322 02:28:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:28:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:28:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:28:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:28:53.409801  543705 memory.go:184] no items to output this cycle
I0322 02:28:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 02:29:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:29:03.409805  543705 memory.go:184] no items to output this cycle
I0322 02:29:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 02:29:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:29:13.409891  543705 cpu.go:282] Add success.
I0322 02:29:13.409905  543705 memory.go:191] Add success.
W0322 02:29:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:29:13.409979  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:29:13.409982  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:29:13.419755  543705 net.go:648] Add success.
I0322 02:29:13.422294  543705 net.go:770] primary dev: ETH0
I0322 02:29:13.422306  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:29:13.422317  543705 net.go:698] Add success.
I0322 02:29:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:29:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:29:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 02:29:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:29:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 02:29:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:29:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:29:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:29:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:29:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:29:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:29:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:29:23.409810  543705 memory.go:184] no items to output this cycle
I0322 02:29:23.409818  543705 cpu.go:275] no items to output this cycle
I0322 02:29:32.584591  543705 disk_info.go:125] begin check local disk info of client
I0322 02:29:32.587103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:29:32.587109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc5c0 0xc0004dc600]
E0322 02:29:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:29:33.409778  543705 memory.go:184] no items to output this cycle
I0322 02:29:33.409794  543705 cpu.go:275] no items to output this cycle
E0322 02:29:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:29:43.409800  543705 cpu.go:282] Add success.
I0322 02:29:43.409808  543705 memory.go:191] Add success.
I0322 02:29:43.419874  543705 net.go:648] Add success.
I0322 02:29:43.423182  543705 net.go:770] primary dev: ETH0
I0322 02:29:43.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:29:43.423208  543705 net.go:698] Add success.
I0322 02:29:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:29:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:29:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:29:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:29:53.409776  543705 memory.go:184] no items to output this cycle
I0322 02:29:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 02:30:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:30:03.409793  543705 memory.go:184] no items to output this cycle
I0322 02:30:03.409806  543705 cpu.go:275] no items to output this cycle
W0322 02:30:13.409699  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:30:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:30:13.409720  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 02:30:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:30:13.409811  543705 memory.go:191] Add success.
I0322 02:30:13.409822  543705 cpu.go:282] Add success.
I0322 02:30:13.420240  543705 net.go:648] Add success.
I0322 02:30:13.423086  543705 net.go:770] primary dev: ETH0
I0322 02:30:13.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:30:13.423110  543705 net.go:698] Add success.
I0322 02:30:13.467758  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c0e5ce6-d0fc-4db8-81fb-2050a0338b09","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:30:13.467789  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:30:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:30:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:30:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 02:30:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:30:14.456693  543705 disk_worker.go:494] system disk:vda1
I0322 02:30:14.456730  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:30:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:30:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:30:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:30:23.409769  543705 memory.go:184] no items to output this cycle
I0322 02:30:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 02:30:32.587576  543705 disk_info.go:125] begin check local disk info of client
I0322 02:30:32.590077  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:30:32.590083  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc680 0xc0004dc6c0]
E0322 02:30:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:30:33.409781  543705 memory.go:184] no items to output this cycle
I0322 02:30:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 02:30:39.454005  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:30:39.454013  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:30:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:30:43.410781  543705 memory.go:191] Add success.
I0322 02:30:43.409859  543705 cpu.go:282] Add success.
I0322 02:30:43.420459  543705 net.go:648] Add success.
I0322 02:30:43.423151  543705 net.go:770] primary dev: ETH0
I0322 02:30:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:30:43.423178  543705 net.go:698] Add success.
I0322 02:30:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:30:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:30:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:30:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:30:53.409777  543705 memory.go:184] no items to output this cycle
I0322 02:30:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 02:31:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:31:03.409776  543705 memory.go:184] no items to output this cycle
I0322 02:31:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 02:31:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:31:13.409781  543705 memory.go:191] Add success.
I0322 02:31:13.409805  543705 cpu.go:282] Add success.
W0322 02:31:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:31:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:31:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:31:13.420278  543705 net.go:648] Add success.
I0322 02:31:13.423428  543705 net.go:770] primary dev: ETH0
I0322 02:31:13.423444  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:31:13.423457  543705 net.go:698] Add success.
I0322 02:31:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:31:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:31:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 02:31:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:31:14.456506  543705 disk_worker.go:494] system disk:vda1
I0322 02:31:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:31:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:31:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:31:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:31:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:31:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:31:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:31:23.409793  543705 memory.go:184] no items to output this cycle
I0322 02:31:23.409792  543705 cpu.go:275] no items to output this cycle
I0322 02:31:32.590583  543705 disk_info.go:125] begin check local disk info of client
I0322 02:31:32.593056  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:31:32.593062  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004701c0 0xc000470200]
E0322 02:31:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:31:33.409789  543705 memory.go:184] no items to output this cycle
I0322 02:31:33.409802  543705 cpu.go:275] no items to output this cycle
E0322 02:31:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:31:43.409794  543705 memory.go:191] Add success.
I0322 02:31:43.409817  543705 cpu.go:282] Add success.
I0322 02:31:43.419946  543705 net.go:648] Add success.
I0322 02:31:43.422624  543705 net.go:770] primary dev: ETH0
I0322 02:31:43.422638  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:31:43.422650  543705 net.go:698] Add success.
I0322 02:31:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:31:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:31:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:31:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:31:53.409760  543705 memory.go:184] no items to output this cycle
I0322 02:31:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 02:32:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:32:03.409762  543705 memory.go:184] no items to output this cycle
I0322 02:32:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 02:32:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:32:13.409781  543705 memory.go:191] Add success.
I0322 02:32:13.409798  543705 cpu.go:282] Add success.
W0322 02:32:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:32:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:32:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:32:13.420113  543705 net.go:648] Add success.
I0322 02:32:13.422938  543705 net.go:770] primary dev: ETH0
I0322 02:32:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:32:13.422964  543705 net.go:698] Add success.
W0322 02:32:14.455323  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:32:14.455337  543705 disk_worker.go:708] disk space is not compliant
W0322 02:32:14.455341  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:32:14.457029  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:32:14.457037  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:32:14.457043  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:32:14.457458  543705 disk_worker.go:494] system disk:vda1
I0322 02:32:14.457490  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:32:15.456776  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:32:15.456784  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:32:16.457898  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:32:16.457898  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:32:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:32:16.457971  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:32:16.472305  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:32:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:32:23.409777  543705 memory.go:184] no items to output this cycle
I0322 02:32:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 02:32:32.593612  543705 disk_info.go:125] begin check local disk info of client
I0322 02:32:32.596079  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:32:32.596086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a80 0xc0000c4ac0]
E0322 02:32:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:32:33.409783  543705 memory.go:184] no items to output this cycle
I0322 02:32:33.409797  543705 cpu.go:275] no items to output this cycle
E0322 02:32:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:32:43.409803  543705 memory.go:191] Add success.
I0322 02:32:43.409806  543705 cpu.go:282] Add success.
I0322 02:32:43.419993  543705 net.go:648] Add success.
I0322 02:32:43.422769  543705 net.go:770] primary dev: ETH0
I0322 02:32:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:32:43.422804  543705 net.go:698] Add success.
I0322 02:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:32:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:32:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:32:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:32:53.409793  543705 memory.go:184] no items to output this cycle
I0322 02:32:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 02:33:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:33:03.409774  543705 memory.go:184] no items to output this cycle
I0322 02:33:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 02:33:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:33:13.409787  543705 cpu.go:282] Add success.
I0322 02:33:13.409789  543705 memory.go:191] Add success.
W0322 02:33:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:33:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:33:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:33:13.420245  543705 net.go:648] Add success.
I0322 02:33:13.422977  543705 net.go:770] primary dev: ETH0
I0322 02:33:13.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:33:13.423002  543705 net.go:698] Add success.
I0322 02:33:13.464528  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3651f748-b131-4e6b-a34d-73335fd479ca","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:33:13.464564  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:33:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:33:14.455545  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:33:14.455559  543705 disk_worker.go:708] disk space is not compliant
W0322 02:33:14.455562  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:33:14.457142  543705 disk_worker.go:494] system disk:vda1
I0322 02:33:14.457195  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:33:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:33:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:33:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:33:23.409777  543705 memory.go:184] no items to output this cycle
I0322 02:33:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 02:33:32.596625  543705 disk_info.go:125] begin check local disk info of client
I0322 02:33:32.599106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:33:32.599113  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264cc0 0xc000264d00]
E0322 02:33:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:33:33.409761  543705 memory.go:184] no items to output this cycle
I0322 02:33:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 02:33:39.455037  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:33:39.455044  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:33:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:33:43.410621  543705 memory.go:191] Add success.
I0322 02:33:43.409834  543705 cpu.go:282] Add success.
I0322 02:33:43.420324  543705 net.go:648] Add success.
I0322 02:33:43.422867  543705 net.go:770] primary dev: ETH0
I0322 02:33:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:33:43.422896  543705 net.go:698] Add success.
I0322 02:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:33:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:33:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:33:53.409795  543705 memory.go:184] no items to output this cycle
I0322 02:33:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 02:34:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:34:03.409783  543705 memory.go:184] no items to output this cycle
I0322 02:34:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 02:34:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:34:13.409803  543705 memory.go:191] Add success.
I0322 02:34:13.409814  543705 cpu.go:282] Add success.
W0322 02:34:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:34:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:34:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:34:13.420129  543705 net.go:648] Add success.
I0322 02:34:13.423128  543705 net.go:770] primary dev: ETH0
I0322 02:34:13.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:34:13.423159  543705 net.go:698] Add success.
I0322 02:34:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:34:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:34:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 02:34:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:34:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 02:34:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:34:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:34:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:34:16.472093  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:34:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:34:23.409809  543705 memory.go:184] no items to output this cycle
I0322 02:34:23.409819  543705 cpu.go:275] no items to output this cycle
I0322 02:34:32.599637  543705 disk_info.go:125] begin check local disk info of client
I0322 02:34:32.602203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:34:32.602210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9840 0xc0004d9880]
E0322 02:34:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:34:33.409774  543705 memory.go:184] no items to output this cycle
I0322 02:34:33.409797  543705 cpu.go:275] no items to output this cycle
E0322 02:34:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:34:43.409796  543705 memory.go:191] Add success.
I0322 02:34:43.409813  543705 cpu.go:282] Add success.
I0322 02:34:43.420058  543705 net.go:648] Add success.
I0322 02:34:43.422628  543705 net.go:770] primary dev: ETH0
I0322 02:34:43.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:34:43.422653  543705 net.go:698] Add success.
I0322 02:34:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:34:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:34:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:34:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:34:53.409798  543705 memory.go:184] no items to output this cycle
I0322 02:34:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 02:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:35:03.409783  543705 memory.go:184] no items to output this cycle
I0322 02:35:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 02:35:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:35:13.409782  543705 memory.go:191] Add success.
I0322 02:35:13.409786  543705 cpu.go:282] Add success.
W0322 02:35:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:35:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:35:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:35:13.420070  543705 net.go:648] Add success.
I0322 02:35:13.422770  543705 net.go:770] primary dev: ETH0
I0322 02:35:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:35:13.422798  543705 net.go:698] Add success.
I0322 02:35:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:35:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:35:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 02:35:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:35:14.456616  543705 disk_worker.go:494] system disk:vda1
I0322 02:35:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:35:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:35:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:35:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:35:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:35:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 02:35:23.409779  543705 memory.go:184] no items to output this cycle
I0322 02:35:32.602302  543705 disk_info.go:125] begin check local disk info of client
I0322 02:35:32.604752  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:35:32.604759  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046c580 0xc00046c5c0]
E0322 02:35:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:35:33.409787  543705 memory.go:184] no items to output this cycle
I0322 02:35:33.409797  543705 cpu.go:275] no items to output this cycle
E0322 02:35:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:35:43.409829  543705 memory.go:191] Add success.
I0322 02:35:43.409831  543705 cpu.go:282] Add success.
I0322 02:35:43.420022  543705 net.go:648] Add success.
I0322 02:35:43.422978  543705 net.go:770] primary dev: ETH0
I0322 02:35:43.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:35:43.423005  543705 net.go:698] Add success.
I0322 02:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:35:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:35:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:35:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:35:53.409791  543705 memory.go:184] no items to output this cycle
I0322 02:35:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 02:36:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:36:03.409778  543705 memory.go:184] no items to output this cycle
I0322 02:36:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 02:36:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:36:13.409809  543705 memory.go:191] Add success.
I0322 02:36:13.409812  543705 cpu.go:282] Add success.
W0322 02:36:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:36:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:36:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:36:13.420069  543705 net.go:648] Add success.
I0322 02:36:13.423004  543705 net.go:770] primary dev: ETH0
I0322 02:36:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:36:13.423030  543705 net.go:698] Add success.
I0322 02:36:13.469478  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"210a314a-6c09-4f35-81b2-ac4369cb5f80","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:36:13.469511  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:36:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:36:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:36:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 02:36:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:36:14.456526  543705 disk_worker.go:494] system disk:vda1
I0322 02:36:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:36:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:36:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:36:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:36:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:36:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:36:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:36:23.409811  543705 memory.go:184] no items to output this cycle
I0322 02:36:23.409823  543705 cpu.go:275] no items to output this cycle
I0322 02:36:32.605675  543705 disk_info.go:125] begin check local disk info of client
I0322 02:36:32.608108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:36:32.608115  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac80 0xc00007acc0]
E0322 02:36:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:36:33.409783  543705 memory.go:184] no items to output this cycle
I0322 02:36:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 02:36:39.456032  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:36:39.456039  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:36:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:36:43.410610  543705 memory.go:191] Add success.
I0322 02:36:43.409809  543705 cpu.go:282] Add success.
I0322 02:36:43.420313  543705 net.go:648] Add success.
I0322 02:36:43.422934  543705 net.go:770] primary dev: ETH0
I0322 02:36:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:36:43.422960  543705 net.go:698] Add success.
I0322 02:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:36:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:36:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:36:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:36:53.409798  543705 memory.go:184] no items to output this cycle
I0322 02:36:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 02:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:37:03.409784  543705 memory.go:184] no items to output this cycle
I0322 02:37:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 02:37:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:37:13.409812  543705 memory.go:191] Add success.
I0322 02:37:13.409820  543705 cpu.go:282] Add success.
W0322 02:37:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:37:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:37:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:37:13.420071  543705 net.go:648] Add success.
I0322 02:37:13.422805  543705 net.go:770] primary dev: ETH0
I0322 02:37:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:37:13.422834  543705 net.go:698] Add success.
I0322 02:37:13.453385  543705 event_worker.go:152] Polling the log file for events...
W0322 02:37:14.455460  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:37:14.455476  543705 disk_worker.go:708] disk space is not compliant
W0322 02:37:14.455487  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:37:14.456516  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:37:14.456525  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:37:14.456531  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:37:14.457453  543705 disk_worker.go:494] system disk:vda1
I0322 02:37:14.457492  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:37:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:37:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:37:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:37:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:37:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:37:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:37:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:37:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:37:23.409799  543705 memory.go:184] no items to output this cycle
I0322 02:37:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 02:37:32.608708  543705 disk_info.go:125] begin check local disk info of client
I0322 02:37:32.611197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:37:32.611204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d89c0 0xc0004d8a00]
E0322 02:37:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:37:33.409793  543705 memory.go:184] no items to output this cycle
I0322 02:37:33.409809  543705 cpu.go:275] no items to output this cycle
E0322 02:37:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:37:43.409832  543705 memory.go:191] Add success.
I0322 02:37:43.409841  543705 cpu.go:282] Add success.
I0322 02:37:43.419935  543705 net.go:648] Add success.
I0322 02:37:43.422743  543705 net.go:770] primary dev: ETH0
I0322 02:37:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:37:43.422773  543705 net.go:698] Add success.
I0322 02:37:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:37:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:37:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:37:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:37:53.409784  543705 cpu.go:275] no items to output this cycle
I0322 02:37:53.409786  543705 memory.go:184] no items to output this cycle
E0322 02:38:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:38:03.409803  543705 memory.go:184] no items to output this cycle
I0322 02:38:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 02:38:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:38:13.409798  543705 cpu.go:282] Add success.
I0322 02:38:13.409807  543705 memory.go:191] Add success.
W0322 02:38:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:38:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:38:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:38:13.420239  543705 net.go:648] Add success.
I0322 02:38:13.423141  543705 net.go:770] primary dev: ETH0
I0322 02:38:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:38:13.423169  543705 net.go:698] Add success.
I0322 02:38:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:38:14.455317  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:38:14.455392  543705 disk_worker.go:708] disk space is not compliant
W0322 02:38:14.455395  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:38:14.457528  543705 disk_worker.go:494] system disk:vda1
I0322 02:38:14.457556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:38:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:38:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:38:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:38:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:38:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:38:23.409778  543705 memory.go:184] no items to output this cycle
I0322 02:38:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 02:38:32.611691  543705 disk_info.go:125] begin check local disk info of client
I0322 02:38:32.614197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:38:32.614204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2340 0xc0004a2380]
E0322 02:38:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:38:33.409790  543705 memory.go:184] no items to output this cycle
I0322 02:38:33.409812  543705 cpu.go:275] no items to output this cycle
E0322 02:38:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:38:43.409819  543705 memory.go:191] Add success.
I0322 02:38:43.409824  543705 cpu.go:282] Add success.
I0322 02:38:43.420056  543705 net.go:648] Add success.
I0322 02:38:43.423220  543705 net.go:770] primary dev: ETH0
I0322 02:38:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:38:43.423246  543705 net.go:698] Add success.
I0322 02:38:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:38:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:38:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:38:53.409809  543705 memory.go:184] no items to output this cycle
I0322 02:38:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 02:39:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:39:03.409782  543705 memory.go:184] no items to output this cycle
I0322 02:39:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 02:39:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:39:13.409790  543705 memory.go:191] Add success.
I0322 02:39:13.409815  543705 cpu.go:282] Add success.
W0322 02:39:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:39:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:39:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:39:13.420041  543705 net.go:648] Add success.
I0322 02:39:13.422659  543705 net.go:770] primary dev: ETH0
I0322 02:39:13.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:39:13.422684  543705 net.go:698] Add success.
I0322 02:39:13.467845  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e99190e2-fb19-408f-9cec-b6bedf71feb0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:39:13.467876  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:39:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:39:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:39:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 02:39:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:39:14.456723  543705 disk_worker.go:494] system disk:vda1
I0322 02:39:14.456753  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:39:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:39:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:39:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:39:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:39:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:39:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:39:23.409802  543705 memory.go:184] no items to output this cycle
I0322 02:39:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 02:39:32.614707  543705 disk_info.go:125] begin check local disk info of client
I0322 02:39:32.617189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:39:32.617195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278480 0xc0002784c0]
E0322 02:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:39:33.409790  543705 memory.go:184] no items to output this cycle
I0322 02:39:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 02:39:39.457042  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:39:39.457050  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:39:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:39:43.409816  543705 cpu.go:282] Add success.
I0322 02:39:43.409839  543705 memory.go:191] Add success.
I0322 02:39:43.420132  543705 net.go:648] Add success.
I0322 02:39:43.421031  543705 net.go:770] primary dev: ETH0
I0322 02:39:43.421045  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:39:43.421058  543705 net.go:698] Add success.
I0322 02:39:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:39:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:39:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:39:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:39:53.409780  543705 memory.go:184] no items to output this cycle
I0322 02:39:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:40:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:40:03.409795  543705 memory.go:184] no items to output this cycle
I0322 02:40:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 02:40:13.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:40:13.409900  543705 memory.go:191] Add success.
W0322 02:40:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:40:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:40:13.409946  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:40:13.409989  543705 cpu.go:282] Add success.
I0322 02:40:13.419721  543705 net.go:648] Add success.
I0322 02:40:13.422276  543705 net.go:770] primary dev: ETH0
I0322 02:40:13.422289  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:40:13.422301  543705 net.go:698] Add success.
I0322 02:40:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:40:14.455080  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:40:14.455142  543705 disk_worker.go:708] disk space is not compliant
W0322 02:40:14.455145  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:40:14.456446  543705 disk_worker.go:494] system disk:vda1
I0322 02:40:14.456489  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:40:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:40:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:40:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:40:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:40:23.409790  543705 memory.go:184] no items to output this cycle
I0322 02:40:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 02:40:32.617676  543705 disk_info.go:125] begin check local disk info of client
I0322 02:40:32.620164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:40:32.620171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dd780 0xc0004dd7c0]
E0322 02:40:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:40:33.409778  543705 memory.go:184] no items to output this cycle
I0322 02:40:33.409797  543705 cpu.go:275] no items to output this cycle
E0322 02:40:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:40:43.409807  543705 memory.go:191] Add success.
I0322 02:40:43.409881  543705 cpu.go:282] Add success.
I0322 02:40:43.420209  543705 net.go:648] Add success.
I0322 02:40:43.421107  543705 net.go:770] primary dev: ETH0
I0322 02:40:43.421127  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:40:43.421149  543705 net.go:698] Add success.
I0322 02:40:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:40:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:40:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:40:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:40:53.409797  543705 memory.go:184] no items to output this cycle
I0322 02:40:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 02:41:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:41:03.409805  543705 memory.go:184] no items to output this cycle
I0322 02:41:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 02:41:13.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:41:13.409916  543705 memory.go:191] Add success.
W0322 02:41:13.409944  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:41:13.409997  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:41:13.409997  543705 cpu.go:282] Add success.
I0322 02:41:13.410004  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:41:13.419705  543705 net.go:648] Add success.
I0322 02:41:13.422079  543705 net.go:770] primary dev: ETH0
I0322 02:41:13.422092  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:41:13.422103  543705 net.go:698] Add success.
I0322 02:41:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:41:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:41:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 02:41:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:41:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 02:41:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:41:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:41:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:41:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:41:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:41:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:41:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:41:23.409797  543705 memory.go:184] no items to output this cycle
I0322 02:41:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 02:41:32.620738  543705 disk_info.go:125] begin check local disk info of client
I0322 02:41:32.623256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:41:32.623263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
E0322 02:41:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:41:33.409782  543705 memory.go:184] no items to output this cycle
I0322 02:41:33.409802  543705 cpu.go:275] no items to output this cycle
E0322 02:41:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:41:43.409844  543705 memory.go:191] Add success.
I0322 02:41:43.409842  543705 cpu.go:282] Add success.
I0322 02:41:43.420201  543705 net.go:648] Add success.
I0322 02:41:43.423340  543705 net.go:770] primary dev: ETH0
I0322 02:41:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:41:43.423366  543705 net.go:698] Add success.
I0322 02:41:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:41:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:41:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:41:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:41:53.409779  543705 memory.go:184] no items to output this cycle
I0322 02:41:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 02:42:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:42:03.409800  543705 memory.go:184] no items to output this cycle
I0322 02:42:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 02:42:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:42:13.409779  543705 memory.go:191] Add success.
W0322 02:42:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 02:42:13.409807  543705 cpu.go:282] Add success.
W0322 02:42:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:42:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:42:13.420137  543705 net.go:648] Add success.
I0322 02:42:13.423015  543705 net.go:770] primary dev: ETH0
I0322 02:42:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:42:13.423051  543705 net.go:698] Add success.
I0322 02:42:13.693442  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"62a43e60-4f43-4362-bc98-87cbb6b702bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:42:13.693475  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 02:42:14.454204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:42:14.454294  543705 disk_worker.go:708] disk space is not compliant
W0322 02:42:14.454299  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:42:14.454952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:42:14.454960  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:42:14.454966  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:42:14.455854  543705 disk_worker.go:494] system disk:vda1
I0322 02:42:14.455883  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:42:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:42:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:42:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:42:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:42:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:42:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:42:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:42:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:42:23.409782  543705 memory.go:184] no items to output this cycle
I0322 02:42:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 02:42:32.623760  543705 disk_info.go:125] begin check local disk info of client
I0322 02:42:32.626300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:42:32.626306  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471d00 0xc000471d40]
E0322 02:42:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:42:33.409770  543705 memory.go:184] no items to output this cycle
I0322 02:42:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 02:42:39.458041  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:42:39.458047  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:42:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:42:43.409799  543705 memory.go:191] Add success.
I0322 02:42:43.409866  543705 cpu.go:282] Add success.
I0322 02:42:43.420118  543705 net.go:648] Add success.
I0322 02:42:43.420969  543705 net.go:770] primary dev: ETH0
I0322 02:42:43.420983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:42:43.420996  543705 net.go:698] Add success.
I0322 02:42:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:42:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:42:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:42:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:42:53.409776  543705 memory.go:184] no items to output this cycle
I0322 02:42:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 02:43:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:43:03.409797  543705 memory.go:184] no items to output this cycle
I0322 02:43:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:43:13.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:43:13.409910  543705 cpu.go:282] Add success.
I0322 02:43:13.409931  543705 memory.go:191] Add success.
W0322 02:43:13.409989  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:43:13.410008  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:43:13.410013  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:43:13.419710  543705 net.go:648] Add success.
I0322 02:43:13.422374  543705 net.go:770] primary dev: ETH0
I0322 02:43:13.422388  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:43:13.422401  543705 net.go:698] Add success.
I0322 02:43:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:43:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:43:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 02:43:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:43:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 02:43:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:43:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:43:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:43:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:43:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:43:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:43:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:43:23.409772  543705 memory.go:184] no items to output this cycle
I0322 02:43:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 02:43:32.626776  543705 disk_info.go:125] begin check local disk info of client
I0322 02:43:32.629293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:43:32.629300  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471440 0xc000471480]
E0322 02:43:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:43:33.409771  543705 memory.go:184] no items to output this cycle
I0322 02:43:33.409778  543705 cpu.go:275] no items to output this cycle
E0322 02:43:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:43:43.409823  543705 memory.go:191] Add success.
I0322 02:43:43.409826  543705 cpu.go:282] Add success.
I0322 02:43:43.420170  543705 net.go:648] Add success.
I0322 02:43:43.421148  543705 net.go:770] primary dev: ETH0
I0322 02:43:43.421165  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:43:43.421178  543705 net.go:698] Add success.
I0322 02:43:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:43:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:43:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:43:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:43:53.409780  543705 memory.go:184] no items to output this cycle
I0322 02:43:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 02:44:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:44:03.409795  543705 memory.go:184] no items to output this cycle
I0322 02:44:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:44:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:44:13.409814  543705 memory.go:191] Add success.
I0322 02:44:13.409822  543705 cpu.go:282] Add success.
W0322 02:44:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:44:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:44:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:44:13.420160  543705 net.go:648] Add success.
I0322 02:44:13.422950  543705 net.go:770] primary dev: ETH0
I0322 02:44:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:44:13.422974  543705 net.go:698] Add success.
I0322 02:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:44:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:44:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 02:44:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:44:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 02:44:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:44:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:44:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:44:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:44:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:44:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:44:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:44:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 02:44:23.409793  543705 memory.go:184] no items to output this cycle
I0322 02:44:32.629674  543705 disk_info.go:125] begin check local disk info of client
I0322 02:44:32.632195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:44:32.632201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9c80 0xc0004d9cc0]
E0322 02:44:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:44:33.409772  543705 memory.go:184] no items to output this cycle
I0322 02:44:33.409787  543705 cpu.go:275] no items to output this cycle
E0322 02:44:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:44:43.409808  543705 memory.go:191] Add success.
I0322 02:44:43.409864  543705 cpu.go:282] Add success.
I0322 02:44:43.420074  543705 net.go:648] Add success.
I0322 02:44:43.422650  543705 net.go:770] primary dev: ETH0
I0322 02:44:43.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:44:43.422676  543705 net.go:698] Add success.
I0322 02:44:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:44:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:44:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:44:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:44:53.409775  543705 memory.go:184] no items to output this cycle
I0322 02:44:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 02:45:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:45:03.409800  543705 memory.go:184] no items to output this cycle
I0322 02:45:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 02:45:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:45:13.409782  543705 memory.go:191] Add success.
I0322 02:45:13.409793  543705 cpu.go:282] Add success.
W0322 02:45:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:45:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:45:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:45:13.420420  543705 net.go:648] Add success.
I0322 02:45:13.423628  543705 net.go:770] primary dev: ETH0
I0322 02:45:13.423642  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:45:13.423656  543705 net.go:698] Add success.
I0322 02:45:13.463856  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c18b411-03e9-459e-a12c-5d06d4ccdeb6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:45:13.463887  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:45:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:45:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:45:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 02:45:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:45:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 02:45:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:45:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:45:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:45:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:45:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:45:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:45:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:45:23.409779  543705 memory.go:184] no items to output this cycle
I0322 02:45:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 02:45:32.632814  543705 disk_info.go:125] begin check local disk info of client
I0322 02:45:32.635405  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:45:32.635411  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265780 0xc0002657c0]
E0322 02:45:33.407520  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:45:33.407535  543705 memory.go:184] no items to output this cycle
I0322 02:45:33.407546  543705 cpu.go:275] no items to output this cycle
I0322 02:45:39.459050  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:45:39.459059  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:45:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:45:43.410677  543705 memory.go:191] Add success.
I0322 02:45:43.409821  543705 cpu.go:282] Add success.
I0322 02:45:43.420416  543705 net.go:648] Add success.
I0322 02:45:43.423013  543705 net.go:770] primary dev: ETH0
I0322 02:45:43.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:45:43.423039  543705 net.go:698] Add success.
I0322 02:45:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:45:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:45:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:45:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:45:53.409806  543705 memory.go:184] no items to output this cycle
I0322 02:45:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 02:46:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:46:03.409779  543705 memory.go:184] no items to output this cycle
I0322 02:46:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 02:46:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:46:13.409815  543705 memory.go:191] Add success.
I0322 02:46:13.409821  543705 cpu.go:282] Add success.
W0322 02:46:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:46:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:46:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:46:13.420180  543705 net.go:648] Add success.
I0322 02:46:13.422741  543705 net.go:770] primary dev: ETH0
I0322 02:46:13.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:46:13.422767  543705 net.go:698] Add success.
I0322 02:46:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:46:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:46:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0322 02:46:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:46:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 02:46:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:46:16.458020  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:46:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:46:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:46:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:46:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:46:23.409788  543705 memory.go:184] no items to output this cycle
I0322 02:46:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 02:46:32.635818  543705 disk_info.go:125] begin check local disk info of client
I0322 02:46:32.638358  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:46:32.638365  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc3c0 0xc0004dc400]
E0322 02:46:33.407883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:46:33.407898  543705 memory.go:184] no items to output this cycle
I0322 02:46:33.407916  543705 cpu.go:275] no items to output this cycle
E0322 02:46:43.409822  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:46:43.409856  543705 memory.go:191] Add success.
I0322 02:46:43.409937  543705 cpu.go:282] Add success.
I0322 02:46:43.419840  543705 net.go:648] Add success.
I0322 02:46:43.422755  543705 net.go:770] primary dev: ETH0
I0322 02:46:43.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:46:43.422792  543705 net.go:698] Add success.
I0322 02:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:46:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:46:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:46:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:46:53.409784  543705 memory.go:184] no items to output this cycle
I0322 02:46:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 02:47:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:47:03.409799  543705 memory.go:184] no items to output this cycle
I0322 02:47:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:47:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:47:13.409776  543705 memory.go:191] Add success.
I0322 02:47:13.409798  543705 cpu.go:282] Add success.
W0322 02:47:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:47:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:47:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:47:13.420103  543705 net.go:648] Add success.
I0322 02:47:13.422684  543705 net.go:770] primary dev: ETH0
I0322 02:47:13.422697  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:47:13.422709  543705 net.go:698] Add success.
I0322 02:47:13.453243  543705 event_worker.go:152] Polling the log file for events...
W0322 02:47:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:47:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 02:47:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:47:14.455863  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:47:14.455871  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:47:14.455877  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:47:14.456616  543705 disk_worker.go:494] system disk:vda1
I0322 02:47:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:47:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:47:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:47:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:47:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:47:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:47:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:47:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:47:23.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:47:23.410386  543705 memory.go:184] no items to output this cycle
I0322 02:47:23.410396  543705 cpu.go:275] no items to output this cycle
I0322 02:47:32.638826  543705 disk_info.go:125] begin check local disk info of client
I0322 02:47:32.641393  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:47:32.641400  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003776c0 0xc000377700]
E0322 02:47:33.407863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:47:33.407880  543705 memory.go:184] no items to output this cycle
I0322 02:47:33.407905  543705 cpu.go:275] no items to output this cycle
E0322 02:47:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:47:43.409823  543705 memory.go:191] Add success.
I0322 02:47:43.409825  543705 cpu.go:282] Add success.
I0322 02:47:43.420058  543705 net.go:648] Add success.
I0322 02:47:43.422513  543705 net.go:770] primary dev: ETH0
I0322 02:47:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:47:43.422538  543705 net.go:698] Add success.
I0322 02:47:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:47:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:47:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:47:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:47:53.409772  543705 memory.go:184] no items to output this cycle
I0322 02:47:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 02:48:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:48:03.409771  543705 memory.go:184] no items to output this cycle
I0322 02:48:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 02:48:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:48:13.409785  543705 memory.go:191] Add success.
I0322 02:48:13.409788  543705 cpu.go:282] Add success.
W0322 02:48:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:48:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:48:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:48:13.420100  543705 net.go:648] Add success.
I0322 02:48:13.422768  543705 net.go:770] primary dev: ETH0
I0322 02:48:13.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:48:13.422793  543705 net.go:698] Add success.
I0322 02:48:13.463297  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a7744c3-e4d9-4766-a438-33bef225f21f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:48:13.463330  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:48:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:48:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:48:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 02:48:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:48:14.456848  543705 disk_worker.go:494] system disk:vda1
I0322 02:48:14.456879  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:48:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:48:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:48:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:48:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:48:23.409821  543705 memory.go:184] no items to output this cycle
I0322 02:48:23.409833  543705 cpu.go:275] no items to output this cycle
I0322 02:48:32.641677  543705 disk_info.go:125] begin check local disk info of client
I0322 02:48:32.644190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:48:32.644197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4400 0xc0000c4440]
E0322 02:48:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:48:33.409801  543705 memory.go:184] no items to output this cycle
I0322 02:48:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 02:48:39.460052  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:48:39.460058  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:48:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:48:43.410603  543705 memory.go:191] Add success.
I0322 02:48:43.409821  543705 cpu.go:282] Add success.
I0322 02:48:43.420315  543705 net.go:648] Add success.
I0322 02:48:43.422908  543705 net.go:770] primary dev: ETH0
I0322 02:48:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:48:43.422937  543705 net.go:698] Add success.
I0322 02:48:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:48:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:48:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:48:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:48:53.409816  543705 memory.go:184] no items to output this cycle
I0322 02:48:53.409827  543705 cpu.go:275] no items to output this cycle
E0322 02:49:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:49:03.409811  543705 memory.go:184] no items to output this cycle
I0322 02:49:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 02:49:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:49:13.409797  543705 memory.go:191] Add success.
I0322 02:49:13.409799  543705 cpu.go:282] Add success.
W0322 02:49:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:49:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:49:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:49:13.420123  543705 net.go:648] Add success.
I0322 02:49:13.422614  543705 net.go:770] primary dev: ETH0
I0322 02:49:13.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:49:13.422638  543705 net.go:698] Add success.
I0322 02:49:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:49:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:49:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0322 02:49:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:49:14.456469  543705 disk_worker.go:494] system disk:vda1
I0322 02:49:14.456511  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:49:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:49:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:49:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:49:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:49:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:49:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:49:23.409796  543705 memory.go:184] no items to output this cycle
I0322 02:49:23.409800  543705 cpu.go:275] no items to output this cycle
I0322 02:49:32.644859  543705 disk_info.go:125] begin check local disk info of client
I0322 02:49:32.647379  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:49:32.647386  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4380 0xc0000c43c0]
E0322 02:49:33.407889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:49:33.407905  543705 memory.go:184] no items to output this cycle
I0322 02:49:33.407923  543705 cpu.go:275] no items to output this cycle
E0322 02:49:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:49:43.409837  543705 memory.go:191] Add success.
I0322 02:49:43.409843  543705 cpu.go:282] Add success.
I0322 02:49:43.419972  543705 net.go:648] Add success.
I0322 02:49:43.422808  543705 net.go:770] primary dev: ETH0
I0322 02:49:43.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:49:43.422836  543705 net.go:698] Add success.
I0322 02:49:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:49:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:49:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:49:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:49:53.409784  543705 cpu.go:275] no items to output this cycle
I0322 02:49:53.409786  543705 memory.go:184] no items to output this cycle
E0322 02:50:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:50:03.409773  543705 memory.go:184] no items to output this cycle
I0322 02:50:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 02:50:13.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:50:13.409944  543705 cpu.go:282] Add success.
I0322 02:50:13.410040  543705 memory.go:191] Add success.
W0322 02:50:13.410070  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:50:13.410083  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:50:13.410087  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:50:13.419725  543705 net.go:648] Add success.
I0322 02:50:13.422287  543705 net.go:770] primary dev: ETH0
I0322 02:50:13.422300  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:50:13.422311  543705 net.go:698] Add success.
I0322 02:50:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:50:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:50:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 02:50:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:50:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 02:50:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:50:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:50:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:50:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:50:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:50:23.409788  543705 memory.go:184] no items to output this cycle
I0322 02:50:23.409792  543705 cpu.go:275] no items to output this cycle
I0322 02:50:32.647881  543705 disk_info.go:125] begin check local disk info of client
I0322 02:50:32.650409  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:50:32.650415  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa080 0xc0001aa0c0]
E0322 02:50:33.407894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:50:33.407912  543705 memory.go:184] no items to output this cycle
I0322 02:50:33.407926  543705 cpu.go:275] no items to output this cycle
E0322 02:50:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:50:43.409821  543705 memory.go:191] Add success.
I0322 02:50:43.409830  543705 cpu.go:282] Add success.
I0322 02:50:43.419892  543705 net.go:648] Add success.
I0322 02:50:43.422694  543705 net.go:770] primary dev: ETH0
I0322 02:50:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:50:43.422725  543705 net.go:698] Add success.
I0322 02:50:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:50:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:50:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:50:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:50:53.409777  543705 memory.go:184] no items to output this cycle
I0322 02:50:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 02:51:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:51:03.409809  543705 memory.go:184] no items to output this cycle
I0322 02:51:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 02:51:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:51:13.409776  543705 memory.go:191] Add success.
I0322 02:51:13.409796  543705 cpu.go:282] Add success.
W0322 02:51:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:51:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:51:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:51:13.420184  543705 net.go:648] Add success.
I0322 02:51:13.423267  543705 net.go:770] primary dev: ETH0
I0322 02:51:13.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:51:13.423291  543705 net.go:698] Add success.
I0322 02:51:13.586439  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98413842-96fc-488b-bf05-ca22b6164eb7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:51:13.586471  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:51:14.454683  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:51:14.454900  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:51:14.454910  543705 disk_worker.go:708] disk space is not compliant
W0322 02:51:14.454912  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:51:14.456256  543705 disk_worker.go:494] system disk:vda1
I0322 02:51:14.456307  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:51:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:51:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:51:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:51:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:51:23.409780  543705 memory.go:184] no items to output this cycle
I0322 02:51:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 02:51:32.650894  543705 disk_info.go:125] begin check local disk info of client
I0322 02:51:32.653478  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:51:32.653486  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264000 0xc000264040]
E0322 02:51:33.407516  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:51:33.407530  543705 memory.go:184] no items to output this cycle
I0322 02:51:33.407538  543705 cpu.go:275] no items to output this cycle
I0322 02:51:39.461053  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:51:39.461061  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:51:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:51:43.410573  543705 memory.go:191] Add success.
I0322 02:51:43.409809  543705 cpu.go:282] Add success.
I0322 02:51:43.420323  543705 net.go:648] Add success.
I0322 02:51:43.423023  543705 net.go:770] primary dev: ETH0
I0322 02:51:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:51:43.423049  543705 net.go:698] Add success.
I0322 02:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:51:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:51:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:51:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:51:53.409800  543705 memory.go:184] no items to output this cycle
I0322 02:51:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 02:52:03.410012  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:52:03.410027  543705 memory.go:184] no items to output this cycle
I0322 02:52:03.410032  543705 cpu.go:275] no items to output this cycle
E0322 02:52:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:52:13.409783  543705 memory.go:191] Add success.
I0322 02:52:13.409783  543705 cpu.go:282] Add success.
W0322 02:52:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:52:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:52:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:52:13.420449  543705 net.go:648] Add success.
I0322 02:52:13.423474  543705 net.go:770] primary dev: ETH0
I0322 02:52:13.423486  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:52:13.423508  543705 net.go:698] Add success.
W0322 02:52:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:52:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 02:52:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:52:14.456816  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:52:14.456826  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:52:14.456831  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:52:14.456875  543705 disk_worker.go:494] system disk:vda1
I0322 02:52:14.456917  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:52:15.456794  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:52:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:52:16.458032  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:52:16.458032  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:52:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:52:16.458114  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:52:16.472500  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:52:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:52:23.409796  543705 memory.go:184] no items to output this cycle
I0322 02:52:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 02:52:32.653673  543705 disk_info.go:125] begin check local disk info of client
I0322 02:52:32.656176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:52:32.656183  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471d40 0xc000471d80]
E0322 02:52:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:52:33.409788  543705 memory.go:184] no items to output this cycle
I0322 02:52:33.409803  543705 cpu.go:275] no items to output this cycle
E0322 02:52:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:52:43.409783  543705 memory.go:191] Add success.
I0322 02:52:43.409806  543705 cpu.go:282] Add success.
I0322 02:52:43.419889  543705 net.go:648] Add success.
I0322 02:52:43.422475  543705 net.go:770] primary dev: ETH0
I0322 02:52:43.422488  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:52:43.422500  543705 net.go:698] Add success.
I0322 02:52:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:52:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:52:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:52:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:52:53.409784  543705 memory.go:184] no items to output this cycle
I0322 02:52:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 02:53:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:53:03.409795  543705 memory.go:184] no items to output this cycle
I0322 02:53:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 02:53:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:53:13.409776  543705 memory.go:191] Add success.
I0322 02:53:13.409802  543705 cpu.go:282] Add success.
W0322 02:53:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:53:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:53:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:53:13.420056  543705 net.go:648] Add success.
I0322 02:53:13.423484  543705 net.go:770] primary dev: ETH0
I0322 02:53:13.423498  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:53:13.423509  543705 net.go:698] Add success.
I0322 02:53:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:53:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:53:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 02:53:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:53:14.456983  543705 disk_worker.go:494] system disk:vda1
I0322 02:53:14.457022  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:53:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:53:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:53:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:53:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:53:23.409789  543705 memory.go:184] no items to output this cycle
I0322 02:53:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 02:53:32.656923  543705 disk_info.go:125] begin check local disk info of client
I0322 02:53:32.659463  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:53:32.659469  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007abc0 0xc00007ac00]
E0322 02:53:33.407899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:53:33.407915  543705 memory.go:184] no items to output this cycle
I0322 02:53:33.407931  543705 cpu.go:275] no items to output this cycle
E0322 02:53:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:53:43.409779  543705 memory.go:191] Add success.
I0322 02:53:43.409802  543705 cpu.go:282] Add success.
I0322 02:53:43.420011  543705 net.go:648] Add success.
I0322 02:53:43.422479  543705 net.go:770] primary dev: ETH0
I0322 02:53:43.422493  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:53:43.422506  543705 net.go:698] Add success.
I0322 02:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:53:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:53:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:53:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:53:53.409770  543705 memory.go:184] no items to output this cycle
I0322 02:53:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 02:54:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:54:03.409761  543705 memory.go:184] no items to output this cycle
I0322 02:54:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 02:54:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:54:13.409791  543705 cpu.go:282] Add success.
I0322 02:54:13.409793  543705 memory.go:191] Add success.
W0322 02:54:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:54:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:54:13.420504  543705 net.go:648] Add success.
I0322 02:54:13.423324  543705 net.go:770] primary dev: ETH0
I0322 02:54:13.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:54:13.423350  543705 net.go:698] Add success.
I0322 02:54:13.890466  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c8756ae-d089-4e7c-9f67-94bbd02e6070","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:54:13.890505  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 02:54:14.454700  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:54:14.454892  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:54:14.454973  543705 disk_worker.go:708] disk space is not compliant
W0322 02:54:14.454977  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:54:14.456697  543705 disk_worker.go:494] system disk:vda1
I0322 02:54:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:54:15.455612  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:54:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:54:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:54:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:54:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:54:23.409779  543705 memory.go:184] no items to output this cycle
I0322 02:54:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 02:54:32.659935  543705 disk_info.go:125] begin check local disk info of client
I0322 02:54:32.662495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:54:32.662501  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ccf00 0xc0004ccf40]
E0322 02:54:33.407886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:54:33.407902  543705 memory.go:184] no items to output this cycle
I0322 02:54:33.407918  543705 cpu.go:275] no items to output this cycle
I0322 02:54:39.462063  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:54:39.462070  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:54:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:54:43.410836  543705 memory.go:191] Add success.
I0322 02:54:43.409828  543705 cpu.go:282] Add success.
I0322 02:54:43.420550  543705 net.go:648] Add success.
I0322 02:54:43.423009  543705 net.go:770] primary dev: ETH0
I0322 02:54:43.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:54:43.423050  543705 net.go:698] Add success.
I0322 02:54:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:54:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:54:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:54:53.409772  543705 memory.go:184] no items to output this cycle
I0322 02:54:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 02:55:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:55:03.409768  543705 memory.go:184] no items to output this cycle
I0322 02:55:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 02:55:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:55:13.409818  543705 memory.go:191] Add success.
I0322 02:55:13.409823  543705 cpu.go:282] Add success.
W0322 02:55:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:55:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:55:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:55:13.420159  543705 net.go:648] Add success.
I0322 02:55:13.423026  543705 net.go:770] primary dev: ETH0
I0322 02:55:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:55:13.423060  543705 net.go:698] Add success.
I0322 02:55:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:55:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:55:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 02:55:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:55:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 02:55:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:55:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:55:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:55:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:55:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:55:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:55:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:55:23.409776  543705 memory.go:184] no items to output this cycle
I0322 02:55:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 02:55:32.662947  543705 disk_info.go:125] begin check local disk info of client
I0322 02:55:32.665432  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:55:32.665439  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265e40 0xc000265e80]
E0322 02:55:33.407527  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:55:33.407539  543705 memory.go:184] no items to output this cycle
I0322 02:55:33.407542  543705 cpu.go:275] no items to output this cycle
E0322 02:55:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:55:43.409824  543705 memory.go:191] Add success.
I0322 02:55:43.409827  543705 cpu.go:282] Add success.
I0322 02:55:43.420015  543705 net.go:648] Add success.
I0322 02:55:43.423089  543705 net.go:770] primary dev: ETH0
I0322 02:55:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:55:43.423116  543705 net.go:698] Add success.
I0322 02:55:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:55:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:55:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:55:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:55:53.409776  543705 memory.go:184] no items to output this cycle
I0322 02:55:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 02:56:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:56:03.409766  543705 memory.go:184] no items to output this cycle
I0322 02:56:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 02:56:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:56:13.409812  543705 memory.go:191] Add success.
I0322 02:56:13.409822  543705 cpu.go:282] Add success.
W0322 02:56:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:56:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:56:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:56:13.420238  543705 net.go:648] Add success.
I0322 02:56:13.423088  543705 net.go:770] primary dev: ETH0
I0322 02:56:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:56:13.423114  543705 net.go:698] Add success.
I0322 02:56:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:56:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:56:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 02:56:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:56:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 02:56:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:56:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:56:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:56:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:56:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:56:23.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:56:23.409912  543705 cpu.go:275] no items to output this cycle
I0322 02:56:23.410026  543705 memory.go:184] no items to output this cycle
I0322 02:56:32.665674  543705 disk_info.go:125] begin check local disk info of client
I0322 02:56:32.668145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:56:32.668152  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036ba40 0xc00036ba80]
E0322 02:56:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:56:33.409793  543705 memory.go:184] no items to output this cycle
I0322 02:56:33.409807  543705 cpu.go:275] no items to output this cycle
E0322 02:56:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:56:43.409794  543705 memory.go:191] Add success.
I0322 02:56:43.409796  543705 cpu.go:282] Add success.
I0322 02:56:43.419883  543705 net.go:648] Add success.
I0322 02:56:43.422941  543705 net.go:770] primary dev: ETH0
I0322 02:56:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:56:43.422966  543705 net.go:698] Add success.
I0322 02:56:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:56:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:56:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:56:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:56:53.409776  543705 memory.go:184] no items to output this cycle
I0322 02:56:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 02:57:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:57:03.409797  543705 memory.go:184] no items to output this cycle
I0322 02:57:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 02:57:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:57:13.409789  543705 memory.go:191] Add success.
I0322 02:57:13.409805  543705 cpu.go:282] Add success.
W0322 02:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:57:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:57:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:57:13.420121  543705 net.go:648] Add success.
I0322 02:57:13.428650  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 02:57:13.428723  543705 net.go:770] primary dev: ETH0
I0322 02:57:13.428738  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:57:13.428752  543705 net.go:698] Add success.
I0322 02:57:13.453291  543705 event_worker.go:152] Polling the log file for events...
I0322 02:57:13.468465  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b70218f-4316-4e7a-9dfc-ee5ce32e1c3e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 02:57:13.468496  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 02:57:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:57:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 02:57:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0322 02:57:14.455888  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 02:57:14.455897  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 02:57:14.455903  543705 custom_config.go:64] query custom config with name: gpu
I0322 02:57:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 02:57:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 02:57:15.456786  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 02:57:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:57:16.457903  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 02:57:16.457903  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 02:57:16.457957  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:57:16.457976  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:57:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:57:23.410361  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:57:23.410376  543705 memory.go:184] no items to output this cycle
I0322 02:57:23.410375  543705 cpu.go:275] no items to output this cycle
I0322 02:57:32.668979  543705 disk_info.go:125] begin check local disk info of client
I0322 02:57:32.671471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:57:32.671478  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048bd40 0xc00048bd80]
E0322 02:57:33.407878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:57:33.407896  543705 memory.go:184] no items to output this cycle
I0322 02:57:33.407911  543705 cpu.go:275] no items to output this cycle
I0322 02:57:39.463071  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 02:57:39.463078  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 02:57:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:57:43.410937  543705 memory.go:191] Add success.
I0322 02:57:43.409825  543705 cpu.go:282] Add success.
I0322 02:57:43.420687  543705 net.go:648] Add success.
I0322 02:57:43.423098  543705 net.go:770] primary dev: ETH0
I0322 02:57:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:57:43.423127  543705 net.go:698] Add success.
I0322 02:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:57:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:57:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:57:53.410389  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:57:53.410404  543705 memory.go:184] no items to output this cycle
I0322 02:57:53.410415  543705 cpu.go:275] no items to output this cycle
E0322 02:58:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:58:03.409768  543705 memory.go:184] no items to output this cycle
I0322 02:58:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 02:58:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:58:13.409796  543705 memory.go:191] Add success.
I0322 02:58:13.409799  543705 cpu.go:282] Add success.
W0322 02:58:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:58:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:58:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:58:13.420104  543705 net.go:648] Add success.
I0322 02:58:13.422618  543705 net.go:770] primary dev: ETH0
I0322 02:58:13.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:58:13.422644  543705 net.go:698] Add success.
I0322 02:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:58:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:58:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 02:58:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:58:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 02:58:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:58:15.456008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:58:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:58:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:58:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:58:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:58:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:58:23.409764  543705 memory.go:184] no items to output this cycle
I0322 02:58:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 02:58:32.672000  543705 disk_info.go:125] begin check local disk info of client
I0322 02:58:32.674482  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:58:32.674488  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471a80 0xc000471ac0]
E0322 02:58:33.407881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:58:33.407896  543705 memory.go:184] no items to output this cycle
I0322 02:58:33.407912  543705 cpu.go:275] no items to output this cycle
E0322 02:58:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:58:43.409795  543705 memory.go:191] Add success.
I0322 02:58:43.409812  543705 cpu.go:282] Add success.
I0322 02:58:43.419888  543705 net.go:648] Add success.
I0322 02:58:43.423173  543705 net.go:770] primary dev: ETH0
I0322 02:58:43.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:58:43.423198  543705 net.go:698] Add success.
I0322 02:58:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:58:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:58:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:58:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:58:53.409775  543705 memory.go:184] no items to output this cycle
I0322 02:58:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 02:59:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:59:03.409806  543705 memory.go:184] no items to output this cycle
I0322 02:59:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 02:59:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:59:13.409826  543705 memory.go:191] Add success.
I0322 02:59:13.409829  543705 cpu.go:282] Add success.
W0322 02:59:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 02:59:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 02:59:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 02:59:13.420343  543705 net.go:648] Add success.
I0322 02:59:13.423215  543705 net.go:770] primary dev: ETH0
I0322 02:59:13.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:59:13.423238  543705 net.go:698] Add success.
I0322 02:59:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 02:59:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 02:59:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 02:59:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 02:59:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 02:59:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 02:59:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 02:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:59:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 02:59:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 02:59:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:59:23.409777  543705 memory.go:184] no items to output this cycle
I0322 02:59:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 02:59:32.675004  543705 disk_info.go:125] begin check local disk info of client
I0322 02:59:32.677551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 02:59:32.677558  543705 disk_info.go:196] parse disk info done, disk is : [0xc00054ccc0 0xc00054cd00]
E0322 02:59:33.407950  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:59:33.407967  543705 memory.go:184] no items to output this cycle
I0322 02:59:33.408153  543705 cpu.go:275] no items to output this cycle
E0322 02:59:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:59:43.409816  543705 cpu.go:282] Add success.
I0322 02:59:43.409820  543705 memory.go:191] Add success.
I0322 02:59:43.419991  543705 net.go:648] Add success.
I0322 02:59:43.422547  543705 net.go:770] primary dev: ETH0
I0322 02:59:43.422560  543705 net.go:802] Send network stats successfully!,count is 6
I0322 02:59:43.422572  543705 net.go:698] Add success.
I0322 02:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 02:59:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 02:59:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 02:59:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 02:59:53.409810  543705 memory.go:184] no items to output this cycle
I0322 02:59:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 03:00:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:00:03.409802  543705 memory.go:184] no items to output this cycle
I0322 03:00:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 03:00:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:00:13.409778  543705 memory.go:191] Add success.
I0322 03:00:13.409808  543705 cpu.go:282] Add success.
W0322 03:00:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:00:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:00:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:00:13.420151  543705 net.go:648] Add success.
I0322 03:00:13.423040  543705 net.go:770] primary dev: ETH0
I0322 03:00:13.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:00:13.423065  543705 net.go:698] Add success.
I0322 03:00:13.467991  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1def8e26-b38b-4fcb-80bc-b7ab9cd7c290","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:00:13.468026  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:00:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:00:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:00:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 03:00:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:00:14.456778  543705 disk_worker.go:494] system disk:vda1
I0322 03:00:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:00:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:00:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:00:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:00:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:00:23.409798  543705 memory.go:184] no items to output this cycle
I0322 03:00:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 03:00:32.677671  543705 disk_info.go:125] begin check local disk info of client
I0322 03:00:32.680129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:00:32.680135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7e80 0xc0004a7ec0]
E0322 03:00:33.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:00:33.409896  543705 memory.go:184] no items to output this cycle
I0322 03:00:33.410068  543705 cpu.go:275] no items to output this cycle
I0322 03:00:39.464067  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:00:39.464075  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:00:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:00:43.410700  543705 memory.go:191] Add success.
I0322 03:00:43.409793  543705 cpu.go:282] Add success.
I0322 03:00:43.420448  543705 net.go:648] Add success.
I0322 03:00:43.423347  543705 net.go:770] primary dev: ETH0
I0322 03:00:43.423360  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:00:43.423373  543705 net.go:698] Add success.
I0322 03:00:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:00:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:00:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:00:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:00:53.409776  543705 memory.go:184] no items to output this cycle
I0322 03:00:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 03:01:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:01:03.409779  543705 memory.go:184] no items to output this cycle
I0322 03:01:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 03:01:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:01:13.409826  543705 memory.go:191] Add success.
I0322 03:01:13.409830  543705 cpu.go:282] Add success.
W0322 03:01:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:01:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:01:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:01:13.420666  543705 net.go:648] Add success.
I0322 03:01:13.423612  543705 net.go:770] primary dev: ETH0
I0322 03:01:13.423625  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:01:13.423638  543705 net.go:698] Add success.
I0322 03:01:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:01:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:01:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 03:01:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:01:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 03:01:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:01:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:01:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:01:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:01:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:01:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:01:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:01:23.409774  543705 memory.go:184] no items to output this cycle
I0322 03:01:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 03:01:32.681058  543705 disk_info.go:125] begin check local disk info of client
I0322 03:01:32.683560  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:01:32.683567  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484680 0xc0004846c0]
E0322 03:01:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:01:33.407522  543705 memory.go:184] no items to output this cycle
I0322 03:01:33.407546  543705 cpu.go:275] no items to output this cycle
E0322 03:01:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:01:43.409806  543705 memory.go:191] Add success.
I0322 03:01:43.409806  543705 cpu.go:282] Add success.
I0322 03:01:43.420009  543705 net.go:648] Add success.
I0322 03:01:43.423535  543705 net.go:770] primary dev: ETH0
I0322 03:01:43.423550  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:01:43.423564  543705 net.go:698] Add success.
I0322 03:01:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:01:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:01:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:01:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:01:53.409786  543705 memory.go:184] no items to output this cycle
I0322 03:01:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 03:02:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:02:03.409784  543705 memory.go:184] no items to output this cycle
I0322 03:02:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 03:02:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:02:13.409823  543705 memory.go:191] Add success.
I0322 03:02:13.409829  543705 cpu.go:282] Add success.
W0322 03:02:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:02:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:02:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:02:13.420114  543705 net.go:648] Add success.
I0322 03:02:13.422635  543705 net.go:770] primary dev: ETH0
I0322 03:02:13.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:02:13.422660  543705 net.go:698] Add success.
W0322 03:02:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:02:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 03:02:14.455207  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:02:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:02:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:02:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:02:14.456658  543705 disk_worker.go:494] system disk:vda1
I0322 03:02:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:02:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:02:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:02:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:02:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:02:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:02:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:02:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:02:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:02:23.409776  543705 memory.go:184] no items to output this cycle
I0322 03:02:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 03:02:32.684048  543705 disk_info.go:125] begin check local disk info of client
I0322 03:02:32.686549  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:02:32.686555  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465840 0xc000465880]
E0322 03:02:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:02:33.407520  543705 memory.go:184] no items to output this cycle
I0322 03:02:33.407545  543705 cpu.go:275] no items to output this cycle
E0322 03:02:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:02:43.409892  543705 cpu.go:282] Add success.
I0322 03:02:43.409906  543705 memory.go:191] Add success.
I0322 03:02:43.419721  543705 net.go:648] Add success.
I0322 03:02:43.422880  543705 net.go:770] primary dev: ETH0
I0322 03:02:43.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:02:43.422908  543705 net.go:698] Add success.
I0322 03:02:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:02:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:02:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:02:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:02:53.409771  543705 memory.go:184] no items to output this cycle
I0322 03:02:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 03:03:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:03:03.409762  543705 memory.go:184] no items to output this cycle
I0322 03:03:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 03:03:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:03:13.409791  543705 memory.go:191] Add success.
W0322 03:03:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:03:13.409823  543705 cpu.go:282] Add success.
W0322 03:03:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:03:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:03:13.420128  543705 net.go:648] Add success.
I0322 03:03:13.422856  543705 net.go:770] primary dev: ETH0
I0322 03:03:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:03:13.422881  543705 net.go:698] Add success.
I0322 03:03:13.469871  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"58c469ee-986e-4a3f-a250-577f520274e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:03:13.469905  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:03:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:03:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:03:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 03:03:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:03:14.456691  543705 disk_worker.go:494] system disk:vda1
I0322 03:03:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:03:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:03:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:03:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:03:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:03:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:03:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:03:23.409764  543705 memory.go:184] no items to output this cycle
I0322 03:03:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 03:03:32.687070  543705 disk_info.go:125] begin check local disk info of client
I0322 03:03:32.689527  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:03:32.689534  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461300 0xc000461340]
E0322 03:03:33.407892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:03:33.407916  543705 memory.go:184] no items to output this cycle
I0322 03:03:33.407928  543705 cpu.go:275] no items to output this cycle
I0322 03:03:39.465087  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:03:39.465095  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:03:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:03:43.410940  543705 memory.go:191] Add success.
I0322 03:03:43.409828  543705 cpu.go:282] Add success.
I0322 03:03:43.420638  543705 net.go:648] Add success.
I0322 03:03:43.423121  543705 net.go:770] primary dev: ETH0
I0322 03:03:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:03:43.423151  543705 net.go:698] Add success.
I0322 03:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:03:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:03:53.410397  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:03:53.410415  543705 memory.go:184] no items to output this cycle
I0322 03:03:53.410442  543705 cpu.go:275] no items to output this cycle
E0322 03:04:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:04:03.409781  543705 memory.go:184] no items to output this cycle
I0322 03:04:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 03:04:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:04:13.409790  543705 memory.go:191] Add success.
I0322 03:04:13.409790  543705 cpu.go:282] Add success.
W0322 03:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:04:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:04:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:04:13.420137  543705 net.go:648] Add success.
I0322 03:04:13.422648  543705 net.go:770] primary dev: ETH0
I0322 03:04:13.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:04:13.422672  543705 net.go:698] Add success.
I0322 03:04:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:04:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:04:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 03:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:04:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 03:04:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:04:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:04:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:04:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:04:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:04:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:04:23.409782  543705 memory.go:184] no items to output this cycle
I0322 03:04:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 03:04:32.689672  543705 disk_info.go:125] begin check local disk info of client
I0322 03:04:32.692197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:04:32.692203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b780 0xc00036b7c0]
E0322 03:04:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:04:33.409775  543705 cpu.go:275] no items to output this cycle
I0322 03:04:33.409777  543705 memory.go:184] no items to output this cycle
E0322 03:04:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:04:43.409815  543705 memory.go:191] Add success.
I0322 03:04:43.409818  543705 cpu.go:282] Add success.
I0322 03:04:43.419747  543705 net.go:648] Add success.
I0322 03:04:43.422320  543705 net.go:770] primary dev: ETH0
I0322 03:04:43.422334  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:04:43.422346  543705 net.go:698] Add success.
I0322 03:04:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:04:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:04:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:04:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:04:53.409777  543705 memory.go:184] no items to output this cycle
I0322 03:04:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 03:05:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:05:03.409801  543705 memory.go:184] no items to output this cycle
I0322 03:05:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 03:05:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:05:13.409779  543705 memory.go:191] Add success.
I0322 03:05:13.409802  543705 cpu.go:282] Add success.
W0322 03:05:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:05:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:05:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:05:13.420266  543705 net.go:648] Add success.
I0322 03:05:13.423032  543705 net.go:770] primary dev: ETH0
I0322 03:05:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:05:13.423057  543705 net.go:698] Add success.
I0322 03:05:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:05:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:05:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 03:05:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:05:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 03:05:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:05:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:05:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:05:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:05:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:05:23.409771  543705 cpu.go:275] no items to output this cycle
I0322 03:05:23.409780  543705 memory.go:184] no items to output this cycle
I0322 03:05:32.693105  543705 disk_info.go:125] begin check local disk info of client
I0322 03:05:32.695583  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:05:32.695590  543705 disk_info.go:196] parse disk info done, disk is : [0xc000355700 0xc000355740]
E0322 03:05:33.407887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:05:33.407904  543705 memory.go:184] no items to output this cycle
I0322 03:05:33.407920  543705 cpu.go:275] no items to output this cycle
E0322 03:05:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:05:43.409795  543705 memory.go:191] Add success.
I0322 03:05:43.409798  543705 cpu.go:282] Add success.
I0322 03:05:43.419750  543705 net.go:648] Add success.
I0322 03:05:43.422260  543705 net.go:770] primary dev: ETH0
I0322 03:05:43.422274  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:05:43.422288  543705 net.go:698] Add success.
I0322 03:05:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:05:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:05:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:05:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:05:53.409766  543705 memory.go:184] no items to output this cycle
I0322 03:05:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 03:06:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:06:03.409802  543705 memory.go:184] no items to output this cycle
I0322 03:06:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 03:06:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:06:13.409777  543705 memory.go:191] Add success.
W0322 03:06:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:06:13.409809  543705 cpu.go:282] Add success.
W0322 03:06:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:06:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:06:13.420038  543705 net.go:648] Add success.
I0322 03:06:13.422594  543705 net.go:770] primary dev: ETH0
I0322 03:06:13.422614  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:06:13.422629  543705 net.go:698] Add success.
I0322 03:06:13.463611  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f788a475-8c47-4858-ae13-9eec4cc2b178","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:06:13.463646  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:06:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:06:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:06:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 03:06:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:06:14.456730  543705 disk_worker.go:494] system disk:vda1
I0322 03:06:14.456767  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:06:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:06:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:06:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:06:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:06:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:06:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 03:06:23.409785  543705 memory.go:184] no items to output this cycle
I0322 03:06:32.696122  543705 disk_info.go:125] begin check local disk info of client
I0322 03:06:32.698715  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:06:32.698722  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d240 0xc00039d280]
E0322 03:06:33.409351  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:06:33.409359  543705 cpu.go:275] no items to output this cycle
I0322 03:06:33.409366  543705 memory.go:184] no items to output this cycle
I0322 03:06:39.466094  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:06:39.466101  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:06:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:06:43.410759  543705 memory.go:191] Add success.
I0322 03:06:43.409824  543705 cpu.go:282] Add success.
I0322 03:06:43.420446  543705 net.go:648] Add success.
I0322 03:06:43.423037  543705 net.go:770] primary dev: ETH0
I0322 03:06:43.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:06:43.423063  543705 net.go:698] Add success.
I0322 03:06:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:06:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:06:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:06:53.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:06:53.409877  543705 memory.go:184] no items to output this cycle
I0322 03:06:53.409877  543705 cpu.go:275] no items to output this cycle
E0322 03:07:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:07:03.409774  543705 memory.go:184] no items to output this cycle
I0322 03:07:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 03:07:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:07:13.409799  543705 memory.go:191] Add success.
I0322 03:07:13.409799  543705 cpu.go:282] Add success.
W0322 03:07:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:07:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:07:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:07:13.420054  543705 net.go:648] Add success.
I0322 03:07:13.423069  543705 net.go:770] primary dev: ETH0
I0322 03:07:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:07:13.423094  543705 net.go:698] Add success.
I0322 03:07:13.453663  543705 event_worker.go:152] Polling the log file for events...
W0322 03:07:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:07:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 03:07:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:07:14.456892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:07:14.456902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:07:14.456908  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:07:14.456953  543705 disk_worker.go:494] system disk:vda1
I0322 03:07:14.457009  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:07:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:07:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:07:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:07:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:07:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:07:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:07:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:07:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:07:23.409769  543705 memory.go:184] no items to output this cycle
I0322 03:07:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 03:07:32.700142  543705 disk_info.go:125] begin check local disk info of client
I0322 03:07:32.702635  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:07:32.702641  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475780 0xc0004757c0]
E0322 03:07:33.407887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:07:33.407903  543705 memory.go:184] no items to output this cycle
I0322 03:07:33.407919  543705 cpu.go:275] no items to output this cycle
E0322 03:07:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:07:43.409980  543705 memory.go:191] Add success.
I0322 03:07:43.410031  543705 cpu.go:282] Add success.
I0322 03:07:43.419712  543705 net.go:648] Add success.
I0322 03:07:43.422660  543705 net.go:770] primary dev: ETH0
I0322 03:07:43.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:07:43.422685  543705 net.go:698] Add success.
I0322 03:07:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:07:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:07:46.458106  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:07:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:07:53.409810  543705 memory.go:184] no items to output this cycle
I0322 03:07:53.409826  543705 cpu.go:275] no items to output this cycle
E0322 03:08:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:08:03.409801  543705 memory.go:184] no items to output this cycle
I0322 03:08:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 03:08:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:08:13.409815  543705 memory.go:191] Add success.
I0322 03:08:13.409823  543705 cpu.go:282] Add success.
W0322 03:08:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:08:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:08:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:08:13.420161  543705 net.go:648] Add success.
I0322 03:08:13.423457  543705 net.go:770] primary dev: ETH0
I0322 03:08:13.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:08:13.423490  543705 net.go:698] Add success.
I0322 03:08:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:08:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:08:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 03:08:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:08:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 03:08:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:08:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:08:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:08:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:08:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:08:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:08:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:08:23.409769  543705 memory.go:184] no items to output this cycle
I0322 03:08:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 03:08:32.703145  543705 disk_info.go:125] begin check local disk info of client
I0322 03:08:32.705615  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:08:32.705621  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371b00 0xc000371b40]
E0322 03:08:33.407539  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:08:33.407554  543705 memory.go:184] no items to output this cycle
I0322 03:08:33.407571  543705 cpu.go:275] no items to output this cycle
E0322 03:08:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:08:43.409786  543705 memory.go:191] Add success.
I0322 03:08:43.409812  543705 cpu.go:282] Add success.
I0322 03:08:43.419711  543705 net.go:648] Add success.
I0322 03:08:43.422727  543705 net.go:770] primary dev: ETH0
I0322 03:08:43.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:08:43.422751  543705 net.go:698] Add success.
I0322 03:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:08:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:08:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:08:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:08:53.409774  543705 memory.go:184] no items to output this cycle
I0322 03:08:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 03:09:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:09:03.409789  543705 memory.go:184] no items to output this cycle
I0322 03:09:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 03:09:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:09:13.409806  543705 memory.go:191] Add success.
I0322 03:09:13.409823  543705 cpu.go:282] Add success.
W0322 03:09:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:09:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:09:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:09:13.420132  543705 net.go:648] Add success.
I0322 03:09:13.422765  543705 net.go:770] primary dev: ETH0
I0322 03:09:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:09:13.422791  543705 net.go:698] Add success.
I0322 03:09:13.528294  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6cf0904a-9596-4d70-a237-6d235a93eb5c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:09:13.528341  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:09:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:09:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:09:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 03:09:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:09:14.456690  543705 disk_worker.go:494] system disk:vda1
I0322 03:09:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:09:15.455607  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:09:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:09:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:09:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:09:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:09:23.409782  543705 memory.go:184] no items to output this cycle
I0322 03:09:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 03:09:32.705678  543705 disk_info.go:125] begin check local disk info of client
I0322 03:09:32.708207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:09:32.708213  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027bf00 0xc00027bf40]
E0322 03:09:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:09:33.409789  543705 memory.go:184] no items to output this cycle
I0322 03:09:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 03:09:39.467101  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:09:39.467109  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:09:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:09:43.410749  543705 memory.go:191] Add success.
I0322 03:09:43.409822  543705 cpu.go:282] Add success.
I0322 03:09:43.420509  543705 net.go:648] Add success.
I0322 03:09:43.423173  543705 net.go:770] primary dev: ETH0
I0322 03:09:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:09:43.423196  543705 net.go:698] Add success.
I0322 03:09:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:09:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:09:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:09:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:09:53.409774  543705 memory.go:184] no items to output this cycle
I0322 03:09:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 03:10:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:10:03.409787  543705 memory.go:184] no items to output this cycle
I0322 03:10:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 03:10:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:10:13.409812  543705 memory.go:191] Add success.
I0322 03:10:13.409822  543705 cpu.go:282] Add success.
W0322 03:10:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:10:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:10:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:10:13.420086  543705 net.go:648] Add success.
I0322 03:10:13.423074  543705 net.go:770] primary dev: ETH0
I0322 03:10:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:10:13.423102  543705 net.go:698] Add success.
I0322 03:10:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:10:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:10:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 03:10:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:10:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 03:10:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:10:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:10:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:10:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:10:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:10:23.409768  543705 memory.go:184] no items to output this cycle
I0322 03:10:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 03:10:32.709177  543705 disk_info.go:125] begin check local disk info of client
I0322 03:10:32.711732  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:10:32.711739  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ecc0 0xc00035ed00]
E0322 03:10:33.409295  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:10:33.409303  543705 cpu.go:275] no items to output this cycle
I0322 03:10:33.409307  543705 memory.go:184] no items to output this cycle
E0322 03:10:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:10:43.409784  543705 memory.go:191] Add success.
I0322 03:10:43.409797  543705 cpu.go:282] Add success.
I0322 03:10:43.419855  543705 net.go:648] Add success.
I0322 03:10:43.422686  543705 net.go:770] primary dev: ETH0
I0322 03:10:43.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:10:43.422714  543705 net.go:698] Add success.
I0322 03:10:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:10:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:10:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:10:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:10:53.409766  543705 memory.go:184] no items to output this cycle
I0322 03:10:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 03:11:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:11:03.409794  543705 memory.go:184] no items to output this cycle
I0322 03:11:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 03:11:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:11:13.409790  543705 memory.go:191] Add success.
I0322 03:11:13.409793  543705 cpu.go:282] Add success.
W0322 03:11:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:11:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:11:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:11:13.420216  543705 net.go:648] Add success.
I0322 03:11:13.422880  543705 net.go:770] primary dev: ETH0
I0322 03:11:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:11:13.422904  543705 net.go:698] Add success.
I0322 03:11:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:11:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:11:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 03:11:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:11:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 03:11:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:11:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:11:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:11:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:11:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:11:23.409758  543705 memory.go:184] no items to output this cycle
I0322 03:11:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 03:11:32.713203  543705 disk_info.go:125] begin check local disk info of client
I0322 03:11:32.715729  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:11:32.715735  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a400 0xc00027a440]
E0322 03:11:33.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:11:33.407519  543705 memory.go:184] no items to output this cycle
I0322 03:11:33.407541  543705 cpu.go:275] no items to output this cycle
E0322 03:11:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:11:43.409800  543705 memory.go:191] Add success.
I0322 03:11:43.409801  543705 cpu.go:282] Add success.
I0322 03:11:43.419981  543705 net.go:648] Add success.
I0322 03:11:43.422990  543705 net.go:770] primary dev: ETH0
I0322 03:11:43.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:11:43.423016  543705 net.go:698] Add success.
I0322 03:11:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:11:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:11:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:11:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:11:53.409805  543705 memory.go:184] no items to output this cycle
I0322 03:11:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 03:12:03.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:12:03.409888  543705 memory.go:184] no items to output this cycle
I0322 03:12:03.409952  543705 cpu.go:275] no items to output this cycle
E0322 03:12:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:12:13.409821  543705 memory.go:191] Add success.
I0322 03:12:13.409823  543705 cpu.go:282] Add success.
W0322 03:12:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:12:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:12:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:12:13.420171  543705 net.go:648] Add success.
I0322 03:12:13.422967  543705 net.go:770] primary dev: ETH0
I0322 03:12:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:12:13.422996  543705 net.go:698] Add success.
I0322 03:12:13.731496  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b2152776-ec08-4b28-b156-45c0ba0d3cd3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:12:13.731530  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 03:12:14.454817  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:12:14.454876  543705 disk_worker.go:708] disk space is not compliant
W0322 03:12:14.454880  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:12:14.455866  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:12:14.455876  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:12:14.455882  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:12:14.456192  543705 disk_worker.go:494] system disk:vda1
I0322 03:12:14.456220  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:12:15.456772  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:12:15.456779  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:12:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:12:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:12:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:12:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:12:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:12:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:12:23.409791  543705 memory.go:184] no items to output this cycle
I0322 03:12:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 03:12:32.717229  543705 disk_info.go:125] begin check local disk info of client
I0322 03:12:32.719712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:12:32.719720  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f5240 0xc0001f5280]
E0322 03:12:33.409250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:12:33.409267  543705 memory.go:184] no items to output this cycle
I0322 03:12:33.409285  543705 cpu.go:275] no items to output this cycle
I0322 03:12:39.468097  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:12:39.468104  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:12:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:12:43.410813  543705 memory.go:191] Add success.
I0322 03:12:43.409819  543705 cpu.go:282] Add success.
I0322 03:12:43.420512  543705 net.go:648] Add success.
I0322 03:12:43.423502  543705 net.go:770] primary dev: ETH0
I0322 03:12:43.423516  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:12:43.423528  543705 net.go:698] Add success.
I0322 03:12:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:12:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:12:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:12:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:12:53.409784  543705 memory.go:184] no items to output this cycle
I0322 03:12:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 03:13:03.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:13:03.409876  543705 memory.go:184] no items to output this cycle
I0322 03:13:03.409954  543705 cpu.go:275] no items to output this cycle
E0322 03:13:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:13:13.409776  543705 memory.go:191] Add success.
W0322 03:13:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:13:13.409808  543705 cpu.go:282] Add success.
W0322 03:13:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:13:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:13:13.420151  543705 net.go:648] Add success.
I0322 03:13:13.422776  543705 net.go:770] primary dev: ETH0
I0322 03:13:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:13:13.422801  543705 net.go:698] Add success.
I0322 03:13:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:13:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:13:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 03:13:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:13:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 03:13:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:13:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:13:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:13:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:13:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:13:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:13:23.409796  543705 memory.go:184] no items to output this cycle
I0322 03:13:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 03:13:32.721244  543705 disk_info.go:125] begin check local disk info of client
I0322 03:13:32.723755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:13:32.723761  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469140 0xc000469180]
E0322 03:13:33.407521  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:13:33.407535  543705 memory.go:184] no items to output this cycle
I0322 03:13:33.407548  543705 cpu.go:275] no items to output this cycle
E0322 03:13:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:13:43.409828  543705 memory.go:191] Add success.
I0322 03:13:43.409833  543705 cpu.go:282] Add success.
I0322 03:13:43.419891  543705 net.go:648] Add success.
I0322 03:13:43.422668  543705 net.go:770] primary dev: ETH0
I0322 03:13:43.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:13:43.422693  543705 net.go:698] Add success.
I0322 03:13:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:13:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:13:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:13:53.409805  543705 memory.go:184] no items to output this cycle
I0322 03:13:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 03:14:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:14:03.409792  543705 memory.go:184] no items to output this cycle
I0322 03:14:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 03:14:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:14:13.409812  543705 memory.go:191] Add success.
I0322 03:14:13.409825  543705 cpu.go:282] Add success.
W0322 03:14:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:14:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:14:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:14:13.420149  543705 net.go:648] Add success.
I0322 03:14:13.422931  543705 net.go:770] primary dev: ETH0
I0322 03:14:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:14:13.422960  543705 net.go:698] Add success.
I0322 03:14:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:14:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:14:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 03:14:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:14:14.456502  543705 disk_worker.go:494] system disk:vda1
I0322 03:14:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:14:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:14:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:14:23.409789  543705 memory.go:184] no items to output this cycle
I0322 03:14:23.409831  543705 cpu.go:275] no items to output this cycle
I0322 03:14:32.725267  543705 disk_info.go:125] begin check local disk info of client
I0322 03:14:32.727773  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:14:32.727780  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6a40 0xc0002a6a80]
E0322 03:14:33.407521  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:14:33.407527  543705 cpu.go:275] no items to output this cycle
I0322 03:14:33.407534  543705 memory.go:184] no items to output this cycle
E0322 03:14:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:14:43.409827  543705 memory.go:191] Add success.
I0322 03:14:43.409835  543705 cpu.go:282] Add success.
I0322 03:14:43.419982  543705 net.go:648] Add success.
I0322 03:14:43.422663  543705 net.go:770] primary dev: ETH0
I0322 03:14:43.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:14:43.422689  543705 net.go:698] Add success.
I0322 03:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:14:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:14:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:14:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:14:53.409814  543705 memory.go:184] no items to output this cycle
I0322 03:14:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 03:15:03.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:15:03.409906  543705 memory.go:184] no items to output this cycle
I0322 03:15:03.410041  543705 cpu.go:275] no items to output this cycle
E0322 03:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:15:13.409776  543705 memory.go:191] Add success.
W0322 03:15:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:15:13.409811  543705 cpu.go:282] Add success.
W0322 03:15:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:15:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:15:13.420124  543705 net.go:648] Add success.
I0322 03:15:13.422797  543705 net.go:770] primary dev: ETH0
I0322 03:15:13.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:15:13.422822  543705 net.go:698] Add success.
I0322 03:15:13.490469  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"765d7319-8ae4-4776-92a5-15b9a5dcbafd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:15:13.490502  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:15:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:15:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:15:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0322 03:15:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:15:14.456608  543705 disk_worker.go:494] system disk:vda1
I0322 03:15:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:15:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:15:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:15:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:15:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:15:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:15:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:15:23.409796  543705 memory.go:184] no items to output this cycle
I0322 03:15:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 03:15:32.729280  543705 disk_info.go:125] begin check local disk info of client
I0322 03:15:32.731786  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:15:32.731793  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d9140 0xc0003d9180]
E0322 03:15:33.407515  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:15:33.407528  543705 memory.go:184] no items to output this cycle
I0322 03:15:33.407536  543705 cpu.go:275] no items to output this cycle
I0322 03:15:39.469115  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:15:39.469123  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:15:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:15:43.410721  543705 memory.go:191] Add success.
I0322 03:15:43.409819  543705 cpu.go:282] Add success.
I0322 03:15:43.420415  543705 net.go:648] Add success.
I0322 03:15:43.422870  543705 net.go:770] primary dev: ETH0
I0322 03:15:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:15:43.422900  543705 net.go:698] Add success.
I0322 03:15:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:15:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:15:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:15:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:15:53.409812  543705 memory.go:184] no items to output this cycle
I0322 03:15:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 03:16:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:16:03.409807  543705 memory.go:184] no items to output this cycle
I0322 03:16:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 03:16:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:16:13.409783  543705 memory.go:191] Add success.
I0322 03:16:13.409798  543705 cpu.go:282] Add success.
W0322 03:16:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:16:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:16:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:16:13.419938  543705 net.go:770] primary dev: ETH0
I0322 03:16:13.419953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:16:13.419968  543705 net.go:698] Add success.
I0322 03:16:13.420345  543705 net.go:648] Add success.
I0322 03:16:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:16:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:16:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 03:16:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:16:14.456527  543705 disk_worker.go:494] system disk:vda1
I0322 03:16:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:16:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:16:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:16:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:16:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:16:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 03:16:23.409785  543705 memory.go:184] no items to output this cycle
I0322 03:16:32.733301  543705 disk_info.go:125] begin check local disk info of client
I0322 03:16:32.735840  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:16:32.735846  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003078c0 0xc000307900]
E0322 03:16:33.407509  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:16:33.407521  543705 memory.go:184] no items to output this cycle
I0322 03:16:33.407545  543705 cpu.go:275] no items to output this cycle
E0322 03:16:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:16:43.409817  543705 memory.go:191] Add success.
I0322 03:16:43.409821  543705 cpu.go:282] Add success.
I0322 03:16:43.419969  543705 net.go:648] Add success.
I0322 03:16:43.422546  543705 net.go:770] primary dev: ETH0
I0322 03:16:43.422561  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:16:43.422577  543705 net.go:698] Add success.
I0322 03:16:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:16:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:16:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:16:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:16:53.409779  543705 memory.go:184] no items to output this cycle
I0322 03:16:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 03:17:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:17:03.409776  543705 memory.go:184] no items to output this cycle
I0322 03:17:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 03:17:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:17:13.409813  543705 memory.go:191] Add success.
I0322 03:17:13.409818  543705 cpu.go:282] Add success.
W0322 03:17:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:17:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:17:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:17:13.420132  543705 net.go:648] Add success.
I0322 03:17:13.422838  543705 net.go:770] primary dev: ETH0
I0322 03:17:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:17:13.422864  543705 net.go:698] Add success.
I0322 03:17:13.453414  543705 event_worker.go:152] Polling the log file for events...
W0322 03:17:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:17:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 03:17:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:17:14.456932  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:17:14.456941  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:17:14.456947  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:17:14.456995  543705 disk_worker.go:494] system disk:vda1
I0322 03:17:14.457036  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:17:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:17:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:17:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:17:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:17:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:17:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:17:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:17:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:17:23.409764  543705 memory.go:184] no items to output this cycle
I0322 03:17:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 03:17:32.737322  543705 disk_info.go:125] begin check local disk info of client
I0322 03:17:32.739794  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:17:32.739802  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328fc0 0xc000329000]
E0322 03:17:33.407502  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:17:33.407516  543705 memory.go:184] no items to output this cycle
I0322 03:17:33.407541  543705 cpu.go:275] no items to output this cycle
E0322 03:17:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:17:43.409788  543705 memory.go:191] Add success.
I0322 03:17:43.409814  543705 cpu.go:282] Add success.
I0322 03:17:43.419868  543705 net.go:648] Add success.
I0322 03:17:43.422584  543705 net.go:770] primary dev: ETH0
I0322 03:17:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:17:43.422609  543705 net.go:698] Add success.
I0322 03:17:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:17:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:17:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:17:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:17:53.409774  543705 memory.go:184] no items to output this cycle
I0322 03:17:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 03:18:03.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:18:03.409900  543705 memory.go:184] no items to output this cycle
I0322 03:18:03.409908  543705 cpu.go:275] no items to output this cycle
E0322 03:18:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:18:13.409809  543705 memory.go:191] Add success.
I0322 03:18:13.409817  543705 cpu.go:282] Add success.
W0322 03:18:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:18:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:18:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:18:13.420177  543705 net.go:648] Add success.
I0322 03:18:13.422532  543705 net.go:770] primary dev: ETH0
I0322 03:18:13.422545  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:18:13.422558  543705 net.go:698] Add success.
I0322 03:18:13.613797  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f7db8e25-4fd5-4625-baf2-421e39cc9d5a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:18:13.613830  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:18:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:18:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:18:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 03:18:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:18:14.456735  543705 disk_worker.go:494] system disk:vda1
I0322 03:18:14.456765  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:18:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:18:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:18:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:18:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:18:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:18:23.409763  543705 memory.go:184] no items to output this cycle
I0322 03:18:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 03:18:32.741344  543705 disk_info.go:125] begin check local disk info of client
I0322 03:18:32.743881  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:18:32.743887  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6c80 0xc0002b6cc0]
E0322 03:18:33.409296  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:18:33.409311  543705 memory.go:184] no items to output this cycle
I0322 03:18:33.409326  543705 cpu.go:275] no items to output this cycle
I0322 03:18:39.470096  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:18:39.470103  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:18:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:18:43.410652  543705 memory.go:191] Add success.
I0322 03:18:43.409802  543705 cpu.go:282] Add success.
I0322 03:18:43.420355  543705 net.go:648] Add success.
I0322 03:18:43.422982  543705 net.go:770] primary dev: ETH0
I0322 03:18:43.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:18:43.423009  543705 net.go:698] Add success.
I0322 03:18:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:18:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:18:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:18:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:18:53.409804  543705 memory.go:184] no items to output this cycle
I0322 03:18:53.409816  543705 cpu.go:275] no items to output this cycle
I0322 03:19:03.409884  543705 cpu.go:275] no items to output this cycle
E0322 03:19:03.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:19:03.409902  543705 memory.go:184] no items to output this cycle
E0322 03:19:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:19:13.409810  543705 memory.go:191] Add success.
I0322 03:19:13.409820  543705 cpu.go:282] Add success.
W0322 03:19:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:19:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:19:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:19:13.420228  543705 net.go:648] Add success.
I0322 03:19:13.423306  543705 net.go:770] primary dev: ETH0
I0322 03:19:13.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:19:13.423332  543705 net.go:698] Add success.
I0322 03:19:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:19:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:19:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 03:19:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:19:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 03:19:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:19:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:19:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:19:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:19:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:19:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:19:23.409773  543705 cpu.go:275] no items to output this cycle
I0322 03:19:23.409778  543705 memory.go:184] no items to output this cycle
I0322 03:19:32.745369  543705 disk_info.go:125] begin check local disk info of client
I0322 03:19:32.747882  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:19:32.747889  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253200 0xc000253240]
E0322 03:19:33.409277  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:19:33.409294  543705 memory.go:184] no items to output this cycle
I0322 03:19:33.409311  543705 cpu.go:275] no items to output this cycle
E0322 03:19:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:19:43.409817  543705 memory.go:191] Add success.
I0322 03:19:43.409826  543705 cpu.go:282] Add success.
I0322 03:19:43.419958  543705 net.go:648] Add success.
I0322 03:19:43.422663  543705 net.go:770] primary dev: ETH0
I0322 03:19:43.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:19:43.422691  543705 net.go:698] Add success.
I0322 03:19:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:19:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:19:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:19:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:19:53.409799  543705 memory.go:184] no items to output this cycle
I0322 03:19:53.409809  543705 cpu.go:275] no items to output this cycle
I0322 03:20:03.409885  543705 cpu.go:275] no items to output this cycle
E0322 03:20:03.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:20:03.409901  543705 memory.go:184] no items to output this cycle
E0322 03:20:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:20:13.409799  543705 cpu.go:282] Add success.
I0322 03:20:13.409805  543705 memory.go:191] Add success.
W0322 03:20:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:20:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:20:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:20:13.420056  543705 net.go:648] Add success.
I0322 03:20:13.422740  543705 net.go:770] primary dev: ETH0
I0322 03:20:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:20:13.422768  543705 net.go:698] Add success.
I0322 03:20:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:20:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:20:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 03:20:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:20:14.456479  543705 disk_worker.go:494] system disk:vda1
I0322 03:20:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:20:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:20:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:20:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:20:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:20:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:20:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:20:23.409773  543705 memory.go:184] no items to output this cycle
I0322 03:20:23.409774  543705 cpu.go:275] no items to output this cycle
I0322 03:20:32.749382  543705 disk_info.go:125] begin check local disk info of client
I0322 03:20:32.751865  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:20:32.751871  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265000 0xc000265040]
E0322 03:20:33.409215  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:20:33.409228  543705 memory.go:184] no items to output this cycle
I0322 03:20:33.409242  543705 cpu.go:275] no items to output this cycle
E0322 03:20:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:20:43.409792  543705 memory.go:191] Add success.
I0322 03:20:43.409796  543705 cpu.go:282] Add success.
I0322 03:20:43.419960  543705 net.go:648] Add success.
I0322 03:20:43.422564  543705 net.go:770] primary dev: ETH0
I0322 03:20:43.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:20:43.422589  543705 net.go:698] Add success.
I0322 03:20:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:20:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:20:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:20:53.409781  543705 memory.go:184] no items to output this cycle
I0322 03:20:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 03:21:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:21:03.409793  543705 memory.go:184] no items to output this cycle
I0322 03:21:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 03:21:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:21:13.409781  543705 memory.go:191] Add success.
W0322 03:21:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:21:13.409810  543705 cpu.go:282] Add success.
W0322 03:21:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:21:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:21:13.420050  543705 net.go:648] Add success.
I0322 03:21:13.422613  543705 net.go:770] primary dev: ETH0
I0322 03:21:13.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:21:13.422638  543705 net.go:698] Add success.
I0322 03:21:14.265887  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cc906a4a-9ac5-4bb1-86ee-7f12665a4267","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:21:14.265927  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:21:14.454727  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:21:14.454962  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:21:14.454972  543705 disk_worker.go:708] disk space is not compliant
W0322 03:21:14.454974  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:21:14.456479  543705 disk_worker.go:494] system disk:vda1
I0322 03:21:14.456514  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:21:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:21:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:21:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:21:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:21:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:21:23.409779  543705 memory.go:184] no items to output this cycle
I0322 03:21:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 03:21:32.753395  543705 disk_info.go:125] begin check local disk info of client
I0322 03:21:32.755902  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:21:32.755908  543705 disk_info.go:196] parse disk info done, disk is : [0xc000258180 0xc0002581c0]
E0322 03:21:33.407508  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:21:33.407522  543705 memory.go:184] no items to output this cycle
I0322 03:21:33.407552  543705 cpu.go:275] no items to output this cycle
I0322 03:21:39.471103  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:21:39.471110  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:21:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:21:43.410720  543705 memory.go:191] Add success.
I0322 03:21:43.409824  543705 cpu.go:282] Add success.
I0322 03:21:43.420432  543705 net.go:648] Add success.
I0322 03:21:43.423256  543705 net.go:770] primary dev: ETH0
I0322 03:21:43.423268  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:21:43.423281  543705 net.go:698] Add success.
I0322 03:21:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:21:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:21:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:21:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:21:53.409773  543705 memory.go:184] no items to output this cycle
I0322 03:21:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 03:22:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:22:03.409784  543705 memory.go:184] no items to output this cycle
I0322 03:22:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 03:22:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:22:13.409783  543705 memory.go:191] Add success.
W0322 03:22:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:22:13.409810  543705 cpu.go:282] Add success.
W0322 03:22:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:22:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:22:13.420071  543705 net.go:648] Add success.
I0322 03:22:13.422735  543705 net.go:770] primary dev: ETH0
I0322 03:22:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:22:13.422763  543705 net.go:698] Add success.
W0322 03:22:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:22:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 03:22:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:22:14.456889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:22:14.456898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:22:14.456905  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:22:14.456981  543705 disk_worker.go:494] system disk:vda1
I0322 03:22:14.457023  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:22:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:22:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:22:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:22:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:22:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:22:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:22:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:22:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:22:23.409793  543705 memory.go:184] no items to output this cycle
I0322 03:22:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 03:22:32.757425  543705 disk_info.go:125] begin check local disk info of client
I0322 03:22:32.759936  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:22:32.759942  543705 disk_info.go:196] parse disk info done, disk is : [0xc000251040 0xc000251080]
E0322 03:22:33.409265  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:22:33.409283  543705 memory.go:184] no items to output this cycle
I0322 03:22:33.409299  543705 cpu.go:275] no items to output this cycle
E0322 03:22:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:22:43.409819  543705 memory.go:191] Add success.
I0322 03:22:43.409828  543705 cpu.go:282] Add success.
I0322 03:22:43.419982  543705 net.go:648] Add success.
I0322 03:22:43.422675  543705 net.go:770] primary dev: ETH0
I0322 03:22:43.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:22:43.422700  543705 net.go:698] Add success.
I0322 03:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:22:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:22:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:22:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:22:53.409804  543705 memory.go:184] no items to output this cycle
I0322 03:22:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 03:23:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:23:03.409774  543705 cpu.go:275] no items to output this cycle
I0322 03:23:03.409778  543705 memory.go:184] no items to output this cycle
E0322 03:23:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:23:13.409794  543705 memory.go:191] Add success.
I0322 03:23:13.409798  543705 cpu.go:282] Add success.
W0322 03:23:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:23:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:23:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:23:13.420062  543705 net.go:648] Add success.
I0322 03:23:13.422825  543705 net.go:770] primary dev: ETH0
I0322 03:23:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:23:13.422848  543705 net.go:698] Add success.
I0322 03:23:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:23:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:23:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 03:23:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:23:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 03:23:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:23:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:23:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:23:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:23:23.409799  543705 memory.go:184] no items to output this cycle
I0322 03:23:23.409808  543705 cpu.go:275] no items to output this cycle
I0322 03:23:32.761436  543705 disk_info.go:125] begin check local disk info of client
I0322 03:23:32.763990  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:23:32.763997  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344040 0xc000344080]
E0322 03:23:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:23:33.407521  543705 memory.go:184] no items to output this cycle
I0322 03:23:33.407537  543705 cpu.go:275] no items to output this cycle
E0322 03:23:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:23:43.409828  543705 cpu.go:282] Add success.
I0322 03:23:43.409839  543705 memory.go:191] Add success.
I0322 03:23:43.420002  543705 net.go:648] Add success.
I0322 03:23:43.422844  543705 net.go:770] primary dev: ETH0
I0322 03:23:43.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:23:43.422900  543705 net.go:698] Add success.
I0322 03:23:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:23:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:23:53.409771  543705 memory.go:184] no items to output this cycle
I0322 03:23:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 03:24:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:24:03.409773  543705 memory.go:184] no items to output this cycle
I0322 03:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 03:24:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:24:13.409790  543705 memory.go:191] Add success.
I0322 03:24:13.409793  543705 cpu.go:282] Add success.
W0322 03:24:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:24:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:24:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:24:13.420071  543705 net.go:648] Add success.
I0322 03:24:13.422749  543705 net.go:770] primary dev: ETH0
I0322 03:24:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:24:13.422777  543705 net.go:698] Add success.
I0322 03:24:13.468372  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"209390eb-44bc-46ef-8266-4bca9bc94344","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:24:13.468405  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:24:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:24:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:24:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 03:24:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:24:14.456598  543705 disk_worker.go:494] system disk:vda1
I0322 03:24:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:24:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:24:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:24:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:24:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:24:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:24:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:24:23.409794  543705 memory.go:184] no items to output this cycle
I0322 03:24:23.409807  543705 cpu.go:275] no items to output this cycle
I0322 03:24:32.765461  543705 disk_info.go:125] begin check local disk info of client
I0322 03:24:32.767984  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:24:32.767992  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c000 0xc00035c040]
E0322 03:24:33.409282  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:24:33.409300  543705 memory.go:184] no items to output this cycle
I0322 03:24:33.409316  543705 cpu.go:275] no items to output this cycle
I0322 03:24:39.472111  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:24:39.472118  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:24:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:24:43.410687  543705 memory.go:191] Add success.
I0322 03:24:43.409804  543705 cpu.go:282] Add success.
I0322 03:24:43.420389  543705 net.go:648] Add success.
I0322 03:24:43.423509  543705 net.go:770] primary dev: ETH0
I0322 03:24:43.423526  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:24:43.423555  543705 net.go:698] Add success.
I0322 03:24:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:24:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:24:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:24:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:24:53.409811  543705 memory.go:184] no items to output this cycle
I0322 03:24:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 03:25:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:25:03.409800  543705 memory.go:184] no items to output this cycle
I0322 03:25:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 03:25:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:25:13.409786  543705 memory.go:191] Add success.
I0322 03:25:13.409788  543705 cpu.go:282] Add success.
W0322 03:25:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:25:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:25:13.419947  543705 net.go:770] primary dev: ETH0
I0322 03:25:13.419960  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:25:13.419973  543705 net.go:698] Add success.
I0322 03:25:13.420209  543705 net.go:648] Add success.
I0322 03:25:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:25:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:25:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 03:25:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:25:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 03:25:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:25:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:25:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:25:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:25:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:25:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:25:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:25:23.409765  543705 memory.go:184] no items to output this cycle
I0322 03:25:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 03:25:32.769474  543705 disk_info.go:125] begin check local disk info of client
I0322 03:25:32.771979  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:25:32.771986  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a040 0xc00046a080]
E0322 03:25:33.409258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:25:33.409274  543705 memory.go:184] no items to output this cycle
I0322 03:25:33.409293  543705 cpu.go:275] no items to output this cycle
E0322 03:25:43.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:25:43.409933  543705 memory.go:191] Add success.
I0322 03:25:43.409969  543705 cpu.go:282] Add success.
I0322 03:25:43.419733  543705 net.go:648] Add success.
I0322 03:25:43.422353  543705 net.go:770] primary dev: ETH0
I0322 03:25:43.422365  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:25:43.422377  543705 net.go:698] Add success.
I0322 03:25:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:25:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:25:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:25:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:25:53.409774  543705 memory.go:184] no items to output this cycle
I0322 03:25:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 03:26:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:26:03.409782  543705 memory.go:184] no items to output this cycle
I0322 03:26:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 03:26:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:26:13.409776  543705 memory.go:191] Add success.
W0322 03:26:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:26:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:26:13.409813  543705 cpu.go:282] Add success.
I0322 03:26:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:26:13.420064  543705 net.go:648] Add success.
I0322 03:26:13.422724  543705 net.go:770] primary dev: ETH0
I0322 03:26:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:26:13.422754  543705 net.go:698] Add success.
I0322 03:26:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:26:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:26:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 03:26:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:26:14.456799  543705 disk_worker.go:494] system disk:vda1
I0322 03:26:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:26:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:26:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:26:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:26:23.410616  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:26:23.410630  543705 memory.go:184] no items to output this cycle
I0322 03:26:23.410652  543705 cpu.go:275] no items to output this cycle
I0322 03:26:32.773508  543705 disk_info.go:125] begin check local disk info of client
I0322 03:26:32.776065  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:26:32.776072  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c40 0xc0000c5c80]
E0322 03:26:33.409293  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:26:33.409413  543705 memory.go:184] no items to output this cycle
I0322 03:26:33.409456  543705 cpu.go:275] no items to output this cycle
E0322 03:26:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:26:43.409810  543705 memory.go:191] Add success.
I0322 03:26:43.409822  543705 cpu.go:282] Add success.
I0322 03:26:43.419996  543705 net.go:648] Add success.
I0322 03:26:43.422598  543705 net.go:770] primary dev: ETH0
I0322 03:26:43.422612  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:26:43.422625  543705 net.go:698] Add success.
I0322 03:26:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:26:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:26:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:26:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:26:53.409776  543705 memory.go:184] no items to output this cycle
I0322 03:26:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 03:27:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:27:03.409802  543705 memory.go:184] no items to output this cycle
I0322 03:27:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 03:27:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:27:13.409771  543705 memory.go:191] Add success.
W0322 03:27:13.409796  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:27:13.409802  543705 cpu.go:282] Add success.
W0322 03:27:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:27:13.409810  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:27:13.420214  543705 net.go:648] Add success.
I0322 03:27:13.428776  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 03:27:13.428853  543705 net.go:770] primary dev: ETH0
I0322 03:27:13.428866  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:27:13.428877  543705 net.go:698] Add success.
I0322 03:27:13.453420  543705 event_worker.go:152] Polling the log file for events...
I0322 03:27:13.464370  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5d99b04e-6e52-442c-8488-e880c28de96d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:27:13.464402  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 03:27:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:27:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 03:27:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:27:14.456784  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:27:14.456793  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:27:14.456799  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:27:14.456834  543705 disk_worker.go:494] system disk:vda1
I0322 03:27:14.456863  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:27:15.456494  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:27:15.456503  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:27:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:27:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:27:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:27:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:27:16.472314  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:27:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:27:23.409902  543705 memory.go:184] no items to output this cycle
I0322 03:27:23.409902  543705 cpu.go:275] no items to output this cycle
I0322 03:27:32.777516  543705 disk_info.go:125] begin check local disk info of client
I0322 03:27:32.780041  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:27:32.780048  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004737c0 0xc000473800]
E0322 03:27:33.409240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:27:33.409252  543705 memory.go:184] no items to output this cycle
I0322 03:27:33.409296  543705 cpu.go:275] no items to output this cycle
I0322 03:27:39.473132  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:27:39.473140  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:27:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:27:43.410698  543705 memory.go:191] Add success.
I0322 03:27:43.409832  543705 cpu.go:282] Add success.
I0322 03:27:43.420398  543705 net.go:648] Add success.
I0322 03:27:43.423011  543705 net.go:770] primary dev: ETH0
I0322 03:27:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:27:43.423037  543705 net.go:698] Add success.
I0322 03:27:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:27:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:27:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:27:53.409781  543705 memory.go:184] no items to output this cycle
I0322 03:27:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 03:28:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:28:03.409805  543705 memory.go:184] no items to output this cycle
I0322 03:28:03.409810  543705 cpu.go:275] no items to output this cycle
W0322 03:28:13.409703  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:28:13.409719  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:28:13.409723  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 03:28:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:28:13.409813  543705 memory.go:191] Add success.
I0322 03:28:13.409824  543705 cpu.go:282] Add success.
I0322 03:28:13.420066  543705 net.go:648] Add success.
I0322 03:28:13.422653  543705 net.go:770] primary dev: ETH0
I0322 03:28:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:28:13.422684  543705 net.go:698] Add success.
I0322 03:28:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:28:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:28:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 03:28:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:28:14.456574  543705 disk_worker.go:494] system disk:vda1
I0322 03:28:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:28:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:28:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:28:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:28:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:28:23.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:28:23.409866  543705 memory.go:184] no items to output this cycle
I0322 03:28:23.409904  543705 cpu.go:275] no items to output this cycle
I0322 03:28:32.781546  543705 disk_info.go:125] begin check local disk info of client
I0322 03:28:32.784106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:28:32.784113  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c40 0xc0000c4c80]
E0322 03:28:33.409306  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:28:33.409314  543705 cpu.go:275] no items to output this cycle
I0322 03:28:33.409320  543705 memory.go:184] no items to output this cycle
E0322 03:28:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:28:43.409795  543705 memory.go:191] Add success.
I0322 03:28:43.409817  543705 cpu.go:282] Add success.
I0322 03:28:43.419987  543705 net.go:648] Add success.
I0322 03:28:43.422725  543705 net.go:770] primary dev: ETH0
I0322 03:28:43.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:28:43.422755  543705 net.go:698] Add success.
I0322 03:28:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:28:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:28:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:28:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:28:53.409773  543705 memory.go:184] no items to output this cycle
I0322 03:28:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 03:29:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:29:03.409783  543705 memory.go:184] no items to output this cycle
I0322 03:29:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 03:29:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:29:13.409792  543705 cpu.go:282] Add success.
I0322 03:29:13.409803  543705 memory.go:191] Add success.
W0322 03:29:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:29:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:29:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:29:13.420054  543705 net.go:648] Add success.
I0322 03:29:13.422783  543705 net.go:770] primary dev: ETH0
I0322 03:29:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:29:13.422813  543705 net.go:698] Add success.
I0322 03:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:29:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:29:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 03:29:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:29:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 03:29:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:29:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:29:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:29:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:29:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:29:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:29:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:29:23.409775  543705 memory.go:184] no items to output this cycle
I0322 03:29:23.409777  543705 cpu.go:275] no items to output this cycle
I0322 03:29:32.785563  543705 disk_info.go:125] begin check local disk info of client
I0322 03:29:32.788071  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:29:32.788078  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9f00 0xc0003b9f40]
E0322 03:29:33.407518  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:29:33.407536  543705 memory.go:184] no items to output this cycle
I0322 03:29:33.407549  543705 cpu.go:275] no items to output this cycle
E0322 03:29:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:29:43.409814  543705 cpu.go:282] Add success.
I0322 03:29:43.409821  543705 memory.go:191] Add success.
I0322 03:29:43.419969  543705 net.go:648] Add success.
I0322 03:29:43.422892  543705 net.go:770] primary dev: ETH0
I0322 03:29:43.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:29:43.422917  543705 net.go:698] Add success.
I0322 03:29:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:29:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:29:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:29:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:29:53.409777  543705 memory.go:184] no items to output this cycle
I0322 03:29:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 03:30:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:30:03.409766  543705 memory.go:184] no items to output this cycle
I0322 03:30:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 03:30:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:30:13.409813  543705 memory.go:191] Add success.
I0322 03:30:13.409819  543705 cpu.go:282] Add success.
W0322 03:30:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:30:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:30:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:30:13.420053  543705 net.go:648] Add success.
I0322 03:30:13.422609  543705 net.go:770] primary dev: ETH0
I0322 03:30:13.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:30:13.422638  543705 net.go:698] Add success.
I0322 03:30:13.468897  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9bee9fa6-3688-4b06-9c1a-1a55e8bcf80e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:30:13.468930  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:30:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:30:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:30:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 03:30:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:30:14.456680  543705 disk_worker.go:494] system disk:vda1
I0322 03:30:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:30:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:30:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:30:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:30:16.472507  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:30:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:30:23.409761  543705 memory.go:184] no items to output this cycle
I0322 03:30:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 03:30:32.789579  543705 disk_info.go:125] begin check local disk info of client
I0322 03:30:32.792101  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:30:32.792108  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 03:30:33.409268  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:30:33.409286  543705 memory.go:184] no items to output this cycle
I0322 03:30:33.409300  543705 cpu.go:275] no items to output this cycle
I0322 03:30:39.474128  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:30:39.474135  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:30:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:30:43.410953  543705 memory.go:191] Add success.
I0322 03:30:43.409834  543705 cpu.go:282] Add success.
I0322 03:30:43.419700  543705 net.go:648] Add success.
I0322 03:30:43.422374  543705 net.go:770] primary dev: ETH0
I0322 03:30:43.422389  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:30:43.422404  543705 net.go:698] Add success.
I0322 03:30:46.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:30:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:30:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:30:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:30:53.409781  543705 memory.go:184] no items to output this cycle
I0322 03:30:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 03:31:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:31:03.409770  543705 memory.go:184] no items to output this cycle
I0322 03:31:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 03:31:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:31:13.409812  543705 memory.go:191] Add success.
I0322 03:31:13.409820  543705 cpu.go:282] Add success.
W0322 03:31:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:31:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:31:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:31:13.420185  543705 net.go:648] Add success.
I0322 03:31:13.423131  543705 net.go:770] primary dev: ETH0
I0322 03:31:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:31:13.423163  543705 net.go:698] Add success.
I0322 03:31:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:31:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:31:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 03:31:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:31:14.456472  543705 disk_worker.go:494] system disk:vda1
I0322 03:31:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:31:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:31:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:31:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:31:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:31:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:31:23.409792  543705 memory.go:184] no items to output this cycle
I0322 03:31:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 03:31:32.793600  543705 disk_info.go:125] begin check local disk info of client
I0322 03:31:32.796152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:31:32.796162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f680 0xc00039f6c0]
E0322 03:31:33.407512  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:31:33.407525  543705 memory.go:184] no items to output this cycle
I0322 03:31:33.407531  543705 cpu.go:275] no items to output this cycle
E0322 03:31:43.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:31:43.409931  543705 memory.go:191] Add success.
I0322 03:31:43.409934  543705 cpu.go:282] Add success.
I0322 03:31:43.419719  543705 net.go:648] Add success.
I0322 03:31:43.422289  543705 net.go:770] primary dev: ETH0
I0322 03:31:43.422304  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:31:43.422317  543705 net.go:698] Add success.
I0322 03:31:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:31:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:31:46.458106  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:31:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:31:53.409807  543705 memory.go:184] no items to output this cycle
I0322 03:31:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 03:32:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:32:03.409771  543705 memory.go:184] no items to output this cycle
I0322 03:32:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 03:32:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:32:13.409781  543705 memory.go:191] Add success.
W0322 03:32:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:32:13.409808  543705 cpu.go:282] Add success.
W0322 03:32:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:32:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:32:13.420239  543705 net.go:648] Add success.
I0322 03:32:13.423049  543705 net.go:770] primary dev: ETH0
I0322 03:32:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:32:13.423074  543705 net.go:698] Add success.
W0322 03:32:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:32:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 03:32:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:32:14.456786  543705 disk_worker.go:494] system disk:vda1
I0322 03:32:14.456824  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:32:14.457155  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:32:14.457163  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:32:14.457168  543705 custom_config.go:64] query custom config with name: gpu
E0322 03:32:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:32:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:32:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:32:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:32:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:32:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:32:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:32:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:32:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 03:32:23.409795  543705 memory.go:184] no items to output this cycle
I0322 03:32:32.797623  543705 disk_info.go:125] begin check local disk info of client
I0322 03:32:32.800143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:32:32.800151  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e180 0xc00039e1c0]
E0322 03:32:33.407513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:32:33.407528  543705 memory.go:184] no items to output this cycle
I0322 03:32:33.407540  543705 cpu.go:275] no items to output this cycle
E0322 03:32:43.409937  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:32:43.409981  543705 memory.go:191] Add success.
I0322 03:32:43.409939  543705 cpu.go:282] Add success.
I0322 03:32:43.419715  543705 net.go:648] Add success.
I0322 03:32:43.422450  543705 net.go:770] primary dev: ETH0
I0322 03:32:43.422462  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:32:43.422474  543705 net.go:698] Add success.
I0322 03:32:46.458015  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:32:46.458107  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:32:46.458143  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:32:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:32:53.409773  543705 memory.go:184] no items to output this cycle
I0322 03:32:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 03:33:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:33:03.409800  543705 memory.go:184] no items to output this cycle
I0322 03:33:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 03:33:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:33:13.409814  543705 memory.go:191] Add success.
I0322 03:33:13.409820  543705 cpu.go:282] Add success.
W0322 03:33:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:33:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:33:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:33:13.420089  543705 net.go:648] Add success.
I0322 03:33:13.423238  543705 net.go:770] primary dev: ETH0
I0322 03:33:13.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:33:13.423263  543705 net.go:698] Add success.
I0322 03:33:13.467768  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"067e085b-2183-4b0c-a621-464581444725","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:33:13.467801  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:33:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:33:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:33:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 03:33:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:33:14.456691  543705 disk_worker.go:494] system disk:vda1
I0322 03:33:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:33:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:33:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:33:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:33:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:33:23.409780  543705 memory.go:184] no items to output this cycle
I0322 03:33:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 03:33:32.801672  543705 disk_info.go:125] begin check local disk info of client
I0322 03:33:32.804143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:33:32.804149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003497c0 0xc000349800]
E0322 03:33:33.409261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:33:33.409386  543705 cpu.go:275] no items to output this cycle
I0322 03:33:33.409391  543705 memory.go:184] no items to output this cycle
I0322 03:33:39.475152  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:33:39.475160  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:33:43.410597  543705 memory.go:191] Add success.
I0322 03:33:43.409806  543705 cpu.go:282] Add success.
I0322 03:33:43.420307  543705 net.go:648] Add success.
I0322 03:33:43.422938  543705 net.go:770] primary dev: ETH0
I0322 03:33:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:33:43.422968  543705 net.go:698] Add success.
I0322 03:33:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:33:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:33:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:33:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:33:53.409783  543705 memory.go:184] no items to output this cycle
I0322 03:33:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 03:34:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:34:03.409803  543705 memory.go:184] no items to output this cycle
I0322 03:34:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 03:34:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:34:13.409810  543705 memory.go:191] Add success.
I0322 03:34:13.409818  543705 cpu.go:282] Add success.
W0322 03:34:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:34:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:34:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:34:13.420049  543705 net.go:648] Add success.
I0322 03:34:13.422579  543705 net.go:770] primary dev: ETH0
I0322 03:34:13.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:34:13.422605  543705 net.go:698] Add success.
I0322 03:34:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:34:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:34:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 03:34:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:34:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 03:34:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:34:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:34:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:34:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:34:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:34:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:34:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:34:23.409798  543705 memory.go:184] no items to output this cycle
I0322 03:34:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 03:34:32.805666  543705 disk_info.go:125] begin check local disk info of client
I0322 03:34:32.808251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:34:32.808258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e5740 0xc0003e5780]
E0322 03:34:33.409330  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:34:33.409432  543705 cpu.go:275] no items to output this cycle
I0322 03:34:33.409457  543705 memory.go:184] no items to output this cycle
E0322 03:34:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:34:43.409794  543705 memory.go:191] Add success.
I0322 03:34:43.409809  543705 cpu.go:282] Add success.
I0322 03:34:43.419971  543705 net.go:648] Add success.
I0322 03:34:43.422879  543705 net.go:770] primary dev: ETH0
I0322 03:34:43.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:34:43.422904  543705 net.go:698] Add success.
I0322 03:34:46.458025  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:34:46.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:34:46.458142  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:34:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:34:53.409806  543705 memory.go:184] no items to output this cycle
I0322 03:34:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 03:35:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:35:03.409788  543705 memory.go:184] no items to output this cycle
I0322 03:35:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 03:35:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:35:13.409810  543705 memory.go:191] Add success.
I0322 03:35:13.409818  543705 cpu.go:282] Add success.
W0322 03:35:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:35:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:35:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:35:13.420061  543705 net.go:648] Add success.
I0322 03:35:13.422460  543705 net.go:770] primary dev: ETH0
I0322 03:35:13.422473  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:35:13.422484  543705 net.go:698] Add success.
I0322 03:35:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:35:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:35:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 03:35:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:35:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 03:35:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:35:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:35:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:35:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:35:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:35:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:35:23.409772  543705 memory.go:184] no items to output this cycle
I0322 03:35:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 03:35:32.809676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:35:32.812151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:35:32.812159  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049f680 0xc00049f6c0]
E0322 03:35:33.409225  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:35:33.409239  543705 memory.go:184] no items to output this cycle
I0322 03:35:33.409254  543705 cpu.go:275] no items to output this cycle
E0322 03:35:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:35:43.409812  543705 memory.go:191] Add success.
I0322 03:35:43.409814  543705 cpu.go:282] Add success.
I0322 03:35:43.420021  543705 net.go:648] Add success.
I0322 03:35:43.422615  543705 net.go:770] primary dev: ETH0
I0322 03:35:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:35:43.422645  543705 net.go:698] Add success.
I0322 03:35:46.458011  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:35:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:35:46.458114  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:35:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:35:53.409813  543705 memory.go:184] no items to output this cycle
I0322 03:35:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 03:36:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:36:03.409777  543705 memory.go:184] no items to output this cycle
I0322 03:36:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 03:36:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:36:13.409782  543705 memory.go:191] Add success.
W0322 03:36:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:36:13.409815  543705 cpu.go:282] Add success.
W0322 03:36:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:36:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:36:13.420046  543705 net.go:648] Add success.
I0322 03:36:13.422459  543705 net.go:770] primary dev: ETH0
I0322 03:36:13.422473  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:36:13.422485  543705 net.go:698] Add success.
I0322 03:36:13.467413  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9959d401-0fbd-4d7e-8dce-4e1827e5e94d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:36:13.467446  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:36:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:36:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:36:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 03:36:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:36:14.456530  543705 disk_worker.go:494] system disk:vda1
I0322 03:36:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:36:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:36:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:36:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:36:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:36:23.409777  543705 memory.go:184] no items to output this cycle
I0322 03:36:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 03:36:32.813674  543705 disk_info.go:125] begin check local disk info of client
I0322 03:36:32.816155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:36:32.816162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a380 0xc00034a3c0]
E0322 03:36:33.407516  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:36:33.407529  543705 memory.go:184] no items to output this cycle
I0322 03:36:33.407552  543705 cpu.go:275] no items to output this cycle
I0322 03:36:39.476140  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:36:39.476147  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:36:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:36:43.410579  543705 memory.go:191] Add success.
I0322 03:36:43.409819  543705 cpu.go:282] Add success.
I0322 03:36:43.420284  543705 net.go:648] Add success.
I0322 03:36:43.423061  543705 net.go:770] primary dev: ETH0
I0322 03:36:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:36:43.423091  543705 net.go:698] Add success.
I0322 03:36:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:36:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:36:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:36:53.409786  543705 memory.go:184] no items to output this cycle
I0322 03:36:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 03:37:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:37:03.409796  543705 memory.go:184] no items to output this cycle
I0322 03:37:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 03:37:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:37:13.409776  543705 memory.go:191] Add success.
W0322 03:37:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:37:13.409807  543705 cpu.go:282] Add success.
W0322 03:37:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:37:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:37:13.420171  543705 net.go:648] Add success.
I0322 03:37:13.422745  543705 net.go:770] primary dev: ETH0
I0322 03:37:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:37:13.422770  543705 net.go:698] Add success.
I0322 03:37:13.453306  543705 event_worker.go:152] Polling the log file for events...
W0322 03:37:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:37:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 03:37:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:37:14.456885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:37:14.456894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:37:14.456900  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:37:14.456972  543705 disk_worker.go:494] system disk:vda1
I0322 03:37:14.457015  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:37:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:37:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:37:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:37:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:37:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:37:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:37:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:37:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:37:23.409801  543705 memory.go:184] no items to output this cycle
I0322 03:37:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 03:37:32.817677  543705 disk_info.go:125] begin check local disk info of client
I0322 03:37:32.820113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:37:32.820119  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d200 0xc00034d240]
E0322 03:37:33.407523  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:37:33.407639  543705 cpu.go:275] no items to output this cycle
I0322 03:37:33.407644  543705 memory.go:184] no items to output this cycle
E0322 03:37:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:37:43.409831  543705 memory.go:191] Add success.
I0322 03:37:43.409841  543705 cpu.go:282] Add success.
I0322 03:37:43.419992  543705 net.go:648] Add success.
I0322 03:37:43.422566  543705 net.go:770] primary dev: ETH0
I0322 03:37:43.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:37:43.422592  543705 net.go:698] Add success.
I0322 03:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:37:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:37:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:37:53.409788  543705 memory.go:184] no items to output this cycle
I0322 03:37:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 03:38:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:38:03.409764  543705 memory.go:184] no items to output this cycle
I0322 03:38:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 03:38:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:38:13.409791  543705 memory.go:191] Add success.
I0322 03:38:13.409791  543705 cpu.go:282] Add success.
W0322 03:38:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:38:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:38:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:38:13.420063  543705 net.go:648] Add success.
I0322 03:38:13.422803  543705 net.go:770] primary dev: ETH0
I0322 03:38:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:38:13.422832  543705 net.go:698] Add success.
I0322 03:38:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:38:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:38:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 03:38:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:38:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 03:38:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:38:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:38:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:38:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:38:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:38:23.409777  543705 memory.go:184] no items to output this cycle
I0322 03:38:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 03:38:32.821674  543705 disk_info.go:125] begin check local disk info of client
I0322 03:38:32.824188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:38:32.824197  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e2c0 0xc00049e300]
E0322 03:38:33.409278  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:38:33.409294  543705 memory.go:184] no items to output this cycle
I0322 03:38:33.409383  543705 cpu.go:275] no items to output this cycle
E0322 03:38:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:38:43.409788  543705 memory.go:191] Add success.
I0322 03:38:43.409820  543705 cpu.go:282] Add success.
I0322 03:38:43.420001  543705 net.go:648] Add success.
I0322 03:38:43.422492  543705 net.go:770] primary dev: ETH0
I0322 03:38:43.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:38:43.422518  543705 net.go:698] Add success.
I0322 03:38:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:38:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:38:53.410394  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:38:53.410413  543705 memory.go:184] no items to output this cycle
I0322 03:38:53.410438  543705 cpu.go:275] no items to output this cycle
E0322 03:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:39:03.409781  543705 memory.go:184] no items to output this cycle
I0322 03:39:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 03:39:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:39:13.409785  543705 memory.go:191] Add success.
W0322 03:39:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:39:13.409809  543705 cpu.go:282] Add success.
W0322 03:39:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:39:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:39:13.420048  543705 net.go:648] Add success.
I0322 03:39:13.422635  543705 net.go:770] primary dev: ETH0
I0322 03:39:13.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:39:13.422664  543705 net.go:698] Add success.
I0322 03:39:13.464420  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"01494cc8-c7c7-4668-a8c6-ce2191d1a82b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:39:13.464453  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:39:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:39:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:39:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 03:39:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:39:14.456539  543705 disk_worker.go:494] system disk:vda1
I0322 03:39:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:39:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:39:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:39:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:39:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:39:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:39:23.409772  543705 memory.go:184] no items to output this cycle
I0322 03:39:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 03:39:32.825679  543705 disk_info.go:125] begin check local disk info of client
I0322 03:39:32.828154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:39:32.828162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dac40 0xc0004dac80]
E0322 03:39:33.407639  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:39:33.407681  543705 cpu.go:275] no items to output this cycle
I0322 03:39:33.407762  543705 memory.go:184] no items to output this cycle
I0322 03:39:39.477154  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:39:39.477162  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:39:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:39:43.410574  543705 memory.go:191] Add success.
I0322 03:39:43.409833  543705 cpu.go:282] Add success.
I0322 03:39:43.420265  543705 net.go:648] Add success.
I0322 03:39:43.422867  543705 net.go:770] primary dev: ETH0
I0322 03:39:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:39:43.422893  543705 net.go:698] Add success.
I0322 03:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:39:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:39:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:39:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:39:53.409783  543705 memory.go:184] no items to output this cycle
I0322 03:39:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 03:40:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:40:03.409775  543705 memory.go:184] no items to output this cycle
I0322 03:40:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 03:40:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:40:13.409815  543705 memory.go:191] Add success.
I0322 03:40:13.409825  543705 cpu.go:282] Add success.
W0322 03:40:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:40:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:40:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:40:13.420127  543705 net.go:648] Add success.
I0322 03:40:13.422696  543705 net.go:770] primary dev: ETH0
I0322 03:40:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:40:13.422720  543705 net.go:698] Add success.
I0322 03:40:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:40:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:40:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 03:40:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:40:14.456480  543705 disk_worker.go:494] system disk:vda1
I0322 03:40:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:40:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:40:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:40:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:40:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:40:23.409771  543705 memory.go:184] no items to output this cycle
I0322 03:40:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 03:40:32.829675  543705 disk_info.go:125] begin check local disk info of client
I0322 03:40:32.832220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:40:32.832226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049d380 0xc00049d3c0]
E0322 03:40:33.409184  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:40:33.409199  543705 memory.go:184] no items to output this cycle
I0322 03:40:33.409319  543705 cpu.go:275] no items to output this cycle
E0322 03:40:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:40:43.409802  543705 memory.go:191] Add success.
I0322 03:40:43.409817  543705 cpu.go:282] Add success.
I0322 03:40:43.419871  543705 net.go:648] Add success.
I0322 03:40:43.422425  543705 net.go:770] primary dev: ETH0
I0322 03:40:43.422439  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:40:43.422454  543705 net.go:698] Add success.
I0322 03:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:40:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:40:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:40:53.409777  543705 memory.go:184] no items to output this cycle
I0322 03:40:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 03:41:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:41:03.409782  543705 memory.go:184] no items to output this cycle
I0322 03:41:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 03:41:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:41:13.409783  543705 memory.go:191] Add success.
I0322 03:41:13.409785  543705 cpu.go:282] Add success.
W0322 03:41:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:41:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:41:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:41:13.420106  543705 net.go:648] Add success.
I0322 03:41:13.422839  543705 net.go:770] primary dev: ETH0
I0322 03:41:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:41:13.422864  543705 net.go:698] Add success.
I0322 03:41:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:41:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:41:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 03:41:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:41:14.456557  543705 disk_worker.go:494] system disk:vda1
I0322 03:41:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:41:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:41:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:41:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:41:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:41:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:41:23.409800  543705 memory.go:184] no items to output this cycle
I0322 03:41:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 03:41:32.833676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:41:32.836214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:41:32.836220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004866c0 0xc000486700]
E0322 03:41:33.407519  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:41:33.407533  543705 memory.go:184] no items to output this cycle
I0322 03:41:33.407539  543705 cpu.go:275] no items to output this cycle
E0322 03:41:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:41:43.409794  543705 memory.go:191] Add success.
I0322 03:41:43.409795  543705 cpu.go:282] Add success.
I0322 03:41:43.419756  543705 net.go:648] Add success.
I0322 03:41:43.422276  543705 net.go:770] primary dev: ETH0
I0322 03:41:43.422290  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:41:43.422301  543705 net.go:698] Add success.
I0322 03:41:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:41:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:41:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:41:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:41:53.409805  543705 memory.go:184] no items to output this cycle
I0322 03:41:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 03:42:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:42:03.409806  543705 memory.go:184] no items to output this cycle
I0322 03:42:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 03:42:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:42:13.409801  543705 memory.go:191] Add success.
I0322 03:42:13.409801  543705 cpu.go:282] Add success.
W0322 03:42:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:42:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:42:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:42:13.420119  543705 net.go:648] Add success.
I0322 03:42:13.422468  543705 net.go:770] primary dev: ETH0
I0322 03:42:13.422480  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:42:13.422492  543705 net.go:698] Add success.
I0322 03:42:13.467528  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6cbbcf3a-e02b-40d0-bc1d-cb99cd386148","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:42:13.467562  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 03:42:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:42:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 03:42:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:42:14.456878  543705 disk_worker.go:494] system disk:vda1
E0322 03:42:14.456881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:42:14.456888  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:42:14.456893  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:42:14.456918  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:42:15.456484  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:42:15.456494  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:42:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:42:16.457989  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:42:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:42:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:42:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:42:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:42:23.409774  543705 memory.go:184] no items to output this cycle
I0322 03:42:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 03:42:32.837676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:42:32.840143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:42:32.840150  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034af40 0xc00034af80]
E0322 03:42:33.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:42:33.407523  543705 memory.go:184] no items to output this cycle
I0322 03:42:33.407556  543705 cpu.go:275] no items to output this cycle
I0322 03:42:39.478136  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:42:39.478144  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:42:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:42:43.410818  543705 memory.go:191] Add success.
I0322 03:42:43.409816  543705 cpu.go:282] Add success.
I0322 03:42:43.419725  543705 net.go:648] Add success.
I0322 03:42:43.422472  543705 net.go:770] primary dev: ETH0
I0322 03:42:43.422485  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:42:43.422497  543705 net.go:698] Add success.
I0322 03:42:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:42:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:42:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:42:53.410744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:42:53.410764  543705 memory.go:184] no items to output this cycle
I0322 03:42:53.410779  543705 cpu.go:275] no items to output this cycle
E0322 03:43:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:43:03.409780  543705 memory.go:184] no items to output this cycle
I0322 03:43:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 03:43:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:43:13.409807  543705 memory.go:191] Add success.
I0322 03:43:13.409807  543705 cpu.go:282] Add success.
W0322 03:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:43:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:43:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:43:13.420314  543705 net.go:648] Add success.
I0322 03:43:13.423556  543705 net.go:770] primary dev: ETH0
I0322 03:43:13.423569  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:43:13.423580  543705 net.go:698] Add success.
I0322 03:43:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:43:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:43:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 03:43:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:43:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 03:43:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:43:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:43:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:43:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:43:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:43:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:43:23.409779  543705 memory.go:184] no items to output this cycle
I0322 03:43:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 03:43:32.841674  543705 disk_info.go:125] begin check local disk info of client
I0322 03:43:32.844188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:43:32.844195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf000 0xc0003bf040]
E0322 03:43:33.409081  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:43:33.409096  543705 memory.go:184] no items to output this cycle
I0322 03:43:33.409105  543705 cpu.go:275] no items to output this cycle
E0322 03:43:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:43:43.409799  543705 memory.go:191] Add success.
I0322 03:43:43.409800  543705 cpu.go:282] Add success.
I0322 03:43:43.419958  543705 net.go:648] Add success.
I0322 03:43:43.422913  543705 net.go:770] primary dev: ETH0
I0322 03:43:43.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:43:43.422942  543705 net.go:698] Add success.
I0322 03:43:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:43:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:43:53.409788  543705 memory.go:184] no items to output this cycle
I0322 03:43:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 03:44:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:44:03.409810  543705 memory.go:184] no items to output this cycle
I0322 03:44:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 03:44:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:44:13.409805  543705 memory.go:191] Add success.
I0322 03:44:13.409813  543705 cpu.go:282] Add success.
W0322 03:44:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:44:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:44:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:44:13.420138  543705 net.go:648] Add success.
I0322 03:44:13.423136  543705 net.go:770] primary dev: ETH0
I0322 03:44:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:44:13.423161  543705 net.go:698] Add success.
I0322 03:44:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:44:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:44:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 03:44:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:44:14.456557  543705 disk_worker.go:494] system disk:vda1
I0322 03:44:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:44:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:44:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:44:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:44:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:44:23.409779  543705 memory.go:184] no items to output this cycle
I0322 03:44:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 03:44:32.845685  543705 disk_info.go:125] begin check local disk info of client
I0322 03:44:32.848220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:44:32.848227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003add00 0xc0003add40]
E0322 03:44:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:44:33.407521  543705 memory.go:184] no items to output this cycle
I0322 03:44:33.407551  543705 cpu.go:275] no items to output this cycle
E0322 03:44:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:44:43.409798  543705 memory.go:191] Add success.
I0322 03:44:43.409825  543705 cpu.go:282] Add success.
I0322 03:44:43.419958  543705 net.go:648] Add success.
I0322 03:44:43.423192  543705 net.go:770] primary dev: ETH0
I0322 03:44:43.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:44:43.423219  543705 net.go:698] Add success.
I0322 03:44:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:44:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:44:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:44:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:44:53.409786  543705 memory.go:184] no items to output this cycle
I0322 03:44:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 03:45:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:45:03.409776  543705 memory.go:184] no items to output this cycle
I0322 03:45:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 03:45:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:45:13.409788  543705 memory.go:191] Add success.
I0322 03:45:13.409801  543705 cpu.go:282] Add success.
W0322 03:45:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:45:13.412396  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:45:13.412401  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:45:13.420078  543705 net.go:648] Add success.
I0322 03:45:13.421738  543705 net.go:770] primary dev: ETH0
I0322 03:45:13.421752  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:45:13.421766  543705 net.go:698] Add success.
I0322 03:45:13.468028  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d5bfc28c-8fad-47fb-83cc-5337b283d7e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:45:13.468062  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:45:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:45:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:45:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 03:45:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:45:14.456653  543705 disk_worker.go:494] system disk:vda1
I0322 03:45:14.456692  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:45:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:45:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:45:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:45:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:45:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:45:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:45:23.409779  543705 memory.go:184] no items to output this cycle
I0322 03:45:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 03:45:32.849674  543705 disk_info.go:125] begin check local disk info of client
I0322 03:45:32.852193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:45:32.852199  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327340 0xc000327380]
E0322 03:45:33.407501  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:45:33.407514  543705 memory.go:184] no items to output this cycle
I0322 03:45:33.407547  543705 cpu.go:275] no items to output this cycle
I0322 03:45:39.479163  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:45:39.479171  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:45:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:45:43.410832  543705 memory.go:191] Add success.
I0322 03:45:43.409829  543705 cpu.go:282] Add success.
I0322 03:45:43.420520  543705 net.go:648] Add success.
I0322 03:45:43.423201  543705 net.go:770] primary dev: ETH0
I0322 03:45:43.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:45:43.423229  543705 net.go:698] Add success.
I0322 03:45:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:45:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:45:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:45:53.409793  543705 memory.go:184] no items to output this cycle
I0322 03:45:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 03:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:46:03.409783  543705 memory.go:184] no items to output this cycle
I0322 03:46:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 03:46:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:46:13.409798  543705 memory.go:191] Add success.
I0322 03:46:13.409802  543705 cpu.go:282] Add success.
W0322 03:46:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:46:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:46:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:46:13.420109  543705 net.go:648] Add success.
I0322 03:46:13.422854  543705 net.go:770] primary dev: ETH0
I0322 03:46:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:46:13.422882  543705 net.go:698] Add success.
I0322 03:46:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:46:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:46:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 03:46:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:46:14.456486  543705 disk_worker.go:494] system disk:vda1
I0322 03:46:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:46:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:46:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:46:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:46:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:46:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:46:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:46:23.409780  543705 memory.go:184] no items to output this cycle
I0322 03:46:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 03:46:32.853675  543705 disk_info.go:125] begin check local disk info of client
I0322 03:46:32.856166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:46:32.856173  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265bc0 0xc000265c00]
E0322 03:46:33.407497  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:46:33.407512  543705 memory.go:184] no items to output this cycle
I0322 03:46:33.407545  543705 cpu.go:275] no items to output this cycle
E0322 03:46:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:46:43.409832  543705 memory.go:191] Add success.
I0322 03:46:43.409832  543705 cpu.go:282] Add success.
I0322 03:46:43.419883  543705 net.go:648] Add success.
I0322 03:46:43.422670  543705 net.go:770] primary dev: ETH0
I0322 03:46:43.422682  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:46:43.422704  543705 net.go:698] Add success.
I0322 03:46:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:46:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:46:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:46:53.410259  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:46:53.410273  543705 cpu.go:275] no items to output this cycle
I0322 03:46:53.410275  543705 memory.go:184] no items to output this cycle
E0322 03:47:03.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:47:03.409875  543705 memory.go:184] no items to output this cycle
I0322 03:47:03.409970  543705 cpu.go:275] no items to output this cycle
E0322 03:47:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:47:13.409803  543705 memory.go:191] Add success.
I0322 03:47:13.409804  543705 cpu.go:282] Add success.
W0322 03:47:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:47:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:47:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:47:13.420127  543705 net.go:648] Add success.
I0322 03:47:13.422880  543705 net.go:770] primary dev: ETH0
I0322 03:47:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:47:13.422906  543705 net.go:698] Add success.
I0322 03:47:13.453509  543705 event_worker.go:152] Polling the log file for events...
W0322 03:47:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:47:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 03:47:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:47:14.456958  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:47:14.456967  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:47:14.456972  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:47:14.457018  543705 disk_worker.go:494] system disk:vda1
I0322 03:47:14.457058  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:47:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:47:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:47:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:47:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:47:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:47:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:47:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:47:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:47:23.409770  543705 memory.go:184] no items to output this cycle
I0322 03:47:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 03:47:32.857679  543705 disk_info.go:125] begin check local disk info of client
I0322 03:47:32.860170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:47:32.860177  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b3c0 0xc00032b400]
E0322 03:47:33.409010  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:47:33.409025  543705 memory.go:184] no items to output this cycle
I0322 03:47:33.409042  543705 cpu.go:275] no items to output this cycle
E0322 03:47:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:47:43.409796  543705 memory.go:191] Add success.
I0322 03:47:43.409821  543705 cpu.go:282] Add success.
I0322 03:47:43.420005  543705 net.go:648] Add success.
I0322 03:47:43.422716  543705 net.go:770] primary dev: ETH0
I0322 03:47:43.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:47:43.422741  543705 net.go:698] Add success.
I0322 03:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:47:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:47:53.410353  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:47:53.410375  543705 memory.go:184] no items to output this cycle
I0322 03:47:53.410388  543705 cpu.go:275] no items to output this cycle
E0322 03:48:03.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:48:03.409917  543705 memory.go:184] no items to output this cycle
I0322 03:48:03.409934  543705 cpu.go:275] no items to output this cycle
E0322 03:48:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:48:13.409789  543705 memory.go:191] Add success.
I0322 03:48:13.409794  543705 cpu.go:282] Add success.
W0322 03:48:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:48:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:48:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:48:13.420136  543705 net.go:648] Add success.
I0322 03:48:13.422574  543705 net.go:770] primary dev: ETH0
I0322 03:48:13.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:48:13.422600  543705 net.go:698] Add success.
I0322 03:48:13.469001  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65ed76a8-34f9-4249-ba67-476c95b5e049","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:48:13.469034  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:48:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:48:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:48:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 03:48:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:48:14.456494  543705 disk_worker.go:494] system disk:vda1
I0322 03:48:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:48:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:48:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:48:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:48:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:48:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 03:48:23.409787  543705 memory.go:184] no items to output this cycle
I0322 03:48:32.861675  543705 disk_info.go:125] begin check local disk info of client
I0322 03:48:32.864163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:48:32.864169  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf900 0xc0002bf940]
E0322 03:48:33.408968  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:48:33.408984  543705 memory.go:184] no items to output this cycle
I0322 03:48:33.409000  543705 cpu.go:275] no items to output this cycle
I0322 03:48:39.480155  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:48:39.480162  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:48:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:48:43.410595  543705 memory.go:191] Add success.
I0322 03:48:43.409795  543705 cpu.go:282] Add success.
I0322 03:48:43.420327  543705 net.go:648] Add success.
I0322 03:48:43.422775  543705 net.go:770] primary dev: ETH0
I0322 03:48:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:48:43.422801  543705 net.go:698] Add success.
I0322 03:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:48:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:48:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:48:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:48:53.409805  543705 memory.go:184] no items to output this cycle
I0322 03:48:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 03:49:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:49:03.409868  543705 cpu.go:275] no items to output this cycle
I0322 03:49:03.409927  543705 memory.go:184] no items to output this cycle
E0322 03:49:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:49:13.409780  543705 memory.go:191] Add success.
W0322 03:49:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:49:13.409813  543705 cpu.go:282] Add success.
W0322 03:49:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:49:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:49:13.420142  543705 net.go:648] Add success.
I0322 03:49:13.422597  543705 net.go:770] primary dev: ETH0
I0322 03:49:13.422612  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:49:13.422625  543705 net.go:698] Add success.
I0322 03:49:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:49:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:49:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 03:49:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:49:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 03:49:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:49:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:49:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:49:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:49:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:49:23.409806  543705 memory.go:184] no items to output this cycle
I0322 03:49:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 03:49:32.865676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:49:32.868265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:49:32.868272  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469580 0xc0004695c0]
E0322 03:49:33.409045  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:49:33.409057  543705 memory.go:184] no items to output this cycle
I0322 03:49:33.409059  543705 cpu.go:275] no items to output this cycle
E0322 03:49:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:49:43.409793  543705 memory.go:191] Add success.
I0322 03:49:43.409805  543705 cpu.go:282] Add success.
I0322 03:49:43.419957  543705 net.go:648] Add success.
I0322 03:49:43.422340  543705 net.go:770] primary dev: ETH0
I0322 03:49:43.422354  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:49:43.422367  543705 net.go:698] Add success.
I0322 03:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:49:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:49:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:49:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:49:53.409789  543705 cpu.go:275] no items to output this cycle
I0322 03:49:53.409795  543705 memory.go:184] no items to output this cycle
E0322 03:50:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:50:03.409795  543705 memory.go:184] no items to output this cycle
I0322 03:50:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 03:50:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:50:13.409781  543705 memory.go:191] Add success.
W0322 03:50:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 03:50:13.409810  543705 cpu.go:282] Add success.
W0322 03:50:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:50:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:50:13.420119  543705 net.go:648] Add success.
I0322 03:50:13.422598  543705 net.go:770] primary dev: ETH0
I0322 03:50:13.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:50:13.422628  543705 net.go:698] Add success.
I0322 03:50:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:50:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:50:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 03:50:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:50:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 03:50:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:50:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:50:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:50:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:50:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:50:23.409799  543705 memory.go:184] no items to output this cycle
I0322 03:50:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 03:50:32.869673  543705 disk_info.go:125] begin check local disk info of client
I0322 03:50:32.872226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:50:32.872245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000274800 0xc000274840]
E0322 03:50:33.408987  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:50:33.408997  543705 cpu.go:275] no items to output this cycle
I0322 03:50:33.409000  543705 memory.go:184] no items to output this cycle
E0322 03:50:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:50:43.409795  543705 memory.go:191] Add success.
I0322 03:50:43.409798  543705 cpu.go:282] Add success.
I0322 03:50:43.419872  543705 net.go:648] Add success.
I0322 03:50:43.422470  543705 net.go:770] primary dev: ETH0
I0322 03:50:43.422485  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:50:43.422498  543705 net.go:698] Add success.
I0322 03:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:50:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:50:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:50:53.409806  543705 memory.go:184] no items to output this cycle
I0322 03:50:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 03:51:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:51:03.409778  543705 memory.go:184] no items to output this cycle
I0322 03:51:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 03:51:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:51:13.409783  543705 memory.go:191] Add success.
I0322 03:51:13.409787  543705 cpu.go:282] Add success.
W0322 03:51:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:51:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:51:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:51:13.420351  543705 net.go:648] Add success.
I0322 03:51:13.423019  543705 net.go:770] primary dev: ETH0
I0322 03:51:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:51:13.423043  543705 net.go:698] Add success.
I0322 03:51:13.471513  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e9a5def0-3867-4bcf-9220-f47f0d21b2d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:51:13.471554  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:51:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:51:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:51:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 03:51:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:51:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 03:51:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:51:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:51:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:51:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:51:23.409805  543705 memory.go:184] no items to output this cycle
I0322 03:51:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 03:51:32.873675  543705 disk_info.go:125] begin check local disk info of client
I0322 03:51:32.876259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:51:32.876265  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba940 0xc0002ba980]
E0322 03:51:33.408995  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:51:33.409005  543705 cpu.go:275] no items to output this cycle
I0322 03:51:33.409011  543705 memory.go:184] no items to output this cycle
I0322 03:51:39.481167  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:51:39.481175  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:51:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:51:43.410524  543705 memory.go:191] Add success.
I0322 03:51:43.409839  543705 cpu.go:282] Add success.
I0322 03:51:43.420213  543705 net.go:648] Add success.
I0322 03:51:43.422534  543705 net.go:770] primary dev: ETH0
I0322 03:51:43.422546  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:51:43.422560  543705 net.go:698] Add success.
I0322 03:51:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:51:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:51:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:51:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:51:53.409789  543705 memory.go:184] no items to output this cycle
I0322 03:51:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 03:52:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:52:03.409767  543705 memory.go:184] no items to output this cycle
I0322 03:52:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 03:52:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:52:13.409806  543705 memory.go:191] Add success.
I0322 03:52:13.409814  543705 cpu.go:282] Add success.
W0322 03:52:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:52:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:52:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:52:13.420045  543705 net.go:648] Add success.
I0322 03:52:13.422857  543705 net.go:770] primary dev: ETH0
I0322 03:52:13.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:52:13.422882  543705 net.go:698] Add success.
W0322 03:52:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:52:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 03:52:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:52:14.456757  543705 disk_worker.go:494] system disk:vda1
I0322 03:52:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:52:14.457108  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:52:14.457116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:52:14.457121  543705 custom_config.go:64] query custom config with name: gpu
E0322 03:52:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:52:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:52:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:52:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:52:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:52:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:52:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:52:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:52:23.409782  543705 memory.go:184] no items to output this cycle
I0322 03:52:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 03:52:32.877676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:52:32.880165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:52:32.880172  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab80 0xc00007abc0]
E0322 03:52:33.408903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:52:33.408920  543705 memory.go:184] no items to output this cycle
I0322 03:52:33.408935  543705 cpu.go:275] no items to output this cycle
E0322 03:52:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:52:43.409780  543705 memory.go:191] Add success.
I0322 03:52:43.409808  543705 cpu.go:282] Add success.
I0322 03:52:43.419867  543705 net.go:648] Add success.
I0322 03:52:43.422875  543705 net.go:770] primary dev: ETH0
I0322 03:52:43.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:52:43.422904  543705 net.go:698] Add success.
I0322 03:52:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:52:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:52:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:52:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:52:53.409787  543705 memory.go:184] no items to output this cycle
I0322 03:52:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 03:53:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:53:03.409780  543705 memory.go:184] no items to output this cycle
I0322 03:53:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 03:53:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:53:13.409788  543705 memory.go:191] Add success.
I0322 03:53:13.409811  543705 cpu.go:282] Add success.
W0322 03:53:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:53:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:53:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:53:13.420252  543705 net.go:648] Add success.
I0322 03:53:13.422846  543705 net.go:770] primary dev: ETH0
I0322 03:53:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:53:13.422871  543705 net.go:698] Add success.
I0322 03:53:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:53:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:53:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0322 03:53:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:53:14.456490  543705 disk_worker.go:494] system disk:vda1
I0322 03:53:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:53:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:53:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:53:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:53:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:53:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:53:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 03:53:23.409790  543705 memory.go:184] no items to output this cycle
I0322 03:53:32.881676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:53:32.884161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:53:32.884167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a40 0xc0000c5a80]
E0322 03:53:33.408874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:53:33.408889  543705 memory.go:184] no items to output this cycle
I0322 03:53:33.408910  543705 cpu.go:275] no items to output this cycle
E0322 03:53:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:53:43.409797  543705 cpu.go:282] Add success.
I0322 03:53:43.409806  543705 memory.go:191] Add success.
I0322 03:53:43.420071  543705 net.go:648] Add success.
I0322 03:53:43.422671  543705 net.go:770] primary dev: ETH0
I0322 03:53:43.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:53:43.422698  543705 net.go:698] Add success.
I0322 03:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:53:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:53:53.409795  543705 cpu.go:275] no items to output this cycle
I0322 03:53:53.409803  543705 memory.go:184] no items to output this cycle
E0322 03:54:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:54:03.409797  543705 memory.go:184] no items to output this cycle
I0322 03:54:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 03:54:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:54:13.409812  543705 memory.go:191] Add success.
I0322 03:54:13.409817  543705 cpu.go:282] Add success.
W0322 03:54:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:54:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:54:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:54:13.420042  543705 net.go:648] Add success.
I0322 03:54:13.422541  543705 net.go:770] primary dev: ETH0
I0322 03:54:13.422554  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:54:13.422566  543705 net.go:698] Add success.
I0322 03:54:13.552692  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d03687a0-ca68-459a-9147-31bda3df41c7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:54:13.552723  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 03:54:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:54:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:54:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0322 03:54:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:54:14.456660  543705 disk_worker.go:494] system disk:vda1
I0322 03:54:14.456688  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:54:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:54:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:54:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:54:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:54:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:54:23.409781  543705 memory.go:184] no items to output this cycle
I0322 03:54:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 03:54:32.885674  543705 disk_info.go:125] begin check local disk info of client
I0322 03:54:32.888249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:54:32.888256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486680 0xc0004866c0]
E0322 03:54:33.408918  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:54:33.408931  543705 memory.go:184] no items to output this cycle
I0322 03:54:33.408935  543705 cpu.go:275] no items to output this cycle
I0322 03:54:39.482157  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:54:39.482164  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:54:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:54:43.410756  543705 memory.go:191] Add success.
I0322 03:54:43.409805  543705 cpu.go:282] Add success.
I0322 03:54:43.420546  543705 net.go:648] Add success.
I0322 03:54:43.423087  543705 net.go:770] primary dev: ETH0
I0322 03:54:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:54:43.423116  543705 net.go:698] Add success.
I0322 03:54:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:54:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:54:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:54:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:54:53.409804  543705 memory.go:184] no items to output this cycle
I0322 03:54:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 03:55:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:55:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 03:55:03.409787  543705 memory.go:184] no items to output this cycle
E0322 03:55:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:55:13.409785  543705 memory.go:191] Add success.
I0322 03:55:13.409792  543705 cpu.go:282] Add success.
W0322 03:55:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:55:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:55:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:55:13.420142  543705 net.go:648] Add success.
I0322 03:55:13.422773  543705 net.go:770] primary dev: ETH0
I0322 03:55:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:55:13.422798  543705 net.go:698] Add success.
I0322 03:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:55:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:55:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 03:55:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:55:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 03:55:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:55:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:55:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:55:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:55:16.472545  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:55:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:55:23.409801  543705 memory.go:184] no items to output this cycle
I0322 03:55:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 03:55:32.889675  543705 disk_info.go:125] begin check local disk info of client
I0322 03:55:32.892189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:55:32.892196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dce40 0xc0004dce80]
E0322 03:55:33.407529  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:55:33.407543  543705 memory.go:184] no items to output this cycle
I0322 03:55:33.407553  543705 cpu.go:275] no items to output this cycle
E0322 03:55:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:55:43.409819  543705 memory.go:191] Add success.
I0322 03:55:43.409832  543705 cpu.go:282] Add success.
I0322 03:55:43.420126  543705 net.go:648] Add success.
I0322 03:55:43.423168  543705 net.go:770] primary dev: ETH0
I0322 03:55:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:55:43.423194  543705 net.go:698] Add success.
I0322 03:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:55:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:55:53.409782  543705 memory.go:184] no items to output this cycle
I0322 03:55:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 03:56:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:56:03.409774  543705 memory.go:184] no items to output this cycle
I0322 03:56:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 03:56:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:56:13.409780  543705 memory.go:191] Add success.
I0322 03:56:13.409802  543705 cpu.go:282] Add success.
W0322 03:56:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:56:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:56:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:56:13.420237  543705 net.go:648] Add success.
I0322 03:56:13.423100  543705 net.go:770] primary dev: ETH0
I0322 03:56:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:56:13.423125  543705 net.go:698] Add success.
I0322 03:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:56:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:56:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 03:56:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:56:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 03:56:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:56:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:56:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:56:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:56:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:56:23.409782  543705 memory.go:184] no items to output this cycle
I0322 03:56:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 03:56:32.893674  543705 disk_info.go:125] begin check local disk info of client
I0322 03:56:32.896192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:56:32.896198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a3b40 0xc0004a3b80]
E0322 03:56:33.407521  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:56:33.407534  543705 memory.go:184] no items to output this cycle
I0322 03:56:33.407551  543705 cpu.go:275] no items to output this cycle
E0322 03:56:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:56:43.409792  543705 memory.go:191] Add success.
I0322 03:56:43.409793  543705 cpu.go:282] Add success.
I0322 03:56:43.419863  543705 net.go:648] Add success.
I0322 03:56:43.422290  543705 net.go:770] primary dev: ETH0
I0322 03:56:43.422302  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:56:43.422314  543705 net.go:698] Add success.
I0322 03:56:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:56:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:56:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:56:53.410381  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:56:53.410405  543705 memory.go:184] no items to output this cycle
I0322 03:56:53.410409  543705 cpu.go:275] no items to output this cycle
E0322 03:57:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:57:03.409794  543705 memory.go:184] no items to output this cycle
I0322 03:57:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 03:57:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:57:13.409784  543705 memory.go:191] Add success.
I0322 03:57:13.409804  543705 cpu.go:282] Add success.
W0322 03:57:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:57:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:57:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:57:13.420166  543705 net.go:648] Add success.
I0322 03:57:13.422781  543705 net.go:770] primary dev: ETH0
I0322 03:57:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:57:13.422806  543705 net.go:698] Add success.
I0322 03:57:13.428822  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 03:57:13.453017  543705 event_worker.go:152] Polling the log file for events...
I0322 03:57:13.463396  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ce03ebc0-fa0b-4ded-a7ed-7b2811ed51d0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 03:57:13.463432  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 03:57:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:57:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0322 03:57:14.455231  543705 disk_worker.go:728] disk inode is not compliant
E0322 03:57:14.456937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 03:57:14.456947  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 03:57:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0322 03:57:14.457045  543705 disk_worker.go:494] system disk:vda1
I0322 03:57:14.457080  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 03:57:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 03:57:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:57:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 03:57:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 03:57:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:57:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:57:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:57:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:57:23.409791  543705 memory.go:184] no items to output this cycle
I0322 03:57:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 03:57:32.897673  543705 disk_info.go:125] begin check local disk info of client
I0322 03:57:32.900181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:57:32.900188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dcc80 0xc0004dccc0]
E0322 03:57:33.408802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:57:33.408804  543705 cpu.go:275] no items to output this cycle
I0322 03:57:33.408815  543705 memory.go:184] no items to output this cycle
I0322 03:57:39.483178  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 03:57:39.483185  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 03:57:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:57:43.410650  543705 memory.go:191] Add success.
I0322 03:57:43.409805  543705 cpu.go:282] Add success.
I0322 03:57:43.420373  543705 net.go:648] Add success.
I0322 03:57:43.422753  543705 net.go:770] primary dev: ETH0
I0322 03:57:43.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:57:43.422779  543705 net.go:698] Add success.
I0322 03:57:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:57:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:57:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:57:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:57:53.409808  543705 memory.go:184] no items to output this cycle
I0322 03:57:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 03:58:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:58:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 03:58:03.409784  543705 memory.go:184] no items to output this cycle
E0322 03:58:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:58:13.409815  543705 memory.go:191] Add success.
I0322 03:58:13.409824  543705 cpu.go:282] Add success.
W0322 03:58:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:58:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:58:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:58:13.420193  543705 net.go:648] Add success.
I0322 03:58:13.422730  543705 net.go:770] primary dev: ETH0
I0322 03:58:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:58:13.422757  543705 net.go:698] Add success.
I0322 03:58:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:58:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:58:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 03:58:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:58:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 03:58:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:58:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:58:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:58:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:58:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:58:23.409802  543705 memory.go:184] no items to output this cycle
I0322 03:58:23.409812  543705 cpu.go:275] no items to output this cycle
I0322 03:58:32.901675  543705 disk_info.go:125] begin check local disk info of client
I0322 03:58:32.904158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:58:32.904164  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba200 0xc0003ba240]
E0322 03:58:33.407524  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:58:33.407537  543705 memory.go:184] no items to output this cycle
I0322 03:58:33.407564  543705 cpu.go:275] no items to output this cycle
E0322 03:58:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:58:43.409815  543705 memory.go:191] Add success.
I0322 03:58:43.409821  543705 cpu.go:282] Add success.
I0322 03:58:43.419952  543705 net.go:648] Add success.
I0322 03:58:43.422689  543705 net.go:770] primary dev: ETH0
I0322 03:58:43.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:58:43.422715  543705 net.go:698] Add success.
I0322 03:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:58:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:58:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:58:53.409807  543705 memory.go:184] no items to output this cycle
I0322 03:58:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 03:59:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:59:03.409794  543705 memory.go:184] no items to output this cycle
I0322 03:59:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 03:59:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:59:13.409791  543705 memory.go:191] Add success.
I0322 03:59:13.409808  543705 cpu.go:282] Add success.
W0322 03:59:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 03:59:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 03:59:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 03:59:13.420287  543705 net.go:648] Add success.
I0322 03:59:13.422917  543705 net.go:770] primary dev: ETH0
I0322 03:59:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:59:13.422942  543705 net.go:698] Add success.
I0322 03:59:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 03:59:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 03:59:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 03:59:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 03:59:14.456619  543705 disk_worker.go:494] system disk:vda1
I0322 03:59:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 03:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 03:59:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:59:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:59:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 03:59:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 03:59:23.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:59:23.409866  543705 memory.go:184] no items to output this cycle
I0322 03:59:23.409964  543705 cpu.go:275] no items to output this cycle
I0322 03:59:32.905676  543705 disk_info.go:125] begin check local disk info of client
I0322 03:59:32.908255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 03:59:32.908262  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037da80 0xc00037dac0]
E0322 03:59:33.407533  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:59:33.407548  543705 memory.go:184] no items to output this cycle
I0322 03:59:33.407547  543705 cpu.go:275] no items to output this cycle
E0322 03:59:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:59:43.409800  543705 memory.go:191] Add success.
I0322 03:59:43.409816  543705 cpu.go:282] Add success.
I0322 03:59:43.420023  543705 net.go:648] Add success.
I0322 03:59:43.422871  543705 net.go:770] primary dev: ETH0
I0322 03:59:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 03:59:43.422900  543705 net.go:698] Add success.
I0322 03:59:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 03:59:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 03:59:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 03:59:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 03:59:53.409777  543705 memory.go:184] no items to output this cycle
I0322 03:59:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 04:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:00:03.409781  543705 memory.go:184] no items to output this cycle
I0322 04:00:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 04:00:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:00:13.409821  543705 memory.go:191] Add success.
I0322 04:00:13.409826  543705 cpu.go:282] Add success.
W0322 04:00:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:00:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:00:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:00:13.420110  543705 net.go:648] Add success.
I0322 04:00:13.422925  543705 net.go:770] primary dev: ETH0
I0322 04:00:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:00:13.422950  543705 net.go:698] Add success.
I0322 04:00:13.464200  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7561e844-a5c1-4725-84b8-655bb8764c70","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:00:13.464235  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:00:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:00:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 04:00:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:00:14.456600  543705 disk_worker.go:494] system disk:vda1
I0322 04:00:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:00:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:00:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:00:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:00:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:00:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:00:23.409768  543705 memory.go:184] no items to output this cycle
I0322 04:00:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 04:00:32.909666  543705 disk_info.go:125] begin check local disk info of client
I0322 04:00:32.912147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:00:32.912154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e1480 0xc0004e14c0]
E0322 04:00:33.407711  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:00:33.407728  543705 memory.go:184] no items to output this cycle
I0322 04:00:33.407730  543705 cpu.go:275] no items to output this cycle
I0322 04:00:39.484181  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:00:39.484188  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:00:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:00:43.410603  543705 memory.go:191] Add success.
I0322 04:00:43.409807  543705 cpu.go:282] Add success.
I0322 04:00:43.420310  543705 net.go:648] Add success.
I0322 04:00:43.422846  543705 net.go:770] primary dev: ETH0
I0322 04:00:43.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:00:43.422873  543705 net.go:698] Add success.
I0322 04:00:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:00:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:00:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:00:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:00:53.409793  543705 memory.go:184] no items to output this cycle
I0322 04:00:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 04:01:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:01:03.409774  543705 memory.go:184] no items to output this cycle
I0322 04:01:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 04:01:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:01:13.409813  543705 memory.go:191] Add success.
I0322 04:01:13.409822  543705 cpu.go:282] Add success.
W0322 04:01:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:01:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:01:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:01:13.420144  543705 net.go:648] Add success.
I0322 04:01:13.422893  543705 net.go:770] primary dev: ETH0
I0322 04:01:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:01:13.422922  543705 net.go:698] Add success.
I0322 04:01:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:01:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:01:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 04:01:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:01:14.456631  543705 disk_worker.go:494] system disk:vda1
I0322 04:01:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:01:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:01:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:01:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:01:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:01:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:01:23.409777  543705 cpu.go:275] no items to output this cycle
I0322 04:01:23.409785  543705 memory.go:184] no items to output this cycle
I0322 04:01:32.913675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:01:32.916219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:01:32.916226  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9940 0xc0003c9980]
E0322 04:01:33.408742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:01:33.408754  543705 memory.go:184] no items to output this cycle
I0322 04:01:33.408776  543705 cpu.go:275] no items to output this cycle
E0322 04:01:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:01:43.409789  543705 memory.go:191] Add success.
I0322 04:01:43.409811  543705 cpu.go:282] Add success.
I0322 04:01:43.419968  543705 net.go:648] Add success.
I0322 04:01:43.422526  543705 net.go:770] primary dev: ETH0
I0322 04:01:43.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:01:43.422551  543705 net.go:698] Add success.
I0322 04:01:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:01:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:01:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:01:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:01:53.409809  543705 memory.go:184] no items to output this cycle
I0322 04:01:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 04:02:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:02:03.409779  543705 memory.go:184] no items to output this cycle
I0322 04:02:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 04:02:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:02:13.409786  543705 memory.go:191] Add success.
I0322 04:02:13.409794  543705 cpu.go:282] Add success.
W0322 04:02:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:02:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:02:13.420082  543705 net.go:648] Add success.
I0322 04:02:13.422686  543705 net.go:770] primary dev: ETH0
I0322 04:02:13.422699  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:02:13.422711  543705 net.go:698] Add success.
W0322 04:02:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:02:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0322 04:02:14.455150  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:02:14.456916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:02:14.456925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:02:14.456932  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:02:14.456981  543705 disk_worker.go:494] system disk:vda1
I0322 04:02:14.457010  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:02:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:02:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:02:16.457898  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:02:16.457897  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:02:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:02:16.457971  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:02:16.472291  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:02:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:02:23.409797  543705 memory.go:184] no items to output this cycle
I0322 04:02:23.409807  543705 cpu.go:275] no items to output this cycle
I0322 04:02:32.917679  543705 disk_info.go:125] begin check local disk info of client
I0322 04:02:32.920197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:02:32.920205  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 04:02:33.408711  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:02:33.408724  543705 memory.go:184] no items to output this cycle
I0322 04:02:33.408730  543705 cpu.go:275] no items to output this cycle
E0322 04:02:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:02:43.409816  543705 memory.go:191] Add success.
I0322 04:02:43.409828  543705 cpu.go:282] Add success.
I0322 04:02:43.419956  543705 net.go:648] Add success.
I0322 04:02:43.422897  543705 net.go:770] primary dev: ETH0
I0322 04:02:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:02:43.422922  543705 net.go:698] Add success.
I0322 04:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:02:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:02:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:02:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:02:53.409779  543705 memory.go:184] no items to output this cycle
I0322 04:02:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 04:03:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:03:03.409769  543705 memory.go:184] no items to output this cycle
I0322 04:03:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 04:03:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:03:13.409793  543705 memory.go:191] Add success.
I0322 04:03:13.409814  543705 cpu.go:282] Add success.
W0322 04:03:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:03:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:03:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:03:13.420281  543705 net.go:648] Add success.
I0322 04:03:13.422885  543705 net.go:770] primary dev: ETH0
I0322 04:03:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:03:13.422910  543705 net.go:698] Add success.
I0322 04:03:13.491396  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"280b24c8-d0fd-42e1-96dd-88c6103c34c0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:03:13.491431  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:03:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:03:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:03:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0322 04:03:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:03:14.456620  543705 disk_worker.go:494] system disk:vda1
I0322 04:03:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:03:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:03:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:03:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:03:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:03:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:03:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:03:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 04:03:23.409777  543705 memory.go:184] no items to output this cycle
I0322 04:03:32.921692  543705 disk_info.go:125] begin check local disk info of client
I0322 04:03:32.924181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:03:32.924187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a000 0xc00047a040]
E0322 04:03:33.408690  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:03:33.408707  543705 memory.go:184] no items to output this cycle
I0322 04:03:33.408725  543705 cpu.go:275] no items to output this cycle
I0322 04:03:39.485175  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:03:39.485181  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:03:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:03:43.410643  543705 memory.go:191] Add success.
I0322 04:03:43.409831  543705 cpu.go:282] Add success.
I0322 04:03:43.420302  543705 net.go:648] Add success.
I0322 04:03:43.422746  543705 net.go:770] primary dev: ETH0
I0322 04:03:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:03:43.422771  543705 net.go:698] Add success.
I0322 04:03:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:03:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:03:53.410434  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:03:53.410452  543705 memory.go:184] no items to output this cycle
I0322 04:03:53.410475  543705 cpu.go:275] no items to output this cycle
E0322 04:04:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:04:03.409797  543705 memory.go:184] no items to output this cycle
I0322 04:04:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 04:04:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:04:13.409780  543705 memory.go:191] Add success.
I0322 04:04:13.409800  543705 cpu.go:282] Add success.
W0322 04:04:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:04:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:04:13.420043  543705 net.go:648] Add success.
I0322 04:04:13.422699  543705 net.go:770] primary dev: ETH0
I0322 04:04:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:04:13.422723  543705 net.go:698] Add success.
I0322 04:04:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:04:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:04:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 04:04:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:04:14.456491  543705 disk_worker.go:494] system disk:vda1
I0322 04:04:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:04:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:04:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:04:23.409794  543705 memory.go:184] no items to output this cycle
I0322 04:04:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 04:04:32.925666  543705 disk_info.go:125] begin check local disk info of client
I0322 04:04:32.928219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:04:32.928224  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376740 0xc000376780]
E0322 04:04:33.407638  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:04:33.407654  543705 memory.go:184] no items to output this cycle
I0322 04:04:33.407654  543705 cpu.go:275] no items to output this cycle
E0322 04:04:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:04:43.409785  543705 memory.go:191] Add success.
I0322 04:04:43.409820  543705 cpu.go:282] Add success.
I0322 04:04:43.419953  543705 net.go:648] Add success.
I0322 04:04:43.422347  543705 net.go:770] primary dev: ETH0
I0322 04:04:43.422359  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:04:43.422372  543705 net.go:698] Add success.
I0322 04:04:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:04:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:04:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:04:53.410277  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:04:53.410297  543705 memory.go:184] no items to output this cycle
I0322 04:04:53.410298  543705 cpu.go:275] no items to output this cycle
E0322 04:05:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:05:03.409786  543705 cpu.go:275] no items to output this cycle
I0322 04:05:03.409790  543705 memory.go:184] no items to output this cycle
E0322 04:05:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:05:13.409814  543705 memory.go:191] Add success.
I0322 04:05:13.409821  543705 cpu.go:282] Add success.
W0322 04:05:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:05:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:05:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:05:13.420121  543705 net.go:648] Add success.
I0322 04:05:13.423147  543705 net.go:770] primary dev: ETH0
I0322 04:05:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:05:13.423176  543705 net.go:698] Add success.
I0322 04:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:05:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:05:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 04:05:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:05:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 04:05:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:05:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:05:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:05:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:05:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:05:23.409797  543705 memory.go:184] no items to output this cycle
I0322 04:05:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 04:05:32.929675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:05:32.932146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:05:32.932152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509680 0xc0005096c0]
E0322 04:05:33.408763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:05:33.408804  543705 memory.go:184] no items to output this cycle
I0322 04:05:33.408846  543705 cpu.go:275] no items to output this cycle
E0322 04:05:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:05:43.409828  543705 memory.go:191] Add success.
I0322 04:05:43.409829  543705 cpu.go:282] Add success.
I0322 04:05:43.420002  543705 net.go:648] Add success.
I0322 04:05:43.422858  543705 net.go:770] primary dev: ETH0
I0322 04:05:43.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:05:43.422884  543705 net.go:698] Add success.
I0322 04:05:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:05:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:05:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:05:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:05:53.409809  543705 memory.go:184] no items to output this cycle
I0322 04:05:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 04:06:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:06:03.409776  543705 memory.go:184] no items to output this cycle
I0322 04:06:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 04:06:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:06:13.409815  543705 memory.go:191] Add success.
I0322 04:06:13.409818  543705 cpu.go:282] Add success.
W0322 04:06:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:06:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:06:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:06:13.420062  543705 net.go:648] Add success.
I0322 04:06:13.423144  543705 net.go:770] primary dev: ETH0
I0322 04:06:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:06:13.423172  543705 net.go:698] Add success.
I0322 04:06:13.523787  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cc15bfbc-af26-4e1a-8b87-3331050e9222","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:06:13.523829  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:06:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:06:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:06:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 04:06:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:06:14.456718  543705 disk_worker.go:494] system disk:vda1
I0322 04:06:14.456751  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:06:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:06:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:06:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:06:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:06:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:06:23.409767  543705 memory.go:184] no items to output this cycle
I0322 04:06:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 04:06:32.933674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:06:32.936158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:06:32.936164  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034f640 0xc00034f680]
E0322 04:06:33.408721  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:06:33.408736  543705 memory.go:184] no items to output this cycle
I0322 04:06:33.408788  543705 cpu.go:275] no items to output this cycle
I0322 04:06:39.486179  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:06:39.486185  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:06:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:06:43.410710  543705 memory.go:191] Add success.
I0322 04:06:43.409812  543705 cpu.go:282] Add success.
I0322 04:06:43.420406  543705 net.go:648] Add success.
I0322 04:06:43.422955  543705 net.go:770] primary dev: ETH0
I0322 04:06:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:06:43.422981  543705 net.go:698] Add success.
I0322 04:06:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:06:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:06:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:06:53.409795  543705 memory.go:184] no items to output this cycle
I0322 04:06:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 04:07:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:07:03.409803  543705 memory.go:184] no items to output this cycle
I0322 04:07:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 04:07:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:07:13.409797  543705 memory.go:191] Add success.
I0322 04:07:13.409821  543705 cpu.go:282] Add success.
W0322 04:07:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:07:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:07:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:07:13.420131  543705 net.go:648] Add success.
I0322 04:07:13.423085  543705 net.go:770] primary dev: ETH0
I0322 04:07:13.423098  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:07:13.423111  543705 net.go:698] Add success.
I0322 04:07:13.453655  543705 event_worker.go:152] Polling the log file for events...
W0322 04:07:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:07:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 04:07:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:07:14.456921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:07:14.456930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:07:14.456936  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:07:14.457005  543705 disk_worker.go:494] system disk:vda1
I0322 04:07:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:07:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:07:15.456855  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:07:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:07:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:07:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:07:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:07:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:07:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:07:23.409779  543705 memory.go:184] no items to output this cycle
I0322 04:07:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 04:07:32.937675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:07:32.940148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:07:32.940154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ad040 0xc0004ad080]
I0322 04:07:33.408725  543705 cpu.go:275] no items to output this cycle
E0322 04:07:33.408728  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:07:33.408755  543705 memory.go:184] no items to output this cycle
E0322 04:07:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:07:43.409837  543705 memory.go:191] Add success.
I0322 04:07:43.409844  543705 cpu.go:282] Add success.
I0322 04:07:43.420066  543705 net.go:648] Add success.
I0322 04:07:43.422785  543705 net.go:770] primary dev: ETH0
I0322 04:07:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:07:43.422811  543705 net.go:698] Add success.
I0322 04:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:07:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:07:53.409815  543705 memory.go:184] no items to output this cycle
I0322 04:07:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 04:08:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:08:03.409783  543705 memory.go:184] no items to output this cycle
I0322 04:08:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 04:08:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:08:13.409787  543705 memory.go:191] Add success.
W0322 04:08:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:08:13.409818  543705 cpu.go:282] Add success.
W0322 04:08:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:08:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:08:13.420116  543705 net.go:648] Add success.
I0322 04:08:13.422575  543705 net.go:770] primary dev: ETH0
I0322 04:08:13.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:08:13.422600  543705 net.go:698] Add success.
I0322 04:08:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:08:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:08:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 04:08:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:08:14.456550  543705 disk_worker.go:494] system disk:vda1
I0322 04:08:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:08:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:08:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:08:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:08:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:08:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:08:23.409789  543705 memory.go:184] no items to output this cycle
I0322 04:08:23.409808  543705 cpu.go:275] no items to output this cycle
I0322 04:08:32.941678  543705 disk_info.go:125] begin check local disk info of client
I0322 04:08:32.944266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:08:32.944274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4000 0xc0002a4040]
E0322 04:08:33.408645  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:08:33.408657  543705 memory.go:184] no items to output this cycle
I0322 04:08:33.408685  543705 cpu.go:275] no items to output this cycle
E0322 04:08:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:08:43.409824  543705 memory.go:191] Add success.
I0322 04:08:43.409829  543705 cpu.go:282] Add success.
I0322 04:08:43.420013  543705 net.go:648] Add success.
I0322 04:08:43.422453  543705 net.go:770] primary dev: ETH0
I0322 04:08:43.422465  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:08:43.422477  543705 net.go:698] Add success.
I0322 04:08:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:08:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:08:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:08:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:08:53.409819  543705 memory.go:184] no items to output this cycle
I0322 04:08:53.409829  543705 cpu.go:275] no items to output this cycle
E0322 04:09:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:09:03.409793  543705 memory.go:184] no items to output this cycle
I0322 04:09:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 04:09:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:09:13.409819  543705 memory.go:191] Add success.
I0322 04:09:13.409820  543705 cpu.go:282] Add success.
W0322 04:09:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:09:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:09:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:09:13.420179  543705 net.go:648] Add success.
I0322 04:09:13.422743  543705 net.go:770] primary dev: ETH0
I0322 04:09:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:09:13.422771  543705 net.go:698] Add success.
I0322 04:09:13.568474  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6c393d06-dfba-4cb4-84cd-5f9b3b7de43b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:09:13.568509  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:09:14.454685  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:09:14.454935  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:09:14.454947  543705 disk_worker.go:708] disk space is not compliant
W0322 04:09:14.454950  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:09:14.456372  543705 disk_worker.go:494] system disk:vda1
I0322 04:09:14.456403  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:09:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:09:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:09:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:09:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:09:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:09:23.409770  543705 memory.go:184] no items to output this cycle
I0322 04:09:23.409819  543705 cpu.go:275] no items to output this cycle
I0322 04:09:32.945668  543705 disk_info.go:125] begin check local disk info of client
I0322 04:09:32.948197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:09:32.948203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c7100 0xc0004c7140]
E0322 04:09:33.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:09:33.407611  543705 memory.go:184] no items to output this cycle
I0322 04:09:33.407691  543705 cpu.go:275] no items to output this cycle
I0322 04:09:39.487184  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:09:39.487190  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:09:43.410608  543705 memory.go:191] Add success.
I0322 04:09:43.409825  543705 cpu.go:282] Add success.
I0322 04:09:43.420315  543705 net.go:648] Add success.
I0322 04:09:43.422823  543705 net.go:770] primary dev: ETH0
I0322 04:09:43.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:09:43.422848  543705 net.go:698] Add success.
I0322 04:09:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:09:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:09:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:09:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:09:53.409773  543705 memory.go:184] no items to output this cycle
I0322 04:09:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 04:10:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:10:03.409814  543705 memory.go:184] no items to output this cycle
I0322 04:10:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 04:10:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:10:13.409782  543705 memory.go:191] Add success.
W0322 04:10:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:10:13.409809  543705 cpu.go:282] Add success.
W0322 04:10:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:10:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:10:13.420218  543705 net.go:648] Add success.
I0322 04:10:13.423088  543705 net.go:770] primary dev: ETH0
I0322 04:10:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:10:13.423118  543705 net.go:698] Add success.
I0322 04:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:10:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:10:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0322 04:10:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:10:14.456466  543705 disk_worker.go:494] system disk:vda1
I0322 04:10:14.456513  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:10:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:10:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:10:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:10:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:10:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:10:23.409800  543705 memory.go:184] no items to output this cycle
I0322 04:10:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 04:10:32.949677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:10:32.952234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:10:32.952241  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f1c0 0xc00037f200]
E0322 04:10:33.408586  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:10:33.408599  543705 memory.go:184] no items to output this cycle
I0322 04:10:33.408601  543705 cpu.go:275] no items to output this cycle
E0322 04:10:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:10:43.409930  543705 cpu.go:282] Add success.
I0322 04:10:43.409984  543705 memory.go:191] Add success.
I0322 04:10:43.419720  543705 net.go:648] Add success.
I0322 04:10:43.422398  543705 net.go:770] primary dev: ETH0
I0322 04:10:43.422410  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:10:43.422422  543705 net.go:698] Add success.
I0322 04:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:10:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:10:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:10:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:10:53.409787  543705 memory.go:184] no items to output this cycle
I0322 04:10:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 04:11:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:11:03.409776  543705 memory.go:184] no items to output this cycle
I0322 04:11:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 04:11:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:11:13.409791  543705 memory.go:191] Add success.
I0322 04:11:13.409793  543705 cpu.go:282] Add success.
W0322 04:11:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:11:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:11:13.420073  543705 net.go:648] Add success.
I0322 04:11:13.422770  543705 net.go:770] primary dev: ETH0
I0322 04:11:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:11:13.422798  543705 net.go:698] Add success.
I0322 04:11:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:11:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:11:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 04:11:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:11:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 04:11:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:11:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:11:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:11:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:11:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:11:23.409800  543705 memory.go:184] no items to output this cycle
I0322 04:11:23.409812  543705 cpu.go:275] no items to output this cycle
I0322 04:11:32.953676  543705 disk_info.go:125] begin check local disk info of client
I0322 04:11:32.956260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:11:32.956266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d1940 0xc0003d1980]
E0322 04:11:33.408575  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:11:33.408587  543705 memory.go:184] no items to output this cycle
I0322 04:11:33.408629  543705 cpu.go:275] no items to output this cycle
E0322 04:11:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:11:43.409782  543705 memory.go:191] Add success.
I0322 04:11:43.409806  543705 cpu.go:282] Add success.
I0322 04:11:43.420058  543705 net.go:648] Add success.
I0322 04:11:43.423204  543705 net.go:770] primary dev: ETH0
I0322 04:11:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:11:43.423263  543705 net.go:698] Add success.
I0322 04:11:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:11:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:11:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:11:53.409779  543705 memory.go:184] no items to output this cycle
I0322 04:11:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 04:12:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:12:03.409812  543705 memory.go:184] no items to output this cycle
I0322 04:12:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 04:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:12:13.409808  543705 memory.go:191] Add success.
I0322 04:12:13.409820  543705 cpu.go:282] Add success.
W0322 04:12:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:12:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:12:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:12:13.420153  543705 net.go:648] Add success.
I0322 04:12:13.422914  543705 net.go:770] primary dev: ETH0
I0322 04:12:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:12:13.422944  543705 net.go:698] Add success.
I0322 04:12:13.469047  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c0d6de2f-0e69-4ebf-bf5c-0b6395e0d7e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:12:13.469081  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 04:12:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:12:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 04:12:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:12:14.455925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:12:14.455933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:12:14.455938  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:12:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 04:12:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:12:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:12:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:12:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:12:16.457965  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:12:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:12:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:12:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:12:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:12:23.409771  543705 memory.go:184] no items to output this cycle
I0322 04:12:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 04:12:32.957688  543705 disk_info.go:125] begin check local disk info of client
I0322 04:12:32.960187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:12:32.960193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc640 0xc0004dc680]
E0322 04:12:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:12:33.407521  543705 memory.go:184] no items to output this cycle
I0322 04:12:33.407548  543705 cpu.go:275] no items to output this cycle
I0322 04:12:39.488251  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:12:39.488257  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:12:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:12:43.410726  543705 memory.go:191] Add success.
I0322 04:12:43.409821  543705 cpu.go:282] Add success.
I0322 04:12:43.420625  543705 net.go:648] Add success.
I0322 04:12:43.423590  543705 net.go:770] primary dev: ETH0
I0322 04:12:43.423605  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:12:43.423619  543705 net.go:698] Add success.
I0322 04:12:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:12:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:12:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:12:53.409786  543705 memory.go:184] no items to output this cycle
I0322 04:12:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 04:13:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:13:03.409795  543705 cpu.go:275] no items to output this cycle
I0322 04:13:03.409798  543705 memory.go:184] no items to output this cycle
E0322 04:13:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:13:13.409822  543705 memory.go:191] Add success.
I0322 04:13:13.409828  543705 cpu.go:282] Add success.
W0322 04:13:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:13:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:13:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:13:13.420076  543705 net.go:648] Add success.
I0322 04:13:13.422600  543705 net.go:770] primary dev: ETH0
I0322 04:13:13.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:13:13.422631  543705 net.go:698] Add success.
I0322 04:13:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:13:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:13:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 04:13:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:13:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 04:13:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:13:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:13:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:13:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:13:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:13:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:13:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:13:23.409780  543705 memory.go:184] no items to output this cycle
I0322 04:13:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 04:13:32.961674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:13:32.964328  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:13:32.964335  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003839c0 0xc000383a00]
E0322 04:13:33.407511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:13:33.407527  543705 memory.go:184] no items to output this cycle
I0322 04:13:33.407535  543705 cpu.go:275] no items to output this cycle
E0322 04:13:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:13:43.409821  543705 memory.go:191] Add success.
I0322 04:13:43.409831  543705 cpu.go:282] Add success.
I0322 04:13:43.419947  543705 net.go:648] Add success.
I0322 04:13:43.422589  543705 net.go:770] primary dev: ETH0
I0322 04:13:43.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:13:43.422615  543705 net.go:698] Add success.
I0322 04:13:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:13:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:13:53.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:13:53.409884  543705 memory.go:184] no items to output this cycle
I0322 04:13:53.409990  543705 cpu.go:275] no items to output this cycle
E0322 04:14:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:14:03.409783  543705 memory.go:184] no items to output this cycle
I0322 04:14:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 04:14:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:14:13.409797  543705 memory.go:191] Add success.
I0322 04:14:13.409798  543705 cpu.go:282] Add success.
W0322 04:14:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:14:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:14:13.420258  543705 net.go:648] Add success.
I0322 04:14:13.422838  543705 net.go:770] primary dev: ETH0
I0322 04:14:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:14:13.422864  543705 net.go:698] Add success.
I0322 04:14:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:14:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:14:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 04:14:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:14:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 04:14:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:14:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:14:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:14:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:14:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:14:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:14:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:14:23.409769  543705 memory.go:184] no items to output this cycle
I0322 04:14:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 04:14:32.965675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:14:32.968197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:14:32.968204  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382540 0xc000382580]
E0322 04:14:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:14:33.407520  543705 memory.go:184] no items to output this cycle
I0322 04:14:33.407525  543705 cpu.go:275] no items to output this cycle
E0322 04:14:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:14:43.409815  543705 memory.go:191] Add success.
I0322 04:14:43.409818  543705 cpu.go:282] Add success.
I0322 04:14:43.420015  543705 net.go:648] Add success.
I0322 04:14:43.422797  543705 net.go:770] primary dev: ETH0
I0322 04:14:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:14:43.422824  543705 net.go:698] Add success.
I0322 04:14:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:14:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:14:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:14:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:14:53.409812  543705 memory.go:184] no items to output this cycle
I0322 04:14:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 04:15:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:15:03.409781  543705 memory.go:184] no items to output this cycle
I0322 04:15:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 04:15:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:15:13.409815  543705 memory.go:191] Add success.
I0322 04:15:13.409818  543705 cpu.go:282] Add success.
W0322 04:15:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:15:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:15:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:15:13.420298  543705 net.go:648] Add success.
I0322 04:15:13.423042  543705 net.go:770] primary dev: ETH0
I0322 04:15:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:15:13.423067  543705 net.go:698] Add success.
I0322 04:15:13.469487  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef179f99-a1a0-465a-837e-2731359ccc5f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:15:13.469522  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:15:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:15:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:15:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 04:15:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:15:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 04:15:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:15:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:15:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:15:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:15:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:15:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:15:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:15:23.409776  543705 memory.go:184] no items to output this cycle
I0322 04:15:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 04:15:32.969674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:15:32.972152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:15:32.972159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000383740 0xc000383780]
E0322 04:15:33.407514  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:15:33.407526  543705 memory.go:184] no items to output this cycle
I0322 04:15:33.407532  543705 cpu.go:275] no items to output this cycle
I0322 04:15:39.489199  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:15:39.489207  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:15:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:15:43.410835  543705 memory.go:191] Add success.
I0322 04:15:43.409822  543705 cpu.go:282] Add success.
I0322 04:15:43.420535  543705 net.go:648] Add success.
I0322 04:15:43.423436  543705 net.go:770] primary dev: ETH0
I0322 04:15:43.423450  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:15:43.423465  543705 net.go:698] Add success.
I0322 04:15:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:15:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:15:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:15:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:15:53.409813  543705 memory.go:184] no items to output this cycle
I0322 04:15:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 04:16:03.409916  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:16:03.409927  543705 cpu.go:275] no items to output this cycle
I0322 04:16:03.409940  543705 memory.go:184] no items to output this cycle
E0322 04:16:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:16:13.409783  543705 memory.go:191] Add success.
I0322 04:16:13.409798  543705 cpu.go:282] Add success.
W0322 04:16:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:16:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:16:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:16:13.420141  543705 net.go:648] Add success.
I0322 04:16:13.422747  543705 net.go:770] primary dev: ETH0
I0322 04:16:13.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:16:13.422775  543705 net.go:698] Add success.
I0322 04:16:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:16:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:16:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 04:16:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:16:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 04:16:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:16:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:16:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:16:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:16:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:16:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:16:23.409795  543705 memory.go:184] no items to output this cycle
I0322 04:16:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 04:16:32.973671  543705 disk_info.go:125] begin check local disk info of client
I0322 04:16:32.976156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:16:32.976164  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3f40 0xc00049e000]
E0322 04:16:33.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:16:33.407523  543705 memory.go:184] no items to output this cycle
I0322 04:16:33.407554  543705 cpu.go:275] no items to output this cycle
E0322 04:16:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:16:43.409821  543705 memory.go:191] Add success.
I0322 04:16:43.409827  543705 cpu.go:282] Add success.
I0322 04:16:43.419871  543705 net.go:648] Add success.
I0322 04:16:43.423053  543705 net.go:770] primary dev: ETH0
I0322 04:16:43.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:16:43.423088  543705 net.go:698] Add success.
I0322 04:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:16:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:16:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:16:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:16:53.409787  543705 memory.go:184] no items to output this cycle
I0322 04:16:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 04:17:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:17:03.409781  543705 memory.go:184] no items to output this cycle
I0322 04:17:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 04:17:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:17:13.409805  543705 memory.go:191] Add success.
I0322 04:17:13.409812  543705 cpu.go:282] Add success.
W0322 04:17:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:17:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:17:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:17:13.420165  543705 net.go:648] Add success.
I0322 04:17:13.422976  543705 net.go:770] primary dev: ETH0
I0322 04:17:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:17:13.423000  543705 net.go:698] Add success.
I0322 04:17:13.453548  543705 event_worker.go:152] Polling the log file for events...
W0322 04:17:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:17:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 04:17:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:17:14.455867  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:17:14.455875  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:17:14.455880  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:17:14.456532  543705 disk_worker.go:494] system disk:vda1
I0322 04:17:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:17:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:17:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:17:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:17:16.457985  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:17:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:17:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:17:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:17:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:17:23.409774  543705 memory.go:184] no items to output this cycle
I0322 04:17:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 04:17:32.977675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:17:32.980172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:17:32.980178  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033d080 0xc00033d0c0]
E0322 04:17:33.408398  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:17:33.408415  543705 memory.go:184] no items to output this cycle
I0322 04:17:33.408430  543705 cpu.go:275] no items to output this cycle
E0322 04:17:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:17:43.409785  543705 memory.go:191] Add success.
I0322 04:17:43.409803  543705 cpu.go:282] Add success.
I0322 04:17:43.419945  543705 net.go:648] Add success.
I0322 04:17:43.422618  543705 net.go:770] primary dev: ETH0
I0322 04:17:43.422630  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:17:43.422643  543705 net.go:698] Add success.
I0322 04:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:17:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:17:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:17:53.409775  543705 memory.go:184] no items to output this cycle
I0322 04:17:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 04:18:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:18:03.409780  543705 memory.go:184] no items to output this cycle
I0322 04:18:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 04:18:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:18:13.409777  543705 memory.go:191] Add success.
W0322 04:18:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:18:13.409804  543705 cpu.go:282] Add success.
W0322 04:18:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:18:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:18:13.420165  543705 net.go:648] Add success.
I0322 04:18:13.423110  543705 net.go:770] primary dev: ETH0
I0322 04:18:13.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:18:13.423143  543705 net.go:698] Add success.
I0322 04:18:13.463939  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"296abd22-336f-404a-9e58-d605ba0ea608","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:18:13.463972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:18:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:18:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:18:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 04:18:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:18:14.456528  543705 disk_worker.go:494] system disk:vda1
I0322 04:18:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:18:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:18:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:18:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:18:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:18:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:18:23.409775  543705 memory.go:184] no items to output this cycle
I0322 04:18:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 04:18:32.981677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:18:32.984179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:18:32.984186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487500 0xc000487540]
E0322 04:18:33.407522  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:18:33.407533  543705 memory.go:184] no items to output this cycle
I0322 04:18:33.407535  543705 cpu.go:275] no items to output this cycle
I0322 04:18:39.490199  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:18:39.490205  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:18:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:18:43.410654  543705 memory.go:191] Add success.
I0322 04:18:43.409803  543705 cpu.go:282] Add success.
I0322 04:18:43.420464  543705 net.go:648] Add success.
I0322 04:18:43.422907  543705 net.go:770] primary dev: ETH0
I0322 04:18:43.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:18:43.422932  543705 net.go:698] Add success.
I0322 04:18:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:18:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:18:53.409814  543705 memory.go:184] no items to output this cycle
I0322 04:18:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 04:19:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:19:03.409770  543705 memory.go:184] no items to output this cycle
I0322 04:19:03.409811  543705 cpu.go:275] no items to output this cycle
W0322 04:19:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:19:13.409750  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:19:13.409756  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:19:13.409822  543705 cpu.go:282] Add success.
E0322 04:19:13.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:19:13.409875  543705 memory.go:191] Add success.
I0322 04:19:13.420071  543705 net.go:648] Add success.
I0322 04:19:13.422868  543705 net.go:770] primary dev: ETH0
I0322 04:19:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:19:13.422892  543705 net.go:698] Add success.
I0322 04:19:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:19:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:19:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 04:19:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:19:14.456471  543705 disk_worker.go:494] system disk:vda1
I0322 04:19:14.456517  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:19:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:19:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:19:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:19:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:19:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:19:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 04:19:23.409785  543705 memory.go:184] no items to output this cycle
I0322 04:19:32.985679  543705 disk_info.go:125] begin check local disk info of client
I0322 04:19:32.988176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:19:32.988183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8e00 0xc0003c8e40]
E0322 04:19:33.407532  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:19:33.407545  543705 memory.go:184] no items to output this cycle
I0322 04:19:33.407550  543705 cpu.go:275] no items to output this cycle
E0322 04:19:43.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:19:43.409902  543705 memory.go:191] Add success.
I0322 04:19:43.409974  543705 cpu.go:282] Add success.
I0322 04:19:43.419722  543705 net.go:648] Add success.
I0322 04:19:43.422282  543705 net.go:770] primary dev: ETH0
I0322 04:19:43.422294  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:19:43.422306  543705 net.go:698] Add success.
I0322 04:19:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:19:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:19:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:19:53.409811  543705 memory.go:184] no items to output this cycle
I0322 04:19:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 04:20:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:20:03.409788  543705 cpu.go:275] no items to output this cycle
I0322 04:20:03.409793  543705 memory.go:184] no items to output this cycle
W0322 04:20:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:20:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:20:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 04:20:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:20:13.409823  543705 memory.go:191] Add success.
I0322 04:20:13.409822  543705 cpu.go:282] Add success.
I0322 04:20:13.420069  543705 net.go:648] Add success.
I0322 04:20:13.422694  543705 net.go:770] primary dev: ETH0
I0322 04:20:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:20:13.422720  543705 net.go:698] Add success.
I0322 04:20:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:20:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:20:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 04:20:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:20:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 04:20:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:20:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:20:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:20:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:20:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:20:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:20:23.409801  543705 memory.go:184] no items to output this cycle
I0322 04:20:23.409812  543705 cpu.go:275] no items to output this cycle
I0322 04:20:32.989672  543705 disk_info.go:125] begin check local disk info of client
I0322 04:20:32.992190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:20:32.992196  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386340 0xc000386380]
E0322 04:20:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:20:33.407520  543705 memory.go:184] no items to output this cycle
I0322 04:20:33.407530  543705 cpu.go:275] no items to output this cycle
E0322 04:20:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:20:43.409802  543705 memory.go:191] Add success.
I0322 04:20:43.409820  543705 cpu.go:282] Add success.
I0322 04:20:43.419979  543705 net.go:648] Add success.
I0322 04:20:43.422357  543705 net.go:770] primary dev: ETH0
I0322 04:20:43.422369  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:20:43.422381  543705 net.go:698] Add success.
I0322 04:20:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:20:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:20:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:20:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:20:53.409827  543705 memory.go:184] no items to output this cycle
I0322 04:20:53.409833  543705 cpu.go:275] no items to output this cycle
E0322 04:21:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:21:03.409782  543705 memory.go:184] no items to output this cycle
I0322 04:21:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 04:21:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:21:13.409802  543705 memory.go:191] Add success.
I0322 04:21:13.409805  543705 cpu.go:282] Add success.
W0322 04:21:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:21:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:21:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:21:13.420065  543705 net.go:648] Add success.
I0322 04:21:13.423010  543705 net.go:770] primary dev: ETH0
I0322 04:21:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:21:13.423036  543705 net.go:698] Add success.
I0322 04:21:13.479959  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9c8dd93-b7dc-4640-908a-dddc97ba1de9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:21:13.479993  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:21:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:21:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:21:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 04:21:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:21:14.456682  543705 disk_worker.go:494] system disk:vda1
I0322 04:21:14.456722  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:21:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:21:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:21:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:21:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:21:23.409793  543705 memory.go:184] no items to output this cycle
I0322 04:21:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 04:21:32.993673  543705 disk_info.go:125] begin check local disk info of client
I0322 04:21:32.996162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:21:32.996168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004acd40 0xc0004acd80]
E0322 04:21:33.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:21:33.407522  543705 memory.go:184] no items to output this cycle
I0322 04:21:33.407551  543705 cpu.go:275] no items to output this cycle
I0322 04:21:39.491219  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:21:39.491225  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:21:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:21:43.410598  543705 memory.go:191] Add success.
I0322 04:21:43.409833  543705 cpu.go:282] Add success.
I0322 04:21:43.420350  543705 net.go:648] Add success.
I0322 04:21:43.423013  543705 net.go:770] primary dev: ETH0
I0322 04:21:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:21:43.423043  543705 net.go:698] Add success.
I0322 04:21:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:21:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:21:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:21:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:21:53.409789  543705 memory.go:184] no items to output this cycle
I0322 04:21:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 04:22:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:22:03.409809  543705 memory.go:184] no items to output this cycle
I0322 04:22:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 04:22:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:22:13.409802  543705 memory.go:191] Add success.
I0322 04:22:13.409802  543705 cpu.go:282] Add success.
W0322 04:22:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:22:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:22:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:22:13.420161  543705 net.go:648] Add success.
I0322 04:22:13.422954  543705 net.go:770] primary dev: ETH0
I0322 04:22:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:22:13.422979  543705 net.go:698] Add success.
W0322 04:22:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:22:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 04:22:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:22:14.456760  543705 disk_worker.go:494] system disk:vda1
I0322 04:22:14.456799  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:22:14.457095  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:22:14.457103  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:22:14.457108  543705 custom_config.go:64] query custom config with name: gpu
E0322 04:22:15.456780  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:22:15.456789  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:22:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:22:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:22:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:22:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:22:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:22:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:22:23.409817  543705 memory.go:184] no items to output this cycle
I0322 04:22:23.409826  543705 cpu.go:275] no items to output this cycle
I0322 04:22:32.997679  543705 disk_info.go:125] begin check local disk info of client
I0322 04:22:33.000193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:22:33.000199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1f00 0xc0003e1f40]
E0322 04:22:33.408472  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:22:33.408488  543705 memory.go:184] no items to output this cycle
I0322 04:22:33.408532  543705 cpu.go:275] no items to output this cycle
E0322 04:22:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:22:43.409794  543705 memory.go:191] Add success.
I0322 04:22:43.409795  543705 cpu.go:282] Add success.
I0322 04:22:43.419901  543705 net.go:648] Add success.
I0322 04:22:43.422737  543705 net.go:770] primary dev: ETH0
I0322 04:22:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:22:43.422762  543705 net.go:698] Add success.
I0322 04:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:22:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:22:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:22:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:22:53.409809  543705 memory.go:184] no items to output this cycle
I0322 04:22:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 04:23:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:23:03.409785  543705 memory.go:184] no items to output this cycle
I0322 04:23:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 04:23:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:23:13.409798  543705 memory.go:191] Add success.
I0322 04:23:13.409802  543705 cpu.go:282] Add success.
W0322 04:23:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:23:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:23:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:23:13.420083  543705 net.go:648] Add success.
I0322 04:23:13.422746  543705 net.go:770] primary dev: ETH0
I0322 04:23:13.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:23:13.422775  543705 net.go:698] Add success.
I0322 04:23:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:23:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:23:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 04:23:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:23:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 04:23:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:23:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:23:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:23:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:23:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:23:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:23:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:23:23.409774  543705 memory.go:184] no items to output this cycle
I0322 04:23:23.409794  543705 cpu.go:275] no items to output this cycle
I0322 04:23:33.001683  543705 disk_info.go:125] begin check local disk info of client
I0322 04:23:33.004233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:23:33.004240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8000 0xc0003d8040]
E0322 04:23:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:23:33.407521  543705 memory.go:184] no items to output this cycle
I0322 04:23:33.407553  543705 cpu.go:275] no items to output this cycle
E0322 04:23:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:23:43.409800  543705 memory.go:191] Add success.
I0322 04:23:43.409819  543705 cpu.go:282] Add success.
I0322 04:23:43.420077  543705 net.go:648] Add success.
I0322 04:23:43.422710  543705 net.go:770] primary dev: ETH0
I0322 04:23:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:23:43.422739  543705 net.go:698] Add success.
I0322 04:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:23:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:23:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:23:53.409792  543705 memory.go:184] no items to output this cycle
I0322 04:23:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 04:24:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:24:03.409807  543705 memory.go:184] no items to output this cycle
I0322 04:24:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 04:24:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:24:13.409788  543705 memory.go:191] Add success.
W0322 04:24:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:24:13.409814  543705 cpu.go:282] Add success.
W0322 04:24:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:24:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:24:13.420138  543705 net.go:648] Add success.
I0322 04:24:13.422849  543705 net.go:770] primary dev: ETH0
I0322 04:24:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:24:13.422877  543705 net.go:698] Add success.
I0322 04:24:13.464140  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f0b46d0c-72cd-40e9-85a2-56ee8165cb71","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:24:13.464175  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:24:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:24:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:24:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 04:24:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:24:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 04:24:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:24:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:24:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:24:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:24:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:24:23.409804  543705 memory.go:184] no items to output this cycle
I0322 04:24:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 04:24:33.005678  543705 disk_info.go:125] begin check local disk info of client
I0322 04:24:33.008244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:24:33.008251  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278200 0xc000278240]
E0322 04:24:33.407521  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:24:33.407537  543705 memory.go:184] no items to output this cycle
I0322 04:24:33.407549  543705 cpu.go:275] no items to output this cycle
I0322 04:24:39.492204  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:24:39.492210  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:24:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:24:43.410581  543705 memory.go:191] Add success.
I0322 04:24:43.409813  543705 cpu.go:282] Add success.
I0322 04:24:43.420282  543705 net.go:648] Add success.
I0322 04:24:43.422855  543705 net.go:770] primary dev: ETH0
I0322 04:24:43.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:24:43.422886  543705 net.go:698] Add success.
I0322 04:24:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:24:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:24:53.409784  543705 memory.go:184] no items to output this cycle
I0322 04:24:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 04:25:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:25:03.409809  543705 memory.go:184] no items to output this cycle
I0322 04:25:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 04:25:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:25:13.409788  543705 memory.go:191] Add success.
W0322 04:25:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:25:13.409816  543705 cpu.go:282] Add success.
W0322 04:25:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:25:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:25:13.420075  543705 net.go:648] Add success.
I0322 04:25:13.422604  543705 net.go:770] primary dev: ETH0
I0322 04:25:13.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:25:13.422629  543705 net.go:698] Add success.
I0322 04:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:25:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:25:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 04:25:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:25:14.456477  543705 disk_worker.go:494] system disk:vda1
I0322 04:25:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:25:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:25:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:25:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:25:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:25:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:25:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:25:23.409811  543705 memory.go:184] no items to output this cycle
I0322 04:25:23.409823  543705 cpu.go:275] no items to output this cycle
I0322 04:25:33.009676  543705 disk_info.go:125] begin check local disk info of client
I0322 04:25:33.012158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:25:33.012165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa7c0 0xc0001aa800]
E0322 04:25:33.408228  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:25:33.408243  543705 memory.go:184] no items to output this cycle
I0322 04:25:33.408259  543705 cpu.go:275] no items to output this cycle
E0322 04:25:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:25:43.409802  543705 memory.go:191] Add success.
I0322 04:25:43.409817  543705 cpu.go:282] Add success.
I0322 04:25:43.420004  543705 net.go:648] Add success.
I0322 04:25:43.422635  543705 net.go:770] primary dev: ETH0
I0322 04:25:43.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:25:43.422660  543705 net.go:698] Add success.
I0322 04:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:25:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:25:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:25:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:25:53.409793  543705 memory.go:184] no items to output this cycle
I0322 04:25:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 04:26:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:26:03.409793  543705 memory.go:184] no items to output this cycle
I0322 04:26:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 04:26:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:26:13.409799  543705 memory.go:191] Add success.
I0322 04:26:13.409799  543705 cpu.go:282] Add success.
W0322 04:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:26:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:26:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:26:13.420171  543705 net.go:648] Add success.
I0322 04:26:13.422869  543705 net.go:770] primary dev: ETH0
I0322 04:26:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:26:13.422898  543705 net.go:698] Add success.
I0322 04:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:26:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:26:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 04:26:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:26:14.457001  543705 disk_worker.go:494] system disk:vda1
I0322 04:26:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:26:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:26:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:26:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:26:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:26:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:26:23.409801  543705 memory.go:184] no items to output this cycle
I0322 04:26:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 04:26:33.013687  543705 disk_info.go:125] begin check local disk info of client
I0322 04:26:33.016173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:26:33.016179  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9080 0xc0004d90c0]
E0322 04:26:33.408223  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:26:33.408240  543705 memory.go:184] no items to output this cycle
I0322 04:26:33.408255  543705 cpu.go:275] no items to output this cycle
E0322 04:26:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:26:43.409780  543705 memory.go:191] Add success.
I0322 04:26:43.409822  543705 cpu.go:282] Add success.
I0322 04:26:43.419891  543705 net.go:648] Add success.
I0322 04:26:43.423370  543705 net.go:770] primary dev: ETH0
I0322 04:26:43.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:26:43.423396  543705 net.go:698] Add success.
I0322 04:26:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:26:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:26:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:26:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:26:53.409791  543705 cpu.go:275] no items to output this cycle
I0322 04:26:53.409795  543705 memory.go:184] no items to output this cycle
E0322 04:27:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:27:03.409802  543705 memory.go:184] no items to output this cycle
I0322 04:27:03.409827  543705 cpu.go:275] no items to output this cycle
E0322 04:27:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:27:13.409779  543705 memory.go:191] Add success.
I0322 04:27:13.409801  543705 cpu.go:282] Add success.
W0322 04:27:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:27:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:27:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:27:13.420055  543705 net.go:648] Add success.
I0322 04:27:13.428657  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 04:27:13.428730  543705 net.go:770] primary dev: ETH0
I0322 04:27:13.428742  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:27:13.428754  543705 net.go:698] Add success.
I0322 04:27:13.453636  543705 event_worker.go:152] Polling the log file for events...
I0322 04:27:13.468570  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab40fafa-f0c6-4e6e-b5ab-1ccbb810b0a9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:27:13.468600  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 04:27:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:27:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 04:27:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:27:14.456098  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:27:14.456107  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:27:14.456113  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:27:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 04:27:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:27:15.456775  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:27:15.456784  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:27:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:27:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:27:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:27:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:27:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:27:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:27:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 04:27:23.409787  543705 memory.go:184] no items to output this cycle
I0322 04:27:33.017675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:27:33.020213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:27:33.020220  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8b80 0xc0004d8bc0]
E0322 04:27:33.408231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:27:33.408240  543705 cpu.go:275] no items to output this cycle
I0322 04:27:33.408243  543705 memory.go:184] no items to output this cycle
I0322 04:27:39.493217  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:27:39.493224  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:27:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:27:43.410441  543705 memory.go:191] Add success.
I0322 04:27:43.409852  543705 cpu.go:282] Add success.
I0322 04:27:43.420145  543705 net.go:648] Add success.
I0322 04:27:43.422447  543705 net.go:770] primary dev: ETH0
I0322 04:27:43.422460  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:27:43.422473  543705 net.go:698] Add success.
I0322 04:27:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:27:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:27:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:27:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:27:53.409780  543705 memory.go:184] no items to output this cycle
I0322 04:27:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 04:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:28:03.409783  543705 memory.go:184] no items to output this cycle
I0322 04:28:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 04:28:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:28:13.409819  543705 memory.go:191] Add success.
I0322 04:28:13.409825  543705 cpu.go:282] Add success.
W0322 04:28:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:28:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:28:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:28:13.420073  543705 net.go:648] Add success.
I0322 04:28:13.423225  543705 net.go:770] primary dev: ETH0
I0322 04:28:13.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:28:13.423253  543705 net.go:698] Add success.
I0322 04:28:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:28:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:28:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 04:28:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:28:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 04:28:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:28:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:28:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:28:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:28:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:28:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:28:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:28:23.409793  543705 memory.go:184] no items to output this cycle
I0322 04:28:23.409808  543705 cpu.go:275] no items to output this cycle
I0322 04:28:33.021675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:28:33.024178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:28:33.024184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8580 0xc0004d85c0]
E0322 04:28:33.408188  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:28:33.408205  543705 memory.go:184] no items to output this cycle
I0322 04:28:33.408219  543705 cpu.go:275] no items to output this cycle
E0322 04:28:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:28:43.409791  543705 memory.go:191] Add success.
I0322 04:28:43.409793  543705 cpu.go:282] Add success.
I0322 04:28:43.419858  543705 net.go:648] Add success.
I0322 04:28:43.422723  543705 net.go:770] primary dev: ETH0
I0322 04:28:43.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:28:43.422753  543705 net.go:698] Add success.
I0322 04:28:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:28:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:28:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:28:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:28:53.409812  543705 memory.go:184] no items to output this cycle
I0322 04:28:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 04:29:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:29:03.409793  543705 memory.go:184] no items to output this cycle
I0322 04:29:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 04:29:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:29:13.409819  543705 memory.go:191] Add success.
I0322 04:29:13.409825  543705 cpu.go:282] Add success.
W0322 04:29:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:29:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:29:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:29:13.420198  543705 net.go:648] Add success.
I0322 04:29:13.422945  543705 net.go:770] primary dev: ETH0
I0322 04:29:13.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:29:13.422973  543705 net.go:698] Add success.
I0322 04:29:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:29:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:29:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 04:29:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:29:14.456475  543705 disk_worker.go:494] system disk:vda1
I0322 04:29:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:29:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:29:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:29:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:29:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:29:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:29:23.409777  543705 memory.go:184] no items to output this cycle
I0322 04:29:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 04:29:33.025675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:29:33.028225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:29:33.028232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046e300 0xc00046e340]
E0322 04:29:33.407517  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:29:33.407532  543705 memory.go:184] no items to output this cycle
I0322 04:29:33.407542  543705 cpu.go:275] no items to output this cycle
E0322 04:29:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:29:43.409796  543705 memory.go:191] Add success.
I0322 04:29:43.409796  543705 cpu.go:282] Add success.
I0322 04:29:43.420019  543705 net.go:648] Add success.
I0322 04:29:43.422703  543705 net.go:770] primary dev: ETH0
I0322 04:29:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:29:43.422730  543705 net.go:698] Add success.
I0322 04:29:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:29:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:29:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:29:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:29:53.409787  543705 memory.go:184] no items to output this cycle
I0322 04:29:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 04:30:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:30:03.409782  543705 memory.go:184] no items to output this cycle
I0322 04:30:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 04:30:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:30:13.409798  543705 memory.go:191] Add success.
I0322 04:30:13.409798  543705 cpu.go:282] Add success.
W0322 04:30:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:30:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:30:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:30:13.420122  543705 net.go:648] Add success.
I0322 04:30:13.422908  543705 net.go:770] primary dev: ETH0
I0322 04:30:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:30:13.422946  543705 net.go:698] Add success.
I0322 04:30:13.465404  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4b8dcb79-09a1-4ddc-9c74-837a1c7dab14","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:30:13.465439  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:30:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:30:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:30:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 04:30:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:30:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 04:30:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:30:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:30:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:30:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:30:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:30:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:30:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:30:23.409796  543705 memory.go:184] no items to output this cycle
I0322 04:30:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 04:30:33.029677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:30:33.032162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:30:33.032168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ce440 0xc0003ce480]
E0322 04:30:33.408255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:30:33.408278  543705 memory.go:184] no items to output this cycle
I0322 04:30:33.408290  543705 cpu.go:275] no items to output this cycle
I0322 04:30:39.494218  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:30:39.494224  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:30:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:30:43.410678  543705 memory.go:191] Add success.
I0322 04:30:43.409815  543705 cpu.go:282] Add success.
I0322 04:30:43.420386  543705 net.go:648] Add success.
I0322 04:30:43.422868  543705 net.go:770] primary dev: ETH0
I0322 04:30:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:30:43.422893  543705 net.go:698] Add success.
I0322 04:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:30:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:30:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:30:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:30:53.409786  543705 memory.go:184] no items to output this cycle
I0322 04:30:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 04:31:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:31:03.409797  543705 memory.go:184] no items to output this cycle
I0322 04:31:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 04:31:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:31:13.409777  543705 memory.go:191] Add success.
W0322 04:31:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:31:13.409802  543705 cpu.go:282] Add success.
W0322 04:31:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:31:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:31:13.420191  543705 net.go:648] Add success.
I0322 04:31:13.422787  543705 net.go:770] primary dev: ETH0
I0322 04:31:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:31:13.422812  543705 net.go:698] Add success.
I0322 04:31:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:31:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:31:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 04:31:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:31:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 04:31:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:31:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:31:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:31:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:31:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:31:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:31:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:31:23.409791  543705 memory.go:184] no items to output this cycle
I0322 04:31:23.409800  543705 cpu.go:275] no items to output this cycle
I0322 04:31:33.033674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:31:33.036269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:31:33.036275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004607c0 0xc000460800]
E0322 04:31:33.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:31:33.407522  543705 memory.go:184] no items to output this cycle
I0322 04:31:33.407544  543705 cpu.go:275] no items to output this cycle
E0322 04:31:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:31:43.409835  543705 memory.go:191] Add success.
I0322 04:31:43.409852  543705 cpu.go:282] Add success.
I0322 04:31:43.420052  543705 net.go:648] Add success.
I0322 04:31:43.423121  543705 net.go:770] primary dev: ETH0
I0322 04:31:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:31:43.423150  543705 net.go:698] Add success.
I0322 04:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:31:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:31:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:31:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:31:53.409782  543705 memory.go:184] no items to output this cycle
I0322 04:31:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 04:32:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:32:03.409778  543705 memory.go:184] no items to output this cycle
I0322 04:32:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 04:32:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:32:13.409842  543705 memory.go:191] Add success.
I0322 04:32:13.409846  543705 cpu.go:282] Add success.
W0322 04:32:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:32:13.409900  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:32:13.409905  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:32:13.420382  543705 net.go:648] Add success.
I0322 04:32:13.423064  543705 net.go:770] primary dev: ETH0
I0322 04:32:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:32:13.423095  543705 net.go:698] Add success.
W0322 04:32:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:32:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 04:32:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:32:14.456099  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:32:14.456108  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:32:14.456114  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:32:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 04:32:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:32:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:32:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:32:16.457892  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:32:16.457892  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:32:16.457947  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:32:16.457966  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:32:16.472292  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:32:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:32:23.409796  543705 memory.go:184] no items to output this cycle
I0322 04:32:23.409810  543705 cpu.go:275] no items to output this cycle
I0322 04:32:33.037674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:32:33.040221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:32:33.040227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275dc0 0xc000275e00]
E0322 04:32:33.408143  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:32:33.408156  543705 memory.go:184] no items to output this cycle
I0322 04:32:33.408158  543705 cpu.go:275] no items to output this cycle
E0322 04:32:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:32:43.409911  543705 memory.go:191] Add success.
I0322 04:32:43.409927  543705 cpu.go:282] Add success.
I0322 04:32:43.419712  543705 net.go:648] Add success.
I0322 04:32:43.422171  543705 net.go:770] primary dev: ETH0
I0322 04:32:43.422184  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:32:43.422196  543705 net.go:698] Add success.
I0322 04:32:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:32:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:32:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:32:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:32:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 04:32:53.409793  543705 memory.go:184] no items to output this cycle
E0322 04:33:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:33:03.409770  543705 memory.go:184] no items to output this cycle
I0322 04:33:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 04:33:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:33:13.409786  543705 memory.go:191] Add success.
I0322 04:33:13.409802  543705 cpu.go:282] Add success.
W0322 04:33:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:33:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:33:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:33:13.420436  543705 net.go:648] Add success.
I0322 04:33:13.423035  543705 net.go:770] primary dev: ETH0
I0322 04:33:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:33:13.423071  543705 net.go:698] Add success.
I0322 04:33:13.468571  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00e0a674-5725-4f38-b41f-e4fb779c1d6f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:33:13.468609  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:33:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:33:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:33:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 04:33:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:33:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 04:33:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:33:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:33:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:33:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:33:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:33:23.409794  543705 memory.go:184] no items to output this cycle
I0322 04:33:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 04:33:33.041677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:33:33.044220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:33:33.044226  543705 disk_info.go:196] parse disk info done, disk is : [0xc000356e40 0xc000356e80]
E0322 04:33:33.408127  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:33:33.408136  543705 cpu.go:275] no items to output this cycle
I0322 04:33:33.408138  543705 memory.go:184] no items to output this cycle
I0322 04:33:39.495229  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:33:39.495236  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:33:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:33:43.409792  543705 cpu.go:282] Add success.
I0322 04:33:43.410813  543705 memory.go:191] Add success.
I0322 04:33:43.419733  543705 net.go:648] Add success.
I0322 04:33:43.422131  543705 net.go:770] primary dev: ETH0
I0322 04:33:43.422144  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:33:43.422156  543705 net.go:698] Add success.
I0322 04:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:33:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:33:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:33:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:33:53.409776  543705 memory.go:184] no items to output this cycle
I0322 04:33:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 04:34:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:34:03.409779  543705 cpu.go:275] no items to output this cycle
I0322 04:34:03.409788  543705 memory.go:184] no items to output this cycle
E0322 04:34:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:34:13.409806  543705 memory.go:191] Add success.
I0322 04:34:13.409813  543705 cpu.go:282] Add success.
W0322 04:34:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:34:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:34:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:34:13.420060  543705 net.go:648] Add success.
I0322 04:34:13.422987  543705 net.go:770] primary dev: ETH0
I0322 04:34:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:34:13.423016  543705 net.go:698] Add success.
I0322 04:34:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:34:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:34:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 04:34:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:34:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 04:34:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:34:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:34:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:34:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:34:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:34:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 04:34:23.409780  543705 memory.go:184] no items to output this cycle
I0322 04:34:33.045679  543705 disk_info.go:125] begin check local disk info of client
I0322 04:34:33.048198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:34:33.048205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329800 0xc000329840]
E0322 04:34:33.408092  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:34:33.408121  543705 memory.go:184] no items to output this cycle
I0322 04:34:33.408140  543705 cpu.go:275] no items to output this cycle
E0322 04:34:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:34:43.409791  543705 cpu.go:282] Add success.
I0322 04:34:43.409803  543705 memory.go:191] Add success.
I0322 04:34:43.419698  543705 net.go:770] primary dev: ETH0
I0322 04:34:43.419712  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:34:43.419725  543705 net.go:698] Add success.
I0322 04:34:43.420197  543705 net.go:648] Add success.
I0322 04:34:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:34:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:34:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:34:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:34:53.409779  543705 memory.go:184] no items to output this cycle
I0322 04:34:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 04:35:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:35:03.409804  543705 memory.go:184] no items to output this cycle
I0322 04:35:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 04:35:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:35:13.409789  543705 memory.go:191] Add success.
I0322 04:35:13.409791  543705 cpu.go:282] Add success.
W0322 04:35:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:35:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:35:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:35:13.420361  543705 net.go:648] Add success.
I0322 04:35:13.423168  543705 net.go:770] primary dev: ETH0
I0322 04:35:13.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:35:13.423197  543705 net.go:698] Add success.
I0322 04:35:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:35:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:35:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 04:35:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:35:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 04:35:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:35:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:35:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:35:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:35:23.409768  543705 memory.go:184] no items to output this cycle
I0322 04:35:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 04:35:33.049674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:35:33.052182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:35:33.052188  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275e80 0xc000275ec0]
E0322 04:35:33.407513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:35:33.407526  543705 memory.go:184] no items to output this cycle
I0322 04:35:33.407526  543705 cpu.go:275] no items to output this cycle
E0322 04:35:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:35:43.409818  543705 memory.go:191] Add success.
I0322 04:35:43.409822  543705 cpu.go:282] Add success.
I0322 04:35:43.419951  543705 net.go:648] Add success.
I0322 04:35:43.422819  543705 net.go:770] primary dev: ETH0
I0322 04:35:43.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:35:43.422844  543705 net.go:698] Add success.
I0322 04:35:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:35:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:35:53.410487  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:35:53.410541  543705 memory.go:184] no items to output this cycle
I0322 04:35:53.410658  543705 cpu.go:275] no items to output this cycle
E0322 04:36:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:36:03.409780  543705 cpu.go:275] no items to output this cycle
I0322 04:36:03.409784  543705 memory.go:184] no items to output this cycle
E0322 04:36:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:36:13.409788  543705 memory.go:191] Add success.
I0322 04:36:13.409794  543705 cpu.go:282] Add success.
W0322 04:36:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:36:13.412807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:36:13.412813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:36:13.420402  543705 net.go:648] Add success.
I0322 04:36:13.422352  543705 net.go:770] primary dev: ETH0
I0322 04:36:13.422367  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:36:13.422380  543705 net.go:698] Add success.
I0322 04:36:13.467958  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b1b525b-e5ad-43bc-b774-0c471992e651","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:36:13.467991  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:36:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:36:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:36:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0322 04:36:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:36:14.456489  543705 disk_worker.go:494] system disk:vda1
I0322 04:36:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:36:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:36:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:36:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:36:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:36:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:36:23.409798  543705 memory.go:184] no items to output this cycle
I0322 04:36:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 04:36:33.053673  543705 disk_info.go:125] begin check local disk info of client
I0322 04:36:33.056160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:36:33.056166  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374780 0xc0003747c0]
E0322 04:36:33.407495  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:36:33.407512  543705 memory.go:184] no items to output this cycle
I0322 04:36:33.407546  543705 cpu.go:275] no items to output this cycle
I0322 04:36:39.496243  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:36:39.496250  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:36:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:36:43.410713  543705 memory.go:191] Add success.
I0322 04:36:43.409822  543705 cpu.go:282] Add success.
I0322 04:36:43.420408  543705 net.go:648] Add success.
I0322 04:36:43.423079  543705 net.go:770] primary dev: ETH0
I0322 04:36:43.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:36:43.423105  543705 net.go:698] Add success.
I0322 04:36:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:36:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:36:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:36:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:36:53.409814  543705 memory.go:184] no items to output this cycle
I0322 04:36:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 04:37:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:37:03.409783  543705 memory.go:184] no items to output this cycle
I0322 04:37:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 04:37:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:37:13.409788  543705 cpu.go:282] Add success.
I0322 04:37:13.409792  543705 memory.go:191] Add success.
W0322 04:37:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:37:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:37:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:37:13.420114  543705 net.go:648] Add success.
I0322 04:37:13.423178  543705 net.go:770] primary dev: ETH0
I0322 04:37:13.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:37:13.423209  543705 net.go:698] Add success.
I0322 04:37:13.452923  543705 event_worker.go:152] Polling the log file for events...
W0322 04:37:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:37:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 04:37:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:37:14.455867  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:37:14.455876  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:37:14.455881  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:37:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 04:37:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:37:15.456467  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:37:15.456475  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:37:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:37:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:37:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:37:16.457982  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:37:16.472298  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:37:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:37:23.409775  543705 memory.go:184] no items to output this cycle
I0322 04:37:23.409795  543705 cpu.go:275] no items to output this cycle
I0322 04:37:33.057677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:37:33.060144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:37:33.060151  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b680 0xc00007b6c0]
E0322 04:37:33.407978  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:37:33.407995  543705 memory.go:184] no items to output this cycle
I0322 04:37:33.408012  543705 cpu.go:275] no items to output this cycle
E0322 04:37:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:37:43.409807  543705 memory.go:191] Add success.
I0322 04:37:43.409808  543705 cpu.go:282] Add success.
I0322 04:37:43.419875  543705 net.go:648] Add success.
I0322 04:37:43.422581  543705 net.go:770] primary dev: ETH0
I0322 04:37:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:37:43.422610  543705 net.go:698] Add success.
I0322 04:37:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:37:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:37:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:37:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:37:53.409785  543705 memory.go:184] no items to output this cycle
I0322 04:37:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 04:38:03.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:38:03.409933  543705 memory.go:184] no items to output this cycle
I0322 04:38:03.410049  543705 cpu.go:275] no items to output this cycle
E0322 04:38:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:38:13.409789  543705 memory.go:191] Add success.
I0322 04:38:13.409792  543705 cpu.go:282] Add success.
W0322 04:38:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:38:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:38:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:38:13.420041  543705 net.go:648] Add success.
I0322 04:38:13.423229  543705 net.go:770] primary dev: ETH0
I0322 04:38:13.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:38:13.423267  543705 net.go:698] Add success.
I0322 04:38:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:38:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:38:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 04:38:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:38:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 04:38:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:38:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:38:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:38:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:38:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:38:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:38:23.409773  543705 memory.go:184] no items to output this cycle
I0322 04:38:23.409773  543705 cpu.go:275] no items to output this cycle
I0322 04:38:33.061672  543705 disk_info.go:125] begin check local disk info of client
I0322 04:38:33.064144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:38:33.064150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ffb80 0xc0004ffbc0]
E0322 04:38:33.407525  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:38:33.407536  543705 memory.go:184] no items to output this cycle
I0322 04:38:33.407539  543705 cpu.go:275] no items to output this cycle
E0322 04:38:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:38:43.409791  543705 memory.go:191] Add success.
I0322 04:38:43.409792  543705 cpu.go:282] Add success.
I0322 04:38:43.419972  543705 net.go:648] Add success.
I0322 04:38:43.423072  543705 net.go:770] primary dev: ETH0
I0322 04:38:43.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:38:43.423099  543705 net.go:698] Add success.
I0322 04:38:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:38:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:38:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:38:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:38:53.409783  543705 cpu.go:275] no items to output this cycle
I0322 04:38:53.409794  543705 memory.go:184] no items to output this cycle
E0322 04:39:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:39:03.409896  543705 memory.go:184] no items to output this cycle
I0322 04:39:03.409922  543705 cpu.go:275] no items to output this cycle
E0322 04:39:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:39:13.409787  543705 memory.go:191] Add success.
I0322 04:39:13.409808  543705 cpu.go:282] Add success.
W0322 04:39:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:39:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:39:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:39:13.420129  543705 net.go:648] Add success.
I0322 04:39:13.422919  543705 net.go:770] primary dev: ETH0
I0322 04:39:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:39:13.422947  543705 net.go:698] Add success.
I0322 04:39:13.464124  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f8fd99e1-905d-46f5-bf23-e64f358a7894","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:39:13.464156  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:39:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:39:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:39:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 04:39:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:39:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 04:39:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:39:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:39:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:39:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:39:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:39:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:39:23.409780  543705 memory.go:184] no items to output this cycle
I0322 04:39:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 04:39:33.065679  543705 disk_info.go:125] begin check local disk info of client
I0322 04:39:33.068203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:39:33.068209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a7c0 0xc00027a800]
E0322 04:39:33.408006  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:39:33.408023  543705 memory.go:184] no items to output this cycle
I0322 04:39:33.408038  543705 cpu.go:275] no items to output this cycle
I0322 04:39:39.497238  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:39:39.497245  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:39:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:39:43.410678  543705 memory.go:191] Add success.
I0322 04:39:43.409819  543705 cpu.go:282] Add success.
I0322 04:39:43.420380  543705 net.go:648] Add success.
I0322 04:39:43.423540  543705 net.go:770] primary dev: ETH0
I0322 04:39:43.423553  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:39:43.423565  543705 net.go:698] Add success.
I0322 04:39:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:39:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:39:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:39:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:39:53.409782  543705 memory.go:184] no items to output this cycle
I0322 04:39:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 04:40:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:40:03.409777  543705 memory.go:184] no items to output this cycle
I0322 04:40:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 04:40:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:40:13.409811  543705 memory.go:191] Add success.
I0322 04:40:13.409820  543705 cpu.go:282] Add success.
W0322 04:40:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:40:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:40:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:40:13.420280  543705 net.go:648] Add success.
I0322 04:40:13.423260  543705 net.go:770] primary dev: ETH0
I0322 04:40:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:40:13.423284  543705 net.go:698] Add success.
I0322 04:40:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:40:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:40:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0322 04:40:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:40:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 04:40:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:40:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:40:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:40:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:40:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:40:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 04:40:23.409786  543705 memory.go:184] no items to output this cycle
I0322 04:40:33.069678  543705 disk_info.go:125] begin check local disk info of client
I0322 04:40:33.072118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:40:33.072133  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c000 0xc00057c040]
E0322 04:40:33.407895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:40:33.407912  543705 memory.go:184] no items to output this cycle
I0322 04:40:33.407937  543705 cpu.go:275] no items to output this cycle
E0322 04:40:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:40:43.409816  543705 memory.go:191] Add success.
I0322 04:40:43.409820  543705 cpu.go:282] Add success.
I0322 04:40:43.419945  543705 net.go:648] Add success.
I0322 04:40:43.422535  543705 net.go:770] primary dev: ETH0
I0322 04:40:43.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:40:43.422566  543705 net.go:698] Add success.
I0322 04:40:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:40:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:40:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:40:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:40:53.409779  543705 memory.go:184] no items to output this cycle
I0322 04:40:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 04:41:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:41:03.409809  543705 memory.go:184] no items to output this cycle
I0322 04:41:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 04:41:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:41:13.409819  543705 memory.go:191] Add success.
I0322 04:41:13.409820  543705 cpu.go:282] Add success.
W0322 04:41:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:41:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:41:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:41:13.420154  543705 net.go:648] Add success.
I0322 04:41:13.422776  543705 net.go:770] primary dev: ETH0
I0322 04:41:13.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:41:13.422801  543705 net.go:698] Add success.
I0322 04:41:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:41:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:41:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 04:41:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:41:14.456577  543705 disk_worker.go:494] system disk:vda1
I0322 04:41:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:41:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:41:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:41:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:41:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:41:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:41:23.409777  543705 memory.go:184] no items to output this cycle
I0322 04:41:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 04:41:33.073675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:41:33.076175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:41:33.076181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003822c0 0xc000382300]
E0322 04:41:33.407970  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:41:33.407986  543705 memory.go:184] no items to output this cycle
I0322 04:41:33.408000  543705 cpu.go:275] no items to output this cycle
E0322 04:41:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:41:43.409799  543705 memory.go:191] Add success.
I0322 04:41:43.409802  543705 cpu.go:282] Add success.
I0322 04:41:43.419988  543705 net.go:648] Add success.
I0322 04:41:43.422584  543705 net.go:770] primary dev: ETH0
I0322 04:41:43.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:41:43.422612  543705 net.go:698] Add success.
I0322 04:41:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:41:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:41:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:41:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:41:53.409772  543705 memory.go:184] no items to output this cycle
I0322 04:41:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 04:42:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:42:03.409787  543705 memory.go:184] no items to output this cycle
I0322 04:42:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 04:42:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:42:13.409816  543705 memory.go:191] Add success.
I0322 04:42:13.409823  543705 cpu.go:282] Add success.
W0322 04:42:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:42:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:42:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:42:13.420207  543705 net.go:648] Add success.
I0322 04:42:13.423075  543705 net.go:770] primary dev: ETH0
I0322 04:42:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:42:13.423104  543705 net.go:698] Add success.
I0322 04:42:13.464304  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"57ea12a8-f303-42c1-b088-2bbef301540e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:42:13.464339  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 04:42:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:42:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 04:42:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:42:14.456971  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:42:14.456981  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:42:14.456987  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:42:14.457036  543705 disk_worker.go:494] system disk:vda1
I0322 04:42:14.457078  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:42:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:42:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:42:16.457903  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:42:16.457904  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:42:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:42:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:42:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:42:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:42:23.409796  543705 memory.go:184] no items to output this cycle
I0322 04:42:23.409808  543705 cpu.go:275] no items to output this cycle
I0322 04:42:33.077674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:42:33.080150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:42:33.080157  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb80 0xc0001abbc0]
E0322 04:42:33.407521  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:42:33.407535  543705 memory.go:184] no items to output this cycle
I0322 04:42:33.407544  543705 cpu.go:275] no items to output this cycle
I0322 04:42:39.498232  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:42:39.498238  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:42:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:42:43.410543  543705 memory.go:191] Add success.
I0322 04:42:43.409790  543705 cpu.go:282] Add success.
I0322 04:42:43.420310  543705 net.go:648] Add success.
I0322 04:42:43.423110  543705 net.go:770] primary dev: ETH0
I0322 04:42:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:42:43.423136  543705 net.go:698] Add success.
I0322 04:42:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:42:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:42:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:42:53.410225  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:42:53.410242  543705 cpu.go:275] no items to output this cycle
I0322 04:42:53.410245  543705 memory.go:184] no items to output this cycle
E0322 04:43:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:43:03.409785  543705 memory.go:184] no items to output this cycle
I0322 04:43:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 04:43:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:43:13.409811  543705 memory.go:191] Add success.
I0322 04:43:13.409814  543705 cpu.go:282] Add success.
W0322 04:43:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:43:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:43:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:43:13.420253  543705 net.go:648] Add success.
I0322 04:43:13.423099  543705 net.go:770] primary dev: ETH0
I0322 04:43:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:43:13.423124  543705 net.go:698] Add success.
I0322 04:43:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:43:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:43:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 04:43:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:43:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 04:43:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:43:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:43:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:43:23.409796  543705 memory.go:184] no items to output this cycle
I0322 04:43:23.409807  543705 cpu.go:275] no items to output this cycle
I0322 04:43:33.081675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:43:33.084138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:43:33.084145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348280 0xc0003482c0]
E0322 04:43:33.407845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:43:33.407860  543705 memory.go:184] no items to output this cycle
I0322 04:43:33.407874  543705 cpu.go:275] no items to output this cycle
E0322 04:43:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:43:43.409796  543705 memory.go:191] Add success.
I0322 04:43:43.409798  543705 cpu.go:282] Add success.
I0322 04:43:43.419948  543705 net.go:648] Add success.
I0322 04:43:43.422496  543705 net.go:770] primary dev: ETH0
I0322 04:43:43.422510  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:43:43.422527  543705 net.go:698] Add success.
I0322 04:43:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:43:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:43:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:43:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:43:53.409826  543705 memory.go:184] no items to output this cycle
I0322 04:43:53.409842  543705 cpu.go:275] no items to output this cycle
E0322 04:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:44:03.409777  543705 memory.go:184] no items to output this cycle
I0322 04:44:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 04:44:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:44:13.409788  543705 memory.go:191] Add success.
I0322 04:44:13.409814  543705 cpu.go:282] Add success.
W0322 04:44:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:44:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:44:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:44:13.420132  543705 net.go:648] Add success.
I0322 04:44:13.422659  543705 net.go:770] primary dev: ETH0
I0322 04:44:13.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:44:13.422688  543705 net.go:698] Add success.
I0322 04:44:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:44:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:44:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 04:44:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:44:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 04:44:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:44:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:44:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:44:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:44:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:44:23.409808  543705 memory.go:184] no items to output this cycle
I0322 04:44:23.409825  543705 cpu.go:275] no items to output this cycle
I0322 04:44:33.085681  543705 disk_info.go:125] begin check local disk info of client
I0322 04:44:33.088206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:44:33.088212  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046acc0 0xc00046ad00]
E0322 04:44:33.407900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:44:33.407919  543705 memory.go:184] no items to output this cycle
I0322 04:44:33.407942  543705 cpu.go:275] no items to output this cycle
E0322 04:44:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:44:43.409806  543705 memory.go:191] Add success.
I0322 04:44:43.409828  543705 cpu.go:282] Add success.
I0322 04:44:43.420009  543705 net.go:648] Add success.
I0322 04:44:43.422723  543705 net.go:770] primary dev: ETH0
I0322 04:44:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:44:43.422752  543705 net.go:698] Add success.
I0322 04:44:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:44:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:44:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:44:53.409807  543705 memory.go:184] no items to output this cycle
I0322 04:44:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 04:45:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:45:03.409772  543705 memory.go:184] no items to output this cycle
I0322 04:45:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 04:45:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:45:13.409773  543705 memory.go:191] Add success.
W0322 04:45:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:45:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:45:13.409809  543705 cpu.go:282] Add success.
I0322 04:45:13.409811  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:45:13.420041  543705 net.go:648] Add success.
I0322 04:45:13.422930  543705 net.go:770] primary dev: ETH0
I0322 04:45:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:45:13.422954  543705 net.go:698] Add success.
I0322 04:45:13.520545  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"163b9fdb-14ac-40ee-8206-16bff16c6345","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:45:13.520583  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:45:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:45:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:45:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 04:45:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:45:14.456477  543705 disk_worker.go:494] system disk:vda1
I0322 04:45:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:45:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:45:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:45:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:45:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:45:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:45:23.409798  543705 memory.go:184] no items to output this cycle
I0322 04:45:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 04:45:33.089678  543705 disk_info.go:125] begin check local disk info of client
I0322 04:45:33.092159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:45:33.092166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004861c0 0xc000486200]
E0322 04:45:33.407527  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:45:33.407549  543705 memory.go:184] no items to output this cycle
I0322 04:45:33.407549  543705 cpu.go:275] no items to output this cycle
I0322 04:45:39.499241  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:45:39.499248  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:45:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:45:43.410678  543705 memory.go:191] Add success.
I0322 04:45:43.409814  543705 cpu.go:282] Add success.
I0322 04:45:43.420662  543705 net.go:648] Add success.
I0322 04:45:43.422935  543705 net.go:770] primary dev: ETH0
I0322 04:45:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:45:43.422958  543705 net.go:698] Add success.
I0322 04:45:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:45:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:45:53.409824  543705 memory.go:184] no items to output this cycle
I0322 04:45:53.409837  543705 cpu.go:275] no items to output this cycle
E0322 04:46:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:46:03.409777  543705 memory.go:184] no items to output this cycle
I0322 04:46:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 04:46:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:46:13.409789  543705 memory.go:191] Add success.
I0322 04:46:13.409794  543705 cpu.go:282] Add success.
W0322 04:46:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:46:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:46:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:46:13.420106  543705 net.go:648] Add success.
I0322 04:46:13.422608  543705 net.go:770] primary dev: ETH0
I0322 04:46:13.422620  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:46:13.422633  543705 net.go:698] Add success.
I0322 04:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:46:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:46:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 04:46:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:46:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 04:46:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:46:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:46:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:46:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:46:23.409767  543705 memory.go:184] no items to output this cycle
I0322 04:46:23.409799  543705 cpu.go:275] no items to output this cycle
I0322 04:46:33.093676  543705 disk_info.go:125] begin check local disk info of client
I0322 04:46:33.096170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:46:33.096176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370d00 0xc000370d40]
E0322 04:46:33.407532  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:46:33.407545  543705 memory.go:184] no items to output this cycle
I0322 04:46:33.407547  543705 cpu.go:275] no items to output this cycle
E0322 04:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:46:43.409807  543705 memory.go:191] Add success.
I0322 04:46:43.409811  543705 cpu.go:282] Add success.
I0322 04:46:43.420035  543705 net.go:648] Add success.
I0322 04:46:43.422798  543705 net.go:770] primary dev: ETH0
I0322 04:46:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:46:43.422828  543705 net.go:698] Add success.
I0322 04:46:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:46:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:46:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:46:53.409787  543705 memory.go:184] no items to output this cycle
I0322 04:46:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 04:47:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:47:03.409784  543705 memory.go:184] no items to output this cycle
I0322 04:47:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 04:47:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:47:13.409824  543705 memory.go:191] Add success.
I0322 04:47:13.409830  543705 cpu.go:282] Add success.
W0322 04:47:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:47:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:47:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:47:13.420247  543705 net.go:648] Add success.
I0322 04:47:13.422890  543705 net.go:770] primary dev: ETH0
I0322 04:47:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:47:13.422919  543705 net.go:698] Add success.
I0322 04:47:13.453473  543705 event_worker.go:152] Polling the log file for events...
W0322 04:47:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:47:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 04:47:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:47:14.455865  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:47:14.455874  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:47:14.455879  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:47:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 04:47:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:47:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:47:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:47:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:47:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:47:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:47:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:47:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:47:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:47:23.409799  543705 memory.go:184] no items to output this cycle
I0322 04:47:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 04:47:33.097684  543705 disk_info.go:125] begin check local disk info of client
I0322 04:47:33.100185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:47:33.100192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de300 0xc0003de340]
E0322 04:47:33.407809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:47:33.407826  543705 memory.go:184] no items to output this cycle
I0322 04:47:33.407850  543705 cpu.go:275] no items to output this cycle
E0322 04:47:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:47:43.409807  543705 memory.go:191] Add success.
I0322 04:47:43.409824  543705 cpu.go:282] Add success.
I0322 04:47:43.419976  543705 net.go:648] Add success.
I0322 04:47:43.422552  543705 net.go:770] primary dev: ETH0
I0322 04:47:43.422566  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:47:43.422578  543705 net.go:698] Add success.
I0322 04:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:47:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:47:53.409792  543705 memory.go:184] no items to output this cycle
I0322 04:47:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 04:48:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:48:03.409785  543705 memory.go:184] no items to output this cycle
I0322 04:48:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 04:48:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:48:13.409782  543705 memory.go:191] Add success.
W0322 04:48:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:48:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:48:13.409820  543705 cpu.go:282] Add success.
I0322 04:48:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:48:13.420047  543705 net.go:648] Add success.
I0322 04:48:13.422889  543705 net.go:770] primary dev: ETH0
I0322 04:48:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:48:13.422913  543705 net.go:698] Add success.
I0322 04:48:13.468602  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a058900b-beba-4416-98e9-50e38332e63c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:48:13.468636  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:48:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:48:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 04:48:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:48:14.456489  543705 disk_worker.go:494] system disk:vda1
I0322 04:48:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:48:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:48:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:48:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:48:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:48:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:48:23.409764  543705 memory.go:184] no items to output this cycle
I0322 04:48:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 04:48:33.101677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:48:33.104166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:48:33.104172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1500 0xc0003c1540]
E0322 04:48:33.407780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:48:33.407796  543705 memory.go:184] no items to output this cycle
I0322 04:48:33.407812  543705 cpu.go:275] no items to output this cycle
I0322 04:48:39.500247  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:48:39.500253  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:48:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:48:43.410597  543705 memory.go:191] Add success.
I0322 04:48:43.409792  543705 cpu.go:282] Add success.
I0322 04:48:43.420706  543705 net.go:648] Add success.
I0322 04:48:43.423236  543705 net.go:770] primary dev: ETH0
I0322 04:48:43.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:48:43.423260  543705 net.go:698] Add success.
I0322 04:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:48:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:48:53.409777  543705 memory.go:184] no items to output this cycle
I0322 04:48:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 04:49:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:49:03.409762  543705 memory.go:184] no items to output this cycle
I0322 04:49:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 04:49:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:49:13.409781  543705 memory.go:191] Add success.
I0322 04:49:13.409801  543705 cpu.go:282] Add success.
W0322 04:49:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:49:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:49:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:49:13.420123  543705 net.go:648] Add success.
I0322 04:49:13.422733  543705 net.go:770] primary dev: ETH0
I0322 04:49:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:49:13.422757  543705 net.go:698] Add success.
I0322 04:49:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:49:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:49:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 04:49:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:49:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 04:49:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:49:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:49:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:49:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:49:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:49:23.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:49:23.410405  543705 memory.go:184] no items to output this cycle
I0322 04:49:23.410416  543705 cpu.go:275] no items to output this cycle
I0322 04:49:33.105677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:49:33.108253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:49:33.108259  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5780 0xc0000c57c0]
E0322 04:49:33.407514  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:49:33.407529  543705 memory.go:184] no items to output this cycle
I0322 04:49:33.407563  543705 cpu.go:275] no items to output this cycle
E0322 04:49:43.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:49:43.409918  543705 memory.go:191] Add success.
I0322 04:49:43.409968  543705 cpu.go:282] Add success.
I0322 04:49:43.419791  543705 net.go:648] Add success.
I0322 04:49:43.422159  543705 net.go:770] primary dev: ETH0
I0322 04:49:43.422174  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:49:43.422187  543705 net.go:698] Add success.
I0322 04:49:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:49:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:49:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:49:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:49:53.409779  543705 memory.go:184] no items to output this cycle
I0322 04:49:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 04:50:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:50:03.409775  543705 memory.go:184] no items to output this cycle
I0322 04:50:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 04:50:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:50:13.409811  543705 memory.go:191] Add success.
I0322 04:50:13.409818  543705 cpu.go:282] Add success.
W0322 04:50:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:50:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:50:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:50:13.420058  543705 net.go:648] Add success.
I0322 04:50:13.422583  543705 net.go:770] primary dev: ETH0
I0322 04:50:13.422597  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:50:13.422610  543705 net.go:698] Add success.
I0322 04:50:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:50:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:50:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 04:50:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:50:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 04:50:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:50:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:50:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:50:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:50:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:50:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:50:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:50:23.409806  543705 memory.go:184] no items to output this cycle
I0322 04:50:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 04:50:33.109675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:50:33.112440  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:50:33.112449  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2000 0xc0003b2040]
E0322 04:50:33.407719  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:50:33.407727  543705 cpu.go:275] no items to output this cycle
I0322 04:50:33.407730  543705 memory.go:184] no items to output this cycle
E0322 04:50:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:50:43.409784  543705 memory.go:191] Add success.
I0322 04:50:43.409816  543705 cpu.go:282] Add success.
I0322 04:50:43.420055  543705 net.go:648] Add success.
I0322 04:50:43.422482  543705 net.go:770] primary dev: ETH0
I0322 04:50:43.422495  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:50:43.422507  543705 net.go:698] Add success.
I0322 04:50:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:50:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:50:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:50:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:50:53.409776  543705 memory.go:184] no items to output this cycle
I0322 04:50:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 04:51:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:51:03.409796  543705 memory.go:184] no items to output this cycle
I0322 04:51:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 04:51:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:51:13.409788  543705 cpu.go:282] Add success.
I0322 04:51:13.409792  543705 memory.go:191] Add success.
W0322 04:51:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:51:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:51:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:51:13.420100  543705 net.go:648] Add success.
I0322 04:51:13.422811  543705 net.go:770] primary dev: ETH0
I0322 04:51:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:51:13.422838  543705 net.go:698] Add success.
I0322 04:51:13.469041  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d1a144a-54d5-47ce-afde-694bc44cde5f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:51:13.469078  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:51:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:51:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:51:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 04:51:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:51:14.456699  543705 disk_worker.go:494] system disk:vda1
I0322 04:51:14.456734  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:51:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:51:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:51:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:51:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:51:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:51:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 04:51:23.409792  543705 memory.go:184] no items to output this cycle
I0322 04:51:33.113686  543705 disk_info.go:125] begin check local disk info of client
I0322 04:51:33.116231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:51:33.116238  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e000 0xc00035e040]
E0322 04:51:33.407769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:51:33.407784  543705 memory.go:184] no items to output this cycle
I0322 04:51:33.407804  543705 cpu.go:275] no items to output this cycle
I0322 04:51:39.501256  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:51:39.501262  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:51:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:51:43.410647  543705 memory.go:191] Add success.
I0322 04:51:43.409811  543705 cpu.go:282] Add success.
I0322 04:51:43.420357  543705 net.go:648] Add success.
I0322 04:51:43.423007  543705 net.go:770] primary dev: ETH0
I0322 04:51:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:51:43.423034  543705 net.go:698] Add success.
I0322 04:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:51:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:51:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:51:53.409789  543705 memory.go:184] no items to output this cycle
I0322 04:51:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 04:52:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:52:03.409796  543705 memory.go:184] no items to output this cycle
I0322 04:52:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 04:52:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:52:13.409783  543705 memory.go:191] Add success.
I0322 04:52:13.409804  543705 cpu.go:282] Add success.
W0322 04:52:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:52:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:52:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:52:13.420052  543705 net.go:648] Add success.
I0322 04:52:13.422784  543705 net.go:770] primary dev: ETH0
I0322 04:52:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:52:13.422814  543705 net.go:698] Add success.
W0322 04:52:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:52:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 04:52:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:52:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:52:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:52:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:52:14.456531  543705 disk_worker.go:494] system disk:vda1
I0322 04:52:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:52:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:52:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:52:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:52:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:52:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:52:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:52:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:52:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:52:23.409772  543705 memory.go:184] no items to output this cycle
I0322 04:52:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 04:52:33.117674  543705 disk_info.go:125] begin check local disk info of client
I0322 04:52:33.120194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:52:33.120200  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f800 0xc00035f840]
E0322 04:52:33.407514  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:52:33.407534  543705 memory.go:184] no items to output this cycle
I0322 04:52:33.407539  543705 cpu.go:275] no items to output this cycle
E0322 04:52:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:52:43.409823  543705 memory.go:191] Add success.
I0322 04:52:43.409825  543705 cpu.go:282] Add success.
I0322 04:52:43.419959  543705 net.go:648] Add success.
I0322 04:52:43.422653  543705 net.go:770] primary dev: ETH0
I0322 04:52:43.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:52:43.422678  543705 net.go:698] Add success.
I0322 04:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:52:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:52:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:52:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:52:53.409810  543705 memory.go:184] no items to output this cycle
I0322 04:52:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 04:53:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:53:03.409773  543705 memory.go:184] no items to output this cycle
I0322 04:53:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 04:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:53:13.409783  543705 memory.go:191] Add success.
I0322 04:53:13.409794  543705 cpu.go:282] Add success.
W0322 04:53:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:53:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:53:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:53:13.420135  543705 net.go:648] Add success.
I0322 04:53:13.422770  543705 net.go:770] primary dev: ETH0
I0322 04:53:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:53:13.422796  543705 net.go:698] Add success.
I0322 04:53:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:53:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:53:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 04:53:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:53:14.458933  543705 disk_worker.go:494] system disk:vda1
I0322 04:53:14.458961  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:53:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:53:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:53:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:53:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:53:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:53:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:53:23.409805  543705 memory.go:184] no items to output this cycle
I0322 04:53:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 04:53:33.121676  543705 disk_info.go:125] begin check local disk info of client
I0322 04:53:33.124276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:53:33.124282  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003673c0 0xc000367400]
E0322 04:53:33.407771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:53:33.407781  543705 cpu.go:275] no items to output this cycle
I0322 04:53:33.407783  543705 memory.go:184] no items to output this cycle
E0322 04:53:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:53:43.409792  543705 memory.go:191] Add success.
I0322 04:53:43.409814  543705 cpu.go:282] Add success.
I0322 04:53:43.419909  543705 net.go:648] Add success.
I0322 04:53:43.422368  543705 net.go:770] primary dev: ETH0
I0322 04:53:43.422381  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:53:43.422395  543705 net.go:698] Add success.
I0322 04:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:53:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:53:53.409791  543705 cpu.go:275] no items to output this cycle
I0322 04:53:53.409795  543705 memory.go:184] no items to output this cycle
E0322 04:54:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:54:03.409770  543705 memory.go:184] no items to output this cycle
I0322 04:54:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 04:54:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:54:13.409784  543705 memory.go:191] Add success.
W0322 04:54:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:54:13.409815  543705 cpu.go:282] Add success.
W0322 04:54:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:54:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:54:13.420126  543705 net.go:648] Add success.
I0322 04:54:13.422600  543705 net.go:770] primary dev: ETH0
I0322 04:54:13.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:54:13.422629  543705 net.go:698] Add success.
I0322 04:54:13.463835  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4d6582a6-4f0a-498e-8ba5-b341d7ddc660","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:54:13.463989  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 04:54:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:54:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:54:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 04:54:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:54:14.456742  543705 disk_worker.go:494] system disk:vda1
I0322 04:54:14.456770  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:54:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:54:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:54:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:54:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:54:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:54:23.409781  543705 memory.go:184] no items to output this cycle
I0322 04:54:23.409814  543705 cpu.go:275] no items to output this cycle
I0322 04:54:33.125678  543705 disk_info.go:125] begin check local disk info of client
I0322 04:54:33.128202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:54:33.128208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bcc0 0xc00007bd00]
E0322 04:54:33.407527  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:54:33.407540  543705 memory.go:184] no items to output this cycle
I0322 04:54:33.407553  543705 cpu.go:275] no items to output this cycle
I0322 04:54:39.502262  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:54:39.502270  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:54:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:54:43.410654  543705 memory.go:191] Add success.
I0322 04:54:43.409792  543705 cpu.go:282] Add success.
I0322 04:54:43.420351  543705 net.go:648] Add success.
I0322 04:54:43.423146  543705 net.go:770] primary dev: ETH0
I0322 04:54:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:54:43.423173  543705 net.go:698] Add success.
I0322 04:54:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:54:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:54:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:54:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:54:53.409802  543705 memory.go:184] no items to output this cycle
I0322 04:54:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 04:55:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:55:03.409778  543705 memory.go:184] no items to output this cycle
I0322 04:55:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 04:55:13.409908  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:55:13.409972  543705 memory.go:191] Add success.
I0322 04:55:13.409977  543705 cpu.go:282] Add success.
W0322 04:55:13.410002  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:55:13.410015  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:55:13.410018  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:55:13.419741  543705 net.go:648] Add success.
I0322 04:55:13.422355  543705 net.go:770] primary dev: ETH0
I0322 04:55:13.422368  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:55:13.422380  543705 net.go:698] Add success.
I0322 04:55:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:55:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:55:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 04:55:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:55:14.456526  543705 disk_worker.go:494] system disk:vda1
I0322 04:55:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:55:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:55:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:55:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:55:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:55:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:55:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:55:23.409781  543705 memory.go:184] no items to output this cycle
I0322 04:55:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 04:55:33.129675  543705 disk_info.go:125] begin check local disk info of client
I0322 04:55:33.132185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:55:33.132192  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326a40 0xc000326a80]
E0322 04:55:33.407500  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:55:33.407517  543705 memory.go:184] no items to output this cycle
I0322 04:55:33.407549  543705 cpu.go:275] no items to output this cycle
E0322 04:55:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:55:43.409804  543705 memory.go:191] Add success.
I0322 04:55:43.409820  543705 cpu.go:282] Add success.
I0322 04:55:43.419880  543705 net.go:648] Add success.
I0322 04:55:43.422553  543705 net.go:770] primary dev: ETH0
I0322 04:55:43.422568  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:55:43.422588  543705 net.go:698] Add success.
I0322 04:55:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:55:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:55:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:55:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:55:53.409807  543705 memory.go:184] no items to output this cycle
I0322 04:55:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 04:56:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:56:03.409803  543705 memory.go:184] no items to output this cycle
I0322 04:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 04:56:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:56:13.409913  543705 memory.go:191] Add success.
I0322 04:56:13.409929  543705 cpu.go:282] Add success.
W0322 04:56:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:56:13.409959  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:56:13.409962  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:56:13.419753  543705 net.go:648] Add success.
I0322 04:56:13.422471  543705 net.go:770] primary dev: ETH0
I0322 04:56:13.422491  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:56:13.422505  543705 net.go:698] Add success.
I0322 04:56:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:56:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:56:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 04:56:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:56:14.456535  543705 disk_worker.go:494] system disk:vda1
I0322 04:56:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:56:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:56:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:56:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:56:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:56:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:56:23.409803  543705 memory.go:184] no items to output this cycle
I0322 04:56:23.409813  543705 cpu.go:275] no items to output this cycle
I0322 04:56:33.133680  543705 disk_info.go:125] begin check local disk info of client
I0322 04:56:33.136251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:56:33.136258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dcd40 0xc0004dcd80]
E0322 04:56:33.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:56:33.407522  543705 memory.go:184] no items to output this cycle
I0322 04:56:33.407552  543705 cpu.go:275] no items to output this cycle
E0322 04:56:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:56:43.409813  543705 memory.go:191] Add success.
I0322 04:56:43.409817  543705 cpu.go:282] Add success.
I0322 04:56:43.419877  543705 net.go:648] Add success.
I0322 04:56:43.422476  543705 net.go:770] primary dev: ETH0
I0322 04:56:43.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:56:43.422502  543705 net.go:698] Add success.
I0322 04:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:56:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:56:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:56:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:56:53.409809  543705 memory.go:184] no items to output this cycle
I0322 04:56:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 04:57:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:57:03.409789  543705 memory.go:184] no items to output this cycle
I0322 04:57:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 04:57:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:57:13.409822  543705 memory.go:191] Add success.
I0322 04:57:13.409828  543705 cpu.go:282] Add success.
W0322 04:57:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:57:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:57:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:57:13.420432  543705 net.go:648] Add success.
I0322 04:57:13.429049  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 04:57:13.429127  543705 net.go:770] primary dev: ETH0
I0322 04:57:13.429139  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:57:13.429149  543705 net.go:698] Add success.
I0322 04:57:13.452771  543705 event_worker.go:152] Polling the log file for events...
I0322 04:57:13.532225  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"40fd3e7c-8f2a-4bf5-85a0-d11ce9f88bf8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 04:57:13.532255  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 04:57:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:57:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 04:57:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0322 04:57:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 04:57:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 04:57:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0322 04:57:14.456636  543705 disk_worker.go:494] system disk:vda1
I0322 04:57:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 04:57:15.456451  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 04:57:15.456460  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:57:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 04:57:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 04:57:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:57:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:57:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:57:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:57:23.409817  543705 memory.go:184] no items to output this cycle
I0322 04:57:23.409826  543705 cpu.go:275] no items to output this cycle
I0322 04:57:33.137676  543705 disk_info.go:125] begin check local disk info of client
I0322 04:57:33.140244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:57:33.140250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8240 0xc0004d8280]
E0322 04:57:33.407518  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:57:33.407532  543705 memory.go:184] no items to output this cycle
I0322 04:57:33.407538  543705 cpu.go:275] no items to output this cycle
I0322 04:57:39.503261  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 04:57:39.503268  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 04:57:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:57:43.410603  543705 memory.go:191] Add success.
I0322 04:57:43.409820  543705 cpu.go:282] Add success.
I0322 04:57:43.420320  543705 net.go:648] Add success.
I0322 04:57:43.423235  543705 net.go:770] primary dev: ETH0
I0322 04:57:43.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:57:43.423261  543705 net.go:698] Add success.
I0322 04:57:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:57:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:57:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:57:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:57:53.409786  543705 memory.go:184] no items to output this cycle
I0322 04:57:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 04:58:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:58:03.409812  543705 memory.go:184] no items to output this cycle
I0322 04:58:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 04:58:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:58:13.409807  543705 memory.go:191] Add success.
W0322 04:58:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 04:58:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:58:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:58:13.410143  543705 cpu.go:282] Add success.
I0322 04:58:13.420277  543705 net.go:648] Add success.
I0322 04:58:13.421273  543705 net.go:770] primary dev: ETH0
I0322 04:58:13.421285  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:58:13.421298  543705 net.go:698] Add success.
I0322 04:58:14.453954  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:58:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:58:14.455249  543705 disk_worker.go:708] disk space is not compliant
W0322 04:58:14.455253  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:58:14.456617  543705 disk_worker.go:494] system disk:vda1
I0322 04:58:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:58:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:58:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:58:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:58:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:58:16.472476  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:58:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:58:23.409787  543705 memory.go:184] no items to output this cycle
I0322 04:58:23.409821  543705 cpu.go:275] no items to output this cycle
I0322 04:58:33.141677  543705 disk_info.go:125] begin check local disk info of client
I0322 04:58:33.144228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:58:33.144233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4280 0xc0000c42c0]
E0322 04:58:33.407653  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:58:33.407670  543705 memory.go:184] no items to output this cycle
I0322 04:58:33.407678  543705 cpu.go:275] no items to output this cycle
E0322 04:58:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:58:43.409786  543705 memory.go:191] Add success.
I0322 04:58:43.409817  543705 cpu.go:282] Add success.
I0322 04:58:43.419907  543705 net.go:648] Add success.
I0322 04:58:43.422620  543705 net.go:770] primary dev: ETH0
I0322 04:58:43.422634  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:58:43.422652  543705 net.go:698] Add success.
I0322 04:58:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:58:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:58:53.409802  543705 memory.go:184] no items to output this cycle
I0322 04:58:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 04:59:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:59:03.409779  543705 memory.go:184] no items to output this cycle
I0322 04:59:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 04:59:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:59:13.409780  543705 memory.go:191] Add success.
W0322 04:59:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 04:59:13.409825  543705 cpu.go:282] Add success.
W0322 04:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 04:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 04:59:13.420053  543705 net.go:648] Add success.
I0322 04:59:13.422701  543705 net.go:770] primary dev: ETH0
I0322 04:59:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:59:13.422736  543705 net.go:698] Add success.
I0322 04:59:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 04:59:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 04:59:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 04:59:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 04:59:14.456542  543705 disk_worker.go:494] system disk:vda1
I0322 04:59:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 04:59:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 04:59:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:59:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:59:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 04:59:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0322 04:59:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:59:23.409802  543705 memory.go:184] no items to output this cycle
I0322 04:59:23.409825  543705 cpu.go:275] no items to output this cycle
I0322 04:59:33.145676  543705 disk_info.go:125] begin check local disk info of client
I0322 04:59:33.148203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 04:59:33.148209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
E0322 04:59:33.407494  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:59:33.407507  543705 memory.go:184] no items to output this cycle
I0322 04:59:33.407544  543705 cpu.go:275] no items to output this cycle
E0322 04:59:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:59:43.409804  543705 memory.go:191] Add success.
I0322 04:59:43.409805  543705 cpu.go:282] Add success.
I0322 04:59:43.420061  543705 net.go:648] Add success.
I0322 04:59:43.422682  543705 net.go:770] primary dev: ETH0
I0322 04:59:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0322 04:59:43.422710  543705 net.go:698] Add success.
I0322 04:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 04:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 04:59:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 04:59:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 04:59:53.409786  543705 memory.go:184] no items to output this cycle
I0322 04:59:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 05:00:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:00:03.409777  543705 cpu.go:275] no items to output this cycle
I0322 05:00:03.409788  543705 memory.go:184] no items to output this cycle
E0322 05:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:00:13.409786  543705 memory.go:191] Add success.
I0322 05:00:13.409795  543705 cpu.go:282] Add success.
W0322 05:00:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:00:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:00:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:00:13.420090  543705 net.go:648] Add success.
I0322 05:00:13.422919  543705 net.go:770] primary dev: ETH0
I0322 05:00:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:00:13.422947  543705 net.go:698] Add success.
I0322 05:00:13.519054  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"467e845b-064a-43ac-b4a4-4226966292f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:00:13.519087  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:00:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:00:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:00:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 05:00:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:00:14.456645  543705 disk_worker.go:494] system disk:vda1
I0322 05:00:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:00:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:00:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:00:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:00:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:00:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:00:23.409803  543705 memory.go:184] no items to output this cycle
I0322 05:00:23.409814  543705 cpu.go:275] no items to output this cycle
I0322 05:00:33.149675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:00:33.152196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:00:33.152202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dd0c0 0xc0004dd100]
E0322 05:00:33.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:00:33.407525  543705 memory.go:184] no items to output this cycle
I0322 05:00:33.407554  543705 cpu.go:275] no items to output this cycle
I0322 05:00:39.504266  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:00:39.504272  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:00:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:00:43.410792  543705 memory.go:191] Add success.
I0322 05:00:43.409817  543705 cpu.go:282] Add success.
I0322 05:00:43.420506  543705 net.go:648] Add success.
I0322 05:00:43.423933  543705 net.go:770] primary dev: ETH0
I0322 05:00:43.423948  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:00:43.423963  543705 net.go:698] Add success.
I0322 05:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:00:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:00:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:00:53.410415  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:00:53.410436  543705 memory.go:184] no items to output this cycle
I0322 05:00:53.410444  543705 cpu.go:275] no items to output this cycle
E0322 05:01:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:01:03.409778  543705 memory.go:184] no items to output this cycle
I0322 05:01:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 05:01:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:01:13.409793  543705 memory.go:191] Add success.
I0322 05:01:13.409794  543705 cpu.go:282] Add success.
W0322 05:01:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:01:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:01:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:01:13.420141  543705 net.go:648] Add success.
I0322 05:01:13.423239  543705 net.go:770] primary dev: ETH0
I0322 05:01:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:01:13.423264  543705 net.go:698] Add success.
I0322 05:01:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:01:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:01:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 05:01:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:01:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 05:01:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:01:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:01:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:01:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:01:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:01:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:01:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:01:23.409799  543705 memory.go:184] no items to output this cycle
I0322 05:01:23.409807  543705 cpu.go:275] no items to output this cycle
I0322 05:01:33.153676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:01:33.156215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:01:33.156221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491d00 0xc000491d40]
E0322 05:01:33.407550  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:01:33.407562  543705 cpu.go:275] no items to output this cycle
I0322 05:01:33.407564  543705 memory.go:184] no items to output this cycle
E0322 05:01:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:01:43.409811  543705 memory.go:191] Add success.
I0322 05:01:43.409812  543705 cpu.go:282] Add success.
I0322 05:01:43.419956  543705 net.go:648] Add success.
I0322 05:01:43.422667  543705 net.go:770] primary dev: ETH0
I0322 05:01:43.422681  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:01:43.422694  543705 net.go:698] Add success.
I0322 05:01:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:01:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:01:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:01:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:01:53.409808  543705 memory.go:184] no items to output this cycle
I0322 05:01:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 05:02:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:02:03.409769  543705 memory.go:184] no items to output this cycle
I0322 05:02:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 05:02:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:02:13.409812  543705 memory.go:191] Add success.
I0322 05:02:13.409815  543705 cpu.go:282] Add success.
W0322 05:02:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:02:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:02:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:02:13.420212  543705 net.go:648] Add success.
I0322 05:02:13.423061  543705 net.go:770] primary dev: ETH0
I0322 05:02:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:02:13.423086  543705 net.go:698] Add success.
W0322 05:02:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:02:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 05:02:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:02:14.456522  543705 disk_worker.go:494] system disk:vda1
I0322 05:02:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:02:14.457872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:02:14.457887  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:02:14.457893  543705 custom_config.go:64] query custom config with name: gpu
E0322 05:02:15.456785  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:02:15.456794  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:02:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:02:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:02:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:02:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:02:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:02:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:02:23.409776  543705 memory.go:184] no items to output this cycle
I0322 05:02:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:02:33.157676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:02:33.160194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:02:33.160201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4b00 0xc0002a4b40]
E0322 05:02:33.407524  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:02:33.407541  543705 memory.go:184] no items to output this cycle
I0322 05:02:33.407554  543705 cpu.go:275] no items to output this cycle
E0322 05:02:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:02:43.409781  543705 memory.go:191] Add success.
I0322 05:02:43.409844  543705 cpu.go:282] Add success.
I0322 05:02:43.420047  543705 net.go:648] Add success.
I0322 05:02:43.422554  543705 net.go:770] primary dev: ETH0
I0322 05:02:43.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:02:43.422585  543705 net.go:698] Add success.
I0322 05:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:02:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:02:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:02:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:02:53.409787  543705 memory.go:184] no items to output this cycle
I0322 05:02:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 05:03:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:03:03.409772  543705 memory.go:184] no items to output this cycle
I0322 05:03:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 05:03:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:03:13.409820  543705 memory.go:191] Add success.
I0322 05:03:13.409826  543705 cpu.go:282] Add success.
W0322 05:03:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:03:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:03:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:03:13.420169  543705 net.go:648] Add success.
I0322 05:03:13.423407  543705 net.go:770] primary dev: ETH0
I0322 05:03:13.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:03:13.423432  543705 net.go:698] Add success.
I0322 05:03:13.467538  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8303bdd7-5e92-4cc0-a0d8-048a4e5123f0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:03:13.467571  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:03:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:03:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 05:03:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:03:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 05:03:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:03:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:03:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:03:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:03:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:03:23.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:03:23.409860  543705 cpu.go:275] no items to output this cycle
I0322 05:03:23.409874  543705 memory.go:184] no items to output this cycle
I0322 05:03:33.161675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:03:33.164267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:03:33.164272  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035afc0 0xc00035b000]
E0322 05:03:33.407511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:03:33.407526  543705 memory.go:184] no items to output this cycle
I0322 05:03:33.407528  543705 cpu.go:275] no items to output this cycle
I0322 05:03:39.505266  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:03:39.505273  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:03:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:03:43.410568  543705 memory.go:191] Add success.
I0322 05:03:43.409820  543705 cpu.go:282] Add success.
I0322 05:03:43.420287  543705 net.go:648] Add success.
I0322 05:03:43.422779  543705 net.go:770] primary dev: ETH0
I0322 05:03:43.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:03:43.422806  543705 net.go:698] Add success.
I0322 05:03:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:03:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:03:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:03:53.409811  543705 memory.go:184] no items to output this cycle
I0322 05:03:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 05:04:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:04:03.409771  543705 memory.go:184] no items to output this cycle
I0322 05:04:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 05:04:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:04:13.409816  543705 memory.go:191] Add success.
I0322 05:04:13.409823  543705 cpu.go:282] Add success.
W0322 05:04:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:04:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:04:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:04:13.420075  543705 net.go:648] Add success.
I0322 05:04:13.422999  543705 net.go:770] primary dev: ETH0
I0322 05:04:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:04:13.423029  543705 net.go:698] Add success.
I0322 05:04:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:04:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:04:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 05:04:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:04:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 05:04:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:04:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:04:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:04:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:04:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:04:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:04:23.409795  543705 memory.go:184] no items to output this cycle
I0322 05:04:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 05:04:33.165680  543705 disk_info.go:125] begin check local disk info of client
I0322 05:04:33.168188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:04:33.168195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f02c0 0xc0004f0300]
E0322 05:04:33.407513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:04:33.407530  543705 memory.go:184] no items to output this cycle
I0322 05:04:33.407547  543705 cpu.go:275] no items to output this cycle
E0322 05:04:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:04:43.409787  543705 memory.go:191] Add success.
I0322 05:04:43.409789  543705 cpu.go:282] Add success.
I0322 05:04:43.419974  543705 net.go:648] Add success.
I0322 05:04:43.422741  543705 net.go:770] primary dev: ETH0
I0322 05:04:43.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:04:43.422771  543705 net.go:698] Add success.
I0322 05:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:04:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:04:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:04:53.410282  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:04:53.410301  543705 memory.go:184] no items to output this cycle
I0322 05:04:53.410304  543705 cpu.go:275] no items to output this cycle
E0322 05:05:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:05:03.409800  543705 memory.go:184] no items to output this cycle
I0322 05:05:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 05:05:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:05:13.409786  543705 memory.go:191] Add success.
I0322 05:05:13.409805  543705 cpu.go:282] Add success.
W0322 05:05:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:05:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:05:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:05:13.420168  543705 net.go:648] Add success.
I0322 05:05:13.422812  543705 net.go:770] primary dev: ETH0
I0322 05:05:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:05:13.422840  543705 net.go:698] Add success.
I0322 05:05:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:05:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:05:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 05:05:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:05:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 05:05:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:05:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:05:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:05:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:05:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:05:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:05:23.409765  543705 memory.go:184] no items to output this cycle
I0322 05:05:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 05:05:33.169671  543705 disk_info.go:125] begin check local disk info of client
I0322 05:05:33.172166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:05:33.172172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492440 0xc000492480]
E0322 05:05:33.408495  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:05:33.408510  543705 memory.go:184] no items to output this cycle
I0322 05:05:33.408530  543705 cpu.go:275] no items to output this cycle
E0322 05:05:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:05:43.409804  543705 memory.go:191] Add success.
I0322 05:05:43.409822  543705 cpu.go:282] Add success.
I0322 05:05:43.419972  543705 net.go:648] Add success.
I0322 05:05:43.422905  543705 net.go:770] primary dev: ETH0
I0322 05:05:43.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:05:43.422932  543705 net.go:698] Add success.
I0322 05:05:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:05:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:05:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:05:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:05:53.409785  543705 cpu.go:275] no items to output this cycle
I0322 05:05:53.409791  543705 memory.go:184] no items to output this cycle
E0322 05:06:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:06:03.409801  543705 memory.go:184] no items to output this cycle
I0322 05:06:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 05:06:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:06:13.409782  543705 memory.go:191] Add success.
W0322 05:06:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 05:06:13.409816  543705 cpu.go:282] Add success.
W0322 05:06:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:06:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:06:13.420122  543705 net.go:648] Add success.
I0322 05:06:13.422795  543705 net.go:770] primary dev: ETH0
I0322 05:06:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:06:13.422821  543705 net.go:698] Add success.
I0322 05:06:13.468335  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ec74b328-bfe1-4419-bf84-800566d24d3d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:06:13.468368  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:06:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:06:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:06:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 05:06:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:06:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 05:06:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:06:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:06:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:06:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:06:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:06:23.409797  543705 memory.go:184] no items to output this cycle
I0322 05:06:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 05:06:33.173674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:06:33.176202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:06:33.176208  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386c80 0xc000386cc0]
E0322 05:06:33.407449  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:06:33.407455  543705 cpu.go:275] no items to output this cycle
I0322 05:06:33.407465  543705 memory.go:184] no items to output this cycle
I0322 05:06:39.506300  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:06:39.506306  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:06:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:06:43.410760  543705 memory.go:191] Add success.
I0322 05:06:43.409830  543705 cpu.go:282] Add success.
I0322 05:06:43.420477  543705 net.go:648] Add success.
I0322 05:06:43.422909  543705 net.go:770] primary dev: ETH0
I0322 05:06:43.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:06:43.422933  543705 net.go:698] Add success.
I0322 05:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:06:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:06:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:06:53.410259  543705 memory.go:184] no items to output this cycle
I0322 05:06:53.410272  543705 cpu.go:275] no items to output this cycle
E0322 05:07:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:07:03.409787  543705 cpu.go:275] no items to output this cycle
I0322 05:07:03.409793  543705 memory.go:184] no items to output this cycle
E0322 05:07:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:07:13.409811  543705 memory.go:191] Add success.
I0322 05:07:13.409818  543705 cpu.go:282] Add success.
W0322 05:07:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:07:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:07:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:07:13.420050  543705 net.go:648] Add success.
I0322 05:07:13.422846  543705 net.go:770] primary dev: ETH0
I0322 05:07:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:07:13.422875  543705 net.go:698] Add success.
I0322 05:07:13.453432  543705 event_worker.go:152] Polling the log file for events...
W0322 05:07:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:07:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 05:07:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:07:14.456920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:07:14.456929  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:07:14.456935  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:07:14.456984  543705 disk_worker.go:494] system disk:vda1
I0322 05:07:14.457027  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:07:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:07:15.456874  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:07:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:07:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:07:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:07:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:07:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:07:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:07:23.409766  543705 memory.go:184] no items to output this cycle
I0322 05:07:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 05:07:33.177678  543705 disk_info.go:125] begin check local disk info of client
I0322 05:07:33.180180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:07:33.180186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004730c0 0xc000473100]
E0322 05:07:33.408475  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:07:33.408493  543705 memory.go:184] no items to output this cycle
I0322 05:07:33.408506  543705 cpu.go:275] no items to output this cycle
E0322 05:07:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:07:43.409804  543705 memory.go:191] Add success.
I0322 05:07:43.409808  543705 cpu.go:282] Add success.
I0322 05:07:43.419989  543705 net.go:648] Add success.
I0322 05:07:43.422451  543705 net.go:770] primary dev: ETH0
I0322 05:07:43.422464  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:07:43.422477  543705 net.go:698] Add success.
I0322 05:07:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:07:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:07:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:07:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:07:53.409782  543705 memory.go:184] no items to output this cycle
I0322 05:07:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 05:08:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:08:03.409776  543705 memory.go:184] no items to output this cycle
I0322 05:08:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 05:08:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:08:13.409818  543705 memory.go:191] Add success.
I0322 05:08:13.409829  543705 cpu.go:282] Add success.
W0322 05:08:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:08:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:08:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:08:13.420196  543705 net.go:648] Add success.
I0322 05:08:13.422802  543705 net.go:770] primary dev: ETH0
I0322 05:08:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:08:13.422826  543705 net.go:698] Add success.
I0322 05:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:08:14.455088  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:08:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0322 05:08:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:08:14.456485  543705 disk_worker.go:494] system disk:vda1
I0322 05:08:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:08:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:08:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:08:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:08:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:08:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:08:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:08:23.409801  543705 memory.go:184] no items to output this cycle
I0322 05:08:23.409809  543705 cpu.go:275] no items to output this cycle
I0322 05:08:33.181676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:08:33.184240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:08:33.184245  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e840 0xc00034e880]
E0322 05:08:33.407502  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:08:33.407514  543705 memory.go:184] no items to output this cycle
I0322 05:08:33.407535  543705 cpu.go:275] no items to output this cycle
E0322 05:08:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:08:43.409913  543705 memory.go:191] Add success.
I0322 05:08:43.409930  543705 cpu.go:282] Add success.
I0322 05:08:43.419712  543705 net.go:648] Add success.
I0322 05:08:43.422078  543705 net.go:770] primary dev: ETH0
I0322 05:08:43.422092  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:08:43.422103  543705 net.go:698] Add success.
I0322 05:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:08:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:08:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:08:53.409781  543705 memory.go:184] no items to output this cycle
I0322 05:08:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 05:09:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:09:03.409800  543705 memory.go:184] no items to output this cycle
I0322 05:09:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 05:09:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:09:13.409820  543705 memory.go:191] Add success.
I0322 05:09:13.409834  543705 cpu.go:282] Add success.
W0322 05:09:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:09:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:09:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:09:13.420304  543705 net.go:648] Add success.
I0322 05:09:13.422873  543705 net.go:770] primary dev: ETH0
I0322 05:09:13.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:09:13.422897  543705 net.go:698] Add success.
I0322 05:09:13.468260  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d529abc9-c890-4126-8948-824b9add05b1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:09:13.468295  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:09:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:09:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 05:09:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:09:14.456671  543705 disk_worker.go:494] system disk:vda1
I0322 05:09:14.456701  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:09:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:09:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:09:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:09:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:09:23.409774  543705 memory.go:184] no items to output this cycle
I0322 05:09:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 05:09:33.185677  543705 disk_info.go:125] begin check local disk info of client
I0322 05:09:33.188215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:09:33.188222  543705 disk_info.go:196] parse disk info done, disk is : [0xc000461480 0xc0004614c0]
E0322 05:09:33.407526  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:09:33.407539  543705 memory.go:184] no items to output this cycle
I0322 05:09:33.407566  543705 cpu.go:275] no items to output this cycle
I0322 05:09:39.507288  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:09:39.507295  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:09:43.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:09:43.410696  543705 memory.go:191] Add success.
I0322 05:09:43.409980  543705 cpu.go:282] Add success.
I0322 05:09:43.419723  543705 net.go:648] Add success.
I0322 05:09:43.422321  543705 net.go:770] primary dev: ETH0
I0322 05:09:43.422334  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:09:43.422346  543705 net.go:698] Add success.
I0322 05:09:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:09:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:09:53.409790  543705 memory.go:184] no items to output this cycle
I0322 05:09:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 05:10:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:10:03.409797  543705 memory.go:184] no items to output this cycle
I0322 05:10:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 05:10:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:10:13.409795  543705 memory.go:191] Add success.
I0322 05:10:13.409810  543705 cpu.go:282] Add success.
W0322 05:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:10:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:10:13.420607  543705 net.go:648] Add success.
I0322 05:10:13.423293  543705 net.go:770] primary dev: ETH0
I0322 05:10:13.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:10:13.423319  543705 net.go:698] Add success.
I0322 05:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:10:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:10:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 05:10:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:10:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 05:10:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:10:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:10:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:10:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:10:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:10:23.409775  543705 memory.go:184] no items to output this cycle
I0322 05:10:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 05:10:33.189674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:10:33.192156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:10:33.192162  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4780 0xc0004b47c0]
E0322 05:10:33.407511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:10:33.407523  543705 memory.go:184] no items to output this cycle
I0322 05:10:33.407548  543705 cpu.go:275] no items to output this cycle
E0322 05:10:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:10:43.409789  543705 memory.go:191] Add success.
I0322 05:10:43.409807  543705 cpu.go:282] Add success.
I0322 05:10:43.419719  543705 net.go:648] Add success.
I0322 05:10:43.422118  543705 net.go:770] primary dev: ETH0
I0322 05:10:43.422130  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:10:43.422141  543705 net.go:698] Add success.
I0322 05:10:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:10:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:10:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:10:53.409806  543705 memory.go:184] no items to output this cycle
I0322 05:10:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 05:11:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:11:03.409797  543705 cpu.go:275] no items to output this cycle
I0322 05:11:03.409810  543705 memory.go:184] no items to output this cycle
E0322 05:11:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:11:13.409810  543705 memory.go:191] Add success.
I0322 05:11:13.409824  543705 cpu.go:282] Add success.
W0322 05:11:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:11:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:11:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:11:13.420129  543705 net.go:648] Add success.
I0322 05:11:13.422558  543705 net.go:770] primary dev: ETH0
I0322 05:11:13.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:11:13.422582  543705 net.go:698] Add success.
I0322 05:11:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:11:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:11:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 05:11:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:11:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 05:11:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:11:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:11:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:11:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:11:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:11:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:11:23.409794  543705 memory.go:184] no items to output this cycle
I0322 05:11:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:11:33.193677  543705 disk_info.go:125] begin check local disk info of client
I0322 05:11:33.196221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:11:33.196228  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e680 0xc00037e6c0]
E0322 05:11:33.408396  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:11:33.408408  543705 memory.go:184] no items to output this cycle
I0322 05:11:33.408451  543705 cpu.go:275] no items to output this cycle
E0322 05:11:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:11:43.409819  543705 memory.go:191] Add success.
I0322 05:11:43.409824  543705 cpu.go:282] Add success.
I0322 05:11:43.420129  543705 net.go:648] Add success.
I0322 05:11:43.422652  543705 net.go:770] primary dev: ETH0
I0322 05:11:43.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:11:43.422681  543705 net.go:698] Add success.
I0322 05:11:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:11:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:11:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:11:53.409796  543705 memory.go:184] no items to output this cycle
I0322 05:11:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 05:12:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:12:03.409799  543705 memory.go:184] no items to output this cycle
I0322 05:12:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 05:12:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:12:13.409814  543705 memory.go:191] Add success.
I0322 05:12:13.409824  543705 cpu.go:282] Add success.
W0322 05:12:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:12:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:12:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:12:13.420205  543705 net.go:648] Add success.
I0322 05:12:13.422861  543705 net.go:770] primary dev: ETH0
I0322 05:12:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:12:13.422889  543705 net.go:698] Add success.
I0322 05:12:13.468534  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2dfc4cd6-771a-4a96-80b6-464a2628a910","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:12:13.468577  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 05:12:14.455222  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:12:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0322 05:12:14.455242  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:12:14.456299  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:12:14.456308  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:12:14.456313  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:12:14.457272  543705 disk_worker.go:494] system disk:vda1
I0322 05:12:14.457301  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:12:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:12:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:12:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:12:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:12:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:12:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:12:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:12:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:12:23.409788  543705 memory.go:184] no items to output this cycle
I0322 05:12:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:12:33.197674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:12:33.200155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:12:33.200161  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469340 0xc000469380]
E0322 05:12:33.408338  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:12:33.408354  543705 memory.go:184] no items to output this cycle
I0322 05:12:33.408369  543705 cpu.go:275] no items to output this cycle
I0322 05:12:39.508293  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:12:39.508299  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:12:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:12:43.410596  543705 memory.go:191] Add success.
I0322 05:12:43.409795  543705 cpu.go:282] Add success.
I0322 05:12:43.420545  543705 net.go:648] Add success.
I0322 05:12:43.423555  543705 net.go:770] primary dev: ETH0
I0322 05:12:43.423580  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:12:43.423592  543705 net.go:698] Add success.
I0322 05:12:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:12:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:12:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:12:53.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:12:53.410258  543705 memory.go:184] no items to output this cycle
I0322 05:12:53.410277  543705 cpu.go:275] no items to output this cycle
E0322 05:13:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:13:03.409806  543705 memory.go:184] no items to output this cycle
I0322 05:13:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 05:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:13:13.409787  543705 cpu.go:282] Add success.
I0322 05:13:13.409794  543705 memory.go:191] Add success.
W0322 05:13:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:13:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:13:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:13:13.420061  543705 net.go:648] Add success.
I0322 05:13:13.422737  543705 net.go:770] primary dev: ETH0
I0322 05:13:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:13:13.422768  543705 net.go:698] Add success.
I0322 05:13:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:13:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:13:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 05:13:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:13:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 05:13:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:13:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:13:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:13:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:13:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:13:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:13:23.409798  543705 memory.go:184] no items to output this cycle
I0322 05:13:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 05:13:33.201673  543705 disk_info.go:125] begin check local disk info of client
I0322 05:13:33.204168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:13:33.204174  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f500 0xc00037f540]
E0322 05:13:33.407525  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:13:33.407536  543705 memory.go:184] no items to output this cycle
I0322 05:13:33.407536  543705 cpu.go:275] no items to output this cycle
E0322 05:13:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:13:43.409825  543705 memory.go:191] Add success.
I0322 05:13:43.409829  543705 cpu.go:282] Add success.
I0322 05:13:43.420591  543705 net.go:648] Add success.
I0322 05:13:43.423569  543705 net.go:770] primary dev: ETH0
I0322 05:13:43.423583  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:13:43.423596  543705 net.go:698] Add success.
I0322 05:13:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:13:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:13:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:13:53.409806  543705 memory.go:184] no items to output this cycle
I0322 05:13:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 05:14:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:14:03.409776  543705 memory.go:184] no items to output this cycle
I0322 05:14:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 05:14:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:14:13.409823  543705 memory.go:191] Add success.
I0322 05:14:13.409838  543705 cpu.go:282] Add success.
W0322 05:14:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:14:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:14:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:14:13.420113  543705 net.go:648] Add success.
I0322 05:14:13.422706  543705 net.go:770] primary dev: ETH0
I0322 05:14:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:14:13.422729  543705 net.go:698] Add success.
I0322 05:14:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:14:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:14:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 05:14:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:14:14.456490  543705 disk_worker.go:494] system disk:vda1
I0322 05:14:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:14:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:14:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:14:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:14:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:14:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:14:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:14:23.409788  543705 memory.go:184] no items to output this cycle
I0322 05:14:23.409795  543705 cpu.go:275] no items to output this cycle
I0322 05:14:33.205692  543705 disk_info.go:125] begin check local disk info of client
I0322 05:14:33.208203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:14:33.208210  543705 disk_info.go:196] parse disk info done, disk is : [0xc000503ac0 0xc000503b00]
E0322 05:14:33.407524  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:14:33.407534  543705 cpu.go:275] no items to output this cycle
I0322 05:14:33.407535  543705 memory.go:184] no items to output this cycle
E0322 05:14:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:14:43.409824  543705 memory.go:191] Add success.
I0322 05:14:43.409830  543705 cpu.go:282] Add success.
I0322 05:14:43.419921  543705 net.go:648] Add success.
I0322 05:14:43.422798  543705 net.go:770] primary dev: ETH0
I0322 05:14:43.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:14:43.422823  543705 net.go:698] Add success.
I0322 05:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:14:53.409914  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:14:53.409934  543705 cpu.go:275] no items to output this cycle
I0322 05:14:53.409941  543705 memory.go:184] no items to output this cycle
E0322 05:15:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:15:03.409791  543705 memory.go:184] no items to output this cycle
I0322 05:15:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 05:15:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:15:13.409801  543705 memory.go:191] Add success.
I0322 05:15:13.409805  543705 cpu.go:282] Add success.
W0322 05:15:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:15:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:15:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:15:13.420184  543705 net.go:648] Add success.
I0322 05:15:13.422908  543705 net.go:770] primary dev: ETH0
I0322 05:15:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:15:13.422933  543705 net.go:698] Add success.
I0322 05:15:13.504441  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"63b2039f-ff08-45ab-b0eb-8213c48e4ac1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:15:13.504474  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:15:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:15:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:15:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 05:15:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:15:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 05:15:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:15:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:15:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:15:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:15:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:15:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:15:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:15:23.409784  543705 memory.go:184] no items to output this cycle
I0322 05:15:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:15:33.209677  543705 disk_info.go:125] begin check local disk info of client
I0322 05:15:33.212160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:15:33.212166  543705 disk_info.go:196] parse disk info done, disk is : [0xc000507b80 0xc000507bc0]
E0322 05:15:33.408300  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:15:33.408317  543705 memory.go:184] no items to output this cycle
I0322 05:15:33.408331  543705 cpu.go:275] no items to output this cycle
I0322 05:15:39.509291  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:15:39.509298  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:15:43.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:15:43.409826  543705 cpu.go:282] Add success.
I0322 05:15:43.409844  543705 memory.go:191] Add success.
I0322 05:15:43.420183  543705 net.go:648] Add success.
I0322 05:15:43.421154  543705 net.go:770] primary dev: ETH0
I0322 05:15:43.421171  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:15:43.421188  543705 net.go:698] Add success.
I0322 05:15:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:15:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:15:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:15:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:15:53.409801  543705 memory.go:184] no items to output this cycle
I0322 05:15:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 05:16:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:16:03.409793  543705 memory.go:184] no items to output this cycle
I0322 05:16:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 05:16:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:16:13.409832  543705 memory.go:191] Add success.
I0322 05:16:13.409848  543705 cpu.go:282] Add success.
W0322 05:16:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:16:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:16:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:16:13.420356  543705 net.go:648] Add success.
I0322 05:16:13.422925  543705 net.go:770] primary dev: ETH0
I0322 05:16:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:16:13.422950  543705 net.go:698] Add success.
I0322 05:16:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:16:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:16:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 05:16:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:16:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 05:16:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:16:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:16:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:16:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:16:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:16:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:16:23.409774  543705 memory.go:184] no items to output this cycle
I0322 05:16:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 05:16:33.213676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:16:33.216227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:16:33.216233  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298440 0xc000298480]
E0322 05:16:33.407510  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:16:33.407522  543705 memory.go:184] no items to output this cycle
I0322 05:16:33.407526  543705 cpu.go:275] no items to output this cycle
E0322 05:16:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:16:43.409827  543705 memory.go:191] Add success.
I0322 05:16:43.409840  543705 cpu.go:282] Add success.
I0322 05:16:43.420062  543705 net.go:648] Add success.
I0322 05:16:43.422663  543705 net.go:770] primary dev: ETH0
I0322 05:16:43.422679  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:16:43.422693  543705 net.go:698] Add success.
I0322 05:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:16:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:16:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:16:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:16:53.409781  543705 memory.go:184] no items to output this cycle
I0322 05:16:53.409817  543705 cpu.go:275] no items to output this cycle
I0322 05:17:03.409917  543705 cpu.go:275] no items to output this cycle
E0322 05:17:03.409969  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:17:03.409989  543705 memory.go:184] no items to output this cycle
E0322 05:17:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:17:13.409786  543705 memory.go:191] Add success.
I0322 05:17:13.409789  543705 cpu.go:282] Add success.
W0322 05:17:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:17:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:17:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:17:13.420094  543705 net.go:648] Add success.
I0322 05:17:13.423190  543705 net.go:770] primary dev: ETH0
I0322 05:17:13.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:17:13.423217  543705 net.go:698] Add success.
I0322 05:17:13.452871  543705 event_worker.go:152] Polling the log file for events...
W0322 05:17:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:17:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 05:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:17:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:17:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:17:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:17:14.456544  543705 disk_worker.go:494] system disk:vda1
I0322 05:17:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:17:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:17:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:17:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:17:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:17:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:17:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:17:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:17:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:17:23.409775  543705 memory.go:184] no items to output this cycle
I0322 05:17:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 05:17:33.217675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:17:33.220171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:17:33.220178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464bc0 0xc000464c00]
E0322 05:17:33.407503  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:17:33.407517  543705 memory.go:184] no items to output this cycle
I0322 05:17:33.407545  543705 cpu.go:275] no items to output this cycle
E0322 05:17:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:17:43.409816  543705 cpu.go:282] Add success.
I0322 05:17:43.409834  543705 memory.go:191] Add success.
I0322 05:17:43.420121  543705 net.go:648] Add success.
I0322 05:17:43.422689  543705 net.go:770] primary dev: ETH0
I0322 05:17:43.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:17:43.422716  543705 net.go:698] Add success.
I0322 05:17:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:17:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:17:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:17:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:17:53.409797  543705 memory.go:184] no items to output this cycle
I0322 05:17:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 05:18:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:18:03.409787  543705 memory.go:184] no items to output this cycle
I0322 05:18:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 05:18:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:18:13.409799  543705 memory.go:191] Add success.
I0322 05:18:13.409800  543705 cpu.go:282] Add success.
W0322 05:18:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:18:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:18:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:18:13.420167  543705 net.go:648] Add success.
I0322 05:18:13.422910  543705 net.go:770] primary dev: ETH0
I0322 05:18:13.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:18:13.422934  543705 net.go:698] Add success.
I0322 05:18:13.468235  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"179838d9-0b58-4c69-8bf4-f6d4b2b3f7b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:18:13.468268  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:18:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:18:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:18:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 05:18:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:18:14.456680  543705 disk_worker.go:494] system disk:vda1
I0322 05:18:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:18:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:18:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:18:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:18:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:18:23.409760  543705 memory.go:184] no items to output this cycle
I0322 05:18:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:18:33.221676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:18:33.224289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:18:33.224295  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465280 0xc0004652c0]
E0322 05:18:33.407524  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:18:33.407538  543705 memory.go:184] no items to output this cycle
I0322 05:18:33.407546  543705 cpu.go:275] no items to output this cycle
I0322 05:18:39.510299  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:18:39.510305  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:18:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:18:43.410534  543705 memory.go:191] Add success.
I0322 05:18:43.409827  543705 cpu.go:282] Add success.
I0322 05:18:43.420256  543705 net.go:648] Add success.
I0322 05:18:43.422805  543705 net.go:770] primary dev: ETH0
I0322 05:18:43.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:18:43.422832  543705 net.go:698] Add success.
I0322 05:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:18:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:18:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:18:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:18:53.409802  543705 memory.go:184] no items to output this cycle
I0322 05:18:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 05:19:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:19:03.409778  543705 memory.go:184] no items to output this cycle
I0322 05:19:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 05:19:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:19:13.409813  543705 memory.go:191] Add success.
I0322 05:19:13.409822  543705 cpu.go:282] Add success.
W0322 05:19:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:19:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:19:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:19:13.420142  543705 net.go:648] Add success.
I0322 05:19:13.423363  543705 net.go:770] primary dev: ETH0
I0322 05:19:13.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:19:13.423388  543705 net.go:698] Add success.
I0322 05:19:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:19:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:19:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 05:19:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:19:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 05:19:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:19:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:19:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:19:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:19:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:19:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:19:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 05:19:23.409796  543705 memory.go:184] no items to output this cycle
I0322 05:19:33.225682  543705 disk_info.go:125] begin check local disk info of client
I0322 05:19:33.228176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:19:33.228183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9c80 0xc0004d9cc0]
E0322 05:19:33.408231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:19:33.408249  543705 memory.go:184] no items to output this cycle
I0322 05:19:33.408265  543705 cpu.go:275] no items to output this cycle
I0322 05:19:43.409790  543705 cpu.go:282] Add success.
E0322 05:19:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:19:43.410828  543705 memory.go:191] Add success.
I0322 05:19:43.420603  543705 net.go:648] Add success.
I0322 05:19:43.423210  543705 net.go:770] primary dev: ETH0
I0322 05:19:43.423225  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:19:43.423238  543705 net.go:698] Add success.
I0322 05:19:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:19:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:19:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:19:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:19:53.409789  543705 memory.go:184] no items to output this cycle
I0322 05:19:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 05:20:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:20:03.409771  543705 memory.go:184] no items to output this cycle
I0322 05:20:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 05:20:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:20:13.409795  543705 cpu.go:282] Add success.
I0322 05:20:13.409803  543705 memory.go:191] Add success.
W0322 05:20:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:20:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:20:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:20:13.420051  543705 net.go:648] Add success.
I0322 05:20:13.422870  543705 net.go:770] primary dev: ETH0
I0322 05:20:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:20:13.422898  543705 net.go:698] Add success.
I0322 05:20:14.454802  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:20:14.454982  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:20:14.454992  543705 disk_worker.go:708] disk space is not compliant
W0322 05:20:14.454995  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:20:14.456298  543705 disk_worker.go:494] system disk:vda1
I0322 05:20:14.456343  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:20:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:20:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:20:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:20:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:20:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:20:23.409792  543705 memory.go:184] no items to output this cycle
I0322 05:20:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 05:20:33.229675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:20:33.232348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:20:33.232355  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465900 0xc000465940]
E0322 05:20:33.407874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:20:33.407886  543705 memory.go:184] no items to output this cycle
I0322 05:20:33.407893  543705 cpu.go:275] no items to output this cycle
E0322 05:20:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:20:43.409797  543705 memory.go:191] Add success.
I0322 05:20:43.409796  543705 cpu.go:282] Add success.
I0322 05:20:43.419842  543705 net.go:648] Add success.
I0322 05:20:43.422543  543705 net.go:770] primary dev: ETH0
I0322 05:20:43.422556  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:20:43.422569  543705 net.go:698] Add success.
I0322 05:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:20:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:20:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:20:53.409810  543705 memory.go:184] no items to output this cycle
I0322 05:20:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 05:21:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:21:03.409782  543705 memory.go:184] no items to output this cycle
I0322 05:21:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 05:21:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:21:13.409786  543705 memory.go:191] Add success.
I0322 05:21:13.409804  543705 cpu.go:282] Add success.
W0322 05:21:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:21:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:21:13.420203  543705 net.go:648] Add success.
I0322 05:21:13.422870  543705 net.go:770] primary dev: ETH0
I0322 05:21:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:21:13.422895  543705 net.go:698] Add success.
I0322 05:21:13.471920  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dbc56388-e98d-49af-bcd2-0a4495d5daf3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:21:13.471953  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:21:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:21:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:21:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 05:21:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:21:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 05:21:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:21:15.455611  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:21:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:21:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:21:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:21:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:21:23.409768  543705 memory.go:184] no items to output this cycle
I0322 05:21:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 05:21:33.233673  543705 disk_info.go:125] begin check local disk info of client
I0322 05:21:33.236254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:21:33.236261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b900 0xc00007b940]
E0322 05:21:33.408224  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:21:33.408236  543705 memory.go:184] no items to output this cycle
I0322 05:21:33.408273  543705 cpu.go:275] no items to output this cycle
I0322 05:21:39.511316  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:21:39.511322  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:21:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:21:43.410674  543705 memory.go:191] Add success.
I0322 05:21:43.409813  543705 cpu.go:282] Add success.
I0322 05:21:43.420343  543705 net.go:648] Add success.
I0322 05:21:43.422913  543705 net.go:770] primary dev: ETH0
I0322 05:21:43.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:21:43.422940  543705 net.go:698] Add success.
I0322 05:21:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:21:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:21:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:21:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:21:53.409816  543705 memory.go:184] no items to output this cycle
I0322 05:21:53.409839  543705 cpu.go:275] no items to output this cycle
E0322 05:22:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:22:03.409800  543705 memory.go:184] no items to output this cycle
I0322 05:22:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 05:22:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:22:13.409816  543705 memory.go:191] Add success.
I0322 05:22:13.409824  543705 cpu.go:282] Add success.
W0322 05:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:22:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:22:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:22:13.420110  543705 net.go:648] Add success.
I0322 05:22:13.422833  543705 net.go:770] primary dev: ETH0
I0322 05:22:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:22:13.422858  543705 net.go:698] Add success.
W0322 05:22:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:22:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 05:22:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:22:14.456781  543705 disk_worker.go:494] system disk:vda1
I0322 05:22:14.456818  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:22:14.457110  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:22:14.457117  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:22:14.457122  543705 custom_config.go:64] query custom config with name: gpu
E0322 05:22:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:22:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:22:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:22:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:22:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:22:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:22:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:22:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:22:23.409764  543705 memory.go:184] no items to output this cycle
I0322 05:22:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:22:33.237668  543705 disk_info.go:125] begin check local disk info of client
I0322 05:22:33.240198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:22:33.240205  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be00 0xc00007be40]
E0322 05:22:33.407511  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:22:33.407523  543705 memory.go:184] no items to output this cycle
I0322 05:22:33.407534  543705 cpu.go:275] no items to output this cycle
E0322 05:22:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:22:43.409781  543705 memory.go:191] Add success.
I0322 05:22:43.409807  543705 cpu.go:282] Add success.
I0322 05:22:43.419880  543705 net.go:648] Add success.
I0322 05:22:43.422445  543705 net.go:770] primary dev: ETH0
I0322 05:22:43.422461  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:22:43.422473  543705 net.go:698] Add success.
I0322 05:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:22:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:22:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:22:53.410287  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:22:53.410309  543705 memory.go:184] no items to output this cycle
I0322 05:22:53.410313  543705 cpu.go:275] no items to output this cycle
E0322 05:23:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:23:03.409764  543705 memory.go:184] no items to output this cycle
I0322 05:23:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 05:23:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:23:13.409821  543705 memory.go:191] Add success.
I0322 05:23:13.409825  543705 cpu.go:282] Add success.
W0322 05:23:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:23:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:23:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:23:13.420333  543705 net.go:648] Add success.
I0322 05:23:13.423092  543705 net.go:770] primary dev: ETH0
I0322 05:23:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:23:13.423120  543705 net.go:698] Add success.
I0322 05:23:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:23:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:23:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 05:23:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:23:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 05:23:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:23:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:23:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:23:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:23:23.409768  543705 memory.go:184] no items to output this cycle
I0322 05:23:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 05:23:33.241687  543705 disk_info.go:125] begin check local disk info of client
I0322 05:23:33.244227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:23:33.244233  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b980 0xc00048b9c0]
E0322 05:23:33.408194  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:23:33.408212  543705 memory.go:184] no items to output this cycle
I0322 05:23:33.408224  543705 cpu.go:275] no items to output this cycle
E0322 05:23:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:23:43.409823  543705 memory.go:191] Add success.
I0322 05:23:43.409827  543705 cpu.go:282] Add success.
I0322 05:23:43.419904  543705 net.go:648] Add success.
I0322 05:23:43.422902  543705 net.go:770] primary dev: ETH0
I0322 05:23:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:23:43.422931  543705 net.go:698] Add success.
I0322 05:23:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:23:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:23:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:23:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:23:53.409789  543705 memory.go:184] no items to output this cycle
I0322 05:23:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 05:24:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:24:03.409802  543705 memory.go:184] no items to output this cycle
I0322 05:24:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 05:24:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:24:13.409915  543705 memory.go:191] Add success.
I0322 05:24:13.409941  543705 cpu.go:282] Add success.
W0322 05:24:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:24:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:24:13.409969  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:24:13.419717  543705 net.go:648] Add success.
I0322 05:24:13.422905  543705 net.go:770] primary dev: ETH0
I0322 05:24:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:24:13.422930  543705 net.go:698] Add success.
I0322 05:24:13.463924  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"da2774de-369d-454a-95cf-44f2a7a850a1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:24:13.463963  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:24:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:24:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:24:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 05:24:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:24:14.456675  543705 disk_worker.go:494] system disk:vda1
I0322 05:24:14.456725  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:24:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:24:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:24:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:24:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:24:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:24:23.409786  543705 memory.go:184] no items to output this cycle
I0322 05:24:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 05:24:33.245677  543705 disk_info.go:125] begin check local disk info of client
I0322 05:24:33.248246  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:24:33.248252  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b540 0xc00048b580]
E0322 05:24:33.407514  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:24:33.407528  543705 memory.go:184] no items to output this cycle
I0322 05:24:33.407539  543705 cpu.go:275] no items to output this cycle
I0322 05:24:39.512302  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:24:39.512310  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:24:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:24:43.410674  543705 memory.go:191] Add success.
I0322 05:24:43.409835  543705 cpu.go:282] Add success.
I0322 05:24:43.420398  543705 net.go:648] Add success.
I0322 05:24:43.423049  543705 net.go:770] primary dev: ETH0
I0322 05:24:43.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:24:43.423079  543705 net.go:698] Add success.
I0322 05:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:24:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:24:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:24:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:24:53.409805  543705 memory.go:184] no items to output this cycle
I0322 05:24:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 05:25:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:25:03.409782  543705 memory.go:184] no items to output this cycle
I0322 05:25:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 05:25:13.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:25:13.409910  543705 memory.go:191] Add success.
W0322 05:25:13.409939  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:25:13.409955  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:25:13.409958  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:25:13.409968  543705 cpu.go:282] Add success.
I0322 05:25:13.419705  543705 net.go:648] Add success.
I0322 05:25:13.422436  543705 net.go:770] primary dev: ETH0
I0322 05:25:13.422450  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:25:13.422461  543705 net.go:698] Add success.
I0322 05:25:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:25:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:25:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 05:25:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:25:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 05:25:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:25:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:25:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:25:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:25:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:25:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:25:23.409802  543705 memory.go:184] no items to output this cycle
I0322 05:25:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 05:25:33.249672  543705 disk_info.go:125] begin check local disk info of client
I0322 05:25:33.252179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:25:33.252186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
E0322 05:25:33.408114  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:25:33.408130  543705 memory.go:184] no items to output this cycle
I0322 05:25:33.408145  543705 cpu.go:275] no items to output this cycle
E0322 05:25:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:25:43.409796  543705 memory.go:191] Add success.
I0322 05:25:43.409797  543705 cpu.go:282] Add success.
I0322 05:25:43.419859  543705 net.go:648] Add success.
I0322 05:25:43.422635  543705 net.go:770] primary dev: ETH0
I0322 05:25:43.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:25:43.422661  543705 net.go:698] Add success.
I0322 05:25:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:25:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:25:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:25:53.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:25:53.409826  543705 memory.go:184] no items to output this cycle
I0322 05:25:53.409837  543705 cpu.go:275] no items to output this cycle
E0322 05:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:26:03.409775  543705 memory.go:184] no items to output this cycle
I0322 05:26:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 05:26:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:26:13.409784  543705 memory.go:191] Add success.
W0322 05:26:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 05:26:13.409812  543705 cpu.go:282] Add success.
W0322 05:26:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:26:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:26:13.420218  543705 net.go:648] Add success.
I0322 05:26:13.422933  543705 net.go:770] primary dev: ETH0
I0322 05:26:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:26:13.422957  543705 net.go:698] Add success.
I0322 05:26:14.453960  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:26:14.455238  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:26:14.455249  543705 disk_worker.go:708] disk space is not compliant
W0322 05:26:14.455252  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:26:14.456689  543705 disk_worker.go:494] system disk:vda1
I0322 05:26:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:26:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:26:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:26:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:26:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:26:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 05:26:23.409794  543705 memory.go:184] no items to output this cycle
I0322 05:26:33.253676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:26:33.256164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:26:33.256170  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0322 05:26:33.407497  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:26:33.407509  543705 memory.go:184] no items to output this cycle
I0322 05:26:33.407536  543705 cpu.go:275] no items to output this cycle
E0322 05:26:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:26:43.409822  543705 memory.go:191] Add success.
I0322 05:26:43.409826  543705 cpu.go:282] Add success.
I0322 05:26:43.420131  543705 net.go:648] Add success.
I0322 05:26:43.422871  543705 net.go:770] primary dev: ETH0
I0322 05:26:43.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:26:43.422899  543705 net.go:698] Add success.
I0322 05:26:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:26:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:26:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:26:53.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:26:53.409835  543705 memory.go:184] no items to output this cycle
I0322 05:26:53.409838  543705 cpu.go:275] no items to output this cycle
E0322 05:27:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:27:03.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:27:03.409821  543705 memory.go:184] no items to output this cycle
E0322 05:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:27:13.409814  543705 memory.go:191] Add success.
I0322 05:27:13.409813  543705 cpu.go:282] Add success.
W0322 05:27:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:27:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:27:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:27:13.420247  543705 net.go:648] Add success.
I0322 05:27:13.423129  543705 net.go:770] primary dev: ETH0
I0322 05:27:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:27:13.423153  543705 net.go:698] Add success.
I0322 05:27:13.429440  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 05:27:13.452773  543705 event_worker.go:152] Polling the log file for events...
I0322 05:27:13.467854  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"544b95f9-84aa-4181-b7c8-8b3ae6c89266","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:27:13.467885  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 05:27:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:27:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 05:27:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:27:14.455894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:27:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:27:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:27:14.456531  543705 disk_worker.go:494] system disk:vda1
I0322 05:27:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:27:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:27:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:27:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:27:16.457985  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:27:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:27:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:27:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:27:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:27:23.409779  543705 memory.go:184] no items to output this cycle
I0322 05:27:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 05:27:33.257674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:27:33.260231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:27:33.260238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5e00 0xc0002a5e40]
E0322 05:27:33.408084  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:27:33.408096  543705 memory.go:184] no items to output this cycle
I0322 05:27:33.408130  543705 cpu.go:275] no items to output this cycle
I0322 05:27:39.513305  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:27:39.513312  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:27:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:27:43.410742  543705 memory.go:191] Add success.
I0322 05:27:43.409832  543705 cpu.go:282] Add success.
I0322 05:27:43.420457  543705 net.go:648] Add success.
I0322 05:27:43.423344  543705 net.go:770] primary dev: ETH0
I0322 05:27:43.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:27:43.423371  543705 net.go:698] Add success.
I0322 05:27:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:27:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:27:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:27:53.409791  543705 memory.go:184] no items to output this cycle
I0322 05:27:53.409864  543705 cpu.go:275] no items to output this cycle
E0322 05:28:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:28:03.409772  543705 memory.go:184] no items to output this cycle
I0322 05:28:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 05:28:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:28:13.409783  543705 memory.go:191] Add success.
I0322 05:28:13.409806  543705 cpu.go:282] Add success.
W0322 05:28:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:28:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:28:13.420516  543705 net.go:648] Add success.
I0322 05:28:13.423288  543705 net.go:770] primary dev: ETH0
I0322 05:28:13.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:28:13.423311  543705 net.go:698] Add success.
I0322 05:28:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:28:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:28:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 05:28:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:28:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 05:28:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:28:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:28:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:28:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:28:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:28:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:28:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:28:23.409769  543705 memory.go:184] no items to output this cycle
I0322 05:28:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:28:33.261674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:28:33.264232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:28:33.264241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a7bc0 0xc0002a7c00]
E0322 05:28:33.408063  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:28:33.408075  543705 memory.go:184] no items to output this cycle
I0322 05:28:33.408116  543705 cpu.go:275] no items to output this cycle
E0322 05:28:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:28:43.409815  543705 memory.go:191] Add success.
I0322 05:28:43.409820  543705 cpu.go:282] Add success.
I0322 05:28:43.420233  543705 net.go:648] Add success.
I0322 05:28:43.423516  543705 net.go:770] primary dev: ETH0
I0322 05:28:43.423530  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:28:43.423543  543705 net.go:698] Add success.
I0322 05:28:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:28:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:28:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:28:53.409783  543705 memory.go:184] no items to output this cycle
I0322 05:28:53.409844  543705 cpu.go:275] no items to output this cycle
E0322 05:29:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:29:03.409781  543705 memory.go:184] no items to output this cycle
I0322 05:29:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 05:29:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:29:13.409915  543705 memory.go:191] Add success.
I0322 05:29:13.409933  543705 cpu.go:282] Add success.
W0322 05:29:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:29:13.409964  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:29:13.409967  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:29:13.419731  543705 net.go:648] Add success.
I0322 05:29:13.422505  543705 net.go:770] primary dev: ETH0
I0322 05:29:13.422524  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:29:13.422539  543705 net.go:698] Add success.
I0322 05:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:29:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:29:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0322 05:29:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:29:14.456464  543705 disk_worker.go:494] system disk:vda1
I0322 05:29:14.456507  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:29:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:29:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:29:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:29:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:29:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:29:23.409775  543705 memory.go:184] no items to output this cycle
I0322 05:29:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 05:29:33.265675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:29:33.268165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:29:33.268172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4500 0xc0002a4540]
E0322 05:29:33.407497  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:29:33.407513  543705 memory.go:184] no items to output this cycle
I0322 05:29:33.407538  543705 cpu.go:275] no items to output this cycle
E0322 05:29:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:29:43.409790  543705 memory.go:191] Add success.
I0322 05:29:43.409793  543705 cpu.go:282] Add success.
I0322 05:29:43.419920  543705 net.go:648] Add success.
I0322 05:29:43.422715  543705 net.go:770] primary dev: ETH0
I0322 05:29:43.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:29:43.422747  543705 net.go:698] Add success.
I0322 05:29:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:29:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:29:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:29:53.410366  543705 cpu.go:275] no items to output this cycle
E0322 05:29:53.410365  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:29:53.410386  543705 memory.go:184] no items to output this cycle
E0322 05:30:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:30:03.409775  543705 memory.go:184] no items to output this cycle
I0322 05:30:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 05:30:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:30:13.409914  543705 cpu.go:282] Add success.
I0322 05:30:13.409942  543705 memory.go:191] Add success.
W0322 05:30:13.410005  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:30:13.410029  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:30:13.410033  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:30:13.419705  543705 net.go:648] Add success.
I0322 05:30:13.422583  543705 net.go:770] primary dev: ETH0
I0322 05:30:13.422595  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:30:13.422606  543705 net.go:698] Add success.
I0322 05:30:13.463274  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"723a06d5-3343-4494-a06b-5c46b88a2e89","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:30:13.463304  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:30:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:30:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:30:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 05:30:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:30:14.456528  543705 disk_worker.go:494] system disk:vda1
I0322 05:30:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:30:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:30:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:30:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:30:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:30:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:30:23.409791  543705 memory.go:184] no items to output this cycle
I0322 05:30:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 05:30:33.269674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:30:33.272174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:30:33.272180  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b000 0xc00007b040]
E0322 05:30:33.407498  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:30:33.407510  543705 memory.go:184] no items to output this cycle
I0322 05:30:33.407542  543705 cpu.go:275] no items to output this cycle
I0322 05:30:39.514317  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:30:39.514323  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:30:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:30:43.410577  543705 memory.go:191] Add success.
I0322 05:30:43.409807  543705 cpu.go:282] Add success.
I0322 05:30:43.420292  543705 net.go:648] Add success.
I0322 05:30:43.422842  543705 net.go:770] primary dev: ETH0
I0322 05:30:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:30:43.422868  543705 net.go:698] Add success.
I0322 05:30:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:30:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:30:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:30:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:30:53.409778  543705 memory.go:184] no items to output this cycle
I0322 05:30:53.409850  543705 cpu.go:275] no items to output this cycle
E0322 05:31:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:31:03.409783  543705 memory.go:184] no items to output this cycle
I0322 05:31:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 05:31:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:31:13.409794  543705 memory.go:191] Add success.
I0322 05:31:13.409798  543705 cpu.go:282] Add success.
W0322 05:31:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:31:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:31:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:31:13.420188  543705 net.go:648] Add success.
I0322 05:31:13.423236  543705 net.go:770] primary dev: ETH0
I0322 05:31:13.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:31:13.423265  543705 net.go:698] Add success.
I0322 05:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:31:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:31:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 05:31:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:31:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 05:31:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:31:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:31:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:31:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:31:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:31:23.409795  543705 memory.go:184] no items to output this cycle
I0322 05:31:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:31:33.275937  543705 disk_info.go:125] begin check local disk info of client
I0322 05:31:33.278440  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:31:33.278446  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4c40 0xc0002a4c80]
E0322 05:31:33.407878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:31:33.407894  543705 memory.go:184] no items to output this cycle
I0322 05:31:33.407909  543705 cpu.go:275] no items to output this cycle
E0322 05:31:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:31:43.409778  543705 memory.go:191] Add success.
I0322 05:31:43.409801  543705 cpu.go:282] Add success.
I0322 05:31:43.419849  543705 net.go:648] Add success.
I0322 05:31:43.422461  543705 net.go:770] primary dev: ETH0
I0322 05:31:43.422476  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:31:43.422491  543705 net.go:698] Add success.
I0322 05:31:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:31:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:31:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:31:53.410235  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:31:53.410253  543705 memory.go:184] no items to output this cycle
I0322 05:31:53.410340  543705 cpu.go:275] no items to output this cycle
E0322 05:32:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:32:03.409779  543705 memory.go:184] no items to output this cycle
I0322 05:32:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 05:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:32:13.409793  543705 memory.go:191] Add success.
I0322 05:32:13.409809  543705 cpu.go:282] Add success.
W0322 05:32:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:32:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:32:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:32:13.420129  543705 net.go:648] Add success.
I0322 05:32:13.422994  543705 net.go:770] primary dev: ETH0
I0322 05:32:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:32:13.423018  543705 net.go:698] Add success.
W0322 05:32:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:32:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 05:32:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:32:14.456900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:32:14.456910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:32:14.456915  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:32:14.456990  543705 disk_worker.go:494] system disk:vda1
I0322 05:32:14.457031  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:32:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:32:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:32:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:32:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:32:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:32:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:32:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:32:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:32:23.409762  543705 memory.go:184] no items to output this cycle
I0322 05:32:23.409794  543705 cpu.go:275] no items to output this cycle
I0322 05:32:33.281681  543705 disk_info.go:125] begin check local disk info of client
I0322 05:32:33.284171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:32:33.284177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464ec0 0xc000464f00]
E0322 05:32:33.407939  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:32:33.407956  543705 memory.go:184] no items to output this cycle
I0322 05:32:33.407968  543705 cpu.go:275] no items to output this cycle
E0322 05:32:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:32:43.409801  543705 memory.go:191] Add success.
I0322 05:32:43.409807  543705 cpu.go:282] Add success.
I0322 05:32:43.420014  543705 net.go:648] Add success.
I0322 05:32:43.422722  543705 net.go:770] primary dev: ETH0
I0322 05:32:43.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:32:43.422753  543705 net.go:698] Add success.
I0322 05:32:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:32:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:32:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:32:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:32:53.409763  543705 memory.go:184] no items to output this cycle
I0322 05:32:53.409832  543705 cpu.go:275] no items to output this cycle
E0322 05:33:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:33:03.409786  543705 memory.go:184] no items to output this cycle
I0322 05:33:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 05:33:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:33:13.409791  543705 memory.go:191] Add success.
I0322 05:33:13.409792  543705 cpu.go:282] Add success.
W0322 05:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:33:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:33:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:33:13.420223  543705 net.go:648] Add success.
I0322 05:33:13.422863  543705 net.go:770] primary dev: ETH0
I0322 05:33:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:33:13.422887  543705 net.go:698] Add success.
I0322 05:33:13.462872  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6ee2606-1fbc-4247-976a-6ecd5786ae93","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:33:13.462907  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:33:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:33:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:33:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 05:33:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:33:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 05:33:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:33:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:33:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:33:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:33:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:33:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:33:23.409759  543705 memory.go:184] no items to output this cycle
I0322 05:33:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 05:33:33.285676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:33:33.288184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:33:33.288190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab700 0xc0001ab740]
E0322 05:33:33.407501  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:33:33.407514  543705 memory.go:184] no items to output this cycle
I0322 05:33:33.407525  543705 cpu.go:275] no items to output this cycle
I0322 05:33:39.515325  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:33:39.515332  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:33:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:33:43.410654  543705 memory.go:191] Add success.
I0322 05:33:43.409805  543705 cpu.go:282] Add success.
I0322 05:33:43.420433  543705 net.go:648] Add success.
I0322 05:33:43.422962  543705 net.go:770] primary dev: ETH0
I0322 05:33:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:33:43.422987  543705 net.go:698] Add success.
I0322 05:33:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:33:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:33:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:33:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:33:53.409768  543705 memory.go:184] no items to output this cycle
I0322 05:33:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 05:34:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:34:03.409780  543705 memory.go:184] no items to output this cycle
I0322 05:34:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 05:34:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:34:13.409821  543705 memory.go:191] Add success.
I0322 05:34:13.409828  543705 cpu.go:282] Add success.
W0322 05:34:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:34:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:34:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:34:13.420167  543705 net.go:648] Add success.
I0322 05:34:13.422862  543705 net.go:770] primary dev: ETH0
I0322 05:34:13.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:34:13.422886  543705 net.go:698] Add success.
I0322 05:34:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:34:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:34:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 05:34:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:34:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 05:34:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:34:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:34:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:34:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:34:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:34:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:34:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:34:23.409757  543705 memory.go:184] no items to output this cycle
I0322 05:34:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:34:33.289679  543705 disk_info.go:125] begin check local disk info of client
I0322 05:34:33.292186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:34:33.292193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4580 0xc0002a45c0]
E0322 05:34:33.407920  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:34:33.407937  543705 memory.go:184] no items to output this cycle
I0322 05:34:33.407954  543705 cpu.go:275] no items to output this cycle
E0322 05:34:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:34:43.409823  543705 memory.go:191] Add success.
I0322 05:34:43.409831  543705 cpu.go:282] Add success.
I0322 05:34:43.420159  543705 net.go:648] Add success.
I0322 05:34:43.422884  543705 net.go:770] primary dev: ETH0
I0322 05:34:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:34:43.422911  543705 net.go:698] Add success.
I0322 05:34:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:34:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:34:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:34:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:34:53.409779  543705 memory.go:184] no items to output this cycle
I0322 05:34:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 05:35:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:35:03.409783  543705 memory.go:184] no items to output this cycle
I0322 05:35:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 05:35:13.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:35:13.409890  543705 memory.go:191] Add success.
I0322 05:35:13.409900  543705 cpu.go:282] Add success.
W0322 05:35:13.409924  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:35:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:35:13.409946  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:35:13.419709  543705 net.go:648] Add success.
I0322 05:35:13.422365  543705 net.go:770] primary dev: ETH0
I0322 05:35:13.422379  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:35:13.422393  543705 net.go:698] Add success.
I0322 05:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:35:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:35:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 05:35:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:35:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 05:35:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:35:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:35:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:35:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:35:23.410697  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:35:23.410711  543705 memory.go:184] no items to output this cycle
I0322 05:35:23.410741  543705 cpu.go:275] no items to output this cycle
I0322 05:35:33.293676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:35:33.296133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:35:33.296139  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9cc0 0xc0004d9d00]
E0322 05:35:33.407849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:35:33.407865  543705 memory.go:184] no items to output this cycle
I0322 05:35:33.407881  543705 cpu.go:275] no items to output this cycle
E0322 05:35:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:35:43.409790  543705 memory.go:191] Add success.
I0322 05:35:43.409816  543705 cpu.go:282] Add success.
I0322 05:35:43.419871  543705 net.go:648] Add success.
I0322 05:35:43.422474  543705 net.go:770] primary dev: ETH0
I0322 05:35:43.422487  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:35:43.422500  543705 net.go:698] Add success.
I0322 05:35:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:35:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:35:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:35:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:35:53.409787  543705 memory.go:184] no items to output this cycle
I0322 05:35:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 05:36:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:36:03.409770  543705 memory.go:184] no items to output this cycle
I0322 05:36:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 05:36:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:36:13.409788  543705 memory.go:191] Add success.
I0322 05:36:13.409789  543705 cpu.go:282] Add success.
W0322 05:36:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:36:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:36:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:36:13.420190  543705 net.go:648] Add success.
I0322 05:36:13.423182  543705 net.go:770] primary dev: ETH0
I0322 05:36:13.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:36:13.423211  543705 net.go:698] Add success.
I0322 05:36:13.467811  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"06feaa07-0f51-4abe-9e3b-1a8ae4ae5999","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:36:13.467842  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:36:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:36:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:36:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 05:36:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:36:14.456646  543705 disk_worker.go:494] system disk:vda1
I0322 05:36:14.456675  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:36:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:36:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:36:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:36:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:36:23.409763  543705 memory.go:184] no items to output this cycle
I0322 05:36:23.409796  543705 cpu.go:275] no items to output this cycle
I0322 05:36:33.297673  543705 disk_info.go:125] begin check local disk info of client
I0322 05:36:33.300140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:36:33.300146  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482cc0 0xc000482d00]
E0322 05:36:33.407499  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:36:33.407513  543705 memory.go:184] no items to output this cycle
I0322 05:36:33.407547  543705 cpu.go:275] no items to output this cycle
I0322 05:36:39.516339  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:36:39.516353  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:36:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:36:43.410724  543705 memory.go:191] Add success.
I0322 05:36:43.409864  543705 cpu.go:282] Add success.
I0322 05:36:43.420540  543705 net.go:648] Add success.
I0322 05:36:43.423382  543705 net.go:770] primary dev: ETH0
I0322 05:36:43.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:36:43.423429  543705 net.go:698] Add success.
I0322 05:36:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:36:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:36:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:36:53.409765  543705 memory.go:184] no items to output this cycle
I0322 05:36:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 05:37:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:37:03.409775  543705 memory.go:184] no items to output this cycle
I0322 05:37:03.409808  543705 cpu.go:275] no items to output this cycle
W0322 05:37:13.409702  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:37:13.409718  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:37:13.409722  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:37:13.409786  543705 cpu.go:282] Add success.
E0322 05:37:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:37:13.409826  543705 memory.go:191] Add success.
I0322 05:37:13.420338  543705 net.go:648] Add success.
I0322 05:37:13.422787  543705 net.go:770] primary dev: ETH0
I0322 05:37:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:37:13.422811  543705 net.go:698] Add success.
I0322 05:37:13.452783  543705 event_worker.go:152] Polling the log file for events...
W0322 05:37:14.455087  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:37:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0322 05:37:14.455153  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:37:14.456938  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:37:14.456947  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:37:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:37:14.457000  543705 disk_worker.go:494] system disk:vda1
I0322 05:37:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:37:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:37:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:37:16.457885  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:37:16.457885  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:37:16.457939  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:37:16.457958  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:37:16.472274  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:37:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:37:23.409790  543705 memory.go:184] no items to output this cycle
I0322 05:37:23.409803  543705 cpu.go:275] no items to output this cycle
I0322 05:37:33.301687  543705 disk_info.go:125] begin check local disk info of client
I0322 05:37:33.304205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:37:33.304211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4740 0xc0000c4780]
E0322 05:37:33.407506  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:37:33.407519  543705 memory.go:184] no items to output this cycle
I0322 05:37:33.407526  543705 cpu.go:275] no items to output this cycle
E0322 05:37:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:37:43.409785  543705 memory.go:191] Add success.
I0322 05:37:43.409806  543705 cpu.go:282] Add success.
I0322 05:37:43.419890  543705 net.go:648] Add success.
I0322 05:37:43.422779  543705 net.go:770] primary dev: ETH0
I0322 05:37:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:37:43.422819  543705 net.go:698] Add success.
I0322 05:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:37:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:37:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:37:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:37:53.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:37:53.409812  543705 memory.go:184] no items to output this cycle
E0322 05:38:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:38:03.409804  543705 memory.go:184] no items to output this cycle
I0322 05:38:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 05:38:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:38:13.409782  543705 memory.go:191] Add success.
I0322 05:38:13.409807  543705 cpu.go:282] Add success.
W0322 05:38:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:38:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:38:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:38:13.420289  543705 net.go:648] Add success.
I0322 05:38:13.423020  543705 net.go:770] primary dev: ETH0
I0322 05:38:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:38:13.423045  543705 net.go:698] Add success.
I0322 05:38:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:38:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:38:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 05:38:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:38:14.459175  543705 disk_worker.go:494] system disk:vda1
I0322 05:38:14.459203  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:38:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:38:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:38:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:38:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:38:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:38:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:38:23.409794  543705 memory.go:184] no items to output this cycle
I0322 05:38:23.409806  543705 cpu.go:275] no items to output this cycle
I0322 05:38:33.305677  543705 disk_info.go:125] begin check local disk info of client
I0322 05:38:33.308157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:38:33.308164  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5780 0xc0000c57c0]
E0322 05:38:33.407507  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:38:33.407522  543705 memory.go:184] no items to output this cycle
I0322 05:38:33.407550  543705 cpu.go:275] no items to output this cycle
E0322 05:38:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:38:43.409796  543705 memory.go:191] Add success.
I0322 05:38:43.409801  543705 cpu.go:282] Add success.
I0322 05:38:43.419852  543705 net.go:648] Add success.
I0322 05:38:43.422494  543705 net.go:770] primary dev: ETH0
I0322 05:38:43.422506  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:38:43.422519  543705 net.go:698] Add success.
I0322 05:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:38:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:38:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:38:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:38:53.409797  543705 memory.go:184] no items to output this cycle
I0322 05:38:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 05:39:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:39:03.409783  543705 memory.go:184] no items to output this cycle
I0322 05:39:03.409793  543705 cpu.go:275] no items to output this cycle
W0322 05:39:13.409702  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:39:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:39:13.409721  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 05:39:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:39:13.409807  543705 memory.go:191] Add success.
I0322 05:39:13.409813  543705 cpu.go:282] Add success.
I0322 05:39:13.420082  543705 net.go:648] Add success.
I0322 05:39:13.422829  543705 net.go:770] primary dev: ETH0
I0322 05:39:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:39:13.422854  543705 net.go:698] Add success.
I0322 05:39:13.671381  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e68353cd-ada7-4573-8d90-cb7b60147e94","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:39:13.671419  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:39:14.453978  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:39:14.454235  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:39:14.454246  543705 disk_worker.go:708] disk space is not compliant
W0322 05:39:14.454248  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:39:14.455939  543705 disk_worker.go:494] system disk:vda1
I0322 05:39:14.455973  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:39:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:39:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:39:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:39:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:39:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:39:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:39:23.409760  543705 memory.go:184] no items to output this cycle
I0322 05:39:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 05:39:33.309678  543705 disk_info.go:125] begin check local disk info of client
I0322 05:39:33.312237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:39:33.312245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003adc00 0xc0003adc40]
E0322 05:39:33.407862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:39:33.407878  543705 memory.go:184] no items to output this cycle
I0322 05:39:33.407896  543705 cpu.go:275] no items to output this cycle
I0322 05:39:39.517330  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:39:39.517337  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:39:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:39:43.410693  543705 memory.go:191] Add success.
I0322 05:39:43.409802  543705 cpu.go:282] Add success.
I0322 05:39:43.420389  543705 net.go:648] Add success.
I0322 05:39:43.423187  543705 net.go:770] primary dev: ETH0
I0322 05:39:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:39:43.423213  543705 net.go:698] Add success.
I0322 05:39:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:39:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:39:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:39:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:39:53.409780  543705 memory.go:184] no items to output this cycle
I0322 05:39:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 05:40:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:40:03.409770  543705 memory.go:184] no items to output this cycle
I0322 05:40:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 05:40:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:40:13.409805  543705 memory.go:191] Add success.
I0322 05:40:13.409817  543705 cpu.go:282] Add success.
W0322 05:40:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:40:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:40:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:40:13.420064  543705 net.go:648] Add success.
I0322 05:40:13.422934  543705 net.go:770] primary dev: ETH0
I0322 05:40:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:40:13.422959  543705 net.go:698] Add success.
I0322 05:40:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:40:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:40:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 05:40:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:40:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 05:40:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:40:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:40:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:40:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:40:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:40:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:40:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:40:23.409774  543705 memory.go:184] no items to output this cycle
I0322 05:40:23.409777  543705 cpu.go:275] no items to output this cycle
I0322 05:40:33.313673  543705 disk_info.go:125] begin check local disk info of client
I0322 05:40:33.316309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:40:33.316315  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc180 0xc0004dc1c0]
E0322 05:40:33.407500  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:40:33.407516  543705 memory.go:184] no items to output this cycle
I0322 05:40:33.407541  543705 cpu.go:275] no items to output this cycle
E0322 05:40:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:40:43.409789  543705 memory.go:191] Add success.
I0322 05:40:43.409809  543705 cpu.go:282] Add success.
I0322 05:40:43.419846  543705 net.go:648] Add success.
I0322 05:40:43.422532  543705 net.go:770] primary dev: ETH0
I0322 05:40:43.422546  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:40:43.422560  543705 net.go:698] Add success.
I0322 05:40:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:40:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:40:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:40:53.409774  543705 memory.go:184] no items to output this cycle
I0322 05:40:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 05:41:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:41:03.409773  543705 memory.go:184] no items to output this cycle
I0322 05:41:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 05:41:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:41:13.409788  543705 memory.go:191] Add success.
I0322 05:41:13.409788  543705 cpu.go:282] Add success.
W0322 05:41:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:41:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:41:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:41:13.420040  543705 net.go:648] Add success.
I0322 05:41:13.422935  543705 net.go:770] primary dev: ETH0
I0322 05:41:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:41:13.422961  543705 net.go:698] Add success.
I0322 05:41:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:41:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:41:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 05:41:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:41:14.456494  543705 disk_worker.go:494] system disk:vda1
I0322 05:41:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:41:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:41:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:41:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:41:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:41:23.409792  543705 memory.go:184] no items to output this cycle
I0322 05:41:23.409801  543705 cpu.go:275] no items to output this cycle
I0322 05:41:33.317674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:41:33.320147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:41:33.320153  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037c500 0xc00037c540]
E0322 05:41:33.407504  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:41:33.407518  543705 memory.go:184] no items to output this cycle
I0322 05:41:33.407547  543705 cpu.go:275] no items to output this cycle
E0322 05:41:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:41:43.409799  543705 memory.go:191] Add success.
I0322 05:41:43.409803  543705 cpu.go:282] Add success.
I0322 05:41:43.419880  543705 net.go:648] Add success.
I0322 05:41:43.422598  543705 net.go:770] primary dev: ETH0
I0322 05:41:43.422610  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:41:43.422623  543705 net.go:698] Add success.
I0322 05:41:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:41:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:41:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:41:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:41:53.409790  543705 memory.go:184] no items to output this cycle
I0322 05:41:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 05:42:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:42:03.409789  543705 memory.go:184] no items to output this cycle
I0322 05:42:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 05:42:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:42:13.409792  543705 memory.go:191] Add success.
I0322 05:42:13.409792  543705 cpu.go:282] Add success.
W0322 05:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:42:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:42:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:42:13.420182  543705 net.go:648] Add success.
I0322 05:42:13.422747  543705 net.go:770] primary dev: ETH0
I0322 05:42:13.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:42:13.422773  543705 net.go:698] Add success.
I0322 05:42:13.464289  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e43cafdd-2660-4b06-ab1f-c105c8f88a72","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:42:13.464324  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 05:42:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:42:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 05:42:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:42:14.456796  543705 disk_worker.go:494] system disk:vda1
I0322 05:42:14.456844  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:42:14.457122  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:42:14.457130  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:42:14.457134  543705 custom_config.go:64] query custom config with name: gpu
E0322 05:42:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:42:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:42:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:42:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:42:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:42:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:42:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:42:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:42:23.409764  543705 memory.go:184] no items to output this cycle
I0322 05:42:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 05:42:33.321681  543705 disk_info.go:125] begin check local disk info of client
I0322 05:42:33.324090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:42:33.324098  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ddc80 0xc0004ddcc0]
E0322 05:42:33.407502  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:42:33.407516  543705 memory.go:184] no items to output this cycle
I0322 05:42:33.407545  543705 cpu.go:275] no items to output this cycle
I0322 05:42:39.518335  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:42:39.518341  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:42:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:42:43.410700  543705 memory.go:191] Add success.
I0322 05:42:43.409813  543705 cpu.go:282] Add success.
I0322 05:42:43.420412  543705 net.go:648] Add success.
I0322 05:42:43.422961  543705 net.go:770] primary dev: ETH0
I0322 05:42:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:42:43.422986  543705 net.go:698] Add success.
I0322 05:42:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:42:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:42:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:42:53.409784  543705 cpu.go:275] no items to output this cycle
I0322 05:42:53.409789  543705 memory.go:184] no items to output this cycle
E0322 05:43:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:43:03.409783  543705 memory.go:184] no items to output this cycle
I0322 05:43:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 05:43:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:43:13.409790  543705 memory.go:191] Add success.
I0322 05:43:13.409791  543705 cpu.go:282] Add success.
W0322 05:43:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:43:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:43:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:43:13.420095  543705 net.go:648] Add success.
I0322 05:43:13.423323  543705 net.go:770] primary dev: ETH0
I0322 05:43:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:43:13.423351  543705 net.go:698] Add success.
I0322 05:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:43:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:43:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 05:43:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:43:14.459143  543705 disk_worker.go:494] system disk:vda1
I0322 05:43:14.459176  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:43:15.455892  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:43:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:43:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:43:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:43:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:43:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:43:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 05:43:23.409796  543705 memory.go:184] no items to output this cycle
I0322 05:43:33.325675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:43:33.328124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:43:33.328130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc900 0xc0004dc940]
E0322 05:43:33.407498  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:43:33.407510  543705 memory.go:184] no items to output this cycle
I0322 05:43:33.407536  543705 cpu.go:275] no items to output this cycle
E0322 05:43:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:43:43.409814  543705 memory.go:191] Add success.
I0322 05:43:43.409822  543705 cpu.go:282] Add success.
I0322 05:43:43.419956  543705 net.go:648] Add success.
I0322 05:43:43.422778  543705 net.go:770] primary dev: ETH0
I0322 05:43:43.422791  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:43:43.422803  543705 net.go:698] Add success.
I0322 05:43:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:43:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:43:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:43:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:43:53.409772  543705 memory.go:184] no items to output this cycle
I0322 05:43:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 05:44:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:44:03.409774  543705 memory.go:184] no items to output this cycle
I0322 05:44:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 05:44:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:44:13.409838  543705 memory.go:191] Add success.
I0322 05:44:13.409859  543705 cpu.go:282] Add success.
W0322 05:44:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:44:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:44:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:44:13.420162  543705 net.go:648] Add success.
I0322 05:44:13.422928  543705 net.go:770] primary dev: ETH0
I0322 05:44:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:44:13.422967  543705 net.go:698] Add success.
I0322 05:44:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:44:14.455380  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:44:14.455485  543705 disk_worker.go:708] disk space is not compliant
W0322 05:44:14.455490  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:44:14.457512  543705 disk_worker.go:494] system disk:vda1
I0322 05:44:14.457551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:44:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:44:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:44:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:44:23.409773  543705 memory.go:184] no items to output this cycle
I0322 05:44:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 05:44:33.329684  543705 disk_info.go:125] begin check local disk info of client
I0322 05:44:33.332210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:44:33.332217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc240 0xc0004dc280]
E0322 05:44:33.407535  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:44:33.407552  543705 memory.go:184] no items to output this cycle
I0322 05:44:33.407568  543705 cpu.go:275] no items to output this cycle
E0322 05:44:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:44:43.409812  543705 memory.go:191] Add success.
I0322 05:44:43.409815  543705 cpu.go:282] Add success.
I0322 05:44:43.419851  543705 net.go:648] Add success.
I0322 05:44:43.422422  543705 net.go:770] primary dev: ETH0
I0322 05:44:43.422435  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:44:43.422448  543705 net.go:698] Add success.
I0322 05:44:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:44:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:44:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:44:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:44:53.409764  543705 memory.go:184] no items to output this cycle
I0322 05:44:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 05:45:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:45:03.409804  543705 memory.go:184] no items to output this cycle
I0322 05:45:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 05:45:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:45:13.409791  543705 memory.go:191] Add success.
I0322 05:45:13.409794  543705 cpu.go:282] Add success.
W0322 05:45:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:45:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:45:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:45:13.419974  543705 net.go:770] primary dev: ETH0
I0322 05:45:13.420016  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:45:13.420039  543705 net.go:698] Add success.
I0322 05:45:13.420592  543705 net.go:648] Add success.
I0322 05:45:13.743874  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d134b094-b899-4546-bcbe-1bae0f944d34","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:45:13.743914  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:45:14.454492  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:45:14.454646  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:45:14.454720  543705 disk_worker.go:708] disk space is not compliant
W0322 05:45:14.454723  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:45:14.456117  543705 disk_worker.go:494] system disk:vda1
I0322 05:45:14.456145  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:45:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:45:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:45:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:45:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:45:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:45:23.410337  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:45:23.410351  543705 memory.go:184] no items to output this cycle
I0322 05:45:23.410357  543705 cpu.go:275] no items to output this cycle
I0322 05:45:33.333676  543705 disk_info.go:125] begin check local disk info of client
I0322 05:45:33.336171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:45:33.336178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc940 0xc0004dc980]
E0322 05:45:33.407495  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:45:33.407508  543705 memory.go:184] no items to output this cycle
I0322 05:45:33.407550  543705 cpu.go:275] no items to output this cycle
I0322 05:45:39.519346  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:45:39.519353  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:45:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:45:43.410658  543705 memory.go:191] Add success.
I0322 05:45:43.409798  543705 cpu.go:282] Add success.
I0322 05:45:43.420538  543705 net.go:648] Add success.
I0322 05:45:43.423661  543705 net.go:770] primary dev: ETH0
I0322 05:45:43.423675  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:45:43.423687  543705 net.go:698] Add success.
I0322 05:45:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:45:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:45:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:45:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:45:53.409780  543705 memory.go:184] no items to output this cycle
I0322 05:45:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 05:46:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:46:03.409782  543705 memory.go:184] no items to output this cycle
I0322 05:46:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 05:46:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:46:13.409818  543705 memory.go:191] Add success.
I0322 05:46:13.409820  543705 cpu.go:282] Add success.
W0322 05:46:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:46:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:46:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:46:13.420161  543705 net.go:648] Add success.
I0322 05:46:13.422658  543705 net.go:770] primary dev: ETH0
I0322 05:46:13.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:46:13.422686  543705 net.go:698] Add success.
I0322 05:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:46:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:46:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 05:46:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:46:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 05:46:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:46:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:46:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:46:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:46:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:46:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:46:23.409768  543705 memory.go:184] no items to output this cycle
I0322 05:46:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 05:46:33.337674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:46:33.340170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:46:33.340176  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344740 0xc000344780]
E0322 05:46:33.407665  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:46:33.407682  543705 memory.go:184] no items to output this cycle
I0322 05:46:33.407699  543705 cpu.go:275] no items to output this cycle
E0322 05:46:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:46:43.409792  543705 memory.go:191] Add success.
I0322 05:46:43.409792  543705 cpu.go:282] Add success.
I0322 05:46:43.419869  543705 net.go:648] Add success.
I0322 05:46:43.422587  543705 net.go:770] primary dev: ETH0
I0322 05:46:43.422600  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:46:43.422613  543705 net.go:698] Add success.
I0322 05:46:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:46:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:46:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:46:53.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:46:53.409911  543705 memory.go:184] no items to output this cycle
I0322 05:46:53.409922  543705 cpu.go:275] no items to output this cycle
E0322 05:47:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:47:03.409818  543705 memory.go:184] no items to output this cycle
I0322 05:47:03.409828  543705 cpu.go:275] no items to output this cycle
E0322 05:47:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:47:13.409793  543705 memory.go:191] Add success.
I0322 05:47:13.409796  543705 cpu.go:282] Add success.
W0322 05:47:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:47:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:47:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:47:13.420042  543705 net.go:648] Add success.
I0322 05:47:13.422855  543705 net.go:770] primary dev: ETH0
I0322 05:47:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:47:13.422879  543705 net.go:698] Add success.
I0322 05:47:13.453428  543705 event_worker.go:152] Polling the log file for events...
W0322 05:47:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:47:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 05:47:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:47:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:47:14.456928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:47:14.456935  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:47:14.456981  543705 disk_worker.go:494] system disk:vda1
I0322 05:47:14.457024  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:47:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:47:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:47:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:47:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:47:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:47:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:47:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:47:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:47:23.409774  543705 memory.go:184] no items to output this cycle
I0322 05:47:23.409783  543705 cpu.go:275] no items to output this cycle
I0322 05:47:33.341674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:47:33.344261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:47:33.344268  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465dc0 0xc000465e00]
E0322 05:47:33.407722  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:47:33.407733  543705 cpu.go:275] no items to output this cycle
I0322 05:47:33.407736  543705 memory.go:184] no items to output this cycle
E0322 05:47:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:47:43.409788  543705 memory.go:191] Add success.
I0322 05:47:43.409788  543705 cpu.go:282] Add success.
I0322 05:47:43.419844  543705 net.go:648] Add success.
I0322 05:47:43.422604  543705 net.go:770] primary dev: ETH0
I0322 05:47:43.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:47:43.422629  543705 net.go:698] Add success.
I0322 05:47:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:47:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:47:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:47:53.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:47:53.409904  543705 memory.go:184] no items to output this cycle
I0322 05:47:53.409923  543705 cpu.go:275] no items to output this cycle
E0322 05:48:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:48:03.409783  543705 memory.go:184] no items to output this cycle
I0322 05:48:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 05:48:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:48:13.409782  543705 memory.go:191] Add success.
I0322 05:48:13.409785  543705 cpu.go:282] Add success.
W0322 05:48:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:48:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:48:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:48:13.420055  543705 net.go:648] Add success.
I0322 05:48:13.422652  543705 net.go:770] primary dev: ETH0
I0322 05:48:13.422667  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:48:13.422680  543705 net.go:698] Add success.
I0322 05:48:13.478756  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9a17cff0-bfb9-4c6b-b3ed-2a246855086a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:48:13.478793  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:48:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:48:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:48:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 05:48:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:48:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 05:48:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:48:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:48:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:48:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:48:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:48:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:48:23.409773  543705 cpu.go:275] no items to output this cycle
I0322 05:48:23.409777  543705 memory.go:184] no items to output this cycle
I0322 05:48:33.345674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:48:33.348233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:48:33.348240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bef00 0xc0002bef40]
E0322 05:48:33.407685  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:48:33.407701  543705 memory.go:184] no items to output this cycle
I0322 05:48:33.407706  543705 cpu.go:275] no items to output this cycle
I0322 05:48:39.520348  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:48:39.520353  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:48:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:48:43.410841  543705 memory.go:191] Add success.
I0322 05:48:43.409782  543705 cpu.go:282] Add success.
I0322 05:48:43.420565  543705 net.go:648] Add success.
I0322 05:48:43.423579  543705 net.go:770] primary dev: ETH0
I0322 05:48:43.423592  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:48:43.423604  543705 net.go:698] Add success.
I0322 05:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:48:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:48:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:48:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:48:53.409780  543705 memory.go:184] no items to output this cycle
I0322 05:48:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 05:49:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:49:03.409783  543705 memory.go:184] no items to output this cycle
I0322 05:49:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 05:49:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:49:13.409808  543705 memory.go:191] Add success.
I0322 05:49:13.409820  543705 cpu.go:282] Add success.
W0322 05:49:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:49:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:49:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:49:13.420215  543705 net.go:648] Add success.
I0322 05:49:13.423222  543705 net.go:770] primary dev: ETH0
I0322 05:49:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:49:13.423249  543705 net.go:698] Add success.
I0322 05:49:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:49:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:49:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 05:49:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:49:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 05:49:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:49:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:49:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:49:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:49:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:49:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:49:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:49:23.409776  543705 memory.go:184] no items to output this cycle
I0322 05:49:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 05:49:33.349677  543705 disk_info.go:125] begin check local disk info of client
I0322 05:49:33.352177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:49:33.352184  543705 disk_info.go:196] parse disk info done, disk is : [0xc000341d00 0xc000341d40]
E0322 05:49:33.407619  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:49:33.407636  543705 memory.go:184] no items to output this cycle
I0322 05:49:33.407652  543705 cpu.go:275] no items to output this cycle
E0322 05:49:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:49:43.409781  543705 memory.go:191] Add success.
I0322 05:49:43.409799  543705 cpu.go:282] Add success.
I0322 05:49:43.420242  543705 net.go:648] Add success.
I0322 05:49:43.422969  543705 net.go:770] primary dev: ETH0
I0322 05:49:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:49:43.422994  543705 net.go:698] Add success.
I0322 05:49:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:49:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:49:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:49:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:49:53.409780  543705 memory.go:184] no items to output this cycle
I0322 05:49:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 05:50:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:50:03.409785  543705 cpu.go:275] no items to output this cycle
I0322 05:50:03.409796  543705 memory.go:184] no items to output this cycle
E0322 05:50:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:50:13.409788  543705 memory.go:191] Add success.
I0322 05:50:13.409797  543705 cpu.go:282] Add success.
W0322 05:50:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:50:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:50:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:50:13.420070  543705 net.go:648] Add success.
I0322 05:50:13.422954  543705 net.go:770] primary dev: ETH0
I0322 05:50:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:50:13.422979  543705 net.go:698] Add success.
I0322 05:50:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:50:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:50:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 05:50:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:50:14.456585  543705 disk_worker.go:494] system disk:vda1
I0322 05:50:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:50:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:50:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:50:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:50:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:50:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:50:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:50:23.409794  543705 memory.go:184] no items to output this cycle
I0322 05:50:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:50:33.353683  543705 disk_info.go:125] begin check local disk info of client
I0322 05:50:33.356189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:50:33.356196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fcdc0 0xc0001fce00]
E0322 05:50:33.407514  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:50:33.407528  543705 memory.go:184] no items to output this cycle
I0322 05:50:33.407539  543705 cpu.go:275] no items to output this cycle
E0322 05:50:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:50:43.409784  543705 memory.go:191] Add success.
I0322 05:50:43.409810  543705 cpu.go:282] Add success.
I0322 05:50:43.419983  543705 net.go:648] Add success.
I0322 05:50:43.422468  543705 net.go:770] primary dev: ETH0
I0322 05:50:43.422482  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:50:43.422494  543705 net.go:698] Add success.
I0322 05:50:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:50:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:50:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:50:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:50:53.409802  543705 memory.go:184] no items to output this cycle
I0322 05:50:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 05:51:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:51:03.409815  543705 memory.go:184] no items to output this cycle
I0322 05:51:03.409830  543705 cpu.go:275] no items to output this cycle
E0322 05:51:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:51:13.409793  543705 memory.go:191] Add success.
I0322 05:51:13.409810  543705 cpu.go:282] Add success.
W0322 05:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:51:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:51:13.420114  543705 net.go:648] Add success.
I0322 05:51:13.422711  543705 net.go:770] primary dev: ETH0
I0322 05:51:13.422723  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:51:13.422735  543705 net.go:698] Add success.
I0322 05:51:13.468616  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e0d613d-45ab-4480-a2aa-83f4331661bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:51:13.468649  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:51:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:51:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0322 05:51:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:51:14.457041  543705 disk_worker.go:494] system disk:vda1
I0322 05:51:14.457071  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:51:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:51:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:51:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:51:23.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:51:23.410388  543705 memory.go:184] no items to output this cycle
I0322 05:51:23.410389  543705 cpu.go:275] no items to output this cycle
I0322 05:51:33.357675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:51:33.360183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:51:33.360190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc9c0 0xc0004dca00]
E0322 05:51:33.407516  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:51:33.407532  543705 memory.go:184] no items to output this cycle
I0322 05:51:33.407552  543705 cpu.go:275] no items to output this cycle
I0322 05:51:39.521344  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:51:39.521350  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:51:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:51:43.410727  543705 memory.go:191] Add success.
I0322 05:51:43.409807  543705 cpu.go:282] Add success.
I0322 05:51:43.420447  543705 net.go:648] Add success.
I0322 05:51:43.423381  543705 net.go:770] primary dev: ETH0
I0322 05:51:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:51:43.423410  543705 net.go:698] Add success.
I0322 05:51:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:51:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:51:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:51:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:51:53.409782  543705 memory.go:184] no items to output this cycle
I0322 05:51:53.409828  543705 cpu.go:275] no items to output this cycle
E0322 05:52:03.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:52:03.409824  543705 memory.go:184] no items to output this cycle
I0322 05:52:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 05:52:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:52:13.409783  543705 memory.go:191] Add success.
W0322 05:52:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 05:52:13.409809  543705 cpu.go:282] Add success.
W0322 05:52:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:52:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:52:13.420377  543705 net.go:648] Add success.
I0322 05:52:13.422896  543705 net.go:770] primary dev: ETH0
I0322 05:52:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:52:13.422925  543705 net.go:698] Add success.
W0322 05:52:14.455087  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:52:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0322 05:52:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0322 05:52:14.456869  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:52:14.456878  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:52:14.456884  543705 custom_config.go:64] query custom config with name: gpu
I0322 05:52:14.456956  543705 disk_worker.go:494] system disk:vda1
I0322 05:52:14.456995  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:52:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:52:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:52:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:52:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:52:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:52:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:52:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:52:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:52:23.409771  543705 memory.go:184] no items to output this cycle
I0322 05:52:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 05:52:33.361681  543705 disk_info.go:125] begin check local disk info of client
I0322 05:52:33.364215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:52:33.364222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dc440 0xc0004dc480]
E0322 05:52:33.407602  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:52:33.407619  543705 memory.go:184] no items to output this cycle
I0322 05:52:33.407635  543705 cpu.go:275] no items to output this cycle
E0322 05:52:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:52:43.409816  543705 memory.go:191] Add success.
I0322 05:52:43.409825  543705 cpu.go:282] Add success.
I0322 05:52:43.419901  543705 net.go:648] Add success.
I0322 05:52:43.422678  543705 net.go:770] primary dev: ETH0
I0322 05:52:43.422692  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:52:43.422705  543705 net.go:698] Add success.
I0322 05:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:52:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:52:53.410272  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:52:53.410293  543705 memory.go:184] no items to output this cycle
I0322 05:52:53.410307  543705 cpu.go:275] no items to output this cycle
E0322 05:53:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:53:03.409824  543705 memory.go:184] no items to output this cycle
I0322 05:53:03.409840  543705 cpu.go:275] no items to output this cycle
E0322 05:53:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:53:13.409782  543705 memory.go:191] Add success.
I0322 05:53:13.409814  543705 cpu.go:282] Add success.
W0322 05:53:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:53:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:53:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:53:13.420117  543705 net.go:648] Add success.
I0322 05:53:13.422926  543705 net.go:770] primary dev: ETH0
I0322 05:53:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:53:13.422955  543705 net.go:698] Add success.
I0322 05:53:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:53:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:53:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 05:53:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:53:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 05:53:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:53:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:53:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:53:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:53:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:53:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:53:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:53:23.409773  543705 memory.go:184] no items to output this cycle
I0322 05:53:23.409822  543705 cpu.go:275] no items to output this cycle
I0322 05:53:33.365674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:53:33.368199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:53:33.368205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464b00 0xc000464b40]
E0322 05:53:33.407513  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:53:33.407524  543705 memory.go:184] no items to output this cycle
I0322 05:53:33.407531  543705 cpu.go:275] no items to output this cycle
E0322 05:53:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:53:43.409783  543705 memory.go:191] Add success.
I0322 05:53:43.409787  543705 cpu.go:282] Add success.
I0322 05:53:43.420082  543705 net.go:648] Add success.
I0322 05:53:43.422638  543705 net.go:770] primary dev: ETH0
I0322 05:53:43.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:53:43.422667  543705 net.go:698] Add success.
I0322 05:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:53:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:53:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:53:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:53:53.409786  543705 memory.go:184] no items to output this cycle
I0322 05:53:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 05:54:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:54:03.409786  543705 memory.go:184] no items to output this cycle
I0322 05:54:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 05:54:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:54:13.409786  543705 memory.go:191] Add success.
I0322 05:54:13.409786  543705 cpu.go:282] Add success.
W0322 05:54:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:54:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:54:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:54:13.420095  543705 net.go:648] Add success.
I0322 05:54:13.422893  543705 net.go:770] primary dev: ETH0
I0322 05:54:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:54:13.422917  543705 net.go:698] Add success.
I0322 05:54:13.748874  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ed15f6fa-0886-4dba-aa48-8191dc995e5b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:54:13.748917  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 05:54:14.454468  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:54:14.454612  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:54:14.454675  543705 disk_worker.go:708] disk space is not compliant
W0322 05:54:14.454677  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:54:14.456023  543705 disk_worker.go:494] system disk:vda1
I0322 05:54:14.456067  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:54:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:54:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:54:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:54:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:54:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:54:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:54:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 05:54:23.409784  543705 memory.go:184] no items to output this cycle
I0322 05:54:33.369679  543705 disk_info.go:125] begin check local disk info of client
I0322 05:54:33.372194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:54:33.372200  543705 disk_info.go:196] parse disk info done, disk is : [0xc000279600 0xc000279640]
E0322 05:54:33.407534  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:54:33.407549  543705 memory.go:184] no items to output this cycle
I0322 05:54:33.407564  543705 cpu.go:275] no items to output this cycle
I0322 05:54:39.522351  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:54:39.522357  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:54:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:54:43.409800  543705 cpu.go:282] Add success.
I0322 05:54:43.410796  543705 memory.go:191] Add success.
I0322 05:54:43.419712  543705 net.go:648] Add success.
I0322 05:54:43.422108  543705 net.go:770] primary dev: ETH0
I0322 05:54:43.422121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:54:43.422132  543705 net.go:698] Add success.
I0322 05:54:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:54:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:54:53.409772  543705 memory.go:184] no items to output this cycle
I0322 05:54:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 05:55:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:55:03.409814  543705 memory.go:184] no items to output this cycle
I0322 05:55:03.409823  543705 cpu.go:275] no items to output this cycle
W0322 05:55:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:55:13.409734  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:55:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:55:13.409783  543705 cpu.go:282] Add success.
E0322 05:55:13.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:55:13.409833  543705 memory.go:191] Add success.
I0322 05:55:13.420075  543705 net.go:648] Add success.
I0322 05:55:13.422417  543705 net.go:770] primary dev: ETH0
I0322 05:55:13.422433  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:55:13.422446  543705 net.go:698] Add success.
I0322 05:55:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:55:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:55:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 05:55:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:55:14.456555  543705 disk_worker.go:494] system disk:vda1
I0322 05:55:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:55:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:55:16.458027  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:55:16.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:55:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:55:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:55:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:55:23.409786  543705 memory.go:184] no items to output this cycle
I0322 05:55:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 05:55:33.373675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:55:33.376194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:55:33.376201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036a640 0xc00036a680]
E0322 05:55:33.407502  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:55:33.407517  543705 memory.go:184] no items to output this cycle
I0322 05:55:33.407539  543705 cpu.go:275] no items to output this cycle
E0322 05:55:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:55:43.409779  543705 memory.go:191] Add success.
I0322 05:55:43.409807  543705 cpu.go:282] Add success.
I0322 05:55:43.420207  543705 net.go:648] Add success.
I0322 05:55:43.422681  543705 net.go:770] primary dev: ETH0
I0322 05:55:43.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:55:43.422710  543705 net.go:698] Add success.
I0322 05:55:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:55:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:55:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:55:53.410259  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:55:53.410285  543705 memory.go:184] no items to output this cycle
I0322 05:55:53.410290  543705 cpu.go:275] no items to output this cycle
E0322 05:56:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:56:03.409803  543705 memory.go:184] no items to output this cycle
I0322 05:56:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 05:56:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:56:13.409788  543705 memory.go:191] Add success.
I0322 05:56:13.409792  543705 cpu.go:282] Add success.
W0322 05:56:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:56:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:56:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:56:13.420064  543705 net.go:648] Add success.
I0322 05:56:13.422808  543705 net.go:770] primary dev: ETH0
I0322 05:56:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:56:13.422838  543705 net.go:698] Add success.
I0322 05:56:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:56:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:56:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 05:56:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:56:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 05:56:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:56:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:56:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:56:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:56:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:56:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:56:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 05:56:23.409788  543705 memory.go:184] no items to output this cycle
I0322 05:56:33.377673  543705 disk_info.go:125] begin check local disk info of client
I0322 05:56:33.380186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:56:33.380193  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e600 0xc00039e640]
E0322 05:56:33.407472  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:56:33.407491  543705 memory.go:184] no items to output this cycle
I0322 05:56:33.407508  543705 cpu.go:275] no items to output this cycle
E0322 05:56:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:56:43.409813  543705 memory.go:191] Add success.
I0322 05:56:43.409821  543705 cpu.go:282] Add success.
I0322 05:56:43.420154  543705 net.go:648] Add success.
I0322 05:56:43.423159  543705 net.go:770] primary dev: ETH0
I0322 05:56:43.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:56:43.423183  543705 net.go:698] Add success.
I0322 05:56:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:56:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:56:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:56:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:56:53.409800  543705 memory.go:184] no items to output this cycle
I0322 05:56:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 05:57:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:57:03.409789  543705 memory.go:184] no items to output this cycle
I0322 05:57:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 05:57:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:57:13.409782  543705 memory.go:191] Add success.
I0322 05:57:13.409783  543705 cpu.go:282] Add success.
W0322 05:57:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:57:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:57:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:57:13.420084  543705 net.go:648] Add success.
I0322 05:57:13.428427  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 05:57:13.428502  543705 net.go:770] primary dev: ETH0
I0322 05:57:13.428513  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:57:13.428525  543705 net.go:698] Add success.
I0322 05:57:13.453100  543705 event_worker.go:152] Polling the log file for events...
I0322 05:57:13.468264  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"92400cce-9971-4af9-8b89-9621180f7868","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 05:57:13.468298  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 05:57:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:57:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 05:57:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:57:14.456988  543705 disk_worker.go:494] system disk:vda1
I0322 05:57:14.457030  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 05:57:14.457145  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 05:57:14.457151  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 05:57:14.457155  543705 custom_config.go:64] query custom config with name: gpu
E0322 05:57:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 05:57:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:57:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 05:57:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 05:57:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:57:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:57:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:57:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:57:23.409801  543705 memory.go:184] no items to output this cycle
I0322 05:57:23.409815  543705 cpu.go:275] no items to output this cycle
I0322 05:57:33.381674  543705 disk_info.go:125] begin check local disk info of client
I0322 05:57:33.384207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:57:33.384214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004875c0 0xc000487600]
E0322 05:57:33.407466  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:57:33.407477  543705 memory.go:184] no items to output this cycle
I0322 05:57:33.407477  543705 cpu.go:275] no items to output this cycle
I0322 05:57:39.523365  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 05:57:39.523371  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 05:57:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:57:43.410870  543705 memory.go:191] Add success.
I0322 05:57:43.409796  543705 cpu.go:282] Add success.
I0322 05:57:43.419725  543705 net.go:648] Add success.
I0322 05:57:43.422509  543705 net.go:770] primary dev: ETH0
I0322 05:57:43.422522  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:57:43.422534  543705 net.go:698] Add success.
I0322 05:57:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:57:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:57:46.458106  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:57:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:57:53.409781  543705 memory.go:184] no items to output this cycle
I0322 05:57:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 05:58:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:58:03.409805  543705 memory.go:184] no items to output this cycle
I0322 05:58:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 05:58:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:58:13.409780  543705 memory.go:191] Add success.
W0322 05:58:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 05:58:13.409811  543705 cpu.go:282] Add success.
W0322 05:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:58:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:58:13.420161  543705 net.go:648] Add success.
I0322 05:58:13.422762  543705 net.go:770] primary dev: ETH0
I0322 05:58:13.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:58:13.422787  543705 net.go:698] Add success.
I0322 05:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:58:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:58:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 05:58:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:58:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 05:58:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:58:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:58:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:58:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:58:23.409770  543705 memory.go:184] no items to output this cycle
I0322 05:58:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 05:58:33.385672  543705 disk_info.go:125] begin check local disk info of client
I0322 05:58:33.388198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:58:33.388205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385080 0xc0003850c0]
E0322 05:58:33.407452  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:58:33.407467  543705 memory.go:184] no items to output this cycle
I0322 05:58:33.407477  543705 cpu.go:275] no items to output this cycle
E0322 05:58:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:58:43.409883  543705 memory.go:191] Add success.
I0322 05:58:43.409919  543705 cpu.go:282] Add success.
I0322 05:58:43.419722  543705 net.go:648] Add success.
I0322 05:58:43.422372  543705 net.go:770] primary dev: ETH0
I0322 05:58:43.422386  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:58:43.422397  543705 net.go:698] Add success.
I0322 05:58:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:58:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:58:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:58:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:58:53.409802  543705 memory.go:184] no items to output this cycle
I0322 05:58:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 05:59:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:59:03.409780  543705 memory.go:184] no items to output this cycle
I0322 05:59:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 05:59:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:59:13.409807  543705 memory.go:191] Add success.
I0322 05:59:13.409808  543705 cpu.go:282] Add success.
W0322 05:59:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 05:59:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 05:59:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 05:59:13.420155  543705 net.go:648] Add success.
I0322 05:59:13.423471  543705 net.go:770] primary dev: ETH0
I0322 05:59:13.423487  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:59:13.423500  543705 net.go:698] Add success.
I0322 05:59:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 05:59:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 05:59:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 05:59:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 05:59:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 05:59:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 05:59:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 05:59:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:59:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:59:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 05:59:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 05:59:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:59:23.409780  543705 memory.go:184] no items to output this cycle
I0322 05:59:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 05:59:33.389675  543705 disk_info.go:125] begin check local disk info of client
I0322 05:59:33.392224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 05:59:33.392230  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384200 0xc000384240]
E0322 05:59:33.407456  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:59:33.407471  543705 memory.go:184] no items to output this cycle
I0322 05:59:33.407487  543705 cpu.go:275] no items to output this cycle
E0322 05:59:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:59:43.409826  543705 memory.go:191] Add success.
I0322 05:59:43.409827  543705 cpu.go:282] Add success.
I0322 05:59:43.419943  543705 net.go:648] Add success.
I0322 05:59:43.423010  543705 net.go:770] primary dev: ETH0
I0322 05:59:43.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0322 05:59:43.423046  543705 net.go:698] Add success.
I0322 05:59:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 05:59:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 05:59:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0322 05:59:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 05:59:53.409879  543705 cpu.go:275] no items to output this cycle
I0322 05:59:53.409889  543705 memory.go:184] no items to output this cycle
E0322 06:00:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:00:03.409768  543705 memory.go:184] no items to output this cycle
I0322 06:00:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 06:00:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:00:13.409774  543705 memory.go:191] Add success.
W0322 06:00:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:00:13.409811  543705 cpu.go:282] Add success.
W0322 06:00:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:00:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:00:13.420346  543705 net.go:648] Add success.
I0322 06:00:13.423012  543705 net.go:770] primary dev: ETH0
I0322 06:00:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:00:13.423036  543705 net.go:698] Add success.
I0322 06:00:13.494042  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f5079b74-2dc8-4a8d-b531-04a79288b9e9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:00:13.494085  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:00:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:00:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:00:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 06:00:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:00:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 06:00:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:00:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:00:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:00:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:00:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:00:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:00:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:00:23.409778  543705 memory.go:184] no items to output this cycle
I0322 06:00:23.409804  543705 cpu.go:275] no items to output this cycle
I0322 06:00:33.393674  543705 disk_info.go:125] begin check local disk info of client
I0322 06:00:33.396279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:00:33.396285  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8f00 0xc0004d8f40]
E0322 06:00:33.407472  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:00:33.407484  543705 memory.go:184] no items to output this cycle
I0322 06:00:33.407510  543705 cpu.go:275] no items to output this cycle
I0322 06:00:39.524370  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:00:39.524377  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:00:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:00:43.410694  543705 memory.go:191] Add success.
I0322 06:00:43.409789  543705 cpu.go:282] Add success.
I0322 06:00:43.420374  543705 net.go:648] Add success.
I0322 06:00:43.423588  543705 net.go:770] primary dev: ETH0
I0322 06:00:43.423602  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:00:43.423617  543705 net.go:698] Add success.
I0322 06:00:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:00:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:00:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:00:53.409768  543705 memory.go:184] no items to output this cycle
I0322 06:00:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 06:01:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:01:03.409793  543705 memory.go:184] no items to output this cycle
I0322 06:01:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 06:01:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:01:13.409815  543705 cpu.go:282] Add success.
I0322 06:01:13.409826  543705 memory.go:191] Add success.
W0322 06:01:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:01:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:01:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:01:13.420150  543705 net.go:648] Add success.
I0322 06:01:13.422766  543705 net.go:770] primary dev: ETH0
I0322 06:01:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:01:13.422793  543705 net.go:698] Add success.
I0322 06:01:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:01:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:01:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 06:01:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:01:14.456505  543705 disk_worker.go:494] system disk:vda1
I0322 06:01:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:01:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:01:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:01:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:01:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:01:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:01:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:01:23.409768  543705 memory.go:184] no items to output this cycle
I0322 06:01:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 06:01:33.397675  543705 disk_info.go:125] begin check local disk info of client
I0322 06:01:33.400259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:01:33.400265  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8c00 0xc0004d8c40]
E0322 06:01:33.407451  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:01:33.407465  543705 memory.go:184] no items to output this cycle
I0322 06:01:33.407479  543705 cpu.go:275] no items to output this cycle
E0322 06:01:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:01:43.409792  543705 memory.go:191] Add success.
I0322 06:01:43.409808  543705 cpu.go:282] Add success.
I0322 06:01:43.419852  543705 net.go:648] Add success.
I0322 06:01:43.422489  543705 net.go:770] primary dev: ETH0
I0322 06:01:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:01:43.422520  543705 net.go:698] Add success.
I0322 06:01:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:01:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:01:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:01:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:01:53.409764  543705 memory.go:184] no items to output this cycle
I0322 06:01:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 06:02:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:02:03.409790  543705 memory.go:184] no items to output this cycle
I0322 06:02:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 06:02:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:02:13.409792  543705 memory.go:191] Add success.
I0322 06:02:13.409806  543705 cpu.go:282] Add success.
W0322 06:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:02:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:02:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:02:13.420292  543705 net.go:648] Add success.
I0322 06:02:13.422944  543705 net.go:770] primary dev: ETH0
I0322 06:02:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:02:13.422970  543705 net.go:698] Add success.
W0322 06:02:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:02:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 06:02:14.455221  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:02:14.455978  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:02:14.455988  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:02:14.455995  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:02:14.456607  543705 disk_worker.go:494] system disk:vda1
I0322 06:02:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:02:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:02:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:02:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:02:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:02:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:02:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:02:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:02:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:02:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 06:02:23.409798  543705 memory.go:184] no items to output this cycle
I0322 06:02:33.401675  543705 disk_info.go:125] begin check local disk info of client
I0322 06:02:33.404171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:02:33.404177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3d80 0xc0002a3dc0]
E0322 06:02:33.408427  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:02:33.408444  543705 memory.go:184] no items to output this cycle
I0322 06:02:33.408460  543705 cpu.go:275] no items to output this cycle
E0322 06:02:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:02:43.409785  543705 memory.go:191] Add success.
I0322 06:02:43.409804  543705 cpu.go:282] Add success.
I0322 06:02:43.420036  543705 net.go:648] Add success.
I0322 06:02:43.423012  543705 net.go:770] primary dev: ETH0
I0322 06:02:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:02:43.423037  543705 net.go:698] Add success.
I0322 06:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:02:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:02:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:02:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:02:53.409767  543705 memory.go:184] no items to output this cycle
I0322 06:02:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 06:03:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:03:03.409789  543705 memory.go:184] no items to output this cycle
I0322 06:03:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 06:03:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:03:13.409818  543705 memory.go:191] Add success.
I0322 06:03:13.409825  543705 cpu.go:282] Add success.
W0322 06:03:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:03:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:03:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:03:13.420152  543705 net.go:648] Add success.
I0322 06:03:13.423281  543705 net.go:770] primary dev: ETH0
I0322 06:03:13.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:03:13.423307  543705 net.go:698] Add success.
I0322 06:03:13.620113  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0a95152e-c840-4cb2-a52d-227036948503","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:03:13.620147  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:03:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:03:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:03:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 06:03:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:03:14.456543  543705 disk_worker.go:494] system disk:vda1
I0322 06:03:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:03:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:03:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:03:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:03:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:03:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:03:23.409777  543705 memory.go:184] no items to output this cycle
I0322 06:03:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 06:03:33.405687  543705 disk_info.go:125] begin check local disk info of client
E0322 06:03:33.407957  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:03:33.407975  543705 memory.go:184] no items to output this cycle
I0322 06:03:33.407993  543705 cpu.go:275] no items to output this cycle
I0322 06:03:33.408295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:03:33.408300  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053e600 0xc00053e640]
I0322 06:03:39.525378  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:03:39.525388  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:03:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:03:43.410631  543705 memory.go:191] Add success.
I0322 06:03:43.409815  543705 cpu.go:282] Add success.
I0322 06:03:43.420311  543705 net.go:648] Add success.
I0322 06:03:43.423014  543705 net.go:770] primary dev: ETH0
I0322 06:03:43.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:03:43.423057  543705 net.go:698] Add success.
I0322 06:03:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:03:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:03:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:03:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:03:53.409786  543705 memory.go:184] no items to output this cycle
I0322 06:03:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 06:04:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:04:03.409790  543705 memory.go:184] no items to output this cycle
I0322 06:04:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 06:04:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:04:13.409786  543705 memory.go:191] Add success.
W0322 06:04:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:04:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:04:13.409826  543705 cpu.go:282] Add success.
I0322 06:04:13.420192  543705 net.go:648] Add success.
I0322 06:04:13.423315  543705 net.go:770] primary dev: ETH0
I0322 06:04:13.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:04:13.423341  543705 net.go:698] Add success.
I0322 06:04:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:04:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:04:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 06:04:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:04:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 06:04:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:04:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:04:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:04:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:04:23.409778  543705 memory.go:184] no items to output this cycle
I0322 06:04:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 06:04:33.409686  543705 disk_info.go:125] begin check local disk info of client
E0322 06:04:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:04:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 06:04:33.409792  543705 memory.go:184] no items to output this cycle
I0322 06:04:33.412268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:04:33.412274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464a00 0xc000464a40]
E0322 06:04:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:04:43.409774  543705 memory.go:191] Add success.
I0322 06:04:43.409796  543705 cpu.go:282] Add success.
I0322 06:04:43.420005  543705 net.go:648] Add success.
I0322 06:04:43.422653  543705 net.go:770] primary dev: ETH0
I0322 06:04:43.422668  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:04:43.422683  543705 net.go:698] Add success.
I0322 06:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:04:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:04:53.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:04:53.409855  543705 memory.go:184] no items to output this cycle
I0322 06:04:53.409927  543705 cpu.go:275] no items to output this cycle
E0322 06:05:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:05:03.409796  543705 memory.go:184] no items to output this cycle
I0322 06:05:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 06:05:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:05:13.409792  543705 memory.go:191] Add success.
I0322 06:05:13.409792  543705 cpu.go:282] Add success.
W0322 06:05:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:05:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:05:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:05:13.420136  543705 net.go:648] Add success.
I0322 06:05:13.423116  543705 net.go:770] primary dev: ETH0
I0322 06:05:13.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:05:13.423141  543705 net.go:698] Add success.
I0322 06:05:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:05:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:05:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 06:05:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:05:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 06:05:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:05:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:05:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:05:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:05:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 06:05:23.409777  543705 memory.go:184] no items to output this cycle
E0322 06:05:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:05:33.409775  543705 memory.go:184] no items to output this cycle
I0322 06:05:33.409774  543705 cpu.go:275] no items to output this cycle
I0322 06:05:33.412869  543705 disk_info.go:125] begin check local disk info of client
I0322 06:05:33.415345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:05:33.415366  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381180 0xc0003811c0]
E0322 06:05:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:05:43.409812  543705 memory.go:191] Add success.
I0322 06:05:43.409819  543705 cpu.go:282] Add success.
I0322 06:05:43.419995  543705 net.go:648] Add success.
I0322 06:05:43.422655  543705 net.go:770] primary dev: ETH0
I0322 06:05:43.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:05:43.422684  543705 net.go:698] Add success.
I0322 06:05:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:05:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:05:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:05:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:05:53.409868  543705 memory.go:184] no items to output this cycle
I0322 06:05:53.409945  543705 cpu.go:275] no items to output this cycle
E0322 06:06:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:06:03.409814  543705 memory.go:184] no items to output this cycle
I0322 06:06:03.409836  543705 cpu.go:275] no items to output this cycle
E0322 06:06:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:06:13.409790  543705 memory.go:191] Add success.
W0322 06:06:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:06:13.409822  543705 cpu.go:282] Add success.
W0322 06:06:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:06:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:06:13.420150  543705 net.go:648] Add success.
I0322 06:06:13.422559  543705 net.go:770] primary dev: ETH0
I0322 06:06:13.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:06:13.422585  543705 net.go:698] Add success.
I0322 06:06:13.463196  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"096beb82-832f-4cf4-916c-589d8760c1fd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:06:13.463232  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:06:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:06:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 06:06:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:06:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 06:06:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:06:15.455616  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:06:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:06:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:06:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:06:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:06:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:06:23.409774  543705 memory.go:184] no items to output this cycle
I0322 06:06:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 06:06:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:06:33.409811  543705 memory.go:184] no items to output this cycle
I0322 06:06:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 06:06:33.416021  543705 disk_info.go:125] begin check local disk info of client
I0322 06:06:33.418516  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:06:33.418522  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e4c0 0xc00034e500]
I0322 06:06:39.526376  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:06:39.526382  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:06:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:06:43.410894  543705 memory.go:191] Add success.
I0322 06:06:43.409812  543705 cpu.go:282] Add success.
I0322 06:06:43.420608  543705 net.go:648] Add success.
I0322 06:06:43.423330  543705 net.go:770] primary dev: ETH0
I0322 06:06:43.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:06:43.423354  543705 net.go:698] Add success.
I0322 06:06:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:06:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:06:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:06:53.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:06:53.409863  543705 memory.go:184] no items to output this cycle
I0322 06:06:53.409898  543705 cpu.go:275] no items to output this cycle
E0322 06:07:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:07:03.409792  543705 memory.go:184] no items to output this cycle
I0322 06:07:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 06:07:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:07:13.409786  543705 memory.go:191] Add success.
I0322 06:07:13.409805  543705 cpu.go:282] Add success.
W0322 06:07:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:07:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:07:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:07:13.420145  543705 net.go:648] Add success.
I0322 06:07:13.422969  543705 net.go:770] primary dev: ETH0
I0322 06:07:13.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:07:13.422999  543705 net.go:698] Add success.
I0322 06:07:13.453600  543705 event_worker.go:152] Polling the log file for events...
W0322 06:07:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:07:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 06:07:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:07:14.455909  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:07:14.455917  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:07:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:07:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 06:07:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:07:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:07:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:07:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:07:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:07:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:07:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:07:16.472305  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:07:23.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:07:23.410280  543705 memory.go:184] no items to output this cycle
I0322 06:07:23.410300  543705 cpu.go:275] no items to output this cycle
E0322 06:07:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:07:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 06:07:33.409785  543705 memory.go:184] no items to output this cycle
I0322 06:07:33.418995  543705 disk_info.go:125] begin check local disk info of client
I0322 06:07:33.421524  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:07:33.423319  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352700 0xc000352740]
E0322 06:07:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:07:43.410649  543705 memory.go:191] Add success.
I0322 06:07:43.409805  543705 cpu.go:282] Add success.
I0322 06:07:43.420356  543705 net.go:648] Add success.
I0322 06:07:43.422995  543705 net.go:770] primary dev: ETH0
I0322 06:07:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:07:43.423021  543705 net.go:698] Add success.
I0322 06:07:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:07:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:07:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:07:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:07:53.409763  543705 memory.go:184] no items to output this cycle
I0322 06:07:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 06:08:03.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:08:03.409912  543705 cpu.go:275] no items to output this cycle
I0322 06:08:03.409915  543705 memory.go:184] no items to output this cycle
E0322 06:08:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:08:13.409791  543705 memory.go:191] Add success.
I0322 06:08:13.409814  543705 cpu.go:282] Add success.
W0322 06:08:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:08:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:08:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:08:13.420173  543705 net.go:648] Add success.
I0322 06:08:13.422998  543705 net.go:770] primary dev: ETH0
I0322 06:08:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:08:13.423025  543705 net.go:698] Add success.
I0322 06:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:08:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:08:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 06:08:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:08:14.456618  543705 disk_worker.go:494] system disk:vda1
I0322 06:08:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:08:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:08:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:08:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:08:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:08:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:08:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:08:23.409797  543705 memory.go:184] no items to output this cycle
I0322 06:08:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 06:08:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:08:33.409786  543705 memory.go:184] no items to output this cycle
I0322 06:08:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 06:08:33.423389  543705 disk_info.go:125] begin check local disk info of client
I0322 06:08:33.425937  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:08:33.425943  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0322 06:08:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:08:43.410528  543705 memory.go:191] Add success.
I0322 06:08:43.409829  543705 cpu.go:282] Add success.
I0322 06:08:43.420215  543705 net.go:648] Add success.
I0322 06:08:43.422612  543705 net.go:770] primary dev: ETH0
I0322 06:08:43.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:08:43.422638  543705 net.go:698] Add success.
I0322 06:08:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:08:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:08:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:08:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:08:53.409798  543705 memory.go:184] no items to output this cycle
I0322 06:08:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 06:09:03.409832  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:09:03.409852  543705 memory.go:184] no items to output this cycle
I0322 06:09:03.409911  543705 cpu.go:275] no items to output this cycle
E0322 06:09:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:09:13.409779  543705 memory.go:191] Add success.
W0322 06:09:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:09:13.409812  543705 cpu.go:282] Add success.
W0322 06:09:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:09:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:09:13.420254  543705 net.go:648] Add success.
I0322 06:09:13.423098  543705 net.go:770] primary dev: ETH0
I0322 06:09:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:09:13.423124  543705 net.go:698] Add success.
I0322 06:09:13.816445  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bc7753f1-5b1e-4df5-9b90-3b9e85913186","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:09:13.816479  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:09:14.453978  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:09:14.454140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:09:14.454201  543705 disk_worker.go:708] disk space is not compliant
W0322 06:09:14.454204  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:09:14.455590  543705 disk_worker.go:494] system disk:vda1
I0322 06:09:14.455647  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:09:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:09:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:09:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:09:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:09:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:09:23.409764  543705 memory.go:184] no items to output this cycle
I0322 06:09:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 06:09:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:09:33.409772  543705 memory.go:184] no items to output this cycle
I0322 06:09:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 06:09:33.426342  543705 disk_info.go:125] begin check local disk info of client
I0322 06:09:33.428824  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:09:33.428829  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b940 0xc00032b980]
I0322 06:09:39.527384  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:09:39.527390  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:09:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:09:43.410738  543705 memory.go:191] Add success.
I0322 06:09:43.409836  543705 cpu.go:282] Add success.
I0322 06:09:43.420509  543705 net.go:648] Add success.
I0322 06:09:43.423750  543705 net.go:770] primary dev: ETH0
I0322 06:09:43.423765  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:09:43.423780  543705 net.go:698] Add success.
I0322 06:09:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:09:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:09:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:09:53.410364  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:09:53.410382  543705 memory.go:184] no items to output this cycle
I0322 06:09:53.410423  543705 cpu.go:275] no items to output this cycle
E0322 06:10:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:10:03.409805  543705 memory.go:184] no items to output this cycle
I0322 06:10:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 06:10:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:10:13.409786  543705 memory.go:191] Add success.
I0322 06:10:13.409803  543705 cpu.go:282] Add success.
W0322 06:10:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:10:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:10:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:10:13.420266  543705 net.go:648] Add success.
I0322 06:10:13.422945  543705 net.go:770] primary dev: ETH0
I0322 06:10:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:10:13.422969  543705 net.go:698] Add success.
I0322 06:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:10:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:10:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 06:10:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:10:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 06:10:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:10:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:10:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:10:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:10:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:10:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:10:23.409772  543705 memory.go:184] no items to output this cycle
I0322 06:10:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 06:10:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:10:33.409785  543705 memory.go:184] no items to output this cycle
I0322 06:10:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 06:10:33.429339  543705 disk_info.go:125] begin check local disk info of client
I0322 06:10:33.431839  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:10:33.431845  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cef80 0xc0003cefc0]
E0322 06:10:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:10:43.410765  543705 memory.go:191] Add success.
I0322 06:10:43.409818  543705 cpu.go:282] Add success.
I0322 06:10:43.420439  543705 net.go:648] Add success.
I0322 06:10:43.423444  543705 net.go:770] primary dev: ETH0
I0322 06:10:43.423459  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:10:43.423471  543705 net.go:698] Add success.
I0322 06:10:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:10:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:10:53.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:10:53.409913  543705 cpu.go:275] no items to output this cycle
I0322 06:10:53.410035  543705 memory.go:184] no items to output this cycle
E0322 06:11:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:11:03.409793  543705 memory.go:184] no items to output this cycle
I0322 06:11:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 06:11:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:11:13.409825  543705 memory.go:191] Add success.
I0322 06:11:13.409827  543705 cpu.go:282] Add success.
W0322 06:11:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:11:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:11:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:11:13.420226  543705 net.go:648] Add success.
I0322 06:11:13.422975  543705 net.go:770] primary dev: ETH0
I0322 06:11:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:11:13.422999  543705 net.go:698] Add success.
I0322 06:11:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:11:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:11:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 06:11:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:11:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 06:11:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:11:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:11:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:11:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:11:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:11:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:11:23.409804  543705 memory.go:184] no items to output this cycle
I0322 06:11:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 06:11:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:11:33.409813  543705 memory.go:184] no items to output this cycle
I0322 06:11:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 06:11:33.432346  543705 disk_info.go:125] begin check local disk info of client
I0322 06:11:33.434824  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:11:33.434830  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e7c0 0xc00035e800]
E0322 06:11:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:11:43.410705  543705 memory.go:191] Add success.
I0322 06:11:43.409813  543705 cpu.go:282] Add success.
I0322 06:11:43.420394  543705 net.go:648] Add success.
I0322 06:11:43.423301  543705 net.go:770] primary dev: ETH0
I0322 06:11:43.423314  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:11:43.423325  543705 net.go:698] Add success.
I0322 06:11:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:11:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:11:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:11:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:11:53.409885  543705 memory.go:184] no items to output this cycle
I0322 06:11:53.409903  543705 cpu.go:275] no items to output this cycle
E0322 06:12:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:12:03.409779  543705 memory.go:184] no items to output this cycle
I0322 06:12:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 06:12:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:12:13.409798  543705 memory.go:191] Add success.
I0322 06:12:13.409815  543705 cpu.go:282] Add success.
W0322 06:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:12:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:12:13.420245  543705 net.go:648] Add success.
I0322 06:12:13.423083  543705 net.go:770] primary dev: ETH0
I0322 06:12:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:12:13.423108  543705 net.go:698] Add success.
I0322 06:12:13.469158  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef0d889b-b0a1-4bba-97b8-9aadd981b890","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:12:13.469203  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 06:12:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:12:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 06:12:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:12:14.455886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:12:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:12:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:12:14.456555  543705 disk_worker.go:494] system disk:vda1
I0322 06:12:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:12:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:12:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:12:16.457879  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:12:16.457879  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:12:16.457931  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:12:16.457950  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:12:16.472269  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:12:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:12:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 06:12:23.409781  543705 memory.go:184] no items to output this cycle
E0322 06:12:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:12:33.409775  543705 memory.go:184] no items to output this cycle
I0322 06:12:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 06:12:33.435317  543705 disk_info.go:125] begin check local disk info of client
I0322 06:12:33.437776  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:12:33.437782  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be000 0xc0003be040]
I0322 06:12:39.528397  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:12:39.528403  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:12:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:12:43.410613  543705 memory.go:191] Add success.
I0322 06:12:43.409802  543705 cpu.go:282] Add success.
I0322 06:12:43.420305  543705 net.go:648] Add success.
I0322 06:12:43.423061  543705 net.go:770] primary dev: ETH0
I0322 06:12:43.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:12:43.423088  543705 net.go:698] Add success.
I0322 06:12:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:12:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:12:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:12:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:12:53.409781  543705 memory.go:184] no items to output this cycle
I0322 06:12:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 06:13:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:13:03.409785  543705 memory.go:184] no items to output this cycle
I0322 06:13:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 06:13:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:13:13.409813  543705 memory.go:191] Add success.
I0322 06:13:13.409819  543705 cpu.go:282] Add success.
W0322 06:13:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:13:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:13:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:13:13.420158  543705 net.go:648] Add success.
I0322 06:13:13.422728  543705 net.go:770] primary dev: ETH0
I0322 06:13:13.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:13:13.422753  543705 net.go:698] Add success.
I0322 06:13:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:13:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:13:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 06:13:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:13:14.456772  543705 disk_worker.go:494] system disk:vda1
I0322 06:13:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:13:15.455782  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:13:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:13:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:13:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:13:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:13:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:13:23.409795  543705 memory.go:184] no items to output this cycle
I0322 06:13:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 06:13:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:13:33.409778  543705 cpu.go:275] no items to output this cycle
I0322 06:13:33.409783  543705 memory.go:184] no items to output this cycle
I0322 06:13:33.438364  543705 disk_info.go:125] begin check local disk info of client
I0322 06:13:33.440801  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:13:33.440807  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374b00 0xc000374b40]
E0322 06:13:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:13:43.410664  543705 memory.go:191] Add success.
I0322 06:13:43.409818  543705 cpu.go:282] Add success.
I0322 06:13:43.420410  543705 net.go:648] Add success.
I0322 06:13:43.423036  543705 net.go:770] primary dev: ETH0
I0322 06:13:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:13:43.423061  543705 net.go:698] Add success.
I0322 06:13:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:13:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:13:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:13:53.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:13:53.409909  543705 memory.go:184] no items to output this cycle
I0322 06:13:53.409948  543705 cpu.go:275] no items to output this cycle
E0322 06:14:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:14:03.409805  543705 memory.go:184] no items to output this cycle
I0322 06:14:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 06:14:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:14:13.409818  543705 memory.go:191] Add success.
I0322 06:14:13.409828  543705 cpu.go:282] Add success.
W0322 06:14:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:14:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:14:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:14:13.420165  543705 net.go:648] Add success.
I0322 06:14:13.423069  543705 net.go:770] primary dev: ETH0
I0322 06:14:13.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:14:13.423099  543705 net.go:698] Add success.
I0322 06:14:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:14:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:14:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 06:14:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:14:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 06:14:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:14:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:14:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:14:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:14:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:14:23.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:14:23.410255  543705 memory.go:184] no items to output this cycle
I0322 06:14:23.410277  543705 cpu.go:275] no items to output this cycle
E0322 06:14:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:14:33.409779  543705 memory.go:184] no items to output this cycle
I0322 06:14:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 06:14:33.441393  543705 disk_info.go:125] begin check local disk info of client
I0322 06:14:33.443898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:14:33.443903  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fa080 0xc0001fa0c0]
E0322 06:14:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:14:43.410739  543705 memory.go:191] Add success.
I0322 06:14:43.409795  543705 cpu.go:282] Add success.
I0322 06:14:43.420420  543705 net.go:648] Add success.
I0322 06:14:43.423594  543705 net.go:770] primary dev: ETH0
I0322 06:14:43.423607  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:14:43.423620  543705 net.go:698] Add success.
I0322 06:14:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:14:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:14:53.410333  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:14:53.410360  543705 memory.go:184] no items to output this cycle
I0322 06:14:53.410390  543705 cpu.go:275] no items to output this cycle
E0322 06:15:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:15:03.409800  543705 memory.go:184] no items to output this cycle
I0322 06:15:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 06:15:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:15:13.409794  543705 memory.go:191] Add success.
I0322 06:15:13.409797  543705 cpu.go:282] Add success.
W0322 06:15:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:15:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:15:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:15:13.420259  543705 net.go:648] Add success.
I0322 06:15:13.423281  543705 net.go:770] primary dev: ETH0
I0322 06:15:13.423303  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:15:13.423322  543705 net.go:698] Add success.
I0322 06:15:13.468517  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e95850f5-26c3-4055-88eb-03780e2a8789","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:15:13.468551  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:15:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:15:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 06:15:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:15:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 06:15:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:15:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:15:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:15:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:15:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:15:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:15:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:15:23.409783  543705 memory.go:184] no items to output this cycle
I0322 06:15:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 06:15:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:15:33.409777  543705 memory.go:184] no items to output this cycle
I0322 06:15:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 06:15:33.444411  543705 disk_info.go:125] begin check local disk info of client
I0322 06:15:33.446890  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:15:33.446895  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326dc0 0xc000326e00]
I0322 06:15:39.529397  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:15:39.529403  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:15:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:15:43.410576  543705 memory.go:191] Add success.
I0322 06:15:43.409812  543705 cpu.go:282] Add success.
I0322 06:15:43.420336  543705 net.go:648] Add success.
I0322 06:15:43.422991  543705 net.go:770] primary dev: ETH0
I0322 06:15:43.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:15:43.423021  543705 net.go:698] Add success.
I0322 06:15:46.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:15:46.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:15:46.458117  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:15:53.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:15:53.409915  543705 memory.go:184] no items to output this cycle
I0322 06:15:53.409963  543705 cpu.go:275] no items to output this cycle
E0322 06:16:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:16:03.409785  543705 memory.go:184] no items to output this cycle
I0322 06:16:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 06:16:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:16:13.409783  543705 memory.go:191] Add success.
W0322 06:16:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:16:13.409814  543705 cpu.go:282] Add success.
W0322 06:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:16:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:16:13.420142  543705 net.go:648] Add success.
I0322 06:16:13.423371  543705 net.go:770] primary dev: ETH0
I0322 06:16:13.423385  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:16:13.423399  543705 net.go:698] Add success.
I0322 06:16:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:16:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:16:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 06:16:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:16:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 06:16:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:16:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:16:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:16:23.410256  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:16:23.410279  543705 memory.go:184] no items to output this cycle
I0322 06:16:23.410292  543705 cpu.go:275] no items to output this cycle
E0322 06:16:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:16:33.409787  543705 memory.go:184] no items to output this cycle
I0322 06:16:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 06:16:33.447416  543705 disk_info.go:125] begin check local disk info of client
I0322 06:16:33.449904  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:16:33.449909  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003846c0 0xc000384700]
E0322 06:16:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:16:43.410833  543705 memory.go:191] Add success.
I0322 06:16:43.409819  543705 cpu.go:282] Add success.
I0322 06:16:43.420534  543705 net.go:648] Add success.
I0322 06:16:43.423186  543705 net.go:770] primary dev: ETH0
I0322 06:16:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:16:43.423212  543705 net.go:698] Add success.
I0322 06:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:16:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:16:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:16:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:16:53.410389  543705 memory.go:184] no items to output this cycle
I0322 06:16:53.410397  543705 cpu.go:275] no items to output this cycle
E0322 06:17:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:17:03.409824  543705 memory.go:184] no items to output this cycle
I0322 06:17:03.409836  543705 cpu.go:275] no items to output this cycle
E0322 06:17:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:17:13.409783  543705 memory.go:191] Add success.
I0322 06:17:13.409808  543705 cpu.go:282] Add success.
W0322 06:17:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:17:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:17:13.420128  543705 net.go:648] Add success.
I0322 06:17:13.422591  543705 net.go:770] primary dev: ETH0
I0322 06:17:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:17:13.422615  543705 net.go:698] Add success.
I0322 06:17:13.453288  543705 event_worker.go:152] Polling the log file for events...
W0322 06:17:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:17:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 06:17:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:17:14.455890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:17:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:17:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:17:14.456532  543705 disk_worker.go:494] system disk:vda1
I0322 06:17:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:17:15.456775  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:17:15.456784  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:17:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:17:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:17:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:17:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:17:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:17:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:17:23.409776  543705 memory.go:184] no items to output this cycle
I0322 06:17:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 06:17:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:17:33.409798  543705 memory.go:184] no items to output this cycle
I0322 06:17:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 06:17:33.450423  543705 disk_info.go:125] begin check local disk info of client
I0322 06:17:33.452873  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:17:33.452878  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b07c0 0xc0004b0800]
E0322 06:17:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:17:43.410692  543705 memory.go:191] Add success.
I0322 06:17:43.409793  543705 cpu.go:282] Add success.
I0322 06:17:43.420374  543705 net.go:648] Add success.
I0322 06:17:43.423122  543705 net.go:770] primary dev: ETH0
I0322 06:17:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:17:43.423150  543705 net.go:698] Add success.
I0322 06:17:46.458019  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:17:46.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:17:46.458111  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:17:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:17:53.409805  543705 memory.go:184] no items to output this cycle
I0322 06:17:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 06:18:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:18:03.409786  543705 memory.go:184] no items to output this cycle
I0322 06:18:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 06:18:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:18:13.409777  543705 memory.go:191] Add success.
W0322 06:18:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:18:13.409810  543705 cpu.go:282] Add success.
W0322 06:18:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:18:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:18:13.420184  543705 net.go:648] Add success.
I0322 06:18:13.423021  543705 net.go:770] primary dev: ETH0
I0322 06:18:13.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:18:13.423050  543705 net.go:698] Add success.
I0322 06:18:13.509250  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8710a48e-d01b-4c99-9949-188f8ddbfe77","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:18:13.509285  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:18:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:18:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:18:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 06:18:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:18:14.456621  543705 disk_worker.go:494] system disk:vda1
I0322 06:18:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:18:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:18:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:18:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:18:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:18:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:18:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:18:23.409776  543705 memory.go:184] no items to output this cycle
I0322 06:18:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 06:18:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:18:33.409776  543705 memory.go:184] no items to output this cycle
I0322 06:18:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 06:18:33.453442  543705 disk_info.go:125] begin check local disk info of client
I0322 06:18:33.455925  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:18:33.455930  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b00 0xc0000c5b40]
I0322 06:18:39.530398  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:18:39.530405  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:18:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:18:43.410678  543705 memory.go:191] Add success.
I0322 06:18:43.409800  543705 cpu.go:282] Add success.
I0322 06:18:43.420366  543705 net.go:648] Add success.
I0322 06:18:43.423092  543705 net.go:770] primary dev: ETH0
I0322 06:18:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:18:43.423117  543705 net.go:698] Add success.
I0322 06:18:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:18:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:18:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:18:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:18:53.409781  543705 memory.go:184] no items to output this cycle
I0322 06:18:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 06:19:03.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:19:03.409902  543705 memory.go:184] no items to output this cycle
I0322 06:19:03.409975  543705 cpu.go:275] no items to output this cycle
E0322 06:19:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:19:13.409779  543705 memory.go:191] Add success.
W0322 06:19:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:19:13.409811  543705 cpu.go:282] Add success.
W0322 06:19:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:19:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:19:13.420110  543705 net.go:648] Add success.
I0322 06:19:13.422838  543705 net.go:770] primary dev: ETH0
I0322 06:19:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:19:13.422862  543705 net.go:698] Add success.
I0322 06:19:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:19:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:19:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 06:19:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:19:14.456491  543705 disk_worker.go:494] system disk:vda1
I0322 06:19:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:19:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:19:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:19:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:19:23.409791  543705 memory.go:184] no items to output this cycle
I0322 06:19:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 06:19:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:19:33.409776  543705 memory.go:184] no items to output this cycle
I0322 06:19:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 06:19:33.456469  543705 disk_info.go:125] begin check local disk info of client
I0322 06:19:33.458997  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:19:33.459002  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2900 0xc0002a2940]
E0322 06:19:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:19:43.410651  543705 memory.go:191] Add success.
I0322 06:19:43.409796  543705 cpu.go:282] Add success.
I0322 06:19:43.420354  543705 net.go:648] Add success.
I0322 06:19:43.423341  543705 net.go:770] primary dev: ETH0
I0322 06:19:43.423354  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:19:43.423365  543705 net.go:698] Add success.
I0322 06:19:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:19:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:19:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:19:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:19:53.409784  543705 memory.go:184] no items to output this cycle
I0322 06:19:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 06:20:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:20:03.409778  543705 memory.go:184] no items to output this cycle
I0322 06:20:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 06:20:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:20:13.409889  543705 memory.go:191] Add success.
I0322 06:20:13.409891  543705 cpu.go:282] Add success.
W0322 06:20:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:20:13.409960  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:20:13.409968  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:20:13.419738  543705 net.go:648] Add success.
I0322 06:20:13.422472  543705 net.go:770] primary dev: ETH0
I0322 06:20:13.422486  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:20:13.422497  543705 net.go:698] Add success.
I0322 06:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:20:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:20:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 06:20:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:20:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 06:20:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:20:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:20:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:20:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:20:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:20:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:20:23.409775  543705 memory.go:184] no items to output this cycle
I0322 06:20:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 06:20:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:20:33.409773  543705 memory.go:184] no items to output this cycle
I0322 06:20:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 06:20:33.459534  543705 disk_info.go:125] begin check local disk info of client
I0322 06:20:33.462064  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:20:33.462069  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2740 0xc0002a2780]
E0322 06:20:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:20:43.410649  543705 memory.go:191] Add success.
I0322 06:20:43.409796  543705 cpu.go:282] Add success.
I0322 06:20:43.420347  543705 net.go:648] Add success.
I0322 06:20:43.423168  543705 net.go:770] primary dev: ETH0
I0322 06:20:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:20:43.423195  543705 net.go:698] Add success.
I0322 06:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:20:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:20:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:20:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:20:53.409789  543705 cpu.go:275] no items to output this cycle
I0322 06:20:53.409793  543705 memory.go:184] no items to output this cycle
E0322 06:21:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:21:03.409790  543705 memory.go:184] no items to output this cycle
I0322 06:21:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 06:21:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:21:13.409785  543705 memory.go:191] Add success.
I0322 06:21:13.409784  543705 cpu.go:282] Add success.
W0322 06:21:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:21:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:21:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:21:13.419723  543705 net.go:648] Add success.
I0322 06:21:13.422568  543705 net.go:770] primary dev: ETH0
I0322 06:21:13.422580  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:21:13.422591  543705 net.go:698] Add success.
I0322 06:21:13.469501  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"de142550-f8f7-449f-81e1-44c4e3879109","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:21:13.469533  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:21:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:21:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:21:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 06:21:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:21:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 06:21:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:21:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:21:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:21:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:21:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:21:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:21:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:21:23.409771  543705 memory.go:184] no items to output this cycle
I0322 06:21:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 06:21:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:21:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 06:21:33.409787  543705 memory.go:184] no items to output this cycle
I0322 06:21:33.462488  543705 disk_info.go:125] begin check local disk info of client
I0322 06:21:33.464938  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:21:33.464943  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a33c0 0xc0002a3400]
I0322 06:21:39.531401  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:21:39.531407  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:21:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:21:43.410786  543705 memory.go:191] Add success.
I0322 06:21:43.409814  543705 cpu.go:282] Add success.
I0322 06:21:43.420609  543705 net.go:648] Add success.
I0322 06:21:43.423211  543705 net.go:770] primary dev: ETH0
I0322 06:21:43.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:21:43.423236  543705 net.go:698] Add success.
I0322 06:21:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:21:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:21:46.458122  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:21:53.410447  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:21:53.410462  543705 cpu.go:275] no items to output this cycle
I0322 06:21:53.410467  543705 memory.go:184] no items to output this cycle
E0322 06:22:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:22:03.409789  543705 memory.go:184] no items to output this cycle
I0322 06:22:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 06:22:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:22:13.409877  543705 memory.go:191] Add success.
W0322 06:22:13.409907  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:22:13.409924  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:22:13.409929  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:22:13.409937  543705 cpu.go:282] Add success.
I0322 06:22:13.419751  543705 net.go:648] Add success.
I0322 06:22:13.422304  543705 net.go:770] primary dev: ETH0
I0322 06:22:13.422319  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:22:13.422333  543705 net.go:698] Add success.
W0322 06:22:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:22:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 06:22:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:22:14.456943  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:22:14.456952  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:22:14.456958  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:22:14.457003  543705 disk_worker.go:494] system disk:vda1
I0322 06:22:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:22:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:22:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:22:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:22:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:22:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:22:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:22:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:22:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:22:23.409779  543705 memory.go:184] no items to output this cycle
I0322 06:22:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 06:22:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:22:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 06:22:33.409790  543705 memory.go:184] no items to output this cycle
I0322 06:22:33.465494  543705 disk_info.go:125] begin check local disk info of client
I0322 06:22:33.467977  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:22:33.467983  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae40 0xc00007ae80]
E0322 06:22:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:22:43.410724  543705 memory.go:191] Add success.
I0322 06:22:43.409809  543705 cpu.go:282] Add success.
I0322 06:22:43.420410  543705 net.go:648] Add success.
I0322 06:22:43.423126  543705 net.go:770] primary dev: ETH0
I0322 06:22:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:22:43.423162  543705 net.go:698] Add success.
I0322 06:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:22:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:22:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:22:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:22:53.410265  543705 memory.go:184] no items to output this cycle
I0322 06:22:53.410277  543705 cpu.go:275] no items to output this cycle
E0322 06:23:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:23:03.409787  543705 memory.go:184] no items to output this cycle
I0322 06:23:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 06:23:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:23:13.409784  543705 memory.go:191] Add success.
I0322 06:23:13.409788  543705 cpu.go:282] Add success.
W0322 06:23:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:23:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:23:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:23:13.419709  543705 net.go:648] Add success.
I0322 06:23:13.422428  543705 net.go:770] primary dev: ETH0
I0322 06:23:13.422440  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:23:13.422452  543705 net.go:698] Add success.
I0322 06:23:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:23:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:23:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 06:23:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:23:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 06:23:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:23:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:23:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:23:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:23:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:23:23.409769  543705 memory.go:184] no items to output this cycle
I0322 06:23:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 06:23:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:23:33.409776  543705 memory.go:184] no items to output this cycle
I0322 06:23:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 06:23:33.468529  543705 disk_info.go:125] begin check local disk info of client
I0322 06:23:33.471018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:23:33.471024  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f5e80 0xc0003f5ec0]
E0322 06:23:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:23:43.410752  543705 memory.go:191] Add success.
I0322 06:23:43.409800  543705 cpu.go:282] Add success.
I0322 06:23:43.420508  543705 net.go:648] Add success.
I0322 06:23:43.423268  543705 net.go:770] primary dev: ETH0
I0322 06:23:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:23:43.423299  543705 net.go:698] Add success.
I0322 06:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:23:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:23:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:23:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:23:53.409788  543705 memory.go:184] no items to output this cycle
I0322 06:23:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 06:24:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:24:03.409812  543705 memory.go:184] no items to output this cycle
I0322 06:24:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 06:24:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:24:13.409774  543705 memory.go:191] Add success.
W0322 06:24:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:24:13.409804  543705 cpu.go:282] Add success.
W0322 06:24:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:24:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:24:13.420231  543705 net.go:648] Add success.
I0322 06:24:13.423165  543705 net.go:770] primary dev: ETH0
I0322 06:24:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:24:13.423202  543705 net.go:698] Add success.
I0322 06:24:13.468480  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f81f1368-9d32-4aa2-9596-3a33b4934af8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:24:13.468519  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:24:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:24:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:24:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 06:24:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:24:14.456513  543705 disk_worker.go:494] system disk:vda1
I0322 06:24:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:24:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:24:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:24:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:24:23.409765  543705 memory.go:184] no items to output this cycle
I0322 06:24:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 06:24:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:24:33.409787  543705 memory.go:184] no items to output this cycle
I0322 06:24:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 06:24:33.471503  543705 disk_info.go:125] begin check local disk info of client
I0322 06:24:33.473985  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:24:33.473990  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd00 0xc0001abd40]
I0322 06:24:39.532414  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:24:39.532421  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:24:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:24:43.410624  543705 memory.go:191] Add success.
I0322 06:24:43.409814  543705 cpu.go:282] Add success.
I0322 06:24:43.420340  543705 net.go:648] Add success.
I0322 06:24:43.423160  543705 net.go:770] primary dev: ETH0
I0322 06:24:43.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:24:43.423196  543705 net.go:698] Add success.
I0322 06:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:24:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:24:53.409780  543705 memory.go:184] no items to output this cycle
I0322 06:24:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 06:25:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:25:03.409789  543705 memory.go:184] no items to output this cycle
I0322 06:25:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 06:25:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:25:13.409794  543705 cpu.go:282] Add success.
I0322 06:25:13.409797  543705 memory.go:191] Add success.
W0322 06:25:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:25:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:25:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:25:13.420039  543705 net.go:648] Add success.
I0322 06:25:13.422552  543705 net.go:770] primary dev: ETH0
I0322 06:25:13.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:25:13.422578  543705 net.go:698] Add success.
I0322 06:25:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:25:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:25:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 06:25:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:25:14.456639  543705 disk_worker.go:494] system disk:vda1
I0322 06:25:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:25:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:25:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:25:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:25:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:25:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:25:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 06:25:23.409789  543705 memory.go:184] no items to output this cycle
E0322 06:25:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:25:33.409786  543705 memory.go:184] no items to output this cycle
I0322 06:25:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 06:25:33.474514  543705 disk_info.go:125] begin check local disk info of client
I0322 06:25:33.476963  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:25:33.476968  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2d00 0xc0002a2d40]
E0322 06:25:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:25:43.410706  543705 memory.go:191] Add success.
I0322 06:25:43.409795  543705 cpu.go:282] Add success.
I0322 06:25:43.420415  543705 net.go:648] Add success.
I0322 06:25:43.423118  543705 net.go:770] primary dev: ETH0
I0322 06:25:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:25:43.423144  543705 net.go:698] Add success.
I0322 06:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:25:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:25:53.409783  543705 memory.go:184] no items to output this cycle
I0322 06:25:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 06:26:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:26:03.409792  543705 cpu.go:275] no items to output this cycle
I0322 06:26:03.409794  543705 memory.go:184] no items to output this cycle
E0322 06:26:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:26:13.409786  543705 memory.go:191] Add success.
I0322 06:26:13.409806  543705 cpu.go:282] Add success.
W0322 06:26:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:26:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:26:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:26:13.420215  543705 net.go:648] Add success.
I0322 06:26:13.423236  543705 net.go:770] primary dev: ETH0
I0322 06:26:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:26:13.423265  543705 net.go:698] Add success.
I0322 06:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:26:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:26:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 06:26:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:26:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 06:26:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:26:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:26:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:26:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:26:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:26:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:26:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:26:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 06:26:23.409785  543705 memory.go:184] no items to output this cycle
E0322 06:26:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:26:33.409792  543705 memory.go:184] no items to output this cycle
I0322 06:26:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 06:26:33.477540  543705 disk_info.go:125] begin check local disk info of client
I0322 06:26:33.480054  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:26:33.480060  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464580 0xc0004645c0]
E0322 06:26:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:26:43.410739  543705 memory.go:191] Add success.
I0322 06:26:43.409826  543705 cpu.go:282] Add success.
I0322 06:26:43.420435  543705 net.go:648] Add success.
I0322 06:26:43.423283  543705 net.go:770] primary dev: ETH0
I0322 06:26:43.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:26:43.423317  543705 net.go:698] Add success.
I0322 06:26:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:26:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:26:53.410372  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:26:53.410392  543705 memory.go:184] no items to output this cycle
I0322 06:26:53.410410  543705 cpu.go:275] no items to output this cycle
E0322 06:27:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:27:03.409804  543705 memory.go:184] no items to output this cycle
I0322 06:27:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 06:27:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:27:13.409783  543705 memory.go:191] Add success.
I0322 06:27:13.409803  543705 cpu.go:282] Add success.
W0322 06:27:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:27:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:27:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:27:13.420100  543705 net.go:648] Add success.
I0322 06:27:13.429086  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 06:27:13.429159  543705 net.go:770] primary dev: ETH0
I0322 06:27:13.429170  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:27:13.429183  543705 net.go:698] Add success.
I0322 06:27:13.453740  543705 event_worker.go:152] Polling the log file for events...
I0322 06:27:13.469198  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"036c9872-c4b3-406e-a709-f98bcc6ba861","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:27:13.469231  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 06:27:14.455228  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:27:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0322 06:27:14.455246  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:27:14.455873  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:27:14.455882  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:27:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:27:14.456801  543705 disk_worker.go:494] system disk:vda1
I0322 06:27:14.456842  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:27:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:27:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:27:16.457573  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:27:16.457636  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:27:16.457699  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:27:16.458189  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:27:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:27:23.410202  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:27:23.410216  543705 memory.go:184] no items to output this cycle
I0322 06:27:23.410219  543705 cpu.go:275] no items to output this cycle
E0322 06:27:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:27:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 06:27:33.409781  543705 memory.go:184] no items to output this cycle
I0322 06:27:33.480573  543705 disk_info.go:125] begin check local disk info of client
I0322 06:27:33.483052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:27:33.483057  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2840 0xc0002a2880]
I0322 06:27:39.532859  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:27:39.532865  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:27:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:27:43.410655  543705 memory.go:191] Add success.
I0322 06:27:43.409786  543705 cpu.go:282] Add success.
I0322 06:27:43.420333  543705 net.go:648] Add success.
I0322 06:27:43.422949  543705 net.go:770] primary dev: ETH0
I0322 06:27:43.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:27:43.422974  543705 net.go:698] Add success.
I0322 06:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:27:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:27:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:27:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:27:53.409788  543705 memory.go:184] no items to output this cycle
I0322 06:27:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 06:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:28:03.409796  543705 memory.go:184] no items to output this cycle
I0322 06:28:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 06:28:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:28:13.409794  543705 memory.go:191] Add success.
I0322 06:28:13.409796  543705 cpu.go:282] Add success.
W0322 06:28:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:28:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:28:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:28:13.420056  543705 net.go:648] Add success.
I0322 06:28:13.422596  543705 net.go:770] primary dev: ETH0
I0322 06:28:13.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:28:13.422624  543705 net.go:698] Add success.
I0322 06:28:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:28:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:28:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 06:28:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:28:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 06:28:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:28:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:28:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:28:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:28:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:28:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:28:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 06:28:23.409782  543705 memory.go:184] no items to output this cycle
E0322 06:28:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:28:33.409780  543705 memory.go:184] no items to output this cycle
I0322 06:28:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 06:28:33.483555  543705 disk_info.go:125] begin check local disk info of client
I0322 06:28:33.486013  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:28:33.486019  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344d40 0xc000344d80]
E0322 06:28:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:28:43.410611  543705 memory.go:191] Add success.
I0322 06:28:43.409808  543705 cpu.go:282] Add success.
I0322 06:28:43.420323  543705 net.go:648] Add success.
I0322 06:28:43.422872  543705 net.go:770] primary dev: ETH0
I0322 06:28:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:28:43.422901  543705 net.go:698] Add success.
I0322 06:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:28:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:28:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:28:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:28:53.409773  543705 memory.go:184] no items to output this cycle
I0322 06:28:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 06:29:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:29:03.409807  543705 memory.go:184] no items to output this cycle
I0322 06:29:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 06:29:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:29:13.409777  543705 memory.go:191] Add success.
I0322 06:29:13.409799  543705 cpu.go:282] Add success.
W0322 06:29:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:29:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:29:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:29:13.420166  543705 net.go:648] Add success.
I0322 06:29:13.422918  543705 net.go:770] primary dev: ETH0
I0322 06:29:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:29:13.422943  543705 net.go:698] Add success.
I0322 06:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:29:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:29:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 06:29:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:29:14.456585  543705 disk_worker.go:494] system disk:vda1
I0322 06:29:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:29:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:29:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:29:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:29:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:29:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:29:23.410514  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:29:23.410532  543705 memory.go:184] no items to output this cycle
I0322 06:29:23.410542  543705 cpu.go:275] no items to output this cycle
E0322 06:29:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:29:33.409797  543705 memory.go:184] no items to output this cycle
I0322 06:29:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 06:29:33.486588  543705 disk_info.go:125] begin check local disk info of client
I0322 06:29:33.489053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:29:33.489059  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a940 0xc00007aa40]
E0322 06:29:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:29:43.410673  543705 memory.go:191] Add success.
I0322 06:29:43.409798  543705 cpu.go:282] Add success.
I0322 06:29:43.420370  543705 net.go:648] Add success.
I0322 06:29:43.423182  543705 net.go:770] primary dev: ETH0
I0322 06:29:43.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:29:43.423210  543705 net.go:698] Add success.
I0322 06:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:29:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:29:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:29:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:29:53.409772  543705 memory.go:184] no items to output this cycle
I0322 06:29:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 06:30:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:30:03.409804  543705 memory.go:184] no items to output this cycle
I0322 06:30:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 06:30:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:30:13.409789  543705 memory.go:191] Add success.
I0322 06:30:13.409811  543705 cpu.go:282] Add success.
W0322 06:30:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:30:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:30:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:30:13.420210  543705 net.go:648] Add success.
I0322 06:30:13.422797  543705 net.go:770] primary dev: ETH0
I0322 06:30:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:30:13.422823  543705 net.go:698] Add success.
I0322 06:30:13.468257  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b01e3fe3-16e3-4e41-9709-ec228f702ee0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:30:13.468291  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:30:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:30:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:30:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 06:30:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:30:14.456598  543705 disk_worker.go:494] system disk:vda1
I0322 06:30:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:30:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:30:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:30:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:30:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:30:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:30:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:30:23.409801  543705 memory.go:184] no items to output this cycle
I0322 06:30:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 06:30:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:30:33.409791  543705 memory.go:184] no items to output this cycle
I0322 06:30:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 06:30:33.489606  543705 disk_info.go:125] begin check local disk info of client
I0322 06:30:33.492084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:30:33.492089  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b740 0xc00007b780]
I0322 06:30:39.533367  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:30:39.533373  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:30:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:30:43.410803  543705 memory.go:191] Add success.
I0322 06:30:43.409822  543705 cpu.go:282] Add success.
I0322 06:30:43.420494  543705 net.go:648] Add success.
I0322 06:30:43.424145  543705 net.go:770] primary dev: ETH0
I0322 06:30:43.424160  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:30:43.424175  543705 net.go:698] Add success.
I0322 06:30:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:30:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:30:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:30:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:30:53.409780  543705 memory.go:184] no items to output this cycle
I0322 06:30:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 06:31:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:31:03.409794  543705 memory.go:184] no items to output this cycle
I0322 06:31:03.409830  543705 cpu.go:275] no items to output this cycle
E0322 06:31:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:31:13.409787  543705 memory.go:191] Add success.
I0322 06:31:13.409808  543705 cpu.go:282] Add success.
W0322 06:31:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:31:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:31:13.420050  543705 net.go:648] Add success.
I0322 06:31:13.422828  543705 net.go:770] primary dev: ETH0
I0322 06:31:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:31:13.422852  543705 net.go:698] Add success.
I0322 06:31:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:31:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:31:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 06:31:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:31:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 06:31:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:31:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:31:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:31:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:31:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:31:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:31:23.409803  543705 memory.go:184] no items to output this cycle
I0322 06:31:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 06:31:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:31:33.409793  543705 memory.go:184] no items to output this cycle
I0322 06:31:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 06:31:33.492168  543705 disk_info.go:125] begin check local disk info of client
I0322 06:31:33.494670  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:31:33.494676  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326900 0xc000326940]
E0322 06:31:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:31:43.410790  543705 memory.go:191] Add success.
I0322 06:31:43.409799  543705 cpu.go:282] Add success.
I0322 06:31:43.420508  543705 net.go:648] Add success.
I0322 06:31:43.423413  543705 net.go:770] primary dev: ETH0
I0322 06:31:43.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:31:43.423437  543705 net.go:698] Add success.
I0322 06:31:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:31:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:31:53.409788  543705 memory.go:184] no items to output this cycle
I0322 06:31:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 06:32:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:32:03.409819  543705 memory.go:184] no items to output this cycle
I0322 06:32:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 06:32:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:32:13.409791  543705 memory.go:191] Add success.
I0322 06:32:13.409811  543705 cpu.go:282] Add success.
W0322 06:32:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:32:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:32:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:32:13.420238  543705 net.go:648] Add success.
I0322 06:32:13.423265  543705 net.go:770] primary dev: ETH0
I0322 06:32:13.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:32:13.423289  543705 net.go:698] Add success.
W0322 06:32:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:32:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 06:32:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:32:14.456796  543705 disk_worker.go:494] system disk:vda1
I0322 06:32:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:32:14.457114  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:32:14.457122  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:32:14.457126  543705 custom_config.go:64] query custom config with name: gpu
E0322 06:32:15.456778  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:32:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:32:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:32:16.457980  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:32:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:32:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:32:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:32:23.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:32:23.410375  543705 memory.go:184] no items to output this cycle
I0322 06:32:23.410400  543705 cpu.go:275] no items to output this cycle
E0322 06:32:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:32:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 06:32:33.409788  543705 memory.go:184] no items to output this cycle
I0322 06:32:33.495647  543705 disk_info.go:125] begin check local disk info of client
I0322 06:32:33.498142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:32:33.498148  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d240 0xc00037d280]
E0322 06:32:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:32:43.410691  543705 memory.go:191] Add success.
I0322 06:32:43.409931  543705 cpu.go:282] Add success.
I0322 06:32:43.419717  543705 net.go:648] Add success.
I0322 06:32:43.422754  543705 net.go:770] primary dev: ETH0
I0322 06:32:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:32:43.422778  543705 net.go:698] Add success.
I0322 06:32:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:32:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:32:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:32:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:32:53.409774  543705 memory.go:184] no items to output this cycle
I0322 06:32:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 06:33:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:33:03.409795  543705 memory.go:184] no items to output this cycle
I0322 06:33:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 06:33:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:33:13.409787  543705 memory.go:191] Add success.
I0322 06:33:13.409788  543705 cpu.go:282] Add success.
W0322 06:33:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:33:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:33:13.420063  543705 net.go:648] Add success.
I0322 06:33:13.422715  543705 net.go:770] primary dev: ETH0
I0322 06:33:13.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:33:13.422741  543705 net.go:698] Add success.
I0322 06:33:13.469460  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e9ef368f-ea19-43a2-b19d-7455a5ba3a75","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:33:13.469494  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:33:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:33:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:33:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 06:33:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:33:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 06:33:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:33:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:33:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:33:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:33:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:33:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:33:23.409770  543705 memory.go:184] no items to output this cycle
I0322 06:33:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 06:33:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:33:33.409801  543705 memory.go:184] no items to output this cycle
I0322 06:33:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 06:33:33.498661  543705 disk_info.go:125] begin check local disk info of client
I0322 06:33:33.501181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:33:33.501187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ff00 0xc00047ff40]
I0322 06:33:39.534406  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:33:39.534413  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:33:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:33:43.410732  543705 memory.go:191] Add success.
I0322 06:33:43.409811  543705 cpu.go:282] Add success.
I0322 06:33:43.420777  543705 net.go:648] Add success.
I0322 06:33:43.423400  543705 net.go:770] primary dev: ETH0
I0322 06:33:43.423414  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:33:43.423428  543705 net.go:698] Add success.
I0322 06:33:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:33:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:33:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:33:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:33:53.409794  543705 memory.go:184] no items to output this cycle
I0322 06:33:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 06:34:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:34:03.409808  543705 memory.go:184] no items to output this cycle
I0322 06:34:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 06:34:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:34:13.409780  543705 memory.go:191] Add success.
I0322 06:34:13.409802  543705 cpu.go:282] Add success.
W0322 06:34:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:34:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:34:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:34:13.420184  543705 net.go:648] Add success.
I0322 06:34:13.422868  543705 net.go:770] primary dev: ETH0
I0322 06:34:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:34:13.422894  543705 net.go:698] Add success.
I0322 06:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:34:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:34:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 06:34:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:34:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 06:34:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:34:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:34:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:34:23.409758  543705 memory.go:184] no items to output this cycle
I0322 06:34:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 06:34:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:34:33.409805  543705 memory.go:184] no items to output this cycle
I0322 06:34:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 06:34:33.501667  543705 disk_info.go:125] begin check local disk info of client
I0322 06:34:33.504131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:34:33.504137  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a000 0xc00035a040]
E0322 06:34:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:34:43.410701  543705 memory.go:191] Add success.
I0322 06:34:43.409809  543705 cpu.go:282] Add success.
I0322 06:34:43.420770  543705 net.go:648] Add success.
I0322 06:34:43.423380  543705 net.go:770] primary dev: ETH0
I0322 06:34:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:34:43.423405  543705 net.go:698] Add success.
I0322 06:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:34:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:34:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 06:34:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:34:53.409818  543705 memory.go:184] no items to output this cycle
E0322 06:35:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:35:03.409771  543705 memory.go:184] no items to output this cycle
I0322 06:35:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 06:35:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:35:13.409779  543705 memory.go:191] Add success.
I0322 06:35:13.409792  543705 cpu.go:282] Add success.
W0322 06:35:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:35:13.412604  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:35:13.412609  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:35:13.420232  543705 net.go:648] Add success.
I0322 06:35:13.422002  543705 net.go:770] primary dev: ETH0
I0322 06:35:13.422016  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:35:13.422027  543705 net.go:698] Add success.
I0322 06:35:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:35:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:35:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 06:35:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:35:14.456486  543705 disk_worker.go:494] system disk:vda1
I0322 06:35:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:35:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:35:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:35:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:35:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:35:23.409774  543705 memory.go:184] no items to output this cycle
I0322 06:35:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 06:35:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:35:33.409767  543705 memory.go:184] no items to output this cycle
I0322 06:35:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 06:35:33.504712  543705 disk_info.go:125] begin check local disk info of client
I0322 06:35:33.507221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:35:33.507226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035bbc0 0xc00035bc00]
E0322 06:35:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:35:43.410626  543705 memory.go:191] Add success.
I0322 06:35:43.409818  543705 cpu.go:282] Add success.
I0322 06:35:43.420369  543705 net.go:648] Add success.
I0322 06:35:43.423228  543705 net.go:770] primary dev: ETH0
I0322 06:35:43.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:35:43.423260  543705 net.go:698] Add success.
I0322 06:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:35:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:35:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:35:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:35:53.409812  543705 memory.go:184] no items to output this cycle
I0322 06:35:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 06:36:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:36:03.409788  543705 memory.go:184] no items to output this cycle
I0322 06:36:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 06:36:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:36:13.409779  543705 memory.go:191] Add success.
I0322 06:36:13.409800  543705 cpu.go:282] Add success.
W0322 06:36:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:36:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:36:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:36:13.420104  543705 net.go:648] Add success.
I0322 06:36:13.422967  543705 net.go:770] primary dev: ETH0
I0322 06:36:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:36:13.422991  543705 net.go:698] Add success.
I0322 06:36:13.468485  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"617b1701-3610-40e9-acb5-dcfe916f99f4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:36:13.468521  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:36:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:36:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:36:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 06:36:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:36:14.456692  543705 disk_worker.go:494] system disk:vda1
I0322 06:36:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:36:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:36:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:36:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:36:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:36:23.409775  543705 memory.go:184] no items to output this cycle
I0322 06:36:23.409776  543705 cpu.go:275] no items to output this cycle
E0322 06:36:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:36:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 06:36:33.409786  543705 memory.go:184] no items to output this cycle
I0322 06:36:33.507768  543705 disk_info.go:125] begin check local disk info of client
I0322 06:36:33.510333  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:36:33.510339  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331300 0xc000331340]
I0322 06:36:39.535419  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:36:39.535425  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:36:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:36:43.410654  543705 memory.go:191] Add success.
I0322 06:36:43.409791  543705 cpu.go:282] Add success.
I0322 06:36:43.420379  543705 net.go:648] Add success.
I0322 06:36:43.423030  543705 net.go:770] primary dev: ETH0
I0322 06:36:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:36:43.423060  543705 net.go:698] Add success.
I0322 06:36:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:36:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:36:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:36:53.410355  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:36:53.410370  543705 memory.go:184] no items to output this cycle
I0322 06:36:53.410376  543705 cpu.go:275] no items to output this cycle
E0322 06:37:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:37:03.409778  543705 memory.go:184] no items to output this cycle
I0322 06:37:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 06:37:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:37:13.409811  543705 memory.go:191] Add success.
I0322 06:37:13.409819  543705 cpu.go:282] Add success.
W0322 06:37:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:37:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:37:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:37:13.420045  543705 net.go:648] Add success.
I0322 06:37:13.422688  543705 net.go:770] primary dev: ETH0
I0322 06:37:13.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:37:13.422716  543705 net.go:698] Add success.
I0322 06:37:13.453347  543705 event_worker.go:152] Polling the log file for events...
W0322 06:37:14.455289  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:37:14.455303  543705 disk_worker.go:708] disk space is not compliant
W0322 06:37:14.455306  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:37:14.457375  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:37:14.457385  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:37:14.457391  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:37:14.457397  543705 disk_worker.go:494] system disk:vda1
I0322 06:37:14.457433  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:37:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:37:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:37:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:37:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:37:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:37:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:37:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:37:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:37:23.409794  543705 memory.go:184] no items to output this cycle
I0322 06:37:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 06:37:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:37:33.409781  543705 memory.go:184] no items to output this cycle
I0322 06:37:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 06:37:33.510421  543705 disk_info.go:125] begin check local disk info of client
I0322 06:37:33.512879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:37:33.512884  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc40 0xc0001abc80]
E0322 06:37:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:37:43.410658  543705 memory.go:191] Add success.
I0322 06:37:43.409810  543705 cpu.go:282] Add success.
I0322 06:37:43.420347  543705 net.go:648] Add success.
I0322 06:37:43.422939  543705 net.go:770] primary dev: ETH0
I0322 06:37:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:37:43.422968  543705 net.go:698] Add success.
I0322 06:37:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:37:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:37:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:37:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:37:53.409782  543705 memory.go:184] no items to output this cycle
I0322 06:37:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 06:38:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:38:03.409790  543705 memory.go:184] no items to output this cycle
I0322 06:38:03.409790  543705 cpu.go:275] no items to output this cycle
I0322 06:38:13.409918  543705 cpu.go:282] Add success.
E0322 06:38:13.410047  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:38:13.410063  543705 memory.go:191] Add success.
W0322 06:38:13.410091  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:38:13.410103  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:38:13.410112  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:38:13.419710  543705 net.go:648] Add success.
I0322 06:38:13.422196  543705 net.go:770] primary dev: ETH0
I0322 06:38:13.422209  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:38:13.422220  543705 net.go:698] Add success.
I0322 06:38:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:38:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:38:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0322 06:38:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:38:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 06:38:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:38:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:38:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:38:23.410712  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:38:23.410727  543705 memory.go:184] no items to output this cycle
I0322 06:38:23.410732  543705 cpu.go:275] no items to output this cycle
E0322 06:38:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:38:33.409798  543705 memory.go:184] no items to output this cycle
I0322 06:38:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 06:38:33.513749  543705 disk_info.go:125] begin check local disk info of client
I0322 06:38:33.516190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:38:33.516213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330b40 0xc000330b80]
E0322 06:38:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:38:43.410691  543705 memory.go:191] Add success.
I0322 06:38:43.409805  543705 cpu.go:282] Add success.
I0322 06:38:43.420385  543705 net.go:648] Add success.
I0322 06:38:43.423182  543705 net.go:770] primary dev: ETH0
I0322 06:38:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:38:43.423217  543705 net.go:698] Add success.
I0322 06:38:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:38:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:38:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:38:53.410372  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:38:53.410382  543705 cpu.go:275] no items to output this cycle
I0322 06:38:53.410387  543705 memory.go:184] no items to output this cycle
E0322 06:39:03.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:39:03.409860  543705 memory.go:184] no items to output this cycle
I0322 06:39:03.409929  543705 cpu.go:275] no items to output this cycle
E0322 06:39:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:39:13.409824  543705 memory.go:191] Add success.
I0322 06:39:13.409829  543705 cpu.go:282] Add success.
W0322 06:39:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:39:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:39:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:39:13.420104  543705 net.go:648] Add success.
I0322 06:39:13.422850  543705 net.go:770] primary dev: ETH0
I0322 06:39:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:39:13.422876  543705 net.go:698] Add success.
I0322 06:39:13.468302  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"13f128b4-c238-425d-aab5-ad367d6f1b63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:39:13.468334  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:39:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:39:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 06:39:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:39:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 06:39:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:39:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:39:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:39:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:39:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:39:23.409804  543705 memory.go:184] no items to output this cycle
I0322 06:39:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 06:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:39:33.409775  543705 memory.go:184] no items to output this cycle
I0322 06:39:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 06:39:33.516290  543705 disk_info.go:125] begin check local disk info of client
I0322 06:39:33.518807  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:39:33.518813  543705 disk_info.go:196] parse disk info done, disk is : [0xc000497200 0xc000497240]
I0322 06:39:39.536428  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:39:39.536435  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:39:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:39:43.410802  543705 memory.go:191] Add success.
I0322 06:39:43.409798  543705 cpu.go:282] Add success.
I0322 06:39:43.420567  543705 net.go:648] Add success.
I0322 06:39:43.423185  543705 net.go:770] primary dev: ETH0
I0322 06:39:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:39:43.423242  543705 net.go:698] Add success.
I0322 06:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:39:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:39:53.409775  543705 memory.go:184] no items to output this cycle
I0322 06:39:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 06:40:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:40:03.409805  543705 memory.go:184] no items to output this cycle
I0322 06:40:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 06:40:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:40:13.409791  543705 memory.go:191] Add success.
I0322 06:40:13.409791  543705 cpu.go:282] Add success.
W0322 06:40:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:40:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:40:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:40:13.420175  543705 net.go:648] Add success.
I0322 06:40:13.423298  543705 net.go:770] primary dev: ETH0
I0322 06:40:13.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:40:13.423335  543705 net.go:698] Add success.
I0322 06:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:40:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:40:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 06:40:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:40:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 06:40:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:40:15.455477  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:40:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:40:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:40:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:40:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:40:23.409792  543705 memory.go:184] no items to output this cycle
I0322 06:40:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 06:40:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:40:33.409796  543705 memory.go:184] no items to output this cycle
I0322 06:40:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 06:40:33.519810  543705 disk_info.go:125] begin check local disk info of client
I0322 06:40:33.522279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:40:33.522284  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b49c0 0xc0004b4a00]
E0322 06:40:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:40:43.410993  543705 memory.go:191] Add success.
I0322 06:40:43.409823  543705 cpu.go:282] Add success.
I0322 06:40:43.420715  543705 net.go:648] Add success.
I0322 06:40:43.423682  543705 net.go:770] primary dev: ETH0
I0322 06:40:43.423695  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:40:43.423707  543705 net.go:698] Add success.
I0322 06:40:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:40:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:40:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:40:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:40:53.409772  543705 memory.go:184] no items to output this cycle
I0322 06:40:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 06:41:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:41:03.409780  543705 memory.go:184] no items to output this cycle
I0322 06:41:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 06:41:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:41:13.409790  543705 memory.go:191] Add success.
I0322 06:41:13.409793  543705 cpu.go:282] Add success.
W0322 06:41:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:41:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:41:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:41:13.420059  543705 net.go:648] Add success.
I0322 06:41:13.423022  543705 net.go:770] primary dev: ETH0
I0322 06:41:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:41:13.423046  543705 net.go:698] Add success.
I0322 06:41:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:41:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:41:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0322 06:41:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:41:14.456491  543705 disk_worker.go:494] system disk:vda1
I0322 06:41:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:41:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:41:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:41:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:41:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:41:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:41:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:41:23.409798  543705 memory.go:184] no items to output this cycle
I0322 06:41:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 06:41:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:41:33.409781  543705 memory.go:184] no items to output this cycle
I0322 06:41:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 06:41:33.522363  543705 disk_info.go:125] begin check local disk info of client
I0322 06:41:33.524825  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:41:33.524831  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b5880 0xc0002b58c0]
E0322 06:41:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:41:43.410717  543705 memory.go:191] Add success.
I0322 06:41:43.409826  543705 cpu.go:282] Add success.
I0322 06:41:43.420430  543705 net.go:648] Add success.
I0322 06:41:43.423133  543705 net.go:770] primary dev: ETH0
I0322 06:41:43.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:41:43.423158  543705 net.go:698] Add success.
I0322 06:41:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:41:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:41:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:41:53.410374  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:41:53.410393  543705 memory.go:184] no items to output this cycle
I0322 06:41:53.410421  543705 cpu.go:275] no items to output this cycle
E0322 06:42:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:42:03.409816  543705 memory.go:184] no items to output this cycle
I0322 06:42:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 06:42:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:42:13.409791  543705 memory.go:191] Add success.
I0322 06:42:13.409803  543705 cpu.go:282] Add success.
W0322 06:42:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:42:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:42:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:42:13.420200  543705 net.go:648] Add success.
I0322 06:42:13.423003  543705 net.go:770] primary dev: ETH0
I0322 06:42:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:42:13.423029  543705 net.go:698] Add success.
I0322 06:42:13.469829  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0144f73c-e2b0-4233-b58e-c41448658dbc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:42:13.469864  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 06:42:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:42:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 06:42:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:42:14.457006  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:42:14.457015  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:42:14.457021  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:42:14.457026  543705 disk_worker.go:494] system disk:vda1
I0322 06:42:14.457055  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:42:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:42:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:42:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:42:16.457892  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:42:16.457947  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:42:16.457966  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:42:16.472288  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:42:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:42:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 06:42:23.409785  543705 memory.go:184] no items to output this cycle
E0322 06:42:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:42:33.409775  543705 memory.go:184] no items to output this cycle
I0322 06:42:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 06:42:33.525770  543705 disk_info.go:125] begin check local disk info of client
I0322 06:42:33.528242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:42:33.528247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9440 0xc0003c9480]
I0322 06:42:39.537426  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:42:39.537433  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:42:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:42:43.410779  543705 memory.go:191] Add success.
I0322 06:42:43.409794  543705 cpu.go:282] Add success.
I0322 06:42:43.420562  543705 net.go:648] Add success.
I0322 06:42:43.423195  543705 net.go:770] primary dev: ETH0
I0322 06:42:43.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:42:43.423220  543705 net.go:698] Add success.
I0322 06:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:42:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:42:53.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:42:53.409866  543705 memory.go:184] no items to output this cycle
I0322 06:42:53.409939  543705 cpu.go:275] no items to output this cycle
E0322 06:43:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:43:03.409819  543705 memory.go:184] no items to output this cycle
I0322 06:43:03.409832  543705 cpu.go:275] no items to output this cycle
E0322 06:43:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:43:13.409786  543705 memory.go:191] Add success.
I0322 06:43:13.409788  543705 cpu.go:282] Add success.
W0322 06:43:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:43:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:43:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:43:13.420043  543705 net.go:648] Add success.
I0322 06:43:13.422999  543705 net.go:770] primary dev: ETH0
I0322 06:43:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:43:13.423029  543705 net.go:698] Add success.
I0322 06:43:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:43:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:43:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 06:43:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:43:14.456513  543705 disk_worker.go:494] system disk:vda1
I0322 06:43:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:43:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:43:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:43:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:43:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:43:23.410333  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:43:23.410348  543705 memory.go:184] no items to output this cycle
I0322 06:43:23.410367  543705 cpu.go:275] no items to output this cycle
E0322 06:43:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:43:33.409767  543705 memory.go:184] no items to output this cycle
I0322 06:43:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 06:43:33.528324  543705 disk_info.go:125] begin check local disk info of client
I0322 06:43:33.530925  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:43:33.530930  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492340 0xc000492380]
E0322 06:43:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:43:43.410693  543705 memory.go:191] Add success.
I0322 06:43:43.409830  543705 cpu.go:282] Add success.
I0322 06:43:43.420393  543705 net.go:648] Add success.
I0322 06:43:43.423212  543705 net.go:770] primary dev: ETH0
I0322 06:43:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:43:43.423242  543705 net.go:698] Add success.
I0322 06:43:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:43:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:43:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:43:53.409916  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:43:53.409940  543705 memory.go:184] no items to output this cycle
I0322 06:43:53.409949  543705 cpu.go:275] no items to output this cycle
E0322 06:44:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:44:03.409799  543705 cpu.go:275] no items to output this cycle
I0322 06:44:03.409801  543705 memory.go:184] no items to output this cycle
E0322 06:44:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:44:13.409790  543705 memory.go:191] Add success.
I0322 06:44:13.409795  543705 cpu.go:282] Add success.
W0322 06:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:44:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:44:13.420113  543705 net.go:648] Add success.
I0322 06:44:13.422941  543705 net.go:770] primary dev: ETH0
I0322 06:44:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:44:13.422969  543705 net.go:698] Add success.
I0322 06:44:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:44:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:44:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 06:44:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:44:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 06:44:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:44:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:44:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:44:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:44:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:44:23.409771  543705 memory.go:184] no items to output this cycle
I0322 06:44:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 06:44:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:44:33.409775  543705 memory.go:184] no items to output this cycle
I0322 06:44:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 06:44:33.531829  543705 disk_info.go:125] begin check local disk info of client
I0322 06:44:33.534302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:44:33.534307  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ba980 0xc0002ba9c0]
E0322 06:44:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:44:43.410660  543705 memory.go:191] Add success.
I0322 06:44:43.409800  543705 cpu.go:282] Add success.
I0322 06:44:43.420399  543705 net.go:648] Add success.
I0322 06:44:43.422904  543705 net.go:770] primary dev: ETH0
I0322 06:44:43.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:44:43.422930  543705 net.go:698] Add success.
I0322 06:44:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:44:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:44:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:44:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:44:53.409874  543705 memory.go:184] no items to output this cycle
I0322 06:44:53.409959  543705 cpu.go:275] no items to output this cycle
E0322 06:45:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:45:03.409788  543705 memory.go:184] no items to output this cycle
I0322 06:45:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 06:45:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:45:13.409785  543705 memory.go:191] Add success.
I0322 06:45:13.409785  543705 cpu.go:282] Add success.
W0322 06:45:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:45:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:45:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:45:13.420034  543705 net.go:648] Add success.
I0322 06:45:13.422860  543705 net.go:770] primary dev: ETH0
I0322 06:45:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:45:13.422891  543705 net.go:698] Add success.
I0322 06:45:13.505860  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ecf2956-2ad9-42c3-8c69-dd83abf7c9dc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:45:13.505894  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:45:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:45:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:45:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 06:45:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:45:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 06:45:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:45:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:45:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:45:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:45:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:45:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:45:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:45:23.409774  543705 cpu.go:275] no items to output this cycle
I0322 06:45:23.409784  543705 memory.go:184] no items to output this cycle
E0322 06:45:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:45:33.409811  543705 memory.go:184] no items to output this cycle
I0322 06:45:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 06:45:33.534384  543705 disk_info.go:125] begin check local disk info of client
I0322 06:45:33.536815  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:45:33.536820  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003defc0 0xc0003df000]
I0322 06:45:39.538433  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:45:39.538439  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:45:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:45:43.410965  543705 memory.go:191] Add success.
I0322 06:45:43.409818  543705 cpu.go:282] Add success.
I0322 06:45:43.420661  543705 net.go:648] Add success.
I0322 06:45:43.423379  543705 net.go:770] primary dev: ETH0
I0322 06:45:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:45:43.423407  543705 net.go:698] Add success.
I0322 06:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:45:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:45:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:45:53.409805  543705 memory.go:184] no items to output this cycle
I0322 06:45:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 06:46:03.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:46:03.409926  543705 memory.go:184] no items to output this cycle
I0322 06:46:03.409932  543705 cpu.go:275] no items to output this cycle
E0322 06:46:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:46:13.409812  543705 memory.go:191] Add success.
I0322 06:46:13.409816  543705 cpu.go:282] Add success.
W0322 06:46:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:46:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:46:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:46:13.420160  543705 net.go:648] Add success.
I0322 06:46:13.423208  543705 net.go:770] primary dev: ETH0
I0322 06:46:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:46:13.423233  543705 net.go:698] Add success.
I0322 06:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:46:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:46:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 06:46:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:46:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 06:46:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:46:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:46:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:46:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:46:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:46:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:46:23.409782  543705 memory.go:184] no items to output this cycle
I0322 06:46:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 06:46:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:46:33.409772  543705 memory.go:184] no items to output this cycle
I0322 06:46:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 06:46:33.537880  543705 disk_info.go:125] begin check local disk info of client
I0322 06:46:33.540375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:46:33.540380  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265e00 0xc000265e40]
E0322 06:46:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:46:43.410690  543705 memory.go:191] Add success.
I0322 06:46:43.409820  543705 cpu.go:282] Add success.
I0322 06:46:43.420398  543705 net.go:648] Add success.
I0322 06:46:43.423140  543705 net.go:770] primary dev: ETH0
I0322 06:46:43.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:46:43.423166  543705 net.go:698] Add success.
I0322 06:46:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:46:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:46:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:46:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:46:53.409779  543705 memory.go:184] no items to output this cycle
I0322 06:46:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 06:47:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:47:03.409783  543705 memory.go:184] no items to output this cycle
I0322 06:47:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 06:47:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:47:13.409788  543705 memory.go:191] Add success.
I0322 06:47:13.409790  543705 cpu.go:282] Add success.
W0322 06:47:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:47:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:47:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:47:13.420066  543705 net.go:648] Add success.
I0322 06:47:13.423010  543705 net.go:770] primary dev: ETH0
I0322 06:47:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:47:13.423035  543705 net.go:698] Add success.
I0322 06:47:13.453573  543705 event_worker.go:152] Polling the log file for events...
W0322 06:47:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:47:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 06:47:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:47:14.455920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:47:14.455929  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:47:14.455934  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:47:14.456555  543705 disk_worker.go:494] system disk:vda1
I0322 06:47:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:47:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:47:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:47:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:47:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:47:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:47:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:47:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:47:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:47:23.409777  543705 cpu.go:275] no items to output this cycle
I0322 06:47:23.409782  543705 memory.go:184] no items to output this cycle
E0322 06:47:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:47:33.409777  543705 memory.go:184] no items to output this cycle
I0322 06:47:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 06:47:33.540459  543705 disk_info.go:125] begin check local disk info of client
I0322 06:47:33.542990  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:47:33.542996  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036af80 0xc00036afc0]
E0322 06:47:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:47:43.410638  543705 memory.go:191] Add success.
I0322 06:47:43.409798  543705 cpu.go:282] Add success.
I0322 06:47:43.420334  543705 net.go:648] Add success.
I0322 06:47:43.423217  543705 net.go:770] primary dev: ETH0
I0322 06:47:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:47:43.423247  543705 net.go:698] Add success.
I0322 06:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:47:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:47:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:47:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:47:53.409819  543705 memory.go:184] no items to output this cycle
I0322 06:47:53.409832  543705 cpu.go:275] no items to output this cycle
E0322 06:48:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:48:03.409789  543705 memory.go:184] no items to output this cycle
I0322 06:48:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 06:48:13.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:48:13.409915  543705 memory.go:191] Add success.
W0322 06:48:13.409966  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:48:13.409987  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:48:13.409992  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:48:13.410122  543705 cpu.go:282] Add success.
I0322 06:48:13.419732  543705 net.go:648] Add success.
I0322 06:48:13.422512  543705 net.go:770] primary dev: ETH0
I0322 06:48:13.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:48:13.422554  543705 net.go:698] Add success.
I0322 06:48:13.463611  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c5d08521-2c23-445f-b550-2e3276ba3059","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:48:13.463642  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:48:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:48:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:48:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 06:48:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:48:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 06:48:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:48:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:48:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:48:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:48:23.409777  543705 memory.go:184] no items to output this cycle
I0322 06:48:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 06:48:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:48:33.409799  543705 memory.go:184] no items to output this cycle
I0322 06:48:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 06:48:33.543899  543705 disk_info.go:125] begin check local disk info of client
I0322 06:48:33.546481  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:48:33.546487  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9340 0xc0004d9380]
I0322 06:48:39.539439  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:48:39.539445  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:48:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:48:43.410662  543705 memory.go:191] Add success.
I0322 06:48:43.409812  543705 cpu.go:282] Add success.
I0322 06:48:43.420406  543705 net.go:648] Add success.
I0322 06:48:43.423184  543705 net.go:770] primary dev: ETH0
I0322 06:48:43.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:48:43.423230  543705 net.go:698] Add success.
I0322 06:48:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:48:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:48:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:48:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:48:53.409783  543705 memory.go:184] no items to output this cycle
I0322 06:48:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 06:49:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:49:03.409775  543705 memory.go:184] no items to output this cycle
I0322 06:49:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 06:49:13.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:49:13.409885  543705 memory.go:191] Add success.
W0322 06:49:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:49:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:49:13.409957  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:49:13.409967  543705 cpu.go:282] Add success.
I0322 06:49:13.419710  543705 net.go:648] Add success.
I0322 06:49:13.422780  543705 net.go:770] primary dev: ETH0
I0322 06:49:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:49:13.422805  543705 net.go:698] Add success.
I0322 06:49:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:49:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:49:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 06:49:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:49:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 06:49:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:49:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:49:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:49:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:49:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:49:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:49:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:49:23.409773  543705 memory.go:184] no items to output this cycle
I0322 06:49:23.409774  543705 cpu.go:275] no items to output this cycle
E0322 06:49:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:49:33.409801  543705 memory.go:184] no items to output this cycle
I0322 06:49:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 06:49:33.546854  543705 disk_info.go:125] begin check local disk info of client
I0322 06:49:33.549338  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:49:33.549343  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae00 0xc0001aae40]
E0322 06:49:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:49:43.410705  543705 memory.go:191] Add success.
I0322 06:49:43.409816  543705 cpu.go:282] Add success.
I0322 06:49:43.420428  543705 net.go:648] Add success.
I0322 06:49:43.423520  543705 net.go:770] primary dev: ETH0
I0322 06:49:43.423534  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:49:43.423548  543705 net.go:698] Add success.
I0322 06:49:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:49:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:49:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:49:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:49:53.409810  543705 memory.go:184] no items to output this cycle
I0322 06:49:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 06:50:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:50:03.409775  543705 memory.go:184] no items to output this cycle
I0322 06:50:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 06:50:13.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:50:13.409898  543705 memory.go:191] Add success.
W0322 06:50:13.409934  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:50:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:50:13.409962  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:50:13.409965  543705 cpu.go:282] Add success.
I0322 06:50:13.419754  543705 net.go:648] Add success.
I0322 06:50:13.422466  543705 net.go:770] primary dev: ETH0
I0322 06:50:13.422480  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:50:13.422494  543705 net.go:698] Add success.
I0322 06:50:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:50:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:50:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 06:50:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:50:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 06:50:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:50:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:50:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:50:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:50:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:50:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:50:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:50:23.409766  543705 memory.go:184] no items to output this cycle
I0322 06:50:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 06:50:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:50:33.409799  543705 memory.go:184] no items to output this cycle
I0322 06:50:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 06:50:33.549672  543705 disk_info.go:125] begin check local disk info of client
I0322 06:50:33.552110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:50:33.552116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b82c0 0xc0002b8300]
E0322 06:50:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:50:43.410543  543705 memory.go:191] Add success.
I0322 06:50:43.409816  543705 cpu.go:282] Add success.
I0322 06:50:43.420244  543705 net.go:648] Add success.
I0322 06:50:43.422797  543705 net.go:770] primary dev: ETH0
I0322 06:50:43.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:50:43.422823  543705 net.go:698] Add success.
I0322 06:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:50:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:50:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:50:53.409787  543705 cpu.go:275] no items to output this cycle
I0322 06:50:53.409792  543705 memory.go:184] no items to output this cycle
E0322 06:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:51:03.409779  543705 memory.go:184] no items to output this cycle
I0322 06:51:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 06:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:51:13.409922  543705 cpu.go:282] Add success.
I0322 06:51:13.409929  543705 memory.go:191] Add success.
W0322 06:51:13.409961  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:51:13.409975  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:51:13.409979  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:51:13.419738  543705 net.go:648] Add success.
I0322 06:51:13.422481  543705 net.go:770] primary dev: ETH0
I0322 06:51:13.422495  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:51:13.422508  543705 net.go:698] Add success.
I0322 06:51:13.468836  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e9e76f87-52a2-443c-a3ef-3f1149ab87b1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:51:13.468866  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:51:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:51:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:51:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 06:51:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:51:14.456636  543705 disk_worker.go:494] system disk:vda1
I0322 06:51:14.456668  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:51:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:51:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:51:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:51:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:51:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:51:23.409808  543705 memory.go:184] no items to output this cycle
I0322 06:51:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 06:51:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:51:33.409784  543705 memory.go:184] no items to output this cycle
I0322 06:51:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 06:51:33.552961  543705 disk_info.go:125] begin check local disk info of client
I0322 06:51:33.555495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:51:33.555500  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaac0 0xc0001aab00]
I0322 06:51:39.540460  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:51:39.540466  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:51:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:51:43.410741  543705 memory.go:191] Add success.
I0322 06:51:43.409806  543705 cpu.go:282] Add success.
I0322 06:51:43.420472  543705 net.go:648] Add success.
I0322 06:51:43.423242  543705 net.go:770] primary dev: ETH0
I0322 06:51:43.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:51:43.423276  543705 net.go:698] Add success.
I0322 06:51:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:51:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:51:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:51:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:51:53.409777  543705 memory.go:184] no items to output this cycle
I0322 06:51:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 06:52:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:52:03.409782  543705 memory.go:184] no items to output this cycle
I0322 06:52:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 06:52:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:52:13.409779  543705 memory.go:191] Add success.
I0322 06:52:13.409786  543705 cpu.go:282] Add success.
W0322 06:52:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:52:13.412534  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:52:13.412539  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:52:13.419707  543705 net.go:648] Add success.
I0322 06:52:13.421529  543705 net.go:770] primary dev: ETH0
I0322 06:52:13.421543  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:52:13.421555  543705 net.go:698] Add success.
W0322 06:52:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:52:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 06:52:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:52:14.456803  543705 disk_worker.go:494] system disk:vda1
I0322 06:52:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:52:14.457010  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:52:14.457018  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:52:14.457025  543705 custom_config.go:64] query custom config with name: gpu
E0322 06:52:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:52:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:52:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:52:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:52:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:52:16.457977  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:52:16.472299  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:52:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:52:23.409777  543705 memory.go:184] no items to output this cycle
I0322 06:52:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 06:52:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:52:33.409782  543705 memory.go:184] no items to output this cycle
I0322 06:52:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 06:52:33.555581  543705 disk_info.go:125] begin check local disk info of client
I0322 06:52:33.558151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:52:33.558156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484540 0xc000484580]
E0322 06:52:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:52:43.410626  543705 memory.go:191] Add success.
I0322 06:52:43.409792  543705 cpu.go:282] Add success.
I0322 06:52:43.420413  543705 net.go:648] Add success.
I0322 06:52:43.423261  543705 net.go:770] primary dev: ETH0
I0322 06:52:43.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:52:43.423286  543705 net.go:698] Add success.
I0322 06:52:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:52:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:52:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:52:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:52:53.409773  543705 memory.go:184] no items to output this cycle
I0322 06:52:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 06:53:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:53:03.409795  543705 memory.go:184] no items to output this cycle
I0322 06:53:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 06:53:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:53:13.409783  543705 memory.go:191] Add success.
W0322 06:53:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:53:13.409815  543705 cpu.go:282] Add success.
W0322 06:53:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:53:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:53:13.419720  543705 net.go:648] Add success.
I0322 06:53:13.422465  543705 net.go:770] primary dev: ETH0
I0322 06:53:13.422478  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:53:13.422500  543705 net.go:698] Add success.
I0322 06:53:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:53:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:53:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 06:53:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:53:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 06:53:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:53:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:53:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:53:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:53:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:53:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:53:23.409760  543705 memory.go:184] no items to output this cycle
I0322 06:53:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 06:53:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:53:33.409782  543705 memory.go:184] no items to output this cycle
I0322 06:53:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 06:53:33.558729  543705 disk_info.go:125] begin check local disk info of client
I0322 06:53:33.561216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:53:33.561221  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bc80 0xc00007bcc0]
E0322 06:53:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:53:43.410718  543705 memory.go:191] Add success.
I0322 06:53:43.409796  543705 cpu.go:282] Add success.
I0322 06:53:43.420418  543705 net.go:648] Add success.
I0322 06:53:43.423335  543705 net.go:770] primary dev: ETH0
I0322 06:53:43.423350  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:53:43.423364  543705 net.go:698] Add success.
I0322 06:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:53:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:53:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:53:53.409784  543705 memory.go:184] no items to output this cycle
I0322 06:53:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 06:54:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:54:03.409818  543705 memory.go:184] no items to output this cycle
I0322 06:54:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 06:54:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:54:13.409772  543705 memory.go:191] Add success.
W0322 06:54:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:54:13.409803  543705 cpu.go:282] Add success.
W0322 06:54:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:54:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:54:13.419722  543705 net.go:648] Add success.
I0322 06:54:13.422656  543705 net.go:770] primary dev: ETH0
I0322 06:54:13.422671  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:54:13.422687  543705 net.go:698] Add success.
I0322 06:54:13.463422  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c85df35e-9c90-42ad-9094-ef3ac1eb42f2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:54:13.463453  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 06:54:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:54:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:54:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 06:54:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:54:14.456646  543705 disk_worker.go:494] system disk:vda1
I0322 06:54:14.456675  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:54:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:54:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:54:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:54:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:54:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:54:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:54:23.409797  543705 memory.go:184] no items to output this cycle
I0322 06:54:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 06:54:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:54:33.409801  543705 memory.go:184] no items to output this cycle
I0322 06:54:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 06:54:33.561674  543705 disk_info.go:125] begin check local disk info of client
I0322 06:54:33.564173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:54:33.564178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000485d40 0xc000485d80]
I0322 06:54:39.541458  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:54:39.541465  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:54:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:54:43.410678  543705 memory.go:191] Add success.
I0322 06:54:43.409800  543705 cpu.go:282] Add success.
I0322 06:54:43.420485  543705 net.go:648] Add success.
I0322 06:54:43.423088  543705 net.go:770] primary dev: ETH0
I0322 06:54:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:54:43.423119  543705 net.go:698] Add success.
I0322 06:54:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:54:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:54:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:54:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:54:53.409780  543705 cpu.go:275] no items to output this cycle
I0322 06:54:53.409792  543705 memory.go:184] no items to output this cycle
E0322 06:55:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:55:03.409781  543705 memory.go:184] no items to output this cycle
I0322 06:55:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 06:55:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:55:13.409774  543705 memory.go:191] Add success.
W0322 06:55:13.409799  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 06:55:13.409804  543705 cpu.go:282] Add success.
W0322 06:55:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:55:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:55:13.420325  543705 net.go:648] Add success.
I0322 06:55:13.423014  543705 net.go:770] primary dev: ETH0
I0322 06:55:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:55:13.423042  543705 net.go:698] Add success.
I0322 06:55:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:55:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:55:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 06:55:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:55:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 06:55:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:55:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:55:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:55:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:55:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:55:16.472379  543705 disk_local_worker.go:436] Get disk info: []
I0322 06:55:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 06:55:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:55:23.409813  543705 memory.go:184] no items to output this cycle
E0322 06:55:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:55:33.409771  543705 memory.go:184] no items to output this cycle
I0322 06:55:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 06:55:33.565004  543705 disk_info.go:125] begin check local disk info of client
I0322 06:55:33.567515  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:55:33.567520  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b0c0 0xc00007b100]
E0322 06:55:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:55:43.410832  543705 memory.go:191] Add success.
I0322 06:55:43.409819  543705 cpu.go:282] Add success.
I0322 06:55:43.420521  543705 net.go:648] Add success.
I0322 06:55:43.423883  543705 net.go:770] primary dev: ETH0
I0322 06:55:43.423897  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:55:43.423909  543705 net.go:698] Add success.
I0322 06:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:55:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:55:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:55:53.409811  543705 memory.go:184] no items to output this cycle
I0322 06:55:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 06:56:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:56:03.409791  543705 memory.go:184] no items to output this cycle
I0322 06:56:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 06:56:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:56:13.409794  543705 memory.go:191] Add success.
I0322 06:56:13.409794  543705 cpu.go:282] Add success.
W0322 06:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:56:13.419766  543705 net.go:648] Add success.
I0322 06:56:13.422356  543705 net.go:770] primary dev: ETH0
I0322 06:56:13.422371  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:56:13.422384  543705 net.go:698] Add success.
I0322 06:56:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:56:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:56:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 06:56:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:56:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 06:56:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:56:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:56:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:56:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:56:23.409787  543705 memory.go:184] no items to output this cycle
I0322 06:56:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 06:56:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:56:33.409804  543705 memory.go:184] no items to output this cycle
I0322 06:56:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 06:56:33.567965  543705 disk_info.go:125] begin check local disk info of client
I0322 06:56:33.570493  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:56:33.570498  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5180 0xc0000c51c0]
E0322 06:56:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:56:43.410681  543705 memory.go:191] Add success.
I0322 06:56:43.409808  543705 cpu.go:282] Add success.
I0322 06:56:43.420435  543705 net.go:648] Add success.
I0322 06:56:43.423159  543705 net.go:770] primary dev: ETH0
I0322 06:56:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:56:43.423184  543705 net.go:698] Add success.
I0322 06:56:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:56:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:56:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:56:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:56:53.409792  543705 memory.go:184] no items to output this cycle
I0322 06:56:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 06:57:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:57:03.409788  543705 memory.go:184] no items to output this cycle
I0322 06:57:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 06:57:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:57:13.409819  543705 memory.go:191] Add success.
I0322 06:57:13.409828  543705 cpu.go:282] Add success.
W0322 06:57:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:57:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:57:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:57:13.420150  543705 net.go:648] Add success.
I0322 06:57:13.429057  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 06:57:13.429131  543705 net.go:770] primary dev: ETH0
I0322 06:57:13.429143  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:57:13.429155  543705 net.go:698] Add success.
I0322 06:57:13.452907  543705 event_worker.go:152] Polling the log file for events...
I0322 06:57:13.468840  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"93a64b33-8c28-4624-a4d0-e1a62e2e22fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 06:57:13.468874  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 06:57:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:57:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 06:57:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0322 06:57:14.455857  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 06:57:14.455866  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 06:57:14.455870  543705 custom_config.go:64] query custom config with name: gpu
I0322 06:57:14.456631  543705 disk_worker.go:494] system disk:vda1
I0322 06:57:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 06:57:15.456865  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 06:57:15.456873  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:57:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 06:57:16.457993  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 06:57:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:57:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:57:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:57:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:57:23.409784  543705 memory.go:184] no items to output this cycle
I0322 06:57:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 06:57:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:57:33.409776  543705 memory.go:184] no items to output this cycle
I0322 06:57:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 06:57:33.570577  543705 disk_info.go:125] begin check local disk info of client
I0322 06:57:33.573156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:57:33.573161  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307540 0xc000307580]
I0322 06:57:39.542450  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 06:57:39.542457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 06:57:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:57:43.410701  543705 memory.go:191] Add success.
I0322 06:57:43.409796  543705 cpu.go:282] Add success.
I0322 06:57:43.420378  543705 net.go:648] Add success.
I0322 06:57:43.423291  543705 net.go:770] primary dev: ETH0
I0322 06:57:43.423304  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:57:43.423319  543705 net.go:698] Add success.
I0322 06:57:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:57:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:57:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:57:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:57:53.409794  543705 memory.go:184] no items to output this cycle
I0322 06:57:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 06:58:03.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:58:03.409925  543705 memory.go:184] no items to output this cycle
I0322 06:58:03.409942  543705 cpu.go:275] no items to output this cycle
E0322 06:58:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:58:13.409782  543705 memory.go:191] Add success.
I0322 06:58:13.409812  543705 cpu.go:282] Add success.
W0322 06:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:58:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:58:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:58:13.420344  543705 net.go:648] Add success.
I0322 06:58:13.423070  543705 net.go:770] primary dev: ETH0
I0322 06:58:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:58:13.423096  543705 net.go:698] Add success.
I0322 06:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:58:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:58:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 06:58:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:58:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 06:58:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:58:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:58:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:58:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:58:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:58:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:58:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:58:23.409776  543705 memory.go:184] no items to output this cycle
I0322 06:58:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 06:58:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:58:33.409776  543705 memory.go:184] no items to output this cycle
I0322 06:58:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 06:58:33.574050  543705 disk_info.go:125] begin check local disk info of client
I0322 06:58:33.576547  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:58:33.576552  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5e40 0xc0000c5e80]
E0322 06:58:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:58:43.411243  543705 memory.go:191] Add success.
I0322 06:58:43.409819  543705 cpu.go:282] Add success.
I0322 06:58:43.419878  543705 net.go:648] Add success.
I0322 06:58:43.422986  543705 net.go:770] primary dev: ETH0
I0322 06:58:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:58:43.423015  543705 net.go:698] Add success.
I0322 06:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:58:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:58:53.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:58:53.409869  543705 cpu.go:275] no items to output this cycle
I0322 06:58:53.409871  543705 memory.go:184] no items to output this cycle
E0322 06:59:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:59:03.409801  543705 cpu.go:275] no items to output this cycle
I0322 06:59:03.409803  543705 memory.go:184] no items to output this cycle
E0322 06:59:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:59:13.409789  543705 memory.go:191] Add success.
I0322 06:59:13.409795  543705 cpu.go:282] Add success.
W0322 06:59:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 06:59:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 06:59:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 06:59:13.420159  543705 net.go:648] Add success.
I0322 06:59:13.422989  543705 net.go:770] primary dev: ETH0
I0322 06:59:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:59:13.423017  543705 net.go:698] Add success.
I0322 06:59:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 06:59:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 06:59:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 06:59:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 06:59:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 06:59:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 06:59:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 06:59:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:59:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:59:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 06:59:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 06:59:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:59:23.409779  543705 memory.go:184] no items to output this cycle
I0322 06:59:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 06:59:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:59:33.409790  543705 memory.go:184] no items to output this cycle
I0322 06:59:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 06:59:33.577024  543705 disk_info.go:125] begin check local disk info of client
I0322 06:59:33.579524  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 06:59:33.579529  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f380 0xc00032f3c0]
E0322 06:59:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:59:43.410715  543705 memory.go:191] Add success.
I0322 06:59:43.409831  543705 cpu.go:282] Add success.
I0322 06:59:43.420454  543705 net.go:648] Add success.
I0322 06:59:43.423110  543705 net.go:770] primary dev: ETH0
I0322 06:59:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0322 06:59:43.423135  543705 net.go:698] Add success.
I0322 06:59:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 06:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 06:59:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 06:59:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 06:59:53.409921  543705 memory.go:184] no items to output this cycle
I0322 06:59:53.409959  543705 cpu.go:275] no items to output this cycle
E0322 07:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:00:03.409787  543705 memory.go:184] no items to output this cycle
I0322 07:00:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 07:00:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:00:13.409811  543705 memory.go:191] Add success.
I0322 07:00:13.409819  543705 cpu.go:282] Add success.
W0322 07:00:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:00:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:00:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:00:13.420190  543705 net.go:648] Add success.
I0322 07:00:13.422820  543705 net.go:770] primary dev: ETH0
I0322 07:00:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:00:13.422845  543705 net.go:698] Add success.
I0322 07:00:14.245745  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b7d96f1d-75d7-4743-a184-8b4071387b86","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:00:14.245781  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:00:14.454686  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:00:14.454837  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:00:14.454899  543705 disk_worker.go:708] disk space is not compliant
W0322 07:00:14.454901  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:00:14.456240  543705 disk_worker.go:494] system disk:vda1
I0322 07:00:14.456295  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:00:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:00:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:00:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:00:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:00:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:00:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:00:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 07:00:23.409791  543705 memory.go:184] no items to output this cycle
E0322 07:00:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:00:33.409771  543705 memory.go:184] no items to output this cycle
I0322 07:00:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 07:00:33.580040  543705 disk_info.go:125] begin check local disk info of client
I0322 07:00:33.582557  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:00:33.582563  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046ab40 0xc00046ab80]
I0322 07:00:39.543475  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:00:39.543483  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:00:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:00:43.410798  543705 memory.go:191] Add success.
I0322 07:00:43.409840  543705 cpu.go:282] Add success.
I0322 07:00:43.420508  543705 net.go:648] Add success.
I0322 07:00:43.423588  543705 net.go:770] primary dev: ETH0
I0322 07:00:43.423603  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:00:43.423620  543705 net.go:698] Add success.
I0322 07:00:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:00:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:00:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:00:53.409905  543705 cpu.go:275] no items to output this cycle
E0322 07:00:53.409905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:00:53.409934  543705 memory.go:184] no items to output this cycle
E0322 07:01:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:01:03.409797  543705 memory.go:184] no items to output this cycle
I0322 07:01:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 07:01:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:01:13.409802  543705 memory.go:191] Add success.
I0322 07:01:13.409803  543705 cpu.go:282] Add success.
W0322 07:01:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:01:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:01:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:01:13.420153  543705 net.go:648] Add success.
I0322 07:01:13.423210  543705 net.go:770] primary dev: ETH0
I0322 07:01:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:01:13.423236  543705 net.go:698] Add success.
I0322 07:01:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:01:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:01:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 07:01:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:01:14.456555  543705 disk_worker.go:494] system disk:vda1
I0322 07:01:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:01:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:01:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:01:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:01:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:01:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:01:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:01:23.409785  543705 memory.go:184] no items to output this cycle
I0322 07:01:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 07:01:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:01:33.409795  543705 memory.go:184] no items to output this cycle
I0322 07:01:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 07:01:33.583042  543705 disk_info.go:125] begin check local disk info of client
I0322 07:01:33.585545  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:01:33.585550  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278d00 0xc000278d40]
E0322 07:01:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:01:43.410662  543705 memory.go:191] Add success.
I0322 07:01:43.409817  543705 cpu.go:282] Add success.
I0322 07:01:43.420363  543705 net.go:648] Add success.
I0322 07:01:43.423157  543705 net.go:770] primary dev: ETH0
I0322 07:01:43.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:01:43.423186  543705 net.go:698] Add success.
I0322 07:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:01:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:01:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:01:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:01:53.409808  543705 memory.go:184] no items to output this cycle
I0322 07:01:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 07:02:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:02:03.409781  543705 memory.go:184] no items to output this cycle
I0322 07:02:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 07:02:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:02:13.409811  543705 memory.go:191] Add success.
I0322 07:02:13.409818  543705 cpu.go:282] Add success.
W0322 07:02:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:02:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:02:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:02:13.420165  543705 net.go:648] Add success.
I0322 07:02:13.422823  543705 net.go:770] primary dev: ETH0
I0322 07:02:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:02:13.422851  543705 net.go:698] Add success.
W0322 07:02:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:02:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 07:02:14.455215  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:02:14.456163  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:02:14.456172  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:02:14.456178  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:02:14.457071  543705 disk_worker.go:494] system disk:vda1
I0322 07:02:14.457101  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:02:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:02:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:02:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:02:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:02:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:02:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:02:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:02:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:02:23.409786  543705 memory.go:184] no items to output this cycle
I0322 07:02:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 07:02:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:02:33.409781  543705 memory.go:184] no items to output this cycle
I0322 07:02:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 07:02:33.585668  543705 disk_info.go:125] begin check local disk info of client
I0322 07:02:33.588172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:02:33.588177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003687c0 0xc000368800]
E0322 07:02:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:02:43.410668  543705 memory.go:191] Add success.
I0322 07:02:43.409797  543705 cpu.go:282] Add success.
I0322 07:02:43.420371  543705 net.go:648] Add success.
I0322 07:02:43.423087  543705 net.go:770] primary dev: ETH0
I0322 07:02:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:02:43.423117  543705 net.go:698] Add success.
I0322 07:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:02:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:02:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:02:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:02:53.409803  543705 memory.go:184] no items to output this cycle
I0322 07:02:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 07:03:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:03:03.409793  543705 memory.go:184] no items to output this cycle
I0322 07:03:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 07:03:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:03:13.409813  543705 memory.go:191] Add success.
I0322 07:03:13.409825  543705 cpu.go:282] Add success.
W0322 07:03:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:03:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:03:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:03:13.420162  543705 net.go:648] Add success.
I0322 07:03:13.422847  543705 net.go:770] primary dev: ETH0
I0322 07:03:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:03:13.422874  543705 net.go:698] Add success.
I0322 07:03:13.468557  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07f8104f-e7c3-4861-add6-c6c4e2817d6c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:03:13.468590  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:03:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:03:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:03:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 07:03:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:03:14.456615  543705 disk_worker.go:494] system disk:vda1
I0322 07:03:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:03:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:03:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:03:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:03:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:03:23.409774  543705 memory.go:184] no items to output this cycle
I0322 07:03:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 07:03:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:03:33.409802  543705 memory.go:184] no items to output this cycle
I0322 07:03:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 07:03:33.589106  543705 disk_info.go:125] begin check local disk info of client
I0322 07:03:33.591689  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:03:33.591695  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b840 0xc00047b880]
I0322 07:03:39.544470  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:03:39.544477  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:03:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:03:43.410599  543705 memory.go:191] Add success.
I0322 07:03:43.409802  543705 cpu.go:282] Add success.
I0322 07:03:43.420342  543705 net.go:648] Add success.
I0322 07:03:43.422937  543705 net.go:770] primary dev: ETH0
I0322 07:03:43.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:03:43.422972  543705 net.go:698] Add success.
I0322 07:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:03:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:03:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:03:53.409801  543705 memory.go:184] no items to output this cycle
I0322 07:03:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 07:04:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:04:03.409795  543705 memory.go:184] no items to output this cycle
I0322 07:04:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 07:04:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:04:13.409823  543705 memory.go:191] Add success.
I0322 07:04:13.409824  543705 cpu.go:282] Add success.
W0322 07:04:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:04:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:04:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:04:13.420242  543705 net.go:648] Add success.
I0322 07:04:13.423118  543705 net.go:770] primary dev: ETH0
I0322 07:04:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:04:13.423143  543705 net.go:698] Add success.
I0322 07:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:04:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:04:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 07:04:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:04:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 07:04:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:04:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:04:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:04:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:04:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:04:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:04:23.409779  543705 memory.go:184] no items to output this cycle
I0322 07:04:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 07:04:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:04:33.409789  543705 memory.go:184] no items to output this cycle
I0322 07:04:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 07:04:33.592066  543705 disk_info.go:125] begin check local disk info of client
I0322 07:04:33.594595  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:04:33.594602  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a780 0xc00034a7c0]
E0322 07:04:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:04:43.410876  543705 memory.go:191] Add success.
I0322 07:04:43.409794  543705 cpu.go:282] Add success.
I0322 07:04:43.420596  543705 net.go:648] Add success.
I0322 07:04:43.423623  543705 net.go:770] primary dev: ETH0
I0322 07:04:43.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:04:43.423652  543705 net.go:698] Add success.
I0322 07:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:04:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:04:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:04:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:04:53.409810  543705 memory.go:184] no items to output this cycle
I0322 07:04:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 07:05:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:05:03.409799  543705 memory.go:184] no items to output this cycle
I0322 07:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 07:05:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:05:13.409795  543705 memory.go:191] Add success.
I0322 07:05:13.409795  543705 cpu.go:282] Add success.
W0322 07:05:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:05:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:05:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:05:13.420227  543705 net.go:648] Add success.
I0322 07:05:13.423111  543705 net.go:770] primary dev: ETH0
I0322 07:05:13.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:05:13.423142  543705 net.go:698] Add success.
I0322 07:05:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:05:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:05:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 07:05:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:05:14.456535  543705 disk_worker.go:494] system disk:vda1
I0322 07:05:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:05:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:05:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:05:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:05:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:05:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:05:23.409798  543705 memory.go:184] no items to output this cycle
I0322 07:05:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 07:05:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:05:33.409797  543705 memory.go:184] no items to output this cycle
I0322 07:05:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 07:05:33.594680  543705 disk_info.go:125] begin check local disk info of client
I0322 07:05:33.597171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:05:33.597176  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049ba80 0xc00049bac0]
E0322 07:05:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:05:43.410963  543705 memory.go:191] Add success.
I0322 07:05:43.409791  543705 cpu.go:282] Add success.
I0322 07:05:43.420654  543705 net.go:648] Add success.
I0322 07:05:43.423631  543705 net.go:770] primary dev: ETH0
I0322 07:05:43.423645  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:05:43.423657  543705 net.go:698] Add success.
I0322 07:05:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:05:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:05:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:05:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:05:53.409818  543705 memory.go:184] no items to output this cycle
I0322 07:05:53.409825  543705 cpu.go:275] no items to output this cycle
E0322 07:06:03.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:06:03.409923  543705 memory.go:184] no items to output this cycle
I0322 07:06:03.409963  543705 cpu.go:275] no items to output this cycle
E0322 07:06:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:06:13.409787  543705 memory.go:191] Add success.
W0322 07:06:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 07:06:13.409820  543705 cpu.go:282] Add success.
W0322 07:06:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:06:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:06:13.420234  543705 net.go:648] Add success.
I0322 07:06:13.423032  543705 net.go:770] primary dev: ETH0
I0322 07:06:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:06:13.423063  543705 net.go:698] Add success.
I0322 07:06:13.464791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fba1ba59-0c78-4341-82a2-d7fd60e7cc08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:06:13.464824  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:06:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:06:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:06:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 07:06:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:06:14.456643  543705 disk_worker.go:494] system disk:vda1
I0322 07:06:14.456674  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:06:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:06:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:06:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:06:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:06:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:06:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 07:06:23.409784  543705 memory.go:184] no items to output this cycle
E0322 07:06:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:06:33.409787  543705 memory.go:184] no items to output this cycle
I0322 07:06:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 07:06:33.598205  543705 disk_info.go:125] begin check local disk info of client
I0322 07:06:33.600675  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:06:33.600680  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c5c0 0xc00057c600]
I0322 07:06:39.545481  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:06:39.545487  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:06:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:06:43.410790  543705 memory.go:191] Add success.
I0322 07:06:43.409797  543705 cpu.go:282] Add success.
I0322 07:06:43.420478  543705 net.go:648] Add success.
I0322 07:06:43.423763  543705 net.go:770] primary dev: ETH0
I0322 07:06:43.423776  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:06:43.423789  543705 net.go:698] Add success.
I0322 07:06:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:06:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:06:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:06:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:06:53.409810  543705 memory.go:184] no items to output this cycle
I0322 07:06:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 07:07:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:07:03.409789  543705 memory.go:184] no items to output this cycle
I0322 07:07:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 07:07:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:07:13.409789  543705 memory.go:191] Add success.
I0322 07:07:13.409817  543705 cpu.go:282] Add success.
W0322 07:07:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:07:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:07:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:07:13.420584  543705 net.go:648] Add success.
I0322 07:07:13.423612  543705 net.go:770] primary dev: ETH0
I0322 07:07:13.423625  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:07:13.423638  543705 net.go:698] Add success.
I0322 07:07:13.453191  543705 event_worker.go:152] Polling the log file for events...
W0322 07:07:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:07:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 07:07:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:07:14.456784  543705 disk_worker.go:494] system disk:vda1
I0322 07:07:14.456824  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:07:14.457214  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:07:14.457222  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:07:14.457227  543705 custom_config.go:64] query custom config with name: gpu
E0322 07:07:15.457043  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:07:15.457057  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:07:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:07:16.457996  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:07:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:07:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:07:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:07:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:07:23.409775  543705 memory.go:184] no items to output this cycle
I0322 07:07:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 07:07:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:07:33.409772  543705 memory.go:184] no items to output this cycle
I0322 07:07:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 07:07:33.600758  543705 disk_info.go:125] begin check local disk info of client
I0322 07:07:33.603300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:07:33.603306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003abb00 0xc0003abb40]
E0322 07:07:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:07:43.410593  543705 memory.go:191] Add success.
I0322 07:07:43.409815  543705 cpu.go:282] Add success.
I0322 07:07:43.420309  543705 net.go:648] Add success.
I0322 07:07:43.423058  543705 net.go:770] primary dev: ETH0
I0322 07:07:43.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:07:43.423082  543705 net.go:698] Add success.
I0322 07:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:07:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:07:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:07:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:07:53.409779  543705 memory.go:184] no items to output this cycle
I0322 07:07:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 07:08:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:08:03.409781  543705 memory.go:184] no items to output this cycle
I0322 07:08:03.409832  543705 cpu.go:275] no items to output this cycle
E0322 07:08:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:08:13.409799  543705 memory.go:191] Add success.
I0322 07:08:13.409800  543705 cpu.go:282] Add success.
W0322 07:08:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:08:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:08:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:08:13.420154  543705 net.go:648] Add success.
I0322 07:08:13.423414  543705 net.go:770] primary dev: ETH0
I0322 07:08:13.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:08:13.423440  543705 net.go:698] Add success.
I0322 07:08:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:08:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:08:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 07:08:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:08:14.456498  543705 disk_worker.go:494] system disk:vda1
I0322 07:08:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:08:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:08:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:08:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:08:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:08:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:08:23.409783  543705 memory.go:184] no items to output this cycle
I0322 07:08:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 07:08:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:08:33.409776  543705 memory.go:184] no items to output this cycle
I0322 07:08:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 07:08:33.604154  543705 disk_info.go:125] begin check local disk info of client
I0322 07:08:33.606870  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:08:33.606875  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2180 0xc0003b21c0]
E0322 07:08:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:08:43.410639  543705 memory.go:191] Add success.
I0322 07:08:43.409799  543705 cpu.go:282] Add success.
I0322 07:08:43.420338  543705 net.go:648] Add success.
I0322 07:08:43.423086  543705 net.go:770] primary dev: ETH0
I0322 07:08:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:08:43.423117  543705 net.go:698] Add success.
I0322 07:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:08:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:08:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:08:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:08:53.409770  543705 memory.go:184] no items to output this cycle
I0322 07:08:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 07:09:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:09:03.409788  543705 memory.go:184] no items to output this cycle
I0322 07:09:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 07:09:13.409932  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:09:13.409954  543705 cpu.go:282] Add success.
I0322 07:09:13.409957  543705 memory.go:191] Add success.
W0322 07:09:13.410167  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:09:13.410188  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:09:13.410193  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:09:13.419746  543705 net.go:648] Add success.
I0322 07:09:13.422449  543705 net.go:770] primary dev: ETH0
I0322 07:09:13.422464  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:09:13.422477  543705 net.go:698] Add success.
I0322 07:09:13.468495  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"962a81b5-8ef8-4de3-9af5-6de95f2e5eab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:09:13.468527  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:09:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:09:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:09:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 07:09:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:09:14.456633  543705 disk_worker.go:494] system disk:vda1
I0322 07:09:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:09:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:09:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:09:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:09:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:09:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:09:23.409765  543705 memory.go:184] no items to output this cycle
I0322 07:09:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 07:09:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:09:33.409768  543705 memory.go:184] no items to output this cycle
I0322 07:09:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 07:09:33.607181  543705 disk_info.go:125] begin check local disk info of client
I0322 07:09:33.609685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:09:33.609690  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003eea00 0xc0003eea40]
I0322 07:09:39.546477  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:09:39.546484  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:09:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:09:43.410798  543705 memory.go:191] Add success.
I0322 07:09:43.409803  543705 cpu.go:282] Add success.
I0322 07:09:43.420502  543705 net.go:648] Add success.
I0322 07:09:43.423348  543705 net.go:770] primary dev: ETH0
I0322 07:09:43.423363  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:09:43.423377  543705 net.go:698] Add success.
I0322 07:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:09:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:09:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:09:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:09:53.409787  543705 memory.go:184] no items to output this cycle
I0322 07:09:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 07:10:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:10:03.409788  543705 memory.go:184] no items to output this cycle
I0322 07:10:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 07:10:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:10:13.409794  543705 memory.go:191] Add success.
W0322 07:10:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:10:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:10:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:10:13.409840  543705 cpu.go:282] Add success.
I0322 07:10:13.420062  543705 net.go:648] Add success.
I0322 07:10:13.422812  543705 net.go:770] primary dev: ETH0
I0322 07:10:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:10:13.422841  543705 net.go:698] Add success.
I0322 07:10:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:10:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:10:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 07:10:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:10:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 07:10:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:10:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:10:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:10:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:10:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:10:23.409781  543705 memory.go:184] no items to output this cycle
I0322 07:10:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:10:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:10:33.409768  543705 memory.go:184] no items to output this cycle
I0322 07:10:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 07:10:33.610726  543705 disk_info.go:125] begin check local disk info of client
I0322 07:10:33.613238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:10:33.613243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9a80 0xc0002b9ac0]
E0322 07:10:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:10:43.410687  543705 memory.go:191] Add success.
I0322 07:10:43.409816  543705 cpu.go:282] Add success.
I0322 07:10:43.420374  543705 net.go:648] Add success.
I0322 07:10:43.422831  543705 net.go:770] primary dev: ETH0
I0322 07:10:43.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:10:43.422858  543705 net.go:698] Add success.
I0322 07:10:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:10:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:10:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:10:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:10:53.409788  543705 cpu.go:275] no items to output this cycle
I0322 07:10:53.409799  543705 memory.go:184] no items to output this cycle
E0322 07:11:03.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:11:03.409875  543705 memory.go:184] no items to output this cycle
I0322 07:11:03.410000  543705 cpu.go:275] no items to output this cycle
E0322 07:11:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:11:13.409801  543705 memory.go:191] Add success.
I0322 07:11:13.409818  543705 cpu.go:282] Add success.
W0322 07:11:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:11:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:11:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:11:13.420113  543705 net.go:648] Add success.
I0322 07:11:13.422876  543705 net.go:770] primary dev: ETH0
I0322 07:11:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:11:13.422901  543705 net.go:698] Add success.
I0322 07:11:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:11:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:11:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 07:11:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:11:14.456498  543705 disk_worker.go:494] system disk:vda1
I0322 07:11:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:11:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:11:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:11:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:11:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:11:23.409793  543705 memory.go:184] no items to output this cycle
I0322 07:11:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 07:11:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:11:33.409797  543705 memory.go:184] no items to output this cycle
I0322 07:11:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 07:11:33.613666  543705 disk_info.go:125] begin check local disk info of client
I0322 07:11:33.616151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:11:33.616156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368740 0xc000368780]
E0322 07:11:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:11:43.410655  543705 memory.go:191] Add success.
I0322 07:11:43.409823  543705 cpu.go:282] Add success.
I0322 07:11:43.420348  543705 net.go:648] Add success.
I0322 07:11:43.423119  543705 net.go:770] primary dev: ETH0
I0322 07:11:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:11:43.423149  543705 net.go:698] Add success.
I0322 07:11:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:11:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:11:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:11:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:11:53.409786  543705 memory.go:184] no items to output this cycle
I0322 07:11:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 07:12:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:12:03.409779  543705 memory.go:184] no items to output this cycle
I0322 07:12:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 07:12:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:12:13.409788  543705 memory.go:191] Add success.
W0322 07:12:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 07:12:13.409817  543705 cpu.go:282] Add success.
W0322 07:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:12:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:12:13.420179  543705 net.go:648] Add success.
I0322 07:12:13.422894  543705 net.go:770] primary dev: ETH0
I0322 07:12:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:12:13.422919  543705 net.go:698] Add success.
I0322 07:12:13.537610  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c4293880-e3b8-4dfb-8258-eeeb88c0a516","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:12:13.537642  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 07:12:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:12:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 07:12:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:12:14.456853  543705 disk_worker.go:494] system disk:vda1
E0322 07:12:14.456856  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:12:14.456864  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:12:14.456881  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:12:14.456895  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:12:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:12:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:12:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:12:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:12:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:12:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:12:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:12:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:12:23.409805  543705 memory.go:184] no items to output this cycle
I0322 07:12:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 07:12:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:12:33.409780  543705 memory.go:184] no items to output this cycle
I0322 07:12:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 07:12:33.616190  543705 disk_info.go:125] begin check local disk info of client
I0322 07:12:33.618759  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:12:33.618764  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0380 0xc0003c03c0]
I0322 07:12:39.547492  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:12:39.547499  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:12:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:12:43.410599  543705 memory.go:191] Add success.
I0322 07:12:43.409798  543705 cpu.go:282] Add success.
I0322 07:12:43.420289  543705 net.go:648] Add success.
I0322 07:12:43.422983  543705 net.go:770] primary dev: ETH0
I0322 07:12:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:12:43.423007  543705 net.go:698] Add success.
I0322 07:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:12:53.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:12:53.409913  543705 memory.go:184] no items to output this cycle
I0322 07:12:53.410159  543705 cpu.go:275] no items to output this cycle
E0322 07:13:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:13:03.409778  543705 memory.go:184] no items to output this cycle
I0322 07:13:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 07:13:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:13:13.409813  543705 memory.go:191] Add success.
I0322 07:13:13.409818  543705 cpu.go:282] Add success.
W0322 07:13:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:13:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:13:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:13:13.420100  543705 net.go:648] Add success.
I0322 07:13:13.422684  543705 net.go:770] primary dev: ETH0
I0322 07:13:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:13:13.422712  543705 net.go:698] Add success.
I0322 07:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:13:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:13:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 07:13:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:13:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 07:13:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:13:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:13:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:13:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:13:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:13:23.409821  543705 memory.go:184] no items to output this cycle
I0322 07:13:23.409843  543705 cpu.go:275] no items to output this cycle
E0322 07:13:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:13:33.409784  543705 memory.go:184] no items to output this cycle
I0322 07:13:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 07:13:33.618842  543705 disk_info.go:125] begin check local disk info of client
I0322 07:13:33.621330  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:13:33.621335  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374980 0xc0003749c0]
E0322 07:13:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:13:43.410657  543705 memory.go:191] Add success.
I0322 07:13:43.409818  543705 cpu.go:282] Add success.
I0322 07:13:43.420412  543705 net.go:648] Add success.
I0322 07:13:43.423078  543705 net.go:770] primary dev: ETH0
I0322 07:13:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:13:43.423109  543705 net.go:698] Add success.
I0322 07:13:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:13:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:13:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:13:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:13:53.409776  543705 memory.go:184] no items to output this cycle
I0322 07:13:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 07:14:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:14:03.409803  543705 memory.go:184] no items to output this cycle
I0322 07:14:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 07:14:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:14:13.409822  543705 memory.go:191] Add success.
I0322 07:14:13.409825  543705 cpu.go:282] Add success.
W0322 07:14:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:14:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:14:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:14:13.420167  543705 net.go:648] Add success.
I0322 07:14:13.422846  543705 net.go:770] primary dev: ETH0
I0322 07:14:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:14:13.422875  543705 net.go:698] Add success.
I0322 07:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:14:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:14:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 07:14:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:14:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 07:14:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:14:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:14:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:14:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:14:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:14:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:14:23.409780  543705 memory.go:184] no items to output this cycle
I0322 07:14:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 07:14:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:14:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 07:14:33.409798  543705 memory.go:184] no items to output this cycle
I0322 07:14:33.621671  543705 disk_info.go:125] begin check local disk info of client
I0322 07:14:33.624202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:14:33.624208  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275240 0xc000275280]
E0322 07:14:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:14:43.410669  543705 memory.go:191] Add success.
I0322 07:14:43.409833  543705 cpu.go:282] Add success.
I0322 07:14:43.420495  543705 net.go:648] Add success.
I0322 07:14:43.423417  543705 net.go:770] primary dev: ETH0
I0322 07:14:43.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:14:43.423444  543705 net.go:698] Add success.
I0322 07:14:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:14:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:14:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:14:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:14:53.409786  543705 memory.go:184] no items to output this cycle
I0322 07:14:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 07:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:15:03.409777  543705 memory.go:184] no items to output this cycle
I0322 07:15:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 07:15:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:15:13.409811  543705 memory.go:191] Add success.
I0322 07:15:13.409812  543705 cpu.go:282] Add success.
W0322 07:15:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:15:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:15:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:15:13.420158  543705 net.go:648] Add success.
I0322 07:15:13.422903  543705 net.go:770] primary dev: ETH0
I0322 07:15:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:15:13.422933  543705 net.go:698] Add success.
I0322 07:15:13.468982  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d686e697-71d7-46f7-b59c-5a3e5ec202fa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:15:13.469025  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:15:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:15:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 07:15:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:15:14.456672  543705 disk_worker.go:494] system disk:vda1
I0322 07:15:14.456703  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:15:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:15:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:15:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:15:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:15:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:15:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:15:23.409792  543705 memory.go:184] no items to output this cycle
I0322 07:15:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 07:15:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:15:33.409797  543705 memory.go:184] no items to output this cycle
I0322 07:15:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 07:15:33.625229  543705 disk_info.go:125] begin check local disk info of client
I0322 07:15:33.627750  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:15:33.627755  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b5300 0xc0002b5340]
I0322 07:15:39.548486  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:15:39.548492  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:15:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:15:43.410697  543705 memory.go:191] Add success.
I0322 07:15:43.409816  543705 cpu.go:282] Add success.
I0322 07:15:43.420378  543705 net.go:648] Add success.
I0322 07:15:43.423000  543705 net.go:770] primary dev: ETH0
I0322 07:15:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:15:43.423025  543705 net.go:698] Add success.
I0322 07:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:15:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:15:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:15:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:15:53.409788  543705 memory.go:184] no items to output this cycle
I0322 07:15:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 07:16:03.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:16:03.409931  543705 cpu.go:275] no items to output this cycle
I0322 07:16:03.409967  543705 memory.go:184] no items to output this cycle
E0322 07:16:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:16:13.409820  543705 memory.go:191] Add success.
I0322 07:16:13.409833  543705 cpu.go:282] Add success.
W0322 07:16:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:16:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:16:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:16:13.420134  543705 net.go:648] Add success.
I0322 07:16:13.422930  543705 net.go:770] primary dev: ETH0
I0322 07:16:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:16:13.422961  543705 net.go:698] Add success.
I0322 07:16:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:16:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:16:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 07:16:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:16:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 07:16:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:16:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:16:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:16:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:16:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:16:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:16:23.409767  543705 memory.go:184] no items to output this cycle
I0322 07:16:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 07:16:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:16:33.409785  543705 memory.go:184] no items to output this cycle
I0322 07:16:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 07:16:33.628251  543705 disk_info.go:125] begin check local disk info of client
I0322 07:16:33.630773  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:16:33.630778  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e700 0xc00039e740]
E0322 07:16:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:16:43.410692  543705 memory.go:191] Add success.
I0322 07:16:43.409818  543705 cpu.go:282] Add success.
I0322 07:16:43.420475  543705 net.go:648] Add success.
I0322 07:16:43.423063  543705 net.go:770] primary dev: ETH0
I0322 07:16:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:16:43.423088  543705 net.go:698] Add success.
I0322 07:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:16:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:16:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:16:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:16:53.409788  543705 memory.go:184] no items to output this cycle
I0322 07:16:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 07:17:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:17:03.409937  543705 memory.go:184] no items to output this cycle
I0322 07:17:03.409939  543705 cpu.go:275] no items to output this cycle
E0322 07:17:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:17:13.409783  543705 memory.go:191] Add success.
I0322 07:17:13.409799  543705 cpu.go:282] Add success.
W0322 07:17:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:17:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:17:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:17:13.420170  543705 net.go:648] Add success.
I0322 07:17:13.422851  543705 net.go:770] primary dev: ETH0
I0322 07:17:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:17:13.422880  543705 net.go:698] Add success.
I0322 07:17:13.453443  543705 event_worker.go:152] Polling the log file for events...
W0322 07:17:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:17:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 07:17:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:17:14.457116  543705 disk_worker.go:494] system disk:vda1
E0322 07:17:14.457150  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0322 07:17:14.457156  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:17:14.457157  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:17:14.457162  543705 custom_config.go:64] query custom config with name: gpu
E0322 07:17:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:17:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:17:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:17:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:17:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:17:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:17:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:17:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:17:23.409792  543705 memory.go:184] no items to output this cycle
I0322 07:17:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 07:17:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:17:33.409777  543705 memory.go:184] no items to output this cycle
I0322 07:17:33.409778  543705 cpu.go:275] no items to output this cycle
I0322 07:17:33.630855  543705 disk_info.go:125] begin check local disk info of client
I0322 07:17:33.633414  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:17:33.633420  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328200 0xc000328240]
E0322 07:17:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:17:43.410571  543705 memory.go:191] Add success.
I0322 07:17:43.409831  543705 cpu.go:282] Add success.
I0322 07:17:43.420323  543705 net.go:648] Add success.
I0322 07:17:43.423367  543705 net.go:770] primary dev: ETH0
I0322 07:17:43.423382  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:17:43.423396  543705 net.go:698] Add success.
I0322 07:17:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:17:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:17:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:17:53.409807  543705 memory.go:184] no items to output this cycle
I0322 07:17:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 07:18:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:18:03.409784  543705 memory.go:184] no items to output this cycle
I0322 07:18:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 07:18:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:18:13.409786  543705 memory.go:191] Add success.
I0322 07:18:13.409808  543705 cpu.go:282] Add success.
W0322 07:18:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:18:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:18:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:18:13.420119  543705 net.go:648] Add success.
I0322 07:18:13.423139  543705 net.go:770] primary dev: ETH0
I0322 07:18:13.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:18:13.423163  543705 net.go:698] Add success.
I0322 07:18:13.471120  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5de2fa8e-2bce-4695-8a66-9d968f45b202","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:18:13.471155  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:18:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:18:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:18:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 07:18:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:18:14.456623  543705 disk_worker.go:494] system disk:vda1
I0322 07:18:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:18:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:18:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:18:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:18:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:18:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:18:23.409770  543705 memory.go:184] no items to output this cycle
I0322 07:18:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 07:18:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:18:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 07:18:33.409784  543705 memory.go:184] no items to output this cycle
I0322 07:18:33.633670  543705 disk_info.go:125] begin check local disk info of client
I0322 07:18:33.636120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:18:33.636125  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5080 0xc0000c50c0]
I0322 07:18:39.549493  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:18:39.549499  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:18:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:18:43.410626  543705 memory.go:191] Add success.
I0322 07:18:43.409789  543705 cpu.go:282] Add success.
I0322 07:18:43.420319  543705 net.go:648] Add success.
I0322 07:18:43.423004  543705 net.go:770] primary dev: ETH0
I0322 07:18:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:18:43.423030  543705 net.go:698] Add success.
I0322 07:18:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:18:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:18:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:18:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:18:53.409804  543705 memory.go:184] no items to output this cycle
I0322 07:18:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 07:19:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:19:03.409790  543705 cpu.go:275] no items to output this cycle
I0322 07:19:03.409793  543705 memory.go:184] no items to output this cycle
E0322 07:19:13.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:19:13.409900  543705 memory.go:191] Add success.
I0322 07:19:13.409912  543705 cpu.go:282] Add success.
W0322 07:19:13.409937  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:19:13.409955  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:19:13.409964  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:19:13.419708  543705 net.go:648] Add success.
I0322 07:19:13.422305  543705 net.go:770] primary dev: ETH0
I0322 07:19:13.422318  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:19:13.422328  543705 net.go:698] Add success.
I0322 07:19:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:19:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:19:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 07:19:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:19:14.456525  543705 disk_worker.go:494] system disk:vda1
I0322 07:19:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:19:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:19:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:19:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:19:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:19:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:19:23.409779  543705 memory.go:184] no items to output this cycle
I0322 07:19:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 07:19:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:19:33.409775  543705 memory.go:184] no items to output this cycle
I0322 07:19:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 07:19:33.636207  543705 disk_info.go:125] begin check local disk info of client
I0322 07:19:33.638722  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:19:33.638727  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaec0 0xc0001aaf00]
E0322 07:19:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:19:43.410815  543705 memory.go:191] Add success.
I0322 07:19:43.409816  543705 cpu.go:282] Add success.
I0322 07:19:43.420497  543705 net.go:648] Add success.
I0322 07:19:43.423347  543705 net.go:770] primary dev: ETH0
I0322 07:19:43.423362  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:19:43.423376  543705 net.go:698] Add success.
I0322 07:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:19:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:19:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:19:53.409779  543705 memory.go:184] no items to output this cycle
I0322 07:19:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 07:20:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:20:03.409793  543705 memory.go:184] no items to output this cycle
I0322 07:20:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 07:20:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:20:13.409786  543705 memory.go:191] Add success.
I0322 07:20:13.409789  543705 cpu.go:282] Add success.
W0322 07:20:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:20:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:20:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:20:13.420363  543705 net.go:648] Add success.
I0322 07:20:13.423371  543705 net.go:770] primary dev: ETH0
I0322 07:20:13.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:20:13.423399  543705 net.go:698] Add success.
I0322 07:20:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:20:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:20:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 07:20:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:20:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 07:20:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:20:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:20:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:20:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:20:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:20:16.472478  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:20:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:20:23.409794  543705 memory.go:184] no items to output this cycle
I0322 07:20:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 07:20:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:20:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 07:20:33.409787  543705 memory.go:184] no items to output this cycle
I0322 07:20:33.639304  543705 disk_info.go:125] begin check local disk info of client
I0322 07:20:33.641959  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:20:33.641965  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368640 0xc000368680]
I0322 07:20:43.409800  543705 cpu.go:282] Add success.
E0322 07:20:43.409975  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:20:43.410656  543705 memory.go:191] Add success.
I0322 07:20:43.420171  543705 net.go:770] primary dev: ETH0
I0322 07:20:43.420185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:20:43.420200  543705 net.go:698] Add success.
I0322 07:20:43.420563  543705 net.go:648] Add success.
I0322 07:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:20:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:20:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:20:53.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:20:53.410382  543705 memory.go:184] no items to output this cycle
I0322 07:20:53.410384  543705 cpu.go:275] no items to output this cycle
E0322 07:21:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:21:03.409801  543705 memory.go:184] no items to output this cycle
I0322 07:21:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 07:21:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:21:13.409810  543705 memory.go:191] Add success.
I0322 07:21:13.409823  543705 cpu.go:282] Add success.
W0322 07:21:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:21:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:21:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:21:13.420168  543705 net.go:648] Add success.
I0322 07:21:13.423249  543705 net.go:770] primary dev: ETH0
I0322 07:21:13.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:21:13.423273  543705 net.go:698] Add success.
I0322 07:21:13.554218  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a2c0af7-a56f-47b3-8209-b7be48e9e0e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:21:13.554252  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:21:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:21:14.455320  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:21:14.455332  543705 disk_worker.go:708] disk space is not compliant
W0322 07:21:14.455336  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:21:14.457013  543705 disk_worker.go:494] system disk:vda1
I0322 07:21:14.457049  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:21:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:21:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:21:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:21:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:21:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:21:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:21:23.409795  543705 memory.go:184] no items to output this cycle
I0322 07:21:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 07:21:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:21:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 07:21:33.409791  543705 memory.go:184] no items to output this cycle
I0322 07:21:33.642046  543705 disk_info.go:125] begin check local disk info of client
I0322 07:21:33.644540  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:21:33.644545  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d97c0 0xc0004d9800]
I0322 07:21:39.550502  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:21:39.550508  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:21:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:21:43.410706  543705 memory.go:191] Add success.
I0322 07:21:43.409810  543705 cpu.go:282] Add success.
I0322 07:21:43.420390  543705 net.go:648] Add success.
I0322 07:21:43.423507  543705 net.go:770] primary dev: ETH0
I0322 07:21:43.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:21:43.423533  543705 net.go:698] Add success.
I0322 07:21:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:21:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:21:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:21:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:21:53.409809  543705 memory.go:184] no items to output this cycle
I0322 07:21:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 07:22:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:22:03.409783  543705 memory.go:184] no items to output this cycle
I0322 07:22:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:22:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:22:13.409808  543705 memory.go:191] Add success.
I0322 07:22:13.409815  543705 cpu.go:282] Add success.
W0322 07:22:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:22:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:22:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:22:13.420601  543705 net.go:648] Add success.
I0322 07:22:13.423770  543705 net.go:770] primary dev: ETH0
I0322 07:22:13.423785  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:22:13.423799  543705 net.go:698] Add success.
W0322 07:22:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:22:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 07:22:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:22:14.456673  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:22:14.456682  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:22:14.456689  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:22:14.457136  543705 disk_worker.go:494] system disk:vda1
I0322 07:22:14.457178  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:22:15.456874  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:22:15.456883  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:22:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:22:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:22:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:22:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:22:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:22:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:22:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 07:22:23.409785  543705 memory.go:184] no items to output this cycle
E0322 07:22:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:22:33.409795  543705 memory.go:184] no items to output this cycle
I0322 07:22:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 07:22:33.644624  543705 disk_info.go:125] begin check local disk info of client
I0322 07:22:33.647104  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:22:33.647109  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ef0c0 0xc0003ef140]
E0322 07:22:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:22:43.410739  543705 memory.go:191] Add success.
I0322 07:22:43.409803  543705 cpu.go:282] Add success.
I0322 07:22:43.420436  543705 net.go:648] Add success.
I0322 07:22:43.423241  543705 net.go:770] primary dev: ETH0
I0322 07:22:43.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:22:43.423267  543705 net.go:698] Add success.
I0322 07:22:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:22:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:22:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:22:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:22:53.409777  543705 memory.go:184] no items to output this cycle
I0322 07:22:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 07:23:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:23:03.409777  543705 memory.go:184] no items to output this cycle
I0322 07:23:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 07:23:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:23:13.409806  543705 memory.go:191] Add success.
I0322 07:23:13.409813  543705 cpu.go:282] Add success.
W0322 07:23:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:23:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:23:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:23:13.420146  543705 net.go:648] Add success.
I0322 07:23:13.422863  543705 net.go:770] primary dev: ETH0
I0322 07:23:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:23:13.422886  543705 net.go:698] Add success.
I0322 07:23:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:23:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:23:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 07:23:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:23:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 07:23:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:23:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:23:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:23:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:23:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:23:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:23:23.409779  543705 memory.go:184] no items to output this cycle
I0322 07:23:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 07:23:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:23:33.409909  543705 memory.go:184] no items to output this cycle
I0322 07:23:33.409920  543705 cpu.go:275] no items to output this cycle
I0322 07:23:33.647439  543705 disk_info.go:125] begin check local disk info of client
I0322 07:23:33.650023  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:23:33.650029  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa0c0 0xc0001aa100]
E0322 07:23:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:23:43.410749  543705 memory.go:191] Add success.
I0322 07:23:43.409802  543705 cpu.go:282] Add success.
I0322 07:23:43.420448  543705 net.go:648] Add success.
I0322 07:23:43.423015  543705 net.go:770] primary dev: ETH0
I0322 07:23:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:23:43.423045  543705 net.go:698] Add success.
I0322 07:23:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:23:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:23:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:23:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:23:53.409778  543705 memory.go:184] no items to output this cycle
I0322 07:23:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 07:24:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:24:03.409781  543705 memory.go:184] no items to output this cycle
I0322 07:24:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 07:24:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:24:13.409819  543705 memory.go:191] Add success.
I0322 07:24:13.409831  543705 cpu.go:282] Add success.
W0322 07:24:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:24:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:24:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:24:13.420397  543705 net.go:648] Add success.
I0322 07:24:13.423428  543705 net.go:770] primary dev: ETH0
I0322 07:24:13.423440  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:24:13.423452  543705 net.go:698] Add success.
I0322 07:24:13.468888  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"87252883-2ed7-4d5d-94f7-d2248f4be469","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:24:13.468922  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:24:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:24:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:24:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 07:24:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:24:14.456574  543705 disk_worker.go:494] system disk:vda1
I0322 07:24:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:24:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:24:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:24:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:24:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:24:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:24:23.409796  543705 memory.go:184] no items to output this cycle
I0322 07:24:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 07:24:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:24:33.409788  543705 memory.go:184] no items to output this cycle
I0322 07:24:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 07:24:33.650724  543705 disk_info.go:125] begin check local disk info of client
I0322 07:24:33.653210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:24:33.653216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ef440 0xc0003ef480]
I0322 07:24:39.551506  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:24:39.551513  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:24:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:24:43.410606  543705 memory.go:191] Add success.
I0322 07:24:43.409817  543705 cpu.go:282] Add success.
I0322 07:24:43.420288  543705 net.go:648] Add success.
I0322 07:24:43.422944  543705 net.go:770] primary dev: ETH0
I0322 07:24:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:24:43.422970  543705 net.go:698] Add success.
I0322 07:24:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:24:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:24:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:24:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:24:53.410387  543705 memory.go:184] no items to output this cycle
I0322 07:24:53.410389  543705 cpu.go:275] no items to output this cycle
E0322 07:25:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:25:03.409787  543705 memory.go:184] no items to output this cycle
I0322 07:25:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 07:25:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:25:13.409778  543705 memory.go:191] Add success.
I0322 07:25:13.409800  543705 cpu.go:282] Add success.
W0322 07:25:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:25:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:25:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:25:13.420227  543705 net.go:648] Add success.
I0322 07:25:13.422847  543705 net.go:770] primary dev: ETH0
I0322 07:25:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:25:13.422871  543705 net.go:698] Add success.
I0322 07:25:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:25:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:25:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 07:25:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:25:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 07:25:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:25:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:25:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:25:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:25:23.409782  543705 memory.go:184] no items to output this cycle
I0322 07:25:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 07:25:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:25:33.409799  543705 memory.go:184] no items to output this cycle
I0322 07:25:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 07:25:33.653668  543705 disk_info.go:125] begin check local disk info of client
I0322 07:25:33.656148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:25:33.656153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8e40 0xc0004d8e80]
E0322 07:25:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:25:43.410831  543705 memory.go:191] Add success.
I0322 07:25:43.409801  543705 cpu.go:282] Add success.
I0322 07:25:43.420503  543705 net.go:648] Add success.
I0322 07:25:43.423639  543705 net.go:770] primary dev: ETH0
I0322 07:25:43.423652  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:25:43.423663  543705 net.go:698] Add success.
I0322 07:25:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:25:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:25:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:25:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:25:53.409780  543705 memory.go:184] no items to output this cycle
I0322 07:25:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 07:26:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:26:03.409804  543705 cpu.go:275] no items to output this cycle
I0322 07:26:03.409811  543705 memory.go:184] no items to output this cycle
E0322 07:26:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:26:13.409785  543705 memory.go:191] Add success.
I0322 07:26:13.409803  543705 cpu.go:282] Add success.
W0322 07:26:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:26:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:26:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:26:13.420500  543705 net.go:648] Add success.
I0322 07:26:13.423090  543705 net.go:770] primary dev: ETH0
I0322 07:26:13.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:26:13.423115  543705 net.go:698] Add success.
I0322 07:26:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:26:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:26:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 07:26:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:26:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 07:26:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:26:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:26:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:26:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:26:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:26:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:26:23.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:26:23.409891  543705 memory.go:184] no items to output this cycle
I0322 07:26:23.409971  543705 cpu.go:275] no items to output this cycle
E0322 07:26:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:26:33.409784  543705 memory.go:184] no items to output this cycle
I0322 07:26:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 07:26:33.656385  543705 disk_info.go:125] begin check local disk info of client
I0322 07:26:33.658904  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:26:33.658909  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025fc80 0xc00025fcc0]
E0322 07:26:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:26:43.410503  543705 memory.go:191] Add success.
I0322 07:26:43.409831  543705 cpu.go:282] Add success.
I0322 07:26:43.420239  543705 net.go:648] Add success.
I0322 07:26:43.422811  543705 net.go:770] primary dev: ETH0
I0322 07:26:43.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:26:43.422835  543705 net.go:698] Add success.
I0322 07:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:26:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:26:53.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:26:53.409828  543705 memory.go:184] no items to output this cycle
I0322 07:26:53.409839  543705 cpu.go:275] no items to output this cycle
E0322 07:27:03.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:27:03.409823  543705 memory.go:184] no items to output this cycle
I0322 07:27:03.409834  543705 cpu.go:275] no items to output this cycle
E0322 07:27:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:27:13.409818  543705 memory.go:191] Add success.
I0322 07:27:13.409825  543705 cpu.go:282] Add success.
W0322 07:27:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:27:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:27:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:27:13.420127  543705 net.go:648] Add success.
I0322 07:27:13.423041  543705 net.go:770] primary dev: ETH0
I0322 07:27:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:27:13.423067  543705 net.go:698] Add success.
I0322 07:27:13.429444  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 07:27:13.453617  543705 event_worker.go:152] Polling the log file for events...
I0322 07:27:13.469737  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f473c645-7ee2-4b74-af52-4c960425226f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:27:13.469770  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 07:27:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:27:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 07:27:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:27:14.456109  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:27:14.456119  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:27:14.456124  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:27:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 07:27:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:27:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:27:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:27:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:27:16.457927  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:27:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:27:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:27:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:27:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:27:23.409805  543705 memory.go:184] no items to output this cycle
I0322 07:27:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 07:27:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:27:33.409776  543705 memory.go:184] no items to output this cycle
I0322 07:27:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 07:27:33.658990  543705 disk_info.go:125] begin check local disk info of client
I0322 07:27:33.661500  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:27:33.661506  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025f440 0xc00025f480]
I0322 07:27:39.552511  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:27:39.552517  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:27:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:27:43.410753  543705 memory.go:191] Add success.
I0322 07:27:43.409820  543705 cpu.go:282] Add success.
I0322 07:27:43.420432  543705 net.go:648] Add success.
I0322 07:27:43.423313  543705 net.go:770] primary dev: ETH0
I0322 07:27:43.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:27:43.423339  543705 net.go:698] Add success.
I0322 07:27:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:27:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:27:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:27:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:27:53.409797  543705 memory.go:184] no items to output this cycle
I0322 07:27:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 07:28:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:28:03.409797  543705 memory.go:184] no items to output this cycle
I0322 07:28:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 07:28:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:28:13.409812  543705 memory.go:191] Add success.
I0322 07:28:13.409824  543705 cpu.go:282] Add success.
W0322 07:28:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:28:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:28:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:28:13.420062  543705 net.go:648] Add success.
I0322 07:28:13.422875  543705 net.go:770] primary dev: ETH0
I0322 07:28:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:28:13.422899  543705 net.go:698] Add success.
I0322 07:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:28:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:28:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 07:28:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:28:14.456534  543705 disk_worker.go:494] system disk:vda1
I0322 07:28:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:28:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:28:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:28:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:28:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:28:23.409786  543705 memory.go:184] no items to output this cycle
I0322 07:28:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 07:28:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:28:33.409761  543705 memory.go:184] no items to output this cycle
I0322 07:28:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 07:28:33.661669  543705 disk_info.go:125] begin check local disk info of client
I0322 07:28:33.664134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:28:33.664139  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5bc0 0xc0000c5c00]
E0322 07:28:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:28:43.410630  543705 memory.go:191] Add success.
I0322 07:28:43.409812  543705 cpu.go:282] Add success.
I0322 07:28:43.420325  543705 net.go:648] Add success.
I0322 07:28:43.423166  543705 net.go:770] primary dev: ETH0
I0322 07:28:43.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:28:43.423194  543705 net.go:698] Add success.
I0322 07:28:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:28:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:28:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:28:53.410404  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:28:53.410425  543705 memory.go:184] no items to output this cycle
I0322 07:28:53.410438  543705 cpu.go:275] no items to output this cycle
E0322 07:29:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:29:03.409791  543705 memory.go:184] no items to output this cycle
I0322 07:29:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 07:29:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:29:13.409807  543705 memory.go:191] Add success.
I0322 07:29:13.409815  543705 cpu.go:282] Add success.
W0322 07:29:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:29:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:29:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:29:13.420179  543705 net.go:648] Add success.
I0322 07:29:13.422963  543705 net.go:770] primary dev: ETH0
I0322 07:29:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:29:13.422991  543705 net.go:698] Add success.
I0322 07:29:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:29:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:29:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 07:29:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:29:14.456551  543705 disk_worker.go:494] system disk:vda1
I0322 07:29:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:29:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:29:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:29:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:29:16.458139  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:29:16.472092  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:29:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:29:23.409792  543705 memory.go:184] no items to output this cycle
I0322 07:29:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:29:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:29:33.409779  543705 memory.go:184] no items to output this cycle
I0322 07:29:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 07:29:33.664443  543705 disk_info.go:125] begin check local disk info of client
I0322 07:29:33.666940  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:29:33.666945  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5400 0xc0000c5440]
E0322 07:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:29:43.410778  543705 memory.go:191] Add success.
I0322 07:29:43.409780  543705 cpu.go:282] Add success.
I0322 07:29:43.420485  543705 net.go:648] Add success.
I0322 07:29:43.423362  543705 net.go:770] primary dev: ETH0
I0322 07:29:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:29:43.423387  543705 net.go:698] Add success.
I0322 07:29:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:29:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:29:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:29:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:29:53.409782  543705 memory.go:184] no items to output this cycle
I0322 07:29:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 07:30:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:30:03.409787  543705 memory.go:184] no items to output this cycle
I0322 07:30:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 07:30:13.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:30:13.409910  543705 memory.go:191] Add success.
I0322 07:30:13.409908  543705 cpu.go:282] Add success.
W0322 07:30:13.409956  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:30:13.409992  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:30:13.410002  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:30:13.420469  543705 net.go:648] Add success.
I0322 07:30:13.422981  543705 net.go:770] primary dev: ETH0
I0322 07:30:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:30:13.423006  543705 net.go:698] Add success.
I0322 07:30:13.468668  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aa833ed2-d778-47ea-ae97-5b67c7838414","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:30:13.468700  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:30:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:30:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:30:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 07:30:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:30:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 07:30:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:30:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:30:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:30:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:30:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:30:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:30:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:30:23.409773  543705 memory.go:184] no items to output this cycle
I0322 07:30:23.409775  543705 cpu.go:275] no items to output this cycle
E0322 07:30:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:30:33.409788  543705 memory.go:184] no items to output this cycle
I0322 07:30:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 07:30:33.667484  543705 disk_info.go:125] begin check local disk info of client
I0322 07:30:33.670211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:30:33.670217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460480 0xc0004604c0]
I0322 07:30:39.553508  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:30:39.553515  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:30:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:30:43.410717  543705 memory.go:191] Add success.
I0322 07:30:43.409811  543705 cpu.go:282] Add success.
I0322 07:30:43.420437  543705 net.go:648] Add success.
I0322 07:30:43.423021  543705 net.go:770] primary dev: ETH0
I0322 07:30:43.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:30:43.423046  543705 net.go:698] Add success.
I0322 07:30:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:30:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:30:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:30:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:30:53.409781  543705 memory.go:184] no items to output this cycle
I0322 07:30:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 07:31:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:31:03.409776  543705 memory.go:184] no items to output this cycle
I0322 07:31:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 07:31:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:31:13.409809  543705 memory.go:191] Add success.
I0322 07:31:13.409823  543705 cpu.go:282] Add success.
W0322 07:31:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:31:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:31:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:31:13.420111  543705 net.go:648] Add success.
I0322 07:31:13.422864  543705 net.go:770] primary dev: ETH0
I0322 07:31:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:31:13.422890  543705 net.go:698] Add success.
I0322 07:31:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:31:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:31:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 07:31:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:31:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 07:31:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:31:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:31:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:31:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:31:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:31:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:31:23.409767  543705 memory.go:184] no items to output this cycle
I0322 07:31:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 07:31:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:31:33.409900  543705 memory.go:184] no items to output this cycle
I0322 07:31:33.409977  543705 cpu.go:275] no items to output this cycle
I0322 07:31:33.670291  543705 disk_info.go:125] begin check local disk info of client
I0322 07:31:33.672801  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:31:33.672807  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
E0322 07:31:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:31:43.410820  543705 memory.go:191] Add success.
I0322 07:31:43.409789  543705 cpu.go:282] Add success.
I0322 07:31:43.420511  543705 net.go:648] Add success.
I0322 07:31:43.423666  543705 net.go:770] primary dev: ETH0
I0322 07:31:43.423678  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:31:43.423690  543705 net.go:698] Add success.
I0322 07:31:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:31:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:31:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:31:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:31:53.409807  543705 memory.go:184] no items to output this cycle
I0322 07:31:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 07:32:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:32:03.409779  543705 memory.go:184] no items to output this cycle
I0322 07:32:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 07:32:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:32:13.409784  543705 memory.go:191] Add success.
I0322 07:32:13.409808  543705 cpu.go:282] Add success.
W0322 07:32:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:32:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:32:13.420228  543705 net.go:648] Add success.
I0322 07:32:13.423042  543705 net.go:770] primary dev: ETH0
I0322 07:32:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:32:13.423068  543705 net.go:698] Add success.
W0322 07:32:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:32:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 07:32:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:32:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:32:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:32:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:32:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 07:32:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:32:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:32:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:32:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:32:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:32:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:32:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:32:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:32:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:32:23.409793  543705 memory.go:184] no items to output this cycle
I0322 07:32:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:32:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:32:33.409766  543705 memory.go:184] no items to output this cycle
I0322 07:32:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 07:32:33.672878  543705 disk_info.go:125] begin check local disk info of client
I0322 07:32:33.675400  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:32:33.675405  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1380 0xc0004b13c0]
E0322 07:32:43.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:32:43.410784  543705 memory.go:191] Add success.
I0322 07:32:43.409951  543705 cpu.go:282] Add success.
I0322 07:32:43.419704  543705 net.go:648] Add success.
I0322 07:32:43.422442  543705 net.go:770] primary dev: ETH0
I0322 07:32:43.422455  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:32:43.422466  543705 net.go:698] Add success.
I0322 07:32:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:32:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:32:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:32:53.410733  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:32:53.410751  543705 memory.go:184] no items to output this cycle
I0322 07:32:53.410790  543705 cpu.go:275] no items to output this cycle
E0322 07:33:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:33:03.409782  543705 memory.go:184] no items to output this cycle
I0322 07:33:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 07:33:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:33:13.409811  543705 memory.go:191] Add success.
I0322 07:33:13.409817  543705 cpu.go:282] Add success.
W0322 07:33:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:33:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:33:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:33:13.420212  543705 net.go:648] Add success.
I0322 07:33:13.422850  543705 net.go:770] primary dev: ETH0
I0322 07:33:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:33:13.422879  543705 net.go:698] Add success.
I0322 07:33:13.471225  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b866d2a3-0b30-44af-9262-0ebce5cc4939","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:33:13.471257  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:33:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:33:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:33:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 07:33:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:33:14.456619  543705 disk_worker.go:494] system disk:vda1
I0322 07:33:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:33:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:33:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:33:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:33:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:33:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 07:33:23.409797  543705 memory.go:184] no items to output this cycle
E0322 07:33:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:33:33.409763  543705 memory.go:184] no items to output this cycle
I0322 07:33:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 07:33:33.676526  543705 disk_info.go:125] begin check local disk info of client
I0322 07:33:33.678991  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:33:33.678996  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0100 0xc0004a0140]
I0322 07:33:39.554515  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:33:39.554521  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:33:43.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:33:43.410709  543705 memory.go:191] Add success.
I0322 07:33:43.409973  543705 cpu.go:282] Add success.
I0322 07:33:43.419750  543705 net.go:648] Add success.
I0322 07:33:43.422545  543705 net.go:770] primary dev: ETH0
I0322 07:33:43.422557  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:33:43.422569  543705 net.go:698] Add success.
I0322 07:33:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:33:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:33:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:33:53.410212  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:33:53.410233  543705 memory.go:184] no items to output this cycle
I0322 07:33:53.410247  543705 cpu.go:275] no items to output this cycle
E0322 07:34:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:34:03.409809  543705 memory.go:184] no items to output this cycle
I0322 07:34:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 07:34:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:34:13.409781  543705 memory.go:191] Add success.
W0322 07:34:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:34:13.409819  543705 cpu.go:282] Add success.
I0322 07:34:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:34:13.420457  543705 net.go:648] Add success.
I0322 07:34:13.423522  543705 net.go:770] primary dev: ETH0
I0322 07:34:13.423535  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:34:13.423547  543705 net.go:698] Add success.
I0322 07:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:34:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:34:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 07:34:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:34:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 07:34:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:34:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:34:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:34:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:34:23.409776  543705 memory.go:184] no items to output this cycle
I0322 07:34:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 07:34:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:34:33.409772  543705 memory.go:184] no items to output this cycle
I0322 07:34:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 07:34:33.679075  543705 disk_info.go:125] begin check local disk info of client
I0322 07:34:33.681541  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:34:33.681546  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a5980 0xc0004a59c0]
E0322 07:34:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:34:43.410870  543705 memory.go:191] Add success.
I0322 07:34:43.409812  543705 cpu.go:282] Add success.
I0322 07:34:43.420764  543705 net.go:648] Add success.
I0322 07:34:43.423719  543705 net.go:770] primary dev: ETH0
I0322 07:34:43.423732  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:34:43.423743  543705 net.go:698] Add success.
I0322 07:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:34:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:34:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:34:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:34:53.409815  543705 memory.go:184] no items to output this cycle
I0322 07:34:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 07:35:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:35:03.409782  543705 memory.go:184] no items to output this cycle
I0322 07:35:03.409804  543705 cpu.go:275] no items to output this cycle
W0322 07:35:13.409705  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:35:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:35:13.409732  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:35:13.409821  543705 cpu.go:282] Add success.
E0322 07:35:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:35:13.409844  543705 memory.go:191] Add success.
I0322 07:35:13.420003  543705 net.go:648] Add success.
I0322 07:35:13.422842  543705 net.go:770] primary dev: ETH0
I0322 07:35:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:35:13.422891  543705 net.go:698] Add success.
I0322 07:35:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:35:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:35:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 07:35:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:35:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 07:35:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:35:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:35:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:35:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:35:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:35:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:35:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:35:23.409781  543705 memory.go:184] no items to output this cycle
I0322 07:35:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 07:35:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:35:33.409781  543705 memory.go:184] no items to output this cycle
I0322 07:35:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 07:35:33.681670  543705 disk_info.go:125] begin check local disk info of client
I0322 07:35:33.684132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:35:33.684138  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5f00 0xc0003d5f40]
E0322 07:35:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:35:43.410659  543705 memory.go:191] Add success.
I0322 07:35:43.409816  543705 cpu.go:282] Add success.
I0322 07:35:43.419772  543705 net.go:648] Add success.
I0322 07:35:43.422501  543705 net.go:770] primary dev: ETH0
I0322 07:35:43.422514  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:35:43.422525  543705 net.go:698] Add success.
I0322 07:35:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:35:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:35:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:35:53.410586  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:35:53.410606  543705 memory.go:184] no items to output this cycle
I0322 07:35:53.410620  543705 cpu.go:275] no items to output this cycle
E0322 07:36:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:36:03.409778  543705 memory.go:184] no items to output this cycle
I0322 07:36:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:36:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:36:13.409810  543705 memory.go:191] Add success.
I0322 07:36:13.409811  543705 cpu.go:282] Add success.
W0322 07:36:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:36:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:36:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:36:13.420612  543705 net.go:648] Add success.
I0322 07:36:13.423478  543705 net.go:770] primary dev: ETH0
I0322 07:36:13.423492  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:36:13.423504  543705 net.go:698] Add success.
I0322 07:36:13.492049  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e5d6bc0-f821-4a21-969f-37cafc7413ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:36:13.492083  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:36:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:36:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:36:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 07:36:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:36:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 07:36:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:36:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:36:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:36:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:36:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:36:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:36:23.409792  543705 memory.go:184] no items to output this cycle
I0322 07:36:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 07:36:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:36:33.409794  543705 memory.go:184] no items to output this cycle
I0322 07:36:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 07:36:33.684219  543705 disk_info.go:125] begin check local disk info of client
I0322 07:36:33.686720  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:36:33.686726  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c00c0 0xc0003c0100]
I0322 07:36:39.555536  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:36:39.555543  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:36:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:36:43.410800  543705 memory.go:191] Add success.
I0322 07:36:43.409826  543705 cpu.go:282] Add success.
I0322 07:36:43.420503  543705 net.go:648] Add success.
I0322 07:36:43.423605  543705 net.go:770] primary dev: ETH0
I0322 07:36:43.423618  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:36:43.423630  543705 net.go:698] Add success.
I0322 07:36:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:36:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:36:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:36:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:36:53.409780  543705 memory.go:184] no items to output this cycle
I0322 07:36:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 07:37:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:37:03.409794  543705 memory.go:184] no items to output this cycle
I0322 07:37:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 07:37:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:37:13.409823  543705 memory.go:191] Add success.
I0322 07:37:13.409826  543705 cpu.go:282] Add success.
W0322 07:37:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:37:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:37:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:37:13.420313  543705 net.go:648] Add success.
I0322 07:37:13.423535  543705 net.go:770] primary dev: ETH0
I0322 07:37:13.423551  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:37:13.423565  543705 net.go:698] Add success.
I0322 07:37:13.453182  543705 event_worker.go:152] Polling the log file for events...
W0322 07:37:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:37:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 07:37:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:37:14.456935  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:37:14.456944  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:37:14.456950  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:37:14.456999  543705 disk_worker.go:494] system disk:vda1
I0322 07:37:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:37:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:37:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:37:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:37:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:37:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:37:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:37:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:37:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:37:23.409786  543705 memory.go:184] no items to output this cycle
I0322 07:37:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 07:37:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:37:33.409793  543705 memory.go:184] no items to output this cycle
I0322 07:37:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 07:37:33.687584  543705 disk_info.go:125] begin check local disk info of client
I0322 07:37:33.690110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:37:33.690115  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab300 0xc0001ab400]
E0322 07:37:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:37:43.410797  543705 memory.go:191] Add success.
I0322 07:37:43.409828  543705 cpu.go:282] Add success.
I0322 07:37:43.420493  543705 net.go:648] Add success.
I0322 07:37:43.423441  543705 net.go:770] primary dev: ETH0
I0322 07:37:43.423455  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:37:43.423467  543705 net.go:698] Add success.
I0322 07:37:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:37:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:37:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:37:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:37:53.409816  543705 memory.go:184] no items to output this cycle
I0322 07:37:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 07:38:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:38:03.409793  543705 memory.go:184] no items to output this cycle
I0322 07:38:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 07:38:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:38:13.409786  543705 memory.go:191] Add success.
W0322 07:38:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 07:38:13.409815  543705 cpu.go:282] Add success.
W0322 07:38:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:38:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:38:13.420120  543705 net.go:648] Add success.
I0322 07:38:13.423044  543705 net.go:770] primary dev: ETH0
I0322 07:38:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:38:13.423071  543705 net.go:698] Add success.
I0322 07:38:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:38:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:38:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 07:38:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:38:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 07:38:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:38:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:38:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:38:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:38:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:38:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:38:23.409767  543705 memory.go:184] no items to output this cycle
I0322 07:38:23.409882  543705 cpu.go:275] no items to output this cycle
E0322 07:38:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:38:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 07:38:33.409788  543705 memory.go:184] no items to output this cycle
I0322 07:38:33.690195  543705 disk_info.go:125] begin check local disk info of client
I0322 07:38:33.692693  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:38:33.692698  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9000 0xc0004d9040]
E0322 07:38:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:38:43.410740  543705 memory.go:191] Add success.
I0322 07:38:43.409835  543705 cpu.go:282] Add success.
I0322 07:38:43.420273  543705 net.go:770] primary dev: ETH0
I0322 07:38:43.420286  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:38:43.420299  543705 net.go:698] Add success.
I0322 07:38:43.420531  543705 net.go:648] Add success.
I0322 07:38:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:38:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:38:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:38:53.410276  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:38:53.410297  543705 memory.go:184] no items to output this cycle
I0322 07:38:53.410309  543705 cpu.go:275] no items to output this cycle
E0322 07:39:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:39:03.409812  543705 memory.go:184] no items to output this cycle
I0322 07:39:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 07:39:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:39:13.409820  543705 memory.go:191] Add success.
I0322 07:39:13.409827  543705 cpu.go:282] Add success.
W0322 07:39:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:39:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:39:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:39:13.420141  543705 net.go:648] Add success.
I0322 07:39:13.422938  543705 net.go:770] primary dev: ETH0
I0322 07:39:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:39:13.422967  543705 net.go:698] Add success.
I0322 07:39:13.493561  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f450da27-9621-4a3f-bce3-3e59b6e281e5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:39:13.493596  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:39:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:39:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:39:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 07:39:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:39:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 07:39:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:39:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:39:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:39:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:39:23.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:39:23.409878  543705 cpu.go:275] no items to output this cycle
I0322 07:39:23.409901  543705 memory.go:184] no items to output this cycle
E0322 07:39:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:39:33.409788  543705 memory.go:184] no items to output this cycle
I0322 07:39:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 07:39:33.692781  543705 disk_info.go:125] begin check local disk info of client
I0322 07:39:33.695275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:39:33.695281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f40 0xc0000c4f80]
I0322 07:39:39.556522  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:39:39.556529  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:39:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:39:43.410740  543705 memory.go:191] Add success.
I0322 07:39:43.409822  543705 cpu.go:282] Add success.
I0322 07:39:43.420574  543705 net.go:648] Add success.
I0322 07:39:43.423438  543705 net.go:770] primary dev: ETH0
I0322 07:39:43.423451  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:39:43.423464  543705 net.go:698] Add success.
I0322 07:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:39:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:39:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:39:53.409786  543705 memory.go:184] no items to output this cycle
I0322 07:39:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 07:40:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:40:03.409810  543705 memory.go:184] no items to output this cycle
I0322 07:40:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 07:40:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:40:13.409780  543705 memory.go:191] Add success.
I0322 07:40:13.409800  543705 cpu.go:282] Add success.
W0322 07:40:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:40:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:40:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:40:13.420194  543705 net.go:648] Add success.
I0322 07:40:13.422793  543705 net.go:770] primary dev: ETH0
I0322 07:40:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:40:13.422823  543705 net.go:698] Add success.
I0322 07:40:14.454807  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:40:14.455030  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:40:14.455040  543705 disk_worker.go:708] disk space is not compliant
W0322 07:40:14.455043  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:40:14.456445  543705 disk_worker.go:494] system disk:vda1
I0322 07:40:14.456476  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:40:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:40:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:40:16.458098  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:40:16.458120  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:40:16.472092  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:40:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:40:23.409781  543705 memory.go:184] no items to output this cycle
I0322 07:40:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 07:40:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:40:33.409779  543705 memory.go:184] no items to output this cycle
I0322 07:40:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 07:40:33.695360  543705 disk_info.go:125] begin check local disk info of client
I0322 07:40:33.697833  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:40:33.697839  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8e40 0xc0004d8e80]
E0322 07:40:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:40:43.410669  543705 memory.go:191] Add success.
I0322 07:40:43.409807  543705 cpu.go:282] Add success.
I0322 07:40:43.420321  543705 net.go:648] Add success.
I0322 07:40:43.423163  543705 net.go:770] primary dev: ETH0
I0322 07:40:43.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:40:43.423188  543705 net.go:698] Add success.
I0322 07:40:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:40:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:40:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:40:53.409774  543705 memory.go:184] no items to output this cycle
I0322 07:40:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 07:41:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:41:03.409811  543705 memory.go:184] no items to output this cycle
I0322 07:41:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 07:41:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:41:13.409780  543705 memory.go:191] Add success.
I0322 07:41:13.409803  543705 cpu.go:282] Add success.
W0322 07:41:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:41:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:41:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:41:13.420191  543705 net.go:648] Add success.
I0322 07:41:13.422796  543705 net.go:770] primary dev: ETH0
I0322 07:41:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:41:13.422822  543705 net.go:698] Add success.
I0322 07:41:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:41:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:41:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 07:41:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:41:14.456628  543705 disk_worker.go:494] system disk:vda1
I0322 07:41:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:41:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:41:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:41:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:41:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:41:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:41:23.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:41:23.409879  543705 memory.go:184] no items to output this cycle
I0322 07:41:23.409915  543705 cpu.go:275] no items to output this cycle
E0322 07:41:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:41:33.409780  543705 memory.go:184] no items to output this cycle
I0322 07:41:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 07:41:33.697919  543705 disk_info.go:125] begin check local disk info of client
I0322 07:41:33.700425  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:41:33.700431  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9b80 0xc0004d9bc0]
E0322 07:41:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:41:43.410560  543705 memory.go:191] Add success.
I0322 07:41:43.409793  543705 cpu.go:282] Add success.
I0322 07:41:43.420346  543705 net.go:648] Add success.
I0322 07:41:43.422843  543705 net.go:770] primary dev: ETH0
I0322 07:41:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:41:43.422873  543705 net.go:698] Add success.
I0322 07:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:41:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:41:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:41:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:41:53.409783  543705 memory.go:184] no items to output this cycle
I0322 07:41:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 07:42:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:42:03.409797  543705 memory.go:184] no items to output this cycle
I0322 07:42:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 07:42:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:42:13.409787  543705 memory.go:191] Add success.
I0322 07:42:13.409786  543705 cpu.go:282] Add success.
W0322 07:42:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:42:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:42:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:42:13.420231  543705 net.go:648] Add success.
I0322 07:42:13.423082  543705 net.go:770] primary dev: ETH0
I0322 07:42:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:42:13.423123  543705 net.go:698] Add success.
I0322 07:42:13.468936  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"35f93802-5e24-4871-be7c-e015bfe14dc9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:42:13.468969  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 07:42:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:42:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 07:42:14.455207  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:42:14.455953  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:42:14.455961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:42:14.455967  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:42:14.456765  543705 disk_worker.go:494] system disk:vda1
I0322 07:42:14.456796  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:42:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:42:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:42:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:42:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:42:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:42:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:42:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:42:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:42:23.409788  543705 memory.go:184] no items to output this cycle
I0322 07:42:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:42:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:42:33.409779  543705 memory.go:184] no items to output this cycle
I0322 07:42:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 07:42:33.701638  543705 disk_info.go:125] begin check local disk info of client
I0322 07:42:33.704194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:42:33.704199  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053f700 0xc00053f740]
I0322 07:42:39.557542  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:42:39.557549  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:42:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:42:43.410553  543705 memory.go:191] Add success.
I0322 07:42:43.409814  543705 cpu.go:282] Add success.
I0322 07:42:43.420282  543705 net.go:648] Add success.
I0322 07:42:43.423098  543705 net.go:770] primary dev: ETH0
I0322 07:42:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:42:43.423124  543705 net.go:698] Add success.
I0322 07:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:42:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:42:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:42:53.409800  543705 memory.go:184] no items to output this cycle
I0322 07:42:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 07:43:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:43:03.409780  543705 memory.go:184] no items to output this cycle
I0322 07:43:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 07:43:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:43:13.409815  543705 memory.go:191] Add success.
I0322 07:43:13.409827  543705 cpu.go:282] Add success.
W0322 07:43:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:43:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:43:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:43:13.420135  543705 net.go:648] Add success.
I0322 07:43:13.423425  543705 net.go:770] primary dev: ETH0
I0322 07:43:13.423437  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:43:13.423449  543705 net.go:698] Add success.
I0322 07:43:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:43:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:43:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 07:43:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:43:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 07:43:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:43:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:43:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:43:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:43:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:43:23.409779  543705 memory.go:184] no items to output this cycle
I0322 07:43:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 07:43:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:43:33.409770  543705 memory.go:184] no items to output this cycle
I0322 07:43:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 07:43:33.704277  543705 disk_info.go:125] begin check local disk info of client
I0322 07:43:33.706758  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:43:33.706763  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9b40 0xc0002b9b80]
E0322 07:43:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:43:43.410649  543705 memory.go:191] Add success.
I0322 07:43:43.409826  543705 cpu.go:282] Add success.
I0322 07:43:43.420349  543705 net.go:648] Add success.
I0322 07:43:43.422943  543705 net.go:770] primary dev: ETH0
I0322 07:43:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:43:43.422969  543705 net.go:698] Add success.
I0322 07:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:43:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:43:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:43:53.409783  543705 memory.go:184] no items to output this cycle
I0322 07:43:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 07:44:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:44:03.409791  543705 cpu.go:275] no items to output this cycle
I0322 07:44:03.409793  543705 memory.go:184] no items to output this cycle
E0322 07:44:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:44:13.409786  543705 memory.go:191] Add success.
I0322 07:44:13.409790  543705 cpu.go:282] Add success.
W0322 07:44:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:44:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:44:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:44:13.420167  543705 net.go:648] Add success.
I0322 07:44:13.423117  543705 net.go:770] primary dev: ETH0
I0322 07:44:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:44:13.423143  543705 net.go:698] Add success.
I0322 07:44:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:44:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:44:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 07:44:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:44:14.458982  543705 disk_worker.go:494] system disk:vda1
I0322 07:44:14.459020  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:44:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:44:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:44:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:44:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:44:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:44:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:44:23.409797  543705 memory.go:184] no items to output this cycle
I0322 07:44:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 07:44:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:44:33.409781  543705 memory.go:184] no items to output this cycle
I0322 07:44:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 07:44:33.706842  543705 disk_info.go:125] begin check local disk info of client
I0322 07:44:33.709331  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:44:33.709336  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369600 0xc000369640]
E0322 07:44:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:44:43.410725  543705 memory.go:191] Add success.
I0322 07:44:43.409799  543705 cpu.go:282] Add success.
I0322 07:44:43.420458  543705 net.go:648] Add success.
I0322 07:44:43.423267  543705 net.go:770] primary dev: ETH0
I0322 07:44:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:44:43.423296  543705 net.go:698] Add success.
I0322 07:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:44:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:44:53.409790  543705 memory.go:184] no items to output this cycle
I0322 07:44:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 07:45:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:45:03.409786  543705 memory.go:184] no items to output this cycle
I0322 07:45:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 07:45:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:45:13.409793  543705 memory.go:191] Add success.
I0322 07:45:13.409797  543705 cpu.go:282] Add success.
W0322 07:45:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:45:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:45:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:45:13.420056  543705 net.go:648] Add success.
I0322 07:45:13.422802  543705 net.go:770] primary dev: ETH0
I0322 07:45:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:45:13.422830  543705 net.go:698] Add success.
I0322 07:45:13.464022  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0a6b39b3-edb8-4fd8-afa6-28f424e8b006","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:45:13.464054  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:45:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:45:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:45:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 07:45:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:45:14.456512  543705 disk_worker.go:494] system disk:vda1
I0322 07:45:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:45:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:45:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:45:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:45:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:45:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:45:23.409796  543705 memory.go:184] no items to output this cycle
I0322 07:45:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 07:45:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:45:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 07:45:33.409797  543705 memory.go:184] no items to output this cycle
I0322 07:45:33.709670  543705 disk_info.go:125] begin check local disk info of client
I0322 07:45:33.712130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:45:33.712135  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8ec0 0xc0002b8f00]
I0322 07:45:39.558538  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:45:39.558546  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:45:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:45:43.410775  543705 memory.go:191] Add success.
I0322 07:45:43.409818  543705 cpu.go:282] Add success.
I0322 07:45:43.420484  543705 net.go:648] Add success.
I0322 07:45:43.423679  543705 net.go:770] primary dev: ETH0
I0322 07:45:43.423692  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:45:43.423704  543705 net.go:698] Add success.
I0322 07:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:45:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:45:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:45:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:45:53.409780  543705 memory.go:184] no items to output this cycle
I0322 07:45:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 07:46:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:46:03.409811  543705 memory.go:184] no items to output this cycle
I0322 07:46:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 07:46:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:46:13.409775  543705 memory.go:191] Add success.
I0322 07:46:13.409797  543705 cpu.go:282] Add success.
W0322 07:46:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:46:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:46:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:46:13.420130  543705 net.go:648] Add success.
I0322 07:46:13.422885  543705 net.go:770] primary dev: ETH0
I0322 07:46:13.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:46:13.422914  543705 net.go:698] Add success.
I0322 07:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:46:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:46:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 07:46:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:46:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 07:46:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:46:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:46:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:46:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:46:23.409796  543705 memory.go:184] no items to output this cycle
I0322 07:46:23.409832  543705 cpu.go:275] no items to output this cycle
E0322 07:46:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:46:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 07:46:33.409782  543705 memory.go:184] no items to output this cycle
I0322 07:46:33.712674  543705 disk_info.go:125] begin check local disk info of client
I0322 07:46:33.715184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:46:33.715189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac40 0xc00007ac80]
E0322 07:46:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:46:43.410789  543705 memory.go:191] Add success.
I0322 07:46:43.409797  543705 cpu.go:282] Add success.
I0322 07:46:43.420622  543705 net.go:648] Add success.
I0322 07:46:43.423650  543705 net.go:770] primary dev: ETH0
I0322 07:46:43.423663  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:46:43.423675  543705 net.go:698] Add success.
I0322 07:46:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:46:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:46:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:46:53.409796  543705 memory.go:184] no items to output this cycle
I0322 07:46:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 07:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:47:03.409785  543705 memory.go:184] no items to output this cycle
I0322 07:47:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 07:47:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:47:13.409776  543705 memory.go:191] Add success.
W0322 07:47:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:47:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:47:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:47:13.409820  543705 cpu.go:282] Add success.
I0322 07:47:13.420050  543705 net.go:648] Add success.
I0322 07:47:13.423257  543705 net.go:770] primary dev: ETH0
I0322 07:47:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:47:13.423282  543705 net.go:698] Add success.
I0322 07:47:13.452782  543705 event_worker.go:152] Polling the log file for events...
W0322 07:47:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:47:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 07:47:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:47:14.455945  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:47:14.455954  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:47:14.455959  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:47:14.456457  543705 disk_worker.go:494] system disk:vda1
I0322 07:47:14.456487  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:47:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:47:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:47:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:47:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:47:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:47:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:47:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:47:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:47:23.409794  543705 memory.go:184] no items to output this cycle
I0322 07:47:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 07:47:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:47:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 07:47:33.409790  543705 memory.go:184] no items to output this cycle
I0322 07:47:33.715281  543705 disk_info.go:125] begin check local disk info of client
I0322 07:47:33.717741  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:47:33.717748  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b2c0 0xc00007b300]
E0322 07:47:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:47:43.410565  543705 memory.go:191] Add success.
I0322 07:47:43.409814  543705 cpu.go:282] Add success.
I0322 07:47:43.420328  543705 net.go:648] Add success.
I0322 07:47:43.422830  543705 net.go:770] primary dev: ETH0
I0322 07:47:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:47:43.422857  543705 net.go:698] Add success.
I0322 07:47:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:47:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:47:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:47:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:47:53.409778  543705 memory.go:184] no items to output this cycle
I0322 07:47:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 07:48:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:48:03.409807  543705 memory.go:184] no items to output this cycle
I0322 07:48:03.409818  543705 cpu.go:275] no items to output this cycle
W0322 07:48:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:48:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:48:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:48:13.409807  543705 cpu.go:282] Add success.
E0322 07:48:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:48:13.409857  543705 memory.go:191] Add success.
I0322 07:48:13.420026  543705 net.go:648] Add success.
I0322 07:48:13.423140  543705 net.go:770] primary dev: ETH0
I0322 07:48:13.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:48:13.423165  543705 net.go:698] Add success.
I0322 07:48:13.563144  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"74f42404-013d-42d9-8dd0-9f9818aa9e42","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:48:13.563178  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:48:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:48:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 07:48:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:48:14.456616  543705 disk_worker.go:494] system disk:vda1
I0322 07:48:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:48:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:48:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:48:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:48:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 07:48:23.409792  543705 memory.go:184] no items to output this cycle
E0322 07:48:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:48:33.409788  543705 memory.go:184] no items to output this cycle
I0322 07:48:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 07:48:33.718724  543705 disk_info.go:125] begin check local disk info of client
I0322 07:48:33.721203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:48:33.721210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9c80 0xc0002b9cc0]
I0322 07:48:39.559534  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:48:39.559540  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:48:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:48:43.410761  543705 memory.go:191] Add success.
I0322 07:48:43.409796  543705 cpu.go:282] Add success.
I0322 07:48:43.420527  543705 net.go:648] Add success.
I0322 07:48:43.423335  543705 net.go:770] primary dev: ETH0
I0322 07:48:43.423349  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:48:43.423361  543705 net.go:698] Add success.
I0322 07:48:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:48:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:48:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:48:53.409803  543705 memory.go:184] no items to output this cycle
I0322 07:48:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 07:49:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:49:03.409810  543705 memory.go:184] no items to output this cycle
I0322 07:49:03.409824  543705 cpu.go:275] no items to output this cycle
W0322 07:49:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:49:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:49:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 07:49:13.409826  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:49:13.409827  543705 cpu.go:282] Add success.
I0322 07:49:13.409844  543705 memory.go:191] Add success.
I0322 07:49:13.420035  543705 net.go:648] Add success.
I0322 07:49:13.422618  543705 net.go:770] primary dev: ETH0
I0322 07:49:13.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:49:13.422646  543705 net.go:698] Add success.
I0322 07:49:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:49:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:49:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 07:49:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:49:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 07:49:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:49:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:49:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:49:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:49:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:49:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:49:23.409805  543705 memory.go:184] no items to output this cycle
I0322 07:49:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 07:49:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:49:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 07:49:33.409801  543705 memory.go:184] no items to output this cycle
I0322 07:49:33.721673  543705 disk_info.go:125] begin check local disk info of client
I0322 07:49:33.724132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:49:33.724137  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9e80 0xc0002b9ec0]
E0322 07:49:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:49:43.410698  543705 memory.go:191] Add success.
I0322 07:49:43.409792  543705 cpu.go:282] Add success.
I0322 07:49:43.420426  543705 net.go:648] Add success.
I0322 07:49:43.423343  543705 net.go:770] primary dev: ETH0
I0322 07:49:43.423356  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:49:43.423368  543705 net.go:698] Add success.
I0322 07:49:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:49:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:49:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:49:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:49:53.409774  543705 memory.go:184] no items to output this cycle
I0322 07:49:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 07:50:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:50:03.409787  543705 memory.go:184] no items to output this cycle
I0322 07:50:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 07:50:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:50:13.409794  543705 memory.go:191] Add success.
I0322 07:50:13.409793  543705 cpu.go:282] Add success.
W0322 07:50:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:50:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:50:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:50:13.420265  543705 net.go:648] Add success.
I0322 07:50:13.422876  543705 net.go:770] primary dev: ETH0
I0322 07:50:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:50:13.422906  543705 net.go:698] Add success.
I0322 07:50:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:50:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:50:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 07:50:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:50:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 07:50:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:50:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:50:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:50:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:50:23.409794  543705 memory.go:184] no items to output this cycle
I0322 07:50:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:50:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:50:33.409816  543705 memory.go:184] no items to output this cycle
I0322 07:50:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 07:50:33.724744  543705 disk_info.go:125] begin check local disk info of client
I0322 07:50:33.727275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:50:33.727280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab940 0xc0001ab980]
E0322 07:50:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:50:43.410632  543705 memory.go:191] Add success.
I0322 07:50:43.409799  543705 cpu.go:282] Add success.
I0322 07:50:43.420341  543705 net.go:648] Add success.
I0322 07:50:43.423022  543705 net.go:770] primary dev: ETH0
I0322 07:50:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:50:43.423047  543705 net.go:698] Add success.
I0322 07:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:50:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:50:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:50:53.409773  543705 memory.go:184] no items to output this cycle
I0322 07:50:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 07:51:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:51:03.409788  543705 memory.go:184] no items to output this cycle
I0322 07:51:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 07:51:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:51:13.409808  543705 memory.go:191] Add success.
I0322 07:51:13.409817  543705 cpu.go:282] Add success.
W0322 07:51:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:51:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:51:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:51:13.420053  543705 net.go:648] Add success.
I0322 07:51:13.423125  543705 net.go:770] primary dev: ETH0
I0322 07:51:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:51:13.423150  543705 net.go:698] Add success.
I0322 07:51:13.470484  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d3f95525-3e16-453e-94bf-68ed06b43156","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:51:13.470517  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:51:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:51:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:51:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 07:51:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:51:14.456688  543705 disk_worker.go:494] system disk:vda1
I0322 07:51:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:51:15.455619  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:51:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:51:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:51:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:51:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:51:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:51:23.409794  543705 memory.go:184] no items to output this cycle
I0322 07:51:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:51:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:51:33.409791  543705 memory.go:184] no items to output this cycle
I0322 07:51:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 07:51:33.728741  543705 disk_info.go:125] begin check local disk info of client
I0322 07:51:33.731202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:51:33.731207  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368800 0xc000368840]
I0322 07:51:39.560554  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:51:39.560561  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:51:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:51:43.410640  543705 memory.go:191] Add success.
I0322 07:51:43.409834  543705 cpu.go:282] Add success.
I0322 07:51:43.420372  543705 net.go:648] Add success.
I0322 07:51:43.423302  543705 net.go:770] primary dev: ETH0
I0322 07:51:43.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:51:43.423327  543705 net.go:698] Add success.
I0322 07:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:51:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:51:53.409789  543705 memory.go:184] no items to output this cycle
I0322 07:51:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 07:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:52:03.409784  543705 memory.go:184] no items to output this cycle
I0322 07:52:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 07:52:13.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:52:13.409770  543705 memory.go:191] Add success.
W0322 07:52:13.409795  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:52:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:52:13.409807  543705 cpu.go:282] Add success.
I0322 07:52:13.409808  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:52:13.420050  543705 net.go:648] Add success.
I0322 07:52:13.422763  543705 net.go:770] primary dev: ETH0
I0322 07:52:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:52:13.422795  543705 net.go:698] Add success.
W0322 07:52:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:52:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0322 07:52:14.455248  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:52:14.455912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:52:14.455922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:52:14.455928  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:52:14.456837  543705 disk_worker.go:494] system disk:vda1
I0322 07:52:14.456867  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:52:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:52:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:52:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:52:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:52:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:52:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:52:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:52:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:52:23.409773  543705 memory.go:184] no items to output this cycle
I0322 07:52:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 07:52:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:52:33.409805  543705 memory.go:184] no items to output this cycle
I0322 07:52:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 07:52:33.731786  543705 disk_info.go:125] begin check local disk info of client
I0322 07:52:33.734245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:52:33.734250  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052df00 0xc00052df40]
E0322 07:52:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:52:43.410796  543705 memory.go:191] Add success.
I0322 07:52:43.409809  543705 cpu.go:282] Add success.
I0322 07:52:43.420491  543705 net.go:648] Add success.
I0322 07:52:43.423414  543705 net.go:770] primary dev: ETH0
I0322 07:52:43.423429  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:52:43.423441  543705 net.go:698] Add success.
I0322 07:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:52:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:52:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:52:53.409811  543705 memory.go:184] no items to output this cycle
I0322 07:52:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 07:53:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:53:03.409796  543705 memory.go:184] no items to output this cycle
I0322 07:53:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 07:53:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:53:13.409799  543705 memory.go:191] Add success.
I0322 07:53:13.409799  543705 cpu.go:282] Add success.
W0322 07:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:53:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:53:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:53:13.420121  543705 net.go:648] Add success.
I0322 07:53:13.423129  543705 net.go:770] primary dev: ETH0
I0322 07:53:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:53:13.423153  543705 net.go:698] Add success.
I0322 07:53:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:53:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:53:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 07:53:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:53:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 07:53:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:53:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:53:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:53:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:53:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:53:23.409926  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:53:23.409926  543705 cpu.go:275] no items to output this cycle
I0322 07:53:23.409962  543705 memory.go:184] no items to output this cycle
E0322 07:53:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:53:33.409807  543705 memory.go:184] no items to output this cycle
I0322 07:53:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 07:53:33.735804  543705 disk_info.go:125] begin check local disk info of client
I0322 07:53:33.738279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:53:33.738285  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 07:53:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:53:43.410650  543705 memory.go:191] Add success.
I0322 07:53:43.409832  543705 cpu.go:282] Add success.
I0322 07:53:43.420373  543705 net.go:648] Add success.
I0322 07:53:43.423283  543705 net.go:770] primary dev: ETH0
I0322 07:53:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:53:43.423308  543705 net.go:698] Add success.
I0322 07:53:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:53:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:53:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:53:53.410385  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:53:53.410404  543705 memory.go:184] no items to output this cycle
I0322 07:53:53.410419  543705 cpu.go:275] no items to output this cycle
E0322 07:54:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:54:03.409812  543705 memory.go:184] no items to output this cycle
I0322 07:54:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 07:54:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:54:13.409820  543705 memory.go:191] Add success.
I0322 07:54:13.409825  543705 cpu.go:282] Add success.
W0322 07:54:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:54:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:54:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:54:13.420245  543705 net.go:648] Add success.
I0322 07:54:13.423104  543705 net.go:770] primary dev: ETH0
I0322 07:54:13.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:54:13.423133  543705 net.go:698] Add success.
I0322 07:54:13.463649  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ea665a3-0815-46d6-83b2-99db37ead783","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:54:13.463684  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 07:54:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:54:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:54:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 07:54:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:54:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 07:54:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:54:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:54:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:54:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:54:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:54:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:54:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:54:23.409778  543705 memory.go:184] no items to output this cycle
I0322 07:54:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 07:54:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:54:33.409779  543705 memory.go:184] no items to output this cycle
I0322 07:54:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 07:54:33.739836  543705 disk_info.go:125] begin check local disk info of client
I0322 07:54:33.742331  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:54:33.742337  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be40 0xc00007be80]
I0322 07:54:39.561553  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:54:39.561559  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:54:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:54:43.410775  543705 memory.go:191] Add success.
I0322 07:54:43.409801  543705 cpu.go:282] Add success.
I0322 07:54:43.420530  543705 net.go:648] Add success.
I0322 07:54:43.423476  543705 net.go:770] primary dev: ETH0
I0322 07:54:43.423491  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:54:43.423505  543705 net.go:698] Add success.
I0322 07:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:54:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:54:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:54:53.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:54:53.410280  543705 memory.go:184] no items to output this cycle
I0322 07:54:53.410295  543705 cpu.go:275] no items to output this cycle
E0322 07:55:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:55:03.409776  543705 memory.go:184] no items to output this cycle
I0322 07:55:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 07:55:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:55:13.409795  543705 memory.go:191] Add success.
I0322 07:55:13.409795  543705 cpu.go:282] Add success.
W0322 07:55:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:55:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:55:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:55:13.420231  543705 net.go:648] Add success.
I0322 07:55:13.422983  543705 net.go:770] primary dev: ETH0
I0322 07:55:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:55:13.423008  543705 net.go:698] Add success.
I0322 07:55:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:55:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:55:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 07:55:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:55:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 07:55:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:55:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:55:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:55:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:55:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:55:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:55:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:55:23.409793  543705 memory.go:184] no items to output this cycle
I0322 07:55:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 07:55:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:55:33.409781  543705 memory.go:184] no items to output this cycle
I0322 07:55:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 07:55:33.742417  543705 disk_info.go:125] begin check local disk info of client
I0322 07:55:33.744872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:55:33.744878  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005301c0 0xc000530200]
E0322 07:55:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:55:43.410729  543705 memory.go:191] Add success.
I0322 07:55:43.409814  543705 cpu.go:282] Add success.
I0322 07:55:43.420440  543705 net.go:648] Add success.
I0322 07:55:43.423140  543705 net.go:770] primary dev: ETH0
I0322 07:55:43.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:55:43.423166  543705 net.go:698] Add success.
I0322 07:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:55:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:55:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:55:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:55:53.409775  543705 memory.go:184] no items to output this cycle
I0322 07:55:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 07:56:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:56:03.409806  543705 memory.go:184] no items to output this cycle
I0322 07:56:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 07:56:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:56:13.409781  543705 memory.go:191] Add success.
I0322 07:56:13.409799  543705 cpu.go:282] Add success.
W0322 07:56:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:56:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:56:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:56:13.420137  543705 net.go:648] Add success.
I0322 07:56:13.423064  543705 net.go:770] primary dev: ETH0
I0322 07:56:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:56:13.423088  543705 net.go:698] Add success.
I0322 07:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:56:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:56:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 07:56:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:56:14.457618  543705 disk_worker.go:494] system disk:vda1
I0322 07:56:14.457664  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:56:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:56:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:56:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:56:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:56:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:56:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:56:23.409798  543705 memory.go:184] no items to output this cycle
I0322 07:56:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 07:56:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:56:33.409800  543705 memory.go:184] no items to output this cycle
I0322 07:56:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 07:56:33.745671  543705 disk_info.go:125] begin check local disk info of client
I0322 07:56:33.748162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:56:33.748167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5200 0xc0000c5240]
E0322 07:56:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:56:43.410642  543705 memory.go:191] Add success.
I0322 07:56:43.409813  543705 cpu.go:282] Add success.
I0322 07:56:43.420350  543705 net.go:648] Add success.
I0322 07:56:43.422851  543705 net.go:770] primary dev: ETH0
I0322 07:56:43.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:56:43.422877  543705 net.go:698] Add success.
I0322 07:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:56:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:56:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:56:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:56:53.409778  543705 memory.go:184] no items to output this cycle
I0322 07:56:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 07:57:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:57:03.409810  543705 memory.go:184] no items to output this cycle
I0322 07:57:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 07:57:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:57:13.409807  543705 memory.go:191] Add success.
I0322 07:57:13.409809  543705 cpu.go:282] Add success.
W0322 07:57:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:57:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:57:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:57:13.420198  543705 net.go:648] Add success.
I0322 07:57:13.429127  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 07:57:13.429210  543705 net.go:770] primary dev: ETH0
I0322 07:57:13.429224  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:57:13.429239  543705 net.go:698] Add success.
I0322 07:57:13.452774  543705 event_worker.go:152] Polling the log file for events...
I0322 07:57:13.463529  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e5e8ca5f-8818-4eb5-ad82-015b2c37f065","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 07:57:13.463562  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 07:57:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:57:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 07:57:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0322 07:57:14.456886  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 07:57:14.456894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 07:57:14.456899  543705 custom_config.go:64] query custom config with name: gpu
I0322 07:57:14.456976  543705 disk_worker.go:494] system disk:vda1
I0322 07:57:14.457003  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 07:57:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 07:57:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:57:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 07:57:16.457970  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 07:57:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:57:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:57:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:57:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:57:23.409777  543705 cpu.go:275] no items to output this cycle
I0322 07:57:23.409778  543705 memory.go:184] no items to output this cycle
E0322 07:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:57:33.409801  543705 memory.go:184] no items to output this cycle
I0322 07:57:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 07:57:33.748872  543705 disk_info.go:125] begin check local disk info of client
I0322 07:57:33.751391  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:57:33.751396  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
I0322 07:57:39.562552  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 07:57:39.562558  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 07:57:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:57:43.410595  543705 memory.go:191] Add success.
I0322 07:57:43.409786  543705 cpu.go:282] Add success.
I0322 07:57:43.420336  543705 net.go:648] Add success.
I0322 07:57:43.423339  543705 net.go:770] primary dev: ETH0
I0322 07:57:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:57:43.423364  543705 net.go:698] Add success.
I0322 07:57:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:57:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:57:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:57:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:57:53.409816  543705 memory.go:184] no items to output this cycle
I0322 07:57:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 07:58:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:58:03.409820  543705 memory.go:184] no items to output this cycle
I0322 07:58:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 07:58:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:58:13.409778  543705 memory.go:191] Add success.
W0322 07:58:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 07:58:13.409811  543705 cpu.go:282] Add success.
W0322 07:58:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:58:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:58:13.420352  543705 net.go:648] Add success.
I0322 07:58:13.423294  543705 net.go:770] primary dev: ETH0
I0322 07:58:13.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:58:13.423318  543705 net.go:698] Add success.
I0322 07:58:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:58:14.455085  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:58:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 07:58:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:58:14.456559  543705 disk_worker.go:494] system disk:vda1
I0322 07:58:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:58:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:58:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:58:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:58:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:58:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:58:23.409781  543705 memory.go:184] no items to output this cycle
I0322 07:58:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 07:58:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:58:33.409777  543705 memory.go:184] no items to output this cycle
I0322 07:58:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 07:58:33.752908  543705 disk_info.go:125] begin check local disk info of client
I0322 07:58:33.755538  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:58:33.755543  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053f700 0xc00053f740]
E0322 07:58:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:58:43.410637  543705 memory.go:191] Add success.
I0322 07:58:43.409786  543705 cpu.go:282] Add success.
I0322 07:58:43.420308  543705 net.go:648] Add success.
I0322 07:58:43.423334  543705 net.go:770] primary dev: ETH0
I0322 07:58:43.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:58:43.423361  543705 net.go:698] Add success.
I0322 07:58:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:58:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:58:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:58:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:58:53.409779  543705 memory.go:184] no items to output this cycle
I0322 07:58:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 07:59:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:59:03.409785  543705 memory.go:184] no items to output this cycle
I0322 07:59:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 07:59:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:59:13.409820  543705 memory.go:191] Add success.
I0322 07:59:13.409821  543705 cpu.go:282] Add success.
W0322 07:59:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 07:59:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 07:59:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 07:59:13.420158  543705 net.go:648] Add success.
I0322 07:59:13.423139  543705 net.go:770] primary dev: ETH0
I0322 07:59:13.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:59:13.423166  543705 net.go:698] Add success.
I0322 07:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 07:59:14.455470  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 07:59:14.455485  543705 disk_worker.go:708] disk space is not compliant
W0322 07:59:14.455489  543705 disk_worker.go:728] disk inode is not compliant
I0322 07:59:14.457084  543705 disk_worker.go:494] system disk:vda1
I0322 07:59:14.457112  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 07:59:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 07:59:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:59:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 07:59:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 07:59:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:59:23.409778  543705 memory.go:184] no items to output this cycle
I0322 07:59:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 07:59:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:59:33.409771  543705 memory.go:184] no items to output this cycle
I0322 07:59:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 07:59:33.756915  543705 disk_info.go:125] begin check local disk info of client
I0322 07:59:33.759408  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 07:59:33.759414  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053ef80 0xc00053efc0]
E0322 07:59:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:59:43.410657  543705 memory.go:191] Add success.
I0322 07:59:43.409794  543705 cpu.go:282] Add success.
I0322 07:59:43.420362  543705 net.go:648] Add success.
I0322 07:59:43.423319  543705 net.go:770] primary dev: ETH0
I0322 07:59:43.423331  543705 net.go:802] Send network stats successfully!,count is 6
I0322 07:59:43.423343  543705 net.go:698] Add success.
I0322 07:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 07:59:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 07:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 07:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 07:59:53.409781  543705 memory.go:184] no items to output this cycle
I0322 07:59:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:00:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:00:03.409782  543705 memory.go:184] no items to output this cycle
I0322 08:00:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:00:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:00:13.409776  543705 memory.go:191] Add success.
W0322 08:00:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 08:00:13.409811  543705 cpu.go:282] Add success.
W0322 08:00:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:00:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:00:13.420081  543705 net.go:648] Add success.
I0322 08:00:13.423031  543705 net.go:770] primary dev: ETH0
I0322 08:00:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:00:13.423055  543705 net.go:698] Add success.
I0322 08:00:13.468355  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b8a7bd50-451f-4d63-b972-b1c1c1b5838b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:00:13.468387  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:00:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:00:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:00:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 08:00:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:00:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 08:00:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:00:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:00:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:00:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:00:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:00:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:00:23.409772  543705 memory.go:184] no items to output this cycle
I0322 08:00:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 08:00:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:00:33.409765  543705 memory.go:184] no items to output this cycle
I0322 08:00:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 08:00:33.760944  543705 disk_info.go:125] begin check local disk info of client
I0322 08:00:33.763490  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:00:33.763496  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369400 0xc000369440]
I0322 08:00:39.563562  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:00:39.563567  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:00:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:00:43.410672  543705 memory.go:191] Add success.
I0322 08:00:43.409826  543705 cpu.go:282] Add success.
I0322 08:00:43.420372  543705 net.go:648] Add success.
I0322 08:00:43.422953  543705 net.go:770] primary dev: ETH0
I0322 08:00:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:00:43.422979  543705 net.go:698] Add success.
I0322 08:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:00:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:00:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:00:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:00:53.409774  543705 memory.go:184] no items to output this cycle
I0322 08:00:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 08:01:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:01:03.409793  543705 memory.go:184] no items to output this cycle
I0322 08:01:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 08:01:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:01:13.409780  543705 memory.go:191] Add success.
W0322 08:01:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 08:01:13.409805  543705 cpu.go:282] Add success.
W0322 08:01:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:01:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:01:13.420230  543705 net.go:648] Add success.
I0322 08:01:13.423452  543705 net.go:770] primary dev: ETH0
I0322 08:01:13.423465  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:01:13.423477  543705 net.go:698] Add success.
I0322 08:01:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:01:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:01:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 08:01:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:01:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 08:01:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:01:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:01:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:01:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:01:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:01:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:01:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:01:23.409784  543705 memory.go:184] no items to output this cycle
I0322 08:01:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 08:01:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:01:33.409765  543705 memory.go:184] no items to output this cycle
I0322 08:01:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 08:01:33.764942  543705 disk_info.go:125] begin check local disk info of client
I0322 08:01:33.767443  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:01:33.767448  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368080 0xc0003680c0]
E0322 08:01:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:01:43.410709  543705 memory.go:191] Add success.
I0322 08:01:43.409785  543705 cpu.go:282] Add success.
I0322 08:01:43.420385  543705 net.go:648] Add success.
I0322 08:01:43.423135  543705 net.go:770] primary dev: ETH0
I0322 08:01:43.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:01:43.423161  543705 net.go:698] Add success.
I0322 08:01:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:01:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:01:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:01:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:01:53.409799  543705 memory.go:184] no items to output this cycle
I0322 08:01:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 08:02:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:02:03.409790  543705 memory.go:184] no items to output this cycle
I0322 08:02:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 08:02:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:02:13.409787  543705 cpu.go:282] Add success.
I0322 08:02:13.409794  543705 memory.go:191] Add success.
W0322 08:02:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:02:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:02:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:02:13.420053  543705 net.go:648] Add success.
I0322 08:02:13.422647  543705 net.go:770] primary dev: ETH0
I0322 08:02:13.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:02:13.422679  543705 net.go:698] Add success.
W0322 08:02:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:02:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0322 08:02:14.455150  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:02:14.456103  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:02:14.456112  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:02:14.456118  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:02:14.456443  543705 disk_worker.go:494] system disk:vda1
I0322 08:02:14.456472  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:02:15.456880  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:02:15.456888  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:02:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:02:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:02:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:02:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:02:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:02:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:02:23.409814  543705 memory.go:184] no items to output this cycle
I0322 08:02:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 08:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:02:33.409784  543705 memory.go:184] no items to output this cycle
I0322 08:02:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 08:02:33.767533  543705 disk_info.go:125] begin check local disk info of client
I0322 08:02:33.770093  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:02:33.770099  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb00 0xc00007bb40]
E0322 08:02:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:02:43.410506  543705 memory.go:191] Add success.
I0322 08:02:43.409828  543705 cpu.go:282] Add success.
I0322 08:02:43.420203  543705 net.go:648] Add success.
I0322 08:02:43.422781  543705 net.go:770] primary dev: ETH0
I0322 08:02:43.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:02:43.422806  543705 net.go:698] Add success.
I0322 08:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:02:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:02:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:02:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:02:53.409798  543705 memory.go:184] no items to output this cycle
I0322 08:02:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 08:03:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:03:03.409791  543705 memory.go:184] no items to output this cycle
I0322 08:03:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:03:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:03:13.409810  543705 memory.go:191] Add success.
I0322 08:03:13.409817  543705 cpu.go:282] Add success.
W0322 08:03:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:03:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:03:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:03:13.420203  543705 net.go:648] Add success.
I0322 08:03:13.422965  543705 net.go:770] primary dev: ETH0
I0322 08:03:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:03:13.422994  543705 net.go:698] Add success.
I0322 08:03:13.777257  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"16dbecdb-cefd-428d-8d47-ca1df3e40781","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:03:13.777290  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:03:14.454276  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:03:14.454502  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:03:14.454512  543705 disk_worker.go:708] disk space is not compliant
W0322 08:03:14.454515  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:03:14.456083  543705 disk_worker.go:494] system disk:vda1
I0322 08:03:14.456120  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:03:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:03:16.472093  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:03:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:03:23.409779  543705 memory.go:184] no items to output this cycle
I0322 08:03:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 08:03:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:03:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 08:03:33.409791  543705 memory.go:184] no items to output this cycle
I0322 08:03:33.770727  543705 disk_info.go:125] begin check local disk info of client
I0322 08:03:33.773238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:03:33.773244  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9a80 0xc0004d9ac0]
I0322 08:03:39.564570  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:03:39.564577  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:03:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:03:43.410561  543705 memory.go:191] Add success.
I0322 08:03:43.409802  543705 cpu.go:282] Add success.
I0322 08:03:43.420265  543705 net.go:648] Add success.
I0322 08:03:43.422835  543705 net.go:770] primary dev: ETH0
I0322 08:03:43.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:03:43.422865  543705 net.go:698] Add success.
I0322 08:03:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:03:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:03:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:03:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:03:53.409782  543705 memory.go:184] no items to output this cycle
I0322 08:03:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 08:04:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:04:03.409807  543705 memory.go:184] no items to output this cycle
I0322 08:04:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 08:04:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:04:13.409784  543705 memory.go:191] Add success.
I0322 08:04:13.409803  543705 cpu.go:282] Add success.
W0322 08:04:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:04:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:04:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:04:13.420053  543705 net.go:648] Add success.
I0322 08:04:13.422498  543705 net.go:770] primary dev: ETH0
I0322 08:04:13.422511  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:04:13.422523  543705 net.go:698] Add success.
I0322 08:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:04:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:04:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 08:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:04:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 08:04:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:04:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:04:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:04:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:04:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:04:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:04:23.409890  543705 memory.go:184] no items to output this cycle
I0322 08:04:23.409933  543705 cpu.go:275] no items to output this cycle
E0322 08:04:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:04:33.409783  543705 memory.go:184] no items to output this cycle
I0322 08:04:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 08:04:33.773674  543705 disk_info.go:125] begin check local disk info of client
I0322 08:04:33.776172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:04:33.776177  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053f000 0xc00053f040]
E0322 08:04:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:04:43.410663  543705 memory.go:191] Add success.
I0322 08:04:43.409813  543705 cpu.go:282] Add success.
I0322 08:04:43.420364  543705 net.go:648] Add success.
I0322 08:04:43.423223  543705 net.go:770] primary dev: ETH0
I0322 08:04:43.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:04:43.423253  543705 net.go:698] Add success.
I0322 08:04:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:04:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:04:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:04:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 08:04:53.409784  543705 memory.go:184] no items to output this cycle
E0322 08:05:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:05:03.409830  543705 memory.go:184] no items to output this cycle
I0322 08:05:03.409844  543705 cpu.go:275] no items to output this cycle
E0322 08:05:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:05:13.409784  543705 memory.go:191] Add success.
I0322 08:05:13.409803  543705 cpu.go:282] Add success.
W0322 08:05:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:05:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:05:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:05:13.420109  543705 net.go:648] Add success.
I0322 08:05:13.422711  543705 net.go:770] primary dev: ETH0
I0322 08:05:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:05:13.422741  543705 net.go:698] Add success.
I0322 08:05:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:05:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:05:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 08:05:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:05:14.456595  543705 disk_worker.go:494] system disk:vda1
I0322 08:05:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:05:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:05:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:05:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:05:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:05:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:05:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:05:23.409790  543705 memory.go:184] no items to output this cycle
I0322 08:05:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 08:05:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:05:33.409778  543705 memory.go:184] no items to output this cycle
I0322 08:05:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 08:05:33.776252  543705 disk_info.go:125] begin check local disk info of client
I0322 08:05:33.778857  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:05:33.778863  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f4580 0xc0004f45c0]
E0322 08:05:43.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:05:43.410779  543705 memory.go:191] Add success.
I0322 08:05:43.409994  543705 cpu.go:282] Add success.
I0322 08:05:43.419706  543705 net.go:648] Add success.
I0322 08:05:43.422334  543705 net.go:770] primary dev: ETH0
I0322 08:05:43.422346  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:05:43.422358  543705 net.go:698] Add success.
I0322 08:05:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:05:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:05:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:05:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:05:53.409790  543705 memory.go:184] no items to output this cycle
I0322 08:05:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:06:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:06:03.409807  543705 memory.go:184] no items to output this cycle
I0322 08:06:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 08:06:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:06:13.409822  543705 memory.go:191] Add success.
I0322 08:06:13.409828  543705 cpu.go:282] Add success.
W0322 08:06:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:06:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:06:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:06:13.420324  543705 net.go:648] Add success.
I0322 08:06:13.422922  543705 net.go:770] primary dev: ETH0
I0322 08:06:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:06:13.422948  543705 net.go:698] Add success.
I0322 08:06:13.470296  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0f651e39-b024-4fbf-9bd7-7a5e158b9302","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:06:13.470329  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:06:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:06:14.455228  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:06:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0322 08:06:14.455245  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:06:14.456725  543705 disk_worker.go:494] system disk:vda1
I0322 08:06:14.456766  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:06:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:06:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:06:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:06:23.409783  543705 memory.go:184] no items to output this cycle
I0322 08:06:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 08:06:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:06:33.409768  543705 memory.go:184] no items to output this cycle
I0322 08:06:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 08:06:33.778949  543705 disk_info.go:125] begin check local disk info of client
I0322 08:06:33.781521  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:06:33.781526  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4900 0xc0004a4940]
I0322 08:06:39.565578  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:06:39.565584  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:06:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:06:43.410732  543705 memory.go:191] Add success.
I0322 08:06:43.409820  543705 cpu.go:282] Add success.
I0322 08:06:43.420454  543705 net.go:648] Add success.
I0322 08:06:43.423179  543705 net.go:770] primary dev: ETH0
I0322 08:06:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:06:43.423209  543705 net.go:698] Add success.
I0322 08:06:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:06:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:06:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:06:53.409782  543705 cpu.go:275] no items to output this cycle
I0322 08:06:53.409784  543705 memory.go:184] no items to output this cycle
E0322 08:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:07:03.409790  543705 memory.go:184] no items to output this cycle
I0322 08:07:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 08:07:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:07:13.409814  543705 memory.go:191] Add success.
I0322 08:07:13.409823  543705 cpu.go:282] Add success.
W0322 08:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:07:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:07:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:07:13.420112  543705 net.go:648] Add success.
I0322 08:07:13.422806  543705 net.go:770] primary dev: ETH0
I0322 08:07:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:07:13.422831  543705 net.go:698] Add success.
I0322 08:07:13.453450  543705 event_worker.go:152] Polling the log file for events...
W0322 08:07:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:07:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0322 08:07:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:07:14.456851  543705 disk_worker.go:494] system disk:vda1
I0322 08:07:14.456897  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:07:14.457317  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:07:14.457326  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:07:14.457330  543705 custom_config.go:64] query custom config with name: gpu
E0322 08:07:15.457044  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:07:15.457058  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:07:16.458119  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:07:16.458199  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0322 08:07:16.458219  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:07:16.458229  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:07:16.472696  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:07:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:07:23.409805  543705 memory.go:184] no items to output this cycle
I0322 08:07:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 08:07:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:07:33.409802  543705 memory.go:184] no items to output this cycle
I0322 08:07:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 08:07:33.781617  543705 disk_info.go:125] begin check local disk info of client
I0322 08:07:33.784205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:07:33.784212  543705 disk_info.go:196] parse disk info done, disk is : [0xc000586b00 0xc000586b40]
E0322 08:07:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:07:43.410769  543705 memory.go:191] Add success.
I0322 08:07:43.409805  543705 cpu.go:282] Add success.
I0322 08:07:43.420541  543705 net.go:648] Add success.
I0322 08:07:43.423190  543705 net.go:770] primary dev: ETH0
I0322 08:07:43.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:07:43.423217  543705 net.go:698] Add success.
I0322 08:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:07:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:07:53.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:07:53.410268  543705 memory.go:184] no items to output this cycle
I0322 08:07:53.410290  543705 cpu.go:275] no items to output this cycle
E0322 08:08:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:08:03.409800  543705 memory.go:184] no items to output this cycle
I0322 08:08:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 08:08:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:08:13.409788  543705 cpu.go:282] Add success.
I0322 08:08:13.409789  543705 memory.go:191] Add success.
W0322 08:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:08:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:08:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:08:13.420075  543705 net.go:648] Add success.
I0322 08:08:13.423226  543705 net.go:770] primary dev: ETH0
I0322 08:08:13.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:08:13.423255  543705 net.go:698] Add success.
I0322 08:08:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:08:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:08:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 08:08:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:08:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 08:08:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:08:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:08:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:08:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:08:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:08:23.409769  543705 memory.go:184] no items to output this cycle
I0322 08:08:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 08:08:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:08:33.409769  543705 memory.go:184] no items to output this cycle
I0322 08:08:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 08:08:33.785662  543705 disk_info.go:125] begin check local disk info of client
I0322 08:08:33.788167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:08:33.788172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278500 0xc000278540]
E0322 08:08:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:08:43.410700  543705 memory.go:191] Add success.
I0322 08:08:43.409784  543705 cpu.go:282] Add success.
I0322 08:08:43.419741  543705 net.go:648] Add success.
I0322 08:08:43.422181  543705 net.go:770] primary dev: ETH0
I0322 08:08:43.422194  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:08:43.422206  543705 net.go:698] Add success.
I0322 08:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:08:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:08:53.409781  543705 memory.go:184] no items to output this cycle
I0322 08:08:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:09:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:09:03.409794  543705 memory.go:184] no items to output this cycle
I0322 08:09:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 08:09:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:09:13.409795  543705 memory.go:191] Add success.
W0322 08:09:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:09:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:09:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:09:13.409836  543705 cpu.go:282] Add success.
I0322 08:09:13.420273  543705 net.go:648] Add success.
I0322 08:09:13.423130  543705 net.go:770] primary dev: ETH0
I0322 08:09:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:09:13.423156  543705 net.go:698] Add success.
I0322 08:09:13.499018  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d505cf2d-c686-41df-a36f-31d8a88204c1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:09:13.499050  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:09:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:09:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:09:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 08:09:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:09:14.456517  543705 disk_worker.go:494] system disk:vda1
I0322 08:09:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:09:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:09:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:09:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:09:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:09:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:09:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:09:23.409776  543705 memory.go:184] no items to output this cycle
I0322 08:09:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 08:09:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:09:33.409779  543705 memory.go:184] no items to output this cycle
I0322 08:09:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 08:09:33.789098  543705 disk_info.go:125] begin check local disk info of client
I0322 08:09:33.791612  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:09:33.791618  543705 disk_info.go:196] parse disk info done, disk is : [0xc000341ac0 0xc000341b00]
I0322 08:09:39.566569  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:09:39.566578  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:09:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:09:43.410917  543705 memory.go:191] Add success.
I0322 08:09:43.409819  543705 cpu.go:282] Add success.
I0322 08:09:43.420675  543705 net.go:648] Add success.
I0322 08:09:43.423384  543705 net.go:770] primary dev: ETH0
I0322 08:09:43.423397  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:09:43.423568  543705 net.go:698] Add success.
I0322 08:09:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:09:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:09:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:09:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:09:53.409783  543705 memory.go:184] no items to output this cycle
I0322 08:09:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 08:10:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:10:03.409820  543705 memory.go:184] no items to output this cycle
I0322 08:10:03.409828  543705 cpu.go:275] no items to output this cycle
E0322 08:10:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:10:13.409796  543705 memory.go:191] Add success.
I0322 08:10:13.409797  543705 cpu.go:282] Add success.
W0322 08:10:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:10:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:10:13.420221  543705 net.go:648] Add success.
I0322 08:10:13.423213  543705 net.go:770] primary dev: ETH0
I0322 08:10:13.423225  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:10:13.423238  543705 net.go:698] Add success.
I0322 08:10:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:10:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:10:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 08:10:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:10:14.456484  543705 disk_worker.go:494] system disk:vda1
I0322 08:10:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:10:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:10:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:10:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:10:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:10:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:10:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:10:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 08:10:23.409781  543705 memory.go:184] no items to output this cycle
E0322 08:10:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:10:33.409772  543705 memory.go:184] no items to output this cycle
I0322 08:10:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 08:10:33.791697  543705 disk_info.go:125] begin check local disk info of client
I0322 08:10:33.794246  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:10:33.794252  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048bb00 0xc00048bb40]
E0322 08:10:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:10:43.410858  543705 memory.go:191] Add success.
I0322 08:10:43.409803  543705 cpu.go:282] Add success.
I0322 08:10:43.420552  543705 net.go:648] Add success.
I0322 08:10:43.423460  543705 net.go:770] primary dev: ETH0
I0322 08:10:43.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:10:43.423486  543705 net.go:698] Add success.
I0322 08:10:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:10:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:10:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:10:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:10:53.409878  543705 memory.go:184] no items to output this cycle
I0322 08:10:53.409923  543705 cpu.go:275] no items to output this cycle
E0322 08:11:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:11:03.409800  543705 memory.go:184] no items to output this cycle
I0322 08:11:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 08:11:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:11:13.409821  543705 memory.go:191] Add success.
I0322 08:11:13.409824  543705 cpu.go:282] Add success.
W0322 08:11:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:11:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:11:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:11:13.420197  543705 net.go:648] Add success.
I0322 08:11:13.422852  543705 net.go:770] primary dev: ETH0
I0322 08:11:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:11:13.422877  543705 net.go:698] Add success.
I0322 08:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:11:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:11:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 08:11:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:11:14.456585  543705 disk_worker.go:494] system disk:vda1
I0322 08:11:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:11:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:11:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:11:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:11:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:11:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:11:23.409764  543705 memory.go:184] no items to output this cycle
I0322 08:11:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 08:11:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:11:33.409801  543705 memory.go:184] no items to output this cycle
I0322 08:11:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 08:11:33.794335  543705 disk_info.go:125] begin check local disk info of client
I0322 08:11:33.796797  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:11:33.796804  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048d480 0xc00048d4c0]
E0322 08:11:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:11:43.410632  543705 memory.go:191] Add success.
I0322 08:11:43.409812  543705 cpu.go:282] Add success.
I0322 08:11:43.420347  543705 net.go:648] Add success.
I0322 08:11:43.423370  543705 net.go:770] primary dev: ETH0
I0322 08:11:43.423385  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:11:43.423400  543705 net.go:698] Add success.
I0322 08:11:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:11:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:11:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:11:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:11:53.409814  543705 memory.go:184] no items to output this cycle
I0322 08:11:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 08:12:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:12:03.409798  543705 memory.go:184] no items to output this cycle
I0322 08:12:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 08:12:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:12:13.409798  543705 memory.go:191] Add success.
I0322 08:12:13.409798  543705 cpu.go:282] Add success.
W0322 08:12:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:12:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:12:13.420167  543705 net.go:648] Add success.
I0322 08:12:13.422766  543705 net.go:770] primary dev: ETH0
I0322 08:12:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:12:13.422791  543705 net.go:698] Add success.
I0322 08:12:13.463284  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d9785bea-05d8-4715-a46f-0f5db3c01314","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:12:13.463324  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 08:12:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:12:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 08:12:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:12:14.456980  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:12:14.456989  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:12:14.456994  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:12:14.457054  543705 disk_worker.go:494] system disk:vda1
I0322 08:12:14.457104  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:12:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:12:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:12:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:12:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:12:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:12:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:12:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:12:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:12:23.409798  543705 memory.go:184] no items to output this cycle
I0322 08:12:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:12:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:12:33.409800  543705 memory.go:184] no items to output this cycle
I0322 08:12:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 08:12:33.797670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:12:33.800193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:12:33.800199  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048c0c0 0xc00048c100]
I0322 08:12:39.567576  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:12:39.567582  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:12:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:12:43.410672  543705 memory.go:191] Add success.
I0322 08:12:43.409795  543705 cpu.go:282] Add success.
I0322 08:12:43.420378  543705 net.go:648] Add success.
I0322 08:12:43.423215  543705 net.go:770] primary dev: ETH0
I0322 08:12:43.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:12:43.423241  543705 net.go:698] Add success.
I0322 08:12:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:12:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:12:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:12:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:12:53.409782  543705 memory.go:184] no items to output this cycle
I0322 08:12:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 08:13:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:13:03.409815  543705 memory.go:184] no items to output this cycle
I0322 08:13:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 08:13:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:13:13.409820  543705 memory.go:191] Add success.
I0322 08:13:13.409828  543705 cpu.go:282] Add success.
W0322 08:13:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:13:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:13:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:13:13.420140  543705 net.go:648] Add success.
I0322 08:13:13.422979  543705 net.go:770] primary dev: ETH0
I0322 08:13:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:13:13.423005  543705 net.go:698] Add success.
I0322 08:13:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:13:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:13:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 08:13:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:13:14.456523  543705 disk_worker.go:494] system disk:vda1
I0322 08:13:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:13:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:13:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:13:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:13:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:13:23.409783  543705 memory.go:184] no items to output this cycle
I0322 08:13:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 08:13:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:13:33.409792  543705 memory.go:184] no items to output this cycle
I0322 08:13:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 08:13:33.801670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:13:33.804123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:13:33.804129  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a6c0 0xc00039a700]
E0322 08:13:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:13:43.410739  543705 memory.go:191] Add success.
I0322 08:13:43.409816  543705 cpu.go:282] Add success.
I0322 08:13:43.420435  543705 net.go:648] Add success.
I0322 08:13:43.423535  543705 net.go:770] primary dev: ETH0
I0322 08:13:43.423548  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:13:43.423560  543705 net.go:698] Add success.
I0322 08:13:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:13:53.410389  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:13:53.410414  543705 memory.go:184] no items to output this cycle
I0322 08:13:53.410416  543705 cpu.go:275] no items to output this cycle
E0322 08:14:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:14:03.409913  543705 memory.go:184] no items to output this cycle
I0322 08:14:03.410039  543705 cpu.go:275] no items to output this cycle
E0322 08:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:14:13.409797  543705 memory.go:191] Add success.
I0322 08:14:13.409800  543705 cpu.go:282] Add success.
W0322 08:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:14:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:14:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:14:13.420148  543705 net.go:648] Add success.
I0322 08:14:13.422866  543705 net.go:770] primary dev: ETH0
I0322 08:14:13.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:14:13.422896  543705 net.go:698] Add success.
I0322 08:14:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:14:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:14:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 08:14:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:14:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 08:14:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:14:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:14:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:14:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:14:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:14:23.409796  543705 memory.go:184] no items to output this cycle
I0322 08:14:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 08:14:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:14:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 08:14:33.409790  543705 memory.go:184] no items to output this cycle
I0322 08:14:33.804208  543705 disk_info.go:125] begin check local disk info of client
I0322 08:14:33.806685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:14:33.806690  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003817c0 0xc000381800]
E0322 08:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:14:43.410739  543705 memory.go:191] Add success.
I0322 08:14:43.409812  543705 cpu.go:282] Add success.
I0322 08:14:43.420533  543705 net.go:648] Add success.
I0322 08:14:43.423446  543705 net.go:770] primary dev: ETH0
I0322 08:14:43.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:14:43.423481  543705 net.go:698] Add success.
I0322 08:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:14:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:14:53.409779  543705 cpu.go:275] no items to output this cycle
I0322 08:14:53.409787  543705 memory.go:184] no items to output this cycle
E0322 08:15:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:15:03.409819  543705 memory.go:184] no items to output this cycle
I0322 08:15:03.409832  543705 cpu.go:275] no items to output this cycle
E0322 08:15:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:15:13.409830  543705 memory.go:191] Add success.
I0322 08:15:13.409838  543705 cpu.go:282] Add success.
W0322 08:15:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:15:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:15:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:15:13.420138  543705 net.go:648] Add success.
I0322 08:15:13.422967  543705 net.go:770] primary dev: ETH0
I0322 08:15:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:15:13.422992  543705 net.go:698] Add success.
I0322 08:15:13.468908  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe48cad2-25d6-403d-87ae-57da6ac17002","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:15:13.468941  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:15:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:15:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 08:15:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:15:14.456622  543705 disk_worker.go:494] system disk:vda1
I0322 08:15:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:15:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:15:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:15:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:15:23.409766  543705 memory.go:184] no items to output this cycle
I0322 08:15:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 08:15:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:15:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 08:15:33.409782  543705 memory.go:184] no items to output this cycle
I0322 08:15:33.808121  543705 disk_info.go:125] begin check local disk info of client
I0322 08:15:33.810613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:15:33.810618  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003402c0 0xc000340300]
I0322 08:15:39.568590  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:15:39.568597  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:15:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:15:43.410640  543705 memory.go:191] Add success.
I0322 08:15:43.409790  543705 cpu.go:282] Add success.
I0322 08:15:43.420322  543705 net.go:648] Add success.
I0322 08:15:43.423046  543705 net.go:770] primary dev: ETH0
I0322 08:15:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:15:43.423073  543705 net.go:698] Add success.
I0322 08:15:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:15:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:15:53.409816  543705 memory.go:184] no items to output this cycle
I0322 08:15:53.409819  543705 cpu.go:275] no items to output this cycle
I0322 08:16:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 08:16:03.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:16:03.409825  543705 memory.go:184] no items to output this cycle
E0322 08:16:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:16:13.409938  543705 cpu.go:282] Add success.
I0322 08:16:13.409964  543705 memory.go:191] Add success.
W0322 08:16:13.410009  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:16:13.410023  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:16:13.410026  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:16:13.419767  543705 net.go:648] Add success.
I0322 08:16:13.422298  543705 net.go:770] primary dev: ETH0
I0322 08:16:13.422311  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:16:13.422323  543705 net.go:698] Add success.
I0322 08:16:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:16:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:16:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 08:16:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:16:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 08:16:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:16:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:16:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:16:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:16:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:16:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:16:23.409777  543705 memory.go:184] no items to output this cycle
I0322 08:16:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 08:16:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:16:33.409771  543705 memory.go:184] no items to output this cycle
I0322 08:16:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 08:16:33.812188  543705 disk_info.go:125] begin check local disk info of client
I0322 08:16:33.814676  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:16:33.814682  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac40 0xc0001aac80]
E0322 08:16:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:16:43.410606  543705 memory.go:191] Add success.
I0322 08:16:43.409813  543705 cpu.go:282] Add success.
I0322 08:16:43.420380  543705 net.go:648] Add success.
I0322 08:16:43.422926  543705 net.go:770] primary dev: ETH0
I0322 08:16:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:16:43.422960  543705 net.go:698] Add success.
I0322 08:16:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:16:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:16:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:16:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:16:53.409782  543705 memory.go:184] no items to output this cycle
I0322 08:16:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:17:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:17:03.409819  543705 memory.go:184] no items to output this cycle
I0322 08:17:03.409832  543705 cpu.go:275] no items to output this cycle
E0322 08:17:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:17:13.409798  543705 memory.go:191] Add success.
I0322 08:17:13.409817  543705 cpu.go:282] Add success.
W0322 08:17:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:17:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:17:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:17:13.420196  543705 net.go:648] Add success.
I0322 08:17:13.423024  543705 net.go:770] primary dev: ETH0
I0322 08:17:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:17:13.423052  543705 net.go:698] Add success.
I0322 08:17:13.452775  543705 event_worker.go:152] Polling the log file for events...
W0322 08:17:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:17:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 08:17:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:17:14.456995  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:17:14.457004  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:17:14.457010  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:17:14.457030  543705 disk_worker.go:494] system disk:vda1
I0322 08:17:14.457070  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:17:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:17:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:17:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:17:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:17:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:17:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:17:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:17:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:17:23.409773  543705 memory.go:184] no items to output this cycle
I0322 08:17:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:17:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:17:33.409772  543705 memory.go:184] no items to output this cycle
I0322 08:17:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 08:17:33.816198  543705 disk_info.go:125] begin check local disk info of client
I0322 08:17:33.818680  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:17:33.818685  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a780 0xc00007a7c0]
E0322 08:17:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:17:43.410779  543705 memory.go:191] Add success.
I0322 08:17:43.409824  543705 cpu.go:282] Add success.
I0322 08:17:43.420441  543705 net.go:648] Add success.
I0322 08:17:43.423429  543705 net.go:770] primary dev: ETH0
I0322 08:17:43.423443  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:17:43.423458  543705 net.go:698] Add success.
I0322 08:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:17:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:17:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:17:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:17:53.409824  543705 memory.go:184] no items to output this cycle
I0322 08:17:53.409828  543705 cpu.go:275] no items to output this cycle
E0322 08:18:03.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:18:03.409819  543705 cpu.go:275] no items to output this cycle
I0322 08:18:03.409838  543705 memory.go:184] no items to output this cycle
E0322 08:18:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:18:13.409786  543705 memory.go:191] Add success.
W0322 08:18:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 08:18:13.409815  543705 cpu.go:282] Add success.
W0322 08:18:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:18:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:18:13.420185  543705 net.go:648] Add success.
I0322 08:18:13.423135  543705 net.go:770] primary dev: ETH0
I0322 08:18:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:18:13.423158  543705 net.go:698] Add success.
I0322 08:18:13.462875  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5504f374-2622-43a8-96d5-3394bc629f58","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:18:13.462906  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:18:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:18:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:18:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 08:18:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:18:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 08:18:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:18:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:18:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:18:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:18:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:18:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:18:23.409772  543705 cpu.go:275] no items to output this cycle
I0322 08:18:23.409784  543705 memory.go:184] no items to output this cycle
E0322 08:18:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:18:33.409799  543705 memory.go:184] no items to output this cycle
I0322 08:18:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 08:18:33.818771  543705 disk_info.go:125] begin check local disk info of client
I0322 08:18:33.821249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:18:33.821254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9e80 0xc0002b9ec0]
I0322 08:18:39.569598  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:18:39.569605  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:18:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:18:43.410600  543705 memory.go:191] Add success.
I0322 08:18:43.409805  543705 cpu.go:282] Add success.
I0322 08:18:43.420291  543705 net.go:648] Add success.
I0322 08:18:43.422959  543705 net.go:770] primary dev: ETH0
I0322 08:18:43.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:18:43.422983  543705 net.go:698] Add success.
I0322 08:18:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:18:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:18:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:18:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:18:53.409778  543705 memory.go:184] no items to output this cycle
I0322 08:18:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:19:03.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:19:03.409835  543705 memory.go:184] no items to output this cycle
I0322 08:19:03.409996  543705 cpu.go:275] no items to output this cycle
E0322 08:19:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:19:13.409813  543705 memory.go:191] Add success.
I0322 08:19:13.409822  543705 cpu.go:282] Add success.
W0322 08:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:19:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:19:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:19:13.420157  543705 net.go:648] Add success.
I0322 08:19:13.422886  543705 net.go:770] primary dev: ETH0
I0322 08:19:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:19:13.422910  543705 net.go:698] Add success.
I0322 08:19:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:19:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:19:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 08:19:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:19:14.457185  543705 disk_worker.go:494] system disk:vda1
I0322 08:19:14.457227  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:19:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:19:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:19:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:19:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:19:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:19:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:19:23.409777  543705 memory.go:184] no items to output this cycle
I0322 08:19:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 08:19:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:19:33.409782  543705 memory.go:184] no items to output this cycle
I0322 08:19:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 08:19:33.821675  543705 disk_info.go:125] begin check local disk info of client
I0322 08:19:33.824151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:19:33.824156  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8080 0xc0002b80c0]
E0322 08:19:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:19:43.410751  543705 memory.go:191] Add success.
I0322 08:19:43.409813  543705 cpu.go:282] Add success.
I0322 08:19:43.420421  543705 net.go:648] Add success.
I0322 08:19:43.423319  543705 net.go:770] primary dev: ETH0
I0322 08:19:43.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:19:43.423345  543705 net.go:698] Add success.
I0322 08:19:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:19:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:19:53.409777  543705 memory.go:184] no items to output this cycle
I0322 08:19:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:20:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:20:03.409811  543705 memory.go:184] no items to output this cycle
I0322 08:20:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 08:20:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:20:13.409793  543705 memory.go:191] Add success.
I0322 08:20:13.409794  543705 cpu.go:282] Add success.
W0322 08:20:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:20:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:20:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:20:13.420153  543705 net.go:648] Add success.
I0322 08:20:13.422959  543705 net.go:770] primary dev: ETH0
I0322 08:20:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:20:13.422984  543705 net.go:698] Add success.
I0322 08:20:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:20:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:20:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 08:20:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:20:14.457595  543705 disk_worker.go:494] system disk:vda1
I0322 08:20:14.457639  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:20:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:20:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:20:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:20:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:20:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:20:23.409780  543705 memory.go:184] no items to output this cycle
I0322 08:20:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 08:20:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:20:33.409774  543705 cpu.go:275] no items to output this cycle
I0322 08:20:33.409780  543705 memory.go:184] no items to output this cycle
I0322 08:20:33.824236  543705 disk_info.go:125] begin check local disk info of client
I0322 08:20:33.826759  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:20:33.826765  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492480 0xc0004924c0]
E0322 08:20:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:20:43.410665  543705 memory.go:191] Add success.
I0322 08:20:43.409800  543705 cpu.go:282] Add success.
I0322 08:20:43.420410  543705 net.go:648] Add success.
I0322 08:20:43.422825  543705 net.go:770] primary dev: ETH0
I0322 08:20:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:20:43.422851  543705 net.go:698] Add success.
I0322 08:20:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:20:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:20:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:20:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:20:53.409771  543705 memory.go:184] no items to output this cycle
I0322 08:20:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 08:21:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:21:03.409776  543705 memory.go:184] no items to output this cycle
I0322 08:21:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:21:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:21:13.409820  543705 memory.go:191] Add success.
I0322 08:21:13.409822  543705 cpu.go:282] Add success.
W0322 08:21:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:21:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:21:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:21:13.420141  543705 net.go:648] Add success.
I0322 08:21:13.422880  543705 net.go:770] primary dev: ETH0
I0322 08:21:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:21:13.422904  543705 net.go:698] Add success.
I0322 08:21:13.468641  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07b2dbb2-c1bf-4a20-ac2b-b9cf6a537c76","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:21:13.468676  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:21:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:21:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:21:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 08:21:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:21:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 08:21:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:21:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:21:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:21:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:21:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:21:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:21:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:21:23.409761  543705 memory.go:184] no items to output this cycle
I0322 08:21:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:21:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:21:33.409772  543705 memory.go:184] no items to output this cycle
I0322 08:21:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 08:21:33.828275  543705 disk_info.go:125] begin check local disk info of client
I0322 08:21:33.830746  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:21:33.830751  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaac0 0xc0001aab00]
I0322 08:21:39.570597  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:21:39.570603  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:21:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:21:43.409793  543705 cpu.go:282] Add success.
I0322 08:21:43.409813  543705 memory.go:191] Add success.
I0322 08:21:43.420066  543705 net.go:648] Add success.
I0322 08:21:43.421069  543705 net.go:770] primary dev: ETH0
I0322 08:21:43.421082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:21:43.421095  543705 net.go:698] Add success.
I0322 08:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:21:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:21:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:21:53.409809  543705 memory.go:184] no items to output this cycle
I0322 08:21:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 08:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:22:03.409782  543705 memory.go:184] no items to output this cycle
I0322 08:22:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 08:22:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:22:13.409820  543705 memory.go:191] Add success.
I0322 08:22:13.409821  543705 cpu.go:282] Add success.
W0322 08:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:22:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:22:13.420156  543705 net.go:648] Add success.
I0322 08:22:13.422814  543705 net.go:770] primary dev: ETH0
I0322 08:22:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:22:13.422839  543705 net.go:698] Add success.
W0322 08:22:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:22:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 08:22:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:22:14.455868  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:22:14.455876  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:22:14.455882  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:22:14.456546  543705 disk_worker.go:494] system disk:vda1
I0322 08:22:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:22:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:22:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:22:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:22:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:22:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:22:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:22:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:22:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:22:23.409769  543705 memory.go:184] no items to output this cycle
I0322 08:22:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 08:22:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:22:33.409803  543705 memory.go:184] no items to output this cycle
I0322 08:22:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 08:22:33.832285  543705 disk_info.go:125] begin check local disk info of client
I0322 08:22:33.834759  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:22:33.834765  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8240 0xc0002b8280]
E0322 08:22:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:22:43.410640  543705 memory.go:191] Add success.
I0322 08:22:43.409822  543705 cpu.go:282] Add success.
I0322 08:22:43.420330  543705 net.go:648] Add success.
I0322 08:22:43.423107  543705 net.go:770] primary dev: ETH0
I0322 08:22:43.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:22:43.423132  543705 net.go:698] Add success.
I0322 08:22:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:22:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:22:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:22:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:22:53.409782  543705 cpu.go:275] no items to output this cycle
I0322 08:22:53.409788  543705 memory.go:184] no items to output this cycle
E0322 08:23:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:23:03.409783  543705 memory.go:184] no items to output this cycle
I0322 08:23:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 08:23:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:23:13.409796  543705 memory.go:191] Add success.
I0322 08:23:13.409797  543705 cpu.go:282] Add success.
W0322 08:23:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:23:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:23:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:23:13.420117  543705 net.go:648] Add success.
I0322 08:23:13.423135  543705 net.go:770] primary dev: ETH0
I0322 08:23:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:23:13.423166  543705 net.go:698] Add success.
I0322 08:23:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:23:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:23:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0322 08:23:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:23:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 08:23:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:23:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:23:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:23:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:23:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:23:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:23:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:23:23.409793  543705 memory.go:184] no items to output this cycle
I0322 08:23:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:23:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:23:33.409776  543705 cpu.go:275] no items to output this cycle
I0322 08:23:33.409783  543705 memory.go:184] no items to output this cycle
I0322 08:23:33.834845  543705 disk_info.go:125] begin check local disk info of client
I0322 08:23:33.837361  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:23:33.837366  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab000 0xc0003ab040]
E0322 08:23:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:23:43.410717  543705 memory.go:191] Add success.
I0322 08:23:43.409815  543705 cpu.go:282] Add success.
I0322 08:23:43.420441  543705 net.go:648] Add success.
I0322 08:23:43.423347  543705 net.go:770] primary dev: ETH0
I0322 08:23:43.423360  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:23:43.423373  543705 net.go:698] Add success.
I0322 08:23:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:23:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:23:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:23:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:23:53.409790  543705 memory.go:184] no items to output this cycle
I0322 08:23:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 08:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:24:03.409776  543705 memory.go:184] no items to output this cycle
I0322 08:24:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 08:24:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:24:13.409791  543705 memory.go:191] Add success.
I0322 08:24:13.409809  543705 cpu.go:282] Add success.
W0322 08:24:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:24:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:24:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:24:13.420244  543705 net.go:648] Add success.
I0322 08:24:13.423163  543705 net.go:770] primary dev: ETH0
I0322 08:24:13.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:24:13.423192  543705 net.go:698] Add success.
I0322 08:24:13.552526  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dc12ea4e-0966-4cc3-bccb-5b60132611fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:24:13.552559  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:24:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:24:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:24:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 08:24:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:24:14.456633  543705 disk_worker.go:494] system disk:vda1
I0322 08:24:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:24:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:24:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:24:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:24:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:24:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:24:23.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:24:23.409879  543705 memory.go:184] no items to output this cycle
I0322 08:24:23.409900  543705 cpu.go:275] no items to output this cycle
E0322 08:24:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:24:33.409778  543705 memory.go:184] no items to output this cycle
I0322 08:24:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 08:24:33.837670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:24:33.840108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:24:33.840113  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035cb00 0xc00035cb40]
I0322 08:24:39.571611  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:24:39.571618  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:24:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:24:43.410826  543705 memory.go:191] Add success.
I0322 08:24:43.409814  543705 cpu.go:282] Add success.
I0322 08:24:43.420531  543705 net.go:648] Add success.
I0322 08:24:43.423887  543705 net.go:770] primary dev: ETH0
I0322 08:24:43.423899  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:24:43.423912  543705 net.go:698] Add success.
I0322 08:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:24:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:24:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:24:53.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:24:53.410393  543705 memory.go:184] no items to output this cycle
I0322 08:24:53.410427  543705 cpu.go:275] no items to output this cycle
E0322 08:25:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:25:03.409779  543705 memory.go:184] no items to output this cycle
I0322 08:25:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 08:25:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:25:13.409791  543705 memory.go:191] Add success.
I0322 08:25:13.409812  543705 cpu.go:282] Add success.
W0322 08:25:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:25:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:25:13.420260  543705 net.go:648] Add success.
I0322 08:25:13.423254  543705 net.go:770] primary dev: ETH0
I0322 08:25:13.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:25:13.423278  543705 net.go:698] Add success.
I0322 08:25:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:25:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:25:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 08:25:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:25:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 08:25:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:25:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:25:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:25:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:25:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 08:25:23.409778  543705 memory.go:184] no items to output this cycle
E0322 08:25:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:25:33.409904  543705 memory.go:184] no items to output this cycle
I0322 08:25:33.409966  543705 cpu.go:275] no items to output this cycle
I0322 08:25:33.841669  543705 disk_info.go:125] begin check local disk info of client
I0322 08:25:33.844147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:25:33.844152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330300 0xc000330340]
E0322 08:25:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:25:43.410731  543705 memory.go:191] Add success.
I0322 08:25:43.409817  543705 cpu.go:282] Add success.
I0322 08:25:43.420513  543705 net.go:648] Add success.
I0322 08:25:43.423242  543705 net.go:770] primary dev: ETH0
I0322 08:25:43.423257  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:25:43.423271  543705 net.go:698] Add success.
I0322 08:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:25:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:25:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:25:53.410403  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:25:53.410426  543705 memory.go:184] no items to output this cycle
I0322 08:25:53.410432  543705 cpu.go:275] no items to output this cycle
E0322 08:26:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:26:03.409770  543705 memory.go:184] no items to output this cycle
I0322 08:26:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 08:26:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:26:13.409793  543705 memory.go:191] Add success.
I0322 08:26:13.409794  543705 cpu.go:282] Add success.
W0322 08:26:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:26:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:26:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:26:13.420283  543705 net.go:648] Add success.
I0322 08:26:13.423113  543705 net.go:770] primary dev: ETH0
I0322 08:26:13.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:26:13.423139  543705 net.go:698] Add success.
I0322 08:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:26:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:26:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 08:26:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:26:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 08:26:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:26:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:26:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:26:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:26:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:26:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:26:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:26:23.409797  543705 memory.go:184] no items to output this cycle
I0322 08:26:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 08:26:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:26:33.409772  543705 memory.go:184] no items to output this cycle
I0322 08:26:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 08:26:33.845365  543705 disk_info.go:125] begin check local disk info of client
I0322 08:26:33.847895  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:26:33.847901  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb40 0xc00007bb80]
E0322 08:26:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:26:43.410676  543705 memory.go:191] Add success.
I0322 08:26:43.409808  543705 cpu.go:282] Add success.
I0322 08:26:43.420313  543705 net.go:648] Add success.
I0322 08:26:43.423251  543705 net.go:770] primary dev: ETH0
I0322 08:26:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:26:43.423276  543705 net.go:698] Add success.
I0322 08:26:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:26:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:26:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:26:53.409795  543705 memory.go:184] no items to output this cycle
I0322 08:26:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:27:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:27:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 08:27:03.409787  543705 memory.go:184] no items to output this cycle
E0322 08:27:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:27:13.409796  543705 memory.go:191] Add success.
I0322 08:27:13.409802  543705 cpu.go:282] Add success.
W0322 08:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:27:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:27:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:27:13.420064  543705 net.go:648] Add success.
I0322 08:27:13.428747  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 08:27:13.428821  543705 net.go:770] primary dev: ETH0
I0322 08:27:13.428833  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:27:13.428844  543705 net.go:698] Add success.
I0322 08:27:13.453430  543705 event_worker.go:152] Polling the log file for events...
I0322 08:27:13.463794  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9eae8fd3-69f6-4c4f-9339-b402c4028146","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:27:13.463829  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 08:27:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:27:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 08:27:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:27:14.455903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:27:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:27:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:27:14.456555  543705 disk_worker.go:494] system disk:vda1
I0322 08:27:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:27:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:27:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:27:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:27:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:27:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:27:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:27:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:27:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:27:23.409775  543705 memory.go:184] no items to output this cycle
I0322 08:27:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:27:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:27:33.409774  543705 memory.go:184] no items to output this cycle
I0322 08:27:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 08:27:33.847980  543705 disk_info.go:125] begin check local disk info of client
I0322 08:27:33.850521  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:27:33.850526  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331680 0xc0003316c0]
I0322 08:27:39.572614  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:27:39.572620  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:27:43.410673  543705 memory.go:191] Add success.
I0322 08:27:43.409791  543705 cpu.go:282] Add success.
I0322 08:27:43.420370  543705 net.go:648] Add success.
I0322 08:27:43.423144  543705 net.go:770] primary dev: ETH0
I0322 08:27:43.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:27:43.423170  543705 net.go:698] Add success.
I0322 08:27:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:27:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:27:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:27:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:27:53.409789  543705 memory.go:184] no items to output this cycle
I0322 08:27:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:28:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:28:03.409775  543705 memory.go:184] no items to output this cycle
I0322 08:28:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:28:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:28:13.409787  543705 memory.go:191] Add success.
I0322 08:28:13.409808  543705 cpu.go:282] Add success.
W0322 08:28:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:28:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:28:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:28:13.420169  543705 net.go:648] Add success.
I0322 08:28:13.422739  543705 net.go:770] primary dev: ETH0
I0322 08:28:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:28:13.422766  543705 net.go:698] Add success.
I0322 08:28:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:28:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:28:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 08:28:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:28:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 08:28:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:28:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:28:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:28:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:28:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:28:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:28:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:28:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 08:28:23.409788  543705 memory.go:184] no items to output this cycle
E0322 08:28:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:28:33.409776  543705 memory.go:184] no items to output this cycle
I0322 08:28:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 08:28:33.852439  543705 disk_info.go:125] begin check local disk info of client
I0322 08:28:33.854936  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:28:33.854942  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9ec0 0xc0004d9f00]
E0322 08:28:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:28:43.410722  543705 memory.go:191] Add success.
I0322 08:28:43.409802  543705 cpu.go:282] Add success.
I0322 08:28:43.420431  543705 net.go:648] Add success.
I0322 08:28:43.423128  543705 net.go:770] primary dev: ETH0
I0322 08:28:43.423141  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:28:43.423153  543705 net.go:698] Add success.
I0322 08:28:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:28:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:28:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:28:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:28:53.409780  543705 memory.go:184] no items to output this cycle
I0322 08:28:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 08:29:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:29:03.409803  543705 memory.go:184] no items to output this cycle
I0322 08:29:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 08:29:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:29:13.409823  543705 memory.go:191] Add success.
I0322 08:29:13.409824  543705 cpu.go:282] Add success.
W0322 08:29:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:29:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:29:13.420277  543705 net.go:648] Add success.
I0322 08:29:13.422966  543705 net.go:770] primary dev: ETH0
I0322 08:29:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:29:13.422992  543705 net.go:698] Add success.
I0322 08:29:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:29:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:29:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 08:29:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:29:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 08:29:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:29:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:29:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:29:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:29:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:29:16.472094  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:29:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:29:23.409770  543705 memory.go:184] no items to output this cycle
I0322 08:29:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 08:29:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:29:33.409792  543705 memory.go:184] no items to output this cycle
I0322 08:29:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 08:29:33.856428  543705 disk_info.go:125] begin check local disk info of client
I0322 08:29:33.859035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:29:33.859041  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004692c0 0xc000469300]
E0322 08:29:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:29:43.410794  543705 memory.go:191] Add success.
I0322 08:29:43.409810  543705 cpu.go:282] Add success.
I0322 08:29:43.420477  543705 net.go:648] Add success.
I0322 08:29:43.423398  543705 net.go:770] primary dev: ETH0
I0322 08:29:43.423410  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:29:43.423423  543705 net.go:698] Add success.
I0322 08:29:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:29:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:29:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:29:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:29:53.409817  543705 memory.go:184] no items to output this cycle
I0322 08:29:53.409832  543705 cpu.go:275] no items to output this cycle
E0322 08:30:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:30:03.409794  543705 memory.go:184] no items to output this cycle
I0322 08:30:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 08:30:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:30:13.409832  543705 memory.go:191] Add success.
I0322 08:30:13.409835  543705 cpu.go:282] Add success.
W0322 08:30:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:30:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:30:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:30:13.419961  543705 net.go:770] primary dev: ETH0
I0322 08:30:13.419976  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:30:13.419991  543705 net.go:698] Add success.
I0322 08:30:13.420336  543705 net.go:648] Add success.
I0322 08:30:13.469256  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6a891290-047d-4c4b-8286-fdd56fc28cf0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:30:13.469289  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:30:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:30:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:30:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 08:30:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:30:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 08:30:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:30:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:30:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:30:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:30:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:30:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:30:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:30:23.409782  543705 memory.go:184] no items to output this cycle
I0322 08:30:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 08:30:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:30:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 08:30:33.409795  543705 memory.go:184] no items to output this cycle
I0322 08:30:33.859122  543705 disk_info.go:125] begin check local disk info of client
I0322 08:30:33.861593  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:30:33.861599  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468240 0xc000468280]
I0322 08:30:39.573610  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:30:39.573616  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:30:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:30:43.410600  543705 memory.go:191] Add success.
I0322 08:30:43.409807  543705 cpu.go:282] Add success.
I0322 08:30:43.420302  543705 net.go:648] Add success.
I0322 08:30:43.423167  543705 net.go:770] primary dev: ETH0
I0322 08:30:43.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:30:43.423193  543705 net.go:698] Add success.
I0322 08:30:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:30:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:30:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:30:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:30:53.409789  543705 memory.go:184] no items to output this cycle
I0322 08:30:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 08:31:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:31:03.409812  543705 memory.go:184] no items to output this cycle
I0322 08:31:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 08:31:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:31:13.409840  543705 memory.go:191] Add success.
I0322 08:31:13.409843  543705 cpu.go:282] Add success.
W0322 08:31:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:31:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:31:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:31:13.420360  543705 net.go:648] Add success.
I0322 08:31:13.423149  543705 net.go:770] primary dev: ETH0
I0322 08:31:13.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:31:13.423177  543705 net.go:698] Add success.
I0322 08:31:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:31:14.455346  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:31:14.455358  543705 disk_worker.go:708] disk space is not compliant
W0322 08:31:14.455361  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:31:14.457549  543705 disk_worker.go:494] system disk:vda1
I0322 08:31:14.457578  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:31:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:31:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:31:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:31:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:31:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:31:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:31:23.409797  543705 memory.go:184] no items to output this cycle
I0322 08:31:23.409823  543705 cpu.go:275] no items to output this cycle
E0322 08:31:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:31:33.409806  543705 memory.go:184] no items to output this cycle
I0322 08:31:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 08:31:33.861674  543705 disk_info.go:125] begin check local disk info of client
I0322 08:31:33.864158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:31:33.864164  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007acc0 0xc00007ad00]
E0322 08:31:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:31:43.410827  543705 memory.go:191] Add success.
I0322 08:31:43.409813  543705 cpu.go:282] Add success.
I0322 08:31:43.420566  543705 net.go:648] Add success.
I0322 08:31:43.423411  543705 net.go:770] primary dev: ETH0
I0322 08:31:43.423425  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:31:43.423437  543705 net.go:698] Add success.
I0322 08:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:31:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:31:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:31:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:31:53.409794  543705 memory.go:184] no items to output this cycle
I0322 08:31:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 08:32:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:32:03.409796  543705 memory.go:184] no items to output this cycle
I0322 08:32:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 08:32:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:32:13.409793  543705 memory.go:191] Add success.
I0322 08:32:13.409810  543705 cpu.go:282] Add success.
W0322 08:32:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:32:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:32:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:32:13.420257  543705 net.go:648] Add success.
I0322 08:32:13.423060  543705 net.go:770] primary dev: ETH0
I0322 08:32:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:32:13.423084  543705 net.go:698] Add success.
W0322 08:32:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:32:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 08:32:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:32:14.456777  543705 disk_worker.go:494] system disk:vda1
I0322 08:32:14.456816  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:32:14.457129  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:32:14.457137  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:32:14.457141  543705 custom_config.go:64] query custom config with name: gpu
E0322 08:32:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:32:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:32:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:32:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:32:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:32:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:32:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:32:23.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:32:23.409832  543705 memory.go:184] no items to output this cycle
I0322 08:32:23.409855  543705 cpu.go:275] no items to output this cycle
E0322 08:32:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:32:33.409769  543705 memory.go:184] no items to output this cycle
I0322 08:32:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 08:32:33.865453  543705 disk_info.go:125] begin check local disk info of client
I0322 08:32:33.867957  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:32:33.867964  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c940 0xc00035c980]
E0322 08:32:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:32:43.410846  543705 memory.go:191] Add success.
I0322 08:32:43.409811  543705 cpu.go:282] Add success.
I0322 08:32:43.420537  543705 net.go:648] Add success.
I0322 08:32:43.423624  543705 net.go:770] primary dev: ETH0
I0322 08:32:43.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:32:43.423654  543705 net.go:698] Add success.
I0322 08:32:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:32:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:32:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:32:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:32:53.409773  543705 memory.go:184] no items to output this cycle
I0322 08:32:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 08:33:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:33:03.409779  543705 cpu.go:275] no items to output this cycle
I0322 08:33:03.409782  543705 memory.go:184] no items to output this cycle
E0322 08:33:13.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:33:13.409927  543705 memory.go:191] Add success.
W0322 08:33:13.409955  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 08:33:13.409962  543705 cpu.go:282] Add success.
W0322 08:33:13.410101  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:33:13.410106  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:33:13.419712  543705 net.go:648] Add success.
I0322 08:33:13.422590  543705 net.go:770] primary dev: ETH0
I0322 08:33:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:33:13.422614  543705 net.go:698] Add success.
I0322 08:33:13.566364  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"55c8e606-39a2-41ee-932a-0601f061ba5e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:33:13.566395  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:33:14.454680  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:33:14.454836  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:33:14.454901  543705 disk_worker.go:708] disk space is not compliant
W0322 08:33:14.454904  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:33:14.456253  543705 disk_worker.go:494] system disk:vda1
I0322 08:33:14.456296  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:33:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:33:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:33:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:33:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:33:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:33:23.409779  543705 memory.go:184] no items to output this cycle
I0322 08:33:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 08:33:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:33:33.409786  543705 memory.go:184] no items to output this cycle
I0322 08:33:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 08:33:33.869523  543705 disk_info.go:125] begin check local disk info of client
I0322 08:33:33.871998  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:33:33.872003  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c47c0 0xc0000c4800]
I0322 08:33:39.574611  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:33:39.574617  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:33:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:33:43.410664  543705 memory.go:191] Add success.
I0322 08:33:43.409788  543705 cpu.go:282] Add success.
I0322 08:33:43.420365  543705 net.go:648] Add success.
I0322 08:33:43.423243  543705 net.go:770] primary dev: ETH0
I0322 08:33:43.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:33:43.423270  543705 net.go:698] Add success.
I0322 08:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:33:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:33:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:33:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:33:53.409786  543705 memory.go:184] no items to output this cycle
I0322 08:33:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:34:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:34:03.409784  543705 memory.go:184] no items to output this cycle
I0322 08:34:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 08:34:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:34:13.409796  543705 cpu.go:282] Add success.
I0322 08:34:13.409799  543705 memory.go:191] Add success.
W0322 08:34:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:34:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:34:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:34:13.420123  543705 net.go:648] Add success.
I0322 08:34:13.423114  543705 net.go:770] primary dev: ETH0
I0322 08:34:13.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:34:13.423138  543705 net.go:698] Add success.
I0322 08:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:34:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:34:14.455232  543705 disk_worker.go:708] disk space is not compliant
W0322 08:34:14.455235  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:34:14.456625  543705 disk_worker.go:494] system disk:vda1
I0322 08:34:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:34:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:34:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:34:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:34:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:34:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:34:23.409782  543705 memory.go:184] no items to output this cycle
I0322 08:34:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 08:34:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:34:33.409802  543705 memory.go:184] no items to output this cycle
I0322 08:34:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 08:34:33.873502  543705 disk_info.go:125] begin check local disk info of client
I0322 08:34:33.875959  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:34:33.875965  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312400 0xc000312440]
E0322 08:34:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:34:43.410676  543705 memory.go:191] Add success.
I0322 08:34:43.409798  543705 cpu.go:282] Add success.
I0322 08:34:43.420361  543705 net.go:648] Add success.
I0322 08:34:43.422904  543705 net.go:770] primary dev: ETH0
I0322 08:34:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:34:43.422929  543705 net.go:698] Add success.
I0322 08:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:34:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:34:53.409776  543705 memory.go:184] no items to output this cycle
I0322 08:34:53.409830  543705 cpu.go:275] no items to output this cycle
E0322 08:35:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:35:03.409781  543705 memory.go:184] no items to output this cycle
I0322 08:35:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 08:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:35:13.409795  543705 memory.go:191] Add success.
I0322 08:35:13.409810  543705 cpu.go:282] Add success.
W0322 08:35:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:35:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:35:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:35:13.420253  543705 net.go:648] Add success.
I0322 08:35:13.422926  543705 net.go:770] primary dev: ETH0
I0322 08:35:13.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:35:13.422954  543705 net.go:698] Add success.
I0322 08:35:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:35:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:35:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 08:35:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:35:14.456774  543705 disk_worker.go:494] system disk:vda1
I0322 08:35:14.456816  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:35:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:35:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:35:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:35:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:35:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:35:23.409792  543705 memory.go:184] no items to output this cycle
I0322 08:35:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:35:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:35:33.409775  543705 memory.go:184] no items to output this cycle
I0322 08:35:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 08:35:33.877512  543705 disk_info.go:125] begin check local disk info of client
I0322 08:35:33.880024  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:35:33.880029  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ffe80 0xc0004ffec0]
E0322 08:35:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:35:43.410658  543705 memory.go:191] Add success.
I0322 08:35:43.409802  543705 cpu.go:282] Add success.
I0322 08:35:43.420454  543705 net.go:648] Add success.
I0322 08:35:43.423416  543705 net.go:770] primary dev: ETH0
I0322 08:35:43.423429  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:35:43.423442  543705 net.go:698] Add success.
I0322 08:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:35:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:35:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:35:53.409814  543705 memory.go:184] no items to output this cycle
I0322 08:35:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 08:36:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:36:03.409813  543705 memory.go:184] no items to output this cycle
I0322 08:36:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 08:36:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:36:13.409793  543705 memory.go:191] Add success.
I0322 08:36:13.409809  543705 cpu.go:282] Add success.
W0322 08:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:36:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:36:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:36:13.420141  543705 net.go:648] Add success.
I0322 08:36:13.422758  543705 net.go:770] primary dev: ETH0
I0322 08:36:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:36:13.422783  543705 net.go:698] Add success.
I0322 08:36:13.481089  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"077ef76d-6064-4d98-852a-62a27329c18c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:36:13.481122  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:36:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:36:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:36:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 08:36:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:36:14.456745  543705 disk_worker.go:494] system disk:vda1
I0322 08:36:14.456773  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:36:15.455609  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:36:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:36:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:36:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:36:23.409793  543705 memory.go:184] no items to output this cycle
I0322 08:36:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:36:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:36:33.409765  543705 memory.go:184] no items to output this cycle
I0322 08:36:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 08:36:33.881499  543705 disk_info.go:125] begin check local disk info of client
I0322 08:36:33.883983  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:36:33.883989  543705 disk_info.go:196] parse disk info done, disk is : [0xc000271280 0xc0002712c0]
I0322 08:36:39.575626  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:36:39.575632  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:36:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:36:43.410645  543705 memory.go:191] Add success.
I0322 08:36:43.409811  543705 cpu.go:282] Add success.
I0322 08:36:43.420372  543705 net.go:648] Add success.
I0322 08:36:43.422900  543705 net.go:770] primary dev: ETH0
I0322 08:36:43.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:36:43.422927  543705 net.go:698] Add success.
I0322 08:36:46.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:36:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:36:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:36:53.410379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:36:53.410509  543705 memory.go:184] no items to output this cycle
I0322 08:36:53.410538  543705 cpu.go:275] no items to output this cycle
E0322 08:37:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:37:03.409759  543705 memory.go:184] no items to output this cycle
I0322 08:37:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 08:37:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:37:13.409825  543705 memory.go:191] Add success.
I0322 08:37:13.409830  543705 cpu.go:282] Add success.
W0322 08:37:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:37:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:37:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:37:13.420175  543705 net.go:648] Add success.
I0322 08:37:13.422937  543705 net.go:770] primary dev: ETH0
I0322 08:37:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:37:13.422963  543705 net.go:698] Add success.
I0322 08:37:13.453517  543705 event_worker.go:152] Polling the log file for events...
W0322 08:37:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:37:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 08:37:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:37:14.456921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:37:14.456930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:37:14.456936  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:37:14.456986  543705 disk_worker.go:494] system disk:vda1
I0322 08:37:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:37:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:37:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:37:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:37:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:37:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:37:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:37:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:37:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:37:23.409781  543705 memory.go:184] no items to output this cycle
I0322 08:37:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 08:37:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:37:33.409778  543705 memory.go:184] no items to output this cycle
I0322 08:37:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 08:37:33.885547  543705 disk_info.go:125] begin check local disk info of client
I0322 08:37:33.888004  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:37:33.888011  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002712c0 0xc000271300]
E0322 08:37:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:37:43.410848  543705 memory.go:191] Add success.
I0322 08:37:43.409801  543705 cpu.go:282] Add success.
I0322 08:37:43.420532  543705 net.go:648] Add success.
I0322 08:37:43.423921  543705 net.go:770] primary dev: ETH0
I0322 08:37:43.423934  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:37:43.423947  543705 net.go:698] Add success.
I0322 08:37:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:37:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:37:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:37:53.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:37:53.409934  543705 memory.go:184] no items to output this cycle
I0322 08:37:53.409954  543705 cpu.go:275] no items to output this cycle
E0322 08:38:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:38:03.409793  543705 memory.go:184] no items to output this cycle
I0322 08:38:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 08:38:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:38:13.409823  543705 memory.go:191] Add success.
I0322 08:38:13.409827  543705 cpu.go:282] Add success.
W0322 08:38:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:38:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:38:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:38:13.420216  543705 net.go:648] Add success.
I0322 08:38:13.423173  543705 net.go:770] primary dev: ETH0
I0322 08:38:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:38:13.423214  543705 net.go:698] Add success.
I0322 08:38:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:38:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:38:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 08:38:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:38:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 08:38:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:38:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:38:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:38:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:38:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:38:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:38:23.409762  543705 memory.go:184] no items to output this cycle
I0322 08:38:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:38:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:38:33.409768  543705 memory.go:184] no items to output this cycle
I0322 08:38:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 08:38:33.888103  543705 disk_info.go:125] begin check local disk info of client
I0322 08:38:33.890645  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:38:33.890651  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034bec0 0xc00034bf00]
E0322 08:38:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:38:43.410660  543705 memory.go:191] Add success.
I0322 08:38:43.409805  543705 cpu.go:282] Add success.
I0322 08:38:43.420364  543705 net.go:648] Add success.
I0322 08:38:43.423245  543705 net.go:770] primary dev: ETH0
I0322 08:38:43.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:38:43.423310  543705 net.go:698] Add success.
I0322 08:38:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:38:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:38:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:38:53.409783  543705 memory.go:184] no items to output this cycle
I0322 08:38:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 08:39:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:39:03.409774  543705 memory.go:184] no items to output this cycle
I0322 08:39:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 08:39:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:39:13.409799  543705 memory.go:191] Add success.
I0322 08:39:13.409803  543705 cpu.go:282] Add success.
W0322 08:39:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:39:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:39:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:39:13.420126  543705 net.go:648] Add success.
I0322 08:39:13.422985  543705 net.go:770] primary dev: ETH0
I0322 08:39:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:39:13.423009  543705 net.go:698] Add success.
I0322 08:39:13.463460  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7e5bed06-7050-46ae-8a57-0254b7be5c8f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:39:13.463499  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:39:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:39:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:39:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 08:39:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:39:14.456738  543705 disk_worker.go:494] system disk:vda1
I0322 08:39:14.456766  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:39:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:39:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:39:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:39:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:39:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:39:23.409773  543705 memory.go:184] no items to output this cycle
I0322 08:39:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 08:39:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:39:33.409801  543705 memory.go:184] no items to output this cycle
I0322 08:39:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 08:39:33.890735  543705 disk_info.go:125] begin check local disk info of client
I0322 08:39:33.893184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:39:33.893190  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053d080 0xc00053d0c0]
I0322 08:39:39.576634  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:39:39.576640  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:39:43.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:39:43.410844  543705 memory.go:191] Add success.
I0322 08:39:43.409908  543705 cpu.go:282] Add success.
I0322 08:39:43.419732  543705 net.go:648] Add success.
I0322 08:39:43.422784  543705 net.go:770] primary dev: ETH0
I0322 08:39:43.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:39:43.422809  543705 net.go:698] Add success.
I0322 08:39:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:39:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:39:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:39:53.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:39:53.410260  543705 memory.go:184] no items to output this cycle
I0322 08:39:53.410325  543705 cpu.go:275] no items to output this cycle
E0322 08:40:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:40:03.409783  543705 memory.go:184] no items to output this cycle
I0322 08:40:03.409788  543705 cpu.go:275] no items to output this cycle
W0322 08:40:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:40:13.409725  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:40:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 08:40:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:40:13.409823  543705 memory.go:191] Add success.
I0322 08:40:13.409828  543705 cpu.go:282] Add success.
I0322 08:40:13.420429  543705 net.go:648] Add success.
I0322 08:40:13.423291  543705 net.go:770] primary dev: ETH0
I0322 08:40:13.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:40:13.423317  543705 net.go:698] Add success.
I0322 08:40:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:40:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:40:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 08:40:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:40:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 08:40:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:40:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:40:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:40:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:40:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:40:23.409800  543705 memory.go:184] no items to output this cycle
I0322 08:40:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:40:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:40:33.409777  543705 memory.go:184] no items to output this cycle
I0322 08:40:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 08:40:33.893293  543705 disk_info.go:125] begin check local disk info of client
I0322 08:40:33.895822  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:40:33.895828  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003595c0 0xc000359600]
E0322 08:40:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:40:43.410729  543705 memory.go:191] Add success.
I0322 08:40:43.409812  543705 cpu.go:282] Add success.
I0322 08:40:43.420432  543705 net.go:648] Add success.
I0322 08:40:43.423452  543705 net.go:770] primary dev: ETH0
I0322 08:40:43.423465  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:40:43.423477  543705 net.go:698] Add success.
I0322 08:40:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:40:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:40:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:40:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:40:53.409785  543705 memory.go:184] no items to output this cycle
I0322 08:40:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:41:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:41:03.409771  543705 memory.go:184] no items to output this cycle
I0322 08:41:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 08:41:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:41:13.409821  543705 memory.go:191] Add success.
I0322 08:41:13.409828  543705 cpu.go:282] Add success.
W0322 08:41:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:41:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:41:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:41:13.420161  543705 net.go:648] Add success.
I0322 08:41:13.423034  543705 net.go:770] primary dev: ETH0
I0322 08:41:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:41:13.423058  543705 net.go:698] Add success.
I0322 08:41:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:41:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:41:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 08:41:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:41:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 08:41:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:41:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:41:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:41:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:41:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:41:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:41:23.409766  543705 memory.go:184] no items to output this cycle
I0322 08:41:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 08:41:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:41:33.409797  543705 memory.go:184] no items to output this cycle
I0322 08:41:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 08:41:33.895911  543705 disk_info.go:125] begin check local disk info of client
I0322 08:41:33.898402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:41:33.898408  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387b40 0xc000387b80]
E0322 08:41:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:41:43.410782  543705 memory.go:191] Add success.
I0322 08:41:43.409804  543705 cpu.go:282] Add success.
I0322 08:41:43.420655  543705 net.go:648] Add success.
I0322 08:41:43.423625  543705 net.go:770] primary dev: ETH0
I0322 08:41:43.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:41:43.423650  543705 net.go:698] Add success.
I0322 08:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:41:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:41:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:41:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:41:53.409781  543705 memory.go:184] no items to output this cycle
I0322 08:41:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 08:42:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:42:03.409784  543705 memory.go:184] no items to output this cycle
I0322 08:42:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 08:42:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:42:13.409801  543705 memory.go:191] Add success.
I0322 08:42:13.409805  543705 cpu.go:282] Add success.
W0322 08:42:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:42:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:42:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:42:13.420468  543705 net.go:648] Add success.
I0322 08:42:13.423147  543705 net.go:770] primary dev: ETH0
I0322 08:42:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:42:13.423172  543705 net.go:698] Add success.
I0322 08:42:13.467451  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f104f3d8-c9ca-422f-b6c2-2cd8708edf98","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:42:13.467485  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 08:42:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:42:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0322 08:42:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:42:14.456804  543705 disk_worker.go:494] system disk:vda1
I0322 08:42:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:42:14.457141  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:42:14.457150  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:42:14.457154  543705 custom_config.go:64] query custom config with name: gpu
E0322 08:42:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:42:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:42:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:42:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:42:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:42:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:42:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:42:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:42:23.409776  543705 memory.go:184] no items to output this cycle
I0322 08:42:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 08:42:33.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:42:33.409896  543705 memory.go:184] no items to output this cycle
I0322 08:42:33.410013  543705 cpu.go:275] no items to output this cycle
I0322 08:42:33.900064  543705 disk_info.go:125] begin check local disk info of client
I0322 08:42:33.902650  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:42:33.902655  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035f240 0xc00035f280]
I0322 08:42:39.577640  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:42:39.577669  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:42:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:42:43.410681  543705 memory.go:191] Add success.
I0322 08:42:43.409819  543705 cpu.go:282] Add success.
I0322 08:42:43.420389  543705 net.go:648] Add success.
I0322 08:42:43.422973  543705 net.go:770] primary dev: ETH0
I0322 08:42:43.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:42:43.423002  543705 net.go:698] Add success.
I0322 08:42:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:42:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:42:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:42:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:42:53.409803  543705 memory.go:184] no items to output this cycle
I0322 08:42:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 08:43:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:43:03.409786  543705 memory.go:184] no items to output this cycle
I0322 08:43:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:43:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:43:13.409786  543705 memory.go:191] Add success.
W0322 08:43:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 08:43:13.409816  543705 cpu.go:282] Add success.
W0322 08:43:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:43:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:43:13.420116  543705 net.go:648] Add success.
I0322 08:43:13.425048  543705 net.go:770] primary dev: ETH0
I0322 08:43:13.425066  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:43:13.425080  543705 net.go:698] Add success.
I0322 08:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:43:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:43:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 08:43:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:43:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 08:43:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:43:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:43:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:43:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:43:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:43:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:43:23.409774  543705 memory.go:184] no items to output this cycle
I0322 08:43:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:43:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:43:33.409805  543705 memory.go:184] no items to output this cycle
I0322 08:43:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 08:43:33.902744  543705 disk_info.go:125] begin check local disk info of client
I0322 08:43:33.905368  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:43:33.905373  543705 disk_info.go:196] parse disk info done, disk is : [0xc000507380 0xc0005073c0]
E0322 08:43:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:43:43.410703  543705 memory.go:191] Add success.
I0322 08:43:43.409833  543705 cpu.go:282] Add success.
I0322 08:43:43.420395  543705 net.go:648] Add success.
I0322 08:43:43.423069  543705 net.go:770] primary dev: ETH0
I0322 08:43:43.423083  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:43:43.423095  543705 net.go:698] Add success.
I0322 08:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:43:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:43:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:43:53.409796  543705 memory.go:184] no items to output this cycle
I0322 08:43:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 08:44:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:44:03.409795  543705 memory.go:184] no items to output this cycle
I0322 08:44:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 08:44:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:44:13.409831  543705 memory.go:191] Add success.
I0322 08:44:13.409839  543705 cpu.go:282] Add success.
W0322 08:44:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:44:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:44:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:44:13.420309  543705 net.go:648] Add success.
I0322 08:44:13.423309  543705 net.go:770] primary dev: ETH0
I0322 08:44:13.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:44:13.423338  543705 net.go:698] Add success.
I0322 08:44:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:44:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:44:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 08:44:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:44:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 08:44:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:44:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:44:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:44:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:44:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:44:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:44:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:44:23.409773  543705 memory.go:184] no items to output this cycle
I0322 08:44:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 08:44:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:44:33.409811  543705 memory.go:184] no items to output this cycle
I0322 08:44:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 08:44:33.905661  543705 disk_info.go:125] begin check local disk info of client
I0322 08:44:33.908170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:44:33.908177  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376b40 0xc000376b80]
E0322 08:44:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:44:43.410687  543705 memory.go:191] Add success.
I0322 08:44:43.409836  543705 cpu.go:282] Add success.
I0322 08:44:43.420539  543705 net.go:648] Add success.
I0322 08:44:43.423158  543705 net.go:770] primary dev: ETH0
I0322 08:44:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:44:43.423182  543705 net.go:698] Add success.
I0322 08:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:44:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:44:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:44:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:44:53.409791  543705 memory.go:184] no items to output this cycle
I0322 08:44:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 08:45:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:45:03.409762  543705 memory.go:184] no items to output this cycle
I0322 08:45:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:45:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:45:13.409802  543705 memory.go:191] Add success.
I0322 08:45:13.409806  543705 cpu.go:282] Add success.
W0322 08:45:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:45:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:45:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:45:13.420121  543705 net.go:648] Add success.
I0322 08:45:13.422924  543705 net.go:770] primary dev: ETH0
I0322 08:45:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:45:13.422958  543705 net.go:698] Add success.
I0322 08:45:13.479150  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6f626488-2669-4767-a11f-8845a79737bf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:45:13.479184  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:45:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:45:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:45:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 08:45:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:45:14.456738  543705 disk_worker.go:494] system disk:vda1
I0322 08:45:14.456768  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:45:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:45:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:45:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:45:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:45:23.409771  543705 memory.go:184] no items to output this cycle
I0322 08:45:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 08:45:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:45:33.409795  543705 memory.go:184] no items to output this cycle
I0322 08:45:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 08:45:33.908260  543705 disk_info.go:125] begin check local disk info of client
I0322 08:45:33.910779  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:45:33.910784  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034b1c0 0xc00034b200]
I0322 08:45:39.578594  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:45:39.578602  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:45:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:45:43.410695  543705 memory.go:191] Add success.
I0322 08:45:43.409809  543705 cpu.go:282] Add success.
I0322 08:45:43.420382  543705 net.go:648] Add success.
I0322 08:45:43.423184  543705 net.go:770] primary dev: ETH0
I0322 08:45:43.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:45:43.423209  543705 net.go:698] Add success.
I0322 08:45:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:45:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:45:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:45:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:45:53.409776  543705 memory.go:184] no items to output this cycle
I0322 08:45:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 08:46:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:46:03.409786  543705 memory.go:184] no items to output this cycle
I0322 08:46:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 08:46:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:46:13.409822  543705 memory.go:191] Add success.
I0322 08:46:13.409829  543705 cpu.go:282] Add success.
W0322 08:46:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:46:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:46:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:46:13.420166  543705 net.go:648] Add success.
I0322 08:46:13.422939  543705 net.go:770] primary dev: ETH0
I0322 08:46:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:46:13.422964  543705 net.go:698] Add success.
I0322 08:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:46:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:46:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 08:46:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:46:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 08:46:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:46:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:46:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:46:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:46:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:46:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:46:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:46:23.409775  543705 memory.go:184] no items to output this cycle
I0322 08:46:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 08:46:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:46:33.409766  543705 memory.go:184] no items to output this cycle
I0322 08:46:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 08:46:33.910865  543705 disk_info.go:125] begin check local disk info of client
I0322 08:46:33.913440  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:46:33.913447  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae400 0xc0003ae440]
E0322 08:46:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:46:43.410676  543705 memory.go:191] Add success.
I0322 08:46:43.409827  543705 cpu.go:282] Add success.
I0322 08:46:43.420483  543705 net.go:648] Add success.
I0322 08:46:43.423307  543705 net.go:770] primary dev: ETH0
I0322 08:46:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:46:43.423332  543705 net.go:698] Add success.
I0322 08:46:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:46:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:46:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:46:53.410406  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:46:53.410429  543705 memory.go:184] no items to output this cycle
I0322 08:46:53.410532  543705 cpu.go:275] no items to output this cycle
E0322 08:47:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:47:03.409772  543705 memory.go:184] no items to output this cycle
I0322 08:47:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 08:47:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:47:13.409797  543705 memory.go:191] Add success.
I0322 08:47:13.409812  543705 cpu.go:282] Add success.
W0322 08:47:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:47:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:47:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:47:13.420152  543705 net.go:648] Add success.
I0322 08:47:13.423133  543705 net.go:770] primary dev: ETH0
I0322 08:47:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:47:13.423158  543705 net.go:698] Add success.
I0322 08:47:13.452789  543705 event_worker.go:152] Polling the log file for events...
W0322 08:47:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 08:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:47:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 08:47:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:47:14.456921  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:47:14.456930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:47:14.456936  543705 custom_config.go:64] query custom config with name: gpu
E0322 08:47:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:47:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:47:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:47:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:47:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:47:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:47:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:47:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:47:23.409767  543705 memory.go:184] no items to output this cycle
I0322 08:47:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 08:47:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:47:33.409785  543705 memory.go:184] no items to output this cycle
I0322 08:47:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 08:47:33.913672  543705 disk_info.go:125] begin check local disk info of client
I0322 08:47:33.916128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:47:33.916133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1c40 0xc0003b1c80]
E0322 08:47:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:47:43.410916  543705 memory.go:191] Add success.
I0322 08:47:43.409801  543705 cpu.go:282] Add success.
I0322 08:47:43.420591  543705 net.go:648] Add success.
I0322 08:47:43.423560  543705 net.go:770] primary dev: ETH0
I0322 08:47:43.423572  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:47:43.423583  543705 net.go:698] Add success.
I0322 08:47:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:47:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:47:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:47:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:47:53.409765  543705 memory.go:184] no items to output this cycle
I0322 08:47:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 08:48:03.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:48:03.409883  543705 memory.go:184] no items to output this cycle
I0322 08:48:03.409917  543705 cpu.go:275] no items to output this cycle
E0322 08:48:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:48:13.409800  543705 memory.go:191] Add success.
I0322 08:48:13.409802  543705 cpu.go:282] Add success.
W0322 08:48:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:48:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:48:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:48:13.420215  543705 net.go:648] Add success.
I0322 08:48:13.423121  543705 net.go:770] primary dev: ETH0
I0322 08:48:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:48:13.423152  543705 net.go:698] Add success.
I0322 08:48:13.509056  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"09a49c8b-9539-46e4-ba3b-7ff4984944c8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:48:13.509090  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:48:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:48:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:48:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 08:48:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:48:14.456532  543705 disk_worker.go:494] system disk:vda1
I0322 08:48:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:48:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:48:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:48:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:48:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:48:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:48:23.409778  543705 memory.go:184] no items to output this cycle
I0322 08:48:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 08:48:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:48:33.409804  543705 memory.go:184] no items to output this cycle
I0322 08:48:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 08:48:33.917669  543705 disk_info.go:125] begin check local disk info of client
I0322 08:48:33.920191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:48:33.920196  543705 disk_info.go:196] parse disk info done, disk is : [0xc000297400 0xc000297440]
I0322 08:48:39.579651  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:48:39.579659  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:48:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:48:43.410828  543705 memory.go:191] Add success.
I0322 08:48:43.409817  543705 cpu.go:282] Add success.
I0322 08:48:43.420557  543705 net.go:648] Add success.
I0322 08:48:43.423509  543705 net.go:770] primary dev: ETH0
I0322 08:48:43.423524  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:48:43.423539  543705 net.go:698] Add success.
I0322 08:48:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:48:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:48:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:48:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:48:53.409793  543705 memory.go:184] no items to output this cycle
I0322 08:48:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 08:49:03.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:49:03.409954  543705 memory.go:184] no items to output this cycle
I0322 08:49:03.410039  543705 cpu.go:275] no items to output this cycle
E0322 08:49:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:49:13.409836  543705 memory.go:191] Add success.
I0322 08:49:13.409838  543705 cpu.go:282] Add success.
W0322 08:49:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:49:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:49:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:49:13.420590  543705 net.go:648] Add success.
I0322 08:49:13.423254  543705 net.go:770] primary dev: ETH0
I0322 08:49:13.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:49:13.423281  543705 net.go:698] Add success.
I0322 08:49:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:49:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:49:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 08:49:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:49:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 08:49:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:49:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:49:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:49:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:49:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:49:23.409787  543705 cpu.go:275] no items to output this cycle
I0322 08:49:23.409794  543705 memory.go:184] no items to output this cycle
E0322 08:49:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:49:33.409809  543705 memory.go:184] no items to output this cycle
I0322 08:49:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 08:49:33.921670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:49:33.924203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:49:33.924210  543705 disk_info.go:196] parse disk info done, disk is : [0xc000284700 0xc000284740]
E0322 08:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:49:43.410708  543705 memory.go:191] Add success.
I0322 08:49:43.409826  543705 cpu.go:282] Add success.
I0322 08:49:43.420419  543705 net.go:648] Add success.
I0322 08:49:43.423233  543705 net.go:770] primary dev: ETH0
I0322 08:49:43.423246  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:49:43.423261  543705 net.go:698] Add success.
I0322 08:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:49:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:49:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:49:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:49:53.409776  543705 cpu.go:275] no items to output this cycle
I0322 08:49:53.409779  543705 memory.go:184] no items to output this cycle
E0322 08:50:03.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:50:03.409872  543705 memory.go:184] no items to output this cycle
I0322 08:50:03.409938  543705 cpu.go:275] no items to output this cycle
E0322 08:50:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:50:13.409820  543705 memory.go:191] Add success.
I0322 08:50:13.409833  543705 cpu.go:282] Add success.
W0322 08:50:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:50:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:50:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:50:13.420347  543705 net.go:648] Add success.
I0322 08:50:13.423615  543705 net.go:770] primary dev: ETH0
I0322 08:50:13.423628  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:50:13.423641  543705 net.go:698] Add success.
I0322 08:50:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:50:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:50:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 08:50:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:50:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 08:50:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:50:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:50:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:50:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:50:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:50:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:50:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:50:23.409770  543705 memory.go:184] no items to output this cycle
I0322 08:50:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 08:50:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:50:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 08:50:33.409794  543705 memory.go:184] no items to output this cycle
I0322 08:50:33.925672  543705 disk_info.go:125] begin check local disk info of client
I0322 08:50:33.928113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:50:33.928119  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330780 0xc0003307c0]
E0322 08:50:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:50:43.410775  543705 memory.go:191] Add success.
I0322 08:50:43.409796  543705 cpu.go:282] Add success.
I0322 08:50:43.420489  543705 net.go:648] Add success.
I0322 08:50:43.423456  543705 net.go:770] primary dev: ETH0
I0322 08:50:43.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:50:43.423479  543705 net.go:698] Add success.
I0322 08:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:50:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:50:53.409804  543705 memory.go:184] no items to output this cycle
I0322 08:50:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 08:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:51:03.409775  543705 memory.go:184] no items to output this cycle
I0322 08:51:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 08:51:13.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:51:13.409920  543705 memory.go:191] Add success.
W0322 08:51:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:51:13.409987  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:51:13.409990  543705 cpu.go:282] Add success.
I0322 08:51:13.410070  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:51:13.419712  543705 net.go:648] Add success.
I0322 08:51:13.422457  543705 net.go:770] primary dev: ETH0
I0322 08:51:13.422470  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:51:13.422481  543705 net.go:698] Add success.
I0322 08:51:13.467788  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"43bc3ee9-704e-4172-88f1-3175362ce154","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:51:13.467822  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:51:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:51:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:51:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 08:51:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:51:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 08:51:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:51:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:51:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:51:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:51:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:51:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:51:23.409776  543705 memory.go:184] no items to output this cycle
I0322 08:51:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 08:51:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:51:33.409805  543705 memory.go:184] no items to output this cycle
I0322 08:51:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 08:51:33.929668  543705 disk_info.go:125] begin check local disk info of client
I0322 08:51:33.932203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:51:33.932210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe480 0xc0003fe4c0]
I0322 08:51:39.580653  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:51:39.580660  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:51:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:51:43.410696  543705 memory.go:191] Add success.
I0322 08:51:43.409813  543705 cpu.go:282] Add success.
I0322 08:51:43.420472  543705 net.go:648] Add success.
I0322 08:51:43.423135  543705 net.go:770] primary dev: ETH0
I0322 08:51:43.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:51:43.423162  543705 net.go:698] Add success.
I0322 08:51:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:51:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:51:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:51:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:51:53.409767  543705 memory.go:184] no items to output this cycle
I0322 08:51:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 08:52:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:52:03.409775  543705 memory.go:184] no items to output this cycle
I0322 08:52:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 08:52:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:52:13.409880  543705 memory.go:191] Add success.
W0322 08:52:13.409921  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:52:13.409933  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:52:13.409935  543705 cpu.go:282] Add success.
I0322 08:52:13.409937  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:52:13.419710  543705 net.go:648] Add success.
I0322 08:52:13.422184  543705 net.go:770] primary dev: ETH0
I0322 08:52:13.422196  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:52:13.422207  543705 net.go:698] Add success.
W0322 08:52:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:52:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 08:52:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0322 08:52:14.456802  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:52:14.456811  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:52:14.456816  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:52:14.456866  543705 disk_worker.go:494] system disk:vda1
I0322 08:52:14.456909  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:52:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:52:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:52:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:52:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:52:16.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:52:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:52:16.472305  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:52:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:52:23.409768  543705 memory.go:184] no items to output this cycle
I0322 08:52:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 08:52:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:52:33.409773  543705 memory.go:184] no items to output this cycle
I0322 08:52:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 08:52:33.933673  543705 disk_info.go:125] begin check local disk info of client
I0322 08:52:33.936102  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:52:33.936107  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aab80 0xc0001aabc0]
E0322 08:52:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:52:43.410686  543705 memory.go:191] Add success.
I0322 08:52:43.409814  543705 cpu.go:282] Add success.
I0322 08:52:43.420496  543705 net.go:648] Add success.
I0322 08:52:43.423240  543705 net.go:770] primary dev: ETH0
I0322 08:52:43.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:52:43.423270  543705 net.go:698] Add success.
I0322 08:52:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:52:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:52:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:52:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:52:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 08:52:53.409790  543705 memory.go:184] no items to output this cycle
E0322 08:53:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:53:03.409787  543705 memory.go:184] no items to output this cycle
I0322 08:53:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 08:53:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:53:13.409880  543705 memory.go:191] Add success.
W0322 08:53:13.409912  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:53:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:53:13.409930  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:53:13.409938  543705 cpu.go:282] Add success.
I0322 08:53:13.419740  543705 net.go:648] Add success.
I0322 08:53:13.422647  543705 net.go:770] primary dev: ETH0
I0322 08:53:13.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:53:13.422672  543705 net.go:698] Add success.
I0322 08:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:53:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:53:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 08:53:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:53:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 08:53:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:53:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:53:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:53:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:53:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:53:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:53:23.409771  543705 memory.go:184] no items to output this cycle
I0322 08:53:23.409776  543705 cpu.go:275] no items to output this cycle
E0322 08:53:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:53:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 08:53:33.409790  543705 memory.go:184] no items to output this cycle
I0322 08:53:33.937670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:53:33.940154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:53:33.940160  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046aa80 0xc00046aac0]
E0322 08:53:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:53:43.410851  543705 memory.go:191] Add success.
I0322 08:53:43.409812  543705 cpu.go:282] Add success.
I0322 08:53:43.420370  543705 net.go:770] primary dev: ETH0
I0322 08:53:43.420384  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:53:43.420396  543705 net.go:698] Add success.
I0322 08:53:43.420756  543705 net.go:648] Add success.
I0322 08:53:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:53:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:53:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:53:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:53:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 08:53:53.409781  543705 memory.go:184] no items to output this cycle
E0322 08:54:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:54:03.409786  543705 memory.go:184] no items to output this cycle
I0322 08:54:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 08:54:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:54:13.409816  543705 memory.go:191] Add success.
I0322 08:54:13.409820  543705 cpu.go:282] Add success.
W0322 08:54:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:54:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:54:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:54:13.420323  543705 net.go:648] Add success.
I0322 08:54:13.422973  543705 net.go:770] primary dev: ETH0
I0322 08:54:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:54:13.422998  543705 net.go:698] Add success.
I0322 08:54:13.467848  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"934ebc9e-fcb5-4b56-8351-731d172cde16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:54:13.467879  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 08:54:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:54:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:54:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 08:54:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:54:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 08:54:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:54:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:54:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:54:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:54:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:54:23.409763  543705 memory.go:184] no items to output this cycle
I0322 08:54:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 08:54:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:54:33.409781  543705 memory.go:184] no items to output this cycle
I0322 08:54:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 08:54:33.941673  543705 disk_info.go:125] begin check local disk info of client
I0322 08:54:33.944251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:54:33.944257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c5c0 0xc00035c600]
I0322 08:54:39.581673  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:54:39.581679  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:54:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:54:43.410776  543705 memory.go:191] Add success.
I0322 08:54:43.409807  543705 cpu.go:282] Add success.
I0322 08:54:43.420477  543705 net.go:648] Add success.
I0322 08:54:43.423279  543705 net.go:770] primary dev: ETH0
I0322 08:54:43.423294  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:54:43.423306  543705 net.go:698] Add success.
I0322 08:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:54:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:54:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:54:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:54:53.409784  543705 memory.go:184] no items to output this cycle
I0322 08:54:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 08:55:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:55:03.409797  543705 memory.go:184] no items to output this cycle
I0322 08:55:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 08:55:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:55:13.409824  543705 memory.go:191] Add success.
I0322 08:55:13.409834  543705 cpu.go:282] Add success.
W0322 08:55:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:55:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:55:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:55:13.419744  543705 net.go:648] Add success.
I0322 08:55:13.422302  543705 net.go:770] primary dev: ETH0
I0322 08:55:13.422316  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:55:13.422328  543705 net.go:698] Add success.
I0322 08:55:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:55:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:55:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 08:55:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:55:14.456516  543705 disk_worker.go:494] system disk:vda1
I0322 08:55:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:55:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:55:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:55:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:55:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:55:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:55:23.409800  543705 memory.go:184] no items to output this cycle
I0322 08:55:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 08:55:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:55:33.409799  543705 memory.go:184] no items to output this cycle
I0322 08:55:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 08:55:33.945673  543705 disk_info.go:125] begin check local disk info of client
I0322 08:55:33.948161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:55:33.948166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae80 0xc0001aaec0]
E0322 08:55:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:55:43.410638  543705 memory.go:191] Add success.
I0322 08:55:43.409815  543705 cpu.go:282] Add success.
I0322 08:55:43.420331  543705 net.go:648] Add success.
I0322 08:55:43.423027  543705 net.go:770] primary dev: ETH0
I0322 08:55:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:55:43.423053  543705 net.go:698] Add success.
I0322 08:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:55:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:55:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:55:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:55:53.409808  543705 memory.go:184] no items to output this cycle
I0322 08:55:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 08:56:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:56:03.409811  543705 memory.go:184] no items to output this cycle
I0322 08:56:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 08:56:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:56:13.409796  543705 memory.go:191] Add success.
I0322 08:56:13.409815  543705 cpu.go:282] Add success.
W0322 08:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:56:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:56:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:56:13.420607  543705 net.go:648] Add success.
I0322 08:56:13.423895  543705 net.go:770] primary dev: ETH0
I0322 08:56:13.423908  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:56:13.423919  543705 net.go:698] Add success.
I0322 08:56:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:56:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:56:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 08:56:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:56:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 08:56:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:56:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:56:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:56:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:56:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:56:23.409778  543705 memory.go:184] no items to output this cycle
I0322 08:56:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 08:56:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:56:33.409785  543705 memory.go:184] no items to output this cycle
I0322 08:56:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 08:56:33.949670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:56:33.952231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:56:33.952236  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb80 0xc00007bbc0]
E0322 08:56:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:56:43.410575  543705 memory.go:191] Add success.
I0322 08:56:43.409795  543705 cpu.go:282] Add success.
I0322 08:56:43.420358  543705 net.go:648] Add success.
I0322 08:56:43.423010  543705 net.go:770] primary dev: ETH0
I0322 08:56:43.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:56:43.423036  543705 net.go:698] Add success.
I0322 08:56:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:56:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:56:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:56:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:56:53.409784  543705 memory.go:184] no items to output this cycle
I0322 08:56:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 08:57:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:57:03.409798  543705 memory.go:184] no items to output this cycle
I0322 08:57:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 08:57:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:57:13.409786  543705 memory.go:191] Add success.
I0322 08:57:13.409805  543705 cpu.go:282] Add success.
W0322 08:57:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:57:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:57:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:57:13.420331  543705 net.go:648] Add success.
I0322 08:57:13.423193  543705 net.go:770] primary dev: ETH0
I0322 08:57:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:57:13.423228  543705 net.go:698] Add success.
I0322 08:57:13.429684  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 08:57:13.452772  543705 event_worker.go:152] Polling the log file for events...
I0322 08:57:13.462952  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9790d2f1-3ee8-4a15-9d5d-3cbef0108ef1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 08:57:13.462986  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 08:57:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:57:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 08:57:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:57:14.456809  543705 disk_worker.go:494] system disk:vda1
E0322 08:57:14.456832  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 08:57:14.456841  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 08:57:14.456845  543705 custom_config.go:64] query custom config with name: gpu
I0322 08:57:14.456859  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 08:57:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 08:57:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:57:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 08:57:16.457912  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 08:57:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:57:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:57:16.472317  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:57:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:57:23.409777  543705 memory.go:184] no items to output this cycle
I0322 08:57:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 08:57:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:57:33.409799  543705 memory.go:184] no items to output this cycle
I0322 08:57:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 08:57:33.953674  543705 disk_info.go:125] begin check local disk info of client
I0322 08:57:33.956142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:57:33.956147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8440 0xc0002b8480]
I0322 08:57:39.582624  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 08:57:39.582631  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 08:57:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:57:43.410564  543705 memory.go:191] Add success.
I0322 08:57:43.409814  543705 cpu.go:282] Add success.
I0322 08:57:43.420302  543705 net.go:648] Add success.
I0322 08:57:43.423009  543705 net.go:770] primary dev: ETH0
I0322 08:57:43.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:57:43.423036  543705 net.go:698] Add success.
I0322 08:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:57:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:57:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:57:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:57:53.409777  543705 memory.go:184] no items to output this cycle
I0322 08:57:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 08:58:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:58:03.409787  543705 memory.go:184] no items to output this cycle
I0322 08:58:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 08:58:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:58:13.409785  543705 memory.go:191] Add success.
I0322 08:58:13.409808  543705 cpu.go:282] Add success.
W0322 08:58:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:58:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:58:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:58:13.420301  543705 net.go:648] Add success.
I0322 08:58:13.423162  543705 net.go:770] primary dev: ETH0
I0322 08:58:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:58:13.423191  543705 net.go:698] Add success.
I0322 08:58:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:58:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:58:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 08:58:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:58:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 08:58:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:58:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:58:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:58:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:58:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:58:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:58:23.409795  543705 memory.go:184] no items to output this cycle
I0322 08:58:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 08:58:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:58:33.409803  543705 memory.go:184] no items to output this cycle
I0322 08:58:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 08:58:33.957671  543705 disk_info.go:125] begin check local disk info of client
I0322 08:58:33.960156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:58:33.960161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9ac0 0xc0004d9b00]
E0322 08:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:58:43.410616  543705 memory.go:191] Add success.
I0322 08:58:43.409821  543705 cpu.go:282] Add success.
I0322 08:58:43.420365  543705 net.go:648] Add success.
I0322 08:58:43.422822  543705 net.go:770] primary dev: ETH0
I0322 08:58:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:58:43.422850  543705 net.go:698] Add success.
I0322 08:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:58:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:58:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:58:53.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:58:53.409881  543705 memory.go:184] no items to output this cycle
I0322 08:58:53.409880  543705 cpu.go:275] no items to output this cycle
E0322 08:59:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:59:03.409795  543705 memory.go:184] no items to output this cycle
I0322 08:59:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 08:59:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:59:13.409824  543705 memory.go:191] Add success.
I0322 08:59:13.409825  543705 cpu.go:282] Add success.
W0322 08:59:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 08:59:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 08:59:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 08:59:13.420205  543705 net.go:648] Add success.
I0322 08:59:13.423214  543705 net.go:770] primary dev: ETH0
I0322 08:59:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:59:13.423242  543705 net.go:698] Add success.
I0322 08:59:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 08:59:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 08:59:14.455281  543705 disk_worker.go:708] disk space is not compliant
W0322 08:59:14.455286  543705 disk_worker.go:728] disk inode is not compliant
I0322 08:59:14.456721  543705 disk_worker.go:494] system disk:vda1
I0322 08:59:14.456764  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 08:59:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 08:59:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:59:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:59:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 08:59:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0322 08:59:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:59:23.409777  543705 memory.go:184] no items to output this cycle
I0322 08:59:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 08:59:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:59:33.409808  543705 memory.go:184] no items to output this cycle
I0322 08:59:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 08:59:33.961670  543705 disk_info.go:125] begin check local disk info of client
I0322 08:59:33.964151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 08:59:33.964156  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abe00 0xc0001abe40]
E0322 08:59:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:59:43.410550  543705 memory.go:191] Add success.
I0322 08:59:43.409802  543705 cpu.go:282] Add success.
I0322 08:59:43.420318  543705 net.go:648] Add success.
I0322 08:59:43.422941  543705 net.go:770] primary dev: ETH0
I0322 08:59:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0322 08:59:43.422970  543705 net.go:698] Add success.
I0322 08:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 08:59:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 08:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 08:59:53.410350  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 08:59:53.410365  543705 memory.go:184] no items to output this cycle
I0322 08:59:53.410377  543705 cpu.go:275] no items to output this cycle
E0322 09:00:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:00:03.409779  543705 memory.go:184] no items to output this cycle
I0322 09:00:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 09:00:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:00:13.409783  543705 memory.go:191] Add success.
W0322 09:00:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:00:13.409811  543705 cpu.go:282] Add success.
W0322 09:00:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:00:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:00:13.420143  543705 net.go:648] Add success.
I0322 09:00:13.422882  543705 net.go:770] primary dev: ETH0
I0322 09:00:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:00:13.422908  543705 net.go:698] Add success.
I0322 09:00:13.467379  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e9fcd867-d214-41d8-9a96-8e78448f4259","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:00:13.467411  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:00:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:00:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 09:00:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:00:14.457334  543705 disk_worker.go:494] system disk:vda1
I0322 09:00:14.457441  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:00:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:00:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:00:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:00:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 09:00:23.409786  543705 memory.go:184] no items to output this cycle
E0322 09:00:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:00:33.409796  543705 memory.go:184] no items to output this cycle
I0322 09:00:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 09:00:33.965672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:00:33.968246  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:00:33.968252  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
I0322 09:00:39.583669  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:00:39.583675  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:00:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:00:43.410655  543705 memory.go:191] Add success.
I0322 09:00:43.409814  543705 cpu.go:282] Add success.
I0322 09:00:43.420172  543705 net.go:770] primary dev: ETH0
I0322 09:00:43.420184  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:00:43.420198  543705 net.go:698] Add success.
I0322 09:00:43.420449  543705 net.go:648] Add success.
I0322 09:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:00:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:00:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:00:53.409777  543705 memory.go:184] no items to output this cycle
I0322 09:00:53.409832  543705 cpu.go:275] no items to output this cycle
E0322 09:01:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:01:03.409776  543705 memory.go:184] no items to output this cycle
I0322 09:01:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 09:01:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:01:13.409798  543705 memory.go:191] Add success.
I0322 09:01:13.409798  543705 cpu.go:282] Add success.
W0322 09:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:01:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:01:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:01:13.420294  543705 net.go:648] Add success.
I0322 09:01:13.422984  543705 net.go:770] primary dev: ETH0
I0322 09:01:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:01:13.423014  543705 net.go:698] Add success.
I0322 09:01:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:01:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:01:14.455144  543705 disk_worker.go:708] disk space is not compliant
W0322 09:01:14.455147  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:01:14.456478  543705 disk_worker.go:494] system disk:vda1
I0322 09:01:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:01:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:01:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:01:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:01:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:01:23.409787  543705 memory.go:184] no items to output this cycle
I0322 09:01:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 09:01:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:01:33.409793  543705 memory.go:184] no items to output this cycle
I0322 09:01:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 09:01:33.969672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:01:33.972167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:01:33.972173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c55c0 0xc0000c5600]
E0322 09:01:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:01:43.410857  543705 memory.go:191] Add success.
I0322 09:01:43.409803  543705 cpu.go:282] Add success.
I0322 09:01:43.420599  543705 net.go:648] Add success.
I0322 09:01:43.423354  543705 net.go:770] primary dev: ETH0
I0322 09:01:43.423368  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:01:43.423387  543705 net.go:698] Add success.
I0322 09:01:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:01:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:01:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:01:53.409794  543705 memory.go:184] no items to output this cycle
I0322 09:01:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 09:02:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:02:03.409774  543705 memory.go:184] no items to output this cycle
I0322 09:02:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:02:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:02:13.409817  543705 memory.go:191] Add success.
I0322 09:02:13.409823  543705 cpu.go:282] Add success.
W0322 09:02:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:02:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:02:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:02:13.420121  543705 net.go:648] Add success.
I0322 09:02:13.423122  543705 net.go:770] primary dev: ETH0
I0322 09:02:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:02:13.423147  543705 net.go:698] Add success.
W0322 09:02:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:02:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 09:02:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:02:14.456803  543705 disk_worker.go:494] system disk:vda1
I0322 09:02:14.456842  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:02:14.457036  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:02:14.457044  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:02:14.457049  543705 custom_config.go:64] query custom config with name: gpu
E0322 09:02:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:02:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:02:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:02:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:02:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:02:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:02:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:02:23.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:02:23.409907  543705 memory.go:184] no items to output this cycle
I0322 09:02:23.409965  543705 cpu.go:275] no items to output this cycle
E0322 09:02:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:02:33.409774  543705 memory.go:184] no items to output this cycle
I0322 09:02:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 09:02:33.973676  543705 disk_info.go:125] begin check local disk info of client
I0322 09:02:33.976184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:02:33.976189  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8040 0xc0004d8080]
E0322 09:02:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:02:43.410692  543705 memory.go:191] Add success.
I0322 09:02:43.409814  543705 cpu.go:282] Add success.
I0322 09:02:43.420382  543705 net.go:648] Add success.
I0322 09:02:43.423192  543705 net.go:770] primary dev: ETH0
I0322 09:02:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:02:43.423221  543705 net.go:698] Add success.
I0322 09:02:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:02:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:02:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:02:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:02:53.409811  543705 memory.go:184] no items to output this cycle
I0322 09:02:53.409827  543705 cpu.go:275] no items to output this cycle
E0322 09:03:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:03:03.409797  543705 memory.go:184] no items to output this cycle
I0322 09:03:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 09:03:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:03:13.409791  543705 memory.go:191] Add success.
I0322 09:03:13.409806  543705 cpu.go:282] Add success.
W0322 09:03:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:03:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:03:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:03:13.420252  543705 net.go:648] Add success.
I0322 09:03:13.423226  543705 net.go:770] primary dev: ETH0
I0322 09:03:13.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:03:13.423258  543705 net.go:698] Add success.
I0322 09:03:13.581741  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d4bfdcb-d855-42e7-b014-3520672afa7d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:03:13.581776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:03:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:03:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:03:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 09:03:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:03:14.456702  543705 disk_worker.go:494] system disk:vda1
I0322 09:03:14.456738  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:03:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:03:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:03:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:03:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:03:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:03:23.409777  543705 memory.go:184] no items to output this cycle
I0322 09:03:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 09:03:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:03:33.409795  543705 memory.go:184] no items to output this cycle
I0322 09:03:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 09:03:33.977676  543705 disk_info.go:125] begin check local disk info of client
I0322 09:03:33.980151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:03:33.980156  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b400 0xc00007b440]
I0322 09:03:39.584614  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:03:39.584621  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:03:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:03:43.410615  543705 memory.go:191] Add success.
I0322 09:03:43.409820  543705 cpu.go:282] Add success.
I0322 09:03:43.420356  543705 net.go:648] Add success.
I0322 09:03:43.422977  543705 net.go:770] primary dev: ETH0
I0322 09:03:43.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:03:43.423002  543705 net.go:698] Add success.
I0322 09:03:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:03:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:03:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:03:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:03:53.409763  543705 memory.go:184] no items to output this cycle
I0322 09:03:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 09:04:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:04:03.409779  543705 memory.go:184] no items to output this cycle
I0322 09:04:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 09:04:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:04:13.409790  543705 memory.go:191] Add success.
I0322 09:04:13.409806  543705 cpu.go:282] Add success.
W0322 09:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:04:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:04:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:04:13.420075  543705 net.go:648] Add success.
I0322 09:04:13.423221  543705 net.go:770] primary dev: ETH0
I0322 09:04:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:04:13.423245  543705 net.go:698] Add success.
I0322 09:04:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:04:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:04:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 09:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:04:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 09:04:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:04:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:04:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:04:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:04:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:04:16.472350  543705 disk_local_worker.go:436] Get disk info: []
I0322 09:04:23.409883  543705 cpu.go:275] no items to output this cycle
E0322 09:04:23.410034  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:04:23.410047  543705 memory.go:184] no items to output this cycle
E0322 09:04:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:04:33.409773  543705 memory.go:184] no items to output this cycle
I0322 09:04:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 09:04:33.981670  543705 disk_info.go:125] begin check local disk info of client
I0322 09:04:33.984128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:04:33.984133  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be3c0 0xc0003be400]
E0322 09:04:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:04:43.410761  543705 memory.go:191] Add success.
I0322 09:04:43.409806  543705 cpu.go:282] Add success.
I0322 09:04:43.420445  543705 net.go:648] Add success.
I0322 09:04:43.423088  543705 net.go:770] primary dev: ETH0
I0322 09:04:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:04:43.423122  543705 net.go:698] Add success.
I0322 09:04:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:04:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:04:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:04:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:04:53.409780  543705 memory.go:184] no items to output this cycle
I0322 09:04:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 09:05:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:05:03.409803  543705 memory.go:184] no items to output this cycle
I0322 09:05:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 09:05:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:05:13.409826  543705 memory.go:191] Add success.
I0322 09:05:13.409833  543705 cpu.go:282] Add success.
W0322 09:05:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:05:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:05:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:05:13.420338  543705 net.go:648] Add success.
I0322 09:05:13.423396  543705 net.go:770] primary dev: ETH0
I0322 09:05:13.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:05:13.423429  543705 net.go:698] Add success.
I0322 09:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:05:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:05:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 09:05:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:05:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 09:05:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:05:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:05:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:05:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:05:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:05:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:05:23.409810  543705 memory.go:184] no items to output this cycle
I0322 09:05:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 09:05:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:05:33.409808  543705 memory.go:184] no items to output this cycle
I0322 09:05:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 09:05:33.985669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:05:33.988275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:05:33.988282  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b7c0 0xc00007b800]
E0322 09:05:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:05:43.411083  543705 memory.go:191] Add success.
I0322 09:05:43.409835  543705 cpu.go:282] Add success.
I0322 09:05:43.419816  543705 net.go:648] Add success.
I0322 09:05:43.422826  543705 net.go:770] primary dev: ETH0
I0322 09:05:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:05:43.422851  543705 net.go:698] Add success.
I0322 09:05:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:05:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:05:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:05:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:05:53.409789  543705 cpu.go:275] no items to output this cycle
I0322 09:05:53.409798  543705 memory.go:184] no items to output this cycle
E0322 09:06:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:06:03.409815  543705 memory.go:184] no items to output this cycle
I0322 09:06:03.409831  543705 cpu.go:275] no items to output this cycle
W0322 09:06:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:06:13.409750  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:06:13.409756  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:06:13.409851  543705 cpu.go:282] Add success.
E0322 09:06:13.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:06:13.409878  543705 memory.go:191] Add success.
I0322 09:06:13.420135  543705 net.go:648] Add success.
I0322 09:06:13.423210  543705 net.go:770] primary dev: ETH0
I0322 09:06:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:06:13.423243  543705 net.go:698] Add success.
I0322 09:06:13.469112  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6bb77d7d-1396-4ee8-983b-2ac850af1b48","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:06:13.469147  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:06:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:06:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:06:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 09:06:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:06:14.456611  543705 disk_worker.go:494] system disk:vda1
I0322 09:06:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:06:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:06:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:06:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:06:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:06:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:06:23.409803  543705 memory.go:184] no items to output this cycle
I0322 09:06:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 09:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:06:33.409798  543705 memory.go:184] no items to output this cycle
I0322 09:06:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 09:06:33.989670  543705 disk_info.go:125] begin check local disk info of client
I0322 09:06:33.992251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:06:33.992256  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad40 0xc00007ad80]
I0322 09:06:39.585679  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:06:39.585685  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:06:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:06:43.410695  543705 memory.go:191] Add success.
I0322 09:06:43.409814  543705 cpu.go:282] Add success.
I0322 09:06:43.420388  543705 net.go:648] Add success.
I0322 09:06:43.423035  543705 net.go:770] primary dev: ETH0
I0322 09:06:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:06:43.423060  543705 net.go:698] Add success.
I0322 09:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:06:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:06:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:06:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:06:53.409776  543705 memory.go:184] no items to output this cycle
I0322 09:06:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 09:07:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:07:03.409809  543705 memory.go:184] no items to output this cycle
I0322 09:07:03.409827  543705 cpu.go:275] no items to output this cycle
E0322 09:07:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:07:13.409789  543705 memory.go:191] Add success.
I0322 09:07:13.409807  543705 cpu.go:282] Add success.
W0322 09:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:07:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:07:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:07:13.420189  543705 net.go:648] Add success.
I0322 09:07:13.423244  543705 net.go:770] primary dev: ETH0
I0322 09:07:13.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:07:13.423272  543705 net.go:698] Add success.
I0322 09:07:13.452796  543705 event_worker.go:152] Polling the log file for events...
W0322 09:07:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:07:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 09:07:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:07:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:07:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:07:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:07:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 09:07:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:07:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:07:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:07:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:07:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:07:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:07:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:07:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:07:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:07:23.409782  543705 memory.go:184] no items to output this cycle
I0322 09:07:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 09:07:33.409925  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:07:33.409947  543705 memory.go:184] no items to output this cycle
I0322 09:07:33.409985  543705 cpu.go:275] no items to output this cycle
I0322 09:07:33.993668  543705 disk_info.go:125] begin check local disk info of client
I0322 09:07:33.996216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:07:33.996221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003692c0 0xc000369300]
E0322 09:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:07:43.410644  543705 memory.go:191] Add success.
I0322 09:07:43.409800  543705 cpu.go:282] Add success.
I0322 09:07:43.420103  543705 net.go:770] primary dev: ETH0
I0322 09:07:43.420115  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:07:43.420128  543705 net.go:698] Add success.
I0322 09:07:43.420478  543705 net.go:648] Add success.
I0322 09:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:07:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:07:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:07:53.409772  543705 memory.go:184] no items to output this cycle
I0322 09:07:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 09:08:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:08:03.409806  543705 memory.go:184] no items to output this cycle
I0322 09:08:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 09:08:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:08:13.409808  543705 memory.go:191] Add success.
I0322 09:08:13.409809  543705 cpu.go:282] Add success.
W0322 09:08:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:08:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:08:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:08:13.420192  543705 net.go:648] Add success.
I0322 09:08:13.422897  543705 net.go:770] primary dev: ETH0
I0322 09:08:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:08:13.422922  543705 net.go:698] Add success.
I0322 09:08:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:08:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:08:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 09:08:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:08:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 09:08:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:08:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:08:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:08:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:08:16.472534  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:08:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:08:23.409767  543705 memory.go:184] no items to output this cycle
I0322 09:08:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 09:08:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:08:33.409800  543705 memory.go:184] no items to output this cycle
I0322 09:08:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 09:08:33.997662  543705 disk_info.go:125] begin check local disk info of client
I0322 09:08:34.000512  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:08:34.000519  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384000 0xc000384040]
E0322 09:08:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:08:43.410850  543705 memory.go:191] Add success.
I0322 09:08:43.409796  543705 cpu.go:282] Add success.
I0322 09:08:43.420558  543705 net.go:648] Add success.
I0322 09:08:43.423547  543705 net.go:770] primary dev: ETH0
I0322 09:08:43.423560  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:08:43.423572  543705 net.go:698] Add success.
I0322 09:08:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:08:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:08:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:08:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:08:53.409792  543705 memory.go:184] no items to output this cycle
I0322 09:08:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 09:09:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:09:03.409772  543705 memory.go:184] no items to output this cycle
I0322 09:09:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 09:09:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:09:13.409797  543705 memory.go:191] Add success.
I0322 09:09:13.409797  543705 cpu.go:282] Add success.
W0322 09:09:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:09:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:09:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:09:13.420173  543705 net.go:648] Add success.
I0322 09:09:13.423287  543705 net.go:770] primary dev: ETH0
I0322 09:09:13.423301  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:09:13.423313  543705 net.go:698] Add success.
I0322 09:09:13.463034  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c4b9384d-e0c4-44c3-81c4-5de620bea72f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:09:13.463069  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:09:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:09:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:09:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 09:09:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:09:14.456695  543705 disk_worker.go:494] system disk:vda1
I0322 09:09:14.456732  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:09:15.455993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:09:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:09:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:09:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:09:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:09:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:09:23.409780  543705 memory.go:184] no items to output this cycle
I0322 09:09:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 09:09:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:09:33.409797  543705 memory.go:184] no items to output this cycle
I0322 09:09:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 09:09:34.001665  543705 disk_info.go:125] begin check local disk info of client
I0322 09:09:34.004213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:09:34.004219  543705 disk_info.go:196] parse disk info done, disk is : [0xc00054db40 0xc00054db80]
I0322 09:09:39.585821  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:09:39.585828  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:09:43.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:09:43.410658  543705 memory.go:191] Add success.
I0322 09:09:43.410008  543705 cpu.go:282] Add success.
I0322 09:09:43.419714  543705 net.go:648] Add success.
I0322 09:09:43.422723  543705 net.go:770] primary dev: ETH0
I0322 09:09:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:09:43.422752  543705 net.go:698] Add success.
I0322 09:09:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:09:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:09:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:09:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:09:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 09:09:53.409781  543705 memory.go:184] no items to output this cycle
E0322 09:10:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:10:03.409766  543705 memory.go:184] no items to output this cycle
I0322 09:10:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 09:10:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:10:13.409819  543705 memory.go:191] Add success.
I0322 09:10:13.409827  543705 cpu.go:282] Add success.
W0322 09:10:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:10:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:10:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:10:13.420066  543705 net.go:648] Add success.
I0322 09:10:13.422796  543705 net.go:770] primary dev: ETH0
I0322 09:10:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:10:13.422821  543705 net.go:698] Add success.
I0322 09:10:14.455084  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:10:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:10:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 09:10:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:10:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 09:10:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:10:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:10:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:10:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:10:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:10:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:10:23.409769  543705 memory.go:184] no items to output this cycle
I0322 09:10:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 09:10:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:10:33.409798  543705 memory.go:184] no items to output this cycle
I0322 09:10:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 09:10:34.005669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:10:34.008231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:10:34.008237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a800 0xc00046a840]
E0322 09:10:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:10:43.410804  543705 memory.go:191] Add success.
I0322 09:10:43.409941  543705 cpu.go:282] Add success.
I0322 09:10:43.419738  543705 net.go:648] Add success.
I0322 09:10:43.422361  543705 net.go:770] primary dev: ETH0
I0322 09:10:43.422377  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:10:43.422390  543705 net.go:698] Add success.
I0322 09:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:10:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:10:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:10:53.409777  543705 memory.go:184] no items to output this cycle
I0322 09:10:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 09:11:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:11:03.409786  543705 memory.go:184] no items to output this cycle
I0322 09:11:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 09:11:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:11:13.409783  543705 memory.go:191] Add success.
W0322 09:11:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:11:13.409816  543705 cpu.go:282] Add success.
W0322 09:11:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:11:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:11:13.420228  543705 net.go:648] Add success.
I0322 09:11:13.422990  543705 net.go:770] primary dev: ETH0
I0322 09:11:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:11:13.423015  543705 net.go:698] Add success.
I0322 09:11:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:11:14.455240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:11:14.455252  543705 disk_worker.go:708] disk space is not compliant
W0322 09:11:14.455255  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:11:14.456658  543705 disk_worker.go:494] system disk:vda1
I0322 09:11:14.456691  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:11:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:11:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:11:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:11:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:11:23.409798  543705 memory.go:184] no items to output this cycle
I0322 09:11:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 09:11:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:11:33.409780  543705 memory.go:184] no items to output this cycle
I0322 09:11:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 09:11:34.009672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:11:34.012139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:11:34.012144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dd140 0xc0003dd180]
E0322 09:11:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:11:43.410691  543705 memory.go:191] Add success.
I0322 09:11:43.409799  543705 cpu.go:282] Add success.
I0322 09:11:43.420469  543705 net.go:648] Add success.
I0322 09:11:43.423049  543705 net.go:770] primary dev: ETH0
I0322 09:11:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:11:43.423074  543705 net.go:698] Add success.
I0322 09:11:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:11:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:11:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:11:53.410352  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:11:53.410367  543705 memory.go:184] no items to output this cycle
I0322 09:11:53.410371  543705 cpu.go:275] no items to output this cycle
E0322 09:12:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:12:03.409784  543705 memory.go:184] no items to output this cycle
I0322 09:12:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 09:12:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:12:13.409784  543705 memory.go:191] Add success.
W0322 09:12:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:12:13.409817  543705 cpu.go:282] Add success.
W0322 09:12:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:12:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:12:13.420189  543705 net.go:648] Add success.
I0322 09:12:13.422992  543705 net.go:770] primary dev: ETH0
I0322 09:12:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:12:13.423016  543705 net.go:698] Add success.
I0322 09:12:13.470108  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae126dcf-d835-41cd-a47c-d9a2c95bb509","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:12:13.470152  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 09:12:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:12:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0322 09:12:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:12:14.456896  543705 disk_worker.go:494] system disk:vda1
I0322 09:12:14.456936  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:12:14.456967  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:12:14.456974  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:12:14.456979  543705 custom_config.go:64] query custom config with name: gpu
E0322 09:12:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:12:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:12:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:12:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:12:16.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:12:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:12:16.472309  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:12:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:12:23.409775  543705 memory.go:184] no items to output this cycle
I0322 09:12:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 09:12:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:12:33.409776  543705 memory.go:184] no items to output this cycle
I0322 09:12:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 09:12:34.013674  543705 disk_info.go:125] begin check local disk info of client
I0322 09:12:34.016159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:12:34.016164  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fef40 0xc0003fef80]
I0322 09:12:39.586640  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:12:39.586647  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:12:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:12:43.410668  543705 memory.go:191] Add success.
I0322 09:12:43.409815  543705 cpu.go:282] Add success.
I0322 09:12:43.420381  543705 net.go:770] primary dev: ETH0
I0322 09:12:43.420395  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:12:43.420407  543705 net.go:698] Add success.
I0322 09:12:43.420853  543705 net.go:648] Add success.
I0322 09:12:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:12:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:12:53.409808  543705 memory.go:184] no items to output this cycle
I0322 09:12:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 09:13:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:13:03.409781  543705 memory.go:184] no items to output this cycle
I0322 09:13:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 09:13:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:13:13.409825  543705 memory.go:191] Add success.
I0322 09:13:13.409833  543705 cpu.go:282] Add success.
W0322 09:13:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:13:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:13:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:13:13.420143  543705 net.go:648] Add success.
I0322 09:13:13.422940  543705 net.go:770] primary dev: ETH0
I0322 09:13:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:13:13.422965  543705 net.go:698] Add success.
I0322 09:13:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:13:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:13:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 09:13:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:13:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 09:13:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:13:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:13:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:13:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:13:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:13:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:13:23.409762  543705 memory.go:184] no items to output this cycle
I0322 09:13:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 09:13:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:13:33.409796  543705 memory.go:184] no items to output this cycle
I0322 09:13:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 09:13:34.017671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:13:34.020118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:13:34.020124  543705 disk_info.go:196] parse disk info done, disk is : [0xc000288140 0xc000288180]
E0322 09:13:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:13:43.410620  543705 memory.go:191] Add success.
I0322 09:13:43.409801  543705 cpu.go:282] Add success.
I0322 09:13:43.420438  543705 net.go:648] Add success.
I0322 09:13:43.423024  543705 net.go:770] primary dev: ETH0
I0322 09:13:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:13:43.423054  543705 net.go:698] Add success.
I0322 09:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:13:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:13:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:13:53.409766  543705 memory.go:184] no items to output this cycle
I0322 09:13:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 09:14:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:14:03.409788  543705 memory.go:184] no items to output this cycle
I0322 09:14:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 09:14:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:14:13.409790  543705 memory.go:191] Add success.
I0322 09:14:13.409813  543705 cpu.go:282] Add success.
W0322 09:14:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:14:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:14:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:14:13.420191  543705 net.go:648] Add success.
I0322 09:14:13.423013  543705 net.go:770] primary dev: ETH0
I0322 09:14:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:14:13.423048  543705 net.go:698] Add success.
I0322 09:14:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:14:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:14:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 09:14:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:14:14.456564  543705 disk_worker.go:494] system disk:vda1
I0322 09:14:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:14:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:14:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:14:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:14:23.409761  543705 memory.go:184] no items to output this cycle
I0322 09:14:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 09:14:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:14:33.409784  543705 memory.go:184] no items to output this cycle
I0322 09:14:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 09:14:34.021672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:14:34.024185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:14:34.024191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dee40 0xc0003dee80]
E0322 09:14:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:14:43.410636  543705 memory.go:191] Add success.
I0322 09:14:43.409820  543705 cpu.go:282] Add success.
I0322 09:14:43.420344  543705 net.go:648] Add success.
I0322 09:14:43.423023  543705 net.go:770] primary dev: ETH0
I0322 09:14:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:14:43.423050  543705 net.go:698] Add success.
I0322 09:14:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:14:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:14:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:14:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:14:53.409795  543705 memory.go:184] no items to output this cycle
I0322 09:14:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 09:15:03.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:15:03.409855  543705 memory.go:184] no items to output this cycle
I0322 09:15:03.409872  543705 cpu.go:275] no items to output this cycle
E0322 09:15:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:15:13.409798  543705 memory.go:191] Add success.
I0322 09:15:13.409814  543705 cpu.go:282] Add success.
W0322 09:15:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:15:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:15:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:15:13.420176  543705 net.go:648] Add success.
I0322 09:15:13.422743  543705 net.go:770] primary dev: ETH0
I0322 09:15:13.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:15:13.422768  543705 net.go:698] Add success.
I0322 09:15:13.468025  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dd465ed1-e0ad-4860-b35a-4fe1ae56f8b8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:15:13.468057  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:15:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:15:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:15:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 09:15:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:15:14.456673  543705 disk_worker.go:494] system disk:vda1
I0322 09:15:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:15:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:15:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:15:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:15:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:15:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:15:23.409769  543705 memory.go:184] no items to output this cycle
I0322 09:15:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 09:15:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:15:33.409776  543705 memory.go:184] no items to output this cycle
I0322 09:15:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 09:15:34.025671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:15:34.028176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:15:34.028182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be100 0xc0003be140]
I0322 09:15:39.587674  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:15:39.587681  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:15:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:15:43.410670  543705 memory.go:191] Add success.
I0322 09:15:43.409799  543705 cpu.go:282] Add success.
I0322 09:15:43.420428  543705 net.go:648] Add success.
I0322 09:15:43.423235  543705 net.go:770] primary dev: ETH0
I0322 09:15:43.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:15:43.423419  543705 net.go:698] Add success.
I0322 09:15:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:15:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:15:53.409781  543705 memory.go:184] no items to output this cycle
I0322 09:15:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 09:16:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:16:03.409784  543705 memory.go:184] no items to output this cycle
I0322 09:16:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 09:16:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:16:13.409789  543705 memory.go:191] Add success.
I0322 09:16:13.409805  543705 cpu.go:282] Add success.
W0322 09:16:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:16:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:16:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:16:13.420130  543705 net.go:648] Add success.
I0322 09:16:13.422868  543705 net.go:770] primary dev: ETH0
I0322 09:16:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:16:13.422896  543705 net.go:698] Add success.
I0322 09:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:16:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:16:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 09:16:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:16:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 09:16:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:16:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:16:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:16:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:16:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:16:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:16:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:16:23.409762  543705 memory.go:184] no items to output this cycle
I0322 09:16:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 09:16:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:16:33.409793  543705 memory.go:184] no items to output this cycle
I0322 09:16:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 09:16:34.029670  543705 disk_info.go:125] begin check local disk info of client
I0322 09:16:34.032136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:16:34.032141  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e500 0xc00049e540]
E0322 09:16:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:16:43.410691  543705 memory.go:191] Add success.
I0322 09:16:43.409820  543705 cpu.go:282] Add success.
I0322 09:16:43.420415  543705 net.go:648] Add success.
I0322 09:16:43.422941  543705 net.go:770] primary dev: ETH0
I0322 09:16:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:16:43.422966  543705 net.go:698] Add success.
I0322 09:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:16:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:16:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:16:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:16:53.409807  543705 memory.go:184] no items to output this cycle
I0322 09:16:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 09:17:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:17:03.409785  543705 cpu.go:275] no items to output this cycle
I0322 09:17:03.409795  543705 memory.go:184] no items to output this cycle
E0322 09:17:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:17:13.409830  543705 memory.go:191] Add success.
I0322 09:17:13.409836  543705 cpu.go:282] Add success.
W0322 09:17:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:17:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:17:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:17:13.420146  543705 net.go:648] Add success.
I0322 09:17:13.422958  543705 net.go:770] primary dev: ETH0
I0322 09:17:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:17:13.422984  543705 net.go:698] Add success.
I0322 09:17:13.453607  543705 event_worker.go:152] Polling the log file for events...
W0322 09:17:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:17:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 09:17:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:17:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:17:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:17:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:17:14.456552  543705 disk_worker.go:494] system disk:vda1
I0322 09:17:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:17:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:17:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:17:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:17:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:17:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:17:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:17:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:17:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:17:23.409771  543705 memory.go:184] no items to output this cycle
I0322 09:17:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 09:17:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:17:33.409784  543705 memory.go:184] no items to output this cycle
I0322 09:17:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 09:17:34.033671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:17:34.036127  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:17:34.036133  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352c00 0xc000352c40]
E0322 09:17:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:17:43.410924  543705 memory.go:191] Add success.
I0322 09:17:43.409821  543705 cpu.go:282] Add success.
I0322 09:17:43.420662  543705 net.go:648] Add success.
I0322 09:17:43.423623  543705 net.go:770] primary dev: ETH0
I0322 09:17:43.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:17:43.423651  543705 net.go:698] Add success.
I0322 09:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:17:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:17:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:17:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:17:53.409806  543705 memory.go:184] no items to output this cycle
I0322 09:17:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 09:18:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:18:03.409788  543705 memory.go:184] no items to output this cycle
I0322 09:18:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 09:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:18:13.409808  543705 memory.go:191] Add success.
I0322 09:18:13.409809  543705 cpu.go:282] Add success.
W0322 09:18:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:18:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:18:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:18:13.420190  543705 net.go:648] Add success.
I0322 09:18:13.423169  543705 net.go:770] primary dev: ETH0
I0322 09:18:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:18:13.423194  543705 net.go:698] Add success.
I0322 09:18:13.473482  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0e3a416e-34ae-4da7-9a79-76cb15a2053a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:18:13.473516  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:18:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:18:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:18:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 09:18:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:18:14.456685  543705 disk_worker.go:494] system disk:vda1
I0322 09:18:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:18:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:18:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:18:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:18:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:18:23.409795  543705 memory.go:184] no items to output this cycle
I0322 09:18:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 09:18:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:18:33.409779  543705 memory.go:184] no items to output this cycle
I0322 09:18:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 09:18:34.037671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:18:34.040138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:18:34.040143  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d2d40 0xc0003d2d80]
I0322 09:18:39.588637  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:18:39.588644  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:18:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:18:43.410737  543705 memory.go:191] Add success.
I0322 09:18:43.409787  543705 cpu.go:282] Add success.
I0322 09:18:43.420443  543705 net.go:648] Add success.
I0322 09:18:43.423960  543705 net.go:770] primary dev: ETH0
I0322 09:18:43.423973  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:18:43.424178  543705 net.go:698] Add success.
I0322 09:18:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:18:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:18:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:18:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:18:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 09:18:53.409787  543705 memory.go:184] no items to output this cycle
E0322 09:19:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:19:03.409811  543705 memory.go:184] no items to output this cycle
I0322 09:19:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 09:19:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:19:13.409790  543705 memory.go:191] Add success.
I0322 09:19:13.409812  543705 cpu.go:282] Add success.
W0322 09:19:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:19:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:19:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:19:13.420194  543705 net.go:648] Add success.
I0322 09:19:13.423384  543705 net.go:770] primary dev: ETH0
I0322 09:19:13.423397  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:19:13.423409  543705 net.go:698] Add success.
I0322 09:19:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:19:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:19:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 09:19:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:19:14.456530  543705 disk_worker.go:494] system disk:vda1
I0322 09:19:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:19:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:19:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:19:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:19:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:19:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:19:23.409767  543705 memory.go:184] no items to output this cycle
I0322 09:19:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:19:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:19:33.409778  543705 memory.go:184] no items to output this cycle
I0322 09:19:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 09:19:34.041680  543705 disk_info.go:125] begin check local disk info of client
I0322 09:19:34.044168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:19:34.044174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5cc0 0xc0000c5d00]
E0322 09:19:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:19:43.410741  543705 memory.go:191] Add success.
I0322 09:19:43.409796  543705 cpu.go:282] Add success.
I0322 09:19:43.420466  543705 net.go:648] Add success.
I0322 09:19:43.423432  543705 net.go:770] primary dev: ETH0
I0322 09:19:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:19:43.423457  543705 net.go:698] Add success.
I0322 09:19:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:19:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:19:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:19:53.409816  543705 memory.go:184] no items to output this cycle
I0322 09:19:53.409833  543705 cpu.go:275] no items to output this cycle
E0322 09:20:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:20:03.409811  543705 memory.go:184] no items to output this cycle
I0322 09:20:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 09:20:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:20:13.409791  543705 memory.go:191] Add success.
I0322 09:20:13.409814  543705 cpu.go:282] Add success.
W0322 09:20:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:20:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:20:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:20:13.420192  543705 net.go:648] Add success.
I0322 09:20:13.422709  543705 net.go:770] primary dev: ETH0
I0322 09:20:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:20:13.422734  543705 net.go:698] Add success.
I0322 09:20:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:20:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:20:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 09:20:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:20:14.456484  543705 disk_worker.go:494] system disk:vda1
I0322 09:20:14.456527  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:20:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:20:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:20:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:20:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:20:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:20:23.409774  543705 memory.go:184] no items to output this cycle
I0322 09:20:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 09:20:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:20:33.409765  543705 memory.go:184] no items to output this cycle
I0322 09:20:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 09:20:34.045671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:20:34.048168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:20:34.048174  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9f40 0xc000508000]
E0322 09:20:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:20:43.410693  543705 memory.go:191] Add success.
I0322 09:20:43.409800  543705 cpu.go:282] Add success.
I0322 09:20:43.420398  543705 net.go:648] Add success.
I0322 09:20:43.423148  543705 net.go:770] primary dev: ETH0
I0322 09:20:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:20:43.423180  543705 net.go:698] Add success.
I0322 09:20:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:20:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:20:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:20:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:20:53.409773  543705 memory.go:184] no items to output this cycle
I0322 09:20:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 09:21:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:21:03.409783  543705 memory.go:184] no items to output this cycle
I0322 09:21:03.409901  543705 cpu.go:275] no items to output this cycle
E0322 09:21:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:21:13.409834  543705 memory.go:191] Add success.
I0322 09:21:13.409837  543705 cpu.go:282] Add success.
W0322 09:21:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:21:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:21:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:21:13.420494  543705 net.go:648] Add success.
I0322 09:21:13.423352  543705 net.go:770] primary dev: ETH0
I0322 09:21:13.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:21:13.423382  543705 net.go:698] Add success.
I0322 09:21:13.488626  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bad23e39-ce1a-4cba-a4fd-9e8e6dc992e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:21:13.488660  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:21:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:21:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 09:21:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:21:14.456595  543705 disk_worker.go:494] system disk:vda1
I0322 09:21:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:21:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:21:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:21:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:21:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:21:23.409779  543705 memory.go:184] no items to output this cycle
I0322 09:21:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 09:21:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:21:33.409813  543705 memory.go:184] no items to output this cycle
I0322 09:21:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 09:21:34.049667  543705 disk_info.go:125] begin check local disk info of client
I0322 09:21:34.052180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:21:34.052185  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b500 0xc00032b540]
I0322 09:21:39.589704  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:21:39.589710  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:21:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:21:43.410663  543705 memory.go:191] Add success.
I0322 09:21:43.409814  543705 cpu.go:282] Add success.
I0322 09:21:43.420407  543705 net.go:648] Add success.
I0322 09:21:43.423258  543705 net.go:770] primary dev: ETH0
I0322 09:21:43.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:21:43.423288  543705 net.go:698] Add success.
I0322 09:21:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:21:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:21:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:21:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:21:53.409775  543705 memory.go:184] no items to output this cycle
I0322 09:21:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 09:22:03.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:22:03.409838  543705 memory.go:184] no items to output this cycle
I0322 09:22:03.409845  543705 cpu.go:275] no items to output this cycle
E0322 09:22:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:22:13.409814  543705 memory.go:191] Add success.
I0322 09:22:13.409815  543705 cpu.go:282] Add success.
W0322 09:22:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:22:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:22:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:22:13.420180  543705 net.go:648] Add success.
I0322 09:22:13.423181  543705 net.go:770] primary dev: ETH0
I0322 09:22:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:22:13.423210  543705 net.go:698] Add success.
W0322 09:22:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:22:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 09:22:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:22:14.455884  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:22:14.455893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:22:14.455898  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:22:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 09:22:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:22:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:22:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:22:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:22:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:22:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:22:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:22:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:22:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:22:23.409769  543705 memory.go:184] no items to output this cycle
I0322 09:22:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:22:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:22:33.409826  543705 memory.go:184] no items to output this cycle
I0322 09:22:33.409838  543705 cpu.go:275] no items to output this cycle
I0322 09:22:34.053672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:22:34.056196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:22:34.056202  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278b00 0xc000278b40]
E0322 09:22:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:22:43.410658  543705 memory.go:191] Add success.
I0322 09:22:43.409815  543705 cpu.go:282] Add success.
I0322 09:22:43.420352  543705 net.go:648] Add success.
I0322 09:22:43.422974  543705 net.go:770] primary dev: ETH0
I0322 09:22:43.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:22:43.423000  543705 net.go:698] Add success.
I0322 09:22:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:22:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:22:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:22:53.409771  543705 memory.go:184] no items to output this cycle
I0322 09:22:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 09:23:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:23:03.409814  543705 memory.go:184] no items to output this cycle
I0322 09:23:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 09:23:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:23:13.409790  543705 memory.go:191] Add success.
I0322 09:23:13.409813  543705 cpu.go:282] Add success.
W0322 09:23:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:23:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:23:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:23:13.420237  543705 net.go:648] Add success.
I0322 09:23:13.422912  543705 net.go:770] primary dev: ETH0
I0322 09:23:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:23:13.422941  543705 net.go:698] Add success.
I0322 09:23:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:23:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:23:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 09:23:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:23:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 09:23:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:23:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:23:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:23:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:23:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:23:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:23:23.409770  543705 memory.go:184] no items to output this cycle
I0322 09:23:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 09:23:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:23:33.409799  543705 memory.go:184] no items to output this cycle
I0322 09:23:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 09:23:34.057671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:23:34.060314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:23:34.060319  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a500 0xc00032a540]
E0322 09:23:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:23:43.410725  543705 memory.go:191] Add success.
I0322 09:23:43.409816  543705 cpu.go:282] Add success.
I0322 09:23:43.420486  543705 net.go:648] Add success.
I0322 09:23:43.423585  543705 net.go:770] primary dev: ETH0
I0322 09:23:43.423598  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:23:43.423610  543705 net.go:698] Add success.
I0322 09:23:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:23:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:23:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:23:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:23:53.409761  543705 memory.go:184] no items to output this cycle
I0322 09:23:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 09:24:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:24:03.409801  543705 memory.go:184] no items to output this cycle
I0322 09:24:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 09:24:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:24:13.409795  543705 memory.go:191] Add success.
W0322 09:24:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:24:13.409822  543705 cpu.go:282] Add success.
W0322 09:24:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:24:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:24:13.420181  543705 net.go:648] Add success.
I0322 09:24:13.422942  543705 net.go:770] primary dev: ETH0
I0322 09:24:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:24:13.422972  543705 net.go:698] Add success.
I0322 09:24:13.642114  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af104d97-024d-4610-ab51-c0aba3c4d880","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:24:13.642157  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:24:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:24:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:24:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 09:24:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:24:14.456711  543705 disk_worker.go:494] system disk:vda1
I0322 09:24:14.456744  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:24:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:24:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:24:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:24:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:24:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:24:23.409772  543705 memory.go:184] no items to output this cycle
I0322 09:24:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 09:24:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:24:33.409804  543705 memory.go:184] no items to output this cycle
I0322 09:24:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 09:24:34.061674  543705 disk_info.go:125] begin check local disk info of client
I0322 09:24:34.064183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:24:34.064189  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1ac0 0xc0004a1b00]
I0322 09:24:39.590647  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:24:39.590654  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:24:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:24:43.410637  543705 memory.go:191] Add success.
I0322 09:24:43.409791  543705 cpu.go:282] Add success.
I0322 09:24:43.420373  543705 net.go:648] Add success.
I0322 09:24:43.423326  543705 net.go:770] primary dev: ETH0
I0322 09:24:43.423342  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:24:43.423366  543705 net.go:698] Add success.
I0322 09:24:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:24:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:24:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:24:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:24:53.409794  543705 memory.go:184] no items to output this cycle
I0322 09:24:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 09:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:25:03.409805  543705 memory.go:184] no items to output this cycle
I0322 09:25:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 09:25:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:25:13.409824  543705 memory.go:191] Add success.
I0322 09:25:13.409825  543705 cpu.go:282] Add success.
W0322 09:25:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:25:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:25:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:25:13.420173  543705 net.go:648] Add success.
I0322 09:25:13.423126  543705 net.go:770] primary dev: ETH0
I0322 09:25:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:25:13.423152  543705 net.go:698] Add success.
I0322 09:25:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:25:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:25:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 09:25:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:25:14.456600  543705 disk_worker.go:494] system disk:vda1
I0322 09:25:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:25:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:25:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:25:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:25:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:25:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:25:23.409794  543705 memory.go:184] no items to output this cycle
I0322 09:25:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 09:25:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:25:33.409779  543705 memory.go:184] no items to output this cycle
I0322 09:25:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 09:25:34.065672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:25:34.068133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:25:34.068138  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b180 0xc00007b1c0]
E0322 09:25:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:25:43.410729  543705 memory.go:191] Add success.
I0322 09:25:43.409790  543705 cpu.go:282] Add success.
I0322 09:25:43.420483  543705 net.go:648] Add success.
I0322 09:25:43.423503  543705 net.go:770] primary dev: ETH0
I0322 09:25:43.423518  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:25:43.423532  543705 net.go:698] Add success.
I0322 09:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:25:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:25:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:25:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:25:53.409765  543705 memory.go:184] no items to output this cycle
I0322 09:25:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 09:26:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:26:03.409783  543705 memory.go:184] no items to output this cycle
I0322 09:26:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 09:26:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:26:13.409831  543705 memory.go:191] Add success.
I0322 09:26:13.409836  543705 cpu.go:282] Add success.
W0322 09:26:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:26:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:26:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:26:13.420300  543705 net.go:648] Add success.
I0322 09:26:13.423030  543705 net.go:770] primary dev: ETH0
I0322 09:26:13.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:26:13.423056  543705 net.go:698] Add success.
I0322 09:26:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:26:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:26:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 09:26:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:26:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 09:26:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:26:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:26:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:26:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:26:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:26:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:26:23.409796  543705 memory.go:184] no items to output this cycle
I0322 09:26:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 09:26:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:26:33.409778  543705 memory.go:184] no items to output this cycle
I0322 09:26:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 09:26:34.069671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:26:34.072223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:26:34.072228  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386600 0xc000386640]
E0322 09:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:26:43.410797  543705 memory.go:191] Add success.
I0322 09:26:43.409802  543705 cpu.go:282] Add success.
I0322 09:26:43.420490  543705 net.go:648] Add success.
I0322 09:26:43.423438  543705 net.go:770] primary dev: ETH0
I0322 09:26:43.423452  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:26:43.423464  543705 net.go:698] Add success.
I0322 09:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:26:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:26:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:26:53.409792  543705 memory.go:184] no items to output this cycle
I0322 09:26:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 09:27:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:27:03.409781  543705 memory.go:184] no items to output this cycle
I0322 09:27:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 09:27:13.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:27:13.410003  543705 cpu.go:282] Add success.
I0322 09:27:13.410080  543705 memory.go:191] Add success.
W0322 09:27:13.410111  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:27:13.410124  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:27:13.410127  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:27:13.419767  543705 net.go:648] Add success.
I0322 09:27:13.428570  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 09:27:13.428643  543705 net.go:770] primary dev: ETH0
I0322 09:27:13.428655  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:27:13.428666  543705 net.go:698] Add success.
I0322 09:27:13.453336  543705 event_worker.go:152] Polling the log file for events...
I0322 09:27:13.468440  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"054b3a70-b8a3-41b9-9c4c-ef269e69179e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:27:13.468471  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 09:27:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:27:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 09:27:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:27:14.456127  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:27:14.456136  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:27:14.456142  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:27:14.456460  543705 disk_worker.go:494] system disk:vda1
I0322 09:27:14.456489  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:27:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:27:15.456831  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:27:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:27:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:27:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:27:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:27:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:27:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:27:23.409774  543705 memory.go:184] no items to output this cycle
I0322 09:27:23.409774  543705 cpu.go:275] no items to output this cycle
E0322 09:27:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:27:33.409798  543705 memory.go:184] no items to output this cycle
I0322 09:27:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 09:27:34.073670  543705 disk_info.go:125] begin check local disk info of client
I0322 09:27:34.076179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:27:34.076186  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034d280 0xc00034d2c0]
I0322 09:27:39.591707  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:27:39.591713  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:27:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:27:43.410733  543705 memory.go:191] Add success.
I0322 09:27:43.409819  543705 cpu.go:282] Add success.
I0322 09:27:43.420456  543705 net.go:648] Add success.
I0322 09:27:43.423413  543705 net.go:770] primary dev: ETH0
I0322 09:27:43.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:27:43.423438  543705 net.go:698] Add success.
I0322 09:27:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:27:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:27:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:27:53.409778  543705 memory.go:184] no items to output this cycle
I0322 09:27:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 09:28:03.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:28:03.409903  543705 memory.go:184] no items to output this cycle
I0322 09:28:03.409946  543705 cpu.go:275] no items to output this cycle
E0322 09:28:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:28:13.409843  543705 memory.go:191] Add success.
I0322 09:28:13.409861  543705 cpu.go:282] Add success.
W0322 09:28:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:28:13.409900  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:28:13.409906  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:28:13.420216  543705 net.go:648] Add success.
I0322 09:28:13.423141  543705 net.go:770] primary dev: ETH0
I0322 09:28:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:28:13.423165  543705 net.go:698] Add success.
I0322 09:28:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:28:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:28:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 09:28:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:28:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 09:28:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:28:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:28:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:28:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:28:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:28:23.409797  543705 memory.go:184] no items to output this cycle
I0322 09:28:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 09:28:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:28:33.409784  543705 memory.go:184] no items to output this cycle
I0322 09:28:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 09:28:34.077682  543705 disk_info.go:125] begin check local disk info of client
I0322 09:28:34.080134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:28:34.080141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004decc0 0xc0004ded00]
E0322 09:28:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:28:43.410638  543705 memory.go:191] Add success.
I0322 09:28:43.409828  543705 cpu.go:282] Add success.
I0322 09:28:43.420374  543705 net.go:648] Add success.
I0322 09:28:43.422872  543705 net.go:770] primary dev: ETH0
I0322 09:28:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:28:43.422898  543705 net.go:698] Add success.
I0322 09:28:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:28:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:28:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:28:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:28:53.409790  543705 memory.go:184] no items to output this cycle
I0322 09:28:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 09:29:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:29:03.409801  543705 memory.go:184] no items to output this cycle
I0322 09:29:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 09:29:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:29:13.409818  543705 memory.go:191] Add success.
I0322 09:29:13.409820  543705 cpu.go:282] Add success.
W0322 09:29:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:29:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:29:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:29:13.420173  543705 net.go:648] Add success.
I0322 09:29:13.422912  543705 net.go:770] primary dev: ETH0
I0322 09:29:13.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:29:13.422937  543705 net.go:698] Add success.
I0322 09:29:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:29:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:29:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 09:29:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:29:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 09:29:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:29:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:29:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:29:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:29:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:29:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:29:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:29:23.409775  543705 memory.go:184] no items to output this cycle
I0322 09:29:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 09:29:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:29:33.409799  543705 memory.go:184] no items to output this cycle
I0322 09:29:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 09:29:34.081673  543705 disk_info.go:125] begin check local disk info of client
I0322 09:29:34.084122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:29:34.084127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001c6100 0xc0001c6140]
E0322 09:29:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:29:43.410698  543705 memory.go:191] Add success.
I0322 09:29:43.409816  543705 cpu.go:282] Add success.
I0322 09:29:43.420418  543705 net.go:648] Add success.
I0322 09:29:43.423148  543705 net.go:770] primary dev: ETH0
I0322 09:29:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:29:43.423202  543705 net.go:698] Add success.
I0322 09:29:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:29:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:29:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:29:53.409805  543705 memory.go:184] no items to output this cycle
I0322 09:29:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 09:30:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:30:03.409780  543705 memory.go:184] no items to output this cycle
I0322 09:30:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 09:30:13.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:30:13.409939  543705 memory.go:191] Add success.
W0322 09:30:13.409993  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:30:13.410015  543705 cpu.go:282] Add success.
W0322 09:30:13.410011  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:30:13.410022  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:30:13.419732  543705 net.go:648] Add success.
I0322 09:30:13.422419  543705 net.go:770] primary dev: ETH0
I0322 09:30:13.422434  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:30:13.422448  543705 net.go:698] Add success.
I0322 09:30:13.468208  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"26f2a099-2b40-4b87-a4e7-79ffd6719037","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:30:13.468239  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:30:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:30:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:30:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 09:30:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:30:14.456685  543705 disk_worker.go:494] system disk:vda1
I0322 09:30:14.456724  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:30:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:30:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:30:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:30:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:30:23.409778  543705 memory.go:184] no items to output this cycle
I0322 09:30:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 09:30:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:30:33.409781  543705 memory.go:184] no items to output this cycle
I0322 09:30:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 09:30:34.085674  543705 disk_info.go:125] begin check local disk info of client
I0322 09:30:34.088155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:30:34.088160  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386300 0xc000386340]
I0322 09:30:39.592651  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:30:39.592657  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:30:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:30:43.410668  543705 memory.go:191] Add success.
I0322 09:30:43.409802  543705 cpu.go:282] Add success.
I0322 09:30:43.420322  543705 net.go:648] Add success.
I0322 09:30:43.423004  543705 net.go:770] primary dev: ETH0
I0322 09:30:43.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:30:43.423029  543705 net.go:698] Add success.
I0322 09:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:30:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:30:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:30:53.409779  543705 memory.go:184] no items to output this cycle
I0322 09:30:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 09:31:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:31:03.409801  543705 memory.go:184] no items to output this cycle
I0322 09:31:03.409819  543705 cpu.go:275] no items to output this cycle
I0322 09:31:13.409922  543705 cpu.go:282] Add success.
E0322 09:31:13.410029  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:31:13.410057  543705 memory.go:191] Add success.
W0322 09:31:13.410209  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:31:13.410236  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:31:13.410239  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:31:13.419734  543705 net.go:648] Add success.
I0322 09:31:13.422310  543705 net.go:770] primary dev: ETH0
I0322 09:31:13.422323  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:31:13.422334  543705 net.go:698] Add success.
I0322 09:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:31:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:31:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 09:31:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:31:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 09:31:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:31:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:31:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:31:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:31:23.409799  543705 memory.go:184] no items to output this cycle
I0322 09:31:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 09:31:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:31:33.409780  543705 memory.go:184] no items to output this cycle
I0322 09:31:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 09:31:34.089672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:31:34.092128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:31:34.092134  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0322 09:31:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:31:43.410644  543705 memory.go:191] Add success.
I0322 09:31:43.409822  543705 cpu.go:282] Add success.
I0322 09:31:43.420330  543705 net.go:648] Add success.
I0322 09:31:43.423225  543705 net.go:770] primary dev: ETH0
I0322 09:31:43.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:31:43.423249  543705 net.go:698] Add success.
I0322 09:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:31:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:31:53.409765  543705 memory.go:184] no items to output this cycle
I0322 09:31:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 09:32:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:32:03.409783  543705 memory.go:184] no items to output this cycle
I0322 09:32:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 09:32:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:32:13.409819  543705 memory.go:191] Add success.
I0322 09:32:13.409826  543705 cpu.go:282] Add success.
W0322 09:32:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:32:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:32:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:32:13.420329  543705 net.go:648] Add success.
I0322 09:32:13.423025  543705 net.go:770] primary dev: ETH0
I0322 09:32:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:32:13.423053  543705 net.go:698] Add success.
W0322 09:32:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:32:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 09:32:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:32:14.456791  543705 disk_worker.go:494] system disk:vda1
I0322 09:32:14.456830  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:32:14.457144  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:32:14.457151  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:32:14.457156  543705 custom_config.go:64] query custom config with name: gpu
E0322 09:32:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:32:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:32:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:32:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:32:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:32:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:32:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:32:23.410248  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:32:23.410264  543705 memory.go:184] no items to output this cycle
I0322 09:32:23.410278  543705 cpu.go:275] no items to output this cycle
E0322 09:32:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:32:33.409799  543705 memory.go:184] no items to output this cycle
I0322 09:32:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 09:32:34.093669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:32:34.096162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:32:34.096167  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4780 0xc0000c47c0]
E0322 09:32:43.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:32:43.411407  543705 memory.go:191] Add success.
I0322 09:32:43.410425  543705 cpu.go:282] Add success.
I0322 09:32:43.420119  543705 net.go:648] Add success.
I0322 09:32:43.422952  543705 net.go:770] primary dev: ETH0
I0322 09:32:43.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:32:43.422979  543705 net.go:698] Add success.
I0322 09:32:46.458588  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:32:46.458655  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:32:46.458678  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:32:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:32:53.409776  543705 memory.go:184] no items to output this cycle
I0322 09:32:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 09:33:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:33:03.409774  543705 memory.go:184] no items to output this cycle
I0322 09:33:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 09:33:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:33:13.409818  543705 memory.go:191] Add success.
I0322 09:33:13.409835  543705 cpu.go:282] Add success.
W0322 09:33:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:33:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:33:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:33:13.420332  543705 net.go:648] Add success.
I0322 09:33:13.423116  543705 net.go:770] primary dev: ETH0
I0322 09:33:13.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:33:13.423142  543705 net.go:698] Add success.
I0322 09:33:13.467783  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3613c855-9e80-4a63-8397-ad91b5cb75bc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:33:13.467815  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:33:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:33:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:33:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 09:33:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:33:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 09:33:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:33:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:33:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:33:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:33:23.409781  543705 memory.go:184] no items to output this cycle
I0322 09:33:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 09:33:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:33:33.409768  543705 memory.go:184] no items to output this cycle
I0322 09:33:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 09:33:34.097668  543705 disk_info.go:125] begin check local disk info of client
I0322 09:33:34.100174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:33:34.100180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5540 0xc0000c5580]
I0322 09:33:39.593720  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:33:39.593726  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:33:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:33:43.410523  543705 memory.go:191] Add success.
I0322 09:33:43.409790  543705 cpu.go:282] Add success.
I0322 09:33:43.420227  543705 net.go:648] Add success.
I0322 09:33:43.422773  543705 net.go:770] primary dev: ETH0
I0322 09:33:43.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:33:43.422801  543705 net.go:698] Add success.
I0322 09:33:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:33:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:33:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:33:53.409799  543705 memory.go:184] no items to output this cycle
I0322 09:33:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 09:34:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:34:03.409792  543705 memory.go:184] no items to output this cycle
I0322 09:34:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:34:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:34:13.409798  543705 memory.go:191] Add success.
I0322 09:34:13.409799  543705 cpu.go:282] Add success.
W0322 09:34:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:34:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:34:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:34:13.420366  543705 net.go:648] Add success.
I0322 09:34:13.423015  543705 net.go:770] primary dev: ETH0
I0322 09:34:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:34:13.423043  543705 net.go:698] Add success.
I0322 09:34:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:34:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:34:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 09:34:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:34:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 09:34:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:34:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:34:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:34:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:34:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:34:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:34:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 09:34:23.409790  543705 memory.go:184] no items to output this cycle
E0322 09:34:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:34:33.409776  543705 memory.go:184] no items to output this cycle
I0322 09:34:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 09:34:34.101680  543705 disk_info.go:125] begin check local disk info of client
I0322 09:34:34.104163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:34:34.104169  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0322 09:34:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:34:43.410569  543705 memory.go:191] Add success.
I0322 09:34:43.409821  543705 cpu.go:282] Add success.
I0322 09:34:43.420291  543705 net.go:648] Add success.
I0322 09:34:43.422926  543705 net.go:770] primary dev: ETH0
I0322 09:34:43.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:34:43.422956  543705 net.go:698] Add success.
I0322 09:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:34:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:34:53.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:34:53.410265  543705 memory.go:184] no items to output this cycle
I0322 09:34:53.410298  543705 cpu.go:275] no items to output this cycle
E0322 09:35:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:35:03.409774  543705 memory.go:184] no items to output this cycle
I0322 09:35:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 09:35:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:35:13.409817  543705 memory.go:191] Add success.
I0322 09:35:13.409820  543705 cpu.go:282] Add success.
W0322 09:35:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:35:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:35:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:35:13.420138  543705 net.go:648] Add success.
I0322 09:35:13.423225  543705 net.go:770] primary dev: ETH0
I0322 09:35:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:35:13.423251  543705 net.go:698] Add success.
I0322 09:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:35:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:35:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 09:35:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:35:14.456702  543705 disk_worker.go:494] system disk:vda1
I0322 09:35:14.456747  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:35:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:35:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:35:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:35:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:35:23.409794  543705 memory.go:184] no items to output this cycle
I0322 09:35:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 09:35:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:35:33.409794  543705 memory.go:184] no items to output this cycle
I0322 09:35:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 09:35:34.105669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:35:34.108135  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:35:34.108141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9f00 0xc0002b9f40]
E0322 09:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:35:43.410651  543705 memory.go:191] Add success.
I0322 09:35:43.409816  543705 cpu.go:282] Add success.
I0322 09:35:43.420367  543705 net.go:648] Add success.
I0322 09:35:43.423016  543705 net.go:770] primary dev: ETH0
I0322 09:35:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:35:43.423044  543705 net.go:698] Add success.
I0322 09:35:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:35:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:35:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:35:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:35:53.409780  543705 cpu.go:275] no items to output this cycle
I0322 09:35:53.409785  543705 memory.go:184] no items to output this cycle
E0322 09:36:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:36:03.409783  543705 memory.go:184] no items to output this cycle
I0322 09:36:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 09:36:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:36:13.409823  543705 memory.go:191] Add success.
I0322 09:36:13.409834  543705 cpu.go:282] Add success.
W0322 09:36:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:36:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:36:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:36:13.420223  543705 net.go:648] Add success.
I0322 09:36:13.423094  543705 net.go:770] primary dev: ETH0
I0322 09:36:13.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:36:13.423123  543705 net.go:698] Add success.
I0322 09:36:13.464666  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6dd19a40-0fce-40fa-a9f8-4d34306a665c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:36:13.464699  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:36:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:36:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:36:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 09:36:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:36:14.456869  543705 disk_worker.go:494] system disk:vda1
I0322 09:36:14.456899  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:36:15.455610  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:36:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:36:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:36:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:36:23.409775  543705 memory.go:184] no items to output this cycle
I0322 09:36:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 09:36:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:36:33.409795  543705 memory.go:184] no items to output this cycle
I0322 09:36:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 09:36:34.109669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:36:34.112141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:36:34.112147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8d40 0xc0002b8d80]
I0322 09:36:39.594669  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:36:39.594676  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:36:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:36:43.410663  543705 memory.go:191] Add success.
I0322 09:36:43.409797  543705 cpu.go:282] Add success.
I0322 09:36:43.420421  543705 net.go:648] Add success.
I0322 09:36:43.422940  543705 net.go:770] primary dev: ETH0
I0322 09:36:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:36:43.422968  543705 net.go:698] Add success.
I0322 09:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:36:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:36:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:36:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:36:53.409802  543705 memory.go:184] no items to output this cycle
I0322 09:36:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 09:37:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:37:03.409776  543705 memory.go:184] no items to output this cycle
I0322 09:37:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 09:37:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:37:13.409788  543705 memory.go:191] Add success.
I0322 09:37:13.409807  543705 cpu.go:282] Add success.
W0322 09:37:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:37:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:37:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:37:13.420063  543705 net.go:648] Add success.
I0322 09:37:13.422972  543705 net.go:770] primary dev: ETH0
I0322 09:37:13.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:37:13.422997  543705 net.go:698] Add success.
I0322 09:37:13.453600  543705 event_worker.go:152] Polling the log file for events...
W0322 09:37:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:37:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 09:37:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:37:14.456806  543705 disk_worker.go:494] system disk:vda1
I0322 09:37:14.456848  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:37:14.457012  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:37:14.457020  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:37:14.457026  543705 custom_config.go:64] query custom config with name: gpu
E0322 09:37:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:37:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:37:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:37:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:37:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:37:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:37:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:37:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:37:23.409792  543705 memory.go:184] no items to output this cycle
I0322 09:37:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 09:37:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:37:33.409778  543705 cpu.go:275] no items to output this cycle
I0322 09:37:33.409783  543705 memory.go:184] no items to output this cycle
I0322 09:37:34.113671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:37:34.116149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:37:34.116154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8f00 0xc0002b8f40]
E0322 09:37:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:37:43.410705  543705 memory.go:191] Add success.
I0322 09:37:43.409800  543705 cpu.go:282] Add success.
I0322 09:37:43.420482  543705 net.go:648] Add success.
I0322 09:37:43.423310  543705 net.go:770] primary dev: ETH0
I0322 09:37:43.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:37:43.423337  543705 net.go:698] Add success.
I0322 09:37:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:37:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:37:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:37:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:37:53.409799  543705 memory.go:184] no items to output this cycle
I0322 09:37:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 09:38:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:38:03.409784  543705 memory.go:184] no items to output this cycle
I0322 09:38:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 09:38:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:38:13.409789  543705 memory.go:191] Add success.
W0322 09:38:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:38:13.409816  543705 cpu.go:282] Add success.
W0322 09:38:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:38:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:38:13.420129  543705 net.go:648] Add success.
I0322 09:38:13.422947  543705 net.go:770] primary dev: ETH0
I0322 09:38:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:38:13.422975  543705 net.go:698] Add success.
I0322 09:38:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:38:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:38:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 09:38:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:38:14.456598  543705 disk_worker.go:494] system disk:vda1
I0322 09:38:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:38:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:38:16.472426  543705 disk_local_worker.go:436] Get disk info: []
I0322 09:38:23.409897  543705 cpu.go:275] no items to output this cycle
E0322 09:38:23.410044  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:38:23.410054  543705 memory.go:184] no items to output this cycle
E0322 09:38:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:38:33.409773  543705 memory.go:184] no items to output this cycle
I0322 09:38:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 09:38:34.117671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:38:34.120121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:38:34.120127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b83c0 0xc0002b8400]
E0322 09:38:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:38:43.410586  543705 memory.go:191] Add success.
I0322 09:38:43.409820  543705 cpu.go:282] Add success.
I0322 09:38:43.420275  543705 net.go:648] Add success.
I0322 09:38:43.422751  543705 net.go:770] primary dev: ETH0
I0322 09:38:43.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:38:43.422775  543705 net.go:698] Add success.
I0322 09:38:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:38:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:38:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:38:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:38:53.409781  543705 memory.go:184] no items to output this cycle
I0322 09:38:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 09:39:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:39:03.409798  543705 memory.go:184] no items to output this cycle
I0322 09:39:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 09:39:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:39:13.409785  543705 memory.go:191] Add success.
W0322 09:39:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:39:13.409817  543705 cpu.go:282] Add success.
W0322 09:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:39:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:39:13.420371  543705 net.go:648] Add success.
I0322 09:39:13.423114  543705 net.go:770] primary dev: ETH0
I0322 09:39:13.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:39:13.423139  543705 net.go:698] Add success.
I0322 09:39:13.468652  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"51239eb5-dfba-43b1-a7f2-31285e3d876d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:39:13.468685  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:39:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:39:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:39:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 09:39:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:39:14.456620  543705 disk_worker.go:494] system disk:vda1
I0322 09:39:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:39:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:39:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:39:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:39:23.409793  543705 memory.go:184] no items to output this cycle
I0322 09:39:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 09:39:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:39:33.409813  543705 memory.go:184] no items to output this cycle
I0322 09:39:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 09:39:34.121674  543705 disk_info.go:125] begin check local disk info of client
I0322 09:39:34.124194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:39:34.124201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348ac0 0xc000348b00]
I0322 09:39:39.595666  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:39:39.595672  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:39:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:39:43.410676  543705 memory.go:191] Add success.
I0322 09:39:43.409814  543705 cpu.go:282] Add success.
I0322 09:39:43.420377  543705 net.go:648] Add success.
I0322 09:39:43.423092  543705 net.go:770] primary dev: ETH0
I0322 09:39:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:39:43.423117  543705 net.go:698] Add success.
I0322 09:39:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:39:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:39:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:39:53.409774  543705 cpu.go:275] no items to output this cycle
I0322 09:39:53.409786  543705 memory.go:184] no items to output this cycle
E0322 09:40:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:40:03.409772  543705 memory.go:184] no items to output this cycle
I0322 09:40:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 09:40:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:40:13.409817  543705 memory.go:191] Add success.
I0322 09:40:13.409829  543705 cpu.go:282] Add success.
W0322 09:40:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:40:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:40:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:40:13.420147  543705 net.go:648] Add success.
I0322 09:40:13.423067  543705 net.go:770] primary dev: ETH0
I0322 09:40:13.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:40:13.423105  543705 net.go:698] Add success.
I0322 09:40:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:40:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:40:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 09:40:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:40:14.456535  543705 disk_worker.go:494] system disk:vda1
I0322 09:40:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:40:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:40:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:40:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:40:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:40:23.409769  543705 memory.go:184] no items to output this cycle
I0322 09:40:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 09:40:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:40:33.409765  543705 memory.go:184] no items to output this cycle
I0322 09:40:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 09:40:34.125667  543705 disk_info.go:125] begin check local disk info of client
I0322 09:40:34.128205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:40:34.128211  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051fd80 0xc00051fdc0]
E0322 09:40:43.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:40:43.410689  543705 memory.go:191] Add success.
I0322 09:40:43.409962  543705 cpu.go:282] Add success.
I0322 09:40:43.419735  543705 net.go:648] Add success.
I0322 09:40:43.422470  543705 net.go:770] primary dev: ETH0
I0322 09:40:43.422485  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:40:43.422499  543705 net.go:698] Add success.
I0322 09:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:40:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:40:53.409767  543705 memory.go:184] no items to output this cycle
I0322 09:40:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 09:41:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:41:03.409782  543705 memory.go:184] no items to output this cycle
I0322 09:41:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 09:41:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:41:13.409800  543705 memory.go:191] Add success.
I0322 09:41:13.409810  543705 cpu.go:282] Add success.
W0322 09:41:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:41:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:41:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:41:13.420267  543705 net.go:648] Add success.
I0322 09:41:13.422934  543705 net.go:770] primary dev: ETH0
I0322 09:41:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:41:13.422963  543705 net.go:698] Add success.
I0322 09:41:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:41:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:41:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 09:41:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:41:14.456524  543705 disk_worker.go:494] system disk:vda1
I0322 09:41:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:41:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:41:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:41:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:41:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:41:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:41:23.409768  543705 memory.go:184] no items to output this cycle
I0322 09:41:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:41:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:41:33.409796  543705 memory.go:184] no items to output this cycle
I0322 09:41:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 09:41:34.129676  543705 disk_info.go:125] begin check local disk info of client
I0322 09:41:34.132149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:41:34.132155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003710c0 0xc000371100]
E0322 09:41:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:41:43.410653  543705 memory.go:191] Add success.
I0322 09:41:43.409814  543705 cpu.go:282] Add success.
I0322 09:41:43.420594  543705 net.go:648] Add success.
I0322 09:41:43.423272  543705 net.go:770] primary dev: ETH0
I0322 09:41:43.423286  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:41:43.423297  543705 net.go:698] Add success.
I0322 09:41:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:41:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:41:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:41:53.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:41:53.410389  543705 memory.go:184] no items to output this cycle
I0322 09:41:53.410415  543705 cpu.go:275] no items to output this cycle
E0322 09:42:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:42:03.409813  543705 memory.go:184] no items to output this cycle
I0322 09:42:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 09:42:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:42:13.409785  543705 memory.go:191] Add success.
I0322 09:42:13.409793  543705 cpu.go:282] Add success.
W0322 09:42:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:42:13.412538  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:42:13.412542  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:42:13.420003  543705 net.go:770] primary dev: ETH0
I0322 09:42:13.420021  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:42:13.420034  543705 net.go:698] Add success.
I0322 09:42:13.420416  543705 net.go:648] Add success.
I0322 09:42:13.467488  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f93a357-671d-49df-bb54-27a724ff5ffd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:42:13.467522  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 09:42:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:42:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 09:42:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:42:14.456129  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:42:14.456138  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:42:14.456144  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:42:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 09:42:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:42:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:42:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:42:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:42:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:42:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:42:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:42:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:42:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:42:23.409792  543705 memory.go:184] no items to output this cycle
I0322 09:42:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 09:42:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:42:33.409774  543705 memory.go:184] no items to output this cycle
I0322 09:42:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 09:42:34.133671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:42:34.136222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:42:34.136228  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a6c0 0xc00048a700]
I0322 09:42:39.596681  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:42:39.596688  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0322 09:42:43.409798  543705 cpu.go:282] Add success.
E0322 09:42:43.409977  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:42:43.411009  543705 memory.go:191] Add success.
I0322 09:42:43.419582  543705 net.go:770] primary dev: ETH0
I0322 09:42:43.419596  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:42:43.419608  543705 net.go:698] Add success.
I0322 09:42:43.419829  543705 net.go:648] Add success.
I0322 09:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:42:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:42:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:42:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:42:53.409800  543705 memory.go:184] no items to output this cycle
I0322 09:42:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 09:43:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:43:03.409775  543705 memory.go:184] no items to output this cycle
I0322 09:43:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 09:43:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:43:13.409786  543705 memory.go:191] Add success.
W0322 09:43:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:43:13.409817  543705 cpu.go:282] Add success.
W0322 09:43:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:43:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:43:13.420138  543705 net.go:648] Add success.
I0322 09:43:13.422701  543705 net.go:770] primary dev: ETH0
I0322 09:43:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:43:13.422735  543705 net.go:698] Add success.
I0322 09:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:43:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:43:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 09:43:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:43:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 09:43:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:43:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:43:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:43:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:43:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:43:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:43:23.409781  543705 memory.go:184] no items to output this cycle
I0322 09:43:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 09:43:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:43:33.409799  543705 memory.go:184] no items to output this cycle
I0322 09:43:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 09:43:34.136314  543705 disk_info.go:125] begin check local disk info of client
I0322 09:43:34.138818  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:43:34.138824  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a000 0xc00047a040]
E0322 09:43:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:43:43.410741  543705 memory.go:191] Add success.
I0322 09:43:43.409807  543705 cpu.go:282] Add success.
I0322 09:43:43.420521  543705 net.go:648] Add success.
I0322 09:43:43.423397  543705 net.go:770] primary dev: ETH0
I0322 09:43:43.423410  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:43:43.423422  543705 net.go:698] Add success.
I0322 09:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:43:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:43:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:43:53.410419  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:43:53.410436  543705 memory.go:184] no items to output this cycle
I0322 09:43:53.410447  543705 cpu.go:275] no items to output this cycle
E0322 09:44:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:44:03.409810  543705 memory.go:184] no items to output this cycle
I0322 09:44:03.409824  543705 cpu.go:275] no items to output this cycle
W0322 09:44:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:44:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:44:13.409740  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:44:13.409800  543705 cpu.go:282] Add success.
E0322 09:44:13.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:44:13.409862  543705 memory.go:191] Add success.
I0322 09:44:13.420108  543705 net.go:648] Add success.
I0322 09:44:13.423688  543705 net.go:770] primary dev: ETH0
I0322 09:44:13.423703  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:44:13.423718  543705 net.go:698] Add success.
I0322 09:44:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:44:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:44:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 09:44:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:44:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 09:44:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:44:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:44:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:44:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:44:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:44:23.409794  543705 memory.go:184] no items to output this cycle
I0322 09:44:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 09:44:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:44:33.409777  543705 memory.go:184] no items to output this cycle
I0322 09:44:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 09:44:34.138910  543705 disk_info.go:125] begin check local disk info of client
I0322 09:44:34.141377  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:44:34.141383  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047fbc0 0xc00047fc00]
E0322 09:44:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:44:43.410828  543705 memory.go:191] Add success.
I0322 09:44:43.409811  543705 cpu.go:282] Add success.
I0322 09:44:43.420537  543705 net.go:648] Add success.
I0322 09:44:43.423524  543705 net.go:770] primary dev: ETH0
I0322 09:44:43.423538  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:44:43.423549  543705 net.go:698] Add success.
I0322 09:44:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:44:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:44:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:44:53.409795  543705 memory.go:184] no items to output this cycle
I0322 09:44:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 09:45:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:45:03.409784  543705 cpu.go:275] no items to output this cycle
I0322 09:45:03.409792  543705 memory.go:184] no items to output this cycle
E0322 09:45:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:45:13.409819  543705 memory.go:191] Add success.
I0322 09:45:13.409829  543705 cpu.go:282] Add success.
W0322 09:45:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:45:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:45:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:45:13.420146  543705 net.go:648] Add success.
I0322 09:45:13.422991  543705 net.go:770] primary dev: ETH0
I0322 09:45:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:45:13.423016  543705 net.go:698] Add success.
I0322 09:45:13.469304  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2e8fe9e-15a2-4130-9e3e-c14f02e61c76","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:45:13.469343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:45:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:45:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:45:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0322 09:45:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:45:14.456620  543705 disk_worker.go:494] system disk:vda1
I0322 09:45:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:45:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:45:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:45:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:45:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:45:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:45:23.409794  543705 memory.go:184] no items to output this cycle
I0322 09:45:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 09:45:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:45:33.409769  543705 memory.go:184] no items to output this cycle
I0322 09:45:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 09:45:34.141665  543705 disk_info.go:125] begin check local disk info of client
I0322 09:45:34.144110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:45:34.144116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000506dc0 0xc000506e00]
I0322 09:45:39.597706  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:45:39.597713  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:45:43.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:45:43.410807  543705 memory.go:191] Add success.
I0322 09:45:43.410005  543705 cpu.go:282] Add success.
I0322 09:45:43.419723  543705 net.go:648] Add success.
I0322 09:45:43.422189  543705 net.go:770] primary dev: ETH0
I0322 09:45:43.422201  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:45:43.422213  543705 net.go:698] Add success.
I0322 09:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:45:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:45:53.409798  543705 memory.go:184] no items to output this cycle
I0322 09:45:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 09:46:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:46:03.409789  543705 memory.go:184] no items to output this cycle
I0322 09:46:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 09:46:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:46:13.409807  543705 memory.go:191] Add success.
I0322 09:46:13.409808  543705 cpu.go:282] Add success.
W0322 09:46:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:46:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:46:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:46:13.420190  543705 net.go:648] Add success.
I0322 09:46:13.422854  543705 net.go:770] primary dev: ETH0
I0322 09:46:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:46:13.422883  543705 net.go:698] Add success.
I0322 09:46:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:46:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:46:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 09:46:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:46:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 09:46:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:46:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:46:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:46:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:46:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:46:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:46:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:46:23.409776  543705 memory.go:184] no items to output this cycle
I0322 09:46:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 09:46:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:46:33.409767  543705 memory.go:184] no items to output this cycle
I0322 09:46:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 09:46:34.145673  543705 disk_info.go:125] begin check local disk info of client
I0322 09:46:34.148154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:46:34.148160  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370f80 0xc000370fc0]
E0322 09:46:43.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:46:43.410724  543705 memory.go:191] Add success.
I0322 09:46:43.409933  543705 cpu.go:282] Add success.
I0322 09:46:43.419773  543705 net.go:648] Add success.
I0322 09:46:43.422479  543705 net.go:770] primary dev: ETH0
I0322 09:46:43.422497  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:46:43.422512  543705 net.go:698] Add success.
I0322 09:46:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:46:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:46:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:46:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:46:53.409778  543705 memory.go:184] no items to output this cycle
I0322 09:46:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 09:47:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:47:03.409769  543705 memory.go:184] no items to output this cycle
I0322 09:47:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:47:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:47:13.409803  543705 memory.go:191] Add success.
I0322 09:47:13.409804  543705 cpu.go:282] Add success.
W0322 09:47:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:47:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:47:13.420303  543705 net.go:648] Add success.
I0322 09:47:13.423479  543705 net.go:770] primary dev: ETH0
I0322 09:47:13.423492  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:47:13.423505  543705 net.go:698] Add success.
I0322 09:47:13.453070  543705 event_worker.go:152] Polling the log file for events...
W0322 09:47:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:47:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 09:47:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:47:14.456942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:47:14.456951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:47:14.456957  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:47:14.457004  543705 disk_worker.go:494] system disk:vda1
I0322 09:47:14.457048  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:47:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:47:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:47:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:47:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:47:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:47:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:47:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:47:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:47:23.409767  543705 memory.go:184] no items to output this cycle
I0322 09:47:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 09:47:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:47:33.409807  543705 memory.go:184] no items to output this cycle
I0322 09:47:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 09:47:34.149679  543705 disk_info.go:125] begin check local disk info of client
I0322 09:47:34.152113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:47:34.152119  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2f00 0xc0004b2f40]
E0322 09:47:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:47:43.410577  543705 memory.go:191] Add success.
I0322 09:47:43.409804  543705 cpu.go:282] Add success.
I0322 09:47:43.420502  543705 net.go:648] Add success.
I0322 09:47:43.423159  543705 net.go:770] primary dev: ETH0
I0322 09:47:43.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:47:43.423192  543705 net.go:698] Add success.
I0322 09:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:47:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:47:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:47:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:47:53.409779  543705 memory.go:184] no items to output this cycle
I0322 09:47:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 09:48:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:48:03.409787  543705 memory.go:184] no items to output this cycle
I0322 09:48:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 09:48:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:48:13.409811  543705 memory.go:191] Add success.
I0322 09:48:13.409815  543705 cpu.go:282] Add success.
W0322 09:48:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:48:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:48:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:48:13.420143  543705 net.go:648] Add success.
I0322 09:48:13.422976  543705 net.go:770] primary dev: ETH0
I0322 09:48:13.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:48:13.423002  543705 net.go:698] Add success.
I0322 09:48:13.463282  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d1f85400-f90a-4f0d-b719-c1b5218feff5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:48:13.463318  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:48:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:48:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:48:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 09:48:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:48:14.456622  543705 disk_worker.go:494] system disk:vda1
I0322 09:48:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:48:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:48:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:48:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:48:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:48:23.409762  543705 memory.go:184] no items to output this cycle
I0322 09:48:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:48:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:48:33.409792  543705 memory.go:184] no items to output this cycle
I0322 09:48:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 09:48:34.153668  543705 disk_info.go:125] begin check local disk info of client
I0322 09:48:34.156153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:48:34.156159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0980 0xc0003c09c0]
I0322 09:48:39.598692  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:48:39.598698  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:48:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:48:43.410717  543705 memory.go:191] Add success.
I0322 09:48:43.409804  543705 cpu.go:282] Add success.
I0322 09:48:43.420640  543705 net.go:648] Add success.
I0322 09:48:43.423466  543705 net.go:770] primary dev: ETH0
I0322 09:48:43.423480  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:48:43.423492  543705 net.go:698] Add success.
I0322 09:48:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:48:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:48:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:48:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:48:53.409803  543705 memory.go:184] no items to output this cycle
I0322 09:48:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 09:49:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:49:03.409778  543705 memory.go:184] no items to output this cycle
I0322 09:49:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 09:49:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:49:13.409790  543705 memory.go:191] Add success.
W0322 09:49:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 09:49:13.409817  543705 cpu.go:282] Add success.
W0322 09:49:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:49:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:49:13.420134  543705 net.go:648] Add success.
I0322 09:49:13.423352  543705 net.go:770] primary dev: ETH0
I0322 09:49:13.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:49:13.423378  543705 net.go:698] Add success.
I0322 09:49:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:49:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:49:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 09:49:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:49:14.456521  543705 disk_worker.go:494] system disk:vda1
I0322 09:49:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:49:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:49:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:49:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:49:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:49:16.472490  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:49:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:49:23.409798  543705 memory.go:184] no items to output this cycle
I0322 09:49:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 09:49:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:49:33.409803  543705 memory.go:184] no items to output this cycle
I0322 09:49:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 09:49:34.157673  543705 disk_info.go:125] begin check local disk info of client
I0322 09:49:34.160140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:49:34.160146  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf280 0xc0003bf2c0]
E0322 09:49:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:49:43.410717  543705 memory.go:191] Add success.
I0322 09:49:43.409810  543705 cpu.go:282] Add success.
I0322 09:49:43.420565  543705 net.go:648] Add success.
I0322 09:49:43.423324  543705 net.go:770] primary dev: ETH0
I0322 09:49:43.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:49:43.423350  543705 net.go:698] Add success.
I0322 09:49:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:49:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:49:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:49:53.410413  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:49:53.410432  543705 memory.go:184] no items to output this cycle
I0322 09:49:53.410444  543705 cpu.go:275] no items to output this cycle
E0322 09:50:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:50:03.409788  543705 memory.go:184] no items to output this cycle
I0322 09:50:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 09:50:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:50:13.409817  543705 memory.go:191] Add success.
I0322 09:50:13.409825  543705 cpu.go:282] Add success.
W0322 09:50:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:50:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:50:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:50:13.420204  543705 net.go:648] Add success.
I0322 09:50:13.422876  543705 net.go:770] primary dev: ETH0
I0322 09:50:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:50:13.422903  543705 net.go:698] Add success.
I0322 09:50:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:50:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:50:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 09:50:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:50:14.456651  543705 disk_worker.go:494] system disk:vda1
I0322 09:50:14.456683  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:50:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:50:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:50:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:50:23.409804  543705 memory.go:184] no items to output this cycle
I0322 09:50:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 09:50:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:50:33.409778  543705 memory.go:184] no items to output this cycle
I0322 09:50:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 09:50:34.161669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:50:34.164141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:50:34.164147  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046cac0 0xc00046cb00]
E0322 09:50:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:50:43.410700  543705 memory.go:191] Add success.
I0322 09:50:43.409814  543705 cpu.go:282] Add success.
I0322 09:50:43.420403  543705 net.go:648] Add success.
I0322 09:50:43.423274  543705 net.go:770] primary dev: ETH0
I0322 09:50:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:50:43.423300  543705 net.go:698] Add success.
I0322 09:50:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:50:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:50:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:50:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:50:53.409767  543705 memory.go:184] no items to output this cycle
I0322 09:50:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 09:51:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:51:03.409798  543705 memory.go:184] no items to output this cycle
I0322 09:51:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 09:51:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:51:13.409794  543705 memory.go:191] Add success.
I0322 09:51:13.409811  543705 cpu.go:282] Add success.
W0322 09:51:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:51:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:51:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:51:13.420187  543705 net.go:648] Add success.
I0322 09:51:13.422986  543705 net.go:770] primary dev: ETH0
I0322 09:51:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:51:13.423013  543705 net.go:698] Add success.
I0322 09:51:13.464187  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8dfd03f3-992e-402a-a434-8215d3ef0b95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:51:13.464222  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:51:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:51:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:51:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 09:51:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:51:14.456708  543705 disk_worker.go:494] system disk:vda1
I0322 09:51:14.456738  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:51:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:51:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:51:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:51:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:51:23.409773  543705 memory.go:184] no items to output this cycle
I0322 09:51:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 09:51:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:51:33.409800  543705 memory.go:184] no items to output this cycle
I0322 09:51:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 09:51:34.165672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:51:34.168163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:51:34.168168  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8dc0 0xc0003d8e00]
I0322 09:51:39.599701  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:51:39.599708  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:51:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:51:43.410649  543705 memory.go:191] Add success.
I0322 09:51:43.409817  543705 cpu.go:282] Add success.
I0322 09:51:43.420364  543705 net.go:648] Add success.
I0322 09:51:43.423103  543705 net.go:770] primary dev: ETH0
I0322 09:51:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:51:43.423135  543705 net.go:698] Add success.
I0322 09:51:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:51:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:51:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:51:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:51:53.409897  543705 memory.go:184] no items to output this cycle
I0322 09:51:53.409920  543705 cpu.go:275] no items to output this cycle
E0322 09:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:52:03.409787  543705 memory.go:184] no items to output this cycle
I0322 09:52:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 09:52:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:52:13.409824  543705 memory.go:191] Add success.
I0322 09:52:13.409834  543705 cpu.go:282] Add success.
W0322 09:52:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:52:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:52:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:52:13.420600  543705 net.go:648] Add success.
I0322 09:52:13.423468  543705 net.go:770] primary dev: ETH0
I0322 09:52:13.423481  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:52:13.423493  543705 net.go:698] Add success.
W0322 09:52:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:52:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 09:52:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:52:14.456814  543705 disk_worker.go:494] system disk:vda1
I0322 09:52:14.456855  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:52:14.457188  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:52:14.457196  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:52:14.457201  543705 custom_config.go:64] query custom config with name: gpu
E0322 09:52:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:52:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:52:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:52:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:52:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:52:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:52:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:52:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:52:23.409792  543705 memory.go:184] no items to output this cycle
I0322 09:52:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 09:52:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:52:33.409775  543705 memory.go:184] no items to output this cycle
I0322 09:52:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 09:52:34.169679  543705 disk_info.go:125] begin check local disk info of client
I0322 09:52:34.172084  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:52:34.172091  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004821c0 0xc000482200]
E0322 09:52:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:52:43.410677  543705 memory.go:191] Add success.
I0322 09:52:43.409810  543705 cpu.go:282] Add success.
I0322 09:52:43.420365  543705 net.go:648] Add success.
I0322 09:52:43.423176  543705 net.go:770] primary dev: ETH0
I0322 09:52:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:52:43.423204  543705 net.go:698] Add success.
I0322 09:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:52:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:52:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:52:53.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:52:53.409891  543705 memory.go:184] no items to output this cycle
I0322 09:52:53.409970  543705 cpu.go:275] no items to output this cycle
E0322 09:53:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:53:03.409768  543705 memory.go:184] no items to output this cycle
I0322 09:53:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 09:53:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:53:13.409822  543705 memory.go:191] Add success.
I0322 09:53:13.409826  543705 cpu.go:282] Add success.
W0322 09:53:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:53:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:53:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:53:13.420257  543705 net.go:648] Add success.
I0322 09:53:13.422987  543705 net.go:770] primary dev: ETH0
I0322 09:53:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:53:13.423011  543705 net.go:698] Add success.
I0322 09:53:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:53:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:53:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 09:53:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:53:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 09:53:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:53:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:53:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:53:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:53:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:53:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:53:23.409776  543705 memory.go:184] no items to output this cycle
I0322 09:53:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 09:53:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:53:33.409800  543705 memory.go:184] no items to output this cycle
I0322 09:53:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 09:53:34.173671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:53:34.176315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:53:34.176321  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469380 0xc0004693c0]
E0322 09:53:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:53:43.410623  543705 memory.go:191] Add success.
I0322 09:53:43.409818  543705 cpu.go:282] Add success.
I0322 09:53:43.420308  543705 net.go:648] Add success.
I0322 09:53:43.422897  543705 net.go:770] primary dev: ETH0
I0322 09:53:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:53:43.422922  543705 net.go:698] Add success.
I0322 09:53:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:53:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:53:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:53:53.409781  543705 memory.go:184] no items to output this cycle
I0322 09:53:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 09:54:03.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:54:03.409918  543705 memory.go:184] no items to output this cycle
I0322 09:54:03.409930  543705 cpu.go:275] no items to output this cycle
E0322 09:54:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:54:13.409803  543705 memory.go:191] Add success.
I0322 09:54:13.409816  543705 cpu.go:282] Add success.
W0322 09:54:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:54:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:54:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:54:13.420179  543705 net.go:648] Add success.
I0322 09:54:13.423040  543705 net.go:770] primary dev: ETH0
I0322 09:54:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:54:13.423068  543705 net.go:698] Add success.
I0322 09:54:13.464148  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"048458c5-3f24-4fd8-9d42-41c1b1d24e7f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:54:13.464181  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 09:54:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:54:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:54:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 09:54:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:54:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 09:54:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:54:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:54:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:54:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:54:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:54:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:54:23.409805  543705 memory.go:184] no items to output this cycle
I0322 09:54:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 09:54:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:54:33.409810  543705 memory.go:184] no items to output this cycle
I0322 09:54:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 09:54:34.177674  543705 disk_info.go:125] begin check local disk info of client
I0322 09:54:34.180131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:54:34.180138  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a740 0xc00029a780]
I0322 09:54:39.600695  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:54:39.600702  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:54:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:54:43.410615  543705 memory.go:191] Add success.
I0322 09:54:43.409808  543705 cpu.go:282] Add success.
I0322 09:54:43.420376  543705 net.go:648] Add success.
I0322 09:54:43.423064  543705 net.go:770] primary dev: ETH0
I0322 09:54:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:54:43.423089  543705 net.go:698] Add success.
I0322 09:54:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:54:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:54:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:54:53.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:54:53.409903  543705 cpu.go:275] no items to output this cycle
I0322 09:54:53.409907  543705 memory.go:184] no items to output this cycle
E0322 09:55:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:55:03.409785  543705 memory.go:184] no items to output this cycle
I0322 09:55:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 09:55:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:55:13.409789  543705 memory.go:191] Add success.
W0322 09:55:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:55:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:55:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:55:13.409830  543705 cpu.go:282] Add success.
I0322 09:55:13.420175  543705 net.go:648] Add success.
I0322 09:55:13.422919  543705 net.go:770] primary dev: ETH0
I0322 09:55:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:55:13.422954  543705 net.go:698] Add success.
I0322 09:55:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:55:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:55:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 09:55:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:55:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 09:55:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:55:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:55:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:55:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:55:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:55:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:55:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:55:23.409785  543705 memory.go:184] no items to output this cycle
I0322 09:55:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 09:55:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:55:33.409812  543705 memory.go:184] no items to output this cycle
I0322 09:55:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 09:55:34.181671  543705 disk_info.go:125] begin check local disk info of client
I0322 09:55:34.184176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:55:34.184181  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492900 0xc000492940]
E0322 09:55:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:55:43.410695  543705 memory.go:191] Add success.
I0322 09:55:43.409817  543705 cpu.go:282] Add success.
I0322 09:55:43.420412  543705 net.go:648] Add success.
I0322 09:55:43.423327  543705 net.go:770] primary dev: ETH0
I0322 09:55:43.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:55:43.423352  543705 net.go:698] Add success.
I0322 09:55:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:55:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:55:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:55:53.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:55:53.409869  543705 memory.go:184] no items to output this cycle
I0322 09:55:53.409961  543705 cpu.go:275] no items to output this cycle
E0322 09:56:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:56:03.409821  543705 memory.go:184] no items to output this cycle
I0322 09:56:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 09:56:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:56:13.409794  543705 memory.go:191] Add success.
I0322 09:56:13.409813  543705 cpu.go:282] Add success.
W0322 09:56:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:56:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:56:13.420234  543705 net.go:648] Add success.
I0322 09:56:13.423408  543705 net.go:770] primary dev: ETH0
I0322 09:56:13.423423  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:56:13.423437  543705 net.go:698] Add success.
I0322 09:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:56:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:56:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 09:56:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:56:14.456636  543705 disk_worker.go:494] system disk:vda1
I0322 09:56:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:56:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:56:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:56:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:56:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:56:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:56:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:56:23.409759  543705 memory.go:184] no items to output this cycle
I0322 09:56:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 09:56:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:56:33.409803  543705 memory.go:184] no items to output this cycle
I0322 09:56:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 09:56:34.185669  543705 disk_info.go:125] begin check local disk info of client
I0322 09:56:34.188230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:56:34.188236  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe800 0xc0003fe840]
E0322 09:56:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:56:43.410671  543705 memory.go:191] Add success.
I0322 09:56:43.409800  543705 cpu.go:282] Add success.
I0322 09:56:43.420445  543705 net.go:648] Add success.
I0322 09:56:43.423157  543705 net.go:770] primary dev: ETH0
I0322 09:56:43.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:56:43.423181  543705 net.go:698] Add success.
I0322 09:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:56:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:56:53.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:56:53.410255  543705 cpu.go:275] no items to output this cycle
I0322 09:56:53.410269  543705 memory.go:184] no items to output this cycle
E0322 09:57:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:57:03.409774  543705 memory.go:184] no items to output this cycle
I0322 09:57:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 09:57:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:57:13.409793  543705 memory.go:191] Add success.
I0322 09:57:13.409809  543705 cpu.go:282] Add success.
W0322 09:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:57:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:57:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:57:13.420058  543705 net.go:648] Add success.
I0322 09:57:13.422941  543705 net.go:770] primary dev: ETH0
I0322 09:57:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:57:13.422967  543705 net.go:698] Add success.
I0322 09:57:13.428933  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 09:57:13.453169  543705 event_worker.go:152] Polling the log file for events...
I0322 09:57:13.470479  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ae94b5c-2c47-404f-9c41-1a0508c250b6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 09:57:13.470514  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 09:57:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:57:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 09:57:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0322 09:57:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 09:57:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 09:57:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0322 09:57:14.456850  543705 disk_worker.go:494] system disk:vda1
I0322 09:57:14.456903  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 09:57:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 09:57:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:57:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 09:57:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 09:57:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:57:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:57:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:57:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:57:23.409772  543705 memory.go:184] no items to output this cycle
I0322 09:57:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 09:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:57:33.409779  543705 memory.go:184] no items to output this cycle
I0322 09:57:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 09:57:34.189672  543705 disk_info.go:125] begin check local disk info of client
I0322 09:57:34.192139  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:57:34.192144  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9240 0xc0004a9280]
I0322 09:57:39.601717  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 09:57:39.601723  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 09:57:43.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:57:43.411027  543705 memory.go:191] Add success.
I0322 09:57:43.409970  543705 cpu.go:282] Add success.
I0322 09:57:43.419706  543705 net.go:648] Add success.
I0322 09:57:43.422731  543705 net.go:770] primary dev: ETH0
I0322 09:57:43.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:57:43.422756  543705 net.go:698] Add success.
I0322 09:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:57:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:57:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:57:53.409774  543705 memory.go:184] no items to output this cycle
I0322 09:57:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 09:58:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:58:03.409793  543705 memory.go:184] no items to output this cycle
I0322 09:58:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 09:58:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:58:13.409803  543705 memory.go:191] Add success.
I0322 09:58:13.409804  543705 cpu.go:282] Add success.
W0322 09:58:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:58:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:58:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:58:13.420155  543705 net.go:648] Add success.
I0322 09:58:13.423133  543705 net.go:770] primary dev: ETH0
I0322 09:58:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:58:13.423162  543705 net.go:698] Add success.
I0322 09:58:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:58:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:58:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 09:58:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:58:14.456505  543705 disk_worker.go:494] system disk:vda1
I0322 09:58:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:58:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:58:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:58:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:58:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:58:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:58:23.409776  543705 memory.go:184] no items to output this cycle
I0322 09:58:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 09:58:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:58:33.409807  543705 memory.go:184] no items to output this cycle
I0322 09:58:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 09:58:34.192227  543705 disk_info.go:125] begin check local disk info of client
I0322 09:58:34.194768  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:58:34.194774  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f0cc0 0xc0000f0d00]
E0322 09:58:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:58:43.410775  543705 memory.go:191] Add success.
I0322 09:58:43.409806  543705 cpu.go:282] Add success.
I0322 09:58:43.420521  543705 net.go:648] Add success.
I0322 09:58:43.423179  543705 net.go:770] primary dev: ETH0
I0322 09:58:43.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:58:43.423204  543705 net.go:698] Add success.
I0322 09:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:58:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:58:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:58:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:58:53.409777  543705 memory.go:184] no items to output this cycle
I0322 09:58:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 09:59:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:59:03.409796  543705 memory.go:184] no items to output this cycle
I0322 09:59:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 09:59:13.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:59:13.409851  543705 memory.go:191] Add success.
I0322 09:59:13.409861  543705 cpu.go:282] Add success.
W0322 09:59:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 09:59:13.409899  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 09:59:13.409904  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 09:59:13.420270  543705 net.go:648] Add success.
I0322 09:59:13.423075  543705 net.go:770] primary dev: ETH0
I0322 09:59:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:59:13.423109  543705 net.go:698] Add success.
I0322 09:59:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 09:59:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 09:59:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 09:59:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 09:59:14.456595  543705 disk_worker.go:494] system disk:vda1
I0322 09:59:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 09:59:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 09:59:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:59:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:59:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 09:59:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0322 09:59:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:59:23.409803  543705 memory.go:184] no items to output this cycle
I0322 09:59:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 09:59:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:59:33.409786  543705 memory.go:184] no items to output this cycle
I0322 09:59:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 09:59:34.194862  543705 disk_info.go:125] begin check local disk info of client
I0322 09:59:34.197336  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 09:59:34.197342  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ccc00 0xc0004ccc40]
E0322 09:59:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:59:43.410837  543705 memory.go:191] Add success.
I0322 09:59:43.409811  543705 cpu.go:282] Add success.
I0322 09:59:43.420605  543705 net.go:648] Add success.
I0322 09:59:43.423769  543705 net.go:770] primary dev: ETH0
I0322 09:59:43.423785  543705 net.go:802] Send network stats successfully!,count is 6
I0322 09:59:43.423799  543705 net.go:698] Add success.
I0322 09:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 09:59:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 09:59:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 09:59:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 09:59:53.409805  543705 memory.go:184] no items to output this cycle
I0322 09:59:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 10:00:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:00:03.409785  543705 memory.go:184] no items to output this cycle
I0322 10:00:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 10:00:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:00:13.409798  543705 memory.go:191] Add success.
I0322 10:00:13.409822  543705 cpu.go:282] Add success.
W0322 10:00:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:00:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:00:13.420185  543705 net.go:648] Add success.
I0322 10:00:13.423145  543705 net.go:770] primary dev: ETH0
I0322 10:00:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:00:13.423173  543705 net.go:698] Add success.
I0322 10:00:13.464480  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ac959a8-ee20-4288-884c-ee6e4d5e2e7e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:00:13.464512  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:00:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:00:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:00:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 10:00:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:00:14.456542  543705 disk_worker.go:494] system disk:vda1
I0322 10:00:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:00:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:00:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:00:23.410398  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:00:23.410413  543705 memory.go:184] no items to output this cycle
I0322 10:00:23.410422  543705 cpu.go:275] no items to output this cycle
E0322 10:00:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:00:33.409784  543705 memory.go:184] no items to output this cycle
I0322 10:00:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 10:00:34.197662  543705 disk_info.go:125] begin check local disk info of client
I0322 10:00:34.200234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:00:34.200241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500200 0xc000500240]
I0322 10:00:39.602710  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:00:39.602717  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:00:43.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:00:43.410771  543705 memory.go:191] Add success.
I0322 10:00:43.410158  543705 cpu.go:282] Add success.
I0322 10:00:43.419736  543705 net.go:648] Add success.
I0322 10:00:43.422355  543705 net.go:770] primary dev: ETH0
I0322 10:00:43.422369  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:00:43.422380  543705 net.go:698] Add success.
I0322 10:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:00:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:00:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:00:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:00:53.409768  543705 memory.go:184] no items to output this cycle
I0322 10:00:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 10:01:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:01:03.409769  543705 memory.go:184] no items to output this cycle
I0322 10:01:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 10:01:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:01:13.409802  543705 memory.go:191] Add success.
I0322 10:01:13.409807  543705 cpu.go:282] Add success.
W0322 10:01:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:01:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:01:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:01:13.420180  543705 net.go:648] Add success.
I0322 10:01:13.422724  543705 net.go:770] primary dev: ETH0
I0322 10:01:13.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:01:13.422750  543705 net.go:698] Add success.
I0322 10:01:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:01:14.455083  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:01:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0322 10:01:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:01:14.456485  543705 disk_worker.go:494] system disk:vda1
I0322 10:01:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:01:16.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:01:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:01:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:01:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:01:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:01:23.409779  543705 memory.go:184] no items to output this cycle
I0322 10:01:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:01:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:01:33.409767  543705 memory.go:184] no items to output this cycle
I0322 10:01:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 10:01:34.201676  543705 disk_info.go:125] begin check local disk info of client
I0322 10:01:34.204209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:01:34.204215  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343bc0 0xc000343c00]
E0322 10:01:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:01:43.410757  543705 memory.go:191] Add success.
I0322 10:01:43.410035  543705 cpu.go:282] Add success.
I0322 10:01:43.419747  543705 net.go:648] Add success.
I0322 10:01:43.422642  543705 net.go:770] primary dev: ETH0
I0322 10:01:43.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:01:43.422670  543705 net.go:698] Add success.
I0322 10:01:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:01:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:01:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:01:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:01:53.409767  543705 memory.go:184] no items to output this cycle
I0322 10:01:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 10:02:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:02:03.409807  543705 memory.go:184] no items to output this cycle
I0322 10:02:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 10:02:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:02:13.409788  543705 memory.go:191] Add success.
I0322 10:02:13.409807  543705 cpu.go:282] Add success.
W0322 10:02:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:02:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:02:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:02:13.420197  543705 net.go:648] Add success.
I0322 10:02:13.423140  543705 net.go:770] primary dev: ETH0
I0322 10:02:13.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:02:13.423179  543705 net.go:698] Add success.
W0322 10:02:14.455229  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:02:14.455242  543705 disk_worker.go:708] disk space is not compliant
W0322 10:02:14.455246  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:02:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:02:14.455913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:02:14.455919  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:02:14.456835  543705 disk_worker.go:494] system disk:vda1
I0322 10:02:14.456863  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:02:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:02:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:02:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:02:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:02:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:02:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:02:16.472310  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:02:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:02:23.409795  543705 memory.go:184] no items to output this cycle
I0322 10:02:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 10:02:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:02:33.409770  543705 memory.go:184] no items to output this cycle
I0322 10:02:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 10:02:34.205672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:02:34.208178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:02:34.208184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf8c0 0xc0003bf900]
E0322 10:02:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:02:43.409797  543705 cpu.go:282] Add success.
I0322 10:02:43.410752  543705 memory.go:191] Add success.
I0322 10:02:43.419708  543705 net.go:648] Add success.
I0322 10:02:43.422341  543705 net.go:770] primary dev: ETH0
I0322 10:02:43.422354  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:02:43.422365  543705 net.go:698] Add success.
I0322 10:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:02:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:02:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:02:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:02:53.409790  543705 memory.go:184] no items to output this cycle
I0322 10:02:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 10:03:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:03:03.409784  543705 memory.go:184] no items to output this cycle
I0322 10:03:03.409788  543705 cpu.go:275] no items to output this cycle
W0322 10:03:13.409724  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:03:13.409747  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:03:13.409753  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:03:13.409841  543705 cpu.go:282] Add success.
E0322 10:03:13.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:03:13.409872  543705 memory.go:191] Add success.
I0322 10:03:13.420051  543705 net.go:648] Add success.
I0322 10:03:13.422706  543705 net.go:770] primary dev: ETH0
I0322 10:03:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:03:13.422731  543705 net.go:698] Add success.
I0322 10:03:13.468273  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"375576cf-c273-409b-9b74-22ee054dc582","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:03:13.468307  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:03:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:03:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:03:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 10:03:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:03:14.456697  543705 disk_worker.go:494] system disk:vda1
I0322 10:03:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:03:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:03:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:03:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:03:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:03:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:03:23.409764  543705 memory.go:184] no items to output this cycle
I0322 10:03:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:03:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:03:33.409781  543705 memory.go:184] no items to output this cycle
I0322 10:03:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 10:03:34.209674  543705 disk_info.go:125] begin check local disk info of client
I0322 10:03:34.212214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:03:34.212219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e1740 0xc0003e1780]
I0322 10:03:39.603711  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:03:39.603717  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:03:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:03:43.410924  543705 memory.go:191] Add success.
I0322 10:03:43.409900  543705 cpu.go:282] Add success.
I0322 10:03:43.419710  543705 net.go:648] Add success.
I0322 10:03:43.422578  543705 net.go:770] primary dev: ETH0
I0322 10:03:43.422591  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:03:43.422622  543705 net.go:698] Add success.
I0322 10:03:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:03:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:03:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:03:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:03:53.409804  543705 memory.go:184] no items to output this cycle
I0322 10:03:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 10:04:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:04:03.409813  543705 memory.go:184] no items to output this cycle
I0322 10:04:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 10:04:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:04:13.409801  543705 memory.go:191] Add success.
I0322 10:04:13.409806  543705 cpu.go:282] Add success.
W0322 10:04:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:04:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:04:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:04:13.420120  543705 net.go:648] Add success.
I0322 10:04:13.423351  543705 net.go:770] primary dev: ETH0
I0322 10:04:13.423364  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:04:13.423376  543705 net.go:698] Add success.
I0322 10:04:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:04:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:04:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 10:04:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:04:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 10:04:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:04:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:04:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:04:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:04:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:04:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:04:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:04:23.409788  543705 memory.go:184] no items to output this cycle
I0322 10:04:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:04:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:04:33.409803  543705 memory.go:184] no items to output this cycle
I0322 10:04:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 10:04:34.213672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:04:34.216136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:04:34.216141  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fb80 0xc00037fbc0]
E0322 10:04:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:04:43.410827  543705 memory.go:191] Add success.
I0322 10:04:43.409794  543705 cpu.go:282] Add success.
I0322 10:04:43.420697  543705 net.go:648] Add success.
I0322 10:04:43.423433  543705 net.go:770] primary dev: ETH0
I0322 10:04:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:04:43.423457  543705 net.go:698] Add success.
I0322 10:04:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:04:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:04:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:04:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:04:53.409799  543705 memory.go:184] no items to output this cycle
I0322 10:04:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 10:05:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:05:03.409807  543705 memory.go:184] no items to output this cycle
I0322 10:05:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 10:05:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:05:13.409809  543705 memory.go:191] Add success.
I0322 10:05:13.409810  543705 cpu.go:282] Add success.
W0322 10:05:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:05:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:05:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:05:13.420151  543705 net.go:648] Add success.
I0322 10:05:13.423094  543705 net.go:770] primary dev: ETH0
I0322 10:05:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:05:13.423119  543705 net.go:698] Add success.
I0322 10:05:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:05:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:05:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 10:05:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:05:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 10:05:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:05:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:05:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:05:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:05:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:05:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:05:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:05:23.409801  543705 memory.go:184] no items to output this cycle
I0322 10:05:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 10:05:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:05:33.409769  543705 memory.go:184] no items to output this cycle
I0322 10:05:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 10:05:34.217668  543705 disk_info.go:125] begin check local disk info of client
I0322 10:05:34.220154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:05:34.220160  543705 disk_info.go:196] parse disk info done, disk is : [0xc000345680 0xc0003456c0]
E0322 10:05:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:05:43.410742  543705 memory.go:191] Add success.
I0322 10:05:43.409801  543705 cpu.go:282] Add success.
I0322 10:05:43.420460  543705 net.go:648] Add success.
I0322 10:05:43.423244  543705 net.go:770] primary dev: ETH0
I0322 10:05:43.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:05:43.423271  543705 net.go:698] Add success.
I0322 10:05:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:05:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:05:53.409763  543705 memory.go:184] no items to output this cycle
I0322 10:05:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:06:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:06:03.409775  543705 memory.go:184] no items to output this cycle
I0322 10:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 10:06:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:06:13.409800  543705 memory.go:191] Add success.
I0322 10:06:13.409805  543705 cpu.go:282] Add success.
W0322 10:06:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:06:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:06:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:06:13.420135  543705 net.go:648] Add success.
I0322 10:06:13.422957  543705 net.go:770] primary dev: ETH0
I0322 10:06:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:06:13.422985  543705 net.go:698] Add success.
I0322 10:06:13.469026  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3dd85893-8f0c-45c7-8c7d-2d0edbe32a83","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:06:13.469071  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:06:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:06:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:06:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 10:06:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:06:14.456684  543705 disk_worker.go:494] system disk:vda1
I0322 10:06:14.456715  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:06:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:06:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:06:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:06:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:06:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:06:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:06:23.409774  543705 memory.go:184] no items to output this cycle
I0322 10:06:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 10:06:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:06:33.409798  543705 memory.go:184] no items to output this cycle
I0322 10:06:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 10:06:34.221669  543705 disk_info.go:125] begin check local disk info of client
I0322 10:06:34.224212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:06:34.224218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536b80 0xc000536bc0]
I0322 10:06:39.604720  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:06:39.604727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:06:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:06:43.410669  543705 memory.go:191] Add success.
I0322 10:06:43.409796  543705 cpu.go:282] Add success.
I0322 10:06:43.420370  543705 net.go:648] Add success.
I0322 10:06:43.423048  543705 net.go:770] primary dev: ETH0
I0322 10:06:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:06:43.423073  543705 net.go:698] Add success.
I0322 10:06:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:06:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:06:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:06:53.409767  543705 memory.go:184] no items to output this cycle
I0322 10:06:53.409785  543705 cpu.go:275] no items to output this cycle
I0322 10:07:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 10:07:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:07:03.409804  543705 memory.go:184] no items to output this cycle
E0322 10:07:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:07:13.409820  543705 memory.go:191] Add success.
I0322 10:07:13.409832  543705 cpu.go:282] Add success.
W0322 10:07:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:07:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:07:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:07:13.420573  543705 net.go:648] Add success.
I0322 10:07:13.423256  543705 net.go:770] primary dev: ETH0
I0322 10:07:13.423269  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:07:13.423281  543705 net.go:698] Add success.
I0322 10:07:13.452784  543705 event_worker.go:152] Polling the log file for events...
W0322 10:07:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:07:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 10:07:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:07:14.455888  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:07:14.455896  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:07:14.455902  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:07:14.456628  543705 disk_worker.go:494] system disk:vda1
I0322 10:07:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:07:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:07:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:07:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:07:16.457984  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:07:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:07:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:07:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:07:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:07:23.409776  543705 memory.go:184] no items to output this cycle
I0322 10:07:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:07:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:07:33.409798  543705 memory.go:184] no items to output this cycle
I0322 10:07:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 10:07:34.225672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:07:34.228267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:07:34.228274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344b80 0xc000344bc0]
E0322 10:07:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:07:43.410709  543705 memory.go:191] Add success.
I0322 10:07:43.409803  543705 cpu.go:282] Add success.
I0322 10:07:43.420507  543705 net.go:648] Add success.
I0322 10:07:43.423747  543705 net.go:770] primary dev: ETH0
I0322 10:07:43.423761  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:07:43.423775  543705 net.go:698] Add success.
I0322 10:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:07:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:07:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:07:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:07:53.409773  543705 memory.go:184] no items to output this cycle
I0322 10:07:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:08:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:08:03.409810  543705 memory.go:184] no items to output this cycle
I0322 10:08:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 10:08:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:08:13.409784  543705 memory.go:191] Add success.
I0322 10:08:13.409807  543705 cpu.go:282] Add success.
W0322 10:08:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:08:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:08:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:08:13.420155  543705 net.go:648] Add success.
I0322 10:08:13.423275  543705 net.go:770] primary dev: ETH0
I0322 10:08:13.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:08:13.423301  543705 net.go:698] Add success.
I0322 10:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:08:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:08:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 10:08:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:08:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 10:08:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:08:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:08:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:08:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:08:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:08:23.409779  543705 memory.go:184] no items to output this cycle
I0322 10:08:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:08:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:08:33.409768  543705 memory.go:184] no items to output this cycle
I0322 10:08:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 10:08:34.229676  543705 disk_info.go:125] begin check local disk info of client
I0322 10:08:34.232156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:08:34.232161  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049c540 0xc00049c580]
E0322 10:08:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:08:43.410671  543705 memory.go:191] Add success.
I0322 10:08:43.409822  543705 cpu.go:282] Add success.
I0322 10:08:43.420510  543705 net.go:648] Add success.
I0322 10:08:43.423102  543705 net.go:770] primary dev: ETH0
I0322 10:08:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:08:43.423126  543705 net.go:698] Add success.
I0322 10:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:08:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:08:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:08:53.409799  543705 memory.go:184] no items to output this cycle
I0322 10:08:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 10:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:09:03.409786  543705 memory.go:184] no items to output this cycle
I0322 10:09:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 10:09:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:09:13.409830  543705 memory.go:191] Add success.
I0322 10:09:13.409837  543705 cpu.go:282] Add success.
W0322 10:09:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:09:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:09:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:09:13.420166  543705 net.go:648] Add success.
I0322 10:09:13.423143  543705 net.go:770] primary dev: ETH0
I0322 10:09:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:09:13.423168  543705 net.go:698] Add success.
I0322 10:09:13.662941  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"979f787f-3537-44cb-8a7c-5bb085079525","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:09:13.662977  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:09:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:09:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:09:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 10:09:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:09:14.456626  543705 disk_worker.go:494] system disk:vda1
I0322 10:09:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:09:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:09:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:09:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:09:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:09:23.409815  543705 memory.go:184] no items to output this cycle
I0322 10:09:23.409823  543705 cpu.go:275] no items to output this cycle
E0322 10:09:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:09:33.409777  543705 memory.go:184] no items to output this cycle
I0322 10:09:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 10:09:34.233673  543705 disk_info.go:125] begin check local disk info of client
I0322 10:09:34.236156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:09:34.236161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003af640 0xc0003af680]
I0322 10:09:39.605738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:09:39.605745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:09:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:09:43.410672  543705 memory.go:191] Add success.
I0322 10:09:43.409817  543705 cpu.go:282] Add success.
I0322 10:09:43.420399  543705 net.go:648] Add success.
I0322 10:09:43.422894  543705 net.go:770] primary dev: ETH0
I0322 10:09:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:09:43.422922  543705 net.go:698] Add success.
I0322 10:09:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:09:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:09:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:09:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:09:53.409782  543705 memory.go:184] no items to output this cycle
I0322 10:09:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:10:03.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:10:03.409918  543705 memory.go:184] no items to output this cycle
I0322 10:10:03.409976  543705 cpu.go:275] no items to output this cycle
E0322 10:10:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:10:13.409837  543705 memory.go:191] Add success.
I0322 10:10:13.409842  543705 cpu.go:282] Add success.
W0322 10:10:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:10:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:10:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:10:13.420218  543705 net.go:648] Add success.
I0322 10:10:13.422723  543705 net.go:770] primary dev: ETH0
I0322 10:10:13.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:10:13.422747  543705 net.go:698] Add success.
I0322 10:10:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:10:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:10:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 10:10:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:10:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 10:10:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:10:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:10:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:10:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:10:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:10:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:10:23.409784  543705 memory.go:184] no items to output this cycle
I0322 10:10:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 10:10:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:10:33.409772  543705 memory.go:184] no items to output this cycle
I0322 10:10:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 10:10:34.237672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:10:34.240251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:10:34.240256  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034eb40 0xc00034eb80]
E0322 10:10:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:10:43.410689  543705 memory.go:191] Add success.
I0322 10:10:43.409814  543705 cpu.go:282] Add success.
I0322 10:10:43.420382  543705 net.go:648] Add success.
I0322 10:10:43.422901  543705 net.go:770] primary dev: ETH0
I0322 10:10:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:10:43.422927  543705 net.go:698] Add success.
I0322 10:10:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:10:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:10:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:10:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:10:53.409775  543705 memory.go:184] no items to output this cycle
I0322 10:10:53.409774  543705 cpu.go:275] no items to output this cycle
E0322 10:11:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:11:03.409796  543705 memory.go:184] no items to output this cycle
I0322 10:11:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 10:11:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:11:13.409831  543705 memory.go:191] Add success.
I0322 10:11:13.409834  543705 cpu.go:282] Add success.
W0322 10:11:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:11:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:11:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:11:13.420195  543705 net.go:648] Add success.
I0322 10:11:13.423185  543705 net.go:770] primary dev: ETH0
I0322 10:11:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:11:13.423216  543705 net.go:698] Add success.
I0322 10:11:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:11:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:11:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 10:11:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:11:14.456605  543705 disk_worker.go:494] system disk:vda1
I0322 10:11:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:11:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:11:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:11:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:11:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:11:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:11:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:11:23.409766  543705 memory.go:184] no items to output this cycle
I0322 10:11:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:11:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:11:33.409783  543705 memory.go:184] no items to output this cycle
I0322 10:11:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 10:11:34.241690  543705 disk_info.go:125] begin check local disk info of client
I0322 10:11:34.244238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:11:34.244244  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052c5c0 0xc00052c600]
E0322 10:11:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:11:43.410696  543705 memory.go:191] Add success.
I0322 10:11:43.409807  543705 cpu.go:282] Add success.
I0322 10:11:43.420515  543705 net.go:648] Add success.
I0322 10:11:43.423316  543705 net.go:770] primary dev: ETH0
I0322 10:11:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:11:43.423341  543705 net.go:698] Add success.
I0322 10:11:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:11:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:11:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:11:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:11:53.409795  543705 memory.go:184] no items to output this cycle
I0322 10:11:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 10:12:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:12:03.409782  543705 memory.go:184] no items to output this cycle
I0322 10:12:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:12:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:12:13.409912  543705 memory.go:191] Add success.
I0322 10:12:13.409938  543705 cpu.go:282] Add success.
W0322 10:12:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:12:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:12:13.409968  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:12:13.419717  543705 net.go:648] Add success.
I0322 10:12:13.423153  543705 net.go:770] primary dev: ETH0
I0322 10:12:13.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:12:13.423178  543705 net.go:698] Add success.
I0322 10:12:13.469146  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d70f6b1-961a-4c67-8b68-846e6a42c965","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:12:13.469178  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 10:12:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:12:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 10:12:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:12:14.456148  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:12:14.456158  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:12:14.456163  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:12:14.456520  543705 disk_worker.go:494] system disk:vda1
I0322 10:12:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:12:15.456563  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:12:15.456575  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:12:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:12:16.458000  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:12:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:12:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:12:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:12:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:12:23.409772  543705 memory.go:184] no items to output this cycle
I0322 10:12:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:12:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:12:33.409777  543705 memory.go:184] no items to output this cycle
I0322 10:12:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 10:12:34.245672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:12:34.248181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:12:34.248187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b95c0 0xc0003b9600]
I0322 10:12:39.606729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:12:39.606736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:12:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:12:43.410709  543705 memory.go:191] Add success.
I0322 10:12:43.409802  543705 cpu.go:282] Add success.
I0322 10:12:43.420381  543705 net.go:648] Add success.
I0322 10:12:43.423381  543705 net.go:770] primary dev: ETH0
I0322 10:12:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:12:43.423411  543705 net.go:698] Add success.
I0322 10:12:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:12:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:12:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:12:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:12:53.409765  543705 memory.go:184] no items to output this cycle
I0322 10:12:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:13:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:13:03.409783  543705 memory.go:184] no items to output this cycle
I0322 10:13:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 10:13:13.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:13:13.409902  543705 memory.go:191] Add success.
W0322 10:13:13.409938  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:13:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:13:13.409959  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:13:13.409971  543705 cpu.go:282] Add success.
I0322 10:13:13.419543  543705 net.go:770] primary dev: ETH0
I0322 10:13:13.419557  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:13:13.419568  543705 net.go:698] Add success.
I0322 10:13:13.419811  543705 net.go:648] Add success.
I0322 10:13:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:13:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:13:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 10:13:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:13:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 10:13:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:13:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:13:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:13:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:13:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:13:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:13:23.409771  543705 memory.go:184] no items to output this cycle
I0322 10:13:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:13:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:13:33.409805  543705 memory.go:184] no items to output this cycle
I0322 10:13:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 10:13:34.249672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:13:34.252149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:13:34.252154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e0640 0xc0003e0680]
E0322 10:13:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:13:43.410656  543705 memory.go:191] Add success.
I0322 10:13:43.409807  543705 cpu.go:282] Add success.
I0322 10:13:43.420345  543705 net.go:648] Add success.
I0322 10:13:43.423018  543705 net.go:770] primary dev: ETH0
I0322 10:13:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:13:43.423043  543705 net.go:698] Add success.
I0322 10:13:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:13:53.410362  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:13:53.410364  543705 cpu.go:275] no items to output this cycle
I0322 10:13:53.410375  543705 memory.go:184] no items to output this cycle
E0322 10:14:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:14:03.409807  543705 memory.go:184] no items to output this cycle
I0322 10:14:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 10:14:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:14:13.409789  543705 memory.go:191] Add success.
W0322 10:14:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 10:14:13.409815  543705 cpu.go:282] Add success.
W0322 10:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:14:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:14:13.420530  543705 net.go:648] Add success.
I0322 10:14:13.423202  543705 net.go:770] primary dev: ETH0
I0322 10:14:13.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:14:13.423227  543705 net.go:698] Add success.
I0322 10:14:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:14:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:14:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 10:14:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:14:14.456524  543705 disk_worker.go:494] system disk:vda1
I0322 10:14:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:14:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:14:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:14:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:14:23.409765  543705 memory.go:184] no items to output this cycle
I0322 10:14:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 10:14:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:14:33.409801  543705 memory.go:184] no items to output this cycle
I0322 10:14:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 10:14:34.253672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:14:34.256201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:14:34.256206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4580 0xc0000c45c0]
E0322 10:14:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:14:43.410692  543705 memory.go:191] Add success.
I0322 10:14:43.409795  543705 cpu.go:282] Add success.
I0322 10:14:43.420402  543705 net.go:648] Add success.
I0322 10:14:43.423268  543705 net.go:770] primary dev: ETH0
I0322 10:14:43.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:14:43.423293  543705 net.go:698] Add success.
I0322 10:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:14:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:14:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:14:53.409790  543705 memory.go:184] no items to output this cycle
I0322 10:14:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 10:15:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:15:03.409775  543705 memory.go:184] no items to output this cycle
I0322 10:15:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 10:15:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:15:13.409821  543705 memory.go:191] Add success.
I0322 10:15:13.409828  543705 cpu.go:282] Add success.
W0322 10:15:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:15:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:15:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:15:13.420153  543705 net.go:648] Add success.
I0322 10:15:13.422756  543705 net.go:770] primary dev: ETH0
I0322 10:15:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:15:13.422786  543705 net.go:698] Add success.
I0322 10:15:13.468635  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31be952e-2332-4cce-a53e-4e8ab00bb414","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:15:13.468670  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:15:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:15:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:15:14.455312  543705 disk_worker.go:708] disk space is not compliant
W0322 10:15:14.455316  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:15:14.457240  543705 disk_worker.go:494] system disk:vda1
I0322 10:15:14.457275  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:15:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:15:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:15:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:15:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:15:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:15:23.409768  543705 memory.go:184] no items to output this cycle
I0322 10:15:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:15:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:15:33.409798  543705 memory.go:184] no items to output this cycle
I0322 10:15:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 10:15:34.257671  543705 disk_info.go:125] begin check local disk info of client
I0322 10:15:34.260152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:15:34.260157  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047aac0 0xc00047ab00]
I0322 10:15:39.607732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:15:39.607739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:15:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:15:43.410764  543705 memory.go:191] Add success.
I0322 10:15:43.409806  543705 cpu.go:282] Add success.
I0322 10:15:43.420550  543705 net.go:648] Add success.
I0322 10:15:43.423376  543705 net.go:770] primary dev: ETH0
I0322 10:15:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:15:43.423419  543705 net.go:698] Add success.
I0322 10:15:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:15:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:15:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:15:53.409789  543705 memory.go:184] no items to output this cycle
I0322 10:15:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 10:16:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:16:03.409779  543705 memory.go:184] no items to output this cycle
I0322 10:16:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 10:16:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:16:13.409799  543705 memory.go:191] Add success.
I0322 10:16:13.409804  543705 cpu.go:282] Add success.
W0322 10:16:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:16:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:16:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:16:13.420219  543705 net.go:648] Add success.
I0322 10:16:13.423168  543705 net.go:770] primary dev: ETH0
I0322 10:16:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:16:13.423196  543705 net.go:698] Add success.
I0322 10:16:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:16:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:16:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 10:16:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:16:14.457033  543705 disk_worker.go:494] system disk:vda1
I0322 10:16:14.457063  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:16:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:16:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:16:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:16:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:16:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:16:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:16:23.409792  543705 memory.go:184] no items to output this cycle
I0322 10:16:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 10:16:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:16:33.409779  543705 memory.go:184] no items to output this cycle
I0322 10:16:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 10:16:34.261677  543705 disk_info.go:125] begin check local disk info of client
I0322 10:16:34.264166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:16:34.264171  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052c840 0xc00052c880]
E0322 10:16:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:16:43.410800  543705 memory.go:191] Add success.
I0322 10:16:43.409821  543705 cpu.go:282] Add success.
I0322 10:16:43.420558  543705 net.go:648] Add success.
I0322 10:16:43.423413  543705 net.go:770] primary dev: ETH0
I0322 10:16:43.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:16:43.423448  543705 net.go:698] Add success.
I0322 10:16:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:16:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:16:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:16:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:16:53.409782  543705 memory.go:184] no items to output this cycle
I0322 10:16:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 10:17:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:17:03.409775  543705 memory.go:184] no items to output this cycle
I0322 10:17:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 10:17:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:17:13.409787  543705 memory.go:191] Add success.
I0322 10:17:13.409806  543705 cpu.go:282] Add success.
W0322 10:17:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:17:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:17:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:17:13.420073  543705 net.go:648] Add success.
I0322 10:17:13.422839  543705 net.go:770] primary dev: ETH0
I0322 10:17:13.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:17:13.422866  543705 net.go:698] Add success.
I0322 10:17:13.453413  543705 event_worker.go:152] Polling the log file for events...
W0322 10:17:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:17:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 10:17:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:17:14.455890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:17:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:17:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:17:14.456836  543705 disk_worker.go:494] system disk:vda1
I0322 10:17:14.456948  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:17:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:17:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:17:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:17:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:17:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:17:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:17:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:17:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:17:23.409775  543705 memory.go:184] no items to output this cycle
I0322 10:17:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 10:17:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:17:33.409775  543705 memory.go:184] no items to output this cycle
I0322 10:17:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 10:17:34.265673  543705 disk_info.go:125] begin check local disk info of client
I0322 10:17:34.268129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:17:34.268135  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052cc40 0xc00052cc80]
E0322 10:17:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:17:43.410783  543705 memory.go:191] Add success.
I0322 10:17:43.409815  543705 cpu.go:282] Add success.
I0322 10:17:43.420582  543705 net.go:648] Add success.
I0322 10:17:43.423151  543705 net.go:770] primary dev: ETH0
I0322 10:17:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:17:43.423176  543705 net.go:698] Add success.
I0322 10:17:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:17:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:17:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:17:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:17:53.409769  543705 memory.go:184] no items to output this cycle
I0322 10:17:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 10:18:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:18:03.409786  543705 memory.go:184] no items to output this cycle
I0322 10:18:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:18:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:18:13.409809  543705 memory.go:191] Add success.
I0322 10:18:13.409811  543705 cpu.go:282] Add success.
W0322 10:18:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:18:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:18:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:18:13.420187  543705 net.go:648] Add success.
I0322 10:18:13.422817  543705 net.go:770] primary dev: ETH0
I0322 10:18:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:18:13.422842  543705 net.go:698] Add success.
I0322 10:18:13.470798  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98cd2186-6073-428f-8443-0cd450a93209","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:18:13.470832  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:18:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:18:14.455338  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:18:14.455450  543705 disk_worker.go:708] disk space is not compliant
W0322 10:18:14.455454  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:18:14.457079  543705 disk_worker.go:494] system disk:vda1
I0322 10:18:14.457107  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:18:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:18:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:18:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:18:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:18:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:18:23.409776  543705 memory.go:184] no items to output this cycle
I0322 10:18:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 10:18:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:18:33.409800  543705 memory.go:184] no items to output this cycle
I0322 10:18:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 10:18:34.269672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:18:34.272211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:18:34.272217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8700 0xc0004d8740]
I0322 10:18:39.608741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:18:39.608747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:18:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:18:43.410703  543705 memory.go:191] Add success.
I0322 10:18:43.409798  543705 cpu.go:282] Add success.
I0322 10:18:43.420466  543705 net.go:648] Add success.
I0322 10:18:43.423029  543705 net.go:770] primary dev: ETH0
I0322 10:18:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:18:43.423059  543705 net.go:698] Add success.
I0322 10:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:18:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:18:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:18:53.409767  543705 memory.go:184] no items to output this cycle
I0322 10:18:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 10:19:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:19:03.409785  543705 memory.go:184] no items to output this cycle
I0322 10:19:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 10:19:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:19:13.409799  543705 memory.go:191] Add success.
I0322 10:19:13.409820  543705 cpu.go:282] Add success.
W0322 10:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:19:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:19:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:19:13.420295  543705 net.go:648] Add success.
I0322 10:19:13.423223  543705 net.go:770] primary dev: ETH0
I0322 10:19:13.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:19:13.423248  543705 net.go:698] Add success.
I0322 10:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:19:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:19:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 10:19:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:19:14.457023  543705 disk_worker.go:494] system disk:vda1
I0322 10:19:14.457054  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:19:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:19:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:19:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:19:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:19:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:19:23.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:19:23.410259  543705 memory.go:184] no items to output this cycle
I0322 10:19:23.410264  543705 cpu.go:275] no items to output this cycle
E0322 10:19:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:19:33.409794  543705 memory.go:184] no items to output this cycle
I0322 10:19:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 10:19:34.273672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:19:34.276239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:19:34.276246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
E0322 10:19:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:19:43.410687  543705 memory.go:191] Add success.
I0322 10:19:43.409833  543705 cpu.go:282] Add success.
I0322 10:19:43.420416  543705 net.go:648] Add success.
I0322 10:19:43.423096  543705 net.go:770] primary dev: ETH0
I0322 10:19:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:19:43.423124  543705 net.go:698] Add success.
I0322 10:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:19:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:19:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:19:53.409803  543705 memory.go:184] no items to output this cycle
I0322 10:19:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 10:20:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:20:03.409788  543705 memory.go:184] no items to output this cycle
I0322 10:20:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 10:20:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:20:13.409836  543705 memory.go:191] Add success.
I0322 10:20:13.409843  543705 cpu.go:282] Add success.
W0322 10:20:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:20:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:20:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:20:13.420276  543705 net.go:648] Add success.
I0322 10:20:13.423141  543705 net.go:770] primary dev: ETH0
I0322 10:20:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:20:13.423171  543705 net.go:698] Add success.
I0322 10:20:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:20:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:20:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 10:20:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:20:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 10:20:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:20:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:20:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:20:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:20:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:20:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:20:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:20:23.409772  543705 memory.go:184] no items to output this cycle
I0322 10:20:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 10:20:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:20:33.409772  543705 memory.go:184] no items to output this cycle
I0322 10:20:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 10:20:34.277674  543705 disk_info.go:125] begin check local disk info of client
I0322 10:20:34.280149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:20:34.280154  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343980 0xc0003439c0]
E0322 10:20:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:20:43.410756  543705 memory.go:191] Add success.
I0322 10:20:43.409801  543705 cpu.go:282] Add success.
I0322 10:20:43.420475  543705 net.go:648] Add success.
I0322 10:20:43.423258  543705 net.go:770] primary dev: ETH0
I0322 10:20:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:20:43.423284  543705 net.go:698] Add success.
I0322 10:20:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:20:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:20:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:20:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:20:53.410279  543705 memory.go:184] no items to output this cycle
I0322 10:20:53.410290  543705 cpu.go:275] no items to output this cycle
E0322 10:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:21:03.409800  543705 memory.go:184] no items to output this cycle
I0322 10:21:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 10:21:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:21:13.409780  543705 memory.go:191] Add success.
W0322 10:21:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:21:13.409817  543705 cpu.go:282] Add success.
I0322 10:21:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:21:13.420149  543705 net.go:648] Add success.
I0322 10:21:13.422787  543705 net.go:770] primary dev: ETH0
I0322 10:21:13.422801  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:21:13.422814  543705 net.go:698] Add success.
I0322 10:21:13.474364  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"083569e3-3c75-4268-a1da-fb763bc2ed2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:21:13.474397  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:21:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:21:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:21:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 10:21:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:21:14.456546  543705 disk_worker.go:494] system disk:vda1
I0322 10:21:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:21:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:21:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:21:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:21:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:21:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:21:23.409777  543705 memory.go:184] no items to output this cycle
I0322 10:21:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 10:21:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:21:33.409797  543705 memory.go:184] no items to output this cycle
I0322 10:21:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 10:21:34.281672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:21:34.284191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:21:34.284197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe4c0 0xc0003fe500]
I0322 10:21:39.609730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:21:39.609736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:21:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:21:43.410598  543705 memory.go:191] Add success.
I0322 10:21:43.409825  543705 cpu.go:282] Add success.
I0322 10:21:43.420311  543705 net.go:648] Add success.
I0322 10:21:43.422874  543705 net.go:770] primary dev: ETH0
I0322 10:21:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:21:43.422899  543705 net.go:698] Add success.
I0322 10:21:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:21:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:21:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:21:53.409778  543705 memory.go:184] no items to output this cycle
I0322 10:21:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 10:22:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:22:03.409785  543705 memory.go:184] no items to output this cycle
I0322 10:22:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 10:22:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:22:13.409806  543705 memory.go:191] Add success.
I0322 10:22:13.409805  543705 cpu.go:282] Add success.
W0322 10:22:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:22:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:22:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:22:13.420236  543705 net.go:648] Add success.
I0322 10:22:13.423074  543705 net.go:770] primary dev: ETH0
I0322 10:22:13.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:22:13.423388  543705 net.go:698] Add success.
W0322 10:22:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:22:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 10:22:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:22:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:22:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:22:14.455890  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:22:14.456804  543705 disk_worker.go:494] system disk:vda1
I0322 10:22:14.456933  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:22:15.456755  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:22:15.456763  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:22:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:22:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:22:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:22:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:22:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:22:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:22:23.409797  543705 memory.go:184] no items to output this cycle
I0322 10:22:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 10:22:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:22:33.409776  543705 memory.go:184] no items to output this cycle
I0322 10:22:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 10:22:34.285674  543705 disk_info.go:125] begin check local disk info of client
I0322 10:22:34.288207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:22:34.288213  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
E0322 10:22:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:22:43.410692  543705 memory.go:191] Add success.
I0322 10:22:43.409804  543705 cpu.go:282] Add success.
I0322 10:22:43.420436  543705 net.go:648] Add success.
I0322 10:22:43.423018  543705 net.go:770] primary dev: ETH0
I0322 10:22:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:22:43.423052  543705 net.go:698] Add success.
I0322 10:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:22:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:22:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:22:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:22:53.409793  543705 memory.go:184] no items to output this cycle
I0322 10:22:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 10:23:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:23:03.409803  543705 memory.go:184] no items to output this cycle
I0322 10:23:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 10:23:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:23:13.409795  543705 memory.go:191] Add success.
W0322 10:23:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 10:23:13.409824  543705 cpu.go:282] Add success.
W0322 10:23:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:23:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:23:13.420583  543705 net.go:648] Add success.
I0322 10:23:13.423395  543705 net.go:770] primary dev: ETH0
I0322 10:23:13.423409  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:23:13.423422  543705 net.go:698] Add success.
I0322 10:23:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:23:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:23:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 10:23:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:23:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 10:23:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:23:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:23:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:23:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:23:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:23:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:23:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:23:23.409774  543705 memory.go:184] no items to output this cycle
I0322 10:23:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 10:23:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:23:33.409776  543705 memory.go:184] no items to output this cycle
I0322 10:23:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 10:23:34.289672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:23:34.292242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:23:34.292248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff300 0xc0003ff340]
E0322 10:23:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:23:43.410885  543705 memory.go:191] Add success.
I0322 10:23:43.409821  543705 cpu.go:282] Add success.
I0322 10:23:43.420558  543705 net.go:648] Add success.
I0322 10:23:43.423487  543705 net.go:770] primary dev: ETH0
I0322 10:23:43.423500  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:23:43.423512  543705 net.go:698] Add success.
I0322 10:23:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:23:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:23:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:23:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:23:53.409764  543705 memory.go:184] no items to output this cycle
I0322 10:23:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:24:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:24:03.409810  543705 memory.go:184] no items to output this cycle
I0322 10:24:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 10:24:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:24:13.409819  543705 memory.go:191] Add success.
I0322 10:24:13.409829  543705 cpu.go:282] Add success.
W0322 10:24:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:24:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:24:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:24:13.420221  543705 net.go:648] Add success.
I0322 10:24:13.423101  543705 net.go:770] primary dev: ETH0
I0322 10:24:13.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:24:13.423131  543705 net.go:698] Add success.
I0322 10:24:13.484575  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"753f33a9-af82-4e93-94bd-0b0a8ead15bb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:24:13.484611  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:24:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:24:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:24:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 10:24:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:24:14.456540  543705 disk_worker.go:494] system disk:vda1
I0322 10:24:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:24:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:24:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:24:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:24:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:24:23.409767  543705 memory.go:184] no items to output this cycle
I0322 10:24:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 10:24:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:24:33.409785  543705 memory.go:184] no items to output this cycle
I0322 10:24:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 10:24:34.293671  543705 disk_info.go:125] begin check local disk info of client
I0322 10:24:34.296146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:24:34.296151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9e80 0xc0004d9ec0]
I0322 10:24:39.610741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:24:39.610748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:24:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:24:43.410615  543705 memory.go:191] Add success.
I0322 10:24:43.409814  543705 cpu.go:282] Add success.
I0322 10:24:43.420313  543705 net.go:648] Add success.
I0322 10:24:43.422886  543705 net.go:770] primary dev: ETH0
I0322 10:24:43.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:24:43.422912  543705 net.go:698] Add success.
I0322 10:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:24:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:24:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:24:53.409767  543705 memory.go:184] no items to output this cycle
I0322 10:24:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:25:03.409807  543705 memory.go:184] no items to output this cycle
I0322 10:25:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 10:25:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:25:13.409800  543705 memory.go:191] Add success.
I0322 10:25:13.409801  543705 cpu.go:282] Add success.
W0322 10:25:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:25:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:25:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:25:13.420600  543705 net.go:648] Add success.
I0322 10:25:13.423158  543705 net.go:770] primary dev: ETH0
I0322 10:25:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:25:13.423184  543705 net.go:698] Add success.
I0322 10:25:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:25:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:25:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 10:25:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:25:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 10:25:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:25:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:25:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:25:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:25:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:25:23.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:25:23.409906  543705 memory.go:184] no items to output this cycle
I0322 10:25:23.409953  543705 cpu.go:275] no items to output this cycle
E0322 10:25:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:25:33.409798  543705 memory.go:184] no items to output this cycle
I0322 10:25:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 10:25:34.297671  543705 disk_info.go:125] begin check local disk info of client
I0322 10:25:34.300182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:25:34.300187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f00 0xc0000c5f40]
E0322 10:25:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:25:43.410619  543705 memory.go:191] Add success.
I0322 10:25:43.409797  543705 cpu.go:282] Add success.
I0322 10:25:43.420343  543705 net.go:648] Add success.
I0322 10:25:43.422850  543705 net.go:770] primary dev: ETH0
I0322 10:25:43.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:25:43.422875  543705 net.go:698] Add success.
I0322 10:25:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:25:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:25:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:25:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:25:53.409782  543705 memory.go:184] no items to output this cycle
I0322 10:25:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 10:26:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:26:03.409773  543705 memory.go:184] no items to output this cycle
I0322 10:26:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 10:26:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:26:13.409827  543705 memory.go:191] Add success.
I0322 10:26:13.409833  543705 cpu.go:282] Add success.
W0322 10:26:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:26:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:26:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:26:13.420162  543705 net.go:648] Add success.
I0322 10:26:13.422985  543705 net.go:770] primary dev: ETH0
I0322 10:26:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:26:13.423011  543705 net.go:698] Add success.
I0322 10:26:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:26:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:26:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 10:26:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:26:14.456523  543705 disk_worker.go:494] system disk:vda1
I0322 10:26:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:26:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:26:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:26:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:26:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:26:23.410224  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:26:23.410241  543705 memory.go:184] no items to output this cycle
I0322 10:26:23.410252  543705 cpu.go:275] no items to output this cycle
E0322 10:26:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:26:33.409779  543705 memory.go:184] no items to output this cycle
I0322 10:26:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 10:26:34.301672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:26:34.304153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:26:34.304158  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0322 10:26:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:26:43.410704  543705 memory.go:191] Add success.
I0322 10:26:43.409811  543705 cpu.go:282] Add success.
I0322 10:26:43.420397  543705 net.go:648] Add success.
I0322 10:26:43.423481  543705 net.go:770] primary dev: ETH0
I0322 10:26:43.423496  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:26:43.423511  543705 net.go:698] Add success.
I0322 10:26:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:26:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:26:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:26:53.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:26:53.410382  543705 memory.go:184] no items to output this cycle
I0322 10:26:53.410397  543705 cpu.go:275] no items to output this cycle
E0322 10:27:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:27:03.409808  543705 memory.go:184] no items to output this cycle
I0322 10:27:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 10:27:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:27:13.409792  543705 memory.go:191] Add success.
I0322 10:27:13.409809  543705 cpu.go:282] Add success.
W0322 10:27:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:27:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:27:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:27:13.420238  543705 net.go:648] Add success.
I0322 10:27:13.423107  543705 net.go:770] primary dev: ETH0
I0322 10:27:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:27:13.423132  543705 net.go:698] Add success.
I0322 10:27:13.429226  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 10:27:13.453395  543705 event_worker.go:152] Polling the log file for events...
I0322 10:27:13.471335  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f0e40ea6-7500-45bf-b9e4-b6298a1a6abc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:27:13.471380  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 10:27:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:27:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 10:27:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:27:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:27:14.455930  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:27:14.455936  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:27:14.456557  543705 disk_worker.go:494] system disk:vda1
I0322 10:27:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:27:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:27:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:27:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:27:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:27:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:27:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:27:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:27:23.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:27:23.409890  543705 memory.go:184] no items to output this cycle
I0322 10:27:23.409947  543705 cpu.go:275] no items to output this cycle
E0322 10:27:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:27:33.409777  543705 memory.go:184] no items to output this cycle
I0322 10:27:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 10:27:34.305671  543705 disk_info.go:125] begin check local disk info of client
I0322 10:27:34.308120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:27:34.308125  543705 disk_info.go:196] parse disk info done, disk is : [0xc000234500 0xc000234540]
I0322 10:27:39.611747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:27:39.611754  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:27:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:27:43.410725  543705 memory.go:191] Add success.
I0322 10:27:43.409814  543705 cpu.go:282] Add success.
I0322 10:27:43.420543  543705 net.go:648] Add success.
I0322 10:27:43.423389  543705 net.go:770] primary dev: ETH0
I0322 10:27:43.423401  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:27:43.423413  543705 net.go:698] Add success.
I0322 10:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:27:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:27:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:27:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:27:53.409794  543705 memory.go:184] no items to output this cycle
I0322 10:27:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:28:03.409785  543705 memory.go:184] no items to output this cycle
I0322 10:28:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 10:28:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:28:13.409795  543705 cpu.go:282] Add success.
I0322 10:28:13.409805  543705 memory.go:191] Add success.
W0322 10:28:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:28:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:28:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:28:13.420122  543705 net.go:648] Add success.
I0322 10:28:13.422573  543705 net.go:770] primary dev: ETH0
I0322 10:28:13.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:28:13.422600  543705 net.go:698] Add success.
I0322 10:28:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:28:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:28:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 10:28:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:28:14.456512  543705 disk_worker.go:494] system disk:vda1
I0322 10:28:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:28:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:28:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:28:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:28:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:28:23.409796  543705 memory.go:184] no items to output this cycle
I0322 10:28:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 10:28:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:28:33.409902  543705 memory.go:184] no items to output this cycle
I0322 10:28:33.409909  543705 cpu.go:275] no items to output this cycle
I0322 10:28:34.309672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:28:34.312130  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:28:34.312136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b88c0 0xc0003b8900]
E0322 10:28:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:28:43.410634  543705 memory.go:191] Add success.
I0322 10:28:43.409802  543705 cpu.go:282] Add success.
I0322 10:28:43.420482  543705 net.go:648] Add success.
I0322 10:28:43.423099  543705 net.go:770] primary dev: ETH0
I0322 10:28:43.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:28:43.423124  543705 net.go:698] Add success.
I0322 10:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:28:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:28:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:28:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:28:53.409796  543705 memory.go:184] no items to output this cycle
I0322 10:28:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:29:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:29:03.409790  543705 memory.go:184] no items to output this cycle
I0322 10:29:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 10:29:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:29:13.409815  543705 memory.go:191] Add success.
I0322 10:29:13.409819  543705 cpu.go:282] Add success.
W0322 10:29:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:29:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:29:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:29:13.420045  543705 net.go:648] Add success.
I0322 10:29:13.422591  543705 net.go:770] primary dev: ETH0
I0322 10:29:13.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:29:13.422637  543705 net.go:698] Add success.
I0322 10:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:29:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:29:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 10:29:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:29:14.456488  543705 disk_worker.go:494] system disk:vda1
I0322 10:29:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:29:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:29:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:29:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:29:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:29:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:29:23.409790  543705 memory.go:184] no items to output this cycle
I0322 10:29:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 10:29:33.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:29:33.409907  543705 cpu.go:275] no items to output this cycle
I0322 10:29:33.409914  543705 memory.go:184] no items to output this cycle
I0322 10:29:34.313681  543705 disk_info.go:125] begin check local disk info of client
I0322 10:29:34.316144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:29:34.316150  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005658c0 0xc000565900]
E0322 10:29:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:29:43.410720  543705 memory.go:191] Add success.
I0322 10:29:43.409805  543705 cpu.go:282] Add success.
I0322 10:29:43.420445  543705 net.go:648] Add success.
I0322 10:29:43.423314  543705 net.go:770] primary dev: ETH0
I0322 10:29:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:29:43.423341  543705 net.go:698] Add success.
I0322 10:29:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:29:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:29:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:29:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:29:53.409787  543705 memory.go:184] no items to output this cycle
I0322 10:29:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:30:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:30:03.409783  543705 memory.go:184] no items to output this cycle
I0322 10:30:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 10:30:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:30:13.409829  543705 memory.go:191] Add success.
I0322 10:30:13.409830  543705 cpu.go:282] Add success.
W0322 10:30:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:30:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:30:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:30:13.420194  543705 net.go:648] Add success.
I0322 10:30:13.422826  543705 net.go:770] primary dev: ETH0
I0322 10:30:13.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:30:13.422851  543705 net.go:698] Add success.
I0322 10:30:13.524405  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae63dcd0-a5fa-4e9d-9b4a-4f44418a25b7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:30:13.524438  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:30:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:30:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:30:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 10:30:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:30:14.456492  543705 disk_worker.go:494] system disk:vda1
I0322 10:30:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:30:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:30:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:30:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:30:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:30:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:30:23.409779  543705 memory.go:184] no items to output this cycle
I0322 10:30:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 10:30:33.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:30:33.409918  543705 memory.go:184] no items to output this cycle
I0322 10:30:33.409993  543705 cpu.go:275] no items to output this cycle
I0322 10:30:34.317675  543705 disk_info.go:125] begin check local disk info of client
I0322 10:30:34.320147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:30:34.320152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000229a40 0xc000229a80]
I0322 10:30:39.612767  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:30:39.612773  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:30:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:30:43.410557  543705 memory.go:191] Add success.
I0322 10:30:43.409809  543705 cpu.go:282] Add success.
I0322 10:30:43.420374  543705 net.go:648] Add success.
I0322 10:30:43.422923  543705 net.go:770] primary dev: ETH0
I0322 10:30:43.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:30:43.422948  543705 net.go:698] Add success.
I0322 10:30:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:30:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:30:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:30:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:30:53.410247  543705 memory.go:184] no items to output this cycle
I0322 10:30:53.410275  543705 cpu.go:275] no items to output this cycle
E0322 10:31:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:31:03.409815  543705 memory.go:184] no items to output this cycle
I0322 10:31:03.409828  543705 cpu.go:275] no items to output this cycle
E0322 10:31:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:31:13.409788  543705 memory.go:191] Add success.
I0322 10:31:13.409808  543705 cpu.go:282] Add success.
W0322 10:31:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:31:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:31:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:31:13.420117  543705 net.go:648] Add success.
I0322 10:31:13.423005  543705 net.go:770] primary dev: ETH0
I0322 10:31:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:31:13.423034  543705 net.go:698] Add success.
I0322 10:31:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:31:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:31:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 10:31:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:31:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 10:31:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:31:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:31:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:31:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:31:23.410409  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:31:23.410420  543705 cpu.go:275] no items to output this cycle
I0322 10:31:23.410423  543705 memory.go:184] no items to output this cycle
E0322 10:31:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:31:33.409881  543705 memory.go:184] no items to output this cycle
I0322 10:31:33.409935  543705 cpu.go:275] no items to output this cycle
I0322 10:31:34.321669  543705 disk_info.go:125] begin check local disk info of client
I0322 10:31:34.324175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:31:34.324180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3ec0 0xc0003f3f00]
E0322 10:31:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:31:43.410687  543705 memory.go:191] Add success.
I0322 10:31:43.409818  543705 cpu.go:282] Add success.
I0322 10:31:43.420431  543705 net.go:648] Add success.
I0322 10:31:43.423066  543705 net.go:770] primary dev: ETH0
I0322 10:31:43.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:31:43.423096  543705 net.go:698] Add success.
I0322 10:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:31:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:31:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:31:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:31:53.409802  543705 memory.go:184] no items to output this cycle
I0322 10:31:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 10:32:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:32:03.409807  543705 memory.go:184] no items to output this cycle
I0322 10:32:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 10:32:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:32:13.409793  543705 memory.go:191] Add success.
I0322 10:32:13.409817  543705 cpu.go:282] Add success.
W0322 10:32:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:32:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:32:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:32:13.420131  543705 net.go:648] Add success.
I0322 10:32:13.422844  543705 net.go:770] primary dev: ETH0
I0322 10:32:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:32:13.422870  543705 net.go:698] Add success.
W0322 10:32:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:32:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 10:32:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:32:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:32:14.455893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:32:14.455899  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:32:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 10:32:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:32:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:32:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:32:16.457904  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:32:16.457904  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:32:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:32:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:32:16.472312  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:32:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:32:23.409777  543705 memory.go:184] no items to output this cycle
I0322 10:32:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 10:32:33.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:32:33.409896  543705 cpu.go:275] no items to output this cycle
I0322 10:32:33.409909  543705 memory.go:184] no items to output this cycle
I0322 10:32:34.325684  543705 disk_info.go:125] begin check local disk info of client
I0322 10:32:34.328165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:32:34.328170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275680 0xc0002756c0]
E0322 10:32:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:32:43.410603  543705 memory.go:191] Add success.
I0322 10:32:43.409818  543705 cpu.go:282] Add success.
I0322 10:32:43.420291  543705 net.go:648] Add success.
I0322 10:32:43.422978  543705 net.go:770] primary dev: ETH0
I0322 10:32:43.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:32:43.423007  543705 net.go:698] Add success.
I0322 10:32:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:32:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:32:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:32:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:32:53.409773  543705 memory.go:184] no items to output this cycle
I0322 10:32:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 10:33:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:33:03.409805  543705 memory.go:184] no items to output this cycle
I0322 10:33:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 10:33:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:33:13.409797  543705 memory.go:191] Add success.
I0322 10:33:13.409798  543705 cpu.go:282] Add success.
W0322 10:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:33:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:33:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:33:13.420410  543705 net.go:648] Add success.
I0322 10:33:13.423114  543705 net.go:770] primary dev: ETH0
I0322 10:33:13.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:33:13.423140  543705 net.go:698] Add success.
I0322 10:33:13.478673  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5fe25fe9-881b-4aea-8eff-b597bdf69866","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:33:13.478707  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:33:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:33:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:33:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 10:33:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:33:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 10:33:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:33:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:33:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:33:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:33:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:33:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:33:23.409794  543705 memory.go:184] no items to output this cycle
I0322 10:33:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 10:33:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:33:33.409788  543705 memory.go:184] no items to output this cycle
I0322 10:33:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 10:33:34.329695  543705 disk_info.go:125] begin check local disk info of client
I0322 10:33:34.332200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:33:34.332207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5400 0xc0000c5440]
I0322 10:33:39.613733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:33:39.613740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:33:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:33:43.410659  543705 memory.go:191] Add success.
I0322 10:33:43.409805  543705 cpu.go:282] Add success.
I0322 10:33:43.420393  543705 net.go:648] Add success.
I0322 10:33:43.422968  543705 net.go:770] primary dev: ETH0
I0322 10:33:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:33:43.422997  543705 net.go:698] Add success.
I0322 10:33:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:33:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:33:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:33:53.410397  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:33:53.410413  543705 memory.go:184] no items to output this cycle
I0322 10:33:53.410437  543705 cpu.go:275] no items to output this cycle
E0322 10:34:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:34:03.409781  543705 memory.go:184] no items to output this cycle
I0322 10:34:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:34:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:34:13.409816  543705 memory.go:191] Add success.
I0322 10:34:13.409825  543705 cpu.go:282] Add success.
W0322 10:34:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:34:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:34:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:34:13.420209  543705 net.go:648] Add success.
I0322 10:34:13.422854  543705 net.go:770] primary dev: ETH0
I0322 10:34:13.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:34:13.422880  543705 net.go:698] Add success.
I0322 10:34:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:34:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:34:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 10:34:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:34:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 10:34:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:34:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:34:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:34:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:34:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:34:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:34:23.409773  543705 memory.go:184] no items to output this cycle
I0322 10:34:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 10:34:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:34:33.409804  543705 memory.go:184] no items to output this cycle
I0322 10:34:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 10:34:34.333662  543705 disk_info.go:125] begin check local disk info of client
I0322 10:34:34.336207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:34:34.336214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cb880 0xc0004cb8c0]
E0322 10:34:43.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:34:43.410655  543705 memory.go:191] Add success.
I0322 10:34:43.409960  543705 cpu.go:282] Add success.
I0322 10:34:43.419731  543705 net.go:648] Add success.
I0322 10:34:43.422303  543705 net.go:770] primary dev: ETH0
I0322 10:34:43.422318  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:34:43.422331  543705 net.go:698] Add success.
I0322 10:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:34:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:34:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:34:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:34:53.409797  543705 cpu.go:275] no items to output this cycle
I0322 10:34:53.409803  543705 memory.go:184] no items to output this cycle
E0322 10:35:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:35:03.409806  543705 memory.go:184] no items to output this cycle
I0322 10:35:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 10:35:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:35:13.409824  543705 memory.go:191] Add success.
I0322 10:35:13.409825  543705 cpu.go:282] Add success.
W0322 10:35:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:35:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:35:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:35:13.420177  543705 net.go:648] Add success.
I0322 10:35:13.423135  543705 net.go:770] primary dev: ETH0
I0322 10:35:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:35:13.423161  543705 net.go:698] Add success.
I0322 10:35:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:35:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:35:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 10:35:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:35:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 10:35:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:35:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:35:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:35:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:35:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:35:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:35:23.409798  543705 memory.go:184] no items to output this cycle
I0322 10:35:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 10:35:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:35:33.409799  543705 memory.go:184] no items to output this cycle
I0322 10:35:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 10:35:34.336336  543705 disk_info.go:125] begin check local disk info of client
I0322 10:35:34.338867  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:35:34.338874  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001560c0 0xc000156100]
E0322 10:35:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:35:43.410704  543705 memory.go:191] Add success.
I0322 10:35:43.409828  543705 cpu.go:282] Add success.
I0322 10:35:43.420480  543705 net.go:648] Add success.
I0322 10:35:43.423130  543705 net.go:770] primary dev: ETH0
I0322 10:35:43.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:35:43.423154  543705 net.go:698] Add success.
I0322 10:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:35:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:35:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:35:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:35:53.409790  543705 memory.go:184] no items to output this cycle
I0322 10:35:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 10:36:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:36:03.409776  543705 memory.go:184] no items to output this cycle
I0322 10:36:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 10:36:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:36:13.409798  543705 memory.go:191] Add success.
I0322 10:36:13.409804  543705 cpu.go:282] Add success.
W0322 10:36:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:36:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:36:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:36:13.420136  543705 net.go:648] Add success.
I0322 10:36:13.423020  543705 net.go:770] primary dev: ETH0
I0322 10:36:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:36:13.423045  543705 net.go:698] Add success.
I0322 10:36:13.687027  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6299bcaf-0220-4ff9-882a-8a009a60bd3d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:36:13.687062  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:36:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:36:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:36:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 10:36:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:36:14.456531  543705 disk_worker.go:494] system disk:vda1
I0322 10:36:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:36:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:36:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:36:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:36:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:36:23.409758  543705 memory.go:184] no items to output this cycle
I0322 10:36:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:36:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:36:33.409772  543705 memory.go:184] no items to output this cycle
I0322 10:36:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 10:36:34.338958  543705 disk_info.go:125] begin check local disk info of client
I0322 10:36:34.341476  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:36:34.341482  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001fc280 0xc0001fc2c0]
I0322 10:36:39.613874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:36:39.613881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:36:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:36:43.410760  543705 memory.go:191] Add success.
I0322 10:36:43.409821  543705 cpu.go:282] Add success.
I0322 10:36:43.420460  543705 net.go:648] Add success.
I0322 10:36:43.423175  543705 net.go:770] primary dev: ETH0
I0322 10:36:43.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:36:43.423221  543705 net.go:698] Add success.
I0322 10:36:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:36:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:36:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:36:53.409796  543705 memory.go:184] no items to output this cycle
I0322 10:36:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 10:37:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:37:03.409784  543705 memory.go:184] no items to output this cycle
I0322 10:37:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 10:37:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:37:13.409795  543705 memory.go:191] Add success.
I0322 10:37:13.409800  543705 cpu.go:282] Add success.
W0322 10:37:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:37:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:37:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:37:13.420245  543705 net.go:648] Add success.
I0322 10:37:13.422793  543705 net.go:770] primary dev: ETH0
I0322 10:37:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:37:13.422823  543705 net.go:698] Add success.
I0322 10:37:13.453421  543705 event_worker.go:152] Polling the log file for events...
W0322 10:37:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:37:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 10:37:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:37:14.456905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:37:14.456914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:37:14.456920  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:37:14.456987  543705 disk_worker.go:494] system disk:vda1
I0322 10:37:14.457027  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:37:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:37:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:37:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:37:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:37:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:37:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:37:16.472306  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:37:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:37:23.409768  543705 memory.go:184] no items to output this cycle
I0322 10:37:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 10:37:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:37:33.409768  543705 memory.go:184] no items to output this cycle
I0322 10:37:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 10:37:34.345667  543705 disk_info.go:125] begin check local disk info of client
I0322 10:37:34.348232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:37:34.348239  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051de00 0xc00051de40]
E0322 10:37:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:37:43.410702  543705 memory.go:191] Add success.
I0322 10:37:43.409820  543705 cpu.go:282] Add success.
I0322 10:37:43.420384  543705 net.go:648] Add success.
I0322 10:37:43.423259  543705 net.go:770] primary dev: ETH0
I0322 10:37:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:37:43.423299  543705 net.go:698] Add success.
I0322 10:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:37:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:37:53.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:37:53.410422  543705 memory.go:184] no items to output this cycle
I0322 10:37:53.410425  543705 cpu.go:275] no items to output this cycle
E0322 10:38:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:38:03.409776  543705 memory.go:184] no items to output this cycle
I0322 10:38:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:38:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:38:13.409831  543705 memory.go:191] Add success.
I0322 10:38:13.409835  543705 cpu.go:282] Add success.
W0322 10:38:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:38:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:38:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:38:13.420211  543705 net.go:648] Add success.
I0322 10:38:13.423276  543705 net.go:770] primary dev: ETH0
I0322 10:38:13.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:38:13.423304  543705 net.go:698] Add success.
I0322 10:38:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:38:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:38:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 10:38:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:38:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 10:38:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:38:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:38:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:38:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:38:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:38:23.409792  543705 memory.go:184] no items to output this cycle
I0322 10:38:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 10:38:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:38:33.409776  543705 memory.go:184] no items to output this cycle
I0322 10:38:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 10:38:34.348326  543705 disk_info.go:125] begin check local disk info of client
I0322 10:38:34.350875  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:38:34.350881  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032e340 0xc00032e380]
E0322 10:38:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:38:43.410826  543705 memory.go:191] Add success.
I0322 10:38:43.409829  543705 cpu.go:282] Add success.
I0322 10:38:43.420681  543705 net.go:648] Add success.
I0322 10:38:43.423835  543705 net.go:770] primary dev: ETH0
I0322 10:38:43.423848  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:38:43.423860  543705 net.go:698] Add success.
I0322 10:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:38:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:38:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:38:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:38:53.409773  543705 memory.go:184] no items to output this cycle
I0322 10:38:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:39:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:39:03.409771  543705 memory.go:184] no items to output this cycle
I0322 10:39:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 10:39:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:39:13.409782  543705 memory.go:191] Add success.
W0322 10:39:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:39:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:39:13.409826  543705 cpu.go:282] Add success.
I0322 10:39:13.420341  543705 net.go:648] Add success.
I0322 10:39:13.423590  543705 net.go:770] primary dev: ETH0
I0322 10:39:13.423604  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:39:13.423618  543705 net.go:698] Add success.
I0322 10:39:13.483957  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"28a4fc37-2e2b-4730-8031-876df869da4f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:39:13.483995  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:39:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:39:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:39:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 10:39:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:39:14.456714  543705 disk_worker.go:494] system disk:vda1
I0322 10:39:14.456749  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:39:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:39:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:39:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:39:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:39:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:39:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:39:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 10:39:23.409797  543705 memory.go:184] no items to output this cycle
E0322 10:39:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:39:33.409804  543705 memory.go:184] no items to output this cycle
I0322 10:39:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 10:39:34.350970  543705 disk_info.go:125] begin check local disk info of client
I0322 10:39:34.353453  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:39:34.353459  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051e680 0xc00051e6c0]
I0322 10:39:39.614020  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:39:39.614027  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:39:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:39:43.410588  543705 memory.go:191] Add success.
I0322 10:39:43.409807  543705 cpu.go:282] Add success.
I0322 10:39:43.420308  543705 net.go:648] Add success.
I0322 10:39:43.422929  543705 net.go:770] primary dev: ETH0
I0322 10:39:43.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:39:43.422955  543705 net.go:698] Add success.
I0322 10:39:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:39:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:39:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:39:53.409806  543705 memory.go:184] no items to output this cycle
I0322 10:39:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 10:40:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:40:03.409772  543705 memory.go:184] no items to output this cycle
I0322 10:40:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:40:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:40:13.409835  543705 memory.go:191] Add success.
I0322 10:40:13.409836  543705 cpu.go:282] Add success.
W0322 10:40:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:40:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:40:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:40:13.420277  543705 net.go:648] Add success.
I0322 10:40:13.423031  543705 net.go:770] primary dev: ETH0
I0322 10:40:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:40:13.423060  543705 net.go:698] Add success.
I0322 10:40:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:40:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:40:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 10:40:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:40:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 10:40:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:40:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:40:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:40:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:40:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:40:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:40:23.409773  543705 memory.go:184] no items to output this cycle
I0322 10:40:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 10:40:33.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:40:33.409894  543705 memory.go:184] no items to output this cycle
I0322 10:40:33.409961  543705 cpu.go:275] no items to output this cycle
I0322 10:40:34.353671  543705 disk_info.go:125] begin check local disk info of client
I0322 10:40:34.356212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:40:34.356217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0322 10:40:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:40:43.410830  543705 memory.go:191] Add success.
I0322 10:40:43.409806  543705 cpu.go:282] Add success.
I0322 10:40:43.420539  543705 net.go:648] Add success.
I0322 10:40:43.423542  543705 net.go:770] primary dev: ETH0
I0322 10:40:43.423556  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:40:43.423570  543705 net.go:698] Add success.
I0322 10:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:40:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:40:53.409817  543705 memory.go:184] no items to output this cycle
I0322 10:40:53.409828  543705 cpu.go:275] no items to output this cycle
E0322 10:41:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:41:03.409793  543705 memory.go:184] no items to output this cycle
I0322 10:41:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:41:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:41:13.409807  543705 memory.go:191] Add success.
I0322 10:41:13.409808  543705 cpu.go:282] Add success.
W0322 10:41:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:41:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:41:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:41:13.420315  543705 net.go:648] Add success.
I0322 10:41:13.422867  543705 net.go:770] primary dev: ETH0
I0322 10:41:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:41:13.422892  543705 net.go:698] Add success.
I0322 10:41:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:41:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:41:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 10:41:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:41:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 10:41:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:41:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:41:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:41:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:41:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:41:23.409803  543705 memory.go:184] no items to output this cycle
I0322 10:41:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 10:41:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:41:33.409798  543705 memory.go:184] no items to output this cycle
I0322 10:41:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 10:41:34.357672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:41:34.360143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:41:34.360149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8fc0 0xc0004d9000]
E0322 10:41:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:41:43.410640  543705 memory.go:191] Add success.
I0322 10:41:43.409821  543705 cpu.go:282] Add success.
I0322 10:41:43.420380  543705 net.go:648] Add success.
I0322 10:41:43.423102  543705 net.go:770] primary dev: ETH0
I0322 10:41:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:41:43.423130  543705 net.go:698] Add success.
I0322 10:41:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:41:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:41:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:41:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:41:53.409808  543705 memory.go:184] no items to output this cycle
I0322 10:41:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 10:42:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:42:03.409779  543705 memory.go:184] no items to output this cycle
I0322 10:42:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 10:42:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:42:13.409809  543705 memory.go:191] Add success.
I0322 10:42:13.409809  543705 cpu.go:282] Add success.
W0322 10:42:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:42:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:42:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:42:13.420249  543705 net.go:648] Add success.
I0322 10:42:13.423056  543705 net.go:770] primary dev: ETH0
I0322 10:42:13.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:42:13.423082  543705 net.go:698] Add success.
I0322 10:42:13.468214  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5847e92c-78ef-4ac9-ba20-9ac234575b37","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:42:13.468256  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 10:42:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:42:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 10:42:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:42:14.456820  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:42:14.456829  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:42:14.456846  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:42:14.456874  543705 disk_worker.go:494] system disk:vda1
I0322 10:42:14.456905  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:42:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:42:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:42:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:42:16.457992  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:42:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:42:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:42:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:42:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:42:23.409795  543705 memory.go:184] no items to output this cycle
I0322 10:42:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 10:42:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:42:33.409775  543705 memory.go:184] no items to output this cycle
I0322 10:42:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 10:42:34.361670  543705 disk_info.go:125] begin check local disk info of client
I0322 10:42:34.364134  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:42:34.364140  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe740 0xc0003fe780]
I0322 10:42:39.614771  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:42:39.614777  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:42:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:42:43.410733  543705 memory.go:191] Add success.
I0322 10:42:43.409819  543705 cpu.go:282] Add success.
I0322 10:42:43.420446  543705 net.go:648] Add success.
I0322 10:42:43.423090  543705 net.go:770] primary dev: ETH0
I0322 10:42:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:42:43.423116  543705 net.go:698] Add success.
I0322 10:42:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:42:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:42:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:42:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:42:53.409798  543705 memory.go:184] no items to output this cycle
I0322 10:42:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 10:43:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:43:03.409802  543705 memory.go:184] no items to output this cycle
I0322 10:43:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 10:43:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:43:13.409801  543705 memory.go:191] Add success.
I0322 10:43:13.409802  543705 cpu.go:282] Add success.
W0322 10:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:43:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:43:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:43:13.420238  543705 net.go:648] Add success.
I0322 10:43:13.423160  543705 net.go:770] primary dev: ETH0
I0322 10:43:13.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:43:13.423184  543705 net.go:698] Add success.
I0322 10:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:43:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:43:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 10:43:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:43:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 10:43:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:43:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:43:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:43:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:43:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:43:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:43:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:43:23.409771  543705 memory.go:184] no items to output this cycle
I0322 10:43:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 10:43:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:43:33.409793  543705 memory.go:184] no items to output this cycle
I0322 10:43:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 10:43:34.365667  543705 disk_info.go:125] begin check local disk info of client
I0322 10:43:34.368140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:43:34.368147  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4ac0 0xc0000c4b00]
E0322 10:43:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:43:43.410800  543705 memory.go:191] Add success.
I0322 10:43:43.409827  543705 cpu.go:282] Add success.
I0322 10:43:43.420468  543705 net.go:648] Add success.
I0322 10:43:43.423390  543705 net.go:770] primary dev: ETH0
I0322 10:43:43.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:43:43.423415  543705 net.go:698] Add success.
I0322 10:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:43:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:43:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:43:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:43:53.409773  543705 memory.go:184] no items to output this cycle
I0322 10:43:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 10:44:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:44:03.409799  543705 memory.go:184] no items to output this cycle
I0322 10:44:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 10:44:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:44:13.409832  543705 memory.go:191] Add success.
I0322 10:44:13.409840  543705 cpu.go:282] Add success.
W0322 10:44:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:44:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:44:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:44:13.420305  543705 net.go:648] Add success.
I0322 10:44:13.423075  543705 net.go:770] primary dev: ETH0
I0322 10:44:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:44:13.423103  543705 net.go:698] Add success.
I0322 10:44:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:44:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:44:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 10:44:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:44:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 10:44:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:44:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:44:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:44:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:44:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:44:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:44:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:44:23.409801  543705 memory.go:184] no items to output this cycle
I0322 10:44:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 10:44:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:44:33.409876  543705 memory.go:184] no items to output this cycle
I0322 10:44:33.409950  543705 cpu.go:275] no items to output this cycle
I0322 10:44:34.369673  543705 disk_info.go:125] begin check local disk info of client
I0322 10:44:34.372118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:44:34.372124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5c40 0xc0004b5c80]
E0322 10:44:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:44:43.410716  543705 memory.go:191] Add success.
I0322 10:44:43.409834  543705 cpu.go:282] Add success.
I0322 10:44:43.420449  543705 net.go:648] Add success.
I0322 10:44:43.423405  543705 net.go:770] primary dev: ETH0
I0322 10:44:43.423418  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:44:43.423431  543705 net.go:698] Add success.
I0322 10:44:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:44:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:44:53.409779  543705 memory.go:184] no items to output this cycle
I0322 10:44:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 10:45:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:45:03.409779  543705 memory.go:184] no items to output this cycle
I0322 10:45:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 10:45:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:45:13.409809  543705 memory.go:191] Add success.
I0322 10:45:13.409811  543705 cpu.go:282] Add success.
W0322 10:45:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:45:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:45:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:45:13.420255  543705 net.go:648] Add success.
I0322 10:45:13.423220  543705 net.go:770] primary dev: ETH0
I0322 10:45:13.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:45:13.423249  543705 net.go:698] Add success.
I0322 10:45:13.467805  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f6e9da74-30e3-431e-9325-bd19ccd4fdda","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:45:13.467843  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:45:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:45:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:45:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 10:45:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:45:14.456526  543705 disk_worker.go:494] system disk:vda1
I0322 10:45:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:45:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:45:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:45:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:45:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:45:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:45:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:45:23.409777  543705 memory.go:184] no items to output this cycle
I0322 10:45:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 10:45:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:45:33.409785  543705 memory.go:184] no items to output this cycle
I0322 10:45:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 10:45:34.373672  543705 disk_info.go:125] begin check local disk info of client
I0322 10:45:34.376143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:45:34.376149  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369f00 0xc000369f40]
I0322 10:45:39.615774  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:45:39.615781  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:45:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:45:43.410648  543705 memory.go:191] Add success.
I0322 10:45:43.409801  543705 cpu.go:282] Add success.
I0322 10:45:43.420372  543705 net.go:648] Add success.
I0322 10:45:43.423125  543705 net.go:770] primary dev: ETH0
I0322 10:45:43.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:45:43.423155  543705 net.go:698] Add success.
I0322 10:45:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:45:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:45:53.410268  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:45:53.410286  543705 memory.go:184] no items to output this cycle
I0322 10:45:53.410297  543705 cpu.go:275] no items to output this cycle
E0322 10:46:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:46:03.409780  543705 memory.go:184] no items to output this cycle
I0322 10:46:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:46:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:46:13.409811  543705 memory.go:191] Add success.
I0322 10:46:13.409813  543705 cpu.go:282] Add success.
W0322 10:46:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:46:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:46:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:46:13.420172  543705 net.go:648] Add success.
I0322 10:46:13.422780  543705 net.go:770] primary dev: ETH0
I0322 10:46:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:46:13.422810  543705 net.go:698] Add success.
I0322 10:46:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:46:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:46:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 10:46:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:46:14.456608  543705 disk_worker.go:494] system disk:vda1
I0322 10:46:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:46:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:46:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:46:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:46:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:46:23.409780  543705 memory.go:184] no items to output this cycle
I0322 10:46:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 10:46:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:46:33.409848  543705 memory.go:184] no items to output this cycle
I0322 10:46:33.409954  543705 cpu.go:275] no items to output this cycle
I0322 10:46:34.377667  543705 disk_info.go:125] begin check local disk info of client
I0322 10:46:34.380168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:46:34.380173  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368a00 0xc000368a40]
E0322 10:46:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:46:43.410717  543705 memory.go:191] Add success.
I0322 10:46:43.409803  543705 cpu.go:282] Add success.
I0322 10:46:43.420486  543705 net.go:648] Add success.
I0322 10:46:43.423046  543705 net.go:770] primary dev: ETH0
I0322 10:46:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:46:43.423077  543705 net.go:698] Add success.
I0322 10:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:46:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:46:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:46:53.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:46:53.410392  543705 memory.go:184] no items to output this cycle
I0322 10:46:53.410420  543705 cpu.go:275] no items to output this cycle
E0322 10:47:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:47:03.409802  543705 memory.go:184] no items to output this cycle
I0322 10:47:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 10:47:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:47:13.409827  543705 memory.go:191] Add success.
I0322 10:47:13.409832  543705 cpu.go:282] Add success.
W0322 10:47:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:47:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:47:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:47:13.420345  543705 net.go:648] Add success.
I0322 10:47:13.423091  543705 net.go:770] primary dev: ETH0
I0322 10:47:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:47:13.423117  543705 net.go:698] Add success.
I0322 10:47:13.453664  543705 event_worker.go:152] Polling the log file for events...
W0322 10:47:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:47:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 10:47:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:47:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:47:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:47:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:47:14.456559  543705 disk_worker.go:494] system disk:vda1
I0322 10:47:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:47:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:47:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:47:16.457898  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:47:16.457897  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:47:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:47:16.457971  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:47:16.472300  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:47:23.410222  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:47:23.410239  543705 memory.go:184] no items to output this cycle
I0322 10:47:23.410262  543705 cpu.go:275] no items to output this cycle
E0322 10:47:33.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:47:33.409878  543705 memory.go:184] no items to output this cycle
I0322 10:47:33.409925  543705 cpu.go:275] no items to output this cycle
I0322 10:47:34.381670  543705 disk_info.go:125] begin check local disk info of client
I0322 10:47:34.384148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:47:34.384154  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d81c0 0xc0004d8200]
E0322 10:47:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:47:43.411014  543705 memory.go:191] Add success.
I0322 10:47:43.409832  543705 cpu.go:282] Add success.
I0322 10:47:43.419737  543705 net.go:648] Add success.
I0322 10:47:43.422555  543705 net.go:770] primary dev: ETH0
I0322 10:47:43.422570  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:47:43.422584  543705 net.go:698] Add success.
I0322 10:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:47:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:47:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:47:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:47:53.409777  543705 memory.go:184] no items to output this cycle
I0322 10:47:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 10:48:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:48:03.409786  543705 memory.go:184] no items to output this cycle
I0322 10:48:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 10:48:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:48:13.409830  543705 memory.go:191] Add success.
I0322 10:48:13.409837  543705 cpu.go:282] Add success.
W0322 10:48:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:48:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:48:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:48:13.420191  543705 net.go:648] Add success.
I0322 10:48:13.423071  543705 net.go:770] primary dev: ETH0
I0322 10:48:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:48:13.423096  543705 net.go:698] Add success.
I0322 10:48:13.468741  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"908a982b-a449-45ab-a88d-ca02cf2c2b75","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:48:13.468773  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:48:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:48:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 10:48:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:48:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 10:48:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:48:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:48:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:48:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:48:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:48:23.409778  543705 memory.go:184] no items to output this cycle
I0322 10:48:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 10:48:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:48:33.409771  543705 memory.go:184] no items to output this cycle
I0322 10:48:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 10:48:34.385671  543705 disk_info.go:125] begin check local disk info of client
I0322 10:48:34.388148  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:48:34.388154  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb40 0xc00007bb80]
I0322 10:48:39.616781  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:48:39.616788  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:48:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:48:43.410585  543705 memory.go:191] Add success.
I0322 10:48:43.409811  543705 cpu.go:282] Add success.
I0322 10:48:43.420327  543705 net.go:648] Add success.
I0322 10:48:43.423621  543705 net.go:770] primary dev: ETH0
I0322 10:48:43.423634  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:48:43.423647  543705 net.go:698] Add success.
I0322 10:48:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:48:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:48:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:48:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:48:53.409795  543705 memory.go:184] no items to output this cycle
I0322 10:48:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 10:49:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:49:03.409777  543705 memory.go:184] no items to output this cycle
I0322 10:49:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 10:49:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:49:13.409813  543705 memory.go:191] Add success.
I0322 10:49:13.409814  543705 cpu.go:282] Add success.
W0322 10:49:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:49:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:49:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:49:13.420112  543705 net.go:648] Add success.
I0322 10:49:13.422805  543705 net.go:770] primary dev: ETH0
I0322 10:49:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:49:13.422832  543705 net.go:698] Add success.
I0322 10:49:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:49:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:49:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 10:49:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:49:14.456617  543705 disk_worker.go:494] system disk:vda1
I0322 10:49:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:49:15.455938  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:49:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:49:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:49:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:49:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:49:23.409779  543705 memory.go:184] no items to output this cycle
I0322 10:49:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 10:49:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:49:33.409893  543705 cpu.go:275] no items to output this cycle
I0322 10:49:33.409898  543705 memory.go:184] no items to output this cycle
I0322 10:49:34.391996  543705 disk_info.go:125] begin check local disk info of client
I0322 10:49:34.394454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:49:34.394460  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9f00 0xc0004d9f40]
E0322 10:49:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:49:43.410809  543705 memory.go:191] Add success.
I0322 10:49:43.409809  543705 cpu.go:282] Add success.
I0322 10:49:43.420501  543705 net.go:648] Add success.
I0322 10:49:43.423985  543705 net.go:770] primary dev: ETH0
I0322 10:49:43.424000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:49:43.424015  543705 net.go:698] Add success.
I0322 10:49:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:49:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:49:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:49:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:49:53.409766  543705 memory.go:184] no items to output this cycle
I0322 10:49:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:50:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:50:03.409785  543705 memory.go:184] no items to output this cycle
I0322 10:50:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:50:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:50:13.409816  543705 memory.go:191] Add success.
I0322 10:50:13.409835  543705 cpu.go:282] Add success.
W0322 10:50:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:50:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:50:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:50:13.420233  543705 net.go:648] Add success.
I0322 10:50:13.422995  543705 net.go:770] primary dev: ETH0
I0322 10:50:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:50:13.423019  543705 net.go:698] Add success.
I0322 10:50:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:50:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:50:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 10:50:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:50:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 10:50:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:50:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:50:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:50:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:50:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:50:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:50:23.409794  543705 memory.go:184] no items to output this cycle
I0322 10:50:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 10:50:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:50:33.409765  543705 memory.go:184] no items to output this cycle
I0322 10:50:33.409894  543705 cpu.go:275] no items to output this cycle
I0322 10:50:34.397668  543705 disk_info.go:125] begin check local disk info of client
I0322 10:50:34.400204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:50:34.400209  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487a80 0xc000487ac0]
E0322 10:50:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:50:43.410700  543705 memory.go:191] Add success.
I0322 10:50:43.409805  543705 cpu.go:282] Add success.
I0322 10:50:43.420460  543705 net.go:648] Add success.
I0322 10:50:43.423253  543705 net.go:770] primary dev: ETH0
I0322 10:50:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:50:43.423277  543705 net.go:698] Add success.
I0322 10:50:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:50:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:50:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:50:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:50:53.409783  543705 memory.go:184] no items to output this cycle
I0322 10:50:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 10:51:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:51:03.409795  543705 memory.go:184] no items to output this cycle
I0322 10:51:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:51:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:51:13.409837  543705 memory.go:191] Add success.
I0322 10:51:13.409843  543705 cpu.go:282] Add success.
W0322 10:51:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:51:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:51:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:51:13.420129  543705 net.go:648] Add success.
I0322 10:51:13.422852  543705 net.go:770] primary dev: ETH0
I0322 10:51:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:51:13.422878  543705 net.go:698] Add success.
I0322 10:51:13.632162  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9495546a-f947-4801-9449-72ca83001d86","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:51:13.632204  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:51:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:51:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:51:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 10:51:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:51:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 10:51:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:51:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:51:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:51:23.410522  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:51:23.410528  543705 cpu.go:275] no items to output this cycle
I0322 10:51:23.410536  543705 memory.go:184] no items to output this cycle
E0322 10:51:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:51:33.409916  543705 cpu.go:275] no items to output this cycle
I0322 10:51:33.409917  543705 memory.go:184] no items to output this cycle
I0322 10:51:34.401668  543705 disk_info.go:125] begin check local disk info of client
I0322 10:51:34.404193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:51:34.404198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff300 0xc0003ff340]
I0322 10:51:39.617744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:51:39.617751  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:51:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:51:43.410596  543705 memory.go:191] Add success.
I0322 10:51:43.409814  543705 cpu.go:282] Add success.
I0322 10:51:43.420352  543705 net.go:648] Add success.
I0322 10:51:43.422897  543705 net.go:770] primary dev: ETH0
I0322 10:51:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:51:43.422922  543705 net.go:698] Add success.
I0322 10:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:51:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:51:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:51:53.409772  543705 memory.go:184] no items to output this cycle
I0322 10:51:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 10:52:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:52:03.409796  543705 memory.go:184] no items to output this cycle
I0322 10:52:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:52:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:52:13.409810  543705 memory.go:191] Add success.
I0322 10:52:13.409811  543705 cpu.go:282] Add success.
W0322 10:52:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:52:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:52:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:52:13.420625  543705 net.go:648] Add success.
I0322 10:52:13.423394  543705 net.go:770] primary dev: ETH0
I0322 10:52:13.423408  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:52:13.423420  543705 net.go:698] Add success.
W0322 10:52:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:52:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 10:52:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:52:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 10:52:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:52:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:52:14.456544  543705 disk_worker.go:494] system disk:vda1
I0322 10:52:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:52:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:52:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:52:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:52:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:52:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:52:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:52:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:52:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:52:23.409774  543705 memory.go:184] no items to output this cycle
I0322 10:52:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 10:52:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:52:33.409876  543705 memory.go:184] no items to output this cycle
I0322 10:52:33.409978  543705 cpu.go:275] no items to output this cycle
I0322 10:52:34.405667  543705 disk_info.go:125] begin check local disk info of client
I0322 10:52:34.408157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:52:34.408162  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481200 0xc000481240]
E0322 10:52:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:52:43.410702  543705 memory.go:191] Add success.
I0322 10:52:43.409811  543705 cpu.go:282] Add success.
I0322 10:52:43.420464  543705 net.go:648] Add success.
I0322 10:52:43.424018  543705 net.go:770] primary dev: ETH0
I0322 10:52:43.424032  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:52:43.424044  543705 net.go:698] Add success.
I0322 10:52:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:52:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:52:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:52:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:52:53.410252  543705 memory.go:184] no items to output this cycle
I0322 10:52:53.410287  543705 cpu.go:275] no items to output this cycle
E0322 10:53:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:53:03.409782  543705 memory.go:184] no items to output this cycle
I0322 10:53:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 10:53:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:53:13.409826  543705 memory.go:191] Add success.
I0322 10:53:13.409835  543705 cpu.go:282] Add success.
W0322 10:53:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:53:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:53:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:53:13.420165  543705 net.go:648] Add success.
I0322 10:53:13.423179  543705 net.go:770] primary dev: ETH0
I0322 10:53:13.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:53:13.423204  543705 net.go:698] Add success.
I0322 10:53:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:53:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:53:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 10:53:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:53:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 10:53:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:53:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:53:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:53:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:53:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:53:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:53:23.409791  543705 memory.go:184] no items to output this cycle
I0322 10:53:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 10:53:33.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:53:33.409848  543705 memory.go:184] no items to output this cycle
I0322 10:53:33.409956  543705 cpu.go:275] no items to output this cycle
I0322 10:53:34.409678  543705 disk_info.go:125] begin check local disk info of client
I0322 10:53:34.412147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:53:34.412153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fe000 0xc0003fe040]
E0322 10:53:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:53:43.410762  543705 memory.go:191] Add success.
I0322 10:53:43.409796  543705 cpu.go:282] Add success.
I0322 10:53:43.420471  543705 net.go:648] Add success.
I0322 10:53:43.423125  543705 net.go:770] primary dev: ETH0
I0322 10:53:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:53:43.423152  543705 net.go:698] Add success.
I0322 10:53:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:53:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:53:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:53:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:53:53.409784  543705 cpu.go:275] no items to output this cycle
I0322 10:53:53.409789  543705 memory.go:184] no items to output this cycle
E0322 10:54:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:54:03.409807  543705 memory.go:184] no items to output this cycle
I0322 10:54:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 10:54:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:54:13.409786  543705 memory.go:191] Add success.
W0322 10:54:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 10:54:13.409818  543705 cpu.go:282] Add success.
W0322 10:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:54:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:54:13.419703  543705 net.go:648] Add success.
I0322 10:54:13.422411  543705 net.go:770] primary dev: ETH0
I0322 10:54:13.422426  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:54:13.422439  543705 net.go:698] Add success.
I0322 10:54:13.468579  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"71365b26-a333-4ea3-a4d0-f54186f69d22","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:54:13.468613  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 10:54:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:54:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:54:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0322 10:54:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:54:14.456805  543705 disk_worker.go:494] system disk:vda1
I0322 10:54:14.456841  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:54:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:54:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:54:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:54:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:54:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:54:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:54:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 10:54:23.409788  543705 memory.go:184] no items to output this cycle
E0322 10:54:33.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:54:33.409913  543705 cpu.go:275] no items to output this cycle
I0322 10:54:33.409914  543705 memory.go:184] no items to output this cycle
I0322 10:54:34.412792  543705 disk_info.go:125] begin check local disk info of client
I0322 10:54:34.415274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:54:34.415280  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490000 0xc000490040]
I0322 10:54:39.617890  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:54:39.617897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:54:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:54:43.410750  543705 memory.go:191] Add success.
I0322 10:54:43.409826  543705 cpu.go:282] Add success.
I0322 10:54:43.420448  543705 net.go:648] Add success.
I0322 10:54:43.423009  543705 net.go:770] primary dev: ETH0
I0322 10:54:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:54:43.423049  543705 net.go:698] Add success.
I0322 10:54:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:54:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:54:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:54:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:54:53.409764  543705 memory.go:184] no items to output this cycle
I0322 10:54:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 10:55:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:55:03.409777  543705 memory.go:184] no items to output this cycle
I0322 10:55:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 10:55:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:55:13.409793  543705 memory.go:191] Add success.
W0322 10:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 10:55:13.409819  543705 cpu.go:282] Add success.
W0322 10:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:55:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:55:13.420200  543705 net.go:648] Add success.
I0322 10:55:13.422946  543705 net.go:770] primary dev: ETH0
I0322 10:55:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:55:13.422975  543705 net.go:698] Add success.
I0322 10:55:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:55:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:55:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 10:55:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:55:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 10:55:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:55:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:55:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:55:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:55:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:55:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:55:23.409777  543705 memory.go:184] no items to output this cycle
I0322 10:55:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 10:55:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:55:33.409773  543705 memory.go:184] no items to output this cycle
I0322 10:55:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 10:55:34.415786  543705 disk_info.go:125] begin check local disk info of client
I0322 10:55:34.418295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:55:34.418302  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048f2c0 0xc00048f300]
E0322 10:55:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:55:43.410671  543705 memory.go:191] Add success.
I0322 10:55:43.409805  543705 cpu.go:282] Add success.
I0322 10:55:43.420383  543705 net.go:648] Add success.
I0322 10:55:43.422969  543705 net.go:770] primary dev: ETH0
I0322 10:55:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:55:43.423003  543705 net.go:698] Add success.
I0322 10:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:55:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:55:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:55:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:55:53.409794  543705 memory.go:184] no items to output this cycle
I0322 10:55:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 10:56:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:56:03.409779  543705 memory.go:184] no items to output this cycle
I0322 10:56:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 10:56:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:56:13.409801  543705 memory.go:191] Add success.
W0322 10:56:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 10:56:13.409829  543705 cpu.go:282] Add success.
W0322 10:56:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:56:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:56:13.420127  543705 net.go:648] Add success.
I0322 10:56:13.422972  543705 net.go:770] primary dev: ETH0
I0322 10:56:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:56:13.423001  543705 net.go:698] Add success.
I0322 10:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:56:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:56:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 10:56:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:56:14.456634  543705 disk_worker.go:494] system disk:vda1
I0322 10:56:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:56:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:56:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:56:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:56:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:56:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:56:23.409807  543705 memory.go:184] no items to output this cycle
I0322 10:56:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 10:56:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:56:33.409808  543705 memory.go:184] no items to output this cycle
I0322 10:56:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 10:56:34.418785  543705 disk_info.go:125] begin check local disk info of client
I0322 10:56:34.421217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:56:34.421224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bbd40 0xc0003bbd80]
E0322 10:56:43.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:56:43.410709  543705 memory.go:191] Add success.
I0322 10:56:43.409934  543705 cpu.go:282] Add success.
I0322 10:56:43.419708  543705 net.go:648] Add success.
I0322 10:56:43.422380  543705 net.go:770] primary dev: ETH0
I0322 10:56:43.422392  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:56:43.422404  543705 net.go:698] Add success.
I0322 10:56:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:56:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:56:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:56:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:56:53.409788  543705 memory.go:184] no items to output this cycle
I0322 10:56:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 10:57:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:57:03.409784  543705 memory.go:184] no items to output this cycle
I0322 10:57:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 10:57:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:57:13.409797  543705 memory.go:191] Add success.
W0322 10:57:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 10:57:13.409829  543705 cpu.go:282] Add success.
W0322 10:57:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:57:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:57:13.420208  543705 net.go:648] Add success.
I0322 10:57:13.429110  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 10:57:13.429185  543705 net.go:770] primary dev: ETH0
I0322 10:57:13.429197  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:57:13.429208  543705 net.go:698] Add success.
I0322 10:57:13.453729  543705 event_worker.go:152] Polling the log file for events...
I0322 10:57:13.469436  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c7404a57-182f-46f4-9ca9-cb967ed7c005","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 10:57:13.469479  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 10:57:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:57:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 10:57:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0322 10:57:14.456872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0322 10:57:14.456875  543705 disk_worker.go:494] system disk:vda1
E0322 10:57:14.456881  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 10:57:14.456887  543705 custom_config.go:64] query custom config with name: gpu
I0322 10:57:14.456916  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 10:57:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 10:57:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:57:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 10:57:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 10:57:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:57:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:57:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:57:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:57:23.409764  543705 memory.go:184] no items to output this cycle
I0322 10:57:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 10:57:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:57:33.409784  543705 memory.go:184] no items to output this cycle
I0322 10:57:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 10:57:34.421790  543705 disk_info.go:125] begin check local disk info of client
I0322 10:57:34.424307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:57:34.424311  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491500 0xc000491540]
I0322 10:57:39.618785  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 10:57:39.618791  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 10:57:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:57:43.410739  543705 memory.go:191] Add success.
I0322 10:57:43.409800  543705 cpu.go:282] Add success.
I0322 10:57:43.419716  543705 net.go:648] Add success.
I0322 10:57:43.422457  543705 net.go:770] primary dev: ETH0
I0322 10:57:43.422469  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:57:43.422481  543705 net.go:698] Add success.
I0322 10:57:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:57:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:57:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:57:53.409773  543705 memory.go:184] no items to output this cycle
I0322 10:57:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 10:58:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:58:03.409793  543705 memory.go:184] no items to output this cycle
I0322 10:58:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 10:58:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:58:13.409824  543705 memory.go:191] Add success.
I0322 10:58:13.409831  543705 cpu.go:282] Add success.
W0322 10:58:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:58:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:58:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:58:13.420216  543705 net.go:648] Add success.
I0322 10:58:13.423087  543705 net.go:770] primary dev: ETH0
I0322 10:58:13.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:58:13.423115  543705 net.go:698] Add success.
I0322 10:58:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:58:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:58:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 10:58:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:58:14.456595  543705 disk_worker.go:494] system disk:vda1
I0322 10:58:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:58:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:58:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:58:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:58:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:58:23.409792  543705 memory.go:184] no items to output this cycle
I0322 10:58:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 10:58:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:58:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 10:58:33.409795  543705 memory.go:184] no items to output this cycle
I0322 10:58:34.424813  543705 disk_info.go:125] begin check local disk info of client
I0322 10:58:34.427310  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:58:34.427316  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4b80 0xc0002a4bc0]
E0322 10:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:58:43.410772  543705 memory.go:191] Add success.
I0322 10:58:43.409816  543705 cpu.go:282] Add success.
I0322 10:58:43.420783  543705 net.go:648] Add success.
I0322 10:58:43.423373  543705 net.go:770] primary dev: ETH0
I0322 10:58:43.423397  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:58:43.423409  543705 net.go:698] Add success.
I0322 10:58:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:58:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:58:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:58:53.409797  543705 memory.go:184] no items to output this cycle
I0322 10:58:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 10:59:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:59:03.409788  543705 memory.go:184] no items to output this cycle
I0322 10:59:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 10:59:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:59:13.409826  543705 memory.go:191] Add success.
I0322 10:59:13.409835  543705 cpu.go:282] Add success.
W0322 10:59:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 10:59:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 10:59:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 10:59:13.420201  543705 net.go:648] Add success.
I0322 10:59:13.423092  543705 net.go:770] primary dev: ETH0
I0322 10:59:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:59:13.423118  543705 net.go:698] Add success.
I0322 10:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 10:59:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 10:59:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 10:59:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 10:59:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 10:59:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 10:59:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 10:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:59:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:59:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 10:59:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 10:59:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:59:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 10:59:23.409790  543705 memory.go:184] no items to output this cycle
E0322 10:59:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:59:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 10:59:33.409801  543705 memory.go:184] no items to output this cycle
I0322 10:59:34.427856  543705 disk_info.go:125] begin check local disk info of client
I0322 10:59:34.430338  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 10:59:34.430343  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327640 0xc000327680]
E0322 10:59:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:59:43.410662  543705 memory.go:191] Add success.
I0322 10:59:43.409813  543705 cpu.go:282] Add success.
I0322 10:59:43.420487  543705 net.go:648] Add success.
I0322 10:59:43.423686  543705 net.go:770] primary dev: ETH0
I0322 10:59:43.423698  543705 net.go:802] Send network stats successfully!,count is 6
I0322 10:59:43.423710  543705 net.go:698] Add success.
I0322 10:59:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 10:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 10:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 10:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 10:59:53.409779  543705 memory.go:184] no items to output this cycle
I0322 10:59:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 11:00:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:00:03.409796  543705 memory.go:184] no items to output this cycle
I0322 11:00:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 11:00:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:00:13.409832  543705 memory.go:191] Add success.
I0322 11:00:13.409835  543705 cpu.go:282] Add success.
W0322 11:00:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:00:13.412481  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:00:13.412487  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:00:13.420198  543705 net.go:648] Add success.
I0322 11:00:13.422117  543705 net.go:770] primary dev: ETH0
I0322 11:00:13.422133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:00:13.422147  543705 net.go:698] Add success.
I0322 11:00:13.690998  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e2835463-7948-44ce-bb72-a62834b74b55","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:00:13.691034  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:00:14.454728  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:00:14.454944  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:00:14.454956  543705 disk_worker.go:708] disk space is not compliant
W0322 11:00:14.454959  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:00:14.456550  543705 disk_worker.go:494] system disk:vda1
I0322 11:00:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:00:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:00:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:00:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:00:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:00:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:00:23.409800  543705 memory.go:184] no items to output this cycle
I0322 11:00:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 11:00:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:00:33.409783  543705 memory.go:184] no items to output this cycle
I0322 11:00:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 11:00:34.430845  543705 disk_info.go:125] begin check local disk info of client
I0322 11:00:34.433357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:00:34.433362  543705 disk_info.go:196] parse disk info done, disk is : [0xc000503380 0xc0005033c0]
I0322 11:00:39.618927  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:00:39.618933  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:00:43.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:00:43.410764  543705 memory.go:191] Add success.
I0322 11:00:43.410047  543705 cpu.go:282] Add success.
I0322 11:00:43.419724  543705 net.go:648] Add success.
I0322 11:00:43.422431  543705 net.go:770] primary dev: ETH0
I0322 11:00:43.422445  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:00:43.422458  543705 net.go:698] Add success.
I0322 11:00:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:00:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:00:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:00:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:00:53.409779  543705 memory.go:184] no items to output this cycle
I0322 11:00:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 11:01:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:01:03.409791  543705 cpu.go:275] no items to output this cycle
I0322 11:01:03.409795  543705 memory.go:184] no items to output this cycle
E0322 11:01:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:01:13.409816  543705 memory.go:191] Add success.
W0322 11:01:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:01:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:01:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:01:13.409886  543705 cpu.go:282] Add success.
I0322 11:01:13.420531  543705 net.go:648] Add success.
I0322 11:01:13.427118  543705 net.go:770] primary dev: ETH0
I0322 11:01:13.427136  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:01:13.427155  543705 net.go:698] Add success.
I0322 11:01:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:01:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:01:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 11:01:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:01:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 11:01:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:01:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:01:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:01:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:01:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:01:23.410268  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:01:23.410288  543705 memory.go:184] no items to output this cycle
I0322 11:01:23.410300  543705 cpu.go:275] no items to output this cycle
E0322 11:01:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:01:33.409779  543705 memory.go:184] no items to output this cycle
I0322 11:01:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 11:01:34.433845  543705 disk_info.go:125] begin check local disk info of client
I0322 11:01:34.436318  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:01:34.436323  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270000 0xc000270040]
E0322 11:01:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:01:43.410825  543705 memory.go:191] Add success.
I0322 11:01:43.409821  543705 cpu.go:282] Add success.
I0322 11:01:43.420597  543705 net.go:648] Add success.
I0322 11:01:43.423788  543705 net.go:770] primary dev: ETH0
I0322 11:01:43.423803  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:01:43.423817  543705 net.go:698] Add success.
I0322 11:01:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:01:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:01:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:01:53.409801  543705 memory.go:184] no items to output this cycle
I0322 11:01:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 11:02:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:02:03.409792  543705 memory.go:184] no items to output this cycle
I0322 11:02:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:02:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:02:13.409834  543705 memory.go:191] Add success.
I0322 11:02:13.409844  543705 cpu.go:282] Add success.
W0322 11:02:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:02:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:02:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:02:13.420399  543705 net.go:648] Add success.
I0322 11:02:13.423601  543705 net.go:770] primary dev: ETH0
I0322 11:02:13.423620  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:02:13.423649  543705 net.go:698] Add success.
W0322 11:02:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:02:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 11:02:14.455218  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:02:14.456037  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:02:14.456047  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:02:14.456053  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:02:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 11:02:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:02:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:02:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:02:16.457894  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:02:16.457903  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:02:16.457946  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:02:16.457963  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:02:16.472297  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:02:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:02:23.409769  543705 memory.go:184] no items to output this cycle
I0322 11:02:23.409901  543705 cpu.go:275] no items to output this cycle
E0322 11:02:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:02:33.409791  543705 memory.go:184] no items to output this cycle
I0322 11:02:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 11:02:34.436883  543705 disk_info.go:125] begin check local disk info of client
I0322 11:02:34.439387  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:02:34.439394  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049b740 0xc00049b780]
E0322 11:02:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:02:43.410644  543705 memory.go:191] Add success.
I0322 11:02:43.409820  543705 cpu.go:282] Add success.
I0322 11:02:43.420338  543705 net.go:648] Add success.
I0322 11:02:43.423151  543705 net.go:770] primary dev: ETH0
I0322 11:02:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:02:43.423175  543705 net.go:698] Add success.
I0322 11:02:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:02:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:02:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:02:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:02:53.409808  543705 cpu.go:275] no items to output this cycle
I0322 11:02:53.409818  543705 memory.go:184] no items to output this cycle
E0322 11:03:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:03:03.409818  543705 memory.go:184] no items to output this cycle
I0322 11:03:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 11:03:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:03:13.409836  543705 cpu.go:282] Add success.
I0322 11:03:13.409860  543705 memory.go:191] Add success.
W0322 11:03:13.409898  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:03:13.409920  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:03:13.409925  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:03:13.420280  543705 net.go:648] Add success.
I0322 11:03:13.423294  543705 net.go:770] primary dev: ETH0
I0322 11:03:13.423308  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:03:13.423321  543705 net.go:698] Add success.
I0322 11:03:13.467686  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"792e8274-f42d-4337-91fa-42b1fca64ad6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:03:13.467727  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:03:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:03:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:03:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 11:03:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:03:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 11:03:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:03:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:03:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:03:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:03:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:03:23.409787  543705 memory.go:184] no items to output this cycle
I0322 11:03:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 11:03:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:03:33.409790  543705 memory.go:184] no items to output this cycle
I0322 11:03:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 11:03:34.439889  543705 disk_info.go:125] begin check local disk info of client
I0322 11:03:34.442390  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:03:34.442395  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582d40 0xc000582d80]
I0322 11:03:39.619074  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:03:39.619080  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:03:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:03:43.410859  543705 memory.go:191] Add success.
I0322 11:03:43.409798  543705 cpu.go:282] Add success.
I0322 11:03:43.420547  543705 net.go:648] Add success.
I0322 11:03:43.423397  543705 net.go:770] primary dev: ETH0
I0322 11:03:43.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:03:43.423426  543705 net.go:698] Add success.
I0322 11:03:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:03:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:03:53.410196  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:03:53.410213  543705 memory.go:184] no items to output this cycle
I0322 11:03:53.410234  543705 cpu.go:275] no items to output this cycle
E0322 11:04:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:04:03.409781  543705 memory.go:184] no items to output this cycle
I0322 11:04:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 11:04:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:04:13.409797  543705 memory.go:191] Add success.
W0322 11:04:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:04:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:04:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:04:13.409872  543705 cpu.go:282] Add success.
I0322 11:04:13.420314  543705 net.go:648] Add success.
I0322 11:04:13.421299  543705 net.go:770] primary dev: ETH0
I0322 11:04:13.421313  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:04:13.421326  543705 net.go:698] Add success.
I0322 11:04:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:04:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:04:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 11:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:04:14.457055  543705 disk_worker.go:494] system disk:vda1
I0322 11:04:14.457088  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:04:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:04:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:04:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:04:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:04:23.409778  543705 cpu.go:275] no items to output this cycle
I0322 11:04:23.409788  543705 memory.go:184] no items to output this cycle
E0322 11:04:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:04:33.409794  543705 memory.go:184] no items to output this cycle
I0322 11:04:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 11:04:34.442897  543705 disk_info.go:125] begin check local disk info of client
I0322 11:04:34.445422  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:04:34.445428  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582f80 0xc000582fc0]
E0322 11:04:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:04:43.410648  543705 memory.go:191] Add success.
I0322 11:04:43.409808  543705 cpu.go:282] Add success.
I0322 11:04:43.420351  543705 net.go:648] Add success.
I0322 11:04:43.423038  543705 net.go:770] primary dev: ETH0
I0322 11:04:43.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:04:43.423067  543705 net.go:698] Add success.
I0322 11:04:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:04:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:04:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:04:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:04:53.409775  543705 memory.go:184] no items to output this cycle
I0322 11:04:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 11:05:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:05:03.409771  543705 memory.go:184] no items to output this cycle
I0322 11:05:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 11:05:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:05:13.409801  543705 memory.go:191] Add success.
W0322 11:05:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:05:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:05:13.409853  543705 cpu.go:282] Add success.
I0322 11:05:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:05:13.420250  543705 net.go:648] Add success.
I0322 11:05:13.422918  543705 net.go:770] primary dev: ETH0
I0322 11:05:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:05:13.422948  543705 net.go:698] Add success.
I0322 11:05:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:05:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:05:14.455350  543705 disk_worker.go:708] disk space is not compliant
W0322 11:05:14.455356  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:05:14.457108  543705 disk_worker.go:494] system disk:vda1
I0322 11:05:14.457141  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:05:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:05:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:05:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:05:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:05:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:05:23.409777  543705 memory.go:184] no items to output this cycle
I0322 11:05:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 11:05:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:05:33.409772  543705 memory.go:184] no items to output this cycle
I0322 11:05:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 11:05:34.445920  543705 disk_info.go:125] begin check local disk info of client
I0322 11:05:34.448459  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:05:34.448464  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007acc0 0xc00007ad00]
E0322 11:05:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:05:43.410720  543705 memory.go:191] Add success.
I0322 11:05:43.409809  543705 cpu.go:282] Add success.
I0322 11:05:43.420462  543705 net.go:648] Add success.
I0322 11:05:43.422965  543705 net.go:770] primary dev: ETH0
I0322 11:05:43.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:05:43.422991  543705 net.go:698] Add success.
I0322 11:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:05:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:05:53.409764  543705 memory.go:184] no items to output this cycle
I0322 11:05:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:06:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:06:03.409775  543705 memory.go:184] no items to output this cycle
I0322 11:06:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 11:06:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:06:13.409781  543705 memory.go:191] Add success.
W0322 11:06:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:06:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:06:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:06:13.409835  543705 cpu.go:282] Add success.
I0322 11:06:13.420361  543705 net.go:648] Add success.
I0322 11:06:13.421235  543705 net.go:770] primary dev: ETH0
I0322 11:06:13.421250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:06:13.421263  543705 net.go:698] Add success.
I0322 11:06:13.561084  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"366c2e92-a58a-42cb-aea8-b055d88a8a13","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:06:13.561270  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 11:06:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
I0322 11:06:14.455160  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:06:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 11:06:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:06:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 11:06:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:06:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:06:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:06:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:06:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:06:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:06:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:06:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 11:06:23.409786  543705 memory.go:184] no items to output this cycle
E0322 11:06:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:06:33.409779  543705 memory.go:184] no items to output this cycle
I0322 11:06:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 11:06:34.448934  543705 disk_info.go:125] begin check local disk info of client
I0322 11:06:34.451492  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:06:34.451499  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036be80 0xc00036bec0]
I0322 11:06:39.619215  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:06:39.619221  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:06:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:06:43.410703  543705 memory.go:191] Add success.
I0322 11:06:43.409797  543705 cpu.go:282] Add success.
I0322 11:06:43.420477  543705 net.go:648] Add success.
I0322 11:06:43.423576  543705 net.go:770] primary dev: ETH0
I0322 11:06:43.423589  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:06:43.423602  543705 net.go:698] Add success.
I0322 11:06:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:06:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:06:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:06:53.409765  543705 memory.go:184] no items to output this cycle
I0322 11:06:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 11:07:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:07:03.409794  543705 memory.go:184] no items to output this cycle
I0322 11:07:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 11:07:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:07:13.409789  543705 memory.go:191] Add success.
I0322 11:07:13.409796  543705 cpu.go:282] Add success.
W0322 11:07:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:07:13.412639  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:07:13.412643  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:07:13.420322  543705 net.go:648] Add success.
I0322 11:07:13.422272  543705 net.go:770] primary dev: ETH0
I0322 11:07:13.422287  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:07:13.422301  543705 net.go:698] Add success.
I0322 11:07:13.452848  543705 event_worker.go:152] Polling the log file for events...
W0322 11:07:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:07:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 11:07:14.455215  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:07:14.456548  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:07:14.456558  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:07:14.456565  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:07:14.457801  543705 disk_worker.go:494] system disk:vda1
I0322 11:07:14.457845  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:07:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:07:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:07:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:07:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:07:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:07:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:07:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:07:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:07:23.409783  543705 memory.go:184] no items to output this cycle
I0322 11:07:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:07:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:07:33.409783  543705 memory.go:184] no items to output this cycle
I0322 11:07:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 11:07:34.451948  543705 disk_info.go:125] begin check local disk info of client
I0322 11:07:34.454430  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:07:34.454435  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a37c0 0xc0004a3800]
E0322 11:07:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:07:43.410652  543705 memory.go:191] Add success.
I0322 11:07:43.409802  543705 cpu.go:282] Add success.
I0322 11:07:43.420367  543705 net.go:648] Add success.
I0322 11:07:43.423282  543705 net.go:770] primary dev: ETH0
I0322 11:07:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:07:43.423310  543705 net.go:698] Add success.
I0322 11:07:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:07:53.410363  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:07:53.410380  543705 memory.go:184] no items to output this cycle
I0322 11:07:53.410392  543705 cpu.go:275] no items to output this cycle
E0322 11:08:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:08:03.409804  543705 memory.go:184] no items to output this cycle
I0322 11:08:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 11:08:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:08:13.409790  543705 memory.go:191] Add success.
I0322 11:08:13.409809  543705 cpu.go:282] Add success.
W0322 11:08:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:08:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:08:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:08:13.420114  543705 net.go:648] Add success.
I0322 11:08:13.422926  543705 net.go:770] primary dev: ETH0
I0322 11:08:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:08:13.422951  543705 net.go:698] Add success.
I0322 11:08:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:08:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:08:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 11:08:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:08:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 11:08:14.456731  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:08:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:08:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:08:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:08:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:08:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:08:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:08:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 11:08:23.409795  543705 memory.go:184] no items to output this cycle
E0322 11:08:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:08:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 11:08:33.409787  543705 memory.go:184] no items to output this cycle
I0322 11:08:34.454971  543705 disk_info.go:125] begin check local disk info of client
I0322 11:08:34.457479  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:08:34.457484  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582440 0xc000582480]
E0322 11:08:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:08:43.410542  543705 memory.go:191] Add success.
I0322 11:08:43.409795  543705 cpu.go:282] Add success.
I0322 11:08:43.420253  543705 net.go:648] Add success.
I0322 11:08:43.422737  543705 net.go:770] primary dev: ETH0
I0322 11:08:43.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:08:43.422763  543705 net.go:698] Add success.
I0322 11:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:08:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:08:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:08:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:08:53.409791  543705 memory.go:184] no items to output this cycle
I0322 11:08:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 11:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:09:03.409781  543705 memory.go:184] no items to output this cycle
I0322 11:09:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 11:09:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:09:13.409787  543705 memory.go:191] Add success.
I0322 11:09:13.409789  543705 cpu.go:282] Add success.
W0322 11:09:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:09:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:09:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:09:13.420061  543705 net.go:648] Add success.
I0322 11:09:13.422995  543705 net.go:770] primary dev: ETH0
I0322 11:09:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:09:13.423020  543705 net.go:698] Add success.
I0322 11:09:13.470637  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"85f71239-37d6-429a-b523-17bc32cd7995","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:09:13.470671  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:09:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:09:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:09:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0322 11:09:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:09:14.456690  543705 disk_worker.go:494] system disk:vda1
I0322 11:09:14.456728  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:09:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:09:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:09:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:09:23.409804  543705 memory.go:184] no items to output this cycle
I0322 11:09:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 11:09:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:09:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 11:09:33.409797  543705 memory.go:184] no items to output this cycle
I0322 11:09:34.457981  543705 disk_info.go:125] begin check local disk info of client
I0322 11:09:34.460434  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:09:34.460440  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472880 0xc0004728c0]
I0322 11:09:39.619793  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:09:39.619800  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:09:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:09:43.410619  543705 memory.go:191] Add success.
I0322 11:09:43.409813  543705 cpu.go:282] Add success.
I0322 11:09:43.420397  543705 net.go:648] Add success.
I0322 11:09:43.423047  543705 net.go:770] primary dev: ETH0
I0322 11:09:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:09:43.423078  543705 net.go:698] Add success.
I0322 11:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:09:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:09:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:09:53.409766  543705 memory.go:184] no items to output this cycle
I0322 11:09:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:10:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:10:03.409785  543705 memory.go:184] no items to output this cycle
I0322 11:10:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 11:10:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:10:13.409803  543705 memory.go:191] Add success.
I0322 11:10:13.409813  543705 cpu.go:282] Add success.
W0322 11:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:10:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:10:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:10:13.420202  543705 net.go:648] Add success.
I0322 11:10:13.422952  543705 net.go:770] primary dev: ETH0
I0322 11:10:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:10:13.422980  543705 net.go:698] Add success.
I0322 11:10:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:10:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:10:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0322 11:10:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:10:14.456650  543705 disk_worker.go:494] system disk:vda1
I0322 11:10:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:10:16.458028  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:10:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:10:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:10:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:10:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:10:23.409776  543705 memory.go:184] no items to output this cycle
I0322 11:10:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 11:10:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:10:33.409794  543705 memory.go:184] no items to output this cycle
I0322 11:10:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 11:10:34.460994  543705 disk_info.go:125] begin check local disk info of client
I0322 11:10:34.463532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:10:34.463538  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a25c0 0xc0004a2600]
E0322 11:10:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:10:43.410716  543705 memory.go:191] Add success.
I0322 11:10:43.409810  543705 cpu.go:282] Add success.
I0322 11:10:43.420446  543705 net.go:648] Add success.
I0322 11:10:43.423344  543705 net.go:770] primary dev: ETH0
I0322 11:10:43.423357  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:10:43.423370  543705 net.go:698] Add success.
I0322 11:10:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:10:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:10:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:10:53.410270  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:10:53.410288  543705 memory.go:184] no items to output this cycle
I0322 11:10:53.410296  543705 cpu.go:275] no items to output this cycle
E0322 11:11:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:11:03.409807  543705 memory.go:184] no items to output this cycle
I0322 11:11:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 11:11:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:11:13.409776  543705 memory.go:191] Add success.
W0322 11:11:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:11:13.409804  543705 cpu.go:282] Add success.
W0322 11:11:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:11:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:11:13.420079  543705 net.go:648] Add success.
I0322 11:11:13.422934  543705 net.go:770] primary dev: ETH0
I0322 11:11:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:11:13.422964  543705 net.go:698] Add success.
I0322 11:11:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:11:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:11:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 11:11:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:11:14.456524  543705 disk_worker.go:494] system disk:vda1
I0322 11:11:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:11:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:11:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:11:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:11:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:11:23.409791  543705 memory.go:184] no items to output this cycle
I0322 11:11:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 11:11:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:11:33.409778  543705 memory.go:184] no items to output this cycle
I0322 11:11:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 11:11:34.464008  543705 disk_info.go:125] begin check local disk info of client
I0322 11:11:34.466569  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:11:34.466575  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e400 0xc00049e440]
E0322 11:11:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:11:43.410621  543705 memory.go:191] Add success.
I0322 11:11:43.409799  543705 cpu.go:282] Add success.
I0322 11:11:43.420426  543705 net.go:648] Add success.
I0322 11:11:43.422953  543705 net.go:770] primary dev: ETH0
I0322 11:11:43.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:11:43.422983  543705 net.go:698] Add success.
I0322 11:11:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:11:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:11:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:11:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:11:53.409777  543705 memory.go:184] no items to output this cycle
I0322 11:11:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 11:12:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:12:03.409774  543705 memory.go:184] no items to output this cycle
I0322 11:12:03.409799  543705 cpu.go:275] no items to output this cycle
W0322 11:12:13.409699  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:12:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:12:13.409719  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 11:12:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:12:13.409811  543705 memory.go:191] Add success.
I0322 11:12:13.409816  543705 cpu.go:282] Add success.
I0322 11:12:13.420058  543705 net.go:648] Add success.
I0322 11:12:13.422723  543705 net.go:770] primary dev: ETH0
I0322 11:12:13.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:12:13.422754  543705 net.go:698] Add success.
I0322 11:12:13.562550  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dd55a428-a545-43fe-a215-f7875573110e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:12:13.562586  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 11:12:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:12:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 11:12:14.455225  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:12:14.457014  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:12:14.457025  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:12:14.457031  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:12:14.457032  543705 disk_worker.go:494] system disk:vda1
I0322 11:12:14.457082  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:12:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:12:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:12:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:12:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:12:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:12:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:12:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:12:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:12:23.409799  543705 memory.go:184] no items to output this cycle
I0322 11:12:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 11:12:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:12:33.409779  543705 memory.go:184] no items to output this cycle
I0322 11:12:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 11:12:34.467023  543705 disk_info.go:125] begin check local disk info of client
I0322 11:12:34.469563  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:12:34.469568  543705 disk_info.go:196] parse disk info done, disk is : [0xc000347580 0xc0003475c0]
I0322 11:12:39.619936  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:12:39.619942  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:12:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:12:43.410628  543705 memory.go:191] Add success.
I0322 11:12:43.409824  543705 cpu.go:282] Add success.
I0322 11:12:43.420336  543705 net.go:648] Add success.
I0322 11:12:43.423056  543705 net.go:770] primary dev: ETH0
I0322 11:12:43.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:12:43.423082  543705 net.go:698] Add success.
I0322 11:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:12:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:12:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:12:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:12:53.409768  543705 memory.go:184] no items to output this cycle
I0322 11:12:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:13:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:13:03.409810  543705 memory.go:184] no items to output this cycle
I0322 11:13:03.409794  543705 cpu.go:275] no items to output this cycle
W0322 11:13:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:13:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:13:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:13:13.409785  543705 cpu.go:282] Add success.
E0322 11:13:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:13:13.409851  543705 memory.go:191] Add success.
I0322 11:13:13.420005  543705 net.go:648] Add success.
I0322 11:13:13.422808  543705 net.go:770] primary dev: ETH0
I0322 11:13:13.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:13:13.422836  543705 net.go:698] Add success.
I0322 11:13:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:13:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:13:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0322 11:13:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:13:14.456692  543705 disk_worker.go:494] system disk:vda1
I0322 11:13:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:13:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:13:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:13:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:13:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:13:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:13:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 11:13:23.409884  543705 memory.go:184] no items to output this cycle
E0322 11:13:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:13:33.409768  543705 memory.go:184] no items to output this cycle
I0322 11:13:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 11:13:34.470050  543705 disk_info.go:125] begin check local disk info of client
I0322 11:13:34.472545  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:13:34.472552  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0322 11:13:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:13:43.410616  543705 memory.go:191] Add success.
I0322 11:13:43.409804  543705 cpu.go:282] Add success.
I0322 11:13:43.420319  543705 net.go:648] Add success.
I0322 11:13:43.423257  543705 net.go:770] primary dev: ETH0
I0322 11:13:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:13:43.423282  543705 net.go:698] Add success.
I0322 11:13:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:13:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:13:53.410397  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:13:53.410416  543705 memory.go:184] no items to output this cycle
I0322 11:13:53.410426  543705 cpu.go:275] no items to output this cycle
E0322 11:14:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:14:03.409795  543705 memory.go:184] no items to output this cycle
I0322 11:14:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 11:14:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:14:13.409801  543705 memory.go:191] Add success.
I0322 11:14:13.409803  543705 cpu.go:282] Add success.
W0322 11:14:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:14:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:14:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:14:13.420111  543705 net.go:648] Add success.
I0322 11:14:13.423144  543705 net.go:770] primary dev: ETH0
I0322 11:14:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:14:13.423168  543705 net.go:698] Add success.
I0322 11:14:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:14:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:14:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 11:14:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:14:14.456626  543705 disk_worker.go:494] system disk:vda1
I0322 11:14:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:14:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:14:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:14:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:14:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:14:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:14:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:14:23.409790  543705 memory.go:184] no items to output this cycle
I0322 11:14:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 11:14:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:14:33.409860  543705 memory.go:184] no items to output this cycle
I0322 11:14:33.409972  543705 cpu.go:275] no items to output this cycle
I0322 11:14:34.473050  543705 disk_info.go:125] begin check local disk info of client
I0322 11:14:34.475576  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:14:34.475582  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005e8040 0xc0005e8080]
E0322 11:14:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:14:43.410603  543705 memory.go:191] Add success.
I0322 11:14:43.409824  543705 cpu.go:282] Add success.
I0322 11:14:43.420337  543705 net.go:648] Add success.
I0322 11:14:43.422914  543705 net.go:770] primary dev: ETH0
I0322 11:14:43.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:14:43.422940  543705 net.go:698] Add success.
I0322 11:14:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:14:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:14:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:14:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:14:53.409793  543705 memory.go:184] no items to output this cycle
I0322 11:14:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 11:15:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:15:03.409779  543705 memory.go:184] no items to output this cycle
I0322 11:15:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 11:15:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:15:13.409793  543705 memory.go:191] Add success.
I0322 11:15:13.409796  543705 cpu.go:282] Add success.
W0322 11:15:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:15:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:15:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:15:13.420077  543705 net.go:648] Add success.
I0322 11:15:13.422918  543705 net.go:770] primary dev: ETH0
I0322 11:15:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:15:13.422949  543705 net.go:698] Add success.
I0322 11:15:13.512566  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"88281bc3-9fe5-4ec7-a164-10d2c06c103d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:15:13.512600  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:15:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:15:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:15:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 11:15:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:15:14.456618  543705 disk_worker.go:494] system disk:vda1
I0322 11:15:14.456668  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:15:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:15:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:15:16.472500  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:15:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:15:23.409798  543705 memory.go:184] no items to output this cycle
I0322 11:15:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 11:15:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:15:33.409779  543705 memory.go:184] no items to output this cycle
I0322 11:15:33.409918  543705 cpu.go:275] no items to output this cycle
I0322 11:15:34.476071  543705 disk_info.go:125] begin check local disk info of client
I0322 11:15:34.478610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:15:34.478616  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2000 0xc0004a2080]
I0322 11:15:39.620795  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:15:39.620802  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:15:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:15:43.410742  543705 memory.go:191] Add success.
I0322 11:15:43.409789  543705 cpu.go:282] Add success.
I0322 11:15:43.420431  543705 net.go:648] Add success.
I0322 11:15:43.423316  543705 net.go:770] primary dev: ETH0
I0322 11:15:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:15:43.423341  543705 net.go:698] Add success.
I0322 11:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:15:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:15:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:15:53.409778  543705 memory.go:184] no items to output this cycle
I0322 11:15:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:16:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:16:03.409820  543705 memory.go:184] no items to output this cycle
I0322 11:16:03.409836  543705 cpu.go:275] no items to output this cycle
E0322 11:16:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:16:13.409787  543705 cpu.go:282] Add success.
I0322 11:16:13.409799  543705 memory.go:191] Add success.
W0322 11:16:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:16:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:16:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:16:13.420056  543705 net.go:648] Add success.
I0322 11:16:13.422936  543705 net.go:770] primary dev: ETH0
I0322 11:16:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:16:13.422976  543705 net.go:698] Add success.
I0322 11:16:14.453989  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:16:14.454181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:16:14.454257  543705 disk_worker.go:708] disk space is not compliant
W0322 11:16:14.454261  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:16:14.455684  543705 disk_worker.go:494] system disk:vda1
I0322 11:16:14.455718  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:16:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:16:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:16:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:16:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:16:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:16:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:16:23.409795  543705 memory.go:184] no items to output this cycle
I0322 11:16:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 11:16:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:16:33.409801  543705 memory.go:184] no items to output this cycle
I0322 11:16:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 11:16:34.479082  543705 disk_info.go:125] begin check local disk info of client
I0322 11:16:34.481625  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:16:34.481630  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270740 0xc000270780]
E0322 11:16:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:16:43.409791  543705 cpu.go:282] Add success.
I0322 11:16:43.410821  543705 memory.go:191] Add success.
I0322 11:16:43.419677  543705 net.go:648] Add success.
I0322 11:16:43.422330  543705 net.go:770] primary dev: ETH0
I0322 11:16:43.422343  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:16:43.422354  543705 net.go:698] Add success.
I0322 11:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:16:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:16:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:16:53.410370  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:16:53.410384  543705 memory.go:184] no items to output this cycle
I0322 11:16:53.410399  543705 cpu.go:275] no items to output this cycle
E0322 11:17:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:17:03.409782  543705 memory.go:184] no items to output this cycle
I0322 11:17:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 11:17:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:17:13.409788  543705 memory.go:191] Add success.
I0322 11:17:13.409808  543705 cpu.go:282] Add success.
W0322 11:17:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:17:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:17:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:17:13.420157  543705 net.go:648] Add success.
I0322 11:17:13.422923  543705 net.go:770] primary dev: ETH0
I0322 11:17:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:17:13.422952  543705 net.go:698] Add success.
I0322 11:17:13.453507  543705 event_worker.go:152] Polling the log file for events...
W0322 11:17:14.455229  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:17:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0322 11:17:14.455247  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:17:14.456105  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:17:14.456115  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:17:14.456122  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:17:14.456651  543705 disk_worker.go:494] system disk:vda1
I0322 11:17:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:17:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:17:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:17:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:17:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:17:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:17:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:17:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:17:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:17:23.409795  543705 memory.go:184] no items to output this cycle
I0322 11:17:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 11:17:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:17:33.409784  543705 memory.go:184] no items to output this cycle
I0322 11:17:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 11:17:34.482102  543705 disk_info.go:125] begin check local disk info of client
I0322 11:17:34.484570  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:17:34.484576  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a31c0 0xc0004a3200]
E0322 11:17:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:17:43.410669  543705 memory.go:191] Add success.
I0322 11:17:43.409809  543705 cpu.go:282] Add success.
I0322 11:17:43.420722  543705 net.go:648] Add success.
I0322 11:17:43.423519  543705 net.go:770] primary dev: ETH0
I0322 11:17:43.423532  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:17:43.423543  543705 net.go:698] Add success.
I0322 11:17:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:17:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:17:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:17:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:17:53.409785  543705 cpu.go:275] no items to output this cycle
I0322 11:17:53.409788  543705 memory.go:184] no items to output this cycle
E0322 11:18:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:18:03.409785  543705 memory.go:184] no items to output this cycle
I0322 11:18:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 11:18:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:18:13.409795  543705 memory.go:191] Add success.
I0322 11:18:13.409797  543705 cpu.go:282] Add success.
W0322 11:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:18:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:18:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:18:13.420129  543705 net.go:648] Add success.
I0322 11:18:13.423034  543705 net.go:770] primary dev: ETH0
I0322 11:18:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:18:13.423059  543705 net.go:698] Add success.
I0322 11:18:13.463671  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0977b0b5-6af6-4667-aaeb-6c5835328396","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:18:13.463704  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:18:14.455008  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:18:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:18:14.455262  543705 disk_worker.go:708] disk space is not compliant
W0322 11:18:14.455267  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:18:14.457339  543705 disk_worker.go:494] system disk:vda1
I0322 11:18:14.457394  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:18:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:18:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:18:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:18:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:18:16.472490  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:18:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:18:23.409768  543705 memory.go:184] no items to output this cycle
I0322 11:18:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 11:18:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:18:33.409783  543705 memory.go:184] no items to output this cycle
I0322 11:18:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 11:18:34.485107  543705 disk_info.go:125] begin check local disk info of client
I0322 11:18:34.487655  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:18:34.487661  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003da380 0xc0003da3c0]
I0322 11:18:39.621735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:18:39.621742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:18:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:18:43.410674  543705 memory.go:191] Add success.
I0322 11:18:43.409799  543705 cpu.go:282] Add success.
I0322 11:18:43.420559  543705 net.go:648] Add success.
I0322 11:18:43.423302  543705 net.go:770] primary dev: ETH0
I0322 11:18:43.423316  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:18:43.423328  543705 net.go:698] Add success.
I0322 11:18:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:18:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:18:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:18:53.410347  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:18:53.410362  543705 memory.go:184] no items to output this cycle
I0322 11:18:53.410385  543705 cpu.go:275] no items to output this cycle
E0322 11:19:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:19:03.409810  543705 memory.go:184] no items to output this cycle
I0322 11:19:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 11:19:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:19:13.409822  543705 memory.go:191] Add success.
I0322 11:19:13.409827  543705 cpu.go:282] Add success.
W0322 11:19:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:19:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:19:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:19:13.420154  543705 net.go:648] Add success.
I0322 11:19:13.423003  543705 net.go:770] primary dev: ETH0
I0322 11:19:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:19:13.423032  543705 net.go:698] Add success.
I0322 11:19:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:19:14.455317  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:19:14.455334  543705 disk_worker.go:708] disk space is not compliant
W0322 11:19:14.455338  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:19:14.457577  543705 disk_worker.go:494] system disk:vda1
I0322 11:19:14.457632  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:19:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:19:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:19:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:19:16.472491  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:19:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:19:23.409783  543705 memory.go:184] no items to output this cycle
I0322 11:19:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 11:19:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:19:33.409771  543705 memory.go:184] no items to output this cycle
I0322 11:19:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 11:19:34.488070  543705 disk_info.go:125] begin check local disk info of client
I0322 11:19:34.490521  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:19:34.490526  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005839c0 0xc000583a00]
E0322 11:19:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:19:43.410672  543705 memory.go:191] Add success.
I0322 11:19:43.409801  543705 cpu.go:282] Add success.
I0322 11:19:43.420471  543705 net.go:648] Add success.
I0322 11:19:43.422946  543705 net.go:770] primary dev: ETH0
I0322 11:19:43.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:19:43.422970  543705 net.go:698] Add success.
I0322 11:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:19:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:19:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:19:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:19:53.409770  543705 cpu.go:275] no items to output this cycle
I0322 11:19:53.409780  543705 memory.go:184] no items to output this cycle
E0322 11:20:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:20:03.409787  543705 memory.go:184] no items to output this cycle
I0322 11:20:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 11:20:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:20:13.409796  543705 memory.go:191] Add success.
I0322 11:20:13.409797  543705 cpu.go:282] Add success.
W0322 11:20:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:20:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:20:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:20:13.420216  543705 net.go:648] Add success.
I0322 11:20:13.423014  543705 net.go:770] primary dev: ETH0
I0322 11:20:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:20:13.423039  543705 net.go:698] Add success.
I0322 11:20:14.453938  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:20:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:20:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 11:20:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:20:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 11:20:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:20:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:20:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:20:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:20:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:20:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:20:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:20:23.409775  543705 memory.go:184] no items to output this cycle
I0322 11:20:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 11:20:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:20:33.409778  543705 memory.go:184] no items to output this cycle
I0322 11:20:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 11:20:34.491146  543705 disk_info.go:125] begin check local disk info of client
I0322 11:20:34.493711  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:20:34.493716  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f04c0 0xc0000f0500]
E0322 11:20:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:20:43.410603  543705 memory.go:191] Add success.
I0322 11:20:43.409814  543705 cpu.go:282] Add success.
I0322 11:20:43.420309  543705 net.go:648] Add success.
I0322 11:20:43.423298  543705 net.go:770] primary dev: ETH0
I0322 11:20:43.423312  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:20:43.423323  543705 net.go:698] Add success.
I0322 11:20:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:20:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:20:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:20:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:20:53.409773  543705 memory.go:184] no items to output this cycle
I0322 11:20:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 11:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:21:03.409799  543705 memory.go:184] no items to output this cycle
I0322 11:21:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 11:21:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:21:13.409781  543705 memory.go:191] Add success.
W0322 11:21:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:21:13.409815  543705 cpu.go:282] Add success.
W0322 11:21:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:21:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:21:13.420239  543705 net.go:648] Add success.
I0322 11:21:13.423425  543705 net.go:770] primary dev: ETH0
I0322 11:21:13.423442  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:21:13.423457  543705 net.go:698] Add success.
I0322 11:21:13.468211  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"192ba6d5-d68a-402d-a860-81cb64dfa18b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:21:13.468247  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:21:14.454991  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:21:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:21:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0322 11:21:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:21:14.456630  543705 disk_worker.go:494] system disk:vda1
I0322 11:21:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:21:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:21:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:21:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:21:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:21:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:21:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:21:23.409798  543705 memory.go:184] no items to output this cycle
I0322 11:21:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 11:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:21:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 11:21:33.409791  543705 memory.go:184] no items to output this cycle
I0322 11:21:34.493796  543705 disk_info.go:125] begin check local disk info of client
I0322 11:21:34.496417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:21:34.496423  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4480 0xc0000c4540]
I0322 11:21:39.622799  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:21:39.622805  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:21:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:21:43.410612  543705 memory.go:191] Add success.
I0322 11:21:43.409811  543705 cpu.go:282] Add success.
I0322 11:21:43.420358  543705 net.go:648] Add success.
I0322 11:21:43.422853  543705 net.go:770] primary dev: ETH0
I0322 11:21:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:21:43.422879  543705 net.go:698] Add success.
I0322 11:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:21:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:21:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:21:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:21:53.409791  543705 memory.go:184] no items to output this cycle
I0322 11:21:53.409921  543705 cpu.go:275] no items to output this cycle
E0322 11:22:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:22:03.409791  543705 memory.go:184] no items to output this cycle
I0322 11:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 11:22:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:22:13.409815  543705 memory.go:191] Add success.
I0322 11:22:13.409826  543705 cpu.go:282] Add success.
W0322 11:22:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:22:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:22:13.420199  543705 net.go:648] Add success.
I0322 11:22:13.422805  543705 net.go:770] primary dev: ETH0
I0322 11:22:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:22:13.422832  543705 net.go:698] Add success.
W0322 11:22:14.455244  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:22:14.455341  543705 disk_worker.go:708] disk space is not compliant
W0322 11:22:14.455347  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:22:14.457125  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:22:14.457137  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:22:14.457143  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:22:14.458229  543705 disk_worker.go:494] system disk:vda1
I0322 11:22:14.458281  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:22:15.457015  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:22:15.457029  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:22:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:22:16.457993  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:22:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:22:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:22:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:22:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:22:23.409778  543705 memory.go:184] no items to output this cycle
I0322 11:22:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:22:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:22:33.409769  543705 memory.go:184] no items to output this cycle
I0322 11:22:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 11:22:34.497178  543705 disk_info.go:125] begin check local disk info of client
I0322 11:22:34.499712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:22:34.499718  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bb200 0xc0004bb240]
E0322 11:22:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:22:43.410789  543705 memory.go:191] Add success.
I0322 11:22:43.409801  543705 cpu.go:282] Add success.
I0322 11:22:43.420483  543705 net.go:648] Add success.
I0322 11:22:43.423209  543705 net.go:770] primary dev: ETH0
I0322 11:22:43.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:22:43.423234  543705 net.go:698] Add success.
I0322 11:22:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:22:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:22:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:22:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:22:53.409795  543705 memory.go:184] no items to output this cycle
I0322 11:22:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 11:23:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:23:03.409781  543705 cpu.go:275] no items to output this cycle
I0322 11:23:03.409784  543705 memory.go:184] no items to output this cycle
E0322 11:23:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:23:13.409827  543705 memory.go:191] Add success.
I0322 11:23:13.409834  543705 cpu.go:282] Add success.
W0322 11:23:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:23:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:23:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:23:13.420130  543705 net.go:648] Add success.
I0322 11:23:13.422962  543705 net.go:770] primary dev: ETH0
I0322 11:23:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:23:13.422987  543705 net.go:698] Add success.
I0322 11:23:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:23:14.455262  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:23:14.455279  543705 disk_worker.go:708] disk space is not compliant
W0322 11:23:14.455284  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:23:14.457073  543705 disk_worker.go:494] system disk:vda1
I0322 11:23:14.457106  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:23:15.455994  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:23:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:23:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:23:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:23:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:23:23.409782  543705 memory.go:184] no items to output this cycle
I0322 11:23:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 11:23:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:23:33.409802  543705 memory.go:184] no items to output this cycle
I0322 11:23:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 11:23:34.499800  543705 disk_info.go:125] begin check local disk info of client
I0322 11:23:34.502363  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:23:34.502370  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a2f80 0xc0004a2fc0]
E0322 11:23:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:23:43.410543  543705 memory.go:191] Add success.
I0322 11:23:43.409815  543705 cpu.go:282] Add success.
I0322 11:23:43.420244  543705 net.go:648] Add success.
I0322 11:23:43.422892  543705 net.go:770] primary dev: ETH0
I0322 11:23:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:23:43.422918  543705 net.go:698] Add success.
I0322 11:23:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:23:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:23:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:23:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:23:53.409779  543705 memory.go:184] no items to output this cycle
I0322 11:23:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 11:24:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:24:03.409800  543705 memory.go:184] no items to output this cycle
I0322 11:24:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 11:24:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:24:13.409931  543705 memory.go:191] Add success.
I0322 11:24:13.409933  543705 cpu.go:282] Add success.
W0322 11:24:13.410009  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:24:13.413071  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:24:13.413077  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:24:13.419721  543705 net.go:648] Add success.
I0322 11:24:13.421721  543705 net.go:770] primary dev: ETH0
I0322 11:24:13.421733  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:24:13.421745  543705 net.go:698] Add success.
I0322 11:24:13.468540  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"070ac5f7-658d-4381-a714-eba6fa0bb0e9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:24:13.468572  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:24:14.455041  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:24:14.455224  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:24:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0322 11:24:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:24:14.457888  543705 disk_worker.go:494] system disk:vda1
I0322 11:24:14.457917  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:24:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:24:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:24:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:24:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:24:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:24:23.409779  543705 memory.go:184] no items to output this cycle
I0322 11:24:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 11:24:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:24:33.409818  543705 memory.go:184] no items to output this cycle
I0322 11:24:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 11:24:34.503203  543705 disk_info.go:125] begin check local disk info of client
I0322 11:24:34.505753  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:24:34.505759  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5cc0 0xc0000c5d00]
I0322 11:24:39.623812  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:24:39.623818  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:24:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:24:43.410697  543705 memory.go:191] Add success.
I0322 11:24:43.409829  543705 cpu.go:282] Add success.
I0322 11:24:43.420434  543705 net.go:648] Add success.
I0322 11:24:43.423303  543705 net.go:770] primary dev: ETH0
I0322 11:24:43.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:24:43.423336  543705 net.go:698] Add success.
I0322 11:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:24:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:24:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:24:53.410228  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:24:53.410241  543705 memory.go:184] no items to output this cycle
I0322 11:24:53.410261  543705 cpu.go:275] no items to output this cycle
E0322 11:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:25:03.409809  543705 memory.go:184] no items to output this cycle
I0322 11:25:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 11:25:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:25:13.409878  543705 memory.go:191] Add success.
W0322 11:25:13.409910  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:25:13.409924  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:25:13.409927  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:25:13.409952  543705 cpu.go:282] Add success.
I0322 11:25:13.419710  543705 net.go:648] Add success.
I0322 11:25:13.422397  543705 net.go:770] primary dev: ETH0
I0322 11:25:13.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:25:13.422425  543705 net.go:698] Add success.
I0322 11:25:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:25:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:25:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 11:25:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:25:14.456560  543705 disk_worker.go:494] system disk:vda1
I0322 11:25:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:25:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:25:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:25:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:25:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:25:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:25:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:25:23.409776  543705 memory.go:184] no items to output this cycle
I0322 11:25:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 11:25:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:25:33.409771  543705 memory.go:184] no items to output this cycle
I0322 11:25:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 11:25:34.505839  543705 disk_info.go:125] begin check local disk info of client
I0322 11:25:34.508402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:25:34.508408  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f00 0xc0000c5f40]
E0322 11:25:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:25:43.410676  543705 memory.go:191] Add success.
I0322 11:25:43.409832  543705 cpu.go:282] Add success.
I0322 11:25:43.420381  543705 net.go:648] Add success.
I0322 11:25:43.423017  543705 net.go:770] primary dev: ETH0
I0322 11:25:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:25:43.423045  543705 net.go:698] Add success.
I0322 11:25:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:25:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:25:53.409779  543705 memory.go:184] no items to output this cycle
I0322 11:25:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 11:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:26:03.409778  543705 memory.go:184] no items to output this cycle
I0322 11:26:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 11:26:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:26:13.409796  543705 memory.go:191] Add success.
I0322 11:26:13.409802  543705 cpu.go:282] Add success.
W0322 11:26:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:26:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:26:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:26:13.419731  543705 net.go:648] Add success.
I0322 11:26:13.422742  543705 net.go:770] primary dev: ETH0
I0322 11:26:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:26:13.422766  543705 net.go:698] Add success.
I0322 11:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:26:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:26:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 11:26:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:26:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 11:26:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:26:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:26:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:26:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:26:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:26:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:26:23.409781  543705 memory.go:184] no items to output this cycle
I0322 11:26:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 11:26:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:26:33.409804  543705 memory.go:184] no items to output this cycle
I0322 11:26:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 11:26:34.509227  543705 disk_info.go:125] begin check local disk info of client
I0322 11:26:34.511807  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:26:34.511813  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ce80 0xc00035cec0]
E0322 11:26:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:26:43.410738  543705 memory.go:191] Add success.
I0322 11:26:43.409791  543705 cpu.go:282] Add success.
I0322 11:26:43.420426  543705 net.go:648] Add success.
I0322 11:26:43.423201  543705 net.go:770] primary dev: ETH0
I0322 11:26:43.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:26:43.423229  543705 net.go:698] Add success.
I0322 11:26:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:26:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:26:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:26:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:26:53.409765  543705 memory.go:184] no items to output this cycle
I0322 11:26:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:27:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:27:03.409781  543705 memory.go:184] no items to output this cycle
I0322 11:27:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 11:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:27:13.409813  543705 memory.go:191] Add success.
I0322 11:27:13.409824  543705 cpu.go:282] Add success.
W0322 11:27:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:27:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:27:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:27:13.420362  543705 net.go:648] Add success.
I0322 11:27:13.429143  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 11:27:13.429232  543705 net.go:770] primary dev: ETH0
I0322 11:27:13.429244  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:27:13.429259  543705 net.go:698] Add success.
I0322 11:27:13.452767  543705 event_worker.go:152] Polling the log file for events...
I0322 11:27:13.503506  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"103da3fc-99da-45d0-a3d3-17a12307d21d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:27:13.503537  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 11:27:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:27:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 11:27:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:27:14.457014  543705 disk_worker.go:494] system disk:vda1
I0322 11:27:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:27:14.457096  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:27:14.457106  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:27:14.457112  543705 custom_config.go:64] query custom config with name: gpu
E0322 11:27:15.457003  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:27:15.457017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:27:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:27:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:27:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:27:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:27:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:27:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:27:23.409771  543705 memory.go:184] no items to output this cycle
I0322 11:27:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 11:27:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:27:33.409796  543705 memory.go:184] no items to output this cycle
I0322 11:27:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 11:27:34.511895  543705 disk_info.go:125] begin check local disk info of client
I0322 11:27:34.514424  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:27:34.514430  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032eec0 0xc00032ef00]
I0322 11:27:39.624819  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:27:39.624825  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:27:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:27:43.410659  543705 memory.go:191] Add success.
I0322 11:27:43.409813  543705 cpu.go:282] Add success.
I0322 11:27:43.420389  543705 net.go:648] Add success.
I0322 11:27:43.423094  543705 net.go:770] primary dev: ETH0
I0322 11:27:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:27:43.423120  543705 net.go:698] Add success.
I0322 11:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:27:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:27:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:27:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:27:53.409770  543705 memory.go:184] no items to output this cycle
I0322 11:27:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 11:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:28:03.409786  543705 memory.go:184] no items to output this cycle
I0322 11:28:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 11:28:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:28:13.409780  543705 memory.go:191] Add success.
I0322 11:28:13.409802  543705 cpu.go:282] Add success.
W0322 11:28:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:28:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:28:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:28:13.420137  543705 net.go:648] Add success.
I0322 11:28:13.422994  543705 net.go:770] primary dev: ETH0
I0322 11:28:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:28:13.423041  543705 net.go:698] Add success.
I0322 11:28:14.454879  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:28:14.455040  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:28:14.455119  543705 disk_worker.go:708] disk space is not compliant
W0322 11:28:14.455123  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:28:14.456484  543705 disk_worker.go:494] system disk:vda1
I0322 11:28:14.456512  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:28:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:28:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:28:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:28:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:28:23.409796  543705 memory.go:184] no items to output this cycle
I0322 11:28:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 11:28:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:28:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 11:28:33.409792  543705 memory.go:184] no items to output this cycle
I0322 11:28:34.515262  543705 disk_info.go:125] begin check local disk info of client
I0322 11:28:34.517822  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:28:34.517828  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377bc0 0xc000377c00]
E0322 11:28:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:28:43.410564  543705 memory.go:191] Add success.
I0322 11:28:43.409814  543705 cpu.go:282] Add success.
I0322 11:28:43.420327  543705 net.go:648] Add success.
I0322 11:28:43.422881  543705 net.go:770] primary dev: ETH0
I0322 11:28:43.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:28:43.422909  543705 net.go:698] Add success.
I0322 11:28:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:28:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:28:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:28:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:28:53.409773  543705 memory.go:184] no items to output this cycle
I0322 11:28:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 11:29:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:29:03.409811  543705 memory.go:184] no items to output this cycle
I0322 11:29:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 11:29:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:29:13.409796  543705 cpu.go:282] Add success.
I0322 11:29:13.409799  543705 memory.go:191] Add success.
W0322 11:29:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:29:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:29:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:29:13.420151  543705 net.go:648] Add success.
I0322 11:29:13.422810  543705 net.go:770] primary dev: ETH0
I0322 11:29:13.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:29:13.422839  543705 net.go:698] Add success.
I0322 11:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:29:14.455390  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:29:14.455407  543705 disk_worker.go:708] disk space is not compliant
W0322 11:29:14.455411  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:29:14.457012  543705 disk_worker.go:494] system disk:vda1
I0322 11:29:14.457041  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:29:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:29:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:29:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:29:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:29:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:29:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 11:29:23.409788  543705 memory.go:184] no items to output this cycle
E0322 11:29:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:29:33.409799  543705 memory.go:184] no items to output this cycle
I0322 11:29:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 11:29:34.517912  543705 disk_info.go:125] begin check local disk info of client
I0322 11:29:34.520360  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:29:34.520366  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049ee40 0xc00049ee80]
E0322 11:29:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:29:43.410634  543705 memory.go:191] Add success.
I0322 11:29:43.409798  543705 cpu.go:282] Add success.
I0322 11:29:43.420340  543705 net.go:648] Add success.
I0322 11:29:43.423095  543705 net.go:770] primary dev: ETH0
I0322 11:29:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:29:43.423121  543705 net.go:698] Add success.
I0322 11:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:29:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:29:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:29:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:29:53.409769  543705 memory.go:184] no items to output this cycle
I0322 11:29:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 11:30:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:30:03.409805  543705 memory.go:184] no items to output this cycle
I0322 11:30:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 11:30:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:30:13.409781  543705 memory.go:191] Add success.
I0322 11:30:13.409801  543705 cpu.go:282] Add success.
W0322 11:30:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:30:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:30:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:30:13.420120  543705 net.go:648] Add success.
I0322 11:30:13.422763  543705 net.go:770] primary dev: ETH0
I0322 11:30:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:30:13.422789  543705 net.go:698] Add success.
I0322 11:30:13.463211  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"45fb2ae0-875f-4554-9b39-e33638f286d5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:30:13.463244  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:30:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:30:14.455348  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:30:14.455435  543705 disk_worker.go:708] disk space is not compliant
W0322 11:30:14.455440  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:30:14.457088  543705 disk_worker.go:494] system disk:vda1
I0322 11:30:14.457120  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:30:15.455635  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:30:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:30:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:30:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:30:16.472496  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:30:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:30:23.409799  543705 memory.go:184] no items to output this cycle
I0322 11:30:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 11:30:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:30:33.409772  543705 memory.go:184] no items to output this cycle
I0322 11:30:33.409777  543705 cpu.go:275] no items to output this cycle
I0322 11:30:34.521291  543705 disk_info.go:125] begin check local disk info of client
I0322 11:30:34.523846  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:30:34.523851  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd40 0xc0001abd80]
I0322 11:30:39.625734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:30:39.625741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:30:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:30:43.410573  543705 memory.go:191] Add success.
I0322 11:30:43.409796  543705 cpu.go:282] Add success.
I0322 11:30:43.420280  543705 net.go:648] Add success.
I0322 11:30:43.423026  543705 net.go:770] primary dev: ETH0
I0322 11:30:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:30:43.423053  543705 net.go:698] Add success.
I0322 11:30:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:30:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:30:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:30:53.409778  543705 memory.go:184] no items to output this cycle
I0322 11:30:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 11:31:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:31:03.409775  543705 memory.go:184] no items to output this cycle
I0322 11:31:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:31:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:31:13.409798  543705 memory.go:191] Add success.
I0322 11:31:13.409814  543705 cpu.go:282] Add success.
W0322 11:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:31:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:31:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:31:13.420147  543705 net.go:648] Add success.
I0322 11:31:13.423354  543705 net.go:770] primary dev: ETH0
I0322 11:31:13.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:31:13.423378  543705 net.go:698] Add success.
I0322 11:31:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:31:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:31:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 11:31:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:31:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 11:31:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:31:15.456021  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:31:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:31:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:31:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:31:16.472508  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:31:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:31:23.409772  543705 memory.go:184] no items to output this cycle
I0322 11:31:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:31:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:31:33.409771  543705 memory.go:184] no items to output this cycle
I0322 11:31:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 11:31:34.524260  543705 disk_info.go:125] begin check local disk info of client
I0322 11:31:34.526816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:31:34.526822  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3080 0xc0003b30c0]
E0322 11:31:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:31:43.410771  543705 memory.go:191] Add success.
I0322 11:31:43.409817  543705 cpu.go:282] Add success.
I0322 11:31:43.420547  543705 net.go:648] Add success.
I0322 11:31:43.423406  543705 net.go:770] primary dev: ETH0
I0322 11:31:43.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:31:43.423436  543705 net.go:698] Add success.
I0322 11:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:31:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:31:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:31:53.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:31:53.409757  543705 memory.go:184] no items to output this cycle
I0322 11:31:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 11:32:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:32:03.409814  543705 memory.go:184] no items to output this cycle
I0322 11:32:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 11:32:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:32:13.409786  543705 memory.go:191] Add success.
I0322 11:32:13.409801  543705 cpu.go:282] Add success.
W0322 11:32:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:32:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:32:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:32:13.420133  543705 net.go:648] Add success.
I0322 11:32:13.422787  543705 net.go:770] primary dev: ETH0
I0322 11:32:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:32:13.422812  543705 net.go:698] Add success.
W0322 11:32:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:32:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 11:32:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:32:14.456521  543705 disk_worker.go:494] system disk:vda1
I0322 11:32:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:32:14.457981  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:32:14.457998  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:32:14.458004  543705 custom_config.go:64] query custom config with name: gpu
E0322 11:32:15.457034  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:32:15.457050  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:32:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:32:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:32:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:32:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:32:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:32:23.410269  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:32:23.410289  543705 memory.go:184] no items to output this cycle
I0322 11:32:23.410300  543705 cpu.go:275] no items to output this cycle
E0322 11:32:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:32:33.409781  543705 memory.go:184] no items to output this cycle
I0322 11:32:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 11:32:34.527277  543705 disk_info.go:125] begin check local disk info of client
I0322 11:32:34.529809  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:32:34.529815  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003663c0 0xc000366400]
E0322 11:32:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:32:43.410607  543705 memory.go:191] Add success.
I0322 11:32:43.409800  543705 cpu.go:282] Add success.
I0322 11:32:43.420278  543705 net.go:648] Add success.
I0322 11:32:43.422928  543705 net.go:770] primary dev: ETH0
I0322 11:32:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:32:43.422953  543705 net.go:698] Add success.
I0322 11:32:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:32:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:32:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:32:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:32:53.409764  543705 memory.go:184] no items to output this cycle
I0322 11:32:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:33:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:33:03.409803  543705 memory.go:184] no items to output this cycle
I0322 11:33:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 11:33:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:33:13.409779  543705 memory.go:191] Add success.
W0322 11:33:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:33:13.409804  543705 cpu.go:282] Add success.
W0322 11:33:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:33:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:33:13.420136  543705 net.go:648] Add success.
I0322 11:33:13.423176  543705 net.go:770] primary dev: ETH0
I0322 11:33:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:33:13.423200  543705 net.go:698] Add success.
I0322 11:33:13.468049  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"70577aaa-7591-4f9d-ac97-c2b0d180cc32","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:33:13.468082  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:33:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:33:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:33:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 11:33:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:33:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 11:33:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:33:15.455989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:33:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:33:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:33:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:33:16.472519  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:33:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:33:23.409785  543705 memory.go:184] no items to output this cycle
I0322 11:33:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 11:33:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:33:33.409801  543705 memory.go:184] no items to output this cycle
I0322 11:33:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 11:33:34.530293  543705 disk_info.go:125] begin check local disk info of client
I0322 11:33:34.532872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:33:34.532878  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a28c0 0xc0002a2900]
I0322 11:33:39.626819  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:33:39.626825  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:33:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:33:43.410679  543705 memory.go:191] Add success.
I0322 11:33:43.409797  543705 cpu.go:282] Add success.
I0322 11:33:43.420426  543705 net.go:648] Add success.
I0322 11:33:43.423060  543705 net.go:770] primary dev: ETH0
I0322 11:33:43.423074  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:33:43.423086  543705 net.go:698] Add success.
I0322 11:33:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:33:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:33:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:33:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:33:53.409766  543705 memory.go:184] no items to output this cycle
I0322 11:33:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 11:34:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:34:03.409809  543705 memory.go:184] no items to output this cycle
I0322 11:34:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 11:34:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:34:13.409780  543705 memory.go:191] Add success.
W0322 11:34:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:34:13.409811  543705 cpu.go:282] Add success.
W0322 11:34:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:34:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:34:13.420132  543705 net.go:648] Add success.
I0322 11:34:13.422635  543705 net.go:770] primary dev: ETH0
I0322 11:34:13.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:34:13.422661  543705 net.go:698] Add success.
I0322 11:34:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:34:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:34:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 11:34:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:34:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 11:34:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:34:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:34:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:34:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:34:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:34:16.472492  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:34:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:34:23.409781  543705 memory.go:184] no items to output this cycle
I0322 11:34:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 11:34:33.409782  543705 cpu.go:275] no items to output this cycle
E0322 11:34:33.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:34:33.409841  543705 memory.go:184] no items to output this cycle
I0322 11:34:34.532960  543705 disk_info.go:125] begin check local disk info of client
I0322 11:34:34.535444  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:34:34.535450  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e04c0 0xc0003e0500]
E0322 11:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:34:43.410691  543705 memory.go:191] Add success.
I0322 11:34:43.409810  543705 cpu.go:282] Add success.
I0322 11:34:43.420423  543705 net.go:648] Add success.
I0322 11:34:43.423039  543705 net.go:770] primary dev: ETH0
I0322 11:34:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:34:43.423069  543705 net.go:698] Add success.
I0322 11:34:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:34:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:34:53.410269  543705 memory.go:184] no items to output this cycle
I0322 11:34:53.410297  543705 cpu.go:275] no items to output this cycle
E0322 11:35:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:35:03.409804  543705 memory.go:184] no items to output this cycle
I0322 11:35:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 11:35:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:35:13.409781  543705 memory.go:191] Add success.
W0322 11:35:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:35:13.409806  543705 cpu.go:282] Add success.
W0322 11:35:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:35:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:35:13.420176  543705 net.go:648] Add success.
I0322 11:35:13.423188  543705 net.go:770] primary dev: ETH0
I0322 11:35:13.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:35:13.423215  543705 net.go:698] Add success.
I0322 11:35:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:35:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:35:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 11:35:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:35:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 11:35:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:35:15.456030  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:35:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:35:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:35:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:35:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:35:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:35:23.409774  543705 memory.go:184] no items to output this cycle
I0322 11:35:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:35:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:35:33.409777  543705 memory.go:184] no items to output this cycle
I0322 11:35:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 11:35:34.536370  543705 disk_info.go:125] begin check local disk info of client
I0322 11:35:34.538921  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:35:34.538927  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc440 0xc0002bc480]
E0322 11:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:35:43.410608  543705 memory.go:191] Add success.
I0322 11:35:43.409813  543705 cpu.go:282] Add success.
I0322 11:35:43.420320  543705 net.go:648] Add success.
I0322 11:35:43.422939  543705 net.go:770] primary dev: ETH0
I0322 11:35:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:35:43.422964  543705 net.go:698] Add success.
I0322 11:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:35:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:35:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:35:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:35:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 11:35:53.409783  543705 memory.go:184] no items to output this cycle
E0322 11:36:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:36:03.409819  543705 memory.go:184] no items to output this cycle
I0322 11:36:03.409830  543705 cpu.go:275] no items to output this cycle
E0322 11:36:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:36:13.409788  543705 cpu.go:282] Add success.
I0322 11:36:13.409791  543705 memory.go:191] Add success.
W0322 11:36:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:36:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:36:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:36:13.420065  543705 net.go:648] Add success.
I0322 11:36:13.422784  543705 net.go:770] primary dev: ETH0
I0322 11:36:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:36:13.422810  543705 net.go:698] Add success.
I0322 11:36:13.468412  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3992f6eb-e2e2-43ae-bc92-1859a6afd869","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:36:13.468446  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:36:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:36:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:36:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 11:36:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:36:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 11:36:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:36:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:36:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:36:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:36:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:36:16.472094  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:36:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:36:23.409803  543705 memory.go:184] no items to output this cycle
I0322 11:36:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 11:36:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:36:33.409776  543705 memory.go:184] no items to output this cycle
I0322 11:36:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 11:36:34.539009  543705 disk_info.go:125] begin check local disk info of client
I0322 11:36:34.541579  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:36:34.541585  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a480 0xc00034a4c0]
I0322 11:36:39.627832  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:36:39.627838  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:36:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:36:43.410589  543705 memory.go:191] Add success.
I0322 11:36:43.409816  543705 cpu.go:282] Add success.
I0322 11:36:43.420290  543705 net.go:648] Add success.
I0322 11:36:43.422695  543705 net.go:770] primary dev: ETH0
I0322 11:36:43.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:36:43.422721  543705 net.go:698] Add success.
I0322 11:36:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:36:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:36:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:36:53.409769  543705 memory.go:184] no items to output this cycle
I0322 11:36:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 11:37:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:37:03.409796  543705 cpu.go:275] no items to output this cycle
I0322 11:37:03.409803  543705 memory.go:184] no items to output this cycle
E0322 11:37:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:37:13.409783  543705 memory.go:191] Add success.
W0322 11:37:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:37:13.409809  543705 cpu.go:282] Add success.
W0322 11:37:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:37:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:37:13.420046  543705 net.go:648] Add success.
I0322 11:37:13.422671  543705 net.go:770] primary dev: ETH0
I0322 11:37:13.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:37:13.422695  543705 net.go:698] Add success.
I0322 11:37:13.453230  543705 event_worker.go:152] Polling the log file for events...
W0322 11:37:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:37:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 11:37:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:37:14.456897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:37:14.456906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:37:14.456912  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:37:14.456986  543705 disk_worker.go:494] system disk:vda1
I0322 11:37:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:37:15.456878  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:37:15.456892  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:37:16.458120  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:37:16.458185  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0322 11:37:16.458194  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:37:16.458206  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:37:16.472586  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:37:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:37:23.409777  543705 memory.go:184] no items to output this cycle
I0322 11:37:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 11:37:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:37:33.409801  543705 memory.go:184] no items to output this cycle
I0322 11:37:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 11:37:34.542389  543705 disk_info.go:125] begin check local disk info of client
I0322 11:37:34.544998  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:37:34.545002  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384b40 0xc000384b80]
E0322 11:37:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:37:43.410585  543705 memory.go:191] Add success.
I0322 11:37:43.409789  543705 cpu.go:282] Add success.
I0322 11:37:43.420292  543705 net.go:648] Add success.
I0322 11:37:43.422928  543705 net.go:770] primary dev: ETH0
I0322 11:37:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:37:43.422954  543705 net.go:698] Add success.
I0322 11:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:37:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:37:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:37:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:37:53.410382  543705 memory.go:184] no items to output this cycle
I0322 11:37:53.410390  543705 cpu.go:275] no items to output this cycle
E0322 11:38:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:38:03.409809  543705 memory.go:184] no items to output this cycle
I0322 11:38:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 11:38:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:38:13.409783  543705 memory.go:191] Add success.
W0322 11:38:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:38:13.409811  543705 cpu.go:282] Add success.
W0322 11:38:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:38:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:38:13.420230  543705 net.go:648] Add success.
I0322 11:38:13.423174  543705 net.go:770] primary dev: ETH0
I0322 11:38:13.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:38:13.423198  543705 net.go:698] Add success.
I0322 11:38:14.454249  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:38:14.454444  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:38:14.454454  543705 disk_worker.go:708] disk space is not compliant
W0322 11:38:14.454457  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:38:14.455790  543705 disk_worker.go:494] system disk:vda1
I0322 11:38:14.455832  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:38:15.455315  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:38:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:38:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:38:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:38:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:38:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:38:23.409793  543705 memory.go:184] no items to output this cycle
I0322 11:38:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 11:38:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:38:33.409777  543705 memory.go:184] no items to output this cycle
I0322 11:38:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 11:38:34.545088  543705 disk_info.go:125] begin check local disk info of client
I0322 11:38:34.547713  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:38:34.547720  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486000 0xc000486040]
E0322 11:38:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:38:43.410646  543705 memory.go:191] Add success.
I0322 11:38:43.409807  543705 cpu.go:282] Add success.
I0322 11:38:43.420430  543705 net.go:648] Add success.
I0322 11:38:43.423229  543705 net.go:770] primary dev: ETH0
I0322 11:38:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:38:43.423257  543705 net.go:698] Add success.
I0322 11:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:38:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:38:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:38:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:38:53.409785  543705 memory.go:184] no items to output this cycle
I0322 11:38:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 11:39:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:39:03.409782  543705 memory.go:184] no items to output this cycle
I0322 11:39:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 11:39:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:39:13.409794  543705 memory.go:191] Add success.
I0322 11:39:13.409816  543705 cpu.go:282] Add success.
W0322 11:39:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:39:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:39:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:39:13.420251  543705 net.go:648] Add success.
I0322 11:39:13.422848  543705 net.go:770] primary dev: ETH0
I0322 11:39:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:39:13.422873  543705 net.go:698] Add success.
I0322 11:39:13.468585  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"698a2257-3f4f-45c2-9b45-c1d9a51965b9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:39:13.468616  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:39:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:39:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:39:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 11:39:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:39:14.456647  543705 disk_worker.go:494] system disk:vda1
I0322 11:39:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:39:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:39:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:39:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:39:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:39:16.472455  543705 disk_local_worker.go:436] Get disk info: []
I0322 11:39:23.409903  543705 cpu.go:275] no items to output this cycle
E0322 11:39:23.410071  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:39:23.410082  543705 memory.go:184] no items to output this cycle
E0322 11:39:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:39:33.409809  543705 memory.go:184] no items to output this cycle
I0322 11:39:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 11:39:34.548386  543705 disk_info.go:125] begin check local disk info of client
I0322 11:39:34.550949  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:39:34.550956  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0322 11:39:39.628855  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:39:39.628862  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:39:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:39:43.410656  543705 memory.go:191] Add success.
I0322 11:39:43.409809  543705 cpu.go:282] Add success.
I0322 11:39:43.420356  543705 net.go:648] Add success.
I0322 11:39:43.423032  543705 net.go:770] primary dev: ETH0
I0322 11:39:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:39:43.423057  543705 net.go:698] Add success.
I0322 11:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:39:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:39:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:39:53.409782  543705 cpu.go:275] no items to output this cycle
I0322 11:39:53.409804  543705 memory.go:184] no items to output this cycle
E0322 11:40:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:40:03.409795  543705 cpu.go:275] no items to output this cycle
I0322 11:40:03.409813  543705 memory.go:184] no items to output this cycle
E0322 11:40:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:40:13.409777  543705 memory.go:191] Add success.
I0322 11:40:13.409802  543705 cpu.go:282] Add success.
W0322 11:40:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:40:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:40:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:40:13.420062  543705 net.go:648] Add success.
I0322 11:40:13.422871  543705 net.go:770] primary dev: ETH0
I0322 11:40:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:40:13.422899  543705 net.go:698] Add success.
I0322 11:40:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:40:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:40:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 11:40:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:40:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 11:40:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:40:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:40:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:40:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:40:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:40:16.472472  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:40:23.409860  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:40:23.409878  543705 memory.go:184] no items to output this cycle
I0322 11:40:23.409935  543705 cpu.go:275] no items to output this cycle
E0322 11:40:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:40:33.409797  543705 memory.go:184] no items to output this cycle
I0322 11:40:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 11:40:34.551386  543705 disk_info.go:125] begin check local disk info of client
I0322 11:40:34.553951  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:40:34.553957  543705 disk_info.go:196] parse disk info done, disk is : [0xc000507f00 0xc000507f40]
E0322 11:40:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:40:43.410558  543705 memory.go:191] Add success.
I0322 11:40:43.409827  543705 cpu.go:282] Add success.
I0322 11:40:43.420252  543705 net.go:648] Add success.
I0322 11:40:43.422819  543705 net.go:770] primary dev: ETH0
I0322 11:40:43.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:40:43.422843  543705 net.go:698] Add success.
I0322 11:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:40:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:40:53.409783  543705 cpu.go:275] no items to output this cycle
I0322 11:40:53.409790  543705 memory.go:184] no items to output this cycle
E0322 11:41:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:41:03.409784  543705 memory.go:184] no items to output this cycle
I0322 11:41:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 11:41:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:41:13.409784  543705 memory.go:191] Add success.
I0322 11:41:13.409810  543705 cpu.go:282] Add success.
W0322 11:41:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:41:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:41:13.420201  543705 net.go:648] Add success.
I0322 11:41:13.422838  543705 net.go:770] primary dev: ETH0
I0322 11:41:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:41:13.422863  543705 net.go:698] Add success.
I0322 11:41:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:41:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:41:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0322 11:41:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:41:14.456481  543705 disk_worker.go:494] system disk:vda1
I0322 11:41:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:41:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:41:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:41:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:41:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:41:16.472489  543705 disk_local_worker.go:436] Get disk info: []
I0322 11:41:23.409862  543705 cpu.go:275] no items to output this cycle
E0322 11:41:23.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:41:23.409882  543705 memory.go:184] no items to output this cycle
E0322 11:41:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:41:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 11:41:33.409788  543705 memory.go:184] no items to output this cycle
I0322 11:41:34.554036  543705 disk_info.go:125] begin check local disk info of client
I0322 11:41:34.556610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:41:34.556616  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8140 0xc0004d8180]
E0322 11:41:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:41:43.410653  543705 memory.go:191] Add success.
I0322 11:41:43.409784  543705 cpu.go:282] Add success.
I0322 11:41:43.420359  543705 net.go:648] Add success.
I0322 11:41:43.423176  543705 net.go:770] primary dev: ETH0
I0322 11:41:43.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:41:43.423202  543705 net.go:698] Add success.
I0322 11:41:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:41:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:41:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:41:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:41:53.409775  543705 memory.go:184] no items to output this cycle
I0322 11:41:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:42:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:42:03.409795  543705 memory.go:184] no items to output this cycle
I0322 11:42:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 11:42:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:42:13.409776  543705 memory.go:191] Add success.
W0322 11:42:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:42:13.409811  543705 cpu.go:282] Add success.
W0322 11:42:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:42:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:42:13.420297  543705 net.go:648] Add success.
I0322 11:42:13.423049  543705 net.go:770] primary dev: ETH0
I0322 11:42:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:42:13.423074  543705 net.go:698] Add success.
I0322 11:42:13.485173  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ccbd698-11c9-4123-842c-34c1a9f6c7f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:42:13.485207  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 11:42:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:42:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 11:42:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:42:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:42:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:42:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:42:14.456551  543705 disk_worker.go:494] system disk:vda1
I0322 11:42:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:42:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:42:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:42:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:42:16.458166  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0322 11:42:16.458182  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:42:16.458186  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:42:16.472623  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:42:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:42:23.409768  543705 memory.go:184] no items to output this cycle
I0322 11:42:23.409885  543705 cpu.go:275] no items to output this cycle
E0322 11:42:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:42:33.409804  543705 memory.go:184] no items to output this cycle
I0322 11:42:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 11:42:34.557467  543705 disk_info.go:125] begin check local disk info of client
I0322 11:42:34.559965  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:42:34.559971  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae40 0xc0001aae80]
I0322 11:42:39.629762  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:42:39.629769  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:42:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:42:43.410754  543705 memory.go:191] Add success.
I0322 11:42:43.409807  543705 cpu.go:282] Add success.
I0322 11:42:43.420449  543705 net.go:648] Add success.
I0322 11:42:43.423349  543705 net.go:770] primary dev: ETH0
I0322 11:42:43.423362  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:42:43.423374  543705 net.go:698] Add success.
I0322 11:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:42:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:42:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:42:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:42:53.409769  543705 memory.go:184] no items to output this cycle
I0322 11:42:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 11:43:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:43:03.409770  543705 memory.go:184] no items to output this cycle
I0322 11:43:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 11:43:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:43:13.409821  543705 memory.go:191] Add success.
I0322 11:43:13.409829  543705 cpu.go:282] Add success.
W0322 11:43:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:43:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:43:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:43:13.420107  543705 net.go:648] Add success.
I0322 11:43:13.422922  543705 net.go:770] primary dev: ETH0
I0322 11:43:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:43:13.422946  543705 net.go:698] Add success.
I0322 11:43:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:43:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:43:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 11:43:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:43:14.456494  543705 disk_worker.go:494] system disk:vda1
I0322 11:43:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:43:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:43:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:43:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:43:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:43:16.472500  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:43:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:43:23.409783  543705 memory.go:184] no items to output this cycle
I0322 11:43:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 11:43:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:43:33.409808  543705 memory.go:184] no items to output this cycle
I0322 11:43:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 11:43:34.560059  543705 disk_info.go:125] begin check local disk info of client
I0322 11:43:34.562600  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:43:34.562605  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 11:43:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:43:43.410584  543705 memory.go:191] Add success.
I0322 11:43:43.409806  543705 cpu.go:282] Add success.
I0322 11:43:43.420291  543705 net.go:648] Add success.
I0322 11:43:43.422873  543705 net.go:770] primary dev: ETH0
I0322 11:43:43.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:43:43.422904  543705 net.go:698] Add success.
I0322 11:43:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:43:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:43:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:43:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:43:53.409777  543705 cpu.go:275] no items to output this cycle
I0322 11:43:53.409778  543705 memory.go:184] no items to output this cycle
E0322 11:44:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:44:03.409813  543705 memory.go:184] no items to output this cycle
I0322 11:44:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 11:44:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:44:13.409789  543705 memory.go:191] Add success.
I0322 11:44:13.409791  543705 cpu.go:282] Add success.
W0322 11:44:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:44:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:44:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:44:13.420200  543705 net.go:648] Add success.
I0322 11:44:13.423179  543705 net.go:770] primary dev: ETH0
I0322 11:44:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:44:13.423205  543705 net.go:698] Add success.
I0322 11:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:44:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:44:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0322 11:44:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:44:14.456615  543705 disk_worker.go:494] system disk:vda1
I0322 11:44:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:44:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:44:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:44:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:44:16.472526  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:44:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:44:23.409771  543705 memory.go:184] no items to output this cycle
I0322 11:44:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 11:44:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:44:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 11:44:33.409793  543705 memory.go:184] no items to output this cycle
I0322 11:44:34.563512  543705 disk_info.go:125] begin check local disk info of client
I0322 11:44:34.566005  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:44:34.566012  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 11:44:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:44:43.410685  543705 memory.go:191] Add success.
I0322 11:44:43.409796  543705 cpu.go:282] Add success.
I0322 11:44:43.420411  543705 net.go:648] Add success.
I0322 11:44:43.423356  543705 net.go:770] primary dev: ETH0
I0322 11:44:43.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:44:43.423384  543705 net.go:698] Add success.
I0322 11:44:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:44:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:44:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:44:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:44:53.409782  543705 memory.go:184] no items to output this cycle
I0322 11:44:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 11:45:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:45:03.409796  543705 memory.go:184] no items to output this cycle
I0322 11:45:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:45:13.409808  543705 memory.go:191] Add success.
I0322 11:45:13.409816  543705 cpu.go:282] Add success.
W0322 11:45:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:45:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:45:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:45:13.420254  543705 net.go:648] Add success.
I0322 11:45:13.422799  543705 net.go:770] primary dev: ETH0
I0322 11:45:13.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:45:13.422826  543705 net.go:698] Add success.
I0322 11:45:13.468169  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ec1457f1-40b6-4367-9751-4fede69dbae3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:45:13.468202  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:45:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:45:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:45:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 11:45:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:45:14.456690  543705 disk_worker.go:494] system disk:vda1
I0322 11:45:14.456730  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:45:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:45:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:45:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:45:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:45:16.472503  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:45:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:45:23.409781  543705 memory.go:184] no items to output this cycle
I0322 11:45:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 11:45:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:45:33.409762  543705 memory.go:184] no items to output this cycle
I0322 11:45:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 11:45:34.566457  543705 disk_info.go:125] begin check local disk info of client
I0322 11:45:34.568989  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:45:34.568994  543705 disk_info.go:196] parse disk info done, disk is : [0xc000512c40 0xc000512c80]
I0322 11:45:39.629915  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:45:39.629922  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:45:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:45:43.410567  543705 memory.go:191] Add success.
I0322 11:45:43.409801  543705 cpu.go:282] Add success.
I0322 11:45:43.420256  543705 net.go:648] Add success.
I0322 11:45:43.422845  543705 net.go:770] primary dev: ETH0
I0322 11:45:43.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:45:43.422874  543705 net.go:698] Add success.
I0322 11:45:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:45:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:45:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:45:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:45:53.409777  543705 memory.go:184] no items to output this cycle
I0322 11:45:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 11:46:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:46:03.409795  543705 memory.go:184] no items to output this cycle
I0322 11:46:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 11:46:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:46:13.409778  543705 memory.go:191] Add success.
I0322 11:46:13.409800  543705 cpu.go:282] Add success.
W0322 11:46:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:46:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:46:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:46:13.420105  543705 net.go:648] Add success.
I0322 11:46:13.423006  543705 net.go:770] primary dev: ETH0
I0322 11:46:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:46:13.423031  543705 net.go:698] Add success.
I0322 11:46:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:46:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:46:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 11:46:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:46:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 11:46:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:46:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:46:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:46:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:46:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:46:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:46:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:46:23.409783  543705 memory.go:184] no items to output this cycle
I0322 11:46:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 11:46:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:46:33.409782  543705 memory.go:184] no items to output this cycle
I0322 11:46:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 11:46:34.569472  543705 disk_info.go:125] begin check local disk info of client
I0322 11:46:34.572249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:46:34.572256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312040 0xc000312080]
E0322 11:46:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:46:43.410808  543705 memory.go:191] Add success.
I0322 11:46:43.409829  543705 cpu.go:282] Add success.
I0322 11:46:43.420516  543705 net.go:648] Add success.
I0322 11:46:43.423195  543705 net.go:770] primary dev: ETH0
I0322 11:46:43.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:46:43.423220  543705 net.go:698] Add success.
I0322 11:46:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:46:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:46:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:46:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:46:53.409799  543705 memory.go:184] no items to output this cycle
I0322 11:46:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 11:47:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:47:03.409788  543705 memory.go:184] no items to output this cycle
I0322 11:47:03.409789  543705 cpu.go:275] no items to output this cycle
W0322 11:47:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:47:13.409723  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:47:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 11:47:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:47:13.409819  543705 memory.go:191] Add success.
I0322 11:47:13.409819  543705 cpu.go:282] Add success.
I0322 11:47:13.420011  543705 net.go:648] Add success.
I0322 11:47:13.422564  543705 net.go:770] primary dev: ETH0
I0322 11:47:13.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:47:13.422589  543705 net.go:698] Add success.
I0322 11:47:13.453164  543705 event_worker.go:152] Polling the log file for events...
W0322 11:47:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 11:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:47:14.456817  543705 disk_worker.go:494] system disk:vda1
I0322 11:47:14.456855  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:47:14.457037  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:47:14.457045  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:47:14.457049  543705 custom_config.go:64] query custom config with name: gpu
E0322 11:47:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:47:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:47:16.458101  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:47:16.458167  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0322 11:47:16.458182  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:47:16.458189  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:47:16.472593  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:47:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:47:23.409770  543705 memory.go:184] no items to output this cycle
I0322 11:47:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 11:47:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:47:33.409786  543705 memory.go:184] no items to output this cycle
I0322 11:47:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 11:47:34.572492  543705 disk_info.go:125] begin check local disk info of client
I0322 11:47:34.575012  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:47:34.575018  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0200 0xc0004a0240]
E0322 11:47:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:47:43.410657  543705 memory.go:191] Add success.
I0322 11:47:43.409849  543705 cpu.go:282] Add success.
I0322 11:47:43.420397  543705 net.go:648] Add success.
I0322 11:47:43.423093  543705 net.go:770] primary dev: ETH0
I0322 11:47:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:47:43.423118  543705 net.go:698] Add success.
I0322 11:47:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:47:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:47:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:47:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:47:53.409810  543705 memory.go:184] no items to output this cycle
I0322 11:47:53.409827  543705 cpu.go:275] no items to output this cycle
E0322 11:48:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:48:03.409798  543705 memory.go:184] no items to output this cycle
I0322 11:48:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 11:48:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:48:13.409784  543705 memory.go:191] Add success.
W0322 11:48:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:48:13.409816  543705 cpu.go:282] Add success.
W0322 11:48:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:48:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:48:13.420112  543705 net.go:648] Add success.
I0322 11:48:13.422918  543705 net.go:770] primary dev: ETH0
I0322 11:48:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:48:13.422943  543705 net.go:698] Add success.
I0322 11:48:13.468398  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3dc2c8a8-bc39-4371-9f48-d2a08610b299","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:48:13.468433  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:48:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:48:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:48:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 11:48:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:48:14.456520  543705 disk_worker.go:494] system disk:vda1
I0322 11:48:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:48:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:48:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:48:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:48:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:48:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:48:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:48:23.409788  543705 memory.go:184] no items to output this cycle
I0322 11:48:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 11:48:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:48:33.409778  543705 memory.go:184] no items to output this cycle
I0322 11:48:33.409906  543705 cpu.go:275] no items to output this cycle
I0322 11:48:34.575515  543705 disk_info.go:125] begin check local disk info of client
I0322 11:48:34.578106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:48:34.578111  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a000 0xc00047a040]
I0322 11:48:39.630059  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:48:39.630065  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:48:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:48:43.410647  543705 memory.go:191] Add success.
I0322 11:48:43.409829  543705 cpu.go:282] Add success.
I0322 11:48:43.420377  543705 net.go:648] Add success.
I0322 11:48:43.423111  543705 net.go:770] primary dev: ETH0
I0322 11:48:43.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:48:43.423141  543705 net.go:698] Add success.
I0322 11:48:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:48:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:48:53.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:48:53.410422  543705 memory.go:184] no items to output this cycle
I0322 11:48:53.410436  543705 cpu.go:275] no items to output this cycle
E0322 11:49:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:49:03.409803  543705 memory.go:184] no items to output this cycle
I0322 11:49:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 11:49:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:49:13.409819  543705 memory.go:191] Add success.
I0322 11:49:13.409822  543705 cpu.go:282] Add success.
W0322 11:49:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:49:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:49:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:49:13.420050  543705 net.go:648] Add success.
I0322 11:49:13.422772  543705 net.go:770] primary dev: ETH0
I0322 11:49:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:49:13.422806  543705 net.go:698] Add success.
I0322 11:49:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:49:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:49:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 11:49:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:49:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 11:49:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:49:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:49:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:49:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:49:16.472576  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:49:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:49:23.409816  543705 memory.go:184] no items to output this cycle
I0322 11:49:23.409824  543705 cpu.go:275] no items to output this cycle
E0322 11:49:33.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:49:33.409899  543705 cpu.go:275] no items to output this cycle
I0322 11:49:33.409991  543705 memory.go:184] no items to output this cycle
I0322 11:49:34.578197  543705 disk_info.go:125] begin check local disk info of client
I0322 11:49:34.580721  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:49:34.580728  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582000 0xc000582040]
E0322 11:49:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:49:43.410687  543705 memory.go:191] Add success.
I0322 11:49:43.409815  543705 cpu.go:282] Add success.
I0322 11:49:43.420482  543705 net.go:648] Add success.
I0322 11:49:43.423264  543705 net.go:770] primary dev: ETH0
I0322 11:49:43.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:49:43.423293  543705 net.go:698] Add success.
I0322 11:49:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:49:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:49:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:49:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:49:53.409760  543705 memory.go:184] no items to output this cycle
I0322 11:49:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 11:50:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:50:03.409781  543705 memory.go:184] no items to output this cycle
I0322 11:50:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 11:50:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:50:13.409781  543705 memory.go:191] Add success.
W0322 11:50:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:50:13.409806  543705 cpu.go:282] Add success.
W0322 11:50:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:50:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:50:13.420065  543705 net.go:648] Add success.
I0322 11:50:13.423284  543705 net.go:770] primary dev: ETH0
I0322 11:50:13.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:50:13.423308  543705 net.go:698] Add success.
I0322 11:50:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:50:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:50:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 11:50:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:50:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 11:50:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:50:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:50:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:50:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:50:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:50:16.472549  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:50:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:50:23.409771  543705 memory.go:184] no items to output this cycle
I0322 11:50:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 11:50:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:50:33.409794  543705 memory.go:184] no items to output this cycle
I0322 11:50:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 11:50:34.580809  543705 disk_info.go:125] begin check local disk info of client
I0322 11:50:34.583360  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:50:34.583366  543705 disk_info.go:196] parse disk info done, disk is : [0xc000582000 0xc000582040]
E0322 11:50:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:50:43.410723  543705 memory.go:191] Add success.
I0322 11:50:43.409789  543705 cpu.go:282] Add success.
I0322 11:50:43.420431  543705 net.go:648] Add success.
I0322 11:50:43.423551  543705 net.go:770] primary dev: ETH0
I0322 11:50:43.423565  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:50:43.423577  543705 net.go:698] Add success.
I0322 11:50:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:50:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:50:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:50:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:50:53.409767  543705 memory.go:184] no items to output this cycle
I0322 11:50:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 11:51:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:51:03.409794  543705 memory.go:184] no items to output this cycle
I0322 11:51:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 11:51:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:51:13.409813  543705 memory.go:191] Add success.
I0322 11:51:13.409822  543705 cpu.go:282] Add success.
W0322 11:51:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:51:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:51:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:51:13.420086  543705 net.go:648] Add success.
I0322 11:51:13.422801  543705 net.go:770] primary dev: ETH0
I0322 11:51:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:51:13.422829  543705 net.go:698] Add success.
I0322 11:51:13.468981  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f6e94240-7842-4df6-a803-07afac192285","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:51:13.469012  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:51:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:51:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:51:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 11:51:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:51:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 11:51:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:51:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:51:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:51:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:51:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:51:16.472563  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:51:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:51:23.409802  543705 memory.go:184] no items to output this cycle
I0322 11:51:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 11:51:33.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:51:33.409896  543705 memory.go:184] no items to output this cycle
I0322 11:51:33.409965  543705 cpu.go:275] no items to output this cycle
I0322 11:51:34.583450  543705 disk_info.go:125] begin check local disk info of client
I0322 11:51:34.586025  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:51:34.586030  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f0940 0xc0004f0980]
I0322 11:51:39.630845  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:51:39.630851  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:51:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:51:43.410707  543705 memory.go:191] Add success.
I0322 11:51:43.409808  543705 cpu.go:282] Add success.
I0322 11:51:43.420390  543705 net.go:648] Add success.
I0322 11:51:43.423070  543705 net.go:770] primary dev: ETH0
I0322 11:51:43.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:51:43.423094  543705 net.go:698] Add success.
I0322 11:51:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:51:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:51:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:51:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:51:53.409783  543705 memory.go:184] no items to output this cycle
I0322 11:51:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 11:52:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:52:03.409814  543705 memory.go:184] no items to output this cycle
I0322 11:52:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 11:52:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:52:13.409779  543705 memory.go:191] Add success.
I0322 11:52:13.409804  543705 cpu.go:282] Add success.
W0322 11:52:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:52:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:52:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:52:13.420126  543705 net.go:648] Add success.
I0322 11:52:13.422954  543705 net.go:770] primary dev: ETH0
I0322 11:52:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:52:13.422979  543705 net.go:698] Add success.
W0322 11:52:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:52:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 11:52:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:52:14.455882  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:52:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:52:14.455896  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:52:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 11:52:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:52:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:52:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:52:16.458115  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 11:52:16.458126  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:52:16.458182  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:52:16.458205  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:52:16.472649  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:52:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:52:23.409813  543705 memory.go:184] no items to output this cycle
I0322 11:52:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 11:52:33.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:52:33.409891  543705 memory.go:184] no items to output this cycle
I0322 11:52:33.410082  543705 cpu.go:275] no items to output this cycle
I0322 11:52:34.586578  543705 disk_info.go:125] begin check local disk info of client
I0322 11:52:34.589116  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:52:34.589121  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2280 0xc0003f22c0]
E0322 11:52:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:52:43.410590  543705 memory.go:191] Add success.
I0322 11:52:43.409822  543705 cpu.go:282] Add success.
I0322 11:52:43.420363  543705 net.go:648] Add success.
I0322 11:52:43.422987  543705 net.go:770] primary dev: ETH0
I0322 11:52:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:52:43.423013  543705 net.go:698] Add success.
I0322 11:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:52:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:52:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:52:53.410406  543705 memory.go:184] no items to output this cycle
I0322 11:52:53.410445  543705 cpu.go:275] no items to output this cycle
E0322 11:53:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:53:03.409798  543705 memory.go:184] no items to output this cycle
I0322 11:53:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 11:53:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:53:13.409781  543705 memory.go:191] Add success.
I0322 11:53:13.409798  543705 cpu.go:282] Add success.
W0322 11:53:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:53:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:53:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:53:13.420088  543705 net.go:648] Add success.
I0322 11:53:13.422942  543705 net.go:770] primary dev: ETH0
I0322 11:53:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:53:13.422972  543705 net.go:698] Add success.
I0322 11:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:53:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:53:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 11:53:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:53:14.456512  543705 disk_worker.go:494] system disk:vda1
I0322 11:53:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:53:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:53:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:53:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:53:16.472504  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:53:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:53:23.409806  543705 memory.go:184] no items to output this cycle
I0322 11:53:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 11:53:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:53:33.409771  543705 memory.go:184] no items to output this cycle
I0322 11:53:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 11:53:34.589196  543705 disk_info.go:125] begin check local disk info of client
I0322 11:53:34.591854  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:53:34.591861  543705 disk_info.go:196] parse disk info done, disk is : [0xc000512440 0xc000512480]
E0322 11:53:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:53:43.410697  543705 memory.go:191] Add success.
I0322 11:53:43.409826  543705 cpu.go:282] Add success.
I0322 11:53:43.420404  543705 net.go:648] Add success.
I0322 11:53:43.423123  543705 net.go:770] primary dev: ETH0
I0322 11:53:43.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:53:43.423148  543705 net.go:698] Add success.
I0322 11:53:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:53:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:53:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:53:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 11:53:53.409784  543705 memory.go:184] no items to output this cycle
E0322 11:54:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:54:03.409790  543705 memory.go:184] no items to output this cycle
I0322 11:54:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 11:54:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:54:13.409819  543705 memory.go:191] Add success.
I0322 11:54:13.409819  543705 cpu.go:282] Add success.
W0322 11:54:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:54:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:54:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:54:13.420167  543705 net.go:648] Add success.
I0322 11:54:13.423177  543705 net.go:770] primary dev: ETH0
I0322 11:54:13.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:54:13.423214  543705 net.go:698] Add success.
I0322 11:54:13.467597  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4fe409c6-d53c-419d-81a6-476d50ded5f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:54:13.467643  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 11:54:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:54:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:54:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 11:54:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:54:14.456525  543705 disk_worker.go:494] system disk:vda1
I0322 11:54:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:54:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:54:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:54:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:54:16.472528  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:54:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:54:23.409801  543705 memory.go:184] no items to output this cycle
I0322 11:54:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 11:54:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:54:33.409797  543705 memory.go:184] no items to output this cycle
I0322 11:54:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 11:54:34.592606  543705 disk_info.go:125] begin check local disk info of client
I0322 11:54:34.595044  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:54:34.595050  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f0080 0xc0001f00c0]
I0322 11:54:39.631849  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:54:39.631856  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:54:43.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:54:43.410624  543705 memory.go:191] Add success.
I0322 11:54:43.409955  543705 cpu.go:282] Add success.
I0322 11:54:43.419715  543705 net.go:648] Add success.
I0322 11:54:43.422459  543705 net.go:770] primary dev: ETH0
I0322 11:54:43.422475  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:54:43.422490  543705 net.go:698] Add success.
I0322 11:54:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:54:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:54:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:54:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:54:53.409763  543705 memory.go:184] no items to output this cycle
I0322 11:54:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 11:55:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:55:03.409778  543705 memory.go:184] no items to output this cycle
I0322 11:55:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 11:55:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:55:13.409791  543705 memory.go:191] Add success.
W0322 11:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 11:55:13.409824  543705 cpu.go:282] Add success.
W0322 11:55:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:55:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:55:13.420459  543705 net.go:648] Add success.
I0322 11:55:13.423436  543705 net.go:770] primary dev: ETH0
I0322 11:55:13.423449  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:55:13.423461  543705 net.go:698] Add success.
I0322 11:55:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:55:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:55:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 11:55:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:55:14.456486  543705 disk_worker.go:494] system disk:vda1
I0322 11:55:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:55:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:55:16.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:55:16.458102  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:55:16.458137  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:55:16.472678  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:55:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:55:23.409779  543705 memory.go:184] no items to output this cycle
I0322 11:55:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 11:55:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:55:33.409790  543705 memory.go:184] no items to output this cycle
I0322 11:55:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 11:55:34.595616  543705 disk_info.go:125] begin check local disk info of client
I0322 11:55:34.598190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:55:34.598195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc940 0xc0003dc980]
E0322 11:55:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:55:43.410610  543705 memory.go:191] Add success.
I0322 11:55:43.409803  543705 cpu.go:282] Add success.
I0322 11:55:43.419712  543705 net.go:648] Add success.
I0322 11:55:43.422492  543705 net.go:770] primary dev: ETH0
I0322 11:55:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:55:43.422517  543705 net.go:698] Add success.
I0322 11:55:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:55:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:55:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:55:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:55:53.409772  543705 memory.go:184] no items to output this cycle
I0322 11:55:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 11:56:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:56:03.409802  543705 memory.go:184] no items to output this cycle
I0322 11:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 11:56:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:56:13.409814  543705 memory.go:191] Add success.
I0322 11:56:13.409821  543705 cpu.go:282] Add success.
W0322 11:56:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:56:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:56:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:56:13.420179  543705 net.go:648] Add success.
I0322 11:56:13.423046  543705 net.go:770] primary dev: ETH0
I0322 11:56:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:56:13.423071  543705 net.go:698] Add success.
I0322 11:56:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:56:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:56:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 11:56:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:56:14.456617  543705 disk_worker.go:494] system disk:vda1
I0322 11:56:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:56:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:56:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:56:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:56:16.458119  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:56:16.472601  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:56:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:56:23.409786  543705 memory.go:184] no items to output this cycle
I0322 11:56:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 11:56:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:56:33.409768  543705 memory.go:184] no items to output this cycle
I0322 11:56:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 11:56:34.598276  543705 disk_info.go:125] begin check local disk info of client
I0322 11:56:34.600806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:56:34.600812  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a440 0xc00039a480]
E0322 11:56:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:56:43.410834  543705 memory.go:191] Add success.
I0322 11:56:43.409819  543705 cpu.go:282] Add success.
I0322 11:56:43.420720  543705 net.go:648] Add success.
I0322 11:56:43.423493  543705 net.go:770] primary dev: ETH0
I0322 11:56:43.423509  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:56:43.423522  543705 net.go:698] Add success.
I0322 11:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:56:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:56:53.409785  543705 memory.go:184] no items to output this cycle
I0322 11:56:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 11:57:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:57:03.409765  543705 memory.go:184] no items to output this cycle
I0322 11:57:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 11:57:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:57:13.409793  543705 memory.go:191] Add success.
I0322 11:57:13.409793  543705 cpu.go:282] Add success.
W0322 11:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:57:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:57:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:57:13.420123  543705 net.go:648] Add success.
I0322 11:57:13.428989  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 11:57:13.429064  543705 net.go:770] primary dev: ETH0
I0322 11:57:13.429079  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:57:13.429093  543705 net.go:698] Add success.
I0322 11:57:13.453621  543705 event_worker.go:152] Polling the log file for events...
I0322 11:57:13.469760  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dbeec0ed-a96c-473e-bf4d-b51e6da60bf5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 11:57:13.469792  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 11:57:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:57:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 11:57:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 11:57:14.456063  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 11:57:14.456071  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 11:57:14.456076  543705 custom_config.go:64] query custom config with name: gpu
I0322 11:57:14.456651  543705 disk_worker.go:494] system disk:vda1
I0322 11:57:14.456692  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 11:57:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 11:57:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
E0322 11:57:16.457884  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 11:57:16.457892  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:57:16.457959  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:57:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:57:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:57:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:57:23.409774  543705 memory.go:184] no items to output this cycle
I0322 11:57:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 11:57:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:57:33.409794  543705 memory.go:184] no items to output this cycle
I0322 11:57:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 11:57:34.600898  543705 disk_info.go:125] begin check local disk info of client
I0322 11:57:34.603350  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:57:34.603357  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304e00 0xc000304e40]
I0322 11:57:39.632867  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 11:57:39.632873  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 11:57:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:57:43.410684  543705 memory.go:191] Add success.
I0322 11:57:43.409817  543705 cpu.go:282] Add success.
I0322 11:57:43.420342  543705 net.go:770] primary dev: ETH0
I0322 11:57:43.420362  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:57:43.420376  543705 net.go:698] Add success.
I0322 11:57:43.420924  543705 net.go:648] Add success.
I0322 11:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:57:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:57:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:57:53.409798  543705 memory.go:184] no items to output this cycle
I0322 11:57:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 11:58:03.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:58:03.409818  543705 cpu.go:275] no items to output this cycle
I0322 11:58:03.409829  543705 memory.go:184] no items to output this cycle
E0322 11:58:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:58:13.409844  543705 memory.go:191] Add success.
I0322 11:58:13.409858  543705 cpu.go:282] Add success.
W0322 11:58:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:58:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:58:13.409898  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:58:13.420067  543705 net.go:770] primary dev: ETH0
I0322 11:58:13.420081  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:58:13.420096  543705 net.go:698] Add success.
I0322 11:58:13.420450  543705 net.go:648] Add success.
I0322 11:58:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:58:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:58:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 11:58:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:58:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 11:58:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:58:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:58:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:58:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:58:23.409784  543705 cpu.go:275] no items to output this cycle
I0322 11:58:23.409786  543705 memory.go:184] no items to output this cycle
E0322 11:58:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:58:33.409769  543705 memory.go:184] no items to output this cycle
I0322 11:58:33.409841  543705 cpu.go:275] no items to output this cycle
I0322 11:58:34.603659  543705 disk_info.go:125] begin check local disk info of client
I0322 11:58:34.606131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:58:34.606137  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314180 0xc0003141c0]
E0322 11:58:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:58:43.410749  543705 memory.go:191] Add success.
I0322 11:58:43.409801  543705 cpu.go:282] Add success.
I0322 11:58:43.420710  543705 net.go:648] Add success.
I0322 11:58:43.423540  543705 net.go:770] primary dev: ETH0
I0322 11:58:43.423554  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:58:43.423566  543705 net.go:698] Add success.
I0322 11:58:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:58:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:58:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:58:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:58:53.409776  543705 cpu.go:275] no items to output this cycle
I0322 11:58:53.409783  543705 memory.go:184] no items to output this cycle
E0322 11:59:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:59:03.409770  543705 memory.go:184] no items to output this cycle
I0322 11:59:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 11:59:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:59:13.409794  543705 memory.go:191] Add success.
I0322 11:59:13.409795  543705 cpu.go:282] Add success.
W0322 11:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 11:59:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 11:59:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 11:59:13.420049  543705 net.go:648] Add success.
I0322 11:59:13.423105  543705 net.go:770] primary dev: ETH0
I0322 11:59:13.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:59:13.423131  543705 net.go:698] Add success.
I0322 11:59:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 11:59:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 11:59:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 11:59:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0322 11:59:14.456484  543705 disk_worker.go:494] system disk:vda1
I0322 11:59:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 11:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 11:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:59:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:59:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 11:59:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 11:59:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:59:23.409809  543705 memory.go:184] no items to output this cycle
I0322 11:59:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 11:59:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:59:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 11:59:33.409802  543705 memory.go:184] no items to output this cycle
I0322 11:59:34.606741  543705 disk_info.go:125] begin check local disk info of client
I0322 11:59:34.609228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 11:59:34.609235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3ec0 0xc0003e3f00]
E0322 11:59:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:59:43.410735  543705 memory.go:191] Add success.
I0322 11:59:43.409828  543705 cpu.go:282] Add success.
I0322 11:59:43.420525  543705 net.go:648] Add success.
I0322 11:59:43.423754  543705 net.go:770] primary dev: ETH0
I0322 11:59:43.423767  543705 net.go:802] Send network stats successfully!,count is 6
I0322 11:59:43.423778  543705 net.go:698] Add success.
I0322 11:59:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 11:59:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 11:59:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 11:59:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 11:59:53.409789  543705 memory.go:184] no items to output this cycle
I0322 11:59:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 12:00:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:00:03.409788  543705 memory.go:184] no items to output this cycle
I0322 12:00:03.409850  543705 cpu.go:275] no items to output this cycle
E0322 12:00:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:00:13.409807  543705 memory.go:191] Add success.
I0322 12:00:13.409813  543705 cpu.go:282] Add success.
W0322 12:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:00:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:00:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:00:13.420108  543705 net.go:648] Add success.
I0322 12:00:13.422815  543705 net.go:770] primary dev: ETH0
I0322 12:00:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:00:13.422840  543705 net.go:698] Add success.
I0322 12:00:13.463815  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"64f19cdd-f039-4bcb-824e-3037c76d7c66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:00:13.463851  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:00:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:00:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:00:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0322 12:00:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:00:14.456802  543705 disk_worker.go:494] system disk:vda1
I0322 12:00:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:00:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:00:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:00:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:00:23.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:00:23.409823  543705 memory.go:184] no items to output this cycle
I0322 12:00:23.409833  543705 cpu.go:275] no items to output this cycle
E0322 12:00:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:00:33.409801  543705 memory.go:184] no items to output this cycle
I0322 12:00:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 12:00:34.609671  543705 disk_info.go:125] begin check local disk info of client
I0322 12:00:34.612223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:00:34.612228  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386400 0xc000386440]
I0322 12:00:39.633729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:00:39.633735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:00:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:00:43.410716  543705 memory.go:191] Add success.
I0322 12:00:43.409813  543705 cpu.go:282] Add success.
I0322 12:00:43.420539  543705 net.go:648] Add success.
I0322 12:00:43.423469  543705 net.go:770] primary dev: ETH0
I0322 12:00:43.423481  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:00:43.423494  543705 net.go:698] Add success.
I0322 12:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:00:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:00:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:00:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:00:53.409805  543705 memory.go:184] no items to output this cycle
I0322 12:00:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 12:01:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:01:03.409768  543705 memory.go:184] no items to output this cycle
I0322 12:01:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 12:01:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:01:13.409791  543705 memory.go:191] Add success.
I0322 12:01:13.409802  543705 cpu.go:282] Add success.
W0322 12:01:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:01:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:01:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:01:13.420126  543705 net.go:648] Add success.
I0322 12:01:13.422899  543705 net.go:770] primary dev: ETH0
I0322 12:01:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:01:13.422924  543705 net.go:698] Add success.
I0322 12:01:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:01:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:01:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 12:01:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:01:14.456485  543705 disk_worker.go:494] system disk:vda1
I0322 12:01:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:01:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:01:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:01:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:01:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:01:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:01:23.409803  543705 memory.go:184] no items to output this cycle
I0322 12:01:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 12:01:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:01:33.409792  543705 memory.go:184] no items to output this cycle
I0322 12:01:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 12:01:34.612704  543705 disk_info.go:125] begin check local disk info of client
I0322 12:01:34.615260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:01:34.615265  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385800 0xc000385840]
E0322 12:01:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:01:43.409795  543705 cpu.go:282] Add success.
I0322 12:01:43.410758  543705 memory.go:191] Add success.
I0322 12:01:43.419736  543705 net.go:648] Add success.
I0322 12:01:43.422924  543705 net.go:770] primary dev: ETH0
I0322 12:01:43.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:01:43.422948  543705 net.go:698] Add success.
I0322 12:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:01:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:01:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:01:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:01:53.409807  543705 memory.go:184] no items to output this cycle
I0322 12:01:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 12:02:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:02:03.409767  543705 memory.go:184] no items to output this cycle
I0322 12:02:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 12:02:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:02:13.409813  543705 memory.go:191] Add success.
I0322 12:02:13.409817  543705 cpu.go:282] Add success.
W0322 12:02:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:02:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:02:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:02:13.420135  543705 net.go:648] Add success.
I0322 12:02:13.422751  543705 net.go:770] primary dev: ETH0
I0322 12:02:13.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:02:13.422777  543705 net.go:698] Add success.
W0322 12:02:14.455239  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:02:14.455253  543705 disk_worker.go:708] disk space is not compliant
W0322 12:02:14.455257  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:02:14.455925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:02:14.455934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:02:14.455941  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:02:14.456834  543705 disk_worker.go:494] system disk:vda1
I0322 12:02:14.456865  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:02:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:02:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:02:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:02:16.457975  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:02:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:02:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:02:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:02:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:02:23.409778  543705 memory.go:184] no items to output this cycle
I0322 12:02:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 12:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:02:33.409786  543705 memory.go:184] no items to output this cycle
I0322 12:02:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 12:02:34.615714  543705 disk_info.go:125] begin check local disk info of client
I0322 12:02:34.618325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:02:34.618333  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366080 0xc0003660c0]
E0322 12:02:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:02:43.410762  543705 memory.go:191] Add success.
I0322 12:02:43.409910  543705 cpu.go:282] Add success.
I0322 12:02:43.419732  543705 net.go:648] Add success.
I0322 12:02:43.422483  543705 net.go:770] primary dev: ETH0
I0322 12:02:43.422496  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:02:43.422508  543705 net.go:698] Add success.
I0322 12:02:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:02:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:02:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:02:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:02:53.409777  543705 memory.go:184] no items to output this cycle
I0322 12:02:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 12:03:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:03:03.409767  543705 memory.go:184] no items to output this cycle
I0322 12:03:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 12:03:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:03:13.409781  543705 memory.go:191] Add success.
I0322 12:03:13.409798  543705 cpu.go:282] Add success.
W0322 12:03:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:03:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:03:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:03:13.420109  543705 net.go:648] Add success.
I0322 12:03:13.423051  543705 net.go:770] primary dev: ETH0
I0322 12:03:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:03:13.423090  543705 net.go:698] Add success.
I0322 12:03:13.469170  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c5190b46-764a-4aa9-af49-6c7a85ffd87f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:03:13.469208  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:03:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:03:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:03:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 12:03:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:03:14.456700  543705 disk_worker.go:494] system disk:vda1
I0322 12:03:14.456738  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:03:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:03:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:03:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:03:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:03:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:03:23.409785  543705 memory.go:184] no items to output this cycle
I0322 12:03:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 12:03:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:03:33.409804  543705 memory.go:184] no items to output this cycle
I0322 12:03:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 12:03:34.618413  543705 disk_info.go:125] begin check local disk info of client
I0322 12:03:34.620900  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:03:34.620906  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2000 0xc0004b2040]
I0322 12:03:39.634872  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:03:39.634878  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:03:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:03:43.410608  543705 memory.go:191] Add success.
I0322 12:03:43.409813  543705 cpu.go:282] Add success.
I0322 12:03:43.420375  543705 net.go:648] Add success.
I0322 12:03:43.423054  543705 net.go:770] primary dev: ETH0
I0322 12:03:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:03:43.423083  543705 net.go:698] Add success.
I0322 12:03:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:03:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:03:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:03:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 12:03:53.409789  543705 memory.go:184] no items to output this cycle
E0322 12:04:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:04:03.409767  543705 memory.go:184] no items to output this cycle
I0322 12:04:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:04:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:04:13.409784  543705 memory.go:191] Add success.
I0322 12:04:13.409807  543705 cpu.go:282] Add success.
W0322 12:04:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:04:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:04:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:04:13.420111  543705 net.go:648] Add success.
I0322 12:04:13.423101  543705 net.go:770] primary dev: ETH0
I0322 12:04:13.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:04:13.423127  543705 net.go:698] Add success.
I0322 12:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:04:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:04:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 12:04:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:04:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 12:04:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:04:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:04:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:04:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:04:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:04:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:04:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:04:23.409771  543705 memory.go:184] no items to output this cycle
I0322 12:04:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 12:04:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:04:33.409798  543705 memory.go:184] no items to output this cycle
I0322 12:04:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 12:04:34.621673  543705 disk_info.go:125] begin check local disk info of client
I0322 12:04:34.624168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:04:34.624175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1000 0xc0004b1040]
E0322 12:04:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:04:43.410752  543705 memory.go:191] Add success.
I0322 12:04:43.409792  543705 cpu.go:282] Add success.
I0322 12:04:43.420514  543705 net.go:648] Add success.
I0322 12:04:43.423690  543705 net.go:770] primary dev: ETH0
I0322 12:04:43.423704  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:04:43.423716  543705 net.go:698] Add success.
I0322 12:04:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:04:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:04:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:04:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:04:53.409798  543705 memory.go:184] no items to output this cycle
I0322 12:04:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 12:05:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:05:03.409779  543705 memory.go:184] no items to output this cycle
I0322 12:05:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 12:05:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:05:13.409806  543705 memory.go:191] Add success.
I0322 12:05:13.409811  543705 cpu.go:282] Add success.
W0322 12:05:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:05:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:05:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:05:13.420143  543705 net.go:648] Add success.
I0322 12:05:13.422908  543705 net.go:770] primary dev: ETH0
I0322 12:05:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:05:13.422938  543705 net.go:698] Add success.
I0322 12:05:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:05:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:05:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 12:05:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:05:14.456777  543705 disk_worker.go:494] system disk:vda1
I0322 12:05:14.456807  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:05:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:05:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:05:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:05:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:05:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:05:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:05:23.409783  543705 memory.go:184] no items to output this cycle
I0322 12:05:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 12:05:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:05:33.409775  543705 memory.go:184] no items to output this cycle
I0322 12:05:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 12:05:34.624251  543705 disk_info.go:125] begin check local disk info of client
I0322 12:05:34.626833  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:05:34.626839  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5900 0xc0000c5940]
E0322 12:05:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:05:43.410722  543705 memory.go:191] Add success.
I0322 12:05:43.409791  543705 cpu.go:282] Add success.
I0322 12:05:43.420575  543705 net.go:648] Add success.
I0322 12:05:43.423769  543705 net.go:770] primary dev: ETH0
I0322 12:05:43.423783  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:05:43.423795  543705 net.go:698] Add success.
I0322 12:05:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:05:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:05:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:05:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:05:53.409770  543705 memory.go:184] no items to output this cycle
I0322 12:05:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 12:06:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:06:03.409796  543705 memory.go:184] no items to output this cycle
I0322 12:06:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 12:06:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:06:13.409785  543705 memory.go:191] Add success.
I0322 12:06:13.409808  543705 cpu.go:282] Add success.
W0322 12:06:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:06:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:06:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:06:13.420469  543705 net.go:648] Add success.
I0322 12:06:13.423120  543705 net.go:770] primary dev: ETH0
I0322 12:06:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:06:13.423145  543705 net.go:698] Add success.
I0322 12:06:13.469136  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3cbba3dc-d7e5-46bf-aa5c-38e7fb6192a0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:06:13.469169  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:06:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:06:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:06:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 12:06:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:06:14.456534  543705 disk_worker.go:494] system disk:vda1
I0322 12:06:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:06:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:06:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:06:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:06:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:06:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:06:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:06:23.409816  543705 memory.go:184] no items to output this cycle
I0322 12:06:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 12:06:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:06:33.409771  543705 memory.go:184] no items to output this cycle
I0322 12:06:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 12:06:34.627776  543705 disk_info.go:125] begin check local disk info of client
I0322 12:06:34.630282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:06:34.630288  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376700 0xc000376740]
I0322 12:06:39.635015  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:06:39.635021  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:06:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:06:43.410634  543705 memory.go:191] Add success.
I0322 12:06:43.409806  543705 cpu.go:282] Add success.
I0322 12:06:43.420641  543705 net.go:648] Add success.
I0322 12:06:43.423450  543705 net.go:770] primary dev: ETH0
I0322 12:06:43.423465  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:06:43.423480  543705 net.go:698] Add success.
I0322 12:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:06:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:06:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:06:53.409777  543705 cpu.go:275] no items to output this cycle
I0322 12:06:53.409785  543705 memory.go:184] no items to output this cycle
E0322 12:07:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:07:03.409805  543705 memory.go:184] no items to output this cycle
I0322 12:07:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:07:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:07:13.409823  543705 memory.go:191] Add success.
I0322 12:07:13.409831  543705 cpu.go:282] Add success.
W0322 12:07:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:07:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:07:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:07:13.420226  543705 net.go:648] Add success.
I0322 12:07:13.423003  543705 net.go:770] primary dev: ETH0
I0322 12:07:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:07:13.423031  543705 net.go:698] Add success.
I0322 12:07:13.453578  543705 event_worker.go:152] Polling the log file for events...
W0322 12:07:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 12:07:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:07:14.456795  543705 disk_worker.go:494] system disk:vda1
I0322 12:07:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:07:14.457111  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:07:14.457119  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:07:14.457124  543705 custom_config.go:64] query custom config with name: gpu
E0322 12:07:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:07:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:07:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:07:16.457893  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:07:16.457944  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:07:16.457963  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:07:16.472292  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:07:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:07:23.409814  543705 memory.go:184] no items to output this cycle
I0322 12:07:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 12:07:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:07:33.409769  543705 memory.go:184] no items to output this cycle
I0322 12:07:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 12:07:34.630369  543705 disk_info.go:125] begin check local disk info of client
I0322 12:07:34.632918  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:07:34.632924  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9140 0xc0004d9180]
E0322 12:07:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:07:43.410582  543705 memory.go:191] Add success.
I0322 12:07:43.409797  543705 cpu.go:282] Add success.
I0322 12:07:43.420446  543705 net.go:648] Add success.
I0322 12:07:43.423242  543705 net.go:770] primary dev: ETH0
I0322 12:07:43.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:07:43.423266  543705 net.go:698] Add success.
I0322 12:07:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:07:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:07:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:07:53.409774  543705 memory.go:184] no items to output this cycle
I0322 12:07:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 12:08:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:08:03.409799  543705 memory.go:184] no items to output this cycle
I0322 12:08:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 12:08:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:08:13.409788  543705 memory.go:191] Add success.
I0322 12:08:13.409803  543705 cpu.go:282] Add success.
W0322 12:08:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:08:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:08:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:08:13.420100  543705 net.go:648] Add success.
I0322 12:08:13.422845  543705 net.go:770] primary dev: ETH0
I0322 12:08:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:08:13.422871  543705 net.go:698] Add success.
I0322 12:08:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:08:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:08:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 12:08:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:08:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 12:08:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:08:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:08:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:08:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:08:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:08:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:08:23.409806  543705 memory.go:184] no items to output this cycle
I0322 12:08:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 12:08:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:08:33.409796  543705 memory.go:184] no items to output this cycle
I0322 12:08:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 12:08:34.633668  543705 disk_info.go:125] begin check local disk info of client
I0322 12:08:34.636287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:08:34.636294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e1340 0xc0001e1380]
E0322 12:08:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:08:43.410644  543705 memory.go:191] Add success.
I0322 12:08:43.409795  543705 cpu.go:282] Add success.
I0322 12:08:43.420531  543705 net.go:648] Add success.
I0322 12:08:43.423481  543705 net.go:770] primary dev: ETH0
I0322 12:08:43.423494  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:08:43.423506  543705 net.go:698] Add success.
I0322 12:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:08:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:08:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:08:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:08:53.409800  543705 memory.go:184] no items to output this cycle
I0322 12:08:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 12:09:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:09:03.409780  543705 memory.go:184] no items to output this cycle
I0322 12:09:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 12:09:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:09:13.409809  543705 memory.go:191] Add success.
I0322 12:09:13.409816  543705 cpu.go:282] Add success.
W0322 12:09:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:09:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:09:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:09:13.420141  543705 net.go:648] Add success.
I0322 12:09:13.422784  543705 net.go:770] primary dev: ETH0
I0322 12:09:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:09:13.422809  543705 net.go:698] Add success.
I0322 12:09:13.468122  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"80c25ff2-c8ba-4a4e-b5a7-cd54b3eca5dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:09:13.468155  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:09:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:09:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:09:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 12:09:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:09:14.456557  543705 disk_worker.go:494] system disk:vda1
I0322 12:09:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:09:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:09:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:09:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:09:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:09:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:09:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:09:23.409786  543705 memory.go:184] no items to output this cycle
I0322 12:09:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 12:09:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:09:33.409795  543705 memory.go:184] no items to output this cycle
I0322 12:09:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 12:09:34.636380  543705 disk_info.go:125] begin check local disk info of client
I0322 12:09:34.638943  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:09:34.638950  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e1c0 0xc00037e200]
I0322 12:09:39.635885  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:09:39.635892  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:09:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:09:43.410522  543705 memory.go:191] Add success.
I0322 12:09:43.409818  543705 cpu.go:282] Add success.
I0322 12:09:43.420706  543705 net.go:648] Add success.
I0322 12:09:43.423297  543705 net.go:770] primary dev: ETH0
I0322 12:09:43.423310  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:09:43.423322  543705 net.go:698] Add success.
I0322 12:09:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:09:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:09:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:09:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:09:53.409801  543705 memory.go:184] no items to output this cycle
I0322 12:09:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:10:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:10:03.409796  543705 memory.go:184] no items to output this cycle
I0322 12:10:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 12:10:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:10:13.409801  543705 memory.go:191] Add success.
I0322 12:10:13.409822  543705 cpu.go:282] Add success.
W0322 12:10:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:10:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:10:13.420139  543705 net.go:648] Add success.
I0322 12:10:13.422945  543705 net.go:770] primary dev: ETH0
I0322 12:10:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:10:13.422971  543705 net.go:698] Add success.
I0322 12:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:10:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:10:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 12:10:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:10:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 12:10:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:10:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:10:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:10:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:10:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:10:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:10:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:10:23.409803  543705 memory.go:184] no items to output this cycle
I0322 12:10:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 12:10:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:10:33.409821  543705 memory.go:184] no items to output this cycle
I0322 12:10:33.409831  543705 cpu.go:275] no items to output this cycle
I0322 12:10:34.639840  543705 disk_info.go:125] begin check local disk info of client
I0322 12:10:34.642366  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:10:34.642372  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487d00 0xc000487d40]
E0322 12:10:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:10:43.410595  543705 memory.go:191] Add success.
I0322 12:10:43.409823  543705 cpu.go:282] Add success.
I0322 12:10:43.420461  543705 net.go:648] Add success.
I0322 12:10:43.423157  543705 net.go:770] primary dev: ETH0
I0322 12:10:43.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:10:43.423182  543705 net.go:698] Add success.
I0322 12:10:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:10:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:10:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:10:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:10:53.409771  543705 memory.go:184] no items to output this cycle
I0322 12:10:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 12:11:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:11:03.409815  543705 memory.go:184] no items to output this cycle
I0322 12:11:03.409826  543705 cpu.go:275] no items to output this cycle
E0322 12:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:11:13.409790  543705 memory.go:191] Add success.
I0322 12:11:13.409808  543705 cpu.go:282] Add success.
W0322 12:11:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:11:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:11:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:11:13.420150  543705 net.go:648] Add success.
I0322 12:11:13.423010  543705 net.go:770] primary dev: ETH0
I0322 12:11:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:11:13.423038  543705 net.go:698] Add success.
I0322 12:11:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:11:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:11:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 12:11:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:11:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 12:11:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:11:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:11:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:11:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:11:23.409787  543705 memory.go:184] no items to output this cycle
I0322 12:11:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 12:11:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:11:33.409785  543705 memory.go:184] no items to output this cycle
I0322 12:11:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 12:11:34.642857  543705 disk_info.go:125] begin check local disk info of client
I0322 12:11:34.645402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:11:34.645408  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326c40 0xc000326c80]
E0322 12:11:43.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:11:43.410712  543705 memory.go:191] Add success.
I0322 12:11:43.409877  543705 cpu.go:282] Add success.
I0322 12:11:43.419759  543705 net.go:648] Add success.
I0322 12:11:43.422351  543705 net.go:770] primary dev: ETH0
I0322 12:11:43.422366  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:11:43.422379  543705 net.go:698] Add success.
I0322 12:11:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:11:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:11:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:11:53.409796  543705 memory.go:184] no items to output this cycle
I0322 12:11:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:12:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:12:03.409764  543705 memory.go:184] no items to output this cycle
I0322 12:12:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 12:12:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:12:13.409836  543705 memory.go:191] Add success.
I0322 12:12:13.409843  543705 cpu.go:282] Add success.
W0322 12:12:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:12:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:12:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:12:13.420127  543705 net.go:648] Add success.
I0322 12:12:13.422720  543705 net.go:770] primary dev: ETH0
I0322 12:12:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:12:13.422744  543705 net.go:698] Add success.
I0322 12:12:13.469050  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8bbaffd5-b0ec-4f3c-aea6-b8093dbe348f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:12:13.469084  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 12:12:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:12:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 12:12:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:12:14.456782  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:12:14.456791  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:12:14.456796  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:12:14.456856  543705 disk_worker.go:494] system disk:vda1
I0322 12:12:14.456884  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:12:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:12:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:12:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:12:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:12:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:12:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:12:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:12:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:12:23.409807  543705 memory.go:184] no items to output this cycle
I0322 12:12:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:12:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:12:33.409790  543705 memory.go:184] no items to output this cycle
I0322 12:12:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 12:12:34.645671  543705 disk_info.go:125] begin check local disk info of client
I0322 12:12:34.648195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:12:34.648201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468000 0xc000468040]
I0322 12:12:39.636889  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:12:39.636896  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:12:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:12:43.410766  543705 memory.go:191] Add success.
I0322 12:12:43.409817  543705 cpu.go:282] Add success.
I0322 12:12:43.420458  543705 net.go:648] Add success.
I0322 12:12:43.423346  543705 net.go:770] primary dev: ETH0
I0322 12:12:43.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:12:43.423371  543705 net.go:698] Add success.
I0322 12:12:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:12:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:12:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:12:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:12:53.409773  543705 memory.go:184] no items to output this cycle
I0322 12:12:53.409775  543705 cpu.go:275] no items to output this cycle
E0322 12:13:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:13:03.409803  543705 memory.go:184] no items to output this cycle
I0322 12:13:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 12:13:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:13:13.409779  543705 memory.go:191] Add success.
I0322 12:13:13.409798  543705 cpu.go:282] Add success.
W0322 12:13:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:13:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:13:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:13:13.420248  543705 net.go:648] Add success.
I0322 12:13:13.423222  543705 net.go:770] primary dev: ETH0
I0322 12:13:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:13:13.423249  543705 net.go:698] Add success.
I0322 12:13:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:13:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:13:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 12:13:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:13:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 12:13:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:13:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:13:16.458054  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:13:16.458116  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:13:16.458135  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:13:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:13:23.410387  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:13:23.410498  543705 memory.go:184] no items to output this cycle
I0322 12:13:23.410578  543705 cpu.go:275] no items to output this cycle
E0322 12:13:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:13:33.409771  543705 memory.go:184] no items to output this cycle
I0322 12:13:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 12:13:34.648890  543705 disk_info.go:125] begin check local disk info of client
I0322 12:13:34.651433  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:13:34.651439  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481880 0xc0004818c0]
E0322 12:13:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:13:43.410889  543705 memory.go:191] Add success.
I0322 12:13:43.409822  543705 cpu.go:282] Add success.
I0322 12:13:43.420586  543705 net.go:648] Add success.
I0322 12:13:43.423205  543705 net.go:770] primary dev: ETH0
I0322 12:13:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:13:43.423230  543705 net.go:698] Add success.
I0322 12:13:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:13:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:13:53.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:13:53.410393  543705 memory.go:184] no items to output this cycle
I0322 12:13:53.410405  543705 cpu.go:275] no items to output this cycle
E0322 12:14:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:14:03.409758  543705 memory.go:184] no items to output this cycle
I0322 12:14:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 12:14:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:14:13.409824  543705 memory.go:191] Add success.
I0322 12:14:13.409828  543705 cpu.go:282] Add success.
W0322 12:14:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:14:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:14:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:14:13.420143  543705 net.go:648] Add success.
I0322 12:14:13.422946  543705 net.go:770] primary dev: ETH0
I0322 12:14:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:14:13.422975  543705 net.go:698] Add success.
I0322 12:14:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:14:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:14:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 12:14:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:14:14.456805  543705 disk_worker.go:494] system disk:vda1
I0322 12:14:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:14:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:14:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:14:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:14:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:14:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:14:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:14:23.409894  543705 memory.go:184] no items to output this cycle
I0322 12:14:23.409874  543705 cpu.go:275] no items to output this cycle
E0322 12:14:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:14:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 12:14:33.409787  543705 memory.go:184] no items to output this cycle
I0322 12:14:34.651909  543705 disk_info.go:125] begin check local disk info of client
I0322 12:14:34.654382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:14:34.654389  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d83c0 0xc0004d8400]
E0322 12:14:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:14:43.410700  543705 memory.go:191] Add success.
I0322 12:14:43.409823  543705 cpu.go:282] Add success.
I0322 12:14:43.420405  543705 net.go:648] Add success.
I0322 12:14:43.423024  543705 net.go:770] primary dev: ETH0
I0322 12:14:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:14:43.423059  543705 net.go:698] Add success.
I0322 12:14:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:14:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:14:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:14:53.409768  543705 memory.go:184] no items to output this cycle
I0322 12:14:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 12:15:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:15:03.409804  543705 memory.go:184] no items to output this cycle
I0322 12:15:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:15:13.409781  543705 memory.go:191] Add success.
W0322 12:15:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:15:13.409809  543705 cpu.go:282] Add success.
W0322 12:15:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:15:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:15:13.420169  543705 net.go:648] Add success.
I0322 12:15:13.422935  543705 net.go:770] primary dev: ETH0
I0322 12:15:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:15:13.422960  543705 net.go:698] Add success.
I0322 12:15:13.468755  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dd95cb5c-dc4b-4bd2-91b1-a3dda60ce978","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:15:13.468788  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:15:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:15:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 12:15:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:15:14.456522  543705 disk_worker.go:494] system disk:vda1
I0322 12:15:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:15:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:15:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:15:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:15:23.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:15:23.409894  543705 cpu.go:275] no items to output this cycle
I0322 12:15:23.409905  543705 memory.go:184] no items to output this cycle
E0322 12:15:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:15:33.409763  543705 memory.go:184] no items to output this cycle
I0322 12:15:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 12:15:34.654915  543705 disk_info.go:125] begin check local disk info of client
I0322 12:15:34.657476  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:15:34.657482  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003430c0 0xc000343100]
I0322 12:15:39.637731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:15:39.637738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:15:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:15:43.410566  543705 memory.go:191] Add success.
I0322 12:15:43.409785  543705 cpu.go:282] Add success.
I0322 12:15:43.420283  543705 net.go:648] Add success.
I0322 12:15:43.422913  543705 net.go:770] primary dev: ETH0
I0322 12:15:43.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:15:43.422941  543705 net.go:698] Add success.
I0322 12:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:15:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:15:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:15:53.410343  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:15:53.410358  543705 memory.go:184] no items to output this cycle
I0322 12:15:53.410392  543705 cpu.go:275] no items to output this cycle
E0322 12:16:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:16:03.409790  543705 memory.go:184] no items to output this cycle
I0322 12:16:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 12:16:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:16:13.409792  543705 memory.go:191] Add success.
I0322 12:16:13.409793  543705 cpu.go:282] Add success.
W0322 12:16:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:16:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:16:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:16:13.420041  543705 net.go:648] Add success.
I0322 12:16:13.422656  543705 net.go:770] primary dev: ETH0
I0322 12:16:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:16:13.422683  543705 net.go:698] Add success.
I0322 12:16:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:16:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:16:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 12:16:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:16:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 12:16:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:16:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:16:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:16:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:16:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:16:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:16:23.409787  543705 memory.go:184] no items to output this cycle
I0322 12:16:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 12:16:33.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:16:33.409907  543705 memory.go:184] no items to output this cycle
I0322 12:16:33.409942  543705 cpu.go:275] no items to output this cycle
I0322 12:16:34.657671  543705 disk_info.go:125] begin check local disk info of client
I0322 12:16:34.660245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:16:34.660252  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0322 12:16:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:16:43.410626  543705 memory.go:191] Add success.
I0322 12:16:43.409789  543705 cpu.go:282] Add success.
I0322 12:16:43.420350  543705 net.go:648] Add success.
I0322 12:16:43.422948  543705 net.go:770] primary dev: ETH0
I0322 12:16:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:16:43.422983  543705 net.go:698] Add success.
I0322 12:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:16:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:16:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:16:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:16:53.409774  543705 memory.go:184] no items to output this cycle
I0322 12:16:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 12:17:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:17:03.409809  543705 memory.go:184] no items to output this cycle
I0322 12:17:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 12:17:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:17:13.409780  543705 memory.go:191] Add success.
I0322 12:17:13.409800  543705 cpu.go:282] Add success.
W0322 12:17:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:17:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:17:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:17:13.420602  543705 net.go:648] Add success.
I0322 12:17:13.423351  543705 net.go:770] primary dev: ETH0
I0322 12:17:13.423364  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:17:13.423377  543705 net.go:698] Add success.
I0322 12:17:13.452948  543705 event_worker.go:152] Polling the log file for events...
W0322 12:17:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:17:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 12:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:17:14.456802  543705 disk_worker.go:494] system disk:vda1
I0322 12:17:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:17:14.457133  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:17:14.457141  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:17:14.457146  543705 custom_config.go:64] query custom config with name: gpu
E0322 12:17:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:17:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:17:16.458053  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:17:16.458049  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:17:16.458110  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:17:16.458133  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:17:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:17:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:17:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 12:17:23.409803  543705 memory.go:184] no items to output this cycle
E0322 12:17:33.409931  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:17:33.409940  543705 cpu.go:275] no items to output this cycle
I0322 12:17:33.409953  543705 memory.go:184] no items to output this cycle
I0322 12:17:34.660933  543705 disk_info.go:125] begin check local disk info of client
I0322 12:17:34.663561  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:17:34.663567  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468000 0xc000468040]
E0322 12:17:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:17:43.410732  543705 memory.go:191] Add success.
I0322 12:17:43.409790  543705 cpu.go:282] Add success.
I0322 12:17:43.420448  543705 net.go:648] Add success.
I0322 12:17:43.423012  543705 net.go:770] primary dev: ETH0
I0322 12:17:43.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:17:43.423038  543705 net.go:698] Add success.
I0322 12:17:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:17:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:17:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:17:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:17:53.409772  543705 memory.go:184] no items to output this cycle
I0322 12:17:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 12:18:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:18:03.409766  543705 memory.go:184] no items to output this cycle
I0322 12:18:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 12:18:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:18:13.409798  543705 cpu.go:282] Add success.
I0322 12:18:13.409802  543705 memory.go:191] Add success.
W0322 12:18:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:18:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:18:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:18:13.420110  543705 net.go:648] Add success.
I0322 12:18:13.423051  543705 net.go:770] primary dev: ETH0
I0322 12:18:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:18:13.423075  543705 net.go:698] Add success.
I0322 12:18:13.469278  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ad11b2e-79e9-4d6b-ace1-42628573ec29","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:18:13.469312  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:18:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:18:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:18:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 12:18:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:18:14.456772  543705 disk_worker.go:494] system disk:vda1
I0322 12:18:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:18:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:18:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:18:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:18:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:18:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:18:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:18:23.409782  543705 memory.go:184] no items to output this cycle
I0322 12:18:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 12:18:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:18:33.409893  543705 cpu.go:275] no items to output this cycle
I0322 12:18:33.409909  543705 memory.go:184] no items to output this cycle
I0322 12:18:34.663646  543705 disk_info.go:125] begin check local disk info of client
I0322 12:18:34.666266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:18:34.666273  543705 disk_info.go:196] parse disk info done, disk is : [0xc000248c80 0xc000248cc0]
I0322 12:18:39.638893  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:18:39.638899  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:18:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:18:43.410775  543705 memory.go:191] Add success.
I0322 12:18:43.409805  543705 cpu.go:282] Add success.
I0322 12:18:43.420478  543705 net.go:648] Add success.
I0322 12:18:43.423352  543705 net.go:770] primary dev: ETH0
I0322 12:18:43.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:18:43.423380  543705 net.go:698] Add success.
I0322 12:18:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:18:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:18:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:18:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:18:53.409779  543705 memory.go:184] no items to output this cycle
I0322 12:18:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 12:19:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:19:03.409806  543705 memory.go:184] no items to output this cycle
I0322 12:19:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:19:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:19:13.409819  543705 memory.go:191] Add success.
I0322 12:19:13.409823  543705 cpu.go:282] Add success.
W0322 12:19:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:19:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:19:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:19:13.420188  543705 net.go:648] Add success.
I0322 12:19:13.423062  543705 net.go:770] primary dev: ETH0
I0322 12:19:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:19:13.423091  543705 net.go:698] Add success.
I0322 12:19:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:19:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:19:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0322 12:19:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:19:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 12:19:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:19:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:19:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:19:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:19:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:19:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:19:23.409777  543705 memory.go:184] no items to output this cycle
I0322 12:19:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 12:19:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:19:33.409777  543705 memory.go:184] no items to output this cycle
I0322 12:19:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 12:19:34.666359  543705 disk_info.go:125] begin check local disk info of client
I0322 12:19:34.668752  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:19:34.668759  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
E0322 12:19:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:19:43.410711  543705 memory.go:191] Add success.
I0322 12:19:43.409806  543705 cpu.go:282] Add success.
I0322 12:19:43.420426  543705 net.go:648] Add success.
I0322 12:19:43.423222  543705 net.go:770] primary dev: ETH0
I0322 12:19:43.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:19:43.423248  543705 net.go:698] Add success.
I0322 12:19:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:19:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:19:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:19:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:19:53.409766  543705 memory.go:184] no items to output this cycle
I0322 12:19:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 12:20:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:20:03.409773  543705 memory.go:184] no items to output this cycle
I0322 12:20:03.409775  543705 cpu.go:275] no items to output this cycle
E0322 12:20:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:20:13.409787  543705 memory.go:191] Add success.
W0322 12:20:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:20:13.409817  543705 cpu.go:282] Add success.
W0322 12:20:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:20:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:20:13.420133  543705 net.go:648] Add success.
I0322 12:20:13.422889  543705 net.go:770] primary dev: ETH0
I0322 12:20:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:20:13.422915  543705 net.go:698] Add success.
I0322 12:20:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:20:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:20:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 12:20:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:20:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 12:20:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:20:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:20:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:20:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:20:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:20:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:20:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:20:23.409780  543705 memory.go:184] no items to output this cycle
I0322 12:20:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 12:20:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:20:33.409803  543705 memory.go:184] no items to output this cycle
I0322 12:20:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 12:20:34.669666  543705 disk_info.go:125] begin check local disk info of client
I0322 12:20:34.672212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:20:34.672217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463c00 0xc000463c40]
E0322 12:20:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:20:43.410803  543705 memory.go:191] Add success.
I0322 12:20:43.409919  543705 cpu.go:282] Add success.
I0322 12:20:43.419727  543705 net.go:648] Add success.
I0322 12:20:43.422407  543705 net.go:770] primary dev: ETH0
I0322 12:20:43.422422  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:20:43.422435  543705 net.go:698] Add success.
I0322 12:20:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:20:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:20:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:20:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:20:53.409799  543705 memory.go:184] no items to output this cycle
I0322 12:20:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 12:21:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:21:03.409789  543705 memory.go:184] no items to output this cycle
I0322 12:21:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 12:21:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:21:13.409794  543705 memory.go:191] Add success.
W0322 12:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:21:13.409822  543705 cpu.go:282] Add success.
W0322 12:21:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:21:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:21:13.420235  543705 net.go:648] Add success.
I0322 12:21:13.423446  543705 net.go:770] primary dev: ETH0
I0322 12:21:13.423459  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:21:13.423471  543705 net.go:698] Add success.
I0322 12:21:13.468431  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aec70a77-b21e-4a33-8bb7-b8bd4158c33d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:21:13.468465  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:21:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:21:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:21:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 12:21:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:21:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 12:21:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:21:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:21:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:21:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:21:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:21:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:21:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:21:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 12:21:23.409808  543705 memory.go:184] no items to output this cycle
E0322 12:21:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:21:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 12:21:33.409790  543705 memory.go:184] no items to output this cycle
I0322 12:21:34.673010  543705 disk_info.go:125] begin check local disk info of client
I0322 12:21:34.675576  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:21:34.675582  543705 disk_info.go:196] parse disk info done, disk is : [0xc00017dc40 0xc00017dc80]
I0322 12:21:39.639901  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:21:39.639908  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:21:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:21:43.410875  543705 memory.go:191] Add success.
I0322 12:21:43.409791  543705 cpu.go:282] Add success.
I0322 12:21:43.420687  543705 net.go:648] Add success.
I0322 12:21:43.423773  543705 net.go:770] primary dev: ETH0
I0322 12:21:43.423786  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:21:43.423797  543705 net.go:698] Add success.
I0322 12:21:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:21:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:21:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:21:53.410344  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:21:53.410359  543705 memory.go:184] no items to output this cycle
I0322 12:21:53.410359  543705 cpu.go:275] no items to output this cycle
E0322 12:22:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:22:03.409763  543705 memory.go:184] no items to output this cycle
I0322 12:22:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 12:22:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:22:13.409821  543705 memory.go:191] Add success.
I0322 12:22:13.409822  543705 cpu.go:282] Add success.
W0322 12:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:22:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:22:13.420199  543705 net.go:648] Add success.
I0322 12:22:13.423289  543705 net.go:770] primary dev: ETH0
I0322 12:22:13.423303  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:22:13.423317  543705 net.go:698] Add success.
W0322 12:22:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:22:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 12:22:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:22:14.456746  543705 disk_worker.go:494] system disk:vda1
I0322 12:22:14.456784  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:22:14.457144  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:22:14.457152  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:22:14.457157  543705 custom_config.go:64] query custom config with name: gpu
E0322 12:22:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:22:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:22:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:22:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:22:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:22:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:22:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:22:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:22:23.409785  543705 memory.go:184] no items to output this cycle
I0322 12:22:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 12:22:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:22:33.409778  543705 memory.go:184] no items to output this cycle
I0322 12:22:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 12:22:34.675661  543705 disk_info.go:125] begin check local disk info of client
I0322 12:22:34.678321  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:22:34.678327  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab000 0xc0001ab040]
E0322 12:22:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:22:43.410747  543705 memory.go:191] Add success.
I0322 12:22:43.409798  543705 cpu.go:282] Add success.
I0322 12:22:43.420510  543705 net.go:648] Add success.
I0322 12:22:43.423187  543705 net.go:770] primary dev: ETH0
I0322 12:22:43.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:22:43.423455  543705 net.go:698] Add success.
I0322 12:22:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:22:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:22:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:22:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:22:53.409778  543705 memory.go:184] no items to output this cycle
I0322 12:22:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 12:23:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:23:03.409803  543705 memory.go:184] no items to output this cycle
I0322 12:23:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:23:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:23:13.409821  543705 memory.go:191] Add success.
I0322 12:23:13.409828  543705 cpu.go:282] Add success.
W0322 12:23:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:23:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:23:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:23:13.420168  543705 net.go:648] Add success.
I0322 12:23:13.422784  543705 net.go:770] primary dev: ETH0
I0322 12:23:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:23:13.422808  543705 net.go:698] Add success.
I0322 12:23:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:23:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:23:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 12:23:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:23:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 12:23:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:23:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:23:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:23:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:23:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:23:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:23:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:23:23.409783  543705 memory.go:184] no items to output this cycle
I0322 12:23:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:23:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:23:33.409802  543705 memory.go:184] no items to output this cycle
I0322 12:23:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 12:23:34.678410  543705 disk_info.go:125] begin check local disk info of client
I0322 12:23:34.680973  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:23:34.680979  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004874c0 0xc000487500]
E0322 12:23:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:23:43.410710  543705 memory.go:191] Add success.
I0322 12:23:43.409841  543705 cpu.go:282] Add success.
I0322 12:23:43.420433  543705 net.go:648] Add success.
I0322 12:23:43.423449  543705 net.go:770] primary dev: ETH0
I0322 12:23:43.423464  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:23:43.423476  543705 net.go:698] Add success.
I0322 12:23:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:23:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:23:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:23:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:23:53.409779  543705 memory.go:184] no items to output this cycle
I0322 12:23:53.409780  543705 cpu.go:275] no items to output this cycle
I0322 12:24:03.409924  543705 cpu.go:275] no items to output this cycle
E0322 12:24:03.409928  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:24:03.410017  543705 memory.go:184] no items to output this cycle
E0322 12:24:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:24:13.409783  543705 memory.go:191] Add success.
W0322 12:24:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:24:13.409814  543705 cpu.go:282] Add success.
W0322 12:24:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:24:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:24:13.420135  543705 net.go:648] Add success.
I0322 12:24:13.422829  543705 net.go:770] primary dev: ETH0
I0322 12:24:13.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:24:13.422862  543705 net.go:698] Add success.
I0322 12:24:13.469343  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"645adf45-7082-43e5-a523-f77f34f7fb79","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:24:13.469378  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:24:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:24:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:24:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 12:24:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:24:14.456786  543705 disk_worker.go:494] system disk:vda1
I0322 12:24:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:24:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:24:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:24:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:24:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:24:23.409781  543705 memory.go:184] no items to output this cycle
I0322 12:24:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 12:24:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:24:33.409769  543705 memory.go:184] no items to output this cycle
I0322 12:24:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 12:24:34.681680  543705 disk_info.go:125] begin check local disk info of client
I0322 12:24:34.684153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:24:34.684159  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465800 0xc000465840]
I0322 12:24:39.640900  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:24:39.640907  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:24:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:24:43.410598  543705 memory.go:191] Add success.
I0322 12:24:43.409797  543705 cpu.go:282] Add success.
I0322 12:24:43.420283  543705 net.go:648] Add success.
I0322 12:24:43.423052  543705 net.go:770] primary dev: ETH0
I0322 12:24:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:24:43.423079  543705 net.go:698] Add success.
I0322 12:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:24:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:24:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:24:53.409769  543705 memory.go:184] no items to output this cycle
I0322 12:24:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:25:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:25:03.409794  543705 memory.go:184] no items to output this cycle
I0322 12:25:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 12:25:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:25:13.409824  543705 memory.go:191] Add success.
I0322 12:25:13.409827  543705 cpu.go:282] Add success.
W0322 12:25:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:25:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:25:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:25:13.420246  543705 net.go:648] Add success.
I0322 12:25:13.423045  543705 net.go:770] primary dev: ETH0
I0322 12:25:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:25:13.423070  543705 net.go:698] Add success.
I0322 12:25:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:25:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:25:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 12:25:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:25:14.456484  543705 disk_worker.go:494] system disk:vda1
I0322 12:25:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:25:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:25:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:25:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:25:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:25:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:25:23.409810  543705 memory.go:184] no items to output this cycle
I0322 12:25:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:25:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:25:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 12:25:33.409785  543705 memory.go:184] no items to output this cycle
I0322 12:25:34.685060  543705 disk_info.go:125] begin check local disk info of client
I0322 12:25:34.687623  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:25:34.687628  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4700 0xc0000c4740]
E0322 12:25:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:25:43.410681  543705 memory.go:191] Add success.
I0322 12:25:43.409816  543705 cpu.go:282] Add success.
I0322 12:25:43.420380  543705 net.go:648] Add success.
I0322 12:25:43.423239  543705 net.go:770] primary dev: ETH0
I0322 12:25:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:25:43.423264  543705 net.go:698] Add success.
I0322 12:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:25:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:25:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:25:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:25:53.409793  543705 memory.go:184] no items to output this cycle
I0322 12:25:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 12:26:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:26:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 12:26:03.409786  543705 memory.go:184] no items to output this cycle
E0322 12:26:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:26:13.409830  543705 memory.go:191] Add success.
I0322 12:26:13.409835  543705 cpu.go:282] Add success.
W0322 12:26:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:26:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:26:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:26:13.420250  543705 net.go:648] Add success.
I0322 12:26:13.423066  543705 net.go:770] primary dev: ETH0
I0322 12:26:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:26:13.423091  543705 net.go:698] Add success.
I0322 12:26:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:26:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:26:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 12:26:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:26:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 12:26:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:26:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:26:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:26:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:26:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:26:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:26:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:26:23.409776  543705 memory.go:184] no items to output this cycle
I0322 12:26:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:26:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:26:33.409774  543705 memory.go:184] no items to output this cycle
I0322 12:26:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 12:26:34.687712  543705 disk_info.go:125] begin check local disk info of client
I0322 12:26:34.690268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:26:34.690274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7540 0xc0004a7580]
E0322 12:26:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:26:43.410981  543705 memory.go:191] Add success.
I0322 12:26:43.409824  543705 cpu.go:282] Add success.
I0322 12:26:43.420693  543705 net.go:648] Add success.
I0322 12:26:43.423681  543705 net.go:770] primary dev: ETH0
I0322 12:26:43.423695  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:26:43.423707  543705 net.go:698] Add success.
I0322 12:26:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:26:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:26:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:26:53.409782  543705 memory.go:184] no items to output this cycle
I0322 12:26:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 12:27:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:27:03.409787  543705 memory.go:184] no items to output this cycle
I0322 12:27:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 12:27:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:27:13.409835  543705 memory.go:191] Add success.
I0322 12:27:13.409840  543705 cpu.go:282] Add success.
W0322 12:27:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:27:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:27:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:27:13.420202  543705 net.go:648] Add success.
I0322 12:27:13.429030  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 12:27:13.429117  543705 net.go:770] primary dev: ETH0
I0322 12:27:13.429129  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:27:13.429141  543705 net.go:698] Add success.
I0322 12:27:13.453637  543705 event_worker.go:152] Polling the log file for events...
I0322 12:27:13.469195  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c6deadfd-787b-468e-a853-63d40a434fce","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:27:13.469232  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 12:27:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:27:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 12:27:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:27:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:27:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:27:14.455890  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:27:14.456623  543705 disk_worker.go:494] system disk:vda1
I0322 12:27:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:27:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:27:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:27:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:27:16.457974  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:27:16.458018  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:27:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:27:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:27:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:27:23.409795  543705 memory.go:184] no items to output this cycle
I0322 12:27:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 12:27:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:27:33.409811  543705 memory.go:184] no items to output this cycle
I0322 12:27:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 12:27:34.690357  543705 disk_info.go:125] begin check local disk info of client
I0322 12:27:34.692919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:27:34.692924  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aec0 0xc00007af00]
I0322 12:27:39.641761  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:27:39.641768  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:27:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:27:43.410756  543705 memory.go:191] Add success.
I0322 12:27:43.409810  543705 cpu.go:282] Add success.
I0322 12:27:43.420535  543705 net.go:648] Add success.
I0322 12:27:43.423163  543705 net.go:770] primary dev: ETH0
I0322 12:27:43.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:27:43.423188  543705 net.go:698] Add success.
I0322 12:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:27:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:27:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:27:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:27:53.409780  543705 memory.go:184] no items to output this cycle
I0322 12:27:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 12:28:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:28:03.409782  543705 memory.go:184] no items to output this cycle
I0322 12:28:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 12:28:13.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:28:13.409903  543705 memory.go:191] Add success.
W0322 12:28:13.409948  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:28:13.409962  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:28:13.409968  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:28:13.410004  543705 cpu.go:282] Add success.
I0322 12:28:13.419717  543705 net.go:648] Add success.
I0322 12:28:13.422456  543705 net.go:770] primary dev: ETH0
I0322 12:28:13.422469  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:28:13.422480  543705 net.go:698] Add success.
I0322 12:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:28:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:28:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 12:28:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:28:14.456552  543705 disk_worker.go:494] system disk:vda1
I0322 12:28:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:28:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:28:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:28:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:28:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:28:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:28:23.409775  543705 memory.go:184] no items to output this cycle
I0322 12:28:23.409825  543705 cpu.go:275] no items to output this cycle
E0322 12:28:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:28:33.409769  543705 memory.go:184] no items to output this cycle
I0322 12:28:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 12:28:34.693675  543705 disk_info.go:125] begin check local disk info of client
I0322 12:28:34.696229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:28:34.696235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307800 0xc000307840]
E0322 12:28:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:28:43.410676  543705 memory.go:191] Add success.
I0322 12:28:43.409798  543705 cpu.go:282] Add success.
I0322 12:28:43.420358  543705 net.go:648] Add success.
I0322 12:28:43.423104  543705 net.go:770] primary dev: ETH0
I0322 12:28:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:28:43.423129  543705 net.go:698] Add success.
I0322 12:28:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:28:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:28:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:28:53.410252  543705 cpu.go:275] no items to output this cycle
I0322 12:28:53.410258  543705 memory.go:184] no items to output this cycle
E0322 12:29:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:29:03.409796  543705 memory.go:184] no items to output this cycle
I0322 12:29:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 12:29:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:29:13.409792  543705 memory.go:191] Add success.
I0322 12:29:13.409808  543705 cpu.go:282] Add success.
W0322 12:29:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:29:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:29:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:29:13.420137  543705 net.go:648] Add success.
I0322 12:29:13.423098  543705 net.go:770] primary dev: ETH0
I0322 12:29:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:29:13.423126  543705 net.go:698] Add success.
I0322 12:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:29:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:29:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 12:29:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:29:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 12:29:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:29:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:29:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:29:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:29:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:29:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:29:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:29:23.409784  543705 memory.go:184] no items to output this cycle
I0322 12:29:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 12:29:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:29:33.409793  543705 memory.go:184] no items to output this cycle
I0322 12:29:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 12:29:34.697131  543705 disk_info.go:125] begin check local disk info of client
I0322 12:29:34.699581  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:29:34.699587  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490740 0xc0004907c0]
E0322 12:29:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:29:43.410807  543705 memory.go:191] Add success.
I0322 12:29:43.409800  543705 cpu.go:282] Add success.
I0322 12:29:43.419902  543705 net.go:648] Add success.
I0322 12:29:43.422849  543705 net.go:770] primary dev: ETH0
I0322 12:29:43.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:29:43.422874  543705 net.go:698] Add success.
I0322 12:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:29:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:29:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:29:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:29:53.409780  543705 memory.go:184] no items to output this cycle
I0322 12:29:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 12:30:03.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:30:03.409824  543705 cpu.go:275] no items to output this cycle
I0322 12:30:03.409827  543705 memory.go:184] no items to output this cycle
W0322 12:30:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:30:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:30:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 12:30:13.409832  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:30:13.409833  543705 cpu.go:282] Add success.
I0322 12:30:13.409854  543705 memory.go:191] Add success.
I0322 12:30:13.420063  543705 net.go:648] Add success.
I0322 12:30:13.422851  543705 net.go:770] primary dev: ETH0
I0322 12:30:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:30:13.422875  543705 net.go:698] Add success.
I0322 12:30:13.468597  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f6554f71-ba83-4341-a8bf-9bd4728bc7e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:30:13.468627  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:30:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:30:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:30:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 12:30:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:30:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 12:30:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:30:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:30:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:30:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:30:23.409781  543705 memory.go:184] no items to output this cycle
I0322 12:30:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 12:30:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:30:33.409775  543705 memory.go:184] no items to output this cycle
I0322 12:30:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 12:30:34.700136  543705 disk_info.go:125] begin check local disk info of client
I0322 12:30:34.702688  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:30:34.702694  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0cc0 0xc0003c0d00]
I0322 12:30:39.642915  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:30:39.642921  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:30:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:30:43.410714  543705 memory.go:191] Add success.
I0322 12:30:43.409809  543705 cpu.go:282] Add success.
I0322 12:30:43.420420  543705 net.go:648] Add success.
I0322 12:30:43.423232  543705 net.go:770] primary dev: ETH0
I0322 12:30:43.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:30:43.423261  543705 net.go:698] Add success.
I0322 12:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:30:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:30:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:30:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:30:53.409795  543705 memory.go:184] no items to output this cycle
I0322 12:30:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 12:31:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:31:03.409803  543705 memory.go:184] no items to output this cycle
I0322 12:31:03.409855  543705 cpu.go:275] no items to output this cycle
W0322 12:31:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:31:13.409723  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:31:13.409728  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:31:13.409848  543705 cpu.go:282] Add success.
E0322 12:31:13.409916  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:31:13.411949  543705 memory.go:191] Add success.
I0322 12:31:13.420868  543705 net.go:648] Add success.
I0322 12:31:13.423827  543705 net.go:770] primary dev: ETH0
I0322 12:31:13.423840  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:31:13.423851  543705 net.go:698] Add success.
I0322 12:31:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:31:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:31:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 12:31:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:31:14.456574  543705 disk_worker.go:494] system disk:vda1
I0322 12:31:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:31:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:31:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:31:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:31:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:31:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:31:23.409776  543705 memory.go:184] no items to output this cycle
I0322 12:31:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 12:31:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:31:33.409772  543705 memory.go:184] no items to output this cycle
I0322 12:31:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 12:31:34.704151  543705 disk_info.go:125] begin check local disk info of client
I0322 12:31:34.706706  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:31:34.706712  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9400 0xc0004d9440]
E0322 12:31:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:31:43.410590  543705 memory.go:191] Add success.
I0322 12:31:43.409815  543705 cpu.go:282] Add success.
I0322 12:31:43.420284  543705 net.go:648] Add success.
I0322 12:31:43.422992  543705 net.go:770] primary dev: ETH0
I0322 12:31:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:31:43.423017  543705 net.go:698] Add success.
I0322 12:31:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:31:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:31:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:31:53.410364  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:31:53.410380  543705 memory.go:184] no items to output this cycle
I0322 12:31:53.410415  543705 cpu.go:275] no items to output this cycle
E0322 12:32:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:32:03.409782  543705 memory.go:184] no items to output this cycle
I0322 12:32:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 12:32:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:32:13.409796  543705 memory.go:191] Add success.
I0322 12:32:13.409799  543705 cpu.go:282] Add success.
W0322 12:32:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:32:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:32:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:32:13.420067  543705 net.go:648] Add success.
I0322 12:32:13.422875  543705 net.go:770] primary dev: ETH0
I0322 12:32:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:32:13.422905  543705 net.go:698] Add success.
W0322 12:32:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:32:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 12:32:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:32:14.456796  543705 disk_worker.go:494] system disk:vda1
I0322 12:32:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:32:14.457083  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:32:14.457091  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:32:14.457096  543705 custom_config.go:64] query custom config with name: gpu
E0322 12:32:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:32:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:32:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:32:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:32:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:32:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:32:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:32:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:32:23.409786  543705 memory.go:184] no items to output this cycle
I0322 12:32:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 12:32:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:32:33.409780  543705 memory.go:184] no items to output this cycle
I0322 12:32:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 12:32:34.706797  543705 disk_info.go:125] begin check local disk info of client
I0322 12:32:34.709349  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:32:34.709355  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513a00 0xc000513a40]
E0322 12:32:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:32:43.410755  543705 memory.go:191] Add success.
I0322 12:32:43.409794  543705 cpu.go:282] Add success.
I0322 12:32:43.420477  543705 net.go:648] Add success.
I0322 12:32:43.423219  543705 net.go:770] primary dev: ETH0
I0322 12:32:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:32:43.423246  543705 net.go:698] Add success.
I0322 12:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:32:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:32:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:32:53.409792  543705 memory.go:184] no items to output this cycle
I0322 12:32:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 12:33:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:33:03.409782  543705 memory.go:184] no items to output this cycle
I0322 12:33:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 12:33:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:33:13.409780  543705 memory.go:191] Add success.
W0322 12:33:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:33:13.409805  543705 cpu.go:282] Add success.
W0322 12:33:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:33:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:33:13.420052  543705 net.go:648] Add success.
I0322 12:33:13.422957  543705 net.go:770] primary dev: ETH0
I0322 12:33:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:33:13.422982  543705 net.go:698] Add success.
I0322 12:33:13.468332  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"63cda67d-77d3-419d-8c70-12fa2d3bffb1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:33:13.468365  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:33:14.455618  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:33:14.455744  543705 disk_worker.go:708] disk space is not compliant
W0322 12:33:14.455749  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:33:14.457398  543705 disk_worker.go:494] system disk:vda1
I0322 12:33:14.457427  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:33:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:33:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:33:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:33:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:33:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:33:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:33:23.409789  543705 memory.go:184] no items to output this cycle
I0322 12:33:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 12:33:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:33:33.409773  543705 memory.go:184] no items to output this cycle
I0322 12:33:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 12:33:34.709674  543705 disk_info.go:125] begin check local disk info of client
I0322 12:33:34.712166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:33:34.712171  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa1c0 0xc0001aa240]
I0322 12:33:39.643914  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:33:39.643919  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:33:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:33:43.410721  543705 memory.go:191] Add success.
I0322 12:33:43.409797  543705 cpu.go:282] Add success.
I0322 12:33:43.420413  543705 net.go:648] Add success.
I0322 12:33:43.423409  543705 net.go:770] primary dev: ETH0
I0322 12:33:43.423422  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:33:43.423435  543705 net.go:698] Add success.
I0322 12:33:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:33:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:33:53.410372  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:33:53.410392  543705 memory.go:184] no items to output this cycle
I0322 12:33:53.410405  543705 cpu.go:275] no items to output this cycle
E0322 12:34:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:34:03.409803  543705 memory.go:184] no items to output this cycle
I0322 12:34:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:34:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:34:13.409827  543705 memory.go:191] Add success.
I0322 12:34:13.409840  543705 cpu.go:282] Add success.
W0322 12:34:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:34:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:34:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:34:13.420273  543705 net.go:648] Add success.
I0322 12:34:13.422975  543705 net.go:770] primary dev: ETH0
I0322 12:34:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:34:13.423001  543705 net.go:698] Add success.
I0322 12:34:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:34:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:34:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 12:34:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:34:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 12:34:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:34:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:34:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:34:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:34:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:34:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:34:23.409781  543705 memory.go:184] no items to output this cycle
I0322 12:34:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 12:34:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:34:33.409777  543705 memory.go:184] no items to output this cycle
I0322 12:34:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 12:34:34.713203  543705 disk_info.go:125] begin check local disk info of client
I0322 12:34:34.715738  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:34:34.715745  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471800 0xc000471840]
E0322 12:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:34:43.410780  543705 memory.go:191] Add success.
I0322 12:34:43.409809  543705 cpu.go:282] Add success.
I0322 12:34:43.420449  543705 net.go:648] Add success.
I0322 12:34:43.423039  543705 net.go:770] primary dev: ETH0
I0322 12:34:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:34:43.423068  543705 net.go:698] Add success.
I0322 12:34:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:34:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:34:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:34:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:34:53.409795  543705 memory.go:184] no items to output this cycle
I0322 12:34:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 12:35:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:35:03.409806  543705 memory.go:184] no items to output this cycle
I0322 12:35:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 12:35:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:35:13.409785  543705 memory.go:191] Add success.
I0322 12:35:13.409809  543705 cpu.go:282] Add success.
W0322 12:35:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:35:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:35:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:35:13.420159  543705 net.go:648] Add success.
I0322 12:35:13.423047  543705 net.go:770] primary dev: ETH0
I0322 12:35:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:35:13.423076  543705 net.go:698] Add success.
I0322 12:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:35:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:35:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 12:35:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:35:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 12:35:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:35:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:35:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:35:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:35:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:35:23.409814  543705 memory.go:184] no items to output this cycle
I0322 12:35:23.409822  543705 cpu.go:275] no items to output this cycle
E0322 12:35:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:35:33.409808  543705 memory.go:184] no items to output this cycle
I0322 12:35:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 12:35:34.717221  543705 disk_info.go:125] begin check local disk info of client
I0322 12:35:34.719816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:35:34.719821  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c41c0 0xc0000c4200]
E0322 12:35:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:35:43.410730  543705 memory.go:191] Add success.
I0322 12:35:43.409809  543705 cpu.go:282] Add success.
I0322 12:35:43.420432  543705 net.go:648] Add success.
I0322 12:35:43.423286  543705 net.go:770] primary dev: ETH0
I0322 12:35:43.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:35:43.423311  543705 net.go:698] Add success.
I0322 12:35:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:35:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:35:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:35:53.409780  543705 memory.go:184] no items to output this cycle
I0322 12:35:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 12:36:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:36:03.409821  543705 memory.go:184] no items to output this cycle
I0322 12:36:03.409835  543705 cpu.go:275] no items to output this cycle
E0322 12:36:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:36:13.409838  543705 memory.go:191] Add success.
I0322 12:36:13.409847  543705 cpu.go:282] Add success.
W0322 12:36:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:36:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:36:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:36:13.420242  543705 net.go:648] Add success.
I0322 12:36:13.422927  543705 net.go:770] primary dev: ETH0
I0322 12:36:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:36:13.422952  543705 net.go:698] Add success.
I0322 12:36:13.468583  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"683e8751-4ee7-4b74-9d08-8520a56e6c8a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:36:13.468622  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:36:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:36:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:36:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 12:36:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:36:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 12:36:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:36:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:36:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:36:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:36:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:36:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:36:23.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:36:23.409825  543705 memory.go:184] no items to output this cycle
I0322 12:36:23.409836  543705 cpu.go:275] no items to output this cycle
E0322 12:36:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:36:33.409821  543705 memory.go:184] no items to output this cycle
I0322 12:36:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 12:36:34.721243  543705 disk_info.go:125] begin check local disk info of client
I0322 12:36:34.723919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:36:34.723925  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380780 0xc0003807c0]
I0322 12:36:39.644922  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:36:39.644928  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:36:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:36:43.410770  543705 memory.go:191] Add success.
I0322 12:36:43.409809  543705 cpu.go:282] Add success.
I0322 12:36:43.420468  543705 net.go:648] Add success.
I0322 12:36:43.423366  543705 net.go:770] primary dev: ETH0
I0322 12:36:43.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:36:43.423392  543705 net.go:698] Add success.
I0322 12:36:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:36:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:36:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:36:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:36:53.409772  543705 memory.go:184] no items to output this cycle
I0322 12:36:53.409777  543705 cpu.go:275] no items to output this cycle
E0322 12:37:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:37:03.409807  543705 memory.go:184] no items to output this cycle
I0322 12:37:03.409831  543705 cpu.go:275] no items to output this cycle
E0322 12:37:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:37:13.409789  543705 memory.go:191] Add success.
I0322 12:37:13.409790  543705 cpu.go:282] Add success.
W0322 12:37:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:37:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:37:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:37:13.420073  543705 net.go:648] Add success.
I0322 12:37:13.422730  543705 net.go:770] primary dev: ETH0
I0322 12:37:13.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:37:13.422753  543705 net.go:698] Add success.
I0322 12:37:13.453332  543705 event_worker.go:152] Polling the log file for events...
W0322 12:37:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:37:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 12:37:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:37:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 12:37:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:37:14.456955  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:37:14.456964  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:37:14.456970  543705 custom_config.go:64] query custom config with name: gpu
E0322 12:37:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:37:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:37:16.457896  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:37:16.457903  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:37:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:37:16.457973  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:37:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:37:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:37:23.409885  543705 memory.go:184] no items to output this cycle
I0322 12:37:23.409924  543705 cpu.go:275] no items to output this cycle
E0322 12:37:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:37:33.409806  543705 memory.go:184] no items to output this cycle
I0322 12:37:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 12:37:34.724010  543705 disk_info.go:125] begin check local disk info of client
I0322 12:37:34.726554  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:37:34.726560  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f0000 0xc0004f0040]
E0322 12:37:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:37:43.410884  543705 memory.go:191] Add success.
I0322 12:37:43.409800  543705 cpu.go:282] Add success.
I0322 12:37:43.420607  543705 net.go:648] Add success.
I0322 12:37:43.423535  543705 net.go:770] primary dev: ETH0
I0322 12:37:43.423548  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:37:43.423561  543705 net.go:698] Add success.
I0322 12:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:37:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:37:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:37:53.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:37:53.409758  543705 memory.go:184] no items to output this cycle
I0322 12:37:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 12:38:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:38:03.409797  543705 memory.go:184] no items to output this cycle
I0322 12:38:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 12:38:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:38:13.409793  543705 memory.go:191] Add success.
I0322 12:38:13.409798  543705 cpu.go:282] Add success.
W0322 12:38:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:38:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:38:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:38:13.420169  543705 net.go:648] Add success.
I0322 12:38:13.423050  543705 net.go:770] primary dev: ETH0
I0322 12:38:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:38:13.423076  543705 net.go:698] Add success.
I0322 12:38:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:38:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:38:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 12:38:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:38:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 12:38:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:38:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:38:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:38:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:38:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:38:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:38:23.409777  543705 memory.go:184] no items to output this cycle
I0322 12:38:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 12:38:33.409961  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:38:33.409980  543705 memory.go:184] no items to output this cycle
I0322 12:38:33.410089  543705 cpu.go:275] no items to output this cycle
I0322 12:38:34.726640  543705 disk_info.go:125] begin check local disk info of client
I0322 12:38:34.729178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:38:34.729184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 12:38:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:38:43.410863  543705 memory.go:191] Add success.
I0322 12:38:43.409801  543705 cpu.go:282] Add success.
I0322 12:38:43.420561  543705 net.go:648] Add success.
I0322 12:38:43.423517  543705 net.go:770] primary dev: ETH0
I0322 12:38:43.423535  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:38:43.423550  543705 net.go:698] Add success.
I0322 12:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:38:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:38:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:38:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:38:53.409772  543705 memory.go:184] no items to output this cycle
I0322 12:38:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 12:39:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:39:03.409781  543705 memory.go:184] no items to output this cycle
I0322 12:39:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 12:39:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:39:13.409785  543705 memory.go:191] Add success.
W0322 12:39:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:39:13.409817  543705 cpu.go:282] Add success.
W0322 12:39:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:39:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:39:13.420129  543705 net.go:648] Add success.
I0322 12:39:13.422789  543705 net.go:770] primary dev: ETH0
I0322 12:39:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:39:13.422815  543705 net.go:698] Add success.
I0322 12:39:13.468696  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef911918-58e6-46e4-9eb0-fabad357c0f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:39:13.468728  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:39:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:39:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:39:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 12:39:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:39:14.456479  543705 disk_worker.go:494] system disk:vda1
I0322 12:39:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:39:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:39:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:39:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:39:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:39:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:39:23.409788  543705 memory.go:184] no items to output this cycle
I0322 12:39:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 12:39:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:39:33.409876  543705 memory.go:184] no items to output this cycle
I0322 12:39:33.410067  543705 cpu.go:275] no items to output this cycle
I0322 12:39:34.729675  543705 disk_info.go:125] begin check local disk info of client
I0322 12:39:34.732075  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:39:34.732081  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8000 0xc0004d8040]
I0322 12:39:39.645734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:39:39.645741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:39:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:39:43.410704  543705 memory.go:191] Add success.
I0322 12:39:43.409800  543705 cpu.go:282] Add success.
I0322 12:39:43.420409  543705 net.go:648] Add success.
I0322 12:39:43.423199  543705 net.go:770] primary dev: ETH0
I0322 12:39:43.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:39:43.423224  543705 net.go:698] Add success.
I0322 12:39:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:39:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:39:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:39:53.409762  543705 memory.go:184] no items to output this cycle
I0322 12:39:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 12:40:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:40:03.409801  543705 memory.go:184] no items to output this cycle
I0322 12:40:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 12:40:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:40:13.409777  543705 memory.go:191] Add success.
I0322 12:40:13.409798  543705 cpu.go:282] Add success.
W0322 12:40:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:40:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:40:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:40:13.420120  543705 net.go:648] Add success.
I0322 12:40:13.422562  543705 net.go:770] primary dev: ETH0
I0322 12:40:13.422574  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:40:13.422587  543705 net.go:698] Add success.
I0322 12:40:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:40:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:40:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 12:40:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:40:14.456616  543705 disk_worker.go:494] system disk:vda1
I0322 12:40:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:40:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:40:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:40:16.472513  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:40:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:40:23.409792  543705 memory.go:184] no items to output this cycle
I0322 12:40:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 12:40:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:40:33.409795  543705 memory.go:184] no items to output this cycle
I0322 12:40:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 12:40:34.732160  543705 disk_info.go:125] begin check local disk info of client
I0322 12:40:34.734731  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:40:34.734737  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abb80 0xc0001abbc0]
E0322 12:40:43.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:40:43.410756  543705 memory.go:191] Add success.
I0322 12:40:43.410123  543705 cpu.go:282] Add success.
I0322 12:40:43.419710  543705 net.go:648] Add success.
I0322 12:40:43.422818  543705 net.go:770] primary dev: ETH0
I0322 12:40:43.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:40:43.422847  543705 net.go:698] Add success.
I0322 12:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:40:53.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:40:53.410268  543705 memory.go:184] no items to output this cycle
I0322 12:40:53.410276  543705 cpu.go:275] no items to output this cycle
E0322 12:41:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:41:03.409772  543705 memory.go:184] no items to output this cycle
I0322 12:41:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 12:41:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:41:13.409814  543705 memory.go:191] Add success.
I0322 12:41:13.409820  543705 cpu.go:282] Add success.
W0322 12:41:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:41:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:41:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:41:13.420112  543705 net.go:648] Add success.
I0322 12:41:13.423162  543705 net.go:770] primary dev: ETH0
I0322 12:41:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:41:13.423189  543705 net.go:698] Add success.
I0322 12:41:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:41:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:41:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 12:41:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:41:14.456608  543705 disk_worker.go:494] system disk:vda1
I0322 12:41:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:41:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:41:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:41:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:41:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:41:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:41:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:41:23.409787  543705 memory.go:184] no items to output this cycle
I0322 12:41:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 12:41:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:41:33.409802  543705 memory.go:184] no items to output this cycle
I0322 12:41:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 12:41:34.736319  543705 disk_info.go:125] begin check local disk info of client
I0322 12:41:34.738874  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:41:34.738880  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f1600 0xc0004f1640]
E0322 12:41:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:41:43.410598  543705 memory.go:191] Add success.
I0322 12:41:43.409794  543705 cpu.go:282] Add success.
I0322 12:41:43.420782  543705 net.go:648] Add success.
I0322 12:41:43.423226  543705 net.go:770] primary dev: ETH0
I0322 12:41:43.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:41:43.423250  543705 net.go:698] Add success.
I0322 12:41:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:41:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:41:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:41:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:41:53.409781  543705 memory.go:184] no items to output this cycle
I0322 12:41:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 12:42:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:42:03.409772  543705 memory.go:184] no items to output this cycle
I0322 12:42:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:42:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:42:13.409816  543705 memory.go:191] Add success.
I0322 12:42:13.409817  543705 cpu.go:282] Add success.
W0322 12:42:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:42:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:42:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:42:13.420199  543705 net.go:648] Add success.
I0322 12:42:13.422907  543705 net.go:770] primary dev: ETH0
I0322 12:42:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:42:13.422932  543705 net.go:698] Add success.
I0322 12:42:13.471934  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6789b62-95e8-4528-abbe-1ef9deb2ce70","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:42:13.471968  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 12:42:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:42:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 12:42:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:42:14.455944  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:42:14.455953  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:42:14.455958  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:42:14.456564  543705 disk_worker.go:494] system disk:vda1
I0322 12:42:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:42:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:42:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:42:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:42:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:42:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:42:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:42:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:42:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:42:23.409808  543705 memory.go:184] no items to output this cycle
I0322 12:42:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:42:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:42:33.409783  543705 memory.go:184] no items to output this cycle
I0322 12:42:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 12:42:34.740338  543705 disk_info.go:125] begin check local disk info of client
I0322 12:42:34.742877  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:42:34.742883  543705 disk_info.go:196] parse disk info done, disk is : [0xc000216f80 0xc000216fc0]
I0322 12:42:39.646930  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:42:39.646937  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:42:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:42:43.410685  543705 memory.go:191] Add success.
I0322 12:42:43.409817  543705 cpu.go:282] Add success.
I0322 12:42:43.420632  543705 net.go:648] Add success.
I0322 12:42:43.423255  543705 net.go:770] primary dev: ETH0
I0322 12:42:43.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:42:43.423279  543705 net.go:698] Add success.
I0322 12:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:42:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:42:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:42:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:42:53.409783  543705 memory.go:184] no items to output this cycle
I0322 12:42:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 12:43:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:43:03.409779  543705 memory.go:184] no items to output this cycle
I0322 12:43:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 12:43:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:43:13.409819  543705 memory.go:191] Add success.
I0322 12:43:13.409825  543705 cpu.go:282] Add success.
W0322 12:43:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:43:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:43:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:43:13.420120  543705 net.go:648] Add success.
I0322 12:43:13.422973  543705 net.go:770] primary dev: ETH0
I0322 12:43:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:43:13.423001  543705 net.go:698] Add success.
I0322 12:43:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:43:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:43:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 12:43:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:43:14.456495  543705 disk_worker.go:494] system disk:vda1
I0322 12:43:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:43:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:43:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:43:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:43:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:43:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:43:23.409813  543705 memory.go:184] no items to output this cycle
I0322 12:43:23.409826  543705 cpu.go:275] no items to output this cycle
E0322 12:43:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:43:33.409800  543705 memory.go:184] no items to output this cycle
I0322 12:43:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 12:43:34.744360  543705 disk_info.go:125] begin check local disk info of client
I0322 12:43:34.746932  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:43:34.746938  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fd100 0xc0004fd140]
E0322 12:43:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:43:43.410592  543705 memory.go:191] Add success.
I0322 12:43:43.409827  543705 cpu.go:282] Add success.
I0322 12:43:43.420106  543705 net.go:770] primary dev: ETH0
I0322 12:43:43.420120  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:43:43.420132  543705 net.go:698] Add success.
I0322 12:43:43.420367  543705 net.go:648] Add success.
I0322 12:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:43:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:43:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:43:53.409796  543705 memory.go:184] no items to output this cycle
I0322 12:43:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 12:44:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:44:03.409779  543705 memory.go:184] no items to output this cycle
I0322 12:44:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 12:44:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:44:13.409814  543705 memory.go:191] Add success.
I0322 12:44:13.409816  543705 cpu.go:282] Add success.
W0322 12:44:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:44:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:44:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:44:13.420163  543705 net.go:648] Add success.
I0322 12:44:13.422744  543705 net.go:770] primary dev: ETH0
I0322 12:44:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:44:13.422780  543705 net.go:698] Add success.
I0322 12:44:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:44:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:44:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 12:44:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:44:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 12:44:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:44:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:44:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:44:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:44:23.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:44:23.409830  543705 memory.go:184] no items to output this cycle
I0322 12:44:23.409835  543705 cpu.go:275] no items to output this cycle
E0322 12:44:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:44:33.409771  543705 memory.go:184] no items to output this cycle
I0322 12:44:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 12:44:34.748385  543705 disk_info.go:125] begin check local disk info of client
I0322 12:44:34.750837  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:44:34.750845  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001cd900 0xc0001cd940]
E0322 12:44:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:44:43.410706  543705 memory.go:191] Add success.
I0322 12:44:43.409795  543705 cpu.go:282] Add success.
I0322 12:44:43.420413  543705 net.go:648] Add success.
I0322 12:44:43.422893  543705 net.go:770] primary dev: ETH0
I0322 12:44:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:44:43.422920  543705 net.go:698] Add success.
I0322 12:44:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:44:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:44:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:44:53.410380  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:44:53.410399  543705 memory.go:184] no items to output this cycle
I0322 12:44:53.410461  543705 cpu.go:275] no items to output this cycle
E0322 12:45:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:45:03.409760  543705 memory.go:184] no items to output this cycle
I0322 12:45:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 12:45:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:45:13.409797  543705 memory.go:191] Add success.
I0322 12:45:13.409797  543705 cpu.go:282] Add success.
W0322 12:45:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:45:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:45:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:45:13.420364  543705 net.go:648] Add success.
I0322 12:45:13.423287  543705 net.go:770] primary dev: ETH0
I0322 12:45:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:45:13.423312  543705 net.go:698] Add success.
I0322 12:45:13.469298  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6b45cf6a-6857-4366-84dc-c011edb8c74f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:45:13.469331  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:45:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:45:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:45:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 12:45:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:45:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 12:45:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:45:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:45:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:45:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:45:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:45:23.409790  543705 memory.go:184] no items to output this cycle
I0322 12:45:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 12:45:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:45:33.409778  543705 memory.go:184] no items to output this cycle
I0322 12:45:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 12:45:34.752402  543705 disk_info.go:125] begin check local disk info of client
I0322 12:45:34.755012  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:45:34.755019  543705 disk_info.go:196] parse disk info done, disk is : [0xc000330400 0xc000330440]
I0322 12:45:39.647940  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:45:39.647946  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:45:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:45:43.410698  543705 memory.go:191] Add success.
I0322 12:45:43.409792  543705 cpu.go:282] Add success.
I0322 12:45:43.420366  543705 net.go:648] Add success.
I0322 12:45:43.423141  543705 net.go:770] primary dev: ETH0
I0322 12:45:43.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:45:43.423171  543705 net.go:698] Add success.
I0322 12:45:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:45:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:45:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:45:53.410259  543705 memory.go:184] no items to output this cycle
I0322 12:45:53.410262  543705 cpu.go:275] no items to output this cycle
E0322 12:46:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:46:03.409774  543705 memory.go:184] no items to output this cycle
I0322 12:46:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 12:46:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:46:13.409819  543705 memory.go:191] Add success.
I0322 12:46:13.409831  543705 cpu.go:282] Add success.
W0322 12:46:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:46:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:46:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:46:13.420159  543705 net.go:648] Add success.
I0322 12:46:13.423109  543705 net.go:770] primary dev: ETH0
I0322 12:46:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:46:13.423133  543705 net.go:698] Add success.
I0322 12:46:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:46:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:46:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 12:46:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:46:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 12:46:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:46:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:46:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:46:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:46:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:46:23.409811  543705 memory.go:184] no items to output this cycle
I0322 12:46:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 12:46:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:46:33.409773  543705 memory.go:184] no items to output this cycle
I0322 12:46:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 12:46:34.756410  543705 disk_info.go:125] begin check local disk info of client
I0322 12:46:34.758962  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:46:34.758967  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dfd00 0xc0003dfd40]
E0322 12:46:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:46:43.410642  543705 memory.go:191] Add success.
I0322 12:46:43.409802  543705 cpu.go:282] Add success.
I0322 12:46:43.420353  543705 net.go:648] Add success.
I0322 12:46:43.422998  543705 net.go:770] primary dev: ETH0
I0322 12:46:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:46:43.423024  543705 net.go:698] Add success.
I0322 12:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:46:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:46:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:46:53.410485  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:46:53.410506  543705 memory.go:184] no items to output this cycle
I0322 12:46:53.410576  543705 cpu.go:275] no items to output this cycle
E0322 12:47:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:47:03.409797  543705 memory.go:184] no items to output this cycle
I0322 12:47:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:47:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:47:13.409780  543705 memory.go:191] Add success.
W0322 12:47:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:47:13.409811  543705 cpu.go:282] Add success.
W0322 12:47:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:47:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:47:13.420188  543705 net.go:648] Add success.
I0322 12:47:13.422915  543705 net.go:770] primary dev: ETH0
I0322 12:47:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:47:13.422943  543705 net.go:698] Add success.
I0322 12:47:13.453495  543705 event_worker.go:152] Polling the log file for events...
W0322 12:47:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:47:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 12:47:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:47:14.455875  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:47:14.455883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:47:14.455889  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:47:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 12:47:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:47:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:47:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:47:16.457887  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:47:16.457887  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:47:16.457943  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:47:16.457963  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:47:16.472307  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:47:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:47:23.409796  543705 memory.go:184] no items to output this cycle
I0322 12:47:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 12:47:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:47:33.409777  543705 memory.go:184] no items to output this cycle
I0322 12:47:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 12:47:34.759624  543705 disk_info.go:125] begin check local disk info of client
I0322 12:47:34.762245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:47:34.762251  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a380 0xc00027a3c0]
E0322 12:47:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:47:43.410782  543705 memory.go:191] Add success.
I0322 12:47:43.409793  543705 cpu.go:282] Add success.
I0322 12:47:43.420558  543705 net.go:648] Add success.
I0322 12:47:43.423738  543705 net.go:770] primary dev: ETH0
I0322 12:47:43.423752  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:47:43.423764  543705 net.go:698] Add success.
I0322 12:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:47:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:47:53.409803  543705 memory.go:184] no items to output this cycle
I0322 12:47:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:48:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:48:03.409796  543705 memory.go:184] no items to output this cycle
I0322 12:48:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 12:48:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:48:13.409791  543705 memory.go:191] Add success.
I0322 12:48:13.409810  543705 cpu.go:282] Add success.
W0322 12:48:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:48:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:48:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:48:13.420224  543705 net.go:648] Add success.
I0322 12:48:13.423022  543705 net.go:770] primary dev: ETH0
I0322 12:48:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:48:13.423056  543705 net.go:698] Add success.
I0322 12:48:13.949338  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6961c970-93dd-4b83-a397-f7dc621712f7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:48:13.949375  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:48:14.454691  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:48:14.454950  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:48:14.454960  543705 disk_worker.go:708] disk space is not compliant
W0322 12:48:14.454964  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:48:14.456474  543705 disk_worker.go:494] system disk:vda1
I0322 12:48:14.456504  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:48:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:48:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:48:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:48:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:48:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:48:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:48:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 12:48:23.409788  543705 memory.go:184] no items to output this cycle
E0322 12:48:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:48:33.409772  543705 memory.go:184] no items to output this cycle
I0322 12:48:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 12:48:34.763453  543705 disk_info.go:125] begin check local disk info of client
I0322 12:48:34.765982  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:48:34.765988  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047af40 0xc00047af80]
I0322 12:48:39.648941  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:48:39.648948  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:48:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:48:43.409799  543705 cpu.go:282] Add success.
I0322 12:48:43.410902  543705 memory.go:191] Add success.
I0322 12:48:43.419745  543705 net.go:648] Add success.
I0322 12:48:43.422719  543705 net.go:770] primary dev: ETH0
I0322 12:48:43.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:48:43.422749  543705 net.go:698] Add success.
I0322 12:48:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:48:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:48:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:48:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:48:53.409798  543705 memory.go:184] no items to output this cycle
I0322 12:48:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 12:49:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:49:03.409795  543705 memory.go:184] no items to output this cycle
I0322 12:49:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 12:49:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:49:13.409783  543705 memory.go:191] Add success.
W0322 12:49:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:49:13.409811  543705 cpu.go:282] Add success.
W0322 12:49:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:49:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:49:13.420092  543705 net.go:648] Add success.
I0322 12:49:13.422783  543705 net.go:770] primary dev: ETH0
I0322 12:49:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:49:13.422809  543705 net.go:698] Add success.
I0322 12:49:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:49:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:49:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 12:49:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:49:14.456574  543705 disk_worker.go:494] system disk:vda1
I0322 12:49:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:49:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:49:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:49:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:49:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:49:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:49:23.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:49:23.410408  543705 memory.go:184] no items to output this cycle
I0322 12:49:23.410413  543705 cpu.go:275] no items to output this cycle
E0322 12:49:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:49:33.409772  543705 memory.go:184] no items to output this cycle
I0322 12:49:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 12:49:34.767467  543705 disk_info.go:125] begin check local disk info of client
I0322 12:49:34.770012  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:49:34.770018  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374bc0 0xc000374c00]
E0322 12:49:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:49:43.410689  543705 memory.go:191] Add success.
I0322 12:49:43.409828  543705 cpu.go:282] Add success.
I0322 12:49:43.419688  543705 net.go:648] Add success.
I0322 12:49:43.422363  543705 net.go:770] primary dev: ETH0
I0322 12:49:43.422376  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:49:43.422387  543705 net.go:698] Add success.
I0322 12:49:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:49:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:49:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:49:53.410408  543705 memory.go:184] no items to output this cycle
I0322 12:49:53.410418  543705 cpu.go:275] no items to output this cycle
E0322 12:50:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:50:03.409774  543705 memory.go:184] no items to output this cycle
I0322 12:50:03.409777  543705 cpu.go:275] no items to output this cycle
E0322 12:50:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:50:13.409826  543705 memory.go:191] Add success.
I0322 12:50:13.409830  543705 cpu.go:282] Add success.
W0322 12:50:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:50:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:50:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:50:13.420147  543705 net.go:648] Add success.
I0322 12:50:13.422650  543705 net.go:770] primary dev: ETH0
I0322 12:50:13.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:50:13.422675  543705 net.go:698] Add success.
I0322 12:50:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:50:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:50:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 12:50:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:50:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 12:50:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:50:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:50:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:50:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:50:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:50:23.409779  543705 memory.go:184] no items to output this cycle
I0322 12:50:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 12:50:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:50:33.409797  543705 memory.go:184] no items to output this cycle
I0322 12:50:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 12:50:34.771484  543705 disk_info.go:125] begin check local disk info of client
I0322 12:50:34.774042  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:50:34.774048  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508400 0xc000508440]
E0322 12:50:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:50:43.410767  543705 memory.go:191] Add success.
I0322 12:50:43.410081  543705 cpu.go:282] Add success.
I0322 12:50:43.419707  543705 net.go:648] Add success.
I0322 12:50:43.422312  543705 net.go:770] primary dev: ETH0
I0322 12:50:43.422324  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:50:43.422335  543705 net.go:698] Add success.
I0322 12:50:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:50:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:50:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:50:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:50:53.409769  543705 memory.go:184] no items to output this cycle
I0322 12:50:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 12:51:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:51:03.409800  543705 memory.go:184] no items to output this cycle
I0322 12:51:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 12:51:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:51:13.409824  543705 memory.go:191] Add success.
I0322 12:51:13.409828  543705 cpu.go:282] Add success.
W0322 12:51:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:51:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:51:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:51:13.420265  543705 net.go:648] Add success.
I0322 12:51:13.423196  543705 net.go:770] primary dev: ETH0
I0322 12:51:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:51:13.423249  543705 net.go:698] Add success.
I0322 12:51:13.468062  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"39a26248-df15-4d0b-ac1a-2ec01f165a22","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:51:13.468096  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:51:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:51:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:51:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 12:51:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:51:14.456598  543705 disk_worker.go:494] system disk:vda1
I0322 12:51:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:51:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:51:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:51:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:51:23.409797  543705 cpu.go:275] no items to output this cycle
I0322 12:51:23.409803  543705 memory.go:184] no items to output this cycle
E0322 12:51:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:51:33.409799  543705 memory.go:184] no items to output this cycle
I0322 12:51:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 12:51:34.774134  543705 disk_info.go:125] begin check local disk info of client
I0322 12:51:34.776685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:51:34.776691  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2000 0xc0003b2040]
I0322 12:51:39.649730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:51:39.649736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:51:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:51:43.410744  543705 memory.go:191] Add success.
I0322 12:51:43.409808  543705 cpu.go:282] Add success.
I0322 12:51:43.420532  543705 net.go:648] Add success.
I0322 12:51:43.423444  543705 net.go:770] primary dev: ETH0
I0322 12:51:43.423457  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:51:43.423470  543705 net.go:698] Add success.
I0322 12:51:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:51:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:51:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:51:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:51:53.409784  543705 memory.go:184] no items to output this cycle
I0322 12:51:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 12:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:52:03.409779  543705 memory.go:184] no items to output this cycle
I0322 12:52:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 12:52:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:52:13.409794  543705 memory.go:191] Add success.
I0322 12:52:13.409796  543705 cpu.go:282] Add success.
W0322 12:52:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:52:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:52:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:52:13.420120  543705 net.go:648] Add success.
I0322 12:52:13.422723  543705 net.go:770] primary dev: ETH0
I0322 12:52:13.422736  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:52:13.422748  543705 net.go:698] Add success.
W0322 12:52:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:52:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 12:52:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:52:14.456914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:52:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:52:14.456930  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:52:14.457004  543705 disk_worker.go:494] system disk:vda1
I0322 12:52:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:52:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:52:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:52:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:52:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:52:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:52:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:52:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:52:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:52:23.409808  543705 memory.go:184] no items to output this cycle
I0322 12:52:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 12:52:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:52:33.409763  543705 memory.go:184] no items to output this cycle
I0322 12:52:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 12:52:34.777665  543705 disk_info.go:125] begin check local disk info of client
I0322 12:52:34.780205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:52:34.780211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005097c0 0xc000509800]
E0322 12:52:43.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:52:43.410862  543705 memory.go:191] Add success.
I0322 12:52:43.410068  543705 cpu.go:282] Add success.
I0322 12:52:43.419729  543705 net.go:648] Add success.
I0322 12:52:43.422765  543705 net.go:770] primary dev: ETH0
I0322 12:52:43.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:52:43.422794  543705 net.go:698] Add success.
I0322 12:52:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:52:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:52:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:52:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:52:53.409780  543705 memory.go:184] no items to output this cycle
I0322 12:52:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 12:53:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:53:03.409763  543705 memory.go:184] no items to output this cycle
I0322 12:53:03.409810  543705 cpu.go:275] no items to output this cycle
I0322 12:53:13.409811  543705 cpu.go:282] Add success.
E0322 12:53:13.410110  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:53:13.410128  543705 memory.go:191] Add success.
W0322 12:53:13.410153  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:53:13.410163  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:53:13.410166  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:53:13.420334  543705 net.go:648] Add success.
I0322 12:53:13.421225  543705 net.go:770] primary dev: ETH0
I0322 12:53:13.421238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:53:13.421250  543705 net.go:698] Add success.
I0322 12:53:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:53:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:53:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 12:53:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:53:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 12:53:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:53:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:53:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:53:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:53:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:53:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:53:23.409811  543705 memory.go:184] no items to output this cycle
I0322 12:53:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:53:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:53:33.409775  543705 memory.go:184] no items to output this cycle
I0322 12:53:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 12:53:34.780296  543705 disk_info.go:125] begin check local disk info of client
I0322 12:53:34.782884  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:53:34.782891  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001b8000 0xc0001b8040]
E0322 12:53:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:53:43.410670  543705 memory.go:191] Add success.
I0322 12:53:43.409823  543705 cpu.go:282] Add success.
I0322 12:53:43.420358  543705 net.go:648] Add success.
I0322 12:53:43.422960  543705 net.go:770] primary dev: ETH0
I0322 12:53:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:53:43.422986  543705 net.go:698] Add success.
I0322 12:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:53:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:53:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:53:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:53:53.409780  543705 memory.go:184] no items to output this cycle
I0322 12:53:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:54:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:54:03.409794  543705 memory.go:184] no items to output this cycle
I0322 12:54:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 12:54:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:54:13.409781  543705 memory.go:191] Add success.
W0322 12:54:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 12:54:13.409813  543705 cpu.go:282] Add success.
W0322 12:54:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:54:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:54:13.420170  543705 net.go:648] Add success.
I0322 12:54:13.422847  543705 net.go:770] primary dev: ETH0
I0322 12:54:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:54:13.422872  543705 net.go:698] Add success.
I0322 12:54:13.516346  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"99970270-b7e5-4e4e-94f4-1d1fa76b937e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:54:13.516381  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 12:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:54:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:54:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 12:54:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:54:14.456640  543705 disk_worker.go:494] system disk:vda1
I0322 12:54:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:54:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:54:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:54:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:54:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:54:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:54:23.409787  543705 memory.go:184] no items to output this cycle
I0322 12:54:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 12:54:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:54:33.409779  543705 memory.go:184] no items to output this cycle
I0322 12:54:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 12:54:34.782966  543705 disk_info.go:125] begin check local disk info of client
I0322 12:54:34.785685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:54:34.785693  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7180 0xc0003e71c0]
I0322 12:54:39.650951  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:54:39.650958  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:54:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:54:43.410706  543705 memory.go:191] Add success.
I0322 12:54:43.409833  543705 cpu.go:282] Add success.
I0322 12:54:43.420416  543705 net.go:648] Add success.
I0322 12:54:43.423397  543705 net.go:770] primary dev: ETH0
I0322 12:54:43.423410  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:54:43.423422  543705 net.go:698] Add success.
I0322 12:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:54:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:54:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:54:53.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:54:53.410422  543705 memory.go:184] no items to output this cycle
I0322 12:54:53.410436  543705 cpu.go:275] no items to output this cycle
E0322 12:55:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:55:03.409793  543705 memory.go:184] no items to output this cycle
I0322 12:55:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 12:55:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:55:13.409786  543705 memory.go:191] Add success.
I0322 12:55:13.409807  543705 cpu.go:282] Add success.
W0322 12:55:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:55:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:55:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:55:13.420116  543705 net.go:648] Add success.
I0322 12:55:13.422795  543705 net.go:770] primary dev: ETH0
I0322 12:55:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:55:13.422820  543705 net.go:698] Add success.
I0322 12:55:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:55:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:55:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 12:55:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:55:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 12:55:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:55:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:55:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:55:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:55:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:55:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:55:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:55:23.409786  543705 memory.go:184] no items to output this cycle
I0322 12:55:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:55:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:55:33.409783  543705 memory.go:184] no items to output this cycle
I0322 12:55:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 12:55:34.786967  543705 disk_info.go:125] begin check local disk info of client
I0322 12:55:34.789519  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:55:34.789525  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002662c0 0xc000266300]
E0322 12:55:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:55:43.410699  543705 memory.go:191] Add success.
I0322 12:55:43.409826  543705 cpu.go:282] Add success.
I0322 12:55:43.420426  543705 net.go:648] Add success.
I0322 12:55:43.423288  543705 net.go:770] primary dev: ETH0
I0322 12:55:43.423301  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:55:43.423313  543705 net.go:698] Add success.
I0322 12:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:55:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:55:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:55:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:55:53.409770  543705 memory.go:184] no items to output this cycle
I0322 12:55:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 12:56:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:56:03.409799  543705 memory.go:184] no items to output this cycle
I0322 12:56:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 12:56:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:56:13.409820  543705 memory.go:191] Add success.
I0322 12:56:13.409824  543705 cpu.go:282] Add success.
W0322 12:56:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:56:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:56:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:56:13.420150  543705 net.go:648] Add success.
I0322 12:56:13.422708  543705 net.go:770] primary dev: ETH0
I0322 12:56:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:56:13.422732  543705 net.go:698] Add success.
I0322 12:56:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:56:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:56:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 12:56:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:56:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 12:56:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:56:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:56:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:56:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:56:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:56:23.409779  543705 memory.go:184] no items to output this cycle
I0322 12:56:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 12:56:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:56:33.409780  543705 memory.go:184] no items to output this cycle
I0322 12:56:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 12:56:34.789671  543705 disk_info.go:125] begin check local disk info of client
I0322 12:56:34.792189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:56:34.792195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369480 0xc0003694c0]
E0322 12:56:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:56:43.410573  543705 memory.go:191] Add success.
I0322 12:56:43.409809  543705 cpu.go:282] Add success.
I0322 12:56:43.420351  543705 net.go:648] Add success.
I0322 12:56:43.423014  543705 net.go:770] primary dev: ETH0
I0322 12:56:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:56:43.423043  543705 net.go:698] Add success.
I0322 12:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:56:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:56:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:56:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:56:53.409786  543705 memory.go:184] no items to output this cycle
I0322 12:56:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 12:57:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:57:03.409797  543705 memory.go:184] no items to output this cycle
I0322 12:57:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 12:57:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:57:13.409789  543705 memory.go:191] Add success.
I0322 12:57:13.409814  543705 cpu.go:282] Add success.
W0322 12:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:57:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:57:13.420073  543705 net.go:648] Add success.
I0322 12:57:13.423032  543705 net.go:770] primary dev: ETH0
I0322 12:57:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:57:13.423062  543705 net.go:698] Add success.
I0322 12:57:13.429344  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 12:57:13.453589  543705 event_worker.go:152] Polling the log file for events...
I0322 12:57:13.468526  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b162e357-77ee-4652-be2e-55af27078975","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 12:57:13.468560  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 12:57:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:57:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 12:57:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0322 12:57:14.455854  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 12:57:14.455862  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 12:57:14.455867  543705 custom_config.go:64] query custom config with name: gpu
I0322 12:57:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 12:57:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 12:57:15.456834  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 12:57:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:57:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 12:57:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 12:57:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:57:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:57:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:57:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:57:23.409786  543705 memory.go:184] no items to output this cycle
I0322 12:57:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:57:33.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:57:33.409904  543705 memory.go:184] no items to output this cycle
I0322 12:57:33.409929  543705 cpu.go:275] no items to output this cycle
I0322 12:57:34.792278  543705 disk_info.go:125] begin check local disk info of client
I0322 12:57:34.794921  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:57:34.794928  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0322 12:57:39.651955  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 12:57:39.651962  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 12:57:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:57:43.410806  543705 memory.go:191] Add success.
I0322 12:57:43.409809  543705 cpu.go:282] Add success.
I0322 12:57:43.420530  543705 net.go:648] Add success.
I0322 12:57:43.423385  543705 net.go:770] primary dev: ETH0
I0322 12:57:43.423398  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:57:43.423411  543705 net.go:698] Add success.
I0322 12:57:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:57:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:57:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:57:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:57:53.409789  543705 memory.go:184] no items to output this cycle
I0322 12:57:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 12:58:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:58:03.409803  543705 memory.go:184] no items to output this cycle
I0322 12:58:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 12:58:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:58:13.409798  543705 memory.go:191] Add success.
I0322 12:58:13.409801  543705 cpu.go:282] Add success.
W0322 12:58:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:58:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:58:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:58:13.420077  543705 net.go:648] Add success.
I0322 12:58:13.422961  543705 net.go:770] primary dev: ETH0
I0322 12:58:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:58:13.422992  543705 net.go:698] Add success.
I0322 12:58:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:58:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:58:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 12:58:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:58:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 12:58:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:58:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:58:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:58:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:58:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:58:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:58:23.409807  543705 memory.go:184] no items to output this cycle
I0322 12:58:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 12:58:33.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:58:33.409855  543705 memory.go:184] no items to output this cycle
I0322 12:58:33.409951  543705 cpu.go:275] no items to output this cycle
I0322 12:58:34.796613  543705 disk_info.go:125] begin check local disk info of client
I0322 12:58:34.799191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:58:34.799197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f0000 0xc0003f0040]
E0322 12:58:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:58:43.410666  543705 memory.go:191] Add success.
I0322 12:58:43.409806  543705 cpu.go:282] Add success.
I0322 12:58:43.420440  543705 net.go:648] Add success.
I0322 12:58:43.423012  543705 net.go:770] primary dev: ETH0
I0322 12:58:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:58:43.423037  543705 net.go:698] Add success.
I0322 12:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:58:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:58:53.409798  543705 memory.go:184] no items to output this cycle
I0322 12:58:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 12:59:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:59:03.409774  543705 memory.go:184] no items to output this cycle
I0322 12:59:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 12:59:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:59:13.409796  543705 memory.go:191] Add success.
I0322 12:59:13.409796  543705 cpu.go:282] Add success.
W0322 12:59:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 12:59:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 12:59:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 12:59:13.420160  543705 net.go:648] Add success.
I0322 12:59:13.422798  543705 net.go:770] primary dev: ETH0
I0322 12:59:13.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:59:13.422827  543705 net.go:698] Add success.
I0322 12:59:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 12:59:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 12:59:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 12:59:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 12:59:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 12:59:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 12:59:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 12:59:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:59:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:59:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 12:59:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 12:59:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:59:23.409782  543705 memory.go:184] no items to output this cycle
I0322 12:59:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 12:59:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:59:33.409784  543705 memory.go:184] no items to output this cycle
I0322 12:59:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 12:59:34.800633  543705 disk_info.go:125] begin check local disk info of client
I0322 12:59:34.803215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 12:59:34.803221  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c900 0xc00034c940]
E0322 12:59:43.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:59:43.410761  543705 memory.go:191] Add success.
I0322 12:59:43.409976  543705 cpu.go:282] Add success.
I0322 12:59:43.419709  543705 net.go:648] Add success.
I0322 12:59:43.422415  543705 net.go:770] primary dev: ETH0
I0322 12:59:43.422428  543705 net.go:802] Send network stats successfully!,count is 6
I0322 12:59:43.422439  543705 net.go:698] Add success.
I0322 12:59:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 12:59:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 12:59:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 12:59:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 12:59:53.409814  543705 memory.go:184] no items to output this cycle
I0322 12:59:53.409827  543705 cpu.go:275] no items to output this cycle
E0322 13:00:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:00:03.409795  543705 memory.go:184] no items to output this cycle
I0322 13:00:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 13:00:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:00:13.409788  543705 memory.go:191] Add success.
I0322 13:00:13.409807  543705 cpu.go:282] Add success.
W0322 13:00:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:00:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:00:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:00:13.420152  543705 net.go:648] Add success.
I0322 13:00:13.422803  543705 net.go:770] primary dev: ETH0
I0322 13:00:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:00:13.422828  543705 net.go:698] Add success.
I0322 13:00:13.471390  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b6ebaefe-fbd9-4eab-9403-3f4f60a99d48","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:00:13.471424  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:00:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:00:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:00:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0322 13:00:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:00:14.456757  543705 disk_worker.go:494] system disk:vda1
I0322 13:00:14.456787  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:00:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:00:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:00:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:00:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:00:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:00:23.409807  543705 memory.go:184] no items to output this cycle
I0322 13:00:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 13:00:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:00:33.409782  543705 memory.go:184] no items to output this cycle
I0322 13:00:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 13:00:34.803307  543705 disk_info.go:125] begin check local disk info of client
I0322 13:00:34.805800  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:00:34.805808  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460540 0xc000460580]
I0322 13:00:39.652966  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:00:39.652973  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:00:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:00:43.410690  543705 memory.go:191] Add success.
I0322 13:00:43.409808  543705 cpu.go:282] Add success.
I0322 13:00:43.420396  543705 net.go:648] Add success.
I0322 13:00:43.423048  543705 net.go:770] primary dev: ETH0
I0322 13:00:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:00:43.423078  543705 net.go:698] Add success.
I0322 13:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:00:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:00:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:00:53.409768  543705 memory.go:184] no items to output this cycle
I0322 13:00:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 13:01:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:01:03.409803  543705 memory.go:184] no items to output this cycle
I0322 13:01:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:01:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:01:13.409783  543705 memory.go:191] Add success.
I0322 13:01:13.409805  543705 cpu.go:282] Add success.
W0322 13:01:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:01:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:01:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:01:13.420048  543705 net.go:648] Add success.
I0322 13:01:13.422816  543705 net.go:770] primary dev: ETH0
I0322 13:01:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:01:13.422843  543705 net.go:698] Add success.
I0322 13:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:01:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:01:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 13:01:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:01:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 13:01:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:01:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:01:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:01:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:01:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:01:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:01:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:01:23.409819  543705 memory.go:184] no items to output this cycle
I0322 13:01:23.409832  543705 cpu.go:275] no items to output this cycle
E0322 13:01:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:01:33.409783  543705 memory.go:184] no items to output this cycle
I0322 13:01:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 13:01:34.805885  543705 disk_info.go:125] begin check local disk info of client
I0322 13:01:34.808349  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:01:34.808355  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be380 0xc0003be3c0]
E0322 13:01:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:01:43.410632  543705 memory.go:191] Add success.
I0322 13:01:43.409797  543705 cpu.go:282] Add success.
I0322 13:01:43.420597  543705 net.go:648] Add success.
I0322 13:01:43.423307  543705 net.go:770] primary dev: ETH0
I0322 13:01:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:01:43.423332  543705 net.go:698] Add success.
I0322 13:01:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:01:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:01:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:01:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:01:53.409778  543705 memory.go:184] no items to output this cycle
I0322 13:01:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 13:02:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:02:03.409779  543705 memory.go:184] no items to output this cycle
I0322 13:02:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:02:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:02:13.409783  543705 memory.go:191] Add success.
W0322 13:02:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:02:13.409813  543705 cpu.go:282] Add success.
W0322 13:02:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:02:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:02:13.420111  543705 net.go:648] Add success.
I0322 13:02:13.422948  543705 net.go:770] primary dev: ETH0
I0322 13:02:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:02:13.422973  543705 net.go:698] Add success.
W0322 13:02:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:02:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 13:02:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:02:14.455903  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:02:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:02:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:02:14.456577  543705 disk_worker.go:494] system disk:vda1
I0322 13:02:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:02:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:02:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:02:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:02:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:02:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:02:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:02:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:02:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:02:23.409791  543705 memory.go:184] no items to output this cycle
I0322 13:02:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 13:02:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:02:33.409807  543705 memory.go:184] no items to output this cycle
I0322 13:02:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 13:02:34.809674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:02:34.812182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:02:34.812188  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a680 0xc00048a6c0]
E0322 13:02:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:02:43.410588  543705 memory.go:191] Add success.
I0322 13:02:43.409812  543705 cpu.go:282] Add success.
I0322 13:02:43.420457  543705 net.go:648] Add success.
I0322 13:02:43.423272  543705 net.go:770] primary dev: ETH0
I0322 13:02:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:02:43.423297  543705 net.go:698] Add success.
I0322 13:02:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:02:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:02:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:02:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:02:53.409773  543705 memory.go:184] no items to output this cycle
I0322 13:02:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 13:03:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:03:03.409792  543705 cpu.go:275] no items to output this cycle
I0322 13:03:03.409799  543705 memory.go:184] no items to output this cycle
E0322 13:03:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:03:13.409817  543705 memory.go:191] Add success.
I0322 13:03:13.409827  543705 cpu.go:282] Add success.
W0322 13:03:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:03:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:03:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:03:13.420159  543705 net.go:648] Add success.
I0322 13:03:13.422893  543705 net.go:770] primary dev: ETH0
I0322 13:03:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:03:13.422920  543705 net.go:698] Add success.
I0322 13:03:13.468573  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2bd7b61a-d74e-4ba1-abae-2f05e551b4b4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:03:13.468606  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:03:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:03:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:03:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 13:03:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:03:14.456726  543705 disk_worker.go:494] system disk:vda1
I0322 13:03:14.456755  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:03:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:03:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:03:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:03:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:03:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:03:23.409785  543705 memory.go:184] no items to output this cycle
I0322 13:03:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 13:03:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:03:33.409782  543705 memory.go:184] no items to output this cycle
I0322 13:03:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 13:03:34.813674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:03:34.816188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:03:34.816194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c80 0xc0000c5cc0]
I0322 13:03:39.653727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:03:39.653733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:03:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:03:43.410735  543705 memory.go:191] Add success.
I0322 13:03:43.409818  543705 cpu.go:282] Add success.
I0322 13:03:43.420640  543705 net.go:648] Add success.
I0322 13:03:43.423398  543705 net.go:770] primary dev: ETH0
I0322 13:03:43.423410  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:03:43.423422  543705 net.go:698] Add success.
I0322 13:03:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:03:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:03:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:03:53.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:03:53.410264  543705 memory.go:184] no items to output this cycle
I0322 13:03:53.410283  543705 cpu.go:275] no items to output this cycle
E0322 13:04:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:04:03.409779  543705 cpu.go:275] no items to output this cycle
I0322 13:04:03.409785  543705 memory.go:184] no items to output this cycle
E0322 13:04:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:04:13.409822  543705 memory.go:191] Add success.
I0322 13:04:13.409834  543705 cpu.go:282] Add success.
W0322 13:04:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:04:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:04:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:04:13.420157  543705 net.go:648] Add success.
I0322 13:04:13.423131  543705 net.go:770] primary dev: ETH0
I0322 13:04:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:04:13.423161  543705 net.go:698] Add success.
I0322 13:04:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:04:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:04:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 13:04:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:04:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 13:04:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:04:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:04:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:04:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:04:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:04:23.409789  543705 memory.go:184] no items to output this cycle
I0322 13:04:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 13:04:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:04:33.409780  543705 memory.go:184] no items to output this cycle
I0322 13:04:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 13:04:34.817670  543705 disk_info.go:125] begin check local disk info of client
I0322 13:04:34.820173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:04:34.820179  543705 disk_info.go:196] parse disk info done, disk is : [0xc000576480 0xc0005764c0]
E0322 13:04:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:04:43.410558  543705 memory.go:191] Add success.
I0322 13:04:43.409821  543705 cpu.go:282] Add success.
I0322 13:04:43.420565  543705 net.go:648] Add success.
I0322 13:04:43.423294  543705 net.go:770] primary dev: ETH0
I0322 13:04:43.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:04:43.423318  543705 net.go:698] Add success.
I0322 13:04:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:04:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:04:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:04:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:04:53.409767  543705 memory.go:184] no items to output this cycle
I0322 13:04:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 13:05:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:05:03.409791  543705 cpu.go:275] no items to output this cycle
I0322 13:05:03.409795  543705 memory.go:184] no items to output this cycle
E0322 13:05:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:05:13.409788  543705 memory.go:191] Add success.
I0322 13:05:13.409798  543705 cpu.go:282] Add success.
W0322 13:05:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:05:13.412489  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:05:13.412494  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:05:13.420139  543705 net.go:648] Add success.
I0322 13:05:13.421962  543705 net.go:770] primary dev: ETH0
I0322 13:05:13.421975  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:05:13.421987  543705 net.go:698] Add success.
I0322 13:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:05:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:05:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 13:05:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:05:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 13:05:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:05:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:05:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:05:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:05:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:05:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:05:23.409818  543705 memory.go:184] no items to output this cycle
I0322 13:05:23.409828  543705 cpu.go:275] no items to output this cycle
E0322 13:05:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:05:33.409797  543705 memory.go:184] no items to output this cycle
I0322 13:05:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 13:05:34.821672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:05:34.824205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:05:34.824212  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048a2c0 0xc00048a300]
E0322 13:05:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:05:43.410540  543705 memory.go:191] Add success.
I0322 13:05:43.409835  543705 cpu.go:282] Add success.
I0322 13:05:43.420354  543705 net.go:648] Add success.
I0322 13:05:43.422995  543705 net.go:770] primary dev: ETH0
I0322 13:05:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:05:43.423020  543705 net.go:698] Add success.
I0322 13:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:05:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:05:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:05:53.409782  543705 memory.go:184] no items to output this cycle
I0322 13:05:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 13:06:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:06:03.409772  543705 cpu.go:275] no items to output this cycle
I0322 13:06:03.409779  543705 memory.go:184] no items to output this cycle
E0322 13:06:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:06:13.409836  543705 memory.go:191] Add success.
I0322 13:06:13.409843  543705 cpu.go:282] Add success.
W0322 13:06:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:06:13.409899  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:06:13.409903  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:06:13.420147  543705 net.go:648] Add success.
I0322 13:06:13.423538  543705 net.go:770] primary dev: ETH0
I0322 13:06:13.423552  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:06:13.423566  543705 net.go:698] Add success.
I0322 13:06:13.468120  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"44f6b7bb-ec64-4622-be07-03b4ffabfe51","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:06:13.468162  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:06:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:06:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:06:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 13:06:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:06:14.456531  543705 disk_worker.go:494] system disk:vda1
I0322 13:06:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:06:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:06:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:06:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:06:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:06:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:06:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:06:23.409793  543705 cpu.go:275] no items to output this cycle
I0322 13:06:23.409797  543705 memory.go:184] no items to output this cycle
E0322 13:06:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:06:33.409775  543705 memory.go:184] no items to output this cycle
I0322 13:06:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 13:06:34.825671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:06:34.828174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:06:34.828180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003afe00 0xc0003afe40]
I0322 13:06:39.654968  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:06:39.654974  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:06:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:06:43.410730  543705 memory.go:191] Add success.
I0322 13:06:43.409808  543705 cpu.go:282] Add success.
I0322 13:06:43.419708  543705 net.go:648] Add success.
I0322 13:06:43.422797  543705 net.go:770] primary dev: ETH0
I0322 13:06:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:06:43.422825  543705 net.go:698] Add success.
I0322 13:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:06:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:06:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:06:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:06:53.409769  543705 memory.go:184] no items to output this cycle
I0322 13:06:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 13:07:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:07:03.409798  543705 memory.go:184] no items to output this cycle
I0322 13:07:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 13:07:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:07:13.409780  543705 memory.go:191] Add success.
W0322 13:07:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:07:13.409812  543705 cpu.go:282] Add success.
W0322 13:07:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:07:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:07:13.420175  543705 net.go:648] Add success.
I0322 13:07:13.423250  543705 net.go:770] primary dev: ETH0
I0322 13:07:13.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:07:13.423274  543705 net.go:698] Add success.
I0322 13:07:13.452772  543705 event_worker.go:152] Polling the log file for events...
W0322 13:07:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:07:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 13:07:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:07:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:07:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:07:14.455891  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:07:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 13:07:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:07:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:07:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:07:16.457922  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:07:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:07:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:07:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:07:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:07:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:07:23.409774  543705 memory.go:184] no items to output this cycle
I0322 13:07:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 13:07:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:07:33.409778  543705 memory.go:184] no items to output this cycle
I0322 13:07:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 13:07:34.829673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:07:34.832189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:07:34.832195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8b00 0xc0004d8b40]
E0322 13:07:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:07:43.410595  543705 memory.go:191] Add success.
I0322 13:07:43.409814  543705 cpu.go:282] Add success.
I0322 13:07:43.420468  543705 net.go:648] Add success.
I0322 13:07:43.423236  543705 net.go:770] primary dev: ETH0
I0322 13:07:43.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:07:43.423264  543705 net.go:698] Add success.
I0322 13:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:07:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:07:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:07:53.409776  543705 memory.go:184] no items to output this cycle
I0322 13:07:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 13:08:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:08:03.409785  543705 cpu.go:275] no items to output this cycle
I0322 13:08:03.409792  543705 memory.go:184] no items to output this cycle
E0322 13:08:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:08:13.409795  543705 memory.go:191] Add success.
I0322 13:08:13.409795  543705 cpu.go:282] Add success.
W0322 13:08:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:08:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:08:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:08:13.420068  543705 net.go:648] Add success.
I0322 13:08:13.422894  543705 net.go:770] primary dev: ETH0
I0322 13:08:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:08:13.422919  543705 net.go:698] Add success.
I0322 13:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:08:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:08:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 13:08:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:08:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 13:08:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:08:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:08:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:08:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:08:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:08:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:08:23.409781  543705 memory.go:184] no items to output this cycle
I0322 13:08:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 13:08:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:08:33.409775  543705 memory.go:184] no items to output this cycle
I0322 13:08:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 13:08:34.833674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:08:34.836189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:08:34.836195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002671c0 0xc000267200]
E0322 13:08:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:08:43.409810  543705 cpu.go:282] Add success.
I0322 13:08:43.410696  543705 memory.go:191] Add success.
I0322 13:08:43.419730  543705 net.go:648] Add success.
I0322 13:08:43.422585  543705 net.go:770] primary dev: ETH0
I0322 13:08:43.422598  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:08:43.422610  543705 net.go:698] Add success.
I0322 13:08:46.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:08:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:08:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:08:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:08:53.409769  543705 memory.go:184] no items to output this cycle
I0322 13:08:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 13:09:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:09:03.409766  543705 memory.go:184] no items to output this cycle
I0322 13:09:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 13:09:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:09:13.409813  543705 memory.go:191] Add success.
I0322 13:09:13.409820  543705 cpu.go:282] Add success.
W0322 13:09:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:09:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:09:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:09:13.420132  543705 net.go:648] Add success.
I0322 13:09:13.422974  543705 net.go:770] primary dev: ETH0
I0322 13:09:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:09:13.423007  543705 net.go:698] Add success.
I0322 13:09:13.468848  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f9f7c683-2487-443d-930c-152c012bd613","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:09:13.468883  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:09:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:09:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:09:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0322 13:09:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:09:14.456608  543705 disk_worker.go:494] system disk:vda1
I0322 13:09:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:09:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:09:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:09:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:09:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:09:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:09:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:09:23.409802  543705 memory.go:184] no items to output this cycle
I0322 13:09:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 13:09:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:09:33.409780  543705 memory.go:184] no items to output this cycle
I0322 13:09:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 13:09:34.837673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:09:34.840161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:09:34.840166  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039b8c0 0xc00039b900]
I0322 13:09:39.655974  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:09:39.655981  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:09:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:09:43.410770  543705 memory.go:191] Add success.
I0322 13:09:43.409823  543705 cpu.go:282] Add success.
I0322 13:09:43.419736  543705 net.go:648] Add success.
I0322 13:09:43.422382  543705 net.go:770] primary dev: ETH0
I0322 13:09:43.422394  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:09:43.422406  543705 net.go:698] Add success.
I0322 13:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:09:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:09:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:09:53.410201  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:09:53.410217  543705 memory.go:184] no items to output this cycle
I0322 13:09:53.410221  543705 cpu.go:275] no items to output this cycle
E0322 13:10:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:10:03.409805  543705 memory.go:184] no items to output this cycle
I0322 13:10:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 13:10:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:10:13.409787  543705 memory.go:191] Add success.
I0322 13:10:13.409807  543705 cpu.go:282] Add success.
W0322 13:10:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:10:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:10:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:10:13.420065  543705 net.go:648] Add success.
I0322 13:10:13.422986  543705 net.go:770] primary dev: ETH0
I0322 13:10:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:10:13.423010  543705 net.go:698] Add success.
I0322 13:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:10:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:10:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 13:10:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:10:14.456529  543705 disk_worker.go:494] system disk:vda1
I0322 13:10:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:10:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:10:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:10:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:10:23.409816  543705 memory.go:184] no items to output this cycle
I0322 13:10:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 13:10:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:10:33.409822  543705 memory.go:184] no items to output this cycle
I0322 13:10:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 13:10:34.841668  543705 disk_info.go:125] begin check local disk info of client
I0322 13:10:34.844187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:10:34.844193  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a440 0xc00032a480]
E0322 13:10:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:10:43.410810  543705 memory.go:191] Add success.
I0322 13:10:43.409820  543705 cpu.go:282] Add success.
I0322 13:10:43.420786  543705 net.go:648] Add success.
I0322 13:10:43.424357  543705 net.go:770] primary dev: ETH0
I0322 13:10:43.424371  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:10:43.424383  543705 net.go:698] Add success.
I0322 13:10:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:10:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:10:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:10:53.410376  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:10:53.410388  543705 cpu.go:275] no items to output this cycle
I0322 13:10:53.410395  543705 memory.go:184] no items to output this cycle
E0322 13:11:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:11:03.409767  543705 memory.go:184] no items to output this cycle
I0322 13:11:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 13:11:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:11:13.409795  543705 memory.go:191] Add success.
I0322 13:11:13.409803  543705 cpu.go:282] Add success.
W0322 13:11:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:11:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:11:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:11:13.420148  543705 net.go:648] Add success.
I0322 13:11:13.422632  543705 net.go:770] primary dev: ETH0
I0322 13:11:13.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:11:13.422658  543705 net.go:698] Add success.
I0322 13:11:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:11:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:11:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 13:11:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:11:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 13:11:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:11:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:11:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:11:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:11:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:11:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:11:23.409808  543705 memory.go:184] no items to output this cycle
I0322 13:11:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 13:11:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:11:33.409784  543705 memory.go:184] no items to output this cycle
I0322 13:11:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 13:11:34.845670  543705 disk_info.go:125] begin check local disk info of client
I0322 13:11:34.848195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:11:34.848201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd240 0xc0002bd280]
E0322 13:11:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:11:43.410726  543705 memory.go:191] Add success.
I0322 13:11:43.409800  543705 cpu.go:282] Add success.
I0322 13:11:43.420541  543705 net.go:648] Add success.
I0322 13:11:43.423198  543705 net.go:770] primary dev: ETH0
I0322 13:11:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:11:43.423222  543705 net.go:698] Add success.
I0322 13:11:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:11:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:11:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:11:53.409794  543705 memory.go:184] no items to output this cycle
I0322 13:11:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:12:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:12:03.409776  543705 cpu.go:275] no items to output this cycle
I0322 13:12:03.409780  543705 memory.go:184] no items to output this cycle
E0322 13:12:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:12:13.409804  543705 memory.go:191] Add success.
I0322 13:12:13.409805  543705 cpu.go:282] Add success.
W0322 13:12:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:12:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:12:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:12:13.420056  543705 net.go:648] Add success.
I0322 13:12:13.422934  543705 net.go:770] primary dev: ETH0
I0322 13:12:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:12:13.422964  543705 net.go:698] Add success.
I0322 13:12:13.463949  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5574cdbf-1717-480b-a6a1-a54120aa1523","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:12:13.463984  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 13:12:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:12:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 13:12:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:12:14.457012  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:12:14.457022  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:12:14.457028  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:12:14.457059  543705 disk_worker.go:494] system disk:vda1
I0322 13:12:14.457104  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:12:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:12:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:12:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:12:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:12:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:12:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:12:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:12:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:12:23.409818  543705 memory.go:184] no items to output this cycle
I0322 13:12:23.409825  543705 cpu.go:275] no items to output this cycle
E0322 13:12:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:12:33.409806  543705 memory.go:184] no items to output this cycle
I0322 13:12:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 13:12:34.849670  543705 disk_info.go:125] begin check local disk info of client
I0322 13:12:34.852266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:12:34.852272  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd900 0xc0002bd940]
I0322 13:12:39.656978  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:12:39.656985  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:12:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:12:43.410885  543705 memory.go:191] Add success.
I0322 13:12:43.409828  543705 cpu.go:282] Add success.
I0322 13:12:43.419744  543705 net.go:648] Add success.
I0322 13:12:43.422531  543705 net.go:770] primary dev: ETH0
I0322 13:12:43.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:12:43.422555  543705 net.go:698] Add success.
I0322 13:12:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:12:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:12:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:12:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:12:53.409782  543705 cpu.go:275] no items to output this cycle
I0322 13:12:53.409789  543705 memory.go:184] no items to output this cycle
E0322 13:13:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:13:03.409796  543705 memory.go:184] no items to output this cycle
I0322 13:13:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 13:13:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:13:13.409795  543705 memory.go:191] Add success.
I0322 13:13:13.409824  543705 cpu.go:282] Add success.
W0322 13:13:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:13:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:13:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:13:13.420095  543705 net.go:648] Add success.
I0322 13:13:13.423092  543705 net.go:770] primary dev: ETH0
I0322 13:13:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:13:13.423118  543705 net.go:698] Add success.
I0322 13:13:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:13:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:13:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 13:13:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:13:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 13:13:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:13:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:13:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:13:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:13:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:13:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:13:23.409813  543705 memory.go:184] no items to output this cycle
I0322 13:13:23.409823  543705 cpu.go:275] no items to output this cycle
E0322 13:13:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:13:33.409788  543705 memory.go:184] no items to output this cycle
I0322 13:13:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 13:13:34.853674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:13:34.856203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:13:34.856209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034ce40 0xc00034ce80]
E0322 13:13:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:13:43.410716  543705 memory.go:191] Add success.
I0322 13:13:43.409789  543705 cpu.go:282] Add success.
I0322 13:13:43.420530  543705 net.go:648] Add success.
I0322 13:13:43.423276  543705 net.go:770] primary dev: ETH0
I0322 13:13:43.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:13:43.423304  543705 net.go:698] Add success.
I0322 13:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:13:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:13:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:13:53.409792  543705 memory.go:184] no items to output this cycle
I0322 13:13:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 13:14:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:14:03.409799  543705 memory.go:184] no items to output this cycle
I0322 13:14:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 13:14:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:14:13.409830  543705 memory.go:191] Add success.
I0322 13:14:13.409832  543705 cpu.go:282] Add success.
W0322 13:14:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:14:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:14:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:14:13.420193  543705 net.go:648] Add success.
I0322 13:14:13.423146  543705 net.go:770] primary dev: ETH0
I0322 13:14:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:14:13.423172  543705 net.go:698] Add success.
I0322 13:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:14:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:14:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 13:14:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:14:14.456623  543705 disk_worker.go:494] system disk:vda1
I0322 13:14:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:14:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:14:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:14:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:14:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:14:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:14:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:14:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 13:14:23.409803  543705 memory.go:184] no items to output this cycle
E0322 13:14:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:14:33.409775  543705 memory.go:184] no items to output this cycle
I0322 13:14:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 13:14:34.857672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:14:34.860217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:14:34.860222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dfb40 0xc0003dfb80]
E0322 13:14:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:14:43.410690  543705 memory.go:191] Add success.
I0322 13:14:43.409792  543705 cpu.go:282] Add success.
I0322 13:14:43.419721  543705 net.go:648] Add success.
I0322 13:14:43.422363  543705 net.go:770] primary dev: ETH0
I0322 13:14:43.422379  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:14:43.422392  543705 net.go:698] Add success.
I0322 13:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:14:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:14:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:14:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:14:53.409776  543705 cpu.go:275] no items to output this cycle
I0322 13:14:53.409782  543705 memory.go:184] no items to output this cycle
E0322 13:15:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:15:03.409777  543705 memory.go:184] no items to output this cycle
I0322 13:15:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 13:15:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:15:13.409794  543705 memory.go:191] Add success.
I0322 13:15:13.409793  543705 cpu.go:282] Add success.
W0322 13:15:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:15:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:15:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:15:13.420125  543705 net.go:648] Add success.
I0322 13:15:13.422901  543705 net.go:770] primary dev: ETH0
I0322 13:15:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:15:13.422927  543705 net.go:698] Add success.
I0322 13:15:13.469329  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9a86cbc7-5b6b-4661-9be5-5d9b37b464a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:15:13.469364  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:15:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:15:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:15:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:15:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:15:14.456668  543705 disk_worker.go:494] system disk:vda1
I0322 13:15:14.456697  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:15:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:15:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:15:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:15:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:15:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:15:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:15:23.409775  543705 memory.go:184] no items to output this cycle
I0322 13:15:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 13:15:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:15:33.409795  543705 memory.go:184] no items to output this cycle
I0322 13:15:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 13:15:34.861672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:15:34.864303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:15:34.864308  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0a00 0xc0003c0a40]
I0322 13:15:39.657736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:15:39.657743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:15:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:15:43.410774  543705 memory.go:191] Add success.
I0322 13:15:43.409811  543705 cpu.go:282] Add success.
I0322 13:15:43.419693  543705 net.go:648] Add success.
I0322 13:15:43.422646  543705 net.go:770] primary dev: ETH0
I0322 13:15:43.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:15:43.422671  543705 net.go:698] Add success.
I0322 13:15:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:15:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:15:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:15:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:15:53.409783  543705 memory.go:184] no items to output this cycle
I0322 13:15:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 13:16:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:16:03.409796  543705 memory.go:184] no items to output this cycle
I0322 13:16:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 13:16:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:16:13.409834  543705 memory.go:191] Add success.
I0322 13:16:13.409846  543705 cpu.go:282] Add success.
W0322 13:16:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:16:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:16:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:16:13.420158  543705 net.go:648] Add success.
I0322 13:16:13.422806  543705 net.go:770] primary dev: ETH0
I0322 13:16:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:16:13.422830  543705 net.go:698] Add success.
I0322 13:16:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:16:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:16:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 13:16:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:16:14.456618  543705 disk_worker.go:494] system disk:vda1
I0322 13:16:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:16:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:16:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:16:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:16:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:16:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:16:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:16:23.409819  543705 memory.go:184] no items to output this cycle
I0322 13:16:23.409827  543705 cpu.go:275] no items to output this cycle
E0322 13:16:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:16:33.409797  543705 memory.go:184] no items to output this cycle
I0322 13:16:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 13:16:34.865671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:16:34.868259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:16:34.868264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8b40 0xc0004d8b80]
E0322 13:16:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:16:43.410700  543705 memory.go:191] Add success.
I0322 13:16:43.409793  543705 cpu.go:282] Add success.
I0322 13:16:43.420449  543705 net.go:648] Add success.
I0322 13:16:43.423005  543705 net.go:770] primary dev: ETH0
I0322 13:16:43.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:16:43.423031  543705 net.go:698] Add success.
I0322 13:16:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:16:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:16:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:16:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:16:53.409798  543705 memory.go:184] no items to output this cycle
I0322 13:16:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 13:17:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:17:03.409798  543705 memory.go:184] no items to output this cycle
I0322 13:17:03.409813  543705 cpu.go:275] no items to output this cycle
W0322 13:17:13.409712  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:17:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:17:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:17:13.409800  543705 cpu.go:282] Add success.
E0322 13:17:13.409834  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:17:13.409856  543705 memory.go:191] Add success.
I0322 13:17:13.420030  543705 net.go:648] Add success.
I0322 13:17:13.423035  543705 net.go:770] primary dev: ETH0
I0322 13:17:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:17:13.423061  543705 net.go:698] Add success.
I0322 13:17:13.453619  543705 event_worker.go:152] Polling the log file for events...
W0322 13:17:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:17:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 13:17:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:17:14.456938  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:17:14.456948  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:17:14.456954  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:17:14.457003  543705 disk_worker.go:494] system disk:vda1
I0322 13:17:14.457046  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:17:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:17:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:17:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:17:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:17:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:17:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:17:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:17:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:17:23.409776  543705 memory.go:184] no items to output this cycle
I0322 13:17:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:17:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:17:33.409800  543705 memory.go:184] no items to output this cycle
I0322 13:17:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 13:17:34.869672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:17:34.872210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:17:34.872216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000274e80 0xc000274ec0]
E0322 13:17:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:17:43.410719  543705 memory.go:191] Add success.
I0322 13:17:43.409801  543705 cpu.go:282] Add success.
I0322 13:17:43.420380  543705 net.go:648] Add success.
I0322 13:17:43.423212  543705 net.go:770] primary dev: ETH0
I0322 13:17:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:17:43.423239  543705 net.go:698] Add success.
I0322 13:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:17:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:17:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:17:53.409779  543705 cpu.go:275] no items to output this cycle
I0322 13:17:53.409781  543705 memory.go:184] no items to output this cycle
E0322 13:18:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:18:03.409777  543705 memory.go:184] no items to output this cycle
I0322 13:18:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 13:18:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:18:13.409798  543705 memory.go:191] Add success.
W0322 13:18:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:18:13.409842  543705 cpu.go:282] Add success.
W0322 13:18:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:18:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:18:13.420134  543705 net.go:648] Add success.
I0322 13:18:13.422944  543705 net.go:770] primary dev: ETH0
I0322 13:18:13.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:18:13.422973  543705 net.go:698] Add success.
I0322 13:18:13.470253  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6eb28ec0-f052-4b0b-9ae7-b3509aeaa1ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:18:13.470287  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:18:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:18:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:18:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 13:18:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:18:14.456664  543705 disk_worker.go:494] system disk:vda1
I0322 13:18:14.456690  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:18:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:18:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:18:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:18:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:18:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 13:18:23.409787  543705 memory.go:184] no items to output this cycle
E0322 13:18:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:18:33.409780  543705 memory.go:184] no items to output this cycle
I0322 13:18:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 13:18:34.873678  543705 disk_info.go:125] begin check local disk info of client
I0322 13:18:34.876203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:18:34.876209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f4a80 0xc0003f4ac0]
I0322 13:18:39.658986  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:18:39.658992  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:18:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:18:43.410692  543705 memory.go:191] Add success.
I0322 13:18:43.409806  543705 cpu.go:282] Add success.
I0322 13:18:43.420411  543705 net.go:648] Add success.
I0322 13:18:43.423132  543705 net.go:770] primary dev: ETH0
I0322 13:18:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:18:43.423172  543705 net.go:698] Add success.
I0322 13:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:18:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:18:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:18:53.409773  543705 memory.go:184] no items to output this cycle
I0322 13:18:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 13:19:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:19:03.409782  543705 memory.go:184] no items to output this cycle
I0322 13:19:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 13:19:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:19:13.409798  543705 memory.go:191] Add success.
I0322 13:19:13.409814  543705 cpu.go:282] Add success.
W0322 13:19:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:19:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:19:13.420183  543705 net.go:648] Add success.
I0322 13:19:13.423243  543705 net.go:770] primary dev: ETH0
I0322 13:19:13.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:19:13.423268  543705 net.go:698] Add success.
I0322 13:19:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:19:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:19:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 13:19:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:19:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 13:19:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:19:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:19:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:19:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:19:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:19:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:19:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:19:23.409786  543705 memory.go:184] no items to output this cycle
I0322 13:19:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:19:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:19:33.409770  543705 memory.go:184] no items to output this cycle
I0322 13:19:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 13:19:34.877674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:19:34.880220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:19:34.880225  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb00 0xc00007bb40]
E0322 13:19:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:19:43.410641  543705 memory.go:191] Add success.
I0322 13:19:43.409826  543705 cpu.go:282] Add success.
I0322 13:19:43.420377  543705 net.go:648] Add success.
I0322 13:19:43.423169  543705 net.go:770] primary dev: ETH0
I0322 13:19:43.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:19:43.423204  543705 net.go:698] Add success.
I0322 13:19:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:19:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:19:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:19:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:19:53.409800  543705 memory.go:184] no items to output this cycle
I0322 13:19:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 13:20:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:20:03.409795  543705 memory.go:184] no items to output this cycle
I0322 13:20:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 13:20:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:20:13.409801  543705 memory.go:191] Add success.
W0322 13:20:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:20:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:20:13.409840  543705 cpu.go:282] Add success.
I0322 13:20:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:20:13.420452  543705 net.go:648] Add success.
I0322 13:20:13.423455  543705 net.go:770] primary dev: ETH0
I0322 13:20:13.423469  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:20:13.423481  543705 net.go:698] Add success.
I0322 13:20:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:20:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:20:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 13:20:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:20:14.456519  543705 disk_worker.go:494] system disk:vda1
I0322 13:20:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:20:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:20:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:20:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:20:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:20:23.409790  543705 memory.go:184] no items to output this cycle
I0322 13:20:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:20:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:20:33.409779  543705 memory.go:184] no items to output this cycle
I0322 13:20:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 13:20:34.881673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:20:34.884216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:20:34.884222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5d00 0xc0003d5d40]
E0322 13:20:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:20:43.410594  543705 memory.go:191] Add success.
I0322 13:20:43.409794  543705 cpu.go:282] Add success.
I0322 13:20:43.420289  543705 net.go:648] Add success.
I0322 13:20:43.422917  543705 net.go:770] primary dev: ETH0
I0322 13:20:43.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:20:43.422941  543705 net.go:698] Add success.
I0322 13:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:20:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:20:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:20:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:20:53.409775  543705 memory.go:184] no items to output this cycle
I0322 13:20:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 13:21:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:21:03.409774  543705 memory.go:184] no items to output this cycle
I0322 13:21:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 13:21:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:21:13.409822  543705 memory.go:191] Add success.
I0322 13:21:13.409827  543705 cpu.go:282] Add success.
W0322 13:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:21:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:21:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:21:13.420403  543705 net.go:648] Add success.
I0322 13:21:13.423323  543705 net.go:770] primary dev: ETH0
I0322 13:21:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:21:13.423348  543705 net.go:698] Add success.
I0322 13:21:13.467560  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a4773bcd-d5bb-4eea-886e-f29ca49ad181","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:21:13.467591  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:21:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:21:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:21:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 13:21:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:21:14.456557  543705 disk_worker.go:494] system disk:vda1
I0322 13:21:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:21:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:21:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:21:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:21:23.409812  543705 memory.go:184] no items to output this cycle
I0322 13:21:23.409827  543705 cpu.go:275] no items to output this cycle
E0322 13:21:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:21:33.409774  543705 memory.go:184] no items to output this cycle
I0322 13:21:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 13:21:34.885674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:21:34.888221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:21:34.888226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b740 0xc00007b780]
I0322 13:21:39.659990  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:21:39.659997  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:21:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:21:43.410820  543705 memory.go:191] Add success.
I0322 13:21:43.409819  543705 cpu.go:282] Add success.
I0322 13:21:43.420471  543705 net.go:648] Add success.
I0322 13:21:43.423444  543705 net.go:770] primary dev: ETH0
I0322 13:21:43.423459  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:21:43.423474  543705 net.go:698] Add success.
I0322 13:21:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:21:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:21:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:21:53.410372  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:21:53.410387  543705 memory.go:184] no items to output this cycle
I0322 13:21:53.410396  543705 cpu.go:275] no items to output this cycle
E0322 13:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:22:03.409780  543705 memory.go:184] no items to output this cycle
I0322 13:22:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:22:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:22:13.409811  543705 memory.go:191] Add success.
I0322 13:22:13.409821  543705 cpu.go:282] Add success.
W0322 13:22:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:22:13.420342  543705 net.go:648] Add success.
I0322 13:22:13.423539  543705 net.go:770] primary dev: ETH0
I0322 13:22:13.423574  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:22:13.423605  543705 net.go:698] Add success.
W0322 13:22:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:22:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 13:22:14.455206  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:22:14.455942  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:22:14.455951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:22:14.455957  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:22:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 13:22:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:22:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:22:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:22:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:22:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:22:16.457955  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:22:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:22:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:22:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:22:23.409795  543705 cpu.go:275] no items to output this cycle
I0322 13:22:23.409797  543705 memory.go:184] no items to output this cycle
E0322 13:22:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:22:33.409770  543705 memory.go:184] no items to output this cycle
I0322 13:22:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 13:22:34.889675  543705 disk_info.go:125] begin check local disk info of client
I0322 13:22:34.892227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:22:34.892233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8c80 0xc0003b8fc0]
E0322 13:22:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:22:43.410703  543705 memory.go:191] Add success.
I0322 13:22:43.409797  543705 cpu.go:282] Add success.
I0322 13:22:43.420460  543705 net.go:648] Add success.
I0322 13:22:43.422954  543705 net.go:770] primary dev: ETH0
I0322 13:22:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:22:43.422981  543705 net.go:698] Add success.
I0322 13:22:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:22:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:22:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:22:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:22:53.409769  543705 memory.go:184] no items to output this cycle
I0322 13:22:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 13:23:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:23:03.409798  543705 memory.go:184] no items to output this cycle
I0322 13:23:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:23:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:23:13.409789  543705 memory.go:191] Add success.
I0322 13:23:13.409798  543705 cpu.go:282] Add success.
W0322 13:23:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:23:13.412651  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:23:13.412655  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:23:13.420246  543705 net.go:648] Add success.
I0322 13:23:13.422064  543705 net.go:770] primary dev: ETH0
I0322 13:23:13.422077  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:23:13.422089  543705 net.go:698] Add success.
I0322 13:23:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:23:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:23:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 13:23:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:23:14.457181  543705 disk_worker.go:494] system disk:vda1
I0322 13:23:14.457212  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:23:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:23:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:23:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:23:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:23:23.409789  543705 memory.go:184] no items to output this cycle
I0322 13:23:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 13:23:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:23:33.409784  543705 memory.go:184] no items to output this cycle
I0322 13:23:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 13:23:34.893669  543705 disk_info.go:125] begin check local disk info of client
I0322 13:23:34.896245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:23:34.896250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0322 13:23:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:23:43.410667  543705 memory.go:191] Add success.
I0322 13:23:43.409816  543705 cpu.go:282] Add success.
I0322 13:23:43.420509  543705 net.go:648] Add success.
I0322 13:23:43.423512  543705 net.go:770] primary dev: ETH0
I0322 13:23:43.423525  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:23:43.423537  543705 net.go:698] Add success.
I0322 13:23:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:23:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:23:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:23:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:23:53.409799  543705 memory.go:184] no items to output this cycle
I0322 13:23:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 13:24:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:24:03.409777  543705 memory.go:184] no items to output this cycle
I0322 13:24:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 13:24:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:24:13.409813  543705 memory.go:191] Add success.
I0322 13:24:13.409824  543705 cpu.go:282] Add success.
W0322 13:24:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:24:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:24:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:24:13.420099  543705 net.go:648] Add success.
I0322 13:24:13.422992  543705 net.go:770] primary dev: ETH0
I0322 13:24:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:24:13.423018  543705 net.go:698] Add success.
I0322 13:24:13.468797  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"34bb6769-1c94-4f12-85ed-fc6889e00a7c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:24:13.468830  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:24:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:24:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:24:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0322 13:24:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:24:14.456738  543705 disk_worker.go:494] system disk:vda1
I0322 13:24:14.456773  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:24:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:24:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:24:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:24:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:24:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:24:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:24:23.409811  543705 memory.go:184] no items to output this cycle
I0322 13:24:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 13:24:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:24:33.409780  543705 memory.go:184] no items to output this cycle
I0322 13:24:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 13:24:34.897673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:24:34.900325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:24:34.900332  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049c8c0 0xc00049c900]
I0322 13:24:39.661009  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:24:39.661017  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:24:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:24:43.410800  543705 memory.go:191] Add success.
I0322 13:24:43.409821  543705 cpu.go:282] Add success.
I0322 13:24:43.420625  543705 net.go:648] Add success.
I0322 13:24:43.423306  543705 net.go:770] primary dev: ETH0
I0322 13:24:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:24:43.423333  543705 net.go:698] Add success.
I0322 13:24:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:24:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:24:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:24:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:24:53.409773  543705 memory.go:184] no items to output this cycle
I0322 13:24:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 13:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:25:03.409805  543705 memory.go:184] no items to output this cycle
I0322 13:25:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 13:25:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:25:13.409783  543705 memory.go:191] Add success.
W0322 13:25:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:25:13.409813  543705 cpu.go:282] Add success.
W0322 13:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:25:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:25:13.420138  543705 net.go:648] Add success.
I0322 13:25:13.422779  543705 net.go:770] primary dev: ETH0
I0322 13:25:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:25:13.422803  543705 net.go:698] Add success.
I0322 13:25:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:25:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:25:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:25:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:25:14.456613  543705 disk_worker.go:494] system disk:vda1
I0322 13:25:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:25:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:25:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:25:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:25:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:25:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:25:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:25:23.409782  543705 memory.go:184] no items to output this cycle
I0322 13:25:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 13:25:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:25:33.409795  543705 memory.go:184] no items to output this cycle
I0322 13:25:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 13:25:34.901688  543705 disk_info.go:125] begin check local disk info of client
I0322 13:25:34.904226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:25:34.904231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9300 0xc0004d9340]
E0322 13:25:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:25:43.410762  543705 memory.go:191] Add success.
I0322 13:25:43.409819  543705 cpu.go:282] Add success.
I0322 13:25:43.420450  543705 net.go:648] Add success.
I0322 13:25:43.423362  543705 net.go:770] primary dev: ETH0
I0322 13:25:43.423377  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:25:43.423392  543705 net.go:698] Add success.
I0322 13:25:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:25:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:25:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:25:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:25:53.409782  543705 memory.go:184] no items to output this cycle
I0322 13:25:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 13:26:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:26:03.409807  543705 memory.go:184] no items to output this cycle
I0322 13:26:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 13:26:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:26:13.409796  543705 memory.go:191] Add success.
I0322 13:26:13.409798  543705 cpu.go:282] Add success.
W0322 13:26:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:26:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:26:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:26:13.420249  543705 net.go:648] Add success.
I0322 13:26:13.423345  543705 net.go:770] primary dev: ETH0
I0322 13:26:13.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:26:13.423371  543705 net.go:698] Add success.
I0322 13:26:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:26:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:26:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 13:26:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:26:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 13:26:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:26:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:26:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:26:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:26:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:26:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:26:23.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:26:23.409920  543705 memory.go:184] no items to output this cycle
I0322 13:26:23.409982  543705 cpu.go:275] no items to output this cycle
E0322 13:26:33.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:26:33.409759  543705 memory.go:184] no items to output this cycle
I0322 13:26:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 13:26:34.905681  543705 disk_info.go:125] begin check local disk info of client
I0322 13:26:34.908224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:26:34.908230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 13:26:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:26:43.410578  543705 memory.go:191] Add success.
I0322 13:26:43.409825  543705 cpu.go:282] Add success.
I0322 13:26:43.420256  543705 net.go:648] Add success.
I0322 13:26:43.423218  543705 net.go:770] primary dev: ETH0
I0322 13:26:43.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:26:43.423244  543705 net.go:698] Add success.
I0322 13:26:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:26:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:26:53.410394  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:26:53.410409  543705 memory.go:184] no items to output this cycle
I0322 13:26:53.410418  543705 cpu.go:275] no items to output this cycle
E0322 13:27:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:27:03.409801  543705 memory.go:184] no items to output this cycle
I0322 13:27:03.409816  543705 cpu.go:275] no items to output this cycle
W0322 13:27:13.409720  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0322 13:27:13.409734  543705 conf_downlod.go:89] use old conf
E0322 13:27:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:27:13.409773  543705 memory.go:191] Add success.
W0322 13:27:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:27:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:27:13.409811  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:27:13.409828  543705 cpu.go:282] Add success.
I0322 13:27:13.420063  543705 net.go:648] Add success.
I0322 13:27:13.423022  543705 net.go:770] primary dev: ETH0
I0322 13:27:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:27:13.423054  543705 net.go:698] Add success.
I0322 13:27:13.429216  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 13:27:13.453386  543705 event_worker.go:152] Polling the log file for events...
I0322 13:27:13.463378  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c72dc2ef-1bee-4440-89be-ae47616e6276","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:27:13.463411  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 13:27:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:27:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 13:27:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:27:14.455872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:27:14.455882  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:27:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:27:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 13:27:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:27:15.456780  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:27:15.456788  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:27:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:27:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:27:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:27:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:27:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:27:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:27:23.409811  543705 memory.go:184] no items to output this cycle
I0322 13:27:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 13:27:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:27:33.409787  543705 memory.go:184] no items to output this cycle
I0322 13:27:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 13:27:34.909674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:27:34.912205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:27:34.912211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf380 0xc0003bf3c0]
I0322 13:27:39.661731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:27:39.661738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:27:43.410650  543705 memory.go:191] Add success.
I0322 13:27:43.409788  543705 cpu.go:282] Add success.
I0322 13:27:43.420328  543705 net.go:648] Add success.
I0322 13:27:43.422922  543705 net.go:770] primary dev: ETH0
I0322 13:27:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:27:43.422956  543705 net.go:698] Add success.
I0322 13:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:27:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:27:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:27:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:27:53.409774  543705 memory.go:184] no items to output this cycle
I0322 13:27:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 13:28:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:28:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 13:28:03.409784  543705 memory.go:184] no items to output this cycle
E0322 13:28:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:28:13.409798  543705 memory.go:191] Add success.
I0322 13:28:13.409803  543705 cpu.go:282] Add success.
W0322 13:28:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:28:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:28:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:28:13.420100  543705 net.go:648] Add success.
I0322 13:28:13.422746  543705 net.go:770] primary dev: ETH0
I0322 13:28:13.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:28:13.422771  543705 net.go:698] Add success.
I0322 13:28:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:28:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:28:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 13:28:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:28:14.456486  543705 disk_worker.go:494] system disk:vda1
I0322 13:28:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:28:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:28:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:28:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:28:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:28:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:28:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:28:23.409784  543705 memory.go:184] no items to output this cycle
I0322 13:28:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 13:28:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:28:33.409803  543705 memory.go:184] no items to output this cycle
I0322 13:28:33.409919  543705 cpu.go:275] no items to output this cycle
I0322 13:28:34.913675  543705 disk_info.go:125] begin check local disk info of client
I0322 13:28:34.916321  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:28:34.916327  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0322 13:28:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:28:43.410632  543705 memory.go:191] Add success.
I0322 13:28:43.409800  543705 cpu.go:282] Add success.
I0322 13:28:43.420343  543705 net.go:648] Add success.
I0322 13:28:43.423205  543705 net.go:770] primary dev: ETH0
I0322 13:28:43.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:28:43.423234  543705 net.go:698] Add success.
I0322 13:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:28:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:28:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:28:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:28:53.409770  543705 memory.go:184] no items to output this cycle
I0322 13:28:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 13:29:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:29:03.409762  543705 memory.go:184] no items to output this cycle
I0322 13:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 13:29:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:29:13.409813  543705 memory.go:191] Add success.
I0322 13:29:13.409824  543705 cpu.go:282] Add success.
W0322 13:29:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:29:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:29:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:29:13.420056  543705 net.go:648] Add success.
I0322 13:29:13.423051  543705 net.go:770] primary dev: ETH0
I0322 13:29:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:29:13.423081  543705 net.go:698] Add success.
I0322 13:29:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:29:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:29:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 13:29:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:29:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 13:29:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:29:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:29:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:29:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:29:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:29:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:29:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:29:23.409786  543705 memory.go:184] no items to output this cycle
I0322 13:29:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:29:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:29:33.409780  543705 memory.go:184] no items to output this cycle
I0322 13:29:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 13:29:34.917672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:29:34.920238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:29:34.920244  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7900 0xc0003b7940]
E0322 13:29:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:29:43.410791  543705 memory.go:191] Add success.
I0322 13:29:43.409801  543705 cpu.go:282] Add success.
I0322 13:29:43.420506  543705 net.go:648] Add success.
I0322 13:29:43.423143  543705 net.go:770] primary dev: ETH0
I0322 13:29:43.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:29:43.423168  543705 net.go:698] Add success.
I0322 13:29:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:29:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:29:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:29:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:29:53.409795  543705 memory.go:184] no items to output this cycle
I0322 13:29:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 13:30:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:30:03.409773  543705 memory.go:184] no items to output this cycle
I0322 13:30:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 13:30:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:30:13.409817  543705 memory.go:191] Add success.
I0322 13:30:13.409822  543705 cpu.go:282] Add success.
W0322 13:30:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:30:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:30:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:30:13.420152  543705 net.go:648] Add success.
I0322 13:30:13.422719  543705 net.go:770] primary dev: ETH0
I0322 13:30:13.422738  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:30:13.422753  543705 net.go:698] Add success.
I0322 13:30:13.473280  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"481ab4d7-47bc-4a3e-9a40-33563eff5b2e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:30:13.473326  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:30:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:30:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:30:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 13:30:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:30:14.456734  543705 disk_worker.go:494] system disk:vda1
I0322 13:30:14.456770  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:30:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:30:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:30:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:30:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:30:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:30:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:30:23.409785  543705 memory.go:184] no items to output this cycle
I0322 13:30:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 13:30:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:30:33.409775  543705 memory.go:184] no items to output this cycle
I0322 13:30:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 13:30:34.921674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:30:34.924230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:30:34.924235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0322 13:30:39.663014  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:30:39.663019  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:30:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:30:43.410802  543705 memory.go:191] Add success.
I0322 13:30:43.409814  543705 cpu.go:282] Add success.
I0322 13:30:43.420495  543705 net.go:648] Add success.
I0322 13:30:43.423630  543705 net.go:770] primary dev: ETH0
I0322 13:30:43.423642  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:30:43.423655  543705 net.go:698] Add success.
I0322 13:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:30:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:30:53.410423  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:30:53.410432  543705 cpu.go:275] no items to output this cycle
I0322 13:30:53.410438  543705 memory.go:184] no items to output this cycle
E0322 13:31:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:31:03.409799  543705 memory.go:184] no items to output this cycle
I0322 13:31:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 13:31:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:31:13.409801  543705 memory.go:191] Add success.
I0322 13:31:13.409808  543705 cpu.go:282] Add success.
W0322 13:31:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:31:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:31:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:31:13.420071  543705 net.go:648] Add success.
I0322 13:31:13.422844  543705 net.go:770] primary dev: ETH0
I0322 13:31:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:31:13.422869  543705 net.go:698] Add success.
I0322 13:31:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:31:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:31:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 13:31:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:31:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 13:31:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:31:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:31:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:31:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:31:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:31:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:31:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:31:23.409797  543705 memory.go:184] no items to output this cycle
I0322 13:31:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:31:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:31:33.409803  543705 memory.go:184] no items to output this cycle
I0322 13:31:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 13:31:34.925666  543705 disk_info.go:125] begin check local disk info of client
I0322 13:31:34.928245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:31:34.928250  543705 disk_info.go:196] parse disk info done, disk is : [0xc000348cc0 0xc000348d00]
E0322 13:31:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:31:43.410981  543705 memory.go:191] Add success.
I0322 13:31:43.409813  543705 cpu.go:282] Add success.
I0322 13:31:43.420994  543705 net.go:648] Add success.
I0322 13:31:43.424151  543705 net.go:770] primary dev: ETH0
I0322 13:31:43.424163  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:31:43.424174  543705 net.go:698] Add success.
I0322 13:31:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:31:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:31:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:31:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:31:53.409771  543705 memory.go:184] no items to output this cycle
I0322 13:31:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 13:32:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:32:03.409800  543705 memory.go:184] no items to output this cycle
I0322 13:32:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:32:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:32:13.409795  543705 memory.go:191] Add success.
I0322 13:32:13.409798  543705 cpu.go:282] Add success.
W0322 13:32:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:32:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:32:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:32:13.420232  543705 net.go:648] Add success.
I0322 13:32:13.423044  543705 net.go:770] primary dev: ETH0
I0322 13:32:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:32:13.423069  543705 net.go:698] Add success.
W0322 13:32:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:32:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 13:32:14.455158  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:32:14.456926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:32:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:32:14.456941  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:32:14.457016  543705 disk_worker.go:494] system disk:vda1
I0322 13:32:14.457059  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:32:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:32:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:32:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:32:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:32:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:32:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:32:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:32:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:32:23.409814  543705 memory.go:184] no items to output this cycle
I0322 13:32:23.409825  543705 cpu.go:275] no items to output this cycle
E0322 13:32:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:32:33.409805  543705 memory.go:184] no items to output this cycle
I0322 13:32:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 13:32:34.929671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:32:34.932124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:32:34.932130  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c400 0xc00034c440]
E0322 13:32:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:32:43.410740  543705 memory.go:191] Add success.
I0322 13:32:43.409821  543705 cpu.go:282] Add success.
I0322 13:32:43.420412  543705 net.go:648] Add success.
I0322 13:32:43.423538  543705 net.go:770] primary dev: ETH0
I0322 13:32:43.423550  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:32:43.423562  543705 net.go:698] Add success.
I0322 13:32:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:32:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:32:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:32:53.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:32:53.409907  543705 memory.go:184] no items to output this cycle
I0322 13:32:53.409930  543705 cpu.go:275] no items to output this cycle
E0322 13:33:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:33:03.409783  543705 memory.go:184] no items to output this cycle
I0322 13:33:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 13:33:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:33:13.409799  543705 memory.go:191] Add success.
I0322 13:33:13.409800  543705 cpu.go:282] Add success.
W0322 13:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:33:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:33:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:33:13.420174  543705 net.go:648] Add success.
I0322 13:33:13.422686  543705 net.go:770] primary dev: ETH0
I0322 13:33:13.422701  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:33:13.422723  543705 net.go:698] Add success.
I0322 13:33:13.985198  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"255765c2-89ee-45fa-808a-f5efeeb47bc6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:33:13.985234  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:33:14.454684  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:33:14.454870  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:33:14.454881  543705 disk_worker.go:708] disk space is not compliant
W0322 13:33:14.454883  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:33:14.456263  543705 disk_worker.go:494] system disk:vda1
I0322 13:33:14.456323  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:33:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:33:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:33:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:33:23.409806  543705 memory.go:184] no items to output this cycle
I0322 13:33:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 13:33:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:33:33.409797  543705 memory.go:184] no items to output this cycle
I0322 13:33:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 13:33:34.933673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:33:34.936239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:33:34.936245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262080 0xc0002620c0]
I0322 13:33:39.664001  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:33:39.664007  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:33:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:33:43.410750  543705 memory.go:191] Add success.
I0322 13:33:43.409825  543705 cpu.go:282] Add success.
I0322 13:33:43.420499  543705 net.go:648] Add success.
I0322 13:33:43.423476  543705 net.go:770] primary dev: ETH0
I0322 13:33:43.423491  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:33:43.423504  543705 net.go:698] Add success.
I0322 13:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:33:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:33:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:33:53.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:33:53.409761  543705 memory.go:184] no items to output this cycle
I0322 13:33:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:34:03.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:34:03.409992  543705 memory.go:184] no items to output this cycle
I0322 13:34:03.410021  543705 cpu.go:275] no items to output this cycle
E0322 13:34:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:34:13.409796  543705 memory.go:191] Add success.
I0322 13:34:13.409801  543705 cpu.go:282] Add success.
W0322 13:34:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:34:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:34:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:34:13.420337  543705 net.go:648] Add success.
I0322 13:34:13.423219  543705 net.go:770] primary dev: ETH0
I0322 13:34:13.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:34:13.423244  543705 net.go:698] Add success.
I0322 13:34:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:34:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:34:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 13:34:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:34:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 13:34:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:34:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:34:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:34:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:34:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:34:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:34:23.409786  543705 memory.go:184] no items to output this cycle
I0322 13:34:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:34:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:34:33.409798  543705 memory.go:184] no items to output this cycle
I0322 13:34:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 13:34:34.937674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:34:34.940138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:34:34.940144  543705 disk_info.go:196] parse disk info done, disk is : [0xc000299d80 0xc000299dc0]
E0322 13:34:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:34:43.410779  543705 memory.go:191] Add success.
I0322 13:34:43.409798  543705 cpu.go:282] Add success.
I0322 13:34:43.420476  543705 net.go:648] Add success.
I0322 13:34:43.423719  543705 net.go:770] primary dev: ETH0
I0322 13:34:43.423731  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:34:43.423743  543705 net.go:698] Add success.
I0322 13:34:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:34:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:34:53.409796  543705 memory.go:184] no items to output this cycle
I0322 13:34:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:35:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:35:03.409780  543705 memory.go:184] no items to output this cycle
I0322 13:35:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:35:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:35:13.409794  543705 cpu.go:282] Add success.
I0322 13:35:13.409801  543705 memory.go:191] Add success.
W0322 13:35:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:35:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:35:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:35:13.420104  543705 net.go:648] Add success.
I0322 13:35:13.422832  543705 net.go:770] primary dev: ETH0
I0322 13:35:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:35:13.422856  543705 net.go:698] Add success.
I0322 13:35:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:35:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:35:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 13:35:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:35:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 13:35:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:35:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:35:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:35:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:35:23.410402  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:35:23.410418  543705 cpu.go:275] no items to output this cycle
I0322 13:35:23.410420  543705 memory.go:184] no items to output this cycle
E0322 13:35:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:35:33.409779  543705 memory.go:184] no items to output this cycle
I0322 13:35:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 13:35:34.941673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:35:34.944290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:35:34.944296  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5800 0xc0000c5840]
E0322 13:35:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:35:43.410844  543705 memory.go:191] Add success.
I0322 13:35:43.409795  543705 cpu.go:282] Add success.
I0322 13:35:43.420547  543705 net.go:648] Add success.
I0322 13:35:43.423341  543705 net.go:770] primary dev: ETH0
I0322 13:35:43.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:35:43.423368  543705 net.go:698] Add success.
I0322 13:35:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:35:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:35:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:35:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:35:53.409791  543705 memory.go:184] no items to output this cycle
I0322 13:35:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 13:36:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:36:03.409785  543705 memory.go:184] no items to output this cycle
I0322 13:36:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 13:36:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:36:13.409785  543705 memory.go:191] Add success.
W0322 13:36:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:36:13.409817  543705 cpu.go:282] Add success.
W0322 13:36:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:36:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:36:13.420179  543705 net.go:648] Add success.
I0322 13:36:13.423020  543705 net.go:770] primary dev: ETH0
I0322 13:36:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:36:13.423046  543705 net.go:698] Add success.
I0322 13:36:13.469373  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c0cb6bdb-793b-4413-b201-145b736ca6a4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:36:13.469405  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:36:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:36:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:36:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:36:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:36:14.456628  543705 disk_worker.go:494] system disk:vda1
I0322 13:36:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:36:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:36:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:36:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:36:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:36:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:36:23.409794  543705 memory.go:184] no items to output this cycle
I0322 13:36:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 13:36:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:36:33.409764  543705 memory.go:184] no items to output this cycle
I0322 13:36:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 13:36:34.945673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:36:34.948222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:36:34.948228  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029f680 0xc00029f6c0]
I0322 13:36:39.665026  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:36:39.665032  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:36:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:36:43.410700  543705 memory.go:191] Add success.
I0322 13:36:43.409809  543705 cpu.go:282] Add success.
I0322 13:36:43.420525  543705 net.go:648] Add success.
I0322 13:36:43.423507  543705 net.go:770] primary dev: ETH0
I0322 13:36:43.423521  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:36:43.423532  543705 net.go:698] Add success.
I0322 13:36:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:36:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:36:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:36:53.409802  543705 memory.go:184] no items to output this cycle
I0322 13:36:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 13:37:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:37:03.409804  543705 memory.go:184] no items to output this cycle
I0322 13:37:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 13:37:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:37:13.409788  543705 cpu.go:282] Add success.
I0322 13:37:13.409796  543705 memory.go:191] Add success.
W0322 13:37:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:37:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:37:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:37:13.420058  543705 net.go:648] Add success.
I0322 13:37:13.422962  543705 net.go:770] primary dev: ETH0
I0322 13:37:13.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:37:13.422986  543705 net.go:698] Add success.
I0322 13:37:13.453599  543705 event_worker.go:152] Polling the log file for events...
W0322 13:37:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:37:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 13:37:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:37:14.456952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:37:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:37:14.456967  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:37:14.457015  543705 disk_worker.go:494] system disk:vda1
I0322 13:37:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:37:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:37:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:37:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:37:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:37:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:37:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:37:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:37:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:37:23.409784  543705 memory.go:184] no items to output this cycle
I0322 13:37:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 13:37:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:37:33.409769  543705 memory.go:184] no items to output this cycle
I0322 13:37:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 13:37:34.949672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:37:34.952209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:37:34.952214  543705 disk_info.go:196] parse disk info done, disk is : [0xc000490340 0xc000490380]
I0322 13:37:43.409874  543705 cpu.go:282] Add success.
E0322 13:37:43.409914  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:37:43.410824  543705 memory.go:191] Add success.
I0322 13:37:43.419730  543705 net.go:648] Add success.
I0322 13:37:43.422366  543705 net.go:770] primary dev: ETH0
I0322 13:37:43.422379  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:37:43.422391  543705 net.go:698] Add success.
I0322 13:37:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:37:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:37:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:37:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:37:53.409771  543705 memory.go:184] no items to output this cycle
I0322 13:37:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 13:38:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:38:03.409778  543705 memory.go:184] no items to output this cycle
I0322 13:38:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 13:38:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:38:13.409819  543705 memory.go:191] Add success.
I0322 13:38:13.409822  543705 cpu.go:282] Add success.
W0322 13:38:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:38:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:38:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:38:13.420204  543705 net.go:648] Add success.
I0322 13:38:13.423138  543705 net.go:770] primary dev: ETH0
I0322 13:38:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:38:13.423162  543705 net.go:698] Add success.
I0322 13:38:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:38:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:38:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 13:38:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:38:14.456628  543705 disk_worker.go:494] system disk:vda1
I0322 13:38:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:38:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:38:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:38:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:38:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:38:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:38:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:38:23.409792  543705 memory.go:184] no items to output this cycle
I0322 13:38:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:38:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:38:33.409782  543705 memory.go:184] no items to output this cycle
I0322 13:38:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 13:38:34.953671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:38:34.956263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:38:34.956270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aed00 0xc0002aed40]
E0322 13:38:43.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:38:43.410843  543705 memory.go:191] Add success.
I0322 13:38:43.409918  543705 cpu.go:282] Add success.
I0322 13:38:43.419719  543705 net.go:648] Add success.
I0322 13:38:43.422896  543705 net.go:770] primary dev: ETH0
I0322 13:38:43.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:38:43.422921  543705 net.go:698] Add success.
I0322 13:38:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:38:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:38:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:38:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:38:53.409801  543705 memory.go:184] no items to output this cycle
I0322 13:38:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 13:39:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:39:03.409792  543705 cpu.go:275] no items to output this cycle
I0322 13:39:03.409797  543705 memory.go:184] no items to output this cycle
E0322 13:39:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:39:13.409796  543705 memory.go:191] Add success.
I0322 13:39:13.409804  543705 cpu.go:282] Add success.
W0322 13:39:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:39:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:39:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:39:13.420221  543705 net.go:648] Add success.
I0322 13:39:13.423075  543705 net.go:770] primary dev: ETH0
I0322 13:39:13.423090  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:39:13.423104  543705 net.go:698] Add success.
I0322 13:39:13.468427  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fa17647f-d8fc-4700-9e6c-9429e187a4f9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:39:13.468465  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:39:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:39:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:39:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 13:39:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:39:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 13:39:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:39:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:39:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:39:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:39:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:39:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:39:23.409813  543705 memory.go:184] no items to output this cycle
I0322 13:39:23.409828  543705 cpu.go:275] no items to output this cycle
E0322 13:39:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:39:33.409771  543705 memory.go:184] no items to output this cycle
I0322 13:39:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 13:39:34.957673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:39:34.960211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:39:34.960232  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1540 0xc0002b1580]
I0322 13:39:39.665740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:39:39.665748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:39:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:39:43.410835  543705 memory.go:191] Add success.
I0322 13:39:43.409821  543705 cpu.go:282] Add success.
I0322 13:39:43.419712  543705 net.go:648] Add success.
I0322 13:39:43.422201  543705 net.go:770] primary dev: ETH0
I0322 13:39:43.422215  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:39:43.422227  543705 net.go:698] Add success.
I0322 13:39:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:39:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:39:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:39:53.409793  543705 memory.go:184] no items to output this cycle
I0322 13:39:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 13:40:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:40:03.409787  543705 memory.go:184] no items to output this cycle
I0322 13:40:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 13:40:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:40:13.409812  543705 memory.go:191] Add success.
I0322 13:40:13.409834  543705 cpu.go:282] Add success.
W0322 13:40:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:40:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:40:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:40:13.420270  543705 net.go:648] Add success.
I0322 13:40:13.422928  543705 net.go:770] primary dev: ETH0
I0322 13:40:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:40:13.422953  543705 net.go:698] Add success.
I0322 13:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:40:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:40:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 13:40:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:40:14.456556  543705 disk_worker.go:494] system disk:vda1
I0322 13:40:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:40:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:40:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:40:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:40:23.409793  543705 memory.go:184] no items to output this cycle
I0322 13:40:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 13:40:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:40:33.409773  543705 memory.go:184] no items to output this cycle
I0322 13:40:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 13:40:34.961673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:40:34.964219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:40:34.964225  543705 disk_info.go:196] parse disk info done, disk is : [0xc000257780 0xc0002577c0]
E0322 13:40:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:40:43.410621  543705 memory.go:191] Add success.
I0322 13:40:43.409813  543705 cpu.go:282] Add success.
I0322 13:40:43.420577  543705 net.go:648] Add success.
I0322 13:40:43.423655  543705 net.go:770] primary dev: ETH0
I0322 13:40:43.423669  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:40:43.423683  543705 net.go:698] Add success.
I0322 13:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:40:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:40:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:40:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:40:53.409801  543705 memory.go:184] no items to output this cycle
I0322 13:40:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 13:41:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:41:03.409810  543705 memory.go:184] no items to output this cycle
I0322 13:41:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 13:41:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:41:13.409824  543705 memory.go:191] Add success.
I0322 13:41:13.409831  543705 cpu.go:282] Add success.
W0322 13:41:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:41:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:41:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:41:13.420343  543705 net.go:648] Add success.
I0322 13:41:13.423230  543705 net.go:770] primary dev: ETH0
I0322 13:41:13.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:41:13.423255  543705 net.go:698] Add success.
I0322 13:41:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:41:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:41:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 13:41:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:41:14.456600  543705 disk_worker.go:494] system disk:vda1
I0322 13:41:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:41:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:41:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:41:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:41:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:41:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:41:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:41:23.409780  543705 memory.go:184] no items to output this cycle
I0322 13:41:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:41:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:41:33.409783  543705 memory.go:184] no items to output this cycle
I0322 13:41:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 13:41:34.965674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:41:34.968188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:41:34.968194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b1480 0xc0003b14c0]
E0322 13:41:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:41:43.410604  543705 memory.go:191] Add success.
I0322 13:41:43.409798  543705 cpu.go:282] Add success.
I0322 13:41:43.420336  543705 net.go:648] Add success.
I0322 13:41:43.423162  543705 net.go:770] primary dev: ETH0
I0322 13:41:43.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:41:43.423186  543705 net.go:698] Add success.
I0322 13:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:41:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:41:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:41:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:41:53.409772  543705 memory.go:184] no items to output this cycle
I0322 13:41:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 13:42:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:42:03.409779  543705 memory.go:184] no items to output this cycle
I0322 13:42:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 13:42:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:42:13.409811  543705 memory.go:191] Add success.
I0322 13:42:13.409814  543705 cpu.go:282] Add success.
W0322 13:42:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:42:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:42:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:42:13.420236  543705 net.go:648] Add success.
I0322 13:42:13.422980  543705 net.go:770] primary dev: ETH0
I0322 13:42:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:42:13.423005  543705 net.go:698] Add success.
I0322 13:42:13.468850  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dd281815-f96b-4052-a07f-b749f8634c6c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:42:13.468884  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 13:42:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:42:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:42:14.455200  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:42:14.456785  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:42:14.456794  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:42:14.456799  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:42:14.456855  543705 disk_worker.go:494] system disk:vda1
I0322 13:42:14.456885  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:42:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:42:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:42:16.457909  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:42:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:42:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:42:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:42:16.472317  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:42:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:42:23.409814  543705 memory.go:184] no items to output this cycle
I0322 13:42:23.409825  543705 cpu.go:275] no items to output this cycle
E0322 13:42:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:42:33.409766  543705 memory.go:184] no items to output this cycle
I0322 13:42:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 13:42:34.969676  543705 disk_info.go:125] begin check local disk info of client
I0322 13:42:34.972198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:42:34.972204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e75c0 0xc0003e7600]
I0322 13:42:39.667037  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:42:39.667043  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:42:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:42:43.410712  543705 memory.go:191] Add success.
I0322 13:42:43.409807  543705 cpu.go:282] Add success.
I0322 13:42:43.420391  543705 net.go:648] Add success.
I0322 13:42:43.423222  543705 net.go:770] primary dev: ETH0
I0322 13:42:43.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:42:43.423248  543705 net.go:698] Add success.
I0322 13:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:42:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:42:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:42:53.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:42:53.409881  543705 memory.go:184] no items to output this cycle
I0322 13:42:53.409909  543705 cpu.go:275] no items to output this cycle
E0322 13:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:43:03.409791  543705 memory.go:184] no items to output this cycle
I0322 13:43:03.409805  543705 cpu.go:275] no items to output this cycle
W0322 13:43:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:43:13.409743  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:43:13.409749  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:43:13.409803  543705 cpu.go:282] Add success.
E0322 13:43:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:43:13.409865  543705 memory.go:191] Add success.
I0322 13:43:13.420057  543705 net.go:648] Add success.
I0322 13:43:13.423290  543705 net.go:770] primary dev: ETH0
I0322 13:43:13.423303  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:43:13.423315  543705 net.go:698] Add success.
I0322 13:43:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:43:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:43:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 13:43:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:43:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 13:43:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:43:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:43:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:43:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:43:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:43:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:43:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:43:23.409809  543705 memory.go:184] no items to output this cycle
I0322 13:43:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:43:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:43:33.409777  543705 memory.go:184] no items to output this cycle
I0322 13:43:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 13:43:34.973672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:43:34.976258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:43:34.976264  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315000 0xc000315080]
E0322 13:43:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:43:43.410775  543705 memory.go:191] Add success.
I0322 13:43:43.409815  543705 cpu.go:282] Add success.
I0322 13:43:43.420488  543705 net.go:648] Add success.
I0322 13:43:43.423411  543705 net.go:770] primary dev: ETH0
I0322 13:43:43.423425  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:43:43.423439  543705 net.go:698] Add success.
I0322 13:43:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:43:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:43:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:43:53.409805  543705 memory.go:184] no items to output this cycle
I0322 13:43:53.409829  543705 cpu.go:275] no items to output this cycle
E0322 13:44:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:44:03.409811  543705 memory.go:184] no items to output this cycle
I0322 13:44:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 13:44:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:44:13.409794  543705 memory.go:191] Add success.
W0322 13:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:44:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:44:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:44:13.409839  543705 cpu.go:282] Add success.
I0322 13:44:13.420146  543705 net.go:648] Add success.
I0322 13:44:13.423036  543705 net.go:770] primary dev: ETH0
I0322 13:44:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:44:13.423061  543705 net.go:698] Add success.
I0322 13:44:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:44:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:44:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 13:44:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:44:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 13:44:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:44:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:44:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:44:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:44:23.409814  543705 memory.go:184] no items to output this cycle
I0322 13:44:23.409824  543705 cpu.go:275] no items to output this cycle
E0322 13:44:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:44:33.409784  543705 memory.go:184] no items to output this cycle
I0322 13:44:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 13:44:34.977672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:44:34.980355  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:44:34.980361  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9200 0xc0004d9240]
E0322 13:44:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:44:43.410868  543705 memory.go:191] Add success.
I0322 13:44:43.409796  543705 cpu.go:282] Add success.
I0322 13:44:43.420572  543705 net.go:648] Add success.
I0322 13:44:43.424011  543705 net.go:770] primary dev: ETH0
I0322 13:44:43.424024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:44:43.424038  543705 net.go:698] Add success.
I0322 13:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:44:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:44:53.409809  543705 memory.go:184] no items to output this cycle
I0322 13:44:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 13:45:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:45:03.409776  543705 memory.go:184] no items to output this cycle
I0322 13:45:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 13:45:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:45:13.409799  543705 cpu.go:282] Add success.
I0322 13:45:13.409807  543705 memory.go:191] Add success.
W0322 13:45:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:45:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:45:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:45:13.420145  543705 net.go:648] Add success.
I0322 13:45:13.423032  543705 net.go:770] primary dev: ETH0
I0322 13:45:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:45:13.423057  543705 net.go:698] Add success.
I0322 13:45:13.470546  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e30f702-80b1-4a86-8798-cd7a6126d659","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:45:13.470580  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:45:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:45:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:45:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 13:45:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:45:14.456522  543705 disk_worker.go:494] system disk:vda1
I0322 13:45:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:45:15.455613  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:45:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:45:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:45:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:45:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:45:23.409816  543705 memory.go:184] no items to output this cycle
I0322 13:45:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 13:45:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:45:33.409801  543705 memory.go:184] no items to output this cycle
I0322 13:45:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 13:45:34.981670  543705 disk_info.go:125] begin check local disk info of client
I0322 13:45:34.984216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:45:34.984222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3e80 0xc0002a3ec0]
I0322 13:45:39.668042  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:45:39.668049  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:45:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:45:43.410920  543705 memory.go:191] Add success.
I0322 13:45:43.409817  543705 cpu.go:282] Add success.
I0322 13:45:43.420614  543705 net.go:648] Add success.
I0322 13:45:43.423438  543705 net.go:770] primary dev: ETH0
I0322 13:45:43.423452  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:45:43.423464  543705 net.go:698] Add success.
I0322 13:45:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:45:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:45:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:45:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:45:53.409779  543705 memory.go:184] no items to output this cycle
I0322 13:45:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 13:46:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:46:03.409858  543705 memory.go:184] no items to output this cycle
I0322 13:46:03.409948  543705 cpu.go:275] no items to output this cycle
E0322 13:46:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:46:13.409827  543705 memory.go:191] Add success.
I0322 13:46:13.409835  543705 cpu.go:282] Add success.
W0322 13:46:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:46:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:46:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:46:13.420609  543705 net.go:648] Add success.
I0322 13:46:13.423340  543705 net.go:770] primary dev: ETH0
I0322 13:46:13.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:46:13.423369  543705 net.go:698] Add success.
I0322 13:46:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:46:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:46:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:46:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:46:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 13:46:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:46:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:46:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:46:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:46:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:46:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:46:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:46:23.409821  543705 memory.go:184] no items to output this cycle
I0322 13:46:23.409830  543705 cpu.go:275] no items to output this cycle
E0322 13:46:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:46:33.409804  543705 memory.go:184] no items to output this cycle
I0322 13:46:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 13:46:34.985670  543705 disk_info.go:125] begin check local disk info of client
I0322 13:46:34.988216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:46:34.988222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2a80 0xc0002a2ac0]
E0322 13:46:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:46:43.410710  543705 memory.go:191] Add success.
I0322 13:46:43.409789  543705 cpu.go:282] Add success.
I0322 13:46:43.420459  543705 net.go:648] Add success.
I0322 13:46:43.422966  543705 net.go:770] primary dev: ETH0
I0322 13:46:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:46:43.422994  543705 net.go:698] Add success.
I0322 13:46:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:46:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:46:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:46:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:46:53.409775  543705 memory.go:184] no items to output this cycle
I0322 13:46:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:47:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:47:03.409789  543705 memory.go:184] no items to output this cycle
I0322 13:47:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 13:47:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:47:13.409798  543705 memory.go:191] Add success.
I0322 13:47:13.409819  543705 cpu.go:282] Add success.
W0322 13:47:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:47:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:47:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:47:13.420160  543705 net.go:648] Add success.
I0322 13:47:13.423027  543705 net.go:770] primary dev: ETH0
I0322 13:47:13.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:47:13.423055  543705 net.go:698] Add success.
I0322 13:47:13.453605  543705 event_worker.go:152] Polling the log file for events...
W0322 13:47:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:47:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 13:47:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:47:14.456938  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:47:14.456947  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:47:14.456953  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:47:14.457000  543705 disk_worker.go:494] system disk:vda1
I0322 13:47:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:47:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:47:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:47:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:47:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:47:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:47:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:47:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:47:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:47:23.409806  543705 memory.go:184] no items to output this cycle
I0322 13:47:23.409870  543705 cpu.go:275] no items to output this cycle
E0322 13:47:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:47:33.409783  543705 memory.go:184] no items to output this cycle
I0322 13:47:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 13:47:34.989671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:47:34.992215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:47:34.992221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5780 0xc0000c57c0]
E0322 13:47:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:47:43.410656  543705 memory.go:191] Add success.
I0322 13:47:43.409805  543705 cpu.go:282] Add success.
I0322 13:47:43.420461  543705 net.go:648] Add success.
I0322 13:47:43.423337  543705 net.go:770] primary dev: ETH0
I0322 13:47:43.423350  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:47:43.423362  543705 net.go:698] Add success.
I0322 13:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:47:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:47:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:47:53.409784  543705 memory.go:184] no items to output this cycle
I0322 13:47:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 13:48:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:48:03.409781  543705 memory.go:184] no items to output this cycle
I0322 13:48:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 13:48:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:48:13.409833  543705 memory.go:191] Add success.
I0322 13:48:13.409835  543705 cpu.go:282] Add success.
W0322 13:48:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:48:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:48:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:48:13.420190  543705 net.go:648] Add success.
I0322 13:48:13.423006  543705 net.go:770] primary dev: ETH0
I0322 13:48:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:48:13.423033  543705 net.go:698] Add success.
I0322 13:48:13.468701  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fbc552ec-fe48-44ec-ada9-3eeaf6d81cea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:48:13.468735  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:48:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:48:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 13:48:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:48:14.456544  543705 disk_worker.go:494] system disk:vda1
I0322 13:48:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:48:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:48:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:48:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:48:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:48:23.409819  543705 memory.go:184] no items to output this cycle
I0322 13:48:23.409827  543705 cpu.go:275] no items to output this cycle
E0322 13:48:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:48:33.409801  543705 memory.go:184] no items to output this cycle
I0322 13:48:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 13:48:34.993676  543705 disk_info.go:125] begin check local disk info of client
I0322 13:48:34.996210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:48:34.996216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000321800 0xc000321840]
I0322 13:48:39.669036  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:48:39.669042  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:48:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:48:43.410691  543705 memory.go:191] Add success.
I0322 13:48:43.409833  543705 cpu.go:282] Add success.
I0322 13:48:43.420383  543705 net.go:648] Add success.
I0322 13:48:43.423239  543705 net.go:770] primary dev: ETH0
I0322 13:48:43.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:48:43.423269  543705 net.go:698] Add success.
I0322 13:48:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:48:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:48:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:48:53.409795  543705 memory.go:184] no items to output this cycle
I0322 13:48:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:49:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:49:03.409886  543705 cpu.go:275] no items to output this cycle
I0322 13:49:03.409906  543705 memory.go:184] no items to output this cycle
E0322 13:49:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:49:13.409797  543705 memory.go:191] Add success.
I0322 13:49:13.409812  543705 cpu.go:282] Add success.
W0322 13:49:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:49:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:49:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:49:13.420114  543705 net.go:648] Add success.
I0322 13:49:13.422678  543705 net.go:770] primary dev: ETH0
I0322 13:49:13.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:49:13.422709  543705 net.go:698] Add success.
I0322 13:49:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:49:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:49:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:49:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:49:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 13:49:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:49:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:49:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:49:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:49:16.472481  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:49:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:49:23.409776  543705 memory.go:184] no items to output this cycle
I0322 13:49:23.409836  543705 cpu.go:275] no items to output this cycle
E0322 13:49:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:49:33.409784  543705 memory.go:184] no items to output this cycle
I0322 13:49:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 13:49:34.997671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:49:35.000228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:49:35.000233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c00 0xc0000c4c40]
E0322 13:49:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:49:43.410929  543705 memory.go:191] Add success.
I0322 13:49:43.409834  543705 cpu.go:282] Add success.
I0322 13:49:43.420708  543705 net.go:648] Add success.
I0322 13:49:43.423767  543705 net.go:770] primary dev: ETH0
I0322 13:49:43.423782  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:49:43.423797  543705 net.go:698] Add success.
I0322 13:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:49:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:49:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:49:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:49:53.409809  543705 memory.go:184] no items to output this cycle
I0322 13:49:53.409827  543705 cpu.go:275] no items to output this cycle
E0322 13:50:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:50:03.409887  543705 cpu.go:275] no items to output this cycle
I0322 13:50:03.409888  543705 memory.go:184] no items to output this cycle
E0322 13:50:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:50:13.409801  543705 memory.go:191] Add success.
I0322 13:50:13.409806  543705 cpu.go:282] Add success.
W0322 13:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:50:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:50:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:50:13.420130  543705 net.go:648] Add success.
I0322 13:50:13.423026  543705 net.go:770] primary dev: ETH0
I0322 13:50:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:50:13.423053  543705 net.go:698] Add success.
I0322 13:50:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:50:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:50:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 13:50:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:50:14.456557  543705 disk_worker.go:494] system disk:vda1
I0322 13:50:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:50:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:50:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:50:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:50:16.472385  543705 disk_local_worker.go:436] Get disk info: []
I0322 13:50:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:50:23.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:50:23.409841  543705 memory.go:184] no items to output this cycle
E0322 13:50:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:50:33.409806  543705 memory.go:184] no items to output this cycle
I0322 13:50:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 13:50:35.001672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:50:35.004234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:50:35.004240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9780 0xc0004d97c0]
E0322 13:50:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:50:43.410722  543705 memory.go:191] Add success.
I0322 13:50:43.409818  543705 cpu.go:282] Add success.
I0322 13:50:43.420426  543705 net.go:648] Add success.
I0322 13:50:43.423419  543705 net.go:770] primary dev: ETH0
I0322 13:50:43.423438  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:50:43.423453  543705 net.go:698] Add success.
I0322 13:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:50:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:50:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:50:53.410262  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:50:53.410276  543705 memory.go:184] no items to output this cycle
I0322 13:50:53.410278  543705 cpu.go:275] no items to output this cycle
E0322 13:51:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:51:03.409768  543705 memory.go:184] no items to output this cycle
I0322 13:51:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 13:51:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:51:13.409940  543705 memory.go:191] Add success.
I0322 13:51:13.410000  543705 cpu.go:282] Add success.
W0322 13:51:13.410037  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:51:13.410051  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:51:13.410054  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:51:13.419715  543705 net.go:648] Add success.
I0322 13:51:13.422708  543705 net.go:770] primary dev: ETH0
I0322 13:51:13.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:51:13.422736  543705 net.go:698] Add success.
I0322 13:51:13.468615  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"87b041bd-2c42-4400-a769-2496ed153748","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:51:13.468645  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:51:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:51:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:51:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 13:51:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:51:14.456519  543705 disk_worker.go:494] system disk:vda1
I0322 13:51:14.456562  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:51:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:51:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:51:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:51:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:51:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:51:23.409779  543705 memory.go:184] no items to output this cycle
I0322 13:51:23.409846  543705 cpu.go:275] no items to output this cycle
E0322 13:51:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:51:33.409780  543705 memory.go:184] no items to output this cycle
I0322 13:51:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 13:51:35.005674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:51:35.008278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:51:35.008285  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e240 0xc00037e280]
I0322 13:51:39.669735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:51:39.669742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:51:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:51:43.410714  543705 memory.go:191] Add success.
I0322 13:51:43.409785  543705 cpu.go:282] Add success.
I0322 13:51:43.420417  543705 net.go:648] Add success.
I0322 13:51:43.423074  543705 net.go:770] primary dev: ETH0
I0322 13:51:43.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:51:43.423101  543705 net.go:698] Add success.
I0322 13:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:51:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:51:53.409791  543705 memory.go:184] no items to output this cycle
I0322 13:51:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 13:52:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:52:03.409802  543705 memory.go:184] no items to output this cycle
I0322 13:52:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 13:52:13.409846  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:52:13.409884  543705 memory.go:191] Add success.
W0322 13:52:13.409913  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:52:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:52:13.409953  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:52:13.409957  543705 cpu.go:282] Add success.
I0322 13:52:13.419710  543705 net.go:648] Add success.
I0322 13:52:13.422875  543705 net.go:770] primary dev: ETH0
I0322 13:52:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:52:13.422899  543705 net.go:698] Add success.
W0322 13:52:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:52:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 13:52:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:52:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:52:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:52:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:52:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 13:52:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:52:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:52:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:52:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:52:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:52:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:52:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:52:16.472324  543705 disk_local_worker.go:436] Get disk info: []
I0322 13:52:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 13:52:23.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:52:23.409862  543705 memory.go:184] no items to output this cycle
E0322 13:52:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:52:33.409783  543705 memory.go:184] no items to output this cycle
I0322 13:52:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 13:52:35.009673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:52:35.012250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:52:35.012257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a3580 0xc0004a35c0]
E0322 13:52:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:52:43.410634  543705 memory.go:191] Add success.
I0322 13:52:43.409791  543705 cpu.go:282] Add success.
I0322 13:52:43.420306  543705 net.go:648] Add success.
I0322 13:52:43.423227  543705 net.go:770] primary dev: ETH0
I0322 13:52:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:52:43.423253  543705 net.go:698] Add success.
I0322 13:52:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:52:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:52:53.410208  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:52:53.410229  543705 memory.go:184] no items to output this cycle
I0322 13:52:53.410232  543705 cpu.go:275] no items to output this cycle
E0322 13:53:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:53:03.409771  543705 memory.go:184] no items to output this cycle
I0322 13:53:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 13:53:13.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:53:13.409893  543705 cpu.go:282] Add success.
I0322 13:53:13.409911  543705 memory.go:191] Add success.
W0322 13:53:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:53:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:53:13.409987  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:53:13.419706  543705 net.go:648] Add success.
I0322 13:53:13.422430  543705 net.go:770] primary dev: ETH0
I0322 13:53:13.422443  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:53:13.422455  543705 net.go:698] Add success.
I0322 13:53:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:53:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:53:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 13:53:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:53:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 13:53:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:53:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:53:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:53:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:53:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:53:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:53:23.409765  543705 memory.go:184] no items to output this cycle
I0322 13:53:23.409829  543705 cpu.go:275] no items to output this cycle
E0322 13:53:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:53:33.409788  543705 memory.go:184] no items to output this cycle
I0322 13:53:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 13:53:35.013671  543705 disk_info.go:125] begin check local disk info of client
I0322 13:53:35.016261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:53:35.016268  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a00 0xc0000c5a40]
I0322 13:53:43.409793  543705 cpu.go:282] Add success.
E0322 13:53:43.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:53:43.410725  543705 memory.go:191] Add success.
I0322 13:53:43.420448  543705 net.go:648] Add success.
I0322 13:53:43.423214  543705 net.go:770] primary dev: ETH0
I0322 13:53:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:53:43.423244  543705 net.go:698] Add success.
I0322 13:53:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:53:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:53:53.410251  543705 memory.go:184] no items to output this cycle
I0322 13:53:53.410283  543705 cpu.go:275] no items to output this cycle
E0322 13:54:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:54:03.409802  543705 memory.go:184] no items to output this cycle
I0322 13:54:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 13:54:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:54:13.409789  543705 memory.go:191] Add success.
W0322 13:54:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:54:13.409817  543705 cpu.go:282] Add success.
W0322 13:54:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:54:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:54:13.420187  543705 net.go:648] Add success.
I0322 13:54:13.423131  543705 net.go:770] primary dev: ETH0
I0322 13:54:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:54:13.423156  543705 net.go:698] Add success.
I0322 13:54:13.468217  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"92a935fe-b99a-48d9-96d6-e8cdc2421762","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:54:13.468249  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 13:54:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:54:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:54:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 13:54:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:54:14.456773  543705 disk_worker.go:494] system disk:vda1
I0322 13:54:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:54:15.455029  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:54:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:54:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:54:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:54:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:54:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:54:23.409774  543705 memory.go:184] no items to output this cycle
I0322 13:54:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 13:54:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:54:33.409812  543705 memory.go:184] no items to output this cycle
I0322 13:54:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 13:54:35.017679  543705 disk_info.go:125] begin check local disk info of client
I0322 13:54:35.020221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:54:35.020227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460680 0xc0004606c0]
I0322 13:54:39.671053  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:54:39.671061  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:54:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:54:43.410631  543705 memory.go:191] Add success.
I0322 13:54:43.409804  543705 cpu.go:282] Add success.
I0322 13:54:43.420334  543705 net.go:648] Add success.
I0322 13:54:43.422880  543705 net.go:770] primary dev: ETH0
I0322 13:54:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:54:43.422911  543705 net.go:698] Add success.
I0322 13:54:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:54:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:54:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:54:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:54:53.409778  543705 cpu.go:275] no items to output this cycle
I0322 13:54:53.409782  543705 memory.go:184] no items to output this cycle
E0322 13:55:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:55:03.409780  543705 memory.go:184] no items to output this cycle
I0322 13:55:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 13:55:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:55:13.409794  543705 memory.go:191] Add success.
I0322 13:55:13.409796  543705 cpu.go:282] Add success.
W0322 13:55:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:55:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:55:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:55:13.420051  543705 net.go:648] Add success.
I0322 13:55:13.422848  543705 net.go:770] primary dev: ETH0
I0322 13:55:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:55:13.422871  543705 net.go:698] Add success.
I0322 13:55:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:55:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:55:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 13:55:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:55:14.456517  543705 disk_worker.go:494] system disk:vda1
I0322 13:55:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:55:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:55:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:55:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:55:23.410375  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:55:23.410394  543705 memory.go:184] no items to output this cycle
I0322 13:55:23.410407  543705 cpu.go:275] no items to output this cycle
E0322 13:55:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:55:33.409799  543705 memory.go:184] no items to output this cycle
I0322 13:55:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 13:55:35.021673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:55:35.024275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:55:35.024281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f81c0 0xc0004f8200]
E0322 13:55:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:55:43.410843  543705 memory.go:191] Add success.
I0322 13:55:43.409816  543705 cpu.go:282] Add success.
I0322 13:55:43.420558  543705 net.go:648] Add success.
I0322 13:55:43.423524  543705 net.go:770] primary dev: ETH0
I0322 13:55:43.423538  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:55:43.423551  543705 net.go:698] Add success.
I0322 13:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:55:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:55:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:55:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:55:53.409777  543705 memory.go:184] no items to output this cycle
I0322 13:55:53.409777  543705 cpu.go:275] no items to output this cycle
E0322 13:56:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:56:03.409768  543705 memory.go:184] no items to output this cycle
I0322 13:56:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 13:56:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:56:13.409793  543705 memory.go:191] Add success.
I0322 13:56:13.409798  543705 cpu.go:282] Add success.
W0322 13:56:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:56:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:56:13.420160  543705 net.go:648] Add success.
I0322 13:56:13.422792  543705 net.go:770] primary dev: ETH0
I0322 13:56:13.422806  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:56:13.422818  543705 net.go:698] Add success.
I0322 13:56:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:56:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:56:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 13:56:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:56:14.456852  543705 disk_worker.go:494] system disk:vda1
I0322 13:56:14.456882  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:56:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:56:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:56:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:56:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:56:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:56:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:56:23.409763  543705 memory.go:184] no items to output this cycle
I0322 13:56:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 13:56:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:56:33.409777  543705 memory.go:184] no items to output this cycle
I0322 13:56:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 13:56:35.025673  543705 disk_info.go:125] begin check local disk info of client
I0322 13:56:35.028217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:56:35.028222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f9900 0xc0004f9940]
E0322 13:56:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:56:43.410719  543705 memory.go:191] Add success.
I0322 13:56:43.409812  543705 cpu.go:282] Add success.
I0322 13:56:43.420466  543705 net.go:648] Add success.
I0322 13:56:43.423250  543705 net.go:770] primary dev: ETH0
I0322 13:56:43.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:56:43.423276  543705 net.go:698] Add success.
I0322 13:56:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:56:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:56:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:56:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:56:53.410248  543705 memory.go:184] no items to output this cycle
I0322 13:56:53.410260  543705 cpu.go:275] no items to output this cycle
E0322 13:57:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:57:03.409800  543705 memory.go:184] no items to output this cycle
I0322 13:57:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 13:57:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:57:13.409788  543705 memory.go:191] Add success.
I0322 13:57:13.409810  543705 cpu.go:282] Add success.
W0322 13:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:57:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:57:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:57:13.420267  543705 net.go:648] Add success.
I0322 13:57:13.423387  543705 net.go:770] primary dev: ETH0
I0322 13:57:13.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:57:13.423416  543705 net.go:698] Add success.
I0322 13:57:13.429536  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 13:57:13.453668  543705 event_worker.go:152] Polling the log file for events...
I0322 13:57:13.463513  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5492e54e-2c20-462a-8c4c-d904b7417a16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 13:57:13.463545  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 13:57:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:57:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 13:57:14.455207  543705 disk_worker.go:728] disk inode is not compliant
E0322 13:57:14.456757  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 13:57:14.456766  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 13:57:14.456771  543705 custom_config.go:64] query custom config with name: gpu
I0322 13:57:14.456900  543705 disk_worker.go:494] system disk:vda1
I0322 13:57:14.456955  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 13:57:15.456865  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 13:57:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:57:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 13:57:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 13:57:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:57:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:57:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:57:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:57:23.409769  543705 memory.go:184] no items to output this cycle
I0322 13:57:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 13:57:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:57:33.409808  543705 memory.go:184] no items to output this cycle
I0322 13:57:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 13:57:35.029672  543705 disk_info.go:125] begin check local disk info of client
I0322 13:57:35.032272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:57:35.032278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be6c0 0xc0003be700]
I0322 13:57:39.672055  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 13:57:39.672062  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 13:57:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:57:43.410650  543705 memory.go:191] Add success.
I0322 13:57:43.409799  543705 cpu.go:282] Add success.
I0322 13:57:43.420173  543705 net.go:770] primary dev: ETH0
I0322 13:57:43.420186  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:57:43.420199  543705 net.go:698] Add success.
I0322 13:57:43.420542  543705 net.go:648] Add success.
I0322 13:57:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:57:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:57:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:57:53.410208  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:57:53.410221  543705 memory.go:184] no items to output this cycle
I0322 13:57:53.410229  543705 cpu.go:275] no items to output this cycle
E0322 13:58:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:58:03.409772  543705 memory.go:184] no items to output this cycle
I0322 13:58:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 13:58:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:58:13.409784  543705 memory.go:191] Add success.
W0322 13:58:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 13:58:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:58:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:58:13.409836  543705 cpu.go:282] Add success.
I0322 13:58:13.420142  543705 net.go:648] Add success.
I0322 13:58:13.422715  543705 net.go:770] primary dev: ETH0
I0322 13:58:13.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:58:13.422745  543705 net.go:698] Add success.
I0322 13:58:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:58:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:58:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 13:58:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:58:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 13:58:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:58:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:58:16.458066  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:58:16.458126  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:58:16.458148  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:58:16.472497  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:58:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:58:23.409776  543705 memory.go:184] no items to output this cycle
I0322 13:58:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 13:58:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:58:33.409785  543705 memory.go:184] no items to output this cycle
I0322 13:58:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 13:58:35.033674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:58:35.036215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:58:35.036221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472440 0xc000472480]
E0322 13:58:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:58:43.410708  543705 memory.go:191] Add success.
I0322 13:58:43.409793  543705 cpu.go:282] Add success.
I0322 13:58:43.420250  543705 net.go:770] primary dev: ETH0
I0322 13:58:43.420263  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:58:43.420276  543705 net.go:698] Add success.
I0322 13:58:43.420614  543705 net.go:648] Add success.
I0322 13:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:58:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:58:53.409764  543705 memory.go:184] no items to output this cycle
I0322 13:58:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 13:59:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:59:03.409795  543705 memory.go:184] no items to output this cycle
I0322 13:59:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 13:59:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:59:13.409776  543705 memory.go:191] Add success.
W0322 13:59:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 13:59:13.409810  543705 cpu.go:282] Add success.
W0322 13:59:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 13:59:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 13:59:13.420531  543705 net.go:648] Add success.
I0322 13:59:13.423489  543705 net.go:770] primary dev: ETH0
I0322 13:59:13.423507  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:59:13.423522  543705 net.go:698] Add success.
I0322 13:59:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0322 13:59:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 13:59:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 13:59:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 13:59:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 13:59:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 13:59:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 13:59:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:59:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:59:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 13:59:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 13:59:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:59:23.409793  543705 memory.go:184] no items to output this cycle
I0322 13:59:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 13:59:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:59:33.409798  543705 memory.go:184] no items to output this cycle
I0322 13:59:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 13:59:35.037674  543705 disk_info.go:125] begin check local disk info of client
I0322 13:59:35.040190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 13:59:35.040195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467580 0xc0004675c0]
E0322 13:59:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:59:43.410844  543705 memory.go:191] Add success.
I0322 13:59:43.409822  543705 cpu.go:282] Add success.
I0322 13:59:43.420550  543705 net.go:648] Add success.
I0322 13:59:43.422911  543705 net.go:770] primary dev: ETH0
I0322 13:59:43.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0322 13:59:43.422937  543705 net.go:698] Add success.
I0322 13:59:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 13:59:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 13:59:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 13:59:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 13:59:53.409765  543705 memory.go:184] no items to output this cycle
I0322 13:59:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 14:00:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:00:03.409790  543705 memory.go:184] no items to output this cycle
I0322 14:00:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 14:00:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:00:13.409784  543705 memory.go:191] Add success.
I0322 14:00:13.409801  543705 cpu.go:282] Add success.
W0322 14:00:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:00:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:00:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:00:13.420210  543705 net.go:648] Add success.
I0322 14:00:13.422963  543705 net.go:770] primary dev: ETH0
I0322 14:00:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:00:13.422989  543705 net.go:698] Add success.
I0322 14:00:13.468869  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"eaf0c002-ebec-4435-9252-e3dbcaec19d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:00:13.468906  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:00:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:00:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 14:00:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:00:14.456777  543705 disk_worker.go:494] system disk:vda1
I0322 14:00:14.456807  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:00:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:00:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:00:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:00:23.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:00:23.409892  543705 memory.go:184] no items to output this cycle
I0322 14:00:23.409985  543705 cpu.go:275] no items to output this cycle
E0322 14:00:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:00:33.409815  543705 memory.go:184] no items to output this cycle
I0322 14:00:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 14:00:35.041673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:00:35.044189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:00:35.044195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3ec0 0xc0003d3f00]
I0322 14:00:39.673059  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:00:39.673066  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:00:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:00:43.410610  543705 memory.go:191] Add success.
I0322 14:00:43.409792  543705 cpu.go:282] Add success.
I0322 14:00:43.420408  543705 net.go:648] Add success.
I0322 14:00:43.423212  543705 net.go:770] primary dev: ETH0
I0322 14:00:43.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:00:43.423238  543705 net.go:698] Add success.
I0322 14:00:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:00:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:00:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:00:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:00:53.409801  543705 memory.go:184] no items to output this cycle
I0322 14:00:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 14:01:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:01:03.409770  543705 memory.go:184] no items to output this cycle
I0322 14:01:03.409777  543705 cpu.go:275] no items to output this cycle
E0322 14:01:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:01:13.409815  543705 memory.go:191] Add success.
I0322 14:01:13.409821  543705 cpu.go:282] Add success.
W0322 14:01:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:01:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:01:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:01:13.419803  543705 net.go:648] Add success.
I0322 14:01:13.422540  543705 net.go:770] primary dev: ETH0
I0322 14:01:13.422554  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:01:13.422567  543705 net.go:698] Add success.
I0322 14:01:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:01:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:01:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 14:01:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:01:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 14:01:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:01:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:01:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:01:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:01:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:01:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:01:23.409789  543705 memory.go:184] no items to output this cycle
I0322 14:01:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 14:01:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:01:33.409793  543705 memory.go:184] no items to output this cycle
I0322 14:01:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 14:01:35.045672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:01:35.048219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:01:35.048225  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052c580 0xc00052c5c0]
E0322 14:01:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:01:43.410701  543705 memory.go:191] Add success.
I0322 14:01:43.409818  543705 cpu.go:282] Add success.
I0322 14:01:43.420442  543705 net.go:648] Add success.
I0322 14:01:43.423591  543705 net.go:770] primary dev: ETH0
I0322 14:01:43.423606  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:01:43.423620  543705 net.go:698] Add success.
I0322 14:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:01:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:01:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:01:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:01:53.409776  543705 memory.go:184] no items to output this cycle
I0322 14:01:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 14:02:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:02:03.409767  543705 memory.go:184] no items to output this cycle
I0322 14:02:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 14:02:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:02:13.409819  543705 memory.go:191] Add success.
I0322 14:02:13.409832  543705 cpu.go:282] Add success.
W0322 14:02:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:02:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:02:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:02:13.420153  543705 net.go:648] Add success.
I0322 14:02:13.422925  543705 net.go:770] primary dev: ETH0
I0322 14:02:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:02:13.422950  543705 net.go:698] Add success.
W0322 14:02:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:02:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 14:02:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:02:14.456940  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:02:14.456949  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:02:14.456955  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:02:14.457031  543705 disk_worker.go:494] system disk:vda1
I0322 14:02:14.457078  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:02:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:02:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:02:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:02:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:02:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:02:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:02:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:02:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:02:23.409766  543705 memory.go:184] no items to output this cycle
I0322 14:02:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 14:02:33.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:02:33.409871  543705 memory.go:184] no items to output this cycle
I0322 14:02:33.409950  543705 cpu.go:275] no items to output this cycle
I0322 14:02:35.049680  543705 disk_info.go:125] begin check local disk info of client
I0322 14:02:35.052112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:02:35.052120  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0322 14:02:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:02:43.410611  543705 memory.go:191] Add success.
I0322 14:02:43.409828  543705 cpu.go:282] Add success.
I0322 14:02:43.420338  543705 net.go:648] Add success.
I0322 14:02:43.423184  543705 net.go:770] primary dev: ETH0
I0322 14:02:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:02:43.423215  543705 net.go:698] Add success.
I0322 14:02:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:02:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:02:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:02:53.410203  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:02:53.410222  543705 memory.go:184] no items to output this cycle
I0322 14:02:53.410241  543705 cpu.go:275] no items to output this cycle
E0322 14:03:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:03:03.409779  543705 memory.go:184] no items to output this cycle
I0322 14:03:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 14:03:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:03:13.409815  543705 memory.go:191] Add success.
I0322 14:03:13.409821  543705 cpu.go:282] Add success.
W0322 14:03:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:03:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:03:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:03:13.420102  543705 net.go:648] Add success.
I0322 14:03:13.423242  543705 net.go:770] primary dev: ETH0
I0322 14:03:13.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:03:13.423268  543705 net.go:698] Add success.
I0322 14:03:13.467577  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a86c6ceb-ee8f-4d78-a231-df2ab2e450ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:03:13.467609  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:03:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:03:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:03:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 14:03:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:03:14.456490  543705 disk_worker.go:494] system disk:vda1
I0322 14:03:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:03:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:03:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:03:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:03:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:03:23.409769  543705 memory.go:184] no items to output this cycle
I0322 14:03:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 14:03:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:03:33.409776  543705 memory.go:184] no items to output this cycle
I0322 14:03:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 14:03:35.053669  543705 disk_info.go:125] begin check local disk info of client
I0322 14:03:35.056155  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:03:35.056161  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003da4c0 0xc0003da500]
I0322 14:03:39.673729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:03:39.673736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:03:43.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:03:43.410871  543705 memory.go:191] Add success.
I0322 14:03:43.409940  543705 cpu.go:282] Add success.
I0322 14:03:43.419727  543705 net.go:648] Add success.
I0322 14:03:43.422492  543705 net.go:770] primary dev: ETH0
I0322 14:03:43.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:03:43.422519  543705 net.go:698] Add success.
I0322 14:03:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:03:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:03:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:03:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:03:53.409771  543705 memory.go:184] no items to output this cycle
I0322 14:03:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 14:04:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:04:03.409790  543705 memory.go:184] no items to output this cycle
I0322 14:04:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 14:04:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:04:13.409825  543705 memory.go:191] Add success.
I0322 14:04:13.409831  543705 cpu.go:282] Add success.
W0322 14:04:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:04:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:04:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:04:13.420062  543705 net.go:648] Add success.
I0322 14:04:13.423104  543705 net.go:770] primary dev: ETH0
I0322 14:04:13.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:04:13.423134  543705 net.go:698] Add success.
I0322 14:04:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:04:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:04:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 14:04:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:04:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 14:04:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:04:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:04:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:04:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:04:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:04:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:04:23.409766  543705 memory.go:184] no items to output this cycle
I0322 14:04:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 14:04:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:04:33.409803  543705 memory.go:184] no items to output this cycle
I0322 14:04:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 14:04:35.057675  543705 disk_info.go:125] begin check local disk info of client
I0322 14:04:35.060125  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:04:35.060131  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032ed00 0xc00032ed40]
E0322 14:04:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:04:43.410644  543705 memory.go:191] Add success.
I0322 14:04:43.409802  543705 cpu.go:282] Add success.
I0322 14:04:43.420533  543705 net.go:648] Add success.
I0322 14:04:43.423194  543705 net.go:770] primary dev: ETH0
I0322 14:04:43.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:04:43.423221  543705 net.go:698] Add success.
I0322 14:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:04:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:04:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:04:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:04:53.409785  543705 memory.go:184] no items to output this cycle
I0322 14:04:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 14:05:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:05:03.409811  543705 memory.go:184] no items to output this cycle
I0322 14:05:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 14:05:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:05:13.409800  543705 memory.go:191] Add success.
I0322 14:05:13.409817  543705 cpu.go:282] Add success.
W0322 14:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:05:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:05:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:05:13.420067  543705 net.go:648] Add success.
I0322 14:05:13.422817  543705 net.go:770] primary dev: ETH0
I0322 14:05:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:05:13.422842  543705 net.go:698] Add success.
I0322 14:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:05:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:05:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 14:05:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:05:14.456498  543705 disk_worker.go:494] system disk:vda1
I0322 14:05:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:05:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:05:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:05:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:05:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:05:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:05:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:05:23.409788  543705 memory.go:184] no items to output this cycle
I0322 14:05:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:05:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:05:33.409813  543705 memory.go:184] no items to output this cycle
I0322 14:05:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 14:05:35.061671  543705 disk_info.go:125] begin check local disk info of client
I0322 14:05:35.064187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:05:35.064194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9940 0xc0004d9980]
E0322 14:05:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:05:43.410729  543705 memory.go:191] Add success.
I0322 14:05:43.409822  543705 cpu.go:282] Add success.
I0322 14:05:43.420426  543705 net.go:648] Add success.
I0322 14:05:43.423050  543705 net.go:770] primary dev: ETH0
I0322 14:05:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:05:43.423079  543705 net.go:698] Add success.
I0322 14:05:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:05:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:05:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:05:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:05:53.410257  543705 memory.go:184] no items to output this cycle
I0322 14:05:53.410293  543705 cpu.go:275] no items to output this cycle
E0322 14:06:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:06:03.409794  543705 memory.go:184] no items to output this cycle
I0322 14:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 14:06:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:06:13.409801  543705 memory.go:191] Add success.
I0322 14:06:13.409817  543705 cpu.go:282] Add success.
W0322 14:06:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:06:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:06:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:06:13.420154  543705 net.go:648] Add success.
I0322 14:06:13.423120  543705 net.go:770] primary dev: ETH0
I0322 14:06:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:06:13.423149  543705 net.go:698] Add success.
I0322 14:06:13.500099  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ac34fb5d-12cf-48ac-9249-aeb01bdddf00","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:06:13.500132  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:06:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:06:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:06:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 14:06:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:06:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 14:06:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:06:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:06:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:06:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:06:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:06:23.409777  543705 memory.go:184] no items to output this cycle
I0322 14:06:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 14:06:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:06:33.409792  543705 memory.go:184] no items to output this cycle
I0322 14:06:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 14:06:35.065670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:06:35.068167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:06:35.068173  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ab40 0xc00007ab80]
I0322 14:06:39.675070  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:06:39.675077  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:06:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:06:43.410875  543705 memory.go:191] Add success.
I0322 14:06:43.409807  543705 cpu.go:282] Add success.
I0322 14:06:43.420555  543705 net.go:648] Add success.
I0322 14:06:43.423305  543705 net.go:770] primary dev: ETH0
I0322 14:06:43.423318  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:06:43.423330  543705 net.go:698] Add success.
I0322 14:06:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:06:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:06:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:06:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:06:53.409782  543705 memory.go:184] no items to output this cycle
I0322 14:06:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 14:07:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:07:03.409890  543705 memory.go:184] no items to output this cycle
I0322 14:07:03.409953  543705 cpu.go:275] no items to output this cycle
E0322 14:07:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:07:13.409823  543705 memory.go:191] Add success.
I0322 14:07:13.409833  543705 cpu.go:282] Add success.
W0322 14:07:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:07:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:07:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:07:13.420150  543705 net.go:648] Add success.
I0322 14:07:13.422736  543705 net.go:770] primary dev: ETH0
I0322 14:07:13.422749  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:07:13.422761  543705 net.go:698] Add success.
I0322 14:07:13.453325  543705 event_worker.go:152] Polling the log file for events...
W0322 14:07:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:07:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 14:07:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:07:14.455894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:07:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:07:14.455908  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:07:14.456528  543705 disk_worker.go:494] system disk:vda1
I0322 14:07:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:07:15.456908  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:07:15.456920  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:07:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:07:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:07:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:07:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:07:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:07:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:07:23.409817  543705 memory.go:184] no items to output this cycle
I0322 14:07:23.409830  543705 cpu.go:275] no items to output this cycle
E0322 14:07:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:07:33.409788  543705 memory.go:184] no items to output this cycle
I0322 14:07:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 14:07:35.069670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:07:35.072212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:07:35.072218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bb240 0xc0004bb280]
E0322 14:07:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:07:43.410589  543705 memory.go:191] Add success.
I0322 14:07:43.409801  543705 cpu.go:282] Add success.
I0322 14:07:43.420285  543705 net.go:648] Add success.
I0322 14:07:43.423112  543705 net.go:770] primary dev: ETH0
I0322 14:07:43.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:07:43.423141  543705 net.go:698] Add success.
I0322 14:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:07:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:07:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:07:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:07:53.409794  543705 memory.go:184] no items to output this cycle
I0322 14:07:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 14:08:03.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:08:03.409926  543705 memory.go:184] no items to output this cycle
I0322 14:08:03.409909  543705 cpu.go:275] no items to output this cycle
E0322 14:08:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:08:13.409792  543705 memory.go:191] Add success.
W0322 14:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 14:08:13.409821  543705 cpu.go:282] Add success.
W0322 14:08:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:08:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:08:13.420340  543705 net.go:648] Add success.
I0322 14:08:13.423406  543705 net.go:770] primary dev: ETH0
I0322 14:08:13.423419  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:08:13.423431  543705 net.go:698] Add success.
I0322 14:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:08:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:08:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 14:08:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:08:14.456595  543705 disk_worker.go:494] system disk:vda1
I0322 14:08:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:08:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:08:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:08:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:08:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:08:23.409764  543705 memory.go:184] no items to output this cycle
I0322 14:08:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 14:08:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:08:33.409794  543705 memory.go:184] no items to output this cycle
I0322 14:08:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 14:08:35.073671  543705 disk_info.go:125] begin check local disk info of client
I0322 14:08:35.076172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:08:35.076178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ba100 0xc0004ba140]
E0322 14:08:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:08:43.410811  543705 memory.go:191] Add success.
I0322 14:08:43.409783  543705 cpu.go:282] Add success.
I0322 14:08:43.420512  543705 net.go:648] Add success.
I0322 14:08:43.423537  543705 net.go:770] primary dev: ETH0
I0322 14:08:43.423549  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:08:43.423562  543705 net.go:698] Add success.
I0322 14:08:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:08:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:08:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:08:53.409775  543705 memory.go:184] no items to output this cycle
I0322 14:08:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:09:03.409781  543705 memory.go:184] no items to output this cycle
I0322 14:09:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 14:09:13.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:09:13.409893  543705 memory.go:191] Add success.
W0322 14:09:13.409940  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:09:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:09:13.409956  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:09:13.409990  543705 cpu.go:282] Add success.
I0322 14:09:13.419743  543705 net.go:648] Add success.
I0322 14:09:13.422388  543705 net.go:770] primary dev: ETH0
I0322 14:09:13.422400  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:09:13.422411  543705 net.go:698] Add success.
I0322 14:09:13.467609  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"50b97db7-dcb5-429f-8f35-ed8df70d7f50","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:09:13.467640  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:09:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:09:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:09:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 14:09:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:09:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 14:09:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:09:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:09:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:09:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:09:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:09:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:09:23.409776  543705 memory.go:184] no items to output this cycle
I0322 14:09:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 14:09:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:09:33.409803  543705 memory.go:184] no items to output this cycle
I0322 14:09:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 14:09:35.077675  543705 disk_info.go:125] begin check local disk info of client
I0322 14:09:35.080186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:09:35.080193  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296640 0xc000296680]
I0322 14:09:39.676076  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:09:39.676083  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:09:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:09:43.410609  543705 memory.go:191] Add success.
I0322 14:09:43.409822  543705 cpu.go:282] Add success.
I0322 14:09:43.420339  543705 net.go:648] Add success.
I0322 14:09:43.423065  543705 net.go:770] primary dev: ETH0
I0322 14:09:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:09:43.423090  543705 net.go:698] Add success.
I0322 14:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:09:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:09:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:09:53.409793  543705 memory.go:184] no items to output this cycle
I0322 14:09:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 14:10:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:10:03.409777  543705 memory.go:184] no items to output this cycle
I0322 14:10:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 14:10:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:10:13.409876  543705 memory.go:191] Add success.
W0322 14:10:13.409907  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:10:13.409920  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:10:13.409923  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:10:13.409929  543705 cpu.go:282] Add success.
I0322 14:10:13.419747  543705 net.go:648] Add success.
I0322 14:10:13.422263  543705 net.go:770] primary dev: ETH0
I0322 14:10:13.422283  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:10:13.422298  543705 net.go:698] Add success.
I0322 14:10:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:10:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:10:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 14:10:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:10:14.456521  543705 disk_worker.go:494] system disk:vda1
I0322 14:10:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:10:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:10:16.457605  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:10:16.457677  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:10:16.457702  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:10:16.473031  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:10:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:10:23.409773  543705 memory.go:184] no items to output this cycle
I0322 14:10:23.409776  543705 cpu.go:275] no items to output this cycle
E0322 14:10:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:10:33.409786  543705 memory.go:184] no items to output this cycle
I0322 14:10:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 14:10:35.081671  543705 disk_info.go:125] begin check local disk info of client
I0322 14:10:35.084228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:10:35.084234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4940 0xc0000c4980]
E0322 14:10:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:10:43.410627  543705 memory.go:191] Add success.
I0322 14:10:43.409789  543705 cpu.go:282] Add success.
I0322 14:10:43.420321  543705 net.go:648] Add success.
I0322 14:10:43.422941  543705 net.go:770] primary dev: ETH0
I0322 14:10:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:10:43.422971  543705 net.go:698] Add success.
I0322 14:10:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:10:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:10:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:10:53.409798  543705 memory.go:184] no items to output this cycle
I0322 14:10:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 14:11:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:11:03.409798  543705 memory.go:184] no items to output this cycle
I0322 14:11:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 14:11:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:11:13.409787  543705 memory.go:191] Add success.
I0322 14:11:13.409803  543705 cpu.go:282] Add success.
W0322 14:11:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:11:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:11:13.419746  543705 net.go:648] Add success.
I0322 14:11:13.422405  543705 net.go:770] primary dev: ETH0
I0322 14:11:13.422418  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:11:13.422429  543705 net.go:698] Add success.
I0322 14:11:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:11:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:11:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 14:11:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:11:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 14:11:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:11:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:11:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:11:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:11:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:11:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:11:23.409792  543705 memory.go:184] no items to output this cycle
I0322 14:11:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 14:11:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:11:33.409785  543705 memory.go:184] no items to output this cycle
I0322 14:11:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 14:11:35.085673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:11:35.088256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:11:35.088263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac00 0xc0001aac40]
E0322 14:11:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:11:43.410618  543705 memory.go:191] Add success.
I0322 14:11:43.409789  543705 cpu.go:282] Add success.
I0322 14:11:43.420381  543705 net.go:648] Add success.
I0322 14:11:43.423001  543705 net.go:770] primary dev: ETH0
I0322 14:11:43.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:11:43.423027  543705 net.go:698] Add success.
I0322 14:11:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:11:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:11:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:11:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:11:53.409762  543705 memory.go:184] no items to output this cycle
I0322 14:11:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 14:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:12:03.409772  543705 memory.go:184] no items to output this cycle
I0322 14:12:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 14:12:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:12:13.409790  543705 memory.go:191] Add success.
I0322 14:12:13.409810  543705 cpu.go:282] Add success.
W0322 14:12:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:12:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:12:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:12:13.419723  543705 net.go:648] Add success.
I0322 14:12:13.430684  543705 net.go:770] primary dev: ETH0
I0322 14:12:13.430698  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:12:13.430709  543705 net.go:698] Add success.
I0322 14:12:13.480710  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d84ab981-18e2-47ef-ade3-8232ba0cb01a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:12:13.480744  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 14:12:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:12:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 14:12:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:12:14.457007  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:12:14.457014  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:12:14.457019  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:12:14.457025  543705 disk_worker.go:494] system disk:vda1
I0322 14:12:14.457066  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:12:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:12:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:12:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:12:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:12:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:12:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:12:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:12:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:12:23.409778  543705 memory.go:184] no items to output this cycle
I0322 14:12:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 14:12:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:12:33.409774  543705 memory.go:184] no items to output this cycle
I0322 14:12:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 14:12:35.089673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:12:35.092205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:12:35.092211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8340 0xc0004d8380]
I0322 14:12:39.677081  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:12:39.677088  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:12:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:12:43.410578  543705 memory.go:191] Add success.
I0322 14:12:43.409797  543705 cpu.go:282] Add success.
I0322 14:12:43.420274  543705 net.go:648] Add success.
I0322 14:12:43.423121  543705 net.go:770] primary dev: ETH0
I0322 14:12:43.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:12:43.423145  543705 net.go:698] Add success.
I0322 14:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:12:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:12:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:12:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:12:53.409807  543705 memory.go:184] no items to output this cycle
I0322 14:12:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 14:13:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:13:03.409783  543705 memory.go:184] no items to output this cycle
I0322 14:13:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 14:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:13:13.409798  543705 memory.go:191] Add success.
I0322 14:13:13.409815  543705 cpu.go:282] Add success.
W0322 14:13:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:13:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:13:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:13:13.419750  543705 net.go:648] Add success.
I0322 14:13:13.422473  543705 net.go:770] primary dev: ETH0
I0322 14:13:13.422488  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:13:13.422501  543705 net.go:698] Add success.
I0322 14:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:13:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:13:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 14:13:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:13:14.456529  543705 disk_worker.go:494] system disk:vda1
I0322 14:13:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:13:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:13:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:13:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:13:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:13:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:13:23.409808  543705 memory.go:184] no items to output this cycle
I0322 14:13:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:13:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:13:33.409813  543705 memory.go:184] no items to output this cycle
I0322 14:13:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 14:13:35.093672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:13:35.096195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:13:35.096200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4380 0xc0000c43c0]
E0322 14:13:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:13:43.410654  543705 memory.go:191] Add success.
I0322 14:13:43.409830  543705 cpu.go:282] Add success.
I0322 14:13:43.420360  543705 net.go:648] Add success.
I0322 14:13:43.422838  543705 net.go:770] primary dev: ETH0
I0322 14:13:43.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:13:43.422862  543705 net.go:698] Add success.
I0322 14:13:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:13:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:13:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:13:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:13:53.409806  543705 memory.go:184] no items to output this cycle
I0322 14:13:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:14:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:14:03.409776  543705 memory.go:184] no items to output this cycle
I0322 14:14:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 14:14:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:14:13.409797  543705 memory.go:191] Add success.
I0322 14:14:13.409803  543705 cpu.go:282] Add success.
W0322 14:14:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:14:13.412692  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:14:13.412696  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:14:13.420444  543705 net.go:648] Add success.
I0322 14:14:13.422235  543705 net.go:770] primary dev: ETH0
I0322 14:14:13.422250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:14:13.422263  543705 net.go:698] Add success.
I0322 14:14:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:14:14.455088  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:14:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 14:14:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:14:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 14:14:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:14:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:14:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:14:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:14:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:14:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:14:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:14:23.409782  543705 memory.go:184] no items to output this cycle
I0322 14:14:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 14:14:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:14:33.409816  543705 memory.go:184] no items to output this cycle
I0322 14:14:33.409829  543705 cpu.go:275] no items to output this cycle
I0322 14:14:35.097670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:14:35.100283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:14:35.100289  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1300 0xc0004a1340]
E0322 14:14:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:14:43.410707  543705 memory.go:191] Add success.
I0322 14:14:43.409809  543705 cpu.go:282] Add success.
I0322 14:14:43.420427  543705 net.go:648] Add success.
I0322 14:14:43.423291  543705 net.go:770] primary dev: ETH0
I0322 14:14:43.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:14:43.423317  543705 net.go:698] Add success.
I0322 14:14:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:14:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:14:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:14:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:14:53.409805  543705 memory.go:184] no items to output this cycle
I0322 14:14:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:15:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:15:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 14:15:03.409790  543705 memory.go:184] no items to output this cycle
E0322 14:15:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:15:13.409782  543705 memory.go:191] Add success.
I0322 14:15:13.409818  543705 cpu.go:282] Add success.
W0322 14:15:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:15:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:15:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:15:13.420169  543705 net.go:648] Add success.
I0322 14:15:13.423012  543705 net.go:770] primary dev: ETH0
I0322 14:15:13.423026  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:15:13.423038  543705 net.go:698] Add success.
I0322 14:15:13.464234  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4fa5d45f-c235-4298-a883-75cb8cd6c412","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:15:13.464269  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:15:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:15:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:15:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 14:15:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:15:14.456607  543705 disk_worker.go:494] system disk:vda1
I0322 14:15:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:15:15.455611  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:15:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:15:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:15:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:15:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:15:23.409783  543705 memory.go:184] no items to output this cycle
I0322 14:15:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 14:15:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:15:33.409785  543705 memory.go:184] no items to output this cycle
I0322 14:15:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 14:15:35.101670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:15:35.104286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:15:35.104291  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab080 0xc0001ab0c0]
I0322 14:15:39.677733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:15:39.677739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:15:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:15:43.410743  543705 memory.go:191] Add success.
I0322 14:15:43.409787  543705 cpu.go:282] Add success.
I0322 14:15:43.420470  543705 net.go:648] Add success.
I0322 14:15:43.423571  543705 net.go:770] primary dev: ETH0
I0322 14:15:43.423584  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:15:43.423597  543705 net.go:698] Add success.
I0322 14:15:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:15:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:15:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:15:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:15:53.409792  543705 memory.go:184] no items to output this cycle
I0322 14:15:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 14:16:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:16:03.409773  543705 memory.go:184] no items to output this cycle
I0322 14:16:03.409776  543705 cpu.go:275] no items to output this cycle
E0322 14:16:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:16:13.409800  543705 cpu.go:282] Add success.
I0322 14:16:13.409803  543705 memory.go:191] Add success.
W0322 14:16:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:16:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:16:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:16:13.420101  543705 net.go:648] Add success.
I0322 14:16:13.422828  543705 net.go:770] primary dev: ETH0
I0322 14:16:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:16:13.422853  543705 net.go:698] Add success.
I0322 14:16:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:16:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:16:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 14:16:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:16:14.459238  543705 disk_worker.go:494] system disk:vda1
I0322 14:16:14.459270  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:16:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:16:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:16:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:16:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:16:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:16:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:16:23.409804  543705 memory.go:184] no items to output this cycle
I0322 14:16:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 14:16:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:16:33.409780  543705 memory.go:184] no items to output this cycle
I0322 14:16:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 14:16:35.105673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:16:35.108224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:16:35.108230  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384d80 0xc000384dc0]
E0322 14:16:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:16:43.410634  543705 memory.go:191] Add success.
I0322 14:16:43.409816  543705 cpu.go:282] Add success.
I0322 14:16:43.420393  543705 net.go:648] Add success.
I0322 14:16:43.422957  543705 net.go:770] primary dev: ETH0
I0322 14:16:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:16:43.422983  543705 net.go:698] Add success.
I0322 14:16:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:16:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:16:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:16:53.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:16:53.410256  543705 memory.go:184] no items to output this cycle
I0322 14:16:53.410275  543705 cpu.go:275] no items to output this cycle
E0322 14:17:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:17:03.409779  543705 memory.go:184] no items to output this cycle
I0322 14:17:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 14:17:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:17:13.409791  543705 memory.go:191] Add success.
I0322 14:17:13.409794  543705 cpu.go:282] Add success.
W0322 14:17:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:17:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:17:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:17:13.420066  543705 net.go:648] Add success.
I0322 14:17:13.423045  543705 net.go:770] primary dev: ETH0
I0322 14:17:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:17:13.423073  543705 net.go:698] Add success.
I0322 14:17:13.453634  543705 event_worker.go:152] Polling the log file for events...
W0322 14:17:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:17:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 14:17:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:17:14.455866  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:17:14.455875  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:17:14.455881  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:17:14.456509  543705 disk_worker.go:494] system disk:vda1
I0322 14:17:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:17:15.456794  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:17:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:17:16.458067  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:17:16.458101  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:17:16.458130  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:17:16.458149  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:17:16.472597  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:17:23.410205  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:17:23.410224  543705 memory.go:184] no items to output this cycle
I0322 14:17:23.410227  543705 cpu.go:275] no items to output this cycle
E0322 14:17:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:17:33.409801  543705 memory.go:184] no items to output this cycle
I0322 14:17:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 14:17:35.109674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:17:35.112225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:17:35.112232  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8300 0xc0004d8340]
E0322 14:17:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:17:43.410621  543705 memory.go:191] Add success.
I0322 14:17:43.409800  543705 cpu.go:282] Add success.
I0322 14:17:43.420337  543705 net.go:648] Add success.
I0322 14:17:43.422966  543705 net.go:770] primary dev: ETH0
I0322 14:17:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:17:43.423006  543705 net.go:698] Add success.
I0322 14:17:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:17:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:17:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:17:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:17:53.409769  543705 memory.go:184] no items to output this cycle
I0322 14:17:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 14:18:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:18:03.409773  543705 memory.go:184] no items to output this cycle
I0322 14:18:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 14:18:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:18:13.409790  543705 memory.go:191] Add success.
I0322 14:18:13.409808  543705 cpu.go:282] Add success.
W0322 14:18:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:18:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:18:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:18:13.420161  543705 net.go:648] Add success.
I0322 14:18:13.422874  543705 net.go:770] primary dev: ETH0
I0322 14:18:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:18:13.422902  543705 net.go:698] Add success.
I0322 14:18:13.472210  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e3b82297-15e4-436e-ad07-d5d273ade202","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:18:13.472244  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:18:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:18:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:18:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0322 14:18:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:18:14.456926  543705 disk_worker.go:494] system disk:vda1
I0322 14:18:14.456956  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:18:15.455995  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:18:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:18:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:18:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:18:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:18:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:18:23.409792  543705 memory.go:184] no items to output this cycle
I0322 14:18:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:18:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:18:33.409775  543705 memory.go:184] no items to output this cycle
I0322 14:18:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 14:18:35.113675  543705 disk_info.go:125] begin check local disk info of client
I0322 14:18:35.116218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:18:35.116224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa4c0 0xc0001aa500]
I0322 14:18:39.679078  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:18:39.679084  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:18:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:18:43.410822  543705 memory.go:191] Add success.
I0322 14:18:43.409796  543705 cpu.go:282] Add success.
I0322 14:18:43.420532  543705 net.go:648] Add success.
I0322 14:18:43.423575  543705 net.go:770] primary dev: ETH0
I0322 14:18:43.423588  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:18:43.423601  543705 net.go:698] Add success.
I0322 14:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:18:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:18:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:18:53.409784  543705 memory.go:184] no items to output this cycle
I0322 14:18:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 14:19:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:19:03.409804  543705 memory.go:184] no items to output this cycle
I0322 14:19:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 14:19:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:19:13.409784  543705 memory.go:191] Add success.
I0322 14:19:13.409803  543705 cpu.go:282] Add success.
W0322 14:19:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:19:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:19:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:19:13.420147  543705 net.go:648] Add success.
I0322 14:19:13.422735  543705 net.go:770] primary dev: ETH0
I0322 14:19:13.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:19:13.422761  543705 net.go:698] Add success.
I0322 14:19:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:19:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:19:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 14:19:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:19:14.456548  543705 disk_worker.go:494] system disk:vda1
I0322 14:19:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:19:15.455892  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:19:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:19:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:19:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:19:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:19:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:19:23.409778  543705 memory.go:184] no items to output this cycle
I0322 14:19:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 14:19:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:19:33.409784  543705 memory.go:184] no items to output this cycle
I0322 14:19:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 14:19:35.117693  543705 disk_info.go:125] begin check local disk info of client
I0322 14:19:35.120229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:19:35.120234  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e240 0xc00037e280]
E0322 14:19:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:19:43.410762  543705 memory.go:191] Add success.
I0322 14:19:43.409792  543705 cpu.go:282] Add success.
I0322 14:19:43.420461  543705 net.go:648] Add success.
I0322 14:19:43.423372  543705 net.go:770] primary dev: ETH0
I0322 14:19:43.423391  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:19:43.423406  543705 net.go:698] Add success.
I0322 14:19:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:19:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:19:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:19:53.410187  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:19:53.410202  543705 memory.go:184] no items to output this cycle
I0322 14:19:53.410228  543705 cpu.go:275] no items to output this cycle
E0322 14:20:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:20:03.409761  543705 memory.go:184] no items to output this cycle
I0322 14:20:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 14:20:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:20:13.409782  543705 memory.go:191] Add success.
I0322 14:20:13.409806  543705 cpu.go:282] Add success.
W0322 14:20:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:20:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:20:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:20:13.420258  543705 net.go:648] Add success.
I0322 14:20:13.423201  543705 net.go:770] primary dev: ETH0
I0322 14:20:13.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:20:13.423226  543705 net.go:698] Add success.
I0322 14:20:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:20:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:20:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 14:20:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:20:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 14:20:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:20:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:20:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:20:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:20:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:20:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:20:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:20:23.409804  543705 memory.go:184] no items to output this cycle
I0322 14:20:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:20:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:20:33.409786  543705 memory.go:184] no items to output this cycle
I0322 14:20:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 14:20:35.121672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:20:35.124247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:20:35.124254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0180 0xc0004a01c0]
E0322 14:20:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:20:43.410831  543705 memory.go:191] Add success.
I0322 14:20:43.409783  543705 cpu.go:282] Add success.
I0322 14:20:43.420540  543705 net.go:648] Add success.
I0322 14:20:43.423262  543705 net.go:770] primary dev: ETH0
I0322 14:20:43.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:20:43.423288  543705 net.go:698] Add success.
I0322 14:20:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:20:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:20:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:20:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:20:53.409777  543705 memory.go:184] no items to output this cycle
I0322 14:20:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 14:21:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:21:03.409802  543705 memory.go:184] no items to output this cycle
I0322 14:21:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 14:21:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:21:13.409796  543705 memory.go:191] Add success.
I0322 14:21:13.409798  543705 cpu.go:282] Add success.
W0322 14:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:21:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:21:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:21:13.420260  543705 net.go:648] Add success.
I0322 14:21:13.423033  543705 net.go:770] primary dev: ETH0
I0322 14:21:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:21:13.423063  543705 net.go:698] Add success.
I0322 14:21:13.635949  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c23db059-04fd-4519-bed4-35bc64e1e584","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:21:13.635990  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:21:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:21:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:21:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0322 14:21:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:21:14.456603  543705 disk_worker.go:494] system disk:vda1
I0322 14:21:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:21:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:21:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:21:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:21:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:21:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:21:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:21:23.409812  543705 memory.go:184] no items to output this cycle
I0322 14:21:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 14:21:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:21:33.409783  543705 memory.go:184] no items to output this cycle
I0322 14:21:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 14:21:35.125677  543705 disk_info.go:125] begin check local disk info of client
I0322 14:21:35.128284  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:21:35.128291  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003661c0 0xc000366200]
I0322 14:21:39.680100  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:21:39.680107  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:21:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:21:43.410734  543705 memory.go:191] Add success.
I0322 14:21:43.409810  543705 cpu.go:282] Add success.
I0322 14:21:43.420435  543705 net.go:648] Add success.
I0322 14:21:43.422999  543705 net.go:770] primary dev: ETH0
I0322 14:21:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:21:43.423035  543705 net.go:698] Add success.
I0322 14:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:21:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:21:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:21:53.409783  543705 memory.go:184] no items to output this cycle
I0322 14:21:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 14:22:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:22:03.409815  543705 memory.go:184] no items to output this cycle
I0322 14:22:03.409833  543705 cpu.go:275] no items to output this cycle
E0322 14:22:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:22:13.409784  543705 memory.go:191] Add success.
I0322 14:22:13.409804  543705 cpu.go:282] Add success.
W0322 14:22:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:22:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:22:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:22:13.420131  543705 net.go:648] Add success.
I0322 14:22:13.422803  543705 net.go:770] primary dev: ETH0
I0322 14:22:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:22:13.422831  543705 net.go:698] Add success.
W0322 14:22:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:22:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 14:22:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:22:14.455915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:22:14.455924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:22:14.455930  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:22:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 14:22:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:22:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:22:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:22:16.458023  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:22:16.458023  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:22:16.458092  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:22:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:22:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:22:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:22:23.409891  543705 cpu.go:275] no items to output this cycle
I0322 14:22:23.409900  543705 memory.go:184] no items to output this cycle
E0322 14:22:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:22:33.409794  543705 memory.go:184] no items to output this cycle
I0322 14:22:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 14:22:35.129673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:22:35.132194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:22:35.132201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 14:22:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:22:43.410792  543705 memory.go:191] Add success.
I0322 14:22:43.409792  543705 cpu.go:282] Add success.
I0322 14:22:43.420476  543705 net.go:648] Add success.
I0322 14:22:43.423264  543705 net.go:770] primary dev: ETH0
I0322 14:22:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:22:43.423289  543705 net.go:698] Add success.
I0322 14:22:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:22:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:22:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:22:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:22:53.409783  543705 cpu.go:275] no items to output this cycle
I0322 14:22:53.409787  543705 memory.go:184] no items to output this cycle
E0322 14:23:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:23:03.409775  543705 memory.go:184] no items to output this cycle
I0322 14:23:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 14:23:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:23:13.409823  543705 memory.go:191] Add success.
I0322 14:23:13.409833  543705 cpu.go:282] Add success.
W0322 14:23:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:23:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:23:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:23:13.420166  543705 net.go:648] Add success.
I0322 14:23:13.423142  543705 net.go:770] primary dev: ETH0
I0322 14:23:13.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:23:13.423167  543705 net.go:698] Add success.
I0322 14:23:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:23:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:23:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 14:23:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:23:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 14:23:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:23:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:23:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:23:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:23:23.409770  543705 memory.go:184] no items to output this cycle
I0322 14:23:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 14:23:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:23:33.409908  543705 memory.go:184] no items to output this cycle
I0322 14:23:33.409914  543705 cpu.go:275] no items to output this cycle
I0322 14:23:35.133674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:23:35.136213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:23:35.136219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 14:23:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:23:43.410563  543705 memory.go:191] Add success.
I0322 14:23:43.409797  543705 cpu.go:282] Add success.
I0322 14:23:43.420233  543705 net.go:648] Add success.
I0322 14:23:43.422966  543705 net.go:770] primary dev: ETH0
I0322 14:23:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:23:43.422996  543705 net.go:698] Add success.
I0322 14:23:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:23:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:23:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:23:53.409768  543705 memory.go:184] no items to output this cycle
I0322 14:23:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 14:24:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:24:03.409767  543705 memory.go:184] no items to output this cycle
I0322 14:24:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 14:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:24:13.409805  543705 memory.go:191] Add success.
I0322 14:24:13.409805  543705 cpu.go:282] Add success.
W0322 14:24:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:24:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:24:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:24:13.420113  543705 net.go:648] Add success.
I0322 14:24:13.422829  543705 net.go:770] primary dev: ETH0
I0322 14:24:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:24:13.422854  543705 net.go:698] Add success.
I0322 14:24:13.469397  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"886f16c7-3b8a-4769-bf3d-6ed0146e6907","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:24:13.469432  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:24:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:24:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:24:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 14:24:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:24:14.456536  543705 disk_worker.go:494] system disk:vda1
I0322 14:24:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:24:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:24:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:24:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:24:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:24:23.409777  543705 memory.go:184] no items to output this cycle
I0322 14:24:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 14:24:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:24:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 14:24:33.409800  543705 memory.go:184] no items to output this cycle
I0322 14:24:35.137672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:24:35.140174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:24:35.140181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dc440 0xc0003dc480]
I0322 14:24:39.681105  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:24:39.681112  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:24:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:24:43.410620  543705 memory.go:191] Add success.
I0322 14:24:43.409855  543705 cpu.go:282] Add success.
I0322 14:24:43.420357  543705 net.go:648] Add success.
I0322 14:24:43.422996  543705 net.go:770] primary dev: ETH0
I0322 14:24:43.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:24:43.423023  543705 net.go:698] Add success.
I0322 14:24:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:24:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:24:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:24:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:24:53.410269  543705 memory.go:184] no items to output this cycle
I0322 14:24:53.410273  543705 cpu.go:275] no items to output this cycle
E0322 14:25:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:25:03.409794  543705 memory.go:184] no items to output this cycle
I0322 14:25:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 14:25:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:25:13.409783  543705 memory.go:191] Add success.
W0322 14:25:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 14:25:13.409811  543705 cpu.go:282] Add success.
W0322 14:25:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:25:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:25:13.420256  543705 net.go:648] Add success.
I0322 14:25:13.423107  543705 net.go:770] primary dev: ETH0
I0322 14:25:13.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:25:13.423133  543705 net.go:698] Add success.
I0322 14:25:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:25:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:25:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 14:25:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:25:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 14:25:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:25:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:25:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:25:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:25:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:25:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:25:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:25:23.409770  543705 memory.go:184] no items to output this cycle
I0322 14:25:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 14:25:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:25:33.409813  543705 memory.go:184] no items to output this cycle
I0322 14:25:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 14:25:35.141665  543705 disk_info.go:125] begin check local disk info of client
I0322 14:25:35.144200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:25:35.144205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305c80 0xc000305cc0]
E0322 14:25:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:25:43.410740  543705 memory.go:191] Add success.
I0322 14:25:43.409797  543705 cpu.go:282] Add success.
I0322 14:25:43.420769  543705 net.go:648] Add success.
I0322 14:25:43.423629  543705 net.go:770] primary dev: ETH0
I0322 14:25:43.423642  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:25:43.423654  543705 net.go:698] Add success.
I0322 14:25:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:25:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:25:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:25:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:25:53.409790  543705 memory.go:184] no items to output this cycle
I0322 14:25:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 14:26:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:26:03.409793  543705 memory.go:184] no items to output this cycle
I0322 14:26:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 14:26:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:26:13.409819  543705 memory.go:191] Add success.
I0322 14:26:13.409826  543705 cpu.go:282] Add success.
W0322 14:26:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:26:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:26:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:26:13.420122  543705 net.go:648] Add success.
I0322 14:26:13.422936  543705 net.go:770] primary dev: ETH0
I0322 14:26:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:26:13.422960  543705 net.go:698] Add success.
I0322 14:26:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:26:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:26:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 14:26:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:26:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 14:26:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:26:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:26:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:26:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:26:23.409770  543705 memory.go:184] no items to output this cycle
I0322 14:26:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 14:26:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:26:33.409803  543705 memory.go:184] no items to output this cycle
I0322 14:26:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 14:26:35.145671  543705 disk_info.go:125] begin check local disk info of client
I0322 14:26:35.148245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:26:35.148251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa600 0xc0001aa640]
E0322 14:26:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:26:43.410725  543705 memory.go:191] Add success.
I0322 14:26:43.409812  543705 cpu.go:282] Add success.
I0322 14:26:43.420450  543705 net.go:648] Add success.
I0322 14:26:43.423220  543705 net.go:770] primary dev: ETH0
I0322 14:26:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:26:43.423246  543705 net.go:698] Add success.
I0322 14:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:26:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:26:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:26:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:26:53.409802  543705 memory.go:184] no items to output this cycle
I0322 14:26:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 14:27:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:27:03.409771  543705 memory.go:184] no items to output this cycle
I0322 14:27:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 14:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:27:13.409820  543705 memory.go:191] Add success.
I0322 14:27:13.409826  543705 cpu.go:282] Add success.
W0322 14:27:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:27:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:27:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:27:13.420134  543705 net.go:648] Add success.
I0322 14:27:13.422660  543705 net.go:770] primary dev: ETH0
I0322 14:27:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:27:13.422688  543705 net.go:698] Add success.
I0322 14:27:13.428812  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 14:27:13.452991  543705 event_worker.go:152] Polling the log file for events...
I0322 14:27:13.474373  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"03a1ce62-1fa6-4926-99b9-84b38be5b083","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:27:13.474407  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 14:27:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:27:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 14:27:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:27:14.456135  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:27:14.456145  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:27:14.456151  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:27:14.456466  543705 disk_worker.go:494] system disk:vda1
I0322 14:27:14.456509  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:27:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:27:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:27:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:27:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:27:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:27:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:27:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:27:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:27:23.409792  543705 memory.go:184] no items to output this cycle
I0322 14:27:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 14:27:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:27:33.409808  543705 memory.go:184] no items to output this cycle
I0322 14:27:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 14:27:35.149675  543705 disk_info.go:125] begin check local disk info of client
I0322 14:27:35.152202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:27:35.152208  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460240 0xc000460280]
I0322 14:27:39.681726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:27:39.681733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:27:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:27:43.410600  543705 memory.go:191] Add success.
I0322 14:27:43.409816  543705 cpu.go:282] Add success.
I0322 14:27:43.420354  543705 net.go:648] Add success.
I0322 14:27:43.423236  543705 net.go:770] primary dev: ETH0
I0322 14:27:43.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:27:43.423268  543705 net.go:698] Add success.
I0322 14:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:27:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:27:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:27:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:27:53.409783  543705 memory.go:184] no items to output this cycle
I0322 14:27:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 14:28:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:28:03.409772  543705 memory.go:184] no items to output this cycle
I0322 14:28:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 14:28:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:28:13.409827  543705 memory.go:191] Add success.
I0322 14:28:13.409829  543705 cpu.go:282] Add success.
W0322 14:28:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:28:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:28:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:28:13.420204  543705 net.go:648] Add success.
I0322 14:28:13.423456  543705 net.go:770] primary dev: ETH0
I0322 14:28:13.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:28:13.423481  543705 net.go:698] Add success.
I0322 14:28:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:28:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:28:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 14:28:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:28:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 14:28:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:28:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:28:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:28:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:28:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:28:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:28:23.409777  543705 memory.go:184] no items to output this cycle
I0322 14:28:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:28:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:28:33.409780  543705 memory.go:184] no items to output this cycle
I0322 14:28:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 14:28:35.153672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:28:35.156251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:28:35.156257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049a5c0 0xc00049a600]
E0322 14:28:43.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:28:43.410700  543705 memory.go:191] Add success.
I0322 14:28:43.409801  543705 cpu.go:282] Add success.
I0322 14:28:43.420432  543705 net.go:648] Add success.
I0322 14:28:43.423324  543705 net.go:770] primary dev: ETH0
I0322 14:28:43.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:28:43.423350  543705 net.go:698] Add success.
I0322 14:28:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:28:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:28:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:28:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:28:53.409807  543705 memory.go:184] no items to output this cycle
I0322 14:28:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 14:29:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:29:03.409787  543705 memory.go:184] no items to output this cycle
I0322 14:29:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 14:29:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:29:13.409779  543705 memory.go:191] Add success.
I0322 14:29:13.409804  543705 cpu.go:282] Add success.
W0322 14:29:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:29:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:29:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:29:13.420294  543705 net.go:648] Add success.
I0322 14:29:13.423046  543705 net.go:770] primary dev: ETH0
I0322 14:29:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:29:13.423089  543705 net.go:698] Add success.
I0322 14:29:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:29:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:29:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 14:29:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:29:14.456522  543705 disk_worker.go:494] system disk:vda1
I0322 14:29:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:29:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:29:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:29:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:29:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:29:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:29:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:29:23.409770  543705 memory.go:184] no items to output this cycle
I0322 14:29:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 14:29:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:29:33.409784  543705 memory.go:184] no items to output this cycle
I0322 14:29:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 14:29:35.157674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:29:35.160190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:29:35.160197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4ec0 0xc0000c4f00]
E0322 14:29:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:29:43.410638  543705 memory.go:191] Add success.
I0322 14:29:43.409806  543705 cpu.go:282] Add success.
I0322 14:29:43.420318  543705 net.go:648] Add success.
I0322 14:29:43.422927  543705 net.go:770] primary dev: ETH0
I0322 14:29:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:29:43.422952  543705 net.go:698] Add success.
I0322 14:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:29:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:29:53.409763  543705 memory.go:184] no items to output this cycle
I0322 14:29:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 14:30:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:30:03.409779  543705 memory.go:184] no items to output this cycle
I0322 14:30:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 14:30:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:30:13.409805  543705 memory.go:191] Add success.
I0322 14:30:13.409809  543705 cpu.go:282] Add success.
W0322 14:30:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:30:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:30:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:30:13.420321  543705 net.go:648] Add success.
I0322 14:30:13.423082  543705 net.go:770] primary dev: ETH0
I0322 14:30:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:30:13.423111  543705 net.go:698] Add success.
I0322 14:30:13.491190  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"56e2d0f2-f573-4fd4-a68c-486a1b512014","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:30:13.491224  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:30:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:30:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:30:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 14:30:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:30:14.456678  543705 disk_worker.go:494] system disk:vda1
I0322 14:30:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:30:15.455608  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:30:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:30:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:30:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:30:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:30:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:30:23.409773  543705 memory.go:184] no items to output this cycle
I0322 14:30:23.409774  543705 cpu.go:275] no items to output this cycle
E0322 14:30:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:30:33.409778  543705 memory.go:184] no items to output this cycle
I0322 14:30:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 14:30:35.161672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:30:35.164203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:30:35.164209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4280 0xc0000c42c0]
I0322 14:30:39.683115  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:30:39.683121  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:30:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:30:43.410547  543705 memory.go:191] Add success.
I0322 14:30:43.409785  543705 cpu.go:282] Add success.
I0322 14:30:43.420267  543705 net.go:648] Add success.
I0322 14:30:43.423118  543705 net.go:770] primary dev: ETH0
I0322 14:30:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:30:43.423148  543705 net.go:698] Add success.
I0322 14:30:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:30:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:30:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:30:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:30:53.409796  543705 memory.go:184] no items to output this cycle
I0322 14:30:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 14:31:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:31:03.409772  543705 memory.go:184] no items to output this cycle
I0322 14:31:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 14:31:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:31:13.409822  543705 memory.go:191] Add success.
I0322 14:31:13.409827  543705 cpu.go:282] Add success.
W0322 14:31:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:31:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:31:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:31:13.420219  543705 net.go:648] Add success.
I0322 14:31:13.423003  543705 net.go:770] primary dev: ETH0
I0322 14:31:13.423016  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:31:13.423028  543705 net.go:698] Add success.
I0322 14:31:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:31:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:31:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 14:31:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:31:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 14:31:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:31:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:31:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:31:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:31:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:31:16.472500  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:31:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:31:23.409785  543705 memory.go:184] no items to output this cycle
I0322 14:31:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 14:31:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:31:33.409818  543705 memory.go:184] no items to output this cycle
I0322 14:31:33.409829  543705 cpu.go:275] no items to output this cycle
I0322 14:31:35.165685  543705 disk_info.go:125] begin check local disk info of client
I0322 14:31:35.168251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:31:35.168257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d99c0 0xc0004d9a00]
E0322 14:31:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:31:43.410868  543705 memory.go:191] Add success.
I0322 14:31:43.409826  543705 cpu.go:282] Add success.
I0322 14:31:43.420572  543705 net.go:648] Add success.
I0322 14:31:43.423316  543705 net.go:770] primary dev: ETH0
I0322 14:31:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:31:43.423342  543705 net.go:698] Add success.
I0322 14:31:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:31:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:31:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:31:53.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:31:53.410243  543705 memory.go:184] no items to output this cycle
I0322 14:31:53.410268  543705 cpu.go:275] no items to output this cycle
E0322 14:32:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:32:03.409804  543705 memory.go:184] no items to output this cycle
I0322 14:32:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 14:32:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:32:13.409806  543705 memory.go:191] Add success.
I0322 14:32:13.409822  543705 cpu.go:282] Add success.
W0322 14:32:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:32:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:32:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:32:13.420154  543705 net.go:648] Add success.
I0322 14:32:13.423366  543705 net.go:770] primary dev: ETH0
I0322 14:32:13.423380  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:32:13.423392  543705 net.go:698] Add success.
W0322 14:32:14.455222  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:32:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0322 14:32:14.455239  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:32:14.455865  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:32:14.455874  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:32:14.455880  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:32:14.456808  543705 disk_worker.go:494] system disk:vda1
I0322 14:32:14.456853  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:32:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:32:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:32:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:32:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:32:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:32:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:32:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:32:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:32:23.409774  543705 memory.go:184] no items to output this cycle
I0322 14:32:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 14:32:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:32:33.409789  543705 memory.go:184] no items to output this cycle
I0322 14:32:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 14:32:35.169674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:32:35.172211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:32:35.172217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0322 14:32:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:32:43.410660  543705 memory.go:191] Add success.
I0322 14:32:43.409797  543705 cpu.go:282] Add success.
I0322 14:32:43.420347  543705 net.go:648] Add success.
I0322 14:32:43.422938  543705 net.go:770] primary dev: ETH0
I0322 14:32:43.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:32:43.422967  543705 net.go:698] Add success.
I0322 14:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:32:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:32:53.410367  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:32:53.410381  543705 cpu.go:275] no items to output this cycle
I0322 14:32:53.410383  543705 memory.go:184] no items to output this cycle
E0322 14:33:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:33:03.409805  543705 memory.go:184] no items to output this cycle
I0322 14:33:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:33:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:33:13.409808  543705 memory.go:191] Add success.
I0322 14:33:13.409808  543705 cpu.go:282] Add success.
W0322 14:33:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:33:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:33:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:33:13.420184  543705 net.go:648] Add success.
I0322 14:33:13.422763  543705 net.go:770] primary dev: ETH0
I0322 14:33:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:33:13.422798  543705 net.go:698] Add success.
I0322 14:33:13.471131  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e089c10d-9f86-4dfd-8c99-7f7f8af93ddd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:33:13.471174  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:33:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:33:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:33:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 14:33:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:33:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 14:33:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:33:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:33:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:33:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:33:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:33:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:33:23.409802  543705 memory.go:184] no items to output this cycle
I0322 14:33:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 14:33:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:33:33.409780  543705 memory.go:184] no items to output this cycle
I0322 14:33:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 14:33:35.173678  543705 disk_info.go:125] begin check local disk info of client
I0322 14:33:35.176215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:33:35.176221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1640 0xc0004a1680]
I0322 14:33:39.683257  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:33:39.683263  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:33:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:33:43.410629  543705 memory.go:191] Add success.
I0322 14:33:43.409796  543705 cpu.go:282] Add success.
I0322 14:33:43.420357  543705 net.go:648] Add success.
I0322 14:33:43.423086  543705 net.go:770] primary dev: ETH0
I0322 14:33:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:33:43.423116  543705 net.go:698] Add success.
I0322 14:33:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:33:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:33:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:33:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:33:53.409760  543705 memory.go:184] no items to output this cycle
I0322 14:33:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 14:34:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:34:03.409770  543705 memory.go:184] no items to output this cycle
I0322 14:34:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 14:34:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:34:13.409828  543705 memory.go:191] Add success.
I0322 14:34:13.409838  543705 cpu.go:282] Add success.
W0322 14:34:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:34:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:34:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:34:13.420187  543705 net.go:648] Add success.
I0322 14:34:13.423094  543705 net.go:770] primary dev: ETH0
I0322 14:34:13.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:34:13.423119  543705 net.go:698] Add success.
I0322 14:34:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:34:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:34:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 14:34:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:34:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 14:34:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:34:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:34:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:34:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:34:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:34:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:34:23.409795  543705 memory.go:184] no items to output this cycle
I0322 14:34:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 14:34:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:34:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 14:34:33.409788  543705 memory.go:184] no items to output this cycle
I0322 14:34:35.177673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:34:35.180209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:34:35.180215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5280 0xc0000c52c0]
E0322 14:34:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:34:43.410719  543705 memory.go:191] Add success.
I0322 14:34:43.409807  543705 cpu.go:282] Add success.
I0322 14:34:43.420413  543705 net.go:648] Add success.
I0322 14:34:43.423189  543705 net.go:770] primary dev: ETH0
I0322 14:34:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:34:43.423215  543705 net.go:698] Add success.
I0322 14:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:34:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:34:53.409770  543705 memory.go:184] no items to output this cycle
I0322 14:34:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 14:35:03.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:35:03.409882  543705 memory.go:184] no items to output this cycle
I0322 14:35:03.409930  543705 cpu.go:275] no items to output this cycle
E0322 14:35:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:35:13.409794  543705 memory.go:191] Add success.
I0322 14:35:13.409811  543705 cpu.go:282] Add success.
W0322 14:35:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:35:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:35:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:35:13.420173  543705 net.go:648] Add success.
I0322 14:35:13.422882  543705 net.go:770] primary dev: ETH0
I0322 14:35:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:35:13.422907  543705 net.go:698] Add success.
I0322 14:35:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:35:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:35:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 14:35:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:35:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 14:35:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:35:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:35:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:35:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:35:23.409773  543705 memory.go:184] no items to output this cycle
I0322 14:35:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 14:35:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:35:33.409779  543705 memory.go:184] no items to output this cycle
I0322 14:35:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 14:35:35.181672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:35:35.184188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:35:35.184194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0a00 0xc0004a0a40]
E0322 14:35:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:35:43.410674  543705 memory.go:191] Add success.
I0322 14:35:43.409815  543705 cpu.go:282] Add success.
I0322 14:35:43.420366  543705 net.go:648] Add success.
I0322 14:35:43.422937  543705 net.go:770] primary dev: ETH0
I0322 14:35:43.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:35:43.422962  543705 net.go:698] Add success.
I0322 14:35:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:35:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:35:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:35:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:35:53.409798  543705 memory.go:184] no items to output this cycle
I0322 14:35:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 14:36:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:36:03.409788  543705 memory.go:184] no items to output this cycle
I0322 14:36:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 14:36:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:36:13.409804  543705 memory.go:191] Add success.
I0322 14:36:13.409815  543705 cpu.go:282] Add success.
W0322 14:36:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:36:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:36:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:36:13.420109  543705 net.go:648] Add success.
I0322 14:36:13.422719  543705 net.go:770] primary dev: ETH0
I0322 14:36:13.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:36:13.422745  543705 net.go:698] Add success.
I0322 14:36:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:36:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:36:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 14:36:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:36:14.456605  543705 disk_worker.go:494] system disk:vda1
I0322 14:36:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:36:14.472009  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6094c443-8e8e-4dee-abf4-c19e2dcb8fe0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:36:14.472051  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:36:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:36:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:36:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:36:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:36:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:36:23.409801  543705 memory.go:184] no items to output this cycle
I0322 14:36:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:36:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:36:33.409787  543705 memory.go:184] no items to output this cycle
I0322 14:36:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 14:36:35.185673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:36:35.188242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:36:35.188248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf740 0xc0003bf780]
I0322 14:36:39.684122  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:36:39.684128  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:36:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:36:43.410663  543705 memory.go:191] Add success.
I0322 14:36:43.409797  543705 cpu.go:282] Add success.
I0322 14:36:43.420498  543705 net.go:648] Add success.
I0322 14:36:43.423096  543705 net.go:770] primary dev: ETH0
I0322 14:36:43.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:36:43.423121  543705 net.go:698] Add success.
I0322 14:36:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:36:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:36:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:36:53.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:36:53.409759  543705 memory.go:184] no items to output this cycle
I0322 14:36:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 14:37:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:37:03.409786  543705 memory.go:184] no items to output this cycle
I0322 14:37:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 14:37:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:37:13.409788  543705 memory.go:191] Add success.
I0322 14:37:13.409792  543705 cpu.go:282] Add success.
W0322 14:37:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:37:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:37:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:37:13.420269  543705 net.go:648] Add success.
I0322 14:37:13.423234  543705 net.go:770] primary dev: ETH0
I0322 14:37:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:37:13.423267  543705 net.go:698] Add success.
I0322 14:37:13.452776  543705 event_worker.go:152] Polling the log file for events...
W0322 14:37:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:37:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 14:37:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:37:14.456760  543705 disk_worker.go:494] system disk:vda1
I0322 14:37:14.456805  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:37:14.457399  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:37:14.457407  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:37:14.457422  543705 custom_config.go:64] query custom config with name: gpu
E0322 14:37:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:37:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:37:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:37:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:37:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:37:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:37:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:37:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:37:23.409764  543705 memory.go:184] no items to output this cycle
I0322 14:37:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 14:37:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:37:33.409788  543705 memory.go:184] no items to output this cycle
I0322 14:37:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 14:37:35.189676  543705 disk_info.go:125] begin check local disk info of client
I0322 14:37:35.192248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:37:35.192256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab300 0xc0001ab400]
E0322 14:37:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:37:43.410858  543705 memory.go:191] Add success.
I0322 14:37:43.409812  543705 cpu.go:282] Add success.
I0322 14:37:43.420548  543705 net.go:648] Add success.
I0322 14:37:43.423064  543705 net.go:770] primary dev: ETH0
I0322 14:37:43.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:37:43.423092  543705 net.go:698] Add success.
I0322 14:37:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:37:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:37:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:37:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:37:53.409799  543705 memory.go:184] no items to output this cycle
I0322 14:37:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 14:38:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:38:03.409772  543705 memory.go:184] no items to output this cycle
I0322 14:38:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:38:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:38:13.409804  543705 memory.go:191] Add success.
I0322 14:38:13.409828  543705 cpu.go:282] Add success.
W0322 14:38:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:38:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:38:13.420394  543705 net.go:648] Add success.
I0322 14:38:13.423077  543705 net.go:770] primary dev: ETH0
I0322 14:38:13.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:38:13.423103  543705 net.go:698] Add success.
I0322 14:38:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:38:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:38:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0322 14:38:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:38:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 14:38:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:38:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:38:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:38:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:38:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:38:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:38:23.409795  543705 memory.go:184] no items to output this cycle
I0322 14:38:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 14:38:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:38:33.409810  543705 memory.go:184] no items to output this cycle
I0322 14:38:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 14:38:35.193674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:38:35.196269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:38:35.196276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5300 0xc0000c5340]
E0322 14:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:38:43.410588  543705 memory.go:191] Add success.
I0322 14:38:43.409785  543705 cpu.go:282] Add success.
I0322 14:38:43.420302  543705 net.go:648] Add success.
I0322 14:38:43.422836  543705 net.go:770] primary dev: ETH0
I0322 14:38:43.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:38:43.422865  543705 net.go:698] Add success.
I0322 14:38:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:38:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:38:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:38:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:38:53.409782  543705 memory.go:184] no items to output this cycle
I0322 14:38:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 14:39:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:39:03.409789  543705 memory.go:184] no items to output this cycle
I0322 14:39:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 14:39:13.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:39:13.409963  543705 cpu.go:282] Add success.
I0322 14:39:13.409988  543705 memory.go:191] Add success.
W0322 14:39:13.410016  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:39:13.410041  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:39:13.410045  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:39:13.419709  543705 net.go:648] Add success.
I0322 14:39:13.422514  543705 net.go:770] primary dev: ETH0
I0322 14:39:13.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:39:13.422539  543705 net.go:698] Add success.
I0322 14:39:13.468256  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"30088686-18cd-471f-9692-a353dce312b1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:39:13.468291  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:39:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:39:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:39:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 14:39:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:39:14.456746  543705 disk_worker.go:494] system disk:vda1
I0322 14:39:14.456776  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:39:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:39:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:39:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:39:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:39:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:39:23.409791  543705 memory.go:184] no items to output this cycle
I0322 14:39:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 14:39:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:39:33.409797  543705 memory.go:184] no items to output this cycle
I0322 14:39:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 14:39:35.197672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:39:35.200208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:39:35.200214  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052cc80 0xc00052ccc0]
I0322 14:39:39.685116  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:39:39.685123  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:39:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:39:43.410794  543705 memory.go:191] Add success.
I0322 14:39:43.409809  543705 cpu.go:282] Add success.
I0322 14:39:43.420552  543705 net.go:648] Add success.
I0322 14:39:43.423362  543705 net.go:770] primary dev: ETH0
I0322 14:39:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:39:43.423389  543705 net.go:698] Add success.
I0322 14:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:39:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:39:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:39:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:39:53.409781  543705 memory.go:184] no items to output this cycle
I0322 14:39:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 14:40:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:40:03.409768  543705 memory.go:184] no items to output this cycle
I0322 14:40:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 14:40:13.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:40:13.409886  543705 memory.go:191] Add success.
W0322 14:40:13.409917  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 14:40:13.409962  543705 cpu.go:282] Add success.
W0322 14:40:13.410009  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:40:13.410014  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:40:13.419716  543705 net.go:648] Add success.
I0322 14:40:13.422633  543705 net.go:770] primary dev: ETH0
I0322 14:40:13.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:40:13.422658  543705 net.go:698] Add success.
I0322 14:40:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:40:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:40:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 14:40:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:40:14.456629  543705 disk_worker.go:494] system disk:vda1
I0322 14:40:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:40:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:40:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:40:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:40:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:40:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:40:23.409768  543705 memory.go:184] no items to output this cycle
I0322 14:40:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 14:40:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:40:33.409807  543705 memory.go:184] no items to output this cycle
I0322 14:40:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 14:40:35.201674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:40:35.204210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:40:35.204216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab0c0 0xc0001ab100]
E0322 14:40:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:40:43.410740  543705 memory.go:191] Add success.
I0322 14:40:43.409832  543705 cpu.go:282] Add success.
I0322 14:40:43.420427  543705 net.go:648] Add success.
I0322 14:40:43.423026  543705 net.go:770] primary dev: ETH0
I0322 14:40:43.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:40:43.423052  543705 net.go:698] Add success.
I0322 14:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:40:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:40:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:40:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:40:53.409803  543705 memory.go:184] no items to output this cycle
I0322 14:40:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 14:41:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:41:03.409801  543705 memory.go:184] no items to output this cycle
I0322 14:41:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:41:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:41:13.409816  543705 memory.go:191] Add success.
I0322 14:41:13.409818  543705 cpu.go:282] Add success.
W0322 14:41:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:41:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:41:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:41:13.420154  543705 net.go:648] Add success.
I0322 14:41:13.422898  543705 net.go:770] primary dev: ETH0
I0322 14:41:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:41:13.422923  543705 net.go:698] Add success.
I0322 14:41:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:41:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:41:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 14:41:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:41:14.456585  543705 disk_worker.go:494] system disk:vda1
I0322 14:41:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:41:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:41:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:41:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:41:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:41:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:41:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:41:23.409792  543705 memory.go:184] no items to output this cycle
I0322 14:41:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 14:41:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:41:33.409785  543705 memory.go:184] no items to output this cycle
I0322 14:41:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 14:41:35.205681  543705 disk_info.go:125] begin check local disk info of client
I0322 14:41:35.208230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:41:35.208237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2a40 0xc0002a2a80]
E0322 14:41:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:41:43.410794  543705 memory.go:191] Add success.
I0322 14:41:43.409828  543705 cpu.go:282] Add success.
I0322 14:41:43.420476  543705 net.go:648] Add success.
I0322 14:41:43.423402  543705 net.go:770] primary dev: ETH0
I0322 14:41:43.423416  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:41:43.423428  543705 net.go:698] Add success.
I0322 14:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:41:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:41:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:41:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:41:53.409771  543705 memory.go:184] no items to output this cycle
I0322 14:41:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 14:42:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:42:03.409784  543705 memory.go:184] no items to output this cycle
I0322 14:42:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 14:42:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:42:13.409806  543705 memory.go:191] Add success.
I0322 14:42:13.409824  543705 cpu.go:282] Add success.
W0322 14:42:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:42:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:42:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:42:13.420268  543705 net.go:648] Add success.
I0322 14:42:13.423152  543705 net.go:770] primary dev: ETH0
I0322 14:42:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:42:13.423197  543705 net.go:698] Add success.
I0322 14:42:13.469202  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"08c3c569-5f91-4d5c-8a49-75fd312db1e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:42:13.469236  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 14:42:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:42:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 14:42:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:42:14.456865  543705 disk_worker.go:494] system disk:vda1
I0322 14:42:14.456905  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:42:14.457094  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:42:14.457101  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:42:14.457105  543705 custom_config.go:64] query custom config with name: gpu
E0322 14:42:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:42:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:42:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:42:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:42:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:42:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:42:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:42:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:42:23.409774  543705 memory.go:184] no items to output this cycle
I0322 14:42:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 14:42:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:42:33.409817  543705 memory.go:184] no items to output this cycle
I0322 14:42:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 14:42:35.209672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:42:35.212207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:42:35.212213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c56c0 0xc0000c5700]
I0322 14:42:39.685740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:42:39.685746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:42:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:42:43.410702  543705 memory.go:191] Add success.
I0322 14:42:43.409810  543705 cpu.go:282] Add success.
I0322 14:42:43.420488  543705 net.go:648] Add success.
I0322 14:42:43.423667  543705 net.go:770] primary dev: ETH0
I0322 14:42:43.423681  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:42:43.423696  543705 net.go:698] Add success.
I0322 14:42:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:42:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:42:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:42:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:42:53.409805  543705 memory.go:184] no items to output this cycle
I0322 14:42:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 14:43:03.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:43:03.409919  543705 memory.go:184] no items to output this cycle
I0322 14:43:03.409931  543705 cpu.go:275] no items to output this cycle
E0322 14:43:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:43:13.409796  543705 memory.go:191] Add success.
W0322 14:43:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:43:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:43:13.409836  543705 cpu.go:282] Add success.
I0322 14:43:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:43:13.420043  543705 net.go:648] Add success.
I0322 14:43:13.422796  543705 net.go:770] primary dev: ETH0
I0322 14:43:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:43:13.422821  543705 net.go:698] Add success.
I0322 14:43:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:43:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:43:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 14:43:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:43:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 14:43:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:43:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:43:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:43:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:43:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:43:16.472564  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:43:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:43:23.409808  543705 memory.go:184] no items to output this cycle
I0322 14:43:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 14:43:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:43:33.409821  543705 memory.go:184] no items to output this cycle
I0322 14:43:33.409832  543705 cpu.go:275] no items to output this cycle
I0322 14:43:35.213670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:43:35.216214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:43:35.216221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
E0322 14:43:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:43:43.410719  543705 memory.go:191] Add success.
I0322 14:43:43.409805  543705 cpu.go:282] Add success.
I0322 14:43:43.420486  543705 net.go:648] Add success.
I0322 14:43:43.423482  543705 net.go:770] primary dev: ETH0
I0322 14:43:43.423497  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:43:43.423509  543705 net.go:698] Add success.
I0322 14:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:43:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:43:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:43:53.410339  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:43:53.410353  543705 memory.go:184] no items to output this cycle
I0322 14:43:53.410368  543705 cpu.go:275] no items to output this cycle
E0322 14:44:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:44:03.409766  543705 memory.go:184] no items to output this cycle
I0322 14:44:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 14:44:13.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:44:13.409900  543705 memory.go:191] Add success.
W0322 14:44:13.409928  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 14:44:13.409998  543705 cpu.go:282] Add success.
W0322 14:44:13.410043  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:44:13.410048  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:44:13.419712  543705 net.go:648] Add success.
I0322 14:44:13.422451  543705 net.go:770] primary dev: ETH0
I0322 14:44:13.422467  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:44:13.422480  543705 net.go:698] Add success.
I0322 14:44:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:44:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:44:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 14:44:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:44:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 14:44:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:44:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:44:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:44:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:44:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:44:23.409796  543705 memory.go:184] no items to output this cycle
I0322 14:44:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 14:44:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:44:33.409782  543705 memory.go:184] no items to output this cycle
I0322 14:44:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 14:44:35.217670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:44:35.220241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:44:35.220246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4380 0xc0000c43c0]
E0322 14:44:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:44:43.410614  543705 memory.go:191] Add success.
I0322 14:44:43.409796  543705 cpu.go:282] Add success.
I0322 14:44:43.420298  543705 net.go:648] Add success.
I0322 14:44:43.422863  543705 net.go:770] primary dev: ETH0
I0322 14:44:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:44:43.422891  543705 net.go:698] Add success.
I0322 14:44:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:44:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:44:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:44:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:44:53.409764  543705 memory.go:184] no items to output this cycle
I0322 14:44:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:45:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:45:03.409806  543705 memory.go:184] no items to output this cycle
I0322 14:45:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 14:45:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:45:13.409801  543705 memory.go:191] Add success.
I0322 14:45:13.409804  543705 cpu.go:282] Add success.
W0322 14:45:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:45:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:45:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:45:13.420127  543705 net.go:648] Add success.
I0322 14:45:13.422904  543705 net.go:770] primary dev: ETH0
I0322 14:45:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:45:13.422932  543705 net.go:698] Add success.
I0322 14:45:13.468065  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ecd0283-5013-4f36-b1f3-9b6e505bce87","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:45:13.468098  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:45:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:45:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:45:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 14:45:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:45:14.456509  543705 disk_worker.go:494] system disk:vda1
I0322 14:45:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:45:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:45:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:45:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:45:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:45:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:45:23.409774  543705 memory.go:184] no items to output this cycle
I0322 14:45:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 14:45:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:45:33.409785  543705 memory.go:184] no items to output this cycle
I0322 14:45:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 14:45:35.221672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:45:35.224196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:45:35.224203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b0c0 0xc00007b100]
I0322 14:45:39.687128  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:45:39.687134  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:45:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:45:43.410599  543705 memory.go:191] Add success.
I0322 14:45:43.409795  543705 cpu.go:282] Add success.
I0322 14:45:43.420290  543705 net.go:648] Add success.
I0322 14:45:43.422923  543705 net.go:770] primary dev: ETH0
I0322 14:45:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:45:43.422953  543705 net.go:698] Add success.
I0322 14:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:45:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:45:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:45:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:45:53.409792  543705 memory.go:184] no items to output this cycle
I0322 14:45:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 14:46:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:46:03.409773  543705 memory.go:184] no items to output this cycle
I0322 14:46:03.409795  543705 cpu.go:275] no items to output this cycle
W0322 14:46:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:46:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:46:13.409746  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 14:46:13.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:46:13.409844  543705 cpu.go:282] Add success.
I0322 14:46:13.409859  543705 memory.go:191] Add success.
I0322 14:46:13.420161  543705 net.go:648] Add success.
I0322 14:46:13.422943  543705 net.go:770] primary dev: ETH0
I0322 14:46:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:46:13.422970  543705 net.go:698] Add success.
I0322 14:46:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:46:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:46:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 14:46:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:46:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 14:46:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:46:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:46:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:46:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:46:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:46:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:46:23.409794  543705 memory.go:184] no items to output this cycle
I0322 14:46:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 14:46:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:46:33.409787  543705 memory.go:184] no items to output this cycle
I0322 14:46:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 14:46:35.225672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:46:35.228302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:46:35.228308  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8080 0xc0004d80c0]
E0322 14:46:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:46:43.410871  543705 memory.go:191] Add success.
I0322 14:46:43.409799  543705 cpu.go:282] Add success.
I0322 14:46:43.420595  543705 net.go:648] Add success.
I0322 14:46:43.423020  543705 net.go:770] primary dev: ETH0
I0322 14:46:43.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:46:43.423044  543705 net.go:698] Add success.
I0322 14:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:46:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:46:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:46:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:46:53.409814  543705 memory.go:184] no items to output this cycle
I0322 14:46:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 14:47:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:47:03.409781  543705 memory.go:184] no items to output this cycle
I0322 14:47:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 14:47:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:47:13.409794  543705 cpu.go:282] Add success.
I0322 14:47:13.409803  543705 memory.go:191] Add success.
W0322 14:47:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:47:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:47:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:47:13.420140  543705 net.go:648] Add success.
I0322 14:47:13.422860  543705 net.go:770] primary dev: ETH0
I0322 14:47:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:47:13.422889  543705 net.go:698] Add success.
I0322 14:47:13.453439  543705 event_worker.go:152] Polling the log file for events...
W0322 14:47:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:47:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 14:47:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:47:14.455874  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:47:14.455883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:47:14.455888  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:47:14.456534  543705 disk_worker.go:494] system disk:vda1
I0322 14:47:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:47:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:47:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:47:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:47:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:47:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:47:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:47:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:47:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:47:23.409762  543705 memory.go:184] no items to output this cycle
I0322 14:47:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 14:47:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:47:33.409810  543705 memory.go:184] no items to output this cycle
I0322 14:47:33.409827  543705 cpu.go:275] no items to output this cycle
I0322 14:47:35.229676  543705 disk_info.go:125] begin check local disk info of client
I0322 14:47:35.232212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:47:35.232218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304440 0xc000304480]
E0322 14:47:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:47:43.410729  543705 memory.go:191] Add success.
I0322 14:47:43.409806  543705 cpu.go:282] Add success.
I0322 14:47:43.420407  543705 net.go:648] Add success.
I0322 14:47:43.423069  543705 net.go:770] primary dev: ETH0
I0322 14:47:43.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:47:43.423094  543705 net.go:698] Add success.
I0322 14:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:47:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:47:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:47:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:47:53.409855  543705 memory.go:184] no items to output this cycle
I0322 14:47:53.409925  543705 cpu.go:275] no items to output this cycle
E0322 14:48:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:48:03.409777  543705 memory.go:184] no items to output this cycle
I0322 14:48:03.409784  543705 cpu.go:275] no items to output this cycle
W0322 14:48:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:48:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:48:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:48:13.409811  543705 cpu.go:282] Add success.
E0322 14:48:13.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:48:13.409831  543705 memory.go:191] Add success.
I0322 14:48:13.420285  543705 net.go:648] Add success.
I0322 14:48:13.423116  543705 net.go:770] primary dev: ETH0
I0322 14:48:13.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:48:13.423142  543705 net.go:698] Add success.
I0322 14:48:13.469701  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3cc7640f-8710-436f-abc3-9436d6aeed42","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:48:13.469733  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:48:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:48:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 14:48:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:48:14.456648  543705 disk_worker.go:494] system disk:vda1
I0322 14:48:14.456677  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:48:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:48:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:48:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:48:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:48:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:48:23.409769  543705 memory.go:184] no items to output this cycle
I0322 14:48:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 14:48:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:48:33.409786  543705 memory.go:184] no items to output this cycle
I0322 14:48:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 14:48:35.233677  543705 disk_info.go:125] begin check local disk info of client
I0322 14:48:35.236225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:48:35.236231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003afa00 0xc0003afa40]
I0322 14:48:39.688134  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:48:39.688140  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:48:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:48:43.410742  543705 memory.go:191] Add success.
I0322 14:48:43.409815  543705 cpu.go:282] Add success.
I0322 14:48:43.420440  543705 net.go:648] Add success.
I0322 14:48:43.423154  543705 net.go:770] primary dev: ETH0
I0322 14:48:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:48:43.423380  543705 net.go:698] Add success.
I0322 14:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:48:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:48:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:48:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:48:53.409774  543705 memory.go:184] no items to output this cycle
I0322 14:48:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 14:49:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:49:03.409780  543705 cpu.go:275] no items to output this cycle
I0322 14:49:03.409792  543705 memory.go:184] no items to output this cycle
E0322 14:49:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:49:13.409804  543705 memory.go:191] Add success.
I0322 14:49:13.409806  543705 cpu.go:282] Add success.
W0322 14:49:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:49:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:49:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:49:13.420149  543705 net.go:648] Add success.
I0322 14:49:13.423329  543705 net.go:770] primary dev: ETH0
I0322 14:49:13.423342  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:49:13.423354  543705 net.go:698] Add success.
I0322 14:49:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:49:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:49:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 14:49:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:49:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 14:49:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:49:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:49:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:49:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:49:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:49:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:49:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:49:23.409770  543705 memory.go:184] no items to output this cycle
I0322 14:49:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 14:49:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:49:33.409809  543705 memory.go:184] no items to output this cycle
I0322 14:49:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 14:49:35.237673  543705 disk_info.go:125] begin check local disk info of client
I0322 14:49:35.240219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:49:35.240225  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312e80 0xc000312ec0]
E0322 14:49:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:49:43.410609  543705 memory.go:191] Add success.
I0322 14:49:43.409803  543705 cpu.go:282] Add success.
I0322 14:49:43.419739  543705 net.go:648] Add success.
I0322 14:49:43.422399  543705 net.go:770] primary dev: ETH0
I0322 14:49:43.422412  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:49:43.422424  543705 net.go:698] Add success.
I0322 14:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:49:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:49:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:49:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:49:53.409780  543705 memory.go:184] no items to output this cycle
I0322 14:49:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 14:50:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:50:03.409799  543705 memory.go:184] no items to output this cycle
I0322 14:50:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 14:50:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:50:13.409822  543705 memory.go:191] Add success.
I0322 14:50:13.409825  543705 cpu.go:282] Add success.
W0322 14:50:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:50:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:50:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:50:13.420267  543705 net.go:648] Add success.
I0322 14:50:13.422970  543705 net.go:770] primary dev: ETH0
I0322 14:50:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:50:13.422995  543705 net.go:698] Add success.
I0322 14:50:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:50:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:50:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 14:50:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:50:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 14:50:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:50:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:50:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:50:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:50:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:50:23.409773  543705 memory.go:184] no items to output this cycle
I0322 14:50:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 14:50:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:50:33.409778  543705 memory.go:184] no items to output this cycle
I0322 14:50:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 14:50:35.241675  543705 disk_info.go:125] begin check local disk info of client
I0322 14:50:35.244260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:50:35.244266  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353e80 0xc000353ec0]
E0322 14:50:43.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:50:43.410776  543705 memory.go:191] Add success.
I0322 14:50:43.410008  543705 cpu.go:282] Add success.
I0322 14:50:43.419713  543705 net.go:648] Add success.
I0322 14:50:43.422503  543705 net.go:770] primary dev: ETH0
I0322 14:50:43.422516  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:50:43.422528  543705 net.go:698] Add success.
I0322 14:50:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:50:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:50:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:50:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:50:53.409784  543705 memory.go:184] no items to output this cycle
I0322 14:50:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 14:51:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:51:03.409796  543705 memory.go:184] no items to output this cycle
I0322 14:51:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 14:51:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:51:13.409794  543705 memory.go:191] Add success.
I0322 14:51:13.409799  543705 cpu.go:282] Add success.
W0322 14:51:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:51:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:51:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:51:13.420114  543705 net.go:648] Add success.
I0322 14:51:13.422771  543705 net.go:770] primary dev: ETH0
I0322 14:51:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:51:13.422801  543705 net.go:698] Add success.
I0322 14:51:13.880305  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"486e204f-5630-4141-87a8-c499956fd7c4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:51:13.880340  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:51:14.454683  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:51:14.454892  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:51:14.454905  543705 disk_worker.go:708] disk space is not compliant
W0322 14:51:14.454907  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:51:14.456250  543705 disk_worker.go:494] system disk:vda1
I0322 14:51:14.456297  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:51:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:51:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:51:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:51:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:51:23.409799  543705 memory.go:184] no items to output this cycle
I0322 14:51:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 14:51:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:51:33.409794  543705 memory.go:184] no items to output this cycle
I0322 14:51:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 14:51:35.245669  543705 disk_info.go:125] begin check local disk info of client
I0322 14:51:35.248274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:51:35.248280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002576c0 0xc000257700]
I0322 14:51:39.689135  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:51:39.689141  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:51:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:51:43.409805  543705 cpu.go:282] Add success.
I0322 14:51:43.410863  543705 memory.go:191] Add success.
I0322 14:51:43.419707  543705 net.go:648] Add success.
I0322 14:51:43.422579  543705 net.go:770] primary dev: ETH0
I0322 14:51:43.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:51:43.422604  543705 net.go:698] Add success.
I0322 14:51:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:51:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:51:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:51:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:51:53.409779  543705 memory.go:184] no items to output this cycle
I0322 14:51:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 14:52:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:52:03.409794  543705 memory.go:184] no items to output this cycle
I0322 14:52:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 14:52:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:52:13.409799  543705 memory.go:191] Add success.
I0322 14:52:13.409817  543705 cpu.go:282] Add success.
W0322 14:52:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:52:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:52:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:52:13.420496  543705 net.go:648] Add success.
I0322 14:52:13.423410  543705 net.go:770] primary dev: ETH0
I0322 14:52:13.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:52:13.423435  543705 net.go:698] Add success.
W0322 14:52:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:52:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 14:52:14.455204  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:52:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:52:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:52:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:52:14.456659  543705 disk_worker.go:494] system disk:vda1
I0322 14:52:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:52:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:52:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:52:16.457897  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 14:52:16.457897  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:52:16.457950  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:52:16.457970  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:52:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:52:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:52:23.409793  543705 memory.go:184] no items to output this cycle
I0322 14:52:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 14:52:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:52:33.409785  543705 memory.go:184] no items to output this cycle
I0322 14:52:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 14:52:35.249678  543705 disk_info.go:125] begin check local disk info of client
I0322 14:52:35.252212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:52:35.252217  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c180 0xc00039c1c0]
E0322 14:52:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:52:43.410790  543705 memory.go:191] Add success.
I0322 14:52:43.409810  543705 cpu.go:282] Add success.
I0322 14:52:43.420586  543705 net.go:648] Add success.
I0322 14:52:43.423437  543705 net.go:770] primary dev: ETH0
I0322 14:52:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:52:43.423466  543705 net.go:698] Add success.
I0322 14:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:52:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:52:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:52:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:52:53.409780  543705 memory.go:184] no items to output this cycle
I0322 14:52:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 14:53:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:53:03.409775  543705 memory.go:184] no items to output this cycle
I0322 14:53:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 14:53:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:53:13.409817  543705 memory.go:191] Add success.
I0322 14:53:13.409820  543705 cpu.go:282] Add success.
W0322 14:53:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:53:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:53:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:53:13.420275  543705 net.go:648] Add success.
I0322 14:53:13.423843  543705 net.go:770] primary dev: ETH0
I0322 14:53:13.423857  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:53:13.423870  543705 net.go:698] Add success.
I0322 14:53:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:53:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:53:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 14:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:53:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 14:53:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:53:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:53:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:53:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:53:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:53:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:53:23.409780  543705 memory.go:184] no items to output this cycle
I0322 14:53:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 14:53:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:53:33.409803  543705 memory.go:184] no items to output this cycle
I0322 14:53:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 14:53:35.253670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:53:35.256227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:53:35.256233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba800 0xc0003ba840]
E0322 14:53:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:53:43.410834  543705 memory.go:191] Add success.
I0322 14:53:43.409817  543705 cpu.go:282] Add success.
I0322 14:53:43.420529  543705 net.go:648] Add success.
I0322 14:53:43.423344  543705 net.go:770] primary dev: ETH0
I0322 14:53:43.423357  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:53:43.423368  543705 net.go:698] Add success.
I0322 14:53:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:53:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:53:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:53:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:53:53.409780  543705 memory.go:184] no items to output this cycle
I0322 14:53:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 14:54:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:54:03.409764  543705 memory.go:184] no items to output this cycle
I0322 14:54:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 14:54:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:54:13.409807  543705 cpu.go:282] Add success.
I0322 14:54:13.409812  543705 memory.go:191] Add success.
W0322 14:54:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:54:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:54:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:54:13.420118  543705 net.go:648] Add success.
I0322 14:54:13.423227  543705 net.go:770] primary dev: ETH0
I0322 14:54:13.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:54:13.423252  543705 net.go:698] Add success.
I0322 14:54:13.463593  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"42a27e91-f374-4fbc-83d0-91768849cd99","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:54:13.463631  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 14:54:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:54:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:54:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 14:54:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:54:14.456717  543705 disk_worker.go:494] system disk:vda1
I0322 14:54:14.456748  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:54:15.455612  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:54:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:54:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:54:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:54:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:54:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:54:23.409803  543705 memory.go:184] no items to output this cycle
I0322 14:54:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 14:54:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:54:33.409805  543705 memory.go:184] no items to output this cycle
I0322 14:54:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 14:54:35.257674  543705 disk_info.go:125] begin check local disk info of client
I0322 14:54:35.260272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:54:35.260278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3b00 0xc0002a3b40]
I0322 14:54:39.689732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:54:39.689740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:54:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:54:43.410604  543705 memory.go:191] Add success.
I0322 14:54:43.409810  543705 cpu.go:282] Add success.
I0322 14:54:43.420296  543705 net.go:648] Add success.
I0322 14:54:43.423172  543705 net.go:770] primary dev: ETH0
I0322 14:54:43.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:54:43.423204  543705 net.go:698] Add success.
I0322 14:54:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:54:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:54:53.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:54:53.409902  543705 memory.go:184] no items to output this cycle
I0322 14:54:53.409922  543705 cpu.go:275] no items to output this cycle
E0322 14:55:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:55:03.409774  543705 memory.go:184] no items to output this cycle
I0322 14:55:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 14:55:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:55:13.409808  543705 memory.go:191] Add success.
I0322 14:55:13.409819  543705 cpu.go:282] Add success.
W0322 14:55:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:55:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:55:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:55:13.420142  543705 net.go:648] Add success.
I0322 14:55:13.423069  543705 net.go:770] primary dev: ETH0
I0322 14:55:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:55:13.423099  543705 net.go:698] Add success.
I0322 14:55:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:55:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:55:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 14:55:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:55:14.456624  543705 disk_worker.go:494] system disk:vda1
I0322 14:55:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:55:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:55:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:55:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:55:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:55:16.472501  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:55:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:55:23.409803  543705 memory.go:184] no items to output this cycle
I0322 14:55:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 14:55:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:55:33.409807  543705 memory.go:184] no items to output this cycle
I0322 14:55:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 14:55:35.261677  543705 disk_info.go:125] begin check local disk info of client
I0322 14:55:35.264235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:55:35.264241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4340 0xc0000c4380]
E0322 14:55:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:55:43.410790  543705 memory.go:191] Add success.
I0322 14:55:43.409791  543705 cpu.go:282] Add success.
I0322 14:55:43.420553  543705 net.go:648] Add success.
I0322 14:55:43.423512  543705 net.go:770] primary dev: ETH0
I0322 14:55:43.423524  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:55:43.423538  543705 net.go:698] Add success.
I0322 14:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:55:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:55:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:55:53.410258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:55:53.410274  543705 memory.go:184] no items to output this cycle
I0322 14:55:53.410275  543705 cpu.go:275] no items to output this cycle
E0322 14:56:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:56:03.409874  543705 memory.go:184] no items to output this cycle
I0322 14:56:03.409921  543705 cpu.go:275] no items to output this cycle
E0322 14:56:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:56:13.409804  543705 memory.go:191] Add success.
I0322 14:56:13.409805  543705 cpu.go:282] Add success.
W0322 14:56:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:56:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:56:13.420339  543705 net.go:648] Add success.
I0322 14:56:13.422885  543705 net.go:770] primary dev: ETH0
I0322 14:56:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:56:13.422911  543705 net.go:698] Add success.
I0322 14:56:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:56:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:56:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 14:56:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:56:14.456512  543705 disk_worker.go:494] system disk:vda1
I0322 14:56:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:56:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:56:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:56:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:56:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:56:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:56:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:56:23.409777  543705 memory.go:184] no items to output this cycle
I0322 14:56:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 14:56:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:56:33.409808  543705 memory.go:184] no items to output this cycle
I0322 14:56:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 14:56:35.265672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:56:35.268288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:56:35.268294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d81c0 0xc0004d8200]
E0322 14:56:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:56:43.410896  543705 memory.go:191] Add success.
I0322 14:56:43.409829  543705 cpu.go:282] Add success.
I0322 14:56:43.420635  543705 net.go:648] Add success.
I0322 14:56:43.423108  543705 net.go:770] primary dev: ETH0
I0322 14:56:43.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:56:43.423134  543705 net.go:698] Add success.
I0322 14:56:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:56:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:56:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:56:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:56:53.409766  543705 memory.go:184] no items to output this cycle
I0322 14:56:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 14:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:57:03.409775  543705 memory.go:184] no items to output this cycle
I0322 14:57:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 14:57:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:57:13.409787  543705 memory.go:191] Add success.
W0322 14:57:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 14:57:13.409813  543705 cpu.go:282] Add success.
W0322 14:57:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:57:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:57:13.420143  543705 net.go:648] Add success.
I0322 14:57:13.422919  543705 net.go:770] primary dev: ETH0
I0322 14:57:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:57:13.422947  543705 net.go:698] Add success.
I0322 14:57:13.428979  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 14:57:13.453156  543705 event_worker.go:152] Polling the log file for events...
I0322 14:57:13.485124  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1aca7a1a-e463-4018-a379-9e7919468912","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 14:57:13.485159  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 14:57:14.455237  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:57:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0322 14:57:14.455255  543705 disk_worker.go:728] disk inode is not compliant
E0322 14:57:14.455861  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 14:57:14.455869  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 14:57:14.455875  543705 custom_config.go:64] query custom config with name: gpu
I0322 14:57:14.456801  543705 disk_worker.go:494] system disk:vda1
I0322 14:57:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 14:57:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 14:57:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
E0322 14:57:16.457894  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 14:57:16.457896  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:57:16.457952  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:57:16.457973  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:57:16.472292  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:57:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:57:23.409776  543705 memory.go:184] no items to output this cycle
I0322 14:57:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 14:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:57:33.409778  543705 memory.go:184] no items to output this cycle
I0322 14:57:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 14:57:35.269671  543705 disk_info.go:125] begin check local disk info of client
I0322 14:57:35.272209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:57:35.272216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae40 0xc0001aae80]
I0322 14:57:39.691151  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 14:57:39.691157  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 14:57:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:57:43.410621  543705 memory.go:191] Add success.
I0322 14:57:43.409820  543705 cpu.go:282] Add success.
I0322 14:57:43.420291  543705 net.go:648] Add success.
I0322 14:57:43.422805  543705 net.go:770] primary dev: ETH0
I0322 14:57:43.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:57:43.422830  543705 net.go:698] Add success.
I0322 14:57:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:57:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:57:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:57:53.410398  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:57:53.410413  543705 cpu.go:275] no items to output this cycle
I0322 14:57:53.410417  543705 memory.go:184] no items to output this cycle
E0322 14:58:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:58:03.409797  543705 memory.go:184] no items to output this cycle
I0322 14:58:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 14:58:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:58:13.409797  543705 memory.go:191] Add success.
I0322 14:58:13.409816  543705 cpu.go:282] Add success.
W0322 14:58:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:58:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:58:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:58:13.420152  543705 net.go:648] Add success.
I0322 14:58:13.423139  543705 net.go:770] primary dev: ETH0
I0322 14:58:13.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:58:13.423172  543705 net.go:698] Add success.
I0322 14:58:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:58:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:58:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 14:58:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:58:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 14:58:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:58:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:58:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:58:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:58:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:58:23.409791  543705 memory.go:184] no items to output this cycle
I0322 14:58:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 14:58:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:58:33.409779  543705 memory.go:184] no items to output this cycle
I0322 14:58:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 14:58:35.273672  543705 disk_info.go:125] begin check local disk info of client
I0322 14:58:35.276224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:58:35.276230  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b2c0 0xc00007b300]
E0322 14:58:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:58:43.410617  543705 memory.go:191] Add success.
I0322 14:58:43.409803  543705 cpu.go:282] Add success.
I0322 14:58:43.420323  543705 net.go:648] Add success.
I0322 14:58:43.423084  543705 net.go:770] primary dev: ETH0
I0322 14:58:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:58:43.423111  543705 net.go:698] Add success.
I0322 14:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:58:53.410186  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:58:53.410200  543705 memory.go:184] no items to output this cycle
I0322 14:58:53.410231  543705 cpu.go:275] no items to output this cycle
E0322 14:59:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:59:03.409805  543705 memory.go:184] no items to output this cycle
I0322 14:59:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 14:59:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:59:13.409816  543705 memory.go:191] Add success.
I0322 14:59:13.409822  543705 cpu.go:282] Add success.
W0322 14:59:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 14:59:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 14:59:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 14:59:13.419733  543705 net.go:648] Add success.
I0322 14:59:13.422398  543705 net.go:770] primary dev: ETH0
I0322 14:59:13.422425  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:59:13.422438  543705 net.go:698] Add success.
I0322 14:59:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 14:59:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 14:59:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 14:59:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 14:59:14.456560  543705 disk_worker.go:494] system disk:vda1
I0322 14:59:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 14:59:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 14:59:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:59:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:59:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0322 14:59:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 14:59:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:59:23.409796  543705 memory.go:184] no items to output this cycle
I0322 14:59:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 14:59:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:59:33.409790  543705 memory.go:184] no items to output this cycle
I0322 14:59:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 14:59:35.277670  543705 disk_info.go:125] begin check local disk info of client
I0322 14:59:35.280256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 14:59:35.280262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0322 14:59:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:59:43.410761  543705 memory.go:191] Add success.
I0322 14:59:43.409821  543705 cpu.go:282] Add success.
I0322 14:59:43.420594  543705 net.go:648] Add success.
I0322 14:59:43.423483  543705 net.go:770] primary dev: ETH0
I0322 14:59:43.423503  543705 net.go:802] Send network stats successfully!,count is 6
I0322 14:59:43.423523  543705 net.go:698] Add success.
I0322 14:59:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 14:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 14:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 14:59:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 14:59:53.409765  543705 memory.go:184] no items to output this cycle
I0322 14:59:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:00:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:00:03.409776  543705 memory.go:184] no items to output this cycle
I0322 15:00:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 15:00:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:00:13.409808  543705 memory.go:191] Add success.
I0322 15:00:13.409808  543705 cpu.go:282] Add success.
W0322 15:00:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:00:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:00:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:00:13.420361  543705 net.go:648] Add success.
I0322 15:00:13.423164  543705 net.go:770] primary dev: ETH0
I0322 15:00:13.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:00:13.423188  543705 net.go:698] Add success.
I0322 15:00:13.468952  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c1436c0f-bcdf-45af-8472-fde18089466f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:00:13.468985  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:00:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:00:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 15:00:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:00:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 15:00:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:00:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:00:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:00:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:00:23.409771  543705 memory.go:184] no items to output this cycle
I0322 15:00:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 15:00:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:00:33.409774  543705 memory.go:184] no items to output this cycle
I0322 15:00:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 15:00:35.281680  543705 disk_info.go:125] begin check local disk info of client
I0322 15:00:35.284222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:00:35.284228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ff080 0xc0004ff0c0]
I0322 15:00:39.692153  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:00:39.692160  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:00:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:00:43.410770  543705 memory.go:191] Add success.
I0322 15:00:43.409804  543705 cpu.go:282] Add success.
I0322 15:00:43.420475  543705 net.go:648] Add success.
I0322 15:00:43.423219  543705 net.go:770] primary dev: ETH0
I0322 15:00:43.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:00:43.423245  543705 net.go:698] Add success.
I0322 15:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:00:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:00:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:00:53.409773  543705 cpu.go:275] no items to output this cycle
I0322 15:00:53.409780  543705 memory.go:184] no items to output this cycle
E0322 15:01:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:01:03.409771  543705 memory.go:184] no items to output this cycle
I0322 15:01:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 15:01:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:01:13.409779  543705 memory.go:191] Add success.
W0322 15:01:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 15:01:13.409811  543705 cpu.go:282] Add success.
W0322 15:01:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:01:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:01:13.420193  543705 net.go:648] Add success.
I0322 15:01:13.423031  543705 net.go:770] primary dev: ETH0
I0322 15:01:13.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:01:13.423055  543705 net.go:698] Add success.
I0322 15:01:14.454945  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:01:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:01:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 15:01:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:01:14.456577  543705 disk_worker.go:494] system disk:vda1
I0322 15:01:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:01:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:01:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:01:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:01:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:01:23.409762  543705 memory.go:184] no items to output this cycle
I0322 15:01:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 15:01:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:01:33.409788  543705 memory.go:184] no items to output this cycle
I0322 15:01:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 15:01:35.285673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:01:35.288183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:01:35.288189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ab80 0xc00039abc0]
E0322 15:01:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:01:43.410859  543705 memory.go:191] Add success.
I0322 15:01:43.409802  543705 cpu.go:282] Add success.
I0322 15:01:43.420570  543705 net.go:648] Add success.
I0322 15:01:43.423517  543705 net.go:770] primary dev: ETH0
I0322 15:01:43.423532  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:01:43.423544  543705 net.go:698] Add success.
I0322 15:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:01:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:01:53.410337  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:01:53.410352  543705 memory.go:184] no items to output this cycle
I0322 15:01:53.410353  543705 cpu.go:275] no items to output this cycle
E0322 15:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:02:03.409778  543705 memory.go:184] no items to output this cycle
I0322 15:02:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:02:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:02:13.409828  543705 memory.go:191] Add success.
I0322 15:02:13.409839  543705 cpu.go:282] Add success.
W0322 15:02:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:02:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:02:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:02:13.420349  543705 net.go:648] Add success.
I0322 15:02:13.423286  543705 net.go:770] primary dev: ETH0
I0322 15:02:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:02:13.423311  543705 net.go:698] Add success.
W0322 15:02:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:02:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 15:02:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:02:14.456824  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:02:14.456834  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:02:14.456839  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:02:14.456887  543705 disk_worker.go:494] system disk:vda1
I0322 15:02:14.456930  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:02:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:02:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:02:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:02:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:02:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:02:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:02:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:02:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:02:23.409772  543705 memory.go:184] no items to output this cycle
I0322 15:02:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 15:02:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:02:33.409773  543705 memory.go:184] no items to output this cycle
I0322 15:02:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 15:02:35.289675  543705 disk_info.go:125] begin check local disk info of client
I0322 15:02:35.292245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:02:35.292250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2180 0xc0003e21c0]
E0322 15:02:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:02:43.410670  543705 memory.go:191] Add success.
I0322 15:02:43.409834  543705 cpu.go:282] Add success.
I0322 15:02:43.420384  543705 net.go:648] Add success.
I0322 15:02:43.423038  543705 net.go:770] primary dev: ETH0
I0322 15:02:43.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:02:43.423065  543705 net.go:698] Add success.
I0322 15:02:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:02:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:02:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:02:53.409768  543705 memory.go:184] no items to output this cycle
I0322 15:02:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 15:03:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:03:03.409799  543705 memory.go:184] no items to output this cycle
I0322 15:03:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 15:03:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:03:13.409833  543705 memory.go:191] Add success.
I0322 15:03:13.409834  543705 cpu.go:282] Add success.
W0322 15:03:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:03:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:03:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:03:13.420190  543705 net.go:648] Add success.
I0322 15:03:13.422764  543705 net.go:770] primary dev: ETH0
I0322 15:03:13.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:03:13.422790  543705 net.go:698] Add success.
I0322 15:03:13.464339  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0b87e6b4-30af-4c23-9c5b-b2629104b6f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:03:13.464371  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:03:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:03:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:03:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 15:03:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:03:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 15:03:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:03:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:03:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:03:23.409802  543705 memory.go:184] no items to output this cycle
I0322 15:03:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 15:03:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:03:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 15:03:33.409801  543705 memory.go:184] no items to output this cycle
I0322 15:03:35.293675  543705 disk_info.go:125] begin check local disk info of client
I0322 15:03:35.296208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:03:35.296215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e3880 0xc0003e38c0]
I0322 15:03:39.693167  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:03:39.693174  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:03:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:03:43.410647  543705 memory.go:191] Add success.
I0322 15:03:43.409809  543705 cpu.go:282] Add success.
I0322 15:03:43.420359  543705 net.go:648] Add success.
I0322 15:03:43.422964  543705 net.go:770] primary dev: ETH0
I0322 15:03:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:03:43.422994  543705 net.go:698] Add success.
I0322 15:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:03:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:03:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:03:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:03:53.409800  543705 memory.go:184] no items to output this cycle
I0322 15:03:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 15:04:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:04:03.409772  543705 memory.go:184] no items to output this cycle
I0322 15:04:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 15:04:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:04:13.409840  543705 memory.go:191] Add success.
I0322 15:04:13.409847  543705 cpu.go:282] Add success.
W0322 15:04:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:04:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:04:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:04:13.420168  543705 net.go:648] Add success.
I0322 15:04:13.422855  543705 net.go:770] primary dev: ETH0
I0322 15:04:13.422871  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:04:13.422885  543705 net.go:698] Add success.
I0322 15:04:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:04:14.455323  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:04:14.455336  543705 disk_worker.go:708] disk space is not compliant
W0322 15:04:14.455340  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:04:14.457444  543705 disk_worker.go:494] system disk:vda1
I0322 15:04:14.457488  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:04:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:04:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:04:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:04:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:04:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:04:23.409804  543705 memory.go:184] no items to output this cycle
I0322 15:04:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 15:04:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:04:33.409790  543705 memory.go:184] no items to output this cycle
I0322 15:04:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 15:04:35.297675  543705 disk_info.go:125] begin check local disk info of client
I0322 15:04:35.300248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:04:35.300254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8440 0xc0004d8480]
E0322 15:04:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:04:43.410731  543705 memory.go:191] Add success.
I0322 15:04:43.409810  543705 cpu.go:282] Add success.
I0322 15:04:43.420424  543705 net.go:648] Add success.
I0322 15:04:43.423179  543705 net.go:770] primary dev: ETH0
I0322 15:04:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:04:43.423204  543705 net.go:698] Add success.
I0322 15:04:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:04:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:04:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:04:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:04:53.409769  543705 memory.go:184] no items to output this cycle
I0322 15:04:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 15:05:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:05:03.409799  543705 memory.go:184] no items to output this cycle
I0322 15:05:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 15:05:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:05:13.409778  543705 memory.go:191] Add success.
W0322 15:05:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 15:05:13.409806  543705 cpu.go:282] Add success.
W0322 15:05:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:05:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:05:13.420174  543705 net.go:648] Add success.
I0322 15:05:13.423137  543705 net.go:770] primary dev: ETH0
I0322 15:05:13.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:05:13.423173  543705 net.go:698] Add success.
I0322 15:05:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:05:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:05:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 15:05:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:05:14.456828  543705 disk_worker.go:494] system disk:vda1
I0322 15:05:14.456858  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:05:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:05:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:05:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:05:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:05:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:05:23.410396  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:05:23.410414  543705 memory.go:184] no items to output this cycle
I0322 15:05:23.410424  543705 cpu.go:275] no items to output this cycle
E0322 15:05:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:05:33.409806  543705 memory.go:184] no items to output this cycle
I0322 15:05:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 15:05:35.301679  543705 disk_info.go:125] begin check local disk info of client
I0322 15:05:35.304188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:05:35.304195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306080 0xc0003060c0]
E0322 15:05:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:05:43.410684  543705 memory.go:191] Add success.
I0322 15:05:43.409819  543705 cpu.go:282] Add success.
I0322 15:05:43.420421  543705 net.go:648] Add success.
I0322 15:05:43.423283  543705 net.go:770] primary dev: ETH0
I0322 15:05:43.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:05:43.423307  543705 net.go:698] Add success.
I0322 15:05:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:05:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:05:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:05:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:05:53.409776  543705 memory.go:184] no items to output this cycle
I0322 15:05:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 15:06:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:06:03.409771  543705 memory.go:184] no items to output this cycle
I0322 15:06:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 15:06:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:06:13.409830  543705 memory.go:191] Add success.
I0322 15:06:13.409834  543705 cpu.go:282] Add success.
W0322 15:06:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:06:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:06:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:06:13.420218  543705 net.go:648] Add success.
I0322 15:06:13.423129  543705 net.go:770] primary dev: ETH0
I0322 15:06:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:06:13.423155  543705 net.go:698] Add success.
I0322 15:06:13.469195  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8acdf5e6-87ee-4120-919e-45b59617cc08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:06:13.469229  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:06:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:06:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:06:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 15:06:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:06:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 15:06:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:06:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:06:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:06:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:06:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:06:23.409791  543705 memory.go:184] no items to output this cycle
I0322 15:06:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 15:06:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:06:33.409780  543705 memory.go:184] no items to output this cycle
I0322 15:06:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 15:06:35.305677  543705 disk_info.go:125] begin check local disk info of client
I0322 15:06:35.308207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:06:35.308212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002beac0 0xc0002beb00]
I0322 15:06:39.693733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:06:39.693739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:06:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:06:43.410750  543705 memory.go:191] Add success.
I0322 15:06:43.409795  543705 cpu.go:282] Add success.
I0322 15:06:43.420456  543705 net.go:648] Add success.
I0322 15:06:43.423246  543705 net.go:770] primary dev: ETH0
I0322 15:06:43.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:06:43.423273  543705 net.go:698] Add success.
I0322 15:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:06:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:06:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:06:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:06:53.409797  543705 memory.go:184] no items to output this cycle
I0322 15:06:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 15:07:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:07:03.409778  543705 memory.go:184] no items to output this cycle
I0322 15:07:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:07:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:07:13.409790  543705 memory.go:191] Add success.
I0322 15:07:13.409793  543705 cpu.go:282] Add success.
W0322 15:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:07:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:07:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:07:13.420418  543705 net.go:648] Add success.
I0322 15:07:13.423293  543705 net.go:770] primary dev: ETH0
I0322 15:07:13.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:07:13.423321  543705 net.go:698] Add success.
I0322 15:07:13.452817  543705 event_worker.go:152] Polling the log file for events...
W0322 15:07:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:07:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 15:07:14.455196  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:07:14.455873  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:07:14.455881  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:07:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:07:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 15:07:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:07:15.456267  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:07:15.456276  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:07:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:07:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:07:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:07:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:07:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:07:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:07:23.409775  543705 memory.go:184] no items to output this cycle
I0322 15:07:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 15:07:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:07:33.409813  543705 memory.go:184] no items to output this cycle
I0322 15:07:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 15:07:35.309673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:07:35.312202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:07:35.312208  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2480 0xc0003f24c0]
E0322 15:07:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:07:43.410767  543705 memory.go:191] Add success.
I0322 15:07:43.409795  543705 cpu.go:282] Add success.
I0322 15:07:43.420539  543705 net.go:648] Add success.
I0322 15:07:43.423117  543705 net.go:770] primary dev: ETH0
I0322 15:07:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:07:43.423145  543705 net.go:698] Add success.
I0322 15:07:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:07:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:07:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:07:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:07:53.409784  543705 memory.go:184] no items to output this cycle
I0322 15:07:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 15:08:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:08:03.409767  543705 memory.go:184] no items to output this cycle
I0322 15:08:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 15:08:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:08:13.409811  543705 memory.go:191] Add success.
I0322 15:08:13.409812  543705 cpu.go:282] Add success.
W0322 15:08:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:08:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:08:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:08:13.420151  543705 net.go:648] Add success.
I0322 15:08:13.423167  543705 net.go:770] primary dev: ETH0
I0322 15:08:13.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:08:13.423192  543705 net.go:698] Add success.
I0322 15:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:08:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:08:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 15:08:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:08:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 15:08:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:08:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:08:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:08:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:08:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:08:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:08:23.409766  543705 memory.go:184] no items to output this cycle
I0322 15:08:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 15:08:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:08:33.409787  543705 memory.go:184] no items to output this cycle
I0322 15:08:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 15:08:35.313674  543705 disk_info.go:125] begin check local disk info of client
I0322 15:08:35.316167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:08:35.316173  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315300 0xc000315340]
E0322 15:08:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:08:43.410562  543705 memory.go:191] Add success.
I0322 15:08:43.409802  543705 cpu.go:282] Add success.
I0322 15:08:43.420263  543705 net.go:648] Add success.
I0322 15:08:43.423056  543705 net.go:770] primary dev: ETH0
I0322 15:08:43.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:08:43.423090  543705 net.go:698] Add success.
I0322 15:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:08:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:08:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:08:53.409763  543705 memory.go:184] no items to output this cycle
I0322 15:08:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 15:09:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:09:03.409783  543705 memory.go:184] no items to output this cycle
I0322 15:09:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 15:09:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:09:13.409781  543705 memory.go:191] Add success.
W0322 15:09:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 15:09:13.409814  543705 cpu.go:282] Add success.
W0322 15:09:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:09:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:09:13.420127  543705 net.go:648] Add success.
I0322 15:09:13.422758  543705 net.go:770] primary dev: ETH0
I0322 15:09:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:09:13.422784  543705 net.go:698] Add success.
I0322 15:09:13.620227  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8647dd8b-3b93-42e1-ad61-4db6febf94d1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:09:13.620265  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:09:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:09:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:09:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 15:09:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:09:14.456680  543705 disk_worker.go:494] system disk:vda1
I0322 15:09:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:09:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:09:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:09:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:09:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:09:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:09:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:09:23.409770  543705 memory.go:184] no items to output this cycle
I0322 15:09:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 15:09:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:09:33.409775  543705 memory.go:184] no items to output this cycle
I0322 15:09:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 15:09:35.317675  543705 disk_info.go:125] begin check local disk info of client
I0322 15:09:35.320160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:09:35.320166  543705 disk_info.go:196] parse disk info done, disk is : [0xc000590300 0xc000590340]
I0322 15:09:39.695176  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:09:39.695184  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:09:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:09:43.410751  543705 memory.go:191] Add success.
I0322 15:09:43.409809  543705 cpu.go:282] Add success.
I0322 15:09:43.420474  543705 net.go:648] Add success.
I0322 15:09:43.423388  543705 net.go:770] primary dev: ETH0
I0322 15:09:43.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:09:43.423417  543705 net.go:698] Add success.
I0322 15:09:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:09:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:09:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:09:53.409767  543705 memory.go:184] no items to output this cycle
I0322 15:09:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 15:10:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:10:03.409792  543705 memory.go:184] no items to output this cycle
I0322 15:10:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 15:10:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:10:13.409790  543705 memory.go:191] Add success.
W0322 15:10:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 15:10:13.409824  543705 cpu.go:282] Add success.
W0322 15:10:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:10:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:10:13.420213  543705 net.go:648] Add success.
I0322 15:10:13.423222  543705 net.go:770] primary dev: ETH0
I0322 15:10:13.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:10:13.423247  543705 net.go:698] Add success.
I0322 15:10:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:10:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:10:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 15:10:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:10:14.456533  543705 disk_worker.go:494] system disk:vda1
I0322 15:10:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:10:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:10:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:10:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:10:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:10:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:10:23.409762  543705 memory.go:184] no items to output this cycle
I0322 15:10:23.409885  543705 cpu.go:275] no items to output this cycle
E0322 15:10:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:10:33.409812  543705 memory.go:184] no items to output this cycle
I0322 15:10:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 15:10:35.321679  543705 disk_info.go:125] begin check local disk info of client
I0322 15:10:35.324270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:10:35.324276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0000 0xc0004a0200]
E0322 15:10:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:10:43.410700  543705 memory.go:191] Add success.
I0322 15:10:43.409820  543705 cpu.go:282] Add success.
I0322 15:10:43.420410  543705 net.go:648] Add success.
I0322 15:10:43.423039  543705 net.go:770] primary dev: ETH0
I0322 15:10:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:10:43.423068  543705 net.go:698] Add success.
I0322 15:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:10:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:10:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:10:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:10:53.409775  543705 memory.go:184] no items to output this cycle
I0322 15:10:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:11:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:11:03.409797  543705 memory.go:184] no items to output this cycle
I0322 15:11:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 15:11:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:11:13.409821  543705 memory.go:191] Add success.
I0322 15:11:13.409826  543705 cpu.go:282] Add success.
W0322 15:11:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:11:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:11:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:11:13.420129  543705 net.go:648] Add success.
I0322 15:11:13.422873  543705 net.go:770] primary dev: ETH0
I0322 15:11:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:11:13.422898  543705 net.go:698] Add success.
I0322 15:11:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:11:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:11:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 15:11:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:11:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 15:11:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:11:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:11:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:11:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:11:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:11:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:11:23.409766  543705 memory.go:184] no items to output this cycle
I0322 15:11:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 15:11:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:11:33.409776  543705 memory.go:184] no items to output this cycle
I0322 15:11:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 15:11:35.325672  543705 disk_info.go:125] begin check local disk info of client
I0322 15:11:35.328297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:11:35.328303  543705 disk_info.go:196] parse disk info done, disk is : [0xc000590000 0xc000590040]
E0322 15:11:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:11:43.410619  543705 memory.go:191] Add success.
I0322 15:11:43.409811  543705 cpu.go:282] Add success.
I0322 15:11:43.420329  543705 net.go:648] Add success.
I0322 15:11:43.422849  543705 net.go:770] primary dev: ETH0
I0322 15:11:43.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:11:43.422875  543705 net.go:698] Add success.
I0322 15:11:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:11:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:11:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:11:53.409779  543705 memory.go:184] no items to output this cycle
I0322 15:11:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:12:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:12:03.409777  543705 memory.go:184] no items to output this cycle
I0322 15:12:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 15:12:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:12:13.409803  543705 memory.go:191] Add success.
I0322 15:12:13.409806  543705 cpu.go:282] Add success.
W0322 15:12:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:12:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:12:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:12:13.420115  543705 net.go:648] Add success.
I0322 15:12:13.422986  543705 net.go:770] primary dev: ETH0
I0322 15:12:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:12:13.423011  543705 net.go:698] Add success.
I0322 15:12:13.465333  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf62deee-2e5d-47ae-b24c-cc164269b8f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:12:13.465377  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 15:12:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:12:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 15:12:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:12:14.455934  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:12:14.455943  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:12:14.455948  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:12:14.456689  543705 disk_worker.go:494] system disk:vda1
I0322 15:12:14.456723  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:12:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:12:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:12:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:12:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:12:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:12:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:12:16.472327  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:12:23.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:12:23.409756  543705 memory.go:184] no items to output this cycle
I0322 15:12:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 15:12:33.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:12:33.409896  543705 memory.go:184] no items to output this cycle
I0322 15:12:33.410012  543705 cpu.go:275] no items to output this cycle
I0322 15:12:35.329681  543705 disk_info.go:125] begin check local disk info of client
I0322 15:12:35.332110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:12:35.332116  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0322 15:12:39.695417  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:12:39.695423  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:12:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:12:43.410649  543705 memory.go:191] Add success.
I0322 15:12:43.409794  543705 cpu.go:282] Add success.
I0322 15:12:43.420344  543705 net.go:648] Add success.
I0322 15:12:43.423203  543705 net.go:770] primary dev: ETH0
I0322 15:12:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:12:43.423233  543705 net.go:698] Add success.
I0322 15:12:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:12:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:12:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:12:53.409793  543705 memory.go:184] no items to output this cycle
I0322 15:12:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 15:13:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:13:03.409773  543705 memory.go:184] no items to output this cycle
I0322 15:13:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 15:13:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:13:13.409794  543705 memory.go:191] Add success.
I0322 15:13:13.409796  543705 cpu.go:282] Add success.
W0322 15:13:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:13:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:13:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:13:13.420278  543705 net.go:648] Add success.
I0322 15:13:13.423239  543705 net.go:770] primary dev: ETH0
I0322 15:13:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:13:13.423265  543705 net.go:698] Add success.
I0322 15:13:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:13:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:13:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 15:13:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:13:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 15:13:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:13:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:13:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:13:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:13:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:13:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:13:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:13:23.409776  543705 memory.go:184] no items to output this cycle
I0322 15:13:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:13:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:13:33.409787  543705 memory.go:184] no items to output this cycle
I0322 15:13:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 15:13:35.333673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:13:35.336212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:13:35.336218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005593c0 0xc000559400]
E0322 15:13:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:13:43.410743  543705 memory.go:191] Add success.
I0322 15:13:43.409812  543705 cpu.go:282] Add success.
I0322 15:13:43.420436  543705 net.go:648] Add success.
I0322 15:13:43.423281  543705 net.go:770] primary dev: ETH0
I0322 15:13:43.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:13:43.423331  543705 net.go:698] Add success.
I0322 15:13:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:13:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:13:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:13:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:13:53.409762  543705 memory.go:184] no items to output this cycle
I0322 15:13:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 15:14:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:14:03.409769  543705 memory.go:184] no items to output this cycle
I0322 15:14:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 15:14:13.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:14:13.409835  543705 memory.go:191] Add success.
I0322 15:14:13.409858  543705 cpu.go:282] Add success.
W0322 15:14:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:14:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:14:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:14:13.420272  543705 net.go:648] Add success.
I0322 15:14:13.423201  543705 net.go:770] primary dev: ETH0
I0322 15:14:13.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:14:13.423231  543705 net.go:698] Add success.
I0322 15:14:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:14:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:14:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 15:14:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:14:14.456525  543705 disk_worker.go:494] system disk:vda1
I0322 15:14:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:14:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:14:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:14:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:14:16.472359  543705 disk_local_worker.go:436] Get disk info: []
I0322 15:14:23.409776  543705 cpu.go:275] no items to output this cycle
E0322 15:14:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:14:23.409799  543705 memory.go:184] no items to output this cycle
E0322 15:14:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:14:33.409776  543705 memory.go:184] no items to output this cycle
I0322 15:14:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 15:14:35.337675  543705 disk_info.go:125] begin check local disk info of client
I0322 15:14:35.340088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:14:35.340093  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025e000 0xc00025e040]
E0322 15:14:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:14:43.410629  543705 memory.go:191] Add success.
I0322 15:14:43.409823  543705 cpu.go:282] Add success.
I0322 15:14:43.420334  543705 net.go:648] Add success.
I0322 15:14:43.422912  543705 net.go:770] primary dev: ETH0
I0322 15:14:43.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:14:43.422938  543705 net.go:698] Add success.
I0322 15:14:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:14:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:14:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:14:53.409761  543705 memory.go:184] no items to output this cycle
I0322 15:14:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 15:15:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:15:03.409765  543705 memory.go:184] no items to output this cycle
I0322 15:15:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 15:15:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:15:13.409811  543705 memory.go:191] Add success.
I0322 15:15:13.409821  543705 cpu.go:282] Add success.
W0322 15:15:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:15:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:15:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:15:13.420188  543705 net.go:648] Add success.
I0322 15:15:13.423097  543705 net.go:770] primary dev: ETH0
I0322 15:15:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:15:13.423131  543705 net.go:698] Add success.
I0322 15:15:13.464406  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e1dbe66d-1b0f-4017-a3fc-c7dbc16eeb00","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:15:13.464441  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:15:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:15:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:15:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 15:15:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:15:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 15:15:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:15:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:15:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:15:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:15:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:15:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:15:23.409765  543705 memory.go:184] no items to output this cycle
I0322 15:15:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 15:15:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:15:33.409922  543705 cpu.go:275] no items to output this cycle
I0322 15:15:33.409928  543705 memory.go:184] no items to output this cycle
I0322 15:15:35.341672  543705 disk_info.go:125] begin check local disk info of client
I0322 15:15:35.344204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:15:35.344218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ad6c0 0xc0004ad700]
I0322 15:15:39.696176  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:15:39.696182  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:15:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:15:43.410629  543705 memory.go:191] Add success.
I0322 15:15:43.409791  543705 cpu.go:282] Add success.
I0322 15:15:43.420323  543705 net.go:648] Add success.
I0322 15:15:43.423021  543705 net.go:770] primary dev: ETH0
I0322 15:15:43.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:15:43.423047  543705 net.go:698] Add success.
I0322 15:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:15:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:15:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:15:53.409777  543705 memory.go:184] no items to output this cycle
I0322 15:15:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:16:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:16:03.409795  543705 memory.go:184] no items to output this cycle
I0322 15:16:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 15:16:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:16:13.409823  543705 memory.go:191] Add success.
I0322 15:16:13.409828  543705 cpu.go:282] Add success.
W0322 15:16:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:16:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:16:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:16:13.420411  543705 net.go:648] Add success.
I0322 15:16:13.423323  543705 net.go:770] primary dev: ETH0
I0322 15:16:13.423342  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:16:13.423358  543705 net.go:698] Add success.
I0322 15:16:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:16:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:16:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0322 15:16:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:16:14.456652  543705 disk_worker.go:494] system disk:vda1
I0322 15:16:14.456687  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:16:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:16:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:16:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:16:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:16:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:16:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:16:23.409770  543705 memory.go:184] no items to output this cycle
I0322 15:16:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 15:16:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:16:33.409789  543705 memory.go:184] no items to output this cycle
I0322 15:16:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 15:16:35.345673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:16:35.348230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:16:35.348236  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499d80 0xc000499dc0]
E0322 15:16:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:16:43.410589  543705 memory.go:191] Add success.
I0322 15:16:43.409807  543705 cpu.go:282] Add success.
I0322 15:16:43.420296  543705 net.go:648] Add success.
I0322 15:16:43.422946  543705 net.go:770] primary dev: ETH0
I0322 15:16:43.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:16:43.422972  543705 net.go:698] Add success.
I0322 15:16:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:16:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:16:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:16:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:16:53.409777  543705 memory.go:184] no items to output this cycle
I0322 15:16:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:17:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:17:03.409778  543705 memory.go:184] no items to output this cycle
I0322 15:17:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:17:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:17:13.409784  543705 memory.go:191] Add success.
I0322 15:17:13.409802  543705 cpu.go:282] Add success.
W0322 15:17:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:17:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:17:13.420065  543705 net.go:648] Add success.
I0322 15:17:13.422782  543705 net.go:770] primary dev: ETH0
I0322 15:17:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:17:13.422809  543705 net.go:698] Add success.
I0322 15:17:13.453560  543705 event_worker.go:152] Polling the log file for events...
W0322 15:17:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:17:14.455248  543705 disk_worker.go:708] disk space is not compliant
W0322 15:17:14.455253  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:17:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:17:14.455917  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:17:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:17:14.456840  543705 disk_worker.go:494] system disk:vda1
I0322 15:17:14.456871  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:17:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:17:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:17:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:17:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:17:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:17:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:17:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:17:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:17:23.409787  543705 memory.go:184] no items to output this cycle
I0322 15:17:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:17:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:17:33.409785  543705 memory.go:184] no items to output this cycle
I0322 15:17:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 15:17:35.349670  543705 disk_info.go:125] begin check local disk info of client
I0322 15:17:35.352295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:17:35.352301  543705 disk_info.go:196] parse disk info done, disk is : [0xc000466dc0 0xc000466e00]
E0322 15:17:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:17:43.410834  543705 memory.go:191] Add success.
I0322 15:17:43.409987  543705 cpu.go:282] Add success.
I0322 15:17:43.419730  543705 net.go:648] Add success.
I0322 15:17:43.422386  543705 net.go:770] primary dev: ETH0
I0322 15:17:43.422401  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:17:43.422414  543705 net.go:698] Add success.
I0322 15:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:17:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:17:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:17:53.409791  543705 memory.go:184] no items to output this cycle
I0322 15:17:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 15:18:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:18:03.409767  543705 memory.go:184] no items to output this cycle
I0322 15:18:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 15:18:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:18:13.409780  543705 memory.go:191] Add success.
W0322 15:18:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:18:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:18:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:18:13.409856  543705 cpu.go:282] Add success.
I0322 15:18:13.420319  543705 net.go:648] Add success.
I0322 15:18:13.423219  543705 net.go:770] primary dev: ETH0
I0322 15:18:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:18:13.423244  543705 net.go:698] Add success.
I0322 15:18:13.468178  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a7c4880d-2444-40d1-b191-ca55e96b6318","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:18:13.468215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:18:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:18:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:18:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 15:18:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:18:14.456623  543705 disk_worker.go:494] system disk:vda1
I0322 15:18:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:18:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:18:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:18:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:18:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:18:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:18:23.409774  543705 memory.go:184] no items to output this cycle
I0322 15:18:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 15:18:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:18:33.409808  543705 memory.go:184] no items to output this cycle
I0322 15:18:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 15:18:35.353670  543705 disk_info.go:125] begin check local disk info of client
I0322 15:18:35.356287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:18:35.356294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003adac0 0xc0003adb00]
I0322 15:18:39.697174  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:18:39.697180  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:18:43.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:18:43.410767  543705 memory.go:191] Add success.
I0322 15:18:43.409950  543705 cpu.go:282] Add success.
I0322 15:18:43.419741  543705 net.go:648] Add success.
I0322 15:18:43.422540  543705 net.go:770] primary dev: ETH0
I0322 15:18:43.422555  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:18:43.422568  543705 net.go:698] Add success.
I0322 15:18:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:18:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:18:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:18:53.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:18:53.410274  543705 memory.go:184] no items to output this cycle
I0322 15:18:53.410282  543705 cpu.go:275] no items to output this cycle
E0322 15:19:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:19:03.409768  543705 memory.go:184] no items to output this cycle
I0322 15:19:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 15:19:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:19:13.409812  543705 memory.go:191] Add success.
I0322 15:19:13.409823  543705 cpu.go:282] Add success.
W0322 15:19:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:19:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:19:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:19:13.420163  543705 net.go:648] Add success.
I0322 15:19:13.422734  543705 net.go:770] primary dev: ETH0
I0322 15:19:13.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:19:13.422762  543705 net.go:698] Add success.
I0322 15:19:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:19:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:19:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 15:19:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:19:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 15:19:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:19:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:19:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:19:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:19:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:19:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:19:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:19:23.409804  543705 memory.go:184] no items to output this cycle
I0322 15:19:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 15:19:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:19:33.409776  543705 memory.go:184] no items to output this cycle
I0322 15:19:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 15:19:35.357674  543705 disk_info.go:125] begin check local disk info of client
I0322 15:19:35.360196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:19:35.360202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004649c0 0xc000464a00]
E0322 15:19:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:19:43.410814  543705 memory.go:191] Add success.
I0322 15:19:43.409904  543705 cpu.go:282] Add success.
I0322 15:19:43.419754  543705 net.go:648] Add success.
I0322 15:19:43.422466  543705 net.go:770] primary dev: ETH0
I0322 15:19:43.422485  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:19:43.422500  543705 net.go:698] Add success.
I0322 15:19:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:19:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:19:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:19:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:19:53.409795  543705 memory.go:184] no items to output this cycle
I0322 15:19:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 15:20:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:20:03.409760  543705 memory.go:184] no items to output this cycle
I0322 15:20:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 15:20:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:20:13.409807  543705 cpu.go:282] Add success.
I0322 15:20:13.409828  543705 memory.go:191] Add success.
W0322 15:20:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:20:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:20:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:20:13.420341  543705 net.go:648] Add success.
I0322 15:20:13.421340  543705 net.go:770] primary dev: ETH0
I0322 15:20:13.421358  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:20:13.421376  543705 net.go:698] Add success.
I0322 15:20:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:20:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:20:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 15:20:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:20:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 15:20:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:20:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:20:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:20:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:20:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:20:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:20:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:20:23.409794  543705 memory.go:184] no items to output this cycle
I0322 15:20:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 15:20:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:20:33.409788  543705 memory.go:184] no items to output this cycle
I0322 15:20:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 15:20:35.361672  543705 disk_info.go:125] begin check local disk info of client
I0322 15:20:35.364216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:20:35.364222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa500 0xc0002aa540]
E0322 15:20:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:20:43.410679  543705 memory.go:191] Add success.
I0322 15:20:43.409806  543705 cpu.go:282] Add success.
I0322 15:20:43.420378  543705 net.go:648] Add success.
I0322 15:20:43.423266  543705 net.go:770] primary dev: ETH0
I0322 15:20:43.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:20:43.423291  543705 net.go:698] Add success.
I0322 15:20:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:20:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:20:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:20:53.409777  543705 memory.go:184] no items to output this cycle
I0322 15:20:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 15:21:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:21:03.409804  543705 memory.go:184] no items to output this cycle
I0322 15:21:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 15:21:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:21:13.409824  543705 memory.go:191] Add success.
I0322 15:21:13.409835  543705 cpu.go:282] Add success.
W0322 15:21:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:21:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:21:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:21:13.420277  543705 net.go:648] Add success.
I0322 15:21:13.422860  543705 net.go:770] primary dev: ETH0
I0322 15:21:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:21:13.422886  543705 net.go:698] Add success.
I0322 15:21:13.468222  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c546f4b8-8903-4426-86a7-4319b15c264d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:21:13.468258  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:21:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:21:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:21:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 15:21:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:21:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 15:21:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:21:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:21:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:21:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:21:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:21:23.409781  543705 memory.go:184] no items to output this cycle
I0322 15:21:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 15:21:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:21:33.409819  543705 memory.go:184] no items to output this cycle
I0322 15:21:33.409831  543705 cpu.go:275] no items to output this cycle
I0322 15:21:35.365672  543705 disk_info.go:125] begin check local disk info of client
I0322 15:21:35.368190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:21:35.368195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000255d00 0xc000255d40]
I0322 15:21:39.697727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:21:39.697733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:21:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:21:43.410694  543705 memory.go:191] Add success.
I0322 15:21:43.409807  543705 cpu.go:282] Add success.
I0322 15:21:43.420399  543705 net.go:648] Add success.
I0322 15:21:43.423073  543705 net.go:770] primary dev: ETH0
I0322 15:21:43.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:21:43.423099  543705 net.go:698] Add success.
I0322 15:21:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:21:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:21:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:21:53.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:21:53.409893  543705 memory.go:184] no items to output this cycle
I0322 15:21:53.410059  543705 cpu.go:275] no items to output this cycle
E0322 15:22:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:22:03.409779  543705 memory.go:184] no items to output this cycle
I0322 15:22:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 15:22:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:22:13.409807  543705 memory.go:191] Add success.
I0322 15:22:13.409817  543705 cpu.go:282] Add success.
W0322 15:22:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:22:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:22:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:22:13.420159  543705 net.go:648] Add success.
I0322 15:22:13.422789  543705 net.go:770] primary dev: ETH0
I0322 15:22:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:22:13.422818  543705 net.go:698] Add success.
W0322 15:22:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:22:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0322 15:22:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:22:14.456878  543705 disk_worker.go:494] system disk:vda1
I0322 15:22:14.456920  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:22:14.457327  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:22:14.457336  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:22:14.457341  543705 custom_config.go:64] query custom config with name: gpu
E0322 15:22:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:22:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:22:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:22:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:22:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:22:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:22:16.472337  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:22:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:22:23.409778  543705 memory.go:184] no items to output this cycle
I0322 15:22:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 15:22:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:22:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 15:22:33.409799  543705 memory.go:184] no items to output this cycle
I0322 15:22:35.369673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:22:35.372205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:22:35.372212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d9980 0xc0003d99c0]
E0322 15:22:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:22:43.410633  543705 memory.go:191] Add success.
I0322 15:22:43.409799  543705 cpu.go:282] Add success.
I0322 15:22:43.420354  543705 net.go:648] Add success.
I0322 15:22:43.423037  543705 net.go:770] primary dev: ETH0
I0322 15:22:43.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:22:43.423063  543705 net.go:698] Add success.
I0322 15:22:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:22:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:22:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:22:53.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:22:53.409859  543705 memory.go:184] no items to output this cycle
I0322 15:22:53.409933  543705 cpu.go:275] no items to output this cycle
E0322 15:23:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:23:03.409768  543705 memory.go:184] no items to output this cycle
I0322 15:23:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 15:23:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:23:13.409793  543705 memory.go:191] Add success.
I0322 15:23:13.409803  543705 cpu.go:282] Add success.
W0322 15:23:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:23:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:23:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:23:13.420104  543705 net.go:648] Add success.
I0322 15:23:13.423053  543705 net.go:770] primary dev: ETH0
I0322 15:23:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:23:13.423078  543705 net.go:698] Add success.
I0322 15:23:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:23:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:23:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 15:23:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:23:14.456613  543705 disk_worker.go:494] system disk:vda1
I0322 15:23:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:23:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:23:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:23:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:23:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:23:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:23:23.409781  543705 memory.go:184] no items to output this cycle
I0322 15:23:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 15:23:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:23:33.409809  543705 memory.go:184] no items to output this cycle
I0322 15:23:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 15:23:35.373671  543705 disk_info.go:125] begin check local disk info of client
I0322 15:23:35.376216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:23:35.376222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005914c0 0xc000591500]
E0322 15:23:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:23:43.410689  543705 memory.go:191] Add success.
I0322 15:23:43.409797  543705 cpu.go:282] Add success.
I0322 15:23:43.420195  543705 net.go:770] primary dev: ETH0
I0322 15:23:43.420208  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:23:43.420220  543705 net.go:698] Add success.
I0322 15:23:43.420445  543705 net.go:648] Add success.
I0322 15:23:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:23:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:23:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:23:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:23:53.409798  543705 memory.go:184] no items to output this cycle
I0322 15:23:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 15:24:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:24:03.409771  543705 memory.go:184] no items to output this cycle
I0322 15:24:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 15:24:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:24:13.409785  543705 memory.go:191] Add success.
I0322 15:24:13.409794  543705 cpu.go:282] Add success.
W0322 15:24:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:24:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:24:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:24:13.420201  543705 net.go:648] Add success.
I0322 15:24:13.423090  543705 net.go:770] primary dev: ETH0
I0322 15:24:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:24:13.423118  543705 net.go:698] Add success.
I0322 15:24:13.469303  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b55b2eba-d2b8-4cda-99a6-f71de86bed00","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:24:13.469337  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:24:14.453976  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:24:14.454258  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:24:14.454271  543705 disk_worker.go:708] disk space is not compliant
W0322 15:24:14.454273  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:24:14.455719  543705 disk_worker.go:494] system disk:vda1
I0322 15:24:14.455753  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:24:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:24:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:24:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:24:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:24:23.409774  543705 memory.go:184] no items to output this cycle
I0322 15:24:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 15:24:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:24:33.409775  543705 memory.go:184] no items to output this cycle
I0322 15:24:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 15:24:35.377673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:24:35.380217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:24:35.380223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a01c0 0xc0004a0200]
I0322 15:24:39.699189  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:24:39.699196  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:24:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:24:43.410573  543705 memory.go:191] Add success.
I0322 15:24:43.409801  543705 cpu.go:282] Add success.
I0322 15:24:43.420277  543705 net.go:648] Add success.
I0322 15:24:43.422900  543705 net.go:770] primary dev: ETH0
I0322 15:24:43.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:24:43.422925  543705 net.go:698] Add success.
I0322 15:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:24:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:24:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:24:53.409771  543705 memory.go:184] no items to output this cycle
I0322 15:24:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 15:25:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:25:03.409897  543705 memory.go:184] no items to output this cycle
I0322 15:25:03.409920  543705 cpu.go:275] no items to output this cycle
E0322 15:25:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:25:13.409788  543705 memory.go:191] Add success.
W0322 15:25:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 15:25:13.409820  543705 cpu.go:282] Add success.
W0322 15:25:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:25:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:25:13.420470  543705 net.go:648] Add success.
I0322 15:25:13.423144  543705 net.go:770] primary dev: ETH0
I0322 15:25:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:25:13.423181  543705 net.go:698] Add success.
I0322 15:25:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:25:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:25:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 15:25:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:25:14.456574  543705 disk_worker.go:494] system disk:vda1
I0322 15:25:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:25:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:25:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:25:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:25:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:25:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:25:23.409767  543705 memory.go:184] no items to output this cycle
I0322 15:25:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 15:25:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:25:33.409781  543705 memory.go:184] no items to output this cycle
I0322 15:25:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 15:25:35.381673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:25:35.384220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:25:35.384225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2ac0 0xc0002a2b00]
E0322 15:25:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:25:43.410682  543705 memory.go:191] Add success.
I0322 15:25:43.409796  543705 cpu.go:282] Add success.
I0322 15:25:43.420453  543705 net.go:648] Add success.
I0322 15:25:43.423318  543705 net.go:770] primary dev: ETH0
I0322 15:25:43.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:25:43.423345  543705 net.go:698] Add success.
I0322 15:25:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:25:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:25:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:25:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:25:53.409779  543705 memory.go:184] no items to output this cycle
I0322 15:25:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:26:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:26:03.409764  543705 memory.go:184] no items to output this cycle
I0322 15:26:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 15:26:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:26:13.409797  543705 memory.go:191] Add success.
I0322 15:26:13.409798  543705 cpu.go:282] Add success.
W0322 15:26:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:26:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:26:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:26:13.420200  543705 net.go:648] Add success.
I0322 15:26:13.422900  543705 net.go:770] primary dev: ETH0
I0322 15:26:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:26:13.422928  543705 net.go:698] Add success.
I0322 15:26:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:26:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:26:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 15:26:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:26:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 15:26:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:26:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:26:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:26:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:26:16.472506  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:26:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:26:23.409779  543705 memory.go:184] no items to output this cycle
I0322 15:26:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 15:26:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:26:33.409806  543705 memory.go:184] no items to output this cycle
I0322 15:26:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 15:26:35.385671  543705 disk_info.go:125] begin check local disk info of client
I0322 15:26:35.388179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:26:35.388186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1140 0xc0004a1180]
E0322 15:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:26:43.410724  543705 memory.go:191] Add success.
I0322 15:26:43.409794  543705 cpu.go:282] Add success.
I0322 15:26:43.420444  543705 net.go:648] Add success.
I0322 15:26:43.423090  543705 net.go:770] primary dev: ETH0
I0322 15:26:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:26:43.423117  543705 net.go:698] Add success.
I0322 15:26:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:26:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:26:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:26:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:26:53.409803  543705 memory.go:184] no items to output this cycle
I0322 15:26:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 15:27:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:27:03.409775  543705 memory.go:184] no items to output this cycle
I0322 15:27:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:27:13.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:27:13.409915  543705 cpu.go:282] Add success.
I0322 15:27:13.409929  543705 memory.go:191] Add success.
W0322 15:27:13.410043  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:27:13.410055  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:27:13.410059  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:27:13.419730  543705 net.go:648] Add success.
I0322 15:27:13.422317  543705 net.go:770] primary dev: ETH0
I0322 15:27:13.422331  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:27:13.422345  543705 net.go:698] Add success.
I0322 15:27:13.428295  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 15:27:13.453586  543705 event_worker.go:152] Polling the log file for events...
I0322 15:27:13.472592  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"26e42cd4-91cf-48b8-b6e7-b3e4c0fbe3c5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:27:13.472624  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 15:27:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:27:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 15:27:14.455214  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:27:14.456913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:27:14.456924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:27:14.456930  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:27:14.456973  543705 disk_worker.go:494] system disk:vda1
I0322 15:27:14.457018  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:27:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:27:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:27:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:27:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:27:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:27:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:27:16.472317  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:27:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:27:23.409777  543705 memory.go:184] no items to output this cycle
I0322 15:27:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 15:27:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:27:33.409811  543705 memory.go:184] no items to output this cycle
I0322 15:27:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 15:27:35.392000  543705 disk_info.go:125] begin check local disk info of client
I0322 15:27:35.394587  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:27:35.394593  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a7c0 0xc00007a940]
I0322 15:27:39.700182  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:27:39.700189  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:27:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:27:43.410692  543705 memory.go:191] Add success.
I0322 15:27:43.409798  543705 cpu.go:282] Add success.
I0322 15:27:43.420443  543705 net.go:648] Add success.
I0322 15:27:43.423080  543705 net.go:770] primary dev: ETH0
I0322 15:27:43.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:27:43.423108  543705 net.go:698] Add success.
I0322 15:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:27:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:27:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:27:53.409778  543705 memory.go:184] no items to output this cycle
I0322 15:27:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:28:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:28:03.409763  543705 memory.go:184] no items to output this cycle
I0322 15:28:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 15:28:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:28:13.409794  543705 memory.go:191] Add success.
W0322 15:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 15:28:13.409820  543705 cpu.go:282] Add success.
W0322 15:28:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:28:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:28:13.420144  543705 net.go:648] Add success.
I0322 15:28:13.422672  543705 net.go:770] primary dev: ETH0
I0322 15:28:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:28:13.422696  543705 net.go:698] Add success.
I0322 15:28:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:28:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:28:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 15:28:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:28:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 15:28:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:28:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:28:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:28:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:28:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:28:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:28:23.409799  543705 memory.go:184] no items to output this cycle
I0322 15:28:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 15:28:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:28:33.409807  543705 memory.go:184] no items to output this cycle
I0322 15:28:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 15:28:35.397672  543705 disk_info.go:125] begin check local disk info of client
I0322 15:28:35.400238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:28:35.400243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab140 0xc0003ab180]
E0322 15:28:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:28:43.410656  543705 memory.go:191] Add success.
I0322 15:28:43.409819  543705 cpu.go:282] Add success.
I0322 15:28:43.420343  543705 net.go:648] Add success.
I0322 15:28:43.422857  543705 net.go:770] primary dev: ETH0
I0322 15:28:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:28:43.422887  543705 net.go:698] Add success.
I0322 15:28:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:28:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:28:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:28:53.409790  543705 memory.go:184] no items to output this cycle
I0322 15:28:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 15:29:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:29:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 15:29:03.409790  543705 memory.go:184] no items to output this cycle
E0322 15:29:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:29:13.409802  543705 memory.go:191] Add success.
I0322 15:29:13.409820  543705 cpu.go:282] Add success.
W0322 15:29:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:29:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:29:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:29:13.420368  543705 net.go:648] Add success.
I0322 15:29:13.423180  543705 net.go:770] primary dev: ETH0
I0322 15:29:13.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:29:13.423204  543705 net.go:698] Add success.
I0322 15:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:29:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:29:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 15:29:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:29:14.456525  543705 disk_worker.go:494] system disk:vda1
I0322 15:29:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:29:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:29:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:29:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:29:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:29:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:29:23.409765  543705 memory.go:184] no items to output this cycle
I0322 15:29:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 15:29:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:29:33.409774  543705 memory.go:184] no items to output this cycle
I0322 15:29:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 15:29:35.401675  543705 disk_info.go:125] begin check local disk info of client
I0322 15:29:35.404241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:29:35.404247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4740 0xc0000c4780]
E0322 15:29:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:29:43.410665  543705 memory.go:191] Add success.
I0322 15:29:43.409819  543705 cpu.go:282] Add success.
I0322 15:29:43.420375  543705 net.go:648] Add success.
I0322 15:29:43.422940  543705 net.go:770] primary dev: ETH0
I0322 15:29:43.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:29:43.422970  543705 net.go:698] Add success.
I0322 15:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:29:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:29:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:29:53.409774  543705 memory.go:184] no items to output this cycle
I0322 15:29:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:30:03.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:30:03.409877  543705 memory.go:184] no items to output this cycle
I0322 15:30:03.409919  543705 cpu.go:275] no items to output this cycle
E0322 15:30:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:30:13.409797  543705 memory.go:191] Add success.
I0322 15:30:13.409799  543705 cpu.go:282] Add success.
W0322 15:30:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:30:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:30:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:30:13.420122  543705 net.go:648] Add success.
I0322 15:30:13.423403  543705 net.go:770] primary dev: ETH0
I0322 15:30:13.423418  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:30:13.423433  543705 net.go:698] Add success.
I0322 15:30:13.469735  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a83b8213-738f-4777-9578-7498d06620e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:30:13.469770  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:30:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:30:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:30:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0322 15:30:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:30:14.456775  543705 disk_worker.go:494] system disk:vda1
I0322 15:30:14.456813  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:30:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:30:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:30:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:30:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:30:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:30:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:30:23.409793  543705 memory.go:184] no items to output this cycle
I0322 15:30:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 15:30:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:30:33.409782  543705 memory.go:184] no items to output this cycle
I0322 15:30:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 15:30:35.405673  543705 disk_info.go:125] begin check local disk info of client
I0322 15:30:35.408209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:30:35.408215  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005905c0 0xc000590600]
I0322 15:30:39.700336  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:30:39.700342  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:30:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:30:43.410702  543705 memory.go:191] Add success.
I0322 15:30:43.409823  543705 cpu.go:282] Add success.
I0322 15:30:43.420389  543705 net.go:648] Add success.
I0322 15:30:43.423380  543705 net.go:770] primary dev: ETH0
I0322 15:30:43.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:30:43.423405  543705 net.go:698] Add success.
I0322 15:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:30:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:30:53.409774  543705 memory.go:184] no items to output this cycle
I0322 15:30:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 15:31:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:31:03.409768  543705 memory.go:184] no items to output this cycle
I0322 15:31:03.409890  543705 cpu.go:275] no items to output this cycle
E0322 15:31:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:31:13.409801  543705 memory.go:191] Add success.
I0322 15:31:13.409813  543705 cpu.go:282] Add success.
W0322 15:31:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:31:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:31:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:31:13.420160  543705 net.go:648] Add success.
I0322 15:31:13.422931  543705 net.go:770] primary dev: ETH0
I0322 15:31:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:31:13.422956  543705 net.go:698] Add success.
I0322 15:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:31:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:31:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 15:31:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:31:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 15:31:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:31:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:31:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:31:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:31:23.409786  543705 memory.go:184] no items to output this cycle
I0322 15:31:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 15:31:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:31:33.409790  543705 memory.go:184] no items to output this cycle
I0322 15:31:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 15:31:35.409680  543705 disk_info.go:125] begin check local disk info of client
I0322 15:31:35.412296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:31:35.412302  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c8c0 0xc00039c900]
E0322 15:31:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:31:43.410897  543705 memory.go:191] Add success.
I0322 15:31:43.409790  543705 cpu.go:282] Add success.
I0322 15:31:43.420588  543705 net.go:648] Add success.
I0322 15:31:43.424017  543705 net.go:770] primary dev: ETH0
I0322 15:31:43.424032  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:31:43.424046  543705 net.go:698] Add success.
I0322 15:31:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:31:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:31:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:31:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:31:53.409796  543705 memory.go:184] no items to output this cycle
I0322 15:31:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 15:32:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:32:03.409780  543705 memory.go:184] no items to output this cycle
I0322 15:32:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 15:32:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:32:13.409827  543705 memory.go:191] Add success.
I0322 15:32:13.409832  543705 cpu.go:282] Add success.
W0322 15:32:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:32:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:32:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:32:13.420134  543705 net.go:648] Add success.
I0322 15:32:13.423100  543705 net.go:770] primary dev: ETH0
I0322 15:32:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:32:13.423128  543705 net.go:698] Add success.
W0322 15:32:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:32:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 15:32:14.455204  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:32:14.457098  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:32:14.457109  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:32:14.457114  543705 disk_worker.go:494] system disk:vda1
I0322 15:32:14.457116  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:32:14.457152  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:32:15.456863  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:32:15.456872  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:32:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:32:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:32:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:32:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:32:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:32:23.409794  543705 memory.go:184] no items to output this cycle
I0322 15:32:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 15:32:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:32:33.409781  543705 memory.go:184] no items to output this cycle
I0322 15:32:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 15:32:35.412799  543705 disk_info.go:125] begin check local disk info of client
I0322 15:32:35.415365  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:32:35.415372  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5e80 0xc0002a5ec0]
E0322 15:32:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:32:43.410733  543705 memory.go:191] Add success.
I0322 15:32:43.409800  543705 cpu.go:282] Add success.
I0322 15:32:43.420460  543705 net.go:648] Add success.
I0322 15:32:43.423515  543705 net.go:770] primary dev: ETH0
I0322 15:32:43.423533  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:32:43.423547  543705 net.go:698] Add success.
I0322 15:32:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:32:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:32:46.458048  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:32:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:32:53.409787  543705 memory.go:184] no items to output this cycle
I0322 15:32:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 15:33:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:33:03.409774  543705 memory.go:184] no items to output this cycle
I0322 15:33:03.409778  543705 cpu.go:275] no items to output this cycle
W0322 15:33:13.409714  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:33:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:33:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 15:33:13.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:33:13.409964  543705 cpu.go:282] Add success.
I0322 15:33:13.410004  543705 memory.go:191] Add success.
I0322 15:33:13.419730  543705 net.go:648] Add success.
I0322 15:33:13.422708  543705 net.go:770] primary dev: ETH0
I0322 15:33:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:33:13.422732  543705 net.go:698] Add success.
I0322 15:33:13.468919  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c2248e8-49e0-4af3-a1cc-daa77ac439bb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:33:13.468953  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:33:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:33:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:33:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 15:33:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:33:14.456495  543705 disk_worker.go:494] system disk:vda1
I0322 15:33:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:33:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:33:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:33:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:33:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:33:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:33:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 15:33:23.409776  543705 memory.go:184] no items to output this cycle
E0322 15:33:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:33:33.409804  543705 memory.go:184] no items to output this cycle
I0322 15:33:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 15:33:35.415788  543705 disk_info.go:125] begin check local disk info of client
I0322 15:33:35.418418  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:33:35.418424  543705 disk_info.go:196] parse disk info done, disk is : [0xc000591000 0xc000591040]
I0322 15:33:39.701198  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:33:39.701205  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:33:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:33:43.410737  543705 memory.go:191] Add success.
I0322 15:33:43.409795  543705 cpu.go:282] Add success.
I0322 15:33:43.420430  543705 net.go:648] Add success.
I0322 15:33:43.422978  543705 net.go:770] primary dev: ETH0
I0322 15:33:43.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:33:43.423005  543705 net.go:698] Add success.
I0322 15:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:33:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:33:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:33:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:33:53.409767  543705 memory.go:184] no items to output this cycle
I0322 15:33:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:34:03.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:34:03.409876  543705 cpu.go:275] no items to output this cycle
I0322 15:34:03.409884  543705 memory.go:184] no items to output this cycle
E0322 15:34:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:34:13.409788  543705 memory.go:191] Add success.
I0322 15:34:13.409793  543705 cpu.go:282] Add success.
W0322 15:34:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:34:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:34:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:34:13.420074  543705 net.go:648] Add success.
I0322 15:34:13.422653  543705 net.go:770] primary dev: ETH0
I0322 15:34:13.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:34:13.422678  543705 net.go:698] Add success.
I0322 15:34:14.453933  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:34:14.455239  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:34:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0322 15:34:14.455254  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:34:14.456622  543705 disk_worker.go:494] system disk:vda1
I0322 15:34:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:34:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:34:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:34:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:34:23.409807  543705 memory.go:184] no items to output this cycle
I0322 15:34:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 15:34:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:34:33.409797  543705 memory.go:184] no items to output this cycle
I0322 15:34:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 15:34:35.418794  543705 disk_info.go:125] begin check local disk info of client
I0322 15:34:35.421316  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:34:35.421321  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344800 0xc000344840]
E0322 15:34:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:34:43.410791  543705 memory.go:191] Add success.
I0322 15:34:43.409828  543705 cpu.go:282] Add success.
I0322 15:34:43.420487  543705 net.go:648] Add success.
I0322 15:34:43.423011  543705 net.go:770] primary dev: ETH0
I0322 15:34:43.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:34:43.423038  543705 net.go:698] Add success.
I0322 15:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:34:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:34:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:34:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:34:53.409805  543705 memory.go:184] no items to output this cycle
I0322 15:34:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 15:35:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:35:03.409814  543705 memory.go:184] no items to output this cycle
I0322 15:35:03.409827  543705 cpu.go:275] no items to output this cycle
E0322 15:35:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:35:13.409807  543705 memory.go:191] Add success.
I0322 15:35:13.409810  543705 cpu.go:282] Add success.
W0322 15:35:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:35:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:35:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:35:13.420277  543705 net.go:648] Add success.
I0322 15:35:13.423327  543705 net.go:770] primary dev: ETH0
I0322 15:35:13.423340  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:35:13.423351  543705 net.go:698] Add success.
I0322 15:35:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:35:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:35:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 15:35:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:35:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 15:35:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:35:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:35:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:35:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:35:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:35:23.409784  543705 memory.go:184] no items to output this cycle
I0322 15:35:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 15:35:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:35:33.409795  543705 memory.go:184] no items to output this cycle
I0322 15:35:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 15:35:35.421799  543705 disk_info.go:125] begin check local disk info of client
I0322 15:35:35.424400  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:35:35.424406  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474100 0xc000474140]
E0322 15:35:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:35:43.410637  543705 memory.go:191] Add success.
I0322 15:35:43.409819  543705 cpu.go:282] Add success.
I0322 15:35:43.420303  543705 net.go:648] Add success.
I0322 15:35:43.423232  543705 net.go:770] primary dev: ETH0
I0322 15:35:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:35:43.423258  543705 net.go:698] Add success.
I0322 15:35:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:35:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:35:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:35:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:35:53.409766  543705 memory.go:184] no items to output this cycle
I0322 15:35:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 15:36:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:36:03.409789  543705 memory.go:184] no items to output this cycle
I0322 15:36:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 15:36:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:36:13.409794  543705 memory.go:191] Add success.
I0322 15:36:13.409796  543705 cpu.go:282] Add success.
W0322 15:36:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:36:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:36:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:36:13.420494  543705 net.go:648] Add success.
I0322 15:36:13.423309  543705 net.go:770] primary dev: ETH0
I0322 15:36:13.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:36:13.423337  543705 net.go:698] Add success.
I0322 15:36:13.521763  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ddb2385-9ed7-46a0-919c-36c646e637e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:36:13.521798  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 15:36:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:36:14.455289  543705 disk_worker.go:708] disk space is not compliant
W0322 15:36:14.455293  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:36:14.455597  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:36:14.457035  543705 disk_worker.go:494] system disk:vda1
I0322 15:36:14.457078  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:36:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:36:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:36:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:36:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:36:23.409767  543705 memory.go:184] no items to output this cycle
I0322 15:36:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 15:36:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:36:33.409779  543705 memory.go:184] no items to output this cycle
I0322 15:36:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 15:36:35.424807  543705 disk_info.go:125] begin check local disk info of client
I0322 15:36:35.427367  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:36:35.427373  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039d8c0 0xc00039d900]
I0322 15:36:39.701728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:36:39.701734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:36:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:36:43.410814  543705 memory.go:191] Add success.
I0322 15:36:43.409810  543705 cpu.go:282] Add success.
I0322 15:36:43.420484  543705 net.go:648] Add success.
I0322 15:36:43.423283  543705 net.go:770] primary dev: ETH0
I0322 15:36:43.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:36:43.423310  543705 net.go:698] Add success.
I0322 15:36:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:36:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:36:53.410500  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:36:53.410517  543705 cpu.go:275] no items to output this cycle
I0322 15:36:53.410521  543705 memory.go:184] no items to output this cycle
E0322 15:37:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:37:03.409777  543705 memory.go:184] no items to output this cycle
I0322 15:37:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 15:37:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:37:13.409798  543705 memory.go:191] Add success.
I0322 15:37:13.409799  543705 cpu.go:282] Add success.
W0322 15:37:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:37:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:37:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:37:13.420204  543705 net.go:648] Add success.
I0322 15:37:13.422902  543705 net.go:770] primary dev: ETH0
I0322 15:37:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:37:13.422927  543705 net.go:698] Add success.
I0322 15:37:13.453498  543705 event_worker.go:152] Polling the log file for events...
W0322 15:37:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:37:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 15:37:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:37:14.456828  543705 disk_worker.go:494] system disk:vda1
I0322 15:37:14.456867  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:37:14.457052  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:37:14.457060  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:37:14.457066  543705 custom_config.go:64] query custom config with name: gpu
E0322 15:37:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:37:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:37:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:37:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:37:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:37:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:37:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:37:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:37:23.409780  543705 memory.go:184] no items to output this cycle
I0322 15:37:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 15:37:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:37:33.409778  543705 memory.go:184] no items to output this cycle
I0322 15:37:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 15:37:35.427830  543705 disk_info.go:125] begin check local disk info of client
I0322 15:37:35.430418  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:37:35.430424  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359580 0xc0003595c0]
E0322 15:37:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:37:43.410680  543705 memory.go:191] Add success.
I0322 15:37:43.409804  543705 cpu.go:282] Add success.
I0322 15:37:43.420384  543705 net.go:648] Add success.
I0322 15:37:43.423174  543705 net.go:770] primary dev: ETH0
I0322 15:37:43.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:37:43.423200  543705 net.go:698] Add success.
I0322 15:37:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:37:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:37:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:37:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:37:53.409775  543705 cpu.go:275] no items to output this cycle
I0322 15:37:53.409786  543705 memory.go:184] no items to output this cycle
E0322 15:38:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:38:03.409798  543705 memory.go:184] no items to output this cycle
I0322 15:38:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 15:38:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:38:13.409793  543705 cpu.go:282] Add success.
I0322 15:38:13.409796  543705 memory.go:191] Add success.
W0322 15:38:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:38:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:38:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:38:13.420043  543705 net.go:648] Add success.
I0322 15:38:13.422804  543705 net.go:770] primary dev: ETH0
I0322 15:38:13.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:38:13.422829  543705 net.go:698] Add success.
I0322 15:38:14.453947  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:38:14.455299  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:38:14.455314  543705 disk_worker.go:708] disk space is not compliant
W0322 15:38:14.455318  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:38:14.456845  543705 disk_worker.go:494] system disk:vda1
I0322 15:38:14.456881  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:38:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:38:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:38:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:38:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:38:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:38:23.409770  543705 memory.go:184] no items to output this cycle
I0322 15:38:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 15:38:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:38:33.409809  543705 memory.go:184] no items to output this cycle
I0322 15:38:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 15:38:35.430845  543705 disk_info.go:125] begin check local disk info of client
I0322 15:38:35.433434  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:38:35.433440  543705 disk_info.go:196] parse disk info done, disk is : [0xc000307680 0xc0003076c0]
E0322 15:38:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:38:43.410582  543705 memory.go:191] Add success.
I0322 15:38:43.409804  543705 cpu.go:282] Add success.
I0322 15:38:43.420330  543705 net.go:648] Add success.
I0322 15:38:43.422986  543705 net.go:770] primary dev: ETH0
I0322 15:38:43.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:38:43.423010  543705 net.go:698] Add success.
I0322 15:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:38:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:38:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:38:53.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:38:53.409891  543705 memory.go:184] no items to output this cycle
I0322 15:38:53.409950  543705 cpu.go:275] no items to output this cycle
E0322 15:39:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:39:03.409784  543705 memory.go:184] no items to output this cycle
I0322 15:39:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 15:39:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:39:13.409804  543705 memory.go:191] Add success.
I0322 15:39:13.409808  543705 cpu.go:282] Add success.
W0322 15:39:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:39:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:39:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:39:13.420054  543705 net.go:648] Add success.
I0322 15:39:13.423135  543705 net.go:770] primary dev: ETH0
I0322 15:39:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:39:13.423177  543705 net.go:698] Add success.
I0322 15:39:13.468643  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f5e90f99-7d07-4329-b768-51e9208d7f3e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:39:13.468678  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:39:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:39:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:39:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 15:39:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:39:14.456744  543705 disk_worker.go:494] system disk:vda1
I0322 15:39:14.456779  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:39:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:39:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:39:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:39:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:39:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:39:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:39:23.409768  543705 memory.go:184] no items to output this cycle
I0322 15:39:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 15:39:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:39:33.409810  543705 memory.go:184] no items to output this cycle
I0322 15:39:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 15:39:35.433854  543705 disk_info.go:125] begin check local disk info of client
I0322 15:39:35.436495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:39:35.436502  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7000 0xc0003b7040]
I0322 15:39:39.701868  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:39:39.701875  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:39:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:39:43.410706  543705 memory.go:191] Add success.
I0322 15:39:43.409813  543705 cpu.go:282] Add success.
I0322 15:39:43.420433  543705 net.go:648] Add success.
I0322 15:39:43.423077  543705 net.go:770] primary dev: ETH0
I0322 15:39:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:39:43.423201  543705 net.go:698] Add success.
I0322 15:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:39:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:39:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:39:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:39:53.410269  543705 memory.go:184] no items to output this cycle
I0322 15:39:53.410276  543705 cpu.go:275] no items to output this cycle
E0322 15:40:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:40:03.409774  543705 memory.go:184] no items to output this cycle
I0322 15:40:03.409824  543705 cpu.go:275] no items to output this cycle
W0322 15:40:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:40:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:40:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 15:40:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:40:13.409821  543705 cpu.go:282] Add success.
I0322 15:40:13.409827  543705 memory.go:191] Add success.
I0322 15:40:13.420043  543705 net.go:648] Add success.
I0322 15:40:13.422609  543705 net.go:770] primary dev: ETH0
I0322 15:40:13.422623  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:40:13.422635  543705 net.go:698] Add success.
I0322 15:40:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:40:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:40:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 15:40:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:40:14.456629  543705 disk_worker.go:494] system disk:vda1
I0322 15:40:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:40:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:40:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:40:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:40:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:40:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:40:23.409783  543705 memory.go:184] no items to output this cycle
I0322 15:40:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 15:40:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:40:33.409794  543705 memory.go:184] no items to output this cycle
I0322 15:40:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 15:40:35.436875  543705 disk_info.go:125] begin check local disk info of client
I0322 15:40:35.439457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:40:35.439463  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340dc0 0xc000340e00]
E0322 15:40:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:40:43.410666  543705 memory.go:191] Add success.
I0322 15:40:43.409797  543705 cpu.go:282] Add success.
I0322 15:40:43.420358  543705 net.go:648] Add success.
I0322 15:40:43.422838  543705 net.go:770] primary dev: ETH0
I0322 15:40:43.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:40:43.423067  543705 net.go:698] Add success.
I0322 15:40:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:40:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:40:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:40:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:40:53.409798  543705 memory.go:184] no items to output this cycle
I0322 15:40:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 15:41:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:41:03.409792  543705 cpu.go:275] no items to output this cycle
I0322 15:41:03.409794  543705 memory.go:184] no items to output this cycle
E0322 15:41:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:41:13.409795  543705 memory.go:191] Add success.
I0322 15:41:13.409800  543705 cpu.go:282] Add success.
W0322 15:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:41:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:41:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:41:13.420049  543705 net.go:648] Add success.
I0322 15:41:13.422582  543705 net.go:770] primary dev: ETH0
I0322 15:41:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:41:13.422608  543705 net.go:698] Add success.
I0322 15:41:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:41:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:41:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 15:41:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:41:14.456618  543705 disk_worker.go:494] system disk:vda1
I0322 15:41:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:41:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:41:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:41:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:41:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:41:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:41:23.409773  543705 memory.go:184] no items to output this cycle
I0322 15:41:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:41:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:41:33.409776  543705 memory.go:184] no items to output this cycle
I0322 15:41:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 15:41:35.439894  543705 disk_info.go:125] begin check local disk info of client
I0322 15:41:35.442458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:41:35.442464  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473e80 0xc000473ec0]
E0322 15:41:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:41:43.410787  543705 memory.go:191] Add success.
I0322 15:41:43.409795  543705 cpu.go:282] Add success.
I0322 15:41:43.420627  543705 net.go:648] Add success.
I0322 15:41:43.424440  543705 net.go:770] primary dev: ETH0
I0322 15:41:43.424455  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:41:43.424469  543705 net.go:698] Add success.
I0322 15:41:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:41:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:41:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:41:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:41:53.409800  543705 memory.go:184] no items to output this cycle
I0322 15:41:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 15:42:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:42:03.409805  543705 memory.go:184] no items to output this cycle
I0322 15:42:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 15:42:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:42:13.409813  543705 memory.go:191] Add success.
I0322 15:42:13.409819  543705 cpu.go:282] Add success.
W0322 15:42:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:42:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:42:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:42:13.420192  543705 net.go:648] Add success.
I0322 15:42:13.423122  543705 net.go:770] primary dev: ETH0
I0322 15:42:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:42:13.423151  543705 net.go:698] Add success.
I0322 15:42:13.468823  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12733ad6-14bb-423f-a887-6ab21ffbe78c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:42:13.468859  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 15:42:14.455244  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:42:14.455260  543705 disk_worker.go:708] disk space is not compliant
W0322 15:42:14.455265  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:42:14.456106  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:42:14.456116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:42:14.456122  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:42:14.457143  543705 disk_worker.go:494] system disk:vda1
I0322 15:42:14.457187  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:42:15.457049  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:42:15.457064  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:42:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:42:16.457974  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:42:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:42:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:42:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:42:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:42:23.409785  543705 memory.go:184] no items to output this cycle
I0322 15:42:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:42:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:42:33.409773  543705 memory.go:184] no items to output this cycle
I0322 15:42:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 15:42:35.442908  543705 disk_info.go:125] begin check local disk info of client
I0322 15:42:35.445510  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:42:35.445516  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7080 0xc0003b70c0]
I0322 15:42:39.703212  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:42:39.703219  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:42:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:42:43.410586  543705 memory.go:191] Add success.
I0322 15:42:43.409801  543705 cpu.go:282] Add success.
I0322 15:42:43.420379  543705 net.go:648] Add success.
I0322 15:42:43.423151  543705 net.go:770] primary dev: ETH0
I0322 15:42:43.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:42:43.423180  543705 net.go:698] Add success.
I0322 15:42:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:42:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:42:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:42:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:42:53.409778  543705 memory.go:184] no items to output this cycle
I0322 15:42:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 15:43:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:43:03.409800  543705 memory.go:184] no items to output this cycle
I0322 15:43:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 15:43:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:43:13.409799  543705 memory.go:191] Add success.
I0322 15:43:13.409802  543705 cpu.go:282] Add success.
W0322 15:43:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:43:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:43:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:43:13.420108  543705 net.go:648] Add success.
I0322 15:43:13.422967  543705 net.go:770] primary dev: ETH0
I0322 15:43:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:43:13.422992  543705 net.go:698] Add success.
I0322 15:43:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:43:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:43:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 15:43:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:43:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 15:43:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:43:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:43:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:43:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:43:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:43:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:43:23.409789  543705 memory.go:184] no items to output this cycle
I0322 15:43:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 15:43:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:43:33.409779  543705 memory.go:184] no items to output this cycle
I0322 15:43:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 15:43:35.445922  543705 disk_info.go:125] begin check local disk info of client
I0322 15:43:35.448441  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:43:35.448447  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468cc0 0xc000468d00]
E0322 15:43:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:43:43.410575  543705 memory.go:191] Add success.
I0322 15:43:43.409798  543705 cpu.go:282] Add success.
I0322 15:43:43.420281  543705 net.go:648] Add success.
I0322 15:43:43.422936  543705 net.go:770] primary dev: ETH0
I0322 15:43:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:43:43.422967  543705 net.go:698] Add success.
I0322 15:43:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:43:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:43:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:43:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:43:53.409806  543705 memory.go:184] no items to output this cycle
I0322 15:43:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 15:44:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:44:03.409814  543705 memory.go:184] no items to output this cycle
I0322 15:44:03.409830  543705 cpu.go:275] no items to output this cycle
E0322 15:44:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:44:13.409803  543705 cpu.go:282] Add success.
I0322 15:44:13.409804  543705 memory.go:191] Add success.
W0322 15:44:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:44:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:44:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:44:13.420089  543705 net.go:648] Add success.
I0322 15:44:13.422710  543705 net.go:770] primary dev: ETH0
I0322 15:44:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:44:13.422733  543705 net.go:698] Add success.
I0322 15:44:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:44:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:44:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 15:44:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:44:14.456513  543705 disk_worker.go:494] system disk:vda1
I0322 15:44:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:44:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:44:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:44:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:44:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:44:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:44:23.409775  543705 memory.go:184] no items to output this cycle
I0322 15:44:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 15:44:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:44:33.409776  543705 memory.go:184] no items to output this cycle
I0322 15:44:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 15:44:35.448937  543705 disk_info.go:125] begin check local disk info of client
I0322 15:44:35.451452  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:44:35.451457  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368500 0xc000368540]
E0322 15:44:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:44:43.410900  543705 memory.go:191] Add success.
I0322 15:44:43.409827  543705 cpu.go:282] Add success.
I0322 15:44:43.420583  543705 net.go:648] Add success.
I0322 15:44:43.423424  543705 net.go:770] primary dev: ETH0
I0322 15:44:43.423438  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:44:43.423452  543705 net.go:698] Add success.
I0322 15:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:44:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:44:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:44:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:44:53.409850  543705 memory.go:184] no items to output this cycle
I0322 15:44:53.409898  543705 cpu.go:275] no items to output this cycle
E0322 15:45:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:45:03.409767  543705 memory.go:184] no items to output this cycle
I0322 15:45:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 15:45:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:45:13.409796  543705 memory.go:191] Add success.
I0322 15:45:13.409797  543705 cpu.go:282] Add success.
W0322 15:45:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:45:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:45:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:45:13.420154  543705 net.go:648] Add success.
I0322 15:45:13.423208  543705 net.go:770] primary dev: ETH0
I0322 15:45:13.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:45:13.423233  543705 net.go:698] Add success.
I0322 15:45:13.519589  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"76d74fbd-e751-4a88-bb83-1ea50a0962e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:45:13.519624  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:45:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:45:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:45:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 15:45:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:45:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 15:45:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:45:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:45:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:45:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:45:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:45:23.409765  543705 memory.go:184] no items to output this cycle
I0322 15:45:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 15:45:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:45:33.409792  543705 memory.go:184] no items to output this cycle
I0322 15:45:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 15:45:35.451949  543705 disk_info.go:125] begin check local disk info of client
I0322 15:45:35.454513  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:45:35.454519  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468d00 0xc000468d40]
I0322 15:45:39.704214  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:45:39.704220  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:45:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:45:43.410711  543705 memory.go:191] Add success.
I0322 15:45:43.409806  543705 cpu.go:282] Add success.
I0322 15:45:43.420400  543705 net.go:648] Add success.
I0322 15:45:43.423024  543705 net.go:770] primary dev: ETH0
I0322 15:45:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:45:43.423049  543705 net.go:698] Add success.
I0322 15:45:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:45:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:45:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:45:53.410226  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:45:53.410245  543705 memory.go:184] no items to output this cycle
I0322 15:45:53.410270  543705 cpu.go:275] no items to output this cycle
E0322 15:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:46:03.409787  543705 memory.go:184] no items to output this cycle
I0322 15:46:03.409805  543705 cpu.go:275] no items to output this cycle
W0322 15:46:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:46:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:46:13.409737  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 15:46:13.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:46:13.409845  543705 cpu.go:282] Add success.
I0322 15:46:13.409855  543705 memory.go:191] Add success.
I0322 15:46:13.420059  543705 net.go:648] Add success.
I0322 15:46:13.422493  543705 net.go:770] primary dev: ETH0
I0322 15:46:13.422508  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:46:13.422523  543705 net.go:698] Add success.
I0322 15:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:46:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:46:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 15:46:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:46:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 15:46:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:46:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:46:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:46:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:46:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:46:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:46:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:46:23.409770  543705 memory.go:184] no items to output this cycle
I0322 15:46:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 15:46:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:46:33.409787  543705 memory.go:184] no items to output this cycle
I0322 15:46:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 15:46:35.454971  543705 disk_info.go:125] begin check local disk info of client
I0322 15:46:35.457491  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:46:35.457496  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033a980 0xc00033a9c0]
E0322 15:46:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:46:43.411028  543705 memory.go:191] Add success.
I0322 15:46:43.409825  543705 cpu.go:282] Add success.
I0322 15:46:43.420711  543705 net.go:648] Add success.
I0322 15:46:43.423466  543705 net.go:770] primary dev: ETH0
I0322 15:46:43.423479  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:46:43.423502  543705 net.go:698] Add success.
I0322 15:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:46:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:46:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:46:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:46:53.409774  543705 memory.go:184] no items to output this cycle
I0322 15:46:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 15:47:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:47:03.409816  543705 memory.go:184] no items to output this cycle
I0322 15:47:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 15:47:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:47:13.409802  543705 memory.go:191] Add success.
I0322 15:47:13.409803  543705 cpu.go:282] Add success.
W0322 15:47:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:47:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:47:13.420168  543705 net.go:648] Add success.
I0322 15:47:13.422904  543705 net.go:770] primary dev: ETH0
I0322 15:47:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:47:13.422933  543705 net.go:698] Add success.
I0322 15:47:13.453582  543705 event_worker.go:152] Polling the log file for events...
W0322 15:47:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:47:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 15:47:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:47:14.455897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:47:14.455905  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:47:14.455911  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:47:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 15:47:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:47:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:47:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:47:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:47:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:47:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:47:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:47:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:47:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:47:23.409773  543705 memory.go:184] no items to output this cycle
I0322 15:47:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 15:47:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:47:33.409785  543705 memory.go:184] no items to output this cycle
I0322 15:47:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 15:47:35.457972  543705 disk_info.go:125] begin check local disk info of client
I0322 15:47:35.460501  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:47:35.460507  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033a780 0xc00033a7c0]
E0322 15:47:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:47:43.410739  543705 memory.go:191] Add success.
I0322 15:47:43.409816  543705 cpu.go:282] Add success.
I0322 15:47:43.420447  543705 net.go:648] Add success.
I0322 15:47:43.422964  543705 net.go:770] primary dev: ETH0
I0322 15:47:43.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:47:43.422989  543705 net.go:698] Add success.
I0322 15:47:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:47:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:47:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:47:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:47:53.409796  543705 memory.go:184] no items to output this cycle
I0322 15:47:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 15:48:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:48:03.409785  543705 memory.go:184] no items to output this cycle
I0322 15:48:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 15:48:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:48:13.409800  543705 memory.go:191] Add success.
I0322 15:48:13.409809  543705 cpu.go:282] Add success.
W0322 15:48:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:48:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:48:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:48:13.420129  543705 net.go:648] Add success.
I0322 15:48:13.422847  543705 net.go:770] primary dev: ETH0
I0322 15:48:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:48:13.422871  543705 net.go:698] Add success.
I0322 15:48:13.468910  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c07f85ce-f122-4338-b0ea-990879b06081","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:48:13.468941  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:48:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:48:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 15:48:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:48:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 15:48:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:48:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:48:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:48:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:48:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:48:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:48:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:48:23.409794  543705 memory.go:184] no items to output this cycle
I0322 15:48:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 15:48:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:48:33.409786  543705 memory.go:184] no items to output this cycle
I0322 15:48:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 15:48:35.461001  543705 disk_info.go:125] begin check local disk info of client
I0322 15:48:35.463551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:48:35.463557  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e4e40 0xc0003e4e80]
I0322 15:48:39.705221  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:48:39.705227  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:48:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:48:43.410609  543705 memory.go:191] Add success.
I0322 15:48:43.409826  543705 cpu.go:282] Add success.
I0322 15:48:43.420382  543705 net.go:648] Add success.
I0322 15:48:43.422994  543705 net.go:770] primary dev: ETH0
I0322 15:48:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:48:43.423020  543705 net.go:698] Add success.
I0322 15:48:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:48:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:48:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:48:53.410387  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:48:53.410395  543705 cpu.go:275] no items to output this cycle
I0322 15:48:53.410403  543705 memory.go:184] no items to output this cycle
E0322 15:49:03.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:49:03.409821  543705 memory.go:184] no items to output this cycle
I0322 15:49:03.409834  543705 cpu.go:275] no items to output this cycle
E0322 15:49:13.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:49:13.409938  543705 cpu.go:282] Add success.
I0322 15:49:13.410043  543705 memory.go:191] Add success.
W0322 15:49:13.410079  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:49:13.410096  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:49:13.410101  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:49:13.419743  543705 net.go:648] Add success.
I0322 15:49:13.423080  543705 net.go:770] primary dev: ETH0
I0322 15:49:13.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:49:13.423104  543705 net.go:698] Add success.
I0322 15:49:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:49:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:49:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 15:49:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:49:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 15:49:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:49:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:49:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:49:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:49:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:49:23.409803  543705 memory.go:184] no items to output this cycle
I0322 15:49:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 15:49:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:49:33.409795  543705 memory.go:184] no items to output this cycle
I0322 15:49:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 15:49:35.464010  543705 disk_info.go:125] begin check local disk info of client
I0322 15:49:35.466568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:49:35.466574  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab7c0 0xc0001ab800]
E0322 15:49:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:49:43.410915  543705 memory.go:191] Add success.
I0322 15:49:43.409814  543705 cpu.go:282] Add success.
I0322 15:49:43.420616  543705 net.go:648] Add success.
I0322 15:49:43.423665  543705 net.go:770] primary dev: ETH0
I0322 15:49:43.423679  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:49:43.423694  543705 net.go:698] Add success.
I0322 15:49:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:49:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:49:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:49:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:49:53.409806  543705 memory.go:184] no items to output this cycle
I0322 15:49:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 15:50:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:50:03.409763  543705 memory.go:184] no items to output this cycle
I0322 15:50:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 15:50:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:50:13.409792  543705 memory.go:191] Add success.
I0322 15:50:13.409799  543705 cpu.go:282] Add success.
W0322 15:50:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:50:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:50:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:50:13.419713  543705 net.go:648] Add success.
I0322 15:50:13.422207  543705 net.go:770] primary dev: ETH0
I0322 15:50:13.422220  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:50:13.422231  543705 net.go:698] Add success.
I0322 15:50:14.454599  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:50:14.454851  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:50:14.454861  543705 disk_worker.go:708] disk space is not compliant
W0322 15:50:14.454864  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:50:14.456247  543705 disk_worker.go:494] system disk:vda1
I0322 15:50:14.456277  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:50:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:50:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:50:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:50:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:50:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:50:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:50:23.409781  543705 memory.go:184] no items to output this cycle
I0322 15:50:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 15:50:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:50:33.409806  543705 memory.go:184] no items to output this cycle
I0322 15:50:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 15:50:35.467025  543705 disk_info.go:125] begin check local disk info of client
I0322 15:50:35.469657  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:50:35.469664  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472dc0 0xc000472f80]
E0322 15:50:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:50:43.410729  543705 memory.go:191] Add success.
I0322 15:50:43.409825  543705 cpu.go:282] Add success.
I0322 15:50:43.420420  543705 net.go:648] Add success.
I0322 15:50:43.423176  543705 net.go:770] primary dev: ETH0
I0322 15:50:43.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:50:43.423203  543705 net.go:698] Add success.
I0322 15:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:50:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:50:53.409782  543705 memory.go:184] no items to output this cycle
I0322 15:50:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 15:51:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:51:03.409781  543705 memory.go:184] no items to output this cycle
I0322 15:51:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 15:51:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:51:13.409787  543705 memory.go:191] Add success.
I0322 15:51:13.409807  543705 cpu.go:282] Add success.
W0322 15:51:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:51:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:51:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:51:13.420324  543705 net.go:648] Add success.
I0322 15:51:13.423453  543705 net.go:770] primary dev: ETH0
I0322 15:51:13.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:51:13.423483  543705 net.go:698] Add success.
I0322 15:51:13.468132  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fd1c3aab-9afe-4edd-8227-6f404d92ee19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:51:13.468164  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:51:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:51:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:51:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 15:51:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:51:14.456717  543705 disk_worker.go:494] system disk:vda1
I0322 15:51:14.456747  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:51:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:51:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:51:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:51:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:51:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:51:23.409762  543705 memory.go:184] no items to output this cycle
I0322 15:51:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 15:51:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:51:33.409776  543705 memory.go:184] no items to output this cycle
I0322 15:51:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 15:51:35.469743  543705 disk_info.go:125] begin check local disk info of client
I0322 15:51:35.472305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:51:35.472311  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0a00 0xc0004a0a40]
I0322 15:51:39.705730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:51:39.705737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:51:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:51:43.410637  543705 memory.go:191] Add success.
I0322 15:51:43.409789  543705 cpu.go:282] Add success.
I0322 15:51:43.420413  543705 net.go:648] Add success.
I0322 15:51:43.422858  543705 net.go:770] primary dev: ETH0
I0322 15:51:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:51:43.422887  543705 net.go:698] Add success.
I0322 15:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:51:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:51:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:51:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:51:53.409783  543705 memory.go:184] no items to output this cycle
I0322 15:51:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 15:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:52:03.409779  543705 memory.go:184] no items to output this cycle
I0322 15:52:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:52:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:52:13.409788  543705 memory.go:191] Add success.
I0322 15:52:13.409789  543705 cpu.go:282] Add success.
W0322 15:52:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:52:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:52:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:52:13.420280  543705 net.go:648] Add success.
I0322 15:52:13.423494  543705 net.go:770] primary dev: ETH0
I0322 15:52:13.423507  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:52:13.423518  543705 net.go:698] Add success.
W0322 15:52:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:52:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 15:52:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:52:14.455898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:52:14.455906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:52:14.455912  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:52:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 15:52:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:52:15.456985  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:52:15.456999  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:52:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:52:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:52:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:52:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:52:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:52:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:52:23.409795  543705 memory.go:184] no items to output this cycle
I0322 15:52:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 15:52:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:52:33.409790  543705 memory.go:184] no items to output this cycle
I0322 15:52:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 15:52:35.473045  543705 disk_info.go:125] begin check local disk info of client
I0322 15:52:35.475576  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:52:35.475582  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b300 0xc00007b340]
E0322 15:52:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:52:43.410731  543705 memory.go:191] Add success.
I0322 15:52:43.409795  543705 cpu.go:282] Add success.
I0322 15:52:43.420438  543705 net.go:648] Add success.
I0322 15:52:43.423230  543705 net.go:770] primary dev: ETH0
I0322 15:52:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:52:43.423259  543705 net.go:698] Add success.
I0322 15:52:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:52:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:52:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:52:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:52:53.409776  543705 memory.go:184] no items to output this cycle
I0322 15:52:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 15:53:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:53:03.409794  543705 memory.go:184] no items to output this cycle
I0322 15:53:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 15:53:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:53:13.409794  543705 memory.go:191] Add success.
I0322 15:53:13.409798  543705 cpu.go:282] Add success.
W0322 15:53:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:53:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:53:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:53:13.420093  543705 net.go:648] Add success.
I0322 15:53:13.422962  543705 net.go:770] primary dev: ETH0
I0322 15:53:13.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:53:13.422989  543705 net.go:698] Add success.
I0322 15:53:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:53:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:53:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 15:53:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:53:14.456581  543705 disk_worker.go:494] system disk:vda1
I0322 15:53:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:53:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:53:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:53:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:53:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:53:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:53:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:53:23.409774  543705 memory.go:184] no items to output this cycle
I0322 15:53:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 15:53:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:53:33.409804  543705 memory.go:184] no items to output this cycle
I0322 15:53:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 15:53:35.476073  543705 disk_info.go:125] begin check local disk info of client
I0322 15:53:35.478720  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:53:35.478726  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0fc0 0xc0002a1000]
E0322 15:53:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:53:43.410642  543705 memory.go:191] Add success.
I0322 15:53:43.409815  543705 cpu.go:282] Add success.
I0322 15:53:43.420352  543705 net.go:648] Add success.
I0322 15:53:43.423268  543705 net.go:770] primary dev: ETH0
I0322 15:53:43.423281  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:53:43.423293  543705 net.go:698] Add success.
I0322 15:53:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:53:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:53:53.409782  543705 memory.go:184] no items to output this cycle
I0322 15:53:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 15:54:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:54:03.409766  543705 memory.go:184] no items to output this cycle
I0322 15:54:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 15:54:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:54:13.409786  543705 memory.go:191] Add success.
I0322 15:54:13.409790  543705 cpu.go:282] Add success.
W0322 15:54:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:54:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:54:13.420220  543705 net.go:648] Add success.
I0322 15:54:13.422703  543705 net.go:770] primary dev: ETH0
I0322 15:54:13.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:54:13.422729  543705 net.go:698] Add success.
I0322 15:54:13.468577  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f3c7bd59-16ae-432a-9b95-7e305ceaee95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:54:13.468610  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 15:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:54:14.455336  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:54:14.455350  543705 disk_worker.go:708] disk space is not compliant
W0322 15:54:14.455354  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:54:14.457460  543705 disk_worker.go:494] system disk:vda1
I0322 15:54:14.457503  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:54:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:54:16.458397  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:54:16.458465  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:54:16.458492  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:54:16.472858  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:54:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:54:23.409779  543705 memory.go:184] no items to output this cycle
I0322 15:54:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:54:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:54:33.409785  543705 memory.go:184] no items to output this cycle
I0322 15:54:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 15:54:35.479039  543705 disk_info.go:125] begin check local disk info of client
I0322 15:54:35.481591  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:54:35.481597  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5340 0xc0000c5380]
I0322 15:54:39.707259  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:54:39.707266  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:54:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:54:43.410588  543705 memory.go:191] Add success.
I0322 15:54:43.409803  543705 cpu.go:282] Add success.
I0322 15:54:43.420285  543705 net.go:648] Add success.
I0322 15:54:43.422934  543705 net.go:770] primary dev: ETH0
I0322 15:54:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:54:43.422960  543705 net.go:698] Add success.
I0322 15:54:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:54:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:54:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:54:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:54:53.409782  543705 memory.go:184] no items to output this cycle
I0322 15:54:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 15:55:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:55:03.409762  543705 memory.go:184] no items to output this cycle
I0322 15:55:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 15:55:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:55:13.409795  543705 memory.go:191] Add success.
I0322 15:55:13.409797  543705 cpu.go:282] Add success.
W0322 15:55:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:55:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:55:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:55:13.420109  543705 net.go:648] Add success.
I0322 15:55:13.423207  543705 net.go:770] primary dev: ETH0
I0322 15:55:13.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:55:13.423245  543705 net.go:698] Add success.
I0322 15:55:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:55:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:55:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 15:55:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:55:14.459111  543705 disk_worker.go:494] system disk:vda1
I0322 15:55:14.459140  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:55:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:55:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:55:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:55:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:55:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:55:23.409774  543705 memory.go:184] no items to output this cycle
I0322 15:55:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 15:55:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:55:33.409808  543705 memory.go:184] no items to output this cycle
I0322 15:55:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 15:55:35.482096  543705 disk_info.go:125] begin check local disk info of client
I0322 15:55:35.484712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:55:35.484718  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e00 0xc0000c4e40]
E0322 15:55:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:55:43.410727  543705 memory.go:191] Add success.
I0322 15:55:43.409798  543705 cpu.go:282] Add success.
I0322 15:55:43.420398  543705 net.go:648] Add success.
I0322 15:55:43.423443  543705 net.go:770] primary dev: ETH0
I0322 15:55:43.423460  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:55:43.423475  543705 net.go:698] Add success.
I0322 15:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:55:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:55:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:55:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:55:53.409773  543705 memory.go:184] no items to output this cycle
I0322 15:55:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 15:56:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:56:03.409777  543705 memory.go:184] no items to output this cycle
I0322 15:56:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 15:56:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:56:13.409796  543705 memory.go:191] Add success.
I0322 15:56:13.409796  543705 cpu.go:282] Add success.
W0322 15:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:56:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:56:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:56:13.420304  543705 net.go:648] Add success.
I0322 15:56:13.423139  543705 net.go:770] primary dev: ETH0
I0322 15:56:13.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:56:13.423168  543705 net.go:698] Add success.
I0322 15:56:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:56:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:56:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 15:56:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:56:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 15:56:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:56:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:56:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:56:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:56:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:56:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:56:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:56:23.409780  543705 memory.go:184] no items to output this cycle
I0322 15:56:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 15:56:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:56:33.409788  543705 memory.go:184] no items to output this cycle
I0322 15:56:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 15:56:35.485069  543705 disk_info.go:125] begin check local disk info of client
I0322 15:56:35.487870  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:56:35.487876  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0322 15:56:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:56:43.410708  543705 memory.go:191] Add success.
I0322 15:56:43.409819  543705 cpu.go:282] Add success.
I0322 15:56:43.420376  543705 net.go:648] Add success.
I0322 15:56:43.423227  543705 net.go:770] primary dev: ETH0
I0322 15:56:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:56:43.423252  543705 net.go:698] Add success.
I0322 15:56:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:56:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:56:53.409776  543705 memory.go:184] no items to output this cycle
I0322 15:56:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 15:57:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:57:03.409775  543705 memory.go:184] no items to output this cycle
I0322 15:57:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 15:57:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:57:13.409796  543705 cpu.go:282] Add success.
I0322 15:57:13.409809  543705 memory.go:191] Add success.
W0322 15:57:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:57:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:57:13.412587  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:57:13.420181  543705 net.go:648] Add success.
I0322 15:57:13.428001  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 15:57:13.428073  543705 net.go:770] primary dev: ETH0
I0322 15:57:13.428085  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:57:13.428097  543705 net.go:698] Add success.
I0322 15:57:13.453660  543705 event_worker.go:152] Polling the log file for events...
I0322 15:57:13.469909  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98dd6436-a9a1-4390-a39c-cfbeea72b9aa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 15:57:13.469943  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 15:57:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:57:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 15:57:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 15:57:14.455854  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 15:57:14.455863  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 15:57:14.455869  543705 custom_config.go:64] query custom config with name: gpu
I0322 15:57:14.456856  543705 disk_worker.go:494] system disk:vda1
I0322 15:57:14.456893  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 15:57:15.456531  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 15:57:15.456541  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:57:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 15:57:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 15:57:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:57:16.458001  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:57:16.472344  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:57:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:57:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 15:57:23.409787  543705 memory.go:184] no items to output this cycle
E0322 15:57:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:57:33.409787  543705 memory.go:184] no items to output this cycle
I0322 15:57:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 15:57:35.487957  543705 disk_info.go:125] begin check local disk info of client
I0322 15:57:35.490589  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:57:35.490595  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2780 0xc0002b27c0]
I0322 15:57:39.708244  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 15:57:39.708250  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 15:57:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:57:43.410590  543705 memory.go:191] Add success.
I0322 15:57:43.409812  543705 cpu.go:282] Add success.
I0322 15:57:43.420335  543705 net.go:648] Add success.
I0322 15:57:43.422965  543705 net.go:770] primary dev: ETH0
I0322 15:57:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:57:43.422992  543705 net.go:698] Add success.
I0322 15:57:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:57:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:57:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:57:53.409770  543705 memory.go:184] no items to output this cycle
I0322 15:57:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 15:58:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:58:03.409764  543705 memory.go:184] no items to output this cycle
I0322 15:58:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 15:58:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:58:13.409795  543705 memory.go:191] Add success.
I0322 15:58:13.409795  543705 cpu.go:282] Add success.
W0322 15:58:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:58:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:58:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:58:13.420063  543705 net.go:648] Add success.
I0322 15:58:13.422811  543705 net.go:770] primary dev: ETH0
I0322 15:58:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:58:13.422836  543705 net.go:698] Add success.
I0322 15:58:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:58:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:58:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 15:58:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:58:14.456997  543705 disk_worker.go:494] system disk:vda1
I0322 15:58:14.457101  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:58:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:58:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:58:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:58:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:58:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:58:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:58:23.409796  543705 memory.go:184] no items to output this cycle
I0322 15:58:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 15:58:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:58:33.409797  543705 memory.go:184] no items to output this cycle
I0322 15:58:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 15:58:35.491151  543705 disk_info.go:125] begin check local disk info of client
I0322 15:58:35.493704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:58:35.493711  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd00 0xc0001abd40]
E0322 15:58:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:58:43.410734  543705 memory.go:191] Add success.
I0322 15:58:43.409806  543705 cpu.go:282] Add success.
I0322 15:58:43.420412  543705 net.go:648] Add success.
I0322 15:58:43.423207  543705 net.go:770] primary dev: ETH0
I0322 15:58:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:58:43.423234  543705 net.go:698] Add success.
I0322 15:58:46.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:58:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:58:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:58:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:58:53.409806  543705 memory.go:184] no items to output this cycle
I0322 15:58:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 15:59:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:59:03.409776  543705 memory.go:184] no items to output this cycle
I0322 15:59:03.409814  543705 cpu.go:275] no items to output this cycle
W0322 15:59:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 15:59:13.409749  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 15:59:13.409756  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 15:59:13.409849  543705 cpu.go:282] Add success.
E0322 15:59:13.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:59:13.411833  543705 memory.go:191] Add success.
I0322 15:59:13.420799  543705 net.go:648] Add success.
I0322 15:59:13.423564  543705 net.go:770] primary dev: ETH0
I0322 15:59:13.423582  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:59:13.423603  543705 net.go:698] Add success.
I0322 15:59:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 15:59:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 15:59:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 15:59:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 15:59:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 15:59:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 15:59:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 15:59:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 15:59:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 15:59:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:59:23.409777  543705 memory.go:184] no items to output this cycle
I0322 15:59:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 15:59:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:59:33.409786  543705 memory.go:184] no items to output this cycle
I0322 15:59:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 15:59:35.493791  543705 disk_info.go:125] begin check local disk info of client
I0322 15:59:35.496395  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 15:59:35.496401  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0322 15:59:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:59:43.410807  543705 memory.go:191] Add success.
I0322 15:59:43.409841  543705 cpu.go:282] Add success.
I0322 15:59:43.420514  543705 net.go:648] Add success.
I0322 15:59:43.423241  543705 net.go:770] primary dev: ETH0
I0322 15:59:43.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0322 15:59:43.423267  543705 net.go:698] Add success.
I0322 15:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 15:59:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 15:59:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 15:59:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 15:59:53.409806  543705 memory.go:184] no items to output this cycle
I0322 15:59:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 16:00:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:00:03.409792  543705 memory.go:184] no items to output this cycle
I0322 16:00:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 16:00:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:00:13.409797  543705 cpu.go:282] Add success.
I0322 16:00:13.409800  543705 memory.go:191] Add success.
W0322 16:00:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:00:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:00:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:00:13.420068  543705 net.go:648] Add success.
I0322 16:00:13.422915  543705 net.go:770] primary dev: ETH0
I0322 16:00:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:00:13.422945  543705 net.go:698] Add success.
I0322 16:00:13.468510  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2997e46b-6701-4b6e-a70a-dc305a7bac0b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:00:13.468546  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:00:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:00:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:00:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 16:00:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:00:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 16:00:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:00:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:00:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:00:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:00:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:00:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:00:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:00:23.409802  543705 memory.go:184] no items to output this cycle
I0322 16:00:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 16:00:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:00:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 16:00:33.409799  543705 memory.go:184] no items to output this cycle
I0322 16:00:35.497182  543705 disk_info.go:125] begin check local disk info of client
I0322 16:00:35.499744  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:00:35.499749  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
I0322 16:00:39.708386  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:00:39.708393  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:00:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:00:43.410717  543705 memory.go:191] Add success.
I0322 16:00:43.409807  543705 cpu.go:282] Add success.
I0322 16:00:43.420484  543705 net.go:648] Add success.
I0322 16:00:43.423227  543705 net.go:770] primary dev: ETH0
I0322 16:00:43.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:00:43.423256  543705 net.go:698] Add success.
I0322 16:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:00:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:00:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:00:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:00:53.409780  543705 memory.go:184] no items to output this cycle
I0322 16:00:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 16:01:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:01:03.409798  543705 memory.go:184] no items to output this cycle
I0322 16:01:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 16:01:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:01:13.409779  543705 memory.go:191] Add success.
W0322 16:01:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 16:01:13.409806  543705 cpu.go:282] Add success.
W0322 16:01:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:01:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:01:13.420106  543705 net.go:648] Add success.
I0322 16:01:13.422993  543705 net.go:770] primary dev: ETH0
I0322 16:01:13.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:01:13.423017  543705 net.go:698] Add success.
I0322 16:01:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:01:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:01:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 16:01:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:01:14.456543  543705 disk_worker.go:494] system disk:vda1
I0322 16:01:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:01:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:01:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:01:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:01:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:01:23.409911  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:01:23.409931  543705 memory.go:184] no items to output this cycle
I0322 16:01:23.409972  543705 cpu.go:275] no items to output this cycle
E0322 16:01:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:01:33.409772  543705 memory.go:184] no items to output this cycle
I0322 16:01:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 16:01:35.499835  543705 disk_info.go:125] begin check local disk info of client
I0322 16:01:35.502377  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:01:35.502382  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa40 0xc0001aaa80]
E0322 16:01:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:01:43.410809  543705 memory.go:191] Add success.
I0322 16:01:43.409805  543705 cpu.go:282] Add success.
I0322 16:01:43.420510  543705 net.go:648] Add success.
I0322 16:01:43.423453  543705 net.go:770] primary dev: ETH0
I0322 16:01:43.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:01:43.423482  543705 net.go:698] Add success.
I0322 16:01:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:01:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:01:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:01:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:01:53.409776  543705 memory.go:184] no items to output this cycle
I0322 16:01:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 16:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:02:03.409773  543705 memory.go:184] no items to output this cycle
I0322 16:02:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 16:02:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:02:13.409816  543705 memory.go:191] Add success.
I0322 16:02:13.409824  543705 cpu.go:282] Add success.
W0322 16:02:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:02:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:02:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:02:13.420103  543705 net.go:648] Add success.
I0322 16:02:13.422759  543705 net.go:770] primary dev: ETH0
I0322 16:02:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:02:13.422783  543705 net.go:698] Add success.
W0322 16:02:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:02:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0322 16:02:14.455249  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:02:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:02:14.455919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:02:14.455925  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:02:14.456845  543705 disk_worker.go:494] system disk:vda1
I0322 16:02:14.456873  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:02:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:02:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:02:16.458082  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:02:16.458150  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0322 16:02:16.458148  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:02:16.458172  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:02:16.472540  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:02:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:02:23.409798  543705 memory.go:184] no items to output this cycle
I0322 16:02:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 16:02:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:02:33.409786  543705 memory.go:184] no items to output this cycle
I0322 16:02:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 16:02:35.503205  543705 disk_info.go:125] begin check local disk info of client
I0322 16:02:35.505669  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:02:35.505674  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a03c0 0xc0002a0400]
E0322 16:02:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:02:43.410668  543705 memory.go:191] Add success.
I0322 16:02:43.409808  543705 cpu.go:282] Add success.
I0322 16:02:43.420368  543705 net.go:648] Add success.
I0322 16:02:43.423363  543705 net.go:770] primary dev: ETH0
I0322 16:02:43.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:02:43.423393  543705 net.go:698] Add success.
I0322 16:02:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:02:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:02:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:02:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:02:53.409802  543705 memory.go:184] no items to output this cycle
I0322 16:02:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 16:03:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:03:03.409797  543705 memory.go:184] no items to output this cycle
I0322 16:03:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 16:03:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:03:13.409806  543705 cpu.go:282] Add success.
I0322 16:03:13.409809  543705 memory.go:191] Add success.
W0322 16:03:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:03:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:03:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:03:13.420111  543705 net.go:648] Add success.
I0322 16:03:13.422877  543705 net.go:770] primary dev: ETH0
I0322 16:03:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:03:13.422901  543705 net.go:698] Add success.
I0322 16:03:13.469098  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4b6a4bcc-15e1-446d-8d8e-de39a7b0235c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:03:13.469135  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:03:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:03:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:03:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 16:03:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:03:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 16:03:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:03:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:03:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:03:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:03:23.409808  543705 memory.go:184] no items to output this cycle
I0322 16:03:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 16:03:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:03:33.409790  543705 memory.go:184] no items to output this cycle
I0322 16:03:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 16:03:35.506175  543705 disk_info.go:125] begin check local disk info of client
I0322 16:03:35.508667  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:03:35.508672  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4280 0xc0000c42c0]
I0322 16:03:39.709247  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:03:39.709255  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:03:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:03:43.410707  543705 memory.go:191] Add success.
I0322 16:03:43.409830  543705 cpu.go:282] Add success.
I0322 16:03:43.420470  543705 net.go:648] Add success.
I0322 16:03:43.423034  543705 net.go:770] primary dev: ETH0
I0322 16:03:43.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:03:43.423063  543705 net.go:698] Add success.
I0322 16:03:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:03:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:03:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:03:53.409782  543705 memory.go:184] no items to output this cycle
I0322 16:03:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 16:04:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:04:03.409809  543705 memory.go:184] no items to output this cycle
I0322 16:04:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 16:04:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:04:13.409777  543705 memory.go:191] Add success.
W0322 16:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 16:04:13.409821  543705 cpu.go:282] Add success.
W0322 16:04:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:04:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:04:13.420132  543705 net.go:648] Add success.
I0322 16:04:13.422657  543705 net.go:770] primary dev: ETH0
I0322 16:04:13.422670  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:04:13.422681  543705 net.go:698] Add success.
I0322 16:04:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:04:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:04:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 16:04:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:04:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 16:04:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:04:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:04:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:04:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:04:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:04:16.472527  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:04:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:04:23.409782  543705 memory.go:184] no items to output this cycle
I0322 16:04:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 16:04:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:04:33.409813  543705 memory.go:184] no items to output this cycle
I0322 16:04:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 16:04:35.509178  543705 disk_info.go:125] begin check local disk info of client
I0322 16:04:35.511686  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:04:35.511693  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a18c0 0xc0004a1900]
E0322 16:04:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:04:43.410763  543705 memory.go:191] Add success.
I0322 16:04:43.409833  543705 cpu.go:282] Add success.
I0322 16:04:43.420471  543705 net.go:648] Add success.
I0322 16:04:43.423528  543705 net.go:770] primary dev: ETH0
I0322 16:04:43.423543  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:04:43.423559  543705 net.go:698] Add success.
I0322 16:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:04:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:04:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:04:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:04:53.410400  543705 memory.go:184] no items to output this cycle
I0322 16:04:53.410433  543705 cpu.go:275] no items to output this cycle
E0322 16:05:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:05:03.409774  543705 memory.go:184] no items to output this cycle
I0322 16:05:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 16:05:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:05:13.409786  543705 memory.go:191] Add success.
I0322 16:05:13.409787  543705 cpu.go:282] Add success.
W0322 16:05:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:05:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:05:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:05:13.420068  543705 net.go:648] Add success.
I0322 16:05:13.423113  543705 net.go:770] primary dev: ETH0
I0322 16:05:13.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:05:13.423141  543705 net.go:698] Add success.
I0322 16:05:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:05:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:05:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 16:05:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:05:14.456952  543705 disk_worker.go:494] system disk:vda1
I0322 16:05:14.456999  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:05:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:05:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:05:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:05:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:05:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:05:23.409767  543705 memory.go:184] no items to output this cycle
I0322 16:05:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 16:05:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:05:33.409770  543705 memory.go:184] no items to output this cycle
I0322 16:05:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 16:05:35.511773  543705 disk_info.go:125] begin check local disk info of client
I0322 16:05:35.514326  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:05:35.514332  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af00 0xc00007af40]
E0322 16:05:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:05:43.410702  543705 memory.go:191] Add success.
I0322 16:05:43.409799  543705 cpu.go:282] Add success.
I0322 16:05:43.420371  543705 net.go:648] Add success.
I0322 16:05:43.423072  543705 net.go:770] primary dev: ETH0
I0322 16:05:43.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:05:43.423102  543705 net.go:698] Add success.
I0322 16:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:05:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:05:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:05:53.409761  543705 memory.go:184] no items to output this cycle
I0322 16:05:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 16:06:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:06:03.409775  543705 memory.go:184] no items to output this cycle
I0322 16:06:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 16:06:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:06:13.409789  543705 memory.go:191] Add success.
I0322 16:06:13.409789  543705 cpu.go:282] Add success.
W0322 16:06:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:06:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:06:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:06:13.420121  543705 net.go:648] Add success.
I0322 16:06:13.422961  543705 net.go:770] primary dev: ETH0
I0322 16:06:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:06:13.422984  543705 net.go:698] Add success.
I0322 16:06:13.468269  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ff0eb16e-887a-44ed-af4d-c651b86f6894","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:06:13.468302  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:06:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:06:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:06:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 16:06:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:06:14.456677  543705 disk_worker.go:494] system disk:vda1
I0322 16:06:14.456704  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:06:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:06:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:06:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:06:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:06:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:06:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 16:06:23.409779  543705 memory.go:184] no items to output this cycle
E0322 16:06:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:06:33.409781  543705 memory.go:184] no items to output this cycle
I0322 16:06:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 16:06:35.515263  543705 disk_info.go:125] begin check local disk info of client
I0322 16:06:35.517827  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:06:35.517833  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab140 0xc0001ab180]
I0322 16:06:39.709732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:06:39.709740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:06:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:06:43.410763  543705 memory.go:191] Add success.
I0322 16:06:43.409811  543705 cpu.go:282] Add success.
I0322 16:06:43.420279  543705 net.go:770] primary dev: ETH0
I0322 16:06:43.420292  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:06:43.420304  543705 net.go:698] Add success.
I0322 16:06:43.420658  543705 net.go:648] Add success.
I0322 16:06:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:06:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:06:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:06:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:06:53.409799  543705 memory.go:184] no items to output this cycle
I0322 16:06:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:07:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:07:03.409800  543705 memory.go:184] no items to output this cycle
I0322 16:07:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 16:07:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:07:13.409791  543705 memory.go:191] Add success.
I0322 16:07:13.409795  543705 cpu.go:282] Add success.
W0322 16:07:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:07:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:07:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:07:13.420346  543705 net.go:648] Add success.
I0322 16:07:13.423246  543705 net.go:770] primary dev: ETH0
I0322 16:07:13.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:07:13.423277  543705 net.go:698] Add success.
I0322 16:07:13.452767  543705 event_worker.go:152] Polling the log file for events...
W0322 16:07:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:07:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 16:07:14.455225  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:07:14.455978  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:07:14.455988  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:07:14.455994  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:07:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 16:07:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:07:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:07:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:07:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:07:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:07:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:07:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:07:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:07:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:07:23.409802  543705 memory.go:184] no items to output this cycle
I0322 16:07:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:07:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:07:33.409776  543705 memory.go:184] no items to output this cycle
I0322 16:07:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 16:07:35.517921  543705 disk_info.go:125] begin check local disk info of client
I0322 16:07:35.520478  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:07:35.520486  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa40 0xc0001aaa80]
E0322 16:07:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:07:43.410701  543705 memory.go:191] Add success.
I0322 16:07:43.409810  543705 cpu.go:282] Add success.
I0322 16:07:43.420458  543705 net.go:648] Add success.
I0322 16:07:43.423426  543705 net.go:770] primary dev: ETH0
I0322 16:07:43.423440  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:07:43.423452  543705 net.go:698] Add success.
I0322 16:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:07:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:07:53.410380  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:07:53.410398  543705 memory.go:184] no items to output this cycle
I0322 16:07:53.410403  543705 cpu.go:275] no items to output this cycle
E0322 16:08:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:08:03.409801  543705 memory.go:184] no items to output this cycle
I0322 16:08:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 16:08:13.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:08:13.409872  543705 memory.go:191] Add success.
W0322 16:08:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:08:13.409915  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:08:13.409922  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:08:13.409939  543705 cpu.go:282] Add success.
I0322 16:08:13.419708  543705 net.go:648] Add success.
I0322 16:08:13.422560  543705 net.go:770] primary dev: ETH0
I0322 16:08:13.422573  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:08:13.422585  543705 net.go:698] Add success.
I0322 16:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:08:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:08:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 16:08:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:08:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 16:08:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:08:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:08:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:08:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:08:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:08:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:08:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:08:23.409808  543705 memory.go:184] no items to output this cycle
I0322 16:08:23.409822  543705 cpu.go:275] no items to output this cycle
E0322 16:08:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:08:33.409781  543705 memory.go:184] no items to output this cycle
I0322 16:08:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 16:08:35.521296  543705 disk_info.go:125] begin check local disk info of client
I0322 16:08:35.523785  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:08:35.523792  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b780 0xc00007b7c0]
E0322 16:08:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:08:43.410711  543705 memory.go:191] Add success.
I0322 16:08:43.409809  543705 cpu.go:282] Add success.
I0322 16:08:43.420429  543705 net.go:648] Add success.
I0322 16:08:43.423461  543705 net.go:770] primary dev: ETH0
I0322 16:08:43.423476  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:08:43.423489  543705 net.go:698] Add success.
I0322 16:08:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:08:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:08:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:08:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:08:53.409761  543705 memory.go:184] no items to output this cycle
I0322 16:08:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:09:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:09:03.409766  543705 memory.go:184] no items to output this cycle
I0322 16:09:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 16:09:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:09:13.409786  543705 memory.go:191] Add success.
W0322 16:09:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 16:09:13.409824  543705 cpu.go:282] Add success.
W0322 16:09:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:09:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:09:13.420139  543705 net.go:648] Add success.
I0322 16:09:13.423012  543705 net.go:770] primary dev: ETH0
I0322 16:09:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:09:13.423037  543705 net.go:698] Add success.
I0322 16:09:13.468455  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"14575018-bb59-4f17-994a-be9e11288527","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:09:13.468489  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:09:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:09:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 16:09:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:09:14.456723  543705 disk_worker.go:494] system disk:vda1
I0322 16:09:14.456760  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:09:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:09:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:09:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:09:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:09:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:09:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:09:23.409774  543705 memory.go:184] no items to output this cycle
I0322 16:09:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 16:09:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:09:33.409792  543705 memory.go:184] no items to output this cycle
I0322 16:09:33.409841  543705 cpu.go:275] no items to output this cycle
I0322 16:09:35.524261  543705 disk_info.go:125] begin check local disk info of client
I0322 16:09:35.526770  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:09:35.526776  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464780 0xc0004647c0]
I0322 16:09:39.711259  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:09:39.711266  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:09:43.410593  543705 memory.go:191] Add success.
I0322 16:09:43.409803  543705 cpu.go:282] Add success.
I0322 16:09:43.420323  543705 net.go:648] Add success.
I0322 16:09:43.423141  543705 net.go:770] primary dev: ETH0
I0322 16:09:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:09:43.423170  543705 net.go:698] Add success.
I0322 16:09:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:09:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:09:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:09:53.409799  543705 memory.go:184] no items to output this cycle
I0322 16:09:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 16:10:03.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:10:03.409853  543705 memory.go:184] no items to output this cycle
I0322 16:10:03.409955  543705 cpu.go:275] no items to output this cycle
E0322 16:10:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:10:13.409788  543705 memory.go:191] Add success.
I0322 16:10:13.409790  543705 cpu.go:282] Add success.
W0322 16:10:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:10:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:10:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:10:13.419877  543705 net.go:770] primary dev: ETH0
I0322 16:10:13.419892  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:10:13.419907  543705 net.go:698] Add success.
I0322 16:10:13.420262  543705 net.go:648] Add success.
I0322 16:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:10:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:10:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 16:10:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:10:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 16:10:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:10:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:10:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:10:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:10:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:10:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:10:23.409773  543705 memory.go:184] no items to output this cycle
I0322 16:10:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 16:10:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:10:33.409784  543705 memory.go:184] no items to output this cycle
I0322 16:10:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 16:10:35.526861  543705 disk_info.go:125] begin check local disk info of client
I0322 16:10:35.529413  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:10:35.529420  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002097c0 0xc000209800]
E0322 16:10:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:10:43.410784  543705 memory.go:191] Add success.
I0322 16:10:43.409799  543705 cpu.go:282] Add success.
I0322 16:10:43.420551  543705 net.go:648] Add success.
I0322 16:10:43.423386  543705 net.go:770] primary dev: ETH0
I0322 16:10:43.423401  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:10:43.423414  543705 net.go:698] Add success.
I0322 16:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:10:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:10:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:10:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:10:53.409767  543705 memory.go:184] no items to output this cycle
I0322 16:10:53.409800  543705 cpu.go:275] no items to output this cycle
I0322 16:11:03.409851  543705 cpu.go:275] no items to output this cycle
E0322 16:11:03.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:11:03.409886  543705 memory.go:184] no items to output this cycle
E0322 16:11:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:11:13.409816  543705 memory.go:191] Add success.
I0322 16:11:13.409826  543705 cpu.go:282] Add success.
W0322 16:11:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:11:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:11:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:11:13.420120  543705 net.go:648] Add success.
I0322 16:11:13.423060  543705 net.go:770] primary dev: ETH0
I0322 16:11:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:11:13.423085  543705 net.go:698] Add success.
I0322 16:11:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:11:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:11:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 16:11:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:11:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 16:11:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:11:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:11:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:11:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:11:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:11:23.409778  543705 memory.go:184] no items to output this cycle
I0322 16:11:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 16:11:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:11:33.409788  543705 memory.go:184] no items to output this cycle
I0322 16:11:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 16:11:35.530345  543705 disk_info.go:125] begin check local disk info of client
I0322 16:11:35.532908  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:11:35.532915  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393c80 0xc000393cc0]
E0322 16:11:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:11:43.410697  543705 memory.go:191] Add success.
I0322 16:11:43.409797  543705 cpu.go:282] Add success.
I0322 16:11:43.420412  543705 net.go:648] Add success.
I0322 16:11:43.423395  543705 net.go:770] primary dev: ETH0
I0322 16:11:43.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:11:43.423428  543705 net.go:698] Add success.
I0322 16:11:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:11:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:11:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:11:53.410223  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:11:53.410244  543705 memory.go:184] no items to output this cycle
I0322 16:11:53.410258  543705 cpu.go:275] no items to output this cycle
E0322 16:12:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:12:03.409778  543705 memory.go:184] no items to output this cycle
I0322 16:12:03.409913  543705 cpu.go:275] no items to output this cycle
E0322 16:12:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:12:13.409791  543705 memory.go:191] Add success.
I0322 16:12:13.409808  543705 cpu.go:282] Add success.
W0322 16:12:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:12:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:12:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:12:13.420212  543705 net.go:648] Add success.
I0322 16:12:13.422885  543705 net.go:770] primary dev: ETH0
I0322 16:12:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:12:13.422910  543705 net.go:698] Add success.
I0322 16:12:14.044747  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd5e0d79-44a0-45fb-8e42-435214efb224","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:12:14.044782  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 16:12:14.454963  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:12:14.454980  543705 disk_worker.go:708] disk space is not compliant
W0322 16:12:14.454983  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:12:14.456587  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:12:14.456596  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:12:14.456602  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:12:14.456728  543705 disk_worker.go:494] system disk:vda1
I0322 16:12:14.456760  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:12:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:12:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:12:16.458108  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:12:16.458107  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:12:16.458169  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:12:16.458191  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:12:16.472543  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:12:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:12:23.409790  543705 memory.go:184] no items to output this cycle
I0322 16:12:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 16:12:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:12:33.409800  543705 memory.go:184] no items to output this cycle
I0322 16:12:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 16:12:35.532996  543705 disk_info.go:125] begin check local disk info of client
I0322 16:12:35.535577  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:12:35.535584  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467cc0 0xc000467d00]
I0322 16:12:39.712252  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:12:39.712259  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:12:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:12:43.410775  543705 memory.go:191] Add success.
I0322 16:12:43.409801  543705 cpu.go:282] Add success.
I0322 16:12:43.420440  543705 net.go:648] Add success.
I0322 16:12:43.423358  543705 net.go:770] primary dev: ETH0
I0322 16:12:43.423372  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:12:43.423384  543705 net.go:698] Add success.
I0322 16:12:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:12:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:12:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:12:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:12:53.409790  543705 memory.go:184] no items to output this cycle
I0322 16:12:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 16:13:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:13:03.409817  543705 memory.go:184] no items to output this cycle
I0322 16:13:03.409828  543705 cpu.go:275] no items to output this cycle
E0322 16:13:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:13:13.409784  543705 memory.go:191] Add success.
I0322 16:13:13.409815  543705 cpu.go:282] Add success.
W0322 16:13:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:13:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:13:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:13:13.420276  543705 net.go:648] Add success.
I0322 16:13:13.423101  543705 net.go:770] primary dev: ETH0
I0322 16:13:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:13:13.423124  543705 net.go:698] Add success.
I0322 16:13:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:13:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:13:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 16:13:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:13:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 16:13:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:13:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:13:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:13:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:13:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:13:23.409797  543705 memory.go:184] no items to output this cycle
I0322 16:13:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 16:13:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:13:33.409776  543705 memory.go:184] no items to output this cycle
I0322 16:13:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 16:13:35.536367  543705 disk_info.go:125] begin check local disk info of client
I0322 16:13:35.538944  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:13:35.538950  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377200 0xc000377240]
E0322 16:13:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:13:43.410665  543705 memory.go:191] Add success.
I0322 16:13:43.409815  543705 cpu.go:282] Add success.
I0322 16:13:43.420367  543705 net.go:648] Add success.
I0322 16:13:43.423180  543705 net.go:770] primary dev: ETH0
I0322 16:13:43.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:13:43.423209  543705 net.go:698] Add success.
I0322 16:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:13:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:13:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:13:53.409777  543705 memory.go:184] no items to output this cycle
I0322 16:13:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 16:14:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:14:03.409780  543705 memory.go:184] no items to output this cycle
I0322 16:14:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 16:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:14:13.409791  543705 memory.go:191] Add success.
I0322 16:14:13.409794  543705 cpu.go:282] Add success.
W0322 16:14:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:14:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:14:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:14:13.420169  543705 net.go:648] Add success.
I0322 16:14:13.422711  543705 net.go:770] primary dev: ETH0
I0322 16:14:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:14:13.422741  543705 net.go:698] Add success.
I0322 16:14:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:14:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:14:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 16:14:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:14:14.456533  543705 disk_worker.go:494] system disk:vda1
I0322 16:14:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:14:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:14:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:14:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:14:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:14:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:14:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:14:23.409801  543705 memory.go:184] no items to output this cycle
I0322 16:14:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 16:14:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:14:33.409789  543705 memory.go:184] no items to output this cycle
I0322 16:14:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 16:14:35.539031  543705 disk_info.go:125] begin check local disk info of client
I0322 16:14:35.541672  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:14:35.541679  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a940 0xc00032a980]
E0322 16:14:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:14:43.410684  543705 memory.go:191] Add success.
I0322 16:14:43.409802  543705 cpu.go:282] Add success.
I0322 16:14:43.420373  543705 net.go:648] Add success.
I0322 16:14:43.423086  543705 net.go:770] primary dev: ETH0
I0322 16:14:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:14:43.423112  543705 net.go:698] Add success.
I0322 16:14:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:14:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:14:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:14:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:14:53.409791  543705 memory.go:184] no items to output this cycle
I0322 16:14:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 16:15:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:15:03.409766  543705 memory.go:184] no items to output this cycle
I0322 16:15:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 16:15:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:15:13.409798  543705 memory.go:191] Add success.
I0322 16:15:13.409800  543705 cpu.go:282] Add success.
W0322 16:15:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:15:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:15:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:15:13.420286  543705 net.go:648] Add success.
I0322 16:15:13.423069  543705 net.go:770] primary dev: ETH0
I0322 16:15:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:15:13.423094  543705 net.go:698] Add success.
I0322 16:15:13.468685  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e12fa04a-539e-497d-8350-c2934d1203ad","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:15:13.468716  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:15:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:15:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 16:15:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:15:14.456543  543705 disk_worker.go:494] system disk:vda1
I0322 16:15:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:15:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:15:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:15:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:15:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:15:23.409759  543705 memory.go:184] no items to output this cycle
I0322 16:15:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 16:15:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:15:33.409791  543705 memory.go:184] no items to output this cycle
I0322 16:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 16:15:35.541761  543705 disk_info.go:125] begin check local disk info of client
I0322 16:15:35.544408  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:15:35.544414  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b2c0 0xc00032b300]
I0322 16:15:39.713259  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:15:39.713265  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:15:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:15:43.410688  543705 memory.go:191] Add success.
I0322 16:15:43.409799  543705 cpu.go:282] Add success.
I0322 16:15:43.420372  543705 net.go:648] Add success.
I0322 16:15:43.423181  543705 net.go:770] primary dev: ETH0
I0322 16:15:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:15:43.423208  543705 net.go:698] Add success.
I0322 16:15:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:15:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:15:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:15:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:15:53.409783  543705 memory.go:184] no items to output this cycle
I0322 16:15:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 16:16:03.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:16:03.409878  543705 memory.go:184] no items to output this cycle
I0322 16:16:03.409901  543705 cpu.go:275] no items to output this cycle
E0322 16:16:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:16:13.409817  543705 memory.go:191] Add success.
I0322 16:16:13.409820  543705 cpu.go:282] Add success.
W0322 16:16:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:16:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:16:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:16:13.420128  543705 net.go:648] Add success.
I0322 16:16:13.422650  543705 net.go:770] primary dev: ETH0
I0322 16:16:13.422664  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:16:13.422675  543705 net.go:698] Add success.
I0322 16:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:16:14.455085  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:16:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0322 16:16:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:16:14.456461  543705 disk_worker.go:494] system disk:vda1
I0322 16:16:14.456505  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:16:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:16:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:16:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:16:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:16:16.472561  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:16:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:16:23.409770  543705 memory.go:184] no items to output this cycle
I0322 16:16:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:16:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:16:33.409812  543705 memory.go:184] no items to output this cycle
I0322 16:16:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 16:16:35.545409  543705 disk_info.go:125] begin check local disk info of client
I0322 16:16:35.548007  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:16:35.548014  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035c7c0 0xc00035c800]
E0322 16:16:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:16:43.410656  543705 memory.go:191] Add success.
I0322 16:16:43.409800  543705 cpu.go:282] Add success.
I0322 16:16:43.420366  543705 net.go:648] Add success.
I0322 16:16:43.423001  543705 net.go:770] primary dev: ETH0
I0322 16:16:43.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:16:43.423026  543705 net.go:698] Add success.
I0322 16:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:16:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:16:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:16:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:16:53.409761  543705 memory.go:184] no items to output this cycle
I0322 16:16:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 16:17:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:17:03.409777  543705 memory.go:184] no items to output this cycle
I0322 16:17:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 16:17:13.409961  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:17:13.409992  543705 memory.go:191] Add success.
W0322 16:17:13.410020  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:17:13.410036  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:17:13.410039  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:17:13.410048  543705 cpu.go:282] Add success.
I0322 16:17:13.419731  543705 net.go:648] Add success.
I0322 16:17:13.422460  543705 net.go:770] primary dev: ETH0
I0322 16:17:13.422477  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:17:13.422491  543705 net.go:698] Add success.
I0322 16:17:13.453081  543705 event_worker.go:152] Polling the log file for events...
W0322 16:17:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:17:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 16:17:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:17:14.456790  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:17:14.456801  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:17:14.456808  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:17:14.456856  543705 disk_worker.go:494] system disk:vda1
I0322 16:17:14.456900  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:17:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:17:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:17:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:17:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:17:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:17:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:17:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:17:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:17:23.409801  543705 memory.go:184] no items to output this cycle
I0322 16:17:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 16:17:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:17:33.409774  543705 memory.go:184] no items to output this cycle
I0322 16:17:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 16:17:35.548376  543705 disk_info.go:125] begin check local disk info of client
I0322 16:17:35.550906  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:17:35.550911  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a16c0 0xc0002a1700]
E0322 16:17:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:17:43.410711  543705 memory.go:191] Add success.
I0322 16:17:43.409796  543705 cpu.go:282] Add success.
I0322 16:17:43.420385  543705 net.go:648] Add success.
I0322 16:17:43.423165  543705 net.go:770] primary dev: ETH0
I0322 16:17:43.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:17:43.423192  543705 net.go:698] Add success.
I0322 16:17:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:17:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:17:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:17:53.409784  543705 memory.go:184] no items to output this cycle
I0322 16:17:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 16:18:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:18:03.409802  543705 memory.go:184] no items to output this cycle
I0322 16:18:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 16:18:13.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:18:13.409869  543705 memory.go:191] Add success.
W0322 16:18:13.409921  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:18:13.409944  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:18:13.409949  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:18:13.410027  543705 cpu.go:282] Add success.
I0322 16:18:13.419713  543705 net.go:648] Add success.
I0322 16:18:13.422401  543705 net.go:770] primary dev: ETH0
I0322 16:18:13.422416  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:18:13.422430  543705 net.go:698] Add success.
I0322 16:18:13.468130  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c14b3902-96e7-4b72-9413-9c69e7c17a3f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:18:13.468160  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:18:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:18:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:18:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 16:18:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:18:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 16:18:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:18:16.458019  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:18:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:18:16.458124  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:18:16.472595  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:18:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:18:23.409794  543705 cpu.go:275] no items to output this cycle
I0322 16:18:23.409799  543705 memory.go:184] no items to output this cycle
E0322 16:18:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:18:33.409777  543705 memory.go:184] no items to output this cycle
I0322 16:18:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 16:18:35.551390  543705 disk_info.go:125] begin check local disk info of client
I0322 16:18:35.553939  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:18:35.553945  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5280 0xc0000c52c0]
I0322 16:18:39.713732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:18:39.713738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:18:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:18:43.410746  543705 memory.go:191] Add success.
I0322 16:18:43.409817  543705 cpu.go:282] Add success.
I0322 16:18:43.420429  543705 net.go:648] Add success.
I0322 16:18:43.423117  543705 net.go:770] primary dev: ETH0
I0322 16:18:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:18:43.423146  543705 net.go:698] Add success.
I0322 16:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:18:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:18:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:18:53.409783  543705 cpu.go:275] no items to output this cycle
I0322 16:18:53.409784  543705 memory.go:184] no items to output this cycle
E0322 16:19:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:19:03.409774  543705 memory.go:184] no items to output this cycle
I0322 16:19:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 16:19:13.409916  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:19:13.409939  543705 cpu.go:282] Add success.
I0322 16:19:13.409974  543705 memory.go:191] Add success.
W0322 16:19:13.410001  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:19:13.410017  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:19:13.410020  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:19:13.419718  543705 net.go:648] Add success.
I0322 16:19:13.422499  543705 net.go:770] primary dev: ETH0
I0322 16:19:13.422513  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:19:13.422524  543705 net.go:698] Add success.
W0322 16:19:14.455244  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:19:14.455260  543705 disk_worker.go:708] disk space is not compliant
W0322 16:19:14.455264  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:19:14.455606  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:19:14.457588  543705 disk_worker.go:494] system disk:vda1
I0322 16:19:14.457632  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:19:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:19:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:19:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:19:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:19:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:19:23.409786  543705 memory.go:184] no items to output this cycle
I0322 16:19:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:19:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:19:33.409814  543705 memory.go:184] no items to output this cycle
I0322 16:19:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 16:19:35.554030  543705 disk_info.go:125] begin check local disk info of client
I0322 16:19:35.556565  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:19:35.556572  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0440 0xc0002a0480]
E0322 16:19:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:19:43.410677  543705 memory.go:191] Add success.
I0322 16:19:43.409811  543705 cpu.go:282] Add success.
I0322 16:19:43.420354  543705 net.go:648] Add success.
I0322 16:19:43.423209  543705 net.go:770] primary dev: ETH0
I0322 16:19:43.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:19:43.423248  543705 net.go:698] Add success.
I0322 16:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:19:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:19:53.409802  543705 memory.go:184] no items to output this cycle
I0322 16:19:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 16:20:03.410400  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:20:03.410419  543705 memory.go:184] no items to output this cycle
I0322 16:20:03.410449  543705 cpu.go:275] no items to output this cycle
E0322 16:20:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:20:13.409819  543705 memory.go:191] Add success.
I0322 16:20:13.409822  543705 cpu.go:282] Add success.
W0322 16:20:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:20:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:20:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:20:13.419721  543705 net.go:648] Add success.
I0322 16:20:13.423263  543705 net.go:770] primary dev: ETH0
I0322 16:20:13.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:20:13.423288  543705 net.go:698] Add success.
I0322 16:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:20:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:20:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 16:20:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:20:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 16:20:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:20:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:20:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:20:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:20:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:20:16.472497  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:20:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:20:23.409816  543705 memory.go:184] no items to output this cycle
I0322 16:20:23.409826  543705 cpu.go:275] no items to output this cycle
E0322 16:20:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:20:33.409803  543705 memory.go:184] no items to output this cycle
I0322 16:20:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 16:20:35.557468  543705 disk_info.go:125] begin check local disk info of client
I0322 16:20:35.560045  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:20:35.560050  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a01c0 0xc0002a0200]
E0322 16:20:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:20:43.410716  543705 memory.go:191] Add success.
I0322 16:20:43.409803  543705 cpu.go:282] Add success.
I0322 16:20:43.420430  543705 net.go:648] Add success.
I0322 16:20:43.423143  543705 net.go:770] primary dev: ETH0
I0322 16:20:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:20:43.423168  543705 net.go:698] Add success.
I0322 16:20:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:20:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:20:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:20:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:20:53.409796  543705 memory.go:184] no items to output this cycle
I0322 16:20:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 16:21:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:21:03.409791  543705 memory.go:184] no items to output this cycle
I0322 16:21:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 16:21:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:21:13.409931  543705 memory.go:191] Add success.
I0322 16:21:13.409952  543705 cpu.go:282] Add success.
W0322 16:21:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:21:13.409981  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:21:13.409984  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:21:13.419700  543705 net.go:648] Add success.
I0322 16:21:13.422516  543705 net.go:770] primary dev: ETH0
I0322 16:21:13.422529  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:21:13.422541  543705 net.go:698] Add success.
I0322 16:21:13.468238  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"df976905-8d64-4d6a-9884-75af716c7e16","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:21:13.468269  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:21:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:21:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:21:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 16:21:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:21:14.457881  543705 disk_worker.go:494] system disk:vda1
I0322 16:21:14.457911  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:21:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:21:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:21:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:21:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:21:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:21:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:21:23.409800  543705 memory.go:184] no items to output this cycle
I0322 16:21:23.409823  543705 cpu.go:275] no items to output this cycle
E0322 16:21:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:21:33.409791  543705 memory.go:184] no items to output this cycle
I0322 16:21:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 16:21:35.560132  543705 disk_info.go:125] begin check local disk info of client
I0322 16:21:35.562685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:21:35.562691  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab880 0xc0001ab8c0]
I0322 16:21:39.715262  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:21:39.715269  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:21:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:21:43.410675  543705 memory.go:191] Add success.
I0322 16:21:43.409797  543705 cpu.go:282] Add success.
I0322 16:21:43.420364  543705 net.go:648] Add success.
I0322 16:21:43.422928  543705 net.go:770] primary dev: ETH0
I0322 16:21:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:21:43.422953  543705 net.go:698] Add success.
I0322 16:21:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:21:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:21:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:21:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:21:53.409774  543705 memory.go:184] no items to output this cycle
I0322 16:21:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 16:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:22:03.409777  543705 memory.go:184] no items to output this cycle
I0322 16:22:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 16:22:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:22:13.409787  543705 cpu.go:282] Add success.
I0322 16:22:13.409793  543705 memory.go:191] Add success.
W0322 16:22:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:22:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:22:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:22:13.420341  543705 net.go:648] Add success.
I0322 16:22:13.423440  543705 net.go:770] primary dev: ETH0
I0322 16:22:13.423455  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:22:13.423468  543705 net.go:698] Add success.
W0322 16:22:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:22:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 16:22:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:22:14.455881  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:22:14.455890  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:22:14.455896  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:22:14.456543  543705 disk_worker.go:494] system disk:vda1
I0322 16:22:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:22:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:22:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:22:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:22:16.457983  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:22:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:22:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:22:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:22:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:22:23.409789  543705 cpu.go:275] no items to output this cycle
I0322 16:22:23.409795  543705 memory.go:184] no items to output this cycle
E0322 16:22:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:22:33.409783  543705 memory.go:184] no items to output this cycle
I0322 16:22:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 16:22:35.563459  543705 disk_info.go:125] begin check local disk info of client
I0322 16:22:35.565980  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:22:35.565986  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1e00 0xc0002a1e40]
E0322 16:22:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:22:43.410732  543705 memory.go:191] Add success.
I0322 16:22:43.409811  543705 cpu.go:282] Add success.
I0322 16:22:43.420442  543705 net.go:648] Add success.
I0322 16:22:43.423179  543705 net.go:770] primary dev: ETH0
I0322 16:22:43.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:22:43.423204  543705 net.go:698] Add success.
I0322 16:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:22:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:22:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:22:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:22:53.409783  543705 cpu.go:275] no items to output this cycle
I0322 16:22:53.409786  543705 memory.go:184] no items to output this cycle
E0322 16:23:03.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:23:03.409866  543705 memory.go:184] no items to output this cycle
I0322 16:23:03.409938  543705 cpu.go:275] no items to output this cycle
E0322 16:23:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:23:13.409813  543705 memory.go:191] Add success.
I0322 16:23:13.409826  543705 cpu.go:282] Add success.
W0322 16:23:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:23:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:23:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:23:13.420182  543705 net.go:648] Add success.
I0322 16:23:13.422915  543705 net.go:770] primary dev: ETH0
I0322 16:23:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:23:13.422939  543705 net.go:698] Add success.
I0322 16:23:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:23:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:23:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 16:23:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:23:14.456502  543705 disk_worker.go:494] system disk:vda1
I0322 16:23:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:23:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:23:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:23:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:23:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:23:16.472562  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:23:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:23:23.409776  543705 memory.go:184] no items to output this cycle
I0322 16:23:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 16:23:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:23:33.409790  543705 memory.go:184] no items to output this cycle
I0322 16:23:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 16:23:35.566478  543705 disk_info.go:125] begin check local disk info of client
I0322 16:23:35.569007  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:23:35.569013  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003426c0 0xc000342700]
E0322 16:23:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:23:43.410630  543705 memory.go:191] Add success.
I0322 16:23:43.409819  543705 cpu.go:282] Add success.
I0322 16:23:43.420385  543705 net.go:648] Add success.
I0322 16:23:43.423154  543705 net.go:770] primary dev: ETH0
I0322 16:23:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:23:43.423182  543705 net.go:698] Add success.
I0322 16:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:23:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:23:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:23:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:23:53.409779  543705 memory.go:184] no items to output this cycle
I0322 16:23:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 16:24:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:24:03.409778  543705 memory.go:184] no items to output this cycle
I0322 16:24:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 16:24:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:24:13.409820  543705 memory.go:191] Add success.
I0322 16:24:13.409823  543705 cpu.go:282] Add success.
W0322 16:24:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:24:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:24:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:24:13.420227  543705 net.go:648] Add success.
I0322 16:24:13.423083  543705 net.go:770] primary dev: ETH0
I0322 16:24:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:24:13.423110  543705 net.go:698] Add success.
I0322 16:24:13.464592  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"33e5cd56-0256-453f-8389-5ec7ccdf4455","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:24:13.464627  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:24:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:24:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:24:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 16:24:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:24:14.456772  543705 disk_worker.go:494] system disk:vda1
I0322 16:24:14.456802  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:24:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:24:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:24:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:24:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:24:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:24:23.409812  543705 memory.go:184] no items to output this cycle
I0322 16:24:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 16:24:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:24:33.409796  543705 memory.go:184] no items to output this cycle
I0322 16:24:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 16:24:35.569095  543705 disk_info.go:125] begin check local disk info of client
I0322 16:24:35.571707  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:24:35.571714  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b73c0 0xc0003b7400]
I0322 16:24:39.716272  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:24:39.716280  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:24:43.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:24:43.410959  543705 memory.go:191] Add success.
I0322 16:24:43.409947  543705 cpu.go:282] Add success.
I0322 16:24:43.419713  543705 net.go:648] Add success.
I0322 16:24:43.422504  543705 net.go:770] primary dev: ETH0
I0322 16:24:43.422517  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:24:43.422528  543705 net.go:698] Add success.
I0322 16:24:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:24:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:24:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:24:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:24:53.409775  543705 memory.go:184] no items to output this cycle
I0322 16:24:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 16:25:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:25:03.409787  543705 cpu.go:275] no items to output this cycle
I0322 16:25:03.409796  543705 memory.go:184] no items to output this cycle
E0322 16:25:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:25:13.409785  543705 memory.go:191] Add success.
I0322 16:25:13.409808  543705 cpu.go:282] Add success.
W0322 16:25:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:25:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:25:13.420084  543705 net.go:648] Add success.
I0322 16:25:13.423036  543705 net.go:770] primary dev: ETH0
I0322 16:25:13.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:25:13.423062  543705 net.go:698] Add success.
I0322 16:25:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:25:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:25:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0322 16:25:14.455150  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:25:14.456472  543705 disk_worker.go:494] system disk:vda1
I0322 16:25:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:25:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:25:16.458088  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:25:16.458155  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:25:16.458181  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:25:16.472592  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:25:23.409780  543705 memory.go:184] no items to output this cycle
I0322 16:25:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 16:25:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:25:33.409913  543705 memory.go:184] no items to output this cycle
I0322 16:25:33.410049  543705 cpu.go:275] no items to output this cycle
I0322 16:25:35.572504  543705 disk_info.go:125] begin check local disk info of client
I0322 16:25:35.575035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:25:35.575041  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e80 0xc0000c4ec0]
E0322 16:25:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:25:43.410780  543705 memory.go:191] Add success.
I0322 16:25:43.409819  543705 cpu.go:282] Add success.
I0322 16:25:43.420473  543705 net.go:648] Add success.
I0322 16:25:43.423360  543705 net.go:770] primary dev: ETH0
I0322 16:25:43.423373  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:25:43.423384  543705 net.go:698] Add success.
I0322 16:25:46.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:25:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:25:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:25:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:25:53.409775  543705 memory.go:184] no items to output this cycle
I0322 16:25:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 16:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:26:03.409770  543705 memory.go:184] no items to output this cycle
I0322 16:26:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:26:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:26:13.409809  543705 memory.go:191] Add success.
I0322 16:26:13.409818  543705 cpu.go:282] Add success.
W0322 16:26:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:26:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:26:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:26:13.420037  543705 net.go:648] Add success.
I0322 16:26:13.423593  543705 net.go:770] primary dev: ETH0
I0322 16:26:13.423605  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:26:13.423617  543705 net.go:698] Add success.
I0322 16:26:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:26:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:26:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 16:26:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:26:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 16:26:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:26:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:26:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:26:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:26:16.472439  543705 disk_local_worker.go:436] Get disk info: []
I0322 16:26:23.409926  543705 cpu.go:275] no items to output this cycle
E0322 16:26:23.410013  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:26:23.410030  543705 memory.go:184] no items to output this cycle
E0322 16:26:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:26:33.409821  543705 memory.go:184] no items to output this cycle
I0322 16:26:33.409831  543705 cpu.go:275] no items to output this cycle
I0322 16:26:35.575124  543705 disk_info.go:125] begin check local disk info of client
I0322 16:26:35.577683  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:26:35.577689  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500c00 0xc000500c40]
E0322 16:26:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:26:43.410515  543705 memory.go:191] Add success.
I0322 16:26:43.409822  543705 cpu.go:282] Add success.
I0322 16:26:43.420211  543705 net.go:648] Add success.
I0322 16:26:43.422853  543705 net.go:770] primary dev: ETH0
I0322 16:26:43.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:26:43.422878  543705 net.go:698] Add success.
I0322 16:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:26:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:26:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:26:53.409780  543705 memory.go:184] no items to output this cycle
I0322 16:26:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 16:27:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:27:03.409775  543705 memory.go:184] no items to output this cycle
I0322 16:27:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 16:27:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:27:13.409807  543705 memory.go:191] Add success.
I0322 16:27:13.409815  543705 cpu.go:282] Add success.
W0322 16:27:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:27:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:27:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:27:13.420111  543705 net.go:648] Add success.
I0322 16:27:13.422910  543705 net.go:770] primary dev: ETH0
I0322 16:27:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:27:13.422935  543705 net.go:698] Add success.
I0322 16:27:13.429011  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 16:27:13.453208  543705 event_worker.go:152] Polling the log file for events...
I0322 16:27:13.470399  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"40dc55d5-cdd3-46b7-953b-31312a81a041","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:27:13.470433  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 16:27:14.455245  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:27:14.455260  543705 disk_worker.go:708] disk space is not compliant
W0322 16:27:14.455264  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:27:14.455874  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:27:14.455883  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:27:14.455888  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:27:14.456817  543705 disk_worker.go:494] system disk:vda1
I0322 16:27:14.456859  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:27:15.456910  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:27:15.456923  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:27:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:27:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:27:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:27:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:27:16.472496  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:27:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:27:23.409800  543705 memory.go:184] no items to output this cycle
I0322 16:27:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:27:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:27:33.409804  543705 memory.go:184] no items to output this cycle
I0322 16:27:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 16:27:35.578530  543705 disk_info.go:125] begin check local disk info of client
I0322 16:27:35.581072  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:27:35.581078  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001dec00 0xc0001dec40]
I0322 16:27:39.717283  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:27:39.717288  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:27:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:27:43.410703  543705 memory.go:191] Add success.
I0322 16:27:43.409804  543705 cpu.go:282] Add success.
I0322 16:27:43.420457  543705 net.go:648] Add success.
I0322 16:27:43.423205  543705 net.go:770] primary dev: ETH0
I0322 16:27:43.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:27:43.423235  543705 net.go:698] Add success.
I0322 16:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:27:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:27:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:27:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:27:53.409777  543705 memory.go:184] no items to output this cycle
I0322 16:27:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 16:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:28:03.409797  543705 memory.go:184] no items to output this cycle
I0322 16:28:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 16:28:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:28:13.409784  543705 memory.go:191] Add success.
I0322 16:28:13.409805  543705 cpu.go:282] Add success.
W0322 16:28:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:28:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:28:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:28:13.420135  543705 net.go:648] Add success.
I0322 16:28:13.422798  543705 net.go:770] primary dev: ETH0
I0322 16:28:13.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:28:13.422823  543705 net.go:698] Add success.
I0322 16:28:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:28:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:28:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 16:28:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:28:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 16:28:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:28:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:28:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:28:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:28:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:28:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:28:23.409777  543705 memory.go:184] no items to output this cycle
I0322 16:28:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 16:28:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:28:33.409799  543705 memory.go:184] no items to output this cycle
I0322 16:28:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 16:28:35.581163  543705 disk_info.go:125] begin check local disk info of client
I0322 16:28:35.583797  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:28:35.583804  543705 disk_info.go:196] parse disk info done, disk is : [0xc000501b80 0xc000501bc0]
E0322 16:28:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:28:43.410861  543705 memory.go:191] Add success.
I0322 16:28:43.409826  543705 cpu.go:282] Add success.
I0322 16:28:43.420564  543705 net.go:648] Add success.
I0322 16:28:43.423196  543705 net.go:770] primary dev: ETH0
I0322 16:28:43.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:28:43.423221  543705 net.go:698] Add success.
I0322 16:28:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:28:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:28:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:28:53.410328  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:28:53.410342  543705 memory.go:184] no items to output this cycle
I0322 16:28:53.410374  543705 cpu.go:275] no items to output this cycle
E0322 16:29:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:29:03.409805  543705 memory.go:184] no items to output this cycle
I0322 16:29:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 16:29:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:29:13.409800  543705 memory.go:191] Add success.
I0322 16:29:13.409800  543705 cpu.go:282] Add success.
W0322 16:29:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:29:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:29:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:29:13.420100  543705 net.go:648] Add success.
I0322 16:29:13.423323  543705 net.go:770] primary dev: ETH0
I0322 16:29:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:29:13.423353  543705 net.go:698] Add success.
I0322 16:29:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:29:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:29:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 16:29:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:29:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 16:29:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:29:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:29:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:29:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:29:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:29:16.472517  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:29:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:29:23.409790  543705 memory.go:184] no items to output this cycle
I0322 16:29:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 16:29:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:29:33.409824  543705 memory.go:184] no items to output this cycle
I0322 16:29:33.409838  543705 cpu.go:275] no items to output this cycle
I0322 16:29:35.583886  543705 disk_info.go:125] begin check local disk info of client
I0322 16:29:35.586454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:29:35.586460  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abe00 0xc0001abe40]
E0322 16:29:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:29:43.410967  543705 memory.go:191] Add success.
I0322 16:29:43.409831  543705 cpu.go:282] Add success.
I0322 16:29:43.420623  543705 net.go:648] Add success.
I0322 16:29:43.423582  543705 net.go:770] primary dev: ETH0
I0322 16:29:43.423596  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:29:43.423608  543705 net.go:698] Add success.
I0322 16:29:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:29:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:29:53.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:29:53.409757  543705 memory.go:184] no items to output this cycle
I0322 16:29:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 16:30:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:30:03.409769  543705 memory.go:184] no items to output this cycle
I0322 16:30:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 16:30:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:30:13.409782  543705 memory.go:191] Add success.
I0322 16:30:13.409807  543705 cpu.go:282] Add success.
W0322 16:30:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:30:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:30:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:30:13.420175  543705 net.go:648] Add success.
I0322 16:30:13.423068  543705 net.go:770] primary dev: ETH0
I0322 16:30:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:30:13.423096  543705 net.go:698] Add success.
I0322 16:30:13.464303  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ec015ca0-f97a-4cbe-b24f-218a3815d756","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:30:13.464336  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:30:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:30:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:30:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 16:30:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:30:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 16:30:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:30:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:30:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:30:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:30:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:30:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:30:23.409812  543705 memory.go:184] no items to output this cycle
I0322 16:30:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 16:30:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:30:33.409798  543705 memory.go:184] no items to output this cycle
I0322 16:30:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 16:30:35.586542  543705 disk_info.go:125] begin check local disk info of client
I0322 16:30:35.589112  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:30:35.589118  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396580 0xc0003965c0]
I0322 16:30:39.717724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:30:39.717730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:30:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:30:43.410790  543705 memory.go:191] Add success.
I0322 16:30:43.409810  543705 cpu.go:282] Add success.
I0322 16:30:43.420528  543705 net.go:648] Add success.
I0322 16:30:43.423299  543705 net.go:770] primary dev: ETH0
I0322 16:30:43.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:30:43.423329  543705 net.go:698] Add success.
I0322 16:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:30:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:30:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:30:53.409769  543705 memory.go:184] no items to output this cycle
I0322 16:30:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 16:31:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:31:03.409784  543705 memory.go:184] no items to output this cycle
I0322 16:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 16:31:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:31:13.409796  543705 memory.go:191] Add success.
I0322 16:31:13.409813  543705 cpu.go:282] Add success.
W0322 16:31:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:31:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:31:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:31:13.420384  543705 net.go:648] Add success.
I0322 16:31:13.423216  543705 net.go:770] primary dev: ETH0
I0322 16:31:13.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:31:13.423243  543705 net.go:698] Add success.
I0322 16:31:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:31:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:31:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 16:31:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:31:14.456850  543705 disk_worker.go:494] system disk:vda1
I0322 16:31:14.456879  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:31:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:31:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:31:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:31:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:31:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:31:23.409803  543705 memory.go:184] no items to output this cycle
I0322 16:31:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 16:31:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:31:33.409783  543705 memory.go:184] no items to output this cycle
I0322 16:31:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 16:31:35.589200  543705 disk_info.go:125] begin check local disk info of client
I0322 16:31:35.591748  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:31:35.591754  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329c80 0xc000329cc0]
E0322 16:31:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:31:43.410734  543705 memory.go:191] Add success.
I0322 16:31:43.409798  543705 cpu.go:282] Add success.
I0322 16:31:43.420428  543705 net.go:648] Add success.
I0322 16:31:43.423576  543705 net.go:770] primary dev: ETH0
I0322 16:31:43.423591  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:31:43.423606  543705 net.go:698] Add success.
I0322 16:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:31:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:31:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:31:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:31:53.410388  543705 memory.go:184] no items to output this cycle
I0322 16:31:53.410400  543705 cpu.go:275] no items to output this cycle
E0322 16:32:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:32:03.409796  543705 memory.go:184] no items to output this cycle
I0322 16:32:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 16:32:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:32:13.409815  543705 memory.go:191] Add success.
I0322 16:32:13.409817  543705 cpu.go:282] Add success.
W0322 16:32:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:32:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:32:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:32:13.420221  543705 net.go:648] Add success.
I0322 16:32:13.422805  543705 net.go:770] primary dev: ETH0
I0322 16:32:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:32:13.422830  543705 net.go:698] Add success.
W0322 16:32:14.455373  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:32:14.455464  543705 disk_worker.go:708] disk space is not compliant
W0322 16:32:14.455468  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:32:14.456631  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:32:14.456640  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:32:14.456646  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:32:14.457453  543705 disk_worker.go:494] system disk:vda1
I0322 16:32:14.457491  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:32:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:32:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:32:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:32:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:32:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:32:16.457982  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:32:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:32:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:32:23.409813  543705 memory.go:184] no items to output this cycle
I0322 16:32:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 16:32:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:32:33.409787  543705 memory.go:184] no items to output this cycle
I0322 16:32:33.409867  543705 cpu.go:275] no items to output this cycle
I0322 16:32:35.591836  543705 disk_info.go:125] begin check local disk info of client
I0322 16:32:35.594425  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:32:35.594432  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2d00 0xc0002b2d40]
E0322 16:32:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:32:43.410644  543705 memory.go:191] Add success.
I0322 16:32:43.409809  543705 cpu.go:282] Add success.
I0322 16:32:43.420321  543705 net.go:648] Add success.
I0322 16:32:43.423008  543705 net.go:770] primary dev: ETH0
I0322 16:32:43.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:32:43.423035  543705 net.go:698] Add success.
I0322 16:32:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:32:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:32:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:32:53.410380  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:32:53.410395  543705 memory.go:184] no items to output this cycle
I0322 16:32:53.410399  543705 cpu.go:275] no items to output this cycle
E0322 16:33:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:33:03.409807  543705 memory.go:184] no items to output this cycle
I0322 16:33:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 16:33:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:33:13.409796  543705 memory.go:191] Add success.
I0322 16:33:13.409797  543705 cpu.go:282] Add success.
W0322 16:33:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:33:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:33:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:33:13.420440  543705 net.go:648] Add success.
I0322 16:33:13.423353  543705 net.go:770] primary dev: ETH0
I0322 16:33:13.423366  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:33:13.423377  543705 net.go:698] Add success.
I0322 16:33:13.463424  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"43da65a2-bbf7-47f1-a28a-91b76fd07db5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:33:13.463456  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:33:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:33:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:33:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 16:33:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:33:14.456652  543705 disk_worker.go:494] system disk:vda1
I0322 16:33:14.456682  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:33:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:33:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:33:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:33:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:33:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:33:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:33:23.409802  543705 memory.go:184] no items to output this cycle
I0322 16:33:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 16:33:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:33:33.409780  543705 memory.go:184] no items to output this cycle
I0322 16:33:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 16:33:35.594515  543705 disk_info.go:125] begin check local disk info of client
I0322 16:33:35.597091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:33:35.597097  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499b40 0xc000499b80]
I0322 16:33:39.719288  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:33:39.719294  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:33:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:33:43.410903  543705 memory.go:191] Add success.
I0322 16:33:43.409811  543705 cpu.go:282] Add success.
I0322 16:33:43.420594  543705 net.go:648] Add success.
I0322 16:33:43.423308  543705 net.go:770] primary dev: ETH0
I0322 16:33:43.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:33:43.423337  543705 net.go:698] Add success.
I0322 16:33:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:33:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:33:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:33:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:33:53.409801  543705 memory.go:184] no items to output this cycle
I0322 16:33:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 16:34:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:34:03.409770  543705 memory.go:184] no items to output this cycle
I0322 16:34:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 16:34:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:34:13.409789  543705 memory.go:191] Add success.
I0322 16:34:13.409808  543705 cpu.go:282] Add success.
W0322 16:34:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:34:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:34:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:34:13.419748  543705 net.go:648] Add success.
I0322 16:34:13.422658  543705 net.go:770] primary dev: ETH0
I0322 16:34:13.422673  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:34:13.422687  543705 net.go:698] Add success.
I0322 16:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:34:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:34:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 16:34:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:34:14.456570  543705 disk_worker.go:494] system disk:vda1
I0322 16:34:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:34:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:34:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:34:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:34:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:34:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:34:23.409787  543705 memory.go:184] no items to output this cycle
I0322 16:34:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 16:34:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:34:33.409810  543705 memory.go:184] no items to output this cycle
I0322 16:34:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 16:34:35.597182  543705 disk_info.go:125] begin check local disk info of client
I0322 16:34:35.599798  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:34:35.599804  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0322 16:34:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:34:43.410747  543705 memory.go:191] Add success.
I0322 16:34:43.409808  543705 cpu.go:282] Add success.
I0322 16:34:43.420438  543705 net.go:648] Add success.
I0322 16:34:43.422933  543705 net.go:770] primary dev: ETH0
I0322 16:34:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:34:43.422959  543705 net.go:698] Add success.
I0322 16:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:34:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:34:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:34:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:34:53.410275  543705 memory.go:184] no items to output this cycle
I0322 16:34:53.410284  543705 cpu.go:275] no items to output this cycle
E0322 16:35:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:35:03.409789  543705 memory.go:184] no items to output this cycle
I0322 16:35:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 16:35:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:35:13.409824  543705 memory.go:191] Add success.
I0322 16:35:13.409827  543705 cpu.go:282] Add success.
W0322 16:35:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:35:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:35:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:35:13.419713  543705 net.go:648] Add success.
I0322 16:35:13.422850  543705 net.go:770] primary dev: ETH0
I0322 16:35:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:35:13.422875  543705 net.go:698] Add success.
I0322 16:35:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:35:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:35:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 16:35:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:35:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 16:35:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:35:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:35:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:35:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:35:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:35:23.409776  543705 memory.go:184] no items to output this cycle
I0322 16:35:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:35:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:35:33.409813  543705 memory.go:184] no items to output this cycle
I0322 16:35:33.409864  543705 cpu.go:275] no items to output this cycle
I0322 16:35:35.599888  543705 disk_info.go:125] begin check local disk info of client
I0322 16:35:35.602477  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:35:35.602484  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f5140 0xc0003f5180]
E0322 16:35:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:35:43.410753  543705 memory.go:191] Add success.
I0322 16:35:43.409792  543705 cpu.go:282] Add success.
I0322 16:35:43.420461  543705 net.go:648] Add success.
I0322 16:35:43.423197  543705 net.go:770] primary dev: ETH0
I0322 16:35:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:35:43.423223  543705 net.go:698] Add success.
I0322 16:35:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:35:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:35:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:35:53.410367  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:35:53.410374  543705 cpu.go:275] no items to output this cycle
I0322 16:35:53.410381  543705 memory.go:184] no items to output this cycle
E0322 16:36:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:36:03.409795  543705 memory.go:184] no items to output this cycle
I0322 16:36:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:36:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:36:13.409793  543705 cpu.go:282] Add success.
I0322 16:36:13.409799  543705 memory.go:191] Add success.
W0322 16:36:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:36:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:36:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:36:13.420260  543705 net.go:648] Add success.
I0322 16:36:13.423049  543705 net.go:770] primary dev: ETH0
I0322 16:36:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:36:13.423074  543705 net.go:698] Add success.
I0322 16:36:13.468768  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b8388b68-d545-467e-ae31-4f2c57e08867","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:36:13.468799  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:36:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:36:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:36:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 16:36:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:36:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 16:36:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:36:15.455604  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:36:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:36:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:36:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:36:23.409775  543705 memory.go:184] no items to output this cycle
I0322 16:36:23.409805  543705 cpu.go:275] no items to output this cycle
I0322 16:36:33.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:36:33.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:36:33.409829  543705 memory.go:184] no items to output this cycle
I0322 16:36:35.602659  543705 disk_info.go:125] begin check local disk info of client
I0322 16:36:35.605195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:36:35.605202  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509e40 0xc000509e80]
I0322 16:36:39.720297  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:36:39.720304  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:36:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:36:43.410744  543705 memory.go:191] Add success.
I0322 16:36:43.409820  543705 cpu.go:282] Add success.
I0322 16:36:43.420469  543705 net.go:648] Add success.
I0322 16:36:43.423130  543705 net.go:770] primary dev: ETH0
I0322 16:36:43.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:36:43.423156  543705 net.go:698] Add success.
I0322 16:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:36:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:36:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:36:53.409800  543705 memory.go:184] no items to output this cycle
I0322 16:36:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 16:37:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:37:03.409785  543705 memory.go:184] no items to output this cycle
I0322 16:37:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 16:37:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:37:13.409800  543705 memory.go:191] Add success.
I0322 16:37:13.409799  543705 cpu.go:282] Add success.
W0322 16:37:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:37:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:37:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:37:13.419715  543705 net.go:648] Add success.
I0322 16:37:13.422568  543705 net.go:770] primary dev: ETH0
I0322 16:37:13.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:37:13.422592  543705 net.go:698] Add success.
I0322 16:37:13.453105  543705 event_worker.go:152] Polling the log file for events...
W0322 16:37:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:37:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 16:37:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:37:14.456775  543705 disk_worker.go:494] system disk:vda1
I0322 16:37:14.456815  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:37:14.457127  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:37:14.457135  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:37:14.457140  543705 custom_config.go:64] query custom config with name: gpu
E0322 16:37:15.456892  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:37:15.456905  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:37:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:37:16.457973  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:37:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:37:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:37:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:37:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:37:23.409776  543705 memory.go:184] no items to output this cycle
I0322 16:37:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 16:37:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:37:33.409782  543705 memory.go:184] no items to output this cycle
I0322 16:37:33.409836  543705 cpu.go:275] no items to output this cycle
I0322 16:37:35.605288  543705 disk_info.go:125] begin check local disk info of client
I0322 16:37:35.607913  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:37:35.607920  543705 disk_info.go:196] parse disk info done, disk is : [0xc000279c80 0xc000279cc0]
E0322 16:37:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:37:43.410692  543705 memory.go:191] Add success.
I0322 16:37:43.409819  543705 cpu.go:282] Add success.
I0322 16:37:43.420388  543705 net.go:648] Add success.
I0322 16:37:43.423039  543705 net.go:770] primary dev: ETH0
I0322 16:37:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:37:43.423068  543705 net.go:698] Add success.
I0322 16:37:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:37:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:37:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:37:53.409776  543705 memory.go:184] no items to output this cycle
I0322 16:37:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 16:38:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:38:03.409772  543705 memory.go:184] no items to output this cycle
I0322 16:38:03.409776  543705 cpu.go:275] no items to output this cycle
E0322 16:38:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:38:13.409787  543705 memory.go:191] Add success.
I0322 16:38:13.409809  543705 cpu.go:282] Add success.
W0322 16:38:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:38:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:38:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:38:13.419753  543705 net.go:648] Add success.
I0322 16:38:13.422426  543705 net.go:770] primary dev: ETH0
I0322 16:38:13.422440  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:38:13.422454  543705 net.go:698] Add success.
I0322 16:38:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:38:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:38:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 16:38:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:38:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 16:38:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:38:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:38:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:38:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:38:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:38:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:38:23.409777  543705 memory.go:184] no items to output this cycle
I0322 16:38:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:38:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:38:33.409768  543705 memory.go:184] no items to output this cycle
I0322 16:38:33.409855  543705 cpu.go:275] no items to output this cycle
I0322 16:38:35.608007  543705 disk_info.go:125] begin check local disk info of client
I0322 16:38:35.610645  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:38:35.610652  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad00 0xc0001aad40]
E0322 16:38:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:38:43.410768  543705 memory.go:191] Add success.
I0322 16:38:43.409812  543705 cpu.go:282] Add success.
I0322 16:38:43.420468  543705 net.go:648] Add success.
I0322 16:38:43.423106  543705 net.go:770] primary dev: ETH0
I0322 16:38:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:38:43.423135  543705 net.go:698] Add success.
I0322 16:38:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:38:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:38:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:38:53.410185  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:38:53.410202  543705 memory.go:184] no items to output this cycle
I0322 16:38:53.410230  543705 cpu.go:275] no items to output this cycle
E0322 16:39:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:39:03.409793  543705 memory.go:184] no items to output this cycle
I0322 16:39:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 16:39:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:39:13.409904  543705 cpu.go:282] Add success.
I0322 16:39:13.409917  543705 memory.go:191] Add success.
W0322 16:39:13.409948  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:39:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:39:13.409968  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:39:13.419721  543705 net.go:648] Add success.
I0322 16:39:13.422757  543705 net.go:770] primary dev: ETH0
I0322 16:39:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:39:13.422782  543705 net.go:698] Add success.
I0322 16:39:13.468605  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9ee220aa-609a-4fb7-aa30-e8b29188351d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:39:13.468635  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:39:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:39:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:39:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 16:39:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:39:14.456671  543705 disk_worker.go:494] system disk:vda1
I0322 16:39:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:39:15.455991  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:39:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:39:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:39:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:39:16.472509  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:39:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:39:23.409807  543705 memory.go:184] no items to output this cycle
I0322 16:39:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 16:39:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:39:33.409774  543705 memory.go:184] no items to output this cycle
I0322 16:39:33.409860  543705 cpu.go:275] no items to output this cycle
I0322 16:39:35.610688  543705 disk_info.go:125] begin check local disk info of client
I0322 16:39:35.613242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:39:35.613257  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500d00 0xc000500d40]
I0322 16:39:39.721301  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:39:39.721308  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:39:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:39:43.410699  543705 memory.go:191] Add success.
I0322 16:39:43.409827  543705 cpu.go:282] Add success.
I0322 16:39:43.420428  543705 net.go:648] Add success.
I0322 16:39:43.423033  543705 net.go:770] primary dev: ETH0
I0322 16:39:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:39:43.423059  543705 net.go:698] Add success.
I0322 16:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:39:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:39:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:39:53.409789  543705 memory.go:184] no items to output this cycle
I0322 16:39:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 16:40:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:40:03.409783  543705 memory.go:184] no items to output this cycle
I0322 16:40:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:40:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:40:13.409842  543705 memory.go:191] Add success.
I0322 16:40:13.409849  543705 cpu.go:282] Add success.
W0322 16:40:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:40:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:40:13.409894  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:40:13.420232  543705 net.go:648] Add success.
I0322 16:40:13.422693  543705 net.go:770] primary dev: ETH0
I0322 16:40:13.422708  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:40:13.422722  543705 net.go:698] Add success.
I0322 16:40:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:40:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:40:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 16:40:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:40:14.456559  543705 disk_worker.go:494] system disk:vda1
I0322 16:40:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:40:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:40:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:40:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:40:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:40:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:40:23.410407  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:40:23.410427  543705 memory.go:184] no items to output this cycle
I0322 16:40:23.410452  543705 cpu.go:275] no items to output this cycle
I0322 16:40:33.409795  543705 cpu.go:275] no items to output this cycle
E0322 16:40:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:40:33.409824  543705 memory.go:184] no items to output this cycle
I0322 16:40:35.613679  543705 disk_info.go:125] begin check local disk info of client
I0322 16:40:35.616242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:40:35.616249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003245c0 0xc000324600]
E0322 16:40:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:40:43.410878  543705 memory.go:191] Add success.
I0322 16:40:43.409829  543705 cpu.go:282] Add success.
I0322 16:40:43.420561  543705 net.go:648] Add success.
I0322 16:40:43.423488  543705 net.go:770] primary dev: ETH0
I0322 16:40:43.423501  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:40:43.423514  543705 net.go:698] Add success.
I0322 16:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:40:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:40:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:40:53.409768  543705 memory.go:184] no items to output this cycle
I0322 16:40:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 16:41:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:41:03.409773  543705 memory.go:184] no items to output this cycle
I0322 16:41:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 16:41:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:41:13.409910  543705 memory.go:191] Add success.
I0322 16:41:13.409939  543705 cpu.go:282] Add success.
W0322 16:41:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:41:13.409959  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:41:13.409962  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:41:13.419707  543705 net.go:648] Add success.
I0322 16:41:13.422624  543705 net.go:770] primary dev: ETH0
I0322 16:41:13.422637  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:41:13.422648  543705 net.go:698] Add success.
I0322 16:41:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:41:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:41:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 16:41:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:41:14.456505  543705 disk_worker.go:494] system disk:vda1
I0322 16:41:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:41:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:41:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:41:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:41:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:41:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:41:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:41:23.409767  543705 memory.go:184] no items to output this cycle
I0322 16:41:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 16:41:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:41:33.409781  543705 memory.go:184] no items to output this cycle
I0322 16:41:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 16:41:35.616336  543705 disk_info.go:125] begin check local disk info of client
I0322 16:41:35.618999  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:41:35.619006  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509400 0xc000509440]
E0322 16:41:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:41:43.410654  543705 memory.go:191] Add success.
I0322 16:41:43.409826  543705 cpu.go:282] Add success.
I0322 16:41:43.420342  543705 net.go:648] Add success.
I0322 16:41:43.423003  543705 net.go:770] primary dev: ETH0
I0322 16:41:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:41:43.423034  543705 net.go:698] Add success.
I0322 16:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:41:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:41:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:41:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:41:53.409771  543705 memory.go:184] no items to output this cycle
I0322 16:41:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 16:42:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:42:03.409776  543705 memory.go:184] no items to output this cycle
I0322 16:42:03.409799  543705 cpu.go:275] no items to output this cycle
W0322 16:42:13.409705  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:42:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:42:13.409726  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 16:42:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:42:13.409816  543705 memory.go:191] Add success.
I0322 16:42:13.409831  543705 cpu.go:282] Add success.
I0322 16:42:13.420244  543705 net.go:648] Add success.
I0322 16:42:13.423291  543705 net.go:770] primary dev: ETH0
I0322 16:42:13.423304  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:42:13.423315  543705 net.go:698] Add success.
I0322 16:42:13.465075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"59fcaef3-7992-4767-9b2b-e958d46702fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:42:13.465113  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 16:42:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:42:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 16:42:14.455226  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:42:14.457216  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:42:14.457224  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:42:14.457230  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:42:14.457244  543705 disk_worker.go:494] system disk:vda1
I0322 16:42:14.457277  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:42:15.456482  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:42:15.456491  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:42:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:42:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:42:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:42:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:42:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:42:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:42:23.409790  543705 memory.go:184] no items to output this cycle
I0322 16:42:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 16:42:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:42:33.409771  543705 memory.go:184] no items to output this cycle
I0322 16:42:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 16:42:35.619745  543705 disk_info.go:125] begin check local disk info of client
I0322 16:42:35.622321  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:42:35.622329  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500f00 0xc000500f40]
I0322 16:42:39.721728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:42:39.721735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:42:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:42:43.410659  543705 memory.go:191] Add success.
I0322 16:42:43.409798  543705 cpu.go:282] Add success.
I0322 16:42:43.420366  543705 net.go:648] Add success.
I0322 16:42:43.422985  543705 net.go:770] primary dev: ETH0
I0322 16:42:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:42:43.423031  543705 net.go:698] Add success.
I0322 16:42:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:42:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:42:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:42:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:42:53.409797  543705 memory.go:184] no items to output this cycle
I0322 16:42:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 16:43:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:43:03.409782  543705 memory.go:184] no items to output this cycle
I0322 16:43:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 16:43:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:43:13.409848  543705 memory.go:191] Add success.
I0322 16:43:13.409859  543705 cpu.go:282] Add success.
W0322 16:43:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:43:13.409906  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:43:13.409911  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:43:13.420496  543705 net.go:648] Add success.
I0322 16:43:13.423468  543705 net.go:770] primary dev: ETH0
I0322 16:43:13.423482  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:43:13.423500  543705 net.go:698] Add success.
I0322 16:43:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:43:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:43:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 16:43:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:43:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 16:43:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:43:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:43:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:43:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:43:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:43:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:43:23.409775  543705 memory.go:184] no items to output this cycle
I0322 16:43:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 16:43:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:43:33.409802  543705 memory.go:184] no items to output this cycle
I0322 16:43:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 16:43:35.622417  543705 disk_info.go:125] begin check local disk info of client
I0322 16:43:35.624947  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:43:35.624954  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052d180 0xc00052d1c0]
E0322 16:43:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:43:43.410719  543705 memory.go:191] Add success.
I0322 16:43:43.409827  543705 cpu.go:282] Add success.
I0322 16:43:43.420398  543705 net.go:648] Add success.
I0322 16:43:43.423371  543705 net.go:770] primary dev: ETH0
I0322 16:43:43.423385  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:43:43.423398  543705 net.go:698] Add success.
I0322 16:43:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:43:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:43:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:43:53.409769  543705 memory.go:184] no items to output this cycle
I0322 16:43:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 16:44:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:44:03.409780  543705 memory.go:184] no items to output this cycle
I0322 16:44:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 16:44:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:44:13.409824  543705 memory.go:191] Add success.
I0322 16:44:13.409831  543705 cpu.go:282] Add success.
W0322 16:44:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:44:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:44:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:44:13.420138  543705 net.go:648] Add success.
I0322 16:44:13.422835  543705 net.go:770] primary dev: ETH0
I0322 16:44:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:44:13.422860  543705 net.go:698] Add success.
I0322 16:44:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:44:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:44:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 16:44:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:44:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 16:44:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:44:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:44:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:44:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:44:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:44:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:44:23.409805  543705 memory.go:184] no items to output this cycle
I0322 16:44:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 16:44:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:44:33.409772  543705 memory.go:184] no items to output this cycle
I0322 16:44:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 16:44:35.625677  543705 disk_info.go:125] begin check local disk info of client
I0322 16:44:35.628235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:44:35.628242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500b00 0xc000500b40]
E0322 16:44:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:44:43.410714  543705 memory.go:191] Add success.
I0322 16:44:43.409804  543705 cpu.go:282] Add success.
I0322 16:44:43.420406  543705 net.go:648] Add success.
I0322 16:44:43.423258  543705 net.go:770] primary dev: ETH0
I0322 16:44:43.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:44:43.423285  543705 net.go:698] Add success.
I0322 16:44:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:44:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:44:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:44:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:44:53.409813  543705 memory.go:184] no items to output this cycle
I0322 16:44:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 16:45:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:45:03.409795  543705 memory.go:184] no items to output this cycle
I0322 16:45:03.409798  543705 cpu.go:275] no items to output this cycle
W0322 16:45:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:45:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:45:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 16:45:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:45:13.409817  543705 cpu.go:282] Add success.
I0322 16:45:13.409821  543705 memory.go:191] Add success.
I0322 16:45:13.420249  543705 net.go:648] Add success.
I0322 16:45:13.423000  543705 net.go:770] primary dev: ETH0
I0322 16:45:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:45:13.423024  543705 net.go:698] Add success.
I0322 16:45:13.468924  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c5d11bcf-a414-4cdb-8e65-f592062eb801","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:45:13.468959  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:45:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:45:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0322 16:45:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:45:14.456724  543705 disk_worker.go:494] system disk:vda1
I0322 16:45:14.456754  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:45:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:45:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:45:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:45:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:45:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:45:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:45:23.409778  543705 memory.go:184] no items to output this cycle
I0322 16:45:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 16:45:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:45:33.409784  543705 memory.go:184] no items to output this cycle
I0322 16:45:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 16:45:35.628329  543705 disk_info.go:125] begin check local disk info of client
I0322 16:45:35.630929  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:45:35.630937  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004641c0 0xc000464200]
I0322 16:45:39.723307  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:45:39.723314  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:45:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:45:43.410738  543705 memory.go:191] Add success.
I0322 16:45:43.409819  543705 cpu.go:282] Add success.
I0322 16:45:43.420449  543705 net.go:648] Add success.
I0322 16:45:43.423216  543705 net.go:770] primary dev: ETH0
I0322 16:45:43.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:45:43.423253  543705 net.go:698] Add success.
I0322 16:45:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:45:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:45:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:45:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:45:53.409777  543705 memory.go:184] no items to output this cycle
I0322 16:45:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 16:46:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:46:03.409782  543705 memory.go:184] no items to output this cycle
I0322 16:46:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 16:46:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:46:13.409812  543705 memory.go:191] Add success.
I0322 16:46:13.409817  543705 cpu.go:282] Add success.
W0322 16:46:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:46:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:46:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:46:13.420135  543705 net.go:648] Add success.
I0322 16:46:13.423210  543705 net.go:770] primary dev: ETH0
I0322 16:46:13.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:46:13.423235  543705 net.go:698] Add success.
I0322 16:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:46:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:46:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 16:46:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:46:14.456577  543705 disk_worker.go:494] system disk:vda1
I0322 16:46:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:46:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:46:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:46:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:46:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:46:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:46:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:46:23.409782  543705 memory.go:184] no items to output this cycle
I0322 16:46:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 16:46:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:46:33.409796  543705 memory.go:184] no items to output this cycle
I0322 16:46:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 16:46:35.631802  543705 disk_info.go:125] begin check local disk info of client
I0322 16:46:35.634428  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:46:35.634436  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f340 0xc00037f380]
E0322 16:46:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:46:43.410750  543705 memory.go:191] Add success.
I0322 16:46:43.409827  543705 cpu.go:282] Add success.
I0322 16:46:43.420439  543705 net.go:648] Add success.
I0322 16:46:43.423233  543705 net.go:770] primary dev: ETH0
I0322 16:46:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:46:43.423258  543705 net.go:698] Add success.
I0322 16:46:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:46:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:46:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:46:53.410392  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:46:53.410413  543705 memory.go:184] no items to output this cycle
I0322 16:46:53.410422  543705 cpu.go:275] no items to output this cycle
E0322 16:47:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:47:03.409788  543705 memory.go:184] no items to output this cycle
I0322 16:47:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 16:47:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:47:13.409816  543705 memory.go:191] Add success.
I0322 16:47:13.409823  543705 cpu.go:282] Add success.
W0322 16:47:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:47:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:47:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:47:13.420171  543705 net.go:648] Add success.
I0322 16:47:13.422870  543705 net.go:770] primary dev: ETH0
I0322 16:47:13.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:47:13.422901  543705 net.go:698] Add success.
I0322 16:47:13.453454  543705 event_worker.go:152] Polling the log file for events...
W0322 16:47:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:47:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 16:47:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:47:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:47:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:47:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:47:14.456570  543705 disk_worker.go:494] system disk:vda1
I0322 16:47:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:47:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:47:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:47:16.458009  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:47:16.458009  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:47:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:47:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:47:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:47:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:47:23.409781  543705 memory.go:184] no items to output this cycle
I0322 16:47:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 16:47:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:47:33.409777  543705 memory.go:184] no items to output this cycle
I0322 16:47:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 16:47:35.634825  543705 disk_info.go:125] begin check local disk info of client
I0322 16:47:35.637621  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:47:35.637628  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f2c0 0xc00039f300]
E0322 16:47:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:47:43.410754  543705 memory.go:191] Add success.
I0322 16:47:43.409828  543705 cpu.go:282] Add success.
I0322 16:47:43.420597  543705 net.go:648] Add success.
I0322 16:47:43.423255  543705 net.go:770] primary dev: ETH0
I0322 16:47:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:47:43.423291  543705 net.go:698] Add success.
I0322 16:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:47:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:47:53.409768  543705 memory.go:184] no items to output this cycle
I0322 16:47:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 16:48:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:48:03.409806  543705 memory.go:184] no items to output this cycle
I0322 16:48:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 16:48:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:48:13.409780  543705 memory.go:191] Add success.
W0322 16:48:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 16:48:13.409811  543705 cpu.go:282] Add success.
W0322 16:48:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:48:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:48:13.420441  543705 net.go:648] Add success.
I0322 16:48:13.423605  543705 net.go:770] primary dev: ETH0
I0322 16:48:13.423617  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:48:13.423630  543705 net.go:698] Add success.
I0322 16:48:13.471197  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dc724b79-39f3-4bad-9067-8cbb697c9fdb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:48:13.471231  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:48:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:48:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 16:48:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:48:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 16:48:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:48:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:48:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:48:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:48:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:48:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:48:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:48:23.409812  543705 memory.go:184] no items to output this cycle
I0322 16:48:23.409823  543705 cpu.go:275] no items to output this cycle
E0322 16:48:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:48:33.409879  543705 cpu.go:275] no items to output this cycle
I0322 16:48:33.409904  543705 memory.go:184] no items to output this cycle
I0322 16:48:35.637677  543705 disk_info.go:125] begin check local disk info of client
I0322 16:48:35.640307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:48:35.640313  543705 disk_info.go:196] parse disk info done, disk is : [0xc000365ac0 0xc000365b00]
I0322 16:48:39.723451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:48:39.723457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:48:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:48:43.410797  543705 memory.go:191] Add success.
I0322 16:48:43.409808  543705 cpu.go:282] Add success.
I0322 16:48:43.420497  543705 net.go:648] Add success.
I0322 16:48:43.423736  543705 net.go:770] primary dev: ETH0
I0322 16:48:43.423749  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:48:43.423762  543705 net.go:698] Add success.
I0322 16:48:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:48:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:48:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:48:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:48:53.409798  543705 memory.go:184] no items to output this cycle
I0322 16:48:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 16:49:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:49:03.409768  543705 memory.go:184] no items to output this cycle
I0322 16:49:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 16:49:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:49:13.409776  543705 memory.go:191] Add success.
W0322 16:49:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 16:49:13.409800  543705 cpu.go:282] Add success.
W0322 16:49:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:49:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:49:13.420046  543705 net.go:648] Add success.
I0322 16:49:13.422860  543705 net.go:770] primary dev: ETH0
I0322 16:49:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:49:13.422886  543705 net.go:698] Add success.
I0322 16:49:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:49:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:49:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0322 16:49:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:49:14.456486  543705 disk_worker.go:494] system disk:vda1
I0322 16:49:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:49:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:49:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:49:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:49:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:49:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:49:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:49:23.409868  543705 memory.go:184] no items to output this cycle
I0322 16:49:23.409951  543705 cpu.go:275] no items to output this cycle
E0322 16:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:49:33.409784  543705 memory.go:184] no items to output this cycle
I0322 16:49:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 16:49:35.640403  543705 disk_info.go:125] begin check local disk info of client
I0322 16:49:35.643149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:49:35.643155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5000 0xc0000c5040]
E0322 16:49:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:49:43.410800  543705 memory.go:191] Add success.
I0322 16:49:43.409801  543705 cpu.go:282] Add success.
I0322 16:49:43.420516  543705 net.go:648] Add success.
I0322 16:49:43.423619  543705 net.go:770] primary dev: ETH0
I0322 16:49:43.423632  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:49:43.423645  543705 net.go:698] Add success.
I0322 16:49:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:49:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:49:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:49:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:49:53.409771  543705 memory.go:184] no items to output this cycle
I0322 16:49:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 16:50:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:50:03.409774  543705 memory.go:184] no items to output this cycle
I0322 16:50:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 16:50:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:50:13.409784  543705 memory.go:191] Add success.
I0322 16:50:13.409806  543705 cpu.go:282] Add success.
W0322 16:50:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:50:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:50:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:50:13.420040  543705 net.go:648] Add success.
I0322 16:50:13.422855  543705 net.go:770] primary dev: ETH0
I0322 16:50:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:50:13.422883  543705 net.go:698] Add success.
I0322 16:50:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:50:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:50:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 16:50:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:50:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 16:50:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:50:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:50:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:50:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:50:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:50:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:50:23.409809  543705 memory.go:184] no items to output this cycle
I0322 16:50:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 16:50:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:50:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 16:50:33.409787  543705 memory.go:184] no items to output this cycle
I0322 16:50:35.643868  543705 disk_info.go:125] begin check local disk info of client
I0322 16:50:35.646482  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:50:35.646490  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052b200 0xc00052b240]
E0322 16:50:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:50:43.410629  543705 memory.go:191] Add success.
I0322 16:50:43.409813  543705 cpu.go:282] Add success.
I0322 16:50:43.420316  543705 net.go:648] Add success.
I0322 16:50:43.423275  543705 net.go:770] primary dev: ETH0
I0322 16:50:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:50:43.423300  543705 net.go:698] Add success.
I0322 16:50:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:50:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:50:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:50:53.410248  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:50:53.410263  543705 memory.go:184] no items to output this cycle
I0322 16:50:53.410281  543705 cpu.go:275] no items to output this cycle
E0322 16:51:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:51:03.409784  543705 memory.go:184] no items to output this cycle
I0322 16:51:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 16:51:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:51:13.409787  543705 memory.go:191] Add success.
I0322 16:51:13.409790  543705 cpu.go:282] Add success.
W0322 16:51:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:51:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:51:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:51:13.420283  543705 net.go:648] Add success.
I0322 16:51:13.423156  543705 net.go:770] primary dev: ETH0
I0322 16:51:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:51:13.423181  543705 net.go:698] Add success.
I0322 16:51:13.468880  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"57320462-14a6-4626-a45a-4618df556f24","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:51:13.468913  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:51:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:51:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:51:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 16:51:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:51:14.456716  543705 disk_worker.go:494] system disk:vda1
I0322 16:51:14.456746  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:51:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:51:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:51:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:51:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:51:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:51:23.409771  543705 memory.go:184] no items to output this cycle
I0322 16:51:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 16:51:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:51:33.409780  543705 memory.go:184] no items to output this cycle
I0322 16:51:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 16:51:35.646569  543705 disk_info.go:125] begin check local disk info of client
I0322 16:51:35.649168  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:51:35.649176  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051f400 0xc00051f440]
I0322 16:51:39.724314  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:51:39.724320  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:51:43.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:51:43.410946  543705 memory.go:191] Add success.
I0322 16:51:43.410004  543705 cpu.go:282] Add success.
I0322 16:51:43.419712  543705 net.go:648] Add success.
I0322 16:51:43.422670  543705 net.go:770] primary dev: ETH0
I0322 16:51:43.422683  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:51:43.422695  543705 net.go:698] Add success.
I0322 16:51:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:51:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:51:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:51:53.410330  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:51:53.410348  543705 memory.go:184] no items to output this cycle
I0322 16:51:53.410349  543705 cpu.go:275] no items to output this cycle
E0322 16:52:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:52:03.409776  543705 memory.go:184] no items to output this cycle
I0322 16:52:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 16:52:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:52:13.409809  543705 memory.go:191] Add success.
I0322 16:52:13.409817  543705 cpu.go:282] Add success.
W0322 16:52:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:52:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:52:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:52:13.420030  543705 net.go:648] Add success.
I0322 16:52:13.423283  543705 net.go:770] primary dev: ETH0
I0322 16:52:13.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:52:13.423309  543705 net.go:698] Add success.
W0322 16:52:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:52:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0322 16:52:14.455154  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:52:14.457024  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:52:14.457033  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:52:14.457040  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:52:14.457111  543705 disk_worker.go:494] system disk:vda1
I0322 16:52:14.457152  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:52:15.456815  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:52:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:52:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:52:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:52:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:52:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:52:16.472317  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:52:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:52:23.409810  543705 memory.go:184] no items to output this cycle
I0322 16:52:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 16:52:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:52:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 16:52:33.409819  543705 memory.go:184] no items to output this cycle
I0322 16:52:35.649678  543705 disk_info.go:125] begin check local disk info of client
I0322 16:52:35.652224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:52:35.652231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b42c0 0xc0002b4300]
I0322 16:52:43.409935  543705 cpu.go:282] Add success.
E0322 16:52:43.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:52:43.410841  543705 memory.go:191] Add success.
I0322 16:52:43.419556  543705 net.go:770] primary dev: ETH0
I0322 16:52:43.419569  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:52:43.419580  543705 net.go:698] Add success.
I0322 16:52:43.419804  543705 net.go:648] Add success.
I0322 16:52:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:52:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:52:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:52:53.410266  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:52:53.410285  543705 memory.go:184] no items to output this cycle
I0322 16:52:53.410295  543705 cpu.go:275] no items to output this cycle
E0322 16:53:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:53:03.409776  543705 memory.go:184] no items to output this cycle
I0322 16:53:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 16:53:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:53:13.409808  543705 memory.go:191] Add success.
I0322 16:53:13.409821  543705 cpu.go:282] Add success.
W0322 16:53:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:53:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:53:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:53:13.420061  543705 net.go:648] Add success.
I0322 16:53:13.423164  543705 net.go:770] primary dev: ETH0
I0322 16:53:13.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:53:13.423190  543705 net.go:698] Add success.
I0322 16:53:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:53:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:53:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 16:53:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:53:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 16:53:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:53:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:53:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:53:16.472524  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:53:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:53:23.409802  543705 memory.go:184] no items to output this cycle
I0322 16:53:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:53:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:53:33.409768  543705 memory.go:184] no items to output this cycle
I0322 16:53:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 16:53:35.652914  543705 disk_info.go:125] begin check local disk info of client
I0322 16:53:35.655595  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:53:35.655602  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492f00 0xc000492f40]
E0322 16:53:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:53:43.410857  543705 memory.go:191] Add success.
I0322 16:53:43.409821  543705 cpu.go:282] Add success.
I0322 16:53:43.420721  543705 net.go:648] Add success.
I0322 16:53:43.423403  543705 net.go:770] primary dev: ETH0
I0322 16:53:43.423417  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:53:43.423429  543705 net.go:698] Add success.
I0322 16:53:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:53:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:53:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:53:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:53:53.409784  543705 memory.go:184] no items to output this cycle
I0322 16:53:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 16:54:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:54:03.409797  543705 memory.go:184] no items to output this cycle
I0322 16:54:03.409813  543705 cpu.go:275] no items to output this cycle
W0322 16:54:13.409700  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:54:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:54:13.409721  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 16:54:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:54:13.409811  543705 memory.go:191] Add success.
I0322 16:54:13.409818  543705 cpu.go:282] Add success.
I0322 16:54:13.420033  543705 net.go:648] Add success.
I0322 16:54:13.422817  543705 net.go:770] primary dev: ETH0
I0322 16:54:13.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:54:13.422842  543705 net.go:698] Add success.
I0322 16:54:13.525374  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4abcad30-9b1e-4c47-9580-67c682d72408","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:54:13.525410  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 16:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:54:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:54:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 16:54:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:54:14.456500  543705 disk_worker.go:494] system disk:vda1
I0322 16:54:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:54:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:54:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:54:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:54:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:54:23.409801  543705 memory.go:184] no items to output this cycle
I0322 16:54:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 16:54:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:54:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 16:54:33.409793  543705 memory.go:184] no items to output this cycle
I0322 16:54:35.655691  543705 disk_info.go:125] begin check local disk info of client
I0322 16:54:35.658253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:54:35.658260  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c140 0xc00039c180]
I0322 16:54:39.724455  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:54:39.724462  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:54:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:54:43.410635  543705 memory.go:191] Add success.
I0322 16:54:43.409813  543705 cpu.go:282] Add success.
I0322 16:54:43.420466  543705 net.go:648] Add success.
I0322 16:54:43.423432  543705 net.go:770] primary dev: ETH0
I0322 16:54:43.423447  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:54:43.423461  543705 net.go:698] Add success.
I0322 16:54:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:54:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:54:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:54:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:54:53.409784  543705 memory.go:184] no items to output this cycle
I0322 16:54:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 16:55:03.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:55:03.409760  543705 memory.go:184] no items to output this cycle
I0322 16:55:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 16:55:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:55:13.409798  543705 memory.go:191] Add success.
I0322 16:55:13.409799  543705 cpu.go:282] Add success.
W0322 16:55:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:55:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:55:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:55:13.420120  543705 net.go:648] Add success.
I0322 16:55:13.423030  543705 net.go:770] primary dev: ETH0
I0322 16:55:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:55:13.423059  543705 net.go:698] Add success.
I0322 16:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:55:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:55:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 16:55:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:55:14.456559  543705 disk_worker.go:494] system disk:vda1
I0322 16:55:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:55:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:55:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:55:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:55:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:55:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:55:23.409781  543705 memory.go:184] no items to output this cycle
I0322 16:55:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 16:55:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:55:33.409798  543705 memory.go:184] no items to output this cycle
I0322 16:55:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 16:55:35.658349  543705 disk_info.go:125] begin check local disk info of client
I0322 16:55:35.660882  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:55:35.660889  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052f300 0xc00052f340]
E0322 16:55:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:55:43.410846  543705 memory.go:191] Add success.
I0322 16:55:43.409836  543705 cpu.go:282] Add success.
I0322 16:55:43.420677  543705 net.go:648] Add success.
I0322 16:55:43.423092  543705 net.go:770] primary dev: ETH0
I0322 16:55:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:55:43.423117  543705 net.go:698] Add success.
I0322 16:55:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:55:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:55:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:55:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:55:53.409822  543705 memory.go:184] no items to output this cycle
I0322 16:55:53.409832  543705 cpu.go:275] no items to output this cycle
E0322 16:56:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:56:03.409776  543705 memory.go:184] no items to output this cycle
I0322 16:56:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 16:56:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:56:13.409794  543705 memory.go:191] Add success.
I0322 16:56:13.409799  543705 cpu.go:282] Add success.
W0322 16:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:56:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:56:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:56:13.420057  543705 net.go:648] Add success.
I0322 16:56:13.422995  543705 net.go:770] primary dev: ETH0
I0322 16:56:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:56:13.423024  543705 net.go:698] Add success.
I0322 16:56:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:56:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:56:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 16:56:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:56:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 16:56:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:56:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:56:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:56:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:56:23.409790  543705 memory.go:184] no items to output this cycle
I0322 16:56:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 16:56:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:56:33.409774  543705 memory.go:184] no items to output this cycle
I0322 16:56:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 16:56:35.661675  543705 disk_info.go:125] begin check local disk info of client
I0322 16:56:35.664273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:56:35.664280  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f600 0xc00039f640]
E0322 16:56:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:56:43.410694  543705 memory.go:191] Add success.
I0322 16:56:43.409821  543705 cpu.go:282] Add success.
I0322 16:56:43.420521  543705 net.go:648] Add success.
I0322 16:56:43.423325  543705 net.go:770] primary dev: ETH0
I0322 16:56:43.423339  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:56:43.423350  543705 net.go:698] Add success.
I0322 16:56:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:56:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:56:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:56:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:56:53.410405  543705 memory.go:184] no items to output this cycle
I0322 16:56:53.410418  543705 cpu.go:275] no items to output this cycle
E0322 16:57:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:57:03.409790  543705 memory.go:184] no items to output this cycle
I0322 16:57:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 16:57:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:57:13.409820  543705 memory.go:191] Add success.
I0322 16:57:13.409829  543705 cpu.go:282] Add success.
W0322 16:57:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:57:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:57:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:57:13.420291  543705 net.go:648] Add success.
I0322 16:57:13.429122  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 16:57:13.429198  543705 net.go:770] primary dev: ETH0
I0322 16:57:13.429209  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:57:13.429221  543705 net.go:698] Add success.
I0322 16:57:13.452771  543705 event_worker.go:152] Polling the log file for events...
I0322 16:57:13.481776  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dcaf82d7-7401-4691-874b-44b08444d35c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 16:57:13.481807  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 16:57:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:57:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 16:57:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0322 16:57:14.455801  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 16:57:14.455808  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 16:57:14.455812  543705 custom_config.go:64] query custom config with name: gpu
I0322 16:57:14.456761  543705 disk_worker.go:494] system disk:vda1
I0322 16:57:14.456791  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 16:57:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 16:57:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:57:16.458036  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 16:57:16.458044  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 16:57:16.458091  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:57:16.458108  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:57:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:57:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:57:23.409788  543705 memory.go:184] no items to output this cycle
I0322 16:57:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 16:57:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:57:33.409787  543705 memory.go:184] no items to output this cycle
I0322 16:57:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 16:57:35.664368  543705 disk_info.go:125] begin check local disk info of client
I0322 16:57:35.667040  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:57:35.667047  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377700 0xc000377740]
I0322 16:57:39.725325  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 16:57:39.725332  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 16:57:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:57:43.410677  543705 memory.go:191] Add success.
I0322 16:57:43.409823  543705 cpu.go:282] Add success.
I0322 16:57:43.419711  543705 net.go:648] Add success.
I0322 16:57:43.422551  543705 net.go:770] primary dev: ETH0
I0322 16:57:43.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:57:43.422576  543705 net.go:698] Add success.
I0322 16:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:57:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:57:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:57:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:57:53.409792  543705 memory.go:184] no items to output this cycle
I0322 16:57:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 16:58:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:58:03.409801  543705 memory.go:184] no items to output this cycle
I0322 16:58:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 16:58:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:58:13.409776  543705 memory.go:191] Add success.
W0322 16:58:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 16:58:13.409824  543705 cpu.go:282] Add success.
W0322 16:58:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:58:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:58:13.420120  543705 net.go:648] Add success.
I0322 16:58:13.422767  543705 net.go:770] primary dev: ETH0
I0322 16:58:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:58:13.422793  543705 net.go:698] Add success.
I0322 16:58:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:58:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:58:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 16:58:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:58:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 16:58:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:58:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:58:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:58:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:58:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:58:23.409785  543705 memory.go:184] no items to output this cycle
I0322 16:58:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 16:58:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:58:33.409802  543705 memory.go:184] no items to output this cycle
I0322 16:58:33.409827  543705 cpu.go:275] no items to output this cycle
I0322 16:58:35.667138  543705 disk_info.go:125] begin check local disk info of client
I0322 16:58:35.669714  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:58:35.669721  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7740 0xc0003b7780]
E0322 16:58:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:58:43.409806  543705 cpu.go:282] Add success.
I0322 16:58:43.410698  543705 memory.go:191] Add success.
I0322 16:58:43.419715  543705 net.go:648] Add success.
I0322 16:58:43.422456  543705 net.go:770] primary dev: ETH0
I0322 16:58:43.422468  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:58:43.422480  543705 net.go:698] Add success.
I0322 16:58:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:58:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:58:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:58:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:58:53.409770  543705 memory.go:184] no items to output this cycle
I0322 16:58:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 16:59:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:59:03.409773  543705 memory.go:184] no items to output this cycle
I0322 16:59:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 16:59:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:59:13.409808  543705 memory.go:191] Add success.
I0322 16:59:13.409817  543705 cpu.go:282] Add success.
W0322 16:59:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 16:59:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 16:59:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 16:59:13.420202  543705 net.go:648] Add success.
I0322 16:59:13.422944  543705 net.go:770] primary dev: ETH0
I0322 16:59:13.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:59:13.422969  543705 net.go:698] Add success.
I0322 16:59:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 16:59:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 16:59:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 16:59:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 16:59:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 16:59:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 16:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 16:59:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:59:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:59:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 16:59:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0322 16:59:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:59:23.409766  543705 memory.go:184] no items to output this cycle
I0322 16:59:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 16:59:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:59:33.409768  543705 memory.go:184] no items to output this cycle
I0322 16:59:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 16:59:35.669810  543705 disk_info.go:125] begin check local disk info of client
I0322 16:59:35.672402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 16:59:35.672408  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7d00 0xc0004a7d40]
E0322 16:59:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:59:43.410643  543705 memory.go:191] Add success.
I0322 16:59:43.409802  543705 cpu.go:282] Add success.
I0322 16:59:43.419709  543705 net.go:648] Add success.
I0322 16:59:43.422507  543705 net.go:770] primary dev: ETH0
I0322 16:59:43.422520  543705 net.go:802] Send network stats successfully!,count is 6
I0322 16:59:43.422531  543705 net.go:698] Add success.
I0322 16:59:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 16:59:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 16:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 16:59:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 16:59:53.409767  543705 memory.go:184] no items to output this cycle
I0322 16:59:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 17:00:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:00:03.409776  543705 memory.go:184] no items to output this cycle
I0322 17:00:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 17:00:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:00:13.409806  543705 memory.go:191] Add success.
I0322 17:00:13.409817  543705 cpu.go:282] Add success.
W0322 17:00:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:00:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:00:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:00:13.420206  543705 net.go:648] Add success.
I0322 17:00:13.423404  543705 net.go:770] primary dev: ETH0
I0322 17:00:13.423418  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:00:13.423430  543705 net.go:698] Add success.
I0322 17:00:13.640645  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"06d54dfb-692e-44e8-972f-2463ae2cead6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:00:13.640680  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:00:14.454676  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:00:14.454812  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:00:14.454874  543705 disk_worker.go:708] disk space is not compliant
W0322 17:00:14.454877  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:00:14.456231  543705 disk_worker.go:494] system disk:vda1
I0322 17:00:14.456277  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:00:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:00:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:00:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:00:23.409779  543705 memory.go:184] no items to output this cycle
I0322 17:00:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 17:00:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:00:33.409798  543705 memory.go:184] no items to output this cycle
I0322 17:00:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 17:00:35.672499  543705 disk_info.go:125] begin check local disk info of client
I0322 17:00:35.675124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:00:35.675132  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046cdc0 0xc00046ce00]
I0322 17:00:39.725729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:00:39.725735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:00:43.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:00:43.410832  543705 memory.go:191] Add success.
I0322 17:00:43.409974  543705 cpu.go:282] Add success.
I0322 17:00:43.419745  543705 net.go:648] Add success.
I0322 17:00:43.422596  543705 net.go:770] primary dev: ETH0
I0322 17:00:43.422609  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:00:43.422621  543705 net.go:698] Add success.
I0322 17:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:00:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:00:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:00:53.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:00:53.410276  543705 memory.go:184] no items to output this cycle
I0322 17:00:53.410287  543705 cpu.go:275] no items to output this cycle
E0322 17:01:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:01:03.409776  543705 memory.go:184] no items to output this cycle
I0322 17:01:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 17:01:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:01:13.409785  543705 memory.go:191] Add success.
I0322 17:01:13.409786  543705 cpu.go:282] Add success.
W0322 17:01:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:01:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:01:13.420050  543705 net.go:648] Add success.
I0322 17:01:13.422921  543705 net.go:770] primary dev: ETH0
I0322 17:01:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:01:13.422952  543705 net.go:698] Add success.
I0322 17:01:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:01:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:01:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 17:01:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:01:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 17:01:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:01:16.458018  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:01:16.458096  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:01:16.458130  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:01:16.472615  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:01:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:01:23.409779  543705 memory.go:184] no items to output this cycle
I0322 17:01:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 17:01:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:01:33.409771  543705 memory.go:184] no items to output this cycle
I0322 17:01:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 17:01:35.675221  543705 disk_info.go:125] begin check local disk info of client
I0322 17:01:35.677882  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:01:35.677889  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481ac0 0xc000481b00]
E0322 17:01:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:01:43.410732  543705 memory.go:191] Add success.
I0322 17:01:43.409795  543705 cpu.go:282] Add success.
I0322 17:01:43.420648  543705 net.go:648] Add success.
I0322 17:01:43.423270  543705 net.go:770] primary dev: ETH0
I0322 17:01:43.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:01:43.423295  543705 net.go:698] Add success.
I0322 17:01:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:01:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:01:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:01:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:01:53.409808  543705 memory.go:184] no items to output this cycle
I0322 17:01:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 17:02:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:02:03.409781  543705 cpu.go:275] no items to output this cycle
I0322 17:02:03.409792  543705 memory.go:184] no items to output this cycle
E0322 17:02:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:02:13.409811  543705 memory.go:191] Add success.
I0322 17:02:13.409822  543705 cpu.go:282] Add success.
W0322 17:02:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:02:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:02:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:02:13.420139  543705 net.go:648] Add success.
I0322 17:02:13.422809  543705 net.go:770] primary dev: ETH0
I0322 17:02:13.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:02:13.422834  543705 net.go:698] Add success.
W0322 17:02:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:02:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 17:02:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:02:14.456910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:02:14.456919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:02:14.456926  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:02:14.456993  543705 disk_worker.go:494] system disk:vda1
I0322 17:02:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:02:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:02:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:02:16.457955  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:02:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:02:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:02:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:02:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:02:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:02:23.409812  543705 memory.go:184] no items to output this cycle
I0322 17:02:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 17:02:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:02:33.409773  543705 memory.go:184] no items to output this cycle
I0322 17:02:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 17:02:35.679050  543705 disk_info.go:125] begin check local disk info of client
I0322 17:02:35.681598  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:02:35.681606  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370e80 0xc000370ec0]
E0322 17:02:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:02:43.410611  543705 memory.go:191] Add success.
I0322 17:02:43.409801  543705 cpu.go:282] Add success.
I0322 17:02:43.420461  543705 net.go:648] Add success.
I0322 17:02:43.423320  543705 net.go:770] primary dev: ETH0
I0322 17:02:43.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:02:43.423345  543705 net.go:698] Add success.
I0322 17:02:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:02:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:02:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:02:53.409771  543705 memory.go:184] no items to output this cycle
I0322 17:02:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 17:03:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:03:03.409775  543705 memory.go:184] no items to output this cycle
I0322 17:03:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 17:03:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:03:13.409786  543705 memory.go:191] Add success.
I0322 17:03:13.409793  543705 cpu.go:282] Add success.
W0322 17:03:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:03:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:03:13.420210  543705 net.go:648] Add success.
I0322 17:03:13.423102  543705 net.go:770] primary dev: ETH0
I0322 17:03:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:03:13.423128  543705 net.go:698] Add success.
I0322 17:03:13.629846  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"056e62b0-4ceb-48be-b202-04938d4e0cb7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:03:13.629886  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:03:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:03:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:03:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0322 17:03:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:03:14.456717  543705 disk_worker.go:494] system disk:vda1
I0322 17:03:14.456747  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:03:15.455620  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:03:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:03:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:03:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:03:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:03:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:03:23.409773  543705 memory.go:184] no items to output this cycle
I0322 17:03:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 17:03:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:03:33.409778  543705 memory.go:184] no items to output this cycle
I0322 17:03:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 17:03:35.681676  543705 disk_info.go:125] begin check local disk info of client
I0322 17:03:35.684238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:03:35.684245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf900 0xc0002bf940]
I0322 17:03:39.727337  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:03:39.727343  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:03:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:03:43.410706  543705 memory.go:191] Add success.
I0322 17:03:43.409805  543705 cpu.go:282] Add success.
I0322 17:03:43.420387  543705 net.go:648] Add success.
I0322 17:03:43.423306  543705 net.go:770] primary dev: ETH0
I0322 17:03:43.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:03:43.423332  543705 net.go:698] Add success.
I0322 17:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:03:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:03:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:03:53.409773  543705 memory.go:184] no items to output this cycle
I0322 17:03:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 17:04:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:04:03.409805  543705 memory.go:184] no items to output this cycle
I0322 17:04:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 17:04:13.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:04:13.409845  543705 memory.go:191] Add success.
I0322 17:04:13.409855  543705 cpu.go:282] Add success.
W0322 17:04:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:04:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:04:13.409898  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:04:13.420153  543705 net.go:648] Add success.
I0322 17:04:13.422859  543705 net.go:770] primary dev: ETH0
I0322 17:04:13.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:04:13.422885  543705 net.go:698] Add success.
I0322 17:04:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:04:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:04:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 17:04:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:04:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 17:04:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:04:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:04:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:04:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:04:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:04:23.409784  543705 memory.go:184] no items to output this cycle
I0322 17:04:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 17:04:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:04:33.409783  543705 memory.go:184] no items to output this cycle
I0322 17:04:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 17:04:35.685068  543705 disk_info.go:125] begin check local disk info of client
I0322 17:04:35.687640  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:04:35.687649  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390000 0xc000390040]
E0322 17:04:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:04:43.410581  543705 memory.go:191] Add success.
I0322 17:04:43.409817  543705 cpu.go:282] Add success.
I0322 17:04:43.420266  543705 net.go:648] Add success.
I0322 17:04:43.422673  543705 net.go:770] primary dev: ETH0
I0322 17:04:43.422689  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:04:43.422706  543705 net.go:698] Add success.
I0322 17:04:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:04:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:04:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:04:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:04:53.409804  543705 memory.go:184] no items to output this cycle
I0322 17:04:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 17:05:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:05:03.409778  543705 memory.go:184] no items to output this cycle
I0322 17:05:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 17:05:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:05:13.409789  543705 memory.go:191] Add success.
I0322 17:05:13.409803  543705 cpu.go:282] Add success.
W0322 17:05:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:05:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:05:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:05:13.420089  543705 net.go:648] Add success.
I0322 17:05:13.422822  543705 net.go:770] primary dev: ETH0
I0322 17:05:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:05:13.422847  543705 net.go:698] Add success.
I0322 17:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:05:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:05:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 17:05:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:05:14.456474  543705 disk_worker.go:494] system disk:vda1
I0322 17:05:14.456516  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:05:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:05:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:05:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:05:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:05:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:05:23.409813  543705 memory.go:184] no items to output this cycle
I0322 17:05:23.409823  543705 cpu.go:275] no items to output this cycle
E0322 17:05:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:05:33.409775  543705 memory.go:184] no items to output this cycle
I0322 17:05:33.409776  543705 cpu.go:275] no items to output this cycle
I0322 17:05:35.687735  543705 disk_info.go:125] begin check local disk info of client
I0322 17:05:35.690344  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:05:35.690351  543705 disk_info.go:196] parse disk info done, disk is : [0xc000315640 0xc000315680]
E0322 17:05:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:05:43.410913  543705 memory.go:191] Add success.
I0322 17:05:43.409834  543705 cpu.go:282] Add success.
I0322 17:05:43.420597  543705 net.go:648] Add success.
I0322 17:05:43.423659  543705 net.go:770] primary dev: ETH0
I0322 17:05:43.423672  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:05:43.423685  543705 net.go:698] Add success.
I0322 17:05:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:05:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:05:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:05:53.409779  543705 memory.go:184] no items to output this cycle
I0322 17:05:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 17:06:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:06:03.409771  543705 memory.go:184] no items to output this cycle
I0322 17:06:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 17:06:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:06:13.409792  543705 memory.go:191] Add success.
I0322 17:06:13.409796  543705 cpu.go:282] Add success.
W0322 17:06:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:06:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:06:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:06:13.420063  543705 net.go:648] Add success.
I0322 17:06:13.422835  543705 net.go:770] primary dev: ETH0
I0322 17:06:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:06:13.422868  543705 net.go:698] Add success.
I0322 17:06:13.463102  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"48a54e8b-d31a-4450-98a4-63a932a06f69","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:06:13.463134  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:06:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:06:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:06:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 17:06:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:06:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 17:06:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:06:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:06:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:06:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:06:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:06:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:06:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:06:23.409805  543705 memory.go:184] no items to output this cycle
I0322 17:06:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 17:06:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:06:33.409778  543705 memory.go:184] no items to output this cycle
I0322 17:06:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 17:06:35.691114  543705 disk_info.go:125] begin check local disk info of client
I0322 17:06:35.693676  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:06:35.693683  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331380 0xc0003313c0]
I0322 17:06:39.728337  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:06:39.728344  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:06:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:06:43.410646  543705 memory.go:191] Add success.
I0322 17:06:43.409822  543705 cpu.go:282] Add success.
I0322 17:06:43.420348  543705 net.go:648] Add success.
I0322 17:06:43.423306  543705 net.go:770] primary dev: ETH0
I0322 17:06:43.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:06:43.423332  543705 net.go:698] Add success.
I0322 17:06:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:06:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:06:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:06:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:06:53.409796  543705 memory.go:184] no items to output this cycle
I0322 17:06:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 17:07:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:07:03.409776  543705 memory.go:184] no items to output this cycle
I0322 17:07:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 17:07:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:07:13.409787  543705 memory.go:191] Add success.
W0322 17:07:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:07:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:07:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:07:13.409829  543705 cpu.go:282] Add success.
I0322 17:07:13.420146  543705 net.go:648] Add success.
I0322 17:07:13.423297  543705 net.go:770] primary dev: ETH0
I0322 17:07:13.423310  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:07:13.423321  543705 net.go:698] Add success.
I0322 17:07:13.452821  543705 event_worker.go:152] Polling the log file for events...
W0322 17:07:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:07:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 17:07:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:07:14.456753  543705 disk_worker.go:494] system disk:vda1
I0322 17:07:14.456791  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:07:14.457109  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:07:14.457116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:07:14.457121  543705 custom_config.go:64] query custom config with name: gpu
E0322 17:07:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:07:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:07:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:07:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:07:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:07:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:07:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:07:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:07:23.409788  543705 memory.go:184] no items to output this cycle
I0322 17:07:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 17:07:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:07:33.409798  543705 memory.go:184] no items to output this cycle
I0322 17:07:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 17:07:35.693778  543705 disk_info.go:125] begin check local disk info of client
I0322 17:07:35.696344  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:07:35.696352  543705 disk_info.go:196] parse disk info done, disk is : [0xc000289340 0xc000289380]
E0322 17:07:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:07:43.410612  543705 memory.go:191] Add success.
I0322 17:07:43.409805  543705 cpu.go:282] Add success.
I0322 17:07:43.420325  543705 net.go:648] Add success.
I0322 17:07:43.423012  543705 net.go:770] primary dev: ETH0
I0322 17:07:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:07:43.423038  543705 net.go:698] Add success.
I0322 17:07:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:07:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:07:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:07:53.410252  543705 memory.go:184] no items to output this cycle
I0322 17:07:53.410280  543705 cpu.go:275] no items to output this cycle
E0322 17:08:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:08:03.409771  543705 memory.go:184] no items to output this cycle
I0322 17:08:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 17:08:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:08:13.409784  543705 memory.go:191] Add success.
W0322 17:08:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 17:08:13.409816  543705 cpu.go:282] Add success.
W0322 17:08:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:08:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:08:13.420199  543705 net.go:648] Add success.
I0322 17:08:13.422990  543705 net.go:770] primary dev: ETH0
I0322 17:08:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:08:13.423023  543705 net.go:698] Add success.
I0322 17:08:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:08:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:08:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 17:08:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:08:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 17:08:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:08:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:08:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:08:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:08:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:08:23.409774  543705 memory.go:184] no items to output this cycle
I0322 17:08:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 17:08:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:08:33.409800  543705 memory.go:184] no items to output this cycle
I0322 17:08:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 17:08:35.696443  543705 disk_info.go:125] begin check local disk info of client
I0322 17:08:35.699036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:08:35.699045  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500c80 0xc000500cc0]
E0322 17:08:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:08:43.410616  543705 memory.go:191] Add success.
I0322 17:08:43.409804  543705 cpu.go:282] Add success.
I0322 17:08:43.420278  543705 net.go:648] Add success.
I0322 17:08:43.423326  543705 net.go:770] primary dev: ETH0
I0322 17:08:43.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:08:43.423351  543705 net.go:698] Add success.
I0322 17:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:08:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:08:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:08:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:08:53.410408  543705 memory.go:184] no items to output this cycle
I0322 17:08:53.410417  543705 cpu.go:275] no items to output this cycle
E0322 17:09:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:09:03.409777  543705 memory.go:184] no items to output this cycle
I0322 17:09:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 17:09:13.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:09:13.409921  543705 memory.go:191] Add success.
I0322 17:09:13.409923  543705 cpu.go:282] Add success.
W0322 17:09:13.410109  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:09:13.410132  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:09:13.410137  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:09:13.419703  543705 net.go:648] Add success.
I0322 17:09:13.422436  543705 net.go:770] primary dev: ETH0
I0322 17:09:13.422449  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:09:13.422460  543705 net.go:698] Add success.
I0322 17:09:13.463991  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"50bf3cf9-e27f-4c82-bfa8-dc29b93862be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:09:13.464031  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:09:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:09:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 17:09:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:09:14.456679  543705 disk_worker.go:494] system disk:vda1
I0322 17:09:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:09:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:09:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:09:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:09:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:09:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:09:23.409788  543705 memory.go:184] no items to output this cycle
I0322 17:09:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 17:09:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:09:33.409795  543705 memory.go:184] no items to output this cycle
I0322 17:09:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 17:09:35.699211  543705 disk_info.go:125] begin check local disk info of client
I0322 17:09:35.701797  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:09:35.701804  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ef780 0xc0003ef7c0]
I0322 17:09:39.729349  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:09:39.729356  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:09:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:09:43.410627  543705 memory.go:191] Add success.
I0322 17:09:43.409806  543705 cpu.go:282] Add success.
I0322 17:09:43.420322  543705 net.go:648] Add success.
I0322 17:09:43.422950  543705 net.go:770] primary dev: ETH0
I0322 17:09:43.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:09:43.422976  543705 net.go:698] Add success.
I0322 17:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:09:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:09:53.409780  543705 memory.go:184] no items to output this cycle
I0322 17:09:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 17:10:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:10:03.409763  543705 memory.go:184] no items to output this cycle
I0322 17:10:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 17:10:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:10:13.409870  543705 memory.go:191] Add success.
W0322 17:10:13.409900  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:10:13.409913  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:10:13.409916  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:10:13.409938  543705 cpu.go:282] Add success.
I0322 17:10:13.419708  543705 net.go:648] Add success.
I0322 17:10:13.422815  543705 net.go:770] primary dev: ETH0
I0322 17:10:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:10:13.422839  543705 net.go:698] Add success.
I0322 17:10:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:10:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:10:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 17:10:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:10:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 17:10:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:10:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:10:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:10:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:10:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:10:23.409788  543705 memory.go:184] no items to output this cycle
I0322 17:10:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 17:10:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:10:33.409781  543705 memory.go:184] no items to output this cycle
I0322 17:10:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 17:10:35.703179  543705 disk_info.go:125] begin check local disk info of client
I0322 17:10:35.705830  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:10:35.705839  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ef940 0xc0003ef980]
E0322 17:10:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:10:43.410585  543705 memory.go:191] Add success.
I0322 17:10:43.409805  543705 cpu.go:282] Add success.
I0322 17:10:43.420281  543705 net.go:648] Add success.
I0322 17:10:43.423124  543705 net.go:770] primary dev: ETH0
I0322 17:10:43.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:10:43.423154  543705 net.go:698] Add success.
I0322 17:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:10:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:10:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:10:53.410361  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:10:53.410378  543705 memory.go:184] no items to output this cycle
I0322 17:10:53.410383  543705 cpu.go:275] no items to output this cycle
E0322 17:11:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:11:03.409773  543705 memory.go:184] no items to output this cycle
I0322 17:11:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 17:11:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:11:13.409914  543705 memory.go:191] Add success.
I0322 17:11:13.409937  543705 cpu.go:282] Add success.
W0322 17:11:13.409949  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:11:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:11:13.409966  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:11:13.419709  543705 net.go:648] Add success.
I0322 17:11:13.422853  543705 net.go:770] primary dev: ETH0
I0322 17:11:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:11:13.422877  543705 net.go:698] Add success.
I0322 17:11:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:11:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:11:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 17:11:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:11:14.456489  543705 disk_worker.go:494] system disk:vda1
I0322 17:11:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:11:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:11:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:11:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:11:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:11:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:11:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:11:23.409788  543705 memory.go:184] no items to output this cycle
I0322 17:11:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 17:11:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:11:33.409798  543705 memory.go:184] no items to output this cycle
I0322 17:11:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 17:11:35.707191  543705 disk_info.go:125] begin check local disk info of client
I0322 17:11:35.709856  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:11:35.709864  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e240 0xc00039e280]
E0322 17:11:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:11:43.410703  543705 memory.go:191] Add success.
I0322 17:11:43.409828  543705 cpu.go:282] Add success.
I0322 17:11:43.420419  543705 net.go:648] Add success.
I0322 17:11:43.423442  543705 net.go:770] primary dev: ETH0
I0322 17:11:43.423455  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:11:43.423467  543705 net.go:698] Add success.
I0322 17:11:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:11:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:11:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:11:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:11:53.409780  543705 memory.go:184] no items to output this cycle
I0322 17:11:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 17:12:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:12:03.409784  543705 memory.go:184] no items to output this cycle
I0322 17:12:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 17:12:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:12:13.409787  543705 memory.go:191] Add success.
I0322 17:12:13.409788  543705 cpu.go:282] Add success.
W0322 17:12:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:12:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:12:13.419712  543705 net.go:648] Add success.
I0322 17:12:13.422511  543705 net.go:770] primary dev: ETH0
I0322 17:12:13.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:12:13.422540  543705 net.go:698] Add success.
I0322 17:12:13.475346  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e8eee8a1-533f-4433-bc9b-cb85c1ece4ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:12:13.475378  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 17:12:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:12:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 17:12:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:12:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:12:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:12:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:12:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 17:12:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:12:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:12:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:12:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:12:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:12:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:12:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:12:16.472318  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:12:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:12:23.409790  543705 memory.go:184] no items to output this cycle
I0322 17:12:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 17:12:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:12:33.409807  543705 memory.go:184] no items to output this cycle
I0322 17:12:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 17:12:35.709969  543705 disk_info.go:125] begin check local disk info of client
I0322 17:12:35.712579  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:12:35.712587  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ee780 0xc0003ee7c0]
I0322 17:12:39.729733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:12:39.729740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:12:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:12:43.410727  543705 memory.go:191] Add success.
I0322 17:12:43.409797  543705 cpu.go:282] Add success.
I0322 17:12:43.420511  543705 net.go:648] Add success.
I0322 17:12:43.423333  543705 net.go:770] primary dev: ETH0
I0322 17:12:43.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:12:43.423356  543705 net.go:698] Add success.
I0322 17:12:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:12:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:12:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:12:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:12:53.409796  543705 memory.go:184] no items to output this cycle
I0322 17:12:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 17:13:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:13:03.409764  543705 memory.go:184] no items to output this cycle
I0322 17:13:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 17:13:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:13:13.409778  543705 memory.go:191] Add success.
W0322 17:13:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 17:13:13.409807  543705 cpu.go:282] Add success.
W0322 17:13:13.409964  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:13:13.409968  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:13:13.419708  543705 net.go:648] Add success.
I0322 17:13:13.422497  543705 net.go:770] primary dev: ETH0
I0322 17:13:13.422510  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:13:13.422521  543705 net.go:698] Add success.
I0322 17:13:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:13:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:13:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 17:13:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:13:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 17:13:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:13:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:13:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:13:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:13:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:13:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:13:23.409800  543705 memory.go:184] no items to output this cycle
I0322 17:13:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 17:13:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:13:33.409771  543705 memory.go:184] no items to output this cycle
I0322 17:13:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 17:13:35.713240  543705 disk_info.go:125] begin check local disk info of client
I0322 17:13:35.715882  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:13:35.715890  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e640 0xc00039e680]
E0322 17:13:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:13:43.410677  543705 memory.go:191] Add success.
I0322 17:13:43.409805  543705 cpu.go:282] Add success.
I0322 17:13:43.420388  543705 net.go:648] Add success.
I0322 17:13:43.422795  543705 net.go:770] primary dev: ETH0
I0322 17:13:43.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:13:43.422821  543705 net.go:698] Add success.
I0322 17:13:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:13:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:13:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:13:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:13:53.410261  543705 memory.go:184] no items to output this cycle
I0322 17:13:53.410269  543705 cpu.go:275] no items to output this cycle
E0322 17:14:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:14:03.409782  543705 memory.go:184] no items to output this cycle
I0322 17:14:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 17:14:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:14:13.409795  543705 cpu.go:282] Add success.
I0322 17:14:13.409808  543705 memory.go:191] Add success.
W0322 17:14:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:14:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:14:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:14:13.420373  543705 net.go:648] Add success.
I0322 17:14:13.423242  543705 net.go:770] primary dev: ETH0
I0322 17:14:13.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:14:13.423270  543705 net.go:698] Add success.
I0322 17:14:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:14:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:14:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 17:14:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:14:14.456622  543705 disk_worker.go:494] system disk:vda1
I0322 17:14:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:14:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:14:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:14:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:14:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:14:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:14:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:14:23.409781  543705 memory.go:184] no items to output this cycle
I0322 17:14:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 17:14:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:14:33.409805  543705 memory.go:184] no items to output this cycle
I0322 17:14:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 17:14:35.715994  543705 disk_info.go:125] begin check local disk info of client
I0322 17:14:35.718456  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:14:35.718466  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003eec80 0xc0003eecc0]
E0322 17:14:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:14:43.410606  543705 memory.go:191] Add success.
I0322 17:14:43.409803  543705 cpu.go:282] Add success.
I0322 17:14:43.420355  543705 net.go:648] Add success.
I0322 17:14:43.423151  543705 net.go:770] primary dev: ETH0
I0322 17:14:43.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:14:43.423182  543705 net.go:698] Add success.
I0322 17:14:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:14:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:14:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:14:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:14:53.409773  543705 memory.go:184] no items to output this cycle
I0322 17:14:53.409776  543705 cpu.go:275] no items to output this cycle
E0322 17:15:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:15:03.409800  543705 memory.go:184] no items to output this cycle
I0322 17:15:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 17:15:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:15:13.409784  543705 memory.go:191] Add success.
I0322 17:15:13.409803  543705 cpu.go:282] Add success.
W0322 17:15:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:15:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:15:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:15:13.420291  543705 net.go:648] Add success.
I0322 17:15:13.423188  543705 net.go:770] primary dev: ETH0
I0322 17:15:13.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:15:13.423213  543705 net.go:698] Add success.
I0322 17:15:13.469373  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c91a743-7692-49b3-9c00-d6dc68c54536","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:15:13.469406  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:15:14.455515  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:15:14.455528  543705 disk_worker.go:708] disk space is not compliant
W0322 17:15:14.455532  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:15:14.457527  543705 disk_worker.go:494] system disk:vda1
I0322 17:15:14.457566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:15:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:15:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:15:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:15:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:15:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:15:23.409784  543705 memory.go:184] no items to output this cycle
I0322 17:15:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 17:15:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:15:33.409789  543705 memory.go:184] no items to output this cycle
I0322 17:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 17:15:35.719245  543705 disk_info.go:125] begin check local disk info of client
I0322 17:15:35.721722  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:15:35.721730  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048b380 0xc00048b3c0]
I0322 17:15:39.731368  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:15:39.731378  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:15:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:15:43.410764  543705 memory.go:191] Add success.
I0322 17:15:43.409810  543705 cpu.go:282] Add success.
I0322 17:15:43.420480  543705 net.go:648] Add success.
I0322 17:15:43.423451  543705 net.go:770] primary dev: ETH0
I0322 17:15:43.423466  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:15:43.423480  543705 net.go:698] Add success.
I0322 17:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:15:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:15:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:15:53.409780  543705 memory.go:184] no items to output this cycle
I0322 17:15:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 17:16:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:16:03.409781  543705 memory.go:184] no items to output this cycle
I0322 17:16:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 17:16:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:16:13.409790  543705 memory.go:191] Add success.
I0322 17:16:13.409792  543705 cpu.go:282] Add success.
W0322 17:16:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:16:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:16:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:16:13.420289  543705 net.go:648] Add success.
I0322 17:16:13.423145  543705 net.go:770] primary dev: ETH0
I0322 17:16:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:16:13.423173  543705 net.go:698] Add success.
I0322 17:16:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:16:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:16:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 17:16:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:16:14.456813  543705 disk_worker.go:494] system disk:vda1
I0322 17:16:14.456842  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:16:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:16:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:16:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:16:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:16:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:16:23.409775  543705 memory.go:184] no items to output this cycle
I0322 17:16:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 17:16:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:16:33.409771  543705 memory.go:184] no items to output this cycle
I0322 17:16:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 17:16:35.723278  543705 disk_info.go:125] begin check local disk info of client
I0322 17:16:35.725777  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:16:35.725784  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc00 0xc0001abc40]
E0322 17:16:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:16:43.410786  543705 memory.go:191] Add success.
I0322 17:16:43.409799  543705 cpu.go:282] Add success.
I0322 17:16:43.420487  543705 net.go:648] Add success.
I0322 17:16:43.423201  543705 net.go:770] primary dev: ETH0
I0322 17:16:43.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:16:43.423231  543705 net.go:698] Add success.
I0322 17:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:16:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:16:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:16:53.410230  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:16:53.410256  543705 memory.go:184] no items to output this cycle
I0322 17:16:53.410284  543705 cpu.go:275] no items to output this cycle
E0322 17:17:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:17:03.409772  543705 memory.go:184] no items to output this cycle
I0322 17:17:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 17:17:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:17:13.409789  543705 memory.go:191] Add success.
I0322 17:17:13.409794  543705 cpu.go:282] Add success.
W0322 17:17:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:17:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:17:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:17:13.420039  543705 net.go:648] Add success.
I0322 17:17:13.422846  543705 net.go:770] primary dev: ETH0
I0322 17:17:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:17:13.422871  543705 net.go:698] Add success.
I0322 17:17:13.453432  543705 event_worker.go:152] Polling the log file for events...
W0322 17:17:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:17:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 17:17:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:17:14.456099  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:17:14.456108  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:17:14.456115  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:17:14.456415  543705 disk_worker.go:494] system disk:vda1
I0322 17:17:14.456446  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:17:15.456851  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:17:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:17:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:17:16.457974  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:17:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:17:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:17:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:17:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:17:23.409771  543705 memory.go:184] no items to output this cycle
I0322 17:17:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 17:17:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:17:33.409780  543705 cpu.go:275] no items to output this cycle
I0322 17:17:33.409793  543705 memory.go:184] no items to output this cycle
I0322 17:17:35.725873  543705 disk_info.go:125] begin check local disk info of client
I0322 17:17:35.728277  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:17:35.728283  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bbc0 0xc00007bc00]
E0322 17:17:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:17:43.410658  543705 memory.go:191] Add success.
I0322 17:17:43.409814  543705 cpu.go:282] Add success.
I0322 17:17:43.420472  543705 net.go:648] Add success.
I0322 17:17:43.423074  543705 net.go:770] primary dev: ETH0
I0322 17:17:43.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:17:43.423099  543705 net.go:698] Add success.
I0322 17:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:17:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:17:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:17:53.410371  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:17:53.410392  543705 memory.go:184] no items to output this cycle
I0322 17:17:53.410400  543705 cpu.go:275] no items to output this cycle
E0322 17:18:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:18:03.409785  543705 memory.go:184] no items to output this cycle
I0322 17:18:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 17:18:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:18:13.409785  543705 memory.go:191] Add success.
I0322 17:18:13.409786  543705 cpu.go:282] Add success.
W0322 17:18:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:18:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:18:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:18:13.420112  543705 net.go:648] Add success.
I0322 17:18:13.422911  543705 net.go:770] primary dev: ETH0
I0322 17:18:13.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:18:13.422935  543705 net.go:698] Add success.
I0322 17:18:13.464056  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"50ad199c-69e6-480b-864a-160e58990116","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:18:13.464102  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:18:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:18:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:18:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 17:18:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:18:14.456625  543705 disk_worker.go:494] system disk:vda1
I0322 17:18:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:18:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:18:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:18:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:18:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:18:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:18:23.409788  543705 memory.go:184] no items to output this cycle
I0322 17:18:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 17:18:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:18:33.409781  543705 memory.go:184] no items to output this cycle
I0322 17:18:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 17:18:35.728374  543705 disk_info.go:125] begin check local disk info of client
I0322 17:18:35.730775  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:18:35.730782  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ada40 0xc0003ada80]
I0322 17:18:39.732361  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:18:39.732369  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:18:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:18:43.410612  543705 memory.go:191] Add success.
I0322 17:18:43.409803  543705 cpu.go:282] Add success.
I0322 17:18:43.420327  543705 net.go:648] Add success.
I0322 17:18:43.423133  543705 net.go:770] primary dev: ETH0
I0322 17:18:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:18:43.423160  543705 net.go:698] Add success.
I0322 17:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:18:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:18:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:18:53.409771  543705 memory.go:184] no items to output this cycle
I0322 17:18:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 17:19:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:19:03.409804  543705 memory.go:184] no items to output this cycle
I0322 17:19:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 17:19:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:19:13.409783  543705 memory.go:191] Add success.
I0322 17:19:13.409805  543705 cpu.go:282] Add success.
W0322 17:19:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:19:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:19:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:19:13.420048  543705 net.go:648] Add success.
I0322 17:19:13.422711  543705 net.go:770] primary dev: ETH0
I0322 17:19:13.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:19:13.422737  543705 net.go:698] Add success.
I0322 17:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:19:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:19:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 17:19:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:19:14.456539  543705 disk_worker.go:494] system disk:vda1
I0322 17:19:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:19:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:19:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:19:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:19:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:19:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:19:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:19:23.409808  543705 memory.go:184] no items to output this cycle
I0322 17:19:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 17:19:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:19:33.409779  543705 memory.go:184] no items to output this cycle
I0322 17:19:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 17:19:35.730864  543705 disk_info.go:125] begin check local disk info of client
I0322 17:19:35.733412  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:19:35.733418  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa40 0xc0001aaa80]
E0322 17:19:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:19:43.410543  543705 memory.go:191] Add success.
I0322 17:19:43.409827  543705 cpu.go:282] Add success.
I0322 17:19:43.420244  543705 net.go:648] Add success.
I0322 17:19:43.423112  543705 net.go:770] primary dev: ETH0
I0322 17:19:43.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:19:43.423144  543705 net.go:698] Add success.
I0322 17:19:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:19:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:19:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:19:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:19:53.409797  543705 memory.go:184] no items to output this cycle
I0322 17:19:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 17:20:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:20:03.409808  543705 memory.go:184] no items to output this cycle
I0322 17:20:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 17:20:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:20:13.409780  543705 memory.go:191] Add success.
I0322 17:20:13.409799  543705 cpu.go:282] Add success.
W0322 17:20:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:20:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:20:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:20:13.420115  543705 net.go:648] Add success.
I0322 17:20:13.422819  543705 net.go:770] primary dev: ETH0
I0322 17:20:13.422832  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:20:13.422843  543705 net.go:698] Add success.
I0322 17:20:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:20:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:20:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 17:20:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:20:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 17:20:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:20:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:20:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:20:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:20:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:20:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:20:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:20:23.409776  543705 memory.go:184] no items to output this cycle
I0322 17:20:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 17:20:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:20:33.409804  543705 memory.go:184] no items to output this cycle
I0322 17:20:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 17:20:35.733672  543705 disk_info.go:125] begin check local disk info of client
I0322 17:20:35.736277  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:20:35.736283  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc00 0xc0001abc40]
E0322 17:20:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:20:43.410624  543705 memory.go:191] Add success.
I0322 17:20:43.409795  543705 cpu.go:282] Add success.
I0322 17:20:43.420342  543705 net.go:648] Add success.
I0322 17:20:43.423011  543705 net.go:770] primary dev: ETH0
I0322 17:20:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:20:43.423045  543705 net.go:698] Add success.
I0322 17:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:20:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:20:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:20:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:20:53.409772  543705 memory.go:184] no items to output this cycle
I0322 17:20:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 17:21:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:21:03.409800  543705 memory.go:184] no items to output this cycle
I0322 17:21:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 17:21:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:21:13.409794  543705 memory.go:191] Add success.
I0322 17:21:13.409816  543705 cpu.go:282] Add success.
W0322 17:21:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:21:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:21:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:21:13.420140  543705 net.go:648] Add success.
I0322 17:21:13.423649  543705 net.go:770] primary dev: ETH0
I0322 17:21:13.423665  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:21:13.423679  543705 net.go:698] Add success.
I0322 17:21:13.468424  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3be221c7-a8c8-4ede-8ad9-787d0bf73b5f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:21:13.468465  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:21:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:21:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:21:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 17:21:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:21:14.456722  543705 disk_worker.go:494] system disk:vda1
I0322 17:21:14.456751  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:21:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:21:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:21:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:21:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:21:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:21:23.409775  543705 memory.go:184] no items to output this cycle
I0322 17:21:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 17:21:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:21:33.409785  543705 memory.go:184] no items to output this cycle
I0322 17:21:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 17:21:35.736366  543705 disk_info.go:125] begin check local disk info of client
I0322 17:21:35.738976  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:21:35.738982  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4800 0xc0002a4840]
I0322 17:21:39.733357  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:21:39.733365  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:21:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:21:43.410695  543705 memory.go:191] Add success.
I0322 17:21:43.409807  543705 cpu.go:282] Add success.
I0322 17:21:43.420383  543705 net.go:648] Add success.
I0322 17:21:43.423179  543705 net.go:770] primary dev: ETH0
I0322 17:21:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:21:43.423205  543705 net.go:698] Add success.
I0322 17:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:21:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:21:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:21:53.410368  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:21:53.410383  543705 memory.go:184] no items to output this cycle
I0322 17:21:53.410389  543705 cpu.go:275] no items to output this cycle
E0322 17:22:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:22:03.409798  543705 memory.go:184] no items to output this cycle
I0322 17:22:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 17:22:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:22:13.409820  543705 memory.go:191] Add success.
I0322 17:22:13.409831  543705 cpu.go:282] Add success.
W0322 17:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:22:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:22:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:22:13.420110  543705 net.go:648] Add success.
I0322 17:22:13.422540  543705 net.go:770] primary dev: ETH0
I0322 17:22:13.422553  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:22:13.422565  543705 net.go:698] Add success.
W0322 17:22:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:22:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 17:22:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:22:14.456526  543705 disk_worker.go:494] system disk:vda1
I0322 17:22:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:22:14.458038  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:22:14.458046  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:22:14.458062  543705 custom_config.go:64] query custom config with name: gpu
E0322 17:22:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:22:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:22:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:22:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:22:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:22:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:22:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:22:23.410284  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:22:23.410308  543705 memory.go:184] no items to output this cycle
I0322 17:22:23.410319  543705 cpu.go:275] no items to output this cycle
E0322 17:22:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:22:33.409771  543705 memory.go:184] no items to output this cycle
I0322 17:22:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 17:22:35.739068  543705 disk_info.go:125] begin check local disk info of client
I0322 17:22:35.741599  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:22:35.741605  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b19c0 0xc0002b1a00]
E0322 17:22:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:22:43.410610  543705 memory.go:191] Add success.
I0322 17:22:43.409807  543705 cpu.go:282] Add success.
I0322 17:22:43.420293  543705 net.go:648] Add success.
I0322 17:22:43.422808  543705 net.go:770] primary dev: ETH0
I0322 17:22:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:22:43.422834  543705 net.go:698] Add success.
I0322 17:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:22:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:22:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:22:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:22:53.409781  543705 memory.go:184] no items to output this cycle
I0322 17:22:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 17:23:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:23:03.409802  543705 memory.go:184] no items to output this cycle
I0322 17:23:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 17:23:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:23:13.409780  543705 memory.go:191] Add success.
I0322 17:23:13.409798  543705 cpu.go:282] Add success.
W0322 17:23:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:23:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:23:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:23:13.420260  543705 net.go:648] Add success.
I0322 17:23:13.422988  543705 net.go:770] primary dev: ETH0
I0322 17:23:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:23:13.423011  543705 net.go:698] Add success.
I0322 17:23:14.454948  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:23:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:23:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 17:23:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:23:14.456488  543705 disk_worker.go:494] system disk:vda1
I0322 17:23:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:23:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:23:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:23:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:23:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:23:23.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:23:23.409929  543705 cpu.go:275] no items to output this cycle
I0322 17:23:23.409933  543705 memory.go:184] no items to output this cycle
E0322 17:23:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:23:33.409765  543705 memory.go:184] no items to output this cycle
I0322 17:23:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 17:23:35.741674  543705 disk_info.go:125] begin check local disk info of client
I0322 17:23:35.744234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:23:35.744240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4cc0 0xc0000c4d00]
E0322 17:23:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:23:43.410614  543705 memory.go:191] Add success.
I0322 17:23:43.409810  543705 cpu.go:282] Add success.
I0322 17:23:43.420311  543705 net.go:648] Add success.
I0322 17:23:43.422802  543705 net.go:770] primary dev: ETH0
I0322 17:23:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:23:43.422827  543705 net.go:698] Add success.
I0322 17:23:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:23:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:23:53.410227  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:23:53.410247  543705 memory.go:184] no items to output this cycle
I0322 17:23:53.410261  543705 cpu.go:275] no items to output this cycle
E0322 17:24:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:24:03.409766  543705 memory.go:184] no items to output this cycle
I0322 17:24:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 17:24:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:24:13.409814  543705 memory.go:191] Add success.
I0322 17:24:13.409822  543705 cpu.go:282] Add success.
W0322 17:24:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:24:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:24:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:24:13.420144  543705 net.go:648] Add success.
I0322 17:24:13.422933  543705 net.go:770] primary dev: ETH0
I0322 17:24:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:24:13.422959  543705 net.go:698] Add success.
I0322 17:24:13.463566  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b33780fa-428b-492b-92c3-ab611ac87c2c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:24:13.463598  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:24:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:24:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:24:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 17:24:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:24:14.456585  543705 disk_worker.go:494] system disk:vda1
I0322 17:24:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:24:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:24:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:24:16.472105  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:24:23.410215  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:24:23.410234  543705 memory.go:184] no items to output this cycle
I0322 17:24:23.410261  543705 cpu.go:275] no items to output this cycle
E0322 17:24:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:24:33.409780  543705 memory.go:184] no items to output this cycle
I0322 17:24:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 17:24:35.744325  543705 disk_info.go:125] begin check local disk info of client
I0322 17:24:35.746898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:24:35.746904  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003efc00 0xc0003efc40]
I0322 17:24:39.733736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:24:39.733744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:24:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:24:43.410715  543705 memory.go:191] Add success.
I0322 17:24:43.409821  543705 cpu.go:282] Add success.
I0322 17:24:43.420415  543705 net.go:648] Add success.
I0322 17:24:43.423041  543705 net.go:770] primary dev: ETH0
I0322 17:24:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:24:43.423066  543705 net.go:698] Add success.
I0322 17:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:24:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:24:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:24:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:24:53.409778  543705 memory.go:184] no items to output this cycle
I0322 17:24:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 17:25:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:25:03.409797  543705 memory.go:184] no items to output this cycle
I0322 17:25:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 17:25:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:25:13.409785  543705 memory.go:191] Add success.
I0322 17:25:13.409802  543705 cpu.go:282] Add success.
W0322 17:25:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:25:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:25:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:25:13.420129  543705 net.go:648] Add success.
I0322 17:25:13.423163  543705 net.go:770] primary dev: ETH0
I0322 17:25:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:25:13.423195  543705 net.go:698] Add success.
I0322 17:25:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:25:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:25:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 17:25:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:25:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 17:25:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:25:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:25:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:25:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:25:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:25:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:25:23.409801  543705 memory.go:184] no items to output this cycle
I0322 17:25:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 17:25:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:25:33.409778  543705 memory.go:184] no items to output this cycle
I0322 17:25:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 17:25:35.746985  543705 disk_info.go:125] begin check local disk info of client
I0322 17:25:35.749533  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:25:35.749539  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003efc00 0xc0003efc40]
E0322 17:25:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:25:43.410546  543705 memory.go:191] Add success.
I0322 17:25:43.409835  543705 cpu.go:282] Add success.
I0322 17:25:43.420260  543705 net.go:648] Add success.
I0322 17:25:43.422979  543705 net.go:770] primary dev: ETH0
I0322 17:25:43.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:25:43.423004  543705 net.go:698] Add success.
I0322 17:25:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:25:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:25:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:25:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:25:53.409762  543705 memory.go:184] no items to output this cycle
I0322 17:25:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 17:26:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:26:03.409769  543705 memory.go:184] no items to output this cycle
I0322 17:26:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 17:26:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:26:13.409787  543705 memory.go:191] Add success.
I0322 17:26:13.409787  543705 cpu.go:282] Add success.
W0322 17:26:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:26:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:26:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:26:13.420129  543705 net.go:648] Add success.
I0322 17:26:13.423146  543705 net.go:770] primary dev: ETH0
I0322 17:26:13.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:26:13.423179  543705 net.go:698] Add success.
I0322 17:26:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:26:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:26:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 17:26:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:26:14.456609  543705 disk_worker.go:494] system disk:vda1
I0322 17:26:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:26:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:26:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:26:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:26:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:26:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 17:26:23.409790  543705 memory.go:184] no items to output this cycle
E0322 17:26:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:26:33.409774  543705 memory.go:184] no items to output this cycle
I0322 17:26:33.409871  543705 cpu.go:275] no items to output this cycle
I0322 17:26:35.749669  543705 disk_info.go:125] begin check local disk info of client
I0322 17:26:35.752259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:26:35.752265  543705 disk_info.go:196] parse disk info done, disk is : [0xc000284bc0 0xc000284c00]
E0322 17:26:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:26:43.410647  543705 memory.go:191] Add success.
I0322 17:26:43.409813  543705 cpu.go:282] Add success.
I0322 17:26:43.420415  543705 net.go:648] Add success.
I0322 17:26:43.423540  543705 net.go:770] primary dev: ETH0
I0322 17:26:43.423553  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:26:43.423566  543705 net.go:698] Add success.
I0322 17:26:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:26:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:26:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:26:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:26:53.410394  543705 memory.go:184] no items to output this cycle
I0322 17:26:53.410401  543705 cpu.go:275] no items to output this cycle
E0322 17:27:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:27:03.409780  543705 memory.go:184] no items to output this cycle
I0322 17:27:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 17:27:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:27:13.409779  543705 memory.go:191] Add success.
W0322 17:27:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 17:27:13.409804  543705 cpu.go:282] Add success.
W0322 17:27:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:27:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:27:13.420040  543705 net.go:648] Add success.
I0322 17:27:13.422553  543705 net.go:770] primary dev: ETH0
I0322 17:27:13.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:27:13.422577  543705 net.go:698] Add success.
I0322 17:27:13.428591  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 17:27:13.452780  543705 event_worker.go:152] Polling the log file for events...
I0322 17:27:13.463520  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d456f905-d7a3-431e-8d85-3f156521b4f2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:27:13.463552  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 17:27:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:27:14.455256  543705 disk_worker.go:708] disk space is not compliant
W0322 17:27:14.455260  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:27:14.455911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:27:14.455920  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:27:14.455927  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:27:14.456823  543705 disk_worker.go:494] system disk:vda1
I0322 17:27:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:27:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:27:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:27:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:27:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:27:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:27:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:27:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:27:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:27:23.409806  543705 memory.go:184] no items to output this cycle
I0322 17:27:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 17:27:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:27:33.409773  543705 memory.go:184] no items to output this cycle
I0322 17:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 17:27:35.752350  543705 disk_info.go:125] begin check local disk info of client
I0322 17:27:35.754970  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:27:35.754976  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4440 0xc0000c4480]
I0322 17:27:39.735379  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:27:39.735395  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:27:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:27:43.410710  543705 memory.go:191] Add success.
I0322 17:27:43.409832  543705 cpu.go:282] Add success.
I0322 17:27:43.420419  543705 net.go:648] Add success.
I0322 17:27:43.423176  543705 net.go:770] primary dev: ETH0
I0322 17:27:43.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:27:43.423201  543705 net.go:698] Add success.
I0322 17:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:27:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:27:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:27:53.409776  543705 memory.go:184] no items to output this cycle
I0322 17:27:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 17:28:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:28:03.409778  543705 memory.go:184] no items to output this cycle
I0322 17:28:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 17:28:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:28:13.409818  543705 memory.go:191] Add success.
I0322 17:28:13.409822  543705 cpu.go:282] Add success.
W0322 17:28:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:28:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:28:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:28:13.420104  543705 net.go:648] Add success.
I0322 17:28:13.423086  543705 net.go:770] primary dev: ETH0
I0322 17:28:13.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:28:13.423112  543705 net.go:698] Add success.
I0322 17:28:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:28:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:28:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 17:28:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:28:14.456523  543705 disk_worker.go:494] system disk:vda1
I0322 17:28:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:28:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:28:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:28:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:28:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:28:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:28:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:28:23.409815  543705 memory.go:184] no items to output this cycle
I0322 17:28:23.409822  543705 cpu.go:275] no items to output this cycle
E0322 17:28:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:28:33.409765  543705 memory.go:184] no items to output this cycle
I0322 17:28:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 17:28:35.756417  543705 disk_info.go:125] begin check local disk info of client
I0322 17:28:35.759020  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:28:35.759025  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a3c0 0xc00027a400]
E0322 17:28:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:28:43.410672  543705 memory.go:191] Add success.
I0322 17:28:43.409796  543705 cpu.go:282] Add success.
I0322 17:28:43.420374  543705 net.go:648] Add success.
I0322 17:28:43.423295  543705 net.go:770] primary dev: ETH0
I0322 17:28:43.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:28:43.423326  543705 net.go:698] Add success.
I0322 17:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:28:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:28:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:28:53.410234  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:28:53.410251  543705 memory.go:184] no items to output this cycle
I0322 17:28:53.410278  543705 cpu.go:275] no items to output this cycle
E0322 17:29:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:29:03.409794  543705 memory.go:184] no items to output this cycle
I0322 17:29:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 17:29:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:29:13.409779  543705 memory.go:191] Add success.
I0322 17:29:13.409804  543705 cpu.go:282] Add success.
W0322 17:29:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:29:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:29:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:29:13.420054  543705 net.go:648] Add success.
I0322 17:29:13.422727  543705 net.go:770] primary dev: ETH0
I0322 17:29:13.422742  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:29:13.422755  543705 net.go:698] Add success.
I0322 17:29:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:29:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:29:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 17:29:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:29:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 17:29:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:29:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:29:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:29:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:29:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:29:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:29:23.409786  543705 memory.go:184] no items to output this cycle
I0322 17:29:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 17:29:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:29:33.409786  543705 memory.go:184] no items to output this cycle
I0322 17:29:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 17:29:35.759108  543705 disk_info.go:125] begin check local disk info of client
I0322 17:29:35.761704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:29:35.761710  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046df00 0xc00046df40]
E0322 17:29:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:29:43.410646  543705 memory.go:191] Add success.
I0322 17:29:43.409804  543705 cpu.go:282] Add success.
I0322 17:29:43.420431  543705 net.go:648] Add success.
I0322 17:29:43.423033  543705 net.go:770] primary dev: ETH0
I0322 17:29:43.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:29:43.423058  543705 net.go:698] Add success.
I0322 17:29:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:29:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:29:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:29:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:29:53.409774  543705 memory.go:184] no items to output this cycle
I0322 17:29:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 17:30:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:30:03.409779  543705 cpu.go:275] no items to output this cycle
I0322 17:30:03.409783  543705 memory.go:184] no items to output this cycle
E0322 17:30:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:30:13.409796  543705 memory.go:191] Add success.
I0322 17:30:13.409796  543705 cpu.go:282] Add success.
W0322 17:30:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:30:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:30:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:30:13.420098  543705 net.go:648] Add success.
I0322 17:30:13.423168  543705 net.go:770] primary dev: ETH0
I0322 17:30:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:30:13.423193  543705 net.go:698] Add success.
I0322 17:30:13.470073  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"27018757-96b0-464e-96ba-8724d590c499","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:30:13.470105  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:30:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:30:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:30:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 17:30:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:30:14.456607  543705 disk_worker.go:494] system disk:vda1
I0322 17:30:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:30:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:30:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:30:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:30:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:30:23.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:30:23.409887  543705 memory.go:184] no items to output this cycle
I0322 17:30:23.409976  543705 cpu.go:275] no items to output this cycle
E0322 17:30:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:30:33.409795  543705 memory.go:184] no items to output this cycle
I0322 17:30:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 17:30:35.761793  543705 disk_info.go:125] begin check local disk info of client
I0322 17:30:35.764345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:30:35.764351  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab700 0xc0001ab740]
I0322 17:30:39.736376  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:30:39.736384  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:30:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:30:43.410631  543705 memory.go:191] Add success.
I0322 17:30:43.409806  543705 cpu.go:282] Add success.
I0322 17:30:43.420301  543705 net.go:648] Add success.
I0322 17:30:43.422955  543705 net.go:770] primary dev: ETH0
I0322 17:30:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:30:43.422984  543705 net.go:698] Add success.
I0322 17:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:30:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:30:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:30:53.409795  543705 memory.go:184] no items to output this cycle
I0322 17:30:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 17:31:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:31:03.409779  543705 memory.go:184] no items to output this cycle
I0322 17:31:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 17:31:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:31:13.409788  543705 memory.go:191] Add success.
I0322 17:31:13.409788  543705 cpu.go:282] Add success.
W0322 17:31:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:31:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:31:13.420043  543705 net.go:648] Add success.
I0322 17:31:13.423080  543705 net.go:770] primary dev: ETH0
I0322 17:31:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:31:13.423104  543705 net.go:698] Add success.
I0322 17:31:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:31:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:31:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 17:31:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:31:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 17:31:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:31:15.456031  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:31:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:31:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:31:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:31:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:31:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:31:23.409774  543705 memory.go:184] no items to output this cycle
I0322 17:31:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 17:31:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:31:33.409802  543705 memory.go:184] no items to output this cycle
I0322 17:31:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 17:31:35.764434  543705 disk_info.go:125] begin check local disk info of client
I0322 17:31:35.767012  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:31:35.767018  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d80 0xc0000c5dc0]
E0322 17:31:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:31:43.410694  543705 memory.go:191] Add success.
I0322 17:31:43.409832  543705 cpu.go:282] Add success.
I0322 17:31:43.420383  543705 net.go:648] Add success.
I0322 17:31:43.423265  543705 net.go:770] primary dev: ETH0
I0322 17:31:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:31:43.423290  543705 net.go:698] Add success.
I0322 17:31:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:31:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:31:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:31:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:31:53.410399  543705 memory.go:184] no items to output this cycle
I0322 17:31:53.410434  543705 cpu.go:275] no items to output this cycle
E0322 17:32:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:32:03.409776  543705 memory.go:184] no items to output this cycle
I0322 17:32:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 17:32:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:32:13.409797  543705 memory.go:191] Add success.
I0322 17:32:13.409796  543705 cpu.go:282] Add success.
W0322 17:32:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:32:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:32:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:32:13.420107  543705 net.go:648] Add success.
I0322 17:32:13.422958  543705 net.go:770] primary dev: ETH0
I0322 17:32:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:32:13.422983  543705 net.go:698] Add success.
W0322 17:32:14.454403  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:32:14.454497  543705 disk_worker.go:708] disk space is not compliant
W0322 17:32:14.454502  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:32:14.454885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:32:14.454895  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:32:14.454902  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:32:14.457003  543705 disk_worker.go:494] system disk:vda1
I0322 17:32:14.457050  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:32:15.456895  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:32:15.456905  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:32:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:32:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:32:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:32:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:32:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:32:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:32:23.409793  543705 memory.go:184] no items to output this cycle
I0322 17:32:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 17:32:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:32:33.409819  543705 memory.go:184] no items to output this cycle
I0322 17:32:33.409832  543705 cpu.go:275] no items to output this cycle
I0322 17:32:35.767100  543705 disk_info.go:125] begin check local disk info of client
I0322 17:32:35.769639  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:32:35.769659  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5780 0xc0000c57c0]
E0322 17:32:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:32:43.410557  543705 memory.go:191] Add success.
I0322 17:32:43.409815  543705 cpu.go:282] Add success.
I0322 17:32:43.420252  543705 net.go:648] Add success.
I0322 17:32:43.422775  543705 net.go:770] primary dev: ETH0
I0322 17:32:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:32:43.422800  543705 net.go:698] Add success.
I0322 17:32:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:32:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:32:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:32:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:32:53.409774  543705 memory.go:184] no items to output this cycle
I0322 17:32:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 17:33:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:33:03.409788  543705 memory.go:184] no items to output this cycle
I0322 17:33:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 17:33:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:33:13.409822  543705 memory.go:191] Add success.
I0322 17:33:13.409825  543705 cpu.go:282] Add success.
W0322 17:33:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:33:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:33:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:33:13.420261  543705 net.go:648] Add success.
I0322 17:33:13.422836  543705 net.go:770] primary dev: ETH0
I0322 17:33:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:33:13.422860  543705 net.go:698] Add success.
I0322 17:33:13.464231  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"977e754c-e330-43a7-a1f3-d3f5744c3ca5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:33:13.464266  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:33:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:33:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:33:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 17:33:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:33:14.456542  543705 disk_worker.go:494] system disk:vda1
I0322 17:33:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:33:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:33:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:33:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:33:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:33:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:33:23.409782  543705 memory.go:184] no items to output this cycle
I0322 17:33:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 17:33:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:33:33.409790  543705 memory.go:184] no items to output this cycle
I0322 17:33:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 17:33:35.770727  543705 disk_info.go:125] begin check local disk info of client
I0322 17:33:35.773298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:33:35.773304  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3d80 0xc0003d3dc0]
I0322 17:33:39.737390  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:33:39.737397  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:33:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:33:43.410614  543705 memory.go:191] Add success.
I0322 17:33:43.409840  543705 cpu.go:282] Add success.
I0322 17:33:43.420306  543705 net.go:648] Add success.
I0322 17:33:43.423261  543705 net.go:770] primary dev: ETH0
I0322 17:33:43.423274  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:33:43.423287  543705 net.go:698] Add success.
I0322 17:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:33:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:33:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:33:53.409791  543705 memory.go:184] no items to output this cycle
I0322 17:33:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 17:34:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:34:03.409770  543705 memory.go:184] no items to output this cycle
I0322 17:34:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 17:34:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:34:13.409831  543705 memory.go:191] Add success.
I0322 17:34:13.409831  543705 cpu.go:282] Add success.
W0322 17:34:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:34:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:34:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:34:13.420165  543705 net.go:648] Add success.
I0322 17:34:13.422803  543705 net.go:770] primary dev: ETH0
I0322 17:34:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:34:13.422828  543705 net.go:698] Add success.
I0322 17:34:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:34:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:34:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 17:34:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:34:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 17:34:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:34:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:34:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:34:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:34:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:34:23.409819  543705 memory.go:184] no items to output this cycle
I0322 17:34:23.409824  543705 cpu.go:275] no items to output this cycle
E0322 17:34:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:34:33.409777  543705 memory.go:184] no items to output this cycle
I0322 17:34:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 17:34:35.773664  543705 disk_info.go:125] begin check local disk info of client
I0322 17:34:35.776194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:34:35.776200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004af300 0xc0004af340]
E0322 17:34:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:34:43.410721  543705 memory.go:191] Add success.
I0322 17:34:43.409807  543705 cpu.go:282] Add success.
I0322 17:34:43.419731  543705 net.go:648] Add success.
I0322 17:34:43.422465  543705 net.go:770] primary dev: ETH0
I0322 17:34:43.422484  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:34:43.422498  543705 net.go:698] Add success.
I0322 17:34:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:34:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:34:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:34:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:34:53.409779  543705 memory.go:184] no items to output this cycle
I0322 17:34:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 17:35:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:35:03.409793  543705 memory.go:184] no items to output this cycle
I0322 17:35:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 17:35:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:35:13.409784  543705 memory.go:191] Add success.
I0322 17:35:13.409806  543705 cpu.go:282] Add success.
W0322 17:35:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:35:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:35:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:35:13.420061  543705 net.go:648] Add success.
I0322 17:35:13.423051  543705 net.go:770] primary dev: ETH0
I0322 17:35:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:35:13.423075  543705 net.go:698] Add success.
I0322 17:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:35:14.455084  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:35:14.455148  543705 disk_worker.go:708] disk space is not compliant
W0322 17:35:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:35:14.456491  543705 disk_worker.go:494] system disk:vda1
I0322 17:35:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:35:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:35:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:35:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:35:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:35:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:35:23.409798  543705 memory.go:184] no items to output this cycle
I0322 17:35:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 17:35:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:35:33.409780  543705 memory.go:184] no items to output this cycle
I0322 17:35:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 17:35:35.776284  543705 disk_info.go:125] begin check local disk info of client
I0322 17:35:35.778848  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:35:35.778855  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004631c0 0xc000463200]
E0322 17:35:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:35:43.410744  543705 memory.go:191] Add success.
I0322 17:35:43.409795  543705 cpu.go:282] Add success.
I0322 17:35:43.420615  543705 net.go:648] Add success.
I0322 17:35:43.423282  543705 net.go:770] primary dev: ETH0
I0322 17:35:43.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:35:43.423306  543705 net.go:698] Add success.
I0322 17:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:35:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:35:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:35:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:35:53.409776  543705 memory.go:184] no items to output this cycle
I0322 17:35:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 17:36:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:36:03.409775  543705 memory.go:184] no items to output this cycle
I0322 17:36:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 17:36:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:36:13.409786  543705 memory.go:191] Add success.
I0322 17:36:13.409806  543705 cpu.go:282] Add success.
W0322 17:36:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:36:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:36:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:36:13.420124  543705 net.go:648] Add success.
I0322 17:36:13.422619  543705 net.go:770] primary dev: ETH0
I0322 17:36:13.422633  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:36:13.422647  543705 net.go:698] Add success.
I0322 17:36:13.463117  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"451e655e-61b6-446b-99da-df5d43bb6186","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:36:13.463151  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:36:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:36:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:36:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 17:36:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:36:14.456546  543705 disk_worker.go:494] system disk:vda1
I0322 17:36:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:36:15.455618  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:36:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:36:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:36:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:36:23.409779  543705 memory.go:184] no items to output this cycle
I0322 17:36:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 17:36:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:36:33.409778  543705 memory.go:184] no items to output this cycle
I0322 17:36:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 17:36:35.780548  543705 disk_info.go:125] begin check local disk info of client
I0322 17:36:35.783089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:36:35.783095  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4e80 0xc0003d4ec0]
I0322 17:36:39.737740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:36:39.737748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:36:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:36:43.410594  543705 memory.go:191] Add success.
I0322 17:36:43.409825  543705 cpu.go:282] Add success.
I0322 17:36:43.420292  543705 net.go:648] Add success.
I0322 17:36:43.423023  543705 net.go:770] primary dev: ETH0
I0322 17:36:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:36:43.423050  543705 net.go:698] Add success.
I0322 17:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:36:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:36:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:36:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:36:53.409778  543705 memory.go:184] no items to output this cycle
I0322 17:36:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 17:37:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:37:03.409774  543705 memory.go:184] no items to output this cycle
I0322 17:37:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 17:37:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:37:13.409790  543705 memory.go:191] Add success.
I0322 17:37:13.409811  543705 cpu.go:282] Add success.
W0322 17:37:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:37:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:37:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:37:13.420128  543705 net.go:648] Add success.
I0322 17:37:13.422700  543705 net.go:770] primary dev: ETH0
I0322 17:37:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:37:13.422726  543705 net.go:698] Add success.
I0322 17:37:13.453258  543705 event_worker.go:152] Polling the log file for events...
W0322 17:37:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:37:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 17:37:14.455176  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:37:14.456951  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:37:14.456960  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:37:14.456966  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:37:14.457016  543705 disk_worker.go:494] system disk:vda1
I0322 17:37:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:37:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:37:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
E0322 17:37:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:37:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:37:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:37:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:37:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:37:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:37:23.409768  543705 memory.go:184] no items to output this cycle
I0322 17:37:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 17:37:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:37:33.409763  543705 memory.go:184] no items to output this cycle
I0322 17:37:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 17:37:35.783178  543705 disk_info.go:125] begin check local disk info of client
I0322 17:37:35.785747  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:37:35.785753  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9c00 0xc0003b9c40]
E0322 17:37:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:37:43.410747  543705 memory.go:191] Add success.
I0322 17:37:43.409808  543705 cpu.go:282] Add success.
I0322 17:37:43.420458  543705 net.go:648] Add success.
I0322 17:37:43.423440  543705 net.go:770] primary dev: ETH0
I0322 17:37:43.423455  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:37:43.423471  543705 net.go:698] Add success.
I0322 17:37:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:37:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:37:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:37:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:37:53.409795  543705 memory.go:184] no items to output this cycle
I0322 17:37:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 17:38:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:38:03.409779  543705 cpu.go:275] no items to output this cycle
I0322 17:38:03.409787  543705 memory.go:184] no items to output this cycle
E0322 17:38:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:38:13.409814  543705 memory.go:191] Add success.
I0322 17:38:13.409820  543705 cpu.go:282] Add success.
W0322 17:38:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:38:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:38:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:38:13.420141  543705 net.go:648] Add success.
I0322 17:38:13.423208  543705 net.go:770] primary dev: ETH0
I0322 17:38:13.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:38:13.423236  543705 net.go:698] Add success.
I0322 17:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:38:14.455090  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:38:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0322 17:38:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:38:14.456487  543705 disk_worker.go:494] system disk:vda1
I0322 17:38:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:38:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:38:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:38:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:38:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:38:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:38:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:38:23.409792  543705 memory.go:184] no items to output this cycle
I0322 17:38:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 17:38:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:38:33.409768  543705 memory.go:184] no items to output this cycle
I0322 17:38:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 17:38:35.785841  543705 disk_info.go:125] begin check local disk info of client
I0322 17:38:35.788380  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:38:35.788385  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2940 0xc0004b2980]
E0322 17:38:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:38:43.410690  543705 memory.go:191] Add success.
I0322 17:38:43.409823  543705 cpu.go:282] Add success.
I0322 17:38:43.420380  543705 net.go:648] Add success.
I0322 17:38:43.422944  543705 net.go:770] primary dev: ETH0
I0322 17:38:43.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:38:43.422974  543705 net.go:698] Add success.
I0322 17:38:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:38:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:38:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:38:53.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:38:53.409879  543705 memory.go:184] no items to output this cycle
I0322 17:38:53.409988  543705 cpu.go:275] no items to output this cycle
E0322 17:39:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:39:03.409804  543705 memory.go:184] no items to output this cycle
I0322 17:39:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 17:39:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:39:13.409798  543705 memory.go:191] Add success.
I0322 17:39:13.409811  543705 cpu.go:282] Add success.
W0322 17:39:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:39:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:39:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:39:13.420052  543705 net.go:648] Add success.
I0322 17:39:13.422645  543705 net.go:770] primary dev: ETH0
I0322 17:39:13.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:39:13.422683  543705 net.go:698] Add success.
I0322 17:39:13.468353  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"56b8aa04-4888-404b-b538-644c795f395d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:39:13.468386  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:39:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:39:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:39:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 17:39:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:39:14.456577  543705 disk_worker.go:494] system disk:vda1
I0322 17:39:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:39:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:39:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:39:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:39:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:39:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:39:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:39:23.409814  543705 memory.go:184] no items to output this cycle
I0322 17:39:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 17:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:39:33.409781  543705 memory.go:184] no items to output this cycle
I0322 17:39:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 17:39:35.789583  543705 disk_info.go:125] begin check local disk info of client
I0322 17:39:35.792126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:39:35.792132  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278080 0xc0002780c0]
I0322 17:39:39.739400  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:39:39.739407  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:39:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:39:43.410620  543705 memory.go:191] Add success.
I0322 17:39:43.409812  543705 cpu.go:282] Add success.
I0322 17:39:43.420411  543705 net.go:648] Add success.
I0322 17:39:43.422716  543705 net.go:770] primary dev: ETH0
I0322 17:39:43.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:39:43.422742  543705 net.go:698] Add success.
I0322 17:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:39:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:39:53.409789  543705 memory.go:184] no items to output this cycle
I0322 17:39:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 17:40:03.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:40:03.409866  543705 memory.go:184] no items to output this cycle
I0322 17:40:03.409994  543705 cpu.go:275] no items to output this cycle
E0322 17:40:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:40:13.409836  543705 memory.go:191] Add success.
I0322 17:40:13.409838  543705 cpu.go:282] Add success.
W0322 17:40:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:40:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:40:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:40:13.420246  543705 net.go:648] Add success.
I0322 17:40:13.423193  543705 net.go:770] primary dev: ETH0
I0322 17:40:13.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:40:13.423218  543705 net.go:698] Add success.
I0322 17:40:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:40:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:40:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 17:40:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:40:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 17:40:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:40:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:40:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:40:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:40:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:40:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:40:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:40:23.409796  543705 memory.go:184] no items to output this cycle
I0322 17:40:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 17:40:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:40:33.409808  543705 memory.go:184] no items to output this cycle
I0322 17:40:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 17:40:35.792214  543705 disk_info.go:125] begin check local disk info of client
I0322 17:40:35.794796  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:40:35.794802  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025ebc0 0xc00025ec00]
E0322 17:40:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:40:43.410729  543705 memory.go:191] Add success.
I0322 17:40:43.409831  543705 cpu.go:282] Add success.
I0322 17:40:43.420441  543705 net.go:648] Add success.
I0322 17:40:43.423206  543705 net.go:770] primary dev: ETH0
I0322 17:40:43.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:40:43.423231  543705 net.go:698] Add success.
I0322 17:40:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:40:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:40:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:40:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:40:53.409812  543705 memory.go:184] no items to output this cycle
I0322 17:40:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 17:41:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:41:03.409801  543705 memory.go:184] no items to output this cycle
I0322 17:41:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 17:41:13.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:41:13.409894  543705 memory.go:191] Add success.
I0322 17:41:13.409900  543705 cpu.go:282] Add success.
W0322 17:41:13.409928  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:41:13.409974  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:41:13.409978  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:41:13.419708  543705 net.go:648] Add success.
I0322 17:41:13.422145  543705 net.go:770] primary dev: ETH0
I0322 17:41:13.422157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:41:13.422169  543705 net.go:698] Add success.
I0322 17:41:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:41:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:41:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 17:41:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:41:14.456495  543705 disk_worker.go:494] system disk:vda1
I0322 17:41:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:41:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:41:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:41:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:41:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:41:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:41:23.409794  543705 cpu.go:275] no items to output this cycle
I0322 17:41:23.409808  543705 memory.go:184] no items to output this cycle
E0322 17:41:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:41:33.409775  543705 memory.go:184] no items to output this cycle
I0322 17:41:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 17:41:35.794884  543705 disk_info.go:125] begin check local disk info of client
I0322 17:41:35.797415  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:41:35.797421  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8740 0xc0004a8780]
E0322 17:41:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:41:43.410592  543705 memory.go:191] Add success.
I0322 17:41:43.409828  543705 cpu.go:282] Add success.
I0322 17:41:43.420284  543705 net.go:648] Add success.
I0322 17:41:43.423167  543705 net.go:770] primary dev: ETH0
I0322 17:41:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:41:43.423197  543705 net.go:698] Add success.
I0322 17:41:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:41:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:41:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:41:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:41:53.409796  543705 memory.go:184] no items to output this cycle
I0322 17:41:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 17:42:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:42:03.409763  543705 memory.go:184] no items to output this cycle
I0322 17:42:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 17:42:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:42:13.409793  543705 memory.go:191] Add success.
I0322 17:42:13.409797  543705 cpu.go:282] Add success.
W0322 17:42:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:42:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:42:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:42:13.419709  543705 net.go:648] Add success.
I0322 17:42:13.422241  543705 net.go:770] primary dev: ETH0
I0322 17:42:13.422254  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:42:13.422266  543705 net.go:698] Add success.
I0322 17:42:13.463689  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fbf9b437-15c4-40ba-b61d-717e9d63aa74","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:42:13.463721  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 17:42:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:42:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0322 17:42:14.455249  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:42:14.455899  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:42:14.455908  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:42:14.455914  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:42:14.456821  543705 disk_worker.go:494] system disk:vda1
I0322 17:42:14.456876  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:42:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:42:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:42:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:42:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:42:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:42:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:42:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:42:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:42:23.409789  543705 memory.go:184] no items to output this cycle
I0322 17:42:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 17:42:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:42:33.409772  543705 memory.go:184] no items to output this cycle
I0322 17:42:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 17:42:35.797677  543705 disk_info.go:125] begin check local disk info of client
I0322 17:42:35.800067  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:42:35.800074  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e640 0xc00034e680]
I0322 17:42:39.740401  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:42:39.740409  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:42:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:42:43.410682  543705 memory.go:191] Add success.
I0322 17:42:43.409808  543705 cpu.go:282] Add success.
I0322 17:42:43.420243  543705 net.go:770] primary dev: ETH0
I0322 17:42:43.420257  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:42:43.420269  543705 net.go:698] Add success.
I0322 17:42:43.420504  543705 net.go:648] Add success.
I0322 17:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:42:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:42:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:42:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:42:53.409797  543705 memory.go:184] no items to output this cycle
I0322 17:42:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 17:43:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:43:03.409766  543705 memory.go:184] no items to output this cycle
I0322 17:43:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 17:43:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:43:13.409915  543705 cpu.go:282] Add success.
I0322 17:43:13.409927  543705 memory.go:191] Add success.
W0322 17:43:13.409969  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:43:13.409990  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:43:13.410010  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:43:13.419723  543705 net.go:648] Add success.
I0322 17:43:13.422289  543705 net.go:770] primary dev: ETH0
I0322 17:43:13.422303  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:43:13.422314  543705 net.go:698] Add success.
I0322 17:43:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:43:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:43:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 17:43:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:43:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 17:43:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:43:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:43:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:43:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:43:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:43:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:43:23.409783  543705 memory.go:184] no items to output this cycle
I0322 17:43:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 17:43:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:43:33.409770  543705 memory.go:184] no items to output this cycle
I0322 17:43:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 17:43:35.800159  543705 disk_info.go:125] begin check local disk info of client
I0322 17:43:35.802738  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:43:35.802744  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b040 0xc00036b080]
E0322 17:43:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:43:43.410684  543705 memory.go:191] Add success.
I0322 17:43:43.409829  543705 cpu.go:282] Add success.
I0322 17:43:43.420431  543705 net.go:648] Add success.
I0322 17:43:43.423179  543705 net.go:770] primary dev: ETH0
I0322 17:43:43.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:43:43.423204  543705 net.go:698] Add success.
I0322 17:43:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:43:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:43:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:43:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:43:53.409774  543705 memory.go:184] no items to output this cycle
I0322 17:43:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 17:44:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:44:03.409807  543705 memory.go:184] no items to output this cycle
I0322 17:44:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 17:44:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:44:13.409871  543705 memory.go:191] Add success.
W0322 17:44:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:44:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:44:13.409918  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:44:13.409930  543705 cpu.go:282] Add success.
I0322 17:44:13.419730  543705 net.go:648] Add success.
I0322 17:44:13.422408  543705 net.go:770] primary dev: ETH0
I0322 17:44:13.422422  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:44:13.422435  543705 net.go:698] Add success.
I0322 17:44:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:44:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:44:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 17:44:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:44:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 17:44:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:44:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:44:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:44:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:44:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:44:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:44:23.409790  543705 memory.go:184] no items to output this cycle
I0322 17:44:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 17:44:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:44:33.409800  543705 memory.go:184] no items to output this cycle
I0322 17:44:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 17:44:35.802828  543705 disk_info.go:125] begin check local disk info of client
I0322 17:44:35.805255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:44:35.805262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005061c0 0xc000506200]
E0322 17:44:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:44:43.410752  543705 memory.go:191] Add success.
I0322 17:44:43.409801  543705 cpu.go:282] Add success.
I0322 17:44:43.419680  543705 net.go:648] Add success.
I0322 17:44:43.422690  543705 net.go:770] primary dev: ETH0
I0322 17:44:43.422704  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:44:43.422717  543705 net.go:698] Add success.
I0322 17:44:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:44:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:44:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:44:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:44:53.410404  543705 memory.go:184] no items to output this cycle
I0322 17:44:53.410418  543705 cpu.go:275] no items to output this cycle
E0322 17:45:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:45:03.409807  543705 memory.go:184] no items to output this cycle
I0322 17:45:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 17:45:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:45:13.409814  543705 memory.go:191] Add success.
I0322 17:45:13.409823  543705 cpu.go:282] Add success.
W0322 17:45:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:45:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:45:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:45:13.419711  543705 net.go:648] Add success.
I0322 17:45:13.422525  543705 net.go:770] primary dev: ETH0
I0322 17:45:13.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:45:13.422550  543705 net.go:698] Add success.
I0322 17:45:13.476649  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"be4368e3-c136-4f96-b49a-6a588ee2d9fa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:45:13.476679  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:45:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:45:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:45:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 17:45:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:45:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 17:45:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:45:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:45:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:45:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:45:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:45:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:45:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:45:23.409801  543705 memory.go:184] no items to output this cycle
I0322 17:45:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 17:45:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:45:33.409774  543705 memory.go:184] no items to output this cycle
I0322 17:45:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 17:45:35.805673  543705 disk_info.go:125] begin check local disk info of client
I0322 17:45:35.808191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:45:35.808198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358000 0xc000358040]
I0322 17:45:39.740552  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:45:39.740559  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:45:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:45:43.410724  543705 memory.go:191] Add success.
I0322 17:45:43.409825  543705 cpu.go:282] Add success.
I0322 17:45:43.420437  543705 net.go:648] Add success.
I0322 17:45:43.423073  543705 net.go:770] primary dev: ETH0
I0322 17:45:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:45:43.423112  543705 net.go:698] Add success.
I0322 17:45:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:45:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:45:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:45:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:45:53.409775  543705 memory.go:184] no items to output this cycle
I0322 17:45:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 17:46:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:46:03.409805  543705 memory.go:184] no items to output this cycle
I0322 17:46:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 17:46:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:46:13.409854  543705 memory.go:191] Add success.
W0322 17:46:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:46:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:46:13.409902  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:46:13.409942  543705 cpu.go:282] Add success.
I0322 17:46:13.419716  543705 net.go:648] Add success.
I0322 17:46:13.422438  543705 net.go:770] primary dev: ETH0
I0322 17:46:13.422450  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:46:13.422462  543705 net.go:698] Add success.
I0322 17:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:46:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:46:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 17:46:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:46:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 17:46:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:46:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:46:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:46:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:46:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:46:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:46:23.409813  543705 memory.go:184] no items to output this cycle
I0322 17:46:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 17:46:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:46:33.409772  543705 memory.go:184] no items to output this cycle
I0322 17:46:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 17:46:35.809674  543705 disk_info.go:125] begin check local disk info of client
I0322 17:46:35.812263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:46:35.812269  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027ad40 0xc00027ad80]
E0322 17:46:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:46:43.410695  543705 memory.go:191] Add success.
I0322 17:46:43.409819  543705 cpu.go:282] Add success.
I0322 17:46:43.420474  543705 net.go:648] Add success.
I0322 17:46:43.422935  543705 net.go:770] primary dev: ETH0
I0322 17:46:43.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:46:43.422966  543705 net.go:698] Add success.
I0322 17:46:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:46:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:46:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:46:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:46:53.409798  543705 memory.go:184] no items to output this cycle
I0322 17:46:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 17:47:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:47:03.409771  543705 memory.go:184] no items to output this cycle
I0322 17:47:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 17:47:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:47:13.409911  543705 memory.go:191] Add success.
I0322 17:47:13.409914  543705 cpu.go:282] Add success.
W0322 17:47:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:47:13.409985  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:47:13.409993  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:47:13.419727  543705 net.go:648] Add success.
I0322 17:47:13.422178  543705 net.go:770] primary dev: ETH0
I0322 17:47:13.422205  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:47:13.422217  543705 net.go:698] Add success.
I0322 17:47:13.452778  543705 event_worker.go:152] Polling the log file for events...
W0322 17:47:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:47:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 17:47:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:47:14.456889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:47:14.456898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:47:14.456904  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:47:14.456977  543705 disk_worker.go:494] system disk:vda1
I0322 17:47:14.457016  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:47:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:47:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:47:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:47:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:47:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:47:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:47:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:47:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:47:23.409784  543705 memory.go:184] no items to output this cycle
I0322 17:47:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 17:47:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:47:33.409803  543705 memory.go:184] no items to output this cycle
I0322 17:47:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 17:47:35.813672  543705 disk_info.go:125] begin check local disk info of client
I0322 17:47:35.816248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:47:35.816254  543705 disk_info.go:196] parse disk info done, disk is : [0xc000324340 0xc000324380]
E0322 17:47:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:47:43.410664  543705 memory.go:191] Add success.
I0322 17:47:43.409797  543705 cpu.go:282] Add success.
I0322 17:47:43.420399  543705 net.go:648] Add success.
I0322 17:47:43.423233  543705 net.go:770] primary dev: ETH0
I0322 17:47:43.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:47:43.423261  543705 net.go:698] Add success.
I0322 17:47:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:47:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:47:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:47:53.409800  543705 memory.go:184] no items to output this cycle
I0322 17:47:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 17:48:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:48:03.409809  543705 memory.go:184] no items to output this cycle
I0322 17:48:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 17:48:13.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:48:13.409893  543705 memory.go:191] Add success.
W0322 17:48:13.409926  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:48:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:48:13.409956  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:48:13.409970  543705 cpu.go:282] Add success.
I0322 17:48:13.419707  543705 net.go:648] Add success.
I0322 17:48:13.422786  543705 net.go:770] primary dev: ETH0
I0322 17:48:13.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:48:13.422811  543705 net.go:698] Add success.
I0322 17:48:13.468653  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9f4fe421-c738-45a8-b627-cd8d8dc7bed4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:48:13.468685  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:48:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:48:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:48:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 17:48:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:48:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 17:48:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:48:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:48:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:48:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:48:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:48:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 17:48:23.409793  543705 memory.go:184] no items to output this cycle
E0322 17:48:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:48:33.409777  543705 memory.go:184] no items to output this cycle
I0322 17:48:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 17:48:35.817675  543705 disk_info.go:125] begin check local disk info of client
I0322 17:48:35.820196  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:48:35.820201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a7cc0 0xc0002a7d00]
I0322 17:48:39.740710  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:48:39.740718  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:48:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:48:43.410784  543705 memory.go:191] Add success.
I0322 17:48:43.409807  543705 cpu.go:282] Add success.
I0322 17:48:43.420491  543705 net.go:648] Add success.
I0322 17:48:43.423471  543705 net.go:770] primary dev: ETH0
I0322 17:48:43.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:48:43.423501  543705 net.go:698] Add success.
I0322 17:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:48:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:48:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:48:53.409795  543705 memory.go:184] no items to output this cycle
I0322 17:48:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 17:49:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:49:03.409786  543705 cpu.go:275] no items to output this cycle
I0322 17:49:03.409790  543705 memory.go:184] no items to output this cycle
E0322 17:49:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:49:13.409816  543705 memory.go:191] Add success.
I0322 17:49:13.409826  543705 cpu.go:282] Add success.
W0322 17:49:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:49:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:49:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:49:13.420142  543705 net.go:648] Add success.
I0322 17:49:13.422829  543705 net.go:770] primary dev: ETH0
I0322 17:49:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:49:13.422854  543705 net.go:698] Add success.
I0322 17:49:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:49:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:49:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 17:49:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:49:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 17:49:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:49:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:49:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:49:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:49:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:49:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:49:23.409770  543705 memory.go:184] no items to output this cycle
I0322 17:49:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 17:49:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:49:33.409772  543705 memory.go:184] no items to output this cycle
I0322 17:49:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 17:49:35.821673  543705 disk_info.go:125] begin check local disk info of client
I0322 17:49:35.824180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:49:35.824186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000368cc0 0xc000368d00]
E0322 17:49:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:49:43.410701  543705 memory.go:191] Add success.
I0322 17:49:43.409825  543705 cpu.go:282] Add success.
I0322 17:49:43.420399  543705 net.go:648] Add success.
I0322 17:49:43.423467  543705 net.go:770] primary dev: ETH0
I0322 17:49:43.423481  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:49:43.423493  543705 net.go:698] Add success.
I0322 17:49:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:49:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:49:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:49:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:49:53.409768  543705 memory.go:184] no items to output this cycle
I0322 17:49:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 17:50:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:50:03.409803  543705 memory.go:184] no items to output this cycle
I0322 17:50:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 17:50:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:50:13.409804  543705 memory.go:191] Add success.
I0322 17:50:13.409806  543705 cpu.go:282] Add success.
W0322 17:50:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:50:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:50:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:50:13.420137  543705 net.go:648] Add success.
I0322 17:50:13.422785  543705 net.go:770] primary dev: ETH0
I0322 17:50:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:50:13.422809  543705 net.go:698] Add success.
I0322 17:50:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:50:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:50:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 17:50:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:50:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 17:50:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:50:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:50:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:50:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:50:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:50:23.409787  543705 memory.go:184] no items to output this cycle
I0322 17:50:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 17:50:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:50:33.409778  543705 memory.go:184] no items to output this cycle
I0322 17:50:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 17:50:35.825670  543705 disk_info.go:125] begin check local disk info of client
I0322 17:50:35.828208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:50:35.828213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342080 0xc0003420c0]
E0322 17:50:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:50:43.410812  543705 memory.go:191] Add success.
I0322 17:50:43.409827  543705 cpu.go:282] Add success.
I0322 17:50:43.420532  543705 net.go:648] Add success.
I0322 17:50:43.423041  543705 net.go:770] primary dev: ETH0
I0322 17:50:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:50:43.423071  543705 net.go:698] Add success.
I0322 17:50:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:50:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:50:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:50:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:50:53.409796  543705 memory.go:184] no items to output this cycle
I0322 17:50:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 17:51:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:51:03.409786  543705 memory.go:184] no items to output this cycle
I0322 17:51:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 17:51:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:51:13.409788  543705 memory.go:191] Add success.
I0322 17:51:13.409804  543705 cpu.go:282] Add success.
W0322 17:51:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:51:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:51:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:51:13.420154  543705 net.go:648] Add success.
I0322 17:51:13.423275  543705 net.go:770] primary dev: ETH0
I0322 17:51:13.423288  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:51:13.423300  543705 net.go:698] Add success.
I0322 17:51:13.467837  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8c81d9e8-31f8-4af8-8a31-25605eb4910e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:51:13.467874  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:51:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:51:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:51:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 17:51:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:51:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 17:51:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:51:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:51:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:51:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:51:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:51:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:51:23.409804  543705 memory.go:184] no items to output this cycle
I0322 17:51:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 17:51:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:51:33.409787  543705 memory.go:184] no items to output this cycle
I0322 17:51:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 17:51:35.829672  543705 disk_info.go:125] begin check local disk info of client
I0322 17:51:35.832172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:51:35.832178  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039cf40 0xc00039cf80]
I0322 17:51:39.741736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:51:39.741743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:51:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:51:43.410810  543705 memory.go:191] Add success.
I0322 17:51:43.409795  543705 cpu.go:282] Add success.
I0322 17:51:43.420527  543705 net.go:648] Add success.
I0322 17:51:43.423470  543705 net.go:770] primary dev: ETH0
I0322 17:51:43.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:51:43.423498  543705 net.go:698] Add success.
I0322 17:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:51:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:51:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:51:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:51:53.409770  543705 memory.go:184] no items to output this cycle
I0322 17:51:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 17:52:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:52:03.409776  543705 memory.go:184] no items to output this cycle
I0322 17:52:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 17:52:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:52:13.409795  543705 memory.go:191] Add success.
I0322 17:52:13.409799  543705 cpu.go:282] Add success.
W0322 17:52:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:52:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:52:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:52:13.420066  543705 net.go:648] Add success.
I0322 17:52:13.422767  543705 net.go:770] primary dev: ETH0
I0322 17:52:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:52:13.422793  543705 net.go:698] Add success.
W0322 17:52:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:52:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0322 17:52:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:52:14.456327  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:52:14.456337  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:52:14.456343  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:52:14.456416  543705 disk_worker.go:494] system disk:vda1
I0322 17:52:14.456446  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:52:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:52:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:52:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:52:16.457967  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:52:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:52:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:52:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:52:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:52:23.409789  543705 memory.go:184] no items to output this cycle
I0322 17:52:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 17:52:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:52:33.409769  543705 memory.go:184] no items to output this cycle
I0322 17:52:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 17:52:35.833674  543705 disk_info.go:125] begin check local disk info of client
I0322 17:52:35.836220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:52:35.836225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3cc0 0xc0003d3d00]
E0322 17:52:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:52:43.410776  543705 memory.go:191] Add success.
I0322 17:52:43.409819  543705 cpu.go:282] Add success.
I0322 17:52:43.420459  543705 net.go:648] Add success.
I0322 17:52:43.423235  543705 net.go:770] primary dev: ETH0
I0322 17:52:43.423250  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:52:43.423264  543705 net.go:698] Add success.
I0322 17:52:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:52:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:52:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:52:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:52:53.409781  543705 memory.go:184] no items to output this cycle
I0322 17:52:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 17:53:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:53:03.409786  543705 memory.go:184] no items to output this cycle
I0322 17:53:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 17:53:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:53:13.409830  543705 memory.go:191] Add success.
I0322 17:53:13.409838  543705 cpu.go:282] Add success.
W0322 17:53:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:53:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:53:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:53:13.420175  543705 net.go:648] Add success.
I0322 17:53:13.423134  543705 net.go:770] primary dev: ETH0
I0322 17:53:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:53:13.423160  543705 net.go:698] Add success.
I0322 17:53:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:53:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:53:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 17:53:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:53:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 17:53:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:53:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:53:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:53:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:53:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:53:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:53:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:53:23.409785  543705 memory.go:184] no items to output this cycle
I0322 17:53:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 17:53:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:53:33.409770  543705 memory.go:184] no items to output this cycle
I0322 17:53:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 17:53:35.837673  543705 disk_info.go:125] begin check local disk info of client
I0322 17:53:35.840201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:53:35.840207  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340680 0xc0003406c0]
E0322 17:53:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:53:43.410720  543705 memory.go:191] Add success.
I0322 17:53:43.409835  543705 cpu.go:282] Add success.
I0322 17:53:43.420461  543705 net.go:648] Add success.
I0322 17:53:43.423237  543705 net.go:770] primary dev: ETH0
I0322 17:53:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:53:43.423267  543705 net.go:698] Add success.
I0322 17:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:53:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:53:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:53:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:53:53.409771  543705 memory.go:184] no items to output this cycle
I0322 17:53:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 17:54:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:54:03.409782  543705 memory.go:184] no items to output this cycle
I0322 17:54:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 17:54:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:54:13.409810  543705 memory.go:191] Add success.
I0322 17:54:13.409815  543705 cpu.go:282] Add success.
W0322 17:54:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:54:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:54:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:54:13.419709  543705 net.go:648] Add success.
I0322 17:54:13.422727  543705 net.go:770] primary dev: ETH0
I0322 17:54:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:54:13.422751  543705 net.go:698] Add success.
I0322 17:54:13.738684  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0521507c-5c70-4c0e-b1c1-ee4fbc0094d5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:54:13.738719  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 17:54:14.454533  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:54:14.454709  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:54:14.454787  543705 disk_worker.go:708] disk space is not compliant
W0322 17:54:14.454790  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:54:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 17:54:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:54:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:54:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:54:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:54:23.409778  543705 memory.go:184] no items to output this cycle
I0322 17:54:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 17:54:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:54:33.409775  543705 memory.go:184] no items to output this cycle
I0322 17:54:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 17:54:35.841672  543705 disk_info.go:125] begin check local disk info of client
I0322 17:54:35.844231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:54:35.844237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051c640 0xc00051c680]
I0322 17:54:39.743414  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:54:39.743422  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:54:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:54:43.410635  543705 memory.go:191] Add success.
I0322 17:54:43.409810  543705 cpu.go:282] Add success.
I0322 17:54:43.420336  543705 net.go:648] Add success.
I0322 17:54:43.423054  543705 net.go:770] primary dev: ETH0
I0322 17:54:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:54:43.423080  543705 net.go:698] Add success.
I0322 17:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:54:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:54:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:54:53.409781  543705 memory.go:184] no items to output this cycle
I0322 17:54:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 17:55:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:55:03.409766  543705 memory.go:184] no items to output this cycle
I0322 17:55:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 17:55:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:55:13.409776  543705 memory.go:191] Add success.
I0322 17:55:13.409787  543705 cpu.go:282] Add success.
W0322 17:55:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:55:13.412452  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:55:13.412457  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:55:13.419728  543705 net.go:648] Add success.
I0322 17:55:13.421367  543705 net.go:770] primary dev: ETH0
I0322 17:55:13.421380  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:55:13.421392  543705 net.go:698] Add success.
I0322 17:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:55:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:55:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 17:55:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:55:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 17:55:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:55:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:55:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:55:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:55:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:55:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:55:23.409794  543705 memory.go:184] no items to output this cycle
I0322 17:55:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 17:55:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:55:33.409780  543705 memory.go:184] no items to output this cycle
I0322 17:55:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 17:55:35.845671  543705 disk_info.go:125] begin check local disk info of client
I0322 17:55:35.848199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:55:35.848205  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033c2c0 0xc00033c300]
E0322 17:55:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:55:43.410590  543705 memory.go:191] Add success.
I0322 17:55:43.409830  543705 cpu.go:282] Add success.
I0322 17:55:43.420290  543705 net.go:648] Add success.
I0322 17:55:43.422907  543705 net.go:770] primary dev: ETH0
I0322 17:55:43.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:55:43.422932  543705 net.go:698] Add success.
I0322 17:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:55:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:55:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:55:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:55:53.409781  543705 memory.go:184] no items to output this cycle
I0322 17:55:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 17:56:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:56:03.409796  543705 memory.go:184] no items to output this cycle
I0322 17:56:03.409812  543705 cpu.go:275] no items to output this cycle
W0322 17:56:13.409703  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:56:13.409719  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:56:13.409723  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 17:56:13.409912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:56:13.409932  543705 memory.go:191] Add success.
I0322 17:56:13.409948  543705 cpu.go:282] Add success.
I0322 17:56:13.419726  543705 net.go:648] Add success.
I0322 17:56:13.422575  543705 net.go:770] primary dev: ETH0
I0322 17:56:13.422588  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:56:13.422599  543705 net.go:698] Add success.
I0322 17:56:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:56:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:56:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 17:56:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:56:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 17:56:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:56:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:56:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:56:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:56:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:56:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:56:23.409811  543705 memory.go:184] no items to output this cycle
I0322 17:56:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 17:56:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:56:33.409783  543705 memory.go:184] no items to output this cycle
I0322 17:56:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 17:56:35.849671  543705 disk_info.go:125] begin check local disk info of client
I0322 17:56:35.852258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:56:35.852264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002be600 0xc0002be640]
E0322 17:56:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:56:43.410711  543705 memory.go:191] Add success.
I0322 17:56:43.409813  543705 cpu.go:282] Add success.
I0322 17:56:43.420407  543705 net.go:648] Add success.
I0322 17:56:43.423520  543705 net.go:770] primary dev: ETH0
I0322 17:56:43.423534  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:56:43.423549  543705 net.go:698] Add success.
I0322 17:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:56:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:56:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:56:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:56:53.409780  543705 memory.go:184] no items to output this cycle
I0322 17:56:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 17:57:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:57:03.409775  543705 memory.go:184] no items to output this cycle
I0322 17:57:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 17:57:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:57:13.409785  543705 memory.go:191] Add success.
I0322 17:57:13.409789  543705 cpu.go:282] Add success.
W0322 17:57:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:57:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:57:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:57:13.419723  543705 net.go:648] Add success.
I0322 17:57:13.428354  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 17:57:13.428427  543705 net.go:770] primary dev: ETH0
I0322 17:57:13.428439  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:57:13.428450  543705 net.go:698] Add success.
I0322 17:57:13.453027  543705 event_worker.go:152] Polling the log file for events...
I0322 17:57:13.469390  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8714dfe3-b20a-4ede-9c00-a795765e0e68","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 17:57:13.469421  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 17:57:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:57:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0322 17:57:14.455245  543705 disk_worker.go:728] disk inode is not compliant
E0322 17:57:14.455911  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 17:57:14.455919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 17:57:14.455925  543705 custom_config.go:64] query custom config with name: gpu
I0322 17:57:14.456824  543705 disk_worker.go:494] system disk:vda1
I0322 17:57:14.456856  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 17:57:15.456783  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 17:57:15.456791  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:57:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 17:57:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 17:57:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:57:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:57:16.472332  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:57:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:57:23.409806  543705 memory.go:184] no items to output this cycle
I0322 17:57:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 17:57:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:57:33.409776  543705 memory.go:184] no items to output this cycle
I0322 17:57:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 17:57:35.853672  543705 disk_info.go:125] begin check local disk info of client
I0322 17:57:35.856185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:57:35.856192  543705 disk_info.go:196] parse disk info done, disk is : [0xc000470880 0xc0004708c0]
I0322 17:57:39.743560  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 17:57:39.743568  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 17:57:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:57:43.410660  543705 memory.go:191] Add success.
I0322 17:57:43.409824  543705 cpu.go:282] Add success.
I0322 17:57:43.420350  543705 net.go:648] Add success.
I0322 17:57:43.423116  543705 net.go:770] primary dev: ETH0
I0322 17:57:43.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:57:43.423143  543705 net.go:698] Add success.
I0322 17:57:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:57:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:57:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:57:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:57:53.409786  543705 memory.go:184] no items to output this cycle
I0322 17:57:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 17:58:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:58:03.409783  543705 memory.go:184] no items to output this cycle
I0322 17:58:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 17:58:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:58:13.409804  543705 memory.go:191] Add success.
I0322 17:58:13.409805  543705 cpu.go:282] Add success.
W0322 17:58:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:58:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:58:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:58:13.420130  543705 net.go:648] Add success.
I0322 17:58:13.423069  543705 net.go:770] primary dev: ETH0
I0322 17:58:13.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:58:13.423096  543705 net.go:698] Add success.
I0322 17:58:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:58:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:58:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 17:58:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:58:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 17:58:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:58:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:58:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:58:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:58:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:58:23.409808  543705 memory.go:184] no items to output this cycle
I0322 17:58:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 17:58:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:58:33.409769  543705 memory.go:184] no items to output this cycle
I0322 17:58:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 17:58:35.857673  543705 disk_info.go:125] begin check local disk info of client
I0322 17:58:35.860225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:58:35.860232  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa140 0xc0001aa180]
E0322 17:58:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:58:43.410646  543705 memory.go:191] Add success.
I0322 17:58:43.409839  543705 cpu.go:282] Add success.
I0322 17:58:43.420390  543705 net.go:648] Add success.
I0322 17:58:43.422962  543705 net.go:770] primary dev: ETH0
I0322 17:58:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:58:43.422987  543705 net.go:698] Add success.
I0322 17:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:58:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:58:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:58:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:58:53.409804  543705 memory.go:184] no items to output this cycle
I0322 17:58:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 17:59:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:59:03.409776  543705 memory.go:184] no items to output this cycle
I0322 17:59:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 17:59:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:59:13.409795  543705 memory.go:191] Add success.
I0322 17:59:13.409797  543705 cpu.go:282] Add success.
W0322 17:59:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 17:59:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 17:59:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 17:59:13.419866  543705 net.go:770] primary dev: ETH0
I0322 17:59:13.419879  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:59:13.419891  543705 net.go:698] Add success.
I0322 17:59:13.420134  543705 net.go:648] Add success.
I0322 17:59:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 17:59:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 17:59:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 17:59:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 17:59:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 17:59:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 17:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 17:59:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:59:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:59:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 17:59:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 17:59:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:59:23.409800  543705 memory.go:184] no items to output this cycle
I0322 17:59:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 17:59:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:59:33.409797  543705 memory.go:184] no items to output this cycle
I0322 17:59:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 17:59:35.861674  543705 disk_info.go:125] begin check local disk info of client
I0322 17:59:35.864294  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 17:59:35.864300  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270d00 0xc000270d40]
E0322 17:59:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:59:43.410578  543705 memory.go:191] Add success.
I0322 17:59:43.409844  543705 cpu.go:282] Add success.
I0322 17:59:43.420294  543705 net.go:648] Add success.
I0322 17:59:43.422757  543705 net.go:770] primary dev: ETH0
I0322 17:59:43.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0322 17:59:43.422785  543705 net.go:698] Add success.
I0322 17:59:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 17:59:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 17:59:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 17:59:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 17:59:53.409787  543705 memory.go:184] no items to output this cycle
I0322 17:59:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 18:00:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:00:03.409799  543705 memory.go:184] no items to output this cycle
I0322 18:00:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 18:00:13.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:00:13.409935  543705 memory.go:191] Add success.
I0322 18:00:13.409949  543705 cpu.go:282] Add success.
W0322 18:00:13.409969  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:00:13.409991  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:00:13.409996  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:00:13.419706  543705 net.go:648] Add success.
I0322 18:00:13.422238  543705 net.go:770] primary dev: ETH0
I0322 18:00:13.422251  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:00:13.422262  543705 net.go:698] Add success.
I0322 18:00:13.468128  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"628c23bf-a6ae-4c95-a681-93f383969d14","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:00:13.468159  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:00:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:00:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:00:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 18:00:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:00:14.456621  543705 disk_worker.go:494] system disk:vda1
I0322 18:00:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:00:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:00:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:00:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:00:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:00:23.409821  543705 memory.go:184] no items to output this cycle
I0322 18:00:23.409828  543705 cpu.go:275] no items to output this cycle
E0322 18:00:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:00:33.409808  543705 memory.go:184] no items to output this cycle
I0322 18:00:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 18:00:35.865671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:00:35.868338  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:00:35.868345  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025ac40 0xc00025ac80]
I0322 18:00:39.744425  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:00:39.744432  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:00:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:00:43.410633  543705 memory.go:191] Add success.
I0322 18:00:43.409821  543705 cpu.go:282] Add success.
I0322 18:00:43.420350  543705 net.go:648] Add success.
I0322 18:00:43.423023  543705 net.go:770] primary dev: ETH0
I0322 18:00:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:00:43.423051  543705 net.go:698] Add success.
I0322 18:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:00:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:00:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:00:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:00:53.409802  543705 memory.go:184] no items to output this cycle
I0322 18:00:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 18:01:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:01:03.409777  543705 memory.go:184] no items to output this cycle
I0322 18:01:03.409777  543705 cpu.go:275] no items to output this cycle
E0322 18:01:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:01:13.409795  543705 memory.go:191] Add success.
I0322 18:01:13.409795  543705 cpu.go:282] Add success.
W0322 18:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:01:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:01:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:01:13.419724  543705 net.go:648] Add success.
I0322 18:01:13.422928  543705 net.go:770] primary dev: ETH0
I0322 18:01:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:01:13.422951  543705 net.go:698] Add success.
I0322 18:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:01:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:01:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 18:01:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:01:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 18:01:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:01:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:01:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:01:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:01:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:01:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:01:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:01:23.409801  543705 memory.go:184] no items to output this cycle
I0322 18:01:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 18:01:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:01:33.409769  543705 memory.go:184] no items to output this cycle
I0322 18:01:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 18:01:35.869674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:01:35.872200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:01:35.872206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ab080 0xc0003ab0c0]
E0322 18:01:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:01:43.410803  543705 memory.go:191] Add success.
I0322 18:01:43.409802  543705 cpu.go:282] Add success.
I0322 18:01:43.420495  543705 net.go:648] Add success.
I0322 18:01:43.423550  543705 net.go:770] primary dev: ETH0
I0322 18:01:43.423563  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:01:43.423575  543705 net.go:698] Add success.
I0322 18:01:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:01:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:01:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:01:53.410199  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:01:53.410217  543705 memory.go:184] no items to output this cycle
I0322 18:01:53.410237  543705 cpu.go:275] no items to output this cycle
E0322 18:02:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:02:03.409772  543705 memory.go:184] no items to output this cycle
I0322 18:02:03.409801  543705 cpu.go:275] no items to output this cycle
W0322 18:02:13.409704  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:02:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:02:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 18:02:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:02:13.409829  543705 cpu.go:282] Add success.
I0322 18:02:13.409853  543705 memory.go:191] Add success.
I0322 18:02:13.419985  543705 net.go:648] Add success.
I0322 18:02:13.422893  543705 net.go:770] primary dev: ETH0
I0322 18:02:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:02:13.422948  543705 net.go:698] Add success.
W0322 18:02:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:02:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 18:02:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:02:14.456785  543705 disk_worker.go:494] system disk:vda1
I0322 18:02:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:02:14.456994  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:02:14.457002  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:02:14.457019  543705 custom_config.go:64] query custom config with name: gpu
E0322 18:02:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:02:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:02:16.457900  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:02:16.457909  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:02:16.457951  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:02:16.457968  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:02:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:02:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:02:23.409782  543705 memory.go:184] no items to output this cycle
I0322 18:02:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:02:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:02:33.409801  543705 memory.go:184] no items to output this cycle
I0322 18:02:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 18:02:35.873670  543705 disk_info.go:125] begin check local disk info of client
I0322 18:02:35.876305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:02:35.876311  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472fc0 0xc000473000]
E0322 18:02:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:02:43.410645  543705 memory.go:191] Add success.
I0322 18:02:43.409802  543705 cpu.go:282] Add success.
I0322 18:02:43.420365  543705 net.go:648] Add success.
I0322 18:02:43.423599  543705 net.go:770] primary dev: ETH0
I0322 18:02:43.423613  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:02:43.423627  543705 net.go:698] Add success.
I0322 18:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:02:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:02:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:02:53.409770  543705 memory.go:184] no items to output this cycle
I0322 18:02:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 18:03:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:03:03.409777  543705 memory.go:184] no items to output this cycle
I0322 18:03:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 18:03:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:03:13.409793  543705 memory.go:191] Add success.
I0322 18:03:13.409793  543705 cpu.go:282] Add success.
W0322 18:03:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:03:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:03:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:03:13.420206  543705 net.go:648] Add success.
I0322 18:03:13.423137  543705 net.go:770] primary dev: ETH0
I0322 18:03:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:03:13.423173  543705 net.go:698] Add success.
I0322 18:03:13.464818  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a1a779a-2c8c-4a44-8278-d9affad015c4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:03:13.464852  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:03:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:03:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:03:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 18:03:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:03:14.457586  543705 disk_worker.go:494] system disk:vda1
I0322 18:03:14.457613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:03:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:03:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:03:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:03:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:03:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:03:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 18:03:23.409792  543705 memory.go:184] no items to output this cycle
E0322 18:03:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:03:33.409779  543705 memory.go:184] no items to output this cycle
I0322 18:03:33.409785  543705 cpu.go:275] no items to output this cycle
I0322 18:03:35.877672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:03:35.880169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:03:35.880175  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf00 0xc0001abf40]
I0322 18:03:39.744576  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:03:39.744584  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:03:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:03:43.410738  543705 memory.go:191] Add success.
I0322 18:03:43.409809  543705 cpu.go:282] Add success.
I0322 18:03:43.420481  543705 net.go:648] Add success.
I0322 18:03:43.423192  543705 net.go:770] primary dev: ETH0
I0322 18:03:43.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:03:43.423228  543705 net.go:698] Add success.
I0322 18:03:46.458384  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:03:46.458458  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:03:46.458485  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:03:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:03:53.409782  543705 cpu.go:275] no items to output this cycle
I0322 18:03:53.409788  543705 memory.go:184] no items to output this cycle
E0322 18:04:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:04:03.409761  543705 memory.go:184] no items to output this cycle
I0322 18:04:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 18:04:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:04:13.409812  543705 memory.go:191] Add success.
I0322 18:04:13.409824  543705 cpu.go:282] Add success.
W0322 18:04:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:04:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:04:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:04:13.420036  543705 net.go:648] Add success.
I0322 18:04:13.422776  543705 net.go:770] primary dev: ETH0
I0322 18:04:13.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:04:13.422802  543705 net.go:698] Add success.
I0322 18:04:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:04:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:04:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 18:04:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:04:14.456600  543705 disk_worker.go:494] system disk:vda1
I0322 18:04:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:04:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:04:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:04:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:04:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:04:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:04:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:04:23.409776  543705 memory.go:184] no items to output this cycle
I0322 18:04:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 18:04:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:04:33.409774  543705 memory.go:184] no items to output this cycle
I0322 18:04:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 18:04:35.881677  543705 disk_info.go:125] begin check local disk info of client
I0322 18:04:35.884194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:04:35.884200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b7d80 0xc0002b7dc0]
E0322 18:04:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:04:43.410642  543705 memory.go:191] Add success.
I0322 18:04:43.409822  543705 cpu.go:282] Add success.
I0322 18:04:43.420410  543705 net.go:648] Add success.
I0322 18:04:43.422961  543705 net.go:770] primary dev: ETH0
I0322 18:04:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:04:43.422987  543705 net.go:698] Add success.
I0322 18:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:04:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:04:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:04:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:04:53.409795  543705 memory.go:184] no items to output this cycle
I0322 18:04:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 18:05:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:05:03.409781  543705 memory.go:184] no items to output this cycle
I0322 18:05:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 18:05:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:05:13.409780  543705 memory.go:191] Add success.
I0322 18:05:13.409801  543705 cpu.go:282] Add success.
W0322 18:05:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:05:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:05:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:05:13.420110  543705 net.go:648] Add success.
I0322 18:05:13.422629  543705 net.go:770] primary dev: ETH0
I0322 18:05:13.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:05:13.422654  543705 net.go:698] Add success.
I0322 18:05:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:05:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:05:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 18:05:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:05:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 18:05:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:05:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:05:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:05:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:05:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:05:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:05:23.409779  543705 memory.go:184] no items to output this cycle
I0322 18:05:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:05:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:05:33.409767  543705 memory.go:184] no items to output this cycle
I0322 18:05:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 18:05:35.885672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:05:35.888169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:05:35.888176  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a3f40 0xc00007a000]
E0322 18:05:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:05:43.410701  543705 memory.go:191] Add success.
I0322 18:05:43.409806  543705 cpu.go:282] Add success.
I0322 18:05:43.420417  543705 net.go:648] Add success.
I0322 18:05:43.423135  543705 net.go:770] primary dev: ETH0
I0322 18:05:43.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:05:43.423162  543705 net.go:698] Add success.
I0322 18:05:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:05:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:05:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:05:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:05:53.409792  543705 memory.go:184] no items to output this cycle
I0322 18:05:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 18:06:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:06:03.409777  543705 memory.go:184] no items to output this cycle
I0322 18:06:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 18:06:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:06:13.409785  543705 memory.go:191] Add success.
I0322 18:06:13.409808  543705 cpu.go:282] Add success.
W0322 18:06:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:06:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:06:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:06:13.420241  543705 net.go:648] Add success.
I0322 18:06:13.423345  543705 net.go:770] primary dev: ETH0
I0322 18:06:13.423360  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:06:13.423375  543705 net.go:698] Add success.
I0322 18:06:13.468192  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3bf7bb7a-e56e-4fbc-a4af-5c5e95793e2b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:06:13.468234  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:06:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:06:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:06:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 18:06:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:06:14.456505  543705 disk_worker.go:494] system disk:vda1
I0322 18:06:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:06:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:06:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:06:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:06:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:06:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:06:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:06:23.409807  543705 memory.go:184] no items to output this cycle
I0322 18:06:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 18:06:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:06:33.409781  543705 memory.go:184] no items to output this cycle
I0322 18:06:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 18:06:35.889671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:06:35.892177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:06:35.892183  543705 disk_info.go:196] parse disk info done, disk is : [0xc000252700 0xc000252740]
I0322 18:06:39.745424  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:06:39.745432  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:06:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:06:43.410691  543705 memory.go:191] Add success.
I0322 18:06:43.409799  543705 cpu.go:282] Add success.
I0322 18:06:43.420401  543705 net.go:648] Add success.
I0322 18:06:43.424292  543705 net.go:770] primary dev: ETH0
I0322 18:06:43.424308  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:06:43.424321  543705 net.go:698] Add success.
I0322 18:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:06:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:06:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:06:53.409786  543705 memory.go:184] no items to output this cycle
I0322 18:06:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 18:07:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:07:03.409777  543705 memory.go:184] no items to output this cycle
I0322 18:07:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 18:07:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:07:13.409775  543705 memory.go:191] Add success.
W0322 18:07:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:07:13.409807  543705 cpu.go:282] Add success.
W0322 18:07:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:07:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:07:13.420071  543705 net.go:648] Add success.
I0322 18:07:13.423004  543705 net.go:770] primary dev: ETH0
I0322 18:07:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:07:13.423028  543705 net.go:698] Add success.
I0322 18:07:13.453663  543705 event_worker.go:152] Polling the log file for events...
W0322 18:07:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:07:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 18:07:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:07:14.456787  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:07:14.456795  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:07:14.456801  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:07:14.456845  543705 disk_worker.go:494] system disk:vda1
I0322 18:07:14.456887  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:07:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:07:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:07:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:07:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:07:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:07:16.457980  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:07:16.472310  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:07:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:07:23.409773  543705 memory.go:184] no items to output this cycle
I0322 18:07:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 18:07:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:07:33.409804  543705 memory.go:184] no items to output this cycle
I0322 18:07:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 18:07:35.893683  543705 disk_info.go:125] begin check local disk info of client
I0322 18:07:35.896205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:07:35.896212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fc000 0xc0004fc040]
E0322 18:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:07:43.410667  543705 memory.go:191] Add success.
I0322 18:07:43.409803  543705 cpu.go:282] Add success.
I0322 18:07:43.420338  543705 net.go:648] Add success.
I0322 18:07:43.423194  543705 net.go:770] primary dev: ETH0
I0322 18:07:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:07:43.423220  543705 net.go:698] Add success.
I0322 18:07:46.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:07:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:07:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:07:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:07:53.409772  543705 memory.go:184] no items to output this cycle
I0322 18:07:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 18:08:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:08:03.409776  543705 memory.go:184] no items to output this cycle
I0322 18:08:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 18:08:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:08:13.409793  543705 memory.go:191] Add success.
I0322 18:08:13.409792  543705 cpu.go:282] Add success.
W0322 18:08:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:08:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:08:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:08:13.420390  543705 net.go:648] Add success.
I0322 18:08:13.423086  543705 net.go:770] primary dev: ETH0
I0322 18:08:13.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:08:13.423111  543705 net.go:698] Add success.
I0322 18:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:08:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:08:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 18:08:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:08:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 18:08:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:08:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:08:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:08:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:08:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:08:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:08:23.409896  543705 cpu.go:275] no items to output this cycle
I0322 18:08:23.409914  543705 memory.go:184] no items to output this cycle
E0322 18:08:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:08:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 18:08:33.409789  543705 memory.go:184] no items to output this cycle
I0322 18:08:35.897673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:08:35.900211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:08:35.900217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357800 0xc000357840]
E0322 18:08:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:08:43.410791  543705 memory.go:191] Add success.
I0322 18:08:43.409830  543705 cpu.go:282] Add success.
I0322 18:08:43.420477  543705 net.go:648] Add success.
I0322 18:08:43.423434  543705 net.go:770] primary dev: ETH0
I0322 18:08:43.423447  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:08:43.423459  543705 net.go:698] Add success.
I0322 18:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:08:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:08:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:08:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:08:53.409773  543705 memory.go:184] no items to output this cycle
I0322 18:08:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 18:09:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:09:03.409795  543705 memory.go:184] no items to output this cycle
I0322 18:09:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 18:09:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:09:13.409805  543705 memory.go:191] Add success.
I0322 18:09:13.409811  543705 cpu.go:282] Add success.
W0322 18:09:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:09:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:09:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:09:13.420045  543705 net.go:648] Add success.
I0322 18:09:13.422841  543705 net.go:770] primary dev: ETH0
I0322 18:09:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:09:13.422870  543705 net.go:698] Add success.
I0322 18:09:13.463816  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"aed0dc18-99b6-486b-9980-ff653afa6c03","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:09:13.463849  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:09:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:09:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:09:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 18:09:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:09:14.456677  543705 disk_worker.go:494] system disk:vda1
I0322 18:09:14.456704  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:09:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:09:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:09:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:09:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:09:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:09:23.409800  543705 memory.go:184] no items to output this cycle
I0322 18:09:23.409897  543705 cpu.go:275] no items to output this cycle
E0322 18:09:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:09:33.409796  543705 memory.go:184] no items to output this cycle
I0322 18:09:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 18:09:35.901673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:09:35.904164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:09:35.904181  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
I0322 18:09:39.745742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:09:39.745750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:09:43.410161  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:09:43.411072  543705 memory.go:191] Add success.
I0322 18:09:43.410212  543705 cpu.go:282] Add success.
I0322 18:09:43.419787  543705 net.go:648] Add success.
I0322 18:09:43.422448  543705 net.go:770] primary dev: ETH0
I0322 18:09:43.422461  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:09:43.422474  543705 net.go:698] Add success.
I0322 18:09:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:09:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:09:53.410257  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:09:53.410273  543705 memory.go:184] no items to output this cycle
I0322 18:09:53.410297  543705 cpu.go:275] no items to output this cycle
E0322 18:10:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:10:03.409792  543705 memory.go:184] no items to output this cycle
I0322 18:10:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 18:10:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:10:13.409781  543705 memory.go:191] Add success.
W0322 18:10:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:10:13.409813  543705 cpu.go:282] Add success.
W0322 18:10:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:10:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:10:13.420119  543705 net.go:648] Add success.
I0322 18:10:13.422805  543705 net.go:770] primary dev: ETH0
I0322 18:10:13.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:10:13.422834  543705 net.go:698] Add success.
I0322 18:10:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:10:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:10:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 18:10:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:10:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 18:10:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:10:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:10:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:10:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:10:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:10:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:10:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:10:23.409815  543705 memory.go:184] no items to output this cycle
I0322 18:10:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 18:10:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:10:33.409810  543705 memory.go:184] no items to output this cycle
I0322 18:10:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 18:10:35.905674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:10:35.908220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:10:35.908227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab300 0xc0001ab400]
E0322 18:10:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:10:43.410766  543705 memory.go:191] Add success.
I0322 18:10:43.409803  543705 cpu.go:282] Add success.
I0322 18:10:43.420475  543705 net.go:648] Add success.
I0322 18:10:43.423429  543705 net.go:770] primary dev: ETH0
I0322 18:10:43.423441  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:10:43.423455  543705 net.go:698] Add success.
I0322 18:10:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:10:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:10:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:10:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:10:53.409765  543705 memory.go:184] no items to output this cycle
I0322 18:10:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:11:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:11:03.409796  543705 memory.go:184] no items to output this cycle
I0322 18:11:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:11:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:11:13.409777  543705 memory.go:191] Add success.
W0322 18:11:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:11:13.409802  543705 cpu.go:282] Add success.
W0322 18:11:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:11:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:11:13.420098  543705 net.go:648] Add success.
I0322 18:11:13.422836  543705 net.go:770] primary dev: ETH0
I0322 18:11:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:11:13.422862  543705 net.go:698] Add success.
I0322 18:11:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:11:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:11:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 18:11:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:11:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 18:11:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:11:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:11:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:11:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:11:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:11:23.409769  543705 memory.go:184] no items to output this cycle
I0322 18:11:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 18:11:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:11:33.409769  543705 memory.go:184] no items to output this cycle
I0322 18:11:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 18:11:35.909667  543705 disk_info.go:125] begin check local disk info of client
I0322 18:11:35.912160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:11:35.912166  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051b840 0xc00051b880]
E0322 18:11:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:11:43.409806  543705 cpu.go:282] Add success.
I0322 18:11:43.410891  543705 memory.go:191] Add success.
I0322 18:11:43.419759  543705 net.go:648] Add success.
I0322 18:11:43.422491  543705 net.go:770] primary dev: ETH0
I0322 18:11:43.422511  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:11:43.422525  543705 net.go:698] Add success.
I0322 18:11:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:11:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:11:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:11:53.409799  543705 memory.go:184] no items to output this cycle
I0322 18:11:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 18:12:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:12:03.409779  543705 memory.go:184] no items to output this cycle
I0322 18:12:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 18:12:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:12:13.409793  543705 memory.go:191] Add success.
I0322 18:12:13.409797  543705 cpu.go:282] Add success.
W0322 18:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:12:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:12:13.420193  543705 net.go:648] Add success.
I0322 18:12:13.423447  543705 net.go:770] primary dev: ETH0
I0322 18:12:13.423462  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:12:13.423474  543705 net.go:698] Add success.
I0322 18:12:13.463708  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e7a228d-79ae-40a9-a548-44bb844cf989","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:12:13.463748  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 18:12:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:12:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 18:12:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:12:14.455932  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:12:14.455942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:12:14.455946  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:12:14.456707  543705 disk_worker.go:494] system disk:vda1
I0322 18:12:14.456752  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:12:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:12:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:12:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:12:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:12:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:12:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:12:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:12:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:12:23.409808  543705 memory.go:184] no items to output this cycle
I0322 18:12:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 18:12:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:12:33.409771  543705 memory.go:184] no items to output this cycle
I0322 18:12:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 18:12:35.913686  543705 disk_info.go:125] begin check local disk info of client
I0322 18:12:35.916117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:12:35.916123  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048f040 0xc00048f080]
I0322 18:12:39.747444  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:12:39.747452  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:12:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:12:43.410644  543705 memory.go:191] Add success.
I0322 18:12:43.409810  543705 cpu.go:282] Add success.
I0322 18:12:43.419737  543705 net.go:648] Add success.
I0322 18:12:43.422433  543705 net.go:770] primary dev: ETH0
I0322 18:12:43.422448  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:12:43.422463  543705 net.go:698] Add success.
I0322 18:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:12:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:12:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:12:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:12:53.409777  543705 memory.go:184] no items to output this cycle
I0322 18:12:53.409777  543705 cpu.go:275] no items to output this cycle
E0322 18:13:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:13:03.409766  543705 memory.go:184] no items to output this cycle
I0322 18:13:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 18:13:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:13:13.409786  543705 memory.go:191] Add success.
W0322 18:13:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:13:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:13:13.409822  543705 cpu.go:282] Add success.
I0322 18:13:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:13:13.420062  543705 net.go:648] Add success.
I0322 18:13:13.423223  543705 net.go:770] primary dev: ETH0
I0322 18:13:13.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:13:13.423254  543705 net.go:698] Add success.
I0322 18:13:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:13:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:13:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 18:13:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:13:14.456539  543705 disk_worker.go:494] system disk:vda1
I0322 18:13:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:13:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:13:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:13:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:13:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:13:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:13:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:13:23.409791  543705 memory.go:184] no items to output this cycle
I0322 18:13:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 18:13:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:13:33.409793  543705 memory.go:184] no items to output this cycle
I0322 18:13:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 18:13:35.917674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:13:35.920157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:13:35.920166  543705 disk_info.go:196] parse disk info done, disk is : [0xc000507700 0xc000507740]
E0322 18:13:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:13:43.410629  543705 memory.go:191] Add success.
I0322 18:13:43.409818  543705 cpu.go:282] Add success.
I0322 18:13:43.420604  543705 net.go:648] Add success.
I0322 18:13:43.423236  543705 net.go:770] primary dev: ETH0
I0322 18:13:43.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:13:43.423261  543705 net.go:698] Add success.
I0322 18:13:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:13:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:13:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:13:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:13:53.409807  543705 memory.go:184] no items to output this cycle
I0322 18:13:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 18:14:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:14:03.409789  543705 cpu.go:275] no items to output this cycle
I0322 18:14:03.409799  543705 memory.go:184] no items to output this cycle
E0322 18:14:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:14:13.409820  543705 memory.go:191] Add success.
I0322 18:14:13.409824  543705 cpu.go:282] Add success.
W0322 18:14:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:14:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:14:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:14:13.420049  543705 net.go:648] Add success.
I0322 18:14:13.422916  543705 net.go:770] primary dev: ETH0
I0322 18:14:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:14:13.422940  543705 net.go:698] Add success.
I0322 18:14:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:14:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:14:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 18:14:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:14:14.456490  543705 disk_worker.go:494] system disk:vda1
I0322 18:14:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:14:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:14:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:14:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:14:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:14:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:14:23.409775  543705 memory.go:184] no items to output this cycle
I0322 18:14:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 18:14:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:14:33.409795  543705 memory.go:184] no items to output this cycle
I0322 18:14:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 18:14:35.921675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:14:35.924233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:14:35.924239  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2080 0xc0003f20c0]
E0322 18:14:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:14:43.410692  543705 memory.go:191] Add success.
I0322 18:14:43.409825  543705 cpu.go:282] Add success.
I0322 18:14:43.420533  543705 net.go:648] Add success.
I0322 18:14:43.423224  543705 net.go:770] primary dev: ETH0
I0322 18:14:43.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:14:43.423248  543705 net.go:698] Add success.
I0322 18:14:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:14:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:14:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:14:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:14:53.409803  543705 memory.go:184] no items to output this cycle
I0322 18:14:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:15:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:15:03.409778  543705 memory.go:184] no items to output this cycle
I0322 18:15:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 18:15:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:15:13.409809  543705 memory.go:191] Add success.
I0322 18:15:13.409817  543705 cpu.go:282] Add success.
W0322 18:15:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:15:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:15:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:15:13.420104  543705 net.go:648] Add success.
I0322 18:15:13.422682  543705 net.go:770] primary dev: ETH0
I0322 18:15:13.422694  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:15:13.422705  543705 net.go:698] Add success.
I0322 18:15:13.463253  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"74e0d3d2-4d9e-44f9-befd-77b093069d87","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:15:13.463285  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:15:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:15:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 18:15:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:15:14.456674  543705 disk_worker.go:494] system disk:vda1
I0322 18:15:14.456703  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:15:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:15:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:15:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:15:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:15:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:15:23.409802  543705 memory.go:184] no items to output this cycle
I0322 18:15:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:15:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:15:33.409803  543705 memory.go:184] no items to output this cycle
I0322 18:15:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 18:15:35.925676  543705 disk_info.go:125] begin check local disk info of client
I0322 18:15:35.928193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:15:35.928199  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f2c0 0xc00039f300]
I0322 18:15:39.748475  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:15:39.748482  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:15:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:15:43.410723  543705 memory.go:191] Add success.
I0322 18:15:43.409806  543705 cpu.go:282] Add success.
I0322 18:15:43.420638  543705 net.go:648] Add success.
I0322 18:15:43.423460  543705 net.go:770] primary dev: ETH0
I0322 18:15:43.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:15:43.423486  543705 net.go:698] Add success.
I0322 18:15:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:15:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:15:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:15:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:15:53.409795  543705 memory.go:184] no items to output this cycle
I0322 18:15:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:16:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:16:03.409773  543705 memory.go:184] no items to output this cycle
I0322 18:16:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 18:16:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:16:13.409787  543705 memory.go:191] Add success.
I0322 18:16:13.409794  543705 cpu.go:282] Add success.
W0322 18:16:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:16:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:16:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:16:13.420072  543705 net.go:648] Add success.
I0322 18:16:13.423192  543705 net.go:770] primary dev: ETH0
I0322 18:16:13.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:16:13.423217  543705 net.go:698] Add success.
I0322 18:16:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:16:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:16:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 18:16:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:16:14.456502  543705 disk_worker.go:494] system disk:vda1
I0322 18:16:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:16:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:16:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:16:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:16:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:16:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:16:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:16:23.409785  543705 memory.go:184] no items to output this cycle
I0322 18:16:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 18:16:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:16:33.409803  543705 memory.go:184] no items to output this cycle
I0322 18:16:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 18:16:35.929675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:16:35.932164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:16:35.932170  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c400 0xc00057c440]
E0322 18:16:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:16:43.409941  543705 cpu.go:282] Add success.
I0322 18:16:43.410708  543705 memory.go:191] Add success.
I0322 18:16:43.419740  543705 net.go:648] Add success.
I0322 18:16:43.422522  543705 net.go:770] primary dev: ETH0
I0322 18:16:43.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:16:43.422547  543705 net.go:698] Add success.
I0322 18:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:16:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:16:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:16:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:16:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 18:16:53.409784  543705 memory.go:184] no items to output this cycle
E0322 18:17:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:17:03.409802  543705 memory.go:184] no items to output this cycle
I0322 18:17:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:17:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:17:13.409793  543705 memory.go:191] Add success.
I0322 18:17:13.409792  543705 cpu.go:282] Add success.
W0322 18:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:17:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:17:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:17:13.420236  543705 net.go:648] Add success.
I0322 18:17:13.423159  543705 net.go:770] primary dev: ETH0
I0322 18:17:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:17:13.423201  543705 net.go:698] Add success.
I0322 18:17:13.452866  543705 event_worker.go:152] Polling the log file for events...
W0322 18:17:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:17:14.455262  543705 disk_worker.go:708] disk space is not compliant
W0322 18:17:14.455266  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:17:14.455864  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:17:14.455873  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:17:14.455879  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:17:14.456831  543705 disk_worker.go:494] system disk:vda1
I0322 18:17:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:17:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:17:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:17:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:17:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:17:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:17:16.457980  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:17:16.472298  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:17:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:17:23.409807  543705 memory.go:184] no items to output this cycle
I0322 18:17:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 18:17:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:17:33.409782  543705 memory.go:184] no items to output this cycle
I0322 18:17:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 18:17:35.933679  543705 disk_info.go:125] begin check local disk info of client
I0322 18:17:35.936167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:17:35.936173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2000 0xc0003f2040]
E0322 18:17:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:17:43.410662  543705 memory.go:191] Add success.
I0322 18:17:43.409827  543705 cpu.go:282] Add success.
I0322 18:17:43.420388  543705 net.go:648] Add success.
I0322 18:17:43.423446  543705 net.go:770] primary dev: ETH0
I0322 18:17:43.423459  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:17:43.423471  543705 net.go:698] Add success.
I0322 18:17:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:17:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:17:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:17:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:17:53.409780  543705 memory.go:184] no items to output this cycle
I0322 18:17:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 18:18:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:18:03.409804  543705 memory.go:184] no items to output this cycle
I0322 18:18:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 18:18:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:18:13.409773  543705 memory.go:191] Add success.
W0322 18:18:13.409796  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:18:13.409805  543705 cpu.go:282] Add success.
W0322 18:18:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:18:13.409810  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:18:13.420097  543705 net.go:648] Add success.
I0322 18:18:13.422830  543705 net.go:770] primary dev: ETH0
I0322 18:18:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:18:13.422857  543705 net.go:698] Add success.
I0322 18:18:13.556823  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2c1b9dcf-a579-4590-beb1-59a113d84dca","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:18:13.556855  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:18:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:18:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:18:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0322 18:18:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:18:14.456616  543705 disk_worker.go:494] system disk:vda1
I0322 18:18:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:18:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:18:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:18:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:18:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:18:23.409820  543705 memory.go:184] no items to output this cycle
I0322 18:18:23.409829  543705 cpu.go:275] no items to output this cycle
E0322 18:18:33.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:18:33.409884  543705 cpu.go:275] no items to output this cycle
I0322 18:18:33.409892  543705 memory.go:184] no items to output this cycle
I0322 18:18:35.937670  543705 disk_info.go:125] begin check local disk info of client
I0322 18:18:35.940182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:18:35.940188  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371c40 0xc000371c80]
I0322 18:18:39.749510  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:18:39.749518  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:18:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:18:43.410647  543705 memory.go:191] Add success.
I0322 18:18:43.409818  543705 cpu.go:282] Add success.
I0322 18:18:43.420426  543705 net.go:648] Add success.
I0322 18:18:43.423289  543705 net.go:770] primary dev: ETH0
I0322 18:18:43.423303  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:18:43.423321  543705 net.go:698] Add success.
I0322 18:18:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:18:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:18:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:18:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:18:53.409770  543705 memory.go:184] no items to output this cycle
I0322 18:18:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 18:19:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:19:03.409779  543705 memory.go:184] no items to output this cycle
I0322 18:19:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 18:19:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:19:13.409792  543705 memory.go:191] Add success.
I0322 18:19:13.409796  543705 cpu.go:282] Add success.
W0322 18:19:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:19:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:19:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:19:13.420105  543705 net.go:648] Add success.
I0322 18:19:13.422943  543705 net.go:770] primary dev: ETH0
I0322 18:19:13.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:19:13.422975  543705 net.go:698] Add success.
I0322 18:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:19:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:19:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 18:19:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:19:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 18:19:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:19:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:19:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:19:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:19:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:19:23.409780  543705 memory.go:184] no items to output this cycle
I0322 18:19:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 18:19:33.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:19:33.409907  543705 memory.go:184] no items to output this cycle
I0322 18:19:33.409908  543705 cpu.go:275] no items to output this cycle
I0322 18:19:35.941671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:19:35.944164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:19:35.944170  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344a80 0xc000344b00]
E0322 18:19:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:19:43.410760  543705 memory.go:191] Add success.
I0322 18:19:43.409838  543705 cpu.go:282] Add success.
I0322 18:19:43.420488  543705 net.go:648] Add success.
I0322 18:19:43.423108  543705 net.go:770] primary dev: ETH0
I0322 18:19:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:19:43.423133  543705 net.go:698] Add success.
I0322 18:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:19:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:19:53.409767  543705 memory.go:184] no items to output this cycle
I0322 18:19:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:20:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:20:03.409774  543705 memory.go:184] no items to output this cycle
I0322 18:20:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 18:20:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:20:13.409788  543705 memory.go:191] Add success.
I0322 18:20:13.409789  543705 cpu.go:282] Add success.
W0322 18:20:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:20:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:20:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:20:13.420146  543705 net.go:648] Add success.
I0322 18:20:13.423163  543705 net.go:770] primary dev: ETH0
I0322 18:20:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:20:13.423188  543705 net.go:698] Add success.
I0322 18:20:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:20:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:20:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 18:20:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:20:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 18:20:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:20:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:20:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:20:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:20:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:20:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:20:23.409787  543705 memory.go:184] no items to output this cycle
I0322 18:20:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 18:20:33.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:20:33.409916  543705 memory.go:184] no items to output this cycle
I0322 18:20:33.409938  543705 cpu.go:275] no items to output this cycle
I0322 18:20:35.945673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:20:35.948241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:20:35.948248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2fc0 0xc0003b3000]
E0322 18:20:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:20:43.410760  543705 memory.go:191] Add success.
I0322 18:20:43.409821  543705 cpu.go:282] Add success.
I0322 18:20:43.420453  543705 net.go:648] Add success.
I0322 18:20:43.423176  543705 net.go:770] primary dev: ETH0
I0322 18:20:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:20:43.423212  543705 net.go:698] Add success.
I0322 18:20:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:20:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:20:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:20:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:20:53.409799  543705 memory.go:184] no items to output this cycle
I0322 18:20:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:21:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:21:03.409768  543705 memory.go:184] no items to output this cycle
I0322 18:21:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:21:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:21:13.409821  543705 memory.go:191] Add success.
I0322 18:21:13.409824  543705 cpu.go:282] Add success.
W0322 18:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:21:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:21:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:21:13.420120  543705 net.go:648] Add success.
I0322 18:21:13.422798  543705 net.go:770] primary dev: ETH0
I0322 18:21:13.422811  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:21:13.422824  543705 net.go:698] Add success.
I0322 18:21:13.525395  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f655da8a-10cb-49c8-ab6b-937552471097","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:21:13.525436  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:21:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:21:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 18:21:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:21:14.456605  543705 disk_worker.go:494] system disk:vda1
I0322 18:21:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:21:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:21:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:21:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:21:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:21:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:21:23.409773  543705 memory.go:184] no items to output this cycle
I0322 18:21:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 18:21:33.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:21:33.409915  543705 memory.go:184] no items to output this cycle
I0322 18:21:33.409920  543705 cpu.go:275] no items to output this cycle
I0322 18:21:35.949675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:21:35.952177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:21:35.952182  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475b40 0xc000475b80]
I0322 18:21:39.749736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:21:39.749743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:21:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:21:43.410684  543705 memory.go:191] Add success.
I0322 18:21:43.409827  543705 cpu.go:282] Add success.
I0322 18:21:43.420405  543705 net.go:648] Add success.
I0322 18:21:43.423587  543705 net.go:770] primary dev: ETH0
I0322 18:21:43.423601  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:21:43.423615  543705 net.go:698] Add success.
I0322 18:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:21:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:21:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:21:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:21:53.409794  543705 memory.go:184] no items to output this cycle
I0322 18:21:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 18:22:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:22:03.409772  543705 memory.go:184] no items to output this cycle
I0322 18:22:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:22:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:22:13.409815  543705 memory.go:191] Add success.
I0322 18:22:13.409823  543705 cpu.go:282] Add success.
W0322 18:22:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:22:13.420162  543705 net.go:648] Add success.
I0322 18:22:13.422810  543705 net.go:770] primary dev: ETH0
I0322 18:22:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:22:13.422836  543705 net.go:698] Add success.
W0322 18:22:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:22:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 18:22:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:22:14.456925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:22:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:22:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:22:14.456986  543705 disk_worker.go:494] system disk:vda1
I0322 18:22:14.457027  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:22:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:22:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:22:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:22:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:22:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:22:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:22:16.472318  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:22:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:22:23.409780  543705 memory.go:184] no items to output this cycle
I0322 18:22:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:22:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:22:33.409783  543705 memory.go:184] no items to output this cycle
I0322 18:22:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 18:22:35.953675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:22:35.956216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:22:35.956221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c0740 0xc0004c0780]
E0322 18:22:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:22:43.410620  543705 memory.go:191] Add success.
I0322 18:22:43.409811  543705 cpu.go:282] Add success.
I0322 18:22:43.420155  543705 net.go:770] primary dev: ETH0
I0322 18:22:43.420170  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:22:43.420185  543705 net.go:698] Add success.
I0322 18:22:43.420541  543705 net.go:648] Add success.
I0322 18:22:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:22:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:22:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:22:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:22:53.409795  543705 memory.go:184] no items to output this cycle
I0322 18:22:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:23:03.409769  543705 memory.go:184] no items to output this cycle
I0322 18:23:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:23:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:23:13.409786  543705 memory.go:191] Add success.
I0322 18:23:13.409793  543705 cpu.go:282] Add success.
W0322 18:23:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:23:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:23:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:23:13.420056  543705 net.go:648] Add success.
I0322 18:23:13.422774  543705 net.go:770] primary dev: ETH0
I0322 18:23:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:23:13.422798  543705 net.go:698] Add success.
I0322 18:23:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:23:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:23:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 18:23:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:23:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 18:23:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:23:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:23:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:23:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:23:16.472424  543705 disk_local_worker.go:436] Get disk info: []
I0322 18:23:23.409890  543705 cpu.go:275] no items to output this cycle
E0322 18:23:23.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:23:23.409907  543705 memory.go:184] no items to output this cycle
E0322 18:23:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:23:33.409774  543705 memory.go:184] no items to output this cycle
I0322 18:23:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 18:23:35.957674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:23:35.960210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:23:35.960216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509c80 0xc000509cc0]
E0322 18:23:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:23:43.410648  543705 memory.go:191] Add success.
I0322 18:23:43.409820  543705 cpu.go:282] Add success.
I0322 18:23:43.420368  543705 net.go:648] Add success.
I0322 18:23:43.423082  543705 net.go:770] primary dev: ETH0
I0322 18:23:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:23:43.423110  543705 net.go:698] Add success.
I0322 18:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:23:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:23:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:23:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:23:53.409769  543705 memory.go:184] no items to output this cycle
I0322 18:23:53.409820  543705 cpu.go:275] no items to output this cycle
E0322 18:24:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:24:03.409778  543705 memory.go:184] no items to output this cycle
I0322 18:24:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:24:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:24:13.409792  543705 memory.go:191] Add success.
I0322 18:24:13.409795  543705 cpu.go:282] Add success.
W0322 18:24:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:24:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:24:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:24:13.420063  543705 net.go:648] Add success.
I0322 18:24:13.422771  543705 net.go:770] primary dev: ETH0
I0322 18:24:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:24:13.422796  543705 net.go:698] Add success.
I0322 18:24:13.467966  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"25dab544-bf7c-4881-8feb-18d4fcdabc81","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:24:13.467998  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:24:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:24:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:24:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 18:24:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:24:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 18:24:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:24:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:24:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:24:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:24:23.409818  543705 memory.go:184] no items to output this cycle
I0322 18:24:23.409820  543705 cpu.go:275] no items to output this cycle
E0322 18:24:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:24:33.409787  543705 memory.go:184] no items to output this cycle
I0322 18:24:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 18:24:35.961671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:24:35.964193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:24:35.964198  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b440 0xc00007b480]
I0322 18:24:39.751445  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:24:39.751451  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:24:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:24:43.410632  543705 memory.go:191] Add success.
I0322 18:24:43.409817  543705 cpu.go:282] Add success.
I0322 18:24:43.420363  543705 net.go:648] Add success.
I0322 18:24:43.422933  543705 net.go:770] primary dev: ETH0
I0322 18:24:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:24:43.422960  543705 net.go:698] Add success.
I0322 18:24:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:24:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:24:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:24:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:24:53.409798  543705 memory.go:184] no items to output this cycle
I0322 18:24:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 18:25:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:25:03.409768  543705 memory.go:184] no items to output this cycle
I0322 18:25:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 18:25:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:25:13.409797  543705 cpu.go:282] Add success.
I0322 18:25:13.409797  543705 memory.go:191] Add success.
W0322 18:25:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:25:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:25:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:25:13.420034  543705 net.go:648] Add success.
I0322 18:25:13.422791  543705 net.go:770] primary dev: ETH0
I0322 18:25:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:25:13.422814  543705 net.go:698] Add success.
I0322 18:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:25:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:25:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 18:25:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:25:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 18:25:14.456526  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:25:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:25:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:25:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:25:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:25:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:25:23.409784  543705 memory.go:184] no items to output this cycle
I0322 18:25:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 18:25:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:25:33.409810  543705 memory.go:184] no items to output this cycle
I0322 18:25:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 18:25:35.965671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:25:35.968315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:25:35.968322  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c1ac0 0xc0004c1b00]
E0322 18:25:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:25:43.410722  543705 memory.go:191] Add success.
I0322 18:25:43.409829  543705 cpu.go:282] Add success.
I0322 18:25:43.420457  543705 net.go:648] Add success.
I0322 18:25:43.423046  543705 net.go:770] primary dev: ETH0
I0322 18:25:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:25:43.423077  543705 net.go:698] Add success.
I0322 18:25:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:25:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:25:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:25:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:25:53.409780  543705 memory.go:184] no items to output this cycle
I0322 18:25:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 18:26:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:26:03.409809  543705 memory.go:184] no items to output this cycle
I0322 18:26:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 18:26:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:26:13.409774  543705 memory.go:191] Add success.
I0322 18:26:13.409798  543705 cpu.go:282] Add success.
W0322 18:26:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:26:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:26:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:26:13.420068  543705 net.go:648] Add success.
I0322 18:26:13.422588  543705 net.go:770] primary dev: ETH0
I0322 18:26:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:26:13.422617  543705 net.go:698] Add success.
I0322 18:26:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:26:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:26:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 18:26:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:26:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 18:26:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:26:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:26:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:26:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:26:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:26:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:26:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:26:23.409802  543705 memory.go:184] no items to output this cycle
I0322 18:26:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 18:26:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:26:33.409778  543705 memory.go:184] no items to output this cycle
I0322 18:26:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 18:26:35.969674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:26:35.972193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:26:35.972199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4900 0xc0000c4940]
E0322 18:26:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:26:43.410723  543705 memory.go:191] Add success.
I0322 18:26:43.409800  543705 cpu.go:282] Add success.
I0322 18:26:43.420424  543705 net.go:648] Add success.
I0322 18:26:43.423032  543705 net.go:770] primary dev: ETH0
I0322 18:26:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:26:43.423060  543705 net.go:698] Add success.
I0322 18:26:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:26:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:26:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:26:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:26:53.409776  543705 memory.go:184] no items to output this cycle
I0322 18:26:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 18:27:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:27:03.409803  543705 memory.go:184] no items to output this cycle
I0322 18:27:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 18:27:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:27:13.409788  543705 memory.go:191] Add success.
I0322 18:27:13.409792  543705 cpu.go:282] Add success.
W0322 18:27:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:27:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:27:13.419872  543705 net.go:770] primary dev: ETH0
I0322 18:27:13.419885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:27:13.419897  543705 net.go:698] Add success.
I0322 18:27:13.420121  543705 net.go:648] Add success.
I0322 18:27:13.428765  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 18:27:13.452937  543705 event_worker.go:152] Polling the log file for events...
I0322 18:27:13.463358  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b2a91394-0d40-47c3-8820-2ad104e458e9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:27:13.463389  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 18:27:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:27:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 18:27:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:27:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:27:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:27:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:27:14.456641  543705 disk_worker.go:494] system disk:vda1
I0322 18:27:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:27:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:27:15.456814  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:27:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:27:16.457974  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:27:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:27:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:27:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:27:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:27:23.409788  543705 cpu.go:275] no items to output this cycle
I0322 18:27:23.409791  543705 memory.go:184] no items to output this cycle
E0322 18:27:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:27:33.409780  543705 memory.go:184] no items to output this cycle
I0322 18:27:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 18:27:35.973674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:27:35.976164  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:27:35.976169  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aec0 0xc00007af00]
I0322 18:27:39.752451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:27:39.752457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:27:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:27:43.410712  543705 memory.go:191] Add success.
I0322 18:27:43.409836  543705 cpu.go:282] Add success.
I0322 18:27:43.420488  543705 net.go:648] Add success.
I0322 18:27:43.423551  543705 net.go:770] primary dev: ETH0
I0322 18:27:43.423564  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:27:43.423578  543705 net.go:698] Add success.
I0322 18:27:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:27:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:27:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:27:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:27:53.409782  543705 memory.go:184] no items to output this cycle
I0322 18:27:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 18:28:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:28:03.409777  543705 memory.go:184] no items to output this cycle
I0322 18:28:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 18:28:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:28:13.409809  543705 memory.go:191] Add success.
I0322 18:28:13.409819  543705 cpu.go:282] Add success.
W0322 18:28:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:28:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:28:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:28:13.420158  543705 net.go:648] Add success.
I0322 18:28:13.422900  543705 net.go:770] primary dev: ETH0
I0322 18:28:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:28:13.422938  543705 net.go:698] Add success.
I0322 18:28:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:28:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:28:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 18:28:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:28:14.456476  543705 disk_worker.go:494] system disk:vda1
I0322 18:28:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:28:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:28:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:28:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:28:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:28:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:28:23.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:28:23.409910  543705 cpu.go:275] no items to output this cycle
I0322 18:28:23.409918  543705 memory.go:184] no items to output this cycle
E0322 18:28:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:28:33.409763  543705 memory.go:184] no items to output this cycle
I0322 18:28:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 18:28:35.977674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:28:35.980200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:28:35.980206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c1e80 0xc0004c1ec0]
E0322 18:28:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:28:43.410798  543705 memory.go:191] Add success.
I0322 18:28:43.409805  543705 cpu.go:282] Add success.
I0322 18:28:43.420528  543705 net.go:648] Add success.
I0322 18:28:43.423373  543705 net.go:770] primary dev: ETH0
I0322 18:28:43.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:28:43.423405  543705 net.go:698] Add success.
I0322 18:28:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:28:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:28:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:28:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:28:53.409769  543705 memory.go:184] no items to output this cycle
I0322 18:28:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 18:29:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:29:03.409789  543705 memory.go:184] no items to output this cycle
I0322 18:29:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 18:29:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:29:13.409791  543705 memory.go:191] Add success.
W0322 18:29:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:29:13.409820  543705 cpu.go:282] Add success.
W0322 18:29:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:29:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:29:13.420414  543705 net.go:648] Add success.
I0322 18:29:13.423149  543705 net.go:770] primary dev: ETH0
I0322 18:29:13.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:29:13.423175  543705 net.go:698] Add success.
I0322 18:29:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:29:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:29:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 18:29:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:29:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 18:29:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:29:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:29:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:29:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:29:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:29:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:29:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:29:23.409792  543705 memory.go:184] no items to output this cycle
I0322 18:29:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 18:29:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:29:33.409784  543705 memory.go:184] no items to output this cycle
I0322 18:29:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 18:29:35.981674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:29:35.984229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:29:35.984235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf00 0xc0001aaf40]
E0322 18:29:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:29:43.410604  543705 memory.go:191] Add success.
I0322 18:29:43.409836  543705 cpu.go:282] Add success.
I0322 18:29:43.420318  543705 net.go:648] Add success.
I0322 18:29:43.422745  543705 net.go:770] primary dev: ETH0
I0322 18:29:43.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:29:43.422776  543705 net.go:698] Add success.
I0322 18:29:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:29:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:29:53.409792  543705 memory.go:184] no items to output this cycle
I0322 18:29:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:30:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:30:03.409786  543705 memory.go:184] no items to output this cycle
I0322 18:30:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 18:30:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:30:13.409793  543705 memory.go:191] Add success.
I0322 18:30:13.409816  543705 cpu.go:282] Add success.
W0322 18:30:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:30:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:30:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:30:13.420262  543705 net.go:648] Add success.
I0322 18:30:13.422888  543705 net.go:770] primary dev: ETH0
I0322 18:30:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:30:13.422915  543705 net.go:698] Add success.
I0322 18:30:13.463852  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3c453158-fed4-47ed-a343-900e84030b6c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:30:13.463885  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:30:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:30:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:30:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 18:30:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:30:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 18:30:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:30:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:30:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:30:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:30:23.409818  543705 memory.go:184] no items to output this cycle
I0322 18:30:23.409826  543705 cpu.go:275] no items to output this cycle
E0322 18:30:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:30:33.409816  543705 memory.go:184] no items to output this cycle
I0322 18:30:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 18:30:35.985671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:30:35.988311  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:30:35.988318  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa740 0xc0001aa780]
I0322 18:30:39.753451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:30:39.753457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:30:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:30:43.410651  543705 memory.go:191] Add success.
I0322 18:30:43.409812  543705 cpu.go:282] Add success.
I0322 18:30:43.420455  543705 net.go:648] Add success.
I0322 18:30:43.423177  543705 net.go:770] primary dev: ETH0
I0322 18:30:43.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:30:43.423203  543705 net.go:698] Add success.
I0322 18:30:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:30:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:30:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:30:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:30:53.409771  543705 memory.go:184] no items to output this cycle
I0322 18:30:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 18:31:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:31:03.409764  543705 memory.go:184] no items to output this cycle
I0322 18:31:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:31:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:31:13.409815  543705 memory.go:191] Add success.
I0322 18:31:13.409827  543705 cpu.go:282] Add success.
W0322 18:31:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:31:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:31:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:31:13.420150  543705 net.go:648] Add success.
I0322 18:31:13.422940  543705 net.go:770] primary dev: ETH0
I0322 18:31:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:31:13.422969  543705 net.go:698] Add success.
I0322 18:31:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:31:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:31:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 18:31:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:31:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 18:31:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:31:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:31:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:31:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:31:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:31:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:31:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:31:23.409780  543705 memory.go:184] no items to output this cycle
I0322 18:31:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 18:31:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:31:33.409784  543705 memory.go:184] no items to output this cycle
I0322 18:31:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 18:31:35.989675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:31:35.992236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:31:35.992243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa4c0 0xc0001aa500]
E0322 18:31:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:31:43.410699  543705 memory.go:191] Add success.
I0322 18:31:43.409807  543705 cpu.go:282] Add success.
I0322 18:31:43.420435  543705 net.go:648] Add success.
I0322 18:31:43.423362  543705 net.go:770] primary dev: ETH0
I0322 18:31:43.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:31:43.423392  543705 net.go:698] Add success.
I0322 18:31:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:31:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:31:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:31:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:31:53.409783  543705 memory.go:184] no items to output this cycle
I0322 18:31:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 18:32:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:32:03.409780  543705 memory.go:184] no items to output this cycle
I0322 18:32:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 18:32:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:32:13.409779  543705 memory.go:191] Add success.
I0322 18:32:13.409801  543705 cpu.go:282] Add success.
W0322 18:32:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:32:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:32:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:32:13.420137  543705 net.go:648] Add success.
I0322 18:32:13.422696  543705 net.go:770] primary dev: ETH0
I0322 18:32:13.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:32:13.422728  543705 net.go:698] Add success.
W0322 18:32:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:32:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 18:32:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:32:14.456949  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:32:14.456958  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:32:14.456965  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:32:14.457013  543705 disk_worker.go:494] system disk:vda1
I0322 18:32:14.457053  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:32:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:32:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:32:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:32:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:32:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:32:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:32:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:32:23.409792  543705 memory.go:184] no items to output this cycle
I0322 18:32:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 18:32:33.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:32:33.409893  543705 cpu.go:275] no items to output this cycle
I0322 18:32:33.409899  543705 memory.go:184] no items to output this cycle
I0322 18:32:35.993684  543705 disk_info.go:125] begin check local disk info of client
I0322 18:32:35.996016  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:32:35.996023  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003553c0 0xc000355400]
E0322 18:32:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:32:43.410753  543705 memory.go:191] Add success.
I0322 18:32:43.409836  543705 cpu.go:282] Add success.
I0322 18:32:43.420422  543705 net.go:648] Add success.
I0322 18:32:43.423477  543705 net.go:770] primary dev: ETH0
I0322 18:32:43.423491  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:32:43.423503  543705 net.go:698] Add success.
I0322 18:32:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:32:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:32:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:32:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:32:53.409766  543705 memory.go:184] no items to output this cycle
I0322 18:32:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 18:33:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:33:03.409774  543705 memory.go:184] no items to output this cycle
I0322 18:33:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 18:33:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:33:13.409791  543705 memory.go:191] Add success.
I0322 18:33:13.409791  543705 cpu.go:282] Add success.
W0322 18:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:33:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:33:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:33:13.420173  543705 net.go:648] Add success.
I0322 18:33:13.423159  543705 net.go:770] primary dev: ETH0
I0322 18:33:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:33:13.423183  543705 net.go:698] Add success.
I0322 18:33:13.469144  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"749829ca-772b-48fe-951e-236510ae3ee8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:33:13.469177  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:33:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:33:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 18:33:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:33:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 18:33:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:33:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:33:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:33:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:33:16.458106  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:33:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:33:23.410494  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:33:23.410511  543705 memory.go:184] no items to output this cycle
I0322 18:33:23.410549  543705 cpu.go:275] no items to output this cycle
E0322 18:33:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:33:33.409880  543705 memory.go:184] no items to output this cycle
I0322 18:33:33.409972  543705 cpu.go:275] no items to output this cycle
I0322 18:33:35.997671  543705 disk_info.go:125] begin check local disk info of client
I0322 18:33:36.000192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:33:36.000198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8d80 0xc0002b8dc0]
I0322 18:33:39.753746  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:33:39.753752  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:33:43.410808  543705 memory.go:191] Add success.
I0322 18:33:43.409822  543705 cpu.go:282] Add success.
I0322 18:33:43.420528  543705 net.go:648] Add success.
I0322 18:33:43.423368  543705 net.go:770] primary dev: ETH0
I0322 18:33:43.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:33:43.423414  543705 net.go:698] Add success.
I0322 18:33:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:33:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:33:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:33:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:33:53.409794  543705 memory.go:184] no items to output this cycle
I0322 18:33:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 18:34:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:34:03.409775  543705 memory.go:184] no items to output this cycle
I0322 18:34:03.409775  543705 cpu.go:275] no items to output this cycle
E0322 18:34:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:34:13.409789  543705 memory.go:191] Add success.
I0322 18:34:13.409794  543705 cpu.go:282] Add success.
W0322 18:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:34:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:34:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:34:13.420220  543705 net.go:648] Add success.
I0322 18:34:13.422998  543705 net.go:770] primary dev: ETH0
I0322 18:34:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:34:13.423023  543705 net.go:698] Add success.
I0322 18:34:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:34:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:34:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 18:34:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:34:14.456806  543705 disk_worker.go:494] system disk:vda1
I0322 18:34:14.456837  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:34:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:34:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:34:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:34:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:34:16.472475  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:34:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:34:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 18:34:23.409805  543705 memory.go:184] no items to output this cycle
E0322 18:34:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:34:33.409899  543705 memory.go:184] no items to output this cycle
I0322 18:34:33.409931  543705 cpu.go:275] no items to output this cycle
I0322 18:34:36.001673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:34:36.004088  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:34:36.004095  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bd900 0xc0004bd940]
E0322 18:34:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:34:43.411275  543705 memory.go:191] Add success.
I0322 18:34:43.409813  543705 cpu.go:282] Add success.
I0322 18:34:43.420027  543705 net.go:648] Add success.
I0322 18:34:43.423130  543705 net.go:770] primary dev: ETH0
I0322 18:34:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:34:43.423161  543705 net.go:698] Add success.
I0322 18:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:34:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:34:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:34:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:34:53.409778  543705 memory.go:184] no items to output this cycle
I0322 18:34:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 18:35:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:35:03.409785  543705 cpu.go:275] no items to output this cycle
I0322 18:35:03.409788  543705 memory.go:184] no items to output this cycle
E0322 18:35:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:35:13.409788  543705 memory.go:191] Add success.
I0322 18:35:13.409807  543705 cpu.go:282] Add success.
W0322 18:35:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:35:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:35:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:35:13.420101  543705 net.go:648] Add success.
I0322 18:35:13.423051  543705 net.go:770] primary dev: ETH0
I0322 18:35:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:35:13.423077  543705 net.go:698] Add success.
I0322 18:35:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:35:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:35:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0322 18:35:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:35:14.456635  543705 disk_worker.go:494] system disk:vda1
I0322 18:35:14.456670  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:35:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:35:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:35:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:35:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:35:23.409782  543705 memory.go:184] no items to output this cycle
I0322 18:35:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 18:35:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:35:33.409782  543705 memory.go:184] no items to output this cycle
I0322 18:35:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 18:35:36.005668  543705 disk_info.go:125] begin check local disk info of client
I0322 18:35:36.008165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:35:36.008171  543705 disk_info.go:196] parse disk info done, disk is : [0xc000465140 0xc000465180]
E0322 18:35:43.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:35:43.410739  543705 memory.go:191] Add success.
I0322 18:35:43.410002  543705 cpu.go:282] Add success.
I0322 18:35:43.419716  543705 net.go:648] Add success.
I0322 18:35:43.422565  543705 net.go:770] primary dev: ETH0
I0322 18:35:43.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:35:43.422590  543705 net.go:698] Add success.
I0322 18:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:35:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:35:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:35:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:35:53.409793  543705 memory.go:184] no items to output this cycle
I0322 18:35:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 18:36:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:36:03.409773  543705 memory.go:184] no items to output this cycle
I0322 18:36:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 18:36:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:36:13.409821  543705 memory.go:191] Add success.
I0322 18:36:13.409826  543705 cpu.go:282] Add success.
W0322 18:36:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:36:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:36:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:36:13.420148  543705 net.go:648] Add success.
I0322 18:36:13.423295  543705 net.go:770] primary dev: ETH0
I0322 18:36:13.423310  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:36:13.423326  543705 net.go:698] Add success.
I0322 18:36:13.547954  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ad61b3b-70ac-4183-bae4-ce40cb87726f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:36:13.547989  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:36:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:36:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:36:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 18:36:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:36:14.456570  543705 disk_worker.go:494] system disk:vda1
I0322 18:36:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:36:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:36:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:36:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:36:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:36:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:36:23.409780  543705 memory.go:184] no items to output this cycle
I0322 18:36:23.409846  543705 cpu.go:275] no items to output this cycle
E0322 18:36:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:36:33.409805  543705 memory.go:184] no items to output this cycle
I0322 18:36:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 18:36:36.009672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:36:36.012260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:36:36.012266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2b00 0xc0004b2b40]
I0322 18:36:39.755470  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:36:39.755477  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:36:43.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:36:43.410780  543705 memory.go:191] Add success.
I0322 18:36:43.409916  543705 cpu.go:282] Add success.
I0322 18:36:43.419747  543705 net.go:648] Add success.
I0322 18:36:43.422552  543705 net.go:770] primary dev: ETH0
I0322 18:36:43.422581  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:36:43.422593  543705 net.go:698] Add success.
I0322 18:36:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:36:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:36:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:36:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:36:53.409778  543705 memory.go:184] no items to output this cycle
I0322 18:36:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 18:37:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:37:03.409779  543705 memory.go:184] no items to output this cycle
I0322 18:37:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 18:37:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:37:13.409813  543705 memory.go:191] Add success.
I0322 18:37:13.409825  543705 cpu.go:282] Add success.
W0322 18:37:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:37:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:37:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:37:13.420141  543705 net.go:648] Add success.
I0322 18:37:13.423130  543705 net.go:770] primary dev: ETH0
I0322 18:37:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:37:13.423156  543705 net.go:698] Add success.
I0322 18:37:13.453663  543705 event_worker.go:152] Polling the log file for events...
W0322 18:37:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:37:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 18:37:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:37:14.456955  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:37:14.456965  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:37:14.456971  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:37:14.457015  543705 disk_worker.go:494] system disk:vda1
I0322 18:37:14.457060  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:37:15.456808  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:37:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:37:16.457938  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:37:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:37:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:37:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:37:16.472344  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:37:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:37:23.409799  543705 memory.go:184] no items to output this cycle
I0322 18:37:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:37:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:37:33.409770  543705 memory.go:184] no items to output this cycle
I0322 18:37:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 18:37:36.013674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:37:36.016181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:37:36.016186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000256040 0xc000256080]
E0322 18:37:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:37:43.410710  543705 memory.go:191] Add success.
I0322 18:37:43.409824  543705 cpu.go:282] Add success.
I0322 18:37:43.420881  543705 net.go:648] Add success.
I0322 18:37:43.423521  543705 net.go:770] primary dev: ETH0
I0322 18:37:43.423534  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:37:43.423546  543705 net.go:698] Add success.
I0322 18:37:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:37:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:37:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:37:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:37:53.409785  543705 memory.go:184] no items to output this cycle
I0322 18:37:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 18:38:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:38:03.409766  543705 memory.go:184] no items to output this cycle
I0322 18:38:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:38:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:38:13.409817  543705 memory.go:191] Add success.
I0322 18:38:13.409825  543705 cpu.go:282] Add success.
W0322 18:38:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:38:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:38:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:38:13.420138  543705 net.go:648] Add success.
I0322 18:38:13.422901  543705 net.go:770] primary dev: ETH0
I0322 18:38:13.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:38:13.422925  543705 net.go:698] Add success.
I0322 18:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:38:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:38:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 18:38:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:38:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 18:38:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:38:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:38:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:38:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:38:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:38:23.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:38:23.410250  543705 memory.go:184] no items to output this cycle
I0322 18:38:23.410312  543705 cpu.go:275] no items to output this cycle
E0322 18:38:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:38:33.409774  543705 memory.go:184] no items to output this cycle
I0322 18:38:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 18:38:36.017672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:38:36.020197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:38:36.020203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a8c40 0xc0004a8c80]
E0322 18:38:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:38:43.410694  543705 memory.go:191] Add success.
I0322 18:38:43.409818  543705 cpu.go:282] Add success.
I0322 18:38:43.420385  543705 net.go:648] Add success.
I0322 18:38:43.422908  543705 net.go:770] primary dev: ETH0
I0322 18:38:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:38:43.422937  543705 net.go:698] Add success.
I0322 18:38:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:38:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:38:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:38:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:38:53.409773  543705 memory.go:184] no items to output this cycle
I0322 18:38:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 18:39:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:39:03.409778  543705 memory.go:184] no items to output this cycle
I0322 18:39:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:39:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:39:13.409793  543705 memory.go:191] Add success.
I0322 18:39:13.409794  543705 cpu.go:282] Add success.
W0322 18:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:39:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:39:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:39:13.420240  543705 net.go:648] Add success.
I0322 18:39:13.423323  543705 net.go:770] primary dev: ETH0
I0322 18:39:13.423336  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:39:13.423348  543705 net.go:698] Add success.
I0322 18:39:13.467810  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"51f31b0f-b3b4-4506-8cc8-ef7dab5720d8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:39:13.467843  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:39:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:39:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:39:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 18:39:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:39:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 18:39:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:39:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:39:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:39:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:39:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 18:39:23.409786  543705 memory.go:184] no items to output this cycle
E0322 18:39:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:39:33.409771  543705 memory.go:184] no items to output this cycle
I0322 18:39:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 18:39:36.021680  543705 disk_info.go:125] begin check local disk info of client
I0322 18:39:36.024193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:39:36.024198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000395400 0xc000395440]
I0322 18:39:39.756490  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:39:39.756514  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:39:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:39:43.410680  543705 memory.go:191] Add success.
I0322 18:39:43.409793  543705 cpu.go:282] Add success.
I0322 18:39:43.420376  543705 net.go:648] Add success.
I0322 18:39:43.422883  543705 net.go:770] primary dev: ETH0
I0322 18:39:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:39:43.422909  543705 net.go:698] Add success.
I0322 18:39:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:39:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:39:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:39:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:39:53.409775  543705 memory.go:184] no items to output this cycle
I0322 18:39:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 18:40:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:40:03.409778  543705 cpu.go:275] no items to output this cycle
I0322 18:40:03.409789  543705 memory.go:184] no items to output this cycle
E0322 18:40:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:40:13.409795  543705 memory.go:191] Add success.
I0322 18:40:13.409796  543705 cpu.go:282] Add success.
W0322 18:40:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:40:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:40:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:40:13.420275  543705 net.go:648] Add success.
I0322 18:40:13.423132  543705 net.go:770] primary dev: ETH0
I0322 18:40:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:40:13.423157  543705 net.go:698] Add success.
I0322 18:40:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:40:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:40:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 18:40:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:40:14.456464  543705 disk_worker.go:494] system disk:vda1
I0322 18:40:14.456508  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:40:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:40:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:40:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:40:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:40:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:40:23.409802  543705 memory.go:184] no items to output this cycle
I0322 18:40:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 18:40:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:40:33.409771  543705 memory.go:184] no items to output this cycle
I0322 18:40:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 18:40:36.025675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:40:36.028210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:40:36.028215  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046adc0 0xc00046ae00]
E0322 18:40:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:40:43.410648  543705 memory.go:191] Add success.
I0322 18:40:43.409827  543705 cpu.go:282] Add success.
I0322 18:40:43.420364  543705 net.go:648] Add success.
I0322 18:40:43.422946  543705 net.go:770] primary dev: ETH0
I0322 18:40:43.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:40:43.422976  543705 net.go:698] Add success.
I0322 18:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:40:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:40:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:40:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:40:53.409895  543705 memory.go:184] no items to output this cycle
I0322 18:40:53.409899  543705 cpu.go:275] no items to output this cycle
E0322 18:41:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:41:03.409799  543705 memory.go:184] no items to output this cycle
I0322 18:41:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 18:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:41:13.409789  543705 memory.go:191] Add success.
I0322 18:41:13.409792  543705 cpu.go:282] Add success.
W0322 18:41:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:41:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:41:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:41:13.420130  543705 net.go:648] Add success.
I0322 18:41:13.422830  543705 net.go:770] primary dev: ETH0
I0322 18:41:13.422845  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:41:13.422859  543705 net.go:698] Add success.
I0322 18:41:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:41:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:41:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 18:41:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:41:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 18:41:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:41:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:41:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:41:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:41:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:41:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:41:23.409797  543705 memory.go:184] no items to output this cycle
I0322 18:41:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:41:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:41:33.409783  543705 memory.go:184] no items to output this cycle
I0322 18:41:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 18:41:36.029682  543705 disk_info.go:125] begin check local disk info of client
I0322 18:41:36.032464  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:41:36.032470  543705 disk_info.go:196] parse disk info done, disk is : [0xc000313380 0xc0003133c0]
E0322 18:41:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:41:43.410744  543705 memory.go:191] Add success.
I0322 18:41:43.409833  543705 cpu.go:282] Add success.
I0322 18:41:43.420423  543705 net.go:648] Add success.
I0322 18:41:43.423182  543705 net.go:770] primary dev: ETH0
I0322 18:41:43.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:41:43.423208  543705 net.go:698] Add success.
I0322 18:41:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:41:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:41:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:41:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:41:53.409784  543705 cpu.go:275] no items to output this cycle
I0322 18:41:53.409795  543705 memory.go:184] no items to output this cycle
E0322 18:42:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:42:03.409774  543705 memory.go:184] no items to output this cycle
I0322 18:42:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 18:42:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:42:13.409831  543705 memory.go:191] Add success.
I0322 18:42:13.409835  543705 cpu.go:282] Add success.
W0322 18:42:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:42:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:42:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:42:13.420144  543705 net.go:648] Add success.
I0322 18:42:13.422822  543705 net.go:770] primary dev: ETH0
I0322 18:42:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:42:13.422850  543705 net.go:698] Add success.
I0322 18:42:13.468505  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bcbb2c52-6ae9-453c-9e25-1b8a31dbf48a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:42:13.468539  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 18:42:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:42:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 18:42:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:42:14.456970  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:42:14.456980  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:42:14.456985  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:42:14.457035  543705 disk_worker.go:494] system disk:vda1
I0322 18:42:14.457088  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:42:15.456857  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:42:15.456866  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:42:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:42:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:42:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:42:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:42:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:42:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:42:23.409771  543705 memory.go:184] no items to output this cycle
I0322 18:42:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:42:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:42:33.409791  543705 memory.go:184] no items to output this cycle
I0322 18:42:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 18:42:36.033673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:42:36.036221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:42:36.036227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342c40 0xc000342c80]
I0322 18:42:39.757475  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:42:39.757482  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:42:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:42:43.410802  543705 memory.go:191] Add success.
I0322 18:42:43.409841  543705 cpu.go:282] Add success.
I0322 18:42:43.420507  543705 net.go:648] Add success.
I0322 18:42:43.423229  543705 net.go:770] primary dev: ETH0
I0322 18:42:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:42:43.423258  543705 net.go:698] Add success.
I0322 18:42:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:42:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:42:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:42:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:42:53.409806  543705 memory.go:184] no items to output this cycle
I0322 18:42:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 18:43:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:43:03.409768  543705 memory.go:184] no items to output this cycle
I0322 18:43:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:43:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:43:13.409835  543705 memory.go:191] Add success.
I0322 18:43:13.409842  543705 cpu.go:282] Add success.
W0322 18:43:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:43:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:43:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:43:13.420160  543705 net.go:648] Add success.
I0322 18:43:13.422899  543705 net.go:770] primary dev: ETH0
I0322 18:43:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:43:13.422934  543705 net.go:698] Add success.
I0322 18:43:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:43:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:43:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 18:43:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:43:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 18:43:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:43:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:43:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:43:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:43:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:43:23.409803  543705 memory.go:184] no items to output this cycle
I0322 18:43:23.409824  543705 cpu.go:275] no items to output this cycle
E0322 18:43:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:43:33.409781  543705 memory.go:184] no items to output this cycle
I0322 18:43:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 18:43:36.037674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:43:36.040202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:43:36.040208  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486380 0xc0004863c0]
E0322 18:43:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:43:43.410768  543705 memory.go:191] Add success.
I0322 18:43:43.409804  543705 cpu.go:282] Add success.
I0322 18:43:43.420496  543705 net.go:648] Add success.
I0322 18:43:43.423520  543705 net.go:770] primary dev: ETH0
I0322 18:43:43.423535  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:43:43.423550  543705 net.go:698] Add success.
I0322 18:43:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:43:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:43:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:43:53.409805  543705 memory.go:184] no items to output this cycle
I0322 18:43:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 18:44:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:44:03.409796  543705 memory.go:184] no items to output this cycle
I0322 18:44:03.409808  543705 cpu.go:275] no items to output this cycle
W0322 18:44:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:44:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:44:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 18:44:13.410001  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:44:13.409997  543705 cpu.go:282] Add success.
I0322 18:44:13.410086  543705 memory.go:191] Add success.
I0322 18:44:13.419750  543705 net.go:648] Add success.
I0322 18:44:13.422653  543705 net.go:770] primary dev: ETH0
I0322 18:44:13.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:44:13.422677  543705 net.go:698] Add success.
I0322 18:44:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:44:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:44:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 18:44:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:44:14.456489  543705 disk_worker.go:494] system disk:vda1
I0322 18:44:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:44:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:44:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:44:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:44:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:44:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:44:23.409796  543705 memory.go:184] no items to output this cycle
I0322 18:44:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 18:44:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:44:33.409805  543705 memory.go:184] no items to output this cycle
I0322 18:44:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 18:44:36.041673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:44:36.044300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:44:36.044307  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c0300 0xc0004c0340]
E0322 18:44:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:44:43.410674  543705 memory.go:191] Add success.
I0322 18:44:43.409799  543705 cpu.go:282] Add success.
I0322 18:44:43.420383  543705 net.go:648] Add success.
I0322 18:44:43.423102  543705 net.go:770] primary dev: ETH0
I0322 18:44:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:44:43.423129  543705 net.go:698] Add success.
I0322 18:44:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:44:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:44:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:44:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:44:53.409777  543705 memory.go:184] no items to output this cycle
I0322 18:44:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 18:45:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:45:03.409774  543705 memory.go:184] no items to output this cycle
I0322 18:45:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 18:45:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:45:13.409836  543705 memory.go:191] Add success.
I0322 18:45:13.409847  543705 cpu.go:282] Add success.
W0322 18:45:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:45:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:45:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:45:13.420289  543705 net.go:648] Add success.
I0322 18:45:13.423123  543705 net.go:770] primary dev: ETH0
I0322 18:45:13.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:45:13.423152  543705 net.go:698] Add success.
I0322 18:45:13.467440  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9f35aa5a-0d30-4bf4-a450-ef93f3603273","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:45:13.467470  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:45:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:45:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:45:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 18:45:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:45:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 18:45:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:45:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:45:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:45:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:45:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:45:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:45:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:45:23.409775  543705 memory.go:184] no items to output this cycle
I0322 18:45:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 18:45:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:45:33.409800  543705 memory.go:184] no items to output this cycle
I0322 18:45:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 18:45:36.045673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:45:36.048283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:45:36.048289  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa3c0 0xc0001aa400]
I0322 18:45:39.757736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:45:39.757743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:45:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:45:43.410669  543705 memory.go:191] Add success.
I0322 18:45:43.409811  543705 cpu.go:282] Add success.
I0322 18:45:43.420380  543705 net.go:648] Add success.
I0322 18:45:43.423064  543705 net.go:770] primary dev: ETH0
I0322 18:45:43.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:45:43.423092  543705 net.go:698] Add success.
I0322 18:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:45:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:45:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:45:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:45:53.409768  543705 memory.go:184] no items to output this cycle
I0322 18:45:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:46:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:46:03.409776  543705 memory.go:184] no items to output this cycle
I0322 18:46:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 18:46:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:46:13.409790  543705 memory.go:191] Add success.
I0322 18:46:13.409803  543705 cpu.go:282] Add success.
W0322 18:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:46:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:46:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:46:13.420079  543705 net.go:648] Add success.
I0322 18:46:13.422558  543705 net.go:770] primary dev: ETH0
I0322 18:46:13.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:46:13.422584  543705 net.go:698] Add success.
I0322 18:46:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:46:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:46:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 18:46:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:46:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 18:46:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:46:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:46:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:46:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:46:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:46:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:46:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:46:23.409770  543705 memory.go:184] no items to output this cycle
I0322 18:46:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 18:46:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:46:33.409790  543705 memory.go:184] no items to output this cycle
I0322 18:46:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 18:46:36.049672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:46:36.052231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:46:36.052238  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482280 0xc0004822c0]
E0322 18:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:46:43.410692  543705 memory.go:191] Add success.
I0322 18:46:43.409830  543705 cpu.go:282] Add success.
I0322 18:46:43.420439  543705 net.go:648] Add success.
I0322 18:46:43.423132  543705 net.go:770] primary dev: ETH0
I0322 18:46:43.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:46:43.423163  543705 net.go:698] Add success.
I0322 18:46:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:46:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:46:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:46:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:46:53.409773  543705 memory.go:184] no items to output this cycle
I0322 18:46:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 18:47:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:47:03.409808  543705 memory.go:184] no items to output this cycle
I0322 18:47:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 18:47:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:47:13.409775  543705 memory.go:191] Add success.
W0322 18:47:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:47:13.409805  543705 cpu.go:282] Add success.
W0322 18:47:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:47:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:47:13.420050  543705 net.go:648] Add success.
I0322 18:47:13.422728  543705 net.go:770] primary dev: ETH0
I0322 18:47:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:47:13.422759  543705 net.go:698] Add success.
I0322 18:47:13.453323  543705 event_worker.go:152] Polling the log file for events...
W0322 18:47:14.455520  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:47:14.455693  543705 disk_worker.go:708] disk space is not compliant
W0322 18:47:14.455698  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:47:14.456512  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:47:14.456521  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:47:14.456527  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:47:14.457538  543705 disk_worker.go:494] system disk:vda1
I0322 18:47:14.457566  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:47:15.456805  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:47:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:47:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:47:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:47:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:47:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:47:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:47:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:47:23.409774  543705 memory.go:184] no items to output this cycle
I0322 18:47:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 18:47:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:47:33.409805  543705 memory.go:184] no items to output this cycle
I0322 18:47:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 18:47:36.053674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:47:36.056180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:47:36.056186  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483040 0xc000483080]
E0322 18:47:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:47:43.410736  543705 memory.go:191] Add success.
I0322 18:47:43.409807  543705 cpu.go:282] Add success.
I0322 18:47:43.420445  543705 net.go:648] Add success.
I0322 18:47:43.423589  543705 net.go:770] primary dev: ETH0
I0322 18:47:43.423604  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:47:43.423617  543705 net.go:698] Add success.
I0322 18:47:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:47:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:47:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:47:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:47:53.409777  543705 cpu.go:275] no items to output this cycle
I0322 18:47:53.409778  543705 memory.go:184] no items to output this cycle
E0322 18:48:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:48:03.409808  543705 memory.go:184] no items to output this cycle
I0322 18:48:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 18:48:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:48:13.409791  543705 memory.go:191] Add success.
I0322 18:48:13.409793  543705 cpu.go:282] Add success.
W0322 18:48:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:48:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:48:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:48:13.420050  543705 net.go:648] Add success.
I0322 18:48:13.422798  543705 net.go:770] primary dev: ETH0
I0322 18:48:13.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:48:13.422824  543705 net.go:698] Add success.
I0322 18:48:13.463630  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9267e54-8a4c-44bb-83de-ee086e04e517","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:48:13.463670  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:48:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:48:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:48:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 18:48:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:48:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 18:48:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:48:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:48:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:48:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:48:16.472434  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:48:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:48:23.409799  543705 memory.go:184] no items to output this cycle
I0322 18:48:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 18:48:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:48:33.409778  543705 memory.go:184] no items to output this cycle
I0322 18:48:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 18:48:36.057678  543705 disk_info.go:125] begin check local disk info of client
I0322 18:48:36.060273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:48:36.060279  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab240 0xc0001ab280]
I0322 18:48:39.759490  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:48:39.759495  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:48:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:48:43.410799  543705 memory.go:191] Add success.
I0322 18:48:43.409803  543705 cpu.go:282] Add success.
I0322 18:48:43.420485  543705 net.go:648] Add success.
I0322 18:48:43.423471  543705 net.go:770] primary dev: ETH0
I0322 18:48:43.423485  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:48:43.423504  543705 net.go:698] Add success.
I0322 18:48:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:48:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:48:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:48:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:48:53.409780  543705 memory.go:184] no items to output this cycle
I0322 18:48:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 18:49:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:49:03.409784  543705 memory.go:184] no items to output this cycle
I0322 18:49:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 18:49:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:49:13.409784  543705 memory.go:191] Add success.
W0322 18:49:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:49:13.409812  543705 cpu.go:282] Add success.
W0322 18:49:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:49:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:49:13.420205  543705 net.go:648] Add success.
I0322 18:49:13.422979  543705 net.go:770] primary dev: ETH0
I0322 18:49:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:49:13.423005  543705 net.go:698] Add success.
I0322 18:49:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:49:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:49:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 18:49:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:49:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 18:49:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:49:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:49:16.458014  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:49:16.458091  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:49:16.458117  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:49:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:49:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:49:23.409782  543705 memory.go:184] no items to output this cycle
I0322 18:49:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 18:49:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:49:33.409805  543705 memory.go:184] no items to output this cycle
I0322 18:49:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 18:49:36.061675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:49:36.064257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:49:36.064263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d40 0xc0000c5d80]
E0322 18:49:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:49:43.410681  543705 memory.go:191] Add success.
I0322 18:49:43.409817  543705 cpu.go:282] Add success.
I0322 18:49:43.420403  543705 net.go:648] Add success.
I0322 18:49:43.422809  543705 net.go:770] primary dev: ETH0
I0322 18:49:43.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:49:43.422835  543705 net.go:698] Add success.
I0322 18:49:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:49:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:49:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:49:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:49:53.409795  543705 memory.go:184] no items to output this cycle
I0322 18:49:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 18:50:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:50:03.409780  543705 memory.go:184] no items to output this cycle
I0322 18:50:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 18:50:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:50:13.409785  543705 memory.go:191] Add success.
W0322 18:50:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:50:13.409812  543705 cpu.go:282] Add success.
W0322 18:50:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:50:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:50:13.419918  543705 net.go:770] primary dev: ETH0
I0322 18:50:13.419933  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:50:13.419948  543705 net.go:698] Add success.
I0322 18:50:13.420305  543705 net.go:648] Add success.
I0322 18:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:50:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:50:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 18:50:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:50:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 18:50:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:50:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:50:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:50:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:50:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:50:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:50:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:50:23.409797  543705 memory.go:184] no items to output this cycle
I0322 18:50:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 18:50:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:50:33.409782  543705 memory.go:184] no items to output this cycle
I0322 18:50:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 18:50:36.065673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:50:36.068203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:50:36.068209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c47c0 0xc0000c4800]
E0322 18:50:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:50:43.410686  543705 memory.go:191] Add success.
I0322 18:50:43.409814  543705 cpu.go:282] Add success.
I0322 18:50:43.420410  543705 net.go:648] Add success.
I0322 18:50:43.423141  543705 net.go:770] primary dev: ETH0
I0322 18:50:43.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:50:43.423179  543705 net.go:698] Add success.
I0322 18:50:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:50:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:50:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:50:53.409796  543705 memory.go:184] no items to output this cycle
I0322 18:50:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:51:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:51:03.409773  543705 memory.go:184] no items to output this cycle
I0322 18:51:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 18:51:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:51:13.409818  543705 memory.go:191] Add success.
I0322 18:51:13.409820  543705 cpu.go:282] Add success.
W0322 18:51:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:51:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:51:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:51:13.420355  543705 net.go:648] Add success.
I0322 18:51:13.423258  543705 net.go:770] primary dev: ETH0
I0322 18:51:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:51:13.423284  543705 net.go:698] Add success.
I0322 18:51:13.467834  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"509bbe03-a2e9-4dd2-bcd0-c0c2525daffa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:51:13.467866  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:51:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:51:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0322 18:51:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:51:14.456682  543705 disk_worker.go:494] system disk:vda1
I0322 18:51:14.456711  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:51:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:51:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:51:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:51:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:51:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:51:23.409805  543705 memory.go:184] no items to output this cycle
I0322 18:51:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 18:51:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:51:33.409802  543705 memory.go:184] no items to output this cycle
I0322 18:51:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 18:51:36.069675  543705 disk_info.go:125] begin check local disk info of client
I0322 18:51:36.072250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:51:36.072256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc80 0xc0001abcc0]
I0322 18:51:39.760488  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:51:39.760494  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:51:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:51:43.410600  543705 memory.go:191] Add success.
I0322 18:51:43.409830  543705 cpu.go:282] Add success.
I0322 18:51:43.420311  543705 net.go:648] Add success.
I0322 18:51:43.422879  543705 net.go:770] primary dev: ETH0
I0322 18:51:43.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:51:43.422905  543705 net.go:698] Add success.
I0322 18:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:51:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:51:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:51:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:51:53.409798  543705 memory.go:184] no items to output this cycle
I0322 18:51:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 18:52:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:52:03.409782  543705 memory.go:184] no items to output this cycle
I0322 18:52:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 18:52:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:52:13.409795  543705 memory.go:191] Add success.
I0322 18:52:13.409795  543705 cpu.go:282] Add success.
W0322 18:52:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:52:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:52:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:52:13.420149  543705 net.go:648] Add success.
I0322 18:52:13.423007  543705 net.go:770] primary dev: ETH0
I0322 18:52:13.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:52:13.423038  543705 net.go:698] Add success.
W0322 18:52:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:52:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 18:52:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:52:14.456090  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:52:14.456099  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:52:14.456106  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:52:14.456447  543705 disk_worker.go:494] system disk:vda1
I0322 18:52:14.456477  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:52:15.456856  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:52:15.456865  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:52:16.458080  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:52:16.458080  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:52:16.458136  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:52:16.458155  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:52:16.472542  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:52:23.410344  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:52:23.410360  543705 memory.go:184] no items to output this cycle
I0322 18:52:23.410460  543705 cpu.go:275] no items to output this cycle
E0322 18:52:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:52:33.409806  543705 memory.go:184] no items to output this cycle
I0322 18:52:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 18:52:36.074950  543705 disk_info.go:125] begin check local disk info of client
I0322 18:52:36.077568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:52:36.077575  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc00 0xc0001abc40]
E0322 18:52:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:52:43.410693  543705 memory.go:191] Add success.
I0322 18:52:43.409811  543705 cpu.go:282] Add success.
I0322 18:52:43.420423  543705 net.go:648] Add success.
I0322 18:52:43.423018  543705 net.go:770] primary dev: ETH0
I0322 18:52:43.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:52:43.423045  543705 net.go:698] Add success.
I0322 18:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:52:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:52:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:52:53.410249  543705 memory.go:184] no items to output this cycle
I0322 18:52:53.410277  543705 cpu.go:275] no items to output this cycle
E0322 18:53:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:53:03.409768  543705 memory.go:184] no items to output this cycle
I0322 18:53:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 18:53:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:53:13.409779  543705 memory.go:191] Add success.
W0322 18:53:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:53:13.409808  543705 cpu.go:282] Add success.
W0322 18:53:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:53:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:53:13.420109  543705 net.go:648] Add success.
I0322 18:53:13.422729  543705 net.go:770] primary dev: ETH0
I0322 18:53:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:53:13.422758  543705 net.go:698] Add success.
I0322 18:53:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:53:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:53:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 18:53:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:53:14.456521  543705 disk_worker.go:494] system disk:vda1
I0322 18:53:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:53:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:53:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:53:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:53:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:53:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:53:23.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:53:23.409898  543705 cpu.go:275] no items to output this cycle
I0322 18:53:23.409903  543705 memory.go:184] no items to output this cycle
E0322 18:53:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:53:33.409776  543705 memory.go:184] no items to output this cycle
I0322 18:53:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 18:53:36.077676  543705 disk_info.go:125] begin check local disk info of client
I0322 18:53:36.080191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:53:36.080196  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483a00 0xc000483a40]
E0322 18:53:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:53:43.410600  543705 memory.go:191] Add success.
I0322 18:53:43.409833  543705 cpu.go:282] Add success.
I0322 18:53:43.420295  543705 net.go:648] Add success.
I0322 18:53:43.422841  543705 net.go:770] primary dev: ETH0
I0322 18:53:43.422854  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:53:43.422867  543705 net.go:698] Add success.
I0322 18:53:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:53:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:53:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:53:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:53:53.409762  543705 memory.go:184] no items to output this cycle
I0322 18:53:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:54:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:54:03.409803  543705 memory.go:184] no items to output this cycle
I0322 18:54:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 18:54:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:54:13.409790  543705 memory.go:191] Add success.
I0322 18:54:13.409812  543705 cpu.go:282] Add success.
W0322 18:54:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:54:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:54:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:54:13.420210  543705 net.go:648] Add success.
I0322 18:54:13.422850  543705 net.go:770] primary dev: ETH0
I0322 18:54:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:54:13.422875  543705 net.go:698] Add success.
I0322 18:54:13.468401  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"61677001-204b-474d-b017-d8db5f9a13e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:54:13.468437  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 18:54:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:54:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:54:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 18:54:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:54:14.456520  543705 disk_worker.go:494] system disk:vda1
I0322 18:54:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:54:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:54:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:54:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:54:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:54:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:54:23.409899  543705 memory.go:184] no items to output this cycle
I0322 18:54:23.409916  543705 cpu.go:275] no items to output this cycle
E0322 18:54:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:54:33.409781  543705 memory.go:184] no items to output this cycle
I0322 18:54:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 18:54:36.081673  543705 disk_info.go:125] begin check local disk info of client
I0322 18:54:36.084160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:54:36.084166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa4c0 0xc0001aa500]
I0322 18:54:39.761508  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:54:39.761514  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:54:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:54:43.410703  543705 memory.go:191] Add success.
I0322 18:54:43.409831  543705 cpu.go:282] Add success.
I0322 18:54:43.420414  543705 net.go:648] Add success.
I0322 18:54:43.423404  543705 net.go:770] primary dev: ETH0
I0322 18:54:43.423417  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:54:43.423430  543705 net.go:698] Add success.
I0322 18:54:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:54:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:54:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:54:53.410271  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:54:53.410288  543705 memory.go:184] no items to output this cycle
I0322 18:54:53.410304  543705 cpu.go:275] no items to output this cycle
E0322 18:55:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:55:03.409782  543705 memory.go:184] no items to output this cycle
I0322 18:55:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 18:55:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:55:13.409824  543705 memory.go:191] Add success.
I0322 18:55:13.409827  543705 cpu.go:282] Add success.
W0322 18:55:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:55:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:55:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:55:13.420118  543705 net.go:648] Add success.
I0322 18:55:13.422942  543705 net.go:770] primary dev: ETH0
I0322 18:55:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:55:13.422973  543705 net.go:698] Add success.
I0322 18:55:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:55:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:55:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 18:55:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:55:14.456570  543705 disk_worker.go:494] system disk:vda1
I0322 18:55:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:55:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:55:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:55:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:55:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:55:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:55:23.409778  543705 memory.go:184] no items to output this cycle
I0322 18:55:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 18:55:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:55:33.409791  543705 memory.go:184] no items to output this cycle
I0322 18:55:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 18:55:36.085674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:55:36.088184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:55:36.088190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482c80 0xc000482cc0]
E0322 18:55:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:55:43.410757  543705 memory.go:191] Add success.
I0322 18:55:43.409816  543705 cpu.go:282] Add success.
I0322 18:55:43.420468  543705 net.go:648] Add success.
I0322 18:55:43.423291  543705 net.go:770] primary dev: ETH0
I0322 18:55:43.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:55:43.423318  543705 net.go:698] Add success.
I0322 18:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:55:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:55:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:55:53.409791  543705 memory.go:184] no items to output this cycle
I0322 18:55:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 18:56:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:56:03.409793  543705 memory.go:184] no items to output this cycle
I0322 18:56:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 18:56:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:56:13.409805  543705 memory.go:191] Add success.
I0322 18:56:13.409812  543705 cpu.go:282] Add success.
W0322 18:56:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:56:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:56:13.420218  543705 net.go:648] Add success.
I0322 18:56:13.423022  543705 net.go:770] primary dev: ETH0
I0322 18:56:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:56:13.423050  543705 net.go:698] Add success.
I0322 18:56:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:56:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:56:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 18:56:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:56:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 18:56:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:56:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:56:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:56:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:56:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:56:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:56:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:56:23.409789  543705 memory.go:184] no items to output this cycle
I0322 18:56:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 18:56:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:56:33.409822  543705 memory.go:184] no items to output this cycle
I0322 18:56:33.409836  543705 cpu.go:275] no items to output this cycle
I0322 18:56:36.089672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:56:36.092287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:56:36.092293  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483480 0xc0004834c0]
E0322 18:56:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:56:43.410702  543705 memory.go:191] Add success.
I0322 18:56:43.409817  543705 cpu.go:282] Add success.
I0322 18:56:43.420419  543705 net.go:648] Add success.
I0322 18:56:43.423160  543705 net.go:770] primary dev: ETH0
I0322 18:56:43.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:56:43.423190  543705 net.go:698] Add success.
I0322 18:56:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:56:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:56:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:56:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:56:53.409775  543705 memory.go:184] no items to output this cycle
I0322 18:56:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 18:57:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:57:03.409776  543705 memory.go:184] no items to output this cycle
I0322 18:57:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 18:57:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:57:13.409779  543705 memory.go:191] Add success.
I0322 18:57:13.409802  543705 cpu.go:282] Add success.
W0322 18:57:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:57:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:57:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:57:13.420097  543705 net.go:648] Add success.
I0322 18:57:13.429262  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 18:57:13.429336  543705 net.go:770] primary dev: ETH0
I0322 18:57:13.429347  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:57:13.429358  543705 net.go:698] Add success.
I0322 18:57:13.452954  543705 event_worker.go:152] Polling the log file for events...
I0322 18:57:13.463632  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"36660dbf-b404-45fa-9b5c-75221199e944","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 18:57:13.463664  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 18:57:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:57:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 18:57:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0322 18:57:14.455775  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 18:57:14.455782  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 18:57:14.455786  543705 custom_config.go:64] query custom config with name: gpu
I0322 18:57:14.456948  543705 disk_worker.go:494] system disk:vda1
I0322 18:57:14.456995  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 18:57:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 18:57:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:57:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 18:57:16.457917  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 18:57:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:57:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:57:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:57:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:57:23.409802  543705 memory.go:184] no items to output this cycle
I0322 18:57:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 18:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:57:33.409799  543705 memory.go:184] no items to output this cycle
I0322 18:57:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 18:57:36.093674  543705 disk_info.go:125] begin check local disk info of client
I0322 18:57:36.096224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:57:36.096230  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aa40 0xc00007aac0]
I0322 18:57:39.761732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 18:57:39.761738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 18:57:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:57:43.410724  543705 memory.go:191] Add success.
I0322 18:57:43.409814  543705 cpu.go:282] Add success.
I0322 18:57:43.420460  543705 net.go:648] Add success.
I0322 18:57:43.423328  543705 net.go:770] primary dev: ETH0
I0322 18:57:43.423341  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:57:43.423354  543705 net.go:698] Add success.
I0322 18:57:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:57:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:57:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:57:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:57:53.409781  543705 memory.go:184] no items to output this cycle
I0322 18:57:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 18:58:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:58:03.409796  543705 memory.go:184] no items to output this cycle
I0322 18:58:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 18:58:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:58:13.409817  543705 memory.go:191] Add success.
I0322 18:58:13.409825  543705 cpu.go:282] Add success.
W0322 18:58:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 18:58:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:58:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:58:13.420172  543705 net.go:648] Add success.
I0322 18:58:13.422630  543705 net.go:770] primary dev: ETH0
I0322 18:58:13.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:58:13.422661  543705 net.go:698] Add success.
I0322 18:58:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:58:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:58:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 18:58:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:58:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 18:58:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:58:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:58:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:58:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:58:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:58:23.409781  543705 cpu.go:275] no items to output this cycle
I0322 18:58:23.409783  543705 memory.go:184] no items to output this cycle
E0322 18:58:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:58:33.409784  543705 memory.go:184] no items to output this cycle
I0322 18:58:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 18:58:36.097676  543705 disk_info.go:125] begin check local disk info of client
I0322 18:58:36.100207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:58:36.100212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d40 0xc0000c4d80]
E0322 18:58:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:58:43.410711  543705 memory.go:191] Add success.
I0322 18:58:43.409808  543705 cpu.go:282] Add success.
I0322 18:58:43.420401  543705 net.go:648] Add success.
I0322 18:58:43.423034  543705 net.go:770] primary dev: ETH0
I0322 18:58:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:58:43.423060  543705 net.go:698] Add success.
I0322 18:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:58:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:58:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:58:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:58:53.409773  543705 memory.go:184] no items to output this cycle
I0322 18:58:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 18:59:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:59:03.409774  543705 memory.go:184] no items to output this cycle
I0322 18:59:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 18:59:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:59:13.409781  543705 memory.go:191] Add success.
W0322 18:59:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 18:59:13.409807  543705 cpu.go:282] Add success.
W0322 18:59:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 18:59:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 18:59:13.420138  543705 net.go:648] Add success.
I0322 18:59:13.422831  543705 net.go:770] primary dev: ETH0
I0322 18:59:13.422847  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:59:13.422860  543705 net.go:698] Add success.
I0322 18:59:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 18:59:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 18:59:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 18:59:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 18:59:14.456564  543705 disk_worker.go:494] system disk:vda1
I0322 18:59:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 18:59:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 18:59:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:59:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:59:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 18:59:16.472511  543705 disk_local_worker.go:436] Get disk info: []
E0322 18:59:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:59:23.409802  543705 memory.go:184] no items to output this cycle
I0322 18:59:23.409811  543705 cpu.go:275] no items to output this cycle
I0322 18:59:33.409977  543705 cpu.go:275] no items to output this cycle
E0322 18:59:33.410094  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:59:33.410109  543705 memory.go:184] no items to output this cycle
I0322 18:59:36.101672  543705 disk_info.go:125] begin check local disk info of client
I0322 18:59:36.104197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 18:59:36.104203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004878c0 0xc000487900]
E0322 18:59:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:59:43.410657  543705 memory.go:191] Add success.
I0322 18:59:43.409842  543705 cpu.go:282] Add success.
I0322 18:59:43.420398  543705 net.go:648] Add success.
I0322 18:59:43.423121  543705 net.go:770] primary dev: ETH0
I0322 18:59:43.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0322 18:59:43.423147  543705 net.go:698] Add success.
I0322 18:59:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 18:59:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 18:59:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 18:59:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 18:59:53.409804  543705 memory.go:184] no items to output this cycle
I0322 18:59:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 19:00:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:00:03.409787  543705 cpu.go:275] no items to output this cycle
I0322 19:00:03.409790  543705 memory.go:184] no items to output this cycle
E0322 19:00:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:00:13.409786  543705 memory.go:191] Add success.
I0322 19:00:13.409791  543705 cpu.go:282] Add success.
W0322 19:00:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:00:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:00:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:00:13.420130  543705 net.go:648] Add success.
I0322 19:00:13.422759  543705 net.go:770] primary dev: ETH0
I0322 19:00:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:00:13.422784  543705 net.go:698] Add success.
I0322 19:00:13.463901  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3466a364-a094-4942-9d66-9041998796de","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:00:13.463935  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:00:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:00:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 19:00:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:00:14.456534  543705 disk_worker.go:494] system disk:vda1
I0322 19:00:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:00:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:00:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:00:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:00:23.409779  543705 memory.go:184] no items to output this cycle
I0322 19:00:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 19:00:33.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:00:33.409879  543705 memory.go:184] no items to output this cycle
I0322 19:00:33.409962  543705 cpu.go:275] no items to output this cycle
I0322 19:00:36.105673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:00:36.108216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:00:36.108222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007adc0 0xc00007ae00]
I0322 19:00:39.763514  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:00:39.763521  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:00:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:00:43.410790  543705 memory.go:191] Add success.
I0322 19:00:43.409818  543705 cpu.go:282] Add success.
I0322 19:00:43.420508  543705 net.go:648] Add success.
I0322 19:00:43.423383  543705 net.go:770] primary dev: ETH0
I0322 19:00:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:00:43.423409  543705 net.go:698] Add success.
I0322 19:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:00:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:00:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:00:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:00:53.409804  543705 memory.go:184] no items to output this cycle
I0322 19:00:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 19:01:03.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:01:03.409867  543705 cpu.go:275] no items to output this cycle
I0322 19:01:03.409883  543705 memory.go:184] no items to output this cycle
E0322 19:01:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:01:13.409791  543705 memory.go:191] Add success.
I0322 19:01:13.409796  543705 cpu.go:282] Add success.
W0322 19:01:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:01:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:01:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:01:13.420042  543705 net.go:648] Add success.
I0322 19:01:13.422901  543705 net.go:770] primary dev: ETH0
I0322 19:01:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:01:13.422930  543705 net.go:698] Add success.
I0322 19:01:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:01:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:01:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 19:01:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:01:14.456603  543705 disk_worker.go:494] system disk:vda1
I0322 19:01:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:01:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:01:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:01:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:01:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:01:23.409780  543705 memory.go:184] no items to output this cycle
I0322 19:01:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 19:01:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:01:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 19:01:33.409782  543705 memory.go:184] no items to output this cycle
I0322 19:01:36.109673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:01:36.112189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:01:36.112196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7c40 0xc0004a7c80]
E0322 19:01:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:01:43.410603  543705 memory.go:191] Add success.
I0322 19:01:43.409840  543705 cpu.go:282] Add success.
I0322 19:01:43.420337  543705 net.go:648] Add success.
I0322 19:01:43.423148  543705 net.go:770] primary dev: ETH0
I0322 19:01:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:01:43.423184  543705 net.go:698] Add success.
I0322 19:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:01:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:01:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:01:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:01:53.409783  543705 memory.go:184] no items to output this cycle
I0322 19:01:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 19:02:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:02:03.409797  543705 memory.go:184] no items to output this cycle
I0322 19:02:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 19:02:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:02:13.409774  543705 memory.go:191] Add success.
W0322 19:02:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:02:13.409799  543705 cpu.go:282] Add success.
W0322 19:02:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:02:13.409812  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:02:13.420072  543705 net.go:648] Add success.
I0322 19:02:13.422895  543705 net.go:770] primary dev: ETH0
I0322 19:02:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:02:13.422925  543705 net.go:698] Add success.
W0322 19:02:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:02:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 19:02:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:02:14.456943  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:02:14.456952  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:02:14.456959  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:02:14.457010  543705 disk_worker.go:494] system disk:vda1
I0322 19:02:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:02:15.456774  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:02:15.456782  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:02:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:02:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:02:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:02:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:02:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:02:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:02:23.409768  543705 memory.go:184] no items to output this cycle
I0322 19:02:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 19:02:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:02:33.409801  543705 memory.go:184] no items to output this cycle
I0322 19:02:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 19:02:36.113676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:02:36.116271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:02:36.116278  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a000 0xc00034a040]
E0322 19:02:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:02:43.410850  543705 memory.go:191] Add success.
I0322 19:02:43.409816  543705 cpu.go:282] Add success.
I0322 19:02:43.420595  543705 net.go:648] Add success.
I0322 19:02:43.423379  543705 net.go:770] primary dev: ETH0
I0322 19:02:43.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:02:43.423405  543705 net.go:698] Add success.
I0322 19:02:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:02:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:02:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:02:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:02:53.409788  543705 memory.go:184] no items to output this cycle
I0322 19:02:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 19:03:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:03:03.409798  543705 memory.go:184] no items to output this cycle
I0322 19:03:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 19:03:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:03:13.409811  543705 memory.go:191] Add success.
I0322 19:03:13.409815  543705 cpu.go:282] Add success.
W0322 19:03:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:03:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:03:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:03:13.420180  543705 net.go:648] Add success.
I0322 19:03:13.423367  543705 net.go:770] primary dev: ETH0
I0322 19:03:13.423382  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:03:13.423397  543705 net.go:698] Add success.
I0322 19:03:13.463220  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12602cca-3bd2-42c8-8932-fcfb0b358f46","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:03:13.463252  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:03:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:03:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:03:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 19:03:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:03:14.456786  543705 disk_worker.go:494] system disk:vda1
I0322 19:03:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:03:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:03:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:03:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:03:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:03:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:03:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 19:03:23.409787  543705 memory.go:184] no items to output this cycle
E0322 19:03:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:03:33.409795  543705 memory.go:184] no items to output this cycle
I0322 19:03:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 19:03:36.117682  543705 disk_info.go:125] begin check local disk info of client
I0322 19:03:36.120185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:03:36.120192  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a040 0xc00047a080]
I0322 19:03:39.764513  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:03:39.764518  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:03:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:03:43.410736  543705 memory.go:191] Add success.
I0322 19:03:43.409814  543705 cpu.go:282] Add success.
I0322 19:03:43.420483  543705 net.go:648] Add success.
I0322 19:03:43.423428  543705 net.go:770] primary dev: ETH0
I0322 19:03:43.423456  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:03:43.423469  543705 net.go:698] Add success.
I0322 19:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:03:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:03:53.409802  543705 memory.go:184] no items to output this cycle
I0322 19:03:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 19:04:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:04:03.409777  543705 memory.go:184] no items to output this cycle
I0322 19:04:03.409780  543705 cpu.go:275] no items to output this cycle
E0322 19:04:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:04:13.409808  543705 memory.go:191] Add success.
I0322 19:04:13.409816  543705 cpu.go:282] Add success.
W0322 19:04:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:04:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:04:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:04:13.420054  543705 net.go:648] Add success.
I0322 19:04:13.422680  543705 net.go:770] primary dev: ETH0
I0322 19:04:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:04:13.422713  543705 net.go:698] Add success.
I0322 19:04:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:04:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:04:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 19:04:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:04:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 19:04:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:04:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:04:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:04:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:04:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:04:23.409766  543705 memory.go:184] no items to output this cycle
I0322 19:04:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 19:04:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:04:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 19:04:33.409791  543705 memory.go:184] no items to output this cycle
I0322 19:04:36.121667  543705 disk_info.go:125] begin check local disk info of client
I0322 19:04:36.124202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:04:36.124209  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508ac0 0xc000508b00]
E0322 19:04:43.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:04:43.410694  543705 memory.go:191] Add success.
I0322 19:04:43.409934  543705 cpu.go:282] Add success.
I0322 19:04:43.419709  543705 net.go:648] Add success.
I0322 19:04:43.422507  543705 net.go:770] primary dev: ETH0
I0322 19:04:43.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:04:43.422533  543705 net.go:698] Add success.
I0322 19:04:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:04:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:04:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:04:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:04:53.409773  543705 memory.go:184] no items to output this cycle
I0322 19:04:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 19:05:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:05:03.409759  543705 memory.go:184] no items to output this cycle
I0322 19:05:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 19:05:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:05:13.409790  543705 memory.go:191] Add success.
I0322 19:05:13.409794  543705 cpu.go:282] Add success.
W0322 19:05:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:05:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:05:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:05:13.420055  543705 net.go:648] Add success.
I0322 19:05:13.422617  543705 net.go:770] primary dev: ETH0
I0322 19:05:13.422632  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:05:13.422646  543705 net.go:698] Add success.
I0322 19:05:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:05:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:05:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 19:05:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:05:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 19:05:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:05:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:05:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:05:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:05:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:05:23.409800  543705 memory.go:184] no items to output this cycle
I0322 19:05:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 19:05:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:05:33.409803  543705 memory.go:184] no items to output this cycle
I0322 19:05:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 19:05:36.125676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:05:36.128178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:05:36.128185  543705 disk_info.go:196] parse disk info done, disk is : [0xc000506200 0xc000506240]
E0322 19:05:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:05:43.410556  543705 memory.go:191] Add success.
I0322 19:05:43.409838  543705 cpu.go:282] Add success.
I0322 19:05:43.420258  543705 net.go:648] Add success.
I0322 19:05:43.422836  543705 net.go:770] primary dev: ETH0
I0322 19:05:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:05:43.422862  543705 net.go:698] Add success.
I0322 19:05:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:05:53.410211  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:05:53.410228  543705 memory.go:184] no items to output this cycle
I0322 19:05:53.410236  543705 cpu.go:275] no items to output this cycle
E0322 19:06:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:06:03.409774  543705 memory.go:184] no items to output this cycle
I0322 19:06:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:06:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:06:13.409808  543705 memory.go:191] Add success.
I0322 19:06:13.409815  543705 cpu.go:282] Add success.
W0322 19:06:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:06:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:06:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:06:13.420052  543705 net.go:648] Add success.
I0322 19:06:13.422998  543705 net.go:770] primary dev: ETH0
I0322 19:06:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:06:13.423022  543705 net.go:698] Add success.
I0322 19:06:13.658070  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a948097-64db-4fb8-9e3d-deb64daa2677","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:06:13.658105  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:06:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:06:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:06:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 19:06:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:06:14.456534  543705 disk_worker.go:494] system disk:vda1
I0322 19:06:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:06:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:06:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:06:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:06:23.409772  543705 memory.go:184] no items to output this cycle
I0322 19:06:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 19:06:33.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:06:33.409898  543705 memory.go:184] no items to output this cycle
I0322 19:06:33.409933  543705 cpu.go:275] no items to output this cycle
I0322 19:06:36.129671  543705 disk_info.go:125] begin check local disk info of client
I0322 19:06:36.132301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:06:36.132307  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7340 0xc0004a7380]
I0322 19:06:39.765512  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:06:39.765518  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:06:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:06:43.410703  543705 memory.go:191] Add success.
I0322 19:06:43.409837  543705 cpu.go:282] Add success.
I0322 19:06:43.420411  543705 net.go:648] Add success.
I0322 19:06:43.423352  543705 net.go:770] primary dev: ETH0
I0322 19:06:43.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:06:43.423382  543705 net.go:698] Add success.
I0322 19:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:06:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:06:53.409783  543705 memory.go:184] no items to output this cycle
I0322 19:06:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 19:07:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:07:03.409769  543705 memory.go:184] no items to output this cycle
I0322 19:07:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 19:07:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:07:13.409788  543705 memory.go:191] Add success.
I0322 19:07:13.409795  543705 cpu.go:282] Add success.
W0322 19:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:07:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:07:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:07:13.420041  543705 net.go:648] Add success.
I0322 19:07:13.422647  543705 net.go:770] primary dev: ETH0
I0322 19:07:13.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:07:13.422671  543705 net.go:698] Add success.
I0322 19:07:13.453226  543705 event_worker.go:152] Polling the log file for events...
W0322 19:07:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:07:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 19:07:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:07:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:07:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:07:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:07:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 19:07:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:07:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:07:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:07:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:07:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:07:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:07:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:07:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:07:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:07:23.409776  543705 memory.go:184] no items to output this cycle
I0322 19:07:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 19:07:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:07:33.409815  543705 memory.go:184] no items to output this cycle
I0322 19:07:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 19:07:36.133680  543705 disk_info.go:125] begin check local disk info of client
I0322 19:07:36.136243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:07:36.136250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5a00 0xc0004b5a40]
E0322 19:07:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:07:43.410730  543705 memory.go:191] Add success.
I0322 19:07:43.409842  543705 cpu.go:282] Add success.
I0322 19:07:43.420436  543705 net.go:648] Add success.
I0322 19:07:43.423079  543705 net.go:770] primary dev: ETH0
I0322 19:07:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:07:43.423104  543705 net.go:698] Add success.
I0322 19:07:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:07:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:07:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:07:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:07:53.409789  543705 memory.go:184] no items to output this cycle
I0322 19:07:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:08:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:08:03.409809  543705 memory.go:184] no items to output this cycle
I0322 19:08:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 19:08:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:08:13.409782  543705 memory.go:191] Add success.
W0322 19:08:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:08:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:08:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:08:13.409841  543705 cpu.go:282] Add success.
I0322 19:08:13.420045  543705 net.go:648] Add success.
I0322 19:08:13.422732  543705 net.go:770] primary dev: ETH0
I0322 19:08:13.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:08:13.422763  543705 net.go:698] Add success.
I0322 19:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:08:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:08:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 19:08:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:08:14.456495  543705 disk_worker.go:494] system disk:vda1
I0322 19:08:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:08:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:08:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:08:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:08:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:08:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:08:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 19:08:23.409792  543705 memory.go:184] no items to output this cycle
E0322 19:08:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:08:33.409795  543705 memory.go:184] no items to output this cycle
I0322 19:08:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 19:08:36.137677  543705 disk_info.go:125] begin check local disk info of client
I0322 19:08:36.140236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:08:36.140243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001dc000 0xc0001dc040]
E0322 19:08:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:08:43.410706  543705 memory.go:191] Add success.
I0322 19:08:43.409837  543705 cpu.go:282] Add success.
I0322 19:08:43.420491  543705 net.go:648] Add success.
I0322 19:08:43.423311  543705 net.go:770] primary dev: ETH0
I0322 19:08:43.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:08:43.423337  543705 net.go:698] Add success.
I0322 19:08:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:08:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:08:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:08:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:08:53.409792  543705 memory.go:184] no items to output this cycle
I0322 19:08:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 19:09:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:09:03.409781  543705 memory.go:184] no items to output this cycle
I0322 19:09:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 19:09:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:09:13.409827  543705 memory.go:191] Add success.
I0322 19:09:13.409832  543705 cpu.go:282] Add success.
W0322 19:09:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:09:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:09:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:09:13.420193  543705 net.go:648] Add success.
I0322 19:09:13.423409  543705 net.go:770] primary dev: ETH0
I0322 19:09:13.423423  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:09:13.423434  543705 net.go:698] Add success.
I0322 19:09:13.493664  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"438230bf-5ec9-438b-9fab-f7b343d66bd1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:09:13.493697  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:09:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:09:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:09:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 19:09:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:09:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 19:09:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:09:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:09:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:09:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:09:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:09:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:09:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:09:23.409813  543705 memory.go:184] no items to output this cycle
I0322 19:09:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 19:09:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:09:33.409775  543705 memory.go:184] no items to output this cycle
I0322 19:09:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 19:09:36.141675  543705 disk_info.go:125] begin check local disk info of client
I0322 19:09:36.144185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:09:36.144191  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051fbc0 0xc00051fc00]
I0322 19:09:39.765737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:09:39.765743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:09:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:09:43.410661  543705 memory.go:191] Add success.
I0322 19:09:43.409819  543705 cpu.go:282] Add success.
I0322 19:09:43.420385  543705 net.go:648] Add success.
I0322 19:09:43.423006  543705 net.go:770] primary dev: ETH0
I0322 19:09:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:09:43.423032  543705 net.go:698] Add success.
I0322 19:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:09:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:09:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:09:53.409775  543705 memory.go:184] no items to output this cycle
I0322 19:09:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 19:10:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:10:03.409773  543705 memory.go:184] no items to output this cycle
I0322 19:10:03.409779  543705 cpu.go:275] no items to output this cycle
E0322 19:10:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:10:13.409808  543705 memory.go:191] Add success.
I0322 19:10:13.409822  543705 cpu.go:282] Add success.
W0322 19:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:10:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:10:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:10:13.420059  543705 net.go:648] Add success.
I0322 19:10:13.422566  543705 net.go:770] primary dev: ETH0
I0322 19:10:13.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:10:13.422590  543705 net.go:698] Add success.
I0322 19:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:10:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:10:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 19:10:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:10:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 19:10:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:10:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:10:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:10:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:10:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:10:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:10:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:10:23.409763  543705 memory.go:184] no items to output this cycle
I0322 19:10:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:10:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:10:33.409813  543705 memory.go:184] no items to output this cycle
I0322 19:10:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 19:10:36.145676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:10:36.148283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:10:36.148289  543705 disk_info.go:196] parse disk info done, disk is : [0xc000232e80 0xc000232ec0]
E0322 19:10:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:10:43.410671  543705 memory.go:191] Add success.
I0322 19:10:43.409818  543705 cpu.go:282] Add success.
I0322 19:10:43.420413  543705 net.go:648] Add success.
I0322 19:10:43.423274  543705 net.go:770] primary dev: ETH0
I0322 19:10:43.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:10:43.423306  543705 net.go:698] Add success.
I0322 19:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:10:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:10:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:10:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:10:53.409764  543705 memory.go:184] no items to output this cycle
I0322 19:10:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:11:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:11:03.409787  543705 memory.go:184] no items to output this cycle
I0322 19:11:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 19:11:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:11:13.409788  543705 memory.go:191] Add success.
I0322 19:11:13.409804  543705 cpu.go:282] Add success.
W0322 19:11:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:11:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:11:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:11:13.420247  543705 net.go:648] Add success.
I0322 19:11:13.423172  543705 net.go:770] primary dev: ETH0
I0322 19:11:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:11:13.423201  543705 net.go:698] Add success.
I0322 19:11:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:11:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:11:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 19:11:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:11:14.456505  543705 disk_worker.go:494] system disk:vda1
I0322 19:11:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:11:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:11:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:11:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:11:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:11:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:11:23.409788  543705 memory.go:184] no items to output this cycle
I0322 19:11:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:11:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:11:33.409806  543705 memory.go:184] no items to output this cycle
I0322 19:11:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 19:11:36.149671  543705 disk_info.go:125] begin check local disk info of client
I0322 19:11:36.152216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:11:36.152222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0322 19:11:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:11:43.410628  543705 memory.go:191] Add success.
I0322 19:11:43.409815  543705 cpu.go:282] Add success.
I0322 19:11:43.420350  543705 net.go:648] Add success.
I0322 19:11:43.423100  543705 net.go:770] primary dev: ETH0
I0322 19:11:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:11:43.423126  543705 net.go:698] Add success.
I0322 19:11:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:11:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:11:53.410405  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:11:53.410419  543705 memory.go:184] no items to output this cycle
I0322 19:11:53.410425  543705 cpu.go:275] no items to output this cycle
E0322 19:12:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:12:03.409771  543705 memory.go:184] no items to output this cycle
I0322 19:12:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 19:12:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:12:13.409805  543705 memory.go:191] Add success.
I0322 19:12:13.409811  543705 cpu.go:282] Add success.
W0322 19:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:12:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:12:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:12:13.420056  543705 net.go:648] Add success.
I0322 19:12:13.423062  543705 net.go:770] primary dev: ETH0
I0322 19:12:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:12:13.423091  543705 net.go:698] Add success.
I0322 19:12:13.469313  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0c831f48-84db-42d9-91b5-dad617ea38d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:12:13.469349  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 19:12:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:12:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 19:12:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:12:14.455894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:12:14.455903  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:12:14.455908  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:12:14.456637  543705 disk_worker.go:494] system disk:vda1
I0322 19:12:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:12:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:12:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:12:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:12:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:12:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:12:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:12:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:12:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:12:23.409772  543705 cpu.go:275] no items to output this cycle
I0322 19:12:23.409780  543705 memory.go:184] no items to output this cycle
E0322 19:12:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:12:33.409791  543705 memory.go:184] no items to output this cycle
I0322 19:12:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 19:12:36.153675  543705 disk_info.go:125] begin check local disk info of client
I0322 19:12:36.156219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:12:36.156225  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508fc0 0xc000509000]
I0322 19:12:39.767535  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:12:39.767541  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:12:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:12:43.410750  543705 memory.go:191] Add success.
I0322 19:12:43.409811  543705 cpu.go:282] Add success.
I0322 19:12:43.420498  543705 net.go:648] Add success.
I0322 19:12:43.423372  543705 net.go:770] primary dev: ETH0
I0322 19:12:43.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:12:43.423399  543705 net.go:698] Add success.
I0322 19:12:46.457664  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:12:46.457736  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:12:46.457763  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:12:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:12:53.409773  543705 memory.go:184] no items to output this cycle
I0322 19:12:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 19:13:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:13:03.409779  543705 cpu.go:275] no items to output this cycle
I0322 19:13:03.409782  543705 memory.go:184] no items to output this cycle
E0322 19:13:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:13:13.409795  543705 memory.go:191] Add success.
I0322 19:13:13.409795  543705 cpu.go:282] Add success.
W0322 19:13:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:13:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:13:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:13:13.420110  543705 net.go:648] Add success.
I0322 19:13:13.422804  543705 net.go:770] primary dev: ETH0
I0322 19:13:13.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:13:13.422829  543705 net.go:698] Add success.
I0322 19:13:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:13:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:13:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 19:13:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:13:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 19:13:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:13:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:13:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:13:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:13:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:13:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:13:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:13:23.409790  543705 cpu.go:275] no items to output this cycle
I0322 19:13:23.409797  543705 memory.go:184] no items to output this cycle
E0322 19:13:33.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:13:33.409895  543705 memory.go:184] no items to output this cycle
I0322 19:13:33.409950  543705 cpu.go:275] no items to output this cycle
I0322 19:13:36.157678  543705 disk_info.go:125] begin check local disk info of client
I0322 19:13:36.160177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:13:36.160184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 19:13:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:13:43.410623  543705 memory.go:191] Add success.
I0322 19:13:43.409831  543705 cpu.go:282] Add success.
I0322 19:13:43.420366  543705 net.go:648] Add success.
I0322 19:13:43.422925  543705 net.go:770] primary dev: ETH0
I0322 19:13:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:13:43.422950  543705 net.go:698] Add success.
I0322 19:13:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:13:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:13:53.409763  543705 memory.go:184] no items to output this cycle
I0322 19:13:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 19:14:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:14:03.409770  543705 memory.go:184] no items to output this cycle
I0322 19:14:03.409778  543705 cpu.go:275] no items to output this cycle
W0322 19:14:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:14:13.409723  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:14:13.409729  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:14:13.409791  543705 cpu.go:282] Add success.
E0322 19:14:13.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:14:13.409853  543705 memory.go:191] Add success.
I0322 19:14:13.420096  543705 net.go:648] Add success.
I0322 19:14:13.422971  543705 net.go:770] primary dev: ETH0
I0322 19:14:13.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:14:13.422998  543705 net.go:698] Add success.
I0322 19:14:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:14:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:14:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 19:14:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:14:14.456505  543705 disk_worker.go:494] system disk:vda1
I0322 19:14:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:14:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:14:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:14:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:14:23.409763  543705 memory.go:184] no items to output this cycle
I0322 19:14:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 19:14:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:14:33.409780  543705 memory.go:184] no items to output this cycle
I0322 19:14:33.409809  543705 cpu.go:275] no items to output this cycle
I0322 19:14:36.161677  543705 disk_info.go:125] begin check local disk info of client
I0322 19:14:36.164195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:14:36.164201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b840 0xc00047b880]
E0322 19:14:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:14:43.410636  543705 memory.go:191] Add success.
I0322 19:14:43.409811  543705 cpu.go:282] Add success.
I0322 19:14:43.420332  543705 net.go:648] Add success.
I0322 19:14:43.422995  543705 net.go:770] primary dev: ETH0
I0322 19:14:43.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:14:43.423026  543705 net.go:698] Add success.
I0322 19:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:14:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:14:53.409797  543705 memory.go:184] no items to output this cycle
I0322 19:14:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 19:15:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:15:03.409769  543705 memory.go:184] no items to output this cycle
I0322 19:15:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 19:15:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:15:13.409814  543705 memory.go:191] Add success.
I0322 19:15:13.409816  543705 cpu.go:282] Add success.
W0322 19:15:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:15:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:15:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:15:13.420058  543705 net.go:648] Add success.
I0322 19:15:13.422530  543705 net.go:770] primary dev: ETH0
I0322 19:15:13.422543  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:15:13.422558  543705 net.go:698] Add success.
I0322 19:15:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:15:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:15:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 19:15:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:15:14.456494  543705 disk_worker.go:494] system disk:vda1
I0322 19:15:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:15:14.710254  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d02d2237-6b9c-4f6d-be68-dcb882092c17","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:15:14.710289  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:15:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:15:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:15:23.409804  543705 memory.go:184] no items to output this cycle
I0322 19:15:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 19:15:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:15:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 19:15:33.409792  543705 memory.go:184] no items to output this cycle
I0322 19:15:36.165730  543705 disk_info.go:125] begin check local disk info of client
I0322 19:15:36.168176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:15:36.168183  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387d40 0xc000387d80]
I0322 19:15:39.768551  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:15:39.768557  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:15:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:15:43.410732  543705 memory.go:191] Add success.
I0322 19:15:43.409838  543705 cpu.go:282] Add success.
I0322 19:15:43.420405  543705 net.go:648] Add success.
I0322 19:15:43.423149  543705 net.go:770] primary dev: ETH0
I0322 19:15:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:15:43.423175  543705 net.go:698] Add success.
I0322 19:15:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:15:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:15:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:15:53.409766  543705 memory.go:184] no items to output this cycle
I0322 19:15:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 19:16:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:16:03.409781  543705 memory.go:184] no items to output this cycle
I0322 19:16:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 19:16:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:16:13.409807  543705 memory.go:191] Add success.
I0322 19:16:13.409820  543705 cpu.go:282] Add success.
W0322 19:16:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:16:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:16:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:16:13.420217  543705 net.go:648] Add success.
I0322 19:16:13.422928  543705 net.go:770] primary dev: ETH0
I0322 19:16:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:16:13.422952  543705 net.go:698] Add success.
I0322 19:16:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:16:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:16:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 19:16:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:16:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 19:16:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:16:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:16:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:16:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:16:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:16:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:16:23.409772  543705 memory.go:184] no items to output this cycle
I0322 19:16:23.409777  543705 cpu.go:275] no items to output this cycle
E0322 19:16:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:16:33.409814  543705 memory.go:184] no items to output this cycle
I0322 19:16:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 19:16:36.169676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:16:36.172192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:16:36.172199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a55c0 0xc0002a5600]
E0322 19:16:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:16:43.410712  543705 memory.go:191] Add success.
I0322 19:16:43.409821  543705 cpu.go:282] Add success.
I0322 19:16:43.420434  543705 net.go:648] Add success.
I0322 19:16:43.423211  543705 net.go:770] primary dev: ETH0
I0322 19:16:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:16:43.423241  543705 net.go:698] Add success.
I0322 19:16:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:16:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:16:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:16:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:16:53.409776  543705 memory.go:184] no items to output this cycle
I0322 19:16:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 19:17:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:17:03.409800  543705 memory.go:184] no items to output this cycle
I0322 19:17:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 19:17:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:17:13.409773  543705 memory.go:191] Add success.
W0322 19:17:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:17:13.409807  543705 cpu.go:282] Add success.
W0322 19:17:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:17:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:17:13.420047  543705 net.go:648] Add success.
I0322 19:17:13.422671  543705 net.go:770] primary dev: ETH0
I0322 19:17:13.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:17:13.422698  543705 net.go:698] Add success.
I0322 19:17:13.453369  543705 event_worker.go:152] Polling the log file for events...
W0322 19:17:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:17:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 19:17:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:17:14.456793  543705 disk_worker.go:494] system disk:vda1
I0322 19:17:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:17:14.457102  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:17:14.457110  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:17:14.457114  543705 custom_config.go:64] query custom config with name: gpu
E0322 19:17:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:17:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:17:16.457913  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:17:16.457913  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:17:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:17:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:17:16.472307  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:17:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:17:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 19:17:23.409794  543705 memory.go:184] no items to output this cycle
E0322 19:17:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:17:33.409791  543705 memory.go:184] no items to output this cycle
I0322 19:17:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 19:17:36.173679  543705 disk_info.go:125] begin check local disk info of client
I0322 19:17:36.176151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:17:36.176157  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b8480 0xc0002b84c0]
E0322 19:17:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:17:43.410836  543705 memory.go:191] Add success.
I0322 19:17:43.409834  543705 cpu.go:282] Add success.
I0322 19:17:43.420547  543705 net.go:648] Add success.
I0322 19:17:43.423625  543705 net.go:770] primary dev: ETH0
I0322 19:17:43.423650  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:17:43.423663  543705 net.go:698] Add success.
I0322 19:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:17:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:17:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:17:53.409787  543705 memory.go:184] no items to output this cycle
I0322 19:17:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 19:18:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:18:03.409782  543705 memory.go:184] no items to output this cycle
I0322 19:18:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 19:18:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:18:13.409781  543705 memory.go:191] Add success.
W0322 19:18:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:18:13.409811  543705 cpu.go:282] Add success.
W0322 19:18:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:18:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:18:13.420238  543705 net.go:648] Add success.
I0322 19:18:13.422806  543705 net.go:770] primary dev: ETH0
I0322 19:18:13.422819  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:18:13.422831  543705 net.go:698] Add success.
I0322 19:18:13.463559  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3cdfad35-9627-4cc7-a88e-1b4c2f5864c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:18:13.463599  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:18:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:18:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:18:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 19:18:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:18:14.456690  543705 disk_worker.go:494] system disk:vda1
I0322 19:18:14.456726  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:18:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:18:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:18:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:18:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:18:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:18:23.409777  543705 memory.go:184] no items to output this cycle
I0322 19:18:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:18:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:18:33.409798  543705 memory.go:184] no items to output this cycle
I0322 19:18:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 19:18:36.177680  543705 disk_info.go:125] begin check local disk info of client
I0322 19:18:36.180184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:18:36.180191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515880 0xc0005158c0]
I0322 19:18:39.769529  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:18:39.769535  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:18:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:18:43.410664  543705 memory.go:191] Add success.
I0322 19:18:43.409842  543705 cpu.go:282] Add success.
I0322 19:18:43.420389  543705 net.go:648] Add success.
I0322 19:18:43.423221  543705 net.go:770] primary dev: ETH0
I0322 19:18:43.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:18:43.423248  543705 net.go:698] Add success.
I0322 19:18:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:18:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:18:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:18:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:18:53.409777  543705 memory.go:184] no items to output this cycle
I0322 19:18:53.409778  543705 cpu.go:275] no items to output this cycle
E0322 19:19:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:19:03.409813  543705 memory.go:184] no items to output this cycle
I0322 19:19:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 19:19:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:19:13.409780  543705 memory.go:191] Add success.
I0322 19:19:13.409795  543705 cpu.go:282] Add success.
W0322 19:19:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:19:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:19:13.420072  543705 net.go:648] Add success.
I0322 19:19:13.423220  543705 net.go:770] primary dev: ETH0
I0322 19:19:13.423235  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:19:13.423248  543705 net.go:698] Add success.
I0322 19:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:19:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:19:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 19:19:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:19:14.456607  543705 disk_worker.go:494] system disk:vda1
I0322 19:19:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:19:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:19:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:19:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:19:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:19:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:19:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:19:23.409801  543705 memory.go:184] no items to output this cycle
I0322 19:19:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 19:19:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:19:33.409891  543705 cpu.go:275] no items to output this cycle
I0322 19:19:33.409955  543705 memory.go:184] no items to output this cycle
I0322 19:19:36.181675  543705 disk_info.go:125] begin check local disk info of client
I0322 19:19:36.184241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:19:36.184247  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474100 0xc000474140]
E0322 19:19:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:19:43.410885  543705 memory.go:191] Add success.
I0322 19:19:43.409828  543705 cpu.go:282] Add success.
I0322 19:19:43.420644  543705 net.go:648] Add success.
I0322 19:19:43.423507  543705 net.go:770] primary dev: ETH0
I0322 19:19:43.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:19:43.423533  543705 net.go:698] Add success.
I0322 19:19:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:19:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:19:53.410454  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:19:53.410471  543705 memory.go:184] no items to output this cycle
I0322 19:19:53.410507  543705 cpu.go:275] no items to output this cycle
E0322 19:20:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:20:03.409782  543705 memory.go:184] no items to output this cycle
I0322 19:20:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 19:20:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:20:13.409790  543705 memory.go:191] Add success.
I0322 19:20:13.409794  543705 cpu.go:282] Add success.
W0322 19:20:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:20:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:20:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:20:13.420049  543705 net.go:648] Add success.
I0322 19:20:13.423055  543705 net.go:770] primary dev: ETH0
I0322 19:20:13.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:20:13.423082  543705 net.go:698] Add success.
I0322 19:20:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:20:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:20:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 19:20:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:20:14.456506  543705 disk_worker.go:494] system disk:vda1
I0322 19:20:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:20:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:20:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:20:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:20:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:20:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:20:23.409773  543705 memory.go:184] no items to output this cycle
I0322 19:20:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 19:20:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:20:33.409824  543705 memory.go:184] no items to output this cycle
I0322 19:20:33.409835  543705 cpu.go:275] no items to output this cycle
I0322 19:20:36.185674  543705 disk_info.go:125] begin check local disk info of client
I0322 19:20:36.188248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:20:36.188256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005974c0 0xc000597500]
E0322 19:20:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:20:43.410755  543705 memory.go:191] Add success.
I0322 19:20:43.409882  543705 cpu.go:282] Add success.
I0322 19:20:43.420501  543705 net.go:648] Add success.
I0322 19:20:43.423176  543705 net.go:770] primary dev: ETH0
I0322 19:20:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:20:43.423203  543705 net.go:698] Add success.
I0322 19:20:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:20:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:20:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:20:53.409802  543705 memory.go:184] no items to output this cycle
I0322 19:20:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 19:21:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:21:03.409773  543705 memory.go:184] no items to output this cycle
I0322 19:21:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:21:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:21:13.409790  543705 memory.go:191] Add success.
I0322 19:21:13.409793  543705 cpu.go:282] Add success.
W0322 19:21:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:21:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:21:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:21:13.420072  543705 net.go:648] Add success.
I0322 19:21:13.423054  543705 net.go:770] primary dev: ETH0
I0322 19:21:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:21:13.423080  543705 net.go:698] Add success.
I0322 19:21:13.464432  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"913231c1-7729-469c-88b8-97ce77ccbd31","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:21:13.464463  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:21:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:21:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:21:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 19:21:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:21:14.456762  543705 disk_worker.go:494] system disk:vda1
I0322 19:21:14.456791  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:21:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:21:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:21:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:21:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:21:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:21:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:21:23.409803  543705 memory.go:184] no items to output this cycle
I0322 19:21:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 19:21:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:21:33.409775  543705 memory.go:184] no items to output this cycle
I0322 19:21:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 19:21:36.189674  543705 disk_info.go:125] begin check local disk info of client
I0322 19:21:36.192201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:21:36.192208  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326000 0xc000326040]
I0322 19:21:39.769735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:21:39.769741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:21:43.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:21:43.410774  543705 memory.go:191] Add success.
I0322 19:21:43.409885  543705 cpu.go:282] Add success.
I0322 19:21:43.420592  543705 net.go:648] Add success.
I0322 19:21:43.423442  543705 net.go:770] primary dev: ETH0
I0322 19:21:43.423457  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:21:43.423472  543705 net.go:698] Add success.
I0322 19:21:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:21:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:21:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:21:53.409779  543705 cpu.go:275] no items to output this cycle
I0322 19:21:53.409781  543705 memory.go:184] no items to output this cycle
E0322 19:22:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:22:03.409761  543705 memory.go:184] no items to output this cycle
I0322 19:22:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 19:22:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:22:13.409808  543705 memory.go:191] Add success.
I0322 19:22:13.409810  543705 cpu.go:282] Add success.
W0322 19:22:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:22:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:22:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:22:13.420124  543705 net.go:648] Add success.
I0322 19:22:13.422904  543705 net.go:770] primary dev: ETH0
I0322 19:22:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:22:13.422928  543705 net.go:698] Add success.
W0322 19:22:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:22:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 19:22:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:22:14.456916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:22:14.456925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:22:14.456931  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:22:14.456980  543705 disk_worker.go:494] system disk:vda1
I0322 19:22:14.457022  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:22:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:22:15.456859  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:22:16.457918  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:22:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:22:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:22:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:22:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:22:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:22:23.409766  543705 memory.go:184] no items to output this cycle
I0322 19:22:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 19:22:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:22:33.409772  543705 memory.go:184] no items to output this cycle
I0322 19:22:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 19:22:36.193671  543705 disk_info.go:125] begin check local disk info of client
I0322 19:22:36.196105  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:22:36.196115  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051ef80 0xc00051efc0]
E0322 19:22:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:22:43.411034  543705 memory.go:191] Add success.
I0322 19:22:43.410057  543705 cpu.go:282] Add success.
I0322 19:22:43.419750  543705 net.go:648] Add success.
I0322 19:22:43.422622  543705 net.go:770] primary dev: ETH0
I0322 19:22:43.422641  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:22:43.422656  543705 net.go:698] Add success.
I0322 19:22:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:22:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:22:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:22:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:22:53.409778  543705 memory.go:184] no items to output this cycle
I0322 19:22:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 19:23:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:23:03.409780  543705 memory.go:184] no items to output this cycle
I0322 19:23:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 19:23:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:23:13.409786  543705 memory.go:191] Add success.
I0322 19:23:13.409787  543705 cpu.go:282] Add success.
W0322 19:23:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:23:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:23:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:23:13.420258  543705 net.go:648] Add success.
I0322 19:23:13.422813  543705 net.go:770] primary dev: ETH0
I0322 19:23:13.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:23:13.422839  543705 net.go:698] Add success.
I0322 19:23:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:23:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:23:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 19:23:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:23:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 19:23:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:23:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:23:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:23:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:23:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:23:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:23:23.409776  543705 memory.go:184] no items to output this cycle
I0322 19:23:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 19:23:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:23:33.409802  543705 memory.go:184] no items to output this cycle
I0322 19:23:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 19:23:36.197673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:23:36.200237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:23:36.200244  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004818c0 0xc000481900]
E0322 19:23:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:23:43.409841  543705 memory.go:191] Add success.
I0322 19:23:43.409814  543705 cpu.go:282] Add success.
I0322 19:23:43.420374  543705 net.go:648] Add success.
I0322 19:23:43.421446  543705 net.go:770] primary dev: ETH0
I0322 19:23:43.421459  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:23:43.421471  543705 net.go:698] Add success.
I0322 19:23:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:23:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:23:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:23:53.410466  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:23:53.410482  543705 memory.go:184] no items to output this cycle
I0322 19:23:53.410481  543705 cpu.go:275] no items to output this cycle
E0322 19:24:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:24:03.409777  543705 memory.go:184] no items to output this cycle
I0322 19:24:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 19:24:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:24:13.409777  543705 memory.go:191] Add success.
I0322 19:24:13.409800  543705 cpu.go:282] Add success.
W0322 19:24:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:24:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:24:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:24:13.420050  543705 net.go:648] Add success.
I0322 19:24:13.422486  543705 net.go:770] primary dev: ETH0
I0322 19:24:13.422500  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:24:13.422512  543705 net.go:698] Add success.
I0322 19:24:13.469663  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc6f39e9-5781-46bf-9268-76d64e7ae622","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:24:13.469698  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:24:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:24:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:24:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 19:24:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:24:14.456696  543705 disk_worker.go:494] system disk:vda1
I0322 19:24:14.456732  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:24:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:24:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:24:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:24:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:24:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:24:23.409774  543705 memory.go:184] no items to output this cycle
I0322 19:24:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 19:24:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:24:33.409786  543705 memory.go:184] no items to output this cycle
I0322 19:24:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 19:24:36.201674  543705 disk_info.go:125] begin check local disk info of client
I0322 19:24:36.203992  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:24:36.204001  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3f40 0xc000460000]
I0322 19:24:39.771546  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:24:39.771552  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:24:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:24:43.409791  543705 memory.go:191] Add success.
I0322 19:24:43.409862  543705 cpu.go:282] Add success.
I0322 19:24:43.420211  543705 net.go:648] Add success.
I0322 19:24:43.421119  543705 net.go:770] primary dev: ETH0
I0322 19:24:43.421134  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:24:43.421145  543705 net.go:698] Add success.
I0322 19:24:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:24:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:24:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:24:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:24:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 19:24:53.409789  543705 memory.go:184] no items to output this cycle
E0322 19:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:25:03.409799  543705 memory.go:184] no items to output this cycle
I0322 19:25:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 19:25:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:25:13.409791  543705 memory.go:191] Add success.
I0322 19:25:13.409793  543705 cpu.go:282] Add success.
W0322 19:25:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:25:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:25:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:25:13.420052  543705 net.go:648] Add success.
I0322 19:25:13.422745  543705 net.go:770] primary dev: ETH0
I0322 19:25:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:25:13.422770  543705 net.go:698] Add success.
I0322 19:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:25:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:25:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 19:25:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:25:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 19:25:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:25:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:25:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:25:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:25:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:25:23.409783  543705 memory.go:184] no items to output this cycle
I0322 19:25:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 19:25:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:25:33.409775  543705 memory.go:184] no items to output this cycle
I0322 19:25:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 19:25:36.205676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:25:36.208212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:25:36.208218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d84c0 0xc0003d8500]
E0322 19:25:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:25:43.410548  543705 memory.go:191] Add success.
I0322 19:25:43.409814  543705 cpu.go:282] Add success.
I0322 19:25:43.420470  543705 net.go:648] Add success.
I0322 19:25:43.423057  543705 net.go:770] primary dev: ETH0
I0322 19:25:43.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:25:43.423110  543705 net.go:698] Add success.
I0322 19:25:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:25:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:25:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:25:53.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:25:53.409872  543705 memory.go:184] no items to output this cycle
I0322 19:25:53.409936  543705 cpu.go:275] no items to output this cycle
E0322 19:26:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:26:03.409783  543705 memory.go:184] no items to output this cycle
I0322 19:26:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 19:26:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:26:13.409814  543705 memory.go:191] Add success.
I0322 19:26:13.409823  543705 cpu.go:282] Add success.
W0322 19:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:26:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:26:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:26:13.420132  543705 net.go:648] Add success.
I0322 19:26:13.422978  543705 net.go:770] primary dev: ETH0
I0322 19:26:13.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:26:13.423003  543705 net.go:698] Add success.
I0322 19:26:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:26:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:26:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 19:26:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:26:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 19:26:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:26:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:26:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:26:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:26:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:26:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:26:23.409771  543705 cpu.go:275] no items to output this cycle
I0322 19:26:23.409782  543705 memory.go:184] no items to output this cycle
E0322 19:26:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:26:33.409799  543705 memory.go:184] no items to output this cycle
I0322 19:26:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 19:26:36.209671  543705 disk_info.go:125] begin check local disk info of client
I0322 19:26:36.212209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:26:36.212216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304180 0xc0003041c0]
E0322 19:26:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:26:43.409773  543705 memory.go:191] Add success.
I0322 19:26:43.409844  543705 cpu.go:282] Add success.
I0322 19:26:43.420057  543705 net.go:648] Add success.
I0322 19:26:43.420980  543705 net.go:770] primary dev: ETH0
I0322 19:26:43.420994  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:26:43.421006  543705 net.go:698] Add success.
I0322 19:26:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:26:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:26:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:26:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:26:53.409795  543705 memory.go:184] no items to output this cycle
I0322 19:26:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 19:27:03.409930  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:27:03.409979  543705 cpu.go:275] no items to output this cycle
I0322 19:27:03.410014  543705 memory.go:184] no items to output this cycle
E0322 19:27:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:27:13.409815  543705 memory.go:191] Add success.
W0322 19:27:13.412439  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:27:13.412453  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:27:13.412458  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:27:13.409787  543705 cpu.go:282] Add success.
I0322 19:27:13.420067  543705 net.go:648] Add success.
I0322 19:27:13.427823  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 19:27:13.427905  543705 net.go:770] primary dev: ETH0
I0322 19:27:13.427917  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:27:13.427928  543705 net.go:698] Add success.
I0322 19:27:13.453528  543705 event_worker.go:152] Polling the log file for events...
I0322 19:27:13.463817  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"10a56681-96ef-4320-9ef2-e98f407b1eb9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:27:13.463848  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 19:27:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:27:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0322 19:27:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:27:14.455875  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:27:14.455884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:27:14.455889  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:27:14.456624  543705 disk_worker.go:494] system disk:vda1
I0322 19:27:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:27:15.456792  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:27:15.456799  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:27:16.457927  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:27:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:27:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:27:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:27:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:27:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:27:23.409813  543705 memory.go:184] no items to output this cycle
I0322 19:27:23.409827  543705 cpu.go:275] no items to output this cycle
E0322 19:27:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:27:33.409806  543705 memory.go:184] no items to output this cycle
I0322 19:27:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 19:27:36.213687  543705 disk_info.go:125] begin check local disk info of client
I0322 19:27:36.216252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:27:36.216258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8d00 0xc0003c8d40]
I0322 19:27:39.772556  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:27:39.772561  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:27:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:27:43.410696  543705 memory.go:191] Add success.
I0322 19:27:43.409807  543705 cpu.go:282] Add success.
I0322 19:27:43.420387  543705 net.go:648] Add success.
I0322 19:27:43.423075  543705 net.go:770] primary dev: ETH0
I0322 19:27:43.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:27:43.423115  543705 net.go:698] Add success.
I0322 19:27:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:27:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:27:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:27:53.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:27:53.409849  543705 memory.go:184] no items to output this cycle
I0322 19:27:53.409927  543705 cpu.go:275] no items to output this cycle
E0322 19:28:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:28:03.409773  543705 memory.go:184] no items to output this cycle
I0322 19:28:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 19:28:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:28:13.409782  543705 memory.go:191] Add success.
I0322 19:28:13.409804  543705 cpu.go:282] Add success.
W0322 19:28:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:28:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:28:13.420096  543705 net.go:648] Add success.
I0322 19:28:13.422738  543705 net.go:770] primary dev: ETH0
I0322 19:28:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:28:13.422768  543705 net.go:698] Add success.
I0322 19:28:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:28:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:28:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 19:28:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:28:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 19:28:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:28:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:28:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:28:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:28:16.472452  543705 disk_local_worker.go:436] Get disk info: []
I0322 19:28:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 19:28:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:28:23.409813  543705 memory.go:184] no items to output this cycle
E0322 19:28:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:28:33.409783  543705 memory.go:184] no items to output this cycle
I0322 19:28:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 19:28:36.217691  543705 disk_info.go:125] begin check local disk info of client
I0322 19:28:36.220257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:28:36.220263  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498d40 0xc000498d80]
E0322 19:28:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:28:43.410682  543705 memory.go:191] Add success.
I0322 19:28:43.409797  543705 cpu.go:282] Add success.
I0322 19:28:43.420396  543705 net.go:648] Add success.
I0322 19:28:43.423251  543705 net.go:770] primary dev: ETH0
I0322 19:28:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:28:43.423277  543705 net.go:698] Add success.
I0322 19:28:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:28:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:28:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:28:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:28:53.409874  543705 memory.go:184] no items to output this cycle
I0322 19:28:53.409934  543705 cpu.go:275] no items to output this cycle
E0322 19:29:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:29:03.409774  543705 cpu.go:275] no items to output this cycle
I0322 19:29:03.409789  543705 memory.go:184] no items to output this cycle
E0322 19:29:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:29:13.409799  543705 memory.go:191] Add success.
I0322 19:29:13.409800  543705 cpu.go:282] Add success.
W0322 19:29:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:29:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:29:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:29:13.420290  543705 net.go:648] Add success.
I0322 19:29:13.423199  543705 net.go:770] primary dev: ETH0
I0322 19:29:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:29:13.423229  543705 net.go:698] Add success.
I0322 19:29:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:29:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:29:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 19:29:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:29:14.456781  543705 disk_worker.go:494] system disk:vda1
I0322 19:29:14.456810  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:29:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:29:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:29:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:29:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:29:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:29:23.409768  543705 memory.go:184] no items to output this cycle
I0322 19:29:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 19:29:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:29:33.409779  543705 memory.go:184] no items to output this cycle
I0322 19:29:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 19:29:36.221673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:29:36.224208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:29:36.224214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a77c0 0xc0002a7800]
E0322 19:29:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:29:43.410542  543705 memory.go:191] Add success.
I0322 19:29:43.409827  543705 cpu.go:282] Add success.
I0322 19:29:43.420230  543705 net.go:648] Add success.
I0322 19:29:43.423024  543705 net.go:770] primary dev: ETH0
I0322 19:29:43.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:29:43.423054  543705 net.go:698] Add success.
I0322 19:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:29:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:29:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:29:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:29:53.409776  543705 memory.go:184] no items to output this cycle
I0322 19:29:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 19:30:03.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:30:03.409866  543705 memory.go:184] no items to output this cycle
I0322 19:30:03.409930  543705 cpu.go:275] no items to output this cycle
E0322 19:30:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:30:13.409778  543705 memory.go:191] Add success.
W0322 19:30:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:30:13.409810  543705 cpu.go:282] Add success.
W0322 19:30:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:30:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:30:13.420048  543705 net.go:648] Add success.
I0322 19:30:13.422704  543705 net.go:770] primary dev: ETH0
I0322 19:30:13.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:30:13.422729  543705 net.go:698] Add success.
I0322 19:30:13.463963  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7e255e52-8ed6-42cc-829a-c032f16c7231","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:30:13.463997  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:30:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:30:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:30:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 19:30:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:30:14.456611  543705 disk_worker.go:494] system disk:vda1
I0322 19:30:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:30:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:30:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:30:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:30:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:30:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:30:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:30:23.409804  543705 memory.go:184] no items to output this cycle
I0322 19:30:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 19:30:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:30:33.409788  543705 memory.go:184] no items to output this cycle
I0322 19:30:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 19:30:36.225672  543705 disk_info.go:125] begin check local disk info of client
I0322 19:30:36.228203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:30:36.228209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f040 0xc00039f080]
I0322 19:30:39.773559  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:30:39.773566  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:30:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:30:43.410699  543705 memory.go:191] Add success.
I0322 19:30:43.409801  543705 cpu.go:282] Add success.
I0322 19:30:43.420375  543705 net.go:648] Add success.
I0322 19:30:43.423173  543705 net.go:770] primary dev: ETH0
I0322 19:30:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:30:43.423197  543705 net.go:698] Add success.
I0322 19:30:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:30:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:30:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:30:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:30:53.409774  543705 memory.go:184] no items to output this cycle
I0322 19:30:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 19:31:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:31:03.409779  543705 memory.go:184] no items to output this cycle
I0322 19:31:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 19:31:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:31:13.409813  543705 memory.go:191] Add success.
I0322 19:31:13.409814  543705 cpu.go:282] Add success.
W0322 19:31:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:31:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:31:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:31:13.419978  543705 net.go:770] primary dev: ETH0
I0322 19:31:13.419991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:31:13.420003  543705 net.go:698] Add success.
I0322 19:31:13.420348  543705 net.go:648] Add success.
I0322 19:31:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:31:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:31:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 19:31:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:31:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 19:31:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:31:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:31:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:31:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:31:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:31:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:31:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:31:23.409785  543705 memory.go:184] no items to output this cycle
I0322 19:31:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 19:31:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:31:33.409805  543705 memory.go:184] no items to output this cycle
I0322 19:31:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 19:31:36.229669  543705 disk_info.go:125] begin check local disk info of client
I0322 19:31:36.232206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:31:36.232212  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342dc0 0xc000342e00]
E0322 19:31:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:31:43.410691  543705 memory.go:191] Add success.
I0322 19:31:43.409816  543705 cpu.go:282] Add success.
I0322 19:31:43.420374  543705 net.go:648] Add success.
I0322 19:31:43.423335  543705 net.go:770] primary dev: ETH0
I0322 19:31:43.423350  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:31:43.423364  543705 net.go:698] Add success.
I0322 19:31:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:31:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:31:53.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:31:53.410268  543705 memory.go:184] no items to output this cycle
I0322 19:31:53.410298  543705 cpu.go:275] no items to output this cycle
E0322 19:32:03.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:32:03.409911  543705 memory.go:184] no items to output this cycle
I0322 19:32:03.409970  543705 cpu.go:275] no items to output this cycle
W0322 19:32:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:32:13.409731  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:32:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:32:13.409825  543705 cpu.go:282] Add success.
E0322 19:32:13.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:32:13.409853  543705 memory.go:191] Add success.
I0322 19:32:13.420069  543705 net.go:648] Add success.
I0322 19:32:13.422904  543705 net.go:770] primary dev: ETH0
I0322 19:32:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:32:13.422929  543705 net.go:698] Add success.
W0322 19:32:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:32:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 19:32:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:32:14.456790  543705 disk_worker.go:494] system disk:vda1
I0322 19:32:14.456828  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:32:14.457140  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:32:14.457148  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:32:14.457153  543705 custom_config.go:64] query custom config with name: gpu
E0322 19:32:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:32:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:32:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:32:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:32:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:32:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:32:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:32:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:32:23.409786  543705 memory.go:184] no items to output this cycle
I0322 19:32:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 19:32:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:32:33.409793  543705 memory.go:184] no items to output this cycle
I0322 19:32:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 19:32:36.233681  543705 disk_info.go:125] begin check local disk info of client
I0322 19:32:36.236211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:32:36.236217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278440 0xc000278480]
E0322 19:32:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:32:43.410721  543705 memory.go:191] Add success.
I0322 19:32:43.409826  543705 cpu.go:282] Add success.
I0322 19:32:43.420398  543705 net.go:648] Add success.
I0322 19:32:43.423317  543705 net.go:770] primary dev: ETH0
I0322 19:32:43.423330  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:32:43.423343  543705 net.go:698] Add success.
I0322 19:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:32:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:32:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:32:53.409786  543705 memory.go:184] no items to output this cycle
I0322 19:32:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 19:33:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:33:03.409872  543705 memory.go:184] no items to output this cycle
I0322 19:33:03.409919  543705 cpu.go:275] no items to output this cycle
E0322 19:33:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:33:13.409803  543705 memory.go:191] Add success.
I0322 19:33:13.409805  543705 cpu.go:282] Add success.
W0322 19:33:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:33:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:33:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:33:13.420144  543705 net.go:648] Add success.
I0322 19:33:13.423361  543705 net.go:770] primary dev: ETH0
I0322 19:33:13.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:33:13.423386  543705 net.go:698] Add success.
I0322 19:33:13.467286  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bbd2f0fe-8647-4f7b-b75a-a678e2c528e8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:33:13.467317  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:33:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:33:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:33:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 19:33:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:33:14.456613  543705 disk_worker.go:494] system disk:vda1
I0322 19:33:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:33:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:33:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:33:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:33:23.409791  543705 cpu.go:275] no items to output this cycle
I0322 19:33:23.409791  543705 memory.go:184] no items to output this cycle
E0322 19:33:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:33:33.409783  543705 memory.go:184] no items to output this cycle
I0322 19:33:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 19:33:36.237675  543705 disk_info.go:125] begin check local disk info of client
I0322 19:33:36.240203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:33:36.240209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046ecc0 0xc00046ed00]
I0322 19:33:39.773724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:33:39.773730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:33:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:33:43.410812  543705 memory.go:191] Add success.
I0322 19:33:43.409803  543705 cpu.go:282] Add success.
I0322 19:33:43.420574  543705 net.go:648] Add success.
I0322 19:33:43.423550  543705 net.go:770] primary dev: ETH0
I0322 19:33:43.423564  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:33:43.423579  543705 net.go:698] Add success.
I0322 19:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:33:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:33:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:33:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:33:53.409791  543705 memory.go:184] no items to output this cycle
I0322 19:33:53.409793  543705 cpu.go:275] no items to output this cycle
I0322 19:34:03.409877  543705 cpu.go:275] no items to output this cycle
E0322 19:34:03.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:34:03.409893  543705 memory.go:184] no items to output this cycle
E0322 19:34:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:34:13.409773  543705 memory.go:191] Add success.
W0322 19:34:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:34:13.409809  543705 cpu.go:282] Add success.
W0322 19:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:34:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:34:13.420130  543705 net.go:648] Add success.
I0322 19:34:13.422811  543705 net.go:770] primary dev: ETH0
I0322 19:34:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:34:13.422840  543705 net.go:698] Add success.
I0322 19:34:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:34:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:34:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 19:34:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:34:14.456560  543705 disk_worker.go:494] system disk:vda1
I0322 19:34:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:34:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:34:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:34:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:34:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:34:23.409777  543705 memory.go:184] no items to output this cycle
I0322 19:34:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 19:34:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:34:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 19:34:33.409792  543705 memory.go:184] no items to output this cycle
I0322 19:34:36.241676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:34:36.244197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:34:36.244203  543705 disk_info.go:196] parse disk info done, disk is : [0xc000290980 0xc0002909c0]
E0322 19:34:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:34:43.410672  543705 memory.go:191] Add success.
I0322 19:34:43.409802  543705 cpu.go:282] Add success.
I0322 19:34:43.420381  543705 net.go:648] Add success.
I0322 19:34:43.422962  543705 net.go:770] primary dev: ETH0
I0322 19:34:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:34:43.422990  543705 net.go:698] Add success.
I0322 19:34:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:34:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:34:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:34:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:34:53.409774  543705 memory.go:184] no items to output this cycle
I0322 19:34:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 19:35:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:35:03.409764  543705 memory.go:184] no items to output this cycle
I0322 19:35:03.409897  543705 cpu.go:275] no items to output this cycle
E0322 19:35:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:35:13.409797  543705 memory.go:191] Add success.
I0322 19:35:13.409797  543705 cpu.go:282] Add success.
W0322 19:35:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:35:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:35:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:35:13.420141  543705 net.go:648] Add success.
I0322 19:35:13.422943  543705 net.go:770] primary dev: ETH0
I0322 19:35:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:35:13.422967  543705 net.go:698] Add success.
I0322 19:35:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:35:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:35:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 19:35:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:35:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 19:35:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:35:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:35:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:35:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:35:23.409765  543705 memory.go:184] no items to output this cycle
I0322 19:35:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 19:35:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:35:33.409800  543705 memory.go:184] no items to output this cycle
I0322 19:35:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 19:35:36.245673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:35:36.248255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:35:36.248260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1800 0xc0003f1840]
E0322 19:35:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:35:43.410681  543705 memory.go:191] Add success.
I0322 19:35:43.409786  543705 cpu.go:282] Add success.
I0322 19:35:43.420404  543705 net.go:648] Add success.
I0322 19:35:43.422997  543705 net.go:770] primary dev: ETH0
I0322 19:35:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:35:43.423025  543705 net.go:698] Add success.
I0322 19:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:35:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:35:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:35:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:35:53.409779  543705 memory.go:184] no items to output this cycle
I0322 19:35:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 19:36:03.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:36:03.409928  543705 memory.go:184] no items to output this cycle
I0322 19:36:03.410102  543705 cpu.go:275] no items to output this cycle
E0322 19:36:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:36:13.409790  543705 memory.go:191] Add success.
I0322 19:36:13.409791  543705 cpu.go:282] Add success.
W0322 19:36:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:36:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:36:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:36:13.420165  543705 net.go:648] Add success.
I0322 19:36:13.423133  543705 net.go:770] primary dev: ETH0
I0322 19:36:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:36:13.423166  543705 net.go:698] Add success.
I0322 19:36:13.468991  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6dd1b00b-084f-4647-a2e9-b5e9eb850090","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:36:13.469024  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:36:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:36:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:36:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 19:36:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:36:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 19:36:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:36:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:36:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:36:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:36:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:36:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:36:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:36:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 19:36:23.409789  543705 memory.go:184] no items to output this cycle
E0322 19:36:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:36:33.409809  543705 memory.go:184] no items to output this cycle
I0322 19:36:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 19:36:36.249688  543705 disk_info.go:125] begin check local disk info of client
I0322 19:36:36.252275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:36:36.252281  543705 disk_info.go:196] parse disk info done, disk is : [0xc000380080 0xc0003800c0]
I0322 19:36:39.775565  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:36:39.775571  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:36:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:36:43.410669  543705 memory.go:191] Add success.
I0322 19:36:43.409789  543705 cpu.go:282] Add success.
I0322 19:36:43.420349  543705 net.go:648] Add success.
I0322 19:36:43.423027  543705 net.go:770] primary dev: ETH0
I0322 19:36:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:36:43.423056  543705 net.go:698] Add success.
I0322 19:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:36:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:36:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:36:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:36:53.409781  543705 memory.go:184] no items to output this cycle
I0322 19:36:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 19:37:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:37:03.409775  543705 memory.go:184] no items to output this cycle
I0322 19:37:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 19:37:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:37:13.409788  543705 memory.go:191] Add success.
I0322 19:37:13.409808  543705 cpu.go:282] Add success.
W0322 19:37:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:37:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:37:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:37:13.420118  543705 net.go:648] Add success.
I0322 19:37:13.422763  543705 net.go:770] primary dev: ETH0
I0322 19:37:13.422776  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:37:13.422787  543705 net.go:698] Add success.
I0322 19:37:13.453536  543705 event_worker.go:152] Polling the log file for events...
W0322 19:37:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:37:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 19:37:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:37:14.455923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:37:14.455932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:37:14.455938  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:37:14.456560  543705 disk_worker.go:494] system disk:vda1
I0322 19:37:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:37:15.456928  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:37:15.456940  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:37:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:37:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:37:16.458018  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:37:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:37:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:37:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:37:23.409795  543705 memory.go:184] no items to output this cycle
I0322 19:37:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 19:37:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:37:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 19:37:33.409789  543705 memory.go:184] no items to output this cycle
I0322 19:37:36.253673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:37:36.256207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:37:36.256213  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039fec0 0xc00039ff00]
E0322 19:37:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:37:43.410652  543705 memory.go:191] Add success.
I0322 19:37:43.409807  543705 cpu.go:282] Add success.
I0322 19:37:43.420358  543705 net.go:648] Add success.
I0322 19:37:43.422820  543705 net.go:770] primary dev: ETH0
I0322 19:37:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:37:43.422849  543705 net.go:698] Add success.
I0322 19:37:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:37:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:37:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:37:53.409786  543705 memory.go:184] no items to output this cycle
I0322 19:37:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 19:38:03.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:38:03.409897  543705 memory.go:184] no items to output this cycle
I0322 19:38:03.409955  543705 cpu.go:275] no items to output this cycle
E0322 19:38:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:38:13.409817  543705 memory.go:191] Add success.
I0322 19:38:13.409838  543705 cpu.go:282] Add success.
W0322 19:38:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:38:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:38:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:38:13.420171  543705 net.go:648] Add success.
I0322 19:38:13.422891  543705 net.go:770] primary dev: ETH0
I0322 19:38:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:38:13.422917  543705 net.go:698] Add success.
I0322 19:38:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:38:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:38:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 19:38:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:38:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 19:38:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:38:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:38:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:38:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:38:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:38:23.409788  543705 memory.go:184] no items to output this cycle
I0322 19:38:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 19:38:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:38:33.409809  543705 memory.go:184] no items to output this cycle
I0322 19:38:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 19:38:36.257673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:38:36.260262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:38:36.260268  543705 disk_info.go:196] parse disk info done, disk is : [0xc000321cc0 0xc000321d00]
E0322 19:38:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:38:43.410674  543705 memory.go:191] Add success.
I0322 19:38:43.409804  543705 cpu.go:282] Add success.
I0322 19:38:43.420376  543705 net.go:648] Add success.
I0322 19:38:43.423231  543705 net.go:770] primary dev: ETH0
I0322 19:38:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:38:43.423255  543705 net.go:698] Add success.
I0322 19:38:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:38:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:38:53.409800  543705 memory.go:184] no items to output this cycle
I0322 19:38:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 19:39:03.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:39:03.409880  543705 memory.go:184] no items to output this cycle
I0322 19:39:03.409881  543705 cpu.go:275] no items to output this cycle
E0322 19:39:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:39:13.409808  543705 memory.go:191] Add success.
I0322 19:39:13.409808  543705 cpu.go:282] Add success.
W0322 19:39:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:39:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:39:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:39:13.420550  543705 net.go:648] Add success.
I0322 19:39:13.423356  543705 net.go:770] primary dev: ETH0
I0322 19:39:13.423369  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:39:13.423382  543705 net.go:698] Add success.
I0322 19:39:13.525388  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"198a371c-e798-45ae-aae7-748ef632c478","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:39:13.525421  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:39:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:39:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:39:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 19:39:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:39:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 19:39:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:39:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:39:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:39:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:39:23.410273  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:39:23.410291  543705 memory.go:184] no items to output this cycle
I0322 19:39:23.410292  543705 cpu.go:275] no items to output this cycle
E0322 19:39:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:39:33.409802  543705 memory.go:184] no items to output this cycle
I0322 19:39:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 19:39:36.261672  543705 disk_info.go:125] begin check local disk info of client
I0322 19:39:36.264226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:39:36.264232  543705 disk_info.go:196] parse disk info done, disk is : [0xc000347e00 0xc000347e40]
I0322 19:39:39.776579  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:39:39.776585  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:39:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:39:43.410721  543705 memory.go:191] Add success.
I0322 19:39:43.409804  543705 cpu.go:282] Add success.
I0322 19:39:43.420423  543705 net.go:648] Add success.
I0322 19:39:43.423247  543705 net.go:770] primary dev: ETH0
I0322 19:39:43.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:39:43.423291  543705 net.go:698] Add success.
I0322 19:39:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:39:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:39:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:39:53.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:39:53.409899  543705 cpu.go:275] no items to output this cycle
I0322 19:39:53.409913  543705 memory.go:184] no items to output this cycle
E0322 19:40:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:40:03.409778  543705 memory.go:184] no items to output this cycle
I0322 19:40:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 19:40:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:40:13.409799  543705 cpu.go:282] Add success.
I0322 19:40:13.409809  543705 memory.go:191] Add success.
W0322 19:40:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:40:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:40:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:40:13.420173  543705 net.go:648] Add success.
I0322 19:40:13.422918  543705 net.go:770] primary dev: ETH0
I0322 19:40:13.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:40:13.422948  543705 net.go:698] Add success.
I0322 19:40:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:40:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:40:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 19:40:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:40:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 19:40:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:40:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:40:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:40:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:40:23.409768  543705 memory.go:184] no items to output this cycle
I0322 19:40:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 19:40:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:40:33.409782  543705 memory.go:184] no items to output this cycle
I0322 19:40:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 19:40:36.265677  543705 disk_info.go:125] begin check local disk info of client
I0322 19:40:36.268213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:40:36.268219  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382640 0xc000382680]
E0322 19:40:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:40:43.410618  543705 memory.go:191] Add success.
I0322 19:40:43.409797  543705 cpu.go:282] Add success.
I0322 19:40:43.420529  543705 net.go:648] Add success.
I0322 19:40:43.423339  543705 net.go:770] primary dev: ETH0
I0322 19:40:43.423364  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:40:43.423377  543705 net.go:698] Add success.
I0322 19:40:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:40:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:40:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:40:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:40:53.409773  543705 memory.go:184] no items to output this cycle
I0322 19:40:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 19:41:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:41:03.409773  543705 memory.go:184] no items to output this cycle
I0322 19:41:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 19:41:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:41:13.409811  543705 memory.go:191] Add success.
I0322 19:41:13.409821  543705 cpu.go:282] Add success.
W0322 19:41:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:41:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:41:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:41:13.420102  543705 net.go:648] Add success.
I0322 19:41:13.423079  543705 net.go:770] primary dev: ETH0
I0322 19:41:13.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:41:13.423105  543705 net.go:698] Add success.
I0322 19:41:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:41:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:41:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 19:41:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:41:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 19:41:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:41:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:41:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:41:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:41:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:41:23.409767  543705 memory.go:184] no items to output this cycle
I0322 19:41:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:41:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:41:33.409780  543705 memory.go:184] no items to output this cycle
I0322 19:41:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 19:41:36.269675  543705 disk_info.go:125] begin check local disk info of client
I0322 19:41:36.272180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:41:36.272187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fe80 0xc00037fec0]
E0322 19:41:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:41:43.410641  543705 memory.go:191] Add success.
I0322 19:41:43.409797  543705 cpu.go:282] Add success.
I0322 19:41:43.419714  543705 net.go:648] Add success.
I0322 19:41:43.422522  543705 net.go:770] primary dev: ETH0
I0322 19:41:43.422537  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:41:43.422550  543705 net.go:698] Add success.
I0322 19:41:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:41:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:41:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:41:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:41:53.409771  543705 memory.go:184] no items to output this cycle
I0322 19:41:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 19:42:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:42:03.409802  543705 memory.go:184] no items to output this cycle
I0322 19:42:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 19:42:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:42:13.409781  543705 memory.go:191] Add success.
W0322 19:42:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:42:13.409815  543705 cpu.go:282] Add success.
W0322 19:42:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:42:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:42:13.420234  543705 net.go:648] Add success.
I0322 19:42:13.423159  543705 net.go:770] primary dev: ETH0
I0322 19:42:13.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:42:13.423182  543705 net.go:698] Add success.
I0322 19:42:13.469045  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5526427b-0652-4f06-a184-94784afc6a27","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:42:13.469079  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 19:42:14.455239  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:42:14.455253  543705 disk_worker.go:708] disk space is not compliant
W0322 19:42:14.455257  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:42:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:42:14.455900  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:42:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:42:14.456830  543705 disk_worker.go:494] system disk:vda1
I0322 19:42:14.456874  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:42:15.456878  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:42:15.456888  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:42:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:42:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:42:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:42:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:42:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:42:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:42:23.409768  543705 memory.go:184] no items to output this cycle
I0322 19:42:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 19:42:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:42:33.409886  543705 cpu.go:275] no items to output this cycle
I0322 19:42:33.409898  543705 memory.go:184] no items to output this cycle
I0322 19:42:36.273671  543705 disk_info.go:125] begin check local disk info of client
I0322 19:42:36.276276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:42:36.276283  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034aa80 0xc00034aac0]
I0322 19:42:39.777576  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:42:39.777582  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:42:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:42:43.410834  543705 memory.go:191] Add success.
I0322 19:42:43.409810  543705 cpu.go:282] Add success.
I0322 19:42:43.420540  543705 net.go:648] Add success.
I0322 19:42:43.423374  543705 net.go:770] primary dev: ETH0
I0322 19:42:43.423389  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:42:43.423403  543705 net.go:698] Add success.
I0322 19:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:42:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:42:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:42:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:42:53.409768  543705 memory.go:184] no items to output this cycle
I0322 19:42:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:43:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:43:03.409768  543705 memory.go:184] no items to output this cycle
I0322 19:43:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 19:43:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:43:13.409783  543705 memory.go:191] Add success.
I0322 19:43:13.409803  543705 cpu.go:282] Add success.
W0322 19:43:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:43:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:43:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:43:13.420066  543705 net.go:648] Add success.
I0322 19:43:13.422717  543705 net.go:770] primary dev: ETH0
I0322 19:43:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:43:13.422742  543705 net.go:698] Add success.
I0322 19:43:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:43:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:43:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 19:43:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:43:14.456499  543705 disk_worker.go:494] system disk:vda1
I0322 19:43:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:43:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:43:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:43:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:43:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:43:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:43:23.409761  543705 memory.go:184] no items to output this cycle
I0322 19:43:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 19:43:33.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:43:33.409920  543705 memory.go:184] no items to output this cycle
I0322 19:43:33.409974  543705 cpu.go:275] no items to output this cycle
I0322 19:43:36.277680  543705 disk_info.go:125] begin check local disk info of client
I0322 19:43:36.280239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:43:36.280245  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ee40 0xc00037ee80]
E0322 19:43:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:43:43.410859  543705 memory.go:191] Add success.
I0322 19:43:43.409817  543705 cpu.go:282] Add success.
I0322 19:43:43.420550  543705 net.go:648] Add success.
I0322 19:43:43.423298  543705 net.go:770] primary dev: ETH0
I0322 19:43:43.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:43:43.423322  543705 net.go:698] Add success.
I0322 19:43:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:43:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:43:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:43:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:43:53.409783  543705 memory.go:184] no items to output this cycle
I0322 19:43:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 19:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:44:03.409776  543705 memory.go:184] no items to output this cycle
I0322 19:44:03.409778  543705 cpu.go:275] no items to output this cycle
E0322 19:44:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:44:13.409813  543705 memory.go:191] Add success.
I0322 19:44:13.409824  543705 cpu.go:282] Add success.
W0322 19:44:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:44:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:44:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:44:13.420125  543705 net.go:648] Add success.
I0322 19:44:13.423468  543705 net.go:770] primary dev: ETH0
I0322 19:44:13.423484  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:44:13.423499  543705 net.go:698] Add success.
I0322 19:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:44:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:44:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 19:44:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:44:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 19:44:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:44:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:44:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:44:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:44:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:44:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:44:23.409782  543705 cpu.go:275] no items to output this cycle
I0322 19:44:23.409784  543705 memory.go:184] no items to output this cycle
E0322 19:44:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:44:33.409806  543705 memory.go:184] no items to output this cycle
I0322 19:44:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 19:44:36.281676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:44:36.284276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:44:36.284283  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004bc8c0 0xc0004bc900]
E0322 19:44:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:44:43.410767  543705 memory.go:191] Add success.
I0322 19:44:43.409806  543705 cpu.go:282] Add success.
I0322 19:44:43.420456  543705 net.go:648] Add success.
I0322 19:44:43.423399  543705 net.go:770] primary dev: ETH0
I0322 19:44:43.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:44:43.423424  543705 net.go:698] Add success.
I0322 19:44:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:44:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:44:53.410407  543705 memory.go:184] no items to output this cycle
I0322 19:44:53.410437  543705 cpu.go:275] no items to output this cycle
E0322 19:45:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:45:03.409780  543705 memory.go:184] no items to output this cycle
I0322 19:45:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 19:45:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:45:13.409824  543705 memory.go:191] Add success.
I0322 19:45:13.409825  543705 cpu.go:282] Add success.
W0322 19:45:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:45:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:45:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:45:13.420231  543705 net.go:648] Add success.
I0322 19:45:13.423166  543705 net.go:770] primary dev: ETH0
I0322 19:45:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:45:13.423195  543705 net.go:698] Add success.
I0322 19:45:13.469006  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"83e37b54-1292-43e2-a706-653de33c2412","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:45:13.469038  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:45:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:45:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:45:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 19:45:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:45:14.456531  543705 disk_worker.go:494] system disk:vda1
I0322 19:45:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:45:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:45:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:45:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:45:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:45:23.409796  543705 memory.go:184] no items to output this cycle
I0322 19:45:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 19:45:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:45:33.409770  543705 memory.go:184] no items to output this cycle
I0322 19:45:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 19:45:36.285700  543705 disk_info.go:125] begin check local disk info of client
I0322 19:45:36.288230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:45:36.288237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004da000 0xc0004da040]
I0322 19:45:39.777731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:45:39.777737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:45:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:45:43.410752  543705 memory.go:191] Add success.
I0322 19:45:43.409805  543705 cpu.go:282] Add success.
I0322 19:45:43.420448  543705 net.go:648] Add success.
I0322 19:45:43.423074  543705 net.go:770] primary dev: ETH0
I0322 19:45:43.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:45:43.423099  543705 net.go:698] Add success.
I0322 19:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:45:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:45:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:45:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:45:53.409770  543705 memory.go:184] no items to output this cycle
I0322 19:45:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 19:46:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:46:03.409797  543705 memory.go:184] no items to output this cycle
I0322 19:46:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 19:46:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:46:13.409787  543705 memory.go:191] Add success.
I0322 19:46:13.409812  543705 cpu.go:282] Add success.
W0322 19:46:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:46:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:46:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:46:13.420156  543705 net.go:648] Add success.
I0322 19:46:13.423125  543705 net.go:770] primary dev: ETH0
I0322 19:46:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:46:13.423153  543705 net.go:698] Add success.
I0322 19:46:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:46:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:46:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 19:46:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:46:14.456559  543705 disk_worker.go:494] system disk:vda1
I0322 19:46:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:46:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:46:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:46:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:46:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:46:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:46:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:46:23.409781  543705 memory.go:184] no items to output this cycle
I0322 19:46:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 19:46:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:46:33.409815  543705 memory.go:184] no items to output this cycle
I0322 19:46:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 19:46:36.289665  543705 disk_info.go:125] begin check local disk info of client
I0322 19:46:36.292250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:46:36.292257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff400 0xc0003ff440]
E0322 19:46:43.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:46:43.410818  543705 memory.go:191] Add success.
I0322 19:46:43.409901  543705 cpu.go:282] Add success.
I0322 19:46:43.419732  543705 net.go:648] Add success.
I0322 19:46:43.422415  543705 net.go:770] primary dev: ETH0
I0322 19:46:43.422430  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:46:43.422443  543705 net.go:698] Add success.
I0322 19:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:46:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:46:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:46:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:46:53.409784  543705 memory.go:184] no items to output this cycle
I0322 19:46:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 19:47:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:47:03.409777  543705 cpu.go:275] no items to output this cycle
I0322 19:47:03.409789  543705 memory.go:184] no items to output this cycle
E0322 19:47:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:47:13.409785  543705 memory.go:191] Add success.
W0322 19:47:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:47:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:47:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:47:13.409825  543705 cpu.go:282] Add success.
I0322 19:47:13.420057  543705 net.go:648] Add success.
I0322 19:47:13.422974  543705 net.go:770] primary dev: ETH0
I0322 19:47:13.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:47:13.423005  543705 net.go:698] Add success.
I0322 19:47:13.453602  543705 event_worker.go:152] Polling the log file for events...
W0322 19:47:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:47:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 19:47:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:47:14.456936  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:47:14.456945  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:47:14.456952  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:47:14.456991  543705 disk_worker.go:494] system disk:vda1
I0322 19:47:14.457021  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:47:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:47:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:47:16.457905  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:47:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:47:16.457959  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:47:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:47:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:47:23.410694  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:47:23.410706  543705 memory.go:184] no items to output this cycle
I0322 19:47:23.410709  543705 cpu.go:275] no items to output this cycle
E0322 19:47:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:47:33.409772  543705 memory.go:184] no items to output this cycle
I0322 19:47:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 19:47:36.293676  543705 disk_info.go:125] begin check local disk info of client
I0322 19:47:36.296192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:47:36.296200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dbd00 0xc0004dbd40]
E0322 19:47:43.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:47:43.410850  543705 memory.go:191] Add success.
I0322 19:47:43.409958  543705 cpu.go:282] Add success.
I0322 19:47:43.419713  543705 net.go:648] Add success.
I0322 19:47:43.422431  543705 net.go:770] primary dev: ETH0
I0322 19:47:43.422443  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:47:43.422454  543705 net.go:698] Add success.
I0322 19:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:47:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:47:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:47:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:47:53.409798  543705 memory.go:184] no items to output this cycle
I0322 19:47:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 19:48:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:48:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 19:48:03.409790  543705 memory.go:184] no items to output this cycle
E0322 19:48:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:48:13.409818  543705 memory.go:191] Add success.
I0322 19:48:13.409831  543705 cpu.go:282] Add success.
W0322 19:48:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:48:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:48:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:48:13.420162  543705 net.go:648] Add success.
I0322 19:48:13.423073  543705 net.go:770] primary dev: ETH0
I0322 19:48:13.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:48:13.423103  543705 net.go:698] Add success.
I0322 19:48:13.464004  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"94b0fe2d-9636-46e0-b46a-487299d25d21","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:48:13.464037  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:48:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:48:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:48:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 19:48:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:48:14.456616  543705 disk_worker.go:494] system disk:vda1
I0322 19:48:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:48:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:48:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:48:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:48:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:48:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:48:23.409801  543705 memory.go:184] no items to output this cycle
I0322 19:48:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 19:48:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:48:33.409812  543705 memory.go:184] no items to output this cycle
I0322 19:48:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 19:48:36.297675  543705 disk_info.go:125] begin check local disk info of client
I0322 19:48:36.300290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:48:36.300296  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396700 0xc000396740]
I0322 19:48:39.779595  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:48:39.779601  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:48:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:48:43.410778  543705 memory.go:191] Add success.
I0322 19:48:43.409816  543705 cpu.go:282] Add success.
I0322 19:48:43.420768  543705 net.go:648] Add success.
I0322 19:48:43.423454  543705 net.go:770] primary dev: ETH0
I0322 19:48:43.423470  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:48:43.423483  543705 net.go:698] Add success.
I0322 19:48:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:48:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:48:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:48:53.410412  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:48:53.410431  543705 memory.go:184] no items to output this cycle
I0322 19:48:53.410442  543705 cpu.go:275] no items to output this cycle
E0322 19:49:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:49:03.409777  543705 cpu.go:275] no items to output this cycle
I0322 19:49:03.409785  543705 memory.go:184] no items to output this cycle
E0322 19:49:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:49:13.409815  543705 memory.go:191] Add success.
I0322 19:49:13.409819  543705 cpu.go:282] Add success.
W0322 19:49:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:49:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:49:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:49:13.420096  543705 net.go:648] Add success.
I0322 19:49:13.422958  543705 net.go:770] primary dev: ETH0
I0322 19:49:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:49:13.422988  543705 net.go:698] Add success.
I0322 19:49:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:49:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:49:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 19:49:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:49:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 19:49:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:49:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:49:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:49:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:49:16.472432  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:49:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:49:23.409773  543705 memory.go:184] no items to output this cycle
I0322 19:49:23.409776  543705 cpu.go:275] no items to output this cycle
E0322 19:49:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:49:33.409777  543705 memory.go:184] no items to output this cycle
I0322 19:49:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 19:49:36.301673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:49:36.304180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:49:36.304186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c4080 0xc0004c40c0]
E0322 19:49:43.409959  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:49:43.410835  543705 memory.go:191] Add success.
I0322 19:49:43.410131  543705 cpu.go:282] Add success.
I0322 19:49:43.419735  543705 net.go:648] Add success.
I0322 19:49:43.422477  543705 net.go:770] primary dev: ETH0
I0322 19:49:43.422490  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:49:43.422502  543705 net.go:698] Add success.
I0322 19:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:49:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:49:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:49:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:49:53.409785  543705 memory.go:184] no items to output this cycle
I0322 19:49:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 19:50:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:50:03.409807  543705 memory.go:184] no items to output this cycle
I0322 19:50:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 19:50:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:50:13.409777  543705 memory.go:191] Add success.
W0322 19:50:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 19:50:13.409807  543705 cpu.go:282] Add success.
W0322 19:50:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:50:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:50:13.419990  543705 net.go:770] primary dev: ETH0
I0322 19:50:13.420003  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:50:13.420015  543705 net.go:698] Add success.
I0322 19:50:13.420249  543705 net.go:648] Add success.
I0322 19:50:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:50:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:50:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 19:50:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:50:14.456520  543705 disk_worker.go:494] system disk:vda1
I0322 19:50:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:50:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:50:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:50:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:50:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:50:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:50:23.409794  543705 memory.go:184] no items to output this cycle
I0322 19:50:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 19:50:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:50:33.409787  543705 memory.go:184] no items to output this cycle
I0322 19:50:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 19:50:36.305674  543705 disk_info.go:125] begin check local disk info of client
I0322 19:50:36.308190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:50:36.308196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6000 0xc0002a6040]
E0322 19:50:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:50:43.410703  543705 memory.go:191] Add success.
I0322 19:50:43.409823  543705 cpu.go:282] Add success.
I0322 19:50:43.420407  543705 net.go:648] Add success.
I0322 19:50:43.423222  543705 net.go:770] primary dev: ETH0
I0322 19:50:43.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:50:43.423252  543705 net.go:698] Add success.
I0322 19:50:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:50:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:50:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:50:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:50:53.409807  543705 memory.go:184] no items to output this cycle
I0322 19:50:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 19:51:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:51:03.409791  543705 memory.go:184] no items to output this cycle
I0322 19:51:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 19:51:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:51:13.409782  543705 memory.go:191] Add success.
I0322 19:51:13.409802  543705 cpu.go:282] Add success.
W0322 19:51:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:51:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:51:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:51:13.420037  543705 net.go:648] Add success.
I0322 19:51:13.422670  543705 net.go:770] primary dev: ETH0
I0322 19:51:13.422684  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:51:13.422696  543705 net.go:698] Add success.
I0322 19:51:13.471772  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3f0dfdad-fd68-46b1-a040-b9d7cefd8995","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:51:13.471805  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:51:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:51:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:51:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 19:51:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:51:14.456693  543705 disk_worker.go:494] system disk:vda1
I0322 19:51:14.456727  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:51:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:51:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:51:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:51:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:51:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:51:23.409775  543705 memory.go:184] no items to output this cycle
I0322 19:51:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 19:51:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:51:33.409778  543705 memory.go:184] no items to output this cycle
I0322 19:51:33.409890  543705 cpu.go:275] no items to output this cycle
I0322 19:51:36.309687  543705 disk_info.go:125] begin check local disk info of client
I0322 19:51:36.312212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:51:36.312218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475500 0xc000475540]
I0322 19:51:39.780586  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:51:39.780593  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:51:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:51:43.410800  543705 memory.go:191] Add success.
I0322 19:51:43.409816  543705 cpu.go:282] Add success.
I0322 19:51:43.420544  543705 net.go:648] Add success.
I0322 19:51:43.423370  543705 net.go:770] primary dev: ETH0
I0322 19:51:43.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:51:43.423398  543705 net.go:698] Add success.
I0322 19:51:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:51:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:51:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:51:53.410394  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:51:53.410409  543705 memory.go:184] no items to output this cycle
I0322 19:51:53.410426  543705 cpu.go:275] no items to output this cycle
E0322 19:52:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:52:03.409806  543705 memory.go:184] no items to output this cycle
I0322 19:52:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 19:52:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:52:13.409786  543705 memory.go:191] Add success.
I0322 19:52:13.409789  543705 cpu.go:282] Add success.
W0322 19:52:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:52:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:52:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:52:13.420095  543705 net.go:648] Add success.
I0322 19:52:13.422639  543705 net.go:770] primary dev: ETH0
I0322 19:52:13.422652  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:52:13.422664  543705 net.go:698] Add success.
W0322 19:52:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:52:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 19:52:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:52:14.456907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:52:14.456916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:52:14.456922  543705 custom_config.go:64] query custom config with name: gpu
I0322 19:52:14.457000  543705 disk_worker.go:494] system disk:vda1
I0322 19:52:14.457043  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:52:15.456871  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:52:15.456880  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:52:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:52:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:52:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:52:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:52:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:52:23.410484  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:52:23.410502  543705 memory.go:184] no items to output this cycle
I0322 19:52:23.410528  543705 cpu.go:275] no items to output this cycle
E0322 19:52:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:52:33.409790  543705 memory.go:184] no items to output this cycle
I0322 19:52:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 19:52:36.313673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:52:36.316179  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:52:36.316185  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c55c0 0xc0000c5600]
E0322 19:52:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:52:43.410692  543705 memory.go:191] Add success.
I0322 19:52:43.409808  543705 cpu.go:282] Add success.
I0322 19:52:43.420403  543705 net.go:648] Add success.
I0322 19:52:43.423016  543705 net.go:770] primary dev: ETH0
I0322 19:52:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:52:43.423041  543705 net.go:698] Add success.
I0322 19:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:52:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:52:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:52:53.409804  543705 memory.go:184] no items to output this cycle
I0322 19:52:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 19:53:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:53:03.409782  543705 memory.go:184] no items to output this cycle
I0322 19:53:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 19:53:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:53:13.409816  543705 memory.go:191] Add success.
I0322 19:53:13.409820  543705 cpu.go:282] Add success.
W0322 19:53:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:53:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:53:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:53:13.420055  543705 net.go:648] Add success.
I0322 19:53:13.422838  543705 net.go:770] primary dev: ETH0
I0322 19:53:13.422851  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:53:13.422862  543705 net.go:698] Add success.
I0322 19:53:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:53:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:53:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 19:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:53:14.456606  543705 disk_worker.go:494] system disk:vda1
I0322 19:53:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:53:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:53:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:53:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:53:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:53:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:53:23.409778  543705 memory.go:184] no items to output this cycle
I0322 19:53:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 19:53:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:53:33.409790  543705 memory.go:184] no items to output this cycle
I0322 19:53:33.409803  543705 cpu.go:275] no items to output this cycle
I0322 19:53:36.317673  543705 disk_info.go:125] begin check local disk info of client
I0322 19:53:36.320186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:53:36.320192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d00 0xc0000c5d40]
E0322 19:53:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:53:43.410643  543705 memory.go:191] Add success.
I0322 19:53:43.409805  543705 cpu.go:282] Add success.
I0322 19:53:43.420340  543705 net.go:648] Add success.
I0322 19:53:43.423100  543705 net.go:770] primary dev: ETH0
I0322 19:53:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:53:43.423125  543705 net.go:698] Add success.
I0322 19:53:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:53:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:53:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:53:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:53:53.409782  543705 memory.go:184] no items to output this cycle
I0322 19:53:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 19:54:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:54:03.409788  543705 memory.go:184] no items to output this cycle
I0322 19:54:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 19:54:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:54:13.409821  543705 memory.go:191] Add success.
I0322 19:54:13.409830  543705 cpu.go:282] Add success.
W0322 19:54:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:54:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:54:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:54:13.420154  543705 net.go:648] Add success.
I0322 19:54:13.423265  543705 net.go:770] primary dev: ETH0
I0322 19:54:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:54:13.423295  543705 net.go:698] Add success.
I0322 19:54:13.468112  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7223385e-7220-47aa-9bdd-1a93e1e3178a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:54:13.468144  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:54:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:54:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:54:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 19:54:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:54:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 19:54:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:54:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:54:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:54:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:54:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:54:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:54:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:54:23.409777  543705 memory.go:184] no items to output this cycle
I0322 19:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 19:54:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:54:33.409804  543705 memory.go:184] no items to output this cycle
I0322 19:54:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 19:54:36.321677  543705 disk_info.go:125] begin check local disk info of client
I0322 19:54:36.324221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:54:36.324227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4e80 0xc0000c4ec0]
I0322 19:54:39.781597  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:54:39.781603  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0322 19:54:43.409814  543705 cpu.go:282] Add success.
E0322 19:54:43.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:54:43.410661  543705 memory.go:191] Add success.
I0322 19:54:43.420469  543705 net.go:648] Add success.
I0322 19:54:43.422945  543705 net.go:770] primary dev: ETH0
I0322 19:54:43.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:54:43.422977  543705 net.go:698] Add success.
I0322 19:54:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:54:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:54:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:54:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:54:53.409791  543705 memory.go:184] no items to output this cycle
I0322 19:54:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 19:55:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:55:03.409819  543705 memory.go:184] no items to output this cycle
I0322 19:55:03.409833  543705 cpu.go:275] no items to output this cycle
E0322 19:55:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:55:13.409792  543705 memory.go:191] Add success.
I0322 19:55:13.409815  543705 cpu.go:282] Add success.
W0322 19:55:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:55:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:55:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:55:13.420223  543705 net.go:648] Add success.
I0322 19:55:13.423260  543705 net.go:770] primary dev: ETH0
I0322 19:55:13.423272  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:55:13.423284  543705 net.go:698] Add success.
I0322 19:55:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:55:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:55:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 19:55:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:55:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 19:55:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:55:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:55:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:55:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:55:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:55:23.409809  543705 memory.go:184] no items to output this cycle
I0322 19:55:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 19:55:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:55:33.409793  543705 memory.go:184] no items to output this cycle
I0322 19:55:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 19:55:36.327012  543705 disk_info.go:125] begin check local disk info of client
I0322 19:55:36.329535  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:55:36.329542  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b80 0xc0000c5bc0]
E0322 19:55:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:55:43.410947  543705 memory.go:191] Add success.
I0322 19:55:43.409829  543705 cpu.go:282] Add success.
I0322 19:55:43.420657  543705 net.go:648] Add success.
I0322 19:55:43.423490  543705 net.go:770] primary dev: ETH0
I0322 19:55:43.423505  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:55:43.423519  543705 net.go:698] Add success.
I0322 19:55:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:55:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:55:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:55:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:55:53.409778  543705 memory.go:184] no items to output this cycle
I0322 19:55:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 19:56:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:56:03.409815  543705 memory.go:184] no items to output this cycle
I0322 19:56:03.409824  543705 cpu.go:275] no items to output this cycle
W0322 19:56:13.409702  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:56:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:56:13.409724  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 19:56:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:56:13.409808  543705 cpu.go:282] Add success.
I0322 19:56:13.409824  543705 memory.go:191] Add success.
I0322 19:56:13.419884  543705 net.go:770] primary dev: ETH0
I0322 19:56:13.419898  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:56:13.419911  543705 net.go:698] Add success.
I0322 19:56:13.420260  543705 net.go:648] Add success.
I0322 19:56:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:56:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:56:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 19:56:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:56:14.456733  543705 disk_worker.go:494] system disk:vda1
I0322 19:56:14.456764  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:56:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:56:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:56:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:56:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:56:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:56:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:56:23.409761  543705 memory.go:184] no items to output this cycle
I0322 19:56:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 19:56:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:56:33.409786  543705 memory.go:184] no items to output this cycle
I0322 19:56:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 19:56:36.329681  543705 disk_info.go:125] begin check local disk info of client
I0322 19:56:36.332248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:56:36.332254  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482e80 0xc000482ec0]
E0322 19:56:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:56:43.410805  543705 memory.go:191] Add success.
I0322 19:56:43.409823  543705 cpu.go:282] Add success.
I0322 19:56:43.420507  543705 net.go:648] Add success.
I0322 19:56:43.422983  543705 net.go:770] primary dev: ETH0
I0322 19:56:43.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:56:43.423009  543705 net.go:698] Add success.
I0322 19:56:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:56:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:56:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:56:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:56:53.409788  543705 cpu.go:275] no items to output this cycle
I0322 19:56:53.409797  543705 memory.go:184] no items to output this cycle
E0322 19:57:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:57:03.409785  543705 cpu.go:275] no items to output this cycle
I0322 19:57:03.409788  543705 memory.go:184] no items to output this cycle
W0322 19:57:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:57:13.409731  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:57:13.409737  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 19:57:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:57:13.409838  543705 cpu.go:282] Add success.
I0322 19:57:13.409850  543705 memory.go:191] Add success.
I0322 19:57:13.420071  543705 net.go:648] Add success.
I0322 19:57:13.422718  543705 net.go:770] primary dev: ETH0
I0322 19:57:13.422731  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:57:13.422743  543705 net.go:698] Add success.
I0322 19:57:13.429065  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 19:57:13.453243  543705 event_worker.go:152] Polling the log file for events...
I0322 19:57:13.463651  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"48a50073-d50c-459b-8ec5-33985ca4126b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 19:57:13.463684  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 19:57:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:57:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:57:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 19:57:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0322 19:57:14.456952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 19:57:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 19:57:14.456996  543705 disk_worker.go:494] system disk:vda1
I0322 19:57:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 19:57:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 19:57:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:57:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 19:57:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 19:57:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:57:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:57:16.472112  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:57:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:57:23.409802  543705 memory.go:184] no items to output this cycle
I0322 19:57:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 19:57:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:57:33.409771  543705 memory.go:184] no items to output this cycle
I0322 19:57:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 19:57:36.333677  543705 disk_info.go:125] begin check local disk info of client
I0322 19:57:36.336205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:57:36.336211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004825c0 0xc000482600]
I0322 19:57:39.781729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 19:57:39.781735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 19:57:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:57:43.410650  543705 memory.go:191] Add success.
I0322 19:57:43.409810  543705 cpu.go:282] Add success.
I0322 19:57:43.420349  543705 net.go:648] Add success.
I0322 19:57:43.423375  543705 net.go:770] primary dev: ETH0
I0322 19:57:43.423388  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:57:43.423400  543705 net.go:698] Add success.
I0322 19:57:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:57:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:57:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:57:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:57:53.409778  543705 memory.go:184] no items to output this cycle
I0322 19:57:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 19:58:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:58:03.409806  543705 memory.go:184] no items to output this cycle
I0322 19:58:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 19:58:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:58:13.409783  543705 memory.go:191] Add success.
I0322 19:58:13.409807  543705 cpu.go:282] Add success.
W0322 19:58:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:58:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:58:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:58:13.420055  543705 net.go:648] Add success.
I0322 19:58:13.422864  543705 net.go:770] primary dev: ETH0
I0322 19:58:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:58:13.422892  543705 net.go:698] Add success.
I0322 19:58:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:58:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:58:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 19:58:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:58:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 19:58:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:58:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:58:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:58:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:58:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:58:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:58:23.409769  543705 memory.go:184] no items to output this cycle
I0322 19:58:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 19:58:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:58:33.409784  543705 memory.go:184] no items to output this cycle
I0322 19:58:33.409806  543705 cpu.go:275] no items to output this cycle
I0322 19:58:36.337674  543705 disk_info.go:125] begin check local disk info of client
I0322 19:58:36.340183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:58:36.340189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025bb40 0xc00025bb80]
E0322 19:58:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:58:43.410560  543705 memory.go:191] Add success.
I0322 19:58:43.409838  543705 cpu.go:282] Add success.
I0322 19:58:43.420263  543705 net.go:648] Add success.
I0322 19:58:43.422743  543705 net.go:770] primary dev: ETH0
I0322 19:58:43.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:58:43.422773  543705 net.go:698] Add success.
I0322 19:58:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:58:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:58:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:58:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:58:53.409790  543705 memory.go:184] no items to output this cycle
I0322 19:58:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 19:59:03.409974  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:59:03.409993  543705 memory.go:184] no items to output this cycle
I0322 19:59:03.410003  543705 cpu.go:275] no items to output this cycle
E0322 19:59:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:59:13.409823  543705 memory.go:191] Add success.
I0322 19:59:13.409831  543705 cpu.go:282] Add success.
W0322 19:59:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 19:59:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 19:59:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 19:59:13.420166  543705 net.go:648] Add success.
I0322 19:59:13.423328  543705 net.go:770] primary dev: ETH0
I0322 19:59:13.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:59:13.423357  543705 net.go:698] Add success.
I0322 19:59:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 19:59:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 19:59:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 19:59:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 19:59:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 19:59:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 19:59:15.456018  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 19:59:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:59:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:59:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 19:59:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0322 19:59:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:59:23.409799  543705 memory.go:184] no items to output this cycle
I0322 19:59:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 19:59:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:59:33.409774  543705 memory.go:184] no items to output this cycle
I0322 19:59:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 19:59:36.342089  543705 disk_info.go:125] begin check local disk info of client
I0322 19:59:36.344595  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 19:59:36.344601  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312e40 0xc000312e80]
E0322 19:59:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:59:43.410794  543705 memory.go:191] Add success.
I0322 19:59:43.409815  543705 cpu.go:282] Add success.
I0322 19:59:43.420589  543705 net.go:648] Add success.
I0322 19:59:43.423704  543705 net.go:770] primary dev: ETH0
I0322 19:59:43.423722  543705 net.go:802] Send network stats successfully!,count is 6
I0322 19:59:43.423742  543705 net.go:698] Add success.
I0322 19:59:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 19:59:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 19:59:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 19:59:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 19:59:53.409788  543705 memory.go:184] no items to output this cycle
I0322 19:59:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 20:00:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:00:03.409774  543705 memory.go:184] no items to output this cycle
I0322 20:00:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:00:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:00:13.409828  543705 memory.go:191] Add success.
I0322 20:00:13.409834  543705 cpu.go:282] Add success.
W0322 20:00:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:00:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:00:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:00:13.420120  543705 net.go:648] Add success.
I0322 20:00:13.422904  543705 net.go:770] primary dev: ETH0
I0322 20:00:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:00:13.422931  543705 net.go:698] Add success.
I0322 20:00:13.468616  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"92c4c4b5-73ce-489d-84f1-71ae4f658267","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:00:13.468652  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:00:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:00:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:00:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 20:00:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:00:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 20:00:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:00:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:00:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:00:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:00:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:00:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:00:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:00:23.409796  543705 memory.go:184] no items to output this cycle
I0322 20:00:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:00:33.410725  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:00:33.410741  543705 cpu.go:275] no items to output this cycle
I0322 20:00:33.410743  543705 memory.go:184] no items to output this cycle
I0322 20:00:36.345679  543705 disk_info.go:125] begin check local disk info of client
I0322 20:00:36.348255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:00:36.348261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b640 0xc00007b680]
I0322 20:00:39.783617  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:00:39.783624  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:00:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:00:43.410750  543705 memory.go:191] Add success.
I0322 20:00:43.409815  543705 cpu.go:282] Add success.
I0322 20:00:43.420478  543705 net.go:648] Add success.
I0322 20:00:43.423812  543705 net.go:770] primary dev: ETH0
I0322 20:00:43.423826  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:00:43.423838  543705 net.go:698] Add success.
I0322 20:00:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:00:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:00:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:00:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:00:53.409779  543705 memory.go:184] no items to output this cycle
I0322 20:00:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 20:01:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:01:03.409809  543705 memory.go:184] no items to output this cycle
I0322 20:01:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 20:01:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:01:13.409796  543705 memory.go:191] Add success.
I0322 20:01:13.409810  543705 cpu.go:282] Add success.
W0322 20:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:01:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:01:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:01:13.420051  543705 net.go:648] Add success.
I0322 20:01:13.422863  543705 net.go:770] primary dev: ETH0
I0322 20:01:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:01:13.422887  543705 net.go:698] Add success.
I0322 20:01:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:01:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:01:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 20:01:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:01:14.456550  543705 disk_worker.go:494] system disk:vda1
I0322 20:01:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:01:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:01:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:01:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:01:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:01:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:01:23.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:01:23.410244  543705 memory.go:184] no items to output this cycle
I0322 20:01:23.410273  543705 cpu.go:275] no items to output this cycle
E0322 20:01:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:01:33.409816  543705 memory.go:184] no items to output this cycle
I0322 20:01:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 20:01:36.349672  543705 disk_info.go:125] begin check local disk info of client
I0322 20:01:36.352235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:01:36.352242  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051bf40 0xc000482000]
E0322 20:01:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:01:43.410685  543705 memory.go:191] Add success.
I0322 20:01:43.409808  543705 cpu.go:282] Add success.
I0322 20:01:43.420373  543705 net.go:648] Add success.
I0322 20:01:43.423034  543705 net.go:770] primary dev: ETH0
I0322 20:01:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:01:43.423058  543705 net.go:698] Add success.
I0322 20:01:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:01:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:01:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:01:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:01:53.409804  543705 memory.go:184] no items to output this cycle
I0322 20:01:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 20:02:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:02:03.409781  543705 cpu.go:275] no items to output this cycle
I0322 20:02:03.409786  543705 memory.go:184] no items to output this cycle
E0322 20:02:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:02:13.409821  543705 memory.go:191] Add success.
I0322 20:02:13.409823  543705 cpu.go:282] Add success.
W0322 20:02:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:02:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:02:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:02:13.420074  543705 net.go:648] Add success.
I0322 20:02:13.422642  543705 net.go:770] primary dev: ETH0
I0322 20:02:13.422655  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:02:13.422671  543705 net.go:698] Add success.
W0322 20:02:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:02:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 20:02:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:02:14.455915  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:02:14.455923  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:02:14.455929  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:02:14.456537  543705 disk_worker.go:494] system disk:vda1
I0322 20:02:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:02:15.456781  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:02:15.456791  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:02:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:02:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:02:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:02:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:02:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:02:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:02:23.409767  543705 memory.go:184] no items to output this cycle
I0322 20:02:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 20:02:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:02:33.409810  543705 memory.go:184] no items to output this cycle
I0322 20:02:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 20:02:36.353672  543705 disk_info.go:125] begin check local disk info of client
I0322 20:02:36.356301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:02:36.356308  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b7c0 0xc00007b800]
E0322 20:02:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:02:43.410802  543705 memory.go:191] Add success.
I0322 20:02:43.409804  543705 cpu.go:282] Add success.
I0322 20:02:43.420511  543705 net.go:648] Add success.
I0322 20:02:43.423298  543705 net.go:770] primary dev: ETH0
I0322 20:02:43.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:02:43.423325  543705 net.go:698] Add success.
I0322 20:02:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:02:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:02:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:02:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:02:53.409769  543705 memory.go:184] no items to output this cycle
I0322 20:02:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 20:03:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:03:03.409767  543705 memory.go:184] no items to output this cycle
I0322 20:03:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:03:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:03:13.409809  543705 memory.go:191] Add success.
I0322 20:03:13.409811  543705 cpu.go:282] Add success.
W0322 20:03:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:03:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:03:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:03:13.420288  543705 net.go:648] Add success.
I0322 20:03:13.423021  543705 net.go:770] primary dev: ETH0
I0322 20:03:13.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:03:13.423047  543705 net.go:698] Add success.
I0322 20:03:13.468609  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"630cc159-b786-4e4c-b7cb-c555a212abbe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:03:13.468645  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:03:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:03:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:03:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 20:03:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:03:14.456598  543705 disk_worker.go:494] system disk:vda1
I0322 20:03:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:03:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:03:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:03:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:03:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:03:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:03:23.409798  543705 cpu.go:275] no items to output this cycle
I0322 20:03:23.409799  543705 memory.go:184] no items to output this cycle
E0322 20:03:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:03:33.409783  543705 memory.go:184] no items to output this cycle
I0322 20:03:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 20:03:36.357672  543705 disk_info.go:125] begin check local disk info of client
I0322 20:03:36.360166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:03:36.360172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000482400 0xc000482440]
I0322 20:03:39.784680  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:03:39.784686  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:03:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:03:43.410568  543705 memory.go:191] Add success.
I0322 20:03:43.409798  543705 cpu.go:282] Add success.
I0322 20:03:43.420261  543705 net.go:648] Add success.
I0322 20:03:43.422766  543705 net.go:770] primary dev: ETH0
I0322 20:03:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:03:43.422791  543705 net.go:698] Add success.
I0322 20:03:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:03:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:03:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:03:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:03:53.409785  543705 cpu.go:275] no items to output this cycle
I0322 20:03:53.409793  543705 memory.go:184] no items to output this cycle
E0322 20:04:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:04:03.409783  543705 memory.go:184] no items to output this cycle
I0322 20:04:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:04:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:04:13.409809  543705 memory.go:191] Add success.
I0322 20:04:13.409821  543705 cpu.go:282] Add success.
W0322 20:04:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:04:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:04:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:04:13.420127  543705 net.go:648] Add success.
I0322 20:04:13.423057  543705 net.go:770] primary dev: ETH0
I0322 20:04:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:04:13.423084  543705 net.go:698] Add success.
I0322 20:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:04:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:04:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0322 20:04:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:04:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 20:04:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:04:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:04:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:04:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:04:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:04:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:04:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:04:23.409865  543705 memory.go:184] no items to output this cycle
I0322 20:04:23.409903  543705 cpu.go:275] no items to output this cycle
E0322 20:04:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:04:33.409809  543705 memory.go:184] no items to output this cycle
I0322 20:04:33.409826  543705 cpu.go:275] no items to output this cycle
I0322 20:04:36.361674  543705 disk_info.go:125] begin check local disk info of client
I0322 20:04:36.364259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:04:36.364265  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4780 0xc0000c47c0]
E0322 20:04:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:04:43.410600  543705 memory.go:191] Add success.
I0322 20:04:43.409795  543705 cpu.go:282] Add success.
I0322 20:04:43.420298  543705 net.go:648] Add success.
I0322 20:04:43.422866  543705 net.go:770] primary dev: ETH0
I0322 20:04:43.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:04:43.422892  543705 net.go:698] Add success.
I0322 20:04:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:04:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:04:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:04:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:04:53.409775  543705 memory.go:184] no items to output this cycle
I0322 20:04:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 20:05:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:05:03.409777  543705 memory.go:184] no items to output this cycle
I0322 20:05:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 20:05:13.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:05:13.409904  543705 memory.go:191] Add success.
I0322 20:05:13.409924  543705 cpu.go:282] Add success.
W0322 20:05:13.409953  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:05:13.409983  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:05:13.409990  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:05:13.420433  543705 net.go:648] Add success.
I0322 20:05:13.422987  543705 net.go:770] primary dev: ETH0
I0322 20:05:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:05:13.423013  543705 net.go:698] Add success.
I0322 20:05:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:05:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:05:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 20:05:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:05:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 20:05:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:05:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:05:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:05:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:05:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:05:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:05:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:05:23.409800  543705 memory.go:184] no items to output this cycle
I0322 20:05:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 20:05:33.409854  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:05:33.409915  543705 memory.go:184] no items to output this cycle
I0322 20:05:33.410022  543705 cpu.go:275] no items to output this cycle
I0322 20:05:36.365673  543705 disk_info.go:125] begin check local disk info of client
I0322 20:05:36.368175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:05:36.368180  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c98c0 0xc0003c9900]
E0322 20:05:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:05:43.410697  543705 memory.go:191] Add success.
I0322 20:05:43.409792  543705 cpu.go:282] Add success.
I0322 20:05:43.420469  543705 net.go:648] Add success.
I0322 20:05:43.423477  543705 net.go:770] primary dev: ETH0
I0322 20:05:43.423490  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:05:43.423502  543705 net.go:698] Add success.
I0322 20:05:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:05:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:05:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:05:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:05:53.409790  543705 memory.go:184] no items to output this cycle
I0322 20:05:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 20:06:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:06:03.409795  543705 memory.go:184] no items to output this cycle
I0322 20:06:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:06:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:06:13.409794  543705 cpu.go:282] Add success.
I0322 20:06:13.409796  543705 memory.go:191] Add success.
W0322 20:06:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:06:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:06:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:06:13.420199  543705 net.go:648] Add success.
I0322 20:06:13.422981  543705 net.go:770] primary dev: ETH0
I0322 20:06:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:06:13.423006  543705 net.go:698] Add success.
I0322 20:06:13.541839  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"85e936e2-8aab-47d6-920e-4478c35a5fb6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:06:13.541873  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:06:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:06:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:06:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 20:06:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:06:14.456613  543705 disk_worker.go:494] system disk:vda1
I0322 20:06:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:06:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:06:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:06:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:06:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:06:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:06:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:06:23.409889  543705 memory.go:184] no items to output this cycle
I0322 20:06:23.409986  543705 cpu.go:275] no items to output this cycle
E0322 20:06:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:06:33.409788  543705 memory.go:184] no items to output this cycle
I0322 20:06:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 20:06:36.369673  543705 disk_info.go:125] begin check local disk info of client
I0322 20:06:36.372219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:06:36.372225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c49c0 0xc0000c4a00]
I0322 20:06:39.785727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:06:39.785733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:06:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:06:43.410652  543705 memory.go:191] Add success.
I0322 20:06:43.409821  543705 cpu.go:282] Add success.
I0322 20:06:43.420337  543705 net.go:648] Add success.
I0322 20:06:43.422826  543705 net.go:770] primary dev: ETH0
I0322 20:06:43.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:06:43.422852  543705 net.go:698] Add success.
I0322 20:06:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:06:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:06:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:06:53.409786  543705 memory.go:184] no items to output this cycle
I0322 20:06:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 20:07:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:07:03.409799  543705 memory.go:184] no items to output this cycle
I0322 20:07:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 20:07:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:07:13.409788  543705 memory.go:191] Add success.
I0322 20:07:13.409791  543705 cpu.go:282] Add success.
W0322 20:07:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:07:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:07:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:07:13.420003  543705 net.go:648] Add success.
I0322 20:07:13.422813  543705 net.go:770] primary dev: ETH0
I0322 20:07:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:07:13.422839  543705 net.go:698] Add success.
I0322 20:07:13.453379  543705 event_worker.go:152] Polling the log file for events...
W0322 20:07:14.454776  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:07:14.454788  543705 disk_worker.go:708] disk space is not compliant
W0322 20:07:14.454791  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:07:14.456566  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:07:14.456576  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:07:14.456582  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:07:14.456636  543705 disk_worker.go:494] system disk:vda1
I0322 20:07:14.456678  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:07:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:07:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:07:16.457906  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:07:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:07:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:07:16.457980  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:07:16.472302  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:07:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:07:23.409770  543705 memory.go:184] no items to output this cycle
I0322 20:07:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 20:07:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:07:33.409786  543705 memory.go:184] no items to output this cycle
I0322 20:07:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 20:07:36.373672  543705 disk_info.go:125] begin check local disk info of client
I0322 20:07:36.376283  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:07:36.376289  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0322 20:07:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:07:43.409824  543705 memory.go:191] Add success.
I0322 20:07:43.409796  543705 cpu.go:282] Add success.
I0322 20:07:43.420048  543705 net.go:648] Add success.
I0322 20:07:43.421075  543705 net.go:770] primary dev: ETH0
I0322 20:07:43.421088  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:07:43.421100  543705 net.go:698] Add success.
I0322 20:07:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:07:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:07:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:07:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:07:53.409768  543705 memory.go:184] no items to output this cycle
I0322 20:07:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 20:08:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:08:03.409785  543705 memory.go:184] no items to output this cycle
I0322 20:08:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 20:08:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:08:13.409806  543705 memory.go:191] Add success.
I0322 20:08:13.409814  543705 cpu.go:282] Add success.
W0322 20:08:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:08:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:08:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:08:13.420154  543705 net.go:648] Add success.
I0322 20:08:13.423009  543705 net.go:770] primary dev: ETH0
I0322 20:08:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:08:13.423035  543705 net.go:698] Add success.
I0322 20:08:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:08:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:08:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 20:08:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:08:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 20:08:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:08:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:08:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:08:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:08:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:08:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:08:23.409791  543705 memory.go:184] no items to output this cycle
I0322 20:08:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:08:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:08:33.409775  543705 memory.go:184] no items to output this cycle
I0322 20:08:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 20:08:36.377675  543705 disk_info.go:125] begin check local disk info of client
I0322 20:08:36.380203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:08:36.380209  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483d40 0xc000483d80]
E0322 20:08:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:08:43.410704  543705 memory.go:191] Add success.
I0322 20:08:43.409796  543705 cpu.go:282] Add success.
I0322 20:08:43.420412  543705 net.go:648] Add success.
I0322 20:08:43.423043  543705 net.go:770] primary dev: ETH0
I0322 20:08:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:08:43.423072  543705 net.go:698] Add success.
I0322 20:08:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:08:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:08:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:08:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:08:53.409798  543705 memory.go:184] no items to output this cycle
I0322 20:08:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:09:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:09:03.409765  543705 memory.go:184] no items to output this cycle
I0322 20:09:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 20:09:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:09:13.409800  543705 memory.go:191] Add success.
I0322 20:09:13.409801  543705 cpu.go:282] Add success.
W0322 20:09:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:09:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:09:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:09:13.420208  543705 net.go:648] Add success.
I0322 20:09:13.422934  543705 net.go:770] primary dev: ETH0
I0322 20:09:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:09:13.422961  543705 net.go:698] Add success.
I0322 20:09:13.463684  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b1c95e90-90a0-424a-af11-bbdfc331f9e5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:09:13.463717  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:09:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:09:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:09:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 20:09:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:09:14.456490  543705 disk_worker.go:494] system disk:vda1
I0322 20:09:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:09:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:09:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:09:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:09:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:09:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:09:23.409780  543705 memory.go:184] no items to output this cycle
I0322 20:09:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 20:09:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:09:33.409788  543705 memory.go:184] no items to output this cycle
I0322 20:09:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 20:09:36.381673  543705 disk_info.go:125] begin check local disk info of client
I0322 20:09:36.384173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:09:36.384178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab080 0xc0001ab0c0]
I0322 20:09:39.787629  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:09:39.787635  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:09:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:09:43.410633  543705 memory.go:191] Add success.
I0322 20:09:43.409799  543705 cpu.go:282] Add success.
I0322 20:09:43.420419  543705 net.go:648] Add success.
I0322 20:09:43.422967  543705 net.go:770] primary dev: ETH0
I0322 20:09:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:09:43.422995  543705 net.go:698] Add success.
I0322 20:09:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:09:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:09:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:09:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:09:53.409768  543705 memory.go:184] no items to output this cycle
I0322 20:09:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:10:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:10:03.409806  543705 memory.go:184] no items to output this cycle
I0322 20:10:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 20:10:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:10:13.409775  543705 memory.go:191] Add success.
W0322 20:10:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:10:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:10:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:10:13.409824  543705 cpu.go:282] Add success.
I0322 20:10:13.420171  543705 net.go:648] Add success.
I0322 20:10:13.423258  543705 net.go:770] primary dev: ETH0
I0322 20:10:13.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:10:13.423283  543705 net.go:698] Add success.
I0322 20:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:10:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:10:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 20:10:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:10:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 20:10:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:10:15.455916  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:10:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:10:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:10:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:10:23.409765  543705 memory.go:184] no items to output this cycle
I0322 20:10:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 20:10:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:10:33.409786  543705 memory.go:184] no items to output this cycle
I0322 20:10:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 20:10:36.385671  543705 disk_info.go:125] begin check local disk info of client
I0322 20:10:36.388183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:10:36.388189  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b280 0xc00007b2c0]
E0322 20:10:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:10:43.410648  543705 memory.go:191] Add success.
I0322 20:10:43.409806  543705 cpu.go:282] Add success.
I0322 20:10:43.420342  543705 net.go:648] Add success.
I0322 20:10:43.423214  543705 net.go:770] primary dev: ETH0
I0322 20:10:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:10:43.423242  543705 net.go:698] Add success.
I0322 20:10:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:10:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:10:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:10:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:10:53.409776  543705 memory.go:184] no items to output this cycle
I0322 20:10:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 20:11:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:11:03.409771  543705 memory.go:184] no items to output this cycle
I0322 20:11:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 20:11:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:11:13.409810  543705 memory.go:191] Add success.
I0322 20:11:13.409817  543705 cpu.go:282] Add success.
W0322 20:11:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:11:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:11:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:11:13.420074  543705 net.go:648] Add success.
I0322 20:11:13.422871  543705 net.go:770] primary dev: ETH0
I0322 20:11:13.422885  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:11:13.422900  543705 net.go:698] Add success.
I0322 20:11:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:11:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:11:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 20:11:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:11:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 20:11:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:11:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:11:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:11:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:11:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:11:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:11:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:11:23.409770  543705 memory.go:184] no items to output this cycle
I0322 20:11:23.409774  543705 cpu.go:275] no items to output this cycle
E0322 20:11:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:11:33.409776  543705 memory.go:184] no items to output this cycle
I0322 20:11:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 20:11:36.392000  543705 disk_info.go:125] begin check local disk info of client
I0322 20:11:36.394585  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:11:36.394591  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483a00 0xc000483a40]
E0322 20:11:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:11:43.410909  543705 memory.go:191] Add success.
I0322 20:11:43.409824  543705 cpu.go:282] Add success.
I0322 20:11:43.420625  543705 net.go:648] Add success.
I0322 20:11:43.424308  543705 net.go:770] primary dev: ETH0
I0322 20:11:43.424321  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:11:43.424334  543705 net.go:698] Add success.
I0322 20:11:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:11:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:11:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:11:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:11:53.409772  543705 memory.go:184] no items to output this cycle
I0322 20:11:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 20:12:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:12:03.409802  543705 memory.go:184] no items to output this cycle
I0322 20:12:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:12:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:12:13.409809  543705 memory.go:191] Add success.
I0322 20:12:13.409820  543705 cpu.go:282] Add success.
W0322 20:12:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:12:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:12:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:12:13.420044  543705 net.go:648] Add success.
I0322 20:12:13.422647  543705 net.go:770] primary dev: ETH0
I0322 20:12:13.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:12:13.422675  543705 net.go:698] Add success.
I0322 20:12:13.537930  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49622f6f-50e6-4d0b-8a55-795b133485a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:12:13.537962  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 20:12:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:12:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0322 20:12:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:12:14.456554  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:12:14.456572  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:12:14.456576  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:12:14.457639  543705 disk_worker.go:494] system disk:vda1
I0322 20:12:14.457685  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:12:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:12:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:12:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:12:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:12:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:12:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:12:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:12:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:12:23.409761  543705 memory.go:184] no items to output this cycle
I0322 20:12:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 20:12:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:12:33.409788  543705 memory.go:184] no items to output this cycle
I0322 20:12:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 20:12:36.397671  543705 disk_info.go:125] begin check local disk info of client
I0322 20:12:36.400188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:12:36.400194  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b9c0 0xc00007ba00]
I0322 20:12:39.788654  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:12:39.788661  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:12:43.410479  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:12:43.411338  543705 memory.go:191] Add success.
I0322 20:12:43.410520  543705 cpu.go:282] Add success.
I0322 20:12:43.420045  543705 net.go:648] Add success.
I0322 20:12:43.422583  543705 net.go:770] primary dev: ETH0
I0322 20:12:43.422612  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:12:43.422625  543705 net.go:698] Add success.
I0322 20:12:46.458022  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:12:46.458107  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:12:46.458144  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:12:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:12:53.409785  543705 memory.go:184] no items to output this cycle
I0322 20:12:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:13:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:13:03.409775  543705 memory.go:184] no items to output this cycle
I0322 20:13:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 20:13:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:13:13.409809  543705 memory.go:191] Add success.
I0322 20:13:13.409820  543705 cpu.go:282] Add success.
W0322 20:13:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:13:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:13:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:13:13.420088  543705 net.go:648] Add success.
I0322 20:13:13.422976  543705 net.go:770] primary dev: ETH0
I0322 20:13:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:13:13.423005  543705 net.go:698] Add success.
I0322 20:13:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:13:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:13:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 20:13:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:13:14.459165  543705 disk_worker.go:494] system disk:vda1
I0322 20:13:14.459198  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:13:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:13:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:13:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:13:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:13:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:13:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:13:23.409769  543705 memory.go:184] no items to output this cycle
I0322 20:13:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:13:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:13:33.409776  543705 memory.go:184] no items to output this cycle
I0322 20:13:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 20:13:36.402119  543705 disk_info.go:125] begin check local disk info of client
I0322 20:13:36.404617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:13:36.404623  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a500 0xc00027a540]
E0322 20:13:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:13:43.410603  543705 memory.go:191] Add success.
I0322 20:13:43.409808  543705 cpu.go:282] Add success.
I0322 20:13:43.420380  543705 net.go:648] Add success.
I0322 20:13:43.422770  543705 net.go:770] primary dev: ETH0
I0322 20:13:43.422788  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:13:43.422804  543705 net.go:698] Add success.
I0322 20:13:46.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:13:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:13:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:13:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:13:53.409783  543705 memory.go:184] no items to output this cycle
I0322 20:13:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 20:14:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:14:03.409798  543705 memory.go:184] no items to output this cycle
I0322 20:14:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:14:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:14:13.409809  543705 memory.go:191] Add success.
I0322 20:14:13.409815  543705 cpu.go:282] Add success.
W0322 20:14:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:14:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:14:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:14:13.420235  543705 net.go:648] Add success.
I0322 20:14:13.423308  543705 net.go:770] primary dev: ETH0
I0322 20:14:13.423322  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:14:13.423333  543705 net.go:698] Add success.
I0322 20:14:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:14:14.455364  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:14:14.455461  543705 disk_worker.go:708] disk space is not compliant
W0322 20:14:14.455472  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:14:14.457551  543705 disk_worker.go:494] system disk:vda1
I0322 20:14:14.457592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:14:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:14:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:14:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:14:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:14:23.409796  543705 memory.go:184] no items to output this cycle
I0322 20:14:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 20:14:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:14:33.409804  543705 memory.go:184] no items to output this cycle
I0322 20:14:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 20:14:36.405678  543705 disk_info.go:125] begin check local disk info of client
I0322 20:14:36.408216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:14:36.408222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0322 20:14:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:14:43.410736  543705 memory.go:191] Add success.
I0322 20:14:43.409802  543705 cpu.go:282] Add success.
I0322 20:14:43.420442  543705 net.go:648] Add success.
I0322 20:14:43.422955  543705 net.go:770] primary dev: ETH0
I0322 20:14:43.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:14:43.422986  543705 net.go:698] Add success.
I0322 20:14:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:14:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:14:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:14:53.409778  543705 memory.go:184] no items to output this cycle
I0322 20:14:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 20:15:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:15:03.409794  543705 memory.go:184] no items to output this cycle
I0322 20:15:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:15:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:15:13.409778  543705 memory.go:191] Add success.
I0322 20:15:13.409796  543705 cpu.go:282] Add success.
W0322 20:15:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:15:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:15:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:15:13.420057  543705 net.go:648] Add success.
I0322 20:15:13.422824  543705 net.go:770] primary dev: ETH0
I0322 20:15:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:15:13.422858  543705 net.go:698] Add success.
I0322 20:15:13.463723  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5a4aec63-a565-41e8-977c-df5beb725349","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:15:13.463755  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:15:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:15:14.455344  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:15:14.455437  543705 disk_worker.go:708] disk space is not compliant
W0322 20:15:14.455442  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:15:14.457073  543705 disk_worker.go:494] system disk:vda1
I0322 20:15:14.457117  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:15:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:15:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:15:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:15:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:15:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:15:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:15:23.409764  543705 memory.go:184] no items to output this cycle
I0322 20:15:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 20:15:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:15:33.409781  543705 cpu.go:275] no items to output this cycle
I0322 20:15:33.409795  543705 memory.go:184] no items to output this cycle
I0322 20:15:36.409676  543705 disk_info.go:125] begin check local disk info of client
I0322 20:15:36.412217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:15:36.412223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5540 0xc0000c5580]
I0322 20:15:39.789661  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:15:39.789669  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:15:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:15:43.410605  543705 memory.go:191] Add success.
I0322 20:15:43.409808  543705 cpu.go:282] Add success.
I0322 20:15:43.420377  543705 net.go:648] Add success.
I0322 20:15:43.422835  543705 net.go:770] primary dev: ETH0
I0322 20:15:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:15:43.422861  543705 net.go:698] Add success.
I0322 20:15:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:15:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:15:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:15:53.409775  543705 memory.go:184] no items to output this cycle
I0322 20:15:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 20:16:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:16:03.409777  543705 memory.go:184] no items to output this cycle
I0322 20:16:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 20:16:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:16:13.409786  543705 memory.go:191] Add success.
W0322 20:16:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 20:16:13.409816  543705 cpu.go:282] Add success.
W0322 20:16:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:16:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:16:13.420255  543705 net.go:648] Add success.
I0322 20:16:13.422885  543705 net.go:770] primary dev: ETH0
I0322 20:16:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:16:13.422911  543705 net.go:698] Add success.
I0322 20:16:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:16:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:16:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 20:16:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:16:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 20:16:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:16:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:16:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:16:16.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:16:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:16:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:16:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:16:23.409766  543705 memory.go:184] no items to output this cycle
I0322 20:16:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 20:16:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:16:33.409780  543705 memory.go:184] no items to output this cycle
I0322 20:16:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 20:16:36.412794  543705 disk_info.go:125] begin check local disk info of client
I0322 20:16:36.415403  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:16:36.415410  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a6c0 0xc00046a700]
E0322 20:16:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:16:43.410744  543705 memory.go:191] Add success.
I0322 20:16:43.409819  543705 cpu.go:282] Add success.
I0322 20:16:43.420442  543705 net.go:648] Add success.
I0322 20:16:43.423015  543705 net.go:770] primary dev: ETH0
I0322 20:16:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:16:43.423042  543705 net.go:698] Add success.
I0322 20:16:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:16:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:16:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:16:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:16:53.409804  543705 memory.go:184] no items to output this cycle
I0322 20:16:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:17:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:17:03.409772  543705 memory.go:184] no items to output this cycle
I0322 20:17:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 20:17:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:17:13.409811  543705 memory.go:191] Add success.
I0322 20:17:13.409817  543705 cpu.go:282] Add success.
W0322 20:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:17:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:17:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:17:13.420226  543705 net.go:648] Add success.
I0322 20:17:13.423119  543705 net.go:770] primary dev: ETH0
I0322 20:17:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:17:13.423154  543705 net.go:698] Add success.
I0322 20:17:13.452958  543705 event_worker.go:152] Polling the log file for events...
W0322 20:17:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:17:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 20:17:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:17:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:17:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:17:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:17:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 20:17:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:17:15.456867  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:17:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:17:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:17:16.457965  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:17:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:17:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:17:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:17:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:17:23.409800  543705 memory.go:184] no items to output this cycle
I0322 20:17:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:17:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:17:33.409808  543705 memory.go:184] no items to output this cycle
I0322 20:17:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 20:17:36.415804  543705 disk_info.go:125] begin check local disk info of client
I0322 20:17:36.418252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:17:36.418258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc580 0xc0002bc5c0]
E0322 20:17:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:17:43.410756  543705 memory.go:191] Add success.
I0322 20:17:43.409800  543705 cpu.go:282] Add success.
I0322 20:17:43.420480  543705 net.go:648] Add success.
I0322 20:17:43.423336  543705 net.go:770] primary dev: ETH0
I0322 20:17:43.423350  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:17:43.423362  543705 net.go:698] Add success.
I0322 20:17:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:17:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:17:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:17:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:17:53.409777  543705 memory.go:184] no items to output this cycle
I0322 20:17:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 20:18:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:18:03.409800  543705 memory.go:184] no items to output this cycle
I0322 20:18:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:18:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:18:13.409786  543705 memory.go:191] Add success.
I0322 20:18:13.409807  543705 cpu.go:282] Add success.
W0322 20:18:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:18:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:18:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:18:13.420044  543705 net.go:648] Add success.
I0322 20:18:13.422808  543705 net.go:770] primary dev: ETH0
I0322 20:18:13.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:18:13.422834  543705 net.go:698] Add success.
I0322 20:18:13.463394  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2cc2dc47-eaca-4af3-8239-55b6b0f7455d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:18:13.463429  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:18:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:18:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 20:18:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:18:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 20:18:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:18:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:18:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:18:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:18:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:18:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:18:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 20:18:23.409781  543705 memory.go:184] no items to output this cycle
E0322 20:18:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:18:33.409774  543705 memory.go:184] no items to output this cycle
I0322 20:18:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 20:18:36.418804  543705 disk_info.go:125] begin check local disk info of client
I0322 20:18:36.421287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:18:36.421294  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039b000 0xc00039b080]
I0322 20:18:39.789802  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:18:39.789808  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:18:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:18:43.410660  543705 memory.go:191] Add success.
I0322 20:18:43.409812  543705 cpu.go:282] Add success.
I0322 20:18:43.420393  543705 net.go:648] Add success.
I0322 20:18:43.423272  543705 net.go:770] primary dev: ETH0
I0322 20:18:43.423288  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:18:43.423303  543705 net.go:698] Add success.
I0322 20:18:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:18:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:18:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:18:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:18:53.409811  543705 memory.go:184] no items to output this cycle
I0322 20:18:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 20:19:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:19:03.409773  543705 memory.go:184] no items to output this cycle
I0322 20:19:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 20:19:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:19:13.409814  543705 memory.go:191] Add success.
I0322 20:19:13.409823  543705 cpu.go:282] Add success.
W0322 20:19:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:19:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:19:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:19:13.420511  543705 net.go:648] Add success.
I0322 20:19:13.423315  543705 net.go:770] primary dev: ETH0
I0322 20:19:13.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:19:13.423340  543705 net.go:698] Add success.
I0322 20:19:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:19:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:19:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 20:19:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:19:14.456512  543705 disk_worker.go:494] system disk:vda1
I0322 20:19:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:19:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:19:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:19:23.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:19:23.409868  543705 memory.go:184] no items to output this cycle
I0322 20:19:23.409975  543705 cpu.go:275] no items to output this cycle
E0322 20:19:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:19:33.409785  543705 memory.go:184] no items to output this cycle
I0322 20:19:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 20:19:36.421798  543705 disk_info.go:125] begin check local disk info of client
I0322 20:19:36.424262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:19:36.424269  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 20:19:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:19:43.410762  543705 memory.go:191] Add success.
I0322 20:19:43.409820  543705 cpu.go:282] Add success.
I0322 20:19:43.420467  543705 net.go:648] Add success.
I0322 20:19:43.423259  543705 net.go:770] primary dev: ETH0
I0322 20:19:43.423274  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:19:43.423288  543705 net.go:698] Add success.
I0322 20:19:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:19:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:19:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:19:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:19:53.409795  543705 cpu.go:275] no items to output this cycle
I0322 20:19:53.409803  543705 memory.go:184] no items to output this cycle
E0322 20:20:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:20:03.409787  543705 cpu.go:275] no items to output this cycle
I0322 20:20:03.409792  543705 memory.go:184] no items to output this cycle
E0322 20:20:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:20:13.409819  543705 memory.go:191] Add success.
I0322 20:20:13.409831  543705 cpu.go:282] Add success.
W0322 20:20:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:20:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:20:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:20:13.420085  543705 net.go:648] Add success.
I0322 20:20:13.422940  543705 net.go:770] primary dev: ETH0
I0322 20:20:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:20:13.422966  543705 net.go:698] Add success.
I0322 20:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:20:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:20:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 20:20:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:20:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 20:20:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:20:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:20:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:20:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:20:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:20:23.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:20:23.409913  543705 cpu.go:275] no items to output this cycle
I0322 20:20:23.409923  543705 memory.go:184] no items to output this cycle
E0322 20:20:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:20:33.409805  543705 memory.go:184] no items to output this cycle
I0322 20:20:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 20:20:36.424819  543705 disk_info.go:125] begin check local disk info of client
I0322 20:20:36.427329  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:20:36.427335  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b740 0xc00032b780]
E0322 20:20:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:20:43.410894  543705 memory.go:191] Add success.
I0322 20:20:43.409836  543705 cpu.go:282] Add success.
I0322 20:20:43.420613  543705 net.go:648] Add success.
I0322 20:20:43.423453  543705 net.go:770] primary dev: ETH0
I0322 20:20:43.423466  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:20:43.423478  543705 net.go:698] Add success.
I0322 20:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:20:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:20:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:20:53.410465  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:20:53.410486  543705 memory.go:184] no items to output this cycle
I0322 20:20:53.410497  543705 cpu.go:275] no items to output this cycle
E0322 20:21:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:21:03.409775  543705 memory.go:184] no items to output this cycle
I0322 20:21:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:21:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:21:13.409805  543705 memory.go:191] Add success.
I0322 20:21:13.409805  543705 cpu.go:282] Add success.
W0322 20:21:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:21:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:21:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:21:13.420167  543705 net.go:648] Add success.
I0322 20:21:13.423035  543705 net.go:770] primary dev: ETH0
I0322 20:21:13.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:21:13.423070  543705 net.go:698] Add success.
I0322 20:21:13.566140  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af97fc9a-9f77-41d5-8012-a49ac2ac4429","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:21:13.566173  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:21:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:21:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:21:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 20:21:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:21:14.456717  543705 disk_worker.go:494] system disk:vda1
I0322 20:21:14.456748  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:21:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:21:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:21:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:21:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:21:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:21:23.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:21:23.409902  543705 memory.go:184] no items to output this cycle
I0322 20:21:23.409947  543705 cpu.go:275] no items to output this cycle
E0322 20:21:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:21:33.409778  543705 memory.go:184] no items to output this cycle
I0322 20:21:33.409830  543705 cpu.go:275] no items to output this cycle
I0322 20:21:36.427829  543705 disk_info.go:125] begin check local disk info of client
I0322 20:21:36.430349  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:21:36.430356  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0322 20:21:39.789939  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:21:39.789945  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:21:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:21:43.410766  543705 memory.go:191] Add success.
I0322 20:21:43.409819  543705 cpu.go:282] Add success.
I0322 20:21:43.420500  543705 net.go:648] Add success.
I0322 20:21:43.423397  543705 net.go:770] primary dev: ETH0
I0322 20:21:43.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:21:43.423427  543705 net.go:698] Add success.
I0322 20:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:21:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:21:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:21:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:21:53.409780  543705 memory.go:184] no items to output this cycle
I0322 20:21:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 20:22:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:22:03.409801  543705 memory.go:184] no items to output this cycle
I0322 20:22:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 20:22:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:22:13.409787  543705 memory.go:191] Add success.
I0322 20:22:13.409802  543705 cpu.go:282] Add success.
W0322 20:22:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:22:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:22:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:22:13.420162  543705 net.go:648] Add success.
I0322 20:22:13.423081  543705 net.go:770] primary dev: ETH0
I0322 20:22:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:22:13.423106  543705 net.go:698] Add success.
W0322 20:22:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:22:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 20:22:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:22:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:22:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:22:14.456943  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:22:14.457018  543705 disk_worker.go:494] system disk:vda1
I0322 20:22:14.457049  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:22:15.456928  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:22:15.456937  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:22:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:22:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:22:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:22:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:22:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:22:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:22:23.409794  543705 memory.go:184] no items to output this cycle
I0322 20:22:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 20:22:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:22:33.409781  543705 memory.go:184] no items to output this cycle
I0322 20:22:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 20:22:36.430852  543705 disk_info.go:125] begin check local disk info of client
I0322 20:22:36.433357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:22:36.433363  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae00 0xc00007ae40]
E0322 20:22:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:22:43.410600  543705 memory.go:191] Add success.
I0322 20:22:43.409815  543705 cpu.go:282] Add success.
I0322 20:22:43.420296  543705 net.go:648] Add success.
I0322 20:22:43.424044  543705 net.go:770] primary dev: ETH0
I0322 20:22:43.424058  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:22:43.424071  543705 net.go:698] Add success.
I0322 20:22:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:22:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:22:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:22:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:22:53.409789  543705 cpu.go:275] no items to output this cycle
I0322 20:22:53.409792  543705 memory.go:184] no items to output this cycle
E0322 20:23:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:23:03.409770  543705 memory.go:184] no items to output this cycle
I0322 20:23:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 20:23:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:23:13.409783  543705 memory.go:191] Add success.
I0322 20:23:13.409801  543705 cpu.go:282] Add success.
W0322 20:23:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:23:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:23:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:23:13.420227  543705 net.go:648] Add success.
I0322 20:23:13.423026  543705 net.go:770] primary dev: ETH0
I0322 20:23:13.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:23:13.423052  543705 net.go:698] Add success.
I0322 20:23:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:23:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:23:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 20:23:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:23:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 20:23:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:23:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:23:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:23:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:23:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:23:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:23:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:23:23.409779  543705 memory.go:184] no items to output this cycle
I0322 20:23:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 20:23:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:23:33.409810  543705 memory.go:184] no items to output this cycle
I0322 20:23:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 20:23:36.433863  543705 disk_info.go:125] begin check local disk info of client
I0322 20:23:36.436352  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:23:36.436358  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344480 0xc0003444c0]
E0322 20:23:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:23:43.410609  543705 memory.go:191] Add success.
I0322 20:23:43.409802  543705 cpu.go:282] Add success.
I0322 20:23:43.420284  543705 net.go:648] Add success.
I0322 20:23:43.422914  543705 net.go:770] primary dev: ETH0
I0322 20:23:43.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:23:43.422938  543705 net.go:698] Add success.
I0322 20:23:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:23:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:23:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:23:53.409792  543705 memory.go:184] no items to output this cycle
I0322 20:23:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 20:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:24:03.409778  543705 memory.go:184] no items to output this cycle
I0322 20:24:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 20:24:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:24:13.409809  543705 memory.go:191] Add success.
I0322 20:24:13.409813  543705 cpu.go:282] Add success.
W0322 20:24:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:24:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:24:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:24:13.420282  543705 net.go:648] Add success.
I0322 20:24:13.423057  543705 net.go:770] primary dev: ETH0
I0322 20:24:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:24:13.423085  543705 net.go:698] Add success.
I0322 20:24:13.464304  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"415c7d45-a161-45d5-8633-0f05e8469c09","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:24:13.464336  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:24:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:24:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:24:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0322 20:24:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:24:14.456826  543705 disk_worker.go:494] system disk:vda1
I0322 20:24:14.456858  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:24:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:24:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:24:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:24:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:24:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:24:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:24:23.409798  543705 memory.go:184] no items to output this cycle
I0322 20:24:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:24:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:24:33.409809  543705 memory.go:184] no items to output this cycle
I0322 20:24:33.409823  543705 cpu.go:275] no items to output this cycle
I0322 20:24:36.436882  543705 disk_info.go:125] begin check local disk info of client
I0322 20:24:36.439496  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:24:36.439503  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a640 0xc00035a680]
I0322 20:24:39.791638  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:24:39.791644  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:24:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:24:43.410661  543705 memory.go:191] Add success.
I0322 20:24:43.409807  543705 cpu.go:282] Add success.
I0322 20:24:43.420358  543705 net.go:648] Add success.
I0322 20:24:43.423133  543705 net.go:770] primary dev: ETH0
I0322 20:24:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:24:43.423161  543705 net.go:698] Add success.
I0322 20:24:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:24:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:24:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:24:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:24:53.409782  543705 memory.go:184] no items to output this cycle
I0322 20:24:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 20:25:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:25:03.409775  543705 memory.go:184] no items to output this cycle
I0322 20:25:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 20:25:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:25:13.409780  543705 memory.go:191] Add success.
I0322 20:25:13.409802  543705 cpu.go:282] Add success.
W0322 20:25:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:25:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:25:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:25:13.420281  543705 net.go:648] Add success.
I0322 20:25:13.423330  543705 net.go:770] primary dev: ETH0
I0322 20:25:13.423342  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:25:13.423353  543705 net.go:698] Add success.
I0322 20:25:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:25:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:25:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 20:25:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:25:14.457432  543705 disk_worker.go:494] system disk:vda1
I0322 20:25:14.457477  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:25:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:25:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:25:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:25:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:25:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:25:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:25:23.409795  543705 memory.go:184] no items to output this cycle
I0322 20:25:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:25:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:25:33.409793  543705 memory.go:184] no items to output this cycle
I0322 20:25:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 20:25:36.439888  543705 disk_info.go:125] begin check local disk info of client
I0322 20:25:36.442411  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:25:36.442418  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386b40 0xc000386b80]
E0322 20:25:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:25:43.410692  543705 memory.go:191] Add success.
I0322 20:25:43.409813  543705 cpu.go:282] Add success.
I0322 20:25:43.420467  543705 net.go:648] Add success.
I0322 20:25:43.423020  543705 net.go:770] primary dev: ETH0
I0322 20:25:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:25:43.423050  543705 net.go:698] Add success.
I0322 20:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:25:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:25:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:25:53.409805  543705 memory.go:184] no items to output this cycle
I0322 20:25:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:26:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:26:03.409779  543705 memory.go:184] no items to output this cycle
I0322 20:26:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 20:26:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:26:13.409790  543705 memory.go:191] Add success.
W0322 20:26:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 20:26:13.409822  543705 cpu.go:282] Add success.
W0322 20:26:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:26:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:26:13.420236  543705 net.go:648] Add success.
I0322 20:26:13.422712  543705 net.go:770] primary dev: ETH0
I0322 20:26:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:26:13.422739  543705 net.go:698] Add success.
I0322 20:26:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:26:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:26:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0322 20:26:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:26:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 20:26:14.456760  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:26:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:26:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:26:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:26:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:26:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:26:23.409763  543705 memory.go:184] no items to output this cycle
I0322 20:26:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 20:26:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:26:33.409809  543705 memory.go:184] no items to output this cycle
I0322 20:26:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 20:26:36.442904  543705 disk_info.go:125] begin check local disk info of client
I0322 20:26:36.445473  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:26:36.445479  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374e80 0xc000374ec0]
E0322 20:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:26:43.410707  543705 memory.go:191] Add success.
I0322 20:26:43.409809  543705 cpu.go:282] Add success.
I0322 20:26:43.420448  543705 net.go:648] Add success.
I0322 20:26:43.423269  543705 net.go:770] primary dev: ETH0
I0322 20:26:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:26:43.423297  543705 net.go:698] Add success.
I0322 20:26:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:26:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:26:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:26:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:26:53.409776  543705 memory.go:184] no items to output this cycle
I0322 20:26:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:27:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:27:03.409774  543705 memory.go:184] no items to output this cycle
I0322 20:27:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 20:27:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:27:13.409796  543705 memory.go:191] Add success.
I0322 20:27:13.409796  543705 cpu.go:282] Add success.
W0322 20:27:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:27:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:27:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:27:13.420285  543705 net.go:648] Add success.
I0322 20:27:13.423222  543705 net.go:770] primary dev: ETH0
I0322 20:27:13.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:27:13.423270  543705 net.go:698] Add success.
I0322 20:27:13.429556  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 20:27:13.453723  543705 event_worker.go:152] Polling the log file for events...
I0322 20:27:13.468727  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75b183e9-4e0a-487f-bd2e-0583aa3c274f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:27:13.468770  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 20:27:14.455338  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:27:14.455351  543705 disk_worker.go:708] disk space is not compliant
W0322 20:27:14.455354  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:27:14.457779  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:27:14.457785  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:27:14.457789  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:27:14.457812  543705 disk_worker.go:494] system disk:vda1
I0322 20:27:14.457857  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:27:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:27:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:27:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:27:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:27:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:27:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:27:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:27:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:27:23.409801  543705 memory.go:184] no items to output this cycle
I0322 20:27:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:27:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:27:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 20:27:33.409786  543705 memory.go:184] no items to output this cycle
I0322 20:27:36.445915  543705 disk_info.go:125] begin check local disk info of client
I0322 20:27:36.448417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:27:36.448423  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a6c80 0xc0002a6cc0]
I0322 20:27:39.791782  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:27:39.791789  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:27:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:27:43.410722  543705 memory.go:191] Add success.
I0322 20:27:43.409797  543705 cpu.go:282] Add success.
I0322 20:27:43.420426  543705 net.go:648] Add success.
I0322 20:27:43.423406  543705 net.go:770] primary dev: ETH0
I0322 20:27:43.423419  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:27:43.423431  543705 net.go:698] Add success.
I0322 20:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:27:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:27:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:27:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:27:53.409773  543705 memory.go:184] no items to output this cycle
I0322 20:27:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 20:28:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:28:03.409770  543705 memory.go:184] no items to output this cycle
I0322 20:28:03.409791  543705 cpu.go:275] no items to output this cycle
E0322 20:28:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:28:13.409795  543705 memory.go:191] Add success.
I0322 20:28:13.409798  543705 cpu.go:282] Add success.
W0322 20:28:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:28:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:28:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:28:13.420189  543705 net.go:648] Add success.
I0322 20:28:13.422882  543705 net.go:770] primary dev: ETH0
I0322 20:28:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:28:13.422911  543705 net.go:698] Add success.
I0322 20:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:28:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:28:14.455355  543705 disk_worker.go:708] disk space is not compliant
W0322 20:28:14.455360  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:28:14.457032  543705 disk_worker.go:494] system disk:vda1
I0322 20:28:14.457060  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:28:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:28:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:28:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:28:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:28:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:28:23.409777  543705 memory.go:184] no items to output this cycle
I0322 20:28:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 20:28:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:28:33.409812  543705 memory.go:184] no items to output this cycle
I0322 20:28:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 20:28:36.448933  543705 disk_info.go:125] begin check local disk info of client
I0322 20:28:36.451541  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:28:36.451546  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e0680 0xc0003e06c0]
E0322 20:28:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:28:43.410695  543705 memory.go:191] Add success.
I0322 20:28:43.409794  543705 cpu.go:282] Add success.
I0322 20:28:43.420403  543705 net.go:648] Add success.
I0322 20:28:43.423019  543705 net.go:770] primary dev: ETH0
I0322 20:28:43.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:28:43.423046  543705 net.go:698] Add success.
I0322 20:28:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:28:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:28:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:28:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:28:53.409786  543705 memory.go:184] no items to output this cycle
I0322 20:28:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 20:29:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:29:03.409794  543705 memory.go:184] no items to output this cycle
I0322 20:29:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:29:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:29:13.409781  543705 memory.go:191] Add success.
I0322 20:29:13.409803  543705 cpu.go:282] Add success.
W0322 20:29:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:29:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:29:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:29:13.420037  543705 net.go:648] Add success.
I0322 20:29:13.422985  543705 net.go:770] primary dev: ETH0
I0322 20:29:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:29:13.423010  543705 net.go:698] Add success.
I0322 20:29:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:29:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:29:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 20:29:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:29:14.456559  543705 disk_worker.go:494] system disk:vda1
I0322 20:29:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:29:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:29:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:29:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:29:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:29:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:29:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:29:23.409775  543705 memory.go:184] no items to output this cycle
I0322 20:29:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 20:29:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:29:33.409782  543705 memory.go:184] no items to output this cycle
I0322 20:29:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 20:29:36.451952  543705 disk_info.go:125] begin check local disk info of client
I0322 20:29:36.454495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:29:36.454502  543705 disk_info.go:196] parse disk info done, disk is : [0xc000374680 0xc0003746c0]
E0322 20:29:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:29:43.410511  543705 memory.go:191] Add success.
I0322 20:29:43.409814  543705 cpu.go:282] Add success.
I0322 20:29:43.420206  543705 net.go:648] Add success.
I0322 20:29:43.423118  543705 net.go:770] primary dev: ETH0
I0322 20:29:43.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:29:43.423142  543705 net.go:698] Add success.
I0322 20:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:29:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:29:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:29:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:29:53.409785  543705 memory.go:184] no items to output this cycle
I0322 20:29:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 20:30:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:30:03.409797  543705 memory.go:184] no items to output this cycle
I0322 20:30:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:30:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:30:13.409782  543705 memory.go:191] Add success.
I0322 20:30:13.409808  543705 cpu.go:282] Add success.
W0322 20:30:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:30:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:30:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:30:13.420059  543705 net.go:648] Add success.
I0322 20:30:13.422722  543705 net.go:770] primary dev: ETH0
I0322 20:30:13.422735  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:30:13.422747  543705 net.go:698] Add success.
I0322 20:30:13.526589  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f84a0836-7bf1-4702-95e3-b210dfeceaa0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:30:13.526622  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:30:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:30:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:30:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 20:30:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:30:14.456612  543705 disk_worker.go:494] system disk:vda1
I0322 20:30:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:30:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:30:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:30:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:30:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:30:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:30:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:30:23.409782  543705 memory.go:184] no items to output this cycle
I0322 20:30:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:30:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:30:33.409818  543705 memory.go:184] no items to output this cycle
I0322 20:30:33.409830  543705 cpu.go:275] no items to output this cycle
I0322 20:30:36.454966  543705 disk_info.go:125] begin check local disk info of client
I0322 20:30:36.457566  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:30:36.457574  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8480 0xc0003c84c0]
I0322 20:30:39.791932  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:30:39.791939  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:30:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:30:43.410707  543705 memory.go:191] Add success.
I0322 20:30:43.409822  543705 cpu.go:282] Add success.
I0322 20:30:43.420509  543705 net.go:648] Add success.
I0322 20:30:43.423135  543705 net.go:770] primary dev: ETH0
I0322 20:30:43.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:30:43.423163  543705 net.go:698] Add success.
I0322 20:30:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:30:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:30:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:30:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:30:53.409788  543705 memory.go:184] no items to output this cycle
I0322 20:30:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:31:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:31:03.409813  543705 memory.go:184] no items to output this cycle
I0322 20:31:03.409828  543705 cpu.go:275] no items to output this cycle
E0322 20:31:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:31:13.409787  543705 memory.go:191] Add success.
W0322 20:31:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 20:31:13.409824  543705 cpu.go:282] Add success.
W0322 20:31:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:31:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:31:13.420140  543705 net.go:648] Add success.
I0322 20:31:13.422602  543705 net.go:770] primary dev: ETH0
I0322 20:31:13.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:31:13.422628  543705 net.go:698] Add success.
I0322 20:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:31:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:31:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 20:31:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:31:14.456826  543705 disk_worker.go:494] system disk:vda1
I0322 20:31:14.456854  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:31:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:31:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:31:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:31:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:31:23.409794  543705 memory.go:184] no items to output this cycle
I0322 20:31:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 20:31:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:31:33.409798  543705 memory.go:184] no items to output this cycle
I0322 20:31:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 20:31:36.457983  543705 disk_info.go:125] begin check local disk info of client
I0322 20:31:36.460485  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:31:36.460492  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004eabc0 0xc0004eac00]
E0322 20:31:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:31:43.410606  543705 memory.go:191] Add success.
I0322 20:31:43.409819  543705 cpu.go:282] Add success.
I0322 20:31:43.420295  543705 net.go:648] Add success.
I0322 20:31:43.423023  543705 net.go:770] primary dev: ETH0
I0322 20:31:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:31:43.423052  543705 net.go:698] Add success.
I0322 20:31:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:31:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:31:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:31:53.409785  543705 memory.go:184] no items to output this cycle
I0322 20:31:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 20:32:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:32:03.409800  543705 memory.go:184] no items to output this cycle
I0322 20:32:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 20:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:32:13.409789  543705 memory.go:191] Add success.
I0322 20:32:13.409798  543705 cpu.go:282] Add success.
W0322 20:32:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:32:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:32:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:32:13.420193  543705 net.go:648] Add success.
I0322 20:32:13.422923  543705 net.go:770] primary dev: ETH0
I0322 20:32:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:32:13.422948  543705 net.go:698] Add success.
W0322 20:32:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:32:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 20:32:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:32:14.455890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:32:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:32:14.455905  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:32:14.456633  543705 disk_worker.go:494] system disk:vda1
I0322 20:32:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:32:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:32:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:32:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:32:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:32:16.457973  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:32:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:32:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:32:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:32:23.409788  543705 memory.go:184] no items to output this cycle
I0322 20:32:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 20:32:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:32:33.409793  543705 memory.go:184] no items to output this cycle
I0322 20:32:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 20:32:36.460996  543705 disk_info.go:125] begin check local disk info of client
I0322 20:32:36.463610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:32:36.463617  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358a00 0xc000358a40]
E0322 20:32:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:32:43.410682  543705 memory.go:191] Add success.
I0322 20:32:43.409794  543705 cpu.go:282] Add success.
I0322 20:32:43.420496  543705 net.go:648] Add success.
I0322 20:32:43.423267  543705 net.go:770] primary dev: ETH0
I0322 20:32:43.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:32:43.423308  543705 net.go:698] Add success.
I0322 20:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:32:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:32:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:32:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:32:53.409774  543705 memory.go:184] no items to output this cycle
I0322 20:32:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:33:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:33:03.409801  543705 memory.go:184] no items to output this cycle
I0322 20:33:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 20:33:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:33:13.409786  543705 memory.go:191] Add success.
I0322 20:33:13.409807  543705 cpu.go:282] Add success.
W0322 20:33:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:33:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:33:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:33:13.420061  543705 net.go:648] Add success.
I0322 20:33:13.422657  543705 net.go:770] primary dev: ETH0
I0322 20:33:13.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:33:13.422687  543705 net.go:698] Add success.
I0322 20:33:13.628920  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"378c5a35-3fcf-4d56-8db8-8e8278115598","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:33:13.628972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:33:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:33:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:33:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 20:33:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:33:14.456664  543705 disk_worker.go:494] system disk:vda1
I0322 20:33:14.456760  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:33:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:33:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:33:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:33:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:33:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:33:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:33:23.409805  543705 memory.go:184] no items to output this cycle
I0322 20:33:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:33:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:33:33.409776  543705 memory.go:184] no items to output this cycle
I0322 20:33:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 20:33:36.464010  543705 disk_info.go:125] begin check local disk info of client
I0322 20:33:36.466555  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:33:36.466561  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be480 0xc0003be4c0]
I0322 20:33:39.793671  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:33:39.793678  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:33:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:33:43.410658  543705 memory.go:191] Add success.
I0322 20:33:43.409824  543705 cpu.go:282] Add success.
I0322 20:33:43.420347  543705 net.go:648] Add success.
I0322 20:33:43.422860  543705 net.go:770] primary dev: ETH0
I0322 20:33:43.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:33:43.422886  543705 net.go:698] Add success.
I0322 20:33:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:33:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:33:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:33:53.409772  543705 memory.go:184] no items to output this cycle
I0322 20:33:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 20:34:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:34:03.409775  543705 memory.go:184] no items to output this cycle
I0322 20:34:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 20:34:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:34:13.409782  543705 memory.go:191] Add success.
I0322 20:34:13.409803  543705 cpu.go:282] Add success.
W0322 20:34:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:34:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:34:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:34:13.420168  543705 net.go:648] Add success.
I0322 20:34:13.422900  543705 net.go:770] primary dev: ETH0
I0322 20:34:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:34:13.422925  543705 net.go:698] Add success.
I0322 20:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:34:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:34:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 20:34:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:34:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 20:34:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:34:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:34:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:34:16.472516  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:34:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:34:23.409805  543705 memory.go:184] no items to output this cycle
I0322 20:34:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:34:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:34:33.409796  543705 memory.go:184] no items to output this cycle
I0322 20:34:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 20:34:36.467029  543705 disk_info.go:125] begin check local disk info of client
I0322 20:34:36.469614  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:34:36.469620  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002acec0 0xc0002acf00]
E0322 20:34:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:34:43.410943  543705 memory.go:191] Add success.
I0322 20:34:43.409816  543705 cpu.go:282] Add success.
I0322 20:34:43.420646  543705 net.go:648] Add success.
I0322 20:34:43.423295  543705 net.go:770] primary dev: ETH0
I0322 20:34:43.423308  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:34:43.423320  543705 net.go:698] Add success.
I0322 20:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:34:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:34:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:34:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:34:53.409775  543705 memory.go:184] no items to output this cycle
I0322 20:34:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 20:35:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:35:03.409777  543705 memory.go:184] no items to output this cycle
I0322 20:35:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 20:35:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:35:13.409787  543705 memory.go:191] Add success.
I0322 20:35:13.409787  543705 cpu.go:282] Add success.
W0322 20:35:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:35:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:35:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:35:13.420258  543705 net.go:648] Add success.
I0322 20:35:13.423212  543705 net.go:770] primary dev: ETH0
I0322 20:35:13.423225  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:35:13.423236  543705 net.go:698] Add success.
I0322 20:35:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:35:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:35:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 20:35:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:35:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 20:35:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:35:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:35:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:35:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:35:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:35:23.409907  543705 cpu.go:275] no items to output this cycle
I0322 20:35:23.409948  543705 memory.go:184] no items to output this cycle
E0322 20:35:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:35:33.409790  543705 memory.go:184] no items to output this cycle
I0322 20:35:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 20:35:36.470034  543705 disk_info.go:125] begin check local disk info of client
I0322 20:35:36.472560  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:35:36.472566  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 20:35:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:35:43.410662  543705 memory.go:191] Add success.
I0322 20:35:43.409830  543705 cpu.go:282] Add success.
I0322 20:35:43.420376  543705 net.go:648] Add success.
I0322 20:35:43.422915  543705 net.go:770] primary dev: ETH0
I0322 20:35:43.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:35:43.422939  543705 net.go:698] Add success.
I0322 20:35:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:35:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:35:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:35:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:35:53.409796  543705 cpu.go:275] no items to output this cycle
I0322 20:35:53.409808  543705 memory.go:184] no items to output this cycle
E0322 20:36:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:36:03.409796  543705 memory.go:184] no items to output this cycle
I0322 20:36:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:36:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:36:13.409784  543705 memory.go:191] Add success.
W0322 20:36:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 20:36:13.409816  543705 cpu.go:282] Add success.
W0322 20:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:36:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:36:13.420250  543705 net.go:648] Add success.
I0322 20:36:13.423044  543705 net.go:770] primary dev: ETH0
I0322 20:36:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:36:13.423068  543705 net.go:698] Add success.
I0322 20:36:13.468673  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc600769-7418-405a-803f-9dcd37345225","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:36:13.468705  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:36:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:36:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 20:36:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:36:14.456561  543705 disk_worker.go:494] system disk:vda1
I0322 20:36:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:36:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:36:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:36:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:36:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:36:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:36:23.409773  543705 memory.go:184] no items to output this cycle
I0322 20:36:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:36:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:36:33.409778  543705 memory.go:184] no items to output this cycle
I0322 20:36:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 20:36:36.473058  543705 disk_info.go:125] begin check local disk info of client
I0322 20:36:36.475704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:36:36.475712  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2d40 0xc0003b2d80]
I0322 20:36:39.795669  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:36:39.795674  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:36:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:36:43.410802  543705 memory.go:191] Add success.
I0322 20:36:43.409807  543705 cpu.go:282] Add success.
I0322 20:36:43.420497  543705 net.go:648] Add success.
I0322 20:36:43.423408  543705 net.go:770] primary dev: ETH0
I0322 20:36:43.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:36:43.423434  543705 net.go:698] Add success.
I0322 20:36:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:36:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:36:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:36:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:36:53.409808  543705 memory.go:184] no items to output this cycle
I0322 20:36:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:37:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:37:03.409782  543705 memory.go:184] no items to output this cycle
I0322 20:37:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 20:37:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:37:13.409786  543705 memory.go:191] Add success.
I0322 20:37:13.409790  543705 cpu.go:282] Add success.
W0322 20:37:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:37:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:37:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:37:13.419886  543705 net.go:770] primary dev: ETH0
I0322 20:37:13.419897  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:37:13.419909  543705 net.go:698] Add success.
I0322 20:37:13.420137  543705 net.go:648] Add success.
I0322 20:37:13.453712  543705 event_worker.go:152] Polling the log file for events...
W0322 20:37:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:37:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 20:37:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:37:14.456795  543705 disk_worker.go:494] system disk:vda1
I0322 20:37:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:37:14.457119  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:37:14.457126  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:37:14.457131  543705 custom_config.go:64] query custom config with name: gpu
E0322 20:37:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:37:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:37:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:37:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:37:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:37:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:37:16.472345  543705 disk_local_worker.go:436] Get disk info: []
I0322 20:37:23.409932  543705 cpu.go:275] no items to output this cycle
E0322 20:37:23.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:37:23.410013  543705 memory.go:184] no items to output this cycle
E0322 20:37:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:37:33.409803  543705 memory.go:184] no items to output this cycle
I0322 20:37:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 20:37:36.475802  543705 disk_info.go:125] begin check local disk info of client
I0322 20:37:36.478348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:37:36.478354  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
E0322 20:37:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:37:43.410572  543705 memory.go:191] Add success.
I0322 20:37:43.409822  543705 cpu.go:282] Add success.
I0322 20:37:43.420283  543705 net.go:648] Add success.
I0322 20:37:43.422936  543705 net.go:770] primary dev: ETH0
I0322 20:37:43.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:37:43.422961  543705 net.go:698] Add success.
I0322 20:37:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:37:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:37:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:37:53.409811  543705 memory.go:184] no items to output this cycle
I0322 20:37:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 20:38:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:38:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 20:38:03.409785  543705 memory.go:184] no items to output this cycle
E0322 20:38:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:38:13.409793  543705 memory.go:191] Add success.
I0322 20:38:13.409794  543705 cpu.go:282] Add success.
W0322 20:38:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:38:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:38:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:38:13.420108  543705 net.go:648] Add success.
I0322 20:38:13.422999  543705 net.go:770] primary dev: ETH0
I0322 20:38:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:38:13.423029  543705 net.go:698] Add success.
I0322 20:38:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:38:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:38:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 20:38:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:38:14.456530  543705 disk_worker.go:494] system disk:vda1
I0322 20:38:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:38:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:38:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:38:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:38:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:38:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:38:23.409781  543705 memory.go:184] no items to output this cycle
I0322 20:38:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 20:38:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:38:33.409775  543705 memory.go:184] no items to output this cycle
I0322 20:38:33.409811  543705 cpu.go:275] no items to output this cycle
I0322 20:38:36.479096  543705 disk_info.go:125] begin check local disk info of client
I0322 20:38:36.481608  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:38:36.481614  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387d80 0xc000387dc0]
E0322 20:38:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:38:43.410671  543705 memory.go:191] Add success.
I0322 20:38:43.409812  543705 cpu.go:282] Add success.
I0322 20:38:43.420346  543705 net.go:648] Add success.
I0322 20:38:43.422924  543705 net.go:770] primary dev: ETH0
I0322 20:38:43.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:38:43.422949  543705 net.go:698] Add success.
I0322 20:38:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:38:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:38:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:38:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:38:53.409776  543705 memory.go:184] no items to output this cycle
I0322 20:38:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 20:39:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:39:03.409796  543705 memory.go:184] no items to output this cycle
I0322 20:39:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 20:39:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:39:13.409785  543705 memory.go:191] Add success.
I0322 20:39:13.409806  543705 cpu.go:282] Add success.
W0322 20:39:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:39:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:39:13.420208  543705 net.go:648] Add success.
I0322 20:39:13.423362  543705 net.go:770] primary dev: ETH0
I0322 20:39:13.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:39:13.423401  543705 net.go:698] Add success.
I0322 20:39:13.463446  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c1e1675-ed2c-4044-a65b-d3ec0cb8d0ca","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:39:13.463478  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:39:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:39:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:39:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 20:39:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:39:14.456485  543705 disk_worker.go:494] system disk:vda1
I0322 20:39:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:39:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:39:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:39:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:39:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:39:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:39:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:39:23.409802  543705 memory.go:184] no items to output this cycle
I0322 20:39:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 20:39:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:39:33.409777  543705 memory.go:184] no items to output this cycle
I0322 20:39:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 20:39:36.482103  543705 disk_info.go:125] begin check local disk info of client
I0322 20:39:36.484624  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:39:36.484630  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af00 0xc00007af40]
I0322 20:39:39.797686  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:39:39.797693  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:39:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:39:43.410522  543705 memory.go:191] Add success.
I0322 20:39:43.409796  543705 cpu.go:282] Add success.
I0322 20:39:43.420211  543705 net.go:648] Add success.
I0322 20:39:43.422774  543705 net.go:770] primary dev: ETH0
I0322 20:39:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:39:43.422799  543705 net.go:698] Add success.
I0322 20:39:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:39:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:39:53.410477  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:39:53.410496  543705 memory.go:184] no items to output this cycle
I0322 20:39:53.410503  543705 cpu.go:275] no items to output this cycle
E0322 20:40:03.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:40:03.409893  543705 memory.go:184] no items to output this cycle
I0322 20:40:03.409914  543705 cpu.go:275] no items to output this cycle
E0322 20:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:40:13.409798  543705 cpu.go:282] Add success.
I0322 20:40:13.409802  543705 memory.go:191] Add success.
W0322 20:40:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:40:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:40:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:40:13.420047  543705 net.go:648] Add success.
I0322 20:40:13.422738  543705 net.go:770] primary dev: ETH0
I0322 20:40:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:40:13.422763  543705 net.go:698] Add success.
I0322 20:40:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:40:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:40:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 20:40:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:40:14.456529  543705 disk_worker.go:494] system disk:vda1
I0322 20:40:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:40:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:40:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:40:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:40:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:40:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:40:23.409770  543705 memory.go:184] no items to output this cycle
I0322 20:40:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:40:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:40:33.409809  543705 memory.go:184] no items to output this cycle
I0322 20:40:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 20:40:36.485118  543705 disk_info.go:125] begin check local disk info of client
I0322 20:40:36.487742  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:40:36.487747  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a7900 0xc0002a7940]
E0322 20:40:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:40:43.410676  543705 memory.go:191] Add success.
I0322 20:40:43.409802  543705 cpu.go:282] Add success.
I0322 20:40:43.420373  543705 net.go:648] Add success.
I0322 20:40:43.422999  543705 net.go:770] primary dev: ETH0
I0322 20:40:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:40:43.423031  543705 net.go:698] Add success.
I0322 20:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:40:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:40:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:40:53.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:40:53.409872  543705 memory.go:184] no items to output this cycle
I0322 20:40:53.409957  543705 cpu.go:275] no items to output this cycle
E0322 20:41:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:41:03.409785  543705 memory.go:184] no items to output this cycle
I0322 20:41:03.409793  543705 cpu.go:275] no items to output this cycle
E0322 20:41:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:41:13.409801  543705 cpu.go:282] Add success.
I0322 20:41:13.409814  543705 memory.go:191] Add success.
W0322 20:41:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:41:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:41:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:41:13.420214  543705 net.go:648] Add success.
I0322 20:41:13.422960  543705 net.go:770] primary dev: ETH0
I0322 20:41:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:41:13.422984  543705 net.go:698] Add success.
I0322 20:41:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:41:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:41:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 20:41:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:41:14.456517  543705 disk_worker.go:494] system disk:vda1
I0322 20:41:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:41:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:41:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:41:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:41:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:41:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:41:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:41:23.409767  543705 memory.go:184] no items to output this cycle
I0322 20:41:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 20:41:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:41:33.409780  543705 memory.go:184] no items to output this cycle
I0322 20:41:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 20:41:36.487832  543705 disk_info.go:125] begin check local disk info of client
I0322 20:41:36.490378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:41:36.490385  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ae800 0xc0001ae840]
E0322 20:41:43.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:41:43.410732  543705 memory.go:191] Add success.
I0322 20:41:43.409988  543705 cpu.go:282] Add success.
I0322 20:41:43.419774  543705 net.go:648] Add success.
I0322 20:41:43.422552  543705 net.go:770] primary dev: ETH0
I0322 20:41:43.422567  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:41:43.422582  543705 net.go:698] Add success.
I0322 20:41:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:41:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:41:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:41:53.409783  543705 memory.go:184] no items to output this cycle
I0322 20:41:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 20:42:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:42:03.409797  543705 memory.go:184] no items to output this cycle
I0322 20:42:03.409810  543705 cpu.go:275] no items to output this cycle
W0322 20:42:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:42:13.409725  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:42:13.409729  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 20:42:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:42:13.409816  543705 memory.go:191] Add success.
I0322 20:42:13.409824  543705 cpu.go:282] Add success.
I0322 20:42:13.420119  543705 net.go:648] Add success.
I0322 20:42:13.422757  543705 net.go:770] primary dev: ETH0
I0322 20:42:13.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:42:13.422783  543705 net.go:698] Add success.
I0322 20:42:13.463308  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0822b4a2-ad20-457d-bf22-2abf22215bfe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:42:13.463341  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 20:42:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:42:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 20:42:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:42:14.456835  543705 disk_worker.go:494] system disk:vda1
I0322 20:42:14.456877  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:42:14.457120  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:42:14.457128  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:42:14.457132  543705 custom_config.go:64] query custom config with name: gpu
E0322 20:42:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:42:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:42:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:42:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:42:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:42:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:42:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:42:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:42:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 20:42:23.409894  543705 memory.go:184] no items to output this cycle
E0322 20:42:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:42:33.409793  543705 memory.go:184] no items to output this cycle
I0322 20:42:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 20:42:36.491157  543705 disk_info.go:125] begin check local disk info of client
I0322 20:42:36.493745  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:42:36.493751  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
I0322 20:42:39.799714  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:42:39.799720  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:42:43.410660  543705 memory.go:191] Add success.
I0322 20:42:43.409814  543705 cpu.go:282] Add success.
I0322 20:42:43.420373  543705 net.go:648] Add success.
I0322 20:42:43.423122  543705 net.go:770] primary dev: ETH0
I0322 20:42:43.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:42:43.423149  543705 net.go:698] Add success.
I0322 20:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:42:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:42:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:42:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:42:53.409790  543705 memory.go:184] no items to output this cycle
I0322 20:42:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:43:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:43:03.409807  543705 memory.go:184] no items to output this cycle
I0322 20:43:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 20:43:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:43:13.409792  543705 memory.go:191] Add success.
I0322 20:43:13.409812  543705 cpu.go:282] Add success.
W0322 20:43:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:43:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:43:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:43:13.420049  543705 net.go:648] Add success.
I0322 20:43:13.422901  543705 net.go:770] primary dev: ETH0
I0322 20:43:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:43:13.422928  543705 net.go:698] Add success.
I0322 20:43:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:43:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:43:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 20:43:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:43:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 20:43:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:43:15.456022  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:43:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:43:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:43:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:43:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:43:23.409782  543705 memory.go:184] no items to output this cycle
I0322 20:43:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 20:43:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:43:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 20:43:33.409810  543705 memory.go:184] no items to output this cycle
I0322 20:43:36.494109  543705 disk_info.go:125] begin check local disk info of client
I0322 20:43:36.496620  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:43:36.496626  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462dc0 0xc000462e00]
E0322 20:43:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:43:43.410528  543705 memory.go:191] Add success.
I0322 20:43:43.409808  543705 cpu.go:282] Add success.
I0322 20:43:43.420231  543705 net.go:648] Add success.
I0322 20:43:43.423017  543705 net.go:770] primary dev: ETH0
I0322 20:43:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:43:43.423055  543705 net.go:698] Add success.
I0322 20:43:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:43:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:43:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:43:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:43:53.409789  543705 memory.go:184] no items to output this cycle
I0322 20:43:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 20:44:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:44:03.409788  543705 memory.go:184] no items to output this cycle
I0322 20:44:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 20:44:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:44:13.409809  543705 memory.go:191] Add success.
I0322 20:44:13.409811  543705 cpu.go:282] Add success.
W0322 20:44:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:44:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:44:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:44:13.420267  543705 net.go:648] Add success.
I0322 20:44:13.423073  543705 net.go:770] primary dev: ETH0
I0322 20:44:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:44:13.423098  543705 net.go:698] Add success.
I0322 20:44:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:44:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:44:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 20:44:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:44:14.456858  543705 disk_worker.go:494] system disk:vda1
I0322 20:44:14.456888  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:44:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:44:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:44:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:44:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:44:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:44:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:44:23.409772  543705 memory.go:184] no items to output this cycle
I0322 20:44:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:44:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:44:33.409784  543705 memory.go:184] no items to output this cycle
I0322 20:44:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 20:44:36.497177  543705 disk_info.go:125] begin check local disk info of client
I0322 20:44:36.499746  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:44:36.499752  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005783c0 0xc000578400]
E0322 20:44:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:44:43.410628  543705 memory.go:191] Add success.
I0322 20:44:43.409801  543705 cpu.go:282] Add success.
I0322 20:44:43.420299  543705 net.go:648] Add success.
I0322 20:44:43.422966  543705 net.go:770] primary dev: ETH0
I0322 20:44:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:44:43.422991  543705 net.go:698] Add success.
I0322 20:44:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:44:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:44:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:44:53.409792  543705 memory.go:184] no items to output this cycle
I0322 20:44:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 20:45:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:45:03.409778  543705 memory.go:184] no items to output this cycle
I0322 20:45:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 20:45:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:45:13.409790  543705 memory.go:191] Add success.
I0322 20:45:13.409809  543705 cpu.go:282] Add success.
W0322 20:45:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:45:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:45:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:45:13.420284  543705 net.go:648] Add success.
I0322 20:45:13.423557  543705 net.go:770] primary dev: ETH0
I0322 20:45:13.423569  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:45:13.423581  543705 net.go:698] Add success.
I0322 20:45:13.463822  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc369eac-9d19-40e2-bace-4d91d9972c62","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:45:13.463855  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:45:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:45:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:45:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 20:45:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:45:14.456519  543705 disk_worker.go:494] system disk:vda1
I0322 20:45:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:45:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:45:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:45:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:45:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:45:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:45:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:45:23.409799  543705 memory.go:184] no items to output this cycle
I0322 20:45:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:45:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:45:33.409771  543705 memory.go:184] no items to output this cycle
I0322 20:45:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 20:45:36.500137  543705 disk_info.go:125] begin check local disk info of client
I0322 20:45:36.502712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:45:36.502718  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aec0 0xc00007af00]
I0322 20:45:39.799855  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:45:39.799862  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:45:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:45:43.410783  543705 memory.go:191] Add success.
I0322 20:45:43.409813  543705 cpu.go:282] Add success.
I0322 20:45:43.420473  543705 net.go:648] Add success.
I0322 20:45:43.423316  543705 net.go:770] primary dev: ETH0
I0322 20:45:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:45:43.423341  543705 net.go:698] Add success.
I0322 20:45:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:45:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:45:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:45:53.409772  543705 memory.go:184] no items to output this cycle
I0322 20:45:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 20:46:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:46:03.409779  543705 memory.go:184] no items to output this cycle
I0322 20:46:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 20:46:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:46:13.409798  543705 memory.go:191] Add success.
I0322 20:46:13.409799  543705 cpu.go:282] Add success.
W0322 20:46:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:46:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:46:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:46:13.420166  543705 net.go:648] Add success.
I0322 20:46:13.423314  543705 net.go:770] primary dev: ETH0
I0322 20:46:13.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:46:13.423340  543705 net.go:698] Add success.
I0322 20:46:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:46:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:46:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 20:46:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:46:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 20:46:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:46:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:46:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:46:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:46:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:46:23.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:46:23.409883  543705 memory.go:184] no items to output this cycle
I0322 20:46:23.409976  543705 cpu.go:275] no items to output this cycle
E0322 20:46:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:46:33.409795  543705 memory.go:184] no items to output this cycle
I0322 20:46:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 20:46:36.502803  543705 disk_info.go:125] begin check local disk info of client
I0322 20:46:36.505383  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:46:36.505390  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 20:46:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:46:43.410650  543705 memory.go:191] Add success.
I0322 20:46:43.409801  543705 cpu.go:282] Add success.
I0322 20:46:43.420323  543705 net.go:648] Add success.
I0322 20:46:43.423024  543705 net.go:770] primary dev: ETH0
I0322 20:46:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:46:43.423049  543705 net.go:698] Add success.
I0322 20:46:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:46:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:46:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:46:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:46:53.409779  543705 memory.go:184] no items to output this cycle
I0322 20:46:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 20:47:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:47:03.409800  543705 memory.go:184] no items to output this cycle
I0322 20:47:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:47:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:47:13.409789  543705 memory.go:191] Add success.
I0322 20:47:13.409794  543705 cpu.go:282] Add success.
W0322 20:47:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:47:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:47:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:47:13.420152  543705 net.go:648] Add success.
I0322 20:47:13.422932  543705 net.go:770] primary dev: ETH0
I0322 20:47:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:47:13.422960  543705 net.go:698] Add success.
I0322 20:47:13.453516  543705 event_worker.go:152] Polling the log file for events...
W0322 20:47:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:47:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 20:47:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:47:14.456937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:47:14.456945  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:47:14.456952  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:47:14.457031  543705 disk_worker.go:494] system disk:vda1
I0322 20:47:14.457074  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:47:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:47:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:47:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:47:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:47:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:47:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:47:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:47:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:47:23.409793  543705 memory.go:184] no items to output this cycle
I0322 20:47:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:47:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:47:33.409786  543705 memory.go:184] no items to output this cycle
I0322 20:47:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 20:47:36.506225  543705 disk_info.go:125] begin check local disk info of client
I0322 20:47:36.508761  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:47:36.508767  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 20:47:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:47:43.410741  543705 memory.go:191] Add success.
I0322 20:47:43.409800  543705 cpu.go:282] Add success.
I0322 20:47:43.420454  543705 net.go:648] Add success.
I0322 20:47:43.423393  543705 net.go:770] primary dev: ETH0
I0322 20:47:43.423405  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:47:43.423417  543705 net.go:698] Add success.
I0322 20:47:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:47:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:47:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:47:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:47:53.409810  543705 memory.go:184] no items to output this cycle
I0322 20:47:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 20:48:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:48:03.409769  543705 memory.go:184] no items to output this cycle
I0322 20:48:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 20:48:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:48:13.409816  543705 memory.go:191] Add success.
I0322 20:48:13.409822  543705 cpu.go:282] Add success.
W0322 20:48:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:48:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:48:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:48:13.420538  543705 net.go:648] Add success.
I0322 20:48:13.423506  543705 net.go:770] primary dev: ETH0
I0322 20:48:13.423544  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:48:13.423561  543705 net.go:698] Add success.
I0322 20:48:13.469464  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fa3a2b6b-2a18-4052-a2ee-7e97aa52353c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:48:13.469499  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:48:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:48:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 20:48:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:48:14.456509  543705 disk_worker.go:494] system disk:vda1
I0322 20:48:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:48:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:48:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:48:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:48:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:48:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:48:23.409771  543705 memory.go:184] no items to output this cycle
I0322 20:48:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:48:33.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:48:33.409889  543705 memory.go:184] no items to output this cycle
I0322 20:48:33.409940  543705 cpu.go:275] no items to output this cycle
I0322 20:48:36.508852  543705 disk_info.go:125] begin check local disk info of client
I0322 20:48:36.511532  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:48:36.511538  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f4740 0xc0003f4780]
I0322 20:48:39.800002  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:48:39.800008  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:48:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:48:43.410671  543705 memory.go:191] Add success.
I0322 20:48:43.409787  543705 cpu.go:282] Add success.
I0322 20:48:43.420341  543705 net.go:648] Add success.
I0322 20:48:43.423191  543705 net.go:770] primary dev: ETH0
I0322 20:48:43.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:48:43.423216  543705 net.go:698] Add success.
I0322 20:48:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:48:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:48:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:48:53.410362  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:48:53.410382  543705 memory.go:184] no items to output this cycle
I0322 20:48:53.410407  543705 cpu.go:275] no items to output this cycle
E0322 20:49:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:49:03.409796  543705 memory.go:184] no items to output this cycle
I0322 20:49:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 20:49:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:49:13.409780  543705 memory.go:191] Add success.
I0322 20:49:13.409805  543705 cpu.go:282] Add success.
W0322 20:49:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:49:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:49:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:49:13.420121  543705 net.go:648] Add success.
I0322 20:49:13.423003  543705 net.go:770] primary dev: ETH0
I0322 20:49:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:49:13.423048  543705 net.go:698] Add success.
I0322 20:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:49:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:49:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0322 20:49:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:49:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 20:49:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:49:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:49:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:49:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:49:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:49:23.409778  543705 memory.go:184] no items to output this cycle
I0322 20:49:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 20:49:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:49:33.409799  543705 memory.go:184] no items to output this cycle
I0322 20:49:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 20:49:36.512234  543705 disk_info.go:125] begin check local disk info of client
I0322 20:49:36.514807  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:49:36.514814  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580f00 0xc000580f40]
E0322 20:49:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:49:43.410627  543705 memory.go:191] Add success.
I0322 20:49:43.409791  543705 cpu.go:282] Add success.
I0322 20:49:43.419710  543705 net.go:648] Add success.
I0322 20:49:43.422410  543705 net.go:770] primary dev: ETH0
I0322 20:49:43.422423  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:49:43.422434  543705 net.go:698] Add success.
I0322 20:49:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:49:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:49:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:49:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:49:53.409786  543705 memory.go:184] no items to output this cycle
I0322 20:49:53.409789  543705 cpu.go:275] no items to output this cycle
E0322 20:50:03.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:50:03.409761  543705 memory.go:184] no items to output this cycle
I0322 20:50:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 20:50:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:50:13.409800  543705 memory.go:191] Add success.
I0322 20:50:13.409800  543705 cpu.go:282] Add success.
W0322 20:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:50:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:50:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:50:13.420097  543705 net.go:648] Add success.
I0322 20:50:13.422987  543705 net.go:770] primary dev: ETH0
I0322 20:50:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:50:13.423012  543705 net.go:698] Add success.
I0322 20:50:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:50:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:50:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 20:50:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:50:14.456510  543705 disk_worker.go:494] system disk:vda1
I0322 20:50:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:50:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:50:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:50:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:50:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:50:23.409776  543705 cpu.go:275] no items to output this cycle
I0322 20:50:23.409778  543705 memory.go:184] no items to output this cycle
E0322 20:50:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:50:33.409805  543705 memory.go:184] no items to output this cycle
I0322 20:50:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 20:50:36.514896  543705 disk_info.go:125] begin check local disk info of client
I0322 20:50:36.517450  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:50:36.517456  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc140 0xc0002bc180]
E0322 20:50:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:50:43.409812  543705 cpu.go:282] Add success.
I0322 20:50:43.410966  543705 memory.go:191] Add success.
I0322 20:50:43.419753  543705 net.go:648] Add success.
I0322 20:50:43.422698  543705 net.go:770] primary dev: ETH0
I0322 20:50:43.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:50:43.422723  543705 net.go:698] Add success.
I0322 20:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:50:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:50:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:50:53.409786  543705 memory.go:184] no items to output this cycle
I0322 20:50:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 20:51:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:51:03.409798  543705 memory.go:184] no items to output this cycle
I0322 20:51:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 20:51:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:51:13.409789  543705 memory.go:191] Add success.
I0322 20:51:13.409806  543705 cpu.go:282] Add success.
W0322 20:51:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:51:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:51:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:51:13.420213  543705 net.go:648] Add success.
I0322 20:51:13.423032  543705 net.go:770] primary dev: ETH0
I0322 20:51:13.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:51:13.423062  543705 net.go:698] Add success.
I0322 20:51:13.814269  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"45e691ff-aa05-448d-84ea-a5b175341df5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:51:13.814307  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:51:14.454679  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:51:14.454831  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:51:14.454895  543705 disk_worker.go:708] disk space is not compliant
W0322 20:51:14.454898  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:51:14.456252  543705 disk_worker.go:494] system disk:vda1
I0322 20:51:14.456310  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:51:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:51:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:51:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:51:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:51:23.409775  543705 memory.go:184] no items to output this cycle
I0322 20:51:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 20:51:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:51:33.409774  543705 memory.go:184] no items to output this cycle
I0322 20:51:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 20:51:36.518296  543705 disk_info.go:125] begin check local disk info of client
I0322 20:51:36.520805  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:51:36.520812  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3400 0xc0003b3440]
I0322 20:51:39.801708  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:51:39.801714  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:51:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:51:43.410920  543705 memory.go:191] Add success.
I0322 20:51:43.409818  543705 cpu.go:282] Add success.
I0322 20:51:43.420808  543705 net.go:648] Add success.
I0322 20:51:43.423623  543705 net.go:770] primary dev: ETH0
I0322 20:51:43.423636  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:51:43.423647  543705 net.go:698] Add success.
I0322 20:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:51:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:51:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:51:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:51:53.409781  543705 memory.go:184] no items to output this cycle
I0322 20:51:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 20:52:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:52:03.409798  543705 memory.go:184] no items to output this cycle
I0322 20:52:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:52:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:52:13.409775  543705 memory.go:191] Add success.
W0322 20:52:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 20:52:13.409809  543705 cpu.go:282] Add success.
W0322 20:52:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:52:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:52:13.420161  543705 net.go:648] Add success.
I0322 20:52:13.423377  543705 net.go:770] primary dev: ETH0
I0322 20:52:13.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:52:13.423403  543705 net.go:698] Add success.
W0322 20:52:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:52:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 20:52:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0322 20:52:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:52:14.455893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:52:14.455899  543705 custom_config.go:64] query custom config with name: gpu
I0322 20:52:14.456635  543705 disk_worker.go:494] system disk:vda1
I0322 20:52:14.456682  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:52:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:52:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:52:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:52:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:52:16.457980  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:52:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:52:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:52:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:52:23.409800  543705 memory.go:184] no items to output this cycle
I0322 20:52:23.409807  543705 cpu.go:275] no items to output this cycle
E0322 20:52:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:52:33.409773  543705 memory.go:184] no items to output this cycle
I0322 20:52:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 20:52:36.520895  543705 disk_info.go:125] begin check local disk info of client
I0322 20:52:36.523491  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:52:36.523497  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f2200 0xc0003f2240]
E0322 20:52:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:52:43.410705  543705 memory.go:191] Add success.
I0322 20:52:43.409821  543705 cpu.go:282] Add success.
I0322 20:52:43.420389  543705 net.go:648] Add success.
I0322 20:52:43.423285  543705 net.go:770] primary dev: ETH0
I0322 20:52:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:52:43.423411  543705 net.go:698] Add success.
I0322 20:52:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:52:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:52:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:52:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:52:53.409812  543705 memory.go:184] no items to output this cycle
I0322 20:52:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 20:53:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:53:03.409776  543705 memory.go:184] no items to output this cycle
I0322 20:53:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:53:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:53:13.409791  543705 memory.go:191] Add success.
I0322 20:53:13.409793  543705 cpu.go:282] Add success.
W0322 20:53:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:53:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:53:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:53:13.420165  543705 net.go:648] Add success.
I0322 20:53:13.422661  543705 net.go:770] primary dev: ETH0
I0322 20:53:13.422675  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:53:13.422690  543705 net.go:698] Add success.
I0322 20:53:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:53:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:53:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 20:53:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:53:14.456596  543705 disk_worker.go:494] system disk:vda1
I0322 20:53:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:53:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:53:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:53:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:53:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:53:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:53:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:53:23.409777  543705 memory.go:184] no items to output this cycle
I0322 20:53:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 20:53:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:53:33.409784  543705 memory.go:184] no items to output this cycle
I0322 20:53:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 20:53:36.524309  543705 disk_info.go:125] begin check local disk info of client
I0322 20:53:36.526856  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:53:36.526863  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bdc0 0xc00007be00]
E0322 20:53:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:53:43.410891  543705 memory.go:191] Add success.
I0322 20:53:43.409819  543705 cpu.go:282] Add success.
I0322 20:53:43.420576  543705 net.go:648] Add success.
I0322 20:53:43.423147  543705 net.go:770] primary dev: ETH0
I0322 20:53:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:53:43.423172  543705 net.go:698] Add success.
I0322 20:53:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:53:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:53:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:53:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:53:53.409774  543705 memory.go:184] no items to output this cycle
I0322 20:53:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 20:54:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:54:03.409790  543705 memory.go:184] no items to output this cycle
I0322 20:54:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 20:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:54:13.409791  543705 memory.go:191] Add success.
I0322 20:54:13.409795  543705 cpu.go:282] Add success.
W0322 20:54:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:54:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:54:13.420223  543705 net.go:648] Add success.
I0322 20:54:13.422759  543705 net.go:770] primary dev: ETH0
I0322 20:54:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:54:13.422785  543705 net.go:698] Add success.
I0322 20:54:13.463331  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"331fcacb-92a3-481b-b3ba-cee8df46a66a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:54:13.463372  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 20:54:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:54:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:54:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 20:54:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:54:14.456641  543705 disk_worker.go:494] system disk:vda1
I0322 20:54:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:54:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:54:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:54:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:54:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:54:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:54:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:54:23.409796  543705 memory.go:184] no items to output this cycle
I0322 20:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 20:54:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:54:33.409791  543705 memory.go:184] no items to output this cycle
I0322 20:54:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 20:54:36.526945  543705 disk_info.go:125] begin check local disk info of client
I0322 20:54:36.529562  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:54:36.529569  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a5e80 0xc0004a5ec0]
I0322 20:54:39.803707  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:54:39.803713  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:54:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:54:43.410655  543705 memory.go:191] Add success.
I0322 20:54:43.409822  543705 cpu.go:282] Add success.
I0322 20:54:43.420337  543705 net.go:648] Add success.
I0322 20:54:43.423144  543705 net.go:770] primary dev: ETH0
I0322 20:54:43.423157  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:54:43.423168  543705 net.go:698] Add success.
I0322 20:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:54:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:54:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:54:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:54:53.410262  543705 memory.go:184] no items to output this cycle
I0322 20:54:53.410296  543705 cpu.go:275] no items to output this cycle
E0322 20:55:03.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:55:03.409903  543705 memory.go:184] no items to output this cycle
I0322 20:55:03.409965  543705 cpu.go:275] no items to output this cycle
E0322 20:55:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:55:13.409820  543705 memory.go:191] Add success.
I0322 20:55:13.409831  543705 cpu.go:282] Add success.
W0322 20:55:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:55:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:55:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:55:13.420229  543705 net.go:648] Add success.
I0322 20:55:13.423041  543705 net.go:770] primary dev: ETH0
I0322 20:55:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:55:13.423070  543705 net.go:698] Add success.
I0322 20:55:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:55:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:55:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 20:55:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:55:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 20:55:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:55:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:55:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:55:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:55:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:55:23.409800  543705 memory.go:184] no items to output this cycle
I0322 20:55:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 20:55:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:55:33.409793  543705 memory.go:184] no items to output this cycle
I0322 20:55:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 20:55:36.530332  543705 disk_info.go:125] begin check local disk info of client
I0322 20:55:36.532872  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:55:36.532877  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bd00 0xc00007bd80]
E0322 20:55:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:55:43.410732  543705 memory.go:191] Add success.
I0322 20:55:43.409804  543705 cpu.go:282] Add success.
I0322 20:55:43.420404  543705 net.go:648] Add success.
I0322 20:55:43.422943  543705 net.go:770] primary dev: ETH0
I0322 20:55:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:55:43.422969  543705 net.go:698] Add success.
I0322 20:55:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:55:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:55:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:55:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:55:53.409895  543705 memory.go:184] no items to output this cycle
I0322 20:55:53.409930  543705 cpu.go:275] no items to output this cycle
E0322 20:56:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:56:03.409791  543705 memory.go:184] no items to output this cycle
I0322 20:56:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 20:56:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:56:13.409831  543705 memory.go:191] Add success.
I0322 20:56:13.409835  543705 cpu.go:282] Add success.
W0322 20:56:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:56:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:56:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:56:13.420196  543705 net.go:648] Add success.
I0322 20:56:13.423013  543705 net.go:770] primary dev: ETH0
I0322 20:56:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:56:13.423039  543705 net.go:698] Add success.
I0322 20:56:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:56:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:56:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 20:56:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:56:14.456502  543705 disk_worker.go:494] system disk:vda1
I0322 20:56:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:56:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:56:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:56:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:56:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:56:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:56:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:56:23.409779  543705 memory.go:184] no items to output this cycle
I0322 20:56:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 20:56:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:56:33.409819  543705 memory.go:184] no items to output this cycle
I0322 20:56:33.409830  543705 cpu.go:275] no items to output this cycle
I0322 20:56:36.533305  543705 disk_info.go:125] begin check local disk info of client
I0322 20:56:36.535923  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:56:36.535930  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8d80 0xc0003c8dc0]
E0322 20:56:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:56:43.410562  543705 memory.go:191] Add success.
I0322 20:56:43.409833  543705 cpu.go:282] Add success.
I0322 20:56:43.420230  543705 net.go:648] Add success.
I0322 20:56:43.422819  543705 net.go:770] primary dev: ETH0
I0322 20:56:43.422834  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:56:43.422846  543705 net.go:698] Add success.
I0322 20:56:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:56:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:56:53.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:56:53.409928  543705 memory.go:184] no items to output this cycle
I0322 20:56:53.409946  543705 cpu.go:275] no items to output this cycle
E0322 20:57:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:57:03.409807  543705 memory.go:184] no items to output this cycle
I0322 20:57:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 20:57:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:57:13.409796  543705 memory.go:191] Add success.
I0322 20:57:13.409796  543705 cpu.go:282] Add success.
W0322 20:57:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:57:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:57:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:57:13.420153  543705 net.go:648] Add success.
I0322 20:57:13.422924  543705 net.go:770] primary dev: ETH0
I0322 20:57:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:57:13.422953  543705 net.go:698] Add success.
I0322 20:57:13.429172  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 20:57:13.453407  543705 event_worker.go:152] Polling the log file for events...
I0322 20:57:13.595184  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e800761-0f7a-42ba-98e3-f5b68941665a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 20:57:13.595220  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 20:57:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:57:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 20:57:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:57:14.456826  543705 disk_worker.go:494] system disk:vda1
I0322 20:57:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 20:57:14.456878  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 20:57:14.456886  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 20:57:14.456891  543705 custom_config.go:64] query custom config with name: gpu
E0322 20:57:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 20:57:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:57:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 20:57:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 20:57:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:57:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:57:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:57:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:57:23.409792  543705 memory.go:184] no items to output this cycle
I0322 20:57:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 20:57:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:57:33.409777  543705 memory.go:184] no items to output this cycle
I0322 20:57:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 20:57:36.536014  543705 disk_info.go:125] begin check local disk info of client
I0322 20:57:36.538533  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:57:36.538539  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002af000 0xc0002af040]
I0322 20:57:39.803849  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 20:57:39.803856  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 20:57:43.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:57:43.410870  543705 memory.go:191] Add success.
I0322 20:57:43.409922  543705 cpu.go:282] Add success.
I0322 20:57:43.419719  543705 net.go:648] Add success.
I0322 20:57:43.422285  543705 net.go:770] primary dev: ETH0
I0322 20:57:43.422299  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:57:43.422311  543705 net.go:698] Add success.
I0322 20:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:57:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:57:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:57:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:57:53.409777  543705 memory.go:184] no items to output this cycle
I0322 20:57:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 20:58:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:58:03.409773  543705 memory.go:184] no items to output this cycle
I0322 20:58:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 20:58:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:58:13.409820  543705 memory.go:191] Add success.
I0322 20:58:13.409825  543705 cpu.go:282] Add success.
W0322 20:58:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:58:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:58:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:58:13.420134  543705 net.go:648] Add success.
I0322 20:58:13.423255  543705 net.go:770] primary dev: ETH0
I0322 20:58:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:58:13.423285  543705 net.go:698] Add success.
I0322 20:58:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:58:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:58:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0322 20:58:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:58:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 20:58:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:58:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:58:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:58:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:58:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:58:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:58:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:58:23.409766  543705 memory.go:184] no items to output this cycle
I0322 20:58:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 20:58:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:58:33.409786  543705 memory.go:184] no items to output this cycle
I0322 20:58:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 20:58:36.539392  543705 disk_info.go:125] begin check local disk info of client
I0322 20:58:36.541944  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:58:36.541950  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4c40 0xc0004a4c80]
E0322 20:58:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:58:43.410699  543705 memory.go:191] Add success.
I0322 20:58:43.409801  543705 cpu.go:282] Add success.
I0322 20:58:43.420419  543705 net.go:648] Add success.
I0322 20:58:43.423042  543705 net.go:770] primary dev: ETH0
I0322 20:58:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:58:43.423066  543705 net.go:698] Add success.
I0322 20:58:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:58:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:58:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:58:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:58:53.409787  543705 memory.go:184] no items to output this cycle
I0322 20:58:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 20:59:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:59:03.409780  543705 memory.go:184] no items to output this cycle
I0322 20:59:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 20:59:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:59:13.409783  543705 memory.go:191] Add success.
I0322 20:59:13.409806  543705 cpu.go:282] Add success.
W0322 20:59:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 20:59:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 20:59:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 20:59:13.420456  543705 net.go:648] Add success.
I0322 20:59:13.423124  543705 net.go:770] primary dev: ETH0
I0322 20:59:13.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:59:13.423149  543705 net.go:698] Add success.
I0322 20:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 20:59:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 20:59:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 20:59:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 20:59:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 20:59:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 20:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 20:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 20:59:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0322 20:59:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:59:23.409804  543705 memory.go:184] no items to output this cycle
I0322 20:59:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 20:59:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:59:33.409797  543705 memory.go:184] no items to output this cycle
I0322 20:59:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 20:59:36.542034  543705 disk_info.go:125] begin check local disk info of client
I0322 20:59:36.544599  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 20:59:36.544606  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac000 0xc0002ac040]
E0322 20:59:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:59:43.410692  543705 memory.go:191] Add success.
I0322 20:59:43.409800  543705 cpu.go:282] Add success.
I0322 20:59:43.420397  543705 net.go:648] Add success.
I0322 20:59:43.423057  543705 net.go:770] primary dev: ETH0
I0322 20:59:43.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0322 20:59:43.423086  543705 net.go:698] Add success.
I0322 20:59:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 20:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 20:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 20:59:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 20:59:53.409789  543705 cpu.go:275] no items to output this cycle
I0322 20:59:53.409791  543705 memory.go:184] no items to output this cycle
E0322 21:00:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:00:03.409803  543705 memory.go:184] no items to output this cycle
I0322 21:00:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 21:00:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:00:13.409791  543705 cpu.go:282] Add success.
I0322 21:00:13.409797  543705 memory.go:191] Add success.
W0322 21:00:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:00:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:00:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:00:13.420309  543705 net.go:648] Add success.
I0322 21:00:13.423108  543705 net.go:770] primary dev: ETH0
I0322 21:00:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:00:13.423133  543705 net.go:698] Add success.
I0322 21:00:13.468637  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6e194df-23f9-454a-a29f-6c28bd8b6c4c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:00:13.468671  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:00:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:00:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:00:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 21:00:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:00:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 21:00:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:00:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:00:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:00:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:00:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:00:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:00:23.410260  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:00:23.410275  543705 memory.go:184] no items to output this cycle
I0322 21:00:23.410284  543705 cpu.go:275] no items to output this cycle
E0322 21:00:33.409931  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:00:33.409932  543705 cpu.go:275] no items to output this cycle
I0322 21:00:33.409948  543705 memory.go:184] no items to output this cycle
I0322 21:00:36.545421  543705 disk_info.go:125] begin check local disk info of client
I0322 21:00:36.548053  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:00:36.548061  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
I0322 21:00:39.803993  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:00:39.804000  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:00:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:00:43.410785  543705 memory.go:191] Add success.
I0322 21:00:43.409806  543705 cpu.go:282] Add success.
I0322 21:00:43.420489  543705 net.go:648] Add success.
I0322 21:00:43.422976  543705 net.go:770] primary dev: ETH0
I0322 21:00:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:00:43.423000  543705 net.go:698] Add success.
I0322 21:00:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:00:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:00:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:00:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:00:53.409805  543705 memory.go:184] no items to output this cycle
I0322 21:00:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:01:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:01:03.409783  543705 memory.go:184] no items to output this cycle
I0322 21:01:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:01:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:01:13.409785  543705 memory.go:191] Add success.
I0322 21:01:13.409789  543705 cpu.go:282] Add success.
W0322 21:01:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:01:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:01:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:01:13.420046  543705 net.go:648] Add success.
I0322 21:01:13.422795  543705 net.go:770] primary dev: ETH0
I0322 21:01:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:01:13.422822  543705 net.go:698] Add success.
I0322 21:01:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:01:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:01:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 21:01:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:01:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 21:01:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:01:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:01:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:01:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:01:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:01:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:01:23.409775  543705 cpu.go:275] no items to output this cycle
I0322 21:01:23.409777  543705 memory.go:184] no items to output this cycle
I0322 21:01:33.409883  543705 cpu.go:275] no items to output this cycle
E0322 21:01:33.409961  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:01:33.409976  543705 memory.go:184] no items to output this cycle
I0322 21:01:36.548387  543705 disk_info.go:125] begin check local disk info of client
I0322 21:01:36.550889  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:01:36.550896  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
E0322 21:01:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:01:43.410616  543705 memory.go:191] Add success.
I0322 21:01:43.409799  543705 cpu.go:282] Add success.
I0322 21:01:43.420323  543705 net.go:648] Add success.
I0322 21:01:43.422836  543705 net.go:770] primary dev: ETH0
I0322 21:01:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:01:43.422861  543705 net.go:698] Add success.
I0322 21:01:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:01:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:01:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:01:53.410357  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:01:53.410376  543705 memory.go:184] no items to output this cycle
I0322 21:01:53.410396  543705 cpu.go:275] no items to output this cycle
E0322 21:02:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:02:03.409806  543705 memory.go:184] no items to output this cycle
I0322 21:02:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 21:02:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:02:13.409784  543705 memory.go:191] Add success.
I0322 21:02:13.409807  543705 cpu.go:282] Add success.
W0322 21:02:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:02:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:02:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:02:13.420139  543705 net.go:648] Add success.
I0322 21:02:13.423000  543705 net.go:770] primary dev: ETH0
I0322 21:02:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:02:13.423028  543705 net.go:698] Add success.
W0322 21:02:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:02:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0322 21:02:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:02:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:02:14.456928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:02:14.456934  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:02:14.456982  543705 disk_worker.go:494] system disk:vda1
I0322 21:02:14.457024  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:02:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:02:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:02:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:02:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:02:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:02:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:02:16.472307  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:02:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:02:23.409800  543705 memory.go:184] no items to output this cycle
I0322 21:02:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:02:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:02:33.409787  543705 memory.go:184] no items to output this cycle
I0322 21:02:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 21:02:36.551398  543705 disk_info.go:125] begin check local disk info of client
I0322 21:02:36.553973  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:02:36.553980  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057fa40 0xc00057fa80]
E0322 21:02:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:02:43.410747  543705 memory.go:191] Add success.
I0322 21:02:43.409815  543705 cpu.go:282] Add success.
I0322 21:02:43.420533  543705 net.go:648] Add success.
I0322 21:02:43.423321  543705 net.go:770] primary dev: ETH0
I0322 21:02:43.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:02:43.423347  543705 net.go:698] Add success.
I0322 21:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:02:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:02:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:02:53.410407  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:02:53.410427  543705 memory.go:184] no items to output this cycle
I0322 21:02:53.410455  543705 cpu.go:275] no items to output this cycle
E0322 21:03:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:03:03.409769  543705 memory.go:184] no items to output this cycle
I0322 21:03:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 21:03:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:03:13.409775  543705 memory.go:191] Add success.
W0322 21:03:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:03:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:03:13.409811  543705 cpu.go:282] Add success.
I0322 21:03:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:03:13.420125  543705 net.go:648] Add success.
I0322 21:03:13.422820  543705 net.go:770] primary dev: ETH0
I0322 21:03:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:03:13.422845  543705 net.go:698] Add success.
I0322 21:03:13.621597  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0530d979-2877-4fa4-b4f1-0b7f8244753a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:03:13.621633  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:03:14.453977  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:03:14.454234  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:03:14.454245  543705 disk_worker.go:708] disk space is not compliant
W0322 21:03:14.454247  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:03:14.455793  543705 disk_worker.go:494] system disk:vda1
I0322 21:03:14.455823  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:03:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:03:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:03:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:03:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:03:23.409779  543705 memory.go:184] no items to output this cycle
I0322 21:03:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 21:03:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:03:33.409791  543705 memory.go:184] no items to output this cycle
I0322 21:03:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 21:03:36.554401  543705 disk_info.go:125] begin check local disk info of client
I0322 21:03:36.556910  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:03:36.556918  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024ed80 0xc00024edc0]
I0322 21:03:39.805727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:03:39.805733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:03:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:03:43.410838  543705 memory.go:191] Add success.
I0322 21:03:43.409901  543705 cpu.go:282] Add success.
I0322 21:03:43.419737  543705 net.go:648] Add success.
I0322 21:03:43.422338  543705 net.go:770] primary dev: ETH0
I0322 21:03:43.422353  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:03:43.422379  543705 net.go:698] Add success.
I0322 21:03:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:03:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:03:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:03:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:03:53.409805  543705 memory.go:184] no items to output this cycle
I0322 21:03:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 21:04:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:04:03.409802  543705 memory.go:184] no items to output this cycle
I0322 21:04:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 21:04:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:04:13.409779  543705 memory.go:191] Add success.
W0322 21:04:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:04:13.409805  543705 cpu.go:282] Add success.
W0322 21:04:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:04:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:04:13.420060  543705 net.go:648] Add success.
I0322 21:04:13.423228  543705 net.go:770] primary dev: ETH0
I0322 21:04:13.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:04:13.423253  543705 net.go:698] Add success.
I0322 21:04:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:04:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:04:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 21:04:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:04:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 21:04:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:04:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:04:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:04:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:04:23.410356  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:04:23.410371  543705 memory.go:184] no items to output this cycle
I0322 21:04:23.410395  543705 cpu.go:275] no items to output this cycle
E0322 21:04:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:04:33.409788  543705 memory.go:184] no items to output this cycle
I0322 21:04:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 21:04:36.557001  543705 disk_info.go:125] begin check local disk info of client
I0322 21:04:36.559615  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:04:36.559621  543705 disk_info.go:196] parse disk info done, disk is : [0xc000383240 0xc000383280]
E0322 21:04:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:04:43.409813  543705 cpu.go:282] Add success.
I0322 21:04:43.410871  543705 memory.go:191] Add success.
I0322 21:04:43.419739  543705 net.go:648] Add success.
I0322 21:04:43.422515  543705 net.go:770] primary dev: ETH0
I0322 21:04:43.422527  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:04:43.422539  543705 net.go:698] Add success.
I0322 21:04:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:04:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:04:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:04:53.410410  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:04:53.410420  543705 cpu.go:275] no items to output this cycle
I0322 21:04:53.410428  543705 memory.go:184] no items to output this cycle
E0322 21:05:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:05:03.409797  543705 memory.go:184] no items to output this cycle
I0322 21:05:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 21:05:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:05:13.409780  543705 memory.go:191] Add success.
W0322 21:05:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:05:13.409808  543705 cpu.go:282] Add success.
W0322 21:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:05:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:05:13.420154  543705 net.go:648] Add success.
I0322 21:05:13.422858  543705 net.go:770] primary dev: ETH0
I0322 21:05:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:05:13.422882  543705 net.go:698] Add success.
I0322 21:05:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:05:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:05:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 21:05:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:05:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 21:05:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:05:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:05:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:05:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:05:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:05:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:05:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:05:23.409779  543705 memory.go:184] no items to output this cycle
I0322 21:05:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 21:05:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:05:33.409810  543705 memory.go:184] no items to output this cycle
I0322 21:05:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 21:05:36.560488  543705 disk_info.go:125] begin check local disk info of client
I0322 21:05:36.563052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:05:36.563059  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9840 0xc0004a9880]
E0322 21:05:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:05:43.410669  543705 memory.go:191] Add success.
I0322 21:05:43.409813  543705 cpu.go:282] Add success.
I0322 21:05:43.420418  543705 net.go:648] Add success.
I0322 21:05:43.423098  543705 net.go:770] primary dev: ETH0
I0322 21:05:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:05:43.423123  543705 net.go:698] Add success.
I0322 21:05:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:05:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:05:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:05:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:05:53.409776  543705 memory.go:184] no items to output this cycle
I0322 21:05:53.409806  543705 cpu.go:275] no items to output this cycle
E0322 21:06:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:06:03.409799  543705 memory.go:184] no items to output this cycle
I0322 21:06:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 21:06:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:06:13.409787  543705 memory.go:191] Add success.
W0322 21:06:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:06:13.409822  543705 cpu.go:282] Add success.
W0322 21:06:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:06:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:06:13.419925  543705 net.go:770] primary dev: ETH0
I0322 21:06:13.419937  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:06:13.419949  543705 net.go:698] Add success.
I0322 21:06:13.420293  543705 net.go:648] Add success.
I0322 21:06:13.469084  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7393dea1-7ea6-44f6-821c-773c5d08ba71","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:06:13.469117  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:06:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:06:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:06:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0322 21:06:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:06:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 21:06:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:06:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:06:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:06:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:06:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:06:23.409781  543705 memory.go:184] no items to output this cycle
I0322 21:06:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:06:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:06:33.409797  543705 memory.go:184] no items to output this cycle
I0322 21:06:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 21:06:36.563143  543705 disk_info.go:125] begin check local disk info of client
I0322 21:06:36.565775  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:06:36.565781  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000cb540 0xc0000cb580]
I0322 21:06:39.807741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:06:39.807747  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:06:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:06:43.410706  543705 memory.go:191] Add success.
I0322 21:06:43.409831  543705 cpu.go:282] Add success.
I0322 21:06:43.420410  543705 net.go:648] Add success.
I0322 21:06:43.423039  543705 net.go:770] primary dev: ETH0
I0322 21:06:43.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:06:43.423227  543705 net.go:698] Add success.
I0322 21:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:06:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:06:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:06:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:06:53.409820  543705 memory.go:184] no items to output this cycle
I0322 21:06:53.409830  543705 cpu.go:275] no items to output this cycle
E0322 21:07:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:07:03.409777  543705 memory.go:184] no items to output this cycle
I0322 21:07:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 21:07:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:07:13.409825  543705 memory.go:191] Add success.
I0322 21:07:13.409833  543705 cpu.go:282] Add success.
W0322 21:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:07:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:07:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:07:13.420125  543705 net.go:648] Add success.
I0322 21:07:13.422986  543705 net.go:770] primary dev: ETH0
I0322 21:07:13.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:07:13.423012  543705 net.go:698] Add success.
I0322 21:07:13.453559  543705 event_worker.go:152] Polling the log file for events...
W0322 21:07:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:07:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 21:07:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:07:14.456897  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:07:14.456906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:07:14.456912  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:07:14.456973  543705 disk_worker.go:494] system disk:vda1
I0322 21:07:14.457003  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:07:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:07:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:07:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:07:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:07:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:07:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:07:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:07:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:07:23.409784  543705 memory.go:184] no items to output this cycle
I0322 21:07:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 21:07:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:07:33.409819  543705 memory.go:184] no items to output this cycle
I0322 21:07:33.409834  543705 cpu.go:275] no items to output this cycle
I0322 21:07:36.565865  543705 disk_info.go:125] begin check local disk info of client
I0322 21:07:36.568381  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:07:36.568387  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004613c0 0xc000461400]
E0322 21:07:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:07:43.410638  543705 memory.go:191] Add success.
I0322 21:07:43.409808  543705 cpu.go:282] Add success.
I0322 21:07:43.420415  543705 net.go:648] Add success.
I0322 21:07:43.423289  543705 net.go:770] primary dev: ETH0
I0322 21:07:43.423301  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:07:43.423314  543705 net.go:698] Add success.
I0322 21:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:07:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:07:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:07:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:07:53.409778  543705 memory.go:184] no items to output this cycle
I0322 21:07:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 21:08:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:08:03.409776  543705 memory.go:184] no items to output this cycle
I0322 21:08:03.409829  543705 cpu.go:275] no items to output this cycle
E0322 21:08:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:08:13.409797  543705 memory.go:191] Add success.
I0322 21:08:13.409797  543705 cpu.go:282] Add success.
W0322 21:08:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:08:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:08:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:08:13.420383  543705 net.go:648] Add success.
I0322 21:08:13.423211  543705 net.go:770] primary dev: ETH0
I0322 21:08:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:08:13.423236  543705 net.go:698] Add success.
I0322 21:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:08:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:08:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 21:08:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:08:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 21:08:14.456524  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:08:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:08:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:08:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:08:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:08:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:08:23.409769  543705 memory.go:184] no items to output this cycle
I0322 21:08:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 21:08:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:08:33.409793  543705 memory.go:184] no items to output this cycle
I0322 21:08:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 21:08:36.568471  543705 disk_info.go:125] begin check local disk info of client
I0322 21:08:36.571097  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:08:36.571105  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b040 0xc00007b080]
E0322 21:08:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:08:43.410614  543705 memory.go:191] Add success.
I0322 21:08:43.409804  543705 cpu.go:282] Add success.
I0322 21:08:43.420302  543705 net.go:648] Add success.
I0322 21:08:43.423104  543705 net.go:770] primary dev: ETH0
I0322 21:08:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:08:43.423130  543705 net.go:698] Add success.
I0322 21:08:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:08:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:08:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:08:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:08:53.409791  543705 memory.go:184] no items to output this cycle
I0322 21:08:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 21:09:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:09:03.409779  543705 memory.go:184] no items to output this cycle
I0322 21:09:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:09:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:09:13.409789  543705 memory.go:191] Add success.
I0322 21:09:13.409800  543705 cpu.go:282] Add success.
W0322 21:09:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:09:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:09:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:09:13.420113  543705 net.go:648] Add success.
I0322 21:09:13.422803  543705 net.go:770] primary dev: ETH0
I0322 21:09:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:09:13.422829  543705 net.go:698] Add success.
I0322 21:09:13.468865  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b4d871d0-5164-43f3-8d19-a1c4ed2bc681","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:09:13.468899  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:09:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:09:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:09:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 21:09:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:09:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 21:09:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:09:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:09:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:09:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:09:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:09:23.409796  543705 memory.go:184] no items to output this cycle
I0322 21:09:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 21:09:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:09:33.409807  543705 memory.go:184] no items to output this cycle
I0322 21:09:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 21:09:36.571188  543705 disk_info.go:125] begin check local disk info of client
I0322 21:09:36.573711  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:09:36.573718  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312600 0xc000312640]
I0322 21:09:39.809735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:09:39.809741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:09:43.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:09:43.410848  543705 memory.go:191] Add success.
I0322 21:09:43.409918  543705 cpu.go:282] Add success.
I0322 21:09:43.419714  543705 net.go:648] Add success.
I0322 21:09:43.422082  543705 net.go:770] primary dev: ETH0
I0322 21:09:43.422094  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:09:43.422107  543705 net.go:698] Add success.
I0322 21:09:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:09:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:09:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:09:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:09:53.409807  543705 memory.go:184] no items to output this cycle
I0322 21:09:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 21:10:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:10:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 21:10:03.409784  543705 memory.go:184] no items to output this cycle
E0322 21:10:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:10:13.409814  543705 memory.go:191] Add success.
I0322 21:10:13.409825  543705 cpu.go:282] Add success.
W0322 21:10:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:10:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:10:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:10:13.420161  543705 net.go:648] Add success.
I0322 21:10:13.423134  543705 net.go:770] primary dev: ETH0
I0322 21:10:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:10:13.423160  543705 net.go:698] Add success.
I0322 21:10:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:10:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:10:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0322 21:10:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:10:14.456587  543705 disk_worker.go:494] system disk:vda1
I0322 21:10:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:10:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:10:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:10:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:10:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:10:23.409773  543705 memory.go:184] no items to output this cycle
I0322 21:10:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 21:10:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:10:33.409774  543705 memory.go:184] no items to output this cycle
I0322 21:10:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 21:10:36.573806  543705 disk_info.go:125] begin check local disk info of client
I0322 21:10:36.576457  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:10:36.576465  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 21:10:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:10:43.410749  543705 memory.go:191] Add success.
I0322 21:10:43.409807  543705 cpu.go:282] Add success.
I0322 21:10:43.420438  543705 net.go:648] Add success.
I0322 21:10:43.423337  543705 net.go:770] primary dev: ETH0
I0322 21:10:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:10:43.423363  543705 net.go:698] Add success.
I0322 21:10:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:10:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:10:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:10:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:10:53.409815  543705 memory.go:184] no items to output this cycle
I0322 21:10:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 21:11:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:11:03.409772  543705 memory.go:184] no items to output this cycle
I0322 21:11:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 21:11:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:11:13.409813  543705 memory.go:191] Add success.
I0322 21:11:13.409822  543705 cpu.go:282] Add success.
W0322 21:11:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:11:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:11:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:11:13.420146  543705 net.go:648] Add success.
I0322 21:11:13.423221  543705 net.go:770] primary dev: ETH0
I0322 21:11:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:11:13.423245  543705 net.go:698] Add success.
I0322 21:11:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:11:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:11:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 21:11:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:11:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 21:11:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:11:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:11:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:11:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:11:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:11:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:11:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:11:23.409789  543705 memory.go:184] no items to output this cycle
I0322 21:11:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 21:11:33.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:11:33.409922  543705 cpu.go:275] no items to output this cycle
I0322 21:11:33.409925  543705 memory.go:184] no items to output this cycle
I0322 21:11:36.576517  543705 disk_info.go:125] begin check local disk info of client
I0322 21:11:36.579050  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:11:36.579057  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c3340 0xc0004c3380]
E0322 21:11:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:11:43.410682  543705 memory.go:191] Add success.
I0322 21:11:43.409827  543705 cpu.go:282] Add success.
I0322 21:11:43.420366  543705 net.go:648] Add success.
I0322 21:11:43.423018  543705 net.go:770] primary dev: ETH0
I0322 21:11:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:11:43.423043  543705 net.go:698] Add success.
I0322 21:11:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:11:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:11:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:11:53.409788  543705 memory.go:184] no items to output this cycle
I0322 21:11:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 21:12:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:12:03.409775  543705 memory.go:184] no items to output this cycle
I0322 21:12:03.409793  543705 cpu.go:275] no items to output this cycle
W0322 21:12:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:12:13.409731  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:12:13.409737  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:12:13.409829  543705 cpu.go:282] Add success.
E0322 21:12:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:12:13.409848  543705 memory.go:191] Add success.
I0322 21:12:13.420117  543705 net.go:648] Add success.
I0322 21:12:13.423147  543705 net.go:770] primary dev: ETH0
I0322 21:12:13.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:12:13.423174  543705 net.go:698] Add success.
I0322 21:12:13.465457  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"553550c1-9c92-46cd-b721-3acbf7a4f1a7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:12:13.465487  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 21:12:14.455236  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:12:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0322 21:12:14.455256  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:12:14.456110  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:12:14.456120  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:12:14.456126  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:12:14.456974  543705 disk_worker.go:494] system disk:vda1
I0322 21:12:14.457004  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:12:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:12:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:12:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:12:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:12:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:12:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:12:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:12:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:12:23.409777  543705 memory.go:184] no items to output this cycle
I0322 21:12:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 21:12:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:12:33.409784  543705 memory.go:184] no items to output this cycle
I0322 21:12:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 21:12:36.579145  543705 disk_info.go:125] begin check local disk info of client
I0322 21:12:36.581765  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:12:36.581772  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
I0322 21:12:39.811748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:12:39.811754  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:12:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:12:43.410710  543705 memory.go:191] Add success.
I0322 21:12:43.409823  543705 cpu.go:282] Add success.
I0322 21:12:43.420424  543705 net.go:648] Add success.
I0322 21:12:43.423045  543705 net.go:770] primary dev: ETH0
I0322 21:12:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:12:43.423071  543705 net.go:698] Add success.
I0322 21:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:12:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:12:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:12:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:12:53.409787  543705 memory.go:184] no items to output this cycle
I0322 21:12:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 21:13:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:13:03.409776  543705 memory.go:184] no items to output this cycle
I0322 21:13:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 21:13:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:13:13.409808  543705 memory.go:191] Add success.
I0322 21:13:13.409818  543705 cpu.go:282] Add success.
W0322 21:13:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:13:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:13:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:13:13.420165  543705 net.go:648] Add success.
I0322 21:13:13.422783  543705 net.go:770] primary dev: ETH0
I0322 21:13:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:13:13.422809  543705 net.go:698] Add success.
I0322 21:13:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:13:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:13:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 21:13:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:13:14.456525  543705 disk_worker.go:494] system disk:vda1
I0322 21:13:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:13:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:13:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:13:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:13:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:13:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:13:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:13:23.409774  543705 memory.go:184] no items to output this cycle
I0322 21:13:23.409895  543705 cpu.go:275] no items to output this cycle
E0322 21:13:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:13:33.409768  543705 memory.go:184] no items to output this cycle
I0322 21:13:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 21:13:36.581857  543705 disk_info.go:125] begin check local disk info of client
I0322 21:13:36.584378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:13:36.584385  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 21:13:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:13:43.410789  543705 memory.go:191] Add success.
I0322 21:13:43.409818  543705 cpu.go:282] Add success.
I0322 21:13:43.420518  543705 net.go:648] Add success.
I0322 21:13:43.423428  543705 net.go:770] primary dev: ETH0
I0322 21:13:43.423443  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:13:43.423458  543705 net.go:698] Add success.
I0322 21:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:13:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:13:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:13:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:13:53.409785  543705 memory.go:184] no items to output this cycle
I0322 21:13:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 21:14:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:14:03.409774  543705 memory.go:184] no items to output this cycle
I0322 21:14:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 21:14:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:14:13.409810  543705 memory.go:191] Add success.
I0322 21:14:13.409821  543705 cpu.go:282] Add success.
W0322 21:14:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:14:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:14:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:14:13.420362  543705 net.go:648] Add success.
I0322 21:14:13.422964  543705 net.go:770] primary dev: ETH0
I0322 21:14:13.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:14:13.422993  543705 net.go:698] Add success.
I0322 21:14:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:14:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:14:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 21:14:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:14:14.456583  543705 disk_worker.go:494] system disk:vda1
I0322 21:14:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:14:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:14:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:14:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:14:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:14:23.409797  543705 memory.go:184] no items to output this cycle
I0322 21:14:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 21:14:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:14:33.409787  543705 memory.go:184] no items to output this cycle
I0322 21:14:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 21:14:36.584480  543705 disk_info.go:125] begin check local disk info of client
I0322 21:14:36.587039  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:14:36.587046  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580000 0xc000580040]
E0322 21:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:14:43.410583  543705 memory.go:191] Add success.
I0322 21:14:43.409804  543705 cpu.go:282] Add success.
I0322 21:14:43.420287  543705 net.go:648] Add success.
I0322 21:14:43.423609  543705 net.go:770] primary dev: ETH0
I0322 21:14:43.423623  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:14:43.423635  543705 net.go:698] Add success.
I0322 21:14:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:14:53.410402  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:14:53.410419  543705 memory.go:184] no items to output this cycle
I0322 21:14:53.410420  543705 cpu.go:275] no items to output this cycle
E0322 21:15:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:15:03.409801  543705 memory.go:184] no items to output this cycle
I0322 21:15:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:15:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:15:13.409777  543705 memory.go:191] Add success.
W0322 21:15:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:15:13.409802  543705 cpu.go:282] Add success.
W0322 21:15:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:15:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:15:13.420122  543705 net.go:648] Add success.
I0322 21:15:13.422864  543705 net.go:770] primary dev: ETH0
I0322 21:15:13.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:15:13.422890  543705 net.go:698] Add success.
I0322 21:15:13.463562  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d05d1197-ffce-4aa9-89eb-943e2e230331","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:15:13.463595  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:15:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:15:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:15:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0322 21:15:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:15:14.456630  543705 disk_worker.go:494] system disk:vda1
I0322 21:15:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:15:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:15:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:15:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:15:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:15:23.409780  543705 memory.go:184] no items to output this cycle
I0322 21:15:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 21:15:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:15:33.409810  543705 memory.go:184] no items to output this cycle
I0322 21:15:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 21:15:36.587128  543705 disk_info.go:125] begin check local disk info of client
I0322 21:15:36.589641  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:15:36.589661  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a800 0xc00029a840]
I0322 21:15:39.813727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:15:39.813733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:15:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:15:43.410650  543705 memory.go:191] Add success.
I0322 21:15:43.409803  543705 cpu.go:282] Add success.
I0322 21:15:43.420352  543705 net.go:648] Add success.
I0322 21:15:43.423225  543705 net.go:770] primary dev: ETH0
I0322 21:15:43.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:15:43.423251  543705 net.go:698] Add success.
I0322 21:15:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:15:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:15:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:15:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:15:53.409784  543705 memory.go:184] no items to output this cycle
I0322 21:15:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:16:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:16:03.409782  543705 memory.go:184] no items to output this cycle
I0322 21:16:03.409781  543705 cpu.go:275] no items to output this cycle
E0322 21:16:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:16:13.409780  543705 memory.go:191] Add success.
I0322 21:16:13.409804  543705 cpu.go:282] Add success.
W0322 21:16:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:16:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:16:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:16:13.420158  543705 net.go:648] Add success.
I0322 21:16:13.422936  543705 net.go:770] primary dev: ETH0
I0322 21:16:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:16:13.422962  543705 net.go:698] Add success.
I0322 21:16:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:16:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:16:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 21:16:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:16:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 21:16:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:16:15.456009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:16:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:16:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:16:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:16:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:16:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:16:23.409800  543705 memory.go:184] no items to output this cycle
I0322 21:16:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 21:16:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:16:33.409823  543705 memory.go:184] no items to output this cycle
I0322 21:16:33.409836  543705 cpu.go:275] no items to output this cycle
I0322 21:16:36.590643  543705 disk_info.go:125] begin check local disk info of client
I0322 21:16:36.593240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:16:36.593246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002084c0 0xc000208500]
E0322 21:16:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:16:43.410634  543705 memory.go:191] Add success.
I0322 21:16:43.409835  543705 cpu.go:282] Add success.
I0322 21:16:43.420349  543705 net.go:648] Add success.
I0322 21:16:43.423008  543705 net.go:770] primary dev: ETH0
I0322 21:16:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:16:43.423034  543705 net.go:698] Add success.
I0322 21:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:16:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:16:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:16:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:16:53.409784  543705 memory.go:184] no items to output this cycle
I0322 21:16:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 21:17:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:17:03.409774  543705 memory.go:184] no items to output this cycle
I0322 21:17:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 21:17:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:17:13.409824  543705 memory.go:191] Add success.
I0322 21:17:13.409830  543705 cpu.go:282] Add success.
W0322 21:17:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:17:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:17:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:17:13.420161  543705 net.go:648] Add success.
I0322 21:17:13.422865  543705 net.go:770] primary dev: ETH0
I0322 21:17:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:17:13.422891  543705 net.go:698] Add success.
I0322 21:17:13.453461  543705 event_worker.go:152] Polling the log file for events...
W0322 21:17:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:17:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 21:17:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:17:14.455890  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:17:14.455898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:17:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:17:14.456550  543705 disk_worker.go:494] system disk:vda1
I0322 21:17:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:17:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:17:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:17:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:17:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:17:16.457982  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:17:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:17:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:17:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:17:23.409784  543705 memory.go:184] no items to output this cycle
I0322 21:17:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 21:17:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:17:33.409781  543705 memory.go:184] no items to output this cycle
I0322 21:17:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 21:17:36.593329  543705 disk_info.go:125] begin check local disk info of client
I0322 21:17:36.595863  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:17:36.595869  543705 disk_info.go:196] parse disk info done, disk is : [0xc000346840 0xc000346880]
E0322 21:17:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:17:43.410625  543705 memory.go:191] Add success.
I0322 21:17:43.409815  543705 cpu.go:282] Add success.
I0322 21:17:43.420371  543705 net.go:648] Add success.
I0322 21:17:43.422886  543705 net.go:770] primary dev: ETH0
I0322 21:17:43.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:17:43.422916  543705 net.go:698] Add success.
I0322 21:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:17:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:17:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:17:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:17:53.409806  543705 memory.go:184] no items to output this cycle
I0322 21:17:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 21:18:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:18:03.409779  543705 memory.go:184] no items to output this cycle
I0322 21:18:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 21:18:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:18:13.409788  543705 memory.go:191] Add success.
I0322 21:18:13.409803  543705 cpu.go:282] Add success.
W0322 21:18:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:18:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:18:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:18:13.420122  543705 net.go:648] Add success.
I0322 21:18:13.422778  543705 net.go:770] primary dev: ETH0
I0322 21:18:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:18:13.422807  543705 net.go:698] Add success.
I0322 21:18:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:18:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:18:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0322 21:18:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:18:14.456932  543705 disk_worker.go:494] system disk:vda1
I0322 21:18:14.456965  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:18:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:18:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:18:16.472388  543705 disk_local_worker.go:436] Get disk info: []
W0322 21:18:18.454007  543705 custom_config.go:80] failed to get custom config
I0322 21:18:18.454026  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0322 21:18:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:18:23.409764  543705 memory.go:184] no items to output this cycle
I0322 21:18:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 21:18:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:18:33.409788  543705 memory.go:184] no items to output this cycle
I0322 21:18:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 21:18:36.596621  543705 disk_info.go:125] begin check local disk info of client
I0322 21:18:36.599169  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:18:36.599176  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bb740 0xc0002bb780]
I0322 21:18:39.815767  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:18:39.815774  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:18:43.410697  543705 memory.go:191] Add success.
I0322 21:18:43.409796  543705 cpu.go:282] Add success.
I0322 21:18:43.420396  543705 net.go:648] Add success.
I0322 21:18:43.423070  543705 net.go:770] primary dev: ETH0
I0322 21:18:43.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:18:43.423101  543705 net.go:698] Add success.
I0322 21:18:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:18:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:18:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:18:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:18:53.409778  543705 memory.go:184] no items to output this cycle
I0322 21:18:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 21:19:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:19:03.409782  543705 memory.go:184] no items to output this cycle
I0322 21:19:03.409784  543705 cpu.go:275] no items to output this cycle
E0322 21:19:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:19:13.409784  543705 memory.go:191] Add success.
I0322 21:19:13.409803  543705 cpu.go:282] Add success.
W0322 21:19:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:19:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:19:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:19:13.420188  543705 net.go:648] Add success.
I0322 21:19:13.422698  543705 net.go:770] primary dev: ETH0
I0322 21:19:13.422713  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:19:13.422728  543705 net.go:698] Add success.
I0322 21:19:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:19:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:19:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 21:19:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:19:14.457002  543705 disk_worker.go:494] system disk:vda1
I0322 21:19:14.457030  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:19:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:19:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:19:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:19:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:19:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:19:23.409803  543705 memory.go:184] no items to output this cycle
I0322 21:19:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 21:19:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:19:33.409783  543705 memory.go:184] no items to output this cycle
I0322 21:19:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 21:19:36.599259  543705 disk_info.go:125] begin check local disk info of client
I0322 21:19:36.601842  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:19:36.601849  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521640 0xc000521680]
E0322 21:19:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:19:43.410606  543705 memory.go:191] Add success.
I0322 21:19:43.409834  543705 cpu.go:282] Add success.
I0322 21:19:43.420376  543705 net.go:648] Add success.
I0322 21:19:43.422868  543705 net.go:770] primary dev: ETH0
I0322 21:19:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:19:43.422893  543705 net.go:698] Add success.
I0322 21:19:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:19:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:19:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:19:53.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:19:53.409831  543705 memory.go:184] no items to output this cycle
I0322 21:19:53.409843  543705 cpu.go:275] no items to output this cycle
E0322 21:20:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:20:03.409773  543705 memory.go:184] no items to output this cycle
I0322 21:20:03.409776  543705 cpu.go:275] no items to output this cycle
E0322 21:20:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:20:13.409786  543705 memory.go:191] Add success.
W0322 21:20:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:20:13.409816  543705 cpu.go:282] Add success.
W0322 21:20:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:20:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:20:13.420121  543705 net.go:648] Add success.
I0322 21:20:13.422698  543705 net.go:770] primary dev: ETH0
I0322 21:20:13.422711  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:20:13.422724  543705 net.go:698] Add success.
I0322 21:20:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:20:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:20:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 21:20:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:20:14.457027  543705 disk_worker.go:494] system disk:vda1
I0322 21:20:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:20:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:20:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:20:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:20:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:20:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:20:23.409796  543705 memory.go:184] no items to output this cycle
I0322 21:20:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 21:20:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:20:33.409816  543705 memory.go:184] no items to output this cycle
I0322 21:20:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 21:20:36.602664  543705 disk_info.go:125] begin check local disk info of client
I0322 21:20:36.605094  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:20:36.605101  543705 disk_info.go:196] parse disk info done, disk is : [0xc000464980 0xc0004649c0]
E0322 21:20:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:20:43.410912  543705 memory.go:191] Add success.
I0322 21:20:43.409819  543705 cpu.go:282] Add success.
I0322 21:20:43.420642  543705 net.go:648] Add success.
I0322 21:20:43.423366  543705 net.go:770] primary dev: ETH0
I0322 21:20:43.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:20:43.423402  543705 net.go:698] Add success.
I0322 21:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:20:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:20:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:20:53.409784  543705 memory.go:184] no items to output this cycle
I0322 21:20:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 21:21:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:21:03.409780  543705 cpu.go:275] no items to output this cycle
I0322 21:21:03.409787  543705 memory.go:184] no items to output this cycle
E0322 21:21:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:21:13.409795  543705 memory.go:191] Add success.
I0322 21:21:13.409796  543705 cpu.go:282] Add success.
W0322 21:21:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:21:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:21:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:21:13.420178  543705 net.go:648] Add success.
I0322 21:21:13.422888  543705 net.go:770] primary dev: ETH0
I0322 21:21:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:21:13.422915  543705 net.go:698] Add success.
I0322 21:21:13.468520  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e0da4be7-061e-4da0-a9b6-cf913b7a9834","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:21:13.468552  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:21:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:21:14.455355  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:21:14.455463  543705 disk_worker.go:708] disk space is not compliant
W0322 21:21:14.455468  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:21:14.457095  543705 disk_worker.go:494] system disk:vda1
I0322 21:21:14.457122  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:21:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:21:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:21:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:21:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:21:23.409785  543705 memory.go:184] no items to output this cycle
I0322 21:21:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 21:21:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:21:33.409784  543705 memory.go:184] no items to output this cycle
I0322 21:21:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 21:21:36.605187  543705 disk_info.go:125] begin check local disk info of client
I0322 21:21:36.607678  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:21:36.607685  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e1480 0xc0004e14c0]
I0322 21:21:39.817729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:21:39.817735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:21:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:21:43.410636  543705 memory.go:191] Add success.
I0322 21:21:43.409801  543705 cpu.go:282] Add success.
I0322 21:21:43.420401  543705 net.go:648] Add success.
I0322 21:21:43.423109  543705 net.go:770] primary dev: ETH0
I0322 21:21:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:21:43.423139  543705 net.go:698] Add success.
I0322 21:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:21:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:21:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:21:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:21:53.409781  543705 memory.go:184] no items to output this cycle
I0322 21:21:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 21:22:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:22:03.409800  543705 memory.go:184] no items to output this cycle
I0322 21:22:03.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:22:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:22:13.409783  543705 memory.go:191] Add success.
I0322 21:22:13.409806  543705 cpu.go:282] Add success.
W0322 21:22:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:22:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:22:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:22:13.420105  543705 net.go:648] Add success.
I0322 21:22:13.423094  543705 net.go:770] primary dev: ETH0
I0322 21:22:13.423109  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:22:13.423121  543705 net.go:698] Add success.
W0322 21:22:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:22:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 21:22:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:22:14.456705  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:22:14.456715  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:22:14.456722  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:22:14.457459  543705 disk_worker.go:494] system disk:vda1
I0322 21:22:14.457498  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:22:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:22:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:22:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:22:16.457990  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:22:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:22:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:22:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:22:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:22:23.409777  543705 memory.go:184] no items to output this cycle
I0322 21:22:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 21:22:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:22:33.409790  543705 memory.go:184] no items to output this cycle
I0322 21:22:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 21:22:36.607769  543705 disk_info.go:125] begin check local disk info of client
I0322 21:22:36.610404  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:22:36.610412  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275440 0xc000275480]
E0322 21:22:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:22:43.410791  543705 memory.go:191] Add success.
I0322 21:22:43.409827  543705 cpu.go:282] Add success.
I0322 21:22:43.420557  543705 net.go:648] Add success.
I0322 21:22:43.423538  543705 net.go:770] primary dev: ETH0
I0322 21:22:43.423552  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:22:43.423564  543705 net.go:698] Add success.
I0322 21:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:22:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:22:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:22:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:22:53.409810  543705 memory.go:184] no items to output this cycle
I0322 21:22:53.409825  543705 cpu.go:275] no items to output this cycle
E0322 21:23:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:23:03.409791  543705 memory.go:184] no items to output this cycle
I0322 21:23:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 21:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:23:13.409802  543705 memory.go:191] Add success.
I0322 21:23:13.409802  543705 cpu.go:282] Add success.
W0322 21:23:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:23:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:23:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:23:13.420194  543705 net.go:648] Add success.
I0322 21:23:13.422674  543705 net.go:770] primary dev: ETH0
I0322 21:23:13.422701  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:23:13.422714  543705 net.go:698] Add success.
I0322 21:23:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:23:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:23:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 21:23:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:23:14.456563  543705 disk_worker.go:494] system disk:vda1
I0322 21:23:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:23:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:23:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:23:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:23:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:23:23.409780  543705 memory.go:184] no items to output this cycle
I0322 21:23:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 21:23:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:23:33.409794  543705 memory.go:184] no items to output this cycle
I0322 21:23:33.409829  543705 cpu.go:275] no items to output this cycle
I0322 21:23:36.610697  543705 disk_info.go:125] begin check local disk info of client
I0322 21:23:36.613211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:23:36.613218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c45c0 0xc0000c4600]
E0322 21:23:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:23:43.410716  543705 memory.go:191] Add success.
I0322 21:23:43.409824  543705 cpu.go:282] Add success.
I0322 21:23:43.420471  543705 net.go:648] Add success.
I0322 21:23:43.423123  543705 net.go:770] primary dev: ETH0
I0322 21:23:43.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:23:43.423149  543705 net.go:698] Add success.
I0322 21:23:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:23:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:23:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:23:53.409791  543705 memory.go:184] no items to output this cycle
I0322 21:23:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 21:24:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:24:03.409817  543705 memory.go:184] no items to output this cycle
I0322 21:24:03.409834  543705 cpu.go:275] no items to output this cycle
E0322 21:24:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:24:13.409802  543705 memory.go:191] Add success.
I0322 21:24:13.409811  543705 cpu.go:282] Add success.
W0322 21:24:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:24:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:24:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:24:13.420199  543705 net.go:648] Add success.
I0322 21:24:13.422883  543705 net.go:770] primary dev: ETH0
I0322 21:24:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:24:13.422908  543705 net.go:698] Add success.
I0322 21:24:13.468824  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"00634f04-28b8-469f-a74c-2d58883d38a1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:24:13.468857  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:24:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:24:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:24:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 21:24:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:24:14.456636  543705 disk_worker.go:494] system disk:vda1
I0322 21:24:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:24:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:24:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:24:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:24:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:24:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:24:23.409791  543705 memory.go:184] no items to output this cycle
I0322 21:24:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:24:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:24:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 21:24:33.409792  543705 memory.go:184] no items to output this cycle
I0322 21:24:36.613674  543705 disk_info.go:125] begin check local disk info of client
I0322 21:24:36.616180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:24:36.616186  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e680 0xc00039e6c0]
I0322 21:24:39.819780  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:24:39.819787  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:24:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:24:43.410730  543705 memory.go:191] Add success.
I0322 21:24:43.409805  543705 cpu.go:282] Add success.
I0322 21:24:43.420421  543705 net.go:648] Add success.
I0322 21:24:43.423195  543705 net.go:770] primary dev: ETH0
I0322 21:24:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:24:43.423224  543705 net.go:698] Add success.
I0322 21:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:24:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:24:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:24:53.409786  543705 memory.go:184] no items to output this cycle
I0322 21:24:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 21:25:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:25:03.409765  543705 memory.go:184] no items to output this cycle
I0322 21:25:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:25:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:25:13.409782  543705 memory.go:191] Add success.
W0322 21:25:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:25:13.409810  543705 cpu.go:282] Add success.
W0322 21:25:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:25:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:25:13.420171  543705 net.go:648] Add success.
I0322 21:25:13.423039  543705 net.go:770] primary dev: ETH0
I0322 21:25:13.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:25:13.423065  543705 net.go:698] Add success.
I0322 21:25:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:25:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:25:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 21:25:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:25:14.456573  543705 disk_worker.go:494] system disk:vda1
I0322 21:25:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:25:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:25:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:25:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:25:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:25:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:25:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:25:23.409764  543705 memory.go:184] no items to output this cycle
I0322 21:25:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 21:25:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:25:33.409784  543705 memory.go:184] no items to output this cycle
I0322 21:25:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 21:25:36.616725  543705 disk_info.go:125] begin check local disk info of client
I0322 21:25:36.619279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:25:36.619286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003951c0 0xc000395200]
E0322 21:25:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:25:43.410802  543705 memory.go:191] Add success.
I0322 21:25:43.409796  543705 cpu.go:282] Add success.
I0322 21:25:43.420517  543705 net.go:648] Add success.
I0322 21:25:43.423315  543705 net.go:770] primary dev: ETH0
I0322 21:25:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:25:43.423341  543705 net.go:698] Add success.
I0322 21:25:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:25:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:25:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:25:53.409786  543705 memory.go:184] no items to output this cycle
I0322 21:25:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 21:26:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:26:03.409776  543705 memory.go:184] no items to output this cycle
I0322 21:26:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 21:26:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:26:13.409809  543705 memory.go:191] Add success.
I0322 21:26:13.409819  543705 cpu.go:282] Add success.
W0322 21:26:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:26:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:26:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:26:13.420064  543705 net.go:648] Add success.
I0322 21:26:13.422605  543705 net.go:770] primary dev: ETH0
I0322 21:26:13.422619  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:26:13.422634  543705 net.go:698] Add success.
I0322 21:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:26:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:26:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0322 21:26:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:26:14.456603  543705 disk_worker.go:494] system disk:vda1
I0322 21:26:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:26:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:26:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:26:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:26:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:26:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:26:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:26:23.409865  543705 memory.go:184] no items to output this cycle
I0322 21:26:23.409941  543705 cpu.go:275] no items to output this cycle
E0322 21:26:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:26:33.409799  543705 memory.go:184] no items to output this cycle
I0322 21:26:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 21:26:36.619738  543705 disk_info.go:125] begin check local disk info of client
I0322 21:26:36.622300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:26:36.622306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002958c0 0xc000295900]
E0322 21:26:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:26:43.410882  543705 memory.go:191] Add success.
I0322 21:26:43.409800  543705 cpu.go:282] Add success.
I0322 21:26:43.420582  543705 net.go:648] Add success.
I0322 21:26:43.423506  543705 net.go:770] primary dev: ETH0
I0322 21:26:43.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:26:43.423535  543705 net.go:698] Add success.
I0322 21:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:26:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:26:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:26:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:26:53.409785  543705 memory.go:184] no items to output this cycle
I0322 21:26:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:27:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:27:03.409806  543705 memory.go:184] no items to output this cycle
I0322 21:27:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 21:27:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:27:13.409787  543705 memory.go:191] Add success.
I0322 21:27:13.409806  543705 cpu.go:282] Add success.
W0322 21:27:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:27:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:27:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:27:13.420288  543705 net.go:648] Add success.
I0322 21:27:13.429674  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 21:27:13.429760  543705 net.go:770] primary dev: ETH0
I0322 21:27:13.429776  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:27:13.429792  543705 net.go:698] Add success.
I0322 21:27:13.453668  543705 event_worker.go:152] Polling the log file for events...
I0322 21:27:13.845452  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1d1a04a8-0bda-4860-badf-3d78bc4ecdf0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:27:13.845489  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 21:27:14.454170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:27:14.454230  543705 disk_worker.go:708] disk space is not compliant
W0322 21:27:14.454233  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:27:14.456051  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:27:14.456058  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:27:14.456062  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:27:14.456066  543705 disk_worker.go:494] system disk:vda1
I0322 21:27:14.456118  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:27:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:27:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:27:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:27:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:27:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:27:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:27:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:27:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:27:23.409804  543705 memory.go:184] no items to output this cycle
I0322 21:27:23.409814  543705 cpu.go:275] no items to output this cycle
E0322 21:27:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:27:33.409770  543705 memory.go:184] no items to output this cycle
I0322 21:27:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 21:27:36.622392  543705 disk_info.go:125] begin check local disk info of client
I0322 21:27:36.624888  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:27:36.624894  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004743c0 0xc000474400]
I0322 21:27:39.821731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:27:39.821738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:27:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:27:43.410818  543705 memory.go:191] Add success.
I0322 21:27:43.409799  543705 cpu.go:282] Add success.
I0322 21:27:43.420522  543705 net.go:648] Add success.
I0322 21:27:43.423611  543705 net.go:770] primary dev: ETH0
I0322 21:27:43.423623  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:27:43.423636  543705 net.go:698] Add success.
I0322 21:27:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:27:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:27:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:27:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:27:53.409822  543705 memory.go:184] no items to output this cycle
I0322 21:27:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 21:28:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:28:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 21:28:03.409787  543705 memory.go:184] no items to output this cycle
E0322 21:28:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:28:13.409817  543705 memory.go:191] Add success.
I0322 21:28:13.409824  543705 cpu.go:282] Add success.
W0322 21:28:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:28:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:28:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:28:13.420105  543705 net.go:648] Add success.
I0322 21:28:13.423011  543705 net.go:770] primary dev: ETH0
I0322 21:28:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:28:13.423035  543705 net.go:698] Add success.
I0322 21:28:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:28:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:28:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 21:28:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:28:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 21:28:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:28:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:28:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:28:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:28:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:28:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:28:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:28:23.409870  543705 memory.go:184] no items to output this cycle
I0322 21:28:23.409900  543705 cpu.go:275] no items to output this cycle
E0322 21:28:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:28:33.409820  543705 memory.go:184] no items to output this cycle
I0322 21:28:33.409833  543705 cpu.go:275] no items to output this cycle
I0322 21:28:36.625674  543705 disk_info.go:125] begin check local disk info of client
I0322 21:28:36.628280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:28:36.628298  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9b40 0xc0002b9b80]
E0322 21:28:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:28:43.410678  543705 memory.go:191] Add success.
I0322 21:28:43.409800  543705 cpu.go:282] Add success.
I0322 21:28:43.420435  543705 net.go:648] Add success.
I0322 21:28:43.423793  543705 net.go:770] primary dev: ETH0
I0322 21:28:43.423806  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:28:43.423818  543705 net.go:698] Add success.
I0322 21:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:28:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:28:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:28:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:28:53.409776  543705 memory.go:184] no items to output this cycle
I0322 21:28:53.409798  543705 cpu.go:275] no items to output this cycle
E0322 21:29:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:29:03.409770  543705 memory.go:184] no items to output this cycle
I0322 21:29:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 21:29:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:29:13.409811  543705 memory.go:191] Add success.
I0322 21:29:13.409815  543705 cpu.go:282] Add success.
W0322 21:29:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:29:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:29:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:29:13.420066  543705 net.go:648] Add success.
I0322 21:29:13.422828  543705 net.go:770] primary dev: ETH0
I0322 21:29:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:29:13.422853  543705 net.go:698] Add success.
I0322 21:29:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:29:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:29:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 21:29:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:29:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 21:29:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:29:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:29:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:29:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:29:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:29:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:29:23.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:29:23.409868  543705 cpu.go:275] no items to output this cycle
I0322 21:29:23.409876  543705 memory.go:184] no items to output this cycle
E0322 21:29:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:29:33.409781  543705 memory.go:184] no items to output this cycle
I0322 21:29:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 21:29:36.628384  543705 disk_info.go:125] begin check local disk info of client
I0322 21:29:36.630980  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:29:36.630987  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382000 0xc000382040]
E0322 21:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:29:43.410585  543705 memory.go:191] Add success.
I0322 21:29:43.409789  543705 cpu.go:282] Add success.
I0322 21:29:43.420261  543705 net.go:648] Add success.
I0322 21:29:43.422961  543705 net.go:770] primary dev: ETH0
I0322 21:29:43.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:29:43.422990  543705 net.go:698] Add success.
I0322 21:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:29:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:29:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:29:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:29:53.409813  543705 memory.go:184] no items to output this cycle
I0322 21:29:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 21:30:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:30:03.409765  543705 memory.go:184] no items to output this cycle
I0322 21:30:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 21:30:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:30:13.409817  543705 memory.go:191] Add success.
I0322 21:30:13.409818  543705 cpu.go:282] Add success.
W0322 21:30:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:30:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:30:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:30:13.419950  543705 net.go:770] primary dev: ETH0
I0322 21:30:13.419964  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:30:13.419979  543705 net.go:698] Add success.
I0322 21:30:13.420318  543705 net.go:648] Add success.
I0322 21:30:13.464676  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65d94525-732f-4bbd-8531-220f488d8f3d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:30:13.464711  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:30:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:30:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:30:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 21:30:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:30:14.456617  543705 disk_worker.go:494] system disk:vda1
I0322 21:30:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:30:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:30:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:30:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:30:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:30:23.409862  543705 cpu.go:275] no items to output this cycle
I0322 21:30:23.409916  543705 memory.go:184] no items to output this cycle
E0322 21:30:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:30:33.409817  543705 memory.go:184] no items to output this cycle
I0322 21:30:33.409832  543705 cpu.go:275] no items to output this cycle
I0322 21:30:36.631069  543705 disk_info.go:125] begin check local disk info of client
I0322 21:30:36.633628  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:30:36.633634  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b5cc0 0xc0002b5d00]
I0322 21:30:39.823806  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:30:39.823813  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:30:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:30:43.410721  543705 memory.go:191] Add success.
I0322 21:30:43.409796  543705 cpu.go:282] Add success.
I0322 21:30:43.420416  543705 net.go:648] Add success.
I0322 21:30:43.422973  543705 net.go:770] primary dev: ETH0
I0322 21:30:43.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:30:43.422997  543705 net.go:698] Add success.
I0322 21:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:30:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:30:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:30:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:30:53.409771  543705 memory.go:184] no items to output this cycle
I0322 21:30:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 21:31:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:31:03.409765  543705 memory.go:184] no items to output this cycle
I0322 21:31:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 21:31:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:31:13.409816  543705 memory.go:191] Add success.
I0322 21:31:13.409825  543705 cpu.go:282] Add success.
W0322 21:31:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:31:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:31:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:31:13.420327  543705 net.go:770] primary dev: ETH0
I0322 21:31:13.420341  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:31:13.420353  543705 net.go:698] Add success.
I0322 21:31:13.420578  543705 net.go:648] Add success.
I0322 21:31:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:31:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:31:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 21:31:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:31:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 21:31:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:31:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:31:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:31:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:31:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:31:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:31:23.409777  543705 memory.go:184] no items to output this cycle
I0322 21:31:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 21:31:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:31:33.409812  543705 memory.go:184] no items to output this cycle
I0322 21:31:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 21:31:36.633677  543705 disk_info.go:125] begin check local disk info of client
I0322 21:31:36.636259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:31:36.636266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 21:31:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:31:43.410703  543705 memory.go:191] Add success.
I0322 21:31:43.409793  543705 cpu.go:282] Add success.
I0322 21:31:43.420457  543705 net.go:648] Add success.
I0322 21:31:43.423411  543705 net.go:770] primary dev: ETH0
I0322 21:31:43.423426  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:31:43.423441  543705 net.go:698] Add success.
I0322 21:31:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:31:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:31:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:31:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:31:53.409793  543705 memory.go:184] no items to output this cycle
I0322 21:31:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 21:32:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:32:03.409803  543705 memory.go:184] no items to output this cycle
I0322 21:32:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:32:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:32:13.409787  543705 memory.go:191] Add success.
W0322 21:32:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:32:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:32:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:32:13.409828  543705 cpu.go:282] Add success.
I0322 21:32:13.420039  543705 net.go:648] Add success.
I0322 21:32:13.423034  543705 net.go:770] primary dev: ETH0
I0322 21:32:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:32:13.423059  543705 net.go:698] Add success.
W0322 21:32:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:32:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0322 21:32:14.455156  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:32:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:32:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:32:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:32:14.457013  543705 disk_worker.go:494] system disk:vda1
I0322 21:32:14.457041  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:32:15.456803  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:32:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:32:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:32:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:32:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:32:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:32:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:32:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:32:23.409787  543705 memory.go:184] no items to output this cycle
I0322 21:32:23.409794  543705 cpu.go:275] no items to output this cycle
E0322 21:32:33.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:32:33.409936  543705 cpu.go:275] no items to output this cycle
I0322 21:32:33.409966  543705 memory.go:184] no items to output this cycle
I0322 21:32:36.636349  543705 disk_info.go:125] begin check local disk info of client
I0322 21:32:36.638903  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:32:36.638910  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 21:32:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:32:43.410698  543705 memory.go:191] Add success.
I0322 21:32:43.409818  543705 cpu.go:282] Add success.
I0322 21:32:43.420386  543705 net.go:648] Add success.
I0322 21:32:43.423239  543705 net.go:770] primary dev: ETH0
I0322 21:32:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:32:43.423264  543705 net.go:698] Add success.
I0322 21:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:32:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:32:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:32:53.410422  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:32:53.410444  543705 memory.go:184] no items to output this cycle
I0322 21:32:53.410458  543705 cpu.go:275] no items to output this cycle
E0322 21:33:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:33:03.409780  543705 cpu.go:275] no items to output this cycle
I0322 21:33:03.409800  543705 memory.go:184] no items to output this cycle
E0322 21:33:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:33:13.409793  543705 memory.go:191] Add success.
I0322 21:33:13.409798  543705 cpu.go:282] Add success.
W0322 21:33:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:33:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:33:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:33:13.420072  543705 net.go:648] Add success.
I0322 21:33:13.422734  543705 net.go:770] primary dev: ETH0
I0322 21:33:13.422748  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:33:13.422761  543705 net.go:698] Add success.
I0322 21:33:13.468465  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea440a5e-f0a7-4183-aee8-587f26f85214","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:33:13.468499  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:33:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:33:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 21:33:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:33:14.456630  543705 disk_worker.go:494] system disk:vda1
I0322 21:33:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:33:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:33:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:33:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:33:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:33:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:33:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:33:23.409793  543705 memory.go:184] no items to output this cycle
I0322 21:33:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 21:33:33.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:33:33.409869  543705 memory.go:184] no items to output this cycle
I0322 21:33:33.409969  543705 cpu.go:275] no items to output this cycle
I0322 21:33:36.639828  543705 disk_info.go:125] begin check local disk info of client
I0322 21:33:36.642407  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:33:36.642414  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
I0322 21:33:39.825726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:33:39.825733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:33:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:33:43.410642  543705 memory.go:191] Add success.
I0322 21:33:43.409803  543705 cpu.go:282] Add success.
I0322 21:33:43.420337  543705 net.go:648] Add success.
I0322 21:33:43.423122  543705 net.go:770] primary dev: ETH0
I0322 21:33:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:33:43.423148  543705 net.go:698] Add success.
I0322 21:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:33:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:33:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:33:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:33:53.409782  543705 memory.go:184] no items to output this cycle
I0322 21:33:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 21:34:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:34:03.409794  543705 memory.go:184] no items to output this cycle
I0322 21:34:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 21:34:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:34:13.409783  543705 memory.go:191] Add success.
I0322 21:34:13.409799  543705 cpu.go:282] Add success.
W0322 21:34:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:34:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:34:13.420130  543705 net.go:648] Add success.
I0322 21:34:13.423176  543705 net.go:770] primary dev: ETH0
I0322 21:34:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:34:13.423201  543705 net.go:698] Add success.
I0322 21:34:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:34:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:34:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0322 21:34:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:34:14.456580  543705 disk_worker.go:494] system disk:vda1
I0322 21:34:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:34:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:34:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:34:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:34:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:34:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:34:23.409784  543705 memory.go:184] no items to output this cycle
I0322 21:34:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 21:34:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:34:33.409817  543705 memory.go:184] no items to output this cycle
I0322 21:34:33.409829  543705 cpu.go:275] no items to output this cycle
I0322 21:34:36.642500  543705 disk_info.go:125] begin check local disk info of client
I0322 21:34:36.645051  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:34:36.645058  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fc000 0xc0004fc040]
E0322 21:34:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:34:43.410709  543705 memory.go:191] Add success.
I0322 21:34:43.409817  543705 cpu.go:282] Add success.
I0322 21:34:43.420435  543705 net.go:648] Add success.
I0322 21:34:43.423642  543705 net.go:770] primary dev: ETH0
I0322 21:34:43.423656  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:34:43.423668  543705 net.go:698] Add success.
I0322 21:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:34:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:34:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:34:53.409805  543705 memory.go:184] no items to output this cycle
I0322 21:34:53.409817  543705 cpu.go:275] no items to output this cycle
I0322 21:35:03.409782  543705 cpu.go:275] no items to output this cycle
E0322 21:35:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:35:03.409797  543705 memory.go:184] no items to output this cycle
E0322 21:35:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:35:13.409821  543705 memory.go:191] Add success.
I0322 21:35:13.409825  543705 cpu.go:282] Add success.
W0322 21:35:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:35:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:35:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:35:13.420167  543705 net.go:648] Add success.
I0322 21:35:13.422598  543705 net.go:770] primary dev: ETH0
I0322 21:35:13.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:35:13.422622  543705 net.go:698] Add success.
I0322 21:35:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:35:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:35:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 21:35:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:35:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 21:35:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:35:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:35:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:35:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:35:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:35:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:35:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:35:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 21:35:23.409789  543705 memory.go:184] no items to output this cycle
E0322 21:35:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:35:33.409785  543705 memory.go:184] no items to output this cycle
I0322 21:35:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 21:35:36.645672  543705 disk_info.go:125] begin check local disk info of client
I0322 21:35:36.648198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:35:36.648204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abf00 0xc0002abf40]
E0322 21:35:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:35:43.410750  543705 memory.go:191] Add success.
I0322 21:35:43.409796  543705 cpu.go:282] Add success.
I0322 21:35:43.420542  543705 net.go:648] Add success.
I0322 21:35:43.423057  543705 net.go:770] primary dev: ETH0
I0322 21:35:43.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:35:43.423082  543705 net.go:698] Add success.
I0322 21:35:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:35:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:35:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:35:53.409783  543705 memory.go:184] no items to output this cycle
I0322 21:35:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 21:36:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:36:03.409784  543705 memory.go:184] no items to output this cycle
I0322 21:36:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 21:36:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:36:13.409815  543705 memory.go:191] Add success.
I0322 21:36:13.409818  543705 cpu.go:282] Add success.
W0322 21:36:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:36:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:36:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:36:13.420145  543705 net.go:648] Add success.
I0322 21:36:13.422971  543705 net.go:770] primary dev: ETH0
I0322 21:36:13.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:36:13.423001  543705 net.go:698] Add success.
I0322 21:36:13.468514  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"accb2548-cbf2-4a4c-b8c5-01fed129b0b9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:36:13.468551  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:36:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:36:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:36:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 21:36:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:36:14.456521  543705 disk_worker.go:494] system disk:vda1
I0322 21:36:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:36:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:36:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:36:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:36:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:36:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:36:23.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:36:23.409891  543705 cpu.go:275] no items to output this cycle
I0322 21:36:23.409892  543705 memory.go:184] no items to output this cycle
E0322 21:36:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:36:33.409812  543705 memory.go:184] no items to output this cycle
I0322 21:36:33.409828  543705 cpu.go:275] no items to output this cycle
I0322 21:36:36.648886  543705 disk_info.go:125] begin check local disk info of client
I0322 21:36:36.651494  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:36:36.651500  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
I0322 21:36:39.827832  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:36:39.827838  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:36:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:36:43.410713  543705 memory.go:191] Add success.
I0322 21:36:43.409804  543705 cpu.go:282] Add success.
I0322 21:36:43.420408  543705 net.go:648] Add success.
I0322 21:36:43.423066  543705 net.go:770] primary dev: ETH0
I0322 21:36:43.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:36:43.423091  543705 net.go:698] Add success.
I0322 21:36:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:36:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:36:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:36:53.410235  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:36:53.410254  543705 memory.go:184] no items to output this cycle
I0322 21:36:53.410278  543705 cpu.go:275] no items to output this cycle
E0322 21:37:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:37:03.409798  543705 memory.go:184] no items to output this cycle
I0322 21:37:03.409810  543705 cpu.go:275] no items to output this cycle
E0322 21:37:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:37:13.409785  543705 memory.go:191] Add success.
I0322 21:37:13.409808  543705 cpu.go:282] Add success.
W0322 21:37:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:37:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:37:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:37:13.420156  543705 net.go:648] Add success.
I0322 21:37:13.423004  543705 net.go:770] primary dev: ETH0
I0322 21:37:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:37:13.423030  543705 net.go:698] Add success.
I0322 21:37:13.453565  543705 event_worker.go:152] Polling the log file for events...
W0322 21:37:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:37:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 21:37:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:37:14.455939  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:37:14.455947  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:37:14.455953  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:37:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 21:37:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:37:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:37:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:37:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:37:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:37:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:37:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:37:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:37:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:37:23.409802  543705 memory.go:184] no items to output this cycle
I0322 21:37:23.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:37:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:37:33.409790  543705 memory.go:184] no items to output this cycle
I0322 21:37:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 21:37:36.651894  543705 disk_info.go:125] begin check local disk info of client
I0322 21:37:36.654425  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:37:36.654432  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 21:37:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:37:43.410602  543705 memory.go:191] Add success.
I0322 21:37:43.409813  543705 cpu.go:282] Add success.
I0322 21:37:43.420301  543705 net.go:648] Add success.
I0322 21:37:43.422765  543705 net.go:770] primary dev: ETH0
I0322 21:37:43.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:37:43.422790  543705 net.go:698] Add success.
I0322 21:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:37:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:37:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:37:53.409772  543705 memory.go:184] no items to output this cycle
I0322 21:37:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 21:38:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:38:03.409780  543705 memory.go:184] no items to output this cycle
I0322 21:38:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 21:38:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:38:13.409780  543705 memory.go:191] Add success.
W0322 21:38:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:38:13.409813  543705 cpu.go:282] Add success.
W0322 21:38:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:38:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:38:13.420121  543705 net.go:648] Add success.
I0322 21:38:13.423108  543705 net.go:770] primary dev: ETH0
I0322 21:38:13.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:38:13.423137  543705 net.go:698] Add success.
I0322 21:38:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:38:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:38:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 21:38:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:38:14.456577  543705 disk_worker.go:494] system disk:vda1
I0322 21:38:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:38:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:38:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:38:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:38:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:38:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:38:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:38:23.409776  543705 memory.go:184] no items to output this cycle
I0322 21:38:23.409898  543705 cpu.go:275] no items to output this cycle
E0322 21:38:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:38:33.409801  543705 memory.go:184] no items to output this cycle
I0322 21:38:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 21:38:36.654922  543705 disk_info.go:125] begin check local disk info of client
I0322 21:38:36.657468  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:38:36.657475  543705 disk_info.go:196] parse disk info done, disk is : [0xc000285d00 0xc000285d40]
E0322 21:38:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:38:43.410655  543705 memory.go:191] Add success.
I0322 21:38:43.409825  543705 cpu.go:282] Add success.
I0322 21:38:43.420382  543705 net.go:648] Add success.
I0322 21:38:43.423127  543705 net.go:770] primary dev: ETH0
I0322 21:38:43.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:38:43.423157  543705 net.go:698] Add success.
I0322 21:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:38:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:38:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:38:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:38:53.409799  543705 cpu.go:275] no items to output this cycle
I0322 21:38:53.409803  543705 memory.go:184] no items to output this cycle
E0322 21:39:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:39:03.409810  543705 memory.go:184] no items to output this cycle
I0322 21:39:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 21:39:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:39:13.409805  543705 memory.go:191] Add success.
I0322 21:39:13.409825  543705 cpu.go:282] Add success.
W0322 21:39:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:39:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:39:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:39:13.420208  543705 net.go:648] Add success.
I0322 21:39:13.422989  543705 net.go:770] primary dev: ETH0
I0322 21:39:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:39:13.423014  543705 net.go:698] Add success.
I0322 21:39:13.469205  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3890a030-c829-4c16-9732-872e479d508d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:39:13.469240  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:39:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:39:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:39:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 21:39:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:39:14.456700  543705 disk_worker.go:494] system disk:vda1
I0322 21:39:14.456744  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:39:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:39:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:39:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:39:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:39:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:39:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:39:23.409818  543705 memory.go:184] no items to output this cycle
I0322 21:39:23.409827  543705 cpu.go:275] no items to output this cycle
E0322 21:39:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:39:33.409800  543705 memory.go:184] no items to output this cycle
I0322 21:39:33.409801  543705 cpu.go:275] no items to output this cycle
I0322 21:39:36.657671  543705 disk_info.go:125] begin check local disk info of client
I0322 21:39:36.660182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:39:36.660187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474600 0xc000474640]
I0322 21:39:39.829725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:39:39.829731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:39:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:39:43.410621  543705 memory.go:191] Add success.
I0322 21:39:43.409805  543705 cpu.go:282] Add success.
I0322 21:39:43.420304  543705 net.go:648] Add success.
I0322 21:39:43.423031  543705 net.go:770] primary dev: ETH0
I0322 21:39:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:39:43.423056  543705 net.go:698] Add success.
I0322 21:39:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:39:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:39:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:39:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:39:53.410405  543705 memory.go:184] no items to output this cycle
I0322 21:39:53.410412  543705 cpu.go:275] no items to output this cycle
E0322 21:40:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:40:03.409775  543705 memory.go:184] no items to output this cycle
I0322 21:40:03.409783  543705 cpu.go:275] no items to output this cycle
E0322 21:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:40:13.409809  543705 memory.go:191] Add success.
I0322 21:40:13.409815  543705 cpu.go:282] Add success.
W0322 21:40:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:40:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:40:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:40:13.420126  543705 net.go:648] Add success.
I0322 21:40:13.422910  543705 net.go:770] primary dev: ETH0
I0322 21:40:13.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:40:13.422937  543705 net.go:698] Add success.
I0322 21:40:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:40:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:40:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 21:40:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:40:14.456566  543705 disk_worker.go:494] system disk:vda1
I0322 21:40:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:40:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:40:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:40:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:40:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:40:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:40:23.409783  543705 memory.go:184] no items to output this cycle
I0322 21:40:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 21:40:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:40:33.409819  543705 memory.go:184] no items to output this cycle
I0322 21:40:33.409829  543705 cpu.go:275] no items to output this cycle
I0322 21:40:36.660943  543705 disk_info.go:125] begin check local disk info of client
I0322 21:40:36.663557  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:40:36.663564  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492440 0xc000492480]
E0322 21:40:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:40:43.410676  543705 memory.go:191] Add success.
I0322 21:40:43.409821  543705 cpu.go:282] Add success.
I0322 21:40:43.420426  543705 net.go:648] Add success.
I0322 21:40:43.423007  543705 net.go:770] primary dev: ETH0
I0322 21:40:43.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:40:43.423036  543705 net.go:698] Add success.
I0322 21:40:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:40:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:40:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:40:53.410385  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:40:53.410407  543705 memory.go:184] no items to output this cycle
I0322 21:40:53.410434  543705 cpu.go:275] no items to output this cycle
E0322 21:41:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:41:03.409794  543705 memory.go:184] no items to output this cycle
I0322 21:41:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 21:41:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:41:13.409795  543705 memory.go:191] Add success.
I0322 21:41:13.409812  543705 cpu.go:282] Add success.
W0322 21:41:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:41:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:41:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:41:13.420135  543705 net.go:648] Add success.
I0322 21:41:13.423016  543705 net.go:770] primary dev: ETH0
I0322 21:41:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:41:13.423041  543705 net.go:698] Add success.
I0322 21:41:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:41:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:41:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 21:41:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:41:14.456515  543705 disk_worker.go:494] system disk:vda1
I0322 21:41:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:41:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:41:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:41:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:41:16.458163  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:41:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:41:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:41:23.409784  543705 memory.go:184] no items to output this cycle
I0322 21:41:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 21:41:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:41:33.409782  543705 memory.go:184] no items to output this cycle
I0322 21:41:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 21:41:36.663646  543705 disk_info.go:125] begin check local disk info of client
I0322 21:41:36.666184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:41:36.666190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be3c0 0xc0003be400]
E0322 21:41:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:41:43.410789  543705 memory.go:191] Add success.
I0322 21:41:43.409806  543705 cpu.go:282] Add success.
I0322 21:41:43.420481  543705 net.go:648] Add success.
I0322 21:41:43.423083  543705 net.go:770] primary dev: ETH0
I0322 21:41:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:41:43.423109  543705 net.go:698] Add success.
I0322 21:41:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:41:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:41:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:41:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:41:53.409788  543705 memory.go:184] no items to output this cycle
I0322 21:41:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 21:42:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:42:03.409766  543705 memory.go:184] no items to output this cycle
I0322 21:42:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 21:42:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:42:13.409808  543705 memory.go:191] Add success.
I0322 21:42:13.409814  543705 cpu.go:282] Add success.
W0322 21:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:42:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:42:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:42:13.420050  543705 net.go:648] Add success.
I0322 21:42:13.422757  543705 net.go:770] primary dev: ETH0
I0322 21:42:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:42:13.422783  543705 net.go:698] Add success.
I0322 21:42:13.470215  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a42eb2f5-ad12-4a47-af71-6ae7a71a2918","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:42:13.470249  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 21:42:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:42:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 21:42:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:42:14.456918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:42:14.456926  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:42:14.456931  543705 custom_config.go:64] query custom config with name: gpu
I0322 21:42:14.457138  543705 disk_worker.go:494] system disk:vda1
I0322 21:42:14.457172  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:42:15.455940  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:42:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:42:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:42:16.457988  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:42:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:42:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:42:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:42:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:42:23.409770  543705 memory.go:184] no items to output this cycle
I0322 21:42:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 21:42:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:42:33.409789  543705 memory.go:184] no items to output this cycle
I0322 21:42:33.409821  543705 cpu.go:275] no items to output this cycle
I0322 21:42:36.666274  543705 disk_info.go:125] begin check local disk info of client
I0322 21:42:36.668898  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:42:36.668904  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a400 0xc00035a440]
I0322 21:42:39.831851  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:42:39.831857  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:42:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:42:43.410646  543705 memory.go:191] Add success.
I0322 21:42:43.409804  543705 cpu.go:282] Add success.
I0322 21:42:43.420354  543705 net.go:648] Add success.
I0322 21:42:43.422760  543705 net.go:770] primary dev: ETH0
I0322 21:42:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:42:43.422786  543705 net.go:698] Add success.
I0322 21:42:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:42:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:42:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:42:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:42:53.409783  543705 memory.go:184] no items to output this cycle
I0322 21:42:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:43:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:43:03.409769  543705 memory.go:184] no items to output this cycle
I0322 21:43:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 21:43:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:43:13.409817  543705 memory.go:191] Add success.
I0322 21:43:13.409825  543705 cpu.go:282] Add success.
W0322 21:43:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:43:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:43:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:43:13.420111  543705 net.go:648] Add success.
I0322 21:43:13.422773  543705 net.go:770] primary dev: ETH0
I0322 21:43:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:43:13.422797  543705 net.go:698] Add success.
I0322 21:43:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:43:14.455411  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:43:14.455515  543705 disk_worker.go:708] disk space is not compliant
W0322 21:43:14.455520  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:43:14.457502  543705 disk_worker.go:494] system disk:vda1
I0322 21:43:14.457534  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:43:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:43:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:43:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:43:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:43:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:43:23.409799  543705 memory.go:184] no items to output this cycle
I0322 21:43:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 21:43:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:43:33.409776  543705 memory.go:184] no items to output this cycle
I0322 21:43:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 21:43:36.669676  543705 disk_info.go:125] begin check local disk info of client
I0322 21:43:36.672145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:43:36.672151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaa00 0xc0001aaa40]
E0322 21:43:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:43:43.410566  543705 memory.go:191] Add success.
I0322 21:43:43.409792  543705 cpu.go:282] Add success.
I0322 21:43:43.420271  543705 net.go:648] Add success.
I0322 21:43:43.423026  543705 net.go:770] primary dev: ETH0
I0322 21:43:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:43:43.423055  543705 net.go:698] Add success.
I0322 21:43:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:43:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:43:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:43:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:43:53.409774  543705 memory.go:184] no items to output this cycle
I0322 21:43:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 21:44:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:44:03.409803  543705 memory.go:184] no items to output this cycle
I0322 21:44:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:44:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:44:13.409795  543705 memory.go:191] Add success.
W0322 21:44:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:44:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:44:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:44:13.409850  543705 cpu.go:282] Add success.
I0322 21:44:13.419893  543705 net.go:648] Add success.
I0322 21:44:13.422896  543705 net.go:770] primary dev: ETH0
I0322 21:44:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:44:13.422921  543705 net.go:698] Add success.
I0322 21:44:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:44:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:44:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 21:44:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:44:14.456503  543705 disk_worker.go:494] system disk:vda1
I0322 21:44:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:44:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:44:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:44:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:44:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:44:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:44:23.409799  543705 memory.go:184] no items to output this cycle
I0322 21:44:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:44:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:44:33.409795  543705 memory.go:184] no items to output this cycle
I0322 21:44:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 21:44:36.672994  543705 disk_info.go:125] begin check local disk info of client
I0322 21:44:36.675588  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:44:36.675594  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484bc0 0xc000484c00]
E0322 21:44:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:44:43.410632  543705 memory.go:191] Add success.
I0322 21:44:43.409815  543705 cpu.go:282] Add success.
I0322 21:44:43.420369  543705 net.go:648] Add success.
I0322 21:44:43.422979  543705 net.go:770] primary dev: ETH0
I0322 21:44:43.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:44:43.423003  543705 net.go:698] Add success.
I0322 21:44:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:44:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:44:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:44:53.409811  543705 memory.go:184] no items to output this cycle
I0322 21:44:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 21:45:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:45:03.409769  543705 memory.go:184] no items to output this cycle
I0322 21:45:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 21:45:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:45:13.409826  543705 memory.go:191] Add success.
I0322 21:45:13.409828  543705 cpu.go:282] Add success.
W0322 21:45:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:45:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:45:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:45:13.419995  543705 net.go:770] primary dev: ETH0
I0322 21:45:13.420011  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:45:13.420026  543705 net.go:698] Add success.
I0322 21:45:13.420373  543705 net.go:648] Add success.
I0322 21:45:13.581351  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65a8bc67-c510-4be6-a7ab-41035a1fcf7b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:45:13.581387  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:45:14.454696  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:45:14.454935  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:45:14.454946  543705 disk_worker.go:708] disk space is not compliant
W0322 21:45:14.454949  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:45:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 21:45:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:45:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:45:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:45:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:45:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:45:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:45:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:45:23.409816  543705 memory.go:184] no items to output this cycle
I0322 21:45:23.409825  543705 cpu.go:275] no items to output this cycle
E0322 21:45:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:45:33.409772  543705 memory.go:184] no items to output this cycle
I0322 21:45:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 21:45:36.676012  543705 disk_info.go:125] begin check local disk info of client
I0322 21:45:36.678534  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:45:36.678540  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c42c0 0xc0000c4300]
I0322 21:45:39.833724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:45:39.833730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:45:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:45:43.410718  543705 memory.go:191] Add success.
I0322 21:45:43.409807  543705 cpu.go:282] Add success.
I0322 21:45:43.420460  543705 net.go:648] Add success.
I0322 21:45:43.423147  543705 net.go:770] primary dev: ETH0
I0322 21:45:43.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:45:43.423181  543705 net.go:698] Add success.
I0322 21:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:45:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:45:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:45:53.409784  543705 memory.go:184] no items to output this cycle
I0322 21:45:53.409790  543705 cpu.go:275] no items to output this cycle
E0322 21:46:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:46:03.409780  543705 memory.go:184] no items to output this cycle
I0322 21:46:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:46:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:46:13.409816  543705 memory.go:191] Add success.
I0322 21:46:13.409825  543705 cpu.go:282] Add success.
W0322 21:46:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:46:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:46:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:46:13.420532  543705 net.go:648] Add success.
I0322 21:46:13.423169  543705 net.go:770] primary dev: ETH0
I0322 21:46:13.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:46:13.423195  543705 net.go:698] Add success.
I0322 21:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:46:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:46:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 21:46:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:46:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 21:46:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:46:15.456023  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:46:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:46:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:46:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:46:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:46:23.409801  543705 memory.go:184] no items to output this cycle
I0322 21:46:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:46:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:46:33.409795  543705 memory.go:184] no items to output this cycle
I0322 21:46:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 21:46:36.678623  543705 disk_info.go:125] begin check local disk info of client
I0322 21:46:36.681159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:46:36.681165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be680 0xc0003be6c0]
E0322 21:46:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:46:43.410740  543705 memory.go:191] Add success.
I0322 21:46:43.409797  543705 cpu.go:282] Add success.
I0322 21:46:43.420439  543705 net.go:648] Add success.
I0322 21:46:43.423541  543705 net.go:770] primary dev: ETH0
I0322 21:46:43.423554  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:46:43.423566  543705 net.go:698] Add success.
I0322 21:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:46:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:46:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:46:53.410202  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:46:53.410221  543705 memory.go:184] no items to output this cycle
I0322 21:46:53.410245  543705 cpu.go:275] no items to output this cycle
E0322 21:47:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:47:03.409806  543705 memory.go:184] no items to output this cycle
I0322 21:47:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 21:47:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:47:13.409784  543705 memory.go:191] Add success.
W0322 21:47:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:47:13.409815  543705 cpu.go:282] Add success.
W0322 21:47:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:47:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:47:13.420182  543705 net.go:648] Add success.
I0322 21:47:13.422810  543705 net.go:770] primary dev: ETH0
I0322 21:47:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:47:13.422841  543705 net.go:698] Add success.
I0322 21:47:13.453403  543705 event_worker.go:152] Polling the log file for events...
W0322 21:47:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:47:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 21:47:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:47:14.456794  543705 disk_worker.go:494] system disk:vda1
I0322 21:47:14.456836  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:47:14.457099  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:47:14.457107  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:47:14.457112  543705 custom_config.go:64] query custom config with name: gpu
E0322 21:47:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:47:15.456842  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:47:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:47:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:47:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:47:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:47:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:47:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:47:23.409802  543705 memory.go:184] no items to output this cycle
I0322 21:47:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:47:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:47:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 21:47:33.409798  543705 memory.go:184] no items to output this cycle
I0322 21:47:36.681686  543705 disk_info.go:125] begin check local disk info of client
I0322 21:47:36.684175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:47:36.684181  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033c480 0xc00033c680]
E0322 21:47:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:47:43.410783  543705 memory.go:191] Add success.
I0322 21:47:43.409808  543705 cpu.go:282] Add success.
I0322 21:47:43.420475  543705 net.go:648] Add success.
I0322 21:47:43.423114  543705 net.go:770] primary dev: ETH0
I0322 21:47:43.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:47:43.423140  543705 net.go:698] Add success.
I0322 21:47:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:47:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:47:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:47:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:47:53.409797  543705 memory.go:184] no items to output this cycle
I0322 21:47:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 21:48:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:48:03.409791  543705 memory.go:184] no items to output this cycle
I0322 21:48:03.409793  543705 cpu.go:275] no items to output this cycle
W0322 21:48:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:48:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:48:13.409739  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:48:13.409832  543705 cpu.go:282] Add success.
E0322 21:48:13.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:48:13.409864  543705 memory.go:191] Add success.
I0322 21:48:13.420070  543705 net.go:648] Add success.
I0322 21:48:13.422874  543705 net.go:770] primary dev: ETH0
I0322 21:48:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:48:13.422901  543705 net.go:698] Add success.
I0322 21:48:13.502659  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2c16593a-a7b5-4b43-ac2d-601ceaba750a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:48:13.502691  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:48:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:48:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 21:48:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:48:14.456590  543705 disk_worker.go:494] system disk:vda1
I0322 21:48:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:48:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:48:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:48:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:48:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:48:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:48:23.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:48:23.409881  543705 memory.go:184] no items to output this cycle
I0322 21:48:23.409971  543705 cpu.go:275] no items to output this cycle
E0322 21:48:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:48:33.409794  543705 memory.go:184] no items to output this cycle
I0322 21:48:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 21:48:36.684269  543705 disk_info.go:125] begin check local disk info of client
I0322 21:48:36.686866  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:48:36.686873  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
I0322 21:48:39.835871  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:48:39.835878  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:48:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:48:43.410557  543705 memory.go:191] Add success.
I0322 21:48:43.409795  543705 cpu.go:282] Add success.
I0322 21:48:43.420256  543705 net.go:648] Add success.
I0322 21:48:43.422714  543705 net.go:770] primary dev: ETH0
I0322 21:48:43.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:48:43.422740  543705 net.go:698] Add success.
I0322 21:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:48:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:48:53.409776  543705 memory.go:184] no items to output this cycle
I0322 21:48:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 21:49:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:49:03.409795  543705 memory.go:184] no items to output this cycle
I0322 21:49:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 21:49:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:49:13.409781  543705 memory.go:191] Add success.
W0322 21:49:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 21:49:13.409812  543705 cpu.go:282] Add success.
W0322 21:49:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:49:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:49:13.420073  543705 net.go:648] Add success.
I0322 21:49:13.422706  543705 net.go:770] primary dev: ETH0
I0322 21:49:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:49:13.422732  543705 net.go:698] Add success.
I0322 21:49:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:49:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:49:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 21:49:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:49:14.456767  543705 disk_worker.go:494] system disk:vda1
I0322 21:49:14.456799  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:49:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:49:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:49:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:49:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:49:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:49:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:49:23.409795  543705 memory.go:184] no items to output this cycle
I0322 21:49:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 21:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:49:33.409786  543705 memory.go:184] no items to output this cycle
I0322 21:49:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 21:49:36.686958  543705 disk_info.go:125] begin check local disk info of client
I0322 21:49:36.689433  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:49:36.689439  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 21:49:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:49:43.410610  543705 memory.go:191] Add success.
I0322 21:49:43.409797  543705 cpu.go:282] Add success.
I0322 21:49:43.420316  543705 net.go:648] Add success.
I0322 21:49:43.423028  543705 net.go:770] primary dev: ETH0
I0322 21:49:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:49:43.423053  543705 net.go:698] Add success.
I0322 21:49:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:49:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:49:53.409774  543705 memory.go:184] no items to output this cycle
I0322 21:49:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 21:50:03.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:50:03.409760  543705 memory.go:184] no items to output this cycle
I0322 21:50:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 21:50:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:50:13.409820  543705 memory.go:191] Add success.
I0322 21:50:13.409829  543705 cpu.go:282] Add success.
W0322 21:50:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:50:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:50:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:50:13.420138  543705 net.go:648] Add success.
I0322 21:50:13.422604  543705 net.go:770] primary dev: ETH0
I0322 21:50:13.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:50:13.422628  543705 net.go:698] Add success.
I0322 21:50:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:50:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:50:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 21:50:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:50:14.456493  543705 disk_worker.go:494] system disk:vda1
I0322 21:50:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:50:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:50:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:50:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:50:23.409802  543705 memory.go:184] no items to output this cycle
I0322 21:50:23.409819  543705 cpu.go:275] no items to output this cycle
E0322 21:50:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:50:33.409799  543705 memory.go:184] no items to output this cycle
I0322 21:50:33.409833  543705 cpu.go:275] no items to output this cycle
I0322 21:50:36.689676  543705 disk_info.go:125] begin check local disk info of client
I0322 21:50:36.692230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:50:36.692237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 21:50:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:50:43.410656  543705 memory.go:191] Add success.
I0322 21:50:43.409794  543705 cpu.go:282] Add success.
I0322 21:50:43.420348  543705 net.go:648] Add success.
I0322 21:50:43.423160  543705 net.go:770] primary dev: ETH0
I0322 21:50:43.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:50:43.423185  543705 net.go:698] Add success.
I0322 21:50:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:50:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:50:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:50:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:50:53.409780  543705 memory.go:184] no items to output this cycle
I0322 21:50:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 21:51:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:51:03.409774  543705 memory.go:184] no items to output this cycle
I0322 21:51:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 21:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:51:13.409813  543705 memory.go:191] Add success.
I0322 21:51:13.409819  543705 cpu.go:282] Add success.
W0322 21:51:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:51:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:51:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:51:13.420174  543705 net.go:648] Add success.
I0322 21:51:13.423199  543705 net.go:770] primary dev: ETH0
I0322 21:51:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:51:13.423228  543705 net.go:698] Add success.
I0322 21:51:13.464052  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6331aa71-a437-4f67-b5b2-1f137ab1ffd3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:51:13.464100  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:51:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:51:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:51:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 21:51:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:51:14.456536  543705 disk_worker.go:494] system disk:vda1
I0322 21:51:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:51:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:51:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:51:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:51:23.409775  543705 memory.go:184] no items to output this cycle
I0322 21:51:23.409784  543705 cpu.go:275] no items to output this cycle
E0322 21:51:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:51:33.409791  543705 memory.go:184] no items to output this cycle
I0322 21:51:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 21:51:36.692325  543705 disk_info.go:125] begin check local disk info of client
I0322 21:51:36.694870  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:51:36.694877  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0322 21:51:39.837727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:51:39.837733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:51:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:51:43.410747  543705 memory.go:191] Add success.
I0322 21:51:43.409817  543705 cpu.go:282] Add success.
I0322 21:51:43.420562  543705 net.go:648] Add success.
I0322 21:51:43.423225  543705 net.go:770] primary dev: ETH0
I0322 21:51:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:51:43.423254  543705 net.go:698] Add success.
I0322 21:51:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:51:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:51:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:51:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:51:53.409814  543705 memory.go:184] no items to output this cycle
I0322 21:51:53.409822  543705 cpu.go:275] no items to output this cycle
E0322 21:52:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:52:03.409777  543705 cpu.go:275] no items to output this cycle
I0322 21:52:03.409781  543705 memory.go:184] no items to output this cycle
E0322 21:52:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:52:13.409786  543705 memory.go:191] Add success.
I0322 21:52:13.409804  543705 cpu.go:282] Add success.
W0322 21:52:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:52:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:52:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:52:13.420135  543705 net.go:648] Add success.
I0322 21:52:13.423205  543705 net.go:770] primary dev: ETH0
I0322 21:52:13.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:52:13.423232  543705 net.go:698] Add success.
W0322 21:52:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:52:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 21:52:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:52:14.456798  543705 disk_worker.go:494] system disk:vda1
I0322 21:52:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:52:14.457059  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:52:14.457066  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:52:14.457072  543705 custom_config.go:64] query custom config with name: gpu
E0322 21:52:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:52:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:52:16.457888  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:52:16.457888  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:52:16.457944  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:52:16.457963  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:52:16.472277  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:52:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:52:23.409799  543705 memory.go:184] no items to output this cycle
I0322 21:52:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 21:52:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:52:33.409800  543705 memory.go:184] no items to output this cycle
I0322 21:52:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 21:52:36.694967  543705 disk_info.go:125] begin check local disk info of client
I0322 21:52:36.697524  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:52:36.697530  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340000 0xc000340040]
E0322 21:52:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:52:43.410676  543705 memory.go:191] Add success.
I0322 21:52:43.409800  543705 cpu.go:282] Add success.
I0322 21:52:43.420389  543705 net.go:648] Add success.
I0322 21:52:43.423005  543705 net.go:770] primary dev: ETH0
I0322 21:52:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:52:43.423035  543705 net.go:698] Add success.
I0322 21:52:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:52:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:52:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:52:53.409788  543705 memory.go:184] no items to output this cycle
I0322 21:52:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 21:53:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:53:03.409779  543705 memory.go:184] no items to output this cycle
I0322 21:53:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 21:53:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:53:13.409793  543705 memory.go:191] Add success.
I0322 21:53:13.409797  543705 cpu.go:282] Add success.
W0322 21:53:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:53:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:53:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:53:13.420183  543705 net.go:648] Add success.
I0322 21:53:13.422747  543705 net.go:770] primary dev: ETH0
I0322 21:53:13.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:53:13.422776  543705 net.go:698] Add success.
I0322 21:53:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:53:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:53:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0322 21:53:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:53:14.456498  543705 disk_worker.go:494] system disk:vda1
I0322 21:53:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:53:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:53:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:53:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:53:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:53:23.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:53:23.410265  543705 memory.go:184] no items to output this cycle
I0322 21:53:23.410258  543705 cpu.go:275] no items to output this cycle
E0322 21:53:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:53:33.409818  543705 memory.go:184] no items to output this cycle
I0322 21:53:33.409832  543705 cpu.go:275] no items to output this cycle
I0322 21:53:36.697673  543705 disk_info.go:125] begin check local disk info of client
I0322 21:53:36.700212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:53:36.700218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
E0322 21:53:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:53:43.410600  543705 memory.go:191] Add success.
I0322 21:53:43.409791  543705 cpu.go:282] Add success.
I0322 21:53:43.420287  543705 net.go:648] Add success.
I0322 21:53:43.422980  543705 net.go:770] primary dev: ETH0
I0322 21:53:43.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:53:43.423005  543705 net.go:698] Add success.
I0322 21:53:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:53:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:53:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:53:53.410258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:53:53.410277  543705 cpu.go:275] no items to output this cycle
I0322 21:53:53.410287  543705 memory.go:184] no items to output this cycle
E0322 21:54:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:54:03.409795  543705 memory.go:184] no items to output this cycle
I0322 21:54:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 21:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:54:13.409791  543705 memory.go:191] Add success.
I0322 21:54:13.409816  543705 cpu.go:282] Add success.
W0322 21:54:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:54:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:54:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:54:13.420217  543705 net.go:648] Add success.
I0322 21:54:13.423231  543705 net.go:770] primary dev: ETH0
I0322 21:54:13.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:54:13.423260  543705 net.go:698] Add success.
I0322 21:54:13.468392  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"527b3b03-5312-4aea-94b6-ab1da8da2bf3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:54:13.468440  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:54:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:54:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 21:54:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:54:14.456639  543705 disk_worker.go:494] system disk:vda1
I0322 21:54:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:54:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:54:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:54:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:54:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:54:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:54:23.409870  543705 cpu.go:275] no items to output this cycle
I0322 21:54:23.409881  543705 memory.go:184] no items to output this cycle
E0322 21:54:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:54:33.409809  543705 memory.go:184] no items to output this cycle
I0322 21:54:33.409832  543705 cpu.go:275] no items to output this cycle
I0322 21:54:36.701136  543705 disk_info.go:125] begin check local disk info of client
I0322 21:54:36.703737  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:54:36.703743  543705 disk_info.go:196] parse disk info done, disk is : [0xc000383d80 0xc000383dc0]
I0322 21:54:39.839882  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:54:39.839888  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:54:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:54:43.410677  543705 memory.go:191] Add success.
I0322 21:54:43.409800  543705 cpu.go:282] Add success.
I0322 21:54:43.420382  543705 net.go:648] Add success.
I0322 21:54:43.422968  543705 net.go:770] primary dev: ETH0
I0322 21:54:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:54:43.422997  543705 net.go:698] Add success.
I0322 21:54:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:54:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:54:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:54:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:54:53.409795  543705 cpu.go:275] no items to output this cycle
I0322 21:54:53.409806  543705 memory.go:184] no items to output this cycle
E0322 21:55:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:55:03.409778  543705 memory.go:184] no items to output this cycle
I0322 21:55:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 21:55:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:55:13.409795  543705 memory.go:191] Add success.
I0322 21:55:13.409797  543705 cpu.go:282] Add success.
W0322 21:55:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:55:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:55:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:55:13.420243  543705 net.go:648] Add success.
I0322 21:55:13.422934  543705 net.go:770] primary dev: ETH0
I0322 21:55:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:55:13.422959  543705 net.go:698] Add success.
I0322 21:55:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:55:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:55:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 21:55:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:55:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 21:55:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:55:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:55:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:55:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:55:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:55:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:55:23.409768  543705 memory.go:184] no items to output this cycle
I0322 21:55:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 21:55:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:55:33.409797  543705 memory.go:184] no items to output this cycle
I0322 21:55:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 21:55:36.703830  543705 disk_info.go:125] begin check local disk info of client
I0322 21:55:36.706355  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:55:36.706362  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340000 0xc000340040]
E0322 21:55:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:55:43.410706  543705 memory.go:191] Add success.
I0322 21:55:43.409826  543705 cpu.go:282] Add success.
I0322 21:55:43.420441  543705 net.go:648] Add success.
I0322 21:55:43.423112  543705 net.go:770] primary dev: ETH0
I0322 21:55:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:55:43.423138  543705 net.go:698] Add success.
I0322 21:55:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:55:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:55:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:55:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:55:53.409819  543705 memory.go:184] no items to output this cycle
I0322 21:55:53.409829  543705 cpu.go:275] no items to output this cycle
E0322 21:56:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:56:03.409785  543705 memory.go:184] no items to output this cycle
I0322 21:56:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 21:56:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:56:13.409782  543705 memory.go:191] Add success.
I0322 21:56:13.409807  543705 cpu.go:282] Add success.
W0322 21:56:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:56:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:56:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:56:13.420167  543705 net.go:648] Add success.
I0322 21:56:13.422792  543705 net.go:770] primary dev: ETH0
I0322 21:56:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:56:13.422820  543705 net.go:698] Add success.
I0322 21:56:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:56:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:56:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 21:56:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:56:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 21:56:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:56:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:56:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:56:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:56:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:56:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:56:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:56:23.409802  543705 memory.go:184] no items to output this cycle
I0322 21:56:23.409815  543705 cpu.go:275] no items to output this cycle
E0322 21:56:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:56:33.409824  543705 memory.go:184] no items to output this cycle
I0322 21:56:33.409837  543705 cpu.go:275] no items to output this cycle
I0322 21:56:36.707171  543705 disk_info.go:125] begin check local disk info of client
I0322 21:56:36.709777  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:56:36.709785  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580000 0xc000580040]
E0322 21:56:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:56:43.410690  543705 memory.go:191] Add success.
I0322 21:56:43.409823  543705 cpu.go:282] Add success.
I0322 21:56:43.420406  543705 net.go:648] Add success.
I0322 21:56:43.422995  543705 net.go:770] primary dev: ETH0
I0322 21:56:43.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:56:43.423021  543705 net.go:698] Add success.
I0322 21:56:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:56:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:56:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:56:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:56:53.409810  543705 memory.go:184] no items to output this cycle
I0322 21:56:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 21:57:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:57:03.409786  543705 memory.go:184] no items to output this cycle
I0322 21:57:03.409808  543705 cpu.go:275] no items to output this cycle
E0322 21:57:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:57:13.409826  543705 memory.go:191] Add success.
I0322 21:57:13.409836  543705 cpu.go:282] Add success.
W0322 21:57:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:57:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:57:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:57:13.420121  543705 net.go:648] Add success.
I0322 21:57:13.422895  543705 net.go:770] primary dev: ETH0
I0322 21:57:13.422909  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:57:13.422923  543705 net.go:698] Add success.
I0322 21:57:13.428980  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 21:57:13.453162  543705 event_worker.go:152] Polling the log file for events...
I0322 21:57:13.468552  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2d0faca7-f051-473f-bf3b-440e2de61a85","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 21:57:13.468585  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 21:57:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:57:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:57:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 21:57:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0322 21:57:14.455934  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 21:57:14.455942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 21:57:14.456415  543705 disk_worker.go:494] system disk:vda1
I0322 21:57:14.456446  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 21:57:15.457133  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 21:57:15.457142  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:57:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 21:57:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 21:57:16.457963  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:57:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:57:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:57:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:57:23.409789  543705 memory.go:184] no items to output this cycle
I0322 21:57:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 21:57:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:57:33.409769  543705 memory.go:184] no items to output this cycle
I0322 21:57:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 21:57:36.709869  543705 disk_info.go:125] begin check local disk info of client
I0322 21:57:36.712411  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:57:36.712417  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001e67c0 0xc0001e6800]
I0322 21:57:39.841729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 21:57:39.841735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 21:57:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:57:43.410706  543705 memory.go:191] Add success.
I0322 21:57:43.409821  543705 cpu.go:282] Add success.
I0322 21:57:43.420395  543705 net.go:648] Add success.
I0322 21:57:43.423293  543705 net.go:770] primary dev: ETH0
I0322 21:57:43.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:57:43.423318  543705 net.go:698] Add success.
I0322 21:57:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:57:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:57:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:57:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:57:53.409819  543705 memory.go:184] no items to output this cycle
I0322 21:57:53.409836  543705 cpu.go:275] no items to output this cycle
E0322 21:58:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:58:03.409807  543705 memory.go:184] no items to output this cycle
I0322 21:58:03.409818  543705 cpu.go:275] no items to output this cycle
E0322 21:58:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:58:13.409818  543705 memory.go:191] Add success.
I0322 21:58:13.409825  543705 cpu.go:282] Add success.
W0322 21:58:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:58:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:58:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:58:13.420140  543705 net.go:648] Add success.
I0322 21:58:13.423026  543705 net.go:770] primary dev: ETH0
I0322 21:58:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:58:13.423056  543705 net.go:698] Add success.
I0322 21:58:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:58:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:58:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 21:58:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:58:14.456570  543705 disk_worker.go:494] system disk:vda1
I0322 21:58:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:58:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:58:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:58:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:58:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:58:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 21:58:23.410530  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:58:23.410550  543705 memory.go:184] no items to output this cycle
I0322 21:58:23.410627  543705 cpu.go:275] no items to output this cycle
E0322 21:58:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:58:33.409768  543705 memory.go:184] no items to output this cycle
I0322 21:58:33.409856  543705 cpu.go:275] no items to output this cycle
I0322 21:58:36.712512  543705 disk_info.go:125] begin check local disk info of client
I0322 21:58:36.715161  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:58:36.715169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580000 0xc000580040]
E0322 21:58:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:58:43.410607  543705 memory.go:191] Add success.
I0322 21:58:43.409833  543705 cpu.go:282] Add success.
I0322 21:58:43.420320  543705 net.go:648] Add success.
I0322 21:58:43.422950  543705 net.go:770] primary dev: ETH0
I0322 21:58:43.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:58:43.422975  543705 net.go:698] Add success.
I0322 21:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:58:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:58:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:58:53.409800  543705 memory.go:184] no items to output this cycle
I0322 21:58:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 21:59:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:59:03.409788  543705 cpu.go:275] no items to output this cycle
I0322 21:59:03.409790  543705 memory.go:184] no items to output this cycle
E0322 21:59:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:59:13.409794  543705 memory.go:191] Add success.
I0322 21:59:13.409795  543705 cpu.go:282] Add success.
W0322 21:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 21:59:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 21:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 21:59:13.420232  543705 net.go:770] primary dev: ETH0
I0322 21:59:13.420245  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:59:13.420257  543705 net.go:698] Add success.
I0322 21:59:13.420620  543705 net.go:648] Add success.
I0322 21:59:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 21:59:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 21:59:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0322 21:59:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0322 21:59:14.456605  543705 disk_worker.go:494] system disk:vda1
I0322 21:59:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 21:59:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 21:59:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:59:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:59:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 21:59:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0322 21:59:23.409894  543705 cpu.go:275] no items to output this cycle
E0322 21:59:23.409901  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:59:23.409920  543705 memory.go:184] no items to output this cycle
E0322 21:59:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:59:33.409819  543705 memory.go:184] no items to output this cycle
I0322 21:59:33.409834  543705 cpu.go:275] no items to output this cycle
I0322 21:59:36.715253  543705 disk_info.go:125] begin check local disk info of client
I0322 21:59:36.717761  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 21:59:36.717768  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580f40 0xc000580f80]
E0322 21:59:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:59:43.410559  543705 memory.go:191] Add success.
I0322 21:59:43.409801  543705 cpu.go:282] Add success.
I0322 21:59:43.420267  543705 net.go:648] Add success.
I0322 21:59:43.422846  543705 net.go:770] primary dev: ETH0
I0322 21:59:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0322 21:59:43.422873  543705 net.go:698] Add success.
I0322 21:59:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 21:59:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 21:59:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0322 21:59:53.410438  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 21:59:53.410460  543705 memory.go:184] no items to output this cycle
I0322 21:59:53.410471  543705 cpu.go:275] no items to output this cycle
E0322 22:00:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:00:03.409799  543705 memory.go:184] no items to output this cycle
I0322 22:00:03.409811  543705 cpu.go:275] no items to output this cycle
W0322 22:00:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:00:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:00:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:00:13.409795  543705 cpu.go:282] Add success.
E0322 22:00:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:00:13.409823  543705 memory.go:191] Add success.
I0322 22:00:13.420055  543705 net.go:648] Add success.
I0322 22:00:13.422781  543705 net.go:770] primary dev: ETH0
I0322 22:00:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:00:13.422806  543705 net.go:698] Add success.
I0322 22:00:13.468351  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b9606446-0f8e-4bcc-9a8d-4b91251ff2af","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:00:13.468384  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:00:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:00:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:00:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 22:00:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:00:14.456600  543705 disk_worker.go:494] system disk:vda1
I0322 22:00:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:00:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:00:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:00:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:00:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:00:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:00:23.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:00:23.409902  543705 memory.go:184] no items to output this cycle
I0322 22:00:23.409954  543705 cpu.go:275] no items to output this cycle
E0322 22:00:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:00:33.409778  543705 memory.go:184] no items to output this cycle
I0322 22:00:33.409853  543705 cpu.go:275] no items to output this cycle
I0322 22:00:36.719238  543705 disk_info.go:125] begin check local disk info of client
I0322 22:00:36.721819  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:00:36.721826  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b440 0xc00047b480]
I0322 22:00:39.843914  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:00:39.843921  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:00:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:00:43.410684  543705 memory.go:191] Add success.
I0322 22:00:43.409793  543705 cpu.go:282] Add success.
I0322 22:00:43.420566  543705 net.go:648] Add success.
I0322 22:00:43.423257  543705 net.go:770] primary dev: ETH0
I0322 22:00:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:00:43.423282  543705 net.go:698] Add success.
I0322 22:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:00:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:00:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:00:53.409816  543705 memory.go:184] no items to output this cycle
I0322 22:00:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 22:01:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:01:03.409782  543705 memory.go:184] no items to output this cycle
I0322 22:01:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 22:01:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:01:13.409798  543705 memory.go:191] Add success.
I0322 22:01:13.409799  543705 cpu.go:282] Add success.
W0322 22:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:01:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:01:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:01:13.420153  543705 net.go:648] Add success.
I0322 22:01:13.423033  543705 net.go:770] primary dev: ETH0
I0322 22:01:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:01:13.423058  543705 net.go:698] Add success.
I0322 22:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:01:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:01:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 22:01:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:01:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 22:01:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:01:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:01:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:01:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:01:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:01:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:01:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:01:23.409794  543705 memory.go:184] no items to output this cycle
I0322 22:01:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 22:01:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:01:33.409782  543705 memory.go:184] no items to output this cycle
I0322 22:01:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 22:01:36.723255  543705 disk_info.go:125] begin check local disk info of client
I0322 22:01:36.725771  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:01:36.725778  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580000 0xc000580040]
E0322 22:01:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:01:43.410655  543705 memory.go:191] Add success.
I0322 22:01:43.409813  543705 cpu.go:282] Add success.
I0322 22:01:43.420382  543705 net.go:648] Add success.
I0322 22:01:43.423157  543705 net.go:770] primary dev: ETH0
I0322 22:01:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:01:43.423185  543705 net.go:698] Add success.
I0322 22:01:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:01:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:01:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:01:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:01:53.409816  543705 memory.go:184] no items to output this cycle
I0322 22:01:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 22:02:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:02:03.409782  543705 memory.go:184] no items to output this cycle
I0322 22:02:03.409792  543705 cpu.go:275] no items to output this cycle
W0322 22:02:13.409705  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:02:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:02:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:02:13.409793  543705 cpu.go:282] Add success.
E0322 22:02:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:02:13.409820  543705 memory.go:191] Add success.
I0322 22:02:13.420120  543705 net.go:648] Add success.
I0322 22:02:13.422934  543705 net.go:770] primary dev: ETH0
I0322 22:02:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:02:13.422960  543705 net.go:698] Add success.
I0322 22:02:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:02:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:02:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 22:02:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:02:14.456789  543705 disk_worker.go:494] system disk:vda1
I0322 22:02:14.456827  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:02:14.456993  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:02:14.457001  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
E0322 22:02:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:02:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:02:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:02:16.457986  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:02:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:02:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:02:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:02:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:02:23.409758  543705 memory.go:184] no items to output this cycle
I0322 22:02:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 22:02:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:02:33.409795  543705 memory.go:184] no items to output this cycle
I0322 22:02:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 22:02:36.727282  543705 disk_info.go:125] begin check local disk info of client
I0322 22:02:36.729887  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:02:36.729895  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e5340 0xc0003e5380]
E0322 22:02:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:02:43.410623  543705 memory.go:191] Add success.
I0322 22:02:43.409805  543705 cpu.go:282] Add success.
I0322 22:02:43.420315  543705 net.go:648] Add success.
I0322 22:02:43.422947  543705 net.go:770] primary dev: ETH0
I0322 22:02:43.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:02:43.422972  543705 net.go:698] Add success.
I0322 22:02:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:02:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:02:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:02:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:02:53.409795  543705 memory.go:184] no items to output this cycle
I0322 22:02:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 22:03:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:03:03.409781  543705 cpu.go:275] no items to output this cycle
I0322 22:03:03.409791  543705 memory.go:184] no items to output this cycle
E0322 22:03:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:03:13.409791  543705 memory.go:191] Add success.
I0322 22:03:13.409798  543705 cpu.go:282] Add success.
W0322 22:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:03:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:03:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:03:13.420063  543705 net.go:648] Add success.
I0322 22:03:13.422915  543705 net.go:770] primary dev: ETH0
I0322 22:03:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:03:13.422944  543705 net.go:698] Add success.
I0322 22:03:13.468363  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"81e5dd96-0f87-4d9f-bc05-7bb471d4289b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:03:13.468397  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:03:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:03:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:03:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0322 22:03:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:03:14.456554  543705 disk_worker.go:494] system disk:vda1
I0322 22:03:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:03:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:03:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:03:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:03:23.409793  543705 memory.go:184] no items to output this cycle
I0322 22:03:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 22:03:33.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:03:33.409892  543705 memory.go:184] no items to output this cycle
I0322 22:03:33.410004  543705 cpu.go:275] no items to output this cycle
I0322 22:03:36.729979  543705 disk_info.go:125] begin check local disk info of client
I0322 22:03:36.732495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:03:36.732501  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ce40 0xc00035ce80]
I0322 22:03:39.845726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:03:39.845732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:03:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:03:43.410663  543705 memory.go:191] Add success.
I0322 22:03:43.409816  543705 cpu.go:282] Add success.
I0322 22:03:43.420378  543705 net.go:648] Add success.
I0322 22:03:43.423161  543705 net.go:770] primary dev: ETH0
I0322 22:03:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:03:43.423186  543705 net.go:698] Add success.
I0322 22:03:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:03:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:03:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:03:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:03:53.409812  543705 memory.go:184] no items to output this cycle
I0322 22:03:53.409823  543705 cpu.go:275] no items to output this cycle
E0322 22:04:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:04:03.409777  543705 memory.go:184] no items to output this cycle
I0322 22:04:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 22:04:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:04:13.409792  543705 memory.go:191] Add success.
I0322 22:04:13.409792  543705 cpu.go:282] Add success.
W0322 22:04:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:04:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:04:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:04:13.420574  543705 net.go:648] Add success.
I0322 22:04:13.423411  543705 net.go:770] primary dev: ETH0
I0322 22:04:13.423425  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:04:13.423437  543705 net.go:698] Add success.
I0322 22:04:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:04:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:04:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 22:04:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:04:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 22:04:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:04:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:04:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:04:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:04:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:04:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:04:23.409770  543705 memory.go:184] no items to output this cycle
I0322 22:04:23.409802  543705 cpu.go:275] no items to output this cycle
I0322 22:04:33.409878  543705 cpu.go:275] no items to output this cycle
E0322 22:04:33.409937  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:04:33.409956  543705 memory.go:184] no items to output this cycle
I0322 22:04:36.733303  543705 disk_info.go:125] begin check local disk info of client
I0322 22:04:36.735862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:04:36.735869  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c8000 0xc0003c8040]
E0322 22:04:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:04:43.410753  543705 memory.go:191] Add success.
I0322 22:04:43.409794  543705 cpu.go:282] Add success.
I0322 22:04:43.420506  543705 net.go:648] Add success.
I0322 22:04:43.423743  543705 net.go:770] primary dev: ETH0
I0322 22:04:43.423756  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:04:43.423768  543705 net.go:698] Add success.
I0322 22:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:04:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:04:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:04:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:04:53.409796  543705 memory.go:184] no items to output this cycle
I0322 22:04:53.409799  543705 cpu.go:275] no items to output this cycle
E0322 22:05:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:05:03.409778  543705 memory.go:184] no items to output this cycle
I0322 22:05:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:05:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:05:13.409788  543705 memory.go:191] Add success.
I0322 22:05:13.409788  543705 cpu.go:282] Add success.
W0322 22:05:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:05:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:05:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:05:13.420084  543705 net.go:648] Add success.
I0322 22:05:13.423216  543705 net.go:770] primary dev: ETH0
I0322 22:05:13.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:05:13.423241  543705 net.go:698] Add success.
I0322 22:05:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:05:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:05:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 22:05:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:05:14.456575  543705 disk_worker.go:494] system disk:vda1
I0322 22:05:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:05:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:05:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:05:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:05:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:05:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:05:23.410396  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:05:23.410412  543705 memory.go:184] no items to output this cycle
I0322 22:05:23.410415  543705 cpu.go:275] no items to output this cycle
E0322 22:05:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:05:33.409918  543705 memory.go:184] no items to output this cycle
I0322 22:05:33.409945  543705 cpu.go:275] no items to output this cycle
I0322 22:05:36.737311  543705 disk_info.go:125] begin check local disk info of client
I0322 22:05:36.739833  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:05:36.739838  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0322 22:05:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:05:43.410783  543705 memory.go:191] Add success.
I0322 22:05:43.409813  543705 cpu.go:282] Add success.
I0322 22:05:43.420463  543705 net.go:648] Add success.
I0322 22:05:43.423306  543705 net.go:770] primary dev: ETH0
I0322 22:05:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:05:43.423334  543705 net.go:698] Add success.
I0322 22:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:05:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:05:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:05:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:05:53.409779  543705 memory.go:184] no items to output this cycle
I0322 22:05:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 22:06:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:06:03.409777  543705 memory.go:184] no items to output this cycle
I0322 22:06:03.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:06:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:06:13.409791  543705 memory.go:191] Add success.
I0322 22:06:13.409793  543705 cpu.go:282] Add success.
W0322 22:06:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:06:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:06:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:06:13.420199  543705 net.go:648] Add success.
I0322 22:06:13.422740  543705 net.go:770] primary dev: ETH0
I0322 22:06:13.422753  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:06:13.422765  543705 net.go:698] Add success.
I0322 22:06:13.470839  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe92ed6d-5ed4-40f9-9985-9fa170d5bfe7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:06:13.470872  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:06:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:06:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:06:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 22:06:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:06:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 22:06:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:06:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:06:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:06:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:06:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:06:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:06:23.409798  543705 memory.go:184] no items to output this cycle
I0322 22:06:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 22:06:33.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:06:33.409884  543705 memory.go:184] no items to output this cycle
I0322 22:06:33.409973  543705 cpu.go:275] no items to output this cycle
I0322 22:06:36.739928  543705 disk_info.go:125] begin check local disk info of client
I0322 22:06:36.742473  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:06:36.742480  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049a4c0 0xc00049a500]
I0322 22:06:39.847931  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:06:39.847937  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:06:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:06:43.410780  543705 memory.go:191] Add success.
I0322 22:06:43.409798  543705 cpu.go:282] Add success.
I0322 22:06:43.420487  543705 net.go:648] Add success.
I0322 22:06:43.423288  543705 net.go:770] primary dev: ETH0
I0322 22:06:43.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:06:43.423313  543705 net.go:698] Add success.
I0322 22:06:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:06:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:06:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:06:53.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:06:53.409850  543705 memory.go:184] no items to output this cycle
I0322 22:06:53.409869  543705 cpu.go:275] no items to output this cycle
E0322 22:07:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:07:03.409783  543705 memory.go:184] no items to output this cycle
I0322 22:07:03.409802  543705 cpu.go:275] no items to output this cycle
E0322 22:07:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:07:13.409790  543705 memory.go:191] Add success.
I0322 22:07:13.409807  543705 cpu.go:282] Add success.
W0322 22:07:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:07:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:07:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:07:13.420227  543705 net.go:648] Add success.
I0322 22:07:13.422994  543705 net.go:770] primary dev: ETH0
I0322 22:07:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:07:13.423019  543705 net.go:698] Add success.
I0322 22:07:13.453578  543705 event_worker.go:152] Polling the log file for events...
W0322 22:07:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:07:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 22:07:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:07:14.455878  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:07:14.455886  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:07:14.455892  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:07:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 22:07:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:07:15.456861  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:07:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:07:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:07:16.457974  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:07:16.458018  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:07:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:07:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:07:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:07:23.409782  543705 memory.go:184] no items to output this cycle
I0322 22:07:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 22:07:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:07:33.409808  543705 memory.go:184] no items to output this cycle
I0322 22:07:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 22:07:36.742566  543705 disk_info.go:125] begin check local disk info of client
I0322 22:07:36.745111  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:07:36.745118  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049a000 0xc00049a040]
E0322 22:07:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:07:43.410779  543705 memory.go:191] Add success.
I0322 22:07:43.409798  543705 cpu.go:282] Add success.
I0322 22:07:43.420468  543705 net.go:648] Add success.
I0322 22:07:43.423733  543705 net.go:770] primary dev: ETH0
I0322 22:07:43.423746  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:07:43.423759  543705 net.go:698] Add success.
I0322 22:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:07:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:07:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:07:53.409818  543705 memory.go:184] no items to output this cycle
I0322 22:07:53.409826  543705 cpu.go:275] no items to output this cycle
E0322 22:08:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:08:03.409804  543705 memory.go:184] no items to output this cycle
I0322 22:08:03.409833  543705 cpu.go:275] no items to output this cycle
E0322 22:08:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:08:13.409790  543705 cpu.go:282] Add success.
I0322 22:08:13.409795  543705 memory.go:191] Add success.
W0322 22:08:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:08:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:08:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:08:13.420063  543705 net.go:648] Add success.
I0322 22:08:13.423357  543705 net.go:770] primary dev: ETH0
I0322 22:08:13.423370  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:08:13.423383  543705 net.go:698] Add success.
I0322 22:08:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:08:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:08:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 22:08:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:08:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 22:08:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:08:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:08:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:08:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:08:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:08:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:08:23.409780  543705 cpu.go:275] no items to output this cycle
I0322 22:08:23.409782  543705 memory.go:184] no items to output this cycle
E0322 22:08:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:08:33.409796  543705 memory.go:184] no items to output this cycle
I0322 22:08:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 22:08:36.745677  543705 disk_info.go:125] begin check local disk info of client
I0322 22:08:36.748332  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:08:36.748339  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387880 0xc0003878c0]
E0322 22:08:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:08:43.410661  543705 memory.go:191] Add success.
I0322 22:08:43.409795  543705 cpu.go:282] Add success.
I0322 22:08:43.420369  543705 net.go:648] Add success.
I0322 22:08:43.422863  543705 net.go:770] primary dev: ETH0
I0322 22:08:43.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:08:43.422900  543705 net.go:698] Add success.
I0322 22:08:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:08:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:08:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:08:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:08:53.409818  543705 memory.go:184] no items to output this cycle
I0322 22:08:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 22:09:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:09:03.409770  543705 memory.go:184] no items to output this cycle
I0322 22:09:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:09:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:09:13.409788  543705 memory.go:191] Add success.
I0322 22:09:13.409788  543705 cpu.go:282] Add success.
W0322 22:09:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:09:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:09:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:09:13.419880  543705 net.go:770] primary dev: ETH0
I0322 22:09:13.419893  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:09:13.419906  543705 net.go:698] Add success.
I0322 22:09:13.420267  543705 net.go:648] Add success.
I0322 22:09:13.463263  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8a30ec55-c3ae-4586-bcaf-3f184b327a6d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:09:13.463298  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:09:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:09:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:09:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0322 22:09:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:09:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 22:09:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:09:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:09:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:09:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:09:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:09:23.409794  543705 memory.go:184] no items to output this cycle
I0322 22:09:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:09:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:09:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 22:09:33.409793  543705 memory.go:184] no items to output this cycle
I0322 22:09:36.748424  543705 disk_info.go:125] begin check local disk info of client
I0322 22:09:36.750954  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:09:36.750960  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029e680 0xc00029e6c0]
I0322 22:09:39.849731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:09:39.849737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:09:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:09:43.410690  543705 memory.go:191] Add success.
I0322 22:09:43.409797  543705 cpu.go:282] Add success.
I0322 22:09:43.420406  543705 net.go:648] Add success.
I0322 22:09:43.423103  543705 net.go:770] primary dev: ETH0
I0322 22:09:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:09:43.423133  543705 net.go:698] Add success.
I0322 22:09:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:09:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:09:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:09:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 22:09:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:09:53.409825  543705 memory.go:184] no items to output this cycle
E0322 22:10:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:10:03.409788  543705 cpu.go:275] no items to output this cycle
I0322 22:10:03.409790  543705 memory.go:184] no items to output this cycle
E0322 22:10:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:10:13.409813  543705 memory.go:191] Add success.
I0322 22:10:13.409824  543705 cpu.go:282] Add success.
W0322 22:10:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:10:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:10:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:10:13.420132  543705 net.go:648] Add success.
I0322 22:10:13.422788  543705 net.go:770] primary dev: ETH0
I0322 22:10:13.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:10:13.422822  543705 net.go:698] Add success.
I0322 22:10:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:10:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:10:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0322 22:10:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:10:14.456504  543705 disk_worker.go:494] system disk:vda1
I0322 22:10:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:10:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:10:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:10:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:10:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:10:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:10:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:10:23.409792  543705 memory.go:184] no items to output this cycle
I0322 22:10:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:10:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:10:33.409764  543705 memory.go:184] no items to output this cycle
I0322 22:10:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 22:10:36.751049  543705 disk_info.go:125] begin check local disk info of client
I0322 22:10:36.753725  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:10:36.753732  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8f40 0xc0004d8f80]
E0322 22:10:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:10:43.410869  543705 memory.go:191] Add success.
I0322 22:10:43.409823  543705 cpu.go:282] Add success.
I0322 22:10:43.420652  543705 net.go:648] Add success.
I0322 22:10:43.423680  543705 net.go:770] primary dev: ETH0
I0322 22:10:43.423693  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:10:43.423706  543705 net.go:698] Add success.
I0322 22:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:10:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:10:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:10:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:10:53.409783  543705 memory.go:184] no items to output this cycle
I0322 22:10:53.409891  543705 cpu.go:275] no items to output this cycle
E0322 22:11:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:11:03.409786  543705 memory.go:184] no items to output this cycle
I0322 22:11:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 22:11:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:11:13.409786  543705 memory.go:191] Add success.
I0322 22:11:13.409803  543705 cpu.go:282] Add success.
W0322 22:11:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:11:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:11:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:11:13.420168  543705 net.go:648] Add success.
I0322 22:11:13.423209  543705 net.go:770] primary dev: ETH0
I0322 22:11:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:11:13.423238  543705 net.go:698] Add success.
I0322 22:11:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:11:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:11:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 22:11:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:11:14.457033  543705 disk_worker.go:494] system disk:vda1
I0322 22:11:14.457062  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:11:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:11:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:11:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:11:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:11:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:11:23.409769  543705 memory.go:184] no items to output this cycle
I0322 22:11:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 22:11:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:11:33.409780  543705 memory.go:184] no items to output this cycle
I0322 22:11:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 22:11:36.753816  543705 disk_info.go:125] begin check local disk info of client
I0322 22:11:36.756378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:11:36.756384  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c3500 0xc0004c3540]
E0322 22:11:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:11:43.410656  543705 memory.go:191] Add success.
I0322 22:11:43.409828  543705 cpu.go:282] Add success.
I0322 22:11:43.420402  543705 net.go:648] Add success.
I0322 22:11:43.423036  543705 net.go:770] primary dev: ETH0
I0322 22:11:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:11:43.423062  543705 net.go:698] Add success.
I0322 22:11:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:11:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:11:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:11:53.410406  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:11:53.410426  543705 memory.go:184] no items to output this cycle
I0322 22:11:53.410471  543705 cpu.go:275] no items to output this cycle
E0322 22:12:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:12:03.409775  543705 memory.go:184] no items to output this cycle
I0322 22:12:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 22:12:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:12:13.409811  543705 memory.go:191] Add success.
I0322 22:12:13.409817  543705 cpu.go:282] Add success.
W0322 22:12:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:12:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:12:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:12:13.420143  543705 net.go:648] Add success.
I0322 22:12:13.423087  543705 net.go:770] primary dev: ETH0
I0322 22:12:13.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:12:13.423112  543705 net.go:698] Add success.
I0322 22:12:13.468938  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cb0c5fd4-fa9d-490d-b43e-21f959e19895","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:12:13.468971  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 22:12:14.455389  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:12:14.455483  543705 disk_worker.go:708] disk space is not compliant
W0322 22:12:14.455487  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:12:14.457500  543705 disk_worker.go:494] system disk:vda1
I0322 22:12:14.457550  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:12:14.457772  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:12:14.457779  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:12:14.457784  543705 custom_config.go:64] query custom config with name: gpu
E0322 22:12:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:12:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:12:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:12:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:12:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:12:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:12:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:12:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:12:23.409799  543705 memory.go:184] no items to output this cycle
I0322 22:12:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 22:12:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:12:33.409768  543705 memory.go:184] no items to output this cycle
I0322 22:12:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 22:12:36.756472  543705 disk_info.go:125] begin check local disk info of client
I0322 22:12:36.759090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:12:36.759097  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d3c0 0xc00035d400]
I0322 22:12:39.851950  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:12:39.851956  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:12:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:12:43.410804  543705 memory.go:191] Add success.
I0322 22:12:43.409824  543705 cpu.go:282] Add success.
I0322 22:12:43.420569  543705 net.go:648] Add success.
I0322 22:12:43.423400  543705 net.go:770] primary dev: ETH0
I0322 22:12:43.423415  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:12:43.423430  543705 net.go:698] Add success.
I0322 22:12:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:12:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:12:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:12:53.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:12:53.410410  543705 memory.go:184] no items to output this cycle
I0322 22:12:53.410479  543705 cpu.go:275] no items to output this cycle
E0322 22:13:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:13:03.409788  543705 memory.go:184] no items to output this cycle
I0322 22:13:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 22:13:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:13:13.409793  543705 memory.go:191] Add success.
I0322 22:13:13.409794  543705 cpu.go:282] Add success.
W0322 22:13:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:13:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:13:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:13:13.420098  543705 net.go:648] Add success.
I0322 22:13:13.422559  543705 net.go:770] primary dev: ETH0
I0322 22:13:13.422571  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:13:13.422585  543705 net.go:698] Add success.
I0322 22:13:14.453951  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:13:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:13:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 22:13:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:13:14.456543  543705 disk_worker.go:494] system disk:vda1
I0322 22:13:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:13:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:13:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:13:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:13:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:13:23.409793  543705 memory.go:184] no items to output this cycle
I0322 22:13:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 22:13:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:13:33.409798  543705 memory.go:184] no items to output this cycle
I0322 22:13:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 22:13:36.760450  543705 disk_info.go:125] begin check local disk info of client
I0322 22:13:36.763002  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:13:36.763008  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af40 0xc00007af80]
E0322 22:13:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:13:43.410621  543705 memory.go:191] Add success.
I0322 22:13:43.409819  543705 cpu.go:282] Add success.
I0322 22:13:43.420332  543705 net.go:648] Add success.
I0322 22:13:43.422834  543705 net.go:770] primary dev: ETH0
I0322 22:13:43.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:13:43.422860  543705 net.go:698] Add success.
I0322 22:13:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:13:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:13:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:13:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:13:53.409773  543705 memory.go:184] no items to output this cycle
I0322 22:13:53.409838  543705 cpu.go:275] no items to output this cycle
E0322 22:14:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:14:03.409776  543705 memory.go:184] no items to output this cycle
I0322 22:14:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 22:14:13.410398  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:14:13.410423  543705 memory.go:191] Add success.
I0322 22:14:13.410436  543705 cpu.go:282] Add success.
W0322 22:14:13.410466  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:14:13.410478  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:14:13.410481  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:14:13.419986  543705 net.go:648] Add success.
I0322 22:14:13.420957  543705 net.go:770] primary dev: ETH0
I0322 22:14:13.420971  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:14:13.420982  543705 net.go:698] Add success.
I0322 22:14:14.454944  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:14:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:14:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 22:14:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:14:14.456487  543705 disk_worker.go:494] system disk:vda1
I0322 22:14:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:14:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:14:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:14:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:14:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:14:23.409776  543705 memory.go:184] no items to output this cycle
I0322 22:14:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 22:14:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:14:33.409783  543705 memory.go:184] no items to output this cycle
I0322 22:14:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 22:14:36.764467  543705 disk_info.go:125] begin check local disk info of client
I0322 22:14:36.767128  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:14:36.767134  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359d80 0xc000359dc0]
E0322 22:14:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:14:43.410526  543705 memory.go:191] Add success.
I0322 22:14:43.409816  543705 cpu.go:282] Add success.
I0322 22:14:43.420216  543705 net.go:648] Add success.
I0322 22:14:43.422655  543705 net.go:770] primary dev: ETH0
I0322 22:14:43.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:14:43.422681  543705 net.go:698] Add success.
I0322 22:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:14:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:14:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:14:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:14:53.409770  543705 memory.go:184] no items to output this cycle
I0322 22:14:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 22:15:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:15:03.409802  543705 memory.go:184] no items to output this cycle
I0322 22:15:03.409814  543705 cpu.go:275] no items to output this cycle
E0322 22:15:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:15:13.409776  543705 memory.go:191] Add success.
I0322 22:15:13.409797  543705 cpu.go:282] Add success.
W0322 22:15:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:15:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:15:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:15:13.420717  543705 net.go:648] Add success.
I0322 22:15:13.423537  543705 net.go:770] primary dev: ETH0
I0322 22:15:13.423550  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:15:13.423562  543705 net.go:698] Add success.
I0322 22:15:13.468266  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"573bec41-866b-40f9-9834-85ad4c64266b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:15:13.468297  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:15:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:15:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 22:15:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:15:14.456602  543705 disk_worker.go:494] system disk:vda1
I0322 22:15:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:15:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:15:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:15:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:15:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:15:23.409778  543705 memory.go:184] no items to output this cycle
I0322 22:15:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 22:15:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:15:33.409813  543705 memory.go:184] no items to output this cycle
I0322 22:15:33.409827  543705 cpu.go:275] no items to output this cycle
I0322 22:15:36.767218  543705 disk_info.go:125] begin check local disk info of client
I0322 22:15:36.769713  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:15:36.769719  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa940 0xc0001aa980]
I0322 22:15:39.853737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:15:39.853744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:15:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:15:43.409783  543705 memory.go:191] Add success.
I0322 22:15:43.409845  543705 cpu.go:282] Add success.
I0322 22:15:43.420165  543705 net.go:648] Add success.
I0322 22:15:43.421200  543705 net.go:770] primary dev: ETH0
I0322 22:15:43.421219  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:15:43.421238  543705 net.go:698] Add success.
I0322 22:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:15:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:15:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:15:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:15:53.409812  543705 memory.go:184] no items to output this cycle
I0322 22:15:53.409824  543705 cpu.go:275] no items to output this cycle
E0322 22:16:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:16:03.409790  543705 memory.go:184] no items to output this cycle
I0322 22:16:03.409811  543705 cpu.go:275] no items to output this cycle
E0322 22:16:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:16:13.409833  543705 memory.go:191] Add success.
I0322 22:16:13.409838  543705 cpu.go:282] Add success.
W0322 22:16:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:16:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:16:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:16:13.420395  543705 net.go:648] Add success.
I0322 22:16:13.423260  543705 net.go:770] primary dev: ETH0
I0322 22:16:13.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:16:13.423288  543705 net.go:698] Add success.
I0322 22:16:14.454944  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:16:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:16:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 22:16:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:16:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 22:16:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:16:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:16:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:16:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:16:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:16:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:16:23.409783  543705 memory.go:184] no items to output this cycle
I0322 22:16:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 22:16:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:16:33.409794  543705 memory.go:184] no items to output this cycle
I0322 22:16:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 22:16:36.771505  543705 disk_info.go:125] begin check local disk info of client
I0322 22:16:36.774085  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:16:36.774091  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0322 22:16:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:16:43.410696  543705 memory.go:191] Add success.
I0322 22:16:43.409827  543705 cpu.go:282] Add success.
I0322 22:16:43.420421  543705 net.go:648] Add success.
I0322 22:16:43.423088  543705 net.go:770] primary dev: ETH0
I0322 22:16:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:16:43.423115  543705 net.go:698] Add success.
I0322 22:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:16:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:16:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:16:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:16:53.409804  543705 memory.go:184] no items to output this cycle
I0322 22:16:53.409814  543705 cpu.go:275] no items to output this cycle
E0322 22:17:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:17:03.409787  543705 memory.go:184] no items to output this cycle
I0322 22:17:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 22:17:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:17:13.409788  543705 memory.go:191] Add success.
I0322 22:17:13.409790  543705 cpu.go:282] Add success.
W0322 22:17:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:17:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:17:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:17:13.420244  543705 net.go:648] Add success.
I0322 22:17:13.422895  543705 net.go:770] primary dev: ETH0
I0322 22:17:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:17:13.422918  543705 net.go:698] Add success.
I0322 22:17:13.452780  543705 event_worker.go:152] Polling the log file for events...
W0322 22:17:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:17:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 22:17:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:17:14.456779  543705 disk_worker.go:494] system disk:vda1
I0322 22:17:14.456816  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:17:14.456988  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:17:14.456996  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:17:14.457002  543705 custom_config.go:64] query custom config with name: gpu
E0322 22:17:15.456785  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:17:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:17:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:17:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:17:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:17:16.458034  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:17:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:17:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:17:23.409793  543705 memory.go:184] no items to output this cycle
I0322 22:17:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:17:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:17:33.409773  543705 memory.go:184] no items to output this cycle
I0322 22:17:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 22:17:36.774173  543705 disk_info.go:125] begin check local disk info of client
I0322 22:17:36.776720  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:17:36.776726  543705 disk_info.go:196] parse disk info done, disk is : [0xc000580c80 0xc000580cc0]
E0322 22:17:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:17:43.410602  543705 memory.go:191] Add success.
I0322 22:17:43.409805  543705 cpu.go:282] Add success.
I0322 22:17:43.420358  543705 net.go:648] Add success.
I0322 22:17:43.422916  543705 net.go:770] primary dev: ETH0
I0322 22:17:43.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:17:43.422945  543705 net.go:698] Add success.
I0322 22:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:17:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:17:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:17:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:17:53.410262  543705 memory.go:184] no items to output this cycle
I0322 22:17:53.410263  543705 cpu.go:275] no items to output this cycle
E0322 22:18:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:18:03.409802  543705 memory.go:184] no items to output this cycle
I0322 22:18:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 22:18:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:18:13.409776  543705 memory.go:191] Add success.
I0322 22:18:13.409801  543705 cpu.go:282] Add success.
W0322 22:18:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:18:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:18:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:18:13.420375  543705 net.go:648] Add success.
I0322 22:18:13.423164  543705 net.go:770] primary dev: ETH0
I0322 22:18:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:18:13.423188  543705 net.go:698] Add success.
I0322 22:18:13.537604  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c6cfa5f-99c4-465f-8866-ed17d4da3977","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:18:13.537634  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:18:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:18:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:18:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0322 22:18:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:18:14.456721  543705 disk_worker.go:494] system disk:vda1
I0322 22:18:14.456753  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:18:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:18:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:18:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:18:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:18:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:18:23.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:18:23.410390  543705 memory.go:184] no items to output this cycle
I0322 22:18:23.410411  543705 cpu.go:275] no items to output this cycle
E0322 22:18:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:18:33.409767  543705 memory.go:184] no items to output this cycle
I0322 22:18:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 22:18:36.777678  543705 disk_info.go:125] begin check local disk info of client
I0322 22:18:36.780223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:18:36.780230  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474500 0xc000474540]
I0322 22:18:39.855979  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:18:39.855986  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:18:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:18:43.410659  543705 memory.go:191] Add success.
I0322 22:18:43.409835  543705 cpu.go:282] Add success.
I0322 22:18:43.420371  543705 net.go:648] Add success.
I0322 22:18:43.422921  543705 net.go:770] primary dev: ETH0
I0322 22:18:43.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:18:43.422949  543705 net.go:698] Add success.
I0322 22:18:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:18:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:18:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:18:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:18:53.409775  543705 memory.go:184] no items to output this cycle
I0322 22:18:53.409779  543705 cpu.go:275] no items to output this cycle
E0322 22:19:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:19:03.409804  543705 memory.go:184] no items to output this cycle
I0322 22:19:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 22:19:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:19:13.409791  543705 cpu.go:282] Add success.
I0322 22:19:13.409792  543705 memory.go:191] Add success.
W0322 22:19:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:19:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:19:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:19:13.419711  543705 net.go:648] Add success.
I0322 22:19:13.422448  543705 net.go:770] primary dev: ETH0
I0322 22:19:13.422461  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:19:13.422472  543705 net.go:698] Add success.
I0322 22:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:19:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:19:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 22:19:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:19:14.456571  543705 disk_worker.go:494] system disk:vda1
I0322 22:19:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:19:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:19:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:19:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:19:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:19:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:19:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:19:23.409799  543705 memory.go:184] no items to output this cycle
I0322 22:19:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 22:19:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:19:33.409804  543705 memory.go:184] no items to output this cycle
I0322 22:19:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 22:19:36.781543  543705 disk_info.go:125] begin check local disk info of client
I0322 22:19:36.784065  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:19:36.784071  543705 disk_info.go:196] parse disk info done, disk is : [0xc000581f00 0xc000581f40]
E0322 22:19:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:19:43.410736  543705 memory.go:191] Add success.
I0322 22:19:43.409802  543705 cpu.go:282] Add success.
I0322 22:19:43.420472  543705 net.go:648] Add success.
I0322 22:19:43.423230  543705 net.go:770] primary dev: ETH0
I0322 22:19:43.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:19:43.423268  543705 net.go:698] Add success.
I0322 22:19:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:19:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:19:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:19:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:19:53.409774  543705 memory.go:184] no items to output this cycle
I0322 22:19:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 22:20:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:20:03.409774  543705 memory.go:184] no items to output this cycle
I0322 22:20:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 22:20:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:20:13.409804  543705 memory.go:191] Add success.
I0322 22:20:13.409814  543705 cpu.go:282] Add success.
W0322 22:20:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:20:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:20:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:20:13.420290  543705 net.go:648] Add success.
I0322 22:20:13.423011  543705 net.go:770] primary dev: ETH0
I0322 22:20:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:20:13.423035  543705 net.go:698] Add success.
I0322 22:20:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:20:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:20:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 22:20:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:20:14.456485  543705 disk_worker.go:494] system disk:vda1
I0322 22:20:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:20:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:20:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:20:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:20:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:20:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:20:23.409764  543705 memory.go:184] no items to output this cycle
I0322 22:20:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 22:20:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:20:33.409775  543705 memory.go:184] no items to output this cycle
I0322 22:20:33.409779  543705 cpu.go:275] no items to output this cycle
I0322 22:20:36.784157  543705 disk_info.go:125] begin check local disk info of client
I0322 22:20:36.786803  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:20:36.786811  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c1700 0xc0004c1740]
E0322 22:20:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:20:43.410791  543705 memory.go:191] Add success.
I0322 22:20:43.409819  543705 cpu.go:282] Add success.
I0322 22:20:43.420504  543705 net.go:648] Add success.
I0322 22:20:43.423178  543705 net.go:770] primary dev: ETH0
I0322 22:20:43.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:20:43.423208  543705 net.go:698] Add success.
I0322 22:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:20:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:20:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:20:53.409779  543705 memory.go:184] no items to output this cycle
I0322 22:20:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:21:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:21:03.409807  543705 memory.go:184] no items to output this cycle
I0322 22:21:03.409815  543705 cpu.go:275] no items to output this cycle
W0322 22:21:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:21:13.409725  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:21:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:21:13.409797  543705 cpu.go:282] Add success.
E0322 22:21:13.409918  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:21:13.409938  543705 memory.go:191] Add success.
I0322 22:21:13.419737  543705 net.go:648] Add success.
I0322 22:21:13.422513  543705 net.go:770] primary dev: ETH0
I0322 22:21:13.422529  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:21:13.422541  543705 net.go:698] Add success.
I0322 22:21:13.467779  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"33730c83-1c11-40d4-83a0-e1705e9658d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:21:13.467810  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:21:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:21:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:21:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0322 22:21:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:21:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 22:21:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:21:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:21:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:21:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:21:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:21:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:21:23.409781  543705 memory.go:184] no items to output this cycle
I0322 22:21:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 22:21:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:21:33.409777  543705 memory.go:184] no items to output this cycle
I0322 22:21:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 22:21:36.786893  543705 disk_info.go:125] begin check local disk info of client
I0322 22:21:36.789462  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:21:36.789468  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035df00 0xc00035df40]
I0322 22:21:39.857728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:21:39.857736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:21:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:21:43.410689  543705 memory.go:191] Add success.
I0322 22:21:43.409803  543705 cpu.go:282] Add success.
I0322 22:21:43.420457  543705 net.go:648] Add success.
I0322 22:21:43.423358  543705 net.go:770] primary dev: ETH0
I0322 22:21:43.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:21:43.423383  543705 net.go:698] Add success.
I0322 22:21:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:21:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:21:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:21:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:21:53.409770  543705 memory.go:184] no items to output this cycle
I0322 22:21:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 22:22:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:22:03.409806  543705 memory.go:184] no items to output this cycle
I0322 22:22:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 22:22:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:22:13.409793  543705 memory.go:191] Add success.
I0322 22:22:13.409795  543705 cpu.go:282] Add success.
W0322 22:22:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:22:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:22:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:22:13.420150  543705 net.go:648] Add success.
I0322 22:22:13.422937  543705 net.go:770] primary dev: ETH0
I0322 22:22:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:22:13.422962  543705 net.go:698] Add success.
I0322 22:22:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:22:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:22:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 22:22:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:22:14.456933  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:22:14.456942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:22:14.456998  543705 disk_worker.go:494] system disk:vda1
I0322 22:22:14.457029  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:22:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:22:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:22:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:22:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:22:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:22:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:22:16.472352  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:22:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:22:23.409763  543705 memory.go:184] no items to output this cycle
I0322 22:22:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 22:22:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:22:33.409772  543705 memory.go:184] no items to output this cycle
I0322 22:22:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 22:22:36.789677  543705 disk_info.go:125] begin check local disk info of client
I0322 22:22:36.792066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:22:36.792073  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e2d00 0xc0004e2d40]
E0322 22:22:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:22:43.410655  543705 memory.go:191] Add success.
I0322 22:22:43.409802  543705 cpu.go:282] Add success.
I0322 22:22:43.420350  543705 net.go:648] Add success.
I0322 22:22:43.423119  543705 net.go:770] primary dev: ETH0
I0322 22:22:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:22:43.423146  543705 net.go:698] Add success.
I0322 22:22:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:22:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:22:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:22:53.410710  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:22:53.410727  543705 memory.go:184] no items to output this cycle
I0322 22:22:53.410739  543705 cpu.go:275] no items to output this cycle
E0322 22:23:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:23:03.409793  543705 memory.go:184] no items to output this cycle
I0322 22:23:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 22:23:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:23:13.409799  543705 memory.go:191] Add success.
I0322 22:23:13.409801  543705 cpu.go:282] Add success.
W0322 22:23:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:23:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:23:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:23:13.420264  543705 net.go:648] Add success.
I0322 22:23:13.423081  543705 net.go:770] primary dev: ETH0
I0322 22:23:13.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:23:13.423106  543705 net.go:698] Add success.
I0322 22:23:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:23:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:23:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0322 22:23:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:23:14.456522  543705 disk_worker.go:494] system disk:vda1
I0322 22:23:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:23:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:23:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:23:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:23:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:23:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:23:23.409764  543705 memory.go:184] no items to output this cycle
I0322 22:23:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 22:23:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:23:33.409770  543705 memory.go:184] no items to output this cycle
I0322 22:23:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 22:23:36.792159  543705 disk_info.go:125] begin check local disk info of client
I0322 22:23:36.794696  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:23:36.794703  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5140 0xc0000c5180]
E0322 22:23:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:23:43.410642  543705 memory.go:191] Add success.
I0322 22:23:43.409799  543705 cpu.go:282] Add success.
I0322 22:23:43.420336  543705 net.go:648] Add success.
I0322 22:23:43.422865  543705 net.go:770] primary dev: ETH0
I0322 22:23:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:23:43.422891  543705 net.go:698] Add success.
I0322 22:23:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:23:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:23:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:23:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:23:53.409768  543705 memory.go:184] no items to output this cycle
I0322 22:23:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:24:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:24:03.409791  543705 memory.go:184] no items to output this cycle
I0322 22:24:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 22:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:24:13.409796  543705 cpu.go:282] Add success.
I0322 22:24:13.409804  543705 memory.go:191] Add success.
W0322 22:24:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:24:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:24:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:24:13.420609  543705 net.go:648] Add success.
I0322 22:24:13.423387  543705 net.go:770] primary dev: ETH0
I0322 22:24:13.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:24:13.423411  543705 net.go:698] Add success.
I0322 22:24:13.463426  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"836bdd9c-ae4a-40c2-b144-335df6ddd599","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:24:13.463459  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:24:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:24:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:24:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0322 22:24:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:24:14.456599  543705 disk_worker.go:494] system disk:vda1
I0322 22:24:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:24:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:24:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:24:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:24:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:24:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:24:23.409765  543705 memory.go:184] no items to output this cycle
I0322 22:24:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 22:24:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:24:33.409769  543705 memory.go:184] no items to output this cycle
I0322 22:24:33.409792  543705 cpu.go:275] no items to output this cycle
I0322 22:24:36.794788  543705 disk_info.go:125] begin check local disk info of client
I0322 22:24:36.797333  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:24:36.797344  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e3f40 0xc000298000]
I0322 22:24:39.859979  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:24:39.859985  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:24:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:24:43.410651  543705 memory.go:191] Add success.
I0322 22:24:43.409806  543705 cpu.go:282] Add success.
I0322 22:24:43.420347  543705 net.go:648] Add success.
I0322 22:24:43.422941  543705 net.go:770] primary dev: ETH0
I0322 22:24:43.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:24:43.422975  543705 net.go:698] Add success.
I0322 22:24:46.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:24:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:24:46.458110  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:24:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:24:53.409771  543705 memory.go:184] no items to output this cycle
I0322 22:24:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:25:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:25:03.409820  543705 memory.go:184] no items to output this cycle
I0322 22:25:03.409833  543705 cpu.go:275] no items to output this cycle
E0322 22:25:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:25:13.409786  543705 memory.go:191] Add success.
I0322 22:25:13.409809  543705 cpu.go:282] Add success.
W0322 22:25:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:25:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:25:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:25:13.420207  543705 net.go:648] Add success.
I0322 22:25:13.422972  543705 net.go:770] primary dev: ETH0
I0322 22:25:13.422985  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:25:13.422998  543705 net.go:698] Add success.
I0322 22:25:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:25:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:25:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 22:25:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:25:14.456553  543705 disk_worker.go:494] system disk:vda1
I0322 22:25:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:25:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:25:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:25:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:25:23.409776  543705 memory.go:184] no items to output this cycle
I0322 22:25:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 22:25:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:25:33.409816  543705 memory.go:184] no items to output this cycle
I0322 22:25:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 22:25:36.797681  543705 disk_info.go:125] begin check local disk info of client
I0322 22:25:36.800213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:25:36.800220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369600 0xc000369640]
E0322 22:25:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:25:43.410701  543705 memory.go:191] Add success.
I0322 22:25:43.409812  543705 cpu.go:282] Add success.
I0322 22:25:43.420404  543705 net.go:648] Add success.
I0322 22:25:43.423176  543705 net.go:770] primary dev: ETH0
I0322 22:25:43.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:25:43.423205  543705 net.go:698] Add success.
I0322 22:25:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:25:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:25:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:25:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:25:53.409773  543705 memory.go:184] no items to output this cycle
I0322 22:25:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 22:26:03.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:26:03.409919  543705 memory.go:184] no items to output this cycle
I0322 22:26:03.410033  543705 cpu.go:275] no items to output this cycle
E0322 22:26:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:26:13.409816  543705 memory.go:191] Add success.
I0322 22:26:13.409816  543705 cpu.go:282] Add success.
W0322 22:26:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:26:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:26:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:26:13.419687  543705 net.go:648] Add success.
I0322 22:26:13.422185  543705 net.go:770] primary dev: ETH0
I0322 22:26:13.422197  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:26:13.422208  543705 net.go:698] Add success.
I0322 22:26:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:26:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:26:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 22:26:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:26:14.456568  543705 disk_worker.go:494] system disk:vda1
I0322 22:26:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:26:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:26:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:26:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:26:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:26:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:26:23.409810  543705 memory.go:184] no items to output this cycle
I0322 22:26:23.409818  543705 cpu.go:275] no items to output this cycle
E0322 22:26:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:26:33.409783  543705 memory.go:184] no items to output this cycle
I0322 22:26:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 22:26:36.801639  543705 disk_info.go:125] begin check local disk info of client
I0322 22:26:36.804257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:26:36.804264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab580 0xc0001ab5c0]
E0322 22:26:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:26:43.410658  543705 memory.go:191] Add success.
I0322 22:26:43.409821  543705 cpu.go:282] Add success.
I0322 22:26:43.420352  543705 net.go:648] Add success.
I0322 22:26:43.423228  543705 net.go:770] primary dev: ETH0
I0322 22:26:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:26:43.423252  543705 net.go:698] Add success.
I0322 22:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:26:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:26:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:26:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:26:53.409774  543705 memory.go:184] no items to output this cycle
I0322 22:26:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 22:27:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:27:03.409777  543705 memory.go:184] no items to output this cycle
I0322 22:27:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 22:27:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:27:13.409795  543705 memory.go:191] Add success.
I0322 22:27:13.409796  543705 cpu.go:282] Add success.
W0322 22:27:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:27:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:27:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:27:13.420229  543705 net.go:648] Add success.
I0322 22:27:13.422914  543705 net.go:770] primary dev: ETH0
I0322 22:27:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:27:13.422940  543705 net.go:698] Add success.
I0322 22:27:13.429089  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 22:27:13.453267  543705 event_worker.go:152] Polling the log file for events...
I0322 22:27:13.468970  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65e87b67-11a4-42f3-9c37-fd6ee1a9f6d7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:27:13.469002  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 22:27:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:27:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0322 22:27:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:27:14.456123  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:27:14.456133  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:27:14.456139  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:27:14.456483  543705 disk_worker.go:494] system disk:vda1
I0322 22:27:14.456511  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:27:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:27:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:27:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:27:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:27:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:27:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:27:16.472336  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:27:23.410543  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:27:23.410558  543705 memory.go:184] no items to output this cycle
I0322 22:27:23.410561  543705 cpu.go:275] no items to output this cycle
E0322 22:27:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:27:33.409808  543705 memory.go:184] no items to output this cycle
I0322 22:27:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 22:27:36.804348  543705 disk_info.go:125] begin check local disk info of client
I0322 22:27:36.806901  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:27:36.806908  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003db100 0xc0003db140]
I0322 22:27:39.861733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:27:39.861739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:27:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:27:43.410607  543705 memory.go:191] Add success.
I0322 22:27:43.409795  543705 cpu.go:282] Add success.
I0322 22:27:43.420371  543705 net.go:648] Add success.
I0322 22:27:43.422976  543705 net.go:770] primary dev: ETH0
I0322 22:27:43.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:27:43.423005  543705 net.go:698] Add success.
I0322 22:27:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:27:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:27:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:27:53.409773  543705 memory.go:184] no items to output this cycle
I0322 22:27:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:28:03.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:28:03.409888  543705 memory.go:184] no items to output this cycle
I0322 22:28:03.409981  543705 cpu.go:275] no items to output this cycle
E0322 22:28:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:28:13.409788  543705 memory.go:191] Add success.
I0322 22:28:13.409792  543705 cpu.go:282] Add success.
W0322 22:28:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:28:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:28:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:28:13.419667  543705 net.go:648] Add success.
I0322 22:28:13.422525  543705 net.go:770] primary dev: ETH0
I0322 22:28:13.422539  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:28:13.422551  543705 net.go:698] Add success.
I0322 22:28:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:28:14.455219  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:28:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0322 22:28:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:28:14.456747  543705 disk_worker.go:494] system disk:vda1
I0322 22:28:14.456776  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:28:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:28:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:28:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:28:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:28:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:28:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:28:23.409769  543705 memory.go:184] no items to output this cycle
I0322 22:28:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 22:28:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:28:33.409813  543705 memory.go:184] no items to output this cycle
I0322 22:28:33.409825  543705 cpu.go:275] no items to output this cycle
I0322 22:28:36.808676  543705 disk_info.go:125] begin check local disk info of client
I0322 22:28:36.811267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:28:36.811274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000595040 0xc000595080]
E0322 22:28:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:28:43.410613  543705 memory.go:191] Add success.
I0322 22:28:43.409805  543705 cpu.go:282] Add success.
I0322 22:28:43.420383  543705 net.go:648] Add success.
I0322 22:28:43.423070  543705 net.go:770] primary dev: ETH0
I0322 22:28:43.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:28:43.423106  543705 net.go:698] Add success.
I0322 22:28:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:28:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:28:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:28:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:28:53.409763  543705 memory.go:184] no items to output this cycle
I0322 22:28:53.409794  543705 cpu.go:275] no items to output this cycle
E0322 22:29:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:29:03.409786  543705 memory.go:184] no items to output this cycle
I0322 22:29:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:29:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:29:13.409799  543705 memory.go:191] Add success.
I0322 22:29:13.409800  543705 cpu.go:282] Add success.
W0322 22:29:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:29:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:29:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:29:13.420190  543705 net.go:648] Add success.
I0322 22:29:13.423030  543705 net.go:770] primary dev: ETH0
I0322 22:29:13.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:29:13.423059  543705 net.go:698] Add success.
I0322 22:29:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:29:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:29:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 22:29:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:29:14.456529  543705 disk_worker.go:494] system disk:vda1
I0322 22:29:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:29:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:29:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:29:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:29:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:29:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:29:23.409792  543705 memory.go:184] no items to output this cycle
I0322 22:29:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:29:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:29:33.409775  543705 memory.go:184] no items to output this cycle
I0322 22:29:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 22:29:36.811361  543705 disk_info.go:125] begin check local disk info of client
I0322 22:29:36.813862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:29:36.813868  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4fc0 0xc0004a5000]
E0322 22:29:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:29:43.410661  543705 memory.go:191] Add success.
I0322 22:29:43.409798  543705 cpu.go:282] Add success.
I0322 22:29:43.420376  543705 net.go:648] Add success.
I0322 22:29:43.423108  543705 net.go:770] primary dev: ETH0
I0322 22:29:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:29:43.423136  543705 net.go:698] Add success.
I0322 22:29:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:29:46.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:29:46.458114  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:29:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:29:53.409782  543705 memory.go:184] no items to output this cycle
I0322 22:29:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:30:03.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:30:03.409911  543705 memory.go:184] no items to output this cycle
I0322 22:30:03.409957  543705 cpu.go:275] no items to output this cycle
E0322 22:30:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:30:13.409781  543705 memory.go:191] Add success.
W0322 22:30:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 22:30:13.409813  543705 cpu.go:282] Add success.
W0322 22:30:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:30:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:30:13.420212  543705 net.go:648] Add success.
I0322 22:30:13.422900  543705 net.go:770] primary dev: ETH0
I0322 22:30:13.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:30:13.422932  543705 net.go:698] Add success.
I0322 22:30:13.469198  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fb9241bd-1d3e-486f-89b3-c8557f556ae7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:30:13.469236  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:30:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:30:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:30:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 22:30:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:30:14.456564  543705 disk_worker.go:494] system disk:vda1
I0322 22:30:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:30:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:30:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:30:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:30:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:30:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:30:23.409764  543705 memory.go:184] no items to output this cycle
I0322 22:30:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 22:30:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:30:33.409785  543705 memory.go:184] no items to output this cycle
I0322 22:30:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 22:30:36.813953  543705 disk_info.go:125] begin check local disk info of client
I0322 22:30:36.816561  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:30:36.816569  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005852c0 0xc000585300]
I0322 22:30:39.864010  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:30:39.864017  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:30:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:30:43.410608  543705 memory.go:191] Add success.
I0322 22:30:43.409798  543705 cpu.go:282] Add success.
I0322 22:30:43.420331  543705 net.go:648] Add success.
I0322 22:30:43.422969  543705 net.go:770] primary dev: ETH0
I0322 22:30:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:30:43.422995  543705 net.go:698] Add success.
I0322 22:30:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:30:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:30:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:30:53.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:30:53.410278  543705 cpu.go:275] no items to output this cycle
I0322 22:30:53.410283  543705 memory.go:184] no items to output this cycle
E0322 22:31:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:31:03.409784  543705 memory.go:184] no items to output this cycle
I0322 22:31:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:31:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:31:13.409802  543705 memory.go:191] Add success.
I0322 22:31:13.409807  543705 cpu.go:282] Add success.
W0322 22:31:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:31:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:31:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:31:13.420175  543705 net.go:648] Add success.
I0322 22:31:13.422795  543705 net.go:770] primary dev: ETH0
I0322 22:31:13.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:31:13.422820  543705 net.go:698] Add success.
I0322 22:31:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:31:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:31:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0322 22:31:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:31:14.456508  543705 disk_worker.go:494] system disk:vda1
I0322 22:31:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:31:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:31:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:31:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:31:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:31:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:31:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:31:23.409801  543705 memory.go:184] no items to output this cycle
I0322 22:31:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 22:31:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:31:33.409783  543705 memory.go:184] no items to output this cycle
I0322 22:31:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 22:31:36.817675  543705 disk_info.go:125] begin check local disk info of client
I0322 22:31:36.820138  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:31:36.820144  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473800 0xc000473840]
E0322 22:31:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:31:43.410720  543705 memory.go:191] Add success.
I0322 22:31:43.409798  543705 cpu.go:282] Add success.
I0322 22:31:43.420423  543705 net.go:648] Add success.
I0322 22:31:43.423211  543705 net.go:770] primary dev: ETH0
I0322 22:31:43.423225  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:31:43.423236  543705 net.go:698] Add success.
I0322 22:31:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:31:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:31:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:31:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:31:53.409796  543705 memory.go:184] no items to output this cycle
I0322 22:31:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:32:03.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:32:03.409869  543705 memory.go:184] no items to output this cycle
I0322 22:32:03.409930  543705 cpu.go:275] no items to output this cycle
E0322 22:32:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:32:13.409788  543705 memory.go:191] Add success.
I0322 22:32:13.409791  543705 cpu.go:282] Add success.
W0322 22:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:32:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:32:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:32:13.420251  543705 net.go:648] Add success.
I0322 22:32:13.422829  543705 net.go:770] primary dev: ETH0
I0322 22:32:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:32:13.422856  543705 net.go:698] Add success.
W0322 22:32:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:32:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 22:32:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:32:14.456794  543705 disk_worker.go:494] system disk:vda1
I0322 22:32:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:32:14.457145  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:32:14.457153  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:32:14.457158  543705 custom_config.go:64] query custom config with name: gpu
E0322 22:32:15.456786  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:32:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:32:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:32:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:32:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:32:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:32:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:32:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:32:23.409794  543705 memory.go:184] no items to output this cycle
I0322 22:32:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 22:32:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:32:33.409783  543705 memory.go:184] no items to output this cycle
I0322 22:32:33.409804  543705 cpu.go:275] no items to output this cycle
I0322 22:32:36.821676  543705 disk_info.go:125] begin check local disk info of client
I0322 22:32:36.824193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:32:36.824199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5140 0xc0000c5180]
E0322 22:32:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:32:43.410623  543705 memory.go:191] Add success.
I0322 22:32:43.409809  543705 cpu.go:282] Add success.
I0322 22:32:43.420298  543705 net.go:648] Add success.
I0322 22:32:43.423123  543705 net.go:770] primary dev: ETH0
I0322 22:32:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:32:43.423150  543705 net.go:698] Add success.
I0322 22:32:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:32:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:32:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:32:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:32:53.409800  543705 memory.go:184] no items to output this cycle
I0322 22:32:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:33:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:33:03.409791  543705 memory.go:184] no items to output this cycle
I0322 22:33:03.409798  543705 cpu.go:275] no items to output this cycle
E0322 22:33:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:33:13.409782  543705 memory.go:191] Add success.
W0322 22:33:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 22:33:13.409809  543705 cpu.go:282] Add success.
W0322 22:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:33:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:33:13.420255  543705 net.go:648] Add success.
I0322 22:33:13.422953  543705 net.go:770] primary dev: ETH0
I0322 22:33:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:33:13.422981  543705 net.go:698] Add success.
I0322 22:33:13.468515  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"67a47514-92ba-4cbd-bc54-5c14a1a8cd30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:33:13.468552  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:33:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:33:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 22:33:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:33:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 22:33:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:33:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:33:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:33:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:33:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:33:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:33:23.409779  543705 cpu.go:275] no items to output this cycle
I0322 22:33:23.409780  543705 memory.go:184] no items to output this cycle
E0322 22:33:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:33:33.409786  543705 memory.go:184] no items to output this cycle
I0322 22:33:33.409839  543705 cpu.go:275] no items to output this cycle
I0322 22:33:36.825675  543705 disk_info.go:125] begin check local disk info of client
I0322 22:33:36.828171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:33:36.828177  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4cc0 0xc0000c4d00]
I0322 22:33:39.865731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:33:39.865737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:33:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:33:43.410601  543705 memory.go:191] Add success.
I0322 22:33:43.409804  543705 cpu.go:282] Add success.
I0322 22:33:43.420357  543705 net.go:648] Add success.
I0322 22:33:43.423341  543705 net.go:770] primary dev: ETH0
I0322 22:33:43.423354  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:33:43.423367  543705 net.go:698] Add success.
I0322 22:33:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:33:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:33:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:33:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:33:53.409766  543705 memory.go:184] no items to output this cycle
I0322 22:33:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 22:34:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:34:03.409865  543705 memory.go:184] no items to output this cycle
I0322 22:34:03.409956  543705 cpu.go:275] no items to output this cycle
E0322 22:34:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:34:13.409782  543705 memory.go:191] Add success.
I0322 22:34:13.409807  543705 cpu.go:282] Add success.
W0322 22:34:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:34:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:34:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:34:13.420168  543705 net.go:648] Add success.
I0322 22:34:13.422956  543705 net.go:770] primary dev: ETH0
I0322 22:34:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:34:13.422981  543705 net.go:698] Add success.
I0322 22:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:34:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:34:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 22:34:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:34:14.456600  543705 disk_worker.go:494] system disk:vda1
I0322 22:34:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:34:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:34:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:34:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:34:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:34:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:34:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:34:23.409802  543705 memory.go:184] no items to output this cycle
I0322 22:34:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 22:34:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:34:33.409777  543705 memory.go:184] no items to output this cycle
I0322 22:34:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 22:34:36.829679  543705 disk_info.go:125] begin check local disk info of client
I0322 22:34:36.832238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:34:36.832245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa080 0xc0001aa0c0]
E0322 22:34:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:34:43.410681  543705 memory.go:191] Add success.
I0322 22:34:43.409829  543705 cpu.go:282] Add success.
I0322 22:34:43.420395  543705 net.go:648] Add success.
I0322 22:34:43.423352  543705 net.go:770] primary dev: ETH0
I0322 22:34:43.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:34:43.423377  543705 net.go:698] Add success.
I0322 22:34:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:34:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:34:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:34:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:34:53.409810  543705 memory.go:184] no items to output this cycle
I0322 22:34:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 22:35:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:35:03.409778  543705 memory.go:184] no items to output this cycle
I0322 22:35:03.409787  543705 cpu.go:275] no items to output this cycle
E0322 22:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:35:13.409900  543705 memory.go:191] Add success.
I0322 22:35:13.409901  543705 cpu.go:282] Add success.
W0322 22:35:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:35:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:35:13.409963  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:35:13.419717  543705 net.go:648] Add success.
I0322 22:35:13.422235  543705 net.go:770] primary dev: ETH0
I0322 22:35:13.422249  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:35:13.422263  543705 net.go:698] Add success.
I0322 22:35:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:35:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:35:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 22:35:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:35:14.456539  543705 disk_worker.go:494] system disk:vda1
I0322 22:35:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:35:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:35:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:35:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:35:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:35:23.409777  543705 memory.go:184] no items to output this cycle
I0322 22:35:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 22:35:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:35:33.409775  543705 memory.go:184] no items to output this cycle
I0322 22:35:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 22:35:36.833675  543705 disk_info.go:125] begin check local disk info of client
I0322 22:35:36.836193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:35:36.836199  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b1c0 0xc00007b200]
E0322 22:35:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:35:43.410653  543705 memory.go:191] Add success.
I0322 22:35:43.409822  543705 cpu.go:282] Add success.
I0322 22:35:43.420367  543705 net.go:648] Add success.
I0322 22:35:43.422875  543705 net.go:770] primary dev: ETH0
I0322 22:35:43.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:35:43.422905  543705 net.go:698] Add success.
I0322 22:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:35:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:35:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:35:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:35:53.409770  543705 memory.go:184] no items to output this cycle
I0322 22:35:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 22:36:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:36:03.409809  543705 memory.go:184] no items to output this cycle
I0322 22:36:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 22:36:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:36:13.409801  543705 memory.go:191] Add success.
I0322 22:36:13.409800  543705 cpu.go:282] Add success.
W0322 22:36:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:36:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:36:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:36:13.420358  543705 net.go:648] Add success.
I0322 22:36:13.422943  543705 net.go:770] primary dev: ETH0
I0322 22:36:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:36:13.422967  543705 net.go:698] Add success.
I0322 22:36:13.480127  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"461bf0ee-9e8f-4669-8859-24eff6747fee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:36:13.480159  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:36:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:36:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:36:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0322 22:36:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:36:14.456610  543705 disk_worker.go:494] system disk:vda1
I0322 22:36:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:36:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:36:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:36:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:36:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:36:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:36:23.409787  543705 memory.go:184] no items to output this cycle
I0322 22:36:23.409791  543705 cpu.go:275] no items to output this cycle
E0322 22:36:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:36:33.409809  543705 memory.go:184] no items to output this cycle
I0322 22:36:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 22:36:36.837681  543705 disk_info.go:125] begin check local disk info of client
I0322 22:36:36.840247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:36:36.840254  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039dbc0 0xc00039dc00]
I0322 22:36:39.868024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:36:39.868031  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:36:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:36:43.410687  543705 memory.go:191] Add success.
I0322 22:36:43.409803  543705 cpu.go:282] Add success.
I0322 22:36:43.420383  543705 net.go:648] Add success.
I0322 22:36:43.423203  543705 net.go:770] primary dev: ETH0
I0322 22:36:43.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:36:43.423228  543705 net.go:698] Add success.
I0322 22:36:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:36:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:36:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:36:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:36:53.409795  543705 memory.go:184] no items to output this cycle
I0322 22:36:53.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:37:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:37:03.409789  543705 cpu.go:275] no items to output this cycle
I0322 22:37:03.409794  543705 memory.go:184] no items to output this cycle
E0322 22:37:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:37:13.409814  543705 memory.go:191] Add success.
I0322 22:37:13.409823  543705 cpu.go:282] Add success.
W0322 22:37:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:37:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:37:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:37:13.420316  543705 net.go:648] Add success.
I0322 22:37:13.423121  543705 net.go:770] primary dev: ETH0
I0322 22:37:13.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:37:13.423147  543705 net.go:698] Add success.
I0322 22:37:13.452773  543705 event_worker.go:152] Polling the log file for events...
W0322 22:37:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:37:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0322 22:37:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:37:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:37:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:37:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:37:14.456572  543705 disk_worker.go:494] system disk:vda1
I0322 22:37:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:37:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:37:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:37:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:37:16.457908  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:37:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:37:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:37:16.472295  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:37:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:37:23.409802  543705 memory.go:184] no items to output this cycle
I0322 22:37:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 22:37:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:37:33.409789  543705 memory.go:184] no items to output this cycle
I0322 22:37:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 22:37:36.841672  543705 disk_info.go:125] begin check local disk info of client
I0322 22:37:36.844143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:37:36.844149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005814c0 0xc000581500]
E0322 22:37:43.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:37:43.410678  543705 memory.go:191] Add success.
I0322 22:37:43.409797  543705 cpu.go:282] Add success.
I0322 22:37:43.420387  543705 net.go:648] Add success.
I0322 22:37:43.423004  543705 net.go:770] primary dev: ETH0
I0322 22:37:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:37:43.423035  543705 net.go:698] Add success.
I0322 22:37:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:37:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:37:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:37:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:37:53.409763  543705 memory.go:184] no items to output this cycle
I0322 22:37:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 22:38:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:38:03.409780  543705 memory.go:184] no items to output this cycle
I0322 22:38:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 22:38:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:38:13.409786  543705 memory.go:191] Add success.
I0322 22:38:13.409788  543705 cpu.go:282] Add success.
W0322 22:38:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:38:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:38:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:38:13.420273  543705 net.go:648] Add success.
I0322 22:38:13.423114  543705 net.go:770] primary dev: ETH0
I0322 22:38:13.423127  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:38:13.423138  543705 net.go:698] Add success.
I0322 22:38:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:38:14.455369  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:38:14.455393  543705 disk_worker.go:708] disk space is not compliant
W0322 22:38:14.455397  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:38:14.456843  543705 disk_worker.go:494] system disk:vda1
I0322 22:38:14.456885  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:38:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:38:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:38:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:38:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:38:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:38:23.409785  543705 memory.go:184] no items to output this cycle
I0322 22:38:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 22:38:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:38:33.409800  543705 memory.go:184] no items to output this cycle
I0322 22:38:33.409802  543705 cpu.go:275] no items to output this cycle
I0322 22:38:36.845676  543705 disk_info.go:125] begin check local disk info of client
I0322 22:38:36.848194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:38:36.848203  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003beb00 0xc0003beb40]
E0322 22:38:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:38:43.410738  543705 memory.go:191] Add success.
I0322 22:38:43.409789  543705 cpu.go:282] Add success.
I0322 22:38:43.420510  543705 net.go:648] Add success.
I0322 22:38:43.423303  543705 net.go:770] primary dev: ETH0
I0322 22:38:43.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:38:43.423330  543705 net.go:698] Add success.
I0322 22:38:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:38:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:38:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:38:53.410349  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:38:53.410365  543705 memory.go:184] no items to output this cycle
I0322 22:38:53.410373  543705 cpu.go:275] no items to output this cycle
E0322 22:39:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:39:03.409813  543705 memory.go:184] no items to output this cycle
I0322 22:39:03.409824  543705 cpu.go:275] no items to output this cycle
E0322 22:39:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:39:13.409787  543705 memory.go:191] Add success.
W0322 22:39:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:39:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:39:13.409826  543705 cpu.go:282] Add success.
I0322 22:39:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:39:13.420123  543705 net.go:648] Add success.
I0322 22:39:13.423070  543705 net.go:770] primary dev: ETH0
I0322 22:39:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:39:13.423099  543705 net.go:698] Add success.
I0322 22:39:13.468385  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"006e99db-a4e1-4083-882a-c7c2e44142f2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:39:13.468421  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:39:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:39:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:39:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 22:39:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:39:14.456549  543705 disk_worker.go:494] system disk:vda1
I0322 22:39:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:39:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:39:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:39:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:39:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:39:23.409783  543705 memory.go:184] no items to output this cycle
I0322 22:39:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:39:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:39:33.409806  543705 memory.go:184] no items to output this cycle
I0322 22:39:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 22:39:36.849672  543705 disk_info.go:125] begin check local disk info of client
I0322 22:39:36.852212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:39:36.852219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005809c0 0xc000580a00]
I0322 22:39:39.869729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:39:39.869737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:39:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:39:43.410668  543705 memory.go:191] Add success.
I0322 22:39:43.409800  543705 cpu.go:282] Add success.
I0322 22:39:43.420405  543705 net.go:648] Add success.
I0322 22:39:43.423185  543705 net.go:770] primary dev: ETH0
I0322 22:39:43.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:39:43.423211  543705 net.go:698] Add success.
I0322 22:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:39:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:39:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:39:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:39:53.409784  543705 memory.go:184] no items to output this cycle
I0322 22:39:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 22:40:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:40:03.409779  543705 memory.go:184] no items to output this cycle
I0322 22:40:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 22:40:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:40:13.409793  543705 memory.go:191] Add success.
I0322 22:40:13.409797  543705 cpu.go:282] Add success.
W0322 22:40:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:40:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:40:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:40:13.420248  543705 net.go:648] Add success.
I0322 22:40:13.422977  543705 net.go:770] primary dev: ETH0
I0322 22:40:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:40:13.423002  543705 net.go:698] Add success.
I0322 22:40:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:40:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:40:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 22:40:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:40:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 22:40:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:40:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:40:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:40:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:40:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:40:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:40:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:40:23.409802  543705 memory.go:184] no items to output this cycle
I0322 22:40:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 22:40:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:40:33.409769  543705 memory.go:184] no items to output this cycle
I0322 22:40:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 22:40:36.853678  543705 disk_info.go:125] begin check local disk info of client
I0322 22:40:36.856229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:40:36.856236  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387bc0 0xc000387c00]
E0322 22:40:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:40:43.410609  543705 memory.go:191] Add success.
I0322 22:40:43.409809  543705 cpu.go:282] Add success.
I0322 22:40:43.420319  543705 net.go:648] Add success.
I0322 22:40:43.422767  543705 net.go:770] primary dev: ETH0
I0322 22:40:43.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:40:43.422794  543705 net.go:698] Add success.
I0322 22:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:40:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:40:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:40:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:40:53.409779  543705 cpu.go:275] no items to output this cycle
I0322 22:40:53.409783  543705 memory.go:184] no items to output this cycle
E0322 22:41:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:41:03.409786  543705 memory.go:184] no items to output this cycle
I0322 22:41:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 22:41:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:41:13.409825  543705 memory.go:191] Add success.
I0322 22:41:13.409828  543705 cpu.go:282] Add success.
W0322 22:41:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:41:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:41:13.410039  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:41:13.419729  543705 net.go:648] Add success.
I0322 22:41:13.422508  543705 net.go:770] primary dev: ETH0
I0322 22:41:13.422521  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:41:13.422532  543705 net.go:698] Add success.
I0322 22:41:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:41:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:41:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0322 22:41:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:41:14.456592  543705 disk_worker.go:494] system disk:vda1
I0322 22:41:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:41:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:41:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:41:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:41:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:41:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:41:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:41:23.409809  543705 memory.go:184] no items to output this cycle
I0322 22:41:23.409822  543705 cpu.go:275] no items to output this cycle
E0322 22:41:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:41:33.409781  543705 memory.go:184] no items to output this cycle
I0322 22:41:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 22:41:36.857674  543705 disk_info.go:125] begin check local disk info of client
I0322 22:41:36.860190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:41:36.860196  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505a40 0xc000505a80]
E0322 22:41:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:41:43.410664  543705 memory.go:191] Add success.
I0322 22:41:43.409807  543705 cpu.go:282] Add success.
I0322 22:41:43.420357  543705 net.go:648] Add success.
I0322 22:41:43.423186  543705 net.go:770] primary dev: ETH0
I0322 22:41:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:41:43.423214  543705 net.go:698] Add success.
I0322 22:41:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:41:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:41:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:41:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:41:53.409789  543705 memory.go:184] no items to output this cycle
I0322 22:41:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 22:42:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:42:03.409814  543705 memory.go:184] no items to output this cycle
I0322 22:42:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 22:42:13.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:42:13.409902  543705 memory.go:191] Add success.
W0322 22:42:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:42:13.409986  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:42:13.409990  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:42:13.410021  543705 cpu.go:282] Add success.
I0322 22:42:13.419708  543705 net.go:648] Add success.
I0322 22:42:13.422492  543705 net.go:770] primary dev: ETH0
I0322 22:42:13.422505  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:42:13.422516  543705 net.go:698] Add success.
I0322 22:42:13.463311  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"63f30248-e4ff-43a4-bff9-e4dd92cd74d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:42:13.463343  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 22:42:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:42:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0322 22:42:14.455241  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:42:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:42:14.455916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:42:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:42:14.457049  543705 disk_worker.go:494] system disk:vda1
I0322 22:42:14.457080  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:42:15.456506  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:42:15.456515  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:42:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:42:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:42:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:42:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:42:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:42:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:42:23.409779  543705 memory.go:184] no items to output this cycle
I0322 22:42:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 22:42:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:42:33.409770  543705 memory.go:184] no items to output this cycle
I0322 22:42:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 22:42:36.861677  543705 disk_info.go:125] begin check local disk info of client
I0322 22:42:36.864222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:42:36.864229  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac140 0xc0002ac180]
I0322 22:42:39.872048  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:42:39.872055  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:42:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:42:43.410650  543705 memory.go:191] Add success.
I0322 22:42:43.409816  543705 cpu.go:282] Add success.
I0322 22:42:43.420360  543705 net.go:648] Add success.
I0322 22:42:43.423136  543705 net.go:770] primary dev: ETH0
I0322 22:42:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:42:43.423162  543705 net.go:698] Add success.
I0322 22:42:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:42:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:42:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:42:53.409781  543705 memory.go:184] no items to output this cycle
I0322 22:42:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 22:43:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:43:03.409787  543705 memory.go:184] no items to output this cycle
I0322 22:43:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:43:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:43:13.409802  543705 memory.go:191] Add success.
I0322 22:43:13.409802  543705 cpu.go:282] Add success.
W0322 22:43:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:43:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:43:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:43:13.420554  543705 net.go:648] Add success.
I0322 22:43:13.423332  543705 net.go:770] primary dev: ETH0
I0322 22:43:13.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:43:13.423357  543705 net.go:698] Add success.
I0322 22:43:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:43:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:43:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 22:43:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:43:14.456518  543705 disk_worker.go:494] system disk:vda1
I0322 22:43:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:43:16.458023  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:43:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:43:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:43:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:43:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:43:23.409769  543705 memory.go:184] no items to output this cycle
I0322 22:43:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 22:43:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:43:33.409781  543705 memory.go:184] no items to output this cycle
I0322 22:43:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 22:43:36.865676  543705 disk_info.go:125] begin check local disk info of client
I0322 22:43:36.868260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:43:36.868268  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d48c0 0xc0003d4900]
E0322 22:43:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:43:43.410751  543705 memory.go:191] Add success.
I0322 22:43:43.409820  543705 cpu.go:282] Add success.
I0322 22:43:43.420436  543705 net.go:648] Add success.
I0322 22:43:43.423316  543705 net.go:770] primary dev: ETH0
I0322 22:43:43.423329  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:43:43.423341  543705 net.go:698] Add success.
I0322 22:43:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:43:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:43:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:43:53.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:43:53.409868  543705 memory.go:184] no items to output this cycle
I0322 22:43:53.409943  543705 cpu.go:275] no items to output this cycle
E0322 22:44:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:44:03.409782  543705 cpu.go:275] no items to output this cycle
I0322 22:44:03.409797  543705 memory.go:184] no items to output this cycle
E0322 22:44:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:44:13.409813  543705 memory.go:191] Add success.
I0322 22:44:13.409817  543705 cpu.go:282] Add success.
W0322 22:44:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:44:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:44:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:44:13.420160  543705 net.go:648] Add success.
I0322 22:44:13.422873  543705 net.go:770] primary dev: ETH0
I0322 22:44:13.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:44:13.422898  543705 net.go:698] Add success.
I0322 22:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:44:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:44:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 22:44:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:44:14.456514  543705 disk_worker.go:494] system disk:vda1
I0322 22:44:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:44:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:44:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:44:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:44:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:44:16.472464  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:44:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:44:23.409792  543705 memory.go:184] no items to output this cycle
I0322 22:44:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 22:44:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:44:33.409805  543705 memory.go:184] no items to output this cycle
I0322 22:44:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 22:44:36.869675  543705 disk_info.go:125] begin check local disk info of client
I0322 22:44:36.872254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:44:36.872261  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b00 0xc0000c4b40]
E0322 22:44:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:44:43.410748  543705 memory.go:191] Add success.
I0322 22:44:43.409813  543705 cpu.go:282] Add success.
I0322 22:44:43.420479  543705 net.go:648] Add success.
I0322 22:44:43.423032  543705 net.go:770] primary dev: ETH0
I0322 22:44:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:44:43.423061  543705 net.go:698] Add success.
I0322 22:44:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:44:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:44:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:44:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:44:53.409892  543705 cpu.go:275] no items to output this cycle
I0322 22:44:53.409913  543705 memory.go:184] no items to output this cycle
E0322 22:45:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:45:03.409782  543705 memory.go:184] no items to output this cycle
I0322 22:45:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 22:45:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:45:13.409822  543705 memory.go:191] Add success.
I0322 22:45:13.409837  543705 cpu.go:282] Add success.
W0322 22:45:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:45:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:45:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:45:13.420251  543705 net.go:648] Add success.
I0322 22:45:13.423190  543705 net.go:770] primary dev: ETH0
I0322 22:45:13.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:45:13.423219  543705 net.go:698] Add success.
I0322 22:45:13.463784  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c396ac81-0ef2-4f65-a0c5-5d81516e070f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:45:13.463828  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:45:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:45:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:45:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 22:45:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:45:14.456540  543705 disk_worker.go:494] system disk:vda1
I0322 22:45:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:45:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:45:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:45:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:45:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:45:16.472547  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:45:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:45:23.409762  543705 memory.go:184] no items to output this cycle
I0322 22:45:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 22:45:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:45:33.409807  543705 memory.go:184] no items to output this cycle
I0322 22:45:33.409820  543705 cpu.go:275] no items to output this cycle
I0322 22:45:36.873674  543705 disk_info.go:125] begin check local disk info of client
I0322 22:45:36.876270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:45:36.876277  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005090c0 0xc000509100]
I0322 22:45:39.873729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:45:39.873735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:45:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:45:43.410681  543705 memory.go:191] Add success.
I0322 22:45:43.409805  543705 cpu.go:282] Add success.
I0322 22:45:43.420385  543705 net.go:648] Add success.
I0322 22:45:43.422955  543705 net.go:770] primary dev: ETH0
I0322 22:45:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:45:43.422985  543705 net.go:698] Add success.
I0322 22:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:45:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:45:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:45:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:45:53.409791  543705 memory.go:184] no items to output this cycle
I0322 22:45:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 22:46:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:46:03.409812  543705 memory.go:184] no items to output this cycle
I0322 22:46:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 22:46:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:46:13.409823  543705 memory.go:191] Add success.
I0322 22:46:13.409826  543705 cpu.go:282] Add success.
W0322 22:46:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:46:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:46:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:46:13.420159  543705 net.go:648] Add success.
I0322 22:46:13.423519  543705 net.go:770] primary dev: ETH0
I0322 22:46:13.423532  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:46:13.423544  543705 net.go:698] Add success.
I0322 22:46:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:46:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:46:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 22:46:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:46:14.456589  543705 disk_worker.go:494] system disk:vda1
I0322 22:46:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:46:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:46:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:46:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:46:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:46:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:46:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:46:23.409803  543705 memory.go:184] no items to output this cycle
I0322 22:46:23.409813  543705 cpu.go:275] no items to output this cycle
E0322 22:46:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:46:33.409768  543705 memory.go:184] no items to output this cycle
I0322 22:46:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 22:46:36.877677  543705 disk_info.go:125] begin check local disk info of client
I0322 22:46:36.880248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:46:36.880255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0322 22:46:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:46:43.410642  543705 memory.go:191] Add success.
I0322 22:46:43.409802  543705 cpu.go:282] Add success.
I0322 22:46:43.420536  543705 net.go:648] Add success.
I0322 22:46:43.423104  543705 net.go:770] primary dev: ETH0
I0322 22:46:43.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:46:43.423130  543705 net.go:698] Add success.
I0322 22:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:46:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:46:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:46:53.409765  543705 memory.go:184] no items to output this cycle
I0322 22:46:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 22:47:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:47:03.409789  543705 memory.go:184] no items to output this cycle
I0322 22:47:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 22:47:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:47:13.409797  543705 cpu.go:282] Add success.
I0322 22:47:13.409810  543705 memory.go:191] Add success.
W0322 22:47:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:47:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:47:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:47:13.420145  543705 net.go:648] Add success.
I0322 22:47:13.422799  543705 net.go:770] primary dev: ETH0
I0322 22:47:13.422812  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:47:13.422824  543705 net.go:698] Add success.
I0322 22:47:13.453432  543705 event_worker.go:152] Polling the log file for events...
W0322 22:47:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:47:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 22:47:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:47:14.455976  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:47:14.455985  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:47:14.455990  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:47:14.456475  543705 disk_worker.go:494] system disk:vda1
I0322 22:47:14.456504  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:47:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:47:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:47:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:47:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:47:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:47:16.458007  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:47:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:47:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:47:23.409791  543705 memory.go:184] no items to output this cycle
I0322 22:47:23.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:47:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:47:33.409794  543705 memory.go:184] no items to output this cycle
I0322 22:47:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 22:47:36.881671  543705 disk_info.go:125] begin check local disk info of client
I0322 22:47:36.884160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:47:36.884174  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272000 0xc000272040]
E0322 22:47:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:47:43.410585  543705 memory.go:191] Add success.
I0322 22:47:43.409796  543705 cpu.go:282] Add success.
I0322 22:47:43.420298  543705 net.go:648] Add success.
I0322 22:47:43.422961  543705 net.go:770] primary dev: ETH0
I0322 22:47:43.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:47:43.422988  543705 net.go:698] Add success.
I0322 22:47:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:47:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:47:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:47:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:47:53.409762  543705 memory.go:184] no items to output this cycle
I0322 22:47:53.409797  543705 cpu.go:275] no items to output this cycle
E0322 22:48:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:48:03.409792  543705 memory.go:184] no items to output this cycle
I0322 22:48:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 22:48:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:48:13.409787  543705 memory.go:191] Add success.
I0322 22:48:13.409810  543705 cpu.go:282] Add success.
W0322 22:48:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:48:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:48:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:48:13.420151  543705 net.go:648] Add success.
I0322 22:48:13.422826  543705 net.go:770] primary dev: ETH0
I0322 22:48:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:48:13.422855  543705 net.go:698] Add success.
I0322 22:48:13.468693  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6896d447-a4c4-4714-8471-3c600444a24e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:48:13.468727  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:48:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:48:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:48:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 22:48:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:48:14.456601  543705 disk_worker.go:494] system disk:vda1
I0322 22:48:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:48:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:48:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:48:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:48:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:48:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:48:23.409759  543705 memory.go:184] no items to output this cycle
I0322 22:48:23.409792  543705 cpu.go:275] no items to output this cycle
E0322 22:48:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:48:33.409797  543705 memory.go:184] no items to output this cycle
I0322 22:48:33.409810  543705 cpu.go:275] no items to output this cycle
I0322 22:48:36.885681  543705 disk_info.go:125] begin check local disk info of client
I0322 22:48:36.888278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:48:36.888286  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504780 0xc0005047c0]
I0322 22:48:39.876081  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:48:39.876088  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:48:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:48:43.410914  543705 memory.go:191] Add success.
I0322 22:48:43.409819  543705 cpu.go:282] Add success.
I0322 22:48:43.420616  543705 net.go:648] Add success.
I0322 22:48:43.423162  543705 net.go:770] primary dev: ETH0
I0322 22:48:43.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:48:43.423190  543705 net.go:698] Add success.
I0322 22:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:48:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:48:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:48:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:48:53.409800  543705 memory.go:184] no items to output this cycle
I0322 22:48:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 22:49:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:49:03.409772  543705 memory.go:184] no items to output this cycle
I0322 22:49:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 22:49:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:49:13.409795  543705 memory.go:191] Add success.
I0322 22:49:13.409816  543705 cpu.go:282] Add success.
W0322 22:49:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:49:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:49:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:49:13.420239  543705 net.go:648] Add success.
I0322 22:49:13.423107  543705 net.go:770] primary dev: ETH0
I0322 22:49:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:49:13.423137  543705 net.go:698] Add success.
I0322 22:49:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:49:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:49:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 22:49:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:49:14.456623  543705 disk_worker.go:494] system disk:vda1
I0322 22:49:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:49:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:49:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:49:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:49:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:49:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:49:23.409764  543705 memory.go:184] no items to output this cycle
I0322 22:49:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 22:49:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:49:33.409806  543705 memory.go:184] no items to output this cycle
I0322 22:49:33.409822  543705 cpu.go:275] no items to output this cycle
I0322 22:49:36.889672  543705 disk_info.go:125] begin check local disk info of client
I0322 22:49:36.892214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:49:36.892221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5b80 0xc0003d5bc0]
E0322 22:49:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:49:43.410608  543705 memory.go:191] Add success.
I0322 22:49:43.409794  543705 cpu.go:282] Add success.
I0322 22:49:43.420318  543705 net.go:648] Add success.
I0322 22:49:43.422708  543705 net.go:770] primary dev: ETH0
I0322 22:49:43.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:49:43.422734  543705 net.go:698] Add success.
I0322 22:49:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:49:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:49:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:49:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:49:53.409772  543705 memory.go:184] no items to output this cycle
I0322 22:49:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 22:50:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:50:03.409786  543705 cpu.go:275] no items to output this cycle
I0322 22:50:03.409793  543705 memory.go:184] no items to output this cycle
E0322 22:50:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:50:13.409800  543705 memory.go:191] Add success.
I0322 22:50:13.409801  543705 cpu.go:282] Add success.
W0322 22:50:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:50:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:50:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:50:13.420192  543705 net.go:648] Add success.
I0322 22:50:13.422742  543705 net.go:770] primary dev: ETH0
I0322 22:50:13.422756  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:50:13.422768  543705 net.go:698] Add success.
I0322 22:50:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:50:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:50:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 22:50:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:50:14.456507  543705 disk_worker.go:494] system disk:vda1
I0322 22:50:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:50:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:50:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:50:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:50:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:50:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:50:23.409760  543705 memory.go:184] no items to output this cycle
I0322 22:50:23.409797  543705 cpu.go:275] no items to output this cycle
E0322 22:50:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:50:33.409794  543705 memory.go:184] no items to output this cycle
I0322 22:50:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 22:50:36.893674  543705 disk_info.go:125] begin check local disk info of client
I0322 22:50:36.896282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:50:36.896288  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf00 0xc0001aaf40]
E0322 22:50:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:50:43.410744  543705 memory.go:191] Add success.
I0322 22:50:43.409795  543705 cpu.go:282] Add success.
I0322 22:50:43.420450  543705 net.go:648] Add success.
I0322 22:50:43.423034  543705 net.go:770] primary dev: ETH0
I0322 22:50:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:50:43.423064  543705 net.go:698] Add success.
I0322 22:50:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:50:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:50:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:50:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:50:53.409766  543705 memory.go:184] no items to output this cycle
I0322 22:50:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 22:51:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:51:03.409804  543705 memory.go:184] no items to output this cycle
I0322 22:51:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 22:51:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:51:13.409794  543705 memory.go:191] Add success.
I0322 22:51:13.409813  543705 cpu.go:282] Add success.
W0322 22:51:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:51:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:51:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:51:13.420188  543705 net.go:648] Add success.
I0322 22:51:13.423120  543705 net.go:770] primary dev: ETH0
I0322 22:51:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:51:13.423146  543705 net.go:698] Add success.
I0322 22:51:13.468706  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4704f2bd-7843-419e-854a-b4409d056b9b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:51:13.468741  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:51:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:51:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:51:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 22:51:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:51:14.456544  543705 disk_worker.go:494] system disk:vda1
I0322 22:51:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:51:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:51:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:51:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:51:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:51:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:51:23.409763  543705 memory.go:184] no items to output this cycle
I0322 22:51:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 22:51:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:51:33.409786  543705 memory.go:184] no items to output this cycle
I0322 22:51:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 22:51:36.897673  543705 disk_info.go:125] begin check local disk info of client
I0322 22:51:36.900150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:51:36.900156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000503c00 0xc000503c40]
I0322 22:51:39.877724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:51:39.877729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:51:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:51:43.410645  543705 memory.go:191] Add success.
I0322 22:51:43.409790  543705 cpu.go:282] Add success.
I0322 22:51:43.420333  543705 net.go:648] Add success.
I0322 22:51:43.422889  543705 net.go:770] primary dev: ETH0
I0322 22:51:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:51:43.422916  543705 net.go:698] Add success.
I0322 22:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:51:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:51:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:51:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:51:53.409773  543705 memory.go:184] no items to output this cycle
I0322 22:51:53.409777  543705 cpu.go:275] no items to output this cycle
E0322 22:52:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:52:03.409781  543705 memory.go:184] no items to output this cycle
I0322 22:52:03.409913  543705 cpu.go:275] no items to output this cycle
E0322 22:52:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:52:13.409780  543705 memory.go:191] Add success.
W0322 22:52:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 22:52:13.409811  543705 cpu.go:282] Add success.
W0322 22:52:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:52:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:52:13.420172  543705 net.go:648] Add success.
I0322 22:52:13.422803  543705 net.go:770] primary dev: ETH0
I0322 22:52:13.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:52:13.422828  543705 net.go:698] Add success.
W0322 22:52:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:52:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0322 22:52:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:52:14.456932  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:52:14.456942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:52:14.456949  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:52:14.457002  543705 disk_worker.go:494] system disk:vda1
I0322 22:52:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:52:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:52:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:52:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:52:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:52:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:52:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:52:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:52:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:52:23.409767  543705 memory.go:184] no items to output this cycle
I0322 22:52:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 22:52:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:52:33.409799  543705 memory.go:184] no items to output this cycle
I0322 22:52:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 22:52:36.901677  543705 disk_info.go:125] begin check local disk info of client
I0322 22:52:36.904259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:52:36.904266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5000 0xc0000c5040]
E0322 22:52:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:52:43.410610  543705 memory.go:191] Add success.
I0322 22:52:43.409806  543705 cpu.go:282] Add success.
I0322 22:52:43.420323  543705 net.go:648] Add success.
I0322 22:52:43.422620  543705 net.go:770] primary dev: ETH0
I0322 22:52:43.422634  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:52:43.422647  543705 net.go:698] Add success.
I0322 22:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:52:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:52:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:52:53.409780  543705 memory.go:184] no items to output this cycle
I0322 22:52:53.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:53:03.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:53:03.409894  543705 memory.go:184] no items to output this cycle
I0322 22:53:03.409972  543705 cpu.go:275] no items to output this cycle
E0322 22:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:53:13.409790  543705 memory.go:191] Add success.
I0322 22:53:13.409811  543705 cpu.go:282] Add success.
W0322 22:53:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:53:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:53:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:53:13.420298  543705 net.go:648] Add success.
I0322 22:53:13.423119  543705 net.go:770] primary dev: ETH0
I0322 22:53:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:53:13.423144  543705 net.go:698] Add success.
I0322 22:53:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:53:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:53:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 22:53:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:53:14.456586  543705 disk_worker.go:494] system disk:vda1
I0322 22:53:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:53:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:53:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:53:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:53:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:53:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:53:23.409768  543705 memory.go:184] no items to output this cycle
I0322 22:53:23.409773  543705 cpu.go:275] no items to output this cycle
E0322 22:53:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:53:33.409801  543705 memory.go:184] no items to output this cycle
I0322 22:53:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 22:53:36.905674  543705 disk_info.go:125] begin check local disk info of client
I0322 22:53:36.908152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:53:36.908158  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0322 22:53:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:53:43.410632  543705 memory.go:191] Add success.
I0322 22:53:43.409815  543705 cpu.go:282] Add success.
I0322 22:53:43.420203  543705 net.go:770] primary dev: ETH0
I0322 22:53:43.420218  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:53:43.420234  543705 net.go:698] Add success.
I0322 22:53:43.420587  543705 net.go:648] Add success.
I0322 22:53:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:53:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:53:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:53:53.410217  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:53:53.410226  543705 cpu.go:275] no items to output this cycle
I0322 22:53:53.410233  543705 memory.go:184] no items to output this cycle
E0322 22:54:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:54:03.409785  543705 memory.go:184] no items to output this cycle
I0322 22:54:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 22:54:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:54:13.409819  543705 memory.go:191] Add success.
I0322 22:54:13.409821  543705 cpu.go:282] Add success.
W0322 22:54:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:54:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:54:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:54:13.420285  543705 net.go:648] Add success.
I0322 22:54:13.423041  543705 net.go:770] primary dev: ETH0
I0322 22:54:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:54:13.423068  543705 net.go:698] Add success.
I0322 22:54:13.464079  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6b473cbc-b625-4013-a9ba-54c2f6b139e2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:54:13.464119  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 22:54:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:54:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:54:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 22:54:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:54:14.456511  543705 disk_worker.go:494] system disk:vda1
I0322 22:54:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:54:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:54:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:54:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:54:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:54:23.409768  543705 memory.go:184] no items to output this cycle
I0322 22:54:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:54:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:54:33.409794  543705 memory.go:184] no items to output this cycle
I0322 22:54:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 22:54:36.909678  543705 disk_info.go:125] begin check local disk info of client
I0322 22:54:36.912235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:54:36.912242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa9c0 0xc0001aaa00]
I0322 22:54:39.880101  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:54:39.880107  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:54:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:54:43.410706  543705 memory.go:191] Add success.
I0322 22:54:43.409823  543705 cpu.go:282] Add success.
I0322 22:54:43.420404  543705 net.go:648] Add success.
I0322 22:54:43.423023  543705 net.go:770] primary dev: ETH0
I0322 22:54:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:54:43.423049  543705 net.go:698] Add success.
I0322 22:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:54:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:54:53.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:54:53.409926  543705 cpu.go:275] no items to output this cycle
I0322 22:54:53.409931  543705 memory.go:184] no items to output this cycle
E0322 22:55:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:55:03.409809  543705 memory.go:184] no items to output this cycle
I0322 22:55:03.409823  543705 cpu.go:275] no items to output this cycle
E0322 22:55:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:55:13.409781  543705 memory.go:191] Add success.
I0322 22:55:13.409806  543705 cpu.go:282] Add success.
W0322 22:55:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:55:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:55:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:55:13.420182  543705 net.go:648] Add success.
I0322 22:55:13.422666  543705 net.go:770] primary dev: ETH0
I0322 22:55:13.422679  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:55:13.422692  543705 net.go:698] Add success.
I0322 22:55:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:55:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:55:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0322 22:55:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:55:14.456506  543705 disk_worker.go:494] system disk:vda1
I0322 22:55:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:55:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:55:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:55:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:55:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:55:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:55:23.409779  543705 memory.go:184] no items to output this cycle
I0322 22:55:23.409786  543705 cpu.go:275] no items to output this cycle
E0322 22:55:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:55:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 22:55:33.409806  543705 memory.go:184] no items to output this cycle
I0322 22:55:36.913678  543705 disk_info.go:125] begin check local disk info of client
I0322 22:55:36.916149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:55:36.916156  543705 disk_info.go:196] parse disk info done, disk is : [0xc000397940 0xc000397980]
E0322 22:55:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:55:43.410616  543705 memory.go:191] Add success.
I0322 22:55:43.409819  543705 cpu.go:282] Add success.
I0322 22:55:43.420318  543705 net.go:648] Add success.
I0322 22:55:43.422769  543705 net.go:770] primary dev: ETH0
I0322 22:55:43.422783  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:55:43.422795  543705 net.go:698] Add success.
I0322 22:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:55:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:55:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:55:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:55:53.409801  543705 memory.go:184] no items to output this cycle
I0322 22:55:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 22:56:03.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:56:03.409901  543705 cpu.go:275] no items to output this cycle
I0322 22:56:03.409917  543705 memory.go:184] no items to output this cycle
E0322 22:56:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:56:13.409791  543705 memory.go:191] Add success.
I0322 22:56:13.409809  543705 cpu.go:282] Add success.
W0322 22:56:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:56:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:56:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:56:13.420530  543705 net.go:648] Add success.
I0322 22:56:13.422867  543705 net.go:770] primary dev: ETH0
I0322 22:56:13.422880  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:56:13.422893  543705 net.go:698] Add success.
I0322 22:56:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:56:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:56:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 22:56:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:56:14.456569  543705 disk_worker.go:494] system disk:vda1
I0322 22:56:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:56:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:56:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:56:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:56:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:56:23.409769  543705 memory.go:184] no items to output this cycle
I0322 22:56:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 22:56:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:56:33.409795  543705 memory.go:184] no items to output this cycle
I0322 22:56:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 22:56:36.917682  543705 disk_info.go:125] begin check local disk info of client
I0322 22:56:36.920266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:56:36.920274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329900 0xc000329940]
E0322 22:56:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:56:43.410619  543705 memory.go:191] Add success.
I0322 22:56:43.409804  543705 cpu.go:282] Add success.
I0322 22:56:43.420330  543705 net.go:648] Add success.
I0322 22:56:43.422962  543705 net.go:770] primary dev: ETH0
I0322 22:56:43.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:56:43.422993  543705 net.go:698] Add success.
I0322 22:56:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:56:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:56:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:56:53.409809  543705 memory.go:184] no items to output this cycle
I0322 22:56:53.409819  543705 cpu.go:275] no items to output this cycle
E0322 22:57:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:57:03.409798  543705 memory.go:184] no items to output this cycle
I0322 22:57:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 22:57:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:57:13.409797  543705 memory.go:191] Add success.
I0322 22:57:13.409797  543705 cpu.go:282] Add success.
W0322 22:57:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:57:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:57:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:57:13.420251  543705 net.go:648] Add success.
I0322 22:57:13.428926  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 22:57:13.429004  543705 net.go:770] primary dev: ETH0
I0322 22:57:13.429016  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:57:13.429027  543705 net.go:698] Add success.
I0322 22:57:13.453609  543705 event_worker.go:152] Polling the log file for events...
I0322 22:57:13.463791  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ea5a4c53-fc9a-4a27-a874-020250c026a4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 22:57:13.463824  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 22:57:14.455240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:57:14.455255  543705 disk_worker.go:708] disk space is not compliant
W0322 22:57:14.455259  543705 disk_worker.go:728] disk inode is not compliant
E0322 22:57:14.455893  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 22:57:14.455902  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 22:57:14.455908  543705 custom_config.go:64] query custom config with name: gpu
I0322 22:57:14.456821  543705 disk_worker.go:494] system disk:vda1
I0322 22:57:14.456866  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 22:57:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 22:57:15.456878  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:57:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 22:57:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 22:57:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:57:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:57:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:57:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:57:23.409781  543705 memory.go:184] no items to output this cycle
I0322 22:57:23.409785  543705 cpu.go:275] no items to output this cycle
E0322 22:57:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:57:33.409773  543705 memory.go:184] no items to output this cycle
I0322 22:57:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 22:57:36.921675  543705 disk_info.go:125] begin check local disk info of client
I0322 22:57:36.924183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:57:36.924190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5500 0xc0000c5540]
I0322 22:57:39.881727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 22:57:39.881733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 22:57:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:57:43.410739  543705 memory.go:191] Add success.
I0322 22:57:43.409819  543705 cpu.go:282] Add success.
I0322 22:57:43.420447  543705 net.go:648] Add success.
I0322 22:57:43.423414  543705 net.go:770] primary dev: ETH0
I0322 22:57:43.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:57:43.423439  543705 net.go:698] Add success.
I0322 22:57:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:57:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:57:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:57:53.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:57:53.410384  543705 memory.go:184] no items to output this cycle
I0322 22:57:53.410405  543705 cpu.go:275] no items to output this cycle
E0322 22:58:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:58:03.409817  543705 memory.go:184] no items to output this cycle
I0322 22:58:03.409828  543705 cpu.go:275] no items to output this cycle
E0322 22:58:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:58:13.409799  543705 memory.go:191] Add success.
I0322 22:58:13.409802  543705 cpu.go:282] Add success.
W0322 22:58:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:58:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:58:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:58:13.420151  543705 net.go:648] Add success.
I0322 22:58:13.422957  543705 net.go:770] primary dev: ETH0
I0322 22:58:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:58:13.422986  543705 net.go:698] Add success.
I0322 22:58:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:58:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:58:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 22:58:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:58:14.456597  543705 disk_worker.go:494] system disk:vda1
I0322 22:58:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:58:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:58:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:58:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:58:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:58:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:58:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:58:23.409768  543705 memory.go:184] no items to output this cycle
I0322 22:58:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 22:58:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:58:33.409787  543705 memory.go:184] no items to output this cycle
I0322 22:58:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 22:58:36.925682  543705 disk_info.go:125] begin check local disk info of client
I0322 22:58:36.928304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:58:36.928313  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b80 0xc0000c4bc0]
E0322 22:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:58:43.410605  543705 memory.go:191] Add success.
I0322 22:58:43.409824  543705 cpu.go:282] Add success.
I0322 22:58:43.420310  543705 net.go:648] Add success.
I0322 22:58:43.422844  543705 net.go:770] primary dev: ETH0
I0322 22:58:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:58:43.422872  543705 net.go:698] Add success.
I0322 22:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:58:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:58:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:58:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:58:53.409796  543705 memory.go:184] no items to output this cycle
I0322 22:58:53.409809  543705 cpu.go:275] no items to output this cycle
E0322 22:59:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:59:03.409791  543705 memory.go:184] no items to output this cycle
I0322 22:59:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 22:59:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:59:13.409791  543705 cpu.go:282] Add success.
I0322 22:59:13.409799  543705 memory.go:191] Add success.
W0322 22:59:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 22:59:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 22:59:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 22:59:13.420125  543705 net.go:648] Add success.
I0322 22:59:13.422829  543705 net.go:770] primary dev: ETH0
I0322 22:59:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:59:13.422855  543705 net.go:698] Add success.
I0322 22:59:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 22:59:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 22:59:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0322 22:59:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0322 22:59:14.456584  543705 disk_worker.go:494] system disk:vda1
I0322 22:59:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 22:59:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 22:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:59:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:59:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 22:59:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0322 22:59:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:59:23.409766  543705 memory.go:184] no items to output this cycle
I0322 22:59:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 22:59:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:59:33.409778  543705 memory.go:184] no items to output this cycle
I0322 22:59:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 22:59:36.929672  543705 disk_info.go:125] begin check local disk info of client
I0322 22:59:36.932140  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 22:59:36.932147  543705 disk_info.go:196] parse disk info done, disk is : [0xc000540240 0xc000540280]
E0322 22:59:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:59:43.410782  543705 memory.go:191] Add success.
I0322 22:59:43.409798  543705 cpu.go:282] Add success.
I0322 22:59:43.420513  543705 net.go:648] Add success.
I0322 22:59:43.423400  543705 net.go:770] primary dev: ETH0
I0322 22:59:43.423413  543705 net.go:802] Send network stats successfully!,count is 6
I0322 22:59:43.423426  543705 net.go:698] Add success.
I0322 22:59:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 22:59:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 22:59:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 22:59:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 22:59:53.409801  543705 memory.go:184] no items to output this cycle
I0322 22:59:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 23:00:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:00:03.409785  543705 memory.go:184] no items to output this cycle
I0322 23:00:03.409804  543705 cpu.go:275] no items to output this cycle
E0322 23:00:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:00:13.409797  543705 memory.go:191] Add success.
I0322 23:00:13.409797  543705 cpu.go:282] Add success.
W0322 23:00:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:00:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:00:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:00:13.420141  543705 net.go:648] Add success.
I0322 23:00:13.423054  543705 net.go:770] primary dev: ETH0
I0322 23:00:13.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:00:13.423084  543705 net.go:698] Add success.
I0322 23:00:13.468756  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"75084b35-f6f0-4dde-8ca2-efc8cf2d1b19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:00:13.468789  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:00:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:00:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:00:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 23:00:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:00:14.456497  543705 disk_worker.go:494] system disk:vda1
I0322 23:00:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:00:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:00:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:00:23.410357  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:00:23.410372  543705 memory.go:184] no items to output this cycle
I0322 23:00:23.410383  543705 cpu.go:275] no items to output this cycle
E0322 23:00:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:00:33.409776  543705 memory.go:184] no items to output this cycle
I0322 23:00:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 23:00:36.933682  543705 disk_info.go:125] begin check local disk info of client
I0322 23:00:36.936286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:00:36.936294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8e00 0xc0003e8e40]
I0322 23:00:39.884114  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:00:39.884120  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:00:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:00:43.410617  543705 memory.go:191] Add success.
I0322 23:00:43.409820  543705 cpu.go:282] Add success.
I0322 23:00:43.420309  543705 net.go:648] Add success.
I0322 23:00:43.422793  543705 net.go:770] primary dev: ETH0
I0322 23:00:43.422807  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:00:43.422821  543705 net.go:698] Add success.
I0322 23:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:00:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:00:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:00:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:00:53.409779  543705 memory.go:184] no items to output this cycle
I0322 23:00:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 23:01:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:01:03.409893  543705 memory.go:184] no items to output this cycle
I0322 23:01:03.409982  543705 cpu.go:275] no items to output this cycle
E0322 23:01:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:01:13.409804  543705 memory.go:191] Add success.
I0322 23:01:13.409805  543705 cpu.go:282] Add success.
W0322 23:01:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:01:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:01:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:01:13.420139  543705 net.go:648] Add success.
I0322 23:01:13.422902  543705 net.go:770] primary dev: ETH0
I0322 23:01:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:01:13.422930  543705 net.go:698] Add success.
I0322 23:01:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:01:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:01:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0322 23:01:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:01:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 23:01:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:01:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:01:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:01:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:01:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:01:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:01:23.409778  543705 memory.go:184] no items to output this cycle
I0322 23:01:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 23:01:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:01:33.409806  543705 memory.go:184] no items to output this cycle
I0322 23:01:33.409817  543705 cpu.go:275] no items to output this cycle
I0322 23:01:36.937673  543705 disk_info.go:125] begin check local disk info of client
I0322 23:01:36.940211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:01:36.940218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e8100 0xc0004e8140]
E0322 23:01:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:01:43.410713  543705 memory.go:191] Add success.
I0322 23:01:43.409832  543705 cpu.go:282] Add success.
I0322 23:01:43.420481  543705 net.go:648] Add success.
I0322 23:01:43.423373  543705 net.go:770] primary dev: ETH0
I0322 23:01:43.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:01:43.423408  543705 net.go:698] Add success.
I0322 23:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:01:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:01:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:01:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:01:53.409792  543705 memory.go:184] no items to output this cycle
I0322 23:01:53.409795  543705 cpu.go:275] no items to output this cycle
E0322 23:02:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:02:03.409814  543705 memory.go:184] no items to output this cycle
I0322 23:02:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 23:02:13.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:02:13.409940  543705 memory.go:191] Add success.
W0322 23:02:13.409982  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:02:13.409995  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:02:13.409998  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:02:13.410074  543705 cpu.go:282] Add success.
I0322 23:02:13.419749  543705 net.go:648] Add success.
I0322 23:02:13.422541  543705 net.go:770] primary dev: ETH0
I0322 23:02:13.422555  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:02:13.422566  543705 net.go:698] Add success.
W0322 23:02:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:02:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0322 23:02:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:02:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:02:14.456938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:02:14.456943  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:02:14.457002  543705 disk_worker.go:494] system disk:vda1
I0322 23:02:14.457032  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:02:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:02:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:02:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:02:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:02:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:02:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:02:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:02:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:02:23.409776  543705 memory.go:184] no items to output this cycle
I0322 23:02:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 23:02:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:02:33.409796  543705 memory.go:184] no items to output this cycle
I0322 23:02:33.409807  543705 cpu.go:275] no items to output this cycle
I0322 23:02:36.941679  543705 disk_info.go:125] begin check local disk info of client
I0322 23:02:36.944275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:02:36.944283  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005085c0 0xc000508600]
E0322 23:02:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:02:43.410782  543705 memory.go:191] Add success.
I0322 23:02:43.409823  543705 cpu.go:282] Add success.
I0322 23:02:43.420503  543705 net.go:648] Add success.
I0322 23:02:43.423119  543705 net.go:770] primary dev: ETH0
I0322 23:02:43.423132  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:02:43.423146  543705 net.go:698] Add success.
I0322 23:02:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:02:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:02:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:02:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:02:53.409780  543705 memory.go:184] no items to output this cycle
I0322 23:02:53.409781  543705 cpu.go:275] no items to output this cycle
E0322 23:03:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:03:03.409785  543705 memory.go:184] no items to output this cycle
I0322 23:03:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 23:03:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:03:13.409800  543705 memory.go:191] Add success.
I0322 23:03:13.409800  543705 cpu.go:282] Add success.
W0322 23:03:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:03:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:03:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:03:13.420220  543705 net.go:648] Add success.
I0322 23:03:13.422946  543705 net.go:770] primary dev: ETH0
I0322 23:03:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:03:13.422977  543705 net.go:698] Add success.
I0322 23:03:13.468507  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8b3cd7c1-bf36-46a5-9844-7cdae1d05b1c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:03:13.468541  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:03:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:03:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:03:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0322 23:03:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:03:14.456545  543705 disk_worker.go:494] system disk:vda1
I0322 23:03:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:03:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:03:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:03:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:03:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:03:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:03:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:03:23.409796  543705 memory.go:184] no items to output this cycle
I0322 23:03:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 23:03:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:03:33.409764  543705 memory.go:184] no items to output this cycle
I0322 23:03:33.409796  543705 cpu.go:275] no items to output this cycle
I0322 23:03:36.945673  543705 disk_info.go:125] begin check local disk info of client
I0322 23:03:36.948160  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:03:36.948166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2580 0xc0003b25c0]
I0322 23:03:39.885731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:03:39.885738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:03:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:03:43.410652  543705 memory.go:191] Add success.
I0322 23:03:43.409810  543705 cpu.go:282] Add success.
I0322 23:03:43.420362  543705 net.go:648] Add success.
I0322 23:03:43.422845  543705 net.go:770] primary dev: ETH0
I0322 23:03:43.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:03:43.422875  543705 net.go:698] Add success.
I0322 23:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:03:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:03:53.410412  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:03:53.410431  543705 memory.go:184] no items to output this cycle
I0322 23:03:53.410447  543705 cpu.go:275] no items to output this cycle
E0322 23:04:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:04:03.409775  543705 memory.go:184] no items to output this cycle
I0322 23:04:03.409890  543705 cpu.go:275] no items to output this cycle
E0322 23:04:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:04:13.409798  543705 memory.go:191] Add success.
I0322 23:04:13.409797  543705 cpu.go:282] Add success.
W0322 23:04:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:04:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:04:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:04:13.420187  543705 net.go:648] Add success.
I0322 23:04:13.422950  543705 net.go:770] primary dev: ETH0
I0322 23:04:13.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:04:13.422987  543705 net.go:698] Add success.
I0322 23:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:04:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:04:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 23:04:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:04:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 23:04:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:04:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:04:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:04:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:04:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:04:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:04:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:04:23.409773  543705 cpu.go:275] no items to output this cycle
I0322 23:04:23.409783  543705 memory.go:184] no items to output this cycle
E0322 23:04:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:04:33.409806  543705 memory.go:184] no items to output this cycle
I0322 23:04:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 23:04:36.949682  543705 disk_info.go:125] begin check local disk info of client
I0322 23:04:36.952291  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:04:36.952299  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2740 0xc0003b2780]
E0322 23:04:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:04:43.410720  543705 memory.go:191] Add success.
I0322 23:04:43.409803  543705 cpu.go:282] Add success.
I0322 23:04:43.420495  543705 net.go:648] Add success.
I0322 23:04:43.423205  543705 net.go:770] primary dev: ETH0
I0322 23:04:43.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:04:43.423231  543705 net.go:698] Add success.
I0322 23:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:04:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:04:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:04:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:04:53.409802  543705 memory.go:184] no items to output this cycle
I0322 23:04:53.409812  543705 cpu.go:275] no items to output this cycle
E0322 23:05:03.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:05:03.409892  543705 memory.go:184] no items to output this cycle
I0322 23:05:03.409958  543705 cpu.go:275] no items to output this cycle
E0322 23:05:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:05:13.409776  543705 memory.go:191] Add success.
I0322 23:05:13.409798  543705 cpu.go:282] Add success.
W0322 23:05:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:05:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:05:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:05:13.420142  543705 net.go:648] Add success.
I0322 23:05:13.422848  543705 net.go:770] primary dev: ETH0
I0322 23:05:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:05:13.422878  543705 net.go:698] Add success.
I0322 23:05:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:05:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:05:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 23:05:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:05:14.456565  543705 disk_worker.go:494] system disk:vda1
I0322 23:05:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:05:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:05:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:05:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:05:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:05:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:05:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:05:23.409771  543705 memory.go:184] no items to output this cycle
I0322 23:05:23.409774  543705 cpu.go:275] no items to output this cycle
E0322 23:05:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:05:33.409801  543705 memory.go:184] no items to output this cycle
I0322 23:05:33.409812  543705 cpu.go:275] no items to output this cycle
I0322 23:05:36.953672  543705 disk_info.go:125] begin check local disk info of client
I0322 23:05:36.956205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:05:36.956211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b00 0xc0000c5b40]
E0322 23:05:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:05:43.410595  543705 memory.go:191] Add success.
I0322 23:05:43.409789  543705 cpu.go:282] Add success.
I0322 23:05:43.420306  543705 net.go:648] Add success.
I0322 23:05:43.422921  543705 net.go:770] primary dev: ETH0
I0322 23:05:43.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:05:43.422948  543705 net.go:698] Add success.
I0322 23:05:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:05:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:05:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:05:53.410290  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:05:53.410308  543705 memory.go:184] no items to output this cycle
I0322 23:05:53.410320  543705 cpu.go:275] no items to output this cycle
E0322 23:06:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:06:03.409786  543705 memory.go:184] no items to output this cycle
I0322 23:06:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 23:06:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:06:13.409787  543705 memory.go:191] Add success.
I0322 23:06:13.409788  543705 cpu.go:282] Add success.
W0322 23:06:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:06:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:06:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:06:13.419704  543705 net.go:648] Add success.
I0322 23:06:13.422457  543705 net.go:770] primary dev: ETH0
I0322 23:06:13.422470  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:06:13.422481  543705 net.go:698] Add success.
I0322 23:06:13.467921  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c15e12a1-05cb-42ec-9465-fc83cbf9f628","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:06:13.467952  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:06:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:06:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:06:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0322 23:06:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:06:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 23:06:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:06:15.455610  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:06:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:06:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:06:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:06:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:06:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:06:23.409770  543705 memory.go:184] no items to output this cycle
I0322 23:06:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 23:06:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:06:33.409771  543705 memory.go:184] no items to output this cycle
I0322 23:06:33.409790  543705 cpu.go:275] no items to output this cycle
I0322 23:06:36.957688  543705 disk_info.go:125] begin check local disk info of client
I0322 23:06:36.960889  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:06:36.960900  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e0d40 0xc0004e0d80]
I0322 23:06:39.888143  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:06:39.888151  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:06:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:06:43.410914  543705 memory.go:191] Add success.
I0322 23:06:43.409826  543705 cpu.go:282] Add success.
I0322 23:06:43.420628  543705 net.go:648] Add success.
I0322 23:06:43.423539  543705 net.go:770] primary dev: ETH0
I0322 23:06:43.423553  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:06:43.423565  543705 net.go:698] Add success.
I0322 23:06:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:06:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:06:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:06:53.409779  543705 cpu.go:275] no items to output this cycle
I0322 23:06:53.409785  543705 memory.go:184] no items to output this cycle
E0322 23:07:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:07:03.409784  543705 memory.go:184] no items to output this cycle
I0322 23:07:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 23:07:13.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:07:13.409935  543705 memory.go:191] Add success.
W0322 23:07:13.409970  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:07:13.409990  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:07:13.410000  543705 cpu.go:282] Add success.
I0322 23:07:13.410001  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:07:13.419708  543705 net.go:648] Add success.
I0322 23:07:13.422539  543705 net.go:770] primary dev: ETH0
I0322 23:07:13.422551  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:07:13.422563  543705 net.go:698] Add success.
I0322 23:07:13.453129  543705 event_worker.go:152] Polling the log file for events...
W0322 23:07:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:07:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 23:07:14.455190  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:07:14.455906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:07:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:07:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:07:14.456547  543705 disk_worker.go:494] system disk:vda1
I0322 23:07:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:07:15.456875  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:07:15.456885  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:07:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:07:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:07:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:07:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:07:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:07:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:07:23.409777  543705 memory.go:184] no items to output this cycle
I0322 23:07:23.409780  543705 cpu.go:275] no items to output this cycle
E0322 23:07:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:07:33.409798  543705 memory.go:184] no items to output this cycle
I0322 23:07:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 23:07:36.961674  543705 disk_info.go:125] begin check local disk info of client
I0322 23:07:36.964220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:07:36.964228  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352780 0xc0003527c0]
E0322 23:07:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:07:43.410633  543705 memory.go:191] Add success.
I0322 23:07:43.409828  543705 cpu.go:282] Add success.
I0322 23:07:43.420338  543705 net.go:648] Add success.
I0322 23:07:43.423034  543705 net.go:770] primary dev: ETH0
I0322 23:07:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:07:43.423064  543705 net.go:698] Add success.
I0322 23:07:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:07:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:07:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:07:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:07:53.409761  543705 memory.go:184] no items to output this cycle
I0322 23:07:53.409836  543705 cpu.go:275] no items to output this cycle
E0322 23:08:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:08:03.409774  543705 memory.go:184] no items to output this cycle
I0322 23:08:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 23:08:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:08:13.409793  543705 memory.go:191] Add success.
I0322 23:08:13.409811  543705 cpu.go:282] Add success.
W0322 23:08:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:08:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:08:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:08:13.420176  543705 net.go:648] Add success.
I0322 23:08:13.423037  543705 net.go:770] primary dev: ETH0
I0322 23:08:13.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:08:13.423062  543705 net.go:698] Add success.
I0322 23:08:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:08:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:08:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 23:08:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:08:14.456530  543705 disk_worker.go:494] system disk:vda1
I0322 23:08:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:08:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:08:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:08:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:08:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:08:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:08:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:08:23.409775  543705 memory.go:184] no items to output this cycle
I0322 23:08:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 23:08:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:08:33.409801  543705 memory.go:184] no items to output this cycle
I0322 23:08:33.409815  543705 cpu.go:275] no items to output this cycle
I0322 23:08:36.965682  543705 disk_info.go:125] begin check local disk info of client
I0322 23:08:36.967997  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:08:36.968005  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005086c0 0xc000508700]
E0322 23:08:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:08:43.410580  543705 memory.go:191] Add success.
I0322 23:08:43.409810  543705 cpu.go:282] Add success.
I0322 23:08:43.420348  543705 net.go:648] Add success.
I0322 23:08:43.422745  543705 net.go:770] primary dev: ETH0
I0322 23:08:43.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:08:43.422773  543705 net.go:698] Add success.
I0322 23:08:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:08:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:08:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:08:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:08:53.409788  543705 cpu.go:275] no items to output this cycle
I0322 23:08:53.409790  543705 memory.go:184] no items to output this cycle
E0322 23:09:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:09:03.409798  543705 memory.go:184] no items to output this cycle
I0322 23:09:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:09:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:09:13.409798  543705 memory.go:191] Add success.
I0322 23:09:13.409823  543705 cpu.go:282] Add success.
W0322 23:09:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:09:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:09:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:09:13.420652  543705 net.go:648] Add success.
I0322 23:09:13.423528  543705 net.go:770] primary dev: ETH0
I0322 23:09:13.423540  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:09:13.423552  543705 net.go:698] Add success.
I0322 23:09:13.469127  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"152ddfbd-67e8-4ec4-82e7-704565a69ce7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:09:13.469168  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:09:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:09:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:09:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0322 23:09:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:09:14.456667  543705 disk_worker.go:494] system disk:vda1
I0322 23:09:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:09:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:09:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:09:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:09:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:09:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:09:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:09:23.409773  543705 memory.go:184] no items to output this cycle
I0322 23:09:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 23:09:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:09:33.409794  543705 memory.go:184] no items to output this cycle
I0322 23:09:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 23:09:36.969676  543705 disk_info.go:125] begin check local disk info of client
I0322 23:09:36.972159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:09:36.972165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4400 0xc0004a4440]
I0322 23:09:39.889736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:09:39.889743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:09:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:09:43.410665  543705 memory.go:191] Add success.
I0322 23:09:43.409833  543705 cpu.go:282] Add success.
I0322 23:09:43.420371  543705 net.go:648] Add success.
I0322 23:09:43.423128  543705 net.go:770] primary dev: ETH0
I0322 23:09:43.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:09:43.423156  543705 net.go:698] Add success.
I0322 23:09:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:09:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:09:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:09:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:09:53.409814  543705 memory.go:184] no items to output this cycle
I0322 23:09:53.409818  543705 cpu.go:275] no items to output this cycle
E0322 23:10:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:10:03.409892  543705 cpu.go:275] no items to output this cycle
I0322 23:10:03.409913  543705 memory.go:184] no items to output this cycle
E0322 23:10:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:10:13.409796  543705 memory.go:191] Add success.
W0322 23:10:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 23:10:13.409822  543705 cpu.go:282] Add success.
W0322 23:10:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:10:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:10:13.420103  543705 net.go:648] Add success.
I0322 23:10:13.422564  543705 net.go:770] primary dev: ETH0
I0322 23:10:13.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:10:13.422589  543705 net.go:698] Add success.
I0322 23:10:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:10:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:10:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 23:10:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:10:14.456492  543705 disk_worker.go:494] system disk:vda1
I0322 23:10:14.456539  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:10:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:10:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:10:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:10:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:10:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:10:23.409771  543705 memory.go:184] no items to output this cycle
I0322 23:10:23.409783  543705 cpu.go:275] no items to output this cycle
E0322 23:10:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:10:33.409779  543705 memory.go:184] no items to output this cycle
I0322 23:10:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 23:10:36.973683  543705 disk_info.go:125] begin check local disk info of client
I0322 23:10:36.976165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:10:36.976173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2380 0xc0003b23c0]
E0322 23:10:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:10:43.410690  543705 memory.go:191] Add success.
I0322 23:10:43.409845  543705 cpu.go:282] Add success.
I0322 23:10:43.420389  543705 net.go:648] Add success.
I0322 23:10:43.423370  543705 net.go:770] primary dev: ETH0
I0322 23:10:43.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:10:43.423398  543705 net.go:698] Add success.
I0322 23:10:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:10:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:10:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:10:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:10:53.409785  543705 memory.go:184] no items to output this cycle
I0322 23:10:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 23:11:03.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:11:03.409920  543705 memory.go:184] no items to output this cycle
I0322 23:11:03.409921  543705 cpu.go:275] no items to output this cycle
E0322 23:11:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:11:13.409810  543705 memory.go:191] Add success.
I0322 23:11:13.409835  543705 cpu.go:282] Add success.
W0322 23:11:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:11:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:11:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:11:13.420314  543705 net.go:648] Add success.
I0322 23:11:13.425626  543705 net.go:770] primary dev: ETH0
I0322 23:11:13.425640  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:11:13.425669  543705 net.go:698] Add success.
I0322 23:11:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:11:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:11:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 23:11:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:11:14.456567  543705 disk_worker.go:494] system disk:vda1
I0322 23:11:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:11:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:11:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:11:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:11:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:11:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:11:23.409798  543705 memory.go:184] no items to output this cycle
I0322 23:11:23.409810  543705 cpu.go:275] no items to output this cycle
E0322 23:11:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:11:33.409771  543705 memory.go:184] no items to output this cycle
I0322 23:11:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 23:11:36.977677  543705 disk_info.go:125] begin check local disk info of client
I0322 23:11:36.980159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:11:36.980165  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521a40 0xc000521a80]
E0322 23:11:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:11:43.410584  543705 memory.go:191] Add success.
I0322 23:11:43.409810  543705 cpu.go:282] Add success.
I0322 23:11:43.420326  543705 net.go:648] Add success.
I0322 23:11:43.423034  543705 net.go:770] primary dev: ETH0
I0322 23:11:43.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:11:43.423060  543705 net.go:698] Add success.
I0322 23:11:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:11:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:11:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:11:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:11:53.410259  543705 memory.go:184] no items to output this cycle
I0322 23:11:53.410261  543705 cpu.go:275] no items to output this cycle
E0322 23:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:12:03.409777  543705 memory.go:184] no items to output this cycle
I0322 23:12:03.409804  543705 cpu.go:275] no items to output this cycle
W0322 23:12:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:12:13.409731  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:12:13.409736  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 23:12:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:12:13.409816  543705 cpu.go:282] Add success.
I0322 23:12:13.409831  543705 memory.go:191] Add success.
I0322 23:12:13.420163  543705 net.go:648] Add success.
I0322 23:12:13.422742  543705 net.go:770] primary dev: ETH0
I0322 23:12:13.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:12:13.422767  543705 net.go:698] Add success.
I0322 23:12:13.517742  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1a8458d1-db14-4ca7-882c-cddeff5ce89f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:12:13.517776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 23:12:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:12:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0322 23:12:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:12:14.456835  543705 disk_worker.go:494] system disk:vda1
E0322 23:12:14.456857  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:12:14.456865  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:12:14.456870  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:12:14.456889  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:12:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:12:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:12:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:12:16.457996  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:12:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:12:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:12:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:12:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:12:23.409796  543705 memory.go:184] no items to output this cycle
I0322 23:12:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 23:12:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:12:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 23:12:33.409791  543705 memory.go:184] no items to output this cycle
I0322 23:12:36.981676  543705 disk_info.go:125] begin check local disk info of client
I0322 23:12:36.984089  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:12:36.984098  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508cc0 0xc000508d00]
I0322 23:12:39.892172  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:12:39.892179  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:12:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:12:43.410734  543705 memory.go:191] Add success.
I0322 23:12:43.409822  543705 cpu.go:282] Add success.
I0322 23:12:43.420440  543705 net.go:648] Add success.
I0322 23:12:43.422967  543705 net.go:770] primary dev: ETH0
I0322 23:12:43.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:12:43.422996  543705 net.go:698] Add success.
I0322 23:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:12:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:12:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:12:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:12:53.409785  543705 memory.go:184] no items to output this cycle
I0322 23:12:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:13:03.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:13:03.409883  543705 memory.go:184] no items to output this cycle
I0322 23:13:03.409926  543705 cpu.go:275] no items to output this cycle
E0322 23:13:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:13:13.409824  543705 memory.go:191] Add success.
I0322 23:13:13.409845  543705 cpu.go:282] Add success.
W0322 23:13:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:13:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:13:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:13:13.420213  543705 net.go:648] Add success.
I0322 23:13:13.423163  543705 net.go:770] primary dev: ETH0
I0322 23:13:13.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:13:13.423188  543705 net.go:698] Add success.
I0322 23:13:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:13:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:13:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 23:13:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:13:14.456625  543705 disk_worker.go:494] system disk:vda1
I0322 23:13:14.456661  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:13:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:13:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:13:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:13:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:13:23.409781  543705 memory.go:184] no items to output this cycle
I0322 23:13:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 23:13:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:13:33.409807  543705 memory.go:184] no items to output this cycle
I0322 23:13:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 23:13:36.985672  543705 disk_info.go:125] begin check local disk info of client
I0322 23:13:36.988201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:13:36.988208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b300 0xc00007b340]
E0322 23:13:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:13:43.410592  543705 memory.go:191] Add success.
I0322 23:13:43.409793  543705 cpu.go:282] Add success.
I0322 23:13:43.420289  543705 net.go:648] Add success.
I0322 23:13:43.423036  543705 net.go:770] primary dev: ETH0
I0322 23:13:43.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:13:43.423062  543705 net.go:698] Add success.
I0322 23:13:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:13:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:13:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:13:53.409784  543705 memory.go:184] no items to output this cycle
I0322 23:13:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 23:14:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:14:03.409788  543705 memory.go:184] no items to output this cycle
I0322 23:14:03.409795  543705 cpu.go:275] no items to output this cycle
W0322 23:14:13.409715  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:14:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:14:13.409746  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:14:13.409839  543705 cpu.go:282] Add success.
E0322 23:14:13.409842  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:14:13.409859  543705 memory.go:191] Add success.
I0322 23:14:13.420048  543705 net.go:648] Add success.
I0322 23:14:13.422919  543705 net.go:770] primary dev: ETH0
I0322 23:14:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:14:13.422943  543705 net.go:698] Add success.
I0322 23:14:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:14:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:14:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0322 23:14:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:14:14.456562  543705 disk_worker.go:494] system disk:vda1
I0322 23:14:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:14:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:14:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:14:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:14:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:14:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:14:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:14:23.409780  543705 memory.go:184] no items to output this cycle
I0322 23:14:23.409782  543705 cpu.go:275] no items to output this cycle
E0322 23:14:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:14:33.409800  543705 memory.go:184] no items to output this cycle
I0322 23:14:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 23:14:36.989672  543705 disk_info.go:125] begin check local disk info of client
I0322 23:14:36.992216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:14:36.992223  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2900 0xc0003e2940]
E0322 23:14:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:14:43.410688  543705 memory.go:191] Add success.
I0322 23:14:43.409811  543705 cpu.go:282] Add success.
I0322 23:14:43.420411  543705 net.go:648] Add success.
I0322 23:14:43.422808  543705 net.go:770] primary dev: ETH0
I0322 23:14:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:14:43.422835  543705 net.go:698] Add success.
I0322 23:14:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:14:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:14:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:14:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:14:53.409806  543705 memory.go:184] no items to output this cycle
I0322 23:14:53.409815  543705 cpu.go:275] no items to output this cycle
E0322 23:15:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:15:03.409785  543705 memory.go:184] no items to output this cycle
I0322 23:15:03.409790  543705 cpu.go:275] no items to output this cycle
E0322 23:15:13.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:15:13.409940  543705 cpu.go:282] Add success.
I0322 23:15:13.410026  543705 memory.go:191] Add success.
W0322 23:15:13.410056  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:15:13.410072  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:15:13.410076  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:15:13.419712  543705 net.go:648] Add success.
I0322 23:15:13.422362  543705 net.go:770] primary dev: ETH0
I0322 23:15:13.422377  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:15:13.422390  543705 net.go:698] Add success.
I0322 23:15:13.486460  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d8c1ba08-8d7f-4421-924e-14dcfeb18bbc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:15:13.486489  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:15:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:15:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:15:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0322 23:15:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:15:14.456524  543705 disk_worker.go:494] system disk:vda1
I0322 23:15:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:15:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:15:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:15:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:15:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:15:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:15:23.409774  543705 memory.go:184] no items to output this cycle
I0322 23:15:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 23:15:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:15:33.409786  543705 memory.go:184] no items to output this cycle
I0322 23:15:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 23:15:36.993676  543705 disk_info.go:125] begin check local disk info of client
I0322 23:15:36.996132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:15:36.996138  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509b40 0xc000509b80]
I0322 23:15:39.893726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:15:39.893732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:15:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:15:43.410565  543705 memory.go:191] Add success.
I0322 23:15:43.409809  543705 cpu.go:282] Add success.
I0322 23:15:43.420341  543705 net.go:648] Add success.
I0322 23:15:43.423731  543705 net.go:770] primary dev: ETH0
I0322 23:15:43.423744  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:15:43.423756  543705 net.go:698] Add success.
I0322 23:15:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:15:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:15:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:15:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:15:53.409779  543705 memory.go:184] no items to output this cycle
I0322 23:15:53.409786  543705 cpu.go:275] no items to output this cycle
E0322 23:16:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:16:03.409807  543705 memory.go:184] no items to output this cycle
I0322 23:16:03.409822  543705 cpu.go:275] no items to output this cycle
E0322 23:16:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:16:13.409866  543705 memory.go:191] Add success.
W0322 23:16:13.409898  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:16:13.409913  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:16:13.409917  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:16:13.409948  543705 cpu.go:282] Add success.
I0322 23:16:13.419710  543705 net.go:648] Add success.
I0322 23:16:13.422576  543705 net.go:770] primary dev: ETH0
I0322 23:16:13.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:16:13.422600  543705 net.go:698] Add success.
I0322 23:16:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:16:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:16:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 23:16:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:16:14.456588  543705 disk_worker.go:494] system disk:vda1
I0322 23:16:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:16:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:16:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:16:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:16:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:16:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:16:23.409797  543705 memory.go:184] no items to output this cycle
I0322 23:16:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 23:16:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:16:33.409771  543705 memory.go:184] no items to output this cycle
I0322 23:16:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 23:16:36.997675  543705 disk_info.go:125] begin check local disk info of client
I0322 23:16:37.000181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:16:37.000187  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa9c0 0xc0001aaa00]
E0322 23:16:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:16:43.411131  543705 memory.go:191] Add success.
I0322 23:16:43.409809  543705 cpu.go:282] Add success.
I0322 23:16:43.419842  543705 net.go:648] Add success.
I0322 23:16:43.423210  543705 net.go:770] primary dev: ETH0
I0322 23:16:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:16:43.423242  543705 net.go:698] Add success.
I0322 23:16:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:16:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:16:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:16:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:16:53.409800  543705 memory.go:184] no items to output this cycle
I0322 23:16:53.409811  543705 cpu.go:275] no items to output this cycle
E0322 23:17:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:17:03.409804  543705 memory.go:184] no items to output this cycle
I0322 23:17:03.409816  543705 cpu.go:275] no items to output this cycle
E0322 23:17:13.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:17:13.409772  543705 memory.go:191] Add success.
W0322 23:17:13.409797  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:17:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:17:13.409810  543705 cpu.go:282] Add success.
I0322 23:17:13.409813  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:17:13.419735  543705 net.go:648] Add success.
I0322 23:17:13.422693  543705 net.go:770] primary dev: ETH0
I0322 23:17:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:17:13.422726  543705 net.go:698] Add success.
I0322 23:17:13.453354  543705 event_worker.go:152] Polling the log file for events...
W0322 23:17:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:17:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0322 23:17:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:17:14.456793  543705 disk_worker.go:494] system disk:vda1
I0322 23:17:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:17:14.457116  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:17:14.457123  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:17:14.457127  543705 custom_config.go:64] query custom config with name: gpu
E0322 23:17:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:17:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:17:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:17:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:17:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:17:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:17:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:17:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:17:23.409773  543705 memory.go:184] no items to output this cycle
I0322 23:17:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 23:17:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:17:33.409767  543705 memory.go:184] no items to output this cycle
I0322 23:17:33.409794  543705 cpu.go:275] no items to output this cycle
I0322 23:17:37.001675  543705 disk_info.go:125] begin check local disk info of client
I0322 23:17:37.004152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:17:37.004158  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a40 0xc0000c4a80]
E0322 23:17:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:17:43.410551  543705 memory.go:191] Add success.
I0322 23:17:43.409795  543705 cpu.go:282] Add success.
I0322 23:17:43.420373  543705 net.go:648] Add success.
I0322 23:17:43.422854  543705 net.go:770] primary dev: ETH0
I0322 23:17:43.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:17:43.422884  543705 net.go:698] Add success.
I0322 23:17:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:17:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:17:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:17:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:17:53.409783  543705 cpu.go:275] no items to output this cycle
I0322 23:17:53.409789  543705 memory.go:184] no items to output this cycle
E0322 23:18:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:18:03.409773  543705 memory.go:184] no items to output this cycle
I0322 23:18:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 23:18:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:18:13.409919  543705 memory.go:191] Add success.
I0322 23:18:13.409927  543705 cpu.go:282] Add success.
W0322 23:18:13.409951  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:18:13.409965  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:18:13.409969  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:18:13.419738  543705 net.go:648] Add success.
I0322 23:18:13.422456  543705 net.go:770] primary dev: ETH0
I0322 23:18:13.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:18:13.422485  543705 net.go:698] Add success.
I0322 23:18:13.468371  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8cd74f0c-a40d-4547-a1e4-a85d964edae8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:18:13.468404  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:18:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:18:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:18:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 23:18:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:18:14.456614  543705 disk_worker.go:494] system disk:vda1
I0322 23:18:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:18:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:18:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:18:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:18:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:18:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:18:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:18:23.409805  543705 memory.go:184] no items to output this cycle
I0322 23:18:23.409821  543705 cpu.go:275] no items to output this cycle
E0322 23:18:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:18:33.409778  543705 memory.go:184] no items to output this cycle
I0322 23:18:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 23:18:37.005674  543705 disk_info.go:125] begin check local disk info of client
I0322 23:18:37.008197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:18:37.008203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049f300 0xc00049f340]
I0322 23:18:39.896182  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:18:39.896190  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:18:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:18:43.410672  543705 memory.go:191] Add success.
I0322 23:18:43.409807  543705 cpu.go:282] Add success.
I0322 23:18:43.420332  543705 net.go:648] Add success.
I0322 23:18:43.422729  543705 net.go:770] primary dev: ETH0
I0322 23:18:43.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:18:43.422757  543705 net.go:698] Add success.
I0322 23:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:18:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:18:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:18:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:18:53.409819  543705 memory.go:184] no items to output this cycle
I0322 23:18:53.409834  543705 cpu.go:275] no items to output this cycle
E0322 23:19:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:19:03.409816  543705 memory.go:184] no items to output this cycle
I0322 23:19:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 23:19:13.409844  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:19:13.409872  543705 memory.go:191] Add success.
W0322 23:19:13.409901  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:19:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:19:13.409921  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:19:13.409930  543705 cpu.go:282] Add success.
I0322 23:19:13.419755  543705 net.go:648] Add success.
I0322 23:19:13.422199  543705 net.go:770] primary dev: ETH0
I0322 23:19:13.422215  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:19:13.422228  543705 net.go:698] Add success.
I0322 23:19:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:19:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:19:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0322 23:19:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:19:14.456481  543705 disk_worker.go:494] system disk:vda1
I0322 23:19:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:19:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:19:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:19:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:19:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:19:23.409785  543705 cpu.go:275] no items to output this cycle
I0322 23:19:23.409794  543705 memory.go:184] no items to output this cycle
E0322 23:19:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:19:33.409805  543705 memory.go:184] no items to output this cycle
I0322 23:19:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 23:19:37.009676  543705 disk_info.go:125] begin check local disk info of client
I0322 23:19:37.012221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:19:37.012230  543705 disk_info.go:196] parse disk info done, disk is : [0xc000355440 0xc000355480]
E0322 23:19:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:19:43.410581  543705 memory.go:191] Add success.
I0322 23:19:43.409832  543705 cpu.go:282] Add success.
I0322 23:19:43.420286  543705 net.go:648] Add success.
I0322 23:19:43.422942  543705 net.go:770] primary dev: ETH0
I0322 23:19:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:19:43.422967  543705 net.go:698] Add success.
I0322 23:19:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:19:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:19:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:19:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:19:53.409815  543705 memory.go:184] no items to output this cycle
I0322 23:19:53.409826  543705 cpu.go:275] no items to output this cycle
E0322 23:20:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:20:03.409771  543705 memory.go:184] no items to output this cycle
I0322 23:20:03.409796  543705 cpu.go:275] no items to output this cycle
E0322 23:20:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:20:13.409781  543705 memory.go:191] Add success.
I0322 23:20:13.409801  543705 cpu.go:282] Add success.
W0322 23:20:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:20:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:20:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:20:13.419735  543705 net.go:648] Add success.
I0322 23:20:13.422299  543705 net.go:770] primary dev: ETH0
I0322 23:20:13.422313  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:20:13.422324  543705 net.go:698] Add success.
I0322 23:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:20:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:20:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0322 23:20:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:20:14.456496  543705 disk_worker.go:494] system disk:vda1
I0322 23:20:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:20:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:20:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:20:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:20:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:20:23.409793  543705 memory.go:184] no items to output this cycle
I0322 23:20:23.409805  543705 cpu.go:275] no items to output this cycle
E0322 23:20:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:20:33.409780  543705 memory.go:184] no items to output this cycle
I0322 23:20:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 23:20:37.013673  543705 disk_info.go:125] begin check local disk info of client
I0322 23:20:37.016194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:20:37.016200  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504740 0xc000504780]
E0322 23:20:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:20:43.410565  543705 memory.go:191] Add success.
I0322 23:20:43.409800  543705 cpu.go:282] Add success.
I0322 23:20:43.420322  543705 net.go:648] Add success.
I0322 23:20:43.422765  543705 net.go:770] primary dev: ETH0
I0322 23:20:43.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:20:43.422793  543705 net.go:698] Add success.
I0322 23:20:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:20:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:20:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:20:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:20:53.409780  543705 memory.go:184] no items to output this cycle
I0322 23:20:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 23:21:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:21:03.409783  543705 memory.go:184] no items to output this cycle
I0322 23:21:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 23:21:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:21:13.409816  543705 memory.go:191] Add success.
I0322 23:21:13.409819  543705 cpu.go:282] Add success.
W0322 23:21:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:21:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:21:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:21:13.420162  543705 net.go:648] Add success.
I0322 23:21:13.422976  543705 net.go:770] primary dev: ETH0
I0322 23:21:13.422991  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:21:13.423160  543705 net.go:698] Add success.
I0322 23:21:13.468124  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84a92cc2-84a0-421c-b211-0b7330e5a477","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:21:13.468155  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:21:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:21:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:21:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0322 23:21:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:21:14.456501  543705 disk_worker.go:494] system disk:vda1
I0322 23:21:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:21:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:21:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:21:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:21:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:21:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:21:23.409770  543705 memory.go:184] no items to output this cycle
I0322 23:21:23.409798  543705 cpu.go:275] no items to output this cycle
E0322 23:21:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:21:33.409798  543705 memory.go:184] no items to output this cycle
I0322 23:21:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 23:21:37.017673  543705 disk_info.go:125] begin check local disk info of client
I0322 23:21:37.020273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:21:37.020279  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8680 0xc0004d86c0]
I0322 23:21:39.897733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:21:39.897740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:21:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:21:43.410530  543705 memory.go:191] Add success.
I0322 23:21:43.409806  543705 cpu.go:282] Add success.
I0322 23:21:43.420267  543705 net.go:648] Add success.
I0322 23:21:43.422843  543705 net.go:770] primary dev: ETH0
I0322 23:21:43.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:21:43.422869  543705 net.go:698] Add success.
I0322 23:21:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:21:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:21:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:21:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:21:53.409796  543705 memory.go:184] no items to output this cycle
I0322 23:21:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 23:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:22:03.409782  543705 memory.go:184] no items to output this cycle
I0322 23:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 23:22:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:22:13.409796  543705 memory.go:191] Add success.
I0322 23:22:13.409796  543705 cpu.go:282] Add success.
W0322 23:22:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:22:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:22:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:22:13.420357  543705 net.go:648] Add success.
I0322 23:22:13.422998  543705 net.go:770] primary dev: ETH0
I0322 23:22:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:22:13.423035  543705 net.go:698] Add success.
W0322 23:22:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:22:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0322 23:22:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:22:14.456943  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:22:14.456952  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:22:14.456958  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:22:14.457003  543705 disk_worker.go:494] system disk:vda1
I0322 23:22:14.457055  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:22:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:22:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:22:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:22:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:22:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:22:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:22:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:22:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:22:23.409777  543705 memory.go:184] no items to output this cycle
I0322 23:22:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 23:22:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:22:33.409778  543705 memory.go:184] no items to output this cycle
I0322 23:22:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 23:22:37.021674  543705 disk_info.go:125] begin check local disk info of client
I0322 23:22:37.024186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:22:37.024192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002af1c0 0xc0002af200]
E0322 23:22:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:22:43.410546  543705 memory.go:191] Add success.
I0322 23:22:43.409813  543705 cpu.go:282] Add success.
I0322 23:22:43.420257  543705 net.go:648] Add success.
I0322 23:22:43.423118  543705 net.go:770] primary dev: ETH0
I0322 23:22:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:22:43.423144  543705 net.go:698] Add success.
I0322 23:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:22:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:22:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:22:53.410400  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:22:53.410418  543705 memory.go:184] no items to output this cycle
I0322 23:22:53.410428  543705 cpu.go:275] no items to output this cycle
E0322 23:23:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:23:03.409778  543705 memory.go:184] no items to output this cycle
I0322 23:23:03.409809  543705 cpu.go:275] no items to output this cycle
E0322 23:23:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:23:13.409809  543705 memory.go:191] Add success.
I0322 23:23:13.409824  543705 cpu.go:282] Add success.
W0322 23:23:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:23:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:23:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:23:13.420270  543705 net.go:648] Add success.
I0322 23:23:13.422969  543705 net.go:770] primary dev: ETH0
I0322 23:23:13.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:23:13.422993  543705 net.go:698] Add success.
I0322 23:23:14.454946  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:23:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:23:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0322 23:23:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:23:14.456579  543705 disk_worker.go:494] system disk:vda1
I0322 23:23:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:23:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:23:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:23:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:23:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:23:23.409777  543705 memory.go:184] no items to output this cycle
I0322 23:23:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 23:23:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:23:33.409778  543705 memory.go:184] no items to output this cycle
I0322 23:23:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 23:23:37.025671  543705 disk_info.go:125] begin check local disk info of client
I0322 23:23:37.028144  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:23:37.028151  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504880 0xc0005048c0]
E0322 23:23:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:23:43.410797  543705 memory.go:191] Add success.
I0322 23:23:43.409823  543705 cpu.go:282] Add success.
I0322 23:23:43.420538  543705 net.go:648] Add success.
I0322 23:23:43.423461  543705 net.go:770] primary dev: ETH0
I0322 23:23:43.423481  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:23:43.423511  543705 net.go:698] Add success.
I0322 23:23:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:23:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:23:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:23:53.409766  543705 memory.go:184] no items to output this cycle
I0322 23:23:53.409796  543705 cpu.go:275] no items to output this cycle
E0322 23:24:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:24:03.409780  543705 memory.go:184] no items to output this cycle
I0322 23:24:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:24:13.409813  543705 memory.go:191] Add success.
I0322 23:24:13.409818  543705 cpu.go:282] Add success.
W0322 23:24:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:24:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:24:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:24:13.420146  543705 net.go:648] Add success.
I0322 23:24:13.423018  543705 net.go:770] primary dev: ETH0
I0322 23:24:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:24:13.423042  543705 net.go:698] Add success.
I0322 23:24:13.462875  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cff9d6ea-c58f-4ba4-8030-95531c31c559","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:24:13.462906  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:24:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:24:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:24:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0322 23:24:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:24:14.456607  543705 disk_worker.go:494] system disk:vda1
I0322 23:24:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:24:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:24:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:24:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:24:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:24:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:24:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:24:23.409790  543705 memory.go:184] no items to output this cycle
I0322 23:24:23.409803  543705 cpu.go:275] no items to output this cycle
E0322 23:24:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:24:33.409767  543705 memory.go:184] no items to output this cycle
I0322 23:24:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 23:24:37.029673  543705 disk_info.go:125] begin check local disk info of client
I0322 23:24:37.032187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:24:37.032194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d8dc0 0xc0003d8e00]
I0322 23:24:39.900205  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:24:39.900212  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:24:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:24:43.410707  543705 memory.go:191] Add success.
I0322 23:24:43.409834  543705 cpu.go:282] Add success.
I0322 23:24:43.420459  543705 net.go:648] Add success.
I0322 23:24:43.422973  543705 net.go:770] primary dev: ETH0
I0322 23:24:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:24:43.423005  543705 net.go:698] Add success.
I0322 23:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:24:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:24:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:24:53.409777  543705 memory.go:184] no items to output this cycle
I0322 23:24:53.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:25:03.409808  543705 memory.go:184] no items to output this cycle
I0322 23:25:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 23:25:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:25:13.409816  543705 memory.go:191] Add success.
I0322 23:25:13.409827  543705 cpu.go:282] Add success.
W0322 23:25:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:25:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:25:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:25:13.420270  543705 net.go:648] Add success.
I0322 23:25:13.422787  543705 net.go:770] primary dev: ETH0
I0322 23:25:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:25:13.422811  543705 net.go:698] Add success.
I0322 23:25:14.453953  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:25:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:25:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0322 23:25:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:25:14.456619  543705 disk_worker.go:494] system disk:vda1
I0322 23:25:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:25:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:25:16.458025  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:25:16.458089  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:25:16.458111  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:25:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:25:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:25:23.409773  543705 memory.go:184] no items to output this cycle
I0322 23:25:23.409799  543705 cpu.go:275] no items to output this cycle
E0322 23:25:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:25:33.409783  543705 memory.go:184] no items to output this cycle
I0322 23:25:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 23:25:37.033688  543705 disk_info.go:125] begin check local disk info of client
I0322 23:25:37.036124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:25:37.036131  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf00 0xc0001aaf40]
E0322 23:25:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:25:43.410638  543705 memory.go:191] Add success.
I0322 23:25:43.409838  543705 cpu.go:282] Add success.
I0322 23:25:43.420583  543705 net.go:648] Add success.
I0322 23:25:43.422950  543705 net.go:770] primary dev: ETH0
I0322 23:25:43.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:25:43.422975  543705 net.go:698] Add success.
I0322 23:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:25:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:25:53.409785  543705 memory.go:184] no items to output this cycle
I0322 23:25:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 23:26:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:26:03.409795  543705 memory.go:184] no items to output this cycle
I0322 23:26:03.409811  543705 cpu.go:275] no items to output this cycle
W0322 23:26:13.409707  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:26:13.409739  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:26:13.409745  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0322 23:26:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:26:13.409848  543705 cpu.go:282] Add success.
I0322 23:26:13.409856  543705 memory.go:191] Add success.
I0322 23:26:13.420117  543705 net.go:648] Add success.
I0322 23:26:13.422688  543705 net.go:770] primary dev: ETH0
I0322 23:26:13.422703  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:26:13.422725  543705 net.go:698] Add success.
I0322 23:26:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:26:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:26:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 23:26:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:26:14.456697  543705 disk_worker.go:494] system disk:vda1
I0322 23:26:14.456749  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:26:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:26:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:26:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:26:16.472436  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:26:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:26:23.409778  543705 memory.go:184] no items to output this cycle
I0322 23:26:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 23:26:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:26:33.409792  543705 memory.go:184] no items to output this cycle
I0322 23:26:33.409805  543705 cpu.go:275] no items to output this cycle
I0322 23:26:37.037678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:26:37.040151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:26:37.040157  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052a540 0xc00052a580]
E0322 23:26:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:26:43.410590  543705 memory.go:191] Add success.
I0322 23:26:43.409845  543705 cpu.go:282] Add success.
I0322 23:26:43.420133  543705 net.go:770] primary dev: ETH0
I0322 23:26:43.420146  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:26:43.420159  543705 net.go:698] Add success.
I0322 23:26:43.420532  543705 net.go:648] Add success.
I0322 23:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:26:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:26:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:26:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:26:53.409784  543705 memory.go:184] no items to output this cycle
I0322 23:26:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 23:27:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:27:03.409818  543705 memory.go:184] no items to output this cycle
I0322 23:27:03.409834  543705 cpu.go:275] no items to output this cycle
E0322 23:27:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:27:13.409820  543705 memory.go:191] Add success.
I0322 23:27:13.409826  543705 cpu.go:282] Add success.
W0322 23:27:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:27:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:27:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:27:13.425911  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 23:27:13.426166  543705 net.go:648] Add success.
I0322 23:27:13.428857  543705 net.go:770] primary dev: ETH0
I0322 23:27:13.428870  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:27:13.428882  543705 net.go:698] Add success.
I0322 23:27:13.453388  543705 event_worker.go:152] Polling the log file for events...
I0322 23:27:13.463451  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a02cabd9-eeb6-466c-b407-75f49db5e5fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:27:13.463487  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 23:27:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:27:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0322 23:27:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:27:14.456452  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:27:14.456461  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:27:14.456466  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:27:14.457418  543705 disk_worker.go:494] system disk:vda1
I0322 23:27:14.457445  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:27:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:27:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:27:16.457908  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:27:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:27:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:27:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:27:16.472301  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:27:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:27:23.409774  543705 memory.go:184] no items to output this cycle
I0322 23:27:23.409793  543705 cpu.go:275] no items to output this cycle
E0322 23:27:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:27:33.409806  543705 memory.go:184] no items to output this cycle
I0322 23:27:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 23:27:37.041675  543705 disk_info.go:125] begin check local disk info of client
I0322 23:27:37.044197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:27:37.044203  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469400 0xc000469440]
I0322 23:27:39.901731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:27:39.901738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:27:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:27:43.410494  543705 memory.go:191] Add success.
I0322 23:27:43.409824  543705 cpu.go:282] Add success.
I0322 23:27:43.420559  543705 net.go:648] Add success.
I0322 23:27:43.422979  543705 net.go:770] primary dev: ETH0
I0322 23:27:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:27:43.423010  543705 net.go:698] Add success.
I0322 23:27:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:27:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:27:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:27:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:27:53.409781  543705 cpu.go:275] no items to output this cycle
I0322 23:27:53.409783  543705 memory.go:184] no items to output this cycle
E0322 23:28:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:28:03.409788  543705 memory.go:184] no items to output this cycle
I0322 23:28:03.409789  543705 cpu.go:275] no items to output this cycle
W0322 23:28:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:28:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:28:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:28:13.409796  543705 cpu.go:282] Add success.
E0322 23:28:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:28:13.409816  543705 memory.go:191] Add success.
I0322 23:28:13.420057  543705 net.go:648] Add success.
I0322 23:28:13.422808  543705 net.go:770] primary dev: ETH0
I0322 23:28:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:28:13.422851  543705 net.go:698] Add success.
I0322 23:28:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:28:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:28:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0322 23:28:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:28:14.456828  543705 disk_worker.go:494] system disk:vda1
I0322 23:28:14.456856  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:28:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:28:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:28:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:28:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:28:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:28:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:28:23.409772  543705 memory.go:184] no items to output this cycle
I0322 23:28:23.409789  543705 cpu.go:275] no items to output this cycle
E0322 23:28:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:28:33.409776  543705 memory.go:184] no items to output this cycle
I0322 23:28:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 23:28:37.045677  543705 disk_info.go:125] begin check local disk info of client
I0322 23:28:37.048236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:28:37.048242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e9800 0xc0004e9840]
E0322 23:28:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:28:43.410655  543705 memory.go:191] Add success.
I0322 23:28:43.409805  543705 cpu.go:282] Add success.
I0322 23:28:43.420379  543705 net.go:648] Add success.
I0322 23:28:43.423034  543705 net.go:770] primary dev: ETH0
I0322 23:28:43.423047  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:28:43.423059  543705 net.go:698] Add success.
I0322 23:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:28:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:28:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:28:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:28:53.409778  543705 memory.go:184] no items to output this cycle
I0322 23:28:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 23:29:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:29:03.409808  543705 memory.go:184] no items to output this cycle
I0322 23:29:03.409821  543705 cpu.go:275] no items to output this cycle
E0322 23:29:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:29:13.409789  543705 memory.go:191] Add success.
I0322 23:29:13.409790  543705 cpu.go:282] Add success.
W0322 23:29:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:29:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:29:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:29:13.420117  543705 net.go:648] Add success.
I0322 23:29:13.422842  543705 net.go:770] primary dev: ETH0
I0322 23:29:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:29:13.422871  543705 net.go:698] Add success.
I0322 23:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:29:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:29:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 23:29:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:29:14.456522  543705 disk_worker.go:494] system disk:vda1
I0322 23:29:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:29:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:29:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:29:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:29:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:29:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:29:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:29:23.409798  543705 memory.go:184] no items to output this cycle
I0322 23:29:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 23:29:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:29:33.409781  543705 memory.go:184] no items to output this cycle
I0322 23:29:33.409798  543705 cpu.go:275] no items to output this cycle
I0322 23:29:37.049677  543705 disk_info.go:125] begin check local disk info of client
I0322 23:29:37.052229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:29:37.052235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000279400 0xc000279440]
E0322 23:29:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:29:43.410574  543705 memory.go:191] Add success.
I0322 23:29:43.409800  543705 cpu.go:282] Add success.
I0322 23:29:43.420287  543705 net.go:648] Add success.
I0322 23:29:43.422765  543705 net.go:770] primary dev: ETH0
I0322 23:29:43.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:29:43.422790  543705 net.go:698] Add success.
I0322 23:29:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:29:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:29:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:29:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:29:53.409785  543705 memory.go:184] no items to output this cycle
I0322 23:29:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 23:30:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:30:03.409808  543705 memory.go:184] no items to output this cycle
I0322 23:30:03.409820  543705 cpu.go:275] no items to output this cycle
E0322 23:30:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:30:13.409778  543705 memory.go:191] Add success.
W0322 23:30:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 23:30:13.409809  543705 cpu.go:282] Add success.
W0322 23:30:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:30:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:30:13.420079  543705 net.go:648] Add success.
I0322 23:30:13.422536  543705 net.go:770] primary dev: ETH0
I0322 23:30:13.422550  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:30:13.422563  543705 net.go:698] Add success.
I0322 23:30:13.468802  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"10c21e6c-b47e-4d1b-bf06-84a5583459b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:30:13.468843  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:30:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:30:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:30:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 23:30:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:30:14.456840  543705 disk_worker.go:494] system disk:vda1
I0322 23:30:14.456869  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:30:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:30:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:30:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:30:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:30:23.409776  543705 memory.go:184] no items to output this cycle
I0322 23:30:23.409778  543705 cpu.go:275] no items to output this cycle
E0322 23:30:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:30:33.409787  543705 memory.go:184] no items to output this cycle
I0322 23:30:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 23:30:37.053672  543705 disk_info.go:125] begin check local disk info of client
I0322 23:30:37.056203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:30:37.056209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005058c0 0xc000505900]
I0322 23:30:39.904212  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:30:39.904220  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:30:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:30:43.410712  543705 memory.go:191] Add success.
I0322 23:30:43.409820  543705 cpu.go:282] Add success.
I0322 23:30:43.420411  543705 net.go:648] Add success.
I0322 23:30:43.423074  543705 net.go:770] primary dev: ETH0
I0322 23:30:43.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:30:43.423114  543705 net.go:698] Add success.
I0322 23:30:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:30:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:30:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:30:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:30:53.409769  543705 memory.go:184] no items to output this cycle
I0322 23:30:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 23:31:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:31:03.409782  543705 memory.go:184] no items to output this cycle
I0322 23:31:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 23:31:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:31:13.409818  543705 memory.go:191] Add success.
I0322 23:31:13.409827  543705 cpu.go:282] Add success.
W0322 23:31:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:31:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:31:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:31:13.420179  543705 net.go:648] Add success.
I0322 23:31:13.422874  543705 net.go:770] primary dev: ETH0
I0322 23:31:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:31:13.422898  543705 net.go:698] Add success.
I0322 23:31:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:31:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:31:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0322 23:31:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:31:14.456523  543705 disk_worker.go:494] system disk:vda1
I0322 23:31:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:31:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:31:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:31:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:31:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:31:23.409795  543705 memory.go:184] no items to output this cycle
I0322 23:31:23.409806  543705 cpu.go:275] no items to output this cycle
E0322 23:31:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:31:33.409780  543705 memory.go:184] no items to output this cycle
I0322 23:31:33.409797  543705 cpu.go:275] no items to output this cycle
I0322 23:31:37.057679  543705 disk_info.go:125] begin check local disk info of client
I0322 23:31:37.060095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:31:37.060103  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508440 0xc000508480]
E0322 23:31:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:31:43.410614  543705 memory.go:191] Add success.
I0322 23:31:43.409825  543705 cpu.go:282] Add success.
I0322 23:31:43.420328  543705 net.go:648] Add success.
I0322 23:31:43.422968  543705 net.go:770] primary dev: ETH0
I0322 23:31:43.422979  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:31:43.422993  543705 net.go:698] Add success.
I0322 23:31:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:31:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:31:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:31:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:31:53.409782  543705 memory.go:184] no items to output this cycle
I0322 23:31:53.409793  543705 cpu.go:275] no items to output this cycle
E0322 23:32:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:32:03.409790  543705 memory.go:184] no items to output this cycle
I0322 23:32:03.409807  543705 cpu.go:275] no items to output this cycle
E0322 23:32:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:32:13.409791  543705 memory.go:191] Add success.
I0322 23:32:13.409810  543705 cpu.go:282] Add success.
W0322 23:32:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:32:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:32:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:32:13.420064  543705 net.go:648] Add success.
I0322 23:32:13.422834  543705 net.go:770] primary dev: ETH0
I0322 23:32:13.422848  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:32:13.422860  543705 net.go:698] Add success.
W0322 23:32:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:32:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0322 23:32:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:32:14.456941  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:32:14.456951  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:32:14.456957  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:32:14.457027  543705 disk_worker.go:494] system disk:vda1
I0322 23:32:14.457059  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:32:15.456794  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:32:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:32:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:32:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:32:16.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:32:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:32:16.472115  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:32:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:32:23.409770  543705 memory.go:184] no items to output this cycle
I0322 23:32:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 23:32:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:32:33.409788  543705 memory.go:184] no items to output this cycle
I0322 23:32:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 23:32:37.061674  543705 disk_info.go:125] begin check local disk info of client
I0322 23:32:37.064198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:32:37.064204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003947c0 0xc000394800]
E0322 23:32:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:32:43.410667  543705 memory.go:191] Add success.
I0322 23:32:43.409815  543705 cpu.go:282] Add success.
I0322 23:32:43.420390  543705 net.go:648] Add success.
I0322 23:32:43.423100  543705 net.go:770] primary dev: ETH0
I0322 23:32:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:32:43.423126  543705 net.go:698] Add success.
I0322 23:32:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:32:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:32:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:32:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:32:53.409780  543705 memory.go:184] no items to output this cycle
I0322 23:32:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 23:33:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:33:03.409804  543705 memory.go:184] no items to output this cycle
I0322 23:33:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 23:33:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:33:13.409778  543705 memory.go:191] Add success.
W0322 23:33:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 23:33:13.409807  543705 cpu.go:282] Add success.
W0322 23:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:33:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:33:13.420110  543705 net.go:648] Add success.
I0322 23:33:13.422653  543705 net.go:770] primary dev: ETH0
I0322 23:33:13.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:33:13.422682  543705 net.go:698] Add success.
I0322 23:33:13.469726  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a390be38-a79f-4655-b1a3-7c116f6b21f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:33:13.469761  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:33:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:33:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:33:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 23:33:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:33:14.456532  543705 disk_worker.go:494] system disk:vda1
I0322 23:33:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:33:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:33:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:33:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:33:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:33:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:33:23.409905  543705 cpu.go:275] no items to output this cycle
I0322 23:33:23.409933  543705 memory.go:184] no items to output this cycle
E0322 23:33:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:33:33.409783  543705 memory.go:184] no items to output this cycle
I0322 23:33:33.409795  543705 cpu.go:275] no items to output this cycle
I0322 23:33:37.065684  543705 disk_info.go:125] begin check local disk info of client
I0322 23:33:37.068133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:33:37.068140  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0322 23:33:39.905737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:33:39.905744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:33:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:33:43.410633  543705 memory.go:191] Add success.
I0322 23:33:43.409812  543705 cpu.go:282] Add success.
I0322 23:33:43.420363  543705 net.go:648] Add success.
I0322 23:33:43.422628  543705 net.go:770] primary dev: ETH0
I0322 23:33:43.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:33:43.422655  543705 net.go:698] Add success.
I0322 23:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:33:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:33:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:33:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:33:53.409795  543705 memory.go:184] no items to output this cycle
I0322 23:33:53.409808  543705 cpu.go:275] no items to output this cycle
E0322 23:34:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:34:03.409780  543705 memory.go:184] no items to output this cycle
I0322 23:34:03.409803  543705 cpu.go:275] no items to output this cycle
E0322 23:34:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:34:13.409790  543705 memory.go:191] Add success.
I0322 23:34:13.409791  543705 cpu.go:282] Add success.
W0322 23:34:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:34:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:34:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:34:13.420117  543705 net.go:648] Add success.
I0322 23:34:13.422807  543705 net.go:770] primary dev: ETH0
I0322 23:34:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:34:13.422833  543705 net.go:698] Add success.
I0322 23:34:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:34:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:34:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0322 23:34:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:34:14.456498  543705 disk_worker.go:494] system disk:vda1
I0322 23:34:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:34:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:34:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:34:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:34:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:34:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:34:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:34:23.409796  543705 memory.go:184] no items to output this cycle
I0322 23:34:23.409808  543705 cpu.go:275] no items to output this cycle
E0322 23:34:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:34:33.409778  543705 memory.go:184] no items to output this cycle
I0322 23:34:33.409813  543705 cpu.go:275] no items to output this cycle
I0322 23:34:37.069678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:34:37.072195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:34:37.072202  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0322 23:34:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:34:43.410711  543705 memory.go:191] Add success.
I0322 23:34:43.409827  543705 cpu.go:282] Add success.
I0322 23:34:43.420424  543705 net.go:648] Add success.
I0322 23:34:43.422899  543705 net.go:770] primary dev: ETH0
I0322 23:34:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:34:43.422924  543705 net.go:698] Add success.
I0322 23:34:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:34:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:34:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:34:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:34:53.409785  543705 memory.go:184] no items to output this cycle
I0322 23:34:53.409787  543705 cpu.go:275] no items to output this cycle
E0322 23:35:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:35:03.409780  543705 memory.go:184] no items to output this cycle
I0322 23:35:03.409786  543705 cpu.go:275] no items to output this cycle
E0322 23:35:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:35:13.409787  543705 memory.go:191] Add success.
I0322 23:35:13.409811  543705 cpu.go:282] Add success.
W0322 23:35:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:35:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:35:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:35:13.420138  543705 net.go:648] Add success.
I0322 23:35:13.422770  543705 net.go:770] primary dev: ETH0
I0322 23:35:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:35:13.422798  543705 net.go:698] Add success.
I0322 23:35:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:35:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:35:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0322 23:35:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:35:14.456558  543705 disk_worker.go:494] system disk:vda1
I0322 23:35:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:35:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:35:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:35:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:35:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:35:23.409778  543705 memory.go:184] no items to output this cycle
I0322 23:35:23.409779  543705 cpu.go:275] no items to output this cycle
E0322 23:35:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:35:33.409887  543705 memory.go:184] no items to output this cycle
I0322 23:35:33.409919  543705 cpu.go:275] no items to output this cycle
I0322 23:35:37.073677  543705 disk_info.go:125] begin check local disk info of client
I0322 23:35:37.076147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:35:37.076154  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508000 0xc000508040]
E0322 23:35:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:35:43.410676  543705 memory.go:191] Add success.
I0322 23:35:43.409816  543705 cpu.go:282] Add success.
I0322 23:35:43.420370  543705 net.go:648] Add success.
I0322 23:35:43.423110  543705 net.go:770] primary dev: ETH0
I0322 23:35:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:35:43.423141  543705 net.go:698] Add success.
I0322 23:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:35:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:35:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:35:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:35:53.409767  543705 memory.go:184] no items to output this cycle
I0322 23:35:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:36:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:36:03.409774  543705 memory.go:184] no items to output this cycle
I0322 23:36:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 23:36:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:36:13.409796  543705 memory.go:191] Add success.
I0322 23:36:13.409798  543705 cpu.go:282] Add success.
W0322 23:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:36:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:36:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:36:13.420045  543705 net.go:648] Add success.
I0322 23:36:13.422615  543705 net.go:770] primary dev: ETH0
I0322 23:36:13.422628  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:36:13.422643  543705 net.go:698] Add success.
I0322 23:36:13.463898  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7f0b19c3-1c33-462a-afe7-21de324aceb8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:36:13.463932  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:36:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:36:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:36:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0322 23:36:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:36:14.456607  543705 disk_worker.go:494] system disk:vda1
I0322 23:36:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:36:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:36:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:36:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:36:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:36:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:36:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:36:23.409798  543705 memory.go:184] no items to output this cycle
I0322 23:36:23.409809  543705 cpu.go:275] no items to output this cycle
E0322 23:36:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:36:33.409784  543705 memory.go:184] no items to output this cycle
I0322 23:36:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 23:36:37.077678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:36:37.080234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:36:37.080241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469500 0xc000469540]
I0322 23:36:39.908243  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:36:39.908250  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:36:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:36:43.410733  543705 memory.go:191] Add success.
I0322 23:36:43.409823  543705 cpu.go:282] Add success.
I0322 23:36:43.420444  543705 net.go:648] Add success.
I0322 23:36:43.423028  543705 net.go:770] primary dev: ETH0
I0322 23:36:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:36:43.423058  543705 net.go:698] Add success.
I0322 23:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:36:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:36:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:36:53.409776  543705 memory.go:184] no items to output this cycle
I0322 23:36:53.409791  543705 cpu.go:275] no items to output this cycle
E0322 23:37:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:37:03.409779  543705 memory.go:184] no items to output this cycle
I0322 23:37:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:37:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:37:13.409814  543705 memory.go:191] Add success.
I0322 23:37:13.409815  543705 cpu.go:282] Add success.
W0322 23:37:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:37:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:37:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:37:13.420140  543705 net.go:648] Add success.
I0322 23:37:13.422737  543705 net.go:770] primary dev: ETH0
I0322 23:37:13.422750  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:37:13.422763  543705 net.go:698] Add success.
I0322 23:37:13.453297  543705 event_worker.go:152] Polling the log file for events...
W0322 23:37:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:37:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0322 23:37:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:37:14.455860  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:37:14.455867  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:37:14.455872  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:37:14.456813  543705 disk_worker.go:494] system disk:vda1
I0322 23:37:14.456847  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:37:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:37:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:37:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:37:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:37:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:37:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:37:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:37:23.410217  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:37:23.410240  543705 memory.go:184] no items to output this cycle
I0322 23:37:23.410253  543705 cpu.go:275] no items to output this cycle
E0322 23:37:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:37:33.409777  543705 memory.go:184] no items to output this cycle
I0322 23:37:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 23:37:37.081683  543705 disk_info.go:125] begin check local disk info of client
I0322 23:37:37.084146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:37:37.084153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e8780 0xc0004e87c0]
E0322 23:37:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:37:43.410738  543705 memory.go:191] Add success.
I0322 23:37:43.409800  543705 cpu.go:282] Add success.
I0322 23:37:43.420452  543705 net.go:648] Add success.
I0322 23:37:43.423742  543705 net.go:770] primary dev: ETH0
I0322 23:37:43.423756  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:37:43.423771  543705 net.go:698] Add success.
I0322 23:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:37:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:37:53.409804  543705 memory.go:184] no items to output this cycle
I0322 23:37:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 23:38:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:38:03.409792  543705 memory.go:184] no items to output this cycle
I0322 23:38:03.409815  543705 cpu.go:275] no items to output this cycle
E0322 23:38:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:38:13.409819  543705 memory.go:191] Add success.
I0322 23:38:13.409828  543705 cpu.go:282] Add success.
W0322 23:38:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:38:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:38:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:38:13.420196  543705 net.go:648] Add success.
I0322 23:38:13.422736  543705 net.go:770] primary dev: ETH0
I0322 23:38:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:38:13.422764  543705 net.go:698] Add success.
I0322 23:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:38:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:38:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0322 23:38:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:38:14.456576  543705 disk_worker.go:494] system disk:vda1
I0322 23:38:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:38:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:38:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:38:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:38:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:38:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:38:23.409788  543705 memory.go:184] no items to output this cycle
I0322 23:38:23.409790  543705 cpu.go:275] no items to output this cycle
E0322 23:38:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:38:33.409812  543705 memory.go:184] no items to output this cycle
I0322 23:38:33.409824  543705 cpu.go:275] no items to output this cycle
I0322 23:38:37.085665  543705 disk_info.go:125] begin check local disk info of client
I0322 23:38:37.088220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:38:37.088227  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025ddc0 0xc00025de00]
E0322 23:38:43.409919  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:38:43.410634  543705 memory.go:191] Add success.
I0322 23:38:43.409939  543705 cpu.go:282] Add success.
I0322 23:38:43.419723  543705 net.go:648] Add success.
I0322 23:38:43.422213  543705 net.go:770] primary dev: ETH0
I0322 23:38:43.422227  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:38:43.422239  543705 net.go:698] Add success.
I0322 23:38:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:38:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:38:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:38:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:38:53.409806  543705 memory.go:184] no items to output this cycle
I0322 23:38:53.409817  543705 cpu.go:275] no items to output this cycle
E0322 23:39:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:39:03.409787  543705 memory.go:184] no items to output this cycle
I0322 23:39:03.409788  543705 cpu.go:275] no items to output this cycle
E0322 23:39:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:39:13.409820  543705 memory.go:191] Add success.
I0322 23:39:13.409834  543705 cpu.go:282] Add success.
W0322 23:39:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:39:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:39:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:39:13.420111  543705 net.go:648] Add success.
I0322 23:39:13.422670  543705 net.go:770] primary dev: ETH0
I0322 23:39:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:39:13.422699  543705 net.go:698] Add success.
I0322 23:39:13.468277  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"48db2d97-32ff-4411-b15e-6290d5bdff9e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:39:13.468311  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:39:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:39:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:39:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0322 23:39:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:39:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 23:39:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:39:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:39:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:39:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:39:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:39:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:39:23.409777  543705 memory.go:184] no items to output this cycle
I0322 23:39:23.409852  543705 cpu.go:275] no items to output this cycle
E0322 23:39:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:39:33.409781  543705 memory.go:184] no items to output this cycle
I0322 23:39:33.409784  543705 cpu.go:275] no items to output this cycle
I0322 23:39:37.089674  543705 disk_info.go:125] begin check local disk info of client
I0322 23:39:37.092133  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:39:37.092139  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509e40 0xc000509e80]
I0322 23:39:39.909733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:39:39.909739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0322 23:39:43.409919  543705 cpu.go:282] Add success.
E0322 23:39:43.410082  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:39:43.410831  543705 memory.go:191] Add success.
I0322 23:39:43.419731  543705 net.go:648] Add success.
I0322 23:39:43.422359  543705 net.go:770] primary dev: ETH0
I0322 23:39:43.422373  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:39:43.422387  543705 net.go:698] Add success.
I0322 23:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:39:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:39:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:39:53.409781  543705 memory.go:184] no items to output this cycle
I0322 23:39:53.409784  543705 cpu.go:275] no items to output this cycle
E0322 23:40:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:40:03.409786  543705 memory.go:184] no items to output this cycle
I0322 23:40:03.409792  543705 cpu.go:275] no items to output this cycle
E0322 23:40:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:40:13.409785  543705 memory.go:191] Add success.
I0322 23:40:13.409785  543705 cpu.go:282] Add success.
W0322 23:40:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:40:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:40:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:40:13.420201  543705 net.go:648] Add success.
I0322 23:40:13.423101  543705 net.go:770] primary dev: ETH0
I0322 23:40:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:40:13.423131  543705 net.go:698] Add success.
I0322 23:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:40:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:40:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 23:40:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:40:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 23:40:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:40:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:40:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:40:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:40:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:40:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:40:23.409782  543705 memory.go:184] no items to output this cycle
I0322 23:40:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 23:40:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:40:33.409779  543705 memory.go:184] no items to output this cycle
I0322 23:40:33.409788  543705 cpu.go:275] no items to output this cycle
I0322 23:40:37.093675  543705 disk_info.go:125] begin check local disk info of client
I0322 23:40:37.096214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:40:37.096220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471a80 0xc000471ac0]
E0322 23:40:43.409931  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:40:43.410838  543705 memory.go:191] Add success.
I0322 23:40:43.409931  543705 cpu.go:282] Add success.
I0322 23:40:43.419721  543705 net.go:648] Add success.
I0322 23:40:43.422424  543705 net.go:770] primary dev: ETH0
I0322 23:40:43.422440  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:40:43.422454  543705 net.go:698] Add success.
I0322 23:40:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:40:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:40:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:40:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:40:53.409765  543705 memory.go:184] no items to output this cycle
I0322 23:40:53.409792  543705 cpu.go:275] no items to output this cycle
E0322 23:41:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:41:03.409785  543705 memory.go:184] no items to output this cycle
I0322 23:41:03.409794  543705 cpu.go:275] no items to output this cycle
E0322 23:41:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:41:13.409783  543705 memory.go:191] Add success.
I0322 23:41:13.409807  543705 cpu.go:282] Add success.
W0322 23:41:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:41:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:41:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:41:13.420146  543705 net.go:648] Add success.
I0322 23:41:13.422858  543705 net.go:770] primary dev: ETH0
I0322 23:41:13.422872  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:41:13.422886  543705 net.go:698] Add success.
I0322 23:41:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:41:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:41:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 23:41:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:41:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 23:41:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:41:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:41:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:41:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:41:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:41:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:41:23.409781  543705 memory.go:184] no items to output this cycle
I0322 23:41:23.409788  543705 cpu.go:275] no items to output this cycle
E0322 23:41:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:41:33.409778  543705 memory.go:184] no items to output this cycle
I0322 23:41:33.409782  543705 cpu.go:275] no items to output this cycle
I0322 23:41:37.097680  543705 disk_info.go:125] begin check local disk info of client
I0322 23:41:37.100172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:41:37.100178  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486000 0xc000486040]
E0322 23:41:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:41:43.410547  543705 memory.go:191] Add success.
I0322 23:41:43.409804  543705 cpu.go:282] Add success.
I0322 23:41:43.420259  543705 net.go:648] Add success.
I0322 23:41:43.422655  543705 net.go:770] primary dev: ETH0
I0322 23:41:43.422669  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:41:43.422681  543705 net.go:698] Add success.
I0322 23:41:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:41:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:41:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:41:53.409780  543705 memory.go:184] no items to output this cycle
I0322 23:41:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 23:42:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:42:03.409783  543705 memory.go:184] no items to output this cycle
I0322 23:42:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 23:42:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:42:13.409807  543705 memory.go:191] Add success.
I0322 23:42:13.409815  543705 cpu.go:282] Add success.
W0322 23:42:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:42:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:42:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:42:13.420253  543705 net.go:648] Add success.
I0322 23:42:13.423063  543705 net.go:770] primary dev: ETH0
I0322 23:42:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:42:13.423091  543705 net.go:698] Add success.
I0322 23:42:13.468843  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4bb155ff-fd27-4a07-9cc3-77c563b38748","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:42:13.468877  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 23:42:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:42:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0322 23:42:14.455209  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:42:14.455898  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:42:14.455906  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:42:14.455911  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:42:14.456663  543705 disk_worker.go:494] system disk:vda1
I0322 23:42:14.456708  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:42:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:42:15.456873  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:42:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:42:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:42:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:42:16.458032  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:42:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:42:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:42:23.409786  543705 cpu.go:275] no items to output this cycle
I0322 23:42:23.409786  543705 memory.go:184] no items to output this cycle
E0322 23:42:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:42:33.409769  543705 memory.go:184] no items to output this cycle
I0322 23:42:33.409891  543705 cpu.go:275] no items to output this cycle
I0322 23:42:37.101675  543705 disk_info.go:125] begin check local disk info of client
I0322 23:42:37.104197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:42:37.104204  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0322 23:42:39.912275  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:42:39.912282  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:42:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:42:43.410570  543705 memory.go:191] Add success.
I0322 23:42:43.409831  543705 cpu.go:282] Add success.
I0322 23:42:43.420272  543705 net.go:648] Add success.
I0322 23:42:43.422767  543705 net.go:770] primary dev: ETH0
I0322 23:42:43.422780  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:42:43.422792  543705 net.go:698] Add success.
I0322 23:42:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:42:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:42:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:42:53.410262  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:42:53.410276  543705 memory.go:184] no items to output this cycle
I0322 23:42:53.410276  543705 cpu.go:275] no items to output this cycle
E0322 23:43:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:43:03.409815  543705 memory.go:184] no items to output this cycle
I0322 23:43:03.409825  543705 cpu.go:275] no items to output this cycle
E0322 23:43:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:43:13.409785  543705 memory.go:191] Add success.
I0322 23:43:13.409806  543705 cpu.go:282] Add success.
W0322 23:43:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:43:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:43:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:43:13.420119  543705 net.go:648] Add success.
I0322 23:43:13.422562  543705 net.go:770] primary dev: ETH0
I0322 23:43:13.422575  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:43:13.422587  543705 net.go:698] Add success.
I0322 23:43:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:43:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:43:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0322 23:43:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:43:14.456628  543705 disk_worker.go:494] system disk:vda1
I0322 23:43:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:43:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:43:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:43:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:43:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:43:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:43:23.409785  543705 memory.go:184] no items to output this cycle
I0322 23:43:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 23:43:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:43:33.409783  543705 cpu.go:275] no items to output this cycle
I0322 23:43:33.409790  543705 memory.go:184] no items to output this cycle
I0322 23:43:37.105689  543705 disk_info.go:125] begin check local disk info of client
I0322 23:43:37.108103  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:43:37.108110  543705 disk_info.go:196] parse disk info done, disk is : [0xc000320000 0xc000320040]
E0322 23:43:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:43:43.410707  543705 memory.go:191] Add success.
I0322 23:43:43.409827  543705 cpu.go:282] Add success.
I0322 23:43:43.420397  543705 net.go:648] Add success.
I0322 23:43:43.423002  543705 net.go:770] primary dev: ETH0
I0322 23:43:43.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:43:43.423027  543705 net.go:698] Add success.
I0322 23:43:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:43:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:43:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:43:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:43:53.409797  543705 memory.go:184] no items to output this cycle
I0322 23:43:53.409810  543705 cpu.go:275] no items to output this cycle
E0322 23:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:44:03.409780  543705 memory.go:184] no items to output this cycle
I0322 23:44:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:44:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:44:13.409817  543705 memory.go:191] Add success.
I0322 23:44:13.409824  543705 cpu.go:282] Add success.
W0322 23:44:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:44:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:44:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:44:13.420054  543705 net.go:648] Add success.
I0322 23:44:13.422895  543705 net.go:770] primary dev: ETH0
I0322 23:44:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:44:13.422920  543705 net.go:698] Add success.
I0322 23:44:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:44:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:44:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0322 23:44:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:44:14.456489  543705 disk_worker.go:494] system disk:vda1
I0322 23:44:14.456532  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:44:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:44:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:44:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:44:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:44:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:44:23.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:44:23.409828  543705 memory.go:184] no items to output this cycle
I0322 23:44:23.409838  543705 cpu.go:275] no items to output this cycle
E0322 23:44:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:44:33.409778  543705 memory.go:184] no items to output this cycle
I0322 23:44:33.409799  543705 cpu.go:275] no items to output this cycle
I0322 23:44:37.109681  543705 disk_info.go:125] begin check local disk info of client
I0322 23:44:37.112222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:44:37.112229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000501100 0xc000501140]
E0322 23:44:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:44:43.410762  543705 memory.go:191] Add success.
I0322 23:44:43.409824  543705 cpu.go:282] Add success.
I0322 23:44:43.420529  543705 net.go:648] Add success.
I0322 23:44:43.423479  543705 net.go:770] primary dev: ETH0
I0322 23:44:43.423492  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:44:43.423505  543705 net.go:698] Add success.
I0322 23:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:44:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:44:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:44:53.409787  543705 cpu.go:275] no items to output this cycle
I0322 23:44:53.409791  543705 memory.go:184] no items to output this cycle
E0322 23:45:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:45:03.409794  543705 memory.go:184] no items to output this cycle
I0322 23:45:03.409805  543705 cpu.go:275] no items to output this cycle
E0322 23:45:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:45:13.409794  543705 memory.go:191] Add success.
I0322 23:45:13.409813  543705 cpu.go:282] Add success.
W0322 23:45:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:45:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:45:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:45:13.420126  543705 net.go:648] Add success.
I0322 23:45:13.422820  543705 net.go:770] primary dev: ETH0
I0322 23:45:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:45:13.422844  543705 net.go:698] Add success.
I0322 23:45:13.468213  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12a207b2-8cfa-4ea3-87e8-f0f1c15a7842","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:45:13.468247  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:45:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:45:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:45:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 23:45:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:45:14.456591  543705 disk_worker.go:494] system disk:vda1
I0322 23:45:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:45:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:45:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:45:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:45:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:45:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:45:23.409781  543705 memory.go:184] no items to output this cycle
I0322 23:45:23.409802  543705 cpu.go:275] no items to output this cycle
E0322 23:45:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:45:33.409902  543705 memory.go:184] no items to output this cycle
I0322 23:45:33.409926  543705 cpu.go:275] no items to output this cycle
I0322 23:45:37.113682  543705 disk_info.go:125] begin check local disk info of client
I0322 23:45:37.116183  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:45:37.116190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486900 0xc000486940]
I0322 23:45:39.913732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:45:39.913739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:45:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:45:43.410758  543705 memory.go:191] Add success.
I0322 23:45:43.409822  543705 cpu.go:282] Add success.
I0322 23:45:43.420493  543705 net.go:648] Add success.
I0322 23:45:43.423687  543705 net.go:770] primary dev: ETH0
I0322 23:45:43.423701  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:45:43.423714  543705 net.go:698] Add success.
I0322 23:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:45:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:45:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:45:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:45:53.409787  543705 memory.go:184] no items to output this cycle
I0322 23:45:53.409788  543705 cpu.go:275] no items to output this cycle
E0322 23:46:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:46:03.409821  543705 memory.go:184] no items to output this cycle
I0322 23:46:03.409832  543705 cpu.go:275] no items to output this cycle
E0322 23:46:13.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:46:13.409771  543705 memory.go:191] Add success.
W0322 23:46:13.409798  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 23:46:13.409804  543705 cpu.go:282] Add success.
W0322 23:46:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:46:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:46:13.420112  543705 net.go:648] Add success.
I0322 23:46:13.422762  543705 net.go:770] primary dev: ETH0
I0322 23:46:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:46:13.422791  543705 net.go:698] Add success.
I0322 23:46:14.454949  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:46:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:46:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0322 23:46:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:46:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 23:46:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:46:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:46:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:46:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:46:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:46:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:46:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:46:23.409785  543705 memory.go:184] no items to output this cycle
I0322 23:46:23.409787  543705 cpu.go:275] no items to output this cycle
E0322 23:46:33.409868  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:46:33.409879  543705 cpu.go:275] no items to output this cycle
I0322 23:46:33.409887  543705 memory.go:184] no items to output this cycle
I0322 23:46:37.117674  543705 disk_info.go:125] begin check local disk info of client
I0322 23:46:37.120241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:46:37.120248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ef380 0xc0004ef3c0]
E0322 23:46:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:46:43.410716  543705 memory.go:191] Add success.
I0322 23:46:43.409813  543705 cpu.go:282] Add success.
I0322 23:46:43.420502  543705 net.go:648] Add success.
I0322 23:46:43.422982  543705 net.go:770] primary dev: ETH0
I0322 23:46:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:46:43.423008  543705 net.go:698] Add success.
I0322 23:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:46:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:46:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:46:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:46:53.409782  543705 cpu.go:275] no items to output this cycle
I0322 23:46:53.409784  543705 memory.go:184] no items to output this cycle
E0322 23:47:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:47:03.409775  543705 memory.go:184] no items to output this cycle
I0322 23:47:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:47:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:47:13.409811  543705 memory.go:191] Add success.
I0322 23:47:13.409817  543705 cpu.go:282] Add success.
W0322 23:47:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:47:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:47:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:47:13.420132  543705 net.go:648] Add success.
I0322 23:47:13.422799  543705 net.go:770] primary dev: ETH0
I0322 23:47:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:47:13.422825  543705 net.go:698] Add success.
I0322 23:47:13.453355  543705 event_worker.go:152] Polling the log file for events...
W0322 23:47:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:47:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0322 23:47:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:47:14.455914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:47:14.455923  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:47:14.455929  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:47:14.456544  543705 disk_worker.go:494] system disk:vda1
I0322 23:47:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:47:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:47:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:47:16.458059  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:47:16.458068  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:47:16.458115  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:47:16.458134  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:47:16.472552  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:47:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:47:23.409772  543705 memory.go:184] no items to output this cycle
I0322 23:47:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:47:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:47:33.409801  543705 memory.go:184] no items to output this cycle
I0322 23:47:33.409816  543705 cpu.go:275] no items to output this cycle
I0322 23:47:37.121678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:47:37.124186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:47:37.124194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6000 0xc0002b6040]
E0322 23:47:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:47:43.410647  543705 memory.go:191] Add success.
I0322 23:47:43.409808  543705 cpu.go:282] Add success.
I0322 23:47:43.420347  543705 net.go:648] Add success.
I0322 23:47:43.422910  543705 net.go:770] primary dev: ETH0
I0322 23:47:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:47:43.422936  543705 net.go:698] Add success.
I0322 23:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:47:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:47:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:47:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:47:53.409783  543705 memory.go:184] no items to output this cycle
I0322 23:47:53.409783  543705 cpu.go:275] no items to output this cycle
E0322 23:48:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:48:03.409804  543705 memory.go:184] no items to output this cycle
I0322 23:48:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 23:48:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:48:13.409779  543705 memory.go:191] Add success.
W0322 23:48:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 23:48:13.409810  543705 cpu.go:282] Add success.
W0322 23:48:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:48:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:48:13.420078  543705 net.go:648] Add success.
I0322 23:48:13.422676  543705 net.go:770] primary dev: ETH0
I0322 23:48:13.422690  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:48:13.422701  543705 net.go:698] Add success.
I0322 23:48:13.468803  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3bdc992b-432d-416e-97b8-61f39cb04fdd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:48:13.468847  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:48:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:48:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0322 23:48:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:48:14.456673  543705 disk_worker.go:494] system disk:vda1
I0322 23:48:14.456708  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:48:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:48:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:48:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:48:16.472429  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:48:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:48:23.409802  543705 memory.go:184] no items to output this cycle
I0322 23:48:23.409812  543705 cpu.go:275] no items to output this cycle
E0322 23:48:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:48:33.409780  543705 memory.go:184] no items to output this cycle
I0322 23:48:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 23:48:37.125680  543705 disk_info.go:125] begin check local disk info of client
I0322 23:48:37.128241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:48:37.128248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005316c0 0xc000531700]
I0322 23:48:39.916294  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:48:39.916301  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:48:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:48:43.410774  543705 memory.go:191] Add success.
I0322 23:48:43.409833  543705 cpu.go:282] Add success.
I0322 23:48:43.420486  543705 net.go:648] Add success.
I0322 23:48:43.423467  543705 net.go:770] primary dev: ETH0
I0322 23:48:43.423486  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:48:43.423504  543705 net.go:698] Add success.
I0322 23:48:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:48:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:48:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:48:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:48:53.409770  543705 memory.go:184] no items to output this cycle
I0322 23:48:53.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:49:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:49:03.409789  543705 memory.go:184] no items to output this cycle
I0322 23:49:03.409795  543705 cpu.go:275] no items to output this cycle
E0322 23:49:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:49:13.409786  543705 memory.go:191] Add success.
W0322 23:49:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0322 23:49:13.409819  543705 cpu.go:282] Add success.
W0322 23:49:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:49:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:49:13.420151  543705 net.go:648] Add success.
I0322 23:49:13.422850  543705 net.go:770] primary dev: ETH0
I0322 23:49:13.422865  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:49:13.422879  543705 net.go:698] Add success.
I0322 23:49:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:49:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:49:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0322 23:49:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:49:14.456618  543705 disk_worker.go:494] system disk:vda1
I0322 23:49:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:49:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:49:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:49:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:49:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:49:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:49:23.409802  543705 memory.go:184] no items to output this cycle
I0322 23:49:23.409816  543705 cpu.go:275] no items to output this cycle
E0322 23:49:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:49:33.409775  543705 memory.go:184] no items to output this cycle
I0322 23:49:33.409793  543705 cpu.go:275] no items to output this cycle
I0322 23:49:37.129669  543705 disk_info.go:125] begin check local disk info of client
I0322 23:49:37.132124  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:49:37.132130  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5400 0xc0000c5440]
E0322 23:49:43.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:49:43.410952  543705 memory.go:191] Add success.
I0322 23:49:43.410056  543705 cpu.go:282] Add success.
I0322 23:49:43.419724  543705 net.go:648] Add success.
I0322 23:49:43.422575  543705 net.go:770] primary dev: ETH0
I0322 23:49:43.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:49:43.422615  543705 net.go:698] Add success.
I0322 23:49:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:49:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:49:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:49:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:49:53.409812  543705 memory.go:184] no items to output this cycle
I0322 23:49:53.409821  543705 cpu.go:275] no items to output this cycle
E0322 23:50:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:50:03.409778  543705 memory.go:184] no items to output this cycle
I0322 23:50:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 23:50:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:50:13.409804  543705 memory.go:191] Add success.
I0322 23:50:13.409811  543705 cpu.go:282] Add success.
W0322 23:50:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:50:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:50:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:50:13.420364  543705 net.go:648] Add success.
I0322 23:50:13.423009  543705 net.go:770] primary dev: ETH0
I0322 23:50:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:50:13.423035  543705 net.go:698] Add success.
I0322 23:50:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:50:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:50:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0322 23:50:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:50:14.456582  543705 disk_worker.go:494] system disk:vda1
I0322 23:50:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:50:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:50:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:50:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:50:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:50:23.409795  543705 memory.go:184] no items to output this cycle
I0322 23:50:23.409795  543705 cpu.go:275] no items to output this cycle
E0322 23:50:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:50:33.409808  543705 memory.go:184] no items to output this cycle
I0322 23:50:33.409819  543705 cpu.go:275] no items to output this cycle
I0322 23:50:37.133671  543705 disk_info.go:125] begin check local disk info of client
I0322 23:50:37.136513  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:50:37.136520  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2000 0xc0003e2040]
E0322 23:50:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:50:43.410599  543705 memory.go:191] Add success.
I0322 23:50:43.409828  543705 cpu.go:282] Add success.
I0322 23:50:43.420127  543705 net.go:770] primary dev: ETH0
I0322 23:50:43.420140  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:50:43.420153  543705 net.go:698] Add success.
I0322 23:50:43.420387  543705 net.go:648] Add success.
I0322 23:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:50:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:50:53.409806  543705 memory.go:184] no items to output this cycle
I0322 23:50:53.409816  543705 cpu.go:275] no items to output this cycle
E0322 23:51:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:51:03.409795  543705 memory.go:184] no items to output this cycle
I0322 23:51:03.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:51:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:51:13.409802  543705 memory.go:191] Add success.
I0322 23:51:13.409805  543705 cpu.go:282] Add success.
W0322 23:51:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:51:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:51:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:51:13.419933  543705 net.go:770] primary dev: ETH0
I0322 23:51:13.419948  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:51:13.419964  543705 net.go:698] Add success.
I0322 23:51:13.420341  543705 net.go:648] Add success.
I0322 23:51:13.468944  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5150213f-89c3-4344-b204-1239acb242ce","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:51:13.468979  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:51:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:51:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0322 23:51:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:51:14.456727  543705 disk_worker.go:494] system disk:vda1
I0322 23:51:14.456758  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:51:15.455606  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:51:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:51:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:51:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:51:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:51:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:51:23.409807  543705 memory.go:184] no items to output this cycle
I0322 23:51:23.409821  543705 cpu.go:275] no items to output this cycle
I0322 23:51:33.409886  543705 cpu.go:275] no items to output this cycle
E0322 23:51:33.409963  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:51:33.409978  543705 memory.go:184] no items to output this cycle
I0322 23:51:37.137678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:51:37.140091  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:51:37.140098  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046a180 0xc00046a1c0]
I0322 23:51:39.917730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:51:39.917737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:51:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:51:43.410608  543705 memory.go:191] Add success.
I0322 23:51:43.409805  543705 cpu.go:282] Add success.
I0322 23:51:43.420300  543705 net.go:648] Add success.
I0322 23:51:43.422801  543705 net.go:770] primary dev: ETH0
I0322 23:51:43.422823  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:51:43.422836  543705 net.go:698] Add success.
I0322 23:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:51:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:51:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:51:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:51:53.409780  543705 memory.go:184] no items to output this cycle
I0322 23:51:53.409782  543705 cpu.go:275] no items to output this cycle
E0322 23:52:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:52:03.409809  543705 memory.go:184] no items to output this cycle
I0322 23:52:03.409817  543705 cpu.go:275] no items to output this cycle
E0322 23:52:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:52:13.409797  543705 memory.go:191] Add success.
I0322 23:52:13.409798  543705 cpu.go:282] Add success.
W0322 23:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:52:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:52:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:52:13.420280  543705 net.go:648] Add success.
I0322 23:52:13.422962  543705 net.go:770] primary dev: ETH0
I0322 23:52:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:52:13.422987  543705 net.go:698] Add success.
W0322 23:52:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:52:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0322 23:52:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:52:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:52:14.456928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:52:14.456935  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:52:14.457001  543705 disk_worker.go:494] system disk:vda1
I0322 23:52:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:52:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:52:15.456805  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:52:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:52:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:52:16.457992  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:52:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:52:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:52:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:52:23.409770  543705 memory.go:184] no items to output this cycle
I0322 23:52:23.409800  543705 cpu.go:275] no items to output this cycle
E0322 23:52:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:52:33.409786  543705 memory.go:184] no items to output this cycle
I0322 23:52:33.409791  543705 cpu.go:275] no items to output this cycle
I0322 23:52:37.141676  543705 disk_info.go:125] begin check local disk info of client
I0322 23:52:37.144198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:52:37.144205  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0322 23:52:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:52:43.410663  543705 memory.go:191] Add success.
I0322 23:52:43.409808  543705 cpu.go:282] Add success.
I0322 23:52:43.420379  543705 net.go:648] Add success.
I0322 23:52:43.423089  543705 net.go:770] primary dev: ETH0
I0322 23:52:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:52:43.423116  543705 net.go:698] Add success.
I0322 23:52:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:52:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:52:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:52:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:52:53.409768  543705 memory.go:184] no items to output this cycle
I0322 23:52:53.409780  543705 cpu.go:275] no items to output this cycle
E0322 23:53:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:53:03.409781  543705 memory.go:184] no items to output this cycle
I0322 23:53:03.409797  543705 cpu.go:275] no items to output this cycle
E0322 23:53:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:53:13.409825  543705 memory.go:191] Add success.
I0322 23:53:13.409831  543705 cpu.go:282] Add success.
W0322 23:53:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:53:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:53:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:53:13.420152  543705 net.go:648] Add success.
I0322 23:53:13.422893  543705 net.go:770] primary dev: ETH0
I0322 23:53:13.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:53:13.422917  543705 net.go:698] Add success.
I0322 23:53:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:53:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:53:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0322 23:53:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:53:14.456529  543705 disk_worker.go:494] system disk:vda1
I0322 23:53:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:53:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:53:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:53:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:53:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:53:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:53:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:53:23.409771  543705 memory.go:184] no items to output this cycle
I0322 23:53:23.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:53:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:53:33.409784  543705 memory.go:184] no items to output this cycle
I0322 23:53:33.409787  543705 cpu.go:275] no items to output this cycle
I0322 23:53:37.145677  543705 disk_info.go:125] begin check local disk info of client
I0322 23:53:37.148121  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:53:37.148128  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0322 23:53:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:53:43.410773  543705 memory.go:191] Add success.
I0322 23:53:43.409820  543705 cpu.go:282] Add success.
I0322 23:53:43.420487  543705 net.go:648] Add success.
I0322 23:53:43.423035  543705 net.go:770] primary dev: ETH0
I0322 23:53:43.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:53:43.423060  543705 net.go:698] Add success.
I0322 23:53:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:53:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:53:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:53:53.410416  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:53:53.410433  543705 memory.go:184] no items to output this cycle
I0322 23:53:53.410465  543705 cpu.go:275] no items to output this cycle
E0322 23:54:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:54:03.409807  543705 memory.go:184] no items to output this cycle
I0322 23:54:03.409819  543705 cpu.go:275] no items to output this cycle
E0322 23:54:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:54:13.409808  543705 cpu.go:282] Add success.
I0322 23:54:13.409816  543705 memory.go:191] Add success.
W0322 23:54:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:54:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:54:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:54:13.420200  543705 net.go:648] Add success.
I0322 23:54:13.422796  543705 net.go:770] primary dev: ETH0
I0322 23:54:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:54:13.422821  543705 net.go:698] Add success.
I0322 23:54:13.469805  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1ff00239-233f-4d6d-90d1-f6219af88023","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:54:13.469841  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0322 23:54:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:54:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:54:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 23:54:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:54:14.456687  543705 disk_worker.go:494] system disk:vda1
I0322 23:54:14.456717  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:54:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:54:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:54:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:54:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:54:23.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:54:23.409883  543705 memory.go:184] no items to output this cycle
I0322 23:54:23.409953  543705 cpu.go:275] no items to output this cycle
E0322 23:54:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:54:33.409779  543705 memory.go:184] no items to output this cycle
I0322 23:54:33.409789  543705 cpu.go:275] no items to output this cycle
I0322 23:54:37.149675  543705 disk_info.go:125] begin check local disk info of client
I0322 23:54:37.152226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:54:37.152232  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6000 0xc0003e6040]
I0322 23:54:39.920318  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:54:39.920327  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:54:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:54:43.410750  543705 memory.go:191] Add success.
I0322 23:54:43.409794  543705 cpu.go:282] Add success.
I0322 23:54:43.420445  543705 net.go:648] Add success.
I0322 23:54:43.423186  543705 net.go:770] primary dev: ETH0
I0322 23:54:43.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:54:43.423217  543705 net.go:698] Add success.
I0322 23:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:54:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:54:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:54:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:54:53.409773  543705 memory.go:184] no items to output this cycle
I0322 23:54:53.409802  543705 cpu.go:275] no items to output this cycle
E0322 23:55:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:55:03.409787  543705 memory.go:184] no items to output this cycle
I0322 23:55:03.409789  543705 cpu.go:275] no items to output this cycle
E0322 23:55:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:55:13.409791  543705 memory.go:191] Add success.
I0322 23:55:13.409792  543705 cpu.go:282] Add success.
W0322 23:55:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:55:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:55:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:55:13.420146  543705 net.go:648] Add success.
I0322 23:55:13.422878  543705 net.go:770] primary dev: ETH0
I0322 23:55:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:55:13.422907  543705 net.go:698] Add success.
I0322 23:55:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:55:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:55:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0322 23:55:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:55:14.456604  543705 disk_worker.go:494] system disk:vda1
I0322 23:55:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:55:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:55:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:55:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:55:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:55:23.409883  543705 memory.go:184] no items to output this cycle
I0322 23:55:23.409883  543705 cpu.go:275] no items to output this cycle
E0322 23:55:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:55:33.409782  543705 memory.go:184] no items to output this cycle
I0322 23:55:33.409786  543705 cpu.go:275] no items to output this cycle
I0322 23:55:37.153684  543705 disk_info.go:125] begin check local disk info of client
I0322 23:55:37.156090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:55:37.156097  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391b00 0xc000391b40]
E0322 23:55:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:55:43.410610  543705 memory.go:191] Add success.
I0322 23:55:43.409809  543705 cpu.go:282] Add success.
I0322 23:55:43.420319  543705 net.go:648] Add success.
I0322 23:55:43.422808  543705 net.go:770] primary dev: ETH0
I0322 23:55:43.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:55:43.422834  543705 net.go:698] Add success.
I0322 23:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:55:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:55:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:55:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:55:53.409802  543705 memory.go:184] no items to output this cycle
I0322 23:55:53.409813  543705 cpu.go:275] no items to output this cycle
E0322 23:56:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:56:03.409775  543705 memory.go:184] no items to output this cycle
I0322 23:56:03.409799  543705 cpu.go:275] no items to output this cycle
E0322 23:56:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:56:13.409798  543705 cpu.go:282] Add success.
I0322 23:56:13.409806  543705 memory.go:191] Add success.
W0322 23:56:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:56:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:56:13.420124  543705 net.go:648] Add success.
I0322 23:56:13.423084  543705 net.go:770] primary dev: ETH0
I0322 23:56:13.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:56:13.423108  543705 net.go:698] Add success.
I0322 23:56:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:56:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:56:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0322 23:56:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:56:14.456593  543705 disk_worker.go:494] system disk:vda1
I0322 23:56:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:56:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:56:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:56:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:56:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:56:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:56:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:56:23.409776  543705 memory.go:184] no items to output this cycle
I0322 23:56:23.409781  543705 cpu.go:275] no items to output this cycle
E0322 23:56:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:56:33.409804  543705 memory.go:184] no items to output this cycle
I0322 23:56:33.409818  543705 cpu.go:275] no items to output this cycle
I0322 23:56:37.157682  543705 disk_info.go:125] begin check local disk info of client
I0322 23:56:37.160192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:56:37.160198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0322 23:56:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:56:43.410644  543705 memory.go:191] Add success.
I0322 23:56:43.409829  543705 cpu.go:282] Add success.
I0322 23:56:43.420330  543705 net.go:648] Add success.
I0322 23:56:43.423145  543705 net.go:770] primary dev: ETH0
I0322 23:56:43.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:56:43.423171  543705 net.go:698] Add success.
I0322 23:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:56:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:56:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:56:53.409797  543705 memory.go:184] no items to output this cycle
I0322 23:56:53.409805  543705 cpu.go:275] no items to output this cycle
E0322 23:57:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:57:03.409786  543705 memory.go:184] no items to output this cycle
I0322 23:57:03.409801  543705 cpu.go:275] no items to output this cycle
E0322 23:57:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:57:13.409789  543705 memory.go:191] Add success.
I0322 23:57:13.409812  543705 cpu.go:282] Add success.
W0322 23:57:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:57:13.420300  543705 net.go:648] Add success.
I0322 23:57:13.422961  543705 net.go:770] primary dev: ETH0
I0322 23:57:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:57:13.422985  543705 net.go:698] Add success.
I0322 23:57:13.429092  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0322 23:57:13.453333  543705 event_worker.go:152] Polling the log file for events...
I0322 23:57:13.468056  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"348959a3-89e7-42cb-9cc1-93bc1837469b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0322 23:57:13.468089  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0322 23:57:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:57:14.455253  543705 disk_worker.go:708] disk space is not compliant
W0322 23:57:14.455257  543705 disk_worker.go:728] disk inode is not compliant
E0322 23:57:14.455926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0322 23:57:14.455935  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0322 23:57:14.455940  543705 custom_config.go:64] query custom config with name: gpu
I0322 23:57:14.456810  543705 disk_worker.go:494] system disk:vda1
I0322 23:57:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
E0322 23:57:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0322 23:57:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:57:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0322 23:57:16.457938  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0322 23:57:16.457979  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:57:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:57:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:57:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:57:23.409763  543705 memory.go:184] no items to output this cycle
I0322 23:57:23.409796  543705 cpu.go:275] no items to output this cycle
E0322 23:57:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:57:33.409768  543705 memory.go:184] no items to output this cycle
I0322 23:57:33.409808  543705 cpu.go:275] no items to output this cycle
I0322 23:57:37.161678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:57:37.164113  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:57:37.164120  543705 disk_info.go:196] parse disk info done, disk is : [0xc000274540 0xc0002745c0]
I0322 23:57:39.921730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0322 23:57:39.921736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0322 23:57:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:57:43.410638  543705 memory.go:191] Add success.
I0322 23:57:43.409806  543705 cpu.go:282] Add success.
I0322 23:57:43.420339  543705 net.go:648] Add success.
I0322 23:57:43.422722  543705 net.go:770] primary dev: ETH0
I0322 23:57:43.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:57:43.422750  543705 net.go:698] Add success.
I0322 23:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:57:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:57:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:57:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:57:53.409793  543705 memory.go:184] no items to output this cycle
I0322 23:57:53.409807  543705 cpu.go:275] no items to output this cycle
E0322 23:58:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:58:03.409772  543705 memory.go:184] no items to output this cycle
I0322 23:58:03.409812  543705 cpu.go:275] no items to output this cycle
E0322 23:58:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:58:13.409817  543705 memory.go:191] Add success.
I0322 23:58:13.409821  543705 cpu.go:282] Add success.
W0322 23:58:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:58:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:58:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:58:13.420119  543705 net.go:648] Add success.
I0322 23:58:13.422857  543705 net.go:770] primary dev: ETH0
I0322 23:58:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:58:13.422882  543705 net.go:698] Add success.
I0322 23:58:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:58:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:58:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0322 23:58:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:58:14.456594  543705 disk_worker.go:494] system disk:vda1
I0322 23:58:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:58:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:58:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:58:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:58:23.409799  543705 memory.go:184] no items to output this cycle
I0322 23:58:23.409811  543705 cpu.go:275] no items to output this cycle
E0322 23:58:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:58:33.409781  543705 memory.go:184] no items to output this cycle
I0322 23:58:33.409814  543705 cpu.go:275] no items to output this cycle
I0322 23:58:37.165681  543705 disk_info.go:125] begin check local disk info of client
I0322 23:58:37.168180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:58:37.168186  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e6840 0xc0003e6880]
E0322 23:58:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:58:43.410655  543705 memory.go:191] Add success.
I0322 23:58:43.409803  543705 cpu.go:282] Add success.
I0322 23:58:43.420359  543705 net.go:648] Add success.
I0322 23:58:43.423310  543705 net.go:770] primary dev: ETH0
I0322 23:58:43.423325  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:58:43.423340  543705 net.go:698] Add success.
I0322 23:58:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:58:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:58:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:58:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:58:53.409773  543705 memory.go:184] no items to output this cycle
I0322 23:58:53.409803  543705 cpu.go:275] no items to output this cycle
E0322 23:59:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:59:03.409799  543705 memory.go:184] no items to output this cycle
I0322 23:59:03.409806  543705 cpu.go:275] no items to output this cycle
E0322 23:59:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:59:13.409803  543705 memory.go:191] Add success.
I0322 23:59:13.409808  543705 cpu.go:282] Add success.
W0322 23:59:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0322 23:59:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0322 23:59:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0322 23:59:13.420071  543705 net.go:648] Add success.
I0322 23:59:13.422877  543705 net.go:770] primary dev: ETH0
I0322 23:59:13.422891  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:59:13.422911  543705 net.go:698] Add success.
I0322 23:59:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0322 23:59:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0322 23:59:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0322 23:59:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0322 23:59:14.456578  543705 disk_worker.go:494] system disk:vda1
I0322 23:59:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0322 23:59:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0322 23:59:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:59:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0322 23:59:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0322 23:59:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:59:23.409811  543705 memory.go:184] no items to output this cycle
I0322 23:59:23.409824  543705 cpu.go:275] no items to output this cycle
E0322 23:59:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:59:33.409791  543705 memory.go:184] no items to output this cycle
I0322 23:59:33.409800  543705 cpu.go:275] no items to output this cycle
I0322 23:59:37.169678  543705 disk_info.go:125] begin check local disk info of client
I0322 23:59:37.172149  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0322 23:59:37.172155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4480 0xc0003d44c0]
E0322 23:59:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:59:43.410645  543705 memory.go:191] Add success.
I0322 23:59:43.409840  543705 cpu.go:282] Add success.
I0322 23:59:43.420389  543705 net.go:648] Add success.
I0322 23:59:43.423155  543705 net.go:770] primary dev: ETH0
I0322 23:59:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0322 23:59:43.423183  543705 net.go:698] Add success.
I0322 23:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0322 23:59:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0322 23:59:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0322 23:59:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0322 23:59:53.409776  543705 memory.go:184] no items to output this cycle
I0322 23:59:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 00:00:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:00:03.409777  543705 memory.go:184] no items to output this cycle
I0323 00:00:03.409814  543705 cpu.go:275] no items to output this cycle
W0323 00:00:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:00:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:00:13.409727  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 00:00:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:00:13.409829  543705 memory.go:191] Add success.
I0323 00:00:13.409839  543705 cpu.go:282] Add success.
I0323 00:00:13.420063  543705 net.go:648] Add success.
I0323 00:00:13.422729  543705 net.go:770] primary dev: ETH0
I0323 00:00:13.422744  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:00:13.422758  543705 net.go:698] Add success.
I0323 00:00:13.468317  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e8c1d150-3cc1-4f86-8839-e3875b2e918f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:00:13.468354  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:00:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:00:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:00:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 00:00:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:00:14.456603  543705 disk_worker.go:494] system disk:vda1
I0323 00:00:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:00:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:00:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:00:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:00:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:00:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:00:23.409796  543705 memory.go:184] no items to output this cycle
I0323 00:00:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 00:00:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:00:33.409798  543705 memory.go:184] no items to output this cycle
I0323 00:00:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 00:00:37.173674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:00:37.176207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:00:37.176213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4500 0xc0003d4540]
I0323 00:00:39.921911  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:00:39.921919  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:00:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:00:43.410752  543705 memory.go:191] Add success.
I0323 00:00:43.409808  543705 cpu.go:282] Add success.
I0323 00:00:43.420514  543705 net.go:648] Add success.
I0323 00:00:43.423532  543705 net.go:770] primary dev: ETH0
I0323 00:00:43.423545  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:00:43.423559  543705 net.go:698] Add success.
I0323 00:00:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:00:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:00:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:00:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:00:53.409791  543705 memory.go:184] no items to output this cycle
I0323 00:00:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 00:01:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:01:03.409814  543705 memory.go:184] no items to output this cycle
I0323 00:01:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 00:01:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:01:13.409824  543705 memory.go:191] Add success.
I0323 00:01:13.409834  543705 cpu.go:282] Add success.
W0323 00:01:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:01:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:01:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:01:13.420166  543705 net.go:648] Add success.
I0323 00:01:13.422853  543705 net.go:770] primary dev: ETH0
I0323 00:01:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:01:13.422879  543705 net.go:698] Add success.
I0323 00:01:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:01:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:01:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 00:01:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:01:14.456584  543705 disk_worker.go:494] system disk:vda1
I0323 00:01:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:01:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:01:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:01:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:01:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:01:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:01:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:01:23.409795  543705 memory.go:184] no items to output this cycle
I0323 00:01:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 00:01:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:01:33.409788  543705 memory.go:184] no items to output this cycle
I0323 00:01:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 00:01:37.177675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:01:37.180109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:01:37.180116  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270500 0xc000270540]
E0323 00:01:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:01:43.410669  543705 memory.go:191] Add success.
I0323 00:01:43.409801  543705 cpu.go:282] Add success.
I0323 00:01:43.420432  543705 net.go:648] Add success.
I0323 00:01:43.423008  543705 net.go:770] primary dev: ETH0
I0323 00:01:43.423021  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:01:43.423035  543705 net.go:698] Add success.
I0323 00:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:01:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:01:53.410377  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:01:53.410391  543705 memory.go:184] no items to output this cycle
I0323 00:01:53.410393  543705 cpu.go:275] no items to output this cycle
E0323 00:02:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:02:03.409786  543705 memory.go:184] no items to output this cycle
I0323 00:02:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 00:02:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:02:13.409788  543705 memory.go:191] Add success.
I0323 00:02:13.409796  543705 cpu.go:282] Add success.
W0323 00:02:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:02:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:02:13.420065  543705 net.go:648] Add success.
I0323 00:02:13.422744  543705 net.go:770] primary dev: ETH0
I0323 00:02:13.422759  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:02:13.422773  543705 net.go:698] Add success.
W0323 00:02:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:02:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 00:02:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:02:14.456134  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:02:14.456143  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:02:14.456149  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:02:14.456478  543705 disk_worker.go:494] system disk:vda1
I0323 00:02:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:02:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:02:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:02:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:02:16.457918  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:02:16.457972  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:02:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:02:16.472340  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:02:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:02:23.409811  543705 memory.go:184] no items to output this cycle
I0323 00:02:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 00:02:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:02:33.409767  543705 memory.go:184] no items to output this cycle
I0323 00:02:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 00:02:37.181677  543705 disk_info.go:125] begin check local disk info of client
I0323 00:02:37.184273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:02:37.184280  543705 disk_info.go:196] parse disk info done, disk is : [0xc000500740 0xc000500780]
E0323 00:02:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:02:43.410896  543705 memory.go:191] Add success.
I0323 00:02:43.409830  543705 cpu.go:282] Add success.
I0323 00:02:43.420620  543705 net.go:648] Add success.
I0323 00:02:43.423268  543705 net.go:770] primary dev: ETH0
I0323 00:02:43.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:02:43.423298  543705 net.go:698] Add success.
I0323 00:02:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:02:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:02:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:02:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:02:53.409776  543705 memory.go:184] no items to output this cycle
I0323 00:02:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 00:03:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:03:03.409808  543705 memory.go:184] no items to output this cycle
I0323 00:03:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 00:03:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:03:13.409776  543705 memory.go:191] Add success.
W0323 00:03:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 00:03:13.409805  543705 cpu.go:282] Add success.
W0323 00:03:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:03:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:03:13.420352  543705 net.go:648] Add success.
I0323 00:03:13.422932  543705 net.go:770] primary dev: ETH0
I0323 00:03:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:03:13.422956  543705 net.go:698] Add success.
I0323 00:03:13.463741  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"43315470-77e6-43d8-986d-3dd9bb70508f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:03:13.463773  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:03:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:03:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:03:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 00:03:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:03:14.456526  543705 disk_worker.go:494] system disk:vda1
I0323 00:03:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:03:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:03:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:03:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:03:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:03:23.409771  543705 memory.go:184] no items to output this cycle
I0323 00:03:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 00:03:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:03:33.409779  543705 memory.go:184] no items to output this cycle
I0323 00:03:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 00:03:37.185679  543705 disk_info.go:125] begin check local disk info of client
I0323 00:03:37.188095  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:03:37.188101  543705 disk_info.go:196] parse disk info done, disk is : [0xc000322c40 0xc000322c80]
I0323 00:03:39.924305  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:03:39.924311  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:03:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:03:43.410701  543705 memory.go:191] Add success.
I0323 00:03:43.409790  543705 cpu.go:282] Add success.
I0323 00:03:43.420391  543705 net.go:648] Add success.
I0323 00:03:43.423141  543705 net.go:770] primary dev: ETH0
I0323 00:03:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:03:43.423170  543705 net.go:698] Add success.
I0323 00:03:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:03:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:03:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:03:53.409769  543705 memory.go:184] no items to output this cycle
I0323 00:03:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 00:04:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:04:03.409807  543705 memory.go:184] no items to output this cycle
I0323 00:04:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 00:04:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:04:13.409786  543705 memory.go:191] Add success.
I0323 00:04:13.409804  543705 cpu.go:282] Add success.
W0323 00:04:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:04:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:04:13.420126  543705 net.go:648] Add success.
I0323 00:04:13.422756  543705 net.go:770] primary dev: ETH0
I0323 00:04:13.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:04:13.422781  543705 net.go:698] Add success.
I0323 00:04:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:04:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:04:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 00:04:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:04:14.456598  543705 disk_worker.go:494] system disk:vda1
I0323 00:04:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:04:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:04:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:04:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:04:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:04:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:04:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:04:23.409769  543705 memory.go:184] no items to output this cycle
I0323 00:04:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 00:04:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:04:33.409803  543705 memory.go:184] no items to output this cycle
I0323 00:04:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 00:04:37.189673  543705 disk_info.go:125] begin check local disk info of client
I0323 00:04:37.192243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:04:37.192250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2440 0xc0003b2480]
E0323 00:04:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:04:43.410611  543705 memory.go:191] Add success.
I0323 00:04:43.409810  543705 cpu.go:282] Add success.
I0323 00:04:43.420352  543705 net.go:648] Add success.
I0323 00:04:43.422980  543705 net.go:770] primary dev: ETH0
I0323 00:04:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:04:43.423010  543705 net.go:698] Add success.
I0323 00:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:04:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:04:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:04:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:04:53.409783  543705 memory.go:184] no items to output this cycle
I0323 00:04:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 00:05:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:05:03.409783  543705 memory.go:184] no items to output this cycle
I0323 00:05:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 00:05:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:05:13.409786  543705 cpu.go:282] Add success.
I0323 00:05:13.409791  543705 memory.go:191] Add success.
W0323 00:05:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:05:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:05:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:05:13.420636  543705 net.go:648] Add success.
I0323 00:05:13.423804  543705 net.go:770] primary dev: ETH0
I0323 00:05:13.423817  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:05:13.423829  543705 net.go:698] Add success.
I0323 00:05:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:05:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:05:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 00:05:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:05:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 00:05:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:05:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:05:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:05:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:05:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:05:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:05:23.409793  543705 memory.go:184] no items to output this cycle
I0323 00:05:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 00:05:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:05:33.409766  543705 memory.go:184] no items to output this cycle
I0323 00:05:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 00:05:37.193682  543705 disk_info.go:125] begin check local disk info of client
I0323 00:05:37.196090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:05:37.196097  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508c80 0xc000508cc0]
E0323 00:05:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:05:43.410841  543705 memory.go:191] Add success.
I0323 00:05:43.409817  543705 cpu.go:282] Add success.
I0323 00:05:43.420550  543705 net.go:648] Add success.
I0323 00:05:43.423161  543705 net.go:770] primary dev: ETH0
I0323 00:05:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:05:43.423186  543705 net.go:698] Add success.
I0323 00:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:05:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:05:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:05:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:05:53.409773  543705 memory.go:184] no items to output this cycle
I0323 00:05:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:06:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:06:03.409779  543705 memory.go:184] no items to output this cycle
I0323 00:06:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 00:06:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:06:13.409808  543705 memory.go:191] Add success.
I0323 00:06:13.409814  543705 cpu.go:282] Add success.
W0323 00:06:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:06:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:06:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:06:13.420161  543705 net.go:648] Add success.
I0323 00:06:13.422879  543705 net.go:770] primary dev: ETH0
I0323 00:06:13.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:06:13.422909  543705 net.go:698] Add success.
I0323 00:06:13.468761  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fe88831a-8e07-4ecf-867a-a0c267db6b56","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:06:13.468795  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:06:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:06:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:06:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 00:06:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:06:14.456529  543705 disk_worker.go:494] system disk:vda1
I0323 00:06:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:06:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:06:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:06:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:06:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:06:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:06:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:06:23.409764  543705 memory.go:184] no items to output this cycle
I0323 00:06:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 00:06:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:06:33.409778  543705 memory.go:184] no items to output this cycle
I0323 00:06:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 00:06:37.197676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:06:37.200172  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:06:37.200178  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0323 00:06:39.925731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:06:39.925739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:06:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:06:43.410528  543705 memory.go:191] Add success.
I0323 00:06:43.409801  543705 cpu.go:282] Add success.
I0323 00:06:43.420313  543705 net.go:648] Add success.
I0323 00:06:43.422977  543705 net.go:770] primary dev: ETH0
I0323 00:06:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:06:43.423003  543705 net.go:698] Add success.
I0323 00:06:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:06:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:06:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:06:53.409769  543705 memory.go:184] no items to output this cycle
I0323 00:06:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 00:07:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:07:03.409780  543705 memory.go:184] no items to output this cycle
I0323 00:07:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 00:07:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:07:13.409793  543705 memory.go:191] Add success.
I0323 00:07:13.409801  543705 cpu.go:282] Add success.
W0323 00:07:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:07:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:07:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:07:13.420296  543705 net.go:648] Add success.
I0323 00:07:13.422846  543705 net.go:770] primary dev: ETH0
I0323 00:07:13.422860  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:07:13.422873  543705 net.go:698] Add success.
I0323 00:07:13.453465  543705 event_worker.go:152] Polling the log file for events...
W0323 00:07:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:07:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 00:07:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:07:14.456122  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:07:14.456131  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:07:14.456138  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:07:14.456440  543705 disk_worker.go:494] system disk:vda1
I0323 00:07:14.456484  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:07:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:07:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:07:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:07:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:07:16.458011  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:07:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:07:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:07:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:07:23.409773  543705 memory.go:184] no items to output this cycle
I0323 00:07:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 00:07:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:07:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 00:07:33.409788  543705 memory.go:184] no items to output this cycle
I0323 00:07:37.201678  543705 disk_info.go:125] begin check local disk info of client
I0323 00:07:37.204123  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:07:37.204130  543705 disk_info.go:196] parse disk info done, disk is : [0xc000273bc0 0xc000273c00]
E0323 00:07:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:07:43.410519  543705 memory.go:191] Add success.
I0323 00:07:43.409838  543705 cpu.go:282] Add success.
I0323 00:07:43.420206  543705 net.go:648] Add success.
I0323 00:07:43.422539  543705 net.go:770] primary dev: ETH0
I0323 00:07:43.422552  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:07:43.422566  543705 net.go:698] Add success.
I0323 00:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:07:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:07:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:07:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:07:53.409760  543705 memory.go:184] no items to output this cycle
I0323 00:07:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 00:08:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:08:03.409777  543705 memory.go:184] no items to output this cycle
I0323 00:08:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 00:08:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:08:13.409793  543705 memory.go:191] Add success.
I0323 00:08:13.409794  543705 cpu.go:282] Add success.
W0323 00:08:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:08:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:08:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:08:13.420049  543705 net.go:648] Add success.
I0323 00:08:13.422440  543705 net.go:770] primary dev: ETH0
I0323 00:08:13.422455  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:08:13.422466  543705 net.go:698] Add success.
I0323 00:08:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:08:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:08:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 00:08:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:08:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 00:08:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:08:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:08:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:08:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:08:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:08:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:08:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:08:23.409767  543705 memory.go:184] no items to output this cycle
I0323 00:08:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 00:08:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:08:33.409793  543705 memory.go:184] no items to output this cycle
I0323 00:08:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 00:08:37.205675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:08:37.208238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:08:37.208244  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508080 0xc0005080c0]
E0323 00:08:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:08:43.410639  543705 memory.go:191] Add success.
I0323 00:08:43.409819  543705 cpu.go:282] Add success.
I0323 00:08:43.420328  543705 net.go:648] Add success.
I0323 00:08:43.422801  543705 net.go:770] primary dev: ETH0
I0323 00:08:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:08:43.422828  543705 net.go:698] Add success.
I0323 00:08:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:08:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:08:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:08:53.409780  543705 memory.go:184] no items to output this cycle
I0323 00:08:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 00:09:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:09:03.409778  543705 memory.go:184] no items to output this cycle
I0323 00:09:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 00:09:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:09:13.409809  543705 memory.go:191] Add success.
I0323 00:09:13.409814  543705 cpu.go:282] Add success.
W0323 00:09:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:09:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:09:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:09:13.420217  543705 net.go:648] Add success.
I0323 00:09:13.423172  543705 net.go:770] primary dev: ETH0
I0323 00:09:13.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:09:13.423200  543705 net.go:698] Add success.
I0323 00:09:13.535066  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6dcce4de-f3c6-4099-9105-61e6bbfb3a85","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:09:13.535099  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:09:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:09:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:09:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0323 00:09:14.455235  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:09:14.456755  543705 disk_worker.go:494] system disk:vda1
I0323 00:09:14.456788  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:09:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:09:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:09:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:09:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:09:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:09:23.409874  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:09:23.409919  543705 cpu.go:275] no items to output this cycle
I0323 00:09:23.409939  543705 memory.go:184] no items to output this cycle
E0323 00:09:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:09:33.409790  543705 memory.go:184] no items to output this cycle
I0323 00:09:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 00:09:37.209687  543705 disk_info.go:125] begin check local disk info of client
I0323 00:09:37.212035  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:09:37.212043  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
I0323 00:09:39.925878  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:09:39.925885  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:09:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:09:43.410636  543705 memory.go:191] Add success.
I0323 00:09:43.409824  543705 cpu.go:282] Add success.
I0323 00:09:43.420345  543705 net.go:648] Add success.
I0323 00:09:43.422994  543705 net.go:770] primary dev: ETH0
I0323 00:09:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:09:43.423020  543705 net.go:698] Add success.
I0323 00:09:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:09:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:09:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:09:53.409786  543705 memory.go:184] no items to output this cycle
I0323 00:09:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 00:10:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:10:03.409793  543705 memory.go:184] no items to output this cycle
I0323 00:10:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 00:10:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:10:13.409802  543705 memory.go:191] Add success.
I0323 00:10:13.409806  543705 cpu.go:282] Add success.
W0323 00:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:10:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:10:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:10:13.420040  543705 net.go:648] Add success.
I0323 00:10:13.422513  543705 net.go:770] primary dev: ETH0
I0323 00:10:13.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:10:13.422538  543705 net.go:698] Add success.
I0323 00:10:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:10:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:10:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 00:10:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:10:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 00:10:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:10:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:10:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:10:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:10:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:10:23.409809  543705 memory.go:184] no items to output this cycle
I0323 00:10:23.409819  543705 cpu.go:275] no items to output this cycle
E0323 00:10:33.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:10:33.409915  543705 memory.go:184] no items to output this cycle
I0323 00:10:33.409918  543705 cpu.go:275] no items to output this cycle
I0323 00:10:37.213678  543705 disk_info.go:125] begin check local disk info of client
I0323 00:10:37.216212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:10:37.216218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0000 0xc0002b0040]
E0323 00:10:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:10:43.410690  543705 memory.go:191] Add success.
I0323 00:10:43.409811  543705 cpu.go:282] Add success.
I0323 00:10:43.420407  543705 net.go:648] Add success.
I0323 00:10:43.423124  543705 net.go:770] primary dev: ETH0
I0323 00:10:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:10:43.423149  543705 net.go:698] Add success.
I0323 00:10:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:10:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:10:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:10:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:10:53.409794  543705 memory.go:184] no items to output this cycle
I0323 00:10:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 00:11:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:11:03.409820  543705 memory.go:184] no items to output this cycle
I0323 00:11:03.409832  543705 cpu.go:275] no items to output this cycle
E0323 00:11:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:11:13.409811  543705 memory.go:191] Add success.
I0323 00:11:13.409836  543705 cpu.go:282] Add success.
W0323 00:11:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:11:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:11:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:11:13.420151  543705 net.go:648] Add success.
I0323 00:11:13.422692  543705 net.go:770] primary dev: ETH0
I0323 00:11:13.422705  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:11:13.422717  543705 net.go:698] Add success.
I0323 00:11:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:11:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:11:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 00:11:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:11:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 00:11:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:11:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:11:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:11:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:11:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:11:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:11:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:11:23.409766  543705 memory.go:184] no items to output this cycle
I0323 00:11:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:11:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:11:33.409804  543705 memory.go:184] no items to output this cycle
I0323 00:11:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 00:11:37.217674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:11:37.220205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:11:37.220212  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253780 0xc0002537c0]
E0323 00:11:43.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:11:43.410890  543705 memory.go:191] Add success.
I0323 00:11:43.409933  543705 cpu.go:282] Add success.
I0323 00:11:43.419740  543705 net.go:648] Add success.
I0323 00:11:43.422598  543705 net.go:770] primary dev: ETH0
I0323 00:11:43.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:11:43.422627  543705 net.go:698] Add success.
I0323 00:11:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:11:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:11:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:11:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:11:53.409779  543705 memory.go:184] no items to output this cycle
I0323 00:11:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:12:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:12:03.409815  543705 memory.go:184] no items to output this cycle
I0323 00:12:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 00:12:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:12:13.409812  543705 memory.go:191] Add success.
I0323 00:12:13.409822  543705 cpu.go:282] Add success.
W0323 00:12:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:12:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:12:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:12:13.420256  543705 net.go:648] Add success.
I0323 00:12:13.423209  543705 net.go:770] primary dev: ETH0
I0323 00:12:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:12:13.423238  543705 net.go:698] Add success.
I0323 00:12:13.463233  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f926be35-6409-432d-b54d-bcb529b535ca","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:12:13.463270  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 00:12:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:12:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0323 00:12:14.455219  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:12:14.455937  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:12:14.455946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:12:14.455952  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:12:14.456793  543705 disk_worker.go:494] system disk:vda1
I0323 00:12:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:12:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:12:15.456803  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:12:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:12:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:12:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:12:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:12:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:12:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:12:23.409781  543705 memory.go:184] no items to output this cycle
I0323 00:12:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 00:12:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:12:33.409778  543705 memory.go:184] no items to output this cycle
I0323 00:12:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 00:12:37.221696  543705 disk_info.go:125] begin check local disk info of client
I0323 00:12:37.224214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:12:37.224219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003822c0 0xc000382300]
I0323 00:12:39.928318  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:12:39.928325  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:12:43.409871  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:12:43.410873  543705 memory.go:191] Add success.
I0323 00:12:43.409979  543705 cpu.go:282] Add success.
I0323 00:12:43.419737  543705 net.go:648] Add success.
I0323 00:12:43.422562  543705 net.go:770] primary dev: ETH0
I0323 00:12:43.422577  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:12:43.422591  543705 net.go:698] Add success.
I0323 00:12:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:12:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:12:53.409773  543705 memory.go:184] no items to output this cycle
I0323 00:12:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:13:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:13:03.409813  543705 memory.go:184] no items to output this cycle
I0323 00:13:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 00:13:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:13:13.409813  543705 memory.go:191] Add success.
I0323 00:13:13.409819  543705 cpu.go:282] Add success.
W0323 00:13:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:13:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:13:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:13:13.420239  543705 net.go:648] Add success.
I0323 00:13:13.424710  543705 net.go:770] primary dev: ETH0
I0323 00:13:13.424721  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:13:13.424733  543705 net.go:698] Add success.
I0323 00:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:13:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:13:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 00:13:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:13:14.456514  543705 disk_worker.go:494] system disk:vda1
I0323 00:13:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:13:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:13:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:13:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:13:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:13:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:13:23.409771  543705 memory.go:184] no items to output this cycle
I0323 00:13:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 00:13:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:13:33.409800  543705 memory.go:184] no items to output this cycle
I0323 00:13:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 00:13:37.225683  543705 disk_info.go:125] begin check local disk info of client
I0323 00:13:37.228616  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:13:37.228625  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052c6c0 0xc00052c700]
E0323 00:13:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:13:43.410690  543705 memory.go:191] Add success.
I0323 00:13:43.409791  543705 cpu.go:282] Add success.
I0323 00:13:43.420408  543705 net.go:648] Add success.
I0323 00:13:43.423411  543705 net.go:770] primary dev: ETH0
I0323 00:13:43.423429  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:13:43.423448  543705 net.go:698] Add success.
I0323 00:13:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:13:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:13:53.409773  543705 memory.go:184] no items to output this cycle
I0323 00:13:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:14:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:14:03.409783  543705 memory.go:184] no items to output this cycle
I0323 00:14:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 00:14:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:14:13.409776  543705 memory.go:191] Add success.
I0323 00:14:13.409798  543705 cpu.go:282] Add success.
W0323 00:14:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:14:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:14:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:14:13.420315  543705 net.go:648] Add success.
I0323 00:14:13.423148  543705 net.go:770] primary dev: ETH0
I0323 00:14:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:14:13.423178  543705 net.go:698] Add success.
I0323 00:14:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:14:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:14:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 00:14:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:14:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 00:14:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:14:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:14:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:14:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:14:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:14:23.409790  543705 memory.go:184] no items to output this cycle
I0323 00:14:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 00:14:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:14:33.409790  543705 memory.go:184] no items to output this cycle
I0323 00:14:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 00:14:37.229669  543705 disk_info.go:125] begin check local disk info of client
I0323 00:14:37.232268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:14:37.232275  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390000 0xc000390040]
E0323 00:14:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:14:43.410609  543705 memory.go:191] Add success.
I0323 00:14:43.409836  543705 cpu.go:282] Add success.
I0323 00:14:43.420292  543705 net.go:648] Add success.
I0323 00:14:43.422928  543705 net.go:770] primary dev: ETH0
I0323 00:14:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:14:43.422953  543705 net.go:698] Add success.
I0323 00:14:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:14:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:14:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:14:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:14:53.409796  543705 memory.go:184] no items to output this cycle
I0323 00:14:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 00:15:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:15:03.409786  543705 cpu.go:275] no items to output this cycle
I0323 00:15:03.409788  543705 memory.go:184] no items to output this cycle
E0323 00:15:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:15:13.409780  543705 memory.go:191] Add success.
W0323 00:15:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 00:15:13.409812  543705 cpu.go:282] Add success.
W0323 00:15:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:15:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:15:13.420105  543705 net.go:648] Add success.
I0323 00:15:13.423164  543705 net.go:770] primary dev: ETH0
I0323 00:15:13.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:15:13.423191  543705 net.go:698] Add success.
I0323 00:15:13.464080  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b2721aa9-2c7c-4874-a5da-184cff39280d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:15:13.464112  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:15:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:15:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:15:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0323 00:15:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:15:14.456776  543705 disk_worker.go:494] system disk:vda1
I0323 00:15:14.456806  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:15:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:15:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:15:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:15:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:15:23.409762  543705 memory.go:184] no items to output this cycle
I0323 00:15:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 00:15:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:15:33.409775  543705 memory.go:184] no items to output this cycle
I0323 00:15:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 00:15:37.233681  543705 disk_info.go:125] begin check local disk info of client
I0323 00:15:37.236022  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:15:37.236028  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fa440 0xc0004fa480]
I0323 00:15:39.929738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:15:39.929745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:15:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:15:43.410622  543705 memory.go:191] Add success.
I0323 00:15:43.409796  543705 cpu.go:282] Add success.
I0323 00:15:43.420355  543705 net.go:648] Add success.
I0323 00:15:43.423148  543705 net.go:770] primary dev: ETH0
I0323 00:15:43.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:15:43.423173  543705 net.go:698] Add success.
I0323 00:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:15:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:15:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:15:53.410426  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:15:53.410445  543705 memory.go:184] no items to output this cycle
I0323 00:15:53.410453  543705 cpu.go:275] no items to output this cycle
E0323 00:16:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:16:03.409769  543705 memory.go:184] no items to output this cycle
I0323 00:16:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 00:16:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:16:13.409793  543705 memory.go:191] Add success.
I0323 00:16:13.409793  543705 cpu.go:282] Add success.
W0323 00:16:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:16:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:16:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:16:13.420218  543705 net.go:648] Add success.
I0323 00:16:13.422892  543705 net.go:770] primary dev: ETH0
I0323 00:16:13.422905  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:16:13.422917  543705 net.go:698] Add success.
I0323 00:16:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:16:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:16:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 00:16:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:16:14.456515  543705 disk_worker.go:494] system disk:vda1
I0323 00:16:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:16:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:16:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:16:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:16:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:16:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:16:23.409775  543705 memory.go:184] no items to output this cycle
I0323 00:16:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 00:16:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:16:33.409907  543705 cpu.go:275] no items to output this cycle
I0323 00:16:33.409971  543705 memory.go:184] no items to output this cycle
I0323 00:16:37.237677  543705 disk_info.go:125] begin check local disk info of client
I0323 00:16:37.240211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:16:37.240218  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396040 0xc000396080]
E0323 00:16:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:16:43.419378  543705 memory.go:191] Add success.
I0323 00:16:43.409810  543705 cpu.go:282] Add success.
I0323 00:16:43.419686  543705 net.go:648] Add success.
I0323 00:16:43.422373  543705 net.go:770] primary dev: ETH0
I0323 00:16:43.422387  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:16:43.422399  543705 net.go:698] Add success.
I0323 00:16:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:16:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:16:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:16:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:16:53.409783  543705 memory.go:184] no items to output this cycle
I0323 00:16:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 00:17:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:17:03.409791  543705 memory.go:184] no items to output this cycle
I0323 00:17:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 00:17:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:17:13.409810  543705 memory.go:191] Add success.
I0323 00:17:13.409817  543705 cpu.go:282] Add success.
W0323 00:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:17:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:17:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:17:13.420120  543705 net.go:648] Add success.
I0323 00:17:13.422825  543705 net.go:770] primary dev: ETH0
I0323 00:17:13.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:17:13.422851  543705 net.go:698] Add success.
I0323 00:17:13.453441  543705 event_worker.go:152] Polling the log file for events...
W0323 00:17:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:17:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 00:17:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:17:14.456760  543705 disk_worker.go:494] system disk:vda1
I0323 00:17:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:17:14.457081  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:17:14.457089  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:17:14.457094  543705 custom_config.go:64] query custom config with name: gpu
E0323 00:17:15.456864  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:17:15.456873  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:17:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:17:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:17:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:17:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:17:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:17:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:17:23.409798  543705 memory.go:184] no items to output this cycle
I0323 00:17:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 00:17:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:17:33.409779  543705 memory.go:184] no items to output this cycle
I0323 00:17:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 00:17:37.241676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:17:37.244167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:17:37.244174  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481d00 0xc000481d40]
E0323 00:17:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:17:43.410656  543705 memory.go:191] Add success.
I0323 00:17:43.409786  543705 cpu.go:282] Add success.
I0323 00:17:43.420346  543705 net.go:648] Add success.
I0323 00:17:43.423151  543705 net.go:770] primary dev: ETH0
I0323 00:17:43.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:17:43.423178  543705 net.go:698] Add success.
I0323 00:17:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:17:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:17:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:17:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:17:53.409772  543705 memory.go:184] no items to output this cycle
I0323 00:17:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 00:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:18:03.409779  543705 memory.go:184] no items to output this cycle
I0323 00:18:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 00:18:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:18:13.409794  543705 cpu.go:282] Add success.
I0323 00:18:13.409799  543705 memory.go:191] Add success.
W0323 00:18:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:18:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:18:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:18:13.420044  543705 net.go:648] Add success.
I0323 00:18:13.422838  543705 net.go:770] primary dev: ETH0
I0323 00:18:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:18:13.422863  543705 net.go:698] Add success.
I0323 00:18:13.574533  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3ba19b7e-e91a-4cc6-91cc-dd81e7494372","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:18:13.574568  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:18:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:18:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:18:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 00:18:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:18:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 00:18:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:18:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:18:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:18:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:18:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:18:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:18:23.409873  543705 cpu.go:275] no items to output this cycle
I0323 00:18:23.409881  543705 memory.go:184] no items to output this cycle
E0323 00:18:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:18:33.409793  543705 memory.go:184] no items to output this cycle
I0323 00:18:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 00:18:37.245676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:18:37.248173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:18:37.248179  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab040 0xc0001ab080]
I0323 00:18:39.932344  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:18:39.932350  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:18:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:18:43.410689  543705 memory.go:191] Add success.
I0323 00:18:43.409814  543705 cpu.go:282] Add success.
I0323 00:18:43.420395  543705 net.go:648] Add success.
I0323 00:18:43.423168  543705 net.go:770] primary dev: ETH0
I0323 00:18:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:18:43.423197  543705 net.go:698] Add success.
I0323 00:18:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:18:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:18:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:18:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:18:53.409798  543705 memory.go:184] no items to output this cycle
I0323 00:18:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 00:19:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:19:03.409772  543705 memory.go:184] no items to output this cycle
I0323 00:19:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 00:19:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:19:13.409810  543705 memory.go:191] Add success.
I0323 00:19:13.409817  543705 cpu.go:282] Add success.
W0323 00:19:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:19:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:19:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:19:13.420235  543705 net.go:648] Add success.
I0323 00:19:13.423196  543705 net.go:770] primary dev: ETH0
I0323 00:19:13.423212  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:19:13.423227  543705 net.go:698] Add success.
I0323 00:19:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:19:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:19:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0323 00:19:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:19:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 00:19:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:19:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:19:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:19:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:19:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:19:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:19:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:19:23.409797  543705 memory.go:184] no items to output this cycle
I0323 00:19:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 00:19:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:19:33.409786  543705 memory.go:184] no items to output this cycle
I0323 00:19:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 00:19:37.249673  543705 disk_info.go:125] begin check local disk info of client
I0323 00:19:37.252234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:19:37.252240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000f16c0 0xc0000f1700]
E0323 00:19:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:19:43.410689  543705 memory.go:191] Add success.
I0323 00:19:43.409797  543705 cpu.go:282] Add success.
I0323 00:19:43.420401  543705 net.go:648] Add success.
I0323 00:19:43.423162  543705 net.go:770] primary dev: ETH0
I0323 00:19:43.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:19:43.423192  543705 net.go:698] Add success.
I0323 00:19:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:19:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:19:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:19:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:19:53.409800  543705 memory.go:184] no items to output this cycle
I0323 00:19:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 00:20:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:20:03.409809  543705 memory.go:184] no items to output this cycle
I0323 00:20:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 00:20:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:20:13.409779  543705 memory.go:191] Add success.
W0323 00:20:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 00:20:13.409805  543705 cpu.go:282] Add success.
W0323 00:20:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:20:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:20:13.420167  543705 net.go:648] Add success.
I0323 00:20:13.423053  543705 net.go:770] primary dev: ETH0
I0323 00:20:13.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:20:13.423079  543705 net.go:698] Add success.
I0323 00:20:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:20:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:20:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 00:20:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:20:14.456612  543705 disk_worker.go:494] system disk:vda1
I0323 00:20:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:20:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:20:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:20:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:20:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:20:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:20:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:20:23.409778  543705 memory.go:184] no items to output this cycle
I0323 00:20:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 00:20:33.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:20:33.409912  543705 cpu.go:275] no items to output this cycle
I0323 00:20:33.409916  543705 memory.go:184] no items to output this cycle
I0323 00:20:37.253675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:20:37.256152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:20:37.256159  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032fec0 0xc00032ff00]
E0323 00:20:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:20:43.410697  543705 memory.go:191] Add success.
I0323 00:20:43.409828  543705 cpu.go:282] Add success.
I0323 00:20:43.420415  543705 net.go:648] Add success.
I0323 00:20:43.423166  543705 net.go:770] primary dev: ETH0
I0323 00:20:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:20:43.423194  543705 net.go:698] Add success.
I0323 00:20:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:20:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:20:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:20:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:20:53.409769  543705 memory.go:184] no items to output this cycle
I0323 00:20:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 00:21:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:21:03.409809  543705 memory.go:184] no items to output this cycle
I0323 00:21:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 00:21:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:21:13.409780  543705 memory.go:191] Add success.
I0323 00:21:13.409798  543705 cpu.go:282] Add success.
W0323 00:21:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:21:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:21:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:21:13.420056  543705 net.go:648] Add success.
I0323 00:21:13.422971  543705 net.go:770] primary dev: ETH0
I0323 00:21:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:21:13.422995  543705 net.go:698] Add success.
I0323 00:21:13.469268  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b7ab0ce-3852-435c-9416-758ff3759ffc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:21:13.469302  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:21:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:21:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:21:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 00:21:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:21:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 00:21:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:21:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:21:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:21:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:21:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:21:23.409792  543705 memory.go:184] no items to output this cycle
I0323 00:21:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 00:21:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:21:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 00:21:33.409787  543705 memory.go:184] no items to output this cycle
I0323 00:21:37.257674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:21:37.260146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:21:37.260152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
I0323 00:21:39.933742  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:21:39.933748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:21:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:21:43.410783  543705 memory.go:191] Add success.
I0323 00:21:43.409814  543705 cpu.go:282] Add success.
I0323 00:21:43.420497  543705 net.go:648] Add success.
I0323 00:21:43.423472  543705 net.go:770] primary dev: ETH0
I0323 00:21:43.423485  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:21:43.423498  543705 net.go:698] Add success.
I0323 00:21:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:21:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:21:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:21:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:21:53.409800  543705 cpu.go:275] no items to output this cycle
I0323 00:21:53.409804  543705 memory.go:184] no items to output this cycle
E0323 00:22:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:22:03.409785  543705 memory.go:184] no items to output this cycle
I0323 00:22:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 00:22:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:22:13.409790  543705 memory.go:191] Add success.
I0323 00:22:13.409805  543705 cpu.go:282] Add success.
W0323 00:22:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:22:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:22:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:22:13.420110  543705 net.go:648] Add success.
I0323 00:22:13.422638  543705 net.go:770] primary dev: ETH0
I0323 00:22:13.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:22:13.422667  543705 net.go:698] Add success.
W0323 00:22:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:22:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 00:22:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:22:14.456925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:22:14.456934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:22:14.456940  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:22:14.457013  543705 disk_worker.go:494] system disk:vda1
I0323 00:22:14.457057  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:22:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:22:15.456796  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:22:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:22:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:22:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:22:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:22:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:22:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:22:23.409799  543705 memory.go:184] no items to output this cycle
I0323 00:22:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 00:22:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:22:33.409793  543705 memory.go:184] no items to output this cycle
I0323 00:22:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 00:22:37.261685  543705 disk_info.go:125] begin check local disk info of client
I0323 00:22:37.264239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:22:37.264247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aacc0 0xc0002aad00]
E0323 00:22:43.409912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:22:43.410803  543705 memory.go:191] Add success.
I0323 00:22:43.410009  543705 cpu.go:282] Add success.
I0323 00:22:43.419749  543705 net.go:648] Add success.
I0323 00:22:43.422485  543705 net.go:770] primary dev: ETH0
I0323 00:22:43.422499  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:22:43.422513  543705 net.go:698] Add success.
I0323 00:22:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:22:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:22:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:22:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:22:53.409775  543705 memory.go:184] no items to output this cycle
I0323 00:22:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 00:23:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:23:03.409818  543705 memory.go:184] no items to output this cycle
I0323 00:23:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 00:23:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:23:13.409778  543705 memory.go:191] Add success.
I0323 00:23:13.409806  543705 cpu.go:282] Add success.
W0323 00:23:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:23:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:23:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:23:13.420088  543705 net.go:648] Add success.
I0323 00:23:13.422869  543705 net.go:770] primary dev: ETH0
I0323 00:23:13.422883  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:23:13.422896  543705 net.go:698] Add success.
I0323 00:23:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:23:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:23:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 00:23:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:23:14.456504  543705 disk_worker.go:494] system disk:vda1
I0323 00:23:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:23:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:23:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:23:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:23:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:23:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:23:23.409774  543705 memory.go:184] no items to output this cycle
I0323 00:23:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 00:23:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:23:33.409769  543705 memory.go:184] no items to output this cycle
I0323 00:23:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 00:23:37.265675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:23:37.268207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:23:37.268213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c2b40 0xc0004c2b80]
E0323 00:23:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:23:43.410565  543705 memory.go:191] Add success.
I0323 00:23:43.409792  543705 cpu.go:282] Add success.
I0323 00:23:43.420366  543705 net.go:648] Add success.
I0323 00:23:43.422937  543705 net.go:770] primary dev: ETH0
I0323 00:23:43.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:23:43.422966  543705 net.go:698] Add success.
I0323 00:23:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:23:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:23:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:23:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:23:53.409798  543705 memory.go:184] no items to output this cycle
I0323 00:23:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 00:24:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:24:03.409788  543705 memory.go:184] no items to output this cycle
I0323 00:24:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 00:24:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:24:13.409790  543705 memory.go:191] Add success.
I0323 00:24:13.409793  543705 cpu.go:282] Add success.
W0323 00:24:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:24:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:24:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:24:13.420139  543705 net.go:648] Add success.
I0323 00:24:13.423136  543705 net.go:770] primary dev: ETH0
I0323 00:24:13.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:24:13.423164  543705 net.go:698] Add success.
I0323 00:24:13.467560  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"09a31ec9-6331-4bb1-a7db-6cb08e4eebcb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:24:13.467594  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:24:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:24:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:24:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 00:24:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:24:14.456617  543705 disk_worker.go:494] system disk:vda1
I0323 00:24:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:24:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:24:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:24:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:24:23.409780  543705 memory.go:184] no items to output this cycle
I0323 00:24:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 00:24:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:24:33.409797  543705 memory.go:184] no items to output this cycle
I0323 00:24:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 00:24:37.269674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:24:37.272195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:24:37.272201  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037d200 0xc00037d240]
I0323 00:24:39.936367  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:24:39.936372  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:24:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:24:43.410774  543705 memory.go:191] Add success.
I0323 00:24:43.409828  543705 cpu.go:282] Add success.
I0323 00:24:43.420563  543705 net.go:648] Add success.
I0323 00:24:43.423551  543705 net.go:770] primary dev: ETH0
I0323 00:24:43.423567  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:24:43.423580  543705 net.go:698] Add success.
I0323 00:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:24:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:24:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:24:53.409778  543705 memory.go:184] no items to output this cycle
I0323 00:24:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 00:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:25:03.409811  543705 memory.go:184] no items to output this cycle
I0323 00:25:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 00:25:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:25:13.409783  543705 memory.go:191] Add success.
I0323 00:25:13.409804  543705 cpu.go:282] Add success.
W0323 00:25:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:25:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:25:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:25:13.420054  543705 net.go:648] Add success.
I0323 00:25:13.423009  543705 net.go:770] primary dev: ETH0
I0323 00:25:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:25:13.423035  543705 net.go:698] Add success.
I0323 00:25:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:25:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:25:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 00:25:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:25:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 00:25:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:25:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:25:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:25:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:25:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:25:23.409793  543705 memory.go:184] no items to output this cycle
I0323 00:25:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 00:25:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:25:33.409793  543705 memory.go:184] no items to output this cycle
I0323 00:25:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 00:25:37.275178  543705 disk_info.go:125] begin check local disk info of client
I0323 00:25:37.277742  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:25:37.277749  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267540 0xc000267580]
E0323 00:25:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:25:43.410690  543705 memory.go:191] Add success.
I0323 00:25:43.409803  543705 cpu.go:282] Add success.
I0323 00:25:43.420383  543705 net.go:648] Add success.
I0323 00:25:43.423139  543705 net.go:770] primary dev: ETH0
I0323 00:25:43.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:25:43.423169  543705 net.go:698] Add success.
I0323 00:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:25:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:25:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:25:53.409782  543705 memory.go:184] no items to output this cycle
I0323 00:25:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 00:26:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:26:03.409826  543705 memory.go:184] no items to output this cycle
I0323 00:26:03.409838  543705 cpu.go:275] no items to output this cycle
E0323 00:26:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:26:13.409813  543705 memory.go:191] Add success.
I0323 00:26:13.409818  543705 cpu.go:282] Add success.
W0323 00:26:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:26:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:26:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:26:13.420137  543705 net.go:648] Add success.
I0323 00:26:13.422638  543705 net.go:770] primary dev: ETH0
I0323 00:26:13.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:26:13.422666  543705 net.go:698] Add success.
I0323 00:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:26:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:26:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0323 00:26:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:26:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 00:26:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:26:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:26:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:26:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:26:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:26:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:26:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:26:23.409772  543705 memory.go:184] no items to output this cycle
I0323 00:26:23.409775  543705 cpu.go:275] no items to output this cycle
E0323 00:26:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:26:33.409776  543705 memory.go:184] no items to output this cycle
I0323 00:26:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 00:26:37.281672  543705 disk_info.go:125] begin check local disk info of client
I0323 00:26:37.284152  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:26:37.284159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c51c0 0xc0000c5200]
E0323 00:26:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:26:43.410587  543705 memory.go:191] Add success.
I0323 00:26:43.409822  543705 cpu.go:282] Add success.
I0323 00:26:43.420359  543705 net.go:648] Add success.
I0323 00:26:43.423042  543705 net.go:770] primary dev: ETH0
I0323 00:26:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:26:43.423068  543705 net.go:698] Add success.
I0323 00:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:26:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:26:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:26:53.409778  543705 memory.go:184] no items to output this cycle
I0323 00:26:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 00:27:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:27:03.409781  543705 memory.go:184] no items to output this cycle
I0323 00:27:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 00:27:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:27:13.409790  543705 memory.go:191] Add success.
I0323 00:27:13.409791  543705 cpu.go:282] Add success.
W0323 00:27:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:27:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:27:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:27:13.420148  543705 net.go:648] Add success.
I0323 00:27:13.422927  543705 net.go:770] primary dev: ETH0
I0323 00:27:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:27:13.422952  543705 net.go:698] Add success.
I0323 00:27:13.429166  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 00:27:13.453337  543705 event_worker.go:152] Polling the log file for events...
I0323 00:27:13.474608  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f12629d5-e6cf-4e2c-ac77-22a77e4319e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:27:13.474641  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 00:27:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:27:14.455141  543705 disk_worker.go:708] disk space is not compliant
W0323 00:27:14.455144  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:27:14.456891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:27:14.456900  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:27:14.456905  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:27:14.456973  543705 disk_worker.go:494] system disk:vda1
I0323 00:27:14.457006  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:27:15.456670  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:27:15.456679  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:27:16.457910  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:27:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:27:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:27:16.457977  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:27:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:27:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:27:23.409771  543705 memory.go:184] no items to output this cycle
I0323 00:27:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 00:27:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:27:33.409798  543705 memory.go:184] no items to output this cycle
I0323 00:27:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 00:27:37.285677  543705 disk_info.go:125] begin check local disk info of client
I0323 00:27:37.288260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:27:37.288268  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375e00 0xc000375e40]
I0323 00:27:39.937731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:27:39.937739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:27:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:27:43.410659  543705 memory.go:191] Add success.
I0323 00:27:43.409803  543705 cpu.go:282] Add success.
I0323 00:27:43.420358  543705 net.go:648] Add success.
I0323 00:27:43.423382  543705 net.go:770] primary dev: ETH0
I0323 00:27:43.423395  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:27:43.423407  543705 net.go:698] Add success.
I0323 00:27:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:27:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:27:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:27:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:27:53.409768  543705 memory.go:184] no items to output this cycle
I0323 00:27:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:28:03.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:28:03.409865  543705 memory.go:184] no items to output this cycle
I0323 00:28:03.409937  543705 cpu.go:275] no items to output this cycle
E0323 00:28:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:28:13.409809  543705 memory.go:191] Add success.
I0323 00:28:13.409820  543705 cpu.go:282] Add success.
W0323 00:28:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:28:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:28:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:28:13.420177  543705 net.go:648] Add success.
I0323 00:28:13.422781  543705 net.go:770] primary dev: ETH0
I0323 00:28:13.422794  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:28:13.422806  543705 net.go:698] Add success.
I0323 00:28:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:28:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:28:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 00:28:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:28:14.456506  543705 disk_worker.go:494] system disk:vda1
I0323 00:28:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:28:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:28:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:28:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:28:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:28:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:28:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:28:23.409770  543705 memory.go:184] no items to output this cycle
I0323 00:28:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 00:28:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:28:33.409775  543705 memory.go:184] no items to output this cycle
I0323 00:28:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 00:28:37.289683  543705 disk_info.go:125] begin check local disk info of client
I0323 00:28:37.292120  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:28:37.292127  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aab00 0xc0001aab40]
E0323 00:28:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:28:43.410820  543705 memory.go:191] Add success.
I0323 00:28:43.409825  543705 cpu.go:282] Add success.
I0323 00:28:43.420529  543705 net.go:648] Add success.
I0323 00:28:43.423213  543705 net.go:770] primary dev: ETH0
I0323 00:28:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:28:43.423245  543705 net.go:698] Add success.
I0323 00:28:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:28:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:28:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:28:53.409798  543705 memory.go:184] no items to output this cycle
I0323 00:28:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 00:29:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:29:03.409816  543705 memory.go:184] no items to output this cycle
I0323 00:29:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 00:29:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:29:13.409796  543705 memory.go:191] Add success.
I0323 00:29:13.409797  543705 cpu.go:282] Add success.
W0323 00:29:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:29:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:29:13.420185  543705 net.go:648] Add success.
I0323 00:29:13.423037  543705 net.go:770] primary dev: ETH0
I0323 00:29:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:29:13.423064  543705 net.go:698] Add success.
I0323 00:29:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:29:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:29:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 00:29:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:29:14.456570  543705 disk_worker.go:494] system disk:vda1
I0323 00:29:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:29:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:29:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:29:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:29:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:29:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:29:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:29:23.409782  543705 memory.go:184] no items to output this cycle
I0323 00:29:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 00:29:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:29:33.409785  543705 memory.go:184] no items to output this cycle
I0323 00:29:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 00:29:37.293674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:29:37.296129  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:29:37.296136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b14c0 0xc0002b1500]
E0323 00:29:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:29:43.410652  543705 memory.go:191] Add success.
I0323 00:29:43.409827  543705 cpu.go:282] Add success.
I0323 00:29:43.420345  543705 net.go:648] Add success.
I0323 00:29:43.423130  543705 net.go:770] primary dev: ETH0
I0323 00:29:43.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:29:43.423157  543705 net.go:698] Add success.
I0323 00:29:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:29:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:29:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:29:53.410231  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:29:53.410248  543705 memory.go:184] no items to output this cycle
I0323 00:29:53.410273  543705 cpu.go:275] no items to output this cycle
E0323 00:30:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:30:03.409817  543705 memory.go:184] no items to output this cycle
I0323 00:30:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 00:30:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:30:13.409788  543705 memory.go:191] Add success.
W0323 00:30:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 00:30:13.409822  543705 cpu.go:282] Add success.
W0323 00:30:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:30:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:30:13.420117  543705 net.go:648] Add success.
I0323 00:30:13.423218  543705 net.go:770] primary dev: ETH0
I0323 00:30:13.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:30:13.423242  543705 net.go:698] Add success.
I0323 00:30:13.467437  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1e0e2875-ef1e-41df-8d79-019fb1aa3aa3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:30:13.467470  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:30:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:30:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:30:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 00:30:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:30:14.456529  543705 disk_worker.go:494] system disk:vda1
I0323 00:30:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:30:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:30:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:30:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:30:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:30:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:30:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:30:23.409775  543705 memory.go:184] no items to output this cycle
I0323 00:30:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 00:30:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:30:33.409787  543705 memory.go:184] no items to output this cycle
I0323 00:30:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 00:30:37.297675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:30:37.300156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:30:37.300162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b540 0xc00007b580]
I0323 00:30:39.940388  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:30:39.940394  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:30:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:30:43.410608  543705 memory.go:191] Add success.
I0323 00:30:43.409810  543705 cpu.go:282] Add success.
I0323 00:30:43.420317  543705 net.go:648] Add success.
I0323 00:30:43.423022  543705 net.go:770] primary dev: ETH0
I0323 00:30:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:30:43.423047  543705 net.go:698] Add success.
I0323 00:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:30:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:30:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:30:53.409791  543705 memory.go:184] no items to output this cycle
I0323 00:30:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 00:31:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:31:03.409787  543705 cpu.go:275] no items to output this cycle
I0323 00:31:03.409797  543705 memory.go:184] no items to output this cycle
E0323 00:31:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:31:13.409793  543705 memory.go:191] Add success.
I0323 00:31:13.409800  543705 cpu.go:282] Add success.
W0323 00:31:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:31:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:31:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:31:13.420119  543705 net.go:648] Add success.
I0323 00:31:13.422890  543705 net.go:770] primary dev: ETH0
I0323 00:31:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:31:13.422915  543705 net.go:698] Add success.
I0323 00:31:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:31:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:31:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 00:31:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:31:14.456490  543705 disk_worker.go:494] system disk:vda1
I0323 00:31:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:31:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:31:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:31:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:31:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:31:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:31:23.409778  543705 memory.go:184] no items to output this cycle
I0323 00:31:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 00:31:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:31:33.409775  543705 memory.go:184] no items to output this cycle
I0323 00:31:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 00:31:37.301676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:31:37.304150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:31:37.304155  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1880 0xc0002b18c0]
E0323 00:31:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:31:43.410638  543705 memory.go:191] Add success.
I0323 00:31:43.409798  543705 cpu.go:282] Add success.
I0323 00:31:43.420322  543705 net.go:648] Add success.
I0323 00:31:43.423167  543705 net.go:770] primary dev: ETH0
I0323 00:31:43.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:31:43.423193  543705 net.go:698] Add success.
I0323 00:31:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:31:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:31:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:31:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:31:53.409767  543705 memory.go:184] no items to output this cycle
I0323 00:31:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 00:32:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:32:03.409788  543705 memory.go:184] no items to output this cycle
I0323 00:32:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 00:32:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:32:13.409817  543705 memory.go:191] Add success.
I0323 00:32:13.409818  543705 cpu.go:282] Add success.
W0323 00:32:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:32:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:32:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:32:13.420259  543705 net.go:648] Add success.
I0323 00:32:13.423021  543705 net.go:770] primary dev: ETH0
I0323 00:32:13.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:32:13.423047  543705 net.go:698] Add success.
W0323 00:32:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:32:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 00:32:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:32:14.455934  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:32:14.455943  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:32:14.455949  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:32:14.456541  543705 disk_worker.go:494] system disk:vda1
I0323 00:32:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:32:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:32:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:32:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:32:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:32:16.457985  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:32:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:32:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:32:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:32:23.409774  543705 memory.go:184] no items to output this cycle
I0323 00:32:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 00:32:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:32:33.409802  543705 memory.go:184] no items to output this cycle
I0323 00:32:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 00:32:37.305678  543705 disk_info.go:125] begin check local disk info of client
I0323 00:32:37.308167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:32:37.308174  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314bc0 0xc000314c00]
E0323 00:32:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:32:43.410700  543705 memory.go:191] Add success.
I0323 00:32:43.409829  543705 cpu.go:282] Add success.
I0323 00:32:43.420387  543705 net.go:648] Add success.
I0323 00:32:43.423361  543705 net.go:770] primary dev: ETH0
I0323 00:32:43.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:32:43.423389  543705 net.go:698] Add success.
I0323 00:32:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:32:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:32:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:32:53.409761  543705 memory.go:184] no items to output this cycle
I0323 00:32:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 00:33:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:33:03.409814  543705 memory.go:184] no items to output this cycle
I0323 00:33:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 00:33:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:33:13.409786  543705 memory.go:191] Add success.
I0323 00:33:13.409802  543705 cpu.go:282] Add success.
W0323 00:33:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:33:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:33:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:33:13.420137  543705 net.go:648] Add success.
I0323 00:33:13.422885  543705 net.go:770] primary dev: ETH0
I0323 00:33:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:33:13.422911  543705 net.go:698] Add success.
I0323 00:33:13.480746  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f944904-7eb2-4dc3-b3b3-99b2471da52a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:33:13.480780  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:33:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:33:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:33:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 00:33:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:33:14.456668  543705 disk_worker.go:494] system disk:vda1
I0323 00:33:14.456697  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:33:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:33:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:33:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:33:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:33:23.409795  543705 memory.go:184] no items to output this cycle
I0323 00:33:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 00:33:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:33:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 00:33:33.409787  543705 memory.go:184] no items to output this cycle
I0323 00:33:37.309674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:33:37.312159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:33:37.312165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab480 0xc0001ab4c0]
I0323 00:33:39.941730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:33:39.941737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:33:43.410660  543705 memory.go:191] Add success.
I0323 00:33:43.409796  543705 cpu.go:282] Add success.
I0323 00:33:43.420353  543705 net.go:648] Add success.
I0323 00:33:43.423308  543705 net.go:770] primary dev: ETH0
I0323 00:33:43.423321  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:33:43.423334  543705 net.go:698] Add success.
I0323 00:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:33:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:33:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:33:53.410516  543705 cpu.go:275] no items to output this cycle
E0323 00:33:53.410587  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:33:53.410602  543705 memory.go:184] no items to output this cycle
E0323 00:34:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:34:03.409773  543705 memory.go:184] no items to output this cycle
I0323 00:34:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 00:34:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:34:13.409797  543705 memory.go:191] Add success.
I0323 00:34:13.409797  543705 cpu.go:282] Add success.
W0323 00:34:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:34:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:34:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:34:13.420147  543705 net.go:648] Add success.
I0323 00:34:13.423070  543705 net.go:770] primary dev: ETH0
I0323 00:34:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:34:13.423094  543705 net.go:698] Add success.
I0323 00:34:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:34:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:34:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 00:34:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:34:14.456621  543705 disk_worker.go:494] system disk:vda1
I0323 00:34:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:34:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:34:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:34:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:34:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:34:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:34:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:34:23.409797  543705 memory.go:184] no items to output this cycle
I0323 00:34:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 00:34:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:34:33.409780  543705 memory.go:184] no items to output this cycle
I0323 00:34:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 00:34:37.313675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:34:37.316171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:34:37.316178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000eab00 0xc0000eab40]
E0323 00:34:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:34:43.410636  543705 memory.go:191] Add success.
I0323 00:34:43.409804  543705 cpu.go:282] Add success.
I0323 00:34:43.420476  543705 net.go:648] Add success.
I0323 00:34:43.423057  543705 net.go:770] primary dev: ETH0
I0323 00:34:43.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:34:43.423087  543705 net.go:698] Add success.
I0323 00:34:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:34:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:34:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:34:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:34:53.409775  543705 memory.go:184] no items to output this cycle
I0323 00:34:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 00:35:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:35:03.409813  543705 memory.go:184] no items to output this cycle
I0323 00:35:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 00:35:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:35:13.409780  543705 memory.go:191] Add success.
W0323 00:35:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 00:35:13.409811  543705 cpu.go:282] Add success.
W0323 00:35:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:35:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:35:13.420138  543705 net.go:648] Add success.
I0323 00:35:13.423053  543705 net.go:770] primary dev: ETH0
I0323 00:35:13.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:35:13.423080  543705 net.go:698] Add success.
I0323 00:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:35:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:35:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 00:35:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:35:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 00:35:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:35:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:35:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:35:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:35:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:35:23.409779  543705 memory.go:184] no items to output this cycle
I0323 00:35:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 00:35:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:35:33.409773  543705 memory.go:184] no items to output this cycle
I0323 00:35:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 00:35:37.317675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:35:37.320166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:35:37.320172  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521880 0xc0005218c0]
E0323 00:35:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:35:43.410704  543705 memory.go:191] Add success.
I0323 00:35:43.409815  543705 cpu.go:282] Add success.
I0323 00:35:43.420660  543705 net.go:648] Add success.
I0323 00:35:43.423558  543705 net.go:770] primary dev: ETH0
I0323 00:35:43.423571  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:35:43.423582  543705 net.go:698] Add success.
I0323 00:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:35:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:35:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:35:53.409795  543705 memory.go:184] no items to output this cycle
I0323 00:35:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 00:36:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:36:03.409789  543705 memory.go:184] no items to output this cycle
I0323 00:36:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 00:36:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:36:13.409811  543705 memory.go:191] Add success.
I0323 00:36:13.409819  543705 cpu.go:282] Add success.
W0323 00:36:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:36:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:36:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:36:13.420169  543705 net.go:648] Add success.
I0323 00:36:13.423158  543705 net.go:770] primary dev: ETH0
I0323 00:36:13.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:36:13.423184  543705 net.go:698] Add success.
I0323 00:36:13.505814  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f9dadcb1-1526-4a43-bed5-3f1bc4b9e586","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:36:13.505850  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:36:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:36:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:36:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 00:36:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:36:14.456632  543705 disk_worker.go:494] system disk:vda1
I0323 00:36:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:36:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:36:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:36:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:36:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:36:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:36:23.409766  543705 memory.go:184] no items to output this cycle
I0323 00:36:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 00:36:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:36:33.409804  543705 memory.go:184] no items to output this cycle
I0323 00:36:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 00:36:37.321672  543705 disk_info.go:125] begin check local disk info of client
I0323 00:36:37.324220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:36:37.324226  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3140 0xc0003d3180]
I0323 00:36:39.944400  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:36:39.944406  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:36:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:36:43.410564  543705 memory.go:191] Add success.
I0323 00:36:43.409808  543705 cpu.go:282] Add success.
I0323 00:36:43.420240  543705 net.go:648] Add success.
I0323 00:36:43.422719  543705 net.go:770] primary dev: ETH0
I0323 00:36:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:36:43.422745  543705 net.go:698] Add success.
I0323 00:36:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:36:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:36:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:36:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:36:53.409794  543705 memory.go:184] no items to output this cycle
I0323 00:36:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 00:37:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:37:03.409780  543705 memory.go:184] no items to output this cycle
I0323 00:37:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 00:37:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:37:13.409804  543705 memory.go:191] Add success.
I0323 00:37:13.409809  543705 cpu.go:282] Add success.
W0323 00:37:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:37:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:37:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:37:13.420132  543705 net.go:648] Add success.
I0323 00:37:13.422917  543705 net.go:770] primary dev: ETH0
I0323 00:37:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:37:13.422947  543705 net.go:698] Add success.
I0323 00:37:13.453496  543705 event_worker.go:152] Polling the log file for events...
W0323 00:37:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:37:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 00:37:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:37:14.456919  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:37:14.456928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:37:14.456934  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:37:14.457007  543705 disk_worker.go:494] system disk:vda1
I0323 00:37:14.457047  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:37:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:37:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:37:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:37:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:37:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:37:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:37:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:37:23.410266  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:37:23.410276  543705 cpu.go:275] no items to output this cycle
I0323 00:37:23.410284  543705 memory.go:184] no items to output this cycle
E0323 00:37:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:37:33.409788  543705 memory.go:184] no items to output this cycle
I0323 00:37:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 00:37:37.327010  543705 disk_info.go:125] begin check local disk info of client
I0323 00:37:37.329625  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:37:37.329631  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1bc0 0xc0004a1c00]
E0323 00:37:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:37:43.410655  543705 memory.go:191] Add success.
I0323 00:37:43.409794  543705 cpu.go:282] Add success.
I0323 00:37:43.420383  543705 net.go:648] Add success.
I0323 00:37:43.422892  543705 net.go:770] primary dev: ETH0
I0323 00:37:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:37:43.422920  543705 net.go:698] Add success.
I0323 00:37:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:37:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:37:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:37:53.410492  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:37:53.410500  543705 cpu.go:275] no items to output this cycle
I0323 00:37:53.410514  543705 memory.go:184] no items to output this cycle
E0323 00:38:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:38:03.409789  543705 cpu.go:275] no items to output this cycle
I0323 00:38:03.409795  543705 memory.go:184] no items to output this cycle
E0323 00:38:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:38:13.409785  543705 memory.go:191] Add success.
I0323 00:38:13.409786  543705 cpu.go:282] Add success.
W0323 00:38:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:38:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:38:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:38:13.420152  543705 net.go:648] Add success.
I0323 00:38:13.422755  543705 net.go:770] primary dev: ETH0
I0323 00:38:13.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:38:13.422784  543705 net.go:698] Add success.
I0323 00:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:38:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:38:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 00:38:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:38:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 00:38:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:38:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:38:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:38:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:38:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:38:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:38:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:38:23.409774  543705 cpu.go:275] no items to output this cycle
I0323 00:38:23.409783  543705 memory.go:184] no items to output this cycle
E0323 00:38:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:38:33.409804  543705 memory.go:184] no items to output this cycle
I0323 00:38:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 00:38:37.330962  543705 disk_info.go:125] begin check local disk info of client
I0323 00:38:37.333501  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:38:37.333507  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003929c0 0xc000392a00]
E0323 00:38:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:38:43.410600  543705 memory.go:191] Add success.
I0323 00:38:43.409802  543705 cpu.go:282] Add success.
I0323 00:38:43.420297  543705 net.go:648] Add success.
I0323 00:38:43.422910  543705 net.go:770] primary dev: ETH0
I0323 00:38:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:38:43.422936  543705 net.go:698] Add success.
I0323 00:38:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:38:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:38:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:38:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:38:53.409776  543705 memory.go:184] no items to output this cycle
I0323 00:38:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 00:39:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:39:03.409797  543705 memory.go:184] no items to output this cycle
I0323 00:39:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 00:39:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:39:13.409787  543705 memory.go:191] Add success.
W0323 00:39:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:39:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:39:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:39:13.409839  543705 cpu.go:282] Add success.
I0323 00:39:13.420713  543705 net.go:648] Add success.
I0323 00:39:13.423246  543705 net.go:770] primary dev: ETH0
I0323 00:39:13.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:39:13.423272  543705 net.go:698] Add success.
I0323 00:39:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:39:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:39:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 00:39:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:39:14.456562  543705 disk_worker.go:494] system disk:vda1
I0323 00:39:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:39:14.739875  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12b053bf-ffe4-4aa4-ab81-7eb7bc66f479","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:39:14.739910  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:39:15.455504  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:39:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:39:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:39:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:39:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:39:23.409799  543705 memory.go:184] no items to output this cycle
I0323 00:39:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 00:39:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:39:33.409774  543705 memory.go:184] no items to output this cycle
I0323 00:39:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 00:39:37.333676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:39:37.336211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:39:37.336217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f4380 0xc0004f43c0]
I0323 00:39:39.945741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:39:39.945748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:39:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:39:43.410645  543705 memory.go:191] Add success.
I0323 00:39:43.409817  543705 cpu.go:282] Add success.
I0323 00:39:43.420338  543705 net.go:648] Add success.
I0323 00:39:43.422930  543705 net.go:770] primary dev: ETH0
I0323 00:39:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:39:43.422958  543705 net.go:698] Add success.
I0323 00:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:39:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:39:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:39:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:39:53.409774  543705 memory.go:184] no items to output this cycle
I0323 00:39:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 00:40:03.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:40:03.410016  543705 cpu.go:275] no items to output this cycle
I0323 00:40:03.410032  543705 memory.go:184] no items to output this cycle
E0323 00:40:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:40:13.409793  543705 memory.go:191] Add success.
I0323 00:40:13.409797  543705 cpu.go:282] Add success.
W0323 00:40:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:40:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:40:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:40:13.420127  543705 net.go:648] Add success.
I0323 00:40:13.422896  543705 net.go:770] primary dev: ETH0
I0323 00:40:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:40:13.422924  543705 net.go:698] Add success.
I0323 00:40:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:40:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:40:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 00:40:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:40:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 00:40:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:40:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:40:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:40:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:40:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:40:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:40:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:40:23.409824  543705 memory.go:184] no items to output this cycle
I0323 00:40:23.409882  543705 cpu.go:275] no items to output this cycle
E0323 00:40:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:40:33.409779  543705 memory.go:184] no items to output this cycle
I0323 00:40:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 00:40:37.337674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:40:37.340195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:40:37.340202  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f5f40 0xc000262000]
E0323 00:40:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:40:43.410676  543705 memory.go:191] Add success.
I0323 00:40:43.409820  543705 cpu.go:282] Add success.
I0323 00:40:43.420394  543705 net.go:648] Add success.
I0323 00:40:43.423004  543705 net.go:770] primary dev: ETH0
I0323 00:40:43.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:40:43.423031  543705 net.go:698] Add success.
I0323 00:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:40:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:40:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:40:53.409777  543705 memory.go:184] no items to output this cycle
I0323 00:40:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 00:41:03.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:41:03.409918  543705 memory.go:184] no items to output this cycle
I0323 00:41:03.409942  543705 cpu.go:275] no items to output this cycle
E0323 00:41:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:41:13.409808  543705 memory.go:191] Add success.
I0323 00:41:13.409825  543705 cpu.go:282] Add success.
W0323 00:41:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:41:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:41:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:41:13.420117  543705 net.go:648] Add success.
I0323 00:41:13.422933  543705 net.go:770] primary dev: ETH0
I0323 00:41:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:41:13.422962  543705 net.go:698] Add success.
I0323 00:41:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:41:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:41:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 00:41:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:41:14.456493  543705 disk_worker.go:494] system disk:vda1
I0323 00:41:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:41:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:41:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:41:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:41:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:41:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:41:23.409766  543705 memory.go:184] no items to output this cycle
I0323 00:41:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 00:41:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:41:33.409786  543705 memory.go:184] no items to output this cycle
I0323 00:41:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 00:41:37.341681  543705 disk_info.go:125] begin check local disk info of client
I0323 00:41:37.344203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:41:37.344209  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad40 0xc00007ad80]
E0323 00:41:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:41:43.410716  543705 memory.go:191] Add success.
I0323 00:41:43.409811  543705 cpu.go:282] Add success.
I0323 00:41:43.420420  543705 net.go:648] Add success.
I0323 00:41:43.423070  543705 net.go:770] primary dev: ETH0
I0323 00:41:43.423083  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:41:43.423096  543705 net.go:698] Add success.
I0323 00:41:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:41:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:41:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:41:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:41:53.409781  543705 memory.go:184] no items to output this cycle
I0323 00:41:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 00:42:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:42:03.409920  543705 cpu.go:275] no items to output this cycle
I0323 00:42:03.409922  543705 memory.go:184] no items to output this cycle
E0323 00:42:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:42:13.409793  543705 memory.go:191] Add success.
I0323 00:42:13.409818  543705 cpu.go:282] Add success.
W0323 00:42:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:42:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:42:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:42:13.420129  543705 net.go:648] Add success.
I0323 00:42:13.423066  543705 net.go:770] primary dev: ETH0
I0323 00:42:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:42:13.423095  543705 net.go:698] Add success.
I0323 00:42:13.514507  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"42cbf877-8c27-4f97-9a51-2a0a49adae63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:42:13.514547  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 00:42:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:42:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 00:42:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:42:14.457000  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:42:14.457009  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:42:14.457015  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:42:14.457037  543705 disk_worker.go:494] system disk:vda1
I0323 00:42:14.457076  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:42:15.456548  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:42:15.456557  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:42:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:42:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:42:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:42:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:42:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:42:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:42:23.409793  543705 memory.go:184] no items to output this cycle
I0323 00:42:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 00:42:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:42:33.409777  543705 memory.go:184] no items to output this cycle
I0323 00:42:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 00:42:37.345674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:42:37.348218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:42:37.348225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1800 0xc0002b1840]
I0323 00:42:39.948416  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:42:39.948422  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:42:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:42:43.410739  543705 memory.go:191] Add success.
I0323 00:42:43.409805  543705 cpu.go:282] Add success.
I0323 00:42:43.420502  543705 net.go:648] Add success.
I0323 00:42:43.423242  543705 net.go:770] primary dev: ETH0
I0323 00:42:43.423255  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:42:43.423267  543705 net.go:698] Add success.
I0323 00:42:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:42:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:42:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:42:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:42:53.409807  543705 memory.go:184] no items to output this cycle
I0323 00:42:53.409819  543705 cpu.go:275] no items to output this cycle
E0323 00:43:03.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:43:03.409892  543705 memory.go:184] no items to output this cycle
I0323 00:43:03.410158  543705 cpu.go:275] no items to output this cycle
E0323 00:43:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:43:13.409789  543705 memory.go:191] Add success.
I0323 00:43:13.409814  543705 cpu.go:282] Add success.
W0323 00:43:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:43:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:43:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:43:13.420121  543705 net.go:648] Add success.
I0323 00:43:13.422757  543705 net.go:770] primary dev: ETH0
I0323 00:43:13.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:43:13.422781  543705 net.go:698] Add success.
I0323 00:43:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:43:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:43:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 00:43:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:43:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 00:43:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:43:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:43:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:43:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:43:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:43:23.409789  543705 memory.go:184] no items to output this cycle
I0323 00:43:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 00:43:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:43:33.409785  543705 memory.go:184] no items to output this cycle
I0323 00:43:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 00:43:37.349671  543705 disk_info.go:125] begin check local disk info of client
I0323 00:43:37.352190  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:43:37.352196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bbe40 0xc0003bbe80]
E0323 00:43:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:43:43.410625  543705 memory.go:191] Add success.
I0323 00:43:43.409811  543705 cpu.go:282] Add success.
I0323 00:43:43.420306  543705 net.go:648] Add success.
I0323 00:43:43.423083  543705 net.go:770] primary dev: ETH0
I0323 00:43:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:43:43.423110  543705 net.go:698] Add success.
I0323 00:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:43:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:43:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:43:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:43:53.409793  543705 memory.go:184] no items to output this cycle
I0323 00:43:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:44:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:44:03.409797  543705 memory.go:184] no items to output this cycle
I0323 00:44:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 00:44:13.410691  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:44:13.410720  543705 memory.go:191] Add success.
I0323 00:44:13.410724  543705 cpu.go:282] Add success.
W0323 00:44:13.410747  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:44:13.410758  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:44:13.410761  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:44:13.420018  543705 net.go:648] Add success.
I0323 00:44:13.423011  543705 net.go:770] primary dev: ETH0
I0323 00:44:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:44:13.423039  543705 net.go:698] Add success.
I0323 00:44:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:44:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:44:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 00:44:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:44:14.456851  543705 disk_worker.go:494] system disk:vda1
I0323 00:44:14.456896  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:44:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:44:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:44:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:44:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:44:23.409788  543705 memory.go:184] no items to output this cycle
I0323 00:44:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 00:44:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:44:33.409806  543705 memory.go:184] no items to output this cycle
I0323 00:44:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 00:44:37.353675  543705 disk_info.go:125] begin check local disk info of client
I0323 00:44:37.356118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:44:37.356125  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504440 0xc000504480]
E0323 00:44:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:44:43.410689  543705 memory.go:191] Add success.
I0323 00:44:43.409813  543705 cpu.go:282] Add success.
I0323 00:44:43.420440  543705 net.go:648] Add success.
I0323 00:44:43.423247  543705 net.go:770] primary dev: ETH0
I0323 00:44:43.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:44:43.423273  543705 net.go:698] Add success.
I0323 00:44:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:44:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:44:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:44:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:44:53.409802  543705 memory.go:184] no items to output this cycle
I0323 00:44:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 00:45:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:45:03.409790  543705 memory.go:184] no items to output this cycle
I0323 00:45:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 00:45:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:45:13.409795  543705 memory.go:191] Add success.
I0323 00:45:13.409798  543705 cpu.go:282] Add success.
W0323 00:45:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:45:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:45:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:45:13.420137  543705 net.go:648] Add success.
I0323 00:45:13.422697  543705 net.go:770] primary dev: ETH0
I0323 00:45:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:45:13.422723  543705 net.go:698] Add success.
I0323 00:45:13.468743  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"88a0edd9-f7a3-4056-8717-815cf3d95bb5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:45:13.468776  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:45:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:45:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:45:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 00:45:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:45:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 00:45:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:45:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:45:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:45:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:45:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:45:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:45:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:45:23.409767  543705 memory.go:184] no items to output this cycle
I0323 00:45:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 00:45:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:45:33.409802  543705 memory.go:184] no items to output this cycle
I0323 00:45:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 00:45:37.357672  543705 disk_info.go:125] begin check local disk info of client
I0323 00:45:37.360279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:45:37.360284  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0b00 0xc0002b0b40]
I0323 00:45:39.949730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:45:39.949737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:45:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:45:43.410539  543705 memory.go:191] Add success.
I0323 00:45:43.409803  543705 cpu.go:282] Add success.
I0323 00:45:43.420234  543705 net.go:648] Add success.
I0323 00:45:43.422828  543705 net.go:770] primary dev: ETH0
I0323 00:45:43.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:45:43.422853  543705 net.go:698] Add success.
I0323 00:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:45:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:45:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:45:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:45:53.409769  543705 memory.go:184] no items to output this cycle
I0323 00:45:53.409786  543705 cpu.go:275] no items to output this cycle
I0323 00:46:03.409969  543705 cpu.go:275] no items to output this cycle
E0323 00:46:03.410066  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:46:03.410084  543705 memory.go:184] no items to output this cycle
E0323 00:46:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:46:13.409814  543705 memory.go:191] Add success.
I0323 00:46:13.409821  543705 cpu.go:282] Add success.
W0323 00:46:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:46:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:46:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:46:13.420145  543705 net.go:648] Add success.
I0323 00:46:13.422822  543705 net.go:770] primary dev: ETH0
I0323 00:46:13.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:46:13.422848  543705 net.go:698] Add success.
I0323 00:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:46:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:46:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 00:46:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:46:14.456582  543705 disk_worker.go:494] system disk:vda1
I0323 00:46:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:46:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:46:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:46:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:46:23.409798  543705 memory.go:184] no items to output this cycle
I0323 00:46:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 00:46:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:46:33.409772  543705 memory.go:184] no items to output this cycle
I0323 00:46:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 00:46:37.361673  543705 disk_info.go:125] begin check local disk info of client
I0323 00:46:37.364195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:46:37.364201  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328640 0xc000328680]
E0323 00:46:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:46:43.410659  543705 memory.go:191] Add success.
I0323 00:46:43.409815  543705 cpu.go:282] Add success.
I0323 00:46:43.420352  543705 net.go:648] Add success.
I0323 00:46:43.422987  543705 net.go:770] primary dev: ETH0
I0323 00:46:43.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:46:43.423016  543705 net.go:698] Add success.
I0323 00:46:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:46:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:46:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:46:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:46:53.409789  543705 memory.go:184] no items to output this cycle
I0323 00:46:53.409802  543705 cpu.go:275] no items to output this cycle
I0323 00:47:03.409902  543705 cpu.go:275] no items to output this cycle
E0323 00:47:03.409915  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:47:03.409949  543705 memory.go:184] no items to output this cycle
E0323 00:47:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:47:13.409799  543705 memory.go:191] Add success.
I0323 00:47:13.409800  543705 cpu.go:282] Add success.
W0323 00:47:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:47:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:47:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:47:13.420577  543705 net.go:648] Add success.
I0323 00:47:13.423156  543705 net.go:770] primary dev: ETH0
I0323 00:47:13.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:47:13.423182  543705 net.go:698] Add success.
I0323 00:47:13.453715  543705 event_worker.go:152] Polling the log file for events...
W0323 00:47:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:47:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 00:47:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0323 00:47:14.456905  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:47:14.456915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:47:14.456920  543705 custom_config.go:64] query custom config with name: gpu
I0323 00:47:14.456992  543705 disk_worker.go:494] system disk:vda1
I0323 00:47:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:47:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:47:15.456853  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:47:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:47:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:47:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:47:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:47:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:47:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:47:23.409786  543705 memory.go:184] no items to output this cycle
I0323 00:47:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 00:47:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:47:33.409777  543705 memory.go:184] no items to output this cycle
I0323 00:47:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 00:47:37.365673  543705 disk_info.go:125] begin check local disk info of client
I0323 00:47:37.368235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:47:37.368241  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b500 0xc00007b540]
E0323 00:47:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:47:43.410624  543705 memory.go:191] Add success.
I0323 00:47:43.409813  543705 cpu.go:282] Add success.
I0323 00:47:43.420326  543705 net.go:648] Add success.
I0323 00:47:43.422779  543705 net.go:770] primary dev: ETH0
I0323 00:47:43.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:47:43.422805  543705 net.go:698] Add success.
I0323 00:47:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:47:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:47:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:47:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:47:53.410384  543705 memory.go:184] no items to output this cycle
I0323 00:47:53.410405  543705 cpu.go:275] no items to output this cycle
E0323 00:48:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:48:03.409799  543705 memory.go:184] no items to output this cycle
I0323 00:48:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 00:48:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:48:13.409800  543705 memory.go:191] Add success.
I0323 00:48:13.409821  543705 cpu.go:282] Add success.
W0323 00:48:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:48:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:48:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:48:13.420164  543705 net.go:648] Add success.
I0323 00:48:13.423169  543705 net.go:770] primary dev: ETH0
I0323 00:48:13.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:48:13.423194  543705 net.go:698] Add success.
I0323 00:48:13.486599  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"660afa09-1a9f-43e1-a62a-b2137a309d1c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:48:13.486631  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:48:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:48:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 00:48:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:48:14.456502  543705 disk_worker.go:494] system disk:vda1
I0323 00:48:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:48:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:48:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:48:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:48:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:48:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:48:23.409816  543705 memory.go:184] no items to output this cycle
I0323 00:48:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 00:48:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:48:33.409782  543705 memory.go:184] no items to output this cycle
I0323 00:48:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 00:48:37.369674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:48:37.372198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:48:37.372204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0500 0xc0002b0540]
I0323 00:48:39.952436  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:48:39.952442  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:48:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:48:43.410690  543705 memory.go:191] Add success.
I0323 00:48:43.409814  543705 cpu.go:282] Add success.
I0323 00:48:43.420452  543705 net.go:648] Add success.
I0323 00:48:43.423197  543705 net.go:770] primary dev: ETH0
I0323 00:48:43.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:48:43.423224  543705 net.go:698] Add success.
I0323 00:48:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:48:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:48:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:48:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:48:53.409792  543705 memory.go:184] no items to output this cycle
I0323 00:48:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 00:49:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:49:03.409821  543705 memory.go:184] no items to output this cycle
I0323 00:49:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 00:49:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:49:13.409789  543705 memory.go:191] Add success.
I0323 00:49:13.409799  543705 cpu.go:282] Add success.
W0323 00:49:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:49:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:49:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:49:13.420132  543705 net.go:648] Add success.
I0323 00:49:13.422931  543705 net.go:770] primary dev: ETH0
I0323 00:49:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:49:13.422959  543705 net.go:698] Add success.
I0323 00:49:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:49:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:49:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 00:49:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:49:14.456493  543705 disk_worker.go:494] system disk:vda1
I0323 00:49:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:49:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:49:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:49:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:49:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:49:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:49:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:49:23.409766  543705 memory.go:184] no items to output this cycle
I0323 00:49:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 00:49:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:49:33.409771  543705 memory.go:184] no items to output this cycle
I0323 00:49:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 00:49:37.373674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:49:37.376201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:49:37.376206  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504640 0xc000504680]
E0323 00:49:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:49:43.410714  543705 memory.go:191] Add success.
I0323 00:49:43.409821  543705 cpu.go:282] Add success.
I0323 00:49:43.420405  543705 net.go:648] Add success.
I0323 00:49:43.423220  543705 net.go:770] primary dev: ETH0
I0323 00:49:43.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:49:43.423251  543705 net.go:698] Add success.
I0323 00:49:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:49:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:49:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:49:53.410412  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:49:53.410433  543705 memory.go:184] no items to output this cycle
I0323 00:49:53.410444  543705 cpu.go:275] no items to output this cycle
E0323 00:50:03.409915  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:50:03.409934  543705 memory.go:184] no items to output this cycle
I0323 00:50:03.409979  543705 cpu.go:275] no items to output this cycle
E0323 00:50:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:50:13.409796  543705 memory.go:191] Add success.
I0323 00:50:13.409799  543705 cpu.go:282] Add success.
W0323 00:50:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:50:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:50:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:50:13.420214  543705 net.go:648] Add success.
I0323 00:50:13.422830  543705 net.go:770] primary dev: ETH0
I0323 00:50:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:50:13.422855  543705 net.go:698] Add success.
I0323 00:50:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:50:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:50:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 00:50:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:50:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 00:50:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:50:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:50:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:50:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:50:16.472539  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:50:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:50:23.409767  543705 memory.go:184] no items to output this cycle
I0323 00:50:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 00:50:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:50:33.409800  543705 memory.go:184] no items to output this cycle
I0323 00:50:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 00:50:37.377672  543705 disk_info.go:125] begin check local disk info of client
I0323 00:50:37.380261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:50:37.380267  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371700 0xc000371740]
E0323 00:50:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:50:43.410579  543705 memory.go:191] Add success.
I0323 00:50:43.409822  543705 cpu.go:282] Add success.
I0323 00:50:43.420341  543705 net.go:648] Add success.
I0323 00:50:43.423454  543705 net.go:770] primary dev: ETH0
I0323 00:50:43.423467  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:50:43.423480  543705 net.go:698] Add success.
I0323 00:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:50:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:50:53.409780  543705 memory.go:184] no items to output this cycle
I0323 00:50:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 00:51:03.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:51:03.409889  543705 cpu.go:275] no items to output this cycle
I0323 00:51:03.409899  543705 memory.go:184] no items to output this cycle
E0323 00:51:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:51:13.409783  543705 memory.go:191] Add success.
W0323 00:51:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:51:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:51:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:51:13.409823  543705 cpu.go:282] Add success.
I0323 00:51:13.420180  543705 net.go:648] Add success.
I0323 00:51:13.422784  543705 net.go:770] primary dev: ETH0
I0323 00:51:13.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:51:13.422808  543705 net.go:698] Add success.
I0323 00:51:13.468253  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"afd23c3d-7fb7-43ad-ac7a-a61a457619c5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:51:13.468286  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:51:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:51:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 00:51:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:51:14.456537  543705 disk_worker.go:494] system disk:vda1
I0323 00:51:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:51:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:51:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:51:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:51:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:51:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:51:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:51:23.409766  543705 memory.go:184] no items to output this cycle
I0323 00:51:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 00:51:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:51:33.409778  543705 memory.go:184] no items to output this cycle
I0323 00:51:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 00:51:37.381676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:51:37.384315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:51:37.384322  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abcc0 0xc0001abd00]
I0323 00:51:39.953728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:51:39.953734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:51:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:51:43.410572  543705 memory.go:191] Add success.
I0323 00:51:43.409806  543705 cpu.go:282] Add success.
I0323 00:51:43.420269  543705 net.go:648] Add success.
I0323 00:51:43.423032  543705 net.go:770] primary dev: ETH0
I0323 00:51:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:51:43.423056  543705 net.go:698] Add success.
I0323 00:51:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:51:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:51:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:51:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:51:53.409777  543705 memory.go:184] no items to output this cycle
I0323 00:51:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 00:52:03.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:52:03.409902  543705 memory.go:184] no items to output this cycle
I0323 00:52:03.409954  543705 cpu.go:275] no items to output this cycle
E0323 00:52:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:52:13.409813  543705 memory.go:191] Add success.
I0323 00:52:13.409823  543705 cpu.go:282] Add success.
W0323 00:52:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:52:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:52:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:52:13.420240  543705 net.go:648] Add success.
I0323 00:52:13.422881  543705 net.go:770] primary dev: ETH0
I0323 00:52:13.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:52:13.422905  543705 net.go:698] Add success.
W0323 00:52:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:52:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 00:52:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:52:14.456845  543705 disk_worker.go:494] system disk:vda1
I0323 00:52:14.456886  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:52:14.457194  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:52:14.457202  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:52:14.457207  543705 custom_config.go:64] query custom config with name: gpu
E0323 00:52:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:52:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:52:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:52:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:52:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:52:16.458030  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:52:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:52:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:52:23.409761  543705 memory.go:184] no items to output this cycle
I0323 00:52:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 00:52:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:52:33.409785  543705 memory.go:184] no items to output this cycle
I0323 00:52:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 00:52:37.385670  543705 disk_info.go:125] begin check local disk info of client
I0323 00:52:37.388209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:52:37.388217  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468bc0 0xc000468c00]
E0323 00:52:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:52:43.411014  543705 memory.go:191] Add success.
I0323 00:52:43.409827  543705 cpu.go:282] Add success.
I0323 00:52:43.419717  543705 net.go:648] Add success.
I0323 00:52:43.422330  543705 net.go:770] primary dev: ETH0
I0323 00:52:43.422344  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:52:43.422358  543705 net.go:698] Add success.
I0323 00:52:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:52:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:52:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:52:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:52:53.409763  543705 memory.go:184] no items to output this cycle
I0323 00:52:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 00:53:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:53:03.409919  543705 memory.go:184] no items to output this cycle
I0323 00:53:03.409942  543705 cpu.go:275] no items to output this cycle
E0323 00:53:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:53:13.409801  543705 memory.go:191] Add success.
I0323 00:53:13.409801  543705 cpu.go:282] Add success.
W0323 00:53:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:53:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:53:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:53:13.420199  543705 net.go:648] Add success.
I0323 00:53:13.422666  543705 net.go:770] primary dev: ETH0
I0323 00:53:13.422680  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:53:13.422694  543705 net.go:698] Add success.
I0323 00:53:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:53:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:53:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 00:53:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:53:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 00:53:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:53:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:53:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:53:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:53:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:53:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:53:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:53:23.409794  543705 memory.go:184] no items to output this cycle
I0323 00:53:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 00:53:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:53:33.409768  543705 memory.go:184] no items to output this cycle
I0323 00:53:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 00:53:37.392064  543705 disk_info.go:125] begin check local disk info of client
I0323 00:53:37.394601  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:53:37.394608  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad00 0xc0001aad40]
E0323 00:53:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:53:43.410633  543705 memory.go:191] Add success.
I0323 00:53:43.409805  543705 cpu.go:282] Add success.
I0323 00:53:43.420325  543705 net.go:648] Add success.
I0323 00:53:43.422823  543705 net.go:770] primary dev: ETH0
I0323 00:53:43.422836  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:53:43.422848  543705 net.go:698] Add success.
I0323 00:53:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:53:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:53:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:53:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:53:53.409797  543705 memory.go:184] no items to output this cycle
I0323 00:53:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 00:54:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:54:03.409777  543705 memory.go:184] no items to output this cycle
I0323 00:54:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 00:54:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:54:13.409808  543705 memory.go:191] Add success.
I0323 00:54:13.409809  543705 cpu.go:282] Add success.
W0323 00:54:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:54:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:54:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:54:13.420319  543705 net.go:648] Add success.
I0323 00:54:13.423287  543705 net.go:770] primary dev: ETH0
I0323 00:54:13.423301  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:54:13.423313  543705 net.go:698] Add success.
I0323 00:54:13.462910  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bf52792a-ff8e-46b3-87b2-a48780050f31","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:54:13.462944  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 00:54:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:54:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:54:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 00:54:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:54:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 00:54:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:54:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:54:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:54:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:54:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:54:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:54:23.409768  543705 memory.go:184] no items to output this cycle
I0323 00:54:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 00:54:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:54:33.409778  543705 memory.go:184] no items to output this cycle
I0323 00:54:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 00:54:37.397674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:54:37.400192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:54:37.400198  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504ac0 0xc000504b00]
I0323 00:54:39.953861  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:54:39.953867  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:54:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:54:43.410665  543705 memory.go:191] Add success.
I0323 00:54:43.409802  543705 cpu.go:282] Add success.
I0323 00:54:43.420385  543705 net.go:648] Add success.
I0323 00:54:43.422975  543705 net.go:770] primary dev: ETH0
I0323 00:54:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:54:43.423002  543705 net.go:698] Add success.
I0323 00:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:54:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:54:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:54:53.409773  543705 memory.go:184] no items to output this cycle
I0323 00:54:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 00:55:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:55:03.409818  543705 memory.go:184] no items to output this cycle
I0323 00:55:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 00:55:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:55:13.409804  543705 memory.go:191] Add success.
I0323 00:55:13.409807  543705 cpu.go:282] Add success.
W0323 00:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:55:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:55:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:55:13.420360  543705 net.go:648] Add success.
I0323 00:55:13.422764  543705 net.go:770] primary dev: ETH0
I0323 00:55:13.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:55:13.422790  543705 net.go:698] Add success.
I0323 00:55:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:55:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:55:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 00:55:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:55:14.456517  543705 disk_worker.go:494] system disk:vda1
I0323 00:55:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:55:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:55:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:55:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:55:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:55:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:55:23.409796  543705 memory.go:184] no items to output this cycle
I0323 00:55:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 00:55:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:55:33.409767  543705 memory.go:184] no items to output this cycle
I0323 00:55:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 00:55:37.401674  543705 disk_info.go:125] begin check local disk info of client
I0323 00:55:37.404399  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:55:37.404405  543705 disk_info.go:196] parse disk info done, disk is : [0xc000365a40 0xc000365a80]
E0323 00:55:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:55:43.410718  543705 memory.go:191] Add success.
I0323 00:55:43.409826  543705 cpu.go:282] Add success.
I0323 00:55:43.420426  543705 net.go:648] Add success.
I0323 00:55:43.423021  543705 net.go:770] primary dev: ETH0
I0323 00:55:43.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:55:43.423046  543705 net.go:698] Add success.
I0323 00:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:55:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:55:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:55:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:55:53.409781  543705 memory.go:184] no items to output this cycle
I0323 00:55:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 00:56:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:56:03.409791  543705 memory.go:184] no items to output this cycle
I0323 00:56:03.409860  543705 cpu.go:275] no items to output this cycle
E0323 00:56:13.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:56:13.409908  543705 memory.go:191] Add success.
W0323 00:56:13.409936  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:56:13.409952  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:56:13.409955  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:56:13.410049  543705 cpu.go:282] Add success.
I0323 00:56:13.419737  543705 net.go:648] Add success.
I0323 00:56:13.422451  543705 net.go:770] primary dev: ETH0
I0323 00:56:13.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:56:13.422478  543705 net.go:698] Add success.
I0323 00:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:56:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:56:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 00:56:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:56:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 00:56:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:56:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:56:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:56:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:56:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:56:23.409764  543705 memory.go:184] no items to output this cycle
I0323 00:56:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:56:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:56:33.409769  543705 memory.go:184] no items to output this cycle
I0323 00:56:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 00:56:37.405683  543705 disk_info.go:125] begin check local disk info of client
I0323 00:56:37.408158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:56:37.408165  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0323 00:56:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:56:43.410611  543705 memory.go:191] Add success.
I0323 00:56:43.409828  543705 cpu.go:282] Add success.
I0323 00:56:43.420325  543705 net.go:648] Add success.
I0323 00:56:43.422955  543705 net.go:770] primary dev: ETH0
I0323 00:56:43.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:56:43.422982  543705 net.go:698] Add success.
I0323 00:56:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:56:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:56:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:56:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:56:53.409777  543705 memory.go:184] no items to output this cycle
I0323 00:56:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 00:57:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:57:03.409825  543705 cpu.go:275] no items to output this cycle
I0323 00:57:03.409831  543705 memory.go:184] no items to output this cycle
E0323 00:57:13.409971  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:57:13.410006  543705 cpu.go:282] Add success.
I0323 00:57:13.410008  543705 memory.go:191] Add success.
W0323 00:57:13.410044  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:57:13.410067  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:57:13.410072  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:57:13.419717  543705 net.go:648] Add success.
I0323 00:57:13.428582  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 00:57:13.428654  543705 net.go:770] primary dev: ETH0
I0323 00:57:13.428666  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:57:13.428677  543705 net.go:698] Add success.
I0323 00:57:13.453219  543705 event_worker.go:152] Polling the log file for events...
I0323 00:57:13.468274  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ff7e9028-1c57-49f8-ae1a-020a944e2bde","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 00:57:13.468317  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 00:57:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:57:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 00:57:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:57:14.456794  543705 disk_worker.go:494] system disk:vda1
I0323 00:57:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 00:57:14.457091  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 00:57:14.457099  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 00:57:14.457104  543705 custom_config.go:64] query custom config with name: gpu
E0323 00:57:15.456791  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 00:57:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:57:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 00:57:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 00:57:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:57:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:57:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:57:23.410248  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:57:23.410265  543705 memory.go:184] no items to output this cycle
I0323 00:57:23.410277  543705 cpu.go:275] no items to output this cycle
E0323 00:57:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:57:33.409770  543705 memory.go:184] no items to output this cycle
I0323 00:57:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 00:57:37.409676  543705 disk_info.go:125] begin check local disk info of client
I0323 00:57:37.412166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:57:37.412173  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf40 0xc00007a000]
I0323 00:57:39.954004  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 00:57:39.954011  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 00:57:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:57:43.410631  543705 memory.go:191] Add success.
I0323 00:57:43.409804  543705 cpu.go:282] Add success.
I0323 00:57:43.420336  543705 net.go:648] Add success.
I0323 00:57:43.422970  543705 net.go:770] primary dev: ETH0
I0323 00:57:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:57:43.422999  543705 net.go:698] Add success.
I0323 00:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:57:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:57:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:57:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:57:53.409795  543705 memory.go:184] no items to output this cycle
I0323 00:57:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 00:58:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:58:03.409814  543705 memory.go:184] no items to output this cycle
I0323 00:58:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 00:58:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:58:13.409830  543705 memory.go:191] Add success.
I0323 00:58:13.409841  543705 cpu.go:282] Add success.
W0323 00:58:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:58:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:58:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:58:13.420372  543705 net.go:648] Add success.
I0323 00:58:13.423096  543705 net.go:770] primary dev: ETH0
I0323 00:58:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:58:13.423121  543705 net.go:698] Add success.
I0323 00:58:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:58:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:58:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 00:58:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:58:14.456519  543705 disk_worker.go:494] system disk:vda1
I0323 00:58:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:58:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:58:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:58:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:58:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:58:23.409796  543705 memory.go:184] no items to output this cycle
I0323 00:58:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 00:58:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:58:33.409784  543705 memory.go:184] no items to output this cycle
I0323 00:58:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 00:58:37.412794  543705 disk_info.go:125] begin check local disk info of client
I0323 00:58:37.415307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:58:37.415313  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b18c0 0xc0002b1900]
E0323 00:58:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:58:43.410726  543705 memory.go:191] Add success.
I0323 00:58:43.409841  543705 cpu.go:282] Add success.
I0323 00:58:43.420498  543705 net.go:648] Add success.
I0323 00:58:43.423067  543705 net.go:770] primary dev: ETH0
I0323 00:58:43.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:58:43.423092  543705 net.go:698] Add success.
I0323 00:58:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:58:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:58:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:58:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:58:53.409766  543705 memory.go:184] no items to output this cycle
I0323 00:58:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 00:59:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:59:03.409782  543705 memory.go:184] no items to output this cycle
I0323 00:59:03.409835  543705 cpu.go:275] no items to output this cycle
E0323 00:59:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:59:13.409818  543705 memory.go:191] Add success.
I0323 00:59:13.409822  543705 cpu.go:282] Add success.
W0323 00:59:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 00:59:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 00:59:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 00:59:13.419737  543705 net.go:648] Add success.
I0323 00:59:13.422751  543705 net.go:770] primary dev: ETH0
I0323 00:59:13.422764  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:59:13.422776  543705 net.go:698] Add success.
I0323 00:59:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 00:59:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 00:59:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 00:59:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 00:59:14.456556  543705 disk_worker.go:494] system disk:vda1
I0323 00:59:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 00:59:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 00:59:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:59:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:59:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 00:59:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 00:59:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:59:23.409761  543705 memory.go:184] no items to output this cycle
I0323 00:59:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 00:59:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:59:33.409763  543705 memory.go:184] no items to output this cycle
I0323 00:59:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 00:59:37.415799  543705 disk_info.go:125] begin check local disk info of client
I0323 00:59:37.418336  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 00:59:37.418342  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b14c0 0xc0002b1500]
E0323 00:59:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:59:43.410724  543705 memory.go:191] Add success.
I0323 00:59:43.409824  543705 cpu.go:282] Add success.
I0323 00:59:43.420423  543705 net.go:648] Add success.
I0323 00:59:43.423026  543705 net.go:770] primary dev: ETH0
I0323 00:59:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0323 00:59:43.423056  543705 net.go:698] Add success.
I0323 00:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 00:59:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 00:59:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 00:59:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 00:59:53.409787  543705 memory.go:184] no items to output this cycle
I0323 00:59:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:00:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:00:03.409802  543705 cpu.go:275] no items to output this cycle
I0323 01:00:03.409810  543705 memory.go:184] no items to output this cycle
E0323 01:00:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:00:13.409821  543705 memory.go:191] Add success.
I0323 01:00:13.409824  543705 cpu.go:282] Add success.
W0323 01:00:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:00:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:00:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:00:13.420185  543705 net.go:648] Add success.
I0323 01:00:13.422993  543705 net.go:770] primary dev: ETH0
I0323 01:00:13.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:00:13.423176  543705 net.go:698] Add success.
I0323 01:00:13.507270  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a0dc6be4-3428-44ea-a3ab-3ebe2026ab30","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:00:13.507301  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:00:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:00:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:00:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 01:00:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:00:14.456634  543705 disk_worker.go:494] system disk:vda1
I0323 01:00:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:00:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:00:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:00:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:00:23.409794  543705 memory.go:184] no items to output this cycle
I0323 01:00:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 01:00:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:00:33.409778  543705 cpu.go:275] no items to output this cycle
I0323 01:00:33.409785  543705 memory.go:184] no items to output this cycle
I0323 01:00:37.418797  543705 disk_info.go:125] begin check local disk info of client
I0323 01:00:37.421276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:00:37.421281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abf00 0xc0001abf40]
I0323 01:00:39.956466  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:00:39.956472  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:00:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:00:43.410709  543705 memory.go:191] Add success.
I0323 01:00:43.409807  543705 cpu.go:282] Add success.
I0323 01:00:43.420364  543705 net.go:648] Add success.
I0323 01:00:43.422919  543705 net.go:770] primary dev: ETH0
I0323 01:00:43.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:00:43.422946  543705 net.go:698] Add success.
I0323 01:00:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:00:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:00:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:00:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:00:53.409783  543705 memory.go:184] no items to output this cycle
I0323 01:00:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 01:01:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:01:03.409789  543705 cpu.go:275] no items to output this cycle
I0323 01:01:03.409804  543705 memory.go:184] no items to output this cycle
E0323 01:01:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:01:13.409831  543705 memory.go:191] Add success.
I0323 01:01:13.409836  543705 cpu.go:282] Add success.
W0323 01:01:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:01:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:01:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:01:13.420121  543705 net.go:648] Add success.
I0323 01:01:13.422771  543705 net.go:770] primary dev: ETH0
I0323 01:01:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:01:13.422968  543705 net.go:698] Add success.
I0323 01:01:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:01:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:01:14.455149  543705 disk_worker.go:708] disk space is not compliant
W0323 01:01:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:01:14.456537  543705 disk_worker.go:494] system disk:vda1
I0323 01:01:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:01:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:01:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:01:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:01:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:01:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:01:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:01:23.409792  543705 memory.go:184] no items to output this cycle
I0323 01:01:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 01:01:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:01:33.409765  543705 memory.go:184] no items to output this cycle
I0323 01:01:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 01:01:37.421799  543705 disk_info.go:125] begin check local disk info of client
I0323 01:01:37.424367  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:01:37.424373  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
E0323 01:01:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:01:43.410678  543705 memory.go:191] Add success.
I0323 01:01:43.409790  543705 cpu.go:282] Add success.
I0323 01:01:43.420396  543705 net.go:648] Add success.
I0323 01:01:43.422911  543705 net.go:770] primary dev: ETH0
I0323 01:01:43.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:01:43.422936  543705 net.go:698] Add success.
I0323 01:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:01:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:01:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:01:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:01:53.409775  543705 memory.go:184] no items to output this cycle
I0323 01:01:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:02:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:02:03.409772  543705 memory.go:184] no items to output this cycle
I0323 01:02:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 01:02:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:02:13.409786  543705 memory.go:191] Add success.
I0323 01:02:13.409805  543705 cpu.go:282] Add success.
W0323 01:02:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:02:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:02:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:02:13.420194  543705 net.go:648] Add success.
I0323 01:02:13.423280  543705 net.go:770] primary dev: ETH0
I0323 01:02:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:02:13.423315  543705 net.go:698] Add success.
W0323 01:02:14.455636  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:02:14.455651  543705 disk_worker.go:708] disk space is not compliant
W0323 01:02:14.455656  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:02:14.456211  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:02:14.456218  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:02:14.456234  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:02:14.458043  543705 disk_worker.go:494] system disk:vda1
I0323 01:02:14.458079  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:02:15.456836  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:02:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:02:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:02:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:02:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:02:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:02:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:02:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:02:23.409808  543705 memory.go:184] no items to output this cycle
I0323 01:02:23.409839  543705 cpu.go:275] no items to output this cycle
E0323 01:02:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:02:33.409807  543705 memory.go:184] no items to output this cycle
I0323 01:02:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 01:02:37.424822  543705 disk_info.go:125] begin check local disk info of client
I0323 01:02:37.427464  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:02:37.427471  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504400 0xc000504440]
E0323 01:02:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:02:43.410770  543705 memory.go:191] Add success.
I0323 01:02:43.409821  543705 cpu.go:282] Add success.
I0323 01:02:43.420453  543705 net.go:648] Add success.
I0323 01:02:43.423268  543705 net.go:770] primary dev: ETH0
I0323 01:02:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:02:43.423310  543705 net.go:698] Add success.
I0323 01:02:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:02:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:02:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:02:53.409810  543705 memory.go:184] no items to output this cycle
I0323 01:02:53.409819  543705 cpu.go:275] no items to output this cycle
E0323 01:03:03.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:03:03.409833  543705 memory.go:184] no items to output this cycle
I0323 01:03:03.409839  543705 cpu.go:275] no items to output this cycle
E0323 01:03:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:03:13.409820  543705 memory.go:191] Add success.
I0323 01:03:13.409831  543705 cpu.go:282] Add success.
W0323 01:03:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:03:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:03:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:03:13.420178  543705 net.go:648] Add success.
I0323 01:03:13.423202  543705 net.go:770] primary dev: ETH0
I0323 01:03:13.423217  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:03:13.423231  543705 net.go:698] Add success.
I0323 01:03:13.462859  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3a8be8cf-ad11-41df-be2a-6488c7b8aca6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:03:13.462891  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:03:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:03:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 01:03:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:03:14.457216  543705 disk_worker.go:494] system disk:vda1
I0323 01:03:14.457253  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:03:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:03:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:03:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:03:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:03:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:03:23.409764  543705 memory.go:184] no items to output this cycle
I0323 01:03:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 01:03:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:03:33.409799  543705 memory.go:184] no items to output this cycle
I0323 01:03:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 01:03:37.427827  543705 disk_info.go:125] begin check local disk info of client
I0323 01:03:37.430347  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:03:37.430354  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0080 0xc0002b00c0]
I0323 01:03:39.957725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:03:39.957732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:03:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:03:43.410547  543705 memory.go:191] Add success.
I0323 01:03:43.409819  543705 cpu.go:282] Add success.
I0323 01:03:43.420229  543705 net.go:648] Add success.
I0323 01:03:43.422930  543705 net.go:770] primary dev: ETH0
I0323 01:03:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:03:43.422957  543705 net.go:698] Add success.
I0323 01:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:03:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:03:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:03:53.409764  543705 memory.go:184] no items to output this cycle
I0323 01:03:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 01:04:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:04:03.409784  543705 cpu.go:275] no items to output this cycle
I0323 01:04:03.409789  543705 memory.go:184] no items to output this cycle
E0323 01:04:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:04:13.409806  543705 memory.go:191] Add success.
I0323 01:04:13.409807  543705 cpu.go:282] Add success.
W0323 01:04:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:04:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:04:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:04:13.420147  543705 net.go:648] Add success.
I0323 01:04:13.423125  543705 net.go:770] primary dev: ETH0
I0323 01:04:13.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:04:13.423150  543705 net.go:698] Add success.
I0323 01:04:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:04:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:04:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 01:04:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:04:14.457007  543705 disk_worker.go:494] system disk:vda1
I0323 01:04:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:04:15.456021  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:04:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:04:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:04:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:04:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:04:23.409802  543705 memory.go:184] no items to output this cycle
I0323 01:04:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 01:04:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:04:33.409801  543705 memory.go:184] no items to output this cycle
I0323 01:04:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 01:04:37.430848  543705 disk_info.go:125] begin check local disk info of client
I0323 01:04:37.433413  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:04:37.433419  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb00 0xc00007bb40]
E0323 01:04:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:04:43.410701  543705 memory.go:191] Add success.
I0323 01:04:43.409819  543705 cpu.go:282] Add success.
I0323 01:04:43.420416  543705 net.go:648] Add success.
I0323 01:04:43.423261  543705 net.go:770] primary dev: ETH0
I0323 01:04:43.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:04:43.423288  543705 net.go:698] Add success.
I0323 01:04:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:04:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:04:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:04:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:04:53.409812  543705 memory.go:184] no items to output this cycle
I0323 01:04:53.409823  543705 cpu.go:275] no items to output this cycle
E0323 01:05:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:05:03.409790  543705 memory.go:184] no items to output this cycle
I0323 01:05:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:05:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:05:13.409804  543705 memory.go:191] Add success.
I0323 01:05:13.409818  543705 cpu.go:282] Add success.
W0323 01:05:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:05:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:05:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:05:13.420121  543705 net.go:648] Add success.
I0323 01:05:13.422742  543705 net.go:770] primary dev: ETH0
I0323 01:05:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:05:13.422772  543705 net.go:698] Add success.
I0323 01:05:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:05:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:05:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 01:05:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:05:14.457102  543705 disk_worker.go:494] system disk:vda1
I0323 01:05:14.457133  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:05:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:05:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:05:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:05:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:05:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:05:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:05:23.409787  543705 cpu.go:275] no items to output this cycle
I0323 01:05:23.409796  543705 memory.go:184] no items to output this cycle
E0323 01:05:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:05:33.409782  543705 memory.go:184] no items to output this cycle
I0323 01:05:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 01:05:37.433849  543705 disk_info.go:125] begin check local disk info of client
I0323 01:05:37.436346  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:05:37.436353  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b06c0 0xc0002b0700]
E0323 01:05:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:05:43.410672  543705 memory.go:191] Add success.
I0323 01:05:43.409828  543705 cpu.go:282] Add success.
I0323 01:05:43.420356  543705 net.go:648] Add success.
I0323 01:05:43.422820  543705 net.go:770] primary dev: ETH0
I0323 01:05:43.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:05:43.422846  543705 net.go:698] Add success.
I0323 01:05:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:05:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:05:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:05:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:05:53.409800  543705 memory.go:184] no items to output this cycle
I0323 01:05:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:06:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:06:03.409784  543705 memory.go:184] no items to output this cycle
I0323 01:06:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 01:06:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:06:13.409810  543705 memory.go:191] Add success.
I0323 01:06:13.409811  543705 cpu.go:282] Add success.
W0323 01:06:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:06:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:06:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:06:13.420201  543705 net.go:648] Add success.
I0323 01:06:13.422971  543705 net.go:770] primary dev: ETH0
I0323 01:06:13.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:06:13.423001  543705 net.go:698] Add success.
I0323 01:06:13.779150  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7c78dd15-3d36-41f5-bcca-81bb6b7936b0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:06:13.779195  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:06:14.454692  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:06:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:06:14.455112  543705 disk_worker.go:708] disk space is not compliant
W0323 01:06:14.455116  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:06:14.456810  543705 disk_worker.go:494] system disk:vda1
I0323 01:06:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:06:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:06:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:06:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:06:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:06:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:06:23.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:06:23.410256  543705 memory.go:184] no items to output this cycle
I0323 01:06:23.410272  543705 cpu.go:275] no items to output this cycle
E0323 01:06:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:06:33.409763  543705 memory.go:184] no items to output this cycle
I0323 01:06:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 01:06:37.436878  543705 disk_info.go:125] begin check local disk info of client
I0323 01:06:37.439454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:06:37.439460  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aacc0 0xc0001aad00]
I0323 01:06:39.960480  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:06:39.960486  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:06:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:06:43.410592  543705 memory.go:191] Add success.
I0323 01:06:43.409825  543705 cpu.go:282] Add success.
I0323 01:06:43.420400  543705 net.go:648] Add success.
I0323 01:06:43.422903  543705 net.go:770] primary dev: ETH0
I0323 01:06:43.422915  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:06:43.422929  543705 net.go:698] Add success.
I0323 01:06:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:06:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:06:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:06:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:06:53.409777  543705 cpu.go:275] no items to output this cycle
I0323 01:06:53.409780  543705 memory.go:184] no items to output this cycle
E0323 01:07:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:07:03.409769  543705 memory.go:184] no items to output this cycle
I0323 01:07:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 01:07:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:07:13.409832  543705 memory.go:191] Add success.
I0323 01:07:13.409840  543705 cpu.go:282] Add success.
W0323 01:07:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:07:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:07:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:07:13.420189  543705 net.go:648] Add success.
I0323 01:07:13.422975  543705 net.go:770] primary dev: ETH0
I0323 01:07:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:07:13.423004  543705 net.go:698] Add success.
I0323 01:07:13.453544  543705 event_worker.go:152] Polling the log file for events...
W0323 01:07:14.455516  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:07:14.455612  543705 disk_worker.go:708] disk space is not compliant
W0323 01:07:14.455616  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:07:14.456326  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:07:14.456350  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:07:14.456357  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:07:14.457220  543705 disk_worker.go:494] system disk:vda1
I0323 01:07:14.457266  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:07:15.456779  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:07:15.456789  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:07:16.457893  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:07:16.457895  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:07:16.457949  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:07:16.457969  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:07:16.472295  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:07:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:07:23.409772  543705 memory.go:184] no items to output this cycle
I0323 01:07:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 01:07:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:07:33.409798  543705 memory.go:184] no items to output this cycle
I0323 01:07:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 01:07:37.439888  543705 disk_info.go:125] begin check local disk info of client
I0323 01:07:37.442431  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:07:37.442437  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c00 0xc0000c4c40]
E0323 01:07:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:07:43.410617  543705 memory.go:191] Add success.
I0323 01:07:43.409796  543705 cpu.go:282] Add success.
I0323 01:07:43.420320  543705 net.go:648] Add success.
I0323 01:07:43.422868  543705 net.go:770] primary dev: ETH0
I0323 01:07:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:07:43.422895  543705 net.go:698] Add success.
I0323 01:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:07:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:07:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:07:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:07:53.409764  543705 memory.go:184] no items to output this cycle
I0323 01:07:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 01:08:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:08:03.409800  543705 memory.go:184] no items to output this cycle
I0323 01:08:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 01:08:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:08:13.409829  543705 memory.go:191] Add success.
I0323 01:08:13.409832  543705 cpu.go:282] Add success.
W0323 01:08:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:08:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:08:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:08:13.420186  543705 net.go:648] Add success.
I0323 01:08:13.422947  543705 net.go:770] primary dev: ETH0
I0323 01:08:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:08:13.422978  543705 net.go:698] Add success.
I0323 01:08:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:08:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:08:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 01:08:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:08:14.456837  543705 disk_worker.go:494] system disk:vda1
I0323 01:08:14.456867  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:08:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:08:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:08:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:08:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:08:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:08:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:08:23.409765  543705 memory.go:184] no items to output this cycle
I0323 01:08:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:08:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:08:33.409772  543705 memory.go:184] no items to output this cycle
I0323 01:08:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 01:08:37.442917  543705 disk_info.go:125] begin check local disk info of client
I0323 01:08:37.445351  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:08:37.445359  543705 disk_info.go:196] parse disk info done, disk is : [0xc000293400 0xc000293440]
E0323 01:08:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:08:43.410729  543705 memory.go:191] Add success.
I0323 01:08:43.409836  543705 cpu.go:282] Add success.
I0323 01:08:43.420450  543705 net.go:648] Add success.
I0323 01:08:43.423160  543705 net.go:770] primary dev: ETH0
I0323 01:08:43.423176  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:08:43.423192  543705 net.go:698] Add success.
I0323 01:08:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:08:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:08:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:08:53.409761  543705 memory.go:184] no items to output this cycle
I0323 01:08:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 01:09:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:09:03.409775  543705 memory.go:184] no items to output this cycle
I0323 01:09:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 01:09:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:09:13.409820  543705 memory.go:191] Add success.
I0323 01:09:13.409827  543705 cpu.go:282] Add success.
W0323 01:09:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:09:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:09:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:09:13.420151  543705 net.go:648] Add success.
I0323 01:09:13.423013  543705 net.go:770] primary dev: ETH0
I0323 01:09:13.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:09:13.423042  543705 net.go:698] Add success.
I0323 01:09:13.469708  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db8442f8-16bd-4eb0-9db6-e8c4d6d5c165","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:09:13.469740  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:09:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:09:14.455292  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:09:14.455382  543705 disk_worker.go:708] disk space is not compliant
W0323 01:09:14.455387  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:09:14.457046  543705 disk_worker.go:494] system disk:vda1
I0323 01:09:14.457089  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:09:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:09:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:09:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:09:23.409772  543705 memory.go:184] no items to output this cycle
I0323 01:09:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 01:09:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:09:33.409772  543705 memory.go:184] no items to output this cycle
I0323 01:09:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 01:09:37.445924  543705 disk_info.go:125] begin check local disk info of client
I0323 01:09:37.448439  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:09:37.448445  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a80 0xc0000c5ac0]
I0323 01:09:39.961728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:09:39.961734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:09:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:09:43.410651  543705 memory.go:191] Add success.
I0323 01:09:43.409807  543705 cpu.go:282] Add success.
I0323 01:09:43.420384  543705 net.go:648] Add success.
I0323 01:09:43.423284  543705 net.go:770] primary dev: ETH0
I0323 01:09:43.423300  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:09:43.423314  543705 net.go:698] Add success.
I0323 01:09:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:09:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:09:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:09:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:09:53.409799  543705 memory.go:184] no items to output this cycle
I0323 01:09:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 01:10:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:10:03.409777  543705 memory.go:184] no items to output this cycle
I0323 01:10:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 01:10:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:10:13.409816  543705 memory.go:191] Add success.
I0323 01:10:13.409824  543705 cpu.go:282] Add success.
W0323 01:10:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:10:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:10:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:10:13.420127  543705 net.go:648] Add success.
I0323 01:10:13.422676  543705 net.go:770] primary dev: ETH0
I0323 01:10:13.422690  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:10:13.422703  543705 net.go:698] Add success.
I0323 01:10:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:10:14.455356  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:10:14.455371  543705 disk_worker.go:708] disk space is not compliant
W0323 01:10:14.455375  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:10:14.456948  543705 disk_worker.go:494] system disk:vda1
I0323 01:10:14.456987  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:10:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:10:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:10:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:10:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:10:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:10:23.409800  543705 memory.go:184] no items to output this cycle
I0323 01:10:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 01:10:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:10:33.409773  543705 memory.go:184] no items to output this cycle
I0323 01:10:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 01:10:37.448930  543705 disk_info.go:125] begin check local disk info of client
I0323 01:10:37.451442  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:10:37.451448  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1840 0xc0002b1880]
E0323 01:10:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:10:43.410745  543705 memory.go:191] Add success.
I0323 01:10:43.409812  543705 cpu.go:282] Add success.
I0323 01:10:43.420451  543705 net.go:648] Add success.
I0323 01:10:43.423267  543705 net.go:770] primary dev: ETH0
I0323 01:10:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:10:43.423297  543705 net.go:698] Add success.
I0323 01:10:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:10:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:10:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:10:53.409776  543705 memory.go:184] no items to output this cycle
I0323 01:10:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 01:11:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:11:03.409781  543705 memory.go:184] no items to output this cycle
I0323 01:11:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 01:11:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:11:13.409819  543705 memory.go:191] Add success.
I0323 01:11:13.409823  543705 cpu.go:282] Add success.
W0323 01:11:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:11:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:11:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:11:13.420128  543705 net.go:648] Add success.
I0323 01:11:13.422517  543705 net.go:770] primary dev: ETH0
I0323 01:11:13.422530  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:11:13.422542  543705 net.go:698] Add success.
I0323 01:11:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:11:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:11:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 01:11:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:11:14.456796  543705 disk_worker.go:494] system disk:vda1
I0323 01:11:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:11:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:11:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:11:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:11:23.409775  543705 memory.go:184] no items to output this cycle
I0323 01:11:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 01:11:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:11:33.409798  543705 memory.go:184] no items to output this cycle
I0323 01:11:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 01:11:37.451948  543705 disk_info.go:125] begin check local disk info of client
I0323 01:11:37.454546  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:11:37.454551  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1e80 0xc0002b1ec0]
E0323 01:11:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:11:43.410613  543705 memory.go:191] Add success.
I0323 01:11:43.409817  543705 cpu.go:282] Add success.
I0323 01:11:43.420300  543705 net.go:648] Add success.
I0323 01:11:43.422951  543705 net.go:770] primary dev: ETH0
I0323 01:11:43.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:11:43.422979  543705 net.go:698] Add success.
I0323 01:11:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:11:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:11:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:11:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:11:53.409774  543705 cpu.go:275] no items to output this cycle
I0323 01:11:53.409777  543705 memory.go:184] no items to output this cycle
E0323 01:12:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:12:03.409802  543705 memory.go:184] no items to output this cycle
I0323 01:12:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 01:12:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:12:13.409787  543705 memory.go:191] Add success.
W0323 01:12:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:12:13.409816  543705 cpu.go:282] Add success.
W0323 01:12:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:12:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:12:13.420202  543705 net.go:648] Add success.
I0323 01:12:13.422755  543705 net.go:770] primary dev: ETH0
I0323 01:12:13.422769  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:12:13.422781  543705 net.go:698] Add success.
I0323 01:12:13.479100  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"61be7e1c-e032-4379-96e6-4d6bb1e16645","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:12:13.479142  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 01:12:14.455410  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:12:14.455616  543705 disk_worker.go:708] disk space is not compliant
W0323 01:12:14.455621  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:12:14.456156  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:12:14.456163  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:12:14.456167  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:12:14.457760  543705 disk_worker.go:494] system disk:vda1
I0323 01:12:14.457790  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:12:15.456797  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:12:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:12:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:12:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:12:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:12:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:12:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:12:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:12:23.409793  543705 memory.go:184] no items to output this cycle
I0323 01:12:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 01:12:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:12:33.409765  543705 memory.go:184] no items to output this cycle
I0323 01:12:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 01:12:37.454966  543705 disk_info.go:125] begin check local disk info of client
I0323 01:12:37.457475  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:12:37.457481  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504bc0 0xc000504c00]
I0323 01:12:39.961868  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:12:39.961873  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:12:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:12:43.410683  543705 memory.go:191] Add success.
I0323 01:12:43.409880  543705 cpu.go:282] Add success.
I0323 01:12:43.420478  543705 net.go:648] Add success.
I0323 01:12:43.423122  543705 net.go:770] primary dev: ETH0
I0323 01:12:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:12:43.423164  543705 net.go:698] Add success.
I0323 01:12:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:12:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:12:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:12:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:12:53.409769  543705 memory.go:184] no items to output this cycle
I0323 01:12:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 01:13:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:13:03.409806  543705 memory.go:184] no items to output this cycle
I0323 01:13:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 01:13:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:13:13.409803  543705 memory.go:191] Add success.
I0323 01:13:13.409803  543705 cpu.go:282] Add success.
W0323 01:13:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:13:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:13:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:13:13.420136  543705 net.go:648] Add success.
I0323 01:13:13.423012  543705 net.go:770] primary dev: ETH0
I0323 01:13:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:13:13.423043  543705 net.go:698] Add success.
I0323 01:13:14.453952  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:13:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:13:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0323 01:13:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:13:14.456559  543705 disk_worker.go:494] system disk:vda1
I0323 01:13:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:13:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:13:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:13:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:13:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:13:23.409797  543705 memory.go:184] no items to output this cycle
I0323 01:13:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 01:13:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:13:33.409767  543705 memory.go:184] no items to output this cycle
I0323 01:13:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 01:13:37.457977  543705 disk_info.go:125] begin check local disk info of client
I0323 01:13:37.460441  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:13:37.460447  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab580 0xc0001ab5c0]
E0323 01:13:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:13:43.410602  543705 memory.go:191] Add success.
I0323 01:13:43.409801  543705 cpu.go:282] Add success.
I0323 01:13:43.420304  543705 net.go:648] Add success.
I0323 01:13:43.423048  543705 net.go:770] primary dev: ETH0
I0323 01:13:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:13:43.423074  543705 net.go:698] Add success.
I0323 01:13:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:13:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:13:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:13:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:13:53.409769  543705 memory.go:184] no items to output this cycle
I0323 01:13:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 01:14:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:14:03.409796  543705 memory.go:184] no items to output this cycle
I0323 01:14:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 01:14:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:14:13.409795  543705 memory.go:191] Add success.
I0323 01:14:13.409815  543705 cpu.go:282] Add success.
W0323 01:14:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:14:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:14:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:14:13.420154  543705 net.go:648] Add success.
I0323 01:14:13.422998  543705 net.go:770] primary dev: ETH0
I0323 01:14:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:14:13.423034  543705 net.go:698] Add success.
I0323 01:14:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:14:14.455449  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:14:14.455480  543705 disk_worker.go:708] disk space is not compliant
W0323 01:14:14.455485  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:14:14.457059  543705 disk_worker.go:494] system disk:vda1
I0323 01:14:14.457088  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:14:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:14:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:14:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:14:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:14:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:14:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:14:23.409763  543705 memory.go:184] no items to output this cycle
I0323 01:14:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 01:14:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:14:33.409776  543705 memory.go:184] no items to output this cycle
I0323 01:14:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 01:14:37.460988  543705 disk_info.go:125] begin check local disk info of client
I0323 01:14:37.463544  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:14:37.463550  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b1c0 0xc00007b200]
E0323 01:14:43.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:14:43.410668  543705 memory.go:191] Add success.
I0323 01:14:43.409882  543705 cpu.go:282] Add success.
I0323 01:14:43.420381  543705 net.go:648] Add success.
I0323 01:14:43.423019  543705 net.go:770] primary dev: ETH0
I0323 01:14:43.423034  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:14:43.423050  543705 net.go:698] Add success.
I0323 01:14:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:14:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:14:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:14:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:14:53.409789  543705 cpu.go:275] no items to output this cycle
I0323 01:14:53.409796  543705 memory.go:184] no items to output this cycle
E0323 01:15:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:15:03.409796  543705 memory.go:184] no items to output this cycle
I0323 01:15:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 01:15:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:15:13.409820  543705 memory.go:191] Add success.
I0323 01:15:13.409825  543705 cpu.go:282] Add success.
W0323 01:15:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:15:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:15:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:15:13.420140  543705 net.go:648] Add success.
I0323 01:15:13.422810  543705 net.go:770] primary dev: ETH0
I0323 01:15:13.422825  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:15:13.422839  543705 net.go:698] Add success.
I0323 01:15:13.573840  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3b2b0b7f-74e9-45fe-b8b6-8627e5d67a32","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:15:13.573876  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:15:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:15:14.455372  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:15:14.455384  543705 disk_worker.go:708] disk space is not compliant
W0323 01:15:14.455391  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:15:14.456828  543705 disk_worker.go:494] system disk:vda1
I0323 01:15:14.456870  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:15:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:15:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:15:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:15:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:15:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:15:23.409769  543705 memory.go:184] no items to output this cycle
I0323 01:15:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 01:15:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:15:33.409799  543705 memory.go:184] no items to output this cycle
I0323 01:15:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 01:15:37.464011  543705 disk_info.go:125] begin check local disk info of client
I0323 01:15:37.466595  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:15:37.466601  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505ec0 0xc000505f00]
I0323 01:15:39.964506  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:15:39.964512  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:15:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:15:43.410572  543705 memory.go:191] Add success.
I0323 01:15:43.409816  543705 cpu.go:282] Add success.
I0323 01:15:43.420249  543705 net.go:648] Add success.
I0323 01:15:43.422728  543705 net.go:770] primary dev: ETH0
I0323 01:15:43.422741  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:15:43.422755  543705 net.go:698] Add success.
I0323 01:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:15:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:15:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:15:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:15:53.409773  543705 memory.go:184] no items to output this cycle
I0323 01:15:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 01:16:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:16:03.409773  543705 memory.go:184] no items to output this cycle
I0323 01:16:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 01:16:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:16:13.409801  543705 memory.go:191] Add success.
I0323 01:16:13.409802  543705 cpu.go:282] Add success.
W0323 01:16:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:16:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:16:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:16:13.420277  543705 net.go:648] Add success.
I0323 01:16:13.422942  543705 net.go:770] primary dev: ETH0
I0323 01:16:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:16:13.422972  543705 net.go:698] Add success.
I0323 01:16:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:16:14.455271  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:16:14.455281  543705 disk_worker.go:708] disk space is not compliant
W0323 01:16:14.455284  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:16:14.456885  543705 disk_worker.go:494] system disk:vda1
I0323 01:16:14.456926  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:16:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:16:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:16:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:16:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:16:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:16:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:16:23.409797  543705 memory.go:184] no items to output this cycle
I0323 01:16:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 01:16:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:16:33.409773  543705 memory.go:184] no items to output this cycle
I0323 01:16:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 01:16:37.467026  543705 disk_info.go:125] begin check local disk info of client
I0323 01:16:37.469530  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:16:37.469536  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0780 0xc0002b07c0]
E0323 01:16:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:16:43.410880  543705 memory.go:191] Add success.
I0323 01:16:43.409862  543705 cpu.go:282] Add success.
I0323 01:16:43.420685  543705 net.go:648] Add success.
I0323 01:16:43.423453  543705 net.go:770] primary dev: ETH0
I0323 01:16:43.423467  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:16:43.423480  543705 net.go:698] Add success.
I0323 01:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:16:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:16:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:16:53.410516  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:16:53.410532  543705 memory.go:184] no items to output this cycle
I0323 01:16:53.410554  543705 cpu.go:275] no items to output this cycle
E0323 01:17:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:17:03.409765  543705 memory.go:184] no items to output this cycle
I0323 01:17:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 01:17:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:17:13.409784  543705 memory.go:191] Add success.
W0323 01:17:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:17:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:17:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:17:13.409824  543705 cpu.go:282] Add success.
I0323 01:17:13.420055  543705 net.go:648] Add success.
I0323 01:17:13.422706  543705 net.go:770] primary dev: ETH0
I0323 01:17:13.422721  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:17:13.422735  543705 net.go:698] Add success.
I0323 01:17:13.453297  543705 event_worker.go:152] Polling the log file for events...
W0323 01:17:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:17:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 01:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:17:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:17:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:17:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:17:14.456555  543705 disk_worker.go:494] system disk:vda1
I0323 01:17:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:17:15.456882  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:17:15.456890  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:17:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:17:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:17:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:17:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:17:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:17:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:17:23.409796  543705 memory.go:184] no items to output this cycle
I0323 01:17:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 01:17:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:17:33.409776  543705 memory.go:184] no items to output this cycle
I0323 01:17:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 01:17:37.470029  543705 disk_info.go:125] begin check local disk info of client
I0323 01:17:37.472520  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:17:37.472526  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b09c0 0xc0002b0a00]
E0323 01:17:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:17:43.410648  543705 memory.go:191] Add success.
I0323 01:17:43.409799  543705 cpu.go:282] Add success.
I0323 01:17:43.420326  543705 net.go:648] Add success.
I0323 01:17:43.423118  543705 net.go:770] primary dev: ETH0
I0323 01:17:43.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:17:43.423161  543705 net.go:698] Add success.
I0323 01:17:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:17:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:17:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:17:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:17:53.409774  543705 memory.go:184] no items to output this cycle
I0323 01:17:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 01:18:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:18:03.409800  543705 memory.go:184] no items to output this cycle
I0323 01:18:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 01:18:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:18:13.409797  543705 memory.go:191] Add success.
W0323 01:18:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:18:13.409826  543705 cpu.go:282] Add success.
W0323 01:18:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:18:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:18:13.420142  543705 net.go:648] Add success.
I0323 01:18:13.422781  543705 net.go:770] primary dev: ETH0
I0323 01:18:13.422795  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:18:13.422807  543705 net.go:698] Add success.
I0323 01:18:13.471116  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9d437d67-c59f-4e56-ba1c-57e19150c3d4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:18:13.471149  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:18:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:18:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:18:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 01:18:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:18:14.457080  543705 disk_worker.go:494] system disk:vda1
I0323 01:18:14.457110  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:18:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:18:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:18:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:18:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:18:23.409789  543705 memory.go:184] no items to output this cycle
I0323 01:18:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 01:18:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:18:33.409803  543705 memory.go:184] no items to output this cycle
I0323 01:18:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 01:18:37.473056  543705 disk_info.go:125] begin check local disk info of client
I0323 01:18:37.475650  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:18:37.475656  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa9c0 0xc0001aaa00]
I0323 01:18:39.965727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:18:39.965733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:18:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:18:43.410810  543705 memory.go:191] Add success.
I0323 01:18:43.409870  543705 cpu.go:282] Add success.
I0323 01:18:43.420588  543705 net.go:648] Add success.
I0323 01:18:43.423731  543705 net.go:770] primary dev: ETH0
I0323 01:18:43.423758  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:18:43.423773  543705 net.go:698] Add success.
I0323 01:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:18:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:18:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:18:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:18:53.409797  543705 memory.go:184] no items to output this cycle
I0323 01:18:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 01:19:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:19:03.409793  543705 memory.go:184] no items to output this cycle
I0323 01:19:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 01:19:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:19:13.409834  543705 memory.go:191] Add success.
I0323 01:19:13.409838  543705 cpu.go:282] Add success.
W0323 01:19:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:19:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:19:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:19:13.420191  543705 net.go:648] Add success.
I0323 01:19:13.422832  543705 net.go:770] primary dev: ETH0
I0323 01:19:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:19:13.422858  543705 net.go:698] Add success.
I0323 01:19:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:19:14.455378  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:19:14.455406  543705 disk_worker.go:708] disk space is not compliant
W0323 01:19:14.455411  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:19:14.457007  543705 disk_worker.go:494] system disk:vda1
I0323 01:19:14.457037  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:19:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:19:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:19:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:19:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:19:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:19:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:19:23.409808  543705 memory.go:184] no items to output this cycle
I0323 01:19:23.409817  543705 cpu.go:275] no items to output this cycle
E0323 01:19:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:19:33.409780  543705 memory.go:184] no items to output this cycle
I0323 01:19:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 01:19:37.475737  543705 disk_info.go:125] begin check local disk info of client
I0323 01:19:37.478277  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:19:37.478283  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f3c0 0xc00039f400]
E0323 01:19:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:19:43.410749  543705 memory.go:191] Add success.
I0323 01:19:43.409831  543705 cpu.go:282] Add success.
I0323 01:19:43.420448  543705 net.go:648] Add success.
I0323 01:19:43.423072  543705 net.go:770] primary dev: ETH0
I0323 01:19:43.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:19:43.423102  543705 net.go:698] Add success.
I0323 01:19:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:19:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:19:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:19:53.409779  543705 memory.go:184] no items to output this cycle
I0323 01:19:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 01:20:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:20:03.409770  543705 memory.go:184] no items to output this cycle
I0323 01:20:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 01:20:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:20:13.409822  543705 memory.go:191] Add success.
I0323 01:20:13.409833  543705 cpu.go:282] Add success.
W0323 01:20:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:20:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:20:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:20:13.420243  543705 net.go:648] Add success.
I0323 01:20:13.422942  543705 net.go:770] primary dev: ETH0
I0323 01:20:13.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:20:13.422968  543705 net.go:698] Add success.
I0323 01:20:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:20:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:20:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 01:20:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:20:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 01:20:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:20:15.456016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:20:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:20:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:20:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:20:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:20:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:20:23.409769  543705 memory.go:184] no items to output this cycle
I0323 01:20:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 01:20:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:20:33.409799  543705 memory.go:184] no items to output this cycle
I0323 01:20:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 01:20:37.479101  543705 disk_info.go:125] begin check local disk info of client
I0323 01:20:37.481519  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:20:37.481527  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003528c0 0xc000352900]
E0323 01:20:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:20:43.410686  543705 memory.go:191] Add success.
I0323 01:20:43.409854  543705 cpu.go:282] Add success.
I0323 01:20:43.420442  543705 net.go:648] Add success.
I0323 01:20:43.423262  543705 net.go:770] primary dev: ETH0
I0323 01:20:43.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:20:43.423288  543705 net.go:698] Add success.
I0323 01:20:46.458032  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:20:46.458110  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:20:46.458149  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:20:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:20:53.409778  543705 memory.go:184] no items to output this cycle
I0323 01:20:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:21:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:21:03.409774  543705 memory.go:184] no items to output this cycle
I0323 01:21:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 01:21:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:21:13.409834  543705 memory.go:191] Add success.
I0323 01:21:13.409840  543705 cpu.go:282] Add success.
W0323 01:21:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:21:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:21:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:21:13.420126  543705 net.go:648] Add success.
I0323 01:21:13.422791  543705 net.go:770] primary dev: ETH0
I0323 01:21:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:21:13.422816  543705 net.go:698] Add success.
I0323 01:21:13.480026  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"399b1b10-2912-42e5-867c-a61fca2b6316","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:21:13.480064  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:21:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:21:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:21:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0323 01:21:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:21:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 01:21:14.456740  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:21:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:21:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:21:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:21:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:21:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:21:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:21:23.409776  543705 memory.go:184] no items to output this cycle
I0323 01:21:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 01:21:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:21:33.409810  543705 memory.go:184] no items to output this cycle
I0323 01:21:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 01:21:37.482101  543705 disk_info.go:125] begin check local disk info of client
I0323 01:21:37.484643  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:21:37.484649  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b05c0 0xc0002b0600]
I0323 01:21:39.968536  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:21:39.968542  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:21:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:21:43.410733  543705 memory.go:191] Add success.
I0323 01:21:43.409801  543705 cpu.go:282] Add success.
I0323 01:21:43.420432  543705 net.go:648] Add success.
I0323 01:21:43.423094  543705 net.go:770] primary dev: ETH0
I0323 01:21:43.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:21:43.423121  543705 net.go:698] Add success.
I0323 01:21:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:21:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:21:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:21:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:21:53.409780  543705 memory.go:184] no items to output this cycle
I0323 01:21:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 01:22:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:22:03.409795  543705 memory.go:184] no items to output this cycle
I0323 01:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 01:22:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:22:13.409785  543705 memory.go:191] Add success.
I0323 01:22:13.409804  543705 cpu.go:282] Add success.
W0323 01:22:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:22:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:22:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:22:13.420164  543705 net.go:648] Add success.
I0323 01:22:13.423014  543705 net.go:770] primary dev: ETH0
I0323 01:22:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:22:13.423039  543705 net.go:698] Add success.
W0323 01:22:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:22:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 01:22:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:22:14.455891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:22:14.455900  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:22:14.455906  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:22:14.456637  543705 disk_worker.go:494] system disk:vda1
I0323 01:22:14.456682  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:22:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:22:15.456800  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:22:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:22:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:22:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:22:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:22:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:22:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:22:23.409775  543705 memory.go:184] no items to output this cycle
I0323 01:22:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 01:22:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:22:33.409769  543705 memory.go:184] no items to output this cycle
I0323 01:22:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 01:22:37.485115  543705 disk_info.go:125] begin check local disk info of client
I0323 01:22:37.487654  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:22:37.487660  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1100 0xc0002b1140]
E0323 01:22:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:22:43.410675  543705 memory.go:191] Add success.
I0323 01:22:43.409798  543705 cpu.go:282] Add success.
I0323 01:22:43.420343  543705 net.go:648] Add success.
I0323 01:22:43.422989  543705 net.go:770] primary dev: ETH0
I0323 01:22:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:22:43.423024  543705 net.go:698] Add success.
I0323 01:22:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:22:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:22:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:22:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:22:53.409767  543705 memory.go:184] no items to output this cycle
I0323 01:22:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 01:23:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:23:03.409779  543705 memory.go:184] no items to output this cycle
I0323 01:23:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:23:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:23:13.409800  543705 memory.go:191] Add success.
I0323 01:23:13.409805  543705 cpu.go:282] Add success.
W0323 01:23:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:23:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:23:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:23:13.420176  543705 net.go:648] Add success.
I0323 01:23:13.423203  543705 net.go:770] primary dev: ETH0
I0323 01:23:13.423215  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:23:13.423227  543705 net.go:698] Add success.
I0323 01:23:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:23:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:23:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 01:23:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:23:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 01:23:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:23:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:23:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:23:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:23:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:23:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:23:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:23:23.409777  543705 memory.go:184] no items to output this cycle
I0323 01:23:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:23:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:23:33.409769  543705 memory.go:184] no items to output this cycle
I0323 01:23:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 01:23:37.488139  543705 disk_info.go:125] begin check local disk info of client
I0323 01:23:37.490682  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:23:37.490688  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4d40 0xc0000c4d80]
E0323 01:23:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:23:43.410576  543705 memory.go:191] Add success.
I0323 01:23:43.409817  543705 cpu.go:282] Add success.
I0323 01:23:43.420317  543705 net.go:648] Add success.
I0323 01:23:43.422948  543705 net.go:770] primary dev: ETH0
I0323 01:23:43.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:23:43.422976  543705 net.go:698] Add success.
I0323 01:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:23:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:23:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:23:53.409764  543705 memory.go:184] no items to output this cycle
I0323 01:23:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 01:24:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:24:03.409794  543705 memory.go:184] no items to output this cycle
I0323 01:24:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 01:24:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:24:13.409785  543705 memory.go:191] Add success.
W0323 01:24:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:24:13.409813  543705 cpu.go:282] Add success.
W0323 01:24:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:24:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:24:13.420075  543705 net.go:648] Add success.
I0323 01:24:13.422729  543705 net.go:770] primary dev: ETH0
I0323 01:24:13.422743  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:24:13.422755  543705 net.go:698] Add success.
I0323 01:24:13.468387  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"78b3fb33-33f8-4c2f-a5b3-29d45cd307dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:24:13.468428  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:24:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:24:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:24:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 01:24:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:24:14.456683  543705 disk_worker.go:494] system disk:vda1
I0323 01:24:14.456716  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:24:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:24:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:24:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:24:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:24:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:24:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:24:23.409791  543705 memory.go:184] no items to output this cycle
I0323 01:24:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 01:24:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:24:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 01:24:33.409786  543705 memory.go:184] no items to output this cycle
I0323 01:24:37.491101  543705 disk_info.go:125] begin check local disk info of client
I0323 01:24:37.493700  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:24:37.493707  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab140 0xc0001ab180]
I0323 01:24:39.969723  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:24:39.969729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:24:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:24:43.410662  543705 memory.go:191] Add success.
I0323 01:24:43.409809  543705 cpu.go:282] Add success.
I0323 01:24:43.420354  543705 net.go:648] Add success.
I0323 01:24:43.422837  543705 net.go:770] primary dev: ETH0
I0323 01:24:43.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:24:43.422863  543705 net.go:698] Add success.
I0323 01:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:24:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:24:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:24:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:24:53.409780  543705 memory.go:184] no items to output this cycle
I0323 01:24:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:25:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:25:03.409770  543705 memory.go:184] no items to output this cycle
I0323 01:25:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 01:25:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:25:13.409789  543705 memory.go:191] Add success.
I0323 01:25:13.409810  543705 cpu.go:282] Add success.
W0323 01:25:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:25:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:25:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:25:13.420110  543705 net.go:648] Add success.
I0323 01:25:13.422936  543705 net.go:770] primary dev: ETH0
I0323 01:25:13.422952  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:25:13.422964  543705 net.go:698] Add success.
I0323 01:25:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:25:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:25:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 01:25:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:25:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 01:25:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:25:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:25:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:25:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:25:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:25:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:25:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:25:23.409821  543705 memory.go:184] no items to output this cycle
I0323 01:25:23.409832  543705 cpu.go:275] no items to output this cycle
E0323 01:25:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:25:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 01:25:33.409786  543705 memory.go:184] no items to output this cycle
I0323 01:25:37.494112  543705 disk_info.go:125] begin check local disk info of client
I0323 01:25:37.496691  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:25:37.496697  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b940 0xc00007b980]
E0323 01:25:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:25:43.410587  543705 memory.go:191] Add success.
I0323 01:25:43.409823  543705 cpu.go:282] Add success.
I0323 01:25:43.420291  543705 net.go:648] Add success.
I0323 01:25:43.423187  543705 net.go:770] primary dev: ETH0
I0323 01:25:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:25:43.423226  543705 net.go:698] Add success.
I0323 01:25:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:25:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:25:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:25:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:25:53.409791  543705 memory.go:184] no items to output this cycle
I0323 01:25:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 01:26:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:26:03.409773  543705 memory.go:184] no items to output this cycle
I0323 01:26:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 01:26:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:26:13.409818  543705 memory.go:191] Add success.
I0323 01:26:13.409824  543705 cpu.go:282] Add success.
W0323 01:26:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:26:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:26:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:26:13.420127  543705 net.go:770] primary dev: ETH0
I0323 01:26:13.420141  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:26:13.420152  543705 net.go:698] Add success.
I0323 01:26:13.420389  543705 net.go:648] Add success.
I0323 01:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:26:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:26:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 01:26:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:26:14.456595  543705 disk_worker.go:494] system disk:vda1
I0323 01:26:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:26:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:26:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:26:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:26:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:26:16.472090  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:26:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:26:23.409777  543705 memory.go:184] no items to output this cycle
I0323 01:26:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 01:26:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:26:33.409816  543705 memory.go:184] no items to output this cycle
I0323 01:26:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 01:26:37.496782  543705 disk_info.go:125] begin check local disk info of client
I0323 01:26:37.499313  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:26:37.499319  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505a00 0xc000505a40]
E0323 01:26:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:26:43.410718  543705 memory.go:191] Add success.
I0323 01:26:43.409831  543705 cpu.go:282] Add success.
I0323 01:26:43.420491  543705 net.go:648] Add success.
I0323 01:26:43.422994  543705 net.go:770] primary dev: ETH0
I0323 01:26:43.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:26:43.423022  543705 net.go:698] Add success.
I0323 01:26:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:26:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:26:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:26:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:26:53.409774  543705 memory.go:184] no items to output this cycle
I0323 01:26:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 01:27:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:27:03.409780  543705 memory.go:184] no items to output this cycle
I0323 01:27:03.409782  543705 cpu.go:275] no items to output this cycle
W0323 01:27:13.409704  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0323 01:27:13.409715  543705 conf_downlod.go:89] use old conf
E0323 01:27:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:27:13.409818  543705 memory.go:191] Add success.
I0323 01:27:13.409832  543705 cpu.go:282] Add success.
W0323 01:27:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:27:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:27:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:27:13.420109  543705 net.go:648] Add success.
I0323 01:27:13.422709  543705 net.go:770] primary dev: ETH0
I0323 01:27:13.422722  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:27:13.422735  543705 net.go:698] Add success.
I0323 01:27:13.428847  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 01:27:13.453021  543705 event_worker.go:152] Polling the log file for events...
I0323 01:27:13.469926  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d5134026-5400-488a-9cb3-ff2b3763f052","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:27:13.469959  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 01:27:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:27:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 01:27:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:27:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:27:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:27:14.456938  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:27:14.456985  543705 disk_worker.go:494] system disk:vda1
I0323 01:27:14.457028  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:27:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:27:15.456827  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:27:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:27:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:27:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:27:16.457992  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:27:16.472341  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:27:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:27:23.409772  543705 memory.go:184] no items to output this cycle
I0323 01:27:23.409775  543705 cpu.go:275] no items to output this cycle
E0323 01:27:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:27:33.409776  543705 memory.go:184] no items to output this cycle
I0323 01:27:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 01:27:37.500193  543705 disk_info.go:125] begin check local disk info of client
I0323 01:27:37.502706  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:27:37.502713  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa0c0 0xc0001aa100]
I0323 01:27:39.972543  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:27:39.972550  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:27:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:27:43.410659  543705 memory.go:191] Add success.
I0323 01:27:43.409828  543705 cpu.go:282] Add success.
I0323 01:27:43.420362  543705 net.go:648] Add success.
I0323 01:27:43.423055  543705 net.go:770] primary dev: ETH0
I0323 01:27:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:27:43.423081  543705 net.go:698] Add success.
I0323 01:27:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:27:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:27:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:27:53.409764  543705 memory.go:184] no items to output this cycle
I0323 01:27:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 01:28:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:28:03.409772  543705 memory.go:184] no items to output this cycle
I0323 01:28:03.409793  543705 cpu.go:275] no items to output this cycle
W0323 01:28:13.409717  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:28:13.409735  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:28:13.409741  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:28:13.409804  543705 cpu.go:282] Add success.
E0323 01:28:13.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:28:13.409868  543705 memory.go:191] Add success.
I0323 01:28:13.420105  543705 net.go:648] Add success.
I0323 01:28:13.423030  543705 net.go:770] primary dev: ETH0
I0323 01:28:13.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:28:13.423054  543705 net.go:698] Add success.
I0323 01:28:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:28:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:28:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 01:28:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:28:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 01:28:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:28:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:28:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:28:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:28:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:28:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:28:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:28:23.409795  543705 memory.go:184] no items to output this cycle
I0323 01:28:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 01:28:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:28:33.409765  543705 memory.go:184] no items to output this cycle
I0323 01:28:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 01:28:37.502796  543705 disk_info.go:125] begin check local disk info of client
I0323 01:28:37.505322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:28:37.505330  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025fc00 0xc00025fc40]
E0323 01:28:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:28:43.410576  543705 memory.go:191] Add success.
I0323 01:28:43.409831  543705 cpu.go:282] Add success.
I0323 01:28:43.420304  543705 net.go:648] Add success.
I0323 01:28:43.423009  543705 net.go:770] primary dev: ETH0
I0323 01:28:43.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:28:43.423035  543705 net.go:698] Add success.
I0323 01:28:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:28:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:28:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:28:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:28:53.409801  543705 memory.go:184] no items to output this cycle
I0323 01:28:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 01:29:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:29:03.409766  543705 memory.go:184] no items to output this cycle
I0323 01:29:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 01:29:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:29:13.409829  543705 memory.go:191] Add success.
I0323 01:29:13.409832  543705 cpu.go:282] Add success.
W0323 01:29:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:29:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:29:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:29:13.420178  543705 net.go:648] Add success.
I0323 01:29:13.422957  543705 net.go:770] primary dev: ETH0
I0323 01:29:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:29:13.422983  543705 net.go:698] Add success.
I0323 01:29:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:29:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:29:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 01:29:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:29:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 01:29:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:29:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:29:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:29:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:29:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:29:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:29:23.410715  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:29:23.410731  543705 memory.go:184] no items to output this cycle
I0323 01:29:23.410765  543705 cpu.go:275] no items to output this cycle
E0323 01:29:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:29:33.409774  543705 memory.go:184] no items to output this cycle
I0323 01:29:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 01:29:37.506221  543705 disk_info.go:125] begin check local disk info of client
I0323 01:29:37.508748  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:29:37.508755  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 01:29:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:29:43.410687  543705 memory.go:191] Add success.
I0323 01:29:43.409819  543705 cpu.go:282] Add success.
I0323 01:29:43.420470  543705 net.go:648] Add success.
I0323 01:29:43.423101  543705 net.go:770] primary dev: ETH0
I0323 01:29:43.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:29:43.423126  543705 net.go:698] Add success.
I0323 01:29:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:29:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:29:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:29:53.410216  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:29:53.410233  543705 memory.go:184] no items to output this cycle
I0323 01:29:53.410242  543705 cpu.go:275] no items to output this cycle
E0323 01:30:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:30:03.409765  543705 memory.go:184] no items to output this cycle
I0323 01:30:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 01:30:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:30:13.409793  543705 memory.go:191] Add success.
I0323 01:30:13.409814  543705 cpu.go:282] Add success.
W0323 01:30:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:30:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:30:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:30:13.420147  543705 net.go:648] Add success.
I0323 01:30:13.423055  543705 net.go:770] primary dev: ETH0
I0323 01:30:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:30:13.423080  543705 net.go:698] Add success.
I0323 01:30:13.468591  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"de32c9e4-2582-4c8e-bba6-7ec89a5dc7ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:30:13.468624  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:30:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:30:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:30:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 01:30:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:30:14.456632  543705 disk_worker.go:494] system disk:vda1
I0323 01:30:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:30:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:30:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:30:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:30:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:30:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:30:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:30:23.409775  543705 memory.go:184] no items to output this cycle
I0323 01:30:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 01:30:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:30:33.409775  543705 memory.go:184] no items to output this cycle
I0323 01:30:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 01:30:37.508839  543705 disk_info.go:125] begin check local disk info of client
I0323 01:30:37.511356  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:30:37.511363  543705 disk_info.go:196] parse disk info done, disk is : [0xc000271c80 0xc000271cc0]
I0323 01:30:39.973726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:30:39.973733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:30:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:30:43.410589  543705 memory.go:191] Add success.
I0323 01:30:43.409827  543705 cpu.go:282] Add success.
I0323 01:30:43.420364  543705 net.go:648] Add success.
I0323 01:30:43.423044  543705 net.go:770] primary dev: ETH0
I0323 01:30:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:30:43.423074  543705 net.go:698] Add success.
I0323 01:30:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:30:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:30:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:30:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:30:53.409799  543705 memory.go:184] no items to output this cycle
I0323 01:30:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 01:31:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:31:03.409797  543705 memory.go:184] no items to output this cycle
I0323 01:31:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:31:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:31:13.409798  543705 memory.go:191] Add success.
I0323 01:31:13.409801  543705 cpu.go:282] Add success.
W0323 01:31:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:31:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:31:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:31:13.420131  543705 net.go:648] Add success.
I0323 01:31:13.422940  543705 net.go:770] primary dev: ETH0
I0323 01:31:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:31:13.422965  543705 net.go:698] Add success.
I0323 01:31:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:31:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:31:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 01:31:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:31:14.456574  543705 disk_worker.go:494] system disk:vda1
I0323 01:31:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:31:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:31:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:31:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:31:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:31:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:31:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:31:23.409777  543705 memory.go:184] no items to output this cycle
I0323 01:31:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:31:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:31:33.409775  543705 memory.go:184] no items to output this cycle
I0323 01:31:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 01:31:37.512248  543705 disk_info.go:125] begin check local disk info of client
I0323 01:31:37.514756  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:31:37.514763  543705 disk_info.go:196] parse disk info done, disk is : [0xc000270cc0 0xc000270d00]
E0323 01:31:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:31:43.410774  543705 memory.go:191] Add success.
I0323 01:31:43.409800  543705 cpu.go:282] Add success.
I0323 01:31:43.420516  543705 net.go:648] Add success.
I0323 01:31:43.423367  543705 net.go:770] primary dev: ETH0
I0323 01:31:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:31:43.423394  543705 net.go:698] Add success.
I0323 01:31:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:31:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:31:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:31:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:31:53.409797  543705 memory.go:184] no items to output this cycle
I0323 01:31:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 01:32:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:32:03.409762  543705 memory.go:184] no items to output this cycle
I0323 01:32:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 01:32:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:32:13.409824  543705 memory.go:191] Add success.
I0323 01:32:13.409832  543705 cpu.go:282] Add success.
W0323 01:32:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:32:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:32:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:32:13.420159  543705 net.go:648] Add success.
I0323 01:32:13.422840  543705 net.go:770] primary dev: ETH0
I0323 01:32:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:32:13.422865  543705 net.go:698] Add success.
W0323 01:32:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:32:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 01:32:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:32:14.456151  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:32:14.456161  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:32:14.456168  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:32:14.456478  543705 disk_worker.go:494] system disk:vda1
I0323 01:32:14.456506  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:32:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:32:15.456812  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:32:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:32:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:32:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:32:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:32:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:32:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:32:23.409785  543705 memory.go:184] no items to output this cycle
I0323 01:32:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 01:32:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:32:33.409775  543705 memory.go:184] no items to output this cycle
I0323 01:32:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 01:32:37.514847  543705 disk_info.go:125] begin check local disk info of client
I0323 01:32:37.517301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:32:37.517308  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c43c0 0xc0000c4400]
E0323 01:32:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:32:43.410601  543705 memory.go:191] Add success.
I0323 01:32:43.409802  543705 cpu.go:282] Add success.
I0323 01:32:43.420318  543705 net.go:648] Add success.
I0323 01:32:43.423018  543705 net.go:770] primary dev: ETH0
I0323 01:32:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:32:43.423045  543705 net.go:698] Add success.
I0323 01:32:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:32:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:32:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:32:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:32:53.409781  543705 memory.go:184] no items to output this cycle
I0323 01:32:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 01:33:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:33:03.409797  543705 memory.go:184] no items to output this cycle
I0323 01:33:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 01:33:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:33:13.409800  543705 memory.go:191] Add success.
I0323 01:33:13.409819  543705 cpu.go:282] Add success.
W0323 01:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:33:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:33:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:33:13.420333  543705 net.go:648] Add success.
I0323 01:33:13.423438  543705 net.go:770] primary dev: ETH0
I0323 01:33:13.423450  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:33:13.423462  543705 net.go:698] Add success.
I0323 01:33:13.986509  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"338cb6e1-b4ed-4fc5-8a41-753f253f4aac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:33:13.986543  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:33:14.453982  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:33:14.454215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:33:14.454226  543705 disk_worker.go:708] disk space is not compliant
W0323 01:33:14.454230  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:33:14.455765  543705 disk_worker.go:494] system disk:vda1
I0323 01:33:14.455798  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:33:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:33:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:33:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:33:23.410642  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:33:23.410658  543705 memory.go:184] no items to output this cycle
I0323 01:33:23.410694  543705 cpu.go:275] no items to output this cycle
E0323 01:33:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:33:33.409796  543705 memory.go:184] no items to output this cycle
I0323 01:33:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 01:33:37.518278  543705 disk_info.go:125] begin check local disk info of client
I0323 01:33:37.520812  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:33:37.520819  543705 disk_info.go:196] parse disk info done, disk is : [0xc000382340 0xc000382380]
I0323 01:33:39.973870  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:33:39.973876  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:33:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:33:43.410655  543705 memory.go:191] Add success.
I0323 01:33:43.409799  543705 cpu.go:282] Add success.
I0323 01:33:43.420365  543705 net.go:648] Add success.
I0323 01:33:43.423105  543705 net.go:770] primary dev: ETH0
I0323 01:33:43.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:33:43.423132  543705 net.go:698] Add success.
I0323 01:33:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:33:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:33:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:33:53.410196  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:33:53.410214  543705 memory.go:184] no items to output this cycle
I0323 01:33:53.410234  543705 cpu.go:275] no items to output this cycle
E0323 01:34:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:34:03.409792  543705 memory.go:184] no items to output this cycle
I0323 01:34:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 01:34:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:34:13.409796  543705 memory.go:191] Add success.
I0323 01:34:13.409815  543705 cpu.go:282] Add success.
W0323 01:34:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:34:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:34:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:34:13.420154  543705 net.go:648] Add success.
I0323 01:34:13.422672  543705 net.go:770] primary dev: ETH0
I0323 01:34:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:34:13.422696  543705 net.go:698] Add success.
I0323 01:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:34:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:34:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 01:34:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:34:14.456611  543705 disk_worker.go:494] system disk:vda1
I0323 01:34:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:34:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:34:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:34:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:34:23.409944  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:34:23.409953  543705 cpu.go:275] no items to output this cycle
I0323 01:34:23.410090  543705 memory.go:184] no items to output this cycle
E0323 01:34:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:34:33.409768  543705 memory.go:184] no items to output this cycle
I0323 01:34:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 01:34:37.520903  543705 disk_info.go:125] begin check local disk info of client
I0323 01:34:37.523484  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:34:37.523490  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3b80 0xc0003d3bc0]
E0323 01:34:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:34:43.410730  543705 memory.go:191] Add success.
I0323 01:34:43.409802  543705 cpu.go:282] Add success.
I0323 01:34:43.420453  543705 net.go:648] Add success.
I0323 01:34:43.423134  543705 net.go:770] primary dev: ETH0
I0323 01:34:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:34:43.423165  543705 net.go:698] Add success.
I0323 01:34:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:34:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:34:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:34:53.410229  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:34:53.410248  543705 memory.go:184] no items to output this cycle
I0323 01:34:53.410269  543705 cpu.go:275] no items to output this cycle
E0323 01:35:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:35:03.409796  543705 memory.go:184] no items to output this cycle
I0323 01:35:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:35:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:35:13.409789  543705 memory.go:191] Add success.
W0323 01:35:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:35:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:35:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:35:13.409834  543705 cpu.go:282] Add success.
I0323 01:35:13.420278  543705 net.go:648] Add success.
I0323 01:35:13.423026  543705 net.go:770] primary dev: ETH0
I0323 01:35:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:35:13.423052  543705 net.go:698] Add success.
I0323 01:35:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:35:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:35:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 01:35:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:35:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 01:35:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:35:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:35:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:35:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:35:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:35:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:35:23.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:35:23.409879  543705 memory.go:184] no items to output this cycle
I0323 01:35:23.410011  543705 cpu.go:275] no items to output this cycle
E0323 01:35:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:35:33.409806  543705 memory.go:184] no items to output this cycle
I0323 01:35:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 01:35:37.524303  543705 disk_info.go:125] begin check local disk info of client
I0323 01:35:37.526887  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:35:37.526895  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 01:35:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:35:43.410634  543705 memory.go:191] Add success.
I0323 01:35:43.409819  543705 cpu.go:282] Add success.
I0323 01:35:43.420318  543705 net.go:648] Add success.
I0323 01:35:43.422886  543705 net.go:770] primary dev: ETH0
I0323 01:35:43.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:35:43.422911  543705 net.go:698] Add success.
I0323 01:35:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:35:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:35:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:35:53.409775  543705 memory.go:184] no items to output this cycle
I0323 01:35:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 01:36:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:36:03.409795  543705 memory.go:184] no items to output this cycle
I0323 01:36:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 01:36:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:36:13.409783  543705 memory.go:191] Add success.
W0323 01:36:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:36:13.409816  543705 cpu.go:282] Add success.
W0323 01:36:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:36:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:36:13.420126  543705 net.go:648] Add success.
I0323 01:36:13.423062  543705 net.go:770] primary dev: ETH0
I0323 01:36:13.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:36:13.423089  543705 net.go:698] Add success.
I0323 01:36:13.468327  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"14fd112c-ac0a-4e70-bb62-127a23134714","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:36:13.468365  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:36:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:36:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:36:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 01:36:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:36:14.456621  543705 disk_worker.go:494] system disk:vda1
I0323 01:36:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:36:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:36:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:36:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:36:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:36:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:36:23.409763  543705 memory.go:184] no items to output this cycle
I0323 01:36:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 01:36:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:36:33.409815  543705 memory.go:184] no items to output this cycle
I0323 01:36:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 01:36:37.527277  543705 disk_info.go:125] begin check local disk info of client
I0323 01:36:37.529882  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:36:37.529888  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aabc0 0xc0001aac00]
I0323 01:36:39.976557  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:36:39.976563  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:36:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:36:43.410747  543705 memory.go:191] Add success.
I0323 01:36:43.409801  543705 cpu.go:282] Add success.
I0323 01:36:43.420458  543705 net.go:648] Add success.
I0323 01:36:43.423030  543705 net.go:770] primary dev: ETH0
I0323 01:36:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:36:43.423054  543705 net.go:698] Add success.
I0323 01:36:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:36:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:36:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:36:53.410393  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:36:53.410409  543705 memory.go:184] no items to output this cycle
I0323 01:36:53.410408  543705 cpu.go:275] no items to output this cycle
E0323 01:37:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:37:03.409791  543705 memory.go:184] no items to output this cycle
I0323 01:37:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 01:37:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:37:13.409799  543705 memory.go:191] Add success.
W0323 01:37:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:37:13.409831  543705 cpu.go:282] Add success.
W0323 01:37:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:37:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:37:13.420414  543705 net.go:648] Add success.
I0323 01:37:13.423336  543705 net.go:770] primary dev: ETH0
I0323 01:37:13.423349  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:37:13.423361  543705 net.go:698] Add success.
I0323 01:37:13.452910  543705 event_worker.go:152] Polling the log file for events...
W0323 01:37:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:37:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 01:37:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:37:14.456924  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:37:14.456933  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:37:14.456939  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:37:14.456991  543705 disk_worker.go:494] system disk:vda1
I0323 01:37:14.457035  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:37:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:37:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:37:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:37:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:37:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:37:16.458015  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:37:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:37:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:37:23.409779  543705 memory.go:184] no items to output this cycle
I0323 01:37:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:37:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:37:33.409794  543705 memory.go:184] no items to output this cycle
I0323 01:37:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 01:37:37.530292  543705 disk_info.go:125] begin check local disk info of client
I0323 01:37:37.532780  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:37:37.532786  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c50c0 0xc0000c5100]
E0323 01:37:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:37:43.410669  543705 memory.go:191] Add success.
I0323 01:37:43.409810  543705 cpu.go:282] Add success.
I0323 01:37:43.420391  543705 net.go:648] Add success.
I0323 01:37:43.423092  543705 net.go:770] primary dev: ETH0
I0323 01:37:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:37:43.423118  543705 net.go:698] Add success.
I0323 01:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:37:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:37:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:37:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:37:53.409804  543705 memory.go:184] no items to output this cycle
I0323 01:37:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 01:38:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:38:03.409786  543705 memory.go:184] no items to output this cycle
I0323 01:38:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 01:38:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:38:13.409814  543705 memory.go:191] Add success.
I0323 01:38:13.409815  543705 cpu.go:282] Add success.
W0323 01:38:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:38:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:38:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:38:13.420198  543705 net.go:648] Add success.
I0323 01:38:13.422743  543705 net.go:770] primary dev: ETH0
I0323 01:38:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:38:13.422771  543705 net.go:698] Add success.
I0323 01:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:38:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:38:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 01:38:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:38:14.456529  543705 disk_worker.go:494] system disk:vda1
I0323 01:38:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:38:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:38:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:38:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:38:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:38:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:38:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:38:23.409787  543705 memory.go:184] no items to output this cycle
I0323 01:38:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 01:38:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:38:33.409799  543705 memory.go:184] no items to output this cycle
I0323 01:38:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 01:38:37.533304  543705 disk_info.go:125] begin check local disk info of client
I0323 01:38:37.535835  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:38:37.535841  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1e00 0xc0002b1e40]
E0323 01:38:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:38:43.410643  543705 memory.go:191] Add success.
I0323 01:38:43.409820  543705 cpu.go:282] Add success.
I0323 01:38:43.420392  543705 net.go:648] Add success.
I0323 01:38:43.423270  543705 net.go:770] primary dev: ETH0
I0323 01:38:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:38:43.423294  543705 net.go:698] Add success.
I0323 01:38:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:38:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:38:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:38:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:38:53.409780  543705 memory.go:184] no items to output this cycle
I0323 01:38:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 01:39:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:39:03.409787  543705 memory.go:184] no items to output this cycle
I0323 01:39:03.409790  543705 cpu.go:275] no items to output this cycle
W0323 01:39:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:39:13.409728  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:39:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:39:13.409804  543705 cpu.go:282] Add success.
E0323 01:39:13.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:39:13.409841  543705 memory.go:191] Add success.
I0323 01:39:13.420221  543705 net.go:648] Add success.
I0323 01:39:13.423061  543705 net.go:770] primary dev: ETH0
I0323 01:39:13.423074  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:39:13.423087  543705 net.go:698] Add success.
I0323 01:39:14.366987  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"731b6d80-9ef3-42fd-8a3c-e1b9b49741dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:39:14.367028  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:39:14.454549  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:39:14.454735  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:39:14.454747  543705 disk_worker.go:708] disk space is not compliant
W0323 01:39:14.454749  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:39:14.456132  543705 disk_worker.go:494] system disk:vda1
I0323 01:39:14.456189  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:39:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:39:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:39:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:39:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:39:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:39:23.409771  543705 memory.go:184] no items to output this cycle
I0323 01:39:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 01:39:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:39:33.409795  543705 memory.go:184] no items to output this cycle
I0323 01:39:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 01:39:37.535922  543705 disk_info.go:125] begin check local disk info of client
I0323 01:39:37.538506  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:39:37.538513  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b05c0 0xc0002b0600]
I0323 01:39:39.977728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:39:39.977734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:39:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:39:43.410647  543705 memory.go:191] Add success.
I0323 01:39:43.409836  543705 cpu.go:282] Add success.
I0323 01:39:43.420345  543705 net.go:648] Add success.
I0323 01:39:43.423134  543705 net.go:770] primary dev: ETH0
I0323 01:39:43.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:39:43.423159  543705 net.go:698] Add success.
I0323 01:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:39:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:39:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:39:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:39:53.409776  543705 memory.go:184] no items to output this cycle
I0323 01:39:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 01:40:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:40:03.409778  543705 memory.go:184] no items to output this cycle
I0323 01:40:03.409781  543705 cpu.go:275] no items to output this cycle
E0323 01:40:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:40:13.409794  543705 memory.go:191] Add success.
I0323 01:40:13.409797  543705 cpu.go:282] Add success.
W0323 01:40:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:40:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:40:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:40:13.420262  543705 net.go:648] Add success.
I0323 01:40:13.422971  543705 net.go:770] primary dev: ETH0
I0323 01:40:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:40:13.423016  543705 net.go:698] Add success.
I0323 01:40:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:40:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:40:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 01:40:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:40:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 01:40:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:40:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:40:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:40:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:40:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:40:23.409742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:40:23.409760  543705 memory.go:184] no items to output this cycle
I0323 01:40:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 01:40:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:40:33.409774  543705 memory.go:184] no items to output this cycle
I0323 01:40:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 01:40:37.539387  543705 disk_info.go:125] begin check local disk info of client
I0323 01:40:37.541928  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:40:37.541934  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005059c0 0xc000505a00]
E0323 01:40:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:40:43.410740  543705 memory.go:191] Add success.
I0323 01:40:43.409820  543705 cpu.go:282] Add success.
I0323 01:40:43.420443  543705 net.go:648] Add success.
I0323 01:40:43.423197  543705 net.go:770] primary dev: ETH0
I0323 01:40:43.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:40:43.423227  543705 net.go:698] Add success.
I0323 01:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:40:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:40:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:40:53.410398  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:40:53.410416  543705 memory.go:184] no items to output this cycle
I0323 01:40:53.410436  543705 cpu.go:275] no items to output this cycle
E0323 01:41:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:41:03.409776  543705 cpu.go:275] no items to output this cycle
I0323 01:41:03.409783  543705 memory.go:184] no items to output this cycle
E0323 01:41:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:41:13.409801  543705 memory.go:191] Add success.
I0323 01:41:13.409803  543705 cpu.go:282] Add success.
W0323 01:41:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:41:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:41:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:41:13.420320  543705 net.go:648] Add success.
I0323 01:41:13.423181  543705 net.go:770] primary dev: ETH0
I0323 01:41:13.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:41:13.423205  543705 net.go:698] Add success.
I0323 01:41:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:41:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:41:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 01:41:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:41:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 01:41:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:41:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:41:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:41:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:41:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:41:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:41:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:41:23.409776  543705 memory.go:184] no items to output this cycle
I0323 01:41:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 01:41:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:41:33.409777  543705 memory.go:184] no items to output this cycle
I0323 01:41:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 01:41:37.542016  543705 disk_info.go:125] begin check local disk info of client
I0323 01:41:37.544593  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:41:37.544600  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505dc0 0xc000505e00]
E0323 01:41:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:41:43.410625  543705 memory.go:191] Add success.
I0323 01:41:43.409816  543705 cpu.go:282] Add success.
I0323 01:41:43.420395  543705 net.go:648] Add success.
I0323 01:41:43.423018  543705 net.go:770] primary dev: ETH0
I0323 01:41:43.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:41:43.423043  543705 net.go:698] Add success.
I0323 01:41:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:41:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:41:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:41:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:41:53.409768  543705 memory.go:184] no items to output this cycle
I0323 01:41:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 01:42:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:42:03.409777  543705 memory.go:184] no items to output this cycle
I0323 01:42:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 01:42:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:42:13.409792  543705 memory.go:191] Add success.
I0323 01:42:13.409793  543705 cpu.go:282] Add success.
W0323 01:42:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:42:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:42:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:42:13.420504  543705 net.go:648] Add success.
I0323 01:42:13.423006  543705 net.go:770] primary dev: ETH0
I0323 01:42:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:42:13.423031  543705 net.go:698] Add success.
I0323 01:42:13.469494  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"480530dd-fe5e-4b57-a4be-ac0cf05cc7f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:42:13.469528  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 01:42:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:42:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 01:42:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:42:14.456820  543705 disk_worker.go:494] system disk:vda1
I0323 01:42:14.456860  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:42:14.457130  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:42:14.457138  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:42:14.457143  543705 custom_config.go:64] query custom config with name: gpu
E0323 01:42:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:42:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:42:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:42:16.457906  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:42:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:42:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:42:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:42:23.410339  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:42:23.410355  543705 memory.go:184] no items to output this cycle
I0323 01:42:23.410452  543705 cpu.go:275] no items to output this cycle
E0323 01:42:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:42:33.409782  543705 memory.go:184] no items to output this cycle
I0323 01:42:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 01:42:37.545415  543705 disk_info.go:125] begin check local disk info of client
I0323 01:42:37.547942  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:42:37.547948  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504040 0xc000504080]
I0323 01:42:39.980587  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:42:39.980593  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:42:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:42:43.410640  543705 memory.go:191] Add success.
I0323 01:42:43.409818  543705 cpu.go:282] Add success.
I0323 01:42:43.420402  543705 net.go:648] Add success.
I0323 01:42:43.422972  543705 net.go:770] primary dev: ETH0
I0323 01:42:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:42:43.423009  543705 net.go:698] Add success.
I0323 01:42:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:42:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:42:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:42:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:42:53.409797  543705 memory.go:184] no items to output this cycle
I0323 01:42:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 01:43:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:43:03.409771  543705 memory.go:184] no items to output this cycle
I0323 01:43:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 01:43:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:43:13.409796  543705 memory.go:191] Add success.
I0323 01:43:13.409815  543705 cpu.go:282] Add success.
W0323 01:43:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:43:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:43:13.420159  543705 net.go:648] Add success.
I0323 01:43:13.422807  543705 net.go:770] primary dev: ETH0
I0323 01:43:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:43:13.422837  543705 net.go:698] Add success.
I0323 01:43:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:43:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:43:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 01:43:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:43:14.456584  543705 disk_worker.go:494] system disk:vda1
I0323 01:43:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:43:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:43:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:43:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:43:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:43:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:43:23.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:43:23.409901  543705 memory.go:184] no items to output this cycle
I0323 01:43:23.409935  543705 cpu.go:275] no items to output this cycle
E0323 01:43:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:43:33.409766  543705 memory.go:184] no items to output this cycle
I0323 01:43:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 01:43:37.548029  543705 disk_info.go:125] begin check local disk info of client
I0323 01:43:37.550632  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:43:37.550638  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5080 0xc0000c50c0]
E0323 01:43:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:43:43.410731  543705 memory.go:191] Add success.
I0323 01:43:43.409797  543705 cpu.go:282] Add success.
I0323 01:43:43.420449  543705 net.go:648] Add success.
I0323 01:43:43.423157  543705 net.go:770] primary dev: ETH0
I0323 01:43:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:43:43.423188  543705 net.go:698] Add success.
I0323 01:43:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:43:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:43:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:43:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:43:53.409798  543705 memory.go:184] no items to output this cycle
I0323 01:43:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 01:44:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:44:03.409765  543705 memory.go:184] no items to output this cycle
I0323 01:44:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 01:44:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:44:13.409800  543705 memory.go:191] Add success.
I0323 01:44:13.409800  543705 cpu.go:282] Add success.
W0323 01:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:44:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:44:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:44:13.420204  543705 net.go:648] Add success.
I0323 01:44:13.422831  543705 net.go:770] primary dev: ETH0
I0323 01:44:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:44:13.422857  543705 net.go:698] Add success.
I0323 01:44:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:44:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:44:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0323 01:44:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:44:14.456587  543705 disk_worker.go:494] system disk:vda1
I0323 01:44:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:44:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:44:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:44:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:44:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:44:23.410425  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:44:23.410440  543705 memory.go:184] no items to output this cycle
I0323 01:44:23.410468  543705 cpu.go:275] no items to output this cycle
E0323 01:44:33.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:44:33.409909  543705 memory.go:184] no items to output this cycle
I0323 01:44:33.409935  543705 cpu.go:275] no items to output this cycle
I0323 01:44:37.551449  543705 disk_info.go:125] begin check local disk info of client
I0323 01:44:37.553980  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:44:37.553987  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002164c0 0xc000217400]
E0323 01:44:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:44:43.410800  543705 memory.go:191] Add success.
I0323 01:44:43.409797  543705 cpu.go:282] Add success.
I0323 01:44:43.420519  543705 net.go:648] Add success.
I0323 01:44:43.423414  543705 net.go:770] primary dev: ETH0
I0323 01:44:43.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:44:43.423440  543705 net.go:698] Add success.
I0323 01:44:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:44:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:44:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:44:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:44:53.409769  543705 memory.go:184] no items to output this cycle
I0323 01:44:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 01:45:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:45:03.409769  543705 memory.go:184] no items to output this cycle
I0323 01:45:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 01:45:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:45:13.409788  543705 memory.go:191] Add success.
W0323 01:45:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:45:13.409819  543705 cpu.go:282] Add success.
W0323 01:45:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:45:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:45:13.420107  543705 net.go:648] Add success.
I0323 01:45:13.422851  543705 net.go:770] primary dev: ETH0
I0323 01:45:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:45:13.422880  543705 net.go:698] Add success.
I0323 01:45:13.463828  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"101e1079-5ef0-4e28-aba8-e496f712333a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:45:13.463862  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:45:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:45:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:45:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 01:45:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:45:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 01:45:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:45:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:45:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:45:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:45:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:45:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:45:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:45:23.409773  543705 memory.go:184] no items to output this cycle
I0323 01:45:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:45:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:45:33.409789  543705 memory.go:184] no items to output this cycle
I0323 01:45:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 01:45:37.554406  543705 disk_info.go:125] begin check local disk info of client
I0323 01:45:37.556924  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:45:37.556930  543705 disk_info.go:196] parse disk info done, disk is : [0xc000259c40 0xc000259c80]
I0323 01:45:39.981737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:45:39.981744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:45:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:45:43.410621  543705 memory.go:191] Add success.
I0323 01:45:43.409823  543705 cpu.go:282] Add success.
I0323 01:45:43.420319  543705 net.go:648] Add success.
I0323 01:45:43.422927  543705 net.go:770] primary dev: ETH0
I0323 01:45:43.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:45:43.422953  543705 net.go:698] Add success.
I0323 01:45:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:45:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:45:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:45:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:45:53.409792  543705 memory.go:184] no items to output this cycle
I0323 01:45:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 01:46:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:46:03.409770  543705 memory.go:184] no items to output this cycle
I0323 01:46:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 01:46:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:46:13.409830  543705 memory.go:191] Add success.
I0323 01:46:13.409831  543705 cpu.go:282] Add success.
W0323 01:46:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:46:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:46:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:46:13.420146  543705 net.go:648] Add success.
I0323 01:46:13.422826  543705 net.go:770] primary dev: ETH0
I0323 01:46:13.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:46:13.422852  543705 net.go:698] Add success.
I0323 01:46:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:46:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:46:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 01:46:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:46:14.456598  543705 disk_worker.go:494] system disk:vda1
I0323 01:46:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:46:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:46:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:46:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:46:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:46:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:46:23.409801  543705 memory.go:184] no items to output this cycle
I0323 01:46:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 01:46:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:46:33.409784  543705 memory.go:184] no items to output this cycle
I0323 01:46:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 01:46:37.557013  543705 disk_info.go:125] begin check local disk info of client
I0323 01:46:37.559611  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:46:37.559617  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0600 0xc0002b0640]
E0323 01:46:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:46:43.410756  543705 memory.go:191] Add success.
I0323 01:46:43.409802  543705 cpu.go:282] Add success.
I0323 01:46:43.420453  543705 net.go:648] Add success.
I0323 01:46:43.423087  543705 net.go:770] primary dev: ETH0
I0323 01:46:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:46:43.423115  543705 net.go:698] Add success.
I0323 01:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:46:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:46:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:46:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:46:53.409783  543705 memory.go:184] no items to output this cycle
I0323 01:46:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:47:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:47:03.409796  543705 memory.go:184] no items to output this cycle
I0323 01:47:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:47:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:47:13.409797  543705 memory.go:191] Add success.
I0323 01:47:13.409799  543705 cpu.go:282] Add success.
W0323 01:47:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:47:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:47:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:47:13.420291  543705 net.go:648] Add success.
I0323 01:47:13.423435  543705 net.go:770] primary dev: ETH0
I0323 01:47:13.423451  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:47:13.423466  543705 net.go:698] Add success.
I0323 01:47:13.453179  543705 event_worker.go:152] Polling the log file for events...
W0323 01:47:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:47:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0323 01:47:14.455162  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:47:14.456930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:47:14.456940  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:47:14.456946  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:47:14.456994  543705 disk_worker.go:494] system disk:vda1
I0323 01:47:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:47:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:47:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:47:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:47:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:47:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:47:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:47:16.472316  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:47:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:47:23.409800  543705 memory.go:184] no items to output this cycle
I0323 01:47:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 01:47:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:47:33.409779  543705 memory.go:184] no items to output this cycle
I0323 01:47:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 01:47:37.560484  543705 disk_info.go:125] begin check local disk info of client
I0323 01:47:37.563054  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:47:37.563060  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b880 0xc00007b8c0]
E0323 01:47:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:47:43.410659  543705 memory.go:191] Add success.
I0323 01:47:43.409797  543705 cpu.go:282] Add success.
I0323 01:47:43.420351  543705 net.go:648] Add success.
I0323 01:47:43.423168  543705 net.go:770] primary dev: ETH0
I0323 01:47:43.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:47:43.423198  543705 net.go:698] Add success.
I0323 01:47:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:47:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:47:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:47:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:47:53.409778  543705 cpu.go:275] no items to output this cycle
I0323 01:47:53.409781  543705 memory.go:184] no items to output this cycle
E0323 01:48:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:48:03.409767  543705 memory.go:184] no items to output this cycle
I0323 01:48:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 01:48:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:48:13.409800  543705 memory.go:191] Add success.
I0323 01:48:13.409801  543705 cpu.go:282] Add success.
W0323 01:48:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:48:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:48:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:48:13.420156  543705 net.go:648] Add success.
I0323 01:48:13.422963  543705 net.go:770] primary dev: ETH0
I0323 01:48:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:48:13.422990  543705 net.go:698] Add success.
I0323 01:48:13.463672  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2f86cd8b-ae08-4d83-8200-9f66f750a13f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:48:13.463708  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:48:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:48:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:48:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 01:48:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:48:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 01:48:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:48:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:48:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:48:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:48:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:48:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:48:23.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:48:23.409899  543705 cpu.go:275] no items to output this cycle
I0323 01:48:23.409914  543705 memory.go:184] no items to output this cycle
E0323 01:48:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:48:33.409781  543705 memory.go:184] no items to output this cycle
I0323 01:48:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 01:48:37.563142  543705 disk_info.go:125] begin check local disk info of client
I0323 01:48:37.565679  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:48:37.565685  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4700 0xc0000c4740]
I0323 01:48:39.981876  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:48:39.981882  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:48:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:48:43.410741  543705 memory.go:191] Add success.
I0323 01:48:43.409822  543705 cpu.go:282] Add success.
I0323 01:48:43.420431  543705 net.go:648] Add success.
I0323 01:48:43.423300  543705 net.go:770] primary dev: ETH0
I0323 01:48:43.423314  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:48:43.423327  543705 net.go:698] Add success.
I0323 01:48:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:48:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:48:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:48:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:48:53.409767  543705 memory.go:184] no items to output this cycle
I0323 01:48:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 01:49:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:49:03.409770  543705 memory.go:184] no items to output this cycle
I0323 01:49:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 01:49:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:49:13.409795  543705 memory.go:191] Add success.
I0323 01:49:13.409815  543705 cpu.go:282] Add success.
W0323 01:49:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:49:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:49:13.420514  543705 net.go:648] Add success.
I0323 01:49:13.423249  543705 net.go:770] primary dev: ETH0
I0323 01:49:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:49:13.423275  543705 net.go:698] Add success.
I0323 01:49:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:49:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:49:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 01:49:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:49:14.456603  543705 disk_worker.go:494] system disk:vda1
I0323 01:49:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:49:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:49:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:49:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:49:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:49:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:49:23.409783  543705 memory.go:184] no items to output this cycle
I0323 01:49:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:49:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:49:33.409802  543705 memory.go:184] no items to output this cycle
I0323 01:49:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 01:49:37.565769  543705 disk_info.go:125] begin check local disk info of client
I0323 01:49:37.568307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:49:37.568314  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab2c0 0xc0001ab300]
E0323 01:49:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:49:43.410838  543705 memory.go:191] Add success.
I0323 01:49:43.409806  543705 cpu.go:282] Add success.
I0323 01:49:43.420534  543705 net.go:648] Add success.
I0323 01:49:43.423294  543705 net.go:770] primary dev: ETH0
I0323 01:49:43.423307  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:49:43.423319  543705 net.go:698] Add success.
I0323 01:49:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:49:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:49:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:49:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:49:53.409776  543705 memory.go:184] no items to output this cycle
I0323 01:49:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 01:50:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:50:03.409798  543705 memory.go:184] no items to output this cycle
I0323 01:50:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 01:50:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:50:13.409789  543705 memory.go:191] Add success.
I0323 01:50:13.409809  543705 cpu.go:282] Add success.
W0323 01:50:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:50:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:50:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:50:13.420049  543705 net.go:648] Add success.
I0323 01:50:13.422645  543705 net.go:770] primary dev: ETH0
I0323 01:50:13.422657  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:50:13.422670  543705 net.go:698] Add success.
I0323 01:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:50:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:50:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 01:50:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:50:14.456613  543705 disk_worker.go:494] system disk:vda1
I0323 01:50:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:50:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:50:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:50:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:50:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:50:23.409810  543705 memory.go:184] no items to output this cycle
I0323 01:50:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 01:50:33.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:50:33.409860  543705 memory.go:184] no items to output this cycle
I0323 01:50:33.409981  543705 cpu.go:275] no items to output this cycle
I0323 01:50:37.568473  543705 disk_info.go:125] begin check local disk info of client
I0323 01:50:37.571031  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:50:37.571037  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264840 0xc000264880]
E0323 01:50:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:50:43.410764  543705 memory.go:191] Add success.
I0323 01:50:43.409802  543705 cpu.go:282] Add success.
I0323 01:50:43.420442  543705 net.go:648] Add success.
I0323 01:50:43.423458  543705 net.go:770] primary dev: ETH0
I0323 01:50:43.423473  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:50:43.423488  543705 net.go:698] Add success.
I0323 01:50:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:50:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:50:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:50:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:50:53.409781  543705 memory.go:184] no items to output this cycle
I0323 01:50:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:51:03.409777  543705 memory.go:184] no items to output this cycle
I0323 01:51:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 01:51:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:51:13.409821  543705 memory.go:191] Add success.
I0323 01:51:13.409824  543705 cpu.go:282] Add success.
W0323 01:51:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:51:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:51:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:51:13.420167  543705 net.go:648] Add success.
I0323 01:51:13.422927  543705 net.go:770] primary dev: ETH0
I0323 01:51:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:51:13.422952  543705 net.go:698] Add success.
I0323 01:51:13.469820  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"117129b0-ef3b-4e7c-b95c-18802f6a7684","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:51:13.469860  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:51:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:51:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:51:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 01:51:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:51:14.456547  543705 disk_worker.go:494] system disk:vda1
I0323 01:51:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:51:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:51:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:51:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:51:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:51:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:51:23.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:51:23.409885  543705 cpu.go:275] no items to output this cycle
I0323 01:51:23.409907  543705 memory.go:184] no items to output this cycle
E0323 01:51:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:51:33.409767  543705 memory.go:184] no items to output this cycle
I0323 01:51:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 01:51:37.571119  543705 disk_info.go:125] begin check local disk info of client
I0323 01:51:37.573746  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:51:37.573751  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007af40 0xc00007af80]
I0323 01:51:39.982024  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:51:39.982030  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:51:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:51:43.410776  543705 memory.go:191] Add success.
I0323 01:51:43.409812  543705 cpu.go:282] Add success.
I0323 01:51:43.420489  543705 net.go:648] Add success.
I0323 01:51:43.423291  543705 net.go:770] primary dev: ETH0
I0323 01:51:43.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:51:43.423320  543705 net.go:698] Add success.
I0323 01:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:51:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:51:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:51:53.409798  543705 memory.go:184] no items to output this cycle
I0323 01:51:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 01:52:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:52:03.409809  543705 memory.go:184] no items to output this cycle
I0323 01:52:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 01:52:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:52:13.409790  543705 memory.go:191] Add success.
W0323 01:52:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 01:52:13.409825  543705 cpu.go:282] Add success.
W0323 01:52:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:52:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:52:13.420182  543705 net.go:648] Add success.
I0323 01:52:13.423156  543705 net.go:770] primary dev: ETH0
I0323 01:52:13.423168  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:52:13.423181  543705 net.go:698] Add success.
W0323 01:52:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:52:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 01:52:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:52:14.455863  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:52:14.455872  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:52:14.455878  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:52:14.456569  543705 disk_worker.go:494] system disk:vda1
I0323 01:52:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:52:15.456943  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:52:15.456953  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 01:52:16.457029  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:52:16.458071  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:52:16.458125  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:52:16.458143  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:52:16.472580  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:52:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:52:23.409809  543705 memory.go:184] no items to output this cycle
I0323 01:52:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 01:52:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:52:33.409775  543705 memory.go:184] no items to output this cycle
I0323 01:52:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 01:52:37.573834  543705 disk_info.go:125] begin check local disk info of client
I0323 01:52:37.576373  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:52:37.576379  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad80 0xc00007adc0]
E0323 01:52:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:52:43.410713  543705 memory.go:191] Add success.
I0323 01:52:43.409819  543705 cpu.go:282] Add success.
I0323 01:52:43.420413  543705 net.go:648] Add success.
I0323 01:52:43.423326  543705 net.go:770] primary dev: ETH0
I0323 01:52:43.423342  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:52:43.423357  543705 net.go:698] Add success.
I0323 01:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:52:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:52:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:52:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:52:53.409786  543705 memory.go:184] no items to output this cycle
I0323 01:52:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 01:53:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:53:03.409777  543705 memory.go:184] no items to output this cycle
I0323 01:53:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 01:53:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:53:13.409818  543705 memory.go:191] Add success.
I0323 01:53:13.409817  543705 cpu.go:282] Add success.
W0323 01:53:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:53:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:53:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:53:13.420237  543705 net.go:648] Add success.
I0323 01:53:13.423185  543705 net.go:770] primary dev: ETH0
I0323 01:53:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:53:13.423215  543705 net.go:698] Add success.
I0323 01:53:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:53:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:53:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 01:53:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:53:14.456570  543705 disk_worker.go:494] system disk:vda1
I0323 01:53:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:53:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:53:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:53:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:53:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:53:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:53:23.409788  543705 memory.go:184] no items to output this cycle
I0323 01:53:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 01:53:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:53:33.409811  543705 memory.go:184] no items to output this cycle
I0323 01:53:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 01:53:37.576464  543705 disk_info.go:125] begin check local disk info of client
I0323 01:53:37.579020  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:53:37.579026  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2600 0xc0002b2640]
E0323 01:53:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:53:43.410611  543705 memory.go:191] Add success.
I0323 01:53:43.409820  543705 cpu.go:282] Add success.
I0323 01:53:43.420332  543705 net.go:648] Add success.
I0323 01:53:43.422924  543705 net.go:770] primary dev: ETH0
I0323 01:53:43.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:53:43.422952  543705 net.go:698] Add success.
I0323 01:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:53:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:53:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:53:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:53:53.409809  543705 memory.go:184] no items to output this cycle
I0323 01:53:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 01:54:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:54:03.409784  543705 memory.go:184] no items to output this cycle
I0323 01:54:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 01:54:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:54:13.409833  543705 memory.go:191] Add success.
I0323 01:54:13.409840  543705 cpu.go:282] Add success.
W0323 01:54:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:54:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:54:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:54:13.420161  543705 net.go:648] Add success.
I0323 01:54:13.422934  543705 net.go:770] primary dev: ETH0
I0323 01:54:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:54:13.422959  543705 net.go:698] Add success.
I0323 01:54:13.468459  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b0f5a867-ea26-42e5-8983-06c8dec9e1fa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:54:13.468493  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 01:54:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:54:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:54:14.455354  543705 disk_worker.go:708] disk space is not compliant
W0323 01:54:14.455361  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:54:14.457192  543705 disk_worker.go:494] system disk:vda1
I0323 01:54:14.457233  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:54:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:54:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:54:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:54:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:54:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:54:23.409780  543705 memory.go:184] no items to output this cycle
I0323 01:54:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 01:54:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:54:33.409791  543705 memory.go:184] no items to output this cycle
I0323 01:54:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 01:54:37.579107  543705 disk_info.go:125] begin check local disk info of client
I0323 01:54:37.581676  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:54:37.581683  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4ec0 0xc0000c4f00]
I0323 01:54:39.984608  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:54:39.984614  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:54:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:54:43.410669  543705 memory.go:191] Add success.
I0323 01:54:43.409812  543705 cpu.go:282] Add success.
I0323 01:54:43.420355  543705 net.go:648] Add success.
I0323 01:54:43.423047  543705 net.go:770] primary dev: ETH0
I0323 01:54:43.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:54:43.423073  543705 net.go:698] Add success.
I0323 01:54:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:54:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:54:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:54:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:54:53.409777  543705 memory.go:184] no items to output this cycle
I0323 01:54:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 01:55:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:55:03.409779  543705 memory.go:184] no items to output this cycle
I0323 01:55:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 01:55:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:55:13.409799  543705 memory.go:191] Add success.
W0323 01:55:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:55:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:55:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:55:13.409869  543705 cpu.go:282] Add success.
I0323 01:55:13.420351  543705 net.go:648] Add success.
I0323 01:55:13.423765  543705 net.go:770] primary dev: ETH0
I0323 01:55:13.423779  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:55:13.423791  543705 net.go:698] Add success.
I0323 01:55:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:55:14.455287  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:55:14.455300  543705 disk_worker.go:708] disk space is not compliant
W0323 01:55:14.455303  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:55:14.457071  543705 disk_worker.go:494] system disk:vda1
I0323 01:55:14.457116  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:55:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:55:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:55:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:55:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:55:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:55:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:55:23.409776  543705 memory.go:184] no items to output this cycle
I0323 01:55:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 01:55:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:55:33.409772  543705 memory.go:184] no items to output this cycle
I0323 01:55:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 01:55:37.581763  543705 disk_info.go:125] begin check local disk info of client
I0323 01:55:37.584284  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:55:37.584290  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a940 0xc00007aa40]
E0323 01:55:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:55:43.410666  543705 memory.go:191] Add success.
I0323 01:55:43.409822  543705 cpu.go:282] Add success.
I0323 01:55:43.420440  543705 net.go:648] Add success.
I0323 01:55:43.423387  543705 net.go:770] primary dev: ETH0
I0323 01:55:43.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:55:43.423412  543705 net.go:698] Add success.
I0323 01:55:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:55:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:55:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:55:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:55:53.409789  543705 memory.go:184] no items to output this cycle
I0323 01:55:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 01:56:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:56:03.409798  543705 memory.go:184] no items to output this cycle
I0323 01:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 01:56:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:56:13.409824  543705 memory.go:191] Add success.
I0323 01:56:13.409829  543705 cpu.go:282] Add success.
W0323 01:56:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:56:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:56:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:56:13.420157  543705 net.go:648] Add success.
I0323 01:56:13.422896  543705 net.go:770] primary dev: ETH0
I0323 01:56:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:56:13.422927  543705 net.go:698] Add success.
I0323 01:56:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:56:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:56:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 01:56:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:56:14.456650  543705 disk_worker.go:494] system disk:vda1
I0323 01:56:14.456694  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:56:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:56:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:56:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:56:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:56:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:56:23.409799  543705 memory.go:184] no items to output this cycle
I0323 01:56:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 01:56:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:56:33.409764  543705 memory.go:184] no items to output this cycle
I0323 01:56:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 01:56:37.584373  543705 disk_info.go:125] begin check local disk info of client
I0323 01:56:37.586959  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:56:37.586965  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4700 0xc0000c4740]
E0323 01:56:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:56:43.410898  543705 memory.go:191] Add success.
I0323 01:56:43.409798  543705 cpu.go:282] Add success.
I0323 01:56:43.420582  543705 net.go:648] Add success.
I0323 01:56:43.423490  543705 net.go:770] primary dev: ETH0
I0323 01:56:43.423503  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:56:43.423516  543705 net.go:698] Add success.
I0323 01:56:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:56:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:56:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:56:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:56:53.409802  543705 memory.go:184] no items to output this cycle
I0323 01:56:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 01:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:57:03.409780  543705 memory.go:184] no items to output this cycle
I0323 01:57:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 01:57:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:57:13.409790  543705 memory.go:191] Add success.
I0323 01:57:13.409809  543705 cpu.go:282] Add success.
W0323 01:57:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:57:13.420193  543705 net.go:648] Add success.
I0323 01:57:13.424011  543705 net.go:770] primary dev: ETH0
I0323 01:57:13.424026  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:57:13.424041  543705 net.go:698] Add success.
I0323 01:57:13.430188  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 01:57:13.453353  543705 event_worker.go:152] Polling the log file for events...
I0323 01:57:13.489154  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f07f5651-605a-4a32-bb83-c35a41613605","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 01:57:13.489190  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 01:57:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:57:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 01:57:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0323 01:57:14.456992  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 01:57:14.457002  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 01:57:14.457007  543705 custom_config.go:64] query custom config with name: gpu
I0323 01:57:14.457031  543705 disk_worker.go:494] system disk:vda1
I0323 01:57:14.457070  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 01:57:15.456781  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 01:57:15.456790  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:57:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 01:57:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 01:57:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:57:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:57:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:57:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:57:23.409791  543705 memory.go:184] no items to output this cycle
I0323 01:57:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 01:57:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:57:33.409776  543705 memory.go:184] no items to output this cycle
I0323 01:57:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 01:57:37.587048  543705 disk_info.go:125] begin check local disk info of client
I0323 01:57:37.589608  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:57:37.589615  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a40 0xc0000c4a80]
I0323 01:57:39.985725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 01:57:39.985731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 01:57:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:57:43.410737  543705 memory.go:191] Add success.
I0323 01:57:43.409829  543705 cpu.go:282] Add success.
I0323 01:57:43.420432  543705 net.go:648] Add success.
I0323 01:57:43.423209  543705 net.go:770] primary dev: ETH0
I0323 01:57:43.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:57:43.423238  543705 net.go:698] Add success.
I0323 01:57:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:57:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:57:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:57:53.410400  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:57:53.410417  543705 memory.go:184] no items to output this cycle
I0323 01:57:53.410431  543705 cpu.go:275] no items to output this cycle
E0323 01:58:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:58:03.409763  543705 memory.go:184] no items to output this cycle
I0323 01:58:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 01:58:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:58:13.409818  543705 memory.go:191] Add success.
I0323 01:58:13.409824  543705 cpu.go:282] Add success.
W0323 01:58:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:58:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:58:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:58:13.420081  543705 net.go:648] Add success.
I0323 01:58:13.422986  543705 net.go:770] primary dev: ETH0
I0323 01:58:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:58:13.423010  543705 net.go:698] Add success.
I0323 01:58:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:58:14.455305  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:58:14.455377  543705 disk_worker.go:708] disk space is not compliant
W0323 01:58:14.455380  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:58:14.457466  543705 disk_worker.go:494] system disk:vda1
I0323 01:58:14.457510  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:58:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:58:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:58:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:58:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:58:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:58:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:58:23.409769  543705 memory.go:184] no items to output this cycle
I0323 01:58:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 01:58:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:58:33.409776  543705 memory.go:184] no items to output this cycle
I0323 01:58:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 01:58:37.590641  543705 disk_info.go:125] begin check local disk info of client
I0323 01:58:37.593201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:58:37.593208  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0323 01:58:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:58:43.410709  543705 memory.go:191] Add success.
I0323 01:58:43.409786  543705 cpu.go:282] Add success.
I0323 01:58:43.420456  543705 net.go:648] Add success.
I0323 01:58:43.423082  543705 net.go:770] primary dev: ETH0
I0323 01:58:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:58:43.423111  543705 net.go:698] Add success.
I0323 01:58:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:58:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:58:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:58:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:58:53.409783  543705 memory.go:184] no items to output this cycle
I0323 01:58:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 01:59:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:59:03.409769  543705 memory.go:184] no items to output this cycle
I0323 01:59:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 01:59:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:59:13.409826  543705 memory.go:191] Add success.
I0323 01:59:13.409829  543705 cpu.go:282] Add success.
W0323 01:59:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 01:59:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 01:59:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 01:59:13.420147  543705 net.go:648] Add success.
I0323 01:59:13.422861  543705 net.go:770] primary dev: ETH0
I0323 01:59:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:59:13.422888  543705 net.go:698] Add success.
I0323 01:59:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 01:59:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 01:59:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 01:59:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 01:59:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 01:59:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 01:59:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 01:59:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:59:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:59:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 01:59:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0323 01:59:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:59:23.409778  543705 memory.go:184] no items to output this cycle
I0323 01:59:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 01:59:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:59:33.409775  543705 memory.go:184] no items to output this cycle
I0323 01:59:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 01:59:37.593292  543705 disk_info.go:125] begin check local disk info of client
I0323 01:59:37.595870  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 01:59:37.595876  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1b40 0xc0002b1b80]
E0323 01:59:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:59:43.410941  543705 memory.go:191] Add success.
I0323 01:59:43.409825  543705 cpu.go:282] Add success.
I0323 01:59:43.420643  543705 net.go:648] Add success.
I0323 01:59:43.423732  543705 net.go:770] primary dev: ETH0
I0323 01:59:43.423746  543705 net.go:802] Send network stats successfully!,count is 6
I0323 01:59:43.423758  543705 net.go:698] Add success.
I0323 01:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 01:59:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 01:59:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 01:59:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 01:59:53.409769  543705 memory.go:184] no items to output this cycle
I0323 01:59:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:00:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:00:03.409769  543705 memory.go:184] no items to output this cycle
I0323 02:00:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 02:00:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:00:13.409787  543705 memory.go:191] Add success.
I0323 02:00:13.409810  543705 cpu.go:282] Add success.
W0323 02:00:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:00:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:00:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:00:13.420304  543705 net.go:648] Add success.
I0323 02:00:13.423274  543705 net.go:770] primary dev: ETH0
I0323 02:00:13.423288  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:00:13.423300  543705 net.go:698] Add success.
I0323 02:00:13.468007  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd75574f-0069-4c1a-8432-6692d70c6c2f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:00:13.468038  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:00:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:00:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:00:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 02:00:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:00:14.456626  543705 disk_worker.go:494] system disk:vda1
I0323 02:00:14.456659  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:00:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:00:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:00:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:00:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:00:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:00:23.409780  543705 cpu.go:275] no items to output this cycle
I0323 02:00:23.409787  543705 memory.go:184] no items to output this cycle
E0323 02:00:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:00:33.409795  543705 memory.go:184] no items to output this cycle
I0323 02:00:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 02:00:37.595960  543705 disk_info.go:125] begin check local disk info of client
I0323 02:00:37.598524  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:00:37.598529  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1a00 0xc0002b1a40]
I0323 02:00:39.988635  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:00:39.988641  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:00:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:00:43.410825  543705 memory.go:191] Add success.
I0323 02:00:43.409799  543705 cpu.go:282] Add success.
I0323 02:00:43.420507  543705 net.go:648] Add success.
I0323 02:00:43.423507  543705 net.go:770] primary dev: ETH0
I0323 02:00:43.423520  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:00:43.423533  543705 net.go:698] Add success.
I0323 02:00:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:00:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:00:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:00:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:00:53.409785  543705 memory.go:184] no items to output this cycle
I0323 02:00:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 02:01:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:01:03.409779  543705 cpu.go:275] no items to output this cycle
I0323 02:01:03.409789  543705 memory.go:184] no items to output this cycle
E0323 02:01:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:01:13.409783  543705 memory.go:191] Add success.
W0323 02:01:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:01:13.409811  543705 cpu.go:282] Add success.
W0323 02:01:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:01:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:01:13.420164  543705 net.go:648] Add success.
I0323 02:01:13.422906  543705 net.go:770] primary dev: ETH0
I0323 02:01:13.422919  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:01:13.422931  543705 net.go:698] Add success.
I0323 02:01:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:01:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:01:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 02:01:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:01:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 02:01:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:01:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:01:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:01:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:01:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:01:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:01:23.409769  543705 memory.go:184] no items to output this cycle
I0323 02:01:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 02:01:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:01:33.409776  543705 memory.go:184] no items to output this cycle
I0323 02:01:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 02:01:37.598630  543705 disk_info.go:125] begin check local disk info of client
I0323 02:01:37.601153  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:01:37.601159  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4140 0xc0000c4180]
E0323 02:01:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:01:43.410852  543705 memory.go:191] Add success.
I0323 02:01:43.409795  543705 cpu.go:282] Add success.
I0323 02:01:43.420537  543705 net.go:648] Add success.
I0323 02:01:43.423392  543705 net.go:770] primary dev: ETH0
I0323 02:01:43.423405  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:01:43.423417  543705 net.go:698] Add success.
I0323 02:01:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:01:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:01:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:01:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:01:53.409775  543705 cpu.go:275] no items to output this cycle
I0323 02:01:53.409785  543705 memory.go:184] no items to output this cycle
E0323 02:02:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:02:03.409773  543705 memory.go:184] no items to output this cycle
I0323 02:02:03.409781  543705 cpu.go:275] no items to output this cycle
E0323 02:02:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:02:13.409795  543705 memory.go:191] Add success.
I0323 02:02:13.409812  543705 cpu.go:282] Add success.
W0323 02:02:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:02:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:02:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:02:13.420572  543705 net.go:648] Add success.
I0323 02:02:13.423098  543705 net.go:770] primary dev: ETH0
I0323 02:02:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:02:13.423123  543705 net.go:698] Add success.
W0323 02:02:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:02:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 02:02:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:02:14.456824  543705 disk_worker.go:494] system disk:vda1
I0323 02:02:14.456866  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:02:14.457175  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:02:14.457183  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:02:14.457188  543705 custom_config.go:64] query custom config with name: gpu
E0323 02:02:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:02:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:02:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:02:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:02:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:02:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:02:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:02:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:02:23.409795  543705 memory.go:184] no items to output this cycle
I0323 02:02:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 02:02:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:02:33.409768  543705 memory.go:184] no items to output this cycle
I0323 02:02:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 02:02:37.601240  543705 disk_info.go:125] begin check local disk info of client
I0323 02:02:37.603750  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:02:37.603756  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1e80 0xc0002b1ec0]
E0323 02:02:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:02:43.410910  543705 memory.go:191] Add success.
I0323 02:02:43.409809  543705 cpu.go:282] Add success.
I0323 02:02:43.419665  543705 net.go:648] Add success.
I0323 02:02:43.423057  543705 net.go:770] primary dev: ETH0
I0323 02:02:43.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:02:43.423083  543705 net.go:698] Add success.
I0323 02:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:02:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:02:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:02:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:02:53.409781  543705 memory.go:184] no items to output this cycle
I0323 02:02:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 02:03:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:03:03.409777  543705 memory.go:184] no items to output this cycle
I0323 02:03:03.409778  543705 cpu.go:275] no items to output this cycle
E0323 02:03:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:03:13.409818  543705 memory.go:191] Add success.
I0323 02:03:13.409827  543705 cpu.go:282] Add success.
W0323 02:03:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:03:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:03:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:03:13.420131  543705 net.go:648] Add success.
I0323 02:03:13.422996  543705 net.go:770] primary dev: ETH0
I0323 02:03:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:03:13.423025  543705 net.go:698] Add success.
I0323 02:03:13.468968  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"17863f79-6dae-4b32-a0a0-aee2c25ae051","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:03:13.469004  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:03:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:03:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:03:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 02:03:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:03:14.456716  543705 disk_worker.go:494] system disk:vda1
I0323 02:03:14.456747  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:03:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:03:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:03:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:03:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:03:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:03:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:03:23.409780  543705 memory.go:184] no items to output this cycle
I0323 02:03:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 02:03:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:03:33.409777  543705 memory.go:184] no items to output this cycle
I0323 02:03:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 02:03:37.604664  543705 disk_info.go:125] begin check local disk info of client
I0323 02:03:37.607163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:03:37.607169  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5dc0 0xc0000c5e00]
I0323 02:03:39.989727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:03:39.989733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:03:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:03:43.410805  543705 memory.go:191] Add success.
I0323 02:03:43.409805  543705 cpu.go:282] Add success.
I0323 02:03:43.420524  543705 net.go:648] Add success.
I0323 02:03:43.423462  543705 net.go:770] primary dev: ETH0
I0323 02:03:43.423478  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:03:43.423493  543705 net.go:698] Add success.
I0323 02:03:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:03:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:03:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:03:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:03:53.410407  543705 memory.go:184] no items to output this cycle
I0323 02:03:53.410418  543705 cpu.go:275] no items to output this cycle
E0323 02:04:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:04:03.409780  543705 memory.go:184] no items to output this cycle
I0323 02:04:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 02:04:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:04:13.409821  543705 memory.go:191] Add success.
I0323 02:04:13.409831  543705 cpu.go:282] Add success.
W0323 02:04:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:04:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:04:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:04:13.420230  543705 net.go:648] Add success.
I0323 02:04:13.423224  543705 net.go:770] primary dev: ETH0
I0323 02:04:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:04:13.423251  543705 net.go:698] Add success.
I0323 02:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:04:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:04:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 02:04:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:04:14.456608  543705 disk_worker.go:494] system disk:vda1
I0323 02:04:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:04:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:04:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:04:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:04:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:04:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:04:23.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:04:23.410273  543705 memory.go:184] no items to output this cycle
I0323 02:04:23.410282  543705 cpu.go:275] no items to output this cycle
E0323 02:04:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:04:33.409796  543705 memory.go:184] no items to output this cycle
I0323 02:04:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 02:04:37.607671  543705 disk_info.go:125] begin check local disk info of client
I0323 02:04:37.610237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:04:37.610244  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0323 02:04:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:04:43.410702  543705 memory.go:191] Add success.
I0323 02:04:43.409800  543705 cpu.go:282] Add success.
I0323 02:04:43.420413  543705 net.go:648] Add success.
I0323 02:04:43.423438  543705 net.go:770] primary dev: ETH0
I0323 02:04:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:04:43.423468  543705 net.go:698] Add success.
I0323 02:04:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:04:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:04:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:04:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:04:53.409777  543705 memory.go:184] no items to output this cycle
I0323 02:04:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 02:05:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:05:03.409784  543705 memory.go:184] no items to output this cycle
I0323 02:05:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 02:05:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:05:13.409782  543705 memory.go:191] Add success.
W0323 02:05:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:05:13.409812  543705 cpu.go:282] Add success.
W0323 02:05:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:05:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:05:13.420117  543705 net.go:648] Add success.
I0323 02:05:13.423406  543705 net.go:770] primary dev: ETH0
I0323 02:05:13.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:05:13.423435  543705 net.go:698] Add success.
I0323 02:05:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:05:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:05:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 02:05:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:05:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 02:05:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:05:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:05:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:05:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:05:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:05:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:05:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:05:23.409761  543705 memory.go:184] no items to output this cycle
I0323 02:05:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 02:05:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:05:33.409781  543705 memory.go:184] no items to output this cycle
I0323 02:05:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 02:05:37.610685  543705 disk_info.go:125] begin check local disk info of client
I0323 02:05:37.613182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:05:37.613188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ae880 0xc0003ae8c0]
E0323 02:05:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:05:43.410658  543705 memory.go:191] Add success.
I0323 02:05:43.409799  543705 cpu.go:282] Add success.
I0323 02:05:43.420361  543705 net.go:648] Add success.
I0323 02:05:43.423217  543705 net.go:770] primary dev: ETH0
I0323 02:05:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:05:43.423242  543705 net.go:698] Add success.
I0323 02:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:05:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:05:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:05:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:05:53.409791  543705 memory.go:184] no items to output this cycle
I0323 02:05:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 02:06:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:06:03.409780  543705 memory.go:184] no items to output this cycle
I0323 02:06:03.409783  543705 cpu.go:275] no items to output this cycle
E0323 02:06:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:06:13.409800  543705 memory.go:191] Add success.
I0323 02:06:13.409820  543705 cpu.go:282] Add success.
W0323 02:06:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:06:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:06:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:06:13.420137  543705 net.go:648] Add success.
I0323 02:06:13.422883  543705 net.go:770] primary dev: ETH0
I0323 02:06:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:06:13.422908  543705 net.go:698] Add success.
I0323 02:06:13.502917  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2e7263be-50f4-415c-8dbc-a2e3d9bb40fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:06:13.502952  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:06:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:06:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:06:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 02:06:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:06:14.456759  543705 disk_worker.go:494] system disk:vda1
I0323 02:06:14.456795  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:06:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:06:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:06:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:06:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:06:23.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:06:23.409896  543705 memory.go:184] no items to output this cycle
I0323 02:06:23.410007  543705 cpu.go:275] no items to output this cycle
E0323 02:06:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:06:33.409764  543705 memory.go:184] no items to output this cycle
I0323 02:06:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 02:06:37.613674  543705 disk_info.go:125] begin check local disk info of client
I0323 02:06:37.616182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:06:37.616188  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1a00 0xc0002b1a40]
I0323 02:06:39.989865  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:06:39.989871  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:06:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:06:43.410666  543705 memory.go:191] Add success.
I0323 02:06:43.409818  543705 cpu.go:282] Add success.
I0323 02:06:43.420365  543705 net.go:648] Add success.
I0323 02:06:43.423160  543705 net.go:770] primary dev: ETH0
I0323 02:06:43.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:06:43.423190  543705 net.go:698] Add success.
I0323 02:06:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:06:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:06:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:06:53.410414  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:06:53.410434  543705 memory.go:184] no items to output this cycle
I0323 02:06:53.410446  543705 cpu.go:275] no items to output this cycle
E0323 02:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:07:03.409775  543705 memory.go:184] no items to output this cycle
I0323 02:07:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 02:07:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:07:13.409794  543705 memory.go:191] Add success.
I0323 02:07:13.409797  543705 cpu.go:282] Add success.
W0323 02:07:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:07:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:07:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:07:13.420073  543705 net.go:648] Add success.
I0323 02:07:13.422967  543705 net.go:770] primary dev: ETH0
I0323 02:07:13.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:07:13.422992  543705 net.go:698] Add success.
I0323 02:07:13.453578  543705 event_worker.go:152] Polling the log file for events...
W0323 02:07:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:07:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 02:07:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:07:14.455916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:07:14.455925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:07:14.455931  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:07:14.456547  543705 disk_worker.go:494] system disk:vda1
I0323 02:07:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:07:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:07:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:07:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:07:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:07:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:07:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:07:16.472342  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:07:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:07:23.409891  543705 memory.go:184] no items to output this cycle
I0323 02:07:23.409911  543705 cpu.go:275] no items to output this cycle
E0323 02:07:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:07:33.409799  543705 memory.go:184] no items to output this cycle
I0323 02:07:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 02:07:37.616269  543705 disk_info.go:125] begin check local disk info of client
I0323 02:07:37.618774  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:07:37.618780  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc00 0xc0001abc40]
E0323 02:07:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:07:43.410732  543705 memory.go:191] Add success.
I0323 02:07:43.409824  543705 cpu.go:282] Add success.
I0323 02:07:43.420444  543705 net.go:648] Add success.
I0323 02:07:43.423385  543705 net.go:770] primary dev: ETH0
I0323 02:07:43.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:07:43.423416  543705 net.go:698] Add success.
I0323 02:07:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:07:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:07:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:07:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:07:53.409777  543705 memory.go:184] no items to output this cycle
I0323 02:07:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 02:08:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:08:03.409781  543705 cpu.go:275] no items to output this cycle
I0323 02:08:03.409786  543705 memory.go:184] no items to output this cycle
E0323 02:08:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:08:13.409819  543705 memory.go:191] Add success.
I0323 02:08:13.409827  543705 cpu.go:282] Add success.
W0323 02:08:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:08:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:08:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:08:13.420139  543705 net.go:648] Add success.
I0323 02:08:13.422794  543705 net.go:770] primary dev: ETH0
I0323 02:08:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:08:13.422823  543705 net.go:698] Add success.
I0323 02:08:14.454950  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:08:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:08:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 02:08:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:08:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 02:08:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:08:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:08:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:08:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:08:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:08:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:08:23.410596  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:08:23.410610  543705 memory.go:184] no items to output this cycle
I0323 02:08:23.410611  543705 cpu.go:275] no items to output this cycle
E0323 02:08:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:08:33.409780  543705 memory.go:184] no items to output this cycle
I0323 02:08:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 02:08:37.619727  543705 disk_info.go:125] begin check local disk info of client
I0323 02:08:37.622229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:08:37.622235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1740 0xc0002b1780]
E0323 02:08:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:08:43.410805  543705 memory.go:191] Add success.
I0323 02:08:43.409828  543705 cpu.go:282] Add success.
I0323 02:08:43.420589  543705 net.go:648] Add success.
I0323 02:08:43.423511  543705 net.go:770] primary dev: ETH0
I0323 02:08:43.423525  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:08:43.423536  543705 net.go:698] Add success.
I0323 02:08:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:08:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:08:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:08:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:08:53.409790  543705 cpu.go:275] no items to output this cycle
I0323 02:08:53.409793  543705 memory.go:184] no items to output this cycle
E0323 02:09:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:09:03.409769  543705 memory.go:184] no items to output this cycle
I0323 02:09:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 02:09:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:09:13.409821  543705 memory.go:191] Add success.
I0323 02:09:13.409823  543705 cpu.go:282] Add success.
W0323 02:09:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:09:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:09:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:09:13.420177  543705 net.go:648] Add success.
I0323 02:09:13.423191  543705 net.go:770] primary dev: ETH0
I0323 02:09:13.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:09:13.423217  543705 net.go:698] Add success.
I0323 02:09:13.468624  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"11aa7dd5-8504-44cc-b8c7-dd701fb953ef","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:09:13.468659  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:09:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:09:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:09:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 02:09:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:09:14.456502  543705 disk_worker.go:494] system disk:vda1
I0323 02:09:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:09:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:09:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:09:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:09:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:09:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:09:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:09:23.409770  543705 memory.go:184] no items to output this cycle
I0323 02:09:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 02:09:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:09:33.409771  543705 memory.go:184] no items to output this cycle
I0323 02:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 02:09:37.622742  543705 disk_info.go:125] begin check local disk info of client
I0323 02:09:37.625353  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:09:37.625360  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505c40 0xc000505c80]
I0323 02:09:39.992647  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:09:39.992653  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:09:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:09:43.410729  543705 memory.go:191] Add success.
I0323 02:09:43.409830  543705 cpu.go:282] Add success.
I0323 02:09:43.420452  543705 net.go:648] Add success.
I0323 02:09:43.423190  543705 net.go:770] primary dev: ETH0
I0323 02:09:43.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:09:43.423217  543705 net.go:698] Add success.
I0323 02:09:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:09:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:09:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:09:53.410441  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:09:53.410457  543705 memory.go:184] no items to output this cycle
I0323 02:09:53.410459  543705 cpu.go:275] no items to output this cycle
E0323 02:10:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:10:03.409767  543705 memory.go:184] no items to output this cycle
I0323 02:10:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 02:10:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:10:13.409819  543705 memory.go:191] Add success.
I0323 02:10:13.409828  543705 cpu.go:282] Add success.
W0323 02:10:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:10:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:10:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:10:13.420121  543705 net.go:648] Add success.
I0323 02:10:13.422761  543705 net.go:770] primary dev: ETH0
I0323 02:10:13.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:10:13.422786  543705 net.go:698] Add success.
I0323 02:10:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:10:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:10:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 02:10:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:10:14.456573  543705 disk_worker.go:494] system disk:vda1
I0323 02:10:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:10:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:10:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:10:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:10:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:10:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:10:23.409914  543705 cpu.go:275] no items to output this cycle
I0323 02:10:23.409920  543705 memory.go:184] no items to output this cycle
E0323 02:10:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:10:33.409811  543705 memory.go:184] no items to output this cycle
I0323 02:10:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 02:10:37.625680  543705 disk_info.go:125] begin check local disk info of client
I0323 02:10:37.628256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:10:37.628262  543705 disk_info.go:196] parse disk info done, disk is : [0xc000320440 0xc000320480]
E0323 02:10:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:10:43.410863  543705 memory.go:191] Add success.
I0323 02:10:43.409806  543705 cpu.go:282] Add success.
I0323 02:10:43.420559  543705 net.go:648] Add success.
I0323 02:10:43.423482  543705 net.go:770] primary dev: ETH0
I0323 02:10:43.423496  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:10:43.423509  543705 net.go:698] Add success.
I0323 02:10:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:10:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:10:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:10:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:10:53.409813  543705 memory.go:184] no items to output this cycle
I0323 02:10:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 02:11:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:11:03.409777  543705 memory.go:184] no items to output this cycle
I0323 02:11:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 02:11:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:11:13.409809  543705 memory.go:191] Add success.
I0323 02:11:13.409810  543705 cpu.go:282] Add success.
W0323 02:11:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:11:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:11:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:11:13.420138  543705 net.go:648] Add success.
I0323 02:11:13.423238  543705 net.go:770] primary dev: ETH0
I0323 02:11:13.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:11:13.423267  543705 net.go:698] Add success.
I0323 02:11:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:11:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:11:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 02:11:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:11:14.456524  543705 disk_worker.go:494] system disk:vda1
I0323 02:11:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:11:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:11:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:11:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:11:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:11:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:11:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:11:23.409800  543705 memory.go:184] no items to output this cycle
I0323 02:11:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 02:11:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:11:33.409790  543705 memory.go:184] no items to output this cycle
I0323 02:11:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 02:11:37.628345  543705 disk_info.go:125] begin check local disk info of client
I0323 02:11:37.630890  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:11:37.630897  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0000 0xc0002b0040]
E0323 02:11:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:11:43.410672  543705 memory.go:191] Add success.
I0323 02:11:43.409836  543705 cpu.go:282] Add success.
I0323 02:11:43.420381  543705 net.go:648] Add success.
I0323 02:11:43.423076  543705 net.go:770] primary dev: ETH0
I0323 02:11:43.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:11:43.423101  543705 net.go:698] Add success.
I0323 02:11:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:11:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:11:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:11:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:11:53.409803  543705 memory.go:184] no items to output this cycle
I0323 02:11:53.409818  543705 cpu.go:275] no items to output this cycle
E0323 02:12:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:12:03.409793  543705 memory.go:184] no items to output this cycle
I0323 02:12:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 02:12:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:12:13.409800  543705 memory.go:191] Add success.
I0323 02:12:13.409801  543705 cpu.go:282] Add success.
W0323 02:12:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:12:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:12:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:12:13.420172  543705 net.go:648] Add success.
I0323 02:12:13.422922  543705 net.go:770] primary dev: ETH0
I0323 02:12:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:12:13.422947  543705 net.go:698] Add success.
I0323 02:12:13.468455  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dce34db9-35d4-4339-ae4d-af0d5108742b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:12:13.468490  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 02:12:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:12:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 02:12:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:12:14.456962  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:12:14.456972  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:12:14.456977  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:12:14.457047  543705 disk_worker.go:494] system disk:vda1
I0323 02:12:14.457076  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:12:15.456470  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:12:15.456480  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:12:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:12:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:12:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:12:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:12:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:12:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:12:23.409770  543705 memory.go:184] no items to output this cycle
I0323 02:12:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 02:12:33.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:12:33.409916  543705 cpu.go:275] no items to output this cycle
I0323 02:12:33.410008  543705 memory.go:184] no items to output this cycle
I0323 02:12:37.630979  543705 disk_info.go:125] begin check local disk info of client
I0323 02:12:37.633542  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:12:37.633549  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8e80 0xc0004d8ec0]
I0323 02:12:39.993726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:12:39.993732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:12:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:12:43.410780  543705 memory.go:191] Add success.
I0323 02:12:43.409829  543705 cpu.go:282] Add success.
I0323 02:12:43.420474  543705 net.go:648] Add success.
I0323 02:12:43.423458  543705 net.go:770] primary dev: ETH0
I0323 02:12:43.423472  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:12:43.423486  543705 net.go:698] Add success.
I0323 02:12:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:12:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:12:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:12:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:12:53.409784  543705 memory.go:184] no items to output this cycle
I0323 02:12:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:13:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:13:03.409807  543705 memory.go:184] no items to output this cycle
I0323 02:13:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 02:13:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:13:13.409811  543705 memory.go:191] Add success.
I0323 02:13:13.409813  543705 cpu.go:282] Add success.
W0323 02:13:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:13:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:13:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:13:13.420078  543705 net.go:648] Add success.
I0323 02:13:13.423075  543705 net.go:770] primary dev: ETH0
I0323 02:13:13.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:13:13.423101  543705 net.go:698] Add success.
I0323 02:13:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:13:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:13:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 02:13:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:13:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 02:13:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:13:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:13:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:13:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:13:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:13:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:13:23.409765  543705 memory.go:184] no items to output this cycle
I0323 02:13:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 02:13:33.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:13:33.409914  543705 cpu.go:275] no items to output this cycle
I0323 02:13:33.409929  543705 memory.go:184] no items to output this cycle
I0323 02:13:37.633670  543705 disk_info.go:125] begin check local disk info of client
I0323 02:13:37.636215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:13:37.636221  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9d40 0xc0002b9d80]
E0323 02:13:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:13:43.410623  543705 memory.go:191] Add success.
I0323 02:13:43.409809  543705 cpu.go:282] Add success.
I0323 02:13:43.420336  543705 net.go:648] Add success.
I0323 02:13:43.423332  543705 net.go:770] primary dev: ETH0
I0323 02:13:43.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:13:43.423361  543705 net.go:698] Add success.
I0323 02:13:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:13:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:13:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:13:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:13:53.409774  543705 memory.go:184] no items to output this cycle
I0323 02:13:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 02:14:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:14:03.409784  543705 memory.go:184] no items to output this cycle
I0323 02:14:03.409783  543705 cpu.go:275] no items to output this cycle
E0323 02:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:14:13.409796  543705 cpu.go:282] Add success.
I0323 02:14:13.409797  543705 memory.go:191] Add success.
W0323 02:14:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:14:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:14:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:14:13.420163  543705 net.go:648] Add success.
I0323 02:14:13.422901  543705 net.go:770] primary dev: ETH0
I0323 02:14:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:14:13.422926  543705 net.go:698] Add success.
I0323 02:14:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:14:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:14:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 02:14:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:14:14.456588  543705 disk_worker.go:494] system disk:vda1
I0323 02:14:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:14:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:14:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:14:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:14:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:14:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:14:23.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:14:23.410356  543705 memory.go:184] no items to output this cycle
I0323 02:14:23.410420  543705 cpu.go:275] no items to output this cycle
E0323 02:14:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:14:33.409782  543705 memory.go:184] no items to output this cycle
I0323 02:14:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 02:14:37.636303  543705 disk_info.go:125] begin check local disk info of client
I0323 02:14:37.638858  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:14:37.638864  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1b00 0xc0002b1b40]
E0323 02:14:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:14:43.410714  543705 memory.go:191] Add success.
I0323 02:14:43.409816  543705 cpu.go:282] Add success.
I0323 02:14:43.420453  543705 net.go:648] Add success.
I0323 02:14:43.423334  543705 net.go:770] primary dev: ETH0
I0323 02:14:43.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:14:43.423358  543705 net.go:698] Add success.
I0323 02:14:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:14:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:14:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:14:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:14:53.409801  543705 memory.go:184] no items to output this cycle
I0323 02:14:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 02:15:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:15:03.409782  543705 memory.go:184] no items to output this cycle
I0323 02:15:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 02:15:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:15:13.409803  543705 memory.go:191] Add success.
I0323 02:15:13.409802  543705 cpu.go:282] Add success.
W0323 02:15:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:15:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:15:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:15:13.420141  543705 net.go:648] Add success.
I0323 02:15:13.423232  543705 net.go:770] primary dev: ETH0
I0323 02:15:13.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:15:13.423256  543705 net.go:698] Add success.
I0323 02:15:13.571395  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"93944f59-1ed2-4351-96ea-6b362b085874","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:15:13.571435  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:15:14.453972  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:15:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:15:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0323 02:15:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:15:14.456559  543705 disk_worker.go:494] system disk:vda1
I0323 02:15:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:15:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:15:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:15:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:15:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:15:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:15:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:15:23.409780  543705 cpu.go:275] no items to output this cycle
I0323 02:15:23.409781  543705 memory.go:184] no items to output this cycle
E0323 02:15:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:15:33.409762  543705 memory.go:184] no items to output this cycle
I0323 02:15:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 02:15:37.638959  543705 disk_info.go:125] begin check local disk info of client
I0323 02:15:37.641511  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:15:37.641517  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e6600 0xc0000e6640]
I0323 02:15:39.996676  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:15:39.996681  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:15:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:15:43.410601  543705 memory.go:191] Add success.
I0323 02:15:43.409808  543705 cpu.go:282] Add success.
I0323 02:15:43.420363  543705 net.go:648] Add success.
I0323 02:15:43.423275  543705 net.go:770] primary dev: ETH0
I0323 02:15:43.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:15:43.423320  543705 net.go:698] Add success.
I0323 02:15:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:15:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:15:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:15:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:15:53.409803  543705 memory.go:184] no items to output this cycle
I0323 02:15:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 02:16:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:16:03.409807  543705 memory.go:184] no items to output this cycle
I0323 02:16:03.409824  543705 cpu.go:275] no items to output this cycle
E0323 02:16:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:16:13.409783  543705 memory.go:191] Add success.
W0323 02:16:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:16:13.409811  543705 cpu.go:282] Add success.
W0323 02:16:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:16:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:16:13.420193  543705 net.go:648] Add success.
I0323 02:16:13.422913  543705 net.go:770] primary dev: ETH0
I0323 02:16:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:16:13.422942  543705 net.go:698] Add success.
I0323 02:16:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:16:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:16:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0323 02:16:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:16:14.456605  543705 disk_worker.go:494] system disk:vda1
I0323 02:16:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:16:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:16:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:16:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:16:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:16:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:16:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:16:23.409794  543705 memory.go:184] no items to output this cycle
I0323 02:16:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 02:16:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:16:33.409776  543705 cpu.go:275] no items to output this cycle
I0323 02:16:33.409786  543705 memory.go:184] no items to output this cycle
I0323 02:16:37.641674  543705 disk_info.go:125] begin check local disk info of client
I0323 02:16:37.644194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:16:37.644200  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba00 0xc0001aba40]
E0323 02:16:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:16:43.410586  543705 memory.go:191] Add success.
I0323 02:16:43.409795  543705 cpu.go:282] Add success.
I0323 02:16:43.420280  543705 net.go:648] Add success.
I0323 02:16:43.422743  543705 net.go:770] primary dev: ETH0
I0323 02:16:43.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:16:43.422771  543705 net.go:698] Add success.
I0323 02:16:46.458029  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:16:46.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:16:46.458138  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:16:53.410362  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:16:53.410380  543705 memory.go:184] no items to output this cycle
I0323 02:16:53.410392  543705 cpu.go:275] no items to output this cycle
E0323 02:17:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:17:03.409769  543705 memory.go:184] no items to output this cycle
I0323 02:17:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 02:17:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:17:13.409817  543705 memory.go:191] Add success.
I0323 02:17:13.409828  543705 cpu.go:282] Add success.
W0323 02:17:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:17:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:17:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:17:13.420190  543705 net.go:648] Add success.
I0323 02:17:13.423044  543705 net.go:770] primary dev: ETH0
I0323 02:17:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:17:13.423071  543705 net.go:698] Add success.
I0323 02:17:13.453666  543705 event_worker.go:152] Polling the log file for events...
W0323 02:17:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:17:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 02:17:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:17:14.455904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:17:14.455914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:17:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:17:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 02:17:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:17:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:17:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:17:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:17:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:17:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:17:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:17:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:17:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:17:23.409771  543705 memory.go:184] no items to output this cycle
I0323 02:17:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 02:17:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:17:33.409797  543705 memory.go:184] no items to output this cycle
I0323 02:17:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 02:17:37.644862  543705 disk_info.go:125] begin check local disk info of client
I0323 02:17:37.647459  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:17:37.647465  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b1bc0 0xc0002b1c00]
E0323 02:17:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:17:43.410589  543705 memory.go:191] Add success.
I0323 02:17:43.409789  543705 cpu.go:282] Add success.
I0323 02:17:43.420298  543705 net.go:648] Add success.
I0323 02:17:43.422927  543705 net.go:770] primary dev: ETH0
I0323 02:17:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:17:43.422953  543705 net.go:698] Add success.
I0323 02:17:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:17:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:17:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:17:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:17:53.409798  543705 memory.go:184] no items to output this cycle
I0323 02:17:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 02:18:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:18:03.409777  543705 memory.go:184] no items to output this cycle
I0323 02:18:03.409778  543705 cpu.go:275] no items to output this cycle
E0323 02:18:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:18:13.409800  543705 memory.go:191] Add success.
I0323 02:18:13.409801  543705 cpu.go:282] Add success.
W0323 02:18:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:18:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:18:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:18:13.420281  543705 net.go:648] Add success.
I0323 02:18:13.423103  543705 net.go:770] primary dev: ETH0
I0323 02:18:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:18:13.423130  543705 net.go:698] Add success.
I0323 02:18:13.469668  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"81b3f58e-b86e-4292-b343-ed344e2d83fa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:18:13.469712  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:18:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:18:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:18:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 02:18:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:18:14.456879  543705 disk_worker.go:494] system disk:vda1
I0323 02:18:14.456909  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:18:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:18:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:18:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:18:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:18:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:18:23.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:18:23.410263  543705 cpu.go:275] no items to output this cycle
I0323 02:18:23.410268  543705 memory.go:184] no items to output this cycle
E0323 02:18:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:18:33.409802  543705 memory.go:184] no items to output this cycle
I0323 02:18:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 02:18:37.647548  543705 disk_info.go:125] begin check local disk info of client
I0323 02:18:37.650092  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:18:37.650100  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0323 02:18:39.997728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:18:39.997734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:18:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:18:43.410782  543705 memory.go:191] Add success.
I0323 02:18:43.409814  543705 cpu.go:282] Add success.
I0323 02:18:43.420488  543705 net.go:648] Add success.
I0323 02:18:43.423108  543705 net.go:770] primary dev: ETH0
I0323 02:18:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:18:43.423138  543705 net.go:698] Add success.
I0323 02:18:46.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:18:46.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:18:46.458149  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:18:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:18:53.409782  543705 memory.go:184] no items to output this cycle
I0323 02:18:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 02:19:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:19:03.409806  543705 memory.go:184] no items to output this cycle
I0323 02:19:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 02:19:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:19:13.409820  543705 memory.go:191] Add success.
I0323 02:19:13.409833  543705 cpu.go:282] Add success.
W0323 02:19:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:19:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:19:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:19:13.420198  543705 net.go:648] Add success.
I0323 02:19:13.423215  543705 net.go:770] primary dev: ETH0
I0323 02:19:13.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:19:13.423244  543705 net.go:698] Add success.
I0323 02:19:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:19:14.455253  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:19:14.455346  543705 disk_worker.go:708] disk space is not compliant
W0323 02:19:14.455350  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:19:14.456998  543705 disk_worker.go:494] system disk:vda1
I0323 02:19:14.457027  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:19:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:19:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:19:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:19:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:19:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:19:23.409787  543705 memory.go:184] no items to output this cycle
I0323 02:19:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 02:19:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:19:33.409775  543705 cpu.go:275] no items to output this cycle
I0323 02:19:33.409790  543705 memory.go:184] no items to output this cycle
I0323 02:19:37.650182  543705 disk_info.go:125] begin check local disk info of client
I0323 02:19:37.652685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:19:37.652692  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab2c0 0xc0001ab300]
E0323 02:19:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:19:43.410560  543705 memory.go:191] Add success.
I0323 02:19:43.409802  543705 cpu.go:282] Add success.
I0323 02:19:43.420265  543705 net.go:648] Add success.
I0323 02:19:43.422697  543705 net.go:770] primary dev: ETH0
I0323 02:19:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:19:43.422723  543705 net.go:698] Add success.
I0323 02:19:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:19:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:19:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:19:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:19:53.409796  543705 memory.go:184] no items to output this cycle
I0323 02:19:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 02:20:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:20:03.409782  543705 cpu.go:275] no items to output this cycle
I0323 02:20:03.409785  543705 memory.go:184] no items to output this cycle
E0323 02:20:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:20:13.409802  543705 memory.go:191] Add success.
I0323 02:20:13.409805  543705 cpu.go:282] Add success.
W0323 02:20:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:20:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:20:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:20:13.420131  543705 net.go:648] Add success.
I0323 02:20:13.423345  543705 net.go:770] primary dev: ETH0
I0323 02:20:13.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:20:13.423372  543705 net.go:698] Add success.
I0323 02:20:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:20:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:20:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 02:20:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:20:14.456863  543705 disk_worker.go:494] system disk:vda1
I0323 02:20:14.456894  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:20:15.456009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:20:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:20:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:20:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:20:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:20:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:20:23.409775  543705 memory.go:184] no items to output this cycle
I0323 02:20:23.409774  543705 cpu.go:275] no items to output this cycle
E0323 02:20:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:20:33.409776  543705 memory.go:184] no items to output this cycle
I0323 02:20:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 02:20:37.653674  543705 disk_info.go:125] begin check local disk info of client
I0323 02:20:37.656188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:20:37.656194  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b00 0xc0000c4b40]
E0323 02:20:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:20:43.410845  543705 memory.go:191] Add success.
I0323 02:20:43.409818  543705 cpu.go:282] Add success.
I0323 02:20:43.420619  543705 net.go:648] Add success.
I0323 02:20:43.423409  543705 net.go:770] primary dev: ETH0
I0323 02:20:43.423423  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:20:43.423435  543705 net.go:698] Add success.
I0323 02:20:46.458023  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:20:46.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:20:46.458129  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:20:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:20:53.409782  543705 memory.go:184] no items to output this cycle
I0323 02:20:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 02:21:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:21:03.409800  543705 memory.go:184] no items to output this cycle
I0323 02:21:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 02:21:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:21:13.409785  543705 memory.go:191] Add success.
I0323 02:21:13.409810  543705 cpu.go:282] Add success.
W0323 02:21:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:21:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:21:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:21:13.420189  543705 net.go:648] Add success.
I0323 02:21:13.422725  543705 net.go:770] primary dev: ETH0
I0323 02:21:13.422740  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:21:13.422753  543705 net.go:698] Add success.
I0323 02:21:13.468576  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6ad9d834-4bfd-490f-9139-b5c7210c4218","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:21:13.468613  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:21:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:21:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:21:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 02:21:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:21:14.457009  543705 disk_worker.go:494] system disk:vda1
I0323 02:21:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:21:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:21:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:21:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:21:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:21:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:21:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:21:23.409777  543705 memory.go:184] no items to output this cycle
I0323 02:21:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 02:21:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:21:33.409792  543705 memory.go:184] no items to output this cycle
I0323 02:21:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 02:21:37.656924  543705 disk_info.go:125] begin check local disk info of client
I0323 02:21:37.659448  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:21:37.659455  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505c00 0xc000505c40]
I0323 02:21:40.000689  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:21:40.000695  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:21:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:21:43.410543  543705 memory.go:191] Add success.
I0323 02:21:43.409800  543705 cpu.go:282] Add success.
I0323 02:21:43.420248  543705 net.go:648] Add success.
I0323 02:21:43.422611  543705 net.go:770] primary dev: ETH0
I0323 02:21:43.422627  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:21:43.422642  543705 net.go:698] Add success.
I0323 02:21:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:21:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:21:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:21:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:21:53.409792  543705 cpu.go:275] no items to output this cycle
I0323 02:21:53.409798  543705 memory.go:184] no items to output this cycle
E0323 02:22:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:22:03.409807  543705 memory.go:184] no items to output this cycle
I0323 02:22:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 02:22:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:22:13.409800  543705 cpu.go:282] Add success.
I0323 02:22:13.409811  543705 memory.go:191] Add success.
W0323 02:22:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:22:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:22:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:22:13.420109  543705 net.go:648] Add success.
I0323 02:22:13.422737  543705 net.go:770] primary dev: ETH0
I0323 02:22:13.422752  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:22:13.422766  543705 net.go:698] Add success.
W0323 02:22:14.455475  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:22:14.455491  543705 disk_worker.go:708] disk space is not compliant
W0323 02:22:14.455495  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:22:14.456217  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:22:14.456224  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:22:14.456229  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:22:14.457493  543705 disk_worker.go:494] system disk:vda1
I0323 02:22:14.457535  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:22:15.456867  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:22:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:22:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:22:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:22:16.458003  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:22:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:22:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:22:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:22:23.409795  543705 memory.go:184] no items to output this cycle
I0323 02:22:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 02:22:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:22:33.409780  543705 cpu.go:275] no items to output this cycle
I0323 02:22:33.409795  543705 memory.go:184] no items to output this cycle
I0323 02:22:37.659538  543705 disk_info.go:125] begin check local disk info of client
I0323 02:22:37.662109  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:22:37.662116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005058c0 0xc000505900]
E0323 02:22:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:22:43.410699  543705 memory.go:191] Add success.
I0323 02:22:43.409820  543705 cpu.go:282] Add success.
I0323 02:22:43.420380  543705 net.go:648] Add success.
I0323 02:22:43.423257  543705 net.go:770] primary dev: ETH0
I0323 02:22:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:22:43.423282  543705 net.go:698] Add success.
I0323 02:22:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:22:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:22:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:22:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:22:53.409774  543705 memory.go:184] no items to output this cycle
I0323 02:22:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 02:23:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:23:03.409778  543705 memory.go:184] no items to output this cycle
I0323 02:23:03.409779  543705 cpu.go:275] no items to output this cycle
E0323 02:23:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:23:13.409803  543705 cpu.go:282] Add success.
I0323 02:23:13.409806  543705 memory.go:191] Add success.
W0323 02:23:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:23:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:23:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:23:13.420235  543705 net.go:648] Add success.
I0323 02:23:13.422702  543705 net.go:770] primary dev: ETH0
I0323 02:23:13.422714  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:23:13.422726  543705 net.go:698] Add success.
I0323 02:23:14.454946  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:23:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:23:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0323 02:23:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:23:14.456488  543705 disk_worker.go:494] system disk:vda1
I0323 02:23:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:23:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:23:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:23:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:23:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:23:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:23:23.409767  543705 memory.go:184] no items to output this cycle
I0323 02:23:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 02:23:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:23:33.409795  543705 memory.go:184] no items to output this cycle
I0323 02:23:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 02:23:37.662202  543705 disk_info.go:125] begin check local disk info of client
I0323 02:23:37.664739  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:23:37.664745  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b580 0xc00007b5c0]
E0323 02:23:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:23:43.410722  543705 memory.go:191] Add success.
I0323 02:23:43.409858  543705 cpu.go:282] Add success.
I0323 02:23:43.420604  543705 net.go:648] Add success.
I0323 02:23:43.423373  543705 net.go:770] primary dev: ETH0
I0323 02:23:43.423391  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:23:43.423411  543705 net.go:698] Add success.
I0323 02:23:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:23:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:23:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:23:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:23:53.409779  543705 memory.go:184] no items to output this cycle
I0323 02:23:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 02:24:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:24:03.409767  543705 memory.go:184] no items to output this cycle
I0323 02:24:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 02:24:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:24:13.409826  543705 memory.go:191] Add success.
I0323 02:24:13.409828  543705 cpu.go:282] Add success.
W0323 02:24:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:24:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:24:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:24:13.420370  543705 net.go:648] Add success.
I0323 02:24:13.423089  543705 net.go:770] primary dev: ETH0
I0323 02:24:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:24:13.423113  543705 net.go:698] Add success.
I0323 02:24:13.735650  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5b5eebcd-6a2a-4024-81b2-c473d9e5bd25","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:24:13.735684  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:24:14.453970  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:24:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:24:14.455292  543705 disk_worker.go:708] disk space is not compliant
W0323 02:24:14.455295  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:24:14.456865  543705 disk_worker.go:494] system disk:vda1
I0323 02:24:14.456896  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:24:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:24:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:24:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:24:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:24:16.472541  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:24:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:24:23.409790  543705 memory.go:184] no items to output this cycle
I0323 02:24:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 02:24:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:24:33.409771  543705 memory.go:184] no items to output this cycle
I0323 02:24:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 02:24:37.665681  543705 disk_info.go:125] begin check local disk info of client
I0323 02:24:37.668271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:24:37.668277  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0000 0xc0002b0040]
I0323 02:24:40.001746  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:24:40.001752  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:24:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:24:43.410639  543705 memory.go:191] Add success.
I0323 02:24:43.409817  543705 cpu.go:282] Add success.
I0323 02:24:43.420331  543705 net.go:648] Add success.
I0323 02:24:43.422910  543705 net.go:770] primary dev: ETH0
I0323 02:24:43.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:24:43.422941  543705 net.go:698] Add success.
I0323 02:24:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:24:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:24:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:24:53.410207  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:24:53.410225  543705 memory.go:184] no items to output this cycle
I0323 02:24:53.410246  543705 cpu.go:275] no items to output this cycle
E0323 02:25:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:25:03.409806  543705 memory.go:184] no items to output this cycle
I0323 02:25:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 02:25:13.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:25:13.409879  543705 memory.go:191] Add success.
W0323 02:25:13.409908  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:25:13.409925  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:25:13.409928  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:25:13.410048  543705 cpu.go:282] Add success.
I0323 02:25:13.419721  543705 net.go:648] Add success.
I0323 02:25:13.422296  543705 net.go:770] primary dev: ETH0
I0323 02:25:13.422308  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:25:13.422320  543705 net.go:698] Add success.
I0323 02:25:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:25:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:25:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 02:25:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:25:14.456489  543705 disk_worker.go:494] system disk:vda1
I0323 02:25:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:25:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:25:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:25:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:25:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:25:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:25:23.409776  543705 memory.go:184] no items to output this cycle
I0323 02:25:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 02:25:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:25:33.409774  543705 memory.go:184] no items to output this cycle
I0323 02:25:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 02:25:37.668361  543705 disk_info.go:125] begin check local disk info of client
I0323 02:25:37.670938  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:25:37.670945  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae80 0xc00007aec0]
E0323 02:25:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:25:43.410598  543705 memory.go:191] Add success.
I0323 02:25:43.409830  543705 cpu.go:282] Add success.
I0323 02:25:43.420318  543705 net.go:648] Add success.
I0323 02:25:43.422810  543705 net.go:770] primary dev: ETH0
I0323 02:25:43.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:25:43.422841  543705 net.go:698] Add success.
I0323 02:25:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:25:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:25:53.409788  543705 memory.go:184] no items to output this cycle
I0323 02:25:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 02:26:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:26:03.409779  543705 memory.go:184] no items to output this cycle
I0323 02:26:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 02:26:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:26:13.409830  543705 memory.go:191] Add success.
I0323 02:26:13.409835  543705 cpu.go:282] Add success.
W0323 02:26:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:26:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:26:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:26:13.419747  543705 net.go:648] Add success.
I0323 02:26:13.422237  543705 net.go:770] primary dev: ETH0
I0323 02:26:13.422252  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:26:13.422266  543705 net.go:698] Add success.
I0323 02:26:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:26:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:26:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 02:26:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:26:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 02:26:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:26:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:26:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:26:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:26:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:26:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:26:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:26:23.409797  543705 memory.go:184] no items to output this cycle
I0323 02:26:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 02:26:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:26:33.409782  543705 memory.go:184] no items to output this cycle
I0323 02:26:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 02:26:37.671025  543705 disk_info.go:125] begin check local disk info of client
I0323 02:26:37.673529  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:26:37.673536  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba40 0xc00007ba80]
E0323 02:26:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:26:43.410675  543705 memory.go:191] Add success.
I0323 02:26:43.409848  543705 cpu.go:282] Add success.
I0323 02:26:43.420475  543705 net.go:648] Add success.
I0323 02:26:43.423091  543705 net.go:770] primary dev: ETH0
I0323 02:26:43.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:26:43.423117  543705 net.go:698] Add success.
I0323 02:26:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:26:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:26:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:26:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:26:53.410264  543705 memory.go:184] no items to output this cycle
I0323 02:26:53.410290  543705 cpu.go:275] no items to output this cycle
E0323 02:27:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:27:03.409781  543705 memory.go:184] no items to output this cycle
I0323 02:27:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 02:27:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:27:13.409826  543705 memory.go:191] Add success.
I0323 02:27:13.409847  543705 cpu.go:282] Add success.
W0323 02:27:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:27:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:27:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:27:13.419757  543705 net.go:648] Add success.
I0323 02:27:13.422639  543705 net.go:770] primary dev: ETH0
I0323 02:27:13.422653  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:27:13.422667  543705 net.go:698] Add success.
I0323 02:27:13.428852  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 02:27:13.453023  543705 event_worker.go:152] Polling the log file for events...
I0323 02:27:13.464890  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"89ac6cbf-324f-47c5-801c-765f76cc9c19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:27:13.464922  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 02:27:14.455240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:27:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0323 02:27:14.455258  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:27:14.455860  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:27:14.455870  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:27:14.455875  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:27:14.456792  543705 disk_worker.go:494] system disk:vda1
I0323 02:27:14.456834  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:27:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:27:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:27:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:27:16.457944  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:27:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:27:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:27:16.472331  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:27:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:27:23.409778  543705 cpu.go:275] no items to output this cycle
I0323 02:27:23.409794  543705 memory.go:184] no items to output this cycle
E0323 02:27:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:27:33.409806  543705 memory.go:184] no items to output this cycle
I0323 02:27:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 02:27:37.673669  543705 disk_info.go:125] begin check local disk info of client
I0323 02:27:37.676265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:27:37.676271  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bdc0 0xc00007be00]
I0323 02:27:40.004699  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:27:40.004705  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:27:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:27:43.410845  543705 memory.go:191] Add success.
I0323 02:27:43.409835  543705 cpu.go:282] Add success.
I0323 02:27:43.420568  543705 net.go:648] Add success.
I0323 02:27:43.423149  543705 net.go:770] primary dev: ETH0
I0323 02:27:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:27:43.423174  543705 net.go:698] Add success.
I0323 02:27:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:27:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:27:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:27:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:27:53.409772  543705 memory.go:184] no items to output this cycle
I0323 02:27:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 02:28:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:28:03.409781  543705 cpu.go:275] no items to output this cycle
I0323 02:28:03.409793  543705 memory.go:184] no items to output this cycle
E0323 02:28:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:28:13.409921  543705 memory.go:191] Add success.
I0323 02:28:13.409945  543705 cpu.go:282] Add success.
W0323 02:28:13.409957  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:28:13.409971  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:28:13.409974  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:28:13.419731  543705 net.go:648] Add success.
I0323 02:28:13.422297  543705 net.go:770] primary dev: ETH0
I0323 02:28:13.422309  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:28:13.422320  543705 net.go:698] Add success.
I0323 02:28:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:28:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:28:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0323 02:28:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:28:14.456508  543705 disk_worker.go:494] system disk:vda1
I0323 02:28:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:28:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:28:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:28:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:28:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:28:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:28:23.409773  543705 memory.go:184] no items to output this cycle
I0323 02:28:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 02:28:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:28:33.409770  543705 memory.go:184] no items to output this cycle
I0323 02:28:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 02:28:37.676355  543705 disk_info.go:125] begin check local disk info of client
I0323 02:28:37.678935  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:28:37.678941  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b00c0 0xc0002b0100]
E0323 02:28:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:28:43.410622  543705 memory.go:191] Add success.
I0323 02:28:43.409807  543705 cpu.go:282] Add success.
I0323 02:28:43.420376  543705 net.go:648] Add success.
I0323 02:28:43.423087  543705 net.go:770] primary dev: ETH0
I0323 02:28:43.423100  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:28:43.423113  543705 net.go:698] Add success.
I0323 02:28:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:28:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:28:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:28:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:28:53.409781  543705 memory.go:184] no items to output this cycle
I0323 02:28:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 02:29:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:29:03.409802  543705 memory.go:184] no items to output this cycle
I0323 02:29:03.409811  543705 cpu.go:275] no items to output this cycle
W0323 02:29:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:29:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:29:13.409732  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 02:29:13.409919  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:29:13.409941  543705 memory.go:191] Add success.
I0323 02:29:13.410036  543705 cpu.go:282] Add success.
I0323 02:29:13.419718  543705 net.go:648] Add success.
I0323 02:29:13.422242  543705 net.go:770] primary dev: ETH0
I0323 02:29:13.422255  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:29:13.422267  543705 net.go:698] Add success.
I0323 02:29:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:29:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:29:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 02:29:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:29:14.456582  543705 disk_worker.go:494] system disk:vda1
I0323 02:29:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:29:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:29:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:29:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:29:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:29:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:29:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:29:23.409771  543705 memory.go:184] no items to output this cycle
I0323 02:29:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 02:29:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:29:33.409776  543705 memory.go:184] no items to output this cycle
I0323 02:29:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 02:29:37.679024  543705 disk_info.go:125] begin check local disk info of client
I0323 02:29:37.681566  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:29:37.681573  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4180 0xc0000c41c0]
E0323 02:29:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:29:43.410492  543705 memory.go:191] Add success.
I0323 02:29:43.409824  543705 cpu.go:282] Add success.
I0323 02:29:43.420194  543705 net.go:648] Add success.
I0323 02:29:43.422626  543705 net.go:770] primary dev: ETH0
I0323 02:29:43.422639  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:29:43.422652  543705 net.go:698] Add success.
I0323 02:29:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:29:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:29:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:29:53.409813  543705 memory.go:184] no items to output this cycle
I0323 02:29:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 02:30:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:30:03.409778  543705 memory.go:184] no items to output this cycle
I0323 02:30:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 02:30:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:30:13.409800  543705 memory.go:191] Add success.
I0323 02:30:13.409801  543705 cpu.go:282] Add success.
W0323 02:30:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:30:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:30:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:30:13.420285  543705 net.go:648] Add success.
I0323 02:30:13.423105  543705 net.go:770] primary dev: ETH0
I0323 02:30:13.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:30:13.423128  543705 net.go:698] Add success.
I0323 02:30:13.658194  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fc89d3ee-a1e1-42a6-9eb5-8b4069577ca2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:30:13.658226  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:30:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:30:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:30:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 02:30:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:30:14.456618  543705 disk_worker.go:494] system disk:vda1
I0323 02:30:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:30:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:30:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:30:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:30:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:30:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:30:23.409768  543705 memory.go:184] no items to output this cycle
I0323 02:30:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 02:30:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:30:33.409781  543705 memory.go:184] no items to output this cycle
I0323 02:30:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 02:30:37.681670  543705 disk_info.go:125] begin check local disk info of client
I0323 02:30:37.684198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:30:37.684204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ab0c0 0xc0002ab100]
I0323 02:30:40.005732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:30:40.005739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:30:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:30:43.410712  543705 memory.go:191] Add success.
I0323 02:30:43.409822  543705 cpu.go:282] Add success.
I0323 02:30:43.420410  543705 net.go:648] Add success.
I0323 02:30:43.423098  543705 net.go:770] primary dev: ETH0
I0323 02:30:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:30:43.423125  543705 net.go:698] Add success.
I0323 02:30:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:30:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:30:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:30:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:30:53.409793  543705 memory.go:184] no items to output this cycle
I0323 02:30:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:31:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:31:03.409780  543705 memory.go:184] no items to output this cycle
I0323 02:31:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 02:31:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:31:13.409788  543705 memory.go:191] Add success.
W0323 02:31:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:31:13.409816  543705 cpu.go:282] Add success.
W0323 02:31:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:31:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:31:13.420183  543705 net.go:648] Add success.
I0323 02:31:13.422744  543705 net.go:770] primary dev: ETH0
I0323 02:31:13.422762  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:31:13.422779  543705 net.go:698] Add success.
I0323 02:31:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:31:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:31:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 02:31:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:31:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 02:31:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:31:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:31:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:31:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:31:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:31:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:31:23.410393  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:31:23.410411  543705 memory.go:184] no items to output this cycle
I0323 02:31:23.410423  543705 cpu.go:275] no items to output this cycle
E0323 02:31:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:31:33.409771  543705 memory.go:184] no items to output this cycle
I0323 02:31:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 02:31:37.685062  543705 disk_info.go:125] begin check local disk info of client
I0323 02:31:37.687607  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:31:37.687613  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab400 0xc0001ab440]
E0323 02:31:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:31:43.410680  543705 memory.go:191] Add success.
I0323 02:31:43.409821  543705 cpu.go:282] Add success.
I0323 02:31:43.420371  543705 net.go:648] Add success.
I0323 02:31:43.423041  543705 net.go:770] primary dev: ETH0
I0323 02:31:43.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:31:43.423070  543705 net.go:698] Add success.
I0323 02:31:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:31:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:31:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:31:53.410215  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:31:53.410233  543705 memory.go:184] no items to output this cycle
I0323 02:31:53.410242  543705 cpu.go:275] no items to output this cycle
E0323 02:32:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:32:03.409769  543705 memory.go:184] no items to output this cycle
I0323 02:32:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:32:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:32:13.409821  543705 memory.go:191] Add success.
I0323 02:32:13.409828  543705 cpu.go:282] Add success.
W0323 02:32:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:32:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:32:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:32:13.420279  543705 net.go:648] Add success.
I0323 02:32:13.422952  543705 net.go:770] primary dev: ETH0
I0323 02:32:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:32:13.422981  543705 net.go:698] Add success.
W0323 02:32:14.456100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:32:14.456111  543705 disk_worker.go:708] disk space is not compliant
W0323 02:32:14.456113  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:32:14.456696  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:32:14.456706  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:32:14.456712  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:32:14.457697  543705 disk_worker.go:494] system disk:vda1
I0323 02:32:14.457726  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:32:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:32:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:32:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:32:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:32:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:32:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:32:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:32:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:32:23.409777  543705 memory.go:184] no items to output this cycle
I0323 02:32:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 02:32:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:32:33.409795  543705 memory.go:184] no items to output this cycle
I0323 02:32:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 02:32:37.687705  543705 disk_info.go:125] begin check local disk info of client
I0323 02:32:37.690086  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:32:37.690093  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b400 0xc00007b440]
E0323 02:32:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:32:43.410803  543705 memory.go:191] Add success.
I0323 02:32:43.409815  543705 cpu.go:282] Add success.
I0323 02:32:43.420560  543705 net.go:648] Add success.
I0323 02:32:43.423435  543705 net.go:770] primary dev: ETH0
I0323 02:32:43.423450  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:32:43.423466  543705 net.go:698] Add success.
I0323 02:32:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:32:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:32:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:32:53.410400  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:32:53.410414  543705 cpu.go:275] no items to output this cycle
I0323 02:32:53.410417  543705 memory.go:184] no items to output this cycle
E0323 02:33:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:33:03.409793  543705 memory.go:184] no items to output this cycle
I0323 02:33:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 02:33:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:33:13.409789  543705 memory.go:191] Add success.
I0323 02:33:13.409805  543705 cpu.go:282] Add success.
W0323 02:33:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:33:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:33:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:33:13.420050  543705 net.go:648] Add success.
I0323 02:33:13.422564  543705 net.go:770] primary dev: ETH0
I0323 02:33:13.422578  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:33:13.422591  543705 net.go:698] Add success.
I0323 02:33:13.470331  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4c03353-c171-4084-aa6f-ce482bfcdc38","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:33:13.470363  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:33:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:33:14.455271  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:33:14.455366  543705 disk_worker.go:708] disk space is not compliant
W0323 02:33:14.455370  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:33:14.456999  543705 disk_worker.go:494] system disk:vda1
I0323 02:33:14.457028  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:33:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:33:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:33:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:33:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:33:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:33:23.409790  543705 memory.go:184] no items to output this cycle
I0323 02:33:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 02:33:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:33:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 02:33:33.409785  543705 memory.go:184] no items to output this cycle
I0323 02:33:37.691104  543705 disk_info.go:125] begin check local disk info of client
I0323 02:33:37.693521  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:33:37.693528  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5340 0xc0000c5380]
I0323 02:33:40.008718  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:33:40.008725  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:33:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:33:43.410632  543705 memory.go:191] Add success.
I0323 02:33:43.409816  543705 cpu.go:282] Add success.
I0323 02:33:43.420348  543705 net.go:648] Add success.
I0323 02:33:43.422974  543705 net.go:770] primary dev: ETH0
I0323 02:33:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:33:43.423001  543705 net.go:698] Add success.
I0323 02:33:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:33:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:33:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:33:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:33:53.409803  543705 memory.go:184] no items to output this cycle
I0323 02:33:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 02:34:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:34:03.409784  543705 memory.go:184] no items to output this cycle
I0323 02:34:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 02:34:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:34:13.409799  543705 memory.go:191] Add success.
I0323 02:34:13.409804  543705 cpu.go:282] Add success.
W0323 02:34:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:34:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:34:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:34:13.420046  543705 net.go:648] Add success.
I0323 02:34:13.422557  543705 net.go:770] primary dev: ETH0
I0323 02:34:13.422572  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:34:13.422585  543705 net.go:698] Add success.
I0323 02:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:34:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:34:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 02:34:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:34:14.458970  543705 disk_worker.go:494] system disk:vda1
I0323 02:34:14.459001  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:34:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:34:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:34:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:34:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:34:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:34:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:34:23.409790  543705 memory.go:184] no items to output this cycle
I0323 02:34:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 02:34:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:34:33.409786  543705 memory.go:184] no items to output this cycle
I0323 02:34:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 02:34:37.693675  543705 disk_info.go:125] begin check local disk info of client
I0323 02:34:37.696110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:34:37.696116  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a240 0xc00047a280]
E0323 02:34:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:34:43.410698  543705 memory.go:191] Add success.
I0323 02:34:43.409801  543705 cpu.go:282] Add success.
I0323 02:34:43.420397  543705 net.go:648] Add success.
I0323 02:34:43.422939  543705 net.go:770] primary dev: ETH0
I0323 02:34:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:34:43.422973  543705 net.go:698] Add success.
I0323 02:34:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:34:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:34:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:34:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:34:53.409772  543705 memory.go:184] no items to output this cycle
I0323 02:34:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 02:35:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:35:03.409778  543705 memory.go:184] no items to output this cycle
I0323 02:35:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 02:35:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:35:13.409820  543705 memory.go:191] Add success.
I0323 02:35:13.409828  543705 cpu.go:282] Add success.
W0323 02:35:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:35:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:35:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:35:13.419954  543705 net.go:770] primary dev: ETH0
I0323 02:35:13.419969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:35:13.419984  543705 net.go:698] Add success.
I0323 02:35:13.420362  543705 net.go:648] Add success.
I0323 02:35:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:35:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:35:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 02:35:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:35:14.456573  543705 disk_worker.go:494] system disk:vda1
I0323 02:35:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:35:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:35:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:35:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:35:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:35:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:35:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:35:23.409764  543705 memory.go:184] no items to output this cycle
I0323 02:35:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 02:35:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:35:33.409773  543705 memory.go:184] no items to output this cycle
I0323 02:35:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 02:35:37.696202  543705 disk_info.go:125] begin check local disk info of client
I0323 02:35:37.698802  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:35:37.698809  543705 disk_info.go:196] parse disk info done, disk is : [0xc000263a00 0xc000263a40]
E0323 02:35:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:35:43.410624  543705 memory.go:191] Add success.
I0323 02:35:43.409822  543705 cpu.go:282] Add success.
I0323 02:35:43.420357  543705 net.go:648] Add success.
I0323 02:35:43.423047  543705 net.go:770] primary dev: ETH0
I0323 02:35:43.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:35:43.423072  543705 net.go:698] Add success.
I0323 02:35:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:35:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:35:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:35:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:35:53.409779  543705 cpu.go:275] no items to output this cycle
I0323 02:35:53.409786  543705 memory.go:184] no items to output this cycle
E0323 02:36:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:36:03.409800  543705 memory.go:184] no items to output this cycle
I0323 02:36:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 02:36:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:36:13.409830  543705 memory.go:191] Add success.
I0323 02:36:13.409834  543705 cpu.go:282] Add success.
W0323 02:36:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:36:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:36:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:36:13.420279  543705 net.go:648] Add success.
I0323 02:36:13.423061  543705 net.go:770] primary dev: ETH0
I0323 02:36:13.423074  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:36:13.423086  543705 net.go:698] Add success.
I0323 02:36:13.469387  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d3ffbeef-f93d-41cd-9b1f-dd91be8d7aba","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:36:13.469422  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:36:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:36:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:36:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 02:36:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:36:14.456808  543705 disk_worker.go:494] system disk:vda1
I0323 02:36:14.456844  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:36:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:36:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:36:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:36:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:36:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:36:23.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:36:23.410269  543705 memory.go:184] no items to output this cycle
I0323 02:36:23.410279  543705 cpu.go:275] no items to output this cycle
E0323 02:36:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:36:33.409771  543705 memory.go:184] no items to output this cycle
I0323 02:36:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 02:36:37.698894  543705 disk_info.go:125] begin check local disk info of client
I0323 02:36:37.701396  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:36:37.701403  543705 disk_info.go:196] parse disk info done, disk is : [0xc000263a80 0xc000263ac0]
I0323 02:36:40.009722  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:36:40.009728  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:36:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:36:43.410749  543705 memory.go:191] Add success.
I0323 02:36:43.409792  543705 cpu.go:282] Add success.
I0323 02:36:43.420484  543705 net.go:648] Add success.
I0323 02:36:43.423064  543705 net.go:770] primary dev: ETH0
I0323 02:36:43.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:36:43.423093  543705 net.go:698] Add success.
I0323 02:36:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:36:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:36:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:36:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:36:53.409808  543705 memory.go:184] no items to output this cycle
I0323 02:36:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 02:37:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:37:03.409768  543705 memory.go:184] no items to output this cycle
I0323 02:37:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 02:37:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:37:13.409824  543705 memory.go:191] Add success.
I0323 02:37:13.409830  543705 cpu.go:282] Add success.
W0323 02:37:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:37:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:37:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:37:13.420199  543705 net.go:648] Add success.
I0323 02:37:13.423188  543705 net.go:770] primary dev: ETH0
I0323 02:37:13.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:37:13.423226  543705 net.go:698] Add success.
I0323 02:37:13.452779  543705 event_worker.go:152] Polling the log file for events...
W0323 02:37:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:37:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 02:37:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:37:14.456801  543705 disk_worker.go:494] system disk:vda1
I0323 02:37:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:37:14.457096  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:37:14.457104  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:37:14.457109  543705 custom_config.go:64] query custom config with name: gpu
E0323 02:37:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:37:15.456823  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:37:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:37:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:37:16.457993  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:37:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:37:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:37:23.410256  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:37:23.410271  543705 memory.go:184] no items to output this cycle
I0323 02:37:23.410273  543705 cpu.go:275] no items to output this cycle
E0323 02:37:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:37:33.409783  543705 memory.go:184] no items to output this cycle
I0323 02:37:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 02:37:37.701671  543705 disk_info.go:125] begin check local disk info of client
I0323 02:37:37.704162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:37:37.704168  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a200 0xc00047a240]
E0323 02:37:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:37:43.410708  543705 memory.go:191] Add success.
I0323 02:37:43.409804  543705 cpu.go:282] Add success.
I0323 02:37:43.420410  543705 net.go:648] Add success.
I0323 02:37:43.423279  543705 net.go:770] primary dev: ETH0
I0323 02:37:43.423293  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:37:43.423305  543705 net.go:698] Add success.
I0323 02:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:37:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:37:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:37:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:37:53.409782  543705 memory.go:184] no items to output this cycle
I0323 02:37:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 02:38:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:38:03.409793  543705 memory.go:184] no items to output this cycle
I0323 02:38:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 02:38:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:38:13.409791  543705 memory.go:191] Add success.
I0323 02:38:13.409809  543705 cpu.go:282] Add success.
W0323 02:38:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:38:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:38:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:38:13.420153  543705 net.go:648] Add success.
I0323 02:38:13.422734  543705 net.go:770] primary dev: ETH0
I0323 02:38:13.422747  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:38:13.422758  543705 net.go:698] Add success.
I0323 02:38:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:38:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:38:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0323 02:38:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:38:14.456508  543705 disk_worker.go:494] system disk:vda1
I0323 02:38:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:38:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:38:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:38:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:38:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:38:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:38:23.409793  543705 memory.go:184] no items to output this cycle
I0323 02:38:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 02:38:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:38:33.409772  543705 memory.go:184] no items to output this cycle
I0323 02:38:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 02:38:37.704261  543705 disk_info.go:125] begin check local disk info of client
I0323 02:38:37.706761  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:38:37.706767  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0323 02:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:38:43.410705  543705 memory.go:191] Add success.
I0323 02:38:43.409795  543705 cpu.go:282] Add success.
I0323 02:38:43.420405  543705 net.go:648] Add success.
I0323 02:38:43.423177  543705 net.go:770] primary dev: ETH0
I0323 02:38:43.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:38:43.423203  543705 net.go:698] Add success.
I0323 02:38:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:38:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:38:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:38:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:38:53.409783  543705 memory.go:184] no items to output this cycle
I0323 02:38:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 02:39:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:39:03.409796  543705 memory.go:184] no items to output this cycle
I0323 02:39:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 02:39:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:39:13.409824  543705 memory.go:191] Add success.
I0323 02:39:13.409829  543705 cpu.go:282] Add success.
W0323 02:39:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:39:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:39:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:39:13.420597  543705 net.go:648] Add success.
I0323 02:39:13.423527  543705 net.go:770] primary dev: ETH0
I0323 02:39:13.423543  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:39:13.423557  543705 net.go:698] Add success.
I0323 02:39:13.658207  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a22754a3-97db-4c90-adf3-a915fb376815","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:39:13.658240  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:39:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:39:14.455224  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:39:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0323 02:39:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:39:14.456810  543705 disk_worker.go:494] system disk:vda1
I0323 02:39:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:39:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:39:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:39:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:39:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:39:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:39:23.409799  543705 memory.go:184] no items to output this cycle
I0323 02:39:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 02:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:39:33.409777  543705 memory.go:184] no items to output this cycle
I0323 02:39:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 02:39:37.708180  543705 disk_info.go:125] begin check local disk info of client
I0323 02:39:37.710735  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:39:37.710741  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039de40 0xc00039de80]
I0323 02:39:40.012750  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:39:40.012756  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:39:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:39:43.410753  543705 memory.go:191] Add success.
I0323 02:39:43.409807  543705 cpu.go:282] Add success.
I0323 02:39:43.420471  543705 net.go:648] Add success.
I0323 02:39:43.423032  543705 net.go:770] primary dev: ETH0
I0323 02:39:43.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:39:43.423061  543705 net.go:698] Add success.
I0323 02:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:39:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:39:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:39:53.410388  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:39:53.410405  543705 memory.go:184] no items to output this cycle
I0323 02:39:53.410404  543705 cpu.go:275] no items to output this cycle
E0323 02:40:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:40:03.409776  543705 memory.go:184] no items to output this cycle
I0323 02:40:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 02:40:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:40:13.409827  543705 memory.go:191] Add success.
I0323 02:40:13.409828  543705 cpu.go:282] Add success.
W0323 02:40:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:40:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:40:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:40:13.420068  543705 net.go:648] Add success.
I0323 02:40:13.422927  543705 net.go:770] primary dev: ETH0
I0323 02:40:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:40:13.422956  543705 net.go:698] Add success.
I0323 02:40:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:40:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:40:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 02:40:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:40:14.456601  543705 disk_worker.go:494] system disk:vda1
I0323 02:40:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:40:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:40:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:40:23.410522  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:40:23.410540  543705 memory.go:184] no items to output this cycle
I0323 02:40:23.410552  543705 cpu.go:275] no items to output this cycle
E0323 02:40:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:40:33.409775  543705 memory.go:184] no items to output this cycle
I0323 02:40:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 02:40:37.710825  543705 disk_info.go:125] begin check local disk info of client
I0323 02:40:37.713338  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:40:37.713344  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc80 0xc0001abcc0]
E0323 02:40:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:40:43.410690  543705 memory.go:191] Add success.
I0323 02:40:43.409798  543705 cpu.go:282] Add success.
I0323 02:40:43.420398  543705 net.go:648] Add success.
I0323 02:40:43.423071  543705 net.go:770] primary dev: ETH0
I0323 02:40:43.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:40:43.423100  543705 net.go:698] Add success.
I0323 02:40:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:40:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:40:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:40:53.409776  543705 memory.go:184] no items to output this cycle
I0323 02:40:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 02:41:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:41:03.409812  543705 memory.go:184] no items to output this cycle
I0323 02:41:03.409825  543705 cpu.go:275] no items to output this cycle
E0323 02:41:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:41:13.409795  543705 memory.go:191] Add success.
I0323 02:41:13.409812  543705 cpu.go:282] Add success.
W0323 02:41:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:41:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:41:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:41:13.420638  543705 net.go:648] Add success.
I0323 02:41:13.423428  543705 net.go:770] primary dev: ETH0
I0323 02:41:13.423441  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:41:13.423454  543705 net.go:698] Add success.
I0323 02:41:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:41:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:41:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 02:41:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:41:14.456605  543705 disk_worker.go:494] system disk:vda1
I0323 02:41:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:41:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:41:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:41:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:41:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:41:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:41:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:41:23.409761  543705 memory.go:184] no items to output this cycle
I0323 02:41:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 02:41:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:41:33.409781  543705 memory.go:184] no items to output this cycle
I0323 02:41:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 02:41:37.713672  543705 disk_info.go:125] begin check local disk info of client
I0323 02:41:37.716187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:41:37.716193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002663c0 0xc000266400]
E0323 02:41:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:41:43.410890  543705 memory.go:191] Add success.
I0323 02:41:43.409815  543705 cpu.go:282] Add success.
I0323 02:41:43.420593  543705 net.go:648] Add success.
I0323 02:41:43.423632  543705 net.go:770] primary dev: ETH0
I0323 02:41:43.423646  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:41:43.423658  543705 net.go:698] Add success.
I0323 02:41:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:41:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:41:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:41:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:41:53.409781  543705 memory.go:184] no items to output this cycle
I0323 02:41:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 02:42:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:42:03.409786  543705 memory.go:184] no items to output this cycle
I0323 02:42:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 02:42:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:42:13.409794  543705 memory.go:191] Add success.
I0323 02:42:13.409816  543705 cpu.go:282] Add success.
W0323 02:42:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:42:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:42:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:42:13.420223  543705 net.go:648] Add success.
I0323 02:42:13.422653  543705 net.go:770] primary dev: ETH0
I0323 02:42:13.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:42:13.422679  543705 net.go:698] Add success.
I0323 02:42:13.468269  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ae9a84e1-0b34-48d6-9bcb-a6329824ab9c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:42:13.468303  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 02:42:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:42:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 02:42:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:42:14.455917  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:42:14.455925  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:42:14.455931  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:42:14.456560  543705 disk_worker.go:494] system disk:vda1
I0323 02:42:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:42:15.456786  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:42:15.456794  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:42:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:42:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:42:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:42:16.457991  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:42:16.472315  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:42:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:42:23.409798  543705 memory.go:184] no items to output this cycle
I0323 02:42:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 02:42:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:42:33.409779  543705 memory.go:184] no items to output this cycle
I0323 02:42:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 02:42:37.716277  543705 disk_info.go:125] begin check local disk info of client
I0323 02:42:37.718969  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:42:37.718976  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ac080 0xc0003ac0c0]
I0323 02:42:40.013723  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:42:40.013729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:42:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:42:43.410829  543705 memory.go:191] Add success.
I0323 02:42:43.409826  543705 cpu.go:282] Add success.
I0323 02:42:43.420548  543705 net.go:648] Add success.
I0323 02:42:43.423421  543705 net.go:770] primary dev: ETH0
I0323 02:42:43.423434  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:42:43.423459  543705 net.go:698] Add success.
I0323 02:42:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:42:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:42:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:42:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:42:53.409783  543705 memory.go:184] no items to output this cycle
I0323 02:42:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 02:43:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:43:03.409820  543705 memory.go:184] no items to output this cycle
I0323 02:43:03.409840  543705 cpu.go:275] no items to output this cycle
E0323 02:43:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:43:13.409804  543705 memory.go:191] Add success.
I0323 02:43:13.409804  543705 cpu.go:282] Add success.
W0323 02:43:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:43:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:43:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:43:13.420146  543705 net.go:648] Add success.
I0323 02:43:13.422800  543705 net.go:770] primary dev: ETH0
I0323 02:43:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:43:13.422827  543705 net.go:698] Add success.
I0323 02:43:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:43:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:43:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 02:43:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:43:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 02:43:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:43:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:43:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:43:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:43:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:43:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:43:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:43:23.409788  543705 memory.go:184] no items to output this cycle
I0323 02:43:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 02:43:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:43:33.409803  543705 memory.go:184] no items to output this cycle
I0323 02:43:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 02:43:37.720236  543705 disk_info.go:125] begin check local disk info of client
I0323 02:43:37.722806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:43:37.722813  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac80 0xc0001aacc0]
E0323 02:43:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:43:43.410742  543705 memory.go:191] Add success.
I0323 02:43:43.409809  543705 cpu.go:282] Add success.
I0323 02:43:43.420511  543705 net.go:648] Add success.
I0323 02:43:43.422898  543705 net.go:770] primary dev: ETH0
I0323 02:43:43.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:43:43.422924  543705 net.go:698] Add success.
I0323 02:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:43:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:43:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:43:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:43:53.409793  543705 memory.go:184] no items to output this cycle
I0323 02:43:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 02:44:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:44:03.409779  543705 memory.go:184] no items to output this cycle
I0323 02:44:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 02:44:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:44:13.409788  543705 memory.go:191] Add success.
I0323 02:44:13.409806  543705 cpu.go:282] Add success.
W0323 02:44:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:44:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:44:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:44:13.420285  543705 net.go:648] Add success.
I0323 02:44:13.423416  543705 net.go:770] primary dev: ETH0
I0323 02:44:13.423429  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:44:13.423441  543705 net.go:698] Add success.
I0323 02:44:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:44:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:44:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 02:44:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:44:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 02:44:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:44:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:44:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:44:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:44:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:44:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:44:23.409797  543705 memory.go:184] no items to output this cycle
I0323 02:44:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 02:44:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:44:33.409782  543705 memory.go:184] no items to output this cycle
I0323 02:44:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 02:44:37.724258  543705 disk_info.go:125] begin check local disk info of client
I0323 02:44:37.726783  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:44:37.726789  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047bb00 0xc00047bb40]
E0323 02:44:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:44:43.410876  543705 memory.go:191] Add success.
I0323 02:44:43.409826  543705 cpu.go:282] Add success.
I0323 02:44:43.420649  543705 net.go:648] Add success.
I0323 02:44:43.423453  543705 net.go:770] primary dev: ETH0
I0323 02:44:43.423467  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:44:43.423479  543705 net.go:698] Add success.
I0323 02:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:44:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:44:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:44:53.409772  543705 memory.go:184] no items to output this cycle
I0323 02:44:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 02:45:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:45:03.409764  543705 memory.go:184] no items to output this cycle
I0323 02:45:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 02:45:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:45:13.409820  543705 memory.go:191] Add success.
I0323 02:45:13.409826  543705 cpu.go:282] Add success.
W0323 02:45:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:45:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:45:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:45:13.420171  543705 net.go:648] Add success.
I0323 02:45:13.422694  543705 net.go:770] primary dev: ETH0
I0323 02:45:13.422706  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:45:13.422718  543705 net.go:698] Add success.
I0323 02:45:13.493272  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e541fd9-4986-463a-8bcd-b90be3a58c41","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:45:13.493303  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:45:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:45:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:45:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0323 02:45:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:45:14.456789  543705 disk_worker.go:494] system disk:vda1
I0323 02:45:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:45:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:45:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:45:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:45:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:45:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:45:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:45:23.409773  543705 memory.go:184] no items to output this cycle
I0323 02:45:23.409774  543705 cpu.go:275] no items to output this cycle
E0323 02:45:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:45:33.409786  543705 memory.go:184] no items to output this cycle
I0323 02:45:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 02:45:37.726870  543705 disk_info.go:125] begin check local disk info of client
I0323 02:45:37.729445  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:45:37.729452  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4480 0xc0000c4540]
I0323 02:45:40.016760  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:45:40.016766  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:45:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:45:43.410679  543705 memory.go:191] Add success.
I0323 02:45:43.409789  543705 cpu.go:282] Add success.
I0323 02:45:43.420368  543705 net.go:648] Add success.
I0323 02:45:43.423058  543705 net.go:770] primary dev: ETH0
I0323 02:45:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:45:43.423083  543705 net.go:698] Add success.
I0323 02:45:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:45:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:45:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:45:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:45:53.409772  543705 memory.go:184] no items to output this cycle
I0323 02:45:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 02:46:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:46:03.409778  543705 memory.go:184] no items to output this cycle
I0323 02:46:03.409781  543705 cpu.go:275] no items to output this cycle
E0323 02:46:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:46:13.409801  543705 memory.go:191] Add success.
I0323 02:46:13.409802  543705 cpu.go:282] Add success.
W0323 02:46:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:46:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:46:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:46:13.420145  543705 net.go:648] Add success.
I0323 02:46:13.423064  543705 net.go:770] primary dev: ETH0
I0323 02:46:13.423078  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:46:13.423091  543705 net.go:698] Add success.
I0323 02:46:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:46:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:46:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 02:46:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:46:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 02:46:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:46:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:46:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:46:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:46:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:46:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:46:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:46:23.409775  543705 memory.go:184] no items to output this cycle
I0323 02:46:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 02:46:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:46:33.409785  543705 memory.go:184] no items to output this cycle
I0323 02:46:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 02:46:37.729678  543705 disk_info.go:125] begin check local disk info of client
I0323 02:46:37.732199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:46:37.732205  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047bbc0 0xc00047bc00]
E0323 02:46:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:46:43.410620  543705 memory.go:191] Add success.
I0323 02:46:43.409804  543705 cpu.go:282] Add success.
I0323 02:46:43.420333  543705 net.go:648] Add success.
I0323 02:46:43.422996  543705 net.go:770] primary dev: ETH0
I0323 02:46:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:46:43.423026  543705 net.go:698] Add success.
I0323 02:46:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:46:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:46:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:46:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:46:53.409812  543705 memory.go:184] no items to output this cycle
I0323 02:46:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 02:47:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:47:03.409775  543705 memory.go:184] no items to output this cycle
I0323 02:47:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 02:47:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:47:13.409790  543705 memory.go:191] Add success.
W0323 02:47:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:47:13.409819  543705 cpu.go:282] Add success.
W0323 02:47:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:47:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:47:13.420307  543705 net.go:648] Add success.
I0323 02:47:13.423382  543705 net.go:770] primary dev: ETH0
I0323 02:47:13.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:47:13.423410  543705 net.go:698] Add success.
I0323 02:47:13.453010  543705 event_worker.go:152] Polling the log file for events...
W0323 02:47:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:47:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 02:47:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:47:14.456929  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:47:14.456938  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:47:14.456945  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:47:14.457019  543705 disk_worker.go:494] system disk:vda1
I0323 02:47:14.457051  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:47:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:47:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:47:16.457959  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:47:16.457967  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:47:16.458012  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:47:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:47:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:47:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:47:23.409787  543705 memory.go:184] no items to output this cycle
I0323 02:47:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:47:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:47:33.409783  543705 memory.go:184] no items to output this cycle
I0323 02:47:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 02:47:37.732288  543705 disk_info.go:125] begin check local disk info of client
I0323 02:47:37.734907  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:47:37.734913  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0323 02:47:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:47:43.410665  543705 memory.go:191] Add success.
I0323 02:47:43.409815  543705 cpu.go:282] Add success.
I0323 02:47:43.420343  543705 net.go:648] Add success.
I0323 02:47:43.423368  543705 net.go:770] primary dev: ETH0
I0323 02:47:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:47:43.423394  543705 net.go:698] Add success.
I0323 02:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:47:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:47:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:47:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:47:53.409792  543705 memory.go:184] no items to output this cycle
I0323 02:47:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:48:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:48:03.409783  543705 memory.go:184] no items to output this cycle
I0323 02:48:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 02:48:13.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:48:13.409832  543705 cpu.go:282] Add success.
I0323 02:48:13.409849  543705 memory.go:191] Add success.
W0323 02:48:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:48:13.409899  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:48:13.409903  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:48:13.419898  543705 net.go:648] Add success.
I0323 02:48:13.422778  543705 net.go:770] primary dev: ETH0
I0323 02:48:13.422793  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:48:13.422805  543705 net.go:698] Add success.
I0323 02:48:13.464249  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"965f2673-8cd5-4e2f-a8c0-4729a89dc03b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:48:13.464290  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:48:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:48:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:48:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 02:48:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:48:14.456703  543705 disk_worker.go:494] system disk:vda1
I0323 02:48:14.456740  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:48:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:48:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:48:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:48:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:48:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:48:23.409802  543705 memory.go:184] no items to output this cycle
I0323 02:48:23.409817  543705 cpu.go:275] no items to output this cycle
E0323 02:48:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:48:33.409813  543705 memory.go:184] no items to output this cycle
I0323 02:48:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 02:48:37.734998  543705 disk_info.go:125] begin check local disk info of client
I0323 02:48:37.737519  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:48:37.737525  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab4c0 0xc0001ab500]
I0323 02:48:40.017729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:48:40.017735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:48:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:48:43.410611  543705 memory.go:191] Add success.
I0323 02:48:43.409819  543705 cpu.go:282] Add success.
I0323 02:48:43.420332  543705 net.go:648] Add success.
I0323 02:48:43.422947  543705 net.go:770] primary dev: ETH0
I0323 02:48:43.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:48:43.422973  543705 net.go:698] Add success.
I0323 02:48:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:48:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:48:53.409776  543705 memory.go:184] no items to output this cycle
I0323 02:48:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 02:49:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:49:03.409774  543705 memory.go:184] no items to output this cycle
I0323 02:49:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 02:49:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:49:13.409819  543705 memory.go:191] Add success.
I0323 02:49:13.409829  543705 cpu.go:282] Add success.
W0323 02:49:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:49:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:49:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:49:13.420233  543705 net.go:648] Add success.
I0323 02:49:13.423211  543705 net.go:770] primary dev: ETH0
I0323 02:49:13.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:49:13.423246  543705 net.go:698] Add success.
I0323 02:49:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:49:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:49:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 02:49:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:49:14.456626  543705 disk_worker.go:494] system disk:vda1
I0323 02:49:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:49:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:49:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:49:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:49:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:49:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:49:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:49:23.409778  543705 memory.go:184] no items to output this cycle
I0323 02:49:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 02:49:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:49:33.409771  543705 memory.go:184] no items to output this cycle
I0323 02:49:33.409922  543705 cpu.go:275] no items to output this cycle
I0323 02:49:37.737691  543705 disk_info.go:125] begin check local disk info of client
I0323 02:49:37.740192  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:49:37.740199  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a74c0 0xc0002a7500]
E0323 02:49:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:49:43.410740  543705 memory.go:191] Add success.
I0323 02:49:43.409819  543705 cpu.go:282] Add success.
I0323 02:49:43.420420  543705 net.go:648] Add success.
I0323 02:49:43.423238  543705 net.go:770] primary dev: ETH0
I0323 02:49:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:49:43.423277  543705 net.go:698] Add success.
I0323 02:49:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:49:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:49:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:49:53.410419  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:49:53.410439  543705 memory.go:184] no items to output this cycle
I0323 02:49:53.410450  543705 cpu.go:275] no items to output this cycle
E0323 02:50:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:50:03.409769  543705 memory.go:184] no items to output this cycle
I0323 02:50:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 02:50:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:50:13.409822  543705 memory.go:191] Add success.
I0323 02:50:13.409830  543705 cpu.go:282] Add success.
W0323 02:50:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:50:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:50:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:50:13.420161  543705 net.go:648] Add success.
I0323 02:50:13.423170  543705 net.go:770] primary dev: ETH0
I0323 02:50:13.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:50:13.423200  543705 net.go:698] Add success.
I0323 02:50:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:50:14.455088  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:50:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0323 02:50:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:50:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 02:50:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:50:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:50:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:50:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:50:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:50:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:50:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:50:23.409770  543705 memory.go:184] no items to output this cycle
I0323 02:50:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 02:50:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:50:33.409782  543705 memory.go:184] no items to output this cycle
I0323 02:50:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 02:50:37.741340  543705 disk_info.go:125] begin check local disk info of client
I0323 02:50:37.743869  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:50:37.743875  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472000 0xc000472040]
E0323 02:50:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:50:43.410612  543705 memory.go:191] Add success.
I0323 02:50:43.409796  543705 cpu.go:282] Add success.
I0323 02:50:43.420357  543705 net.go:648] Add success.
I0323 02:50:43.422957  543705 net.go:770] primary dev: ETH0
I0323 02:50:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:50:43.422981  543705 net.go:698] Add success.
I0323 02:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:50:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:50:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:50:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:50:53.409787  543705 memory.go:184] no items to output this cycle
I0323 02:50:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 02:51:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:51:03.409762  543705 memory.go:184] no items to output this cycle
I0323 02:51:03.409801  543705 cpu.go:275] no items to output this cycle
W0323 02:51:13.409716  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:51:13.409738  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:51:13.409744  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 02:51:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:51:13.409845  543705 cpu.go:282] Add success.
I0323 02:51:13.409858  543705 memory.go:191] Add success.
I0323 02:51:13.420061  543705 net.go:648] Add success.
I0323 02:51:13.422794  543705 net.go:770] primary dev: ETH0
I0323 02:51:13.422809  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:51:13.422825  543705 net.go:698] Add success.
I0323 02:51:13.801868  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b893bc36-1751-40ab-a6cd-013f152f96e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:51:13.801903  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:51:14.454689  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:51:14.454919  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:51:14.454928  543705 disk_worker.go:708] disk space is not compliant
W0323 02:51:14.454931  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:51:14.456482  543705 disk_worker.go:494] system disk:vda1
I0323 02:51:14.456512  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:51:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:51:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:51:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:51:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:51:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:51:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:51:23.409801  543705 memory.go:184] no items to output this cycle
I0323 02:51:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 02:51:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:51:33.409781  543705 memory.go:184] no items to output this cycle
I0323 02:51:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 02:51:37.745358  543705 disk_info.go:125] begin check local disk info of client
I0323 02:51:37.747917  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:51:37.747925  543705 disk_info.go:196] parse disk info done, disk is : [0xc000505f40 0xc00049e040]
I0323 02:51:40.020796  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:51:40.020801  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:51:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:51:43.410727  543705 memory.go:191] Add success.
I0323 02:51:43.409825  543705 cpu.go:282] Add success.
I0323 02:51:43.420431  543705 net.go:648] Add success.
I0323 02:51:43.423012  543705 net.go:770] primary dev: ETH0
I0323 02:51:43.423025  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:51:43.423037  543705 net.go:698] Add success.
I0323 02:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:51:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:51:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:51:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:51:53.409767  543705 memory.go:184] no items to output this cycle
I0323 02:51:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 02:52:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:52:03.409782  543705 memory.go:184] no items to output this cycle
I0323 02:52:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 02:52:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:52:13.409780  543705 memory.go:191] Add success.
W0323 02:52:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:52:13.409809  543705 cpu.go:282] Add success.
W0323 02:52:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:52:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:52:13.420300  543705 net.go:648] Add success.
I0323 02:52:13.422958  543705 net.go:770] primary dev: ETH0
I0323 02:52:13.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:52:13.422985  543705 net.go:698] Add success.
W0323 02:52:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:52:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 02:52:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:52:14.456957  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:52:14.456966  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:52:14.456972  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:52:14.457018  543705 disk_worker.go:494] system disk:vda1
I0323 02:52:14.457061  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:52:15.456812  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:52:15.456821  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:52:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:52:16.457914  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:52:16.457968  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:52:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:52:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:52:23.409779  543705 memory.go:184] no items to output this cycle
I0323 02:52:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 02:52:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:52:33.409766  543705 memory.go:184] no items to output this cycle
I0323 02:52:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 02:52:37.748002  543705 disk_info.go:125] begin check local disk info of client
I0323 02:52:37.750600  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:52:37.750606  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a02c0 0xc0004a0300]
E0323 02:52:43.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:52:43.410795  543705 memory.go:191] Add success.
I0323 02:52:43.409903  543705 cpu.go:282] Add success.
I0323 02:52:43.419754  543705 net.go:648] Add success.
I0323 02:52:43.422136  543705 net.go:770] primary dev: ETH0
I0323 02:52:43.422148  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:52:43.422160  543705 net.go:698] Add success.
I0323 02:52:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:52:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:52:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:52:53.409788  543705 cpu.go:275] no items to output this cycle
I0323 02:52:53.409791  543705 memory.go:184] no items to output this cycle
E0323 02:53:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:53:03.409780  543705 memory.go:184] no items to output this cycle
I0323 02:53:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 02:53:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:53:13.409797  543705 memory.go:191] Add success.
I0323 02:53:13.409801  543705 cpu.go:282] Add success.
W0323 02:53:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:53:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:53:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:53:13.420142  543705 net.go:648] Add success.
I0323 02:53:13.423175  543705 net.go:770] primary dev: ETH0
I0323 02:53:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:53:13.423199  543705 net.go:698] Add success.
I0323 02:53:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:53:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:53:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 02:53:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:53:14.456492  543705 disk_worker.go:494] system disk:vda1
I0323 02:53:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:53:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:53:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:53:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:53:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:53:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:53:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:53:23.409784  543705 memory.go:184] no items to output this cycle
I0323 02:53:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 02:53:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:53:33.409766  543705 memory.go:184] no items to output this cycle
I0323 02:53:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 02:53:37.750688  543705 disk_info.go:125] begin check local disk info of client
I0323 02:53:37.753280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:53:37.753286  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509380 0xc0005093c0]
E0323 02:53:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:53:43.410691  543705 memory.go:191] Add success.
I0323 02:53:43.409804  543705 cpu.go:282] Add success.
I0323 02:53:43.420421  543705 net.go:648] Add success.
I0323 02:53:43.423310  543705 net.go:770] primary dev: ETH0
I0323 02:53:43.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:53:43.423348  543705 net.go:698] Add success.
I0323 02:53:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:53:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:53:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:53:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:53:53.409770  543705 memory.go:184] no items to output this cycle
I0323 02:53:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 02:54:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:54:03.409778  543705 memory.go:184] no items to output this cycle
I0323 02:54:03.409778  543705 cpu.go:275] no items to output this cycle
E0323 02:54:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:54:13.409799  543705 memory.go:191] Add success.
I0323 02:54:13.409799  543705 cpu.go:282] Add success.
W0323 02:54:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:54:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:54:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:54:13.420334  543705 net.go:648] Add success.
I0323 02:54:13.423000  543705 net.go:770] primary dev: ETH0
I0323 02:54:13.423014  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:54:13.423026  543705 net.go:698] Add success.
I0323 02:54:13.498262  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c5df8486-f4dc-420d-b715-f372a013fc25","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:54:13.498297  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 02:54:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:54:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:54:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 02:54:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:54:14.456584  543705 disk_worker.go:494] system disk:vda1
I0323 02:54:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:54:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:54:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:54:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:54:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:54:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:54:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:54:23.409786  543705 memory.go:184] no items to output this cycle
I0323 02:54:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 02:54:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:54:33.409777  543705 memory.go:184] no items to output this cycle
I0323 02:54:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 02:54:37.753672  543705 disk_info.go:125] begin check local disk info of client
I0323 02:54:37.756212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:54:37.756218  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ee00 0xc00035ee40]
I0323 02:54:40.021725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:54:40.021731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:54:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:54:43.410659  543705 memory.go:191] Add success.
I0323 02:54:43.409823  543705 cpu.go:282] Add success.
I0323 02:54:43.420355  543705 net.go:648] Add success.
I0323 02:54:43.422814  543705 net.go:770] primary dev: ETH0
I0323 02:54:43.422830  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:54:43.422864  543705 net.go:698] Add success.
I0323 02:54:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:54:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:54:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:54:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:54:53.409794  543705 memory.go:184] no items to output this cycle
I0323 02:54:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 02:55:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:55:03.409772  543705 memory.go:184] no items to output this cycle
I0323 02:55:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 02:55:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:55:13.409833  543705 memory.go:191] Add success.
I0323 02:55:13.409839  543705 cpu.go:282] Add success.
W0323 02:55:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:55:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:55:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:55:13.420317  543705 net.go:648] Add success.
I0323 02:55:13.422945  543705 net.go:770] primary dev: ETH0
I0323 02:55:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:55:13.422971  543705 net.go:698] Add success.
I0323 02:55:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:55:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:55:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0323 02:55:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:55:14.456504  543705 disk_worker.go:494] system disk:vda1
I0323 02:55:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:55:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:55:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:55:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:55:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:55:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:55:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:55:23.409815  543705 memory.go:184] no items to output this cycle
I0323 02:55:23.409826  543705 cpu.go:275] no items to output this cycle
E0323 02:55:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:55:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 02:55:33.409795  543705 memory.go:184] no items to output this cycle
I0323 02:55:37.756302  543705 disk_info.go:125] begin check local disk info of client
I0323 02:55:37.758879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:55:37.758885  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b24c0 0xc0003b2500]
E0323 02:55:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:55:43.410637  543705 memory.go:191] Add success.
I0323 02:55:43.409819  543705 cpu.go:282] Add success.
I0323 02:55:43.420361  543705 net.go:648] Add success.
I0323 02:55:43.422801  543705 net.go:770] primary dev: ETH0
I0323 02:55:43.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:55:43.422826  543705 net.go:698] Add success.
I0323 02:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:55:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:55:53.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:55:53.409910  543705 memory.go:184] no items to output this cycle
I0323 02:55:53.410040  543705 cpu.go:275] no items to output this cycle
E0323 02:56:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:56:03.409780  543705 memory.go:184] no items to output this cycle
I0323 02:56:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 02:56:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:56:13.409804  543705 memory.go:191] Add success.
W0323 02:56:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 02:56:13.409836  543705 cpu.go:282] Add success.
W0323 02:56:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:56:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:56:13.420140  543705 net.go:648] Add success.
I0323 02:56:13.422696  543705 net.go:770] primary dev: ETH0
I0323 02:56:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:56:13.422731  543705 net.go:698] Add success.
I0323 02:56:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:56:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:56:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 02:56:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:56:14.456511  543705 disk_worker.go:494] system disk:vda1
I0323 02:56:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:56:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:56:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:56:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:56:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:56:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:56:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:56:23.409774  543705 memory.go:184] no items to output this cycle
I0323 02:56:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 02:56:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:56:33.409812  543705 memory.go:184] no items to output this cycle
I0323 02:56:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 02:56:37.760437  543705 disk_info.go:125] begin check local disk info of client
I0323 02:56:37.763034  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:56:37.763042  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385400 0xc000385440]
E0323 02:56:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:56:43.410670  543705 memory.go:191] Add success.
I0323 02:56:43.409838  543705 cpu.go:282] Add success.
I0323 02:56:43.420391  543705 net.go:648] Add success.
I0323 02:56:43.423246  543705 net.go:770] primary dev: ETH0
I0323 02:56:43.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:56:43.423270  543705 net.go:698] Add success.
I0323 02:56:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:56:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:56:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:56:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:56:53.409800  543705 cpu.go:275] no items to output this cycle
I0323 02:56:53.409815  543705 memory.go:184] no items to output this cycle
E0323 02:57:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:57:03.409797  543705 memory.go:184] no items to output this cycle
I0323 02:57:03.409801  543705 cpu.go:275] no items to output this cycle
W0323 02:57:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:57:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:57:13.409736  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 02:57:13.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:57:13.409828  543705 cpu.go:282] Add success.
I0323 02:57:13.409839  543705 memory.go:191] Add success.
I0323 02:57:13.420134  543705 net.go:648] Add success.
I0323 02:57:13.428972  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 02:57:13.429046  543705 net.go:770] primary dev: ETH0
I0323 02:57:13.429058  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:57:13.429070  543705 net.go:698] Add success.
I0323 02:57:13.453595  543705 event_worker.go:152] Polling the log file for events...
I0323 02:57:13.469428  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"359fa68e-665f-4107-8f5d-f46624cca8a9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 02:57:13.469460  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 02:57:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:57:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 02:57:14.455215  543705 disk_worker.go:728] disk inode is not compliant
E0323 02:57:14.455971  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 02:57:14.455980  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 02:57:14.455986  543705 custom_config.go:64] query custom config with name: gpu
I0323 02:57:14.456819  543705 disk_worker.go:494] system disk:vda1
I0323 02:57:14.456857  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 02:57:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 02:57:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:57:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 02:57:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 02:57:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:57:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:57:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:57:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:57:23.409794  543705 memory.go:184] no items to output this cycle
I0323 02:57:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 02:57:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:57:33.409810  543705 memory.go:184] no items to output this cycle
I0323 02:57:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 02:57:37.763124  543705 disk_info.go:125] begin check local disk info of client
I0323 02:57:37.765879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:57:37.765885  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9840 0xc0003e9880]
I0323 02:57:40.024822  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 02:57:40.024828  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 02:57:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:57:43.410697  543705 memory.go:191] Add success.
I0323 02:57:43.409793  543705 cpu.go:282] Add success.
I0323 02:57:43.420406  543705 net.go:648] Add success.
I0323 02:57:43.423015  543705 net.go:770] primary dev: ETH0
I0323 02:57:43.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:57:43.423045  543705 net.go:698] Add success.
I0323 02:57:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:57:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:57:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:57:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:57:53.409774  543705 memory.go:184] no items to output this cycle
I0323 02:57:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 02:58:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:58:03.409782  543705 memory.go:184] no items to output this cycle
I0323 02:58:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 02:58:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:58:13.409822  543705 memory.go:191] Add success.
I0323 02:58:13.409846  543705 cpu.go:282] Add success.
W0323 02:58:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:58:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:58:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:58:13.420188  543705 net.go:648] Add success.
I0323 02:58:13.422805  543705 net.go:770] primary dev: ETH0
I0323 02:58:13.422817  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:58:13.422828  543705 net.go:698] Add success.
I0323 02:58:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:58:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:58:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 02:58:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:58:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 02:58:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:58:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:58:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:58:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:58:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:58:23.409771  543705 memory.go:184] no items to output this cycle
I0323 02:58:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 02:58:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:58:33.409795  543705 memory.go:184] no items to output this cycle
I0323 02:58:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 02:58:37.765967  543705 disk_info.go:125] begin check local disk info of client
I0323 02:58:37.768484  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:58:37.768491  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c03c0 0xc0003c0400]
E0323 02:58:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:58:43.410604  543705 memory.go:191] Add success.
I0323 02:58:43.409811  543705 cpu.go:282] Add success.
I0323 02:58:43.420292  543705 net.go:648] Add success.
I0323 02:58:43.423035  543705 net.go:770] primary dev: ETH0
I0323 02:58:43.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:58:43.423060  543705 net.go:698] Add success.
I0323 02:58:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:58:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:58:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:58:53.409777  543705 memory.go:184] no items to output this cycle
I0323 02:58:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 02:59:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:59:03.409789  543705 memory.go:184] no items to output this cycle
I0323 02:59:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 02:59:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:59:13.409825  543705 memory.go:191] Add success.
I0323 02:59:13.409829  543705 cpu.go:282] Add success.
W0323 02:59:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 02:59:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 02:59:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 02:59:13.420187  543705 net.go:648] Add success.
I0323 02:59:13.423169  543705 net.go:770] primary dev: ETH0
I0323 02:59:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:59:13.423205  543705 net.go:698] Add success.
I0323 02:59:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 02:59:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 02:59:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 02:59:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 02:59:14.456601  543705 disk_worker.go:494] system disk:vda1
I0323 02:59:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 02:59:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 02:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:59:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:59:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 02:59:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 02:59:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:59:23.409762  543705 memory.go:184] no items to output this cycle
I0323 02:59:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 02:59:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:59:33.409783  543705 memory.go:184] no items to output this cycle
I0323 02:59:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 02:59:37.768573  543705 disk_info.go:125] begin check local disk info of client
I0323 02:59:37.771176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 02:59:37.771183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004756c0 0xc000475700]
E0323 02:59:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:59:43.410660  543705 memory.go:191] Add success.
I0323 02:59:43.409810  543705 cpu.go:282] Add success.
I0323 02:59:43.420377  543705 net.go:648] Add success.
I0323 02:59:43.422964  543705 net.go:770] primary dev: ETH0
I0323 02:59:43.422977  543705 net.go:802] Send network stats successfully!,count is 6
I0323 02:59:43.422989  543705 net.go:698] Add success.
I0323 02:59:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 02:59:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 02:59:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 02:59:53.410289  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 02:59:53.410309  543705 memory.go:184] no items to output this cycle
I0323 02:59:53.410311  543705 cpu.go:275] no items to output this cycle
I0323 03:00:03.409920  543705 cpu.go:275] no items to output this cycle
E0323 03:00:03.410015  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:00:03.410038  543705 memory.go:184] no items to output this cycle
E0323 03:00:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:00:13.409790  543705 memory.go:191] Add success.
W0323 03:00:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:00:13.409830  543705 cpu.go:282] Add success.
I0323 03:00:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:00:13.420208  543705 net.go:648] Add success.
I0323 03:00:13.423041  543705 net.go:770] primary dev: ETH0
I0323 03:00:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:00:13.423066  543705 net.go:698] Add success.
I0323 03:00:13.468535  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6958b27c-512d-47fd-935c-d5214868104f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:00:13.468567  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:00:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:00:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:00:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0323 03:00:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:00:14.456598  543705 disk_worker.go:494] system disk:vda1
I0323 03:00:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:00:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:00:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:00:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:00:23.409765  543705 memory.go:184] no items to output this cycle
I0323 03:00:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 03:00:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:00:33.409805  543705 memory.go:184] no items to output this cycle
I0323 03:00:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 03:00:37.772489  543705 disk_info.go:125] begin check local disk info of client
I0323 03:00:37.775062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:00:37.775069  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051ddc0 0xc00051de00]
I0323 03:00:40.025728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:00:40.025735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:00:43.409739  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:00:43.410701  543705 memory.go:191] Add success.
I0323 03:00:43.409803  543705 cpu.go:282] Add success.
I0323 03:00:43.420382  543705 net.go:648] Add success.
I0323 03:00:43.423222  543705 net.go:770] primary dev: ETH0
I0323 03:00:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:00:43.423247  543705 net.go:698] Add success.
I0323 03:00:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:00:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:00:53.410361  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:00:53.410384  543705 memory.go:184] no items to output this cycle
I0323 03:00:53.410407  543705 cpu.go:275] no items to output this cycle
I0323 03:01:03.409933  543705 cpu.go:275] no items to output this cycle
E0323 03:01:03.410097  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:01:03.410109  543705 memory.go:184] no items to output this cycle
E0323 03:01:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:01:13.409798  543705 memory.go:191] Add success.
I0323 03:01:13.409817  543705 cpu.go:282] Add success.
W0323 03:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:01:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:01:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:01:13.420497  543705 net.go:648] Add success.
I0323 03:01:13.423128  543705 net.go:770] primary dev: ETH0
I0323 03:01:13.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:01:13.423157  543705 net.go:698] Add success.
I0323 03:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:01:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:01:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 03:01:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:01:14.456530  543705 disk_worker.go:494] system disk:vda1
I0323 03:01:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:01:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:01:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:01:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:01:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:01:23.409768  543705 memory.go:184] no items to output this cycle
I0323 03:01:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:01:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:01:33.409768  543705 memory.go:184] no items to output this cycle
I0323 03:01:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 03:01:37.776521  543705 disk_info.go:125] begin check local disk info of client
I0323 03:01:37.779108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:01:37.779115  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b62c0 0xc0002b6300]
E0323 03:01:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:01:43.410962  543705 memory.go:191] Add success.
I0323 03:01:43.409820  543705 cpu.go:282] Add success.
I0323 03:01:43.420675  543705 net.go:648] Add success.
I0323 03:01:43.423487  543705 net.go:770] primary dev: ETH0
I0323 03:01:43.423502  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:01:43.423514  543705 net.go:698] Add success.
I0323 03:01:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:01:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:01:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:01:53.410381  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:01:53.410398  543705 memory.go:184] no items to output this cycle
I0323 03:01:53.410407  543705 cpu.go:275] no items to output this cycle
E0323 03:02:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:02:03.409765  543705 memory.go:184] no items to output this cycle
I0323 03:02:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 03:02:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:02:13.409814  543705 memory.go:191] Add success.
I0323 03:02:13.409823  543705 cpu.go:282] Add success.
W0323 03:02:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:02:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:02:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:02:13.420204  543705 net.go:648] Add success.
I0323 03:02:13.423443  543705 net.go:770] primary dev: ETH0
I0323 03:02:13.423457  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:02:13.423472  543705 net.go:698] Add success.
W0323 03:02:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:02:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 03:02:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:02:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:02:14.455919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:02:14.455925  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:02:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 03:02:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:02:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:02:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:02:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:02:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:02:16.457988  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:02:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:02:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:02:23.410272  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:02:23.410291  543705 memory.go:184] no items to output this cycle
I0323 03:02:23.410313  543705 cpu.go:275] no items to output this cycle
E0323 03:02:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:02:33.409799  543705 memory.go:184] no items to output this cycle
I0323 03:02:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 03:02:37.780538  543705 disk_info.go:125] begin check local disk info of client
I0323 03:02:37.783090  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:02:37.783096  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306ec0 0xc000306f00]
E0323 03:02:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:02:43.410703  543705 memory.go:191] Add success.
I0323 03:02:43.409797  543705 cpu.go:282] Add success.
I0323 03:02:43.420385  543705 net.go:648] Add success.
I0323 03:02:43.423214  543705 net.go:770] primary dev: ETH0
I0323 03:02:43.423229  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:02:43.423244  543705 net.go:698] Add success.
I0323 03:02:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:02:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:02:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:02:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:02:53.409812  543705 memory.go:184] no items to output this cycle
I0323 03:02:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 03:03:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:03:03.409775  543705 memory.go:184] no items to output this cycle
I0323 03:03:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 03:03:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:03:13.409793  543705 memory.go:191] Add success.
W0323 03:03:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 03:03:13.409823  543705 cpu.go:282] Add success.
W0323 03:03:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:03:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:03:13.420354  543705 net.go:648] Add success.
I0323 03:03:13.423012  543705 net.go:770] primary dev: ETH0
I0323 03:03:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:03:13.423036  543705 net.go:698] Add success.
I0323 03:03:13.470077  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"46f20868-a181-40dc-be4e-937eebb98627","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:03:13.470110  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:03:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:03:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:03:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 03:03:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:03:14.456730  543705 disk_worker.go:494] system disk:vda1
I0323 03:03:14.456760  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:03:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:03:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:03:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:03:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:03:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:03:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:03:23.409766  543705 memory.go:184] no items to output this cycle
I0323 03:03:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 03:03:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:03:33.409766  543705 memory.go:184] no items to output this cycle
I0323 03:03:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 03:03:37.784557  543705 disk_info.go:125] begin check local disk info of client
I0323 03:03:37.787154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:03:37.787161  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b4c0 0xc00047b500]
I0323 03:03:40.028820  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:03:40.028826  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:03:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:03:43.410699  543705 memory.go:191] Add success.
I0323 03:03:43.409797  543705 cpu.go:282] Add success.
I0323 03:03:43.420404  543705 net.go:648] Add success.
I0323 03:03:43.423071  543705 net.go:770] primary dev: ETH0
I0323 03:03:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:03:43.423098  543705 net.go:698] Add success.
I0323 03:03:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:03:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:03:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:03:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:03:53.409776  543705 memory.go:184] no items to output this cycle
I0323 03:03:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 03:04:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:04:03.409776  543705 memory.go:184] no items to output this cycle
I0323 03:04:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 03:04:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:04:13.409811  543705 memory.go:191] Add success.
I0323 03:04:13.409813  543705 cpu.go:282] Add success.
W0323 03:04:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:04:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:04:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:04:13.420155  543705 net.go:648] Add success.
I0323 03:04:13.423142  543705 net.go:770] primary dev: ETH0
I0323 03:04:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:04:13.423183  543705 net.go:698] Add success.
I0323 03:04:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:04:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:04:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0323 03:04:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:04:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 03:04:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:04:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:04:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:04:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:04:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:04:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:04:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:04:23.409773  543705 memory.go:184] no items to output this cycle
I0323 03:04:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 03:04:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:04:33.409767  543705 memory.go:184] no items to output this cycle
I0323 03:04:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 03:04:37.788579  543705 disk_info.go:125] begin check local disk info of client
I0323 03:04:37.791096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:04:37.791102  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0323 03:04:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:04:43.410686  543705 memory.go:191] Add success.
I0323 03:04:43.409796  543705 cpu.go:282] Add success.
I0323 03:04:43.420528  543705 net.go:648] Add success.
I0323 03:04:43.423057  543705 net.go:770] primary dev: ETH0
I0323 03:04:43.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:04:43.423083  543705 net.go:698] Add success.
I0323 03:04:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:04:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:04:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:04:53.410350  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:04:53.410366  543705 memory.go:184] no items to output this cycle
I0323 03:04:53.410366  543705 cpu.go:275] no items to output this cycle
E0323 03:05:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:05:03.409780  543705 memory.go:184] no items to output this cycle
I0323 03:05:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 03:05:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:05:13.409792  543705 memory.go:191] Add success.
W0323 03:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 03:05:13.409818  543705 cpu.go:282] Add success.
W0323 03:05:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:05:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:05:13.419716  543705 net.go:648] Add success.
I0323 03:05:13.422355  543705 net.go:770] primary dev: ETH0
I0323 03:05:13.422368  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:05:13.422380  543705 net.go:698] Add success.
I0323 03:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:05:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:05:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 03:05:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:05:14.456636  543705 disk_worker.go:494] system disk:vda1
I0323 03:05:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:05:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:05:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:05:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:05:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:05:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:05:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:05:23.409767  543705 memory.go:184] no items to output this cycle
I0323 03:05:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 03:05:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:05:33.409765  543705 memory.go:184] no items to output this cycle
I0323 03:05:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 03:05:37.792594  543705 disk_info.go:125] begin check local disk info of client
I0323 03:05:37.795146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:05:37.795151  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aad80 0xc0002aadc0]
E0323 03:05:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:05:43.410678  543705 memory.go:191] Add success.
I0323 03:05:43.409816  543705 cpu.go:282] Add success.
I0323 03:05:43.420386  543705 net.go:648] Add success.
I0323 03:05:43.422969  543705 net.go:770] primary dev: ETH0
I0323 03:05:43.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:05:43.423002  543705 net.go:698] Add success.
I0323 03:05:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:05:53.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:05:53.410256  543705 memory.go:184] no items to output this cycle
I0323 03:05:53.410288  543705 cpu.go:275] no items to output this cycle
E0323 03:06:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:06:03.409798  543705 memory.go:184] no items to output this cycle
I0323 03:06:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 03:06:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:06:13.409788  543705 memory.go:191] Add success.
W0323 03:06:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 03:06:13.409823  543705 cpu.go:282] Add success.
W0323 03:06:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:06:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:06:13.419741  543705 net.go:648] Add success.
I0323 03:06:13.422531  543705 net.go:770] primary dev: ETH0
I0323 03:06:13.422547  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:06:13.422565  543705 net.go:698] Add success.
I0323 03:06:13.488530  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6766edc7-f8e4-4220-b26d-fe60de285832","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:06:13.488561  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:06:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:06:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:06:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 03:06:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:06:14.456591  543705 disk_worker.go:494] system disk:vda1
I0323 03:06:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:06:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:06:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:06:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:06:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:06:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:06:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:06:23.409783  543705 memory.go:184] no items to output this cycle
I0323 03:06:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 03:06:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:06:33.409794  543705 memory.go:184] no items to output this cycle
I0323 03:06:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 03:06:37.795237  543705 disk_info.go:125] begin check local disk info of client
I0323 03:06:37.797730  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:06:37.797736  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dd1c0 0xc0003dd200]
I0323 03:06:40.029733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:06:40.029738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:06:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:06:43.410763  543705 memory.go:191] Add success.
I0323 03:06:43.409814  543705 cpu.go:282] Add success.
I0323 03:06:43.420471  543705 net.go:648] Add success.
I0323 03:06:43.423198  543705 net.go:770] primary dev: ETH0
I0323 03:06:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:06:43.423224  543705 net.go:698] Add success.
I0323 03:06:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:06:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:06:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:06:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:06:53.409775  543705 memory.go:184] no items to output this cycle
I0323 03:06:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 03:07:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:07:03.409796  543705 memory.go:184] no items to output this cycle
I0323 03:07:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 03:07:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:07:13.409823  543705 memory.go:191] Add success.
I0323 03:07:13.409835  543705 cpu.go:282] Add success.
W0323 03:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:07:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:07:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:07:13.420187  543705 net.go:648] Add success.
I0323 03:07:13.423602  543705 net.go:770] primary dev: ETH0
I0323 03:07:13.423616  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:07:13.423631  543705 net.go:698] Add success.
I0323 03:07:13.452773  543705 event_worker.go:152] Polling the log file for events...
W0323 03:07:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:07:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 03:07:14.455171  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:07:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:07:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:07:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:07:14.457027  543705 disk_worker.go:494] system disk:vda1
I0323 03:07:14.457056  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:07:15.456784  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:07:15.456793  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:07:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:07:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:07:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:07:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:07:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:07:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:07:23.409773  543705 memory.go:184] no items to output this cycle
I0323 03:07:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 03:07:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:07:33.409795  543705 memory.go:184] no items to output this cycle
I0323 03:07:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 03:07:37.799628  543705 disk_info.go:125] begin check local disk info of client
I0323 03:07:37.802189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:07:37.802195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abdc0 0xc0002abe00]
E0323 03:07:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:07:43.410709  543705 memory.go:191] Add success.
I0323 03:07:43.409807  543705 cpu.go:282] Add success.
I0323 03:07:43.420407  543705 net.go:648] Add success.
I0323 03:07:43.423038  543705 net.go:770] primary dev: ETH0
I0323 03:07:43.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:07:43.423063  543705 net.go:698] Add success.
I0323 03:07:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:07:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:07:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:07:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:07:53.409796  543705 memory.go:184] no items to output this cycle
I0323 03:07:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 03:08:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:08:03.409773  543705 memory.go:184] no items to output this cycle
I0323 03:08:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 03:08:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:08:13.409830  543705 memory.go:191] Add success.
I0323 03:08:13.409837  543705 cpu.go:282] Add success.
W0323 03:08:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:08:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:08:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:08:13.420177  543705 net.go:648] Add success.
I0323 03:08:13.422930  543705 net.go:770] primary dev: ETH0
I0323 03:08:13.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:08:13.422997  543705 net.go:698] Add success.
I0323 03:08:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:08:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:08:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 03:08:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:08:14.456586  543705 disk_worker.go:494] system disk:vda1
I0323 03:08:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:08:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:08:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:08:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:08:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:08:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:08:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:08:23.409795  543705 memory.go:184] no items to output this cycle
I0323 03:08:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 03:08:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:08:33.409777  543705 memory.go:184] no items to output this cycle
I0323 03:08:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 03:08:37.802277  543705 disk_info.go:125] begin check local disk info of client
I0323 03:08:37.804792  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:08:37.804798  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b400 0xc00047b440]
E0323 03:08:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:08:43.410627  543705 memory.go:191] Add success.
I0323 03:08:43.409817  543705 cpu.go:282] Add success.
I0323 03:08:43.420340  543705 net.go:648] Add success.
I0323 03:08:43.422995  543705 net.go:770] primary dev: ETH0
I0323 03:08:43.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:08:43.423025  543705 net.go:698] Add success.
I0323 03:08:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:08:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:08:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:08:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:08:53.409785  543705 memory.go:184] no items to output this cycle
I0323 03:08:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 03:09:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:09:03.409788  543705 memory.go:184] no items to output this cycle
I0323 03:09:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 03:09:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:09:13.409829  543705 memory.go:191] Add success.
I0323 03:09:13.409835  543705 cpu.go:282] Add success.
W0323 03:09:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:09:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:09:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:09:13.420352  543705 net.go:648] Add success.
I0323 03:09:13.423299  543705 net.go:770] primary dev: ETH0
I0323 03:09:13.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:09:13.423326  543705 net.go:698] Add success.
I0323 03:09:13.471195  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6f28bb6e-0f74-4e70-9082-221ad8841a63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:09:13.471227  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:09:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:09:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:09:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 03:09:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:09:14.456741  543705 disk_worker.go:494] system disk:vda1
I0323 03:09:14.456777  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:09:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:09:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:09:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:09:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:09:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:09:23.409766  543705 memory.go:184] no items to output this cycle
I0323 03:09:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 03:09:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:09:33.409764  543705 memory.go:184] no items to output this cycle
I0323 03:09:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 03:09:37.805672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:09:37.808156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:09:37.808161  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b740 0xc00047b780]
I0323 03:09:40.032850  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:09:40.032856  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:09:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:09:43.410824  543705 memory.go:191] Add success.
I0323 03:09:43.409803  543705 cpu.go:282] Add success.
I0323 03:09:43.420537  543705 net.go:648] Add success.
I0323 03:09:43.423324  543705 net.go:770] primary dev: ETH0
I0323 03:09:43.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:09:43.423350  543705 net.go:698] Add success.
I0323 03:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:09:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:09:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:09:53.409772  543705 memory.go:184] no items to output this cycle
I0323 03:09:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 03:10:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:10:03.409810  543705 memory.go:184] no items to output this cycle
I0323 03:10:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 03:10:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:10:13.409790  543705 memory.go:191] Add success.
I0323 03:10:13.409811  543705 cpu.go:282] Add success.
W0323 03:10:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:10:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:10:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:10:13.420166  543705 net.go:648] Add success.
I0323 03:10:13.422943  543705 net.go:770] primary dev: ETH0
I0323 03:10:13.422958  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:10:13.422971  543705 net.go:698] Add success.
I0323 03:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:10:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:10:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 03:10:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:10:14.456977  543705 disk_worker.go:494] system disk:vda1
I0323 03:10:14.457005  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:10:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:10:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:10:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:10:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:10:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:10:23.410278  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:10:23.410298  543705 memory.go:184] no items to output this cycle
I0323 03:10:23.410312  543705 cpu.go:275] no items to output this cycle
E0323 03:10:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:10:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 03:10:33.409784  543705 memory.go:184] no items to output this cycle
I0323 03:10:37.809673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:10:37.812181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:10:37.812187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b980 0xc00007b9c0]
E0323 03:10:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:10:43.410746  543705 memory.go:191] Add success.
I0323 03:10:43.409796  543705 cpu.go:282] Add success.
I0323 03:10:43.420432  543705 net.go:648] Add success.
I0323 03:10:43.423186  543705 net.go:770] primary dev: ETH0
I0323 03:10:43.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:10:43.423211  543705 net.go:698] Add success.
I0323 03:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:10:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:10:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:10:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:10:53.409782  543705 memory.go:184] no items to output this cycle
I0323 03:10:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 03:11:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:11:03.409783  543705 memory.go:184] no items to output this cycle
I0323 03:11:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 03:11:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:11:13.409798  543705 memory.go:191] Add success.
I0323 03:11:13.409798  543705 cpu.go:282] Add success.
W0323 03:11:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:11:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:11:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:11:13.420170  543705 net.go:648] Add success.
I0323 03:11:13.423205  543705 net.go:770] primary dev: ETH0
I0323 03:11:13.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:11:13.423232  543705 net.go:698] Add success.
I0323 03:11:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:11:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:11:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 03:11:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:11:14.456633  543705 disk_worker.go:494] system disk:vda1
I0323 03:11:14.456673  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:11:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:11:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:11:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:11:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:11:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:11:23.409775  543705 memory.go:184] no items to output this cycle
I0323 03:11:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 03:11:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:11:33.409779  543705 memory.go:184] no items to output this cycle
I0323 03:11:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 03:11:37.813677  543705 disk_info.go:125] begin check local disk info of client
I0323 03:11:37.816198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:11:37.816204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abbc0 0xc0001abc00]
E0323 03:11:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:11:43.410730  543705 memory.go:191] Add success.
I0323 03:11:43.409816  543705 cpu.go:282] Add success.
I0323 03:11:43.420435  543705 net.go:648] Add success.
I0323 03:11:43.423114  543705 net.go:770] primary dev: ETH0
I0323 03:11:43.423129  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:11:43.423144  543705 net.go:698] Add success.
I0323 03:11:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:11:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:11:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:11:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:11:53.409798  543705 memory.go:184] no items to output this cycle
I0323 03:11:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 03:12:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:12:03.409783  543705 memory.go:184] no items to output this cycle
I0323 03:12:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 03:12:13.410307  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:12:13.410336  543705 memory.go:191] Add success.
I0323 03:12:13.410338  543705 cpu.go:282] Add success.
W0323 03:12:13.410364  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:12:13.410377  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:12:13.410380  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:12:13.419836  543705 net.go:648] Add success.
I0323 03:12:13.422323  543705 net.go:770] primary dev: ETH0
I0323 03:12:13.422336  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:12:13.422349  543705 net.go:698] Add success.
I0323 03:12:13.463970  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f926b7c7-7b9c-4ebc-8bbf-dd967d2f45d9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:12:13.464005  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 03:12:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:12:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 03:12:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:12:14.456188  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:12:14.456198  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:12:14.456204  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:12:14.457174  543705 disk_worker.go:494] system disk:vda1
I0323 03:12:14.457214  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:12:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:12:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:12:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:12:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:12:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:12:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:12:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:12:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:12:23.409795  543705 memory.go:184] no items to output this cycle
I0323 03:12:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:12:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:12:33.409774  543705 memory.go:184] no items to output this cycle
I0323 03:12:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 03:12:37.817674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:12:37.820176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:12:37.820182  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d00 0xc0000c5d40]
I0323 03:12:40.033725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:12:40.033731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:12:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:12:43.410566  543705 memory.go:191] Add success.
I0323 03:12:43.409799  543705 cpu.go:282] Add success.
I0323 03:12:43.420267  543705 net.go:648] Add success.
I0323 03:12:43.422864  543705 net.go:770] primary dev: ETH0
I0323 03:12:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:12:43.422889  543705 net.go:698] Add success.
I0323 03:12:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:12:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:12:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:12:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:12:53.409814  543705 memory.go:184] no items to output this cycle
I0323 03:12:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 03:13:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:13:03.409809  543705 memory.go:184] no items to output this cycle
I0323 03:13:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 03:13:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:13:13.409800  543705 cpu.go:282] Add success.
I0323 03:13:13.409801  543705 memory.go:191] Add success.
W0323 03:13:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:13:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:13:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:13:13.420154  543705 net.go:648] Add success.
I0323 03:13:13.422833  543705 net.go:770] primary dev: ETH0
I0323 03:13:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:13:13.422862  543705 net.go:698] Add success.
I0323 03:13:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:13:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:13:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 03:13:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:13:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 03:13:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:13:15.456008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:13:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:13:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:13:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:13:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:13:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:13:23.409763  543705 memory.go:184] no items to output this cycle
I0323 03:13:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 03:13:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:13:33.409777  543705 memory.go:184] no items to output this cycle
I0323 03:13:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 03:13:37.821673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:13:37.824166  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:13:37.824173  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262b40 0xc000262b80]
E0323 03:13:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:13:43.410670  543705 memory.go:191] Add success.
I0323 03:13:43.409796  543705 cpu.go:282] Add success.
I0323 03:13:43.420396  543705 net.go:648] Add success.
I0323 03:13:43.423049  543705 net.go:770] primary dev: ETH0
I0323 03:13:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:13:43.423080  543705 net.go:698] Add success.
I0323 03:13:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:13:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:13:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:13:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:13:53.409798  543705 memory.go:184] no items to output this cycle
I0323 03:13:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:14:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:14:03.409784  543705 cpu.go:275] no items to output this cycle
I0323 03:14:03.409786  543705 memory.go:184] no items to output this cycle
E0323 03:14:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:14:13.409803  543705 memory.go:191] Add success.
I0323 03:14:13.409806  543705 cpu.go:282] Add success.
W0323 03:14:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:14:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:14:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:14:13.420181  543705 net.go:648] Add success.
I0323 03:14:13.422840  543705 net.go:770] primary dev: ETH0
I0323 03:14:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:14:13.422866  543705 net.go:698] Add success.
I0323 03:14:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:14:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:14:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0323 03:14:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:14:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 03:14:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:14:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:14:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:14:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:14:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:14:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:14:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:14:23.409760  543705 memory.go:184] no items to output this cycle
I0323 03:14:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 03:14:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:14:33.409796  543705 memory.go:184] no items to output this cycle
I0323 03:14:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 03:14:37.825669  543705 disk_info.go:125] begin check local disk info of client
I0323 03:14:37.828225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:14:37.828230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abec0 0xc0001abf00]
E0323 03:14:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:14:43.410597  543705 memory.go:191] Add success.
I0323 03:14:43.409798  543705 cpu.go:282] Add success.
I0323 03:14:43.420263  543705 net.go:648] Add success.
I0323 03:14:43.422883  543705 net.go:770] primary dev: ETH0
I0323 03:14:43.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:14:43.422909  543705 net.go:698] Add success.
I0323 03:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:14:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:14:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:14:53.409788  543705 memory.go:184] no items to output this cycle
I0323 03:14:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 03:15:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:15:03.409770  543705 memory.go:184] no items to output this cycle
I0323 03:15:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 03:15:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:15:13.409789  543705 memory.go:191] Add success.
I0323 03:15:13.409810  543705 cpu.go:282] Add success.
W0323 03:15:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:15:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:15:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:15:13.420170  543705 net.go:648] Add success.
I0323 03:15:13.422974  543705 net.go:770] primary dev: ETH0
I0323 03:15:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:15:13.422999  543705 net.go:698] Add success.
I0323 03:15:13.469317  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9af2a1fa-d181-4acd-8948-db27bb00ea95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:15:13.469351  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:15:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:15:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:15:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 03:15:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:15:14.456778  543705 disk_worker.go:494] system disk:vda1
I0323 03:15:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:15:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:15:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:15:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:15:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:15:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:15:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:15:23.409775  543705 memory.go:184] no items to output this cycle
I0323 03:15:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 03:15:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:15:33.409778  543705 memory.go:184] no items to output this cycle
I0323 03:15:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 03:15:37.829673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:15:37.832176  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:15:37.832183  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abd80 0xc0002abdc0]
I0323 03:15:40.036868  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:15:40.036873  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:15:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:15:43.410716  543705 memory.go:191] Add success.
I0323 03:15:43.409794  543705 cpu.go:282] Add success.
I0323 03:15:43.420420  543705 net.go:648] Add success.
I0323 03:15:43.423284  543705 net.go:770] primary dev: ETH0
I0323 03:15:43.423297  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:15:43.423310  543705 net.go:698] Add success.
I0323 03:15:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:15:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:15:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:15:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:15:53.409782  543705 memory.go:184] no items to output this cycle
I0323 03:15:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 03:16:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:16:03.409784  543705 cpu.go:275] no items to output this cycle
I0323 03:16:03.409789  543705 memory.go:184] no items to output this cycle
E0323 03:16:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:16:13.409807  543705 memory.go:191] Add success.
I0323 03:16:13.409809  543705 cpu.go:282] Add success.
W0323 03:16:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:16:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:16:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:16:13.420159  543705 net.go:648] Add success.
I0323 03:16:13.422993  543705 net.go:770] primary dev: ETH0
I0323 03:16:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:16:13.423018  543705 net.go:698] Add success.
I0323 03:16:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:16:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:16:14.455154  543705 disk_worker.go:708] disk space is not compliant
W0323 03:16:14.455157  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:16:14.456712  543705 disk_worker.go:494] system disk:vda1
I0323 03:16:14.456757  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:16:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:16:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:16:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:16:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:16:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:16:23.409798  543705 memory.go:184] no items to output this cycle
I0323 03:16:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 03:16:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:16:33.409773  543705 memory.go:184] no items to output this cycle
I0323 03:16:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 03:16:37.833675  543705 disk_info.go:125] begin check local disk info of client
I0323 03:16:37.836206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:16:37.836212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002abb80 0xc0002abbc0]
E0323 03:16:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:16:43.410671  543705 memory.go:191] Add success.
I0323 03:16:43.409796  543705 cpu.go:282] Add success.
I0323 03:16:43.420442  543705 net.go:648] Add success.
I0323 03:16:43.423112  543705 net.go:770] primary dev: ETH0
I0323 03:16:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:16:43.423140  543705 net.go:698] Add success.
I0323 03:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:16:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:16:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:16:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:16:53.409781  543705 memory.go:184] no items to output this cycle
I0323 03:16:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 03:17:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:17:03.409771  543705 memory.go:184] no items to output this cycle
I0323 03:17:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 03:17:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:17:13.409796  543705 memory.go:191] Add success.
I0323 03:17:13.409798  543705 cpu.go:282] Add success.
W0323 03:17:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:17:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:17:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:17:13.420137  543705 net.go:648] Add success.
I0323 03:17:13.423019  543705 net.go:770] primary dev: ETH0
I0323 03:17:13.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:17:13.423049  543705 net.go:698] Add success.
I0323 03:17:13.453584  543705 event_worker.go:152] Polling the log file for events...
W0323 03:17:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:17:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 03:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:17:14.455920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:17:14.455929  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:17:14.455935  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:17:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 03:17:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:17:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:17:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:17:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:17:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:17:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:17:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:17:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:17:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:17:23.409764  543705 memory.go:184] no items to output this cycle
I0323 03:17:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 03:17:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:17:33.409799  543705 memory.go:184] no items to output this cycle
I0323 03:17:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 03:17:37.837671  543705 disk_info.go:125] begin check local disk info of client
I0323 03:17:37.840216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:17:37.840221  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b540 0xc00047b580]
E0323 03:17:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:17:43.410619  543705 memory.go:191] Add success.
I0323 03:17:43.409804  543705 cpu.go:282] Add success.
I0323 03:17:43.420300  543705 net.go:648] Add success.
I0323 03:17:43.422675  543705 net.go:770] primary dev: ETH0
I0323 03:17:43.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:17:43.422700  543705 net.go:698] Add success.
I0323 03:17:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:17:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:17:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:17:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:17:53.409795  543705 memory.go:184] no items to output this cycle
I0323 03:17:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 03:18:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:18:03.409776  543705 memory.go:184] no items to output this cycle
I0323 03:18:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 03:18:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:18:13.409824  543705 memory.go:191] Add success.
I0323 03:18:13.409834  543705 cpu.go:282] Add success.
W0323 03:18:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:18:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:18:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:18:13.420140  543705 net.go:648] Add success.
I0323 03:18:13.422847  543705 net.go:770] primary dev: ETH0
I0323 03:18:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:18:13.422877  543705 net.go:698] Add success.
I0323 03:18:13.984412  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65cd17dd-7053-49cb-aa69-09025449ae63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:18:13.984447  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:18:14.453977  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:18:14.454236  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:18:14.454246  543705 disk_worker.go:708] disk space is not compliant
W0323 03:18:14.454248  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:18:14.455806  543705 disk_worker.go:494] system disk:vda1
I0323 03:18:14.455838  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:18:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:18:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:18:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:18:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:18:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:18:23.409764  543705 memory.go:184] no items to output this cycle
I0323 03:18:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 03:18:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:18:33.409779  543705 memory.go:184] no items to output this cycle
I0323 03:18:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 03:18:37.841673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:18:37.844184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:18:37.844190  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c45c0 0xc0000c4600]
I0323 03:18:40.037729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:18:40.037736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:18:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:18:43.410692  543705 memory.go:191] Add success.
I0323 03:18:43.409800  543705 cpu.go:282] Add success.
I0323 03:18:43.420409  543705 net.go:648] Add success.
I0323 03:18:43.423040  543705 net.go:770] primary dev: ETH0
I0323 03:18:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:18:43.423068  543705 net.go:698] Add success.
I0323 03:18:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:18:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:18:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:18:53.410278  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:18:53.410299  543705 memory.go:184] no items to output this cycle
I0323 03:18:53.410308  543705 cpu.go:275] no items to output this cycle
E0323 03:19:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:19:03.409801  543705 memory.go:184] no items to output this cycle
I0323 03:19:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 03:19:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:19:13.409796  543705 memory.go:191] Add success.
I0323 03:19:13.409800  543705 cpu.go:282] Add success.
W0323 03:19:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:19:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:19:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:19:13.420134  543705 net.go:648] Add success.
I0323 03:19:13.422693  543705 net.go:770] primary dev: ETH0
I0323 03:19:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:19:13.422725  543705 net.go:698] Add success.
I0323 03:19:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:19:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:19:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 03:19:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:19:14.456612  543705 disk_worker.go:494] system disk:vda1
I0323 03:19:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:19:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:19:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:19:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:19:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:19:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:19:23.409774  543705 memory.go:184] no items to output this cycle
I0323 03:19:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 03:19:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:19:33.409780  543705 memory.go:184] no items to output this cycle
I0323 03:19:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 03:19:37.845672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:19:37.848178  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:19:37.848184  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b080 0xc00007b0c0]
E0323 03:19:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:19:43.410614  543705 memory.go:191] Add success.
I0323 03:19:43.409802  543705 cpu.go:282] Add success.
I0323 03:19:43.420313  543705 net.go:648] Add success.
I0323 03:19:43.422750  543705 net.go:770] primary dev: ETH0
I0323 03:19:43.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:19:43.422779  543705 net.go:698] Add success.
I0323 03:19:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:19:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:19:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:19:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:19:53.409777  543705 memory.go:184] no items to output this cycle
I0323 03:19:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 03:20:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:20:03.409796  543705 memory.go:184] no items to output this cycle
I0323 03:20:03.409807  543705 cpu.go:275] no items to output this cycle
W0323 03:20:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:20:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:20:13.409730  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 03:20:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:20:13.409822  543705 memory.go:191] Add success.
I0323 03:20:13.409845  543705 cpu.go:282] Add success.
I0323 03:20:13.420067  543705 net.go:648] Add success.
I0323 03:20:13.422771  543705 net.go:770] primary dev: ETH0
I0323 03:20:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:20:13.422800  543705 net.go:698] Add success.
I0323 03:20:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:20:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:20:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 03:20:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:20:14.456595  543705 disk_worker.go:494] system disk:vda1
I0323 03:20:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:20:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:20:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:20:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:20:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:20:23.409767  543705 memory.go:184] no items to output this cycle
I0323 03:20:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 03:20:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:20:33.409790  543705 memory.go:184] no items to output this cycle
I0323 03:20:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 03:20:37.849670  543705 disk_info.go:125] begin check local disk info of client
I0323 03:20:37.852201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:20:37.852208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027bec0 0xc00027bf00]
E0323 03:20:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:20:43.410675  543705 memory.go:191] Add success.
I0323 03:20:43.409807  543705 cpu.go:282] Add success.
I0323 03:20:43.420404  543705 net.go:648] Add success.
I0323 03:20:43.422983  543705 net.go:770] primary dev: ETH0
I0323 03:20:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:20:43.423014  543705 net.go:698] Add success.
I0323 03:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:20:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:20:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:20:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:20:53.409779  543705 memory.go:184] no items to output this cycle
I0323 03:20:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 03:21:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:21:03.409767  543705 memory.go:184] no items to output this cycle
I0323 03:21:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 03:21:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:21:13.409791  543705 memory.go:191] Add success.
I0323 03:21:13.409802  543705 cpu.go:282] Add success.
W0323 03:21:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:21:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:21:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:21:13.420291  543705 net.go:648] Add success.
I0323 03:21:13.423056  543705 net.go:770] primary dev: ETH0
I0323 03:21:13.423069  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:21:13.423081  543705 net.go:698] Add success.
I0323 03:21:13.471182  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a3952ce5-c26c-42b8-8f11-00b8eeb05e07","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:21:13.471215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:21:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:21:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:21:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0323 03:21:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:21:14.456512  543705 disk_worker.go:494] system disk:vda1
I0323 03:21:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:21:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:21:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:21:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:21:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:21:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:21:23.409798  543705 memory.go:184] no items to output this cycle
I0323 03:21:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 03:21:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:21:33.409773  543705 memory.go:184] no items to output this cycle
I0323 03:21:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 03:21:37.853675  543705 disk_info.go:125] begin check local disk info of client
I0323 03:21:37.856174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:21:37.856181  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047aa00 0xc00047aa40]
I0323 03:21:40.040891  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:21:40.040896  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:21:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:21:43.410638  543705 memory.go:191] Add success.
I0323 03:21:43.409818  543705 cpu.go:282] Add success.
I0323 03:21:43.420609  543705 net.go:648] Add success.
I0323 03:21:43.423215  543705 net.go:770] primary dev: ETH0
I0323 03:21:43.423227  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:21:43.423240  543705 net.go:698] Add success.
I0323 03:21:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:21:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:21:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:21:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:21:53.409803  543705 memory.go:184] no items to output this cycle
I0323 03:21:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 03:22:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:22:03.409779  543705 memory.go:184] no items to output this cycle
I0323 03:22:03.409781  543705 cpu.go:275] no items to output this cycle
E0323 03:22:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:22:13.409801  543705 memory.go:191] Add success.
I0323 03:22:13.409815  543705 cpu.go:282] Add success.
W0323 03:22:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:22:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:22:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:22:13.420116  543705 net.go:648] Add success.
I0323 03:22:13.422864  543705 net.go:770] primary dev: ETH0
I0323 03:22:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:22:13.422893  543705 net.go:698] Add success.
W0323 03:22:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:22:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 03:22:14.455163  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:22:14.456906  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:22:14.456914  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:22:14.456921  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:22:14.456993  543705 disk_worker.go:494] system disk:vda1
I0323 03:22:14.457034  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:22:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:22:15.456825  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:22:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:22:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:22:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:22:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:22:16.472092  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:22:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:22:23.409793  543705 memory.go:184] no items to output this cycle
I0323 03:22:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:22:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:22:33.409771  543705 memory.go:184] no items to output this cycle
I0323 03:22:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 03:22:37.857683  543705 disk_info.go:125] begin check local disk info of client
I0323 03:22:37.860187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:22:37.860193  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0323 03:22:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:22:43.410541  543705 memory.go:191] Add success.
I0323 03:22:43.409826  543705 cpu.go:282] Add success.
I0323 03:22:43.420291  543705 net.go:648] Add success.
I0323 03:22:43.422664  543705 net.go:770] primary dev: ETH0
I0323 03:22:43.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:22:43.422689  543705 net.go:698] Add success.
I0323 03:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:22:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:22:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:22:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:22:53.409786  543705 memory.go:184] no items to output this cycle
I0323 03:22:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 03:23:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:23:03.409771  543705 memory.go:184] no items to output this cycle
I0323 03:23:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:23:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:23:13.409829  543705 memory.go:191] Add success.
I0323 03:23:13.409840  543705 cpu.go:282] Add success.
W0323 03:23:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:23:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:23:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:23:13.420395  543705 net.go:648] Add success.
I0323 03:23:13.423101  543705 net.go:770] primary dev: ETH0
I0323 03:23:13.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:23:13.423126  543705 net.go:698] Add success.
I0323 03:23:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:23:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:23:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 03:23:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:23:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 03:23:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:23:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:23:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:23:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:23:23.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:23:23.409896  543705 memory.go:184] no items to output this cycle
I0323 03:23:23.409973  543705 cpu.go:275] no items to output this cycle
E0323 03:23:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:23:33.409785  543705 memory.go:184] no items to output this cycle
I0323 03:23:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 03:23:37.861670  543705 disk_info.go:125] begin check local disk info of client
I0323 03:23:37.864204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:23:37.864210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003efd80 0xc0003efdc0]
E0323 03:23:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:23:43.410689  543705 memory.go:191] Add success.
I0323 03:23:43.409823  543705 cpu.go:282] Add success.
I0323 03:23:43.420385  543705 net.go:648] Add success.
I0323 03:23:43.423364  543705 net.go:770] primary dev: ETH0
I0323 03:23:43.423377  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:23:43.423389  543705 net.go:698] Add success.
I0323 03:23:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:23:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:23:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:23:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:23:53.409811  543705 memory.go:184] no items to output this cycle
I0323 03:23:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 03:24:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:24:03.409785  543705 memory.go:184] no items to output this cycle
I0323 03:24:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 03:24:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:24:13.409841  543705 memory.go:191] Add success.
I0323 03:24:13.409846  543705 cpu.go:282] Add success.
W0323 03:24:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:24:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:24:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:24:13.420315  543705 net.go:648] Add success.
I0323 03:24:13.423154  543705 net.go:770] primary dev: ETH0
I0323 03:24:13.423168  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:24:13.423180  543705 net.go:698] Add success.
I0323 03:24:13.468865  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"436dfe9a-f772-46d0-884d-bfcc93e6b9f6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:24:13.468900  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:24:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:24:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:24:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 03:24:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:24:14.456550  543705 disk_worker.go:494] system disk:vda1
I0323 03:24:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:24:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:24:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:24:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:24:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:24:23.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:24:23.409890  543705 memory.go:184] no items to output this cycle
I0323 03:24:23.409929  543705 cpu.go:275] no items to output this cycle
E0323 03:24:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:24:33.409785  543705 memory.go:184] no items to output this cycle
I0323 03:24:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 03:24:37.865671  543705 disk_info.go:125] begin check local disk info of client
I0323 03:24:37.868177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:24:37.868184  543705 disk_info.go:196] parse disk info done, disk is : [0xc000326300 0xc000326340]
I0323 03:24:40.041724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:24:40.041730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:24:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:24:43.410554  543705 memory.go:191] Add success.
I0323 03:24:43.409818  543705 cpu.go:282] Add success.
I0323 03:24:43.420318  543705 net.go:648] Add success.
I0323 03:24:43.422815  543705 net.go:770] primary dev: ETH0
I0323 03:24:43.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:24:43.422843  543705 net.go:698] Add success.
I0323 03:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:24:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:24:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:24:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:24:53.409790  543705 memory.go:184] no items to output this cycle
I0323 03:24:53.409819  543705 cpu.go:275] no items to output this cycle
E0323 03:25:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:25:03.409803  543705 memory.go:184] no items to output this cycle
I0323 03:25:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 03:25:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:25:13.409813  543705 memory.go:191] Add success.
I0323 03:25:13.409816  543705 cpu.go:282] Add success.
W0323 03:25:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:25:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:25:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:25:13.420145  543705 net.go:648] Add success.
I0323 03:25:13.422696  543705 net.go:770] primary dev: ETH0
I0323 03:25:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:25:13.422724  543705 net.go:698] Add success.
I0323 03:25:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:25:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:25:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 03:25:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:25:14.456573  543705 disk_worker.go:494] system disk:vda1
I0323 03:25:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:25:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:25:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:25:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:25:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:25:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:25:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:25:23.409805  543705 memory.go:184] no items to output this cycle
I0323 03:25:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:25:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:25:33.409771  543705 memory.go:184] no items to output this cycle
I0323 03:25:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 03:25:37.869673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:25:37.872181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:25:37.872187  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f280 0xc00032f2c0]
E0323 03:25:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:25:43.410624  543705 memory.go:191] Add success.
I0323 03:25:43.409804  543705 cpu.go:282] Add success.
I0323 03:25:43.420404  543705 net.go:648] Add success.
I0323 03:25:43.422997  543705 net.go:770] primary dev: ETH0
I0323 03:25:43.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:25:43.423023  543705 net.go:698] Add success.
I0323 03:25:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:25:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:25:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:25:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:25:53.409771  543705 memory.go:184] no items to output this cycle
I0323 03:25:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 03:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:26:03.409769  543705 memory.go:184] no items to output this cycle
I0323 03:26:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 03:26:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:26:13.409804  543705 memory.go:191] Add success.
I0323 03:26:13.409805  543705 cpu.go:282] Add success.
W0323 03:26:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:26:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:26:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:26:13.420155  543705 net.go:648] Add success.
I0323 03:26:13.422941  543705 net.go:770] primary dev: ETH0
I0323 03:26:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:26:13.422967  543705 net.go:698] Add success.
I0323 03:26:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:26:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:26:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 03:26:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:26:14.456515  543705 disk_worker.go:494] system disk:vda1
I0323 03:26:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:26:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:26:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:26:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:26:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:26:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:26:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:26:23.409774  543705 cpu.go:275] no items to output this cycle
I0323 03:26:23.409785  543705 memory.go:184] no items to output this cycle
E0323 03:26:33.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:26:33.409980  543705 cpu.go:275] no items to output this cycle
I0323 03:26:33.410001  543705 memory.go:184] no items to output this cycle
I0323 03:26:37.873672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:26:37.876214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:26:37.876219  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d9c0 0xc00035da00]
E0323 03:26:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:26:43.410918  543705 memory.go:191] Add success.
I0323 03:26:43.409821  543705 cpu.go:282] Add success.
I0323 03:26:43.420636  543705 net.go:648] Add success.
I0323 03:26:43.424021  543705 net.go:770] primary dev: ETH0
I0323 03:26:43.424035  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:26:43.424050  543705 net.go:698] Add success.
I0323 03:26:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:26:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:26:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:26:53.409784  543705 memory.go:184] no items to output this cycle
I0323 03:26:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 03:27:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:27:03.409796  543705 memory.go:184] no items to output this cycle
I0323 03:27:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 03:27:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:27:13.409827  543705 memory.go:191] Add success.
I0323 03:27:13.409834  543705 cpu.go:282] Add success.
W0323 03:27:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:27:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:27:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:27:13.420134  543705 net.go:648] Add success.
I0323 03:27:13.422860  543705 net.go:770] primary dev: ETH0
I0323 03:27:13.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:27:13.422905  543705 net.go:698] Add success.
I0323 03:27:13.428884  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 03:27:13.453045  543705 event_worker.go:152] Polling the log file for events...
I0323 03:27:13.467797  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"069cb135-a178-4866-9182-d2ddb3598689","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:27:13.467830  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 03:27:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:27:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 03:27:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:27:14.456760  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:27:14.456768  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:27:14.456774  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:27:14.456833  543705 disk_worker.go:494] system disk:vda1
I0323 03:27:14.456864  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:27:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:27:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 03:27:16.458050  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:27:16.458050  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:27:16.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:27:16.458128  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:27:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:27:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:27:23.409764  543705 memory.go:184] no items to output this cycle
I0323 03:27:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 03:27:33.409929  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:27:33.409961  543705 cpu.go:275] no items to output this cycle
I0323 03:27:33.410034  543705 memory.go:184] no items to output this cycle
I0323 03:27:37.877672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:27:37.880230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:27:37.880236  543705 disk_info.go:196] parse disk info done, disk is : [0xc000291e00 0xc000291e40]
I0323 03:27:40.044907  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:27:40.044913  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:27:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:27:43.410750  543705 memory.go:191] Add success.
I0323 03:27:43.409806  543705 cpu.go:282] Add success.
I0323 03:27:43.420611  543705 net.go:648] Add success.
I0323 03:27:43.423658  543705 net.go:770] primary dev: ETH0
I0323 03:27:43.423672  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:27:43.423683  543705 net.go:698] Add success.
I0323 03:27:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:27:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:27:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:27:53.410384  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:27:53.410400  543705 memory.go:184] no items to output this cycle
I0323 03:27:53.410399  543705 cpu.go:275] no items to output this cycle
E0323 03:28:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:28:03.409807  543705 memory.go:184] no items to output this cycle
I0323 03:28:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 03:28:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:28:13.409795  543705 memory.go:191] Add success.
I0323 03:28:13.409820  543705 cpu.go:282] Add success.
W0323 03:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:28:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:28:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:28:13.420258  543705 net.go:648] Add success.
I0323 03:28:13.422664  543705 net.go:770] primary dev: ETH0
I0323 03:28:13.422677  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:28:13.422689  543705 net.go:698] Add success.
I0323 03:28:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:28:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:28:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 03:28:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:28:14.456614  543705 disk_worker.go:494] system disk:vda1
I0323 03:28:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:28:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:28:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:28:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:28:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:28:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:28:23.409763  543705 memory.go:184] no items to output this cycle
I0323 03:28:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 03:28:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:28:33.409767  543705 memory.go:184] no items to output this cycle
I0323 03:28:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 03:28:37.881674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:28:37.884185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:28:37.884192  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d5080 0xc0003d50c0]
E0323 03:28:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:28:43.410700  543705 memory.go:191] Add success.
I0323 03:28:43.409804  543705 cpu.go:282] Add success.
I0323 03:28:43.420378  543705 net.go:648] Add success.
I0323 03:28:43.423141  543705 net.go:770] primary dev: ETH0
I0323 03:28:43.423154  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:28:43.423167  543705 net.go:698] Add success.
I0323 03:28:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:28:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:28:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:28:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:28:53.409783  543705 memory.go:184] no items to output this cycle
I0323 03:28:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 03:29:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:29:03.409798  543705 memory.go:184] no items to output this cycle
I0323 03:29:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 03:29:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:29:13.409809  543705 memory.go:191] Add success.
I0323 03:29:13.409810  543705 cpu.go:282] Add success.
W0323 03:29:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:29:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:29:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:29:13.420227  543705 net.go:648] Add success.
I0323 03:29:13.422544  543705 net.go:770] primary dev: ETH0
I0323 03:29:13.422559  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:29:13.422571  543705 net.go:698] Add success.
I0323 03:29:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:29:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:29:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 03:29:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:29:14.456610  543705 disk_worker.go:494] system disk:vda1
I0323 03:29:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:29:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:29:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:29:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:29:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:29:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:29:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:29:23.409802  543705 memory.go:184] no items to output this cycle
I0323 03:29:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 03:29:33.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:29:33.409849  543705 memory.go:184] no items to output this cycle
I0323 03:29:33.409916  543705 cpu.go:275] no items to output this cycle
I0323 03:29:37.885679  543705 disk_info.go:125] begin check local disk info of client
I0323 03:29:37.888199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:29:37.888205  543705 disk_info.go:196] parse disk info done, disk is : [0xc000504200 0xc000504240]
E0323 03:29:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:29:43.410504  543705 memory.go:191] Add success.
I0323 03:29:43.409830  543705 cpu.go:282] Add success.
I0323 03:29:43.420212  543705 net.go:648] Add success.
I0323 03:29:43.422614  543705 net.go:770] primary dev: ETH0
I0323 03:29:43.422628  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:29:43.422641  543705 net.go:698] Add success.
I0323 03:29:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:29:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:29:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:29:53.409814  543705 memory.go:184] no items to output this cycle
I0323 03:29:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 03:30:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:30:03.409805  543705 memory.go:184] no items to output this cycle
I0323 03:30:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 03:30:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:30:13.409808  543705 memory.go:191] Add success.
I0323 03:30:13.409809  543705 cpu.go:282] Add success.
W0323 03:30:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:30:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:30:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:30:13.420157  543705 net.go:648] Add success.
I0323 03:30:13.423047  543705 net.go:770] primary dev: ETH0
I0323 03:30:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:30:13.423073  543705 net.go:698] Add success.
I0323 03:30:13.463726  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"907f4e9a-6ee9-4637-bd65-cfb90687a89d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:30:13.463759  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:30:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:30:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:30:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 03:30:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:30:14.456702  543705 disk_worker.go:494] system disk:vda1
I0323 03:30:14.456737  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:30:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:30:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:30:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:30:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:30:23.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:30:23.409875  543705 memory.go:184] no items to output this cycle
I0323 03:30:23.409926  543705 cpu.go:275] no items to output this cycle
E0323 03:30:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:30:33.409776  543705 memory.go:184] no items to output this cycle
I0323 03:30:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 03:30:37.889671  543705 disk_info.go:125] begin check local disk info of client
I0323 03:30:37.892220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:30:37.892226  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4700 0xc0000c4740]
I0323 03:30:40.045731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:30:40.045737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:30:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:30:43.410734  543705 memory.go:191] Add success.
I0323 03:30:43.409797  543705 cpu.go:282] Add success.
I0323 03:30:43.420440  543705 net.go:648] Add success.
I0323 03:30:43.423243  543705 net.go:770] primary dev: ETH0
I0323 03:30:43.423255  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:30:43.423268  543705 net.go:698] Add success.
I0323 03:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:30:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:30:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:30:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:30:53.409783  543705 memory.go:184] no items to output this cycle
I0323 03:30:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 03:31:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:31:03.409777  543705 memory.go:184] no items to output this cycle
I0323 03:31:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 03:31:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:31:13.409833  543705 memory.go:191] Add success.
I0323 03:31:13.409836  543705 cpu.go:282] Add success.
W0323 03:31:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:31:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:31:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:31:13.420192  543705 net.go:648] Add success.
I0323 03:31:13.422702  543705 net.go:770] primary dev: ETH0
I0323 03:31:13.422715  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:31:13.422729  543705 net.go:698] Add success.
I0323 03:31:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:31:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:31:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0323 03:31:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:31:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 03:31:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:31:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:31:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:31:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:31:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:31:23.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:31:23.409876  543705 cpu.go:275] no items to output this cycle
I0323 03:31:23.409881  543705 memory.go:184] no items to output this cycle
E0323 03:31:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:31:33.409799  543705 memory.go:184] no items to output this cycle
I0323 03:31:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 03:31:37.893674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:31:37.896199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:31:37.896206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab600 0xc0001ab640]
E0323 03:31:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:31:43.410708  543705 memory.go:191] Add success.
I0323 03:31:43.409822  543705 cpu.go:282] Add success.
I0323 03:31:43.420389  543705 net.go:648] Add success.
I0323 03:31:43.423265  543705 net.go:770] primary dev: ETH0
I0323 03:31:43.423278  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:31:43.423291  543705 net.go:698] Add success.
I0323 03:31:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:31:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:31:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:31:53.410246  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:31:53.410263  543705 memory.go:184] no items to output this cycle
I0323 03:31:53.410266  543705 cpu.go:275] no items to output this cycle
E0323 03:32:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:32:03.409787  543705 memory.go:184] no items to output this cycle
I0323 03:32:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 03:32:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:32:13.409804  543705 memory.go:191] Add success.
I0323 03:32:13.409815  543705 cpu.go:282] Add success.
W0323 03:32:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:32:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:32:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:32:13.420326  543705 net.go:648] Add success.
I0323 03:32:13.423377  543705 net.go:770] primary dev: ETH0
I0323 03:32:13.423391  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:32:13.423405  543705 net.go:698] Add success.
W0323 03:32:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:32:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 03:32:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:32:14.456972  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:32:14.456982  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:32:14.456988  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:32:14.457039  543705 disk_worker.go:494] system disk:vda1
I0323 03:32:14.457070  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:32:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:32:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:32:16.457931  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:32:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:32:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:32:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:32:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:32:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:32:23.409796  543705 memory.go:184] no items to output this cycle
I0323 03:32:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 03:32:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:32:33.409776  543705 memory.go:184] no items to output this cycle
I0323 03:32:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 03:32:37.897671  543705 disk_info.go:125] begin check local disk info of client
I0323 03:32:37.900204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:32:37.900211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f40 0xc000384000]
E0323 03:32:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:32:43.410568  543705 memory.go:191] Add success.
I0323 03:32:43.409815  543705 cpu.go:282] Add success.
I0323 03:32:43.420268  543705 net.go:648] Add success.
I0323 03:32:43.422775  543705 net.go:770] primary dev: ETH0
I0323 03:32:43.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:32:43.422802  543705 net.go:698] Add success.
I0323 03:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:32:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:32:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:32:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:32:53.409787  543705 memory.go:184] no items to output this cycle
I0323 03:32:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 03:33:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:33:03.409784  543705 memory.go:184] no items to output this cycle
I0323 03:33:03.409788  543705 cpu.go:275] no items to output this cycle
W0323 03:33:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:33:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:33:13.409745  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:33:13.409815  543705 cpu.go:282] Add success.
E0323 03:33:13.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:33:13.409841  543705 memory.go:191] Add success.
I0323 03:33:13.420241  543705 net.go:648] Add success.
I0323 03:33:13.423046  543705 net.go:770] primary dev: ETH0
I0323 03:33:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:33:13.423072  543705 net.go:698] Add success.
I0323 03:33:13.469129  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"82611116-f349-42e4-bce5-27b6a5a7a6a5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:33:13.469175  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:33:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:33:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:33:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 03:33:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:33:14.456899  543705 disk_worker.go:494] system disk:vda1
I0323 03:33:14.456929  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:33:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:33:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:33:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:33:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:33:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:33:23.409793  543705 memory.go:184] no items to output this cycle
I0323 03:33:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 03:33:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:33:33.409767  543705 memory.go:184] no items to output this cycle
I0323 03:33:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 03:33:37.901675  543705 disk_info.go:125] begin check local disk info of client
I0323 03:33:37.904189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:33:37.904195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
I0323 03:33:40.048924  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:33:40.048932  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:33:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:33:43.410584  543705 memory.go:191] Add success.
I0323 03:33:43.409793  543705 cpu.go:282] Add success.
I0323 03:33:43.420264  543705 net.go:648] Add success.
I0323 03:33:43.422807  543705 net.go:770] primary dev: ETH0
I0323 03:33:43.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:33:43.422833  543705 net.go:698] Add success.
I0323 03:33:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:33:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:33:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:33:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:33:53.409817  543705 memory.go:184] no items to output this cycle
I0323 03:33:53.409826  543705 cpu.go:275] no items to output this cycle
E0323 03:34:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:34:03.409799  543705 memory.go:184] no items to output this cycle
I0323 03:34:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 03:34:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:34:13.409794  543705 memory.go:191] Add success.
I0323 03:34:13.409816  543705 cpu.go:282] Add success.
W0323 03:34:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:34:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:34:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:34:13.420308  543705 net.go:648] Add success.
I0323 03:34:13.422920  543705 net.go:770] primary dev: ETH0
I0323 03:34:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:34:13.422943  543705 net.go:698] Add success.
I0323 03:34:14.454951  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:34:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:34:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 03:34:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:34:14.456617  543705 disk_worker.go:494] system disk:vda1
I0323 03:34:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:34:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:34:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:34:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:34:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:34:23.409793  543705 memory.go:184] no items to output this cycle
I0323 03:34:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 03:34:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:34:33.409782  543705 memory.go:184] no items to output this cycle
I0323 03:34:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 03:34:37.905672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:34:37.908227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:34:37.908232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051ca00 0xc00051ca40]
E0323 03:34:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:34:43.410609  543705 memory.go:191] Add success.
I0323 03:34:43.409829  543705 cpu.go:282] Add success.
I0323 03:34:43.420328  543705 net.go:648] Add success.
I0323 03:34:43.422850  543705 net.go:770] primary dev: ETH0
I0323 03:34:43.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:34:43.422877  543705 net.go:698] Add success.
I0323 03:34:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:34:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:34:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:34:53.409841  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:34:53.409862  543705 memory.go:184] no items to output this cycle
I0323 03:34:53.409871  543705 cpu.go:275] no items to output this cycle
E0323 03:35:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:35:03.409776  543705 memory.go:184] no items to output this cycle
I0323 03:35:03.409780  543705 cpu.go:275] no items to output this cycle
E0323 03:35:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:35:13.409805  543705 memory.go:191] Add success.
I0323 03:35:13.409809  543705 cpu.go:282] Add success.
W0323 03:35:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:35:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:35:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:35:13.420376  543705 net.go:648] Add success.
I0323 03:35:13.422935  543705 net.go:770] primary dev: ETH0
I0323 03:35:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:35:13.422960  543705 net.go:698] Add success.
I0323 03:35:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:35:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:35:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0323 03:35:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:35:14.456514  543705 disk_worker.go:494] system disk:vda1
I0323 03:35:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:35:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:35:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:35:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:35:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:35:23.410723  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:35:23.410739  543705 memory.go:184] no items to output this cycle
I0323 03:35:23.410741  543705 cpu.go:275] no items to output this cycle
E0323 03:35:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:35:33.409770  543705 memory.go:184] no items to output this cycle
I0323 03:35:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 03:35:37.909679  543705 disk_info.go:125] begin check local disk info of client
I0323 03:35:37.912126  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:35:37.912133  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051dc00 0xc00051dc40]
E0323 03:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:35:43.410829  543705 memory.go:191] Add success.
I0323 03:35:43.409815  543705 cpu.go:282] Add success.
I0323 03:35:43.420535  543705 net.go:648] Add success.
I0323 03:35:43.423261  543705 net.go:770] primary dev: ETH0
I0323 03:35:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:35:43.423298  543705 net.go:698] Add success.
I0323 03:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:35:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:35:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:35:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:35:53.409775  543705 memory.go:184] no items to output this cycle
I0323 03:35:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 03:36:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:36:03.409800  543705 memory.go:184] no items to output this cycle
I0323 03:36:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 03:36:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:36:13.409801  543705 memory.go:191] Add success.
I0323 03:36:13.409821  543705 cpu.go:282] Add success.
W0323 03:36:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:36:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:36:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:36:13.419734  543705 net.go:648] Add success.
I0323 03:36:13.422638  543705 net.go:770] primary dev: ETH0
I0323 03:36:13.422650  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:36:13.422661  543705 net.go:698] Add success.
I0323 03:36:13.463362  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0af5bcbb-64b9-4cd4-9128-2629af7304f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:36:13.463393  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:36:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:36:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:36:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 03:36:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:36:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 03:36:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:36:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:36:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:36:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:36:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:36:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:36:23.409793  543705 memory.go:184] no items to output this cycle
I0323 03:36:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 03:36:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:36:33.409776  543705 cpu.go:275] no items to output this cycle
I0323 03:36:33.409786  543705 memory.go:184] no items to output this cycle
I0323 03:36:37.913676  543705 disk_info.go:125] begin check local disk info of client
I0323 03:36:37.916157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:36:37.916164  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a700 0xc00047a740]
I0323 03:36:40.049728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:36:40.049734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:36:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:36:43.410684  543705 memory.go:191] Add success.
I0323 03:36:43.409830  543705 cpu.go:282] Add success.
I0323 03:36:43.420387  543705 net.go:648] Add success.
I0323 03:36:43.422938  543705 net.go:770] primary dev: ETH0
I0323 03:36:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:36:43.422964  543705 net.go:698] Add success.
I0323 03:36:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:36:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:36:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:36:53.410211  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:36:53.410230  543705 memory.go:184] no items to output this cycle
I0323 03:36:53.410240  543705 cpu.go:275] no items to output this cycle
E0323 03:37:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:37:03.409782  543705 memory.go:184] no items to output this cycle
I0323 03:37:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 03:37:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:37:13.409830  543705 memory.go:191] Add success.
I0323 03:37:13.409841  543705 cpu.go:282] Add success.
W0323 03:37:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:37:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:37:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:37:13.420181  543705 net.go:648] Add success.
I0323 03:37:13.423061  543705 net.go:770] primary dev: ETH0
I0323 03:37:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:37:13.423089  543705 net.go:698] Add success.
I0323 03:37:13.453665  543705 event_worker.go:152] Polling the log file for events...
W0323 03:37:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:37:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 03:37:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:37:14.455928  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:37:14.455936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:37:14.455943  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:37:14.456591  543705 disk_worker.go:494] system disk:vda1
I0323 03:37:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:37:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:37:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:37:16.457961  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:37:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:37:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:37:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:37:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:37:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:37:23.409789  543705 memory.go:184] no items to output this cycle
I0323 03:37:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 03:37:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:37:33.409773  543705 memory.go:184] no items to output this cycle
I0323 03:37:33.409778  543705 cpu.go:275] no items to output this cycle
I0323 03:37:37.917673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:37:37.920147  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:37:37.920153  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003643c0 0xc000364400]
E0323 03:37:43.410228  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:37:43.411200  543705 memory.go:191] Add success.
I0323 03:37:43.410289  543705 cpu.go:282] Add success.
I0323 03:37:43.419968  543705 net.go:648] Add success.
I0323 03:37:43.423013  543705 net.go:770] primary dev: ETH0
I0323 03:37:43.423028  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:37:43.423040  543705 net.go:698] Add success.
I0323 03:37:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:37:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:37:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:37:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:37:53.409775  543705 memory.go:184] no items to output this cycle
I0323 03:37:53.409783  543705 cpu.go:275] no items to output this cycle
I0323 03:38:03.409906  543705 cpu.go:275] no items to output this cycle
E0323 03:38:03.409987  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:38:03.410018  543705 memory.go:184] no items to output this cycle
E0323 03:38:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:38:13.409808  543705 memory.go:191] Add success.
I0323 03:38:13.409816  543705 cpu.go:282] Add success.
W0323 03:38:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:38:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:38:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:38:13.420336  543705 net.go:648] Add success.
I0323 03:38:13.423419  543705 net.go:770] primary dev: ETH0
I0323 03:38:13.423433  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:38:13.423445  543705 net.go:698] Add success.
I0323 03:38:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:38:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:38:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 03:38:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:38:14.456581  543705 disk_worker.go:494] system disk:vda1
I0323 03:38:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:38:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:38:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:38:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:38:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:38:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:38:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:38:23.409776  543705 memory.go:184] no items to output this cycle
I0323 03:38:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 03:38:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:38:33.409782  543705 memory.go:184] no items to output this cycle
I0323 03:38:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 03:38:37.921673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:38:37.924143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:38:37.924149  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0323 03:38:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:38:43.410612  543705 memory.go:191] Add success.
I0323 03:38:43.409805  543705 cpu.go:282] Add success.
I0323 03:38:43.420327  543705 net.go:648] Add success.
I0323 03:38:43.422995  543705 net.go:770] primary dev: ETH0
I0323 03:38:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:38:43.423034  543705 net.go:698] Add success.
I0323 03:38:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:38:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:38:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:38:53.410370  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:38:53.410387  543705 memory.go:184] no items to output this cycle
I0323 03:38:53.410407  543705 cpu.go:275] no items to output this cycle
E0323 03:39:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:39:03.409786  543705 memory.go:184] no items to output this cycle
I0323 03:39:03.409788  543705 cpu.go:275] no items to output this cycle
W0323 03:39:13.409728  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:39:13.409752  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:39:13.409759  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:39:13.409840  543705 cpu.go:282] Add success.
E0323 03:39:13.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:39:13.409877  543705 memory.go:191] Add success.
I0323 03:39:13.420098  543705 net.go:770] primary dev: ETH0
I0323 03:39:13.420113  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:39:13.420125  543705 net.go:698] Add success.
I0323 03:39:13.420355  543705 net.go:648] Add success.
I0323 03:39:13.462925  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c71e0818-ae93-4f53-8a4f-d2ecb0d699cb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:39:13.462960  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:39:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:39:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:39:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0323 03:39:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:39:14.456708  543705 disk_worker.go:494] system disk:vda1
I0323 03:39:14.456745  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:39:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:39:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:39:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:39:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:39:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:39:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:39:23.409769  543705 memory.go:184] no items to output this cycle
I0323 03:39:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 03:39:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:39:33.409798  543705 memory.go:184] no items to output this cycle
I0323 03:39:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 03:39:37.925675  543705 disk_info.go:125] begin check local disk info of client
I0323 03:39:37.928189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:39:37.928196  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6700 0xc0003b6740]
I0323 03:39:40.052949  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:39:40.052956  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:39:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:39:43.410800  543705 memory.go:191] Add success.
I0323 03:39:43.409815  543705 cpu.go:282] Add success.
I0323 03:39:43.420511  543705 net.go:648] Add success.
I0323 03:39:43.423332  543705 net.go:770] primary dev: ETH0
I0323 03:39:43.423345  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:39:43.423373  543705 net.go:698] Add success.
I0323 03:39:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:39:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:39:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:39:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:39:53.409789  543705 memory.go:184] no items to output this cycle
I0323 03:39:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 03:40:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:40:03.409781  543705 memory.go:184] no items to output this cycle
I0323 03:40:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 03:40:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:40:13.409834  543705 memory.go:191] Add success.
I0323 03:40:13.409837  543705 cpu.go:282] Add success.
W0323 03:40:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:40:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:40:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:40:13.420259  543705 net.go:648] Add success.
I0323 03:40:13.423101  543705 net.go:770] primary dev: ETH0
I0323 03:40:13.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:40:13.423127  543705 net.go:698] Add success.
I0323 03:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:40:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:40:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 03:40:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:40:14.456503  543705 disk_worker.go:494] system disk:vda1
I0323 03:40:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:40:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:40:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:40:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:40:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:40:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:40:23.409796  543705 memory.go:184] no items to output this cycle
I0323 03:40:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 03:40:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:40:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 03:40:33.409805  543705 memory.go:184] no items to output this cycle
I0323 03:40:37.929675  543705 disk_info.go:125] begin check local disk info of client
I0323 03:40:37.932189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:40:37.932195  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343100 0xc000343140]
E0323 03:40:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:40:43.410776  543705 memory.go:191] Add success.
I0323 03:40:43.409798  543705 cpu.go:282] Add success.
I0323 03:40:43.420450  543705 net.go:648] Add success.
I0323 03:40:43.423351  543705 net.go:770] primary dev: ETH0
I0323 03:40:43.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:40:43.423377  543705 net.go:698] Add success.
I0323 03:40:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:40:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:40:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:40:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:40:53.409772  543705 memory.go:184] no items to output this cycle
I0323 03:40:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 03:41:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:41:03.409777  543705 memory.go:184] no items to output this cycle
I0323 03:41:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 03:41:13.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:41:13.409852  543705 memory.go:191] Add success.
W0323 03:41:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:41:13.409904  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:41:13.409908  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:41:13.409821  543705 cpu.go:282] Add success.
I0323 03:41:13.420308  543705 net.go:648] Add success.
I0323 03:41:13.423380  543705 net.go:770] primary dev: ETH0
I0323 03:41:13.423395  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:41:13.423408  543705 net.go:698] Add success.
I0323 03:41:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:41:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:41:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 03:41:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:41:14.456547  543705 disk_worker.go:494] system disk:vda1
I0323 03:41:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:41:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:41:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:41:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:41:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:41:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:41:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:41:23.409795  543705 memory.go:184] no items to output this cycle
I0323 03:41:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 03:41:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:41:33.409785  543705 memory.go:184] no items to output this cycle
I0323 03:41:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 03:41:37.933674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:41:37.936156  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:41:37.936162  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b840 0xc00027b880]
E0323 03:41:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:41:43.410665  543705 memory.go:191] Add success.
I0323 03:41:43.409821  543705 cpu.go:282] Add success.
I0323 03:41:43.420356  543705 net.go:648] Add success.
I0323 03:41:43.422844  543705 net.go:770] primary dev: ETH0
I0323 03:41:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:41:43.422873  543705 net.go:698] Add success.
I0323 03:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:41:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:41:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:41:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:41:53.409786  543705 cpu.go:275] no items to output this cycle
I0323 03:41:53.409787  543705 memory.go:184] no items to output this cycle
E0323 03:42:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:42:03.409800  543705 memory.go:184] no items to output this cycle
I0323 03:42:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 03:42:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:42:13.409832  543705 memory.go:191] Add success.
I0323 03:42:13.409837  543705 cpu.go:282] Add success.
W0323 03:42:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:42:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:42:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:42:13.420412  543705 net.go:648] Add success.
I0323 03:42:13.423146  543705 net.go:770] primary dev: ETH0
I0323 03:42:13.423161  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:42:13.423185  543705 net.go:698] Add success.
I0323 03:42:13.467337  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29163bea-9519-43ff-81a3-db5807192c11","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:42:13.467380  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 03:42:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:42:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0323 03:42:14.455243  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:42:14.456047  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:42:14.456057  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:42:14.456063  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:42:14.456661  543705 disk_worker.go:494] system disk:vda1
I0323 03:42:14.456695  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:42:15.456800  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:42:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:42:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:42:16.457926  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:42:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:42:16.458000  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:42:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:42:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:42:23.409771  543705 memory.go:184] no items to output this cycle
I0323 03:42:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 03:42:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:42:33.409802  543705 memory.go:184] no items to output this cycle
I0323 03:42:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 03:42:37.937670  543705 disk_info.go:125] begin check local disk info of client
I0323 03:42:37.940243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:42:37.940249  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484c00 0xc000484c40]
I0323 03:42:40.053728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:42:40.053734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:42:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:42:43.410719  543705 memory.go:191] Add success.
I0323 03:42:43.409800  543705 cpu.go:282] Add success.
I0323 03:42:43.420446  543705 net.go:648] Add success.
I0323 03:42:43.423018  543705 net.go:770] primary dev: ETH0
I0323 03:42:43.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:42:43.423046  543705 net.go:698] Add success.
I0323 03:42:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:42:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:42:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:42:53.410336  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:42:53.410354  543705 memory.go:184] no items to output this cycle
I0323 03:42:53.410358  543705 cpu.go:275] no items to output this cycle
E0323 03:43:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:43:03.409777  543705 memory.go:184] no items to output this cycle
I0323 03:43:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 03:43:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:43:13.409808  543705 cpu.go:282] Add success.
I0323 03:43:13.409824  543705 memory.go:191] Add success.
W0323 03:43:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:43:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:43:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:43:13.420280  543705 net.go:648] Add success.
I0323 03:43:13.421340  543705 net.go:770] primary dev: ETH0
I0323 03:43:13.421354  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:43:13.421366  543705 net.go:698] Add success.
I0323 03:43:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:43:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:43:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 03:43:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:43:14.456558  543705 disk_worker.go:494] system disk:vda1
I0323 03:43:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:43:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:43:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:43:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:43:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:43:23.409803  543705 memory.go:184] no items to output this cycle
I0323 03:43:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 03:43:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:43:33.409775  543705 memory.go:184] no items to output this cycle
I0323 03:43:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 03:43:37.941674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:43:37.944188  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:43:37.944197  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3bc0 0xc000358000]
E0323 03:43:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:43:43.410658  543705 memory.go:191] Add success.
I0323 03:43:43.409802  543705 cpu.go:282] Add success.
I0323 03:43:43.420360  543705 net.go:648] Add success.
I0323 03:43:43.422891  543705 net.go:770] primary dev: ETH0
I0323 03:43:43.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:43:43.422916  543705 net.go:698] Add success.
I0323 03:43:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:43:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:43:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:43:53.410204  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:43:53.410223  543705 memory.go:184] no items to output this cycle
I0323 03:43:53.410254  543705 cpu.go:275] no items to output this cycle
E0323 03:44:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:44:03.409810  543705 memory.go:184] no items to output this cycle
I0323 03:44:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 03:44:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:44:13.409839  543705 cpu.go:282] Add success.
I0323 03:44:13.409848  543705 memory.go:191] Add success.
W0323 03:44:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:44:13.409901  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:44:13.409906  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:44:13.420309  543705 net.go:648] Add success.
I0323 03:44:13.423096  543705 net.go:770] primary dev: ETH0
I0323 03:44:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:44:13.423122  543705 net.go:698] Add success.
I0323 03:44:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:44:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:44:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 03:44:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:44:14.456540  543705 disk_worker.go:494] system disk:vda1
I0323 03:44:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:44:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:44:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:44:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:44:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:44:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:44:23.409782  543705 memory.go:184] no items to output this cycle
I0323 03:44:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 03:44:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:44:33.409784  543705 memory.go:184] no items to output this cycle
I0323 03:44:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 03:44:37.945674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:44:37.948211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:44:37.948218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003584c0 0xc000358500]
E0323 03:44:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:44:43.410643  543705 memory.go:191] Add success.
I0323 03:44:43.409812  543705 cpu.go:282] Add success.
I0323 03:44:43.420342  543705 net.go:648] Add success.
I0323 03:44:43.423219  543705 net.go:770] primary dev: ETH0
I0323 03:44:43.423233  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:44:43.423245  543705 net.go:698] Add success.
I0323 03:44:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:44:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:44:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:44:53.410381  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:44:53.410405  543705 memory.go:184] no items to output this cycle
I0323 03:44:53.410419  543705 cpu.go:275] no items to output this cycle
E0323 03:45:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:45:03.409781  543705 memory.go:184] no items to output this cycle
I0323 03:45:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 03:45:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:45:13.409797  543705 memory.go:191] Add success.
W0323 03:45:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:45:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:45:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:45:13.409866  543705 cpu.go:282] Add success.
I0323 03:45:13.420393  543705 net.go:648] Add success.
I0323 03:45:13.422924  543705 net.go:770] primary dev: ETH0
I0323 03:45:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:45:13.422951  543705 net.go:698] Add success.
I0323 03:45:13.484834  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"da32add5-9189-406a-a52a-9864aee8da2b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:45:13.484870  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:45:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:45:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:45:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0323 03:45:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:45:14.456679  543705 disk_worker.go:494] system disk:vda1
I0323 03:45:14.456715  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:45:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:45:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:45:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:45:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:45:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:45:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:45:23.409805  543705 memory.go:184] no items to output this cycle
I0323 03:45:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 03:45:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:45:33.409784  543705 memory.go:184] no items to output this cycle
I0323 03:45:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 03:45:37.949672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:45:37.952182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:45:37.952189  543705 disk_info.go:196] parse disk info done, disk is : [0xc000383e40 0xc000383e80]
I0323 03:45:40.056973  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:45:40.056978  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:45:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:45:43.410602  543705 memory.go:191] Add success.
I0323 03:45:43.409816  543705 cpu.go:282] Add success.
I0323 03:45:43.420288  543705 net.go:648] Add success.
I0323 03:45:43.423064  543705 net.go:770] primary dev: ETH0
I0323 03:45:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:45:43.423090  543705 net.go:698] Add success.
I0323 03:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:45:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:45:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:45:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:45:53.409792  543705 memory.go:184] no items to output this cycle
I0323 03:45:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 03:46:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:46:03.409776  543705 memory.go:184] no items to output this cycle
I0323 03:46:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 03:46:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:46:13.409804  543705 memory.go:191] Add success.
W0323 03:46:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:46:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:46:13.409844  543705 cpu.go:282] Add success.
I0323 03:46:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:46:13.420288  543705 net.go:648] Add success.
I0323 03:46:13.422919  543705 net.go:770] primary dev: ETH0
I0323 03:46:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:46:13.422947  543705 net.go:698] Add success.
I0323 03:46:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:46:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:46:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0323 03:46:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:46:14.456625  543705 disk_worker.go:494] system disk:vda1
I0323 03:46:14.456655  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:46:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:46:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:46:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:46:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:46:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:46:23.410234  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:46:23.410251  543705 memory.go:184] no items to output this cycle
I0323 03:46:23.410279  543705 cpu.go:275] no items to output this cycle
E0323 03:46:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:46:33.409809  543705 memory.go:184] no items to output this cycle
I0323 03:46:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 03:46:37.953671  543705 disk_info.go:125] begin check local disk info of client
I0323 03:46:37.956237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:46:37.956243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314cc0 0xc000314d00]
E0323 03:46:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:46:43.410706  543705 memory.go:191] Add success.
I0323 03:46:43.409812  543705 cpu.go:282] Add success.
I0323 03:46:43.420383  543705 net.go:648] Add success.
I0323 03:46:43.423048  543705 net.go:770] primary dev: ETH0
I0323 03:46:43.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:46:43.423075  543705 net.go:698] Add success.
I0323 03:46:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:46:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:46:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:46:53.409788  543705 memory.go:184] no items to output this cycle
I0323 03:46:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 03:47:03.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:47:03.409902  543705 cpu.go:275] no items to output this cycle
I0323 03:47:03.409911  543705 memory.go:184] no items to output this cycle
E0323 03:47:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:47:13.409810  543705 memory.go:191] Add success.
I0323 03:47:13.409812  543705 cpu.go:282] Add success.
W0323 03:47:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:47:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:47:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:47:13.420360  543705 net.go:648] Add success.
I0323 03:47:13.422967  543705 net.go:770] primary dev: ETH0
I0323 03:47:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:47:13.422996  543705 net.go:698] Add success.
I0323 03:47:13.453622  543705 event_worker.go:152] Polling the log file for events...
W0323 03:47:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:47:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 03:47:14.455224  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:47:14.456080  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:47:14.456089  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:47:14.456096  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:47:14.456645  543705 disk_worker.go:494] system disk:vda1
I0323 03:47:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:47:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:47:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:47:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:47:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:47:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:47:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:47:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:47:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:47:23.409774  543705 memory.go:184] no items to output this cycle
I0323 03:47:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 03:47:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:47:33.409787  543705 memory.go:184] no items to output this cycle
I0323 03:47:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 03:47:37.957675  543705 disk_info.go:125] begin check local disk info of client
I0323 03:47:37.960185  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:47:37.960191  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391dc0 0xc000391e00]
E0323 03:47:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:47:43.410652  543705 memory.go:191] Add success.
I0323 03:47:43.409791  543705 cpu.go:282] Add success.
I0323 03:47:43.420341  543705 net.go:648] Add success.
I0323 03:47:43.423030  543705 net.go:770] primary dev: ETH0
I0323 03:47:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:47:43.423054  543705 net.go:698] Add success.
I0323 03:47:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:47:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:47:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:47:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:47:53.409769  543705 memory.go:184] no items to output this cycle
I0323 03:47:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 03:48:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:48:03.409796  543705 memory.go:184] no items to output this cycle
I0323 03:48:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 03:48:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:48:13.409780  543705 memory.go:191] Add success.
W0323 03:48:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 03:48:13.409814  543705 cpu.go:282] Add success.
W0323 03:48:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:48:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:48:13.420186  543705 net.go:648] Add success.
I0323 03:48:13.422850  543705 net.go:770] primary dev: ETH0
I0323 03:48:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:48:13.422886  543705 net.go:698] Add success.
I0323 03:48:13.463436  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fbe3d290-9ce3-476b-b891-2fac81927a94","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:48:13.463470  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:48:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:48:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:48:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 03:48:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:48:14.456637  543705 disk_worker.go:494] system disk:vda1
I0323 03:48:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:48:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:48:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:48:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:48:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:48:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:48:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:48:23.409782  543705 cpu.go:275] no items to output this cycle
I0323 03:48:23.409782  543705 memory.go:184] no items to output this cycle
E0323 03:48:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:48:33.409779  543705 memory.go:184] no items to output this cycle
I0323 03:48:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 03:48:37.961677  543705 disk_info.go:125] begin check local disk info of client
I0323 03:48:37.964177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:48:37.964183  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e5c0 0xc00035e600]
I0323 03:48:40.057722  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:48:40.057728  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:48:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:48:43.410644  543705 memory.go:191] Add success.
I0323 03:48:43.409810  543705 cpu.go:282] Add success.
I0323 03:48:43.420346  543705 net.go:648] Add success.
I0323 03:48:43.422988  543705 net.go:770] primary dev: ETH0
I0323 03:48:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:48:43.423012  543705 net.go:698] Add success.
I0323 03:48:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:48:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:48:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:48:53.410399  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:48:53.410420  543705 memory.go:184] no items to output this cycle
I0323 03:48:53.410432  543705 cpu.go:275] no items to output this cycle
E0323 03:49:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:49:03.409799  543705 memory.go:184] no items to output this cycle
I0323 03:49:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 03:49:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:49:13.409783  543705 memory.go:191] Add success.
I0323 03:49:13.409805  543705 cpu.go:282] Add success.
W0323 03:49:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:49:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:49:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:49:13.420141  543705 net.go:648] Add success.
I0323 03:49:13.422924  543705 net.go:770] primary dev: ETH0
I0323 03:49:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:49:13.422951  543705 net.go:698] Add success.
I0323 03:49:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:49:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:49:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0323 03:49:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:49:14.456655  543705 disk_worker.go:494] system disk:vda1
I0323 03:49:14.456686  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:49:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:49:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:49:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:49:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:49:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:49:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:49:23.409772  543705 memory.go:184] no items to output this cycle
I0323 03:49:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 03:49:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:49:33.409801  543705 memory.go:184] no items to output this cycle
I0323 03:49:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 03:49:37.965672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:49:37.968253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:49:37.968259  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264900 0xc000264940]
E0323 03:49:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:49:43.410773  543705 memory.go:191] Add success.
I0323 03:49:43.409797  543705 cpu.go:282] Add success.
I0323 03:49:43.420499  543705 net.go:648] Add success.
I0323 03:49:43.423306  543705 net.go:770] primary dev: ETH0
I0323 03:49:43.423323  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:49:43.423337  543705 net.go:698] Add success.
I0323 03:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:49:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:49:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:49:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:49:53.409806  543705 memory.go:184] no items to output this cycle
I0323 03:49:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 03:50:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:50:03.409777  543705 memory.go:184] no items to output this cycle
I0323 03:50:03.409779  543705 cpu.go:275] no items to output this cycle
E0323 03:50:13.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:50:13.409932  543705 memory.go:191] Add success.
I0323 03:50:13.409909  543705 cpu.go:282] Add success.
W0323 03:50:13.409970  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:50:13.409990  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:50:13.410005  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:50:13.419811  543705 net.go:648] Add success.
I0323 03:50:13.422576  543705 net.go:770] primary dev: ETH0
I0323 03:50:13.422593  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:50:13.422610  543705 net.go:698] Add success.
I0323 03:50:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:50:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:50:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 03:50:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:50:14.456544  543705 disk_worker.go:494] system disk:vda1
I0323 03:50:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:50:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:50:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:50:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:50:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:50:23.409828  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:50:23.409847  543705 memory.go:184] no items to output this cycle
I0323 03:50:23.409859  543705 cpu.go:275] no items to output this cycle
E0323 03:50:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:50:33.409767  543705 memory.go:184] no items to output this cycle
I0323 03:50:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 03:50:37.969677  543705 disk_info.go:125] begin check local disk info of client
I0323 03:50:37.972193  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:50:37.972201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa000 0xc0002aa040]
E0323 03:50:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:50:43.410556  543705 memory.go:191] Add success.
I0323 03:50:43.409801  543705 cpu.go:282] Add success.
I0323 03:50:43.420258  543705 net.go:648] Add success.
I0323 03:50:43.423641  543705 net.go:770] primary dev: ETH0
I0323 03:50:43.423654  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:50:43.423666  543705 net.go:698] Add success.
I0323 03:50:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:50:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:50:53.409783  543705 memory.go:184] no items to output this cycle
I0323 03:50:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 03:51:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:51:03.409776  543705 memory.go:184] no items to output this cycle
I0323 03:51:03.409783  543705 cpu.go:275] no items to output this cycle
E0323 03:51:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:51:13.409789  543705 cpu.go:282] Add success.
I0323 03:51:13.409790  543705 memory.go:191] Add success.
W0323 03:51:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:51:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:51:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:51:13.420508  543705 net.go:648] Add success.
I0323 03:51:13.423277  543705 net.go:770] primary dev: ETH0
I0323 03:51:13.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:51:13.423305  543705 net.go:698] Add success.
I0323 03:51:13.467506  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"adfff2d5-232a-4f25-a559-944dad8b5018","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:51:13.467536  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:51:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:51:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:51:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0323 03:51:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:51:14.456676  543705 disk_worker.go:494] system disk:vda1
I0323 03:51:14.456711  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:51:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:51:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:51:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:51:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:51:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:51:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:51:23.409805  543705 memory.go:184] no items to output this cycle
I0323 03:51:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 03:51:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:51:33.409767  543705 memory.go:184] no items to output this cycle
I0323 03:51:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 03:51:37.973674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:51:37.976201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:51:37.976207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa2c0 0xc0002aa300]
I0323 03:51:40.061001  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:51:40.061007  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:51:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:51:43.410763  543705 memory.go:191] Add success.
I0323 03:51:43.409802  543705 cpu.go:282] Add success.
I0323 03:51:43.420466  543705 net.go:648] Add success.
I0323 03:51:43.423050  543705 net.go:770] primary dev: ETH0
I0323 03:51:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:51:43.423078  543705 net.go:698] Add success.
I0323 03:51:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:51:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:51:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:51:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:51:53.409779  543705 memory.go:184] no items to output this cycle
I0323 03:51:53.409778  543705 cpu.go:275] no items to output this cycle
E0323 03:52:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:52:03.409769  543705 memory.go:184] no items to output this cycle
I0323 03:52:03.409788  543705 cpu.go:275] no items to output this cycle
W0323 03:52:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:52:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:52:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:52:13.409825  543705 cpu.go:282] Add success.
E0323 03:52:13.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:52:13.409847  543705 memory.go:191] Add success.
I0323 03:52:13.420140  543705 net.go:648] Add success.
I0323 03:52:13.422928  543705 net.go:770] primary dev: ETH0
I0323 03:52:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:52:13.422951  543705 net.go:698] Add success.
W0323 03:52:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:52:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0323 03:52:14.455226  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:52:14.456066  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:52:14.456076  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:52:14.456083  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:52:14.456647  543705 disk_worker.go:494] system disk:vda1
I0323 03:52:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:52:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:52:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:52:16.457898  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:52:16.457898  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:52:16.457956  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:52:16.457975  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:52:16.472286  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:52:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:52:23.409781  543705 memory.go:184] no items to output this cycle
I0323 03:52:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 03:52:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:52:33.409801  543705 memory.go:184] no items to output this cycle
I0323 03:52:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 03:52:37.977670  543705 disk_info.go:125] begin check local disk info of client
I0323 03:52:37.980270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:52:37.980276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ab9c0 0xc0002aba00]
E0323 03:52:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:52:43.410693  543705 memory.go:191] Add success.
I0323 03:52:43.409794  543705 cpu.go:282] Add success.
I0323 03:52:43.420389  543705 net.go:648] Add success.
I0323 03:52:43.422995  543705 net.go:770] primary dev: ETH0
I0323 03:52:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:52:43.423020  543705 net.go:698] Add success.
I0323 03:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:52:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:52:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:52:53.409807  543705 memory.go:184] no items to output this cycle
I0323 03:52:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 03:53:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:53:03.409796  543705 memory.go:184] no items to output this cycle
I0323 03:53:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 03:53:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:53:13.409785  543705 memory.go:191] Add success.
I0323 03:53:13.409802  543705 cpu.go:282] Add success.
W0323 03:53:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:53:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:53:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:53:13.420282  543705 net.go:648] Add success.
I0323 03:53:13.423046  543705 net.go:770] primary dev: ETH0
I0323 03:53:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:53:13.423071  543705 net.go:698] Add success.
I0323 03:53:14.453970  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:53:14.454166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:53:14.454249  543705 disk_worker.go:708] disk space is not compliant
W0323 03:53:14.454252  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:53:14.455658  543705 disk_worker.go:494] system disk:vda1
I0323 03:53:14.455690  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:53:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:53:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:53:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:53:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:53:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:53:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:53:23.409803  543705 memory.go:184] no items to output this cycle
I0323 03:53:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 03:53:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:53:33.409800  543705 memory.go:184] no items to output this cycle
I0323 03:53:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 03:53:37.981683  543705 disk_info.go:125] begin check local disk info of client
I0323 03:53:37.984250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:53:37.984255  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047be00 0xc00047be40]
E0323 03:53:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:53:43.410627  543705 memory.go:191] Add success.
I0323 03:53:43.409793  543705 cpu.go:282] Add success.
I0323 03:53:43.420329  543705 net.go:648] Add success.
I0323 03:53:43.422871  543705 net.go:770] primary dev: ETH0
I0323 03:53:43.422886  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:53:43.422901  543705 net.go:698] Add success.
I0323 03:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:53:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:53:53.410363  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:53:53.410372  543705 cpu.go:275] no items to output this cycle
I0323 03:53:53.410378  543705 memory.go:184] no items to output this cycle
E0323 03:54:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:54:03.409768  543705 memory.go:184] no items to output this cycle
I0323 03:54:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 03:54:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:54:13.409791  543705 memory.go:191] Add success.
I0323 03:54:13.409794  543705 cpu.go:282] Add success.
W0323 03:54:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:54:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:54:13.420255  543705 net.go:648] Add success.
I0323 03:54:13.422775  543705 net.go:770] primary dev: ETH0
I0323 03:54:13.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:54:13.422803  543705 net.go:698] Add success.
I0323 03:54:13.462815  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e72eedf8-fa06-4060-bc3c-2ef6606e7c32","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:54:13.462846  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 03:54:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:54:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:54:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 03:54:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:54:14.456604  543705 disk_worker.go:494] system disk:vda1
I0323 03:54:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:54:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:54:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:54:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:54:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:54:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:54:23.409774  543705 memory.go:184] no items to output this cycle
I0323 03:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 03:54:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:54:33.409775  543705 memory.go:184] no items to output this cycle
I0323 03:54:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 03:54:37.985673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:54:37.988212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:54:37.988218  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047bd00 0xc00047bd40]
I0323 03:54:40.061723  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:54:40.061728  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:54:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:54:43.410629  543705 memory.go:191] Add success.
I0323 03:54:43.409814  543705 cpu.go:282] Add success.
I0323 03:54:43.420311  543705 net.go:648] Add success.
I0323 03:54:43.422958  543705 net.go:770] primary dev: ETH0
I0323 03:54:43.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:54:43.422999  543705 net.go:698] Add success.
I0323 03:54:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:54:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:54:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:54:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:54:53.409790  543705 memory.go:184] no items to output this cycle
I0323 03:54:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 03:55:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:55:03.409782  543705 cpu.go:275] no items to output this cycle
I0323 03:55:03.409786  543705 memory.go:184] no items to output this cycle
E0323 03:55:13.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:55:13.409885  543705 memory.go:191] Add success.
W0323 03:55:13.409915  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:55:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:55:13.409935  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:55:13.409943  543705 cpu.go:282] Add success.
I0323 03:55:13.419726  543705 net.go:648] Add success.
I0323 03:55:13.422419  543705 net.go:770] primary dev: ETH0
I0323 03:55:13.422432  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:55:13.422444  543705 net.go:698] Add success.
I0323 03:55:14.454997  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:55:14.455241  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:55:14.455255  543705 disk_worker.go:708] disk space is not compliant
W0323 03:55:14.455258  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:55:14.456670  543705 disk_worker.go:494] system disk:vda1
I0323 03:55:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:55:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:55:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:55:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:55:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:55:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:55:23.409783  543705 memory.go:184] no items to output this cycle
I0323 03:55:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 03:55:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:55:33.409803  543705 memory.go:184] no items to output this cycle
I0323 03:55:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 03:55:37.989674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:55:37.992255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:55:37.992263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0323 03:55:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:55:43.410697  543705 memory.go:191] Add success.
I0323 03:55:43.409796  543705 cpu.go:282] Add success.
I0323 03:55:43.420394  543705 net.go:648] Add success.
I0323 03:55:43.423206  543705 net.go:770] primary dev: ETH0
I0323 03:55:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:55:43.423236  543705 net.go:698] Add success.
I0323 03:55:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:55:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:55:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:55:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:55:53.409804  543705 memory.go:184] no items to output this cycle
I0323 03:55:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 03:56:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:56:03.409802  543705 memory.go:184] no items to output this cycle
I0323 03:56:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 03:56:13.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:56:13.409860  543705 memory.go:191] Add success.
W0323 03:56:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:56:13.409899  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:56:13.409907  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:56:13.409925  543705 cpu.go:282] Add success.
I0323 03:56:13.419727  543705 net.go:648] Add success.
I0323 03:56:13.422305  543705 net.go:770] primary dev: ETH0
I0323 03:56:13.422320  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:56:13.422333  543705 net.go:698] Add success.
I0323 03:56:14.454999  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:56:14.455231  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:56:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0323 03:56:14.455246  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:56:14.456642  543705 disk_worker.go:494] system disk:vda1
I0323 03:56:14.456675  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:56:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:56:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:56:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:56:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:56:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:56:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:56:23.409780  543705 memory.go:184] no items to output this cycle
I0323 03:56:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 03:56:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:56:33.409778  543705 memory.go:184] no items to output this cycle
I0323 03:56:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 03:56:37.993673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:56:37.996215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:56:37.996221  543705 disk_info.go:196] parse disk info done, disk is : [0xc000530840 0xc000530880]
E0323 03:56:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:56:43.410667  543705 memory.go:191] Add success.
I0323 03:56:43.409804  543705 cpu.go:282] Add success.
I0323 03:56:43.420403  543705 net.go:648] Add success.
I0323 03:56:43.423064  543705 net.go:770] primary dev: ETH0
I0323 03:56:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:56:43.423090  543705 net.go:698] Add success.
I0323 03:56:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:56:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:56:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:56:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:56:53.409789  543705 memory.go:184] no items to output this cycle
I0323 03:56:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 03:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:57:03.409777  543705 memory.go:184] no items to output this cycle
I0323 03:57:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 03:57:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:57:13.409918  543705 memory.go:191] Add success.
I0323 03:57:13.409922  543705 cpu.go:282] Add success.
W0323 03:57:13.409950  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:57:13.409969  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:57:13.409974  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:57:13.419714  543705 net.go:648] Add success.
I0323 03:57:13.422248  543705 net.go:770] primary dev: ETH0
I0323 03:57:13.422261  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:57:13.422273  543705 net.go:698] Add success.
I0323 03:57:13.428301  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 03:57:13.453527  543705 event_worker.go:152] Polling the log file for events...
I0323 03:57:13.934605  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f8ac1151-52d5-4e19-a91f-a5e36a1bed01","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 03:57:13.934637  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 03:57:14.454268  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:57:14.454344  543705 disk_worker.go:708] disk space is not compliant
W0323 03:57:14.454348  543705 disk_worker.go:728] disk inode is not compliant
E0323 03:57:14.455202  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 03:57:14.455212  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 03:57:14.455219  543705 custom_config.go:64] query custom config with name: gpu
I0323 03:57:14.455938  543705 disk_worker.go:494] system disk:vda1
I0323 03:57:14.455971  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 03:57:15.456820  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 03:57:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:57:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 03:57:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 03:57:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:57:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:57:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:57:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:57:23.409805  543705 memory.go:184] no items to output this cycle
I0323 03:57:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 03:57:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:57:33.409775  543705 memory.go:184] no items to output this cycle
I0323 03:57:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 03:57:37.997673  543705 disk_info.go:125] begin check local disk info of client
I0323 03:57:38.000216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:57:38.000222  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003afac0 0xc0003afb00]
I0323 03:57:40.065009  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 03:57:40.065016  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 03:57:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:57:43.410591  543705 memory.go:191] Add success.
I0323 03:57:43.409795  543705 cpu.go:282] Add success.
I0323 03:57:43.420372  543705 net.go:648] Add success.
I0323 03:57:43.422800  543705 net.go:770] primary dev: ETH0
I0323 03:57:43.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:57:43.422825  543705 net.go:698] Add success.
I0323 03:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:57:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:57:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:57:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:57:53.409779  543705 memory.go:184] no items to output this cycle
I0323 03:57:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 03:58:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:58:03.409793  543705 memory.go:184] no items to output this cycle
I0323 03:58:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 03:58:13.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:58:13.409899  543705 cpu.go:282] Add success.
I0323 03:58:13.409911  543705 memory.go:191] Add success.
W0323 03:58:13.409946  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:58:13.410043  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:58:13.410052  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:58:13.419709  543705 net.go:648] Add success.
I0323 03:58:13.422162  543705 net.go:770] primary dev: ETH0
I0323 03:58:13.422175  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:58:13.422186  543705 net.go:698] Add success.
I0323 03:58:14.455086  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:58:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:58:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 03:58:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:58:14.456576  543705 disk_worker.go:494] system disk:vda1
I0323 03:58:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:58:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:58:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:58:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:58:16.472474  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:58:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:58:23.409782  543705 memory.go:184] no items to output this cycle
I0323 03:58:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 03:58:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:58:33.409774  543705 memory.go:184] no items to output this cycle
I0323 03:58:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 03:58:38.001674  543705 disk_info.go:125] begin check local disk info of client
I0323 03:58:38.004235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:58:38.004242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328040 0xc000328080]
E0323 03:58:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:58:43.410572  543705 memory.go:191] Add success.
I0323 03:58:43.409798  543705 cpu.go:282] Add success.
I0323 03:58:43.420295  543705 net.go:648] Add success.
I0323 03:58:43.422879  543705 net.go:770] primary dev: ETH0
I0323 03:58:43.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:58:43.422905  543705 net.go:698] Add success.
I0323 03:58:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:58:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:58:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:58:53.410226  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:58:53.410246  543705 memory.go:184] no items to output this cycle
I0323 03:58:53.410251  543705 cpu.go:275] no items to output this cycle
E0323 03:59:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:59:03.409775  543705 memory.go:184] no items to output this cycle
I0323 03:59:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 03:59:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:59:13.409819  543705 memory.go:191] Add success.
I0323 03:59:13.409819  543705 cpu.go:282] Add success.
W0323 03:59:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 03:59:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 03:59:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 03:59:13.419709  543705 net.go:648] Add success.
I0323 03:59:13.422610  543705 net.go:770] primary dev: ETH0
I0323 03:59:13.422624  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:59:13.422652  543705 net.go:698] Add success.
I0323 03:59:14.453942  543705 custom_config.go:64] query custom config with name: gpu
W0323 03:59:14.455276  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 03:59:14.455291  543705 disk_worker.go:708] disk space is not compliant
W0323 03:59:14.455296  543705 disk_worker.go:728] disk inode is not compliant
I0323 03:59:14.457371  543705 disk_worker.go:494] system disk:vda1
I0323 03:59:14.457419  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 03:59:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 03:59:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:59:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:59:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0323 03:59:16.472579  543705 disk_local_worker.go:436] Get disk info: []
E0323 03:59:23.410458  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:59:23.410479  543705 memory.go:184] no items to output this cycle
I0323 03:59:23.410492  543705 cpu.go:275] no items to output this cycle
E0323 03:59:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:59:33.409779  543705 memory.go:184] no items to output this cycle
I0323 03:59:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 03:59:38.005672  543705 disk_info.go:125] begin check local disk info of client
I0323 03:59:38.008236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 03:59:38.008242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329280 0xc0003292c0]
E0323 03:59:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:59:43.410640  543705 memory.go:191] Add success.
I0323 03:59:43.409814  543705 cpu.go:282] Add success.
I0323 03:59:43.420358  543705 net.go:648] Add success.
I0323 03:59:43.423000  543705 net.go:770] primary dev: ETH0
I0323 03:59:43.423013  543705 net.go:802] Send network stats successfully!,count is 6
I0323 03:59:43.423026  543705 net.go:698] Add success.
I0323 03:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 03:59:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 03:59:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 03:59:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 03:59:53.409783  543705 memory.go:184] no items to output this cycle
I0323 03:59:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 04:00:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:00:03.409800  543705 memory.go:184] no items to output this cycle
I0323 04:00:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 04:00:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:00:13.409780  543705 memory.go:191] Add success.
W0323 04:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:00:13.409808  543705 cpu.go:282] Add success.
W0323 04:00:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:00:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:00:13.420275  543705 net.go:648] Add success.
I0323 04:00:13.422831  543705 net.go:770] primary dev: ETH0
I0323 04:00:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:00:13.422855  543705 net.go:698] Add success.
I0323 04:00:13.556028  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"12b3b341-e0a8-4ec9-b729-79ea9da0eab0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:00:13.556060  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:00:14.453937  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:00:14.455260  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:00:14.455272  543705 disk_worker.go:708] disk space is not compliant
W0323 04:00:14.455275  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:00:14.456764  543705 disk_worker.go:494] system disk:vda1
I0323 04:00:14.456798  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:00:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:00:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:00:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:00:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:00:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:00:23.409772  543705 memory.go:184] no items to output this cycle
I0323 04:00:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 04:00:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:00:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 04:00:33.409797  543705 memory.go:184] no items to output this cycle
I0323 04:00:38.009674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:00:38.012262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:00:38.012268  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c52c0 0xc0000c5300]
I0323 04:00:40.065733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:00:40.065740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:00:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:00:43.410581  543705 memory.go:191] Add success.
I0323 04:00:43.409812  543705 cpu.go:282] Add success.
I0323 04:00:43.420331  543705 net.go:648] Add success.
I0323 04:00:43.422995  543705 net.go:770] primary dev: ETH0
I0323 04:00:43.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:00:43.423021  543705 net.go:698] Add success.
I0323 04:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:00:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:00:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:00:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:00:53.409788  543705 memory.go:184] no items to output this cycle
I0323 04:00:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 04:01:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:01:03.409809  543705 memory.go:184] no items to output this cycle
I0323 04:01:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 04:01:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:01:13.409822  543705 memory.go:191] Add success.
I0323 04:01:13.409831  543705 cpu.go:282] Add success.
W0323 04:01:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:01:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:01:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:01:13.420162  543705 net.go:648] Add success.
I0323 04:01:13.422824  543705 net.go:770] primary dev: ETH0
I0323 04:01:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:01:13.422851  543705 net.go:698] Add success.
I0323 04:01:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:01:14.455743  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:01:14.455763  543705 disk_worker.go:708] disk space is not compliant
W0323 04:01:14.455768  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:01:14.459129  543705 disk_worker.go:494] system disk:vda1
I0323 04:01:14.459170  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:01:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:01:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:01:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:01:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:01:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:01:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:01:23.409772  543705 memory.go:184] no items to output this cycle
I0323 04:01:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 04:01:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:01:33.409805  543705 memory.go:184] no items to output this cycle
I0323 04:01:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 04:01:38.013673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:01:38.016278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:01:38.016293  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f40 0xc0000c4f80]
E0323 04:01:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:01:43.410738  543705 memory.go:191] Add success.
I0323 04:01:43.409798  543705 cpu.go:282] Add success.
I0323 04:01:43.420446  543705 net.go:648] Add success.
I0323 04:01:43.423086  543705 net.go:770] primary dev: ETH0
I0323 04:01:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:01:43.423116  543705 net.go:698] Add success.
I0323 04:01:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:01:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:01:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:01:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:01:53.409777  543705 memory.go:184] no items to output this cycle
I0323 04:01:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:02:03.409775  543705 memory.go:184] no items to output this cycle
I0323 04:02:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 04:02:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:02:13.409807  543705 memory.go:191] Add success.
I0323 04:02:13.409816  543705 cpu.go:282] Add success.
W0323 04:02:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:02:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:02:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:02:13.420160  543705 net.go:648] Add success.
I0323 04:02:13.423015  543705 net.go:770] primary dev: ETH0
I0323 04:02:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:02:13.423044  543705 net.go:698] Add success.
W0323 04:02:14.455256  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:02:14.455270  543705 disk_worker.go:708] disk space is not compliant
W0323 04:02:14.455273  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:02:14.456733  543705 disk_worker.go:494] system disk:vda1
I0323 04:02:14.456783  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:02:14.457607  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:02:14.457616  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:02:14.457622  543705 custom_config.go:64] query custom config with name: gpu
E0323 04:02:15.457011  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:02:15.457025  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:02:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:02:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:02:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:02:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:02:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:02:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:02:23.409803  543705 memory.go:184] no items to output this cycle
I0323 04:02:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 04:02:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:02:33.409774  543705 memory.go:184] no items to output this cycle
I0323 04:02:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 04:02:38.017673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:02:38.020208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:02:38.020214  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a100 0xc00047a140]
E0323 04:02:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:02:43.410614  543705 memory.go:191] Add success.
I0323 04:02:43.409796  543705 cpu.go:282] Add success.
I0323 04:02:43.420301  543705 net.go:648] Add success.
I0323 04:02:43.422784  543705 net.go:770] primary dev: ETH0
I0323 04:02:43.422799  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:02:43.422813  543705 net.go:698] Add success.
I0323 04:02:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:02:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:02:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:02:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:02:53.409786  543705 cpu.go:275] no items to output this cycle
I0323 04:02:53.409799  543705 memory.go:184] no items to output this cycle
E0323 04:03:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:03:03.409797  543705 memory.go:184] no items to output this cycle
I0323 04:03:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 04:03:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:03:13.409796  543705 memory.go:191] Add success.
I0323 04:03:13.409798  543705 cpu.go:282] Add success.
W0323 04:03:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:03:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:03:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:03:13.420280  543705 net.go:648] Add success.
I0323 04:03:13.423145  543705 net.go:770] primary dev: ETH0
I0323 04:03:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:03:13.423170  543705 net.go:698] Add success.
I0323 04:03:13.463363  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"111ff723-10f3-4ae2-bf90-900daf44e96a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:03:13.463395  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:03:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:03:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:03:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0323 04:03:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:03:14.456645  543705 disk_worker.go:494] system disk:vda1
I0323 04:03:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:03:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:03:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:03:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:03:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:03:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:03:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:03:23.409780  543705 memory.go:184] no items to output this cycle
I0323 04:03:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 04:03:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:03:33.409776  543705 memory.go:184] no items to output this cycle
I0323 04:03:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 04:03:38.021676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:03:38.024236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:03:38.024242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f78c0 0xc0004f7900]
I0323 04:03:40.069015  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:03:40.069021  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:03:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:03:43.410520  543705 memory.go:191] Add success.
I0323 04:03:43.409826  543705 cpu.go:282] Add success.
I0323 04:03:43.420210  543705 net.go:648] Add success.
I0323 04:03:43.422701  543705 net.go:770] primary dev: ETH0
I0323 04:03:43.422716  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:03:43.422731  543705 net.go:698] Add success.
I0323 04:03:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:03:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:03:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:03:53.409780  543705 cpu.go:275] no items to output this cycle
I0323 04:03:53.409789  543705 memory.go:184] no items to output this cycle
E0323 04:04:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:04:03.409801  543705 memory.go:184] no items to output this cycle
I0323 04:04:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 04:04:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:04:13.409796  543705 memory.go:191] Add success.
I0323 04:04:13.409798  543705 cpu.go:282] Add success.
W0323 04:04:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:04:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:04:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:04:13.420172  543705 net.go:648] Add success.
I0323 04:04:13.423025  543705 net.go:770] primary dev: ETH0
I0323 04:04:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:04:13.423056  543705 net.go:698] Add success.
I0323 04:04:14.455039  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:04:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:04:14.455279  543705 disk_worker.go:708] disk space is not compliant
W0323 04:04:14.455284  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:04:14.457482  543705 disk_worker.go:494] system disk:vda1
I0323 04:04:14.457535  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:04:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:04:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:04:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:04:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:04:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:04:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:04:23.409771  543705 memory.go:184] no items to output this cycle
I0323 04:04:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 04:04:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:04:33.409782  543705 memory.go:184] no items to output this cycle
I0323 04:04:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 04:04:38.025673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:04:38.028234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:04:38.028240  543705 disk_info.go:196] parse disk info done, disk is : [0xc000385bc0 0xc000385c00]
E0323 04:04:43.410280  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:04:43.411083  543705 memory.go:191] Add success.
I0323 04:04:43.410331  543705 cpu.go:282] Add success.
I0323 04:04:43.419771  543705 net.go:648] Add success.
I0323 04:04:43.422371  543705 net.go:770] primary dev: ETH0
I0323 04:04:43.422385  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:04:43.422400  543705 net.go:698] Add success.
I0323 04:04:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:04:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:04:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:04:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:04:53.410255  543705 memory.go:184] no items to output this cycle
I0323 04:04:53.410285  543705 cpu.go:275] no items to output this cycle
E0323 04:05:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:05:03.409801  543705 memory.go:184] no items to output this cycle
I0323 04:05:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 04:05:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:05:13.409781  543705 memory.go:191] Add success.
W0323 04:05:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:05:13.409806  543705 cpu.go:282] Add success.
W0323 04:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:05:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:05:13.420135  543705 net.go:648] Add success.
I0323 04:05:13.422827  543705 net.go:770] primary dev: ETH0
I0323 04:05:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:05:13.422857  543705 net.go:698] Add success.
I0323 04:05:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:05:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:05:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 04:05:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:05:14.456570  543705 disk_worker.go:494] system disk:vda1
I0323 04:05:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:05:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:05:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:05:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:05:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:05:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:05:23.409787  543705 memory.go:184] no items to output this cycle
I0323 04:05:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:05:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:05:33.409789  543705 memory.go:184] no items to output this cycle
I0323 04:05:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 04:05:38.029673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:05:38.032235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:05:38.032242  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aa40 0xc00007aac0]
E0323 04:05:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:05:43.410565  543705 memory.go:191] Add success.
I0323 04:05:43.409809  543705 cpu.go:282] Add success.
I0323 04:05:43.420259  543705 net.go:648] Add success.
I0323 04:05:43.422844  543705 net.go:770] primary dev: ETH0
I0323 04:05:43.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:05:43.422874  543705 net.go:698] Add success.
I0323 04:05:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:05:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:05:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:05:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:05:53.409805  543705 memory.go:184] no items to output this cycle
I0323 04:05:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 04:06:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:06:03.409766  543705 memory.go:184] no items to output this cycle
I0323 04:06:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 04:06:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:06:13.409823  543705 memory.go:191] Add success.
I0323 04:06:13.409827  543705 cpu.go:282] Add success.
W0323 04:06:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:06:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:06:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:06:13.420164  543705 net.go:648] Add success.
I0323 04:06:13.422975  543705 net.go:770] primary dev: ETH0
I0323 04:06:13.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:06:13.423015  543705 net.go:698] Add success.
I0323 04:06:13.469986  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9d87936-145d-43ba-909c-16f03a65c4d6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:06:13.470018  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:06:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:06:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:06:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 04:06:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:06:14.456622  543705 disk_worker.go:494] system disk:vda1
I0323 04:06:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:06:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:06:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:06:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:06:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:06:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:06:23.409775  543705 memory.go:184] no items to output this cycle
I0323 04:06:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 04:06:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:06:33.409770  543705 memory.go:184] no items to output this cycle
I0323 04:06:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 04:06:38.033674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:06:38.036289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:06:38.036296  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047bbc0 0xc00047bc00]
I0323 04:06:40.069737  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:06:40.069743  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:06:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:06:43.410579  543705 memory.go:191] Add success.
I0323 04:06:43.409799  543705 cpu.go:282] Add success.
I0323 04:06:43.420291  543705 net.go:648] Add success.
I0323 04:06:43.422753  543705 net.go:770] primary dev: ETH0
I0323 04:06:43.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:06:43.422780  543705 net.go:698] Add success.
I0323 04:06:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:06:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:06:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:06:53.409787  543705 memory.go:184] no items to output this cycle
I0323 04:06:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 04:07:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:07:03.409785  543705 memory.go:184] no items to output this cycle
I0323 04:07:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 04:07:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:07:13.409789  543705 memory.go:191] Add success.
I0323 04:07:13.409808  543705 cpu.go:282] Add success.
W0323 04:07:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:07:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:07:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:07:13.420110  543705 net.go:648] Add success.
I0323 04:07:13.422711  543705 net.go:770] primary dev: ETH0
I0323 04:07:13.422727  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:07:13.422742  543705 net.go:698] Add success.
I0323 04:07:13.453270  543705 event_worker.go:152] Polling the log file for events...
W0323 04:07:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:07:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 04:07:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0323 04:07:14.456124  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:07:14.456135  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:07:14.456141  543705 custom_config.go:64] query custom config with name: gpu
I0323 04:07:14.456451  543705 disk_worker.go:494] system disk:vda1
I0323 04:07:14.456482  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:07:15.457015  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:07:15.457029  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:07:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:07:16.457987  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:07:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:07:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:07:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:07:23.409795  543705 memory.go:184] no items to output this cycle
I0323 04:07:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 04:07:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:07:33.409771  543705 memory.go:184] no items to output this cycle
I0323 04:07:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 04:07:38.037674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:07:38.040216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:07:38.040222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047acc0 0xc00047ad00]
E0323 04:07:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:07:43.410618  543705 memory.go:191] Add success.
I0323 04:07:43.409794  543705 cpu.go:282] Add success.
I0323 04:07:43.420317  543705 net.go:648] Add success.
I0323 04:07:43.422889  543705 net.go:770] primary dev: ETH0
I0323 04:07:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:07:43.422922  543705 net.go:698] Add success.
I0323 04:07:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:07:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:07:53.410527  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:07:53.410545  543705 memory.go:184] no items to output this cycle
I0323 04:07:53.410603  543705 cpu.go:275] no items to output this cycle
E0323 04:08:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:08:03.409797  543705 memory.go:184] no items to output this cycle
I0323 04:08:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 04:08:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:08:13.409823  543705 memory.go:191] Add success.
I0323 04:08:13.409829  543705 cpu.go:282] Add success.
W0323 04:08:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:08:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:08:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:08:13.420054  543705 net.go:648] Add success.
I0323 04:08:13.422754  543705 net.go:770] primary dev: ETH0
I0323 04:08:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:08:13.422780  543705 net.go:698] Add success.
I0323 04:08:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:08:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:08:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 04:08:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:08:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 04:08:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:08:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:08:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:08:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:08:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:08:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:08:23.409778  543705 memory.go:184] no items to output this cycle
I0323 04:08:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 04:08:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:08:33.409773  543705 memory.go:184] no items to output this cycle
I0323 04:08:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 04:08:38.041676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:08:38.044213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:08:38.044219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa4c0 0xc0002aa500]
E0323 04:08:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:08:43.410606  543705 memory.go:191] Add success.
I0323 04:08:43.409813  543705 cpu.go:282] Add success.
I0323 04:08:43.420300  543705 net.go:648] Add success.
I0323 04:08:43.422697  543705 net.go:770] primary dev: ETH0
I0323 04:08:43.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:08:43.422722  543705 net.go:698] Add success.
I0323 04:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:08:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:08:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:08:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:08:53.409788  543705 memory.go:184] no items to output this cycle
I0323 04:08:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 04:09:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:09:03.409802  543705 memory.go:184] no items to output this cycle
I0323 04:09:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:09:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:09:13.409781  543705 memory.go:191] Add success.
W0323 04:09:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:09:13.409812  543705 cpu.go:282] Add success.
W0323 04:09:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:09:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:09:13.420134  543705 net.go:648] Add success.
I0323 04:09:13.422907  543705 net.go:770] primary dev: ETH0
I0323 04:09:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:09:13.422932  543705 net.go:698] Add success.
I0323 04:09:13.468934  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"682d0d8f-1bbe-4cca-a0db-13a0c4a95a2b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:09:13.468972  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:09:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:09:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:09:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 04:09:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:09:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 04:09:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:09:15.455990  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:09:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:09:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:09:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:09:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:09:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:09:23.409796  543705 cpu.go:275] no items to output this cycle
I0323 04:09:23.409798  543705 memory.go:184] no items to output this cycle
E0323 04:09:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:09:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 04:09:33.409798  543705 memory.go:184] no items to output this cycle
I0323 04:09:38.045680  543705 disk_info.go:125] begin check local disk info of client
I0323 04:09:38.048252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:09:38.048258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262280 0xc0002622c0]
I0323 04:09:40.073056  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:09:40.073063  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:09:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:09:43.410653  543705 memory.go:191] Add success.
I0323 04:09:43.409819  543705 cpu.go:282] Add success.
I0323 04:09:43.420360  543705 net.go:648] Add success.
I0323 04:09:43.423108  543705 net.go:770] primary dev: ETH0
I0323 04:09:43.423121  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:09:43.423133  543705 net.go:698] Add success.
I0323 04:09:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:09:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:09:53.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:09:53.409848  543705 cpu.go:275] no items to output this cycle
I0323 04:09:53.409855  543705 memory.go:184] no items to output this cycle
E0323 04:10:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:10:03.409789  543705 memory.go:184] no items to output this cycle
I0323 04:10:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 04:10:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:10:13.409796  543705 memory.go:191] Add success.
W0323 04:10:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:10:13.409831  543705 cpu.go:282] Add success.
W0323 04:10:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:10:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:10:13.420113  543705 net.go:648] Add success.
I0323 04:10:13.422950  543705 net.go:770] primary dev: ETH0
I0323 04:10:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:10:13.422975  543705 net.go:698] Add success.
I0323 04:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:10:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:10:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 04:10:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:10:14.456565  543705 disk_worker.go:494] system disk:vda1
I0323 04:10:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:10:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:10:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:10:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:10:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:10:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:10:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:10:23.409802  543705 memory.go:184] no items to output this cycle
I0323 04:10:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 04:10:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:10:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:10:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 04:10:38.049675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:10:38.052220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:10:38.052226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b400 0xc00047b440]
E0323 04:10:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:10:43.410640  543705 memory.go:191] Add success.
I0323 04:10:43.409796  543705 cpu.go:282] Add success.
I0323 04:10:43.420367  543705 net.go:648] Add success.
I0323 04:10:43.423379  543705 net.go:770] primary dev: ETH0
I0323 04:10:43.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:10:43.423405  543705 net.go:698] Add success.
I0323 04:10:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:10:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:10:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:10:53.410376  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:10:53.410394  543705 memory.go:184] no items to output this cycle
I0323 04:10:53.410425  543705 cpu.go:275] no items to output this cycle
E0323 04:11:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:11:03.409767  543705 memory.go:184] no items to output this cycle
I0323 04:11:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:11:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:11:13.409829  543705 memory.go:191] Add success.
I0323 04:11:13.409842  543705 cpu.go:282] Add success.
W0323 04:11:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:11:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:11:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:11:13.420227  543705 net.go:648] Add success.
I0323 04:11:13.422712  543705 net.go:770] primary dev: ETH0
I0323 04:11:13.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:11:13.422744  543705 net.go:698] Add success.
I0323 04:11:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:11:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:11:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 04:11:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:11:14.456598  543705 disk_worker.go:494] system disk:vda1
I0323 04:11:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:11:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:11:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:11:16.458086  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:11:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:11:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:11:23.409780  543705 memory.go:184] no items to output this cycle
I0323 04:11:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 04:11:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:11:33.409802  543705 memory.go:184] no items to output this cycle
I0323 04:11:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 04:11:38.053673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:11:38.056495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:11:38.056501  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a780 0xc00047a7c0]
E0323 04:11:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:11:43.410678  543705 memory.go:191] Add success.
I0323 04:11:43.409800  543705 cpu.go:282] Add success.
I0323 04:11:43.420399  543705 net.go:648] Add success.
I0323 04:11:43.422802  543705 net.go:770] primary dev: ETH0
I0323 04:11:43.422816  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:11:43.422829  543705 net.go:698] Add success.
I0323 04:11:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:11:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:11:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:11:53.409770  543705 memory.go:184] no items to output this cycle
I0323 04:11:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 04:12:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:12:03.409775  543705 memory.go:184] no items to output this cycle
I0323 04:12:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 04:12:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:12:13.409792  543705 memory.go:191] Add success.
I0323 04:12:13.409794  543705 cpu.go:282] Add success.
W0323 04:12:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:12:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:12:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:12:13.420074  543705 net.go:648] Add success.
I0323 04:12:13.422952  543705 net.go:770] primary dev: ETH0
I0323 04:12:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:12:13.422981  543705 net.go:698] Add success.
I0323 04:12:13.469879  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"517c0441-7976-4d23-b4e8-e256b6c12021","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:12:13.469912  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 04:12:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:12:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0323 04:12:14.455245  543705 disk_worker.go:728] disk inode is not compliant
E0323 04:12:14.456087  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:12:14.456096  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:12:14.456101  543705 custom_config.go:64] query custom config with name: gpu
I0323 04:12:14.457056  543705 disk_worker.go:494] system disk:vda1
I0323 04:12:14.457095  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:12:15.457061  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:12:15.457075  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:12:16.458020  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:12:16.458020  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:12:16.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:12:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:12:16.472530  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:12:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:12:23.409785  543705 cpu.go:275] no items to output this cycle
I0323 04:12:23.409790  543705 memory.go:184] no items to output this cycle
E0323 04:12:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:12:33.409797  543705 memory.go:184] no items to output this cycle
I0323 04:12:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 04:12:38.057671  543705 disk_info.go:125] begin check local disk info of client
I0323 04:12:38.060271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:12:38.060278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa600 0xc0001aa640]
I0323 04:12:40.073731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:12:40.073737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:12:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:12:43.410550  543705 memory.go:191] Add success.
I0323 04:12:43.409802  543705 cpu.go:282] Add success.
I0323 04:12:43.420243  543705 net.go:648] Add success.
I0323 04:12:43.422718  543705 net.go:770] primary dev: ETH0
I0323 04:12:43.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:12:43.422744  543705 net.go:698] Add success.
I0323 04:12:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:12:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:12:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:12:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:12:53.409778  543705 memory.go:184] no items to output this cycle
I0323 04:12:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 04:13:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:13:03.409781  543705 memory.go:184] no items to output this cycle
I0323 04:13:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 04:13:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:13:13.409816  543705 memory.go:191] Add success.
I0323 04:13:13.409828  543705 cpu.go:282] Add success.
W0323 04:13:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:13:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:13:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:13:13.420179  543705 net.go:648] Add success.
I0323 04:13:13.423156  543705 net.go:770] primary dev: ETH0
I0323 04:13:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:13:13.423181  543705 net.go:698] Add success.
I0323 04:13:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:13:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:13:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 04:13:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:13:14.456522  543705 disk_worker.go:494] system disk:vda1
I0323 04:13:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:13:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:13:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:13:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:13:16.472492  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:13:23.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:13:23.409997  543705 memory.go:184] no items to output this cycle
I0323 04:13:23.410023  543705 cpu.go:275] no items to output this cycle
E0323 04:13:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:13:33.409781  543705 memory.go:184] no items to output this cycle
I0323 04:13:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 04:13:38.061674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:13:38.064238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:13:38.064244  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5cc0 0xc0000c5d00]
E0323 04:13:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:13:43.410718  543705 memory.go:191] Add success.
I0323 04:13:43.409805  543705 cpu.go:282] Add success.
I0323 04:13:43.420477  543705 net.go:648] Add success.
I0323 04:13:43.423194  543705 net.go:770] primary dev: ETH0
I0323 04:13:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:13:43.423220  543705 net.go:698] Add success.
I0323 04:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:13:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:13:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:13:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:13:53.409778  543705 memory.go:184] no items to output this cycle
I0323 04:13:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 04:14:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:14:03.409772  543705 memory.go:184] no items to output this cycle
I0323 04:14:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 04:14:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:14:13.409796  543705 memory.go:191] Add success.
I0323 04:14:13.409797  543705 cpu.go:282] Add success.
W0323 04:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:14:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:14:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:14:13.420205  543705 net.go:648] Add success.
I0323 04:14:13.422989  543705 net.go:770] primary dev: ETH0
I0323 04:14:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:14:13.423015  543705 net.go:698] Add success.
I0323 04:14:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:14:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:14:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 04:14:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:14:14.456491  543705 disk_worker.go:494] system disk:vda1
I0323 04:14:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:14:15.456035  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:14:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:14:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:14:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:14:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:14:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:14:23.409802  543705 memory.go:184] no items to output this cycle
I0323 04:14:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 04:14:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:14:33.409771  543705 memory.go:184] no items to output this cycle
I0323 04:14:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 04:14:38.065675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:14:38.068243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:14:38.068249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa0c0 0xc0001aa100]
E0323 04:14:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:14:43.410697  543705 memory.go:191] Add success.
I0323 04:14:43.409814  543705 cpu.go:282] Add success.
I0323 04:14:43.420385  543705 net.go:648] Add success.
I0323 04:14:43.422991  543705 net.go:770] primary dev: ETH0
I0323 04:14:43.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:14:43.423020  543705 net.go:698] Add success.
I0323 04:14:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:14:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:14:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:14:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 04:14:53.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:14:53.409827  543705 memory.go:184] no items to output this cycle
E0323 04:15:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:15:03.409773  543705 memory.go:184] no items to output this cycle
I0323 04:15:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 04:15:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:15:13.409796  543705 memory.go:191] Add success.
I0323 04:15:13.409817  543705 cpu.go:282] Add success.
W0323 04:15:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:15:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:15:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:15:13.420158  543705 net.go:648] Add success.
I0323 04:15:13.422914  543705 net.go:770] primary dev: ETH0
I0323 04:15:13.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:15:13.422939  543705 net.go:698] Add success.
I0323 04:15:13.469191  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"65a8f6e5-2a00-4469-a1bf-259a7c92e82c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:15:13.469225  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:15:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:15:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:15:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 04:15:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:15:14.456691  543705 disk_worker.go:494] system disk:vda1
I0323 04:15:14.456733  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:15:15.456022  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:15:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:15:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:15:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:15:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:15:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:15:23.409788  543705 memory.go:184] no items to output this cycle
I0323 04:15:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 04:15:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:15:33.409784  543705 memory.go:184] no items to output this cycle
I0323 04:15:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 04:15:38.069676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:15:38.072242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:15:38.072248  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007acc0 0xc00007ad00]
I0323 04:15:40.077080  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:15:40.077086  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:15:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:15:43.410646  543705 memory.go:191] Add success.
I0323 04:15:43.409798  543705 cpu.go:282] Add success.
I0323 04:15:43.420351  543705 net.go:648] Add success.
I0323 04:15:43.423112  543705 net.go:770] primary dev: ETH0
I0323 04:15:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:15:43.423140  543705 net.go:698] Add success.
I0323 04:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:15:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:15:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:15:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:15:53.409815  543705 memory.go:184] no items to output this cycle
I0323 04:15:53.409818  543705 cpu.go:275] no items to output this cycle
E0323 04:16:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:16:03.409785  543705 memory.go:184] no items to output this cycle
I0323 04:16:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 04:16:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:16:13.409787  543705 memory.go:191] Add success.
I0323 04:16:13.409789  543705 cpu.go:282] Add success.
W0323 04:16:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:16:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:16:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:16:13.420255  543705 net.go:648] Add success.
I0323 04:16:13.422888  543705 net.go:770] primary dev: ETH0
I0323 04:16:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:16:13.422930  543705 net.go:698] Add success.
I0323 04:16:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:16:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:16:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 04:16:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:16:14.456630  543705 disk_worker.go:494] system disk:vda1
I0323 04:16:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:16:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:16:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:16:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:16:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:16:16.472550  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:16:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:16:23.409808  543705 memory.go:184] no items to output this cycle
I0323 04:16:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 04:16:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:16:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:16:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 04:16:38.073676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:16:38.076253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:16:38.076260  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b5c0 0xc00007b600]
E0323 04:16:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:16:43.410676  543705 memory.go:191] Add success.
I0323 04:16:43.409818  543705 cpu.go:282] Add success.
I0323 04:16:43.420384  543705 net.go:648] Add success.
I0323 04:16:43.423147  543705 net.go:770] primary dev: ETH0
I0323 04:16:43.423160  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:16:43.423173  543705 net.go:698] Add success.
I0323 04:16:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:16:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:16:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:16:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:16:53.409820  543705 memory.go:184] no items to output this cycle
I0323 04:16:53.409830  543705 cpu.go:275] no items to output this cycle
E0323 04:17:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:17:03.409780  543705 memory.go:184] no items to output this cycle
I0323 04:17:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 04:17:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:17:13.409787  543705 memory.go:191] Add success.
I0323 04:17:13.409792  543705 cpu.go:282] Add success.
W0323 04:17:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:17:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:17:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:17:13.420063  543705 net.go:648] Add success.
I0323 04:17:13.422798  543705 net.go:770] primary dev: ETH0
I0323 04:17:13.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:17:13.422828  543705 net.go:698] Add success.
I0323 04:17:13.453384  543705 event_worker.go:152] Polling the log file for events...
W0323 04:17:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:17:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 04:17:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:17:14.456946  543705 disk_worker.go:494] system disk:vda1
I0323 04:17:14.457000  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:17:14.458019  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:17:14.458028  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:17:14.458033  543705 custom_config.go:64] query custom config with name: gpu
E0323 04:17:15.456998  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:17:15.457016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:17:16.458089  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:17:16.458152  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:17:16.458166  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:17:16.458185  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:17:16.472556  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:17:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:17:23.409780  543705 memory.go:184] no items to output this cycle
I0323 04:17:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 04:17:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:17:33.409768  543705 memory.go:184] no items to output this cycle
I0323 04:17:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 04:17:38.077678  543705 disk_info.go:125] begin check local disk info of client
I0323 04:17:38.080226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:17:38.080232  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b600 0xc00047b640]
E0323 04:17:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:17:43.410605  543705 memory.go:191] Add success.
I0323 04:17:43.409797  543705 cpu.go:282] Add success.
I0323 04:17:43.420317  543705 net.go:648] Add success.
I0323 04:17:43.422936  543705 net.go:770] primary dev: ETH0
I0323 04:17:43.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:17:43.422962  543705 net.go:698] Add success.
I0323 04:17:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:17:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:17:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:17:53.410273  543705 cpu.go:275] no items to output this cycle
I0323 04:17:53.410277  543705 memory.go:184] no items to output this cycle
E0323 04:18:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:18:03.409784  543705 memory.go:184] no items to output this cycle
I0323 04:18:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 04:18:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:18:13.409815  543705 memory.go:191] Add success.
I0323 04:18:13.409821  543705 cpu.go:282] Add success.
W0323 04:18:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:18:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:18:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:18:13.420130  543705 net.go:648] Add success.
I0323 04:18:13.422800  543705 net.go:770] primary dev: ETH0
I0323 04:18:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:18:13.422829  543705 net.go:698] Add success.
I0323 04:18:13.468433  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af453b2f-358e-40e8-a409-6b975382f4ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:18:13.468466  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:18:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:18:14.455486  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:18:14.455500  543705 disk_worker.go:708] disk space is not compliant
W0323 04:18:14.455505  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:18:14.457560  543705 disk_worker.go:494] system disk:vda1
I0323 04:18:14.457587  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:18:15.455993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:18:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:18:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:18:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:18:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:18:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:18:23.409772  543705 memory.go:184] no items to output this cycle
I0323 04:18:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 04:18:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:18:33.409800  543705 memory.go:184] no items to output this cycle
I0323 04:18:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 04:18:38.081674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:18:38.084279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:18:38.084286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa2c0 0xc0001aa300]
I0323 04:18:40.077727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:18:40.077733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:18:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:18:43.410731  543705 memory.go:191] Add success.
I0323 04:18:43.409799  543705 cpu.go:282] Add success.
I0323 04:18:43.420407  543705 net.go:648] Add success.
I0323 04:18:43.423045  543705 net.go:770] primary dev: ETH0
I0323 04:18:43.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:18:43.423082  543705 net.go:698] Add success.
I0323 04:18:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:18:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:18:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:18:53.409788  543705 memory.go:184] no items to output this cycle
I0323 04:18:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 04:19:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:19:03.409801  543705 memory.go:184] no items to output this cycle
I0323 04:19:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 04:19:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:19:13.409778  543705 memory.go:191] Add success.
W0323 04:19:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:19:13.409810  543705 cpu.go:282] Add success.
W0323 04:19:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:19:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:19:13.420153  543705 net.go:648] Add success.
I0323 04:19:13.422818  543705 net.go:770] primary dev: ETH0
I0323 04:19:13.422833  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:19:13.422847  543705 net.go:698] Add success.
I0323 04:19:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:19:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:19:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 04:19:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:19:14.458979  543705 disk_worker.go:494] system disk:vda1
I0323 04:19:14.459008  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:19:15.455993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:19:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:19:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:19:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:19:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:19:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:19:23.409785  543705 memory.go:184] no items to output this cycle
I0323 04:19:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 04:19:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:19:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:19:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 04:19:38.085673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:19:38.088228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:19:38.088234  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262240 0xc000262280]
E0323 04:19:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:19:43.410563  543705 memory.go:191] Add success.
I0323 04:19:43.409818  543705 cpu.go:282] Add success.
I0323 04:19:43.420262  543705 net.go:648] Add success.
I0323 04:19:43.422831  543705 net.go:770] primary dev: ETH0
I0323 04:19:43.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:19:43.422858  543705 net.go:698] Add success.
I0323 04:19:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:19:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:19:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:19:53.410258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:19:53.410287  543705 cpu.go:275] no items to output this cycle
I0323 04:19:53.410289  543705 memory.go:184] no items to output this cycle
E0323 04:20:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:20:03.409775  543705 memory.go:184] no items to output this cycle
I0323 04:20:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 04:20:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:20:13.409792  543705 memory.go:191] Add success.
I0323 04:20:13.409792  543705 cpu.go:282] Add success.
W0323 04:20:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:20:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:20:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:20:13.420108  543705 net.go:648] Add success.
I0323 04:20:13.422813  543705 net.go:770] primary dev: ETH0
I0323 04:20:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:20:13.422839  543705 net.go:698] Add success.
I0323 04:20:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:20:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:20:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 04:20:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:20:14.456489  543705 disk_worker.go:494] system disk:vda1
I0323 04:20:14.456531  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:20:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:20:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:20:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:20:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:20:16.472491  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:20:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:20:23.409803  543705 memory.go:184] no items to output this cycle
I0323 04:20:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 04:20:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:20:33.409769  543705 memory.go:184] no items to output this cycle
I0323 04:20:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 04:20:38.089675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:20:38.092271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:20:38.092278  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047be40 0xc00047be80]
E0323 04:20:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:20:43.410798  543705 memory.go:191] Add success.
I0323 04:20:43.409801  543705 cpu.go:282] Add success.
I0323 04:20:43.420471  543705 net.go:648] Add success.
I0323 04:20:43.423710  543705 net.go:770] primary dev: ETH0
I0323 04:20:43.423723  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:20:43.423738  543705 net.go:698] Add success.
I0323 04:20:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:20:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:20:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:20:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:20:53.409822  543705 memory.go:184] no items to output this cycle
I0323 04:20:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 04:21:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:21:03.409779  543705 memory.go:184] no items to output this cycle
I0323 04:21:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 04:21:13.410280  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:21:13.410307  543705 memory.go:191] Add success.
I0323 04:21:13.410326  543705 cpu.go:282] Add success.
W0323 04:21:13.410334  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:21:13.410345  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:21:13.410348  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:21:13.420634  543705 net.go:648] Add success.
I0323 04:21:13.423297  543705 net.go:770] primary dev: ETH0
I0323 04:21:13.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:21:13.423323  543705 net.go:698] Add success.
I0323 04:21:13.470006  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5ce880c0-9bad-4c47-9ee1-208e331a8244","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:21:13.470040  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:21:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:21:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:21:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 04:21:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:21:14.456680  543705 disk_worker.go:494] system disk:vda1
I0323 04:21:14.456717  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:21:15.456000  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:21:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:21:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:21:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:21:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:21:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:21:23.409799  543705 memory.go:184] no items to output this cycle
I0323 04:21:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 04:21:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:21:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 04:21:33.409797  543705 memory.go:184] no items to output this cycle
I0323 04:21:38.093674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:21:38.096241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:21:38.096247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
I0323 04:21:40.081076  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:21:40.081082  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:21:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:21:43.410665  543705 memory.go:191] Add success.
I0323 04:21:43.409794  543705 cpu.go:282] Add success.
I0323 04:21:43.420366  543705 net.go:648] Add success.
I0323 04:21:43.423228  543705 net.go:770] primary dev: ETH0
I0323 04:21:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:21:43.423253  543705 net.go:698] Add success.
I0323 04:21:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:21:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:21:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:21:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:21:53.409787  543705 memory.go:184] no items to output this cycle
I0323 04:21:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 04:22:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:22:03.409801  543705 memory.go:184] no items to output this cycle
I0323 04:22:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 04:22:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:22:13.409784  543705 memory.go:191] Add success.
W0323 04:22:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:22:13.409816  543705 cpu.go:282] Add success.
W0323 04:22:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:22:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:22:13.419703  543705 net.go:648] Add success.
I0323 04:22:13.422595  543705 net.go:770] primary dev: ETH0
I0323 04:22:13.422608  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:22:13.422619  543705 net.go:698] Add success.
W0323 04:22:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:22:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 04:22:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:22:14.456815  543705 disk_worker.go:494] system disk:vda1
I0323 04:22:14.456852  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:22:14.457073  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:22:14.457081  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:22:14.457086  543705 custom_config.go:64] query custom config with name: gpu
E0323 04:22:15.456826  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:22:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:22:16.458109  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:22:16.458139  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:22:16.458179  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:22:16.458201  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:22:16.472601  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:22:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:22:23.409775  543705 memory.go:184] no items to output this cycle
I0323 04:22:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 04:22:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:22:33.409805  543705 memory.go:184] no items to output this cycle
I0323 04:22:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 04:22:38.097674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:22:38.100469  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:22:38.100476  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262c40 0xc000262c80]
E0323 04:22:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:22:43.410983  543705 memory.go:191] Add success.
I0323 04:22:43.409813  543705 cpu.go:282] Add success.
I0323 04:22:43.420698  543705 net.go:648] Add success.
I0323 04:22:43.423973  543705 net.go:770] primary dev: ETH0
I0323 04:22:43.423986  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:22:43.424000  543705 net.go:698] Add success.
I0323 04:22:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:22:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:22:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:22:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:22:53.409812  543705 memory.go:184] no items to output this cycle
I0323 04:22:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 04:23:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:23:03.409777  543705 memory.go:184] no items to output this cycle
I0323 04:23:03.409780  543705 cpu.go:275] no items to output this cycle
E0323 04:23:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:23:13.409793  543705 cpu.go:282] Add success.
I0323 04:23:13.409794  543705 memory.go:191] Add success.
W0323 04:23:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:23:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:23:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:23:13.420172  543705 net.go:648] Add success.
I0323 04:23:13.423385  543705 net.go:770] primary dev: ETH0
I0323 04:23:13.423401  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:23:13.423417  543705 net.go:698] Add success.
I0323 04:23:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:23:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:23:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 04:23:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:23:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 04:23:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:23:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:23:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:23:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:23:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:23:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:23:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:23:23.409772  543705 memory.go:184] no items to output this cycle
I0323 04:23:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 04:23:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:23:33.409785  543705 memory.go:184] no items to output this cycle
I0323 04:23:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 04:23:38.101672  543705 disk_info.go:125] begin check local disk info of client
I0323 04:23:38.104237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:23:38.104243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000297d80 0xc000297dc0]
E0323 04:23:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:23:43.410585  543705 memory.go:191] Add success.
I0323 04:23:43.409803  543705 cpu.go:282] Add success.
I0323 04:23:43.420325  543705 net.go:648] Add success.
I0323 04:23:43.422959  543705 net.go:770] primary dev: ETH0
I0323 04:23:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:23:43.422988  543705 net.go:698] Add success.
I0323 04:23:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:23:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:23:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:23:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:23:53.409780  543705 memory.go:184] no items to output this cycle
I0323 04:23:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 04:24:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:24:03.409784  543705 memory.go:184] no items to output this cycle
I0323 04:24:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 04:24:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:24:13.409785  543705 memory.go:191] Add success.
I0323 04:24:13.409804  543705 cpu.go:282] Add success.
W0323 04:24:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:24:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:24:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:24:13.420534  543705 net.go:648] Add success.
I0323 04:24:13.423266  543705 net.go:770] primary dev: ETH0
I0323 04:24:13.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:24:13.423292  543705 net.go:698] Add success.
I0323 04:24:13.516412  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f91f087d-55e7-4b53-9d92-2ed76e2ebddb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:24:13.516445  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:24:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:24:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:24:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 04:24:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:24:14.456624  543705 disk_worker.go:494] system disk:vda1
I0323 04:24:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:24:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:24:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:24:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:24:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:24:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:24:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:24:23.409779  543705 memory.go:184] no items to output this cycle
I0323 04:24:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 04:24:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:24:33.409810  543705 memory.go:184] no items to output this cycle
I0323 04:24:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 04:24:38.105675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:24:38.108305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:24:38.108311  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002630c0 0xc000263100]
I0323 04:24:40.081722  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:24:40.081728  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:24:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:24:43.410529  543705 memory.go:191] Add success.
I0323 04:24:43.409819  543705 cpu.go:282] Add success.
I0323 04:24:43.420224  543705 net.go:648] Add success.
I0323 04:24:43.422561  543705 net.go:770] primary dev: ETH0
I0323 04:24:43.422575  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:24:43.422587  543705 net.go:698] Add success.
I0323 04:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:24:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:24:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:24:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:24:53.409795  543705 memory.go:184] no items to output this cycle
I0323 04:24:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 04:25:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:25:03.409782  543705 memory.go:184] no items to output this cycle
I0323 04:25:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 04:25:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:25:13.409786  543705 memory.go:191] Add success.
I0323 04:25:13.409805  543705 cpu.go:282] Add success.
W0323 04:25:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:25:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:25:13.420186  543705 net.go:648] Add success.
I0323 04:25:13.422849  543705 net.go:770] primary dev: ETH0
I0323 04:25:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:25:13.422878  543705 net.go:698] Add success.
I0323 04:25:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:25:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:25:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 04:25:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:25:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 04:25:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:25:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:25:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:25:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:25:16.472484  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:25:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:25:23.409789  543705 memory.go:184] no items to output this cycle
I0323 04:25:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 04:25:33.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:25:33.409916  543705 memory.go:184] no items to output this cycle
I0323 04:25:33.410011  543705 cpu.go:275] no items to output this cycle
I0323 04:25:38.109676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:25:38.112254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:25:38.112261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 04:25:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:25:43.410680  543705 memory.go:191] Add success.
I0323 04:25:43.409794  543705 cpu.go:282] Add success.
I0323 04:25:43.420374  543705 net.go:648] Add success.
I0323 04:25:43.423044  543705 net.go:770] primary dev: ETH0
I0323 04:25:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:25:43.423069  543705 net.go:698] Add success.
I0323 04:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:25:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:25:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:25:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:25:53.409788  543705 memory.go:184] no items to output this cycle
I0323 04:25:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 04:26:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:26:03.409786  543705 memory.go:184] no items to output this cycle
I0323 04:26:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 04:26:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:26:13.409789  543705 memory.go:191] Add success.
I0323 04:26:13.409792  543705 cpu.go:282] Add success.
W0323 04:26:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:26:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:26:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:26:13.420052  543705 net.go:648] Add success.
I0323 04:26:13.422785  543705 net.go:770] primary dev: ETH0
I0323 04:26:13.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:26:13.422810  543705 net.go:698] Add success.
I0323 04:26:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:26:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:26:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 04:26:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:26:14.456557  543705 disk_worker.go:494] system disk:vda1
I0323 04:26:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:26:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:26:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:26:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:26:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:26:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:26:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:26:23.409777  543705 memory.go:184] no items to output this cycle
I0323 04:26:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 04:26:33.409847  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:26:33.409865  543705 memory.go:184] no items to output this cycle
I0323 04:26:33.409940  543705 cpu.go:275] no items to output this cycle
I0323 04:26:38.113676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:26:38.116516  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:26:38.116522  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab5c0 0xc0001ab600]
E0323 04:26:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:26:43.410762  543705 memory.go:191] Add success.
I0323 04:26:43.409824  543705 cpu.go:282] Add success.
I0323 04:26:43.420457  543705 net.go:648] Add success.
I0323 04:26:43.424576  543705 net.go:770] primary dev: ETH0
I0323 04:26:43.424589  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:26:43.424600  543705 net.go:698] Add success.
I0323 04:26:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:26:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:26:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:26:53.410417  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:26:53.410425  543705 cpu.go:275] no items to output this cycle
I0323 04:26:53.410435  543705 memory.go:184] no items to output this cycle
E0323 04:27:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:27:03.409790  543705 memory.go:184] no items to output this cycle
I0323 04:27:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 04:27:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:27:13.409790  543705 memory.go:191] Add success.
I0323 04:27:13.409810  543705 cpu.go:282] Add success.
W0323 04:27:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:27:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:27:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:27:13.420200  543705 net.go:648] Add success.
I0323 04:27:13.428761  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 04:27:13.428838  543705 net.go:770] primary dev: ETH0
I0323 04:27:13.428850  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:27:13.428861  543705 net.go:698] Add success.
I0323 04:27:13.453377  543705 event_worker.go:152] Polling the log file for events...
I0323 04:27:13.468473  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ff838d7-33f7-499f-a0b9-73392173a284","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:27:13.468506  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 04:27:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:27:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0323 04:27:14.455182  543705 disk_worker.go:728] disk inode is not compliant
E0323 04:27:14.456741  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:27:14.456749  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:27:14.456754  543705 custom_config.go:64] query custom config with name: gpu
I0323 04:27:14.456791  543705 disk_worker.go:494] system disk:vda1
I0323 04:27:14.456822  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:27:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:27:15.456845  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:27:16.458106  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:27:16.458134  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:27:16.458170  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:27:16.458190  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:27:16.472570  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:27:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:27:23.409782  543705 memory.go:184] no items to output this cycle
I0323 04:27:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:27:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:27:33.409791  543705 memory.go:184] no items to output this cycle
I0323 04:27:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 04:27:38.117674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:27:38.120309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:27:38.120316  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4940 0xc0000c4980]
I0323 04:27:40.085095  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:27:40.085100  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:27:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:27:43.410653  543705 memory.go:191] Add success.
I0323 04:27:43.409815  543705 cpu.go:282] Add success.
I0323 04:27:43.420364  543705 net.go:648] Add success.
I0323 04:27:43.422952  543705 net.go:770] primary dev: ETH0
I0323 04:27:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:27:43.422979  543705 net.go:698] Add success.
I0323 04:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:27:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:27:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:27:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:27:53.409817  543705 memory.go:184] no items to output this cycle
I0323 04:27:53.409830  543705 cpu.go:275] no items to output this cycle
E0323 04:28:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:28:03.409790  543705 memory.go:184] no items to output this cycle
I0323 04:28:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 04:28:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:28:13.409795  543705 memory.go:191] Add success.
I0323 04:28:13.409818  543705 cpu.go:282] Add success.
W0323 04:28:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:28:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:28:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:28:13.420161  543705 net.go:648] Add success.
I0323 04:28:13.422912  543705 net.go:770] primary dev: ETH0
I0323 04:28:13.422926  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:28:13.422937  543705 net.go:698] Add success.
I0323 04:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:28:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:28:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 04:28:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:28:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 04:28:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:28:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:28:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:28:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:28:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:28:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:28:23.409793  543705 memory.go:184] no items to output this cycle
I0323 04:28:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 04:28:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:28:33.409788  543705 memory.go:184] no items to output this cycle
I0323 04:28:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 04:28:38.121678  543705 disk_info.go:125] begin check local disk info of client
I0323 04:28:38.124257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:28:38.124263  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b500 0xc00007b540]
E0323 04:28:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:28:43.410678  543705 memory.go:191] Add success.
I0323 04:28:43.409833  543705 cpu.go:282] Add success.
I0323 04:28:43.420376  543705 net.go:648] Add success.
I0323 04:28:43.422938  543705 net.go:770] primary dev: ETH0
I0323 04:28:43.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:28:43.422964  543705 net.go:698] Add success.
I0323 04:28:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:28:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:28:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:28:53.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:28:53.409831  543705 memory.go:184] no items to output this cycle
I0323 04:28:53.409843  543705 cpu.go:275] no items to output this cycle
E0323 04:29:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:29:03.409781  543705 memory.go:184] no items to output this cycle
I0323 04:29:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 04:29:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:29:13.409821  543705 memory.go:191] Add success.
I0323 04:29:13.409832  543705 cpu.go:282] Add success.
W0323 04:29:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:29:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:29:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:29:13.420144  543705 net.go:648] Add success.
I0323 04:29:13.422882  543705 net.go:770] primary dev: ETH0
I0323 04:29:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:29:13.422909  543705 net.go:698] Add success.
I0323 04:29:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:29:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:29:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 04:29:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:29:14.456505  543705 disk_worker.go:494] system disk:vda1
I0323 04:29:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:29:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:29:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:29:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:29:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:29:16.472498  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:29:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:29:23.409788  543705 memory.go:184] no items to output this cycle
I0323 04:29:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 04:29:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:29:33.409880  543705 memory.go:184] no items to output this cycle
I0323 04:29:33.409971  543705 cpu.go:275] no items to output this cycle
I0323 04:29:38.125676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:29:38.128251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:29:38.128258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cfb80 0xc0003cfbc0]
E0323 04:29:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:29:43.410550  543705 memory.go:191] Add success.
I0323 04:29:43.409808  543705 cpu.go:282] Add success.
I0323 04:29:43.420226  543705 net.go:648] Add success.
I0323 04:29:43.422868  543705 net.go:770] primary dev: ETH0
I0323 04:29:43.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:29:43.422893  543705 net.go:698] Add success.
I0323 04:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:29:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:29:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:29:53.409778  543705 memory.go:184] no items to output this cycle
I0323 04:29:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 04:30:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:30:03.409788  543705 cpu.go:275] no items to output this cycle
I0323 04:30:03.409794  543705 memory.go:184] no items to output this cycle
E0323 04:30:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:30:13.409788  543705 memory.go:191] Add success.
I0323 04:30:13.409793  543705 cpu.go:282] Add success.
W0323 04:30:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:30:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:30:13.420111  543705 net.go:648] Add success.
I0323 04:30:13.422623  543705 net.go:770] primary dev: ETH0
I0323 04:30:13.422638  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:30:13.422650  543705 net.go:698] Add success.
I0323 04:30:13.468259  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b3bcb3e0-0a0e-497f-8069-65a81d78e354","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:30:13.468293  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:30:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:30:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:30:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 04:30:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:30:14.456608  543705 disk_worker.go:494] system disk:vda1
I0323 04:30:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:30:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:30:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:30:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:30:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:30:16.472475  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:30:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:30:23.409784  543705 memory.go:184] no items to output this cycle
I0323 04:30:23.409880  543705 cpu.go:275] no items to output this cycle
E0323 04:30:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:30:33.409778  543705 memory.go:184] no items to output this cycle
I0323 04:30:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 04:30:38.129677  543705 disk_info.go:125] begin check local disk info of client
I0323 04:30:38.132237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:30:38.132243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296240 0xc000296280]
I0323 04:30:40.087277  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:30:40.087283  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:30:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:30:43.410635  543705 memory.go:191] Add success.
I0323 04:30:43.409810  543705 cpu.go:282] Add success.
I0323 04:30:43.420323  543705 net.go:648] Add success.
I0323 04:30:43.422795  543705 net.go:770] primary dev: ETH0
I0323 04:30:43.422808  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:30:43.422821  543705 net.go:698] Add success.
I0323 04:30:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:30:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:30:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:30:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:30:53.409790  543705 memory.go:184] no items to output this cycle
I0323 04:30:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 04:31:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:31:03.409795  543705 memory.go:184] no items to output this cycle
I0323 04:31:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 04:31:13.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:31:13.409774  543705 memory.go:191] Add success.
I0323 04:31:13.409800  543705 cpu.go:282] Add success.
W0323 04:31:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:31:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:31:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:31:13.420160  543705 net.go:648] Add success.
I0323 04:31:13.422949  543705 net.go:770] primary dev: ETH0
I0323 04:31:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:31:13.422978  543705 net.go:698] Add success.
I0323 04:31:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:31:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:31:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0323 04:31:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:31:14.456581  543705 disk_worker.go:494] system disk:vda1
I0323 04:31:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:31:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:31:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:31:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:31:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:31:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:31:23.409877  543705 memory.go:184] no items to output this cycle
I0323 04:31:23.409919  543705 cpu.go:275] no items to output this cycle
E0323 04:31:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:31:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:31:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 04:31:38.133674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:31:38.136237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:31:38.136243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5480 0xc0000c54c0]
E0323 04:31:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:31:43.410718  543705 memory.go:191] Add success.
I0323 04:31:43.409826  543705 cpu.go:282] Add success.
I0323 04:31:43.420487  543705 net.go:648] Add success.
I0323 04:31:43.423349  543705 net.go:770] primary dev: ETH0
I0323 04:31:43.423364  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:31:43.423378  543705 net.go:698] Add success.
I0323 04:31:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:31:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:31:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:31:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:31:53.409790  543705 memory.go:184] no items to output this cycle
I0323 04:31:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 04:32:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:32:03.409817  543705 memory.go:184] no items to output this cycle
I0323 04:32:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 04:32:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:32:13.409775  543705 memory.go:191] Add success.
W0323 04:32:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:32:13.409801  543705 cpu.go:282] Add success.
W0323 04:32:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:32:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:32:13.420112  543705 net.go:648] Add success.
I0323 04:32:13.422591  543705 net.go:770] primary dev: ETH0
I0323 04:32:13.422605  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:32:13.422617  543705 net.go:698] Add success.
W0323 04:32:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:32:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 04:32:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:32:14.456791  543705 disk_worker.go:494] system disk:vda1
I0323 04:32:14.456830  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:32:14.457078  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:32:14.457086  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:32:14.457092  543705 custom_config.go:64] query custom config with name: gpu
E0323 04:32:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:32:15.456794  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:32:16.458136  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:32:16.458156  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:32:16.458208  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:32:16.458234  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:32:16.472684  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:32:23.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:32:23.409869  543705 memory.go:184] no items to output this cycle
I0323 04:32:23.409941  543705 cpu.go:275] no items to output this cycle
E0323 04:32:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:32:33.409803  543705 memory.go:184] no items to output this cycle
I0323 04:32:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 04:32:38.137674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:32:38.140299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:32:38.140305  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4680 0xc0000c46c0]
E0323 04:32:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:32:43.410657  543705 memory.go:191] Add success.
I0323 04:32:43.409799  543705 cpu.go:282] Add success.
I0323 04:32:43.420469  543705 net.go:648] Add success.
I0323 04:32:43.423325  543705 net.go:770] primary dev: ETH0
I0323 04:32:43.423344  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:32:43.423360  543705 net.go:698] Add success.
I0323 04:32:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:32:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:32:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:32:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:32:53.409780  543705 memory.go:184] no items to output this cycle
I0323 04:32:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 04:33:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:33:03.409779  543705 memory.go:184] no items to output this cycle
I0323 04:33:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 04:33:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:33:13.409784  543705 memory.go:191] Add success.
I0323 04:33:13.409803  543705 cpu.go:282] Add success.
W0323 04:33:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:33:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:33:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:33:13.420110  543705 net.go:648] Add success.
I0323 04:33:13.422948  543705 net.go:770] primary dev: ETH0
I0323 04:33:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:33:13.422988  543705 net.go:698] Add success.
I0323 04:33:13.469490  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9cc57462-f870-45f5-a8cb-15fef3baf816","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:33:13.469532  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:33:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:33:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:33:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 04:33:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:33:14.456511  543705 disk_worker.go:494] system disk:vda1
I0323 04:33:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:33:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:33:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:33:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:33:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:33:16.472528  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:33:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:33:23.409884  543705 memory.go:184] no items to output this cycle
I0323 04:33:23.409909  543705 cpu.go:275] no items to output this cycle
E0323 04:33:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:33:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 04:33:33.409796  543705 memory.go:184] no items to output this cycle
I0323 04:33:38.141674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:33:38.144281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:33:38.144286  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5000 0xc0000c5040]
I0323 04:33:40.089725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:33:40.089731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:33:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:33:43.410711  543705 memory.go:191] Add success.
I0323 04:33:43.409784  543705 cpu.go:282] Add success.
I0323 04:33:43.420408  543705 net.go:648] Add success.
I0323 04:33:43.422915  543705 net.go:770] primary dev: ETH0
I0323 04:33:43.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:33:43.422942  543705 net.go:698] Add success.
I0323 04:33:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:33:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:33:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:33:53.410373  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:33:53.410389  543705 memory.go:184] no items to output this cycle
I0323 04:33:53.410395  543705 cpu.go:275] no items to output this cycle
E0323 04:34:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:34:03.409783  543705 memory.go:184] no items to output this cycle
I0323 04:34:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 04:34:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:34:13.409800  543705 memory.go:191] Add success.
I0323 04:34:13.409801  543705 cpu.go:282] Add success.
W0323 04:34:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:34:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:34:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:34:13.420144  543705 net.go:648] Add success.
I0323 04:34:13.423045  543705 net.go:770] primary dev: ETH0
I0323 04:34:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:34:13.423069  543705 net.go:698] Add success.
I0323 04:34:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:34:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:34:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 04:34:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:34:14.456568  543705 disk_worker.go:494] system disk:vda1
I0323 04:34:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:34:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:34:16.458042  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:34:16.458120  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:34:16.458157  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:34:16.472670  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:34:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:34:23.409770  543705 memory.go:184] no items to output this cycle
I0323 04:34:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 04:34:33.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:34:33.409920  543705 memory.go:184] no items to output this cycle
I0323 04:34:33.409990  543705 cpu.go:275] no items to output this cycle
I0323 04:34:38.145672  543705 disk_info.go:125] begin check local disk info of client
I0323 04:34:38.148503  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:34:38.148509  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1940 0xc0004a1980]
E0323 04:34:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:34:43.410678  543705 memory.go:191] Add success.
I0323 04:34:43.409829  543705 cpu.go:282] Add success.
I0323 04:34:43.420383  543705 net.go:648] Add success.
I0323 04:34:43.422913  543705 net.go:770] primary dev: ETH0
I0323 04:34:43.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:34:43.422939  543705 net.go:698] Add success.
I0323 04:34:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:34:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:34:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:34:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:34:53.410284  543705 memory.go:184] no items to output this cycle
I0323 04:34:53.410327  543705 cpu.go:275] no items to output this cycle
E0323 04:35:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:35:03.409803  543705 memory.go:184] no items to output this cycle
I0323 04:35:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:35:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:35:13.409789  543705 cpu.go:282] Add success.
I0323 04:35:13.409793  543705 memory.go:191] Add success.
W0323 04:35:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:35:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:35:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:35:13.420318  543705 net.go:648] Add success.
I0323 04:35:13.423159  543705 net.go:770] primary dev: ETH0
I0323 04:35:13.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:35:13.423184  543705 net.go:698] Add success.
I0323 04:35:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:35:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:35:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 04:35:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:35:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 04:35:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:35:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:35:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:35:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:35:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:35:16.472976  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:35:23.410353  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:35:23.410371  543705 cpu.go:275] no items to output this cycle
I0323 04:35:23.410373  543705 memory.go:184] no items to output this cycle
E0323 04:35:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:35:33.409781  543705 memory.go:184] no items to output this cycle
I0323 04:35:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 04:35:38.149675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:35:38.152247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:35:38.152254  543705 disk_info.go:196] parse disk info done, disk is : [0xc000263000 0xc000263040]
E0323 04:35:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:35:43.410812  543705 memory.go:191] Add success.
I0323 04:35:43.409792  543705 cpu.go:282] Add success.
I0323 04:35:43.420524  543705 net.go:648] Add success.
I0323 04:35:43.423431  543705 net.go:770] primary dev: ETH0
I0323 04:35:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:35:43.423460  543705 net.go:698] Add success.
I0323 04:35:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:35:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:35:53.409786  543705 memory.go:184] no items to output this cycle
I0323 04:35:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 04:36:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:36:03.409768  543705 memory.go:184] no items to output this cycle
I0323 04:36:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 04:36:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:36:13.409813  543705 memory.go:191] Add success.
I0323 04:36:13.409819  543705 cpu.go:282] Add success.
W0323 04:36:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:36:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:36:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:36:13.420139  543705 net.go:648] Add success.
I0323 04:36:13.423153  543705 net.go:770] primary dev: ETH0
I0323 04:36:13.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:36:13.423178  543705 net.go:698] Add success.
I0323 04:36:13.465035  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"30738e61-d7a9-4b7b-b6a4-453def427e2c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:36:13.465072  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:36:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:36:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:36:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0323 04:36:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:36:14.456676  543705 disk_worker.go:494] system disk:vda1
I0323 04:36:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:36:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:36:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:36:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:36:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:36:16.472538  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:36:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:36:23.409891  543705 memory.go:184] no items to output this cycle
I0323 04:36:23.409947  543705 cpu.go:275] no items to output this cycle
E0323 04:36:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:36:33.409777  543705 memory.go:184] no items to output this cycle
I0323 04:36:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 04:36:38.153674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:36:38.156267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:36:38.156273  543705 disk_info.go:196] parse disk info done, disk is : [0xc000278f40 0xc000278f80]
I0323 04:36:40.093166  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:36:40.093173  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:36:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:36:43.410648  543705 memory.go:191] Add success.
I0323 04:36:43.409831  543705 cpu.go:282] Add success.
I0323 04:36:43.420378  543705 net.go:648] Add success.
I0323 04:36:43.423114  543705 net.go:770] primary dev: ETH0
I0323 04:36:43.423138  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:36:43.423153  543705 net.go:698] Add success.
I0323 04:36:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:36:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:36:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:36:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:36:53.409808  543705 memory.go:184] no items to output this cycle
I0323 04:36:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:37:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:37:03.409809  543705 memory.go:184] no items to output this cycle
I0323 04:37:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 04:37:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:37:13.409783  543705 memory.go:191] Add success.
I0323 04:37:13.409807  543705 cpu.go:282] Add success.
W0323 04:37:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:37:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:37:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:37:13.420131  543705 net.go:648] Add success.
I0323 04:37:13.422840  543705 net.go:770] primary dev: ETH0
I0323 04:37:13.422853  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:37:13.422865  543705 net.go:698] Add success.
I0323 04:37:13.453413  543705 event_worker.go:152] Polling the log file for events...
W0323 04:37:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:37:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 04:37:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0323 04:37:14.456904  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:37:14.456913  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:37:14.456920  543705 custom_config.go:64] query custom config with name: gpu
I0323 04:37:14.456971  543705 disk_worker.go:494] system disk:vda1
I0323 04:37:14.457014  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:37:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:37:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:37:16.458018  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:37:16.458038  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:37:16.458079  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:37:16.458106  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:37:16.472604  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:37:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:37:23.409797  543705 memory.go:184] no items to output this cycle
I0323 04:37:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 04:37:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:37:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:37:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 04:37:38.157694  543705 disk_info.go:125] begin check local disk info of client
I0323 04:37:38.160073  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:37:38.160082  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3f40 0xc00007a000]
E0323 04:37:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:37:43.410815  543705 memory.go:191] Add success.
I0323 04:37:43.409806  543705 cpu.go:282] Add success.
I0323 04:37:43.420509  543705 net.go:648] Add success.
I0323 04:37:43.423454  543705 net.go:770] primary dev: ETH0
I0323 04:37:43.423474  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:37:43.423490  543705 net.go:698] Add success.
I0323 04:37:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:37:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:37:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:37:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:37:53.409783  543705 memory.go:184] no items to output this cycle
I0323 04:37:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 04:38:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:38:03.409770  543705 memory.go:184] no items to output this cycle
I0323 04:38:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 04:38:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:38:13.409817  543705 memory.go:191] Add success.
I0323 04:38:13.409826  543705 cpu.go:282] Add success.
W0323 04:38:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:38:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:38:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:38:13.420116  543705 net.go:648] Add success.
I0323 04:38:13.422993  543705 net.go:770] primary dev: ETH0
I0323 04:38:13.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:38:13.423023  543705 net.go:698] Add success.
I0323 04:38:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:38:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:38:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 04:38:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:38:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 04:38:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:38:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:38:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:38:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:38:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:38:16.472494  543705 disk_local_worker.go:436] Get disk info: []
I0323 04:38:23.409915  543705 cpu.go:275] no items to output this cycle
E0323 04:38:23.409994  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:38:23.410016  543705 memory.go:184] no items to output this cycle
E0323 04:38:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:38:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:38:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 04:38:38.161675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:38:38.164234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:38:38.164242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab740 0xc0001ab780]
E0323 04:38:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:38:43.410649  543705 memory.go:191] Add success.
I0323 04:38:43.409832  543705 cpu.go:282] Add success.
I0323 04:38:43.420365  543705 net.go:648] Add success.
I0323 04:38:43.423058  543705 net.go:770] primary dev: ETH0
I0323 04:38:43.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:38:43.423084  543705 net.go:698] Add success.
I0323 04:38:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:38:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:38:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:38:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:38:53.409802  543705 memory.go:184] no items to output this cycle
I0323 04:38:53.409836  543705 cpu.go:275] no items to output this cycle
E0323 04:39:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:39:03.409782  543705 memory.go:184] no items to output this cycle
I0323 04:39:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 04:39:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:39:13.409819  543705 memory.go:191] Add success.
I0323 04:39:13.409819  543705 cpu.go:282] Add success.
W0323 04:39:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:39:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:39:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:39:13.420121  543705 net.go:648] Add success.
I0323 04:39:13.422774  543705 net.go:770] primary dev: ETH0
I0323 04:39:13.422789  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:39:13.422802  543705 net.go:698] Add success.
I0323 04:39:13.517683  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2003d56f-a8fa-4e89-93bb-edde5b1d7553","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:39:13.517716  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:39:14.455106  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:39:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:39:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 04:39:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:39:14.457500  543705 disk_worker.go:494] system disk:vda1
I0323 04:39:14.457529  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:39:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:39:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:39:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:39:16.472397  543705 disk_local_worker.go:436] Get disk info: []
I0323 04:39:23.409919  543705 cpu.go:275] no items to output this cycle
E0323 04:39:23.409920  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:39:23.409942  543705 memory.go:184] no items to output this cycle
E0323 04:39:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:39:33.409776  543705 memory.go:184] no items to output this cycle
I0323 04:39:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 04:39:38.165678  543705 disk_info.go:125] begin check local disk info of client
I0323 04:39:38.168200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:39:38.168208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034e680 0xc00034e6c0]
I0323 04:39:40.093731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:39:40.093737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:39:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:39:43.410648  543705 memory.go:191] Add success.
I0323 04:39:43.409789  543705 cpu.go:282] Add success.
I0323 04:39:43.420364  543705 net.go:648] Add success.
I0323 04:39:43.423243  543705 net.go:770] primary dev: ETH0
I0323 04:39:43.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:39:43.423280  543705 net.go:698] Add success.
I0323 04:39:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:39:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:39:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:39:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:39:53.409785  543705 memory.go:184] no items to output this cycle
I0323 04:39:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 04:40:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:40:03.409774  543705 memory.go:184] no items to output this cycle
I0323 04:40:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:40:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:40:13.409812  543705 memory.go:191] Add success.
I0323 04:40:13.409822  543705 cpu.go:282] Add success.
W0323 04:40:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:40:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:40:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:40:13.420229  543705 net.go:648] Add success.
I0323 04:40:13.423068  543705 net.go:770] primary dev: ETH0
I0323 04:40:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:40:13.423093  543705 net.go:698] Add success.
I0323 04:40:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:40:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:40:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 04:40:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:40:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 04:40:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:40:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:40:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:40:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:40:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:40:23.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:40:23.409890  543705 memory.go:184] no items to output this cycle
I0323 04:40:23.409915  543705 cpu.go:275] no items to output this cycle
E0323 04:40:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:40:33.409780  543705 memory.go:184] no items to output this cycle
I0323 04:40:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 04:40:38.169678  543705 disk_info.go:125] begin check local disk info of client
I0323 04:40:38.172205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:40:38.172211  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5140 0xc0000c5180]
E0323 04:40:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:40:43.410664  543705 memory.go:191] Add success.
I0323 04:40:43.409819  543705 cpu.go:282] Add success.
I0323 04:40:43.420457  543705 net.go:648] Add success.
I0323 04:40:43.423181  543705 net.go:770] primary dev: ETH0
I0323 04:40:43.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:40:43.423215  543705 net.go:698] Add success.
I0323 04:40:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:40:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:40:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:40:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:40:53.409792  543705 memory.go:184] no items to output this cycle
I0323 04:40:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 04:41:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:41:03.409802  543705 memory.go:184] no items to output this cycle
I0323 04:41:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 04:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:41:13.409792  543705 memory.go:191] Add success.
I0323 04:41:13.409792  543705 cpu.go:282] Add success.
W0323 04:41:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:41:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:41:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:41:13.420162  543705 net.go:648] Add success.
I0323 04:41:13.422530  543705 net.go:770] primary dev: ETH0
I0323 04:41:13.422546  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:41:13.422560  543705 net.go:698] Add success.
I0323 04:41:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:41:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:41:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 04:41:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:41:14.456812  543705 disk_worker.go:494] system disk:vda1
I0323 04:41:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:41:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:41:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:41:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:41:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:41:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:41:23.409865  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:41:23.409885  543705 memory.go:184] no items to output this cycle
I0323 04:41:23.409962  543705 cpu.go:275] no items to output this cycle
E0323 04:41:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:41:33.409807  543705 memory.go:184] no items to output this cycle
I0323 04:41:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 04:41:38.173676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:41:38.176201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:41:38.176207  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5b40 0xc0000c5b80]
E0323 04:41:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:41:43.410699  543705 memory.go:191] Add success.
I0323 04:41:43.409800  543705 cpu.go:282] Add success.
I0323 04:41:43.420541  543705 net.go:648] Add success.
I0323 04:41:43.423091  543705 net.go:770] primary dev: ETH0
I0323 04:41:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:41:43.423117  543705 net.go:698] Add success.
I0323 04:41:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:41:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:41:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:41:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:41:53.409773  543705 memory.go:184] no items to output this cycle
I0323 04:41:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 04:42:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:42:03.409779  543705 cpu.go:275] no items to output this cycle
I0323 04:42:03.409781  543705 memory.go:184] no items to output this cycle
E0323 04:42:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:42:13.409809  543705 memory.go:191] Add success.
I0323 04:42:13.409822  543705 cpu.go:282] Add success.
W0323 04:42:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:42:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:42:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:42:13.420356  543705 net.go:648] Add success.
I0323 04:42:13.423199  543705 net.go:770] primary dev: ETH0
I0323 04:42:13.423214  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:42:13.423229  543705 net.go:698] Add success.
I0323 04:42:13.468576  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c141f1f1-9bcf-48a5-b526-6a973962ac78","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:42:13.468610  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 04:42:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:42:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 04:42:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 04:42:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:42:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:42:14.455920  543705 custom_config.go:64] query custom config with name: gpu
I0323 04:42:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 04:42:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:42:15.456844  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:42:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:42:16.457945  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:42:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:42:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:42:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:42:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:42:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:42:23.409809  543705 memory.go:184] no items to output this cycle
I0323 04:42:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 04:42:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:42:33.409766  543705 memory.go:184] no items to output this cycle
I0323 04:42:33.409839  543705 cpu.go:275] no items to output this cycle
I0323 04:42:38.177679  543705 disk_info.go:125] begin check local disk info of client
I0323 04:42:38.180205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:42:38.180212  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a8c0 0xc00047a900]
I0323 04:42:40.097174  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:42:40.097180  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:42:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:42:43.410647  543705 memory.go:191] Add success.
I0323 04:42:43.409821  543705 cpu.go:282] Add success.
I0323 04:42:43.420336  543705 net.go:648] Add success.
I0323 04:42:43.422933  543705 net.go:770] primary dev: ETH0
I0323 04:42:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:42:43.422959  543705 net.go:698] Add success.
I0323 04:42:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:42:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:42:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:42:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:42:53.409804  543705 memory.go:184] no items to output this cycle
I0323 04:42:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 04:43:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:43:03.409775  543705 memory.go:184] no items to output this cycle
I0323 04:43:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 04:43:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:43:13.409787  543705 memory.go:191] Add success.
W0323 04:43:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 04:43:13.409822  543705 cpu.go:282] Add success.
W0323 04:43:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:43:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:43:13.419857  543705 net.go:648] Add success.
I0323 04:43:13.422587  543705 net.go:770] primary dev: ETH0
I0323 04:43:13.422602  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:43:13.422614  543705 net.go:698] Add success.
I0323 04:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:43:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:43:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 04:43:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:43:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 04:43:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:43:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:43:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:43:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:43:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:43:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:43:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:43:23.409784  543705 memory.go:184] no items to output this cycle
I0323 04:43:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 04:43:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:43:33.409808  543705 memory.go:184] no items to output this cycle
I0323 04:43:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 04:43:38.181675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:43:38.184200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:43:38.184206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003959c0 0xc000395a00]
E0323 04:43:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:43:43.410679  543705 memory.go:191] Add success.
I0323 04:43:43.409817  543705 cpu.go:282] Add success.
I0323 04:43:43.420376  543705 net.go:648] Add success.
I0323 04:43:43.422811  543705 net.go:770] primary dev: ETH0
I0323 04:43:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:43:43.422839  543705 net.go:698] Add success.
I0323 04:43:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:43:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:43:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:43:53.409780  543705 memory.go:184] no items to output this cycle
I0323 04:43:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 04:44:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:44:03.409773  543705 memory.go:184] no items to output this cycle
I0323 04:44:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 04:44:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:44:13.409780  543705 memory.go:191] Add success.
I0323 04:44:13.409801  543705 cpu.go:282] Add success.
W0323 04:44:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:44:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:44:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:44:13.420156  543705 net.go:648] Add success.
I0323 04:44:13.423222  543705 net.go:770] primary dev: ETH0
I0323 04:44:13.423237  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:44:13.423251  543705 net.go:698] Add success.
I0323 04:44:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:44:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:44:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0323 04:44:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:44:14.456512  543705 disk_worker.go:494] system disk:vda1
I0323 04:44:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:44:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:44:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:44:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:44:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:44:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:44:23.409787  543705 memory.go:184] no items to output this cycle
I0323 04:44:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 04:44:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:44:33.409787  543705 memory.go:184] no items to output this cycle
I0323 04:44:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 04:44:38.185675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:44:38.188295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:44:38.188302  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005656c0 0xc000565700]
E0323 04:44:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:44:43.410555  543705 memory.go:191] Add success.
I0323 04:44:43.409790  543705 cpu.go:282] Add success.
I0323 04:44:43.420331  543705 net.go:648] Add success.
I0323 04:44:43.422758  543705 net.go:770] primary dev: ETH0
I0323 04:44:43.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:44:43.422788  543705 net.go:698] Add success.
I0323 04:44:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:44:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:44:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:44:53.410317  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:44:53.410333  543705 memory.go:184] no items to output this cycle
I0323 04:44:53.410346  543705 cpu.go:275] no items to output this cycle
E0323 04:45:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:45:03.409802  543705 memory.go:184] no items to output this cycle
I0323 04:45:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 04:45:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:45:13.409787  543705 cpu.go:282] Add success.
I0323 04:45:13.409798  543705 memory.go:191] Add success.
W0323 04:45:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:45:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:45:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:45:13.420059  543705 net.go:648] Add success.
I0323 04:45:13.422634  543705 net.go:770] primary dev: ETH0
I0323 04:45:13.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:45:13.422657  543705 net.go:698] Add success.
I0323 04:45:13.468654  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c8179246-000d-41ff-802a-d57604c9b1d1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:45:13.468687  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:45:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:45:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:45:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 04:45:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:45:14.456530  543705 disk_worker.go:494] system disk:vda1
I0323 04:45:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:45:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:45:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:45:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:45:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:45:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:45:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:45:23.409780  543705 memory.go:184] no items to output this cycle
I0323 04:45:23.409891  543705 cpu.go:275] no items to output this cycle
E0323 04:45:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:45:33.409789  543705 memory.go:184] no items to output this cycle
I0323 04:45:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 04:45:38.189677  543705 disk_info.go:125] begin check local disk info of client
I0323 04:45:38.192220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:45:38.192226  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314c00 0xc000314c40]
I0323 04:45:40.097725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:45:40.097730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:45:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:45:43.410636  543705 memory.go:191] Add success.
I0323 04:45:43.409798  543705 cpu.go:282] Add success.
I0323 04:45:43.420370  543705 net.go:648] Add success.
I0323 04:45:43.423050  543705 net.go:770] primary dev: ETH0
I0323 04:45:43.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:45:43.423079  543705 net.go:698] Add success.
I0323 04:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:45:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:45:53.409805  543705 memory.go:184] no items to output this cycle
I0323 04:45:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 04:46:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:46:03.409787  543705 memory.go:184] no items to output this cycle
I0323 04:46:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:46:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:46:13.409785  543705 memory.go:191] Add success.
I0323 04:46:13.409787  543705 cpu.go:282] Add success.
W0323 04:46:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:46:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:46:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:46:13.420223  543705 net.go:648] Add success.
I0323 04:46:13.422941  543705 net.go:770] primary dev: ETH0
I0323 04:46:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:46:13.422971  543705 net.go:698] Add success.
I0323 04:46:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:46:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:46:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0323 04:46:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:46:14.456499  543705 disk_worker.go:494] system disk:vda1
I0323 04:46:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:46:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:46:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:46:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:46:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:46:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:46:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:46:23.409895  543705 cpu.go:275] no items to output this cycle
I0323 04:46:23.409899  543705 memory.go:184] no items to output this cycle
E0323 04:46:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:46:33.409789  543705 memory.go:184] no items to output this cycle
I0323 04:46:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 04:46:38.193674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:46:38.196290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:46:38.196296  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac00 0xc00007ac40]
E0323 04:46:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:46:43.410623  543705 memory.go:191] Add success.
I0323 04:46:43.409814  543705 cpu.go:282] Add success.
I0323 04:46:43.420338  543705 net.go:648] Add success.
I0323 04:46:43.422932  543705 net.go:770] primary dev: ETH0
I0323 04:46:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:46:43.422960  543705 net.go:698] Add success.
I0323 04:46:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:46:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:46:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:46:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:46:53.409764  543705 memory.go:184] no items to output this cycle
I0323 04:46:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 04:47:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:47:03.409780  543705 memory.go:184] no items to output this cycle
I0323 04:47:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 04:47:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:47:13.409810  543705 memory.go:191] Add success.
I0323 04:47:13.409813  543705 cpu.go:282] Add success.
W0323 04:47:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:47:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:47:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:47:13.420178  543705 net.go:648] Add success.
I0323 04:47:13.422716  543705 net.go:770] primary dev: ETH0
I0323 04:47:13.422729  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:47:13.422741  543705 net.go:698] Add success.
I0323 04:47:13.453425  543705 event_worker.go:152] Polling the log file for events...
W0323 04:47:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 04:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:47:14.456808  543705 disk_worker.go:494] system disk:vda1
I0323 04:47:14.456850  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:47:14.457132  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:47:14.457141  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:47:14.457146  543705 custom_config.go:64] query custom config with name: gpu
E0323 04:47:15.456769  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:47:15.456777  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:47:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:47:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:47:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:47:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:47:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:47:23.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:47:23.409917  543705 memory.go:184] no items to output this cycle
I0323 04:47:23.409940  543705 cpu.go:275] no items to output this cycle
E0323 04:47:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:47:33.409771  543705 memory.go:184] no items to output this cycle
I0323 04:47:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 04:47:38.197673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:47:38.200215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:47:38.200222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ac40 0xc00047ac80]
E0323 04:47:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:47:43.410724  543705 memory.go:191] Add success.
I0323 04:47:43.409803  543705 cpu.go:282] Add success.
I0323 04:47:43.420428  543705 net.go:648] Add success.
I0323 04:47:43.423209  543705 net.go:770] primary dev: ETH0
I0323 04:47:43.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:47:43.423235  543705 net.go:698] Add success.
I0323 04:47:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:47:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:47:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:47:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:47:53.409785  543705 memory.go:184] no items to output this cycle
I0323 04:47:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 04:48:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:48:03.409773  543705 memory.go:184] no items to output this cycle
I0323 04:48:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 04:48:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:48:13.409798  543705 memory.go:191] Add success.
I0323 04:48:13.409816  543705 cpu.go:282] Add success.
W0323 04:48:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:48:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:48:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:48:13.420122  543705 net.go:648] Add success.
I0323 04:48:13.422688  543705 net.go:770] primary dev: ETH0
I0323 04:48:13.422702  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:48:13.422717  543705 net.go:698] Add success.
I0323 04:48:13.464452  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"be1a94b7-d9bc-4426-bc87-0a8093374b2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:48:13.464488  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:48:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:48:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:48:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0323 04:48:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:48:14.456613  543705 disk_worker.go:494] system disk:vda1
I0323 04:48:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:48:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:48:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:48:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:48:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:48:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:48:23.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:48:23.409894  543705 memory.go:184] no items to output this cycle
I0323 04:48:23.409970  543705 cpu.go:275] no items to output this cycle
E0323 04:48:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:48:33.409771  543705 memory.go:184] no items to output this cycle
I0323 04:48:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 04:48:38.201676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:48:38.204247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:48:38.204253  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007be00 0xc00007be40]
I0323 04:48:40.101189  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:48:40.101195  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:48:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:48:43.410657  543705 memory.go:191] Add success.
I0323 04:48:43.409795  543705 cpu.go:282] Add success.
I0323 04:48:43.420372  543705 net.go:648] Add success.
I0323 04:48:43.422999  543705 net.go:770] primary dev: ETH0
I0323 04:48:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:48:43.423025  543705 net.go:698] Add success.
I0323 04:48:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:48:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:48:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:48:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:48:53.409805  543705 memory.go:184] no items to output this cycle
I0323 04:48:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:49:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:49:03.409788  543705 memory.go:184] no items to output this cycle
I0323 04:49:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 04:49:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:49:13.409806  543705 memory.go:191] Add success.
I0323 04:49:13.409810  543705 cpu.go:282] Add success.
W0323 04:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:49:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:49:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:49:13.420483  543705 net.go:648] Add success.
I0323 04:49:13.423100  543705 net.go:770] primary dev: ETH0
I0323 04:49:13.423112  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:49:13.423124  543705 net.go:698] Add success.
I0323 04:49:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:49:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:49:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0323 04:49:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:49:14.456491  543705 disk_worker.go:494] system disk:vda1
I0323 04:49:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:49:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:49:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:49:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:49:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:49:16.472508  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:49:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:49:23.409891  543705 cpu.go:275] no items to output this cycle
I0323 04:49:23.409893  543705 memory.go:184] no items to output this cycle
E0323 04:49:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:49:33.409789  543705 memory.go:184] no items to output this cycle
I0323 04:49:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 04:49:38.205683  543705 disk_info.go:125] begin check local disk info of client
I0323 04:49:38.208220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:49:38.208226  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0323 04:49:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:49:43.410568  543705 memory.go:191] Add success.
I0323 04:49:43.409807  543705 cpu.go:282] Add success.
I0323 04:49:43.420284  543705 net.go:648] Add success.
I0323 04:49:43.422586  543705 net.go:770] primary dev: ETH0
I0323 04:49:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:49:43.422614  543705 net.go:698] Add success.
I0323 04:49:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:49:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:49:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:49:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:49:53.409811  543705 memory.go:184] no items to output this cycle
I0323 04:49:53.409829  543705 cpu.go:275] no items to output this cycle
E0323 04:50:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:50:03.409789  543705 memory.go:184] no items to output this cycle
I0323 04:50:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 04:50:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:50:13.409819  543705 memory.go:191] Add success.
I0323 04:50:13.409831  543705 cpu.go:282] Add success.
W0323 04:50:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:50:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:50:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:50:13.420177  543705 net.go:648] Add success.
I0323 04:50:13.422869  543705 net.go:770] primary dev: ETH0
I0323 04:50:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:50:13.422894  543705 net.go:698] Add success.
I0323 04:50:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:50:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:50:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 04:50:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:50:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 04:50:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:50:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:50:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:50:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:50:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:50:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:50:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:50:23.409789  543705 memory.go:184] no items to output this cycle
I0323 04:50:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 04:50:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:50:33.409930  543705 memory.go:184] no items to output this cycle
I0323 04:50:33.409998  543705 cpu.go:275] no items to output this cycle
I0323 04:50:38.209687  543705 disk_info.go:125] begin check local disk info of client
I0323 04:50:38.212345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:50:38.212352  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac240 0xc0002ac280]
E0323 04:50:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:50:43.410658  543705 memory.go:191] Add success.
I0323 04:50:43.409836  543705 cpu.go:282] Add success.
I0323 04:50:43.420382  543705 net.go:648] Add success.
I0323 04:50:43.423100  543705 net.go:770] primary dev: ETH0
I0323 04:50:43.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:50:43.423126  543705 net.go:698] Add success.
I0323 04:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:50:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:50:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:50:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:50:53.409803  543705 memory.go:184] no items to output this cycle
I0323 04:50:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 04:51:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:51:03.409793  543705 memory.go:184] no items to output this cycle
I0323 04:51:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 04:51:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:51:13.409824  543705 memory.go:191] Add success.
I0323 04:51:13.409837  543705 cpu.go:282] Add success.
W0323 04:51:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:51:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:51:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:51:13.420179  543705 net.go:648] Add success.
I0323 04:51:13.422787  543705 net.go:770] primary dev: ETH0
I0323 04:51:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:51:13.422812  543705 net.go:698] Add success.
I0323 04:51:13.537409  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"90a4c2f9-d81d-42fe-b364-0fc3910f0c96","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:51:13.537443  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:51:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:51:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:51:14.455147  543705 disk_worker.go:708] disk space is not compliant
W0323 04:51:14.455149  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:51:14.456492  543705 disk_worker.go:494] system disk:vda1
I0323 04:51:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:51:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:51:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:51:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:51:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:51:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:51:23.409790  543705 cpu.go:275] no items to output this cycle
I0323 04:51:23.409802  543705 memory.go:184] no items to output this cycle
E0323 04:51:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:51:33.409802  543705 memory.go:184] no items to output this cycle
I0323 04:51:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 04:51:38.213675  543705 disk_info.go:125] begin check local disk info of client
I0323 04:51:38.216321  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:51:38.216327  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312000 0xc000312040]
I0323 04:51:40.101740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:51:40.101745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:51:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:51:43.410654  543705 memory.go:191] Add success.
I0323 04:51:43.409825  543705 cpu.go:282] Add success.
I0323 04:51:43.420432  543705 net.go:648] Add success.
I0323 04:51:43.422902  543705 net.go:770] primary dev: ETH0
I0323 04:51:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:51:43.422928  543705 net.go:698] Add success.
I0323 04:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:51:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:51:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:51:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:51:53.409803  543705 memory.go:184] no items to output this cycle
I0323 04:51:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 04:52:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:52:03.409772  543705 memory.go:184] no items to output this cycle
I0323 04:52:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 04:52:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:52:13.409791  543705 memory.go:191] Add success.
I0323 04:52:13.409793  543705 cpu.go:282] Add success.
W0323 04:52:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:52:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:52:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:52:13.420125  543705 net.go:648] Add success.
I0323 04:52:13.423115  543705 net.go:770] primary dev: ETH0
I0323 04:52:13.423130  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:52:13.423143  543705 net.go:698] Add success.
W0323 04:52:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:52:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 04:52:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:52:14.456765  543705 disk_worker.go:494] system disk:vda1
I0323 04:52:14.456805  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:52:14.457170  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:52:14.457178  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:52:14.457183  543705 custom_config.go:64] query custom config with name: gpu
E0323 04:52:15.456866  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:52:15.456875  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:52:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:52:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:52:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:52:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:52:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:52:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:52:23.409808  543705 memory.go:184] no items to output this cycle
I0323 04:52:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:52:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:52:33.409778  543705 memory.go:184] no items to output this cycle
I0323 04:52:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 04:52:38.217666  543705 disk_info.go:125] begin check local disk info of client
I0323 04:52:38.220235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:52:38.220241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493300 0xc000493340]
E0323 04:52:43.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:52:43.410760  543705 memory.go:191] Add success.
I0323 04:52:43.409956  543705 cpu.go:282] Add success.
I0323 04:52:43.419734  543705 net.go:648] Add success.
I0323 04:52:43.422476  543705 net.go:770] primary dev: ETH0
I0323 04:52:43.422489  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:52:43.422500  543705 net.go:698] Add success.
I0323 04:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:52:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:52:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:52:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:52:53.409769  543705 memory.go:184] no items to output this cycle
I0323 04:52:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 04:53:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:53:03.409805  543705 memory.go:184] no items to output this cycle
I0323 04:53:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 04:53:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:53:13.409799  543705 memory.go:191] Add success.
I0323 04:53:13.409810  543705 cpu.go:282] Add success.
W0323 04:53:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:53:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:53:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:53:13.420091  543705 net.go:648] Add success.
I0323 04:53:13.422717  543705 net.go:770] primary dev: ETH0
I0323 04:53:13.422730  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:53:13.422741  543705 net.go:698] Add success.
I0323 04:53:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:53:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:53:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 04:53:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:53:14.456493  543705 disk_worker.go:494] system disk:vda1
I0323 04:53:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:53:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:53:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:53:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:53:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:53:16.472422  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:53:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:53:23.409780  543705 memory.go:184] no items to output this cycle
I0323 04:53:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 04:53:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:53:33.409801  543705 memory.go:184] no items to output this cycle
I0323 04:53:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 04:53:38.221676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:53:38.224234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:53:38.224240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d9e80 0xc0004d9ec0]
E0323 04:53:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:53:43.410539  543705 memory.go:191] Add success.
I0323 04:53:43.409827  543705 cpu.go:282] Add success.
I0323 04:53:43.420494  543705 net.go:648] Add success.
I0323 04:53:43.422975  543705 net.go:770] primary dev: ETH0
I0323 04:53:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:53:43.423000  543705 net.go:698] Add success.
I0323 04:53:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:53:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:53:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:53:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:53:53.409806  543705 memory.go:184] no items to output this cycle
I0323 04:53:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:54:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:54:03.409787  543705 cpu.go:275] no items to output this cycle
I0323 04:54:03.409791  543705 memory.go:184] no items to output this cycle
E0323 04:54:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:54:13.409811  543705 memory.go:191] Add success.
I0323 04:54:13.409820  543705 cpu.go:282] Add success.
W0323 04:54:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:54:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:54:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:54:13.420131  543705 net.go:648] Add success.
I0323 04:54:13.422881  543705 net.go:770] primary dev: ETH0
I0323 04:54:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:54:13.422909  543705 net.go:698] Add success.
I0323 04:54:13.498856  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84858663-d9fc-45ab-8237-d78627b00655","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:54:13.498890  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 04:54:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:54:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:54:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 04:54:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:54:14.456605  543705 disk_worker.go:494] system disk:vda1
I0323 04:54:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:54:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:54:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:54:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:54:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:54:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:54:23.409784  543705 memory.go:184] no items to output this cycle
I0323 04:54:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 04:54:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:54:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 04:54:33.409800  543705 memory.go:184] no items to output this cycle
I0323 04:54:38.225676  543705 disk_info.go:125] begin check local disk info of client
I0323 04:54:38.228316  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:54:38.228323  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4540 0xc0003d4580]
I0323 04:54:40.105197  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:54:40.105203  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:54:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:54:43.410701  543705 memory.go:191] Add success.
I0323 04:54:43.409785  543705 cpu.go:282] Add success.
I0323 04:54:43.420516  543705 net.go:648] Add success.
I0323 04:54:43.423156  543705 net.go:770] primary dev: ETH0
I0323 04:54:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:54:43.423180  543705 net.go:698] Add success.
I0323 04:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:54:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:54:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:54:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:54:53.409780  543705 cpu.go:275] no items to output this cycle
I0323 04:54:53.409788  543705 memory.go:184] no items to output this cycle
E0323 04:55:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:55:03.409802  543705 memory.go:184] no items to output this cycle
I0323 04:55:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 04:55:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:55:13.409813  543705 memory.go:191] Add success.
I0323 04:55:13.409828  543705 cpu.go:282] Add success.
W0323 04:55:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:55:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:55:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:55:13.420182  543705 net.go:648] Add success.
I0323 04:55:13.423311  543705 net.go:770] primary dev: ETH0
I0323 04:55:13.423326  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:55:13.423338  543705 net.go:698] Add success.
I0323 04:55:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:55:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:55:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 04:55:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:55:14.456497  543705 disk_worker.go:494] system disk:vda1
I0323 04:55:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:55:15.456001  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:55:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:55:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:55:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:55:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:55:23.409786  543705 memory.go:184] no items to output this cycle
I0323 04:55:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 04:55:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:55:33.409802  543705 memory.go:184] no items to output this cycle
I0323 04:55:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 04:55:38.229674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:55:38.232265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:55:38.232271  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7280 0xc0004a72c0]
E0323 04:55:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:55:43.410629  543705 memory.go:191] Add success.
I0323 04:55:43.409798  543705 cpu.go:282] Add success.
I0323 04:55:43.420313  543705 net.go:648] Add success.
I0323 04:55:43.423238  543705 net.go:770] primary dev: ETH0
I0323 04:55:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:55:43.423295  543705 net.go:698] Add success.
I0323 04:55:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:55:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:55:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:55:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:55:53.409779  543705 memory.go:184] no items to output this cycle
I0323 04:55:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:56:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:56:03.409813  543705 memory.go:184] no items to output this cycle
I0323 04:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 04:56:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:56:13.409815  543705 memory.go:191] Add success.
I0323 04:56:13.409827  543705 cpu.go:282] Add success.
W0323 04:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:56:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:56:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:56:13.420299  543705 net.go:648] Add success.
I0323 04:56:13.423177  543705 net.go:770] primary dev: ETH0
I0323 04:56:13.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:56:13.423202  543705 net.go:698] Add success.
I0323 04:56:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:56:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:56:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 04:56:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:56:14.456813  543705 disk_worker.go:494] system disk:vda1
I0323 04:56:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:56:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:56:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:56:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:56:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:56:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:56:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:56:23.409792  543705 memory.go:184] no items to output this cycle
I0323 04:56:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 04:56:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:56:33.409806  543705 memory.go:184] no items to output this cycle
I0323 04:56:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 04:56:38.233673  543705 disk_info.go:125] begin check local disk info of client
I0323 04:56:38.236303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:56:38.236309  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff140 0xc0003ff180]
E0323 04:56:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:56:43.410579  543705 memory.go:191] Add success.
I0323 04:56:43.409816  543705 cpu.go:282] Add success.
I0323 04:56:43.420322  543705 net.go:648] Add success.
I0323 04:56:43.423097  543705 net.go:770] primary dev: ETH0
I0323 04:56:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:56:43.423131  543705 net.go:698] Add success.
I0323 04:56:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:56:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:56:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:56:53.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:56:53.409856  543705 memory.go:184] no items to output this cycle
I0323 04:56:53.410013  543705 cpu.go:275] no items to output this cycle
E0323 04:57:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:57:03.409771  543705 memory.go:184] no items to output this cycle
I0323 04:57:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 04:57:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:57:13.409779  543705 memory.go:191] Add success.
W0323 04:57:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:57:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:57:13.409817  543705 cpu.go:282] Add success.
I0323 04:57:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:57:13.420149  543705 net.go:648] Add success.
I0323 04:57:13.423093  543705 net.go:770] primary dev: ETH0
I0323 04:57:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:57:13.423122  543705 net.go:698] Add success.
I0323 04:57:13.429270  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 04:57:13.453445  543705 event_worker.go:152] Polling the log file for events...
I0323 04:57:13.483208  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a503c532-512f-499d-9144-ba7d4bfe12f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 04:57:13.483240  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 04:57:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:57:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 04:57:14.455202  543705 disk_worker.go:728] disk inode is not compliant
E0323 04:57:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 04:57:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 04:57:14.455899  543705 custom_config.go:64] query custom config with name: gpu
I0323 04:57:14.456730  543705 disk_worker.go:494] system disk:vda1
I0323 04:57:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 04:57:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 04:57:15.456801  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:57:16.457930  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 04:57:16.457930  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 04:57:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:57:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:57:16.472325  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:57:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:57:23.409783  543705 cpu.go:275] no items to output this cycle
I0323 04:57:23.409785  543705 memory.go:184] no items to output this cycle
E0323 04:57:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:57:33.409779  543705 memory.go:184] no items to output this cycle
I0323 04:57:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 04:57:38.237674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:57:38.240297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:57:38.240303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003706c0 0xc000370700]
I0323 04:57:40.105728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 04:57:40.105734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 04:57:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:57:43.410563  543705 memory.go:191] Add success.
I0323 04:57:43.409803  543705 cpu.go:282] Add success.
I0323 04:57:43.420260  543705 net.go:648] Add success.
I0323 04:57:43.422886  543705 net.go:770] primary dev: ETH0
I0323 04:57:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:57:43.422912  543705 net.go:698] Add success.
I0323 04:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:57:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:57:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:57:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:57:53.409774  543705 memory.go:184] no items to output this cycle
I0323 04:57:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 04:58:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:58:03.409776  543705 memory.go:184] no items to output this cycle
I0323 04:58:03.409777  543705 cpu.go:275] no items to output this cycle
E0323 04:58:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:58:13.409811  543705 memory.go:191] Add success.
I0323 04:58:13.409817  543705 cpu.go:282] Add success.
W0323 04:58:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:58:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:58:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:58:13.420184  543705 net.go:648] Add success.
I0323 04:58:13.422696  543705 net.go:770] primary dev: ETH0
I0323 04:58:13.422709  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:58:13.422722  543705 net.go:698] Add success.
I0323 04:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:58:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:58:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0323 04:58:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:58:14.456530  543705 disk_worker.go:494] system disk:vda1
I0323 04:58:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:58:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:58:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:58:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:58:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:58:23.409785  543705 memory.go:184] no items to output this cycle
I0323 04:58:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 04:58:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:58:33.409807  543705 memory.go:184] no items to output this cycle
I0323 04:58:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 04:58:38.241674  543705 disk_info.go:125] begin check local disk info of client
I0323 04:58:38.244266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:58:38.244272  543705 disk_info.go:196] parse disk info done, disk is : [0xc000329e80 0xc000329ec0]
E0323 04:58:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:58:43.410645  543705 memory.go:191] Add success.
I0323 04:58:43.409799  543705 cpu.go:282] Add success.
I0323 04:58:43.420341  543705 net.go:648] Add success.
I0323 04:58:43.423666  543705 net.go:770] primary dev: ETH0
I0323 04:58:43.423678  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:58:43.423691  543705 net.go:698] Add success.
I0323 04:58:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:58:53.409924  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:58:53.409946  543705 cpu.go:275] no items to output this cycle
I0323 04:58:53.409967  543705 memory.go:184] no items to output this cycle
E0323 04:59:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:59:03.409781  543705 memory.go:184] no items to output this cycle
I0323 04:59:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 04:59:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:59:13.409803  543705 memory.go:191] Add success.
I0323 04:59:13.409802  543705 cpu.go:282] Add success.
W0323 04:59:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 04:59:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 04:59:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 04:59:13.420131  543705 net.go:648] Add success.
I0323 04:59:13.422812  543705 net.go:770] primary dev: ETH0
I0323 04:59:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:59:13.422837  543705 net.go:698] Add success.
I0323 04:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 04:59:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 04:59:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 04:59:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 04:59:14.456477  543705 disk_worker.go:494] system disk:vda1
I0323 04:59:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 04:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 04:59:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:59:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:59:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 04:59:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 04:59:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:59:23.409781  543705 memory.go:184] no items to output this cycle
I0323 04:59:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 04:59:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:59:33.409781  543705 memory.go:184] no items to output this cycle
I0323 04:59:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 04:59:38.245678  543705 disk_info.go:125] begin check local disk info of client
I0323 04:59:38.248244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 04:59:38.248250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3880 0xc0003d38c0]
E0323 04:59:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:59:43.410715  543705 memory.go:191] Add success.
I0323 04:59:43.409818  543705 cpu.go:282] Add success.
I0323 04:59:43.420438  543705 net.go:648] Add success.
I0323 04:59:43.423180  543705 net.go:770] primary dev: ETH0
I0323 04:59:43.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0323 04:59:43.423210  543705 net.go:698] Add success.
I0323 04:59:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 04:59:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 04:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 04:59:53.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 04:59:53.409891  543705 memory.go:184] no items to output this cycle
I0323 04:59:53.409892  543705 cpu.go:275] no items to output this cycle
E0323 05:00:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:00:03.409799  543705 memory.go:184] no items to output this cycle
I0323 05:00:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 05:00:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:00:13.409810  543705 memory.go:191] Add success.
I0323 05:00:13.409822  543705 cpu.go:282] Add success.
W0323 05:00:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:00:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:00:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:00:13.420648  543705 net.go:648] Add success.
I0323 05:00:13.423424  543705 net.go:770] primary dev: ETH0
I0323 05:00:13.423437  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:00:13.423448  543705 net.go:698] Add success.
I0323 05:00:13.467683  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"02a0484c-b9b2-4961-9df8-afe1d06bde0c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:00:13.467716  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:00:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:00:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:00:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 05:00:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:00:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 05:00:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:00:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:00:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:00:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:00:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:00:16.472504  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:00:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:00:23.409791  543705 cpu.go:275] no items to output this cycle
I0323 05:00:23.409797  543705 memory.go:184] no items to output this cycle
E0323 05:00:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:00:33.409796  543705 memory.go:184] no items to output this cycle
I0323 05:00:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 05:00:38.249674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:00:38.252242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:00:38.252248  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a6c0 0xc00034a700]
I0323 05:00:40.109226  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:00:40.109231  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:00:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:00:43.410691  543705 memory.go:191] Add success.
I0323 05:00:43.409802  543705 cpu.go:282] Add success.
I0323 05:00:43.420390  543705 net.go:648] Add success.
I0323 05:00:43.423263  543705 net.go:770] primary dev: ETH0
I0323 05:00:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:00:43.423291  543705 net.go:698] Add success.
I0323 05:00:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:00:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:00:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:00:53.409890  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:00:53.409910  543705 memory.go:184] no items to output this cycle
I0323 05:00:53.409939  543705 cpu.go:275] no items to output this cycle
E0323 05:01:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:01:03.409784  543705 cpu.go:275] no items to output this cycle
I0323 05:01:03.409798  543705 memory.go:184] no items to output this cycle
E0323 05:01:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:01:13.409810  543705 memory.go:191] Add success.
I0323 05:01:13.409820  543705 cpu.go:282] Add success.
W0323 05:01:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:01:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:01:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:01:13.420125  543705 net.go:648] Add success.
I0323 05:01:13.422754  543705 net.go:770] primary dev: ETH0
I0323 05:01:13.422767  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:01:13.422779  543705 net.go:698] Add success.
I0323 05:01:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:01:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:01:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 05:01:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:01:14.456522  543705 disk_worker.go:494] system disk:vda1
I0323 05:01:14.456569  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:01:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:01:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:01:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:01:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:01:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:01:23.409808  543705 memory.go:184] no items to output this cycle
I0323 05:01:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 05:01:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:01:33.409783  543705 memory.go:184] no items to output this cycle
I0323 05:01:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 05:01:38.253682  543705 disk_info.go:125] begin check local disk info of client
I0323 05:01:38.256229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:01:38.256235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d1f00 0xc0003d1f40]
E0323 05:01:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:01:43.410695  543705 memory.go:191] Add success.
I0323 05:01:43.409839  543705 cpu.go:282] Add success.
I0323 05:01:43.420457  543705 net.go:648] Add success.
I0323 05:01:43.422898  543705 net.go:770] primary dev: ETH0
I0323 05:01:43.422913  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:01:43.422928  543705 net.go:698] Add success.
I0323 05:01:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:01:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:01:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:01:53.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:01:53.410260  543705 memory.go:184] no items to output this cycle
I0323 05:01:53.410273  543705 cpu.go:275] no items to output this cycle
E0323 05:02:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:02:03.409787  543705 memory.go:184] no items to output this cycle
I0323 05:02:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 05:02:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:02:13.409787  543705 memory.go:191] Add success.
W0323 05:02:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:02:13.409814  543705 cpu.go:282] Add success.
W0323 05:02:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:02:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:02:13.420153  543705 net.go:648] Add success.
I0323 05:02:13.423746  543705 net.go:770] primary dev: ETH0
I0323 05:02:13.423758  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:02:13.423770  543705 net.go:698] Add success.
W0323 05:02:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:02:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 05:02:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:02:14.455910  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:02:14.455918  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:02:14.455924  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:02:14.456585  543705 disk_worker.go:494] system disk:vda1
I0323 05:02:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:02:15.456936  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:02:15.456949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:02:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:02:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:02:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:02:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:02:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:02:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:02:23.409788  543705 memory.go:184] no items to output this cycle
I0323 05:02:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 05:02:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:02:33.409784  543705 memory.go:184] no items to output this cycle
I0323 05:02:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 05:02:38.257674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:02:38.260251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:02:38.260257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036ae00 0xc00036ae40]
E0323 05:02:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:02:43.410753  543705 memory.go:191] Add success.
I0323 05:02:43.409826  543705 cpu.go:282] Add success.
I0323 05:02:43.420450  543705 net.go:648] Add success.
I0323 05:02:43.423205  543705 net.go:770] primary dev: ETH0
I0323 05:02:43.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:02:43.423231  543705 net.go:698] Add success.
I0323 05:02:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:02:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:02:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:02:53.409912  543705 cpu.go:275] no items to output this cycle
E0323 05:02:53.410006  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:02:53.410023  543705 memory.go:184] no items to output this cycle
E0323 05:03:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:03:03.409785  543705 memory.go:184] no items to output this cycle
I0323 05:03:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 05:03:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:03:13.409801  543705 memory.go:191] Add success.
I0323 05:03:13.409807  543705 cpu.go:282] Add success.
W0323 05:03:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:03:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:03:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:03:13.420313  543705 net.go:648] Add success.
I0323 05:03:13.423786  543705 net.go:770] primary dev: ETH0
I0323 05:03:13.423799  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:03:13.423811  543705 net.go:698] Add success.
I0323 05:03:13.476647  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f94df50-761a-4e77-ba24-8051171c4de9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:03:13.476683  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:03:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:03:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:03:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 05:03:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:03:14.456819  543705 disk_worker.go:494] system disk:vda1
I0323 05:03:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:03:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:03:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:03:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:03:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:03:23.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:03:23.409836  543705 memory.go:184] no items to output this cycle
I0323 05:03:23.409855  543705 cpu.go:275] no items to output this cycle
E0323 05:03:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:03:33.409798  543705 memory.go:184] no items to output this cycle
I0323 05:03:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 05:03:38.261675  543705 disk_info.go:125] begin check local disk info of client
I0323 05:03:38.264314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:03:38.264321  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305580 0xc0003055c0]
I0323 05:03:40.109729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:03:40.109736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:03:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:03:43.410724  543705 memory.go:191] Add success.
I0323 05:03:43.409835  543705 cpu.go:282] Add success.
I0323 05:03:43.420427  543705 net.go:648] Add success.
I0323 05:03:43.423343  543705 net.go:770] primary dev: ETH0
I0323 05:03:43.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:03:43.423368  543705 net.go:698] Add success.
I0323 05:03:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:03:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:03:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:03:53.410722  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:03:53.410744  543705 memory.go:184] no items to output this cycle
I0323 05:03:53.410754  543705 cpu.go:275] no items to output this cycle
E0323 05:04:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:04:03.409789  543705 memory.go:184] no items to output this cycle
I0323 05:04:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 05:04:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:04:13.409803  543705 memory.go:191] Add success.
I0323 05:04:13.409804  543705 cpu.go:282] Add success.
W0323 05:04:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:04:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:04:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:04:13.420207  543705 net.go:648] Add success.
I0323 05:04:13.422934  543705 net.go:770] primary dev: ETH0
I0323 05:04:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:04:13.422964  543705 net.go:698] Add success.
I0323 05:04:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:04:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:04:14.455156  543705 disk_worker.go:708] disk space is not compliant
W0323 05:04:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:04:14.456496  543705 disk_worker.go:494] system disk:vda1
I0323 05:04:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:04:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:04:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:04:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:04:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:04:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:04:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:04:23.409815  543705 memory.go:184] no items to output this cycle
I0323 05:04:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 05:04:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:04:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 05:04:33.409800  543705 memory.go:184] no items to output this cycle
I0323 05:04:38.265684  543705 disk_info.go:125] begin check local disk info of client
I0323 05:04:38.268292  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:04:38.268299  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004727c0 0xc000472800]
E0323 05:04:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:04:43.410578  543705 memory.go:191] Add success.
I0323 05:04:43.409802  543705 cpu.go:282] Add success.
I0323 05:04:43.420296  543705 net.go:648] Add success.
I0323 05:04:43.422827  543705 net.go:770] primary dev: ETH0
I0323 05:04:43.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:04:43.422852  543705 net.go:698] Add success.
I0323 05:04:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:04:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:04:53.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:04:53.409877  543705 memory.go:184] no items to output this cycle
I0323 05:04:53.409952  543705 cpu.go:275] no items to output this cycle
E0323 05:05:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:05:03.409793  543705 memory.go:184] no items to output this cycle
I0323 05:05:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 05:05:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:05:13.409788  543705 memory.go:191] Add success.
I0323 05:05:13.409806  543705 cpu.go:282] Add success.
W0323 05:05:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:05:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:05:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:05:13.420168  543705 net.go:648] Add success.
I0323 05:05:13.422832  543705 net.go:770] primary dev: ETH0
I0323 05:05:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:05:13.422868  543705 net.go:698] Add success.
I0323 05:05:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:05:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:05:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 05:05:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:05:14.456551  543705 disk_worker.go:494] system disk:vda1
I0323 05:05:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:05:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:05:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:05:16.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:05:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:05:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:05:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:05:23.409817  543705 memory.go:184] no items to output this cycle
I0323 05:05:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 05:05:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:05:33.409784  543705 memory.go:184] no items to output this cycle
I0323 05:05:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 05:05:38.269677  543705 disk_info.go:125] begin check local disk info of client
I0323 05:05:38.272239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:05:38.272245  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312e00 0xc000312e40]
E0323 05:05:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:05:43.410706  543705 memory.go:191] Add success.
I0323 05:05:43.409808  543705 cpu.go:282] Add success.
I0323 05:05:43.420475  543705 net.go:648] Add success.
I0323 05:05:43.423317  543705 net.go:770] primary dev: ETH0
I0323 05:05:43.423330  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:05:43.423342  543705 net.go:698] Add success.
I0323 05:05:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:05:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:05:53.409802  543705 memory.go:184] no items to output this cycle
I0323 05:05:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 05:06:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:06:03.409778  543705 memory.go:184] no items to output this cycle
I0323 05:06:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 05:06:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:06:13.409821  543705 memory.go:191] Add success.
I0323 05:06:13.409827  543705 cpu.go:282] Add success.
W0323 05:06:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:06:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:06:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:06:13.420165  543705 net.go:648] Add success.
I0323 05:06:13.423233  543705 net.go:770] primary dev: ETH0
I0323 05:06:13.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:06:13.423261  543705 net.go:698] Add success.
I0323 05:06:13.467498  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2316469a-855f-488f-acba-0394db833c38","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:06:13.467531  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:06:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:06:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:06:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 05:06:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:06:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 05:06:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:06:15.455617  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:06:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:06:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:06:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:06:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:06:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:06:23.409777  543705 memory.go:184] no items to output this cycle
I0323 05:06:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 05:06:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:06:33.409807  543705 memory.go:184] no items to output this cycle
I0323 05:06:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 05:06:38.273682  543705 disk_info.go:125] begin check local disk info of client
I0323 05:06:38.276265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:06:38.276273  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6a40 0xc0004a6a80]
I0323 05:06:40.113250  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:06:40.113257  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:06:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:06:43.410727  543705 memory.go:191] Add success.
I0323 05:06:43.409819  543705 cpu.go:282] Add success.
I0323 05:06:43.419715  543705 net.go:648] Add success.
I0323 05:06:43.422550  543705 net.go:770] primary dev: ETH0
I0323 05:06:43.422563  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:06:43.422575  543705 net.go:698] Add success.
I0323 05:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:06:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:06:53.409783  543705 memory.go:184] no items to output this cycle
I0323 05:06:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 05:07:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:07:03.409792  543705 memory.go:184] no items to output this cycle
I0323 05:07:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 05:07:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:07:13.409781  543705 memory.go:191] Add success.
W0323 05:07:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:07:13.409816  543705 cpu.go:282] Add success.
W0323 05:07:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:07:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:07:13.420062  543705 net.go:648] Add success.
I0323 05:07:13.422791  543705 net.go:770] primary dev: ETH0
I0323 05:07:13.422804  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:07:13.422816  543705 net.go:698] Add success.
I0323 05:07:13.453390  543705 event_worker.go:152] Polling the log file for events...
W0323 05:07:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:07:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 05:07:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:07:14.456930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:07:14.456939  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:07:14.456959  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:07:14.457033  543705 disk_worker.go:494] system disk:vda1
I0323 05:07:14.457065  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:07:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:07:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:07:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:07:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:07:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:07:16.458014  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:07:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:07:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:07:23.409775  543705 memory.go:184] no items to output this cycle
I0323 05:07:23.409826  543705 cpu.go:275] no items to output this cycle
E0323 05:07:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:07:33.409811  543705 memory.go:184] no items to output this cycle
I0323 05:07:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 05:07:38.277677  543705 disk_info.go:125] begin check local disk info of client
I0323 05:07:38.280308  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:07:38.280314  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa3c0]
E0323 05:07:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:07:43.410698  543705 memory.go:191] Add success.
I0323 05:07:43.409840  543705 cpu.go:282] Add success.
I0323 05:07:43.420381  543705 net.go:648] Add success.
I0323 05:07:43.423254  543705 net.go:770] primary dev: ETH0
I0323 05:07:43.423267  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:07:43.423279  543705 net.go:698] Add success.
I0323 05:07:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:07:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:07:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:07:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:07:53.409788  543705 memory.go:184] no items to output this cycle
I0323 05:07:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 05:08:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:08:03.409800  543705 memory.go:184] no items to output this cycle
I0323 05:08:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 05:08:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:08:13.409791  543705 memory.go:191] Add success.
I0323 05:08:13.409791  543705 cpu.go:282] Add success.
W0323 05:08:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:08:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:08:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:08:13.420225  543705 net.go:648] Add success.
I0323 05:08:13.423276  543705 net.go:770] primary dev: ETH0
I0323 05:08:13.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:08:13.423305  543705 net.go:698] Add success.
I0323 05:08:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:08:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:08:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 05:08:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:08:14.456518  543705 disk_worker.go:494] system disk:vda1
I0323 05:08:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:08:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:08:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:08:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:08:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:08:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:08:23.409790  543705 cpu.go:275] no items to output this cycle
I0323 05:08:23.409796  543705 memory.go:184] no items to output this cycle
E0323 05:08:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:08:33.409801  543705 memory.go:184] no items to output this cycle
I0323 05:08:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 05:08:38.281667  543705 disk_info.go:125] begin check local disk info of client
I0323 05:08:38.284296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:08:38.284306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cda80 0xc0004cdac0]
E0323 05:08:43.409872  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:08:43.410779  543705 memory.go:191] Add success.
I0323 05:08:43.409950  543705 cpu.go:282] Add success.
I0323 05:08:43.419730  543705 net.go:648] Add success.
I0323 05:08:43.422503  543705 net.go:770] primary dev: ETH0
I0323 05:08:43.422517  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:08:43.422528  543705 net.go:698] Add success.
I0323 05:08:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:08:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:08:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:08:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:08:53.409804  543705 memory.go:184] no items to output this cycle
I0323 05:08:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 05:09:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:09:03.409784  543705 memory.go:184] no items to output this cycle
I0323 05:09:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 05:09:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:09:13.409785  543705 memory.go:191] Add success.
W0323 05:09:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:09:13.409812  543705 cpu.go:282] Add success.
W0323 05:09:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:09:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:09:13.420145  543705 net.go:648] Add success.
I0323 05:09:13.422706  543705 net.go:770] primary dev: ETH0
I0323 05:09:13.422719  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:09:13.422731  543705 net.go:698] Add success.
I0323 05:09:13.464067  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"60a1d286-aaf5-4555-8f93-4be99d45320a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:09:13.464102  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:09:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:09:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:09:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 05:09:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:09:14.456604  543705 disk_worker.go:494] system disk:vda1
I0323 05:09:14.456635  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:09:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:09:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:09:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:09:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:09:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:09:23.409787  543705 memory.go:184] no items to output this cycle
I0323 05:09:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 05:09:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:09:33.409800  543705 memory.go:184] no items to output this cycle
I0323 05:09:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 05:09:38.285679  543705 disk_info.go:125] begin check local disk info of client
I0323 05:09:38.288262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:09:38.288268  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266bc0 0xc000266c00]
I0323 05:09:40.113725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:09:40.113732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:09:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:09:43.410569  543705 memory.go:191] Add success.
I0323 05:09:43.409822  543705 cpu.go:282] Add success.
I0323 05:09:43.420259  543705 net.go:648] Add success.
I0323 05:09:43.422783  543705 net.go:770] primary dev: ETH0
I0323 05:09:43.422796  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:09:43.422809  543705 net.go:698] Add success.
I0323 05:09:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:09:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:09:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:09:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:09:53.409784  543705 memory.go:184] no items to output this cycle
I0323 05:09:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 05:10:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:10:03.409781  543705 memory.go:184] no items to output this cycle
I0323 05:10:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 05:10:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:10:13.409814  543705 memory.go:191] Add success.
I0323 05:10:13.409822  543705 cpu.go:282] Add success.
W0323 05:10:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:10:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:10:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:10:13.420155  543705 net.go:648] Add success.
I0323 05:10:13.422578  543705 net.go:770] primary dev: ETH0
I0323 05:10:13.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:10:13.422605  543705 net.go:698] Add success.
I0323 05:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:10:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:10:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 05:10:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:10:14.456527  543705 disk_worker.go:494] system disk:vda1
I0323 05:10:14.456573  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:10:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:10:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:10:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:10:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:10:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:10:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:10:23.409786  543705 memory.go:184] no items to output this cycle
I0323 05:10:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 05:10:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:10:33.409776  543705 memory.go:184] no items to output this cycle
I0323 05:10:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 05:10:38.289677  543705 disk_info.go:125] begin check local disk info of client
I0323 05:10:38.292289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:10:38.292296  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaec0 0xc0001aaf00]
E0323 05:10:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:10:43.410724  543705 memory.go:191] Add success.
I0323 05:10:43.409797  543705 cpu.go:282] Add success.
I0323 05:10:43.420455  543705 net.go:648] Add success.
I0323 05:10:43.423346  543705 net.go:770] primary dev: ETH0
I0323 05:10:43.423360  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:10:43.423373  543705 net.go:698] Add success.
I0323 05:10:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:10:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:10:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:10:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:10:53.409772  543705 memory.go:184] no items to output this cycle
I0323 05:10:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 05:11:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:11:03.409777  543705 memory.go:184] no items to output this cycle
I0323 05:11:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:11:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:11:13.409789  543705 memory.go:191] Add success.
I0323 05:11:13.409812  543705 cpu.go:282] Add success.
W0323 05:11:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:11:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:11:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:11:13.420269  543705 net.go:648] Add success.
I0323 05:11:13.422912  543705 net.go:770] primary dev: ETH0
I0323 05:11:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:11:13.422942  543705 net.go:698] Add success.
I0323 05:11:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:11:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:11:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 05:11:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:11:14.456538  543705 disk_worker.go:494] system disk:vda1
I0323 05:11:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:11:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:11:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:11:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:11:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:11:23.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:11:23.409870  543705 memory.go:184] no items to output this cycle
I0323 05:11:23.409929  543705 cpu.go:275] no items to output this cycle
E0323 05:11:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:11:33.409790  543705 memory.go:184] no items to output this cycle
I0323 05:11:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 05:11:38.293676  543705 disk_info.go:125] begin check local disk info of client
I0323 05:11:38.296268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:11:38.296274  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296500 0xc000296540]
E0323 05:11:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:11:43.410626  543705 memory.go:191] Add success.
I0323 05:11:43.409824  543705 cpu.go:282] Add success.
I0323 05:11:43.420411  543705 net.go:648] Add success.
I0323 05:11:43.422972  543705 net.go:770] primary dev: ETH0
I0323 05:11:43.422984  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:11:43.422997  543705 net.go:698] Add success.
I0323 05:11:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:11:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:11:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:11:53.409775  543705 memory.go:184] no items to output this cycle
I0323 05:11:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 05:12:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:12:03.409799  543705 memory.go:184] no items to output this cycle
I0323 05:12:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 05:12:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:12:13.409790  543705 memory.go:191] Add success.
I0323 05:12:13.409813  543705 cpu.go:282] Add success.
W0323 05:12:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:12:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:12:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:12:13.420177  543705 net.go:648] Add success.
I0323 05:12:13.422890  543705 net.go:770] primary dev: ETH0
I0323 05:12:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:12:13.422915  543705 net.go:698] Add success.
I0323 05:12:13.469022  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1f5924d9-2fbf-4e3c-960f-a4f588894fd0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:12:13.469056  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 05:12:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:12:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 05:12:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:12:14.456891  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:12:14.456901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:12:14.456906  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:12:14.456910  543705 disk_worker.go:494] system disk:vda1
I0323 05:12:14.456951  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:12:15.456865  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:12:15.456874  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 05:12:16.457949  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:12:16.457950  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:12:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:12:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:12:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:12:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:12:23.409823  543705 memory.go:184] no items to output this cycle
I0323 05:12:23.409825  543705 cpu.go:275] no items to output this cycle
E0323 05:12:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:12:33.409781  543705 memory.go:184] no items to output this cycle
I0323 05:12:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 05:12:38.297674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:12:38.300285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:12:38.300291  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262300 0xc000262340]
I0323 05:12:40.117285  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:12:40.117291  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:12:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:12:43.410712  543705 memory.go:191] Add success.
I0323 05:12:43.409822  543705 cpu.go:282] Add success.
I0323 05:12:43.420458  543705 net.go:648] Add success.
I0323 05:12:43.423159  543705 net.go:770] primary dev: ETH0
I0323 05:12:43.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:12:43.423187  543705 net.go:698] Add success.
I0323 05:12:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:12:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:12:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:12:53.410401  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:12:53.410417  543705 memory.go:184] no items to output this cycle
I0323 05:12:53.410439  543705 cpu.go:275] no items to output this cycle
E0323 05:13:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:13:03.409774  543705 memory.go:184] no items to output this cycle
I0323 05:13:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 05:13:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:13:13.409795  543705 memory.go:191] Add success.
I0323 05:13:13.409802  543705 cpu.go:282] Add success.
W0323 05:13:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:13:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:13:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:13:13.420083  543705 net.go:648] Add success.
I0323 05:13:13.423035  543705 net.go:770] primary dev: ETH0
I0323 05:13:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:13:13.423065  543705 net.go:698] Add success.
I0323 05:13:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:13:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:13:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 05:13:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:13:14.456604  543705 disk_worker.go:494] system disk:vda1
I0323 05:13:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:13:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:13:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:13:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:13:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:13:23.410408  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:13:23.410428  543705 memory.go:184] no items to output this cycle
I0323 05:13:23.410437  543705 cpu.go:275] no items to output this cycle
E0323 05:13:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:13:33.409802  543705 memory.go:184] no items to output this cycle
I0323 05:13:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 05:13:38.301674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:13:38.304275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:13:38.304283  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003044c0 0xc000304500]
E0323 05:13:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:13:43.410597  543705 memory.go:191] Add success.
I0323 05:13:43.409806  543705 cpu.go:282] Add success.
I0323 05:13:43.420324  543705 net.go:648] Add success.
I0323 05:13:43.422741  543705 net.go:770] primary dev: ETH0
I0323 05:13:43.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:13:43.422770  543705 net.go:698] Add success.
I0323 05:13:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:13:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:13:53.409794  543705 memory.go:184] no items to output this cycle
I0323 05:13:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 05:14:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:14:03.409794  543705 memory.go:184] no items to output this cycle
I0323 05:14:03.409840  543705 cpu.go:275] no items to output this cycle
E0323 05:14:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:14:13.409796  543705 cpu.go:282] Add success.
I0323 05:14:13.409798  543705 memory.go:191] Add success.
W0323 05:14:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:14:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:14:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:14:13.419785  543705 net.go:648] Add success.
I0323 05:14:13.422620  543705 net.go:770] primary dev: ETH0
I0323 05:14:13.422634  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:14:13.422647  543705 net.go:698] Add success.
I0323 05:14:14.453975  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:14:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:14:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0323 05:14:14.455257  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:14:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 05:14:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:14:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:14:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:14:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:14:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:14:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:14:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:14:23.409787  543705 memory.go:184] no items to output this cycle
I0323 05:14:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 05:14:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:14:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 05:14:33.409783  543705 memory.go:184] no items to output this cycle
I0323 05:14:38.305676  543705 disk_info.go:125] begin check local disk info of client
I0323 05:14:38.308227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:14:38.308233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3340 0xc0003b3380]
E0323 05:14:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:14:43.410669  543705 memory.go:191] Add success.
I0323 05:14:43.409787  543705 cpu.go:282] Add success.
I0323 05:14:43.420476  543705 net.go:648] Add success.
I0323 05:14:43.423098  543705 net.go:770] primary dev: ETH0
I0323 05:14:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:14:43.423124  543705 net.go:698] Add success.
I0323 05:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:14:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:14:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:14:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:14:53.409806  543705 memory.go:184] no items to output this cycle
I0323 05:14:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 05:15:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:15:03.409896  543705 memory.go:184] no items to output this cycle
I0323 05:15:03.409934  543705 cpu.go:275] no items to output this cycle
E0323 05:15:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:15:13.409792  543705 memory.go:191] Add success.
I0323 05:15:13.409802  543705 cpu.go:282] Add success.
W0323 05:15:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:15:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:15:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:15:13.420135  543705 net.go:648] Add success.
I0323 05:15:13.422953  543705 net.go:770] primary dev: ETH0
I0323 05:15:13.422968  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:15:13.422982  543705 net.go:698] Add success.
I0323 05:15:13.464108  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c8026ae-1401-4d97-87e8-ff5c7294bca4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:15:13.464141  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:15:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:15:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:15:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 05:15:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:15:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 05:15:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:15:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:15:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:15:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:15:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:15:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:15:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:15:23.409801  543705 memory.go:184] no items to output this cycle
I0323 05:15:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 05:15:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:15:33.409806  543705 memory.go:184] no items to output this cycle
I0323 05:15:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 05:15:38.309672  543705 disk_info.go:125] begin check local disk info of client
I0323 05:15:38.312274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:15:38.312280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003aaa00 0xc0003aaa40]
I0323 05:15:40.117729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:15:40.117735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:15:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:15:43.410672  543705 memory.go:191] Add success.
I0323 05:15:43.409797  543705 cpu.go:282] Add success.
I0323 05:15:43.420382  543705 net.go:648] Add success.
I0323 05:15:43.422911  543705 net.go:770] primary dev: ETH0
I0323 05:15:43.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:15:43.422942  543705 net.go:698] Add success.
I0323 05:15:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:15:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:15:53.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:15:53.409929  543705 memory.go:184] no items to output this cycle
I0323 05:15:53.409930  543705 cpu.go:275] no items to output this cycle
E0323 05:16:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:16:03.409788  543705 cpu.go:275] no items to output this cycle
I0323 05:16:03.409798  543705 memory.go:184] no items to output this cycle
E0323 05:16:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:16:13.409796  543705 memory.go:191] Add success.
I0323 05:16:13.409801  543705 cpu.go:282] Add success.
W0323 05:16:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:16:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:16:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:16:13.420050  543705 net.go:648] Add success.
I0323 05:16:13.422933  543705 net.go:770] primary dev: ETH0
I0323 05:16:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:16:13.422959  543705 net.go:698] Add success.
I0323 05:16:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:16:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:16:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 05:16:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:16:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 05:16:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:16:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:16:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:16:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:16:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:16:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:16:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:16:23.409809  543705 memory.go:184] no items to output this cycle
I0323 05:16:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 05:16:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:16:33.409772  543705 memory.go:184] no items to output this cycle
I0323 05:16:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 05:16:38.313675  543705 disk_info.go:125] begin check local disk info of client
I0323 05:16:38.316250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:16:38.316256  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034ebc0 0xc00034ec00]
E0323 05:16:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:16:43.410829  543705 memory.go:191] Add success.
I0323 05:16:43.409813  543705 cpu.go:282] Add success.
I0323 05:16:43.420695  543705 net.go:648] Add success.
I0323 05:16:43.423365  543705 net.go:770] primary dev: ETH0
I0323 05:16:43.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:16:43.423390  543705 net.go:698] Add success.
I0323 05:16:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:16:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:16:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:16:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:16:53.409766  543705 memory.go:184] no items to output this cycle
I0323 05:16:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 05:17:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:17:03.409785  543705 memory.go:184] no items to output this cycle
I0323 05:17:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:17:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:17:13.409810  543705 memory.go:191] Add success.
I0323 05:17:13.409819  543705 cpu.go:282] Add success.
W0323 05:17:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:17:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:17:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:17:13.420144  543705 net.go:648] Add success.
I0323 05:17:13.422798  543705 net.go:770] primary dev: ETH0
I0323 05:17:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:17:13.422822  543705 net.go:698] Add success.
I0323 05:17:13.453346  543705 event_worker.go:152] Polling the log file for events...
W0323 05:17:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:17:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 05:17:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:17:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:17:14.455915  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:17:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:17:14.456559  543705 disk_worker.go:494] system disk:vda1
I0323 05:17:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:17:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:17:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:17:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:17:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:17:16.458021  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:17:16.458038  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:17:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:17:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:17:23.409807  543705 memory.go:184] no items to output this cycle
I0323 05:17:23.409817  543705 cpu.go:275] no items to output this cycle
E0323 05:17:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:17:33.409779  543705 memory.go:184] no items to output this cycle
I0323 05:17:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 05:17:38.317695  543705 disk_info.go:125] begin check local disk info of client
I0323 05:17:38.320328  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:17:38.320335  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ca940 0xc0004ca980]
E0323 05:17:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:17:43.410551  543705 memory.go:191] Add success.
I0323 05:17:43.409801  543705 cpu.go:282] Add success.
I0323 05:17:43.420258  543705 net.go:648] Add success.
I0323 05:17:43.422824  543705 net.go:770] primary dev: ETH0
I0323 05:17:43.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:17:43.422849  543705 net.go:698] Add success.
I0323 05:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:17:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:17:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:17:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:17:53.409776  543705 cpu.go:275] no items to output this cycle
I0323 05:17:53.409780  543705 memory.go:184] no items to output this cycle
E0323 05:18:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:18:03.409782  543705 memory.go:184] no items to output this cycle
I0323 05:18:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 05:18:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:18:13.409788  543705 memory.go:191] Add success.
I0323 05:18:13.409810  543705 cpu.go:282] Add success.
W0323 05:18:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:18:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:18:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:18:13.420152  543705 net.go:648] Add success.
I0323 05:18:13.422753  543705 net.go:770] primary dev: ETH0
I0323 05:18:13.422765  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:18:13.422778  543705 net.go:698] Add success.
I0323 05:18:13.623319  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8aeeaa90-9ead-4eaa-8d87-8434563f5c66","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:18:13.623353  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:18:14.453975  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:18:14.455262  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:18:14.455274  543705 disk_worker.go:708] disk space is not compliant
W0323 05:18:14.455277  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:18:14.456813  543705 disk_worker.go:494] system disk:vda1
I0323 05:18:14.456852  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:18:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:18:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:18:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:18:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:18:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:18:23.409781  543705 memory.go:184] no items to output this cycle
I0323 05:18:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 05:18:33.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:18:33.409887  543705 cpu.go:275] no items to output this cycle
I0323 05:18:33.409898  543705 memory.go:184] no items to output this cycle
I0323 05:18:38.321674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:18:38.324295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:18:38.324301  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048aec0 0xc00048af00]
I0323 05:18:40.121309  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:18:40.121315  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:18:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:18:43.410535  543705 memory.go:191] Add success.
I0323 05:18:43.409794  543705 cpu.go:282] Add success.
I0323 05:18:43.420245  543705 net.go:648] Add success.
I0323 05:18:43.422973  543705 net.go:770] primary dev: ETH0
I0323 05:18:43.422987  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:18:43.423001  543705 net.go:698] Add success.
I0323 05:18:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:18:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:18:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:18:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:18:53.409781  543705 cpu.go:275] no items to output this cycle
I0323 05:18:53.409788  543705 memory.go:184] no items to output this cycle
E0323 05:19:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:19:03.409775  543705 memory.go:184] no items to output this cycle
I0323 05:19:03.409799  543705 cpu.go:275] no items to output this cycle
W0323 05:19:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:19:13.409733  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:19:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 05:19:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:19:13.409829  543705 cpu.go:282] Add success.
I0323 05:19:13.409850  543705 memory.go:191] Add success.
I0323 05:19:13.420274  543705 net.go:648] Add success.
I0323 05:19:13.423043  543705 net.go:770] primary dev: ETH0
I0323 05:19:13.423056  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:19:13.423067  543705 net.go:698] Add success.
I0323 05:19:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:19:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:19:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 05:19:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:19:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 05:19:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:19:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:19:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:19:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:19:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:19:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:19:23.409776  543705 memory.go:184] no items to output this cycle
I0323 05:19:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 05:19:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:19:33.409787  543705 memory.go:184] no items to output this cycle
I0323 05:19:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 05:19:38.325681  543705 disk_info.go:125] begin check local disk info of client
I0323 05:19:38.328293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:19:38.328300  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487b40 0xc000487b80]
E0323 05:19:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:19:43.410599  543705 memory.go:191] Add success.
I0323 05:19:43.409806  543705 cpu.go:282] Add success.
I0323 05:19:43.420376  543705 net.go:648] Add success.
I0323 05:19:43.422784  543705 net.go:770] primary dev: ETH0
I0323 05:19:43.422798  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:19:43.422810  543705 net.go:698] Add success.
I0323 05:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:19:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:19:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:19:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:19:53.409768  543705 memory.go:184] no items to output this cycle
I0323 05:19:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 05:20:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:20:03.409822  543705 memory.go:184] no items to output this cycle
I0323 05:20:03.409824  543705 cpu.go:275] no items to output this cycle
E0323 05:20:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:20:13.409785  543705 memory.go:191] Add success.
I0323 05:20:13.409807  543705 cpu.go:282] Add success.
W0323 05:20:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:20:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:20:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:20:13.420315  543705 net.go:648] Add success.
I0323 05:20:13.423227  543705 net.go:770] primary dev: ETH0
I0323 05:20:13.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:20:13.423252  543705 net.go:698] Add success.
I0323 05:20:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:20:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:20:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 05:20:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:20:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 05:20:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:20:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:20:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:20:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:20:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:20:16.472393  543705 disk_local_worker.go:436] Get disk info: []
I0323 05:20:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 05:20:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:20:23.409814  543705 memory.go:184] no items to output this cycle
E0323 05:20:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:20:33.409809  543705 memory.go:184] no items to output this cycle
I0323 05:20:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 05:20:38.329671  543705 disk_info.go:125] begin check local disk info of client
I0323 05:20:38.332296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:20:38.332303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8340 0xc0003e8380]
E0323 05:20:43.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:20:43.410847  543705 memory.go:191] Add success.
I0323 05:20:43.409969  543705 cpu.go:282] Add success.
I0323 05:20:43.419715  543705 net.go:648] Add success.
I0323 05:20:43.422678  543705 net.go:770] primary dev: ETH0
I0323 05:20:43.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:20:43.422706  543705 net.go:698] Add success.
I0323 05:20:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:20:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:20:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:20:53.409774  543705 memory.go:184] no items to output this cycle
I0323 05:20:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 05:21:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:21:03.409780  543705 memory.go:184] no items to output this cycle
I0323 05:21:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 05:21:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:21:13.409784  543705 memory.go:191] Add success.
I0323 05:21:13.409805  543705 cpu.go:282] Add success.
W0323 05:21:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:21:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:21:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:21:13.420383  543705 net.go:648] Add success.
I0323 05:21:13.423175  543705 net.go:770] primary dev: ETH0
I0323 05:21:13.423190  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:21:13.423203  543705 net.go:698] Add success.
I0323 05:21:13.468666  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4a13c535-8bd9-4f5c-99af-2656a20621d0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:21:13.468701  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:21:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:21:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:21:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 05:21:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:21:14.456513  543705 disk_worker.go:494] system disk:vda1
I0323 05:21:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:21:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:21:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:21:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:21:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:21:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:21:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:21:23.409777  543705 memory.go:184] no items to output this cycle
I0323 05:21:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 05:21:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:21:33.409772  543705 memory.go:184] no items to output this cycle
I0323 05:21:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 05:21:38.333674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:21:38.336210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:21:38.336216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003776c0 0xc000377700]
I0323 05:21:40.121736  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:21:40.121745  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:21:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:21:43.410727  543705 memory.go:191] Add success.
I0323 05:21:43.409811  543705 cpu.go:282] Add success.
I0323 05:21:43.420633  543705 net.go:648] Add success.
I0323 05:21:43.423381  543705 net.go:770] primary dev: ETH0
I0323 05:21:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:21:43.423405  543705 net.go:698] Add success.
I0323 05:21:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:21:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:21:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:21:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:21:53.409779  543705 memory.go:184] no items to output this cycle
I0323 05:21:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:22:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:22:03.409777  543705 memory.go:184] no items to output this cycle
I0323 05:22:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 05:22:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:22:13.409784  543705 memory.go:191] Add success.
I0323 05:22:13.409800  543705 cpu.go:282] Add success.
W0323 05:22:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:22:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:22:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:22:13.420136  543705 net.go:648] Add success.
I0323 05:22:13.422901  543705 net.go:770] primary dev: ETH0
I0323 05:22:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:22:13.422929  543705 net.go:698] Add success.
W0323 05:22:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:22:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 05:22:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:22:14.455873  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:22:14.455882  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:22:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:22:14.456553  543705 disk_worker.go:494] system disk:vda1
I0323 05:22:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:22:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:22:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:22:16.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:22:16.458016  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:22:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:22:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:22:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:22:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:22:23.409790  543705 cpu.go:275] no items to output this cycle
I0323 05:22:23.409793  543705 memory.go:184] no items to output this cycle
E0323 05:22:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:22:33.409772  543705 memory.go:184] no items to output this cycle
I0323 05:22:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 05:22:38.337676  543705 disk_info.go:125] begin check local disk info of client
I0323 05:22:38.340264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:22:38.340270  543705 disk_info.go:196] parse disk info done, disk is : [0xc000495640 0xc000495680]
E0323 05:22:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:22:43.410648  543705 memory.go:191] Add success.
I0323 05:22:43.409823  543705 cpu.go:282] Add success.
I0323 05:22:43.420344  543705 net.go:648] Add success.
I0323 05:22:43.422928  543705 net.go:770] primary dev: ETH0
I0323 05:22:43.422941  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:22:43.422954  543705 net.go:698] Add success.
I0323 05:22:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:22:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:22:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:22:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:22:53.409795  543705 memory.go:184] no items to output this cycle
I0323 05:22:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 05:23:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:23:03.409787  543705 cpu.go:275] no items to output this cycle
I0323 05:23:03.409793  543705 memory.go:184] no items to output this cycle
E0323 05:23:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:23:13.409808  543705 memory.go:191] Add success.
I0323 05:23:13.409817  543705 cpu.go:282] Add success.
W0323 05:23:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:23:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:23:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:23:13.420063  543705 net.go:648] Add success.
I0323 05:23:13.422646  543705 net.go:770] primary dev: ETH0
I0323 05:23:13.422661  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:23:13.422674  543705 net.go:698] Add success.
I0323 05:23:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:23:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:23:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 05:23:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:23:14.456585  543705 disk_worker.go:494] system disk:vda1
I0323 05:23:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:23:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:23:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:23:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:23:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:23:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:23:23.409779  543705 memory.go:184] no items to output this cycle
I0323 05:23:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 05:23:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:23:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 05:23:33.409787  543705 memory.go:184] no items to output this cycle
I0323 05:23:38.341677  543705 disk_info.go:125] begin check local disk info of client
I0323 05:23:38.344251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:23:38.344257  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491500 0xc000491540]
E0323 05:23:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:23:43.410513  543705 memory.go:191] Add success.
I0323 05:23:43.409812  543705 cpu.go:282] Add success.
I0323 05:23:43.420188  543705 net.go:648] Add success.
I0323 05:23:43.422572  543705 net.go:770] primary dev: ETH0
I0323 05:23:43.422585  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:23:43.422598  543705 net.go:698] Add success.
I0323 05:23:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:23:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:23:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:23:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:23:53.409779  543705 memory.go:184] no items to output this cycle
I0323 05:23:53.409902  543705 cpu.go:275] no items to output this cycle
E0323 05:24:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:24:03.409801  543705 memory.go:184] no items to output this cycle
I0323 05:24:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 05:24:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:24:13.409811  543705 memory.go:191] Add success.
I0323 05:24:13.409819  543705 cpu.go:282] Add success.
W0323 05:24:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:24:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:24:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:24:13.420160  543705 net.go:648] Add success.
I0323 05:24:13.422577  543705 net.go:770] primary dev: ETH0
I0323 05:24:13.422590  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:24:13.422603  543705 net.go:698] Add success.
I0323 05:24:13.529302  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0565f21e-27d6-49de-b867-e4643d47019e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:24:13.529335  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:24:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:24:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:24:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 05:24:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:24:14.456623  543705 disk_worker.go:494] system disk:vda1
I0323 05:24:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:24:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:24:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:24:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:24:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:24:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:24:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:24:23.409797  543705 memory.go:184] no items to output this cycle
I0323 05:24:23.409820  543705 cpu.go:275] no items to output this cycle
E0323 05:24:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:24:33.409774  543705 memory.go:184] no items to output this cycle
I0323 05:24:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 05:24:38.345674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:24:38.348289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:24:38.348296  543705 disk_info.go:196] parse disk info done, disk is : [0xc000387ac0 0xc000387b00]
I0323 05:24:40.125303  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:24:40.125309  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:24:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:24:43.410556  543705 memory.go:191] Add success.
I0323 05:24:43.409812  543705 cpu.go:282] Add success.
I0323 05:24:43.420278  543705 net.go:648] Add success.
I0323 05:24:43.422966  543705 net.go:770] primary dev: ETH0
I0323 05:24:43.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:24:43.422995  543705 net.go:698] Add success.
I0323 05:24:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:24:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:24:46.458055  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:24:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:24:53.409783  543705 memory.go:184] no items to output this cycle
I0323 05:24:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 05:25:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:25:03.409787  543705 memory.go:184] no items to output this cycle
I0323 05:25:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 05:25:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:25:13.409825  543705 memory.go:191] Add success.
I0323 05:25:13.409834  543705 cpu.go:282] Add success.
W0323 05:25:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:25:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:25:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:25:13.420199  543705 net.go:648] Add success.
I0323 05:25:13.422808  543705 net.go:770] primary dev: ETH0
I0323 05:25:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:25:13.422833  543705 net.go:698] Add success.
I0323 05:25:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:25:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:25:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 05:25:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:25:14.456566  543705 disk_worker.go:494] system disk:vda1
I0323 05:25:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:25:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:25:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:25:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:25:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:25:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:25:23.409787  543705 cpu.go:275] no items to output this cycle
I0323 05:25:23.409794  543705 memory.go:184] no items to output this cycle
E0323 05:25:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:25:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 05:25:33.409791  543705 memory.go:184] no items to output this cycle
I0323 05:25:38.349678  543705 disk_info.go:125] begin check local disk info of client
I0323 05:25:38.352262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:25:38.352268  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2500 0xc0003e2540]
E0323 05:25:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:25:43.410583  543705 memory.go:191] Add success.
I0323 05:25:43.409785  543705 cpu.go:282] Add success.
I0323 05:25:43.420286  543705 net.go:648] Add success.
I0323 05:25:43.422944  543705 net.go:770] primary dev: ETH0
I0323 05:25:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:25:43.422968  543705 net.go:698] Add success.
I0323 05:25:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:25:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:25:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:25:53.409806  543705 memory.go:184] no items to output this cycle
I0323 05:25:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 05:26:03.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:26:03.409896  543705 memory.go:184] no items to output this cycle
I0323 05:26:03.409930  543705 cpu.go:275] no items to output this cycle
E0323 05:26:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:26:13.409807  543705 memory.go:191] Add success.
W0323 05:26:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:26:13.409835  543705 cpu.go:282] Add success.
W0323 05:26:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:26:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:26:13.420225  543705 net.go:648] Add success.
I0323 05:26:13.422860  543705 net.go:770] primary dev: ETH0
I0323 05:26:13.422875  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:26:13.422888  543705 net.go:698] Add success.
I0323 05:26:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:26:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:26:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 05:26:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:26:14.456503  543705 disk_worker.go:494] system disk:vda1
I0323 05:26:14.456546  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:26:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:26:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:26:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:26:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:26:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:26:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:26:23.409791  543705 memory.go:184] no items to output this cycle
I0323 05:26:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 05:26:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:26:33.409783  543705 memory.go:184] no items to output this cycle
I0323 05:26:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 05:26:38.353680  543705 disk_info.go:125] begin check local disk info of client
I0323 05:26:38.356250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:26:38.356256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354580 0xc0003545c0]
E0323 05:26:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:26:43.410740  543705 memory.go:191] Add success.
I0323 05:26:43.409825  543705 cpu.go:282] Add success.
I0323 05:26:43.420433  543705 net.go:648] Add success.
I0323 05:26:43.423105  543705 net.go:770] primary dev: ETH0
I0323 05:26:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:26:43.423130  543705 net.go:698] Add success.
I0323 05:26:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:26:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:26:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:26:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:26:53.409787  543705 memory.go:184] no items to output this cycle
I0323 05:26:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 05:27:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:27:03.409801  543705 memory.go:184] no items to output this cycle
I0323 05:27:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 05:27:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:27:13.409821  543705 memory.go:191] Add success.
I0323 05:27:13.409841  543705 cpu.go:282] Add success.
W0323 05:27:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:27:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:27:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:27:13.420178  543705 net.go:648] Add success.
I0323 05:27:13.428720  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 05:27:13.428798  543705 net.go:770] primary dev: ETH0
I0323 05:27:13.428810  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:27:13.428822  543705 net.go:698] Add success.
I0323 05:27:13.453449  543705 event_worker.go:152] Polling the log file for events...
I0323 05:27:14.059177  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f700564a-0280-4216-abfe-b6f3f66378ec","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:27:14.059215  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 05:27:14.454374  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:27:14.454387  543705 disk_worker.go:708] disk space is not compliant
W0323 05:27:14.454391  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:27:14.455123  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:27:14.455132  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:27:14.455138  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:27:14.455957  543705 disk_worker.go:494] system disk:vda1
I0323 05:27:14.455986  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:27:15.456874  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:27:15.456884  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 05:27:16.457197  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:27:16.458253  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:27:16.458305  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:27:16.458322  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:27:16.472676  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:27:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:27:23.409791  543705 memory.go:184] no items to output this cycle
I0323 05:27:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 05:27:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:27:33.409777  543705 memory.go:184] no items to output this cycle
I0323 05:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 05:27:38.357675  543705 disk_info.go:125] begin check local disk info of client
I0323 05:27:38.360247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:27:38.360253  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d040 0xc00035d080]
I0323 05:27:40.125734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:27:40.125739  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:27:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:27:43.410528  543705 memory.go:191] Add success.
I0323 05:27:43.409825  543705 cpu.go:282] Add success.
I0323 05:27:43.420241  543705 net.go:648] Add success.
I0323 05:27:43.422810  543705 net.go:770] primary dev: ETH0
I0323 05:27:43.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:27:43.422838  543705 net.go:698] Add success.
I0323 05:27:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:27:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:27:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:27:53.409784  543705 memory.go:184] no items to output this cycle
I0323 05:27:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:28:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:28:03.409812  543705 memory.go:184] no items to output this cycle
I0323 05:28:03.409824  543705 cpu.go:275] no items to output this cycle
E0323 05:28:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:28:13.409793  543705 memory.go:191] Add success.
I0323 05:28:13.409812  543705 cpu.go:282] Add success.
W0323 05:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:28:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:28:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:28:13.420146  543705 net.go:648] Add success.
I0323 05:28:13.422770  543705 net.go:770] primary dev: ETH0
I0323 05:28:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:28:13.422796  543705 net.go:698] Add success.
I0323 05:28:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:28:14.455098  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:28:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 05:28:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:28:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 05:28:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:28:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:28:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:28:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:28:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:28:23.409799  543705 memory.go:184] no items to output this cycle
I0323 05:28:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 05:28:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:28:33.409789  543705 memory.go:184] no items to output this cycle
I0323 05:28:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 05:28:38.361679  543705 disk_info.go:125] begin check local disk info of client
I0323 05:28:38.364289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:28:38.364295  543705 disk_info.go:196] parse disk info done, disk is : [0xc00048aac0 0xc00048ab00]
E0323 05:28:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:28:43.410599  543705 memory.go:191] Add success.
I0323 05:28:43.409812  543705 cpu.go:282] Add success.
I0323 05:28:43.420298  543705 net.go:648] Add success.
I0323 05:28:43.422871  543705 net.go:770] primary dev: ETH0
I0323 05:28:43.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:28:43.422898  543705 net.go:698] Add success.
I0323 05:28:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:28:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:28:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:28:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:28:53.409792  543705 memory.go:184] no items to output this cycle
I0323 05:28:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 05:29:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:29:03.409808  543705 memory.go:184] no items to output this cycle
I0323 05:29:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 05:29:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:29:13.409774  543705 memory.go:191] Add success.
I0323 05:29:13.409795  543705 cpu.go:282] Add success.
W0323 05:29:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:29:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:29:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:29:13.420683  543705 net.go:648] Add success.
I0323 05:29:13.423378  543705 net.go:770] primary dev: ETH0
I0323 05:29:13.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:29:13.423403  543705 net.go:698] Add success.
I0323 05:29:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:29:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:29:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 05:29:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:29:14.456608  543705 disk_worker.go:494] system disk:vda1
I0323 05:29:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:29:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:29:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:29:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:29:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:29:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:29:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:29:23.409808  543705 memory.go:184] no items to output this cycle
I0323 05:29:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 05:29:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:29:33.409794  543705 memory.go:184] no items to output this cycle
I0323 05:29:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 05:29:38.365674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:29:38.368291  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:29:38.368298  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003221c0 0xc000322200]
E0323 05:29:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:29:43.410580  543705 memory.go:191] Add success.
I0323 05:29:43.409819  543705 cpu.go:282] Add success.
I0323 05:29:43.420349  543705 net.go:648] Add success.
I0323 05:29:43.422903  543705 net.go:770] primary dev: ETH0
I0323 05:29:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:29:43.422929  543705 net.go:698] Add success.
I0323 05:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:29:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:29:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:29:53.410420  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:29:53.410441  543705 memory.go:184] no items to output this cycle
I0323 05:29:53.410453  543705 cpu.go:275] no items to output this cycle
E0323 05:30:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:30:03.409790  543705 memory.go:184] no items to output this cycle
I0323 05:30:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 05:30:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:30:13.409790  543705 cpu.go:282] Add success.
I0323 05:30:13.409802  543705 memory.go:191] Add success.
W0323 05:30:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:30:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:30:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:30:13.420154  543705 net.go:648] Add success.
I0323 05:30:13.422675  543705 net.go:770] primary dev: ETH0
I0323 05:30:13.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:30:13.422698  543705 net.go:698] Add success.
I0323 05:30:13.463646  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d7f5adfa-0cb0-4eba-91ad-3de46d28a4d5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:30:13.463682  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:30:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:30:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:30:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0323 05:30:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:30:14.456500  543705 disk_worker.go:494] system disk:vda1
I0323 05:30:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:30:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:30:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:30:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:30:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:30:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:30:23.409776  543705 memory.go:184] no items to output this cycle
I0323 05:30:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 05:30:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:30:33.409781  543705 memory.go:184] no items to output this cycle
I0323 05:30:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 05:30:38.369673  543705 disk_info.go:125] begin check local disk info of client
I0323 05:30:38.372289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:30:38.372295  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463c80 0xc000463cc0]
I0323 05:30:40.128154  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:30:40.128160  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:30:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:30:43.410569  543705 memory.go:191] Add success.
I0323 05:30:43.409810  543705 cpu.go:282] Add success.
I0323 05:30:43.420263  543705 net.go:648] Add success.
I0323 05:30:43.422894  543705 net.go:770] primary dev: ETH0
I0323 05:30:43.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:30:43.422919  543705 net.go:698] Add success.
I0323 05:30:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:30:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:30:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:30:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:30:53.409780  543705 memory.go:184] no items to output this cycle
I0323 05:30:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 05:31:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:31:03.409814  543705 memory.go:184] no items to output this cycle
I0323 05:31:03.409825  543705 cpu.go:275] no items to output this cycle
E0323 05:31:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:31:13.409779  543705 memory.go:191] Add success.
I0323 05:31:13.409799  543705 cpu.go:282] Add success.
W0323 05:31:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:31:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:31:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:31:13.420083  543705 net.go:648] Add success.
I0323 05:31:13.422744  543705 net.go:770] primary dev: ETH0
I0323 05:31:13.422757  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:31:13.422769  543705 net.go:698] Add success.
I0323 05:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:31:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:31:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 05:31:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:31:14.456522  543705 disk_worker.go:494] system disk:vda1
I0323 05:31:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:31:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:31:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:31:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:31:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:31:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:31:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:31:23.409793  543705 memory.go:184] no items to output this cycle
I0323 05:31:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 05:31:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:31:33.409781  543705 memory.go:184] no items to output this cycle
I0323 05:31:33.409780  543705 cpu.go:275] no items to output this cycle
I0323 05:31:38.373675  543705 disk_info.go:125] begin check local disk info of client
I0323 05:31:38.376230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:31:38.376236  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032ea40 0xc00032ea80]
E0323 05:31:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:31:43.410721  543705 memory.go:191] Add success.
I0323 05:31:43.409798  543705 cpu.go:282] Add success.
I0323 05:31:43.420421  543705 net.go:648] Add success.
I0323 05:31:43.423403  543705 net.go:770] primary dev: ETH0
I0323 05:31:43.423416  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:31:43.423428  543705 net.go:698] Add success.
I0323 05:31:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:31:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:31:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:31:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:31:53.409799  543705 memory.go:184] no items to output this cycle
I0323 05:31:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 05:32:03.409837  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:32:03.409855  543705 memory.go:184] no items to output this cycle
I0323 05:32:03.409966  543705 cpu.go:275] no items to output this cycle
E0323 05:32:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:32:13.409783  543705 memory.go:191] Add success.
I0323 05:32:13.409788  543705 cpu.go:282] Add success.
W0323 05:32:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:32:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:32:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:32:13.420036  543705 net.go:648] Add success.
I0323 05:32:13.422604  543705 net.go:770] primary dev: ETH0
I0323 05:32:13.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:32:13.422629  543705 net.go:698] Add success.
W0323 05:32:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:32:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 05:32:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:32:14.455913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:32:14.455922  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:32:14.455928  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:32:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 05:32:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:32:15.456813  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:32:15.456822  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:32:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:32:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:32:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:32:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:32:16.472326  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:32:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:32:23.409770  543705 memory.go:184] no items to output this cycle
I0323 05:32:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 05:32:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:32:33.409800  543705 memory.go:184] no items to output this cycle
I0323 05:32:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 05:32:38.377673  543705 disk_info.go:125] begin check local disk info of client
I0323 05:32:38.380227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:32:38.380234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002711c0 0xc000271200]
E0323 05:32:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:32:43.410938  543705 memory.go:191] Add success.
I0323 05:32:43.409821  543705 cpu.go:282] Add success.
I0323 05:32:43.420681  543705 net.go:648] Add success.
I0323 05:32:43.423246  543705 net.go:770] primary dev: ETH0
I0323 05:32:43.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:32:43.423273  543705 net.go:698] Add success.
I0323 05:32:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:32:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:32:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:32:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:32:53.409784  543705 cpu.go:275] no items to output this cycle
I0323 05:32:53.409797  543705 memory.go:184] no items to output this cycle
E0323 05:33:03.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:33:03.409899  543705 memory.go:184] no items to output this cycle
I0323 05:33:03.409954  543705 cpu.go:275] no items to output this cycle
E0323 05:33:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:33:13.409790  543705 memory.go:191] Add success.
I0323 05:33:13.409794  543705 cpu.go:282] Add success.
W0323 05:33:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:33:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:33:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:33:13.420074  543705 net.go:648] Add success.
I0323 05:33:13.422511  543705 net.go:770] primary dev: ETH0
I0323 05:33:13.422523  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:33:13.422536  543705 net.go:698] Add success.
I0323 05:33:13.504686  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2f50c3ad-47b8-4b8e-9d61-2b07c2bfaa13","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:33:13.504717  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:33:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:33:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:33:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 05:33:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:33:14.456719  543705 disk_worker.go:494] system disk:vda1
I0323 05:33:14.456746  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:33:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:33:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:33:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:33:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:33:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:33:23.409790  543705 memory.go:184] no items to output this cycle
I0323 05:33:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 05:33:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:33:33.409800  543705 memory.go:184] no items to output this cycle
I0323 05:33:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 05:33:38.381673  543705 disk_info.go:125] begin check local disk info of client
I0323 05:33:38.384232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:33:38.384238  543705 disk_info.go:196] parse disk info done, disk is : [0xc000492100 0xc000492140]
I0323 05:33:40.129725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:33:40.129731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:33:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:33:43.410712  543705 memory.go:191] Add success.
I0323 05:33:43.409817  543705 cpu.go:282] Add success.
I0323 05:33:43.420490  543705 net.go:648] Add success.
I0323 05:33:43.423138  543705 net.go:770] primary dev: ETH0
I0323 05:33:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:33:43.423164  543705 net.go:698] Add success.
I0323 05:33:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:33:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:33:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:33:53.409913  543705 cpu.go:275] no items to output this cycle
E0323 05:33:53.409923  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:33:53.409937  543705 memory.go:184] no items to output this cycle
E0323 05:34:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:34:03.409782  543705 memory.go:184] no items to output this cycle
I0323 05:34:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 05:34:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:34:13.409786  543705 memory.go:191] Add success.
W0323 05:34:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:34:13.409821  543705 cpu.go:282] Add success.
W0323 05:34:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:34:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:34:13.420213  543705 net.go:648] Add success.
I0323 05:34:13.423197  543705 net.go:770] primary dev: ETH0
I0323 05:34:13.423209  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:34:13.423221  543705 net.go:698] Add success.
I0323 05:34:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:34:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:34:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 05:34:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:34:14.456601  543705 disk_worker.go:494] system disk:vda1
I0323 05:34:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:34:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:34:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:34:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:34:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:34:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:34:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:34:23.409812  543705 memory.go:184] no items to output this cycle
I0323 05:34:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 05:34:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:34:33.409772  543705 memory.go:184] no items to output this cycle
I0323 05:34:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 05:34:38.385675  543705 disk_info.go:125] begin check local disk info of client
I0323 05:34:38.388242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:34:38.388248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8b40 0xc0002a8b80]
E0323 05:34:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:34:43.410707  543705 memory.go:191] Add success.
I0323 05:34:43.409823  543705 cpu.go:282] Add success.
I0323 05:34:43.420380  543705 net.go:648] Add success.
I0323 05:34:43.423182  543705 net.go:770] primary dev: ETH0
I0323 05:34:43.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:34:43.423212  543705 net.go:698] Add success.
I0323 05:34:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:34:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:34:53.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:34:53.410256  543705 memory.go:184] no items to output this cycle
I0323 05:34:53.410288  543705 cpu.go:275] no items to output this cycle
E0323 05:35:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:35:03.409790  543705 memory.go:184] no items to output this cycle
I0323 05:35:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 05:35:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:35:13.409806  543705 memory.go:191] Add success.
I0323 05:35:13.409816  543705 cpu.go:282] Add success.
W0323 05:35:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:35:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:35:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:35:13.420167  543705 net.go:648] Add success.
I0323 05:35:13.422967  543705 net.go:770] primary dev: ETH0
I0323 05:35:13.422982  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:35:13.422996  543705 net.go:698] Add success.
I0323 05:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:35:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:35:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 05:35:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:35:14.456495  543705 disk_worker.go:494] system disk:vda1
I0323 05:35:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:35:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:35:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:35:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:35:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:35:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:35:23.409814  543705 memory.go:184] no items to output this cycle
I0323 05:35:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 05:35:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:35:33.409796  543705 memory.go:184] no items to output this cycle
I0323 05:35:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 05:35:38.392007  543705 disk_info.go:125] begin check local disk info of client
I0323 05:35:38.394617  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:35:38.394624  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e280 0xc00039e2c0]
E0323 05:35:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:35:43.410612  543705 memory.go:191] Add success.
I0323 05:35:43.409806  543705 cpu.go:282] Add success.
I0323 05:35:43.420360  543705 net.go:648] Add success.
I0323 05:35:43.423226  543705 net.go:770] primary dev: ETH0
I0323 05:35:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:35:43.423253  543705 net.go:698] Add success.
I0323 05:35:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:35:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:35:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:35:53.409848  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:35:53.409866  543705 memory.go:184] no items to output this cycle
I0323 05:35:53.409958  543705 cpu.go:275] no items to output this cycle
E0323 05:36:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:36:03.409799  543705 memory.go:184] no items to output this cycle
I0323 05:36:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 05:36:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:36:13.409791  543705 memory.go:191] Add success.
I0323 05:36:13.409793  543705 cpu.go:282] Add success.
W0323 05:36:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:36:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:36:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:36:13.420113  543705 net.go:648] Add success.
I0323 05:36:13.422833  543705 net.go:770] primary dev: ETH0
I0323 05:36:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:36:13.422858  543705 net.go:698] Add success.
I0323 05:36:13.468760  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29d959ad-bb76-4504-803d-7f44835cbbfd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:36:13.468793  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:36:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:36:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:36:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 05:36:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:36:14.456839  543705 disk_worker.go:494] system disk:vda1
I0323 05:36:14.456871  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:36:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:36:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:36:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:36:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:36:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:36:23.409780  543705 memory.go:184] no items to output this cycle
I0323 05:36:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 05:36:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:36:33.409769  543705 memory.go:184] no items to output this cycle
I0323 05:36:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 05:36:38.397674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:36:38.400213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:36:38.400219  543705 disk_info.go:196] parse disk info done, disk is : [0xc000485f00 0xc000485f40]
I0323 05:36:40.133339  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:36:40.133345  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:36:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:36:43.410717  543705 memory.go:191] Add success.
I0323 05:36:43.409814  543705 cpu.go:282] Add success.
I0323 05:36:43.420467  543705 net.go:648] Add success.
I0323 05:36:43.423364  543705 net.go:770] primary dev: ETH0
I0323 05:36:43.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:36:43.423545  543705 net.go:698] Add success.
I0323 05:36:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:36:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:36:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:36:53.409768  543705 memory.go:184] no items to output this cycle
I0323 05:36:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:37:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:37:03.409783  543705 memory.go:184] no items to output this cycle
I0323 05:37:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 05:37:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:37:13.409812  543705 memory.go:191] Add success.
I0323 05:37:13.409820  543705 cpu.go:282] Add success.
W0323 05:37:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:37:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:37:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:37:13.419897  543705 net.go:770] primary dev: ETH0
I0323 05:37:13.419910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:37:13.419922  543705 net.go:698] Add success.
I0323 05:37:13.420159  543705 net.go:648] Add success.
I0323 05:37:13.453663  543705 event_worker.go:152] Polling the log file for events...
W0323 05:37:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:37:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 05:37:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:37:14.456952  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:37:14.456961  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:37:14.456968  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:37:14.457024  543705 disk_worker.go:494] system disk:vda1
I0323 05:37:14.457053  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:37:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:37:15.456839  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:37:16.457917  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:37:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:37:16.457971  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:37:16.457990  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:37:16.472300  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:37:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:37:23.409809  543705 memory.go:184] no items to output this cycle
I0323 05:37:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 05:37:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:37:33.409775  543705 memory.go:184] no items to output this cycle
I0323 05:37:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 05:37:38.401674  543705 disk_info.go:125] begin check local disk info of client
I0323 05:37:38.404242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:37:38.404248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d1c40 0xc0003d1c80]
E0323 05:37:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:37:43.410622  543705 memory.go:191] Add success.
I0323 05:37:43.409809  543705 cpu.go:282] Add success.
I0323 05:37:43.419740  543705 net.go:648] Add success.
I0323 05:37:43.422269  543705 net.go:770] primary dev: ETH0
I0323 05:37:43.422284  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:37:43.422298  543705 net.go:698] Add success.
I0323 05:37:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:37:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:37:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:37:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:37:53.409773  543705 memory.go:184] no items to output this cycle
I0323 05:37:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 05:38:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:38:03.409775  543705 memory.go:184] no items to output this cycle
I0323 05:38:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 05:38:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:38:13.409812  543705 memory.go:191] Add success.
I0323 05:38:13.409822  543705 cpu.go:282] Add success.
W0323 05:38:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:38:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:38:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:38:13.420065  543705 net.go:648] Add success.
I0323 05:38:13.422765  543705 net.go:770] primary dev: ETH0
I0323 05:38:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:38:13.422792  543705 net.go:698] Add success.
I0323 05:38:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:38:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:38:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 05:38:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:38:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 05:38:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:38:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:38:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:38:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:38:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:38:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:38:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:38:23.409812  543705 memory.go:184] no items to output this cycle
I0323 05:38:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 05:38:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:38:33.409801  543705 memory.go:184] no items to output this cycle
I0323 05:38:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 05:38:38.405675  543705 disk_info.go:125] begin check local disk info of client
I0323 05:38:38.408276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:38:38.408283  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472700 0xc000472740]
E0323 05:38:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:38:43.410638  543705 memory.go:191] Add success.
I0323 05:38:43.409835  543705 cpu.go:282] Add success.
I0323 05:38:43.420488  543705 net.go:648] Add success.
I0323 05:38:43.423261  543705 net.go:770] primary dev: ETH0
I0323 05:38:43.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:38:43.423287  543705 net.go:698] Add success.
I0323 05:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:38:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:38:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:38:53.409769  543705 memory.go:184] no items to output this cycle
I0323 05:38:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 05:39:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:39:03.409792  543705 memory.go:184] no items to output this cycle
I0323 05:39:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 05:39:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:39:13.409804  543705 memory.go:191] Add success.
I0323 05:39:13.409805  543705 cpu.go:282] Add success.
W0323 05:39:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:39:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:39:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:39:13.420170  543705 net.go:648] Add success.
I0323 05:39:13.422797  543705 net.go:770] primary dev: ETH0
I0323 05:39:13.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:39:13.422821  543705 net.go:698] Add success.
I0323 05:39:13.468427  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c5a018f-db72-4f2b-a847-303269981752","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:39:13.468459  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:39:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:39:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:39:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 05:39:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:39:14.456581  543705 disk_worker.go:494] system disk:vda1
I0323 05:39:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:39:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:39:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:39:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:39:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:39:16.472520  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:39:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:39:23.409814  543705 memory.go:184] no items to output this cycle
I0323 05:39:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 05:39:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:39:33.409778  543705 memory.go:184] no items to output this cycle
I0323 05:39:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 05:39:38.409677  543705 disk_info.go:125] begin check local disk info of client
I0323 05:39:38.412247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:39:38.412252  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8d00 0xc0003e8d40]
I0323 05:39:40.133753  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:39:40.133760  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:39:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:39:43.410652  543705 memory.go:191] Add success.
I0323 05:39:43.409807  543705 cpu.go:282] Add success.
I0323 05:39:43.420655  543705 net.go:648] Add success.
I0323 05:39:43.423389  543705 net.go:770] primary dev: ETH0
I0323 05:39:43.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:39:43.423415  543705 net.go:698] Add success.
I0323 05:39:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:39:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:39:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:39:53.410196  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:39:53.410214  543705 memory.go:184] no items to output this cycle
I0323 05:39:53.410227  543705 cpu.go:275] no items to output this cycle
E0323 05:40:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:40:03.409795  543705 memory.go:184] no items to output this cycle
I0323 05:40:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 05:40:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:40:13.409792  543705 memory.go:191] Add success.
I0323 05:40:13.409813  543705 cpu.go:282] Add success.
W0323 05:40:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:40:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:40:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:40:13.420073  543705 net.go:648] Add success.
I0323 05:40:13.422697  543705 net.go:770] primary dev: ETH0
I0323 05:40:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:40:13.422723  543705 net.go:698] Add success.
I0323 05:40:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:40:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:40:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 05:40:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:40:14.456521  543705 disk_worker.go:494] system disk:vda1
I0323 05:40:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:40:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:40:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:40:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:40:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:40:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:40:23.409813  543705 memory.go:184] no items to output this cycle
I0323 05:40:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 05:40:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:40:33.409770  543705 memory.go:184] no items to output this cycle
I0323 05:40:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 05:40:38.412806  543705 disk_info.go:125] begin check local disk info of client
I0323 05:40:38.415239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:40:38.415247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b6c0 0xc00007b700]
E0323 05:40:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:40:43.410857  543705 memory.go:191] Add success.
I0323 05:40:43.409919  543705 cpu.go:282] Add success.
I0323 05:40:43.419777  543705 net.go:648] Add success.
I0323 05:40:43.422353  543705 net.go:770] primary dev: ETH0
I0323 05:40:43.422368  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:40:43.422382  543705 net.go:698] Add success.
I0323 05:40:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:40:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:40:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:40:53.409770  543705 memory.go:184] no items to output this cycle
I0323 05:40:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 05:41:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:41:03.409774  543705 memory.go:184] no items to output this cycle
I0323 05:41:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 05:41:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:41:13.409792  543705 memory.go:191] Add success.
I0323 05:41:13.409797  543705 cpu.go:282] Add success.
W0323 05:41:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:41:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:41:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:41:13.420052  543705 net.go:648] Add success.
I0323 05:41:13.422784  543705 net.go:770] primary dev: ETH0
I0323 05:41:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:41:13.422810  543705 net.go:698] Add success.
I0323 05:41:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:41:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:41:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 05:41:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:41:14.456578  543705 disk_worker.go:494] system disk:vda1
I0323 05:41:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:41:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:41:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:41:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:41:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:41:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:41:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:41:23.409786  543705 memory.go:184] no items to output this cycle
I0323 05:41:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 05:41:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:41:33.409766  543705 memory.go:184] no items to output this cycle
I0323 05:41:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 05:41:38.415799  543705 disk_info.go:125] begin check local disk info of client
I0323 05:41:38.418312  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:41:38.418321  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6a00 0xc0003b6a40]
E0323 05:41:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:41:43.410627  543705 memory.go:191] Add success.
I0323 05:41:43.409811  543705 cpu.go:282] Add success.
I0323 05:41:43.420620  543705 net.go:648] Add success.
I0323 05:41:43.423208  543705 net.go:770] primary dev: ETH0
I0323 05:41:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:41:43.423233  543705 net.go:698] Add success.
I0323 05:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:41:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:41:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:41:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:41:53.409790  543705 memory.go:184] no items to output this cycle
I0323 05:41:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 05:42:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:42:03.409802  543705 memory.go:184] no items to output this cycle
I0323 05:42:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 05:42:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:42:13.409782  543705 memory.go:191] Add success.
W0323 05:42:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:42:13.409810  543705 cpu.go:282] Add success.
W0323 05:42:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:42:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:42:13.420067  543705 net.go:648] Add success.
I0323 05:42:13.422806  543705 net.go:770] primary dev: ETH0
I0323 05:42:13.422818  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:42:13.422830  543705 net.go:698] Add success.
I0323 05:42:13.463410  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"138a976b-7362-4997-9fcf-989da91d813d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:42:13.463445  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 05:42:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:42:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 05:42:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:42:14.456781  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:42:14.456789  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:42:14.456794  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:42:14.456835  543705 disk_worker.go:494] system disk:vda1
I0323 05:42:14.456862  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:42:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:42:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:42:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:42:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:42:16.458013  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:42:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:42:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:42:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:42:23.409814  543705 memory.go:184] no items to output this cycle
I0323 05:42:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 05:42:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:42:33.409775  543705 memory.go:184] no items to output this cycle
I0323 05:42:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 05:42:38.418798  543705 disk_info.go:125] begin check local disk info of client
I0323 05:42:38.421353  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:42:38.421359  543705 disk_info.go:196] parse disk info done, disk is : [0xc000264140 0xc000264180]
I0323 05:42:40.133913  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:42:40.133919  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:42:43.409912  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:42:43.410840  543705 memory.go:191] Add success.
I0323 05:42:43.409956  543705 cpu.go:282] Add success.
I0323 05:42:43.419736  543705 net.go:648] Add success.
I0323 05:42:43.422211  543705 net.go:770] primary dev: ETH0
I0323 05:42:43.422223  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:42:43.422236  543705 net.go:698] Add success.
I0323 05:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:42:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:42:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:42:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:42:53.409775  543705 memory.go:184] no items to output this cycle
I0323 05:42:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 05:43:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:43:03.409785  543705 memory.go:184] no items to output this cycle
I0323 05:43:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 05:43:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:43:13.409781  543705 memory.go:191] Add success.
W0323 05:43:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:43:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:43:13.409821  543705 cpu.go:282] Add success.
I0323 05:43:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:43:13.420185  543705 net.go:648] Add success.
I0323 05:43:13.423046  543705 net.go:770] primary dev: ETH0
I0323 05:43:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:43:13.423071  543705 net.go:698] Add success.
I0323 05:43:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:43:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:43:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 05:43:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:43:14.456578  543705 disk_worker.go:494] system disk:vda1
I0323 05:43:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:43:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:43:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:43:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:43:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:43:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:43:23.409786  543705 memory.go:184] no items to output this cycle
I0323 05:43:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 05:43:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:43:33.409767  543705 memory.go:184] no items to output this cycle
I0323 05:43:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 05:43:38.421800  543705 disk_info.go:125] begin check local disk info of client
I0323 05:43:38.424311  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:43:38.424318  543705 disk_info.go:196] parse disk info done, disk is : [0xc000462e80 0xc000462ec0]
E0323 05:43:43.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:43:43.410587  543705 memory.go:191] Add success.
I0323 05:43:43.409924  543705 cpu.go:282] Add success.
I0323 05:43:43.419708  543705 net.go:648] Add success.
I0323 05:43:43.422195  543705 net.go:770] primary dev: ETH0
I0323 05:43:43.422207  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:43:43.422219  543705 net.go:698] Add success.
I0323 05:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:43:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:43:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:43:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:43:53.409811  543705 memory.go:184] no items to output this cycle
I0323 05:43:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 05:44:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:44:03.409772  543705 memory.go:184] no items to output this cycle
I0323 05:44:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 05:44:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:44:13.409807  543705 memory.go:191] Add success.
I0323 05:44:13.409818  543705 cpu.go:282] Add success.
W0323 05:44:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:44:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:44:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:44:13.420085  543705 net.go:648] Add success.
I0323 05:44:13.422683  543705 net.go:770] primary dev: ETH0
I0323 05:44:13.422698  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:44:13.422712  543705 net.go:698] Add success.
I0323 05:44:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:44:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:44:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 05:44:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:44:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 05:44:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:44:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:44:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:44:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:44:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:44:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:44:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:44:23.409782  543705 memory.go:184] no items to output this cycle
I0323 05:44:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 05:44:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:44:33.409798  543705 memory.go:184] no items to output this cycle
I0323 05:44:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 05:44:38.424818  543705 disk_info.go:125] begin check local disk info of client
I0323 05:44:38.427422  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:44:38.427430  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004993c0 0xc000499400]
E0323 05:44:43.409828  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:44:43.410778  543705 memory.go:191] Add success.
I0323 05:44:43.409949  543705 cpu.go:282] Add success.
I0323 05:44:43.419740  543705 net.go:648] Add success.
I0323 05:44:43.422396  543705 net.go:770] primary dev: ETH0
I0323 05:44:43.422410  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:44:43.422424  543705 net.go:698] Add success.
I0323 05:44:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:44:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:44:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:44:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:44:53.409790  543705 memory.go:184] no items to output this cycle
I0323 05:44:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:45:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:45:03.409818  543705 memory.go:184] no items to output this cycle
I0323 05:45:03.409825  543705 cpu.go:275] no items to output this cycle
E0323 05:45:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:45:13.409814  543705 memory.go:191] Add success.
I0323 05:45:13.409820  543705 cpu.go:282] Add success.
W0323 05:45:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:45:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:45:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:45:13.420112  543705 net.go:648] Add success.
I0323 05:45:13.422960  543705 net.go:770] primary dev: ETH0
I0323 05:45:13.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:45:13.422986  543705 net.go:698] Add success.
I0323 05:45:14.012700  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c120f65f-4b87-4794-8967-d0febeddd0a1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:45:14.012736  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:45:14.454686  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:45:14.454870  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:45:14.454939  543705 disk_worker.go:708] disk space is not compliant
W0323 05:45:14.454942  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:45:14.456382  543705 disk_worker.go:494] system disk:vda1
I0323 05:45:14.456414  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:45:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:45:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:45:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:45:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:45:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:45:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:45:23.409797  543705 memory.go:184] no items to output this cycle
I0323 05:45:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 05:45:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:45:33.409786  543705 memory.go:184] no items to output this cycle
I0323 05:45:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 05:45:38.427851  543705 disk_info.go:125] begin check local disk info of client
I0323 05:45:38.430382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:45:38.430388  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049dac0 0xc00049db00]
I0323 05:45:40.134051  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:45:40.134057  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:45:43.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:45:43.410524  543705 memory.go:191] Add success.
I0323 05:45:43.409966  543705 cpu.go:282] Add success.
I0323 05:45:43.419717  543705 net.go:648] Add success.
I0323 05:45:43.422280  543705 net.go:770] primary dev: ETH0
I0323 05:45:43.422295  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:45:43.422309  543705 net.go:698] Add success.
I0323 05:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:45:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:45:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:45:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:45:53.409766  543705 memory.go:184] no items to output this cycle
I0323 05:45:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 05:46:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:46:03.409784  543705 memory.go:184] no items to output this cycle
I0323 05:46:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 05:46:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:46:13.409781  543705 memory.go:191] Add success.
I0323 05:46:13.409813  543705 cpu.go:282] Add success.
W0323 05:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:46:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:46:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:46:13.420095  543705 net.go:648] Add success.
I0323 05:46:13.422919  543705 net.go:770] primary dev: ETH0
I0323 05:46:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:46:13.422945  543705 net.go:698] Add success.
I0323 05:46:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:46:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:46:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 05:46:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:46:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 05:46:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:46:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:46:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:46:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:46:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:46:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:46:23.409816  543705 memory.go:184] no items to output this cycle
I0323 05:46:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 05:46:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:46:33.409780  543705 memory.go:184] no items to output this cycle
I0323 05:46:33.409832  543705 cpu.go:275] no items to output this cycle
I0323 05:46:38.430849  543705 disk_info.go:125] begin check local disk info of client
I0323 05:46:38.433345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:46:38.433351  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005088c0 0xc000508900]
E0323 05:46:43.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:46:43.410868  543705 memory.go:191] Add success.
I0323 05:46:43.410008  543705 cpu.go:282] Add success.
I0323 05:46:43.419733  543705 net.go:648] Add success.
I0323 05:46:43.422633  543705 net.go:770] primary dev: ETH0
I0323 05:46:43.422652  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:46:43.422667  543705 net.go:698] Add success.
I0323 05:46:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:46:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:46:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:46:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:46:53.409776  543705 memory.go:184] no items to output this cycle
I0323 05:46:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 05:47:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:47:03.409779  543705 memory.go:184] no items to output this cycle
I0323 05:47:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 05:47:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:47:13.409776  543705 memory.go:191] Add success.
W0323 05:47:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:47:13.409804  543705 cpu.go:282] Add success.
W0323 05:47:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:47:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:47:13.420152  543705 net.go:648] Add success.
I0323 05:47:13.422981  543705 net.go:770] primary dev: ETH0
I0323 05:47:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:47:13.423006  543705 net.go:698] Add success.
I0323 05:47:13.453550  543705 event_worker.go:152] Polling the log file for events...
W0323 05:47:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:47:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 05:47:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:47:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:47:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:47:14.456943  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:47:14.457016  543705 disk_worker.go:494] system disk:vda1
I0323 05:47:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:47:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:47:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:47:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:47:16.457974  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:47:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:47:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:47:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:47:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:47:23.409790  543705 memory.go:184] no items to output this cycle
I0323 05:47:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 05:47:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:47:33.409772  543705 memory.go:184] no items to output this cycle
I0323 05:47:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 05:47:38.433855  543705 disk_info.go:125] begin check local disk info of client
I0323 05:47:38.436408  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:47:38.436413  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003077c0 0xc000307800]
E0323 05:47:43.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:47:43.410586  543705 memory.go:191] Add success.
I0323 05:47:43.409952  543705 cpu.go:282] Add success.
I0323 05:47:43.419708  543705 net.go:648] Add success.
I0323 05:47:43.422251  543705 net.go:770] primary dev: ETH0
I0323 05:47:43.422264  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:47:43.422276  543705 net.go:698] Add success.
I0323 05:47:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:47:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:47:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:47:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:47:53.409777  543705 memory.go:184] no items to output this cycle
I0323 05:47:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 05:48:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:48:03.409763  543705 memory.go:184] no items to output this cycle
I0323 05:48:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 05:48:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:48:13.409807  543705 memory.go:191] Add success.
I0323 05:48:13.409811  543705 cpu.go:282] Add success.
W0323 05:48:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:48:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:48:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:48:13.420102  543705 net.go:648] Add success.
I0323 05:48:13.422741  543705 net.go:770] primary dev: ETH0
I0323 05:48:13.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:48:13.422767  543705 net.go:698] Add success.
I0323 05:48:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:48:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:48:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 05:48:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:48:14.456520  543705 disk_worker.go:494] system disk:vda1
I0323 05:48:14.456567  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:48:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:48:15.604920  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4fe4316d-9605-4cc7-a907-d068dfceeaaf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:48:15.604964  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:48:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:48:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:48:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:48:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:48:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:48:23.409820  543705 memory.go:184] no items to output this cycle
I0323 05:48:23.409831  543705 cpu.go:275] no items to output this cycle
E0323 05:48:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:48:33.409801  543705 memory.go:184] no items to output this cycle
I0323 05:48:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 05:48:38.436878  543705 disk_info.go:125] begin check local disk info of client
I0323 05:48:38.439485  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:48:38.439492  543705 disk_info.go:196] parse disk info done, disk is : [0xc000282800 0xc000282840]
I0323 05:48:40.134185  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:48:40.134192  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:48:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:48:43.410843  543705 memory.go:191] Add success.
I0323 05:48:43.409995  543705 cpu.go:282] Add success.
I0323 05:48:43.419733  543705 net.go:648] Add success.
I0323 05:48:43.422331  543705 net.go:770] primary dev: ETH0
I0323 05:48:43.422346  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:48:43.422360  543705 net.go:698] Add success.
I0323 05:48:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:48:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:48:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:48:53.409795  543705 memory.go:184] no items to output this cycle
I0323 05:48:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 05:49:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:49:03.409783  543705 memory.go:184] no items to output this cycle
I0323 05:49:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 05:49:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:49:13.409788  543705 memory.go:191] Add success.
I0323 05:49:13.409793  543705 cpu.go:282] Add success.
W0323 05:49:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:49:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:49:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:49:13.420076  543705 net.go:648] Add success.
I0323 05:49:13.423132  543705 net.go:770] primary dev: ETH0
I0323 05:49:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:49:13.423156  543705 net.go:698] Add success.
I0323 05:49:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:49:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:49:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0323 05:49:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:49:14.456612  543705 disk_worker.go:494] system disk:vda1
I0323 05:49:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:49:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:49:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:49:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:49:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:49:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:49:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:49:23.409789  543705 memory.go:184] no items to output this cycle
I0323 05:49:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 05:49:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:49:33.409771  543705 memory.go:184] no items to output this cycle
I0323 05:49:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 05:49:38.439894  543705 disk_info.go:125] begin check local disk info of client
I0323 05:49:38.442454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:49:38.442461  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ad8c0 0xc0003ad900]
E0323 05:49:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:49:43.410821  543705 memory.go:191] Add success.
I0323 05:49:43.409815  543705 cpu.go:282] Add success.
I0323 05:49:43.420717  543705 net.go:648] Add success.
I0323 05:49:43.423505  543705 net.go:770] primary dev: ETH0
I0323 05:49:43.423518  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:49:43.423530  543705 net.go:698] Add success.
I0323 05:49:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:49:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:49:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:49:53.410371  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:49:53.410393  543705 memory.go:184] no items to output this cycle
I0323 05:49:53.410424  543705 cpu.go:275] no items to output this cycle
E0323 05:50:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:50:03.409800  543705 memory.go:184] no items to output this cycle
I0323 05:50:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 05:50:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:50:13.409783  543705 memory.go:191] Add success.
I0323 05:50:13.409805  543705 cpu.go:282] Add success.
W0323 05:50:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:50:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:50:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:50:13.420204  543705 net.go:648] Add success.
I0323 05:50:13.422990  543705 net.go:770] primary dev: ETH0
I0323 05:50:13.423002  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:50:13.423014  543705 net.go:698] Add success.
I0323 05:50:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:50:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:50:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 05:50:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:50:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 05:50:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:50:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:50:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:50:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:50:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:50:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:50:23.409782  543705 memory.go:184] no items to output this cycle
I0323 05:50:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 05:50:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:50:33.409776  543705 memory.go:184] no items to output this cycle
I0323 05:50:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 05:50:38.442918  543705 disk_info.go:125] begin check local disk info of client
I0323 05:50:38.445480  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:50:38.445486  543705 disk_info.go:196] parse disk info done, disk is : [0xc000306ec0 0xc000306f00]
E0323 05:50:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:50:43.410664  543705 memory.go:191] Add success.
I0323 05:50:43.409803  543705 cpu.go:282] Add success.
I0323 05:50:43.420360  543705 net.go:648] Add success.
I0323 05:50:43.422891  543705 net.go:770] primary dev: ETH0
I0323 05:50:43.422906  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:50:43.422928  543705 net.go:698] Add success.
I0323 05:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:50:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:50:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:50:53.409769  543705 memory.go:184] no items to output this cycle
I0323 05:50:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 05:51:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:51:03.409779  543705 memory.go:184] no items to output this cycle
I0323 05:51:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 05:51:13.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:51:13.409774  543705 memory.go:191] Add success.
W0323 05:51:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:51:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:51:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:51:13.409815  543705 cpu.go:282] Add success.
I0323 05:51:13.420170  543705 net.go:648] Add success.
I0323 05:51:13.422801  543705 net.go:770] primary dev: ETH0
I0323 05:51:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:51:13.422826  543705 net.go:698] Add success.
I0323 05:51:13.467989  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9c75cb80-ff79-4142-b06c-2067b881ef92","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:51:13.468023  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:51:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:51:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:51:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 05:51:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:51:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 05:51:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:51:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:51:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:51:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:51:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:51:23.409812  543705 memory.go:184] no items to output this cycle
I0323 05:51:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 05:51:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:51:33.409778  543705 cpu.go:275] no items to output this cycle
I0323 05:51:33.409786  543705 memory.go:184] no items to output this cycle
I0323 05:51:38.445921  543705 disk_info.go:125] begin check local disk info of client
I0323 05:51:38.448544  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:51:38.448550  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025de80 0xc00025dec0]
I0323 05:51:40.134323  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:51:40.134328  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:51:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:51:43.410713  543705 memory.go:191] Add success.
I0323 05:51:43.409790  543705 cpu.go:282] Add success.
I0323 05:51:43.420483  543705 net.go:648] Add success.
I0323 05:51:43.423353  543705 net.go:770] primary dev: ETH0
I0323 05:51:43.423368  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:51:43.423382  543705 net.go:698] Add success.
I0323 05:51:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:51:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:51:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:51:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:51:53.409802  543705 memory.go:184] no items to output this cycle
I0323 05:51:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 05:52:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:52:03.409790  543705 cpu.go:275] no items to output this cycle
I0323 05:52:03.409795  543705 memory.go:184] no items to output this cycle
E0323 05:52:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:52:13.409806  543705 memory.go:191] Add success.
I0323 05:52:13.409813  543705 cpu.go:282] Add success.
W0323 05:52:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:52:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:52:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:52:13.420155  543705 net.go:648] Add success.
I0323 05:52:13.422724  543705 net.go:770] primary dev: ETH0
I0323 05:52:13.422737  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:52:13.422749  543705 net.go:698] Add success.
W0323 05:52:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:52:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 05:52:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:52:14.455907  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:52:14.455916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:52:14.455922  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:52:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 05:52:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:52:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:52:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:52:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:52:16.457929  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:52:16.457969  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:52:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:52:16.472330  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:52:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:52:23.409779  543705 memory.go:184] no items to output this cycle
I0323 05:52:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 05:52:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:52:33.409777  543705 memory.go:184] no items to output this cycle
I0323 05:52:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 05:52:38.448930  543705 disk_info.go:125] begin check local disk info of client
I0323 05:52:38.451529  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:52:38.451535  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b7c0 0xc00007b800]
E0323 05:52:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:52:43.410649  543705 memory.go:191] Add success.
I0323 05:52:43.409829  543705 cpu.go:282] Add success.
I0323 05:52:43.420357  543705 net.go:648] Add success.
I0323 05:52:43.423145  543705 net.go:770] primary dev: ETH0
I0323 05:52:43.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:52:43.423171  543705 net.go:698] Add success.
I0323 05:52:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:52:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:52:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:52:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:52:53.409768  543705 memory.go:184] no items to output this cycle
I0323 05:52:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 05:53:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:53:03.409793  543705 memory.go:184] no items to output this cycle
I0323 05:53:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 05:53:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:53:13.409791  543705 memory.go:191] Add success.
I0323 05:53:13.409792  543705 cpu.go:282] Add success.
W0323 05:53:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:53:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:53:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:53:13.420179  543705 net.go:648] Add success.
I0323 05:53:13.422870  543705 net.go:770] primary dev: ETH0
I0323 05:53:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:53:13.422896  543705 net.go:698] Add success.
I0323 05:53:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:53:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:53:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 05:53:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:53:14.456609  543705 disk_worker.go:494] system disk:vda1
I0323 05:53:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:53:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:53:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:53:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:53:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:53:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:53:23.409787  543705 memory.go:184] no items to output this cycle
I0323 05:53:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 05:53:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:53:33.409776  543705 memory.go:184] no items to output this cycle
I0323 05:53:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 05:53:38.451953  543705 disk_info.go:125] begin check local disk info of client
I0323 05:53:38.454568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:53:38.454574  543705 disk_info.go:196] parse disk info done, disk is : [0xc000375940 0xc000375980]
E0323 05:53:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:53:43.410651  543705 memory.go:191] Add success.
I0323 05:53:43.409815  543705 cpu.go:282] Add success.
I0323 05:53:43.420354  543705 net.go:648] Add success.
I0323 05:53:43.422930  543705 net.go:770] primary dev: ETH0
I0323 05:53:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:53:43.422961  543705 net.go:698] Add success.
I0323 05:53:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:53:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:53:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:53:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:53:53.409767  543705 memory.go:184] no items to output this cycle
I0323 05:53:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 05:54:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:54:03.409770  543705 memory.go:184] no items to output this cycle
I0323 05:54:03.409893  543705 cpu.go:275] no items to output this cycle
E0323 05:54:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:54:13.409785  543705 memory.go:191] Add success.
I0323 05:54:13.409807  543705 cpu.go:282] Add success.
W0323 05:54:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:54:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:54:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:54:13.420445  543705 net.go:648] Add success.
I0323 05:54:13.423207  543705 net.go:770] primary dev: ETH0
I0323 05:54:13.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:54:13.423248  543705 net.go:698] Add success.
I0323 05:54:13.462403  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"10f4a034-1e96-4915-8be5-3b7a5d57fd7c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:54:13.462435  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 05:54:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:54:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:54:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 05:54:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:54:14.456511  543705 disk_worker.go:494] system disk:vda1
I0323 05:54:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:54:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:54:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:54:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:54:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:54:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:54:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:54:23.409787  543705 memory.go:184] no items to output this cycle
I0323 05:54:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 05:54:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:54:33.409814  543705 memory.go:184] no items to output this cycle
I0323 05:54:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 05:54:38.454958  543705 disk_info.go:125] begin check local disk info of client
I0323 05:54:38.457626  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:54:38.457632  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472680 0xc0004726c0]
I0323 05:54:40.134459  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:54:40.134465  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:54:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:54:43.410720  543705 memory.go:191] Add success.
I0323 05:54:43.409812  543705 cpu.go:282] Add success.
I0323 05:54:43.420427  543705 net.go:648] Add success.
I0323 05:54:43.423304  543705 net.go:770] primary dev: ETH0
I0323 05:54:43.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:54:43.423333  543705 net.go:698] Add success.
I0323 05:54:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:54:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:54:53.409794  543705 memory.go:184] no items to output this cycle
I0323 05:54:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 05:55:03.409892  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:55:03.409914  543705 cpu.go:275] no items to output this cycle
I0323 05:55:03.409961  543705 memory.go:184] no items to output this cycle
E0323 05:55:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:55:13.409781  543705 memory.go:191] Add success.
W0323 05:55:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:55:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:55:13.409816  543705 cpu.go:282] Add success.
I0323 05:55:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:55:13.420241  543705 net.go:648] Add success.
I0323 05:55:13.422789  543705 net.go:770] primary dev: ETH0
I0323 05:55:13.422802  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:55:13.422814  543705 net.go:698] Add success.
I0323 05:55:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:55:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:55:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 05:55:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:55:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 05:55:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:55:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:55:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:55:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:55:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:55:16.472368  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:55:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:55:23.409782  543705 memory.go:184] no items to output this cycle
I0323 05:55:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 05:55:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:55:33.409785  543705 memory.go:184] no items to output this cycle
I0323 05:55:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 05:55:38.457979  543705 disk_info.go:125] begin check local disk info of client
I0323 05:55:38.460545  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:55:38.460551  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5440 0xc0000c5480]
E0323 05:55:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:55:43.410625  543705 memory.go:191] Add success.
I0323 05:55:43.409796  543705 cpu.go:282] Add success.
I0323 05:55:43.420308  543705 net.go:648] Add success.
I0323 05:55:43.423022  543705 net.go:770] primary dev: ETH0
I0323 05:55:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:55:43.423047  543705 net.go:698] Add success.
I0323 05:55:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:55:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:55:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:55:53.409809  543705 memory.go:184] no items to output this cycle
I0323 05:55:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 05:56:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:56:03.409904  543705 memory.go:184] no items to output this cycle
I0323 05:56:03.409911  543705 cpu.go:275] no items to output this cycle
E0323 05:56:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:56:13.409784  543705 memory.go:191] Add success.
W0323 05:56:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 05:56:13.409815  543705 cpu.go:282] Add success.
W0323 05:56:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:56:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:56:13.420340  543705 net.go:648] Add success.
I0323 05:56:13.422768  543705 net.go:770] primary dev: ETH0
I0323 05:56:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:56:13.422793  543705 net.go:698] Add success.
I0323 05:56:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:56:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:56:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 05:56:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:56:14.456578  543705 disk_worker.go:494] system disk:vda1
I0323 05:56:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:56:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:56:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:56:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:56:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:56:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:56:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:56:23.409813  543705 memory.go:184] no items to output this cycle
I0323 05:56:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 05:56:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:56:33.409764  543705 memory.go:184] no items to output this cycle
I0323 05:56:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 05:56:38.460989  543705 disk_info.go:125] begin check local disk info of client
I0323 05:56:38.463556  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:56:38.463563  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266000 0xc000266040]
E0323 05:56:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:56:43.410627  543705 memory.go:191] Add success.
I0323 05:56:43.409796  543705 cpu.go:282] Add success.
I0323 05:56:43.420341  543705 net.go:648] Add success.
I0323 05:56:43.422880  543705 net.go:770] primary dev: ETH0
I0323 05:56:43.422893  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:56:43.422905  543705 net.go:698] Add success.
I0323 05:56:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:56:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:56:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:56:53.410355  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:56:53.410368  543705 cpu.go:275] no items to output this cycle
I0323 05:56:53.410372  543705 memory.go:184] no items to output this cycle
E0323 05:57:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:57:03.409881  543705 memory.go:184] no items to output this cycle
I0323 05:57:03.409946  543705 cpu.go:275] no items to output this cycle
E0323 05:57:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:57:13.409787  543705 memory.go:191] Add success.
I0323 05:57:13.409798  543705 cpu.go:282] Add success.
W0323 05:57:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:57:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:57:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:57:13.420235  543705 net.go:648] Add success.
I0323 05:57:13.428644  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 05:57:13.428719  543705 net.go:770] primary dev: ETH0
I0323 05:57:13.428733  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:57:13.428744  543705 net.go:698] Add success.
I0323 05:57:13.453244  543705 event_worker.go:152] Polling the log file for events...
I0323 05:57:13.463309  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"34428c3c-f056-4fcf-bbf0-6bf36defa247","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 05:57:13.463342  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 05:57:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:57:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0323 05:57:14.455242  543705 disk_worker.go:728] disk inode is not compliant
E0323 05:57:14.455872  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 05:57:14.455881  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 05:57:14.455887  543705 custom_config.go:64] query custom config with name: gpu
I0323 05:57:14.456817  543705 disk_worker.go:494] system disk:vda1
I0323 05:57:14.456859  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 05:57:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 05:57:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:57:16.457909  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 05:57:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 05:57:16.457962  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:57:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:57:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:57:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:57:23.409781  543705 memory.go:184] no items to output this cycle
I0323 05:57:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 05:57:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:57:33.409780  543705 memory.go:184] no items to output this cycle
I0323 05:57:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 05:57:38.464012  543705 disk_info.go:125] begin check local disk info of client
I0323 05:57:38.466650  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:57:38.466656  543705 disk_info.go:196] parse disk info done, disk is : [0xc000263c00 0xc000263c40]
I0323 05:57:40.134609  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 05:57:40.134615  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 05:57:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:57:43.410755  543705 memory.go:191] Add success.
I0323 05:57:43.409815  543705 cpu.go:282] Add success.
I0323 05:57:43.420298  543705 net.go:770] primary dev: ETH0
I0323 05:57:43.420313  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:57:43.420328  543705 net.go:698] Add success.
I0323 05:57:43.420684  543705 net.go:648] Add success.
I0323 05:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:57:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:57:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:57:53.409770  543705 memory.go:184] no items to output this cycle
I0323 05:57:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 05:58:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:58:03.409784  543705 memory.go:184] no items to output this cycle
I0323 05:58:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 05:58:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:58:13.409811  543705 memory.go:191] Add success.
I0323 05:58:13.409817  543705 cpu.go:282] Add success.
W0323 05:58:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:58:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:58:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:58:13.420119  543705 net.go:648] Add success.
I0323 05:58:13.422957  543705 net.go:770] primary dev: ETH0
I0323 05:58:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:58:13.422983  543705 net.go:698] Add success.
I0323 05:58:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:58:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:58:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 05:58:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:58:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 05:58:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:58:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:58:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:58:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:58:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:58:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:58:23.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:58:23.409831  543705 memory.go:184] no items to output this cycle
I0323 05:58:23.409842  543705 cpu.go:275] no items to output this cycle
E0323 05:58:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:58:33.409768  543705 memory.go:184] no items to output this cycle
I0323 05:58:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 05:58:38.467026  543705 disk_info.go:125] begin check local disk info of client
I0323 05:58:38.469591  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:58:38.469597  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472780 0xc0004727c0]
E0323 05:58:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:58:43.410804  543705 memory.go:191] Add success.
I0323 05:58:43.409811  543705 cpu.go:282] Add success.
I0323 05:58:43.420523  543705 net.go:648] Add success.
I0323 05:58:43.423419  543705 net.go:770] primary dev: ETH0
I0323 05:58:43.423434  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:58:43.423453  543705 net.go:698] Add success.
I0323 05:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:58:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:58:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:58:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:58:53.409779  543705 memory.go:184] no items to output this cycle
I0323 05:58:53.409781  543705 cpu.go:275] no items to output this cycle
E0323 05:59:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:59:03.409799  543705 memory.go:184] no items to output this cycle
I0323 05:59:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 05:59:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:59:13.409804  543705 memory.go:191] Add success.
I0323 05:59:13.409808  543705 cpu.go:282] Add success.
W0323 05:59:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 05:59:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 05:59:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 05:59:13.420191  543705 net.go:648] Add success.
I0323 05:59:13.422924  543705 net.go:770] primary dev: ETH0
I0323 05:59:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:59:13.422950  543705 net.go:698] Add success.
I0323 05:59:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 05:59:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 05:59:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 05:59:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0323 05:59:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 05:59:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 05:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 05:59:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:59:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:59:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0323 05:59:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 05:59:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:59:23.409802  543705 memory.go:184] no items to output this cycle
I0323 05:59:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 05:59:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:59:33.409776  543705 memory.go:184] no items to output this cycle
I0323 05:59:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 05:59:38.470041  543705 disk_info.go:125] begin check local disk info of client
I0323 05:59:38.472586  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 05:59:38.472592  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a1c0 0xc00027a200]
E0323 05:59:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:59:43.410679  543705 memory.go:191] Add success.
I0323 05:59:43.409808  543705 cpu.go:282] Add success.
I0323 05:59:43.420353  543705 net.go:648] Add success.
I0323 05:59:43.422882  543705 net.go:770] primary dev: ETH0
I0323 05:59:43.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0323 05:59:43.422916  543705 net.go:698] Add success.
I0323 05:59:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 05:59:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 05:59:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 05:59:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 05:59:53.409780  543705 memory.go:184] no items to output this cycle
I0323 05:59:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 06:00:03.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:00:03.409904  543705 memory.go:184] no items to output this cycle
I0323 06:00:03.409907  543705 cpu.go:275] no items to output this cycle
E0323 06:00:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:00:13.409799  543705 memory.go:191] Add success.
I0323 06:00:13.409799  543705 cpu.go:282] Add success.
W0323 06:00:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:00:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:00:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:00:13.420140  543705 net.go:648] Add success.
I0323 06:00:13.422875  543705 net.go:770] primary dev: ETH0
I0323 06:00:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:00:13.422900  543705 net.go:698] Add success.
I0323 06:00:13.469240  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f2dd9ba5-4c8c-47bc-aa84-e1485305e958","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:00:13.469273  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:00:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:00:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:00:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0323 06:00:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:00:14.456537  543705 disk_worker.go:494] system disk:vda1
I0323 06:00:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:00:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:00:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:00:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:00:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:00:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:00:23.409787  543705 memory.go:184] no items to output this cycle
I0323 06:00:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 06:00:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:00:33.409765  543705 memory.go:184] no items to output this cycle
I0323 06:00:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 06:00:38.473064  543705 disk_info.go:125] begin check local disk info of client
I0323 06:00:38.475639  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:00:38.475646  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a100 0xc00027a140]
I0323 06:00:40.134744  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:00:40.134750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:00:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:00:43.410790  543705 memory.go:191] Add success.
I0323 06:00:43.409816  543705 cpu.go:282] Add success.
I0323 06:00:43.420490  543705 net.go:648] Add success.
I0323 06:00:43.423369  543705 net.go:770] primary dev: ETH0
I0323 06:00:43.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:00:43.423416  543705 net.go:698] Add success.
I0323 06:00:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:00:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:00:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:00:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:00:53.409775  543705 memory.go:184] no items to output this cycle
I0323 06:00:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 06:01:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:01:03.409772  543705 memory.go:184] no items to output this cycle
I0323 06:01:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 06:01:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:01:13.409801  543705 memory.go:191] Add success.
I0323 06:01:13.409802  543705 cpu.go:282] Add success.
W0323 06:01:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:01:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:01:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:01:13.420170  543705 net.go:648] Add success.
I0323 06:01:13.423248  543705 net.go:770] primary dev: ETH0
I0323 06:01:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:01:13.423277  543705 net.go:698] Add success.
I0323 06:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:01:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:01:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 06:01:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:01:14.456587  543705 disk_worker.go:494] system disk:vda1
I0323 06:01:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:01:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:01:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:01:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:01:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:01:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:01:23.409815  543705 memory.go:184] no items to output this cycle
I0323 06:01:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 06:01:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:01:33.409769  543705 memory.go:184] no items to output this cycle
I0323 06:01:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 06:01:38.476073  543705 disk_info.go:125] begin check local disk info of client
I0323 06:01:38.478926  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:01:38.478932  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047ad80 0xc00047adc0]
E0323 06:01:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:01:43.410639  543705 memory.go:191] Add success.
I0323 06:01:43.409795  543705 cpu.go:282] Add success.
I0323 06:01:43.420331  543705 net.go:648] Add success.
I0323 06:01:43.423337  543705 net.go:770] primary dev: ETH0
I0323 06:01:43.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:01:43.423365  543705 net.go:698] Add success.
I0323 06:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:01:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:01:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:01:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:01:53.409778  543705 memory.go:184] no items to output this cycle
I0323 06:01:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 06:02:03.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:02:03.409934  543705 cpu.go:275] no items to output this cycle
I0323 06:02:03.409934  543705 memory.go:184] no items to output this cycle
E0323 06:02:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:02:13.409789  543705 memory.go:191] Add success.
I0323 06:02:13.409805  543705 cpu.go:282] Add success.
W0323 06:02:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:02:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:02:13.420201  543705 net.go:648] Add success.
I0323 06:02:13.422704  543705 net.go:770] primary dev: ETH0
I0323 06:02:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:02:13.422729  543705 net.go:698] Add success.
W0323 06:02:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:02:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 06:02:14.455165  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:02:14.455858  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:02:14.455866  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:02:14.455871  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:02:14.456520  543705 disk_worker.go:494] system disk:vda1
I0323 06:02:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:02:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:02:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:02:16.457891  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:02:16.457890  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:02:16.457945  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:02:16.457964  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:02:16.472286  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:02:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:02:23.409792  543705 memory.go:184] no items to output this cycle
I0323 06:02:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:02:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:02:33.409771  543705 memory.go:184] no items to output this cycle
I0323 06:02:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 06:02:38.479040  543705 disk_info.go:125] begin check local disk info of client
I0323 06:02:38.481680  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:02:38.481686  543705 disk_info.go:196] parse disk info done, disk is : [0xc000291700 0xc000291740]
E0323 06:02:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:02:43.410587  543705 memory.go:191] Add success.
I0323 06:02:43.409794  543705 cpu.go:282] Add success.
I0323 06:02:43.420346  543705 net.go:648] Add success.
I0323 06:02:43.422960  543705 net.go:770] primary dev: ETH0
I0323 06:02:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:02:43.422991  543705 net.go:698] Add success.
I0323 06:02:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:02:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:02:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:02:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:02:53.409803  543705 memory.go:184] no items to output this cycle
I0323 06:02:53.409818  543705 cpu.go:275] no items to output this cycle
E0323 06:03:03.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:03:03.409883  543705 memory.go:184] no items to output this cycle
I0323 06:03:03.409980  543705 cpu.go:275] no items to output this cycle
E0323 06:03:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:03:13.409807  543705 memory.go:191] Add success.
I0323 06:03:13.409811  543705 cpu.go:282] Add success.
W0323 06:03:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:03:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:03:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:03:13.420258  543705 net.go:648] Add success.
I0323 06:03:13.423291  543705 net.go:770] primary dev: ETH0
I0323 06:03:13.423304  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:03:13.423316  543705 net.go:698] Add success.
I0323 06:03:13.462990  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"567cd5c9-1d7b-4049-915e-1c0b60dac5cb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:03:13.463022  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:03:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:03:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:03:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 06:03:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:03:14.456619  543705 disk_worker.go:494] system disk:vda1
I0323 06:03:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:03:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:03:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:03:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:03:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:03:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:03:23.409790  543705 memory.go:184] no items to output this cycle
I0323 06:03:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 06:03:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:03:33.409789  543705 memory.go:184] no items to output this cycle
I0323 06:03:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 06:03:38.482051  543705 disk_info.go:125] begin check local disk info of client
I0323 06:03:38.484669  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:03:38.484675  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036b400 0xc00036b440]
I0323 06:03:40.137732  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:03:40.137738  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:03:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:03:43.410829  543705 memory.go:191] Add success.
I0323 06:03:43.409819  543705 cpu.go:282] Add success.
I0323 06:03:43.420690  543705 net.go:648] Add success.
I0323 06:03:43.423574  543705 net.go:770] primary dev: ETH0
I0323 06:03:43.423593  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:03:43.423606  543705 net.go:698] Add success.
I0323 06:03:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:03:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:03:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:03:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:03:53.409781  543705 memory.go:184] no items to output this cycle
I0323 06:03:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 06:04:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:04:03.409804  543705 memory.go:184] no items to output this cycle
I0323 06:04:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 06:04:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:04:13.409789  543705 memory.go:191] Add success.
I0323 06:04:13.409811  543705 cpu.go:282] Add success.
W0323 06:04:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:04:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:04:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:04:13.420245  543705 net.go:648] Add success.
I0323 06:04:13.422759  543705 net.go:770] primary dev: ETH0
I0323 06:04:13.422772  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:04:13.422796  543705 net.go:698] Add success.
I0323 06:04:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:04:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:04:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 06:04:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:04:14.456498  543705 disk_worker.go:494] system disk:vda1
I0323 06:04:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:04:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:04:16.458054  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:04:16.458118  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:04:16.458141  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:04:16.472574  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:04:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:04:23.409785  543705 memory.go:184] no items to output this cycle
I0323 06:04:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 06:04:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:04:33.409797  543705 memory.go:184] no items to output this cycle
I0323 06:04:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 06:04:38.485060  543705 disk_info.go:125] begin check local disk info of client
I0323 06:04:38.487611  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:04:38.487617  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002945c0 0xc000294600]
E0323 06:04:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:04:43.410938  543705 memory.go:191] Add success.
I0323 06:04:43.409813  543705 cpu.go:282] Add success.
I0323 06:04:43.420824  543705 net.go:648] Add success.
I0323 06:04:43.423618  543705 net.go:770] primary dev: ETH0
I0323 06:04:43.423632  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:04:43.423643  543705 net.go:698] Add success.
I0323 06:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:04:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:04:53.410397  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:04:53.410413  543705 memory.go:184] no items to output this cycle
I0323 06:04:53.410435  543705 cpu.go:275] no items to output this cycle
E0323 06:05:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:05:03.409787  543705 cpu.go:275] no items to output this cycle
I0323 06:05:03.409792  543705 memory.go:184] no items to output this cycle
E0323 06:05:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:05:13.409784  543705 memory.go:191] Add success.
W0323 06:05:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 06:05:13.409813  543705 cpu.go:282] Add success.
W0323 06:05:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:05:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:05:13.420150  543705 net.go:648] Add success.
I0323 06:05:13.422948  543705 net.go:770] primary dev: ETH0
I0323 06:05:13.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:05:13.422972  543705 net.go:698] Add success.
I0323 06:05:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:05:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:05:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 06:05:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:05:14.456859  543705 disk_worker.go:494] system disk:vda1
I0323 06:05:14.456919  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:05:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:05:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:05:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:05:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:05:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:05:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:05:23.409810  543705 memory.go:184] no items to output this cycle
I0323 06:05:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 06:05:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:05:33.409770  543705 memory.go:184] no items to output this cycle
I0323 06:05:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 06:05:38.488123  543705 disk_info.go:125] begin check local disk info of client
I0323 06:05:38.490704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:05:38.490710  543705 disk_info.go:196] parse disk info done, disk is : [0xc000292a00 0xc000292a40]
E0323 06:05:43.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:05:43.410855  543705 memory.go:191] Add success.
I0323 06:05:43.409988  543705 cpu.go:282] Add success.
I0323 06:05:43.419763  543705 net.go:648] Add success.
I0323 06:05:43.422708  543705 net.go:770] primary dev: ETH0
I0323 06:05:43.422723  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:05:43.422737  543705 net.go:698] Add success.
I0323 06:05:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:05:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:05:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:05:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:05:53.409767  543705 memory.go:184] no items to output this cycle
I0323 06:05:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 06:06:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:06:03.409770  543705 memory.go:184] no items to output this cycle
I0323 06:06:03.409811  543705 cpu.go:275] no items to output this cycle
I0323 06:06:13.409804  543705 cpu.go:282] Add success.
E0323 06:06:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:06:13.409834  543705 memory.go:191] Add success.
W0323 06:06:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:06:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:06:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:06:13.420402  543705 net.go:648] Add success.
I0323 06:06:13.421440  543705 net.go:770] primary dev: ETH0
I0323 06:06:13.421461  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:06:13.421479  543705 net.go:698] Add success.
I0323 06:06:13.468861  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6c89ad21-edec-4066-ae27-47befa254bb0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:06:13.468894  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:06:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:06:14.455235  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:06:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 06:06:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:06:14.456786  543705 disk_worker.go:494] system disk:vda1
I0323 06:06:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:06:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:06:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:06:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:06:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:06:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:06:23.409794  543705 memory.go:184] no items to output this cycle
I0323 06:06:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 06:06:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:06:33.409765  543705 memory.go:184] no items to output this cycle
I0323 06:06:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 06:06:38.490793  543705 disk_info.go:125] begin check local disk info of client
I0323 06:06:38.493386  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:06:38.493392  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473d00 0xc000473d40]
I0323 06:06:40.137866  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:06:40.137872  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:06:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:06:43.410951  543705 memory.go:191] Add success.
I0323 06:06:43.409800  543705 cpu.go:282] Add success.
I0323 06:06:43.419727  543705 net.go:648] Add success.
I0323 06:06:43.422257  543705 net.go:770] primary dev: ETH0
I0323 06:06:43.422270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:06:43.422282  543705 net.go:698] Add success.
I0323 06:06:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:06:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:06:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:06:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:06:53.409819  543705 memory.go:184] no items to output this cycle
I0323 06:06:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 06:07:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:07:03.409794  543705 memory.go:184] no items to output this cycle
I0323 06:07:03.409832  543705 cpu.go:275] no items to output this cycle
E0323 06:07:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:07:13.409820  543705 memory.go:191] Add success.
I0323 06:07:13.409838  543705 cpu.go:282] Add success.
W0323 06:07:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:07:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:07:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:07:13.420305  543705 net.go:648] Add success.
I0323 06:07:13.422801  543705 net.go:770] primary dev: ETH0
I0323 06:07:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:07:13.422828  543705 net.go:698] Add success.
I0323 06:07:13.453379  543705 event_worker.go:152] Polling the log file for events...
W0323 06:07:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:07:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 06:07:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:07:14.456481  543705 disk_worker.go:494] system disk:vda1
I0323 06:07:14.456507  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:07:14.456509  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:07:14.456519  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:07:14.456525  543705 custom_config.go:64] query custom config with name: gpu
E0323 06:07:15.456786  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:07:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:07:16.457946  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:07:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:07:16.457998  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:07:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:07:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:07:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:07:23.409807  543705 memory.go:184] no items to output this cycle
I0323 06:07:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 06:07:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:07:33.409779  543705 memory.go:184] no items to output this cycle
I0323 06:07:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 06:07:38.494159  543705 disk_info.go:125] begin check local disk info of client
I0323 06:07:38.496711  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:07:38.496721  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032a2c0 0xc00032a300]
E0323 06:07:43.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:07:43.410653  543705 memory.go:191] Add success.
I0323 06:07:43.409958  543705 cpu.go:282] Add success.
I0323 06:07:43.419724  543705 net.go:648] Add success.
I0323 06:07:43.422229  543705 net.go:770] primary dev: ETH0
I0323 06:07:43.422243  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:07:43.422255  543705 net.go:698] Add success.
I0323 06:07:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:07:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:07:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:07:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:07:53.409760  543705 memory.go:184] no items to output this cycle
I0323 06:07:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:08:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:08:03.409813  543705 memory.go:184] no items to output this cycle
I0323 06:08:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 06:08:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:08:13.409781  543705 memory.go:191] Add success.
I0323 06:08:13.409800  543705 cpu.go:282] Add success.
W0323 06:08:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:08:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:08:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:08:13.420338  543705 net.go:648] Add success.
I0323 06:08:13.423048  543705 net.go:770] primary dev: ETH0
I0323 06:08:13.423061  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:08:13.423074  543705 net.go:698] Add success.
I0323 06:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:08:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:08:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 06:08:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:08:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 06:08:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:08:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:08:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:08:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:08:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:08:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:08:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:08:23.409789  543705 memory.go:184] no items to output this cycle
I0323 06:08:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 06:08:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:08:33.409770  543705 memory.go:184] no items to output this cycle
I0323 06:08:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 06:08:38.496804  543705 disk_info.go:125] begin check local disk info of client
I0323 06:08:38.499358  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:08:38.499365  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fc480 0xc0004fc4c0]
E0323 06:08:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:08:43.409811  543705 cpu.go:282] Add success.
I0323 06:08:43.410810  543705 memory.go:191] Add success.
I0323 06:08:43.419706  543705 net.go:648] Add success.
I0323 06:08:43.422283  543705 net.go:770] primary dev: ETH0
I0323 06:08:43.422297  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:08:43.422308  543705 net.go:698] Add success.
I0323 06:08:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:08:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:08:53.409784  543705 memory.go:184] no items to output this cycle
I0323 06:08:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 06:09:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:09:03.409783  543705 memory.go:184] no items to output this cycle
I0323 06:09:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 06:09:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:09:13.409823  543705 memory.go:191] Add success.
I0323 06:09:13.409823  543705 cpu.go:282] Add success.
W0323 06:09:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:09:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:09:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:09:13.420314  543705 net.go:648] Add success.
I0323 06:09:13.422958  543705 net.go:770] primary dev: ETH0
I0323 06:09:13.422971  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:09:13.422983  543705 net.go:698] Add success.
I0323 06:09:13.468790  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"daf6667d-90f1-4b0e-9144-88ec9caac301","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:09:13.468825  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:09:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:09:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 06:09:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:09:14.456718  543705 disk_worker.go:494] system disk:vda1
I0323 06:09:14.456746  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:09:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:09:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:09:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:09:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:09:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:09:23.409804  543705 memory.go:184] no items to output this cycle
I0323 06:09:23.409820  543705 cpu.go:275] no items to output this cycle
E0323 06:09:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:09:33.409794  543705 memory.go:184] no items to output this cycle
I0323 06:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 06:09:38.500193  543705 disk_info.go:125] begin check local disk info of client
I0323 06:09:38.502799  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:09:38.502806  543705 disk_info.go:196] parse disk info done, disk is : [0xc000265c00 0xc000265c40]
I0323 06:09:40.141384  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:09:40.141389  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:09:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:09:43.410897  543705 memory.go:191] Add success.
I0323 06:09:43.409903  543705 cpu.go:282] Add success.
I0323 06:09:43.419763  543705 net.go:648] Add success.
I0323 06:09:43.422595  543705 net.go:770] primary dev: ETH0
I0323 06:09:43.422613  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:09:43.422628  543705 net.go:698] Add success.
I0323 06:09:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:09:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:09:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:09:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:09:53.409770  543705 memory.go:184] no items to output this cycle
I0323 06:09:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 06:10:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:10:03.409784  543705 memory.go:184] no items to output this cycle
I0323 06:10:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 06:10:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:10:13.409800  543705 memory.go:191] Add success.
I0323 06:10:13.409800  543705 cpu.go:282] Add success.
W0323 06:10:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:10:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:10:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:10:13.420164  543705 net.go:648] Add success.
I0323 06:10:13.423026  543705 net.go:770] primary dev: ETH0
I0323 06:10:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:10:13.423055  543705 net.go:698] Add success.
I0323 06:10:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:10:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:10:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 06:10:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:10:14.456614  543705 disk_worker.go:494] system disk:vda1
I0323 06:10:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:10:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:10:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:10:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:10:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:10:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:10:23.409806  543705 memory.go:184] no items to output this cycle
I0323 06:10:23.409819  543705 cpu.go:275] no items to output this cycle
E0323 06:10:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:10:33.409778  543705 cpu.go:275] no items to output this cycle
I0323 06:10:33.409789  543705 memory.go:184] no items to output this cycle
I0323 06:10:38.503155  543705 disk_info.go:125] begin check local disk info of client
I0323 06:10:38.505740  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:10:38.505746  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027ae40 0xc00027ae80]
E0323 06:10:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:10:43.410820  543705 memory.go:191] Add success.
I0323 06:10:43.409818  543705 cpu.go:282] Add success.
I0323 06:10:43.420806  543705 net.go:648] Add success.
I0323 06:10:43.423671  543705 net.go:770] primary dev: ETH0
I0323 06:10:43.423687  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:10:43.423701  543705 net.go:698] Add success.
I0323 06:10:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:10:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:10:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:10:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:10:53.409781  543705 memory.go:184] no items to output this cycle
I0323 06:10:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 06:11:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:11:03.409804  543705 memory.go:184] no items to output this cycle
I0323 06:11:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 06:11:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:11:13.409785  543705 memory.go:191] Add success.
I0323 06:11:13.409807  543705 cpu.go:282] Add success.
W0323 06:11:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:11:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:11:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:11:13.420134  543705 net.go:648] Add success.
I0323 06:11:13.422774  543705 net.go:770] primary dev: ETH0
I0323 06:11:13.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:11:13.422796  543705 net.go:698] Add success.
I0323 06:11:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:11:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:11:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 06:11:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:11:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 06:11:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:11:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:11:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:11:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:11:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:11:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:11:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:11:23.409793  543705 cpu.go:275] no items to output this cycle
I0323 06:11:23.409800  543705 memory.go:184] no items to output this cycle
E0323 06:11:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:11:33.409799  543705 memory.go:184] no items to output this cycle
I0323 06:11:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 06:11:38.505828  543705 disk_info.go:125] begin check local disk info of client
I0323 06:11:38.508378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:11:38.508383  543705 disk_info.go:196] parse disk info done, disk is : [0xc000282c00 0xc000282c40]
E0323 06:11:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:11:43.410678  543705 memory.go:191] Add success.
I0323 06:11:43.409815  543705 cpu.go:282] Add success.
I0323 06:11:43.420448  543705 net.go:648] Add success.
I0323 06:11:43.423378  543705 net.go:770] primary dev: ETH0
I0323 06:11:43.423397  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:11:43.423411  543705 net.go:698] Add success.
I0323 06:11:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:11:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:11:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:11:53.409793  543705 memory.go:184] no items to output this cycle
I0323 06:11:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 06:12:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:12:03.409772  543705 memory.go:184] no items to output this cycle
I0323 06:12:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 06:12:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:12:13.409796  543705 memory.go:191] Add success.
I0323 06:12:13.409798  543705 cpu.go:282] Add success.
W0323 06:12:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:12:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:12:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:12:13.420052  543705 net.go:648] Add success.
I0323 06:12:13.422719  543705 net.go:770] primary dev: ETH0
I0323 06:12:13.422732  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:12:13.422745  543705 net.go:698] Add success.
I0323 06:12:13.468727  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98d65b36-8059-44b5-99aa-b0e3663a3c11","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:12:13.468761  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 06:12:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:12:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 06:12:14.455250  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:12:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:12:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:12:14.455894  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:12:14.456812  543705 disk_worker.go:494] system disk:vda1
I0323 06:12:14.456856  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:12:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:12:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 06:12:16.457904  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:12:16.457904  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:12:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:12:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:12:16.472307  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:12:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:12:23.409778  543705 memory.go:184] no items to output this cycle
I0323 06:12:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 06:12:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:12:33.409772  543705 memory.go:184] no items to output this cycle
I0323 06:12:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 06:12:38.509236  543705 disk_info.go:125] begin check local disk info of client
I0323 06:12:38.511814  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:12:38.511819  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b880 0xc00027b8c0]
I0323 06:12:40.141726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:12:40.141733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:12:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:12:43.410759  543705 memory.go:191] Add success.
I0323 06:12:43.409811  543705 cpu.go:282] Add success.
I0323 06:12:43.420594  543705 net.go:648] Add success.
I0323 06:12:43.423266  543705 net.go:770] primary dev: ETH0
I0323 06:12:43.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:12:43.423292  543705 net.go:698] Add success.
I0323 06:12:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:12:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:12:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:12:53.410241  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:12:53.410261  543705 memory.go:184] no items to output this cycle
I0323 06:12:53.410274  543705 cpu.go:275] no items to output this cycle
E0323 06:13:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:13:03.409791  543705 cpu.go:275] no items to output this cycle
I0323 06:13:03.409806  543705 memory.go:184] no items to output this cycle
E0323 06:13:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:13:13.409814  543705 memory.go:191] Add success.
I0323 06:13:13.409829  543705 cpu.go:282] Add success.
W0323 06:13:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:13:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:13:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:13:13.420208  543705 net.go:648] Add success.
I0323 06:13:13.423038  543705 net.go:770] primary dev: ETH0
I0323 06:13:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:13:13.423062  543705 net.go:698] Add success.
I0323 06:13:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:13:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:13:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 06:13:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:13:14.456562  543705 disk_worker.go:494] system disk:vda1
I0323 06:13:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:13:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:13:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:13:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:13:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:13:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:13:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:13:23.409797  543705 memory.go:184] no items to output this cycle
I0323 06:13:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 06:13:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:13:33.409794  543705 memory.go:184] no items to output this cycle
I0323 06:13:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 06:13:38.512193  543705 disk_info.go:125] begin check local disk info of client
I0323 06:13:38.514777  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:13:38.514784  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f240 0xc00046f280]
E0323 06:13:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:13:43.410791  543705 memory.go:191] Add success.
I0323 06:13:43.409818  543705 cpu.go:282] Add success.
I0323 06:13:43.420491  543705 net.go:648] Add success.
I0323 06:13:43.423500  543705 net.go:770] primary dev: ETH0
I0323 06:13:43.423512  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:13:43.423524  543705 net.go:698] Add success.
I0323 06:13:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:13:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:13:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:13:53.409792  543705 memory.go:184] no items to output this cycle
I0323 06:13:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 06:14:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:14:03.409781  543705 memory.go:184] no items to output this cycle
I0323 06:14:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 06:14:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:14:13.409815  543705 memory.go:191] Add success.
I0323 06:14:13.409820  543705 cpu.go:282] Add success.
W0323 06:14:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:14:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:14:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:14:13.420274  543705 net.go:648] Add success.
I0323 06:14:13.423003  543705 net.go:770] primary dev: ETH0
I0323 06:14:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:14:13.423028  543705 net.go:698] Add success.
I0323 06:14:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:14:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:14:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 06:14:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:14:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 06:14:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:14:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:14:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:14:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:14:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:14:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:14:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:14:23.409798  543705 memory.go:184] no items to output this cycle
I0323 06:14:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 06:14:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:14:33.409782  543705 memory.go:184] no items to output this cycle
I0323 06:14:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 06:14:38.514868  543705 disk_info.go:125] begin check local disk info of client
I0323 06:14:38.517429  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:14:38.517436  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a000 0xc00047a040]
E0323 06:14:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:14:43.410820  543705 memory.go:191] Add success.
I0323 06:14:43.409827  543705 cpu.go:282] Add success.
I0323 06:14:43.420521  543705 net.go:648] Add success.
I0323 06:14:43.423215  543705 net.go:770] primary dev: ETH0
I0323 06:14:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:14:43.423240  543705 net.go:698] Add success.
I0323 06:14:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:14:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:14:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:14:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:14:53.409803  543705 memory.go:184] no items to output this cycle
I0323 06:14:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 06:15:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:15:03.409797  543705 memory.go:184] no items to output this cycle
I0323 06:15:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 06:15:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:15:13.409802  543705 memory.go:191] Add success.
I0323 06:15:13.409825  543705 cpu.go:282] Add success.
W0323 06:15:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:15:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:15:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:15:13.420155  543705 net.go:648] Add success.
I0323 06:15:13.423519  543705 net.go:770] primary dev: ETH0
I0323 06:15:13.423535  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:15:13.423550  543705 net.go:698] Add success.
I0323 06:15:13.468960  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5c3e1329-68f5-43dd-8dab-0a426024ac25","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:15:13.468995  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:15:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:15:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:15:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 06:15:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:15:14.456556  543705 disk_worker.go:494] system disk:vda1
I0323 06:15:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:15:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:15:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:15:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:15:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:15:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:15:23.410425  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:15:23.410448  543705 memory.go:184] no items to output this cycle
I0323 06:15:23.410454  543705 cpu.go:275] no items to output this cycle
E0323 06:15:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:15:33.409769  543705 memory.go:184] no items to output this cycle
I0323 06:15:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 06:15:38.518285  543705 disk_info.go:125] begin check local disk info of client
I0323 06:15:38.520875  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:15:38.520881  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab040 0xc0001ab080]
I0323 06:15:40.141874  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:15:40.141881  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:15:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:15:43.410884  543705 memory.go:191] Add success.
I0323 06:15:43.409818  543705 cpu.go:282] Add success.
I0323 06:15:43.420568  543705 net.go:648] Add success.
I0323 06:15:43.423408  543705 net.go:770] primary dev: ETH0
I0323 06:15:43.423420  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:15:43.423434  543705 net.go:698] Add success.
I0323 06:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:15:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:15:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:15:53.409767  543705 memory.go:184] no items to output this cycle
I0323 06:15:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 06:16:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:16:03.409809  543705 memory.go:184] no items to output this cycle
I0323 06:16:03.409824  543705 cpu.go:275] no items to output this cycle
E0323 06:16:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:16:13.409835  543705 memory.go:191] Add success.
I0323 06:16:13.409842  543705 cpu.go:282] Add success.
W0323 06:16:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:16:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:16:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:16:13.420339  543705 net.go:648] Add success.
I0323 06:16:13.423039  543705 net.go:770] primary dev: ETH0
I0323 06:16:13.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:16:13.423065  543705 net.go:698] Add success.
I0323 06:16:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:16:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:16:14.455256  543705 disk_worker.go:708] disk space is not compliant
W0323 06:16:14.455263  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:16:14.457041  543705 disk_worker.go:494] system disk:vda1
I0323 06:16:14.457086  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:16:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:16:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:16:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:16:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:16:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:16:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:16:23.409831  543705 memory.go:184] no items to output this cycle
I0323 06:16:23.409840  543705 cpu.go:275] no items to output this cycle
E0323 06:16:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:16:33.409771  543705 memory.go:184] no items to output this cycle
I0323 06:16:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 06:16:38.521246  543705 disk_info.go:125] begin check local disk info of client
I0323 06:16:38.523859  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:16:38.523865  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa940 0xc0001aa980]
E0323 06:16:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:16:43.410680  543705 memory.go:191] Add success.
I0323 06:16:43.409812  543705 cpu.go:282] Add success.
I0323 06:16:43.420361  543705 net.go:648] Add success.
I0323 06:16:43.422979  543705 net.go:770] primary dev: ETH0
I0323 06:16:43.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:16:43.423006  543705 net.go:698] Add success.
I0323 06:16:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:16:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:16:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:16:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:16:53.409797  543705 memory.go:184] no items to output this cycle
I0323 06:16:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 06:17:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:17:03.409791  543705 memory.go:184] no items to output this cycle
I0323 06:17:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 06:17:13.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:17:13.409945  543705 memory.go:191] Add success.
W0323 06:17:13.409979  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:17:13.409998  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:17:13.410003  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:17:13.410038  543705 cpu.go:282] Add success.
I0323 06:17:13.419726  543705 net.go:648] Add success.
I0323 06:17:13.422404  543705 net.go:770] primary dev: ETH0
I0323 06:17:13.422420  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:17:13.422433  543705 net.go:698] Add success.
I0323 06:17:13.453005  543705 event_worker.go:152] Polling the log file for events...
W0323 06:17:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:17:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 06:17:14.455174  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:17:14.456944  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:17:14.456953  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:17:14.456959  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:17:14.457021  543705 disk_worker.go:494] system disk:vda1
I0323 06:17:14.457050  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:17:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:17:15.456813  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:17:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:17:16.457936  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:17:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:17:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:17:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:17:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:17:23.409802  543705 memory.go:184] no items to output this cycle
I0323 06:17:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 06:17:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:17:33.409783  543705 memory.go:184] no items to output this cycle
I0323 06:17:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 06:17:38.524254  543705 disk_info.go:125] begin check local disk info of client
I0323 06:17:38.526852  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:17:38.526858  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b780 0xc00007b7c0]
E0323 06:17:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:17:43.410852  543705 memory.go:191] Add success.
I0323 06:17:43.409825  543705 cpu.go:282] Add success.
I0323 06:17:43.420610  543705 net.go:648] Add success.
I0323 06:17:43.423526  543705 net.go:770] primary dev: ETH0
I0323 06:17:43.423538  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:17:43.423550  543705 net.go:698] Add success.
I0323 06:17:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:17:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:17:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:17:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:17:53.409785  543705 memory.go:184] no items to output this cycle
I0323 06:17:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 06:18:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:18:03.409785  543705 memory.go:184] no items to output this cycle
I0323 06:18:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 06:18:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:18:13.409820  543705 memory.go:191] Add success.
I0323 06:18:13.409830  543705 cpu.go:282] Add success.
W0323 06:18:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:18:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:18:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:18:13.420412  543705 net.go:648] Add success.
I0323 06:18:13.423264  543705 net.go:770] primary dev: ETH0
I0323 06:18:13.423278  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:18:13.423292  543705 net.go:698] Add success.
I0323 06:18:13.468214  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ec88d9b7-e8c5-4c55-8ed3-06b5b55721ba","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:18:13.468248  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:18:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:18:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:18:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 06:18:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:18:14.456823  543705 disk_worker.go:494] system disk:vda1
I0323 06:18:14.456855  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:18:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:18:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:18:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:18:23.410776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:18:23.410794  543705 memory.go:184] no items to output this cycle
I0323 06:18:23.410804  543705 cpu.go:275] no items to output this cycle
E0323 06:18:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:18:33.409812  543705 memory.go:184] no items to output this cycle
I0323 06:18:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 06:18:38.526940  543705 disk_info.go:125] begin check local disk info of client
I0323 06:18:38.529534  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:18:38.529541  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1800 0xc0004b1840]
I0323 06:18:40.142951  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:18:40.142957  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:18:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:18:43.410789  543705 memory.go:191] Add success.
I0323 06:18:43.409820  543705 cpu.go:282] Add success.
I0323 06:18:43.420645  543705 net.go:648] Add success.
I0323 06:18:43.424481  543705 net.go:770] primary dev: ETH0
I0323 06:18:43.424494  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:18:43.424507  543705 net.go:698] Add success.
I0323 06:18:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:18:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:18:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:18:53.409792  543705 memory.go:184] no items to output this cycle
I0323 06:18:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:19:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:19:03.409805  543705 memory.go:184] no items to output this cycle
I0323 06:19:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 06:19:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:19:13.409786  543705 memory.go:191] Add success.
I0323 06:19:13.409821  543705 cpu.go:282] Add success.
W0323 06:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:19:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:19:13.420138  543705 net.go:648] Add success.
I0323 06:19:13.422997  543705 net.go:770] primary dev: ETH0
I0323 06:19:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:19:13.423023  543705 net.go:698] Add success.
I0323 06:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:19:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:19:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 06:19:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:19:14.456586  543705 disk_worker.go:494] system disk:vda1
I0323 06:19:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:19:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:19:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:19:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:19:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:19:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:19:23.409828  543705 memory.go:184] no items to output this cycle
I0323 06:19:23.409841  543705 cpu.go:275] no items to output this cycle
E0323 06:19:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:19:33.409779  543705 memory.go:184] no items to output this cycle
I0323 06:19:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 06:19:38.530335  543705 disk_info.go:125] begin check local disk info of client
I0323 06:19:38.532905  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:19:38.532912  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b20c0 0xc0003b2100]
E0323 06:19:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:19:43.410711  543705 memory.go:191] Add success.
I0323 06:19:43.409799  543705 cpu.go:282] Add success.
I0323 06:19:43.420561  543705 net.go:648] Add success.
I0323 06:19:43.423473  543705 net.go:770] primary dev: ETH0
I0323 06:19:43.423486  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:19:43.423497  543705 net.go:698] Add success.
I0323 06:19:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:19:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:19:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:19:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:19:53.409776  543705 memory.go:184] no items to output this cycle
I0323 06:19:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:20:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:20:03.409812  543705 memory.go:184] no items to output this cycle
I0323 06:20:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 06:20:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:20:13.409825  543705 memory.go:191] Add success.
I0323 06:20:13.409830  543705 cpu.go:282] Add success.
W0323 06:20:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:20:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:20:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:20:13.420185  543705 net.go:648] Add success.
I0323 06:20:13.423799  543705 net.go:770] primary dev: ETH0
I0323 06:20:13.423813  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:20:13.423825  543705 net.go:698] Add success.
I0323 06:20:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:20:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:20:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 06:20:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:20:14.456574  543705 disk_worker.go:494] system disk:vda1
I0323 06:20:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:20:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:20:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:20:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:20:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:20:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:20:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:20:23.409809  543705 memory.go:184] no items to output this cycle
I0323 06:20:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 06:20:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:20:33.409771  543705 memory.go:184] no items to output this cycle
I0323 06:20:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 06:20:38.532997  543705 disk_info.go:125] begin check local disk info of client
I0323 06:20:38.535571  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:20:38.535577  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039ae40 0xc00039ae80]
E0323 06:20:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:20:43.410743  543705 memory.go:191] Add success.
I0323 06:20:43.409800  543705 cpu.go:282] Add success.
I0323 06:20:43.420709  543705 net.go:648] Add success.
I0323 06:20:43.423478  543705 net.go:770] primary dev: ETH0
I0323 06:20:43.423491  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:20:43.423503  543705 net.go:698] Add success.
I0323 06:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:20:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:20:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:20:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:20:53.409781  543705 memory.go:184] no items to output this cycle
I0323 06:20:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 06:21:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:21:03.409790  543705 memory.go:184] no items to output this cycle
I0323 06:21:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 06:21:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:21:13.409790  543705 memory.go:191] Add success.
I0323 06:21:13.409791  543705 cpu.go:282] Add success.
W0323 06:21:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:21:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:21:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:21:13.420135  543705 net.go:648] Add success.
I0323 06:21:13.422837  543705 net.go:770] primary dev: ETH0
I0323 06:21:13.422850  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:21:13.422862  543705 net.go:698] Add success.
I0323 06:21:13.468567  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6a72082-7ae5-4aed-96c6-1d310e77fba8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:21:13.468602  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:21:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:21:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:21:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 06:21:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:21:14.456561  543705 disk_worker.go:494] system disk:vda1
I0323 06:21:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:21:15.455611  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:21:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:21:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:21:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:21:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:21:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:21:23.409821  543705 memory.go:184] no items to output this cycle
I0323 06:21:23.409829  543705 cpu.go:275] no items to output this cycle
E0323 06:21:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:21:33.409766  543705 memory.go:184] no items to output this cycle
I0323 06:21:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 06:21:38.536378  543705 disk_info.go:125] begin check local disk info of client
I0323 06:21:38.538982  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:21:38.538995  543705 disk_info.go:196] parse disk info done, disk is : [0xc000345800 0xc000345840]
I0323 06:21:40.145724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:21:40.145729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:21:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:21:43.410697  543705 memory.go:191] Add success.
I0323 06:21:43.409799  543705 cpu.go:282] Add success.
I0323 06:21:43.420647  543705 net.go:648] Add success.
I0323 06:21:43.423269  543705 net.go:770] primary dev: ETH0
I0323 06:21:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:21:43.423294  543705 net.go:698] Add success.
I0323 06:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:21:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:21:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:21:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:21:53.409764  543705 memory.go:184] no items to output this cycle
I0323 06:21:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 06:22:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:22:03.409774  543705 memory.go:184] no items to output this cycle
I0323 06:22:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 06:22:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:22:13.409789  543705 memory.go:191] Add success.
I0323 06:22:13.409791  543705 cpu.go:282] Add success.
W0323 06:22:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:22:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:22:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:22:13.420178  543705 net.go:648] Add success.
I0323 06:22:13.422711  543705 net.go:770] primary dev: ETH0
I0323 06:22:13.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:22:13.422737  543705 net.go:698] Add success.
W0323 06:22:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:22:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 06:22:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:22:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:22:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:22:14.455916  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:22:14.456545  543705 disk_worker.go:494] system disk:vda1
I0323 06:22:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:22:15.456790  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:22:15.456800  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:22:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:22:16.457976  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:22:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:22:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:22:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:22:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:22:23.409816  543705 memory.go:184] no items to output this cycle
I0323 06:22:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 06:22:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:22:33.409775  543705 memory.go:184] no items to output this cycle
I0323 06:22:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 06:22:38.539341  543705 disk_info.go:125] begin check local disk info of client
I0323 06:22:38.541919  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:22:38.541924  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039fe80 0xc00039fec0]
E0323 06:22:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:22:43.410628  543705 memory.go:191] Add success.
I0323 06:22:43.409814  543705 cpu.go:282] Add success.
I0323 06:22:43.420346  543705 net.go:648] Add success.
I0323 06:22:43.423150  543705 net.go:770] primary dev: ETH0
I0323 06:22:43.423166  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:22:43.423185  543705 net.go:698] Add success.
I0323 06:22:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:22:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:22:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:22:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:22:53.409781  543705 memory.go:184] no items to output this cycle
I0323 06:22:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 06:23:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:23:03.409790  543705 memory.go:184] no items to output this cycle
I0323 06:23:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:23:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:23:13.409793  543705 memory.go:191] Add success.
I0323 06:23:13.409794  543705 cpu.go:282] Add success.
W0323 06:23:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:23:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:23:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:23:13.420161  543705 net.go:648] Add success.
I0323 06:23:13.423165  543705 net.go:770] primary dev: ETH0
I0323 06:23:13.423180  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:23:13.423194  543705 net.go:698] Add success.
I0323 06:23:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:23:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:23:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 06:23:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:23:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 06:23:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:23:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:23:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:23:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:23:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:23:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:23:23.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:23:23.410265  543705 memory.go:184] no items to output this cycle
I0323 06:23:23.410291  543705 cpu.go:275] no items to output this cycle
E0323 06:23:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:23:33.409776  543705 memory.go:184] no items to output this cycle
I0323 06:23:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 06:23:38.542008  543705 disk_info.go:125] begin check local disk info of client
I0323 06:23:38.544560  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:23:38.544567  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034f880 0xc00034f8c0]
E0323 06:23:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:23:43.410754  543705 memory.go:191] Add success.
I0323 06:23:43.409804  543705 cpu.go:282] Add success.
I0323 06:23:43.420482  543705 net.go:648] Add success.
I0323 06:23:43.423251  543705 net.go:770] primary dev: ETH0
I0323 06:23:43.423264  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:23:43.423277  543705 net.go:698] Add success.
I0323 06:23:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:23:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:23:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:23:53.409913  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:23:53.409939  543705 cpu.go:275] no items to output this cycle
I0323 06:23:53.410003  543705 memory.go:184] no items to output this cycle
E0323 06:24:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:24:03.409782  543705 memory.go:184] no items to output this cycle
I0323 06:24:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 06:24:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:24:13.409783  543705 memory.go:191] Add success.
I0323 06:24:13.409802  543705 cpu.go:282] Add success.
W0323 06:24:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:24:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:24:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:24:13.420106  543705 net.go:648] Add success.
I0323 06:24:13.422738  543705 net.go:770] primary dev: ETH0
I0323 06:24:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:24:13.422763  543705 net.go:698] Add success.
I0323 06:24:13.468075  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ed6f0784-9d37-4fae-bf66-68867f19b13e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:24:13.468111  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:24:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:24:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:24:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 06:24:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:24:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 06:24:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:24:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:24:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:24:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:24:23.409789  543705 memory.go:184] no items to output this cycle
I0323 06:24:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 06:24:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:24:33.409798  543705 memory.go:184] no items to output this cycle
I0323 06:24:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 06:24:38.545409  543705 disk_info.go:125] begin check local disk info of client
I0323 06:24:38.547991  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:24:38.547998  543705 disk_info.go:196] parse disk info done, disk is : [0xc000355940 0xc000355980]
I0323 06:24:40.149433  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:24:40.149439  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:24:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:24:43.409795  543705 cpu.go:282] Add success.
I0323 06:24:43.410621  543705 memory.go:191] Add success.
I0323 06:24:43.419769  543705 net.go:648] Add success.
I0323 06:24:43.422421  543705 net.go:770] primary dev: ETH0
I0323 06:24:43.422440  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:24:43.422455  543705 net.go:698] Add success.
I0323 06:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:24:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:24:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:24:53.410731  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:24:53.410747  543705 memory.go:184] no items to output this cycle
I0323 06:24:53.410751  543705 cpu.go:275] no items to output this cycle
E0323 06:25:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:25:03.409769  543705 memory.go:184] no items to output this cycle
I0323 06:25:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 06:25:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:25:13.409779  543705 memory.go:191] Add success.
I0323 06:25:13.409801  543705 cpu.go:282] Add success.
W0323 06:25:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:25:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:25:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:25:13.420153  543705 net.go:648] Add success.
I0323 06:25:13.422811  543705 net.go:770] primary dev: ETH0
I0323 06:25:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:25:13.422835  543705 net.go:698] Add success.
I0323 06:25:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:25:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:25:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 06:25:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:25:14.456555  543705 disk_worker.go:494] system disk:vda1
I0323 06:25:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:25:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:25:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:25:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:25:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:25:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:25:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:25:23.409792  543705 memory.go:184] no items to output this cycle
I0323 06:25:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 06:25:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:25:33.409801  543705 memory.go:184] no items to output this cycle
I0323 06:25:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 06:25:38.548080  543705 disk_info.go:125] begin check local disk info of client
I0323 06:25:38.550685  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:25:38.550691  543705 disk_info.go:196] parse disk info done, disk is : [0xc000376480 0xc0003764c0]
E0323 06:25:43.409891  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:25:43.410921  543705 memory.go:191] Add success.
I0323 06:25:43.410065  543705 cpu.go:282] Add success.
I0323 06:25:43.419686  543705 net.go:648] Add success.
I0323 06:25:43.422607  543705 net.go:770] primary dev: ETH0
I0323 06:25:43.422622  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:25:43.422636  543705 net.go:698] Add success.
I0323 06:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:25:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:25:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:25:53.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:25:53.410266  543705 memory.go:184] no items to output this cycle
I0323 06:25:53.410271  543705 cpu.go:275] no items to output this cycle
E0323 06:26:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:26:03.409811  543705 memory.go:184] no items to output this cycle
I0323 06:26:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 06:26:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:26:13.409811  543705 memory.go:191] Add success.
I0323 06:26:13.409818  543705 cpu.go:282] Add success.
W0323 06:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:26:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:26:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:26:13.420119  543705 net.go:648] Add success.
I0323 06:26:13.422979  543705 net.go:770] primary dev: ETH0
I0323 06:26:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:26:13.423006  543705 net.go:698] Add success.
I0323 06:26:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:26:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:26:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0323 06:26:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:26:14.456618  543705 disk_worker.go:494] system disk:vda1
I0323 06:26:14.456648  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:26:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:26:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:26:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:26:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:26:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:26:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:26:23.409814  543705 memory.go:184] no items to output this cycle
I0323 06:26:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 06:26:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:26:33.409776  543705 memory.go:184] no items to output this cycle
I0323 06:26:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 06:26:38.551396  543705 disk_info.go:125] begin check local disk info of client
I0323 06:26:38.553984  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:26:38.553990  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035dd00 0xc00035dd40]
E0323 06:26:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:26:43.410859  543705 memory.go:191] Add success.
I0323 06:26:43.409815  543705 cpu.go:282] Add success.
I0323 06:26:43.419715  543705 net.go:648] Add success.
I0323 06:26:43.422460  543705 net.go:770] primary dev: ETH0
I0323 06:26:43.422474  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:26:43.422488  543705 net.go:698] Add success.
I0323 06:26:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:26:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:26:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:26:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:26:53.409781  543705 cpu.go:275] no items to output this cycle
I0323 06:26:53.409785  543705 memory.go:184] no items to output this cycle
E0323 06:27:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:27:03.409792  543705 memory.go:184] no items to output this cycle
I0323 06:27:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 06:27:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:27:13.409785  543705 memory.go:191] Add success.
I0323 06:27:13.409794  543705 cpu.go:282] Add success.
W0323 06:27:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:27:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:27:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:27:13.425965  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 06:27:13.426046  543705 net.go:770] primary dev: ETH0
I0323 06:27:13.426060  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:27:13.426074  543705 net.go:698] Add success.
I0323 06:27:13.426447  543705 net.go:648] Add success.
I0323 06:27:13.452832  543705 event_worker.go:152] Polling the log file for events...
I0323 06:27:13.463318  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c5a50a6-08d7-4b93-87b2-08e30c9e4de8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:27:13.463352  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 06:27:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:27:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 06:27:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:27:14.456815  543705 disk_worker.go:494] system disk:vda1
E0323 06:27:14.456825  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:27:14.456832  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:27:14.456838  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:27:14.456866  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:27:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:27:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:27:16.457944  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:27:16.457954  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:27:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:27:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:27:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:27:23.410383  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:27:23.410401  543705 memory.go:184] no items to output this cycle
I0323 06:27:23.410406  543705 cpu.go:275] no items to output this cycle
E0323 06:27:33.409897  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:27:33.409911  543705 cpu.go:275] no items to output this cycle
I0323 06:27:33.410040  543705 memory.go:184] no items to output this cycle
I0323 06:27:38.554074  543705 disk_info.go:125] begin check local disk info of client
I0323 06:27:38.556640  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:27:38.556647  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024e780 0xc00024e7c0]
I0323 06:27:40.149726  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:27:40.149732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:27:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:27:43.410783  543705 memory.go:191] Add success.
I0323 06:27:43.409787  543705 cpu.go:282] Add success.
I0323 06:27:43.420563  543705 net.go:648] Add success.
I0323 06:27:43.423400  543705 net.go:770] primary dev: ETH0
I0323 06:27:43.423414  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:27:43.423427  543705 net.go:698] Add success.
I0323 06:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:27:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:27:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:27:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:27:53.409782  543705 cpu.go:275] no items to output this cycle
I0323 06:27:53.409787  543705 memory.go:184] no items to output this cycle
E0323 06:28:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:28:03.409782  543705 memory.go:184] no items to output this cycle
I0323 06:28:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 06:28:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:28:13.409787  543705 memory.go:191] Add success.
I0323 06:28:13.409804  543705 cpu.go:282] Add success.
W0323 06:28:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:28:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:28:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:28:13.420278  543705 net.go:648] Add success.
I0323 06:28:13.423015  543705 net.go:770] primary dev: ETH0
I0323 06:28:13.423027  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:28:13.423039  543705 net.go:698] Add success.
I0323 06:28:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:28:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:28:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 06:28:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:28:14.456508  543705 disk_worker.go:494] system disk:vda1
I0323 06:28:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:28:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:28:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:28:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:28:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:28:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:28:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:28:23.409814  543705 memory.go:184] no items to output this cycle
I0323 06:28:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 06:28:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:28:33.409782  543705 memory.go:184] no items to output this cycle
I0323 06:28:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 06:28:38.557474  543705 disk_info.go:125] begin check local disk info of client
I0323 06:28:38.560058  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:28:38.560064  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a900 0xc00047a940]
E0323 06:28:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:28:43.411028  543705 memory.go:191] Add success.
I0323 06:28:43.409813  543705 cpu.go:282] Add success.
I0323 06:28:43.419679  543705 net.go:648] Add success.
I0323 06:28:43.422445  543705 net.go:770] primary dev: ETH0
I0323 06:28:43.422458  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:28:43.422470  543705 net.go:698] Add success.
I0323 06:28:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:28:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:28:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:28:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:28:53.409773  543705 memory.go:184] no items to output this cycle
I0323 06:28:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:29:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:29:03.409787  543705 memory.go:184] no items to output this cycle
I0323 06:29:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 06:29:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:29:13.409779  543705 memory.go:191] Add success.
W0323 06:29:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 06:29:13.409810  543705 cpu.go:282] Add success.
W0323 06:29:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:29:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:29:13.420117  543705 net.go:648] Add success.
I0323 06:29:13.422663  543705 net.go:770] primary dev: ETH0
I0323 06:29:13.422676  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:29:13.422689  543705 net.go:698] Add success.
I0323 06:29:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:29:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:29:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 06:29:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:29:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 06:29:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:29:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:29:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:29:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:29:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:29:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:29:23.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:29:23.409921  543705 memory.go:184] no items to output this cycle
I0323 06:29:23.410076  543705 cpu.go:275] no items to output this cycle
E0323 06:29:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:29:33.409782  543705 memory.go:184] no items to output this cycle
I0323 06:29:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 06:29:38.560443  543705 disk_info.go:125] begin check local disk info of client
I0323 06:29:38.563043  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:29:38.563049  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c4240 0xc0004c4280]
E0323 06:29:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:29:43.411006  543705 memory.go:191] Add success.
I0323 06:29:43.409799  543705 cpu.go:282] Add success.
I0323 06:29:43.420727  543705 net.go:648] Add success.
I0323 06:29:43.423209  543705 net.go:770] primary dev: ETH0
I0323 06:29:43.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:29:43.423235  543705 net.go:698] Add success.
I0323 06:29:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:29:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:29:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:29:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:29:53.409778  543705 memory.go:184] no items to output this cycle
I0323 06:29:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:30:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:30:03.409803  543705 memory.go:184] no items to output this cycle
I0323 06:30:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 06:30:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:30:13.409777  543705 memory.go:191] Add success.
I0323 06:30:13.409801  543705 cpu.go:282] Add success.
W0323 06:30:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:30:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:30:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:30:13.420276  543705 net.go:648] Add success.
I0323 06:30:13.423284  543705 net.go:770] primary dev: ETH0
I0323 06:30:13.423298  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:30:13.423310  543705 net.go:698] Add success.
I0323 06:30:13.463886  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"54a18f22-251e-4eb5-8e25-6b50018ccc04","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:30:13.463920  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:30:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:30:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:30:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 06:30:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:30:14.456558  543705 disk_worker.go:494] system disk:vda1
I0323 06:30:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:30:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:30:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:30:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:30:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:30:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:30:23.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:30:23.409838  543705 memory.go:184] no items to output this cycle
I0323 06:30:23.409849  543705 cpu.go:275] no items to output this cycle
E0323 06:30:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:30:33.409791  543705 memory.go:184] no items to output this cycle
I0323 06:30:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 06:30:38.563134  543705 disk_info.go:125] begin check local disk info of client
I0323 06:30:38.565739  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:30:38.565745  543705 disk_info.go:196] parse disk info done, disk is : [0xc000384480 0xc0003844c0]
I0323 06:30:40.153451  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:30:40.153457  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:30:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:30:43.410723  543705 memory.go:191] Add success.
I0323 06:30:43.409801  543705 cpu.go:282] Add success.
I0323 06:30:43.420414  543705 net.go:648] Add success.
I0323 06:30:43.423113  543705 net.go:770] primary dev: ETH0
I0323 06:30:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:30:43.423139  543705 net.go:698] Add success.
I0323 06:30:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:30:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:30:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:30:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:30:53.409789  543705 memory.go:184] no items to output this cycle
I0323 06:30:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 06:31:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:31:03.409786  543705 memory.go:184] no items to output this cycle
I0323 06:31:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 06:31:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:31:13.409820  543705 memory.go:191] Add success.
I0323 06:31:13.409827  543705 cpu.go:282] Add success.
W0323 06:31:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:31:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:31:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:31:13.420150  543705 net.go:648] Add success.
I0323 06:31:13.423171  543705 net.go:770] primary dev: ETH0
I0323 06:31:13.423184  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:31:13.423196  543705 net.go:698] Add success.
I0323 06:31:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:31:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:31:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0323 06:31:14.455154  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:31:14.456475  543705 disk_worker.go:494] system disk:vda1
I0323 06:31:14.456520  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:31:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:31:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:31:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:31:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:31:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:31:23.409793  543705 memory.go:184] no items to output this cycle
I0323 06:31:23.409863  543705 cpu.go:275] no items to output this cycle
E0323 06:31:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:31:33.409809  543705 memory.go:184] no items to output this cycle
I0323 06:31:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 06:31:38.565826  543705 disk_info.go:125] begin check local disk info of client
I0323 06:31:38.568394  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:31:38.568401  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460840 0xc000460880]
E0323 06:31:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:31:43.410688  543705 memory.go:191] Add success.
I0323 06:31:43.409803  543705 cpu.go:282] Add success.
I0323 06:31:43.420363  543705 net.go:648] Add success.
I0323 06:31:43.422923  543705 net.go:770] primary dev: ETH0
I0323 06:31:43.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:31:43.422949  543705 net.go:698] Add success.
I0323 06:31:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:31:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:31:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:31:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:31:53.409772  543705 memory.go:184] no items to output this cycle
I0323 06:31:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 06:32:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:32:03.409785  543705 memory.go:184] no items to output this cycle
I0323 06:32:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 06:32:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:32:13.409816  543705 memory.go:191] Add success.
I0323 06:32:13.409825  543705 cpu.go:282] Add success.
W0323 06:32:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:32:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:32:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:32:13.420066  543705 net.go:648] Add success.
I0323 06:32:13.422991  543705 net.go:770] primary dev: ETH0
I0323 06:32:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:32:13.423021  543705 net.go:698] Add success.
W0323 06:32:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:32:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 06:32:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:32:14.456818  543705 disk_worker.go:494] system disk:vda1
I0323 06:32:14.456855  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:32:14.457096  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:32:14.457104  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:32:14.457108  543705 custom_config.go:64] query custom config with name: gpu
E0323 06:32:15.456863  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:32:15.456873  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:32:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:32:16.457952  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:32:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:32:16.458011  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:32:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:32:23.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:32:23.409830  543705 memory.go:184] no items to output this cycle
I0323 06:32:23.409911  543705 cpu.go:275] no items to output this cycle
E0323 06:32:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:32:33.409769  543705 memory.go:184] no items to output this cycle
I0323 06:32:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 06:32:38.568486  543705 disk_info.go:125] begin check local disk info of client
I0323 06:32:38.571079  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:32:38.571086  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003873c0 0xc000387400]
E0323 06:32:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:32:43.410725  543705 memory.go:191] Add success.
I0323 06:32:43.409815  543705 cpu.go:282] Add success.
I0323 06:32:43.420443  543705 net.go:648] Add success.
I0323 06:32:43.423344  543705 net.go:770] primary dev: ETH0
I0323 06:32:43.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:32:43.423370  543705 net.go:698] Add success.
I0323 06:32:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:32:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:32:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:32:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:32:53.409784  543705 cpu.go:275] no items to output this cycle
I0323 06:32:53.409788  543705 memory.go:184] no items to output this cycle
E0323 06:33:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:33:03.409769  543705 memory.go:184] no items to output this cycle
I0323 06:33:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 06:33:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:33:13.409818  543705 memory.go:191] Add success.
I0323 06:33:13.409828  543705 cpu.go:282] Add success.
W0323 06:33:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:33:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:33:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:33:13.420139  543705 net.go:648] Add success.
I0323 06:33:13.422974  543705 net.go:770] primary dev: ETH0
I0323 06:33:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:33:13.423001  543705 net.go:698] Add success.
I0323 06:33:13.562485  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6b9313a6-b2e1-4aed-9ad1-79a56ce6baaa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:33:13.562518  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:33:14.454943  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:33:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:33:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 06:33:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:33:14.456535  543705 disk_worker.go:494] system disk:vda1
I0323 06:33:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:33:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:33:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:33:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:33:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:33:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:33:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:33:23.409791  543705 cpu.go:275] no items to output this cycle
I0323 06:33:23.409812  543705 memory.go:184] no items to output this cycle
E0323 06:33:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:33:33.409777  543705 memory.go:184] no items to output this cycle
I0323 06:33:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 06:33:38.571500  543705 disk_info.go:125] begin check local disk info of client
I0323 06:33:38.574106  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:33:38.574112  543705 disk_info.go:196] parse disk info done, disk is : [0xc00036af00 0xc00036af40]
I0323 06:33:40.153724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:33:40.153729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:33:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:33:43.410693  543705 memory.go:191] Add success.
I0323 06:33:43.409809  543705 cpu.go:282] Add success.
I0323 06:33:43.420422  543705 net.go:648] Add success.
I0323 06:33:43.423210  543705 net.go:770] primary dev: ETH0
I0323 06:33:43.423223  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:33:43.423236  543705 net.go:698] Add success.
I0323 06:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:33:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:33:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:33:53.410242  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:33:53.410259  543705 memory.go:184] no items to output this cycle
I0323 06:33:53.410270  543705 cpu.go:275] no items to output this cycle
E0323 06:34:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:34:03.409776  543705 memory.go:184] no items to output this cycle
I0323 06:34:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 06:34:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:34:13.409817  543705 memory.go:191] Add success.
I0323 06:34:13.409826  543705 cpu.go:282] Add success.
W0323 06:34:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:34:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:34:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:34:13.420316  543705 net.go:648] Add success.
I0323 06:34:13.423015  543705 net.go:770] primary dev: ETH0
I0323 06:34:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:34:13.423052  543705 net.go:698] Add success.
I0323 06:34:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:34:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:34:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 06:34:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:34:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 06:34:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:34:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:34:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:34:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:34:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:34:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:34:23.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:34:23.409918  543705 memory.go:184] no items to output this cycle
I0323 06:34:23.410062  543705 cpu.go:275] no items to output this cycle
E0323 06:34:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:34:33.409768  543705 memory.go:184] no items to output this cycle
I0323 06:34:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 06:34:38.574510  543705 disk_info.go:125] begin check local disk info of client
I0323 06:34:38.577098  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:34:38.577104  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052b700 0xc00052b740]
E0323 06:34:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:34:43.410622  543705 memory.go:191] Add success.
I0323 06:34:43.409812  543705 cpu.go:282] Add success.
I0323 06:34:43.420359  543705 net.go:648] Add success.
I0323 06:34:43.422953  543705 net.go:770] primary dev: ETH0
I0323 06:34:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:34:43.422978  543705 net.go:698] Add success.
I0323 06:34:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:34:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:34:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:34:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:34:53.409782  543705 memory.go:184] no items to output this cycle
I0323 06:34:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 06:35:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:35:03.409779  543705 memory.go:184] no items to output this cycle
I0323 06:35:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 06:35:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:35:13.409794  543705 memory.go:191] Add success.
I0323 06:35:13.409797  543705 cpu.go:282] Add success.
W0323 06:35:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:35:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:35:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:35:13.420159  543705 net.go:648] Add success.
I0323 06:35:13.422924  543705 net.go:770] primary dev: ETH0
I0323 06:35:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:35:13.422950  543705 net.go:698] Add success.
I0323 06:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:35:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:35:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 06:35:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:35:14.456568  543705 disk_worker.go:494] system disk:vda1
I0323 06:35:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:35:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:35:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:35:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:35:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:35:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:35:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:35:23.409773  543705 memory.go:184] no items to output this cycle
I0323 06:35:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:35:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:35:33.409817  543705 memory.go:184] no items to output this cycle
I0323 06:35:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 06:35:38.577527  543705 disk_info.go:125] begin check local disk info of client
I0323 06:35:38.580122  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:35:38.580140  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032b7c0 0xc00032b800]
E0323 06:35:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:35:43.410673  543705 memory.go:191] Add success.
I0323 06:35:43.409807  543705 cpu.go:282] Add success.
I0323 06:35:43.420451  543705 net.go:648] Add success.
I0323 06:35:43.423167  543705 net.go:770] primary dev: ETH0
I0323 06:35:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:35:43.423196  543705 net.go:698] Add success.
I0323 06:35:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:35:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:35:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:35:53.409779  543705 cpu.go:275] no items to output this cycle
I0323 06:35:53.409782  543705 memory.go:184] no items to output this cycle
E0323 06:36:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:36:03.409805  543705 memory.go:184] no items to output this cycle
I0323 06:36:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 06:36:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:36:13.409809  543705 memory.go:191] Add success.
I0323 06:36:13.409820  543705 cpu.go:282] Add success.
W0323 06:36:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:36:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:36:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:36:13.420557  543705 net.go:648] Add success.
I0323 06:36:13.423581  543705 net.go:770] primary dev: ETH0
I0323 06:36:13.423594  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:36:13.423605  543705 net.go:698] Add success.
I0323 06:36:13.509376  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c4525a36-25dd-4ebc-84b3-c4ac3efedc88","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:36:13.509417  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:36:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:36:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:36:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0323 06:36:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:36:14.456730  543705 disk_worker.go:494] system disk:vda1
I0323 06:36:14.456761  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:36:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:36:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:36:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:36:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:36:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:36:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:36:23.409806  543705 memory.go:184] no items to output this cycle
I0323 06:36:23.409821  543705 cpu.go:275] no items to output this cycle
I0323 06:36:33.409883  543705 cpu.go:275] no items to output this cycle
E0323 06:36:33.409932  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:36:33.409947  543705 memory.go:184] no items to output this cycle
I0323 06:36:38.580236  543705 disk_info.go:125] begin check local disk info of client
I0323 06:36:38.582875  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:36:38.582883  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0323 06:36:40.157466  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:36:40.157473  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:36:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:36:43.410735  543705 memory.go:191] Add success.
I0323 06:36:43.409791  543705 cpu.go:282] Add success.
I0323 06:36:43.420468  543705 net.go:648] Add success.
I0323 06:36:43.423028  543705 net.go:770] primary dev: ETH0
I0323 06:36:43.423042  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:36:43.423054  543705 net.go:698] Add success.
I0323 06:36:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:36:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:36:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:36:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:36:53.409783  543705 memory.go:184] no items to output this cycle
I0323 06:36:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 06:37:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:37:03.409793  543705 cpu.go:275] no items to output this cycle
I0323 06:37:03.409800  543705 memory.go:184] no items to output this cycle
E0323 06:37:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:37:13.409793  543705 memory.go:191] Add success.
I0323 06:37:13.409794  543705 cpu.go:282] Add success.
W0323 06:37:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:37:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:37:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:37:13.420112  543705 net.go:648] Add success.
I0323 06:37:13.422882  543705 net.go:770] primary dev: ETH0
I0323 06:37:13.422902  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:37:13.422916  543705 net.go:698] Add success.
I0323 06:37:13.453463  543705 event_worker.go:152] Polling the log file for events...
W0323 06:37:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:37:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 06:37:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:37:14.456831  543705 disk_worker.go:494] system disk:vda1
I0323 06:37:14.456871  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:37:14.457125  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:37:14.457132  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:37:14.457137  543705 custom_config.go:64] query custom config with name: gpu
E0323 06:37:15.456868  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:37:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:37:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:37:16.457945  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:37:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:37:16.458004  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:37:16.472333  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:37:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:37:23.409777  543705 memory.go:184] no items to output this cycle
I0323 06:37:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:37:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:37:33.409783  543705 memory.go:184] no items to output this cycle
I0323 06:37:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 06:37:38.583555  543705 disk_info.go:125] begin check local disk info of client
I0323 06:37:38.586165  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:37:38.586172  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab5c0 0xc0001ab600]
E0323 06:37:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:37:43.409782  543705 memory.go:191] Add success.
I0323 06:37:43.410129  543705 cpu.go:282] Add success.
I0323 06:37:43.420050  543705 net.go:648] Add success.
I0323 06:37:43.421072  543705 net.go:770] primary dev: ETH0
I0323 06:37:43.421093  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:37:43.421113  543705 net.go:698] Add success.
I0323 06:37:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:37:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:37:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:37:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:37:53.409795  543705 memory.go:184] no items to output this cycle
I0323 06:37:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 06:38:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:38:03.409802  543705 memory.go:184] no items to output this cycle
I0323 06:38:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 06:38:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:38:13.409810  543705 memory.go:191] Add success.
I0323 06:38:13.409819  543705 cpu.go:282] Add success.
W0323 06:38:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:38:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:38:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:38:13.420165  543705 net.go:648] Add success.
I0323 06:38:13.422849  543705 net.go:770] primary dev: ETH0
I0323 06:38:13.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:38:13.422875  543705 net.go:698] Add success.
I0323 06:38:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:38:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:38:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 06:38:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:38:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 06:38:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:38:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:38:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:38:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:38:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:38:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:38:23.409773  543705 memory.go:184] no items to output this cycle
I0323 06:38:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 06:38:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:38:33.409807  543705 memory.go:184] no items to output this cycle
I0323 06:38:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 06:38:38.586255  543705 disk_info.go:125] begin check local disk info of client
I0323 06:38:38.588816  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:38:38.588822  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f840 0xc00046f880]
E0323 06:38:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:38:43.410822  543705 memory.go:191] Add success.
I0323 06:38:43.409839  543705 cpu.go:282] Add success.
I0323 06:38:43.420530  543705 net.go:648] Add success.
I0323 06:38:43.423175  543705 net.go:770] primary dev: ETH0
I0323 06:38:43.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:38:43.423202  543705 net.go:698] Add success.
I0323 06:38:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:38:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:38:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:38:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:38:53.409767  543705 memory.go:184] no items to output this cycle
I0323 06:38:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 06:39:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:39:03.409807  543705 memory.go:184] no items to output this cycle
I0323 06:39:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 06:39:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:39:13.409794  543705 memory.go:191] Add success.
I0323 06:39:13.409803  543705 cpu.go:282] Add success.
W0323 06:39:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:39:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:39:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:39:13.420048  543705 net.go:648] Add success.
I0323 06:39:13.422802  543705 net.go:770] primary dev: ETH0
I0323 06:39:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:39:13.422829  543705 net.go:698] Add success.
I0323 06:39:13.468766  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1809d8b5-9022-48d2-b0dc-0b9b2aeb2af6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:39:13.468801  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:39:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:39:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:39:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 06:39:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:39:14.457083  543705 disk_worker.go:494] system disk:vda1
I0323 06:39:14.457112  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:39:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:39:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:39:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:39:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:39:16.472495  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:39:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:39:23.409785  543705 cpu.go:275] no items to output this cycle
I0323 06:39:23.409786  543705 memory.go:184] no items to output this cycle
E0323 06:39:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:39:33.409785  543705 memory.go:184] no items to output this cycle
I0323 06:39:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 06:39:38.588904  543705 disk_info.go:125] begin check local disk info of client
I0323 06:39:38.591533  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:39:38.591540  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b54c0 0xc0002b5500]
I0323 06:39:40.157722  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:39:40.157729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:39:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:39:43.410587  543705 memory.go:191] Add success.
I0323 06:39:43.409836  543705 cpu.go:282] Add success.
I0323 06:39:43.420344  543705 net.go:648] Add success.
I0323 06:39:43.422996  543705 net.go:770] primary dev: ETH0
I0323 06:39:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:39:43.423026  543705 net.go:698] Add success.
I0323 06:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:39:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:39:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:39:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:39:53.409779  543705 memory.go:184] no items to output this cycle
I0323 06:39:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 06:40:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:40:03.409784  543705 memory.go:184] no items to output this cycle
I0323 06:40:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 06:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:40:13.409810  543705 memory.go:191] Add success.
I0323 06:40:13.409812  543705 cpu.go:282] Add success.
W0323 06:40:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:40:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:40:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:40:13.420156  543705 net.go:648] Add success.
I0323 06:40:13.422867  543705 net.go:770] primary dev: ETH0
I0323 06:40:13.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:40:13.422893  543705 net.go:698] Add success.
I0323 06:40:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:40:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:40:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 06:40:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:40:14.459006  543705 disk_worker.go:494] system disk:vda1
I0323 06:40:14.459037  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:40:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:40:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:40:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:40:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:40:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:40:23.409794  543705 memory.go:184] no items to output this cycle
I0323 06:40:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 06:40:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:40:33.409781  543705 memory.go:184] no items to output this cycle
I0323 06:40:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 06:40:38.591601  543705 disk_info.go:125] begin check local disk info of client
I0323 06:40:38.594211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:40:38.594217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b4580 0xc0004b45c0]
E0323 06:40:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:40:43.410818  543705 memory.go:191] Add success.
I0323 06:40:43.409825  543705 cpu.go:282] Add success.
I0323 06:40:43.420548  543705 net.go:648] Add success.
I0323 06:40:43.423694  543705 net.go:770] primary dev: ETH0
I0323 06:40:43.423709  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:40:43.423722  543705 net.go:698] Add success.
I0323 06:40:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:40:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:40:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:40:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:40:53.409781  543705 memory.go:184] no items to output this cycle
I0323 06:40:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 06:41:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:41:03.409805  543705 memory.go:184] no items to output this cycle
I0323 06:41:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 06:41:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:41:13.409781  543705 memory.go:191] Add success.
I0323 06:41:13.409799  543705 cpu.go:282] Add success.
W0323 06:41:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:41:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:41:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:41:13.420061  543705 net.go:648] Add success.
I0323 06:41:13.423023  543705 net.go:770] primary dev: ETH0
I0323 06:41:13.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:41:13.423050  543705 net.go:698] Add success.
I0323 06:41:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:41:14.455228  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:41:14.455293  543705 disk_worker.go:708] disk space is not compliant
W0323 06:41:14.455296  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:41:14.457203  543705 disk_worker.go:494] system disk:vda1
I0323 06:41:14.457241  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:41:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:41:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:41:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:41:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:41:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:41:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:41:23.409778  543705 cpu.go:275] no items to output this cycle
I0323 06:41:23.409786  543705 memory.go:184] no items to output this cycle
E0323 06:41:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:41:33.409785  543705 memory.go:184] no items to output this cycle
I0323 06:41:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 06:41:38.594616  543705 disk_info.go:125] begin check local disk info of client
I0323 06:41:38.597173  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:41:38.597179  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a480 0xc00034a4c0]
E0323 06:41:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:41:43.410716  543705 memory.go:191] Add success.
I0323 06:41:43.409812  543705 cpu.go:282] Add success.
I0323 06:41:43.420403  543705 net.go:648] Add success.
I0323 06:41:43.423179  543705 net.go:770] primary dev: ETH0
I0323 06:41:43.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:41:43.423205  543705 net.go:698] Add success.
I0323 06:41:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:41:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:41:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:41:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:41:53.409791  543705 cpu.go:275] no items to output this cycle
I0323 06:41:53.409795  543705 memory.go:184] no items to output this cycle
E0323 06:42:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:42:03.409782  543705 cpu.go:275] no items to output this cycle
I0323 06:42:03.409785  543705 memory.go:184] no items to output this cycle
E0323 06:42:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:42:13.409786  543705 memory.go:191] Add success.
I0323 06:42:13.409805  543705 cpu.go:282] Add success.
W0323 06:42:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:42:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:42:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:42:13.420111  543705 net.go:648] Add success.
I0323 06:42:13.422928  543705 net.go:770] primary dev: ETH0
I0323 06:42:13.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:42:13.422958  543705 net.go:698] Add success.
I0323 06:42:13.468522  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9985a580-8803-417e-adad-3ccf87b82aaa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:42:13.468552  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 06:42:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:42:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 06:42:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:42:14.455973  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:42:14.455981  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:42:14.455987  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:42:14.456556  543705 disk_worker.go:494] system disk:vda1
I0323 06:42:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:42:15.456804  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:42:15.456811  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:42:16.457915  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:42:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:42:16.457974  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:42:16.457993  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:42:16.472291  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:42:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:42:23.409771  543705 memory.go:184] no items to output this cycle
I0323 06:42:23.409820  543705 cpu.go:275] no items to output this cycle
E0323 06:42:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:42:33.409786  543705 memory.go:184] no items to output this cycle
I0323 06:42:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 06:42:38.597271  543705 disk_info.go:125] begin check local disk info of client
I0323 06:42:38.599713  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:42:38.599719  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b45c0 0xc0004b4600]
I0323 06:42:40.161497  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:42:40.161503  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:42:43.410834  543705 memory.go:191] Add success.
I0323 06:42:43.409829  543705 cpu.go:282] Add success.
I0323 06:42:43.420509  543705 net.go:648] Add success.
I0323 06:42:43.423708  543705 net.go:770] primary dev: ETH0
I0323 06:42:43.423721  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:42:43.423734  543705 net.go:698] Add success.
I0323 06:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:42:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:42:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:42:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:42:53.409787  543705 memory.go:184] no items to output this cycle
I0323 06:42:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 06:43:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:43:03.409816  543705 memory.go:184] no items to output this cycle
I0323 06:43:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 06:43:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:43:13.409789  543705 memory.go:191] Add success.
W0323 06:43:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 06:43:13.409823  543705 cpu.go:282] Add success.
W0323 06:43:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:43:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:43:13.420163  543705 net.go:648] Add success.
I0323 06:43:13.422866  543705 net.go:770] primary dev: ETH0
I0323 06:43:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:43:13.423051  543705 net.go:698] Add success.
I0323 06:43:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:43:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:43:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 06:43:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:43:14.456503  543705 disk_worker.go:494] system disk:vda1
I0323 06:43:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:43:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:43:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:43:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:43:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:43:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:43:23.409790  543705 cpu.go:275] no items to output this cycle
I0323 06:43:23.409797  543705 memory.go:184] no items to output this cycle
E0323 06:43:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:43:33.409815  543705 memory.go:184] no items to output this cycle
I0323 06:43:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 06:43:38.599807  543705 disk_info.go:125] begin check local disk info of client
I0323 06:43:38.602285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:43:38.602292  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053e0c0 0xc00053e100]
E0323 06:43:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:43:43.410694  543705 memory.go:191] Add success.
I0323 06:43:43.409812  543705 cpu.go:282] Add success.
I0323 06:43:43.420391  543705 net.go:648] Add success.
I0323 06:43:43.422932  543705 net.go:770] primary dev: ETH0
I0323 06:43:43.422947  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:43:43.422959  543705 net.go:698] Add success.
I0323 06:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:43:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:43:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:43:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:43:53.409808  543705 memory.go:184] no items to output this cycle
I0323 06:43:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 06:44:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:44:03.409792  543705 cpu.go:275] no items to output this cycle
I0323 06:44:03.409798  543705 memory.go:184] no items to output this cycle
E0323 06:44:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:44:13.409819  543705 memory.go:191] Add success.
I0323 06:44:13.409825  543705 cpu.go:282] Add success.
W0323 06:44:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:44:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:44:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:44:13.420119  543705 net.go:648] Add success.
I0323 06:44:13.423230  543705 net.go:770] primary dev: ETH0
I0323 06:44:13.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:44:13.423261  543705 net.go:698] Add success.
I0323 06:44:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:44:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:44:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 06:44:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:44:14.456822  543705 disk_worker.go:494] system disk:vda1
I0323 06:44:14.456850  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:44:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:44:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:44:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:44:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:44:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:44:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:44:23.409759  543705 memory.go:184] no items to output this cycle
I0323 06:44:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 06:44:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:44:33.409821  543705 memory.go:184] no items to output this cycle
I0323 06:44:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 06:44:38.602654  543705 disk_info.go:125] begin check local disk info of client
I0323 06:44:38.605195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:44:38.605202  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370840 0xc000370880]
E0323 06:44:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:44:43.410627  543705 memory.go:191] Add success.
I0323 06:44:43.409828  543705 cpu.go:282] Add success.
I0323 06:44:43.420325  543705 net.go:648] Add success.
I0323 06:44:43.422859  543705 net.go:770] primary dev: ETH0
I0323 06:44:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:44:43.422885  543705 net.go:698] Add success.
I0323 06:44:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:44:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:44:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:44:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:44:53.409773  543705 memory.go:184] no items to output this cycle
I0323 06:44:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 06:45:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:45:03.409782  543705 memory.go:184] no items to output this cycle
I0323 06:45:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 06:45:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:45:13.409790  543705 cpu.go:282] Add success.
I0323 06:45:13.409792  543705 memory.go:191] Add success.
W0323 06:45:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:45:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:45:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:45:13.420273  543705 net.go:648] Add success.
I0323 06:45:13.423014  543705 net.go:770] primary dev: ETH0
I0323 06:45:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:45:13.423046  543705 net.go:698] Add success.
I0323 06:45:13.464866  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ad1895c8-1522-438f-b968-7916db31b371","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:45:13.464898  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:45:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:45:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:45:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0323 06:45:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:45:14.456760  543705 disk_worker.go:494] system disk:vda1
I0323 06:45:14.456800  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:45:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:45:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:45:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:45:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:45:16.472435  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:45:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:45:23.409779  543705 cpu.go:275] no items to output this cycle
I0323 06:45:23.409782  543705 memory.go:184] no items to output this cycle
E0323 06:45:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:45:33.409783  543705 memory.go:184] no items to output this cycle
I0323 06:45:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 06:45:38.605679  543705 disk_info.go:125] begin check local disk info of client
I0323 06:45:38.608174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:45:38.608181  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393cc0 0xc000393d00]
I0323 06:45:40.161729  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:45:40.161735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:45:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:45:43.410655  543705 memory.go:191] Add success.
I0323 06:45:43.409825  543705 cpu.go:282] Add success.
I0323 06:45:43.420426  543705 net.go:648] Add success.
I0323 06:45:43.423082  543705 net.go:770] primary dev: ETH0
I0323 06:45:43.423095  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:45:43.423109  543705 net.go:698] Add success.
I0323 06:45:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:45:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:45:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:45:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:45:53.409777  543705 memory.go:184] no items to output this cycle
I0323 06:45:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:46:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:46:03.409802  543705 memory.go:184] no items to output this cycle
I0323 06:46:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 06:46:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:46:13.409781  543705 memory.go:191] Add success.
W0323 06:46:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 06:46:13.409809  543705 cpu.go:282] Add success.
W0323 06:46:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:46:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:46:13.420089  543705 net.go:648] Add success.
I0323 06:46:13.422978  543705 net.go:770] primary dev: ETH0
I0323 06:46:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:46:13.423008  543705 net.go:698] Add success.
I0323 06:46:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:46:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:46:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 06:46:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:46:14.456529  543705 disk_worker.go:494] system disk:vda1
I0323 06:46:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:46:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:46:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:46:16.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:46:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:46:16.472442  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:46:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:46:23.409876  543705 cpu.go:275] no items to output this cycle
I0323 06:46:23.409888  543705 memory.go:184] no items to output this cycle
E0323 06:46:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:46:33.409812  543705 memory.go:184] no items to output this cycle
I0323 06:46:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 06:46:38.608273  543705 disk_info.go:125] begin check local disk info of client
I0323 06:46:38.610833  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:46:38.610840  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 06:46:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:46:43.410805  543705 memory.go:191] Add success.
I0323 06:46:43.409802  543705 cpu.go:282] Add success.
I0323 06:46:43.420575  543705 net.go:648] Add success.
I0323 06:46:43.423100  543705 net.go:770] primary dev: ETH0
I0323 06:46:43.423113  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:46:43.423126  543705 net.go:698] Add success.
I0323 06:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:46:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:46:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:46:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:46:53.409763  543705 memory.go:184] no items to output this cycle
I0323 06:46:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 06:47:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:47:03.409815  543705 memory.go:184] no items to output this cycle
I0323 06:47:03.409832  543705 cpu.go:275] no items to output this cycle
E0323 06:47:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:47:13.409809  543705 memory.go:191] Add success.
I0323 06:47:13.409817  543705 cpu.go:282] Add success.
W0323 06:47:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:47:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:47:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:47:13.420122  543705 net.go:648] Add success.
I0323 06:47:13.422698  543705 net.go:770] primary dev: ETH0
I0323 06:47:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:47:13.422728  543705 net.go:698] Add success.
I0323 06:47:13.453262  543705 event_worker.go:152] Polling the log file for events...
W0323 06:47:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:47:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 06:47:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:47:14.456799  543705 disk_worker.go:494] system disk:vda1
I0323 06:47:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:47:14.457120  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:47:14.457128  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:47:14.457133  543705 custom_config.go:64] query custom config with name: gpu
E0323 06:47:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:47:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:47:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:47:16.457959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:47:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:47:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:47:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:47:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:47:23.409761  543705 memory.go:184] no items to output this cycle
I0323 06:47:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:47:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:47:33.409793  543705 memory.go:184] no items to output this cycle
I0323 06:47:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 06:47:38.610933  543705 disk_info.go:125] begin check local disk info of client
I0323 06:47:38.613594  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:47:38.613602  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 06:47:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:47:43.410750  543705 memory.go:191] Add success.
I0323 06:47:43.409820  543705 cpu.go:282] Add success.
I0323 06:47:43.420539  543705 net.go:648] Add success.
I0323 06:47:43.423374  543705 net.go:770] primary dev: ETH0
I0323 06:47:43.423387  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:47:43.423400  543705 net.go:698] Add success.
I0323 06:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:47:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:47:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:47:53.410198  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:47:53.410213  543705 memory.go:184] no items to output this cycle
I0323 06:47:53.410236  543705 cpu.go:275] no items to output this cycle
E0323 06:48:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:48:03.409782  543705 memory.go:184] no items to output this cycle
I0323 06:48:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 06:48:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:48:13.409810  543705 memory.go:191] Add success.
I0323 06:48:13.409816  543705 cpu.go:282] Add success.
W0323 06:48:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:48:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:48:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:48:13.420109  543705 net.go:648] Add success.
I0323 06:48:13.423041  543705 net.go:770] primary dev: ETH0
I0323 06:48:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:48:13.423068  543705 net.go:698] Add success.
I0323 06:48:13.468558  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7f028851-dc0e-4c8e-b0db-c9816d3b6e46","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:48:13.468593  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:48:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:48:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:48:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0323 06:48:14.455152  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:48:14.456494  543705 disk_worker.go:494] system disk:vda1
I0323 06:48:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:48:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:48:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:48:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:48:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:48:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:48:23.409766  543705 memory.go:184] no items to output this cycle
I0323 06:48:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 06:48:33.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:48:33.409905  543705 cpu.go:275] no items to output this cycle
I0323 06:48:33.409961  543705 memory.go:184] no items to output this cycle
I0323 06:48:38.613675  543705 disk_info.go:125] begin check local disk info of client
I0323 06:48:38.616271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:48:38.616278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c4000 0xc0004c4040]
I0323 06:48:40.165519  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:48:40.165525  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:48:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:48:43.410733  543705 memory.go:191] Add success.
I0323 06:48:43.409819  543705 cpu.go:282] Add success.
I0323 06:48:43.420480  543705 net.go:648] Add success.
I0323 06:48:43.423407  543705 net.go:770] primary dev: ETH0
I0323 06:48:43.423422  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:48:43.423437  543705 net.go:698] Add success.
I0323 06:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:48:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:48:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:48:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:48:53.409772  543705 memory.go:184] no items to output this cycle
I0323 06:48:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 06:49:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:49:03.409787  543705 memory.go:184] no items to output this cycle
I0323 06:49:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 06:49:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:49:13.409800  543705 memory.go:191] Add success.
I0323 06:49:13.409803  543705 cpu.go:282] Add success.
W0323 06:49:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:49:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:49:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:49:13.420164  543705 net.go:648] Add success.
I0323 06:49:13.422829  543705 net.go:770] primary dev: ETH0
I0323 06:49:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:49:13.422853  543705 net.go:698] Add success.
I0323 06:49:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:49:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:49:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 06:49:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:49:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 06:49:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:49:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:49:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:49:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:49:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:49:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:49:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:49:23.409819  543705 memory.go:184] no items to output this cycle
I0323 06:49:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 06:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:49:33.409783  543705 memory.go:184] no items to output this cycle
I0323 06:49:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 06:49:38.616376  543705 disk_info.go:125] begin check local disk info of client
I0323 06:49:38.618950  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:49:38.618958  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000ec0c0 0xc0000ec100]
E0323 06:49:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:49:43.410903  543705 memory.go:191] Add success.
I0323 06:49:43.409820  543705 cpu.go:282] Add success.
I0323 06:49:43.420518  543705 net.go:770] primary dev: ETH0
I0323 06:49:43.420532  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:49:43.420545  543705 net.go:698] Add success.
I0323 06:49:43.420894  543705 net.go:648] Add success.
I0323 06:49:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:49:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:49:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:49:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:49:53.409794  543705 memory.go:184] no items to output this cycle
I0323 06:49:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:50:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:50:03.409786  543705 memory.go:184] no items to output this cycle
I0323 06:50:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 06:50:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:50:13.409788  543705 memory.go:191] Add success.
I0323 06:50:13.409790  543705 cpu.go:282] Add success.
W0323 06:50:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:50:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:50:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:50:13.420331  543705 net.go:648] Add success.
I0323 06:50:13.423095  543705 net.go:770] primary dev: ETH0
I0323 06:50:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:50:13.423119  543705 net.go:698] Add success.
I0323 06:50:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:50:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:50:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 06:50:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:50:14.456615  543705 disk_worker.go:494] system disk:vda1
I0323 06:50:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:50:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:50:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:50:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:50:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:50:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:50:23.409765  543705 memory.go:184] no items to output this cycle
I0323 06:50:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 06:50:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:50:33.409811  543705 memory.go:184] no items to output this cycle
I0323 06:50:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 06:50:38.619732  543705 disk_info.go:125] begin check local disk info of client
I0323 06:50:38.622329  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:50:38.622337  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304800 0xc000304840]
E0323 06:50:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:50:43.410634  543705 memory.go:191] Add success.
I0323 06:50:43.409795  543705 cpu.go:282] Add success.
I0323 06:50:43.420562  543705 net.go:648] Add success.
I0323 06:50:43.423169  543705 net.go:770] primary dev: ETH0
I0323 06:50:43.423185  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:50:43.423198  543705 net.go:698] Add success.
I0323 06:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:50:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:50:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:50:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:50:53.409783  543705 memory.go:184] no items to output this cycle
I0323 06:50:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 06:51:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:51:03.409816  543705 memory.go:184] no items to output this cycle
I0323 06:51:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 06:51:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:51:13.409789  543705 memory.go:191] Add success.
I0323 06:51:13.409798  543705 cpu.go:282] Add success.
W0323 06:51:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:51:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:51:13.420040  543705 net.go:648] Add success.
I0323 06:51:13.422790  543705 net.go:770] primary dev: ETH0
I0323 06:51:13.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:51:13.422814  543705 net.go:698] Add success.
I0323 06:51:13.468313  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ca80015-3267-4fd4-934e-b5815e0b7729","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:51:13.468349  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:51:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:51:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:51:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 06:51:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:51:14.456538  543705 disk_worker.go:494] system disk:vda1
I0323 06:51:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:51:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:51:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:51:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:51:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:51:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:51:23.410379  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:51:23.410386  543705 cpu.go:275] no items to output this cycle
I0323 06:51:23.410393  543705 memory.go:184] no items to output this cycle
E0323 06:51:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:51:33.409791  543705 memory.go:184] no items to output this cycle
I0323 06:51:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 06:51:38.622747  543705 disk_info.go:125] begin check local disk info of client
I0323 06:51:38.625314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:51:38.625320  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b53c0 0xc0004b5400]
I0323 06:51:40.165730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:51:40.165736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:51:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:51:43.410624  543705 memory.go:191] Add success.
I0323 06:51:43.409810  543705 cpu.go:282] Add success.
I0323 06:51:43.420325  543705 net.go:648] Add success.
I0323 06:51:43.422989  543705 net.go:770] primary dev: ETH0
I0323 06:51:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:51:43.423018  543705 net.go:698] Add success.
I0323 06:51:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:51:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:51:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:51:53.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:51:53.409882  543705 memory.go:184] no items to output this cycle
I0323 06:51:53.410038  543705 cpu.go:275] no items to output this cycle
E0323 06:52:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:52:03.409783  543705 memory.go:184] no items to output this cycle
I0323 06:52:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 06:52:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:52:13.409797  543705 memory.go:191] Add success.
I0323 06:52:13.409814  543705 cpu.go:282] Add success.
W0323 06:52:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:52:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:52:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:52:13.420551  543705 net.go:648] Add success.
I0323 06:52:13.423445  543705 net.go:770] primary dev: ETH0
I0323 06:52:13.423458  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:52:13.423471  543705 net.go:698] Add success.
W0323 06:52:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:52:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 06:52:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:52:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:52:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:52:14.455937  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:52:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 06:52:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:52:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:52:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:52:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 06:52:16.457960  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:52:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:52:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:52:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:52:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:52:23.409798  543705 memory.go:184] no items to output this cycle
I0323 06:52:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 06:52:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:52:33.409781  543705 memory.go:184] no items to output this cycle
I0323 06:52:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 06:52:38.625675  543705 disk_info.go:125] begin check local disk info of client
I0323 06:52:38.628212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:52:38.628218  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bb80 0xc00007bbc0]
E0323 06:52:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:52:43.410623  543705 memory.go:191] Add success.
I0323 06:52:43.409812  543705 cpu.go:282] Add success.
I0323 06:52:43.420319  543705 net.go:648] Add success.
I0323 06:52:43.423006  543705 net.go:770] primary dev: ETH0
I0323 06:52:43.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:52:43.423032  543705 net.go:698] Add success.
I0323 06:52:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:52:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:52:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:52:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:52:53.409770  543705 memory.go:184] no items to output this cycle
I0323 06:52:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 06:53:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:53:03.409810  543705 memory.go:184] no items to output this cycle
I0323 06:53:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 06:53:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:53:13.409786  543705 memory.go:191] Add success.
W0323 06:53:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 06:53:13.409812  543705 cpu.go:282] Add success.
W0323 06:53:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:53:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:53:13.420179  543705 net.go:648] Add success.
I0323 06:53:13.422949  543705 net.go:770] primary dev: ETH0
I0323 06:53:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:53:13.422973  543705 net.go:698] Add success.
I0323 06:53:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:53:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:53:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 06:53:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:53:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 06:53:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:53:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:53:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:53:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:53:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:53:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:53:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:53:23.409763  543705 memory.go:184] no items to output this cycle
I0323 06:53:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 06:53:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:53:33.409808  543705 memory.go:184] no items to output this cycle
I0323 06:53:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 06:53:38.628312  543705 disk_info.go:125] begin check local disk info of client
I0323 06:53:38.630879  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:53:38.630888  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5f40 0xc0002bc000]
E0323 06:53:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:53:43.410559  543705 memory.go:191] Add success.
I0323 06:53:43.409824  543705 cpu.go:282] Add success.
I0323 06:53:43.420268  543705 net.go:648] Add success.
I0323 06:53:43.422766  543705 net.go:770] primary dev: ETH0
I0323 06:53:43.422778  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:53:43.422791  543705 net.go:698] Add success.
I0323 06:53:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:53:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:53:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:53:53.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:53:53.409872  543705 memory.go:184] no items to output this cycle
I0323 06:53:53.409926  543705 cpu.go:275] no items to output this cycle
E0323 06:54:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:54:03.409780  543705 memory.go:184] no items to output this cycle
I0323 06:54:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 06:54:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:54:13.409815  543705 memory.go:191] Add success.
I0323 06:54:13.409819  543705 cpu.go:282] Add success.
W0323 06:54:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:54:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:54:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:54:13.420132  543705 net.go:648] Add success.
I0323 06:54:13.422753  543705 net.go:770] primary dev: ETH0
I0323 06:54:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:54:13.422778  543705 net.go:698] Add success.
I0323 06:54:13.587885  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9daa3ef3-8bec-4f23-91ff-3b943469ca93","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:54:13.587920  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 06:54:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:54:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:54:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 06:54:14.455224  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:54:14.456778  543705 disk_worker.go:494] system disk:vda1
I0323 06:54:14.456809  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:54:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:54:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:54:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:54:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:54:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:54:23.409766  543705 memory.go:184] no items to output this cycle
I0323 06:54:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 06:54:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:54:33.409777  543705 memory.go:184] no items to output this cycle
I0323 06:54:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 06:54:38.631803  543705 disk_info.go:125] begin check local disk info of client
I0323 06:54:38.634404  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:54:38.634411  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c52c0 0xc0004c5300]
I0323 06:54:40.166787  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:54:40.166793  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:54:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:54:43.410677  543705 memory.go:191] Add success.
I0323 06:54:43.409814  543705 cpu.go:282] Add success.
I0323 06:54:43.420471  543705 net.go:648] Add success.
I0323 06:54:43.423269  543705 net.go:770] primary dev: ETH0
I0323 06:54:43.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:54:43.423311  543705 net.go:698] Add success.
I0323 06:54:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:54:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:54:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:54:53.410668  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:54:53.410685  543705 memory.go:184] no items to output this cycle
I0323 06:54:53.410689  543705 cpu.go:275] no items to output this cycle
E0323 06:55:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:55:03.409794  543705 memory.go:184] no items to output this cycle
I0323 06:55:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 06:55:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:55:13.409792  543705 memory.go:191] Add success.
I0323 06:55:13.409794  543705 cpu.go:282] Add success.
W0323 06:55:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:55:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:55:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:55:13.420067  543705 net.go:648] Add success.
I0323 06:55:13.422565  543705 net.go:770] primary dev: ETH0
I0323 06:55:13.422579  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:55:13.422591  543705 net.go:698] Add success.
I0323 06:55:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:55:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:55:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 06:55:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:55:14.456501  543705 disk_worker.go:494] system disk:vda1
I0323 06:55:14.456547  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:55:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:55:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:55:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:55:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:55:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:55:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:55:23.409765  543705 memory.go:184] no items to output this cycle
I0323 06:55:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 06:55:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:55:33.409774  543705 memory.go:184] no items to output this cycle
I0323 06:55:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 06:55:38.634815  543705 disk_info.go:125] begin check local disk info of client
I0323 06:55:38.637351  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:55:38.637358  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab640 0xc0001ab680]
E0323 06:55:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:55:43.410666  543705 memory.go:191] Add success.
I0323 06:55:43.409797  543705 cpu.go:282] Add success.
I0323 06:55:43.420375  543705 net.go:648] Add success.
I0323 06:55:43.422902  543705 net.go:770] primary dev: ETH0
I0323 06:55:43.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:55:43.422928  543705 net.go:698] Add success.
I0323 06:55:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:55:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:55:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:55:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:55:53.409773  543705 memory.go:184] no items to output this cycle
I0323 06:55:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 06:56:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:56:03.409779  543705 memory.go:184] no items to output this cycle
I0323 06:56:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 06:56:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:56:13.409790  543705 memory.go:191] Add success.
I0323 06:56:13.409809  543705 cpu.go:282] Add success.
W0323 06:56:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:56:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:56:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:56:13.420097  543705 net.go:648] Add success.
I0323 06:56:13.422774  543705 net.go:770] primary dev: ETH0
I0323 06:56:13.422787  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:56:13.422798  543705 net.go:698] Add success.
I0323 06:56:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:56:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:56:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 06:56:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:56:14.456603  543705 disk_worker.go:494] system disk:vda1
I0323 06:56:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:56:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:56:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:56:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:56:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:56:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:56:23.409778  543705 memory.go:184] no items to output this cycle
I0323 06:56:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 06:56:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:56:33.409774  543705 memory.go:184] no items to output this cycle
I0323 06:56:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 06:56:38.637674  543705 disk_info.go:125] begin check local disk info of client
I0323 06:56:38.640220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:56:38.640226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b0c0 0xc00047b100]
E0323 06:56:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:56:43.410883  543705 memory.go:191] Add success.
I0323 06:56:43.409813  543705 cpu.go:282] Add success.
I0323 06:56:43.420589  543705 net.go:648] Add success.
I0323 06:56:43.423550  543705 net.go:770] primary dev: ETH0
I0323 06:56:43.423564  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:56:43.423576  543705 net.go:698] Add success.
I0323 06:56:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:56:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:56:53.409785  543705 memory.go:184] no items to output this cycle
I0323 06:56:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 06:57:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:57:03.409779  543705 memory.go:184] no items to output this cycle
I0323 06:57:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 06:57:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:57:13.409906  543705 memory.go:191] Add success.
I0323 06:57:13.409927  543705 cpu.go:282] Add success.
W0323 06:57:13.409941  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:57:13.409954  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:57:13.409958  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:57:13.419727  543705 net.go:648] Add success.
I0323 06:57:13.422513  543705 net.go:770] primary dev: ETH0
I0323 06:57:13.422536  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:57:13.422548  543705 net.go:698] Add success.
I0323 06:57:13.428666  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 06:57:13.452790  543705 event_worker.go:152] Polling the log file for events...
I0323 06:57:13.467843  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"909125a5-5d08-4e14-8bfe-8011dc0d88b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 06:57:13.467875  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 06:57:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:57:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0323 06:57:14.455156  543705 disk_worker.go:728] disk inode is not compliant
E0323 06:57:14.456981  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 06:57:14.456990  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 06:57:14.456995  543705 custom_config.go:64] query custom config with name: gpu
I0323 06:57:14.456998  543705 disk_worker.go:494] system disk:vda1
I0323 06:57:14.457031  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 06:57:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 06:57:15.456856  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 06:57:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 06:57:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:57:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:57:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:57:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:57:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:57:23.409794  543705 memory.go:184] no items to output this cycle
I0323 06:57:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 06:57:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:57:33.409776  543705 memory.go:184] no items to output this cycle
I0323 06:57:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 06:57:38.640844  543705 disk_info.go:125] begin check local disk info of client
I0323 06:57:38.643418  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:57:38.643424  543705 disk_info.go:196] parse disk info done, disk is : [0xc000463c80 0xc000463cc0]
I0323 06:57:40.169731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 06:57:40.169737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 06:57:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:57:43.410949  543705 memory.go:191] Add success.
I0323 06:57:43.409815  543705 cpu.go:282] Add success.
I0323 06:57:43.420665  543705 net.go:648] Add success.
I0323 06:57:43.423519  543705 net.go:770] primary dev: ETH0
I0323 06:57:43.423534  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:57:43.423549  543705 net.go:698] Add success.
I0323 06:57:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:57:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:57:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:57:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:57:53.409771  543705 memory.go:184] no items to output this cycle
I0323 06:57:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 06:58:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:58:03.409775  543705 memory.go:184] no items to output this cycle
I0323 06:58:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 06:58:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:58:13.409797  543705 memory.go:191] Add success.
I0323 06:58:13.409796  543705 cpu.go:282] Add success.
W0323 06:58:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:58:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:58:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:58:13.420527  543705 net.go:648] Add success.
I0323 06:58:13.423219  543705 net.go:770] primary dev: ETH0
I0323 06:58:13.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:58:13.423244  543705 net.go:698] Add success.
I0323 06:58:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:58:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:58:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 06:58:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:58:14.456550  543705 disk_worker.go:494] system disk:vda1
I0323 06:58:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:58:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:58:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:58:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:58:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:58:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:58:23.409793  543705 memory.go:184] no items to output this cycle
I0323 06:58:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 06:58:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:58:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 06:58:33.409799  543705 memory.go:184] no items to output this cycle
I0323 06:58:38.643860  543705 disk_info.go:125] begin check local disk info of client
I0323 06:58:38.646423  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:58:38.646429  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae00 0xc00007ae40]
E0323 06:58:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:58:43.410802  543705 memory.go:191] Add success.
I0323 06:58:43.409813  543705 cpu.go:282] Add success.
I0323 06:58:43.420518  543705 net.go:648] Add success.
I0323 06:58:43.423302  543705 net.go:770] primary dev: ETH0
I0323 06:58:43.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:58:43.423329  543705 net.go:698] Add success.
I0323 06:58:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:58:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:58:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:58:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:58:53.409776  543705 memory.go:184] no items to output this cycle
I0323 06:58:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 06:59:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:59:03.409802  543705 memory.go:184] no items to output this cycle
I0323 06:59:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 06:59:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:59:13.409793  543705 memory.go:191] Add success.
I0323 06:59:13.409817  543705 cpu.go:282] Add success.
W0323 06:59:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 06:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 06:59:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 06:59:13.419739  543705 net.go:648] Add success.
I0323 06:59:13.422453  543705 net.go:770] primary dev: ETH0
I0323 06:59:13.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:59:13.422478  543705 net.go:698] Add success.
I0323 06:59:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 06:59:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 06:59:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 06:59:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 06:59:14.456525  543705 disk_worker.go:494] system disk:vda1
I0323 06:59:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 06:59:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 06:59:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:59:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:59:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 06:59:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 06:59:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:59:23.409799  543705 memory.go:184] no items to output this cycle
I0323 06:59:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 06:59:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:59:33.409783  543705 memory.go:184] no items to output this cycle
I0323 06:59:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 06:59:38.646873  543705 disk_info.go:125] begin check local disk info of client
I0323 06:59:38.649475  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 06:59:38.649482  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047a340 0xc00047a380]
E0323 06:59:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:59:43.410738  543705 memory.go:191] Add success.
I0323 06:59:43.409820  543705 cpu.go:282] Add success.
I0323 06:59:43.420420  543705 net.go:648] Add success.
I0323 06:59:43.423138  543705 net.go:770] primary dev: ETH0
I0323 06:59:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0323 06:59:43.423164  543705 net.go:698] Add success.
I0323 06:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 06:59:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 06:59:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 06:59:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 06:59:53.409788  543705 memory.go:184] no items to output this cycle
I0323 06:59:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:00:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:00:03.409793  543705 memory.go:184] no items to output this cycle
I0323 07:00:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:00:13.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:00:13.409911  543705 memory.go:191] Add success.
W0323 07:00:13.409938  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:00:13.409949  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:00:13.409953  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:00:13.410004  543705 cpu.go:282] Add success.
I0323 07:00:13.419708  543705 net.go:648] Add success.
I0323 07:00:13.422785  543705 net.go:770] primary dev: ETH0
I0323 07:00:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:00:13.422808  543705 net.go:698] Add success.
I0323 07:00:13.464196  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1e1def7d-2511-4898-837c-6fb97e5f5cfe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:00:13.464227  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:00:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:00:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:00:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 07:00:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:00:14.456490  543705 disk_worker.go:494] system disk:vda1
I0323 07:00:14.456533  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:00:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:00:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:00:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:00:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:00:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:00:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:00:23.409814  543705 memory.go:184] no items to output this cycle
I0323 07:00:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 07:00:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:00:33.409793  543705 memory.go:184] no items to output this cycle
I0323 07:00:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 07:00:38.649696  543705 disk_info.go:125] begin check local disk info of client
I0323 07:00:38.652264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:00:38.652272  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0323 07:00:40.169869  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:00:40.169875  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:00:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:00:43.410736  543705 memory.go:191] Add success.
I0323 07:00:43.409802  543705 cpu.go:282] Add success.
I0323 07:00:43.420430  543705 net.go:648] Add success.
I0323 07:00:43.423107  543705 net.go:770] primary dev: ETH0
I0323 07:00:43.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:00:43.423132  543705 net.go:698] Add success.
I0323 07:00:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:00:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:00:53.409786  543705 memory.go:184] no items to output this cycle
I0323 07:00:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 07:01:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:01:03.409790  543705 memory.go:184] no items to output this cycle
I0323 07:01:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 07:01:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:01:13.409775  543705 memory.go:191] Add success.
W0323 07:01:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:01:13.409816  543705 cpu.go:282] Add success.
W0323 07:01:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:01:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:01:13.420231  543705 net.go:648] Add success.
I0323 07:01:13.422844  543705 net.go:770] primary dev: ETH0
I0323 07:01:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:01:13.422868  543705 net.go:698] Add success.
I0323 07:01:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:01:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:01:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 07:01:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:01:14.456560  543705 disk_worker.go:494] system disk:vda1
I0323 07:01:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:01:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:01:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:01:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:01:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:01:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:01:23.409786  543705 memory.go:184] no items to output this cycle
I0323 07:01:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 07:01:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:01:33.409825  543705 memory.go:184] no items to output this cycle
I0323 07:01:33.409836  543705 cpu.go:275] no items to output this cycle
I0323 07:01:38.652356  543705 disk_info.go:125] begin check local disk info of client
I0323 07:01:38.654934  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:01:38.654940  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b740 0xc00047b780]
E0323 07:01:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:01:43.410702  543705 memory.go:191] Add success.
I0323 07:01:43.409796  543705 cpu.go:282] Add success.
I0323 07:01:43.420427  543705 net.go:648] Add success.
I0323 07:01:43.423149  543705 net.go:770] primary dev: ETH0
I0323 07:01:43.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:01:43.423175  543705 net.go:698] Add success.
I0323 07:01:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:01:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:01:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:01:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:01:53.409793  543705 memory.go:184] no items to output this cycle
I0323 07:01:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 07:02:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:02:03.409773  543705 memory.go:184] no items to output this cycle
I0323 07:02:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:02:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:02:13.409808  543705 memory.go:191] Add success.
I0323 07:02:13.409815  543705 cpu.go:282] Add success.
W0323 07:02:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:02:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:02:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:02:13.420321  543705 net.go:648] Add success.
I0323 07:02:13.423208  543705 net.go:770] primary dev: ETH0
I0323 07:02:13.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:02:13.423233  543705 net.go:698] Add success.
W0323 07:02:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:02:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 07:02:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:02:14.456894  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:02:14.456903  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:02:14.456909  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:02:14.456982  543705 disk_worker.go:494] system disk:vda1
I0323 07:02:14.457022  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:02:15.456793  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:02:15.456802  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:02:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:02:16.457931  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:02:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:02:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:02:16.472341  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:02:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:02:23.409784  543705 memory.go:184] no items to output this cycle
I0323 07:02:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 07:02:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:02:33.409779  543705 memory.go:184] no items to output this cycle
I0323 07:02:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 07:02:38.655024  543705 disk_info.go:125] begin check local disk info of client
I0323 07:02:38.657613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:02:38.657620  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004fd240 0xc0004fd280]
E0323 07:02:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:02:43.410742  543705 memory.go:191] Add success.
I0323 07:02:43.409804  543705 cpu.go:282] Add success.
I0323 07:02:43.420487  543705 net.go:648] Add success.
I0323 07:02:43.423164  543705 net.go:770] primary dev: ETH0
I0323 07:02:43.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:02:43.423189  543705 net.go:698] Add success.
I0323 07:02:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:02:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:02:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:02:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:02:53.409783  543705 cpu.go:275] no items to output this cycle
I0323 07:02:53.409792  543705 memory.go:184] no items to output this cycle
E0323 07:03:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:03:03.409786  543705 memory.go:184] no items to output this cycle
I0323 07:03:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 07:03:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:03:13.409798  543705 memory.go:191] Add success.
I0323 07:03:13.409801  543705 cpu.go:282] Add success.
W0323 07:03:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:03:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:03:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:03:13.420179  543705 net.go:648] Add success.
I0323 07:03:13.422815  543705 net.go:770] primary dev: ETH0
I0323 07:03:13.422828  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:03:13.422840  543705 net.go:698] Add success.
I0323 07:03:13.470273  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d60a518f-5373-4615-95ab-10cb1d4e186e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:03:13.470306  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:03:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:03:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:03:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 07:03:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:03:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 07:03:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:03:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:03:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:03:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:03:16.458045  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:03:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:03:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:03:23.409805  543705 memory.go:184] no items to output this cycle
I0323 07:03:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 07:03:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:03:33.409773  543705 memory.go:184] no items to output this cycle
I0323 07:03:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 07:03:38.657678  543705 disk_info.go:125] begin check local disk info of client
I0323 07:03:38.660251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:03:38.660258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000521940 0xc000521980]
I0323 07:03:40.173549  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:03:40.173556  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:03:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:03:43.410685  543705 memory.go:191] Add success.
I0323 07:03:43.409806  543705 cpu.go:282] Add success.
I0323 07:03:43.420389  543705 net.go:648] Add success.
I0323 07:03:43.423189  543705 net.go:770] primary dev: ETH0
I0323 07:03:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:03:43.423214  543705 net.go:698] Add success.
I0323 07:03:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:03:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:03:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:03:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:03:53.409772  543705 memory.go:184] no items to output this cycle
I0323 07:03:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 07:04:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:04:03.409782  543705 memory.go:184] no items to output this cycle
I0323 07:04:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 07:04:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:04:13.409779  543705 memory.go:191] Add success.
I0323 07:04:13.409805  543705 cpu.go:282] Add success.
W0323 07:04:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:04:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:04:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:04:13.420108  543705 net.go:648] Add success.
I0323 07:04:13.422914  543705 net.go:770] primary dev: ETH0
I0323 07:04:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:04:13.422939  543705 net.go:698] Add success.
I0323 07:04:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:04:14.455434  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:04:14.455448  543705 disk_worker.go:708] disk space is not compliant
W0323 07:04:14.455450  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:04:14.457532  543705 disk_worker.go:494] system disk:vda1
I0323 07:04:14.457572  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:04:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:04:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:04:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:04:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:04:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:04:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:04:23.409801  543705 memory.go:184] no items to output this cycle
I0323 07:04:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 07:04:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:04:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 07:04:33.409801  543705 memory.go:184] no items to output this cycle
I0323 07:04:38.660931  543705 disk_info.go:125] begin check local disk info of client
I0323 07:04:38.663558  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:04:38.663563  543705 disk_info.go:196] parse disk info done, disk is : [0xc000499cc0 0xc000499d00]
E0323 07:04:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:04:43.410736  543705 memory.go:191] Add success.
I0323 07:04:43.409813  543705 cpu.go:282] Add success.
I0323 07:04:43.420495  543705 net.go:648] Add success.
I0323 07:04:43.423352  543705 net.go:770] primary dev: ETH0
I0323 07:04:43.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:04:43.423379  543705 net.go:698] Add success.
I0323 07:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:04:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:04:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:04:53.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:04:53.410259  543705 memory.go:184] no items to output this cycle
I0323 07:04:53.410264  543705 cpu.go:275] no items to output this cycle
E0323 07:05:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:05:03.409799  543705 memory.go:184] no items to output this cycle
I0323 07:05:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 07:05:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:05:13.409787  543705 memory.go:191] Add success.
I0323 07:05:13.409789  543705 cpu.go:282] Add success.
W0323 07:05:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:05:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:05:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:05:13.420087  543705 net.go:648] Add success.
I0323 07:05:13.423066  543705 net.go:770] primary dev: ETH0
I0323 07:05:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:05:13.423096  543705 net.go:698] Add success.
I0323 07:05:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:05:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:05:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 07:05:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:05:14.457047  543705 disk_worker.go:494] system disk:vda1
I0323 07:05:14.457076  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:05:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:05:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:05:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:05:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:05:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:05:23.409805  543705 memory.go:184] no items to output this cycle
I0323 07:05:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 07:05:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:05:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 07:05:33.409801  543705 memory.go:184] no items to output this cycle
I0323 07:05:38.663949  543705 disk_info.go:125] begin check local disk info of client
I0323 07:05:38.666862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:05:38.666868  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352440 0xc000352480]
E0323 07:05:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:05:43.410723  543705 memory.go:191] Add success.
I0323 07:05:43.409802  543705 cpu.go:282] Add success.
I0323 07:05:43.420439  543705 net.go:648] Add success.
I0323 07:05:43.423227  543705 net.go:770] primary dev: ETH0
I0323 07:05:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:05:43.423279  543705 net.go:698] Add success.
I0323 07:05:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:05:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:05:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:05:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:05:53.409799  543705 memory.go:184] no items to output this cycle
I0323 07:05:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 07:06:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:06:03.409791  543705 memory.go:184] no items to output this cycle
I0323 07:06:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 07:06:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:06:13.409817  543705 memory.go:191] Add success.
I0323 07:06:13.409826  543705 cpu.go:282] Add success.
W0323 07:06:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:06:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:06:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:06:13.420351  543705 net.go:648] Add success.
I0323 07:06:13.423111  543705 net.go:770] primary dev: ETH0
I0323 07:06:13.423124  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:06:13.423137  543705 net.go:698] Add success.
I0323 07:06:13.463045  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2c2827a8-9b46-47e8-bca5-c8a5716e95a2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:06:13.463080  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:06:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:06:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:06:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 07:06:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:06:14.456990  543705 disk_worker.go:494] system disk:vda1
I0323 07:06:14.457020  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:06:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:06:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:06:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:06:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:06:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:06:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:06:23.409794  543705 memory.go:184] no items to output this cycle
I0323 07:06:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 07:06:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:06:33.409792  543705 memory.go:184] no items to output this cycle
I0323 07:06:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 07:06:38.666953  543705 disk_info.go:125] begin check local disk info of client
I0323 07:06:38.669538  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:06:38.669544  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035d180 0xc00035d1c0]
I0323 07:06:40.173724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:06:40.173729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:06:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:06:43.410629  543705 memory.go:191] Add success.
I0323 07:06:43.409802  543705 cpu.go:282] Add success.
I0323 07:06:43.420307  543705 net.go:648] Add success.
I0323 07:06:43.422773  543705 net.go:770] primary dev: ETH0
I0323 07:06:43.422786  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:06:43.422800  543705 net.go:698] Add success.
I0323 07:06:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:06:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:06:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:06:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:06:53.409776  543705 memory.go:184] no items to output this cycle
I0323 07:06:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 07:07:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:07:03.409809  543705 memory.go:184] no items to output this cycle
I0323 07:07:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 07:07:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:07:13.409780  543705 memory.go:191] Add success.
W0323 07:07:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:07:13.409810  543705 cpu.go:282] Add success.
W0323 07:07:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:07:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:07:13.420138  543705 net.go:648] Add success.
I0323 07:07:13.422810  543705 net.go:770] primary dev: ETH0
I0323 07:07:13.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:07:13.422841  543705 net.go:698] Add success.
I0323 07:07:13.453393  543705 event_worker.go:152] Polling the log file for events...
W0323 07:07:14.455402  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:07:14.455619  543705 disk_worker.go:708] disk space is not compliant
W0323 07:07:14.455625  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:07:14.456275  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:07:14.456284  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:07:14.456290  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:07:14.457215  543705 disk_worker.go:494] system disk:vda1
I0323 07:07:14.457256  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:07:15.456873  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:07:15.456886  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:07:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:07:16.457967  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:07:16.458020  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:07:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:07:16.472537  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:07:23.410670  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:07:23.410686  543705 memory.go:184] no items to output this cycle
I0323 07:07:23.410698  543705 cpu.go:275] no items to output this cycle
E0323 07:07:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:07:33.409810  543705 memory.go:184] no items to output this cycle
I0323 07:07:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 07:07:38.669675  543705 disk_info.go:125] begin check local disk info of client
I0323 07:07:38.672245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:07:38.672251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa340 0xc0001aa380]
E0323 07:07:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:07:43.410881  543705 memory.go:191] Add success.
I0323 07:07:43.409820  543705 cpu.go:282] Add success.
I0323 07:07:43.420609  543705 net.go:648] Add success.
I0323 07:07:43.423261  543705 net.go:770] primary dev: ETH0
I0323 07:07:43.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:07:43.423288  543705 net.go:698] Add success.
I0323 07:07:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:07:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:07:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:07:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:07:53.409797  543705 memory.go:184] no items to output this cycle
I0323 07:07:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 07:08:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:08:03.409778  543705 memory.go:184] no items to output this cycle
I0323 07:08:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 07:08:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:08:13.409814  543705 memory.go:191] Add success.
I0323 07:08:13.409823  543705 cpu.go:282] Add success.
W0323 07:08:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:08:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:08:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:08:13.420144  543705 net.go:648] Add success.
I0323 07:08:13.422753  543705 net.go:770] primary dev: ETH0
I0323 07:08:13.422766  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:08:13.422779  543705 net.go:698] Add success.
I0323 07:08:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:08:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:08:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 07:08:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:08:14.456952  543705 disk_worker.go:494] system disk:vda1
I0323 07:08:14.456983  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:08:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:08:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:08:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:08:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:08:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:08:23.409785  543705 memory.go:184] no items to output this cycle
I0323 07:08:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:08:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:08:33.409807  543705 memory.go:184] no items to output this cycle
I0323 07:08:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 07:08:38.672994  543705 disk_info.go:125] begin check local disk info of client
I0323 07:08:38.675613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:08:38.675620  543705 disk_info.go:196] parse disk info done, disk is : [0xc000469dc0 0xc000469e00]
E0323 07:08:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:08:43.410724  543705 memory.go:191] Add success.
I0323 07:08:43.409809  543705 cpu.go:282] Add success.
I0323 07:08:43.420239  543705 net.go:770] primary dev: ETH0
I0323 07:08:43.420253  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:08:43.420265  543705 net.go:698] Add success.
I0323 07:08:43.420509  543705 net.go:648] Add success.
I0323 07:08:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:08:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:08:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:08:53.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:08:53.410408  543705 memory.go:184] no items to output this cycle
I0323 07:08:53.410426  543705 cpu.go:275] no items to output this cycle
E0323 07:09:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:09:03.409812  543705 memory.go:184] no items to output this cycle
I0323 07:09:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 07:09:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:09:13.409792  543705 memory.go:191] Add success.
I0323 07:09:13.409810  543705 cpu.go:282] Add success.
W0323 07:09:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:09:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:09:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:09:13.420146  543705 net.go:648] Add success.
I0323 07:09:13.422829  543705 net.go:770] primary dev: ETH0
I0323 07:09:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:09:13.422858  543705 net.go:698] Add success.
I0323 07:09:13.468508  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e54ab405-95c1-43c0-a6d5-d9697a060212","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:09:13.468541  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:09:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:09:14.455390  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:09:14.455405  543705 disk_worker.go:708] disk space is not compliant
W0323 07:09:14.455409  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:09:14.457378  543705 disk_worker.go:494] system disk:vda1
I0323 07:09:14.457422  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:09:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:09:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:09:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:09:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:09:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:09:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:09:23.409778  543705 memory.go:184] no items to output this cycle
I0323 07:09:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 07:09:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:09:33.409805  543705 memory.go:184] no items to output this cycle
I0323 07:09:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 07:09:38.676013  543705 disk_info.go:125] begin check local disk info of client
I0323 07:09:38.678852  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:09:38.678858  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
I0323 07:09:40.174784  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:09:40.174790  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:09:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:09:43.410810  543705 memory.go:191] Add success.
I0323 07:09:43.409804  543705 cpu.go:282] Add success.
I0323 07:09:43.420325  543705 net.go:770] primary dev: ETH0
I0323 07:09:43.420338  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:09:43.420350  543705 net.go:698] Add success.
I0323 07:09:43.420600  543705 net.go:648] Add success.
I0323 07:09:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:09:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:09:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:09:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:09:53.409765  543705 memory.go:184] no items to output this cycle
I0323 07:09:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 07:10:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:10:03.409805  543705 memory.go:184] no items to output this cycle
I0323 07:10:03.409813  543705 cpu.go:275] no items to output this cycle
W0323 07:10:13.409710  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:10:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:10:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 07:10:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:10:13.409812  543705 cpu.go:282] Add success.
I0323 07:10:13.409828  543705 memory.go:191] Add success.
I0323 07:10:13.420347  543705 net.go:648] Add success.
I0323 07:10:13.423438  543705 net.go:770] primary dev: ETH0
I0323 07:10:13.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:10:13.423466  543705 net.go:698] Add success.
I0323 07:10:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:10:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:10:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 07:10:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:10:14.456502  543705 disk_worker.go:494] system disk:vda1
I0323 07:10:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:10:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:10:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:10:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:10:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:10:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:10:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:10:23.409800  543705 memory.go:184] no items to output this cycle
I0323 07:10:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 07:10:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:10:33.409776  543705 memory.go:184] no items to output this cycle
I0323 07:10:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 07:10:38.680031  543705 disk_info.go:125] begin check local disk info of client
I0323 07:10:38.682632  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:10:38.682638  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6300 0xc0003b6340]
E0323 07:10:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:10:43.410706  543705 memory.go:191] Add success.
I0323 07:10:43.409789  543705 cpu.go:282] Add success.
I0323 07:10:43.420227  543705 net.go:770] primary dev: ETH0
I0323 07:10:43.420242  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:10:43.420257  543705 net.go:698] Add success.
I0323 07:10:43.420629  543705 net.go:648] Add success.
I0323 07:10:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:10:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:10:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:10:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:10:53.409780  543705 cpu.go:275] no items to output this cycle
I0323 07:10:53.409788  543705 memory.go:184] no items to output this cycle
E0323 07:11:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:11:03.409793  543705 memory.go:184] no items to output this cycle
I0323 07:11:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 07:11:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:11:13.409813  543705 memory.go:191] Add success.
I0323 07:11:13.409826  543705 cpu.go:282] Add success.
W0323 07:11:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:11:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:11:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:11:13.420131  543705 net.go:648] Add success.
I0323 07:11:13.423047  543705 net.go:770] primary dev: ETH0
I0323 07:11:13.423060  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:11:13.423072  543705 net.go:698] Add success.
I0323 07:11:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:11:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:11:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 07:11:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:11:14.456559  543705 disk_worker.go:494] system disk:vda1
I0323 07:11:14.456589  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:11:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:11:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:11:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:11:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:11:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:11:23.409775  543705 memory.go:184] no items to output this cycle
I0323 07:11:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 07:11:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:11:33.409794  543705 memory.go:184] no items to output this cycle
I0323 07:11:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 07:11:38.683063  543705 disk_info.go:125] begin check local disk info of client
I0323 07:11:38.685642  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:11:38.685667  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474080 0xc0004740c0]
E0323 07:11:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:11:43.410740  543705 memory.go:191] Add success.
I0323 07:11:43.409818  543705 cpu.go:282] Add success.
I0323 07:11:43.420254  543705 net.go:770] primary dev: ETH0
I0323 07:11:43.420267  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:11:43.420279  543705 net.go:698] Add success.
I0323 07:11:43.420643  543705 net.go:648] Add success.
I0323 07:11:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:11:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:11:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:11:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:11:53.409791  543705 cpu.go:275] no items to output this cycle
I0323 07:11:53.409802  543705 memory.go:184] no items to output this cycle
E0323 07:12:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:12:03.409788  543705 memory.go:184] no items to output this cycle
I0323 07:12:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 07:12:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:12:13.409810  543705 memory.go:191] Add success.
I0323 07:12:13.409811  543705 cpu.go:282] Add success.
W0323 07:12:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:12:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:12:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:12:13.420349  543705 net.go:648] Add success.
I0323 07:12:13.423146  543705 net.go:770] primary dev: ETH0
I0323 07:12:13.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:12:13.423173  543705 net.go:698] Add success.
I0323 07:12:13.467718  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"70506157-d532-4542-8569-bcb486540f6e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:12:13.467761  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 07:12:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:12:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0323 07:12:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:12:14.456145  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:12:14.456155  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:12:14.456162  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:12:14.456523  543705 disk_worker.go:494] system disk:vda1
I0323 07:12:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:12:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:12:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:12:16.457933  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:12:16.457933  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:12:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:12:16.458006  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:12:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:12:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:12:23.409783  543705 memory.go:184] no items to output this cycle
I0323 07:12:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:12:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:12:33.409794  543705 memory.go:184] no items to output this cycle
I0323 07:12:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 07:12:38.687075  543705 disk_info.go:125] begin check local disk info of client
I0323 07:12:38.689712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:12:38.689719  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
I0323 07:12:40.177720  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:12:40.177735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:12:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:12:43.410657  543705 memory.go:191] Add success.
I0323 07:12:43.409811  543705 cpu.go:282] Add success.
I0323 07:12:43.420372  543705 net.go:648] Add success.
I0323 07:12:43.423386  543705 net.go:770] primary dev: ETH0
I0323 07:12:43.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:12:43.423416  543705 net.go:698] Add success.
I0323 07:12:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:12:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:12:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:12:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:12:53.409805  543705 memory.go:184] no items to output this cycle
I0323 07:12:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 07:13:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:13:03.409819  543705 memory.go:184] no items to output this cycle
I0323 07:13:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 07:13:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:13:13.409797  543705 memory.go:191] Add success.
I0323 07:13:13.409816  543705 cpu.go:282] Add success.
W0323 07:13:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:13:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:13:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:13:13.420229  543705 net.go:648] Add success.
I0323 07:13:13.423024  543705 net.go:770] primary dev: ETH0
I0323 07:13:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:13:13.423050  543705 net.go:698] Add success.
I0323 07:13:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:13:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:13:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 07:13:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:13:14.456599  543705 disk_worker.go:494] system disk:vda1
I0323 07:13:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:13:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:13:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:13:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:13:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:13:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:13:23.409761  543705 memory.go:184] no items to output this cycle
I0323 07:13:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:13:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:13:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 07:13:33.409803  543705 memory.go:184] no items to output this cycle
I0323 07:13:38.689821  543705 disk_info.go:125] begin check local disk info of client
I0323 07:13:38.692440  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:13:38.692448  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c44c0 0xc0004c4500]
E0323 07:13:43.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:13:43.410873  543705 memory.go:191] Add success.
I0323 07:13:43.409811  543705 cpu.go:282] Add success.
I0323 07:13:43.420623  543705 net.go:648] Add success.
I0323 07:13:43.423675  543705 net.go:770] primary dev: ETH0
I0323 07:13:43.423687  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:13:43.423701  543705 net.go:698] Add success.
I0323 07:13:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:13:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:13:53.409775  543705 memory.go:184] no items to output this cycle
I0323 07:13:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 07:14:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:14:03.409785  543705 memory.go:184] no items to output this cycle
I0323 07:14:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 07:14:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:14:13.409830  543705 memory.go:191] Add success.
I0323 07:14:13.409844  543705 cpu.go:282] Add success.
W0323 07:14:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:14:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:14:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:14:13.420169  543705 net.go:648] Add success.
I0323 07:14:13.422874  543705 net.go:770] primary dev: ETH0
I0323 07:14:13.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:14:13.422899  543705 net.go:698] Add success.
I0323 07:14:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:14:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:14:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 07:14:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:14:14.456560  543705 disk_worker.go:494] system disk:vda1
I0323 07:14:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:14:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:14:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:14:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:14:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:14:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:14:23.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:14:23.409903  543705 memory.go:184] no items to output this cycle
I0323 07:14:23.409923  543705 cpu.go:275] no items to output this cycle
E0323 07:14:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:14:33.409789  543705 memory.go:184] no items to output this cycle
I0323 07:14:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 07:14:38.692533  543705 disk_info.go:125] begin check local disk info of client
I0323 07:14:38.695195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:14:38.695203  543705 disk_info.go:196] parse disk info done, disk is : [0xc000574000 0xc000574040]
E0323 07:14:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:14:43.410734  543705 memory.go:191] Add success.
I0323 07:14:43.409805  543705 cpu.go:282] Add success.
I0323 07:14:43.420494  543705 net.go:648] Add success.
I0323 07:14:43.423381  543705 net.go:770] primary dev: ETH0
I0323 07:14:43.423395  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:14:43.423409  543705 net.go:698] Add success.
I0323 07:14:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:14:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:14:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:14:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:14:53.409801  543705 memory.go:184] no items to output this cycle
I0323 07:14:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:15:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:15:03.409783  543705 memory.go:184] no items to output this cycle
I0323 07:15:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:15:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:15:13.409811  543705 memory.go:191] Add success.
I0323 07:15:13.409820  543705 cpu.go:282] Add success.
W0323 07:15:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:15:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:15:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:15:13.420172  543705 net.go:648] Add success.
I0323 07:15:13.423209  543705 net.go:770] primary dev: ETH0
I0323 07:15:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:15:13.423237  543705 net.go:698] Add success.
I0323 07:15:13.463318  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b22e15ba-27e3-40c8-92ac-068c76ad754e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:15:13.463350  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:15:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:15:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:15:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 07:15:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:15:14.456618  543705 disk_worker.go:494] system disk:vda1
I0323 07:15:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:15:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:15:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:15:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:15:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:15:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:15:23.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:15:23.409916  543705 memory.go:184] no items to output this cycle
I0323 07:15:23.410064  543705 cpu.go:275] no items to output this cycle
E0323 07:15:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:15:33.409782  543705 memory.go:184] no items to output this cycle
I0323 07:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 07:15:38.696120  543705 disk_info.go:125] begin check local disk info of client
I0323 07:15:38.698754  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:15:38.698761  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6f40 0xc0003b6f80]
I0323 07:15:40.178780  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:15:40.178786  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:15:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:15:43.410866  543705 memory.go:191] Add success.
I0323 07:15:43.409787  543705 cpu.go:282] Add success.
I0323 07:15:43.420648  543705 net.go:648] Add success.
I0323 07:15:43.423966  543705 net.go:770] primary dev: ETH0
I0323 07:15:43.423979  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:15:43.423992  543705 net.go:698] Add success.
I0323 07:15:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:15:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:15:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:15:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:15:53.409781  543705 memory.go:184] no items to output this cycle
I0323 07:15:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 07:16:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:16:03.409779  543705 memory.go:184] no items to output this cycle
I0323 07:16:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:16:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:16:13.409813  543705 memory.go:191] Add success.
I0323 07:16:13.409822  543705 cpu.go:282] Add success.
W0323 07:16:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:16:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:16:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:16:13.420153  543705 net.go:648] Add success.
I0323 07:16:13.422904  543705 net.go:770] primary dev: ETH0
I0323 07:16:13.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:16:13.422929  543705 net.go:698] Add success.
I0323 07:16:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:16:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:16:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 07:16:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:16:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 07:16:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:16:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:16:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:16:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:16:23.409774  543705 memory.go:184] no items to output this cycle
I0323 07:16:23.409775  543705 cpu.go:275] no items to output this cycle
E0323 07:16:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:16:33.409810  543705 memory.go:184] no items to output this cycle
I0323 07:16:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 07:16:38.700137  543705 disk_info.go:125] begin check local disk info of client
I0323 07:16:38.702928  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:16:38.702934  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f5ac0 0xc0003f5b00]
E0323 07:16:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:16:43.410818  543705 memory.go:191] Add success.
I0323 07:16:43.409815  543705 cpu.go:282] Add success.
I0323 07:16:43.420517  543705 net.go:648] Add success.
I0323 07:16:43.423415  543705 net.go:770] primary dev: ETH0
I0323 07:16:43.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:16:43.423440  543705 net.go:698] Add success.
I0323 07:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:16:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:16:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:16:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:16:53.409786  543705 memory.go:184] no items to output this cycle
I0323 07:16:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 07:17:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:17:03.409786  543705 memory.go:184] no items to output this cycle
I0323 07:17:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:17:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:17:13.409791  543705 cpu.go:282] Add success.
I0323 07:17:13.409794  543705 memory.go:191] Add success.
W0323 07:17:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:17:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:17:13.420249  543705 net.go:648] Add success.
I0323 07:17:13.422827  543705 net.go:770] primary dev: ETH0
I0323 07:17:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:17:13.422852  543705 net.go:698] Add success.
I0323 07:17:13.453431  543705 event_worker.go:152] Polling the log file for events...
W0323 07:17:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:17:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 07:17:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:17:14.457094  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:17:14.457103  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:17:14.457110  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:17:14.457181  543705 disk_worker.go:494] system disk:vda1
I0323 07:17:14.457224  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:17:15.456823  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:17:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:17:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:17:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:17:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:17:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:17:16.472334  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:17:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:17:23.409772  543705 memory.go:184] no items to output this cycle
I0323 07:17:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 07:17:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:17:33.409785  543705 memory.go:184] no items to output this cycle
I0323 07:17:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 07:17:38.704159  543705 disk_info.go:125] begin check local disk info of client
I0323 07:17:38.706747  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:17:38.706754  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c980 0xc00034c9c0]
E0323 07:17:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:17:43.410660  543705 memory.go:191] Add success.
I0323 07:17:43.409795  543705 cpu.go:282] Add success.
I0323 07:17:43.420363  543705 net.go:648] Add success.
I0323 07:17:43.423123  543705 net.go:770] primary dev: ETH0
I0323 07:17:43.423137  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:17:43.423150  543705 net.go:698] Add success.
I0323 07:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:17:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:17:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:17:53.410234  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:17:53.410249  543705 memory.go:184] no items to output this cycle
I0323 07:17:53.410279  543705 cpu.go:275] no items to output this cycle
E0323 07:18:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:18:03.409782  543705 memory.go:184] no items to output this cycle
I0323 07:18:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 07:18:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:18:13.409817  543705 memory.go:191] Add success.
I0323 07:18:13.409823  543705 cpu.go:282] Add success.
W0323 07:18:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:18:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:18:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:18:13.420163  543705 net.go:648] Add success.
I0323 07:18:13.422707  543705 net.go:770] primary dev: ETH0
I0323 07:18:13.422724  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:18:13.422737  543705 net.go:698] Add success.
I0323 07:18:13.468420  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8721ee17-64a3-404a-ad1d-3da802c926c2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:18:13.468456  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:18:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:18:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:18:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 07:18:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:18:14.456668  543705 disk_worker.go:494] system disk:vda1
I0323 07:18:14.456712  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:18:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:18:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:18:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:18:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:18:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:18:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:18:23.409793  543705 memory.go:184] no items to output this cycle
I0323 07:18:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 07:18:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:18:33.409792  543705 memory.go:184] no items to output this cycle
I0323 07:18:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 07:18:38.708170  543705 disk_info.go:125] begin check local disk info of client
I0323 07:18:38.710743  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:18:38.710749  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b63c0 0xc0003b6400]
I0323 07:18:40.181735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:18:40.181741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:18:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:18:43.410763  543705 memory.go:191] Add success.
I0323 07:18:43.409785  543705 cpu.go:282] Add success.
I0323 07:18:43.420529  543705 net.go:648] Add success.
I0323 07:18:43.423501  543705 net.go:770] primary dev: ETH0
I0323 07:18:43.423517  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:18:43.423535  543705 net.go:698] Add success.
I0323 07:18:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:18:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:18:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:18:53.409796  543705 memory.go:184] no items to output this cycle
I0323 07:18:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 07:19:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:19:03.409784  543705 memory.go:184] no items to output this cycle
I0323 07:19:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:19:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:19:13.409816  543705 memory.go:191] Add success.
I0323 07:19:13.409823  543705 cpu.go:282] Add success.
W0323 07:19:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:19:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:19:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:19:13.420333  543705 net.go:648] Add success.
I0323 07:19:13.422929  543705 net.go:770] primary dev: ETH0
I0323 07:19:13.422942  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:19:13.422955  543705 net.go:698] Add success.
I0323 07:19:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:19:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:19:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 07:19:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:19:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 07:19:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:19:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:19:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:19:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:19:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:19:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:19:23.409760  543705 memory.go:184] no items to output this cycle
I0323 07:19:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:19:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:19:33.409809  543705 memory.go:184] no items to output this cycle
I0323 07:19:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 07:19:38.712194  543705 disk_info.go:125] begin check local disk info of client
I0323 07:19:38.714772  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:19:38.714779  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5e80 0xc0004b5f00]
E0323 07:19:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:19:43.410737  543705 memory.go:191] Add success.
I0323 07:19:43.409813  543705 cpu.go:282] Add success.
I0323 07:19:43.420442  543705 net.go:648] Add success.
I0323 07:19:43.423173  543705 net.go:770] primary dev: ETH0
I0323 07:19:43.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:19:43.423199  543705 net.go:698] Add success.
I0323 07:19:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:19:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:19:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:19:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:19:53.409760  543705 memory.go:184] no items to output this cycle
I0323 07:19:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 07:20:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:20:03.409785  543705 memory.go:184] no items to output this cycle
I0323 07:20:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 07:20:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:20:13.409778  543705 memory.go:191] Add success.
W0323 07:20:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:20:13.409806  543705 cpu.go:282] Add success.
W0323 07:20:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:20:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:20:13.420047  543705 net.go:648] Add success.
I0323 07:20:13.422927  543705 net.go:770] primary dev: ETH0
I0323 07:20:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:20:13.422951  543705 net.go:698] Add success.
I0323 07:20:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:20:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:20:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 07:20:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:20:14.456506  543705 disk_worker.go:494] system disk:vda1
I0323 07:20:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:20:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:20:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:20:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:20:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:20:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:20:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:20:23.409798  543705 memory.go:184] no items to output this cycle
I0323 07:20:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 07:20:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:20:33.409788  543705 memory.go:184] no items to output this cycle
I0323 07:20:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 07:20:38.716216  543705 disk_info.go:125] begin check local disk info of client
I0323 07:20:38.718876  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:20:38.718883  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bec80 0xc0003becc0]
E0323 07:20:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:20:43.410744  543705 memory.go:191] Add success.
I0323 07:20:43.409782  543705 cpu.go:282] Add success.
I0323 07:20:43.420434  543705 net.go:648] Add success.
I0323 07:20:43.423934  543705 net.go:770] primary dev: ETH0
I0323 07:20:43.423949  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:20:43.423963  543705 net.go:698] Add success.
I0323 07:20:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:20:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:20:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:20:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:20:53.410277  543705 cpu.go:275] no items to output this cycle
I0323 07:20:53.410279  543705 memory.go:184] no items to output this cycle
E0323 07:21:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:21:03.409782  543705 memory.go:184] no items to output this cycle
I0323 07:21:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 07:21:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:21:13.409810  543705 memory.go:191] Add success.
I0323 07:21:13.409821  543705 cpu.go:282] Add success.
W0323 07:21:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:21:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:21:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:21:13.420110  543705 net.go:648] Add success.
I0323 07:21:13.422890  543705 net.go:770] primary dev: ETH0
I0323 07:21:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:21:13.422915  543705 net.go:698] Add success.
I0323 07:21:13.557343  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"62e5d104-f136-43d5-b5cf-2d5779acd43a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:21:13.557548  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:21:14.453966  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:21:14.455270  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:21:14.455281  543705 disk_worker.go:708] disk space is not compliant
W0323 07:21:14.455284  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:21:14.456804  543705 disk_worker.go:494] system disk:vda1
I0323 07:21:14.456833  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:21:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:21:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:21:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:21:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:21:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:21:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:21:23.409786  543705 memory.go:184] no items to output this cycle
I0323 07:21:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 07:21:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:21:33.409807  543705 memory.go:184] no items to output this cycle
I0323 07:21:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 07:21:38.718970  543705 disk_info.go:125] begin check local disk info of client
I0323 07:21:38.721505  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:21:38.721512  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d3d80 0xc0003d3dc0]
I0323 07:21:40.185694  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:21:40.185700  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:21:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:21:43.410662  543705 memory.go:191] Add success.
I0323 07:21:43.409799  543705 cpu.go:282] Add success.
I0323 07:21:43.420354  543705 net.go:648] Add success.
I0323 07:21:43.422929  543705 net.go:770] primary dev: ETH0
I0323 07:21:43.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:21:43.422958  543705 net.go:698] Add success.
I0323 07:21:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:21:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:21:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:21:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:21:53.409798  543705 memory.go:184] no items to output this cycle
I0323 07:21:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 07:22:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:22:03.409786  543705 memory.go:184] no items to output this cycle
I0323 07:22:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:22:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:22:13.409816  543705 memory.go:191] Add success.
I0323 07:22:13.409820  543705 cpu.go:282] Add success.
W0323 07:22:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:22:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:22:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:22:13.420308  543705 net.go:648] Add success.
I0323 07:22:13.422861  543705 net.go:770] primary dev: ETH0
I0323 07:22:13.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:22:13.422885  543705 net.go:698] Add success.
W0323 07:22:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:22:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 07:22:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:22:14.456923  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:22:14.456932  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:22:14.456938  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:22:14.457011  543705 disk_worker.go:494] system disk:vda1
I0323 07:22:14.457054  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:22:15.456870  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:22:15.456879  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:22:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:22:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:22:16.458011  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:22:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:22:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:22:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:22:23.409775  543705 memory.go:184] no items to output this cycle
I0323 07:22:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 07:22:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:22:33.409792  543705 memory.go:184] no items to output this cycle
I0323 07:22:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 07:22:38.721684  543705 disk_info.go:125] begin check local disk info of client
I0323 07:22:38.724274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:22:38.724281  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b43c0 0xc0004b4400]
E0323 07:22:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:22:43.410735  543705 memory.go:191] Add success.
I0323 07:22:43.409799  543705 cpu.go:282] Add success.
I0323 07:22:43.420412  543705 net.go:648] Add success.
I0323 07:22:43.423111  543705 net.go:770] primary dev: ETH0
I0323 07:22:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:22:43.423137  543705 net.go:698] Add success.
I0323 07:22:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:22:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:22:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:22:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:22:53.409784  543705 cpu.go:275] no items to output this cycle
I0323 07:22:53.409786  543705 memory.go:184] no items to output this cycle
E0323 07:23:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:23:03.409784  543705 memory.go:184] no items to output this cycle
I0323 07:23:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 07:23:13.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:23:13.409913  543705 memory.go:191] Add success.
W0323 07:23:13.409981  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:23:13.410002  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:23:13.410004  543705 cpu.go:282] Add success.
I0323 07:23:13.410006  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:23:13.419737  543705 net.go:648] Add success.
I0323 07:23:13.422701  543705 net.go:770] primary dev: ETH0
I0323 07:23:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:23:13.422740  543705 net.go:698] Add success.
I0323 07:23:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:23:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:23:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 07:23:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:23:14.456522  543705 disk_worker.go:494] system disk:vda1
I0323 07:23:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:23:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:23:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:23:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:23:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:23:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:23:23.409809  543705 memory.go:184] no items to output this cycle
I0323 07:23:23.409819  543705 cpu.go:275] no items to output this cycle
E0323 07:23:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:23:33.409824  543705 memory.go:184] no items to output this cycle
I0323 07:23:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 07:23:38.724364  543705 disk_info.go:125] begin check local disk info of client
I0323 07:23:38.726928  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:23:38.726934  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5500 0xc0000c5540]
E0323 07:23:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:23:43.410620  543705 memory.go:191] Add success.
I0323 07:23:43.409825  543705 cpu.go:282] Add success.
I0323 07:23:43.420316  543705 net.go:648] Add success.
I0323 07:23:43.422779  543705 net.go:770] primary dev: ETH0
I0323 07:23:43.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:23:43.422804  543705 net.go:698] Add success.
I0323 07:23:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:23:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:23:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:23:53.409784  543705 memory.go:184] no items to output this cycle
I0323 07:23:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 07:24:03.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:24:03.409832  543705 memory.go:184] no items to output this cycle
I0323 07:24:03.409843  543705 cpu.go:275] no items to output this cycle
E0323 07:24:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:24:13.409805  543705 memory.go:191] Add success.
I0323 07:24:13.409813  543705 cpu.go:282] Add success.
W0323 07:24:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:24:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:24:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:24:13.420199  543705 net.go:648] Add success.
I0323 07:24:13.422895  543705 net.go:770] primary dev: ETH0
I0323 07:24:13.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:24:13.422920  543705 net.go:698] Add success.
I0323 07:24:13.463468  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c4120924-eb2e-463d-ba70-8d743f9b108d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:24:13.463502  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:24:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:24:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:24:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 07:24:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:24:14.456557  543705 disk_worker.go:494] system disk:vda1
I0323 07:24:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:24:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:24:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:24:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:24:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:24:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:24:23.409768  543705 memory.go:184] no items to output this cycle
I0323 07:24:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 07:24:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:24:33.409821  543705 memory.go:184] no items to output this cycle
I0323 07:24:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 07:24:38.728281  543705 disk_info.go:125] begin check local disk info of client
I0323 07:24:38.730878  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:24:38.730884  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b49c0 0xc0004b4a00]
I0323 07:24:40.186791  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:24:40.186797  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:24:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:24:43.410740  543705 memory.go:191] Add success.
I0323 07:24:43.409813  543705 cpu.go:282] Add success.
I0323 07:24:43.420531  543705 net.go:648] Add success.
I0323 07:24:43.423339  543705 net.go:770] primary dev: ETH0
I0323 07:24:43.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:24:43.423364  543705 net.go:698] Add success.
I0323 07:24:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:24:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:24:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:24:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:24:53.409766  543705 memory.go:184] no items to output this cycle
I0323 07:24:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 07:25:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:25:03.409785  543705 memory.go:184] no items to output this cycle
I0323 07:25:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 07:25:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:25:13.409786  543705 memory.go:191] Add success.
W0323 07:25:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:25:13.409813  543705 cpu.go:282] Add success.
W0323 07:25:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:25:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:25:13.420274  543705 net.go:648] Add success.
I0323 07:25:13.423159  543705 net.go:770] primary dev: ETH0
I0323 07:25:13.423175  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:25:13.423188  543705 net.go:698] Add success.
I0323 07:25:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:25:14.455111  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:25:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 07:25:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:25:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 07:25:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:25:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:25:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:25:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:25:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:25:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:25:23.409780  543705 memory.go:184] no items to output this cycle
I0323 07:25:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 07:25:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:25:33.409787  543705 memory.go:184] no items to output this cycle
I0323 07:25:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 07:25:38.732299  543705 disk_info.go:125] begin check local disk info of client
I0323 07:25:38.734885  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:25:38.734892  543705 disk_info.go:196] parse disk info done, disk is : [0xc000390280 0xc0003902c0]
E0323 07:25:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:25:43.410633  543705 memory.go:191] Add success.
I0323 07:25:43.409794  543705 cpu.go:282] Add success.
I0323 07:25:43.420370  543705 net.go:648] Add success.
I0323 07:25:43.422996  543705 net.go:770] primary dev: ETH0
I0323 07:25:43.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:25:43.423021  543705 net.go:698] Add success.
I0323 07:25:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:25:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:25:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:25:53.409920  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:25:53.409936  543705 memory.go:184] no items to output this cycle
I0323 07:25:53.409937  543705 cpu.go:275] no items to output this cycle
E0323 07:26:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:26:03.409781  543705 memory.go:184] no items to output this cycle
I0323 07:26:03.409791  543705 cpu.go:275] no items to output this cycle
W0323 07:26:13.409706  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:26:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:26:13.409726  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 07:26:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:26:13.409820  543705 memory.go:191] Add success.
I0323 07:26:13.409828  543705 cpu.go:282] Add success.
I0323 07:26:13.420480  543705 net.go:648] Add success.
I0323 07:26:13.423545  543705 net.go:770] primary dev: ETH0
I0323 07:26:13.423560  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:26:13.423575  543705 net.go:698] Add success.
I0323 07:26:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:26:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:26:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 07:26:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:26:14.456509  543705 disk_worker.go:494] system disk:vda1
I0323 07:26:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:26:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:26:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:26:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:26:16.458047  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:26:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:26:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:26:23.409799  543705 memory.go:184] no items to output this cycle
I0323 07:26:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 07:26:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:26:33.409775  543705 memory.go:184] no items to output this cycle
I0323 07:26:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 07:26:38.736311  543705 disk_info.go:125] begin check local disk info of client
I0323 07:26:38.738910  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:26:38.738916  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027ba40 0xc00027ba80]
E0323 07:26:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:26:43.410585  543705 memory.go:191] Add success.
I0323 07:26:43.409801  543705 cpu.go:282] Add success.
I0323 07:26:43.420304  543705 net.go:648] Add success.
I0323 07:26:43.422968  543705 net.go:770] primary dev: ETH0
I0323 07:26:43.422980  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:26:43.422992  543705 net.go:698] Add success.
I0323 07:26:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:26:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:26:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:26:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:26:53.410379  543705 cpu.go:275] no items to output this cycle
I0323 07:26:53.410384  543705 memory.go:184] no items to output this cycle
E0323 07:27:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:27:03.409780  543705 memory.go:184] no items to output this cycle
I0323 07:27:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 07:27:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:27:13.409807  543705 memory.go:191] Add success.
I0323 07:27:13.409815  543705 cpu.go:282] Add success.
W0323 07:27:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:27:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:27:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:27:13.420179  543705 net.go:648] Add success.
I0323 07:27:13.422711  543705 net.go:770] primary dev: ETH0
I0323 07:27:13.422726  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:27:13.422738  543705 net.go:698] Add success.
I0323 07:27:13.428885  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 07:27:13.453075  543705 event_worker.go:152] Polling the log file for events...
I0323 07:27:13.469708  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"14d72150-290c-42d4-856f-02e41df27a59","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:27:13.469742  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 07:27:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:27:14.455146  543705 disk_worker.go:708] disk space is not compliant
W0323 07:27:14.455149  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:27:14.456972  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:27:14.456981  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:27:14.456987  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:27:14.456993  543705 disk_worker.go:494] system disk:vda1
I0323 07:27:14.457025  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:27:15.456789  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:27:15.456798  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:27:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:27:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:27:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:27:16.457984  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:27:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:27:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:27:23.409775  543705 memory.go:184] no items to output this cycle
I0323 07:27:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 07:27:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:27:33.409783  543705 memory.go:184] no items to output this cycle
I0323 07:27:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 07:27:38.740352  543705 disk_info.go:125] begin check local disk info of client
I0323 07:27:38.742921  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:27:38.742928  543705 disk_info.go:196] parse disk info done, disk is : [0xc000354b40 0xc000354b80]
I0323 07:27:40.189728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:27:40.189734  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:27:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:27:43.410626  543705 memory.go:191] Add success.
I0323 07:27:43.409818  543705 cpu.go:282] Add success.
I0323 07:27:43.420400  543705 net.go:648] Add success.
I0323 07:27:43.423340  543705 net.go:770] primary dev: ETH0
I0323 07:27:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:27:43.423364  543705 net.go:698] Add success.
I0323 07:27:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:27:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:27:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:27:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:27:53.409778  543705 memory.go:184] no items to output this cycle
I0323 07:27:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 07:28:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:28:03.409805  543705 memory.go:184] no items to output this cycle
I0323 07:28:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 07:28:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:28:13.409781  543705 memory.go:191] Add success.
I0323 07:28:13.409802  543705 cpu.go:282] Add success.
W0323 07:28:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:28:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:28:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:28:13.420224  543705 net.go:648] Add success.
I0323 07:28:13.423230  543705 net.go:770] primary dev: ETH0
I0323 07:28:13.423244  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:28:13.423257  543705 net.go:698] Add success.
I0323 07:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:28:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:28:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 07:28:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:28:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 07:28:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:28:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:28:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:28:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:28:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:28:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:28:23.409795  543705 memory.go:184] no items to output this cycle
I0323 07:28:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 07:28:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:28:33.409779  543705 memory.go:184] no items to output this cycle
I0323 07:28:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 07:28:38.744352  543705 disk_info.go:125] begin check local disk info of client
I0323 07:28:38.746979  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:28:38.746985  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474900 0xc000474940]
E0323 07:28:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:28:43.410779  543705 memory.go:191] Add success.
I0323 07:28:43.409786  543705 cpu.go:282] Add success.
I0323 07:28:43.419736  543705 net.go:648] Add success.
I0323 07:28:43.422322  543705 net.go:770] primary dev: ETH0
I0323 07:28:43.422335  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:28:43.422347  543705 net.go:698] Add success.
I0323 07:28:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:28:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:28:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:28:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:28:53.409765  543705 memory.go:184] no items to output this cycle
I0323 07:28:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 07:29:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:29:03.409789  543705 memory.go:184] no items to output this cycle
I0323 07:29:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:29:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:29:13.409806  543705 memory.go:191] Add success.
I0323 07:29:13.409816  543705 cpu.go:282] Add success.
W0323 07:29:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:29:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:29:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:29:13.420074  543705 net.go:648] Add success.
I0323 07:29:13.422855  543705 net.go:770] primary dev: ETH0
I0323 07:29:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:29:13.422881  543705 net.go:698] Add success.
I0323 07:29:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:29:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:29:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 07:29:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:29:14.456522  543705 disk_worker.go:494] system disk:vda1
I0323 07:29:14.456566  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:29:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:29:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:29:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:29:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:29:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:29:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:29:23.409780  543705 memory.go:184] no items to output this cycle
I0323 07:29:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 07:29:33.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:29:33.409883  543705 memory.go:184] no items to output this cycle
I0323 07:29:33.409946  543705 cpu.go:275] no items to output this cycle
I0323 07:29:38.747084  543705 disk_info.go:125] begin check local disk info of client
I0323 07:29:38.749635  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:29:38.749641  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0000 0xc0004a0040]
E0323 07:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:29:43.410718  543705 memory.go:191] Add success.
I0323 07:29:43.409818  543705 cpu.go:282] Add success.
I0323 07:29:43.420465  543705 net.go:648] Add success.
I0323 07:29:43.423152  543705 net.go:770] primary dev: ETH0
I0323 07:29:43.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:29:43.423177  543705 net.go:698] Add success.
I0323 07:29:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:29:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:29:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:29:53.409768  543705 memory.go:184] no items to output this cycle
I0323 07:29:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 07:30:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:30:03.409768  543705 memory.go:184] no items to output this cycle
I0323 07:30:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 07:30:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:30:13.409779  543705 memory.go:191] Add success.
I0323 07:30:13.409801  543705 cpu.go:282] Add success.
W0323 07:30:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:30:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:30:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:30:13.420143  543705 net.go:648] Add success.
I0323 07:30:13.422920  543705 net.go:770] primary dev: ETH0
I0323 07:30:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:30:13.422963  543705 net.go:698] Add success.
I0323 07:30:13.468855  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4c3c6398-a158-414e-82ba-c3739a55909c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:30:13.468896  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:30:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:30:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:30:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 07:30:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:30:14.456547  543705 disk_worker.go:494] system disk:vda1
I0323 07:30:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:30:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:30:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:30:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:30:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:30:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:30:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:30:23.409762  543705 memory.go:184] no items to output this cycle
I0323 07:30:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 07:30:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:30:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 07:30:33.409803  543705 memory.go:184] no items to output this cycle
I0323 07:30:38.751393  543705 disk_info.go:125] begin check local disk info of client
I0323 07:30:38.753979  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:30:38.753985  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0323 07:30:40.190792  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:30:40.190797  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:30:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:30:43.410756  543705 memory.go:191] Add success.
I0323 07:30:43.409803  543705 cpu.go:282] Add success.
I0323 07:30:43.420454  543705 net.go:648] Add success.
I0323 07:30:43.423223  543705 net.go:770] primary dev: ETH0
I0323 07:30:43.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:30:43.423249  543705 net.go:698] Add success.
I0323 07:30:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:30:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:30:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:30:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 07:30:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:30:53.409809  543705 memory.go:184] no items to output this cycle
E0323 07:31:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:31:03.409783  543705 memory.go:184] no items to output this cycle
I0323 07:31:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:31:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:31:13.409791  543705 memory.go:191] Add success.
I0323 07:31:13.409793  543705 cpu.go:282] Add success.
W0323 07:31:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:31:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:31:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:31:13.420055  543705 net.go:648] Add success.
I0323 07:31:13.423250  543705 net.go:770] primary dev: ETH0
I0323 07:31:13.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:31:13.423280  543705 net.go:698] Add success.
I0323 07:31:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:31:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:31:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 07:31:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:31:14.456587  543705 disk_worker.go:494] system disk:vda1
I0323 07:31:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:31:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:31:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:31:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:31:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:31:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:31:23.409779  543705 memory.go:184] no items to output this cycle
I0323 07:31:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 07:31:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:31:33.409796  543705 memory.go:184] no items to output this cycle
I0323 07:31:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 07:31:38.754073  543705 disk_info.go:125] begin check local disk info of client
I0323 07:31:38.756684  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:31:38.756691  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c4000 0xc0004c4040]
E0323 07:31:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:31:43.410620  543705 memory.go:191] Add success.
I0323 07:31:43.409817  543705 cpu.go:282] Add success.
I0323 07:31:43.420330  543705 net.go:648] Add success.
I0323 07:31:43.423321  543705 net.go:770] primary dev: ETH0
I0323 07:31:43.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:31:43.423345  543705 net.go:698] Add success.
I0323 07:31:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:31:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:31:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:31:53.410360  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:31:53.410377  543705 memory.go:184] no items to output this cycle
I0323 07:31:53.410394  543705 cpu.go:275] no items to output this cycle
E0323 07:32:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:32:03.409798  543705 memory.go:184] no items to output this cycle
I0323 07:32:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 07:32:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:32:13.409777  543705 memory.go:191] Add success.
I0323 07:32:13.409799  543705 cpu.go:282] Add success.
W0323 07:32:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:32:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:32:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:32:13.420110  543705 net.go:648] Add success.
I0323 07:32:13.422574  543705 net.go:770] primary dev: ETH0
I0323 07:32:13.422586  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:32:13.422598  543705 net.go:698] Add success.
W0323 07:32:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:32:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 07:32:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:32:14.456816  543705 disk_worker.go:494] system disk:vda1
I0323 07:32:14.456855  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:32:14.457111  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:32:14.457119  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:32:14.457124  543705 custom_config.go:64] query custom config with name: gpu
E0323 07:32:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:32:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:32:16.457941  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:32:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:32:16.457997  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:32:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:32:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:32:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:32:23.409794  543705 memory.go:184] no items to output this cycle
I0323 07:32:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 07:32:33.409917  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:32:33.409967  543705 cpu.go:275] no items to output this cycle
I0323 07:32:33.410003  543705 memory.go:184] no items to output this cycle
I0323 07:32:38.757675  543705 disk_info.go:125] begin check local disk info of client
I0323 07:32:38.760270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:32:38.760277  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034a000 0xc00034a040]
E0323 07:32:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:32:43.410662  543705 memory.go:191] Add success.
I0323 07:32:43.409811  543705 cpu.go:282] Add success.
I0323 07:32:43.420409  543705 net.go:648] Add success.
I0323 07:32:43.423169  543705 net.go:770] primary dev: ETH0
I0323 07:32:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:32:43.423195  543705 net.go:698] Add success.
I0323 07:32:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:32:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:32:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:32:53.410245  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:32:53.410274  543705 memory.go:184] no items to output this cycle
I0323 07:32:53.410300  543705 cpu.go:275] no items to output this cycle
E0323 07:33:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:33:03.409788  543705 memory.go:184] no items to output this cycle
I0323 07:33:03.409797  543705 cpu.go:275] no items to output this cycle
W0323 07:33:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:33:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:33:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:33:13.409802  543705 cpu.go:282] Add success.
E0323 07:33:13.409836  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:33:13.409858  543705 memory.go:191] Add success.
I0323 07:33:13.420146  543705 net.go:648] Add success.
I0323 07:33:13.422792  543705 net.go:770] primary dev: ETH0
I0323 07:33:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:33:13.422818  543705 net.go:698] Add success.
I0323 07:33:13.464017  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9e86ad0e-c016-4912-bb7c-0c87e4c51faa","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:33:13.464052  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:33:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:33:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:33:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 07:33:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:33:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 07:33:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:33:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:33:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:33:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:33:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:33:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:33:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:33:23.409794  543705 memory.go:184] no items to output this cycle
I0323 07:33:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 07:33:33.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:33:33.409902  543705 cpu.go:275] no items to output this cycle
I0323 07:33:33.409911  543705 memory.go:184] no items to output this cycle
I0323 07:33:38.761445  543705 disk_info.go:125] begin check local disk info of client
I0323 07:33:38.764033  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:33:38.764040  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6000 0xc0003b6040]
I0323 07:33:40.193731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:33:40.193736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:33:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:33:43.410704  543705 memory.go:191] Add success.
I0323 07:33:43.409815  543705 cpu.go:282] Add success.
I0323 07:33:43.420625  543705 net.go:648] Add success.
I0323 07:33:43.423438  543705 net.go:770] primary dev: ETH0
I0323 07:33:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:33:43.423467  543705 net.go:698] Add success.
I0323 07:33:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:33:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:33:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:33:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:33:53.409769  543705 memory.go:184] no items to output this cycle
I0323 07:33:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 07:34:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:34:03.409782  543705 memory.go:184] no items to output this cycle
I0323 07:34:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 07:34:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:34:13.409821  543705 memory.go:191] Add success.
I0323 07:34:13.409826  543705 cpu.go:282] Add success.
W0323 07:34:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:34:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:34:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:34:13.420294  543705 net.go:648] Add success.
I0323 07:34:13.423063  543705 net.go:770] primary dev: ETH0
I0323 07:34:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:34:13.423088  543705 net.go:698] Add success.
I0323 07:34:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:34:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:34:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 07:34:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:34:14.456512  543705 disk_worker.go:494] system disk:vda1
I0323 07:34:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:34:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:34:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:34:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:34:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:34:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:34:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:34:23.409768  543705 memory.go:184] no items to output this cycle
I0323 07:34:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 07:34:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:34:33.409903  543705 memory.go:184] no items to output this cycle
I0323 07:34:33.409935  543705 cpu.go:275] no items to output this cycle
I0323 07:34:38.764126  543705 disk_info.go:125] begin check local disk info of client
I0323 07:34:38.766705  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:34:38.766711  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352280 0xc0003522c0]
E0323 07:34:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:34:43.410633  543705 memory.go:191] Add success.
I0323 07:34:43.409800  543705 cpu.go:282] Add success.
I0323 07:34:43.420335  543705 net.go:648] Add success.
I0323 07:34:43.422984  543705 net.go:770] primary dev: ETH0
I0323 07:34:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:34:43.423014  543705 net.go:698] Add success.
I0323 07:34:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:34:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:34:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:34:53.410420  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:34:53.410439  543705 memory.go:184] no items to output this cycle
I0323 07:34:53.410448  543705 cpu.go:275] no items to output this cycle
E0323 07:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:35:03.409784  543705 memory.go:184] no items to output this cycle
I0323 07:35:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:35:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:35:13.409790  543705 memory.go:191] Add success.
I0323 07:35:13.409796  543705 cpu.go:282] Add success.
W0323 07:35:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:35:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:35:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:35:13.420090  543705 net.go:648] Add success.
I0323 07:35:13.422635  543705 net.go:770] primary dev: ETH0
I0323 07:35:13.422648  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:35:13.422660  543705 net.go:698] Add success.
I0323 07:35:14.454283  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:35:14.454538  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:35:14.454550  543705 disk_worker.go:708] disk space is not compliant
W0323 07:35:14.454552  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:35:14.455923  543705 disk_worker.go:494] system disk:vda1
I0323 07:35:14.455952  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:35:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:35:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:35:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:35:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:35:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:35:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:35:23.409773  543705 memory.go:184] no items to output this cycle
I0323 07:35:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 07:35:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:35:33.409780  543705 memory.go:184] no items to output this cycle
I0323 07:35:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 07:35:38.766795  543705 disk_info.go:125] begin check local disk info of client
I0323 07:35:38.769402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:35:38.769408  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a4d40 0xc0004a4d80]
E0323 07:35:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:35:43.410915  543705 memory.go:191] Add success.
I0323 07:35:43.409819  543705 cpu.go:282] Add success.
I0323 07:35:43.420630  543705 net.go:648] Add success.
I0323 07:35:43.423432  543705 net.go:770] primary dev: ETH0
I0323 07:35:43.423446  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:35:43.423458  543705 net.go:698] Add success.
I0323 07:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:35:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:35:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:35:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:35:53.409784  543705 memory.go:184] no items to output this cycle
I0323 07:35:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 07:36:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:36:03.409777  543705 memory.go:184] no items to output this cycle
I0323 07:36:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 07:36:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:36:13.409780  543705 memory.go:191] Add success.
W0323 07:36:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:36:13.409808  543705 cpu.go:282] Add success.
W0323 07:36:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:36:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:36:13.419979  543705 net.go:770] primary dev: ETH0
I0323 07:36:13.419991  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:36:13.420003  543705 net.go:698] Add success.
I0323 07:36:13.420347  543705 net.go:648] Add success.
I0323 07:36:13.467995  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"73d9d99b-8720-4c37-be4b-89304577d9ad","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:36:13.468031  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:36:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:36:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:36:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 07:36:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:36:14.456692  543705 disk_worker.go:494] system disk:vda1
I0323 07:36:14.456730  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:36:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:36:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:36:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:36:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:36:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:36:23.409779  543705 cpu.go:275] no items to output this cycle
I0323 07:36:23.409783  543705 memory.go:184] no items to output this cycle
E0323 07:36:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:36:33.409784  543705 memory.go:184] no items to output this cycle
I0323 07:36:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 07:36:38.769678  543705 disk_info.go:125] begin check local disk info of client
I0323 07:36:38.772209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:36:38.772216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000515240 0xc000515280]
I0323 07:36:40.194792  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:36:40.194798  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:36:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:36:43.410651  543705 memory.go:191] Add success.
I0323 07:36:43.409814  543705 cpu.go:282] Add success.
I0323 07:36:43.420387  543705 net.go:648] Add success.
I0323 07:36:43.423236  543705 net.go:770] primary dev: ETH0
I0323 07:36:43.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:36:43.423262  543705 net.go:698] Add success.
I0323 07:36:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:36:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:36:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:36:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:36:53.409767  543705 memory.go:184] no items to output this cycle
I0323 07:36:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 07:37:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:37:03.409818  543705 memory.go:184] no items to output this cycle
I0323 07:37:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 07:37:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:37:13.409781  543705 memory.go:191] Add success.
W0323 07:37:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:37:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:37:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:37:13.409829  543705 cpu.go:282] Add success.
I0323 07:37:13.420053  543705 net.go:648] Add success.
I0323 07:37:13.422985  543705 net.go:770] primary dev: ETH0
I0323 07:37:13.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:37:13.423009  543705 net.go:698] Add success.
I0323 07:37:13.453569  543705 event_worker.go:152] Polling the log file for events...
W0323 07:37:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:37:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 07:37:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:37:14.456768  543705 disk_worker.go:494] system disk:vda1
I0323 07:37:14.456811  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:37:14.457156  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:37:14.457165  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:37:14.457171  543705 custom_config.go:64] query custom config with name: gpu
E0323 07:37:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:37:15.456820  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:37:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:37:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:37:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:37:16.457987  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:37:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:37:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:37:23.409789  543705 memory.go:184] no items to output this cycle
I0323 07:37:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 07:37:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:37:33.409804  543705 memory.go:184] no items to output this cycle
I0323 07:37:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 07:37:38.772305  543705 disk_info.go:125] begin check local disk info of client
I0323 07:37:38.774881  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:37:38.774888  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 07:37:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:37:43.410644  543705 memory.go:191] Add success.
I0323 07:37:43.409819  543705 cpu.go:282] Add success.
I0323 07:37:43.420351  543705 net.go:648] Add success.
I0323 07:37:43.423044  543705 net.go:770] primary dev: ETH0
I0323 07:37:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:37:43.423069  543705 net.go:698] Add success.
I0323 07:37:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:37:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:37:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:37:53.410429  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:37:53.410449  543705 memory.go:184] no items to output this cycle
I0323 07:37:53.410461  543705 cpu.go:275] no items to output this cycle
E0323 07:38:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:38:03.409798  543705 memory.go:184] no items to output this cycle
I0323 07:38:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 07:38:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:38:13.409795  543705 memory.go:191] Add success.
I0323 07:38:13.409811  543705 cpu.go:282] Add success.
W0323 07:38:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:38:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:38:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:38:13.420122  543705 net.go:648] Add success.
I0323 07:38:13.422898  543705 net.go:770] primary dev: ETH0
I0323 07:38:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:38:13.422923  543705 net.go:698] Add success.
I0323 07:38:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:38:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:38:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 07:38:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:38:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 07:38:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:38:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:38:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:38:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:38:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:38:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:38:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:38:23.409783  543705 memory.go:184] no items to output this cycle
I0323 07:38:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 07:38:33.409866  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:38:33.409886  543705 memory.go:184] no items to output this cycle
I0323 07:38:33.409980  543705 cpu.go:275] no items to output this cycle
I0323 07:38:38.774974  543705 disk_info.go:125] begin check local disk info of client
I0323 07:38:38.777587  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:38:38.777594  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 07:38:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:38:43.410862  543705 memory.go:191] Add success.
I0323 07:38:43.409808  543705 cpu.go:282] Add success.
I0323 07:38:43.420540  543705 net.go:648] Add success.
I0323 07:38:43.423707  543705 net.go:770] primary dev: ETH0
I0323 07:38:43.423722  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:38:43.423746  543705 net.go:698] Add success.
I0323 07:38:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:38:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:38:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:38:53.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:38:53.410394  543705 memory.go:184] no items to output this cycle
I0323 07:38:53.410402  543705 cpu.go:275] no items to output this cycle
E0323 07:39:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:39:03.409807  543705 cpu.go:275] no items to output this cycle
I0323 07:39:03.409810  543705 memory.go:184] no items to output this cycle
E0323 07:39:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:39:13.409801  543705 memory.go:191] Add success.
I0323 07:39:13.409804  543705 cpu.go:282] Add success.
W0323 07:39:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:39:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:39:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:39:13.420118  543705 net.go:648] Add success.
I0323 07:39:13.422851  543705 net.go:770] primary dev: ETH0
I0323 07:39:13.422866  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:39:13.422877  543705 net.go:698] Add success.
I0323 07:39:13.551628  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6261c012-2e97-4151-9339-effff0fc1434","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:39:13.551663  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:39:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:39:14.455099  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:39:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 07:39:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:39:14.456519  543705 disk_worker.go:494] system disk:vda1
I0323 07:39:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:39:15.455611  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:39:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:39:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:39:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:39:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:39:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:39:23.409800  543705 memory.go:184] no items to output this cycle
I0323 07:39:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 07:39:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:39:33.409801  543705 memory.go:184] no items to output this cycle
I0323 07:39:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 07:39:38.777681  543705 disk_info.go:125] begin check local disk info of client
I0323 07:39:38.780286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:39:38.780293  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057c3c0 0xc00057c400]
I0323 07:39:40.197728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:39:40.197733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:39:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:39:43.410674  543705 memory.go:191] Add success.
I0323 07:39:43.409808  543705 cpu.go:282] Add success.
I0323 07:39:43.420366  543705 net.go:648] Add success.
I0323 07:39:43.423038  543705 net.go:770] primary dev: ETH0
I0323 07:39:43.423053  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:39:43.423068  543705 net.go:698] Add success.
I0323 07:39:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:39:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:39:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:39:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:39:53.409778  543705 memory.go:184] no items to output this cycle
I0323 07:39:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 07:40:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:40:03.409782  543705 memory.go:184] no items to output this cycle
I0323 07:40:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 07:40:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:40:13.409810  543705 memory.go:191] Add success.
I0323 07:40:13.409832  543705 cpu.go:282] Add success.
W0323 07:40:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:40:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:40:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:40:13.420167  543705 net.go:648] Add success.
I0323 07:40:13.422962  543705 net.go:770] primary dev: ETH0
I0323 07:40:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:40:13.422989  543705 net.go:698] Add success.
I0323 07:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:40:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:40:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 07:40:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:40:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 07:40:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:40:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:40:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:40:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:40:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:40:16.472364  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:40:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:40:23.409903  543705 memory.go:184] no items to output this cycle
I0323 07:40:23.409908  543705 cpu.go:275] no items to output this cycle
E0323 07:40:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:40:33.409773  543705 memory.go:184] no items to output this cycle
I0323 07:40:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 07:40:38.781537  543705 disk_info.go:125] begin check local disk info of client
I0323 07:40:38.784151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:40:38.784157  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa180 0xc0001aa1c0]
E0323 07:40:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:40:43.410598  543705 memory.go:191] Add success.
I0323 07:40:43.409829  543705 cpu.go:282] Add success.
I0323 07:40:43.420361  543705 net.go:648] Add success.
I0323 07:40:43.423052  543705 net.go:770] primary dev: ETH0
I0323 07:40:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:40:43.423080  543705 net.go:698] Add success.
I0323 07:40:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:40:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:40:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:40:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:40:53.409776  543705 memory.go:184] no items to output this cycle
I0323 07:40:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 07:41:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:41:03.409794  543705 memory.go:184] no items to output this cycle
I0323 07:41:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 07:41:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:41:13.409791  543705 memory.go:191] Add success.
I0323 07:41:13.409794  543705 cpu.go:282] Add success.
W0323 07:41:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:41:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:41:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:41:13.420623  543705 net.go:648] Add success.
I0323 07:41:13.423623  543705 net.go:770] primary dev: ETH0
I0323 07:41:13.423637  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:41:13.423649  543705 net.go:698] Add success.
I0323 07:41:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:41:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:41:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 07:41:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:41:14.456546  543705 disk_worker.go:494] system disk:vda1
I0323 07:41:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:41:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:41:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:41:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:41:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:41:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:41:23.409795  543705 memory.go:184] no items to output this cycle
I0323 07:41:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:41:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:41:33.409820  543705 memory.go:184] no items to output this cycle
I0323 07:41:33.409829  543705 cpu.go:275] no items to output this cycle
I0323 07:41:38.784247  543705 disk_info.go:125] begin check local disk info of client
I0323 07:41:38.786819  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:41:38.786826  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 07:41:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:41:43.410706  543705 memory.go:191] Add success.
I0323 07:41:43.409821  543705 cpu.go:282] Add success.
I0323 07:41:43.420406  543705 net.go:648] Add success.
I0323 07:41:43.423182  543705 net.go:770] primary dev: ETH0
I0323 07:41:43.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:41:43.423208  543705 net.go:698] Add success.
I0323 07:41:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:41:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:41:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:41:53.410457  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:41:53.410475  543705 memory.go:184] no items to output this cycle
I0323 07:41:53.410488  543705 cpu.go:275] no items to output this cycle
E0323 07:42:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:42:03.409799  543705 memory.go:184] no items to output this cycle
I0323 07:42:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 07:42:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:42:13.409775  543705 memory.go:191] Add success.
W0323 07:42:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:42:13.409800  543705 cpu.go:282] Add success.
W0323 07:42:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:42:13.409814  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:42:13.420157  543705 net.go:648] Add success.
I0323 07:42:13.422871  543705 net.go:770] primary dev: ETH0
I0323 07:42:13.422884  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:42:13.422897  543705 net.go:698] Add success.
I0323 07:42:13.469642  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"95c48be6-df68-418e-9bb1-24de1a60e190","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:42:13.469686  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 07:42:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:42:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 07:42:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:42:14.457040  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:42:14.457047  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:42:14.457051  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:42:14.457057  543705 disk_worker.go:494] system disk:vda1
I0323 07:42:14.457095  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:42:15.456454  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:42:15.456463  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:42:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:42:16.457920  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:42:16.457983  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:42:16.458003  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:42:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:42:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:42:23.409798  543705 memory.go:184] no items to output this cycle
I0323 07:42:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 07:42:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:42:33.409792  543705 memory.go:184] no items to output this cycle
I0323 07:42:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 07:42:38.788583  543705 disk_info.go:125] begin check local disk info of client
I0323 07:42:38.791220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:42:38.791225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cef00 0xc0003cef40]
I0323 07:42:40.198783  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:42:40.198789  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:42:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:42:43.410616  543705 memory.go:191] Add success.
I0323 07:42:43.409791  543705 cpu.go:282] Add success.
I0323 07:42:43.420335  543705 net.go:648] Add success.
I0323 07:42:43.423202  543705 net.go:770] primary dev: ETH0
I0323 07:42:43.423216  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:42:43.423228  543705 net.go:698] Add success.
I0323 07:42:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:42:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:42:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:42:53.410202  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:42:53.410216  543705 memory.go:184] no items to output this cycle
I0323 07:42:53.410217  543705 cpu.go:275] no items to output this cycle
E0323 07:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:43:03.409789  543705 memory.go:184] no items to output this cycle
I0323 07:43:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 07:43:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:43:13.409795  543705 memory.go:191] Add success.
I0323 07:43:13.409796  543705 cpu.go:282] Add success.
W0323 07:43:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:43:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:43:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:43:13.420166  543705 net.go:648] Add success.
I0323 07:43:13.423072  543705 net.go:770] primary dev: ETH0
I0323 07:43:13.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:43:13.423102  543705 net.go:698] Add success.
I0323 07:43:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:43:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:43:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 07:43:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:43:14.456570  543705 disk_worker.go:494] system disk:vda1
I0323 07:43:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:43:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:43:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:43:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:43:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:43:23.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:43:23.409906  543705 memory.go:184] no items to output this cycle
I0323 07:43:23.409963  543705 cpu.go:275] no items to output this cycle
E0323 07:43:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:43:33.409785  543705 memory.go:184] no items to output this cycle
I0323 07:43:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 07:43:38.791313  543705 disk_info.go:125] begin check local disk info of client
I0323 07:43:38.793863  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:43:38.793870  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 07:43:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:43:43.410740  543705 memory.go:191] Add success.
I0323 07:43:43.409804  543705 cpu.go:282] Add success.
I0323 07:43:43.420444  543705 net.go:648] Add success.
I0323 07:43:43.423547  543705 net.go:770] primary dev: ETH0
I0323 07:43:43.423562  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:43:43.423577  543705 net.go:698] Add success.
I0323 07:43:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:43:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:43:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:43:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:43:53.409768  543705 memory.go:184] no items to output this cycle
I0323 07:43:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 07:44:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:44:03.409803  543705 memory.go:184] no items to output this cycle
I0323 07:44:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 07:44:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:44:13.409818  543705 memory.go:191] Add success.
I0323 07:44:13.409827  543705 cpu.go:282] Add success.
W0323 07:44:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:44:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:44:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:44:13.420270  543705 net.go:648] Add success.
I0323 07:44:13.422978  543705 net.go:770] primary dev: ETH0
I0323 07:44:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:44:13.423020  543705 net.go:698] Add success.
I0323 07:44:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:44:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:44:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 07:44:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:44:14.456603  543705 disk_worker.go:494] system disk:vda1
I0323 07:44:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:44:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:44:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:44:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:44:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:44:23.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:44:23.409893  543705 cpu.go:275] no items to output this cycle
I0323 07:44:23.409898  543705 memory.go:184] no items to output this cycle
E0323 07:44:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:44:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 07:44:33.409807  543705 memory.go:184] no items to output this cycle
I0323 07:44:38.795606  543705 disk_info.go:125] begin check local disk info of client
I0323 07:44:38.798264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:44:38.798270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003539c0 0xc000353a00]
E0323 07:44:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:44:43.410790  543705 memory.go:191] Add success.
I0323 07:44:43.409826  543705 cpu.go:282] Add success.
I0323 07:44:43.420552  543705 net.go:648] Add success.
I0323 07:44:43.423397  543705 net.go:770] primary dev: ETH0
I0323 07:44:43.423409  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:44:43.423422  543705 net.go:698] Add success.
I0323 07:44:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:44:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:44:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:44:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:44:53.409809  543705 memory.go:184] no items to output this cycle
I0323 07:44:53.409818  543705 cpu.go:275] no items to output this cycle
E0323 07:45:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:45:03.409784  543705 memory.go:184] no items to output this cycle
I0323 07:45:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 07:45:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:45:13.409792  543705 memory.go:191] Add success.
I0323 07:45:13.409797  543705 cpu.go:282] Add success.
W0323 07:45:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:45:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:45:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:45:13.420147  543705 net.go:648] Add success.
I0323 07:45:13.423230  543705 net.go:770] primary dev: ETH0
I0323 07:45:13.423249  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:45:13.423263  543705 net.go:698] Add success.
I0323 07:45:13.468325  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"82d4ae2d-9ae6-4e3a-9f68-45c8dc6a01ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:45:13.468364  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:45:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:45:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:45:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 07:45:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:45:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 07:45:14.456611  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:45:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:45:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:45:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:45:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:45:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:45:23.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:45:23.409869  543705 memory.go:184] no items to output this cycle
I0323 07:45:23.409962  543705 cpu.go:275] no items to output this cycle
E0323 07:45:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:45:33.409772  543705 memory.go:184] no items to output this cycle
I0323 07:45:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 07:45:38.798362  543705 disk_info.go:125] begin check local disk info of client
I0323 07:45:38.800821  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:45:38.800828  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
I0323 07:45:40.201723  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:45:40.201728  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:45:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:45:43.410673  543705 memory.go:191] Add success.
I0323 07:45:43.409822  543705 cpu.go:282] Add success.
I0323 07:45:43.420358  543705 net.go:648] Add success.
I0323 07:45:43.423119  543705 net.go:770] primary dev: ETH0
I0323 07:45:43.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:45:43.423155  543705 net.go:698] Add success.
I0323 07:45:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:45:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:45:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:45:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:45:53.409773  543705 memory.go:184] no items to output this cycle
I0323 07:45:53.409777  543705 cpu.go:275] no items to output this cycle
E0323 07:46:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:46:03.409768  543705 memory.go:184] no items to output this cycle
I0323 07:46:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 07:46:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:46:13.409788  543705 memory.go:191] Add success.
I0323 07:46:13.409797  543705 cpu.go:282] Add success.
W0323 07:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:46:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:46:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:46:13.420186  543705 net.go:648] Add success.
I0323 07:46:13.422760  543705 net.go:770] primary dev: ETH0
I0323 07:46:13.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:46:13.422786  543705 net.go:698] Add success.
I0323 07:46:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:46:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:46:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 07:46:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:46:14.456514  543705 disk_worker.go:494] system disk:vda1
I0323 07:46:14.456559  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:46:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:46:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:46:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:46:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:46:23.409767  543705 memory.go:184] no items to output this cycle
I0323 07:46:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 07:46:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:46:33.409782  543705 memory.go:184] no items to output this cycle
I0323 07:46:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 07:46:38.801683  543705 disk_info.go:125] begin check local disk info of client
I0323 07:46:38.804171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:46:38.804191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6000 0xc0003b6040]
E0323 07:46:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:46:43.410546  543705 memory.go:191] Add success.
I0323 07:46:43.409805  543705 cpu.go:282] Add success.
I0323 07:46:43.420253  543705 net.go:648] Add success.
I0323 07:46:43.422992  543705 net.go:770] primary dev: ETH0
I0323 07:46:43.423005  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:46:43.423019  543705 net.go:698] Add success.
I0323 07:46:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:46:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:46:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:46:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:46:53.409763  543705 memory.go:184] no items to output this cycle
I0323 07:46:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 07:47:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:47:03.409815  543705 memory.go:184] no items to output this cycle
I0323 07:47:03.409835  543705 cpu.go:275] no items to output this cycle
E0323 07:47:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:47:13.409812  543705 memory.go:191] Add success.
I0323 07:47:13.409825  543705 cpu.go:282] Add success.
W0323 07:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:47:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:47:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:47:13.420274  543705 net.go:648] Add success.
I0323 07:47:13.423196  543705 net.go:770] primary dev: ETH0
I0323 07:47:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:47:13.423235  543705 net.go:698] Add success.
I0323 07:47:13.452881  543705 event_worker.go:152] Polling the log file for events...
W0323 07:47:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:47:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 07:47:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:47:14.456802  543705 disk_worker.go:494] system disk:vda1
I0323 07:47:14.456840  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:47:14.457098  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:47:14.457106  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:47:14.457111  543705 custom_config.go:64] query custom config with name: gpu
E0323 07:47:15.456827  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:47:15.456835  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:47:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:47:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:47:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:47:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:47:16.472359  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:47:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:47:23.409764  543705 memory.go:184] no items to output this cycle
I0323 07:47:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 07:47:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:47:33.409871  543705 memory.go:184] no items to output this cycle
I0323 07:47:33.409960  543705 cpu.go:275] no items to output this cycle
I0323 07:47:38.805674  543705 disk_info.go:125] begin check local disk info of client
I0323 07:47:38.808189  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:47:38.808195  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 07:47:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:47:43.410599  543705 memory.go:191] Add success.
I0323 07:47:43.409816  543705 cpu.go:282] Add success.
I0323 07:47:43.420303  543705 net.go:648] Add success.
I0323 07:47:43.422983  543705 net.go:770] primary dev: ETH0
I0323 07:47:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:47:43.423007  543705 net.go:698] Add success.
I0323 07:47:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:47:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:47:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:47:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:47:53.409778  543705 memory.go:184] no items to output this cycle
I0323 07:47:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 07:48:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:48:03.409781  543705 memory.go:184] no items to output this cycle
I0323 07:48:03.409783  543705 cpu.go:275] no items to output this cycle
E0323 07:48:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:48:13.409812  543705 memory.go:191] Add success.
I0323 07:48:13.409821  543705 cpu.go:282] Add success.
W0323 07:48:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:48:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:48:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:48:13.420272  543705 net.go:648] Add success.
I0323 07:48:13.423389  543705 net.go:770] primary dev: ETH0
I0323 07:48:13.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:48:13.423414  543705 net.go:698] Add success.
I0323 07:48:13.743343  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1c76a51a-f1d9-4c41-8be3-bf2f64e68286","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:48:13.743388  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:48:14.453977  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:48:14.454150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:48:14.454236  543705 disk_worker.go:708] disk space is not compliant
W0323 07:48:14.454239  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:48:14.455789  543705 disk_worker.go:494] system disk:vda1
I0323 07:48:14.455819  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:48:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:48:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:48:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:48:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:48:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:48:23.409768  543705 memory.go:184] no items to output this cycle
I0323 07:48:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 07:48:33.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:48:33.409891  543705 memory.go:184] no items to output this cycle
I0323 07:48:33.409996  543705 cpu.go:275] no items to output this cycle
I0323 07:48:38.809676  543705 disk_info.go:125] begin check local disk info of client
I0323 07:48:38.812256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:48:38.812263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
I0323 07:48:40.202788  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:48:40.202794  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:48:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:48:43.410751  543705 memory.go:191] Add success.
I0323 07:48:43.409818  543705 cpu.go:282] Add success.
I0323 07:48:43.420524  543705 net.go:648] Add success.
I0323 07:48:43.423190  543705 net.go:770] primary dev: ETH0
I0323 07:48:43.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:48:43.423215  543705 net.go:698] Add success.
I0323 07:48:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:48:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:48:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:48:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:48:53.409783  543705 memory.go:184] no items to output this cycle
I0323 07:48:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 07:49:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:49:03.409791  543705 memory.go:184] no items to output this cycle
I0323 07:49:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 07:49:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:49:13.409793  543705 memory.go:191] Add success.
I0323 07:49:13.409807  543705 cpu.go:282] Add success.
W0323 07:49:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:49:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:49:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:49:13.420065  543705 net.go:648] Add success.
I0323 07:49:13.423010  543705 net.go:770] primary dev: ETH0
I0323 07:49:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:49:13.423035  543705 net.go:698] Add success.
I0323 07:49:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:49:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:49:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 07:49:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:49:14.456559  543705 disk_worker.go:494] system disk:vda1
I0323 07:49:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:49:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:49:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:49:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:49:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:49:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:49:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:49:23.409762  543705 memory.go:184] no items to output this cycle
I0323 07:49:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 07:49:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:49:33.409928  543705 memory.go:184] no items to output this cycle
I0323 07:49:33.409959  543705 cpu.go:275] no items to output this cycle
I0323 07:49:38.813676  543705 disk_info.go:125] begin check local disk info of client
I0323 07:49:38.816256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:49:38.816264  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 07:49:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:49:43.410619  543705 memory.go:191] Add success.
I0323 07:49:43.409804  543705 cpu.go:282] Add success.
I0323 07:49:43.420122  543705 net.go:770] primary dev: ETH0
I0323 07:49:43.420135  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:49:43.420147  543705 net.go:698] Add success.
I0323 07:49:43.420494  543705 net.go:648] Add success.
I0323 07:49:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:49:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:49:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:49:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:49:53.409774  543705 memory.go:184] no items to output this cycle
I0323 07:49:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:50:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:50:03.409801  543705 memory.go:184] no items to output this cycle
I0323 07:50:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 07:50:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:50:13.409778  543705 memory.go:191] Add success.
I0323 07:50:13.409799  543705 cpu.go:282] Add success.
W0323 07:50:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:50:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:50:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:50:13.420103  543705 net.go:648] Add success.
I0323 07:50:13.422849  543705 net.go:770] primary dev: ETH0
I0323 07:50:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:50:13.422878  543705 net.go:698] Add success.
I0323 07:50:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:50:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:50:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 07:50:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:50:14.456531  543705 disk_worker.go:494] system disk:vda1
I0323 07:50:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:50:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:50:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:50:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:50:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:50:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:50:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:50:23.409772  543705 memory.go:184] no items to output this cycle
I0323 07:50:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 07:50:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:50:33.409810  543705 memory.go:184] no items to output this cycle
I0323 07:50:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 07:50:38.817686  543705 disk_info.go:125] begin check local disk info of client
I0323 07:50:38.820220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:50:38.820227  543705 disk_info.go:196] parse disk info done, disk is : [0xc000548940 0xc000548980]
E0323 07:50:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:50:43.410714  543705 memory.go:191] Add success.
I0323 07:50:43.409803  543705 cpu.go:282] Add success.
I0323 07:50:43.420439  543705 net.go:648] Add success.
I0323 07:50:43.423057  543705 net.go:770] primary dev: ETH0
I0323 07:50:43.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:50:43.423086  543705 net.go:698] Add success.
I0323 07:50:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:50:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:50:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:50:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:50:53.409808  543705 memory.go:184] no items to output this cycle
I0323 07:50:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 07:51:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:51:03.409794  543705 memory.go:184] no items to output this cycle
I0323 07:51:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 07:51:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:51:13.409790  543705 memory.go:191] Add success.
I0323 07:51:13.409790  543705 cpu.go:282] Add success.
W0323 07:51:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:51:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:51:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:51:13.420296  543705 net.go:648] Add success.
I0323 07:51:13.423370  543705 net.go:770] primary dev: ETH0
I0323 07:51:13.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:51:13.423397  543705 net.go:698] Add success.
I0323 07:51:13.846211  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fed2f49a-630b-407d-a082-250be338f8a0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:51:13.846244  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:51:14.454304  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:51:14.454512  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:51:14.454525  543705 disk_worker.go:708] disk space is not compliant
W0323 07:51:14.454528  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:51:14.455874  543705 disk_worker.go:494] system disk:vda1
I0323 07:51:14.455930  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:51:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:51:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:51:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:51:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:51:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:51:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:51:23.409765  543705 memory.go:184] no items to output this cycle
I0323 07:51:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 07:51:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:51:33.409891  543705 memory.go:184] no items to output this cycle
I0323 07:51:33.409937  543705 cpu.go:275] no items to output this cycle
I0323 07:51:38.821677  543705 disk_info.go:125] begin check local disk info of client
I0323 07:51:38.824228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:51:38.824234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
I0323 07:51:40.205730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:51:40.205735  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:51:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:51:43.410686  543705 memory.go:191] Add success.
I0323 07:51:43.409815  543705 cpu.go:282] Add success.
I0323 07:51:43.420359  543705 net.go:648] Add success.
I0323 07:51:43.422973  543705 net.go:770] primary dev: ETH0
I0323 07:51:43.422992  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:51:43.423008  543705 net.go:698] Add success.
I0323 07:51:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:51:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:51:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:51:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:51:53.409776  543705 memory.go:184] no items to output this cycle
I0323 07:51:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 07:52:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:52:03.409798  543705 memory.go:184] no items to output this cycle
I0323 07:52:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 07:52:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:52:13.409789  543705 cpu.go:282] Add success.
I0323 07:52:13.409794  543705 memory.go:191] Add success.
W0323 07:52:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:52:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:52:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:52:13.420033  543705 net.go:648] Add success.
I0323 07:52:13.422574  543705 net.go:770] primary dev: ETH0
I0323 07:52:13.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:52:13.422600  543705 net.go:698] Add success.
W0323 07:52:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:52:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 07:52:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:52:14.455877  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:52:14.455885  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:52:14.455891  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:52:14.456617  543705 disk_worker.go:494] system disk:vda1
I0323 07:52:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:52:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:52:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:52:16.457901  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:52:16.457901  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:52:16.457954  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:52:16.457973  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:52:16.472313  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:52:23.409740  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:52:23.409759  543705 memory.go:184] no items to output this cycle
I0323 07:52:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:52:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:52:33.409821  543705 memory.go:184] no items to output this cycle
I0323 07:52:33.409834  543705 cpu.go:275] no items to output this cycle
I0323 07:52:38.825673  543705 disk_info.go:125] begin check local disk info of client
I0323 07:52:38.828194  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:52:38.828201  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cd0c0 0xc0004cd100]
E0323 07:52:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:52:43.410702  543705 memory.go:191] Add success.
I0323 07:52:43.409801  543705 cpu.go:282] Add success.
I0323 07:52:43.420441  543705 net.go:648] Add success.
I0323 07:52:43.423319  543705 net.go:770] primary dev: ETH0
I0323 07:52:43.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:52:43.423345  543705 net.go:698] Add success.
I0323 07:52:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:52:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:52:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:52:53.410244  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:52:53.410264  543705 memory.go:184] no items to output this cycle
I0323 07:52:53.410283  543705 cpu.go:275] no items to output this cycle
E0323 07:53:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:53:03.409813  543705 memory.go:184] no items to output this cycle
I0323 07:53:03.409825  543705 cpu.go:275] no items to output this cycle
E0323 07:53:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:53:13.409808  543705 memory.go:191] Add success.
I0323 07:53:13.409817  543705 cpu.go:282] Add success.
W0323 07:53:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:53:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:53:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:53:13.420122  543705 net.go:648] Add success.
I0323 07:53:13.422830  543705 net.go:770] primary dev: ETH0
I0323 07:53:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:53:13.422857  543705 net.go:698] Add success.
I0323 07:53:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:53:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:53:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 07:53:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:53:14.456481  543705 disk_worker.go:494] system disk:vda1
I0323 07:53:14.456525  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:53:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:53:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:53:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:53:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:53:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:53:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:53:23.409794  543705 memory.go:184] no items to output this cycle
I0323 07:53:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 07:53:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:53:33.409780  543705 memory.go:184] no items to output this cycle
I0323 07:53:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 07:53:38.829676  543705 disk_info.go:125] begin check local disk info of client
I0323 07:53:38.832197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:53:38.832204  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f4340 0xc0003f4380]
E0323 07:53:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:53:43.410699  543705 memory.go:191] Add success.
I0323 07:53:43.409799  543705 cpu.go:282] Add success.
I0323 07:53:43.420364  543705 net.go:648] Add success.
I0323 07:53:43.423952  543705 net.go:770] primary dev: ETH0
I0323 07:53:43.423965  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:53:43.423977  543705 net.go:698] Add success.
I0323 07:53:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:53:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:53:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:53:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:53:53.409782  543705 memory.go:184] no items to output this cycle
I0323 07:53:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 07:54:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:54:03.409772  543705 memory.go:184] no items to output this cycle
I0323 07:54:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 07:54:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:54:13.409813  543705 memory.go:191] Add success.
I0323 07:54:13.409817  543705 cpu.go:282] Add success.
W0323 07:54:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:54:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:54:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:54:13.420184  543705 net.go:648] Add success.
I0323 07:54:13.422848  543705 net.go:770] primary dev: ETH0
I0323 07:54:13.422862  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:54:13.422876  543705 net.go:698] Add success.
I0323 07:54:13.468401  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6673100c-4b31-44e5-9f76-f7f72c1f52ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:54:13.468435  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 07:54:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:54:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:54:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 07:54:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:54:14.456587  543705 disk_worker.go:494] system disk:vda1
I0323 07:54:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:54:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:54:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:54:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:54:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:54:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:54:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:54:23.409792  543705 memory.go:184] no items to output this cycle
I0323 07:54:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 07:54:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:54:33.409790  543705 memory.go:184] no items to output this cycle
I0323 07:54:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 07:54:38.833674  543705 disk_info.go:125] begin check local disk info of client
I0323 07:54:38.836175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:54:38.836181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b78c0 0xc0003b7900]
I0323 07:54:40.207756  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:54:40.207761  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:54:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:54:43.410632  543705 memory.go:191] Add success.
I0323 07:54:43.409901  543705 cpu.go:282] Add success.
I0323 07:54:43.419735  543705 net.go:648] Add success.
I0323 07:54:43.422389  543705 net.go:770] primary dev: ETH0
I0323 07:54:43.422404  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:54:43.422418  543705 net.go:698] Add success.
I0323 07:54:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:54:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:54:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:54:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:54:53.409776  543705 memory.go:184] no items to output this cycle
I0323 07:54:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 07:55:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:55:03.409809  543705 cpu.go:275] no items to output this cycle
I0323 07:55:03.409812  543705 memory.go:184] no items to output this cycle
E0323 07:55:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:55:13.409821  543705 memory.go:191] Add success.
I0323 07:55:13.409828  543705 cpu.go:282] Add success.
W0323 07:55:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:55:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:55:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:55:13.420299  543705 net.go:648] Add success.
I0323 07:55:13.423142  543705 net.go:770] primary dev: ETH0
I0323 07:55:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:55:13.423172  543705 net.go:698] Add success.
I0323 07:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:55:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:55:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 07:55:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:55:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 07:55:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:55:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:55:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:55:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:55:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:55:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:55:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:55:23.409784  543705 memory.go:184] no items to output this cycle
I0323 07:55:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 07:55:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:55:33.409787  543705 memory.go:184] no items to output this cycle
I0323 07:55:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 07:55:38.837672  543705 disk_info.go:125] begin check local disk info of client
I0323 07:55:38.840229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:55:38.840235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b7a80 0xc0003b7ac0]
E0323 07:55:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:55:43.410668  543705 memory.go:191] Add success.
I0323 07:55:43.409806  543705 cpu.go:282] Add success.
I0323 07:55:43.420385  543705 net.go:648] Add success.
I0323 07:55:43.423094  543705 net.go:770] primary dev: ETH0
I0323 07:55:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:55:43.423119  543705 net.go:698] Add success.
I0323 07:55:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:55:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:55:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:55:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:55:53.409779  543705 memory.go:184] no items to output this cycle
I0323 07:55:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 07:56:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:56:03.409801  543705 memory.go:184] no items to output this cycle
I0323 07:56:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 07:56:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:56:13.409828  543705 memory.go:191] Add success.
I0323 07:56:13.409841  543705 cpu.go:282] Add success.
W0323 07:56:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:56:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:56:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:56:13.420285  543705 net.go:648] Add success.
I0323 07:56:13.423292  543705 net.go:770] primary dev: ETH0
I0323 07:56:13.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:56:13.423318  543705 net.go:698] Add success.
I0323 07:56:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:56:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:56:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 07:56:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:56:14.456558  543705 disk_worker.go:494] system disk:vda1
I0323 07:56:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:56:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:56:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:56:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:56:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:56:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:56:23.409787  543705 memory.go:184] no items to output this cycle
I0323 07:56:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 07:56:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:56:33.409793  543705 memory.go:184] no items to output this cycle
I0323 07:56:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 07:56:38.841675  543705 disk_info.go:125] begin check local disk info of client
I0323 07:56:38.844250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:56:38.844256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000355a40 0xc000355a80]
E0323 07:56:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:56:43.410583  543705 memory.go:191] Add success.
I0323 07:56:43.409802  543705 cpu.go:282] Add success.
I0323 07:56:43.420286  543705 net.go:648] Add success.
I0323 07:56:43.423190  543705 net.go:770] primary dev: ETH0
I0323 07:56:43.423205  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:56:43.423218  543705 net.go:698] Add success.
I0323 07:56:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:56:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:56:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:56:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:56:53.409798  543705 memory.go:184] no items to output this cycle
I0323 07:56:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 07:57:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:57:03.409788  543705 memory.go:184] no items to output this cycle
I0323 07:57:03.409916  543705 cpu.go:275] no items to output this cycle
E0323 07:57:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:57:13.409826  543705 memory.go:191] Add success.
I0323 07:57:13.409833  543705 cpu.go:282] Add success.
W0323 07:57:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:57:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:57:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:57:13.420183  543705 net.go:648] Add success.
I0323 07:57:13.423236  543705 net.go:770] primary dev: ETH0
I0323 07:57:13.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:57:13.423260  543705 net.go:698] Add success.
I0323 07:57:13.429233  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 07:57:13.453400  543705 event_worker.go:152] Polling the log file for events...
I0323 07:57:13.463829  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49fadbcf-3302-4f32-a561-b3e3d310fb63","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 07:57:13.463865  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 07:57:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:57:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 07:57:14.455213  543705 disk_worker.go:728] disk inode is not compliant
E0323 07:57:14.455909  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 07:57:14.455919  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 07:57:14.455925  543705 custom_config.go:64] query custom config with name: gpu
I0323 07:57:14.456831  543705 disk_worker.go:494] system disk:vda1
I0323 07:57:14.456862  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 07:57:15.456891  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 07:57:15.456899  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:57:16.457960  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 07:57:16.457969  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 07:57:16.458014  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:57:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:57:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:57:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:57:23.409779  543705 memory.go:184] no items to output this cycle
I0323 07:57:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 07:57:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:57:33.409780  543705 memory.go:184] no items to output this cycle
I0323 07:57:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 07:57:38.845675  543705 disk_info.go:125] begin check local disk info of client
I0323 07:57:38.848237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:57:38.848243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304180 0xc0003041c0]
I0323 07:57:40.209733  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 07:57:40.209740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 07:57:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:57:43.410583  543705 memory.go:191] Add success.
I0323 07:57:43.409807  543705 cpu.go:282] Add success.
I0323 07:57:43.420091  543705 net.go:770] primary dev: ETH0
I0323 07:57:43.420107  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:57:43.420122  543705 net.go:698] Add success.
I0323 07:57:43.420621  543705 net.go:648] Add success.
I0323 07:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:57:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:57:53.410425  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:57:53.410441  543705 memory.go:184] no items to output this cycle
I0323 07:57:53.410447  543705 cpu.go:275] no items to output this cycle
E0323 07:58:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:58:03.409805  543705 memory.go:184] no items to output this cycle
I0323 07:58:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 07:58:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:58:13.409776  543705 memory.go:191] Add success.
W0323 07:58:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 07:58:13.409808  543705 cpu.go:282] Add success.
W0323 07:58:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:58:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:58:13.420127  543705 net.go:648] Add success.
I0323 07:58:13.423019  543705 net.go:770] primary dev: ETH0
I0323 07:58:13.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:58:13.423045  543705 net.go:698] Add success.
I0323 07:58:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:58:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:58:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 07:58:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:58:14.456515  543705 disk_worker.go:494] system disk:vda1
I0323 07:58:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:58:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:58:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:58:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:58:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:58:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:58:23.409768  543705 memory.go:184] no items to output this cycle
I0323 07:58:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 07:58:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:58:33.409785  543705 memory.go:184] no items to output this cycle
I0323 07:58:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 07:58:38.849675  543705 disk_info.go:125] begin check local disk info of client
I0323 07:58:38.852235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:58:38.852242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abc80 0xc0001abcc0]
E0323 07:58:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:58:43.410675  543705 memory.go:191] Add success.
I0323 07:58:43.409811  543705 cpu.go:282] Add success.
I0323 07:58:43.420447  543705 net.go:648] Add success.
I0323 07:58:43.423341  543705 net.go:770] primary dev: ETH0
I0323 07:58:43.423354  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:58:43.423367  543705 net.go:698] Add success.
I0323 07:58:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:58:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:58:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:58:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:58:53.409766  543705 memory.go:184] no items to output this cycle
I0323 07:58:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 07:59:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:59:03.409786  543705 memory.go:184] no items to output this cycle
I0323 07:59:03.409841  543705 cpu.go:275] no items to output this cycle
E0323 07:59:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:59:13.409798  543705 memory.go:191] Add success.
I0323 07:59:13.409823  543705 cpu.go:282] Add success.
W0323 07:59:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 07:59:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 07:59:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 07:59:13.420168  543705 net.go:648] Add success.
I0323 07:59:13.423130  543705 net.go:770] primary dev: ETH0
I0323 07:59:13.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:59:13.423156  543705 net.go:698] Add success.
I0323 07:59:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 07:59:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 07:59:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 07:59:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 07:59:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 07:59:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 07:59:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 07:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:59:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:59:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0323 07:59:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0323 07:59:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:59:23.409816  543705 memory.go:184] no items to output this cycle
I0323 07:59:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 07:59:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:59:33.409811  543705 memory.go:184] no items to output this cycle
I0323 07:59:33.409831  543705 cpu.go:275] no items to output this cycle
I0323 07:59:38.853674  543705 disk_info.go:125] begin check local disk info of client
I0323 07:59:38.856251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 07:59:38.856257  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275ec0 0xc000275f00]
E0323 07:59:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:59:43.410604  543705 memory.go:191] Add success.
I0323 07:59:43.409801  543705 cpu.go:282] Add success.
I0323 07:59:43.420308  543705 net.go:648] Add success.
I0323 07:59:43.422942  543705 net.go:770] primary dev: ETH0
I0323 07:59:43.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0323 07:59:43.422979  543705 net.go:698] Add success.
I0323 07:59:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 07:59:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 07:59:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 07:59:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 07:59:53.409773  543705 memory.go:184] no items to output this cycle
I0323 07:59:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 08:00:03.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:00:03.409827  543705 memory.go:184] no items to output this cycle
I0323 08:00:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 08:00:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:00:13.409787  543705 memory.go:191] Add success.
I0323 08:00:13.409804  543705 cpu.go:282] Add success.
W0323 08:00:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:00:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:00:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:00:13.420323  543705 net.go:648] Add success.
I0323 08:00:13.422843  543705 net.go:770] primary dev: ETH0
I0323 08:00:13.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:00:13.422870  543705 net.go:698] Add success.
I0323 08:00:13.465050  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4b90c333-6be7-4951-b685-d2c0f8307971","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:00:13.465084  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:00:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:00:14.455253  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:00:14.455265  543705 disk_worker.go:708] disk space is not compliant
W0323 08:00:14.455269  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:00:14.456813  543705 disk_worker.go:494] system disk:vda1
I0323 08:00:14.456850  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:00:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:00:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:00:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:00:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:00:23.410392  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:00:23.410408  543705 cpu.go:275] no items to output this cycle
I0323 08:00:23.410409  543705 memory.go:184] no items to output this cycle
E0323 08:00:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:00:33.409805  543705 memory.go:184] no items to output this cycle
I0323 08:00:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 08:00:38.857681  543705 disk_info.go:125] begin check local disk info of client
I0323 08:00:38.860399  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:00:38.860408  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d01c0 0xc0003d0200]
I0323 08:00:40.213756  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:00:40.213765  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:00:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:00:43.410674  543705 memory.go:191] Add success.
I0323 08:00:43.409845  543705 cpu.go:282] Add success.
I0323 08:00:43.420352  543705 net.go:648] Add success.
I0323 08:00:43.422828  543705 net.go:770] primary dev: ETH0
I0323 08:00:43.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:00:43.422855  543705 net.go:698] Add success.
I0323 08:00:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:00:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:00:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:00:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:00:53.409786  543705 cpu.go:275] no items to output this cycle
I0323 08:00:53.409794  543705 memory.go:184] no items to output this cycle
E0323 08:01:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:01:03.409789  543705 memory.go:184] no items to output this cycle
I0323 08:01:03.409903  543705 cpu.go:275] no items to output this cycle
E0323 08:01:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:01:13.409788  543705 memory.go:191] Add success.
I0323 08:01:13.409810  543705 cpu.go:282] Add success.
W0323 08:01:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:01:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:01:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:01:13.420169  543705 net.go:648] Add success.
I0323 08:01:13.422844  543705 net.go:770] primary dev: ETH0
I0323 08:01:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:01:13.422870  543705 net.go:698] Add success.
I0323 08:01:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:01:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:01:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 08:01:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:01:14.456595  543705 disk_worker.go:494] system disk:vda1
I0323 08:01:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:01:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:01:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:01:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:01:16.472389  543705 disk_local_worker.go:436] Get disk info: []
I0323 08:01:23.409908  543705 cpu.go:275] no items to output this cycle
E0323 08:01:23.410052  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:01:23.410071  543705 memory.go:184] no items to output this cycle
E0323 08:01:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:01:33.409780  543705 memory.go:184] no items to output this cycle
I0323 08:01:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 08:01:38.861675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:01:38.864251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:01:38.864258  543705 disk_info.go:196] parse disk info done, disk is : [0xc000262800 0xc000262840]
E0323 08:01:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:01:43.410696  543705 memory.go:191] Add success.
I0323 08:01:43.409784  543705 cpu.go:282] Add success.
I0323 08:01:43.420408  543705 net.go:648] Add success.
I0323 08:01:43.423383  543705 net.go:770] primary dev: ETH0
I0323 08:01:43.423398  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:01:43.423412  543705 net.go:698] Add success.
I0323 08:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:01:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:01:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:01:53.410479  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:01:53.410497  543705 memory.go:184] no items to output this cycle
I0323 08:01:53.410540  543705 cpu.go:275] no items to output this cycle
E0323 08:02:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:02:03.409789  543705 memory.go:184] no items to output this cycle
I0323 08:02:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 08:02:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:02:13.409812  543705 memory.go:191] Add success.
I0323 08:02:13.409822  543705 cpu.go:282] Add success.
W0323 08:02:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:02:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:02:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:02:13.420146  543705 net.go:648] Add success.
I0323 08:02:13.422806  543705 net.go:770] primary dev: ETH0
I0323 08:02:13.422820  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:02:13.422833  543705 net.go:698] Add success.
W0323 08:02:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:02:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 08:02:14.455193  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:02:14.456823  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:02:14.456832  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:02:14.456838  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:02:14.456882  543705 disk_worker.go:494] system disk:vda1
I0323 08:02:14.456924  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:02:15.456850  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:02:15.456860  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:02:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:02:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:02:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:02:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:02:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:02:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:02:23.409799  543705 memory.go:184] no items to output this cycle
I0323 08:02:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 08:02:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:02:33.409777  543705 memory.go:184] no items to output this cycle
I0323 08:02:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 08:02:38.865676  543705 disk_info.go:125] begin check local disk info of client
I0323 08:02:38.868253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:02:38.868259  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460400 0xc000460440]
E0323 08:02:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:02:43.410740  543705 memory.go:191] Add success.
I0323 08:02:43.409785  543705 cpu.go:282] Add success.
I0323 08:02:43.420413  543705 net.go:648] Add success.
I0323 08:02:43.423317  543705 net.go:770] primary dev: ETH0
I0323 08:02:43.423331  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:02:43.423497  543705 net.go:698] Add success.
I0323 08:02:46.458011  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:02:46.458095  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:02:46.458142  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:02:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:02:53.409808  543705 memory.go:184] no items to output this cycle
I0323 08:02:53.409823  543705 cpu.go:275] no items to output this cycle
E0323 08:03:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:03:03.409790  543705 memory.go:184] no items to output this cycle
I0323 08:03:03.409843  543705 cpu.go:275] no items to output this cycle
E0323 08:03:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:03:13.409797  543705 cpu.go:282] Add success.
I0323 08:03:13.409800  543705 memory.go:191] Add success.
W0323 08:03:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:03:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:03:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:03:13.420196  543705 net.go:648] Add success.
I0323 08:03:13.423091  543705 net.go:770] primary dev: ETH0
I0323 08:03:13.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:03:13.423120  543705 net.go:698] Add success.
I0323 08:03:13.469421  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84e16a6f-e9d1-40dc-83ed-3a3169ff3d5b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:03:13.469454  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:03:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:03:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:03:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 08:03:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:03:14.456544  543705 disk_worker.go:494] system disk:vda1
I0323 08:03:14.456598  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:03:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:03:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:03:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:03:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:03:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:03:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:03:23.409774  543705 memory.go:184] no items to output this cycle
I0323 08:03:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 08:03:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:03:33.409793  543705 memory.go:184] no items to output this cycle
I0323 08:03:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 08:03:38.869674  543705 disk_info.go:125] begin check local disk info of client
I0323 08:03:38.872238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:03:38.872244  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024d300 0xc00024d340]
I0323 08:03:40.214784  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:03:40.214789  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:03:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:03:43.410505  543705 memory.go:191] Add success.
I0323 08:03:43.409801  543705 cpu.go:282] Add success.
I0323 08:03:43.420237  543705 net.go:648] Add success.
I0323 08:03:43.422897  543705 net.go:770] primary dev: ETH0
I0323 08:03:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:03:43.422923  543705 net.go:698] Add success.
I0323 08:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:03:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:03:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:03:53.410378  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:03:53.410396  543705 memory.go:184] no items to output this cycle
I0323 08:03:53.410408  543705 cpu.go:275] no items to output this cycle
E0323 08:04:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:04:03.409792  543705 memory.go:184] no items to output this cycle
I0323 08:04:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 08:04:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:04:13.409778  543705 memory.go:191] Add success.
W0323 08:04:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:04:13.409814  543705 cpu.go:282] Add success.
W0323 08:04:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:04:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:04:13.420200  543705 net.go:648] Add success.
I0323 08:04:13.422946  543705 net.go:770] primary dev: ETH0
I0323 08:04:13.422962  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:04:13.422976  543705 net.go:698] Add success.
I0323 08:04:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:04:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:04:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 08:04:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:04:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 08:04:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:04:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:04:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:04:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:04:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:04:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:04:23.409782  543705 memory.go:184] no items to output this cycle
I0323 08:04:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 08:04:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:04:33.409789  543705 memory.go:184] no items to output this cycle
I0323 08:04:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 08:04:38.873673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:04:38.876319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:04:38.876325  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024d100 0xc00024d140]
E0323 08:04:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:04:43.410605  543705 memory.go:191] Add success.
I0323 08:04:43.409815  543705 cpu.go:282] Add success.
I0323 08:04:43.420315  543705 net.go:648] Add success.
I0323 08:04:43.423016  543705 net.go:770] primary dev: ETH0
I0323 08:04:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:04:43.423045  543705 net.go:698] Add success.
I0323 08:04:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:04:46.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:04:46.458112  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:04:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:04:53.409793  543705 memory.go:184] no items to output this cycle
I0323 08:04:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 08:05:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:05:03.409779  543705 memory.go:184] no items to output this cycle
I0323 08:05:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 08:05:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:05:13.409802  543705 memory.go:191] Add success.
I0323 08:05:13.409832  543705 cpu.go:282] Add success.
W0323 08:05:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:05:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:05:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:05:13.420329  543705 net.go:648] Add success.
I0323 08:05:13.423405  543705 net.go:770] primary dev: ETH0
I0323 08:05:13.423420  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:05:13.423434  543705 net.go:698] Add success.
I0323 08:05:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:05:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:05:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0323 08:05:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:05:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 08:05:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:05:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:05:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:05:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:05:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:05:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:05:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:05:23.409786  543705 memory.go:184] no items to output this cycle
I0323 08:05:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 08:05:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:05:33.409778  543705 memory.go:184] no items to output this cycle
I0323 08:05:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 08:05:38.877672  543705 disk_info.go:125] begin check local disk info of client
I0323 08:05:38.880243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:05:38.880250  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b780 0xc00007b7c0]
E0323 08:05:43.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:05:43.411020  543705 memory.go:191] Add success.
I0323 08:05:43.409846  543705 cpu.go:282] Add success.
I0323 08:05:43.419930  543705 net.go:648] Add success.
I0323 08:05:43.422553  543705 net.go:770] primary dev: ETH0
I0323 08:05:43.422566  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:05:43.422579  543705 net.go:698] Add success.
I0323 08:05:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:05:53.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:05:53.409897  543705 memory.go:184] no items to output this cycle
I0323 08:05:53.410011  543705 cpu.go:275] no items to output this cycle
E0323 08:06:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:06:03.409779  543705 memory.go:184] no items to output this cycle
I0323 08:06:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 08:06:13.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:06:13.409868  543705 memory.go:191] Add success.
W0323 08:06:13.409904  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:06:13.409922  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:06:13.409926  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:06:13.410274  543705 cpu.go:282] Add success.
I0323 08:06:13.422687  543705 net.go:648] Add success.
I0323 08:06:13.455465  543705 net.go:770] primary dev: ETH0
I0323 08:06:13.455484  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:06:13.455504  543705 net.go:698] Add success.
I0323 08:06:13.555338  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf983f80-91a0-473c-9804-bac626920242","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:06:13.555382  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 08:06:14.455037  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:06:14.455050  543705 disk_worker.go:708] disk space is not compliant
W0323 08:06:14.455053  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:06:14.455284  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:06:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 08:06:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:06:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:06:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:06:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:06:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:06:16.472472  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:06:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:06:23.409787  543705 memory.go:184] no items to output this cycle
I0323 08:06:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 08:06:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:06:33.409789  543705 memory.go:184] no items to output this cycle
I0323 08:06:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 08:06:38.881675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:06:38.884285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:06:38.884291  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314b00 0xc000314b40]
I0323 08:06:40.217721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:06:40.217727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:06:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:06:43.410749  543705 memory.go:191] Add success.
I0323 08:06:43.409797  543705 cpu.go:282] Add success.
I0323 08:06:43.420482  543705 net.go:648] Add success.
I0323 08:06:43.423244  543705 net.go:770] primary dev: ETH0
I0323 08:06:43.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:06:43.423276  543705 net.go:698] Add success.
I0323 08:06:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:06:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:06:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:06:53.409903  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:06:53.409906  543705 cpu.go:275] no items to output this cycle
I0323 08:06:53.409924  543705 memory.go:184] no items to output this cycle
E0323 08:07:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:07:03.409806  543705 memory.go:184] no items to output this cycle
I0323 08:07:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 08:07:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:07:13.409799  543705 memory.go:191] Add success.
I0323 08:07:13.409801  543705 cpu.go:282] Add success.
W0323 08:07:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:07:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:07:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:07:13.420189  543705 net.go:648] Add success.
I0323 08:07:13.423050  543705 net.go:770] primary dev: ETH0
I0323 08:07:13.423064  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:07:13.423076  543705 net.go:698] Add success.
I0323 08:07:13.453658  543705 event_worker.go:152] Polling the log file for events...
W0323 08:07:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:07:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 08:07:14.455168  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:07:14.456933  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:07:14.456942  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:07:14.456948  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:07:14.456993  543705 disk_worker.go:494] system disk:vda1
I0323 08:07:14.457038  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:07:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:07:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:07:16.457952  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:07:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:07:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:07:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:07:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:07:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:07:23.409782  543705 memory.go:184] no items to output this cycle
I0323 08:07:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 08:07:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:07:33.409771  543705 memory.go:184] no items to output this cycle
I0323 08:07:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 08:07:38.885674  543705 disk_info.go:125] begin check local disk info of client
I0323 08:07:38.888159  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:07:38.888166  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2900 0xc0003b2940]
E0323 08:07:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:07:43.410680  543705 memory.go:191] Add success.
I0323 08:07:43.409799  543705 cpu.go:282] Add success.
I0323 08:07:43.420372  543705 net.go:648] Add success.
I0323 08:07:43.423256  543705 net.go:770] primary dev: ETH0
I0323 08:07:43.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:07:43.423282  543705 net.go:698] Add success.
I0323 08:07:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:07:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:07:53.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:07:53.409873  543705 memory.go:184] no items to output this cycle
I0323 08:07:53.409945  543705 cpu.go:275] no items to output this cycle
E0323 08:08:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:08:03.409803  543705 memory.go:184] no items to output this cycle
I0323 08:08:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 08:08:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:08:13.409789  543705 memory.go:191] Add success.
I0323 08:08:13.409792  543705 cpu.go:282] Add success.
W0323 08:08:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:08:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:08:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:08:13.420251  543705 net.go:648] Add success.
I0323 08:08:13.423373  543705 net.go:770] primary dev: ETH0
I0323 08:08:13.423385  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:08:13.423398  543705 net.go:698] Add success.
I0323 08:08:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:08:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:08:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 08:08:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:08:14.456573  543705 disk_worker.go:494] system disk:vda1
I0323 08:08:14.456604  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:08:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:08:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:08:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:08:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:08:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:08:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:08:23.409768  543705 memory.go:184] no items to output this cycle
I0323 08:08:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 08:08:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:08:33.409779  543705 memory.go:184] no items to output this cycle
I0323 08:08:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 08:08:38.889672  543705 disk_info.go:125] begin check local disk info of client
I0323 08:08:38.892314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:08:38.892321  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028e2c0 0xc00028e300]
E0323 08:08:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:08:43.410540  543705 memory.go:191] Add success.
I0323 08:08:43.409822  543705 cpu.go:282] Add success.
I0323 08:08:43.420251  543705 net.go:648] Add success.
I0323 08:08:43.422897  543705 net.go:770] primary dev: ETH0
I0323 08:08:43.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:08:43.422924  543705 net.go:698] Add success.
I0323 08:08:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:08:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:08:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:08:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:08:53.409803  543705 memory.go:184] no items to output this cycle
I0323 08:08:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 08:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:09:03.409785  543705 memory.go:184] no items to output this cycle
I0323 08:09:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 08:09:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:09:13.409800  543705 memory.go:191] Add success.
I0323 08:09:13.409804  543705 cpu.go:282] Add success.
W0323 08:09:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:09:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:09:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:09:13.420161  543705 net.go:648] Add success.
I0323 08:09:13.423092  543705 net.go:770] primary dev: ETH0
I0323 08:09:13.423105  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:09:13.423123  543705 net.go:698] Add success.
I0323 08:09:13.469679  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2f3f7767-9920-4782-b644-6464c44327f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:09:13.469713  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:09:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:09:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:09:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 08:09:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:09:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 08:09:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:09:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:09:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:09:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:09:16.458093  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:09:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:09:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:09:23.409768  543705 memory.go:184] no items to output this cycle
I0323 08:09:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 08:09:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:09:33.409790  543705 memory.go:184] no items to output this cycle
I0323 08:09:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 08:09:38.893674  543705 disk_info.go:125] begin check local disk info of client
I0323 08:09:38.896267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:09:38.896274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bdd00 0xc0002bdd40]
I0323 08:09:40.218788  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:09:40.218794  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:09:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:09:43.410739  543705 memory.go:191] Add success.
I0323 08:09:43.409798  543705 cpu.go:282] Add success.
I0323 08:09:43.420577  543705 net.go:648] Add success.
I0323 08:09:43.423451  543705 net.go:770] primary dev: ETH0
I0323 08:09:43.423463  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:09:43.423476  543705 net.go:698] Add success.
I0323 08:09:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:09:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:09:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:09:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:09:53.409768  543705 memory.go:184] no items to output this cycle
I0323 08:09:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:10:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:10:03.409804  543705 memory.go:184] no items to output this cycle
I0323 08:10:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 08:10:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:10:13.409792  543705 memory.go:191] Add success.
I0323 08:10:13.409796  543705 cpu.go:282] Add success.
W0323 08:10:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:10:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:10:13.420068  543705 net.go:648] Add success.
I0323 08:10:13.422960  543705 net.go:770] primary dev: ETH0
I0323 08:10:13.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:10:13.422986  543705 net.go:698] Add success.
I0323 08:10:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:10:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:10:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 08:10:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:10:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 08:10:14.456605  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:10:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:10:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:10:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:10:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:10:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:10:23.409779  543705 memory.go:184] no items to output this cycle
I0323 08:10:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 08:10:33.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:10:33.409928  543705 memory.go:184] no items to output this cycle
I0323 08:10:33.409974  543705 cpu.go:275] no items to output this cycle
I0323 08:10:38.897675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:10:38.900229  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:10:38.900236  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002aa3c0 0xc0002aa400]
E0323 08:10:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:10:43.410649  543705 memory.go:191] Add success.
I0323 08:10:43.409802  543705 cpu.go:282] Add success.
I0323 08:10:43.420383  543705 net.go:648] Add success.
I0323 08:10:43.423184  543705 net.go:770] primary dev: ETH0
I0323 08:10:43.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:10:43.423212  543705 net.go:698] Add success.
I0323 08:10:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:10:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:10:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:10:53.410269  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:10:53.410298  543705 memory.go:184] no items to output this cycle
I0323 08:10:53.410299  543705 cpu.go:275] no items to output this cycle
E0323 08:11:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:11:03.409780  543705 memory.go:184] no items to output this cycle
I0323 08:11:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 08:11:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:11:13.409818  543705 memory.go:191] Add success.
I0323 08:11:13.409824  543705 cpu.go:282] Add success.
W0323 08:11:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:11:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:11:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:11:13.420179  543705 net.go:648] Add success.
I0323 08:11:13.423109  543705 net.go:770] primary dev: ETH0
I0323 08:11:13.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:11:13.423136  543705 net.go:698] Add success.
I0323 08:11:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:11:14.455206  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:11:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0323 08:11:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:11:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 08:11:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:11:15.456024  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:11:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:11:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:11:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:11:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:11:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:11:23.409802  543705 memory.go:184] no items to output this cycle
I0323 08:11:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 08:11:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:11:33.409774  543705 memory.go:184] no items to output this cycle
I0323 08:11:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 08:11:38.901675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:11:38.904252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:11:38.904259  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b64c0 0xc0003b6500]
E0323 08:11:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:11:43.410681  543705 memory.go:191] Add success.
I0323 08:11:43.409803  543705 cpu.go:282] Add success.
I0323 08:11:43.420391  543705 net.go:648] Add success.
I0323 08:11:43.423258  543705 net.go:770] primary dev: ETH0
I0323 08:11:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:11:43.423283  543705 net.go:698] Add success.
I0323 08:11:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:11:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:11:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:11:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:11:53.409796  543705 memory.go:184] no items to output this cycle
I0323 08:11:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 08:12:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:12:03.409762  543705 memory.go:184] no items to output this cycle
I0323 08:12:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 08:12:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:12:13.409798  543705 memory.go:191] Add success.
I0323 08:12:13.409803  543705 cpu.go:282] Add success.
W0323 08:12:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:12:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:12:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:12:13.420228  543705 net.go:648] Add success.
I0323 08:12:13.422936  543705 net.go:770] primary dev: ETH0
I0323 08:12:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:12:13.422960  543705 net.go:698] Add success.
I0323 08:12:13.486002  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4315b2fe-4176-4e1e-a16c-26a80628e9fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:12:13.486049  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 08:12:14.455351  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:12:14.455448  543705 disk_worker.go:708] disk space is not compliant
W0323 08:12:14.455453  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:12:14.456764  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:12:14.456775  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:12:14.456782  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:12:14.457477  543705 disk_worker.go:494] system disk:vda1
I0323 08:12:14.457517  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:12:15.456867  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:12:15.456876  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:12:16.457929  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:12:16.457943  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:12:16.457981  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:12:16.457999  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:12:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:12:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:12:23.409781  543705 cpu.go:275] no items to output this cycle
I0323 08:12:23.409781  543705 memory.go:184] no items to output this cycle
E0323 08:12:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:12:33.409780  543705 memory.go:184] no items to output this cycle
I0323 08:12:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 08:12:38.905676  543705 disk_info.go:125] begin check local disk info of client
I0323 08:12:38.908250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:12:38.908257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3b40 0xc0003f3b80]
I0323 08:12:40.221721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:12:40.221727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:12:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:12:43.410715  543705 memory.go:191] Add success.
I0323 08:12:43.409800  543705 cpu.go:282] Add success.
I0323 08:12:43.420419  543705 net.go:648] Add success.
I0323 08:12:43.423041  543705 net.go:770] primary dev: ETH0
I0323 08:12:43.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:12:43.423067  543705 net.go:698] Add success.
I0323 08:12:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:12:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:12:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:12:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:12:53.409770  543705 memory.go:184] no items to output this cycle
I0323 08:12:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:13:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:13:03.409775  543705 memory.go:184] no items to output this cycle
I0323 08:13:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 08:13:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:13:13.409805  543705 memory.go:191] Add success.
I0323 08:13:13.409808  543705 cpu.go:282] Add success.
W0323 08:13:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:13:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:13:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:13:13.420233  543705 net.go:648] Add success.
I0323 08:13:13.422683  543705 net.go:770] primary dev: ETH0
I0323 08:13:13.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:13:13.422708  543705 net.go:698] Add success.
I0323 08:13:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:13:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:13:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 08:13:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:13:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 08:13:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:13:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:13:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:13:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:13:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:13:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:13:23.409795  543705 memory.go:184] no items to output this cycle
I0323 08:13:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:13:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:13:33.409813  543705 memory.go:184] no items to output this cycle
I0323 08:13:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 08:13:38.909675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:13:38.912239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:13:38.912246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa500 0xc0001aa580]
E0323 08:13:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:13:43.410623  543705 memory.go:191] Add success.
I0323 08:13:43.409802  543705 cpu.go:282] Add success.
I0323 08:13:43.420412  543705 net.go:648] Add success.
I0323 08:13:43.423010  543705 net.go:770] primary dev: ETH0
I0323 08:13:43.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:13:43.423036  543705 net.go:698] Add success.
I0323 08:13:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:13:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:13:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:13:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:13:53.409778  543705 memory.go:184] no items to output this cycle
I0323 08:13:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 08:14:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:14:03.409777  543705 memory.go:184] no items to output this cycle
I0323 08:14:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 08:14:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:14:13.409821  543705 memory.go:191] Add success.
I0323 08:14:13.409823  543705 cpu.go:282] Add success.
W0323 08:14:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:14:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:14:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:14:13.420153  543705 net.go:648] Add success.
I0323 08:14:13.422933  543705 net.go:770] primary dev: ETH0
I0323 08:14:13.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:14:13.422957  543705 net.go:698] Add success.
I0323 08:14:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:14:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:14:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0323 08:14:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:14:14.456496  543705 disk_worker.go:494] system disk:vda1
I0323 08:14:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:14:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:14:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:14:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:14:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:14:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:14:23.409795  543705 memory.go:184] no items to output this cycle
I0323 08:14:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:14:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:14:33.409793  543705 memory.go:184] no items to output this cycle
I0323 08:14:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 08:14:38.913677  543705 disk_info.go:125] begin check local disk info of client
I0323 08:14:38.916314  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:14:38.916321  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472940 0xc000472980]
E0323 08:14:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:14:43.410597  543705 memory.go:191] Add success.
I0323 08:14:43.409810  543705 cpu.go:282] Add success.
I0323 08:14:43.420311  543705 net.go:648] Add success.
I0323 08:14:43.423296  543705 net.go:770] primary dev: ETH0
I0323 08:14:43.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:14:43.423322  543705 net.go:698] Add success.
I0323 08:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:14:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:14:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:14:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:14:53.410265  543705 memory.go:184] no items to output this cycle
I0323 08:14:53.410270  543705 cpu.go:275] no items to output this cycle
E0323 08:15:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:15:03.409802  543705 memory.go:184] no items to output this cycle
I0323 08:15:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 08:15:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:15:13.409782  543705 memory.go:191] Add success.
W0323 08:15:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:15:13.409811  543705 cpu.go:282] Add success.
W0323 08:15:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:15:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:15:13.420130  543705 net.go:648] Add success.
I0323 08:15:13.423054  543705 net.go:770] primary dev: ETH0
I0323 08:15:13.423067  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:15:13.423079  543705 net.go:698] Add success.
I0323 08:15:13.669915  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"749d8a46-f491-4677-8483-1506fc11d84f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:15:13.669959  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:15:14.454683  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:15:14.454917  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:15:14.454927  543705 disk_worker.go:708] disk space is not compliant
W0323 08:15:14.454930  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:15:14.456459  543705 disk_worker.go:494] system disk:vda1
I0323 08:15:14.456491  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:15:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:15:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:15:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:15:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:15:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:15:23.409795  543705 memory.go:184] no items to output this cycle
I0323 08:15:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 08:15:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:15:33.409790  543705 memory.go:184] no items to output this cycle
I0323 08:15:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 08:15:38.917679  543705 disk_info.go:125] begin check local disk info of client
I0323 08:15:38.920228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:15:38.920235  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2000 0xc0002a2040]
I0323 08:15:40.225722  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:15:40.225727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:15:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:15:43.410629  543705 memory.go:191] Add success.
I0323 08:15:43.409790  543705 cpu.go:282] Add success.
I0323 08:15:43.420326  543705 net.go:648] Add success.
I0323 08:15:43.422894  543705 net.go:770] primary dev: ETH0
I0323 08:15:43.422908  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:15:43.422921  543705 net.go:698] Add success.
I0323 08:15:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:15:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:15:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:15:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:15:53.409801  543705 memory.go:184] no items to output this cycle
I0323 08:15:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 08:16:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:16:03.409774  543705 memory.go:184] no items to output this cycle
I0323 08:16:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 08:16:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:16:13.409812  543705 memory.go:191] Add success.
I0323 08:16:13.409819  543705 cpu.go:282] Add success.
W0323 08:16:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:16:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:16:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:16:13.420261  543705 net.go:648] Add success.
I0323 08:16:13.422918  543705 net.go:770] primary dev: ETH0
I0323 08:16:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:16:13.422942  543705 net.go:698] Add success.
I0323 08:16:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:16:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:16:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 08:16:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:16:14.456480  543705 disk_worker.go:494] system disk:vda1
I0323 08:16:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:16:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:16:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:16:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:16:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:16:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:16:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:16:23.409777  543705 memory.go:184] no items to output this cycle
I0323 08:16:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 08:16:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:16:33.409808  543705 memory.go:184] no items to output this cycle
I0323 08:16:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 08:16:38.921675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:16:38.924311  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:16:38.924319  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 08:16:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:16:43.410687  543705 memory.go:191] Add success.
I0323 08:16:43.409812  543705 cpu.go:282] Add success.
I0323 08:16:43.420416  543705 net.go:648] Add success.
I0323 08:16:43.422999  543705 net.go:770] primary dev: ETH0
I0323 08:16:43.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:16:43.423024  543705 net.go:698] Add success.
I0323 08:16:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:16:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:16:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:16:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:16:53.409777  543705 memory.go:184] no items to output this cycle
I0323 08:16:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 08:17:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:17:03.409778  543705 memory.go:184] no items to output this cycle
I0323 08:17:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 08:17:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:17:13.409801  543705 memory.go:191] Add success.
I0323 08:17:13.409813  543705 cpu.go:282] Add success.
W0323 08:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:17:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:17:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:17:13.420184  543705 net.go:648] Add success.
I0323 08:17:13.423100  543705 net.go:770] primary dev: ETH0
I0323 08:17:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:17:13.423128  543705 net.go:698] Add success.
I0323 08:17:13.453663  543705 event_worker.go:152] Polling the log file for events...
W0323 08:17:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:17:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 08:17:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:17:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:17:14.455908  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:17:14.455914  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:17:14.456547  543705 disk_worker.go:494] system disk:vda1
I0323 08:17:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:17:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:17:15.456832  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:17:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:17:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:17:16.458023  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:17:16.458044  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:17:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:17:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:17:23.409795  543705 memory.go:184] no items to output this cycle
I0323 08:17:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 08:17:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:17:33.409772  543705 memory.go:184] no items to output this cycle
I0323 08:17:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 08:17:38.925668  543705 disk_info.go:125] begin check local disk info of client
I0323 08:17:38.928211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:17:38.928218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4fc0 0xc0000c5000]
E0323 08:17:43.409870  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:17:43.410820  543705 memory.go:191] Add success.
I0323 08:17:43.410082  543705 cpu.go:282] Add success.
I0323 08:17:43.419718  543705 net.go:648] Add success.
I0323 08:17:43.422422  543705 net.go:770] primary dev: ETH0
I0323 08:17:43.422435  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:17:43.422447  543705 net.go:698] Add success.
I0323 08:17:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:17:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:17:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:17:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:17:53.409776  543705 memory.go:184] no items to output this cycle
I0323 08:17:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 08:18:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:18:03.409800  543705 memory.go:184] no items to output this cycle
I0323 08:18:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 08:18:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:18:13.409782  543705 memory.go:191] Add success.
W0323 08:18:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:18:13.409809  543705 cpu.go:282] Add success.
W0323 08:18:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:18:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:18:13.420154  543705 net.go:648] Add success.
I0323 08:18:13.423245  543705 net.go:770] primary dev: ETH0
I0323 08:18:13.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:18:13.423271  543705 net.go:698] Add success.
I0323 08:18:13.463244  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4bd3da4a-3acd-4d70-a132-db3062d2a105","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:18:13.463277  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:18:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:18:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:18:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 08:18:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:18:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 08:18:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:18:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:18:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:18:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:18:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:18:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:18:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:18:23.409780  543705 memory.go:184] no items to output this cycle
I0323 08:18:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 08:18:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:18:33.409808  543705 memory.go:184] no items to output this cycle
I0323 08:18:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 08:18:38.929676  543705 disk_info.go:125] begin check local disk info of client
I0323 08:18:38.932239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:18:38.932245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9540 0xc0004a9580]
I0323 08:18:40.225855  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:18:40.225860  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:18:43.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:18:43.410749  543705 memory.go:191] Add success.
I0323 08:18:43.409963  543705 cpu.go:282] Add success.
I0323 08:18:43.419706  543705 net.go:648] Add success.
I0323 08:18:43.422308  543705 net.go:770] primary dev: ETH0
I0323 08:18:43.422320  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:18:43.422332  543705 net.go:698] Add success.
I0323 08:18:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:18:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:18:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:18:53.410238  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:18:53.410254  543705 memory.go:184] no items to output this cycle
I0323 08:18:53.410266  543705 cpu.go:275] no items to output this cycle
E0323 08:19:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:19:03.409800  543705 memory.go:184] no items to output this cycle
I0323 08:19:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 08:19:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:19:13.409783  543705 memory.go:191] Add success.
W0323 08:19:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:19:13.409817  543705 cpu.go:282] Add success.
W0323 08:19:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:19:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:19:13.420145  543705 net.go:648] Add success.
I0323 08:19:13.422679  543705 net.go:770] primary dev: ETH0
I0323 08:19:13.422691  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:19:13.422703  543705 net.go:698] Add success.
I0323 08:19:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:19:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:19:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 08:19:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:19:14.456492  543705 disk_worker.go:494] system disk:vda1
I0323 08:19:14.456537  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:19:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:19:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:19:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:19:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:19:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:19:23.409792  543705 memory.go:184] no items to output this cycle
I0323 08:19:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 08:19:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:19:33.409791  543705 memory.go:184] no items to output this cycle
I0323 08:19:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 08:19:38.933674  543705 disk_info.go:125] begin check local disk info of client
I0323 08:19:38.936302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:19:38.936308  543705 disk_info.go:196] parse disk info done, disk is : [0xc00057b840 0xc00057b880]
E0323 08:19:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:19:43.410658  543705 memory.go:191] Add success.
I0323 08:19:43.409819  543705 cpu.go:282] Add success.
I0323 08:19:43.420739  543705 net.go:648] Add success.
I0323 08:19:43.423433  543705 net.go:770] primary dev: ETH0
I0323 08:19:43.423447  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:19:43.423462  543705 net.go:698] Add success.
I0323 08:19:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:19:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:19:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:19:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:19:53.409778  543705 memory.go:184] no items to output this cycle
I0323 08:19:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 08:20:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:20:03.409771  543705 memory.go:184] no items to output this cycle
I0323 08:20:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 08:20:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:20:13.409797  543705 memory.go:191] Add success.
I0323 08:20:13.409798  543705 cpu.go:282] Add success.
W0323 08:20:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:20:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:20:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:20:13.420293  543705 net.go:648] Add success.
I0323 08:20:13.423581  543705 net.go:770] primary dev: ETH0
I0323 08:20:13.423596  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:20:13.423611  543705 net.go:698] Add success.
I0323 08:20:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:20:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:20:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 08:20:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:20:14.456615  543705 disk_worker.go:494] system disk:vda1
I0323 08:20:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:20:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:20:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:20:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:20:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:20:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:20:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:20:23.409779  543705 memory.go:184] no items to output this cycle
I0323 08:20:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 08:20:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:20:33.409784  543705 memory.go:184] no items to output this cycle
I0323 08:20:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 08:20:38.937673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:20:38.940218  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:20:38.940225  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9e80 0xc0004a9ec0]
E0323 08:20:43.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:20:43.410739  543705 memory.go:191] Add success.
I0323 08:20:43.410105  543705 cpu.go:282] Add success.
I0323 08:20:43.419735  543705 net.go:648] Add success.
I0323 08:20:43.422520  543705 net.go:770] primary dev: ETH0
I0323 08:20:43.422532  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:20:43.422543  543705 net.go:698] Add success.
I0323 08:20:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:20:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:20:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:20:53.410240  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:20:53.410255  543705 memory.go:184] no items to output this cycle
I0323 08:20:53.410257  543705 cpu.go:275] no items to output this cycle
E0323 08:21:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:21:03.409782  543705 cpu.go:275] no items to output this cycle
I0323 08:21:03.409786  543705 memory.go:184] no items to output this cycle
E0323 08:21:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:21:13.409821  543705 memory.go:191] Add success.
I0323 08:21:13.409825  543705 cpu.go:282] Add success.
W0323 08:21:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:21:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:21:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:21:13.420152  543705 net.go:648] Add success.
I0323 08:21:13.423136  543705 net.go:770] primary dev: ETH0
I0323 08:21:13.423149  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:21:13.423161  543705 net.go:698] Add success.
I0323 08:21:13.468786  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"85330b56-686c-4958-8336-7ac4786e7f26","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:21:13.468820  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:21:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:21:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 08:21:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:21:14.456535  543705 disk_worker.go:494] system disk:vda1
I0323 08:21:14.456588  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:21:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:21:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:21:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:21:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:21:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:21:23.409770  543705 memory.go:184] no items to output this cycle
I0323 08:21:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 08:21:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:21:33.409811  543705 memory.go:184] no items to output this cycle
I0323 08:21:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 08:21:38.941675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:21:38.944241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:21:38.944247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8dc0 0xc0003e8e00]
I0323 08:21:40.226781  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:21:40.226787  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:21:43.409864  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:21:43.410726  543705 memory.go:191] Add success.
I0323 08:21:43.409938  543705 cpu.go:282] Add success.
I0323 08:21:43.419733  543705 net.go:648] Add success.
I0323 08:21:43.422470  543705 net.go:770] primary dev: ETH0
I0323 08:21:43.422484  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:21:43.422499  543705 net.go:698] Add success.
I0323 08:21:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:21:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:21:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:21:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:21:53.409774  543705 cpu.go:275] no items to output this cycle
I0323 08:21:53.409786  543705 memory.go:184] no items to output this cycle
E0323 08:22:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:22:03.409801  543705 memory.go:184] no items to output this cycle
I0323 08:22:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 08:22:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:22:13.409811  543705 memory.go:191] Add success.
I0323 08:22:13.409819  543705 cpu.go:282] Add success.
W0323 08:22:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:22:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:22:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:22:13.420124  543705 net.go:648] Add success.
I0323 08:22:13.423248  543705 net.go:770] primary dev: ETH0
I0323 08:22:13.423260  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:22:13.423272  543705 net.go:698] Add success.
W0323 08:22:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:22:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 08:22:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:22:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:22:14.455898  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:22:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:22:14.456644  543705 disk_worker.go:494] system disk:vda1
I0323 08:22:14.456686  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:22:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:22:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:22:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:22:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:22:16.458019  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:22:16.458035  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:22:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:22:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:22:23.409772  543705 memory.go:184] no items to output this cycle
I0323 08:22:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 08:22:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:22:33.409893  543705 memory.go:184] no items to output this cycle
I0323 08:22:33.409929  543705 cpu.go:275] no items to output this cycle
I0323 08:22:38.945671  543705 disk_info.go:125] begin check local disk info of client
I0323 08:22:38.948220  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:22:38.948227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b9b40 0xc0002b9b80]
E0323 08:22:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:22:43.410619  543705 memory.go:191] Add success.
I0323 08:22:43.409784  543705 cpu.go:282] Add success.
I0323 08:22:43.420336  543705 net.go:648] Add success.
I0323 08:22:43.422910  543705 net.go:770] primary dev: ETH0
I0323 08:22:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:22:43.422935  543705 net.go:698] Add success.
I0323 08:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:22:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:22:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:22:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:22:53.409792  543705 memory.go:184] no items to output this cycle
I0323 08:22:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 08:23:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:23:03.409777  543705 memory.go:184] no items to output this cycle
I0323 08:23:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 08:23:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:23:13.409819  543705 memory.go:191] Add success.
I0323 08:23:13.409825  543705 cpu.go:282] Add success.
W0323 08:23:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:23:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:23:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:23:13.420117  543705 net.go:648] Add success.
I0323 08:23:13.422890  543705 net.go:770] primary dev: ETH0
I0323 08:23:13.422904  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:23:13.422919  543705 net.go:698] Add success.
I0323 08:23:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:23:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:23:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 08:23:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:23:14.456572  543705 disk_worker.go:494] system disk:vda1
I0323 08:23:14.456603  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:23:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:23:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:23:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:23:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:23:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:23:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:23:23.409783  543705 memory.go:184] no items to output this cycle
I0323 08:23:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 08:23:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:23:33.409892  543705 cpu.go:275] no items to output this cycle
I0323 08:23:33.409909  543705 memory.go:184] no items to output this cycle
I0323 08:23:38.949683  543705 disk_info.go:125] begin check local disk info of client
I0323 08:23:38.952237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:23:38.952243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d4000 0xc0004d4040]
E0323 08:23:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:23:43.410769  543705 memory.go:191] Add success.
I0323 08:23:43.409811  543705 cpu.go:282] Add success.
I0323 08:23:43.420508  543705 net.go:648] Add success.
I0323 08:23:43.423659  543705 net.go:770] primary dev: ETH0
I0323 08:23:43.423689  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:23:43.423707  543705 net.go:698] Add success.
I0323 08:23:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:23:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:23:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:23:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:23:53.409790  543705 memory.go:184] no items to output this cycle
I0323 08:23:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 08:24:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:24:03.409781  543705 memory.go:184] no items to output this cycle
I0323 08:24:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:24:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:24:13.409821  543705 memory.go:191] Add success.
I0323 08:24:13.409832  543705 cpu.go:282] Add success.
W0323 08:24:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:24:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:24:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:24:13.420135  543705 net.go:648] Add success.
I0323 08:24:13.423163  543705 net.go:770] primary dev: ETH0
I0323 08:24:13.423177  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:24:13.423193  543705 net.go:698] Add success.
I0323 08:24:13.470543  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"39c08177-e779-4dad-b227-7d851942a96d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:24:13.470583  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:24:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:24:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:24:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 08:24:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:24:14.456705  543705 disk_worker.go:494] system disk:vda1
I0323 08:24:14.456738  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:24:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:24:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:24:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:24:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:24:23.410267  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:24:23.410287  543705 memory.go:184] no items to output this cycle
I0323 08:24:23.410292  543705 cpu.go:275] no items to output this cycle
E0323 08:24:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:24:33.409797  543705 memory.go:184] no items to output this cycle
I0323 08:24:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 08:24:38.953675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:24:38.956234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:24:38.956241  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371080 0xc0003710c0]
I0323 08:24:40.229738  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:24:40.229744  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:24:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:24:43.410685  543705 memory.go:191] Add success.
I0323 08:24:43.409828  543705 cpu.go:282] Add success.
I0323 08:24:43.420447  543705 net.go:648] Add success.
I0323 08:24:43.423088  543705 net.go:770] primary dev: ETH0
I0323 08:24:43.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:24:43.423119  543705 net.go:698] Add success.
I0323 08:24:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:24:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:24:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:24:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:24:53.409805  543705 memory.go:184] no items to output this cycle
I0323 08:24:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 08:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:25:03.409797  543705 memory.go:184] no items to output this cycle
I0323 08:25:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 08:25:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:25:13.409792  543705 memory.go:191] Add success.
W0323 08:25:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:25:13.409823  543705 cpu.go:282] Add success.
W0323 08:25:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:25:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:25:13.420196  543705 net.go:648] Add success.
I0323 08:25:13.422917  543705 net.go:770] primary dev: ETH0
I0323 08:25:13.422931  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:25:13.422943  543705 net.go:698] Add success.
I0323 08:25:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:25:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:25:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0323 08:25:14.455155  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:25:14.456513  543705 disk_worker.go:494] system disk:vda1
I0323 08:25:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:25:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:25:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:25:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:25:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:25:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:25:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:25:23.409808  543705 memory.go:184] no items to output this cycle
I0323 08:25:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 08:25:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:25:33.409809  543705 memory.go:184] no items to output this cycle
I0323 08:25:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 08:25:38.957676  543705 disk_info.go:125] begin check local disk info of client
I0323 08:25:38.960239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:25:38.960246  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396000 0xc000396040]
E0323 08:25:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:25:43.410606  543705 memory.go:191] Add success.
I0323 08:25:43.409813  543705 cpu.go:282] Add success.
I0323 08:25:43.420334  543705 net.go:648] Add success.
I0323 08:25:43.423168  543705 net.go:770] primary dev: ETH0
I0323 08:25:43.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:25:43.423194  543705 net.go:698] Add success.
I0323 08:25:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:25:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:25:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:25:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:25:53.409775  543705 memory.go:184] no items to output this cycle
I0323 08:25:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 08:26:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:26:03.409808  543705 memory.go:184] no items to output this cycle
I0323 08:26:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 08:26:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:26:13.409797  543705 memory.go:191] Add success.
I0323 08:26:13.409812  543705 cpu.go:282] Add success.
W0323 08:26:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:26:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:26:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:26:13.420060  543705 net.go:648] Add success.
I0323 08:26:13.423011  543705 net.go:770] primary dev: ETH0
I0323 08:26:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:26:13.423043  543705 net.go:698] Add success.
I0323 08:26:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:26:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:26:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 08:26:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:26:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 08:26:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:26:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:26:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:26:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:26:16.472403  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:26:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:26:23.409789  543705 memory.go:184] no items to output this cycle
I0323 08:26:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 08:26:33.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:26:33.409876  543705 memory.go:184] no items to output this cycle
I0323 08:26:33.409936  543705 cpu.go:275] no items to output this cycle
I0323 08:26:38.961676  543705 disk_info.go:125] begin check local disk info of client
I0323 08:26:38.964224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:26:38.964231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cc240 0xc0004cc280]
E0323 08:26:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:26:43.410704  543705 memory.go:191] Add success.
I0323 08:26:43.409822  543705 cpu.go:282] Add success.
I0323 08:26:43.420399  543705 net.go:648] Add success.
I0323 08:26:43.423228  543705 net.go:770] primary dev: ETH0
I0323 08:26:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:26:43.423254  543705 net.go:698] Add success.
I0323 08:26:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:26:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:26:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:26:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:26:53.410264  543705 memory.go:184] no items to output this cycle
I0323 08:26:53.410273  543705 cpu.go:275] no items to output this cycle
E0323 08:27:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:27:03.409812  543705 memory.go:184] no items to output this cycle
I0323 08:27:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 08:27:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:27:13.409809  543705 memory.go:191] Add success.
I0323 08:27:13.409811  543705 cpu.go:282] Add success.
W0323 08:27:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:27:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:27:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:27:13.420586  543705 net.go:648] Add success.
I0323 08:27:13.429538  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 08:27:13.429613  543705 net.go:770] primary dev: ETH0
I0323 08:27:13.429625  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:27:13.429636  543705 net.go:698] Add success.
I0323 08:27:13.453210  543705 event_worker.go:152] Polling the log file for events...
I0323 08:27:13.468602  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9e194e65-caf6-4d0a-839b-469d5a19bea2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:27:13.468641  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 08:27:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:27:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 08:27:14.455172  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:27:14.456791  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:27:14.456814  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:27:14.456820  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:27:14.456850  543705 disk_worker.go:494] system disk:vda1
I0323 08:27:14.456880  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:27:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:27:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:27:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:27:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:27:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:27:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:27:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:27:23.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:27:23.409914  543705 cpu.go:275] no items to output this cycle
I0323 08:27:23.409917  543705 memory.go:184] no items to output this cycle
E0323 08:27:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:27:33.409808  543705 memory.go:184] no items to output this cycle
I0323 08:27:33.409831  543705 cpu.go:275] no items to output this cycle
I0323 08:27:38.965675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:27:38.968247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:27:38.968253  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028b340 0xc00028b380]
I0323 08:27:40.230779  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:27:40.230785  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:27:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:27:43.410674  543705 memory.go:191] Add success.
I0323 08:27:43.409804  543705 cpu.go:282] Add success.
I0323 08:27:43.420392  543705 net.go:648] Add success.
I0323 08:27:43.423215  543705 net.go:770] primary dev: ETH0
I0323 08:27:43.423228  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:27:43.423241  543705 net.go:698] Add success.
I0323 08:27:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:27:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:27:53.410250  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:27:53.410265  543705 memory.go:184] no items to output this cycle
I0323 08:27:53.410266  543705 cpu.go:275] no items to output this cycle
E0323 08:28:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:28:03.409770  543705 memory.go:184] no items to output this cycle
I0323 08:28:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:28:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:28:13.409808  543705 memory.go:191] Add success.
I0323 08:28:13.409816  543705 cpu.go:282] Add success.
W0323 08:28:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:28:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:28:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:28:13.420169  543705 net.go:648] Add success.
I0323 08:28:13.422823  543705 net.go:770] primary dev: ETH0
I0323 08:28:13.422846  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:28:13.422860  543705 net.go:698] Add success.
I0323 08:28:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:28:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:28:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 08:28:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:28:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 08:28:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:28:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:28:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:28:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:28:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:28:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:28:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:28:23.409775  543705 memory.go:184] no items to output this cycle
I0323 08:28:23.409775  543705 cpu.go:275] no items to output this cycle
E0323 08:28:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:28:33.409819  543705 memory.go:184] no items to output this cycle
I0323 08:28:33.409832  543705 cpu.go:275] no items to output this cycle
I0323 08:28:38.969680  543705 disk_info.go:125] begin check local disk info of client
I0323 08:28:38.972304  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:28:38.972311  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cc000 0xc0004cc040]
E0323 08:28:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:28:43.410712  543705 memory.go:191] Add success.
I0323 08:28:43.409789  543705 cpu.go:282] Add success.
I0323 08:28:43.420406  543705 net.go:648] Add success.
I0323 08:28:43.423130  543705 net.go:770] primary dev: ETH0
I0323 08:28:43.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:28:43.423159  543705 net.go:698] Add success.
I0323 08:28:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:28:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:28:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:28:53.410369  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:28:53.410375  543705 cpu.go:275] no items to output this cycle
I0323 08:28:53.410383  543705 memory.go:184] no items to output this cycle
E0323 08:29:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:29:03.409778  543705 memory.go:184] no items to output this cycle
I0323 08:29:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 08:29:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:29:13.409817  543705 memory.go:191] Add success.
I0323 08:29:13.409831  543705 cpu.go:282] Add success.
W0323 08:29:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:29:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:29:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:29:13.420188  543705 net.go:648] Add success.
I0323 08:29:13.422957  543705 net.go:770] primary dev: ETH0
I0323 08:29:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:29:13.422981  543705 net.go:698] Add success.
I0323 08:29:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:29:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:29:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 08:29:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:29:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 08:29:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:29:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:29:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:29:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:29:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:29:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:29:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:29:23.409776  543705 memory.go:184] no items to output this cycle
I0323 08:29:23.409776  543705 cpu.go:275] no items to output this cycle
E0323 08:29:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:29:33.409943  543705 cpu.go:275] no items to output this cycle
I0323 08:29:33.409964  543705 memory.go:184] no items to output this cycle
I0323 08:29:38.973675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:29:38.976297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:29:38.976303  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 08:29:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:29:43.410662  543705 memory.go:191] Add success.
I0323 08:29:43.409787  543705 cpu.go:282] Add success.
I0323 08:29:43.420405  543705 net.go:648] Add success.
I0323 08:29:43.423082  543705 net.go:770] primary dev: ETH0
I0323 08:29:43.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:29:43.423109  543705 net.go:698] Add success.
I0323 08:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:29:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:29:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:29:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:29:53.409762  543705 memory.go:184] no items to output this cycle
I0323 08:29:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 08:30:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:30:03.409789  543705 memory.go:184] no items to output this cycle
I0323 08:30:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 08:30:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:30:13.409788  543705 memory.go:191] Add success.
I0323 08:30:13.409788  543705 cpu.go:282] Add success.
W0323 08:30:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:30:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:30:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:30:13.420152  543705 net.go:648] Add success.
I0323 08:30:13.422882  543705 net.go:770] primary dev: ETH0
I0323 08:30:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:30:13.422907  543705 net.go:698] Add success.
I0323 08:30:13.463624  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b60a55c4-f7ef-4360-8a26-93b5e2a0e8f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:30:13.463658  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:30:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:30:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:30:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 08:30:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:30:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 08:30:14.456626  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:30:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:30:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:30:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:30:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:30:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:30:23.409792  543705 memory.go:184] no items to output this cycle
I0323 08:30:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:30:33.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:30:33.409940  543705 memory.go:184] no items to output this cycle
I0323 08:30:33.409956  543705 cpu.go:275] no items to output this cycle
I0323 08:30:38.977673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:30:38.980294  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:30:38.980300  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
I0323 08:30:40.233735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:30:40.233741  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:30:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:30:43.410659  543705 memory.go:191] Add success.
I0323 08:30:43.409791  543705 cpu.go:282] Add success.
I0323 08:30:43.420238  543705 net.go:770] primary dev: ETH0
I0323 08:30:43.420250  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:30:43.420263  543705 net.go:698] Add success.
I0323 08:30:43.420607  543705 net.go:648] Add success.
I0323 08:30:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:30:46.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:30:46.458051  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:30:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:30:53.409786  543705 memory.go:184] no items to output this cycle
I0323 08:30:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 08:31:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:31:03.409776  543705 memory.go:184] no items to output this cycle
I0323 08:31:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 08:31:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:31:13.409792  543705 memory.go:191] Add success.
I0323 08:31:13.409808  543705 cpu.go:282] Add success.
W0323 08:31:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:31:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:31:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:31:13.420245  543705 net.go:648] Add success.
I0323 08:31:13.422928  543705 net.go:770] primary dev: ETH0
I0323 08:31:13.422940  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:31:13.422967  543705 net.go:698] Add success.
I0323 08:31:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:31:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:31:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 08:31:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:31:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 08:31:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:31:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:31:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:31:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:31:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:31:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:31:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:31:23.409781  543705 cpu.go:275] no items to output this cycle
I0323 08:31:23.409785  543705 memory.go:184] no items to output this cycle
E0323 08:31:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:31:33.409789  543705 memory.go:184] no items to output this cycle
I0323 08:31:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 08:31:38.981675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:31:38.984228  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:31:38.984235  543705 disk_info.go:196] parse disk info done, disk is : [0xc000391140 0xc000391180]
E0323 08:31:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:31:43.410688  543705 memory.go:191] Add success.
I0323 08:31:43.409818  543705 cpu.go:282] Add success.
I0323 08:31:43.420394  543705 net.go:648] Add success.
I0323 08:31:43.423050  543705 net.go:770] primary dev: ETH0
I0323 08:31:43.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:31:43.423076  543705 net.go:698] Add success.
I0323 08:31:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:31:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:31:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:31:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:31:53.409793  543705 memory.go:184] no items to output this cycle
I0323 08:31:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:32:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:32:03.409770  543705 memory.go:184] no items to output this cycle
I0323 08:32:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 08:32:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:32:13.409808  543705 memory.go:191] Add success.
I0323 08:32:13.409817  543705 cpu.go:282] Add success.
W0323 08:32:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:32:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:32:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:32:13.420187  543705 net.go:648] Add success.
I0323 08:32:13.423280  543705 net.go:770] primary dev: ETH0
I0323 08:32:13.423294  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:32:13.423305  543705 net.go:698] Add success.
W0323 08:32:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:32:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 08:32:14.455169  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:32:14.456870  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:32:14.456880  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:32:14.456886  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:32:14.456956  543705 disk_worker.go:494] system disk:vda1
I0323 08:32:14.457007  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:32:15.456811  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:32:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:32:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:32:16.457971  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:32:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:32:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:32:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:32:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:32:23.409792  543705 memory.go:184] no items to output this cycle
I0323 08:32:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 08:32:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:32:33.409796  543705 memory.go:184] no items to output this cycle
I0323 08:32:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 08:32:38.985671  543705 disk_info.go:125] begin check local disk info of client
I0323 08:32:38.988274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:32:38.988280  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260580 0xc0002605c0]
E0323 08:32:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:32:43.410579  543705 memory.go:191] Add success.
I0323 08:32:43.409802  543705 cpu.go:282] Add success.
I0323 08:32:43.420097  543705 net.go:770] primary dev: ETH0
I0323 08:32:43.420110  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:32:43.420121  543705 net.go:698] Add success.
I0323 08:32:43.420358  543705 net.go:648] Add success.
I0323 08:32:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:32:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:32:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:32:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:32:53.409773  543705 cpu.go:275] no items to output this cycle
I0323 08:32:53.409783  543705 memory.go:184] no items to output this cycle
E0323 08:33:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:33:03.409791  543705 memory.go:184] no items to output this cycle
I0323 08:33:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 08:33:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:33:13.409789  543705 memory.go:191] Add success.
I0323 08:33:13.409807  543705 cpu.go:282] Add success.
W0323 08:33:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:33:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:33:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:33:13.420207  543705 net.go:648] Add success.
I0323 08:33:13.423205  543705 net.go:770] primary dev: ETH0
I0323 08:33:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:33:13.423241  543705 net.go:698] Add success.
I0323 08:33:13.506754  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"90dfd4fc-c193-42ec-91b2-63398828b502","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:33:13.506787  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:33:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:33:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:33:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 08:33:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:33:14.456532  543705 disk_worker.go:494] system disk:vda1
I0323 08:33:14.456575  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:33:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:33:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:33:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:33:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:33:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:33:23.409794  543705 memory.go:184] no items to output this cycle
I0323 08:33:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 08:33:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:33:33.409792  543705 memory.go:184] no items to output this cycle
I0323 08:33:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 08:33:38.989680  543705 disk_info.go:125] begin check local disk info of client
I0323 08:33:38.992264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:33:38.992271  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0323 08:33:40.234776  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:33:40.234781  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:33:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:33:43.410580  543705 memory.go:191] Add success.
I0323 08:33:43.409816  543705 cpu.go:282] Add success.
I0323 08:33:43.420099  543705 net.go:770] primary dev: ETH0
I0323 08:33:43.420114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:33:43.420129  543705 net.go:698] Add success.
I0323 08:33:43.420473  543705 net.go:648] Add success.
I0323 08:33:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:33:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:33:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:33:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:33:53.409776  543705 memory.go:184] no items to output this cycle
I0323 08:33:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 08:34:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:34:03.409777  543705 memory.go:184] no items to output this cycle
I0323 08:34:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 08:34:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:34:13.409808  543705 memory.go:191] Add success.
I0323 08:34:13.409814  543705 cpu.go:282] Add success.
W0323 08:34:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:34:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:34:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:34:13.420515  543705 net.go:648] Add success.
I0323 08:34:13.423032  543705 net.go:770] primary dev: ETH0
I0323 08:34:13.423046  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:34:13.423057  543705 net.go:698] Add success.
I0323 08:34:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:34:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:34:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 08:34:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:34:14.456559  543705 disk_worker.go:494] system disk:vda1
I0323 08:34:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:34:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:34:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:34:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:34:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:34:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:34:23.409776  543705 memory.go:184] no items to output this cycle
I0323 08:34:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 08:34:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:34:33.409808  543705 memory.go:184] no items to output this cycle
I0323 08:34:33.409920  543705 cpu.go:275] no items to output this cycle
I0323 08:34:38.993677  543705 disk_info.go:125] begin check local disk info of client
I0323 08:34:38.996259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:34:38.996266  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8ac0 0xc0004d8b00]
E0323 08:34:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:34:43.410589  543705 memory.go:191] Add success.
I0323 08:34:43.409796  543705 cpu.go:282] Add success.
I0323 08:34:43.420335  543705 net.go:648] Add success.
I0323 08:34:43.422845  543705 net.go:770] primary dev: ETH0
I0323 08:34:43.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:34:43.422875  543705 net.go:698] Add success.
I0323 08:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:34:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:34:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:34:53.409783  543705 memory.go:184] no items to output this cycle
I0323 08:34:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 08:35:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:35:03.409778  543705 memory.go:184] no items to output this cycle
I0323 08:35:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 08:35:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:35:13.409821  543705 memory.go:191] Add success.
I0323 08:35:13.409834  543705 cpu.go:282] Add success.
W0323 08:35:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:35:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:35:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:35:13.420222  543705 net.go:648] Add success.
I0323 08:35:13.423002  543705 net.go:770] primary dev: ETH0
I0323 08:35:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:35:13.423032  543705 net.go:698] Add success.
I0323 08:35:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:35:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:35:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 08:35:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:35:14.456506  543705 disk_worker.go:494] system disk:vda1
I0323 08:35:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:35:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:35:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:35:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:35:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:35:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:35:23.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:35:23.409832  543705 memory.go:184] no items to output this cycle
I0323 08:35:23.409835  543705 cpu.go:275] no items to output this cycle
E0323 08:35:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:35:33.409806  543705 memory.go:184] no items to output this cycle
I0323 08:35:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 08:35:38.997682  543705 disk_info.go:125] begin check local disk info of client
I0323 08:35:39.000276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:35:39.000283  543705 disk_info.go:196] parse disk info done, disk is : [0xc000520ec0 0xc000520f00]
E0323 08:35:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:35:43.410644  543705 memory.go:191] Add success.
I0323 08:35:43.409809  543705 cpu.go:282] Add success.
I0323 08:35:43.420359  543705 net.go:648] Add success.
I0323 08:35:43.423082  543705 net.go:770] primary dev: ETH0
I0323 08:35:43.423097  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:35:43.423110  543705 net.go:698] Add success.
I0323 08:35:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:35:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:35:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:35:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:35:53.409786  543705 memory.go:184] no items to output this cycle
I0323 08:35:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 08:36:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:36:03.409772  543705 memory.go:184] no items to output this cycle
I0323 08:36:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 08:36:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:36:13.409810  543705 memory.go:191] Add success.
I0323 08:36:13.409815  543705 cpu.go:282] Add success.
W0323 08:36:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:36:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:36:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:36:13.420162  543705 net.go:648] Add success.
I0323 08:36:13.423183  543705 net.go:770] primary dev: ETH0
I0323 08:36:13.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:36:13.423220  543705 net.go:698] Add success.
I0323 08:36:13.468892  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0ed97674-d44a-4922-8f26-30e8ced473dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:36:13.468924  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:36:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:36:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:36:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 08:36:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:36:14.456588  543705 disk_worker.go:494] system disk:vda1
I0323 08:36:14.456619  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:36:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:36:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:36:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:36:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:36:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:36:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:36:23.409766  543705 memory.go:184] no items to output this cycle
I0323 08:36:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 08:36:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:36:33.409895  543705 cpu.go:275] no items to output this cycle
I0323 08:36:33.409895  543705 memory.go:184] no items to output this cycle
I0323 08:36:39.001679  543705 disk_info.go:125] begin check local disk info of client
I0323 08:36:39.004231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:36:39.004248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ac000 0xc0002ac040]
I0323 08:36:40.237734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:36:40.237740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:36:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:36:43.410629  543705 memory.go:191] Add success.
I0323 08:36:43.409789  543705 cpu.go:282] Add success.
I0323 08:36:43.420349  543705 net.go:648] Add success.
I0323 08:36:43.423031  543705 net.go:770] primary dev: ETH0
I0323 08:36:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:36:43.423057  543705 net.go:698] Add success.
I0323 08:36:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:36:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:36:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:36:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:36:53.409784  543705 memory.go:184] no items to output this cycle
I0323 08:36:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 08:37:03.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:37:03.409765  543705 memory.go:184] no items to output this cycle
I0323 08:37:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 08:37:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:37:13.409818  543705 memory.go:191] Add success.
I0323 08:37:13.409823  543705 cpu.go:282] Add success.
W0323 08:37:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:37:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:37:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:37:13.420136  543705 net.go:648] Add success.
I0323 08:37:13.423037  543705 net.go:770] primary dev: ETH0
I0323 08:37:13.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:37:13.423066  543705 net.go:698] Add success.
I0323 08:37:13.453598  543705 event_worker.go:152] Polling the log file for events...
W0323 08:37:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:37:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 08:37:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:37:14.455928  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:37:14.455936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:37:14.455942  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:37:14.456560  543705 disk_worker.go:494] system disk:vda1
I0323 08:37:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:37:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:37:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:37:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:37:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:37:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:37:16.457988  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:37:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:37:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:37:23.409795  543705 memory.go:184] no items to output this cycle
I0323 08:37:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 08:37:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:37:33.409811  543705 memory.go:184] no items to output this cycle
I0323 08:37:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 08:37:39.005675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:37:39.008232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:37:39.008240  543705 disk_info.go:196] parse disk info done, disk is : [0xc000506180 0xc0005061c0]
E0323 08:37:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:37:43.410613  543705 memory.go:191] Add success.
I0323 08:37:43.409799  543705 cpu.go:282] Add success.
I0323 08:37:43.420318  543705 net.go:648] Add success.
I0323 08:37:43.423102  543705 net.go:770] primary dev: ETH0
I0323 08:37:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:37:43.423127  543705 net.go:698] Add success.
I0323 08:37:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:37:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:37:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:37:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:37:53.410273  543705 cpu.go:275] no items to output this cycle
I0323 08:37:53.410277  543705 memory.go:184] no items to output this cycle
E0323 08:38:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:38:03.409771  543705 memory.go:184] no items to output this cycle
I0323 08:38:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 08:38:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:38:13.409813  543705 memory.go:191] Add success.
I0323 08:38:13.409825  543705 cpu.go:282] Add success.
W0323 08:38:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:38:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:38:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:38:13.420139  543705 net.go:648] Add success.
I0323 08:38:13.422952  543705 net.go:770] primary dev: ETH0
I0323 08:38:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:38:13.422976  543705 net.go:698] Add success.
I0323 08:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:38:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:38:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 08:38:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:38:14.456627  543705 disk_worker.go:494] system disk:vda1
I0323 08:38:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:38:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:38:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:38:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:38:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:38:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:38:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:38:23.409772  543705 memory.go:184] no items to output this cycle
I0323 08:38:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 08:38:33.409855  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:38:33.409875  543705 memory.go:184] no items to output this cycle
I0323 08:38:33.409961  543705 cpu.go:275] no items to output this cycle
I0323 08:38:39.009677  543705 disk_info.go:125] begin check local disk info of client
I0323 08:38:39.012256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:38:39.012263  543705 disk_info.go:196] parse disk info done, disk is : [0xc000314000 0xc000314040]
E0323 08:38:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:38:43.410627  543705 memory.go:191] Add success.
I0323 08:38:43.409813  543705 cpu.go:282] Add success.
I0323 08:38:43.420326  543705 net.go:648] Add success.
I0323 08:38:43.423128  543705 net.go:770] primary dev: ETH0
I0323 08:38:43.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:38:43.423158  543705 net.go:698] Add success.
I0323 08:38:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:38:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:38:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:38:53.410272  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:38:53.410296  543705 memory.go:184] no items to output this cycle
I0323 08:38:53.410309  543705 cpu.go:275] no items to output this cycle
E0323 08:39:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:39:03.409768  543705 memory.go:184] no items to output this cycle
I0323 08:39:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:39:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:39:13.409791  543705 memory.go:191] Add success.
I0323 08:39:13.409794  543705 cpu.go:282] Add success.
W0323 08:39:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:39:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:39:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:39:13.420233  543705 net.go:648] Add success.
I0323 08:39:13.423283  543705 net.go:770] primary dev: ETH0
I0323 08:39:13.423298  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:39:13.423311  543705 net.go:698] Add success.
I0323 08:39:13.736197  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a2597e0f-3ac1-4ad4-9572-4be4c357b261","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:39:13.736241  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:39:14.454690  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:39:14.454868  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:39:14.454878  543705 disk_worker.go:708] disk space is not compliant
W0323 08:39:14.454881  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:39:14.456249  543705 disk_worker.go:494] system disk:vda1
I0323 08:39:14.456291  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:39:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:39:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:39:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:39:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:39:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:39:23.409773  543705 memory.go:184] no items to output this cycle
I0323 08:39:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 08:39:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:39:33.409908  543705 memory.go:184] no items to output this cycle
I0323 08:39:33.409940  543705 cpu.go:275] no items to output this cycle
I0323 08:39:39.013673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:39:39.016326  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:39:39.016333  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0323 08:39:40.241730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:39:40.241736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:39:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:39:43.410689  543705 memory.go:191] Add success.
I0323 08:39:43.409810  543705 cpu.go:282] Add success.
I0323 08:39:43.420423  543705 net.go:648] Add success.
I0323 08:39:43.423022  543705 net.go:770] primary dev: ETH0
I0323 08:39:43.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:39:43.423052  543705 net.go:698] Add success.
I0323 08:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:39:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:39:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:39:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:39:53.409779  543705 cpu.go:275] no items to output this cycle
I0323 08:39:53.409788  543705 memory.go:184] no items to output this cycle
E0323 08:40:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:40:03.409775  543705 memory.go:184] no items to output this cycle
I0323 08:40:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 08:40:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:40:13.409792  543705 memory.go:191] Add success.
I0323 08:40:13.409796  543705 cpu.go:282] Add success.
W0323 08:40:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:40:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:40:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:40:13.420033  543705 net.go:648] Add success.
I0323 08:40:13.422926  543705 net.go:770] primary dev: ETH0
I0323 08:40:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:40:13.422967  543705 net.go:698] Add success.
I0323 08:40:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:40:14.455093  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:40:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 08:40:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:40:14.456582  543705 disk_worker.go:494] system disk:vda1
I0323 08:40:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:40:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:40:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:40:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:40:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:40:16.472404  543705 disk_local_worker.go:436] Get disk info: []
I0323 08:40:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 08:40:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:40:23.409796  543705 memory.go:184] no items to output this cycle
E0323 08:40:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:40:33.409813  543705 memory.go:184] no items to output this cycle
I0323 08:40:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 08:40:39.017679  543705 disk_info.go:125] begin check local disk info of client
I0323 08:40:39.020315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:40:39.020323  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046f280 0xc00046f2c0]
E0323 08:40:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:40:43.410706  543705 memory.go:191] Add success.
I0323 08:40:43.409822  543705 cpu.go:282] Add success.
I0323 08:40:43.420407  543705 net.go:648] Add success.
I0323 08:40:43.423208  543705 net.go:770] primary dev: ETH0
I0323 08:40:43.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:40:43.423236  543705 net.go:698] Add success.
I0323 08:40:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:40:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:40:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:40:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:40:53.409788  543705 memory.go:184] no items to output this cycle
I0323 08:40:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 08:41:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:41:03.409802  543705 memory.go:184] no items to output this cycle
I0323 08:41:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 08:41:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:41:13.409794  543705 cpu.go:282] Add success.
I0323 08:41:13.409809  543705 memory.go:191] Add success.
W0323 08:41:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:41:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:41:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:41:13.420590  543705 net.go:648] Add success.
I0323 08:41:13.423312  543705 net.go:770] primary dev: ETH0
I0323 08:41:13.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:41:13.423337  543705 net.go:698] Add success.
I0323 08:41:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:41:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:41:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 08:41:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:41:14.456530  543705 disk_worker.go:494] system disk:vda1
I0323 08:41:14.456574  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:41:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:41:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:41:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:41:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:41:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:41:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:41:23.409800  543705 memory.go:184] no items to output this cycle
I0323 08:41:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 08:41:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:41:33.409787  543705 memory.go:184] no items to output this cycle
I0323 08:41:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 08:41:39.021666  543705 disk_info.go:125] begin check local disk info of client
I0323 08:41:39.024338  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:41:39.024345  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491a40 0xc000491a80]
E0323 08:41:43.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:41:43.410807  543705 memory.go:191] Add success.
I0323 08:41:43.409882  543705 cpu.go:282] Add success.
I0323 08:41:43.419708  543705 net.go:648] Add success.
I0323 08:41:43.422265  543705 net.go:770] primary dev: ETH0
I0323 08:41:43.422277  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:41:43.422289  543705 net.go:698] Add success.
I0323 08:41:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:41:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:41:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:41:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:41:53.409781  543705 memory.go:184] no items to output this cycle
I0323 08:41:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 08:42:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:42:03.409773  543705 memory.go:184] no items to output this cycle
I0323 08:42:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 08:42:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:42:13.409812  543705 memory.go:191] Add success.
I0323 08:42:13.409811  543705 cpu.go:282] Add success.
W0323 08:42:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:42:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:42:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:42:13.420211  543705 net.go:648] Add success.
I0323 08:42:13.423062  543705 net.go:770] primary dev: ETH0
I0323 08:42:13.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:42:13.423091  543705 net.go:698] Add success.
I0323 08:42:13.468245  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"879c50d4-1447-44c6-9e46-0ec9fcb75f19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:42:13.468289  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 08:42:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:42:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 08:42:14.455209  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:42:14.455918  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:42:14.455927  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:42:14.455933  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:42:14.456599  543705 disk_worker.go:494] system disk:vda1
I0323 08:42:14.456629  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:42:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:42:15.456837  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:42:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:42:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:42:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:42:16.458023  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:42:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:42:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:42:23.409767  543705 memory.go:184] no items to output this cycle
I0323 08:42:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 08:42:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:42:33.409812  543705 memory.go:184] no items to output this cycle
I0323 08:42:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 08:42:39.025673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:42:39.028221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:42:39.028227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cec80 0xc0003cecc0]
I0323 08:42:40.242779  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:42:40.242785  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:42:43.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:42:43.410725  543705 memory.go:191] Add success.
I0323 08:42:43.409927  543705 cpu.go:282] Add success.
I0323 08:42:43.419711  543705 net.go:648] Add success.
I0323 08:42:43.422156  543705 net.go:770] primary dev: ETH0
I0323 08:42:43.422169  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:42:43.422181  543705 net.go:698] Add success.
I0323 08:42:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:42:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:42:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:42:53.409814  543705 memory.go:184] no items to output this cycle
I0323 08:42:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 08:43:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:43:03.409772  543705 memory.go:184] no items to output this cycle
I0323 08:43:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:43:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:43:13.409821  543705 memory.go:191] Add success.
I0323 08:43:13.409824  543705 cpu.go:282] Add success.
W0323 08:43:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:43:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:43:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:43:13.420258  543705 net.go:648] Add success.
I0323 08:43:13.422840  543705 net.go:770] primary dev: ETH0
I0323 08:43:13.422852  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:43:13.422864  543705 net.go:698] Add success.
I0323 08:43:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:43:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:43:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 08:43:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:43:14.456505  543705 disk_worker.go:494] system disk:vda1
I0323 08:43:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:43:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:43:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:43:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:43:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:43:23.409780  543705 memory.go:184] no items to output this cycle
I0323 08:43:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 08:43:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:43:33.409793  543705 memory.go:184] no items to output this cycle
I0323 08:43:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 08:43:39.029677  543705 disk_info.go:125] begin check local disk info of client
I0323 08:43:39.032251  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:43:39.032256  543705 disk_info.go:196] parse disk info done, disk is : [0xc000296080 0xc0002960c0]
E0323 08:43:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:43:43.410952  543705 memory.go:191] Add success.
I0323 08:43:43.409831  543705 cpu.go:282] Add success.
I0323 08:43:43.419762  543705 net.go:648] Add success.
I0323 08:43:43.422545  543705 net.go:770] primary dev: ETH0
I0323 08:43:43.422559  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:43:43.422570  543705 net.go:698] Add success.
I0323 08:43:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:43:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:43:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:43:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:43:53.409795  543705 memory.go:184] no items to output this cycle
I0323 08:43:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 08:44:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:44:03.409770  543705 memory.go:184] no items to output this cycle
I0323 08:44:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 08:44:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:44:13.409809  543705 memory.go:191] Add success.
I0323 08:44:13.409817  543705 cpu.go:282] Add success.
W0323 08:44:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:44:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:44:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:44:13.420116  543705 net.go:648] Add success.
I0323 08:44:13.422784  543705 net.go:770] primary dev: ETH0
I0323 08:44:13.422803  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:44:13.422817  543705 net.go:698] Add success.
I0323 08:44:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:44:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:44:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 08:44:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:44:14.456520  543705 disk_worker.go:494] system disk:vda1
I0323 08:44:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:44:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:44:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:44:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:44:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:44:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:44:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:44:23.409777  543705 memory.go:184] no items to output this cycle
I0323 08:44:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 08:44:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:44:33.409767  543705 memory.go:184] no items to output this cycle
I0323 08:44:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 08:44:39.033674  543705 disk_info.go:125] begin check local disk info of client
I0323 08:44:39.036227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:44:39.036234  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483a40 0xc000483a80]
E0323 08:44:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:44:43.410613  543705 memory.go:191] Add success.
I0323 08:44:43.409801  543705 cpu.go:282] Add success.
I0323 08:44:43.420373  543705 net.go:648] Add success.
I0323 08:44:43.423017  543705 net.go:770] primary dev: ETH0
I0323 08:44:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:44:43.423043  543705 net.go:698] Add success.
I0323 08:44:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:44:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:44:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:44:53.409804  543705 memory.go:184] no items to output this cycle
I0323 08:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:45:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:45:03.409802  543705 memory.go:184] no items to output this cycle
I0323 08:45:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 08:45:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:45:13.409785  543705 memory.go:191] Add success.
I0323 08:45:13.409802  543705 cpu.go:282] Add success.
W0323 08:45:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:45:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:45:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:45:13.420061  543705 net.go:648] Add success.
I0323 08:45:13.422918  543705 net.go:770] primary dev: ETH0
I0323 08:45:13.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:45:13.422945  543705 net.go:698] Add success.
I0323 08:45:13.464469  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0ca84707-3abf-4713-824c-ebcaddc2fadd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:45:13.464501  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:45:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:45:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:45:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0323 08:45:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:45:14.456675  543705 disk_worker.go:494] system disk:vda1
I0323 08:45:14.456704  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:45:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:45:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:45:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:45:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:45:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:45:23.409763  543705 memory.go:184] no items to output this cycle
I0323 08:45:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 08:45:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:45:33.409804  543705 memory.go:184] no items to output this cycle
I0323 08:45:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 08:45:39.037672  543705 disk_info.go:125] begin check local disk info of client
I0323 08:45:39.040231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:45:39.040237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049f600 0xc00049f640]
I0323 08:45:40.245728  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:45:40.245733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:45:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:45:43.410609  543705 memory.go:191] Add success.
I0323 08:45:43.409791  543705 cpu.go:282] Add success.
I0323 08:45:43.420306  543705 net.go:648] Add success.
I0323 08:45:43.423341  543705 net.go:770] primary dev: ETH0
I0323 08:45:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:45:43.423366  543705 net.go:698] Add success.
I0323 08:45:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:45:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:45:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:45:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:45:53.409798  543705 memory.go:184] no items to output this cycle
I0323 08:45:53.409866  543705 cpu.go:275] no items to output this cycle
E0323 08:46:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:46:03.409784  543705 memory.go:184] no items to output this cycle
I0323 08:46:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 08:46:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:46:13.409822  543705 memory.go:191] Add success.
I0323 08:46:13.409835  543705 cpu.go:282] Add success.
W0323 08:46:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:46:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:46:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:46:13.420146  543705 net.go:648] Add success.
I0323 08:46:13.423089  543705 net.go:770] primary dev: ETH0
I0323 08:46:13.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:46:13.423113  543705 net.go:698] Add success.
I0323 08:46:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:46:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:46:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 08:46:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:46:14.456551  543705 disk_worker.go:494] system disk:vda1
I0323 08:46:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:46:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:46:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:46:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:46:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:46:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:46:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:46:23.409793  543705 memory.go:184] no items to output this cycle
I0323 08:46:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:46:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:46:33.409779  543705 memory.go:184] no items to output this cycle
I0323 08:46:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 08:46:39.041674  543705 disk_info.go:125] begin check local disk info of client
I0323 08:46:39.044274  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:46:39.044280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004846c0 0xc000484700]
E0323 08:46:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:46:43.410686  543705 memory.go:191] Add success.
I0323 08:46:43.409821  543705 cpu.go:282] Add success.
I0323 08:46:43.420570  543705 net.go:648] Add success.
I0323 08:46:43.423413  543705 net.go:770] primary dev: ETH0
I0323 08:46:43.423427  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:46:43.423438  543705 net.go:698] Add success.
I0323 08:46:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:46:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:46:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:46:53.410663  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:46:53.410681  543705 memory.go:184] no items to output this cycle
I0323 08:46:53.410690  543705 cpu.go:275] no items to output this cycle
E0323 08:47:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:47:03.409799  543705 memory.go:184] no items to output this cycle
I0323 08:47:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 08:47:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:47:13.409816  543705 memory.go:191] Add success.
I0323 08:47:13.409817  543705 cpu.go:282] Add success.
W0323 08:47:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:47:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:47:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:47:13.420186  543705 net.go:648] Add success.
I0323 08:47:13.427939  543705 net.go:770] primary dev: ETH0
I0323 08:47:13.427953  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:47:13.427965  543705 net.go:698] Add success.
I0323 08:47:13.453484  543705 event_worker.go:152] Polling the log file for events...
W0323 08:47:14.455084  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:47:14.455143  543705 disk_worker.go:708] disk space is not compliant
W0323 08:47:14.455146  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:47:14.456887  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:47:14.456896  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:47:14.456903  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:47:14.456972  543705 disk_worker.go:494] system disk:vda1
I0323 08:47:14.457013  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:47:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:47:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:47:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:47:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:47:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:47:16.458016  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:47:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:47:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:47:23.409785  543705 memory.go:184] no items to output this cycle
I0323 08:47:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 08:47:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:47:33.409805  543705 memory.go:184] no items to output this cycle
I0323 08:47:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 08:47:39.045675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:47:39.048258  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:47:39.048265  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b100 0xc00007b140]
E0323 08:47:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:47:43.410827  543705 memory.go:191] Add success.
I0323 08:47:43.409809  543705 cpu.go:282] Add success.
I0323 08:47:43.420554  543705 net.go:648] Add success.
I0323 08:47:43.423387  543705 net.go:770] primary dev: ETH0
I0323 08:47:43.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:47:43.423412  543705 net.go:698] Add success.
I0323 08:47:46.457892  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:47:46.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:47:46.457987  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:47:53.410237  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:47:53.410253  543705 memory.go:184] no items to output this cycle
I0323 08:47:53.410303  543705 cpu.go:275] no items to output this cycle
E0323 08:48:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:48:03.409808  543705 memory.go:184] no items to output this cycle
I0323 08:48:03.409825  543705 cpu.go:275] no items to output this cycle
E0323 08:48:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:48:13.409809  543705 memory.go:191] Add success.
I0323 08:48:13.409814  543705 cpu.go:282] Add success.
W0323 08:48:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:48:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:48:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:48:13.420455  543705 net.go:648] Add success.
I0323 08:48:13.423104  543705 net.go:770] primary dev: ETH0
I0323 08:48:13.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:48:13.423128  543705 net.go:698] Add success.
I0323 08:48:13.467472  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"33a69627-0810-4e17-a625-8f7ff6b60e78","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:48:13.467503  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:48:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:48:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:48:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 08:48:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:48:14.456522  543705 disk_worker.go:494] system disk:vda1
I0323 08:48:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:48:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:48:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:48:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:48:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:48:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:48:23.409763  543705 memory.go:184] no items to output this cycle
I0323 08:48:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 08:48:33.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:48:33.409834  543705 memory.go:184] no items to output this cycle
I0323 08:48:33.409848  543705 cpu.go:275] no items to output this cycle
I0323 08:48:39.049678  543705 disk_info.go:125] begin check local disk info of client
I0323 08:48:39.052177  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:48:39.052184  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a7400 0xc0002a7440]
I0323 08:48:40.246779  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:48:40.246786  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:48:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:48:43.410672  543705 memory.go:191] Add success.
I0323 08:48:43.409802  543705 cpu.go:282] Add success.
I0323 08:48:43.420382  543705 net.go:648] Add success.
I0323 08:48:43.423145  543705 net.go:770] primary dev: ETH0
I0323 08:48:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:48:43.423175  543705 net.go:698] Add success.
I0323 08:48:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:48:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:48:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:48:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:48:53.409772  543705 memory.go:184] no items to output this cycle
I0323 08:48:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 08:49:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:49:03.409780  543705 cpu.go:275] no items to output this cycle
I0323 08:49:03.409782  543705 memory.go:184] no items to output this cycle
E0323 08:49:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:49:13.409798  543705 memory.go:191] Add success.
I0323 08:49:13.409801  543705 cpu.go:282] Add success.
W0323 08:49:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:49:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:49:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:49:13.420127  543705 net.go:648] Add success.
I0323 08:49:13.423103  543705 net.go:770] primary dev: ETH0
I0323 08:49:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:49:13.423289  543705 net.go:698] Add success.
I0323 08:49:14.454954  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:49:14.455116  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:49:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 08:49:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:49:14.456584  543705 disk_worker.go:494] system disk:vda1
I0323 08:49:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:49:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:49:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:49:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:49:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:49:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:49:23.410258  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:49:23.410274  543705 memory.go:184] no items to output this cycle
I0323 08:49:23.410276  543705 cpu.go:275] no items to output this cycle
E0323 08:49:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:49:33.409785  543705 memory.go:184] no items to output this cycle
I0323 08:49:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 08:49:39.053677  543705 disk_info.go:125] begin check local disk info of client
I0323 08:49:39.056158  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:49:39.056165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004604c0 0xc000460500]
E0323 08:49:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:49:43.410804  543705 memory.go:191] Add success.
I0323 08:49:43.409800  543705 cpu.go:282] Add success.
I0323 08:49:43.420513  543705 net.go:648] Add success.
I0323 08:49:43.423391  543705 net.go:770] primary dev: ETH0
I0323 08:49:43.423404  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:49:43.423418  543705 net.go:698] Add success.
I0323 08:49:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:49:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:49:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:49:53.410421  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:49:53.410440  543705 memory.go:184] no items to output this cycle
I0323 08:49:53.410451  543705 cpu.go:275] no items to output this cycle
E0323 08:50:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:50:03.409784  543705 memory.go:184] no items to output this cycle
I0323 08:50:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 08:50:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:50:13.409794  543705 memory.go:191] Add success.
I0323 08:50:13.409796  543705 cpu.go:282] Add success.
W0323 08:50:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:50:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:50:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:50:13.420043  543705 net.go:648] Add success.
I0323 08:50:13.422613  543705 net.go:770] primary dev: ETH0
I0323 08:50:13.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:50:13.422639  543705 net.go:698] Add success.
I0323 08:50:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:50:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:50:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 08:50:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:50:14.457076  543705 disk_worker.go:494] system disk:vda1
I0323 08:50:14.457109  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:50:15.456009  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:50:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:50:16.472485  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:50:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:50:23.409761  543705 memory.go:184] no items to output this cycle
I0323 08:50:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 08:50:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:50:33.409778  543705 memory.go:184] no items to output this cycle
I0323 08:50:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 08:50:39.057677  543705 disk_info.go:125] begin check local disk info of client
I0323 08:50:39.060199  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:50:39.060206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6680 0xc0003b66c0]
E0323 08:50:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:50:43.410625  543705 memory.go:191] Add success.
I0323 08:50:43.409804  543705 cpu.go:282] Add success.
I0323 08:50:43.420327  543705 net.go:648] Add success.
I0323 08:50:43.423146  543705 net.go:770] primary dev: ETH0
I0323 08:50:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:50:43.423177  543705 net.go:698] Add success.
I0323 08:50:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:50:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:50:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:50:53.409801  543705 memory.go:184] no items to output this cycle
I0323 08:50:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 08:51:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:51:03.409772  543705 memory.go:184] no items to output this cycle
I0323 08:51:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 08:51:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:51:13.409796  543705 memory.go:191] Add success.
I0323 08:51:13.409800  543705 cpu.go:282] Add success.
W0323 08:51:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:51:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:51:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:51:13.420106  543705 net.go:648] Add success.
I0323 08:51:13.422778  543705 net.go:770] primary dev: ETH0
I0323 08:51:13.422790  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:51:13.422803  543705 net.go:698] Add success.
I0323 08:51:13.593458  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d65fbf64-9cd6-4331-af1c-54aec54280bb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:51:13.593491  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:51:14.453976  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:51:14.454243  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:51:14.454252  543705 disk_worker.go:708] disk space is not compliant
W0323 08:51:14.454255  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:51:14.455756  543705 disk_worker.go:494] system disk:vda1
I0323 08:51:14.455792  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:51:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:51:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:51:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:51:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:51:16.472426  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:51:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:51:23.409770  543705 memory.go:184] no items to output this cycle
I0323 08:51:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 08:51:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:51:33.409812  543705 memory.go:184] no items to output this cycle
I0323 08:51:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 08:51:39.061675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:51:39.064284  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:51:39.064290  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6440 0xc0003b6480]
I0323 08:51:40.249720  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:51:40.249725  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:51:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:51:43.410679  543705 memory.go:191] Add success.
I0323 08:51:43.409815  543705 cpu.go:282] Add success.
I0323 08:51:43.420408  543705 net.go:648] Add success.
I0323 08:51:43.423034  543705 net.go:770] primary dev: ETH0
I0323 08:51:43.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:51:43.423060  543705 net.go:698] Add success.
I0323 08:51:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:51:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:51:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:51:53.409761  543705 memory.go:184] no items to output this cycle
I0323 08:51:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 08:52:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:52:03.409766  543705 memory.go:184] no items to output this cycle
I0323 08:52:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 08:52:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:52:13.409787  543705 memory.go:191] Add success.
I0323 08:52:13.409796  543705 cpu.go:282] Add success.
W0323 08:52:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:52:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:52:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:52:13.420237  543705 net.go:648] Add success.
I0323 08:52:13.423332  543705 net.go:770] primary dev: ETH0
I0323 08:52:13.423346  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:52:13.423361  543705 net.go:698] Add success.
W0323 08:52:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:52:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 08:52:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:52:14.455892  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:52:14.455901  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:52:14.455907  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:52:14.456566  543705 disk_worker.go:494] system disk:vda1
I0323 08:52:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:52:15.456810  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:52:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:52:16.457956  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:52:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:52:16.458008  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:52:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:52:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:52:23.410328  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:52:23.410347  543705 memory.go:184] no items to output this cycle
I0323 08:52:23.410365  543705 cpu.go:275] no items to output this cycle
E0323 08:52:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:52:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 08:52:33.409796  543705 memory.go:184] no items to output this cycle
I0323 08:52:39.065673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:52:39.068170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:52:39.068176  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
E0323 08:52:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:52:43.410673  543705 memory.go:191] Add success.
I0323 08:52:43.409822  543705 cpu.go:282] Add success.
I0323 08:52:43.420505  543705 net.go:648] Add success.
I0323 08:52:43.423362  543705 net.go:770] primary dev: ETH0
I0323 08:52:43.423375  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:52:43.423388  543705 net.go:698] Add success.
I0323 08:52:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:52:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:52:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:52:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:52:53.409800  543705 memory.go:184] no items to output this cycle
I0323 08:52:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:53:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:53:03.409796  543705 memory.go:184] no items to output this cycle
I0323 08:53:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:53:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:53:13.409799  543705 memory.go:191] Add success.
I0323 08:53:13.409802  543705 cpu.go:282] Add success.
W0323 08:53:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:53:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:53:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:53:13.420158  543705 net.go:648] Add success.
I0323 08:53:13.423190  543705 net.go:770] primary dev: ETH0
I0323 08:53:13.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:53:13.423216  543705 net.go:698] Add success.
I0323 08:53:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:53:14.455171  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:53:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 08:53:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:53:14.456582  543705 disk_worker.go:494] system disk:vda1
I0323 08:53:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:53:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:53:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:53:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:53:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:53:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:53:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:53:23.409792  543705 memory.go:184] no items to output this cycle
I0323 08:53:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 08:53:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:53:33.409880  543705 memory.go:184] no items to output this cycle
I0323 08:53:33.410062  543705 cpu.go:275] no items to output this cycle
I0323 08:53:39.069681  543705 disk_info.go:125] begin check local disk info of client
I0323 08:53:39.072237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:53:39.072243  543705 disk_info.go:196] parse disk info done, disk is : [0xc000533b40 0xc000533b80]
E0323 08:53:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:53:43.410649  543705 memory.go:191] Add success.
I0323 08:53:43.409818  543705 cpu.go:282] Add success.
I0323 08:53:43.420386  543705 net.go:648] Add success.
I0323 08:53:43.423021  543705 net.go:770] primary dev: ETH0
I0323 08:53:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:53:43.423051  543705 net.go:698] Add success.
I0323 08:53:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:53:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:53:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:53:53.410188  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:53:53.410203  543705 memory.go:184] no items to output this cycle
I0323 08:53:53.410233  543705 cpu.go:275] no items to output this cycle
E0323 08:54:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:54:03.409798  543705 memory.go:184] no items to output this cycle
I0323 08:54:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 08:54:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:54:13.409813  543705 memory.go:191] Add success.
I0323 08:54:13.409817  543705 cpu.go:282] Add success.
W0323 08:54:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:54:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:54:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:54:13.420154  543705 net.go:648] Add success.
I0323 08:54:13.423339  543705 net.go:770] primary dev: ETH0
I0323 08:54:13.423363  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:54:13.423375  543705 net.go:698] Add success.
I0323 08:54:13.468632  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6395cc4f-6251-4fe4-9eb6-8b87ff49cf26","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:54:13.468666  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 08:54:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:54:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:54:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 08:54:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:54:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 08:54:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:54:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:54:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:54:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:54:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:54:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:54:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:54:23.409796  543705 memory.go:184] no items to output this cycle
I0323 08:54:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 08:54:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:54:33.409805  543705 memory.go:184] no items to output this cycle
I0323 08:54:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 08:54:39.073676  543705 disk_info.go:125] begin check local disk info of client
I0323 08:54:39.076248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:54:39.076255  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352000 0xc000352040]
I0323 08:54:40.250777  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:54:40.250783  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:54:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:54:43.410665  543705 memory.go:191] Add success.
I0323 08:54:43.409816  543705 cpu.go:282] Add success.
I0323 08:54:43.420358  543705 net.go:648] Add success.
I0323 08:54:43.423553  543705 net.go:770] primary dev: ETH0
I0323 08:54:43.423566  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:54:43.423579  543705 net.go:698] Add success.
I0323 08:54:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:54:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:54:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:54:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:54:53.409784  543705 cpu.go:275] no items to output this cycle
I0323 08:54:53.409786  543705 memory.go:184] no items to output this cycle
E0323 08:55:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:55:03.409780  543705 memory.go:184] no items to output this cycle
I0323 08:55:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 08:55:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:55:13.409784  543705 memory.go:191] Add success.
W0323 08:55:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:55:13.409815  543705 cpu.go:282] Add success.
W0323 08:55:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:55:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:55:13.420091  543705 net.go:648] Add success.
I0323 08:55:13.423044  543705 net.go:770] primary dev: ETH0
I0323 08:55:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:55:13.423073  543705 net.go:698] Add success.
I0323 08:55:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:55:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:55:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 08:55:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:55:14.456584  543705 disk_worker.go:494] system disk:vda1
I0323 08:55:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:55:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:55:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:55:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:55:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:55:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:55:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:55:23.409769  543705 memory.go:184] no items to output this cycle
I0323 08:55:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 08:55:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:55:33.409791  543705 memory.go:184] no items to output this cycle
I0323 08:55:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 08:55:39.077675  543705 disk_info.go:125] begin check local disk info of client
I0323 08:55:39.080292  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:55:39.080299  543705 disk_info.go:196] parse disk info done, disk is : [0xc000493e00 0xc000493e40]
E0323 08:55:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:55:43.410818  543705 memory.go:191] Add success.
I0323 08:55:43.409816  543705 cpu.go:282] Add success.
I0323 08:55:43.420528  543705 net.go:648] Add success.
I0323 08:55:43.423372  543705 net.go:770] primary dev: ETH0
I0323 08:55:43.423387  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:55:43.423402  543705 net.go:698] Add success.
I0323 08:55:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:55:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:55:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:55:53.410227  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:55:53.410241  543705 memory.go:184] no items to output this cycle
I0323 08:55:53.410263  543705 cpu.go:275] no items to output this cycle
E0323 08:56:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:56:03.409796  543705 memory.go:184] no items to output this cycle
I0323 08:56:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 08:56:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:56:13.409813  543705 memory.go:191] Add success.
I0323 08:56:13.409816  543705 cpu.go:282] Add success.
W0323 08:56:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:56:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:56:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:56:13.420387  543705 net.go:648] Add success.
I0323 08:56:13.423288  543705 net.go:770] primary dev: ETH0
I0323 08:56:13.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:56:13.423314  543705 net.go:698] Add success.
I0323 08:56:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:56:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:56:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 08:56:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:56:14.456609  543705 disk_worker.go:494] system disk:vda1
I0323 08:56:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:56:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:56:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:56:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:56:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:56:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:56:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:56:23.409801  543705 memory.go:184] no items to output this cycle
I0323 08:56:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 08:56:33.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:56:33.409924  543705 memory.go:184] no items to output this cycle
I0323 08:56:33.410087  543705 cpu.go:275] no items to output this cycle
I0323 08:56:39.081673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:56:39.084300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:56:39.084309  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a1f40 0xc000352000]
E0323 08:56:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:56:43.410607  543705 memory.go:191] Add success.
I0323 08:56:43.409805  543705 cpu.go:282] Add success.
I0323 08:56:43.420343  543705 net.go:648] Add success.
I0323 08:56:43.422811  543705 net.go:770] primary dev: ETH0
I0323 08:56:43.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:56:43.422836  543705 net.go:698] Add success.
I0323 08:56:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:56:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:56:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:56:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:56:53.409774  543705 memory.go:184] no items to output this cycle
I0323 08:56:53.409783  543705 cpu.go:275] no items to output this cycle
E0323 08:57:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:57:03.409784  543705 memory.go:184] no items to output this cycle
I0323 08:57:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 08:57:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:57:13.409821  543705 memory.go:191] Add success.
I0323 08:57:13.409829  543705 cpu.go:282] Add success.
W0323 08:57:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:57:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:57:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:57:13.420186  543705 net.go:648] Add success.
I0323 08:57:13.423182  543705 net.go:770] primary dev: ETH0
I0323 08:57:13.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:57:13.423206  543705 net.go:698] Add success.
I0323 08:57:13.429589  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 08:57:13.452772  543705 event_worker.go:152] Polling the log file for events...
I0323 08:57:13.871783  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ca0c9a76-9c3b-4be2-ac36-53f0adffff86","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 08:57:13.871816  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 08:57:14.454944  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:57:14.454958  543705 disk_worker.go:708] disk space is not compliant
W0323 08:57:14.454962  543705 disk_worker.go:728] disk inode is not compliant
E0323 08:57:14.455899  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 08:57:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 08:57:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0323 08:57:14.456733  543705 disk_worker.go:494] system disk:vda1
I0323 08:57:14.456766  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 08:57:15.456829  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 08:57:15.456836  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:57:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 08:57:16.457961  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 08:57:16.458001  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:57:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:57:16.472343  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:57:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:57:23.409796  543705 memory.go:184] no items to output this cycle
I0323 08:57:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 08:57:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:57:33.409798  543705 memory.go:184] no items to output this cycle
I0323 08:57:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 08:57:39.085680  543705 disk_info.go:125] begin check local disk info of client
I0323 08:57:39.088247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:57:39.088254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6000 0xc0003b6040]
I0323 08:57:40.253721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 08:57:40.253727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 08:57:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:57:43.410639  543705 memory.go:191] Add success.
I0323 08:57:43.409791  543705 cpu.go:282] Add success.
I0323 08:57:43.420322  543705 net.go:648] Add success.
I0323 08:57:43.422868  543705 net.go:770] primary dev: ETH0
I0323 08:57:43.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:57:43.422895  543705 net.go:698] Add success.
I0323 08:57:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:57:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:57:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:57:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:57:53.409796  543705 memory.go:184] no items to output this cycle
I0323 08:57:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 08:58:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:58:03.409812  543705 memory.go:184] no items to output this cycle
I0323 08:58:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 08:58:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:58:13.409793  543705 memory.go:191] Add success.
I0323 08:58:13.409795  543705 cpu.go:282] Add success.
W0323 08:58:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 08:58:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:58:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:58:13.420594  543705 net.go:648] Add success.
I0323 08:58:13.423577  543705 net.go:770] primary dev: ETH0
I0323 08:58:13.423590  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:58:13.423602  543705 net.go:698] Add success.
I0323 08:58:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:58:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:58:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 08:58:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:58:14.456611  543705 disk_worker.go:494] system disk:vda1
I0323 08:58:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:58:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:58:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:58:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:58:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:58:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:58:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:58:23.409856  543705 memory.go:184] no items to output this cycle
I0323 08:58:23.409919  543705 cpu.go:275] no items to output this cycle
E0323 08:58:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:58:33.409823  543705 memory.go:184] no items to output this cycle
I0323 08:58:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 08:58:39.089678  543705 disk_info.go:125] begin check local disk info of client
I0323 08:58:39.092357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:58:39.092365  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b6000 0xc0003b6040]
E0323 08:58:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:58:43.410732  543705 memory.go:191] Add success.
I0323 08:58:43.409804  543705 cpu.go:282] Add success.
I0323 08:58:43.420494  543705 net.go:648] Add success.
I0323 08:58:43.423430  543705 net.go:770] primary dev: ETH0
I0323 08:58:43.423444  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:58:43.423459  543705 net.go:698] Add success.
I0323 08:58:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:58:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:58:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:58:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:58:53.409768  543705 memory.go:184] no items to output this cycle
I0323 08:58:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 08:59:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:59:03.409768  543705 memory.go:184] no items to output this cycle
I0323 08:59:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 08:59:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:59:13.409793  543705 memory.go:191] Add success.
W0323 08:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 08:59:13.409824  543705 cpu.go:282] Add success.
W0323 08:59:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 08:59:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 08:59:13.420165  543705 net.go:648] Add success.
I0323 08:59:13.422825  543705 net.go:770] primary dev: ETH0
I0323 08:59:13.422838  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:59:13.422849  543705 net.go:698] Add success.
I0323 08:59:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 08:59:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 08:59:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 08:59:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 08:59:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 08:59:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 08:59:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 08:59:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:59:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:59:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 08:59:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 08:59:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:59:23.409793  543705 memory.go:184] no items to output this cycle
I0323 08:59:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 08:59:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:59:33.409922  543705 cpu.go:275] no items to output this cycle
I0323 08:59:33.409923  543705 memory.go:184] no items to output this cycle
I0323 08:59:39.093673  543705 disk_info.go:125] begin check local disk info of client
I0323 08:59:39.096205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 08:59:39.096212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 08:59:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:59:43.410582  543705 memory.go:191] Add success.
I0323 08:59:43.409826  543705 cpu.go:282] Add success.
I0323 08:59:43.420260  543705 net.go:648] Add success.
I0323 08:59:43.423239  543705 net.go:770] primary dev: ETH0
I0323 08:59:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0323 08:59:43.423265  543705 net.go:698] Add success.
I0323 08:59:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 08:59:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 08:59:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 08:59:53.410415  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 08:59:53.410430  543705 memory.go:184] no items to output this cycle
I0323 08:59:53.410456  543705 cpu.go:275] no items to output this cycle
E0323 09:00:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:00:03.409791  543705 memory.go:184] no items to output this cycle
I0323 09:00:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 09:00:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:00:13.409781  543705 memory.go:191] Add success.
I0323 09:00:13.409805  543705 cpu.go:282] Add success.
W0323 09:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:00:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:00:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:00:13.420433  543705 net.go:648] Add success.
I0323 09:00:13.423320  543705 net.go:770] primary dev: ETH0
I0323 09:00:13.423335  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:00:13.423350  543705 net.go:698] Add success.
I0323 09:00:14.001890  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a48d7e6b-1dae-4b35-a738-b2dbbd7ae137","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:00:14.001926  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:00:14.454676  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:00:14.454932  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:00:14.454945  543705 disk_worker.go:708] disk space is not compliant
W0323 09:00:14.454949  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:00:14.456518  543705 disk_worker.go:494] system disk:vda1
I0323 09:00:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:00:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:00:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:00:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:00:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:00:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:00:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:00:23.409792  543705 memory.go:184] no items to output this cycle
I0323 09:00:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 09:00:33.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:00:33.409926  543705 cpu.go:275] no items to output this cycle
I0323 09:00:33.410008  543705 memory.go:184] no items to output this cycle
I0323 09:00:39.097678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:00:39.100200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:00:39.100206  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004cdf00 0xc0004cdf40]
I0323 09:00:40.254784  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:00:40.254790  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:00:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:00:43.410909  543705 memory.go:191] Add success.
I0323 09:00:43.409819  543705 cpu.go:282] Add success.
I0323 09:00:43.420638  543705 net.go:648] Add success.
I0323 09:00:43.423439  543705 net.go:770] primary dev: ETH0
I0323 09:00:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:00:43.423469  543705 net.go:698] Add success.
I0323 09:00:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:00:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:00:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:00:53.410395  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:00:53.410414  543705 memory.go:184] no items to output this cycle
I0323 09:00:53.410427  543705 cpu.go:275] no items to output this cycle
E0323 09:01:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:01:03.409762  543705 memory.go:184] no items to output this cycle
I0323 09:01:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 09:01:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:01:13.409787  543705 memory.go:191] Add success.
W0323 09:01:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 09:01:13.409816  543705 cpu.go:282] Add success.
W0323 09:01:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:01:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:01:13.420202  543705 net.go:648] Add success.
I0323 09:01:13.423260  543705 net.go:770] primary dev: ETH0
I0323 09:01:13.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:01:13.423285  543705 net.go:698] Add success.
I0323 09:01:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:01:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:01:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 09:01:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:01:14.456569  543705 disk_worker.go:494] system disk:vda1
I0323 09:01:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:01:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:01:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:01:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:01:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:01:23.409777  543705 memory.go:184] no items to output this cycle
I0323 09:01:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 09:01:33.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:01:33.409899  543705 cpu.go:275] no items to output this cycle
I0323 09:01:33.409907  543705 memory.go:184] no items to output this cycle
I0323 09:01:39.101677  543705 disk_info.go:125] begin check local disk info of client
I0323 09:01:39.104221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:01:39.104227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 09:01:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:01:43.410860  543705 memory.go:191] Add success.
I0323 09:01:43.409814  543705 cpu.go:282] Add success.
I0323 09:01:43.420634  543705 net.go:648] Add success.
I0323 09:01:43.423390  543705 net.go:770] primary dev: ETH0
I0323 09:01:43.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:01:43.423441  543705 net.go:698] Add success.
I0323 09:01:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:01:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:01:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:01:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:01:53.409798  543705 memory.go:184] no items to output this cycle
I0323 09:01:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 09:02:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:02:03.409774  543705 memory.go:184] no items to output this cycle
I0323 09:02:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 09:02:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:02:13.409812  543705 memory.go:191] Add success.
I0323 09:02:13.409814  543705 cpu.go:282] Add success.
W0323 09:02:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:02:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:02:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:02:13.420226  543705 net.go:648] Add success.
I0323 09:02:13.423010  543705 net.go:770] primary dev: ETH0
I0323 09:02:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:02:13.423035  543705 net.go:698] Add success.
W0323 09:02:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:02:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 09:02:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:02:14.456920  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:02:14.456929  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:02:14.456935  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:02:14.457008  543705 disk_worker.go:494] system disk:vda1
I0323 09:02:14.457040  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:02:15.456853  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:02:15.456861  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:02:16.457937  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:02:16.457937  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:02:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:02:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:02:16.472346  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:02:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:02:23.409765  543705 memory.go:184] no items to output this cycle
I0323 09:02:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:02:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:02:33.409816  543705 memory.go:184] no items to output this cycle
I0323 09:02:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 09:02:39.105680  543705 disk_info.go:125] begin check local disk info of client
I0323 09:02:39.108242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:02:39.108249  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c1580 0xc0003c15c0]
E0323 09:02:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:02:43.410644  543705 memory.go:191] Add success.
I0323 09:02:43.409805  543705 cpu.go:282] Add success.
I0323 09:02:43.420411  543705 net.go:648] Add success.
I0323 09:02:43.423196  543705 net.go:770] primary dev: ETH0
I0323 09:02:43.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:02:43.423222  543705 net.go:698] Add success.
I0323 09:02:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:02:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:02:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:02:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:02:53.409800  543705 memory.go:184] no items to output this cycle
I0323 09:02:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 09:03:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:03:03.409776  543705 memory.go:184] no items to output this cycle
I0323 09:03:03.409777  543705 cpu.go:275] no items to output this cycle
E0323 09:03:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:03:13.409783  543705 memory.go:191] Add success.
W0323 09:03:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 09:03:13.409816  543705 cpu.go:282] Add success.
W0323 09:03:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:03:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:03:13.420302  543705 net.go:648] Add success.
I0323 09:03:13.423133  543705 net.go:770] primary dev: ETH0
I0323 09:03:13.423145  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:03:13.423157  543705 net.go:698] Add success.
I0323 09:03:13.470004  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1debd8bc-acb9-49d6-9b2c-9265b3705bfc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:03:13.470038  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:03:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:03:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:03:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 09:03:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:03:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 09:03:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:03:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:03:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:03:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:03:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:03:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:03:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:03:23.409798  543705 memory.go:184] no items to output this cycle
I0323 09:03:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 09:03:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:03:33.409792  543705 memory.go:184] no items to output this cycle
I0323 09:03:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 09:03:39.109667  543705 disk_info.go:125] begin check local disk info of client
I0323 09:03:39.112282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:03:39.112289  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491580 0xc0004915c0]
I0323 09:03:40.257724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:03:40.257731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:03:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:03:43.410703  543705 memory.go:191] Add success.
I0323 09:03:43.409798  543705 cpu.go:282] Add success.
I0323 09:03:43.420610  543705 net.go:648] Add success.
I0323 09:03:43.423656  543705 net.go:770] primary dev: ETH0
I0323 09:03:43.423669  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:03:43.423681  543705 net.go:698] Add success.
I0323 09:03:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:03:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:03:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:03:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:03:53.409777  543705 memory.go:184] no items to output this cycle
I0323 09:03:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 09:04:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:04:03.409806  543705 memory.go:184] no items to output this cycle
I0323 09:04:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 09:04:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:04:13.409776  543705 memory.go:191] Add success.
W0323 09:04:13.409802  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 09:04:13.409808  543705 cpu.go:282] Add success.
W0323 09:04:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:04:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:04:13.420144  543705 net.go:648] Add success.
I0323 09:04:13.422845  543705 net.go:770] primary dev: ETH0
I0323 09:04:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:04:13.422871  543705 net.go:698] Add success.
I0323 09:04:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:04:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:04:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 09:04:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:04:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 09:04:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:04:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:04:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:04:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:04:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:04:23.409782  543705 memory.go:184] no items to output this cycle
I0323 09:04:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 09:04:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:04:33.409795  543705 memory.go:184] no items to output this cycle
I0323 09:04:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 09:04:39.113703  543705 disk_info.go:125] begin check local disk info of client
I0323 09:04:39.116487  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:04:39.116495  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0000 0xc0002b0040]
E0323 09:04:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:04:43.410622  543705 memory.go:191] Add success.
I0323 09:04:43.409819  543705 cpu.go:282] Add success.
I0323 09:04:43.420373  543705 net.go:648] Add success.
I0323 09:04:43.423544  543705 net.go:770] primary dev: ETH0
I0323 09:04:43.423556  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:04:43.423568  543705 net.go:698] Add success.
I0323 09:04:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:04:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:04:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:04:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:04:53.409802  543705 memory.go:184] no items to output this cycle
I0323 09:04:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 09:05:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:05:03.409774  543705 memory.go:184] no items to output this cycle
I0323 09:05:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 09:05:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:05:13.409806  543705 memory.go:191] Add success.
I0323 09:05:13.409808  543705 cpu.go:282] Add success.
W0323 09:05:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:05:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:05:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:05:13.420079  543705 net.go:648] Add success.
I0323 09:05:13.423117  543705 net.go:770] primary dev: ETH0
I0323 09:05:13.423131  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:05:13.423145  543705 net.go:698] Add success.
I0323 09:05:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:05:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:05:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 09:05:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:05:14.456525  543705 disk_worker.go:494] system disk:vda1
I0323 09:05:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:05:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:05:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:05:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:05:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:05:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:05:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:05:23.409802  543705 memory.go:184] no items to output this cycle
I0323 09:05:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:05:33.409929  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:05:33.409956  543705 memory.go:184] no items to output this cycle
I0323 09:05:33.409967  543705 cpu.go:275] no items to output this cycle
I0323 09:05:39.117674  543705 disk_info.go:125] begin check local disk info of client
I0323 09:05:39.120291  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:05:39.120297  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 09:05:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:05:43.410645  543705 memory.go:191] Add success.
I0323 09:05:43.409815  543705 cpu.go:282] Add success.
I0323 09:05:43.420406  543705 net.go:648] Add success.
I0323 09:05:43.422985  543705 net.go:770] primary dev: ETH0
I0323 09:05:43.422998  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:05:43.423010  543705 net.go:698] Add success.
I0323 09:05:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:05:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:05:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:05:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:05:53.409796  543705 memory.go:184] no items to output this cycle
I0323 09:05:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 09:06:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:06:03.409783  543705 memory.go:184] no items to output this cycle
I0323 09:06:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 09:06:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:06:13.409788  543705 memory.go:191] Add success.
I0323 09:06:13.409809  543705 cpu.go:282] Add success.
W0323 09:06:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:06:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:06:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:06:13.420058  543705 net.go:648] Add success.
I0323 09:06:13.422802  543705 net.go:770] primary dev: ETH0
I0323 09:06:13.422815  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:06:13.422828  543705 net.go:698] Add success.
I0323 09:06:13.752146  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ab8a24f-fd5c-4ccc-b4b8-9d5f3305884b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:06:13.752190  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:06:14.454705  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:06:14.454961  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:06:14.454971  543705 disk_worker.go:708] disk space is not compliant
W0323 09:06:14.454974  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:06:14.456396  543705 disk_worker.go:494] system disk:vda1
I0323 09:06:14.456430  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:06:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:06:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:06:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:06:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:06:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:06:23.409773  543705 memory.go:184] no items to output this cycle
I0323 09:06:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 09:06:33.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:06:33.409846  543705 memory.go:184] no items to output this cycle
I0323 09:06:33.409856  543705 cpu.go:275] no items to output this cycle
I0323 09:06:39.121677  543705 disk_info.go:125] begin check local disk info of client
I0323 09:06:39.124282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:06:39.124289  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0323 09:06:40.261725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:06:40.261731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:06:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:06:43.410847  543705 memory.go:191] Add success.
I0323 09:06:43.409825  543705 cpu.go:282] Add success.
I0323 09:06:43.420602  543705 net.go:648] Add success.
I0323 09:06:43.424125  543705 net.go:770] primary dev: ETH0
I0323 09:06:43.424140  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:06:43.424154  543705 net.go:698] Add success.
I0323 09:06:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:06:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:06:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:06:53.410236  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:06:53.410252  543705 memory.go:184] no items to output this cycle
I0323 09:06:53.410288  543705 cpu.go:275] no items to output this cycle
E0323 09:07:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:07:03.409803  543705 memory.go:184] no items to output this cycle
I0323 09:07:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 09:07:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:07:13.409791  543705 memory.go:191] Add success.
I0323 09:07:13.409820  543705 cpu.go:282] Add success.
W0323 09:07:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:07:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:07:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:07:13.420600  543705 net.go:648] Add success.
I0323 09:07:13.423505  543705 net.go:770] primary dev: ETH0
I0323 09:07:13.423517  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:07:13.423531  543705 net.go:698] Add success.
I0323 09:07:13.453086  543705 event_worker.go:152] Polling the log file for events...
W0323 09:07:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:07:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 09:07:14.455175  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:07:14.456882  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:07:14.456891  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:07:14.456896  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:07:14.456969  543705 disk_worker.go:494] system disk:vda1
I0323 09:07:14.457011  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:07:15.456818  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:07:15.456826  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:07:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:07:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:07:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:07:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:07:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:07:23.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:07:23.410274  543705 memory.go:184] no items to output this cycle
I0323 09:07:23.410290  543705 cpu.go:275] no items to output this cycle
E0323 09:07:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:07:33.409802  543705 memory.go:184] no items to output this cycle
I0323 09:07:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 09:07:39.125673  543705 disk_info.go:125] begin check local disk info of client
I0323 09:07:39.128270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:07:39.128277  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0c80 0xc0004a0cc0]
E0323 09:07:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:07:43.410901  543705 memory.go:191] Add success.
I0323 09:07:43.409798  543705 cpu.go:282] Add success.
I0323 09:07:43.420670  543705 net.go:648] Add success.
I0323 09:07:43.423909  543705 net.go:770] primary dev: ETH0
I0323 09:07:43.423921  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:07:43.423934  543705 net.go:698] Add success.
I0323 09:07:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:07:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:07:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:07:53.409766  543705 memory.go:184] no items to output this cycle
I0323 09:07:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 09:08:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:08:03.409800  543705 memory.go:184] no items to output this cycle
I0323 09:08:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 09:08:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:08:13.409779  543705 memory.go:191] Add success.
I0323 09:08:13.409801  543705 cpu.go:282] Add success.
W0323 09:08:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:08:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:08:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:08:13.420198  543705 net.go:648] Add success.
I0323 09:08:13.423079  543705 net.go:770] primary dev: ETH0
I0323 09:08:13.423092  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:08:13.423104  543705 net.go:698] Add success.
I0323 09:08:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:08:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:08:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 09:08:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:08:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 09:08:14.456625  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:08:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:08:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:08:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:08:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:08:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:08:23.409803  543705 memory.go:184] no items to output this cycle
I0323 09:08:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:08:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:08:33.409824  543705 memory.go:184] no items to output this cycle
I0323 09:08:33.409835  543705 cpu.go:275] no items to output this cycle
I0323 09:08:39.129674  543705 disk_info.go:125] begin check local disk info of client
I0323 09:08:39.132225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:08:39.132231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f4100 0xc0001f4140]
E0323 09:08:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:08:43.410557  543705 memory.go:191] Add success.
I0323 09:08:43.409827  543705 cpu.go:282] Add success.
I0323 09:08:43.420251  543705 net.go:648] Add success.
I0323 09:08:43.422892  543705 net.go:770] primary dev: ETH0
I0323 09:08:43.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:08:43.422935  543705 net.go:698] Add success.
I0323 09:08:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:08:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:08:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:08:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:08:53.409770  543705 memory.go:184] no items to output this cycle
I0323 09:08:53.409774  543705 cpu.go:275] no items to output this cycle
E0323 09:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:09:03.409781  543705 memory.go:184] no items to output this cycle
I0323 09:09:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 09:09:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:09:13.409796  543705 memory.go:191] Add success.
I0323 09:09:13.409797  543705 cpu.go:282] Add success.
W0323 09:09:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:09:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:09:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:09:13.420059  543705 net.go:648] Add success.
I0323 09:09:13.423057  543705 net.go:770] primary dev: ETH0
I0323 09:09:13.423076  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:09:13.423094  543705 net.go:698] Add success.
I0323 09:09:13.468372  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3d3c9fcc-29f3-49e3-985a-5e148d4ac3dc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:09:13.468406  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:09:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:09:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:09:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0323 09:09:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:09:14.456719  543705 disk_worker.go:494] system disk:vda1
I0323 09:09:14.456756  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:09:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:09:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:09:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:09:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:09:16.472415  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:09:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:09:23.409784  543705 cpu.go:275] no items to output this cycle
I0323 09:09:23.409786  543705 memory.go:184] no items to output this cycle
E0323 09:09:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:09:33.409808  543705 memory.go:184] no items to output this cycle
I0323 09:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 09:09:39.133675  543705 disk_info.go:125] begin check local disk info of client
I0323 09:09:39.136288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:09:39.136294  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f5840 0xc0001f5880]
I0323 09:09:40.262787  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:09:40.262793  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:09:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:09:43.410666  543705 memory.go:191] Add success.
I0323 09:09:43.409822  543705 cpu.go:282] Add success.
I0323 09:09:43.420383  543705 net.go:648] Add success.
I0323 09:09:43.423191  543705 net.go:770] primary dev: ETH0
I0323 09:09:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:09:43.423222  543705 net.go:698] Add success.
I0323 09:09:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:09:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:09:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:09:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:09:53.409782  543705 cpu.go:275] no items to output this cycle
I0323 09:09:53.409791  543705 memory.go:184] no items to output this cycle
E0323 09:10:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:10:03.409814  543705 memory.go:184] no items to output this cycle
I0323 09:10:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 09:10:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:10:13.409817  543705 memory.go:191] Add success.
I0323 09:10:13.409819  543705 cpu.go:282] Add success.
W0323 09:10:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:10:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:10:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:10:13.420234  543705 net.go:648] Add success.
I0323 09:10:13.423400  543705 net.go:770] primary dev: ETH0
I0323 09:10:13.423412  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:10:13.423423  543705 net.go:698] Add success.
I0323 09:10:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:10:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:10:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0323 09:10:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:10:14.456632  543705 disk_worker.go:494] system disk:vda1
I0323 09:10:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:10:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:10:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:10:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:10:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:10:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:10:23.409779  543705 memory.go:184] no items to output this cycle
I0323 09:10:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 09:10:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:10:33.409783  543705 memory.go:184] no items to output this cycle
I0323 09:10:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 09:10:39.137675  543705 disk_info.go:125] begin check local disk info of client
I0323 09:10:39.140311  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:10:39.140318  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b60c0 0xc0003b6100]
E0323 09:10:43.409741  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:10:43.410689  543705 memory.go:191] Add success.
I0323 09:10:43.409805  543705 cpu.go:282] Add success.
I0323 09:10:43.420226  543705 net.go:770] primary dev: ETH0
I0323 09:10:43.420240  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:10:43.420253  543705 net.go:698] Add success.
I0323 09:10:43.420596  543705 net.go:648] Add success.
I0323 09:10:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:10:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:10:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:10:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:10:53.409777  543705 memory.go:184] no items to output this cycle
I0323 09:10:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 09:11:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:11:03.409796  543705 memory.go:184] no items to output this cycle
I0323 09:11:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 09:11:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:11:13.409786  543705 memory.go:191] Add success.
W0323 09:11:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:11:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:11:13.409825  543705 cpu.go:282] Add success.
I0323 09:11:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:11:13.420177  543705 net.go:648] Add success.
I0323 09:11:13.422936  543705 net.go:770] primary dev: ETH0
I0323 09:11:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:11:13.422969  543705 net.go:698] Add success.
I0323 09:11:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:11:14.455311  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:11:14.455400  543705 disk_worker.go:708] disk space is not compliant
W0323 09:11:14.455404  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:11:14.457062  543705 disk_worker.go:494] system disk:vda1
I0323 09:11:14.457090  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:11:15.456020  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:11:16.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:11:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:11:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:11:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:11:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:11:23.409785  543705 memory.go:184] no items to output this cycle
I0323 09:11:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 09:11:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:11:33.409795  543705 memory.go:184] no items to output this cycle
I0323 09:11:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 09:11:39.141675  543705 disk_info.go:125] begin check local disk info of client
I0323 09:11:39.144319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:11:39.144325  543705 disk_info.go:196] parse disk info done, disk is : [0xc000260900 0xc000260940]
E0323 09:11:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:11:43.410771  543705 memory.go:191] Add success.
I0323 09:11:43.409785  543705 cpu.go:282] Add success.
I0323 09:11:43.420484  543705 net.go:648] Add success.
I0323 09:11:43.423022  543705 net.go:770] primary dev: ETH0
I0323 09:11:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:11:43.423059  543705 net.go:698] Add success.
I0323 09:11:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:11:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:11:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:11:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:11:53.409792  543705 memory.go:184] no items to output this cycle
I0323 09:11:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 09:12:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:12:03.409782  543705 memory.go:184] no items to output this cycle
I0323 09:12:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 09:12:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:12:13.409776  543705 memory.go:191] Add success.
W0323 09:12:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 09:12:13.409802  543705 cpu.go:282] Add success.
W0323 09:12:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:12:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:12:13.420306  543705 net.go:648] Add success.
I0323 09:12:13.423156  543705 net.go:770] primary dev: ETH0
I0323 09:12:13.423170  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:12:13.423181  543705 net.go:698] Add success.
I0323 09:12:13.623064  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ca7c1359-1be4-41b9-b05e-2b7baf2ae323","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:12:13.623106  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 09:12:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:12:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 09:12:14.455205  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:12:14.455957  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:12:14.455966  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:12:14.455971  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:12:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 09:12:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:12:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:12:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:12:16.458019  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:12:16.458019  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:12:16.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:12:16.458110  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:12:16.472462  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:12:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:12:23.409781  543705 memory.go:184] no items to output this cycle
I0323 09:12:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 09:12:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:12:33.409787  543705 memory.go:184] no items to output this cycle
I0323 09:12:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 09:12:39.145677  543705 disk_info.go:125] begin check local disk info of client
I0323 09:12:39.148374  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:12:39.148380  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c47c0 0xc0000c4800]
I0323 09:12:40.265725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:12:40.265731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:12:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:12:43.410812  543705 memory.go:191] Add success.
I0323 09:12:43.409804  543705 cpu.go:282] Add success.
I0323 09:12:43.420500  543705 net.go:648] Add success.
I0323 09:12:43.423382  543705 net.go:770] primary dev: ETH0
I0323 09:12:43.423397  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:12:43.423411  543705 net.go:698] Add success.
I0323 09:12:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:12:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:12:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:12:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:12:53.409762  543705 memory.go:184] no items to output this cycle
I0323 09:12:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 09:13:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:13:03.409797  543705 memory.go:184] no items to output this cycle
I0323 09:13:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 09:13:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:13:13.409799  543705 memory.go:191] Add success.
I0323 09:13:13.409799  543705 cpu.go:282] Add success.
W0323 09:13:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:13:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:13:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:13:13.420165  543705 net.go:648] Add success.
I0323 09:13:13.423321  543705 net.go:770] primary dev: ETH0
I0323 09:13:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:13:13.423346  543705 net.go:698] Add success.
I0323 09:13:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:13:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:13:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 09:13:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:13:14.456505  543705 disk_worker.go:494] system disk:vda1
I0323 09:13:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:13:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:13:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:13:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:13:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:13:16.472530  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:13:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:13:23.409796  543705 memory.go:184] no items to output this cycle
I0323 09:13:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 09:13:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:13:33.409792  543705 memory.go:184] no items to output this cycle
I0323 09:13:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 09:13:39.149674  543705 disk_info.go:125] begin check local disk info of client
I0323 09:13:39.152317  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:13:39.152323  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc5c0 0xc0002bc600]
E0323 09:13:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:13:43.410605  543705 memory.go:191] Add success.
I0323 09:13:43.409801  543705 cpu.go:282] Add success.
I0323 09:13:43.420600  543705 net.go:648] Add success.
I0323 09:13:43.423332  543705 net.go:770] primary dev: ETH0
I0323 09:13:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:13:43.423371  543705 net.go:698] Add success.
I0323 09:13:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:13:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:13:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:13:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:13:53.409777  543705 memory.go:184] no items to output this cycle
I0323 09:13:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 09:14:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:14:03.409790  543705 memory.go:184] no items to output this cycle
I0323 09:14:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 09:14:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:14:13.409809  543705 memory.go:191] Add success.
I0323 09:14:13.409818  543705 cpu.go:282] Add success.
W0323 09:14:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:14:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:14:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:14:13.420137  543705 net.go:648] Add success.
I0323 09:14:13.422829  543705 net.go:770] primary dev: ETH0
I0323 09:14:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:14:13.422854  543705 net.go:698] Add success.
I0323 09:14:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:14:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:14:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 09:14:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:14:14.456626  543705 disk_worker.go:494] system disk:vda1
I0323 09:14:14.456660  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:14:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:14:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:14:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:14:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:14:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:14:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:14:23.409762  543705 memory.go:184] no items to output this cycle
I0323 09:14:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 09:14:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:14:33.409818  543705 memory.go:184] no items to output this cycle
I0323 09:14:33.409832  543705 cpu.go:275] no items to output this cycle
I0323 09:14:39.153673  543705 disk_info.go:125] begin check local disk info of client
I0323 09:14:39.156245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:14:39.156252  543705 disk_info.go:196] parse disk info done, disk is : [0xc000460380 0xc0004603c0]
E0323 09:14:43.409843  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:14:43.410788  543705 memory.go:191] Add success.
I0323 09:14:43.409925  543705 cpu.go:282] Add success.
I0323 09:14:43.419747  543705 net.go:648] Add success.
I0323 09:14:43.422369  543705 net.go:770] primary dev: ETH0
I0323 09:14:43.422383  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:14:43.422395  543705 net.go:698] Add success.
I0323 09:14:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:14:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:14:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:14:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:14:53.409793  543705 memory.go:184] no items to output this cycle
I0323 09:14:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 09:15:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:15:03.409777  543705 memory.go:184] no items to output this cycle
I0323 09:15:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 09:15:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:15:13.409788  543705 memory.go:191] Add success.
I0323 09:15:13.409812  543705 cpu.go:282] Add success.
W0323 09:15:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:15:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:15:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:15:13.420365  543705 net.go:648] Add success.
I0323 09:15:13.423249  543705 net.go:770] primary dev: ETH0
I0323 09:15:13.423262  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:15:13.423285  543705 net.go:698] Add success.
I0323 09:15:13.467464  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8085b437-02b3-4ce5-896f-0fbef89db4f1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:15:13.467500  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:15:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:15:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:15:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 09:15:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:15:14.456619  543705 disk_worker.go:494] system disk:vda1
I0323 09:15:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:15:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:15:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:15:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:15:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:15:23.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:15:23.409911  543705 cpu.go:275] no items to output this cycle
I0323 09:15:23.409927  543705 memory.go:184] no items to output this cycle
E0323 09:15:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:15:33.409783  543705 memory.go:184] no items to output this cycle
I0323 09:15:33.409868  543705 cpu.go:275] no items to output this cycle
I0323 09:15:39.157676  543705 disk_info.go:125] begin check local disk info of client
I0323 09:15:39.160254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:15:39.160261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b840 0xc00007b880]
I0323 09:15:40.269727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:15:40.269732  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:15:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:15:43.410782  543705 memory.go:191] Add success.
I0323 09:15:43.409824  543705 cpu.go:282] Add success.
I0323 09:15:43.420483  543705 net.go:648] Add success.
I0323 09:15:43.423606  543705 net.go:770] primary dev: ETH0
I0323 09:15:43.423621  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:15:43.423636  543705 net.go:698] Add success.
I0323 09:15:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:15:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:15:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:15:53.410432  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:15:53.410453  543705 memory.go:184] no items to output this cycle
I0323 09:15:53.410474  543705 cpu.go:275] no items to output this cycle
E0323 09:16:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:16:03.409780  543705 memory.go:184] no items to output this cycle
I0323 09:16:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 09:16:13.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:16:13.409842  543705 memory.go:191] Add success.
I0323 09:16:13.409843  543705 cpu.go:282] Add success.
W0323 09:16:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:16:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:16:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:16:13.420238  543705 net.go:648] Add success.
I0323 09:16:13.422914  543705 net.go:770] primary dev: ETH0
I0323 09:16:13.422928  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:16:13.422940  543705 net.go:698] Add success.
I0323 09:16:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:16:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:16:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 09:16:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:16:14.456912  543705 disk_worker.go:494] system disk:vda1
I0323 09:16:14.456943  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:16:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:16:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:16:16.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:16:16.458108  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:16:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:16:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:16:23.409788  543705 memory.go:184] no items to output this cycle
I0323 09:16:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 09:16:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:16:33.409778  543705 memory.go:184] no items to output this cycle
I0323 09:16:33.409840  543705 cpu.go:275] no items to output this cycle
I0323 09:16:39.161676  543705 disk_info.go:125] begin check local disk info of client
I0323 09:16:39.164240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:16:39.164247  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaec0 0xc0001aaf00]
E0323 09:16:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:16:43.410659  543705 memory.go:191] Add success.
I0323 09:16:43.409795  543705 cpu.go:282] Add success.
I0323 09:16:43.420355  543705 net.go:648] Add success.
I0323 09:16:43.423142  543705 net.go:770] primary dev: ETH0
I0323 09:16:43.423155  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:16:43.423168  543705 net.go:698] Add success.
I0323 09:16:46.458011  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:16:46.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:16:46.458121  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:16:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:16:53.409817  543705 memory.go:184] no items to output this cycle
I0323 09:16:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 09:17:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:17:03.409799  543705 memory.go:184] no items to output this cycle
I0323 09:17:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 09:17:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:17:13.409788  543705 memory.go:191] Add success.
W0323 09:17:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:17:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:17:13.409825  543705 cpu.go:282] Add success.
I0323 09:17:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:17:13.420272  543705 net.go:648] Add success.
I0323 09:17:13.422984  543705 net.go:770] primary dev: ETH0
I0323 09:17:13.422997  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:17:13.423009  543705 net.go:698] Add success.
I0323 09:17:13.452776  543705 event_worker.go:152] Polling the log file for events...
W0323 09:17:14.455351  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:17:14.455368  543705 disk_worker.go:708] disk space is not compliant
W0323 09:17:14.455372  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:17:14.458259  543705 disk_worker.go:494] system disk:vda1
I0323 09:17:14.458303  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:17:14.458542  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:17:14.458552  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:17:14.458558  543705 custom_config.go:64] query custom config with name: gpu
E0323 09:17:15.457060  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:17:15.457074  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:17:16.458111  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:17:16.458183  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:17:16.458208  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:17:16.458265  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:17:16.472606  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:17:23.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:17:23.409843  543705 memory.go:184] no items to output this cycle
I0323 09:17:23.409979  543705 cpu.go:275] no items to output this cycle
E0323 09:17:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:17:33.409830  543705 memory.go:184] no items to output this cycle
I0323 09:17:33.409964  543705 cpu.go:275] no items to output this cycle
I0323 09:17:39.165686  543705 disk_info.go:125] begin check local disk info of client
I0323 09:17:39.168316  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:17:39.168323  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053d8c0 0xc00053d900]
E0323 09:17:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:17:43.409795  543705 memory.go:191] Add success.
I0323 09:17:43.409846  543705 cpu.go:282] Add success.
I0323 09:17:43.420153  543705 net.go:648] Add success.
I0323 09:17:43.421194  543705 net.go:770] primary dev: ETH0
I0323 09:17:43.421211  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:17:43.421231  543705 net.go:698] Add success.
I0323 09:17:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:17:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:17:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:17:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:17:53.409812  543705 memory.go:184] no items to output this cycle
I0323 09:17:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 09:18:03.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:18:03.409845  543705 memory.go:184] no items to output this cycle
I0323 09:18:03.409980  543705 cpu.go:275] no items to output this cycle
E0323 09:18:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:18:13.409806  543705 memory.go:191] Add success.
I0323 09:18:13.409808  543705 cpu.go:282] Add success.
W0323 09:18:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:18:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:18:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:18:13.420297  543705 net.go:648] Add success.
I0323 09:18:13.423310  543705 net.go:770] primary dev: ETH0
I0323 09:18:13.423324  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:18:13.423335  543705 net.go:698] Add success.
I0323 09:18:13.469383  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"417fdf04-ddd0-4fe6-a7b2-c976a86ffb93","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:18:13.469420  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:18:14.455078  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:18:14.455250  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:18:14.455265  543705 disk_worker.go:708] disk space is not compliant
W0323 09:18:14.455269  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:18:14.458959  543705 disk_worker.go:494] system disk:vda1
I0323 09:18:14.459002  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:18:15.456003  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:18:16.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:18:16.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:18:16.458121  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:18:16.472592  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:18:23.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:18:23.409834  543705 memory.go:184] no items to output this cycle
I0323 09:18:23.409853  543705 cpu.go:275] no items to output this cycle
E0323 09:18:33.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:18:33.410276  543705 memory.go:184] no items to output this cycle
I0323 09:18:33.410431  543705 cpu.go:275] no items to output this cycle
I0323 09:18:39.169713  543705 disk_info.go:125] begin check local disk info of client
I0323 09:18:39.238543  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:18:39.238556  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034b800 0xc00034b840]
I0323 09:18:40.273778  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:18:40.273787  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:18:43.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:18:43.411498  543705 memory.go:191] Add success.
I0323 09:18:43.410059  543705 cpu.go:282] Add success.
I0323 09:18:43.420548  543705 net.go:648] Add success.
I0323 09:18:43.425818  543705 net.go:770] primary dev: ETH0
I0323 09:18:43.425837  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:18:43.425856  543705 net.go:698] Add success.
I0323 09:18:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:18:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:18:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:18:53.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:18:53.409814  543705 cpu.go:275] no items to output this cycle
I0323 09:18:53.409829  543705 memory.go:184] no items to output this cycle
E0323 09:19:03.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:19:03.409854  543705 memory.go:184] no items to output this cycle
I0323 09:19:03.409993  543705 cpu.go:275] no items to output this cycle
E0323 09:19:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:19:13.409837  543705 memory.go:191] Add success.
I0323 09:19:13.409841  543705 cpu.go:282] Add success.
W0323 09:19:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:19:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:19:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:19:13.419714  543705 net.go:648] Add success.
I0323 09:19:13.422554  543705 net.go:770] primary dev: ETH0
I0323 09:19:13.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:19:13.422583  543705 net.go:698] Add success.
I0323 09:19:14.453974  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:19:14.454291  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:19:14.454308  543705 disk_worker.go:708] disk space is not compliant
W0323 09:19:14.454313  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:19:14.456311  543705 disk_worker.go:494] system disk:vda1
I0323 09:19:14.456356  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:19:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:19:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:19:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:19:16.458105  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:19:16.472572  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:19:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:19:23.409786  543705 memory.go:184] no items to output this cycle
I0323 09:19:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 09:19:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:19:33.409824  543705 memory.go:184] no items to output this cycle
I0323 09:19:33.409883  543705 cpu.go:275] no items to output this cycle
I0323 09:19:39.241682  543705 disk_info.go:125] begin check local disk info of client
I0323 09:19:39.244716  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:19:39.244723  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007bc80 0xc00007bcc0]
E0323 09:19:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:19:43.410752  543705 memory.go:191] Add success.
I0323 09:19:43.409811  543705 cpu.go:282] Add success.
I0323 09:19:43.420430  543705 net.go:648] Add success.
I0323 09:19:43.423783  543705 net.go:770] primary dev: ETH0
I0323 09:19:43.423799  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:19:43.423814  543705 net.go:698] Add success.
I0323 09:19:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:19:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:19:46.458107  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:19:53.410419  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:19:53.410439  543705 cpu.go:275] no items to output this cycle
I0323 09:19:53.410442  543705 memory.go:184] no items to output this cycle
E0323 09:20:03.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:20:03.409838  543705 memory.go:184] no items to output this cycle
I0323 09:20:03.409843  543705 cpu.go:275] no items to output this cycle
E0323 09:20:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:20:13.409834  543705 memory.go:191] Add success.
I0323 09:20:13.409838  543705 cpu.go:282] Add success.
W0323 09:20:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:20:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:20:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:20:13.420130  543705 net.go:648] Add success.
I0323 09:20:13.422893  543705 net.go:770] primary dev: ETH0
I0323 09:20:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:20:13.422923  543705 net.go:698] Add success.
I0323 09:20:14.455087  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:20:14.461239  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:20:14.461348  543705 disk_worker.go:708] disk space is not compliant
W0323 09:20:14.461353  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:20:14.469718  543705 disk_worker.go:494] system disk:vda1
I0323 09:20:14.469762  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:20:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:20:16.458017  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:20:16.458102  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:20:16.458136  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:20:16.472628  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:20:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:20:23.409813  543705 memory.go:184] no items to output this cycle
I0323 09:20:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 09:20:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:20:33.409808  543705 memory.go:184] no items to output this cycle
I0323 09:20:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 09:20:39.245688  543705 disk_info.go:125] begin check local disk info of client
I0323 09:20:39.248238  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:20:39.248245  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bc080 0xc0002bc0c0]
E0323 09:20:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:20:43.410663  543705 memory.go:191] Add success.
I0323 09:20:43.409807  543705 cpu.go:282] Add success.
I0323 09:20:43.420376  543705 net.go:648] Add success.
I0323 09:20:43.422875  543705 net.go:770] primary dev: ETH0
I0323 09:20:43.422889  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:20:43.422902  543705 net.go:698] Add success.
I0323 09:20:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:20:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:20:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:20:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:20:53.409787  543705 memory.go:184] no items to output this cycle
I0323 09:20:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 09:21:03.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:21:03.409826  543705 memory.go:184] no items to output this cycle
I0323 09:21:03.409843  543705 cpu.go:275] no items to output this cycle
E0323 09:21:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:21:13.409801  543705 cpu.go:282] Add success.
I0323 09:21:13.409809  543705 memory.go:191] Add success.
W0323 09:21:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:21:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:21:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:21:13.420103  543705 net.go:648] Add success.
I0323 09:21:13.423000  543705 net.go:770] primary dev: ETH0
I0323 09:21:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:21:13.423028  543705 net.go:698] Add success.
I0323 09:21:14.453949  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:21:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:21:14.455309  543705 disk_worker.go:708] disk space is not compliant
W0323 09:21:14.455313  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:21:14.457761  543705 disk_worker.go:494] system disk:vda1
I0323 09:21:14.457791  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:21:14.620207  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1d863dfd-dced-4d82-84d3-b624c7f4e5f5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:21:14.620257  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:21:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:21:16.458029  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:21:16.458127  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:21:16.458163  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:21:16.472660  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:21:23.413822  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:21:23.413844  543705 memory.go:184] no items to output this cycle
I0323 09:21:23.413855  543705 cpu.go:275] no items to output this cycle
E0323 09:21:33.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:21:33.409833  543705 memory.go:184] no items to output this cycle
I0323 09:21:33.409989  543705 cpu.go:275] no items to output this cycle
I0323 09:21:39.257724  543705 disk_info.go:125] begin check local disk info of client
I0323 09:21:39.290723  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:21:39.290734  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab400 0xc0001ab440]
I0323 09:21:40.277763  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:21:40.277771  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:21:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:21:43.410960  543705 memory.go:191] Add success.
I0323 09:21:43.409821  543705 cpu.go:282] Add success.
I0323 09:21:43.420698  543705 net.go:648] Add success.
I0323 09:21:43.423599  543705 net.go:770] primary dev: ETH0
I0323 09:21:43.423615  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:21:43.423629  543705 net.go:698] Add success.
I0323 09:21:46.458092  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:21:46.458172  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:21:46.458200  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:21:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:21:53.409795  543705 memory.go:184] no items to output this cycle
I0323 09:21:53.409860  543705 cpu.go:275] no items to output this cycle
E0323 09:22:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:22:03.409819  543705 memory.go:184] no items to output this cycle
I0323 09:22:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 09:22:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:22:13.409820  543705 memory.go:191] Add success.
W0323 09:22:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 09:22:13.409854  543705 cpu.go:282] Add success.
W0323 09:22:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:22:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:22:13.420370  543705 net.go:648] Add success.
I0323 09:22:13.423129  543705 net.go:770] primary dev: ETH0
I0323 09:22:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:22:13.423168  543705 net.go:698] Add success.
W0323 09:22:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:22:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 09:22:14.455211  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:22:14.456354  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:22:14.456364  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:22:14.456371  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:22:14.456554  543705 disk_worker.go:494] system disk:vda1
I0323 09:22:14.456657  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:22:15.457043  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:22:15.457058  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:22:16.458095  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:22:16.458176  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 09:22:16.458174  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:22:16.458197  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:22:16.472608  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:22:23.410088  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:22:23.410110  543705 memory.go:184] no items to output this cycle
I0323 09:22:23.410120  543705 cpu.go:275] no items to output this cycle
E0323 09:22:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:22:33.409778  543705 memory.go:184] no items to output this cycle
I0323 09:22:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 09:22:39.293682  543705 disk_info.go:125] begin check local disk info of client
I0323 09:22:39.296296  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:22:39.296303  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353a00 0xc000353a40]
E0323 09:22:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:22:43.410879  543705 memory.go:191] Add success.
I0323 09:22:43.409833  543705 cpu.go:282] Add success.
I0323 09:22:43.420586  543705 net.go:648] Add success.
I0323 09:22:43.423591  543705 net.go:770] primary dev: ETH0
I0323 09:22:43.423604  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:22:43.423617  543705 net.go:698] Add success.
I0323 09:22:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:22:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:22:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:22:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:22:53.409816  543705 memory.go:184] no items to output this cycle
I0323 09:22:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 09:23:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:23:03.409778  543705 memory.go:184] no items to output this cycle
I0323 09:23:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 09:23:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:23:13.409831  543705 memory.go:191] Add success.
I0323 09:23:13.409834  543705 cpu.go:282] Add success.
W0323 09:23:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:23:13.412826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:23:13.412831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:23:13.420529  543705 net.go:648] Add success.
I0323 09:23:13.422358  543705 net.go:770] primary dev: ETH0
I0323 09:23:13.422371  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:23:13.422384  543705 net.go:698] Add success.
I0323 09:23:14.453949  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:23:14.455251  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:23:14.455264  543705 disk_worker.go:708] disk space is not compliant
W0323 09:23:14.455267  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:23:14.456721  543705 disk_worker.go:494] system disk:vda1
I0323 09:23:14.456756  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:23:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:23:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:23:16.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:23:16.458108  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:23:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:23:23.417827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:23:23.417852  543705 memory.go:184] no items to output this cycle
I0323 09:23:23.418030  543705 cpu.go:275] no items to output this cycle
E0323 09:23:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:23:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 09:23:33.409822  543705 memory.go:184] no items to output this cycle
I0323 09:23:39.297683  543705 disk_info.go:125] begin check local disk info of client
I0323 09:23:39.300327  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:23:39.300334  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7300 0xc0003e7340]
E0323 09:23:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:23:43.409832  543705 memory.go:191] Add success.
I0323 09:23:43.409838  543705 cpu.go:282] Add success.
I0323 09:23:43.420283  543705 net.go:648] Add success.
I0323 09:23:43.421482  543705 net.go:770] primary dev: ETH0
I0323 09:23:43.421500  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:23:43.421518  543705 net.go:698] Add success.
I0323 09:23:46.458019  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:23:46.458100  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:23:46.458135  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:23:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:23:53.409814  543705 memory.go:184] no items to output this cycle
I0323 09:23:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 09:24:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:24:03.409803  543705 memory.go:184] no items to output this cycle
I0323 09:24:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 09:24:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:24:13.409801  543705 memory.go:191] Add success.
I0323 09:24:13.409822  543705 cpu.go:282] Add success.
W0323 09:24:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:24:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:24:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:24:13.420161  543705 net.go:648] Add success.
I0323 09:24:13.422925  543705 net.go:770] primary dev: ETH0
I0323 09:24:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:24:13.422949  543705 net.go:698] Add success.
I0323 09:24:13.610384  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f75e8298-ad49-481c-82d1-bd4f0266fc8e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:24:13.610425  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:24:14.453999  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:24:14.454250  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:24:14.454264  543705 disk_worker.go:708] disk space is not compliant
W0323 09:24:14.454267  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:24:14.455874  543705 disk_worker.go:494] system disk:vda1
I0323 09:24:14.455906  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:24:15.456002  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:24:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:24:16.458087  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:24:16.458120  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:24:16.472553  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:24:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:24:23.409812  543705 memory.go:184] no items to output this cycle
I0323 09:24:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 09:24:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:24:33.409814  543705 memory.go:184] no items to output this cycle
I0323 09:24:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 09:24:39.301688  543705 disk_info.go:125] begin check local disk info of client
I0323 09:24:39.304308  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:24:39.304315  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359ec0 0xc000359f00]
I0323 09:24:40.281747  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:24:40.281755  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:24:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:24:43.409825  543705 cpu.go:282] Add success.
I0323 09:24:43.409836  543705 memory.go:191] Add success.
I0323 09:24:43.420414  543705 net.go:648] Add success.
I0323 09:24:43.421413  543705 net.go:770] primary dev: ETH0
I0323 09:24:43.421431  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:24:43.421449  543705 net.go:698] Add success.
I0323 09:24:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:24:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:24:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:24:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:24:53.409818  543705 memory.go:184] no items to output this cycle
I0323 09:24:53.409834  543705 cpu.go:275] no items to output this cycle
E0323 09:25:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:25:03.409810  543705 memory.go:184] no items to output this cycle
I0323 09:25:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 09:25:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:25:13.409822  543705 memory.go:191] Add success.
I0323 09:25:13.409828  543705 cpu.go:282] Add success.
W0323 09:25:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:25:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:25:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:25:13.420118  543705 net.go:648] Add success.
I0323 09:25:13.423005  543705 net.go:770] primary dev: ETH0
I0323 09:25:13.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:25:13.423035  543705 net.go:698] Add success.
I0323 09:25:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:25:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:25:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 09:25:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:25:14.456581  543705 disk_worker.go:494] system disk:vda1
I0323 09:25:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:25:15.456019  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:25:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:25:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:25:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:25:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:25:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:25:23.409770  543705 memory.go:184] no items to output this cycle
I0323 09:25:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 09:25:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:25:33.409777  543705 memory.go:184] no items to output this cycle
I0323 09:25:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 09:25:39.305685  543705 disk_info.go:125] begin check local disk info of client
I0323 09:25:39.308233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:25:39.308240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c56c0 0xc0000c5780]
E0323 09:25:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:25:43.410840  543705 memory.go:191] Add success.
I0323 09:25:43.409816  543705 cpu.go:282] Add success.
I0323 09:25:43.420594  543705 net.go:648] Add success.
I0323 09:25:43.423572  543705 net.go:770] primary dev: ETH0
I0323 09:25:43.423591  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:25:43.423605  543705 net.go:698] Add success.
I0323 09:25:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:25:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:25:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:25:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:25:53.409783  543705 memory.go:184] no items to output this cycle
I0323 09:25:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 09:26:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:26:03.409787  543705 memory.go:184] no items to output this cycle
I0323 09:26:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 09:26:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:26:13.409806  543705 cpu.go:282] Add success.
I0323 09:26:13.409816  543705 memory.go:191] Add success.
W0323 09:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:26:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:26:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:26:13.420109  543705 net.go:648] Add success.
I0323 09:26:13.422899  543705 net.go:770] primary dev: ETH0
I0323 09:26:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:26:13.422924  543705 net.go:698] Add success.
I0323 09:26:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:26:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:26:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 09:26:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:26:14.456564  543705 disk_worker.go:494] system disk:vda1
I0323 09:26:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:26:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:26:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:26:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:26:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:26:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:26:23.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:26:23.409895  543705 memory.go:184] no items to output this cycle
I0323 09:26:23.409898  543705 cpu.go:275] no items to output this cycle
E0323 09:26:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:26:33.409786  543705 memory.go:184] no items to output this cycle
I0323 09:26:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 09:26:39.309683  543705 disk_info.go:125] begin check local disk info of client
I0323 09:26:39.312270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:26:39.312278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 09:26:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:26:43.410667  543705 memory.go:191] Add success.
I0323 09:26:43.409809  543705 cpu.go:282] Add success.
I0323 09:26:43.420401  543705 net.go:648] Add success.
I0323 09:26:43.423112  543705 net.go:770] primary dev: ETH0
I0323 09:26:43.423128  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:26:43.423142  543705 net.go:698] Add success.
I0323 09:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:26:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:26:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:26:53.410409  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:26:53.410429  543705 memory.go:184] no items to output this cycle
I0323 09:26:53.410439  543705 cpu.go:275] no items to output this cycle
E0323 09:27:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:27:03.409787  543705 memory.go:184] no items to output this cycle
I0323 09:27:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:27:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:27:13.409825  543705 memory.go:191] Add success.
I0323 09:27:13.409831  543705 cpu.go:282] Add success.
W0323 09:27:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:27:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:27:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:27:13.420170  543705 net.go:648] Add success.
I0323 09:27:13.423263  543705 net.go:770] primary dev: ETH0
I0323 09:27:13.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:27:13.423288  543705 net.go:698] Add success.
I0323 09:27:13.429324  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 09:27:13.453495  543705 event_worker.go:152] Polling the log file for events...
I0323 09:27:13.468788  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d4bc231a-c4f9-4a62-8db5-cc5d0435a871","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:27:13.468824  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 09:27:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:27:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 09:27:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:27:14.456991  543705 disk_worker.go:494] system disk:vda1
I0323 09:27:14.457030  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:27:14.457068  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:27:14.457076  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:27:14.457080  543705 custom_config.go:64] query custom config with name: gpu
E0323 09:27:15.456872  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:27:15.456882  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:27:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:27:16.457955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:27:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:27:16.458018  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:27:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:27:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:27:23.409779  543705 memory.go:184] no items to output this cycle
I0323 09:27:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 09:27:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:27:33.409794  543705 memory.go:184] no items to output this cycle
I0323 09:27:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 09:27:39.313685  543705 disk_info.go:125] begin check local disk info of client
I0323 09:27:39.316233  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:27:39.316240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
I0323 09:27:40.285727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:27:40.285733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:27:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:27:43.410679  543705 memory.go:191] Add success.
I0323 09:27:43.409809  543705 cpu.go:282] Add success.
I0323 09:27:43.420394  543705 net.go:648] Add success.
I0323 09:27:43.422986  543705 net.go:770] primary dev: ETH0
I0323 09:27:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:27:43.423014  543705 net.go:698] Add success.
I0323 09:27:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:27:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:27:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:27:53.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:27:53.410268  543705 memory.go:184] no items to output this cycle
I0323 09:27:53.410271  543705 cpu.go:275] no items to output this cycle
E0323 09:28:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:28:03.409789  543705 memory.go:184] no items to output this cycle
I0323 09:28:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 09:28:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:28:13.409796  543705 memory.go:191] Add success.
W0323 09:28:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 09:28:13.409827  543705 cpu.go:282] Add success.
W0323 09:28:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:28:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:28:13.420064  543705 net.go:648] Add success.
I0323 09:28:13.422849  543705 net.go:770] primary dev: ETH0
I0323 09:28:13.422868  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:28:13.422886  543705 net.go:698] Add success.
I0323 09:28:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:28:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:28:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 09:28:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:28:14.456576  543705 disk_worker.go:494] system disk:vda1
I0323 09:28:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:28:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:28:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:28:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:28:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:28:16.472395  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:28:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:28:23.409773  543705 memory.go:184] no items to output this cycle
I0323 09:28:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 09:28:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:28:33.409787  543705 memory.go:184] no items to output this cycle
I0323 09:28:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 09:28:39.317676  543705 disk_info.go:125] begin check local disk info of client
I0323 09:28:39.320262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:28:39.320268  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
E0323 09:28:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:28:43.410731  543705 memory.go:191] Add success.
I0323 09:28:43.409814  543705 cpu.go:282] Add success.
I0323 09:28:43.420451  543705 net.go:648] Add success.
I0323 09:28:43.423172  543705 net.go:770] primary dev: ETH0
I0323 09:28:43.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:28:43.423202  543705 net.go:698] Add success.
I0323 09:28:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:28:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:28:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:28:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:28:53.409777  543705 memory.go:184] no items to output this cycle
I0323 09:28:53.409777  543705 cpu.go:275] no items to output this cycle
E0323 09:29:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:29:03.409786  543705 memory.go:184] no items to output this cycle
I0323 09:29:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 09:29:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:29:13.409822  543705 memory.go:191] Add success.
I0323 09:29:13.409832  543705 cpu.go:282] Add success.
W0323 09:29:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:29:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:29:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:29:13.420175  543705 net.go:648] Add success.
I0323 09:29:13.423263  543705 net.go:770] primary dev: ETH0
I0323 09:29:13.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:29:13.423288  543705 net.go:698] Add success.
I0323 09:29:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:29:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:29:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0323 09:29:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:29:14.456497  543705 disk_worker.go:494] system disk:vda1
I0323 09:29:14.456543  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:29:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:29:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:29:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:29:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:29:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:29:23.409883  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:29:23.409917  543705 memory.go:184] no items to output this cycle
I0323 09:29:23.409916  543705 cpu.go:275] no items to output this cycle
E0323 09:29:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:29:33.409779  543705 memory.go:184] no items to output this cycle
I0323 09:29:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 09:29:39.321683  543705 disk_info.go:125] begin check local disk info of client
I0323 09:29:39.324247  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:29:39.324254  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 09:29:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:29:43.410633  543705 memory.go:191] Add success.
I0323 09:29:43.409825  543705 cpu.go:282] Add success.
I0323 09:29:43.420338  543705 net.go:648] Add success.
I0323 09:29:43.423088  543705 net.go:770] primary dev: ETH0
I0323 09:29:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:29:43.423114  543705 net.go:698] Add success.
I0323 09:29:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:29:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:29:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:29:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:29:53.409772  543705 memory.go:184] no items to output this cycle
I0323 09:29:53.409778  543705 cpu.go:275] no items to output this cycle
E0323 09:30:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:30:03.409778  543705 memory.go:184] no items to output this cycle
I0323 09:30:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 09:30:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:30:13.409821  543705 memory.go:191] Add success.
I0323 09:30:13.409830  543705 cpu.go:282] Add success.
W0323 09:30:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:30:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:30:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:30:13.420159  543705 net.go:648] Add success.
I0323 09:30:13.422855  543705 net.go:770] primary dev: ETH0
I0323 09:30:13.422870  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:30:13.422884  543705 net.go:698] Add success.
I0323 09:30:13.635899  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cb6148c4-aa62-455a-a2b1-d90b14097ff8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:30:13.635939  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:30:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:30:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:30:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 09:30:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:30:14.456537  543705 disk_worker.go:494] system disk:vda1
I0323 09:30:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:30:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:30:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:30:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:30:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:30:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:30:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:30:23.409869  543705 memory.go:184] no items to output this cycle
I0323 09:30:23.409937  543705 cpu.go:275] no items to output this cycle
E0323 09:30:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:30:33.409765  543705 memory.go:184] no items to output this cycle
I0323 09:30:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 09:30:39.325678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:30:39.328281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:30:39.328287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5100 0xc0000c5140]
I0323 09:30:40.289725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:30:40.289731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:30:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:30:43.410775  543705 memory.go:191] Add success.
I0323 09:30:43.409815  543705 cpu.go:282] Add success.
I0323 09:30:43.420496  543705 net.go:648] Add success.
I0323 09:30:43.423587  543705 net.go:770] primary dev: ETH0
I0323 09:30:43.423601  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:30:43.423615  543705 net.go:698] Add success.
I0323 09:30:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:30:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:30:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:30:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:30:53.409782  543705 memory.go:184] no items to output this cycle
I0323 09:30:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 09:31:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:31:03.409781  543705 memory.go:184] no items to output this cycle
I0323 09:31:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 09:31:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:31:13.409828  543705 memory.go:191] Add success.
I0323 09:31:13.409835  543705 cpu.go:282] Add success.
W0323 09:31:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:31:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:31:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:31:13.420227  543705 net.go:648] Add success.
I0323 09:31:13.422861  543705 net.go:770] primary dev: ETH0
I0323 09:31:13.422874  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:31:13.422886  543705 net.go:698] Add success.
I0323 09:31:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:31:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:31:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 09:31:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:31:14.456509  543705 disk_worker.go:494] system disk:vda1
I0323 09:31:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:31:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:31:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:31:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:31:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0323 09:31:23.409919  543705 cpu.go:275] no items to output this cycle
E0323 09:31:23.409921  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:31:23.409942  543705 memory.go:184] no items to output this cycle
E0323 09:31:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:31:33.409792  543705 memory.go:184] no items to output this cycle
I0323 09:31:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 09:31:39.329680  543705 disk_info.go:125] begin check local disk info of client
I0323 09:31:39.332276  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:31:39.332283  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa400 0xc0001aa480]
E0323 09:31:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:31:43.410722  543705 memory.go:191] Add success.
I0323 09:31:43.409810  543705 cpu.go:282] Add success.
I0323 09:31:43.420405  543705 net.go:648] Add success.
I0323 09:31:43.423242  543705 net.go:770] primary dev: ETH0
I0323 09:31:43.423255  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:31:43.423267  543705 net.go:698] Add success.
I0323 09:31:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:31:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:31:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:31:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:31:53.409775  543705 memory.go:184] no items to output this cycle
I0323 09:31:53.409792  543705 cpu.go:275] no items to output this cycle
I0323 09:32:03.410130  543705 cpu.go:275] no items to output this cycle
E0323 09:32:03.410134  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:32:03.410153  543705 memory.go:184] no items to output this cycle
E0323 09:32:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:32:13.409793  543705 cpu.go:282] Add success.
I0323 09:32:13.409811  543705 memory.go:191] Add success.
W0323 09:32:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:32:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:32:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:32:13.420057  543705 net.go:648] Add success.
I0323 09:32:13.422994  543705 net.go:770] primary dev: ETH0
I0323 09:32:13.423007  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:32:13.423021  543705 net.go:698] Add success.
W0323 09:32:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:32:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 09:32:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:32:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:32:14.455894  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:32:14.455900  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:32:14.456557  543705 disk_worker.go:494] system disk:vda1
I0323 09:32:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:32:15.456892  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:32:15.456901  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:32:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:32:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:32:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:32:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:32:16.472357  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:32:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:32:23.409778  543705 cpu.go:275] no items to output this cycle
I0323 09:32:23.409781  543705 memory.go:184] no items to output this cycle
E0323 09:32:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:32:33.409783  543705 memory.go:184] no items to output this cycle
I0323 09:32:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 09:32:39.333681  543705 disk_info.go:125] begin check local disk info of client
I0323 09:32:39.336260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:32:39.336268  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 09:32:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:32:43.410836  543705 memory.go:191] Add success.
I0323 09:32:43.409823  543705 cpu.go:282] Add success.
I0323 09:32:43.420550  543705 net.go:648] Add success.
I0323 09:32:43.423046  543705 net.go:770] primary dev: ETH0
I0323 09:32:43.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:32:43.423070  543705 net.go:698] Add success.
I0323 09:32:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:32:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:32:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:32:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:32:53.409777  543705 memory.go:184] no items to output this cycle
I0323 09:32:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 09:33:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:33:03.409775  543705 memory.go:184] no items to output this cycle
I0323 09:33:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 09:33:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:33:13.409833  543705 memory.go:191] Add success.
I0323 09:33:13.409846  543705 cpu.go:282] Add success.
W0323 09:33:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:33:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:33:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:33:13.420165  543705 net.go:648] Add success.
I0323 09:33:13.422760  543705 net.go:770] primary dev: ETH0
I0323 09:33:13.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:33:13.422786  543705 net.go:698] Add success.
I0323 09:33:13.481109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"642ab4a7-69a5-4f8b-ab92-10297c09b78e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:33:13.481142  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:33:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:33:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:33:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 09:33:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:33:14.456596  543705 disk_worker.go:494] system disk:vda1
I0323 09:33:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:33:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:33:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:33:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:33:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:33:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:33:23.409792  543705 memory.go:184] no items to output this cycle
I0323 09:33:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 09:33:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:33:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 09:33:33.409789  543705 memory.go:184] no items to output this cycle
I0323 09:33:39.337676  543705 disk_info.go:125] begin check local disk info of client
I0323 09:33:39.340235  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:33:39.340242  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4540 0xc0000c4580]
I0323 09:33:40.293725  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:33:40.293731  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:33:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:33:43.410632  543705 memory.go:191] Add success.
I0323 09:33:43.409815  543705 cpu.go:282] Add success.
I0323 09:33:43.420343  543705 net.go:648] Add success.
I0323 09:33:43.422846  543705 net.go:770] primary dev: ETH0
I0323 09:33:43.422858  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:33:43.422870  543705 net.go:698] Add success.
I0323 09:33:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:33:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:33:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:33:53.410370  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:33:53.410387  543705 memory.go:184] no items to output this cycle
I0323 09:33:53.410412  543705 cpu.go:275] no items to output this cycle
E0323 09:34:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:34:03.409783  543705 cpu.go:275] no items to output this cycle
I0323 09:34:03.409785  543705 memory.go:184] no items to output this cycle
E0323 09:34:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:34:13.409817  543705 memory.go:191] Add success.
I0323 09:34:13.409829  543705 cpu.go:282] Add success.
W0323 09:34:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:34:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:34:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:34:13.420139  543705 net.go:648] Add success.
I0323 09:34:13.422863  543705 net.go:770] primary dev: ETH0
I0323 09:34:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:34:13.422888  543705 net.go:698] Add success.
I0323 09:34:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:34:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:34:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 09:34:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:34:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 09:34:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:34:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:34:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:34:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:34:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:34:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:34:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:34:23.409772  543705 memory.go:184] no items to output this cycle
I0323 09:34:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 09:34:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:34:33.409781  543705 memory.go:184] no items to output this cycle
I0323 09:34:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 09:34:39.341679  543705 disk_info.go:125] begin check local disk info of client
I0323 09:34:39.344217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:34:39.344224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2700 0xc0003e2740]
E0323 09:34:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:34:43.410814  543705 memory.go:191] Add success.
I0323 09:34:43.409820  543705 cpu.go:282] Add success.
I0323 09:34:43.420556  543705 net.go:648] Add success.
I0323 09:34:43.423234  543705 net.go:770] primary dev: ETH0
I0323 09:34:43.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:34:43.423259  543705 net.go:698] Add success.
I0323 09:34:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:34:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:34:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:34:53.410399  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:34:53.410414  543705 memory.go:184] no items to output this cycle
I0323 09:34:53.410438  543705 cpu.go:275] no items to output this cycle
E0323 09:35:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:35:03.409770  543705 memory.go:184] no items to output this cycle
I0323 09:35:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 09:35:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:35:13.409825  543705 memory.go:191] Add success.
I0323 09:35:13.409839  543705 cpu.go:282] Add success.
W0323 09:35:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:35:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:35:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:35:13.420266  543705 net.go:648] Add success.
I0323 09:35:13.423045  543705 net.go:770] primary dev: ETH0
I0323 09:35:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:35:13.423071  543705 net.go:698] Add success.
I0323 09:35:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:35:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:35:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 09:35:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:35:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 09:35:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:35:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:35:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:35:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:35:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:35:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:35:23.409778  543705 memory.go:184] no items to output this cycle
I0323 09:35:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 09:35:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:35:33.409812  543705 memory.go:184] no items to output this cycle
I0323 09:35:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 09:35:39.345675  543705 disk_info.go:125] begin check local disk info of client
I0323 09:35:39.348303  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:35:39.348309  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e880 0xc00035e8c0]
E0323 09:35:43.410182  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:35:43.411005  543705 memory.go:191] Add success.
I0323 09:35:43.410219  543705 cpu.go:282] Add success.
I0323 09:35:43.419692  543705 net.go:648] Add success.
I0323 09:35:43.422353  543705 net.go:770] primary dev: ETH0
I0323 09:35:43.422366  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:35:43.422380  543705 net.go:698] Add success.
I0323 09:35:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:35:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:35:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:35:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:35:53.409769  543705 memory.go:184] no items to output this cycle
I0323 09:35:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 09:36:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:36:03.409794  543705 memory.go:184] no items to output this cycle
I0323 09:36:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 09:36:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:36:13.409787  543705 memory.go:191] Add success.
I0323 09:36:13.409809  543705 cpu.go:282] Add success.
W0323 09:36:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:36:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:36:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:36:13.420258  543705 net.go:648] Add success.
I0323 09:36:13.423333  543705 net.go:770] primary dev: ETH0
I0323 09:36:13.423347  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:36:13.423362  543705 net.go:698] Add success.
I0323 09:36:13.468668  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a5305b28-1784-4bfe-aba9-c0c9afb44a6f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:36:13.468703  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:36:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:36:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:36:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 09:36:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:36:14.456558  543705 disk_worker.go:494] system disk:vda1
I0323 09:36:14.456614  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:36:15.455607  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:36:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:36:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:36:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:36:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:36:23.409775  543705 memory.go:184] no items to output this cycle
I0323 09:36:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 09:36:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:36:33.409775  543705 memory.go:184] no items to output this cycle
I0323 09:36:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 09:36:39.349678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:36:39.352231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:36:39.352237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035e440 0xc00035e480]
I0323 09:36:40.297724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:36:40.297729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:36:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:36:43.410799  543705 memory.go:191] Add success.
I0323 09:36:43.409833  543705 cpu.go:282] Add success.
I0323 09:36:43.420493  543705 net.go:648] Add success.
I0323 09:36:43.423279  543705 net.go:770] primary dev: ETH0
I0323 09:36:43.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:36:43.423306  543705 net.go:698] Add success.
I0323 09:36:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:36:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:36:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:36:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:36:53.409810  543705 memory.go:184] no items to output this cycle
I0323 09:36:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 09:37:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:37:03.409805  543705 memory.go:184] no items to output this cycle
I0323 09:37:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 09:37:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:37:13.409794  543705 memory.go:191] Add success.
W0323 09:37:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:37:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:37:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:37:13.409835  543705 cpu.go:282] Add success.
I0323 09:37:13.420185  543705 net.go:648] Add success.
I0323 09:37:13.423071  543705 net.go:770] primary dev: ETH0
I0323 09:37:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:37:13.423101  543705 net.go:698] Add success.
I0323 09:37:13.453670  543705 event_worker.go:152] Polling the log file for events...
W0323 09:37:14.455467  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:37:14.455487  543705 disk_worker.go:708] disk space is not compliant
W0323 09:37:14.455491  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:37:14.456494  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:37:14.456503  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:37:14.456509  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:37:14.457443  543705 disk_worker.go:494] system disk:vda1
I0323 09:37:14.457482  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:37:15.456902  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:37:15.456912  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:37:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:37:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:37:16.457964  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:37:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:37:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:37:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:37:23.409779  543705 memory.go:184] no items to output this cycle
I0323 09:37:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 09:37:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:37:33.409773  543705 memory.go:184] no items to output this cycle
I0323 09:37:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 09:37:39.353679  543705 disk_info.go:125] begin check local disk info of client
I0323 09:37:39.356252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:37:39.356258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d47c0 0xc0003d4800]
E0323 09:37:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:37:43.410568  543705 memory.go:191] Add success.
I0323 09:37:43.409812  543705 cpu.go:282] Add success.
I0323 09:37:43.420279  543705 net.go:648] Add success.
I0323 09:37:43.422941  543705 net.go:770] primary dev: ETH0
I0323 09:37:43.422957  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:37:43.422971  543705 net.go:698] Add success.
I0323 09:37:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:37:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:37:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:37:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:37:53.409789  543705 memory.go:184] no items to output this cycle
I0323 09:37:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 09:38:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:38:03.409791  543705 memory.go:184] no items to output this cycle
I0323 09:38:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 09:38:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:38:13.409796  543705 memory.go:191] Add success.
I0323 09:38:13.409816  543705 cpu.go:282] Add success.
W0323 09:38:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:38:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:38:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:38:13.420595  543705 net.go:648] Add success.
I0323 09:38:13.423595  543705 net.go:770] primary dev: ETH0
I0323 09:38:13.423608  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:38:13.423620  543705 net.go:698] Add success.
I0323 09:38:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:38:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:38:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 09:38:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:38:14.456551  543705 disk_worker.go:494] system disk:vda1
I0323 09:38:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:38:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:38:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:38:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:38:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:38:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:38:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:38:23.409804  543705 memory.go:184] no items to output this cycle
I0323 09:38:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:38:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:38:33.409776  543705 memory.go:184] no items to output this cycle
I0323 09:38:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 09:38:39.357678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:38:39.360252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:38:39.360258  543705 disk_info.go:196] parse disk info done, disk is : [0xc00025a500 0xc00025a540]
E0323 09:38:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:38:43.410660  543705 memory.go:191] Add success.
I0323 09:38:43.409797  543705 cpu.go:282] Add success.
I0323 09:38:43.420374  543705 net.go:648] Add success.
I0323 09:38:43.422923  543705 net.go:770] primary dev: ETH0
I0323 09:38:43.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:38:43.422949  543705 net.go:698] Add success.
I0323 09:38:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:38:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:38:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:38:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:38:53.409773  543705 memory.go:184] no items to output this cycle
I0323 09:38:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 09:39:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:39:03.409773  543705 memory.go:184] no items to output this cycle
I0323 09:39:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 09:39:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:39:13.409929  543705 cpu.go:282] Add success.
I0323 09:39:13.409946  543705 memory.go:191] Add success.
W0323 09:39:13.409995  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:39:13.410023  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:39:13.410027  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:39:13.419734  543705 net.go:648] Add success.
I0323 09:39:13.422485  543705 net.go:770] primary dev: ETH0
I0323 09:39:13.422498  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:39:13.422509  543705 net.go:698] Add success.
I0323 09:39:13.472983  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"20e62900-74eb-464d-9a1e-2907318aa542","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:39:13.473015  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:39:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:39:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:39:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 09:39:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:39:14.456509  543705 disk_worker.go:494] system disk:vda1
I0323 09:39:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:39:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:39:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:39:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:39:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:39:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:39:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:39:23.409797  543705 memory.go:184] no items to output this cycle
I0323 09:39:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:39:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:39:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 09:39:33.409786  543705 memory.go:184] no items to output this cycle
I0323 09:39:39.361678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:39:39.364295  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:39:39.364302  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab400 0xc0001ab440]
I0323 09:39:40.301717  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:39:40.301723  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:39:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:39:43.410724  543705 memory.go:191] Add success.
I0323 09:39:43.409799  543705 cpu.go:282] Add success.
I0323 09:39:43.420467  543705 net.go:648] Add success.
I0323 09:39:43.423156  543705 net.go:770] primary dev: ETH0
I0323 09:39:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:39:43.423183  543705 net.go:698] Add success.
I0323 09:39:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:39:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:39:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:39:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:39:53.409783  543705 memory.go:184] no items to output this cycle
I0323 09:39:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 09:40:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:40:03.409801  543705 memory.go:184] no items to output this cycle
I0323 09:40:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 09:40:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:40:13.409924  543705 memory.go:191] Add success.
W0323 09:40:13.409959  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:40:13.409972  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:40:13.409985  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:40:13.410023  543705 cpu.go:282] Add success.
I0323 09:40:13.419714  543705 net.go:648] Add success.
I0323 09:40:13.422422  543705 net.go:770] primary dev: ETH0
I0323 09:40:13.422437  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:40:13.422451  543705 net.go:698] Add success.
I0323 09:40:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:40:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:40:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 09:40:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:40:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 09:40:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:40:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:40:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:40:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:40:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:40:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:40:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:40:23.409781  543705 memory.go:184] no items to output this cycle
I0323 09:40:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 09:40:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:40:33.409782  543705 memory.go:184] no items to output this cycle
I0323 09:40:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 09:40:39.365676  543705 disk_info.go:125] begin check local disk info of client
I0323 09:40:39.368249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:40:39.368256  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5a80 0xc0000c5ac0]
E0323 09:40:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:40:43.410761  543705 memory.go:191] Add success.
I0323 09:40:43.409812  543705 cpu.go:282] Add success.
I0323 09:40:43.420453  543705 net.go:648] Add success.
I0323 09:40:43.423219  543705 net.go:770] primary dev: ETH0
I0323 09:40:43.423234  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:40:43.423247  543705 net.go:698] Add success.
I0323 09:40:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:40:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:40:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:40:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:40:53.409772  543705 memory.go:184] no items to output this cycle
I0323 09:40:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 09:41:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:41:03.409769  543705 memory.go:184] no items to output this cycle
I0323 09:41:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 09:41:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:41:13.409786  543705 memory.go:191] Add success.
I0323 09:41:13.409811  543705 cpu.go:282] Add success.
W0323 09:41:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:41:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:41:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:41:13.420347  543705 net.go:648] Add success.
I0323 09:41:13.422981  543705 net.go:770] primary dev: ETH0
I0323 09:41:13.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:41:13.423006  543705 net.go:698] Add success.
I0323 09:41:14.453953  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:41:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:41:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0323 09:41:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:41:14.456611  543705 disk_worker.go:494] system disk:vda1
I0323 09:41:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:41:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:41:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:41:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:41:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:41:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:41:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:41:23.409781  543705 memory.go:184] no items to output this cycle
I0323 09:41:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 09:41:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:41:33.409776  543705 memory.go:184] no items to output this cycle
I0323 09:41:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 09:41:39.369679  543705 disk_info.go:125] begin check local disk info of client
I0323 09:41:39.372316  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:41:39.372323  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f80 0xc0000c4fc0]
E0323 09:41:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:41:43.410673  543705 memory.go:191] Add success.
I0323 09:41:43.409790  543705 cpu.go:282] Add success.
I0323 09:41:43.420401  543705 net.go:648] Add success.
I0323 09:41:43.423178  543705 net.go:770] primary dev: ETH0
I0323 09:41:43.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:41:43.423208  543705 net.go:698] Add success.
I0323 09:41:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:41:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:41:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:41:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:41:53.409775  543705 memory.go:184] no items to output this cycle
I0323 09:41:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:42:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:42:03.409780  543705 memory.go:184] no items to output this cycle
I0323 09:42:03.409783  543705 cpu.go:275] no items to output this cycle
E0323 09:42:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:42:13.409799  543705 memory.go:191] Add success.
I0323 09:42:13.409802  543705 cpu.go:282] Add success.
W0323 09:42:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:42:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:42:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:42:13.420123  543705 net.go:648] Add success.
I0323 09:42:13.423032  543705 net.go:770] primary dev: ETH0
I0323 09:42:13.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:42:13.423061  543705 net.go:698] Add success.
I0323 09:42:13.469342  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f169d56f-2525-4246-ac1b-28866b0ca889","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:42:13.469373  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 09:42:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:42:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 09:42:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:42:14.457032  543705 disk_worker.go:494] system disk:vda1
I0323 09:42:14.457058  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:42:14.457121  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:42:14.457131  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:42:14.457137  543705 custom_config.go:64] query custom config with name: gpu
E0323 09:42:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:42:15.456817  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:42:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:42:16.457963  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:42:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:42:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:42:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:42:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:42:23.409770  543705 cpu.go:275] no items to output this cycle
I0323 09:42:23.409778  543705 memory.go:184] no items to output this cycle
E0323 09:42:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:42:33.409764  543705 memory.go:184] no items to output this cycle
I0323 09:42:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 09:42:39.373678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:42:39.376302  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:42:39.376309  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024e0c0 0xc00024e100]
I0323 09:42:40.305721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:42:40.305726  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:42:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:42:43.410625  543705 memory.go:191] Add success.
I0323 09:42:43.409801  543705 cpu.go:282] Add success.
I0323 09:42:43.420394  543705 net.go:648] Add success.
I0323 09:42:43.423203  543705 net.go:770] primary dev: ETH0
I0323 09:42:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:42:43.423233  543705 net.go:698] Add success.
I0323 09:42:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:42:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:42:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:42:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:42:53.409802  543705 memory.go:184] no items to output this cycle
I0323 09:42:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:43:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:43:03.409781  543705 memory.go:184] no items to output this cycle
I0323 09:43:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 09:43:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:43:13.409784  543705 memory.go:191] Add success.
I0323 09:43:13.409807  543705 cpu.go:282] Add success.
W0323 09:43:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:43:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:43:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:43:13.420113  543705 net.go:648] Add success.
I0323 09:43:13.422909  543705 net.go:770] primary dev: ETH0
I0323 09:43:13.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:43:13.422935  543705 net.go:698] Add success.
I0323 09:43:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:43:14.455341  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:43:14.455422  543705 disk_worker.go:708] disk space is not compliant
W0323 09:43:14.455427  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:43:14.457855  543705 disk_worker.go:494] system disk:vda1
I0323 09:43:14.457882  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:43:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:43:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:43:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:43:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:43:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:43:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:43:23.409791  543705 memory.go:184] no items to output this cycle
I0323 09:43:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 09:43:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:43:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 09:43:33.409788  543705 memory.go:184] no items to output this cycle
I0323 09:43:39.377677  543705 disk_info.go:125] begin check local disk info of client
I0323 09:43:39.380236  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:43:39.380242  543705 disk_info.go:196] parse disk info done, disk is : [0xc000367ec0 0xc000367f00]
E0323 09:43:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:43:43.410641  543705 memory.go:191] Add success.
I0323 09:43:43.409798  543705 cpu.go:282] Add success.
I0323 09:43:43.420332  543705 net.go:648] Add success.
I0323 09:43:43.422996  543705 net.go:770] primary dev: ETH0
I0323 09:43:43.423009  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:43:43.423022  543705 net.go:698] Add success.
I0323 09:43:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:43:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:43:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:43:53.410366  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:43:53.410381  543705 memory.go:184] no items to output this cycle
I0323 09:43:53.410386  543705 cpu.go:275] no items to output this cycle
E0323 09:44:03.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:44:03.409763  543705 memory.go:184] no items to output this cycle
I0323 09:44:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 09:44:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:44:13.409812  543705 memory.go:191] Add success.
I0323 09:44:13.409817  543705 cpu.go:282] Add success.
W0323 09:44:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:44:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:44:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:44:13.420162  543705 net.go:648] Add success.
I0323 09:44:13.423127  543705 net.go:770] primary dev: ETH0
I0323 09:44:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:44:13.423152  543705 net.go:698] Add success.
I0323 09:44:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:44:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:44:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 09:44:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:44:14.456829  543705 disk_worker.go:494] system disk:vda1
I0323 09:44:14.456858  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:44:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:44:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:44:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:44:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:44:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:44:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:44:23.409768  543705 memory.go:184] no items to output this cycle
I0323 09:44:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 09:44:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:44:33.409770  543705 memory.go:184] no items to output this cycle
I0323 09:44:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 09:44:39.381681  543705 disk_info.go:125] begin check local disk info of client
I0323 09:44:39.384250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:44:39.384257  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 09:44:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:44:43.410820  543705 memory.go:191] Add success.
I0323 09:44:43.409800  543705 cpu.go:282] Add success.
I0323 09:44:43.420535  543705 net.go:648] Add success.
I0323 09:44:43.423523  543705 net.go:770] primary dev: ETH0
I0323 09:44:43.423536  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:44:43.423549  543705 net.go:698] Add success.
I0323 09:44:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:44:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:44:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:44:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:44:53.409795  543705 memory.go:184] no items to output this cycle
I0323 09:44:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 09:45:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:45:03.409782  543705 memory.go:184] no items to output this cycle
I0323 09:45:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 09:45:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:45:13.409804  543705 memory.go:191] Add success.
I0323 09:45:13.409810  543705 cpu.go:282] Add success.
W0323 09:45:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:45:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:45:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:45:13.420130  543705 net.go:648] Add success.
I0323 09:45:13.422779  543705 net.go:770] primary dev: ETH0
I0323 09:45:13.422792  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:45:13.422805  543705 net.go:698] Add success.
I0323 09:45:13.728555  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"790b2cca-2b9b-4e90-a310-6e1b7e4955af","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:45:13.728592  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:45:14.453976  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:45:14.454157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:45:14.454228  543705 disk_worker.go:708] disk space is not compliant
W0323 09:45:14.454232  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:45:14.456076  543705 disk_worker.go:494] system disk:vda1
I0323 09:45:14.456109  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:45:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:45:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:45:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:45:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:45:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:45:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:45:23.409764  543705 memory.go:184] no items to output this cycle
I0323 09:45:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 09:45:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:45:33.409810  543705 memory.go:184] no items to output this cycle
I0323 09:45:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 09:45:39.385676  543705 disk_info.go:125] begin check local disk info of client
I0323 09:45:39.388205  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:45:39.388212  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab580 0xc0001ab5c0]
I0323 09:45:40.309734  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:45:40.309740  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:45:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:45:43.410551  543705 memory.go:191] Add success.
I0323 09:45:43.409839  543705 cpu.go:282] Add success.
I0323 09:45:43.420272  543705 net.go:648] Add success.
I0323 09:45:43.422874  543705 net.go:770] primary dev: ETH0
I0323 09:45:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:45:43.422899  543705 net.go:698] Add success.
I0323 09:45:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:45:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:45:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:45:53.410409  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:45:53.410427  543705 memory.go:184] no items to output this cycle
I0323 09:45:53.410430  543705 cpu.go:275] no items to output this cycle
E0323 09:46:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:46:03.409775  543705 memory.go:184] no items to output this cycle
I0323 09:46:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:46:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:46:13.409816  543705 memory.go:191] Add success.
I0323 09:46:13.409820  543705 cpu.go:282] Add success.
W0323 09:46:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:46:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:46:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:46:13.420137  543705 net.go:648] Add success.
I0323 09:46:13.423098  543705 net.go:770] primary dev: ETH0
I0323 09:46:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:46:13.423124  543705 net.go:698] Add success.
I0323 09:46:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:46:14.455091  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:46:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0323 09:46:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:46:14.456488  543705 disk_worker.go:494] system disk:vda1
I0323 09:46:14.456529  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:46:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:46:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:46:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:46:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:46:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:46:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:46:23.409796  543705 memory.go:184] no items to output this cycle
I0323 09:46:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 09:46:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:46:33.409798  543705 memory.go:184] no items to output this cycle
I0323 09:46:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 09:46:39.392010  543705 disk_info.go:125] begin check local disk info of client
I0323 09:46:39.394604  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:46:39.394612  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b4100 0xc0002b4140]
E0323 09:46:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:46:43.410896  543705 memory.go:191] Add success.
I0323 09:46:43.409826  543705 cpu.go:282] Add success.
I0323 09:46:43.420601  543705 net.go:648] Add success.
I0323 09:46:43.423799  543705 net.go:770] primary dev: ETH0
I0323 09:46:43.423812  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:46:43.423825  543705 net.go:698] Add success.
I0323 09:46:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:46:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:46:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:46:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:46:53.409783  543705 memory.go:184] no items to output this cycle
I0323 09:46:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 09:47:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:47:03.409803  543705 memory.go:184] no items to output this cycle
I0323 09:47:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 09:47:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:47:13.409804  543705 memory.go:191] Add success.
I0323 09:47:13.409804  543705 cpu.go:282] Add success.
W0323 09:47:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:47:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:47:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:47:13.420137  543705 net.go:648] Add success.
I0323 09:47:13.422746  543705 net.go:770] primary dev: ETH0
I0323 09:47:13.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:47:13.422771  543705 net.go:698] Add success.
I0323 09:47:13.453314  543705 event_worker.go:152] Polling the log file for events...
W0323 09:47:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:47:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 09:47:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:47:14.455895  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:47:14.455904  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:47:14.455910  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:47:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 09:47:14.456593  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:47:15.456872  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:47:15.456880  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:47:16.457618  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:47:16.457704  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:47:16.457725  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:47:16.458376  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:47:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:47:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:47:23.409785  543705 memory.go:184] no items to output this cycle
I0323 09:47:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 09:47:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:47:33.409810  543705 memory.go:184] no items to output this cycle
I0323 09:47:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 09:47:39.397678  543705 disk_info.go:125] begin check local disk info of client
I0323 09:47:39.400310  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:47:39.400316  543705 disk_info.go:196] parse disk info done, disk is : [0xc000246400 0xc000246780]
E0323 09:47:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:47:43.410762  543705 memory.go:191] Add success.
I0323 09:47:43.409807  543705 cpu.go:282] Add success.
I0323 09:47:43.420463  543705 net.go:648] Add success.
I0323 09:47:43.423400  543705 net.go:770] primary dev: ETH0
I0323 09:47:43.423413  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:47:43.423427  543705 net.go:698] Add success.
I0323 09:47:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:47:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:47:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:47:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:47:53.409773  543705 memory.go:184] no items to output this cycle
I0323 09:47:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 09:48:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:48:03.409769  543705 memory.go:184] no items to output this cycle
I0323 09:48:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:48:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:48:13.409816  543705 memory.go:191] Add success.
I0323 09:48:13.409820  543705 cpu.go:282] Add success.
W0323 09:48:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:48:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:48:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:48:13.420410  543705 net.go:648] Add success.
I0323 09:48:13.423211  543705 net.go:770] primary dev: ETH0
I0323 09:48:13.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:48:13.423237  543705 net.go:698] Add success.
I0323 09:48:13.467837  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f856559f-225a-49cd-956b-91034c76cbee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:48:13.467877  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:48:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:48:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:48:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 09:48:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:48:14.456714  543705 disk_worker.go:494] system disk:vda1
I0323 09:48:14.456763  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:48:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:48:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:48:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:48:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:48:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:48:23.409850  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:48:23.409874  543705 memory.go:184] no items to output this cycle
I0323 09:48:23.410022  543705 cpu.go:275] no items to output this cycle
E0323 09:48:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:48:33.409776  543705 memory.go:184] no items to output this cycle
I0323 09:48:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 09:48:39.401684  543705 disk_info.go:125] begin check local disk info of client
I0323 09:48:39.404278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:48:39.404285  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
I0323 09:48:40.313730  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:48:40.313736  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:48:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:48:43.410802  543705 memory.go:191] Add success.
I0323 09:48:43.409829  543705 cpu.go:282] Add success.
I0323 09:48:43.420539  543705 net.go:648] Add success.
I0323 09:48:43.423420  543705 net.go:770] primary dev: ETH0
I0323 09:48:43.423434  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:48:43.423447  543705 net.go:698] Add success.
I0323 09:48:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:48:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:48:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:48:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:48:53.409771  543705 memory.go:184] no items to output this cycle
I0323 09:48:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 09:49:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:49:03.409783  543705 cpu.go:275] no items to output this cycle
I0323 09:49:03.409784  543705 memory.go:184] no items to output this cycle
E0323 09:49:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:49:13.409828  543705 memory.go:191] Add success.
I0323 09:49:13.409831  543705 cpu.go:282] Add success.
W0323 09:49:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:49:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:49:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:49:13.420135  543705 net.go:648] Add success.
I0323 09:49:13.423056  543705 net.go:770] primary dev: ETH0
I0323 09:49:13.423071  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:49:13.423085  543705 net.go:698] Add success.
I0323 09:49:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:49:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:49:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 09:49:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:49:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 09:49:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:49:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:49:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:49:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:49:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:49:16.472119  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:49:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:49:23.409784  543705 memory.go:184] no items to output this cycle
I0323 09:49:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 09:49:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:49:33.409798  543705 memory.go:184] no items to output this cycle
I0323 09:49:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 09:49:39.405675  543705 disk_info.go:125] begin check local disk info of client
I0323 09:49:39.408256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:49:39.408263  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
E0323 09:49:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:49:43.410591  543705 memory.go:191] Add success.
I0323 09:49:43.409800  543705 cpu.go:282] Add success.
I0323 09:49:43.420300  543705 net.go:648] Add success.
I0323 09:49:43.422855  543705 net.go:770] primary dev: ETH0
I0323 09:49:43.422867  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:49:43.422880  543705 net.go:698] Add success.
I0323 09:49:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:49:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:49:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:49:53.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:49:53.409762  543705 memory.go:184] no items to output this cycle
I0323 09:49:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:50:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:50:03.409805  543705 memory.go:184] no items to output this cycle
I0323 09:50:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 09:50:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:50:13.409792  543705 memory.go:191] Add success.
I0323 09:50:13.409792  543705 cpu.go:282] Add success.
W0323 09:50:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:50:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:50:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:50:13.420220  543705 net.go:648] Add success.
I0323 09:50:13.422814  543705 net.go:770] primary dev: ETH0
I0323 09:50:13.422829  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:50:13.422841  543705 net.go:698] Add success.
I0323 09:50:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:50:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:50:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 09:50:14.455181  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:50:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 09:50:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:50:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:50:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:50:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:50:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:50:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:50:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:50:23.409796  543705 memory.go:184] no items to output this cycle
I0323 09:50:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 09:50:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:50:33.409795  543705 memory.go:184] no items to output this cycle
I0323 09:50:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 09:50:39.409690  543705 disk_info.go:125] begin check local disk info of client
I0323 09:50:39.412146  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:50:39.412153  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353640 0xc000353680]
E0323 09:50:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:50:43.410646  543705 memory.go:191] Add success.
I0323 09:50:43.409801  543705 cpu.go:282] Add success.
I0323 09:50:43.420469  543705 net.go:648] Add success.
I0323 09:50:43.423078  543705 net.go:770] primary dev: ETH0
I0323 09:50:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:50:43.423104  543705 net.go:698] Add success.
I0323 09:50:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:50:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:50:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:50:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:50:53.409764  543705 memory.go:184] no items to output this cycle
I0323 09:50:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 09:51:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:51:03.409799  543705 memory.go:184] no items to output this cycle
I0323 09:51:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 09:51:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:51:13.409827  543705 memory.go:191] Add success.
I0323 09:51:13.409832  543705 cpu.go:282] Add success.
W0323 09:51:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:51:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:51:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:51:13.420616  543705 net.go:648] Add success.
I0323 09:51:13.423181  543705 net.go:770] primary dev: ETH0
I0323 09:51:13.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:51:13.423212  543705 net.go:698] Add success.
I0323 09:51:13.467422  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"45c920cb-1701-4ad4-bad4-ff4a87ca6af9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:51:13.467454  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:51:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:51:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:51:14.455172  543705 disk_worker.go:708] disk space is not compliant
W0323 09:51:14.455175  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:51:14.456517  543705 disk_worker.go:494] system disk:vda1
I0323 09:51:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:51:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:51:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:51:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:51:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:51:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:51:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:51:23.409802  543705 memory.go:184] no items to output this cycle
I0323 09:51:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 09:51:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:51:33.409784  543705 memory.go:184] no items to output this cycle
I0323 09:51:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 09:51:39.412799  543705 disk_info.go:125] begin check local disk info of client
I0323 09:51:39.415279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:51:39.415286  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353040 0xc000353080]
I0323 09:51:40.317741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:51:40.317746  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:51:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:51:43.410607  543705 memory.go:191] Add success.
I0323 09:51:43.409825  543705 cpu.go:282] Add success.
I0323 09:51:43.420319  543705 net.go:648] Add success.
I0323 09:51:43.422936  543705 net.go:770] primary dev: ETH0
I0323 09:51:43.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:51:43.422963  543705 net.go:698] Add success.
I0323 09:51:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:51:46.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:51:46.458052  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:51:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:51:53.409789  543705 memory.go:184] no items to output this cycle
I0323 09:51:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:52:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:52:03.409776  543705 memory.go:184] no items to output this cycle
I0323 09:52:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 09:52:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:52:13.409809  543705 memory.go:191] Add success.
I0323 09:52:13.409810  543705 cpu.go:282] Add success.
W0323 09:52:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:52:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:52:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:52:13.420363  543705 net.go:648] Add success.
I0323 09:52:13.423143  543705 net.go:770] primary dev: ETH0
I0323 09:52:13.423158  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:52:13.423172  543705 net.go:698] Add success.
W0323 09:52:14.455459  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:52:14.455477  543705 disk_worker.go:708] disk space is not compliant
W0323 09:52:14.455481  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:52:14.456336  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:52:14.456345  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:52:14.456351  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:52:14.457676  543705 disk_worker.go:494] system disk:vda1
I0323 09:52:14.457716  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:52:15.456783  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:52:15.456792  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:52:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:52:16.457916  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:52:16.457970  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:52:16.457989  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:52:16.472322  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:52:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:52:23.409781  543705 memory.go:184] no items to output this cycle
I0323 09:52:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 09:52:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:52:33.409801  543705 memory.go:184] no items to output this cycle
I0323 09:52:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 09:52:39.415798  543705 disk_info.go:125] begin check local disk info of client
I0323 09:52:39.418322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:52:39.418328  543705 disk_info.go:196] parse disk info done, disk is : [0xc000288680 0xc0002886c0]
E0323 09:52:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:52:43.410652  543705 memory.go:191] Add success.
I0323 09:52:43.409827  543705 cpu.go:282] Add success.
I0323 09:52:43.420375  543705 net.go:648] Add success.
I0323 09:52:43.422980  543705 net.go:770] primary dev: ETH0
I0323 09:52:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:52:43.423011  543705 net.go:698] Add success.
I0323 09:52:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:52:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:52:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:52:53.410254  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:52:53.410270  543705 memory.go:184] no items to output this cycle
I0323 09:52:53.410275  543705 cpu.go:275] no items to output this cycle
E0323 09:53:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:53:03.409779  543705 cpu.go:275] no items to output this cycle
I0323 09:53:03.409784  543705 memory.go:184] no items to output this cycle
E0323 09:53:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:53:13.409798  543705 memory.go:191] Add success.
I0323 09:53:13.409802  543705 cpu.go:282] Add success.
W0323 09:53:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:53:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:53:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:53:13.420230  543705 net.go:648] Add success.
I0323 09:53:13.423129  543705 net.go:770] primary dev: ETH0
I0323 09:53:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:53:13.423161  543705 net.go:698] Add success.
I0323 09:53:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:53:14.455256  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:53:14.455266  543705 disk_worker.go:708] disk space is not compliant
W0323 09:53:14.455268  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:53:14.456796  543705 disk_worker.go:494] system disk:vda1
I0323 09:53:14.456826  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:53:15.456012  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:53:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:53:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:53:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:53:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:53:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:53:23.409775  543705 memory.go:184] no items to output this cycle
I0323 09:53:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 09:53:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:53:33.409763  543705 memory.go:184] no items to output this cycle
I0323 09:53:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 09:53:39.418803  543705 disk_info.go:125] begin check local disk info of client
I0323 09:53:39.421366  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:53:39.421373  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cea80 0xc0003ceac0]
E0323 09:53:43.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:53:43.410736  543705 memory.go:191] Add success.
I0323 09:53:43.409808  543705 cpu.go:282] Add success.
I0323 09:53:43.420460  543705 net.go:648] Add success.
I0323 09:53:43.423435  543705 net.go:770] primary dev: ETH0
I0323 09:53:43.423448  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:53:43.423460  543705 net.go:698] Add success.
I0323 09:53:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:53:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:53:46.458056  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:53:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:53:53.409789  543705 memory.go:184] no items to output this cycle
I0323 09:53:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 09:54:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:54:03.409778  543705 memory.go:184] no items to output this cycle
I0323 09:54:03.409781  543705 cpu.go:275] no items to output this cycle
E0323 09:54:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:54:13.409790  543705 memory.go:191] Add success.
I0323 09:54:13.409800  543705 cpu.go:282] Add success.
W0323 09:54:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:54:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:54:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:54:13.420160  543705 net.go:648] Add success.
I0323 09:54:13.422746  543705 net.go:770] primary dev: ETH0
I0323 09:54:13.422758  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:54:13.422770  543705 net.go:698] Add success.
I0323 09:54:13.463604  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e0e4f71-a2fb-4c21-8699-9a6586b3d5b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:54:13.463634  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 09:54:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:54:14.455209  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:54:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0323 09:54:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:54:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 09:54:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:54:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:54:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:54:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:54:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:54:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:54:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:54:23.409767  543705 memory.go:184] no items to output this cycle
I0323 09:54:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:54:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:54:33.409801  543705 memory.go:184] no items to output this cycle
I0323 09:54:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 09:54:39.421802  543705 disk_info.go:125] begin check local disk info of client
I0323 09:54:39.424443  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:54:39.424449  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352800 0xc000352840]
I0323 09:54:40.321723  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:54:40.321729  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:54:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:54:43.410700  543705 memory.go:191] Add success.
I0323 09:54:43.409825  543705 cpu.go:282] Add success.
I0323 09:54:43.420455  543705 net.go:648] Add success.
I0323 09:54:43.423092  543705 net.go:770] primary dev: ETH0
I0323 09:54:43.423107  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:54:43.423122  543705 net.go:698] Add success.
I0323 09:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:54:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:54:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:54:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:54:53.409793  543705 memory.go:184] no items to output this cycle
I0323 09:54:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 09:55:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:55:03.409772  543705 memory.go:184] no items to output this cycle
I0323 09:55:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 09:55:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:55:13.409817  543705 memory.go:191] Add success.
I0323 09:55:13.409823  543705 cpu.go:282] Add success.
W0323 09:55:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:55:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:55:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:55:13.420051  543705 net.go:648] Add success.
I0323 09:55:13.422763  543705 net.go:770] primary dev: ETH0
I0323 09:55:13.422777  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:55:13.422789  543705 net.go:698] Add success.
I0323 09:55:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:55:14.455298  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:55:14.455392  543705 disk_worker.go:708] disk space is not compliant
W0323 09:55:14.455397  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:55:14.457022  543705 disk_worker.go:494] system disk:vda1
I0323 09:55:14.457052  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:55:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:55:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:55:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:55:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:55:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:55:23.409773  543705 memory.go:184] no items to output this cycle
I0323 09:55:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 09:55:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:55:33.409765  543705 memory.go:184] no items to output this cycle
I0323 09:55:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 09:55:39.424818  543705 disk_info.go:125] begin check local disk info of client
I0323 09:55:39.427409  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:55:39.427417  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002662c0 0xc000266300]
E0323 09:55:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:55:43.410694  543705 memory.go:191] Add success.
I0323 09:55:43.409832  543705 cpu.go:282] Add success.
I0323 09:55:43.420452  543705 net.go:648] Add success.
I0323 09:55:43.423194  543705 net.go:770] primary dev: ETH0
I0323 09:55:43.423207  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:55:43.423219  543705 net.go:698] Add success.
I0323 09:55:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:55:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:55:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:55:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:55:53.409778  543705 memory.go:184] no items to output this cycle
I0323 09:55:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 09:56:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:56:03.409789  543705 memory.go:184] no items to output this cycle
I0323 09:56:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 09:56:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:56:13.409792  543705 memory.go:191] Add success.
I0323 09:56:13.409793  543705 cpu.go:282] Add success.
W0323 09:56:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:56:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:56:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:56:13.420159  543705 net.go:648] Add success.
I0323 09:56:13.422681  543705 net.go:770] primary dev: ETH0
I0323 09:56:13.422696  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:56:13.422711  543705 net.go:698] Add success.
I0323 09:56:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:56:14.455462  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:56:14.455475  543705 disk_worker.go:708] disk space is not compliant
W0323 09:56:14.455480  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:56:14.457520  543705 disk_worker.go:494] system disk:vda1
I0323 09:56:14.457559  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:56:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:56:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:56:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:56:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:56:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:56:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:56:23.409781  543705 cpu.go:275] no items to output this cycle
I0323 09:56:23.409789  543705 memory.go:184] no items to output this cycle
E0323 09:56:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:56:33.409769  543705 memory.go:184] no items to output this cycle
I0323 09:56:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 09:56:39.427827  543705 disk_info.go:125] begin check local disk info of client
I0323 09:56:39.430487  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:56:39.430494  543705 disk_info.go:196] parse disk info done, disk is : [0xc000484f40 0xc000484f80]
E0323 09:56:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:56:43.410706  543705 memory.go:191] Add success.
I0323 09:56:43.409788  543705 cpu.go:282] Add success.
I0323 09:56:43.420421  543705 net.go:648] Add success.
I0323 09:56:43.423067  543705 net.go:770] primary dev: ETH0
I0323 09:56:43.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:56:43.423094  543705 net.go:698] Add success.
I0323 09:56:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:56:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:56:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:56:53.410351  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:56:53.410367  543705 memory.go:184] no items to output this cycle
I0323 09:56:53.410382  543705 cpu.go:275] no items to output this cycle
E0323 09:57:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:57:03.409804  543705 memory.go:184] no items to output this cycle
I0323 09:57:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 09:57:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:57:13.409818  543705 memory.go:191] Add success.
I0323 09:57:13.409823  543705 cpu.go:282] Add success.
W0323 09:57:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:57:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:57:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:57:13.420169  543705 net.go:648] Add success.
I0323 09:57:13.422882  543705 net.go:770] primary dev: ETH0
I0323 09:57:13.422896  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:57:13.422908  543705 net.go:698] Add success.
I0323 09:57:13.428917  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 09:57:13.453101  543705 event_worker.go:152] Polling the log file for events...
I0323 09:57:13.468383  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2542760c-d65f-4b5d-a175-207666f72352","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 09:57:13.468416  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 09:57:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:57:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 09:57:14.455177  543705 disk_worker.go:728] disk inode is not compliant
E0323 09:57:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 09:57:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 09:57:14.455914  543705 custom_config.go:64] query custom config with name: gpu
I0323 09:57:14.456725  543705 disk_worker.go:494] system disk:vda1
I0323 09:57:14.456758  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 09:57:15.456848  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 09:57:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:57:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 09:57:16.457948  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 09:57:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:57:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:57:16.472347  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:57:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:57:23.409797  543705 memory.go:184] no items to output this cycle
I0323 09:57:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 09:57:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:57:33.409778  543705 memory.go:184] no items to output this cycle
I0323 09:57:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 09:57:39.430850  543705 disk_info.go:125] begin check local disk info of client
I0323 09:57:39.433433  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:57:39.433439  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4640 0xc0000c4680]
I0323 09:57:40.325719  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 09:57:40.325725  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 09:57:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:57:43.410743  543705 memory.go:191] Add success.
I0323 09:57:43.409818  543705 cpu.go:282] Add success.
I0323 09:57:43.420470  543705 net.go:648] Add success.
I0323 09:57:43.423300  543705 net.go:770] primary dev: ETH0
I0323 09:57:43.423312  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:57:43.423325  543705 net.go:698] Add success.
I0323 09:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:57:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:57:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:57:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:57:53.409803  543705 memory.go:184] no items to output this cycle
I0323 09:57:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 09:58:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:58:03.409780  543705 memory.go:184] no items to output this cycle
I0323 09:58:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 09:58:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:58:13.409823  543705 memory.go:191] Add success.
I0323 09:58:13.409828  543705 cpu.go:282] Add success.
W0323 09:58:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:58:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:58:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:58:13.420241  543705 net.go:648] Add success.
I0323 09:58:13.423067  543705 net.go:770] primary dev: ETH0
I0323 09:58:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:58:13.423094  543705 net.go:698] Add success.
I0323 09:58:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:58:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:58:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 09:58:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:58:14.456515  543705 disk_worker.go:494] system disk:vda1
I0323 09:58:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:58:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:58:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:58:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:58:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:58:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:58:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:58:23.409795  543705 memory.go:184] no items to output this cycle
I0323 09:58:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 09:58:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:58:33.409788  543705 memory.go:184] no items to output this cycle
I0323 09:58:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 09:58:39.433865  543705 disk_info.go:125] begin check local disk info of client
I0323 09:58:39.436427  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:58:39.436434  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352ec0 0xc000352f00]
E0323 09:58:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:58:43.410788  543705 memory.go:191] Add success.
I0323 09:58:43.409820  543705 cpu.go:282] Add success.
I0323 09:58:43.420469  543705 net.go:648] Add success.
I0323 09:58:43.422929  543705 net.go:770] primary dev: ETH0
I0323 09:58:43.422945  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:58:43.422961  543705 net.go:698] Add success.
I0323 09:58:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:58:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:58:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:58:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:58:53.409789  543705 memory.go:184] no items to output this cycle
I0323 09:58:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 09:59:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:59:03.409774  543705 memory.go:184] no items to output this cycle
I0323 09:59:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 09:59:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:59:13.409800  543705 memory.go:191] Add success.
I0323 09:59:13.409801  543705 cpu.go:282] Add success.
W0323 09:59:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 09:59:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 09:59:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 09:59:13.420268  543705 net.go:648] Add success.
I0323 09:59:13.422990  543705 net.go:770] primary dev: ETH0
I0323 09:59:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:59:13.423015  543705 net.go:698] Add success.
I0323 09:59:14.453949  543705 custom_config.go:64] query custom config with name: gpu
W0323 09:59:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 09:59:14.455245  543705 disk_worker.go:708] disk space is not compliant
W0323 09:59:14.455248  543705 disk_worker.go:728] disk inode is not compliant
I0323 09:59:14.456644  543705 disk_worker.go:494] system disk:vda1
I0323 09:59:14.456676  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 09:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 09:59:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:59:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:59:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 09:59:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 09:59:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:59:23.409770  543705 memory.go:184] no items to output this cycle
I0323 09:59:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 09:59:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:59:33.409801  543705 memory.go:184] no items to output this cycle
I0323 09:59:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 09:59:39.436872  543705 disk_info.go:125] begin check local disk info of client
I0323 09:59:39.439451  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 09:59:39.439458  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4f80 0xc0000c4fc0]
E0323 09:59:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:59:43.410700  543705 memory.go:191] Add success.
I0323 09:59:43.409806  543705 cpu.go:282] Add success.
I0323 09:59:43.420416  543705 net.go:648] Add success.
I0323 09:59:43.422982  543705 net.go:770] primary dev: ETH0
I0323 09:59:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0323 09:59:43.423006  543705 net.go:698] Add success.
I0323 09:59:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 09:59:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 09:59:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 09:59:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 09:59:53.409777  543705 memory.go:184] no items to output this cycle
I0323 09:59:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 10:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:00:03.409785  543705 memory.go:184] no items to output this cycle
I0323 10:00:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 10:00:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:00:13.409819  543705 memory.go:191] Add success.
I0323 10:00:13.409820  543705 cpu.go:282] Add success.
W0323 10:00:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:00:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:00:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:00:13.420539  543705 net.go:648] Add success.
I0323 10:00:13.423258  543705 net.go:770] primary dev: ETH0
I0323 10:00:13.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:00:13.423290  543705 net.go:698] Add success.
I0323 10:00:13.475749  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5e757eac-7cdd-464c-9ec5-229b18c67e97","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:00:13.475780  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:00:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:00:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:00:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 10:00:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:00:14.456708  543705 disk_worker.go:494] system disk:vda1
I0323 10:00:14.456737  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:00:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:00:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:00:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:00:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:00:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:00:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:00:23.409801  543705 memory.go:184] no items to output this cycle
I0323 10:00:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 10:00:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:00:33.409775  543705 memory.go:184] no items to output this cycle
I0323 10:00:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 10:00:39.439895  543705 disk_info.go:125] begin check local disk info of client
I0323 10:00:39.442481  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:00:39.442487  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b580 0xc00007b5c0]
I0323 10:00:40.329721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:00:40.329727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:00:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:00:43.410871  543705 memory.go:191] Add success.
I0323 10:00:43.409812  543705 cpu.go:282] Add success.
I0323 10:00:43.420594  543705 net.go:648] Add success.
I0323 10:00:43.423669  543705 net.go:770] primary dev: ETH0
I0323 10:00:43.423682  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:00:43.423696  543705 net.go:698] Add success.
I0323 10:00:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:00:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:00:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:00:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:00:53.409782  543705 memory.go:184] no items to output this cycle
I0323 10:00:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 10:01:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:01:03.409796  543705 memory.go:184] no items to output this cycle
I0323 10:01:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 10:01:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:01:13.409789  543705 memory.go:191] Add success.
W0323 10:01:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:01:13.409815  543705 cpu.go:282] Add success.
W0323 10:01:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:01:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:01:13.419765  543705 net.go:648] Add success.
I0323 10:01:13.423305  543705 net.go:770] primary dev: ETH0
I0323 10:01:13.423319  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:01:13.423330  543705 net.go:698] Add success.
I0323 10:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:01:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:01:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 10:01:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:01:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 10:01:14.456631  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:01:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:01:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:01:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:01:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:01:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:01:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:01:23.409767  543705 memory.go:184] no items to output this cycle
I0323 10:01:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 10:01:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:01:33.409801  543705 memory.go:184] no items to output this cycle
I0323 10:01:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 10:01:39.442907  543705 disk_info.go:125] begin check local disk info of client
I0323 10:01:39.445527  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:01:39.445534  543705 disk_info.go:196] parse disk info done, disk is : [0xc000353500 0xc000353540]
E0323 10:01:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:01:43.410687  543705 memory.go:191] Add success.
I0323 10:01:43.409798  543705 cpu.go:282] Add success.
I0323 10:01:43.420460  543705 net.go:648] Add success.
I0323 10:01:43.423328  543705 net.go:770] primary dev: ETH0
I0323 10:01:43.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:01:43.423357  543705 net.go:698] Add success.
I0323 10:01:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:01:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:01:53.410261  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:01:53.410276  543705 memory.go:184] no items to output this cycle
I0323 10:01:53.410282  543705 cpu.go:275] no items to output this cycle
E0323 10:02:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:02:03.409770  543705 memory.go:184] no items to output this cycle
I0323 10:02:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 10:02:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:02:13.409787  543705 memory.go:191] Add success.
I0323 10:02:13.409805  543705 cpu.go:282] Add success.
W0323 10:02:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:02:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:02:13.420340  543705 net.go:648] Add success.
I0323 10:02:13.423075  543705 net.go:770] primary dev: ETH0
I0323 10:02:13.423087  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:02:13.423249  543705 net.go:698] Add success.
W0323 10:02:14.455126  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:02:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 10:02:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:02:14.455889  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:02:14.455899  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:02:14.455904  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:02:14.456624  543705 disk_worker.go:494] system disk:vda1
I0323 10:02:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:02:15.456838  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:02:15.456847  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:02:16.457902  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:02:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:02:16.457958  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:02:16.457978  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:02:16.472287  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:02:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:02:23.409796  543705 memory.go:184] no items to output this cycle
I0323 10:02:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 10:02:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:02:33.409786  543705 memory.go:184] no items to output this cycle
I0323 10:02:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 10:02:39.445924  543705 disk_info.go:125] begin check local disk info of client
I0323 10:02:39.448484  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:02:39.448491  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352280 0xc0003522c0]
E0323 10:02:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:02:43.410623  543705 memory.go:191] Add success.
I0323 10:02:43.409802  543705 cpu.go:282] Add success.
I0323 10:02:43.420388  543705 net.go:648] Add success.
I0323 10:02:43.423025  543705 net.go:770] primary dev: ETH0
I0323 10:02:43.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:02:43.423051  543705 net.go:698] Add success.
I0323 10:02:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:02:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:02:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:02:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:02:53.409779  543705 memory.go:184] no items to output this cycle
I0323 10:02:53.409781  543705 cpu.go:275] no items to output this cycle
E0323 10:03:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:03:03.409793  543705 memory.go:184] no items to output this cycle
I0323 10:03:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 10:03:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:03:13.409786  543705 memory.go:191] Add success.
W0323 10:03:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:03:13.409815  543705 cpu.go:282] Add success.
W0323 10:03:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:03:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:03:13.420214  543705 net.go:648] Add success.
I0323 10:03:13.423148  543705 net.go:770] primary dev: ETH0
I0323 10:03:13.423163  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:03:13.423178  543705 net.go:698] Add success.
I0323 10:03:13.465000  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"36c4b4c0-b927-4b1b-a145-ede10f1bf657","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:03:13.465039  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:03:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:03:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:03:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 10:03:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:03:14.456716  543705 disk_worker.go:494] system disk:vda1
I0323 10:03:14.456743  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:03:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:03:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:03:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:03:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:03:16.472397  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:03:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:03:23.409778  543705 memory.go:184] no items to output this cycle
I0323 10:03:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 10:03:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:03:33.409776  543705 memory.go:184] no items to output this cycle
I0323 10:03:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 10:03:39.448943  543705 disk_info.go:125] begin check local disk info of client
I0323 10:03:39.451506  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:03:39.451512  543705 disk_info.go:196] parse disk info done, disk is : [0xc00028ca40 0xc00028ca80]
I0323 10:03:40.333718  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:03:40.333724  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:03:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:03:43.410670  543705 memory.go:191] Add success.
I0323 10:03:43.409803  543705 cpu.go:282] Add success.
I0323 10:03:43.420490  543705 net.go:648] Add success.
I0323 10:03:43.422940  543705 net.go:770] primary dev: ETH0
I0323 10:03:43.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:03:43.422971  543705 net.go:698] Add success.
I0323 10:03:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:03:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:03:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:03:53.410351  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:03:53.410365  543705 memory.go:184] no items to output this cycle
I0323 10:03:53.410394  543705 cpu.go:275] no items to output this cycle
E0323 10:04:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:04:03.409774  543705 memory.go:184] no items to output this cycle
I0323 10:04:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 10:04:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:04:13.409798  543705 memory.go:191] Add success.
I0323 10:04:13.409820  543705 cpu.go:282] Add success.
W0323 10:04:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:04:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:04:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:04:13.420165  543705 net.go:648] Add success.
I0323 10:04:13.422950  543705 net.go:770] primary dev: ETH0
I0323 10:04:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:04:13.422977  543705 net.go:698] Add success.
I0323 10:04:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:04:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:04:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 10:04:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:04:14.456705  543705 disk_worker.go:494] system disk:vda1
I0323 10:04:14.456733  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:04:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:04:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:04:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:04:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:04:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:04:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:04:23.409787  543705 cpu.go:275] no items to output this cycle
I0323 10:04:23.409789  543705 memory.go:184] no items to output this cycle
E0323 10:04:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:04:33.409799  543705 memory.go:184] no items to output this cycle
I0323 10:04:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 10:04:39.451942  543705 disk_info.go:125] begin check local disk info of client
I0323 10:04:39.454614  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:04:39.454621  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005193c0 0xc000519400]
E0323 10:04:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:04:43.410705  543705 memory.go:191] Add success.
I0323 10:04:43.409816  543705 cpu.go:282] Add success.
I0323 10:04:43.420467  543705 net.go:648] Add success.
I0323 10:04:43.423360  543705 net.go:770] primary dev: ETH0
I0323 10:04:43.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:04:43.423391  543705 net.go:698] Add success.
I0323 10:04:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:04:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:04:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:04:53.409784  543705 memory.go:184] no items to output this cycle
I0323 10:04:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 10:05:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:05:03.409792  543705 memory.go:184] no items to output this cycle
I0323 10:05:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 10:05:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:05:13.409834  543705 memory.go:191] Add success.
I0323 10:05:13.409837  543705 cpu.go:282] Add success.
W0323 10:05:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:05:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:05:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:05:13.420180  543705 net.go:648] Add success.
I0323 10:05:13.423024  543705 net.go:770] primary dev: ETH0
I0323 10:05:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:05:13.423051  543705 net.go:698] Add success.
I0323 10:05:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:05:14.455288  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:05:14.455300  543705 disk_worker.go:708] disk space is not compliant
W0323 10:05:14.455303  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:05:14.457236  543705 disk_worker.go:494] system disk:vda1
I0323 10:05:14.457272  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:05:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:05:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:05:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:05:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:05:23.409743  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:05:23.409759  543705 memory.go:184] no items to output this cycle
I0323 10:05:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 10:05:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:05:33.409805  543705 memory.go:184] no items to output this cycle
I0323 10:05:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 10:05:39.454967  543705 disk_info.go:125] begin check local disk info of client
I0323 10:05:39.457553  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:05:39.457559  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352b40 0xc000352b80]
E0323 10:05:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:05:43.410769  543705 memory.go:191] Add success.
I0323 10:05:43.409800  543705 cpu.go:282] Add success.
I0323 10:05:43.420536  543705 net.go:648] Add success.
I0323 10:05:43.423556  543705 net.go:770] primary dev: ETH0
I0323 10:05:43.423570  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:05:43.423583  543705 net.go:698] Add success.
I0323 10:05:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:05:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:05:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:05:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:05:53.409773  543705 memory.go:184] no items to output this cycle
I0323 10:05:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 10:06:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:06:03.409768  543705 memory.go:184] no items to output this cycle
I0323 10:06:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 10:06:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:06:13.409816  543705 memory.go:191] Add success.
I0323 10:06:13.409825  543705 cpu.go:282] Add success.
W0323 10:06:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:06:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:06:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:06:13.420158  543705 net.go:648] Add success.
I0323 10:06:13.422975  543705 net.go:770] primary dev: ETH0
I0323 10:06:13.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:06:13.423002  543705 net.go:698] Add success.
I0323 10:06:13.682955  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ed3560bd-d397-4031-8ac8-7c5481a30800","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:06:13.682990  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:06:14.454726  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:06:14.454949  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:06:14.454959  543705 disk_worker.go:708] disk space is not compliant
W0323 10:06:14.454961  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:06:14.456638  543705 disk_worker.go:494] system disk:vda1
I0323 10:06:14.456670  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:06:15.455613  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:06:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:06:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:06:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:06:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:06:23.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:06:23.409761  543705 memory.go:184] no items to output this cycle
I0323 10:06:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 10:06:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:06:33.409780  543705 memory.go:184] no items to output this cycle
I0323 10:06:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 10:06:39.457984  543705 disk_info.go:125] begin check local disk info of client
I0323 10:06:39.460551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:06:39.460558  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab1c0 0xc0001ab200]
I0323 10:06:40.337724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:06:40.337730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:06:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:06:43.410675  543705 memory.go:191] Add success.
I0323 10:06:43.409815  543705 cpu.go:282] Add success.
I0323 10:06:43.420403  543705 net.go:648] Add success.
I0323 10:06:43.422886  543705 net.go:770] primary dev: ETH0
I0323 10:06:43.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:06:43.422916  543705 net.go:698] Add success.
I0323 10:06:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:06:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:06:53.409777  543705 cpu.go:275] no items to output this cycle
I0323 10:06:53.409788  543705 memory.go:184] no items to output this cycle
E0323 10:07:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:07:03.409777  543705 memory.go:184] no items to output this cycle
I0323 10:07:03.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:07:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:07:13.409786  543705 memory.go:191] Add success.
W0323 10:07:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:07:13.409816  543705 cpu.go:282] Add success.
W0323 10:07:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:07:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:07:13.420198  543705 net.go:648] Add success.
I0323 10:07:13.423317  543705 net.go:770] primary dev: ETH0
I0323 10:07:13.423333  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:07:13.423346  543705 net.go:698] Add success.
I0323 10:07:13.452921  543705 event_worker.go:152] Polling the log file for events...
W0323 10:07:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:07:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 10:07:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:07:14.456171  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:07:14.456180  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:07:14.456186  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:07:14.456461  543705 disk_worker.go:494] system disk:vda1
I0323 10:07:14.456515  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:07:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:07:15.456819  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:07:16.457934  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:07:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:07:16.457986  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:07:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:07:16.472339  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:07:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:07:23.409785  543705 memory.go:184] no items to output this cycle
I0323 10:07:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 10:07:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:07:33.409781  543705 memory.go:184] no items to output this cycle
I0323 10:07:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 10:07:39.460994  543705 disk_info.go:125] begin check local disk info of client
I0323 10:07:39.463566  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:07:39.463572  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa7c0 0xc0001aa800]
E0323 10:07:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:07:43.410675  543705 memory.go:191] Add success.
I0323 10:07:43.409816  543705 cpu.go:282] Add success.
I0323 10:07:43.420409  543705 net.go:648] Add success.
I0323 10:07:43.427376  543705 net.go:770] primary dev: ETH0
I0323 10:07:43.427390  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:07:43.427403  543705 net.go:698] Add success.
I0323 10:07:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:07:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:07:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:07:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:07:53.409795  543705 memory.go:184] no items to output this cycle
I0323 10:07:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 10:08:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:08:03.409771  543705 memory.go:184] no items to output this cycle
I0323 10:08:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 10:08:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:08:13.409784  543705 memory.go:191] Add success.
I0323 10:08:13.409802  543705 cpu.go:282] Add success.
W0323 10:08:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:08:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:08:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:08:13.420115  543705 net.go:648] Add success.
I0323 10:08:13.422842  543705 net.go:770] primary dev: ETH0
I0323 10:08:13.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:08:13.422868  543705 net.go:698] Add success.
I0323 10:08:14.454953  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:08:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:08:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 10:08:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:08:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 10:08:14.456647  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:08:15.456011  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:08:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:08:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:08:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:08:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:08:23.409802  543705 memory.go:184] no items to output this cycle
I0323 10:08:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 10:08:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:08:33.409767  543705 memory.go:184] no items to output this cycle
I0323 10:08:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 10:08:39.464014  543705 disk_info.go:125] begin check local disk info of client
I0323 10:08:39.466580  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:08:39.466587  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5280 0xc0000c52c0]
E0323 10:08:43.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:08:43.410646  543705 memory.go:191] Add success.
I0323 10:08:43.409909  543705 cpu.go:282] Add success.
I0323 10:08:43.420428  543705 net.go:648] Add success.
I0323 10:08:43.422957  543705 net.go:770] primary dev: ETH0
I0323 10:08:43.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:08:43.422982  543705 net.go:698] Add success.
I0323 10:08:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:08:53.410699  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:08:53.410714  543705 memory.go:184] no items to output this cycle
I0323 10:08:53.410750  543705 cpu.go:275] no items to output this cycle
E0323 10:09:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:09:03.409813  543705 memory.go:184] no items to output this cycle
I0323 10:09:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 10:09:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:09:13.409786  543705 memory.go:191] Add success.
W0323 10:09:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:09:13.409816  543705 cpu.go:282] Add success.
W0323 10:09:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:09:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:09:13.420111  543705 net.go:648] Add success.
I0323 10:09:13.423257  543705 net.go:770] primary dev: ETH0
I0323 10:09:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:09:13.423283  543705 net.go:698] Add success.
I0323 10:09:13.695382  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e90fe18-3a08-4e02-a666-c4c3ff6f9a3f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:09:13.695417  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:09:14.454686  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:09:14.454923  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:09:14.454934  543705 disk_worker.go:708] disk space is not compliant
W0323 10:09:14.454936  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:09:14.457224  543705 disk_worker.go:494] system disk:vda1
I0323 10:09:14.457356  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:09:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:09:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:09:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:09:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:09:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:09:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:09:23.409772  543705 memory.go:184] no items to output this cycle
I0323 10:09:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 10:09:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:09:33.409774  543705 memory.go:184] no items to output this cycle
I0323 10:09:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 10:09:39.467030  543705 disk_info.go:125] begin check local disk info of client
I0323 10:09:39.469675  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:09:39.469683  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c42c0 0xc0000c4300]
I0323 10:09:40.341724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:09:40.341730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:09:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:09:43.410750  543705 memory.go:191] Add success.
I0323 10:09:43.409794  543705 cpu.go:282] Add success.
I0323 10:09:43.420477  543705 net.go:648] Add success.
I0323 10:09:43.423367  543705 net.go:770] primary dev: ETH0
I0323 10:09:43.423381  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:09:43.423394  543705 net.go:698] Add success.
I0323 10:09:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:09:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:09:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:09:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:09:53.409800  543705 memory.go:184] no items to output this cycle
I0323 10:09:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 10:10:03.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:10:03.409766  543705 memory.go:184] no items to output this cycle
I0323 10:10:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 10:10:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:10:13.409824  543705 memory.go:191] Add success.
I0323 10:10:13.409825  543705 cpu.go:282] Add success.
W0323 10:10:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:10:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:10:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:10:13.420191  543705 net.go:648] Add success.
I0323 10:10:13.423007  543705 net.go:770] primary dev: ETH0
I0323 10:10:13.423020  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:10:13.423033  543705 net.go:698] Add success.
I0323 10:10:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:10:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:10:14.455155  543705 disk_worker.go:708] disk space is not compliant
W0323 10:10:14.455158  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:10:14.456492  543705 disk_worker.go:494] system disk:vda1
I0323 10:10:14.456536  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:10:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:10:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:10:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:10:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:10:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:10:23.409778  543705 memory.go:184] no items to output this cycle
I0323 10:10:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 10:10:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:10:33.409795  543705 memory.go:184] no items to output this cycle
I0323 10:10:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 10:10:39.469770  543705 disk_info.go:125] begin check local disk info of client
I0323 10:10:39.472561  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:10:39.472567  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa380 0xc0001aa3c0]
E0323 10:10:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:10:43.410656  543705 memory.go:191] Add success.
I0323 10:10:43.409799  543705 cpu.go:282] Add success.
I0323 10:10:43.420392  543705 net.go:648] Add success.
I0323 10:10:43.423119  543705 net.go:770] primary dev: ETH0
I0323 10:10:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:10:43.423148  543705 net.go:698] Add success.
I0323 10:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:10:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:10:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:10:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:10:53.409794  543705 memory.go:184] no items to output this cycle
I0323 10:10:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 10:11:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:11:03.409770  543705 memory.go:184] no items to output this cycle
I0323 10:11:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 10:11:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:11:13.409823  543705 memory.go:191] Add success.
I0323 10:11:13.409825  543705 cpu.go:282] Add success.
W0323 10:11:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:11:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:11:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:11:13.420175  543705 net.go:648] Add success.
I0323 10:11:13.422961  543705 net.go:770] primary dev: ETH0
I0323 10:11:13.422975  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:11:13.422986  543705 net.go:698] Add success.
I0323 10:11:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:11:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:11:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 10:11:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:11:14.456502  543705 disk_worker.go:494] system disk:vda1
I0323 10:11:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:11:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:11:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:11:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:11:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:11:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:11:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:11:23.409779  543705 memory.go:184] no items to output this cycle
I0323 10:11:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 10:11:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:11:33.409765  543705 memory.go:184] no items to output this cycle
I0323 10:11:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 10:11:39.473066  543705 disk_info.go:125] begin check local disk info of client
I0323 10:11:39.475647  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:11:39.475654  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4240 0xc0000c4280]
E0323 10:11:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:11:43.410760  543705 memory.go:191] Add success.
I0323 10:11:43.409800  543705 cpu.go:282] Add success.
I0323 10:11:43.420320  543705 net.go:770] primary dev: ETH0
I0323 10:11:43.420333  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:11:43.420346  543705 net.go:698] Add success.
I0323 10:11:43.420599  543705 net.go:648] Add success.
I0323 10:11:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:11:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:11:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:11:53.410368  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:11:53.410384  543705 memory.go:184] no items to output this cycle
I0323 10:11:53.410392  543705 cpu.go:275] no items to output this cycle
E0323 10:12:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:12:03.409776  543705 memory.go:184] no items to output this cycle
I0323 10:12:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 10:12:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:12:13.409786  543705 memory.go:191] Add success.
I0323 10:12:13.409789  543705 cpu.go:282] Add success.
W0323 10:12:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:12:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:12:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:12:13.420042  543705 net.go:648] Add success.
I0323 10:12:13.422757  543705 net.go:770] primary dev: ETH0
I0323 10:12:13.422770  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:12:13.422782  543705 net.go:698] Add success.
I0323 10:12:13.468236  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6e1c2c13-5235-4b6b-9675-a007a927543e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:12:13.468271  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 10:12:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:12:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 10:12:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:12:14.455908  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:12:14.455916  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:12:14.455921  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:12:14.456841  543705 disk_worker.go:494] system disk:vda1
I0323 10:12:14.456878  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:12:15.456799  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:12:15.456807  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:12:16.457939  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:12:16.457939  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:12:16.457995  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:12:16.458012  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:12:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:12:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:12:23.409778  543705 memory.go:184] no items to output this cycle
I0323 10:12:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:12:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:12:33.409776  543705 memory.go:184] no items to output this cycle
I0323 10:12:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 10:12:39.476076  543705 disk_info.go:125] begin check local disk info of client
I0323 10:12:39.478664  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:12:39.478671  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae40 0xc00007ae80]
I0323 10:12:40.345721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:12:40.345727  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:12:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:12:43.410615  543705 memory.go:191] Add success.
I0323 10:12:43.409820  543705 cpu.go:282] Add success.
I0323 10:12:43.420312  543705 net.go:648] Add success.
I0323 10:12:43.422999  543705 net.go:770] primary dev: ETH0
I0323 10:12:43.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:12:43.423030  543705 net.go:698] Add success.
I0323 10:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:12:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:12:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:12:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:12:53.409776  543705 memory.go:184] no items to output this cycle
I0323 10:12:53.409781  543705 cpu.go:275] no items to output this cycle
E0323 10:13:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:13:03.409790  543705 memory.go:184] no items to output this cycle
I0323 10:13:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 10:13:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:13:13.409792  543705 memory.go:191] Add success.
I0323 10:13:13.409810  543705 cpu.go:282] Add success.
W0323 10:13:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:13:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:13:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:13:13.420179  543705 net.go:648] Add success.
I0323 10:13:13.423181  543705 net.go:770] primary dev: ETH0
I0323 10:13:13.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:13:13.423211  543705 net.go:698] Add success.
I0323 10:13:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:13:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:13:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 10:13:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:13:14.456514  543705 disk_worker.go:494] system disk:vda1
I0323 10:13:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:13:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:13:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:13:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:13:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:13:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:13:23.410742  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:13:23.410759  543705 memory.go:184] no items to output this cycle
I0323 10:13:23.410784  543705 cpu.go:275] no items to output this cycle
E0323 10:13:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:13:33.409793  543705 memory.go:184] no items to output this cycle
I0323 10:13:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 10:13:39.479038  543705 disk_info.go:125] begin check local disk info of client
I0323 10:13:39.481613  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:13:39.481620  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bf440 0xc0002bf480]
E0323 10:13:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:13:43.410673  543705 memory.go:191] Add success.
I0323 10:13:43.409820  543705 cpu.go:282] Add success.
I0323 10:13:43.420397  543705 net.go:648] Add success.
I0323 10:13:43.423343  543705 net.go:770] primary dev: ETH0
I0323 10:13:43.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:13:43.423374  543705 net.go:698] Add success.
I0323 10:13:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:13:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:13:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:13:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:13:53.409774  543705 memory.go:184] no items to output this cycle
I0323 10:13:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 10:14:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:14:03.409784  543705 memory.go:184] no items to output this cycle
I0323 10:14:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 10:14:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:14:13.409821  543705 memory.go:191] Add success.
I0323 10:14:13.409830  543705 cpu.go:282] Add success.
W0323 10:14:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:14:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:14:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:14:13.420275  543705 net.go:648] Add success.
I0323 10:14:13.423031  543705 net.go:770] primary dev: ETH0
I0323 10:14:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:14:13.423056  543705 net.go:698] Add success.
I0323 10:14:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:14:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:14:14.455179  543705 disk_worker.go:708] disk space is not compliant
W0323 10:14:14.455182  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:14:14.456516  543705 disk_worker.go:494] system disk:vda1
I0323 10:14:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:14:15.456016  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:14:16.458036  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:14:16.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:14:16.458127  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:14:16.472490  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:14:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:14:23.409764  543705 memory.go:184] no items to output this cycle
I0323 10:14:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 10:14:33.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:14:33.409762  543705 memory.go:184] no items to output this cycle
I0323 10:14:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 10:14:39.482097  543705 disk_info.go:125] begin check local disk info of client
I0323 10:14:39.484640  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:14:39.484647  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba80 0xc0001abc40]
E0323 10:14:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:14:43.410643  543705 memory.go:191] Add success.
I0323 10:14:43.409816  543705 cpu.go:282] Add success.
I0323 10:14:43.420433  543705 net.go:648] Add success.
I0323 10:14:43.423076  543705 net.go:770] primary dev: ETH0
I0323 10:14:43.423089  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:14:43.423103  543705 net.go:698] Add success.
I0323 10:14:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:14:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:14:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:14:53.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:14:53.409763  543705 memory.go:184] no items to output this cycle
I0323 10:14:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 10:15:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:15:03.409799  543705 memory.go:184] no items to output this cycle
I0323 10:15:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 10:15:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:15:13.409783  543705 memory.go:191] Add success.
W0323 10:15:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:15:13.409813  543705 cpu.go:282] Add success.
W0323 10:15:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:15:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:15:13.420161  543705 net.go:648] Add success.
I0323 10:15:13.423008  543705 net.go:770] primary dev: ETH0
I0323 10:15:13.423023  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:15:13.423037  543705 net.go:698] Add success.
I0323 10:15:13.494145  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"db39c4d0-1d32-4819-84ab-c70508385ca7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:15:13.494178  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:15:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:15:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:15:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 10:15:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:15:14.456636  543705 disk_worker.go:494] system disk:vda1
I0323 10:15:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:15:15.455999  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:15:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:15:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:15:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:15:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:15:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:15:23.409903  543705 memory.go:184] no items to output this cycle
I0323 10:15:23.409997  543705 cpu.go:275] no items to output this cycle
E0323 10:15:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:15:33.409766  543705 memory.go:184] no items to output this cycle
I0323 10:15:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 10:15:39.485129  543705 disk_info.go:125] begin check local disk info of client
I0323 10:15:39.487704  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:15:39.487711  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5240 0xc0000c5280]
I0323 10:15:40.349731  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:15:40.349737  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:15:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:15:43.410627  543705 memory.go:191] Add success.
I0323 10:15:43.409808  543705 cpu.go:282] Add success.
I0323 10:15:43.420311  543705 net.go:648] Add success.
I0323 10:15:43.423244  543705 net.go:770] primary dev: ETH0
I0323 10:15:43.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:15:43.423269  543705 net.go:698] Add success.
I0323 10:15:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:15:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:15:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:15:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:15:53.409770  543705 memory.go:184] no items to output this cycle
I0323 10:15:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 10:16:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:16:03.409793  543705 memory.go:184] no items to output this cycle
I0323 10:16:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 10:16:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:16:13.409779  543705 memory.go:191] Add success.
W0323 10:16:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:16:13.409807  543705 cpu.go:282] Add success.
W0323 10:16:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:16:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:16:13.420203  543705 net.go:648] Add success.
I0323 10:16:13.423110  543705 net.go:770] primary dev: ETH0
I0323 10:16:13.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:16:13.423137  543705 net.go:698] Add success.
I0323 10:16:14.455121  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:16:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:16:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 10:16:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:16:14.457933  543705 disk_worker.go:494] system disk:vda1
I0323 10:16:14.457964  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:16:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:16:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:16:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:16:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:16:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:16:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:16:23.409767  543705 memory.go:184] no items to output this cycle
I0323 10:16:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 10:16:33.409888  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:16:33.409916  543705 memory.go:184] no items to output this cycle
I0323 10:16:33.410050  543705 cpu.go:275] no items to output this cycle
I0323 10:16:39.488085  543705 disk_info.go:125] begin check local disk info of client
I0323 10:16:39.490660  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:16:39.490666  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5600 0xc0000c5640]
E0323 10:16:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:16:43.410668  543705 memory.go:191] Add success.
I0323 10:16:43.409821  543705 cpu.go:282] Add success.
I0323 10:16:43.420429  543705 net.go:648] Add success.
I0323 10:16:43.423336  543705 net.go:770] primary dev: ETH0
I0323 10:16:43.423351  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:16:43.423366  543705 net.go:698] Add success.
I0323 10:16:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:16:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:16:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:16:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:16:53.409799  543705 memory.go:184] no items to output this cycle
I0323 10:16:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 10:17:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:17:03.409769  543705 memory.go:184] no items to output this cycle
I0323 10:17:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 10:17:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:17:13.409799  543705 memory.go:191] Add success.
W0323 10:17:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:17:13.409831  543705 cpu.go:282] Add success.
W0323 10:17:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:17:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:17:13.420612  543705 net.go:648] Add success.
I0323 10:17:13.423336  543705 net.go:770] primary dev: ETH0
I0323 10:17:13.423356  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:17:13.423377  543705 net.go:698] Add success.
I0323 10:17:13.453047  543705 event_worker.go:152] Polling the log file for events...
W0323 10:17:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:17:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 10:17:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:17:14.456817  543705 disk_worker.go:494] system disk:vda1
I0323 10:17:14.456859  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:17:14.457208  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:17:14.457216  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:17:14.457221  543705 custom_config.go:64] query custom config with name: gpu
E0323 10:17:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:17:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:17:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:17:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:17:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:17:16.458022  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:17:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:17:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:17:23.409795  543705 memory.go:184] no items to output this cycle
I0323 10:17:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 10:17:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:17:33.409775  543705 memory.go:184] no items to output this cycle
I0323 10:17:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 10:17:39.491106  543705 disk_info.go:125] begin check local disk info of client
I0323 10:17:39.493686  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:17:39.493693  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003de000 0xc0003de040]
E0323 10:17:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:17:43.410702  543705 memory.go:191] Add success.
I0323 10:17:43.409802  543705 cpu.go:282] Add success.
I0323 10:17:43.420415  543705 net.go:648] Add success.
I0323 10:17:43.423426  543705 net.go:770] primary dev: ETH0
I0323 10:17:43.423440  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:17:43.423452  543705 net.go:698] Add success.
I0323 10:17:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:17:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:17:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:17:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:17:53.409797  543705 memory.go:184] no items to output this cycle
I0323 10:17:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 10:18:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:18:03.409774  543705 memory.go:184] no items to output this cycle
I0323 10:18:03.409783  543705 cpu.go:275] no items to output this cycle
E0323 10:18:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:18:13.409817  543705 memory.go:191] Add success.
I0323 10:18:13.409828  543705 cpu.go:282] Add success.
W0323 10:18:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:18:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:18:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:18:13.420148  543705 net.go:648] Add success.
I0323 10:18:13.423247  543705 net.go:770] primary dev: ETH0
I0323 10:18:13.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:18:13.423273  543705 net.go:698] Add success.
I0323 10:18:13.468182  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"06286169-dc9b-4b61-b7ee-be5bfe66165a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:18:13.468216  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:18:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:18:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:18:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 10:18:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:18:14.456494  543705 disk_worker.go:494] system disk:vda1
I0323 10:18:14.456540  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:18:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:18:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:18:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:18:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:18:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:18:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:18:23.409770  543705 memory.go:184] no items to output this cycle
I0323 10:18:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 10:18:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:18:33.409795  543705 memory.go:184] no items to output this cycle
I0323 10:18:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 10:18:39.494117  543705 disk_info.go:125] begin check local disk info of client
I0323 10:18:39.496658  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:18:39.496665  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e67c0 0xc0004e6800]
I0323 10:18:40.353724  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:18:40.353730  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:18:43.409861  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:18:43.410812  543705 memory.go:191] Add success.
I0323 10:18:43.410028  543705 cpu.go:282] Add success.
I0323 10:18:43.419742  543705 net.go:648] Add success.
I0323 10:18:43.422787  543705 net.go:770] primary dev: ETH0
I0323 10:18:43.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:18:43.422812  543705 net.go:698] Add success.
I0323 10:18:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:18:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:18:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:18:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:18:53.409794  543705 memory.go:184] no items to output this cycle
I0323 10:18:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 10:19:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:19:03.409773  543705 memory.go:184] no items to output this cycle
I0323 10:19:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 10:19:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:19:13.409797  543705 memory.go:191] Add success.
I0323 10:19:13.409797  543705 cpu.go:282] Add success.
W0323 10:19:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:19:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:19:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:19:13.420246  543705 net.go:648] Add success.
I0323 10:19:13.423003  543705 net.go:770] primary dev: ETH0
I0323 10:19:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:19:13.423032  543705 net.go:698] Add success.
I0323 10:19:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:19:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:19:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 10:19:14.455183  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:19:14.456518  543705 disk_worker.go:494] system disk:vda1
I0323 10:19:14.456565  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:19:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:19:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:19:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:19:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:19:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:19:23.409775  543705 memory.go:184] no items to output this cycle
I0323 10:19:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 10:19:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:19:33.409774  543705 memory.go:184] no items to output this cycle
I0323 10:19:33.409776  543705 cpu.go:275] no items to output this cycle
I0323 10:19:39.497127  543705 disk_info.go:125] begin check local disk info of client
I0323 10:19:39.499709  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:19:39.499716  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328180 0xc0003281c0]
E0323 10:19:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:19:43.410682  543705 memory.go:191] Add success.
I0323 10:19:43.409804  543705 cpu.go:282] Add success.
I0323 10:19:43.420402  543705 net.go:648] Add success.
I0323 10:19:43.423251  543705 net.go:770] primary dev: ETH0
I0323 10:19:43.423266  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:19:43.423280  543705 net.go:698] Add success.
I0323 10:19:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:19:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:19:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:19:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:19:53.409763  543705 memory.go:184] no items to output this cycle
I0323 10:19:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 10:20:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:20:03.409800  543705 memory.go:184] no items to output this cycle
I0323 10:20:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 10:20:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:20:13.409788  543705 memory.go:191] Add success.
I0323 10:20:13.409807  543705 cpu.go:282] Add success.
W0323 10:20:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:20:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:20:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:20:13.420120  543705 net.go:648] Add success.
I0323 10:20:13.422965  543705 net.go:770] primary dev: ETH0
I0323 10:20:13.422978  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:20:13.422991  543705 net.go:698] Add success.
I0323 10:20:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:20:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:20:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 10:20:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:20:14.456584  543705 disk_worker.go:494] system disk:vda1
I0323 10:20:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:20:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:20:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:20:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:20:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:20:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:20:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:20:23.409777  543705 cpu.go:275] no items to output this cycle
I0323 10:20:23.409788  543705 memory.go:184] no items to output this cycle
E0323 10:20:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:20:33.409803  543705 memory.go:184] no items to output this cycle
I0323 10:20:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 10:20:39.499808  543705 disk_info.go:125] begin check local disk info of client
I0323 10:20:39.502426  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:20:39.502434  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f04c0 0xc0003f0500]
E0323 10:20:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:20:43.410723  543705 memory.go:191] Add success.
I0323 10:20:43.409825  543705 cpu.go:282] Add success.
I0323 10:20:43.420522  543705 net.go:648] Add success.
I0323 10:20:43.423311  543705 net.go:770] primary dev: ETH0
I0323 10:20:43.423326  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:20:43.423341  543705 net.go:698] Add success.
I0323 10:20:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:20:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:20:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:20:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:20:53.409765  543705 memory.go:184] no items to output this cycle
I0323 10:20:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 10:21:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:21:03.409805  543705 memory.go:184] no items to output this cycle
I0323 10:21:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 10:21:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:21:13.409823  543705 memory.go:191] Add success.
I0323 10:21:13.409833  543705 cpu.go:282] Add success.
W0323 10:21:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:21:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:21:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:21:13.420193  543705 net.go:648] Add success.
I0323 10:21:13.423273  543705 net.go:770] primary dev: ETH0
I0323 10:21:13.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:21:13.423309  543705 net.go:698] Add success.
I0323 10:21:13.468136  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"54659dc5-39d6-4768-9439-647b8b9fe3a2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:21:13.468173  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:21:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:21:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:21:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 10:21:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:21:14.456643  543705 disk_worker.go:494] system disk:vda1
I0323 10:21:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:21:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:21:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:21:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:21:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:21:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:21:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:21:23.409773  543705 memory.go:184] no items to output this cycle
I0323 10:21:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:21:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:21:33.409776  543705 memory.go:184] no items to output this cycle
I0323 10:21:33.409780  543705 cpu.go:275] no items to output this cycle
I0323 10:21:39.503201  543705 disk_info.go:125] begin check local disk info of client
I0323 10:21:39.505849  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:21:39.505857  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004c4200 0xc0004c4240]
I0323 10:21:40.354779  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:21:40.354785  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:21:43.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:21:43.410788  543705 memory.go:191] Add success.
I0323 10:21:43.410003  543705 cpu.go:282] Add success.
I0323 10:21:43.419712  543705 net.go:648] Add success.
I0323 10:21:43.422398  543705 net.go:770] primary dev: ETH0
I0323 10:21:43.422411  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:21:43.422423  543705 net.go:698] Add success.
I0323 10:21:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:21:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:21:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:21:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:21:53.409795  543705 memory.go:184] no items to output this cycle
I0323 10:21:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 10:22:03.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:22:03.409766  543705 memory.go:184] no items to output this cycle
I0323 10:22:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 10:22:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:22:13.409813  543705 memory.go:191] Add success.
I0323 10:22:13.409827  543705 cpu.go:282] Add success.
W0323 10:22:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:22:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:22:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:22:13.420169  543705 net.go:648] Add success.
I0323 10:22:13.422998  543705 net.go:770] primary dev: ETH0
I0323 10:22:13.423012  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:22:13.423028  543705 net.go:698] Add success.
W0323 10:22:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:22:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0323 10:22:14.455166  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:22:14.456130  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:22:14.456136  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:22:14.456141  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:22:14.456954  543705 disk_worker.go:494] system disk:vda1
I0323 10:22:14.456997  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:22:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:22:15.456858  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:22:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:22:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:22:16.457996  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:22:16.458013  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:22:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:22:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:22:23.409771  543705 memory.go:184] no items to output this cycle
I0323 10:22:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 10:22:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:22:33.409777  543705 memory.go:184] no items to output this cycle
I0323 10:22:33.409778  543705 cpu.go:275] no items to output this cycle
I0323 10:22:39.506169  543705 disk_info.go:125] begin check local disk info of client
I0323 10:22:39.508807  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:22:39.508815  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e2d40 0xc0003e2d80]
E0323 10:22:43.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:22:43.410655  543705 memory.go:191] Add success.
I0323 10:22:43.409973  543705 cpu.go:282] Add success.
I0323 10:22:43.419712  543705 net.go:648] Add success.
I0323 10:22:43.422241  543705 net.go:770] primary dev: ETH0
I0323 10:22:43.422254  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:22:43.422266  543705 net.go:698] Add success.
I0323 10:22:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:22:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:22:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:22:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:22:53.409775  543705 memory.go:184] no items to output this cycle
I0323 10:22:53.409778  543705 cpu.go:275] no items to output this cycle
E0323 10:23:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:23:03.409771  543705 memory.go:184] no items to output this cycle
I0323 10:23:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 10:23:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:23:13.409804  543705 memory.go:191] Add success.
I0323 10:23:13.409806  543705 cpu.go:282] Add success.
W0323 10:23:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:23:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:23:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:23:13.420293  543705 net.go:648] Add success.
I0323 10:23:13.422917  543705 net.go:770] primary dev: ETH0
I0323 10:23:13.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:23:13.422942  543705 net.go:698] Add success.
I0323 10:23:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:23:14.455110  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:23:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 10:23:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:23:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 10:23:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:23:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:23:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:23:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:23:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:23:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:23:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:23:23.409773  543705 memory.go:184] no items to output this cycle
I0323 10:23:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 10:23:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:23:33.409776  543705 memory.go:184] no items to output this cycle
I0323 10:23:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 10:23:39.509187  543705 disk_info.go:125] begin check local disk info of client
I0323 10:23:39.511788  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:23:39.511795  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049ef80 0xc00049efc0]
E0323 10:23:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:23:43.410707  543705 memory.go:191] Add success.
I0323 10:23:43.409819  543705 cpu.go:282] Add success.
I0323 10:23:43.420772  543705 net.go:648] Add success.
I0323 10:23:43.423574  543705 net.go:770] primary dev: ETH0
I0323 10:23:43.423587  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:23:43.423599  543705 net.go:698] Add success.
I0323 10:23:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:23:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:23:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:23:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:23:53.409780  543705 memory.go:184] no items to output this cycle
I0323 10:23:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 10:24:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:24:03.409800  543705 memory.go:184] no items to output this cycle
I0323 10:24:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 10:24:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:24:13.409821  543705 memory.go:191] Add success.
I0323 10:24:13.409837  543705 cpu.go:282] Add success.
W0323 10:24:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:24:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:24:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:24:13.420184  543705 net.go:648] Add success.
I0323 10:24:13.423031  543705 net.go:770] primary dev: ETH0
I0323 10:24:13.423045  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:24:13.423057  543705 net.go:698] Add success.
I0323 10:24:13.468642  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1dc9c662-47ee-4449-8db7-7adda3c38e6c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:24:13.468675  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:24:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:24:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:24:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0323 10:24:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:24:14.456784  543705 disk_worker.go:494] system disk:vda1
I0323 10:24:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:24:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:24:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:24:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:24:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:24:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:24:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:24:23.409811  543705 memory.go:184] no items to output this cycle
I0323 10:24:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 10:24:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:24:33.409785  543705 memory.go:184] no items to output this cycle
I0323 10:24:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 10:24:39.512217  543705 disk_info.go:125] begin check local disk info of client
I0323 10:24:39.514819  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:24:39.514826  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d2100 0xc0003d2140]
I0323 10:24:40.357748  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:24:40.357756  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:24:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:24:43.410955  543705 memory.go:191] Add success.
I0323 10:24:43.409933  543705 cpu.go:282] Add success.
I0323 10:24:43.419776  543705 net.go:648] Add success.
I0323 10:24:43.422566  543705 net.go:770] primary dev: ETH0
I0323 10:24:43.422580  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:24:43.422592  543705 net.go:698] Add success.
I0323 10:24:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:24:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:24:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:24:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:24:53.409776  543705 cpu.go:275] no items to output this cycle
I0323 10:24:53.409786  543705 memory.go:184] no items to output this cycle
E0323 10:25:03.409824  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:25:03.409849  543705 memory.go:184] no items to output this cycle
I0323 10:25:03.409887  543705 cpu.go:275] no items to output this cycle
E0323 10:25:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:25:13.409828  543705 memory.go:191] Add success.
I0323 10:25:13.409829  543705 cpu.go:282] Add success.
W0323 10:25:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:25:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:25:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:25:13.420218  543705 net.go:648] Add success.
I0323 10:25:13.423166  543705 net.go:770] primary dev: ETH0
I0323 10:25:13.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:25:13.423191  543705 net.go:698] Add success.
I0323 10:25:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:25:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:25:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 10:25:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:25:14.456595  543705 disk_worker.go:494] system disk:vda1
I0323 10:25:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:25:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:25:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:25:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:25:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:25:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:25:23.409794  543705 memory.go:184] no items to output this cycle
I0323 10:25:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 10:25:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:25:33.409791  543705 memory.go:184] no items to output this cycle
I0323 10:25:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 10:25:39.515222  543705 disk_info.go:125] begin check local disk info of client
I0323 10:25:39.517779  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:25:39.517786  543705 disk_info.go:196] parse disk info done, disk is : [0xc000491680 0xc0004916c0]
E0323 10:25:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:25:43.410651  543705 memory.go:191] Add success.
I0323 10:25:43.409829  543705 cpu.go:282] Add success.
I0323 10:25:43.420626  543705 net.go:648] Add success.
I0323 10:25:43.423390  543705 net.go:770] primary dev: ETH0
I0323 10:25:43.423403  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:25:43.423415  543705 net.go:698] Add success.
I0323 10:25:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:25:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:25:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:25:53.410413  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:25:53.410429  543705 cpu.go:275] no items to output this cycle
I0323 10:25:53.410435  543705 memory.go:184] no items to output this cycle
E0323 10:26:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:26:03.409771  543705 memory.go:184] no items to output this cycle
I0323 10:26:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 10:26:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:26:13.409806  543705 memory.go:191] Add success.
I0323 10:26:13.409815  543705 cpu.go:282] Add success.
W0323 10:26:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:26:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:26:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:26:13.420366  543705 net.go:648] Add success.
I0323 10:26:13.423247  543705 net.go:770] primary dev: ETH0
I0323 10:26:13.423259  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:26:13.423272  543705 net.go:698] Add success.
I0323 10:26:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:26:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:26:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 10:26:14.455178  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:26:14.456530  543705 disk_worker.go:494] system disk:vda1
I0323 10:26:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:26:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:26:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:26:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:26:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:26:16.472662  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:26:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:26:23.409787  543705 memory.go:184] no items to output this cycle
I0323 10:26:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 10:26:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:26:33.409780  543705 memory.go:184] no items to output this cycle
I0323 10:26:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 10:26:39.518236  543705 disk_info.go:125] begin check local disk info of client
I0323 10:26:39.520806  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:26:39.520817  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369640 0xc000369680]
E0323 10:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:26:43.410773  543705 memory.go:191] Add success.
I0323 10:26:43.409822  543705 cpu.go:282] Add success.
I0323 10:26:43.420629  543705 net.go:648] Add success.
I0323 10:26:43.423547  543705 net.go:770] primary dev: ETH0
I0323 10:26:43.423561  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:26:43.423573  543705 net.go:698] Add success.
I0323 10:26:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:26:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:26:46.458060  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:26:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:26:53.409776  543705 memory.go:184] no items to output this cycle
I0323 10:26:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 10:27:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:27:03.409801  543705 memory.go:184] no items to output this cycle
I0323 10:27:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 10:27:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:27:13.409810  543705 memory.go:191] Add success.
I0323 10:27:13.409831  543705 cpu.go:282] Add success.
W0323 10:27:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:27:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:27:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:27:13.420339  543705 net.go:648] Add success.
I0323 10:27:13.429159  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 10:27:13.429236  543705 net.go:770] primary dev: ETH0
I0323 10:27:13.429250  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:27:13.429265  543705 net.go:698] Add success.
I0323 10:27:13.452776  543705 event_worker.go:152] Polling the log file for events...
I0323 10:27:13.463327  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8e2f84e3-f51e-4258-88cb-cbb82d292088","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:27:13.463362  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 10:27:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:27:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 10:27:14.455195  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:27:14.456764  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:27:14.456773  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:27:14.456778  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:27:14.456844  543705 disk_worker.go:494] system disk:vda1
I0323 10:27:14.456875  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:27:15.456802  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:27:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:27:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:27:16.457987  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:27:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:27:16.458046  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:27:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:27:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:27:23.409801  543705 memory.go:184] no items to output this cycle
I0323 10:27:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 10:27:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:27:33.409799  543705 memory.go:184] no items to output this cycle
I0323 10:27:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 10:27:39.521252  543705 disk_info.go:125] begin check local disk info of client
I0323 10:27:39.523821  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:27:39.523828  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052a7c0 0xc00052a800]
I0323 10:27:40.357890  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:27:40.357895  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:27:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:27:43.410650  543705 memory.go:191] Add success.
I0323 10:27:43.409848  543705 cpu.go:282] Add success.
I0323 10:27:43.419740  543705 net.go:648] Add success.
I0323 10:27:43.422531  543705 net.go:770] primary dev: ETH0
I0323 10:27:43.422544  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:27:43.422555  543705 net.go:698] Add success.
I0323 10:27:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:27:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:27:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:27:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:27:53.409828  543705 memory.go:184] no items to output this cycle
I0323 10:27:53.409831  543705 cpu.go:275] no items to output this cycle
E0323 10:28:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:28:03.409778  543705 memory.go:184] no items to output this cycle
I0323 10:28:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 10:28:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:28:13.409795  543705 memory.go:191] Add success.
I0323 10:28:13.409804  543705 cpu.go:282] Add success.
W0323 10:28:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:28:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:28:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:28:13.420155  543705 net.go:648] Add success.
I0323 10:28:13.422825  543705 net.go:770] primary dev: ETH0
I0323 10:28:13.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:28:13.422853  543705 net.go:698] Add success.
I0323 10:28:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:28:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:28:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 10:28:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:28:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 10:28:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:28:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:28:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:28:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:28:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:28:16.472367  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:28:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:28:23.409765  543705 memory.go:184] no items to output this cycle
I0323 10:28:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 10:28:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:28:33.409777  543705 memory.go:184] no items to output this cycle
I0323 10:28:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 10:28:39.523915  543705 disk_info.go:125] begin check local disk info of client
I0323 10:28:39.526535  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:28:39.526542  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be140 0xc0003be180]
E0323 10:28:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:28:43.409828  543705 cpu.go:282] Add success.
I0323 10:28:43.410847  543705 memory.go:191] Add success.
I0323 10:28:43.419719  543705 net.go:648] Add success.
I0323 10:28:43.422556  543705 net.go:770] primary dev: ETH0
I0323 10:28:43.422569  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:28:43.422580  543705 net.go:698] Add success.
I0323 10:28:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:28:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:28:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:28:53.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:28:53.409759  543705 memory.go:184] no items to output this cycle
I0323 10:28:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 10:29:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:29:03.409804  543705 memory.go:184] no items to output this cycle
I0323 10:29:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 10:29:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:29:13.409801  543705 memory.go:191] Add success.
I0323 10:29:13.409804  543705 cpu.go:282] Add success.
W0323 10:29:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:29:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:29:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:29:13.420141  543705 net.go:648] Add success.
I0323 10:29:13.423324  543705 net.go:770] primary dev: ETH0
I0323 10:29:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:29:13.423351  543705 net.go:698] Add success.
I0323 10:29:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:29:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:29:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 10:29:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:29:14.456528  543705 disk_worker.go:494] system disk:vda1
I0323 10:29:14.456576  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:29:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:29:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:29:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:29:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:29:16.472482  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:29:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:29:23.409805  543705 memory.go:184] no items to output this cycle
I0323 10:29:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 10:29:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:29:33.409819  543705 memory.go:184] no items to output this cycle
I0323 10:29:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 10:29:39.527339  543705 disk_info.go:125] begin check local disk info of client
I0323 10:29:39.529968  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:29:39.529975  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498e40 0xc000498e80]
E0323 10:29:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:29:43.410695  543705 memory.go:191] Add success.
I0323 10:29:43.409837  543705 cpu.go:282] Add success.
I0323 10:29:43.420387  543705 net.go:648] Add success.
I0323 10:29:43.423408  543705 net.go:770] primary dev: ETH0
I0323 10:29:43.423420  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:29:43.423432  543705 net.go:698] Add success.
I0323 10:29:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:29:46.458187  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:29:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:29:53.409770  543705 memory.go:184] no items to output this cycle
I0323 10:29:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 10:30:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:30:03.409772  543705 memory.go:184] no items to output this cycle
I0323 10:30:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 10:30:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:30:13.409826  543705 memory.go:191] Add success.
I0323 10:30:13.409832  543705 cpu.go:282] Add success.
W0323 10:30:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:30:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:30:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:30:13.420144  543705 net.go:648] Add success.
I0323 10:30:13.422772  543705 net.go:770] primary dev: ETH0
I0323 10:30:13.422785  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:30:13.422806  543705 net.go:698] Add success.
I0323 10:30:13.463901  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e6352e23-245c-4dd0-a99c-6851b0b428c2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:30:13.463932  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:30:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:30:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:30:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 10:30:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:30:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 10:30:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:30:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:30:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:30:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:30:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:30:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:30:23.409774  543705 memory.go:184] no items to output this cycle
I0323 10:30:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 10:30:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:30:33.409773  543705 memory.go:184] no items to output this cycle
I0323 10:30:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 10:30:39.530062  543705 disk_info.go:125] begin check local disk info of client
I0323 10:30:39.532752  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:30:39.532758  543705 disk_info.go:196] parse disk info done, disk is : [0xc000475a40 0xc000475a80]
I0323 10:30:40.361743  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:30:40.361750  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:30:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:30:43.410660  543705 memory.go:191] Add success.
I0323 10:30:43.409807  543705 cpu.go:282] Add success.
I0323 10:30:43.420433  543705 net.go:648] Add success.
I0323 10:30:43.423207  543705 net.go:770] primary dev: ETH0
I0323 10:30:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:30:43.423236  543705 net.go:698] Add success.
I0323 10:30:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:30:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:30:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:30:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:30:53.409786  543705 memory.go:184] no items to output this cycle
I0323 10:30:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 10:31:03.410294  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:31:03.410317  543705 memory.go:184] no items to output this cycle
I0323 10:31:03.410333  543705 cpu.go:275] no items to output this cycle
E0323 10:31:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:31:13.409801  543705 memory.go:191] Add success.
I0323 10:31:13.409809  543705 cpu.go:282] Add success.
W0323 10:31:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:31:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:31:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:31:13.420218  543705 net.go:648] Add success.
I0323 10:31:13.423080  543705 net.go:770] primary dev: ETH0
I0323 10:31:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:31:13.423111  543705 net.go:698] Add success.
I0323 10:31:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:31:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:31:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 10:31:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:31:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 10:31:14.456553  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:31:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:31:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:31:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:31:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:31:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:31:23.409801  543705 memory.go:184] no items to output this cycle
I0323 10:31:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 10:31:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:31:33.409797  543705 memory.go:184] no items to output this cycle
I0323 10:31:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 10:31:39.532844  543705 disk_info.go:125] begin check local disk info of client
I0323 10:31:39.535441  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:31:39.535448  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8600 0xc0002a8640]
E0323 10:31:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:31:43.410791  543705 memory.go:191] Add success.
I0323 10:31:43.409824  543705 cpu.go:282] Add success.
I0323 10:31:43.420508  543705 net.go:648] Add success.
I0323 10:31:43.423613  543705 net.go:770] primary dev: ETH0
I0323 10:31:43.423626  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:31:43.423639  543705 net.go:698] Add success.
I0323 10:31:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:31:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:31:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:31:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:31:53.409781  543705 memory.go:184] no items to output this cycle
I0323 10:31:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 10:32:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:32:03.409790  543705 memory.go:184] no items to output this cycle
I0323 10:32:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 10:32:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:32:13.409791  543705 memory.go:191] Add success.
W0323 10:32:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:32:13.409820  543705 cpu.go:282] Add success.
W0323 10:32:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:32:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:32:13.419947  543705 net.go:770] primary dev: ETH0
I0323 10:32:13.419961  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:32:13.419973  543705 net.go:698] Add success.
I0323 10:32:13.420323  543705 net.go:648] Add success.
W0323 10:32:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:32:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 10:32:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:32:14.456781  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:32:14.456790  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:32:14.456796  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:32:14.456925  543705 disk_worker.go:494] system disk:vda1
I0323 10:32:14.456953  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:32:15.456807  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:32:15.456816  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:32:16.458097  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:32:16.458164  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 10:32:16.458176  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:32:16.458185  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:32:16.472559  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:32:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:32:23.409798  543705 memory.go:184] no items to output this cycle
I0323 10:32:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 10:32:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:32:33.409822  543705 memory.go:184] no items to output this cycle
I0323 10:32:33.409835  543705 cpu.go:275] no items to output this cycle
I0323 10:32:39.536393  543705 disk_info.go:125] begin check local disk info of client
I0323 10:32:39.539001  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:32:39.539009  543705 disk_info.go:196] parse disk info done, disk is : [0xc000474640 0xc000474680]
E0323 10:32:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:32:43.410799  543705 memory.go:191] Add success.
I0323 10:32:43.409826  543705 cpu.go:282] Add success.
I0323 10:32:43.419715  543705 net.go:648] Add success.
I0323 10:32:43.422911  543705 net.go:770] primary dev: ETH0
I0323 10:32:43.422924  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:32:43.422936  543705 net.go:698] Add success.
I0323 10:32:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:32:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:32:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:32:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:32:53.409803  543705 memory.go:184] no items to output this cycle
I0323 10:32:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 10:33:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:33:03.409802  543705 memory.go:184] no items to output this cycle
I0323 10:33:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 10:33:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:33:13.409786  543705 memory.go:191] Add success.
I0323 10:33:13.409820  543705 cpu.go:282] Add success.
W0323 10:33:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:33:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:33:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:33:13.420097  543705 net.go:648] Add success.
I0323 10:33:13.423292  543705 net.go:770] primary dev: ETH0
I0323 10:33:13.423305  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:33:13.423317  543705 net.go:698] Add success.
I0323 10:33:13.468087  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cdcfecc0-01e2-495f-bba0-0430a14d69e7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:33:13.468123  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:33:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:33:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:33:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 10:33:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:33:14.456585  543705 disk_worker.go:494] system disk:vda1
I0323 10:33:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:33:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:33:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:33:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:33:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:33:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:33:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:33:23.409798  543705 memory.go:184] no items to output this cycle
I0323 10:33:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 10:33:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:33:33.409796  543705 memory.go:184] no items to output this cycle
I0323 10:33:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 10:33:39.539098  543705 disk_info.go:125] begin check local disk info of client
I0323 10:33:39.541718  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:33:39.541725  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005075c0 0xc000507600]
I0323 10:33:40.365749  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:33:40.365757  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:33:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:33:43.410701  543705 memory.go:191] Add success.
I0323 10:33:43.409815  543705 cpu.go:282] Add success.
I0323 10:33:43.420450  543705 net.go:648] Add success.
I0323 10:33:43.423464  543705 net.go:770] primary dev: ETH0
I0323 10:33:43.423479  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:33:43.423493  543705 net.go:698] Add success.
I0323 10:33:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:33:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:33:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:33:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:33:53.409814  543705 memory.go:184] no items to output this cycle
I0323 10:33:53.409834  543705 cpu.go:275] no items to output this cycle
E0323 10:34:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:34:03.409800  543705 memory.go:184] no items to output this cycle
I0323 10:34:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 10:34:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:34:13.409801  543705 cpu.go:282] Add success.
I0323 10:34:13.409810  543705 memory.go:191] Add success.
W0323 10:34:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:34:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:34:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:34:13.420220  543705 net.go:648] Add success.
I0323 10:34:13.423150  543705 net.go:770] primary dev: ETH0
I0323 10:34:13.423165  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:34:13.423180  543705 net.go:698] Add success.
I0323 10:34:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:34:14.455269  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:34:14.455284  543705 disk_worker.go:708] disk space is not compliant
W0323 10:34:14.455287  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:34:14.457721  543705 disk_worker.go:494] system disk:vda1
I0323 10:34:14.457768  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:34:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:34:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:34:16.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:34:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:34:16.472552  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:34:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:34:23.409783  543705 memory.go:184] no items to output this cycle
I0323 10:34:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 10:34:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:34:33.409800  543705 memory.go:184] no items to output this cycle
I0323 10:34:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 10:34:39.541814  543705 disk_info.go:125] begin check local disk info of client
I0323 10:34:39.544454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:34:39.544462  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ac80 0xc00007acc0]
E0323 10:34:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:34:43.410651  543705 memory.go:191] Add success.
I0323 10:34:43.409829  543705 cpu.go:282] Add success.
I0323 10:34:43.420424  543705 net.go:648] Add success.
I0323 10:34:43.423159  543705 net.go:770] primary dev: ETH0
I0323 10:34:43.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:34:43.423185  543705 net.go:698] Add success.
I0323 10:34:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:34:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:34:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:34:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:34:53.409785  543705 memory.go:184] no items to output this cycle
I0323 10:34:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 10:35:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:35:03.409795  543705 cpu.go:275] no items to output this cycle
I0323 10:35:03.409802  543705 memory.go:184] no items to output this cycle
E0323 10:35:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:35:13.409806  543705 memory.go:191] Add success.
I0323 10:35:13.409806  543705 cpu.go:282] Add success.
W0323 10:35:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:35:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:35:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:35:13.420174  543705 net.go:648] Add success.
I0323 10:35:13.422810  543705 net.go:770] primary dev: ETH0
I0323 10:35:13.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:35:13.422835  543705 net.go:698] Add success.
I0323 10:35:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:35:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:35:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 10:35:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:35:14.456537  543705 disk_worker.go:494] system disk:vda1
I0323 10:35:14.456584  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:35:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:35:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:35:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:35:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:35:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:35:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:35:23.409791  543705 memory.go:184] no items to output this cycle
I0323 10:35:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 10:35:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:35:33.409888  543705 memory.go:184] no items to output this cycle
I0323 10:35:33.409937  543705 cpu.go:275] no items to output this cycle
I0323 10:35:39.545435  543705 disk_info.go:125] begin check local disk info of client
I0323 10:35:39.548018  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:35:39.548024  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467840 0xc000467880]
E0323 10:35:43.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:35:43.409863  543705 memory.go:191] Add success.
I0323 10:35:43.410271  543705 cpu.go:282] Add success.
I0323 10:35:43.419993  543705 net.go:648] Add success.
I0323 10:35:43.420988  543705 net.go:770] primary dev: ETH0
I0323 10:35:43.421009  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:35:43.421028  543705 net.go:698] Add success.
I0323 10:35:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:35:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:35:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:35:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:35:53.409780  543705 memory.go:184] no items to output this cycle
I0323 10:35:53.409781  543705 cpu.go:275] no items to output this cycle
E0323 10:36:03.410712  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:36:03.410734  543705 memory.go:184] no items to output this cycle
I0323 10:36:03.410747  543705 cpu.go:275] no items to output this cycle
E0323 10:36:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:36:13.409829  543705 memory.go:191] Add success.
I0323 10:36:13.409829  543705 cpu.go:282] Add success.
W0323 10:36:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:36:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:36:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:36:13.420446  543705 net.go:648] Add success.
I0323 10:36:13.423353  543705 net.go:770] primary dev: ETH0
I0323 10:36:13.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:36:13.423389  543705 net.go:698] Add success.
I0323 10:36:14.186084  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"22d17133-a538-48e9-9616-eba3f5c70591","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:36:14.186133  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:36:14.454499  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:36:14.454712  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:36:14.454723  543705 disk_worker.go:708] disk space is not compliant
W0323 10:36:14.454726  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:36:14.456084  543705 disk_worker.go:494] system disk:vda1
I0323 10:36:14.456127  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:36:15.455654  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:36:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:36:16.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:36:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:36:16.472514  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:36:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:36:23.409807  543705 memory.go:184] no items to output this cycle
I0323 10:36:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 10:36:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:36:33.409778  543705 memory.go:184] no items to output this cycle
I0323 10:36:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 10:36:39.548380  543705 disk_info.go:125] begin check local disk info of client
I0323 10:36:39.550945  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:36:39.550952  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa100 0xc0001aa140]
I0323 10:36:40.366777  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:36:40.366782  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:36:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:36:43.410564  543705 memory.go:191] Add success.
I0323 10:36:43.409809  543705 cpu.go:282] Add success.
I0323 10:36:43.420295  543705 net.go:648] Add success.
I0323 10:36:43.422704  543705 net.go:770] primary dev: ETH0
I0323 10:36:43.422717  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:36:43.422730  543705 net.go:698] Add success.
I0323 10:36:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:36:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:36:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:36:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:36:53.409778  543705 memory.go:184] no items to output this cycle
I0323 10:36:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:37:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:37:03.409818  543705 memory.go:184] no items to output this cycle
I0323 10:37:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 10:37:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:37:13.409801  543705 memory.go:191] Add success.
I0323 10:37:13.409807  543705 cpu.go:282] Add success.
W0323 10:37:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:37:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:37:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:37:13.420276  543705 net.go:648] Add success.
I0323 10:37:13.423148  543705 net.go:770] primary dev: ETH0
I0323 10:37:13.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:37:13.423177  543705 net.go:698] Add success.
I0323 10:37:13.453739  543705 event_worker.go:152] Polling the log file for events...
W0323 10:37:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:37:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 10:37:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:37:14.456927  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:37:14.456937  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:37:14.456943  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:37:14.456991  543705 disk_worker.go:494] system disk:vda1
I0323 10:37:14.457033  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:37:15.456936  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:37:15.456947  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:37:16.458109  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:37:16.458169  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:37:16.458189  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:37:16.458237  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:37:16.472565  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:37:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:37:23.409814  543705 memory.go:184] no items to output this cycle
I0323 10:37:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 10:37:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:37:33.409813  543705 memory.go:184] no items to output this cycle
I0323 10:37:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 10:37:39.551039  543705 disk_info.go:125] begin check local disk info of client
I0323 10:37:39.553657  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:37:39.553666  543705 disk_info.go:196] parse disk info done, disk is : [0xc00053f980 0xc00053f9c0]
E0323 10:37:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:37:43.410687  543705 memory.go:191] Add success.
I0323 10:37:43.409815  543705 cpu.go:282] Add success.
I0323 10:37:43.420398  543705 net.go:648] Add success.
I0323 10:37:43.423062  543705 net.go:770] primary dev: ETH0
I0323 10:37:43.423077  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:37:43.423091  543705 net.go:698] Add success.
I0323 10:37:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:37:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:37:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:37:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:37:53.409784  543705 memory.go:184] no items to output this cycle
I0323 10:37:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 10:38:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:38:03.409777  543705 memory.go:184] no items to output this cycle
I0323 10:38:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 10:38:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:38:13.409811  543705 memory.go:191] Add success.
I0323 10:38:13.409812  543705 cpu.go:282] Add success.
W0323 10:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:38:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:38:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:38:13.420187  543705 net.go:648] Add success.
I0323 10:38:13.422787  543705 net.go:770] primary dev: ETH0
I0323 10:38:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:38:13.422812  543705 net.go:698] Add success.
I0323 10:38:14.454993  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:38:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:38:14.455252  543705 disk_worker.go:708] disk space is not compliant
W0323 10:38:14.455256  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:38:14.457060  543705 disk_worker.go:494] system disk:vda1
I0323 10:38:14.457110  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:38:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:38:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:38:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:38:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:38:16.472528  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:38:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:38:23.409795  543705 memory.go:184] no items to output this cycle
I0323 10:38:23.409798  543705 cpu.go:275] no items to output this cycle
I0323 10:38:33.409797  543705 cpu.go:275] no items to output this cycle
E0323 10:38:33.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:38:33.409835  543705 memory.go:184] no items to output this cycle
I0323 10:38:39.554472  543705 disk_info.go:125] begin check local disk info of client
I0323 10:38:39.557022  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:38:39.557028  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359180 0xc0003591c0]
E0323 10:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:38:43.410731  543705 memory.go:191] Add success.
I0323 10:38:43.409807  543705 cpu.go:282] Add success.
I0323 10:38:43.420447  543705 net.go:648] Add success.
I0323 10:38:43.423422  543705 net.go:770] primary dev: ETH0
I0323 10:38:43.423436  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:38:43.423451  543705 net.go:698] Add success.
I0323 10:38:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:38:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:38:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:38:53.409809  543705 memory.go:184] no items to output this cycle
I0323 10:38:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 10:39:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:39:03.409798  543705 memory.go:184] no items to output this cycle
I0323 10:39:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 10:39:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:39:13.409816  543705 memory.go:191] Add success.
I0323 10:39:13.409832  543705 cpu.go:282] Add success.
W0323 10:39:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:39:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:39:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:39:13.420189  543705 net.go:648] Add success.
I0323 10:39:13.423360  543705 net.go:770] primary dev: ETH0
I0323 10:39:13.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:39:13.423387  543705 net.go:698] Add success.
I0323 10:39:13.467723  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8eb53210-9027-41e4-a824-4a606f06c197","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:39:13.467756  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:39:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:39:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:39:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 10:39:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:39:14.456657  543705 disk_worker.go:494] system disk:vda1
I0323 10:39:14.456716  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:39:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:39:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:39:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:39:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:39:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:39:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:39:23.409776  543705 memory.go:184] no items to output this cycle
I0323 10:39:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 10:39:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:39:33.409766  543705 memory.go:184] no items to output this cycle
I0323 10:39:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 10:39:39.557438  543705 disk_info.go:125] begin check local disk info of client
I0323 10:39:39.559981  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:39:39.559988  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5d00 0xc0000c5d40]
I0323 10:39:40.369718  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:39:40.369724  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:39:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:39:43.410649  543705 memory.go:191] Add success.
I0323 10:39:43.409802  543705 cpu.go:282] Add success.
I0323 10:39:43.420351  543705 net.go:648] Add success.
I0323 10:39:43.422992  543705 net.go:770] primary dev: ETH0
I0323 10:39:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:39:43.423023  543705 net.go:698] Add success.
I0323 10:39:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:39:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:39:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:39:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:39:53.409814  543705 memory.go:184] no items to output this cycle
I0323 10:39:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 10:40:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:40:03.409784  543705 memory.go:184] no items to output this cycle
I0323 10:40:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 10:40:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:40:13.409802  543705 memory.go:191] Add success.
I0323 10:40:13.409802  543705 cpu.go:282] Add success.
W0323 10:40:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:40:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:40:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:40:13.420575  543705 net.go:648] Add success.
I0323 10:40:13.423409  543705 net.go:770] primary dev: ETH0
I0323 10:40:13.423422  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:40:13.423434  543705 net.go:698] Add success.
I0323 10:40:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:40:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:40:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 10:40:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:40:14.456530  543705 disk_worker.go:494] system disk:vda1
I0323 10:40:14.456558  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:40:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:40:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:40:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:40:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:40:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:40:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:40:23.409771  543705 memory.go:184] no items to output this cycle
I0323 10:40:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:40:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:40:33.409777  543705 memory.go:184] no items to output this cycle
I0323 10:40:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 10:40:39.560468  543705 disk_info.go:125] begin check local disk info of client
I0323 10:40:39.563202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:40:39.563210  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003594c0 0xc000359500]
E0323 10:40:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:40:43.410785  543705 memory.go:191] Add success.
I0323 10:40:43.409813  543705 cpu.go:282] Add success.
I0323 10:40:43.420504  543705 net.go:648] Add success.
I0323 10:40:43.423432  543705 net.go:770] primary dev: ETH0
I0323 10:40:43.423444  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:40:43.423457  543705 net.go:698] Add success.
I0323 10:40:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:40:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:40:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:40:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:40:53.409784  543705 cpu.go:275] no items to output this cycle
I0323 10:40:53.409796  543705 memory.go:184] no items to output this cycle
E0323 10:41:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:41:03.409777  543705 cpu.go:275] no items to output this cycle
I0323 10:41:03.409789  543705 memory.go:184] no items to output this cycle
E0323 10:41:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:41:13.409792  543705 memory.go:191] Add success.
W0323 10:41:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:41:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:41:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:41:13.409835  543705 cpu.go:282] Add success.
I0323 10:41:13.420141  543705 net.go:648] Add success.
I0323 10:41:13.422950  543705 net.go:770] primary dev: ETH0
I0323 10:41:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:41:13.422980  543705 net.go:698] Add success.
I0323 10:41:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:41:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:41:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 10:41:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:41:14.457134  543705 disk_worker.go:494] system disk:vda1
I0323 10:41:14.457185  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:41:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:41:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:41:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:41:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:41:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:41:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:41:23.409771  543705 memory.go:184] no items to output this cycle
I0323 10:41:23.409776  543705 cpu.go:275] no items to output this cycle
E0323 10:41:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:41:33.409790  543705 memory.go:184] no items to output this cycle
I0323 10:41:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 10:41:39.563479  543705 disk_info.go:125] begin check local disk info of client
I0323 10:41:39.566062  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:41:39.566068  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359840 0xc000359880]
E0323 10:41:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:41:43.410708  543705 memory.go:191] Add success.
I0323 10:41:43.409841  543705 cpu.go:282] Add success.
I0323 10:41:43.420429  543705 net.go:648] Add success.
I0323 10:41:43.423209  543705 net.go:770] primary dev: ETH0
I0323 10:41:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:41:43.423235  543705 net.go:698] Add success.
I0323 10:41:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:41:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:41:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:41:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:41:53.409788  543705 memory.go:184] no items to output this cycle
I0323 10:41:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 10:42:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:42:03.409802  543705 memory.go:184] no items to output this cycle
I0323 10:42:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 10:42:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:42:13.409812  543705 memory.go:191] Add success.
I0323 10:42:13.409816  543705 cpu.go:282] Add success.
W0323 10:42:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:42:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:42:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:42:13.420169  543705 net.go:648] Add success.
I0323 10:42:13.422830  543705 net.go:770] primary dev: ETH0
I0323 10:42:13.422844  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:42:13.422857  543705 net.go:698] Add success.
I0323 10:42:13.468940  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bd524fb0-a85b-4938-938c-34125aa1a813","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:42:13.468970  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 10:42:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:42:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 10:42:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:42:14.455901  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:42:14.455910  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:42:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:42:14.456560  543705 disk_worker.go:494] system disk:vda1
I0323 10:42:14.456601  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:42:15.456787  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:42:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:42:16.457954  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:42:16.457953  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:42:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:42:16.458029  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:42:16.472353  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:42:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:42:23.409764  543705 memory.go:184] no items to output this cycle
I0323 10:42:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 10:42:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:42:33.409797  543705 memory.go:184] no items to output this cycle
I0323 10:42:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 10:42:39.566161  543705 disk_info.go:125] begin check local disk info of client
I0323 10:42:39.569187  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:42:39.569197  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366e00 0xc000366e40]
I0323 10:42:40.369883  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:42:40.369890  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:42:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:42:43.410765  543705 memory.go:191] Add success.
I0323 10:42:43.409810  543705 cpu.go:282] Add success.
I0323 10:42:43.420450  543705 net.go:648] Add success.
I0323 10:42:43.423606  543705 net.go:770] primary dev: ETH0
I0323 10:42:43.423620  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:42:43.423633  543705 net.go:698] Add success.
I0323 10:42:46.457752  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:42:46.457847  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:42:46.457881  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:42:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:42:53.409816  543705 memory.go:184] no items to output this cycle
I0323 10:42:53.409822  543705 cpu.go:275] no items to output this cycle
I0323 10:43:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 10:43:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:43:03.409819  543705 memory.go:184] no items to output this cycle
E0323 10:43:13.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:43:13.409837  543705 cpu.go:282] Add success.
I0323 10:43:13.409840  543705 memory.go:191] Add success.
W0323 10:43:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:43:13.409891  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:43:13.409896  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:43:13.419833  543705 net.go:648] Add success.
I0323 10:43:13.422743  543705 net.go:770] primary dev: ETH0
I0323 10:43:13.422761  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:43:13.422778  543705 net.go:698] Add success.
I0323 10:43:14.453941  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:43:14.455273  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:43:14.455286  543705 disk_worker.go:708] disk space is not compliant
W0323 10:43:14.455289  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:43:14.456709  543705 disk_worker.go:494] system disk:vda1
I0323 10:43:14.456744  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:43:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:43:16.458025  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:43:16.458117  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:43:16.458156  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:43:16.472593  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:43:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:43:23.409796  543705 memory.go:184] no items to output this cycle
I0323 10:43:23.409803  543705 cpu.go:275] no items to output this cycle
I0323 10:43:33.409801  543705 cpu.go:275] no items to output this cycle
E0323 10:43:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:43:33.409825  543705 memory.go:184] no items to output this cycle
I0323 10:43:39.569288  543705 disk_info.go:125] begin check local disk info of client
I0323 10:43:39.571922  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:43:39.571929  543705 disk_info.go:196] parse disk info done, disk is : [0xc000472380 0xc0004723c0]
E0323 10:43:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:43:43.410709  543705 memory.go:191] Add success.
I0323 10:43:43.409813  543705 cpu.go:282] Add success.
I0323 10:43:43.420537  543705 net.go:648] Add success.
I0323 10:43:43.423415  543705 net.go:770] primary dev: ETH0
I0323 10:43:43.423432  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:43:43.423447  543705 net.go:698] Add success.
I0323 10:43:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:43:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:43:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:43:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 10:43:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:43:53.409818  543705 memory.go:184] no items to output this cycle
E0323 10:44:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:44:03.409812  543705 memory.go:184] no items to output this cycle
I0323 10:44:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 10:44:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:44:13.409811  543705 memory.go:191] Add success.
I0323 10:44:13.409814  543705 cpu.go:282] Add success.
W0323 10:44:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:44:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:44:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:44:13.420296  543705 net.go:648] Add success.
I0323 10:44:13.423223  543705 net.go:770] primary dev: ETH0
I0323 10:44:13.423236  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:44:13.423247  543705 net.go:698] Add success.
I0323 10:44:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:44:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:44:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0323 10:44:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:44:14.456808  543705 disk_worker.go:494] system disk:vda1
I0323 10:44:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:44:15.455988  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:44:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:44:16.458092  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:44:16.458121  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:44:16.472577  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:44:23.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:44:23.409827  543705 memory.go:184] no items to output this cycle
I0323 10:44:23.410012  543705 cpu.go:275] no items to output this cycle
E0323 10:44:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:44:33.409789  543705 memory.go:184] no items to output this cycle
I0323 10:44:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 10:44:39.572030  543705 disk_info.go:125] begin check local disk info of client
I0323 10:44:39.574748  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:44:39.574755  543705 disk_info.go:196] parse disk info done, disk is : [0xc000342a40 0xc000342a80]
E0323 10:44:43.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:44:43.409828  543705 memory.go:191] Add success.
I0323 10:44:43.409835  543705 cpu.go:282] Add success.
I0323 10:44:43.420139  543705 net.go:648] Add success.
I0323 10:44:43.421289  543705 net.go:770] primary dev: ETH0
I0323 10:44:43.421304  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:44:43.421318  543705 net.go:698] Add success.
I0323 10:44:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:44:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:44:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:44:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:44:53.409792  543705 memory.go:184] no items to output this cycle
I0323 10:44:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 10:45:03.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:45:03.409904  543705 memory.go:184] no items to output this cycle
I0323 10:45:03.409914  543705 cpu.go:275] no items to output this cycle
E0323 10:45:13.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:45:13.409820  543705 cpu.go:282] Add success.
I0323 10:45:13.409844  543705 memory.go:191] Add success.
W0323 10:45:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:45:13.409898  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:45:13.409903  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:45:13.420139  543705 net.go:648] Add success.
I0323 10:45:13.421264  543705 net.go:770] primary dev: ETH0
I0323 10:45:13.421277  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:45:13.421289  543705 net.go:698] Add success.
I0323 10:45:13.516401  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d65fca1e-8b3f-4fbf-98e2-3b4aa7e0e63e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:45:13.516440  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:45:14.453948  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:45:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:45:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0323 10:45:14.455255  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:45:14.456692  543705 disk_worker.go:494] system disk:vda1
I0323 10:45:14.456729  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:45:15.456037  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:45:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:45:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:45:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:45:16.472499  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:45:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:45:23.409799  543705 cpu.go:275] no items to output this cycle
I0323 10:45:23.409804  543705 memory.go:184] no items to output this cycle
E0323 10:45:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:45:33.409827  543705 memory.go:184] no items to output this cycle
I0323 10:45:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 10:45:39.574843  543705 disk_info.go:125] begin check local disk info of client
I0323 10:45:39.577384  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:45:39.577390  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003862c0 0xc000386300]
I0323 10:45:40.370086  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:45:40.370097  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:45:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:45:43.410643  543705 memory.go:191] Add success.
I0323 10:45:43.409819  543705 cpu.go:282] Add success.
I0323 10:45:43.420466  543705 net.go:648] Add success.
I0323 10:45:43.423226  543705 net.go:770] primary dev: ETH0
I0323 10:45:43.423240  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:45:43.423254  543705 net.go:698] Add success.
I0323 10:45:46.461747  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:45:46.461849  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:45:46.461881  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:45:53.410232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:45:53.410251  543705 memory.go:184] no items to output this cycle
I0323 10:45:53.410314  543705 cpu.go:275] no items to output this cycle
E0323 10:46:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:46:03.409805  543705 memory.go:184] no items to output this cycle
I0323 10:46:03.409836  543705 cpu.go:275] no items to output this cycle
E0323 10:46:13.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:46:13.409858  543705 memory.go:191] Add success.
W0323 10:46:13.409897  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:46:13.409916  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:46:13.409920  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:46:13.410478  543705 cpu.go:282] Add success.
I0323 10:46:13.420572  543705 net.go:648] Add success.
I0323 10:46:13.421542  543705 net.go:770] primary dev: ETH0
I0323 10:46:13.421557  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:46:13.421571  543705 net.go:698] Add success.
I0323 10:46:14.453933  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:46:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:46:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 10:46:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:46:14.456562  543705 disk_worker.go:494] system disk:vda1
I0323 10:46:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:46:15.455046  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:46:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:46:16.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:46:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:46:16.472638  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:46:23.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:46:23.409841  543705 memory.go:184] no items to output this cycle
I0323 10:46:23.409849  543705 cpu.go:275] no items to output this cycle
E0323 10:46:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:46:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 10:46:33.409802  543705 memory.go:184] no items to output this cycle
I0323 10:46:39.577488  543705 disk_info.go:125] begin check local disk info of client
I0323 10:46:39.580206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:46:39.580216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a9e80 0xc0002a9ec0]
E0323 10:46:43.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:46:43.410808  543705 memory.go:191] Add success.
I0323 10:46:43.409878  543705 cpu.go:282] Add success.
I0323 10:46:43.420589  543705 net.go:648] Add success.
I0323 10:46:43.423269  543705 net.go:770] primary dev: ETH0
I0323 10:46:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:46:43.423300  543705 net.go:698] Add success.
I0323 10:46:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:46:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:46:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:46:53.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:46:53.409842  543705 memory.go:184] no items to output this cycle
I0323 10:46:53.410015  543705 cpu.go:275] no items to output this cycle
E0323 10:47:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:47:03.409868  543705 memory.go:184] no items to output this cycle
I0323 10:47:03.409952  543705 cpu.go:275] no items to output this cycle
E0323 10:47:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:47:13.409830  543705 memory.go:191] Add success.
I0323 10:47:13.409843  543705 cpu.go:282] Add success.
W0323 10:47:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:47:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:47:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:47:13.420125  543705 net.go:648] Add success.
I0323 10:47:13.422657  543705 net.go:770] primary dev: ETH0
I0323 10:47:13.422672  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:47:13.422687  543705 net.go:698] Add success.
I0323 10:47:13.453227  543705 event_worker.go:152] Polling the log file for events...
W0323 10:47:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:47:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 10:47:14.455223  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:47:14.457106  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:47:14.457116  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:47:14.457123  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:47:14.457182  543705 disk_worker.go:494] system disk:vda1
I0323 10:47:14.457226  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:47:15.457464  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:47:15.457479  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:47:16.458097  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:47:16.458168  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 10:47:16.458186  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:47:16.458190  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:47:16.472587  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:47:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:47:23.409799  543705 cpu.go:275] no items to output this cycle
I0323 10:47:23.409810  543705 memory.go:184] no items to output this cycle
I0323 10:47:33.409865  543705 cpu.go:275] no items to output this cycle
E0323 10:47:33.409986  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:47:33.410008  543705 memory.go:184] no items to output this cycle
I0323 10:47:39.580561  543705 disk_info.go:125] begin check local disk info of client
I0323 10:47:39.583145  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:47:39.583152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000305700 0xc000305740]
E0323 10:47:43.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:47:43.409828  543705 memory.go:191] Add success.
I0323 10:47:43.409836  543705 cpu.go:282] Add success.
I0323 10:47:43.420180  543705 net.go:648] Add success.
I0323 10:47:43.421187  543705 net.go:770] primary dev: ETH0
I0323 10:47:43.421208  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:47:43.421224  543705 net.go:698] Add success.
I0323 10:47:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:47:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:47:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:47:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:47:53.409815  543705 memory.go:184] no items to output this cycle
I0323 10:47:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 10:48:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:48:03.409797  543705 memory.go:184] no items to output this cycle
I0323 10:48:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 10:48:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:48:13.409807  543705 memory.go:191] Add success.
W0323 10:48:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:48:13.409839  543705 cpu.go:282] Add success.
W0323 10:48:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:48:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:48:13.420349  543705 net.go:648] Add success.
I0323 10:48:13.423431  543705 net.go:770] primary dev: ETH0
I0323 10:48:13.423447  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:48:13.423460  543705 net.go:698] Add success.
I0323 10:48:13.475226  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cbfa9ebe-1029-4b65-a085-5754bb009608","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:48:13.475261  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:48:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:48:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:48:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 10:48:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:48:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 10:48:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:48:15.456014  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:48:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:48:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:48:16.458112  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:48:16.472517  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:48:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:48:23.409817  543705 memory.go:184] no items to output this cycle
I0323 10:48:23.409825  543705 cpu.go:275] no items to output this cycle
E0323 10:48:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:48:33.409813  543705 memory.go:184] no items to output this cycle
I0323 10:48:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 10:48:39.583256  543705 disk_info.go:125] begin check local disk info of client
I0323 10:48:39.585993  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:48:39.586002  543705 disk_info.go:196] parse disk info done, disk is : [0xc000294bc0 0xc000294c00]
I0323 10:48:40.373751  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:48:40.373759  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 10:48:43.413955  543705 cpu.go:282] Add success.
E0323 10:48:43.414326  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:48:43.414979  543705 memory.go:191] Add success.
I0323 10:48:43.430073  543705 net.go:648] Add success.
I0323 10:48:43.466327  543705 net.go:770] primary dev: ETH0
I0323 10:48:43.466350  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:48:43.466369  543705 net.go:698] Add success.
I0323 10:48:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:48:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:48:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:48:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:48:53.409811  543705 memory.go:184] no items to output this cycle
I0323 10:48:53.409823  543705 cpu.go:275] no items to output this cycle
E0323 10:49:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:49:03.409791  543705 memory.go:184] no items to output this cycle
I0323 10:49:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 10:49:13.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:49:13.409905  543705 memory.go:191] Add success.
W0323 10:49:13.409935  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:49:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:49:13.409955  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:49:13.410104  543705 cpu.go:282] Add success.
I0323 10:49:13.419859  543705 net.go:648] Add success.
I0323 10:49:13.420942  543705 net.go:770] primary dev: ETH0
I0323 10:49:13.420959  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:49:13.420976  543705 net.go:698] Add success.
I0323 10:49:14.453954  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:49:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:49:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 10:49:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:49:14.458883  543705 disk_worker.go:494] system disk:vda1
I0323 10:49:14.458933  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:49:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:49:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:49:16.458091  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:49:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:49:16.472541  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:49:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:49:23.409816  543705 memory.go:184] no items to output this cycle
I0323 10:49:23.409824  543705 cpu.go:275] no items to output this cycle
I0323 10:49:33.413268  543705 cpu.go:275] no items to output this cycle
E0323 10:49:33.413955  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:49:33.413990  543705 memory.go:184] no items to output this cycle
I0323 10:49:39.586094  543705 disk_info.go:125] begin check local disk info of client
I0323 10:49:39.588729  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:49:39.588735  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359740 0xc000359780]
I0323 10:49:43.410746  543705 cpu.go:282] Add success.
E0323 10:49:43.411118  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:49:43.411144  543705 memory.go:191] Add success.
I0323 10:49:43.420801  543705 net.go:648] Add success.
I0323 10:49:43.421914  543705 net.go:770] primary dev: ETH0
I0323 10:49:43.421933  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:49:43.421950  543705 net.go:698] Add success.
I0323 10:49:46.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:49:46.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:49:46.458118  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:49:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:49:53.409796  543705 memory.go:184] no items to output this cycle
I0323 10:49:53.409840  543705 cpu.go:275] no items to output this cycle
E0323 10:50:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:50:03.409832  543705 memory.go:184] no items to output this cycle
I0323 10:50:03.409845  543705 cpu.go:275] no items to output this cycle
E0323 10:50:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:50:13.409928  543705 memory.go:191] Add success.
I0323 10:50:13.409932  543705 cpu.go:282] Add success.
W0323 10:50:13.409962  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:50:13.410002  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:50:13.410006  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:50:13.419723  543705 net.go:648] Add success.
I0323 10:50:13.422460  543705 net.go:770] primary dev: ETH0
I0323 10:50:13.422472  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:50:13.422484  543705 net.go:698] Add success.
I0323 10:50:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:50:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:50:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 10:50:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:50:14.456558  543705 disk_worker.go:494] system disk:vda1
I0323 10:50:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:50:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:50:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:50:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:50:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:50:16.472491  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:50:23.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:50:23.409832  543705 memory.go:184] no items to output this cycle
I0323 10:50:23.409846  543705 cpu.go:275] no items to output this cycle
E0323 10:50:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:50:33.409818  543705 memory.go:184] no items to output this cycle
I0323 10:50:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 10:50:39.589608  543705 disk_info.go:125] begin check local disk info of client
I0323 10:50:39.592269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:50:39.592275  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358ec0 0xc000358f00]
E0323 10:50:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:50:43.409814  543705 memory.go:191] Add success.
I0323 10:50:43.409875  543705 cpu.go:282] Add success.
I0323 10:50:43.420202  543705 net.go:648] Add success.
I0323 10:50:43.421178  543705 net.go:770] primary dev: ETH0
I0323 10:50:43.421193  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:50:43.421206  543705 net.go:698] Add success.
I0323 10:50:46.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:50:46.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:50:46.458133  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:50:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:50:53.409804  543705 memory.go:184] no items to output this cycle
I0323 10:50:53.409830  543705 cpu.go:275] no items to output this cycle
E0323 10:51:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:51:03.409784  543705 memory.go:184] no items to output this cycle
I0323 10:51:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 10:51:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:51:13.409803  543705 memory.go:191] Add success.
I0323 10:51:13.409820  543705 cpu.go:282] Add success.
W0323 10:51:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:51:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:51:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:51:13.420461  543705 net.go:648] Add success.
I0323 10:51:13.423178  543705 net.go:770] primary dev: ETH0
I0323 10:51:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:51:13.423207  543705 net.go:698] Add success.
I0323 10:51:13.534447  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b882e7b8-9824-4904-8704-2ebe9bb5bfea","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:51:13.534478  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:51:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:51:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 10:51:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:51:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 10:51:14.456634  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:51:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:51:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:51:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:51:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:51:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:51:23.409767  543705 memory.go:184] no items to output this cycle
I0323 10:51:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 10:51:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:51:33.409779  543705 memory.go:184] no items to output this cycle
I0323 10:51:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 10:51:39.592612  543705 disk_info.go:125] begin check local disk info of client
I0323 10:51:39.595209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:51:39.595216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aac80 0xc0001aacc0]
I0323 10:51:40.373892  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:51:40.373897  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:51:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:51:43.410598  543705 memory.go:191] Add success.
I0323 10:51:43.409838  543705 cpu.go:282] Add success.
I0323 10:51:43.420323  543705 net.go:648] Add success.
I0323 10:51:43.422856  543705 net.go:770] primary dev: ETH0
I0323 10:51:43.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:51:43.422883  543705 net.go:698] Add success.
I0323 10:51:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:51:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:51:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:51:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:51:53.409801  543705 memory.go:184] no items to output this cycle
I0323 10:51:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 10:52:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:52:03.409781  543705 cpu.go:275] no items to output this cycle
I0323 10:52:03.409784  543705 memory.go:184] no items to output this cycle
E0323 10:52:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:52:13.409781  543705 memory.go:191] Add success.
I0323 10:52:13.409803  543705 cpu.go:282] Add success.
W0323 10:52:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:52:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:52:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:52:13.420144  543705 net.go:648] Add success.
I0323 10:52:13.422898  543705 net.go:770] primary dev: ETH0
I0323 10:52:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:52:13.422922  543705 net.go:698] Add success.
W0323 10:52:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:52:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 10:52:14.455170  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:52:14.456126  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:52:14.456135  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:52:14.456142  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:52:14.456426  543705 disk_worker.go:494] system disk:vda1
I0323 10:52:14.456456  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:52:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:52:15.456795  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:52:16.457957  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:52:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:52:16.458009  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:52:16.458028  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:52:16.472358  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:52:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:52:23.409791  543705 memory.go:184] no items to output this cycle
I0323 10:52:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 10:52:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:52:33.409798  543705 memory.go:184] no items to output this cycle
I0323 10:52:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 10:52:39.595630  543705 disk_info.go:125] begin check local disk info of client
I0323 10:52:39.598023  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:52:39.598031  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a9cc0 0xc0004a9d00]
E0323 10:52:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:52:43.410758  543705 memory.go:191] Add success.
I0323 10:52:43.409813  543705 cpu.go:282] Add success.
I0323 10:52:43.420498  543705 net.go:648] Add success.
I0323 10:52:43.423273  543705 net.go:770] primary dev: ETH0
I0323 10:52:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:52:43.423297  543705 net.go:698] Add success.
I0323 10:52:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:52:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:52:46.458053  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:52:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:52:53.409804  543705 memory.go:184] no items to output this cycle
I0323 10:52:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 10:53:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:53:03.409780  543705 cpu.go:275] no items to output this cycle
I0323 10:53:03.409784  543705 memory.go:184] no items to output this cycle
E0323 10:53:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:53:13.409791  543705 memory.go:191] Add success.
I0323 10:53:13.409796  543705 cpu.go:282] Add success.
W0323 10:53:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:53:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:53:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:53:13.420130  543705 net.go:648] Add success.
I0323 10:53:13.423076  543705 net.go:770] primary dev: ETH0
I0323 10:53:13.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:53:13.423113  543705 net.go:698] Add success.
I0323 10:53:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:53:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:53:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 10:53:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:53:14.456519  543705 disk_worker.go:494] system disk:vda1
I0323 10:53:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:53:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:53:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:53:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:53:23.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:53:23.409931  543705 memory.go:184] no items to output this cycle
I0323 10:53:23.410055  543705 cpu.go:275] no items to output this cycle
E0323 10:53:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:53:33.409797  543705 memory.go:184] no items to output this cycle
I0323 10:53:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 10:53:39.598646  543705 disk_info.go:125] begin check local disk info of client
I0323 10:53:39.601293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:53:39.601301  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5c00 0xc0000c5c40]
E0323 10:53:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:53:43.410595  543705 memory.go:191] Add success.
I0323 10:53:43.409800  543705 cpu.go:282] Add success.
I0323 10:53:43.420397  543705 net.go:648] Add success.
I0323 10:53:43.422971  543705 net.go:770] primary dev: ETH0
I0323 10:53:43.422986  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:53:43.423001  543705 net.go:698] Add success.
I0323 10:53:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:53:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:53:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:53:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:53:53.409794  543705 memory.go:184] no items to output this cycle
I0323 10:53:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 10:54:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:54:03.409777  543705 memory.go:184] no items to output this cycle
I0323 10:54:03.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:54:13.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:54:13.409775  543705 memory.go:191] Add success.
W0323 10:54:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:54:13.409810  543705 cpu.go:282] Add success.
W0323 10:54:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:54:13.409818  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:54:13.420152  543705 net.go:648] Add success.
I0323 10:54:13.422718  543705 net.go:770] primary dev: ETH0
I0323 10:54:13.422733  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:54:13.422746  543705 net.go:698] Add success.
I0323 10:54:13.889048  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d34ca847-a3d0-4059-8e65-4cf5836e7d37","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:54:13.889084  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 10:54:14.454683  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:54:14.454876  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:54:14.454886  543705 disk_worker.go:708] disk space is not compliant
W0323 10:54:14.454889  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:54:14.456237  543705 disk_worker.go:494] system disk:vda1
I0323 10:54:14.456294  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:54:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:54:16.457575  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:54:16.457662  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:54:16.457687  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:54:16.473032  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:54:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:54:23.409778  543705 memory.go:184] no items to output this cycle
I0323 10:54:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 10:54:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:54:33.409782  543705 memory.go:184] no items to output this cycle
I0323 10:54:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 10:54:39.601674  543705 disk_info.go:125] begin check local disk info of client
I0323 10:54:39.604293  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:54:39.604300  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2780 0xc0003b27c0]
I0323 10:54:40.374778  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:54:40.374784  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:54:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:54:43.410763  543705 memory.go:191] Add success.
I0323 10:54:43.409815  543705 cpu.go:282] Add success.
I0323 10:54:43.420513  543705 net.go:648] Add success.
I0323 10:54:43.423644  543705 net.go:770] primary dev: ETH0
I0323 10:54:43.423657  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:54:43.423671  543705 net.go:698] Add success.
I0323 10:54:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:54:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:54:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:54:53.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:54:53.409764  543705 memory.go:184] no items to output this cycle
I0323 10:54:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 10:55:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:55:03.409801  543705 memory.go:184] no items to output this cycle
I0323 10:55:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 10:55:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:55:13.409804  543705 memory.go:191] Add success.
I0323 10:55:13.409805  543705 cpu.go:282] Add success.
W0323 10:55:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:55:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:55:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:55:13.420186  543705 net.go:648] Add success.
I0323 10:55:13.423583  543705 net.go:770] primary dev: ETH0
I0323 10:55:13.423596  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:55:13.423608  543705 net.go:698] Add success.
I0323 10:55:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:55:14.455246  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:55:14.455310  543705 disk_worker.go:708] disk space is not compliant
W0323 10:55:14.455313  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:55:14.456817  543705 disk_worker.go:494] system disk:vda1
I0323 10:55:14.456854  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:55:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:55:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:55:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:55:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:55:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:55:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:55:23.409777  543705 memory.go:184] no items to output this cycle
I0323 10:55:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 10:55:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:55:33.409781  543705 memory.go:184] no items to output this cycle
I0323 10:55:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 10:55:39.604691  543705 disk_info.go:125] begin check local disk info of client
I0323 10:55:39.607289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:55:39.607296  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa600 0xc0001aa640]
E0323 10:55:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:55:43.410744  543705 memory.go:191] Add success.
I0323 10:55:43.409812  543705 cpu.go:282] Add success.
I0323 10:55:43.420522  543705 net.go:648] Add success.
I0323 10:55:43.423396  543705 net.go:770] primary dev: ETH0
I0323 10:55:43.423411  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:55:43.423423  543705 net.go:698] Add success.
I0323 10:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:55:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:55:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:55:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:55:53.409776  543705 memory.go:184] no items to output this cycle
I0323 10:55:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 10:56:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:56:03.409798  543705 memory.go:184] no items to output this cycle
I0323 10:56:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 10:56:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:56:13.409784  543705 memory.go:191] Add success.
W0323 10:56:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 10:56:13.409815  543705 cpu.go:282] Add success.
W0323 10:56:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:56:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:56:13.419743  543705 net.go:648] Add success.
I0323 10:56:13.422528  543705 net.go:770] primary dev: ETH0
I0323 10:56:13.422542  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:56:13.422556  543705 net.go:698] Add success.
I0323 10:56:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:56:14.455094  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:56:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 10:56:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:56:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 10:56:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:56:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:56:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:56:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:56:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:56:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:56:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:56:23.409771  543705 memory.go:184] no items to output this cycle
I0323 10:56:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 10:56:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:56:33.409766  543705 memory.go:184] no items to output this cycle
I0323 10:56:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 10:56:39.607776  543705 disk_info.go:125] begin check local disk info of client
I0323 10:56:39.610395  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:56:39.610403  543705 disk_info.go:196] parse disk info done, disk is : [0xc000281840 0xc000281880]
E0323 10:56:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:56:43.410708  543705 memory.go:191] Add success.
I0323 10:56:43.409802  543705 cpu.go:282] Add success.
I0323 10:56:43.420390  543705 net.go:648] Add success.
I0323 10:56:43.423078  543705 net.go:770] primary dev: ETH0
I0323 10:56:43.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:56:43.423104  543705 net.go:698] Add success.
I0323 10:56:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:56:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:56:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:56:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:56:53.409799  543705 memory.go:184] no items to output this cycle
I0323 10:56:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 10:57:03.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:57:03.409896  543705 memory.go:184] no items to output this cycle
I0323 10:57:03.409941  543705 cpu.go:275] no items to output this cycle
E0323 10:57:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:57:13.409846  543705 memory.go:191] Add success.
I0323 10:57:13.409856  543705 cpu.go:282] Add success.
W0323 10:57:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:57:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:57:13.409898  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:57:13.420408  543705 net.go:648] Add success.
I0323 10:57:13.428896  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 10:57:13.428970  543705 net.go:770] primary dev: ETH0
I0323 10:57:13.428984  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:57:13.428996  543705 net.go:698] Add success.
I0323 10:57:13.453607  543705 event_worker.go:152] Polling the log file for events...
I0323 10:57:13.468298  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"028e098f-b89c-416a-805e-15692f14c197","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 10:57:13.468332  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 10:57:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:57:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 10:57:14.455201  543705 disk_worker.go:728] disk inode is not compliant
E0323 10:57:14.455913  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 10:57:14.455923  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 10:57:14.455928  543705 custom_config.go:64] query custom config with name: gpu
I0323 10:57:14.456554  543705 disk_worker.go:494] system disk:vda1
I0323 10:57:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 10:57:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 10:57:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:57:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 10:57:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 10:57:16.458007  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:57:16.458027  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:57:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:57:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:57:23.409795  543705 memory.go:184] no items to output this cycle
I0323 10:57:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 10:57:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:57:33.409769  543705 memory.go:184] no items to output this cycle
I0323 10:57:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 10:57:39.610499  543705 disk_info.go:125] begin check local disk info of client
I0323 10:57:39.613052  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:57:39.613060  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1080 0xc0003f10c0]
I0323 10:57:40.377727  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 10:57:40.377733  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 10:57:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:57:43.410716  543705 memory.go:191] Add success.
I0323 10:57:43.409799  543705 cpu.go:282] Add success.
I0323 10:57:43.420447  543705 net.go:648] Add success.
I0323 10:57:43.423150  543705 net.go:770] primary dev: ETH0
I0323 10:57:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:57:43.423178  543705 net.go:698] Add success.
I0323 10:57:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:57:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:57:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:57:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:57:53.409898  543705 memory.go:184] no items to output this cycle
I0323 10:57:53.409929  543705 cpu.go:275] no items to output this cycle
E0323 10:58:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:58:03.409771  543705 memory.go:184] no items to output this cycle
I0323 10:58:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 10:58:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:58:13.409814  543705 memory.go:191] Add success.
I0323 10:58:13.409820  543705 cpu.go:282] Add success.
W0323 10:58:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:58:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:58:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:58:13.420153  543705 net.go:648] Add success.
I0323 10:58:13.423018  543705 net.go:770] primary dev: ETH0
I0323 10:58:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:58:13.423047  543705 net.go:698] Add success.
I0323 10:58:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:58:14.455115  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:58:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 10:58:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:58:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 10:58:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:58:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:58:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:58:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:58:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:58:16.472425  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:58:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:58:23.409766  543705 memory.go:184] no items to output this cycle
I0323 10:58:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 10:58:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:58:33.409771  543705 memory.go:184] no items to output this cycle
I0323 10:58:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 10:58:39.613694  543705 disk_info.go:125] begin check local disk info of client
I0323 10:58:39.616307  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:58:39.616315  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e8d80 0xc0003e8dc0]
E0323 10:58:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:58:43.410685  543705 memory.go:191] Add success.
I0323 10:58:43.409799  543705 cpu.go:282] Add success.
I0323 10:58:43.419711  543705 net.go:648] Add success.
I0323 10:58:43.422567  543705 net.go:770] primary dev: ETH0
I0323 10:58:43.422580  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:58:43.422591  543705 net.go:698] Add success.
I0323 10:58:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:58:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:58:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:58:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:58:53.409800  543705 memory.go:184] no items to output this cycle
I0323 10:58:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 10:59:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:59:03.409775  543705 memory.go:184] no items to output this cycle
I0323 10:59:03.409777  543705 cpu.go:275] no items to output this cycle
E0323 10:59:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:59:13.409808  543705 memory.go:191] Add success.
I0323 10:59:13.409808  543705 cpu.go:282] Add success.
W0323 10:59:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 10:59:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 10:59:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 10:59:13.420225  543705 net.go:648] Add success.
I0323 10:59:13.423198  543705 net.go:770] primary dev: ETH0
I0323 10:59:13.423210  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:59:13.423223  543705 net.go:698] Add success.
I0323 10:59:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 10:59:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 10:59:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 10:59:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 10:59:14.456612  543705 disk_worker.go:494] system disk:vda1
I0323 10:59:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 10:59:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 10:59:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:59:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:59:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 10:59:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 10:59:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:59:23.409774  543705 memory.go:184] no items to output this cycle
I0323 10:59:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 10:59:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:59:33.409898  543705 memory.go:184] no items to output this cycle
I0323 10:59:33.409952  543705 cpu.go:275] no items to output this cycle
I0323 10:59:39.616762  543705 disk_info.go:125] begin check local disk info of client
I0323 10:59:39.619263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 10:59:39.619270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9c40 0xc0003c9c80]
E0323 10:59:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:59:43.410875  543705 memory.go:191] Add success.
I0323 10:59:43.409827  543705 cpu.go:282] Add success.
I0323 10:59:43.420570  543705 net.go:648] Add success.
I0323 10:59:43.423235  543705 net.go:770] primary dev: ETH0
I0323 10:59:43.423248  543705 net.go:802] Send network stats successfully!,count is 6
I0323 10:59:43.423261  543705 net.go:698] Add success.
I0323 10:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 10:59:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 10:59:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 10:59:53.410243  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 10:59:53.410259  543705 memory.go:184] no items to output this cycle
I0323 10:59:53.410267  543705 cpu.go:275] no items to output this cycle
E0323 11:00:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:00:03.409785  543705 memory.go:184] no items to output this cycle
I0323 11:00:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 11:00:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:00:13.409795  543705 cpu.go:282] Add success.
I0323 11:00:13.409796  543705 memory.go:191] Add success.
W0323 11:00:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:00:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:00:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:00:13.420163  543705 net.go:648] Add success.
I0323 11:00:13.422648  543705 net.go:770] primary dev: ETH0
I0323 11:00:13.422662  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:00:13.422675  543705 net.go:698] Add success.
I0323 11:00:14.339598  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d1d0e916-6165-4e11-9bc9-8123345838bd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:00:14.339637  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:00:14.453976  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:00:14.454239  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:00:14.454251  543705 disk_worker.go:708] disk space is not compliant
W0323 11:00:14.454253  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:00:14.455792  543705 disk_worker.go:494] system disk:vda1
I0323 11:00:14.455820  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:00:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:00:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:00:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:00:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:00:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:00:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:00:23.409790  543705 memory.go:184] no items to output this cycle
I0323 11:00:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 11:00:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:00:33.409786  543705 memory.go:184] no items to output this cycle
I0323 11:00:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 11:00:39.619747  543705 disk_info.go:125] begin check local disk info of client
I0323 11:00:39.622157  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:00:39.622165  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004e6a80 0xc0004e6ac0]
I0323 11:00:40.377878  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:00:40.377886  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:00:43.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:00:43.410855  543705 memory.go:191] Add success.
I0323 11:00:43.410081  543705 cpu.go:282] Add success.
I0323 11:00:43.419748  543705 net.go:648] Add success.
I0323 11:00:43.422392  543705 net.go:770] primary dev: ETH0
I0323 11:00:43.422405  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:00:43.422417  543705 net.go:698] Add success.
I0323 11:00:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:00:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:00:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:00:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:00:53.409774  543705 memory.go:184] no items to output this cycle
I0323 11:00:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 11:01:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:01:03.409773  543705 memory.go:184] no items to output this cycle
I0323 11:01:03.409779  543705 cpu.go:275] no items to output this cycle
W0323 11:01:13.409718  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:01:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:01:13.409742  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:01:13.409808  543705 cpu.go:282] Add success.
E0323 11:01:13.409849  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:01:13.409872  543705 memory.go:191] Add success.
I0323 11:01:13.420136  543705 net.go:648] Add success.
I0323 11:01:13.423042  543705 net.go:770] primary dev: ETH0
I0323 11:01:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:01:13.423069  543705 net.go:698] Add success.
I0323 11:01:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:01:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:01:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 11:01:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:01:14.456636  543705 disk_worker.go:494] system disk:vda1
I0323 11:01:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:01:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:01:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:01:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:01:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:01:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:01:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:01:23.409769  543705 memory.go:184] no items to output this cycle
I0323 11:01:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 11:01:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:01:33.409777  543705 memory.go:184] no items to output this cycle
I0323 11:01:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 11:01:39.622264  543705 disk_info.go:125] begin check local disk info of client
I0323 11:01:39.624719  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:01:39.624727  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370300 0xc000370340]
E0323 11:01:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:01:43.410720  543705 memory.go:191] Add success.
I0323 11:01:43.409835  543705 cpu.go:282] Add success.
I0323 11:01:43.420771  543705 net.go:648] Add success.
I0323 11:01:43.423250  543705 net.go:770] primary dev: ETH0
I0323 11:01:43.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:01:43.423274  543705 net.go:698] Add success.
I0323 11:01:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:01:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:01:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:01:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:01:53.409768  543705 memory.go:184] no items to output this cycle
I0323 11:01:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 11:02:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:02:03.409797  543705 memory.go:184] no items to output this cycle
I0323 11:02:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 11:02:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:02:13.409785  543705 memory.go:191] Add success.
I0323 11:02:13.409804  543705 cpu.go:282] Add success.
W0323 11:02:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:02:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:02:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:02:13.420074  543705 net.go:648] Add success.
I0323 11:02:13.423080  543705 net.go:770] primary dev: ETH0
I0323 11:02:13.423093  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:02:13.423105  543705 net.go:698] Add success.
W0323 11:02:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:02:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 11:02:14.455185  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:02:14.455908  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:02:14.455917  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:02:14.455923  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:02:14.456561  543705 disk_worker.go:494] system disk:vda1
I0323 11:02:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:02:15.456825  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:02:15.456834  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:02:16.457936  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:02:16.457946  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:02:16.457989  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:02:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:02:16.472309  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:02:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:02:23.409864  543705 memory.go:184] no items to output this cycle
I0323 11:02:23.409917  543705 cpu.go:275] no items to output this cycle
E0323 11:02:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:02:33.409779  543705 memory.go:184] no items to output this cycle
I0323 11:02:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 11:02:39.625683  543705 disk_info.go:125] begin check local disk info of client
I0323 11:02:39.628271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:02:39.628279  543705 disk_info.go:196] parse disk info done, disk is : [0xc000359b40 0xc000359b80]
E0323 11:02:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:02:43.410786  543705 memory.go:191] Add success.
I0323 11:02:43.409822  543705 cpu.go:282] Add success.
I0323 11:02:43.420549  543705 net.go:648] Add success.
I0323 11:02:43.423158  543705 net.go:770] primary dev: ETH0
I0323 11:02:43.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:02:43.423187  543705 net.go:698] Add success.
I0323 11:02:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:02:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:02:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:02:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:02:53.409791  543705 memory.go:184] no items to output this cycle
I0323 11:02:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 11:03:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:03:03.409769  543705 memory.go:184] no items to output this cycle
I0323 11:03:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 11:03:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:03:13.409832  543705 memory.go:191] Add success.
I0323 11:03:13.409839  543705 cpu.go:282] Add success.
W0323 11:03:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:03:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:03:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:03:13.420179  543705 net.go:648] Add success.
I0323 11:03:13.423517  543705 net.go:770] primary dev: ETH0
I0323 11:03:13.423531  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:03:13.423546  543705 net.go:698] Add success.
I0323 11:03:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:03:14.455267  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:03:14.455277  543705 disk_worker.go:708] disk space is not compliant
W0323 11:03:14.455280  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:03:14.456668  543705 disk_worker.go:494] system disk:vda1
I0323 11:03:14.456713  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:03:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:03:16.059229  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a3e81147-afa1-4769-a626-7afd7229712c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:03:16.059264  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:03:16.458274  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:03:16.458343  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:03:16.458367  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:03:16.472676  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:03:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:03:23.409773  543705 cpu.go:275] no items to output this cycle
I0323 11:03:23.409780  543705 memory.go:184] no items to output this cycle
E0323 11:03:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:03:33.409802  543705 memory.go:184] no items to output this cycle
I0323 11:03:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 11:03:39.628808  543705 disk_info.go:125] begin check local disk info of client
I0323 11:03:39.631266  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:03:39.631273  543705 disk_info.go:196] parse disk info done, disk is : [0xc000298100 0xc000298140]
I0323 11:03:40.378803  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:03:40.378810  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:03:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:03:43.410689  543705 memory.go:191] Add success.
I0323 11:03:43.409795  543705 cpu.go:282] Add success.
I0323 11:03:43.420423  543705 net.go:648] Add success.
I0323 11:03:43.423111  543705 net.go:770] primary dev: ETH0
I0323 11:03:43.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:03:43.423141  543705 net.go:698] Add success.
I0323 11:03:46.457671  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:03:46.457754  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:03:46.457781  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:03:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:03:53.409770  543705 memory.go:184] no items to output this cycle
I0323 11:03:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 11:04:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:04:03.409796  543705 memory.go:184] no items to output this cycle
I0323 11:04:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:04:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:04:13.409792  543705 memory.go:191] Add success.
I0323 11:04:13.409812  543705 cpu.go:282] Add success.
W0323 11:04:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:04:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:04:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:04:13.420240  543705 net.go:648] Add success.
I0323 11:04:13.423358  543705 net.go:770] primary dev: ETH0
I0323 11:04:13.423369  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:04:13.423382  543705 net.go:698] Add success.
I0323 11:04:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:04:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:04:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 11:04:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:04:14.456601  543705 disk_worker.go:494] system disk:vda1
I0323 11:04:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:04:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:04:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:04:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:04:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:04:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:04:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:04:23.409794  543705 memory.go:184] no items to output this cycle
I0323 11:04:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 11:04:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:04:33.409777  543705 memory.go:184] no items to output this cycle
I0323 11:04:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 11:04:39.631809  543705 disk_info.go:125] begin check local disk info of client
I0323 11:04:39.634231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:04:39.634238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0680 0xc0002a06c0]
E0323 11:04:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:04:43.410722  543705 memory.go:191] Add success.
I0323 11:04:43.409805  543705 cpu.go:282] Add success.
I0323 11:04:43.420675  543705 net.go:648] Add success.
I0323 11:04:43.423342  543705 net.go:770] primary dev: ETH0
I0323 11:04:43.423356  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:04:43.423367  543705 net.go:698] Add success.
I0323 11:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:04:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:04:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:04:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:04:53.409768  543705 memory.go:184] no items to output this cycle
I0323 11:04:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 11:05:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:05:03.409805  543705 memory.go:184] no items to output this cycle
I0323 11:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 11:05:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:05:13.409793  543705 memory.go:191] Add success.
W0323 11:05:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 11:05:13.409821  543705 cpu.go:282] Add success.
W0323 11:05:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:05:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:05:13.420351  543705 net.go:648] Add success.
I0323 11:05:13.423345  543705 net.go:770] primary dev: ETH0
I0323 11:05:13.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:05:13.423371  543705 net.go:698] Add success.
I0323 11:05:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:05:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:05:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0323 11:05:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:05:14.456647  543705 disk_worker.go:494] system disk:vda1
I0323 11:05:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:05:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:05:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:05:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:05:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:05:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:05:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:05:23.409775  543705 memory.go:184] no items to output this cycle
I0323 11:05:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 11:05:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:05:33.409796  543705 memory.go:184] no items to output this cycle
I0323 11:05:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 11:05:39.634811  543705 disk_info.go:125] begin check local disk info of client
I0323 11:05:39.637362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:05:39.637369  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3a80 0xc0003b3ac0]
E0323 11:05:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:05:43.410667  543705 memory.go:191] Add success.
I0323 11:05:43.409833  543705 cpu.go:282] Add success.
I0323 11:05:43.420543  543705 net.go:648] Add success.
I0323 11:05:43.423272  543705 net.go:770] primary dev: ETH0
I0323 11:05:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:05:43.423300  543705 net.go:698] Add success.
I0323 11:05:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:05:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:05:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:05:53.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:05:53.409767  543705 memory.go:184] no items to output this cycle
I0323 11:05:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 11:06:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:06:03.409797  543705 memory.go:184] no items to output this cycle
I0323 11:06:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 11:06:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:06:13.409803  543705 memory.go:191] Add success.
I0323 11:06:13.409804  543705 cpu.go:282] Add success.
W0323 11:06:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:06:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:06:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:06:13.420194  543705 net.go:648] Add success.
I0323 11:06:13.423066  543705 net.go:770] primary dev: ETH0
I0323 11:06:13.423080  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:06:13.423095  543705 net.go:698] Add success.
I0323 11:06:13.641119  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9d867349-bcb2-484b-b570-03ea5ca92832","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:06:13.641155  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:06:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:06:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:06:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 11:06:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:06:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 11:06:14.456608  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:06:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:06:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:06:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:06:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:06:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:06:23.409780  543705 memory.go:184] no items to output this cycle
I0323 11:06:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 11:06:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:06:33.409786  543705 memory.go:184] no items to output this cycle
I0323 11:06:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 11:06:39.637673  543705 disk_info.go:125] begin check local disk info of client
I0323 11:06:39.640212  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:06:39.640218  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4300 0xc0000c4340]
I0323 11:06:40.381741  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:06:40.381748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:06:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:06:43.410710  543705 memory.go:191] Add success.
I0323 11:06:43.409818  543705 cpu.go:282] Add success.
I0323 11:06:43.420496  543705 net.go:648] Add success.
I0323 11:06:43.423356  543705 net.go:770] primary dev: ETH0
I0323 11:06:43.423369  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:06:43.423381  543705 net.go:698] Add success.
I0323 11:06:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:06:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:06:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:06:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:06:53.409777  543705 memory.go:184] no items to output this cycle
I0323 11:06:53.409779  543705 cpu.go:275] no items to output this cycle
E0323 11:07:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:07:03.409780  543705 memory.go:184] no items to output this cycle
I0323 11:07:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 11:07:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:07:13.409824  543705 memory.go:191] Add success.
I0323 11:07:13.409835  543705 cpu.go:282] Add success.
W0323 11:07:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:07:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:07:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:07:13.420147  543705 net.go:648] Add success.
I0323 11:07:13.422957  543705 net.go:770] primary dev: ETH0
I0323 11:07:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:07:13.422982  543705 net.go:698] Add success.
I0323 11:07:13.453541  543705 event_worker.go:152] Polling the log file for events...
W0323 11:07:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:07:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 11:07:14.455183  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:07:14.455854  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:07:14.455864  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:07:14.455869  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:07:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 11:07:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:07:15.456841  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:07:15.456850  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:07:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:07:16.457972  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:07:16.458015  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:07:16.458031  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:07:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:07:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:07:23.409783  543705 memory.go:184] no items to output this cycle
I0323 11:07:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 11:07:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:07:33.409798  543705 memory.go:184] no items to output this cycle
I0323 11:07:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 11:07:39.640845  543705 disk_info.go:125] begin check local disk info of client
I0323 11:07:39.643473  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:07:39.643480  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8fc0 0xc0004d9000]
E0323 11:07:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:07:43.410694  543705 memory.go:191] Add success.
I0323 11:07:43.409813  543705 cpu.go:282] Add success.
I0323 11:07:43.420468  543705 net.go:648] Add success.
I0323 11:07:43.422946  543705 net.go:770] primary dev: ETH0
I0323 11:07:43.422959  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:07:43.422972  543705 net.go:698] Add success.
I0323 11:07:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:07:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:07:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:07:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:07:53.409779  543705 memory.go:184] no items to output this cycle
I0323 11:07:53.409781  543705 cpu.go:275] no items to output this cycle
E0323 11:08:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:08:03.409768  543705 memory.go:184] no items to output this cycle
I0323 11:08:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 11:08:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:08:13.409813  543705 memory.go:191] Add success.
I0323 11:08:13.409817  543705 cpu.go:282] Add success.
W0323 11:08:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:08:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:08:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:08:13.420047  543705 net.go:648] Add success.
I0323 11:08:13.422726  543705 net.go:770] primary dev: ETH0
I0323 11:08:13.422739  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:08:13.422752  543705 net.go:698] Add success.
I0323 11:08:14.454941  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:08:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:08:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 11:08:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:08:14.456558  543705 disk_worker.go:494] system disk:vda1
I0323 11:08:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:08:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:08:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:08:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:08:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:08:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:08:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:08:23.409798  543705 memory.go:184] no items to output this cycle
I0323 11:08:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:08:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:08:33.409807  543705 memory.go:184] no items to output this cycle
I0323 11:08:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 11:08:39.643557  543705 disk_info.go:125] begin check local disk info of client
I0323 11:08:39.646117  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:08:39.646124  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b2240 0xc0004b2280]
E0323 11:08:43.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:08:43.410860  543705 memory.go:191] Add success.
I0323 11:08:43.410020  543705 cpu.go:282] Add success.
I0323 11:08:43.419726  543705 net.go:648] Add success.
I0323 11:08:43.423028  543705 net.go:770] primary dev: ETH0
I0323 11:08:43.423043  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:08:43.423058  543705 net.go:698] Add success.
I0323 11:08:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:08:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:08:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:08:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:08:53.409776  543705 cpu.go:275] no items to output this cycle
I0323 11:08:53.409789  543705 memory.go:184] no items to output this cycle
E0323 11:09:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:09:03.409813  543705 memory.go:184] no items to output this cycle
I0323 11:09:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 11:09:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:09:13.409796  543705 memory.go:191] Add success.
I0323 11:09:13.409814  543705 cpu.go:282] Add success.
W0323 11:09:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:09:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:09:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:09:13.420168  543705 net.go:648] Add success.
I0323 11:09:13.423097  543705 net.go:770] primary dev: ETH0
I0323 11:09:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:09:13.423124  543705 net.go:698] Add success.
I0323 11:09:13.469152  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"180db011-91c7-4f64-9ea5-29e770f223a2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:09:13.469186  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:09:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:09:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:09:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 11:09:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:09:14.456638  543705 disk_worker.go:494] system disk:vda1
I0323 11:09:14.456671  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:09:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:09:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:09:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:09:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:09:16.472370  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:09:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:09:23.409798  543705 memory.go:184] no items to output this cycle
I0323 11:09:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:09:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:09:33.409769  543705 memory.go:184] no items to output this cycle
I0323 11:09:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 11:09:39.646876  543705 disk_info.go:125] begin check local disk info of client
I0323 11:09:39.649410  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:09:39.649425  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371ac0 0xc000371b00]
I0323 11:09:40.381899  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:09:40.381906  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:09:43.409896  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:09:43.410722  543705 memory.go:191] Add success.
I0323 11:09:43.409921  543705 cpu.go:282] Add success.
I0323 11:09:43.419741  543705 net.go:648] Add success.
I0323 11:09:43.422601  543705 net.go:770] primary dev: ETH0
I0323 11:09:43.422616  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:09:43.422630  543705 net.go:698] Add success.
I0323 11:09:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:09:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:09:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:09:53.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:09:53.409762  543705 memory.go:184] no items to output this cycle
I0323 11:09:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 11:10:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:10:03.409774  543705 memory.go:184] no items to output this cycle
I0323 11:10:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 11:10:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:10:13.409797  543705 memory.go:191] Add success.
I0323 11:10:13.409800  543705 cpu.go:282] Add success.
W0323 11:10:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:10:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:10:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:10:13.420250  543705 net.go:648] Add success.
I0323 11:10:13.423354  543705 net.go:770] primary dev: ETH0
I0323 11:10:13.423368  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:10:13.423379  543705 net.go:698] Add success.
I0323 11:10:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:10:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:10:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 11:10:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:10:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 11:10:14.456620  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:10:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:10:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:10:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:10:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:10:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:10:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:10:23.409770  543705 memory.go:184] no items to output this cycle
I0323 11:10:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 11:10:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:10:33.409766  543705 memory.go:184] no items to output this cycle
I0323 11:10:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 11:10:39.649672  543705 disk_info.go:125] begin check local disk info of client
I0323 11:10:39.652206  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:10:39.652213  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487ec0 0xc000487f00]
E0323 11:10:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:10:43.410732  543705 memory.go:191] Add success.
I0323 11:10:43.409807  543705 cpu.go:282] Add success.
I0323 11:10:43.420523  543705 net.go:648] Add success.
I0323 11:10:43.423165  543705 net.go:770] primary dev: ETH0
I0323 11:10:43.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:10:43.423191  543705 net.go:698] Add success.
I0323 11:10:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:10:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:10:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:10:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:10:53.409794  543705 memory.go:184] no items to output this cycle
I0323 11:10:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 11:11:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:11:03.409774  543705 memory.go:184] no items to output this cycle
I0323 11:11:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 11:11:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:11:13.409826  543705 cpu.go:282] Add success.
I0323 11:11:13.409830  543705 memory.go:191] Add success.
W0323 11:11:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:11:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:11:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:11:13.420014  543705 net.go:770] primary dev: ETH0
I0323 11:11:13.420028  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:11:13.420041  543705 net.go:698] Add success.
I0323 11:11:13.420407  543705 net.go:648] Add success.
I0323 11:11:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:11:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:11:14.455166  543705 disk_worker.go:708] disk space is not compliant
W0323 11:11:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:11:14.456516  543705 disk_worker.go:494] system disk:vda1
I0323 11:11:14.456572  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:11:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:11:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:11:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:11:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:11:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:11:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:11:23.409774  543705 memory.go:184] no items to output this cycle
I0323 11:11:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 11:11:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:11:33.409782  543705 memory.go:184] no items to output this cycle
I0323 11:11:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 11:11:39.652297  543705 disk_info.go:125] begin check local disk info of client
I0323 11:11:39.654905  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:11:39.654912  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029da40 0xc00029da80]
E0323 11:11:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:11:43.410656  543705 memory.go:191] Add success.
I0323 11:11:43.409830  543705 cpu.go:282] Add success.
I0323 11:11:43.420336  543705 net.go:648] Add success.
I0323 11:11:43.422981  543705 net.go:770] primary dev: ETH0
I0323 11:11:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:11:43.423009  543705 net.go:698] Add success.
I0323 11:11:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:11:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:11:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:11:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:11:53.409841  543705 memory.go:184] no items to output this cycle
I0323 11:11:53.409928  543705 cpu.go:275] no items to output this cycle
E0323 11:12:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:12:03.409798  543705 memory.go:184] no items to output this cycle
I0323 11:12:03.409807  543705 cpu.go:275] no items to output this cycle
W0323 11:12:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:12:13.409727  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:12:13.409733  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 11:12:13.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:12:13.409816  543705 cpu.go:282] Add success.
I0323 11:12:13.409825  543705 memory.go:191] Add success.
I0323 11:12:13.420335  543705 net.go:648] Add success.
I0323 11:12:13.423018  543705 net.go:770] primary dev: ETH0
I0323 11:12:13.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:12:13.423051  543705 net.go:698] Add success.
I0323 11:12:13.468488  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e42e8d48-7ebb-488e-ad4f-d2d185b724e1","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:12:13.468520  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 11:12:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:12:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 11:12:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:12:14.456934  543705 disk_worker.go:494] system disk:vda1
E0323 11:12:14.456938  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:12:14.456946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:12:14.456952  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:12:14.456979  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:12:15.456798  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:12:15.456808  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:12:16.457951  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:12:16.457950  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:12:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:12:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:12:16.472345  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:12:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:12:23.409801  543705 memory.go:184] no items to output this cycle
I0323 11:12:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:12:33.410597  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:12:33.410618  543705 memory.go:184] no items to output this cycle
I0323 11:12:33.410643  543705 cpu.go:275] no items to output this cycle
I0323 11:12:39.654995  543705 disk_info.go:125] begin check local disk info of client
I0323 11:12:39.657580  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:12:39.657587  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396d00 0xc000396d40]
I0323 11:12:40.382793  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:12:40.382801  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:12:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:12:43.410677  543705 memory.go:191] Add success.
I0323 11:12:43.409831  543705 cpu.go:282] Add success.
I0323 11:12:43.420388  543705 net.go:648] Add success.
I0323 11:12:43.423087  543705 net.go:770] primary dev: ETH0
I0323 11:12:43.423101  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:12:43.423114  543705 net.go:698] Add success.
I0323 11:12:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:12:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:12:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:12:53.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:12:53.409907  543705 cpu.go:275] no items to output this cycle
I0323 11:12:53.409916  543705 memory.go:184] no items to output this cycle
E0323 11:13:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:13:03.409796  543705 memory.go:184] no items to output this cycle
I0323 11:13:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 11:13:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:13:13.409799  543705 memory.go:191] Add success.
W0323 11:13:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:13:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:13:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:13:13.409840  543705 cpu.go:282] Add success.
I0323 11:13:13.420221  543705 net.go:648] Add success.
I0323 11:13:13.423312  543705 net.go:770] primary dev: ETH0
I0323 11:13:13.423326  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:13:13.423339  543705 net.go:698] Add success.
I0323 11:13:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:13:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:13:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 11:13:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:13:14.456494  543705 disk_worker.go:494] system disk:vda1
I0323 11:13:14.456538  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:13:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:13:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:13:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:13:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:13:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:13:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:13:23.409807  543705 memory.go:184] no items to output this cycle
I0323 11:13:23.409817  543705 cpu.go:275] no items to output this cycle
E0323 11:13:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:13:33.409783  543705 memory.go:184] no items to output this cycle
I0323 11:13:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 11:13:39.657673  543705 disk_info.go:125] begin check local disk info of client
I0323 11:13:39.660200  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:13:39.660206  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267940 0xc000267980]
E0323 11:13:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:13:43.410675  543705 memory.go:191] Add success.
I0323 11:13:43.409858  543705 cpu.go:282] Add success.
I0323 11:13:43.420359  543705 net.go:648] Add success.
I0323 11:13:43.423037  543705 net.go:770] primary dev: ETH0
I0323 11:13:43.423050  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:13:43.423063  543705 net.go:698] Add success.
I0323 11:13:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:13:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:13:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:13:53.409889  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:13:53.409911  543705 memory.go:184] no items to output this cycle
I0323 11:13:53.410018  543705 cpu.go:275] no items to output this cycle
E0323 11:14:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:14:03.409799  543705 memory.go:184] no items to output this cycle
I0323 11:14:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 11:14:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:14:13.409801  543705 memory.go:191] Add success.
I0323 11:14:13.409820  543705 cpu.go:282] Add success.
W0323 11:14:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:14:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:14:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:14:13.420197  543705 net.go:648] Add success.
I0323 11:14:13.423274  543705 net.go:770] primary dev: ETH0
I0323 11:14:13.423286  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:14:13.423298  543705 net.go:698] Add success.
I0323 11:14:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:14:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:14:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 11:14:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:14:14.456822  543705 disk_worker.go:494] system disk:vda1
I0323 11:14:14.456852  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:14:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:14:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:14:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:14:16.472417  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:14:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:14:23.409795  543705 memory.go:184] no items to output this cycle
I0323 11:14:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 11:14:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:14:33.409781  543705 memory.go:184] no items to output this cycle
I0323 11:14:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 11:14:39.660942  543705 disk_info.go:125] begin check local disk info of client
I0323 11:14:39.663529  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:14:39.663536  543705 disk_info.go:196] parse disk info done, disk is : [0xc000271240 0xc000271280]
E0323 11:14:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:14:43.410606  543705 memory.go:191] Add success.
I0323 11:14:43.409839  543705 cpu.go:282] Add success.
I0323 11:14:43.420303  543705 net.go:648] Add success.
I0323 11:14:43.423102  543705 net.go:770] primary dev: ETH0
I0323 11:14:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:14:43.423130  543705 net.go:698] Add success.
I0323 11:14:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:14:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:14:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:14:53.409904  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:14:53.409923  543705 memory.go:184] no items to output this cycle
I0323 11:14:53.409937  543705 cpu.go:275] no items to output this cycle
E0323 11:15:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:15:03.409784  543705 memory.go:184] no items to output this cycle
I0323 11:15:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 11:15:13.409844  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:15:13.409876  543705 memory.go:191] Add success.
W0323 11:15:13.409904  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:15:13.409916  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:15:13.409919  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:15:13.409919  543705 cpu.go:282] Add success.
I0323 11:15:13.420242  543705 net.go:648] Add success.
I0323 11:15:13.423471  543705 net.go:770] primary dev: ETH0
I0323 11:15:13.423485  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:15:13.423497  543705 net.go:698] Add success.
I0323 11:15:13.464403  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"29b786b2-4fca-44da-bfd3-1a764cd7c40d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:15:13.464438  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:15:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:15:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:15:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 11:15:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:15:14.456779  543705 disk_worker.go:494] system disk:vda1
I0323 11:15:14.456820  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:15:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:15:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:15:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:15:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:15:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:15:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:15:23.409804  543705 memory.go:184] no items to output this cycle
I0323 11:15:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 11:15:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:15:33.409770  543705 memory.go:184] no items to output this cycle
I0323 11:15:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 11:15:39.663961  543705 disk_info.go:125] begin check local disk info of client
I0323 11:15:39.666526  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:15:39.666532  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c09c0 0xc0003c0a00]
I0323 11:15:40.385735  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:15:40.385742  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:15:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:15:43.410716  543705 memory.go:191] Add success.
I0323 11:15:43.409813  543705 cpu.go:282] Add success.
I0323 11:15:43.420425  543705 net.go:648] Add success.
I0323 11:15:43.423433  543705 net.go:770] primary dev: ETH0
I0323 11:15:43.423449  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:15:43.423463  543705 net.go:698] Add success.
I0323 11:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:15:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:15:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:15:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:15:53.409781  543705 cpu.go:275] no items to output this cycle
I0323 11:15:53.409785  543705 memory.go:184] no items to output this cycle
E0323 11:16:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:16:03.409778  543705 memory.go:184] no items to output this cycle
I0323 11:16:03.409784  543705 cpu.go:275] no items to output this cycle
W0323 11:16:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:16:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:16:13.409737  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:16:13.409832  543705 cpu.go:282] Add success.
E0323 11:16:13.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:16:13.409853  543705 memory.go:191] Add success.
I0323 11:16:13.420046  543705 net.go:648] Add success.
I0323 11:16:13.422842  543705 net.go:770] primary dev: ETH0
I0323 11:16:13.422857  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:16:13.422871  543705 net.go:698] Add success.
I0323 11:16:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:16:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:16:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 11:16:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:16:14.456569  543705 disk_worker.go:494] system disk:vda1
I0323 11:16:14.456599  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:16:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:16:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:16:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:16:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:16:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:16:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:16:23.409774  543705 memory.go:184] no items to output this cycle
I0323 11:16:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 11:16:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:16:33.409777  543705 memory.go:184] no items to output this cycle
I0323 11:16:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 11:16:39.666976  543705 disk_info.go:125] begin check local disk info of client
I0323 11:16:39.669504  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:16:39.669510  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003597c0 0xc000359800]
E0323 11:16:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:16:43.410710  543705 memory.go:191] Add success.
I0323 11:16:43.409793  543705 cpu.go:282] Add success.
I0323 11:16:43.420421  543705 net.go:648] Add success.
I0323 11:16:43.423178  543705 net.go:770] primary dev: ETH0
I0323 11:16:43.423192  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:16:43.423207  543705 net.go:698] Add success.
I0323 11:16:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:16:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:16:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:16:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:16:53.409780  543705 cpu.go:275] no items to output this cycle
I0323 11:16:53.409783  543705 memory.go:184] no items to output this cycle
E0323 11:17:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:17:03.409799  543705 memory.go:184] no items to output this cycle
I0323 11:17:03.409812  543705 cpu.go:275] no items to output this cycle
I0323 11:17:13.409803  543705 cpu.go:282] Add success.
E0323 11:17:13.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:17:13.409843  543705 memory.go:191] Add success.
W0323 11:17:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:17:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:17:13.409907  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:17:13.420480  543705 net.go:648] Add success.
I0323 11:17:13.423687  543705 net.go:770] primary dev: ETH0
I0323 11:17:13.423702  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:17:13.423715  543705 net.go:698] Add success.
I0323 11:17:13.453301  543705 event_worker.go:152] Polling the log file for events...
W0323 11:17:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:17:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0323 11:17:14.455218  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:17:14.456818  543705 disk_worker.go:494] system disk:vda1
I0323 11:17:14.456862  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:17:14.457292  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:17:14.457301  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:17:14.457306  543705 custom_config.go:64] query custom config with name: gpu
E0323 11:17:15.456772  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:17:15.456782  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:17:16.458017  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:17:16.458017  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:17:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:17:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:17:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:17:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:17:23.409768  543705 memory.go:184] no items to output this cycle
I0323 11:17:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 11:17:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:17:33.409803  543705 memory.go:184] no items to output this cycle
I0323 11:17:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 11:17:39.669672  543705 disk_info.go:125] begin check local disk info of client
I0323 11:17:39.672278  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:17:39.672284  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aec0 0xc00007af00]
E0323 11:17:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:17:43.410611  543705 memory.go:191] Add success.
I0323 11:17:43.409821  543705 cpu.go:282] Add success.
I0323 11:17:43.420368  543705 net.go:648] Add success.
I0323 11:17:43.423324  543705 net.go:770] primary dev: ETH0
I0323 11:17:43.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:17:43.423351  543705 net.go:698] Add success.
I0323 11:17:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:17:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:17:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:17:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:17:53.409777  543705 memory.go:184] no items to output this cycle
I0323 11:17:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 11:18:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:18:03.409768  543705 memory.go:184] no items to output this cycle
I0323 11:18:03.409791  543705 cpu.go:275] no items to output this cycle
W0323 11:18:13.409708  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:18:13.409730  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:18:13.409735  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 11:18:13.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:18:13.409829  543705 cpu.go:282] Add success.
I0323 11:18:13.409847  543705 memory.go:191] Add success.
I0323 11:18:13.420223  543705 net.go:648] Add success.
I0323 11:18:13.423168  543705 net.go:770] primary dev: ETH0
I0323 11:18:13.423181  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:18:13.423195  543705 net.go:698] Add success.
I0323 11:18:14.065977  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ffdcd160-b506-420a-9bb2-a6f0930e44d7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:18:14.066011  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:18:14.454688  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:18:14.455711  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:18:14.455725  543705 disk_worker.go:708] disk space is not compliant
W0323 11:18:14.455729  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:18:14.457368  543705 disk_worker.go:494] system disk:vda1
I0323 11:18:14.457399  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:18:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:18:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:18:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:18:16.472440  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:18:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:18:23.409793  543705 memory.go:184] no items to output this cycle
I0323 11:18:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 11:18:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:18:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 11:18:33.409794  543705 memory.go:184] no items to output this cycle
I0323 11:18:39.673009  543705 disk_info.go:125] begin check local disk info of client
I0323 11:18:39.675577  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:18:39.675584  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5140 0xc0000c5180]
I0323 11:18:40.385865  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:18:40.385871  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:18:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:18:43.410660  543705 memory.go:191] Add success.
I0323 11:18:43.409812  543705 cpu.go:282] Add success.
I0323 11:18:43.420371  543705 net.go:648] Add success.
I0323 11:18:43.422987  543705 net.go:770] primary dev: ETH0
I0323 11:18:43.423000  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:18:43.423013  543705 net.go:698] Add success.
I0323 11:18:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:18:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:18:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:18:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:18:53.409776  543705 memory.go:184] no items to output this cycle
I0323 11:18:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 11:19:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:19:03.409793  543705 memory.go:184] no items to output this cycle
I0323 11:19:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 11:19:13.409916  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:19:13.409934  543705 cpu.go:282] Add success.
I0323 11:19:13.409951  543705 memory.go:191] Add success.
W0323 11:19:13.409989  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:19:13.410016  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:19:13.410025  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:19:13.419757  543705 net.go:648] Add success.
I0323 11:19:13.422630  543705 net.go:770] primary dev: ETH0
I0323 11:19:13.422645  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:19:13.422663  543705 net.go:698] Add success.
I0323 11:19:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:19:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:19:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0323 11:19:14.455243  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:19:14.456614  543705 disk_worker.go:494] system disk:vda1
I0323 11:19:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:19:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:19:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:19:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:19:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:19:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:19:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:19:23.409778  543705 memory.go:184] no items to output this cycle
I0323 11:19:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 11:19:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:19:33.409793  543705 memory.go:184] no items to output this cycle
I0323 11:19:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 11:19:39.676008  543705 disk_info.go:125] begin check local disk info of client
I0323 11:19:39.678556  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:19:39.678562  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328840 0xc000328880]
E0323 11:19:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:19:43.410713  543705 memory.go:191] Add success.
I0323 11:19:43.409796  543705 cpu.go:282] Add success.
I0323 11:19:43.420404  543705 net.go:648] Add success.
I0323 11:19:43.423278  543705 net.go:770] primary dev: ETH0
I0323 11:19:43.423292  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:19:43.423304  543705 net.go:698] Add success.
I0323 11:19:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:19:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:19:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:19:53.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:19:53.409762  543705 memory.go:184] no items to output this cycle
I0323 11:19:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 11:20:03.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:20:03.409892  543705 cpu.go:275] no items to output this cycle
I0323 11:20:03.409897  543705 memory.go:184] no items to output this cycle
E0323 11:20:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:20:13.409820  543705 memory.go:191] Add success.
I0323 11:20:13.409824  543705 cpu.go:282] Add success.
W0323 11:20:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:20:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:20:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:20:13.420222  543705 net.go:648] Add success.
I0323 11:20:13.422909  543705 net.go:770] primary dev: ETH0
I0323 11:20:13.422925  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:20:13.422939  543705 net.go:698] Add success.
I0323 11:20:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:20:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:20:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 11:20:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:20:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 11:20:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:20:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:20:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:20:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:20:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:20:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:20:23.409799  543705 memory.go:184] no items to output this cycle
I0323 11:20:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 11:20:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:20:33.409773  543705 memory.go:184] no items to output this cycle
I0323 11:20:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 11:20:39.678649  543705 disk_info.go:125] begin check local disk info of client
I0323 11:20:39.681184  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:20:39.681190  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396240 0xc000396280]
E0323 11:20:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:20:43.410580  543705 memory.go:191] Add success.
I0323 11:20:43.409800  543705 cpu.go:282] Add success.
I0323 11:20:43.420342  543705 net.go:648] Add success.
I0323 11:20:43.422914  543705 net.go:770] primary dev: ETH0
I0323 11:20:43.422929  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:20:43.422942  543705 net.go:698] Add success.
I0323 11:20:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:20:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:20:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:20:53.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:20:53.409883  543705 cpu.go:275] no items to output this cycle
I0323 11:20:53.409889  543705 memory.go:184] no items to output this cycle
E0323 11:21:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:21:03.409777  543705 memory.go:184] no items to output this cycle
I0323 11:21:03.409792  543705 cpu.go:275] no items to output this cycle
I0323 11:21:13.409793  543705 cpu.go:282] Add success.
E0323 11:21:13.410164  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:21:13.410986  543705 memory.go:191] Add success.
W0323 11:21:13.411024  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:21:13.411042  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:21:13.411047  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:21:13.420273  543705 net.go:648] Add success.
I0323 11:21:13.422870  543705 net.go:770] primary dev: ETH0
I0323 11:21:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:21:13.422909  543705 net.go:698] Add success.
I0323 11:21:13.463836  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4ff669ed-0c05-4f74-8fe9-821a8b846d6f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:21:13.463870  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:21:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:21:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:21:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 11:21:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:21:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 11:21:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:21:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:21:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:21:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:21:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:21:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:21:23.409793  543705 memory.go:184] no items to output this cycle
I0323 11:21:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 11:21:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:21:33.409797  543705 memory.go:184] no items to output this cycle
I0323 11:21:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 11:21:39.681678  543705 disk_info.go:125] begin check local disk info of client
I0323 11:21:39.684204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:21:39.684210  543705 disk_info.go:196] parse disk info done, disk is : [0xc000468d00 0xc000468d40]
I0323 11:21:40.386782  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:21:40.386787  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:21:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:21:43.410729  543705 memory.go:191] Add success.
I0323 11:21:43.409824  543705 cpu.go:282] Add success.
I0323 11:21:43.420427  543705 net.go:648] Add success.
I0323 11:21:43.422946  543705 net.go:770] primary dev: ETH0
I0323 11:21:43.422961  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:21:43.422977  543705 net.go:698] Add success.
I0323 11:21:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:21:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:21:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:21:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:21:53.409895  543705 memory.go:184] no items to output this cycle
I0323 11:21:53.409917  543705 cpu.go:275] no items to output this cycle
E0323 11:22:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:22:03.409775  543705 memory.go:184] no items to output this cycle
I0323 11:22:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 11:22:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:22:13.409785  543705 memory.go:191] Add success.
W0323 11:22:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 11:22:13.409815  543705 cpu.go:282] Add success.
W0323 11:22:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:22:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:22:13.420133  543705 net.go:648] Add success.
I0323 11:22:13.423129  543705 net.go:770] primary dev: ETH0
I0323 11:22:13.423141  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:22:13.423154  543705 net.go:698] Add success.
W0323 11:22:14.455236  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:22:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0323 11:22:14.455257  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:22:14.455900  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:22:14.455909  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:22:14.455915  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:22:14.456831  543705 disk_worker.go:494] system disk:vda1
I0323 11:22:14.456862  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:22:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:22:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:22:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:22:16.457957  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:22:16.458002  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:22:16.458019  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:22:16.472356  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:22:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:22:23.409764  543705 memory.go:184] no items to output this cycle
I0323 11:22:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 11:22:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:22:33.409773  543705 memory.go:184] no items to output this cycle
I0323 11:22:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 11:22:39.685063  543705 disk_info.go:125] begin check local disk info of client
I0323 11:22:39.687595  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:22:39.687602  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3040 0xc0002b3080]
E0323 11:22:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:22:43.410725  543705 memory.go:191] Add success.
I0323 11:22:43.409808  543705 cpu.go:282] Add success.
I0323 11:22:43.420444  543705 net.go:648] Add success.
I0323 11:22:43.423262  543705 net.go:770] primary dev: ETH0
I0323 11:22:43.423276  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:22:43.423289  543705 net.go:698] Add success.
I0323 11:22:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:22:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:22:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:22:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:22:53.409872  543705 memory.go:184] no items to output this cycle
I0323 11:22:53.409937  543705 cpu.go:275] no items to output this cycle
E0323 11:23:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:23:03.409788  543705 memory.go:184] no items to output this cycle
I0323 11:23:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 11:23:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:23:13.409799  543705 memory.go:191] Add success.
I0323 11:23:13.409801  543705 cpu.go:282] Add success.
W0323 11:23:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:23:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:23:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:23:13.420150  543705 net.go:648] Add success.
I0323 11:23:13.423332  543705 net.go:770] primary dev: ETH0
I0323 11:23:13.423346  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:23:13.423357  543705 net.go:698] Add success.
I0323 11:23:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:23:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:23:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0323 11:23:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:23:14.456621  543705 disk_worker.go:494] system disk:vda1
I0323 11:23:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:23:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:23:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:23:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:23:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:23:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:23:23.409801  543705 memory.go:184] no items to output this cycle
I0323 11:23:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 11:23:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:23:33.409780  543705 memory.go:184] no items to output this cycle
I0323 11:23:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 11:23:39.688078  543705 disk_info.go:125] begin check local disk info of client
I0323 11:23:39.690625  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:23:39.690631  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e7280 0xc0003e72c0]
E0323 11:23:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:23:43.410683  543705 memory.go:191] Add success.
I0323 11:23:43.409806  543705 cpu.go:282] Add success.
I0323 11:23:43.420456  543705 net.go:648] Add success.
I0323 11:23:43.423241  543705 net.go:770] primary dev: ETH0
I0323 11:23:43.423255  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:23:43.423268  543705 net.go:698] Add success.
I0323 11:23:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:23:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:23:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:23:53.410355  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:23:53.410372  543705 memory.go:184] no items to output this cycle
I0323 11:23:53.410389  543705 cpu.go:275] no items to output this cycle
E0323 11:24:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:24:03.409789  543705 memory.go:184] no items to output this cycle
I0323 11:24:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 11:24:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:24:13.409805  543705 memory.go:191] Add success.
I0323 11:24:13.409806  543705 cpu.go:282] Add success.
W0323 11:24:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:24:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:24:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:24:13.420360  543705 net.go:648] Add success.
I0323 11:24:13.423024  543705 net.go:770] primary dev: ETH0
I0323 11:24:13.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:24:13.423055  543705 net.go:698] Add success.
I0323 11:24:13.469948  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5fb499d2-eaae-4065-95d7-91efab019424","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:24:13.469985  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:24:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:24:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:24:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 11:24:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:24:14.456677  543705 disk_worker.go:494] system disk:vda1
I0323 11:24:14.456706  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:24:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:24:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:24:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:24:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:24:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:24:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:24:23.409774  543705 memory.go:184] no items to output this cycle
I0323 11:24:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 11:24:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:24:33.409775  543705 memory.go:184] no items to output this cycle
I0323 11:24:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 11:24:39.691096  543705 disk_info.go:125] begin check local disk info of client
I0323 11:24:39.693664  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:24:39.693670  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003141c0 0xc000314200]
I0323 11:24:40.389716  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:24:40.389721  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:24:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:24:43.410755  543705 memory.go:191] Add success.
I0323 11:24:43.409833  543705 cpu.go:282] Add success.
I0323 11:24:43.420467  543705 net.go:648] Add success.
I0323 11:24:43.423415  543705 net.go:770] primary dev: ETH0
I0323 11:24:43.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:24:43.423440  543705 net.go:698] Add success.
I0323 11:24:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:24:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:24:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:24:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:24:53.409777  543705 memory.go:184] no items to output this cycle
I0323 11:24:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 11:25:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:25:03.409812  543705 memory.go:184] no items to output this cycle
I0323 11:25:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 11:25:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:25:13.409789  543705 memory.go:191] Add success.
I0323 11:25:13.409823  543705 cpu.go:282] Add success.
W0323 11:25:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:25:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:25:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:25:13.420180  543705 net.go:648] Add success.
I0323 11:25:13.423104  543705 net.go:770] primary dev: ETH0
I0323 11:25:13.423120  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:25:13.423135  543705 net.go:698] Add success.
I0323 11:25:14.453940  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:25:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:25:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0323 11:25:14.455243  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:25:14.456638  543705 disk_worker.go:494] system disk:vda1
I0323 11:25:14.456672  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:25:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:25:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:25:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:25:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:25:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:25:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:25:23.409791  543705 memory.go:184] no items to output this cycle
I0323 11:25:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 11:25:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:25:33.409771  543705 memory.go:184] no items to output this cycle
I0323 11:25:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 11:25:39.693752  543705 disk_info.go:125] begin check local disk info of client
I0323 11:25:39.696279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:25:39.696286  543705 disk_info.go:196] parse disk info done, disk is : [0xc000259880 0xc0002598c0]
E0323 11:25:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:25:43.410677  543705 memory.go:191] Add success.
I0323 11:25:43.409800  543705 cpu.go:282] Add success.
I0323 11:25:43.420412  543705 net.go:648] Add success.
I0323 11:25:43.423160  543705 net.go:770] primary dev: ETH0
I0323 11:25:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:25:43.423186  543705 net.go:698] Add success.
I0323 11:25:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:25:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:25:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:25:53.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:25:53.410266  543705 memory.go:184] no items to output this cycle
I0323 11:25:53.410269  543705 cpu.go:275] no items to output this cycle
E0323 11:26:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:26:03.409860  543705 cpu.go:275] no items to output this cycle
I0323 11:26:03.409889  543705 memory.go:184] no items to output this cycle
E0323 11:26:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:26:13.409827  543705 memory.go:191] Add success.
I0323 11:26:13.409828  543705 cpu.go:282] Add success.
W0323 11:26:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:26:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:26:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:26:13.420253  543705 net.go:648] Add success.
I0323 11:26:13.423127  543705 net.go:770] primary dev: ETH0
I0323 11:26:13.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:26:13.423152  543705 net.go:698] Add success.
I0323 11:26:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:26:14.455147  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:26:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 11:26:14.455159  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:26:14.456497  543705 disk_worker.go:494] system disk:vda1
I0323 11:26:14.456542  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:26:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:26:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:26:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:26:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:26:16.472398  543705 disk_local_worker.go:436] Get disk info: []
I0323 11:26:23.409781  543705 cpu.go:275] no items to output this cycle
E0323 11:26:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:26:23.409800  543705 memory.go:184] no items to output this cycle
E0323 11:26:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:26:33.409801  543705 memory.go:184] no items to output this cycle
I0323 11:26:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 11:26:39.697124  543705 disk_info.go:125] begin check local disk info of client
I0323 11:26:39.699657  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:26:39.699663  543705 disk_info.go:196] parse disk info done, disk is : [0xc000357b00 0xc000357b40]
E0323 11:26:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:26:43.410654  543705 memory.go:191] Add success.
I0323 11:26:43.409833  543705 cpu.go:282] Add success.
I0323 11:26:43.420349  543705 net.go:648] Add success.
I0323 11:26:43.423131  543705 net.go:770] primary dev: ETH0
I0323 11:26:43.423144  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:26:43.423156  543705 net.go:698] Add success.
I0323 11:26:46.457964  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:26:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:26:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:26:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:26:53.410262  543705 memory.go:184] no items to output this cycle
I0323 11:26:53.410268  543705 cpu.go:275] no items to output this cycle
E0323 11:27:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:27:03.409771  543705 memory.go:184] no items to output this cycle
I0323 11:27:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 11:27:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:27:13.409788  543705 memory.go:191] Add success.
W0323 11:27:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:27:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:27:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:27:13.409826  543705 cpu.go:282] Add success.
I0323 11:27:13.420216  543705 net.go:648] Add success.
I0323 11:27:13.429014  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 11:27:13.429089  543705 net.go:770] primary dev: ETH0
I0323 11:27:13.429103  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:27:13.429117  543705 net.go:698] Add success.
I0323 11:27:13.453653  543705 event_worker.go:152] Polling the log file for events...
I0323 11:27:13.468798  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7684c58c-b2db-4a18-9669-ee593141b680","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:27:13.468838  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 11:27:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:27:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0323 11:27:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:27:14.456867  543705 disk_worker.go:494] system disk:vda1
I0323 11:27:14.456910  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:27:14.457238  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:27:14.457247  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:27:14.457252  543705 custom_config.go:64] query custom config with name: gpu
E0323 11:27:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:27:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:27:16.457916  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:27:16.457915  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:27:16.457987  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:27:16.458008  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:27:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:27:23.409798  543705 memory.go:184] no items to output this cycle
I0323 11:27:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 11:27:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:27:33.409786  543705 memory.go:184] no items to output this cycle
I0323 11:27:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 11:27:39.700144  543705 disk_info.go:125] begin check local disk info of client
I0323 11:27:39.702740  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:27:39.702746  543705 disk_info.go:196] parse disk info done, disk is : [0xc000288e80 0xc000288ec0]
I0323 11:27:40.390785  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:27:40.390790  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:27:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:27:43.410696  543705 memory.go:191] Add success.
I0323 11:27:43.409818  543705 cpu.go:282] Add success.
I0323 11:27:43.420430  543705 net.go:648] Add success.
I0323 11:27:43.423174  543705 net.go:770] primary dev: ETH0
I0323 11:27:43.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:27:43.423202  543705 net.go:698] Add success.
I0323 11:27:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:27:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:27:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:27:53.409781  543705 cpu.go:275] no items to output this cycle
E0323 11:27:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:27:53.409797  543705 memory.go:184] no items to output this cycle
E0323 11:28:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:28:03.409792  543705 memory.go:184] no items to output this cycle
I0323 11:28:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 11:28:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:28:13.409795  543705 memory.go:191] Add success.
I0323 11:28:13.409813  543705 cpu.go:282] Add success.
W0323 11:28:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:28:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:28:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:28:13.420128  543705 net.go:648] Add success.
I0323 11:28:13.422836  543705 net.go:770] primary dev: ETH0
I0323 11:28:13.422849  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:28:13.422861  543705 net.go:698] Add success.
I0323 11:28:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:28:14.455104  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:28:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 11:28:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:28:14.456500  543705 disk_worker.go:494] system disk:vda1
I0323 11:28:14.456544  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:28:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:28:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:28:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:28:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:28:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:28:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:28:23.409800  543705 memory.go:184] no items to output this cycle
I0323 11:28:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 11:28:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:28:33.409766  543705 memory.go:184] no items to output this cycle
I0323 11:28:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 11:28:39.704165  543705 disk_info.go:125] begin check local disk info of client
I0323 11:28:39.706755  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:28:39.706761  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371680 0xc0003716c0]
E0323 11:28:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:28:43.410848  543705 memory.go:191] Add success.
I0323 11:28:43.409825  543705 cpu.go:282] Add success.
I0323 11:28:43.420555  543705 net.go:648] Add success.
I0323 11:28:43.423317  543705 net.go:770] primary dev: ETH0
I0323 11:28:43.423330  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:28:43.423342  543705 net.go:698] Add success.
I0323 11:28:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:28:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:28:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:28:53.410247  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:28:53.410263  543705 memory.go:184] no items to output this cycle
I0323 11:28:53.410286  543705 cpu.go:275] no items to output this cycle
E0323 11:29:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:29:03.409783  543705 memory.go:184] no items to output this cycle
I0323 11:29:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 11:29:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:29:13.409783  543705 memory.go:191] Add success.
W0323 11:29:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 11:29:13.409811  543705 cpu.go:282] Add success.
W0323 11:29:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:29:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:29:13.420190  543705 net.go:648] Add success.
I0323 11:29:13.422953  543705 net.go:770] primary dev: ETH0
I0323 11:29:13.422966  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:29:13.422978  543705 net.go:698] Add success.
I0323 11:29:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:29:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:29:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 11:29:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:29:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 11:29:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:29:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:29:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:29:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:29:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:29:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:29:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:29:23.409794  543705 memory.go:184] no items to output this cycle
I0323 11:29:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 11:29:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:29:33.409786  543705 memory.go:184] no items to output this cycle
I0323 11:29:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 11:29:39.706845  543705 disk_info.go:125] begin check local disk info of client
I0323 11:29:39.709402  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:29:39.709408  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046fb80 0xc00046fbc0]
E0323 11:29:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:29:43.409781  543705 memory.go:191] Add success.
I0323 11:29:43.409841  543705 cpu.go:282] Add success.
I0323 11:29:43.420008  543705 net.go:648] Add success.
I0323 11:29:43.420996  543705 net.go:770] primary dev: ETH0
I0323 11:29:43.421009  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:29:43.421023  543705 net.go:698] Add success.
I0323 11:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:29:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:29:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:29:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:29:53.409787  543705 memory.go:184] no items to output this cycle
I0323 11:29:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 11:30:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:30:03.409796  543705 memory.go:184] no items to output this cycle
I0323 11:30:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 11:30:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:30:13.409814  543705 memory.go:191] Add success.
I0323 11:30:13.409818  543705 cpu.go:282] Add success.
W0323 11:30:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:30:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:30:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:30:13.420101  543705 net.go:648] Add success.
I0323 11:30:13.423218  543705 net.go:770] primary dev: ETH0
I0323 11:30:13.423232  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:30:13.423244  543705 net.go:698] Add success.
I0323 11:30:13.467591  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"94a6c568-1eb7-47f9-9e90-faaa35befce6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:30:13.467625  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:30:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:30:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:30:14.455229  543705 disk_worker.go:708] disk space is not compliant
W0323 11:30:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:30:14.456758  543705 disk_worker.go:494] system disk:vda1
I0323 11:30:14.456790  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:30:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:30:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:30:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:30:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:30:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:30:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:30:23.409777  543705 memory.go:184] no items to output this cycle
I0323 11:30:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 11:30:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:30:33.409805  543705 memory.go:184] no items to output this cycle
I0323 11:30:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 11:30:39.709696  543705 disk_info.go:125] begin check local disk info of client
I0323 11:30:39.712273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:30:39.712279  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002acbc0 0xc0002acc00]
I0323 11:30:40.393719  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:30:40.393724  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:30:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:30:43.410692  543705 memory.go:191] Add success.
I0323 11:30:43.409812  543705 cpu.go:282] Add success.
I0323 11:30:43.420411  543705 net.go:648] Add success.
I0323 11:30:43.422882  543705 net.go:770] primary dev: ETH0
I0323 11:30:43.422897  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:30:43.422911  543705 net.go:698] Add success.
I0323 11:30:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:30:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:30:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:30:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:30:53.409779  543705 cpu.go:275] no items to output this cycle
I0323 11:30:53.409788  543705 memory.go:184] no items to output this cycle
E0323 11:31:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:31:03.409783  543705 memory.go:184] no items to output this cycle
I0323 11:31:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 11:31:13.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:31:13.409785  543705 cpu.go:282] Add success.
I0323 11:31:13.409794  543705 memory.go:191] Add success.
W0323 11:31:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:31:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:31:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:31:13.420276  543705 net.go:648] Add success.
I0323 11:31:13.423156  543705 net.go:770] primary dev: ETH0
I0323 11:31:13.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:31:13.423185  543705 net.go:698] Add success.
I0323 11:31:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:31:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:31:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 11:31:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:31:14.456569  543705 disk_worker.go:494] system disk:vda1
I0323 11:31:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:31:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:31:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:31:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:31:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:31:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:31:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:31:23.409772  543705 memory.go:184] no items to output this cycle
I0323 11:31:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 11:31:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:31:33.409801  543705 memory.go:184] no items to output this cycle
I0323 11:31:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 11:31:39.713203  543705 disk_info.go:125] begin check local disk info of client
I0323 11:31:39.715763  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:31:39.715769  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003be640 0xc0003be680]
E0323 11:31:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:31:43.410658  543705 memory.go:191] Add success.
I0323 11:31:43.409805  543705 cpu.go:282] Add success.
I0323 11:31:43.420441  543705 net.go:648] Add success.
I0323 11:31:43.423187  543705 net.go:770] primary dev: ETH0
I0323 11:31:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:31:43.423217  543705 net.go:698] Add success.
I0323 11:31:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:31:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:31:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:31:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:31:53.409778  543705 memory.go:184] no items to output this cycle
I0323 11:31:53.409778  543705 cpu.go:275] no items to output this cycle
E0323 11:32:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:32:03.409786  543705 memory.go:184] no items to output this cycle
I0323 11:32:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 11:32:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:32:13.409789  543705 memory.go:191] Add success.
I0323 11:32:13.409790  543705 cpu.go:282] Add success.
W0323 11:32:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:32:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:32:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:32:13.419882  543705 net.go:770] primary dev: ETH0
I0323 11:32:13.419894  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:32:13.419907  543705 net.go:698] Add success.
I0323 11:32:13.420313  543705 net.go:648] Add success.
W0323 11:32:14.455151  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:32:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 11:32:14.455164  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:32:14.456939  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:32:14.456948  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:32:14.456954  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:32:14.456995  543705 disk_worker.go:494] system disk:vda1
I0323 11:32:14.457025  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:32:15.456849  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:32:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:32:16.457919  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:32:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:32:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:32:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:32:16.472312  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:32:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:32:23.409767  543705 memory.go:184] no items to output this cycle
I0323 11:32:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 11:32:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:32:33.409811  543705 memory.go:184] no items to output this cycle
I0323 11:32:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 11:32:39.715854  543705 disk_info.go:125] begin check local disk info of client
I0323 11:32:39.718396  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:32:39.718402  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003918c0 0xc000391900]
E0323 11:32:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:32:43.410709  543705 memory.go:191] Add success.
I0323 11:32:43.409822  543705 cpu.go:282] Add success.
I0323 11:32:43.420603  543705 net.go:648] Add success.
I0323 11:32:43.423113  543705 net.go:770] primary dev: ETH0
I0323 11:32:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:32:43.423138  543705 net.go:698] Add success.
I0323 11:32:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:32:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:32:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:32:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:32:53.409797  543705 memory.go:184] no items to output this cycle
I0323 11:32:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:33:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:33:03.409785  543705 memory.go:184] no items to output this cycle
I0323 11:33:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 11:33:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:33:13.409809  543705 memory.go:191] Add success.
I0323 11:33:13.409817  543705 cpu.go:282] Add success.
W0323 11:33:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:33:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:33:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:33:13.420049  543705 net.go:648] Add success.
I0323 11:33:13.422908  543705 net.go:770] primary dev: ETH0
I0323 11:33:13.422922  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:33:13.422936  543705 net.go:698] Add success.
I0323 11:33:13.568992  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3e880c33-ce27-4a2b-b3e4-396099a23abe","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:33:13.569028  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:33:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:33:14.455240  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:33:14.455255  543705 disk_worker.go:708] disk space is not compliant
W0323 11:33:14.455258  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:33:14.456768  543705 disk_worker.go:494] system disk:vda1
I0323 11:33:14.456803  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:33:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:33:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:33:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:33:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:33:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:33:23.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:33:23.409769  543705 memory.go:184] no items to output this cycle
I0323 11:33:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 11:33:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:33:33.409778  543705 memory.go:184] no items to output this cycle
I0323 11:33:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 11:33:39.718487  543705 disk_info.go:125] begin check local disk info of client
I0323 11:33:39.721036  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:33:39.721041  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039edc0 0xc00039ee00]
I0323 11:33:40.393860  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:33:40.393866  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:33:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:33:43.410699  543705 memory.go:191] Add success.
I0323 11:33:43.409851  543705 cpu.go:282] Add success.
I0323 11:33:43.420274  543705 net.go:770] primary dev: ETH0
I0323 11:33:43.420287  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:33:43.420301  543705 net.go:698] Add success.
I0323 11:33:43.420775  543705 net.go:648] Add success.
I0323 11:33:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:33:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:33:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:33:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:33:53.409815  543705 memory.go:184] no items to output this cycle
I0323 11:33:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 11:34:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:34:03.409789  543705 memory.go:184] no items to output this cycle
I0323 11:34:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 11:34:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:34:13.409815  543705 memory.go:191] Add success.
I0323 11:34:13.409824  543705 cpu.go:282] Add success.
W0323 11:34:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:34:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:34:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:34:13.419994  543705 net.go:770] primary dev: ETH0
I0323 11:34:13.420019  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:34:13.420031  543705 net.go:698] Add success.
I0323 11:34:13.420268  543705 net.go:648] Add success.
I0323 11:34:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:34:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:34:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 11:34:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:34:14.456605  543705 disk_worker.go:494] system disk:vda1
I0323 11:34:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:34:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:34:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:34:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:34:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:34:23.409783  543705 cpu.go:275] no items to output this cycle
I0323 11:34:23.409785  543705 memory.go:184] no items to output this cycle
E0323 11:34:33.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:34:33.409765  543705 memory.go:184] no items to output this cycle
I0323 11:34:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 11:34:39.721675  543705 disk_info.go:125] begin check local disk info of client
I0323 11:34:39.724255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:34:39.724261  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0880 0xc0003c08c0]
E0323 11:34:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:34:43.409798  543705 cpu.go:282] Add success.
I0323 11:34:43.410775  543705 memory.go:191] Add success.
I0323 11:34:43.419738  543705 net.go:648] Add success.
I0323 11:34:43.422511  543705 net.go:770] primary dev: ETH0
I0323 11:34:43.422526  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:34:43.422540  543705 net.go:698] Add success.
I0323 11:34:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:34:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:34:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:34:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:34:53.409782  543705 memory.go:184] no items to output this cycle
I0323 11:34:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 11:35:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:35:03.409778  543705 memory.go:184] no items to output this cycle
I0323 11:35:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 11:35:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:35:13.409815  543705 memory.go:191] Add success.
I0323 11:35:13.409821  543705 cpu.go:282] Add success.
W0323 11:35:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:35:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:35:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:35:13.420144  543705 net.go:648] Add success.
I0323 11:35:13.422934  543705 net.go:770] primary dev: ETH0
I0323 11:35:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:35:13.422964  543705 net.go:698] Add success.
I0323 11:35:14.454999  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:35:14.455158  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:35:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0323 11:35:14.455243  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:35:14.456682  543705 disk_worker.go:494] system disk:vda1
I0323 11:35:14.456717  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:35:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:35:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:35:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:35:16.472389  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:35:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:35:23.409774  543705 memory.go:184] no items to output this cycle
I0323 11:35:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 11:35:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:35:33.409768  543705 memory.go:184] no items to output this cycle
I0323 11:35:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 11:35:39.724344  543705 disk_info.go:125] begin check local disk info of client
I0323 11:35:39.726964  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:35:39.726970  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7380 0xc0004a73c0]
E0323 11:35:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:35:43.410778  543705 memory.go:191] Add success.
I0323 11:35:43.409809  543705 cpu.go:282] Add success.
I0323 11:35:43.420493  543705 net.go:648] Add success.
I0323 11:35:43.423166  543705 net.go:770] primary dev: ETH0
I0323 11:35:43.423179  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:35:43.423194  543705 net.go:698] Add success.
I0323 11:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:35:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:35:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:35:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:35:53.409770  543705 memory.go:184] no items to output this cycle
I0323 11:35:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 11:36:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:36:03.409777  543705 memory.go:184] no items to output this cycle
I0323 11:36:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 11:36:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:36:13.409818  543705 memory.go:191] Add success.
I0323 11:36:13.409833  543705 cpu.go:282] Add success.
W0323 11:36:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:36:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:36:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:36:13.420218  543705 net.go:648] Add success.
I0323 11:36:13.423372  543705 net.go:770] primary dev: ETH0
I0323 11:36:13.423386  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:36:13.423399  543705 net.go:698] Add success.
I0323 11:36:13.468050  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6134a7c5-e241-4462-a3d3-6174184212e3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:36:13.468088  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:36:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:36:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:36:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 11:36:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:36:14.456745  543705 disk_worker.go:494] system disk:vda1
I0323 11:36:14.456775  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:36:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:36:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:36:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:36:16.472371  543705 disk_local_worker.go:436] Get disk info: []
I0323 11:36:23.409775  543705 cpu.go:275] no items to output this cycle
E0323 11:36:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:36:23.409790  543705 memory.go:184] no items to output this cycle
E0323 11:36:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:36:33.409781  543705 memory.go:184] no items to output this cycle
I0323 11:36:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 11:36:39.727054  543705 disk_info.go:125] begin check local disk info of client
I0323 11:36:39.729592  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:36:39.729598  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c780 0xc00034c7c0]
I0323 11:36:40.394773  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:36:40.394778  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:36:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:36:43.410699  543705 memory.go:191] Add success.
I0323 11:36:43.409809  543705 cpu.go:282] Add success.
I0323 11:36:43.420620  543705 net.go:648] Add success.
I0323 11:36:43.423386  543705 net.go:770] primary dev: ETH0
I0323 11:36:43.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:36:43.423413  543705 net.go:698] Add success.
I0323 11:36:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:36:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:36:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:36:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:36:53.409783  543705 memory.go:184] no items to output this cycle
I0323 11:36:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 11:37:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:37:03.409789  543705 cpu.go:275] no items to output this cycle
I0323 11:37:03.409794  543705 memory.go:184] no items to output this cycle
E0323 11:37:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:37:13.409812  543705 memory.go:191] Add success.
I0323 11:37:13.409822  543705 cpu.go:282] Add success.
W0323 11:37:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:37:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:37:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:37:13.420273  543705 net.go:648] Add success.
I0323 11:37:13.423117  543705 net.go:770] primary dev: ETH0
I0323 11:37:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:37:13.423145  543705 net.go:698] Add success.
I0323 11:37:13.453718  543705 event_worker.go:152] Polling the log file for events...
W0323 11:37:14.454146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:37:14.454212  543705 disk_worker.go:708] disk space is not compliant
W0323 11:37:14.454216  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:37:14.455565  543705 disk_worker.go:494] system disk:vda1
I0323 11:37:14.455597  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:37:14.456214  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:37:14.456224  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:37:14.456231  543705 custom_config.go:64] query custom config with name: gpu
E0323 11:37:15.457003  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:37:15.457017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:37:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:37:16.458011  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:37:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:37:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:37:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:37:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:37:23.409783  543705 cpu.go:275] no items to output this cycle
I0323 11:37:23.409786  543705 memory.go:184] no items to output this cycle
E0323 11:37:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:37:33.409797  543705 memory.go:184] no items to output this cycle
I0323 11:37:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 11:37:39.729677  543705 disk_info.go:125] begin check local disk info of client
I0323 11:37:39.732210  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:37:39.732216  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004869c0 0xc000486a00]
E0323 11:37:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:37:43.410775  543705 memory.go:191] Add success.
I0323 11:37:43.409825  543705 cpu.go:282] Add success.
I0323 11:37:43.420705  543705 net.go:648] Add success.
I0323 11:37:43.423427  543705 net.go:770] primary dev: ETH0
I0323 11:37:43.423439  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:37:43.423452  543705 net.go:698] Add success.
I0323 11:37:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:37:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:37:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:37:53.410396  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:37:53.410410  543705 memory.go:184] no items to output this cycle
I0323 11:37:53.410414  543705 cpu.go:275] no items to output this cycle
E0323 11:38:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:38:03.409799  543705 memory.go:184] no items to output this cycle
I0323 11:38:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 11:38:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:38:13.409813  543705 memory.go:191] Add success.
I0323 11:38:13.409825  543705 cpu.go:282] Add success.
W0323 11:38:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:38:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:38:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:38:13.420144  543705 net.go:648] Add success.
I0323 11:38:13.423091  543705 net.go:770] primary dev: ETH0
I0323 11:38:13.423104  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:38:13.423132  543705 net.go:698] Add success.
I0323 11:38:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:38:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:38:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 11:38:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:38:14.456585  543705 disk_worker.go:494] system disk:vda1
I0323 11:38:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:38:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:38:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:38:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:38:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:38:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:38:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:38:23.409776  543705 memory.go:184] no items to output this cycle
I0323 11:38:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 11:38:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:38:33.409801  543705 memory.go:184] no items to output this cycle
I0323 11:38:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 11:38:39.733296  543705 disk_info.go:125] begin check local disk info of client
I0323 11:38:39.735844  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:38:39.735851  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bacc0 0xc0003bad00]
E0323 11:38:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:38:43.410679  543705 memory.go:191] Add success.
I0323 11:38:43.409802  543705 cpu.go:282] Add success.
I0323 11:38:43.420727  543705 net.go:648] Add success.
I0323 11:38:43.423859  543705 net.go:770] primary dev: ETH0
I0323 11:38:43.423872  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:38:43.423884  543705 net.go:698] Add success.
I0323 11:38:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:38:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:38:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:38:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:38:53.409797  543705 memory.go:184] no items to output this cycle
I0323 11:38:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 11:39:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:39:03.409781  543705 cpu.go:275] no items to output this cycle
I0323 11:39:03.409791  543705 memory.go:184] no items to output this cycle
E0323 11:39:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:39:13.409784  543705 memory.go:191] Add success.
I0323 11:39:13.409784  543705 cpu.go:282] Add success.
W0323 11:39:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:39:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:39:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:39:13.420104  543705 net.go:648] Add success.
I0323 11:39:13.422940  543705 net.go:770] primary dev: ETH0
I0323 11:39:13.422953  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:39:13.422966  543705 net.go:698] Add success.
I0323 11:39:13.475052  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"04a806a4-5b4d-4ba0-ac98-8452955dbf3f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:39:13.475086  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:39:14.453934  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:39:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:39:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 11:39:14.455232  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:39:14.456710  543705 disk_worker.go:494] system disk:vda1
I0323 11:39:14.456775  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:39:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:39:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:39:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:39:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:39:16.472507  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:39:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:39:23.409779  543705 cpu.go:275] no items to output this cycle
I0323 11:39:23.409780  543705 memory.go:184] no items to output this cycle
E0323 11:39:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:39:33.409774  543705 memory.go:184] no items to output this cycle
I0323 11:39:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 11:39:39.735934  543705 disk_info.go:125] begin check local disk info of client
I0323 11:39:39.738548  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:39:39.738554  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5c80 0xc0004b5cc0]
I0323 11:39:40.397716  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:39:40.397721  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:39:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:39:43.410674  543705 memory.go:191] Add success.
I0323 11:39:43.409802  543705 cpu.go:282] Add success.
I0323 11:39:43.420666  543705 net.go:648] Add success.
I0323 11:39:43.423694  543705 net.go:770] primary dev: ETH0
I0323 11:39:43.423707  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:39:43.423719  543705 net.go:698] Add success.
I0323 11:39:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:39:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:39:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:39:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:39:53.409798  543705 memory.go:184] no items to output this cycle
I0323 11:39:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 11:40:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:40:03.409783  543705 memory.go:184] no items to output this cycle
I0323 11:40:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 11:40:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:40:13.409819  543705 memory.go:191] Add success.
I0323 11:40:13.409825  543705 cpu.go:282] Add success.
W0323 11:40:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:40:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:40:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:40:13.420213  543705 net.go:648] Add success.
I0323 11:40:13.423498  543705 net.go:770] primary dev: ETH0
I0323 11:40:13.423511  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:40:13.423523  543705 net.go:698] Add success.
I0323 11:40:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:40:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:40:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 11:40:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:40:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 11:40:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:40:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:40:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:40:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:40:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:40:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:40:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:40:23.409793  543705 memory.go:184] no items to output this cycle
I0323 11:40:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 11:40:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:40:33.409769  543705 memory.go:184] no items to output this cycle
I0323 11:40:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 11:40:39.738640  543705 disk_info.go:125] begin check local disk info of client
I0323 11:40:39.741150  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:40:39.741157  543705 disk_info.go:196] parse disk info done, disk is : [0xc000366380 0xc0003663c0]
E0323 11:40:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:40:43.410633  543705 memory.go:191] Add success.
I0323 11:40:43.409830  543705 cpu.go:282] Add success.
I0323 11:40:43.420703  543705 net.go:648] Add success.
I0323 11:40:43.423206  543705 net.go:770] primary dev: ETH0
I0323 11:40:43.423220  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:40:43.423231  543705 net.go:698] Add success.
I0323 11:40:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:40:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:40:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:40:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:40:53.409769  543705 memory.go:184] no items to output this cycle
I0323 11:40:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 11:41:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:41:03.409802  543705 memory.go:184] no items to output this cycle
I0323 11:41:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 11:41:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:41:13.409784  543705 memory.go:191] Add success.
I0323 11:41:13.409805  543705 cpu.go:282] Add success.
W0323 11:41:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:41:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:41:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:41:13.420154  543705 net.go:648] Add success.
I0323 11:41:13.423479  543705 net.go:770] primary dev: ETH0
I0323 11:41:13.423494  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:41:13.423509  543705 net.go:698] Add success.
W0323 11:41:14.455268  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:41:14.455287  543705 disk_worker.go:708] disk space is not compliant
W0323 11:41:14.455291  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:41:14.455679  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:41:14.457597  543705 disk_worker.go:494] system disk:vda1
I0323 11:41:14.457663  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:41:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:41:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:41:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:41:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:41:16.472407  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:41:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:41:23.409766  543705 memory.go:184] no items to output this cycle
I0323 11:41:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 11:41:33.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:41:33.409773  543705 memory.go:184] no items to output this cycle
I0323 11:41:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 11:41:39.741675  543705 disk_info.go:125] begin check local disk info of client
I0323 11:41:39.744216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:41:39.744222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051d600 0xc00051d640]
E0323 11:41:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:41:43.410789  543705 memory.go:191] Add success.
I0323 11:41:43.409962  543705 cpu.go:282] Add success.
I0323 11:41:43.419757  543705 net.go:648] Add success.
I0323 11:41:43.422797  543705 net.go:770] primary dev: ETH0
I0323 11:41:43.422810  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:41:43.422822  543705 net.go:698] Add success.
I0323 11:41:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:41:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:41:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:41:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:41:53.409777  543705 memory.go:184] no items to output this cycle
I0323 11:41:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 11:42:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:42:03.409796  543705 memory.go:184] no items to output this cycle
I0323 11:42:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 11:42:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:42:13.409781  543705 memory.go:191] Add success.
W0323 11:42:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 11:42:13.409806  543705 cpu.go:282] Add success.
W0323 11:42:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:42:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:42:13.420180  543705 net.go:648] Add success.
I0323 11:42:13.422844  543705 net.go:770] primary dev: ETH0
I0323 11:42:13.422859  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:42:13.422873  543705 net.go:698] Add success.
I0323 11:42:13.529437  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2035b8ee-cfc1-49f3-985c-2fce7fefdec6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:42:13.529473  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 11:42:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:42:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 11:42:14.455178  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:42:14.456973  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0323 11:42:14.456978  543705 disk_worker.go:494] system disk:vda1
E0323 11:42:14.456982  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:42:14.456988  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:42:14.457011  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:42:15.456821  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:42:15.456829  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:42:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:42:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:42:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:42:16.458039  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:42:16.472362  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:42:23.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:42:23.409759  543705 memory.go:184] no items to output this cycle
I0323 11:42:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 11:42:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:42:33.409796  543705 memory.go:184] no items to output this cycle
I0323 11:42:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 11:42:39.745369  543705 disk_info.go:125] begin check local disk info of client
I0323 11:42:39.747935  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:42:39.747941  543705 disk_info.go:196] parse disk info done, disk is : [0xc000358000 0xc000358040]
I0323 11:42:40.398788  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:42:40.398793  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:42:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:42:43.410767  543705 memory.go:191] Add success.
I0323 11:42:43.409837  543705 cpu.go:282] Add success.
I0323 11:42:43.420474  543705 net.go:648] Add success.
I0323 11:42:43.423261  543705 net.go:770] primary dev: ETH0
I0323 11:42:43.423274  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:42:43.423286  543705 net.go:698] Add success.
I0323 11:42:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:42:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:42:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:42:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:42:53.409783  543705 memory.go:184] no items to output this cycle
I0323 11:42:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:43:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:43:03.409782  543705 memory.go:184] no items to output this cycle
I0323 11:43:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 11:43:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:43:13.409801  543705 memory.go:191] Add success.
I0323 11:43:13.409805  543705 cpu.go:282] Add success.
W0323 11:43:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:43:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:43:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:43:13.420054  543705 net.go:648] Add success.
I0323 11:43:13.422901  543705 net.go:770] primary dev: ETH0
I0323 11:43:13.422914  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:43:13.422926  543705 net.go:698] Add success.
I0323 11:43:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:43:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:43:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0323 11:43:14.455245  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:43:14.457221  543705 disk_worker.go:494] system disk:vda1
I0323 11:43:14.457268  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:43:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:43:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:43:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:43:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:43:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:43:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:43:23.409791  543705 memory.go:184] no items to output this cycle
I0323 11:43:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 11:43:33.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:43:33.409916  543705 cpu.go:275] no items to output this cycle
I0323 11:43:33.409993  543705 memory.go:184] no items to output this cycle
I0323 11:43:39.748026  543705 disk_info.go:125] begin check local disk info of client
I0323 11:43:39.750695  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:43:39.750700  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004da400 0xc0004da440]
E0323 11:43:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:43:43.410577  543705 memory.go:191] Add success.
I0323 11:43:43.409845  543705 cpu.go:282] Add success.
I0323 11:43:43.420275  543705 net.go:648] Add success.
I0323 11:43:43.423046  543705 net.go:770] primary dev: ETH0
I0323 11:43:43.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:43:43.423071  543705 net.go:698] Add success.
I0323 11:43:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:43:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:43:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:43:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:43:53.409794  543705 memory.go:184] no items to output this cycle
I0323 11:43:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 11:44:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:44:03.409787  543705 memory.go:184] no items to output this cycle
I0323 11:44:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 11:44:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:44:13.409801  543705 memory.go:191] Add success.
I0323 11:44:13.409801  543705 cpu.go:282] Add success.
W0323 11:44:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:44:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:44:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:44:13.420098  543705 net.go:648] Add success.
I0323 11:44:13.422801  543705 net.go:770] primary dev: ETH0
I0323 11:44:13.422813  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:44:13.422832  543705 net.go:698] Add success.
I0323 11:44:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:44:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:44:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 11:44:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:44:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 11:44:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:44:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:44:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:44:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:44:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:44:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:44:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:44:23.409775  543705 cpu.go:275] no items to output this cycle
I0323 11:44:23.409778  543705 memory.go:184] no items to output this cycle
E0323 11:44:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:44:33.409819  543705 memory.go:184] no items to output this cycle
I0323 11:44:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 11:44:39.750785  543705 disk_info.go:125] begin check local disk info of client
I0323 11:44:39.753327  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:44:39.753334  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5ec0 0xc0000c5f00]
E0323 11:44:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:44:43.410745  543705 memory.go:191] Add success.
I0323 11:44:43.409819  543705 cpu.go:282] Add success.
I0323 11:44:43.420453  543705 net.go:648] Add success.
I0323 11:44:43.424078  543705 net.go:770] primary dev: ETH0
I0323 11:44:43.424095  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:44:43.424110  543705 net.go:698] Add success.
I0323 11:44:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:44:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:44:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:44:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:44:53.409812  543705 memory.go:184] no items to output this cycle
I0323 11:44:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 11:45:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:45:03.409782  543705 memory.go:184] no items to output this cycle
I0323 11:45:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 11:45:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:45:13.409833  543705 memory.go:191] Add success.
I0323 11:45:13.409837  543705 cpu.go:282] Add success.
W0323 11:45:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:45:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:45:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:45:13.420403  543705 net.go:648] Add success.
I0323 11:45:13.423590  543705 net.go:770] primary dev: ETH0
I0323 11:45:13.423609  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:45:13.423624  543705 net.go:698] Add success.
I0323 11:45:13.471694  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3fe8f62d-f9dc-4e55-b0c8-0f1e096d9c5a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:45:13.471728  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:45:14.453956  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:45:14.455252  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:45:14.455351  543705 disk_worker.go:708] disk space is not compliant
W0323 11:45:14.455356  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:45:14.457338  543705 disk_worker.go:494] system disk:vda1
I0323 11:45:14.457381  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:45:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:45:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:45:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:45:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:45:16.472556  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:45:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:45:23.409895  543705 memory.go:184] no items to output this cycle
I0323 11:45:23.409957  543705 cpu.go:275] no items to output this cycle
E0323 11:45:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:45:33.409786  543705 memory.go:184] no items to output this cycle
I0323 11:45:33.409891  543705 cpu.go:275] no items to output this cycle
I0323 11:45:39.753684  543705 disk_info.go:125] begin check local disk info of client
I0323 11:45:39.756290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:45:39.756297  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b900 0xc00007b940]
I0323 11:45:40.398955  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:45:40.398962  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:45:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:45:43.409795  543705 memory.go:191] Add success.
I0323 11:45:43.409902  543705 cpu.go:282] Add success.
I0323 11:45:43.420159  543705 net.go:648] Add success.
I0323 11:45:43.421271  543705 net.go:770] primary dev: ETH0
I0323 11:45:43.421284  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:45:43.421298  543705 net.go:698] Add success.
I0323 11:45:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:45:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:45:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:45:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:45:53.409799  543705 memory.go:184] no items to output this cycle
I0323 11:45:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 11:46:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:46:03.409793  543705 memory.go:184] no items to output this cycle
I0323 11:46:03.409833  543705 cpu.go:275] no items to output this cycle
E0323 11:46:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:46:13.409805  543705 memory.go:191] Add success.
I0323 11:46:13.409806  543705 cpu.go:282] Add success.
W0323 11:46:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:46:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:46:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:46:13.420084  543705 net.go:648] Add success.
I0323 11:46:13.422950  543705 net.go:770] primary dev: ETH0
I0323 11:46:13.422965  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:46:13.422978  543705 net.go:698] Add success.
I0323 11:46:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:46:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:46:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 11:46:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:46:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 11:46:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:46:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:46:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:46:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:46:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:46:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:46:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:46:23.409770  543705 memory.go:184] no items to output this cycle
I0323 11:46:23.409778  543705 cpu.go:275] no items to output this cycle
E0323 11:46:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:46:33.409826  543705 memory.go:184] no items to output this cycle
I0323 11:46:33.409839  543705 cpu.go:275] no items to output this cycle
I0323 11:46:39.756394  543705 disk_info.go:125] begin check local disk info of client
I0323 11:46:39.758990  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:46:39.758997  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4940 0xc0000c4980]
E0323 11:46:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:46:43.410757  543705 memory.go:191] Add success.
I0323 11:46:43.409830  543705 cpu.go:282] Add success.
I0323 11:46:43.420544  543705 net.go:648] Add success.
I0323 11:46:43.423217  543705 net.go:770] primary dev: ETH0
I0323 11:46:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:46:43.423243  543705 net.go:698] Add success.
I0323 11:46:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:46:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:46:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:46:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:46:53.409820  543705 memory.go:184] no items to output this cycle
I0323 11:46:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 11:47:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:47:03.409768  543705 memory.go:184] no items to output this cycle
I0323 11:47:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 11:47:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:47:13.409823  543705 cpu.go:282] Add success.
I0323 11:47:13.409834  543705 memory.go:191] Add success.
W0323 11:47:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:47:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:47:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:47:13.420482  543705 net.go:648] Add success.
I0323 11:47:13.423902  543705 net.go:770] primary dev: ETH0
I0323 11:47:13.423916  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:47:13.423929  543705 net.go:698] Add success.
I0323 11:47:13.453547  543705 event_worker.go:152] Polling the log file for events...
W0323 11:47:14.454412  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:47:14.454430  543705 disk_worker.go:708] disk space is not compliant
W0323 11:47:14.454435  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:47:14.455358  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:47:14.455369  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:47:14.455376  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:47:14.456180  543705 disk_worker.go:494] system disk:vda1
I0323 11:47:14.456217  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:47:15.457117  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:47:15.457135  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:47:16.458102  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:47:16.458164  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:47:16.458175  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:47:16.458194  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:47:16.472557  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:47:23.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:47:23.409833  543705 memory.go:184] no items to output this cycle
I0323 11:47:23.409966  543705 cpu.go:275] no items to output this cycle
E0323 11:47:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:47:33.409824  543705 memory.go:184] no items to output this cycle
I0323 11:47:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 11:47:39.759097  543705 disk_info.go:125] begin check local disk info of client
I0323 11:47:39.761712  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:47:39.761721  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004adf40 0xc00037fac0]
E0323 11:47:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:47:43.410700  543705 memory.go:191] Add success.
I0323 11:47:43.409822  543705 cpu.go:282] Add success.
I0323 11:47:43.420395  543705 net.go:648] Add success.
I0323 11:47:43.423417  543705 net.go:770] primary dev: ETH0
I0323 11:47:43.423431  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:47:43.423444  543705 net.go:698] Add success.
I0323 11:47:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:47:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:47:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:47:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:47:53.409801  543705 memory.go:184] no items to output this cycle
I0323 11:47:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 11:48:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:48:03.409779  543705 memory.go:184] no items to output this cycle
I0323 11:48:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 11:48:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:48:13.409832  543705 memory.go:191] Add success.
I0323 11:48:13.409835  543705 cpu.go:282] Add success.
W0323 11:48:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:48:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:48:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:48:13.420220  543705 net.go:648] Add success.
I0323 11:48:13.423237  543705 net.go:770] primary dev: ETH0
I0323 11:48:13.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:48:13.423266  543705 net.go:698] Add success.
I0323 11:48:13.470505  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"0feb9017-6846-453c-9a5d-fd1f89cd05fd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:48:13.470539  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:48:14.453940  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:48:14.455169  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:48:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0323 11:48:14.455242  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:48:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 11:48:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:48:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:48:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:48:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:48:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:48:16.472487  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:48:23.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:48:23.409801  543705 memory.go:184] no items to output this cycle
I0323 11:48:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 11:48:33.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:48:33.409835  543705 memory.go:184] no items to output this cycle
I0323 11:48:33.410158  543705 cpu.go:275] no items to output this cycle
I0323 11:48:39.763486  543705 disk_info.go:125] begin check local disk info of client
I0323 11:48:39.766132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:48:39.766141  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f3f40 0xc000570000]
I0323 11:48:40.401380  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:48:40.401388  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:48:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:48:43.410729  543705 memory.go:191] Add success.
I0323 11:48:43.409898  543705 cpu.go:282] Add success.
I0323 11:48:43.420473  543705 net.go:648] Add success.
I0323 11:48:43.423429  543705 net.go:770] primary dev: ETH0
I0323 11:48:43.423444  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:48:43.423460  543705 net.go:698] Add success.
I0323 11:48:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:48:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:48:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:48:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:48:53.409791  543705 memory.go:184] no items to output this cycle
I0323 11:48:53.409850  543705 cpu.go:275] no items to output this cycle
E0323 11:49:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:49:03.409803  543705 memory.go:184] no items to output this cycle
I0323 11:49:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 11:49:13.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:49:13.409844  543705 memory.go:191] Add success.
I0323 11:49:13.409849  543705 cpu.go:282] Add success.
W0323 11:49:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:49:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:49:13.409898  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:49:13.420303  543705 net.go:648] Add success.
I0323 11:49:13.423249  543705 net.go:770] primary dev: ETH0
I0323 11:49:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:49:13.423288  543705 net.go:698] Add success.
I0323 11:49:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:49:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:49:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 11:49:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:49:14.456520  543705 disk_worker.go:494] system disk:vda1
I0323 11:49:14.456552  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:49:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:49:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:49:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:49:16.458112  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:49:16.472549  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:49:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:49:23.409817  543705 memory.go:184] no items to output this cycle
I0323 11:49:23.409826  543705 cpu.go:275] no items to output this cycle
I0323 11:49:33.409797  543705 cpu.go:275] no items to output this cycle
E0323 11:49:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:49:33.409823  543705 memory.go:184] no items to output this cycle
I0323 11:49:39.766228  543705 disk_info.go:125] begin check local disk info of client
I0323 11:49:39.768791  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:49:39.768797  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005de880 0xc0005de8c0]
E0323 11:49:43.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:49:43.410825  543705 memory.go:191] Add success.
I0323 11:49:43.410028  543705 cpu.go:282] Add success.
I0323 11:49:43.419730  543705 net.go:648] Add success.
I0323 11:49:43.422385  543705 net.go:770] primary dev: ETH0
I0323 11:49:43.422398  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:49:43.422409  543705 net.go:698] Add success.
I0323 11:49:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:49:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:49:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:49:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:49:53.409793  543705 memory.go:184] no items to output this cycle
I0323 11:49:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 11:50:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:50:03.409811  543705 memory.go:184] no items to output this cycle
I0323 11:50:03.409913  543705 cpu.go:275] no items to output this cycle
E0323 11:50:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:50:13.409807  543705 memory.go:191] Add success.
I0323 11:50:13.409808  543705 cpu.go:282] Add success.
W0323 11:50:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:50:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:50:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:50:13.420188  543705 net.go:648] Add success.
I0323 11:50:13.422922  543705 net.go:770] primary dev: ETH0
I0323 11:50:13.422935  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:50:13.422956  543705 net.go:698] Add success.
I0323 11:50:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:50:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:50:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0323 11:50:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:50:14.456651  543705 disk_worker.go:494] system disk:vda1
I0323 11:50:14.456684  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:50:15.461695  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:50:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:50:16.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:50:16.458133  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:50:16.472647  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:50:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:50:23.409800  543705 memory.go:184] no items to output this cycle
I0323 11:50:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 11:50:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:50:33.409799  543705 memory.go:184] no items to output this cycle
I0323 11:50:33.409895  543705 cpu.go:275] no items to output this cycle
I0323 11:50:39.769684  543705 disk_info.go:125] begin check local disk info of client
I0323 11:50:39.772315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:50:39.772322  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a080 0xc00027a0c0]
E0323 11:50:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:50:43.410833  543705 memory.go:191] Add success.
I0323 11:50:43.409932  543705 cpu.go:282] Add success.
I0323 11:50:43.419878  543705 net.go:648] Add success.
I0323 11:50:43.423122  543705 net.go:770] primary dev: ETH0
I0323 11:50:43.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:50:43.423156  543705 net.go:698] Add success.
I0323 11:50:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:50:46.458080  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:50:46.458109  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:50:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:50:53.409788  543705 memory.go:184] no items to output this cycle
I0323 11:50:53.409832  543705 cpu.go:275] no items to output this cycle
E0323 11:51:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:51:03.409794  543705 memory.go:184] no items to output this cycle
I0323 11:51:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 11:51:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:51:13.409811  543705 memory.go:191] Add success.
W0323 11:51:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:51:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:51:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:51:13.410173  543705 cpu.go:282] Add success.
I0323 11:51:13.419987  543705 net.go:648] Add success.
I0323 11:51:13.421179  543705 net.go:770] primary dev: ETH0
I0323 11:51:13.421196  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:51:13.421213  543705 net.go:698] Add success.
I0323 11:51:13.463283  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3250e4f0-b0d7-497a-b187-908eb407277b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:51:13.463316  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:51:14.455135  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:51:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:51:14.455278  543705 disk_worker.go:708] disk space is not compliant
W0323 11:51:14.455282  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:51:14.456854  543705 disk_worker.go:494] system disk:vda1
I0323 11:51:14.456887  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:51:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:51:16.458022  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:51:16.458114  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:51:16.458141  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:51:16.472585  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:51:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:51:23.409785  543705 memory.go:184] no items to output this cycle
I0323 11:51:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 11:51:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:51:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 11:51:33.409807  543705 memory.go:184] no items to output this cycle
I0323 11:51:39.772419  543705 disk_info.go:125] begin check local disk info of client
I0323 11:51:39.775029  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:51:39.775036  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a7c80 0xc0004a7cc0]
I0323 11:51:40.401740  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:51:40.401748  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:51:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:51:43.410714  543705 memory.go:191] Add success.
I0323 11:51:43.409835  543705 cpu.go:282] Add success.
I0323 11:51:43.420418  543705 net.go:648] Add success.
I0323 11:51:43.423180  543705 net.go:770] primary dev: ETH0
I0323 11:51:43.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:51:43.423209  543705 net.go:698] Add success.
I0323 11:51:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:51:46.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:51:46.458121  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:51:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:51:53.409813  543705 memory.go:184] no items to output this cycle
I0323 11:51:53.409823  543705 cpu.go:275] no items to output this cycle
E0323 11:52:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:52:03.409802  543705 memory.go:184] no items to output this cycle
I0323 11:52:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 11:52:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:52:13.409779  543705 memory.go:191] Add success.
W0323 11:52:13.409804  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 11:52:13.409805  543705 cpu.go:282] Add success.
W0323 11:52:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:52:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:52:13.420224  543705 net.go:648] Add success.
I0323 11:52:13.423403  543705 net.go:770] primary dev: ETH0
I0323 11:52:13.423421  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:52:13.423436  543705 net.go:698] Add success.
W0323 11:52:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:52:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 11:52:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:52:14.456794  543705 disk_worker.go:494] system disk:vda1
I0323 11:52:14.456835  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:52:14.457218  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:52:14.457226  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:52:14.457231  543705 custom_config.go:64] query custom config with name: gpu
E0323 11:52:15.456847  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:52:15.456857  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:52:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:52:16.457935  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:52:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:52:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:52:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:52:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:52:23.409776  543705 memory.go:184] no items to output this cycle
I0323 11:52:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 11:52:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:52:33.409777  543705 memory.go:184] no items to output this cycle
I0323 11:52:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 11:52:39.775120  543705 disk_info.go:125] begin check local disk info of client
I0323 11:52:39.777719  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:52:39.777726  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b37c0 0xc0003b3800]
E0323 11:52:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:52:43.410758  543705 memory.go:191] Add success.
I0323 11:52:43.409810  543705 cpu.go:282] Add success.
I0323 11:52:43.420609  543705 net.go:648] Add success.
I0323 11:52:43.423468  543705 net.go:770] primary dev: ETH0
I0323 11:52:43.423480  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:52:43.423492  543705 net.go:698] Add success.
I0323 11:52:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:52:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:52:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:52:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:52:53.409822  543705 memory.go:184] no items to output this cycle
I0323 11:52:53.409830  543705 cpu.go:275] no items to output this cycle
E0323 11:53:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:53:03.409836  543705 memory.go:184] no items to output this cycle
I0323 11:53:03.409835  543705 cpu.go:275] no items to output this cycle
E0323 11:53:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:53:13.409825  543705 cpu.go:282] Add success.
I0323 11:53:13.409843  543705 memory.go:191] Add success.
W0323 11:53:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:53:13.409897  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:53:13.409902  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:53:13.420439  543705 net.go:648] Add success.
I0323 11:53:13.423449  543705 net.go:770] primary dev: ETH0
I0323 11:53:13.423462  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:53:13.423474  543705 net.go:698] Add success.
I0323 11:53:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:53:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:53:14.455232  543705 disk_worker.go:708] disk space is not compliant
W0323 11:53:14.455235  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:53:14.456616  543705 disk_worker.go:494] system disk:vda1
I0323 11:53:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:53:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:53:16.465687  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:53:16.465788  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:53:16.465820  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:53:16.481749  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:53:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:53:23.409819  543705 memory.go:184] no items to output this cycle
I0323 11:53:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 11:53:33.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:53:33.409839  543705 memory.go:184] no items to output this cycle
I0323 11:53:33.409849  543705 cpu.go:275] no items to output this cycle
I0323 11:53:39.779570  543705 disk_info.go:125] begin check local disk info of client
I0323 11:53:39.782260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:53:39.782269  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ac4c0 0xc0004ac500]
E0323 11:53:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:53:43.410709  543705 memory.go:191] Add success.
I0323 11:53:43.409802  543705 cpu.go:282] Add success.
I0323 11:53:43.420440  543705 net.go:648] Add success.
I0323 11:53:43.423133  543705 net.go:770] primary dev: ETH0
I0323 11:53:43.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:53:43.423361  543705 net.go:698] Add success.
I0323 11:53:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:53:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:53:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:53:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:53:53.409791  543705 memory.go:184] no items to output this cycle
I0323 11:53:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 11:54:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:54:03.409766  543705 memory.go:184] no items to output this cycle
I0323 11:54:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 11:54:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:54:13.409815  543705 memory.go:191] Add success.
I0323 11:54:13.409824  543705 cpu.go:282] Add success.
W0323 11:54:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:54:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:54:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:54:13.420225  543705 net.go:648] Add success.
I0323 11:54:13.423002  543705 net.go:770] primary dev: ETH0
I0323 11:54:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:54:13.423032  543705 net.go:698] Add success.
I0323 11:54:13.468409  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"506627af-5ced-46d9-89cc-fd04438d0e0d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:54:13.468444  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 11:54:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:54:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:54:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 11:54:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:54:14.456739  543705 disk_worker.go:494] system disk:vda1
I0323 11:54:14.456768  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:54:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:54:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:54:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:54:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:54:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:54:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:54:23.409792  543705 memory.go:184] no items to output this cycle
I0323 11:54:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 11:54:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:54:33.409785  543705 memory.go:184] no items to output this cycle
I0323 11:54:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 11:54:39.782352  543705 disk_info.go:125] begin check local disk info of client
I0323 11:54:39.784955  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:54:39.784962  543705 disk_info.go:196] parse disk info done, disk is : [0xc000266340 0xc000266380]
I0323 11:54:40.402234  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:54:40.402241  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:54:43.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:54:43.409938  543705 memory.go:191] Add success.
I0323 11:54:43.410360  543705 cpu.go:282] Add success.
I0323 11:54:43.422593  543705 net.go:648] Add success.
I0323 11:54:43.470515  543705 net.go:770] primary dev: ETH0
I0323 11:54:43.470537  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:54:43.470556  543705 net.go:698] Add success.
I0323 11:54:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:54:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:54:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:54:53.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:54:53.409885  543705 memory.go:184] no items to output this cycle
I0323 11:54:53.409961  543705 cpu.go:275] no items to output this cycle
E0323 11:55:03.409925  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:55:03.409935  543705 cpu.go:275] no items to output this cycle
I0323 11:55:03.409944  543705 memory.go:184] no items to output this cycle
E0323 11:55:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:55:13.409819  543705 memory.go:191] Add success.
I0323 11:55:13.409834  543705 cpu.go:282] Add success.
W0323 11:55:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:55:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:55:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:55:13.420284  543705 net.go:648] Add success.
I0323 11:55:13.423131  543705 net.go:770] primary dev: ETH0
I0323 11:55:13.423143  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:55:13.423155  543705 net.go:698] Add success.
I0323 11:55:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:55:14.455143  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:55:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0323 11:55:14.455156  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:55:14.456478  543705 disk_worker.go:494] system disk:vda1
I0323 11:55:14.456521  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:55:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:55:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:55:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:55:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:55:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:55:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:55:23.409774  543705 memory.go:184] no items to output this cycle
I0323 11:55:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 11:55:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:55:33.409786  543705 memory.go:184] no items to output this cycle
I0323 11:55:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 11:55:39.785682  543705 disk_info.go:125] begin check local disk info of client
I0323 11:55:39.788171  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:55:39.788181  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267640 0xc0003e2000]
E0323 11:55:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:55:43.410653  543705 memory.go:191] Add success.
I0323 11:55:43.409821  543705 cpu.go:282] Add success.
I0323 11:55:43.420391  543705 net.go:648] Add success.
I0323 11:55:43.423462  543705 net.go:770] primary dev: ETH0
I0323 11:55:43.423475  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:55:43.423490  543705 net.go:698] Add success.
I0323 11:55:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:55:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:55:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:55:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:55:53.409808  543705 memory.go:184] no items to output this cycle
I0323 11:55:53.409836  543705 cpu.go:275] no items to output this cycle
E0323 11:56:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:56:03.409781  543705 memory.go:184] no items to output this cycle
I0323 11:56:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 11:56:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:56:13.409823  543705 memory.go:191] Add success.
I0323 11:56:13.409828  543705 cpu.go:282] Add success.
W0323 11:56:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:56:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:56:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:56:13.420116  543705 net.go:648] Add success.
I0323 11:56:13.422924  543705 net.go:770] primary dev: ETH0
I0323 11:56:13.422939  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:56:13.422953  543705 net.go:698] Add success.
I0323 11:56:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:56:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:56:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 11:56:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:56:14.456513  543705 disk_worker.go:494] system disk:vda1
I0323 11:56:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:56:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:56:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:56:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:56:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:56:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:56:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:56:23.409789  543705 memory.go:184] no items to output this cycle
I0323 11:56:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 11:56:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:56:33.409773  543705 memory.go:184] no items to output this cycle
I0323 11:56:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 11:56:39.789596  543705 disk_info.go:125] begin check local disk info of client
I0323 11:56:39.792167  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:56:39.792175  543705 disk_info.go:196] parse disk info done, disk is : [0xc000304c80 0xc000304cc0]
E0323 11:56:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:56:43.410807  543705 memory.go:191] Add success.
I0323 11:56:43.409834  543705 cpu.go:282] Add success.
I0323 11:56:43.420666  543705 net.go:648] Add success.
I0323 11:56:43.423363  543705 net.go:770] primary dev: ETH0
I0323 11:56:43.423378  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:56:43.423392  543705 net.go:698] Add success.
I0323 11:56:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:56:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:56:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:56:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:56:53.409796  543705 memory.go:184] no items to output this cycle
I0323 11:56:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 11:57:03.409913  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:57:03.409936  543705 memory.go:184] no items to output this cycle
I0323 11:57:03.410075  543705 cpu.go:275] no items to output this cycle
E0323 11:57:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:57:13.409793  543705 memory.go:191] Add success.
I0323 11:57:13.409800  543705 cpu.go:282] Add success.
W0323 11:57:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:57:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:57:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:57:13.420124  543705 net.go:648] Add success.
I0323 11:57:13.423018  543705 net.go:770] primary dev: ETH0
I0323 11:57:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:57:13.423047  543705 net.go:698] Add success.
I0323 11:57:13.428992  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 11:57:13.453165  543705 event_worker.go:152] Polling the log file for events...
I0323 11:57:13.463572  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"47eb7efd-1dc1-4d62-8cf0-3654926e8b08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 11:57:13.463603  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 11:57:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:57:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 11:57:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0323 11:57:14.456113  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 11:57:14.456122  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 11:57:14.456127  543705 custom_config.go:64] query custom config with name: gpu
I0323 11:57:14.456464  543705 disk_worker.go:494] system disk:vda1
I0323 11:57:14.456510  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 11:57:15.457015  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 11:57:15.457033  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:57:16.458095  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 11:57:16.458115  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 11:57:16.458155  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:57:16.458174  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:57:16.472607  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:57:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:57:23.409778  543705 memory.go:184] no items to output this cycle
I0323 11:57:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 11:57:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:57:33.409805  543705 memory.go:184] no items to output this cycle
I0323 11:57:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 11:57:39.793598  543705 disk_info.go:125] begin check local disk info of client
I0323 11:57:39.796162  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:57:39.796170  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039dc00 0xc00039dc40]
I0323 11:57:40.405246  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 11:57:40.405251  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 11:57:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:57:43.410762  543705 memory.go:191] Add success.
I0323 11:57:43.409834  543705 cpu.go:282] Add success.
I0323 11:57:43.420454  543705 net.go:648] Add success.
I0323 11:57:43.423504  543705 net.go:770] primary dev: ETH0
I0323 11:57:43.423528  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:57:43.423542  543705 net.go:698] Add success.
I0323 11:57:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:57:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:57:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:57:53.409941  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:57:53.409982  543705 memory.go:184] no items to output this cycle
I0323 11:57:53.409990  543705 cpu.go:275] no items to output this cycle
E0323 11:58:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:58:03.409787  543705 memory.go:184] no items to output this cycle
I0323 11:58:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 11:58:13.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:58:13.409783  543705 memory.go:191] Add success.
W0323 11:58:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:58:13.409819  543705 cpu.go:282] Add success.
I0323 11:58:13.409823  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:58:13.420117  543705 net.go:648] Add success.
I0323 11:58:13.422935  543705 net.go:770] primary dev: ETH0
I0323 11:58:13.422949  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:58:13.422961  543705 net.go:698] Add success.
I0323 11:58:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:58:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:58:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 11:58:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:58:14.456486  543705 disk_worker.go:494] system disk:vda1
I0323 11:58:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:58:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:58:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:58:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:58:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:58:16.472445  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:58:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:58:23.409794  543705 memory.go:184] no items to output this cycle
I0323 11:58:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 11:58:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:58:33.409779  543705 memory.go:184] no items to output this cycle
I0323 11:58:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 11:58:39.796257  543705 disk_info.go:125] begin check local disk info of client
I0323 11:58:39.798796  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:58:39.798803  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003845c0 0xc000384600]
E0323 11:58:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:58:43.410797  543705 memory.go:191] Add success.
I0323 11:58:43.409822  543705 cpu.go:282] Add success.
I0323 11:58:43.420514  543705 net.go:648] Add success.
I0323 11:58:43.423705  543705 net.go:770] primary dev: ETH0
I0323 11:58:43.423720  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:58:43.423734  543705 net.go:698] Add success.
I0323 11:58:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:58:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:58:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:58:53.409881  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:58:53.409910  543705 memory.go:184] no items to output this cycle
I0323 11:58:53.410072  543705 cpu.go:275] no items to output this cycle
E0323 11:59:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:59:03.409785  543705 memory.go:184] no items to output this cycle
I0323 11:59:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 11:59:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:59:13.409836  543705 memory.go:191] Add success.
I0323 11:59:13.409847  543705 cpu.go:282] Add success.
W0323 11:59:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 11:59:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 11:59:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 11:59:13.420420  543705 net.go:648] Add success.
I0323 11:59:13.423352  543705 net.go:770] primary dev: ETH0
I0323 11:59:13.423367  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:59:13.423384  543705 net.go:698] Add success.
I0323 11:59:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0323 11:59:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 11:59:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 11:59:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0323 11:59:14.456665  543705 disk_worker.go:494] system disk:vda1
I0323 11:59:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 11:59:15.456024  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 11:59:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:59:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:59:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0323 11:59:16.472453  543705 disk_local_worker.go:436] Get disk info: []
E0323 11:59:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:59:23.409775  543705 memory.go:184] no items to output this cycle
I0323 11:59:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 11:59:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:59:33.409821  543705 memory.go:184] no items to output this cycle
I0323 11:59:33.409836  543705 cpu.go:275] no items to output this cycle
I0323 11:59:39.800644  543705 disk_info.go:125] begin check local disk info of client
I0323 11:59:39.803249  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 11:59:39.803258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4000 0xc0003d4040]
E0323 11:59:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:59:43.410744  543705 memory.go:191] Add success.
I0323 11:59:43.409823  543705 cpu.go:282] Add success.
I0323 11:59:43.420472  543705 net.go:648] Add success.
I0323 11:59:43.423297  543705 net.go:770] primary dev: ETH0
I0323 11:59:43.423312  543705 net.go:802] Send network stats successfully!,count is 6
I0323 11:59:43.423334  543705 net.go:698] Add success.
I0323 11:59:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 11:59:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 11:59:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 11:59:53.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 11:59:53.409922  543705 memory.go:184] no items to output this cycle
I0323 11:59:53.409967  543705 cpu.go:275] no items to output this cycle
E0323 12:00:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:00:03.409795  543705 cpu.go:275] no items to output this cycle
I0323 12:00:03.409810  543705 memory.go:184] no items to output this cycle
E0323 12:00:13.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:00:13.409836  543705 memory.go:191] Add success.
I0323 12:00:13.409837  543705 cpu.go:282] Add success.
W0323 12:00:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:00:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:00:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:00:13.420255  543705 net.go:648] Add success.
I0323 12:00:13.423275  543705 net.go:770] primary dev: ETH0
I0323 12:00:13.423290  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:00:13.423304  543705 net.go:698] Add success.
I0323 12:00:13.468327  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d978959d-8b5a-4399-8384-fe4154877d64","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:00:13.468363  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:00:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:00:14.455133  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:00:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 12:00:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:00:14.456762  543705 disk_worker.go:494] system disk:vda1
I0323 12:00:14.456795  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:00:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:00:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:00:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:00:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:00:16.472385  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:00:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:00:23.409775  543705 memory.go:184] no items to output this cycle
I0323 12:00:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 12:00:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:00:33.409814  543705 memory.go:184] no items to output this cycle
I0323 12:00:33.409866  543705 cpu.go:275] no items to output this cycle
I0323 12:00:39.804662  543705 disk_info.go:125] begin check local disk info of client
I0323 12:00:39.807260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:00:39.807267  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a5140 0xc0002a5180]
I0323 12:00:40.405721  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:00:40.405726  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:00:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:00:43.410801  543705 memory.go:191] Add success.
I0323 12:00:43.409831  543705 cpu.go:282] Add success.
I0323 12:00:43.420633  543705 net.go:648] Add success.
I0323 12:00:43.423350  543705 net.go:770] primary dev: ETH0
I0323 12:00:43.423365  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:00:43.423378  543705 net.go:698] Add success.
I0323 12:00:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:00:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:00:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:00:53.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:00:53.409833  543705 memory.go:184] no items to output this cycle
I0323 12:00:53.409875  543705 cpu.go:275] no items to output this cycle
E0323 12:01:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:01:03.409815  543705 memory.go:184] no items to output this cycle
I0323 12:01:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 12:01:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:01:13.409802  543705 cpu.go:282] Add success.
I0323 12:01:13.409805  543705 memory.go:191] Add success.
W0323 12:01:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:01:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:01:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:01:13.420113  543705 net.go:648] Add success.
I0323 12:01:13.423181  543705 net.go:770] primary dev: ETH0
I0323 12:01:13.423195  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:01:13.423210  543705 net.go:698] Add success.
I0323 12:01:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:01:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:01:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 12:01:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:01:14.456560  543705 disk_worker.go:494] system disk:vda1
I0323 12:01:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:01:15.456007  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:01:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:01:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:01:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:01:16.472568  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:01:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:01:23.409813  543705 memory.go:184] no items to output this cycle
I0323 12:01:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 12:01:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:01:33.409789  543705 memory.go:184] no items to output this cycle
I0323 12:01:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 12:01:39.807382  543705 disk_info.go:125] begin check local disk info of client
I0323 12:01:39.811282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:01:39.811291  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049d100 0xc00049d140]
E0323 12:01:43.409839  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:01:43.409880  543705 memory.go:191] Add success.
I0323 12:01:43.410189  543705 cpu.go:282] Add success.
I0323 12:01:43.481919  543705 net.go:648] Add success.
I0323 12:01:43.526493  543705 net.go:770] primary dev: ETH0
I0323 12:01:43.526526  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:01:43.526546  543705 net.go:698] Add success.
I0323 12:01:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:01:46.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:01:46.458115  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:01:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:01:53.409793  543705 memory.go:184] no items to output this cycle
I0323 12:01:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 12:02:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:02:03.409798  543705 memory.go:184] no items to output this cycle
I0323 12:02:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 12:02:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:02:13.409799  543705 memory.go:191] Add success.
I0323 12:02:13.409800  543705 cpu.go:282] Add success.
W0323 12:02:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:02:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:02:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:02:13.420261  543705 net.go:648] Add success.
I0323 12:02:13.423045  543705 net.go:770] primary dev: ETH0
I0323 12:02:13.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:02:13.423070  543705 net.go:698] Add success.
W0323 12:02:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:02:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 12:02:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:02:14.456798  543705 disk_worker.go:494] system disk:vda1
I0323 12:02:14.456838  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:02:14.457168  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:02:14.457176  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:02:14.457181  543705 custom_config.go:64] query custom config with name: gpu
E0323 12:02:15.456762  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:02:15.456771  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 12:02:16.457910  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:02:16.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:02:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:02:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:02:16.472308  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:02:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:02:23.409776  543705 memory.go:184] no items to output this cycle
I0323 12:02:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 12:02:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:02:33.409787  543705 memory.go:184] no items to output this cycle
I0323 12:02:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 12:02:39.811375  543705 disk_info.go:125] begin check local disk info of client
I0323 12:02:39.813988  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:02:39.813994  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003dadc0 0xc0003dae00]
E0323 12:02:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:02:43.409791  543705 memory.go:191] Add success.
I0323 12:02:43.409874  543705 cpu.go:282] Add success.
I0323 12:02:43.420349  543705 net.go:648] Add success.
I0323 12:02:43.421620  543705 net.go:770] primary dev: ETH0
I0323 12:02:43.421633  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:02:43.421660  543705 net.go:698] Add success.
I0323 12:02:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:02:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:02:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:02:53.409786  543705 memory.go:184] no items to output this cycle
I0323 12:02:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 12:03:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:03:03.409785  543705 memory.go:184] no items to output this cycle
I0323 12:03:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 12:03:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:03:13.409825  543705 memory.go:191] Add success.
I0323 12:03:13.409830  543705 cpu.go:282] Add success.
W0323 12:03:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:03:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:03:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:03:13.420183  543705 net.go:648] Add success.
I0323 12:03:13.423050  543705 net.go:770] primary dev: ETH0
I0323 12:03:13.423063  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:03:13.423076  543705 net.go:698] Add success.
I0323 12:03:13.468707  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9a884b14-d692-4cd2-9668-7d09975d89df","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:03:13.468742  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:03:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:03:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:03:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 12:03:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:03:14.456526  543705 disk_worker.go:494] system disk:vda1
I0323 12:03:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:03:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:03:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:03:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:03:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:03:16.472460  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:03:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:03:23.409811  543705 memory.go:184] no items to output this cycle
I0323 12:03:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 12:03:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:03:33.409792  543705 memory.go:184] no items to output this cycle
I0323 12:03:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 12:03:39.815714  543705 disk_info.go:125] begin check local disk info of client
I0323 12:03:39.818322  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:03:39.818329  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004adc00 0xc0004adc40]
I0323 12:03:40.406789  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:03:40.406794  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:03:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:03:43.411050  543705 memory.go:191] Add success.
I0323 12:03:43.409882  543705 cpu.go:282] Add success.
I0323 12:03:43.419942  543705 net.go:648] Add success.
I0323 12:03:43.423150  543705 net.go:770] primary dev: ETH0
I0323 12:03:43.423164  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:03:43.423176  543705 net.go:698] Add success.
I0323 12:03:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:03:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:03:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:03:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:03:53.409772  543705 memory.go:184] no items to output this cycle
I0323 12:03:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 12:04:03.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:04:03.409826  543705 memory.go:184] no items to output this cycle
I0323 12:04:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 12:04:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:04:13.409821  543705 memory.go:191] Add success.
I0323 12:04:13.409826  543705 cpu.go:282] Add success.
W0323 12:04:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:04:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:04:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:04:13.420202  543705 net.go:648] Add success.
I0323 12:04:13.422954  543705 net.go:770] primary dev: ETH0
I0323 12:04:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:04:13.422981  543705 net.go:698] Add success.
I0323 12:04:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:04:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:04:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 12:04:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:04:14.456622  543705 disk_worker.go:494] system disk:vda1
I0323 12:04:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:04:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:04:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:04:16.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:04:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:04:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:04:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:04:23.409811  543705 memory.go:184] no items to output this cycle
I0323 12:04:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 12:04:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:04:33.409807  543705 memory.go:184] no items to output this cycle
I0323 12:04:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 12:04:39.819727  543705 disk_info.go:125] begin check local disk info of client
I0323 12:04:39.822365  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:04:39.822374  543705 disk_info.go:196] parse disk info done, disk is : [0xc000487f40 0xc000492000]
E0323 12:04:43.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:04:43.410790  543705 memory.go:191] Add success.
I0323 12:04:43.409825  543705 cpu.go:282] Add success.
I0323 12:04:43.420879  543705 net.go:648] Add success.
I0323 12:04:43.423719  543705 net.go:770] primary dev: ETH0
I0323 12:04:43.423740  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:04:43.423759  543705 net.go:698] Add success.
I0323 12:04:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:04:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:04:46.458111  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:04:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:04:53.409781  543705 memory.go:184] no items to output this cycle
I0323 12:04:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 12:05:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:05:03.409792  543705 memory.go:184] no items to output this cycle
I0323 12:05:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 12:05:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:05:13.409828  543705 memory.go:191] Add success.
I0323 12:05:13.409834  543705 cpu.go:282] Add success.
W0323 12:05:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:05:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:05:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:05:13.420245  543705 net.go:648] Add success.
I0323 12:05:13.423292  543705 net.go:770] primary dev: ETH0
I0323 12:05:13.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:05:13.423318  543705 net.go:698] Add success.
I0323 12:05:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:05:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:05:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 12:05:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:05:14.456549  543705 disk_worker.go:494] system disk:vda1
I0323 12:05:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:05:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:05:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:05:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:05:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:05:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:05:23.410226  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:05:23.410252  543705 memory.go:184] no items to output this cycle
I0323 12:05:23.410409  543705 cpu.go:275] no items to output this cycle
E0323 12:05:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:05:33.409784  543705 memory.go:184] no items to output this cycle
I0323 12:05:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 12:05:39.823757  543705 disk_info.go:125] begin check local disk info of client
I0323 12:05:39.826360  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:05:39.826366  543705 disk_info.go:196] parse disk info done, disk is : [0xc000386d80 0xc000386dc0]
E0323 12:05:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:05:43.410693  543705 memory.go:191] Add success.
I0323 12:05:43.409843  543705 cpu.go:282] Add success.
I0323 12:05:43.420492  543705 net.go:648] Add success.
I0323 12:05:43.423304  543705 net.go:770] primary dev: ETH0
I0323 12:05:43.423320  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:05:43.423334  543705 net.go:698] Add success.
I0323 12:05:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:05:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:05:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:05:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:05:53.409779  543705 memory.go:184] no items to output this cycle
I0323 12:05:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 12:06:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:06:03.409769  543705 memory.go:184] no items to output this cycle
I0323 12:06:03.409804  543705 cpu.go:275] no items to output this cycle
I0323 12:06:13.413950  543705 cpu.go:282] Add success.
E0323 12:06:13.414293  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:06:13.414320  543705 memory.go:191] Add success.
W0323 12:06:13.414352  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:06:13.414369  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:06:13.414373  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:06:13.446128  543705 net.go:648] Add success.
I0323 12:06:13.499943  543705 net.go:770] primary dev: ETH0
I0323 12:06:13.499965  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:06:13.499984  543705 net.go:698] Add success.
I0323 12:06:13.493746  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"22e97408-cda9-40f9-9c1d-66791af62a95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:06:13.500130  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:06:14.453979  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:06:14.454247  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:06:14.454313  543705 disk_worker.go:708] disk space is not compliant
W0323 12:06:14.454317  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:06:14.455690  543705 disk_worker.go:494] system disk:vda1
I0323 12:06:14.455717  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:06:15.455988  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:06:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:06:16.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:06:16.458105  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:06:16.472561  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:06:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:06:23.409800  543705 memory.go:184] no items to output this cycle
I0323 12:06:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 12:06:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:06:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 12:06:33.409793  543705 memory.go:184] no items to output this cycle
I0323 12:06:39.826456  543705 disk_info.go:125] begin check local disk info of client
I0323 12:06:39.829022  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:06:39.829030  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a4340 0xc0002a4380]
I0323 12:06:40.408978  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:06:40.408983  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:06:43.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:06:43.409811  543705 cpu.go:282] Add success.
I0323 12:06:43.409836  543705 memory.go:191] Add success.
I0323 12:06:43.419878  543705 net.go:770] primary dev: ETH0
I0323 12:06:43.419891  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:06:43.419905  543705 net.go:698] Add success.
I0323 12:06:43.420156  543705 net.go:648] Add success.
I0323 12:06:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:06:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:06:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:06:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:06:53.409779  543705 memory.go:184] no items to output this cycle
I0323 12:06:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 12:07:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:07:03.409802  543705 memory.go:184] no items to output this cycle
I0323 12:07:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 12:07:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:07:13.409803  543705 memory.go:191] Add success.
I0323 12:07:13.409805  543705 cpu.go:282] Add success.
W0323 12:07:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:07:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:07:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:07:13.420143  543705 net.go:648] Add success.
I0323 12:07:13.423096  543705 net.go:770] primary dev: ETH0
I0323 12:07:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:07:13.423122  543705 net.go:698] Add success.
I0323 12:07:13.453665  543705 event_worker.go:152] Polling the log file for events...
W0323 12:07:14.455183  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:07:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 12:07:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:07:14.455876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:07:14.455884  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:07:14.455890  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:07:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 12:07:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:07:15.456845  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:07:15.456854  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:07:16.458108  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:07:16.458127  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:07:16.458169  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:07:16.458188  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:07:16.472560  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:07:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:07:23.409776  543705 memory.go:184] no items to output this cycle
I0323 12:07:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:07:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:07:33.409804  543705 memory.go:184] no items to output this cycle
I0323 12:07:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 12:07:39.829678  543705 disk_info.go:125] begin check local disk info of client
I0323 12:07:39.832338  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:07:39.832345  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b6a80 0xc0002b6ac0]
E0323 12:07:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:07:43.409823  543705 memory.go:191] Add success.
I0323 12:07:43.409831  543705 cpu.go:282] Add success.
I0323 12:07:43.420227  543705 net.go:648] Add success.
I0323 12:07:43.421245  543705 net.go:770] primary dev: ETH0
I0323 12:07:43.421259  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:07:43.421273  543705 net.go:698] Add success.
I0323 12:07:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:07:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:07:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:07:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:07:53.409793  543705 memory.go:184] no items to output this cycle
I0323 12:07:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 12:08:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:08:03.409802  543705 memory.go:184] no items to output this cycle
I0323 12:08:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 12:08:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:08:13.409807  543705 memory.go:191] Add success.
I0323 12:08:13.409807  543705 cpu.go:282] Add success.
W0323 12:08:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:08:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:08:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:08:13.420183  543705 net.go:648] Add success.
I0323 12:08:13.423192  543705 net.go:770] primary dev: ETH0
I0323 12:08:13.423208  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:08:13.423222  543705 net.go:698] Add success.
I0323 12:08:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:08:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:08:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0323 12:08:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:08:14.456675  543705 disk_worker.go:494] system disk:vda1
I0323 12:08:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:08:15.456004  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:08:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:08:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:08:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:08:16.472508  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:08:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:08:23.409781  543705 memory.go:184] no items to output this cycle
I0323 12:08:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 12:08:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:08:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 12:08:33.409807  543705 memory.go:184] no items to output this cycle
I0323 12:08:39.833696  543705 disk_info.go:125] begin check local disk info of client
I0323 12:08:39.836424  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:08:39.836432  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005de040 0xc0005de080]
E0323 12:08:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:08:43.410892  543705 memory.go:191] Add success.
I0323 12:08:43.409841  543705 cpu.go:282] Add success.
I0323 12:08:43.420829  543705 net.go:648] Add success.
I0323 12:08:43.423625  543705 net.go:770] primary dev: ETH0
I0323 12:08:43.423644  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:08:43.423664  543705 net.go:698] Add success.
I0323 12:08:46.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:08:46.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:08:46.458123  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:08:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:08:53.409795  543705 cpu.go:275] no items to output this cycle
I0323 12:08:53.409801  543705 memory.go:184] no items to output this cycle
E0323 12:09:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:09:03.409801  543705 memory.go:184] no items to output this cycle
I0323 12:09:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 12:09:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:09:13.409829  543705 memory.go:191] Add success.
I0323 12:09:13.409840  543705 cpu.go:282] Add success.
W0323 12:09:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:09:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:09:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:09:13.420198  543705 net.go:648] Add success.
I0323 12:09:13.423124  543705 net.go:770] primary dev: ETH0
I0323 12:09:13.423139  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:09:13.423152  543705 net.go:698] Add success.
I0323 12:09:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:09:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:09:14.455228  543705 disk_worker.go:708] disk space is not compliant
W0323 12:09:14.455231  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:09:14.456610  543705 disk_worker.go:494] system disk:vda1
I0323 12:09:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:09:14.763536  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f9aeaba3-671f-4cd5-9062-7f1a37097270","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:09:14.763590  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:09:15.454984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:09:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:09:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:09:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:09:16.472489  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:09:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:09:23.409806  543705 memory.go:184] no items to output this cycle
I0323 12:09:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 12:09:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:09:33.409776  543705 memory.go:184] no items to output this cycle
I0323 12:09:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 12:09:39.837678  543705 disk_info.go:125] begin check local disk info of client
I0323 12:09:39.840262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:09:39.840269  543705 disk_info.go:196] parse disk info done, disk is : [0xc000340580 0xc0003405c0]
I0323 12:09:40.409162  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:09:40.409167  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:09:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:09:43.410806  543705 memory.go:191] Add success.
I0323 12:09:43.409814  543705 cpu.go:282] Add success.
I0323 12:09:43.420501  543705 net.go:648] Add success.
I0323 12:09:43.423418  543705 net.go:770] primary dev: ETH0
I0323 12:09:43.423433  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:09:43.423448  543705 net.go:698] Add success.
I0323 12:09:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:09:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:09:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:09:53.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:09:53.409919  543705 cpu.go:275] no items to output this cycle
I0323 12:09:53.409964  543705 memory.go:184] no items to output this cycle
E0323 12:10:03.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:10:03.409923  543705 memory.go:184] no items to output this cycle
I0323 12:10:03.410095  543705 cpu.go:275] no items to output this cycle
E0323 12:10:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:10:13.409800  543705 memory.go:191] Add success.
W0323 12:10:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:10:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:10:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:10:13.409848  543705 cpu.go:282] Add success.
I0323 12:10:13.420347  543705 net.go:648] Add success.
I0323 12:10:13.421619  543705 net.go:770] primary dev: ETH0
I0323 12:10:13.421632  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:10:13.421657  543705 net.go:698] Add success.
I0323 12:10:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:10:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:10:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0323 12:10:14.455247  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:10:14.456799  543705 disk_worker.go:494] system disk:vda1
I0323 12:10:14.456829  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:10:15.455988  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:10:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:10:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:10:16.458105  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:10:16.472506  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:10:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:10:23.409789  543705 memory.go:184] no items to output this cycle
I0323 12:10:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 12:10:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:10:33.409814  543705 memory.go:184] no items to output this cycle
I0323 12:10:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 12:10:39.841701  543705 disk_info.go:125] begin check local disk info of client
I0323 12:10:39.853375  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:10:39.853384  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ae800 0xc0002ae840]
E0323 12:10:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:10:43.410782  543705 memory.go:191] Add success.
I0323 12:10:43.409808  543705 cpu.go:282] Add success.
I0323 12:10:43.420506  543705 net.go:648] Add success.
I0323 12:10:43.423575  543705 net.go:770] primary dev: ETH0
I0323 12:10:43.423592  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:10:43.423607  543705 net.go:698] Add success.
I0323 12:10:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:10:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:10:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:10:53.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:10:53.409907  543705 memory.go:184] no items to output this cycle
I0323 12:10:53.409965  543705 cpu.go:275] no items to output this cycle
I0323 12:11:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 12:11:03.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:11:03.409837  543705 memory.go:184] no items to output this cycle
I0323 12:11:13.409873  543705 cpu.go:282] Add success.
E0323 12:11:13.410264  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:11:13.447258  543705 memory.go:191] Add success.
W0323 12:11:13.447293  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:11:13.447311  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:11:13.447315  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:11:13.433093  543705 net.go:648] Add success.
I0323 12:11:13.470895  543705 net.go:770] primary dev: ETH0
I0323 12:11:13.470924  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:11:13.470943  543705 net.go:698] Add success.
I0323 12:11:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:11:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:11:14.455269  543705 disk_worker.go:708] disk space is not compliant
W0323 12:11:14.455273  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:11:14.456862  543705 disk_worker.go:494] system disk:vda1
I0323 12:11:14.456896  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:11:15.455987  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:11:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:11:16.458130  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:11:16.458165  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:11:16.477679  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:11:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:11:23.409801  543705 memory.go:184] no items to output this cycle
I0323 12:11:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 12:11:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:11:33.409813  543705 memory.go:184] no items to output this cycle
I0323 12:11:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 12:11:39.853680  543705 disk_info.go:125] begin check local disk info of client
I0323 12:11:39.856253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:11:39.856261  543705 disk_info.go:196] parse disk info done, disk is : [0xc000272440 0xc000272480]
E0323 12:11:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:11:43.409848  543705 memory.go:191] Add success.
I0323 12:11:43.410358  543705 cpu.go:282] Add success.
I0323 12:11:43.419870  543705 net.go:648] Add success.
I0323 12:11:43.420952  543705 net.go:770] primary dev: ETH0
I0323 12:11:43.420968  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:11:43.420983  543705 net.go:698] Add success.
I0323 12:11:46.458017  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:11:46.458119  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:11:46.458160  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:11:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:11:53.409781  543705 memory.go:184] no items to output this cycle
I0323 12:11:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 12:12:03.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:12:03.409832  543705 memory.go:184] no items to output this cycle
I0323 12:12:03.409841  543705 cpu.go:275] no items to output this cycle
E0323 12:12:13.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:12:13.409824  543705 cpu.go:282] Add success.
I0323 12:12:13.409846  543705 memory.go:191] Add success.
W0323 12:12:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:12:13.409896  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:12:13.409900  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:12:13.420314  543705 net.go:648] Add success.
I0323 12:12:13.423207  543705 net.go:770] primary dev: ETH0
I0323 12:12:13.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:12:13.423234  543705 net.go:698] Add success.
I0323 12:12:13.513594  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1642c1c9-eb2c-4ace-b2b0-fdb59634c6a3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:12:13.513632  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 12:12:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:12:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 12:12:14.455234  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:12:14.456099  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:12:14.456109  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:12:14.456116  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:12:14.456664  543705 disk_worker.go:494] system disk:vda1
I0323 12:12:14.456698  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:12:15.457085  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:12:15.457100  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:12:16.458098  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:12:16.458163  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 12:12:16.458179  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:12:16.458186  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:12:16.472556  543705 disk_local_worker.go:436] Get disk info: []
I0323 12:12:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 12:12:23.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:12:23.409830  543705 memory.go:184] no items to output this cycle
E0323 12:12:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:12:33.409786  543705 memory.go:184] no items to output this cycle
I0323 12:12:33.409847  543705 cpu.go:275] no items to output this cycle
I0323 12:12:39.857682  543705 disk_info.go:125] begin check local disk info of client
I0323 12:12:39.860265  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:12:39.860273  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f900 0xc00032f940]
I0323 12:12:40.410142  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:12:40.410149  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:12:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:12:43.410686  543705 memory.go:191] Add success.
I0323 12:12:43.409819  543705 cpu.go:282] Add success.
I0323 12:12:43.420417  543705 net.go:648] Add success.
I0323 12:12:43.423273  543705 net.go:770] primary dev: ETH0
I0323 12:12:43.423287  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:12:43.423302  543705 net.go:698] Add success.
I0323 12:12:46.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:12:46.458101  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:12:46.458135  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:12:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:12:53.409813  543705 memory.go:184] no items to output this cycle
I0323 12:12:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 12:13:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:13:03.409827  543705 memory.go:184] no items to output this cycle
I0323 12:13:03.409851  543705 cpu.go:275] no items to output this cycle
E0323 12:13:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:13:13.409820  543705 cpu.go:282] Add success.
I0323 12:13:13.409834  543705 memory.go:191] Add success.
W0323 12:13:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:13:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:13:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:13:13.420161  543705 net.go:648] Add success.
I0323 12:13:13.421197  543705 net.go:770] primary dev: ETH0
I0323 12:13:13.421210  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:13:13.421222  543705 net.go:698] Add success.
I0323 12:13:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:13:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:13:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0323 12:13:14.455244  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:13:14.456682  543705 disk_worker.go:494] system disk:vda1
I0323 12:13:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:13:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:13:16.465689  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:13:16.465788  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:13:16.465823  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:13:16.477744  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:13:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:13:23.409817  543705 memory.go:184] no items to output this cycle
I0323 12:13:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 12:13:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:13:33.409784  543705 memory.go:184] no items to output this cycle
I0323 12:13:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 12:13:39.861687  543705 disk_info.go:125] begin check local disk info of client
I0323 12:13:39.864272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:13:39.864280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b3500 0xc0003b3540]
E0323 12:13:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:13:43.410798  543705 memory.go:191] Add success.
I0323 12:13:43.409808  543705 cpu.go:282] Add success.
I0323 12:13:43.420561  543705 net.go:648] Add success.
I0323 12:13:43.423654  543705 net.go:770] primary dev: ETH0
I0323 12:13:43.423668  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:13:43.423680  543705 net.go:698] Add success.
I0323 12:13:46.458018  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:13:46.458110  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:13:46.458151  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:13:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:13:53.409813  543705 memory.go:184] no items to output this cycle
I0323 12:13:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 12:14:03.409853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:14:03.409875  543705 memory.go:184] no items to output this cycle
I0323 12:14:03.409948  543705 cpu.go:275] no items to output this cycle
E0323 12:14:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:14:13.409794  543705 memory.go:191] Add success.
I0323 12:14:13.409818  543705 cpu.go:282] Add success.
W0323 12:14:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:14:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:14:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:14:13.420222  543705 net.go:648] Add success.
I0323 12:14:13.423476  543705 net.go:770] primary dev: ETH0
I0323 12:14:13.423488  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:14:13.423500  543705 net.go:698] Add success.
I0323 12:14:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:14:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:14:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 12:14:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:14:14.456528  543705 disk_worker.go:494] system disk:vda1
I0323 12:14:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:14:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:14:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:14:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:14:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:14:16.472516  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:14:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:14:23.409789  543705 memory.go:184] no items to output this cycle
I0323 12:14:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 12:14:33.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:14:33.409834  543705 memory.go:184] no items to output this cycle
I0323 12:14:33.409932  543705 cpu.go:275] no items to output this cycle
I0323 12:14:39.865688  543705 disk_info.go:125] begin check local disk info of client
I0323 12:14:39.868227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:14:39.868234  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf980 0xc0003bf9c0]
E0323 12:14:43.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:14:43.410592  543705 memory.go:191] Add success.
I0323 12:14:43.409873  543705 cpu.go:282] Add success.
I0323 12:14:43.420410  543705 net.go:648] Add success.
I0323 12:14:43.422980  543705 net.go:770] primary dev: ETH0
I0323 12:14:43.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:14:43.423010  543705 net.go:698] Add success.
I0323 12:14:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:14:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:14:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:14:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:14:53.409822  543705 memory.go:184] no items to output this cycle
I0323 12:14:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 12:15:03.409867  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:15:03.409888  543705 memory.go:184] no items to output this cycle
I0323 12:15:03.409958  543705 cpu.go:275] no items to output this cycle
E0323 12:15:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:15:13.409846  543705 memory.go:191] Add success.
I0323 12:15:13.409850  543705 cpu.go:282] Add success.
W0323 12:15:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:15:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:15:13.409898  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:15:13.420385  543705 net.go:648] Add success.
I0323 12:15:13.423489  543705 net.go:770] primary dev: ETH0
I0323 12:15:13.423504  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:15:13.423518  543705 net.go:698] Add success.
I0323 12:15:13.469195  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7e1d3513-3516-40e1-83c0-b5aa87e90dc3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:15:13.469229  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:15:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:15:14.455271  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:15:14.455287  543705 disk_worker.go:708] disk space is not compliant
W0323 12:15:14.455292  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:15:14.457233  543705 disk_worker.go:494] system disk:vda1
I0323 12:15:14.457272  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:15:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:15:16.458060  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:15:16.458143  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:15:16.458173  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:15:16.472811  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:15:23.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:15:23.409824  543705 memory.go:184] no items to output this cycle
I0323 12:15:23.409832  543705 cpu.go:275] no items to output this cycle
E0323 12:15:33.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:15:33.409827  543705 memory.go:184] no items to output this cycle
I0323 12:15:33.409863  543705 cpu.go:275] no items to output this cycle
I0323 12:15:39.869679  543705 disk_info.go:125] begin check local disk info of client
I0323 12:15:39.872284  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:15:39.872291  543705 disk_info.go:196] parse disk info done, disk is : [0xc000320d80 0xc000320dc0]
I0323 12:15:40.410634  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:15:40.410645  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:15:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:15:43.410616  543705 memory.go:191] Add success.
I0323 12:15:43.409833  543705 cpu.go:282] Add success.
I0323 12:15:43.420320  543705 net.go:648] Add success.
I0323 12:15:43.438192  543705 net.go:770] primary dev: ETH0
I0323 12:15:43.438207  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:15:43.438219  543705 net.go:698] Add success.
I0323 12:15:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:15:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:15:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:15:53.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:15:53.409842  543705 memory.go:184] no items to output this cycle
I0323 12:15:53.409850  543705 cpu.go:275] no items to output this cycle
E0323 12:16:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:16:03.409964  543705 memory.go:184] no items to output this cycle
I0323 12:16:03.410006  543705 cpu.go:275] no items to output this cycle
E0323 12:16:13.409858  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:16:13.409896  543705 memory.go:191] Add success.
W0323 12:16:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:16:13.409948  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:16:13.409952  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:16:13.410319  543705 cpu.go:282] Add success.
I0323 12:16:13.420591  543705 net.go:648] Add success.
I0323 12:16:13.423578  543705 net.go:770] primary dev: ETH0
I0323 12:16:13.423596  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:16:13.423613  543705 net.go:698] Add success.
I0323 12:16:14.455004  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:16:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:16:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0323 12:16:14.455243  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:16:14.456678  543705 disk_worker.go:494] system disk:vda1
I0323 12:16:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:16:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:16:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:16:16.458079  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:16:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:16:16.472537  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:16:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:16:23.409785  543705 memory.go:184] no items to output this cycle
I0323 12:16:23.409825  543705 cpu.go:275] no items to output this cycle
E0323 12:16:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:16:33.409785  543705 memory.go:184] no items to output this cycle
I0323 12:16:33.409848  543705 cpu.go:275] no items to output this cycle
I0323 12:16:39.873685  543705 disk_info.go:125] begin check local disk info of client
I0323 12:16:39.876311  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:16:39.876319  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312540 0xc000312580]
E0323 12:16:43.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:16:43.410666  543705 memory.go:191] Add success.
I0323 12:16:43.409927  543705 cpu.go:282] Add success.
I0323 12:16:43.420674  543705 net.go:648] Add success.
I0323 12:16:43.423425  543705 net.go:770] primary dev: ETH0
I0323 12:16:43.423444  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:16:43.423462  543705 net.go:698] Add success.
I0323 12:16:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:16:46.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:16:46.458110  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:16:53.409872  543705 cpu.go:275] no items to output this cycle
E0323 12:16:53.409989  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:16:53.410006  543705 memory.go:184] no items to output this cycle
E0323 12:17:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:17:03.409825  543705 cpu.go:275] no items to output this cycle
I0323 12:17:03.409834  543705 memory.go:184] no items to output this cycle
E0323 12:17:13.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:17:13.409844  543705 memory.go:191] Add success.
I0323 12:17:13.409852  543705 cpu.go:282] Add success.
W0323 12:17:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:17:13.409900  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:17:13.409904  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:17:13.420143  543705 net.go:648] Add success.
I0323 12:17:13.422871  543705 net.go:770] primary dev: ETH0
I0323 12:17:13.422901  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:17:13.422915  543705 net.go:698] Add success.
I0323 12:17:13.453472  543705 event_worker.go:152] Polling the log file for events...
W0323 12:17:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:17:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 12:17:14.455224  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:17:14.456097  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:17:14.456107  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:17:14.456114  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:17:14.456642  543705 disk_worker.go:494] system disk:vda1
I0323 12:17:14.456674  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:17:15.457079  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:17:15.457093  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:17:16.461696  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:17:16.461790  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:17:16.461823  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:17:16.476488  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:17:16.519140  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
E0323 12:17:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:17:23.409795  543705 cpu.go:275] no items to output this cycle
I0323 12:17:23.409805  543705 memory.go:184] no items to output this cycle
E0323 12:17:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:17:33.409783  543705 memory.go:184] no items to output this cycle
I0323 12:17:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 12:17:39.877681  543705 disk_info.go:125] begin check local disk info of client
I0323 12:17:39.880357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:17:39.880363  543705 disk_info.go:196] parse disk info done, disk is : [0xc000312900 0xc0003129c0]
E0323 12:17:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:17:43.410660  543705 memory.go:191] Add success.
I0323 12:17:43.409808  543705 cpu.go:282] Add success.
I0323 12:17:43.420355  543705 net.go:648] Add success.
I0323 12:17:43.423043  543705 net.go:770] primary dev: ETH0
I0323 12:17:43.423057  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:17:43.423070  543705 net.go:698] Add success.
I0323 12:17:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:17:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:17:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:17:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:17:53.409777  543705 memory.go:184] no items to output this cycle
I0323 12:17:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 12:18:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:18:03.409772  543705 memory.go:184] no items to output this cycle
I0323 12:18:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 12:18:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:18:13.409911  543705 memory.go:191] Add success.
I0323 12:18:13.409932  543705 cpu.go:282] Add success.
W0323 12:18:13.409948  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:18:13.409963  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:18:13.409966  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:18:13.419721  543705 net.go:648] Add success.
I0323 12:18:13.422787  543705 net.go:770] primary dev: ETH0
I0323 12:18:13.422800  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:18:13.422812  543705 net.go:698] Add success.
I0323 12:18:13.469708  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8d4d8d0f-2421-4915-8790-4d8eb9220849","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:18:13.469741  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:18:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:18:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:18:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 12:18:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:18:14.456742  543705 disk_worker.go:494] system disk:vda1
I0323 12:18:14.456771  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:18:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:18:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:18:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:18:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:18:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:18:23.409781  543705 memory.go:184] no items to output this cycle
I0323 12:18:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:18:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:18:33.409784  543705 memory.go:184] no items to output this cycle
I0323 12:18:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 12:18:39.881672  543705 disk_info.go:125] begin check local disk info of client
I0323 12:18:39.884227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:18:39.884234  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033ef40 0xc00033ef80]
I0323 12:18:40.411021  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:18:40.411026  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:18:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:18:43.410817  543705 memory.go:191] Add success.
I0323 12:18:43.409810  543705 cpu.go:282] Add success.
I0323 12:18:43.420513  543705 net.go:648] Add success.
I0323 12:18:43.423391  543705 net.go:770] primary dev: ETH0
I0323 12:18:43.423406  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:18:43.423420  543705 net.go:698] Add success.
I0323 12:18:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:18:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:18:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:18:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:18:53.409779  543705 cpu.go:275] no items to output this cycle
I0323 12:18:53.409782  543705 memory.go:184] no items to output this cycle
E0323 12:19:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:19:03.409779  543705 memory.go:184] no items to output this cycle
I0323 12:19:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 12:19:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:19:13.409795  543705 memory.go:191] Add success.
I0323 12:19:13.409810  543705 cpu.go:282] Add success.
W0323 12:19:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:19:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:19:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:19:13.419710  543705 net.go:648] Add success.
I0323 12:19:13.422602  543705 net.go:770] primary dev: ETH0
I0323 12:19:13.422617  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:19:13.422632  543705 net.go:698] Add success.
I0323 12:19:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:19:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:19:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 12:19:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:19:14.456549  543705 disk_worker.go:494] system disk:vda1
I0323 12:19:14.456577  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:19:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:19:16.458033  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:19:16.458111  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:19:16.458146  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:19:16.472642  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:19:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:19:23.409785  543705 memory.go:184] no items to output this cycle
I0323 12:19:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 12:19:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:19:33.409803  543705 memory.go:184] no items to output this cycle
I0323 12:19:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 12:19:39.885673  543705 disk_info.go:125] begin check local disk info of client
I0323 12:19:39.888226  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:19:39.888233  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033ea80 0xc00033eac0]
E0323 12:19:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:19:43.410728  543705 memory.go:191] Add success.
I0323 12:19:43.409806  543705 cpu.go:282] Add success.
I0323 12:19:43.420407  543705 net.go:648] Add success.
I0323 12:19:43.422989  543705 net.go:770] primary dev: ETH0
I0323 12:19:43.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:19:43.423017  543705 net.go:698] Add success.
I0323 12:19:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:19:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:19:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:19:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:19:53.409782  543705 memory.go:184] no items to output this cycle
I0323 12:19:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 12:20:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:20:03.409784  543705 memory.go:184] no items to output this cycle
I0323 12:20:03.409792  543705 cpu.go:275] no items to output this cycle
W0323 12:20:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:20:13.409735  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:20:13.409741  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:20:13.409931  543705 cpu.go:282] Add success.
E0323 12:20:13.409976  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:20:13.410078  543705 memory.go:191] Add success.
I0323 12:20:13.419730  543705 net.go:648] Add success.
I0323 12:20:13.422392  543705 net.go:770] primary dev: ETH0
I0323 12:20:13.422405  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:20:13.422417  543705 net.go:698] Add success.
I0323 12:20:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:20:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:20:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 12:20:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:20:14.456495  543705 disk_worker.go:494] system disk:vda1
I0323 12:20:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:20:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:20:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:20:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:20:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:20:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:20:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:20:23.409769  543705 memory.go:184] no items to output this cycle
I0323 12:20:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 12:20:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:20:33.409792  543705 memory.go:184] no items to output this cycle
I0323 12:20:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 12:20:39.889671  543705 disk_info.go:125] begin check local disk info of client
I0323 12:20:39.892275  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:20:39.892281  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033eec0 0xc00033ef00]
E0323 12:20:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:20:43.410627  543705 memory.go:191] Add success.
I0323 12:20:43.409813  543705 cpu.go:282] Add success.
I0323 12:20:43.420297  543705 net.go:648] Add success.
I0323 12:20:43.423026  543705 net.go:770] primary dev: ETH0
I0323 12:20:43.423039  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:20:43.423051  543705 net.go:698] Add success.
I0323 12:20:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:20:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:20:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:20:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:20:53.409774  543705 memory.go:184] no items to output this cycle
I0323 12:20:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 12:21:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:21:03.409795  543705 memory.go:184] no items to output this cycle
I0323 12:21:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 12:21:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:21:13.409792  543705 memory.go:191] Add success.
I0323 12:21:13.409814  543705 cpu.go:282] Add success.
W0323 12:21:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:21:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:21:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:21:13.420201  543705 net.go:648] Add success.
I0323 12:21:13.422922  543705 net.go:770] primary dev: ETH0
I0323 12:21:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:21:13.422951  543705 net.go:698] Add success.
I0323 12:21:13.517228  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"938a3c3b-43bd-4741-b265-1ad07c9d2358","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:21:13.517259  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:21:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:21:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:21:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0323 12:21:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:21:14.456738  543705 disk_worker.go:494] system disk:vda1
I0323 12:21:14.456773  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:21:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:21:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:21:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:21:16.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:21:16.472490  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:21:23.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:21:23.409831  543705 memory.go:184] no items to output this cycle
I0323 12:21:23.409843  543705 cpu.go:275] no items to output this cycle
E0323 12:21:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:21:33.409800  543705 memory.go:184] no items to output this cycle
I0323 12:21:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 12:21:39.893675  543705 disk_info.go:125] begin check local disk info of client
I0323 12:21:39.896232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:21:39.896238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4840 0xc0000c4880]
I0323 12:21:40.411968  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:21:40.411973  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:21:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:21:43.410657  543705 memory.go:191] Add success.
I0323 12:21:43.409811  543705 cpu.go:282] Add success.
I0323 12:21:43.420352  543705 net.go:648] Add success.
I0323 12:21:43.423217  543705 net.go:770] primary dev: ETH0
I0323 12:21:43.423231  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:21:43.423245  543705 net.go:698] Add success.
I0323 12:21:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:21:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:21:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:21:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:21:53.409782  543705 cpu.go:275] no items to output this cycle
I0323 12:21:53.409784  543705 memory.go:184] no items to output this cycle
E0323 12:22:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:22:03.409797  543705 memory.go:184] no items to output this cycle
I0323 12:22:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 12:22:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:22:13.409809  543705 memory.go:191] Add success.
I0323 12:22:13.409821  543705 cpu.go:282] Add success.
W0323 12:22:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:22:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:22:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:22:13.420182  543705 net.go:648] Add success.
I0323 12:22:13.423190  543705 net.go:770] primary dev: ETH0
I0323 12:22:13.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:22:13.423217  543705 net.go:698] Add success.
W0323 12:22:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:22:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 12:22:14.455186  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:22:14.457017  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:22:14.457027  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:22:14.457034  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:22:14.457084  543705 disk_worker.go:494] system disk:vda1
I0323 12:22:14.457127  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:22:15.456837  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:22:15.456846  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:22:16.457935  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:22:16.457934  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:22:16.457990  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:22:16.458009  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:22:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:22:23.410255  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:22:23.410261  543705 cpu.go:275] no items to output this cycle
I0323 12:22:23.410269  543705 memory.go:184] no items to output this cycle
E0323 12:22:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:22:33.409803  543705 memory.go:184] no items to output this cycle
I0323 12:22:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 12:22:39.897672  543705 disk_info.go:125] begin check local disk info of client
I0323 12:22:39.900201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:22:39.900208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e800 0xc00037e840]
E0323 12:22:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:22:43.410619  543705 memory.go:191] Add success.
I0323 12:22:43.409815  543705 cpu.go:282] Add success.
I0323 12:22:43.420314  543705 net.go:648] Add success.
I0323 12:22:43.422841  543705 net.go:770] primary dev: ETH0
I0323 12:22:43.422855  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:22:43.422871  543705 net.go:698] Add success.
I0323 12:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:22:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:22:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:22:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:22:53.409771  543705 memory.go:184] no items to output this cycle
I0323 12:22:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 12:23:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:23:03.409800  543705 memory.go:184] no items to output this cycle
I0323 12:23:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 12:23:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:23:13.409795  543705 memory.go:191] Add success.
I0323 12:23:13.409798  543705 cpu.go:282] Add success.
W0323 12:23:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:23:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:23:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:23:13.420072  543705 net.go:648] Add success.
I0323 12:23:13.422697  543705 net.go:770] primary dev: ETH0
I0323 12:23:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:23:13.422724  543705 net.go:698] Add success.
I0323 12:23:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:23:14.455590  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:23:14.455608  543705 disk_worker.go:708] disk space is not compliant
W0323 12:23:14.455612  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:23:14.457200  543705 disk_worker.go:494] system disk:vda1
I0323 12:23:14.457231  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:23:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:23:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:23:16.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:23:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:23:16.472472  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:23:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:23:23.409778  543705 memory.go:184] no items to output this cycle
I0323 12:23:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 12:23:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:23:33.409801  543705 memory.go:184] no items to output this cycle
I0323 12:23:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 12:23:39.901683  543705 disk_info.go:125] begin check local disk info of client
I0323 12:23:39.904254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:23:39.904260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4900 0xc0000c4940]
E0323 12:23:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:23:43.410737  543705 memory.go:191] Add success.
I0323 12:23:43.409799  543705 cpu.go:282] Add success.
I0323 12:23:43.420446  543705 net.go:648] Add success.
I0323 12:23:43.423203  543705 net.go:770] primary dev: ETH0
I0323 12:23:43.423218  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:23:43.423232  543705 net.go:698] Add success.
I0323 12:23:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:23:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:23:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:23:53.410338  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:23:53.410354  543705 memory.go:184] no items to output this cycle
I0323 12:23:53.410379  543705 cpu.go:275] no items to output this cycle
E0323 12:24:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:24:03.409802  543705 memory.go:184] no items to output this cycle
I0323 12:24:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 12:24:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:24:13.409830  543705 memory.go:191] Add success.
I0323 12:24:13.409835  543705 cpu.go:282] Add success.
W0323 12:24:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:24:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:24:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:24:13.420421  543705 net.go:648] Add success.
I0323 12:24:13.423231  543705 net.go:770] primary dev: ETH0
I0323 12:24:13.423247  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:24:13.423261  543705 net.go:698] Add success.
I0323 12:24:13.462963  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ad5de657-7315-4942-a233-9c68197a3ee5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:24:13.462996  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:24:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:24:14.455118  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:24:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 12:24:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:24:14.456609  543705 disk_worker.go:494] system disk:vda1
I0323 12:24:14.456640  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:24:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:24:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:24:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:24:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:24:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:24:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:24:23.409778  543705 memory.go:184] no items to output this cycle
I0323 12:24:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 12:24:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:24:33.409782  543705 memory.go:184] no items to output this cycle
I0323 12:24:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 12:24:39.905671  543705 disk_info.go:125] begin check local disk info of client
I0323 12:24:39.908298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:24:39.908306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab980 0xc0001ab9c0]
I0323 12:24:40.412548  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:24:40.412553  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:24:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:24:43.410568  543705 memory.go:191] Add success.
I0323 12:24:43.409817  543705 cpu.go:282] Add success.
I0323 12:24:43.420243  543705 net.go:648] Add success.
I0323 12:24:43.423100  543705 net.go:770] primary dev: ETH0
I0323 12:24:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:24:43.423129  543705 net.go:698] Add success.
I0323 12:24:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:24:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:24:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:24:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:24:53.409786  543705 memory.go:184] no items to output this cycle
I0323 12:24:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 12:25:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:25:03.409814  543705 memory.go:184] no items to output this cycle
I0323 12:25:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 12:25:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:25:13.409784  543705 memory.go:191] Add success.
I0323 12:25:13.409815  543705 cpu.go:282] Add success.
W0323 12:25:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:25:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:25:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:25:13.420646  543705 net.go:648] Add success.
I0323 12:25:13.423344  543705 net.go:770] primary dev: ETH0
I0323 12:25:13.423358  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:25:13.423369  543705 net.go:698] Add success.
I0323 12:25:14.454952  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:25:14.455174  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:25:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 12:25:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:25:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 12:25:14.456597  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:25:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:25:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:25:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:25:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:25:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:25:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:25:23.409778  543705 memory.go:184] no items to output this cycle
I0323 12:25:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 12:25:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:25:33.409795  543705 memory.go:184] no items to output this cycle
I0323 12:25:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 12:25:39.909674  543705 disk_info.go:125] begin check local disk info of client
I0323 12:25:39.912240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:25:39.912246  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e740 0xc00037e780]
E0323 12:25:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:25:43.410734  543705 memory.go:191] Add success.
I0323 12:25:43.409793  543705 cpu.go:282] Add success.
I0323 12:25:43.420404  543705 net.go:648] Add success.
I0323 12:25:43.423197  543705 net.go:770] primary dev: ETH0
I0323 12:25:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:25:43.423224  543705 net.go:698] Add success.
I0323 12:25:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:25:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:25:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:25:53.409784  543705 memory.go:184] no items to output this cycle
I0323 12:25:53.409784  543705 cpu.go:275] no items to output this cycle
E0323 12:26:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:26:03.409796  543705 memory.go:184] no items to output this cycle
I0323 12:26:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 12:26:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:26:13.409793  543705 memory.go:191] Add success.
I0323 12:26:13.409812  543705 cpu.go:282] Add success.
W0323 12:26:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:26:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:26:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:26:13.419723  543705 net.go:648] Add success.
I0323 12:26:13.422650  543705 net.go:770] primary dev: ETH0
I0323 12:26:13.422663  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:26:13.422674  543705 net.go:698] Add success.
I0323 12:26:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:26:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:26:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 12:26:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:26:14.456503  543705 disk_worker.go:494] system disk:vda1
I0323 12:26:14.456549  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:26:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:26:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:26:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:26:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:26:16.472454  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:26:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:26:23.409798  543705 memory.go:184] no items to output this cycle
I0323 12:26:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 12:26:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:26:33.409778  543705 memory.go:184] no items to output this cycle
I0323 12:26:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 12:26:39.913672  543705 disk_info.go:125] begin check local disk info of client
I0323 12:26:39.916198  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:26:39.916204  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f780 0xc00039f7c0]
E0323 12:26:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:26:43.410618  543705 memory.go:191] Add success.
I0323 12:26:43.409811  543705 cpu.go:282] Add success.
I0323 12:26:43.420396  543705 net.go:648] Add success.
I0323 12:26:43.423136  543705 net.go:770] primary dev: ETH0
I0323 12:26:43.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:26:43.423164  543705 net.go:698] Add success.
I0323 12:26:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:26:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:26:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:26:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:26:53.409795  543705 memory.go:184] no items to output this cycle
I0323 12:26:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 12:27:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:27:03.409782  543705 memory.go:184] no items to output this cycle
I0323 12:27:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 12:27:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:27:13.409814  543705 memory.go:191] Add success.
I0323 12:27:13.409819  543705 cpu.go:282] Add success.
W0323 12:27:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:27:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:27:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:27:13.420138  543705 net.go:648] Add success.
I0323 12:27:13.429191  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 12:27:13.429267  543705 net.go:770] primary dev: ETH0
I0323 12:27:13.429281  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:27:13.429295  543705 net.go:698] Add success.
I0323 12:27:13.452769  543705 event_worker.go:152] Polling the log file for events...
I0323 12:27:13.467801  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd4bdfc1-a215-4a77-be85-74eb7c114ed5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:27:13.467832  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 12:27:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:27:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 12:27:14.455192  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:27:14.455885  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:27:14.455893  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:27:14.455898  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:27:14.456540  543705 disk_worker.go:494] system disk:vda1
I0323 12:27:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:27:15.456795  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:27:15.456804  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:27:16.457940  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:27:16.457940  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:27:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:27:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:27:16.472354  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:27:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:27:23.409809  543705 memory.go:184] no items to output this cycle
I0323 12:27:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 12:27:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:27:33.409784  543705 memory.go:184] no items to output this cycle
I0323 12:27:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 12:27:39.917676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:27:39.920252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:27:39.920258  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aad40 0xc0001aad80]
I0323 12:27:40.413552  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:27:40.413558  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:27:43.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:27:43.410722  543705 memory.go:191] Add success.
I0323 12:27:43.409809  543705 cpu.go:282] Add success.
I0323 12:27:43.420493  543705 net.go:648] Add success.
I0323 12:27:43.423193  543705 net.go:770] primary dev: ETH0
I0323 12:27:43.423206  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:27:43.423219  543705 net.go:698] Add success.
I0323 12:27:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:27:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:27:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:27:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:27:53.409775  543705 memory.go:184] no items to output this cycle
I0323 12:27:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 12:28:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:28:03.409775  543705 memory.go:184] no items to output this cycle
I0323 12:28:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 12:28:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:28:13.409915  543705 memory.go:191] Add success.
I0323 12:28:13.409931  543705 cpu.go:282] Add success.
W0323 12:28:13.409947  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:28:13.409960  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:28:13.409965  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:28:13.419735  543705 net.go:648] Add success.
I0323 12:28:13.422248  543705 net.go:770] primary dev: ETH0
I0323 12:28:13.422261  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:28:13.422272  543705 net.go:698] Add success.
I0323 12:28:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:28:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:28:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 12:28:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:28:14.456599  543705 disk_worker.go:494] system disk:vda1
I0323 12:28:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:28:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:28:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:28:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:28:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:28:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:28:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:28:23.409777  543705 cpu.go:275] no items to output this cycle
I0323 12:28:23.409784  543705 memory.go:184] no items to output this cycle
E0323 12:28:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:28:33.409793  543705 memory.go:184] no items to output this cycle
I0323 12:28:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 12:28:39.921676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:28:39.924197  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:28:39.924203  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e640 0xc00039e680]
E0323 12:28:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:28:43.410718  543705 memory.go:191] Add success.
I0323 12:28:43.409789  543705 cpu.go:282] Add success.
I0323 12:28:43.420425  543705 net.go:648] Add success.
I0323 12:28:43.423473  543705 net.go:770] primary dev: ETH0
I0323 12:28:43.423488  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:28:43.423502  543705 net.go:698] Add success.
I0323 12:28:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:28:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:28:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:28:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:28:53.409770  543705 memory.go:184] no items to output this cycle
I0323 12:28:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 12:29:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:29:03.409775  543705 memory.go:184] no items to output this cycle
I0323 12:29:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 12:29:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:29:13.409810  543705 memory.go:191] Add success.
I0323 12:29:13.409820  543705 cpu.go:282] Add success.
W0323 12:29:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:29:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:29:13.409865  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:29:13.419723  543705 net.go:648] Add success.
I0323 12:29:13.422546  543705 net.go:770] primary dev: ETH0
I0323 12:29:13.422564  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:29:13.422575  543705 net.go:698] Add success.
I0323 12:29:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:29:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:29:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 12:29:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:29:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 12:29:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:29:15.455029  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:29:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:29:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:29:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:29:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:29:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:29:23.409809  543705 memory.go:184] no items to output this cycle
I0323 12:29:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 12:29:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:29:33.409777  543705 memory.go:184] no items to output this cycle
I0323 12:29:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 12:29:39.925671  543705 disk_info.go:125] begin check local disk info of client
I0323 12:29:39.928287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:29:39.928294  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e780 0xc00037e7c0]
E0323 12:29:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:29:43.410911  543705 memory.go:191] Add success.
I0323 12:29:43.409818  543705 cpu.go:282] Add success.
I0323 12:29:43.420601  543705 net.go:648] Add success.
I0323 12:29:43.423243  543705 net.go:770] primary dev: ETH0
I0323 12:29:43.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:29:43.423267  543705 net.go:698] Add success.
I0323 12:29:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:29:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:29:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:29:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:29:53.409782  543705 memory.go:184] no items to output this cycle
I0323 12:29:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 12:30:03.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:30:03.409769  543705 memory.go:184] no items to output this cycle
I0323 12:30:03.409891  543705 cpu.go:275] no items to output this cycle
E0323 12:30:13.409910  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:30:13.409939  543705 cpu.go:282] Add success.
I0323 12:30:13.410080  543705 memory.go:191] Add success.
W0323 12:30:13.410115  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:30:13.410132  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:30:13.410142  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:30:13.419751  543705 net.go:648] Add success.
I0323 12:30:13.422579  543705 net.go:770] primary dev: ETH0
I0323 12:30:13.422592  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:30:13.422604  543705 net.go:698] Add success.
I0323 12:30:13.468464  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ead90146-2091-4cb2-874f-918b1964f9ee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:30:13.468495  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:30:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:30:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:30:14.455150  543705 disk_worker.go:708] disk space is not compliant
W0323 12:30:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:30:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 12:30:14.456615  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:30:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:30:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:30:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:30:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:30:16.472424  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:30:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:30:23.409781  543705 memory.go:184] no items to output this cycle
I0323 12:30:23.409783  543705 cpu.go:275] no items to output this cycle
E0323 12:30:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:30:33.409793  543705 memory.go:184] no items to output this cycle
I0323 12:30:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 12:30:39.929673  543705 disk_info.go:125] begin check local disk info of client
I0323 12:30:39.932209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:30:39.932216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000253880 0xc0002538c0]
I0323 12:30:40.413712  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:30:40.413718  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:30:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:30:43.410664  543705 memory.go:191] Add success.
I0323 12:30:43.409793  543705 cpu.go:282] Add success.
I0323 12:30:43.420356  543705 net.go:648] Add success.
I0323 12:30:43.423089  543705 net.go:770] primary dev: ETH0
I0323 12:30:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:30:43.423116  543705 net.go:698] Add success.
I0323 12:30:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:30:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:30:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:30:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:30:53.409781  543705 memory.go:184] no items to output this cycle
I0323 12:30:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 12:31:03.409863  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:31:03.409883  543705 memory.go:184] no items to output this cycle
I0323 12:31:03.409956  543705 cpu.go:275] no items to output this cycle
E0323 12:31:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:31:13.409796  543705 memory.go:191] Add success.
I0323 12:31:13.409800  543705 cpu.go:282] Add success.
W0323 12:31:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:31:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:31:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:31:13.420137  543705 net.go:648] Add success.
I0323 12:31:13.422850  543705 net.go:770] primary dev: ETH0
I0323 12:31:13.422864  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:31:13.422876  543705 net.go:698] Add success.
I0323 12:31:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:31:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:31:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0323 12:31:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:31:14.456600  543705 disk_worker.go:494] system disk:vda1
I0323 12:31:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:31:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:31:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:31:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:31:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:31:16.472366  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:31:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:31:23.409807  543705 memory.go:184] no items to output this cycle
I0323 12:31:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 12:31:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:31:33.409764  543705 memory.go:184] no items to output this cycle
I0323 12:31:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 12:31:39.933673  543705 disk_info.go:125] begin check local disk info of client
I0323 12:31:39.936204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:31:39.936210  543705 disk_info.go:196] parse disk info done, disk is : [0xc00033f040 0xc00033f080]
E0323 12:31:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:31:43.410675  543705 memory.go:191] Add success.
I0323 12:31:43.409783  543705 cpu.go:282] Add success.
I0323 12:31:43.420458  543705 net.go:648] Add success.
I0323 12:31:43.423135  543705 net.go:770] primary dev: ETH0
I0323 12:31:43.423150  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:31:43.423165  543705 net.go:698] Add success.
I0323 12:31:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:31:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:31:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:31:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:31:53.409776  543705 memory.go:184] no items to output this cycle
I0323 12:31:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 12:32:03.409934  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:32:03.409943  543705 cpu.go:275] no items to output this cycle
I0323 12:32:03.409950  543705 memory.go:184] no items to output this cycle
E0323 12:32:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:32:13.409790  543705 memory.go:191] Add success.
W0323 12:32:13.409815  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 12:32:13.409818  543705 cpu.go:282] Add success.
W0323 12:32:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:32:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:32:13.420198  543705 net.go:648] Add success.
I0323 12:32:13.423111  543705 net.go:770] primary dev: ETH0
I0323 12:32:13.423125  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:32:13.423137  543705 net.go:698] Add success.
W0323 12:32:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:32:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 12:32:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:32:14.456798  543705 disk_worker.go:494] system disk:vda1
I0323 12:32:14.456839  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:32:14.457085  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:32:14.457093  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:32:14.457098  543705 custom_config.go:64] query custom config with name: gpu
E0323 12:32:15.456828  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:32:15.456838  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:32:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:32:16.457981  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:32:16.458025  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:32:16.458041  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:32:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:32:23.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:32:23.409766  543705 memory.go:184] no items to output this cycle
I0323 12:32:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 12:32:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:32:33.409794  543705 memory.go:184] no items to output this cycle
I0323 12:32:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 12:32:39.937672  543705 disk_info.go:125] begin check local disk info of client
I0323 12:32:39.940181  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:32:39.940187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000370200 0xc000370240]
E0323 12:32:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:32:43.410777  543705 memory.go:191] Add success.
I0323 12:32:43.409809  543705 cpu.go:282] Add success.
I0323 12:32:43.420460  543705 net.go:648] Add success.
I0323 12:32:43.424112  543705 net.go:770] primary dev: ETH0
I0323 12:32:43.424127  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:32:43.424141  543705 net.go:698] Add success.
I0323 12:32:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:32:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:32:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:32:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:32:53.409768  543705 memory.go:184] no items to output this cycle
I0323 12:32:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 12:33:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:33:03.409901  543705 memory.go:184] no items to output this cycle
I0323 12:33:03.409935  543705 cpu.go:275] no items to output this cycle
E0323 12:33:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:33:13.409819  543705 memory.go:191] Add success.
I0323 12:33:13.409832  543705 cpu.go:282] Add success.
W0323 12:33:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:33:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:33:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:33:13.420167  543705 net.go:648] Add success.
I0323 12:33:13.423043  543705 net.go:770] primary dev: ETH0
I0323 12:33:13.423058  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:33:13.423072  543705 net.go:698] Add success.
I0323 12:33:13.469568  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c3f91ab3-b69d-4985-9eef-c9ef11c6016d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:33:13.469602  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:33:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:33:14.455103  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:33:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 12:33:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:33:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 12:33:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:33:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:33:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:33:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:33:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:33:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:33:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:33:23.409779  543705 memory.go:184] no items to output this cycle
I0323 12:33:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:33:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:33:33.409793  543705 memory.go:184] no items to output this cycle
I0323 12:33:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 12:33:39.941675  543705 disk_info.go:125] begin check local disk info of client
I0323 12:33:39.944211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:33:39.944217  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027a580 0xc00027a5c0]
I0323 12:33:40.414536  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:33:40.414542  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:33:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:33:43.410766  543705 memory.go:191] Add success.
I0323 12:33:43.409803  543705 cpu.go:282] Add success.
I0323 12:33:43.420475  543705 net.go:648] Add success.
I0323 12:33:43.423144  543705 net.go:770] primary dev: ETH0
I0323 12:33:43.423159  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:33:43.423174  543705 net.go:698] Add success.
I0323 12:33:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:33:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:33:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:33:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:33:53.409795  543705 memory.go:184] no items to output this cycle
I0323 12:33:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 12:34:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:34:03.409776  543705 memory.go:184] no items to output this cycle
I0323 12:34:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 12:34:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:34:13.409934  543705 memory.go:191] Add success.
I0323 12:34:13.409942  543705 cpu.go:282] Add success.
W0323 12:34:13.409968  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:34:13.409988  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:34:13.409993  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:34:13.419738  543705 net.go:648] Add success.
I0323 12:34:13.422414  543705 net.go:770] primary dev: ETH0
I0323 12:34:13.422429  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:34:13.422440  543705 net.go:698] Add success.
I0323 12:34:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:34:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:34:14.455163  543705 disk_worker.go:708] disk space is not compliant
W0323 12:34:14.455166  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:34:14.456509  543705 disk_worker.go:494] system disk:vda1
I0323 12:34:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:34:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:34:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:34:16.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:34:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:34:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:34:23.410727  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:34:23.410743  543705 memory.go:184] no items to output this cycle
I0323 12:34:23.410765  543705 cpu.go:275] no items to output this cycle
E0323 12:34:33.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:34:33.409768  543705 memory.go:184] no items to output this cycle
I0323 12:34:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 12:34:39.945683  543705 disk_info.go:125] begin check local disk info of client
I0323 12:34:39.948207  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:34:39.948213  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b8180 0xc0003b81c0]
E0323 12:34:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:34:43.410634  543705 memory.go:191] Add success.
I0323 12:34:43.409804  543705 cpu.go:282] Add success.
I0323 12:34:43.420310  543705 net.go:648] Add success.
I0323 12:34:43.423206  543705 net.go:770] primary dev: ETH0
I0323 12:34:43.423219  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:34:43.423232  543705 net.go:698] Add success.
I0323 12:34:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:34:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:34:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:34:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:34:53.409777  543705 memory.go:184] no items to output this cycle
I0323 12:34:53.409801  543705 cpu.go:275] no items to output this cycle
I0323 12:35:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 12:35:03.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:35:03.409832  543705 memory.go:184] no items to output this cycle
E0323 12:35:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:35:13.409836  543705 memory.go:191] Add success.
I0323 12:35:13.409841  543705 cpu.go:282] Add success.
W0323 12:35:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:35:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:35:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:35:13.420676  543705 net.go:648] Add success.
I0323 12:35:13.423650  543705 net.go:770] primary dev: ETH0
I0323 12:35:13.423680  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:35:13.423692  543705 net.go:698] Add success.
I0323 12:35:14.453956  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:35:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:35:14.455258  543705 disk_worker.go:708] disk space is not compliant
W0323 12:35:14.455261  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:35:14.456624  543705 disk_worker.go:494] system disk:vda1
I0323 12:35:14.456654  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:35:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:35:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:35:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:35:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:35:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:35:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:35:23.409823  543705 memory.go:184] no items to output this cycle
I0323 12:35:23.409833  543705 cpu.go:275] no items to output this cycle
E0323 12:35:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:35:33.409785  543705 memory.go:184] no items to output this cycle
I0323 12:35:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 12:35:39.949672  543705 disk_info.go:125] begin check local disk info of client
I0323 12:35:39.952257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:35:39.952265  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 12:35:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:35:43.410623  543705 memory.go:191] Add success.
I0323 12:35:43.409796  543705 cpu.go:282] Add success.
I0323 12:35:43.420313  543705 net.go:648] Add success.
I0323 12:35:43.423143  543705 net.go:770] primary dev: ETH0
I0323 12:35:43.423156  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:35:43.423168  543705 net.go:698] Add success.
I0323 12:35:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:35:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:35:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:35:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:35:53.409812  543705 memory.go:184] no items to output this cycle
I0323 12:35:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 12:36:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:36:03.409789  543705 memory.go:184] no items to output this cycle
I0323 12:36:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 12:36:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:36:13.409819  543705 memory.go:191] Add success.
I0323 12:36:13.409825  543705 cpu.go:282] Add success.
W0323 12:36:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:36:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:36:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:36:13.420262  543705 net.go:648] Add success.
I0323 12:36:13.423101  543705 net.go:770] primary dev: ETH0
I0323 12:36:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:36:13.423125  543705 net.go:698] Add success.
I0323 12:36:13.468156  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ebaa31d7-0731-4c23-8e9b-6ca3f2267a19","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:36:13.468188  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:36:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:36:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:36:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 12:36:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:36:14.456623  543705 disk_worker.go:494] system disk:vda1
I0323 12:36:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:36:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:36:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:36:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:36:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:36:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:36:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:36:23.409797  543705 memory.go:184] no items to output this cycle
I0323 12:36:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 12:36:33.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:36:33.409767  543705 memory.go:184] no items to output this cycle
I0323 12:36:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 12:36:39.953676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:36:39.956215  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:36:39.956221  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e9c0 0xc00037ea00]
I0323 12:36:40.415541  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:36:40.415547  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:36:43.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:36:43.410683  543705 memory.go:191] Add success.
I0323 12:36:43.409798  543705 cpu.go:282] Add success.
I0323 12:36:43.420385  543705 net.go:648] Add success.
I0323 12:36:43.423248  543705 net.go:770] primary dev: ETH0
I0323 12:36:43.423261  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:36:43.423272  543705 net.go:698] Add success.
I0323 12:36:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:36:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:36:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:36:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:36:53.409782  543705 memory.go:184] no items to output this cycle
I0323 12:36:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 12:37:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:37:03.409789  543705 cpu.go:275] no items to output this cycle
I0323 12:37:03.409792  543705 memory.go:184] no items to output this cycle
W0323 12:37:13.409711  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:37:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:37:13.409738  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 12:37:13.409826  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:37:13.409837  543705 cpu.go:282] Add success.
I0323 12:37:13.409850  543705 memory.go:191] Add success.
I0323 12:37:13.420217  543705 net.go:648] Add success.
I0323 12:37:13.423171  543705 net.go:770] primary dev: ETH0
I0323 12:37:13.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:37:13.423194  543705 net.go:698] Add success.
I0323 12:37:13.452772  543705 event_worker.go:152] Polling the log file for events...
W0323 12:37:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:37:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 12:37:14.455160  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:37:14.456926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:37:14.456936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:37:14.456942  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:37:14.457016  543705 disk_worker.go:494] system disk:vda1
I0323 12:37:14.457045  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:37:15.456832  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:37:15.456840  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:37:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:37:16.457923  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:37:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:37:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:37:16.472320  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:37:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:37:23.409817  543705 memory.go:184] no items to output this cycle
I0323 12:37:23.409843  543705 cpu.go:275] no items to output this cycle
E0323 12:37:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:37:33.409792  543705 memory.go:184] no items to output this cycle
I0323 12:37:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 12:37:39.957677  543705 disk_info.go:125] begin check local disk info of client
I0323 12:37:39.960237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:37:39.960243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab700 0xc0001ab740]
E0323 12:37:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:37:43.410813  543705 memory.go:191] Add success.
I0323 12:37:43.409814  543705 cpu.go:282] Add success.
I0323 12:37:43.420530  543705 net.go:648] Add success.
I0323 12:37:43.423338  543705 net.go:770] primary dev: ETH0
I0323 12:37:43.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:37:43.423364  543705 net.go:698] Add success.
I0323 12:37:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:37:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:37:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:37:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:37:53.409783  543705 memory.go:184] no items to output this cycle
I0323 12:37:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 12:38:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:38:03.409814  543705 memory.go:184] no items to output this cycle
I0323 12:38:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 12:38:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:38:13.409802  543705 memory.go:191] Add success.
I0323 12:38:13.409805  543705 cpu.go:282] Add success.
W0323 12:38:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:38:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:38:13.420082  543705 net.go:648] Add success.
I0323 12:38:13.422930  543705 net.go:770] primary dev: ETH0
I0323 12:38:13.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:38:13.422957  543705 net.go:698] Add success.
I0323 12:38:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:38:14.455369  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:38:14.455383  543705 disk_worker.go:708] disk space is not compliant
W0323 12:38:14.455515  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:38:14.457484  543705 disk_worker.go:494] system disk:vda1
I0323 12:38:14.457514  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:38:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:38:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:38:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:38:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:38:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:38:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:38:23.409777  543705 memory.go:184] no items to output this cycle
I0323 12:38:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 12:38:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:38:33.409776  543705 memory.go:184] no items to output this cycle
I0323 12:38:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 12:38:39.961674  543705 disk_info.go:125] begin check local disk info of client
I0323 12:38:39.964262  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:38:39.964268  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba00 0xc00007ba40]
E0323 12:38:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:38:43.410721  543705 memory.go:191] Add success.
I0323 12:38:43.409793  543705 cpu.go:282] Add success.
I0323 12:38:43.420460  543705 net.go:648] Add success.
I0323 12:38:43.423418  543705 net.go:770] primary dev: ETH0
I0323 12:38:43.423431  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:38:43.423444  543705 net.go:698] Add success.
I0323 12:38:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:38:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:38:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:38:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:38:53.409804  543705 memory.go:184] no items to output this cycle
I0323 12:38:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 12:39:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:39:03.409810  543705 memory.go:184] no items to output this cycle
I0323 12:39:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 12:39:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:39:13.409786  543705 memory.go:191] Add success.
I0323 12:39:13.409787  543705 cpu.go:282] Add success.
W0323 12:39:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:39:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:39:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:39:13.420109  543705 net.go:648] Add success.
I0323 12:39:13.423005  543705 net.go:770] primary dev: ETH0
I0323 12:39:13.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:39:13.423031  543705 net.go:698] Add success.
I0323 12:39:13.463561  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9b33a4fc-a3d3-4c7b-9163-b231dd946b28","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:39:13.463594  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:39:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:39:14.455267  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:39:14.455278  543705 disk_worker.go:708] disk space is not compliant
W0323 12:39:14.455281  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:39:14.456668  543705 disk_worker.go:494] system disk:vda1
I0323 12:39:14.456711  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:39:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:39:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:39:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:39:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:39:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:39:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:39:23.409774  543705 memory.go:184] no items to output this cycle
I0323 12:39:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 12:39:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:39:33.409778  543705 memory.go:184] no items to output this cycle
I0323 12:39:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 12:39:39.965677  543705 disk_info.go:125] begin check local disk info of client
I0323 12:39:39.968231  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:39:39.968237  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d8640 0xc0004d8680]
I0323 12:39:40.416595  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:39:40.416600  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:39:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:39:43.410661  543705 memory.go:191] Add success.
I0323 12:39:43.409809  543705 cpu.go:282] Add success.
I0323 12:39:43.420408  543705 net.go:648] Add success.
I0323 12:39:43.423286  543705 net.go:770] primary dev: ETH0
I0323 12:39:43.423298  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:39:43.423311  543705 net.go:698] Add success.
I0323 12:39:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:39:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:39:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:39:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:39:53.409799  543705 memory.go:184] no items to output this cycle
I0323 12:39:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 12:40:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:40:03.409805  543705 memory.go:184] no items to output this cycle
I0323 12:40:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 12:40:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:40:13.409791  543705 memory.go:191] Add success.
I0323 12:40:13.409809  543705 cpu.go:282] Add success.
W0323 12:40:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:40:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:40:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:40:13.420185  543705 net.go:648] Add success.
I0323 12:40:13.423139  543705 net.go:770] primary dev: ETH0
I0323 12:40:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:40:13.423166  543705 net.go:698] Add success.
I0323 12:40:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:40:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:40:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 12:40:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:40:14.456841  543705 disk_worker.go:494] system disk:vda1
I0323 12:40:14.456872  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:40:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:40:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:40:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:40:16.458082  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:40:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:40:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:40:23.409761  543705 memory.go:184] no items to output this cycle
I0323 12:40:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 12:40:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:40:33.409761  543705 memory.go:184] no items to output this cycle
I0323 12:40:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 12:40:39.969676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:40:39.972211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:40:39.972217  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5180 0xc0000c51c0]
E0323 12:40:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:40:43.410782  543705 memory.go:191] Add success.
I0323 12:40:43.409820  543705 cpu.go:282] Add success.
I0323 12:40:43.420487  543705 net.go:648] Add success.
I0323 12:40:43.423598  543705 net.go:770] primary dev: ETH0
I0323 12:40:43.423617  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:40:43.423632  543705 net.go:698] Add success.
I0323 12:40:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:40:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:40:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:40:53.410370  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:40:53.410385  543705 memory.go:184] no items to output this cycle
I0323 12:40:53.410386  543705 cpu.go:275] no items to output this cycle
E0323 12:41:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:41:03.409770  543705 memory.go:184] no items to output this cycle
I0323 12:41:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 12:41:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:41:13.409787  543705 memory.go:191] Add success.
I0323 12:41:13.409807  543705 cpu.go:282] Add success.
W0323 12:41:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:41:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:41:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:41:13.420157  543705 net.go:648] Add success.
I0323 12:41:13.423109  543705 net.go:770] primary dev: ETH0
I0323 12:41:13.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:41:13.423134  543705 net.go:698] Add success.
I0323 12:41:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:41:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:41:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 12:41:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:41:14.456704  543705 disk_worker.go:494] system disk:vda1
I0323 12:41:14.456734  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:41:15.455948  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:41:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:41:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:41:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:41:16.472392  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:41:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:41:23.409776  543705 memory.go:184] no items to output this cycle
I0323 12:41:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 12:41:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:41:33.409781  543705 memory.go:184] no items to output this cycle
I0323 12:41:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 12:41:39.973676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:41:39.976230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:41:39.976236  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035b440 0xc00035b480]
E0323 12:41:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:41:43.410748  543705 memory.go:191] Add success.
I0323 12:41:43.409785  543705 cpu.go:282] Add success.
I0323 12:41:43.420437  543705 net.go:648] Add success.
I0323 12:41:43.423119  543705 net.go:770] primary dev: ETH0
I0323 12:41:43.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:41:43.423147  543705 net.go:698] Add success.
I0323 12:41:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:41:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:41:46.458084  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:41:53.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:41:53.409767  543705 memory.go:184] no items to output this cycle
I0323 12:41:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 12:42:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:42:03.409779  543705 memory.go:184] no items to output this cycle
I0323 12:42:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 12:42:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:42:13.409811  543705 memory.go:191] Add success.
I0323 12:42:13.409821  543705 cpu.go:282] Add success.
W0323 12:42:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:42:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:42:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:42:13.420507  543705 net.go:648] Add success.
I0323 12:42:13.423249  543705 net.go:770] primary dev: ETH0
I0323 12:42:13.423263  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:42:13.423276  543705 net.go:698] Add success.
I0323 12:42:13.462733  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"738a4dde-1915-44e2-a411-edfe5cbc85d3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:42:13.462769  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 12:42:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:42:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 12:42:14.455194  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:42:14.456805  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:42:14.456815  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:42:14.456821  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:42:14.457285  543705 disk_worker.go:494] system disk:vda1
I0323 12:42:14.457326  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:42:15.456870  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:42:15.456877  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 12:42:16.457942  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:42:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:42:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:42:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:42:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:42:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:42:23.409793  543705 memory.go:184] no items to output this cycle
I0323 12:42:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 12:42:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:42:33.409775  543705 memory.go:184] no items to output this cycle
I0323 12:42:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 12:42:39.977670  543705 disk_info.go:125] begin check local disk info of client
I0323 12:42:39.980263  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:42:39.980270  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b5380 0xc0004b53c0]
I0323 12:42:40.417549  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:42:40.417554  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:42:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:42:43.410679  543705 memory.go:191] Add success.
I0323 12:42:43.409801  543705 cpu.go:282] Add success.
I0323 12:42:43.420413  543705 net.go:648] Add success.
I0323 12:42:43.423105  543705 net.go:770] primary dev: ETH0
I0323 12:42:43.423119  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:42:43.423133  543705 net.go:698] Add success.
I0323 12:42:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:42:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:42:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:42:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:42:53.409781  543705 memory.go:184] no items to output this cycle
I0323 12:42:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 12:43:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:43:03.409774  543705 memory.go:184] no items to output this cycle
I0323 12:43:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:43:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:43:13.409781  543705 memory.go:191] Add success.
I0323 12:43:13.409799  543705 cpu.go:282] Add success.
W0323 12:43:13.409805  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:43:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:43:13.409819  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:43:13.420197  543705 net.go:648] Add success.
I0323 12:43:13.423276  543705 net.go:770] primary dev: ETH0
I0323 12:43:13.423289  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:43:13.423300  543705 net.go:698] Add success.
I0323 12:43:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:43:14.455105  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:43:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 12:43:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:43:14.456732  543705 disk_worker.go:494] system disk:vda1
I0323 12:43:14.456774  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:43:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:43:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:43:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:43:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:43:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:43:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:43:23.409790  543705 memory.go:184] no items to output this cycle
I0323 12:43:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 12:43:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:43:33.409766  543705 memory.go:184] no items to output this cycle
I0323 12:43:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 12:43:39.981675  543705 disk_info.go:125] begin check local disk info of client
I0323 12:43:39.984241  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:43:39.984247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e280 0xc00037e2c0]
E0323 12:43:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:43:43.410940  543705 memory.go:191] Add success.
I0323 12:43:43.409812  543705 cpu.go:282] Add success.
I0323 12:43:43.420592  543705 net.go:648] Add success.
I0323 12:43:43.423403  543705 net.go:770] primary dev: ETH0
I0323 12:43:43.423417  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:43:43.423432  543705 net.go:698] Add success.
I0323 12:43:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:43:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:43:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:43:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:43:53.409773  543705 memory.go:184] no items to output this cycle
I0323 12:43:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 12:44:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:44:03.409771  543705 memory.go:184] no items to output this cycle
I0323 12:44:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 12:44:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:44:13.409792  543705 memory.go:191] Add success.
I0323 12:44:13.409801  543705 cpu.go:282] Add success.
W0323 12:44:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:44:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:44:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:44:13.420164  543705 net.go:648] Add success.
I0323 12:44:13.423335  543705 net.go:770] primary dev: ETH0
I0323 12:44:13.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:44:13.423360  543705 net.go:698] Add success.
I0323 12:44:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:44:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:44:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 12:44:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:44:14.457073  543705 disk_worker.go:494] system disk:vda1
I0323 12:44:14.457109  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:44:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:44:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:44:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:44:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:44:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:44:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:44:23.409771  543705 memory.go:184] no items to output this cycle
I0323 12:44:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 12:44:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:44:33.409772  543705 memory.go:184] no items to output this cycle
I0323 12:44:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 12:44:39.985676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:44:39.988201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:44:39.988207  543705 disk_info.go:196] parse disk info done, disk is : [0xc000279c80 0xc000279cc0]
E0323 12:44:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:44:43.410835  543705 memory.go:191] Add success.
I0323 12:44:43.409817  543705 cpu.go:282] Add success.
I0323 12:44:43.420537  543705 net.go:648] Add success.
I0323 12:44:43.423692  543705 net.go:770] primary dev: ETH0
I0323 12:44:43.423707  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:44:43.423721  543705 net.go:698] Add success.
I0323 12:44:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:44:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:44:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:44:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:44:53.409774  543705 memory.go:184] no items to output this cycle
I0323 12:44:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 12:45:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:45:03.409765  543705 memory.go:184] no items to output this cycle
I0323 12:45:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 12:45:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:45:13.409818  543705 memory.go:191] Add success.
I0323 12:45:13.409829  543705 cpu.go:282] Add success.
W0323 12:45:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:45:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:45:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:45:13.420155  543705 net.go:648] Add success.
I0323 12:45:13.423175  543705 net.go:770] primary dev: ETH0
I0323 12:45:13.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:45:13.423200  543705 net.go:698] Add success.
I0323 12:45:13.467769  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"84503a10-f6f8-4b56-b4c5-a3ddcd81d478","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:45:13.467803  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:45:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:45:14.455188  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:45:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 12:45:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:45:14.457055  543705 disk_worker.go:494] system disk:vda1
I0323 12:45:14.457083  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:45:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:45:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:45:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:45:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:45:16.472431  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:45:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:45:23.409778  543705 memory.go:184] no items to output this cycle
I0323 12:45:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:45:33.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:45:33.409765  543705 memory.go:184] no items to output this cycle
I0323 12:45:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 12:45:39.989677  543705 disk_info.go:125] begin check local disk info of client
I0323 12:45:39.992234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:45:39.992240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4880 0xc0000c48c0]
I0323 12:45:40.418554  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:45:40.418559  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:45:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:45:43.410637  543705 memory.go:191] Add success.
I0323 12:45:43.409811  543705 cpu.go:282] Add success.
I0323 12:45:43.420389  543705 net.go:648] Add success.
I0323 12:45:43.423022  543705 net.go:770] primary dev: ETH0
I0323 12:45:43.423036  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:45:43.423048  543705 net.go:698] Add success.
I0323 12:45:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:45:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:45:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:45:53.410383  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:45:53.410398  543705 memory.go:184] no items to output this cycle
I0323 12:45:53.410400  543705 cpu.go:275] no items to output this cycle
E0323 12:46:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:46:03.409782  543705 memory.go:184] no items to output this cycle
I0323 12:46:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 12:46:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:46:13.409782  543705 memory.go:191] Add success.
W0323 12:46:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 12:46:13.409809  543705 cpu.go:282] Add success.
W0323 12:46:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:46:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:46:13.420110  543705 net.go:648] Add success.
I0323 12:46:13.423052  543705 net.go:770] primary dev: ETH0
I0323 12:46:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:46:13.423077  543705 net.go:698] Add success.
I0323 12:46:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:46:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:46:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 12:46:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:46:14.456612  543705 disk_worker.go:494] system disk:vda1
I0323 12:46:14.456645  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:46:15.456012  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:46:16.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:46:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:46:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:46:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:46:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:46:23.409790  543705 memory.go:184] no items to output this cycle
I0323 12:46:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 12:46:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:46:33.409773  543705 memory.go:184] no items to output this cycle
I0323 12:46:33.409779  543705 cpu.go:275] no items to output this cycle
I0323 12:46:39.993671  543705 disk_info.go:125] begin check local disk info of client
I0323 12:46:39.996202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:46:39.996209  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab6c0 0xc0001ab700]
E0323 12:46:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:46:43.410690  543705 memory.go:191] Add success.
I0323 12:46:43.409818  543705 cpu.go:282] Add success.
I0323 12:46:43.420376  543705 net.go:648] Add success.
I0323 12:46:43.423260  543705 net.go:770] primary dev: ETH0
I0323 12:46:43.423275  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:46:43.423289  543705 net.go:698] Add success.
I0323 12:46:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:46:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:46:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:46:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:46:53.409792  543705 memory.go:184] no items to output this cycle
I0323 12:46:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 12:47:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:47:03.409780  543705 memory.go:184] no items to output this cycle
I0323 12:47:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 12:47:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:47:13.409811  543705 memory.go:191] Add success.
I0323 12:47:13.409817  543705 cpu.go:282] Add success.
W0323 12:47:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:47:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:47:13.409862  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:47:13.420149  543705 net.go:648] Add success.
I0323 12:47:13.423102  543705 net.go:770] primary dev: ETH0
I0323 12:47:13.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:47:13.423129  543705 net.go:698] Add success.
I0323 12:47:13.453662  543705 event_worker.go:152] Polling the log file for events...
W0323 12:47:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:47:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 12:47:14.455180  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:47:14.456807  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:47:14.456816  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:47:14.456823  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:47:14.456871  543705 disk_worker.go:494] system disk:vda1
I0323 12:47:14.456912  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:47:15.456806  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:47:15.456815  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:47:16.457925  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:47:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:47:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:47:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:47:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:47:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:47:23.409808  543705 memory.go:184] no items to output this cycle
I0323 12:47:23.409814  543705 cpu.go:275] no items to output this cycle
E0323 12:47:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:47:33.409799  543705 memory.go:184] no items to output this cycle
I0323 12:47:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 12:47:39.997671  543705 disk_info.go:125] begin check local disk info of client
I0323 12:47:40.000310  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:47:40.000316  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ad80 0xc00035adc0]
E0323 12:47:43.410162  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:47:43.410189  543705 memory.go:191] Add success.
I0323 12:47:43.410201  543705 cpu.go:282] Add success.
I0323 12:47:43.420302  543705 net.go:648] Add success.
I0323 12:47:43.421270  543705 net.go:770] primary dev: ETH0
I0323 12:47:43.421285  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:47:43.421301  543705 net.go:698] Add success.
I0323 12:47:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:47:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:47:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:47:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:47:53.409783  543705 memory.go:184] no items to output this cycle
I0323 12:47:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 12:48:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:48:03.409794  543705 memory.go:184] no items to output this cycle
I0323 12:48:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 12:48:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:48:13.409822  543705 memory.go:191] Add success.
I0323 12:48:13.409830  543705 cpu.go:282] Add success.
W0323 12:48:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:48:13.412626  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:48:13.412631  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:48:13.420284  543705 net.go:648] Add success.
I0323 12:48:13.422101  543705 net.go:770] primary dev: ETH0
I0323 12:48:13.422116  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:48:13.422129  543705 net.go:698] Add success.
I0323 12:48:13.515547  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f2b314be-d746-4b2a-b8bc-776cedbf1ee4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:48:13.515588  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:48:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:48:14.455192  543705 disk_worker.go:708] disk space is not compliant
W0323 12:48:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:48:14.456546  543705 disk_worker.go:494] system disk:vda1
I0323 12:48:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:48:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:48:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:48:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:48:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:48:16.472496  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:48:23.409778  543705 memory.go:184] no items to output this cycle
I0323 12:48:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 12:48:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:48:33.409793  543705 memory.go:184] no items to output this cycle
I0323 12:48:33.409831  543705 cpu.go:275] no items to output this cycle
I0323 12:48:40.001676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:48:40.004142  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:48:40.004148  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c3c0 0xc00039c400]
I0323 12:48:40.419330  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:48:40.419335  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:48:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:48:43.410670  543705 memory.go:191] Add success.
I0323 12:48:43.409791  543705 cpu.go:282] Add success.
I0323 12:48:43.420371  543705 net.go:648] Add success.
I0323 12:48:43.423625  543705 net.go:770] primary dev: ETH0
I0323 12:48:43.423638  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:48:43.423651  543705 net.go:698] Add success.
I0323 12:48:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:48:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:48:46.458114  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:48:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:48:53.409777  543705 memory.go:184] no items to output this cycle
I0323 12:48:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 12:49:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:49:03.409769  543705 memory.go:184] no items to output this cycle
I0323 12:49:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 12:49:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:49:13.409815  543705 memory.go:191] Add success.
I0323 12:49:13.409818  543705 cpu.go:282] Add success.
W0323 12:49:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:49:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:49:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:49:13.420131  543705 net.go:648] Add success.
I0323 12:49:13.422874  543705 net.go:770] primary dev: ETH0
I0323 12:49:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:49:13.422900  543705 net.go:698] Add success.
I0323 12:49:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:49:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:49:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0323 12:49:14.455161  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:49:14.456500  543705 disk_worker.go:494] system disk:vda1
I0323 12:49:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:49:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:49:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:49:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:49:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:49:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:49:23.409787  543705 memory.go:184] no items to output this cycle
I0323 12:49:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 12:49:33.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:49:33.409765  543705 memory.go:184] no items to output this cycle
I0323 12:49:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 12:49:40.005667  543705 disk_info.go:125] begin check local disk info of client
I0323 12:49:40.008221  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:49:40.008227  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6040 0xc0004a6080]
E0323 12:49:43.409922  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:49:43.410941  543705 memory.go:191] Add success.
I0323 12:49:43.409984  543705 cpu.go:282] Add success.
I0323 12:49:43.419716  543705 net.go:648] Add success.
I0323 12:49:43.422661  543705 net.go:770] primary dev: ETH0
I0323 12:49:43.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:49:43.422685  543705 net.go:698] Add success.
I0323 12:49:46.458073  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:49:46.458148  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:49:46.458176  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:49:53.410269  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:49:53.410290  543705 memory.go:184] no items to output this cycle
I0323 12:49:53.410304  543705 cpu.go:275] no items to output this cycle
E0323 12:50:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:50:03.409768  543705 memory.go:184] no items to output this cycle
I0323 12:50:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 12:50:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:50:13.409826  543705 memory.go:191] Add success.
I0323 12:50:13.409841  543705 cpu.go:282] Add success.
W0323 12:50:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:50:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:50:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:50:13.420166  543705 net.go:648] Add success.
I0323 12:50:13.422910  543705 net.go:770] primary dev: ETH0
I0323 12:50:13.422923  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:50:13.422936  543705 net.go:698] Add success.
I0323 12:50:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:50:14.455130  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:50:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 12:50:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:50:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 12:50:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:50:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:50:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:50:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:50:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:50:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:50:23.409782  543705 memory.go:184] no items to output this cycle
I0323 12:50:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 12:50:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:50:33.409779  543705 memory.go:184] no items to output this cycle
I0323 12:50:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 12:50:40.009674  543705 disk_info.go:125] begin check local disk info of client
I0323 12:50:40.012250  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:50:40.012257  543705 disk_info.go:196] parse disk info done, disk is : [0xc00047b040 0xc00047b080]
E0323 12:50:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:50:43.410604  543705 memory.go:191] Add success.
I0323 12:50:43.409921  543705 cpu.go:282] Add success.
I0323 12:50:43.419725  543705 net.go:648] Add success.
I0323 12:50:43.422448  543705 net.go:770] primary dev: ETH0
I0323 12:50:43.422461  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:50:43.422472  543705 net.go:698] Add success.
I0323 12:50:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:50:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:50:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:50:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:50:53.409791  543705 memory.go:184] no items to output this cycle
I0323 12:50:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 12:51:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:51:03.409775  543705 memory.go:184] no items to output this cycle
I0323 12:51:03.409781  543705 cpu.go:275] no items to output this cycle
E0323 12:51:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:51:13.409807  543705 memory.go:191] Add success.
I0323 12:51:13.409809  543705 cpu.go:282] Add success.
W0323 12:51:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:51:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:51:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:51:13.420185  543705 net.go:648] Add success.
I0323 12:51:13.422768  543705 net.go:770] primary dev: ETH0
I0323 12:51:13.422781  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:51:13.422792  543705 net.go:698] Add success.
I0323 12:51:13.484494  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"98fddcc7-dd10-467d-a87e-581a6374c965","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:51:13.484529  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:51:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:51:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:51:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 12:51:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:51:14.456579  543705 disk_worker.go:494] system disk:vda1
I0323 12:51:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:51:15.455972  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:51:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:51:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:51:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:51:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:51:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:51:23.409799  543705 memory.go:184] no items to output this cycle
I0323 12:51:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 12:51:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:51:33.409809  543705 memory.go:184] no items to output this cycle
I0323 12:51:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 12:51:40.013676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:51:40.016328  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:51:40.016335  543705 disk_info.go:196] parse disk info done, disk is : [0xc000508100 0xc000508140]
I0323 12:51:40.419902  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:51:40.419907  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:51:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:51:43.410823  543705 memory.go:191] Add success.
I0323 12:51:43.409817  543705 cpu.go:282] Add success.
I0323 12:51:43.419759  543705 net.go:648] Add success.
I0323 12:51:43.422500  543705 net.go:770] primary dev: ETH0
I0323 12:51:43.422516  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:51:43.422530  543705 net.go:698] Add success.
I0323 12:51:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:51:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:51:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:51:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:51:53.409795  543705 cpu.go:275] no items to output this cycle
I0323 12:51:53.409801  543705 memory.go:184] no items to output this cycle
E0323 12:52:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:52:03.409796  543705 cpu.go:275] no items to output this cycle
I0323 12:52:03.409801  543705 memory.go:184] no items to output this cycle
E0323 12:52:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:52:13.409825  543705 memory.go:191] Add success.
I0323 12:52:13.409828  543705 cpu.go:282] Add success.
W0323 12:52:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:52:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:52:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:52:13.420090  543705 net.go:648] Add success.
I0323 12:52:13.422947  543705 net.go:770] primary dev: ETH0
I0323 12:52:13.422960  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:52:13.422972  543705 net.go:698] Add success.
W0323 12:52:14.455140  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:52:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 12:52:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:52:14.456860  543705 disk_worker.go:494] system disk:vda1
I0323 12:52:14.456904  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:52:14.457115  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:52:14.457125  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:52:14.457131  543705 custom_config.go:64] query custom config with name: gpu
E0323 12:52:15.456824  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:52:15.456833  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:52:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:52:16.457941  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:52:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:52:16.458002  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:52:16.472360  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:52:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:52:23.409793  543705 memory.go:184] no items to output this cycle
I0323 12:52:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 12:52:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:52:33.409786  543705 memory.go:184] no items to output this cycle
I0323 12:52:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 12:52:40.017676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:52:40.020245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:52:40.020251  543705 disk_info.go:196] parse disk info done, disk is : [0xc000267780 0xc0002677c0]
E0323 12:52:43.409884  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:52:43.410766  543705 memory.go:191] Add success.
I0323 12:52:43.409890  543705 cpu.go:282] Add success.
I0323 12:52:43.419737  543705 net.go:648] Add success.
I0323 12:52:43.422577  543705 net.go:770] primary dev: ETH0
I0323 12:52:43.422589  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:52:43.422601  543705 net.go:698] Add success.
I0323 12:52:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:52:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:52:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:52:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:52:53.409788  543705 memory.go:184] no items to output this cycle
I0323 12:52:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 12:53:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:53:03.409786  543705 memory.go:184] no items to output this cycle
I0323 12:53:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 12:53:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:53:13.409808  543705 memory.go:191] Add success.
I0323 12:53:13.409810  543705 cpu.go:282] Add success.
W0323 12:53:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:53:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:53:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:53:13.420145  543705 net.go:648] Add success.
I0323 12:53:13.422863  543705 net.go:770] primary dev: ETH0
I0323 12:53:13.422876  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:53:13.422889  543705 net.go:698] Add success.
I0323 12:53:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:53:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:53:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 12:53:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:53:14.456564  543705 disk_worker.go:494] system disk:vda1
I0323 12:53:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:53:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:53:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:53:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:53:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:53:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:53:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:53:23.409806  543705 memory.go:184] no items to output this cycle
I0323 12:53:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 12:53:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:53:33.409775  543705 memory.go:184] no items to output this cycle
I0323 12:53:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 12:53:40.021675  543705 disk_info.go:125] begin check local disk info of client
I0323 12:53:40.024222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:53:40.024228  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004da9c0 0xc0004daa00]
E0323 12:53:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:53:43.410599  543705 memory.go:191] Add success.
I0323 12:53:43.409785  543705 cpu.go:282] Add success.
I0323 12:53:43.419734  543705 net.go:648] Add success.
I0323 12:53:43.422403  543705 net.go:770] primary dev: ETH0
I0323 12:53:43.422418  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:53:43.422432  543705 net.go:698] Add success.
I0323 12:53:46.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:53:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:53:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:53:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:53:53.409782  543705 memory.go:184] no items to output this cycle
I0323 12:53:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 12:54:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:54:03.409797  543705 memory.go:184] no items to output this cycle
I0323 12:54:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 12:54:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:54:13.409786  543705 memory.go:191] Add success.
I0323 12:54:13.409787  543705 cpu.go:282] Add success.
W0323 12:54:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:54:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:54:13.409829  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:54:13.420085  543705 net.go:648] Add success.
I0323 12:54:13.422962  543705 net.go:770] primary dev: ETH0
I0323 12:54:13.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:54:13.422988  543705 net.go:698] Add success.
I0323 12:54:13.470176  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"31af0cb7-69df-4075-8e84-6527cbc3c598","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:54:13.470210  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 12:54:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:54:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:54:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 12:54:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:54:14.456562  543705 disk_worker.go:494] system disk:vda1
I0323 12:54:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:54:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:54:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:54:16.472365  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:54:23.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:54:23.409764  543705 memory.go:184] no items to output this cycle
I0323 12:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 12:54:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:54:33.409772  543705 memory.go:184] no items to output this cycle
I0323 12:54:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 12:54:40.025674  543705 disk_info.go:125] begin check local disk info of client
I0323 12:54:40.028227  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:54:40.028233  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ee640 0xc0004ee680]
I0323 12:54:40.420294  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:54:40.420299  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:54:43.409941  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:54:43.410953  543705 memory.go:191] Add success.
I0323 12:54:43.409951  543705 cpu.go:282] Add success.
I0323 12:54:43.419713  543705 net.go:648] Add success.
I0323 12:54:43.422574  543705 net.go:770] primary dev: ETH0
I0323 12:54:43.422587  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:54:43.422599  543705 net.go:698] Add success.
I0323 12:54:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:54:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:54:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:54:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:54:53.409788  543705 memory.go:184] no items to output this cycle
I0323 12:54:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 12:55:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:55:03.409770  543705 memory.go:184] no items to output this cycle
I0323 12:55:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:55:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:55:13.409815  543705 memory.go:191] Add success.
I0323 12:55:13.409824  543705 cpu.go:282] Add success.
W0323 12:55:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:55:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:55:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:55:13.420133  543705 net.go:648] Add success.
I0323 12:55:13.423138  543705 net.go:770] primary dev: ETH0
I0323 12:55:13.423151  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:55:13.423164  543705 net.go:698] Add success.
I0323 12:55:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:55:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:55:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 12:55:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:55:14.456510  543705 disk_worker.go:494] system disk:vda1
I0323 12:55:14.456557  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:55:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:55:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:55:16.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:55:16.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:55:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:55:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:55:23.409825  543705 memory.go:184] no items to output this cycle
I0323 12:55:23.409844  543705 cpu.go:275] no items to output this cycle
E0323 12:55:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:55:33.409778  543705 memory.go:184] no items to output this cycle
I0323 12:55:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 12:55:40.029673  543705 disk_info.go:125] begin check local disk info of client
I0323 12:55:40.032248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:55:40.032254  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481d40 0xc000481d80]
E0323 12:55:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:55:43.410972  543705 memory.go:191] Add success.
I0323 12:55:43.409819  543705 cpu.go:282] Add success.
I0323 12:55:43.420680  543705 net.go:648] Add success.
I0323 12:55:43.423365  543705 net.go:770] primary dev: ETH0
I0323 12:55:43.423379  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:55:43.423391  543705 net.go:698] Add success.
I0323 12:55:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:55:46.458080  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:55:46.458110  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:55:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:55:53.409811  543705 memory.go:184] no items to output this cycle
I0323 12:55:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 12:56:03.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:56:03.409768  543705 memory.go:184] no items to output this cycle
I0323 12:56:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 12:56:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:56:13.409815  543705 memory.go:191] Add success.
I0323 12:56:13.409820  543705 cpu.go:282] Add success.
W0323 12:56:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:56:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:56:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:56:13.420272  543705 net.go:648] Add success.
I0323 12:56:13.423177  543705 net.go:770] primary dev: ETH0
I0323 12:56:13.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:56:13.423206  543705 net.go:698] Add success.
I0323 12:56:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:56:14.455231  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:56:14.455243  543705 disk_worker.go:708] disk space is not compliant
W0323 12:56:14.455246  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:56:14.456655  543705 disk_worker.go:494] system disk:vda1
I0323 12:56:14.456687  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:56:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:56:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:56:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:56:16.472416  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:56:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:56:23.409772  543705 memory.go:184] no items to output this cycle
I0323 12:56:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 12:56:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:56:33.409774  543705 memory.go:184] no items to output this cycle
I0323 12:56:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 12:56:40.033674  543705 disk_info.go:125] begin check local disk info of client
I0323 12:56:40.036208  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:56:40.036214  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001f5c00 0xc0001f5c40]
E0323 12:56:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:56:43.410740  543705 memory.go:191] Add success.
I0323 12:56:43.409811  543705 cpu.go:282] Add success.
I0323 12:56:43.420424  543705 net.go:648] Add success.
I0323 12:56:43.423093  543705 net.go:770] primary dev: ETH0
I0323 12:56:43.423106  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:56:43.423118  543705 net.go:698] Add success.
I0323 12:56:46.458015  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:56:46.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:56:46.458122  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:56:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:56:53.409788  543705 memory.go:184] no items to output this cycle
I0323 12:56:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 12:57:03.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:57:03.410009  543705 memory.go:184] no items to output this cycle
I0323 12:57:03.410067  543705 cpu.go:275] no items to output this cycle
E0323 12:57:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:57:13.409784  543705 memory.go:191] Add success.
W0323 12:57:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 12:57:13.409812  543705 cpu.go:282] Add success.
W0323 12:57:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:57:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:57:13.420151  543705 net.go:648] Add success.
I0323 12:57:13.428758  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 12:57:13.428834  543705 net.go:770] primary dev: ETH0
I0323 12:57:13.428849  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:57:13.428863  543705 net.go:698] Add success.
I0323 12:57:13.453394  543705 event_worker.go:152] Polling the log file for events...
I0323 12:57:13.468722  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9b1574d-361b-4cca-badc-8b082191146d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 12:57:13.468756  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 12:57:14.454690  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:57:14.454704  543705 disk_worker.go:708] disk space is not compliant
W0323 12:57:14.454708  543705 disk_worker.go:728] disk inode is not compliant
E0323 12:57:14.455643  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 12:57:14.455653  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 12:57:14.455659  543705 custom_config.go:64] query custom config with name: gpu
I0323 12:57:14.456436  543705 disk_worker.go:494] system disk:vda1
I0323 12:57:14.456467  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 12:57:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 12:57:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:57:16.457942  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 12:57:16.457951  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 12:57:16.457994  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:57:16.458010  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:57:16.472348  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:57:23.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:57:23.409772  543705 memory.go:184] no items to output this cycle
I0323 12:57:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 12:57:33.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:57:33.409761  543705 memory.go:184] no items to output this cycle
I0323 12:57:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 12:57:40.037675  543705 disk_info.go:125] begin check local disk info of client
I0323 12:57:40.040230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:57:40.040236  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471f00 0xc000471f40]
I0323 12:57:40.421265  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 12:57:40.421280  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 12:57:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:57:43.410655  543705 memory.go:191] Add success.
I0323 12:57:43.409818  543705 cpu.go:282] Add success.
I0323 12:57:43.420615  543705 net.go:648] Add success.
I0323 12:57:43.423559  543705 net.go:770] primary dev: ETH0
I0323 12:57:43.423572  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:57:43.423599  543705 net.go:698] Add success.
I0323 12:57:46.458025  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:57:46.458097  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:57:46.458130  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:57:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:57:53.409795  543705 memory.go:184] no items to output this cycle
I0323 12:57:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 12:58:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:58:03.409796  543705 memory.go:184] no items to output this cycle
I0323 12:58:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 12:58:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:58:13.409781  543705 memory.go:191] Add success.
I0323 12:58:13.409805  543705 cpu.go:282] Add success.
W0323 12:58:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:58:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:58:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:58:13.420112  543705 net.go:648] Add success.
I0323 12:58:13.422863  543705 net.go:770] primary dev: ETH0
I0323 12:58:13.422882  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:58:13.422897  543705 net.go:698] Add success.
I0323 12:58:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:58:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:58:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 12:58:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:58:14.456607  543705 disk_worker.go:494] system disk:vda1
I0323 12:58:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:58:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:58:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:58:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:58:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:58:16.472386  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:58:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:58:23.409778  543705 cpu.go:275] no items to output this cycle
I0323 12:58:23.409788  543705 memory.go:184] no items to output this cycle
E0323 12:58:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:58:33.409801  543705 memory.go:184] no items to output this cycle
I0323 12:58:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 12:58:40.041676  543705 disk_info.go:125] begin check local disk info of client
I0323 12:58:40.044170  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:58:40.044178  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b2f40 0xc0003b2f80]
E0323 12:58:43.409942  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:58:43.410622  543705 memory.go:191] Add success.
I0323 12:58:43.409951  543705 cpu.go:282] Add success.
I0323 12:58:43.419733  543705 net.go:648] Add success.
I0323 12:58:43.422526  543705 net.go:770] primary dev: ETH0
I0323 12:58:43.422541  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:58:43.422554  543705 net.go:698] Add success.
I0323 12:58:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:58:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:58:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:58:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:58:53.409795  543705 memory.go:184] no items to output this cycle
I0323 12:58:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 12:59:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:59:03.409777  543705 memory.go:184] no items to output this cycle
I0323 12:59:03.409782  543705 cpu.go:275] no items to output this cycle
E0323 12:59:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:59:13.409791  543705 cpu.go:282] Add success.
I0323 12:59:13.409793  543705 memory.go:191] Add success.
W0323 12:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 12:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 12:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 12:59:13.420186  543705 net.go:648] Add success.
I0323 12:59:13.422598  543705 net.go:770] primary dev: ETH0
I0323 12:59:13.422611  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:59:13.422623  543705 net.go:698] Add success.
I0323 12:59:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 12:59:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 12:59:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 12:59:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 12:59:14.456648  543705 disk_worker.go:494] system disk:vda1
I0323 12:59:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 12:59:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 12:59:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:59:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:59:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 12:59:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0323 12:59:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:59:23.409786  543705 memory.go:184] no items to output this cycle
I0323 12:59:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 12:59:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:59:33.409787  543705 memory.go:184] no items to output this cycle
I0323 12:59:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 12:59:40.045678  543705 disk_info.go:125] begin check local disk info of client
I0323 12:59:40.048141  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 12:59:40.048147  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b540 0xc00007b580]
E0323 12:59:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:59:43.410998  543705 memory.go:191] Add success.
I0323 12:59:43.409827  543705 cpu.go:282] Add success.
I0323 12:59:43.420713  543705 net.go:648] Add success.
I0323 12:59:43.423726  543705 net.go:770] primary dev: ETH0
I0323 12:59:43.423739  543705 net.go:802] Send network stats successfully!,count is 6
I0323 12:59:43.423750  543705 net.go:698] Add success.
I0323 12:59:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 12:59:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 12:59:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 12:59:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 12:59:53.409785  543705 memory.go:184] no items to output this cycle
I0323 12:59:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 13:00:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:00:03.409794  543705 memory.go:184] no items to output this cycle
I0323 13:00:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 13:00:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:00:13.409781  543705 memory.go:191] Add success.
I0323 13:00:13.409802  543705 cpu.go:282] Add success.
W0323 13:00:13.409807  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:00:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:00:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:00:13.420284  543705 net.go:648] Add success.
I0323 13:00:13.423067  543705 net.go:770] primary dev: ETH0
I0323 13:00:13.423082  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:00:13.423096  543705 net.go:698] Add success.
I0323 13:00:13.468631  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"072bc80c-048a-402f-b6bc-6989acbd688d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:00:13.468661  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:00:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:00:14.455212  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:00:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0323 13:00:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:00:14.456664  543705 disk_worker.go:494] system disk:vda1
I0323 13:00:14.456696  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:00:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:00:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:00:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:00:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:00:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:00:23.409773  543705 memory.go:184] no items to output this cycle
I0323 13:00:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 13:00:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:00:33.409781  543705 memory.go:184] no items to output this cycle
I0323 13:00:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 13:00:40.049676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:00:40.052154  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:00:40.052160  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035b6c0 0xc00035b700]
I0323 13:00:40.422125  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:00:40.422130  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:00:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:00:43.410611  543705 memory.go:191] Add success.
I0323 13:00:43.409803  543705 cpu.go:282] Add success.
I0323 13:00:43.420298  543705 net.go:648] Add success.
I0323 13:00:43.423123  543705 net.go:770] primary dev: ETH0
I0323 13:00:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:00:43.423162  543705 net.go:698] Add success.
I0323 13:00:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:00:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:00:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:00:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:00:53.409795  543705 cpu.go:275] no items to output this cycle
I0323 13:00:53.409797  543705 memory.go:184] no items to output this cycle
E0323 13:01:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:01:03.409792  543705 memory.go:184] no items to output this cycle
I0323 13:01:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 13:01:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:01:13.409798  543705 memory.go:191] Add success.
I0323 13:01:13.409798  543705 cpu.go:282] Add success.
W0323 13:01:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:01:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:01:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:01:13.420492  543705 net.go:648] Add success.
I0323 13:01:13.423350  543705 net.go:770] primary dev: ETH0
I0323 13:01:13.423363  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:01:13.423377  543705 net.go:698] Add success.
I0323 13:01:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:01:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:01:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 13:01:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:01:14.456536  543705 disk_worker.go:494] system disk:vda1
I0323 13:01:14.456580  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:01:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:01:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:01:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:01:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:01:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:01:23.409783  543705 memory.go:184] no items to output this cycle
I0323 13:01:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 13:01:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:01:33.409781  543705 memory.go:184] no items to output this cycle
I0323 13:01:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 13:01:40.053676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:01:40.056253  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:01:40.056261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037edc0 0xc00037ee00]
E0323 13:01:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:01:43.410836  543705 memory.go:191] Add success.
I0323 13:01:43.409825  543705 cpu.go:282] Add success.
I0323 13:01:43.420529  543705 net.go:648] Add success.
I0323 13:01:43.423375  543705 net.go:770] primary dev: ETH0
I0323 13:01:43.423392  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:01:43.423405  543705 net.go:698] Add success.
I0323 13:01:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:01:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:01:46.458062  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:01:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:01:53.409779  543705 memory.go:184] no items to output this cycle
I0323 13:01:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 13:02:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:02:03.409781  543705 memory.go:184] no items to output this cycle
I0323 13:02:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 13:02:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:02:13.409786  543705 memory.go:191] Add success.
W0323 13:02:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 13:02:13.409818  543705 cpu.go:282] Add success.
W0323 13:02:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:02:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:02:13.420267  543705 net.go:648] Add success.
I0323 13:02:13.422876  543705 net.go:770] primary dev: ETH0
I0323 13:02:13.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:02:13.422906  543705 net.go:698] Add success.
W0323 13:02:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:02:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 13:02:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:02:14.456182  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:02:14.456193  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:02:14.456200  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:02:14.457971  543705 disk_worker.go:494] system disk:vda1
I0323 13:02:14.458004  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:02:15.456809  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:02:15.456818  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:02:16.457905  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:02:16.457905  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:02:16.457959  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:02:16.457981  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:02:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:02:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:02:23.409804  543705 memory.go:184] no items to output this cycle
I0323 13:02:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 13:02:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:02:33.409823  543705 memory.go:184] no items to output this cycle
I0323 13:02:33.409829  543705 cpu.go:275] no items to output this cycle
I0323 13:02:40.057684  543705 disk_info.go:125] begin check local disk info of client
I0323 13:02:40.060319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:02:40.060326  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
E0323 13:02:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:02:43.410698  543705 memory.go:191] Add success.
I0323 13:02:43.409817  543705 cpu.go:282] Add success.
I0323 13:02:43.420497  543705 net.go:648] Add success.
I0323 13:02:43.423447  543705 net.go:770] primary dev: ETH0
I0323 13:02:43.423461  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:02:43.423473  543705 net.go:698] Add success.
I0323 13:02:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:02:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:02:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:02:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:02:53.409804  543705 memory.go:184] no items to output this cycle
I0323 13:02:53.409854  543705 cpu.go:275] no items to output this cycle
E0323 13:03:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:03:03.409793  543705 memory.go:184] no items to output this cycle
I0323 13:03:03.409857  543705 cpu.go:275] no items to output this cycle
E0323 13:03:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:03:13.409843  543705 memory.go:191] Add success.
I0323 13:03:13.409869  543705 cpu.go:282] Add success.
W0323 13:03:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:03:13.409906  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:03:13.413049  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:03:13.419750  543705 net.go:648] Add success.
I0323 13:03:13.421591  543705 net.go:770] primary dev: ETH0
I0323 13:03:13.421606  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:03:13.421617  543705 net.go:698] Add success.
I0323 13:03:13.469675  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"97cb80eb-69f7-496c-a9b5-3268c163cba5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:03:13.469710  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:03:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:03:14.455132  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:03:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 13:03:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:03:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 13:03:14.456639  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:03:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:03:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:03:16.458103  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:03:16.458134  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:03:16.472769  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:03:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:03:23.409794  543705 memory.go:184] no items to output this cycle
I0323 13:03:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 13:03:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:03:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 13:03:33.409806  543705 memory.go:184] no items to output this cycle
I0323 13:03:40.061682  543705 disk_info.go:125] begin check local disk info of client
I0323 13:03:40.064257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:03:40.064263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab980 0xc0001ab9c0]
I0323 13:03:40.423176  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:03:40.423181  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:03:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:03:43.410711  543705 memory.go:191] Add success.
I0323 13:03:43.409829  543705 cpu.go:282] Add success.
I0323 13:03:43.420394  543705 net.go:648] Add success.
I0323 13:03:43.423019  543705 net.go:770] primary dev: ETH0
I0323 13:03:43.423032  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:03:43.423045  543705 net.go:698] Add success.
I0323 13:03:46.458029  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:03:46.458116  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:03:46.458154  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:03:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:03:53.409787  543705 memory.go:184] no items to output this cycle
I0323 13:03:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 13:04:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:04:03.409790  543705 memory.go:184] no items to output this cycle
I0323 13:04:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 13:04:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:04:13.409828  543705 memory.go:191] Add success.
I0323 13:04:13.409832  543705 cpu.go:282] Add success.
W0323 13:04:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:04:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:04:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:04:13.420146  543705 net.go:648] Add success.
I0323 13:04:13.422975  543705 net.go:770] primary dev: ETH0
I0323 13:04:13.422990  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:04:13.423001  543705 net.go:698] Add success.
I0323 13:04:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:04:14.455475  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:04:14.455490  543705 disk_worker.go:708] disk space is not compliant
W0323 13:04:14.455495  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:04:14.457765  543705 disk_worker.go:494] system disk:vda1
I0323 13:04:14.457796  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:04:15.455983  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:04:16.458015  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:04:16.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:04:16.458150  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:04:16.472571  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:04:23.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:04:23.409815  543705 memory.go:184] no items to output this cycle
I0323 13:04:23.409825  543705 cpu.go:275] no items to output this cycle
E0323 13:04:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:04:33.409792  543705 memory.go:184] no items to output this cycle
I0323 13:04:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 13:04:40.065678  543705 disk_info.go:125] begin check local disk info of client
I0323 13:04:40.068271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:04:40.068278  543705 disk_info.go:196] parse disk info done, disk is : [0xc000343300 0xc000343340]
E0323 13:04:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:04:43.410633  543705 memory.go:191] Add success.
I0323 13:04:43.409805  543705 cpu.go:282] Add success.
I0323 13:04:43.420335  543705 net.go:648] Add success.
I0323 13:04:43.423230  543705 net.go:770] primary dev: ETH0
I0323 13:04:43.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:04:43.423255  543705 net.go:698] Add success.
I0323 13:04:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:04:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:04:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:04:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:04:53.409776  543705 memory.go:184] no items to output this cycle
I0323 13:04:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 13:05:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:05:03.409788  543705 memory.go:184] no items to output this cycle
I0323 13:05:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:05:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:05:13.409793  543705 memory.go:191] Add success.
I0323 13:05:13.409813  543705 cpu.go:282] Add success.
W0323 13:05:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:05:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:05:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:05:13.420524  543705 net.go:648] Add success.
I0323 13:05:13.423346  543705 net.go:770] primary dev: ETH0
I0323 13:05:13.423359  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:05:13.423372  543705 net.go:698] Add success.
I0323 13:05:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:05:14.455324  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:05:14.455408  543705 disk_worker.go:708] disk space is not compliant
W0323 13:05:14.455413  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:05:14.457051  543705 disk_worker.go:494] system disk:vda1
I0323 13:05:14.457093  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:05:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:05:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:05:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:05:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:05:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:05:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:05:23.409814  543705 memory.go:184] no items to output this cycle
I0323 13:05:23.409830  543705 cpu.go:275] no items to output this cycle
E0323 13:05:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:05:33.409789  543705 memory.go:184] no items to output this cycle
I0323 13:05:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 13:05:40.069680  543705 disk_info.go:125] begin check local disk info of client
I0323 13:05:40.072279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:05:40.072286  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e400 0xc00037e440]
E0323 13:05:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:05:43.410764  543705 memory.go:191] Add success.
I0323 13:05:43.409850  543705 cpu.go:282] Add success.
I0323 13:05:43.420492  543705 net.go:648] Add success.
I0323 13:05:43.423154  543705 net.go:770] primary dev: ETH0
I0323 13:05:43.423172  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:05:43.423187  543705 net.go:698] Add success.
I0323 13:05:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:05:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:05:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:05:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:05:53.409785  543705 memory.go:184] no items to output this cycle
I0323 13:05:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 13:06:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:06:03.409806  543705 memory.go:184] no items to output this cycle
I0323 13:06:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 13:06:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:06:13.409806  543705 memory.go:191] Add success.
I0323 13:06:13.409809  543705 cpu.go:282] Add success.
W0323 13:06:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:06:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:06:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:06:13.420050  543705 net.go:648] Add success.
I0323 13:06:13.423065  543705 net.go:770] primary dev: ETH0
I0323 13:06:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:06:13.423092  543705 net.go:698] Add success.
I0323 13:06:13.831353  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c3b32aac-cd1c-4ce0-91b8-fc55ac4933f4","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:06:13.831387  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:06:14.453987  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:06:14.454648  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:06:14.454964  543705 disk_worker.go:708] disk space is not compliant
W0323 13:06:14.454970  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:06:14.456538  543705 disk_worker.go:494] system disk:vda1
I0323 13:06:14.456571  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:06:15.455614  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:06:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:06:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:06:16.458081  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:06:16.472474  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:06:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:06:23.409795  543705 cpu.go:275] no items to output this cycle
I0323 13:06:23.409796  543705 memory.go:184] no items to output this cycle
E0323 13:06:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:06:33.409781  543705 memory.go:184] no items to output this cycle
I0323 13:06:33.409850  543705 cpu.go:275] no items to output this cycle
I0323 13:06:40.073674  543705 disk_info.go:125] begin check local disk info of client
I0323 13:06:40.076242  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:06:40.076249  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035afc0 0xc00035b000]
I0323 13:06:40.423571  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:06:40.423576  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:06:43.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:06:43.410769  543705 memory.go:191] Add success.
I0323 13:06:43.409860  543705 cpu.go:282] Add success.
I0323 13:06:43.420654  543705 net.go:648] Add success.
I0323 13:06:43.424068  543705 net.go:770] primary dev: ETH0
I0323 13:06:43.424082  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:06:43.424095  543705 net.go:698] Add success.
I0323 13:06:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:06:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:06:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:06:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:06:53.409796  543705 memory.go:184] no items to output this cycle
I0323 13:06:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 13:07:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:07:03.409803  543705 cpu.go:275] no items to output this cycle
I0323 13:07:03.409813  543705 memory.go:184] no items to output this cycle
E0323 13:07:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:07:13.409810  543705 memory.go:191] Add success.
W0323 13:07:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:07:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:07:13.409849  543705 cpu.go:282] Add success.
I0323 13:07:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:07:13.420526  543705 net.go:648] Add success.
I0323 13:07:13.423480  543705 net.go:770] primary dev: ETH0
I0323 13:07:13.423497  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:07:13.423516  543705 net.go:698] Add success.
I0323 13:07:13.453119  543705 event_worker.go:152] Polling the log file for events...
W0323 13:07:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:07:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 13:07:14.455205  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:07:14.456384  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:07:14.456394  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:07:14.456401  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:07:14.456557  543705 disk_worker.go:494] system disk:vda1
I0323 13:07:14.456606  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:07:15.456831  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:07:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:07:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:07:16.457977  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:07:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:07:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:07:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:07:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:07:23.409792  543705 cpu.go:275] no items to output this cycle
I0323 13:07:23.409794  543705 memory.go:184] no items to output this cycle
E0323 13:07:33.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:07:33.409836  543705 memory.go:184] no items to output this cycle
I0323 13:07:33.410004  543705 cpu.go:275] no items to output this cycle
I0323 13:07:40.077684  543705 disk_info.go:125] begin check local disk info of client
I0323 13:07:40.080272  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:07:40.080278  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1040 0xc0003f1080]
E0323 13:07:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:07:43.410660  543705 memory.go:191] Add success.
I0323 13:07:43.409838  543705 cpu.go:282] Add success.
I0323 13:07:43.420548  543705 net.go:648] Add success.
I0323 13:07:43.423184  543705 net.go:770] primary dev: ETH0
I0323 13:07:43.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:07:43.423219  543705 net.go:698] Add success.
I0323 13:07:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:07:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:07:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:07:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:07:53.409818  543705 memory.go:184] no items to output this cycle
I0323 13:07:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 13:08:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:08:03.409780  543705 memory.go:184] no items to output this cycle
I0323 13:08:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 13:08:13.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:08:13.409845  543705 memory.go:191] Add success.
I0323 13:08:13.409856  543705 cpu.go:282] Add success.
W0323 13:08:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:08:13.409893  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:08:13.409897  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:08:13.420762  543705 net.go:648] Add success.
I0323 13:08:13.423826  543705 net.go:770] primary dev: ETH0
I0323 13:08:13.423846  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:08:13.423865  543705 net.go:698] Add success.
I0323 13:08:14.453945  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:08:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:08:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0323 13:08:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:08:14.456565  543705 disk_worker.go:494] system disk:vda1
I0323 13:08:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:08:15.456033  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:08:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:08:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:08:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:08:16.472493  543705 disk_local_worker.go:436] Get disk info: []
I0323 13:08:23.409859  543705 cpu.go:275] no items to output this cycle
E0323 13:08:23.409980  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:08:23.409998  543705 memory.go:184] no items to output this cycle
E0323 13:08:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:08:33.409812  543705 memory.go:184] no items to output this cycle
I0323 13:08:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 13:08:40.085697  543705 disk_info.go:125] begin check local disk info of client
I0323 13:08:40.092621  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:08:40.092630  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab640 0xc0001ab680]
E0323 13:08:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:08:43.410744  543705 memory.go:191] Add success.
I0323 13:08:43.409847  543705 cpu.go:282] Add success.
I0323 13:08:43.420510  543705 net.go:648] Add success.
I0323 13:08:43.423172  543705 net.go:770] primary dev: ETH0
I0323 13:08:43.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:08:43.423199  543705 net.go:698] Add success.
I0323 13:08:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:08:46.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:08:46.458086  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:08:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:08:53.409786  543705 memory.go:184] no items to output this cycle
I0323 13:08:53.409860  543705 cpu.go:275] no items to output this cycle
E0323 13:09:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:09:03.409806  543705 memory.go:184] no items to output this cycle
I0323 13:09:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 13:09:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:09:13.409805  543705 cpu.go:282] Add success.
I0323 13:09:13.409807  543705 memory.go:191] Add success.
W0323 13:09:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:09:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:09:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:09:13.420079  543705 net.go:648] Add success.
I0323 13:09:13.422953  543705 net.go:770] primary dev: ETH0
I0323 13:09:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:09:13.422978  543705 net.go:698] Add success.
I0323 13:09:13.545595  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b462aedc-105b-4185-9679-400b66b19bac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:09:13.545631  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:09:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:09:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:09:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 13:09:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:09:14.457422  543705 disk_worker.go:494] system disk:vda1
I0323 13:09:14.457548  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:09:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:09:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:09:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:09:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:09:16.472515  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:09:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:09:23.409794  543705 memory.go:184] no items to output this cycle
I0323 13:09:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 13:09:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:09:33.409809  543705 memory.go:184] no items to output this cycle
I0323 13:09:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 13:09:40.093684  543705 disk_info.go:125] begin check local disk info of client
I0323 13:09:40.096240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:09:40.096247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a880 0xc00035a8c0]
I0323 13:09:40.424609  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:09:40.424616  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:09:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:09:43.410741  543705 memory.go:191] Add success.
I0323 13:09:43.409822  543705 cpu.go:282] Add success.
I0323 13:09:43.420453  543705 net.go:648] Add success.
I0323 13:09:43.423394  543705 net.go:770] primary dev: ETH0
I0323 13:09:43.423407  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:09:43.423419  543705 net.go:698] Add success.
I0323 13:09:46.458037  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:09:46.458133  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:09:46.458167  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:09:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:09:53.409823  543705 memory.go:184] no items to output this cycle
I0323 13:09:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 13:10:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:10:03.409810  543705 memory.go:184] no items to output this cycle
I0323 13:10:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 13:10:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:10:13.409826  543705 memory.go:191] Add success.
I0323 13:10:13.409851  543705 cpu.go:282] Add success.
W0323 13:10:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:10:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:10:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:10:13.420168  543705 net.go:648] Add success.
I0323 13:10:13.423039  543705 net.go:770] primary dev: ETH0
I0323 13:10:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:10:13.423070  543705 net.go:698] Add success.
I0323 13:10:14.453970  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:10:14.454290  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:10:14.454306  543705 disk_worker.go:708] disk space is not compliant
W0323 13:10:14.454311  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:10:14.456087  543705 disk_worker.go:494] system disk:vda1
I0323 13:10:14.456137  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:10:15.456035  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:10:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:10:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:10:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:10:16.472684  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:10:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:10:23.409793  543705 memory.go:184] no items to output this cycle
I0323 13:10:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 13:10:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:10:33.409785  543705 memory.go:184] no items to output this cycle
I0323 13:10:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 13:10:40.097676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:10:40.100300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:10:40.100307  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481b00 0xc000481b40]
E0323 13:10:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:10:43.410614  543705 memory.go:191] Add success.
I0323 13:10:43.409787  543705 cpu.go:282] Add success.
I0323 13:10:43.420311  543705 net.go:648] Add success.
I0323 13:10:43.423104  543705 net.go:770] primary dev: ETH0
I0323 13:10:43.423116  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:10:43.423128  543705 net.go:698] Add success.
I0323 13:10:46.458017  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:10:46.458105  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:10:46.458133  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:10:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:10:53.409787  543705 memory.go:184] no items to output this cycle
I0323 13:10:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:11:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:11:03.409793  543705 memory.go:184] no items to output this cycle
I0323 13:11:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 13:11:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:11:13.409793  543705 memory.go:191] Add success.
I0323 13:11:13.409808  543705 cpu.go:282] Add success.
W0323 13:11:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:11:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:11:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:11:13.420097  543705 net.go:648] Add success.
I0323 13:11:13.422948  543705 net.go:770] primary dev: ETH0
I0323 13:11:13.422963  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:11:13.422977  543705 net.go:698] Add success.
I0323 13:11:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:11:14.455210  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:11:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 13:11:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:11:14.456673  543705 disk_worker.go:494] system disk:vda1
I0323 13:11:14.456709  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:11:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:11:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:11:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:11:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:11:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:11:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:11:23.409791  543705 memory.go:184] no items to output this cycle
I0323 13:11:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 13:11:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:11:33.409786  543705 memory.go:184] no items to output this cycle
I0323 13:11:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 13:11:40.101680  543705 disk_info.go:125] begin check local disk info of client
I0323 13:11:40.104223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:11:40.104230  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a780 0xc00007a7c0]
E0323 13:11:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:11:43.410818  543705 memory.go:191] Add success.
I0323 13:11:43.409829  543705 cpu.go:282] Add success.
I0323 13:11:43.420684  543705 net.go:648] Add success.
I0323 13:11:43.423557  543705 net.go:770] primary dev: ETH0
I0323 13:11:43.423571  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:11:43.423584  543705 net.go:698] Add success.
I0323 13:11:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:11:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:11:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:11:53.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:11:53.409808  543705 memory.go:184] no items to output this cycle
I0323 13:11:53.409819  543705 cpu.go:275] no items to output this cycle
E0323 13:12:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:12:03.409812  543705 memory.go:184] no items to output this cycle
I0323 13:12:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 13:12:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:12:13.409816  543705 memory.go:191] Add success.
I0323 13:12:13.409823  543705 cpu.go:282] Add success.
W0323 13:12:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:12:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:12:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:12:13.420217  543705 net.go:648] Add success.
I0323 13:12:13.422931  543705 net.go:770] primary dev: ETH0
I0323 13:12:13.422944  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:12:13.422957  543705 net.go:698] Add success.
I0323 13:12:13.464275  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab6b6b89-f70b-4a7e-bef7-c57f13a116c9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:12:13.464309  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 13:12:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:12:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0323 13:12:14.455242  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:12:14.456070  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:12:14.456079  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:12:14.456086  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:12:14.456689  543705 disk_worker.go:494] system disk:vda1
I0323 13:12:14.456819  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:12:15.456900  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:12:15.456912  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:12:16.457947  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:12:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:12:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:12:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:12:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:12:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:12:23.409785  543705 memory.go:184] no items to output this cycle
I0323 13:12:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 13:12:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:12:33.409809  543705 memory.go:184] no items to output this cycle
I0323 13:12:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 13:12:40.105681  543705 disk_info.go:125] begin check local disk info of client
I0323 13:12:40.108289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:12:40.108297  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f500 0xc00037f540]
I0323 13:12:40.424976  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:12:40.424982  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:12:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:12:43.409797  543705 memory.go:191] Add success.
I0323 13:12:43.409809  543705 cpu.go:282] Add success.
I0323 13:12:43.419992  543705 net.go:648] Add success.
I0323 13:12:43.420922  543705 net.go:770] primary dev: ETH0
I0323 13:12:43.420936  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:12:43.420949  543705 net.go:698] Add success.
I0323 13:12:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:12:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:12:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:12:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:12:53.409779  543705 memory.go:184] no items to output this cycle
I0323 13:12:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 13:13:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:13:03.409791  543705 memory.go:184] no items to output this cycle
I0323 13:13:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 13:13:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:13:13.409835  543705 memory.go:191] Add success.
I0323 13:13:13.409840  543705 cpu.go:282] Add success.
W0323 13:13:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:13:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:13:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:13:13.420371  543705 net.go:648] Add success.
I0323 13:13:13.423636  543705 net.go:770] primary dev: ETH0
I0323 13:13:13.423651  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:13:13.423664  543705 net.go:698] Add success.
I0323 13:13:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:13:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:13:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0323 13:13:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:13:14.456665  543705 disk_worker.go:494] system disk:vda1
I0323 13:13:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:13:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:13:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:13:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:13:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:13:16.472408  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:13:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:13:23.409791  543705 memory.go:184] no items to output this cycle
I0323 13:13:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 13:13:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:13:33.409785  543705 memory.go:184] no items to output this cycle
I0323 13:13:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 13:13:40.109680  543705 disk_info.go:125] begin check local disk info of client
I0323 13:13:40.112255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:13:40.112261  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e440 0xc00037e480]
E0323 13:13:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:13:43.410675  543705 memory.go:191] Add success.
I0323 13:13:43.409801  543705 cpu.go:282] Add success.
I0323 13:13:43.420393  543705 net.go:648] Add success.
I0323 13:13:43.423241  543705 net.go:770] primary dev: ETH0
I0323 13:13:43.423256  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:13:43.423270  543705 net.go:698] Add success.
I0323 13:13:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:13:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:13:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:13:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:13:53.409778  543705 memory.go:184] no items to output this cycle
I0323 13:13:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 13:14:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:14:03.409777  543705 memory.go:184] no items to output this cycle
I0323 13:14:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 13:14:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:14:13.409802  543705 memory.go:191] Add success.
I0323 13:14:13.409803  543705 cpu.go:282] Add success.
W0323 13:14:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:14:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:14:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:14:13.420219  543705 net.go:648] Add success.
I0323 13:14:13.422980  543705 net.go:770] primary dev: ETH0
I0323 13:14:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:14:13.423004  543705 net.go:698] Add success.
I0323 13:14:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:14:14.455101  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:14:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 13:14:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:14:14.456520  543705 disk_worker.go:494] system disk:vda1
I0323 13:14:14.456563  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:14:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:14:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:14:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:14:16.472430  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:14:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:14:23.409807  543705 memory.go:184] no items to output this cycle
I0323 13:14:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 13:14:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:14:33.409813  543705 memory.go:184] no items to output this cycle
I0323 13:14:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 13:14:40.113676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:14:40.116243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:14:40.116250  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037fec0 0xc00037ff00]
E0323 13:14:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:14:43.410916  543705 memory.go:191] Add success.
I0323 13:14:43.409815  543705 cpu.go:282] Add success.
I0323 13:14:43.420665  543705 net.go:648] Add success.
I0323 13:14:43.423523  543705 net.go:770] primary dev: ETH0
I0323 13:14:43.423535  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:14:43.423548  543705 net.go:698] Add success.
I0323 13:14:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:14:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:14:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:14:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:14:53.409788  543705 memory.go:184] no items to output this cycle
I0323 13:14:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 13:15:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:15:03.409786  543705 cpu.go:275] no items to output this cycle
I0323 13:15:03.409787  543705 memory.go:184] no items to output this cycle
E0323 13:15:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:15:13.409828  543705 memory.go:191] Add success.
I0323 13:15:13.409834  543705 cpu.go:282] Add success.
W0323 13:15:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:15:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:15:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:15:13.420111  543705 net.go:648] Add success.
I0323 13:15:13.422849  543705 net.go:770] primary dev: ETH0
I0323 13:15:13.422863  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:15:13.422875  543705 net.go:698] Add success.
I0323 13:15:13.464069  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ad04fef8-ddca-4cab-a55c-c3fbb946ab27","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:15:13.464102  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:15:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:15:14.455243  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:15:14.455259  543705 disk_worker.go:708] disk space is not compliant
W0323 13:15:14.455263  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:15:14.457266  543705 disk_worker.go:494] system disk:vda1
I0323 13:15:14.457316  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:15:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:15:16.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:15:16.458101  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:15:16.458135  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:15:16.472618  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:15:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:15:23.409812  543705 memory.go:184] no items to output this cycle
I0323 13:15:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 13:15:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:15:33.409786  543705 memory.go:184] no items to output this cycle
I0323 13:15:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 13:15:40.117684  543705 disk_info.go:125] begin check local disk info of client
I0323 13:15:40.120321  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:15:40.120327  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aafc0 0xc0001ab000]
I0323 13:15:40.425939  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:15:40.425945  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 13:15:43.409813  543705 cpu.go:282] Add success.
E0323 13:15:43.410117  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:15:43.410134  543705 memory.go:191] Add success.
I0323 13:15:43.420345  543705 net.go:648] Add success.
I0323 13:15:43.421470  543705 net.go:770] primary dev: ETH0
I0323 13:15:43.421483  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:15:43.421497  543705 net.go:698] Add success.
I0323 13:15:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:15:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:15:46.458106  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:15:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:15:53.409813  543705 memory.go:184] no items to output this cycle
I0323 13:15:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 13:16:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:16:03.409814  543705 memory.go:184] no items to output this cycle
I0323 13:16:03.409826  543705 cpu.go:275] no items to output this cycle
I0323 13:16:13.409869  543705 cpu.go:282] Add success.
E0323 13:16:13.410224  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:16:13.411720  543705 memory.go:191] Add success.
W0323 13:16:13.411759  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:16:13.411777  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:16:13.411781  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:16:13.420319  543705 net.go:648] Add success.
I0323 13:16:13.423617  543705 net.go:770] primary dev: ETH0
I0323 13:16:13.423636  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:16:13.423653  543705 net.go:698] Add success.
I0323 13:16:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:16:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:16:14.455234  543705 disk_worker.go:708] disk space is not compliant
W0323 13:16:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:16:14.456683  543705 disk_worker.go:494] system disk:vda1
I0323 13:16:14.456717  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:16:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:16:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:16:16.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:16:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:16:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:16:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:16:23.409822  543705 memory.go:184] no items to output this cycle
I0323 13:16:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 13:16:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:16:33.409812  543705 memory.go:184] no items to output this cycle
I0323 13:16:33.409853  543705 cpu.go:275] no items to output this cycle
I0323 13:16:40.121688  543705 disk_info.go:125] begin check local disk info of client
I0323 13:16:40.124306  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:16:40.124313  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c51c0 0xc0000c5200]
E0323 13:16:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:16:43.410928  543705 memory.go:191] Add success.
I0323 13:16:43.409895  543705 cpu.go:282] Add success.
I0323 13:16:43.420756  543705 net.go:648] Add success.
I0323 13:16:43.423641  543705 net.go:770] primary dev: ETH0
I0323 13:16:43.423661  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:16:43.423680  543705 net.go:698] Add success.
I0323 13:16:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:16:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:16:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:16:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:16:53.409827  543705 memory.go:184] no items to output this cycle
I0323 13:16:53.409851  543705 cpu.go:275] no items to output this cycle
E0323 13:17:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:17:03.409784  543705 memory.go:184] no items to output this cycle
I0323 13:17:03.409841  543705 cpu.go:275] no items to output this cycle
E0323 13:17:13.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:17:13.409836  543705 cpu.go:282] Add success.
I0323 13:17:13.409845  543705 memory.go:191] Add success.
W0323 13:17:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:17:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:17:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:17:13.420192  543705 net.go:648] Add success.
I0323 13:17:13.423188  543705 net.go:770] primary dev: ETH0
I0323 13:17:13.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:17:13.423212  543705 net.go:698] Add success.
I0323 13:17:13.452787  543705 event_worker.go:152] Polling the log file for events...
W0323 13:17:14.455208  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:17:14.455311  543705 disk_worker.go:708] disk space is not compliant
W0323 13:17:14.455316  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:17:14.456416  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:17:14.456425  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:17:14.456432  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:17:14.457198  543705 disk_worker.go:494] system disk:vda1
I0323 13:17:14.457230  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:17:15.457083  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:17:15.457098  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:17:16.458097  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:17:16.458158  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:17:16.458176  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:17:16.458202  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:17:16.472572  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:17:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:17:23.409909  543705 cpu.go:275] no items to output this cycle
I0323 13:17:23.409916  543705 memory.go:184] no items to output this cycle
E0323 13:17:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:17:33.409810  543705 memory.go:184] no items to output this cycle
I0323 13:17:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 13:17:40.125685  543705 disk_info.go:125] begin check local disk info of client
I0323 13:17:40.128270  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:17:40.128277  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035be00 0xc00035be40]
E0323 13:17:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:17:43.410652  543705 memory.go:191] Add success.
I0323 13:17:43.409820  543705 cpu.go:282] Add success.
I0323 13:17:43.420372  543705 net.go:648] Add success.
I0323 13:17:43.423181  543705 net.go:770] primary dev: ETH0
I0323 13:17:43.423193  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:17:43.423206  543705 net.go:698] Add success.
I0323 13:17:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:17:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:17:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:17:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:17:53.409795  543705 memory.go:184] no items to output this cycle
I0323 13:17:53.409849  543705 cpu.go:275] no items to output this cycle
E0323 13:18:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:18:03.409784  543705 memory.go:184] no items to output this cycle
I0323 13:18:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 13:18:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:18:13.409828  543705 memory.go:191] Add success.
I0323 13:18:13.409835  543705 cpu.go:282] Add success.
W0323 13:18:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:18:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:18:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:18:13.420168  543705 net.go:648] Add success.
I0323 13:18:13.422807  543705 net.go:770] primary dev: ETH0
I0323 13:18:13.422822  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:18:13.422834  543705 net.go:698] Add success.
I0323 13:18:13.467965  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cd1bbe29-8e59-49b6-8bfc-66129297ad1b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:18:13.468000  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:18:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:18:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:18:14.455182  543705 disk_worker.go:708] disk space is not compliant
W0323 13:18:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:18:14.456661  543705 disk_worker.go:494] system disk:vda1
I0323 13:18:14.456695  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:18:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:18:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:18:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:18:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:18:16.472490  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:18:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:18:23.409863  543705 memory.go:184] no items to output this cycle
I0323 13:18:23.409960  543705 cpu.go:275] no items to output this cycle
E0323 13:18:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:18:33.409813  543705 memory.go:184] no items to output this cycle
I0323 13:18:33.409831  543705 cpu.go:275] no items to output this cycle
I0323 13:18:40.129677  543705 disk_info.go:125] begin check local disk info of client
I0323 13:18:40.132261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:18:40.132267  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037ec40 0xc00037ec80]
I0323 13:18:40.426792  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:18:40.426799  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:18:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:18:43.410692  543705 memory.go:191] Add success.
I0323 13:18:43.409819  543705 cpu.go:282] Add success.
I0323 13:18:43.420427  543705 net.go:648] Add success.
I0323 13:18:43.423184  543705 net.go:770] primary dev: ETH0
I0323 13:18:43.423198  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:18:43.423211  543705 net.go:698] Add success.
I0323 13:18:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:18:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:18:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:18:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:18:53.409785  543705 memory.go:184] no items to output this cycle
I0323 13:18:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 13:19:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:19:03.409794  543705 memory.go:184] no items to output this cycle
I0323 13:19:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 13:19:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:19:13.409835  543705 memory.go:191] Add success.
I0323 13:19:13.409837  543705 cpu.go:282] Add success.
W0323 13:19:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:19:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:19:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:19:13.420194  543705 net.go:648] Add success.
I0323 13:19:13.422758  543705 net.go:770] primary dev: ETH0
I0323 13:19:13.422771  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:19:13.422783  543705 net.go:698] Add success.
I0323 13:19:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:19:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:19:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 13:19:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:19:14.456641  543705 disk_worker.go:494] system disk:vda1
I0323 13:19:14.456675  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:19:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:19:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:19:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:19:16.458075  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:19:16.472438  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:19:23.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:19:23.409932  543705 memory.go:184] no items to output this cycle
I0323 13:19:23.409974  543705 cpu.go:275] no items to output this cycle
E0323 13:19:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:19:33.409780  543705 memory.go:184] no items to output this cycle
I0323 13:19:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 13:19:40.133702  543705 disk_info.go:125] begin check local disk info of client
I0323 13:19:40.136299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:19:40.136306  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 13:19:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:19:43.410736  543705 memory.go:191] Add success.
I0323 13:19:43.409927  543705 cpu.go:282] Add success.
I0323 13:19:43.420655  543705 net.go:648] Add success.
I0323 13:19:43.423566  543705 net.go:770] primary dev: ETH0
I0323 13:19:43.423580  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:19:43.423592  543705 net.go:698] Add success.
I0323 13:19:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:19:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:19:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:19:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:19:53.409799  543705 memory.go:184] no items to output this cycle
I0323 13:19:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 13:20:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:20:03.409799  543705 memory.go:184] no items to output this cycle
I0323 13:20:03.409867  543705 cpu.go:275] no items to output this cycle
I0323 13:20:13.409876  543705 cpu.go:282] Add success.
E0323 13:20:13.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:20:13.410279  543705 memory.go:191] Add success.
W0323 13:20:13.410310  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:20:13.410326  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:20:13.410331  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:20:13.420392  543705 net.go:648] Add success.
I0323 13:20:13.421495  543705 net.go:770] primary dev: ETH0
I0323 13:20:13.421514  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:20:13.421532  543705 net.go:698] Add success.
I0323 13:20:14.453940  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:20:14.455232  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:20:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 13:20:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:20:14.456787  543705 disk_worker.go:494] system disk:vda1
I0323 13:20:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:20:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:20:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:20:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:20:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:20:16.472505  543705 disk_local_worker.go:436] Get disk info: []
I0323 13:20:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 13:20:23.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:20:23.409827  543705 memory.go:184] no items to output this cycle
E0323 13:20:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:20:33.409797  543705 memory.go:184] no items to output this cycle
I0323 13:20:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 13:20:40.137681  543705 disk_info.go:125] begin check local disk info of client
I0323 13:20:40.140371  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:20:40.140379  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b100 0xc00027b140]
I0323 13:20:43.409889  543705 cpu.go:282] Add success.
E0323 13:20:43.410899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:20:43.410922  543705 memory.go:191] Add success.
I0323 13:20:43.420012  543705 net.go:648] Add success.
I0323 13:20:43.421164  543705 net.go:770] primary dev: ETH0
I0323 13:20:43.421184  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:20:43.421204  543705 net.go:698] Add success.
I0323 13:20:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:20:46.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:20:46.458112  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:20:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 13:20:53.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:20:53.409835  543705 memory.go:184] no items to output this cycle
E0323 13:21:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:21:03.409816  543705 memory.go:184] no items to output this cycle
I0323 13:21:03.409825  543705 cpu.go:275] no items to output this cycle
E0323 13:21:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:21:13.409807  543705 memory.go:191] Add success.
W0323 13:21:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:21:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:21:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:21:13.409863  543705 cpu.go:282] Add success.
I0323 13:21:13.420412  543705 net.go:648] Add success.
I0323 13:21:13.421371  543705 net.go:770] primary dev: ETH0
I0323 13:21:13.421384  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:21:13.421396  543705 net.go:698] Add success.
I0323 13:21:13.479585  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af9e25a0-486d-44c5-b758-285a201329c0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:21:13.479619  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:21:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:21:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:21:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0323 13:21:14.455239  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:21:14.456693  543705 disk_worker.go:494] system disk:vda1
I0323 13:21:14.456745  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:21:15.455992  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:21:16.458018  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:21:16.458119  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:21:16.458158  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:21:16.472730  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:21:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:21:23.409798  543705 memory.go:184] no items to output this cycle
I0323 13:21:23.409862  543705 cpu.go:275] no items to output this cycle
E0323 13:21:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:21:33.409789  543705 memory.go:184] no items to output this cycle
I0323 13:21:33.409853  543705 cpu.go:275] no items to output this cycle
I0323 13:21:40.141683  543705 disk_info.go:125] begin check local disk info of client
I0323 13:21:40.144312  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:21:40.144319  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b780 0xc00007b7c0]
I0323 13:21:40.427886  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:21:40.427894  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:21:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:21:43.410631  543705 memory.go:191] Add success.
I0323 13:21:43.409820  543705 cpu.go:282] Add success.
I0323 13:21:43.420335  543705 net.go:648] Add success.
I0323 13:21:43.423095  543705 net.go:770] primary dev: ETH0
I0323 13:21:43.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:21:43.423121  543705 net.go:698] Add success.
I0323 13:21:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:21:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:21:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:21:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:21:53.409809  543705 memory.go:184] no items to output this cycle
I0323 13:21:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 13:22:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:22:03.409819  543705 memory.go:184] no items to output this cycle
I0323 13:22:03.409838  543705 cpu.go:275] no items to output this cycle
E0323 13:22:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:22:13.409815  543705 memory.go:191] Add success.
I0323 13:22:13.409817  543705 cpu.go:282] Add success.
W0323 13:22:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:22:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:22:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:22:13.420048  543705 net.go:648] Add success.
I0323 13:22:13.422897  543705 net.go:770] primary dev: ETH0
I0323 13:22:13.422911  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:22:13.422924  543705 net.go:698] Add success.
W0323 13:22:14.455278  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:22:14.455372  543705 disk_worker.go:708] disk space is not compliant
W0323 13:22:14.455377  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:22:14.457116  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:22:14.457127  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:22:14.457134  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:22:14.458875  543705 disk_worker.go:494] system disk:vda1
I0323 13:22:14.458917  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:22:15.457131  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:22:15.457152  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:22:16.458100  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:22:16.458161  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:22:16.458184  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:22:16.458197  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:22:16.472553  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:22:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:22:23.409809  543705 cpu.go:275] no items to output this cycle
I0323 13:22:23.409836  543705 memory.go:184] no items to output this cycle
I0323 13:22:33.409800  543705 cpu.go:275] no items to output this cycle
E0323 13:22:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:22:33.409821  543705 memory.go:184] no items to output this cycle
I0323 13:22:40.145684  543705 disk_info.go:125] begin check local disk info of client
I0323 13:22:40.148299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:22:40.148307  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5400 0xc0000c5440]
E0323 13:22:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:22:43.410705  543705 memory.go:191] Add success.
I0323 13:22:43.409844  543705 cpu.go:282] Add success.
I0323 13:22:43.420386  543705 net.go:648] Add success.
I0323 13:22:43.423161  543705 net.go:770] primary dev: ETH0
I0323 13:22:43.423174  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:22:43.423187  543705 net.go:698] Add success.
I0323 13:22:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:22:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:22:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:22:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:22:53.409809  543705 memory.go:184] no items to output this cycle
I0323 13:22:53.409844  543705 cpu.go:275] no items to output this cycle
E0323 13:23:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:23:03.409808  543705 cpu.go:275] no items to output this cycle
I0323 13:23:03.409813  543705 memory.go:184] no items to output this cycle
E0323 13:23:13.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:23:13.409841  543705 memory.go:191] Add success.
I0323 13:23:13.409850  543705 cpu.go:282] Add success.
W0323 13:23:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:23:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:23:13.409894  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:23:13.420352  543705 net.go:648] Add success.
I0323 13:23:13.423035  543705 net.go:770] primary dev: ETH0
I0323 13:23:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:23:13.423064  543705 net.go:698] Add success.
I0323 13:23:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:23:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:23:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0323 13:23:14.455237  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:23:14.456954  543705 disk_worker.go:494] system disk:vda1
I0323 13:23:14.456990  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:23:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:23:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:23:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:23:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:23:16.472514  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:23:23.410646  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:23:23.410666  543705 memory.go:184] no items to output this cycle
I0323 13:23:23.410727  543705 cpu.go:275] no items to output this cycle
E0323 13:23:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:23:33.409799  543705 memory.go:184] no items to output this cycle
I0323 13:23:33.409853  543705 cpu.go:275] no items to output this cycle
I0323 13:23:40.149701  543705 disk_info.go:125] begin check local disk info of client
I0323 13:23:40.153420  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:23:40.153429  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab2c0 0xc0001ab300]
E0323 13:23:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:23:43.410627  543705 memory.go:191] Add success.
I0323 13:23:43.409825  543705 cpu.go:282] Add success.
I0323 13:23:43.420581  543705 net.go:648] Add success.
I0323 13:23:43.423402  543705 net.go:770] primary dev: ETH0
I0323 13:23:43.423420  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:23:43.423438  543705 net.go:698] Add success.
I0323 13:23:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:23:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:23:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:23:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:23:53.409787  543705 memory.go:184] no items to output this cycle
I0323 13:23:53.409835  543705 cpu.go:275] no items to output this cycle
E0323 13:24:03.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:24:03.409820  543705 memory.go:184] no items to output this cycle
I0323 13:24:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 13:24:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:24:13.409831  543705 memory.go:191] Add success.
I0323 13:24:13.409839  543705 cpu.go:282] Add success.
W0323 13:24:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:24:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:24:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:24:13.420246  543705 net.go:648] Add success.
I0323 13:24:13.422885  543705 net.go:770] primary dev: ETH0
I0323 13:24:13.422898  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:24:13.422910  543705 net.go:698] Add success.
I0323 13:24:13.464101  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5d7d101e-b790-4ae5-b725-4293357979ab","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:24:13.464134  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:24:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:24:14.455221  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:24:14.455233  543705 disk_worker.go:708] disk space is not compliant
W0323 13:24:14.455236  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:24:14.457167  543705 disk_worker.go:494] system disk:vda1
I0323 13:24:14.457220  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:24:15.456008  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:24:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:24:16.458095  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:24:16.458126  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:24:16.472571  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:24:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:24:23.409787  543705 memory.go:184] no items to output this cycle
I0323 13:24:23.409848  543705 cpu.go:275] no items to output this cycle
E0323 13:24:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:24:33.409813  543705 memory.go:184] no items to output this cycle
I0323 13:24:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 13:24:40.153683  543705 disk_info.go:125] begin check local disk info of client
I0323 13:24:40.156326  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:24:40.156333  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037f680 0xc00037f6c0]
I0323 13:24:40.428959  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:24:40.428964  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:24:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:24:43.410679  543705 memory.go:191] Add success.
I0323 13:24:43.409838  543705 cpu.go:282] Add success.
I0323 13:24:43.420443  543705 net.go:648] Add success.
I0323 13:24:43.422961  543705 net.go:770] primary dev: ETH0
I0323 13:24:43.422976  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:24:43.422990  543705 net.go:698] Add success.
I0323 13:24:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:24:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:24:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:24:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:24:53.409798  543705 memory.go:184] no items to output this cycle
I0323 13:24:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 13:25:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:25:03.409832  543705 memory.go:184] no items to output this cycle
I0323 13:25:03.409836  543705 cpu.go:275] no items to output this cycle
E0323 13:25:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:25:13.409833  543705 memory.go:191] Add success.
I0323 13:25:13.409847  543705 cpu.go:282] Add success.
W0323 13:25:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:25:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:25:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:25:13.420230  543705 net.go:648] Add success.
I0323 13:25:13.422903  543705 net.go:770] primary dev: ETH0
I0323 13:25:13.422916  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:25:13.422928  543705 net.go:698] Add success.
I0323 13:25:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:25:14.455155  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:25:14.455227  543705 disk_worker.go:708] disk space is not compliant
W0323 13:25:14.455230  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:25:14.456947  543705 disk_worker.go:494] system disk:vda1
I0323 13:25:14.456981  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:25:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:25:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:25:16.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:25:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:25:16.472671  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:25:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:25:23.409812  543705 memory.go:184] no items to output this cycle
I0323 13:25:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 13:25:33.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:25:33.409801  543705 memory.go:184] no items to output this cycle
I0323 13:25:33.409869  543705 cpu.go:275] no items to output this cycle
I0323 13:25:40.157686  543705 disk_info.go:125] begin check local disk info of client
I0323 13:25:40.160327  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:25:40.160334  543705 disk_info.go:196] parse disk info done, disk is : [0xc000275b00 0xc000275b40]
E0323 13:25:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:25:43.410666  543705 memory.go:191] Add success.
I0323 13:25:43.409843  543705 cpu.go:282] Add success.
I0323 13:25:43.420355  543705 net.go:648] Add success.
I0323 13:25:43.423109  543705 net.go:770] primary dev: ETH0
I0323 13:25:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:25:43.423135  543705 net.go:698] Add success.
I0323 13:25:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:25:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:25:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:25:53.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:25:53.409814  543705 cpu.go:275] no items to output this cycle
I0323 13:25:53.409828  543705 memory.go:184] no items to output this cycle
E0323 13:26:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:26:03.409798  543705 memory.go:184] no items to output this cycle
I0323 13:26:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 13:26:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:26:13.409806  543705 memory.go:191] Add success.
I0323 13:26:13.409818  543705 cpu.go:282] Add success.
W0323 13:26:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:26:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:26:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:26:13.420142  543705 net.go:648] Add success.
I0323 13:26:13.423005  543705 net.go:770] primary dev: ETH0
I0323 13:26:13.423019  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:26:13.423031  543705 net.go:698] Add success.
I0323 13:26:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:26:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:26:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0323 13:26:14.455235  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:26:14.456612  543705 disk_worker.go:494] system disk:vda1
I0323 13:26:14.456643  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:26:15.455990  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:26:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:26:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:26:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:26:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:26:23.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:26:23.409828  543705 memory.go:184] no items to output this cycle
I0323 13:26:23.409842  543705 cpu.go:275] no items to output this cycle
E0323 13:26:33.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:26:33.409819  543705 memory.go:184] no items to output this cycle
I0323 13:26:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 13:26:40.161682  543705 disk_info.go:125] begin check local disk info of client
I0323 13:26:40.164255  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:26:40.164263  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004de8c0 0xc0004de900]
E0323 13:26:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:26:43.410808  543705 memory.go:191] Add success.
I0323 13:26:43.409846  543705 cpu.go:282] Add success.
I0323 13:26:43.420525  543705 net.go:648] Add success.
I0323 13:26:43.423330  543705 net.go:770] primary dev: ETH0
I0323 13:26:43.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:26:43.423356  543705 net.go:698] Add success.
I0323 13:26:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:26:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:26:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:26:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:26:53.409799  543705 memory.go:184] no items to output this cycle
I0323 13:26:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 13:27:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:27:03.409788  543705 memory.go:184] no items to output this cycle
I0323 13:27:03.409806  543705 cpu.go:275] no items to output this cycle
W0323 13:27:13.409713  543705 conf_downlod.go:84] cant't download conf: Get "": unsupported protocol scheme ""
W0323 13:27:13.409723  543705 conf_downlod.go:89] use old conf
E0323 13:27:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:27:13.409819  543705 memory.go:191] Add success.
I0323 13:27:13.409839  543705 cpu.go:282] Add success.
W0323 13:27:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:27:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:27:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:27:13.420113  543705 net.go:648] Add success.
I0323 13:27:13.428685  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 13:27:13.428756  543705 net.go:770] primary dev: ETH0
I0323 13:27:13.428769  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:27:13.428781  543705 net.go:698] Add success.
I0323 13:27:13.453321  543705 event_worker.go:152] Polling the log file for events...
I0323 13:27:13.463382  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e9644d5a-0ac3-4666-a9d5-e0204b3b3710","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:27:13.463417  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 13:27:14.455285  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:27:14.455303  543705 disk_worker.go:708] disk space is not compliant
W0323 13:27:14.455308  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:27:14.456406  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:27:14.456415  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:27:14.456422  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:27:14.457229  543705 disk_worker.go:494] system disk:vda1
I0323 13:27:14.457261  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:27:15.457125  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:27:15.457140  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:27:16.458133  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:27:16.458210  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:27:16.458235  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:27:16.458281  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:27:16.472793  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:27:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:27:23.409789  543705 memory.go:184] no items to output this cycle
I0323 13:27:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 13:27:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:27:33.409793  543705 memory.go:184] no items to output this cycle
I0323 13:27:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 13:27:40.165701  543705 disk_info.go:125] begin check local disk info of client
I0323 13:27:40.168328  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:27:40.168336  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e500 0xc00037e540]
I0323 13:27:40.429701  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:27:40.429708  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:27:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:27:43.410712  543705 memory.go:191] Add success.
I0323 13:27:43.409840  543705 cpu.go:282] Add success.
I0323 13:27:43.420441  543705 net.go:648] Add success.
I0323 13:27:43.423299  543705 net.go:770] primary dev: ETH0
I0323 13:27:43.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:27:43.423326  543705 net.go:698] Add success.
I0323 13:27:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:27:46.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:27:46.458111  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:27:53.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:27:53.409833  543705 memory.go:184] no items to output this cycle
I0323 13:27:53.409849  543705 cpu.go:275] no items to output this cycle
E0323 13:28:03.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:28:03.409837  543705 memory.go:184] no items to output this cycle
I0323 13:28:03.409877  543705 cpu.go:275] no items to output this cycle
E0323 13:28:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:28:13.409792  543705 memory.go:191] Add success.
W0323 13:28:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:28:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:28:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:28:13.409854  543705 cpu.go:282] Add success.
I0323 13:28:13.420341  543705 net.go:648] Add success.
I0323 13:28:13.421252  543705 net.go:770] primary dev: ETH0
I0323 13:28:13.421265  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:28:13.421277  543705 net.go:698] Add success.
I0323 13:28:14.454998  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:28:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:28:14.455297  543705 disk_worker.go:708] disk space is not compliant
W0323 13:28:14.455301  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:28:14.457260  543705 disk_worker.go:494] system disk:vda1
I0323 13:28:14.457309  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:28:15.456040  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:28:16.458028  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:28:16.458134  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:28:16.458177  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:28:16.472562  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:28:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:28:23.409799  543705 memory.go:184] no items to output this cycle
I0323 13:28:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 13:28:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:28:33.409809  543705 memory.go:184] no items to output this cycle
I0323 13:28:33.409842  543705 cpu.go:275] no items to output this cycle
I0323 13:28:40.169680  543705 disk_info.go:125] begin check local disk info of client
I0323 13:28:40.172297  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:28:40.172304  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035b0c0 0xc00035b100]
E0323 13:28:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:28:43.410768  543705 memory.go:191] Add success.
I0323 13:28:43.409804  543705 cpu.go:282] Add success.
I0323 13:28:43.420490  543705 net.go:648] Add success.
I0323 13:28:43.423230  543705 net.go:770] primary dev: ETH0
I0323 13:28:43.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:28:43.423255  543705 net.go:698] Add success.
I0323 13:28:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:28:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:28:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:28:53.409809  543705 memory.go:184] no items to output this cycle
I0323 13:28:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 13:29:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:29:03.409798  543705 memory.go:184] no items to output this cycle
I0323 13:29:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 13:29:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:29:13.409835  543705 memory.go:191] Add success.
I0323 13:29:13.409845  543705 cpu.go:282] Add success.
W0323 13:29:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:29:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:29:13.409881  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:29:13.420195  543705 net.go:648] Add success.
I0323 13:29:13.423058  543705 net.go:770] primary dev: ETH0
I0323 13:29:13.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:29:13.423083  543705 net.go:698] Add success.
I0323 13:29:14.453977  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:29:14.454344  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:29:14.454441  543705 disk_worker.go:708] disk space is not compliant
W0323 13:29:14.454446  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:29:14.457785  543705 disk_worker.go:494] system disk:vda1
I0323 13:29:14.457831  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:29:15.456001  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:29:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:29:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:29:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:29:16.472562  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:29:23.410049  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:29:23.410089  543705 cpu.go:275] no items to output this cycle
I0323 13:29:23.410456  543705 memory.go:184] no items to output this cycle
E0323 13:29:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:29:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 13:29:33.409827  543705 memory.go:184] no items to output this cycle
I0323 13:29:40.173681  543705 disk_info.go:125] begin check local disk info of client
I0323 13:29:40.176280  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:29:40.176287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bce80 0xc0002bcec0]
E0323 13:29:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:29:43.410714  543705 memory.go:191] Add success.
I0323 13:29:43.409819  543705 cpu.go:282] Add success.
I0323 13:29:43.420430  543705 net.go:648] Add success.
I0323 13:29:43.423367  543705 net.go:770] primary dev: ETH0
I0323 13:29:43.423384  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:29:43.423397  543705 net.go:698] Add success.
I0323 13:29:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:29:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:29:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:29:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:29:53.409794  543705 memory.go:184] no items to output this cycle
I0323 13:29:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 13:30:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:30:03.409812  543705 memory.go:184] no items to output this cycle
I0323 13:30:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 13:30:13.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:30:13.409828  543705 cpu.go:282] Add success.
I0323 13:30:13.409840  543705 memory.go:191] Add success.
W0323 13:30:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:30:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:30:13.409894  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:30:13.420188  543705 net.go:648] Add success.
I0323 13:30:13.422986  543705 net.go:770] primary dev: ETH0
I0323 13:30:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:30:13.423010  543705 net.go:698] Add success.
I0323 13:30:13.468793  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bcbddbc5-c1f9-4cc4-b9ba-40ec61164f3b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:30:13.468836  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:30:14.453950  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:30:14.455260  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:30:14.455275  543705 disk_worker.go:708] disk space is not compliant
W0323 13:30:14.455283  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:30:14.457269  543705 disk_worker.go:494] system disk:vda1
I0323 13:30:14.457318  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:30:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:30:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:30:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:30:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:30:16.472565  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:30:23.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:30:23.409924  543705 memory.go:184] no items to output this cycle
I0323 13:30:23.409925  543705 cpu.go:275] no items to output this cycle
E0323 13:30:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:30:33.409815  543705 memory.go:184] no items to output this cycle
I0323 13:30:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 13:30:40.177675  543705 disk_info.go:125] begin check local disk info of client
I0323 13:30:40.180268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:30:40.180275  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f40 0xc00037e040]
I0323 13:30:40.430644  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:30:40.430649  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:30:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:30:43.410673  543705 memory.go:191] Add success.
I0323 13:30:43.409786  543705 cpu.go:282] Add success.
I0323 13:30:43.420404  543705 net.go:648] Add success.
I0323 13:30:43.423484  543705 net.go:770] primary dev: ETH0
I0323 13:30:43.423497  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:30:43.423510  543705 net.go:698] Add success.
I0323 13:30:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:30:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:30:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:30:53.410358  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:30:53.410379  543705 memory.go:184] no items to output this cycle
I0323 13:30:53.410383  543705 cpu.go:275] no items to output this cycle
E0323 13:31:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:31:03.409796  543705 memory.go:184] no items to output this cycle
I0323 13:31:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 13:31:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:31:13.409797  543705 memory.go:191] Add success.
I0323 13:31:13.409798  543705 cpu.go:282] Add success.
W0323 13:31:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:31:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:31:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:31:13.420126  543705 net.go:648] Add success.
I0323 13:31:13.423176  543705 net.go:770] primary dev: ETH0
I0323 13:31:13.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:31:13.423201  543705 net.go:698] Add success.
I0323 13:31:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:31:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:31:14.455173  543705 disk_worker.go:708] disk space is not compliant
W0323 13:31:14.455176  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:31:14.456537  543705 disk_worker.go:494] system disk:vda1
I0323 13:31:14.456585  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:31:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:31:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:31:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:31:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:31:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:31:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:31:23.409788  543705 memory.go:184] no items to output this cycle
I0323 13:31:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 13:31:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:31:33.409785  543705 memory.go:184] no items to output this cycle
I0323 13:31:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 13:31:40.181674  543705 disk_info.go:125] begin check local disk info of client
I0323 13:31:40.184289  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:31:40.184296  543705 disk_info.go:196] parse disk info done, disk is : [0xc00037e840 0xc00037e880]
E0323 13:31:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:31:43.410686  543705 memory.go:191] Add success.
I0323 13:31:43.409836  543705 cpu.go:282] Add success.
I0323 13:31:43.420385  543705 net.go:648] Add success.
I0323 13:31:43.423021  543705 net.go:770] primary dev: ETH0
I0323 13:31:43.423035  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:31:43.423047  543705 net.go:698] Add success.
I0323 13:31:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:31:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:31:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:31:53.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:31:53.409816  543705 memory.go:184] no items to output this cycle
I0323 13:31:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 13:32:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:32:03.409789  543705 memory.go:184] no items to output this cycle
I0323 13:32:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 13:32:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:32:13.409797  543705 cpu.go:282] Add success.
I0323 13:32:13.409803  543705 memory.go:191] Add success.
W0323 13:32:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:32:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:32:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:32:13.420055  543705 net.go:648] Add success.
I0323 13:32:13.422821  543705 net.go:770] primary dev: ETH0
I0323 13:32:13.422835  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:32:13.422847  543705 net.go:698] Add success.
W0323 13:32:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:32:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 13:32:14.455187  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:32:14.456820  543705 disk_worker.go:494] system disk:vda1
I0323 13:32:14.456858  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:32:14.457021  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:32:14.457030  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:32:14.457035  543705 custom_config.go:64] query custom config with name: gpu
E0323 13:32:15.456911  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:32:15.456925  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:32:16.458056  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:32:16.458068  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:32:16.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:32:16.458131  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:32:16.472501  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:32:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:32:23.409791  543705 memory.go:184] no items to output this cycle
I0323 13:32:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:32:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:32:33.409788  543705 memory.go:184] no items to output this cycle
I0323 13:32:33.409794  543705 cpu.go:275] no items to output this cycle
I0323 13:32:40.185681  543705 disk_info.go:125] begin check local disk info of client
I0323 13:32:40.188342  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:32:40.188351  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035a480 0xc00035a4c0]
E0323 13:32:43.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:32:43.410748  543705 memory.go:191] Add success.
I0323 13:32:43.409835  543705 cpu.go:282] Add success.
I0323 13:32:43.420550  543705 net.go:648] Add success.
I0323 13:32:43.423186  543705 net.go:770] primary dev: ETH0
I0323 13:32:43.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:32:43.423212  543705 net.go:698] Add success.
I0323 13:32:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:32:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:32:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:32:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:32:53.409826  543705 memory.go:184] no items to output this cycle
I0323 13:32:53.409844  543705 cpu.go:275] no items to output this cycle
E0323 13:33:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:33:03.409803  543705 memory.go:184] no items to output this cycle
I0323 13:33:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 13:33:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:33:13.409812  543705 memory.go:191] Add success.
I0323 13:33:13.409817  543705 cpu.go:282] Add success.
W0323 13:33:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:33:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:33:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:33:13.420057  543705 net.go:648] Add success.
I0323 13:33:13.422897  543705 net.go:770] primary dev: ETH0
I0323 13:33:13.422910  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:33:13.422923  543705 net.go:698] Add success.
I0323 13:33:13.468066  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"af02b77c-9fd5-4595-aa33-4ffc8122be08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:33:13.468109  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:33:14.454997  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:33:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:33:14.455242  543705 disk_worker.go:708] disk space is not compliant
W0323 13:33:14.455245  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:33:14.456967  543705 disk_worker.go:494] system disk:vda1
I0323 13:33:14.457000  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:33:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:33:16.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:33:16.458089  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:33:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:33:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:33:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:33:23.409817  543705 memory.go:184] no items to output this cycle
I0323 13:33:23.409826  543705 cpu.go:275] no items to output this cycle
E0323 13:33:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:33:33.409833  543705 memory.go:184] no items to output this cycle
I0323 13:33:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 13:33:40.189691  543705 disk_info.go:125] begin check local disk info of client
I0323 13:33:40.192315  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:33:40.192323  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ff2c0 0xc0003ff300]
I0323 13:33:40.431624  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:33:40.431629  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:33:43.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:33:43.410599  543705 memory.go:191] Add success.
I0323 13:33:43.409831  543705 cpu.go:282] Add success.
I0323 13:33:43.420305  543705 net.go:648] Add success.
I0323 13:33:43.422880  543705 net.go:770] primary dev: ETH0
I0323 13:33:43.422892  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:33:43.422905  543705 net.go:698] Add success.
I0323 13:33:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:33:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:33:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:33:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:33:53.409800  543705 memory.go:184] no items to output this cycle
I0323 13:33:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 13:34:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:34:03.409805  543705 memory.go:184] no items to output this cycle
I0323 13:34:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 13:34:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:34:13.409804  543705 memory.go:191] Add success.
W0323 13:34:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 13:34:13.409833  543705 cpu.go:282] Add success.
W0323 13:34:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:34:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:34:13.420134  543705 net.go:648] Add success.
I0323 13:34:13.423075  543705 net.go:770] primary dev: ETH0
I0323 13:34:13.423088  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:34:13.423100  543705 net.go:698] Add success.
I0323 13:34:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:34:14.455296  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:34:14.455371  543705 disk_worker.go:708] disk space is not compliant
W0323 13:34:14.455378  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:34:14.456776  543705 disk_worker.go:494] system disk:vda1
I0323 13:34:14.456831  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:34:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:34:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:34:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:34:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:34:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:34:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:34:23.409788  543705 memory.go:184] no items to output this cycle
I0323 13:34:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 13:34:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:34:33.409784  543705 memory.go:184] no items to output this cycle
I0323 13:34:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 13:34:40.193676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:34:40.196282  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:34:40.196288  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5ec0 0xc0000c5f00]
E0323 13:34:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:34:43.410649  543705 memory.go:191] Add success.
I0323 13:34:43.409810  543705 cpu.go:282] Add success.
I0323 13:34:43.420397  543705 net.go:648] Add success.
I0323 13:34:43.423086  543705 net.go:770] primary dev: ETH0
I0323 13:34:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:34:43.423112  543705 net.go:698] Add success.
I0323 13:34:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:34:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:34:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:34:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:34:53.409781  543705 memory.go:184] no items to output this cycle
I0323 13:34:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 13:35:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:35:03.409786  543705 memory.go:184] no items to output this cycle
I0323 13:35:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 13:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:35:13.409793  543705 memory.go:191] Add success.
I0323 13:35:13.409807  543705 cpu.go:282] Add success.
W0323 13:35:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:35:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:35:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:35:13.420097  543705 net.go:648] Add success.
I0323 13:35:13.422989  543705 net.go:770] primary dev: ETH0
I0323 13:35:13.423004  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:35:13.423016  543705 net.go:698] Add success.
I0323 13:35:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:35:14.455322  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:35:14.455337  543705 disk_worker.go:708] disk space is not compliant
W0323 13:35:14.455341  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:35:14.457142  543705 disk_worker.go:494] system disk:vda1
I0323 13:35:14.457174  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:35:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:35:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:35:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:35:16.472529  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:35:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:35:23.409813  543705 memory.go:184] no items to output this cycle
I0323 13:35:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 13:35:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:35:33.409814  543705 memory.go:184] no items to output this cycle
I0323 13:35:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 13:35:40.197683  543705 disk_info.go:125] begin check local disk info of client
I0323 13:35:40.200331  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:35:40.200338  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b9c0 0xc00007ba00]
E0323 13:35:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:35:43.409822  543705 memory.go:191] Add success.
I0323 13:35:43.409832  543705 cpu.go:282] Add success.
I0323 13:35:43.420108  543705 net.go:648] Add success.
I0323 13:35:43.421074  543705 net.go:770] primary dev: ETH0
I0323 13:35:43.421090  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:35:43.421104  543705 net.go:698] Add success.
I0323 13:35:46.458010  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:35:46.458106  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:35:46.458141  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:35:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:35:53.409779  543705 memory.go:184] no items to output this cycle
I0323 13:35:53.409843  543705 cpu.go:275] no items to output this cycle
I0323 13:36:03.409808  543705 cpu.go:275] no items to output this cycle
E0323 13:36:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:36:03.409828  543705 memory.go:184] no items to output this cycle
E0323 13:36:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:36:13.409825  543705 memory.go:191] Add success.
I0323 13:36:13.409830  543705 cpu.go:282] Add success.
W0323 13:36:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:36:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:36:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:36:13.420070  543705 net.go:648] Add success.
I0323 13:36:13.423057  543705 net.go:770] primary dev: ETH0
I0323 13:36:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:36:13.423086  543705 net.go:698] Add success.
I0323 13:36:13.469180  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"da549347-29bd-46e0-9c1e-3008f3edaf5e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:36:13.469269  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 13:36:14.455220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:36:14.455322  543705 disk_worker.go:708] disk space is not compliant
W0323 13:36:14.455327  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:36:14.455802  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:36:14.458114  543705 disk_worker.go:494] system disk:vda1
I0323 13:36:14.458163  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:36:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:36:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:36:16.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:36:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:36:16.472535  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:36:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:36:23.409786  543705 memory.go:184] no items to output this cycle
I0323 13:36:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 13:36:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:36:33.409812  543705 memory.go:184] no items to output this cycle
I0323 13:36:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 13:36:40.201684  543705 disk_info.go:125] begin check local disk info of client
I0323 13:36:40.204327  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:36:40.204334  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039a340 0xc00039a380]
I0323 13:36:40.431969  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:36:40.431974  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:36:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:36:43.410774  543705 memory.go:191] Add success.
I0323 13:36:43.409828  543705 cpu.go:282] Add success.
I0323 13:36:43.420560  543705 net.go:648] Add success.
I0323 13:36:43.423211  543705 net.go:770] primary dev: ETH0
I0323 13:36:43.423224  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:36:43.423238  543705 net.go:698] Add success.
I0323 13:36:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:36:46.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:36:46.458125  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:36:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:36:53.409786  543705 memory.go:184] no items to output this cycle
I0323 13:36:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 13:37:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:37:03.409806  543705 memory.go:184] no items to output this cycle
I0323 13:37:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 13:37:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:37:13.409794  543705 memory.go:191] Add success.
I0323 13:37:13.409794  543705 cpu.go:282] Add success.
W0323 13:37:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:37:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:37:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:37:13.420112  543705 net.go:648] Add success.
I0323 13:37:13.422865  543705 net.go:770] primary dev: ETH0
I0323 13:37:13.422878  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:37:13.422889  543705 net.go:698] Add success.
I0323 13:37:13.452772  543705 event_worker.go:152] Polling the log file for events...
W0323 13:37:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:37:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 13:37:14.455197  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:37:14.455938  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:37:14.455946  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:37:14.455952  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:37:14.456565  543705 disk_worker.go:494] system disk:vda1
I0323 13:37:14.456594  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:37:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:37:15.456810  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:37:16.457907  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:37:16.457907  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:37:16.457961  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:37:16.457982  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:37:16.472301  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:37:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:37:23.409787  543705 cpu.go:275] no items to output this cycle
I0323 13:37:23.409789  543705 memory.go:184] no items to output this cycle
E0323 13:37:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:37:33.409790  543705 cpu.go:275] no items to output this cycle
I0323 13:37:33.409797  543705 memory.go:184] no items to output this cycle
I0323 13:37:40.205677  543705 disk_info.go:125] begin check local disk info of client
I0323 13:37:40.208239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:37:40.208245  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b600 0xc00007b640]
E0323 13:37:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:37:43.410676  543705 memory.go:191] Add success.
I0323 13:37:43.409806  543705 cpu.go:282] Add success.
I0323 13:37:43.420373  543705 net.go:648] Add success.
I0323 13:37:43.423189  543705 net.go:770] primary dev: ETH0
I0323 13:37:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:37:43.423214  543705 net.go:698] Add success.
I0323 13:37:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:37:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:37:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:37:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:37:53.409814  543705 memory.go:184] no items to output this cycle
I0323 13:37:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 13:38:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:38:03.409792  543705 memory.go:184] no items to output this cycle
I0323 13:38:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 13:38:13.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:38:13.409842  543705 memory.go:191] Add success.
W0323 13:38:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:38:13.409892  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:38:13.409896  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:38:13.409813  543705 cpu.go:282] Add success.
I0323 13:38:13.420299  543705 net.go:648] Add success.
I0323 13:38:13.421408  543705 net.go:770] primary dev: ETH0
I0323 13:38:13.421421  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:38:13.421434  543705 net.go:698] Add success.
I0323 13:38:14.453965  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:38:14.455274  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:38:14.455286  543705 disk_worker.go:708] disk space is not compliant
W0323 13:38:14.455289  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:38:14.456704  543705 disk_worker.go:494] system disk:vda1
I0323 13:38:14.456735  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:38:15.456017  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:38:16.458013  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:38:16.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:38:16.458126  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:38:16.472577  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:38:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:38:23.409817  543705 memory.go:184] no items to output this cycle
I0323 13:38:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 13:38:33.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:38:33.409821  543705 memory.go:184] no items to output this cycle
I0323 13:38:33.409834  543705 cpu.go:275] no items to output this cycle
I0323 13:38:40.209677  543705 disk_info.go:125] begin check local disk info of client
I0323 13:38:40.212268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:38:40.212274  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035bd00 0xc00035bd40]
E0323 13:38:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:38:43.410604  543705 memory.go:191] Add success.
I0323 13:38:43.409815  543705 cpu.go:282] Add success.
I0323 13:38:43.420285  543705 net.go:648] Add success.
I0323 13:38:43.422970  543705 net.go:770] primary dev: ETH0
I0323 13:38:43.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:38:43.422995  543705 net.go:698] Add success.
I0323 13:38:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:38:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:38:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:38:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:38:53.409817  543705 memory.go:184] no items to output this cycle
I0323 13:38:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 13:39:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:39:03.409784  543705 memory.go:184] no items to output this cycle
I0323 13:39:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 13:39:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:39:13.409826  543705 memory.go:191] Add success.
I0323 13:39:13.409829  543705 cpu.go:282] Add success.
W0323 13:39:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:39:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:39:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:39:13.420267  543705 net.go:648] Add success.
I0323 13:39:13.423583  543705 net.go:770] primary dev: ETH0
I0323 13:39:13.423596  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:39:13.423607  543705 net.go:698] Add success.
I0323 13:39:13.463239  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c202f2f1-353f-415a-bbb6-701c1903568d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:39:13.463271  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:39:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:39:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:39:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 13:39:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:39:14.456620  543705 disk_worker.go:494] system disk:vda1
I0323 13:39:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:39:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:39:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:39:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:39:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:39:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:39:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:39:23.409785  543705 memory.go:184] no items to output this cycle
I0323 13:39:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 13:39:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:39:33.409775  543705 memory.go:184] no items to output this cycle
I0323 13:39:33.409781  543705 cpu.go:275] no items to output this cycle
I0323 13:39:40.213697  543705 disk_info.go:125] begin check local disk info of client
I0323 13:39:40.216378  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:39:40.216386  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004865c0 0xc000486600]
I0323 13:39:40.432973  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:39:40.432978  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:39:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:39:43.410774  543705 memory.go:191] Add success.
I0323 13:39:43.409816  543705 cpu.go:282] Add success.
I0323 13:39:43.420547  543705 net.go:648] Add success.
I0323 13:39:43.423725  543705 net.go:770] primary dev: ETH0
I0323 13:39:43.423740  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:39:43.423756  543705 net.go:698] Add success.
I0323 13:39:46.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:39:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:39:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:39:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:39:53.409799  543705 memory.go:184] no items to output this cycle
I0323 13:39:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 13:40:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:40:03.409814  543705 memory.go:184] no items to output this cycle
I0323 13:40:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 13:40:13.409840  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:40:13.409868  543705 memory.go:191] Add success.
W0323 13:40:13.409899  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:40:13.409911  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:40:13.409918  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:40:13.409936  543705 cpu.go:282] Add success.
I0323 13:40:13.419739  543705 net.go:648] Add success.
I0323 13:40:13.422284  543705 net.go:770] primary dev: ETH0
I0323 13:40:13.422298  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:40:13.422309  543705 net.go:698] Add success.
I0323 13:40:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:40:14.455226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:40:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0323 13:40:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:40:14.456673  543705 disk_worker.go:494] system disk:vda1
I0323 13:40:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:40:15.455997  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:40:16.458010  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:40:16.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:40:16.458145  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:40:16.472574  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:40:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:40:23.409791  543705 memory.go:184] no items to output this cycle
I0323 13:40:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 13:40:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:40:33.409785  543705 memory.go:184] no items to output this cycle
I0323 13:40:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 13:40:40.217673  543705 disk_info.go:125] begin check local disk info of client
I0323 13:40:40.220286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:40:40.220292  543705 disk_info.go:196] parse disk info done, disk is : [0xc000291540 0xc000291580]
E0323 13:40:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:40:43.410795  543705 memory.go:191] Add success.
I0323 13:40:43.409824  543705 cpu.go:282] Add success.
I0323 13:40:43.420479  543705 net.go:648] Add success.
I0323 13:40:43.423654  543705 net.go:770] primary dev: ETH0
I0323 13:40:43.423666  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:40:43.423679  543705 net.go:698] Add success.
I0323 13:40:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:40:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:40:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:40:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:40:53.409811  543705 memory.go:184] no items to output this cycle
I0323 13:40:53.409819  543705 cpu.go:275] no items to output this cycle
E0323 13:41:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:41:03.409797  543705 cpu.go:275] no items to output this cycle
I0323 13:41:03.409799  543705 memory.go:184] no items to output this cycle
E0323 13:41:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:41:13.409916  543705 cpu.go:282] Add success.
I0323 13:41:13.409930  543705 memory.go:191] Add success.
W0323 13:41:13.409986  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:41:13.410013  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:41:13.410018  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:41:13.419739  543705 net.go:648] Add success.
I0323 13:41:13.422232  543705 net.go:770] primary dev: ETH0
I0323 13:41:13.422245  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:41:13.422257  543705 net.go:698] Add success.
I0323 13:41:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:41:14.455109  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:41:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 13:41:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:41:14.456754  543705 disk_worker.go:494] system disk:vda1
I0323 13:41:14.456796  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:41:15.456001  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:41:16.465690  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:41:16.465792  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:41:16.465828  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:41:16.473047  543705 disk_local_worker.go:436] Get disk info: []
I0323 13:41:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 13:41:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:41:23.409819  543705 memory.go:184] no items to output this cycle
E0323 13:41:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:41:33.409814  543705 memory.go:184] no items to output this cycle
I0323 13:41:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 13:41:40.221681  543705 disk_info.go:125] begin check local disk info of client
I0323 13:41:40.224443  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:41:40.224449  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5980 0xc0000c59c0]
E0323 13:41:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:41:43.410649  543705 memory.go:191] Add success.
I0323 13:41:43.409811  543705 cpu.go:282] Add success.
I0323 13:41:43.420531  543705 net.go:648] Add success.
I0323 13:41:43.423246  543705 net.go:770] primary dev: ETH0
I0323 13:41:43.423265  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:41:43.423283  543705 net.go:698] Add success.
I0323 13:41:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:41:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:41:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:41:53.410252  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:41:53.410270  543705 memory.go:184] no items to output this cycle
I0323 13:41:53.410305  543705 cpu.go:275] no items to output this cycle
E0323 13:42:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:42:03.409811  543705 memory.go:184] no items to output this cycle
I0323 13:42:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 13:42:13.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:42:13.409882  543705 memory.go:191] Add success.
W0323 13:42:13.409910  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 13:42:13.409996  543705 cpu.go:282] Add success.
W0323 13:42:13.410012  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:42:13.410018  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:42:13.419736  543705 net.go:648] Add success.
I0323 13:42:13.422592  543705 net.go:770] primary dev: ETH0
I0323 13:42:13.422607  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:42:13.422618  543705 net.go:698] Add success.
I0323 13:42:13.469039  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1fbc91e4-bccd-4691-8392-39e3c6cc9dfb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:42:13.469071  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 13:42:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:42:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 13:42:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:42:14.456981  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:42:14.456990  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:42:14.456996  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:42:14.457037  543705 disk_worker.go:494] system disk:vda1
I0323 13:42:14.457066  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:42:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:42:15.456849  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:42:16.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:42:16.458024  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:42:16.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:42:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:42:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:42:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:42:23.409802  543705 memory.go:184] no items to output this cycle
I0323 13:42:23.409819  543705 cpu.go:275] no items to output this cycle
E0323 13:42:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:42:33.409798  543705 memory.go:184] no items to output this cycle
I0323 13:42:33.409832  543705 cpu.go:275] no items to output this cycle
I0323 13:42:40.225706  543705 disk_info.go:125] begin check local disk info of client
I0323 13:42:40.229458  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:42:40.229467  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e840 0xc00039e880]
I0323 13:42:40.434035  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:42:40.434042  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:42:43.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:42:43.410756  543705 memory.go:191] Add success.
I0323 13:42:43.409852  543705 cpu.go:282] Add success.
I0323 13:42:43.420558  543705 net.go:648] Add success.
I0323 13:42:43.423286  543705 net.go:770] primary dev: ETH0
I0323 13:42:43.423301  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:42:43.423314  543705 net.go:698] Add success.
I0323 13:42:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:42:46.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:42:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:42:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:42:53.409799  543705 memory.go:184] no items to output this cycle
I0323 13:42:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 13:43:03.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:43:03.409826  543705 memory.go:184] no items to output this cycle
I0323 13:43:03.409835  543705 cpu.go:275] no items to output this cycle
E0323 13:43:13.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:43:13.409834  543705 cpu.go:282] Add success.
I0323 13:43:13.410014  543705 memory.go:191] Add success.
W0323 13:43:13.410053  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:43:13.410075  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:43:13.410080  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:43:13.419707  543705 net.go:648] Add success.
I0323 13:43:13.420691  543705 net.go:770] primary dev: ETH0
I0323 13:43:13.420705  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:43:13.420716  543705 net.go:698] Add success.
I0323 13:43:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:43:14.455228  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:43:14.455241  543705 disk_worker.go:708] disk space is not compliant
W0323 13:43:14.455244  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:43:14.456683  543705 disk_worker.go:494] system disk:vda1
I0323 13:43:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:43:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:43:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:43:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:43:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:43:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:43:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:43:23.409799  543705 memory.go:184] no items to output this cycle
I0323 13:43:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 13:43:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:43:33.409774  543705 memory.go:184] no items to output this cycle
I0323 13:43:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 13:43:40.229673  543705 disk_info.go:125] begin check local disk info of client
I0323 13:43:40.232225  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:43:40.232231  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004df380 0xc0004df3c0]
E0323 13:43:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:43:43.410738  543705 memory.go:191] Add success.
I0323 13:43:43.409845  543705 cpu.go:282] Add success.
I0323 13:43:43.420424  543705 net.go:648] Add success.
I0323 13:43:43.423261  543705 net.go:770] primary dev: ETH0
I0323 13:43:43.423274  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:43:43.423286  543705 net.go:698] Add success.
I0323 13:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:43:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:43:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:43:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:43:53.409787  543705 memory.go:184] no items to output this cycle
I0323 13:43:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 13:44:03.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:44:03.409820  543705 memory.go:184] no items to output this cycle
I0323 13:44:03.409843  543705 cpu.go:275] no items to output this cycle
E0323 13:44:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:44:13.409795  543705 memory.go:191] Add success.
I0323 13:44:13.409809  543705 cpu.go:282] Add success.
W0323 13:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:44:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:44:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:44:13.420411  543705 net.go:648] Add success.
I0323 13:44:13.423432  543705 net.go:770] primary dev: ETH0
I0323 13:44:13.423449  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:44:13.423462  543705 net.go:698] Add success.
I0323 13:44:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:44:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:44:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 13:44:14.455196  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:44:14.456561  543705 disk_worker.go:494] system disk:vda1
I0323 13:44:14.456610  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:44:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:44:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:44:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:44:16.458096  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:44:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:44:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:44:23.409820  543705 memory.go:184] no items to output this cycle
I0323 13:44:23.409831  543705 cpu.go:275] no items to output this cycle
E0323 13:44:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:44:33.409785  543705 memory.go:184] no items to output this cycle
I0323 13:44:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 13:44:40.233685  543705 disk_info.go:125] begin check local disk info of client
I0323 13:44:40.236309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:44:40.236317  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004728c0 0xc000472900]
E0323 13:44:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:44:43.410722  543705 memory.go:191] Add success.
I0323 13:44:43.409829  543705 cpu.go:282] Add success.
I0323 13:44:43.420421  543705 net.go:648] Add success.
I0323 13:44:43.423017  543705 net.go:770] primary dev: ETH0
I0323 13:44:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:44:43.423042  543705 net.go:698] Add success.
I0323 13:44:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:44:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:44:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:44:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:44:53.409813  543705 memory.go:184] no items to output this cycle
I0323 13:44:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 13:45:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:45:03.409785  543705 memory.go:184] no items to output this cycle
I0323 13:45:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 13:45:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:45:13.409788  543705 memory.go:191] Add success.
I0323 13:45:13.409806  543705 cpu.go:282] Add success.
W0323 13:45:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:45:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:45:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:45:13.420196  543705 net.go:648] Add success.
I0323 13:45:13.423104  543705 net.go:770] primary dev: ETH0
I0323 13:45:13.423134  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:45:13.423146  543705 net.go:698] Add success.
I0323 13:45:13.462906  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3a3a895f-591c-431e-aee7-c92d3f678d08","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:45:13.462936  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:45:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:45:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:45:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 13:45:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:45:14.456569  543705 disk_worker.go:494] system disk:vda1
I0323 13:45:14.456600  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:45:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:45:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:45:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:45:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:45:16.472472  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:45:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:45:23.409789  543705 memory.go:184] no items to output this cycle
I0323 13:45:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 13:45:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:45:33.409810  543705 memory.go:184] no items to output this cycle
I0323 13:45:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 13:45:40.237674  543705 disk_info.go:125] begin check local disk info of client
I0323 13:45:40.240271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:45:40.240278  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b140 0xc00007b180]
I0323 13:45:40.434625  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:45:40.434631  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:45:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:45:43.410686  543705 memory.go:191] Add success.
I0323 13:45:43.409808  543705 cpu.go:282] Add success.
I0323 13:45:43.420453  543705 net.go:648] Add success.
I0323 13:45:43.423541  543705 net.go:770] primary dev: ETH0
I0323 13:45:43.423554  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:45:43.423566  543705 net.go:698] Add success.
I0323 13:45:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:45:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:45:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:45:53.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:45:53.409803  543705 memory.go:184] no items to output this cycle
I0323 13:45:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 13:46:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:46:03.409809  543705 memory.go:184] no items to output this cycle
I0323 13:46:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 13:46:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:46:13.409831  543705 memory.go:191] Add success.
I0323 13:46:13.409839  543705 cpu.go:282] Add success.
W0323 13:46:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:46:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:46:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:46:13.420403  543705 net.go:648] Add success.
I0323 13:46:13.423188  543705 net.go:770] primary dev: ETH0
I0323 13:46:13.423203  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:46:13.423216  543705 net.go:698] Add success.
I0323 13:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:46:14.455145  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:46:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 13:46:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:46:14.456519  543705 disk_worker.go:494] system disk:vda1
I0323 13:46:14.456564  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:46:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:46:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:46:16.458072  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:46:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:46:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:46:23.409782  543705 memory.go:184] no items to output this cycle
I0323 13:46:23.409829  543705 cpu.go:275] no items to output this cycle
E0323 13:46:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:46:33.409783  543705 memory.go:184] no items to output this cycle
I0323 13:46:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 13:46:40.241678  543705 disk_info.go:125] begin check local disk info of client
I0323 13:46:40.244286  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:46:40.244292  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052bb40 0xc00052bb80]
E0323 13:46:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:46:43.410693  543705 memory.go:191] Add success.
I0323 13:46:43.409807  543705 cpu.go:282] Add success.
I0323 13:46:43.420410  543705 net.go:648] Add success.
I0323 13:46:43.423120  543705 net.go:770] primary dev: ETH0
I0323 13:46:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:46:43.423148  543705 net.go:698] Add success.
I0323 13:46:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:46:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:46:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:46:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:46:53.409808  543705 memory.go:184] no items to output this cycle
I0323 13:46:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 13:47:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:47:03.409780  543705 memory.go:184] no items to output this cycle
I0323 13:47:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 13:47:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:47:13.409791  543705 memory.go:191] Add success.
I0323 13:47:13.409792  543705 cpu.go:282] Add success.
W0323 13:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:47:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:47:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:47:13.420064  543705 net.go:648] Add success.
I0323 13:47:13.423289  543705 net.go:770] primary dev: ETH0
I0323 13:47:13.423302  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:47:13.423319  543705 net.go:698] Add success.
I0323 13:47:13.452769  543705 event_worker.go:152] Polling the log file for events...
W0323 13:47:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:47:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 13:47:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:47:14.455925  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:47:14.455934  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:47:14.455939  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:47:14.456578  543705 disk_worker.go:494] system disk:vda1
I0323 13:47:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:47:15.456833  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:47:15.456843  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:47:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:47:16.457989  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:47:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:47:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:47:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:47:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:47:23.409794  543705 memory.go:184] no items to output this cycle
I0323 13:47:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:47:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:47:33.409781  543705 memory.go:184] no items to output this cycle
I0323 13:47:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 13:47:40.245682  543705 disk_info.go:125] begin check local disk info of client
I0323 13:47:40.248288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:47:40.248295  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e800 0xc00039e840]
E0323 13:47:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:47:43.410887  543705 memory.go:191] Add success.
I0323 13:47:43.409824  543705 cpu.go:282] Add success.
I0323 13:47:43.420579  543705 net.go:648] Add success.
I0323 13:47:43.423543  543705 net.go:770] primary dev: ETH0
I0323 13:47:43.423555  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:47:43.423567  543705 net.go:698] Add success.
I0323 13:47:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:47:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:47:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:47:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:47:53.409808  543705 memory.go:184] no items to output this cycle
I0323 13:47:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 13:48:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:48:03.409786  543705 memory.go:184] no items to output this cycle
I0323 13:48:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 13:48:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:48:13.409795  543705 memory.go:191] Add success.
I0323 13:48:13.409819  543705 cpu.go:282] Add success.
W0323 13:48:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:48:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:48:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:48:13.420195  543705 net.go:648] Add success.
I0323 13:48:13.422893  543705 net.go:770] primary dev: ETH0
I0323 13:48:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:48:13.422931  543705 net.go:698] Add success.
I0323 13:48:13.463655  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f271ec9d-ec7d-4033-a066-533140ba33db","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:48:13.463699  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:48:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:48:14.455507  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:48:14.455519  543705 disk_worker.go:708] disk space is not compliant
W0323 13:48:14.455525  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:48:14.457530  543705 disk_worker.go:494] system disk:vda1
I0323 13:48:14.457558  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:48:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:48:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:48:16.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:48:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:48:16.472463  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:48:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:48:23.409775  543705 memory.go:184] no items to output this cycle
I0323 13:48:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:48:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:48:33.409784  543705 cpu.go:275] no items to output this cycle
I0323 13:48:33.409787  543705 memory.go:184] no items to output this cycle
I0323 13:48:40.249681  543705 disk_info.go:125] begin check local disk info of client
I0323 13:48:40.252281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:48:40.252288  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab6c0 0xc0001ab700]
I0323 13:48:40.435316  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:48:40.435321  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:48:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:48:43.410667  543705 memory.go:191] Add success.
I0323 13:48:43.409801  543705 cpu.go:282] Add success.
I0323 13:48:43.420462  543705 net.go:648] Add success.
I0323 13:48:43.423270  543705 net.go:770] primary dev: ETH0
I0323 13:48:43.423285  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:48:43.423299  543705 net.go:698] Add success.
I0323 13:48:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:48:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:48:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:48:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:48:53.409807  543705 memory.go:184] no items to output this cycle
I0323 13:48:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 13:49:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:49:03.409813  543705 memory.go:184] no items to output this cycle
I0323 13:49:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 13:49:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:49:13.409822  543705 memory.go:191] Add success.
I0323 13:49:13.409831  543705 cpu.go:282] Add success.
W0323 13:49:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:49:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:49:13.409875  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:49:13.420164  543705 net.go:648] Add success.
I0323 13:49:13.423208  543705 net.go:770] primary dev: ETH0
I0323 13:49:13.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:49:13.423234  543705 net.go:698] Add success.
I0323 13:49:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:49:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:49:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 13:49:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:49:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 13:49:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:49:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:49:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:49:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:49:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:49:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:49:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:49:23.409783  543705 memory.go:184] no items to output this cycle
I0323 13:49:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 13:49:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:49:33.409807  543705 memory.go:184] no items to output this cycle
I0323 13:49:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 13:49:40.253674  543705 disk_info.go:125] begin check local disk info of client
I0323 13:49:40.256244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:49:40.256251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005375c0 0xc000537600]
E0323 13:49:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:49:43.410597  543705 memory.go:191] Add success.
I0323 13:49:43.409816  543705 cpu.go:282] Add success.
I0323 13:49:43.420294  543705 net.go:648] Add success.
I0323 13:49:43.423138  543705 net.go:770] primary dev: ETH0
I0323 13:49:43.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:49:43.423165  543705 net.go:698] Add success.
I0323 13:49:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:49:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:49:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:49:53.410390  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:49:53.410409  543705 memory.go:184] no items to output this cycle
I0323 13:49:53.410422  543705 cpu.go:275] no items to output this cycle
E0323 13:50:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:50:03.409776  543705 memory.go:184] no items to output this cycle
I0323 13:50:03.409780  543705 cpu.go:275] no items to output this cycle
E0323 13:50:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:50:13.409830  543705 memory.go:191] Add success.
I0323 13:50:13.409834  543705 cpu.go:282] Add success.
W0323 13:50:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:50:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:50:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:50:13.420145  543705 net.go:648] Add success.
I0323 13:50:13.422923  543705 net.go:770] primary dev: ETH0
I0323 13:50:13.422936  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:50:13.422950  543705 net.go:698] Add success.
I0323 13:50:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:50:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:50:14.455160  543705 disk_worker.go:708] disk space is not compliant
W0323 13:50:14.455163  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:50:14.456509  543705 disk_worker.go:494] system disk:vda1
I0323 13:50:14.456555  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:50:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:50:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:50:16.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:50:16.458084  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:50:16.472476  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:50:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:50:23.409811  543705 memory.go:184] no items to output this cycle
I0323 13:50:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 13:50:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:50:33.409781  543705 memory.go:184] no items to output this cycle
I0323 13:50:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 13:50:40.257674  543705 disk_info.go:125] begin check local disk info of client
I0323 13:50:40.260417  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:50:40.260423  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536740 0xc000536780]
E0323 13:50:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:50:43.410805  543705 memory.go:191] Add success.
I0323 13:50:43.409820  543705 cpu.go:282] Add success.
I0323 13:50:43.420508  543705 net.go:648] Add success.
I0323 13:50:43.423455  543705 net.go:770] primary dev: ETH0
I0323 13:50:43.423469  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:50:43.423481  543705 net.go:698] Add success.
I0323 13:50:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:50:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:50:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:50:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:50:53.409811  543705 memory.go:184] no items to output this cycle
I0323 13:50:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 13:51:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:51:03.409785  543705 memory.go:184] no items to output this cycle
I0323 13:51:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 13:51:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:51:13.409797  543705 memory.go:191] Add success.
I0323 13:51:13.409797  543705 cpu.go:282] Add success.
W0323 13:51:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:51:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:51:13.409840  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:51:13.420195  543705 net.go:648] Add success.
I0323 13:51:13.422842  543705 net.go:770] primary dev: ETH0
I0323 13:51:13.422861  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:51:13.422876  543705 net.go:698] Add success.
I0323 13:51:13.469829  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"72b740a2-6ff3-40ad-b6f9-fb2fd8361422","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:51:13.469862  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:51:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:51:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:51:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 13:51:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:51:14.456534  543705 disk_worker.go:494] system disk:vda1
I0323 13:51:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:51:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:51:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:51:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:51:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:51:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:51:23.409908  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:51:23.409920  543705 cpu.go:275] no items to output this cycle
I0323 13:51:23.410001  543705 memory.go:184] no items to output this cycle
E0323 13:51:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:51:33.409795  543705 memory.go:184] no items to output this cycle
I0323 13:51:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 13:51:40.261676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:51:40.264239  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:51:40.264247  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
I0323 13:51:40.436220  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:51:40.436225  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:51:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:51:43.410674  543705 memory.go:191] Add success.
I0323 13:51:43.409809  543705 cpu.go:282] Add success.
I0323 13:51:43.420382  543705 net.go:648] Add success.
I0323 13:51:43.422997  543705 net.go:770] primary dev: ETH0
I0323 13:51:43.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:51:43.423023  543705 net.go:698] Add success.
I0323 13:51:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:51:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:51:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:51:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:51:53.409779  543705 memory.go:184] no items to output this cycle
I0323 13:51:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 13:52:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:52:03.409784  543705 memory.go:184] no items to output this cycle
I0323 13:52:03.409784  543705 cpu.go:275] no items to output this cycle
E0323 13:52:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:52:13.409788  543705 memory.go:191] Add success.
I0323 13:52:13.409791  543705 cpu.go:282] Add success.
W0323 13:52:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:52:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:52:13.409830  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:52:13.420144  543705 net.go:648] Add success.
I0323 13:52:13.422741  543705 net.go:770] primary dev: ETH0
I0323 13:52:13.422754  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:52:13.422766  543705 net.go:698] Add success.
W0323 13:52:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:52:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 13:52:14.455189  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:52:14.455922  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:52:14.455931  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:52:14.455936  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:52:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 13:52:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:52:15.456858  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:52:15.456867  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:52:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:52:16.457968  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:52:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:52:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:52:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:52:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:52:23.409779  543705 memory.go:184] no items to output this cycle
I0323 13:52:23.409780  543705 cpu.go:275] no items to output this cycle
E0323 13:52:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:52:33.409773  543705 memory.go:184] no items to output this cycle
I0323 13:52:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 13:52:40.265674  543705 disk_info.go:125] begin check local disk info of client
I0323 13:52:40.268203  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:52:40.268209  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536580 0xc0005365c0]
E0323 13:52:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:52:43.410770  543705 memory.go:191] Add success.
I0323 13:52:43.409802  543705 cpu.go:282] Add success.
I0323 13:52:43.420475  543705 net.go:648] Add success.
I0323 13:52:43.423557  543705 net.go:770] primary dev: ETH0
I0323 13:52:43.423572  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:52:43.423586  543705 net.go:698] Add success.
I0323 13:52:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:52:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:52:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:52:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:52:53.409788  543705 memory.go:184] no items to output this cycle
I0323 13:52:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 13:53:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:53:03.409788  543705 memory.go:184] no items to output this cycle
I0323 13:53:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 13:53:13.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:53:13.409781  543705 memory.go:191] Add success.
W0323 13:53:13.409808  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 13:53:13.409812  543705 cpu.go:282] Add success.
W0323 13:53:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:53:13.409822  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:53:13.420672  543705 net.go:648] Add success.
I0323 13:53:13.423595  543705 net.go:770] primary dev: ETH0
I0323 13:53:13.423608  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:53:13.423620  543705 net.go:698] Add success.
I0323 13:53:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:53:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:53:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 13:53:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:53:14.456621  543705 disk_worker.go:494] system disk:vda1
I0323 13:53:14.456650  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:53:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:53:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:53:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:53:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:53:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:53:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:53:23.409791  543705 memory.go:184] no items to output this cycle
I0323 13:53:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 13:53:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:53:33.409794  543705 memory.go:184] no items to output this cycle
I0323 13:53:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 13:53:40.269677  543705 disk_info.go:125] begin check local disk info of client
I0323 13:53:40.272213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:53:40.272219  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b540 0xc00007b580]
E0323 13:53:43.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:53:43.410800  543705 memory.go:191] Add success.
I0323 13:53:43.409789  543705 cpu.go:282] Add success.
I0323 13:53:43.420575  543705 net.go:648] Add success.
I0323 13:53:43.423448  543705 net.go:770] primary dev: ETH0
I0323 13:53:43.423464  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:53:43.423487  543705 net.go:698] Add success.
I0323 13:53:46.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:53:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:53:46.458059  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:53:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:53:53.409782  543705 memory.go:184] no items to output this cycle
I0323 13:53:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 13:54:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:54:03.409776  543705 memory.go:184] no items to output this cycle
I0323 13:54:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:54:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:54:13.409794  543705 memory.go:191] Add success.
I0323 13:54:13.409797  543705 cpu.go:282] Add success.
W0323 13:54:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:54:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:54:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:54:13.420127  543705 net.go:648] Add success.
I0323 13:54:13.422923  543705 net.go:770] primary dev: ETH0
I0323 13:54:13.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:54:13.422965  543705 net.go:698] Add success.
I0323 13:54:13.463417  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"36632748-c61d-44dd-aec0-cc7d0ab3debc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:54:13.463449  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 13:54:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:54:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:54:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 13:54:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:54:14.456676  543705 disk_worker.go:494] system disk:vda1
I0323 13:54:14.456710  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:54:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:54:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:54:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:54:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:54:16.472376  543705 disk_local_worker.go:436] Get disk info: []
I0323 13:54:23.409914  543705 cpu.go:275] no items to output this cycle
E0323 13:54:23.409994  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:54:23.410017  543705 memory.go:184] no items to output this cycle
E0323 13:54:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:54:33.409800  543705 memory.go:184] no items to output this cycle
I0323 13:54:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 13:54:40.274690  543705 disk_info.go:125] begin check local disk info of client
I0323 13:54:40.277240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:54:40.277246  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba80 0xc0001abac0]
I0323 13:54:40.437154  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:54:40.437159  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:54:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:54:43.410701  543705 memory.go:191] Add success.
I0323 13:54:43.409798  543705 cpu.go:282] Add success.
I0323 13:54:43.420458  543705 net.go:648] Add success.
I0323 13:54:43.423294  543705 net.go:770] primary dev: ETH0
I0323 13:54:43.423309  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:54:43.423321  543705 net.go:698] Add success.
I0323 13:54:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:54:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:54:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:54:53.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:54:53.409773  543705 memory.go:184] no items to output this cycle
I0323 13:54:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 13:55:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:55:03.409817  543705 memory.go:184] no items to output this cycle
I0323 13:55:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 13:55:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:55:13.409794  543705 memory.go:191] Add success.
I0323 13:55:13.409796  543705 cpu.go:282] Add success.
W0323 13:55:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:55:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:55:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:55:13.420066  543705 net.go:648] Add success.
I0323 13:55:13.423209  543705 net.go:770] primary dev: ETH0
I0323 13:55:13.423222  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:55:13.423234  543705 net.go:698] Add success.
I0323 13:55:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:55:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:55:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 13:55:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:55:14.456488  543705 disk_worker.go:494] system disk:vda1
I0323 13:55:14.456534  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:55:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:55:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:55:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:55:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:55:16.472433  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:55:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:55:23.409823  543705 memory.go:184] no items to output this cycle
I0323 13:55:23.409833  543705 cpu.go:275] no items to output this cycle
E0323 13:55:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:55:33.409780  543705 memory.go:184] no items to output this cycle
I0323 13:55:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 13:55:40.277683  543705 disk_info.go:125] begin check local disk info of client
I0323 13:55:40.280216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:55:40.280222  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b4c0 0xc00007b500]
E0323 13:55:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:55:43.410662  543705 memory.go:191] Add success.
I0323 13:55:43.409812  543705 cpu.go:282] Add success.
I0323 13:55:43.420371  543705 net.go:648] Add success.
I0323 13:55:43.423169  543705 net.go:770] primary dev: ETH0
I0323 13:55:43.423182  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:55:43.423195  543705 net.go:698] Add success.
I0323 13:55:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:55:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:55:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:55:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:55:53.409789  543705 memory.go:184] no items to output this cycle
I0323 13:55:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 13:56:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:56:03.409795  543705 memory.go:184] no items to output this cycle
I0323 13:56:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 13:56:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:56:13.409803  543705 memory.go:191] Add success.
I0323 13:56:13.409805  543705 cpu.go:282] Add success.
W0323 13:56:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:56:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:56:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:56:13.420057  543705 net.go:648] Add success.
I0323 13:56:13.422764  543705 net.go:770] primary dev: ETH0
I0323 13:56:13.422779  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:56:13.422792  543705 net.go:698] Add success.
I0323 13:56:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:56:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:56:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 13:56:14.455185  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:56:14.456540  543705 disk_worker.go:494] system disk:vda1
I0323 13:56:14.456568  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:56:15.455949  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:56:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:56:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:56:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:56:16.472451  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:56:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:56:23.409788  543705 memory.go:184] no items to output this cycle
I0323 13:56:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 13:56:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:56:33.409784  543705 memory.go:184] no items to output this cycle
I0323 13:56:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 13:56:40.281675  543705 disk_info.go:125] begin check local disk info of client
I0323 13:56:40.284301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:56:40.284308  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039e180 0xc00039e1c0]
E0323 13:56:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:56:43.410592  543705 memory.go:191] Add success.
I0323 13:56:43.409788  543705 cpu.go:282] Add success.
I0323 13:56:43.420287  543705 net.go:648] Add success.
I0323 13:56:43.422957  543705 net.go:770] primary dev: ETH0
I0323 13:56:43.422972  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:56:43.422987  543705 net.go:698] Add success.
I0323 13:56:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:56:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:56:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:56:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:56:53.409785  543705 memory.go:184] no items to output this cycle
I0323 13:56:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 13:57:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:57:03.409784  543705 memory.go:184] no items to output this cycle
I0323 13:57:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 13:57:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:57:13.409827  543705 memory.go:191] Add success.
I0323 13:57:13.409828  543705 cpu.go:282] Add success.
W0323 13:57:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:57:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:57:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:57:13.420219  543705 net.go:648] Add success.
I0323 13:57:13.423026  543705 net.go:770] primary dev: ETH0
I0323 13:57:13.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:57:13.423054  543705 net.go:698] Add success.
I0323 13:57:13.429140  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 13:57:13.453373  543705 event_worker.go:152] Polling the log file for events...
I0323 13:57:13.468685  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fe121bd-9faf-488b-adc5-087c61dd425a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 13:57:13.468719  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 13:57:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:57:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 13:57:14.455167  543705 disk_worker.go:728] disk inode is not compliant
E0323 13:57:14.456483  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 13:57:14.456493  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 13:57:14.456499  543705 custom_config.go:64] query custom config with name: gpu
I0323 13:57:14.457152  543705 disk_worker.go:494] system disk:vda1
I0323 13:57:14.457192  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 13:57:15.456816  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 13:57:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:57:16.457932  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 13:57:16.457932  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 13:57:16.458006  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:57:16.458026  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:57:16.472361  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:57:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:57:23.409783  543705 memory.go:184] no items to output this cycle
I0323 13:57:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 13:57:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:57:33.409788  543705 memory.go:184] no items to output this cycle
I0323 13:57:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 13:57:40.285676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:57:40.288299  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:57:40.288305  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c51c0 0xc0000c5200]
I0323 13:57:40.437639  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 13:57:40.437655  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 13:57:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:57:43.410730  543705 memory.go:191] Add success.
I0323 13:57:43.409814  543705 cpu.go:282] Add success.
I0323 13:57:43.420507  543705 net.go:648] Add success.
I0323 13:57:43.423125  543705 net.go:770] primary dev: ETH0
I0323 13:57:43.423140  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:57:43.423153  543705 net.go:698] Add success.
I0323 13:57:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:57:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:57:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:57:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:57:53.409814  543705 memory.go:184] no items to output this cycle
I0323 13:57:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 13:58:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:58:03.409804  543705 memory.go:184] no items to output this cycle
I0323 13:58:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 13:58:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:58:13.409791  543705 memory.go:191] Add success.
W0323 13:58:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 13:58:13.409821  543705 cpu.go:282] Add success.
W0323 13:58:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:58:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:58:13.420389  543705 net.go:648] Add success.
I0323 13:58:13.423286  543705 net.go:770] primary dev: ETH0
I0323 13:58:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:58:13.423312  543705 net.go:698] Add success.
I0323 13:58:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:58:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:58:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 13:58:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:58:14.456993  543705 disk_worker.go:494] system disk:vda1
I0323 13:58:14.457021  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:58:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:58:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:58:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:58:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:58:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:58:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:58:23.409796  543705 memory.go:184] no items to output this cycle
I0323 13:58:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 13:58:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:58:33.409773  543705 memory.go:184] no items to output this cycle
I0323 13:58:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 13:58:40.289676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:58:40.292267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:58:40.292274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa9c0 0xc0001aaa00]
E0323 13:58:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:58:43.410749  543705 memory.go:191] Add success.
I0323 13:58:43.409818  543705 cpu.go:282] Add success.
I0323 13:58:43.420459  543705 net.go:648] Add success.
I0323 13:58:43.423360  543705 net.go:770] primary dev: ETH0
I0323 13:58:43.423374  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:58:43.423387  543705 net.go:698] Add success.
I0323 13:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:58:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:58:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:58:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:58:53.409800  543705 memory.go:184] no items to output this cycle
I0323 13:58:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 13:59:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:59:03.409792  543705 memory.go:184] no items to output this cycle
I0323 13:59:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 13:59:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:59:13.409790  543705 memory.go:191] Add success.
I0323 13:59:13.409801  543705 cpu.go:282] Add success.
W0323 13:59:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 13:59:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 13:59:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 13:59:13.420260  543705 net.go:648] Add success.
I0323 13:59:13.423230  543705 net.go:770] primary dev: ETH0
I0323 13:59:13.423243  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:59:13.423255  543705 net.go:698] Add success.
I0323 13:59:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 13:59:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 13:59:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 13:59:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 13:59:14.456821  543705 disk_worker.go:494] system disk:vda1
I0323 13:59:14.456851  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 13:59:15.456010  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 13:59:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:59:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:59:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 13:59:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 13:59:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:59:23.409777  543705 memory.go:184] no items to output this cycle
I0323 13:59:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 13:59:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:59:33.409798  543705 memory.go:184] no items to output this cycle
I0323 13:59:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 13:59:40.293676  543705 disk_info.go:125] begin check local disk info of client
I0323 13:59:40.296259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 13:59:40.296266  543705 disk_info.go:196] parse disk info done, disk is : [0xc000480cc0 0xc000480d00]
E0323 13:59:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:59:43.410661  543705 memory.go:191] Add success.
I0323 13:59:43.409802  543705 cpu.go:282] Add success.
I0323 13:59:43.420367  543705 net.go:648] Add success.
I0323 13:59:43.422943  543705 net.go:770] primary dev: ETH0
I0323 13:59:43.422955  543705 net.go:802] Send network stats successfully!,count is 6
I0323 13:59:43.422970  543705 net.go:698] Add success.
I0323 13:59:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 13:59:46.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 13:59:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 13:59:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 13:59:53.409793  543705 memory.go:184] no items to output this cycle
I0323 13:59:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 14:00:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:00:03.409771  543705 memory.go:184] no items to output this cycle
I0323 14:00:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 14:00:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:00:13.409823  543705 memory.go:191] Add success.
I0323 14:00:13.409828  543705 cpu.go:282] Add success.
W0323 14:00:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:00:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:00:13.409874  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:00:13.420192  543705 net.go:648] Add success.
I0323 14:00:13.423512  543705 net.go:770] primary dev: ETH0
I0323 14:00:13.423524  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:00:13.423537  543705 net.go:698] Add success.
I0323 14:00:13.467972  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf6d41e6-f2a4-492e-9455-7bbf9dbfdd10","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:00:13.468005  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:00:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:00:14.455854  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:00:14.455868  543705 disk_worker.go:708] disk space is not compliant
W0323 14:00:14.455871  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:00:14.457694  543705 disk_worker.go:494] system disk:vda1
I0323 14:00:14.457722  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:00:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:00:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:00:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:00:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:00:16.472378  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:00:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:00:23.409813  543705 memory.go:184] no items to output this cycle
I0323 14:00:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 14:00:33.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:00:33.409762  543705 memory.go:184] no items to output this cycle
I0323 14:00:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 14:00:40.297682  543705 disk_info.go:125] begin check local disk info of client
I0323 14:00:40.300163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:00:40.300171  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536a40 0xc000536a80]
I0323 14:00:40.438648  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:00:40.438654  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:00:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:00:43.410752  543705 memory.go:191] Add success.
I0323 14:00:43.409817  543705 cpu.go:282] Add success.
I0323 14:00:43.420452  543705 net.go:648] Add success.
I0323 14:00:43.423245  543705 net.go:770] primary dev: ETH0
I0323 14:00:43.423258  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:00:43.423271  543705 net.go:698] Add success.
I0323 14:00:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:00:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:00:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:00:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:00:53.409789  543705 memory.go:184] no items to output this cycle
I0323 14:00:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 14:01:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:01:03.409796  543705 memory.go:184] no items to output this cycle
I0323 14:01:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 14:01:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:01:13.409784  543705 memory.go:191] Add success.
W0323 14:01:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 14:01:13.409816  543705 cpu.go:282] Add success.
W0323 14:01:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:01:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:01:13.420271  543705 net.go:648] Add success.
I0323 14:01:13.423000  543705 net.go:770] primary dev: ETH0
I0323 14:01:13.423015  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:01:13.423029  543705 net.go:698] Add success.
I0323 14:01:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:01:14.455186  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:01:14.455325  543705 disk_worker.go:708] disk space is not compliant
W0323 14:01:14.455330  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:01:14.456964  543705 disk_worker.go:494] system disk:vda1
I0323 14:01:14.456994  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:01:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:01:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:01:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:01:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:01:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:01:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:01:23.409779  543705 memory.go:184] no items to output this cycle
I0323 14:01:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 14:01:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:01:33.409772  543705 memory.go:184] no items to output this cycle
I0323 14:01:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 14:01:40.301679  543705 disk_info.go:125] begin check local disk info of client
I0323 14:01:40.304099  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:01:40.304106  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b2c00 0xc0002b2c40]
E0323 14:01:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:01:43.410617  543705 memory.go:191] Add success.
I0323 14:01:43.409804  543705 cpu.go:282] Add success.
I0323 14:01:43.420299  543705 net.go:648] Add success.
I0323 14:01:43.423168  543705 net.go:770] primary dev: ETH0
I0323 14:01:43.423183  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:01:43.423198  543705 net.go:698] Add success.
I0323 14:01:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:01:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:01:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:01:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:01:53.409779  543705 memory.go:184] no items to output this cycle
I0323 14:01:53.409785  543705 cpu.go:275] no items to output this cycle
E0323 14:02:03.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:02:03.409768  543705 memory.go:184] no items to output this cycle
I0323 14:02:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 14:02:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:02:13.409813  543705 memory.go:191] Add success.
I0323 14:02:13.409815  543705 cpu.go:282] Add success.
W0323 14:02:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:02:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:02:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:02:13.420225  543705 net.go:648] Add success.
I0323 14:02:13.422866  543705 net.go:770] primary dev: ETH0
I0323 14:02:13.422879  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:02:13.422891  543705 net.go:698] Add success.
W0323 14:02:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:02:14.455152  543705 disk_worker.go:708] disk space is not compliant
W0323 14:02:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:02:14.456114  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:02:14.456124  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:02:14.456130  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:02:14.456445  543705 disk_worker.go:494] system disk:vda1
I0323 14:02:14.456486  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:02:15.456860  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:02:15.456869  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:02:16.457923  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:02:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:02:16.457976  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:02:16.457996  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:02:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:02:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:02:23.409787  543705 memory.go:184] no items to output this cycle
I0323 14:02:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 14:02:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:02:33.409779  543705 memory.go:184] no items to output this cycle
I0323 14:02:33.409785  543705 cpu.go:275] no items to output this cycle
I0323 14:02:40.305677  543705 disk_info.go:125] begin check local disk info of client
I0323 14:02:40.308175  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:02:40.308181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abdc0 0xc0001abe00]
E0323 14:02:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:02:43.410604  543705 memory.go:191] Add success.
I0323 14:02:43.409810  543705 cpu.go:282] Add success.
I0323 14:02:43.420317  543705 net.go:648] Add success.
I0323 14:02:43.423053  543705 net.go:770] primary dev: ETH0
I0323 14:02:43.423066  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:02:43.423079  543705 net.go:698] Add success.
I0323 14:02:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:02:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:02:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:02:53.409787  543705 memory.go:184] no items to output this cycle
I0323 14:02:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 14:03:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:03:03.409782  543705 cpu.go:275] no items to output this cycle
I0323 14:03:03.409786  543705 memory.go:184] no items to output this cycle
E0323 14:03:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:03:13.409788  543705 cpu.go:282] Add success.
I0323 14:03:13.409789  543705 memory.go:191] Add success.
W0323 14:03:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:03:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:03:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:03:13.420058  543705 net.go:648] Add success.
I0323 14:03:13.422967  543705 net.go:770] primary dev: ETH0
I0323 14:03:13.422983  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:03:13.422997  543705 net.go:698] Add success.
I0323 14:03:13.469227  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d946e3e3-a142-4d72-a794-9c2ccf0426be","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:03:13.469262  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:03:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:03:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:03:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0323 14:03:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:03:14.456765  543705 disk_worker.go:494] system disk:vda1
I0323 14:03:14.456886  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:03:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:03:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:03:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:03:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:03:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:03:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:03:23.409778  543705 memory.go:184] no items to output this cycle
I0323 14:03:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 14:03:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:03:33.409774  543705 memory.go:184] no items to output this cycle
I0323 14:03:33.409780  543705 cpu.go:275] no items to output this cycle
I0323 14:03:40.309678  543705 disk_info.go:125] begin check local disk info of client
I0323 14:03:40.312204  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:03:40.312210  543705 disk_info.go:196] parse disk info done, disk is : [0xc000331140 0xc000331180]
I0323 14:03:40.438941  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:03:40.438946  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:03:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:03:43.410810  543705 memory.go:191] Add success.
I0323 14:03:43.409816  543705 cpu.go:282] Add success.
I0323 14:03:43.420516  543705 net.go:648] Add success.
I0323 14:03:43.423438  543705 net.go:770] primary dev: ETH0
I0323 14:03:43.423452  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:03:43.423464  543705 net.go:698] Add success.
I0323 14:03:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:03:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:03:46.458054  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:03:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:03:53.409784  543705 memory.go:184] no items to output this cycle
I0323 14:03:53.409818  543705 cpu.go:275] no items to output this cycle
E0323 14:04:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:04:03.409813  543705 memory.go:184] no items to output this cycle
I0323 14:04:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 14:04:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:04:13.409792  543705 memory.go:191] Add success.
W0323 14:04:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:04:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:04:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:04:13.409833  543705 cpu.go:282] Add success.
I0323 14:04:13.420110  543705 net.go:648] Add success.
I0323 14:04:13.422937  543705 net.go:770] primary dev: ETH0
I0323 14:04:13.422950  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:04:13.422961  543705 net.go:698] Add success.
I0323 14:04:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:04:14.455135  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:04:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 14:04:14.455203  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:04:14.456760  543705 disk_worker.go:494] system disk:vda1
I0323 14:04:14.456791  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:04:15.456013  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:04:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:04:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:04:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:04:16.472400  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:04:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:04:23.409794  543705 memory.go:184] no items to output this cycle
I0323 14:04:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 14:04:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:04:33.409783  543705 memory.go:184] no items to output this cycle
I0323 14:04:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 14:04:40.313679  543705 disk_info.go:125] begin check local disk info of client
I0323 14:04:40.316174  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:04:40.316181  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005369c0 0xc000536a00]
E0323 14:04:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:04:43.410655  543705 memory.go:191] Add success.
I0323 14:04:43.409831  543705 cpu.go:282] Add success.
I0323 14:04:43.420336  543705 net.go:648] Add success.
I0323 14:04:43.423158  543705 net.go:770] primary dev: ETH0
I0323 14:04:43.423173  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:04:43.423186  543705 net.go:698] Add success.
I0323 14:04:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:04:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:04:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:04:53.410394  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:04:53.410414  543705 memory.go:184] no items to output this cycle
I0323 14:04:53.410434  543705 cpu.go:275] no items to output this cycle
E0323 14:05:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:05:03.409802  543705 memory.go:184] no items to output this cycle
I0323 14:05:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 14:05:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:05:13.409836  543705 memory.go:191] Add success.
I0323 14:05:13.409838  543705 cpu.go:282] Add success.
W0323 14:05:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:05:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:05:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:05:13.420168  543705 net.go:648] Add success.
I0323 14:05:13.422987  543705 net.go:770] primary dev: ETH0
I0323 14:05:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:05:13.423011  543705 net.go:698] Add success.
I0323 14:05:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:05:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:05:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 14:05:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:05:14.456514  543705 disk_worker.go:494] system disk:vda1
I0323 14:05:14.456556  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:05:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:05:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:05:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:05:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:05:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:05:23.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:05:23.409935  543705 memory.go:184] no items to output this cycle
I0323 14:05:23.410026  543705 cpu.go:275] no items to output this cycle
E0323 14:05:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:05:33.409775  543705 memory.go:184] no items to output this cycle
I0323 14:05:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 14:05:40.317683  543705 disk_info.go:125] begin check local disk info of client
I0323 14:05:40.320209  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:05:40.320216  543705 disk_info.go:196] parse disk info done, disk is : [0xc000537a00 0xc000537a40]
E0323 14:05:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:05:43.410641  543705 memory.go:191] Add success.
I0323 14:05:43.409810  543705 cpu.go:282] Add success.
I0323 14:05:43.420425  543705 net.go:648] Add success.
I0323 14:05:43.422933  543705 net.go:770] primary dev: ETH0
I0323 14:05:43.422946  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:05:43.422958  543705 net.go:698] Add success.
I0323 14:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:05:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:05:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:05:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:05:53.409788  543705 memory.go:184] no items to output this cycle
I0323 14:05:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 14:06:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:06:03.409785  543705 memory.go:184] no items to output this cycle
I0323 14:06:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 14:06:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:06:13.409795  543705 memory.go:191] Add success.
I0323 14:06:13.409800  543705 cpu.go:282] Add success.
W0323 14:06:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:06:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:06:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:06:13.420126  543705 net.go:648] Add success.
I0323 14:06:13.422907  543705 net.go:770] primary dev: ETH0
I0323 14:06:13.422920  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:06:13.422931  543705 net.go:698] Add success.
I0323 14:06:13.468721  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4f065277-e71f-4305-ab5d-1efc670668a3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:06:13.468755  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:06:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:06:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:06:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 14:06:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:06:14.456594  543705 disk_worker.go:494] system disk:vda1
I0323 14:06:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:06:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:06:16.457969  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:06:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:06:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:06:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:06:23.409906  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:06:23.409926  543705 memory.go:184] no items to output this cycle
I0323 14:06:23.409950  543705 cpu.go:275] no items to output this cycle
E0323 14:06:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:06:33.409767  543705 memory.go:184] no items to output this cycle
I0323 14:06:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 14:06:40.321677  543705 disk_info.go:125] begin check local disk info of client
I0323 14:06:40.324223  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:06:40.324229  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536600 0xc000536640]
I0323 14:06:40.439927  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:06:40.439932  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:06:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:06:43.410571  543705 memory.go:191] Add success.
I0323 14:06:43.409816  543705 cpu.go:282] Add success.
I0323 14:06:43.420254  543705 net.go:648] Add success.
I0323 14:06:43.422907  543705 net.go:770] primary dev: ETH0
I0323 14:06:43.422932  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:06:43.422947  543705 net.go:698] Add success.
I0323 14:06:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:06:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:06:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:06:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:06:53.409779  543705 memory.go:184] no items to output this cycle
I0323 14:06:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 14:07:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:07:03.409782  543705 memory.go:184] no items to output this cycle
I0323 14:07:03.409786  543705 cpu.go:275] no items to output this cycle
E0323 14:07:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:07:13.409807  543705 memory.go:191] Add success.
I0323 14:07:13.409817  543705 cpu.go:282] Add success.
W0323 14:07:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:07:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:07:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:07:13.420196  543705 net.go:648] Add success.
I0323 14:07:13.423415  543705 net.go:770] primary dev: ETH0
I0323 14:07:13.423428  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:07:13.423442  543705 net.go:698] Add success.
I0323 14:07:13.452987  543705 event_worker.go:152] Polling the log file for events...
W0323 14:07:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:07:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 14:07:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:07:14.455880  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:07:14.455889  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:07:14.455895  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:07:14.456638  543705 disk_worker.go:494] system disk:vda1
I0323 14:07:14.456681  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:07:15.456843  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:07:15.456852  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:07:16.457894  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:07:16.457894  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:07:16.457949  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:07:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:07:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:07:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:07:23.409784  543705 memory.go:184] no items to output this cycle
I0323 14:07:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 14:07:33.410037  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:07:33.410065  543705 memory.go:184] no items to output this cycle
I0323 14:07:33.410167  543705 cpu.go:275] no items to output this cycle
I0323 14:07:40.327017  543705 disk_info.go:125] begin check local disk info of client
I0323 14:07:40.329547  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:07:40.329553  543705 disk_info.go:196] parse disk info done, disk is : [0xc000545700 0xc000545740]
E0323 14:07:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:07:43.410711  543705 memory.go:191] Add success.
I0323 14:07:43.409813  543705 cpu.go:282] Add success.
I0323 14:07:43.420437  543705 net.go:648] Add success.
I0323 14:07:43.423206  543705 net.go:770] primary dev: ETH0
I0323 14:07:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:07:43.423234  543705 net.go:698] Add success.
I0323 14:07:46.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:07:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:07:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:07:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:07:53.409776  543705 memory.go:184] no items to output this cycle
I0323 14:07:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 14:08:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:08:03.409803  543705 memory.go:184] no items to output this cycle
I0323 14:08:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 14:08:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:08:13.409783  543705 memory.go:191] Add success.
I0323 14:08:13.409804  543705 cpu.go:282] Add success.
W0323 14:08:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:08:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:08:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:08:13.420196  543705 net.go:648] Add success.
I0323 14:08:13.423153  543705 net.go:770] primary dev: ETH0
I0323 14:08:13.423167  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:08:13.423181  543705 net.go:698] Add success.
I0323 14:08:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:08:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:08:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 14:08:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:08:14.456591  543705 disk_worker.go:494] system disk:vda1
I0323 14:08:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:08:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:08:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:08:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:08:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:08:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:08:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:08:23.409772  543705 memory.go:184] no items to output this cycle
I0323 14:08:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 14:08:33.409905  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:08:33.409947  543705 cpu.go:275] no items to output this cycle
I0323 14:08:33.410029  543705 memory.go:184] no items to output this cycle
I0323 14:08:40.329681  543705 disk_info.go:125] begin check local disk info of client
I0323 14:08:40.332222  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:08:40.332229  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f6000 0xc0004f6040]
E0323 14:08:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:08:43.410861  543705 memory.go:191] Add success.
I0323 14:08:43.409822  543705 cpu.go:282] Add success.
I0323 14:08:43.420560  543705 net.go:648] Add success.
I0323 14:08:43.423293  543705 net.go:770] primary dev: ETH0
I0323 14:08:43.423306  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:08:43.423319  543705 net.go:698] Add success.
I0323 14:08:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:08:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:08:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:08:53.409816  543705 memory.go:184] no items to output this cycle
I0323 14:08:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 14:09:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:09:03.409796  543705 memory.go:184] no items to output this cycle
I0323 14:09:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 14:09:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:09:13.409796  543705 memory.go:191] Add success.
I0323 14:09:13.409818  543705 cpu.go:282] Add success.
W0323 14:09:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:09:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:09:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:09:13.420145  543705 net.go:648] Add success.
I0323 14:09:13.423040  543705 net.go:770] primary dev: ETH0
I0323 14:09:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:09:13.423071  543705 net.go:698] Add success.
I0323 14:09:13.463838  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4e9df764-12e6-4db2-94f1-0a879ef6672b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:09:13.463871  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:09:14.454955  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:09:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:09:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 14:09:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:09:14.456512  543705 disk_worker.go:494] system disk:vda1
I0323 14:09:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:09:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:09:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:09:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:09:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:09:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:09:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:09:23.409813  543705 memory.go:184] no items to output this cycle
I0323 14:09:23.409820  543705 cpu.go:275] no items to output this cycle
E0323 14:09:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:09:33.409786  543705 memory.go:184] no items to output this cycle
I0323 14:09:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 14:09:40.333679  543705 disk_info.go:125] begin check local disk info of client
I0323 14:09:40.336211  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:09:40.336226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00052a2c0 0xc00052a300]
I0323 14:09:40.440854  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:09:40.440859  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:09:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:09:43.410612  543705 memory.go:191] Add success.
I0323 14:09:43.409799  543705 cpu.go:282] Add success.
I0323 14:09:43.420347  543705 net.go:648] Add success.
I0323 14:09:43.423027  543705 net.go:770] primary dev: ETH0
I0323 14:09:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:09:43.423053  543705 net.go:698] Add success.
I0323 14:09:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:09:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:09:46.458063  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:09:53.410271  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:09:53.410292  543705 memory.go:184] no items to output this cycle
I0323 14:09:53.410301  543705 cpu.go:275] no items to output this cycle
E0323 14:10:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:10:03.409776  543705 memory.go:184] no items to output this cycle
I0323 14:10:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 14:10:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:10:13.409826  543705 memory.go:191] Add success.
I0323 14:10:13.409836  543705 cpu.go:282] Add success.
W0323 14:10:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:10:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:10:13.409880  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:10:13.420132  543705 net.go:648] Add success.
I0323 14:10:13.422755  543705 net.go:770] primary dev: ETH0
I0323 14:10:13.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:10:13.422781  543705 net.go:698] Add success.
I0323 14:10:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:10:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:10:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 14:10:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:10:14.456613  543705 disk_worker.go:494] system disk:vda1
I0323 14:10:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:10:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:10:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:10:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:10:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:10:16.472382  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:10:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:10:23.409768  543705 memory.go:184] no items to output this cycle
I0323 14:10:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 14:10:33.409895  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:10:33.409902  543705 cpu.go:275] no items to output this cycle
I0323 14:10:33.409913  543705 memory.go:184] no items to output this cycle
I0323 14:10:40.337673  543705 disk_info.go:125] begin check local disk info of client
I0323 14:10:40.340243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:10:40.340250  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f6600 0xc0004f6640]
E0323 14:10:43.409747  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:10:43.410694  543705 memory.go:191] Add success.
I0323 14:10:43.409797  543705 cpu.go:282] Add success.
I0323 14:10:43.420460  543705 net.go:648] Add success.
I0323 14:10:43.423115  543705 net.go:770] primary dev: ETH0
I0323 14:10:43.423126  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:10:43.423138  543705 net.go:698] Add success.
I0323 14:10:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:10:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:10:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:10:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:10:53.409785  543705 memory.go:184] no items to output this cycle
I0323 14:10:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 14:11:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:11:03.409771  543705 memory.go:184] no items to output this cycle
I0323 14:11:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 14:11:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:11:13.409791  543705 memory.go:191] Add success.
I0323 14:11:13.409795  543705 cpu.go:282] Add success.
W0323 14:11:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:11:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:11:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:11:13.420112  543705 net.go:648] Add success.
I0323 14:11:13.422915  543705 net.go:770] primary dev: ETH0
I0323 14:11:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:11:13.422942  543705 net.go:698] Add success.
I0323 14:11:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:11:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:11:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 14:11:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:11:14.456554  543705 disk_worker.go:494] system disk:vda1
I0323 14:11:14.456587  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:11:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:11:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:11:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:11:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:11:16.472569  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:11:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:11:23.409806  543705 memory.go:184] no items to output this cycle
I0323 14:11:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 14:11:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:11:33.409771  543705 memory.go:184] no items to output this cycle
I0323 14:11:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 14:11:40.341670  543705 disk_info.go:125] begin check local disk info of client
I0323 14:11:40.344244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:11:40.344251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004aed80 0xc0004aedc0]
E0323 14:11:43.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:11:43.410918  543705 memory.go:191] Add success.
I0323 14:11:43.409945  543705 cpu.go:282] Add success.
I0323 14:11:43.419713  543705 net.go:648] Add success.
I0323 14:11:43.422812  543705 net.go:770] primary dev: ETH0
I0323 14:11:43.422824  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:11:43.422836  543705 net.go:698] Add success.
I0323 14:11:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:11:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:11:46.458075  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:11:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:11:53.409813  543705 memory.go:184] no items to output this cycle
I0323 14:11:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 14:12:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:12:03.409789  543705 memory.go:184] no items to output this cycle
I0323 14:12:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 14:12:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:12:13.409812  543705 memory.go:191] Add success.
I0323 14:12:13.409817  543705 cpu.go:282] Add success.
W0323 14:12:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:12:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:12:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:12:13.420083  543705 net.go:648] Add success.
I0323 14:12:13.422791  543705 net.go:770] primary dev: ETH0
I0323 14:12:13.422805  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:12:13.422817  543705 net.go:698] Add success.
I0323 14:12:13.469123  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"55b00291-d47b-48a8-b88a-55026ffab9eb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:12:13.469156  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 14:12:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:12:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 14:12:14.455214  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:12:14.455926  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:12:14.455936  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:12:14.455941  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:12:14.456823  543705 disk_worker.go:494] system disk:vda1
I0323 14:12:14.456855  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:12:15.456822  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:12:15.456830  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:12:16.457911  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:12:16.457911  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:12:16.457965  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:12:16.457985  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:12:16.472304  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:12:23.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:12:23.409770  543705 memory.go:184] no items to output this cycle
I0323 14:12:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 14:12:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:12:33.409771  543705 memory.go:184] no items to output this cycle
I0323 14:12:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 14:12:40.345676  543705 disk_info.go:125] begin check local disk info of client
I0323 14:12:40.348214  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:12:40.348220  543705 disk_info.go:196] parse disk info done, disk is : [0xc000397a80 0xc000397ac0]
I0323 14:12:40.441798  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:12:40.441802  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:12:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:12:43.410677  543705 memory.go:191] Add success.
I0323 14:12:43.409788  543705 cpu.go:282] Add success.
I0323 14:12:43.419720  543705 net.go:648] Add success.
I0323 14:12:43.422552  543705 net.go:770] primary dev: ETH0
I0323 14:12:43.422565  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:12:43.422578  543705 net.go:698] Add success.
I0323 14:12:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:12:46.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:12:46.458049  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:12:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:12:53.409817  543705 memory.go:184] no items to output this cycle
I0323 14:12:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 14:13:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:13:03.409784  543705 cpu.go:275] no items to output this cycle
I0323 14:13:03.409790  543705 memory.go:184] no items to output this cycle
E0323 14:13:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:13:13.409790  543705 memory.go:191] Add success.
I0323 14:13:13.409795  543705 cpu.go:282] Add success.
W0323 14:13:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:13:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:13:13.409833  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:13:13.420037  543705 net.go:648] Add success.
I0323 14:13:13.423272  543705 net.go:770] primary dev: ETH0
I0323 14:13:13.423286  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:13:13.423299  543705 net.go:698] Add success.
I0323 14:13:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:13:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:13:14.455208  543705 disk_worker.go:708] disk space is not compliant
W0323 14:13:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:13:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 14:13:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:13:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:13:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:13:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:13:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:13:16.472393  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:13:23.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:13:23.409824  543705 memory.go:184] no items to output this cycle
I0323 14:13:23.409834  543705 cpu.go:275] no items to output this cycle
E0323 14:13:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:13:33.409808  543705 memory.go:184] no items to output this cycle
I0323 14:13:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 14:13:40.349673  543705 disk_info.go:125] begin check local disk info of client
I0323 14:13:40.352271  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:13:40.352278  543705 disk_info.go:196] parse disk info done, disk is : [0xc000513b00 0xc000513b40]
E0323 14:13:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:13:43.410707  543705 memory.go:191] Add success.
I0323 14:13:43.409824  543705 cpu.go:282] Add success.
I0323 14:13:43.419770  543705 net.go:648] Add success.
I0323 14:13:43.422627  543705 net.go:770] primary dev: ETH0
I0323 14:13:43.422658  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:13:43.422672  543705 net.go:698] Add success.
I0323 14:13:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:13:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:13:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:13:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:13:53.409821  543705 memory.go:184] no items to output this cycle
I0323 14:13:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 14:14:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:14:03.409784  543705 memory.go:184] no items to output this cycle
I0323 14:14:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 14:14:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:14:13.409807  543705 memory.go:191] Add success.
I0323 14:14:13.409807  543705 cpu.go:282] Add success.
W0323 14:14:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:14:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:14:13.409850  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:14:13.420132  543705 net.go:648] Add success.
I0323 14:14:13.423189  543705 net.go:770] primary dev: ETH0
I0323 14:14:13.423201  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:14:13.423214  543705 net.go:698] Add success.
I0323 14:14:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:14:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:14:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 14:14:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:14:14.456496  543705 disk_worker.go:494] system disk:vda1
I0323 14:14:14.456541  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:14:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:14:16.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:14:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:14:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:14:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:14:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:14:23.409783  543705 memory.go:184] no items to output this cycle
I0323 14:14:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 14:14:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:14:33.409791  543705 memory.go:184] no items to output this cycle
I0323 14:14:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 14:14:40.353672  543705 disk_info.go:125] begin check local disk info of client
I0323 14:14:40.356202  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:14:40.356208  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032ab80 0xc00032abc0]
E0323 14:14:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:14:43.410756  543705 memory.go:191] Add success.
I0323 14:14:43.409819  543705 cpu.go:282] Add success.
I0323 14:14:43.419709  543705 net.go:648] Add success.
I0323 14:14:43.422433  543705 net.go:770] primary dev: ETH0
I0323 14:14:43.422447  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:14:43.422458  543705 net.go:698] Add success.
I0323 14:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:14:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:14:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:14:53.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:14:53.409776  543705 memory.go:184] no items to output this cycle
I0323 14:14:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 14:15:03.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:15:03.409766  543705 memory.go:184] no items to output this cycle
I0323 14:15:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 14:15:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:15:13.409785  543705 memory.go:191] Add success.
I0323 14:15:13.409801  543705 cpu.go:282] Add success.
W0323 14:15:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:15:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:15:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:15:13.420139  543705 net.go:648] Add success.
I0323 14:15:13.423025  543705 net.go:770] primary dev: ETH0
I0323 14:15:13.423038  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:15:13.423050  543705 net.go:698] Add success.
I0323 14:15:13.517109  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d8b441a7-680f-4588-afd5-ae511a2adf2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:15:13.517144  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:15:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:15:14.455154  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:15:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 14:15:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:15:14.456501  543705 disk_worker.go:494] system disk:vda1
I0323 14:15:14.456545  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:15:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:15:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:15:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:15:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:15:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:15:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:15:23.409810  543705 memory.go:184] no items to output this cycle
I0323 14:15:23.409820  543705 cpu.go:275] no items to output this cycle
E0323 14:15:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:15:33.409785  543705 memory.go:184] no items to output this cycle
I0323 14:15:33.409789  543705 cpu.go:275] no items to output this cycle
I0323 14:15:40.357677  543705 disk_info.go:125] begin check local disk info of client
I0323 14:15:40.360217  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:15:40.360224  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004ff440 0xc0004ff480]
I0323 14:15:40.442757  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:15:40.442762  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:15:43.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:15:43.410813  543705 memory.go:191] Add success.
I0323 14:15:43.410012  543705 cpu.go:282] Add success.
I0323 14:15:43.419729  543705 net.go:648] Add success.
I0323 14:15:43.422412  543705 net.go:770] primary dev: ETH0
I0323 14:15:43.422425  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:15:43.422438  543705 net.go:698] Add success.
I0323 14:15:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:15:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:15:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:15:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:15:53.409793  543705 memory.go:184] no items to output this cycle
I0323 14:15:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 14:16:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:16:03.409772  543705 memory.go:184] no items to output this cycle
I0323 14:16:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 14:16:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:16:13.409824  543705 memory.go:191] Add success.
I0323 14:16:13.409831  543705 cpu.go:282] Add success.
W0323 14:16:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:16:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:16:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:16:13.420127  543705 net.go:648] Add success.
I0323 14:16:13.422758  543705 net.go:770] primary dev: ETH0
I0323 14:16:13.422773  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:16:13.422788  543705 net.go:698] Add success.
I0323 14:16:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:16:14.455178  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:16:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 14:16:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:16:14.456532  543705 disk_worker.go:494] system disk:vda1
I0323 14:16:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:16:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:16:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:16:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:16:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:16:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:16:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:16:23.409778  543705 memory.go:184] no items to output this cycle
I0323 14:16:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 14:16:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:16:33.409805  543705 memory.go:184] no items to output this cycle
I0323 14:16:33.409812  543705 cpu.go:275] no items to output this cycle
I0323 14:16:40.361673  543705 disk_info.go:125] begin check local disk info of client
I0323 14:16:40.364269  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:16:40.364276  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bd780 0xc0002bd7c0]
E0323 14:16:43.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:16:43.410825  543705 memory.go:191] Add success.
I0323 14:16:43.409996  543705 cpu.go:282] Add success.
I0323 14:16:43.419713  543705 net.go:648] Add success.
I0323 14:16:43.422297  543705 net.go:770] primary dev: ETH0
I0323 14:16:43.422312  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:16:43.422327  543705 net.go:698] Add success.
I0323 14:16:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:16:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:16:46.458071  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:16:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:16:53.409776  543705 memory.go:184] no items to output this cycle
I0323 14:16:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 14:17:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:17:03.409804  543705 memory.go:184] no items to output this cycle
I0323 14:17:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 14:17:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:17:13.409809  543705 memory.go:191] Add success.
I0323 14:17:13.409811  543705 cpu.go:282] Add success.
W0323 14:17:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:17:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:17:13.409852  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:17:13.420138  543705 net.go:648] Add success.
I0323 14:17:13.422813  543705 net.go:770] primary dev: ETH0
I0323 14:17:13.422827  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:17:13.422839  543705 net.go:698] Add success.
I0323 14:17:13.453484  543705 event_worker.go:152] Polling the log file for events...
W0323 14:17:14.455179  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:17:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 14:17:14.455191  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:17:14.455902  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:17:14.455911  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:17:14.455917  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:17:14.456549  543705 disk_worker.go:494] system disk:vda1
I0323 14:17:14.456578  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:17:15.456796  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:17:15.456806  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:17:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:17:16.457962  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:17:16.458017  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:17:16.458036  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:17:16.472380  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:17:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:17:23.409797  543705 memory.go:184] no items to output this cycle
I0323 14:17:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 14:17:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:17:33.409787  543705 memory.go:184] no items to output this cycle
I0323 14:17:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 14:17:40.365675  543705 disk_info.go:125] begin check local disk info of client
I0323 14:17:40.368230  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:17:40.368237  543705 disk_info.go:196] parse disk info done, disk is : [0xc00051c2c0 0xc00051c300]
I0323 14:17:43.409932  543705 cpu.go:282] Add success.
E0323 14:17:43.410054  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:17:43.410771  543705 memory.go:191] Add success.
I0323 14:17:43.419747  543705 net.go:648] Add success.
I0323 14:17:43.422921  543705 net.go:770] primary dev: ETH0
I0323 14:17:43.422933  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:17:43.422945  543705 net.go:698] Add success.
I0323 14:17:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:17:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:17:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:17:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:17:53.409783  543705 memory.go:184] no items to output this cycle
I0323 14:17:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 14:18:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:18:03.409768  543705 memory.go:184] no items to output this cycle
I0323 14:18:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 14:18:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:18:13.409815  543705 memory.go:191] Add success.
I0323 14:18:13.409822  543705 cpu.go:282] Add success.
W0323 14:18:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:18:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:18:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:18:13.420236  543705 net.go:648] Add success.
I0323 14:18:13.422980  543705 net.go:770] primary dev: ETH0
I0323 14:18:13.422993  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:18:13.423005  543705 net.go:698] Add success.
I0323 14:18:13.463371  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2df3a176-c57b-4a9e-befa-529125651e95","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:18:13.463408  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:18:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:18:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:18:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 14:18:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:18:14.456664  543705 disk_worker.go:494] system disk:vda1
I0323 14:18:14.456695  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:18:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:18:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:18:16.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:18:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:18:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:18:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:18:23.409785  543705 memory.go:184] no items to output this cycle
I0323 14:18:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 14:18:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:18:33.409782  543705 memory.go:184] no items to output this cycle
I0323 14:18:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 14:18:40.369683  543705 disk_info.go:125] begin check local disk info of client
I0323 14:18:40.372318  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:18:40.372326  543705 disk_info.go:196] parse disk info done, disk is : [0xc00035ed40 0xc00035ed80]
I0323 14:18:40.443772  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:18:40.443778  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:18:43.409948  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:18:43.410707  543705 memory.go:191] Add success.
I0323 14:18:43.410018  543705 cpu.go:282] Add success.
I0323 14:18:43.419750  543705 net.go:648] Add success.
I0323 14:18:43.422297  543705 net.go:770] primary dev: ETH0
I0323 14:18:43.422310  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:18:43.422323  543705 net.go:698] Add success.
I0323 14:18:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:18:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:18:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:18:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:18:53.409801  543705 cpu.go:275] no items to output this cycle
I0323 14:18:53.409808  543705 memory.go:184] no items to output this cycle
E0323 14:19:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:19:03.409801  543705 memory.go:184] no items to output this cycle
I0323 14:19:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 14:19:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:19:13.409795  543705 memory.go:191] Add success.
W0323 14:19:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:19:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:19:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:19:13.409870  543705 cpu.go:282] Add success.
I0323 14:19:13.420316  543705 net.go:648] Add success.
I0323 14:19:13.423315  543705 net.go:770] primary dev: ETH0
I0323 14:19:13.423334  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:19:13.423352  543705 net.go:698] Add success.
I0323 14:19:14.453939  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:19:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:19:14.455305  543705 disk_worker.go:708] disk space is not compliant
W0323 14:19:14.455310  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:19:14.457304  543705 disk_worker.go:494] system disk:vda1
I0323 14:19:14.457344  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:19:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:19:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:19:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:19:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:19:16.472455  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:19:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:19:23.409820  543705 memory.go:184] no items to output this cycle
I0323 14:19:23.409832  543705 cpu.go:275] no items to output this cycle
E0323 14:19:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:19:33.409807  543705 memory.go:184] no items to output this cycle
I0323 14:19:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 14:19:40.373686  543705 disk_info.go:125] begin check local disk info of client
I0323 14:19:40.376355  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:19:40.376362  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a5d40 0xc0004a5d80]
E0323 14:19:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:19:43.410649  543705 memory.go:191] Add success.
I0323 14:19:43.409843  543705 cpu.go:282] Add success.
I0323 14:19:43.420359  543705 net.go:648] Add success.
I0323 14:19:43.423005  543705 net.go:770] primary dev: ETH0
I0323 14:19:43.423018  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:19:43.423030  543705 net.go:698] Add success.
I0323 14:19:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:19:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:19:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:19:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:19:53.409797  543705 memory.go:184] no items to output this cycle
I0323 14:19:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 14:20:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:20:03.409809  543705 memory.go:184] no items to output this cycle
I0323 14:20:03.409820  543705 cpu.go:275] no items to output this cycle
I0323 14:20:13.409817  543705 cpu.go:282] Add success.
E0323 14:20:13.410107  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:20:13.410127  543705 memory.go:191] Add success.
W0323 14:20:13.410154  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:20:13.410165  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:20:13.410167  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:20:13.419858  543705 net.go:648] Add success.
I0323 14:20:13.420900  543705 net.go:770] primary dev: ETH0
I0323 14:20:13.420914  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:20:13.420926  543705 net.go:698] Add success.
I0323 14:20:14.453964  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:20:14.454332  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:20:14.454421  543705 disk_worker.go:708] disk space is not compliant
W0323 14:20:14.454426  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:20:14.457727  543705 disk_worker.go:494] system disk:vda1
I0323 14:20:14.457773  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:20:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:20:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:20:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:20:16.458090  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:20:16.472487  543705 disk_local_worker.go:436] Get disk info: []
I0323 14:20:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 14:20:23.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:20:23.409838  543705 memory.go:184] no items to output this cycle
E0323 14:20:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:20:33.409807  543705 memory.go:184] no items to output this cycle
I0323 14:20:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 14:20:40.377673  543705 disk_info.go:125] begin check local disk info of client
I0323 14:20:40.380234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:20:40.380240  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c9980 0xc0003c99c0]
E0323 14:20:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:20:43.410762  543705 memory.go:191] Add success.
I0323 14:20:43.409809  543705 cpu.go:282] Add success.
I0323 14:20:43.420555  543705 net.go:648] Add success.
I0323 14:20:43.423269  543705 net.go:770] primary dev: ETH0
I0323 14:20:43.423282  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:20:43.423294  543705 net.go:698] Add success.
I0323 14:20:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:20:46.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:20:46.458109  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:20:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:20:53.409823  543705 memory.go:184] no items to output this cycle
I0323 14:20:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 14:21:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:21:03.409791  543705 memory.go:184] no items to output this cycle
I0323 14:21:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 14:21:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:21:13.409812  543705 memory.go:191] Add success.
I0323 14:21:13.409822  543705 cpu.go:282] Add success.
W0323 14:21:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:21:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:21:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:21:13.420135  543705 net.go:648] Add success.
I0323 14:21:13.422830  543705 net.go:770] primary dev: ETH0
I0323 14:21:13.422843  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:21:13.422856  543705 net.go:698] Add success.
I0323 14:21:13.468938  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"403c38fb-780b-4829-abbe-cfddd6ee47cc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:21:13.468974  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:21:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:21:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:21:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 14:21:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:21:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 14:21:14.456668  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:21:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:21:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:21:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:21:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:21:16.472468  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:21:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:21:23.409783  543705 memory.go:184] no items to output this cycle
I0323 14:21:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 14:21:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:21:33.409801  543705 memory.go:184] no items to output this cycle
I0323 14:21:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 14:21:40.381677  543705 disk_info.go:125] begin check local disk info of client
I0323 14:21:40.384245  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:21:40.384255  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004af800 0xc0004af840]
I0323 14:21:40.444664  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:21:40.444669  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:21:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:21:43.410631  543705 memory.go:191] Add success.
I0323 14:21:43.409832  543705 cpu.go:282] Add success.
I0323 14:21:43.420561  543705 net.go:648] Add success.
I0323 14:21:43.423256  543705 net.go:770] primary dev: ETH0
I0323 14:21:43.423269  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:21:43.423281  543705 net.go:698] Add success.
I0323 14:21:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:21:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:21:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:21:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:21:53.409810  543705 memory.go:184] no items to output this cycle
I0323 14:21:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 14:22:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:22:03.409787  543705 cpu.go:275] no items to output this cycle
I0323 14:22:03.409789  543705 memory.go:184] no items to output this cycle
E0323 14:22:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:22:13.409807  543705 memory.go:191] Add success.
I0323 14:22:13.409815  543705 cpu.go:282] Add success.
W0323 14:22:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:22:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:22:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:22:13.420142  543705 net.go:648] Add success.
I0323 14:22:13.422973  543705 net.go:770] primary dev: ETH0
I0323 14:22:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:22:13.423014  543705 net.go:698] Add success.
W0323 14:22:14.455122  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:22:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 14:22:14.455187  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:22:14.456806  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:22:14.456816  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:22:14.456822  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:22:14.456869  543705 disk_worker.go:494] system disk:vda1
I0323 14:22:14.456915  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:22:15.456817  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:22:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:22:16.457904  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:22:16.457902  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:22:16.457960  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:22:16.457979  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:22:16.472319  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:22:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:22:23.409779  543705 memory.go:184] no items to output this cycle
I0323 14:22:23.409816  543705 cpu.go:275] no items to output this cycle
I0323 14:22:33.409866  543705 cpu.go:275] no items to output this cycle
E0323 14:22:33.409993  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:22:33.410015  543705 memory.go:184] no items to output this cycle
I0323 14:22:40.385677  543705 disk_info.go:125] begin check local disk info of client
I0323 14:22:40.388318  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:22:40.388325  543705 disk_info.go:196] parse disk info done, disk is : [0xc000369740 0xc000369780]
E0323 14:22:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:22:43.409810  543705 cpu.go:282] Add success.
I0323 14:22:43.410966  543705 memory.go:191] Add success.
I0323 14:22:43.419787  543705 net.go:648] Add success.
I0323 14:22:43.422742  543705 net.go:770] primary dev: ETH0
I0323 14:22:43.422755  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:22:43.422768  543705 net.go:698] Add success.
I0323 14:22:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:22:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:22:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:22:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:22:53.409793  543705 cpu.go:275] no items to output this cycle
I0323 14:22:53.409808  543705 memory.go:184] no items to output this cycle
E0323 14:23:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:23:03.409792  543705 cpu.go:275] no items to output this cycle
I0323 14:23:03.409803  543705 memory.go:184] no items to output this cycle
E0323 14:23:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:23:13.409834  543705 memory.go:191] Add success.
I0323 14:23:13.409835  543705 cpu.go:282] Add success.
W0323 14:23:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:23:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:23:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:23:13.420188  543705 net.go:648] Add success.
I0323 14:23:13.422822  543705 net.go:770] primary dev: ETH0
I0323 14:23:13.422837  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:23:13.422851  543705 net.go:698] Add success.
I0323 14:23:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:23:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:23:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 14:23:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:23:14.456547  543705 disk_worker.go:494] system disk:vda1
I0323 14:23:14.456591  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:23:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:23:16.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:23:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:23:16.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:23:16.472441  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:23:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:23:23.409789  543705 memory.go:184] no items to output this cycle
I0323 14:23:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 14:23:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:23:33.409778  543705 memory.go:184] no items to output this cycle
I0323 14:23:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 14:23:40.392070  543705 disk_info.go:125] begin check local disk info of client
I0323 14:23:40.394718  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:23:40.394726  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e800 0xc00049e840]
E0323 14:23:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:23:43.410751  543705 memory.go:191] Add success.
I0323 14:23:43.409795  543705 cpu.go:282] Add success.
I0323 14:23:43.420579  543705 net.go:648] Add success.
I0323 14:23:43.423346  543705 net.go:770] primary dev: ETH0
I0323 14:23:43.423362  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:23:43.423375  543705 net.go:698] Add success.
I0323 14:23:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:23:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:23:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:23:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:23:53.409797  543705 memory.go:184] no items to output this cycle
I0323 14:23:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 14:24:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:24:03.409782  543705 memory.go:184] no items to output this cycle
I0323 14:24:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 14:24:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:24:13.409796  543705 memory.go:191] Add success.
I0323 14:24:13.409815  543705 cpu.go:282] Add success.
W0323 14:24:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:24:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:24:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:24:13.420106  543705 net.go:648] Add success.
I0323 14:24:13.422628  543705 net.go:770] primary dev: ETH0
I0323 14:24:13.422642  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:24:13.422655  543705 net.go:698] Add success.
I0323 14:24:13.468333  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ab352947-bb52-4296-acf2-0df911f98d98","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:24:13.468367  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:24:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:24:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:24:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 14:24:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:24:14.456654  543705 disk_worker.go:494] system disk:vda1
I0323 14:24:14.456685  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:24:15.455975  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:24:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:24:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:24:16.458069  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:24:16.472444  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:24:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:24:23.409785  543705 memory.go:184] no items to output this cycle
I0323 14:24:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 14:24:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:24:33.409803  543705 memory.go:184] no items to output this cycle
I0323 14:24:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 14:24:40.397684  543705 disk_info.go:125] begin check local disk info of client
I0323 14:24:40.400248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:24:40.400262  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a6bc0 0xc0004a6c00]
I0323 14:24:40.445679  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:24:40.445684  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:24:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:24:43.410930  543705 memory.go:191] Add success.
I0323 14:24:43.409831  543705 cpu.go:282] Add success.
I0323 14:24:43.421057  543705 net.go:648] Add success.
I0323 14:24:43.423974  543705 net.go:770] primary dev: ETH0
I0323 14:24:43.423987  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:24:43.423999  543705 net.go:698] Add success.
I0323 14:24:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:24:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:24:46.458107  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:24:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:24:53.409812  543705 memory.go:184] no items to output this cycle
I0323 14:24:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 14:25:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:25:03.409796  543705 cpu.go:275] no items to output this cycle
I0323 14:25:03.409807  543705 memory.go:184] no items to output this cycle
W0323 14:25:13.409712  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:25:13.409729  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:25:13.409734  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 14:25:13.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:25:13.409826  543705 memory.go:191] Add success.
I0323 14:25:13.409834  543705 cpu.go:282] Add success.
I0323 14:25:13.420422  543705 net.go:648] Add success.
I0323 14:25:13.423290  543705 net.go:770] primary dev: ETH0
I0323 14:25:13.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:25:13.423328  543705 net.go:698] Add success.
I0323 14:25:14.453950  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:25:14.455296  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:25:14.455320  543705 disk_worker.go:708] disk space is not compliant
W0323 14:25:14.455325  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:25:14.457938  543705 disk_worker.go:494] system disk:vda1
I0323 14:25:14.457983  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:25:15.455998  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:25:16.458030  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:25:16.458106  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:25:16.458141  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:25:16.472643  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:25:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:25:23.409813  543705 memory.go:184] no items to output this cycle
I0323 14:25:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 14:25:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:25:33.409781  543705 memory.go:184] no items to output this cycle
I0323 14:25:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 14:25:40.401685  543705 disk_info.go:125] begin check local disk info of client
I0323 14:25:40.404366  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:25:40.404373  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a1a00 0xc0004a1a40]
E0323 14:25:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:25:43.410897  543705 memory.go:191] Add success.
I0323 14:25:43.409839  543705 cpu.go:282] Add success.
I0323 14:25:43.420803  543705 net.go:648] Add success.
I0323 14:25:43.423606  543705 net.go:770] primary dev: ETH0
I0323 14:25:43.423619  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:25:43.423630  543705 net.go:698] Add success.
I0323 14:25:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:25:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:25:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:25:53.410553  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:25:53.410585  543705 memory.go:184] no items to output this cycle
I0323 14:25:53.410589  543705 cpu.go:275] no items to output this cycle
E0323 14:26:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:26:03.409793  543705 memory.go:184] no items to output this cycle
I0323 14:26:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 14:26:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:26:13.409826  543705 memory.go:191] Add success.
I0323 14:26:13.409827  543705 cpu.go:282] Add success.
W0323 14:26:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:26:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:26:13.409870  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:26:13.420173  543705 net.go:648] Add success.
I0323 14:26:13.422738  543705 net.go:770] primary dev: ETH0
I0323 14:26:13.422751  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:26:13.422763  543705 net.go:698] Add success.
I0323 14:26:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:26:14.455146  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:26:14.455157  543705 disk_worker.go:708] disk space is not compliant
W0323 14:26:14.455160  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:26:14.456524  543705 disk_worker.go:494] system disk:vda1
I0323 14:26:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:26:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:26:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:26:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:26:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:26:16.472420  543705 disk_local_worker.go:436] Get disk info: []
I0323 14:26:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 14:26:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:26:23.409817  543705 memory.go:184] no items to output this cycle
E0323 14:26:33.409859  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:26:33.409880  543705 memory.go:184] no items to output this cycle
I0323 14:26:33.409928  543705 cpu.go:275] no items to output this cycle
I0323 14:26:40.405678  543705 disk_info.go:125] begin check local disk info of client
I0323 14:26:40.408290  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:26:40.408298  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d2000 0xc0004d2040]
E0323 14:26:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:26:43.410674  543705 memory.go:191] Add success.
I0323 14:26:43.409803  543705 cpu.go:282] Add success.
I0323 14:26:43.420469  543705 net.go:648] Add success.
I0323 14:26:43.423174  543705 net.go:770] primary dev: ETH0
I0323 14:26:43.423188  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:26:43.423201  543705 net.go:698] Add success.
I0323 14:26:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:26:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:26:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:26:53.409807  543705 memory.go:184] no items to output this cycle
I0323 14:26:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 14:27:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:27:03.409782  543705 memory.go:184] no items to output this cycle
I0323 14:27:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 14:27:13.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:27:13.409833  543705 memory.go:191] Add success.
I0323 14:27:13.409838  543705 cpu.go:282] Add success.
W0323 14:27:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:27:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:27:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:27:13.420183  543705 net.go:648] Add success.
I0323 14:27:13.429549  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 14:27:13.429630  543705 net.go:770] primary dev: ETH0
I0323 14:27:13.429663  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:27:13.429677  543705 net.go:698] Add success.
I0323 14:27:13.453260  543705 event_worker.go:152] Polling the log file for events...
I0323 14:27:13.468613  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"1284e940-9216-448c-8834-a4bff10ee90a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:27:13.468668  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 14:27:14.455370  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:27:14.455392  543705 disk_worker.go:708] disk space is not compliant
W0323 14:27:14.455396  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:27:14.456460  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:27:14.456470  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:27:14.456476  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:27:14.457209  543705 disk_worker.go:494] system disk:vda1
I0323 14:27:14.457249  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:27:15.456718  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:27:15.456733  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:27:16.458098  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:27:16.458162  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 14:27:16.458177  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:27:16.458185  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:27:16.472554  543705 disk_local_worker.go:436] Get disk info: []
I0323 14:27:23.409855  543705 cpu.go:275] no items to output this cycle
E0323 14:27:23.409980  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:27:23.409996  543705 memory.go:184] no items to output this cycle
E0323 14:27:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:27:33.409810  543705 memory.go:184] no items to output this cycle
I0323 14:27:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 14:27:40.409687  543705 disk_info.go:125] begin check local disk info of client
I0323 14:27:40.412237  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:27:40.412243  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a0200 0xc0004a0240]
I0323 14:27:40.446512  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:27:40.446517  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:27:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:27:43.410693  543705 memory.go:191] Add success.
I0323 14:27:43.409816  543705 cpu.go:282] Add success.
I0323 14:27:43.420383  543705 net.go:648] Add success.
I0323 14:27:43.423411  543705 net.go:770] primary dev: ETH0
I0323 14:27:43.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:27:43.423437  543705 net.go:698] Add success.
I0323 14:27:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:27:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:27:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:27:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:27:53.409791  543705 memory.go:184] no items to output this cycle
I0323 14:27:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 14:28:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:28:03.409768  543705 memory.go:184] no items to output this cycle
I0323 14:28:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 14:28:13.409940  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:28:13.409980  543705 memory.go:191] Add success.
W0323 14:28:13.410010  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:28:13.410022  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:28:13.410025  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:28:13.410029  543705 cpu.go:282] Add success.
I0323 14:28:13.419741  543705 net.go:648] Add success.
I0323 14:28:13.422746  543705 net.go:770] primary dev: ETH0
I0323 14:28:13.422760  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:28:13.422771  543705 net.go:698] Add success.
I0323 14:28:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:28:14.455225  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:28:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0323 14:28:14.455242  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:28:14.456678  543705 disk_worker.go:494] system disk:vda1
I0323 14:28:14.456714  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:28:15.455043  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:28:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:28:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:28:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:28:16.472506  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:28:23.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:28:23.409834  543705 memory.go:184] no items to output this cycle
I0323 14:28:23.409853  543705 cpu.go:275] no items to output this cycle
E0323 14:28:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:28:33.409797  543705 memory.go:184] no items to output this cycle
I0323 14:28:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 14:28:40.412842  543705 disk_info.go:125] begin check local disk info of client
I0323 14:28:40.415610  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:28:40.415617  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b600 0xc00007b640]
E0323 14:28:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:28:43.409803  543705 memory.go:191] Add success.
I0323 14:28:43.409849  543705 cpu.go:282] Add success.
I0323 14:28:43.420553  543705 net.go:648] Add success.
I0323 14:28:43.421525  543705 net.go:770] primary dev: ETH0
I0323 14:28:43.421538  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:28:43.421557  543705 net.go:698] Add success.
I0323 14:28:46.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:28:46.458085  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:28:46.458115  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:28:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:28:53.409814  543705 memory.go:184] no items to output this cycle
I0323 14:28:53.409829  543705 cpu.go:275] no items to output this cycle
E0323 14:29:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:29:03.409828  543705 memory.go:184] no items to output this cycle
I0323 14:29:03.409832  543705 cpu.go:275] no items to output this cycle
E0323 14:29:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:29:13.409829  543705 memory.go:191] Add success.
I0323 14:29:13.409842  543705 cpu.go:282] Add success.
W0323 14:29:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:29:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:29:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:29:13.420293  543705 net.go:648] Add success.
I0323 14:29:13.423285  543705 net.go:770] primary dev: ETH0
I0323 14:29:13.423299  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:29:13.423311  543705 net.go:698] Add success.
W0323 14:29:14.455215  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:29:14.455311  543705 disk_worker.go:708] disk space is not compliant
W0323 14:29:14.455316  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:29:14.455777  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:29:14.457299  543705 disk_worker.go:494] system disk:vda1
I0323 14:29:14.457344  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:29:15.455990  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:29:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:29:16.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:29:16.458118  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:29:16.472554  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:29:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:29:23.409787  543705 memory.go:184] no items to output this cycle
I0323 14:29:23.409800  543705 cpu.go:275] no items to output this cycle
E0323 14:29:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:29:33.409817  543705 memory.go:184] no items to output this cycle
I0323 14:29:33.409832  543705 cpu.go:275] no items to output this cycle
I0323 14:29:40.415811  543705 disk_info.go:125] begin check local disk info of client
I0323 14:29:40.418462  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:29:40.418470  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002424c0 0xc000242500]
E0323 14:29:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:29:43.410662  543705 memory.go:191] Add success.
I0323 14:29:43.409839  543705 cpu.go:282] Add success.
I0323 14:29:43.420468  543705 net.go:648] Add success.
I0323 14:29:43.423104  543705 net.go:770] primary dev: ETH0
I0323 14:29:43.423117  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:29:43.423130  543705 net.go:698] Add success.
I0323 14:29:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:29:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:29:46.458117  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:29:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:29:53.409797  543705 memory.go:184] no items to output this cycle
I0323 14:29:53.409835  543705 cpu.go:275] no items to output this cycle
E0323 14:30:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:30:03.409814  543705 memory.go:184] no items to output this cycle
I0323 14:30:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 14:30:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:30:13.409803  543705 memory.go:191] Add success.
I0323 14:30:13.409806  543705 cpu.go:282] Add success.
W0323 14:30:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:30:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:30:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:30:13.420054  543705 net.go:648] Add success.
I0323 14:30:13.423156  543705 net.go:770] primary dev: ETH0
I0323 14:30:13.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:30:13.423181  543705 net.go:698] Add success.
I0323 14:30:13.468849  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f93613ce-6ba9-4fd9-8b44-a0063e8b4bb9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:30:13.468891  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 14:30:14.456257  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:30:14.456355  543705 disk_worker.go:708] disk space is not compliant
W0323 14:30:14.456360  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:30:14.456817  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:30:14.516083  543705 disk_worker.go:494] system disk:vda1
I0323 14:30:14.516147  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:30:15.456003  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:30:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:30:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:30:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:30:16.472478  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:30:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:30:23.409801  543705 cpu.go:275] no items to output this cycle
I0323 14:30:23.409831  543705 memory.go:184] no items to output this cycle
E0323 14:30:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:30:33.409812  543705 memory.go:184] no items to output this cycle
I0323 14:30:33.409834  543705 cpu.go:275] no items to output this cycle
I0323 14:30:40.418841  543705 disk_info.go:125] begin check local disk info of client
I0323 14:30:40.421551  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:30:40.421559  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2200 0xc0004f2240]
I0323 14:30:40.446775  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:30:40.446780  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:30:43.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:30:43.410753  543705 memory.go:191] Add success.
I0323 14:30:43.409880  543705 cpu.go:282] Add success.
I0323 14:30:43.420631  543705 net.go:648] Add success.
I0323 14:30:43.423389  543705 net.go:770] primary dev: ETH0
I0323 14:30:43.423404  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:30:43.423418  543705 net.go:698] Add success.
I0323 14:30:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:30:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:30:46.458107  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:30:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:30:53.409806  543705 memory.go:184] no items to output this cycle
I0323 14:30:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 14:31:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:31:03.409814  543705 memory.go:184] no items to output this cycle
I0323 14:31:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 14:31:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:31:13.409815  543705 memory.go:191] Add success.
W0323 14:31:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:31:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:31:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:31:13.409859  543705 cpu.go:282] Add success.
I0323 14:31:13.420291  543705 net.go:648] Add success.
I0323 14:31:13.421270  543705 net.go:770] primary dev: ETH0
I0323 14:31:13.421283  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:31:13.421295  543705 net.go:698] Add success.
I0323 14:31:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:31:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:31:14.455215  543705 disk_worker.go:708] disk space is not compliant
W0323 14:31:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:31:14.456685  543705 disk_worker.go:494] system disk:vda1
I0323 14:31:14.456719  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:31:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:31:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:31:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:31:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:31:16.472543  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:31:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:31:23.409816  543705 memory.go:184] no items to output this cycle
I0323 14:31:23.409825  543705 cpu.go:275] no items to output this cycle
E0323 14:31:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:31:33.409802  543705 memory.go:184] no items to output this cycle
I0323 14:31:33.409841  543705 cpu.go:275] no items to output this cycle
I0323 14:31:40.421808  543705 disk_info.go:125] begin check local disk info of client
I0323 14:31:40.424472  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:31:40.424479  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f3ec0 0xc0004f3f00]
E0323 14:31:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:31:43.410664  543705 memory.go:191] Add success.
I0323 14:31:43.409851  543705 cpu.go:282] Add success.
I0323 14:31:43.420842  543705 net.go:648] Add success.
I0323 14:31:43.423588  543705 net.go:770] primary dev: ETH0
I0323 14:31:43.423605  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:31:43.423623  543705 net.go:698] Add success.
I0323 14:31:46.458013  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:31:46.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:31:46.458144  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:31:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:31:53.409821  543705 memory.go:184] no items to output this cycle
I0323 14:31:53.409830  543705 cpu.go:275] no items to output this cycle
E0323 14:32:03.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:32:03.409800  543705 memory.go:184] no items to output this cycle
I0323 14:32:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 14:32:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:32:13.409823  543705 memory.go:191] Add success.
I0323 14:32:13.409830  543705 cpu.go:282] Add success.
W0323 14:32:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:32:13.412610  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:32:13.412615  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:32:13.420312  543705 net.go:648] Add success.
I0323 14:32:13.422240  543705 net.go:770] primary dev: ETH0
I0323 14:32:13.422256  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:32:13.422269  543705 net.go:698] Add success.
W0323 14:32:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:32:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 14:32:14.455215  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:32:14.456102  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:32:14.456112  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:32:14.456118  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:32:14.456632  543705 disk_worker.go:494] system disk:vda1
I0323 14:32:14.456664  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:32:15.457053  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:32:15.457067  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:32:16.458117  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:32:16.458195  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 14:32:16.458193  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:32:16.458216  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:32:16.472677  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:32:23.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:32:23.409820  543705 memory.go:184] no items to output this cycle
I0323 14:32:23.409829  543705 cpu.go:275] no items to output this cycle
E0323 14:32:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:32:33.409784  543705 memory.go:184] no items to output this cycle
I0323 14:32:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 14:32:40.424865  543705 disk_info.go:125] begin check local disk info of client
I0323 14:32:40.427500  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:32:40.427507  543705 disk_info.go:196] parse disk info done, disk is : [0xc000467780 0xc0004677c0]
E0323 14:32:43.409880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:32:43.410863  543705 memory.go:191] Add success.
I0323 14:32:43.409964  543705 cpu.go:282] Add success.
I0323 14:32:43.419767  543705 net.go:648] Add success.
I0323 14:32:43.422410  543705 net.go:770] primary dev: ETH0
I0323 14:32:43.422425  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:32:43.422439  543705 net.go:698] Add success.
I0323 14:32:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:32:46.458087  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:32:46.458118  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:32:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:32:53.409783  543705 memory.go:184] no items to output this cycle
I0323 14:32:53.409841  543705 cpu.go:275] no items to output this cycle
E0323 14:33:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:33:03.409818  543705 memory.go:184] no items to output this cycle
I0323 14:33:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 14:33:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:33:13.409794  543705 memory.go:191] Add success.
W0323 14:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 14:33:13.409828  543705 cpu.go:282] Add success.
W0323 14:33:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:33:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:33:13.420119  543705 net.go:648] Add success.
I0323 14:33:13.422894  543705 net.go:770] primary dev: ETH0
I0323 14:33:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:33:13.422919  543705 net.go:698] Add success.
I0323 14:33:13.469334  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"590c131a-4414-41f9-ac41-cabe8308fa6e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:33:13.469368  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:33:14.454978  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:33:14.455127  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:33:14.455209  543705 disk_worker.go:708] disk space is not compliant
W0323 14:33:14.455212  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:33:14.456643  543705 disk_worker.go:494] system disk:vda1
I0323 14:33:14.456676  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:33:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:33:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:33:16.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:33:16.458114  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:33:16.472549  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:33:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:33:23.409794  543705 memory.go:184] no items to output this cycle
I0323 14:33:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 14:33:33.409898  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:33:33.409920  543705 cpu.go:275] no items to output this cycle
I0323 14:33:33.409920  543705 memory.go:184] no items to output this cycle
I0323 14:33:40.427854  543705 disk_info.go:125] begin check local disk info of client
I0323 14:33:40.430652  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:33:40.430661  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536000 0xc000536040]
I0323 14:33:40.447833  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:33:40.447838  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:33:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:33:43.410812  543705 memory.go:191] Add success.
I0323 14:33:43.409899  543705 cpu.go:282] Add success.
I0323 14:33:43.420668  543705 net.go:648] Add success.
I0323 14:33:43.423533  543705 net.go:770] primary dev: ETH0
I0323 14:33:43.423549  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:33:43.423564  543705 net.go:698] Add success.
I0323 14:33:46.458026  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:33:46.458118  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:33:46.458151  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:33:53.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:33:53.409789  543705 memory.go:184] no items to output this cycle
I0323 14:33:53.409849  543705 cpu.go:275] no items to output this cycle
E0323 14:34:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:34:03.409821  543705 memory.go:184] no items to output this cycle
I0323 14:34:03.409833  543705 cpu.go:275] no items to output this cycle
E0323 14:34:13.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:34:13.410092  543705 cpu.go:282] Add success.
I0323 14:34:13.410362  543705 memory.go:191] Add success.
W0323 14:34:13.412868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:34:13.412882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:34:13.412885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:34:13.420736  543705 net.go:648] Add success.
I0323 14:34:13.422901  543705 net.go:770] primary dev: ETH0
I0323 14:34:13.422918  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:34:13.422932  543705 net.go:698] Add success.
I0323 14:34:14.455006  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:34:14.455231  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:34:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 14:34:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:34:14.456825  543705 disk_worker.go:494] system disk:vda1
I0323 14:34:14.456875  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:34:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:34:16.458035  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:34:16.458130  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:34:16.458161  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:34:16.472851  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:34:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:34:23.409803  543705 memory.go:184] no items to output this cycle
I0323 14:34:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 14:34:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:34:33.409809  543705 memory.go:184] no items to output this cycle
I0323 14:34:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 14:34:40.430872  543705 disk_info.go:125] begin check local disk info of client
I0323 14:34:40.433536  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:34:40.433544  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 14:34:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:34:43.410687  543705 memory.go:191] Add success.
I0323 14:34:43.409910  543705 cpu.go:282] Add success.
I0323 14:34:43.420469  543705 net.go:648] Add success.
I0323 14:34:43.423391  543705 net.go:770] primary dev: ETH0
I0323 14:34:43.423406  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:34:43.423420  543705 net.go:698] Add success.
I0323 14:34:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:34:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:34:46.458109  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:34:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:34:53.409812  543705 memory.go:184] no items to output this cycle
I0323 14:34:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 14:35:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:35:03.409816  543705 memory.go:184] no items to output this cycle
I0323 14:35:03.409827  543705 cpu.go:275] no items to output this cycle
I0323 14:35:13.409865  543705 cpu.go:282] Add success.
E0323 14:35:13.410239  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:35:13.410264  543705 memory.go:191] Add success.
W0323 14:35:13.410295  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:35:13.410311  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:35:13.410315  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:35:13.420596  543705 net.go:648] Add success.
I0323 14:35:13.421753  543705 net.go:770] primary dev: ETH0
I0323 14:35:13.421772  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:35:13.421790  543705 net.go:698] Add success.
I0323 14:35:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:35:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:35:14.455214  543705 disk_worker.go:708] disk space is not compliant
W0323 14:35:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:35:14.456658  543705 disk_worker.go:494] system disk:vda1
I0323 14:35:14.456693  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:35:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:35:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:35:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:35:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:35:16.472461  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:35:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:35:23.409944  543705 memory.go:184] no items to output this cycle
I0323 14:35:23.410002  543705 cpu.go:275] no items to output this cycle
E0323 14:35:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:35:33.409816  543705 memory.go:184] no items to output this cycle
I0323 14:35:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 14:35:40.433886  543705 disk_info.go:125] begin check local disk info of client
I0323 14:35:40.436496  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:35:40.436503  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 14:35:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:35:43.410599  543705 memory.go:191] Add success.
I0323 14:35:43.409807  543705 cpu.go:282] Add success.
I0323 14:35:43.420344  543705 net.go:648] Add success.
I0323 14:35:43.422929  543705 net.go:770] primary dev: ETH0
I0323 14:35:43.422943  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:35:43.422955  543705 net.go:698] Add success.
I0323 14:35:46.458018  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:35:46.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:35:46.458150  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:35:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:35:53.409809  543705 memory.go:184] no items to output this cycle
I0323 14:35:53.409861  543705 cpu.go:275] no items to output this cycle
E0323 14:36:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:36:03.409784  543705 memory.go:184] no items to output this cycle
I0323 14:36:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 14:36:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:36:13.409801  543705 memory.go:191] Add success.
I0323 14:36:13.409800  543705 cpu.go:282] Add success.
W0323 14:36:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:36:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:36:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:36:13.420138  543705 net.go:648] Add success.
I0323 14:36:13.422965  543705 net.go:770] primary dev: ETH0
I0323 14:36:13.422981  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:36:13.422994  543705 net.go:698] Add success.
I0323 14:36:13.468762  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"461a48dc-fb27-43aa-ac73-cc89246862dd","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:36:13.468793  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:36:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:36:14.455125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:36:14.455198  543705 disk_worker.go:708] disk space is not compliant
W0323 14:36:14.455201  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:36:14.456610  543705 disk_worker.go:494] system disk:vda1
I0323 14:36:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:36:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:36:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:36:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:36:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:36:16.472376  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:36:23.409882  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:36:23.409901  543705 memory.go:184] no items to output this cycle
I0323 14:36:23.409972  543705 cpu.go:275] no items to output this cycle
E0323 14:36:33.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:36:33.409841  543705 memory.go:184] no items to output this cycle
I0323 14:36:33.409855  543705 cpu.go:275] no items to output this cycle
I0323 14:36:40.436887  543705 disk_info.go:125] begin check local disk info of client
I0323 14:36:40.439480  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:36:40.439487  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa300 0xc0001aa340]
I0323 14:36:40.448634  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:36:40.448639  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:36:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:36:43.410940  543705 memory.go:191] Add success.
I0323 14:36:43.409840  543705 cpu.go:282] Add success.
I0323 14:36:43.420411  543705 net.go:770] primary dev: ETH0
I0323 14:36:43.420423  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:36:43.420437  543705 net.go:698] Add success.
I0323 14:36:43.420767  543705 net.go:648] Add success.
I0323 14:36:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:36:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:36:46.458106  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:36:53.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:36:53.409843  543705 memory.go:184] no items to output this cycle
I0323 14:36:53.409859  543705 cpu.go:275] no items to output this cycle
E0323 14:37:03.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:37:03.409826  543705 memory.go:184] no items to output this cycle
I0323 14:37:03.409841  543705 cpu.go:275] no items to output this cycle
E0323 14:37:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:37:13.409832  543705 memory.go:191] Add success.
I0323 14:37:13.409845  543705 cpu.go:282] Add success.
W0323 14:37:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:37:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:37:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:37:13.420183  543705 net.go:648] Add success.
I0323 14:37:13.423081  543705 net.go:770] primary dev: ETH0
I0323 14:37:13.423094  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:37:13.423106  543705 net.go:698] Add success.
I0323 14:37:13.453664  543705 event_worker.go:152] Polling the log file for events...
W0323 14:37:14.455181  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:37:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 14:37:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:37:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 14:37:14.456590  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:37:14.457597  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:37:14.457606  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:37:14.457612  543705 custom_config.go:64] query custom config with name: gpu
E0323 14:37:15.457097  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:37:15.457112  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:37:16.458097  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:37:16.458161  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:37:16.458193  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:37:16.458198  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:37:16.472555  543705 disk_local_worker.go:436] Get disk info: []
I0323 14:37:23.409819  543705 cpu.go:275] no items to output this cycle
E0323 14:37:23.409845  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:37:23.409865  543705 memory.go:184] no items to output this cycle
E0323 14:37:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:37:33.409781  543705 memory.go:184] no items to output this cycle
I0323 14:37:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 14:37:40.439892  543705 disk_info.go:125] begin check local disk info of client
I0323 14:37:40.442472  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:37:40.442479  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2440 0xc0004f2480]
E0323 14:37:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:37:43.409817  543705 cpu.go:282] Add success.
I0323 14:37:43.409832  543705 memory.go:191] Add success.
I0323 14:37:43.420062  543705 net.go:648] Add success.
I0323 14:37:43.421096  543705 net.go:770] primary dev: ETH0
I0323 14:37:43.421115  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:37:43.421134  543705 net.go:698] Add success.
I0323 14:37:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:37:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:37:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:37:53.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:37:53.409867  543705 memory.go:184] no items to output this cycle
I0323 14:37:53.410021  543705 cpu.go:275] no items to output this cycle
E0323 14:38:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:38:03.409804  543705 memory.go:184] no items to output this cycle
I0323 14:38:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 14:38:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:38:13.409803  543705 memory.go:191] Add success.
I0323 14:38:13.409805  543705 cpu.go:282] Add success.
W0323 14:38:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:38:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:38:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:38:13.420121  543705 net.go:648] Add success.
I0323 14:38:13.423184  543705 net.go:770] primary dev: ETH0
I0323 14:38:13.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:38:13.423210  543705 net.go:698] Add success.
I0323 14:38:14.454998  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:38:14.455142  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:38:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0323 14:38:14.455219  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:38:14.457712  543705 disk_worker.go:494] system disk:vda1
I0323 14:38:14.457757  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:38:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:38:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:38:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:38:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:38:16.472469  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:38:23.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:38:23.409821  543705 memory.go:184] no items to output this cycle
I0323 14:38:23.409831  543705 cpu.go:275] no items to output this cycle
E0323 14:38:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:38:33.409781  543705 memory.go:184] no items to output this cycle
I0323 14:38:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 14:38:40.442921  543705 disk_info.go:125] begin check local disk info of client
I0323 14:38:40.445500  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:38:40.445507  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab800 0xc0001ab840]
E0323 14:38:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:38:43.410679  543705 memory.go:191] Add success.
I0323 14:38:43.409811  543705 cpu.go:282] Add success.
I0323 14:38:43.420393  543705 net.go:648] Add success.
I0323 14:38:43.423039  543705 net.go:770] primary dev: ETH0
I0323 14:38:43.423052  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:38:43.423065  543705 net.go:698] Add success.
I0323 14:38:46.458040  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:38:46.458135  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:38:46.458168  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:38:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:38:53.409794  543705 memory.go:184] no items to output this cycle
I0323 14:38:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 14:39:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:39:03.409791  543705 cpu.go:275] no items to output this cycle
I0323 14:39:03.409795  543705 memory.go:184] no items to output this cycle
E0323 14:39:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:39:13.409811  543705 memory.go:191] Add success.
I0323 14:39:13.409811  543705 cpu.go:282] Add success.
W0323 14:39:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:39:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:39:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:39:13.420322  543705 net.go:648] Add success.
I0323 14:39:13.423345  543705 net.go:770] primary dev: ETH0
I0323 14:39:13.423361  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:39:13.423376  543705 net.go:698] Add success.
I0323 14:39:13.463540  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f03cb83e-465c-4fb4-8b2a-0650a82d727b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:39:13.463574  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:39:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:39:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:39:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 14:39:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:39:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 14:39:14.456633  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:39:15.456006  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:39:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:39:16.458079  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:39:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:39:16.472472  543705 disk_local_worker.go:436] Get disk info: []
I0323 14:39:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 14:39:23.409908  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:39:23.409922  543705 memory.go:184] no items to output this cycle
E0323 14:39:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:39:33.409802  543705 memory.go:184] no items to output this cycle
I0323 14:39:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 14:39:40.445932  543705 disk_info.go:125] begin check local disk info of client
I0323 14:39:40.448531  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:39:40.448538  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b480 0xc00007b4c0]
I0323 14:39:40.449620  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:39:40.449624  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
E0323 14:39:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:39:43.409831  543705 memory.go:191] Add success.
I0323 14:39:43.409839  543705 cpu.go:282] Add success.
I0323 14:39:43.420146  543705 net.go:648] Add success.
I0323 14:39:43.421198  543705 net.go:770] primary dev: ETH0
I0323 14:39:43.421212  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:39:43.421226  543705 net.go:698] Add success.
I0323 14:39:46.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:39:46.458114  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:39:46.458147  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:39:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:39:53.409793  543705 memory.go:184] no items to output this cycle
I0323 14:39:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 14:40:03.413776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:40:03.413794  543705 memory.go:184] no items to output this cycle
I0323 14:40:03.413841  543705 cpu.go:275] no items to output this cycle
E0323 14:40:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:40:13.409833  543705 memory.go:191] Add success.
I0323 14:40:13.409838  543705 cpu.go:282] Add success.
W0323 14:40:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:40:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:40:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:40:13.420336  543705 net.go:648] Add success.
I0323 14:40:13.423198  543705 net.go:770] primary dev: ETH0
I0323 14:40:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:40:13.423227  543705 net.go:698] Add success.
I0323 14:40:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:40:14.455218  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:40:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 14:40:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:40:14.456648  543705 disk_worker.go:494] system disk:vda1
I0323 14:40:14.456680  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:40:15.456026  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:40:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:40:16.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:40:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:40:16.472474  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:40:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:40:23.409926  543705 memory.go:184] no items to output this cycle
I0323 14:40:23.409954  543705 cpu.go:275] no items to output this cycle
E0323 14:40:33.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:40:33.409839  543705 memory.go:184] no items to output this cycle
I0323 14:40:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 14:40:40.448990  543705 disk_info.go:125] begin check local disk info of client
I0323 14:40:40.451669  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:40:40.451677  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2000 0xc0004f2040]
E0323 14:40:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:40:43.410687  543705 memory.go:191] Add success.
I0323 14:40:43.409809  543705 cpu.go:282] Add success.
I0323 14:40:43.420412  543705 net.go:648] Add success.
I0323 14:40:43.423187  543705 net.go:770] primary dev: ETH0
I0323 14:40:43.423202  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:40:43.423217  543705 net.go:698] Add success.
I0323 14:40:46.458010  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:40:46.458108  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:40:46.458157  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:40:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:40:53.409815  543705 memory.go:184] no items to output this cycle
I0323 14:40:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 14:41:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:41:03.409821  543705 memory.go:184] no items to output this cycle
I0323 14:41:03.409820  543705 cpu.go:275] no items to output this cycle
E0323 14:41:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:41:13.409793  543705 memory.go:191] Add success.
W0323 14:41:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 14:41:13.409830  543705 cpu.go:282] Add success.
W0323 14:41:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:41:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:41:13.420138  543705 net.go:648] Add success.
I0323 14:41:13.423038  543705 net.go:770] primary dev: ETH0
I0323 14:41:13.423054  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:41:13.423068  543705 net.go:698] Add success.
I0323 14:41:14.455006  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:41:14.455237  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:41:14.455253  543705 disk_worker.go:708] disk space is not compliant
W0323 14:41:14.455257  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:41:14.457260  543705 disk_worker.go:494] system disk:vda1
I0323 14:41:14.457307  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:41:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:41:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:41:16.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:41:16.458083  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:41:16.472518  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:41:23.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:41:23.409834  543705 memory.go:184] no items to output this cycle
I0323 14:41:23.409856  543705 cpu.go:275] no items to output this cycle
E0323 14:41:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:41:33.409792  543705 memory.go:184] no items to output this cycle
I0323 14:41:33.409843  543705 cpu.go:275] no items to output this cycle
I0323 14:41:40.451767  543705 disk_info.go:125] begin check local disk info of client
I0323 14:41:40.454659  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:41:40.454667  543705 disk_info.go:196] parse disk info done, disk is : [0xc000507d80 0xc000507dc0]
I0323 14:41:43.409874  543705 cpu.go:282] Add success.
E0323 14:41:43.410355  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:41:43.411746  543705 memory.go:191] Add success.
I0323 14:41:43.420661  543705 net.go:648] Add success.
I0323 14:41:43.423476  543705 net.go:770] primary dev: ETH0
I0323 14:41:43.423492  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:41:43.423506  543705 net.go:698] Add success.
I0323 14:41:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:41:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:41:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:41:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:41:53.409794  543705 memory.go:184] no items to output this cycle
I0323 14:41:53.409838  543705 cpu.go:275] no items to output this cycle
E0323 14:42:03.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:42:03.409827  543705 cpu.go:275] no items to output this cycle
I0323 14:42:03.409830  543705 memory.go:184] no items to output this cycle
E0323 14:42:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:42:13.409816  543705 cpu.go:282] Add success.
I0323 14:42:13.409839  543705 memory.go:191] Add success.
W0323 14:42:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:42:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:42:13.409895  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:42:13.420356  543705 net.go:648] Add success.
I0323 14:42:13.423055  543705 net.go:770] primary dev: ETH0
I0323 14:42:13.423068  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:42:13.423080  543705 net.go:698] Add success.
I0323 14:42:13.464171  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5cdc3cd0-1799-479d-8482-50ff755236b5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:42:13.464206  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 14:42:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:42:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 14:42:14.455207  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:42:14.457167  543705 disk_worker.go:494] system disk:vda1
I0323 14:42:14.457206  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:42:14.457275  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:42:14.457286  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:42:14.457293  543705 custom_config.go:64] query custom config with name: gpu
E0323 14:42:15.457130  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:42:15.457145  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:42:16.459218  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:42:16.459308  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:42:16.459331  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:42:16.459456  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:42:16.473026  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:42:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:42:23.409804  543705 memory.go:184] no items to output this cycle
I0323 14:42:23.409850  543705 cpu.go:275] no items to output this cycle
E0323 14:42:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:42:33.409823  543705 memory.go:184] no items to output this cycle
I0323 14:42:33.409835  543705 cpu.go:275] no items to output this cycle
I0323 14:42:40.450015  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:42:40.450023  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 14:42:40.455080  543705 disk_info.go:125] begin check local disk info of client
I0323 14:42:40.457726  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:42:40.457732  543705 disk_info.go:196] parse disk info done, disk is : [0xc000377780 0xc0003777c0]
E0323 14:42:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:42:43.410657  543705 memory.go:191] Add success.
I0323 14:42:43.409799  543705 cpu.go:282] Add success.
I0323 14:42:43.420449  543705 net.go:648] Add success.
I0323 14:42:43.423121  543705 net.go:770] primary dev: ETH0
I0323 14:42:43.423135  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:42:43.423150  543705 net.go:698] Add success.
I0323 14:42:46.458014  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:42:46.458111  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:42:46.458150  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:42:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:42:53.409816  543705 memory.go:184] no items to output this cycle
I0323 14:42:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 14:43:03.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:43:03.409830  543705 memory.go:184] no items to output this cycle
I0323 14:43:03.409862  543705 cpu.go:275] no items to output this cycle
E0323 14:43:13.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:43:13.409827  543705 memory.go:191] Add success.
I0323 14:43:13.409835  543705 cpu.go:282] Add success.
W0323 14:43:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:43:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:43:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:43:13.420192  543705 net.go:648] Add success.
I0323 14:43:13.422830  543705 net.go:770] primary dev: ETH0
I0323 14:43:13.422842  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:43:13.422854  543705 net.go:698] Add success.
I0323 14:43:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:43:14.455129  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:43:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 14:43:14.455217  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:43:14.456640  543705 disk_worker.go:494] system disk:vda1
I0323 14:43:14.456675  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:43:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:43:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:43:16.458080  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:43:16.458105  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:43:16.472480  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:43:23.410253  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:43:23.410271  543705 memory.go:184] no items to output this cycle
I0323 14:43:23.410273  543705 cpu.go:275] no items to output this cycle
E0323 14:43:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:43:33.409821  543705 memory.go:184] no items to output this cycle
I0323 14:43:33.409832  543705 cpu.go:275] no items to output this cycle
I0323 14:43:40.457830  543705 disk_info.go:125] begin check local disk info of client
I0323 14:43:40.460486  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:43:40.460494  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0800 0xc0002b0840]
E0323 14:43:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:43:43.410690  543705 memory.go:191] Add success.
I0323 14:43:43.409841  543705 cpu.go:282] Add success.
I0323 14:43:43.420439  543705 net.go:648] Add success.
I0323 14:43:43.422921  543705 net.go:770] primary dev: ETH0
I0323 14:43:43.422937  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:43:43.422951  543705 net.go:698] Add success.
I0323 14:43:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:43:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:43:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:43:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 14:43:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:43:53.409819  543705 memory.go:184] no items to output this cycle
E0323 14:44:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:44:03.409817  543705 memory.go:184] no items to output this cycle
I0323 14:44:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 14:44:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:44:13.409828  543705 memory.go:191] Add success.
I0323 14:44:13.409832  543705 cpu.go:282] Add success.
W0323 14:44:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:44:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:44:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:44:13.420177  543705 net.go:648] Add success.
I0323 14:44:13.423101  543705 net.go:770] primary dev: ETH0
I0323 14:44:13.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:44:13.423126  543705 net.go:698] Add success.
I0323 14:44:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:44:14.455175  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:44:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 14:44:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:44:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 14:44:14.456666  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:44:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:44:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:44:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:44:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:44:16.472384  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:44:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:44:23.409776  543705 memory.go:184] no items to output this cycle
I0323 14:44:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 14:44:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:44:33.409877  543705 cpu.go:275] no items to output this cycle
I0323 14:44:33.409893  543705 memory.go:184] no items to output this cycle
I0323 14:44:40.461007  543705 disk_info.go:125] begin check local disk info of client
I0323 14:44:40.463642  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:44:40.463650  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005140c0 0xc000514100]
E0323 14:44:43.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:44:43.410724  543705 memory.go:191] Add success.
I0323 14:44:43.409798  543705 cpu.go:282] Add success.
I0323 14:44:43.420422  543705 net.go:648] Add success.
I0323 14:44:43.423086  543705 net.go:770] primary dev: ETH0
I0323 14:44:43.423099  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:44:43.423112  543705 net.go:698] Add success.
I0323 14:44:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:44:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:44:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:44:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:44:53.409782  543705 memory.go:184] no items to output this cycle
I0323 14:44:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 14:45:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:45:03.409818  543705 memory.go:184] no items to output this cycle
I0323 14:45:03.409833  543705 cpu.go:275] no items to output this cycle
E0323 14:45:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:45:13.409814  543705 memory.go:191] Add success.
I0323 14:45:13.409816  543705 cpu.go:282] Add success.
W0323 14:45:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:45:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:45:13.409858  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:45:13.420158  543705 net.go:648] Add success.
I0323 14:45:13.422898  543705 net.go:770] primary dev: ETH0
I0323 14:45:13.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:45:13.422924  543705 net.go:698] Add success.
I0323 14:45:13.470185  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3a19fa42-f04d-4ce6-a4cc-dcf3c482f2e9","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:45:13.470217  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:45:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:45:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:45:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 14:45:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:45:14.456533  543705 disk_worker.go:494] system disk:vda1
I0323 14:45:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:45:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:45:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:45:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:45:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:45:16.472485  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:45:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:45:23.409784  543705 memory.go:184] no items to output this cycle
I0323 14:45:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 14:45:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:45:33.409815  543705 memory.go:184] no items to output this cycle
I0323 14:45:33.409829  543705 cpu.go:275] no items to output this cycle
I0323 14:45:40.451020  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:45:40.451028  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 14:45:40.464067  543705 disk_info.go:125] begin check local disk info of client
I0323 14:45:40.466686  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:45:40.466693  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a8b00 0xc0002a8b40]
E0323 14:45:43.409862  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:45:43.410871  543705 memory.go:191] Add success.
I0323 14:45:43.410107  543705 cpu.go:282] Add success.
I0323 14:45:43.419758  543705 net.go:648] Add success.
I0323 14:45:43.422631  543705 net.go:770] primary dev: ETH0
I0323 14:45:43.422644  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:45:43.422656  543705 net.go:698] Add success.
I0323 14:45:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:45:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:45:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:45:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:45:53.409821  543705 memory.go:184] no items to output this cycle
I0323 14:45:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 14:46:03.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:46:03.409769  543705 memory.go:184] no items to output this cycle
I0323 14:46:03.409802  543705 cpu.go:275] no items to output this cycle
E0323 14:46:13.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:46:13.409791  543705 memory.go:191] Add success.
W0323 14:46:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 14:46:13.409816  543705 cpu.go:282] Add success.
W0323 14:46:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:46:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:46:13.420305  543705 net.go:648] Add success.
I0323 14:46:13.423186  543705 net.go:770] primary dev: ETH0
I0323 14:46:13.423204  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:46:13.423216  543705 net.go:698] Add success.
I0323 14:46:14.454957  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:46:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:46:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 14:46:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:46:14.456597  543705 disk_worker.go:494] system disk:vda1
I0323 14:46:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:46:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:46:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:46:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:46:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:46:16.472449  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:46:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:46:23.409816  543705 memory.go:184] no items to output this cycle
I0323 14:46:23.409829  543705 cpu.go:275] no items to output this cycle
E0323 14:46:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:46:33.409806  543705 memory.go:184] no items to output this cycle
I0323 14:46:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 14:46:40.466781  543705 disk_info.go:125] begin check local disk info of client
I0323 14:46:40.469325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:46:40.469331  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049a980 0xc00049a9c0]
E0323 14:46:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:46:43.410678  543705 memory.go:191] Add success.
I0323 14:46:43.409804  543705 cpu.go:282] Add success.
I0323 14:46:43.420606  543705 net.go:648] Add success.
I0323 14:46:43.423439  543705 net.go:770] primary dev: ETH0
I0323 14:46:43.423453  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:46:43.423465  543705 net.go:698] Add success.
I0323 14:46:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:46:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:46:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:46:53.409791  543705 cpu.go:275] no items to output this cycle
E0323 14:46:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:46:53.409812  543705 memory.go:184] no items to output this cycle
E0323 14:47:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:47:03.409806  543705 memory.go:184] no items to output this cycle
I0323 14:47:03.409819  543705 cpu.go:275] no items to output this cycle
E0323 14:47:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:47:13.409783  543705 memory.go:191] Add success.
W0323 14:47:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:47:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:47:13.409821  543705 cpu.go:282] Add success.
I0323 14:47:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:47:13.420223  543705 net.go:648] Add success.
I0323 14:47:13.423133  543705 net.go:770] primary dev: ETH0
I0323 14:47:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:47:13.423177  543705 net.go:698] Add success.
I0323 14:47:13.453758  543705 event_worker.go:152] Polling the log file for events...
W0323 14:47:14.455139  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:47:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 14:47:14.455208  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:47:14.456046  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:47:14.456056  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:47:14.456062  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:47:14.456627  543705 disk_worker.go:494] system disk:vda1
I0323 14:47:14.456662  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:47:15.456839  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:47:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:47:16.457943  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:47:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:47:16.457999  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:47:16.458020  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:47:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:47:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:47:23.409793  543705 memory.go:184] no items to output this cycle
I0323 14:47:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 14:47:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:47:33.409780  543705 memory.go:184] no items to output this cycle
I0323 14:47:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 14:47:40.470052  543705 disk_info.go:125] begin check local disk info of client
I0323 14:47:40.472652  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:47:40.472659  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003fea40 0xc0003fea80]
E0323 14:47:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:47:43.410635  543705 memory.go:191] Add success.
I0323 14:47:43.409820  543705 cpu.go:282] Add success.
I0323 14:47:43.420376  543705 net.go:648] Add success.
I0323 14:47:43.423339  543705 net.go:770] primary dev: ETH0
I0323 14:47:43.423352  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:47:43.423364  543705 net.go:698] Add success.
I0323 14:47:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:47:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:47:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:47:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:47:53.409819  543705 memory.go:184] no items to output this cycle
I0323 14:47:53.410071  543705 cpu.go:275] no items to output this cycle
E0323 14:48:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:48:03.409784  543705 memory.go:184] no items to output this cycle
I0323 14:48:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 14:48:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:48:13.409794  543705 memory.go:191] Add success.
I0323 14:48:13.409795  543705 cpu.go:282] Add success.
W0323 14:48:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:48:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:48:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:48:13.420144  543705 net.go:648] Add success.
I0323 14:48:13.423098  543705 net.go:770] primary dev: ETH0
I0323 14:48:13.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:48:13.423123  543705 net.go:698] Add success.
I0323 14:48:13.470904  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2dc73fb1-7604-4a6c-808a-ae7419e4bdb8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:48:13.470939  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:48:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:48:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:48:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 14:48:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:48:14.456676  543705 disk_worker.go:494] system disk:vda1
I0323 14:48:14.456718  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:48:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:48:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:48:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:48:16.458050  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:48:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:48:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:48:23.409778  543705 memory.go:184] no items to output this cycle
I0323 14:48:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 14:48:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:48:33.409786  543705 memory.go:184] no items to output this cycle
I0323 14:48:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 14:48:40.452033  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:48:40.452040  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 14:48:40.473133  543705 disk_info.go:125] begin check local disk info of client
I0323 14:48:40.475698  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:48:40.475704  543705 disk_info.go:196] parse disk info done, disk is : [0xc000352380 0xc0003523c0]
E0323 14:48:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:48:43.410655  543705 memory.go:191] Add success.
I0323 14:48:43.409802  543705 cpu.go:282] Add success.
I0323 14:48:43.420357  543705 net.go:648] Add success.
I0323 14:48:43.422979  543705 net.go:770] primary dev: ETH0
I0323 14:48:43.422994  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:48:43.423008  543705 net.go:698] Add success.
I0323 14:48:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:48:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:48:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:48:53.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:48:53.409821  543705 memory.go:184] no items to output this cycle
I0323 14:48:53.409835  543705 cpu.go:275] no items to output this cycle
E0323 14:49:03.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:49:03.409878  543705 memory.go:184] no items to output this cycle
I0323 14:49:03.409958  543705 cpu.go:275] no items to output this cycle
E0323 14:49:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:49:13.409832  543705 memory.go:191] Add success.
I0323 14:49:13.409846  543705 cpu.go:282] Add success.
W0323 14:49:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:49:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:49:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:49:13.420296  543705 net.go:648] Add success.
I0323 14:49:13.423157  543705 net.go:770] primary dev: ETH0
I0323 14:49:13.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:49:13.423183  543705 net.go:698] Add success.
I0323 14:49:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:49:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:49:14.455165  543705 disk_worker.go:708] disk space is not compliant
W0323 14:49:14.455168  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:49:14.456514  543705 disk_worker.go:494] system disk:vda1
I0323 14:49:14.456561  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:49:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:49:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:49:16.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:49:16.458085  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:49:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:49:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:49:23.409788  543705 memory.go:184] no items to output this cycle
I0323 14:49:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 14:49:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:49:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 14:49:33.409810  543705 memory.go:184] no items to output this cycle
I0323 14:49:40.476024  543705 disk_info.go:125] begin check local disk info of client
I0323 14:49:40.478596  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:49:40.478602  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536540 0xc000536580]
E0323 14:49:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:49:43.410666  543705 memory.go:191] Add success.
I0323 14:49:43.409843  543705 cpu.go:282] Add success.
I0323 14:49:43.420355  543705 net.go:648] Add success.
I0323 14:49:43.423055  543705 net.go:770] primary dev: ETH0
I0323 14:49:43.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:49:43.423084  543705 net.go:698] Add success.
I0323 14:49:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:49:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:49:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:49:53.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:49:53.409837  543705 memory.go:184] no items to output this cycle
I0323 14:49:53.409849  543705 cpu.go:275] no items to output this cycle
E0323 14:50:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:50:03.409894  543705 cpu.go:275] no items to output this cycle
I0323 14:50:03.409905  543705 memory.go:184] no items to output this cycle
E0323 14:50:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:50:13.409844  543705 memory.go:191] Add success.
I0323 14:50:13.409848  543705 cpu.go:282] Add success.
W0323 14:50:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:50:13.409887  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:50:13.409891  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:50:13.420212  543705 net.go:648] Add success.
I0323 14:50:13.423017  543705 net.go:770] primary dev: ETH0
I0323 14:50:13.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:50:13.423041  543705 net.go:698] Add success.
I0323 14:50:14.454959  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:50:14.455138  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:50:14.455151  543705 disk_worker.go:708] disk space is not compliant
W0323 14:50:14.455153  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:50:14.456493  543705 disk_worker.go:494] system disk:vda1
I0323 14:50:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:50:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:50:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:50:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:50:16.458055  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:50:16.472410  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:50:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:50:23.409781  543705 memory.go:184] no items to output this cycle
I0323 14:50:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 14:50:33.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:50:33.409804  543705 memory.go:184] no items to output this cycle
I0323 14:50:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 14:50:40.479096  543705 disk_info.go:125] begin check local disk info of client
I0323 14:50:40.481666  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:50:40.481672  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002ad2c0 0xc0002ad300]
E0323 14:50:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:50:43.410813  543705 memory.go:191] Add success.
I0323 14:50:43.409818  543705 cpu.go:282] Add success.
I0323 14:50:43.420520  543705 net.go:648] Add success.
I0323 14:50:43.423615  543705 net.go:770] primary dev: ETH0
I0323 14:50:43.423629  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:50:43.423641  543705 net.go:698] Add success.
I0323 14:50:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:50:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:50:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:50:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:50:53.409814  543705 memory.go:184] no items to output this cycle
I0323 14:50:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 14:51:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:51:03.409783  543705 memory.go:184] no items to output this cycle
I0323 14:51:03.409787  543705 cpu.go:275] no items to output this cycle
E0323 14:51:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:51:13.409786  543705 memory.go:191] Add success.
I0323 14:51:13.409805  543705 cpu.go:282] Add success.
W0323 14:51:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:51:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:51:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:51:13.419715  543705 net.go:648] Add success.
I0323 14:51:13.422590  543705 net.go:770] primary dev: ETH0
I0323 14:51:13.422603  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:51:13.422615  543705 net.go:698] Add success.
I0323 14:51:13.468344  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"3116c83c-f9c4-4d29-906b-aad338a77315","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:51:13.468376  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:51:14.454979  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:51:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:51:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 14:51:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:51:14.456672  543705 disk_worker.go:494] system disk:vda1
I0323 14:51:14.456702  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:51:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:51:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:51:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:51:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:51:16.472373  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:51:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:51:23.409808  543705 memory.go:184] no items to output this cycle
I0323 14:51:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 14:51:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:51:33.409785  543705 memory.go:184] no items to output this cycle
I0323 14:51:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 14:51:40.453017  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:51:40.453024  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 14:51:40.482152  543705 disk_info.go:125] begin check local disk info of client
I0323 14:51:40.484679  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:51:40.484685  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005174c0 0xc000517500]
E0323 14:51:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:51:43.410706  543705 memory.go:191] Add success.
I0323 14:51:43.409832  543705 cpu.go:282] Add success.
I0323 14:51:43.420499  543705 net.go:648] Add success.
I0323 14:51:43.423104  543705 net.go:770] primary dev: ETH0
I0323 14:51:43.423118  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:51:43.423133  543705 net.go:698] Add success.
I0323 14:51:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:51:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:51:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:51:53.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:51:53.409817  543705 cpu.go:275] no items to output this cycle
I0323 14:51:53.409831  543705 memory.go:184] no items to output this cycle
E0323 14:52:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:52:03.409811  543705 memory.go:184] no items to output this cycle
I0323 14:52:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 14:52:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:52:13.409788  543705 memory.go:191] Add success.
I0323 14:52:13.409808  543705 cpu.go:282] Add success.
W0323 14:52:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:52:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:52:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:52:13.420228  543705 net.go:648] Add success.
I0323 14:52:13.423324  543705 net.go:770] primary dev: ETH0
I0323 14:52:13.423337  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:52:13.423349  543705 net.go:698] Add success.
W0323 14:52:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:52:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 14:52:14.455188  543705 disk_worker.go:728] disk inode is not compliant
E0323 14:52:14.455871  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:52:14.455879  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:52:14.455885  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:52:14.456553  543705 disk_worker.go:494] system disk:vda1
I0323 14:52:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:52:15.457017  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:52:15.457030  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:52:16.458042  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 14:52:16.458042  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:52:16.458101  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:52:16.458121  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:52:16.472510  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:52:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:52:23.409787  543705 memory.go:184] no items to output this cycle
I0323 14:52:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 14:52:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:52:33.409784  543705 memory.go:184] no items to output this cycle
I0323 14:52:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 14:52:40.485074  543705 disk_info.go:125] begin check local disk info of client
I0323 14:52:40.487653  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:52:40.487659  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5f00 0xc0000c5f40]
E0323 14:52:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:52:43.410736  543705 memory.go:191] Add success.
I0323 14:52:43.409834  543705 cpu.go:282] Add success.
I0323 14:52:43.420452  543705 net.go:648] Add success.
I0323 14:52:43.423205  543705 net.go:770] primary dev: ETH0
I0323 14:52:43.423221  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:52:43.423237  543705 net.go:698] Add success.
I0323 14:52:46.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:52:46.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:52:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:52:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:52:53.409779  543705 memory.go:184] no items to output this cycle
I0323 14:52:53.409881  543705 cpu.go:275] no items to output this cycle
E0323 14:53:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:53:03.409789  543705 memory.go:184] no items to output this cycle
I0323 14:53:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 14:53:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:53:13.409820  543705 memory.go:191] Add success.
I0323 14:53:13.409829  543705 cpu.go:282] Add success.
W0323 14:53:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:53:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:53:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:53:13.420404  543705 net.go:648] Add success.
I0323 14:53:13.423034  543705 net.go:770] primary dev: ETH0
I0323 14:53:13.423049  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:53:13.423063  543705 net.go:698] Add success.
I0323 14:53:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:53:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:53:14.455162  543705 disk_worker.go:708] disk space is not compliant
W0323 14:53:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:53:14.456764  543705 disk_worker.go:494] system disk:vda1
I0323 14:53:14.456806  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:53:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:53:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:53:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:53:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:53:16.472450  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:53:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:53:23.409789  543705 memory.go:184] no items to output this cycle
I0323 14:53:23.409792  543705 cpu.go:275] no items to output this cycle
E0323 14:53:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:53:33.409785  543705 memory.go:184] no items to output this cycle
I0323 14:53:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 14:53:40.487749  543705 disk_info.go:125] begin check local disk info of client
I0323 14:53:40.490364  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:53:40.490371  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 14:53:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:53:43.410725  543705 memory.go:191] Add success.
I0323 14:53:43.409819  543705 cpu.go:282] Add success.
I0323 14:53:43.420432  543705 net.go:648] Add success.
I0323 14:53:43.423269  543705 net.go:770] primary dev: ETH0
I0323 14:53:43.423284  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:53:43.423298  543705 net.go:698] Add success.
I0323 14:53:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:53:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:53:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:53:53.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:53:53.409841  543705 memory.go:184] no items to output this cycle
I0323 14:53:53.410008  543705 cpu.go:275] no items to output this cycle
E0323 14:54:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:54:03.409787  543705 memory.go:184] no items to output this cycle
I0323 14:54:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 14:54:13.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:54:13.409784  543705 memory.go:191] Add success.
W0323 14:54:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 14:54:13.409814  543705 cpu.go:282] Add success.
W0323 14:54:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:54:13.409826  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:54:13.420215  543705 net.go:648] Add success.
I0323 14:54:13.422987  543705 net.go:770] primary dev: ETH0
I0323 14:54:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:54:13.423012  543705 net.go:698] Add success.
I0323 14:54:13.468200  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ee6e078d-62fd-4c05-af75-c786f82eb8fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:54:13.468235  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 14:54:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:54:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:54:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 14:54:14.455223  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:54:14.456944  543705 disk_worker.go:494] system disk:vda1
I0323 14:54:14.456972  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:54:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:54:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:54:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:54:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:54:16.472394  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:54:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:54:23.409778  543705 memory.go:184] no items to output this cycle
I0323 14:54:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 14:54:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:54:33.409799  543705 memory.go:184] no items to output this cycle
I0323 14:54:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 14:54:40.454040  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:54:40.454047  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 14:54:40.491213  543705 disk_info.go:125] begin check local disk info of client
I0323 14:54:40.493868  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:54:40.493875  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536e40 0xc000536e80]
E0323 14:54:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:54:43.410763  543705 memory.go:191] Add success.
I0323 14:54:43.409794  543705 cpu.go:282] Add success.
I0323 14:54:43.420525  543705 net.go:648] Add success.
I0323 14:54:43.423425  543705 net.go:770] primary dev: ETH0
I0323 14:54:43.423440  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:54:43.423453  543705 net.go:698] Add success.
I0323 14:54:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:54:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:54:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:54:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:54:53.409803  543705 memory.go:184] no items to output this cycle
I0323 14:54:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 14:55:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:55:03.409781  543705 memory.go:184] no items to output this cycle
I0323 14:55:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 14:55:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:55:13.409790  543705 memory.go:191] Add success.
I0323 14:55:13.409807  543705 cpu.go:282] Add success.
W0323 14:55:13.409816  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:55:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:55:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:55:13.420062  543705 net.go:648] Add success.
I0323 14:55:13.422842  543705 net.go:770] primary dev: ETH0
I0323 14:55:13.422856  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:55:13.422869  543705 net.go:698] Add success.
I0323 14:55:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:55:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:55:14.455193  543705 disk_worker.go:708] disk space is not compliant
W0323 14:55:14.455195  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:55:14.456571  543705 disk_worker.go:494] system disk:vda1
I0323 14:55:14.456602  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:55:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:55:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:55:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:55:16.458065  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:55:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:55:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:55:23.409819  543705 memory.go:184] no items to output this cycle
I0323 14:55:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 14:55:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:55:33.409795  543705 memory.go:184] no items to output this cycle
I0323 14:55:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 14:55:40.494115  543705 disk_info.go:125] begin check local disk info of client
I0323 14:55:40.496768  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:55:40.496774  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536f00 0xc000536f40]
E0323 14:55:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:55:43.410623  543705 memory.go:191] Add success.
I0323 14:55:43.409813  543705 cpu.go:282] Add success.
I0323 14:55:43.420302  543705 net.go:648] Add success.
I0323 14:55:43.422761  543705 net.go:770] primary dev: ETH0
I0323 14:55:43.422774  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:55:43.422786  543705 net.go:698] Add success.
I0323 14:55:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:55:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:55:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:55:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:55:53.409792  543705 memory.go:184] no items to output this cycle
I0323 14:55:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 14:56:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:56:03.409815  543705 memory.go:184] no items to output this cycle
I0323 14:56:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 14:56:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:56:13.409793  543705 memory.go:191] Add success.
I0323 14:56:13.409796  543705 cpu.go:282] Add success.
W0323 14:56:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:56:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:56:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:56:13.420091  543705 net.go:648] Add success.
I0323 14:56:13.422685  543705 net.go:770] primary dev: ETH0
I0323 14:56:13.422700  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:56:13.422713  543705 net.go:698] Add success.
I0323 14:56:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:56:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:56:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 14:56:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:56:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 14:56:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:56:15.455953  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:56:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:56:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:56:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:56:16.472402  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:56:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:56:23.409776  543705 memory.go:184] no items to output this cycle
I0323 14:56:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 14:56:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:56:33.409785  543705 memory.go:184] no items to output this cycle
I0323 14:56:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 14:56:40.497133  543705 disk_info.go:125] begin check local disk info of client
I0323 14:56:40.499798  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:56:40.499806  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2040 0xc0004f2080]
E0323 14:56:43.409744  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:56:43.410761  543705 memory.go:191] Add success.
I0323 14:56:43.409810  543705 cpu.go:282] Add success.
I0323 14:56:43.420472  543705 net.go:648] Add success.
I0323 14:56:43.423236  543705 net.go:770] primary dev: ETH0
I0323 14:56:43.423251  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:56:43.423265  543705 net.go:698] Add success.
I0323 14:56:46.458052  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:56:46.458115  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:56:46.458140  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:56:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:56:53.409784  543705 memory.go:184] no items to output this cycle
I0323 14:56:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 14:57:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:57:03.409778  543705 memory.go:184] no items to output this cycle
I0323 14:57:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 14:57:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:57:13.409815  543705 memory.go:191] Add success.
I0323 14:57:13.409820  543705 cpu.go:282] Add success.
W0323 14:57:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:57:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:57:13.409867  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:57:13.420185  543705 net.go:648] Add success.
I0323 14:57:13.429237  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 14:57:13.429317  543705 net.go:770] primary dev: ETH0
I0323 14:57:13.429330  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:57:13.429345  543705 net.go:698] Add success.
I0323 14:57:13.452773  543705 event_worker.go:152] Polling the log file for events...
I0323 14:57:13.466194  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6d0fcd6b-8728-473d-b067-4cffafcb174b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 14:57:13.466235  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 14:57:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:57:14.455174  543705 disk_worker.go:708] disk space is not compliant
W0323 14:57:14.455177  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:57:14.456825  543705 disk_worker.go:494] system disk:vda1
E0323 14:57:14.456833  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 14:57:14.456841  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 14:57:14.456847  543705 custom_config.go:64] query custom config with name: gpu
I0323 14:57:14.456878  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 14:57:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 14:57:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 14:57:16.457956  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 14:57:16.457962  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:57:16.458016  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:57:16.458033  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:57:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:57:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:57:23.409798  543705 memory.go:184] no items to output this cycle
I0323 14:57:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 14:57:33.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:57:33.409773  543705 memory.go:184] no items to output this cycle
I0323 14:57:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 14:57:40.455023  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 14:57:40.455031  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 14:57:40.500244  543705 disk_info.go:125] begin check local disk info of client
I0323 14:57:40.502906  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:57:40.502912  543705 disk_info.go:196] parse disk info done, disk is : [0xc000509300 0xc000509340]
E0323 14:57:43.409750  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:57:43.410673  543705 memory.go:191] Add success.
I0323 14:57:43.409808  543705 cpu.go:282] Add success.
I0323 14:57:43.420407  543705 net.go:648] Add success.
I0323 14:57:43.423683  543705 net.go:770] primary dev: ETH0
I0323 14:57:43.423696  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:57:43.423708  543705 net.go:698] Add success.
I0323 14:57:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:57:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:57:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:57:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:57:53.409784  543705 cpu.go:275] no items to output this cycle
I0323 14:57:53.409792  543705 memory.go:184] no items to output this cycle
E0323 14:58:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:58:03.409812  543705 memory.go:184] no items to output this cycle
I0323 14:58:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 14:58:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:58:13.409806  543705 memory.go:191] Add success.
I0323 14:58:13.409806  543705 cpu.go:282] Add success.
W0323 14:58:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:58:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:58:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:58:13.420173  543705 net.go:648] Add success.
I0323 14:58:13.423073  543705 net.go:770] primary dev: ETH0
I0323 14:58:13.423086  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:58:13.423097  543705 net.go:698] Add success.
I0323 14:58:14.454973  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:58:14.455148  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:58:14.455159  543705 disk_worker.go:708] disk space is not compliant
W0323 14:58:14.455162  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:58:14.456500  543705 disk_worker.go:494] system disk:vda1
I0323 14:58:14.456528  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:58:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:58:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:58:16.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:58:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:58:16.472456  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:58:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:58:23.409793  543705 memory.go:184] no items to output this cycle
I0323 14:58:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 14:58:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:58:33.409787  543705 memory.go:184] no items to output this cycle
I0323 14:58:33.409803  543705 cpu.go:275] no items to output this cycle
I0323 14:58:40.503001  543705 disk_info.go:125] begin check local disk info of client
I0323 14:58:40.505674  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:58:40.505681  543705 disk_info.go:196] parse disk info done, disk is : [0xc000537e80 0xc000537ec0]
E0323 14:58:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:58:43.410644  543705 memory.go:191] Add success.
I0323 14:58:43.409843  543705 cpu.go:282] Add success.
I0323 14:58:43.420352  543705 net.go:648] Add success.
I0323 14:58:43.423089  543705 net.go:770] primary dev: ETH0
I0323 14:58:43.423102  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:58:43.423115  543705 net.go:698] Add success.
I0323 14:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:58:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:58:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:58:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:58:53.409779  543705 memory.go:184] no items to output this cycle
I0323 14:58:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 14:59:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:59:03.409784  543705 memory.go:184] no items to output this cycle
I0323 14:59:03.409785  543705 cpu.go:275] no items to output this cycle
E0323 14:59:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:59:13.409791  543705 memory.go:191] Add success.
I0323 14:59:13.409815  543705 cpu.go:282] Add success.
W0323 14:59:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 14:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 14:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 14:59:13.420156  543705 net.go:648] Add success.
I0323 14:59:13.423418  543705 net.go:770] primary dev: ETH0
I0323 14:59:13.423432  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:59:13.423446  543705 net.go:698] Add success.
I0323 14:59:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 14:59:14.455131  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 14:59:14.455216  543705 disk_worker.go:708] disk space is not compliant
W0323 14:59:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 14:59:14.456605  543705 disk_worker.go:494] system disk:vda1
I0323 14:59:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 14:59:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 14:59:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:59:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:59:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 14:59:16.472387  543705 disk_local_worker.go:436] Get disk info: []
E0323 14:59:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:59:23.409785  543705 memory.go:184] no items to output this cycle
I0323 14:59:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 14:59:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:59:33.409801  543705 memory.go:184] no items to output this cycle
I0323 14:59:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 14:59:40.506175  543705 disk_info.go:125] begin check local disk info of client
I0323 14:59:40.508730  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 14:59:40.508737  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f3ac0 0xc0004f3b00]
E0323 14:59:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:59:43.410664  543705 memory.go:191] Add success.
I0323 14:59:43.409825  543705 cpu.go:282] Add success.
I0323 14:59:43.419726  543705 net.go:648] Add success.
I0323 14:59:43.422453  543705 net.go:770] primary dev: ETH0
I0323 14:59:43.422466  543705 net.go:802] Send network stats successfully!,count is 6
I0323 14:59:43.422477  543705 net.go:698] Add success.
I0323 14:59:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 14:59:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 14:59:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 14:59:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 14:59:53.409807  543705 memory.go:184] no items to output this cycle
I0323 14:59:53.409818  543705 cpu.go:275] no items to output this cycle
E0323 15:00:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:00:03.409783  543705 memory.go:184] no items to output this cycle
I0323 15:00:03.409788  543705 cpu.go:275] no items to output this cycle
E0323 15:00:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:00:13.409835  543705 memory.go:191] Add success.
I0323 15:00:13.409842  543705 cpu.go:282] Add success.
W0323 15:00:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:00:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:00:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:00:13.420247  543705 net.go:648] Add success.
I0323 15:00:13.423347  543705 net.go:770] primary dev: ETH0
I0323 15:00:13.423369  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:00:13.423381  543705 net.go:698] Add success.
I0323 15:00:13.507568  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"dee434e3-09df-40d4-8bc1-1841e87ffc4c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:00:13.507602  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:00:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:00:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:00:14.455188  543705 disk_worker.go:708] disk space is not compliant
W0323 15:00:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:00:14.456744  543705 disk_worker.go:494] system disk:vda1
I0323 15:00:14.456771  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:00:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:00:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:00:16.458026  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:00:16.458048  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:00:16.472377  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:00:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:00:23.409783  543705 memory.go:184] no items to output this cycle
I0323 15:00:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 15:00:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:00:33.409786  543705 memory.go:184] no items to output this cycle
I0323 15:00:33.409788  543705 cpu.go:275] no items to output this cycle
I0323 15:00:40.456034  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:00:40.456042  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:00:40.509289  543705 disk_info.go:125] begin check local disk info of client
I0323 15:00:40.511857  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:00:40.511863  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d1740 0xc0003d1780]
E0323 15:00:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:00:43.411034  543705 memory.go:191] Add success.
I0323 15:00:43.409833  543705 cpu.go:282] Add success.
I0323 15:00:43.419959  543705 net.go:648] Add success.
I0323 15:00:43.422810  543705 net.go:770] primary dev: ETH0
I0323 15:00:43.422826  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:00:43.422839  543705 net.go:698] Add success.
I0323 15:00:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:00:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:00:46.458083  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:00:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:00:53.409812  543705 memory.go:184] no items to output this cycle
I0323 15:00:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 15:01:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:01:03.409781  543705 memory.go:184] no items to output this cycle
I0323 15:01:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 15:01:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:01:13.409795  543705 memory.go:191] Add success.
I0323 15:01:13.409816  543705 cpu.go:282] Add success.
W0323 15:01:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:01:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:01:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:01:13.420241  543705 net.go:648] Add success.
I0323 15:01:13.423049  543705 net.go:770] primary dev: ETH0
I0323 15:01:13.423062  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:01:13.423075  543705 net.go:698] Add success.
I0323 15:01:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:01:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:01:14.455189  543705 disk_worker.go:708] disk space is not compliant
W0323 15:01:14.455192  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:01:14.456598  543705 disk_worker.go:494] system disk:vda1
I0323 15:01:14.456630  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:01:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:01:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:01:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:01:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:01:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:01:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:01:23.409786  543705 memory.go:184] no items to output this cycle
I0323 15:01:23.409790  543705 cpu.go:275] no items to output this cycle
E0323 15:01:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:01:33.409785  543705 memory.go:184] no items to output this cycle
I0323 15:01:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 15:01:40.512206  543705 disk_info.go:125] begin check local disk info of client
I0323 15:01:40.514808  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:01:40.514815  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aaf40 0xc0001aaf80]
E0323 15:01:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:01:43.410774  543705 memory.go:191] Add success.
I0323 15:01:43.409815  543705 cpu.go:282] Add success.
I0323 15:01:43.420594  543705 net.go:648] Add success.
I0323 15:01:43.423417  543705 net.go:770] primary dev: ETH0
I0323 15:01:43.423430  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:01:43.423442  543705 net.go:698] Add success.
I0323 15:01:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:01:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:01:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:01:53.410249  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:01:53.410268  543705 memory.go:184] no items to output this cycle
I0323 15:01:53.410280  543705 cpu.go:275] no items to output this cycle
E0323 15:02:03.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:02:03.409781  543705 memory.go:184] no items to output this cycle
I0323 15:02:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 15:02:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:02:13.409783  543705 memory.go:191] Add success.
W0323 15:02:13.409809  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:02:13.409816  543705 cpu.go:282] Add success.
W0323 15:02:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:02:13.409824  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:02:13.420123  543705 net.go:648] Add success.
I0323 15:02:13.422936  543705 net.go:770] primary dev: ETH0
I0323 15:02:13.422951  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:02:13.422965  543705 net.go:698] Add success.
W0323 15:02:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:02:14.455218  543705 disk_worker.go:708] disk space is not compliant
W0323 15:02:14.455221  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:02:14.456078  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:02:14.456088  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:02:14.456095  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:02:14.456665  543705 disk_worker.go:494] system disk:vda1
I0323 15:02:14.456713  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:02:15.456801  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:02:15.456809  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:02:16.457920  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:02:16.457919  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:02:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:02:16.457994  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:02:16.472323  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:02:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:02:23.409784  543705 memory.go:184] no items to output this cycle
I0323 15:02:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 15:02:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:02:33.409776  543705 memory.go:184] no items to output this cycle
I0323 15:02:33.409796  543705 cpu.go:275] no items to output this cycle
I0323 15:02:40.514899  543705 disk_info.go:125] begin check local disk info of client
I0323 15:02:40.517533  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:02:40.517541  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aba40 0xc0001aba80]
E0323 15:02:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:02:43.410696  543705 memory.go:191] Add success.
I0323 15:02:43.409804  543705 cpu.go:282] Add success.
I0323 15:02:43.420544  543705 net.go:648] Add success.
I0323 15:02:43.423188  543705 net.go:770] primary dev: ETH0
I0323 15:02:43.423200  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:02:43.423212  543705 net.go:698] Add success.
I0323 15:02:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:02:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:02:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:02:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:02:53.409804  543705 memory.go:184] no items to output this cycle
I0323 15:02:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:03:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:03:03.409780  543705 memory.go:184] no items to output this cycle
I0323 15:03:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 15:03:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:03:13.409792  543705 memory.go:191] Add success.
I0323 15:03:13.409806  543705 cpu.go:282] Add success.
W0323 15:03:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:03:13.412773  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:03:13.412778  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:03:13.420372  543705 net.go:648] Add success.
I0323 15:03:13.422380  543705 net.go:770] primary dev: ETH0
I0323 15:03:13.422393  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:03:13.422405  543705 net.go:698] Add success.
I0323 15:03:13.467998  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7d668e18-da5b-47e3-90a5-ced4999de515","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:03:13.468033  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:03:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:03:14.455095  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:03:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 15:03:14.455165  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:03:14.456539  543705 disk_worker.go:494] system disk:vda1
I0323 15:03:14.456581  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:03:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:03:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:03:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:03:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:03:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:03:23.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:03:23.409831  543705 memory.go:184] no items to output this cycle
I0323 15:03:23.409846  543705 cpu.go:275] no items to output this cycle
E0323 15:03:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:03:33.409791  543705 memory.go:184] no items to output this cycle
I0323 15:03:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 15:03:40.457039  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:03:40.457047  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:03:40.518347  543705 disk_info.go:125] begin check local disk info of client
I0323 15:03:40.520802  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:03:40.520808  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003b9840 0xc0003b9880]
E0323 15:03:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:03:43.410663  543705 memory.go:191] Add success.
I0323 15:03:43.409796  543705 cpu.go:282] Add success.
I0323 15:03:43.420390  543705 net.go:648] Add success.
I0323 15:03:43.423099  543705 net.go:770] primary dev: ETH0
I0323 15:03:43.423114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:03:43.423129  543705 net.go:698] Add success.
I0323 15:03:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:03:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:03:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:03:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:03:53.409791  543705 memory.go:184] no items to output this cycle
I0323 15:03:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 15:04:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:04:03.409804  543705 memory.go:184] no items to output this cycle
I0323 15:04:03.409822  543705 cpu.go:275] no items to output this cycle
E0323 15:04:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:04:13.409804  543705 memory.go:191] Add success.
I0323 15:04:13.409809  543705 cpu.go:282] Add success.
W0323 15:04:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:04:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:04:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:04:13.420527  543705 net.go:648] Add success.
I0323 15:04:13.423497  543705 net.go:770] primary dev: ETH0
I0323 15:04:13.423510  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:04:13.423522  543705 net.go:698] Add success.
I0323 15:04:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:04:14.455164  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:04:14.455175  543705 disk_worker.go:708] disk space is not compliant
W0323 15:04:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:04:14.456502  543705 disk_worker.go:494] system disk:vda1
I0323 15:04:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:04:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:04:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:04:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:04:16.458077  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:04:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:04:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:04:23.409816  543705 memory.go:184] no items to output this cycle
I0323 15:04:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 15:04:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:04:33.409795  543705 memory.go:184] no items to output this cycle
I0323 15:04:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 15:04:40.521248  543705 disk_info.go:125] begin check local disk info of client
I0323 15:04:40.523865  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:04:40.523873  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002af740 0xc0002af780]
E0323 15:04:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:04:43.410755  543705 memory.go:191] Add success.
I0323 15:04:43.409809  543705 cpu.go:282] Add success.
I0323 15:04:43.420455  543705 net.go:648] Add success.
I0323 15:04:43.423357  543705 net.go:770] primary dev: ETH0
I0323 15:04:43.423371  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:04:43.423383  543705 net.go:698] Add success.
I0323 15:04:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:04:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:04:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:04:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:04:53.409817  543705 memory.go:184] no items to output this cycle
I0323 15:04:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 15:05:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:05:03.409798  543705 memory.go:184] no items to output this cycle
I0323 15:05:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 15:05:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:05:13.409802  543705 memory.go:191] Add success.
I0323 15:05:13.409827  543705 cpu.go:282] Add success.
W0323 15:05:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:05:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:05:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:05:13.420147  543705 net.go:648] Add success.
I0323 15:05:13.423130  543705 net.go:770] primary dev: ETH0
I0323 15:05:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:05:13.423159  543705 net.go:698] Add success.
I0323 15:05:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:05:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:05:14.455169  543705 disk_worker.go:708] disk space is not compliant
W0323 15:05:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:05:14.456486  543705 disk_worker.go:494] system disk:vda1
I0323 15:05:14.456530  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:05:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:05:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:05:16.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:05:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:05:16.472446  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:05:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:05:23.409786  543705 memory.go:184] no items to output this cycle
I0323 15:05:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 15:05:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:05:33.409792  543705 memory.go:184] no items to output this cycle
I0323 15:05:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 15:05:40.523958  543705 disk_info.go:125] begin check local disk info of client
I0323 15:05:40.526546  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:05:40.526552  543705 disk_info.go:196] parse disk info done, disk is : [0xc000344e00 0xc000344e40]
E0323 15:05:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:05:43.410599  543705 memory.go:191] Add success.
I0323 15:05:43.409814  543705 cpu.go:282] Add success.
I0323 15:05:43.420305  543705 net.go:648] Add success.
I0323 15:05:43.423024  543705 net.go:770] primary dev: ETH0
I0323 15:05:43.423037  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:05:43.423049  543705 net.go:698] Add success.
I0323 15:05:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:05:46.458094  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:05:46.458129  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:05:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:05:53.409775  543705 memory.go:184] no items to output this cycle
I0323 15:05:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:06:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:06:03.409798  543705 cpu.go:275] no items to output this cycle
I0323 15:06:03.409800  543705 memory.go:184] no items to output this cycle
E0323 15:06:13.409875  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:06:13.409908  543705 memory.go:191] Add success.
W0323 15:06:13.409945  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:06:13.409949  543705 cpu.go:282] Add success.
W0323 15:06:13.409958  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:06:13.409961  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:06:13.419708  543705 net.go:648] Add success.
I0323 15:06:13.422603  543705 net.go:770] primary dev: ETH0
I0323 15:06:13.422615  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:06:13.422626  543705 net.go:698] Add success.
I0323 15:06:13.602374  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fea3e5de-8faa-4db1-974a-4e688fcfb0ff","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:06:13.602406  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:06:14.453976  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:06:14.454125  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:06:14.454187  543705 disk_worker.go:708] disk space is not compliant
W0323 15:06:14.454190  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:06:14.455549  543705 disk_worker.go:494] system disk:vda1
I0323 15:06:14.455603  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:06:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:06:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:06:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:06:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:06:16.472379  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:06:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:06:23.409784  543705 memory.go:184] no items to output this cycle
I0323 15:06:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 15:06:33.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:06:33.409806  543705 memory.go:184] no items to output this cycle
I0323 15:06:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 15:06:40.458056  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:06:40.458063  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:06:40.527387  543705 disk_info.go:125] begin check local disk info of client
I0323 15:06:40.529912  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:06:40.529918  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b00 0xc0000c4b40]
E0323 15:06:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:06:43.410947  543705 memory.go:191] Add success.
I0323 15:06:43.409827  543705 cpu.go:282] Add success.
I0323 15:06:43.420593  543705 net.go:648] Add success.
I0323 15:06:43.423382  543705 net.go:770] primary dev: ETH0
I0323 15:06:43.423396  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:06:43.423410  543705 net.go:698] Add success.
I0323 15:06:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:06:46.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:06:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:06:53.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:06:53.409767  543705 memory.go:184] no items to output this cycle
I0323 15:06:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 15:07:03.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:07:03.409774  543705 memory.go:184] no items to output this cycle
I0323 15:07:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 15:07:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:07:13.409808  543705 memory.go:191] Add success.
I0323 15:07:13.409816  543705 cpu.go:282] Add success.
W0323 15:07:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:07:13.409855  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:07:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:07:13.419706  543705 net.go:648] Add success.
I0323 15:07:13.422661  543705 net.go:770] primary dev: ETH0
I0323 15:07:13.422674  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:07:13.422685  543705 net.go:698] Add success.
I0323 15:07:13.453239  543705 event_worker.go:152] Polling the log file for events...
W0323 15:07:14.455102  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:07:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 15:07:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:07:14.456775  543705 disk_worker.go:494] system disk:vda1
I0323 15:07:14.456814  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:07:14.457057  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:07:14.457064  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:07:14.457069  543705 custom_config.go:64] query custom config with name: gpu
E0323 15:07:15.456772  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:07:15.456780  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:07:16.457928  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:07:16.457928  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:07:16.457984  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:07:16.458005  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:07:16.472321  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:07:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:07:23.409790  543705 memory.go:184] no items to output this cycle
I0323 15:07:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 15:07:33.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:07:33.409777  543705 memory.go:184] no items to output this cycle
I0323 15:07:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 15:07:40.530003  543705 disk_info.go:125] begin check local disk info of client
I0323 15:07:40.532563  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:07:40.532571  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001abd40 0xc0001abd80]
E0323 15:07:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:07:43.410719  543705 memory.go:191] Add success.
I0323 15:07:43.409820  543705 cpu.go:282] Add success.
I0323 15:07:43.420461  543705 net.go:648] Add success.
I0323 15:07:43.423416  543705 net.go:770] primary dev: ETH0
I0323 15:07:43.423431  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:07:43.423444  543705 net.go:698] Add success.
I0323 15:07:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:07:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:07:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:07:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:07:53.409795  543705 memory.go:184] no items to output this cycle
I0323 15:07:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 15:08:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:08:03.409787  543705 memory.go:184] no items to output this cycle
I0323 15:08:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:08:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:08:13.409814  543705 memory.go:191] Add success.
I0323 15:08:13.409816  543705 cpu.go:282] Add success.
W0323 15:08:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:08:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:08:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:08:13.419720  543705 net.go:648] Add success.
I0323 15:08:13.422908  543705 net.go:770] primary dev: ETH0
I0323 15:08:13.422927  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:08:13.422942  543705 net.go:698] Add success.
I0323 15:08:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:08:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:08:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 15:08:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:08:14.456505  543705 disk_worker.go:494] system disk:vda1
I0323 15:08:14.456548  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:08:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:08:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:08:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:08:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:08:16.472428  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:08:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:08:23.409780  543705 memory.go:184] no items to output this cycle
I0323 15:08:23.409787  543705 cpu.go:275] no items to output this cycle
E0323 15:08:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:08:33.409787  543705 memory.go:184] no items to output this cycle
I0323 15:08:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 15:08:40.533359  543705 disk_info.go:125] begin check local disk info of client
I0323 15:08:40.535912  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:08:40.535919  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4a00 0xc0000c4a40]
E0323 15:08:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:08:43.410737  543705 memory.go:191] Add success.
I0323 15:08:43.409818  543705 cpu.go:282] Add success.
I0323 15:08:43.420490  543705 net.go:648] Add success.
I0323 15:08:43.423121  543705 net.go:770] primary dev: ETH0
I0323 15:08:43.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:08:43.423146  543705 net.go:698] Add success.
I0323 15:08:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:08:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:08:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:08:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:08:53.409800  543705 memory.go:184] no items to output this cycle
I0323 15:08:53.409810  543705 cpu.go:275] no items to output this cycle
E0323 15:09:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:09:03.409785  543705 cpu.go:275] no items to output this cycle
I0323 15:09:03.409795  543705 memory.go:184] no items to output this cycle
E0323 15:09:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:09:13.409883  543705 memory.go:191] Add success.
W0323 15:09:13.409914  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:09:13.409927  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:09:13.409930  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:09:13.409960  543705 cpu.go:282] Add success.
I0323 15:09:13.419727  543705 net.go:648] Add success.
I0323 15:09:13.422672  543705 net.go:770] primary dev: ETH0
I0323 15:09:13.422685  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:09:13.422697  543705 net.go:698] Add success.
I0323 15:09:13.463280  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6d1aa284-05db-4a4b-be64-b7cd75ba46f8","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:09:13.463311  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:09:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:09:14.455106  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:09:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 15:09:14.455169  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:09:14.456477  543705 disk_worker.go:494] system disk:vda1
I0323 15:09:14.456518  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:09:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:09:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:09:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:09:16.458056  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:09:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:09:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:09:23.409777  543705 memory.go:184] no items to output this cycle
I0323 15:09:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 15:09:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:09:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 15:09:33.409798  543705 memory.go:184] no items to output this cycle
I0323 15:09:40.459043  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:09:40.459050  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:09:40.536422  543705 disk_info.go:125] begin check local disk info of client
I0323 15:09:40.538933  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:09:40.538939  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005079c0 0xc000507a00]
E0323 15:09:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:09:43.410744  543705 memory.go:191] Add success.
I0323 15:09:43.409818  543705 cpu.go:282] Add success.
I0323 15:09:43.420523  543705 net.go:648] Add success.
I0323 15:09:43.423070  543705 net.go:770] primary dev: ETH0
I0323 15:09:43.423084  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:09:43.423096  543705 net.go:698] Add success.
I0323 15:09:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:09:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:09:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:09:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:09:53.409798  543705 memory.go:184] no items to output this cycle
I0323 15:09:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:10:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:10:03.409781  543705 memory.go:184] no items to output this cycle
I0323 15:10:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 15:10:13.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:10:13.409781  543705 memory.go:191] Add success.
W0323 15:10:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:10:13.409812  543705 cpu.go:282] Add success.
W0323 15:10:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:10:13.409820  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:10:13.419754  543705 net.go:648] Add success.
I0323 15:10:13.422396  543705 net.go:770] primary dev: ETH0
I0323 15:10:13.422408  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:10:13.422420  543705 net.go:698] Add success.
I0323 15:10:14.454956  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:10:14.455096  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:10:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 15:10:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:10:14.456483  543705 disk_worker.go:494] system disk:vda1
I0323 15:10:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:10:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:10:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:10:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:10:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:10:16.472381  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:10:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:10:23.409818  543705 memory.go:184] no items to output this cycle
I0323 15:10:23.409829  543705 cpu.go:275] no items to output this cycle
E0323 15:10:33.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:10:33.409767  543705 memory.go:184] no items to output this cycle
I0323 15:10:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 15:10:40.539337  543705 disk_info.go:125] begin check local disk info of client
I0323 15:10:40.541890  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:10:40.541896  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4bc0 0xc0003d4c00]
E0323 15:10:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:10:43.410691  543705 memory.go:191] Add success.
I0323 15:10:43.409814  543705 cpu.go:282] Add success.
I0323 15:10:43.420414  543705 net.go:648] Add success.
I0323 15:10:43.423240  543705 net.go:770] primary dev: ETH0
I0323 15:10:43.423254  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:10:43.423267  543705 net.go:698] Add success.
I0323 15:10:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:10:46.458058  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:10:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:10:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:10:53.409774  543705 memory.go:184] no items to output this cycle
I0323 15:10:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 15:11:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:11:03.409791  543705 memory.go:184] no items to output this cycle
I0323 15:11:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 15:11:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:11:13.409794  543705 memory.go:191] Add success.
I0323 15:11:13.409800  543705 cpu.go:282] Add success.
W0323 15:11:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:11:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:11:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:11:13.419733  543705 net.go:648] Add success.
I0323 15:11:13.422613  543705 net.go:770] primary dev: ETH0
I0323 15:11:13.422626  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:11:13.422637  543705 net.go:698] Add success.
I0323 15:11:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:11:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:11:14.455161  543705 disk_worker.go:708] disk space is not compliant
W0323 15:11:14.455164  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:11:14.456476  543705 disk_worker.go:494] system disk:vda1
I0323 15:11:14.456519  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:11:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:11:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:11:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:11:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:11:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:11:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:11:23.409787  543705 memory.go:184] no items to output this cycle
I0323 15:11:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 15:11:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:11:33.409781  543705 memory.go:184] no items to output this cycle
I0323 15:11:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 15:11:40.542351  543705 disk_info.go:125] begin check local disk info of client
I0323 15:11:40.544867  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:11:40.544873  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4ec0 0xc0003d4f00]
E0323 15:11:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:11:43.410786  543705 memory.go:191] Add success.
I0323 15:11:43.409828  543705 cpu.go:282] Add success.
I0323 15:11:43.420678  543705 net.go:648] Add success.
I0323 15:11:43.423554  543705 net.go:770] primary dev: ETH0
I0323 15:11:43.423585  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:11:43.423599  543705 net.go:698] Add success.
I0323 15:11:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:11:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:11:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:11:53.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:11:53.409777  543705 memory.go:184] no items to output this cycle
I0323 15:11:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:12:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:12:03.409801  543705 memory.go:184] no items to output this cycle
I0323 15:12:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 15:12:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:12:13.409905  543705 memory.go:191] Add success.
W0323 15:12:13.409937  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:12:13.409950  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:12:13.409953  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:12:13.409967  543705 cpu.go:282] Add success.
I0323 15:12:13.419760  543705 net.go:648] Add success.
I0323 15:12:13.422800  543705 net.go:770] primary dev: ETH0
I0323 15:12:13.422814  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:12:13.422826  543705 net.go:698] Add success.
I0323 15:12:13.469235  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"937d14ad-483b-49be-9dc9-b2b03b3a000c","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:12:13.469268  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 15:12:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:12:14.455240  543705 disk_worker.go:708] disk space is not compliant
W0323 15:12:14.455244  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:12:14.455914  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:12:14.455924  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:12:14.455929  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:12:14.456859  543705 disk_worker.go:494] system disk:vda1
I0323 15:12:14.456903  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:12:15.456842  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:12:15.456851  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:12:16.457914  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:12:16.457922  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:12:16.457966  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:12:16.457983  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:12:16.472328  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:12:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:12:23.409783  543705 memory.go:184] no items to output this cycle
I0323 15:12:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 15:12:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:12:33.409782  543705 memory.go:184] no items to output this cycle
I0323 15:12:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 15:12:40.460050  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:12:40.460057  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:12:40.545476  543705 disk_info.go:125] begin check local disk info of client
I0323 15:12:40.547986  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:12:40.547992  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0323 15:12:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:12:43.410586  543705 memory.go:191] Add success.
I0323 15:12:43.409797  543705 cpu.go:282] Add success.
I0323 15:12:43.420286  543705 net.go:648] Add success.
I0323 15:12:43.422859  543705 net.go:770] primary dev: ETH0
I0323 15:12:43.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:12:43.422885  543705 net.go:698] Add success.
I0323 15:12:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:12:46.458053  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:12:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:12:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:12:53.409768  543705 memory.go:184] no items to output this cycle
I0323 15:12:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 15:13:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:13:03.409820  543705 memory.go:184] no items to output this cycle
I0323 15:13:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 15:13:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:13:13.409785  543705 memory.go:191] Add success.
I0323 15:13:13.409789  543705 cpu.go:282] Add success.
W0323 15:13:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:13:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:13:13.409950  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:13:13.419723  543705 net.go:648] Add success.
I0323 15:13:13.422430  543705 net.go:770] primary dev: ETH0
I0323 15:13:13.422448  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:13:13.422463  543705 net.go:698] Add success.
I0323 15:13:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:13:14.455100  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:13:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 15:13:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:13:14.456490  543705 disk_worker.go:494] system disk:vda1
I0323 15:13:14.456535  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:13:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:13:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:13:16.458030  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:13:16.458053  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:13:16.472351  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:13:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:13:23.409805  543705 memory.go:184] no items to output this cycle
I0323 15:13:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 15:13:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:13:33.409802  543705 memory.go:184] no items to output this cycle
I0323 15:13:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 15:13:40.548386  543705 disk_info.go:125] begin check local disk info of client
I0323 15:13:40.550953  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:13:40.550960  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003ba4c0 0xc0003ba500]
E0323 15:13:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:13:43.410710  543705 memory.go:191] Add success.
I0323 15:13:43.409786  543705 cpu.go:282] Add success.
I0323 15:13:43.420515  543705 net.go:648] Add success.
I0323 15:13:43.423440  543705 net.go:770] primary dev: ETH0
I0323 15:13:43.423455  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:13:43.423468  543705 net.go:698] Add success.
I0323 15:13:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:13:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:13:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:13:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:13:53.409786  543705 memory.go:184] no items to output this cycle
I0323 15:13:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 15:14:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:14:03.409780  543705 memory.go:184] no items to output this cycle
I0323 15:14:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 15:14:13.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:14:13.409784  543705 memory.go:191] Add success.
I0323 15:14:13.409784  543705 cpu.go:282] Add success.
W0323 15:14:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:14:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:14:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:14:13.420328  543705 net.go:648] Add success.
I0323 15:14:13.423424  543705 net.go:770] primary dev: ETH0
I0323 15:14:13.423439  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:14:13.423452  543705 net.go:698] Add success.
I0323 15:14:14.454967  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:14:14.455185  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:14:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 15:14:14.455199  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:14:14.456623  543705 disk_worker.go:494] system disk:vda1
I0323 15:14:14.456656  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:14:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:14:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:14:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:14:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:14:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:14:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:14:23.409781  543705 memory.go:184] no items to output this cycle
I0323 15:14:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 15:14:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:14:33.409791  543705 memory.go:184] no items to output this cycle
I0323 15:14:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 15:14:40.551403  543705 disk_info.go:125] begin check local disk info of client
I0323 15:14:40.553993  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:14:40.554000  543705 disk_info.go:196] parse disk info done, disk is : [0xc00032f840 0xc00032f880]
E0323 15:14:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:14:43.410676  543705 memory.go:191] Add success.
I0323 15:14:43.409822  543705 cpu.go:282] Add success.
I0323 15:14:43.420445  543705 net.go:648] Add success.
I0323 15:14:43.423100  543705 net.go:770] primary dev: ETH0
I0323 15:14:43.423115  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:14:43.423128  543705 net.go:698] Add success.
I0323 15:14:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:14:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:14:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:14:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:14:53.409778  543705 memory.go:184] no items to output this cycle
I0323 15:14:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 15:15:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:15:03.409801  543705 memory.go:184] no items to output this cycle
I0323 15:15:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 15:15:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:15:13.409811  543705 memory.go:191] Add success.
I0323 15:15:13.409819  543705 cpu.go:282] Add success.
W0323 15:15:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:15:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:15:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:15:13.420166  543705 net.go:648] Add success.
I0323 15:15:13.423109  543705 net.go:770] primary dev: ETH0
I0323 15:15:13.423147  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:15:13.423160  543705 net.go:698] Add success.
I0323 15:15:13.571787  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"a6c3fe1a-3a25-4c6e-8f20-f697f2c9de27","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:15:13.571818  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:15:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:15:14.455157  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:15:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 15:15:14.455170  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:15:14.456532  543705 disk_worker.go:494] system disk:vda1
I0323 15:15:14.456595  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:15:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:15:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:15:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:15:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:15:16.472404  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:15:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:15:23.409785  543705 memory.go:184] no items to output this cycle
I0323 15:15:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 15:15:33.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:15:33.409777  543705 memory.go:184] no items to output this cycle
I0323 15:15:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 15:15:40.461057  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:15:40.461063  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:15:40.554479  543705 disk_info.go:125] begin check local disk info of client
I0323 15:15:40.556983  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:15:40.556988  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004912c0 0xc000491300]
E0323 15:15:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:15:43.410696  543705 memory.go:191] Add success.
I0323 15:15:43.409803  543705 cpu.go:282] Add success.
I0323 15:15:43.420429  543705 net.go:648] Add success.
I0323 15:15:43.423520  543705 net.go:770] primary dev: ETH0
I0323 15:15:43.423533  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:15:43.423547  543705 net.go:698] Add success.
I0323 15:15:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:15:46.458035  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:15:46.458058  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:15:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:15:53.409778  543705 memory.go:184] no items to output this cycle
I0323 15:15:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 15:16:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:16:03.409774  543705 memory.go:184] no items to output this cycle
I0323 15:16:03.409799  543705 cpu.go:275] no items to output this cycle
E0323 15:16:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:16:13.409809  543705 memory.go:191] Add success.
I0323 15:16:13.409824  543705 cpu.go:282] Add success.
W0323 15:16:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:16:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:16:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:16:13.420172  543705 net.go:648] Add success.
I0323 15:16:13.423455  543705 net.go:770] primary dev: ETH0
I0323 15:16:13.423468  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:16:13.423482  543705 net.go:698] Add success.
I0323 15:16:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:16:14.455289  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:16:14.455306  543705 disk_worker.go:708] disk space is not compliant
W0323 15:16:14.455310  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:16:14.457010  543705 disk_worker.go:494] system disk:vda1
I0323 15:16:14.457039  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:16:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:16:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:16:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:16:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:16:16.472371  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:16:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:16:23.409802  543705 memory.go:184] no items to output this cycle
I0323 15:16:23.409812  543705 cpu.go:275] no items to output this cycle
E0323 15:16:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:16:33.409806  543705 memory.go:184] no items to output this cycle
I0323 15:16:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 15:16:40.557433  543705 disk_info.go:125] begin check local disk info of client
I0323 15:16:40.560034  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:16:40.560041  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005175c0 0xc000517600]
E0323 15:16:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:16:43.410693  543705 memory.go:191] Add success.
I0323 15:16:43.409785  543705 cpu.go:282] Add success.
I0323 15:16:43.420410  543705 net.go:648] Add success.
I0323 15:16:43.423027  543705 net.go:770] primary dev: ETH0
I0323 15:16:43.423040  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:16:43.423054  543705 net.go:698] Add success.
I0323 15:16:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:16:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:16:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:16:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:16:53.409773  543705 memory.go:184] no items to output this cycle
I0323 15:16:53.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:17:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:17:03.409782  543705 memory.go:184] no items to output this cycle
I0323 15:17:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 15:17:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:17:13.409815  543705 memory.go:191] Add success.
I0323 15:17:13.409822  543705 cpu.go:282] Add success.
W0323 15:17:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:17:13.409865  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:17:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:17:13.420279  543705 net.go:648] Add success.
I0323 15:17:13.422994  543705 net.go:770] primary dev: ETH0
I0323 15:17:13.423010  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:17:13.423029  543705 net.go:698] Add success.
I0323 15:17:13.453566  543705 event_worker.go:152] Polling the log file for events...
W0323 15:17:14.455097  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:17:14.455158  543705 disk_worker.go:708] disk space is not compliant
W0323 15:17:14.455161  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:17:14.456132  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:17:14.456142  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:17:14.456149  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:17:14.456470  543705 disk_worker.go:494] system disk:vda1
I0323 15:17:14.456499  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:17:15.456889  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:17:15.456898  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:17:16.457924  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:17:16.457924  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:17:16.457977  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:17:16.457997  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:17:16.472329  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:17:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:17:23.409794  543705 memory.go:184] no items to output this cycle
I0323 15:17:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 15:17:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:17:33.409784  543705 memory.go:184] no items to output this cycle
I0323 15:17:33.409786  543705 cpu.go:275] no items to output this cycle
I0323 15:17:40.560123  543705 disk_info.go:125] begin check local disk info of client
I0323 15:17:40.562713  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:17:40.562720  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007afc0 0xc00007b000]
E0323 15:17:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:17:43.410739  543705 memory.go:191] Add success.
I0323 15:17:43.409813  543705 cpu.go:282] Add success.
I0323 15:17:43.420435  543705 net.go:648] Add success.
I0323 15:17:43.423466  543705 net.go:770] primary dev: ETH0
I0323 15:17:43.423480  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:17:43.423506  543705 net.go:698] Add success.
I0323 15:17:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:17:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:17:46.458080  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:17:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:17:53.409792  543705 memory.go:184] no items to output this cycle
I0323 15:17:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 15:18:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:18:03.409776  543705 memory.go:184] no items to output this cycle
I0323 15:18:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:18:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:18:13.409826  543705 memory.go:191] Add success.
I0323 15:18:13.409829  543705 cpu.go:282] Add success.
W0323 15:18:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:18:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:18:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:18:13.420164  543705 net.go:648] Add success.
I0323 15:18:13.422828  543705 net.go:770] primary dev: ETH0
I0323 15:18:13.422841  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:18:13.422854  543705 net.go:698] Add success.
I0323 15:18:13.921909  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"b6866d48-2520-42fb-8ce7-f0333d19d210","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:18:13.921944  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:18:14.454629  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:18:14.454871  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:18:14.454880  543705 disk_worker.go:708] disk space is not compliant
W0323 15:18:14.454883  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:18:14.456406  543705 disk_worker.go:494] system disk:vda1
I0323 15:18:14.456437  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:18:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:18:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:18:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:18:16.458190  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:18:16.472089  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:18:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:18:23.409781  543705 memory.go:184] no items to output this cycle
I0323 15:18:23.409785  543705 cpu.go:275] no items to output this cycle
E0323 15:18:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:18:33.409781  543705 memory.go:184] no items to output this cycle
I0323 15:18:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 15:18:40.462061  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:18:40.462068  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:18:40.563557  543705 disk_info.go:125] begin check local disk info of client
I0323 15:18:40.566163  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:18:40.566169  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371e40 0xc000371e80]
E0323 15:18:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:18:43.410701  543705 memory.go:191] Add success.
I0323 15:18:43.409816  543705 cpu.go:282] Add success.
I0323 15:18:43.420411  543705 net.go:648] Add success.
I0323 15:18:43.423296  543705 net.go:770] primary dev: ETH0
I0323 15:18:43.423312  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:18:43.423327  543705 net.go:698] Add success.
I0323 15:18:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:18:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:18:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:18:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:18:53.409787  543705 memory.go:184] no items to output this cycle
I0323 15:18:53.409786  543705 cpu.go:275] no items to output this cycle
E0323 15:19:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:19:03.409790  543705 memory.go:184] no items to output this cycle
I0323 15:19:03.409793  543705 cpu.go:275] no items to output this cycle
E0323 15:19:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:19:13.409794  543705 memory.go:191] Add success.
I0323 15:19:13.409795  543705 cpu.go:282] Add success.
W0323 15:19:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:19:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:19:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:19:13.420081  543705 net.go:648] Add success.
I0323 15:19:13.422859  543705 net.go:770] primary dev: ETH0
I0323 15:19:13.422873  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:19:13.422897  543705 net.go:698] Add success.
I0323 15:19:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:19:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:19:14.455167  543705 disk_worker.go:708] disk space is not compliant
W0323 15:19:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:19:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 15:19:14.456550  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:19:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:19:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:19:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:19:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:19:16.472421  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:19:23.409876  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:19:23.409908  543705 memory.go:184] no items to output this cycle
I0323 15:19:23.409943  543705 cpu.go:275] no items to output this cycle
E0323 15:19:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:19:33.409772  543705 memory.go:184] no items to output this cycle
I0323 15:19:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 15:19:40.566253  543705 disk_info.go:125] begin check local disk info of client
I0323 15:19:40.568840  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:19:40.568846  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003cf400 0xc0003cf440]
E0323 15:19:43.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:19:43.410751  543705 memory.go:191] Add success.
I0323 15:19:43.409818  543705 cpu.go:282] Add success.
I0323 15:19:43.420549  543705 net.go:648] Add success.
I0323 15:19:43.423029  543705 net.go:770] primary dev: ETH0
I0323 15:19:43.423041  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:19:43.423054  543705 net.go:698] Add success.
I0323 15:19:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:19:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:19:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:19:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:19:53.409803  543705 memory.go:184] no items to output this cycle
I0323 15:19:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 15:20:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:20:03.409803  543705 memory.go:184] no items to output this cycle
I0323 15:20:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 15:20:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:20:13.409802  543705 memory.go:191] Add success.
I0323 15:20:13.409827  543705 cpu.go:282] Add success.
W0323 15:20:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:20:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:20:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:20:13.420203  543705 net.go:648] Add success.
I0323 15:20:13.423179  543705 net.go:770] primary dev: ETH0
I0323 15:20:13.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:20:13.423202  543705 net.go:698] Add success.
I0323 15:20:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:20:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:20:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 15:20:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:20:14.456507  543705 disk_worker.go:494] system disk:vda1
I0323 15:20:14.456551  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:20:15.455958  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:20:16.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:20:16.458028  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:20:16.458049  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:20:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:20:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:20:23.409767  543705 memory.go:184] no items to output this cycle
I0323 15:20:23.409795  543705 cpu.go:275] no items to output this cycle
E0323 15:20:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:20:33.409815  543705 memory.go:184] no items to output this cycle
I0323 15:20:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 15:20:40.568931  543705 disk_info.go:125] begin check local disk info of client
I0323 15:20:40.571507  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:20:40.571514  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 15:20:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:20:43.410766  543705 memory.go:191] Add success.
I0323 15:20:43.409810  543705 cpu.go:282] Add success.
I0323 15:20:43.420460  543705 net.go:648] Add success.
I0323 15:20:43.423302  543705 net.go:770] primary dev: ETH0
I0323 15:20:43.423315  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:20:43.423327  543705 net.go:698] Add success.
I0323 15:20:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:20:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:20:46.458082  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:20:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:20:53.409793  543705 memory.go:184] no items to output this cycle
I0323 15:20:53.409806  543705 cpu.go:275] no items to output this cycle
E0323 15:21:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:21:03.409788  543705 memory.go:184] no items to output this cycle
I0323 15:21:03.409791  543705 cpu.go:275] no items to output this cycle
E0323 15:21:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:21:13.409816  543705 memory.go:191] Add success.
I0323 15:21:13.409828  543705 cpu.go:282] Add success.
W0323 15:21:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:21:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:21:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:21:13.420161  543705 net.go:648] Add success.
I0323 15:21:13.422827  543705 net.go:770] primary dev: ETH0
I0323 15:21:13.422840  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:21:13.422853  543705 net.go:698] Add success.
I0323 15:21:13.468827  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ce781e3d-854d-4298-a939-18c6f0dfaf0b","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:21:13.468860  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:21:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:21:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:21:14.455222  543705 disk_worker.go:708] disk space is not compliant
W0323 15:21:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:21:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 15:21:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:21:15.455954  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:21:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:21:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:21:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:21:16.472418  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:21:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:21:23.409788  543705 memory.go:184] no items to output this cycle
I0323 15:21:23.409796  543705 cpu.go:275] no items to output this cycle
E0323 15:21:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:21:33.409793  543705 memory.go:184] no items to output this cycle
I0323 15:21:33.409797  543705 cpu.go:275] no items to output this cycle
I0323 15:21:40.463059  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:21:40.463065  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:21:40.571550  543705 disk_info.go:125] begin check local disk info of client
I0323 15:21:40.574131  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:21:40.574136  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b0bc0 0xc0002b0c00]
E0323 15:21:43.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:21:43.410678  543705 memory.go:191] Add success.
I0323 15:21:43.409806  543705 cpu.go:282] Add success.
I0323 15:21:43.420341  543705 net.go:648] Add success.
I0323 15:21:43.423091  543705 net.go:770] primary dev: ETH0
I0323 15:21:43.423103  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:21:43.423117  543705 net.go:698] Add success.
I0323 15:21:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:21:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:21:46.458072  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:21:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:21:53.409771  543705 memory.go:184] no items to output this cycle
I0323 15:21:53.409789  543705 cpu.go:275] no items to output this cycle
E0323 15:22:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:22:03.409782  543705 memory.go:184] no items to output this cycle
I0323 15:22:03.409789  543705 cpu.go:275] no items to output this cycle
E0323 15:22:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:22:13.409781  543705 memory.go:191] Add success.
I0323 15:22:13.409800  543705 cpu.go:282] Add success.
W0323 15:22:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:22:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:22:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:22:13.420049  543705 net.go:648] Add success.
I0323 15:22:13.422891  543705 net.go:770] primary dev: ETH0
I0323 15:22:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:22:13.422916  543705 net.go:698] Add success.
W0323 15:22:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:22:14.455178  543705 disk_worker.go:708] disk space is not compliant
W0323 15:22:14.455181  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:22:14.456930  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:22:14.456939  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:22:14.456945  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:22:14.457017  543705 disk_worker.go:494] system disk:vda1
I0323 15:22:14.457060  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:22:15.456835  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:22:15.456844  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:22:16.457926  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:22:16.457925  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:22:16.457978  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:22:16.457998  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:22:16.472311  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:22:23.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:22:23.409914  543705 memory.go:184] no items to output this cycle
I0323 15:22:23.409932  543705 cpu.go:275] no items to output this cycle
E0323 15:22:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:22:33.409809  543705 memory.go:184] no items to output this cycle
I0323 15:22:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 15:22:40.574226  543705 disk_info.go:125] begin check local disk info of client
I0323 15:22:40.576784  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:22:40.576790  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 15:22:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:22:43.410817  543705 memory.go:191] Add success.
I0323 15:22:43.409812  543705 cpu.go:282] Add success.
I0323 15:22:43.420499  543705 net.go:648] Add success.
I0323 15:22:43.423300  543705 net.go:770] primary dev: ETH0
I0323 15:22:43.423313  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:22:43.423325  543705 net.go:698] Add success.
I0323 15:22:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:22:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:22:46.458077  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:22:53.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:22:53.409795  543705 memory.go:184] no items to output this cycle
I0323 15:22:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 15:23:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:23:03.409807  543705 memory.go:184] no items to output this cycle
I0323 15:23:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 15:23:13.409752  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:23:13.409778  543705 memory.go:191] Add success.
W0323 15:23:13.409803  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:23:13.409803  543705 cpu.go:282] Add success.
W0323 15:23:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:23:13.409817  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:23:13.420323  543705 net.go:648] Add success.
I0323 15:23:13.422862  543705 net.go:770] primary dev: ETH0
I0323 15:23:13.422881  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:23:13.422897  543705 net.go:698] Add success.
I0323 15:23:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:23:14.455176  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:23:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 15:23:14.455189  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:23:14.456587  543705 disk_worker.go:494] system disk:vda1
I0323 15:23:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:23:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:23:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:23:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:23:16.458062  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:23:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:23:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:23:23.409801  543705 memory.go:184] no items to output this cycle
I0323 15:23:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:23:33.409886  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:23:33.409909  543705 memory.go:184] no items to output this cycle
I0323 15:23:33.409972  543705 cpu.go:275] no items to output this cycle
I0323 15:23:40.577524  543705 disk_info.go:125] begin check local disk info of client
I0323 15:23:40.580096  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:23:40.580103  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003e9cc0 0xc0003e9d00]
E0323 15:23:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:23:43.410714  543705 memory.go:191] Add success.
I0323 15:23:43.409800  543705 cpu.go:282] Add success.
I0323 15:23:43.420489  543705 net.go:648] Add success.
I0323 15:23:43.423574  543705 net.go:770] primary dev: ETH0
I0323 15:23:43.423586  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:23:43.423599  543705 net.go:698] Add success.
I0323 15:23:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:23:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:23:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:23:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:23:53.409774  543705 memory.go:184] no items to output this cycle
I0323 15:23:53.409782  543705 cpu.go:275] no items to output this cycle
E0323 15:24:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:24:03.409776  543705 memory.go:184] no items to output this cycle
I0323 15:24:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 15:24:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:24:13.409812  543705 memory.go:191] Add success.
I0323 15:24:13.409816  543705 cpu.go:282] Add success.
W0323 15:24:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:24:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:24:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:24:13.420061  543705 net.go:648] Add success.
I0323 15:24:13.422986  543705 net.go:770] primary dev: ETH0
I0323 15:24:13.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:24:13.423010  543705 net.go:698] Add success.
I0323 15:24:13.463890  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"d46998bf-04f7-450f-8fb6-7916431d126d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:24:13.463921  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:24:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:24:14.455153  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:24:14.455164  543705 disk_worker.go:708] disk space is not compliant
W0323 15:24:14.455167  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:24:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 15:24:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:24:15.455615  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:24:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:24:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:24:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:24:16.472398  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:24:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:24:23.409788  543705 memory.go:184] no items to output this cycle
I0323 15:24:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 15:24:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:24:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 15:24:33.409785  543705 memory.go:184] no items to output this cycle
I0323 15:24:40.464078  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:24:40.464084  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:24:40.580603  543705 disk_info.go:125] begin check local disk info of client
I0323 15:24:40.583244  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:24:40.583251  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 15:24:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:24:43.410732  543705 memory.go:191] Add success.
I0323 15:24:43.409789  543705 cpu.go:282] Add success.
I0323 15:24:43.420461  543705 net.go:648] Add success.
I0323 15:24:43.423519  543705 net.go:770] primary dev: ETH0
I0323 15:24:43.423535  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:24:43.423549  543705 net.go:698] Add success.
I0323 15:24:46.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:24:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:24:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:24:53.410233  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:24:53.410249  543705 memory.go:184] no items to output this cycle
I0323 15:24:53.410273  543705 cpu.go:275] no items to output this cycle
E0323 15:25:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:25:03.409785  543705 cpu.go:275] no items to output this cycle
I0323 15:25:03.409786  543705 memory.go:184] no items to output this cycle
E0323 15:25:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:25:13.409788  543705 memory.go:191] Add success.
I0323 15:25:13.409802  543705 cpu.go:282] Add success.
W0323 15:25:13.409814  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:25:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:25:13.409828  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:25:13.420116  543705 net.go:648] Add success.
I0323 15:25:13.422916  543705 net.go:770] primary dev: ETH0
I0323 15:25:13.422930  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:25:13.422946  543705 net.go:698] Add success.
I0323 15:25:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:25:14.455121  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:25:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 15:25:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:25:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 15:25:14.456612  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:25:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:25:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:25:16.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:25:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:25:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:25:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:25:23.409914  543705 cpu.go:275] no items to output this cycle
I0323 15:25:23.409918  543705 memory.go:184] no items to output this cycle
E0323 15:25:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:25:33.409797  543705 memory.go:184] no items to output this cycle
I0323 15:25:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 15:25:40.583335  543705 disk_info.go:125] begin check local disk info of client
I0323 15:25:40.585943  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:25:40.585949  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004710c0 0xc000471100]
E0323 15:25:43.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:25:43.410691  543705 memory.go:191] Add success.
I0323 15:25:43.409808  543705 cpu.go:282] Add success.
I0323 15:25:43.420389  543705 net.go:648] Add success.
I0323 15:25:43.423240  543705 net.go:770] primary dev: ETH0
I0323 15:25:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:25:43.423264  543705 net.go:698] Add success.
I0323 15:25:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:25:46.458032  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:25:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:25:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:25:53.409775  543705 memory.go:184] no items to output this cycle
I0323 15:25:53.409799  543705 cpu.go:275] no items to output this cycle
E0323 15:26:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:26:03.409777  543705 memory.go:184] no items to output this cycle
I0323 15:26:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:26:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:26:13.409796  543705 memory.go:191] Add success.
I0323 15:26:13.409797  543705 cpu.go:282] Add success.
W0323 15:26:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:26:13.409837  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:26:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:26:13.420090  543705 net.go:648] Add success.
I0323 15:26:13.422894  543705 net.go:770] primary dev: ETH0
I0323 15:26:13.422907  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:26:13.422920  543705 net.go:698] Add success.
I0323 15:26:14.454961  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:26:14.455092  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:26:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 15:26:14.455180  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:26:14.456577  543705 disk_worker.go:494] system disk:vda1
I0323 15:26:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:26:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:26:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:26:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:26:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:26:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:26:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:26:23.409803  543705 memory.go:184] no items to output this cycle
I0323 15:26:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 15:26:33.409873  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:26:33.409908  543705 memory.go:184] no items to output this cycle
I0323 15:26:33.410009  543705 cpu.go:275] no items to output this cycle
I0323 15:26:40.586573  543705 disk_info.go:125] begin check local disk info of client
I0323 15:26:40.589110  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:26:40.589117  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536000 0xc000536040]
E0323 15:26:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:26:43.410645  543705 memory.go:191] Add success.
I0323 15:26:43.409813  543705 cpu.go:282] Add success.
I0323 15:26:43.420344  543705 net.go:648] Add success.
I0323 15:26:43.422874  543705 net.go:770] primary dev: ETH0
I0323 15:26:43.422887  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:26:43.422900  543705 net.go:698] Add success.
I0323 15:26:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:26:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:26:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:26:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:26:53.409775  543705 cpu.go:275] no items to output this cycle
I0323 15:26:53.409779  543705 memory.go:184] no items to output this cycle
E0323 15:27:03.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:27:03.409775  543705 memory.go:184] no items to output this cycle
I0323 15:27:03.409795  543705 cpu.go:275] no items to output this cycle
E0323 15:27:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:27:13.409775  543705 memory.go:191] Add success.
W0323 15:27:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:27:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:27:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:27:13.409831  543705 cpu.go:282] Add success.
I0323 15:27:13.420224  543705 net.go:648] Add success.
I0323 15:27:13.423021  543705 net.go:770] primary dev: ETH0
I0323 15:27:13.423033  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:27:13.423044  543705 net.go:698] Add success.
I0323 15:27:13.429167  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 15:27:13.453339  543705 event_worker.go:152] Polling the log file for events...
I0323 15:27:13.468274  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7161e60b-1bd4-4a6c-afeb-d6cbb398f38f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:27:13.468308  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 15:27:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:27:14.455177  543705 disk_worker.go:708] disk space is not compliant
W0323 15:27:14.455179  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:27:14.455912  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:27:14.455921  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:27:14.455926  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:27:14.456563  543705 disk_worker.go:494] system disk:vda1
I0323 15:27:14.456592  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:27:15.456788  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:27:15.456797  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 15:27:16.457947  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:27:16.457948  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:27:16.458005  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:27:16.458024  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:27:16.472335  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:27:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:27:23.409800  543705 memory.go:184] no items to output this cycle
I0323 15:27:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 15:27:33.409887  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:27:33.409912  543705 cpu.go:275] no items to output this cycle
I0323 15:27:33.409916  543705 memory.go:184] no items to output this cycle
I0323 15:27:40.465076  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:27:40.465082  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:27:40.589198  543705 disk_info.go:125] begin check local disk info of client
I0323 15:27:40.591780  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:27:40.591788  543705 disk_info.go:196] parse disk info done, disk is : [0xc000328280 0xc0003282c0]
E0323 15:27:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:27:43.410635  543705 memory.go:191] Add success.
I0323 15:27:43.409792  543705 cpu.go:282] Add success.
I0323 15:27:43.420339  543705 net.go:648] Add success.
I0323 15:27:43.422978  543705 net.go:770] primary dev: ETH0
I0323 15:27:43.422999  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:27:43.423013  543705 net.go:698] Add success.
I0323 15:27:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:27:46.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:27:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:27:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:27:53.409795  543705 memory.go:184] no items to output this cycle
I0323 15:27:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 15:28:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:28:03.409777  543705 memory.go:184] no items to output this cycle
I0323 15:28:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 15:28:13.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:28:13.409776  543705 memory.go:191] Add success.
W0323 15:28:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:28:13.409801  543705 cpu.go:282] Add success.
W0323 15:28:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:28:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:28:13.420186  543705 net.go:648] Add success.
I0323 15:28:13.423176  543705 net.go:770] primary dev: ETH0
I0323 15:28:13.423191  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:28:13.423204  543705 net.go:698] Add success.
I0323 15:28:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:28:14.455194  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:28:14.455205  543705 disk_worker.go:708] disk space is not compliant
W0323 15:28:14.455208  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:28:14.456583  543705 disk_worker.go:494] system disk:vda1
I0323 15:28:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:28:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:28:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:28:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:28:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:28:16.472427  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:28:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:28:23.409778  543705 memory.go:184] no items to output this cycle
I0323 15:28:23.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:28:33.409869  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:28:33.409875  543705 cpu.go:275] no items to output this cycle
I0323 15:28:33.409892  543705 memory.go:184] no items to output this cycle
I0323 15:28:40.592595  543705 disk_info.go:125] begin check local disk info of client
I0323 15:28:40.595195  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:28:40.595202  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498f40 0xc000498f80]
E0323 15:28:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:28:43.410703  543705 memory.go:191] Add success.
I0323 15:28:43.409787  543705 cpu.go:282] Add success.
I0323 15:28:43.420439  543705 net.go:648] Add success.
I0323 15:28:43.423259  543705 net.go:770] primary dev: ETH0
I0323 15:28:43.423273  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:28:43.423287  543705 net.go:698] Add success.
I0323 15:28:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:28:46.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:28:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:28:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:28:53.409774  543705 memory.go:184] no items to output this cycle
I0323 15:28:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 15:29:03.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:29:03.409779  543705 memory.go:184] no items to output this cycle
I0323 15:29:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:29:13.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:29:13.409815  543705 memory.go:191] Add success.
I0323 15:29:13.409827  543705 cpu.go:282] Add success.
W0323 15:29:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:29:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:29:13.409868  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:29:13.420155  543705 net.go:648] Add success.
I0323 15:29:13.422983  543705 net.go:770] primary dev: ETH0
I0323 15:29:13.422996  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:29:13.423008  543705 net.go:698] Add success.
I0323 15:29:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:29:14.455197  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:29:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 15:29:14.455209  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:29:14.456614  543705 disk_worker.go:494] system disk:vda1
I0323 15:29:14.456644  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:29:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:29:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:29:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:29:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:29:16.472457  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:29:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:29:23.409777  543705 memory.go:184] no items to output this cycle
I0323 15:29:23.409906  543705 cpu.go:275] no items to output this cycle
E0323 15:29:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:29:33.409824  543705 memory.go:184] no items to output this cycle
I0323 15:29:33.409836  543705 cpu.go:275] no items to output this cycle
I0323 15:29:40.595618  543705 disk_info.go:125] begin check local disk info of client
I0323 15:29:40.598254  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:29:40.598260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4540 0xc0000c4580]
E0323 15:29:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:29:43.410665  543705 memory.go:191] Add success.
I0323 15:29:43.409824  543705 cpu.go:282] Add success.
I0323 15:29:43.420370  543705 net.go:648] Add success.
I0323 15:29:43.422954  543705 net.go:770] primary dev: ETH0
I0323 15:29:43.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:29:43.422980  543705 net.go:698] Add success.
I0323 15:29:46.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:29:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:29:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:29:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:29:53.409786  543705 cpu.go:275] no items to output this cycle
I0323 15:29:53.409790  543705 memory.go:184] no items to output this cycle
E0323 15:30:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:30:03.409816  543705 memory.go:184] no items to output this cycle
I0323 15:30:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 15:30:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:30:13.409799  543705 memory.go:191] Add success.
I0323 15:30:13.409817  543705 cpu.go:282] Add success.
W0323 15:30:13.409826  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:30:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:30:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:30:13.420158  543705 net.go:648] Add success.
I0323 15:30:13.422704  543705 net.go:770] primary dev: ETH0
I0323 15:30:13.422718  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:30:13.422730  543705 net.go:698] Add success.
I0323 15:30:13.611828  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"4aacabc8-09a2-4157-a3a1-a4b8715bd5cf","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:30:13.611868  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:30:14.453960  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:30:14.455165  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:30:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 15:30:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:30:14.456602  543705 disk_worker.go:494] system disk:vda1
I0323 15:30:14.456649  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:30:15.455951  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:30:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:30:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:30:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:30:16.472406  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:30:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:30:23.409819  543705 memory.go:184] no items to output this cycle
I0323 15:30:23.409833  543705 cpu.go:275] no items to output this cycle
E0323 15:30:33.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:30:33.409790  543705 memory.go:184] no items to output this cycle
I0323 15:30:33.409805  543705 cpu.go:275] no items to output this cycle
I0323 15:30:40.466078  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:30:40.466084  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:30:40.598340  543705 disk_info.go:125] begin check local disk info of client
I0323 15:30:40.600914  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:30:40.600920  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2140 0xc0004f2180]
E0323 15:30:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:30:43.410636  543705 memory.go:191] Add success.
I0323 15:30:43.409847  543705 cpu.go:282] Add success.
I0323 15:30:43.420410  543705 net.go:648] Add success.
I0323 15:30:43.423059  543705 net.go:770] primary dev: ETH0
I0323 15:30:43.423074  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:30:43.423088  543705 net.go:698] Add success.
I0323 15:30:46.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:30:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:30:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:30:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:30:53.409767  543705 memory.go:184] no items to output this cycle
I0323 15:30:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 15:31:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:31:03.409785  543705 memory.go:184] no items to output this cycle
I0323 15:31:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 15:31:13.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:31:13.409817  543705 cpu.go:282] Add success.
I0323 15:31:13.409846  543705 memory.go:191] Add success.
W0323 15:31:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:31:13.409902  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:31:13.409906  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:31:13.420377  543705 net.go:648] Add success.
I0323 15:31:13.423200  543705 net.go:770] primary dev: ETH0
I0323 15:31:13.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:31:13.423226  543705 net.go:698] Add success.
I0323 15:31:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:31:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:31:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 15:31:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:31:14.456582  543705 disk_worker.go:494] system disk:vda1
I0323 15:31:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:31:15.456022  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:31:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:31:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:31:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:31:16.472411  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:31:23.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:31:23.409763  543705 memory.go:184] no items to output this cycle
I0323 15:31:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 15:31:33.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:31:33.409811  543705 memory.go:184] no items to output this cycle
I0323 15:31:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 15:31:40.601641  543705 disk_info.go:125] begin check local disk info of client
I0323 15:31:40.604213  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:31:40.604219  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2180 0xc0004f21c0]
E0323 15:31:43.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:31:43.410828  543705 memory.go:191] Add success.
I0323 15:31:43.409805  543705 cpu.go:282] Add success.
I0323 15:31:43.420515  543705 net.go:648] Add success.
I0323 15:31:43.423447  543705 net.go:770] primary dev: ETH0
I0323 15:31:43.423461  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:31:43.423473  543705 net.go:698] Add success.
I0323 15:31:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:31:46.458048  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:31:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:31:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:31:53.409776  543705 cpu.go:275] no items to output this cycle
I0323 15:31:53.409778  543705 memory.go:184] no items to output this cycle
E0323 15:32:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:32:03.409804  543705 memory.go:184] no items to output this cycle
I0323 15:32:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:32:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:32:13.409793  543705 cpu.go:282] Add success.
I0323 15:32:13.409795  543705 memory.go:191] Add success.
W0323 15:32:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:32:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:32:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:32:13.420050  543705 net.go:648] Add success.
I0323 15:32:13.422885  543705 net.go:770] primary dev: ETH0
I0323 15:32:13.422899  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:32:13.422911  543705 net.go:698] Add success.
W0323 15:32:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:32:14.455196  543705 disk_worker.go:708] disk space is not compliant
W0323 15:32:14.455198  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:32:14.455899  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:32:14.455907  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:32:14.455913  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:32:14.456580  543705 disk_worker.go:494] system disk:vda1
I0323 15:32:14.456609  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:32:15.456814  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:32:15.456824  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:32:16.457921  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:32:16.457921  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:32:16.457975  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:32:16.457995  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:32:16.472324  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:32:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:32:23.409786  543705 memory.go:184] no items to output this cycle
I0323 15:32:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 15:32:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:32:33.409800  543705 memory.go:184] no items to output this cycle
I0323 15:32:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 15:32:40.604304  543705 disk_info.go:125] begin check local disk info of client
I0323 15:32:40.606860  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:32:40.606866  543705 disk_info.go:196] parse disk info done, disk is : [0xc000494a80 0xc000494ac0]
E0323 15:32:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:32:43.410650  543705 memory.go:191] Add success.
I0323 15:32:43.409795  543705 cpu.go:282] Add success.
I0323 15:32:43.420326  543705 net.go:648] Add success.
I0323 15:32:43.423097  543705 net.go:770] primary dev: ETH0
I0323 15:32:43.423111  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:32:43.423125  543705 net.go:698] Add success.
I0323 15:32:46.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:32:46.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:32:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:32:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:32:53.409777  543705 cpu.go:275] no items to output this cycle
I0323 15:32:53.409779  543705 memory.go:184] no items to output this cycle
E0323 15:33:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:33:03.409773  543705 memory.go:184] no items to output this cycle
I0323 15:33:03.409801  543705 cpu.go:275] no items to output this cycle
E0323 15:33:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:33:13.409793  543705 memory.go:191] Add success.
I0323 15:33:13.409818  543705 cpu.go:282] Add success.
W0323 15:33:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:33:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:33:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:33:13.420292  543705 net.go:648] Add success.
I0323 15:33:13.423156  543705 net.go:770] primary dev: ETH0
I0323 15:33:13.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:33:13.423186  543705 net.go:698] Add success.
I0323 15:33:13.602746  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"da1fb4e9-ab1f-458d-8208-b789d430010f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:33:13.602780  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:33:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:33:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:33:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 15:33:14.455220  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:33:14.456611  543705 disk_worker.go:494] system disk:vda1
I0323 15:33:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:33:15.455969  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:33:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:33:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:33:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:33:16.472439  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:33:23.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:33:23.409785  543705 memory.go:184] no items to output this cycle
I0323 15:33:23.409788  543705 cpu.go:275] no items to output this cycle
E0323 15:33:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:33:33.409805  543705 memory.go:184] no items to output this cycle
I0323 15:33:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 15:33:40.467093  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:33:40.467100  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:33:40.607785  543705 disk_info.go:125] begin check local disk info of client
I0323 15:33:40.610362  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:33:40.610368  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003d4e00 0xc0003d4e40]
E0323 15:33:43.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:33:43.410746  543705 memory.go:191] Add success.
I0323 15:33:43.409810  543705 cpu.go:282] Add success.
I0323 15:33:43.420432  543705 net.go:648] Add success.
I0323 15:33:43.423216  543705 net.go:770] primary dev: ETH0
I0323 15:33:43.423230  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:33:43.423243  543705 net.go:698] Add success.
I0323 15:33:46.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:33:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:33:46.458067  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:33:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:33:53.409775  543705 memory.go:184] no items to output this cycle
I0323 15:33:53.409788  543705 cpu.go:275] no items to output this cycle
E0323 15:34:03.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:34:03.409773  543705 memory.go:184] no items to output this cycle
I0323 15:34:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 15:34:13.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:34:13.409792  543705 cpu.go:282] Add success.
I0323 15:34:13.409793  543705 memory.go:191] Add success.
W0323 15:34:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:34:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:34:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:34:13.420237  543705 net.go:648] Add success.
I0323 15:34:13.423263  543705 net.go:770] primary dev: ETH0
I0323 15:34:13.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:34:13.423291  543705 net.go:698] Add success.
I0323 15:34:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:34:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:34:14.455184  543705 disk_worker.go:708] disk space is not compliant
W0323 15:34:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:34:14.456978  543705 disk_worker.go:494] system disk:vda1
I0323 15:34:14.457008  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:34:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:34:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:34:16.458038  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:34:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:34:16.472452  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:34:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:34:23.409798  543705 memory.go:184] no items to output this cycle
I0323 15:34:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 15:34:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:34:33.409772  543705 memory.go:184] no items to output this cycle
I0323 15:34:33.409780  543705 cpu.go:275] no items to output this cycle
I0323 15:34:40.610457  543705 disk_info.go:125] begin check local disk info of client
I0323 15:34:40.613059  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:34:40.613066  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 15:34:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:34:43.410709  543705 memory.go:191] Add success.
I0323 15:34:43.409818  543705 cpu.go:282] Add success.
I0323 15:34:43.420415  543705 net.go:648] Add success.
I0323 15:34:43.423072  543705 net.go:770] primary dev: ETH0
I0323 15:34:43.423085  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:34:43.423098  543705 net.go:698] Add success.
I0323 15:34:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:34:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:34:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:34:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:34:53.409771  543705 memory.go:184] no items to output this cycle
I0323 15:34:53.409795  543705 cpu.go:275] no items to output this cycle
E0323 15:35:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:35:03.409800  543705 memory.go:184] no items to output this cycle
I0323 15:35:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:35:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:35:13.409791  543705 memory.go:191] Add success.
I0323 15:35:13.409807  543705 cpu.go:282] Add success.
W0323 15:35:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:35:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:35:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:35:13.420162  543705 net.go:648] Add success.
I0323 15:35:13.422943  543705 net.go:770] primary dev: ETH0
I0323 15:35:13.422956  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:35:13.422974  543705 net.go:698] Add success.
I0323 15:35:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:35:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:35:14.455191  543705 disk_worker.go:708] disk space is not compliant
W0323 15:35:14.455194  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:35:14.456852  543705 disk_worker.go:494] system disk:vda1
I0323 15:35:14.456881  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:35:15.455947  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:35:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:35:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:35:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:35:16.472437  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:35:23.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:35:23.409770  543705 memory.go:184] no items to output this cycle
I0323 15:35:23.409777  543705 cpu.go:275] no items to output this cycle
E0323 15:35:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:35:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 15:35:33.409847  543705 memory.go:184] no items to output this cycle
I0323 15:35:40.613677  543705 disk_info.go:125] begin check local disk info of client
I0323 15:35:40.616219  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:35:40.616226  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a700 0xc00007a740]
E0323 15:35:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:35:43.410698  543705 memory.go:191] Add success.
I0323 15:35:43.409817  543705 cpu.go:282] Add success.
I0323 15:35:43.420420  543705 net.go:648] Add success.
I0323 15:35:43.423339  543705 net.go:770] primary dev: ETH0
I0323 15:35:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:35:43.423368  543705 net.go:698] Add success.
I0323 15:35:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:35:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:35:46.458065  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:35:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:35:53.409780  543705 memory.go:184] no items to output this cycle
I0323 15:35:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 15:36:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:36:03.409811  543705 memory.go:184] no items to output this cycle
I0323 15:36:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 15:36:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:36:13.409787  543705 memory.go:191] Add success.
I0323 15:36:13.409805  543705 cpu.go:282] Add success.
W0323 15:36:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:36:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:36:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:36:13.420144  543705 net.go:648] Add success.
I0323 15:36:13.422853  543705 net.go:770] primary dev: ETH0
I0323 15:36:13.422869  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:36:13.422882  543705 net.go:698] Add success.
I0323 15:36:13.468378  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"07f93a15-58bc-453b-b290-cf7b64629462","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:36:13.468409  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:36:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:36:14.455108  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:36:14.455280  543705 disk_worker.go:708] disk space is not compliant
W0323 15:36:14.455287  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:36:14.456857  543705 disk_worker.go:494] system disk:vda1
I0323 15:36:14.456898  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:36:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:36:16.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:36:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:36:16.458052  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:36:16.472375  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:36:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:36:23.409778  543705 memory.go:184] no items to output this cycle
I0323 15:36:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 15:36:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:36:33.409777  543705 memory.go:184] no items to output this cycle
I0323 15:36:33.409782  543705 cpu.go:275] no items to output this cycle
I0323 15:36:40.468110  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:36:40.468119  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:36:40.616799  543705 disk_info.go:125] begin check local disk info of client
I0323 15:36:40.619471  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:36:40.619479  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002bfb00 0xc0002bfb40]
E0323 15:36:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:36:43.410669  543705 memory.go:191] Add success.
I0323 15:36:43.409832  543705 cpu.go:282] Add success.
I0323 15:36:43.420382  543705 net.go:648] Add success.
I0323 15:36:43.423180  543705 net.go:770] primary dev: ETH0
I0323 15:36:43.423194  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:36:43.423206  543705 net.go:698] Add success.
I0323 15:36:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:36:46.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:36:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:36:53.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:36:53.409776  543705 memory.go:184] no items to output this cycle
I0323 15:36:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 15:37:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:37:03.409813  543705 memory.go:184] no items to output this cycle
I0323 15:37:03.409824  543705 cpu.go:275] no items to output this cycle
I0323 15:37:13.409809  543705 cpu.go:282] Add success.
E0323 15:37:13.409821  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:37:13.409848  543705 memory.go:191] Add success.
W0323 15:37:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:37:13.409897  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:37:13.409901  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:37:13.420110  543705 net.go:648] Add success.
I0323 15:37:13.421090  543705 net.go:770] primary dev: ETH0
I0323 15:37:13.421109  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:37:13.421126  543705 net.go:698] Add success.
I0323 15:37:13.453720  543705 event_worker.go:152] Polling the log file for events...
W0323 15:37:14.455426  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:37:14.455446  543705 disk_worker.go:708] disk space is not compliant
W0323 15:37:14.455451  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:37:14.456467  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:37:14.456477  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:37:14.456484  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:37:14.457635  543705 disk_worker.go:494] system disk:vda1
I0323 15:37:14.457688  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:37:15.457046  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:37:15.457060  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:37:16.458103  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:37:16.458175  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 15:37:16.458177  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:37:16.458196  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:37:16.472576  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:37:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:37:23.409823  543705 memory.go:184] no items to output this cycle
I0323 15:37:23.409832  543705 cpu.go:275] no items to output this cycle
E0323 15:37:33.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:37:33.409780  543705 memory.go:184] no items to output this cycle
I0323 15:37:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 15:37:40.619755  543705 disk_info.go:125] begin check local disk info of client
I0323 15:37:40.622382  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:37:40.622389  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f3240 0xc0004f3280]
E0323 15:37:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:37:43.410852  543705 memory.go:191] Add success.
I0323 15:37:43.409841  543705 cpu.go:282] Add success.
I0323 15:37:43.420572  543705 net.go:648] Add success.
I0323 15:37:43.423173  543705 net.go:770] primary dev: ETH0
I0323 15:37:43.423189  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:37:43.423203  543705 net.go:698] Add success.
I0323 15:37:46.458019  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:37:46.458114  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:37:46.458148  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:37:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:37:53.409788  543705 memory.go:184] no items to output this cycle
I0323 15:37:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 15:38:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:38:03.409789  543705 memory.go:184] no items to output this cycle
I0323 15:38:03.409797  543705 cpu.go:275] no items to output this cycle
E0323 15:38:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:38:13.409793  543705 memory.go:191] Add success.
W0323 15:38:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:38:13.409820  543705 cpu.go:282] Add success.
W0323 15:38:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:38:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:38:13.420206  543705 net.go:648] Add success.
I0323 15:38:13.422875  543705 net.go:770] primary dev: ETH0
I0323 15:38:13.422888  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:38:13.422901  543705 net.go:698] Add success.
I0323 15:38:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:38:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:38:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 15:38:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:38:14.457124  543705 disk_worker.go:494] system disk:vda1
I0323 15:38:14.457157  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:38:15.455966  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:38:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:38:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:38:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:38:16.472467  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:38:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:38:23.409788  543705 memory.go:184] no items to output this cycle
I0323 15:38:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 15:38:33.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:38:33.409841  543705 memory.go:184] no items to output this cycle
I0323 15:38:33.409854  543705 cpu.go:275] no items to output this cycle
I0323 15:38:40.622747  543705 disk_info.go:125] begin check local disk info of client
I0323 15:38:40.625311  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:38:40.625318  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a07c0 0xc0002a0800]
E0323 15:38:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:38:43.410717  543705 memory.go:191] Add success.
I0323 15:38:43.409801  543705 cpu.go:282] Add success.
I0323 15:38:43.420405  543705 net.go:648] Add success.
I0323 15:38:43.423018  543705 net.go:770] primary dev: ETH0
I0323 15:38:43.423031  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:38:43.423044  543705 net.go:698] Add success.
I0323 15:38:46.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:38:46.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:38:46.458128  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:38:53.410352  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:38:53.410370  543705 memory.go:184] no items to output this cycle
I0323 15:38:53.410399  543705 cpu.go:275] no items to output this cycle
E0323 15:39:03.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:39:03.409792  543705 memory.go:184] no items to output this cycle
I0323 15:39:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 15:39:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:39:13.409828  543705 cpu.go:282] Add success.
I0323 15:39:13.409843  543705 memory.go:191] Add success.
W0323 15:39:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:39:13.409892  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:39:13.409896  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:39:13.420493  543705 net.go:648] Add success.
I0323 15:39:13.423178  543705 net.go:770] primary dev: ETH0
I0323 15:39:13.423196  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:39:13.423216  543705 net.go:698] Add success.
I0323 15:39:13.468512  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e6df6dca-7ca2-4517-ab06-db816d62636a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:39:13.468556  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:39:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:39:14.455230  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:39:14.455244  543705 disk_worker.go:708] disk space is not compliant
W0323 15:39:14.455247  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:39:14.456962  543705 disk_worker.go:494] system disk:vda1
I0323 15:39:14.456996  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:39:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:39:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:39:16.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:39:16.458094  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:39:16.472498  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:39:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:39:23.409798  543705 memory.go:184] no items to output this cycle
I0323 15:39:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 15:39:33.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:39:33.409786  543705 memory.go:184] no items to output this cycle
I0323 15:39:33.409808  543705 cpu.go:275] no items to output this cycle
I0323 15:39:40.469115  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:39:40.469123  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:39:40.625677  543705 disk_info.go:125] begin check local disk info of client
I0323 15:39:40.628264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:39:40.628271  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f21c0 0xc0004f2200]
E0323 15:39:43.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:39:43.410904  543705 memory.go:191] Add success.
I0323 15:39:43.409913  543705 cpu.go:282] Add success.
I0323 15:39:43.419799  543705 net.go:648] Add success.
I0323 15:39:43.422376  543705 net.go:770] primary dev: ETH0
I0323 15:39:43.422391  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:39:43.422404  543705 net.go:698] Add success.
I0323 15:39:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:39:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:39:46.458093  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:39:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:39:53.409803  543705 memory.go:184] no items to output this cycle
I0323 15:39:53.409831  543705 cpu.go:275] no items to output this cycle
E0323 15:40:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:40:03.409815  543705 memory.go:184] no items to output this cycle
I0323 15:40:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 15:40:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:40:13.409831  543705 memory.go:191] Add success.
I0323 15:40:13.409843  543705 cpu.go:282] Add success.
W0323 15:40:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:40:13.409881  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:40:13.409885  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:40:13.420194  543705 net.go:648] Add success.
I0323 15:40:13.423002  543705 net.go:770] primary dev: ETH0
I0323 15:40:13.423017  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:40:13.423031  543705 net.go:698] Add success.
I0323 15:40:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:40:14.455137  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:40:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 15:40:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:40:14.456617  543705 disk_worker.go:494] system disk:vda1
I0323 15:40:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:40:15.455996  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:40:16.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:40:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:40:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:40:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:40:23.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:40:23.409830  543705 memory.go:184] no items to output this cycle
I0323 15:40:23.409839  543705 cpu.go:275] no items to output this cycle
E0323 15:40:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:40:33.409789  543705 memory.go:184] no items to output this cycle
I0323 15:40:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 15:40:40.628781  543705 disk_info.go:125] begin check local disk info of client
I0323 15:40:40.631400  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:40:40.631407  543705 disk_info.go:196] parse disk info done, disk is : [0xc000397400 0xc000397440]
E0323 15:40:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:40:43.411108  543705 memory.go:191] Add success.
I0323 15:40:43.409845  543705 cpu.go:282] Add success.
I0323 15:40:43.419841  543705 net.go:648] Add success.
I0323 15:40:43.422982  543705 net.go:770] primary dev: ETH0
I0323 15:40:43.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:40:43.423010  543705 net.go:698] Add success.
I0323 15:40:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:40:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:40:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:40:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:40:53.409793  543705 memory.go:184] no items to output this cycle
I0323 15:40:53.409825  543705 cpu.go:275] no items to output this cycle
E0323 15:41:03.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:41:03.409826  543705 memory.go:184] no items to output this cycle
I0323 15:41:03.409839  543705 cpu.go:275] no items to output this cycle
E0323 15:41:13.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:41:13.409834  543705 memory.go:191] Add success.
I0323 15:41:13.409845  543705 cpu.go:282] Add success.
W0323 15:41:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:41:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:41:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:41:13.420313  543705 net.go:648] Add success.
I0323 15:41:13.423399  543705 net.go:770] primary dev: ETH0
I0323 15:41:13.423413  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:41:13.423428  543705 net.go:698] Add success.
I0323 15:41:14.453942  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:41:14.455211  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:41:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0323 15:41:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:41:14.456615  543705 disk_worker.go:494] system disk:vda1
I0323 15:41:14.456669  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:41:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:41:16.458020  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:41:16.458126  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:41:16.458160  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:41:16.472615  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:41:23.409829  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:41:23.409852  543705 memory.go:184] no items to output this cycle
I0323 15:41:23.409867  543705 cpu.go:275] no items to output this cycle
E0323 15:41:33.409828  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:41:33.409839  543705 cpu.go:275] no items to output this cycle
I0323 15:41:33.409852  543705 memory.go:184] no items to output this cycle
I0323 15:41:40.631819  543705 disk_info.go:125] begin check local disk info of client
I0323 15:41:40.646285  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:41:40.646293  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0400 0xc0002a0440]
E0323 15:41:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:41:43.410805  543705 memory.go:191] Add success.
I0323 15:41:43.409882  543705 cpu.go:282] Add success.
I0323 15:41:43.420522  543705 net.go:648] Add success.
I0323 15:41:43.423616  543705 net.go:770] primary dev: ETH0
I0323 15:41:43.423629  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:41:43.423642  543705 net.go:698] Add success.
I0323 15:41:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:41:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:41:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:41:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:41:53.409818  543705 memory.go:184] no items to output this cycle
I0323 15:41:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 15:42:03.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:42:03.409827  543705 cpu.go:275] no items to output this cycle
I0323 15:42:03.409844  543705 memory.go:184] no items to output this cycle
E0323 15:42:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:42:13.409846  543705 memory.go:191] Add success.
I0323 15:42:13.409865  543705 cpu.go:282] Add success.
W0323 15:42:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:42:13.409895  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:42:13.409899  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:42:13.420321  543705 net.go:648] Add success.
I0323 15:42:13.423207  543705 net.go:770] primary dev: ETH0
I0323 15:42:13.423226  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:42:13.423240  543705 net.go:698] Add success.
I0323 15:42:13.469904  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"c9088355-e119-4242-b7a5-bd3ceb32a793","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:42:13.469939  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 15:42:14.455223  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:42:14.455236  543705 disk_worker.go:708] disk space is not compliant
W0323 15:42:14.455239  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:42:14.456110  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:42:14.456120  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:42:14.456126  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:42:14.456658  543705 disk_worker.go:494] system disk:vda1
I0323 15:42:14.456690  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:42:15.457018  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:42:15.457032  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:42:16.458152  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:42:16.458226  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:42:16.458252  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:42:16.458261  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:42:16.472700  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:42:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:42:23.409792  543705 memory.go:184] no items to output this cycle
I0323 15:42:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 15:42:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:42:33.409795  543705 memory.go:184] no items to output this cycle
I0323 15:42:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 15:42:40.470167  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:42:40.470176  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:42:40.646992  543705 disk_info.go:125] begin check local disk info of client
I0323 15:42:40.649578  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:42:40.649585  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536400 0xc000536440]
E0323 15:42:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:42:43.410743  543705 memory.go:191] Add success.
I0323 15:42:43.409832  543705 cpu.go:282] Add success.
I0323 15:42:43.420480  543705 net.go:648] Add success.
I0323 15:42:43.423201  543705 net.go:770] primary dev: ETH0
I0323 15:42:43.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:42:43.423225  543705 net.go:698] Add success.
I0323 15:42:46.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:42:46.458100  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:42:46.458134  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:42:53.409753  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:42:53.409771  543705 memory.go:184] no items to output this cycle
I0323 15:42:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 15:43:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:43:03.409789  543705 memory.go:184] no items to output this cycle
I0323 15:43:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:43:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:43:13.409827  543705 memory.go:191] Add success.
I0323 15:43:13.409833  543705 cpu.go:282] Add success.
W0323 15:43:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:43:13.409874  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:43:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:43:13.420179  543705 net.go:648] Add success.
I0323 15:43:13.422769  543705 net.go:770] primary dev: ETH0
I0323 15:43:13.422784  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:43:13.422799  543705 net.go:698] Add success.
I0323 15:43:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:43:14.455134  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:43:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 15:43:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:43:14.456562  543705 disk_worker.go:494] system disk:vda1
I0323 15:43:14.456607  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:43:15.455982  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:43:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:43:16.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:43:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:43:16.472496  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:43:23.409913  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:43:23.409933  543705 memory.go:184] no items to output this cycle
I0323 15:43:23.410044  543705 cpu.go:275] no items to output this cycle
E0323 15:43:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:43:33.409788  543705 memory.go:184] no items to output this cycle
I0323 15:43:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 15:43:40.649686  543705 disk_info.go:125] begin check local disk info of client
I0323 15:43:40.652298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:43:40.652306  543705 disk_info.go:196] parse disk info done, disk is : [0xc000396000 0xc000396040]
E0323 15:43:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:43:43.410813  543705 memory.go:191] Add success.
I0323 15:43:43.409821  543705 cpu.go:282] Add success.
I0323 15:43:43.420514  543705 net.go:648] Add success.
I0323 15:43:43.423183  543705 net.go:770] primary dev: ETH0
I0323 15:43:43.423197  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:43:43.423209  543705 net.go:698] Add success.
I0323 15:43:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:43:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:43:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:43:53.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:43:53.409807  543705 memory.go:184] no items to output this cycle
I0323 15:43:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 15:44:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:44:03.409794  543705 memory.go:184] no items to output this cycle
I0323 15:44:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 15:44:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:44:13.409796  543705 memory.go:191] Add success.
W0323 15:44:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:44:13.409823  543705 cpu.go:282] Add success.
W0323 15:44:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:44:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:44:13.420207  543705 net.go:648] Add success.
I0323 15:44:13.422954  543705 net.go:770] primary dev: ETH0
I0323 15:44:13.422970  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:44:13.422990  543705 net.go:698] Add success.
I0323 15:44:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:44:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:44:14.455225  543705 disk_worker.go:708] disk space is not compliant
W0323 15:44:14.455228  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:44:14.456679  543705 disk_worker.go:494] system disk:vda1
I0323 15:44:14.456713  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:44:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:44:16.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:44:16.458065  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:44:16.458092  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:44:16.472479  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:44:23.409907  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:44:23.409959  543705 memory.go:184] no items to output this cycle
I0323 15:44:23.410032  543705 cpu.go:275] no items to output this cycle
E0323 15:44:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:44:33.409820  543705 memory.go:184] no items to output this cycle
I0323 15:44:33.409829  543705 cpu.go:275] no items to output this cycle
I0323 15:44:40.652396  543705 disk_info.go:125] begin check local disk info of client
I0323 15:44:40.654980  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:44:40.654986  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa280 0xc0001aa2c0]
E0323 15:44:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:44:43.410644  543705 memory.go:191] Add success.
I0323 15:44:43.409848  543705 cpu.go:282] Add success.
I0323 15:44:43.420418  543705 net.go:648] Add success.
I0323 15:44:43.423174  543705 net.go:770] primary dev: ETH0
I0323 15:44:43.423187  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:44:43.423199  543705 net.go:698] Add success.
I0323 15:44:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:44:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:44:46.458106  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:44:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:44:53.409814  543705 memory.go:184] no items to output this cycle
I0323 15:44:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:45:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:45:03.409803  543705 memory.go:184] no items to output this cycle
I0323 15:45:03.409851  543705 cpu.go:275] no items to output this cycle
E0323 15:45:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:45:13.409826  543705 memory.go:191] Add success.
I0323 15:45:13.409833  543705 cpu.go:282] Add success.
W0323 15:45:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:45:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:45:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:45:13.420208  543705 net.go:648] Add success.
I0323 15:45:13.422939  543705 net.go:770] primary dev: ETH0
I0323 15:45:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:45:13.422969  543705 net.go:698] Add success.
I0323 15:45:13.468449  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"8ea15eba-0911-458b-8e84-d651bf211683","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:45:13.468487  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:45:14.454993  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:45:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:45:14.455219  543705 disk_worker.go:708] disk space is not compliant
W0323 15:45:14.455222  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:45:14.456634  543705 disk_worker.go:494] system disk:vda1
I0323 15:45:14.456690  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:45:15.455974  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:45:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:45:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:45:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:45:16.472474  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:45:23.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:45:23.409912  543705 memory.go:184] no items to output this cycle
I0323 15:45:23.409983  543705 cpu.go:275] no items to output this cycle
E0323 15:45:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:45:33.409822  543705 memory.go:184] no items to output this cycle
I0323 15:45:33.409835  543705 cpu.go:275] no items to output this cycle
I0323 15:45:40.471117  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:45:40.471124  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:45:40.656020  543705 disk_info.go:125] begin check local disk info of client
I0323 15:45:40.658568  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:45:40.658575  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 15:45:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:45:43.410685  543705 memory.go:191] Add success.
I0323 15:45:43.409807  543705 cpu.go:282] Add success.
I0323 15:45:43.420451  543705 net.go:648] Add success.
I0323 15:45:43.423223  543705 net.go:770] primary dev: ETH0
I0323 15:45:43.423238  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:45:43.423252  543705 net.go:698] Add success.
I0323 15:45:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:45:46.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:45:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:45:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:45:53.409779  543705 memory.go:184] no items to output this cycle
I0323 15:45:53.409796  543705 cpu.go:275] no items to output this cycle
E0323 15:46:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:46:03.409809  543705 memory.go:184] no items to output this cycle
I0323 15:46:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 15:46:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:46:13.409794  543705 memory.go:191] Add success.
I0323 15:46:13.409812  543705 cpu.go:282] Add success.
W0323 15:46:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:46:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:46:13.409838  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:46:13.420182  543705 net.go:648] Add success.
I0323 15:46:13.422997  543705 net.go:770] primary dev: ETH0
I0323 15:46:13.423011  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:46:13.423025  543705 net.go:698] Add success.
I0323 15:46:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:46:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:46:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 15:46:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:46:14.456604  543705 disk_worker.go:494] system disk:vda1
I0323 15:46:14.456652  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:46:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:46:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:46:16.472401  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:46:23.410299  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:46:23.410320  543705 memory.go:184] no items to output this cycle
I0323 15:46:23.410323  543705 cpu.go:275] no items to output this cycle
E0323 15:46:33.409961  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:46:33.409963  543705 cpu.go:275] no items to output this cycle
I0323 15:46:33.410017  543705 memory.go:184] no items to output this cycle
I0323 15:46:40.658669  543705 disk_info.go:125] begin check local disk info of client
I0323 15:46:40.661259  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:46:40.661267  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536000 0xc000536040]
E0323 15:46:43.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:46:43.410711  543705 memory.go:191] Add success.
I0323 15:46:43.409838  543705 cpu.go:282] Add success.
I0323 15:46:43.420478  543705 net.go:648] Add success.
I0323 15:46:43.423109  543705 net.go:770] primary dev: ETH0
I0323 15:46:43.423123  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:46:43.423137  543705 net.go:698] Add success.
I0323 15:46:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:46:46.458092  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:46:46.458119  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:46:53.410411  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:46:53.410428  543705 memory.go:184] no items to output this cycle
I0323 15:46:53.410434  543705 cpu.go:275] no items to output this cycle
E0323 15:47:03.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:47:03.409824  543705 memory.go:184] no items to output this cycle
I0323 15:47:03.409836  543705 cpu.go:275] no items to output this cycle
E0323 15:47:13.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:47:13.409791  543705 memory.go:191] Add success.
W0323 15:47:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:47:13.409821  543705 cpu.go:282] Add success.
W0323 15:47:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:47:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:47:13.420115  543705 net.go:648] Add success.
I0323 15:47:13.423270  543705 net.go:770] primary dev: ETH0
I0323 15:47:13.423283  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:47:13.423294  543705 net.go:698] Add success.
I0323 15:47:13.452950  543705 event_worker.go:152] Polling the log file for events...
W0323 15:47:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:47:14.455204  543705 disk_worker.go:708] disk space is not compliant
W0323 15:47:14.455207  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:47:14.457191  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:47:14.457202  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:47:14.457204  543705 disk_worker.go:494] system disk:vda1
I0323 15:47:14.457210  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:47:14.457251  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:47:15.456859  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:47:15.456870  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:47:16.458092  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:47:16.458161  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:47:16.458186  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:47:16.458187  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:47:16.472566  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:47:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:47:23.409782  543705 memory.go:184] no items to output this cycle
I0323 15:47:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 15:47:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:47:33.409817  543705 memory.go:184] no items to output this cycle
I0323 15:47:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 15:47:40.661673  543705 disk_info.go:125] begin check local disk info of client
I0323 15:47:40.664323  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:47:40.664331  543705 disk_info.go:196] parse disk info done, disk is : [0xc000244100 0xc000244180]
E0323 15:47:43.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:47:43.409810  543705 memory.go:191] Add success.
I0323 15:47:43.409852  543705 cpu.go:282] Add success.
I0323 15:47:43.420072  543705 net.go:648] Add success.
I0323 15:47:43.421070  543705 net.go:770] primary dev: ETH0
I0323 15:47:43.421084  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:47:43.421095  543705 net.go:698] Add success.
I0323 15:47:46.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:47:46.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:47:46.458113  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:47:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:47:53.409793  543705 cpu.go:275] no items to output this cycle
I0323 15:47:53.409807  543705 memory.go:184] no items to output this cycle
E0323 15:48:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:48:03.409794  543705 memory.go:184] no items to output this cycle
I0323 15:48:03.409798  543705 cpu.go:275] no items to output this cycle
I0323 15:48:13.409807  543705 cpu.go:282] Add success.
E0323 15:48:13.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:48:13.409838  543705 memory.go:191] Add success.
W0323 15:48:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:48:13.409890  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:48:13.409894  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:48:13.420530  543705 net.go:648] Add success.
I0323 15:48:13.423894  543705 net.go:770] primary dev: ETH0
I0323 15:48:13.423912  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:48:13.423930  543705 net.go:698] Add success.
I0323 15:48:13.463613  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"76d434ed-402e-4f26-9072-4ed4477151f2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:48:13.463647  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:48:14.454984  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:48:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:48:14.455248  543705 disk_worker.go:708] disk space is not compliant
W0323 15:48:14.455251  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:48:14.456660  543705 disk_worker.go:494] system disk:vda1
I0323 15:48:14.456694  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:48:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:48:16.458037  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:48:16.458117  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:48:16.458145  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:48:16.472710  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:48:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:48:23.409783  543705 memory.go:184] no items to output this cycle
I0323 15:48:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 15:48:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:48:33.409797  543705 memory.go:184] no items to output this cycle
I0323 15:48:33.409806  543705 cpu.go:275] no items to output this cycle
I0323 15:48:40.472204  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:48:40.472214  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:48:40.664418  543705 disk_info.go:125] begin check local disk info of client
I0323 15:48:40.667066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:48:40.667072  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536280 0xc0005362c0]
E0323 15:48:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:48:43.410787  543705 memory.go:191] Add success.
I0323 15:48:43.409790  543705 cpu.go:282] Add success.
I0323 15:48:43.420568  543705 net.go:648] Add success.
I0323 15:48:43.423490  543705 net.go:770] primary dev: ETH0
I0323 15:48:43.423506  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:48:43.423524  543705 net.go:698] Add success.
I0323 15:48:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:48:46.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:48:46.458057  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:48:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:48:53.409794  543705 memory.go:184] no items to output this cycle
I0323 15:48:53.409805  543705 cpu.go:275] no items to output this cycle
E0323 15:49:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:49:03.409789  543705 memory.go:184] no items to output this cycle
I0323 15:49:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 15:49:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:49:13.409790  543705 memory.go:191] Add success.
I0323 15:49:13.409791  543705 cpu.go:282] Add success.
W0323 15:49:13.409817  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:49:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:49:13.409831  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:49:13.420122  543705 net.go:648] Add success.
I0323 15:49:13.422520  543705 net.go:770] primary dev: ETH0
I0323 15:49:13.422534  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:49:13.422545  543705 net.go:698] Add success.
I0323 15:49:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:49:14.455170  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:49:14.455180  543705 disk_worker.go:708] disk space is not compliant
W0323 15:49:14.455184  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:49:14.456606  543705 disk_worker.go:494] system disk:vda1
I0323 15:49:14.456636  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:49:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:49:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:49:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:49:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:49:16.472412  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:49:23.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:49:23.409833  543705 memory.go:184] no items to output this cycle
I0323 15:49:23.409856  543705 cpu.go:275] no items to output this cycle
E0323 15:49:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:49:33.409822  543705 memory.go:184] no items to output this cycle
I0323 15:49:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 15:49:40.667801  543705 disk_info.go:125] begin check local disk info of client
I0323 15:49:40.670480  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:49:40.670488  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0000 0xc0002a0040]
E0323 15:49:43.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:49:43.409852  543705 memory.go:191] Add success.
I0323 15:49:43.410371  543705 cpu.go:282] Add success.
I0323 15:49:43.419986  543705 net.go:648] Add success.
I0323 15:49:43.421094  543705 net.go:770] primary dev: ETH0
I0323 15:49:43.421114  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:49:43.421134  543705 net.go:698] Add success.
I0323 15:49:46.458013  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:49:46.458106  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:49:46.458142  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:49:53.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:49:53.409801  543705 memory.go:184] no items to output this cycle
I0323 15:49:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 15:50:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:50:03.409817  543705 memory.go:184] no items to output this cycle
I0323 15:50:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 15:50:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:50:13.409803  543705 memory.go:191] Add success.
W0323 15:50:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:50:13.409834  543705 cpu.go:282] Add success.
W0323 15:50:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:50:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:50:13.420296  543705 net.go:648] Add success.
I0323 15:50:13.422979  543705 net.go:770] primary dev: ETH0
I0323 15:50:13.422995  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:50:13.423009  543705 net.go:698] Add success.
I0323 15:50:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:50:14.455124  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:50:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 15:50:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:50:14.456634  543705 disk_worker.go:494] system disk:vda1
I0323 15:50:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:50:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:50:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:50:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:50:16.458057  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:50:16.472405  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:50:23.409900  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:50:23.409920  543705 cpu.go:275] no items to output this cycle
I0323 15:50:23.409927  543705 memory.go:184] no items to output this cycle
E0323 15:50:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:50:33.409811  543705 memory.go:184] no items to output this cycle
I0323 15:50:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 15:50:40.671007  543705 disk_info.go:125] begin check local disk info of client
I0323 15:50:40.673598  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:50:40.673605  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049af00 0xc00049af40]
E0323 15:50:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:50:43.410706  543705 memory.go:191] Add success.
I0323 15:50:43.409814  543705 cpu.go:282] Add success.
I0323 15:50:43.420494  543705 net.go:648] Add success.
I0323 15:50:43.423228  543705 net.go:770] primary dev: ETH0
I0323 15:50:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:50:43.423253  543705 net.go:698] Add success.
I0323 15:50:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:50:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:50:46.458099  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:50:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:50:53.409799  543705 memory.go:184] no items to output this cycle
I0323 15:50:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:51:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:51:03.409797  543705 memory.go:184] no items to output this cycle
I0323 15:51:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:51:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:51:13.409827  543705 memory.go:191] Add success.
I0323 15:51:13.409844  543705 cpu.go:282] Add success.
W0323 15:51:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:51:13.409875  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:51:13.409878  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:51:13.420533  543705 net.go:648] Add success.
I0323 15:51:13.423302  543705 net.go:770] primary dev: ETH0
I0323 15:51:13.423317  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:51:13.423329  543705 net.go:698] Add success.
I0323 15:51:13.468339  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"43b05e17-64e4-4297-be13-e5a7e83e0630","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:51:13.468375  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:51:14.453939  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:51:14.455291  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:51:14.455305  543705 disk_worker.go:708] disk space is not compliant
W0323 15:51:14.455309  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:51:14.457714  543705 disk_worker.go:494] system disk:vda1
I0323 15:51:14.457761  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:51:15.455988  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:51:16.457599  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:51:16.457703  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:51:16.457737  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:51:16.472067  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:51:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:51:23.409816  543705 memory.go:184] no items to output this cycle
I0323 15:51:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 15:51:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:51:33.409813  543705 memory.go:184] no items to output this cycle
I0323 15:51:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 15:51:40.473176  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:51:40.473186  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:51:40.673695  543705 disk_info.go:125] begin check local disk info of client
I0323 15:51:40.677420  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:51:40.677430  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002b3ec0 0xc0002b3f00]
E0323 15:51:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:51:43.410730  543705 memory.go:191] Add success.
I0323 15:51:43.409823  543705 cpu.go:282] Add success.
I0323 15:51:43.420429  543705 net.go:648] Add success.
I0323 15:51:43.423291  543705 net.go:770] primary dev: ETH0
I0323 15:51:43.423303  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:51:43.423317  543705 net.go:698] Add success.
I0323 15:51:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:51:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:51:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:51:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:51:53.409791  543705 memory.go:184] no items to output this cycle
I0323 15:51:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 15:52:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:52:03.409813  543705 memory.go:184] no items to output this cycle
I0323 15:52:03.409827  543705 cpu.go:275] no items to output this cycle
E0323 15:52:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:52:13.409820  543705 memory.go:191] Add success.
I0323 15:52:13.409827  543705 cpu.go:282] Add success.
W0323 15:52:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:52:13.409869  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:52:13.409873  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:52:13.420153  543705 net.go:648] Add success.
I0323 15:52:13.422923  543705 net.go:770] primary dev: ETH0
I0323 15:52:13.422938  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:52:13.422953  543705 net.go:698] Add success.
W0323 15:52:14.455141  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:52:14.455153  543705 disk_worker.go:708] disk space is not compliant
W0323 15:52:14.455155  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:52:14.455995  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:52:14.456005  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:52:14.456010  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:52:14.456459  543705 disk_worker.go:494] system disk:vda1
I0323 15:52:14.456488  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:52:15.456830  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:52:15.456841  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:52:16.457953  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:52:16.457964  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:52:16.458004  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:52:16.458021  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:52:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:52:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:52:23.409781  543705 memory.go:184] no items to output this cycle
I0323 15:52:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 15:52:33.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:52:33.409839  543705 memory.go:184] no items to output this cycle
I0323 15:52:33.409993  543705 cpu.go:275] no items to output this cycle
I0323 15:52:40.677695  543705 disk_info.go:125] begin check local disk info of client
I0323 15:52:40.680351  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:52:40.680358  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ae80 0xc00007aec0]
E0323 15:52:43.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:52:43.410936  543705 memory.go:191] Add success.
I0323 15:52:43.409846  543705 cpu.go:282] Add success.
I0323 15:52:43.420699  543705 net.go:648] Add success.
I0323 15:52:43.423377  543705 net.go:770] primary dev: ETH0
I0323 15:52:43.423390  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:52:43.423402  543705 net.go:698] Add success.
I0323 15:52:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:52:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:52:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:52:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:52:53.409827  543705 memory.go:184] no items to output this cycle
I0323 15:52:53.409873  543705 cpu.go:275] no items to output this cycle
E0323 15:53:03.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:53:03.409795  543705 memory.go:184] no items to output this cycle
I0323 15:53:03.409861  543705 cpu.go:275] no items to output this cycle
E0323 15:53:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:53:13.409805  543705 memory.go:191] Add success.
I0323 15:53:13.409823  543705 cpu.go:282] Add success.
W0323 15:53:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:53:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:53:13.409848  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:53:13.420063  543705 net.go:648] Add success.
I0323 15:53:13.423055  543705 net.go:770] primary dev: ETH0
I0323 15:53:13.423070  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:53:13.423083  543705 net.go:698] Add success.
I0323 15:53:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:53:14.455159  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:53:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 15:53:14.455172  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:53:14.456536  543705 disk_worker.go:494] system disk:vda1
I0323 15:53:14.456583  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:53:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:53:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:53:16.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:53:16.458071  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:53:16.472447  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:53:23.409894  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:53:23.409917  543705 memory.go:184] no items to output this cycle
I0323 15:53:23.409919  543705 cpu.go:275] no items to output this cycle
E0323 15:53:33.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:53:33.409831  543705 memory.go:184] no items to output this cycle
I0323 15:53:33.409841  543705 cpu.go:275] no items to output this cycle
I0323 15:53:40.680449  543705 disk_info.go:125] begin check local disk info of client
I0323 15:53:40.683143  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:53:40.683152  543705 disk_info.go:196] parse disk info done, disk is : [0xc000579340 0xc000579380]
E0323 15:53:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:53:43.410724  543705 memory.go:191] Add success.
I0323 15:53:43.409923  543705 cpu.go:282] Add success.
I0323 15:53:43.420790  543705 net.go:648] Add success.
I0323 15:53:43.423673  543705 net.go:770] primary dev: ETH0
I0323 15:53:43.423692  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:53:43.423711  543705 net.go:698] Add success.
I0323 15:53:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:53:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:53:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:53:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:53:53.409797  543705 memory.go:184] no items to output this cycle
I0323 15:53:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 15:54:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:54:03.409789  543705 memory.go:184] no items to output this cycle
I0323 15:54:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:54:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:54:13.409816  543705 memory.go:191] Add success.
I0323 15:54:13.409817  543705 cpu.go:282] Add success.
W0323 15:54:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:54:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:54:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:54:13.420177  543705 net.go:648] Add success.
I0323 15:54:13.422880  543705 net.go:770] primary dev: ETH0
I0323 15:54:13.422895  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:54:13.422908  543705 net.go:698] Add success.
I0323 15:54:13.464675  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e15001ad-14c0-434f-88ca-f108a411291e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:54:13.464721  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 15:54:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:54:14.455117  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:54:14.455186  543705 disk_worker.go:708] disk space is not compliant
W0323 15:54:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:54:14.456624  543705 disk_worker.go:494] system disk:vda1
I0323 15:54:14.456687  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:54:15.455976  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:54:16.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:54:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:54:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:54:16.472502  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:54:23.409893  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:54:23.409913  543705 memory.go:184] no items to output this cycle
I0323 15:54:23.409990  543705 cpu.go:275] no items to output this cycle
E0323 15:54:33.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:54:33.409774  543705 memory.go:184] no items to output this cycle
I0323 15:54:33.409817  543705 cpu.go:275] no items to output this cycle
I0323 15:54:40.474133  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:54:40.474141  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:54:40.683234  543705 disk_info.go:125] begin check local disk info of client
I0323 15:54:40.685859  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:54:40.685865  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab000 0xc0001ab040]
E0323 15:54:43.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:54:43.410900  543705 memory.go:191] Add success.
I0323 15:54:43.409848  543705 cpu.go:282] Add success.
I0323 15:54:43.420654  543705 net.go:648] Add success.
I0323 15:54:43.423325  543705 net.go:770] primary dev: ETH0
I0323 15:54:43.423338  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:54:43.423351  543705 net.go:698] Add success.
I0323 15:54:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:54:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:54:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:54:53.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:54:53.409796  543705 memory.go:184] no items to output this cycle
I0323 15:54:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 15:55:03.409755  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:55:03.409787  543705 memory.go:184] no items to output this cycle
I0323 15:55:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 15:55:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:55:13.409805  543705 memory.go:191] Add success.
I0323 15:55:13.409822  543705 cpu.go:282] Add success.
W0323 15:55:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:55:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:55:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:55:13.420185  543705 net.go:648] Add success.
I0323 15:55:13.423060  543705 net.go:770] primary dev: ETH0
I0323 15:55:13.423091  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:55:13.423105  543705 net.go:698] Add success.
I0323 15:55:14.454969  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:55:14.455156  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:55:14.455168  543705 disk_worker.go:708] disk space is not compliant
W0323 15:55:14.455171  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:55:14.456511  543705 disk_worker.go:494] system disk:vda1
I0323 15:55:14.456560  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:55:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:55:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:55:16.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:55:16.458073  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:55:16.472420  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:55:23.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:55:23.409920  543705 memory.go:184] no items to output this cycle
I0323 15:55:23.409987  543705 cpu.go:275] no items to output this cycle
E0323 15:55:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:55:33.409809  543705 memory.go:184] no items to output this cycle
I0323 15:55:33.409815  543705 cpu.go:275] no items to output this cycle
I0323 15:55:40.685951  543705 disk_info.go:125] begin check local disk info of client
I0323 15:55:40.688526  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:55:40.688533  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5080 0xc0000c50c0]
E0323 15:55:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:55:43.410795  543705 memory.go:191] Add success.
I0323 15:55:43.409840  543705 cpu.go:282] Add success.
I0323 15:55:43.420549  543705 net.go:648] Add success.
I0323 15:55:43.423483  543705 net.go:770] primary dev: ETH0
I0323 15:55:43.423498  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:55:43.423512  543705 net.go:698] Add success.
I0323 15:55:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:55:46.458056  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:55:46.458088  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:55:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:55:53.409789  543705 memory.go:184] no items to output this cycle
I0323 15:55:53.409800  543705 cpu.go:275] no items to output this cycle
E0323 15:56:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:56:03.409789  543705 memory.go:184] no items to output this cycle
I0323 15:56:03.409851  543705 cpu.go:275] no items to output this cycle
E0323 15:56:13.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:56:13.409820  543705 memory.go:191] Add success.
I0323 15:56:13.409824  543705 cpu.go:282] Add success.
W0323 15:56:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:56:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:56:13.409886  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:56:13.420154  543705 net.go:648] Add success.
I0323 15:56:13.422581  543705 net.go:770] primary dev: ETH0
I0323 15:56:13.422596  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:56:13.422611  543705 net.go:698] Add success.
I0323 15:56:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:56:14.455184  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:56:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 15:56:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:56:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 15:56:14.456623  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:56:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:56:16.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:56:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:56:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:56:16.472459  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:56:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:56:23.409788  543705 cpu.go:275] no items to output this cycle
I0323 15:56:23.409795  543705 memory.go:184] no items to output this cycle
E0323 15:56:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:56:33.409792  543705 memory.go:184] no items to output this cycle
I0323 15:56:33.409801  543705 cpu.go:275] no items to output this cycle
I0323 15:56:40.689094  543705 disk_info.go:125] begin check local disk info of client
I0323 15:56:40.691701  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:56:40.691709  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004b1c80 0xc0004b1cc0]
E0323 15:56:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:56:43.410681  543705 memory.go:191] Add success.
I0323 15:56:43.409826  543705 cpu.go:282] Add success.
I0323 15:56:43.420383  543705 net.go:648] Add success.
I0323 15:56:43.422960  543705 net.go:770] primary dev: ETH0
I0323 15:56:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:56:43.422986  543705 net.go:698] Add success.
I0323 15:56:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:56:46.458057  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:56:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:56:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:56:53.409787  543705 memory.go:184] no items to output this cycle
I0323 15:56:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 15:57:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:57:03.409788  543705 memory.go:184] no items to output this cycle
I0323 15:57:03.409804  543705 cpu.go:275] no items to output this cycle
E0323 15:57:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:57:13.409794  543705 memory.go:191] Add success.
W0323 15:57:13.409819  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 15:57:13.409828  543705 cpu.go:282] Add success.
W0323 15:57:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:57:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:57:13.420098  543705 net.go:648] Add success.
I0323 15:57:13.422955  543705 net.go:770] primary dev: ETH0
I0323 15:57:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:57:13.422981  543705 net.go:698] Add success.
I0323 15:57:13.429213  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 15:57:13.453386  543705 event_worker.go:152] Polling the log file for events...
I0323 15:57:13.468139  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e84000fc-b7c0-433f-853c-a0eae32c5510","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 15:57:13.468175  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 15:57:14.455238  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:57:14.455254  543705 disk_worker.go:708] disk space is not compliant
W0323 15:57:14.455258  543705 disk_worker.go:728] disk inode is not compliant
E0323 15:57:14.456203  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 15:57:14.456213  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 15:57:14.456219  543705 custom_config.go:64] query custom config with name: gpu
I0323 15:57:14.457130  543705 disk_worker.go:494] system disk:vda1
I0323 15:57:14.457174  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 15:57:15.456910  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 15:57:15.456930  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:57:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 15:57:16.458135  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 15:57:16.458173  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:57:16.458197  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:57:16.472559  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:57:23.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:57:23.409790  543705 memory.go:184] no items to output this cycle
I0323 15:57:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 15:57:33.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:57:33.409831  543705 memory.go:184] no items to output this cycle
I0323 15:57:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 15:57:40.475178  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 15:57:40.475187  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 15:57:40.691804  543705 disk_info.go:125] begin check local disk info of client
I0323 15:57:40.694447  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:57:40.694454  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 15:57:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:57:43.410849  543705 memory.go:191] Add success.
I0323 15:57:43.409811  543705 cpu.go:282] Add success.
I0323 15:57:43.420561  543705 net.go:648] Add success.
I0323 15:57:43.423312  543705 net.go:770] primary dev: ETH0
I0323 15:57:43.423327  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:57:43.423340  543705 net.go:698] Add success.
I0323 15:57:46.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:57:46.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:57:46.458117  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:57:53.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:57:53.409810  543705 memory.go:184] no items to output this cycle
I0323 15:57:53.409817  543705 cpu.go:275] no items to output this cycle
E0323 15:58:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:58:03.409816  543705 memory.go:184] no items to output this cycle
I0323 15:58:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 15:58:13.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:58:13.409803  543705 memory.go:191] Add success.
I0323 15:58:13.409804  543705 cpu.go:282] Add success.
W0323 15:58:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:58:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:58:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:58:13.420182  543705 net.go:648] Add success.
I0323 15:58:13.422784  543705 net.go:770] primary dev: ETH0
I0323 15:58:13.422797  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:58:13.422810  543705 net.go:698] Add success.
I0323 15:58:14.454985  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:58:14.455128  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:58:14.455195  543705 disk_worker.go:708] disk space is not compliant
W0323 15:58:14.455198  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:58:14.456539  543705 disk_worker.go:494] system disk:vda1
I0323 15:58:14.456586  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:58:15.455980  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:58:16.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:58:16.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:58:16.458133  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:58:16.472663  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:58:23.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:58:23.409782  543705 memory.go:184] no items to output this cycle
I0323 15:58:23.409805  543705 cpu.go:275] no items to output this cycle
E0323 15:58:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:58:33.409796  543705 memory.go:184] no items to output this cycle
I0323 15:58:33.409814  543705 cpu.go:275] no items to output this cycle
I0323 15:58:40.694545  543705 disk_info.go:125] begin check local disk info of client
I0323 15:58:40.697151  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:58:40.697158  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034ee00 0xc00034ee40]
E0323 15:58:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:58:43.410748  543705 memory.go:191] Add success.
I0323 15:58:43.409836  543705 cpu.go:282] Add success.
I0323 15:58:43.420475  543705 net.go:648] Add success.
I0323 15:58:43.423172  543705 net.go:770] primary dev: ETH0
I0323 15:58:43.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:58:43.423200  543705 net.go:698] Add success.
I0323 15:58:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:58:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:58:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:58:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:58:53.409824  543705 memory.go:184] no items to output this cycle
I0323 15:58:53.409837  543705 cpu.go:275] no items to output this cycle
E0323 15:59:03.409852  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:59:03.409874  543705 memory.go:184] no items to output this cycle
I0323 15:59:03.409878  543705 cpu.go:275] no items to output this cycle
E0323 15:59:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:59:13.409794  543705 memory.go:191] Add success.
I0323 15:59:13.409815  543705 cpu.go:282] Add success.
W0323 15:59:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 15:59:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 15:59:13.409835  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 15:59:13.420117  543705 net.go:648] Add success.
I0323 15:59:13.422752  543705 net.go:770] primary dev: ETH0
I0323 15:59:13.422768  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:59:13.422782  543705 net.go:698] Add success.
I0323 15:59:14.454988  543705 custom_config.go:64] query custom config with name: gpu
W0323 15:59:14.455201  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 15:59:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 15:59:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 15:59:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 15:59:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 15:59:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 15:59:16.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:59:16.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:59:16.458123  543705 gpu_kunlun.go:227] Add success, len:1
I0323 15:59:16.472497  543705 disk_local_worker.go:436] Get disk info: []
E0323 15:59:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:59:23.409793  543705 memory.go:184] no items to output this cycle
I0323 15:59:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 15:59:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:59:33.409789  543705 memory.go:184] no items to output this cycle
I0323 15:59:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 15:59:40.697675  543705 disk_info.go:125] begin check local disk info of client
I0323 15:59:40.700216  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 15:59:40.700222  543705 disk_info.go:196] parse disk info done, disk is : [0xc000293280 0xc0002932c0]
E0323 15:59:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:59:43.410928  543705 memory.go:191] Add success.
I0323 15:59:43.409835  543705 cpu.go:282] Add success.
I0323 15:59:43.419717  543705 net.go:648] Add success.
I0323 15:59:43.422679  543705 net.go:770] primary dev: ETH0
I0323 15:59:43.422693  543705 net.go:802] Send network stats successfully!,count is 6
I0323 15:59:43.422706  543705 net.go:698] Add success.
I0323 15:59:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 15:59:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 15:59:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 15:59:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 15:59:53.409806  543705 cpu.go:275] no items to output this cycle
I0323 15:59:53.409822  543705 memory.go:184] no items to output this cycle
E0323 16:00:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:00:03.409812  543705 memory.go:184] no items to output this cycle
I0323 16:00:03.409822  543705 cpu.go:275] no items to output this cycle
I0323 16:00:13.409809  543705 cpu.go:282] Add success.
E0323 16:00:13.410106  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:00:13.410123  543705 memory.go:191] Add success.
W0323 16:00:13.410167  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:00:13.410178  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:00:13.410182  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:00:13.419775  543705 net.go:648] Add success.
I0323 16:00:13.420689  543705 net.go:770] primary dev: ETH0
I0323 16:00:13.420702  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:00:13.420715  543705 net.go:698] Add success.
I0323 16:00:13.593744  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"176e3201-90fb-4528-9eb4-90e746b2625d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:00:13.593810  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:00:14.453934  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:00:14.455383  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:00:14.455398  543705 disk_worker.go:708] disk space is not compliant
W0323 16:00:14.455402  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:00:14.456808  543705 disk_worker.go:494] system disk:vda1
I0323 16:00:14.456843  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:00:15.456032  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:00:16.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:00:16.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:00:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:00:16.472661  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:00:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:00:23.409793  543705 memory.go:184] no items to output this cycle
I0323 16:00:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 16:00:33.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:00:33.409798  543705 memory.go:184] no items to output this cycle
I0323 16:00:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 16:00:40.476212  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:00:40.476222  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:00:40.701272  543705 disk_info.go:125] begin check local disk info of client
I0323 16:00:40.704978  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:00:40.704987  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046efc0 0xc00046f000]
E0323 16:00:43.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:00:43.410705  543705 memory.go:191] Add success.
I0323 16:00:43.409805  543705 cpu.go:282] Add success.
I0323 16:00:43.420421  543705 net.go:648] Add success.
I0323 16:00:43.422898  543705 net.go:770] primary dev: ETH0
I0323 16:00:43.422912  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:00:43.422926  543705 net.go:698] Add success.
I0323 16:00:46.457995  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:00:46.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:00:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:00:53.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:00:53.409786  543705 memory.go:184] no items to output this cycle
I0323 16:00:53.409787  543705 cpu.go:275] no items to output this cycle
E0323 16:01:03.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:01:03.409807  543705 memory.go:184] no items to output this cycle
I0323 16:01:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 16:01:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:01:13.409835  543705 memory.go:191] Add success.
I0323 16:01:13.409836  543705 cpu.go:282] Add success.
W0323 16:01:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:01:13.412614  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:01:13.412619  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:01:13.420317  543705 net.go:648] Add success.
I0323 16:01:13.422178  543705 net.go:770] primary dev: ETH0
I0323 16:01:13.422191  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:01:13.422205  543705 net.go:698] Add success.
W0323 16:01:14.458045  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:01:14.458141  543705 disk_worker.go:708] disk space is not compliant
W0323 16:01:14.458146  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:01:14.458484  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:01:14.517098  543705 disk_worker.go:494] system disk:vda1
I0323 16:01:14.517150  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:01:15.455990  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:01:16.458046  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:01:16.458143  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:01:16.458173  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:01:16.472847  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:01:23.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:01:23.409847  543705 memory.go:184] no items to output this cycle
I0323 16:01:23.409974  543705 cpu.go:275] no items to output this cycle
E0323 16:01:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:01:33.409789  543705 memory.go:184] no items to output this cycle
I0323 16:01:33.409802  543705 cpu.go:275] no items to output this cycle
I0323 16:01:40.705696  543705 disk_info.go:125] begin check local disk info of client
I0323 16:01:40.708682  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:01:40.708690  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003c0100 0xc0003c0140]
E0323 16:01:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:01:43.410787  543705 memory.go:191] Add success.
I0323 16:01:43.409812  543705 cpu.go:282] Add success.
I0323 16:01:43.420527  543705 net.go:648] Add success.
I0323 16:01:43.423233  543705 net.go:770] primary dev: ETH0
I0323 16:01:43.423252  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:01:43.423277  543705 net.go:698] Add success.
I0323 16:01:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:01:46.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:01:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:01:53.410421  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:01:53.410444  543705 memory.go:184] no items to output this cycle
I0323 16:01:53.410455  543705 cpu.go:275] no items to output this cycle
E0323 16:02:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:02:03.409819  543705 memory.go:184] no items to output this cycle
I0323 16:02:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 16:02:13.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:02:13.409812  543705 memory.go:191] Add success.
I0323 16:02:13.409813  543705 cpu.go:282] Add success.
W0323 16:02:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:02:13.409853  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:02:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:02:13.420778  543705 net.go:648] Add success.
I0323 16:02:13.423386  543705 net.go:770] primary dev: ETH0
I0323 16:02:13.423400  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:02:13.423413  543705 net.go:698] Add success.
W0323 16:02:14.455193  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:02:14.455304  543705 disk_worker.go:708] disk space is not compliant
W0323 16:02:14.455309  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:02:14.458150  543705 disk_worker.go:494] system disk:vda1
I0323 16:02:14.458194  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:02:14.458779  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:02:14.458789  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:02:14.458795  543705 custom_config.go:64] query custom config with name: gpu
E0323 16:02:15.523132  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:02:15.523150  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:02:16.458092  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:02:16.458151  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:02:16.458169  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:02:16.458194  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:02:16.472550  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:02:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:02:23.409786  543705 memory.go:184] no items to output this cycle
I0323 16:02:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 16:02:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:02:33.409780  543705 memory.go:184] no items to output this cycle
I0323 16:02:33.409787  543705 cpu.go:275] no items to output this cycle
I0323 16:02:40.709678  543705 disk_info.go:125] begin check local disk info of client
I0323 16:02:40.712348  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:02:40.712355  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0323 16:02:43.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:02:43.409794  543705 memory.go:191] Add success.
I0323 16:02:43.410008  543705 cpu.go:282] Add success.
I0323 16:02:43.420193  543705 net.go:648] Add success.
I0323 16:02:43.421264  543705 net.go:770] primary dev: ETH0
I0323 16:02:43.421282  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:02:43.421302  543705 net.go:698] Add success.
I0323 16:02:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:02:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:02:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:02:53.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:02:53.409827  543705 memory.go:184] no items to output this cycle
I0323 16:02:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 16:03:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:03:03.409799  543705 memory.go:184] no items to output this cycle
I0323 16:03:03.409868  543705 cpu.go:275] no items to output this cycle
E0323 16:03:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:03:13.409806  543705 memory.go:191] Add success.
I0323 16:03:13.409822  543705 cpu.go:282] Add success.
W0323 16:03:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:03:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:03:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:03:13.419771  543705 net.go:648] Add success.
I0323 16:03:13.422887  543705 net.go:770] primary dev: ETH0
I0323 16:03:13.422900  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:03:13.422912  543705 net.go:698] Add success.
I0323 16:03:13.469009  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f9534474-dc43-41a5-b0fa-0d3e893d2ce7","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:03:13.469043  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:03:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:03:14.455199  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:03:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 16:03:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:03:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 16:03:14.456638  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:03:15.455979  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:03:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:03:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:03:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:03:16.472471  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:03:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:03:23.409797  543705 memory.go:184] no items to output this cycle
I0323 16:03:23.409854  543705 cpu.go:275] no items to output this cycle
E0323 16:03:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:03:33.409790  543705 memory.go:184] no items to output this cycle
I0323 16:03:33.409811  543705 cpu.go:275] no items to output this cycle
I0323 16:03:40.477198  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:03:40.477206  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:03:40.713393  543705 disk_info.go:125] begin check local disk info of client
I0323 16:03:40.715950  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:03:40.715956  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536300 0xc000536340]
E0323 16:03:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:03:43.410882  543705 memory.go:191] Add success.
I0323 16:03:43.409817  543705 cpu.go:282] Add success.
I0323 16:03:43.420599  543705 net.go:648] Add success.
I0323 16:03:43.423908  543705 net.go:770] primary dev: ETH0
I0323 16:03:43.423923  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:03:43.423938  543705 net.go:698] Add success.
I0323 16:03:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:03:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:03:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:03:53.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:03:53.409803  543705 memory.go:184] no items to output this cycle
I0323 16:03:53.409804  543705 cpu.go:275] no items to output this cycle
E0323 16:04:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:04:03.409781  543705 memory.go:184] no items to output this cycle
I0323 16:04:03.409815  543705 cpu.go:275] no items to output this cycle
E0323 16:04:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:04:13.409901  543705 memory.go:191] Add success.
W0323 16:04:13.409931  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:04:13.409943  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:04:13.409946  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:04:13.410070  543705 cpu.go:282] Add success.
I0323 16:04:13.419721  543705 net.go:648] Add success.
I0323 16:04:13.422652  543705 net.go:770] primary dev: ETH0
I0323 16:04:13.422666  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:04:13.422678  543705 net.go:698] Add success.
I0323 16:04:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:04:14.455136  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:04:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 16:04:14.455213  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:04:14.456575  543705 disk_worker.go:494] system disk:vda1
I0323 16:04:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:04:15.455960  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:04:16.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:04:16.458029  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:04:16.458051  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:04:16.472363  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:04:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:04:23.409780  543705 memory.go:184] no items to output this cycle
I0323 16:04:23.409779  543705 cpu.go:275] no items to output this cycle
E0323 16:04:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:04:33.409781  543705 memory.go:184] no items to output this cycle
I0323 16:04:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 16:04:40.716044  543705 disk_info.go:125] begin check local disk info of client
I0323 16:04:40.718713  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:04:40.718720  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2040 0xc0004f2080]
E0323 16:04:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:04:43.410783  543705 memory.go:191] Add success.
I0323 16:04:43.409834  543705 cpu.go:282] Add success.
I0323 16:04:43.420523  543705 net.go:648] Add success.
I0323 16:04:43.423340  543705 net.go:770] primary dev: ETH0
I0323 16:04:43.423353  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:04:43.423365  543705 net.go:698] Add success.
I0323 16:04:46.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:04:46.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:04:46.458117  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:04:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 16:04:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:04:53.409823  543705 memory.go:184] no items to output this cycle
E0323 16:05:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:05:03.409790  543705 memory.go:184] no items to output this cycle
I0323 16:05:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 16:05:13.409968  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:05:13.410005  543705 memory.go:191] Add success.
W0323 16:05:13.410042  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:05:13.410053  543705 cpu.go:282] Add success.
W0323 16:05:13.410060  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:05:13.413142  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:05:13.419769  543705 net.go:648] Add success.
I0323 16:05:13.421746  543705 net.go:770] primary dev: ETH0
I0323 16:05:13.421761  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:05:13.421775  543705 net.go:698] Add success.
I0323 16:05:14.453958  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:05:14.455339  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:05:14.455351  543705 disk_worker.go:708] disk space is not compliant
W0323 16:05:14.455353  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:05:14.457690  543705 disk_worker.go:494] system disk:vda1
I0323 16:05:14.457736  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:05:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:05:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:05:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:05:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:05:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:05:23.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:05:23.409827  543705 memory.go:184] no items to output this cycle
I0323 16:05:23.409837  543705 cpu.go:275] no items to output this cycle
I0323 16:05:33.409822  543705 cpu.go:275] no items to output this cycle
E0323 16:05:33.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:05:33.409843  543705 memory.go:184] no items to output this cycle
I0323 16:05:40.718812  543705 disk_info.go:125] begin check local disk info of client
I0323 16:05:40.721309  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:05:40.721317  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab280 0xc0001ab2c0]
E0323 16:05:43.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:05:43.410709  543705 memory.go:191] Add success.
I0323 16:05:43.409812  543705 cpu.go:282] Add success.
I0323 16:05:43.420403  543705 net.go:648] Add success.
I0323 16:05:43.423198  543705 net.go:770] primary dev: ETH0
I0323 16:05:43.423211  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:05:43.423223  543705 net.go:698] Add success.
I0323 16:05:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:05:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:05:46.458073  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:05:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:05:53.409817  543705 memory.go:184] no items to output this cycle
I0323 16:05:53.409832  543705 cpu.go:275] no items to output this cycle
E0323 16:06:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:06:03.409785  543705 memory.go:184] no items to output this cycle
I0323 16:06:03.409810  543705 cpu.go:275] no items to output this cycle
E0323 16:06:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:06:13.409785  543705 memory.go:191] Add success.
I0323 16:06:13.409795  543705 cpu.go:282] Add success.
W0323 16:06:13.409810  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:06:13.412524  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:06:13.412528  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:06:13.420357  543705 net.go:648] Add success.
I0323 16:06:13.422159  543705 net.go:770] primary dev: ETH0
I0323 16:06:13.422171  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:06:13.422182  543705 net.go:698] Add success.
I0323 16:06:13.477399  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"23978938-ee41-40bb-82ef-12f552d89ef0","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:06:13.477429  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:06:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:06:14.455120  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:06:14.455206  543705 disk_worker.go:708] disk space is not compliant
W0323 16:06:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:06:14.456622  543705 disk_worker.go:494] system disk:vda1
I0323 16:06:14.456651  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:06:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:06:16.457982  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:06:16.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:06:16.458076  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:06:16.472423  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:06:23.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:06:23.409794  543705 memory.go:184] no items to output this cycle
I0323 16:06:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 16:06:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:06:33.409807  543705 memory.go:184] no items to output this cycle
I0323 16:06:33.409859  543705 cpu.go:275] no items to output this cycle
I0323 16:06:40.478170  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:06:40.478177  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:06:40.721672  543705 disk_info.go:125] begin check local disk info of client
I0323 16:06:40.724319  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:06:40.724325  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004d0200 0xc0004d0240]
E0323 16:06:43.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:06:43.410902  543705 memory.go:191] Add success.
I0323 16:06:43.409837  543705 cpu.go:282] Add success.
I0323 16:06:43.420637  543705 net.go:648] Add success.
I0323 16:06:43.423444  543705 net.go:770] primary dev: ETH0
I0323 16:06:43.423459  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:06:43.423472  543705 net.go:698] Add success.
I0323 16:06:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:06:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:06:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:06:53.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:06:53.409855  543705 memory.go:184] no items to output this cycle
I0323 16:06:53.409994  543705 cpu.go:275] no items to output this cycle
E0323 16:07:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:07:03.409805  543705 cpu.go:275] no items to output this cycle
I0323 16:07:03.409808  543705 memory.go:184] no items to output this cycle
E0323 16:07:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:07:13.409835  543705 memory.go:191] Add success.
I0323 16:07:13.409836  543705 cpu.go:282] Add success.
W0323 16:07:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:07:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:07:13.409887  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:07:13.420360  543705 net.go:648] Add success.
I0323 16:07:13.423405  543705 net.go:770] primary dev: ETH0
I0323 16:07:13.423423  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:07:13.423439  543705 net.go:698] Add success.
I0323 16:07:13.452771  543705 event_worker.go:152] Polling the log file for events...
W0323 16:07:14.454279  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:07:14.454386  543705 disk_worker.go:708] disk space is not compliant
W0323 16:07:14.454391  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:07:14.457246  543705 disk_worker.go:494] system disk:vda1
I0323 16:07:14.457289  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:07:14.457446  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:07:14.457454  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:07:14.457458  543705 custom_config.go:64] query custom config with name: gpu
E0323 16:07:15.457278  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:07:15.457293  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:07:16.458132  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 16:07:16.458208  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:07:16.458212  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:07:16.458239  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:07:16.472667  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:07:23.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:07:23.409793  543705 memory.go:184] no items to output this cycle
I0323 16:07:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 16:07:33.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:07:33.409841  543705 memory.go:184] no items to output this cycle
I0323 16:07:33.409853  543705 cpu.go:275] no items to output this cycle
I0323 16:07:40.725280  543705 disk_info.go:125] begin check local disk info of client
I0323 16:07:40.727928  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:07:40.727937  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004adc40 0xc0004adc80]
E0323 16:07:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:07:43.410641  543705 memory.go:191] Add success.
I0323 16:07:43.409825  543705 cpu.go:282] Add success.
I0323 16:07:43.420341  543705 net.go:648] Add success.
I0323 16:07:43.423155  543705 net.go:770] primary dev: ETH0
I0323 16:07:43.423169  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:07:43.423181  543705 net.go:698] Add success.
I0323 16:07:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:07:46.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:07:46.458122  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:07:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:07:53.409787  543705 memory.go:184] no items to output this cycle
I0323 16:07:53.409845  543705 cpu.go:275] no items to output this cycle
E0323 16:08:03.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:08:03.409815  543705 memory.go:184] no items to output this cycle
I0323 16:08:03.409832  543705 cpu.go:275] no items to output this cycle
E0323 16:08:13.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:08:13.409826  543705 memory.go:191] Add success.
I0323 16:08:13.409832  543705 cpu.go:282] Add success.
W0323 16:08:13.409860  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:08:13.412638  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:08:13.412642  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:08:13.420333  543705 net.go:648] Add success.
I0323 16:08:13.422095  543705 net.go:770] primary dev: ETH0
I0323 16:08:13.422120  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:08:13.422136  543705 net.go:698] Add success.
I0323 16:08:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:08:14.455290  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:08:14.455306  543705 disk_worker.go:708] disk space is not compliant
W0323 16:08:14.455310  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:08:14.460102  543705 disk_worker.go:494] system disk:vda1
I0323 16:08:14.460149  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:08:15.455984  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:08:16.458007  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:08:16.458089  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:08:16.458130  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:08:16.472540  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:08:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:08:23.409782  543705 memory.go:184] no items to output this cycle
I0323 16:08:23.409789  543705 cpu.go:275] no items to output this cycle
E0323 16:08:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:08:33.409791  543705 memory.go:184] no items to output this cycle
I0323 16:08:33.409798  543705 cpu.go:275] no items to output this cycle
I0323 16:08:40.728024  543705 disk_info.go:125] begin check local disk info of client
I0323 16:08:40.730646  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:08:40.730653  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003bf080 0xc0003bf0c0]
E0323 16:08:43.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:08:43.410737  543705 memory.go:191] Add success.
I0323 16:08:43.409806  543705 cpu.go:282] Add success.
I0323 16:08:43.420452  543705 net.go:648] Add success.
I0323 16:08:43.422864  543705 net.go:770] primary dev: ETH0
I0323 16:08:43.422877  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:08:43.422889  543705 net.go:698] Add success.
I0323 16:08:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:08:46.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:08:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:08:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:08:53.409794  543705 memory.go:184] no items to output this cycle
I0323 16:08:53.409803  543705 cpu.go:275] no items to output this cycle
E0323 16:09:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:09:03.409786  543705 memory.go:184] no items to output this cycle
I0323 16:09:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 16:09:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:09:13.409796  543705 memory.go:191] Add success.
I0323 16:09:13.409817  543705 cpu.go:282] Add success.
W0323 16:09:13.409823  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:09:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:09:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:09:13.420124  543705 net.go:648] Add success.
I0323 16:09:13.422826  543705 net.go:770] primary dev: ETH0
I0323 16:09:13.422839  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:09:13.422851  543705 net.go:698] Add success.
I0323 16:09:13.952283  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"f22a281f-2aff-4e73-b6fb-3e2df276472e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:09:13.952336  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:09:14.454709  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:09:14.455043  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:09:14.455145  543705 disk_worker.go:708] disk space is not compliant
W0323 16:09:14.455151  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:09:14.456850  543705 disk_worker.go:494] system disk:vda1
I0323 16:09:14.456881  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:09:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:09:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:09:16.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:09:16.458110  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:09:16.472707  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:09:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:09:23.409792  543705 memory.go:184] no items to output this cycle
I0323 16:09:23.409797  543705 cpu.go:275] no items to output this cycle
E0323 16:09:33.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:09:33.409818  543705 memory.go:184] no items to output this cycle
I0323 16:09:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 16:09:40.479190  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:09:40.479199  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:09:40.730740  543705 disk_info.go:125] begin check local disk info of client
I0323 16:09:40.733359  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:09:40.733365  543705 disk_info.go:196] parse disk info done, disk is : [0xc000393940 0xc000393980]
E0323 16:09:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:09:43.410831  543705 memory.go:191] Add success.
I0323 16:09:43.409811  543705 cpu.go:282] Add success.
I0323 16:09:43.420531  543705 net.go:648] Add success.
I0323 16:09:43.423598  543705 net.go:770] primary dev: ETH0
I0323 16:09:43.423613  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:09:43.423627  543705 net.go:698] Add success.
I0323 16:09:46.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:09:46.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:09:46.458113  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:09:53.410210  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:09:53.410222  543705 cpu.go:275] no items to output this cycle
I0323 16:09:53.410226  543705 memory.go:184] no items to output this cycle
E0323 16:10:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:10:03.409811  543705 memory.go:184] no items to output this cycle
I0323 16:10:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 16:10:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:10:13.409786  543705 memory.go:191] Add success.
I0323 16:10:13.409804  543705 cpu.go:282] Add success.
W0323 16:10:13.409813  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:10:13.409825  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:10:13.409827  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:10:13.420213  543705 net.go:648] Add success.
I0323 16:10:13.423067  543705 net.go:770] primary dev: ETH0
I0323 16:10:13.423081  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:10:13.423093  543705 net.go:698] Add success.
I0323 16:10:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:10:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:10:14.455200  543705 disk_worker.go:708] disk space is not compliant
W0323 16:10:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:10:14.456586  543705 disk_worker.go:494] system disk:vda1
I0323 16:10:14.456618  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:10:15.455993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:10:16.457978  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:10:16.458041  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:10:16.458064  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:10:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:10:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:10:23.409780  543705 cpu.go:275] no items to output this cycle
I0323 16:10:23.409792  543705 memory.go:184] no items to output this cycle
E0323 16:10:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:10:33.409814  543705 memory.go:184] no items to output this cycle
I0323 16:10:33.409826  543705 cpu.go:275] no items to output this cycle
I0323 16:10:40.733705  543705 disk_info.go:125] begin check local disk info of client
I0323 16:10:40.745357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:10:40.745367  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b840 0xc00007b880]
E0323 16:10:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:10:43.410766  543705 memory.go:191] Add success.
I0323 16:10:43.409805  543705 cpu.go:282] Add success.
I0323 16:10:43.420475  543705 net.go:648] Add success.
I0323 16:10:43.423368  543705 net.go:770] primary dev: ETH0
I0323 16:10:43.423383  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:10:43.423396  543705 net.go:698] Add success.
I0323 16:10:46.458021  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:10:46.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:10:46.458151  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:10:53.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:10:53.409793  543705 memory.go:184] no items to output this cycle
I0323 16:10:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 16:11:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:11:03.409805  543705 memory.go:184] no items to output this cycle
I0323 16:11:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 16:11:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:11:13.409805  543705 memory.go:191] Add success.
W0323 16:11:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:11:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:11:13.409846  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:11:13.409852  543705 cpu.go:282] Add success.
I0323 16:11:13.420602  543705 net.go:648] Add success.
I0323 16:11:13.421594  543705 net.go:770] primary dev: ETH0
I0323 16:11:13.421607  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:11:13.421620  543705 net.go:698] Add success.
I0323 16:11:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:11:14.455152  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:11:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 16:11:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:11:14.456645  543705 disk_worker.go:494] system disk:vda1
I0323 16:11:14.456679  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:11:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:11:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:11:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:11:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:11:16.472474  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:11:23.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:11:23.409807  543705 memory.go:184] no items to output this cycle
I0323 16:11:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 16:11:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:11:33.409813  543705 memory.go:184] no items to output this cycle
I0323 16:11:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 16:11:40.745674  543705 disk_info.go:125] begin check local disk info of client
I0323 16:11:40.748180  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:11:40.748187  543705 disk_info.go:196] parse disk info done, disk is : [0xc000537c80 0xc000537cc0]
E0323 16:11:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:11:43.410688  543705 memory.go:191] Add success.
I0323 16:11:43.409819  543705 cpu.go:282] Add success.
I0323 16:11:43.420427  543705 net.go:648] Add success.
I0323 16:11:43.423485  543705 net.go:770] primary dev: ETH0
I0323 16:11:43.423499  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:11:43.423514  543705 net.go:698] Add success.
I0323 16:11:46.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:11:46.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:11:46.458066  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:11:53.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:11:53.409771  543705 memory.go:184] no items to output this cycle
I0323 16:11:53.409793  543705 cpu.go:275] no items to output this cycle
E0323 16:12:03.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:12:03.409805  543705 memory.go:184] no items to output this cycle
I0323 16:12:03.409818  543705 cpu.go:275] no items to output this cycle
E0323 16:12:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:12:13.409807  543705 memory.go:191] Add success.
I0323 16:12:13.409808  543705 cpu.go:282] Add success.
W0323 16:12:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:12:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:12:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:12:13.420164  543705 net.go:648] Add success.
I0323 16:12:13.422699  543705 net.go:770] primary dev: ETH0
I0323 16:12:13.422712  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:12:13.422724  543705 net.go:698] Add success.
W0323 16:12:14.454514  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:12:14.454531  543705 disk_worker.go:708] disk space is not compliant
W0323 16:12:14.454536  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:12:14.455508  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:12:14.455518  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:12:14.455525  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:12:14.456867  543705 disk_worker.go:494] system disk:vda1
I0323 16:12:14.456908  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:12:15.269833  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"7278e16a-f043-48d4-9b74-d4c9cdc72a43","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:12:15.269880  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
E0323 16:12:15.456979  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:12:15.456993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:12:16.458100  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:12:16.458168  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 16:12:16.458187  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:12:16.458193  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:12:16.472588  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:12:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:12:23.409784  543705 memory.go:184] no items to output this cycle
I0323 16:12:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 16:12:33.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:12:33.409785  543705 memory.go:184] no items to output this cycle
I0323 16:12:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 16:12:40.480191  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:12:40.480200  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:12:40.749480  543705 disk_info.go:125] begin check local disk info of client
I0323 16:12:40.752108  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:12:40.752116  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001ab900 0xc0001ab940]
E0323 16:12:43.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:12:43.410641  543705 memory.go:191] Add success.
I0323 16:12:43.409800  543705 cpu.go:282] Add success.
I0323 16:12:43.420320  543705 net.go:648] Add success.
I0323 16:12:43.423060  543705 net.go:770] primary dev: ETH0
I0323 16:12:43.423072  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:12:43.423086  543705 net.go:698] Add success.
I0323 16:12:46.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:12:46.458051  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:12:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:12:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:12:53.409794  543705 memory.go:184] no items to output this cycle
I0323 16:12:53.409807  543705 cpu.go:275] no items to output this cycle
E0323 16:13:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:13:03.409777  543705 memory.go:184] no items to output this cycle
I0323 16:13:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 16:13:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:13:13.409826  543705 memory.go:191] Add success.
I0323 16:13:13.409832  543705 cpu.go:282] Add success.
W0323 16:13:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:13:13.409876  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:13:13.409879  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:13:13.420159  543705 net.go:648] Add success.
I0323 16:13:13.422731  543705 net.go:770] primary dev: ETH0
I0323 16:13:13.422745  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:13:13.422757  543705 net.go:698] Add success.
I0323 16:13:14.454966  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:13:14.455173  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:13:14.455183  543705 disk_worker.go:708] disk space is not compliant
W0323 16:13:14.455186  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:13:14.458962  543705 disk_worker.go:494] system disk:vda1
I0323 16:13:14.458992  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:13:15.455950  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:13:16.457965  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:13:16.458022  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:13:16.458042  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:13:16.472350  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:13:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:13:23.409817  543705 memory.go:184] no items to output this cycle
I0323 16:13:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 16:13:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:13:33.409781  543705 memory.go:184] no items to output this cycle
I0323 16:13:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 16:13:40.753410  543705 disk_info.go:125] begin check local disk info of client
I0323 16:13:40.755982  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:13:40.755989  543705 disk_info.go:196] parse disk info done, disk is : [0xc000381780 0xc0003817c0]
E0323 16:13:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:13:43.410736  543705 memory.go:191] Add success.
I0323 16:13:43.409800  543705 cpu.go:282] Add success.
I0323 16:13:43.420500  543705 net.go:648] Add success.
I0323 16:13:43.423280  543705 net.go:770] primary dev: ETH0
I0323 16:13:43.423295  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:13:43.423310  543705 net.go:698] Add success.
I0323 16:13:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:13:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:13:46.458111  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:13:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:13:53.409808  543705 memory.go:184] no items to output this cycle
I0323 16:13:53.409816  543705 cpu.go:275] no items to output this cycle
E0323 16:14:03.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:14:03.409804  543705 memory.go:184] no items to output this cycle
I0323 16:14:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 16:14:13.409745  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:14:13.409773  543705 memory.go:191] Add success.
W0323 16:14:13.409800  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:14:13.409802  543705 cpu.go:282] Add success.
W0323 16:14:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:14:13.409815  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:14:13.420157  543705 net.go:648] Add success.
I0323 16:14:13.423010  543705 net.go:770] primary dev: ETH0
I0323 16:14:13.423024  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:14:13.423037  543705 net.go:698] Add success.
I0323 16:14:14.454972  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:14:14.455191  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:14:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 16:14:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:14:14.456595  543705 disk_worker.go:494] system disk:vda1
I0323 16:14:14.456627  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:14:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:14:16.457975  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:14:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:14:16.458059  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:14:16.472383  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:14:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:14:23.409794  543705 memory.go:184] no items to output this cycle
I0323 16:14:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 16:14:33.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:14:33.409778  543705 memory.go:184] no items to output this cycle
I0323 16:14:33.409813  543705 cpu.go:275] no items to output this cycle
I0323 16:14:40.757452  543705 disk_info.go:125] begin check local disk info of client
I0323 16:14:40.760182  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:14:40.760191  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005706c0 0xc000570700]
E0323 16:14:43.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:14:43.410788  543705 memory.go:191] Add success.
I0323 16:14:43.409829  543705 cpu.go:282] Add success.
I0323 16:14:43.420504  543705 net.go:648] Add success.
I0323 16:14:43.422990  543705 net.go:770] primary dev: ETH0
I0323 16:14:43.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:14:43.423016  543705 net.go:698] Add success.
I0323 16:14:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:14:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:14:46.458104  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:14:53.410276  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:14:53.410293  543705 memory.go:184] no items to output this cycle
I0323 16:14:53.410294  543705 cpu.go:275] no items to output this cycle
E0323 16:15:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:15:03.409805  543705 cpu.go:275] no items to output this cycle
I0323 16:15:03.409824  543705 memory.go:184] no items to output this cycle
E0323 16:15:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:15:13.409812  543705 memory.go:191] Add success.
W0323 16:15:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:15:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:15:13.409853  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:15:13.409860  543705 cpu.go:282] Add success.
I0323 16:15:13.420291  543705 net.go:648] Add success.
I0323 16:15:13.421173  543705 net.go:770] primary dev: ETH0
I0323 16:15:13.421186  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:15:13.421200  543705 net.go:698] Add success.
I0323 16:15:13.469344  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5004b3c1-08f0-4e24-b8f2-e37039643a2a","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:15:13.469382  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:15:14.455096  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:15:14.455233  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:15:14.455245  543705 disk_worker.go:708] disk space is not compliant
W0323 16:15:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:15:14.456819  543705 disk_worker.go:494] system disk:vda1
I0323 16:15:14.456849  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:15:15.455332  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:15:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:15:16.458080  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:15:16.458111  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:15:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:15:23.409823  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:15:23.409843  543705 memory.go:184] no items to output this cycle
I0323 16:15:23.409844  543705 cpu.go:275] no items to output this cycle
E0323 16:15:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:15:33.409796  543705 memory.go:184] no items to output this cycle
I0323 16:15:33.409820  543705 cpu.go:275] no items to output this cycle
I0323 16:15:40.481155  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:15:40.481161  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:15:40.761495  543705 disk_info.go:125] begin check local disk info of client
I0323 16:15:40.764061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:15:40.764066  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ad00 0xc00007ad40]
E0323 16:15:43.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:15:43.410635  543705 memory.go:191] Add success.
I0323 16:15:43.409819  543705 cpu.go:282] Add success.
I0323 16:15:43.420345  543705 net.go:648] Add success.
I0323 16:15:43.423013  543705 net.go:770] primary dev: ETH0
I0323 16:15:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:15:43.423046  543705 net.go:698] Add success.
I0323 16:15:46.457970  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:15:46.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:15:46.458061  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:15:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:15:53.409813  543705 memory.go:184] no items to output this cycle
I0323 16:15:53.409826  543705 cpu.go:275] no items to output this cycle
E0323 16:16:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:16:03.409818  543705 memory.go:184] no items to output this cycle
I0323 16:16:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 16:16:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:16:13.409816  543705 memory.go:191] Add success.
I0323 16:16:13.409821  543705 cpu.go:282] Add success.
W0323 16:16:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:16:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:16:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:16:13.420057  543705 net.go:648] Add success.
I0323 16:16:13.422691  543705 net.go:770] primary dev: ETH0
I0323 16:16:13.422710  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:16:13.422743  543705 net.go:698] Add success.
I0323 16:16:14.454986  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:16:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:16:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0323 16:16:14.455227  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:16:14.456632  543705 disk_worker.go:494] system disk:vda1
I0323 16:16:14.456665  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:16:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:16:16.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:16:16.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:16:16.458091  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:16:16.472473  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:16:23.409914  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:16:23.409961  543705 memory.go:184] no items to output this cycle
I0323 16:16:23.409943  543705 cpu.go:275] no items to output this cycle
E0323 16:16:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:16:33.409823  543705 memory.go:184] no items to output this cycle
I0323 16:16:33.409859  543705 cpu.go:275] no items to output this cycle
I0323 16:16:40.764162  543705 disk_info.go:125] begin check local disk info of client
I0323 16:16:40.766784  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:16:40.766791  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aae40 0xc0001aae80]
E0323 16:16:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:16:43.410750  543705 memory.go:191] Add success.
I0323 16:16:43.409853  543705 cpu.go:282] Add success.
I0323 16:16:43.420553  543705 net.go:648] Add success.
I0323 16:16:43.423198  543705 net.go:770] primary dev: ETH0
I0323 16:16:43.423213  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:16:43.423228  543705 net.go:698] Add success.
I0323 16:16:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:16:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:16:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:16:53.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:16:53.409833  543705 memory.go:184] no items to output this cycle
I0323 16:16:53.409834  543705 cpu.go:275] no items to output this cycle
E0323 16:17:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:17:03.409822  543705 memory.go:184] no items to output this cycle
I0323 16:17:03.409860  543705 cpu.go:275] no items to output this cycle
E0323 16:17:13.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:17:13.409803  543705 memory.go:191] Add success.
I0323 16:17:13.409808  543705 cpu.go:282] Add success.
W0323 16:17:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:17:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:17:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:17:13.420089  543705 net.go:648] Add success.
I0323 16:17:13.423017  543705 net.go:770] primary dev: ETH0
I0323 16:17:13.423030  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:17:13.423042  543705 net.go:698] Add success.
I0323 16:17:13.453630  543705 event_worker.go:152] Polling the log file for events...
W0323 16:17:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:17:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 16:17:14.455173  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:17:14.455883  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:17:14.455892  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:17:14.455898  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:17:14.456540  543705 disk_worker.go:494] system disk:vda1
I0323 16:17:14.456570  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:17:15.456852  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:17:15.456862  543705 custom_config.go:64] query custom config with name: huawei_npu
E0323 16:17:16.456955  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:17:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:17:16.458052  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:17:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:17:16.472396  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:17:23.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:17:23.409767  543705 memory.go:184] no items to output this cycle
I0323 16:17:23.409784  543705 cpu.go:275] no items to output this cycle
E0323 16:17:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:17:33.409807  543705 memory.go:184] no items to output this cycle
I0323 16:17:33.409831  543705 cpu.go:275] no items to output this cycle
I0323 16:17:40.768495  543705 disk_info.go:125] begin check local disk info of client
I0323 16:17:40.780132  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:17:40.780141  543705 disk_info.go:196] parse disk info done, disk is : [0xc00027b180 0xc00027b1c0]
I0323 16:17:43.409801  543705 cpu.go:282] Add success.
E0323 16:17:43.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:17:43.409846  543705 memory.go:191] Add success.
I0323 16:17:43.420264  543705 net.go:648] Add success.
I0323 16:17:43.421324  543705 net.go:770] primary dev: ETH0
I0323 16:17:43.421342  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:17:43.421361  543705 net.go:698] Add success.
I0323 16:17:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:17:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:17:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:17:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:17:53.409790  543705 memory.go:184] no items to output this cycle
I0323 16:17:53.409811  543705 cpu.go:275] no items to output this cycle
E0323 16:18:03.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:18:03.409816  543705 cpu.go:275] no items to output this cycle
I0323 16:18:03.409829  543705 memory.go:184] no items to output this cycle
E0323 16:18:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:18:13.409791  543705 memory.go:191] Add success.
I0323 16:18:13.409791  543705 cpu.go:282] Add success.
W0323 16:18:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:18:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:18:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:18:13.420203  543705 net.go:648] Add success.
I0323 16:18:13.423163  543705 net.go:770] primary dev: ETH0
I0323 16:18:13.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:18:13.423192  543705 net.go:698] Add success.
I0323 16:18:13.464138  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9003edb1-7470-46e7-9b13-c8bb07f4944f","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:18:13.464170  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:18:14.454963  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:18:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:18:14.455224  543705 disk_worker.go:708] disk space is not compliant
W0323 16:18:14.455226  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:18:14.456764  543705 disk_worker.go:494] system disk:vda1
I0323 16:18:14.456793  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:18:15.455959  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:18:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:18:16.458036  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:18:16.458060  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:18:16.472390  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:18:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:18:23.409789  543705 memory.go:184] no items to output this cycle
I0323 16:18:23.409803  543705 cpu.go:275] no items to output this cycle
E0323 16:18:33.409877  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:18:33.409890  543705 cpu.go:275] no items to output this cycle
I0323 16:18:33.409899  543705 memory.go:184] no items to output this cycle
I0323 16:18:40.482199  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:18:40.482207  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:18:40.781628  543705 disk_info.go:125] begin check local disk info of client
I0323 16:18:40.784279  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:18:40.784287  543705 disk_info.go:196] parse disk info done, disk is : [0xc000485540 0xc000485580]
E0323 16:18:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:18:43.410632  543705 memory.go:191] Add success.
I0323 16:18:43.409827  543705 cpu.go:282] Add success.
I0323 16:18:43.420376  543705 net.go:648] Add success.
I0323 16:18:43.423081  543705 net.go:770] primary dev: ETH0
I0323 16:18:43.423096  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:18:43.423110  543705 net.go:698] Add success.
I0323 16:18:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:18:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:18:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:18:53.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:18:53.409804  543705 cpu.go:275] no items to output this cycle
I0323 16:18:53.409810  543705 memory.go:184] no items to output this cycle
E0323 16:19:03.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:19:03.409800  543705 memory.go:184] no items to output this cycle
I0323 16:19:03.409848  543705 cpu.go:275] no items to output this cycle
E0323 16:19:13.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:19:13.409813  543705 memory.go:191] Add success.
W0323 16:19:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:19:13.409854  543705 cpu.go:282] Add success.
W0323 16:19:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:19:13.409859  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:19:13.420582  543705 net.go:648] Add success.
I0323 16:19:13.423963  543705 net.go:770] primary dev: ETH0
I0323 16:19:13.423981  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:19:13.423999  543705 net.go:698] Add success.
I0323 16:19:14.453943  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:19:14.455477  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:19:14.455494  543705 disk_worker.go:708] disk space is not compliant
W0323 16:19:14.455499  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:19:14.457586  543705 disk_worker.go:494] system disk:vda1
I0323 16:19:14.457628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:19:15.455978  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:19:16.458017  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:19:16.458096  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:19:16.458138  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:19:16.472583  543705 disk_local_worker.go:436] Get disk info: []
I0323 16:19:23.409802  543705 cpu.go:275] no items to output this cycle
E0323 16:19:23.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:19:23.409822  543705 memory.go:184] no items to output this cycle
E0323 16:19:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:19:33.409893  543705 cpu.go:275] no items to output this cycle
I0323 16:19:33.409905  543705 memory.go:184] no items to output this cycle
I0323 16:19:40.784383  543705 disk_info.go:125] begin check local disk info of client
I0323 16:19:40.786954  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:19:40.786961  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 16:19:43.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:19:43.410806  543705 memory.go:191] Add success.
I0323 16:19:43.409883  543705 cpu.go:282] Add success.
I0323 16:19:43.420557  543705 net.go:648] Add success.
I0323 16:19:43.423330  543705 net.go:770] primary dev: ETH0
I0323 16:19:43.423343  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:19:43.423355  543705 net.go:698] Add success.
I0323 16:19:46.457988  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:19:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:19:46.458096  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:19:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:19:53.409777  543705 memory.go:184] no items to output this cycle
I0323 16:19:53.409780  543705 cpu.go:275] no items to output this cycle
E0323 16:20:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:20:03.409805  543705 memory.go:184] no items to output this cycle
I0323 16:20:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 16:20:13.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:20:13.409784  543705 memory.go:191] Add success.
I0323 16:20:13.409801  543705 cpu.go:282] Add success.
W0323 16:20:13.409811  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:20:13.409822  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:20:13.409825  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:20:13.420104  543705 net.go:648] Add success.
I0323 16:20:13.423067  543705 net.go:770] primary dev: ETH0
I0323 16:20:13.423079  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:20:13.423091  543705 net.go:698] Add success.
I0323 16:20:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:20:14.455161  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:20:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 16:20:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:20:14.456509  543705 disk_worker.go:494] system disk:vda1
I0323 16:20:14.456554  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:20:15.455957  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:20:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:20:16.458033  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:20:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:20:16.472388  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:20:23.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:20:23.409850  543705 memory.go:184] no items to output this cycle
I0323 16:20:23.409861  543705 cpu.go:275] no items to output this cycle
E0323 16:20:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:20:33.409793  543705 memory.go:184] no items to output this cycle
I0323 16:20:33.410042  543705 cpu.go:275] no items to output this cycle
I0323 16:20:40.787066  543705 disk_info.go:125] begin check local disk info of client
I0323 16:20:40.789760  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:20:40.789771  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0000 0xc0002a0040]
E0323 16:20:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:20:43.409817  543705 memory.go:191] Add success.
I0323 16:20:43.409856  543705 cpu.go:282] Add success.
I0323 16:20:43.420134  543705 net.go:648] Add success.
I0323 16:20:43.421178  543705 net.go:770] primary dev: ETH0
I0323 16:20:43.421191  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:20:43.421203  543705 net.go:698] Add success.
I0323 16:20:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:20:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:20:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:20:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 16:20:53.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:20:53.409827  543705 memory.go:184] no items to output this cycle
E0323 16:21:03.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:21:03.409800  543705 cpu.go:275] no items to output this cycle
I0323 16:21:03.409806  543705 memory.go:184] no items to output this cycle
E0323 16:21:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:21:13.409810  543705 cpu.go:282] Add success.
I0323 16:21:13.409819  543705 memory.go:191] Add success.
W0323 16:21:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:21:13.409859  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:21:13.409863  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:21:13.420149  543705 net.go:648] Add success.
I0323 16:21:13.422892  543705 net.go:770] primary dev: ETH0
I0323 16:21:13.422903  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:21:13.422915  543705 net.go:698] Add success.
I0323 16:21:13.469829  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"87ede234-8c0e-45cb-ac2d-73ef6d916f41","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:21:13.469864  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:21:14.454962  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:21:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:21:14.455199  543705 disk_worker.go:708] disk space is not compliant
W0323 16:21:14.455202  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:21:14.456599  543705 disk_worker.go:494] system disk:vda1
I0323 16:21:14.456632  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:21:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:21:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:21:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:21:16.458066  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:21:16.472419  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:21:23.409746  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:21:23.409762  543705 memory.go:184] no items to output this cycle
I0323 16:21:23.409799  543705 cpu.go:275] no items to output this cycle
E0323 16:21:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:21:33.409791  543705 memory.go:184] no items to output this cycle
I0323 16:21:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 16:21:40.483171  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:21:40.483178  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:21:40.790727  543705 disk_info.go:125] begin check local disk info of client
I0323 16:21:40.793243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:21:40.793248  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003143c0 0xc000314400]
E0323 16:21:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:21:43.410639  543705 memory.go:191] Add success.
I0323 16:21:43.409806  543705 cpu.go:282] Add success.
I0323 16:21:43.420343  543705 net.go:648] Add success.
I0323 16:21:43.422993  543705 net.go:770] primary dev: ETH0
I0323 16:21:43.423008  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:21:43.423023  543705 net.go:698] Add success.
I0323 16:21:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:21:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:21:46.458094  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:21:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:21:53.409788  543705 memory.go:184] no items to output this cycle
I0323 16:21:53.409846  543705 cpu.go:275] no items to output this cycle
E0323 16:22:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:22:03.409787  543705 memory.go:184] no items to output this cycle
I0323 16:22:03.409794  543705 cpu.go:275] no items to output this cycle
E0323 16:22:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:22:13.409793  543705 memory.go:191] Add success.
I0323 16:22:13.409794  543705 cpu.go:282] Add success.
W0323 16:22:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:22:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:22:13.409837  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:22:13.420158  543705 net.go:648] Add success.
I0323 16:22:13.422920  543705 net.go:770] primary dev: ETH0
I0323 16:22:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:22:13.422948  543705 net.go:698] Add success.
W0323 16:22:14.455114  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:22:14.455176  543705 disk_worker.go:708] disk space is not compliant
W0323 16:22:14.455179  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:22:14.456786  543705 disk_worker.go:494] system disk:vda1
I0323 16:22:14.456824  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:22:14.457144  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:22:14.457155  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:22:14.457161  543705 custom_config.go:64] query custom config with name: gpu
E0323 16:22:15.456819  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:22:15.456828  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:22:16.457949  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 16:22:16.457958  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:22:16.458000  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:22:16.458017  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:22:16.472349  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:22:23.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:22:23.409774  543705 memory.go:184] no items to output this cycle
I0323 16:22:23.409786  543705 cpu.go:275] no items to output this cycle
E0323 16:22:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:22:33.409800  543705 memory.go:184] no items to output this cycle
I0323 16:22:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 16:22:40.793692  543705 disk_info.go:125] begin check local disk info of client
I0323 16:22:40.796542  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:22:40.796551  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0000 0xc0002a0040]
E0323 16:22:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:22:43.410694  543705 memory.go:191] Add success.
I0323 16:22:43.409810  543705 cpu.go:282] Add success.
I0323 16:22:43.420439  543705 net.go:648] Add success.
I0323 16:22:43.423240  543705 net.go:770] primary dev: ETH0
I0323 16:22:43.423253  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:22:43.423266  543705 net.go:698] Add success.
I0323 16:22:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:22:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:22:46.458081  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:22:53.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:22:53.409785  543705 memory.go:184] no items to output this cycle
I0323 16:22:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 16:23:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:23:03.409783  543705 memory.go:184] no items to output this cycle
I0323 16:23:03.409790  543705 cpu.go:275] no items to output this cycle
E0323 16:23:13.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:23:13.409776  543705 memory.go:191] Add success.
W0323 16:23:13.409801  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:23:13.409803  543705 cpu.go:282] Add success.
W0323 16:23:13.409812  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:23:13.409816  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:23:13.420065  543705 net.go:648] Add success.
I0323 16:23:13.422976  543705 net.go:770] primary dev: ETH0
I0323 16:23:13.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:23:13.423001  543705 net.go:698] Add success.
I0323 16:23:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:23:14.455112  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:23:14.455194  543705 disk_worker.go:708] disk space is not compliant
W0323 16:23:14.455197  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:23:14.456593  543705 disk_worker.go:494] system disk:vda1
I0323 16:23:14.456624  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:23:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:23:16.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:23:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:23:16.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:23:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:23:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:23:23.409780  543705 memory.go:184] no items to output this cycle
I0323 16:23:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 16:23:33.409879  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:23:33.409901  543705 memory.go:184] no items to output this cycle
I0323 16:23:33.409983  543705 cpu.go:275] no items to output this cycle
I0323 16:23:40.796645  543705 disk_info.go:125] begin check local disk info of client
I0323 16:23:40.799264  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:23:40.799271  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0000 0xc0002a0040]
E0323 16:23:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:23:43.410880  543705 memory.go:191] Add success.
I0323 16:23:43.409830  543705 cpu.go:282] Add success.
I0323 16:23:43.420649  543705 net.go:648] Add success.
I0323 16:23:43.423387  543705 net.go:770] primary dev: ETH0
I0323 16:23:43.423402  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:23:43.423417  543705 net.go:698] Add success.
I0323 16:23:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:23:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:23:46.458085  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:23:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:23:53.409788  543705 memory.go:184] no items to output this cycle
I0323 16:23:53.409799  543705 cpu.go:275] no items to output this cycle
I0323 16:24:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 16:24:03.409813  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:24:03.409834  543705 memory.go:184] no items to output this cycle
E0323 16:24:13.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:24:13.409827  543705 memory.go:191] Add success.
I0323 16:24:13.409842  543705 cpu.go:282] Add success.
W0323 16:24:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:24:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:24:13.409876  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:24:13.420278  543705 net.go:648] Add success.
I0323 16:24:13.423166  543705 net.go:770] primary dev: ETH0
I0323 16:24:13.423186  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:24:13.423224  543705 net.go:698] Add success.
I0323 16:24:13.474030  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"5f1a7652-0e01-416f-a6a4-4506b2052380","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:24:13.474067  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:24:14.454965  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:24:14.455150  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:24:14.455230  543705 disk_worker.go:708] disk space is not compliant
W0323 16:24:14.455233  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:24:14.456670  543705 disk_worker.go:494] system disk:vda1
I0323 16:24:14.456704  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:24:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:24:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:24:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:24:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:24:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:24:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:24:23.409819  543705 memory.go:184] no items to output this cycle
I0323 16:24:23.409835  543705 cpu.go:275] no items to output this cycle
E0323 16:24:33.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:24:33.409827  543705 memory.go:184] no items to output this cycle
I0323 16:24:33.409838  543705 cpu.go:275] no items to output this cycle
I0323 16:24:40.484198  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:24:40.484205  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:24:40.799374  543705 disk_info.go:125] begin check local disk info of client
I0323 16:24:40.802066  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:24:40.802074  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a2180 0xc0002a21c0]
E0323 16:24:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:24:43.409845  543705 memory.go:191] Add success.
I0323 16:24:43.409862  543705 cpu.go:282] Add success.
I0323 16:24:43.420313  543705 net.go:648] Add success.
I0323 16:24:43.421333  543705 net.go:770] primary dev: ETH0
I0323 16:24:43.421349  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:24:43.421363  543705 net.go:698] Add success.
I0323 16:24:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:24:46.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:24:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:24:53.409818  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:24:53.409843  543705 memory.go:184] no items to output this cycle
I0323 16:24:53.410003  543705 cpu.go:275] no items to output this cycle
E0323 16:25:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:25:03.409785  543705 memory.go:184] no items to output this cycle
I0323 16:25:03.409809  543705 cpu.go:275] no items to output this cycle
E0323 16:25:13.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:25:13.409819  543705 cpu.go:282] Add success.
I0323 16:25:13.409820  543705 memory.go:191] Add success.
W0323 16:25:13.409848  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:25:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:25:13.409864  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:25:13.420172  543705 net.go:648] Add success.
I0323 16:25:13.422877  543705 net.go:770] primary dev: ETH0
I0323 16:25:13.422890  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:25:13.422902  543705 net.go:698] Add success.
I0323 16:25:14.453935  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:25:14.455204  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:25:14.455217  543705 disk_worker.go:708] disk space is not compliant
W0323 16:25:14.455221  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:25:14.456613  543705 disk_worker.go:494] system disk:vda1
I0323 16:25:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:25:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:25:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:25:16.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:25:16.458099  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:25:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:25:23.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:25:23.409831  543705 memory.go:184] no items to output this cycle
I0323 16:25:23.410000  543705 cpu.go:275] no items to output this cycle
E0323 16:25:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:25:33.409823  543705 memory.go:184] no items to output this cycle
I0323 16:25:33.409852  543705 cpu.go:275] no items to output this cycle
I0323 16:25:40.802175  543705 disk_info.go:125] begin check local disk info of client
I0323 16:25:40.804862  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:25:40.804869  543705 disk_info.go:196] parse disk info done, disk is : [0xc000548900 0xc000548940]
E0323 16:25:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:25:43.410794  543705 memory.go:191] Add success.
I0323 16:25:43.409815  543705 cpu.go:282] Add success.
I0323 16:25:43.420533  543705 net.go:648] Add success.
I0323 16:25:43.423408  543705 net.go:770] primary dev: ETH0
I0323 16:25:43.423424  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:25:43.423439  543705 net.go:698] Add success.
I0323 16:25:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:25:46.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:25:46.458114  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:25:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:25:53.409787  543705 memory.go:184] no items to output this cycle
I0323 16:25:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 16:26:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:26:03.409786  543705 memory.go:184] no items to output this cycle
I0323 16:26:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 16:26:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:26:13.409830  543705 memory.go:191] Add success.
I0323 16:26:13.409838  543705 cpu.go:282] Add success.
W0323 16:26:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:26:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:26:13.409883  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:26:13.420337  543705 net.go:648] Add success.
I0323 16:26:13.423147  543705 net.go:770] primary dev: ETH0
I0323 16:26:13.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:26:13.423175  543705 net.go:698] Add success.
I0323 16:26:14.454977  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:26:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:26:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 16:26:14.455211  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:26:14.456576  543705 disk_worker.go:494] system disk:vda1
I0323 16:26:14.456613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:26:15.455973  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:26:16.457997  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:26:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:26:16.458104  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:26:16.472483  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:26:23.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:26:23.409803  543705 memory.go:184] no items to output this cycle
I0323 16:26:23.409820  543705 cpu.go:275] no items to output this cycle
E0323 16:26:33.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:26:33.409793  543705 memory.go:184] no items to output this cycle
I0323 16:26:33.409847  543705 cpu.go:275] no items to output this cycle
I0323 16:26:40.805699  543705 disk_info.go:125] begin check local disk info of client
I0323 16:26:40.808358  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:26:40.808366  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002676c0 0xc000267700]
E0323 16:26:43.409838  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:26:43.410952  543705 memory.go:191] Add success.
I0323 16:26:43.409881  543705 cpu.go:282] Add success.
I0323 16:26:43.419823  543705 net.go:648] Add success.
I0323 16:26:43.423106  543705 net.go:770] primary dev: ETH0
I0323 16:26:43.423122  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:26:43.423136  543705 net.go:698] Add success.
I0323 16:26:46.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:26:46.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:26:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:26:53.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:26:53.409823  543705 memory.go:184] no items to output this cycle
I0323 16:26:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 16:27:03.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:27:03.409790  543705 memory.go:184] no items to output this cycle
I0323 16:27:03.409855  543705 cpu.go:275] no items to output this cycle
E0323 16:27:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:27:13.409792  543705 memory.go:191] Add success.
I0323 16:27:13.409815  543705 cpu.go:282] Add success.
W0323 16:27:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:27:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:27:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:27:13.420279  543705 net.go:648] Add success.
I0323 16:27:13.429050  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 16:27:13.429126  543705 net.go:770] primary dev: ETH0
I0323 16:27:13.429139  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:27:13.429150  543705 net.go:698] Add success.
I0323 16:27:13.453684  543705 event_worker.go:152] Polling the log file for events...
I0323 16:27:13.464654  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e36e635a-326b-47ec-a587-f5d6dd8f252e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:27:13.464687  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 16:27:14.455177  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:27:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 16:27:14.455190  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:27:14.456850  543705 disk_worker.go:494] system disk:vda1
E0323 16:27:14.456876  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
I0323 16:27:14.456892  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:27:14.456895  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:27:14.456901  543705 custom_config.go:64] query custom config with name: gpu
E0323 16:27:15.456840  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:27:15.456848  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:27:16.457958  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 16:27:16.457966  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:27:16.458010  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:27:16.458025  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:27:16.472355  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:27:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:27:23.409777  543705 memory.go:184] no items to output this cycle
I0323 16:27:23.409806  543705 cpu.go:275] no items to output this cycle
E0323 16:27:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:27:33.409797  543705 memory.go:184] no items to output this cycle
I0323 16:27:33.409807  543705 cpu.go:275] no items to output this cycle
I0323 16:27:40.485225  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:27:40.485233  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:27:40.809686  543705 disk_info.go:125] begin check local disk info of client
I0323 16:27:40.819136  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:27:40.819145  543705 disk_info.go:196] parse disk info done, disk is : [0xc000347d80 0xc000347dc0]
I0323 16:27:43.409944  543705 cpu.go:282] Add success.
E0323 16:27:43.409966  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:27:43.410971  543705 memory.go:191] Add success.
I0323 16:27:43.420653  543705 net.go:648] Add success.
I0323 16:27:43.423526  543705 net.go:770] primary dev: ETH0
I0323 16:27:43.423539  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:27:43.423551  543705 net.go:698] Add success.
I0323 16:27:46.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:27:46.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:27:46.458097  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:27:53.409878  543705 cpu.go:275] no items to output this cycle
E0323 16:27:53.410015  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:27:53.410030  543705 memory.go:184] no items to output this cycle
E0323 16:28:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:28:03.409817  543705 memory.go:184] no items to output this cycle
I0323 16:28:03.409858  543705 cpu.go:275] no items to output this cycle
E0323 16:28:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:28:13.409840  543705 memory.go:191] Add success.
I0323 16:28:13.409843  543705 cpu.go:282] Add success.
W0323 16:28:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:28:13.409882  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:28:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:28:13.420197  543705 net.go:648] Add success.
I0323 16:28:13.422951  543705 net.go:770] primary dev: ETH0
I0323 16:28:13.422964  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:28:13.422977  543705 net.go:698] Add success.
I0323 16:28:14.453934  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:28:14.455180  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:28:14.455248  543705 disk_worker.go:708] disk space is not compliant
W0323 16:28:14.455252  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:28:14.456778  543705 disk_worker.go:494] system disk:vda1
I0323 16:28:14.456821  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:28:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:28:16.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:28:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:28:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:28:16.472486  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:28:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:28:23.409815  543705 memory.go:184] no items to output this cycle
I0323 16:28:23.409854  543705 cpu.go:275] no items to output this cycle
E0323 16:28:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:28:33.409811  543705 memory.go:184] no items to output this cycle
I0323 16:28:33.409834  543705 cpu.go:275] no items to output this cycle
I0323 16:28:40.819242  543705 disk_info.go:125] begin check local disk info of client
I0323 16:28:40.821907  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:28:40.821913  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004dbec0 0xc0004dbf00]
E0323 16:28:43.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:28:43.410739  543705 memory.go:191] Add success.
I0323 16:28:43.409874  543705 cpu.go:282] Add success.
I0323 16:28:43.419730  543705 net.go:648] Add success.
I0323 16:28:43.422586  543705 net.go:770] primary dev: ETH0
I0323 16:28:43.422601  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:28:43.422616  543705 net.go:698] Add success.
I0323 16:28:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:28:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:28:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:28:53.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:28:53.409777  543705 memory.go:184] no items to output this cycle
I0323 16:28:53.409797  543705 cpu.go:275] no items to output this cycle
E0323 16:29:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:29:03.409798  543705 cpu.go:275] no items to output this cycle
I0323 16:29:03.409805  543705 memory.go:184] no items to output this cycle
E0323 16:29:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:29:13.409793  543705 memory.go:191] Add success.
W0323 16:29:13.409820  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:29:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:29:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:29:13.409880  543705 cpu.go:282] Add success.
I0323 16:29:13.420100  543705 net.go:770] primary dev: ETH0
I0323 16:29:13.420113  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:29:13.420127  543705 net.go:698] Add success.
I0323 16:29:13.420368  543705 net.go:648] Add success.
I0323 16:29:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:29:14.455365  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:29:14.455461  543705 disk_worker.go:708] disk space is not compliant
W0323 16:29:14.455465  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:29:14.458714  543705 disk_worker.go:494] system disk:vda1
I0323 16:29:14.458766  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:29:15.455989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:29:16.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:29:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:29:16.458102  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:29:16.472493  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:29:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:29:23.409812  543705 memory.go:184] no items to output this cycle
I0323 16:29:23.409820  543705 cpu.go:275] no items to output this cycle
I0323 16:29:33.409793  543705 cpu.go:275] no items to output this cycle
E0323 16:29:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:29:33.409816  543705 memory.go:184] no items to output this cycle
I0323 16:29:40.822003  543705 disk_info.go:125] begin check local disk info of client
I0323 16:29:40.824583  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:29:40.824594  543705 disk_info.go:196] parse disk info done, disk is : [0xc000324740 0xc000324780]
E0323 16:29:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:29:43.410698  543705 memory.go:191] Add success.
I0323 16:29:43.409805  543705 cpu.go:282] Add success.
I0323 16:29:43.419724  543705 net.go:648] Add success.
I0323 16:29:43.422905  543705 net.go:770] primary dev: ETH0
I0323 16:29:43.422917  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:29:43.422930  543705 net.go:698] Add success.
I0323 16:29:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:29:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:29:46.458105  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:29:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:29:53.409821  543705 memory.go:184] no items to output this cycle
I0323 16:29:53.409833  543705 cpu.go:275] no items to output this cycle
E0323 16:30:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:30:03.409787  543705 memory.go:184] no items to output this cycle
I0323 16:30:03.409850  543705 cpu.go:275] no items to output this cycle
E0323 16:30:13.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:30:13.409843  543705 memory.go:191] Add success.
I0323 16:30:13.409850  543705 cpu.go:282] Add success.
W0323 16:30:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:30:13.409908  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:30:13.409912  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:30:13.420264  543705 net.go:648] Add success.
I0323 16:30:13.424100  543705 net.go:770] primary dev: ETH0
I0323 16:30:13.424115  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:30:13.424129  543705 net.go:698] Add success.
I0323 16:30:13.468981  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"cf835beb-9822-47e5-99c2-237c9240a96d","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:30:13.469014  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:30:14.454975  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:30:14.455149  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:30:14.455221  543705 disk_worker.go:708] disk space is not compliant
W0323 16:30:14.455225  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:30:14.456604  543705 disk_worker.go:494] system disk:vda1
I0323 16:30:14.456637  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:30:15.455999  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:30:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:30:16.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:30:16.458127  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:30:16.472534  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:30:23.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:30:23.409825  543705 memory.go:184] no items to output this cycle
I0323 16:30:23.409836  543705 cpu.go:275] no items to output this cycle
E0323 16:30:33.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:30:33.409803  543705 memory.go:184] no items to output this cycle
I0323 16:30:33.409851  543705 cpu.go:275] no items to output this cycle
I0323 16:30:40.486257  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:30:40.486267  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:30:40.825684  543705 disk_info.go:125] begin check local disk info of client
I0323 16:30:40.828287  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:30:40.828295  543705 disk_info.go:196] parse disk info done, disk is : [0xc000481300 0xc000481340]
E0323 16:30:43.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:30:43.410792  543705 memory.go:191] Add success.
I0323 16:30:43.409848  543705 cpu.go:282] Add success.
I0323 16:30:43.419735  543705 net.go:648] Add success.
I0323 16:30:43.422231  543705 net.go:770] primary dev: ETH0
I0323 16:30:43.422244  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:30:43.422261  543705 net.go:698] Add success.
I0323 16:30:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:30:46.458079  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:30:46.458109  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:30:53.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:30:53.409797  543705 cpu.go:275] no items to output this cycle
I0323 16:30:53.409809  543705 memory.go:184] no items to output this cycle
E0323 16:31:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:31:03.409783  543705 memory.go:184] no items to output this cycle
I0323 16:31:03.409788  543705 cpu.go:275] no items to output this cycle
W0323 16:31:13.409713  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:31:13.409736  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:31:13.409742  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 16:31:13.409835  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:31:13.409835  543705 cpu.go:282] Add success.
I0323 16:31:13.409855  543705 memory.go:191] Add success.
I0323 16:31:13.420083  543705 net.go:648] Add success.
I0323 16:31:13.422634  543705 net.go:770] primary dev: ETH0
I0323 16:31:13.422647  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:31:13.422659  543705 net.go:698] Add success.
I0323 16:31:14.454968  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:31:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:31:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 16:31:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:31:14.456599  543705 disk_worker.go:494] system disk:vda1
I0323 16:31:14.456628  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:31:15.455962  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:31:16.457985  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:31:16.458059  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:31:16.458088  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:31:16.472547  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:31:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:31:23.409809  543705 memory.go:184] no items to output this cycle
I0323 16:31:23.409813  543705 cpu.go:275] no items to output this cycle
E0323 16:31:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:31:33.409792  543705 memory.go:184] no items to output this cycle
I0323 16:31:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 16:31:40.829680  543705 disk_info.go:125] begin check local disk info of client
I0323 16:31:40.832243  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:31:40.832250  543705 disk_info.go:196] parse disk info done, disk is : [0xc000371040 0xc000371080]
E0323 16:31:43.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:31:43.410679  543705 memory.go:191] Add success.
I0323 16:31:43.409825  543705 cpu.go:282] Add success.
I0323 16:31:43.420532  543705 net.go:648] Add success.
I0323 16:31:43.423413  543705 net.go:770] primary dev: ETH0
I0323 16:31:43.423425  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:31:43.423437  543705 net.go:698] Add success.
I0323 16:31:46.458012  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:31:46.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:31:46.458151  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:31:53.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:31:53.409819  543705 memory.go:184] no items to output this cycle
I0323 16:31:53.409828  543705 cpu.go:275] no items to output this cycle
E0323 16:32:03.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:32:03.409780  543705 memory.go:184] no items to output this cycle
I0323 16:32:03.409810  543705 cpu.go:275] no items to output this cycle
W0323 16:32:13.409732  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:32:13.409751  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:32:13.409756  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:32:13.409829  543705 cpu.go:282] Add success.
E0323 16:32:13.409857  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:32:13.409880  543705 memory.go:191] Add success.
I0323 16:32:13.420081  543705 net.go:648] Add success.
I0323 16:32:13.422930  543705 net.go:770] primary dev: ETH0
I0323 16:32:13.422954  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:32:13.422966  543705 net.go:698] Add success.
W0323 16:32:14.455207  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:32:14.455223  543705 disk_worker.go:708] disk space is not compliant
W0323 16:32:14.455225  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:32:14.456120  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:32:14.456131  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:32:14.456137  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:32:14.456663  543705 disk_worker.go:494] system disk:vda1
I0323 16:32:14.456700  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:32:15.457086  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:32:15.457101  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:32:16.458099  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:32:16.458174  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:32:16.458200  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:32:16.458288  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:32:16.472620  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:32:23.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:32:23.409806  543705 memory.go:184] no items to output this cycle
I0323 16:32:23.409816  543705 cpu.go:275] no items to output this cycle
E0323 16:32:33.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:32:33.409788  543705 memory.go:184] no items to output this cycle
I0323 16:32:33.409842  543705 cpu.go:275] no items to output this cycle
I0323 16:32:40.833693  543705 disk_info.go:125] begin check local disk info of client
I0323 16:32:40.837542  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:32:40.837551  543705 disk_info.go:196] parse disk info done, disk is : [0xc00029a780 0xc00029a7c0]
E0323 16:32:43.409909  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:32:43.410727  543705 memory.go:191] Add success.
I0323 16:32:43.409939  543705 cpu.go:282] Add success.
I0323 16:32:43.419835  543705 net.go:648] Add success.
I0323 16:32:43.422760  543705 net.go:770] primary dev: ETH0
I0323 16:32:43.422775  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:32:43.422786  543705 net.go:698] Add success.
I0323 16:32:46.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:32:46.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:32:46.458108  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:32:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:32:53.409791  543705 memory.go:184] no items to output this cycle
I0323 16:32:53.409829  543705 cpu.go:275] no items to output this cycle
E0323 16:33:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:33:03.409794  543705 memory.go:184] no items to output this cycle
I0323 16:33:03.409835  543705 cpu.go:275] no items to output this cycle
E0323 16:33:13.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:33:13.409805  543705 memory.go:191] Add success.
W0323 16:33:13.409831  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:33:13.409834  543705 cpu.go:282] Add success.
W0323 16:33:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:33:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:33:13.420184  543705 net.go:648] Add success.
I0323 16:33:13.423121  543705 net.go:770] primary dev: ETH0
I0323 16:33:13.423136  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:33:13.423151  543705 net.go:698] Add success.
I0323 16:33:13.469569  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"6d070b08-6f29-4e77-81a6-97ace4865bf5","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:33:13.469605  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:33:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:33:14.455182  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:33:14.455251  543705 disk_worker.go:708] disk space is not compliant
W0323 16:33:14.455254  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:33:14.456785  543705 disk_worker.go:494] system disk:vda1
I0323 16:33:14.456818  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:33:15.456000  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:33:16.458018  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:33:16.458113  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:33:16.458146  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:33:16.472551  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:33:23.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:33:23.409785  543705 cpu.go:275] no items to output this cycle
I0323 16:33:23.409795  543705 memory.go:184] no items to output this cycle
E0323 16:33:33.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:33:33.409871  543705 memory.go:184] no items to output this cycle
I0323 16:33:33.409940  543705 cpu.go:275] no items to output this cycle
I0323 16:33:40.487207  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:33:40.487214  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:33:40.837672  543705 disk_info.go:125] begin check local disk info of client
I0323 16:33:40.840232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:33:40.840239  543705 disk_info.go:196] parse disk info done, disk is : [0xc000486000 0xc000486040]
E0323 16:33:43.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:33:43.410611  543705 memory.go:191] Add success.
I0323 16:33:43.409819  543705 cpu.go:282] Add success.
I0323 16:33:43.420306  543705 net.go:648] Add success.
I0323 16:33:43.422818  543705 net.go:770] primary dev: ETH0
I0323 16:33:43.422831  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:33:43.422842  543705 net.go:698] Add success.
I0323 16:33:46.457989  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:33:46.458055  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:33:46.458079  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:33:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:33:53.409788  543705 memory.go:184] no items to output this cycle
I0323 16:33:53.409812  543705 cpu.go:275] no items to output this cycle
E0323 16:34:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:34:03.409784  543705 memory.go:184] no items to output this cycle
I0323 16:34:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 16:34:13.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:34:13.409815  543705 memory.go:191] Add success.
I0323 16:34:13.409826  543705 cpu.go:282] Add success.
W0323 16:34:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:34:13.409862  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:34:13.409866  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:34:13.420673  543705 net.go:648] Add success.
I0323 16:34:13.423885  543705 net.go:770] primary dev: ETH0
I0323 16:34:13.423898  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:34:13.423911  543705 net.go:698] Add success.
I0323 16:34:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:34:14.455190  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:34:14.455201  543705 disk_worker.go:708] disk space is not compliant
W0323 16:34:14.455204  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:34:14.456586  543705 disk_worker.go:494] system disk:vda1
I0323 16:34:14.456617  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:34:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:34:16.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:34:16.458049  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:34:16.458080  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:34:16.472413  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:34:23.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:34:23.409890  543705 memory.go:184] no items to output this cycle
I0323 16:34:23.409896  543705 cpu.go:275] no items to output this cycle
E0323 16:34:33.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:34:33.409792  543705 cpu.go:275] no items to output this cycle
I0323 16:34:33.409804  543705 memory.go:184] no items to output this cycle
I0323 16:34:40.841688  543705 disk_info.go:125] begin check local disk info of client
I0323 16:34:40.844374  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:34:40.844381  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4540 0xc0000c4580]
E0323 16:34:43.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:34:43.410831  543705 memory.go:191] Add success.
I0323 16:34:43.409807  543705 cpu.go:282] Add success.
I0323 16:34:43.420595  543705 net.go:648] Add success.
I0323 16:34:43.423599  543705 net.go:770] primary dev: ETH0
I0323 16:34:43.423614  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:34:43.423629  543705 net.go:698] Add success.
I0323 16:34:46.457966  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:34:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:34:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:34:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:34:53.409798  543705 memory.go:184] no items to output this cycle
I0323 16:34:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 16:35:03.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:35:03.409817  543705 memory.go:184] no items to output this cycle
I0323 16:35:03.409840  543705 cpu.go:275] no items to output this cycle
E0323 16:35:13.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:35:13.409807  543705 memory.go:191] Add success.
I0323 16:35:13.409830  543705 cpu.go:282] Add success.
W0323 16:35:13.409835  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:35:13.409849  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:35:13.409851  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:35:13.420172  543705 net.go:648] Add success.
I0323 16:35:13.422644  543705 net.go:770] primary dev: ETH0
I0323 16:35:13.422659  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:35:13.422674  543705 net.go:698] Add success.
I0323 16:35:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:35:14.455222  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:35:14.455235  543705 disk_worker.go:708] disk space is not compliant
W0323 16:35:14.455238  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:35:14.456719  543705 disk_worker.go:494] system disk:vda1
I0323 16:35:14.456757  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:35:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:35:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:35:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:35:16.458105  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:35:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:35:23.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:35:23.409779  543705 memory.go:184] no items to output this cycle
I0323 16:35:23.409801  543705 cpu.go:275] no items to output this cycle
E0323 16:35:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:35:33.409784  543705 memory.go:184] no items to output this cycle
I0323 16:35:33.409810  543705 cpu.go:275] no items to output this cycle
I0323 16:35:40.845678  543705 disk_info.go:125] begin check local disk info of client
I0323 16:35:40.848267  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:35:40.848274  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0280 0xc0002a02c0]
E0323 16:35:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:35:43.410644  543705 memory.go:191] Add success.
I0323 16:35:43.409808  543705 cpu.go:282] Add success.
I0323 16:35:43.420402  543705 net.go:648] Add success.
I0323 16:35:43.423301  543705 net.go:770] primary dev: ETH0
I0323 16:35:43.423314  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:35:43.423327  543705 net.go:698] Add success.
I0323 16:35:46.457968  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:35:46.458039  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:35:46.458064  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:35:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:35:53.409809  543705 cpu.go:275] no items to output this cycle
I0323 16:35:53.409821  543705 memory.go:184] no items to output this cycle
E0323 16:36:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:36:03.409785  543705 memory.go:184] no items to output this cycle
I0323 16:36:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 16:36:13.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:36:13.409820  543705 memory.go:191] Add success.
I0323 16:36:13.409826  543705 cpu.go:282] Add success.
W0323 16:36:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:36:13.409868  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:36:13.409872  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:36:13.420146  543705 net.go:648] Add success.
I0323 16:36:13.422933  543705 net.go:770] primary dev: ETH0
I0323 16:36:13.422948  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:36:13.422962  543705 net.go:698] Add success.
I0323 16:36:13.463506  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"92530ece-45b7-4fcb-b604-93d992e9187e","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:36:13.463541  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:36:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:36:14.455107  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:36:14.455171  543705 disk_worker.go:708] disk space is not compliant
W0323 16:36:14.455174  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:36:14.456541  543705 disk_worker.go:494] system disk:vda1
I0323 16:36:14.456596  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:36:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:36:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:36:16.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:36:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:36:16.472414  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:36:23.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:36:23.409798  543705 memory.go:184] no items to output this cycle
I0323 16:36:23.409811  543705 cpu.go:275] no items to output this cycle
E0323 16:36:33.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:36:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 16:36:33.409829  543705 memory.go:184] no items to output this cycle
I0323 16:36:40.488604  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:36:40.488614  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:36:40.849345  543705 disk_info.go:125] begin check local disk info of client
I0323 16:36:40.852061  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:36:40.852069  543705 disk_info.go:196] parse disk info done, disk is : [0xc000483240 0xc000483280]
E0323 16:36:43.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:36:43.410640  543705 memory.go:191] Add success.
I0323 16:36:43.409852  543705 cpu.go:282] Add success.
I0323 16:36:43.420351  543705 net.go:648] Add success.
I0323 16:36:43.422975  543705 net.go:770] primary dev: ETH0
I0323 16:36:43.422989  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:36:43.423004  543705 net.go:698] Add success.
I0323 16:36:46.458004  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:36:46.458102  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:36:46.458133  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:36:53.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:36:53.409833  543705 memory.go:184] no items to output this cycle
I0323 16:36:53.409854  543705 cpu.go:275] no items to output this cycle
E0323 16:37:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:37:03.409806  543705 memory.go:184] no items to output this cycle
I0323 16:37:03.409823  543705 cpu.go:275] no items to output this cycle
E0323 16:37:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:37:13.409837  543705 memory.go:191] Add success.
I0323 16:37:13.409842  543705 cpu.go:282] Add success.
W0323 16:37:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:37:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:37:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:37:13.420557  543705 net.go:648] Add success.
I0323 16:37:13.423297  543705 net.go:770] primary dev: ETH0
I0323 16:37:13.423312  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:37:13.423326  543705 net.go:698] Add success.
I0323 16:37:13.452977  543705 event_worker.go:152] Polling the log file for events...
W0323 16:37:14.455205  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:37:14.455220  543705 disk_worker.go:708] disk space is not compliant
W0323 16:37:14.455222  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:37:14.456119  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:37:14.456131  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:37:14.456137  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:37:14.457011  543705 disk_worker.go:494] system disk:vda1
I0323 16:37:14.457050  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:37:15.457045  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:37:15.457060  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:37:16.458106  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:37:16.458178  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:37:16.458207  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:37:16.458294  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:37:16.472640  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:37:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:37:23.409808  543705 memory.go:184] no items to output this cycle
I0323 16:37:23.409819  543705 cpu.go:275] no items to output this cycle
E0323 16:37:33.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:37:33.409786  543705 memory.go:184] no items to output this cycle
I0323 16:37:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 16:37:40.853683  543705 disk_info.go:125] begin check local disk info of client
I0323 16:37:40.856330  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:37:40.856337  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007ba40 0xc00007ba80]
E0323 16:37:43.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:37:43.410627  543705 memory.go:191] Add success.
I0323 16:37:43.409868  543705 cpu.go:282] Add success.
I0323 16:37:43.420443  543705 net.go:648] Add success.
I0323 16:37:43.423029  543705 net.go:770] primary dev: ETH0
I0323 16:37:43.423044  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:37:43.423058  543705 net.go:698] Add success.
I0323 16:37:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:37:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:37:46.458092  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:37:53.410591  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:37:53.410610  543705 memory.go:184] no items to output this cycle
I0323 16:37:53.410624  543705 cpu.go:275] no items to output this cycle
E0323 16:38:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:38:03.409783  543705 memory.go:184] no items to output this cycle
I0323 16:38:03.409792  543705 cpu.go:275] no items to output this cycle
E0323 16:38:13.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:38:13.409838  543705 memory.go:191] Add success.
I0323 16:38:13.409853  543705 cpu.go:282] Add success.
W0323 16:38:13.409870  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:38:13.409886  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:38:13.409890  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:38:13.420365  543705 net.go:648] Add success.
I0323 16:38:13.423292  543705 net.go:770] primary dev: ETH0
I0323 16:38:13.423311  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:38:13.423330  543705 net.go:698] Add success.
I0323 16:38:14.454982  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:38:14.455195  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:38:14.455207  543705 disk_worker.go:708] disk space is not compliant
W0323 16:38:14.455210  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:38:14.456865  543705 disk_worker.go:494] system disk:vda1
I0323 16:38:14.456897  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:38:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:38:16.458011  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:38:16.458095  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:38:16.458123  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:38:16.472659  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:38:23.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:38:23.409776  543705 memory.go:184] no items to output this cycle
I0323 16:38:23.409808  543705 cpu.go:275] no items to output this cycle
E0323 16:38:33.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:38:33.409780  543705 memory.go:184] no items to output this cycle
I0323 16:38:33.409783  543705 cpu.go:275] no items to output this cycle
I0323 16:38:40.857675  543705 disk_info.go:125] begin check local disk info of client
I0323 16:38:40.860201  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:38:40.860207  543705 disk_info.go:196] parse disk info done, disk is : [0xc000327a80 0xc000327ac0]
E0323 16:38:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:38:43.410674  543705 memory.go:191] Add success.
I0323 16:38:43.409841  543705 cpu.go:282] Add success.
I0323 16:38:43.420386  543705 net.go:648] Add success.
I0323 16:38:43.423230  543705 net.go:770] primary dev: ETH0
I0323 16:38:43.423245  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:38:43.423260  543705 net.go:698] Add success.
I0323 16:38:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:38:46.458066  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:38:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:38:53.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:38:53.409818  543705 memory.go:184] no items to output this cycle
I0323 16:38:53.409829  543705 cpu.go:275] no items to output this cycle
E0323 16:39:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:39:03.409782  543705 memory.go:184] no items to output this cycle
I0323 16:39:03.409803  543705 cpu.go:275] no items to output this cycle
E0323 16:39:13.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:39:13.409796  543705 memory.go:191] Add success.
W0323 16:39:13.409824  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:39:13.409829  543705 cpu.go:282] Add success.
W0323 16:39:13.409836  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:39:13.409839  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:39:13.420188  543705 net.go:648] Add success.
I0323 16:39:13.423229  543705 net.go:770] primary dev: ETH0
I0323 16:39:13.423242  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:39:13.423255  543705 net.go:698] Add success.
I0323 16:39:13.462803  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bb8d6dd6-2a93-44fe-969b-5b37b8d095fb","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:39:13.462838  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:39:14.454998  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:39:14.455214  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:39:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 16:39:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:39:14.456649  543705 disk_worker.go:494] system disk:vda1
I0323 16:39:14.456684  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:39:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:39:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:39:16.458073  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:39:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:39:16.472470  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:39:23.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:39:23.409813  543705 memory.go:184] no items to output this cycle
I0323 16:39:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 16:39:33.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:39:33.409790  543705 memory.go:184] no items to output this cycle
I0323 16:39:33.409791  543705 cpu.go:275] no items to output this cycle
I0323 16:39:40.489218  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:39:40.489226  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:39:40.861670  543705 disk_info.go:125] begin check local disk info of client
I0323 16:39:40.864224  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:39:40.864230  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003852c0 0xc000385300]
E0323 16:39:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:39:43.410558  543705 memory.go:191] Add success.
I0323 16:39:43.409816  543705 cpu.go:282] Add success.
I0323 16:39:43.420265  543705 net.go:648] Add success.
I0323 16:39:43.422880  543705 net.go:770] primary dev: ETH0
I0323 16:39:43.422894  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:39:43.422907  543705 net.go:698] Add success.
I0323 16:39:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:39:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:39:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:39:53.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:39:53.409789  543705 memory.go:184] no items to output this cycle
I0323 16:39:53.409838  543705 cpu.go:275] no items to output this cycle
E0323 16:40:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:40:03.409789  543705 memory.go:184] no items to output this cycle
I0323 16:40:03.409796  543705 cpu.go:275] no items to output this cycle
E0323 16:40:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:40:13.409824  543705 memory.go:191] Add success.
I0323 16:40:13.409835  543705 cpu.go:282] Add success.
W0323 16:40:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:40:13.409873  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:40:13.409877  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:40:13.420246  543705 net.go:648] Add success.
I0323 16:40:13.423097  543705 net.go:770] primary dev: ETH0
I0323 16:40:13.423110  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:40:13.423121  543705 net.go:698] Add success.
I0323 16:40:14.454958  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:40:14.455077  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:40:14.455137  543705 disk_worker.go:708] disk space is not compliant
W0323 16:40:14.455140  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:40:14.456480  543705 disk_worker.go:494] system disk:vda1
I0323 16:40:14.456523  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:40:15.455967  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:40:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:40:16.458031  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:40:16.458054  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:40:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:40:23.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:40:23.409807  543705 memory.go:184] no items to output this cycle
I0323 16:40:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 16:40:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:40:33.409794  543705 memory.go:184] no items to output this cycle
I0323 16:40:33.409841  543705 cpu.go:275] no items to output this cycle
I0323 16:40:40.865678  543705 disk_info.go:125] begin check local disk info of client
I0323 16:40:40.868252  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:40:40.868260  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f29c0 0xc0004f2a00]
E0323 16:40:43.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:40:43.410970  543705 memory.go:191] Add success.
I0323 16:40:43.409836  543705 cpu.go:282] Add success.
I0323 16:40:43.420693  543705 net.go:648] Add success.
I0323 16:40:43.423497  543705 net.go:770] primary dev: ETH0
I0323 16:40:43.423510  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:40:43.423522  543705 net.go:698] Add success.
I0323 16:40:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:40:46.458067  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:40:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:40:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:40:53.409798  543705 memory.go:184] no items to output this cycle
I0323 16:40:53.409801  543705 cpu.go:275] no items to output this cycle
E0323 16:41:03.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:41:03.409787  543705 memory.go:184] no items to output this cycle
I0323 16:41:03.409816  543705 cpu.go:275] no items to output this cycle
E0323 16:41:13.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:41:13.409817  543705 memory.go:191] Add success.
I0323 16:41:13.409817  543705 cpu.go:282] Add success.
W0323 16:41:13.409846  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:41:13.409858  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:41:13.409861  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:41:13.420462  543705 net.go:648] Add success.
I0323 16:41:13.423059  543705 net.go:770] primary dev: ETH0
I0323 16:41:13.423073  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:41:13.423085  543705 net.go:698] Add success.
I0323 16:41:14.453934  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:41:14.455398  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:41:14.455508  543705 disk_worker.go:708] disk space is not compliant
W0323 16:41:14.455513  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:41:14.457536  543705 disk_worker.go:494] system disk:vda1
I0323 16:41:14.457580  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:41:15.455965  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:41:16.458044  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:41:16.458148  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:41:16.458182  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:41:16.472875  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:41:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:41:23.409818  543705 memory.go:184] no items to output this cycle
I0323 16:41:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 16:41:33.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:41:33.409823  543705 memory.go:184] no items to output this cycle
I0323 16:41:33.409833  543705 cpu.go:275] no items to output this cycle
I0323 16:41:40.869687  543705 disk_info.go:125] begin check local disk info of client
I0323 16:41:40.872261  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:41:40.872267  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0540 0xc0002a0580]
E0323 16:41:43.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:41:43.410688  543705 memory.go:191] Add success.
I0323 16:41:43.409820  543705 cpu.go:282] Add success.
I0323 16:41:43.420386  543705 net.go:648] Add success.
I0323 16:41:43.423014  543705 net.go:770] primary dev: ETH0
I0323 16:41:43.423029  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:41:43.423044  543705 net.go:698] Add success.
I0323 16:41:46.457992  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:41:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:41:46.458100  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:41:53.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:41:53.409801  543705 memory.go:184] no items to output this cycle
I0323 16:41:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 16:42:03.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:42:03.409816  543705 memory.go:184] no items to output this cycle
I0323 16:42:03.409832  543705 cpu.go:275] no items to output this cycle
E0323 16:42:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:42:13.409835  543705 memory.go:191] Add success.
I0323 16:42:13.409843  543705 cpu.go:282] Add success.
W0323 16:42:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:42:13.409884  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:42:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:42:13.420337  543705 net.go:648] Add success.
I0323 16:42:13.421468  543705 net.go:770] primary dev: ETH0
I0323 16:42:13.421484  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:42:13.421499  543705 net.go:698] Add success.
I0323 16:42:13.468660  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef7cd24c-4849-48ad-adce-3bc4d2ece6fc","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:42:13.468705  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 16:42:14.455611  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:42:14.455639  543705 disk_worker.go:708] disk space is not compliant
W0323 16:42:14.455643  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:42:14.456381  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:42:14.456391  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:42:14.456398  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:42:14.457210  543705 disk_worker.go:494] system disk:vda1
I0323 16:42:14.457253  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:42:15.457124  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:42:15.457139  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:42:16.458159  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:42:16.458235  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:42:16.458260  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:42:16.458268  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:42:16.472702  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:42:23.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:42:23.409804  543705 cpu.go:275] no items to output this cycle
I0323 16:42:23.409805  543705 memory.go:184] no items to output this cycle
E0323 16:42:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:42:33.409820  543705 memory.go:184] no items to output this cycle
I0323 16:42:33.409833  543705 cpu.go:275] no items to output this cycle
I0323 16:42:40.490206  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:42:40.490213  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:42:40.873671  543705 disk_info.go:125] begin check local disk info of client
I0323 16:42:40.876232  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:42:40.876238  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4900 0xc0000c4940]
E0323 16:42:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:42:43.410707  543705 memory.go:191] Add success.
I0323 16:42:43.409810  543705 cpu.go:282] Add success.
I0323 16:42:43.420426  543705 net.go:648] Add success.
I0323 16:42:43.423227  543705 net.go:770] primary dev: ETH0
I0323 16:42:43.423241  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:42:43.423253  543705 net.go:698] Add success.
I0323 16:42:46.457689  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:42:46.457801  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:42:46.457833  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:42:53.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:42:53.409821  543705 memory.go:184] no items to output this cycle
I0323 16:42:53.409834  543705 cpu.go:275] no items to output this cycle
E0323 16:43:03.409833  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:43:03.409855  543705 memory.go:184] no items to output this cycle
I0323 16:43:03.409987  543705 cpu.go:275] no items to output this cycle
E0323 16:43:13.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:43:13.409802  543705 memory.go:191] Add success.
I0323 16:43:13.409827  543705 cpu.go:282] Add success.
W0323 16:43:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:43:13.409843  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:43:13.409845  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:43:13.420182  543705 net.go:648] Add success.
I0323 16:43:13.422956  543705 net.go:770] primary dev: ETH0
I0323 16:43:13.422969  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:43:13.422981  543705 net.go:698] Add success.
I0323 16:43:14.454964  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:43:14.455089  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:43:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 16:43:14.455250  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:43:14.456677  543705 disk_worker.go:494] system disk:vda1
I0323 16:43:14.456735  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:43:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:43:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:43:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:43:16.458061  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:43:16.472369  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:43:23.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:43:23.409781  543705 memory.go:184] no items to output this cycle
I0323 16:43:23.409793  543705 cpu.go:275] no items to output this cycle
E0323 16:43:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:43:33.409781  543705 memory.go:184] no items to output this cycle
I0323 16:43:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 16:43:40.877676  543705 disk_info.go:125] begin check local disk info of client
I0323 16:43:40.880234  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:43:40.880241  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004a85c0 0xc0004a8600]
E0323 16:43:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:43:43.410726  543705 memory.go:191] Add success.
I0323 16:43:43.409804  543705 cpu.go:282] Add success.
I0323 16:43:43.420431  543705 net.go:648] Add success.
I0323 16:43:43.423511  543705 net.go:770] primary dev: ETH0
I0323 16:43:43.423524  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:43:43.423536  543705 net.go:698] Add success.
I0323 16:43:46.457979  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:43:46.458045  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:43:46.458070  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:43:53.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:43:53.409795  543705 memory.go:184] no items to output this cycle
I0323 16:43:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 16:44:03.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:44:03.409825  543705 memory.go:184] no items to output this cycle
I0323 16:44:03.409841  543705 cpu.go:275] no items to output this cycle
E0323 16:44:13.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:44:13.409839  543705 memory.go:191] Add success.
I0323 16:44:13.409846  543705 cpu.go:282] Add success.
W0323 16:44:13.409872  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:44:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:44:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:44:13.420209  543705 net.go:648] Add success.
I0323 16:44:13.423052  543705 net.go:770] primary dev: ETH0
I0323 16:44:13.423065  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:44:13.423078  543705 net.go:698] Add success.
I0323 16:44:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:44:14.455472  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:44:14.455485  543705 disk_worker.go:708] disk space is not compliant
W0323 16:44:14.455488  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:44:14.457486  543705 disk_worker.go:494] system disk:vda1
I0323 16:44:14.457516  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:44:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:44:16.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:44:16.458071  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:44:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:44:16.472465  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:44:23.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:44:23.409795  543705 memory.go:184] no items to output this cycle
I0323 16:44:23.409853  543705 cpu.go:275] no items to output this cycle
E0323 16:44:33.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:44:33.409796  543705 memory.go:184] no items to output this cycle
I0323 16:44:33.409819  543705 cpu.go:275] no items to output this cycle
I0323 16:44:40.881686  543705 disk_info.go:125] begin check local disk info of client
I0323 16:44:40.884479  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:44:40.884487  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa480 0xc0001aa4c0]
E0323 16:44:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:44:43.410592  543705 memory.go:191] Add success.
I0323 16:44:43.409829  543705 cpu.go:282] Add success.
I0323 16:44:43.420378  543705 net.go:648] Add success.
I0323 16:44:43.422958  543705 net.go:770] primary dev: ETH0
I0323 16:44:43.422973  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:44:43.422988  543705 net.go:698] Add success.
I0323 16:44:46.458025  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:44:46.458124  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:44:46.458157  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:44:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:44:53.409825  543705 memory.go:184] no items to output this cycle
I0323 16:44:53.409830  543705 cpu.go:275] no items to output this cycle
E0323 16:45:03.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:45:03.409797  543705 memory.go:184] no items to output this cycle
I0323 16:45:03.409808  543705 cpu.go:275] no items to output this cycle
I0323 16:45:13.413942  543705 cpu.go:282] Add success.
E0323 16:45:13.414232  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:45:13.414255  543705 memory.go:191] Add success.
W0323 16:45:13.414289  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:45:13.414307  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:45:13.414312  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:45:13.429822  543705 net.go:648] Add success.
I0323 16:45:13.462447  543705 net.go:770] primary dev: ETH0
I0323 16:45:13.462468  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:45:13.462487  543705 net.go:698] Add success.
I0323 16:45:13.489802  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"79ff69fc-81df-4f13-8d23-851cd08f56c3","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:45:13.489856  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:45:14.455108  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:45:14.455200  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:45:14.455212  543705 disk_worker.go:708] disk space is not compliant
W0323 16:45:14.455215  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:45:14.456659  543705 disk_worker.go:494] system disk:vda1
I0323 16:45:14.456725  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:45:15.456007  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:45:16.457609  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:45:16.457721  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:45:16.457760  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:45:16.472258  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:45:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:45:23.409798  543705 memory.go:184] no items to output this cycle
I0323 16:45:23.409857  543705 cpu.go:275] no items to output this cycle
E0323 16:45:33.409754  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:45:33.409776  543705 memory.go:184] no items to output this cycle
I0323 16:45:33.409821  543705 cpu.go:275] no items to output this cycle
I0323 16:45:40.491237  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:45:40.491244  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:45:40.885700  543705 disk_info.go:125] begin check local disk info of client
I0323 16:45:40.891657  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:45:40.891665  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f3440 0xc0004f3480]
E0323 16:45:43.409830  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:45:43.409867  543705 memory.go:191] Add success.
I0323 16:45:43.410352  543705 cpu.go:282] Add success.
I0323 16:45:43.420037  543705 net.go:648] Add success.
I0323 16:45:43.421159  543705 net.go:770] primary dev: ETH0
I0323 16:45:43.421179  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:45:43.421200  543705 net.go:698] Add success.
I0323 16:45:46.457994  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:45:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:45:46.458095  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:45:53.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:45:53.409792  543705 memory.go:184] no items to output this cycle
I0323 16:45:53.409900  543705 cpu.go:275] no items to output this cycle
E0323 16:46:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:46:03.409825  543705 memory.go:184] no items to output this cycle
I0323 16:46:03.409834  543705 cpu.go:275] no items to output this cycle
E0323 16:46:13.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:46:13.409791  543705 memory.go:191] Add success.
W0323 16:46:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:46:13.409819  543705 cpu.go:282] Add success.
W0323 16:46:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:46:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:46:13.420123  543705 net.go:648] Add success.
I0323 16:46:13.423038  543705 net.go:770] primary dev: ETH0
I0323 16:46:13.423051  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:46:13.423063  543705 net.go:698] Add success.
I0323 16:46:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:46:14.455160  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:46:14.455170  543705 disk_worker.go:708] disk space is not compliant
W0323 16:46:14.455173  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:46:14.456529  543705 disk_worker.go:494] system disk:vda1
I0323 16:46:14.456579  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:46:15.455956  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:46:16.457973  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:46:16.458037  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:46:16.458058  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:46:16.472372  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:46:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:46:23.409776  543705 memory.go:184] no items to output this cycle
I0323 16:46:23.409798  543705 cpu.go:275] no items to output this cycle
E0323 16:46:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:46:33.409793  543705 memory.go:184] no items to output this cycle
I0323 16:46:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 16:46:40.893680  543705 disk_info.go:125] begin check local disk info of client
I0323 16:46:40.896301  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:46:40.896309  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004608c0 0xc000460900]
E0323 16:46:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:46:43.410725  543705 memory.go:191] Add success.
I0323 16:46:43.409822  543705 cpu.go:282] Add success.
I0323 16:46:43.420545  543705 net.go:648] Add success.
I0323 16:46:43.423363  543705 net.go:770] primary dev: ETH0
I0323 16:46:43.423376  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:46:43.423389  543705 net.go:698] Add success.
I0323 16:46:46.458010  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:46:46.458093  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:46:46.458124  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:46:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:46:53.409790  543705 memory.go:184] no items to output this cycle
I0323 16:46:53.409792  543705 cpu.go:275] no items to output this cycle
E0323 16:47:03.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:47:03.409793  543705 memory.go:184] no items to output this cycle
I0323 16:47:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 16:47:13.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:47:13.409802  543705 memory.go:191] Add success.
W0323 16:47:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:47:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:47:13.409840  543705 cpu.go:282] Add success.
I0323 16:47:13.409842  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:47:13.420156  543705 net.go:648] Add success.
I0323 16:47:13.422954  543705 net.go:770] primary dev: ETH0
I0323 16:47:13.422967  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:47:13.422979  543705 net.go:698] Add success.
I0323 16:47:13.453545  543705 event_worker.go:152] Polling the log file for events...
W0323 16:47:14.454425  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:47:14.454523  543705 disk_worker.go:708] disk space is not compliant
W0323 16:47:14.454528  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:47:14.455358  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:47:14.455369  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:47:14.455375  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:47:14.456268  543705 disk_worker.go:494] system disk:vda1
I0323 16:47:14.456303  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:47:15.457097  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:47:15.457112  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:47:16.458152  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:47:16.458224  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:47:16.458251  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:47:16.458256  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:47:16.472783  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:47:23.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:47:23.409787  543705 memory.go:184] no items to output this cycle
I0323 16:47:23.409847  543705 cpu.go:275] no items to output this cycle
E0323 16:47:33.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:47:33.409786  543705 memory.go:184] no items to output this cycle
I0323 16:47:33.409793  543705 cpu.go:275] no items to output this cycle
I0323 16:47:40.897674  543705 disk_info.go:125] begin check local disk info of client
I0323 16:47:40.900313  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:47:40.900320  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a0a40 0xc0002a0a80]
E0323 16:47:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:47:43.410760  543705 memory.go:191] Add success.
I0323 16:47:43.409860  543705 cpu.go:282] Add success.
I0323 16:47:43.420509  543705 net.go:648] Add success.
I0323 16:47:43.423583  543705 net.go:770] primary dev: ETH0
I0323 16:47:43.423602  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:47:43.423621  543705 net.go:698] Add success.
I0323 16:47:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:47:46.458083  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:47:46.458114  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:47:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:47:53.409791  543705 memory.go:184] no items to output this cycle
I0323 16:47:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 16:48:03.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:48:03.409796  543705 cpu.go:275] no items to output this cycle
I0323 16:48:03.409804  543705 memory.go:184] no items to output this cycle
E0323 16:48:13.409757  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:48:13.409781  543705 memory.go:191] Add success.
I0323 16:48:13.409792  543705 cpu.go:282] Add success.
W0323 16:48:13.409806  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:48:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:48:13.409821  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:48:13.420122  543705 net.go:648] Add success.
I0323 16:48:13.422672  543705 net.go:770] primary dev: ETH0
I0323 16:48:13.422688  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:48:13.422699  543705 net.go:698] Add success.
I0323 16:48:13.463695  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"024b0c85-8ed9-4471-8aa0-137e906c2dee","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:48:13.463728  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:48:14.454971  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:48:14.455192  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:48:14.455203  543705 disk_worker.go:708] disk space is not compliant
W0323 16:48:14.455206  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:48:14.456632  543705 disk_worker.go:494] system disk:vda1
I0323 16:48:14.456663  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:48:15.455964  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:48:16.457963  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:48:16.458024  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:48:16.458121  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:48:16.472091  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:48:23.409759  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:48:23.409775  543705 memory.go:184] no items to output this cycle
I0323 16:48:23.409791  543705 cpu.go:275] no items to output this cycle
E0323 16:48:33.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:48:33.409808  543705 memory.go:184] no items to output this cycle
I0323 16:48:33.409822  543705 cpu.go:275] no items to output this cycle
I0323 16:48:40.492218  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:48:40.492225  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:48:40.901669  543705 disk_info.go:125] begin check local disk info of client
I0323 16:48:40.904240  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:48:40.904246  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007b040 0xc00007b080]
E0323 16:48:43.409756  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:48:43.410649  543705 memory.go:191] Add success.
I0323 16:48:43.409805  543705 cpu.go:282] Add success.
I0323 16:48:43.420399  543705 net.go:648] Add success.
I0323 16:48:43.422972  543705 net.go:770] primary dev: ETH0
I0323 16:48:43.422988  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:48:43.423001  543705 net.go:698] Add success.
I0323 16:48:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:48:46.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:48:46.458087  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:48:53.409827  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:48:53.409849  543705 memory.go:184] no items to output this cycle
I0323 16:48:53.409856  543705 cpu.go:275] no items to output this cycle
E0323 16:49:03.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:49:03.409777  543705 memory.go:184] no items to output this cycle
I0323 16:49:03.409807  543705 cpu.go:275] no items to output this cycle
E0323 16:49:13.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:49:13.409790  543705 memory.go:191] Add success.
I0323 16:49:13.409794  543705 cpu.go:282] Add success.
W0323 16:49:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:49:13.409830  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:49:13.409834  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:49:13.420071  543705 net.go:648] Add success.
I0323 16:49:13.422908  543705 net.go:770] primary dev: ETH0
I0323 16:49:13.422921  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:49:13.422933  543705 net.go:698] Add success.
I0323 16:49:14.453937  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:49:14.455168  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:49:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0323 16:49:14.455240  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:49:14.456609  543705 disk_worker.go:494] system disk:vda1
I0323 16:49:14.456646  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:49:15.456491  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:49:16.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:49:16.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:49:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:49:16.472542  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:49:23.409902  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:49:23.409929  543705 memory.go:184] no items to output this cycle
I0323 16:49:23.409985  543705 cpu.go:275] no items to output this cycle
E0323 16:49:33.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:49:33.409815  543705 memory.go:184] no items to output this cycle
I0323 16:49:33.409829  543705 cpu.go:275] no items to output this cycle
I0323 16:49:40.905699  543705 disk_info.go:125] begin check local disk info of client
I0323 16:49:40.915118  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:49:40.915125  543705 disk_info.go:196] parse disk info done, disk is : [0xc000544380 0xc0005443c0]
E0323 16:49:43.409786  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:49:43.410641  543705 memory.go:191] Add success.
I0323 16:49:43.409830  543705 cpu.go:282] Add success.
I0323 16:49:43.420429  543705 net.go:648] Add success.
I0323 16:49:43.423060  543705 net.go:770] primary dev: ETH0
I0323 16:49:43.423075  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:49:43.423089  543705 net.go:698] Add success.
I0323 16:49:46.457984  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:49:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:49:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:49:53.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:49:53.409839  543705 memory.go:184] no items to output this cycle
I0323 16:49:53.409860  543705 cpu.go:275] no items to output this cycle
E0323 16:50:03.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:50:03.409794  543705 memory.go:184] no items to output this cycle
I0323 16:50:03.409800  543705 cpu.go:275] no items to output this cycle
E0323 16:50:13.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:50:13.409799  543705 memory.go:191] Add success.
I0323 16:50:13.409799  543705 cpu.go:282] Add success.
W0323 16:50:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:50:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:50:13.409843  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:50:13.420101  543705 net.go:648] Add success.
I0323 16:50:13.422808  543705 net.go:770] primary dev: ETH0
I0323 16:50:13.422821  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:50:13.422833  543705 net.go:698] Add success.
I0323 16:50:14.454970  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:50:14.455187  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:50:14.455197  543705 disk_worker.go:708] disk space is not compliant
W0323 16:50:14.455200  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:50:14.456589  543705 disk_worker.go:494] system disk:vda1
I0323 16:50:14.456621  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:50:15.455961  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:50:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:50:16.458040  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:50:16.458070  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:50:16.472409  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:50:23.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:50:23.409778  543705 memory.go:184] no items to output this cycle
I0323 16:50:23.409782  543705 cpu.go:275] no items to output this cycle
E0323 16:50:33.409820  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:50:33.409849  543705 memory.go:184] no items to output this cycle
I0323 16:50:33.409995  543705 cpu.go:275] no items to output this cycle
I0323 16:50:40.915243  543705 disk_info.go:125] begin check local disk info of client
I0323 16:50:40.921580  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:50:40.921588  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039f2c0 0xc00039f300]
E0323 16:50:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:50:43.410612  543705 memory.go:191] Add success.
I0323 16:50:43.409841  543705 cpu.go:282] Add success.
I0323 16:50:43.420466  543705 net.go:648] Add success.
I0323 16:50:43.423264  543705 net.go:770] primary dev: ETH0
I0323 16:50:43.423279  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:50:43.423293  543705 net.go:698] Add success.
I0323 16:50:46.457967  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:50:46.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:50:46.458069  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:50:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:50:53.409799  543705 memory.go:184] no items to output this cycle
I0323 16:50:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 16:51:03.409788  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:51:03.409809  543705 memory.go:184] no items to output this cycle
I0323 16:51:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 16:51:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:51:13.409833  543705 memory.go:191] Add success.
I0323 16:51:13.409835  543705 cpu.go:282] Add success.
W0323 16:51:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:51:13.409883  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:51:13.409888  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:51:13.420255  543705 net.go:648] Add success.
I0323 16:51:13.423257  543705 net.go:770] primary dev: ETH0
I0323 16:51:13.423270  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:51:13.423283  543705 net.go:698] Add success.
I0323 16:51:13.463188  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9fd45650-3003-4708-8bc8-1b52aeccba78","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:51:13.463223  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:51:14.453993  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:51:14.454202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:51:14.454271  543705 disk_worker.go:708] disk space is not compliant
W0323 16:51:14.454274  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:51:14.455645  543705 disk_worker.go:494] system disk:vda1
I0323 16:51:14.455675  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:51:15.455986  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:51:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:51:16.458077  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:51:16.458106  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:51:16.472477  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:51:23.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:51:23.409892  543705 cpu.go:275] no items to output this cycle
I0323 16:51:23.409915  543705 memory.go:184] no items to output this cycle
E0323 16:51:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:51:33.409814  543705 memory.go:184] no items to output this cycle
I0323 16:51:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 16:51:40.493224  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:51:40.493231  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:51:40.921674  543705 disk_info.go:125] begin check local disk info of client
I0323 16:51:40.924281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:51:40.924287  543705 disk_info.go:196] parse disk info done, disk is : [0xc0003f1380 0xc0003f13c0]
E0323 16:51:43.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:51:43.410899  543705 memory.go:191] Add success.
I0323 16:51:43.409821  543705 cpu.go:282] Add success.
I0323 16:51:43.420667  543705 net.go:648] Add success.
I0323 16:51:43.423514  543705 net.go:770] primary dev: ETH0
I0323 16:51:43.423529  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:51:43.423541  543705 net.go:698] Add success.
I0323 16:51:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:51:46.458046  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:51:46.458068  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:51:53.410391  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:51:53.410407  543705 memory.go:184] no items to output this cycle
I0323 16:51:53.410430  543705 cpu.go:275] no items to output this cycle
E0323 16:52:03.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:52:03.409806  543705 memory.go:184] no items to output this cycle
I0323 16:52:03.409837  543705 cpu.go:275] no items to output this cycle
E0323 16:52:13.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:52:13.409811  543705 memory.go:191] Add success.
I0323 16:52:13.409812  543705 cpu.go:282] Add success.
W0323 16:52:13.409840  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:52:13.409852  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:52:13.409856  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:52:13.420268  543705 net.go:648] Add success.
I0323 16:52:13.422991  543705 net.go:770] primary dev: ETH0
I0323 16:52:13.423006  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:52:13.423020  543705 net.go:698] Add success.
W0323 16:52:14.454348  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:52:14.454366  543705 disk_worker.go:708] disk space is not compliant
W0323 16:52:14.454371  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:52:14.457369  543705 disk_worker.go:494] system disk:vda1
I0323 16:52:14.457412  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:52:14.457638  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:52:14.458207  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:52:14.458217  543705 custom_config.go:64] query custom config with name: gpu
E0323 16:52:15.456637  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:52:15.456651  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:52:16.458109  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:52:16.458190  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:52:16.458213  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:52:16.458286  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:52:16.472667  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:52:23.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:52:23.409800  543705 memory.go:184] no items to output this cycle
I0323 16:52:23.409809  543705 cpu.go:275] no items to output this cycle
E0323 16:52:33.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:52:33.409796  543705 memory.go:184] no items to output this cycle
I0323 16:52:33.409823  543705 cpu.go:275] no items to output this cycle
I0323 16:52:40.925684  543705 disk_info.go:125] begin check local disk info of client
I0323 16:52:40.928300  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:52:40.928307  543705 disk_info.go:196] parse disk info done, disk is : [0xc00024c300 0xc00024c340]
E0323 16:52:43.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:52:43.410889  543705 memory.go:191] Add success.
I0323 16:52:43.409893  543705 cpu.go:282] Add success.
I0323 16:52:43.420682  543705 net.go:648] Add success.
I0323 16:52:43.423706  543705 net.go:770] primary dev: ETH0
I0323 16:52:43.423720  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:52:43.423734  543705 net.go:698] Add success.
I0323 16:52:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:52:46.458062  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:52:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:52:53.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:52:53.409796  543705 memory.go:184] no items to output this cycle
I0323 16:52:53.409800  543705 cpu.go:275] no items to output this cycle
I0323 16:53:03.417879  543705 cpu.go:275] no items to output this cycle
E0323 16:53:03.417880  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:53:03.417899  543705 memory.go:184] no items to output this cycle
E0323 16:53:13.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:53:13.409838  543705 memory.go:191] Add success.
I0323 16:53:13.409844  543705 cpu.go:282] Add success.
W0323 16:53:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:53:13.409888  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:53:13.409892  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:53:13.420248  543705 net.go:648] Add success.
I0323 16:53:13.423267  543705 net.go:770] primary dev: ETH0
I0323 16:53:13.423280  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:53:13.423292  543705 net.go:698] Add success.
I0323 16:53:14.454989  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:53:14.455144  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:53:14.455243  543705 disk_worker.go:708] disk space is not compliant
W0323 16:53:14.455248  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:53:14.456806  543705 disk_worker.go:494] system disk:vda1
I0323 16:53:14.456852  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:53:15.455989  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:53:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:53:16.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:53:16.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:53:16.472552  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:53:23.411959  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:53:23.411984  543705 memory.go:184] no items to output this cycle
I0323 16:53:23.412118  543705 cpu.go:275] no items to output this cycle
E0323 16:53:33.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:53:33.409941  543705 memory.go:184] no items to output this cycle
I0323 16:53:33.410384  543705 cpu.go:275] no items to output this cycle
I0323 16:53:40.929687  543705 disk_info.go:125] begin check local disk info of client
I0323 16:53:40.932305  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:53:40.932313  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 16:53:43.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:53:43.410781  543705 memory.go:191] Add success.
I0323 16:53:43.409826  543705 cpu.go:282] Add success.
I0323 16:53:43.420561  543705 net.go:648] Add success.
I0323 16:53:43.423386  543705 net.go:770] primary dev: ETH0
I0323 16:53:43.423404  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:53:43.423420  543705 net.go:698] Add success.
I0323 16:53:46.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:53:46.458075  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:53:46.458113  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:53:53.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:53:53.409823  543705 memory.go:184] no items to output this cycle
I0323 16:53:53.409831  543705 cpu.go:275] no items to output this cycle
E0323 16:54:03.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:54:03.409820  543705 memory.go:184] no items to output this cycle
I0323 16:54:03.409830  543705 cpu.go:275] no items to output this cycle
E0323 16:54:13.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:54:13.409830  543705 memory.go:191] Add success.
I0323 16:54:13.409853  543705 cpu.go:282] Add success.
W0323 16:54:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:54:13.409878  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:54:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:54:13.420423  543705 net.go:648] Add success.
I0323 16:54:13.423527  543705 net.go:770] primary dev: ETH0
I0323 16:54:13.423542  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:54:13.423556  543705 net.go:698] Add success.
I0323 16:54:13.463691  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"2a61104b-5e5c-4f12-a204-f6e0deeecc43","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:54:13.463737  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 16:54:14.453938  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:54:14.455248  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:54:14.455260  543705 disk_worker.go:708] disk space is not compliant
W0323 16:54:14.455263  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:54:14.457272  543705 disk_worker.go:494] system disk:vda1
I0323 16:54:14.457313  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:54:15.455985  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:54:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:54:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:54:16.458095  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:54:16.472481  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:54:23.409819  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:54:23.409843  543705 memory.go:184] no items to output this cycle
I0323 16:54:23.409987  543705 cpu.go:275] no items to output this cycle
E0323 16:54:33.409945  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:54:33.409970  543705 memory.go:184] no items to output this cycle
I0323 16:54:33.410099  543705 cpu.go:275] no items to output this cycle
I0323 16:54:40.494237  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:54:40.494245  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:54:40.933695  543705 disk_info.go:125] begin check local disk info of client
I0323 16:54:40.937494  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:54:40.937503  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a000 0xc00007a700]
E0323 16:54:43.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:54:43.410812  543705 memory.go:191] Add success.
I0323 16:54:43.409816  543705 cpu.go:282] Add success.
I0323 16:54:43.420525  543705 net.go:648] Add success.
I0323 16:54:43.423494  543705 net.go:770] primary dev: ETH0
I0323 16:54:43.423517  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:54:43.423538  543705 net.go:698] Add success.
I0323 16:54:46.458005  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:54:46.458099  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:54:46.458133  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:54:53.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:54:53.409783  543705 memory.go:184] no items to output this cycle
I0323 16:54:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 16:55:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:55:03.409787  543705 memory.go:184] no items to output this cycle
I0323 16:55:03.409814  543705 cpu.go:275] no items to output this cycle
E0323 16:55:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:55:13.409812  543705 memory.go:191] Add success.
I0323 16:55:13.409816  543705 cpu.go:282] Add success.
W0323 16:55:13.409842  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:55:13.409854  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:55:13.409857  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:55:13.420122  543705 net.go:648] Add success.
I0323 16:55:13.423165  543705 net.go:770] primary dev: ETH0
I0323 16:55:13.423178  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:55:13.423189  543705 net.go:698] Add success.
I0323 16:55:14.453947  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:55:14.455340  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:55:14.455356  543705 disk_worker.go:708] disk space is not compliant
W0323 16:55:14.455360  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:55:14.457274  543705 disk_worker.go:494] system disk:vda1
I0323 16:55:14.457328  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:55:15.454996  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:55:16.458041  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:55:16.458139  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:55:16.458171  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:55:16.472861  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:55:23.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:55:23.409788  543705 memory.go:184] no items to output this cycle
I0323 16:55:23.409807  543705 cpu.go:275] no items to output this cycle
E0323 16:55:33.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:55:33.409845  543705 memory.go:184] no items to output this cycle
I0323 16:55:33.409985  543705 cpu.go:275] no items to output this cycle
I0323 16:55:40.938451  543705 disk_info.go:125] begin check local disk info of client
I0323 16:55:40.941087  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:55:40.941094  543705 disk_info.go:196] parse disk info done, disk is : [0xc000241300 0xc000241340]
E0323 16:55:43.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:55:43.410723  543705 memory.go:191] Add success.
I0323 16:55:43.409815  543705 cpu.go:282] Add success.
I0323 16:55:43.420465  543705 net.go:648] Add success.
I0323 16:55:43.423477  543705 net.go:770] primary dev: ETH0
I0323 16:55:43.423492  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:55:43.423507  543705 net.go:698] Add success.
I0323 16:55:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:55:46.458070  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:55:46.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:55:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 16:55:53.409804  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:55:53.409822  543705 memory.go:184] no items to output this cycle
E0323 16:56:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:56:03.409812  543705 memory.go:184] no items to output this cycle
I0323 16:56:03.409828  543705 cpu.go:275] no items to output this cycle
E0323 16:56:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:56:13.409818  543705 memory.go:191] Add success.
I0323 16:56:13.409824  543705 cpu.go:282] Add success.
W0323 16:56:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:56:13.409867  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:56:13.409871  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:56:13.420118  543705 net.go:648] Add success.
I0323 16:56:13.423138  543705 net.go:770] primary dev: ETH0
I0323 16:56:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:56:13.423169  543705 net.go:698] Add success.
I0323 16:56:14.453940  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:56:14.455203  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:56:14.455296  543705 disk_worker.go:708] disk space is not compliant
W0323 16:56:14.455301  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:56:14.457161  543705 disk_worker.go:494] system disk:vda1
I0323 16:56:14.457195  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:56:15.455990  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:56:16.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:56:16.458060  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:56:16.458087  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:56:16.472458  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:56:23.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:56:23.409815  543705 memory.go:184] no items to output this cycle
I0323 16:56:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 16:56:33.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:56:33.409789  543705 memory.go:184] no items to output this cycle
I0323 16:56:33.409818  543705 cpu.go:275] no items to output this cycle
I0323 16:56:40.941685  543705 disk_info.go:125] begin check local disk info of client
I0323 16:56:40.944294  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:56:40.944302  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4100 0xc0000c4140]
E0323 16:56:43.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:56:43.409855  543705 memory.go:191] Add success.
I0323 16:56:43.410292  543705 cpu.go:282] Add success.
I0323 16:56:43.419992  543705 net.go:648] Add success.
I0323 16:56:43.421114  543705 net.go:770] primary dev: ETH0
I0323 16:56:43.421133  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:56:43.421152  543705 net.go:698] Add success.
I0323 16:56:46.458037  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:56:46.458135  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:56:46.458169  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:56:53.409863  543705 cpu.go:275] no items to output this cycle
E0323 16:56:53.409996  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:56:53.410012  543705 memory.go:184] no items to output this cycle
E0323 16:57:03.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:57:03.409823  543705 memory.go:184] no items to output this cycle
I0323 16:57:03.409851  543705 cpu.go:275] no items to output this cycle
E0323 16:57:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:57:13.409794  543705 memory.go:191] Add success.
W0323 16:57:13.409821  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 16:57:13.409831  543705 cpu.go:282] Add success.
W0323 16:57:13.409833  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:57:13.409836  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:57:13.420226  543705 net.go:648] Add success.
I0323 16:57:13.423140  543705 net.go:770] primary dev: ETH0
I0323 16:57:13.423153  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:57:13.423164  543705 net.go:698] Add success.
I0323 16:57:13.429863  543705 net.go:1082] primary mac is: fa:27:00:10:e8:f4,dev: ETH0
I0323 16:57:13.453031  543705 event_worker.go:152] Polling the log file for events...
I0323 16:57:13.464300  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"fde1ff09-6184-4a6e-8903-804ed0b58d48","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 16:57:13.464332  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 16:57:14.455119  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:57:14.455181  543705 disk_worker.go:708] disk space is not compliant
W0323 16:57:14.455184  543705 disk_worker.go:728] disk inode is not compliant
E0323 16:57:14.455987  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 16:57:14.455996  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 16:57:14.456001  543705 custom_config.go:64] query custom config with name: gpu
I0323 16:57:14.456466  543705 disk_worker.go:494] system disk:vda1
I0323 16:57:14.456497  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 16:57:15.471518  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 16:57:15.471536  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:57:16.458468  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:57:16.458538  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:57:16.458564  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:57:16.459074  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 16:57:16.472177  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:57:23.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:57:23.409922  543705 memory.go:184] no items to output this cycle
I0323 16:57:23.409948  543705 cpu.go:275] no items to output this cycle
E0323 16:57:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:57:33.409826  543705 memory.go:184] no items to output this cycle
I0323 16:57:33.409842  543705 cpu.go:275] no items to output this cycle
I0323 16:57:40.495253  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 16:57:40.495261  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 16:57:40.945687  543705 disk_info.go:125] begin check local disk info of client
I0323 16:57:40.948257  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:57:40.948264  543705 disk_info.go:196] parse disk info done, disk is : [0xc0004f2f40 0xc0004f2f80]
E0323 16:57:43.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:57:43.410814  543705 memory.go:191] Add success.
I0323 16:57:43.409813  543705 cpu.go:282] Add success.
I0323 16:57:43.420586  543705 net.go:648] Add success.
I0323 16:57:43.423380  543705 net.go:770] primary dev: ETH0
I0323 16:57:43.423394  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:57:43.423407  543705 net.go:698] Add success.
I0323 16:57:46.458031  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:57:46.458122  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:57:46.458158  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:57:53.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:57:53.409828  543705 memory.go:184] no items to output this cycle
I0323 16:57:53.409854  543705 cpu.go:275] no items to output this cycle
E0323 16:58:03.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:58:03.409785  543705 memory.go:184] no items to output this cycle
I0323 16:58:03.409904  543705 cpu.go:275] no items to output this cycle
E0323 16:58:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:58:13.409815  543705 memory.go:191] Add success.
I0323 16:58:13.409819  543705 cpu.go:282] Add success.
W0323 16:58:13.409845  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:58:13.409857  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:58:13.409860  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:58:13.420148  543705 net.go:648] Add success.
I0323 16:58:13.423335  543705 net.go:770] primary dev: ETH0
I0323 16:58:13.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:58:13.423361  543705 net.go:698] Add success.
I0323 16:58:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:58:14.455172  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:58:14.455185  543705 disk_worker.go:708] disk space is not compliant
W0323 16:58:14.455188  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:58:14.456567  543705 disk_worker.go:494] system disk:vda1
I0323 16:58:14.456616  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:58:15.455971  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:58:16.457998  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:58:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:58:16.458109  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:58:16.472485  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:58:23.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:58:23.409790  543705 memory.go:184] no items to output this cycle
I0323 16:58:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 16:58:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:58:33.409816  543705 memory.go:184] no items to output this cycle
I0323 16:58:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 16:58:40.949686  543705 disk_info.go:125] begin check local disk info of client
I0323 16:58:40.952357  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:58:40.952366  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa740 0xc0001aa780]
E0323 16:58:43.409808  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:58:43.410741  543705 memory.go:191] Add success.
I0323 16:58:43.409927  543705 cpu.go:282] Add success.
I0323 16:58:43.420793  543705 net.go:648] Add success.
I0323 16:58:43.423696  543705 net.go:770] primary dev: ETH0
I0323 16:58:43.423715  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:58:43.423733  543705 net.go:698] Add success.
I0323 16:58:46.457980  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:58:46.458050  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:58:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:58:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 16:58:53.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:58:53.409819  543705 memory.go:184] no items to output this cycle
E0323 16:59:03.409815  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:59:03.409838  543705 memory.go:184] no items to output this cycle
I0323 16:59:03.409846  543705 cpu.go:275] no items to output this cycle
W0323 16:59:13.409722  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 16:59:13.409740  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 16:59:13.409746  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 16:59:13.409812  543705 cpu.go:282] Add success.
E0323 16:59:13.409831  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:59:13.409854  543705 memory.go:191] Add success.
I0323 16:59:13.420137  543705 net.go:648] Add success.
I0323 16:59:13.422988  543705 net.go:770] primary dev: ETH0
I0323 16:59:13.423003  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:59:13.423021  543705 net.go:698] Add success.
I0323 16:59:14.454981  543705 custom_config.go:64] query custom config with name: gpu
W0323 16:59:14.455162  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 16:59:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0323 16:59:14.455242  543705 disk_worker.go:728] disk inode is not compliant
I0323 16:59:14.456676  543705 disk_worker.go:494] system disk:vda1
I0323 16:59:14.456707  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 16:59:15.455968  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 16:59:16.458014  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:59:16.458114  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:59:16.458154  543705 gpu_kunlun.go:227] Add success, len:1
I0323 16:59:16.472753  543705 disk_local_worker.go:436] Get disk info: []
E0323 16:59:23.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:59:23.409788  543705 memory.go:184] no items to output this cycle
I0323 16:59:23.409842  543705 cpu.go:275] no items to output this cycle
E0323 16:59:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:59:33.409797  543705 memory.go:184] no items to output this cycle
I0323 16:59:33.409809  543705 cpu.go:275] no items to output this cycle
I0323 16:59:40.953687  543705 disk_info.go:125] begin check local disk info of client
I0323 16:59:40.956288  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 16:59:40.956295  543705 disk_info.go:196] parse disk info done, disk is : [0xc000498d40 0xc000498d80]
I0323 16:59:43.409810  543705 cpu.go:282] Add success.
E0323 16:59:43.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:59:43.410801  543705 memory.go:191] Add success.
I0323 16:59:43.420567  543705 net.go:648] Add success.
I0323 16:59:43.423528  543705 net.go:770] primary dev: ETH0
I0323 16:59:43.423554  543705 net.go:802] Send network stats successfully!,count is 6
I0323 16:59:43.423568  543705 net.go:698] Add success.
I0323 16:59:46.458033  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 16:59:46.458134  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 16:59:46.458166  543705 gpu_kunlun.go:227] Add success, len:1
E0323 16:59:53.409781  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 16:59:53.409798  543705 memory.go:184] no items to output this cycle
I0323 16:59:53.409798  543705 cpu.go:275] no items to output this cycle
E0323 17:00:03.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:00:03.409821  543705 memory.go:184] no items to output this cycle
I0323 17:00:03.409844  543705 cpu.go:275] no items to output this cycle
W0323 17:00:13.409720  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:00:13.409737  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:00:13.409742  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:00:13.409810  543705 cpu.go:282] Add success.
E0323 17:00:13.409826  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:00:13.409846  543705 memory.go:191] Add success.
I0323 17:00:13.420381  543705 net.go:648] Add success.
I0323 17:00:13.423497  543705 net.go:770] primary dev: ETH0
I0323 17:00:13.423519  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:00:13.423538  543705 net.go:698] Add success.
I0323 17:00:13.463460  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"ef2745f3-8d9d-4f4c-9750-8dfbed3027e6","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:00:13.463491  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 17:00:14.454995  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:00:14.455237  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:00:14.455249  543705 disk_worker.go:708] disk space is not compliant
W0323 17:00:14.455252  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:00:14.457276  543705 disk_worker.go:494] system disk:vda1
I0323 17:00:14.457341  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:00:15.455476  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:00:16.458000  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:00:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:00:16.458116  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:00:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:00:23.409767  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:00:23.409787  543705 memory.go:184] no items to output this cycle
I0323 17:00:23.409815  543705 cpu.go:275] no items to output this cycle
E0323 17:00:33.409777  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:00:33.409796  543705 memory.go:184] no items to output this cycle
I0323 17:00:33.409816  543705 cpu.go:275] no items to output this cycle
I0323 17:00:40.496302  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:00:40.496311  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:00:40.956392  543705 disk_info.go:125] begin check local disk info of client
I0323 17:00:40.958974  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:00:40.958981  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005367c0 0xc000536800]
E0323 17:00:43.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:00:43.410756  543705 memory.go:191] Add success.
I0323 17:00:43.409814  543705 cpu.go:282] Add success.
I0323 17:00:43.420588  543705 net.go:648] Add success.
I0323 17:00:43.423648  543705 net.go:770] primary dev: ETH0
I0323 17:00:43.423669  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:00:43.423688  543705 net.go:698] Add success.
I0323 17:00:46.458003  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:00:46.458085  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:00:46.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:00:53.410480  543705 cpu.go:275] no items to output this cycle
E0323 17:00:53.410617  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:00:53.410634  543705 memory.go:184] no items to output this cycle
E0323 17:01:03.409806  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:01:03.409833  543705 memory.go:184] no items to output this cycle
I0323 17:01:03.409854  543705 cpu.go:275] no items to output this cycle
I0323 17:01:13.409869  543705 cpu.go:282] Add success.
E0323 17:01:13.410251  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:01:13.410276  543705 memory.go:191] Add success.
W0323 17:01:13.410310  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:01:13.410327  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:01:13.410332  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:01:13.420268  543705 net.go:648] Add success.
I0323 17:01:13.421272  543705 net.go:770] primary dev: ETH0
I0323 17:01:13.421286  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:01:13.421298  543705 net.go:698] Add success.
I0323 17:01:14.455006  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:01:14.455273  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:01:14.455288  543705 disk_worker.go:708] disk space is not compliant
W0323 17:01:14.455293  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:01:14.457179  543705 disk_worker.go:494] system disk:vda1
I0323 17:01:14.457208  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:01:15.457717  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:01:16.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:01:16.458068  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:01:16.458098  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:01:16.472466  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:01:23.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:01:23.409793  543705 memory.go:184] no items to output this cycle
I0323 17:01:23.409807  543705 cpu.go:275] no items to output this cycle
I0323 17:01:33.409793  543705 cpu.go:275] no items to output this cycle
E0323 17:01:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:01:33.409819  543705 memory.go:184] no items to output this cycle
I0323 17:01:40.959102  543705 disk_info.go:125] begin check local disk info of client
I0323 17:01:40.969430  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:01:40.969440  543705 disk_info.go:196] parse disk info done, disk is : [0xc000473640 0xc000473680]
E0323 17:01:43.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:01:43.410783  543705 memory.go:191] Add success.
I0323 17:01:43.409890  543705 cpu.go:282] Add success.
I0323 17:01:43.420543  543705 net.go:648] Add success.
I0323 17:01:43.423275  543705 net.go:770] primary dev: ETH0
I0323 17:01:43.423291  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:01:43.423306  543705 net.go:698] Add success.
I0323 17:01:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:01:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:01:46.458101  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:01:53.409758  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:01:53.409780  543705 memory.go:184] no items to output this cycle
I0323 17:01:53.409808  543705 cpu.go:275] no items to output this cycle
E0323 17:02:03.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:02:03.409788  543705 memory.go:184] no items to output this cycle
I0323 17:02:03.409813  543705 cpu.go:275] no items to output this cycle
E0323 17:02:13.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:02:13.409831  543705 memory.go:191] Add success.
I0323 17:02:13.409835  543705 cpu.go:282] Add success.
W0323 17:02:13.409864  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:02:13.412536  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:02:13.412540  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:02:13.420293  543705 net.go:648] Add success.
I0323 17:02:13.422253  543705 net.go:770] primary dev: ETH0
I0323 17:02:13.422272  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:02:13.422291  543705 net.go:698] Add success.
W0323 17:02:14.454226  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:02:14.454328  543705 disk_worker.go:708] disk space is not compliant
W0323 17:02:14.454333  543705 disk_worker.go:728] disk inode is not compliant
E0323 17:02:14.456916  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 17:02:14.456928  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 17:02:14.456934  543705 custom_config.go:64] query custom config with name: gpu
I0323 17:02:14.457300  543705 disk_worker.go:494] system disk:vda1
I0323 17:02:14.457351  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 17:02:15.457048  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 17:02:15.457063  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:02:16.458088  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:02:16.458159  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
E0323 17:02:16.458163  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 17:02:16.458180  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:02:16.472554  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:02:23.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:02:23.409801  543705 memory.go:184] no items to output this cycle
I0323 17:02:23.409804  543705 cpu.go:275] no items to output this cycle
E0323 17:02:33.409792  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:02:33.409812  543705 memory.go:184] no items to output this cycle
I0323 17:02:33.409824  543705 cpu.go:275] no items to output this cycle
I0323 17:02:40.969675  543705 disk_info.go:125] begin check local disk info of client
I0323 17:02:40.972256  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:02:40.972262  543705 disk_info.go:196] parse disk info done, disk is : [0xc00049e3c0 0xc00049e400]
E0323 17:02:43.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:02:43.410645  543705 memory.go:191] Add success.
I0323 17:02:43.409846  543705 cpu.go:282] Add success.
I0323 17:02:43.420331  543705 net.go:648] Add success.
I0323 17:02:43.423159  543705 net.go:770] primary dev: ETH0
I0323 17:02:43.423171  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:02:43.423185  543705 net.go:698] Add success.
I0323 17:02:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:02:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:02:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:02:53.409769  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:02:53.409787  543705 memory.go:184] no items to output this cycle
I0323 17:02:53.409826  543705 cpu.go:275] no items to output this cycle
E0323 17:03:03.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:03:03.409825  543705 memory.go:184] no items to output this cycle
I0323 17:03:03.409831  543705 cpu.go:275] no items to output this cycle
E0323 17:03:13.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:03:13.409801  543705 memory.go:191] Add success.
W0323 17:03:13.409828  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 17:03:13.409839  543705 cpu.go:282] Add success.
W0323 17:03:13.409841  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:03:13.409844  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:03:13.420179  543705 net.go:648] Add success.
I0323 17:03:13.423121  543705 net.go:770] primary dev: ETH0
I0323 17:03:13.423133  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:03:13.423145  543705 net.go:698] Add success.
I0323 17:03:13.468628  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"e0fb2749-9a4f-437d-be46-b093f84cd848","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:03:13.468671  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 17:03:14.453960  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:03:14.455424  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:03:14.455439  543705 disk_worker.go:708] disk space is not compliant
W0323 17:03:14.455443  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:03:14.475387  543705 disk_worker.go:494] system disk:vda1
I0323 17:03:14.475456  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:03:15.455952  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:03:16.465693  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:03:16.465806  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:03:16.465844  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:03:16.473680  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:03:23.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:03:23.409794  543705 memory.go:184] no items to output this cycle
I0323 17:03:23.409824  543705 cpu.go:275] no items to output this cycle
E0323 17:03:33.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:03:33.409819  543705 memory.go:184] no items to output this cycle
I0323 17:03:33.409833  543705 cpu.go:275] no items to output this cycle
I0323 17:03:40.497257  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:03:40.497265  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:03:40.973683  543705 disk_info.go:125] begin check local disk info of client
I0323 17:03:40.976281  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:03:40.976287  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007aec0 0xc00007af00]
E0323 17:03:43.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:03:43.410756  543705 memory.go:191] Add success.
I0323 17:03:43.409829  543705 cpu.go:282] Add success.
I0323 17:03:43.420518  543705 net.go:648] Add success.
I0323 17:03:43.423319  543705 net.go:770] primary dev: ETH0
I0323 17:03:43.423332  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:03:43.423345  543705 net.go:698] Add success.
I0323 17:03:46.457912  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:03:46.457991  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:03:46.458014  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:03:53.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:03:53.409794  543705 memory.go:184] no items to output this cycle
I0323 17:03:53.409827  543705 cpu.go:275] no items to output this cycle
E0323 17:04:03.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:04:03.409780  543705 memory.go:184] no items to output this cycle
I0323 17:04:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 17:04:13.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:04:13.409806  543705 memory.go:191] Add success.
W0323 17:04:13.409838  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 17:04:13.409839  543705 cpu.go:282] Add success.
W0323 17:04:13.409851  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:04:13.409854  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:04:13.420072  543705 net.go:648] Add success.
I0323 17:04:13.423134  543705 net.go:770] primary dev: ETH0
I0323 17:04:13.423148  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:04:13.423162  543705 net.go:698] Add success.
I0323 17:04:14.454976  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:04:14.455163  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:04:14.455237  543705 disk_worker.go:708] disk space is not compliant
W0323 17:04:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:04:14.456898  543705 disk_worker.go:494] system disk:vda1
I0323 17:04:14.456930  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:04:15.455963  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:04:16.458022  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:04:16.458125  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:04:16.458162  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:04:16.472633  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:04:23.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:04:23.409783  543705 memory.go:184] no items to output this cycle
I0323 17:04:23.409826  543705 cpu.go:275] no items to output this cycle
E0323 17:04:33.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:04:33.409824  543705 memory.go:184] no items to output this cycle
I0323 17:04:33.409837  543705 cpu.go:275] no items to output this cycle
I0323 17:04:40.977684  543705 disk_info.go:125] begin check local disk info of client
I0323 17:04:40.980248  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:04:40.980255  543705 disk_info.go:196] parse disk info done, disk is : [0xc00039c640 0xc00039c680]
E0323 17:04:43.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:04:43.410705  543705 memory.go:191] Add success.
I0323 17:04:43.409848  543705 cpu.go:282] Add success.
I0323 17:04:43.420412  543705 net.go:648] Add success.
I0323 17:04:43.423608  543705 net.go:770] primary dev: ETH0
I0323 17:04:43.423624  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:04:43.423638  543705 net.go:698] Add success.
I0323 17:04:46.458006  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:04:46.458090  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:04:46.458117  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:04:53.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:04:53.409821  543705 memory.go:184] no items to output this cycle
I0323 17:04:53.409840  543705 cpu.go:275] no items to output this cycle
E0323 17:05:03.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:05:03.409814  543705 memory.go:184] no items to output this cycle
I0323 17:05:03.409821  543705 cpu.go:275] no items to output this cycle
E0323 17:05:13.409814  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:05:13.409865  543705 memory.go:191] Add success.
W0323 17:05:13.409899  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:05:13.409916  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:05:13.409922  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:05:13.410413  543705 cpu.go:282] Add success.
I0323 17:05:13.420700  543705 net.go:648] Add success.
I0323 17:05:13.421918  543705 net.go:770] primary dev: ETH0
I0323 17:05:13.421939  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:05:13.421958  543705 net.go:698] Add success.
I0323 17:05:14.454990  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:05:14.455189  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:05:14.455202  543705 disk_worker.go:708] disk space is not compliant
W0323 17:05:14.455205  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:05:14.456598  543705 disk_worker.go:494] system disk:vda1
I0323 17:05:14.456641  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:05:15.456036  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:05:16.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:05:16.458086  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:05:16.458115  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:05:16.472494  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:05:23.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:05:23.409817  543705 memory.go:184] no items to output this cycle
I0323 17:05:23.409827  543705 cpu.go:275] no items to output this cycle
E0323 17:05:33.409803  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:05:33.409828  543705 memory.go:184] no items to output this cycle
I0323 17:05:33.409998  543705 cpu.go:275] no items to output this cycle
I0323 17:05:40.981687  543705 disk_info.go:125] begin check local disk info of client
I0323 17:05:40.984365  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:05:40.984372  543705 disk_info.go:196] parse disk info done, disk is : [0xc0002a01c0 0xc0002a0200]
E0323 17:05:43.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:05:43.409815  543705 cpu.go:282] Add success.
I0323 17:05:43.410748  543705 memory.go:191] Add success.
I0323 17:05:43.420476  543705 net.go:648] Add success.
I0323 17:05:43.423419  543705 net.go:770] primary dev: ETH0
I0323 17:05:43.423434  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:05:43.423450  543705 net.go:698] Add success.
I0323 17:05:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:05:46.458061  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:05:46.458089  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:05:53.409826  543705 cpu.go:275] no items to output this cycle
E0323 17:05:53.409939  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:05:53.409951  543705 memory.go:184] no items to output this cycle
E0323 17:06:03.409811  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:06:03.409834  543705 memory.go:184] no items to output this cycle
I0323 17:06:03.409854  543705 cpu.go:275] no items to output this cycle
E0323 17:06:13.409761  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:06:13.409792  543705 memory.go:191] Add success.
W0323 17:06:13.409818  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
I0323 17:06:13.409822  543705 cpu.go:282] Add success.
W0323 17:06:13.409829  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:06:13.409832  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:06:13.420277  543705 net.go:648] Add success.
I0323 17:06:13.423126  543705 net.go:770] primary dev: ETH0
I0323 17:06:13.423142  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:06:13.423160  543705 net.go:698] Add success.
I0323 17:06:13.467710  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"be635a8a-8ebd-4202-82f0-126482b3d441","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:06:13.467750  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 17:06:14.453964  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:06:14.454297  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:06:14.454315  543705 disk_worker.go:708] disk space is not compliant
W0323 17:06:14.454319  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:06:14.457890  543705 disk_worker.go:494] system disk:vda1
I0323 17:06:14.457959  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:06:15.455993  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:06:16.457598  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:06:16.457691  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:06:16.457722  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:06:16.472088  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:06:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:06:23.409813  543705 memory.go:184] no items to output this cycle
I0323 17:06:23.409822  543705 cpu.go:275] no items to output this cycle
E0323 17:06:33.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:06:33.409793  543705 memory.go:184] no items to output this cycle
I0323 17:06:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 17:06:40.498264  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:06:40.498272  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:06:40.985674  543705 disk_info.go:125] begin check local disk info of client
I0323 17:06:40.988260  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:06:40.988266  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536340 0xc000536380]
E0323 17:06:43.409748  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:06:43.410876  543705 memory.go:191] Add success.
I0323 17:06:43.409817  543705 cpu.go:282] Add success.
I0323 17:06:43.420612  543705 net.go:648] Add success.
I0323 17:06:43.423342  543705 net.go:770] primary dev: ETH0
I0323 17:06:43.423355  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:06:43.423367  543705 net.go:698] Add success.
I0323 17:06:46.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:06:46.458054  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:06:46.458078  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:06:53.409770  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:06:53.409787  543705 memory.go:184] no items to output this cycle
I0323 17:06:53.409790  543705 cpu.go:275] no items to output this cycle
E0323 17:07:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:07:03.409796  543705 cpu.go:275] no items to output this cycle
I0323 17:07:03.409807  543705 memory.go:184] no items to output this cycle
E0323 17:07:13.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:07:13.409825  543705 memory.go:191] Add success.
I0323 17:07:13.409825  543705 cpu.go:282] Add success.
W0323 17:07:13.409856  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:07:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:07:13.409869  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:07:13.420199  543705 net.go:648] Add success.
I0323 17:07:13.423095  543705 net.go:770] primary dev: ETH0
I0323 17:07:13.423108  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:07:13.423121  543705 net.go:698] Add success.
I0323 17:07:13.453664  543705 event_worker.go:152] Polling the log file for events...
W0323 17:07:14.455196  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:07:14.455210  543705 disk_worker.go:708] disk space is not compliant
W0323 17:07:14.455214  543705 disk_worker.go:728] disk inode is not compliant
E0323 17:07:14.456076  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 17:07:14.456087  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 17:07:14.456094  543705 custom_config.go:64] query custom config with name: gpu
I0323 17:07:14.456654  543705 disk_worker.go:494] system disk:vda1
I0323 17:07:14.456688  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 17:07:15.456854  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 17:07:15.456864  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:07:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
E0323 17:07:16.457984  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 17:07:16.458027  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:07:16.458043  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:07:16.472374  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:07:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:07:23.409810  543705 memory.go:184] no items to output this cycle
I0323 17:07:23.409818  543705 cpu.go:275] no items to output this cycle
E0323 17:07:33.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:07:33.409812  543705 memory.go:184] no items to output this cycle
I0323 17:07:33.409825  543705 cpu.go:275] no items to output this cycle
I0323 17:07:40.989691  543705 disk_info.go:125] begin check local disk info of client
I0323 17:07:40.992387  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:07:40.992396  543705 disk_info.go:196] parse disk info done, disk is : [0xc0001aa000 0xc0001aa040]
E0323 17:07:43.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:07:43.410704  543705 memory.go:191] Add success.
I0323 17:07:43.409805  543705 cpu.go:282] Add success.
I0323 17:07:43.420529  543705 net.go:648] Add success.
I0323 17:07:43.423257  543705 net.go:770] primary dev: ETH0
I0323 17:07:43.423271  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:07:43.423283  543705 net.go:698] Add success.
I0323 17:07:46.457987  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:07:46.458064  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:07:46.458091  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:07:53.409766  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:07:53.409783  543705 memory.go:184] no items to output this cycle
I0323 17:07:53.409814  543705 cpu.go:275] no items to output this cycle
E0323 17:08:03.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:08:03.409787  543705 memory.go:184] no items to output this cycle
I0323 17:08:03.409793  543705 cpu.go:275] no items to output this cycle
W0323 17:08:13.409709  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:08:13.409726  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:08:13.409731  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 17:08:13.409805  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:08:13.409822  543705 memory.go:191] Add success.
I0323 17:08:13.409831  543705 cpu.go:282] Add success.
I0323 17:08:13.420046  543705 net.go:648] Add success.
I0323 17:08:13.423041  543705 net.go:770] primary dev: ETH0
I0323 17:08:13.423055  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:08:13.423067  543705 net.go:698] Add success.
I0323 17:08:14.454960  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:08:14.455113  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:08:14.455187  543705 disk_worker.go:708] disk space is not compliant
W0323 17:08:14.455191  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:08:14.456590  543705 disk_worker.go:494] system disk:vda1
I0323 17:08:14.456622  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:08:15.455955  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:08:16.457976  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:08:16.458042  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:08:16.458067  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:08:16.472443  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:08:23.409878  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:08:23.409902  543705 memory.go:184] no items to output this cycle
I0323 17:08:23.409973  543705 cpu.go:275] no items to output this cycle
E0323 17:08:33.409782  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:08:33.409795  543705 cpu.go:275] no items to output this cycle
I0323 17:08:33.409801  543705 memory.go:184] no items to output this cycle
I0323 17:08:40.993677  543705 disk_info.go:125] begin check local disk info of client
I0323 17:08:40.996191  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:08:40.996198  543705 disk_info.go:196] parse disk info done, disk is : [0xc0005783c0 0xc000578400]
E0323 17:08:43.409751  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:08:43.410595  543705 memory.go:191] Add success.
I0323 17:08:43.409810  543705 cpu.go:282] Add success.
I0323 17:08:43.420306  543705 net.go:648] Add success.
I0323 17:08:43.422959  543705 net.go:770] primary dev: ETH0
I0323 17:08:43.422974  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:08:43.422988  543705 net.go:698] Add success.
I0323 17:08:46.457972  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:08:46.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:08:46.458074  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:08:53.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:08:53.409780  543705 memory.go:184] no items to output this cycle
I0323 17:08:53.409813  543705 cpu.go:275] no items to output this cycle
E0323 17:09:03.409765  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:09:03.409783  543705 memory.go:184] no items to output this cycle
I0323 17:09:03.409806  543705 cpu.go:275] no items to output this cycle
E0323 17:09:13.409793  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:09:13.409828  543705 memory.go:191] Add success.
I0323 17:09:13.409840  543705 cpu.go:282] Add success.
W0323 17:09:13.409861  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:09:13.409877  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:09:13.409882  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:09:13.420114  543705 net.go:648] Add success.
I0323 17:09:13.423420  543705 net.go:770] primary dev: ETH0
I0323 17:09:13.423433  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:09:13.423445  543705 net.go:698] Add success.
I0323 17:09:13.468196  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"bc285e14-8c97-4da3-8f67-2a37c35c14d2","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:09:13.468234  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 17:09:14.454980  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:09:14.455202  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:09:14.455213  543705 disk_worker.go:708] disk space is not compliant
W0323 17:09:14.455216  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:09:14.456772  543705 disk_worker.go:494] system disk:vda1
I0323 17:09:14.456817  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:09:15.455977  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:09:16.457977  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:09:16.458044  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:09:16.458074  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:09:16.472448  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:09:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:09:23.409922  543705 memory.go:184] no items to output this cycle
I0323 17:09:23.409923  543705 cpu.go:275] no items to output this cycle
E0323 17:09:33.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:09:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 17:09:33.409812  543705 memory.go:184] no items to output this cycle
I0323 17:09:40.499243  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:09:40.499251  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:09:40.997673  543705 disk_info.go:125] begin check local disk info of client
I0323 17:09:41.000186  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:09:41.000192  543705 disk_info.go:196] parse disk info done, disk is : [0xc00007a740 0xc00007a780]
E0323 17:09:43.409762  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:09:43.410839  543705 memory.go:191] Add success.
I0323 17:09:43.409809  543705 cpu.go:282] Add success.
I0323 17:09:43.420550  543705 net.go:648] Add success.
I0323 17:09:43.423748  543705 net.go:770] primary dev: ETH0
I0323 17:09:43.423761  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:09:43.423773  543705 net.go:698] Add success.
I0323 17:09:46.457971  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:09:46.458047  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:09:46.458076  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:09:53.409768  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:09:53.409787  543705 memory.go:184] no items to output this cycle
I0323 17:09:53.409802  543705 cpu.go:275] no items to output this cycle
E0323 17:10:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:10:03.409789  543705 memory.go:184] no items to output this cycle
I0323 17:10:03.409798  543705 cpu.go:275] no items to output this cycle
E0323 17:10:13.409774  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:10:13.409802  543705 memory.go:191] Add success.
I0323 17:10:13.409803  543705 cpu.go:282] Add success.
W0323 17:10:13.409832  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:10:13.409844  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:10:13.409847  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:10:13.420263  543705 net.go:648] Add success.
I0323 17:10:13.423008  543705 net.go:770] primary dev: ETH0
I0323 17:10:13.423022  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:10:13.423036  543705 net.go:698] Add success.
I0323 17:10:14.454987  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:10:14.455123  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:10:14.455190  543705 disk_worker.go:708] disk space is not compliant
W0323 17:10:14.455193  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:10:14.456592  543705 disk_worker.go:494] system disk:vda1
I0323 17:10:14.456653  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:10:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:10:16.457974  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:10:16.458034  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:10:16.458063  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:10:16.472399  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:10:23.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:10:23.409814  543705 memory.go:184] no items to output this cycle
I0323 17:10:23.409828  543705 cpu.go:275] no items to output this cycle
E0323 17:10:33.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:10:33.409789  543705 memory.go:184] no items to output this cycle
I0323 17:10:33.409830  543705 cpu.go:275] no items to output this cycle
I0323 17:10:41.001689  543705 disk_info.go:125] begin check local disk info of client
I0323 17:10:41.004325  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:10:41.004333  543705 disk_info.go:196] parse disk info done, disk is : [0xc00046af40 0xc00046af80]
E0323 17:10:43.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:10:43.410714  543705 memory.go:191] Add success.
I0323 17:10:43.409810  543705 cpu.go:282] Add success.
I0323 17:10:43.420406  543705 net.go:648] Add success.
I0323 17:10:43.423330  543705 net.go:770] primary dev: ETH0
I0323 17:10:43.423348  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:10:43.423366  543705 net.go:698] Add success.
I0323 17:10:46.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:10:46.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:10:46.458113  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:10:53.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:10:53.409790  543705 memory.go:184] no items to output this cycle
I0323 17:10:53.409821  543705 cpu.go:275] no items to output this cycle
E0323 17:11:03.409778  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:11:03.409797  543705 memory.go:184] no items to output this cycle
I0323 17:11:03.409822  543705 cpu.go:275] no items to output this cycle
W0323 17:11:13.409737  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:11:13.409759  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:11:13.409765  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
E0323 17:11:13.409856  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:11:13.409867  543705 cpu.go:282] Add success.
I0323 17:11:13.409880  543705 memory.go:191] Add success.
I0323 17:11:13.420592  543705 net.go:648] Add success.
I0323 17:11:13.423553  543705 net.go:770] primary dev: ETH0
I0323 17:11:13.423567  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:11:13.423578  543705 net.go:698] Add success.
I0323 17:11:14.453948  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:11:14.455167  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:11:14.455250  543705 disk_worker.go:708] disk space is not compliant
W0323 17:11:14.455253  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:11:14.456635  543705 disk_worker.go:494] system disk:vda1
I0323 17:11:14.456667  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:11:15.455997  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:11:16.457986  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:11:16.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:11:16.458101  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:11:16.472498  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:11:23.409807  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:11:23.409829  543705 memory.go:184] no items to output this cycle
I0323 17:11:23.409839  543705 cpu.go:275] no items to output this cycle
E0323 17:11:33.409809  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:11:33.409832  543705 memory.go:184] no items to output this cycle
I0323 17:11:33.409843  543705 cpu.go:275] no items to output this cycle
I0323 17:11:41.005685  543705 disk_info.go:125] begin check local disk info of client
I0323 17:11:41.008345  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:11:41.008351  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000e4ac0 0xc0000e4b00]
E0323 17:11:43.409851  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:11:43.409873  543705 cpu.go:282] Add success.
I0323 17:11:43.409883  543705 memory.go:191] Add success.
I0323 17:11:43.420329  543705 net.go:648] Add success.
I0323 17:11:43.421446  543705 net.go:770] primary dev: ETH0
I0323 17:11:43.421462  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:11:43.421476  543705 net.go:698] Add success.
I0323 17:11:46.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:11:46.458104  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:11:46.458139  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:11:53.410275  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:11:53.410297  543705 memory.go:184] no items to output this cycle
I0323 17:11:53.410301  543705 cpu.go:275] no items to output this cycle
E0323 17:12:03.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:12:03.409798  543705 memory.go:184] no items to output this cycle
I0323 17:12:03.409909  543705 cpu.go:275] no items to output this cycle
E0323 17:12:13.417853  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:12:13.417892  543705 memory.go:191] Add success.
W0323 17:12:13.417925  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:12:13.417942  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:12:13.417946  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:12:13.418367  543705 cpu.go:282] Add success.
I0323 17:12:13.422079  543705 net.go:648] Add success.
I0323 17:12:13.454346  543705 net.go:770] primary dev: ETH0
I0323 17:12:13.454364  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:12:13.454384  543705 net.go:698] Add success.
I0323 17:12:13.485709  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"9285ff0e-707d-4b9c-8e2b-ac4fe32779ce","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:12:13.485763  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
W0323 17:12:14.454220  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:12:14.454321  543705 disk_worker.go:708] disk space is not compliant
W0323 17:12:14.454326  543705 disk_worker.go:728] disk inode is not compliant
E0323 17:12:14.455114  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 17:12:14.455124  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 17:12:14.455131  543705 custom_config.go:64] query custom config with name: gpu
I0323 17:12:14.456550  543705 disk_worker.go:494] system disk:vda1
I0323 17:12:14.456582  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 17:12:15.457107  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 17:12:15.457121  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:12:16.458696  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:12:16.458779  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:12:16.458801  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:12:16.458959  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 17:12:16.472515  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:12:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:12:23.409810  543705 memory.go:184] no items to output this cycle
I0323 17:12:23.409823  543705 cpu.go:275] no items to output this cycle
E0323 17:12:33.410645  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:12:33.410665  543705 memory.go:184] no items to output this cycle
I0323 17:12:33.410768  543705 cpu.go:275] no items to output this cycle
I0323 17:12:40.500276  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:12:40.500283  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:12:41.009699  543705 disk_info.go:125] begin check local disk info of client
I0323 17:12:41.013495  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:12:41.013503  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c46c0 0xc0000c4700]
E0323 17:12:43.409783  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:12:43.410717  543705 memory.go:191] Add success.
I0323 17:12:43.409836  543705 cpu.go:282] Add success.
I0323 17:12:43.420398  543705 net.go:648] Add success.
I0323 17:12:43.423264  543705 net.go:770] primary dev: ETH0
I0323 17:12:43.423277  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:12:43.423291  543705 net.go:698] Add success.
I0323 17:12:46.458002  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:12:46.458084  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:12:46.458113  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:12:53.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:12:53.409842  543705 memory.go:184] no items to output this cycle
I0323 17:12:53.409857  543705 cpu.go:275] no items to output this cycle
E0323 17:13:03.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:13:03.409799  543705 memory.go:184] no items to output this cycle
I0323 17:13:03.409817  543705 cpu.go:275] no items to output this cycle
E0323 17:13:13.409817  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:13:13.409837  543705 cpu.go:282] Add success.
I0323 17:13:13.409846  543705 memory.go:191] Add success.
W0323 17:13:13.409879  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:13:13.409894  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:13:13.409900  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:13:13.420314  543705 net.go:648] Add success.
I0323 17:13:13.423139  543705 net.go:770] primary dev: ETH0
I0323 17:13:13.423152  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:13:13.423166  543705 net.go:698] Add success.
I0323 17:13:14.454974  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:13:14.455213  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:13:14.455226  543705 disk_worker.go:708] disk space is not compliant
W0323 17:13:14.455229  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:13:14.456669  543705 disk_worker.go:494] system disk:vda1
I0323 17:13:14.456703  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:13:15.456015  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:13:16.457981  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:13:16.458043  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:13:16.458068  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:13:16.472391  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:13:23.409749  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:13:23.409765  543705 memory.go:184] no items to output this cycle
I0323 17:13:23.409810  543705 cpu.go:275] no items to output this cycle
E0323 17:13:33.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:13:33.409784  543705 memory.go:184] no items to output this cycle
I0323 17:13:33.409800  543705 cpu.go:275] no items to output this cycle
I0323 17:13:41.013678  543705 disk_info.go:125] begin check local disk info of client
I0323 17:13:41.016273  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:13:41.016280  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4b00 0xc0000c4b40]
E0323 17:13:43.409785  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:13:43.410681  543705 memory.go:191] Add success.
I0323 17:13:43.409842  543705 cpu.go:282] Add success.
I0323 17:13:43.420472  543705 net.go:648] Add success.
I0323 17:13:43.423282  543705 net.go:770] primary dev: ETH0
I0323 17:13:43.423296  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:13:43.423310  543705 net.go:698] Add success.
I0323 17:13:46.457990  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:13:46.458063  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:13:46.458090  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:13:53.410411  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:13:53.410428  543705 memory.go:184] no items to output this cycle
I0323 17:13:53.410429  543705 cpu.go:275] no items to output this cycle
E0323 17:14:03.409763  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:14:03.409782  543705 memory.go:184] no items to output this cycle
I0323 17:14:03.409805  543705 cpu.go:275] no items to output this cycle
E0323 17:14:13.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:14:13.409823  543705 memory.go:191] Add success.
I0323 17:14:13.409829  543705 cpu.go:282] Add success.
W0323 17:14:13.409871  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:14:13.409889  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:14:13.409893  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:14:13.420211  543705 net.go:648] Add success.
I0323 17:14:13.422921  543705 net.go:770] primary dev: ETH0
I0323 17:14:13.422934  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:14:13.422947  543705 net.go:698] Add success.
I0323 17:14:14.454992  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:14:14.455227  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:14:14.455238  543705 disk_worker.go:708] disk space is not compliant
W0323 17:14:14.455241  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:14:14.456678  543705 disk_worker.go:494] system disk:vda1
I0323 17:14:14.456713  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:14:15.455970  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:14:16.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:14:16.458072  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:14:16.458100  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:14:16.472502  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:14:23.409760  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:14:23.409779  543705 memory.go:184] no items to output this cycle
I0323 17:14:23.409811  543705 cpu.go:275] no items to output this cycle
I0323 17:14:33.409798  543705 cpu.go:275] no items to output this cycle
E0323 17:14:33.409800  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:14:33.409818  543705 memory.go:184] no items to output this cycle
I0323 17:14:41.017695  543705 disk_info.go:125] begin check local disk info of client
I0323 17:14:41.021430  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:14:41.021440  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c4c80 0xc0000c4cc0]
E0323 17:14:43.409810  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:14:43.410656  543705 memory.go:191] Add success.
I0323 17:14:43.409861  543705 cpu.go:282] Add success.
I0323 17:14:43.420409  543705 net.go:648] Add success.
I0323 17:14:43.423144  543705 net.go:770] primary dev: ETH0
I0323 17:14:43.423162  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:14:43.423179  543705 net.go:698] Add success.
I0323 17:14:46.457999  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:14:46.458078  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:14:46.458110  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:14:53.409780  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:14:53.409799  543705 memory.go:184] no items to output this cycle
I0323 17:14:53.409815  543705 cpu.go:275] no items to output this cycle
E0323 17:15:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:15:03.409811  543705 memory.go:184] no items to output this cycle
I0323 17:15:03.409826  543705 cpu.go:275] no items to output this cycle
E0323 17:15:13.409776  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:15:13.409806  543705 memory.go:191] Add success.
W0323 17:15:13.409834  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:15:13.409847  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:15:13.409849  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:15:13.409851  543705 cpu.go:282] Add success.
I0323 17:15:13.420353  543705 net.go:648] Add success.
I0323 17:15:13.421338  543705 net.go:770] primary dev: ETH0
I0323 17:15:13.421351  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:15:13.421365  543705 net.go:698] Add success.
I0323 17:15:13.468639  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"49a278bd-e516-4edc-ad74-490bb4b00add","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:15:13.468676  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 17:15:14.454996  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:15:14.455216  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:15:14.455246  543705 disk_worker.go:708] disk space is not compliant
W0323 17:15:14.455249  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:15:14.459581  543705 disk_worker.go:494] system disk:vda1
I0323 17:15:14.459613  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:15:15.455991  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:15:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:15:16.458082  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:15:16.458113  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:15:16.472516  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:15:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:15:23.409810  543705 memory.go:184] no items to output this cycle
I0323 17:15:23.409821  543705 cpu.go:275] no items to output this cycle
E0323 17:15:33.409812  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:15:33.409827  543705 cpu.go:275] no items to output this cycle
I0323 17:15:33.409834  543705 memory.go:184] no items to output this cycle
I0323 17:15:40.501274  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:15:40.501282  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:15:41.021685  543705 disk_info.go:125] begin check local disk info of client
I0323 17:15:41.024298  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:15:41.024305  543705 disk_info.go:196] parse disk info done, disk is : [0xc000537540 0xc000537580]
E0323 17:15:43.409772  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:15:43.410648  543705 memory.go:191] Add success.
I0323 17:15:43.409830  543705 cpu.go:282] Add success.
I0323 17:15:43.420475  543705 net.go:648] Add success.
I0323 17:15:43.423486  543705 net.go:770] primary dev: ETH0
I0323 17:15:43.423504  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:15:43.423524  543705 net.go:698] Add success.
I0323 17:15:46.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:15:46.458081  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:15:46.458111  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:15:53.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:15:53.409810  543705 memory.go:184] no items to output this cycle
I0323 17:15:53.409822  543705 cpu.go:275] no items to output this cycle
E0323 17:16:03.409794  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:16:03.409816  543705 memory.go:184] no items to output this cycle
I0323 17:16:03.409829  543705 cpu.go:275] no items to output this cycle
E0323 17:16:13.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:16:13.409830  543705 memory.go:191] Add success.
W0323 17:16:13.409863  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:16:13.409880  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:16:13.409884  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:16:13.409805  543705 cpu.go:282] Add success.
I0323 17:16:13.420498  543705 net.go:648] Add success.
I0323 17:16:13.421508  543705 net.go:770] primary dev: ETH0
I0323 17:16:13.421525  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:16:13.421543  543705 net.go:698] Add success.
I0323 17:16:14.453951  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:16:14.455301  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:16:14.455316  543705 disk_worker.go:708] disk space is not compliant
W0323 17:16:14.455321  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:16:14.457590  543705 disk_worker.go:494] system disk:vda1
I0323 17:16:14.457643  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:16:15.455991  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:16:16.458024  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:16:16.458120  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:16:16.458154  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:16:16.472637  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:16:23.409798  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:16:23.409819  543705 memory.go:184] no items to output this cycle
I0323 17:16:23.409831  543705 cpu.go:275] no items to output this cycle
E0323 17:16:33.409775  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:16:33.409793  543705 memory.go:184] no items to output this cycle
I0323 17:16:33.409848  543705 cpu.go:275] no items to output this cycle
I0323 17:16:41.025704  543705 disk_info.go:125] begin check local disk info of client
I0323 17:16:41.029454  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:16:41.029464  543705 disk_info.go:196] parse disk info done, disk is : [0xc000471000 0xc000471040]
E0323 17:16:43.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:16:43.409831  543705 memory.go:191] Add success.
I0323 17:16:43.409837  543705 cpu.go:282] Add success.
I0323 17:16:43.420137  543705 net.go:648] Add success.
I0323 17:16:43.421170  543705 net.go:770] primary dev: ETH0
I0323 17:16:43.421185  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:16:43.421199  543705 net.go:698] Add success.
I0323 17:16:46.458008  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:16:46.458088  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:16:46.458114  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:16:53.409791  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:16:53.409815  543705 memory.go:184] no items to output this cycle
I0323 17:16:53.409824  543705 cpu.go:275] no items to output this cycle
E0323 17:17:03.409764  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:17:03.409787  543705 memory.go:184] no items to output this cycle
I0323 17:17:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 17:17:13.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:17:13.409800  543705 cpu.go:282] Add success.
I0323 17:17:13.409811  543705 memory.go:191] Add success.
W0323 17:17:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:17:13.409850  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:17:13.409855  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:17:13.420166  543705 net.go:648] Add success.
I0323 17:17:13.423131  543705 net.go:770] primary dev: ETH0
I0323 17:17:13.423146  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:17:13.423160  543705 net.go:698] Add success.
I0323 17:17:13.453725  543705 event_worker.go:152] Polling the log file for events...
W0323 17:17:14.455217  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:17:14.455231  543705 disk_worker.go:708] disk space is not compliant
W0323 17:17:14.455234  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:17:14.456611  543705 disk_worker.go:494] system disk:vda1
I0323 17:17:14.456642  543705 disk_worker.go:432] add disk info successfully, len:33
E0323 17:17:14.457524  543705 exec_cmd.go:13] Can not obtain CombinedOutput for "dcgmi discovery --list", operation name: check dcgm discover, error:exit status 127
E0323 17:17:14.457533  543705 gpu_controllor.go:82] DCGM discovery failed: exit status 127
I0323 17:17:14.457540  543705 custom_config.go:64] query custom config with name: gpu
E0323 17:17:15.457069  543705 npu_controller.go:127] Fail to call npu-smi, exit status 127, bash: line 1: /usr/local/sbin/npu-smi: No such file or directory
I0323 17:17:15.457083  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:17:16.458132  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:17:16.458196  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:17:16.458224  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:17:16.458405  543705 rdma_controller.go:118] fail to call which hccn_too, err:exit status 1, output:
I0323 17:17:16.472650  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:17:23.409796  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:17:23.409820  543705 memory.go:184] no items to output this cycle
I0323 17:17:23.409829  543705 cpu.go:275] no items to output this cycle
E0323 17:17:33.409779  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:17:33.409797  543705 memory.go:184] no items to output this cycle
I0323 17:17:33.409799  543705 cpu.go:275] no items to output this cycle
I0323 17:17:41.029682  543705 disk_info.go:125] begin check local disk info of client
I0323 17:17:41.032268  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:17:41.032276  543705 disk_info.go:196] parse disk info done, disk is : [0xc00034c500 0xc00034c540]
E0323 17:17:43.409801  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:17:43.410756  543705 memory.go:191] Add success.
I0323 17:17:43.409837  543705 cpu.go:282] Add success.
I0323 17:17:43.420501  543705 net.go:648] Add success.
I0323 17:17:43.423438  543705 net.go:770] primary dev: ETH0
I0323 17:17:43.423452  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:17:43.423465  543705 net.go:698] Add success.
I0323 17:17:46.457983  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:17:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:17:46.458102  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:17:53.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:17:53.409810  543705 memory.go:184] no items to output this cycle
I0323 17:17:53.409820  543705 cpu.go:275] no items to output this cycle
E0323 17:18:03.409790  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:18:03.409810  543705 memory.go:184] no items to output this cycle
I0323 17:18:03.409811  543705 cpu.go:275] no items to output this cycle
E0323 17:18:13.409771  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:18:13.409800  543705 memory.go:191] Add success.
I0323 17:18:13.409822  543705 cpu.go:282] Add success.
W0323 17:18:13.409827  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:18:13.409839  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:18:13.409841  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:18:13.420185  543705 net.go:648] Add success.
I0323 17:18:13.423044  543705 net.go:770] primary dev: ETH0
I0323 17:18:13.423059  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:18:13.423071  543705 net.go:698] Add success.
I0323 17:18:13.464309  543705 http_client.go:75] url:http://bcm.baidubce.com/csm/api/v1/agentControl?instance=b68fcff7-5bec-43da-a6d9-4d16e8b44d62&uid=fde98ef1ae594742bdfe91499b97205e, resp:{"requestId":"194a4ad8-658c-4460-8d73-826ce981e5ac","message":"","success":true,"result":{"userId":"fde98ef1ae594742bdfe91499b97205e","instance":{"uuid":"b68fcff7-5bec-43da-a6d9-4d16e8b44d62","shortId":"","type":""},"controls":[{"name":"rdma","enable":true},{"name":"gpu","enable":false}],"rdmaControls":null},"code":200}
I0323 17:18:13.464342  543705 custom_config.go:56] updated config: {
    "gpu": {
        "Name": "gpu",
        "Enable": false
    },
    "rdma": {
        "Name": "rdma",
        "Enable": true
    }
}
I0323 17:18:14.453965  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:18:14.455228  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:18:14.455239  543705 disk_worker.go:708] disk space is not compliant
W0323 17:18:14.455242  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:18:14.456753  543705 disk_worker.go:494] system disk:vda1
I0323 17:18:14.456801  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:18:15.455995  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:18:16.458022  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:18:16.458121  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:18:16.458159  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:18:16.472593  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:18:23.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:18:23.409807  543705 memory.go:184] no items to output this cycle
I0323 17:18:23.409853  543705 cpu.go:275] no items to output this cycle
E0323 17:18:33.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:18:33.409804  543705 cpu.go:275] no items to output this cycle
I0323 17:18:33.409807  543705 memory.go:184] no items to output this cycle
I0323 17:18:40.502311  543705 nvml_worker.go:363] ERROR_LIBRARY_NOT_FOUND
I0323 17:18:40.502320  543705 nvml_worker.go:378] NVIDIA driver is not ready, try again in 1 minute.
I0323 17:18:41.033698  543705 disk_info.go:125] begin check local disk info of client
I0323 17:18:41.036350  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:18:41.036358  543705 disk_info.go:196] parse disk info done, disk is : [0xc0000c5100 0xc0000c5140]
E0323 17:18:43.409784  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:18:43.410707  543705 memory.go:191] Add success.
I0323 17:18:43.409813  543705 cpu.go:282] Add success.
I0323 17:18:43.420408  543705 net.go:648] Add success.
I0323 17:18:43.423035  543705 net.go:770] primary dev: ETH0
I0323 17:18:43.423048  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:18:43.423061  543705 net.go:698] Add success.
I0323 17:18:46.457993  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:18:46.458069  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:18:46.458109  543705 gpu_kunlun.go:227] Add success, len:1
E0323 17:18:53.409787  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:18:53.409809  543705 memory.go:184] no items to output this cycle
I0323 17:18:53.409848  543705 cpu.go:275] no items to output this cycle
E0323 17:19:03.409789  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:19:03.409811  543705 memory.go:184] no items to output this cycle
I0323 17:19:03.409851  543705 cpu.go:275] no items to output this cycle
E0323 17:19:13.409802  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:19:13.409824  543705 cpu.go:282] Add success.
I0323 17:19:13.409832  543705 memory.go:191] Add success.
W0323 17:19:13.409866  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:19:13.409885  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:19:13.409889  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:19:13.420382  543705 net.go:648] Add success.
I0323 17:19:13.423186  543705 net.go:770] primary dev: ETH0
I0323 17:19:13.423199  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:19:13.423212  543705 net.go:698] Add success.
I0323 17:19:14.454994  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:19:14.455198  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:19:14.455211  543705 disk_worker.go:708] disk space is not compliant
W0323 17:19:14.455214  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:19:14.456619  543705 disk_worker.go:494] system disk:vda1
I0323 17:19:14.456670  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:19:15.455981  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:19:16.457996  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:19:16.458076  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:19:16.458107  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:19:16.472509  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:19:23.409797  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:19:23.409818  543705 memory.go:184] no items to output this cycle
I0323 17:19:23.409830  543705 cpu.go:275] no items to output this cycle
E0323 17:19:33.409816  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:19:33.409828  543705 cpu.go:275] no items to output this cycle
I0323 17:19:33.409836  543705 memory.go:184] no items to output this cycle
I0323 17:19:41.037683  543705 disk_info.go:125] begin check local disk info of client
I0323 17:19:41.040411  543705 disk_info.go:161] parse disk info: sr0  QM00051
vda  v-xC6VYz3d
I0323 17:19:41.040418  543705 disk_info.go:196] parse disk info done, disk is : [0xc000536740 0xc000536780]
E0323 17:19:43.409799  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:19:43.410825  543705 memory.go:191] Add success.
I0323 17:19:43.409841  543705 cpu.go:282] Add success.
I0323 17:19:43.420528  543705 net.go:648] Add success.
I0323 17:19:43.423418  543705 net.go:770] primary dev: ETH0
I0323 17:19:43.423431  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:19:43.423445  543705 net.go:698] Add success.
I0323 17:19:46.457991  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:19:46.458074  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:19:46.458103  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:19:53.409809  543705 cpu.go:275] no items to output this cycle
E0323 17:19:53.409885  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:19:53.409897  543705 memory.go:184] no items to output this cycle
E0323 17:20:03.409773  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:20:03.409791  543705 memory.go:184] no items to output this cycle
I0323 17:20:03.409812  543705 cpu.go:275] no items to output this cycle
E0323 17:20:13.410058  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:20:13.410095  543705 memory.go:191] Add success.
I0323 17:20:13.410098  543705 cpu.go:282] Add success.
W0323 17:20:13.410127  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ce_count
W0323 17:20:13.410144  543705 memory.go:428] No files found matching pattern: /sys/devices/system/edac/mc/mc*/ue_count
I0323 17:20:13.410147  543705 memory.go:414] ECC metrics: ce_delta=0, ue_delta=0
I0323 17:20:13.420392  543705 net.go:648] Add success.
I0323 17:20:13.423315  543705 net.go:770] primary dev: ETH0
I0323 17:20:13.423328  543705 net.go:802] Send network stats successfully!,count is 6
I0323 17:20:13.423340  543705 net.go:698] Add success.
I0323 17:20:14.454983  543705 custom_config.go:64] query custom config with name: gpu
W0323 17:20:14.455166  543705 disk_worker.go:1014] failed to get sector size for device ramfs: lstat /sys/class/block/ramfs: no such file or directory
W0323 17:20:14.455279  543705 disk_worker.go:708] disk space is not compliant
W0323 17:20:14.455284  543705 disk_worker.go:728] disk inode is not compliant
I0323 17:20:14.457828  543705 disk_worker.go:494] system disk:vda1
I0323 17:20:14.457869  543705 disk_worker.go:432] add disk info successfully, len:33
I0323 17:20:15.462079  543705 custom_config.go:64] query custom config with name: huawei_npu
I0323 17:20:16.458001  543705 gpu_kunlun.go:233] start to get kun lun stat by xpuml
I0323 17:20:16.458085  543705 gpu_kunlun.go:354] start to get kun lun stat by xpu_smi
I0323 17:20:16.458126  543705 gpu_kunlun.go:227] Add success, len:1
I0323 17:20:16.472675  543705 disk_local_worker.go:436] Get disk info: []
E0323 17:20:23.409899  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:20:23.409927  543705 memory.go:184] no items to output this cycle
I0323 17:20:23.410015  543705 cpu.go:275] no items to output this cycle
E0323 17:20:33.409795  543705 memory.go:175] total swap size <= 0, can not cal swap used percent
I0323 17:20:33.409820  543705 memory.go:184] no items to output this cycle
I0323 17:20:33.409828  543705 cpu.go:275] no items to output this cycle